Save custom page number labels in bookmarks - java

In the screenshot you can see custom page number labels (i, ii, iii, vii).
How can I save bookmarks with custom page number labels using PDFBox 2.0?
My code actually looks like this:
// Create the outline (bookmark) root and register it on the document catalog.
PDDocumentOutline documentOutline = new PDDocumentOutline();
document.getDocumentCatalog().setDocumentOutline(documentOutline);
// Top-level bookmark titled after the root TOC entry; no destination is set on it here.
PDOutlineItem outline = new PDOutlineItem();
outline.setTitle(toc.getName());
documentOutline.addLast(outline);
// Recursively add the TOC entries beneath the top-level bookmark.
addToc(toc, outline);
// The nodes are opened only after their children have been added.
outline.openNode();
documentOutline.openNode();
/**
 * Appends a bookmark for {@code toc} to {@code outlineItem} and then does the
 * same, recursively, for every child of {@code toc}.
 *
 * <p>The bookmark points at the page {@code toc.getPageNumber() - 1} of the
 * enclosing {@code document} using a fit-width destination.
 *
 * @param toc         the table-of-contents entry to turn into a bookmark
 * @param outlineItem the parent outline item that receives the new bookmark
 */
private void addToc(Toc toc, PDOutlineItem outlineItem) {
    PDOutlineItem entry = new PDOutlineItem();

    PDPageFitWidthDestination target = new PDPageFitWidthDestination();
    target.setPage(document.getPage(toc.getPageNumber() - 1));

    entry.setDestination(target);
    entry.setTitle(toc.getName());
    outlineItem.addLast(entry);

    if (toc.getChildren() == null) {
        return;
    }
    for (Toc child : toc.getChildren()) {
        addToc(child, entry);
    }
}

You can only label pages, not bookmarks. In the example below (with 3 empty pages), roman numbers start at 3, and then decimal at 1. The prefix for the romans is "RO ". So the pages are "RO III", "RO IV", "1".
// Three empty pages labelled "RO III", "RO IV" and "1".
// try-with-resources closes the document even when save() throws.
try (PDDocument doc = new PDDocument()) {
    doc.addPage(new PDPage());
    doc.addPage(new PDPage());
    doc.addPage(new PDPage());

    PDPageLabels pageLabels = new PDPageLabels(doc);

    // Range starting at page index 0: upper-case roman numerals, prefix "RO ", first value 3.
    PDPageLabelRange romanRange = new PDPageLabelRange();
    romanRange.setPrefix("RO ");
    romanRange.setStart(3);
    romanRange.setStyle(PDPageLabelRange.STYLE_ROMAN_UPPER);
    pageLabels.setLabelItem(0, romanRange);

    // Range starting at page index 2: decimal numbering restarting at 1.
    PDPageLabelRange decimalRange = new PDPageLabelRange();
    decimalRange.setStart(1);
    decimalRange.setStyle(PDPageLabelRange.STYLE_DECIMAL);
    pageLabels.setLabelItem(2, decimalRange);

    doc.getDocumentCatalog().setPageLabels(pageLabels);
    doc.save("labels.pdf");
}

Related

itext 7 pdf how to prevent text overflow on right side of the page

I am using itextpdf 7 (7.2.0) to create a pdf file. However even though the TOC part is rendered very well, in the content part the text overflows. Here is my code that generates the pdf:
public class Main {
public static void main(String[] args) throws IOException {
PdfWriter writer = new PdfWriter("fiftyfourthPdf.pdf");
PdfDocument pdf = new PdfDocument(writer);
Document document = new Document(pdf, PageSize.A4,false);
//document.setMargins(30,10,36,10);
// Create a PdfFont
PdfFont font = PdfFontFactory.createFont(StandardFonts.TIMES_ROMAN,"Cp1254");
document
.setTextAlignment(TextAlignment.JUSTIFIED)
.setFont(font)
.setFontSize(11);
PdfOutline outline = null;
java.util.List<AbstractMap.SimpleEntry<String, AbstractMap.SimpleEntry<String, Integer>>> toc = new ArrayList<>();
for(int i=0;i<5000;i++){
String line = "This is paragraph " + String.valueOf(i+1)+ " ";
line = line.concat(line).concat(line).concat(line).concat(line).concat(line);
Paragraph p = new Paragraph(line);
p.setKeepTogether(true);
document.add(p.setFont(font).setFontSize(10).setHorizontalAlignment(HorizontalAlignment.CENTER).setTextAlignment(TextAlignment.LEFT));
//PROCESS FOR TOC
String name = "para " + String.valueOf(i+1);
outline = createOutline(outline,pdf,line ,name );
AbstractMap.SimpleEntry<String, Integer> titlePage = new AbstractMap.SimpleEntry(line, pdf.getNumberOfPages());
p
.setFont(font)
.setFontSize(12)
//.setKeepWithNext(true)
.setDestination(name)
// Add the current page number to the table of contents list
.setNextRenderer(new UpdatePageRenderer(p));
toc.add(new AbstractMap.SimpleEntry(name, titlePage));
}
int contentPageNumber = pdf.getNumberOfPages();
for (int i = 1; i <= contentPageNumber; i++) {
// Write aligned text to the specified by parameters point
document.showTextAligned(new Paragraph(String.format("Sayfa %s / %s", i, contentPageNumber)).setFontSize(10),
559, 26, i, TextAlignment.RIGHT, VerticalAlignment.MIDDLE, 0);
}
//BEGINNING OF TOC
document.add(new AreaBreak());
Paragraph p = new Paragraph("Table of Contents")
.setFont(font)
.setDestination("toc");
document.add(p);
java.util.List<TabStop> tabStops = new ArrayList<>();
tabStops.add(new TabStop(580, TabAlignment.RIGHT, new DottedLine()));
for (AbstractMap.SimpleEntry<String, AbstractMap.SimpleEntry<String, Integer>> entry : toc) {
AbstractMap.SimpleEntry<String, Integer> text = entry.getValue();
p = new Paragraph()
.addTabStops(tabStops)
.add(text.getKey())
.add(new Tab())
.add(String.valueOf(text.getValue()))
.setAction(PdfAction.createGoTo(entry.getKey()));
document.add(p);
}
// Move the table of contents to the first page
int tocPageNumber = pdf.getNumberOfPages();
for (int i = 1; i <= tocPageNumber; i++) {
// Write aligned text to the specified by parameters point
document.showTextAligned(new Paragraph("\n footer text\n second line\nthird line").setFontColor(ColorConstants.RED).setFontSize(8),
300, 26, i, TextAlignment.CENTER, VerticalAlignment.MIDDLE, 0);
}
document.flush();
for(int z = 0; z< (tocPageNumber - contentPageNumber ); z++){
pdf.movePage(tocPageNumber,1);
pdf.getPage(1).setPageLabel(PageLabelNumberingStyle.UPPERCASE_LETTERS,
null, 1);
}
//pdf.movePage(tocPageNumber, 1);
// Add page labels
/*pdf.getPage(1).setPageLabel(PageLabelNumberingStyle.UPPERCASE_LETTERS,
null, 1);*/
pdf.getPage(tocPageNumber - contentPageNumber + 1).setPageLabel(PageLabelNumberingStyle.DECIMAL_ARABIC_NUMERALS,
null, 1);
document.close();
}
private static PdfOutline createOutline(PdfOutline outline, PdfDocument pdf, String title, String name) {
if (outline == null) {
outline = pdf.getOutlines(false);
outline = outline.addOutline(title);
outline.addDestination(PdfDestination.makeDestination(new PdfString(name)));
} else {
PdfOutline kid = outline.addOutline(title);
kid.addDestination(PdfDestination.makeDestination(new PdfString(name)));
}
return outline;
}
private static class UpdatePageRenderer extends ParagraphRenderer {
protected AbstractMap.SimpleEntry<String, Integer> entry;
public UpdatePageRenderer(Paragraph modelElement, AbstractMap.SimpleEntry<String, Integer> entry) {
super(modelElement);
this.entry = entry;
}
public UpdatePageRenderer(Paragraph modelElement) {
super(modelElement);
}
#Override
public LayoutResult layout(LayoutContext layoutContext) {
LayoutResult result = super.layout(layoutContext);
//entry.setValue(layoutContext.getArea().getPageNumber());
if (result.getStatus() != LayoutResult.FULL) {
if (null != result.getOverflowRenderer()) {
result.getOverflowRenderer().setProperty(
Property.LEADING,
result.getOverflowRenderer().getModelElement().getDefaultProperty(Property.LEADING));
} else {
// if overflow renderer is null, that could mean that the whole renderer will overflow
setProperty(
Property.LEADING,
result.getOverflowRenderer().getModelElement().getDefaultProperty(Property.LEADING));
}
}
return result;
}
#Override
// If not overriden, the default renderer will be used for the overflown part of the corresponding paragraph
public IRenderer getNextRenderer() {
return new UpdatePageRenderer((Paragraph) this.getModelElement());
}
}
}
Here are the screen shots of TOC part and content part :
TOC :
Content :
What am I missing? Thank you all for your help.
UPDATE
When I add the line below it renders with no overflow but the page margins of TOC and content part differ (the TOC margin is way more than the content margin). See the picture attached please :
document.setMargins(30,60,36,20);
Right Margin difference between TOC and content:
UPDATE 2 :
When I comment the line
document.setMargins(30,60,36,20);
and set the font size on line :
document.add(p.setFont(font).setFontSize(10).setHorizontalAlignment(HorizontalAlignment.CENTER).setTextAlignment(TextAlignment.LEFT));
to 12 then it renders fine. What difference should possibly the font size cause for the page content and margins? Are not there standard page margins and page setups? Am I unknowingly (I am newbie to itextpdf) messing some standard implementations?
TL; DR: either remove setFontSize in
p
.setFont(font)
.setFontSize(12)
//.setKeepWithNext(true)
.setDestination(name)
or change setFontSize(10) -> setFontSize(12) in
document.add(p.setFont(font).setFontSize(10).setHorizontalAlignment(HorizontalAlignment.CENTER).setTextAlignment(TextAlignment.LEFT));
Explanation: You are setting the Document to not immediately flush elements added to that document with the following line:
Document document = new Document(pdf, PageSize.A4,false);
Then you add a paragraph element with a font size of 10 to the document with the following line:
document.add(p.setFont(font).setFontSize(10).setHorizontalAlignment(HorizontalAlignment.CENTER).setTextAlignment(TextAlignment.LEFT));
What happens is that the element is laid out (split into lines etc.) but not yet drawn on the page. Then you call .setFontSize(12), and this new font size is applied at draw time only. So iText calculated that X characters would fit into one line assuming a font size of 10, while in reality the font size is 12 and fewer characters fit into one line.
There is no sense in setting the font size two times to different values - just pick one value you want to see in the resultant document and set it once.

Read acrofields with variable page size within document with iText

I am using iText to add and read AcroFields, but I run into an issue when the page size varies within a document.
For example, take a PDF document with 3 pages -> letter, legal, letter.
It is unable to get all AcroFields. But if all pages are legal or all pages are letter, it works perfectly.
Here is code which i use to read the acrofields.
// Collect, for every AcroForm field, its name, value, lower-left corner and width.
String pdf = "D:\\1350211.pdf";
PdfReader reader = new PdfReader(pdf);
AcroFields fields = reader.getAcroFields();
List<AcrofieldModel> lists = new ArrayList<>();
for (String fldName : fields.getFields().keySet()) {
    // Only the first position reported for the field is used.
    FieldPosition first = fields.getFieldPositions(fldName).get(0);
    float left = first.position.getLeft();
    float bottom = first.position.getBottom();
    float width = Math.abs(first.position.getRight() - left);
    String corner = "(X:" + left + " , Y:" + bottom + ") ";
    lists.add(new AcrofieldModel(fldName, fields.getField(fldName), corner, width));
}
return lists;

PDFBox: do PDDocument and PDPage have references to one another?

Does a PDPage object contains a reference to the PDDocument to which it belongs? In other words, does a PDPage has knowledge of its PDDocument?
Somewhere in the application I have a list of PDDocuments.
These documents get merged into one new PDDocument:
// Append every document of documentList, in list order, to one new, initially empty document.
PDFMergerUtility pdfMerger = new PDFMergerUtility();
PDDocument mergedPDDocument = new PDDocument();
for (PDDocument pdfDocument : documentList) {
pdfMerger.appendDocument(mergedPDDocument, pdfDocument);
}
Then this PdDocument gets split into bundles of 10:
// Split the merged document into bundles of bundleSize pages each.
Splitter splitter = new Splitter();
splitter.setSplitAtPage(bundleSize);
// The merged document is declared as mergedPDDocument in the merge step.
List<PDDocument> bundleList = splitter.split(mergedPDDocument);
My question with this is now:
if I loop over the pages of these splitted PDDocuments in the list, is there a way to know to which PDDocument a page originally belonged?
Also, if you have a PDPage object, can you get information from it like, it's pagenumber, ....?
Or can you get this via another way?
Does a PDPage object contains a reference to the PDDocument to which it belongs? In other words, does a PDPage has knowledge of its PDDocument?
Unfortunately the PDPage does not contain a reference to its parent PDDocument, but it has a list of all other pages in the document that can be used to navigate between pages without a reference to the parent PDDocument.
If you have a PDPage object, can you get information from it like its page number, or can you get this via another way?
There is a workaround to get information about the position of a PDPage in the document without the PDDocument available. Each PDPage has a dictionary with information about the size of the page, resources, fonts, content, etc. One of these attributes is called Parent; it refers to the Pages node whose Kids array lists the page dictionaries, each of which can be wrapped in a shallow PDPage using the constructor PDPage(COSDictionary). The pages are in the correct order, so the page number can be obtained from the position of the entry in the array.
If I loop over the pages of these splitted PDDocuments in the list, is there a way to know to which PDDocument a page originally belonged?
Once you merge the document list into a single document all references to the original documents will be lost. You can confirm this by looking at the Parent object inside the PDPage, go to Parent > Kids > COSObject[n] > Parent and see if the number for Parent is the same for all the elements in the array. In this example Parent is COSName {Parent} : 1781256139; for all pages.
COSName {Parent} : COSObject {
COSDictionary {
COSName {Kids} : COSArray {
COSObject {
COSDictionary {
COSName {TrimBox} : COSArray {0; 0; 612; 792;};
COSName {MediaBox} : COSArray {0; 0; 612; 792;};
COSName {CropBox} : COSArray {0; 0; 612; 792;};
COSName {Resources} : COSDictionary {
...
};
COSName {Contents} : COSObject {
...
};
COSName {Parent} : 1781256139;
COSName {StructParents} : COSInt {68};
COSName {ArtBox} : COSArray {0; 0; 612; 792; };
COSName {BleedBox} : COSArray {0; 0; 612; 792; };
COSName {Type} : COSName {Page};
}
}
...
COSName {Count} : COSInt {4};
COSName {Type} : COSName {Pages};
}
};
Source code
I wrote the following code to show how the information from the PDPage dictionary can be used to navigate the pages back and forward and get the page number using the position in the array.
public class PDPageUtils {
public static void main(String[] args) throws InvalidPasswordException, IOException {
System.setProperty("sun.java2d.cmm", "sun.java2d.cmm.kcms.KcmsServiceProvider");
PDDocument document = null;
try {
String filename = "src/main/resources/pdf/us-017.pdf";
document = PDDocument.load(new File(filename));
System.out.println("listIterator(PDPage)");
ListIterator<PDPage> pageIterator = listIterator(document.getPage(0));
while (pageIterator.hasNext()) {
PDPage page = pageIterator.next();
System.out.println("page #: " + pageIterator.nextIndex() + ", Structural Parent Key: " + page.getStructParents());
}
} finally {
if (document != null) {
document.close();
}
}
}
/**
* Returns a <code>ListIterator</code> initialized with the list of pages from
* the dictionary embedded in the specified <code>PDPage</code>. The current
* position of this <code>ListIterator</code> is set to the position of the
* specified <code>PDPage</code>.
*
* #param page the specified <code>PDPage</code>
*
* #see {#link java.util.ListIterator}
* #see {#link org.apache.pdfbox.pdmodel.PDPage}
*/
public static ListIterator<PDPage> listIterator(PDPage page) {
List<PDPage> pages = new LinkedList<PDPage>();
COSDictionary pageDictionary = page.getCOSObject();
COSDictionary parentDictionary = pageDictionary.getCOSDictionary(COSName.PARENT);
COSArray kidsArray = parentDictionary.getCOSArray(COSName.KIDS);
List<? extends COSBase> kidList = kidsArray.toList();
for (COSBase kid : kidList) {
if (kid instanceof COSObject) {
COSObject kidObject = (COSObject) kid;
COSBase type = kidObject.getDictionaryObject(COSName.TYPE);
if (type == COSName.PAGE) {
COSBase kidPageBase = kidObject.getObject();
if (kidPageBase instanceof COSDictionary) {
COSDictionary kidPageDictionary = (COSDictionary) kidPageBase;
pages.add(new PDPage(kidPageDictionary));
}
}
}
}
int index = pages.indexOf(page);
return pages.listIterator(index);
}
}
Sample output
In this example the PDF document has 4 pages and the iterator was initialized with the first page. Notice that the page number is the previousIndex()
// Forward traversal starting at the first page.
System.out.println("listIterator(PDPage)");
ListIterator<PDPage> pageIterator = listIterator(document.getPage(0));
while (pageIterator.hasNext()) {
PDPage page = pageIterator.next();
// previousIndex() is the index of the page that next() just returned.
System.out.println("page #: " + pageIterator.previousIndex() + ", Structural Parent Key: " + page.getStructParents());
}
listIterator(PDPage)
page #: 0, Structural Parent Key: 68
page #: 1, Structural Parent Key: 69
page #: 2, Structural Parent Key: 70
page #: 3, Structural Parent Key: 71
You can also navigate backwards by starting from the last page. Notice now that the page number is the nextIndex().
// Backward traversal starting at the page with index 3.
ListIterator<PDPage> pageIterator = listIterator(document.getPage(3));
// Step past the starting page so that the first previous() call returns it.
pageIterator.next();
while (pageIterator.hasPrevious()) {
PDPage page = pageIterator.previous();
// nextIndex() is the index of the page that previous() just returned.
System.out.println("page #: " + pageIterator.nextIndex() + ", Structural Parent Key: " + page.getStructParents());
}
listIterator(PDPage)
page #: 3, Structural Parent Key: 71
page #: 2, Structural Parent Key: 70
page #: 1, Structural Parent Key: 69
page #: 0, Structural Parent Key: 68

How set some kinds of font as fontFamily for styles in apache poi?

I want to create a new style for my docx file by Apache-poi and I want to set "IRnazanin" as fontFamily for this style (IRnazanin is a kind of persian fonts). I wrote this code from this link and this one, but every time that I run it, Arial is set for paragraphs with this style (when I open docx file that is created by apache-poi, paragraphs with this style have "Arial (Body CS)" font from Theme font not IRNazanin). what should I do to fix it? And also fontsize is not set.
// Open the template and register the two custom heading styles.
XWPFDocument docx = new XWPFDocument(OPCPackage.open("8.docx"));
XWPFStyles styles = docx.getStyles();
String heading1 = "My Heading 1";
String heading4 = "My Heading 4";
addCustomHeadingStyle(docx, styles, heading1, 1, 36, "4288BC");
addCustomHeadingStyle(docx, styles, heading4, 4, 20, "000000");

// Append one new paragraph that uses the level-4 style.
XWPFParagraph added = docx.createParagraph();
added.setStyle(heading4);
added.createRun().setText("سلااااام!");

// Walk all paragraphs: restyle the matching one and print style diagnostics.
System.out.println();
int position = 0;
for (XWPFParagraph current : docx.getParagraphs()) {
    position++;
    if (current.getText().equals("اول")) {
        current.setStyle(heading1);
        System.out.println("!##$%^&*()(*&^%$##!");
    }
    System.out.println("paragraph style id " + position + ":" + current.getStyleID());

    String styleId = current.getStyleID();
    if (styleId == null) {
        continue;
    }
    XWPFStyle style = styles.getStyle(styleId);
    if (style == null) {
        continue;
    }
    System.out.println(current.getText());
    System.out.println("Style name:" + style.getName());
    if (style.getName().startsWith("heading")) {
        //this is a heading
        System.out.println("###############");
    }
}
docx.write(docxOut);
/**
 * Builds a paragraph style and adds it to {@code styles}.
 *
 * <p>{@code strStyleId} is used both as the style id and as the style name.
 * {@code headingLevel} is written as the UI priority and as the outline level.
 * The run properties carry the "IRnazanin" font in the ascii and hAnsi slots
 * only, the size {@code pointSize} in {@code sz}, a fixed 24 in {@code szCs},
 * and the color parsed from {@code hexColor}.
 *
 * @param docxDocument not used by this method
 * @param styles       the style collection that receives the new style
 * @param strStyleId   id and display name of the new style
 * @param headingLevel value stored as UI priority and outline level
 * @param pointSize    value stored in the run property {@code sz}
 * @param hexColor     color as a hex string, converted with {@code hexToBytes}
 */
private static void addCustomHeadingStyle(XWPFDocument docxDocument, XWPFStyles styles, String strStyleId, int headingLevel, int pointSize, String hexColor) {
CTStyle ctStyle = CTStyle.Factory.newInstance();
ctStyle.setStyleId(strStyleId);
CTString styleName = CTString.Factory.newInstance();
styleName.setVal(strStyleId);
ctStyle.setName(styleName);
CTDecimalNumber indentNumber = CTDecimalNumber.Factory.newInstance();
indentNumber.setVal(BigInteger.valueOf(headingLevel));
// lower number > style is more prominent in the formats bar
ctStyle.setUiPriority(indentNumber);
// The same empty on/off element is reused for both flags below.
CTOnOff onoffnull = CTOnOff.Factory.newInstance();
ctStyle.setUnhideWhenUsed(onoffnull);
// style shows up in the formats bar
ctStyle.setQFormat(onoffnull);
// style defines a heading of the given level
CTPPr ppr = CTPPr.Factory.newInstance();
ppr.setOutlineLvl(indentNumber);
ctStyle.setPPr(ppr);
XWPFStyle style = new XWPFStyle(ctStyle);
// sz takes the requested size; szCs is always 24 regardless of pointSize.
CTHpsMeasure size = CTHpsMeasure.Factory.newInstance();
size.setVal(new BigInteger(String.valueOf(pointSize)));
CTHpsMeasure size2 = CTHpsMeasure.Factory.newInstance();
size2.setVal(new BigInteger("24"));
// Only the ascii and hAnsi font slots are set here.
CTFonts fonts = CTFonts.Factory.newInstance();
fonts.setAscii("IRnazanin");
fonts.setHAnsi("IRnazanin");
// NOTE(review): fonts, sizes and color are fully populated before being handed to rpr,
// and rpr before being handed to the style — presumably the setters copy their argument; keep this order.
CTRPr rpr = CTRPr.Factory.newInstance();
rpr.setRFonts(fonts);
rpr.setSz(size);
rpr.setSzCs(size2);
CTColor color = CTColor.Factory.newInstance();
color.setVal(hexToBytes(hexColor));
rpr.setColor(color);
style.getCTStyle().setRPr(rpr);
// is a null op if already defined
style.setType(STStyleType.PARAGRAPH);
styles.addStyle(style);
}
/**
 * Converts a hexadecimal string such as {@code "4288BC"} into its bytes.
 *
 * <p>Implemented with the core library only, so it does not depend on
 * {@code javax.xml.bind}, which is no longer part of the JDK since Java 11.
 *
 * @param hexString an even-length string of hex digits (upper or lower case);
 *                  may be {@code null}
 * @return the decoded bytes, or {@code null} when {@code hexString} is {@code null}
 * @throws IllegalArgumentException if the length is odd or a character is not a hex digit
 */
public static byte[] hexToBytes(String hexString) {
    if (hexString == null) {
        return null;
    }
    int length = hexString.length();
    if (length % 2 != 0) {
        throw new IllegalArgumentException("hexBinary needs to be even-length: " + hexString);
    }
    byte[] bytes = new byte[length / 2];
    for (int i = 0; i < length; i += 2) {
        int high = hexDigitValue(hexString.charAt(i));
        int low = hexDigitValue(hexString.charAt(i + 1));
        if (high < 0 || low < 0) {
            throw new IllegalArgumentException("contains illegal character for hexBinary: " + hexString);
        }
        bytes[i / 2] = (byte) ((high << 4) | low);
    }
    return bytes;
}

/** Returns the value 0-15 of an ASCII hex digit, or -1 if {@code c} is not one. */
private static int hexDigitValue(char c) {
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    return -1;
}
I get this code from this link and this one.
I found answer:
I replace this code and it works:
CTFonts fonts = CTFonts.Factory.newInstance();
fonts.setAscii("IRnazanin");
fonts.setHAnsi("IRnazanin");
// Added line: also set the cs font slot (presumably the one used for the Persian text — confirm).
fonts.setCs("IRnazanin");
rpr.setRFonts(fonts);

How to properly print nested HTML lists using iText? [duplicate]

I have XHTML content, and I have to create from this content a PDF file on the fly. I use iText pdf converter.
I tried the simple way, but I always get bad result after calling the XMLWorkerHelper parser.
XHTML:
<ul>
<li>First
<ol>
<li>Second</li>
<li>Second</li>
</ol>
</li>
<li>First</li>
</ul>
The expected value:
First
Second
Second
First
PDF result:
First Second Second
First
In the result there is no nested list. I need a solution for calling the parser, and not creating an iText Document instance.
Please take a look at the example NestedListHtml
In this example, I take your code snippet list.html:
<ul>
<li>First
<ol>
<li>Second</li>
<li>Second</li>
</ol>
</li>
<li>First</li>
</ul>
And I parse it into an ElementList:
// CSS
CSSResolver cssResolver =
        XMLWorkerHelper.getInstance().getDefaultCssResolver(true);
// HTML
HtmlPipelineContext htmlContext = new HtmlPipelineContext(null);
htmlContext.setTagFactory(Tags.getHtmlTagProcessorFactory());
htmlContext.autoBookmark(false);
// Pipelines: CSS resolver -> HTML -> element handler that collects into `elements`
ElementList elements = new ElementList();
ElementHandlerPipeline end = new ElementHandlerPipeline(elements, null);
HtmlPipeline html = new HtmlPipeline(htmlContext, end);
CssResolverPipeline css = new CssResolverPipeline(cssResolver, html);
// XML Worker
XMLWorker worker = new XMLWorker(css, true);
XMLParser p = new XMLParser(worker);
// Close the input stream once parsing is done, even if parse() throws.
try (FileInputStream htmlStream = new FileInputStream(HTML)) {
    p.parse(htmlStream);
}
Now I can add this list to the Document:
// Add every parsed element directly to the document, in order.
for (Element e : elements) {
document.add(e);
}
Or I can add this list to a Paragraph:
// Collect every parsed element in one Paragraph, then add that paragraph to the document.
Paragraph para = new Paragraph();
for (Element e : elements) {
para.add(e);
}
document.add(para);
You will get the desired result as shown in nested_list.pdf
You can not add nested lists to a PdfPCell or to a ColumnText. For instance: this will not work:
// Counter-example: the parsed elements are added to a cell of a two-column table.
PdfPTable table = new PdfPTable(2);
table.addCell("Nested lists don't work in a cell");
PdfPCell cell = new PdfPCell();
for (Element e : elements) {
cell.addElement(e);
}
table.addCell(cell);
document.add(table);
This is due to a limitation in the ColumnText class that has been there for many years. We have evaluated the problem and the only way to fix this, would be to rewrite ColumnText entirely. This is not an item on our current technical road map.
Here's a workaround for nested ordered and un-ordered lists.
The rich text editor I am using puts a class attribute of the form "ql-indent-1", "ql-indent-2", … on the li tags; based on that attribute, the code below adds the opening and closing ul/ol tags.
/**
 * Rewrites {@code htmlContent} by passing it through {@code replaceIndents},
 * first for {@code ul} lists and then for {@code ol} lists. A list type is
 * processed only if the parsed fragment contains at least one such element.
 *
 * @param htmlContent the HTML fragment to rewrite
 * @return the rewritten HTML, or the input unchanged if it has no lists
 */
public String replaceIndentSubList(String htmlContent) {
    org.jsoup.nodes.Document parsed = Jsoup.parseBodyFragment(htmlContent);
    String result = htmlContent;
    for (String listTag : new String[] {"ul", "ol"}) {
        Elements lists = parsed.select(listTag);
        if (!lists.isEmpty()) {
            result = replaceIndents(result, lists, listTag);
        }
    }
    return result;
}
public String replaceIndents(String htmlContent, Elements element, String tagType) {
String attributeKey = "class";
String startingULTgas = "<" + tagType + ">";
String endingULTags = "</" + tagType + ">";
int lengthOfQLIndenet = new String("ql-indent-").length();
HashMap<String, String> startingLiTagMap = new HashMap<String, String>();
HashMap<String, String> lastLiTagMap = new HashMap<String, String>();
Pattern regex = Pattern.compile("ql-indent-\\d");
HashSet<String> hash_Set = new HashSet<String>();
Elements element_Tag = element.select("li");
for (org.jsoup.nodes.Element element2 : element_Tag) {
org.jsoup.nodes.Attributes att = element2.attributes();
if (att.hasKey(attributeKey)) {
String attributeValue = att.get(attributeKey);
Matcher matcher = regex.matcher(attributeValue);
if (matcher.find()) {
if (!startingLiTagMap.containsKey(attributeValue)) {
startingLiTagMap.put(attributeValue, element2.toString());
}
hash_Set.add(matcher.group(0));
if (!startingLiTagMap.get(attributeValue)
.equalsIgnoreCase(element2.toString())) {
lastLiTagMap.put(attributeValue, element2.toString());
}
}
}
}
System.out.println(htmlContent);
Iterator value = hash_Set.iterator();
while (value.hasNext()) {
String liAttributeKey = (String) value.next();
int noOfIndentes = Integer
.parseInt(liAttributeKey.substring(lengthOfQLIndenet));
if (noOfIndentes > 1)
for (int i = 1; i < noOfIndentes; i++) {
startingULTgas = startingULTgas + "<" + tagType + ">";
endingULTags = endingULTags + "</" + tagType + ">";
}
htmlContent = htmlContent.replace(startingLiTagMap.get(liAttributeKey),
startingULTgas + startingLiTagMap.get(liAttributeKey));
if (lastLiTagMap.get(liAttributeKey) != null) {
System.out.println("Inside last Li Map");
htmlContent = htmlContent.replace(lastLiTagMap.get(liAttributeKey),
lastLiTagMap.get(liAttributeKey) + endingULTags);
}
else {
htmlContent = htmlContent.replace(startingLiTagMap.get(liAttributeKey),
startingLiTagMap.get(liAttributeKey) + endingULTags);
}
startingULTgas = "<" + tagType + ">";
endingULTags = "</" + tagType + ">";
}
System.out.println(htmlContent);[enter image description here][1]
return htmlContent;
}

Categories