Page size and formatting of PDF using iText pdfHTML

Page size and formatting of PDF using iText pdfHTML - java

I am trying to export 3 HTML pages (all with same content) into a PDF using iText7.1.0 and pdfHTML2.0.0 using this example. For some reason, the pages have formatting issue at the footer. The jsFiddle link to my HTML code that is being used by PDF renderer.
Below is the Java code used for rendering the PDF (Test.html is the same HTML code in the fiddle):
package com.itextpdf.htmlsamples.chapter01;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.utils.PdfMerger;
import com.itextpdf.licensekey.LicenseKey;
/**
* Can we parse different HTML files and combine them into one PDF?
* Yes, this can be done in different ways. This example shows how
* to create a PDF in memory for each HTML, then use PdfMerger to
* merge the different PDFs into one, on a page per page basis.
*/
public class C07E01_CombineHtml {
/** The Base URI of the HTML page. */
public static final String BASEURI = "src/main/resources/html/";
/** An array containing the paths to different HTML files. */
public static final String[] SRC = {
String.format("%sTest.html", BASEURI),
String.format("%sTest.html", BASEURI),
String.format("%sTest.html", BASEURI)
};
/** The target folder for the result. */
public static final String TARGET = "target/results/ch07/";
/** The path to the resulting PDF file. */
public static final String DEST = String.format("%sbundle.pdf", TARGET);
protected PageSize A4;
/**
* The main method of this example.
*
* #param args no arguments are needed to run this example.
* #throws IOException Signals that an I/O exception has occurred.
*/
public static void main(String[] args) throws IOException {
LicenseKey.loadLicenseFile("C://Users//Sparks//Desktop//itextkey-0.xml");
File file = new File(TARGET);
file.mkdirs();
new C07E01_CombineHtml().createPdf(BASEURI, SRC, DEST);
}
/**
* Creates the PDF file.
*
* #param baseUri the base URI
* #param src an array with the paths to different source HTML files
* #param dest the path to the resulting PDF
* #throws IOException Signals that an I/O exception has occurred.
*/
public void createPdf(String baseUri, String[] src, String dest) throws IOException {
ConverterProperties properties = new ConverterProperties();
properties.setBaseUri(baseUri);
PdfWriter writer = new PdfWriter(dest);
PdfDocument pdf = new PdfDocument(writer);
PdfMerger merger = new PdfMerger(pdf);
for (String html : src) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PdfDocument temp = new PdfDocument(new PdfWriter(baos));
PageSize pageSize = PageSize.A4;
temp.setDefaultPageSize(pageSize);
HtmlConverter.convertToPdf(new FileInputStream(html), temp, properties);
temp = new PdfDocument(new PdfReader(new ByteArrayInputStream(baos.toByteArray())));
merger.merge(temp, 1, temp.getNumberOfPages());
temp.close();
}
pdf.close();
}
}
The output PDF file has 6 pages without footer. It should have 3 pages each of 'A4' size.
Any suggestions would be helpful.

Changing the PageSize to one that is larger should solve this specific issue.
Afterward you can scale the page down in order to get a PDF with A4 pages.
Take a look at the code sample below to get an idea about how you can do this.
public static void main(String[] args) throws IOException {
ByteArrayOutputStream pdf = createPdf("src/main/resources/SO47869248/html.html");
// To get from A3 to A4 the size has to shrink 71%
new SO47869248().scalePdf(DEST, new ByteArrayInputStream(pdf.toByteArray()), 0.7071f);
}
public static ByteArrayOutputStream createPdf(String htmlSrc) throws IOException {
ByteArrayOutputStream output = new ByteArrayOutputStream();
ConverterProperties converterProperties = new ConverterProperties();
converterProperties.setBaseUri(new File(htmlSrc).getParent());
PdfWriter writer = new PdfWriter(output);
PdfDocument pdfDocument = new PdfDocument(writer);
PdfMerger merger = new PdfMerger(pdfDocument);
for(int x=0; x < 3; x++){
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PdfDocument temp = new PdfDocument(new PdfWriter(baos));
temp.setDefaultPageSize(PageSize.A3);
HtmlConverter.convertToPdf(new FileInputStream(htmlSrc), temp, converterProperties);
temp = new PdfDocument(new PdfReader(new ByteArrayInputStream(baos.toByteArray())));
merger.merge(temp, 1, temp.getNumberOfPages());
temp.close();
}
pdfDocument.close();
return output;
}
public void scalePdf(String dest, ByteArrayInputStream input, float scale) throws IOException {
// Create the source document
PdfDocument srcDoc = new PdfDocument(new PdfReader(input));
PdfDocument pdfDoc = new PdfDocument(new PdfWriter(dest));
ScaleDownEventHandler eventHandler = new ScaleDownEventHandler(scale);
int n = srcDoc.getNumberOfPages();
pdfDoc.addEventHandler(PdfDocumentEvent.START_PAGE, eventHandler);
PdfCanvas canvas;
PdfFormXObject page;
for (int p = 1; p <= n; p++) {
eventHandler.setPageDict(srcDoc.getPage(p).getPdfObject());
canvas = new PdfCanvas(pdfDoc.addNewPage());
page = srcDoc.getPage(p).copyAsFormXObject(pdfDoc);
canvas.addXObject(page, scale, 0f, 0f, scale, 0f, 0f);
}
pdfDoc.close();
srcDoc.close();
}
protected class ScaleDownEventHandler implements IEventHandler {
protected float scale = 1;
protected PdfDictionary pageDict;
public ScaleDownEventHandler(float scale) {
this.scale = scale;
}
public void setPageDict(PdfDictionary pageDict) {
this.pageDict = pageDict;
}
#Override
public void handleEvent(Event event) {
PdfDocumentEvent docEvent = (PdfDocumentEvent) event;
PdfPage page = docEvent.getPage();
page.put(PdfName.Rotate, pageDict.getAsNumber(PdfName.Rotate));
scaleDown(page, pageDict, PdfName.MediaBox, scale);
scaleDown(page, pageDict, PdfName.CropBox, scale);
}
protected void scaleDown(PdfPage destPage, PdfDictionary pageDictSrc, PdfName box, float scale) {
PdfArray original = pageDictSrc.getAsArray(box);
if (original != null) {
float width = original.getAsNumber(2).floatValue() - original.getAsNumber(0).floatValue();
float height = original.getAsNumber(3).floatValue() - original.getAsNumber(1).floatValue();
PdfArray result = new PdfArray();
result.add(new PdfNumber(0));
result.add(new PdfNumber(0));
result.add(new PdfNumber(width * scale));
result.add(new PdfNumber(height * scale));
destPage.put(box, result);
}
}
}
For this example I picked the A3 pagesize constant. You can also create a PageSize object using specific measurements. As shown below:
Constructor:
public PageSize(float width, float height)
Example:
PageSize pageSize = new PageSize(750, 1000);
PdfDocument temp = new PdfDocument(pageSize);

try this.
style="page-break-after: always; width: 320pt;" in

Related

Adding UNICODE emoticon on pdf with itext

I have some problem adding unicode emoticon on pdf created with itext pdf. I tried with this and itextpdf core 5.5.13
public class MathSymbols {
public static final String DEST = "EXAMPLE.pdf";
public static final String FONT = "/res/fonts/arialuni.ttf";
public static String TEXT ;
public static void main(String[] args) throws IOException, DocumentException {
File file = new File(DEST);
TEXT = "this "+Character.toChars(0x1F600)+" string \uD83D\uDE00 contains \ud83d\ude00 special \u2609 characters like this \u2208, \u2229, \u2211, \u222b, \u2206";
new MathSymbols().createPdf(DEST);
}
public void createPdf(String dest) throws IOException, DocumentException {
Document document = new Document();
PdfWriter.getInstance(document, new FileOutputStream(dest));
document.open();
BaseFont bf = BaseFont.createFont(FONT, BaseFont.IDENTITY_H, BaseFont.NOT_EMBEDDED);
Font f = new Font(bf,12);
Paragraph p = new Paragraph(TEXT, f);
document.add(p);
document.close();
}}
I have the same proble on itextpdf 7.x with this snippet
public class Main {
public static final String DEST = "example.pdf";
public static void main(String args[]) throws IOException {
File file = new File(DEST);
new Main().createPdf(DEST);
}
public void createPdf(String dest) throws IOException {
PdfWriter writer = new PdfWriter(dest);
PdfDocument pdf = new PdfDocument(writer);
Document document = new Document(pdf);
PdfFont f = PdfFontFactory.createFont("/resources/arialuni.ttf", PdfEncodings.IDENTITY_H,true);
Paragraph p = new Paragraph("H\u2082SO\u2074 1 \uD83D\uDE00 contains \ud83d\ude00 spe \u2702 cial \u2609 characters like this \u2208, \u2229, \u2211, \u222b, \u2206").setFont(f).setFontSize(10);
document.add(p);
document.close();
}}
I tried with different fonts and different way in java like here.
But I only obtaing a withe space, or a square, in the pdf and no emoticon

Java - Merge multiple images to a single PDF using PDFBox

I was able to merge multiple PDF files into a single PDF using the code below -
public void mergePDF() {
File file1 = new File("inputPDF/001.pdf");
File file2 = new File("inputPDF/002.pdf");
File file3 = new File("inputPDF/003.pdf");
File file4 = new File("inputPDF/004.pdf");
try {
PDDocument doc1 = PDDocument.load(file1);
PDDocument doc2 = PDDocument.load(file2);
PDDocument doc3 = PDDocument.load(file3);
PDDocument doc4 = PDDocument.load(file4);
PDFMergerUtility PDFmerger = new PDFMergerUtility();
PDFmerger.setDestinationFileName("outputImages/merged.pdf");
System.out.println("Destination path set to "+PDFmerger.getDestinationFileName());
PDFmerger.addSource(file1);
PDFmerger.addSource(file2);
PDFmerger.addSource(file3);
PDFmerger.addSource(file4);
//Merging the documents
PDFmerger.mergeDocuments();
doc1.close();
doc2.close();
doc3.close();
doc4.close();
System.out.println("Done!");
} catch (IOException e) {
e.printStackTrace();
}
}
However, my requirement is to merge multiple images (JPG, PNG) to a single PDF as well.
Is it possible to merge multiple images to a single PDF using PDFBox?

Since I struggled with this task, here's my code. The merged document is PDF/A-1b compliant
import com.google.common.io.Resources;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Calendar;
import java.util.List;
import javax.xml.transform.TransformerException;
import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.preflight.parser.PreflightParser;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.DublinCoreSchema;
import org.apache.xmpbox.schema.PDFAIdentificationSchema;
import org.apache.xmpbox.schema.XMPBasicSchema;
import org.apache.xmpbox.type.BadFieldValueException;
import org.apache.xmpbox.xml.XmpSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public final class PDFMerger {
private static final Logger LOG = LoggerFactory.getLogger(PDFMerger3.class);
private static final String OUTPUT_CONDITION_IDENTIFIER = "sRGB IEC61966-2.1";
public static final String DOCUMENT_CREATOR = "Mr. Meeseeks";
public static final String DOCUMENT_SUBJECT = "Great subject";
public static final String DOCUMENT_TITLE = "Here goes your title";
/**
* Creates a compound PDF document from a list of input documents.
* <p>
* The merged document is PDF/A-1b compliant
*
* #param sources list of source PDF document streams.
* #return compound PDF document as a readable input stream.
* #throws IOException if anything goes wrong during PDF merge.
*/
public static ByteArrayOutputStream mergeFiles(final List<InputStream> sources) throws IOException {
Path mergeDirectory = Files.createTempDirectory("merge-" + System.currentTimeMillis());
try (ByteArrayOutputStream mergedPDFOutputStream = new ByteArrayOutputStream()) {
LOG.debug("Merging {} source documents into one PDF", sources.size());
PDFMergerUtility mixedPdfMerger = createMixedPdfMerger(sources, mergedPDFOutputStream, mergeDirectory);
mergeFileStreams(mergedPDFOutputStream, mixedPdfMerger);
return mergedPDFOutputStream;
} catch (Exception e) {
if (!(e instanceof IOException)) {
throw new IOException("PDF merge problem", e);
}
throw (IOException) e;
} finally {
FileUtils.deleteDirectory(mergeDirectory.toFile());
sources.forEach(IOUtils::closeQuietly);
}
}
private static void mergeFileStreams(ByteArrayOutputStream mergedPDFOutputStream, PDFMergerUtility pdfMerger)
throws IOException, BadFieldValueException, TransformerException {
LOG.debug("Initialising PDF merge utility");
try (COSStream cosStream = new COSStream()) {
// PDF and XMP properties must be identical, otherwise document is not PDF/A compliant
pdfMerger.setDestinationDocumentInformation(createPDFDocumentInfo());
pdfMerger.setDestinationMetadata(createXMPMetadata(cosStream));
pdfMerger.mergeDocuments(MemoryUsageSetting.setupTempFileOnly());
LOG.debug("PDF merge successful, size = {} bytes", mergedPDFOutputStream.size());
}
}
#SuppressWarnings("UnstableApiUsage")
private static PDFMergerUtility createMixedPdfMerger(List<InputStream> sources, ByteArrayOutputStream mergedPDFOutputStream, Path mergeDirectory) throws IOException {
PDFMergerUtility pdfMerger = new PDFMergerUtility();
byte[] colorProfile = org.apache.commons.io.IOUtils.toByteArray(Resources.getResource("sRGB.icc"));
for (InputStream source : sources) {
File file = streamToFile(mergeDirectory, source);
if (isPdf(file)) {
pdfMerger.addSource(file);
} else {
pdfMerger.addSource(imageToPDDocument(mergeDirectory, file, colorProfile));
}
}
pdfMerger.setDestinationStream(mergedPDFOutputStream);
return pdfMerger;
}
private static PDDocumentInformation createPDFDocumentInfo() {
LOG.debug("Setting document info (title, author, subject) for merged PDF");
PDDocumentInformation documentInformation = new PDDocumentInformation();
documentInformation.setTitle(DOCUMENT_TITLE);
documentInformation.setCreator(DOCUMENT_CREATOR);
documentInformation.setSubject(DOCUMENT_SUBJECT);
return documentInformation;
}
private static PDMetadata createXMPMetadata(COSStream cosStream)
throws BadFieldValueException, TransformerException, IOException {
LOG.debug("Setting XMP metadata (title, author, subject) for merged PDF");
XMPMetadata xmpMetadata = XMPMetadata.createXMPMetadata();
// PDF/A-1b properties
PDFAIdentificationSchema pdfaSchema = xmpMetadata.createAndAddPFAIdentificationSchema();
pdfaSchema.setPart(1);
pdfaSchema.setConformance("B");
pdfaSchema.setAboutAsSimple("");
// Dublin Core properties
DublinCoreSchema dublinCoreSchema = xmpMetadata.createAndAddDublinCoreSchema();
dublinCoreSchema.setTitle(DOCUMENT_TITLE);
dublinCoreSchema.addCreator(DOCUMENT_CREATOR);
dublinCoreSchema.setDescription(DOCUMENT_SUBJECT);
// XMP Basic properties
XMPBasicSchema basicSchema = xmpMetadata.createAndAddXMPBasicSchema();
Calendar creationDate = Calendar.getInstance();
basicSchema.setCreateDate(creationDate);
basicSchema.setModifyDate(creationDate);
basicSchema.setMetadataDate(creationDate);
basicSchema.setCreatorTool(DOCUMENT_CREATOR);
// Create and return XMP data structure in XML format
try (ByteArrayOutputStream xmpOutputStream = new ByteArrayOutputStream();
OutputStream cosXMPStream = cosStream.createOutputStream()) {
new XmpSerializer().serialize(xmpMetadata, xmpOutputStream, true);
cosXMPStream.write(xmpOutputStream.toByteArray());
return new PDMetadata(cosStream);
}
}
private static File imageToPDDocument(Path mergeDirectory, File file, byte[] colorProfile) throws IOException {
try (PDDocument doc = new PDDocument()) {
PDImageXObject pdImage = PDImageXObject.createFromFileByContent(file, doc);
drawPage(doc, pdImage);
doc.getDocumentCatalog().addOutputIntent(createColorScheme(doc, colorProfile));
File pdfFile = Files.createTempFile(mergeDirectory, String.valueOf(System.currentTimeMillis()), ".tmp").toFile();
doc.save(pdfFile);
return pdfFile;
}
}
private static void drawPage(PDDocument doc, PDImageXObject pdImage) throws IOException {
PDPage page;
pdImage.getCOSObject().setItem(COSName.SMASK, COSName.NONE);
boolean isLandscapeMode = pdImage.getWidth() > pdImage.getHeight();
if (isLandscapeMode) {
page = new PDPage(new PDRectangle(PDRectangle.A4.getHeight(), PDRectangle.A4.getWidth()));
float scale = Math.min(Math.min(PDRectangle.A4.getWidth() / pdImage.getHeight(), PDRectangle.A4.getHeight() / pdImage.getWidth()), 1);
float width = pdImage.getWidth() * scale;
float height = pdImage.getHeight() * scale;
// center the image
float startWidth = (PDRectangle.A4.getHeight() - width) / 2;
float startHeight = (PDRectangle.A4.getWidth() - height) / 2;
try (PDPageContentStream contentStream = new PDPageContentStream(doc, page)) {
contentStream.drawImage(pdImage, startWidth, startHeight, width, height);
}
} else {
page = new PDPage(PDRectangle.A4);
float scale = Math.min(Math.min(PDRectangle.A4.getWidth() / pdImage.getWidth(), PDRectangle.A4.getHeight() / pdImage.getHeight()), 1);
float width = pdImage.getWidth() * scale;
float height = pdImage.getHeight() * scale;
// try to center the image
float startWidth = (PDRectangle.A4.getWidth() - width) / 2;
float startHeight = (PDRectangle.A4.getHeight() - height) / 2;
try (PDPageContentStream contentStream = new PDPageContentStream(doc, page)) {
contentStream.drawImage(pdImage, startWidth, startHeight, width, height);
}
}
doc.addPage(page);
}
private static PDOutputIntent createColorScheme(PDDocument doc, byte[] colorProfile) throws IOException {
PDOutputIntent intent = new PDOutputIntent(doc, new ByteArrayInputStream(colorProfile));
intent.setInfo(OUTPUT_CONDITION_IDENTIFIER);
intent.setOutputCondition(OUTPUT_CONDITION_IDENTIFIER);
intent.setOutputConditionIdentifier(OUTPUT_CONDITION_IDENTIFIER);
intent.setRegistryName("http://www.color.org");
return intent;
}
private static boolean isPdf(File file) {
try {
PreflightParser preflightParser = new PreflightParser(file);
preflightParser.parse();
return true;
} catch (Exception e) {
return false;
}
}
private static File streamToFile(Path tempDirectory, InputStream in) throws IOException {
final Path tempFile = Files.createTempFile(tempDirectory, String.valueOf(System.currentTimeMillis()), ".tmp");
try (FileOutputStream out = new FileOutputStream(tempFile.toFile())) {
IOUtils.copy(in, out);
}
return tempFile.toFile();
}
}
You can take a look at this gist for an option to merge pdf files as well.

You need to convert the images to a PDF first. See How can I convert a PNG file to PDF using java? or Create PDF from a PNG image Or Java Panel for an example on how to do this.
After that, use pdfbox to merge the resulting pdfs.

I have used itext library for merging images and convert them to pdf
Here is the code
Document document = new Document();
PdfWriter.getInstance(document, new FileOutputStream(image_path+"\\"+image_name+".pdf"));
document.open();
Paragraph p = new Paragraph();
File files[] = new File(path).listFiles();
PdfPTable table = new PdfPTable(1);
for (File file : files) {
table.setWidthPercentage(100);
table.addCell(createImageCell(file.getAbsolutePath()));
}
document.add(table);
document.close();
Hope It helps

Read multiple PDF files inside a folder using iText [closed]

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 8 years ago.
Improve this question
I am getting problem to read PDF files using iText in java. I know the way to read a page in a PDF file. Now, I want to read multiple PDF files from a folder.
How can I achieve this task?

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import part1.chapter01.HelloWorld;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.io.RandomAccessSourceFactory;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PRTokeniser;
import com.itextpdf.text.pdf.PdfContentByte;
import com.itextpdf.text.pdf.PdfDictionary;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfTemplate;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.text.pdf.RandomAccessFileOrArray;
import com.itextpdf.text.pdf.parser.ContentByteUtils;
import com.itextpdf.text.pdf.parser.PdfContentStreamProcessor;
import com.itextpdf.text.pdf.parser.RenderListener;
public class ParsingHelloWorld {
/** The resulting PDF. */
public static final String PDF = "results/part4/chapter15/hello_reverse.pdf";
/** A possible resulting after parsing the PDF. */
public static final String TEXT1 = "results/part4/chapter15/result1.txt";
/** A possible resulting after parsing the PDF. */
public static final String TEXT2 = "results/part4/chapter15/result2.txt";
/** A possible resulting after parsing the PDF. */
public static final String TEXT3 = "results/part4/chapter15/result3.txt";
/**
* Generates a PDF file with the text 'Hello World'
* #throws DocumentException
* #throws IOException
*/
public void createPdf(String filename) throws DocumentException, IOException {
// step 1
Document document = new Document();
// step 2
PdfWriter writer
= PdfWriter.getInstance(document, new FileOutputStream(filename));
// step 3
document.open();
// step 4
// we add the text to the direct content, but not in the right order
PdfContentByte cb = writer.getDirectContent();
BaseFont bf = BaseFont.createFont();
cb.beginText();
cb.setFontAndSize(bf, 12);
cb.moveText(88.66f, 367);
cb.showText("ld");
cb.moveText(-22f, 0);
cb.showText("Wor");
cb.moveText(-15.33f, 0);
cb.showText("llo");
cb.moveText(-15.33f, 0);
cb.showText("He");
cb.endText();
// we also add text in a form XObject
PdfTemplate tmp = cb.createTemplate(250, 25);
tmp.beginText();
tmp.setFontAndSize(bf, 12);
tmp.moveText(0, 7);
tmp.showText("Hello People");
tmp.endText();
cb.addTemplate(tmp, 36, 343);
// step 5
document.close();
}
/**
* Parses the PDF using PRTokeniser
* #param src the path to the original PDF file
* #param dest the path to the resulting text file
* #throws IOException
*/
public void parsePdf(String src, String dest) throws IOException {
PdfReader reader = new PdfReader(src);
// we can inspect the syntax of the imported page
byte[] streamBytes = reader.getPageContent(1);
PRTokeniser tokenizer = new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().createSource(streamBytes)));
PrintWriter out = new PrintWriter(new FileOutputStream(dest));
while (tokenizer.nextToken()) {
if (tokenizer.getTokenType() == PRTokeniser.TokenType.STRING) {
out.println(tokenizer.getStringValue());
}
}
out.flush();
out.close();
reader.close();
}
/**
* Extracts text from a PDF document.
* #param src the original PDF document
* #param dest the resulting text file
* #throws IOException
*/
public void extractText(String src, String dest) throws IOException {
PrintWriter out = new PrintWriter(new FileOutputStream(dest));
PdfReader reader = new PdfReader(src);
RenderListener listener = new MyTextRenderListener(out);
PdfContentStreamProcessor processor = new PdfContentStreamProcessor(listener);
PdfDictionary pageDic = reader.getPageN(1);
PdfDictionary resourcesDic = pageDic.getAsDict(PdfName.RESOURCES);
processor.processContent(ContentByteUtils.getContentBytesForPage(reader, 1), resourcesDic);
out.flush();
out.close();
reader.close();
}
/**
* Main method.
* #param args no arguments needed
* #throws DocumentException
* #throws IOException
*/
public static void main(String[] args) throws DocumentException, IOException {
ParsingHelloWorld example = new ParsingHelloWorld();
HelloWorld.main(args);
example.createPdf(PDF);
example.parsePdf(HelloWorld.RESULT, TEXT1);
example.parsePdf(PDF, TEXT2);
example.extractText(PDF, TEXT3);
}
}
Taken from the iTextPDF site. This shows you how you can read a single PDF. If you want to read multiple PDFs from a folder, you need a DirectoryStream<Path>.
DirectoryStream<Path> allPDF = Files.newDirectoryStream(Paths.get("/path/to/folder"),"*.pdf");
Now, the DirectoryStream contains only those Path objects that correspond to the PDF files in the folder.
Iterate over the DirectoryStream , get the absolute path of the Path objects and then do whatever you want.

I want to add a Line into a PDF document using java

I am currently using PDFBox and reading from within a.pdf which is found in folder 1
I first list all the Pdf files found within the folder.
Then I check the number of pages that each file has.
Now i want to go to the very end of the file below the footer to add an image that can be recognised by the printer to staple the pages since it will realise it has reached end of file.
I have arrived till getting list of files and the number of pages.
What command do i use to go to the end of the last page and write there.
Should i transform the .pdf file into text or
Should i be able to use PDPageContentStream
This is the code I am currently using I am trying to test and see if a AAA string will be insterted into my last page of the pdf file. the project is executing with no errors but for some reason it is not being inserted into the pdf.
package pdfviewer;
import java.io.*;
import java.util.*;
import java.util.List;
import java.io.IOException;
import org.apache.pdfbox.PDFReader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.edit.PDPageContentStream;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
public class Main {
/**
* #param args the command line arguments
*/
public static List flist()
{
List listfile = new ArrayList();
String path = "C:/1";
String files;
File folder = new File(path);
File[] listOfFiles = folder.listFiles();
for (int i = 0; i < listOfFiles.length; i++)
{
if (listOfFiles[i].isFile())
{
files = listOfFiles[i].getName();
if (files.endsWith(".pdf") || files.endsWith(".PDF"))
{
listfile.add(listOfFiles[i]);
}
}
}
System.out.println(listfile);
return listfile;
}
public static void CheckPages(List a)
{
String dir = null;
Object[] arraydir = a.toArray(new Object[0]);
for (int i=0; i< arraydir.length; i++)
{
int pages = 0;
PDFont font = PDType1Font.HELVETICA_BOLD;
float fontSize = 12.0f;
dir = arraydir[i].toString();
System.out.println(dir);
try {
PDDocument pdoc = PDDocument.load(dir);
List allPages = pdoc.getDocumentCatalog().getAllPages();
pages = pdoc.getNumberOfPages();
System.out.println(allPages);
int f = pages;
System.out.println(pages);
PDPage page = (PDPage) allPages.get(i);
//System.out.println(page);
PDRectangle pageSize = page.findMediaBox();
float stringWidth = font.getStringWidth( "AAA" );
float centeredPosition = (pageSize.getWidth() - (stringWidth*fontSize)/1000f)/2f;
PDPageContentStream contentStream = new PDPageContentStream(pdoc,page,true,true);
//System.out.println(contentStream);
contentStream.beginText();
contentStream.setFont( font, fontSize );
contentStream.moveTextPositionByAmount( centeredPosition, 30 );
contentStream.drawString( "AAA" );
contentStream.endText();
contentStream.close();
pdoc.close();
}
catch (Exception e)
{
System.err.println("An exception occured in parsing the PDF Document."+ e.getMessage());
}
}
}
public static void main(String[] args)
{
List l = new ArrayList();
l = pdfviewer.Main.flist();
pdfviewer.Main.CheckPages(l);
}
}
Thanks for your attention
The code I was using above is correct.
The problem is that the PDF files being generated are version 1.2, that is the reason why I am not being allowed to Edit the pdf document.
Does anyone know what I should do if i'm using a version 1.2, since I can't really upgrade it.

you can look at the examples supplied with the library.
there are two files that are of interest to you:
1- AddImageToPDF.java AddImageToPDF.java on google code search
2- AddMessageToEachPage.java AddMessageToEachPage.java on google code search
the second one adds a message to every page but you can modify it to work with the last page only. according to the PDFBox user guide document, they should be found under the folder: src/main/java/org/apache/pdfbox/examples
I have added links on google code search in case you have trouble locating the files.
I have not worked with the library or tried the examples and I am quite sure you will need to modify the code a little to suit your needs for the location of the added line/image.
In any case, if this helps you and you get a working solution, you can add the solution so that others can benefit from it.
EDIT:
After seeing the code posted by the question author, I add a modification to make it work.
I allowed myself also to make few changes for clarity.
import java.io.File;
import java.io.FileFilter;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.edit.PDPageContentStream;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
public class Main {
/**
* #param args the command line arguments
*/
public static final FileFilter pdfFileFilter = new FileFilter() {
public boolean accept(File file) {
return file.isFile() && file.getName().toLowerCase().endsWith(".pdf");
}
};
public static void closeQuietly(PDDocument doc) {
if (doc != null) {
try {
doc.close();
} catch (Exception exception) {
//do something here if you wish like logging
}
}
}
public static void CheckPages(File[] sourcePdfFiles,String textToInsert, String prefix) {
for (File sourcePdfFile : sourcePdfFiles) {
PDFont font = PDType1Font.HELVETICA_BOLD;
float fontSize = 12.0f;
PDDocument pdoc = null;
try {
pdoc = PDDocument.load(sourcePdfFile);
List allPages = pdoc.getDocumentCatalog().getAllPages();
PDPage lastPage = (PDPage) allPages.get(allPages.size() - 1);
PDRectangle pageSize = lastPage.findMediaBox();
float stringWidth = font.getStringWidth(textToInsert);
float centeredPosition = (pageSize.getWidth() - (stringWidth * fontSize) / 1000f) / 2f;
PDPageContentStream contentStream = new PDPageContentStream(pdoc, lastPage, true, true);
contentStream.beginText();
contentStream.setFont(font, fontSize);
contentStream.moveTextPositionByAmount(centeredPosition, 30);
contentStream.drawString(textToInsert);
contentStream.endText();
contentStream.close();
File resultFile = new File(sourcePdfFile.getParentFile(), prefix + sourcePdfFile.getName());
pdoc.save(resultFile.getAbsolutePath());
} catch (Exception e) {
System.err.println("An exception occured in parsing the PDF Document." + e.getMessage());
} finally {
closeQuietly(pdoc);
}
}
}
public static void main(String[] args) {
File pdfFilesFolder = new File("C:\\1");
File[] pdfFiles = pdfFilesFolder.listFiles(pdfFileFilter);
//when a file is processed, the result will be saved in a new file having the location of the source file
//and the same name of source file prefixed with this
String modifiedFilePrefix = "modified-";
CheckPages(pdfFiles,"AAA", modifiedFilePrefix);
}
}

Rotate single page of document

How do I rotate the second page of my PdF when I'm using iText.
The first and other pages I would like to stay in the same orientation.
I know of ...
Document document = new Document(PageSize.A4.rotate(), 50, 50, 50, 50);
But that will rotate everything.

You can use document.setPageSize() before document.addNew();.
For example:
Document document = new Document();
....
document.setPageSize(PageSie.A4);
document.newPage();
......
document.setPageSize(PageSize.A4.rotate());
document.newPage();
It worked well for me.

From http://itextpdf.com/examples/iia.php?id=232 :
/*
* This class is part of the book "iText in Action - 2nd Edition"
* written by Bruno Lowagie (ISBN: 9781935182610)
* For more info, go to: http://itextpdf.com/examples/
* This example only works with the AGPL version of iText.
*/
package part4.chapter13;
import java.io.FileOutputStream;
import java.io.IOException;
import part1.chapter03.MovieTemplates;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfDictionary;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfNumber;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
public class RotatePages {
/** The resulting PDF. */
public static final String RESULT
= "results/part4/chapter13/timetable_rotated.pdf";
/**
* Manipulates a PDF file src with the file dest as result
* #param src the original PDF
* #param dest the resulting PDF
* #throws IOException
* #throws DocumentException
*/
public void manipulatePdf(String src, String dest)
throws IOException, DocumentException {
PdfReader reader = new PdfReader(MovieTemplates.RESULT);
int n = reader.getNumberOfPages();
int rot;
PdfDictionary pageDict;
for (int i = 1; i <= n; i++) {
rot = reader.getPageRotation(i);
pageDict = reader.getPageN(i);
pageDict.put(PdfName.ROTATE, new PdfNumber(rot + 90));
}
PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(RESULT));
stamper.close();
reader.close();
}
/**
* Main method creating the PDF.
* #param args no arguments needed
* #throws DocumentException
* #throws IOException
*/
public static void main(String[] args)
throws IOException, DocumentException {
new MovieTemplates().createPdf(MovieTemplates.RESULT);
new RotatePages().manipulatePdf(MovieTemplates.RESULT, RESULT);
}
}

I rotate the orientation:
PdfWriter writer = new PdfWriter(out);
PdfDocument pdf = new PdfDocument(writer);
Document document = new Document(pdf, PageSize.LETTER.rotate());

We Keep Coding

Java is a programming language and computing platform first released by Sun Microsystems in 1995.

Page size and formatting of PDF using iText pdfHTML - java

try this. style="page-break-after: always; width: 320pt;" in

Related

Adding UNICODE emoticon on pdf with itext

Java - Merge multiple images to a single PDF using PDFBox

Read multiple PDF files inside a folder using iText [closed]

I want to add a Line into a PDF document using java

Rotate single page of document

Categories

Resources