Read multiple PDF files inside a folder using iText [closed]

Read multiple PDF files inside a folder using iText [closed] - java

Closed. This question needs to be more focused. It is not currently accepting answers.
Want to improve this question? Update the question so it focuses on one problem only by editing this post.
Closed 8 years ago.
Improve this question
I am getting problem to read PDF files using iText in java. I know the way to read a page in a PDF file. Now, I want to read multiple PDF files from a folder.
How can I achieve this task?

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import part1.chapter01.HelloWorld;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.io.RandomAccessSourceFactory;
import com.itextpdf.text.pdf.BaseFont;
import com.itextpdf.text.pdf.PRTokeniser;
import com.itextpdf.text.pdf.PdfContentByte;
import com.itextpdf.text.pdf.PdfDictionary;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfTemplate;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.text.pdf.RandomAccessFileOrArray;
import com.itextpdf.text.pdf.parser.ContentByteUtils;
import com.itextpdf.text.pdf.parser.PdfContentStreamProcessor;
import com.itextpdf.text.pdf.parser.RenderListener;
public class ParsingHelloWorld {
/** The resulting PDF. */
public static final String PDF = "results/part4/chapter15/hello_reverse.pdf";
/** A possible resulting after parsing the PDF. */
public static final String TEXT1 = "results/part4/chapter15/result1.txt";
/** A possible resulting after parsing the PDF. */
public static final String TEXT2 = "results/part4/chapter15/result2.txt";
/** A possible resulting after parsing the PDF. */
public static final String TEXT3 = "results/part4/chapter15/result3.txt";
/**
* Generates a PDF file with the text 'Hello World'
* #throws DocumentException
* #throws IOException
*/
public void createPdf(String filename) throws DocumentException, IOException {
// step 1
Document document = new Document();
// step 2
PdfWriter writer
= PdfWriter.getInstance(document, new FileOutputStream(filename));
// step 3
document.open();
// step 4
// we add the text to the direct content, but not in the right order
PdfContentByte cb = writer.getDirectContent();
BaseFont bf = BaseFont.createFont();
cb.beginText();
cb.setFontAndSize(bf, 12);
cb.moveText(88.66f, 367);
cb.showText("ld");
cb.moveText(-22f, 0);
cb.showText("Wor");
cb.moveText(-15.33f, 0);
cb.showText("llo");
cb.moveText(-15.33f, 0);
cb.showText("He");
cb.endText();
// we also add text in a form XObject
PdfTemplate tmp = cb.createTemplate(250, 25);
tmp.beginText();
tmp.setFontAndSize(bf, 12);
tmp.moveText(0, 7);
tmp.showText("Hello People");
tmp.endText();
cb.addTemplate(tmp, 36, 343);
// step 5
document.close();
}
/**
* Parses the PDF using PRTokeniser
* #param src the path to the original PDF file
* #param dest the path to the resulting text file
* #throws IOException
*/
public void parsePdf(String src, String dest) throws IOException {
PdfReader reader = new PdfReader(src);
// we can inspect the syntax of the imported page
byte[] streamBytes = reader.getPageContent(1);
PRTokeniser tokenizer = new PRTokeniser(new RandomAccessFileOrArray(new RandomAccessSourceFactory().createSource(streamBytes)));
PrintWriter out = new PrintWriter(new FileOutputStream(dest));
while (tokenizer.nextToken()) {
if (tokenizer.getTokenType() == PRTokeniser.TokenType.STRING) {
out.println(tokenizer.getStringValue());
}
}
out.flush();
out.close();
reader.close();
}
/**
* Extracts text from a PDF document.
* #param src the original PDF document
* #param dest the resulting text file
* #throws IOException
*/
public void extractText(String src, String dest) throws IOException {
PrintWriter out = new PrintWriter(new FileOutputStream(dest));
PdfReader reader = new PdfReader(src);
RenderListener listener = new MyTextRenderListener(out);
PdfContentStreamProcessor processor = new PdfContentStreamProcessor(listener);
PdfDictionary pageDic = reader.getPageN(1);
PdfDictionary resourcesDic = pageDic.getAsDict(PdfName.RESOURCES);
processor.processContent(ContentByteUtils.getContentBytesForPage(reader, 1), resourcesDic);
out.flush();
out.close();
reader.close();
}
/**
* Main method.
* #param args no arguments needed
* #throws DocumentException
* #throws IOException
*/
public static void main(String[] args) throws DocumentException, IOException {
ParsingHelloWorld example = new ParsingHelloWorld();
HelloWorld.main(args);
example.createPdf(PDF);
example.parsePdf(HelloWorld.RESULT, TEXT1);
example.parsePdf(PDF, TEXT2);
example.extractText(PDF, TEXT3);
}
}
Taken from the iTextPDF site. This shows you how you can read a single PDF. If you want to read multiple PDFs from a folder, you need a DirectoryStream<Path>.
DirectoryStream<Path> allPDF = Files.newDirectoryStream(Paths.get("/path/to/folder"),"*.pdf");
Now, the DirectoryStream contains only those Path objects that correspond to the PDF files in the folder.
Iterate over the DirectoryStream , get the absolute path of the Path objects and then do whatever you want.

Related

Page size and formatting of PDF using iText pdfHTML

I am trying to export 3 HTML pages (all with same content) into a PDF using iText7.1.0 and pdfHTML2.0.0 using this example. For some reason, the pages have formatting issue at the footer. The jsFiddle link to my HTML code that is being used by PDF renderer.
Below is the Java code used for rendering the PDF (Test.html is the same HTML code in the fiddle):
package com.itextpdf.htmlsamples.chapter01;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import com.itextpdf.html2pdf.ConverterProperties;
import com.itextpdf.html2pdf.HtmlConverter;
import com.itextpdf.kernel.pdf.PdfDocument;
import com.itextpdf.kernel.pdf.PdfReader;
import com.itextpdf.kernel.pdf.PdfWriter;
import com.itextpdf.kernel.geom.PageSize;
import com.itextpdf.kernel.utils.PdfMerger;
import com.itextpdf.licensekey.LicenseKey;
/**
* Can we parse different HTML files and combine them into one PDF?
* Yes, this can be done in different ways. This example shows how
* to create a PDF in memory for each HTML, then use PdfMerger to
* merge the different PDFs into one, on a page per page basis.
*/
public class C07E01_CombineHtml {
/** The Base URI of the HTML page. */
public static final String BASEURI = "src/main/resources/html/";
/** An array containing the paths to different HTML files. */
public static final String[] SRC = {
String.format("%sTest.html", BASEURI),
String.format("%sTest.html", BASEURI),
String.format("%sTest.html", BASEURI)
};
/** The target folder for the result. */
public static final String TARGET = "target/results/ch07/";
/** The path to the resulting PDF file. */
public static final String DEST = String.format("%sbundle.pdf", TARGET);
protected PageSize A4;
/**
* The main method of this example.
*
* #param args no arguments are needed to run this example.
* #throws IOException Signals that an I/O exception has occurred.
*/
public static void main(String[] args) throws IOException {
LicenseKey.loadLicenseFile("C://Users//Sparks//Desktop//itextkey-0.xml");
File file = new File(TARGET);
file.mkdirs();
new C07E01_CombineHtml().createPdf(BASEURI, SRC, DEST);
}
/**
* Creates the PDF file.
*
* #param baseUri the base URI
* #param src an array with the paths to different source HTML files
* #param dest the path to the resulting PDF
* #throws IOException Signals that an I/O exception has occurred.
*/
public void createPdf(String baseUri, String[] src, String dest) throws IOException {
ConverterProperties properties = new ConverterProperties();
properties.setBaseUri(baseUri);
PdfWriter writer = new PdfWriter(dest);
PdfDocument pdf = new PdfDocument(writer);
PdfMerger merger = new PdfMerger(pdf);
for (String html : src) {
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PdfDocument temp = new PdfDocument(new PdfWriter(baos));
PageSize pageSize = PageSize.A4;
temp.setDefaultPageSize(pageSize);
HtmlConverter.convertToPdf(new FileInputStream(html), temp, properties);
temp = new PdfDocument(new PdfReader(new ByteArrayInputStream(baos.toByteArray())));
merger.merge(temp, 1, temp.getNumberOfPages());
temp.close();
}
pdf.close();
}
}
The output PDF file has 6 pages without footer. It should have 3 pages each of 'A4' size.
Any suggestions would be helpful.

Changing the PageSize to one that is larger should solve this specific issue.
Afterward you can scale the page down in order to get a PDF with A4 pages.
Take a look at the code sample below to get an idea about how you can do this.
public static void main(String[] args) throws IOException {
ByteArrayOutputStream pdf = createPdf("src/main/resources/SO47869248/html.html");
// To get from A3 to A4 the size has to shrink 71%
new SO47869248().scalePdf(DEST, new ByteArrayInputStream(pdf.toByteArray()), 0.7071f);
}
public static ByteArrayOutputStream createPdf(String htmlSrc) throws IOException {
ByteArrayOutputStream output = new ByteArrayOutputStream();
ConverterProperties converterProperties = new ConverterProperties();
converterProperties.setBaseUri(new File(htmlSrc).getParent());
PdfWriter writer = new PdfWriter(output);
PdfDocument pdfDocument = new PdfDocument(writer);
PdfMerger merger = new PdfMerger(pdfDocument);
for(int x=0; x < 3; x++){
ByteArrayOutputStream baos = new ByteArrayOutputStream();
PdfDocument temp = new PdfDocument(new PdfWriter(baos));
temp.setDefaultPageSize(PageSize.A3);
HtmlConverter.convertToPdf(new FileInputStream(htmlSrc), temp, converterProperties);
temp = new PdfDocument(new PdfReader(new ByteArrayInputStream(baos.toByteArray())));
merger.merge(temp, 1, temp.getNumberOfPages());
temp.close();
}
pdfDocument.close();
return output;
}
public void scalePdf(String dest, ByteArrayInputStream input, float scale) throws IOException {
// Create the source document
PdfDocument srcDoc = new PdfDocument(new PdfReader(input));
PdfDocument pdfDoc = new PdfDocument(new PdfWriter(dest));
ScaleDownEventHandler eventHandler = new ScaleDownEventHandler(scale);
int n = srcDoc.getNumberOfPages();
pdfDoc.addEventHandler(PdfDocumentEvent.START_PAGE, eventHandler);
PdfCanvas canvas;
PdfFormXObject page;
for (int p = 1; p <= n; p++) {
eventHandler.setPageDict(srcDoc.getPage(p).getPdfObject());
canvas = new PdfCanvas(pdfDoc.addNewPage());
page = srcDoc.getPage(p).copyAsFormXObject(pdfDoc);
canvas.addXObject(page, scale, 0f, 0f, scale, 0f, 0f);
}
pdfDoc.close();
srcDoc.close();
}
protected class ScaleDownEventHandler implements IEventHandler {
protected float scale = 1;
protected PdfDictionary pageDict;
public ScaleDownEventHandler(float scale) {
this.scale = scale;
}
public void setPageDict(PdfDictionary pageDict) {
this.pageDict = pageDict;
}
#Override
public void handleEvent(Event event) {
PdfDocumentEvent docEvent = (PdfDocumentEvent) event;
PdfPage page = docEvent.getPage();
page.put(PdfName.Rotate, pageDict.getAsNumber(PdfName.Rotate));
scaleDown(page, pageDict, PdfName.MediaBox, scale);
scaleDown(page, pageDict, PdfName.CropBox, scale);
}
protected void scaleDown(PdfPage destPage, PdfDictionary pageDictSrc, PdfName box, float scale) {
PdfArray original = pageDictSrc.getAsArray(box);
if (original != null) {
float width = original.getAsNumber(2).floatValue() - original.getAsNumber(0).floatValue();
float height = original.getAsNumber(3).floatValue() - original.getAsNumber(1).floatValue();
PdfArray result = new PdfArray();
result.add(new PdfNumber(0));
result.add(new PdfNumber(0));
result.add(new PdfNumber(width * scale));
result.add(new PdfNumber(height * scale));
destPage.put(box, result);
}
}
}
For this example I picked the A3 pagesize constant. You can also create a PageSize object using specific measurements. As shown below:
Constructor:
public PageSize(float width, float height)
Example:
PageSize pageSize = new PageSize(750, 1000);
PdfDocument temp = new PdfDocument(pageSize);

try this.
style="page-break-after: always; width: 320pt;" in

Cannot bring all the element from html to docx that have same tag with java

I have a project converting from html to docx with java, in the html document I have 2 paragraph with 2 header as a title, but when converting both of them to docx format, just one paragraph that successfully converted, but the other paragraph doesn't converted even they have same tag. Look the image below
And the code look like this
import java.io.File;
import java.io.FileOutputStream;
import java.util.List;
import java.util.Set;
import static org.apache.poi.hslf.model.textproperties.TextPropCollection.TextPropType.paragraph;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.VerticalAlign;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.zwobble.mammoth.DocumentConverter;
import org.zwobble.mammoth.Result;
/**
*
* #author Alwan
*/
public class TestWord {
/**
* #param args the command line arguments
*/
public static void main(String[] args) {
// TODO code application logic here
try
{
File file = new File("src/test/TEST.docx");
DocumentConverter converter = new DocumentConverter();
Result<String> result = converter.extractRawText(file);
String html = result.getValue(); // The generated HTML
Set<String> warnings = result.getWarnings(); // Any warnings during conversion
String[] part = html.split("<p>");
String[] part2 = html.split("<h1>");
FileOutputStream out = new FileOutputStream(new File("testformat.docx"));
XWPFDocument doc = new XWPFDocument();
XWPFParagraph paragraph = doc.createParagraph();
XWPFRun paragraphOneRunOne = paragraph.createRun();
XWPFRun paragraphOneRunThree = paragraph.createRun();
for (int i = 0; i < html.length(); i++)
{
if (i % 2 != 0)
{
paragraphOneRunOne.setBold(true);
paragraphOneRunOne.setItalic(true);
paragraphOneRunOne.setText(part[i].trim());
paragraphOneRunOne.addBreak();
paragraphOneRunThree.setStrike(true);
paragraphOneRunThree.setFontSize(20);
paragraphOneRunThree.setSubscript(VerticalAlign.SUBSCRIPT);
paragraphOneRunThree.setText(part2[i].trim());
System.out.println(part2[i].trim());
System.out.println(part[i].trim());
doc.write(out);
out.close();
}
System.out.println("testformat.docx written successully");
}
System.out.println("Success");
} catch(Exception e) {
e.printStackTrace();
}
}
}
The question is, how to bring all the paragraph from html into the docx format when its have a same tag? Thank you for your attention before. Sorry for my bad english

Java - Merge multiple images to a single PDF using PDFBox

I was able to merge multiple PDF files into a single PDF using the code below -
public void mergePDF() {
File file1 = new File("inputPDF/001.pdf");
File file2 = new File("inputPDF/002.pdf");
File file3 = new File("inputPDF/003.pdf");
File file4 = new File("inputPDF/004.pdf");
try {
PDDocument doc1 = PDDocument.load(file1);
PDDocument doc2 = PDDocument.load(file2);
PDDocument doc3 = PDDocument.load(file3);
PDDocument doc4 = PDDocument.load(file4);
PDFMergerUtility PDFmerger = new PDFMergerUtility();
PDFmerger.setDestinationFileName("outputImages/merged.pdf");
System.out.println("Destination path set to "+PDFmerger.getDestinationFileName());
PDFmerger.addSource(file1);
PDFmerger.addSource(file2);
PDFmerger.addSource(file3);
PDFmerger.addSource(file4);
//Merging the documents
PDFmerger.mergeDocuments();
doc1.close();
doc2.close();
doc3.close();
doc4.close();
System.out.println("Done!");
} catch (IOException e) {
e.printStackTrace();
}
}
However, my requirement is to merge multiple images (JPG, PNG) to a single PDF as well.
Is it possible to merge multiple images to a single PDF using PDFBox?

Since I struggled with this task, here's my code. The merged document is PDF/A-1b compliant
import com.google.common.io.Resources;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Calendar;
import java.util.List;
import javax.xml.transform.TransformerException;
import org.apache.commons.io.FileUtils;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSStream;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.MemoryUsageSetting;
import org.apache.pdfbox.multipdf.PDFMergerUtility;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.color.PDOutputIntent;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.preflight.parser.PreflightParser;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.DublinCoreSchema;
import org.apache.xmpbox.schema.PDFAIdentificationSchema;
import org.apache.xmpbox.schema.XMPBasicSchema;
import org.apache.xmpbox.type.BadFieldValueException;
import org.apache.xmpbox.xml.XmpSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public final class PDFMerger {
private static final Logger LOG = LoggerFactory.getLogger(PDFMerger3.class);
private static final String OUTPUT_CONDITION_IDENTIFIER = "sRGB IEC61966-2.1";
public static final String DOCUMENT_CREATOR = "Mr. Meeseeks";
public static final String DOCUMENT_SUBJECT = "Great subject";
public static final String DOCUMENT_TITLE = "Here goes your title";
/**
* Creates a compound PDF document from a list of input documents.
* <p>
* The merged document is PDF/A-1b compliant
*
* #param sources list of source PDF document streams.
* #return compound PDF document as a readable input stream.
* #throws IOException if anything goes wrong during PDF merge.
*/
public static ByteArrayOutputStream mergeFiles(final List<InputStream> sources) throws IOException {
Path mergeDirectory = Files.createTempDirectory("merge-" + System.currentTimeMillis());
try (ByteArrayOutputStream mergedPDFOutputStream = new ByteArrayOutputStream()) {
LOG.debug("Merging {} source documents into one PDF", sources.size());
PDFMergerUtility mixedPdfMerger = createMixedPdfMerger(sources, mergedPDFOutputStream, mergeDirectory);
mergeFileStreams(mergedPDFOutputStream, mixedPdfMerger);
return mergedPDFOutputStream;
} catch (Exception e) {
if (!(e instanceof IOException)) {
throw new IOException("PDF merge problem", e);
}
throw (IOException) e;
} finally {
FileUtils.deleteDirectory(mergeDirectory.toFile());
sources.forEach(IOUtils::closeQuietly);
}
}
private static void mergeFileStreams(ByteArrayOutputStream mergedPDFOutputStream, PDFMergerUtility pdfMerger)
throws IOException, BadFieldValueException, TransformerException {
LOG.debug("Initialising PDF merge utility");
try (COSStream cosStream = new COSStream()) {
// PDF and XMP properties must be identical, otherwise document is not PDF/A compliant
pdfMerger.setDestinationDocumentInformation(createPDFDocumentInfo());
pdfMerger.setDestinationMetadata(createXMPMetadata(cosStream));
pdfMerger.mergeDocuments(MemoryUsageSetting.setupTempFileOnly());
LOG.debug("PDF merge successful, size = {} bytes", mergedPDFOutputStream.size());
}
}
#SuppressWarnings("UnstableApiUsage")
private static PDFMergerUtility createMixedPdfMerger(List<InputStream> sources, ByteArrayOutputStream mergedPDFOutputStream, Path mergeDirectory) throws IOException {
PDFMergerUtility pdfMerger = new PDFMergerUtility();
byte[] colorProfile = org.apache.commons.io.IOUtils.toByteArray(Resources.getResource("sRGB.icc"));
for (InputStream source : sources) {
File file = streamToFile(mergeDirectory, source);
if (isPdf(file)) {
pdfMerger.addSource(file);
} else {
pdfMerger.addSource(imageToPDDocument(mergeDirectory, file, colorProfile));
}
}
pdfMerger.setDestinationStream(mergedPDFOutputStream);
return pdfMerger;
}
private static PDDocumentInformation createPDFDocumentInfo() {
LOG.debug("Setting document info (title, author, subject) for merged PDF");
PDDocumentInformation documentInformation = new PDDocumentInformation();
documentInformation.setTitle(DOCUMENT_TITLE);
documentInformation.setCreator(DOCUMENT_CREATOR);
documentInformation.setSubject(DOCUMENT_SUBJECT);
return documentInformation;
}
private static PDMetadata createXMPMetadata(COSStream cosStream)
throws BadFieldValueException, TransformerException, IOException {
LOG.debug("Setting XMP metadata (title, author, subject) for merged PDF");
XMPMetadata xmpMetadata = XMPMetadata.createXMPMetadata();
// PDF/A-1b properties
PDFAIdentificationSchema pdfaSchema = xmpMetadata.createAndAddPFAIdentificationSchema();
pdfaSchema.setPart(1);
pdfaSchema.setConformance("B");
pdfaSchema.setAboutAsSimple("");
// Dublin Core properties
DublinCoreSchema dublinCoreSchema = xmpMetadata.createAndAddDublinCoreSchema();
dublinCoreSchema.setTitle(DOCUMENT_TITLE);
dublinCoreSchema.addCreator(DOCUMENT_CREATOR);
dublinCoreSchema.setDescription(DOCUMENT_SUBJECT);
// XMP Basic properties
XMPBasicSchema basicSchema = xmpMetadata.createAndAddXMPBasicSchema();
Calendar creationDate = Calendar.getInstance();
basicSchema.setCreateDate(creationDate);
basicSchema.setModifyDate(creationDate);
basicSchema.setMetadataDate(creationDate);
basicSchema.setCreatorTool(DOCUMENT_CREATOR);
// Create and return XMP data structure in XML format
try (ByteArrayOutputStream xmpOutputStream = new ByteArrayOutputStream();
OutputStream cosXMPStream = cosStream.createOutputStream()) {
new XmpSerializer().serialize(xmpMetadata, xmpOutputStream, true);
cosXMPStream.write(xmpOutputStream.toByteArray());
return new PDMetadata(cosStream);
}
}
private static File imageToPDDocument(Path mergeDirectory, File file, byte[] colorProfile) throws IOException {
try (PDDocument doc = new PDDocument()) {
PDImageXObject pdImage = PDImageXObject.createFromFileByContent(file, doc);
drawPage(doc, pdImage);
doc.getDocumentCatalog().addOutputIntent(createColorScheme(doc, colorProfile));
File pdfFile = Files.createTempFile(mergeDirectory, String.valueOf(System.currentTimeMillis()), ".tmp").toFile();
doc.save(pdfFile);
return pdfFile;
}
}
private static void drawPage(PDDocument doc, PDImageXObject pdImage) throws IOException {
PDPage page;
pdImage.getCOSObject().setItem(COSName.SMASK, COSName.NONE);
boolean isLandscapeMode = pdImage.getWidth() > pdImage.getHeight();
if (isLandscapeMode) {
page = new PDPage(new PDRectangle(PDRectangle.A4.getHeight(), PDRectangle.A4.getWidth()));
float scale = Math.min(Math.min(PDRectangle.A4.getWidth() / pdImage.getHeight(), PDRectangle.A4.getHeight() / pdImage.getWidth()), 1);
float width = pdImage.getWidth() * scale;
float height = pdImage.getHeight() * scale;
// center the image
float startWidth = (PDRectangle.A4.getHeight() - width) / 2;
float startHeight = (PDRectangle.A4.getWidth() - height) / 2;
try (PDPageContentStream contentStream = new PDPageContentStream(doc, page)) {
contentStream.drawImage(pdImage, startWidth, startHeight, width, height);
}
} else {
page = new PDPage(PDRectangle.A4);
float scale = Math.min(Math.min(PDRectangle.A4.getWidth() / pdImage.getWidth(), PDRectangle.A4.getHeight() / pdImage.getHeight()), 1);
float width = pdImage.getWidth() * scale;
float height = pdImage.getHeight() * scale;
// try to center the image
float startWidth = (PDRectangle.A4.getWidth() - width) / 2;
float startHeight = (PDRectangle.A4.getHeight() - height) / 2;
try (PDPageContentStream contentStream = new PDPageContentStream(doc, page)) {
contentStream.drawImage(pdImage, startWidth, startHeight, width, height);
}
}
doc.addPage(page);
}
private static PDOutputIntent createColorScheme(PDDocument doc, byte[] colorProfile) throws IOException {
PDOutputIntent intent = new PDOutputIntent(doc, new ByteArrayInputStream(colorProfile));
intent.setInfo(OUTPUT_CONDITION_IDENTIFIER);
intent.setOutputCondition(OUTPUT_CONDITION_IDENTIFIER);
intent.setOutputConditionIdentifier(OUTPUT_CONDITION_IDENTIFIER);
intent.setRegistryName("http://www.color.org");
return intent;
}
private static boolean isPdf(File file) {
try {
PreflightParser preflightParser = new PreflightParser(file);
preflightParser.parse();
return true;
} catch (Exception e) {
return false;
}
}
private static File streamToFile(Path tempDirectory, InputStream in) throws IOException {
final Path tempFile = Files.createTempFile(tempDirectory, String.valueOf(System.currentTimeMillis()), ".tmp");
try (FileOutputStream out = new FileOutputStream(tempFile.toFile())) {
IOUtils.copy(in, out);
}
return tempFile.toFile();
}
}
You can take a look at this gist for an option to merge pdf files as well.

You need to convert the images to a PDF first. See How can I convert a PNG file to PDF using java? or Create PDF from a PNG image Or Java Panel for an example on how to do this.
After that, use pdfbox to merge the resulting pdfs.

I have used itext library for merging images and convert them to pdf
Here is the code
Document document = new Document();
PdfWriter.getInstance(document, new FileOutputStream(image_path+"\\"+image_name+".pdf"));
document.open();
Paragraph p = new Paragraph();
File files[] = new File(path).listFiles();
PdfPTable table = new PdfPTable(1);
for (File file : files) {
table.setWidthPercentage(100);
table.addCell(createImageCell(file.getAbsolutePath()));
}
document.add(table);
document.close();
Hope It helps

How to open and manipulate Word document/template in Java?

I need to open a .doc/.dot/.docx/.dotx (I'm not picky, I just want it to work) document,
parse it for placeholders (or something similar),
put my own data,
and then return generated .doc/.docx/.dotx/.pdf document.
And on top of all that, I need the tools to accomplish that to be free.
I've searched around for something that would suit my needs, but I can't find anything.
Tools like Docmosis, Javadocx, Aspose etc. are commercial.
From what I've read, Apache POI is nowhere near successfully implementing this (they currently don't have any official developer working on Word part of framework).
The only thing that looks that could do the trick is OpenOffice UNO API.
But that is a pretty big byte for someone that has never used this API (like me).
So if I am going to jump into this, I need to make sure that I am on the right path.
Can someone give me some advice on this?

I know it's been a long time since I've posted this question, and I said that I would post my solution when I'm finished.
So here it is.
I hope that it will help someone someday.
This is a full working class, and all you have to do is put it in your application, and place TEMPLATE_DIRECTORY_ROOT directory with .docx templates in your root directory.
Usage is very simple.
You put placeholders (key) in your .docx file, and then pass file name and Map containing corresponding key-value pairs for that file.
Enjoy!
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.Closeable;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.URI;
import java.util.Deque;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.UUID;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
import javax.faces.context.ExternalContext;
import javax.faces.context.FacesContext;
import javax.servlet.http.HttpServletResponse;
public class DocxManipulator {
private static final String MAIN_DOCUMENT_PATH = "word/document.xml";
private static final String TEMPLATE_DIRECTORY_ROOT = "TEMPLATES_DIRECTORY/";
/* PUBLIC METHODS */
/**
* Generates .docx document from given template and the substitution data
*
* #param templateName
* Template data
* #param substitutionData
* Hash map with the set of key-value pairs that represent
* substitution data
* #return
*/
public static Boolean generateAndSendDocx(String templateName, Map<String,String> substitutionData) {
String templateLocation = TEMPLATE_DIRECTORY_ROOT + templateName;
String userTempDir = UUID.randomUUID().toString();
userTempDir = TEMPLATE_DIRECTORY_ROOT + userTempDir + "/";
try {
// Unzip .docx file
unzip(new File(templateLocation), new File(userTempDir));
// Change data
changeData(new File(userTempDir + MAIN_DOCUMENT_PATH), substitutionData);
// Rezip .docx file
zip(new File(userTempDir), new File(userTempDir + templateName));
// Send HTTP response
sendDOCXResponse(new File(userTempDir + templateName), templateName);
// Clean temp data
deleteTempData(new File(userTempDir));
}
catch (IOException ioe) {
System.out.println(ioe.getMessage());
return false;
}
return true;
}
/* PRIVATE METHODS */
/**
* Unzipps specified ZIP file to specified directory
*
* #param zipfile
* Source ZIP file
* #param directory
* Destination directory
* #throws IOException
*/
private static void unzip(File zipfile, File directory) throws IOException {
ZipFile zfile = new ZipFile(zipfile);
Enumeration<? extends ZipEntry> entries = zfile.entries();
while (entries.hasMoreElements()) {
ZipEntry entry = entries.nextElement();
File file = new File(directory, entry.getName());
if (entry.isDirectory()) {
file.mkdirs();
}
else {
file.getParentFile().mkdirs();
InputStream in = zfile.getInputStream(entry);
try {
copy(in, file);
}
finally {
in.close();
}
}
}
}
/**
* Substitutes keys found in target file with corresponding data
*
* #param targetFile
* Target file
* #param substitutionData
* Map of key-value pairs of data
* #throws IOException
*/
#SuppressWarnings({ "unchecked", "rawtypes" })
private static void changeData(File targetFile, Map<String,String> substitutionData) throws IOException{
BufferedReader br = null;
String docxTemplate = "";
try {
br = new BufferedReader(new InputStreamReader(new FileInputStream(targetFile), "UTF-8"));
String temp;
while( (temp = br.readLine()) != null)
docxTemplate = docxTemplate + temp;
br.close();
targetFile.delete();
}
catch (IOException e) {
br.close();
throw e;
}
Iterator substitutionDataIterator = substitutionData.entrySet().iterator();
while(substitutionDataIterator.hasNext()){
Map.Entry<String,String> pair = (Map.Entry<String,String>)substitutionDataIterator.next();
if(docxTemplate.contains(pair.getKey())){
if(pair.getValue() != null)
docxTemplate = docxTemplate.replace(pair.getKey(), pair.getValue());
else
docxTemplate = docxTemplate.replace(pair.getKey(), "NEDOSTAJE");
}
}
FileOutputStream fos = null;
try{
fos = new FileOutputStream(targetFile);
fos.write(docxTemplate.getBytes("UTF-8"));
fos.close();
}
catch (IOException e) {
fos.close();
throw e;
}
}
/**
* Zipps specified directory and all its subdirectories
*
* #param directory
* Specified directory
* #param zipfile
* Output ZIP file name
* #throws IOException
*/
private static void zip(File directory, File zipfile) throws IOException {
URI base = directory.toURI();
Deque<File> queue = new LinkedList<File>();
queue.push(directory);
OutputStream out = new FileOutputStream(zipfile);
Closeable res = out;
try {
ZipOutputStream zout = new ZipOutputStream(out);
res = zout;
while (!queue.isEmpty()) {
directory = queue.pop();
for (File kid : directory.listFiles()) {
String name = base.relativize(kid.toURI()).getPath();
if (kid.isDirectory()) {
queue.push(kid);
name = name.endsWith("/") ? name : name + "/";
zout.putNextEntry(new ZipEntry(name));
}
else {
if(kid.getName().contains(".docx"))
continue;
zout.putNextEntry(new ZipEntry(name));
copy(kid, zout);
zout.closeEntry();
}
}
}
}
finally {
res.close();
}
}
/**
* Sends HTTP Response containing .docx file to Client
*
* #param generatedFile
* Path to generated .docx file
* #param fileName
* File name of generated file that is being presented to user
* #throws IOException
*/
private static void sendDOCXResponse(File generatedFile, String fileName) throws IOException {
FacesContext facesContext = FacesContext.getCurrentInstance();
ExternalContext externalContext = facesContext.getExternalContext();
HttpServletResponse response = (HttpServletResponse) externalContext
.getResponse();
BufferedInputStream input = null;
BufferedOutputStream output = null;
response.reset();
response.setHeader("Content-Type", "application/msword");
response.setHeader("Content-Disposition", "attachment; filename=\"" + fileName + "\"");
response.setHeader("Content-Length",String.valueOf(generatedFile.length()));
input = new BufferedInputStream(new FileInputStream(generatedFile), 10240);
output = new BufferedOutputStream(response.getOutputStream(), 10240);
byte[] buffer = new byte[10240];
for (int length; (length = input.read(buffer)) > 0;) {
output.write(buffer, 0, length);
}
output.flush();
input.close();
output.close();
// Inform JSF not to proceed with rest of life cycle
facesContext.responseComplete();
}
/**
* Deletes directory and all its subdirectories
*
* #param file
* Specified directory
* #throws IOException
*/
public static void deleteTempData(File file) throws IOException {
if (file.isDirectory()) {
// directory is empty, then delete it
if (file.list().length == 0)
file.delete();
else {
// list all the directory contents
String files[] = file.list();
for (String temp : files) {
// construct the file structure
File fileDelete = new File(file, temp);
// recursive delete
deleteTempData(fileDelete);
}
// check the directory again, if empty then delete it
if (file.list().length == 0)
file.delete();
}
} else {
// if file, then delete it
file.delete();
}
}
private static void copy(InputStream in, OutputStream out) throws IOException {
byte[] buffer = new byte[1024];
while (true) {
int readCount = in.read(buffer);
if (readCount < 0) {
break;
}
out.write(buffer, 0, readCount);
}
}
private static void copy(File file, OutputStream out) throws IOException {
InputStream in = new FileInputStream(file);
try {
copy(in, out);
} finally {
in.close();
}
}
private static void copy(InputStream in, File file) throws IOException {
OutputStream out = new FileOutputStream(file);
try {
copy(in, out);
} finally {
out.close();
}
}
}

Since a docx file is merely a zip-archive of xml files (plus any binary files for embedded objects such as images), we met that requirement by unpacking the zip file, feeding the document.xml to a template engine (we used freemarker) that does the merging for us, and then zipping the output document to get the new docx file.
The template document then is simply an ordinary docx with embedded freemarker expressions / directives, and can be edited in Word.
Since (un)zipping can be done with the JDK, and Freemarker is open source, you don't incur any licence fees, not even for word itself.
The limitation is that this approach can only emit docx or rtf files, and the output document will have the same filetype as the template. If you need to convert the document to another format (such as pdf) you'll have to solve that problem separately.

I ended up relying on Apache Poi 3.12 and processing paragraphs (separately extracting paragraphs also from tables, headers/footers, and footnotes, as such paragraphs aren't returned by XWPFDocument.getParagraphs() ).
The processing code (~100 lines) and unit tests are here on github.

I've been in more or less the same situation as you, I had to modify a whole bunch of MS Word merge templates at once. After having googled a lot to try to find a Java solution I finally installed Visual Studio 2010 Express which is free and did the job in C#.

I have recently dealt with similar problem:
"A tool which accepts a template '.docx' file, processes the file by evaluation of passed parameter context and outputs a '.docx' file as the result of the process."
finally god brought us scriptlet4dox :).
the key features for this product is:
1. groovy code injection as scripts in template file (parameter injection, etc.)
2. loop over collection items in table
and so many other features.
but as I checked the last commit on the project is performed about a year ago, so there is a probability that the project is not supported for new features and new bug-fixes. this is your choice to use it or not.

Rotate single page of document

How do I rotate the second page of my PdF when I'm using iText.
The first and other pages I would like to stay in the same orientation.
I know of ...
Document document = new Document(PageSize.A4.rotate(), 50, 50, 50, 50);
But that will rotate everything.

You can use document.setPageSize() before document.addNew();.
For example:
Document document = new Document();
....
document.setPageSize(PageSie.A4);
document.newPage();
......
document.setPageSize(PageSize.A4.rotate());
document.newPage();
It worked well for me.

From http://itextpdf.com/examples/iia.php?id=232 :
/*
* This class is part of the book "iText in Action - 2nd Edition"
* written by Bruno Lowagie (ISBN: 9781935182610)
* For more info, go to: http://itextpdf.com/examples/
* This example only works with the AGPL version of iText.
*/
package part4.chapter13;
import java.io.FileOutputStream;
import java.io.IOException;
import part1.chapter03.MovieTemplates;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.pdf.PdfDictionary;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfNumber;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfStamper;
public class RotatePages {
/** The resulting PDF. */
public static final String RESULT
= "results/part4/chapter13/timetable_rotated.pdf";
/**
* Manipulates a PDF file src with the file dest as result
* #param src the original PDF
* #param dest the resulting PDF
* #throws IOException
* #throws DocumentException
*/
public void manipulatePdf(String src, String dest)
throws IOException, DocumentException {
PdfReader reader = new PdfReader(MovieTemplates.RESULT);
int n = reader.getNumberOfPages();
int rot;
PdfDictionary pageDict;
for (int i = 1; i <= n; i++) {
rot = reader.getPageRotation(i);
pageDict = reader.getPageN(i);
pageDict.put(PdfName.ROTATE, new PdfNumber(rot + 90));
}
PdfStamper stamper = new PdfStamper(reader, new FileOutputStream(RESULT));
stamper.close();
reader.close();
}
/**
* Main method creating the PDF.
* #param args no arguments needed
* #throws DocumentException
* #throws IOException
*/
public static void main(String[] args)
throws IOException, DocumentException {
new MovieTemplates().createPdf(MovieTemplates.RESULT);
new RotatePages().manipulatePdf(MovieTemplates.RESULT, RESULT);
}
}

I rotate the orientation:
PdfWriter writer = new PdfWriter(out);
PdfDocument pdf = new PdfDocument(writer);
Document document = new Document(pdf, PageSize.LETTER.rotate());

We Keep Coding

Java is a programming language and computing platform first released by Sun Microsystems in 1995.

Read multiple PDF files inside a folder using iText [closed] - java

Related

Page size and formatting of PDF using iText pdfHTML

Cannot bring all the element from html to docx that have same tag with java

Java - Merge multiple images to a single PDF using PDFBox

How to open and manipulate Word document/template in Java?

Rotate single page of document

Categories

Resources