Print PDF that contains JBIG2 images - java

Please suggest some libraries that will help me print PDF files that contain JBIG2-encoded images. PDFRenderer and PDFBox don't help: these libraries can print simple PDFs, but not PDFs containing JBIG2 images. PDFRenderer tries to fix it (according to an issue on PDFRenderer's bug tracker), but some pages (especially those containing barcodes) still fail to print.
P.S. I use javax.print API within applet
Thanks!
UPDATE: I also tried ICEPdf; it doesn't work either.
I came to the conclusion that all these libraries (PDFRenderer, ICEPdf, PDFBox) use JPedal's JBIG2 decoder. The bug (some pages don't print) comes from this decoder library. The open-source version of this decoder (which is used in PDFRenderer, ICEPdf and PDFBox) is no longer supported, but JPedal has a new commercial branch of the project, and they wrote that the bug has been fixed in the new commercial release, which costs $9k.
Any ideas?
UPDATE 2: Yesterday I tried to replace JPedal's free library with another open-source library, jbig2-imageio. I haven't had any success yet, so I created a new topic on their project's page (the Google Code forum - here ). I would be grateful for any help.
I also found some helpful discussions on the Apache PDFBox bug tracker: here and here.

Following up on your comment on yms's answer, i.e. "but what library I can use to extract images and (more importantly) put them back in PDF?"
Here is a simple demonstration of:
1 ) Extracting the JBIG2 images (in fact, all images) from a PDF.
2 ) Converting a JBIG2 image to another format; in my case, JPEG.
3 ) Creating a new PDF containing the JPEG.
It uses the jbig2-imageio and iText libraries.
In the example below, please change the resource and directory paths to suit your needs.
For this I had to go through several resources, which I list at the end. Hope this helps.
import com.itextpdf.text.Document;
import com.itextpdf.text.Image;
import com.itextpdf.text.pdf.PdfPCell;
import com.itextpdf.text.pdf.PdfPTable;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.PdfWriter;
import com.itextpdf.text.pdf.parser.*;
import com.levigo.jbig2.JBIG2ImageReader;
import com.levigo.jbig2.JBIG2ImageReaderSpi;
import com.levigo.jbig2.JBIG2ReadParam;
import com.levigo.jbig2.io.DefaultInputStreamFactory;
import java.awt.image.BufferedImage;
import java.io.*;
import javax.imageio.ImageIO;
import javax.imageio.stream.ImageInputStream;
/**
 * Demonstrates a three-step workaround for PDFs whose JBIG2 images cannot be
 * printed: extract every image from the source PDF, decode one JBIG2 image and
 * re-encode it as JPEG, then build a new PDF around that JPEG.
 *
 * <p>All paths are hard-coded sample locations; adjust them before running.
 * Failures are reported on standard output and do not abort the later steps,
 * matching the original demo's best-effort behaviour.
 */
public class JBig2Image {
    /** Path of the extracted JBIG2 image that is converted to JPEG. */
    private String filepath;
    /** Index of the image to read from the JBIG2 stream. */
    private int imageIndex;

    /**
     * Runs the whole demo: extraction, conversion, and PDF creation, in that order.
     */
    public JBig2Image() {
        this.filepath = "/home/blackadmin/Desktop/pdf/demo18.jbig2";
        this.imageIndex = 0;
        extractImgFromPdf();
        convertJBig2ToJpeg();
        createPDF();
    }

    /**
     * Walks every page of the source PDF and lets {@link MyImageRenderListener}
     * write each image it encounters to disk.
     */
    private void extractImgFromPdf() {
        try {
            PdfReader reader = new PdfReader("/home/blackadmin/Desktop/pdf/orig.pdf");
            try {
                PdfReaderContentParser parser = new PdfReaderContentParser(reader);
                MyImageRenderListener listener =
                        new MyImageRenderListener("/home/blackadmin/Desktop/pdf/demo%s.%s");
                // PDF page numbers are 1-based.
                for (int i = 1; i <= reader.getNumberOfPages(); i++) {
                    parser.processContent(i, listener);
                }
            } finally {
                // Release the reader even when parsing a page fails.
                reader.close();
            }
        } catch (IOException ex) {
            System.out.println(ex);
        }
    }

    /**
     * Decodes the JBIG2 file at {@code filepath} and writes it out as a JPEG.
     *
     * <p>The streams are opened with try-with-resources so that a failure to
     * open the input file is reported as an {@link IOException} instead of
     * triggering a {@link NullPointerException} while closing a stream that
     * was never created.
     */
    private void convertJBig2ToJpeg() {
        try (InputStream inputStream = new FileInputStream(new File(filepath));
             ImageInputStream imageInputStream =
                     new DefaultInputStreamFactory().getInputStream(inputStream)) {
            JBIG2ImageReader imageReader = new JBIG2ImageReader(new JBIG2ImageReaderSpi());
            try {
                imageReader.setInput(imageInputStream);
                JBIG2ReadParam param = imageReader.getDefaultReadParam();
                BufferedImage bufferedImage = imageReader.read(imageIndex, param);

                File jpegFile = new File("/home/blackadmin/Desktop/pdf/demo18.jpeg");
                // ImageIO.write returns false (without throwing) when no suitable
                // writer is found, so surface that case explicitly.
                if (!ImageIO.write(bufferedImage, "jpeg", jpegFile)) {
                    System.out.println("No image writer could encode " + jpegFile + " as jpeg");
                }
            } finally {
                imageReader.dispose();
            }
        } catch (IOException ex) {
            System.out.println(ex);
        }
    }

    /**
     * Creates a one-page PDF containing the converted JPEG inside a
     * single-column table.
     */
    public void createPDF() {
        Document document = new Document();
        // The output stream is closed even when PDF assembly fails part-way.
        try (FileOutputStream pdfOut =
                     new FileOutputStream("/home/blackadmin/Desktop/pdf/output.pdf")) {
            PdfWriter.getInstance(document, pdfOut);
            document.open();
            PdfPTable table = new PdfPTable(1); // 1 column.
            Image image = Image.getInstance("/home/blackadmin/Desktop/pdf/demo18.jpeg");
            // Size of the image as printed in the PDF. The original also called
            // scaleToFit(800f, 600f) first; scaleAbsolute overrides that result,
            // so only the effective call is kept.
            image.scaleAbsolute(800f, 600f);
            PdfPCell nestedImgCell = new PdfPCell(image);
            table.addCell(nestedImgCell);
            document.add(table);
            document.close();
            System.out.println(
                    "======== PDF Created Successfully =========");
        } catch (Exception e) {
            System.out.println(e);
        }
    }

    /**
     * Entry point; constructing the object runs the demo.
     */
    public static void main(String[] args) throws IOException {
        new JBig2Image();
    }
}
class MyImageRenderListener implements RenderListener {
/**
* The new document to which we've added a border rectangle.
*/
protected String path = "";
/**
* Creates a RenderListener that will look for images.
*/
public MyImageRenderListener(String path) {
this.path = path;
}
/**
* #see com.itextpdf.text.pdf.parser.RenderListener#beginTextBlock()
*/
public void beginTextBlock() {
}
/**
* #see com.itextpdf.text.pdf.parser.RenderListener#endTextBlock()
*/
public void endTextBlock() {
}
/**
* #see com.itextpdf.text.pdf.parser.RenderListener#renderImage(
* com.itextpdf.text.pdf.parser.ImageRenderInfo)
*/
public void renderImage(ImageRenderInfo renderInfo) {
try {
String filename;
FileOutputStream os;
PdfImageObject image = renderInfo.getImage();
if (image == null) {
return;
}
filename = String.format(path, renderInfo.getRef().getNumber(), image.getFileType());
os = new FileOutputStream(filename);
os.write(image.getImageAsBytes());
os.flush();
os.close();
} catch (IOException e) {
System.out.println(e.getMessage());
}
}
/**
* #see com.itextpdf.text.pdf.parser.RenderListener#renderText(
* com.itextpdf.text.pdf.parser.TextRenderInfo)
*/
public void renderText(TextRenderInfo renderInfo) {
}
}
References :
1 ) Extracting jbig2 from pdf (extract images) (MyImageRenderListener).
2 ) Converting jbig2 (JBIG2ImageReaderDemo)

There is a fork of the JPedal library by Borisvl located at
https://github.com/Borisvl/JBIG2-Image-Decoder#readme
which contains speed improvements and I believe it should also fix your bug.
EDIT : The bug is related to simple range checking. Basically you need to prevent GetPixel from accessing x,y values outside of the bitmap extents.
You need to make sure the following conditions are met before calling getPixel
col >= 0 and col < bitmap.width
row >= 0 and row < bitmap.height
Here is some Delphi code with a couple of small range checks. I cannot test the Java code myself but you need to make changes to src/org/jpedal/jbig2/image/JBIG2Bitmap.java
procedure TJBIG2Bitmap.combine(bitmap: TJBIG2Bitmap; x, y: Integer; combOp: Int64);
...
...
var
begin
srcWidth := bitmap.width;
srcHeight := bitmap.height;
srcRow := 0;
srcCol := 0;
if (x < 0) then x := 0;
if (y < 0) then y := 0;
for row := y to Min(y + srcHeight - 1, Self.height - 1) do // <<<<<<<< HERE
begin
for col := x to x + srcWidth - 1 do
begin
srcPixel := bitmap.getPixel(srcCol, srcRow);
Andrew.

How about using Acrobat Reader itself? It's a bit messy to get working, and I guess it isn't a robust solution, but it will probably print everything perfectly, and it's free.
Some info about this route;
http://vineetreynolds.blogspot.nl/2005/12/silent-print-pdf-print-pdf.html
http://www.codeproject.com/Questions/98586/Programmatically-print-PDF-documents
http://forums.adobe.com/message/2336723

There are tools such as ImageMagick which handle images and convert them to a lot of formats. I last used it some years ago, so I can't tell you whether the JBIG2 format is handled properly by default or whether you have to install a plugin.
You can try the following to get a list of supported formats beginning with J, like the JBIG2 you are looking for:
$ convert -list format | grep -i J
It is also straightforward to convert to PDF with this tool, coupled with the gs tool, a.k.a. Ghostscript.
In fact, nothing prevents you from displaying a PNG/JPEG version of the image and providing a download link to the original JBIG2 file with its own metadata.

As an alternative, you could try doing this server-side:
Approach 1:
Convert the PDF files to raster images using an external application and print that instead.
Approach 2:
Adjust your PDF files by recompressing JBIG2 images:
1- Extracting the images compressed as JBIG2 from your files.
2- Re-compress them with some other algorithm (JPEG, PNG, etc.). In order to do this you might need to go outside of Java, using either JNI or an external application. You can try jbig2dec or ImageMagick, for example, if the GPL license suits your needs.
3- Put the recompressed images back in your PDF.
This approach will imply some quality loss on those images, but at least you will be able to print the files.
You can do this in Java with iText, there is a chapter about resizing images in the book iText in Action (with sample code). The idea there is to extract the image, resize it (including recompression) and put it back. You can use this as starting point. Be aware that iText is an AGPL project, hence you cannot use it for free in commercial closed-source applications.
If you are using a Windows-based server and you can afford a commercial tool, you can also achieve this with Amyuni PDF Creator, either with C#/VB.Net or C++ (usual disclaimer applies for this suggestion). You just need to go through all objects of type acObjectTypePicture and set the attribute Compression to acJPegHigh; this approach does not require any external JBIG2 decoder (I can include some sample code here if you are interested).
If you are using an applet just to print your PDF files, you could also try generating a PDF file that shows the print dialog when opened

Related

Using POI To read/write a doc with the full POIFSFileSystem

I have the following issue (as does everybody, it seems): I want to replace some items with others in a Word doc.
The complication is that the doc contains headers and footers which are part of the POIFSFileSystem (I know this because reading the FS and writing the doc back, without any changes, loses this information, whereas reading the FS and writing it back as a new file doesn't).
Currently I do this :
POIFSFileSystem pfs = new POIFSFileSystem(fis);
HWPFDocument document = new HWPFDocument(pfs);
Range r1 = document.getRange();
…
document.write();
ByteArrayOutputStream bos = new ByteArrayOutputStream(50000);
pfs.writeFilesystem(bos);
pfs.close();
However this fails, with this error:
Opened read-only or via an InputStream, a Writeable File is required
If I don't rewrite the document, it works fine, but my changes are lost.
The other way around if I only save the document, not the filesystem, I lose the header/footer.
Now the problem is, how can I update the document while "saving as" the entire filesystem, or is there a way to force the document to contain everything from the file system?
The HWPF code remains in the scratchpad because the DOC binary file format is the most horrible of all the Horrible formats. So it will probably never be fully ready, and it will be buggy in many cases.
But in your special case, your observations are not reproducible. Using apache poi 4.0.1 the HWPFDocument contains the header story, which also contains the footer stories, after creating from *.doc file. So the following works for me:
Source:
Code:
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.apache.poi.hwpf.*;
import org.apache.poi.hwpf.usermodel.*;
/**
 * Reads a binary Word template, replaces the placeholders {@code <<NAME>>},
 * {@code <<DATE>>} and {@code <<AMOUNT>>} in the body paragraphs, prints the
 * paragraphs of the overall range, and writes the result to a new file.
 */
public class ReadAndWriteDOCWithHeaderFooter {

    /**
     * Runs the read / replace / write round trip.
     *
     * <p>The document and both file streams are managed with
     * try-with-resources so they are closed even when reading, replacing or
     * writing throws.
     *
     * @param args unused
     * @throws Exception if the template cannot be read or the result cannot be written
     */
    public static void main(String[] args) throws Exception {
        try (FileInputStream in = new FileInputStream("TemplateDOCWithHeaderFooter.doc");
             HWPFDocument document = new HWPFDocument(in)) {

            Range bodyRange = document.getRange();
            System.out.println(bodyRange);
            for (int p = 0; p < bodyRange.numParagraphs(); p++) {
                // Print each paragraph before and after replacement.
                System.out.println(bodyRange.getParagraph(p).text());
                replaceIfPresent(bodyRange, p, "<<NAME>>", "Axel Richter");
                replaceIfPresent(bodyRange, p, "<<DATE>>", "12/21/1964");
                replaceIfPresent(bodyRange, p, "<<AMOUNT>>", "1,234.56");
                System.out.println(bodyRange.getParagraph(p).text());
            }

            System.out.println("==============================================================================");

            Range overallRange = document.getOverallRange();
            System.out.println(overallRange);
            for (int p = 0; p < overallRange.numParagraphs(); p++) {
                System.out.println(overallRange.getParagraph(p).text()); // contains all inclusive header and footer
            }

            try (FileOutputStream out = new FileOutputStream("ResultDOCWithHeaderFooter.doc")) {
                document.write(out);
            }
        }
    }

    /**
     * Replaces {@code placeholder} with {@code value} in the paragraph at
     * {@code index} when that paragraph's text contains the placeholder.
     * The paragraph is fetched from the range again for every access, exactly
     * as the inline code did, rather than caching a paragraph object across
     * modifications.
     */
    private static void replaceIfPresent(Range range, int index, String placeholder, String value) {
        if (range.getParagraph(index).text().contains(placeholder)) {
            range.getParagraph(index).replaceText(placeholder, value);
        }
    }
}
Result:
So please check it again and tell us exactly what is not working for you. Because we need to reproduce it, please provide a minimal, complete, and verifiable example, as I have done with my code.

Add html code with base64 images in a header using iText [duplicate]

I'm using itextpdf-5.0.6.jar (Java 8), and when I try to export HTML code containing an image tag with a base64 source, I get a file-not-found exception.
If I remove the image tag, everything works great!
I found a few solutions that override the image tag processor, but most of them are old and not compatible with version 5.0.6.
Here is the HTML I send:
"<!doctype html>\n<html lang=\"en\">\n<head>\n
<meta charset=\"UTF-8\">\n
<title>Test PDF</title>\n</head>\n<body>\n\n
<div class=\"pdf-header\">\n\n
<img src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAc4AAABQCAYAAACQ/ZU3AAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAyJpVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADw/eHBhY2tldCBiZWdpbj0i77u/IiBpZD0iVzVNME1wQ2VoaUh6cmVTek5UY3prYzlkIj8+IDx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6bWV0YS8iIHg6eG1wdGs9IkFkb2JlIFhNUCBDb3JlIDUuMy1jMDExIDY2LjE0NTY2MSwgMjAxMi8wMi8wNi0xNDo1NjoyNyAgICAgICAgIj4gPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1zeW50YXgtbnMjIj4gPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIgeG1sbnM6eG1wPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvIiB4bWxuczp4bXBNTT0iaHR0cDovL25zLmFkb2JlLmNvbS94YXAvMS4wL21tLyIgeG1sbnM6c3RSZWY9Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9zVHlwZS9SZXNvdXJjZVJlZiMiIHhtcDpDcmVhdG9yVG9vbD0iQWRvYmUgUGhvdG9zaG9wIENTNiAoV2luZG93cykiIHhtcE1NOkluc3RhbmNlSUQ9InhtcC5paWQ6MjRGMzU1Qjk5RjFFMTFFNEE2NzA4QzlBNERCRTcxRTUiIHhtcE1NOkRvY3VtZW50SUQ9InhtcC5kaWQ6MjRGMzU1QkE5RjFFMTFFNEE2NzA4QzlBNERCRTcxRTUiPiA8eG1wTU06RGVyaXZlZEZyb20gc3RSZWY6aW5zdGFuY2VJRD0ieG1wLmlpZDoyNEYzNTVCNzlGMUUxMUU0QTY3MDhDOUE0REJFNzFFNSIgc3RSZWY6ZG9jdW1lbnRJRD0ieG1wLmRpZDoyNEYzNTVCODlGMUUxMUU0QTY3MDhDOUE0REJFNzFFNSIvPiA8L3JkZjpEZXNjcmlwdGlvbj4gPC9yZGY6UkRGPiA8L3g6eG1wbWV0YT4gPD94cGFja2V0IGVuZD0iciI/PkQbS2MAABpBSURBVHja7F0JuBTVlT4PARcIqCgKEgQliolLjGtEBUEwjIrGfUMwypCIToyjURRQBNGITty3JMY1YVziviCoj7hLcI9BUYOIyiIMCAg4hjfnt047bVuvu+rcqupb3ef/vvM19Ot769atW/cs9ywNTU1NlADaMu3E1IPpe0ybMrVhWk8+k8ZqpsVM85jeZ/o70wympWQwGMLQyNQ7ZpsrmE7LYGynlVznM6a7mU5g2pPpI6axTEOYtpe/GwxVQ0uHtt2ZjmcawLQb01pVvhdIAK8yTZaXboY9XoMhF5gkjP0RpjFMLzKdyNRNmOVlTCOYOjC1sOky5JFxQmodxdSPqcGje8FYdhQ6m+kfTNcz3cS03B61weAt5gl9wfQu0xvy/QrROj9gam3TZPAFcaQ3aJiPimS4r2dMMwzbUGBqms10hr14BkPucL9onZcw3WnTYcgb44TU9zrTT3J4jzDvTKTgHHQve+QGQ26wkOlhps5MN9t0GPLCOBuE6cDc2Tbn9wrHpWlM48jOSQwGH/EW/f+xykdCv2N6julZpteY/mXTZKg2Gip41d7INKwG7/s+piMpOFMxGOoBjeSvV63BUDMa53k1yjSBgynw4LNzT4PBYDAkwjgHMp1f4/cOr+Df2xIwGAwGgyvj3JCCM816wGCm4bYMDAaDweDCOKFpblpHc4Dg6s1sKRgMBoNBwzjheXpync0BUgKOs6VgMBgMBg3j/BVVP3VeNQCTbSdbDgaDwWCohOKUe+2ZhibU78dMLzHNYppPycdetaP
AnNyTaQcKzmVd5wFJHibYkjAYDAZDVMY5iIJqJlr8LwXZPW6gbBOsQ2velYLQmSEOGvOBxjgNBoPBEIdxHu7Qz9tMhzG9WYV7WMP0gtBVFOS37Bqh3Uqmd2TsSAj/li0Hg8FgMERlnPjsq+wD5ljkgF3owf2grBjKnL1cpD1/UsQc8TlTPlFxocmWgMFgMBg0jBNFqLUFp4/zhGkWa7/7UVDsGv+2orcGg8FgSJxx7qBs/xgFTkC+4Rl7tAaDwWBIA4VwlG2V7f9iU2gwGAyGetQ4uyrbv2RTWJfowrQHBd7MW1EQGoSE+csoCEWCo9WzQisTuF5ruRZoe6bvUhCCtEauOZeCs/YXKbA2LPdkntZh2oVpZwqSi2xBwZFIO/k7xr5Kxo8zd5y/T2d6z5aYweA/49QG/3/i6X1BEHhd0Q5Zk/5U8t3+THfk6JmOZLqu5LuHmPaM2c/FQkArpj4UhOyAukXsYwUFJdyup/jmczDHgygIk9pLmFAUfMk0hekWpnvk/1niO0w/ZTpG5mxtRR8LmB5kupdpchXuIWkgROzPFDjuaQFHvuNlXspiyvS5Q/njjwmOf3D/XbrcnlRnPD5UZhqYUHcf8Ni6hVxjNn9snuAcfE5ByOEKEfg+EiF5DgVOmXDIfJ/H0hRxDm6jwD8mLjCGHnydOY7PYF2Ml3TpZacXGKc2gYCv9Sxhgm6v1GxK0UrZV7UQtlG3VdwDvJJ7y+I+XDkH0K6OFYI2+Gumv5b5PTSxQ2WDxLUblMLgQCFobuOFiabtQb0R0+kifLmul45MJwp9yHQ10zWyaeURl5FbuBtwQRSmmRL6MyXCOHnDXlsEqryhEKVQWNtbh/zmU76/hykICXyEmdvqMv2hbOWRsr/GAX5/DtPPHe9nOOlzsp/bomgwSW3ShtoANNdGppMSEhx2Y5rGBEmzQ8nfeotGgixTN8nG0pDANbcUzQPMeosUtakRFJiKR6YgZEHz/o30PzSH6+g/mH7p2Mckqm6Zw/4J9tWLad0a3TMgPCIJDXxf5jATHcO0UeiE7tIF2t4Nyuv8jPvt5qhtnqVsPo3HPqVFAi+1oTbRMqV+ocG+LMwSUuObwqCPoujm2LiAmXoGJWceK6Az05OiEa6f8vPoJELA/bJB5QEoGP9bxz5gqTiBqhtz3Yk3220T6mvfOtk/YDUZC4GP5+5kpjBB+EKlFaWVoyDlom1COP7aq1Zrcu1DBkN8dBVmibPYH2R0TTC2BygwGyeBHzG9wrR3xnM3SJhJD8+fMRy5/kThpQujAudYOOde5cH9JKV17ldn7zreOxwzPMjMs32J1jnPQbA6jvuL/Q44apsP8ZifL2acnzpoDw1kMORHi76V6YAENNinRKquBmB2xgvc09N57k7BeaSLSXKZPKf5ntyTM+PkTRtHFDvW6bsHJ8tpPAcbl3yP8+9Fiv5wRDIqQ20TFo8xhf8UGOcHyslA4oTjbT825AhY8zgz+76y/TbCFNpV+T5grn2U/Cs6v4GMy0WoQJgRTPdveHRfvXnTb+3YR786VzTALx7geVynSOtcwh8XZaF1Omqbd/JYXyllnC7J2XG+s7Xtx4YcAd6+CDGK6xQHUxNCCdb35D66kbs5NEmAsdyfwH5wusyzT4BXaS/HPvazV492Z5pY8t21FMQya7TO8RlomyiLObpU+gamO0wEQh0aSZ+2z2CoBn5IQeH2OLiWosewZoV9mE7zYBzQpG6mIObWBYj5vcLTNeNqrt3XXruvMIK1vz2KtE4kSTlf2dcR3FdF6xH/BnxqpPIat/AYZ4UxzqfJ7QAeXPw5Crzf7MyzulhjUxAZiAeL6qGKM5pjPL0PbDobV3kM8JA82rEPJK441eP1omacvHEjw1ZXe+W+FrJKnYIQa/22sq8xEX53CumOD+A4O7b0ywLjBMd3NY3AlIEYvKdEHTdkD2TyuMOmITJgev33CL+DU9GlHt8HshWdW8Xrn+QgzReAdINHkN9ZknYSBx8
NBtjr9g3synPZt0jr/NJhDZfVOkXbPEPZ93VhWYqKz0ZuSmhCEJ8Hjz/Etx1CliQhKxRSki2yqYiFEVT5jBAZTnp6fh9gXtVwWMK53fWOfSwSjX5JDjQlbd3i/vaqhb57xUDihOnK5zK+grapEXgQYzqhOUm6AGicKPa8TUKTso/Q/1AQP4dUTFNy8HLkFZcwPZHxNeFUBg9KJPtHSBM83E5M0eIAqRRZgB6Xa2P9wmFjKOlNlZ2lj6fL/Mb1DPFfIkjCGvOOMIoWwuiQ2GAHGYNLkH0bEZyuzvD545z4LgqcNLRA7lEkSng/J+/ZALnnyGCNp6Xshb4DSUKWV7BsdCJ9bvNSHIDYTtbolorW2cT/h+ViqqKvn3LbHYs9XxPQNq/g/hZUYpzQWHDmc2/CDwPu6UOEsIHgxp4RQvWMeZ4vJmzS3VO+xj6OGj8W/OiM5gNM8nYRhkrDmBqZ/iDS3zkJatJPyDXvDxG88B3O124QzVCDQ8owzu0oqG6iBVIJjqRoIV9IqoC4tj7Kax2eIeNEhZyHZDN11ZTzVD9XY3LdPYF5ysRqwYzi1QiCAAQ+JLg4TIQ1bbxua7E0fF1Yg6//BPcPxqlxpDpPhLAktE0w84nN/bE0rdp9wijSssevJZvQzkVS/EyRMCaLRrras8WEqgCzU+wfCfbHObSHOeEYkdzTAmz8NwvzmhWB0SEwGYHeAx2vCWYIp4GPIizyo8Vkc4TiWuUqxxzhcA+nxmRkL8uGgUwrw5X30YHSN9e3E6a5mWM/iN+7NWeWna68sX+v1MuyAmrKTMv3/pns2VN5LmDKvIPiV18qYG/6dkWqkUrGeVCx1umobV4sMaahCDvb+RkF5tWs0FOkAgSVLxCNZR+qH9zouAFBAHknhXGtkc1xkGjc50VgmsXMU+ss8p5IobjmhAhMs/iaw5RMA0y+ubN4bZahi5TaH6wyI8QaExctHDawqEDsK0yVruFnd1N1HZqy1Dpr9nxTHGdwzj1D2UXvkD7/JutDg3EJaJvzKr27YYwTG9VRVJ2whnbCuHEehKwhh1Bth7fAo/NQh/aoN/n7lMYGpnWgCDSatfAa6WqiQnh6RHlNSMJXKi0h3UK+h3lte0V//6QQF/aYzPNMZdtdU16z15G7RQqb7BCqbuL2TBin5GfdlWoYzOhglRuqfJ5b8xytF/L9KHkP4mJ/7m93R23zIr6n5XEZJ/C4SO/VxLbCGHD+sV0NrjdkV3GpHPERRQulcNm8XTFN0cY1HlFbszHsHHsX0mXluYrcjxzgmT5T0W77FNcENMQTHfuYKwLZ5zl+d/uIw08U9CU356m8MM83hW/ERQOFZJri/hDTqS1Gfj4F5ew02ibq31b0Ei+3KcBZZThVP6AeGSagup9aQ9onDsXhNLKesj0ku8FMiz2/T83G71qr8FUl0w8L5dBWIPlLQvOncZpJy5ENVWXGO/aB83iY/j/xcK2uiLlWdovKU2L0+yXlG/cr2zUX6gWrjSYxz36kSwAPnMdMu2K1sErSNM7f4KW03ANGc6WMpxakN2xALlUSEHryVA7uc6miTVvHa0Ko0FT7CRNitlT0AzPtBwnNn+bcKI3sNHs7SP8FrBHm+4qna/VdipcvtX/CvwNeyPm+9qyyXddmtFg8D62XuEYAh5Z7W5QfRjFDwfQFL9jXPHgwJ8kLnGfNEy/SmQ7tsww9yVKKTxJJ1W/cRNHm7wnehybxdZJhD7+gIPwH4UCtHPs620EjyQpTkmScU6bP7R7TajGV8o2ZpLNQlku8frFSANdqm5G0/qjnN+DEOOC+gKpfVBYmynE5XVjIi3qLIyNKO/TEUF4LrYSFVdbYgaQyCMHSA+eWlo79wIFtYg6edxzGuVtpYeYQxAmpeDdBS0V1NILAxKnxau9Ups9FGa0dHPHcGfXHcV4ITApCEm6WGzm0is/oHJGCn8rZ2voDuWXdSCv0xNA844gLeFw2JnR9rdm6hUdziLkYkZPnDY0Ppv4
oFi0cGSFs7r4KayEqHq+RdwaMLq6DXyVB73IKQkvSrD17LjIXpcE4C8AZDjJG7CgM7FDK3nSK68Fku5Uw9DwAZq9BDu3TDD0xJIfNyD0xQK3gHdkfcvGO8sa5kLXIVym6/0H/5hgn9wPhpV9Mpt2+Bp65xmGxbYXnsoLnE34haWXFeo6vEavIiYtkikP+w4V5QQNdmPED2pzp5JwsJmTu/y+H9mmHnhgMSQNJVA4k/z2/XTS/chrlThSkG40CnAs+VSPP/TNFmyhHC7+jIDlKKtpm3AZJmHRgm/+1SNlIsXYrZZd56CxyP39JG8hIg9CTdZTt8xJ6YjAUgDP4QyifxwpxGGcP1oQ2VzDVUkwvl94tZ0glZaqcn56XQteTue/GajDO4pflMQoygsDGjUrwcOKZRunln4XN+988X0jwCnMJSs9L6InBUAAsQY05HTtCKlYmoHXGCUOZUufrJaozGxSQpKM7VPGeaTkRIAAdwduozN1HJgaMFPlLkf80yTCFYzxeED8ht5JUeQo9MRgAWEj+kdfBs/axOibT/xbjZC0UJd72iNHH1DpfMy0iPhuYtJPMb3yv5MX1hnGW4gthpNC+cO6B+DhUs3g6gb5xAO9jXGdHstATQ/0B7yKOa9rm+B7iaIB9xRGoGFAWWsV4z5+3ZRNZsHk4Ib7RRPrsQlVzW8dimURBRhK4dL/r0BdiI7fzcPP4ozBPLSz0xJBXbMF0RY7HH+ecE2UBdyr5Lk785rQoKd4MiWvoyEk7U9vYh3ivRgoqyT/m0McPPHuwiFtzOXu10BND3oEqR4PyOHBmZMj+FCefbul5ZhzHoFoz02oyTEVO9MHa/foUJHB3BdL8HZ9nxlnQQFEw+GNl+608WjjQfi91aG+hJ/5gpU2BE5DwY5Ocjl2Vfo83dkQXfL+OGWcbRZs45ciQrrRDQmMdy89Lk+TEqwwjyyg4A9VKDz4AISfw/Fpb2d5CT/zCapsCJ+AYJa+Wkzjm2l7iEATEMdPOY+32jRp75hpGFElA5TnuzB+nJzhW8A1VLoAWnk26Ngm0Lxk3oGm6mI0t9MQYp29a1wnklkP1AMqnBSWOxgnzZG/5dz2baQvCUlxErc06lvTx8M1hlBS9jgXfkgfMkUmMm1y7jQdj35/ccnJa6Il/0JhqkTml2qWzkqrr+BYFuanfF4FOK2gja9aT5OYEmClYE1wg6fd+GLUJ//7RmBqnMc4AyyJom9uIEJc0YPY9g4Li17llnABS920es826VR4zEre71CvEGe9RZKEnvkGTzWU+BeEItYS/UmANOVvZHoIt6hzuSboi49XUOiMzTgoSnXSM2X/tmCemz4XmvYGiaZR0rRMovVrMZ/DYr0Gu4qgNWng4/xpmXk0njgaRyjd26OPUPEnjdYQFijbdPH2vXIFkJi87tN+dgqIQecLkGL/FEc3QONo8b9Qf19ga6eagLJVjyL344+AUxw3BLlZMp48vuMZj6vMqjhfxlgMc2t/lqK0a0sM8RRtI3bVYHQXWkGMdhVQw351zdM9x0+/FcTSpRTOtNrVopdCf32Qw9l+UyTvsPeOEl5Pm8PfTKo13B9J7AgNzmYYbf/IW7yvbbVej84GA8f90aA9r0u2kKxCeOVgjXEXxstTE8SitRcb5Y2W7D8tom4gF7pXB2CHwXpBXxrl3hpqBK/DyTyKd+zWAvIvHUXaVZAzx8V7G6zgPuJ7pEYf2W2ekQSSFNApMw3mrFr3nD1C2m9UM02zpqJjExWC+5rZ5ZJyDle2qcT4IT8GeDu2xeUwz3uQ14O33T0W7g2p4ThBrjKxALlaeUygogJAHpOHA8wJrs8traVEww/mxCEUaBWJWM38byrSNok+cHWsy0cFf5cK8MU7krNWeFWZdjQEbo4uJdTqlU1vOkDw0oSU9ZT3XKuYL83QBsgp1yMG9IkFB0hatWjTTjtPu3WG5epkRI1JirMNYtI5og/jaFSvb+MI4u1Bw9qEBgtRfz3CsneW
l18KqnuQLLyrbXUTpuc/7gAeZbnB8j67z/SZ5U29KQeustTAUOEj2UzZ/rpnvT5M1EhezmW7i5waB97+VY6poHvaBceI86AXlJAGop5ZVdQHM122OkrKFnuQLk5XtdmO6kvwseZcU4Cg0y6H94aQ/nsmUNyTYFxJkvFQjDLMF01n8z8scunk+pF/sr2cp+xtbpMHC2adJ0cdePIayRTpw+DqMgtyoC+VzkXymmW4MDjVIUQX3bdf4nEczXCvIMNHXoT2cKu5lWj/FMa4kSxWXJGDNgLt8J0VbrG+c+4wS4VADBJRvKMLahkWEDC0d5XMT+TdCm8ZkODewnhwrWoM2mcrVFJz1z/F4DSTpINTIG/uXeX4hpEIJvF1/RdETRIQBTO3hkO9hZtWkUYUQd3uRteAtHityhx+j6GsCt31MimeHMs4by7wUYKAo+bJEPpcW/R+S0yr5XC2/x4F36aJoVfRyb0lBoDCCoZPK9nNvRuvlR0zjHfuAFJO2Fy0W8+XG7xIDXu5JMq8a9BNCMoWZ8rlC/lYQoPAuoDAAPLXbMX1HPuPm0GxXhfnBeT3OorRnXBjzrSKQrvFxAfDmOZ83UQhQ2yfQnc9m2gv4PpsrMLGW7OPdKahGlYQl5VmkNixhyoilPEXZ35gQoQS+JEdS/GMThBoezXRHc4yzObQR+q7HDxov7VsZXAcb2J9JV2vOkH/c4sA4C+hIboXNfQbOcwcy7aFsD+sTql5c6vE9Pl4HjPPAjK8X5tcCAUwT4vcm050hQs+7zIxxnSGKPlF27K4w56W8pwa7NqPrXE5+1fw0ZIvXyEKHygH5Z3FWucyhjwsTYkxpIQmGN5c34bdtuXwFaLa3lWib0PKOU/Y3ujmzKgUWEY0zJiykw8L+kGfG+V5zanTCQNaKE22d1z3OtykoC2RZOtWhfWvZSNf29P6QQcjVd2CKLZOvcQUzus9DLBcaEzAqSzVbkpKvg1jsm5TjDC07lmfGOYqyCenY2Na4gdFI+nqx9QKYtO92aA+Nc7yPN8ab70qKl34vDFNtiXyF2UwTS7RNnHEP1PICCRsqhwlKfrEp0y9rhXHCO3WSrT9DxoCX7FKbhrJAYhCXqh8Icent6b25etca45T3SASRAtOElqlNwwgHo4pZgvg38NrWHu2dKSEyuWacCA0YZmvPUAWAIZjZvjxwdjXEoT02UXjZtvfw3lwY5+ulHqR1igk8D6UhhIjn1VbNiVMODMx5leIaWIsj88w4IaUMcpRoDQYX3AMJ1Kahomb1W4f2qJJ0lYf3hZAULfOz882gQMA3GJ0Uv56g7O8JZsKNUX/Mv/3EQescwWPtkkfGiXhRZN//m60/Q5VxqTDPJpuKZoEg9jcc2sNL9zCfbkjO0bRaZ70zToSZnBxyFgnT/pYZaJsFwAFphaIdyl2enzfGCfs0gsiftP3I4BHzhPVjkU1FKGASQ2iBiycqcuF29uy+NAwQcYBP1+k6+IBpADPMMaVMU7xVRyv7fZj7i52Ni9ugqs/VymsO5TF/VRGrZQ4mHokH4JSxxPYig2d4iIJMWJeIhtRgU/INvC6apzaXKVILIoxgoEfaPTTOuN7V74SEXtQ6UFEG5vqrih2BSoAUptqkIKMdxob39ecU/xwd2Yfg9X2Yz4wT+S+R6PcZ238MHgMltuAMA8cDZL85ioKMW4YA2DyRalJbPWM/EZyv8eFmmAmAIRxsjzUUOE57gII0qA+FZdwp0jY3EcapwT1S/UT7DBfz9ZHURlPa8VBuu4tvjBMH7/dRkD93hq1DQ46A1I8nURDzBUaB2rJIQQfTjsuRCEydyG+8pIQK3y0uojc9nJcmESxw3rmBsg/E/D1BQa5fQ/UB7flDodmyVyP9KTyHo8ZKjlYKmFhPSRQyAOM8jXTe2xc3NDU1Iei4g3SApNPtSgg3h6TTyOyBJNSFhNSwT7eU32BjaCV/rwSk5yoki/9QXgZsOo0UmHaSMMlgXF0U7WD/Lq3Mvh7
lK8foYpH8ioEg3nVi9lPYoF2gmTukzXKtlNGF4h9DhD37JIB3pQcF1VXayXsGwgazQt4HpKr7Qj6Xybwvl38nVelGswY+k/WUBDZ21MQjr0c5O9soZv9f8KZfVW995bi/5HHPDelL8w6UQ6GAx9IIyQai3Kt2fKH3qxxDx4g861v4PwEGAPxb/SZEmJVjAAAAAElFTkSuQmCC\"> \n\n\n</div>\n\n<div class=\"main\">\n<div class=\"canvas\">\nHellow world</div></div></body>\n</html>"
part of my code:
// Excerpt from the question: converts the HTML string to a PDF file.
// `fileOutputStream`, `file` and `htmlCode` are declared outside this excerpt.
fileOutputStream = new FileOutputStream(file);
Document document = new Document();
// Bind the document to the output file before opening it.
PdfWriter.getInstance(document, fileOutputStream);
document.open();
// HTMLWorker reads the HTML from the reader and adds the result to the document.
HTMLWorker htmlWorker = new HTMLWorker(document);
StringReader stringReader = new StringReader(htmlCode);
htmlWorker.parse(stringReader);
// Close the document first, then the underlying file stream.
// NOTE(review): neither close runs if parse() throws, unless the surrounding
// code (not shown) handles it — confirm.
document.close();
fileOutputStream.close();
Any help will be appreciated.
thanks
Please stop using HTMLWorker, as repeated many times on StackOverflow, the HTMLWorker class has been abandoned in favor of XML Worker a long time ago. We won't invest in further development of HTMLWorker so it's a very bad choice to use it. Please switch to XML Worker.
Also upgrade to the latest iText version, the version you are using dates from February 4, 2011, many bugs have been fixed in the 4 years that have passed. Make sure you have both the iText jar and the XML Worker jar with the same version number.
Base64 images aren't supported yet, but I have made you a very simple Proof of Concept, showing how easy it is to add support for such images. Take a look at the ParseHtml4 example and the resulting PDF: html_4.pdf.
To achieve this, you need to write an implementation of the ImageProvider interface. I have done this by extending the AbstractImageProvider class:
class Base64ImageProvider extends AbstractImageProvider {
#Override
public Image retrieve(String src) {
int pos = src.indexOf("base64,");
try {
if (src.startsWith("data") && pos > 0) {
byte[] img = Base64.decode(src.substring(pos + 7));
return Image.getInstance(img);
}
else {
return Image.getInstance(src);
}
} catch (BadElementException ex) {
return null;
} catch (IOException ex) {
return null;
}
}
#Override
public String getImageRootPath() {
return null;
}
}
As you can see, I check for the existence of "base64," in whatever is passed to XML Worker through the src attribute of the img tag. If that String is present, I decode whatever follows that "base64," and I return an Image object that is created using the resulting bytes.
Once you have this ImageProvider implementation, it's only a matter of passing it to XML Worker.

HTML to PDF with base64 images throws FileNotFoundException

I'm using itextpdf-5.0.6.jar (Java 8), and when I try to export HTML code containing an image tag with a base64 source, I get a file-not-found exception.
If I remove the image tag, everything works great!
I found a few solutions that override the image tag processor, but most of them are old and not compatible with version 5.0.6.
Here is the HTML I send:
"<!doctype html>\n<html lang=\"en\">\n<head>\n
<meta charset=\"UTF-8\">\n
<title>Test PDF</title>\n</head>\n<body>\n\n
<div class=\"pdf-header\">\n\n
<img src=\"data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAc4AAABQCAYAAACQ/ZU3AAAAGXRFWHRTb2Z0d2FyZQBBZG9iZSBJbWFnZVJlYWR5ccllPAAAAyJpVFh0WE1MOmNvbS5hZG9iZS54bXAAAAAAADw/eHBhY2tldCBiZWdpbj0i77u/IiBpZD0iVzVNME1wQ2VoaUh6cmVTek5UY3prYzlkIj8+IDx4OnhtcG1ldGEgeG1sbnM6eD0iYWRvYmU6bnM6bWV0YS8iIHg6eG1wdGs9IkFkb2JlIFhNUCBDb3JlIDUuMy1jMDExIDY2LjE0NTY2MSwgMjAxMi8wMi8wNi0xNDo1NjoyNyAgICAgICAgIj4gPHJkZjpSREYgeG1sbnM6cmRmPSJodHRwOi8vd3d3LnczLm9yZy8xOTk5LzAyLzIyLXJkZi1zeW50YXgtbnMjIj4gPHJkZjpEZXNjcmlwdGlvbiByZGY6YWJvdXQ9IiIgeG1sbnM6eG1wPSJodHRwOi8vbnMuYWRvYmUuY29tL3hhcC8xLjAvIiB4bWxuczp4bXBNTT0iaHR0cDovL25zLmFkb2JlLmNvbS94YXAvMS4wL21tLyIgeG1sbnM6c3RSZWY9Imh0dHA6Ly9ucy5hZG9iZS5jb20veGFwLzEuMC9zVHlwZS9SZXNvdXJjZVJlZiMiIHhtcDpDcmVhdG9yVG9vbD0iQWRvYmUgUGhvdG9zaG9wIENTNiAoV2luZG93cykiIHhtcE1NOkluc3RhbmNlSUQ9InhtcC5paWQ6MjRGMzU1Qjk5RjFFMTFFNEE2NzA4QzlBNERCRTcxRTUiIHhtcE1NOkRvY3VtZW50SUQ9InhtcC5kaWQ6MjRGMzU1QkE5RjFFMTFFNEE2NzA4QzlBNERCRTcxRTUiPiA8eG1wTU06RGVyaXZlZEZyb20gc3RSZWY6aW5zdGFuY2VJRD0ieG1wLmlpZDoyNEYzNTVCNzlGMUUxMUU0QTY3MDhDOUE0REJFNzFFNSIgc3RSZWY6ZG9jdW1lbnRJRD0ieG1wLmRpZDoyNEYzNTVCODlGMUUxMUU0QTY3MDhDOUE0REJFNzFFNSIvPiA8L3JkZjpEZXNjcmlwdGlvbj4gPC9yZGY6UkRGPiA8L3g6eG1wbWV0YT4gPD94cGFja2V0IGVuZD0iciI/PkQbS2MAABpBSURBVHja7F0JuBTVlT4PARcIqCgKEgQliolLjGtEBUEwjIrGfUMwypCIToyjURRQBNGITty3JMY1YVziviCoj7hLcI9BUYOIyiIMCAg4hjfnt047bVuvu+rcqupb3ef/vvM19Ot769atW/cs9ywNTU1NlADaMu3E1IPpe0ybMrVhWk8+k8ZqpsVM85jeZ/o70wympWQwGMLQyNQ7ZpsrmE7LYGynlVznM6a7mU5g2pPpI6axTEOYtpe/GwxVQ0uHtt2ZjmcawLQb01pVvhdIAK8yTZaXboY9XoMhF5gkjP0RpjFMLzKdyNRNmOVlTCOYOjC1sOky5JFxQmodxdSPqcGje8FYdhQ6m+kfTNcz3cS03B61weAt5gl9wfQu0xvy/QrROj9gam3TZPAFcaQ3aJiPimS4r2dMMwzbUGBqms10hr14BkPucL9onZcw3WnTYcgb44TU9zrTT3J4jzDvTKTgHHQve+QGQ26wkOlhps5MN9t0GPLCOBuE6cDc2Tbn9wrHpWlM48jOSQwGH/EW/f+xykdCv2N6julZpteY/mXTZKg2Gip41d7INKwG7/s+piMpOFMxGOoBjeSvV63BUDMa53k1yjSBgynw4LNzT4PBYDAkwjgHMp1f4/cOr+Df2xIwGAwGgyvj3JCCM816wGCm4bYMDAaDweDCOKFpblpHc4Dg6s1sKRgMBoNBwzjheXpync0BUgKOs6VgMBgMBg3j/BVVP3VeNQCTbSdbDgaDwWCohOKUe+2ZhibU78dMLzHNYppPycdetaP
AnNyTaQcKzmVd5wFJHibYkjAYDAZDVMY5iIJqJlr8LwXZPW6gbBOsQ2velYLQmSEOGvOBxjgNBoPBEIdxHu7Qz9tMhzG9WYV7WMP0gtBVFOS37Bqh3Uqmd2TsSAj/li0Hg8FgMERlnPjsq+wD5ljkgF3owf2grBjKnL1cpD1/UsQc8TlTPlFxocmWgMFgMBg0jBNFqLUFp4/zhGkWa7/7UVDsGv+2orcGg8FgSJxx7qBs/xgFTkC+4Rl7tAaDwWBIA4VwlG2V7f9iU2gwGAyGetQ4uyrbv2RTWJfowrQHBd7MW1EQGoSE+csoCEWCo9WzQisTuF5ruRZoe6bvUhCCtEauOZeCs/YXKbA2LPdkntZh2oVpZwqSi2xBwZFIO/k7xr5Kxo8zd5y/T2d6z5aYweA/49QG/3/i6X1BEHhd0Q5Zk/5U8t3+THfk6JmOZLqu5LuHmPaM2c/FQkArpj4UhOyAukXsYwUFJdyup/jmczDHgygIk9pLmFAUfMk0hekWpnvk/1niO0w/ZTpG5mxtRR8LmB5kupdpchXuIWkgROzPFDjuaQFHvuNlXspiyvS5Q/njjwmOf3D/XbrcnlRnPD5UZhqYUHcf8Ni6hVxjNn9snuAcfE5ByOEKEfg+EiF5DgVOmXDIfJ/H0hRxDm6jwD8mLjCGHnydOY7PYF2Ml3TpZacXGKc2gYCv9Sxhgm6v1GxK0UrZV7UQtlG3VdwDvJJ7y+I+XDkH0K6OFYI2+Gumv5b5PTSxQ2WDxLUblMLgQCFobuOFiabtQb0R0+kifLmul45MJwp9yHQ10zWyaeURl5FbuBtwQRSmmRL6MyXCOHnDXlsEqryhEKVQWNtbh/zmU76/hykICXyEmdvqMv2hbOWRsr/GAX5/DtPPHe9nOOlzsp/bomgwSW3ShtoANNdGppMSEhx2Y5rGBEmzQ8nfeotGgixTN8nG0pDANbcUzQPMeosUtakRFJiKR6YgZEHz/o30PzSH6+g/mH7p2Mckqm6Zw/4J9tWLad0a3TMgPCIJDXxf5jATHcO0UeiE7tIF2t4Nyuv8jPvt5qhtnqVsPo3HPqVFAi+1oTbRMqV+ocG+LMwSUuObwqCPoujm2LiAmXoGJWceK6Az05OiEa6f8vPoJELA/bJB5QEoGP9bxz5gqTiBqhtz3Yk3220T6mvfOtk/YDUZC4GP5+5kpjBB+EKlFaWVoyDlom1COP7aq1Zrcu1DBkN8dBVmibPYH2R0TTC2BygwGyeBHzG9wrR3xnM3SJhJD8+fMRy5/kThpQujAudYOOde5cH9JKV17ldn7zreOxwzPMjMs32J1jnPQbA6jvuL/Q44apsP8ZifL2acnzpoDw1kMORHi76V6YAENNinRKquBmB2xgvc09N57k7BeaSLSXKZPKf5ntyTM+PkTRtHFDvW6bsHJ8tpPAcbl3yP8+9Fiv5wRDIqQ20TFo8xhf8UGOcHyslA4oTjbT825AhY8zgz+76y/TbCFNpV+T5grn2U/Cs6v4GMy0WoQJgRTPdveHRfvXnTb+3YR786VzTALx7geVynSOtcwh8XZaF1Omqbd/JYXyllnC7J2XG+s7Xtx4YcAd6+CDGK6xQHUxNCCdb35D66kbs5NEmAsdyfwH5wusyzT4BXaS/HPvazV492Z5pY8t21FMQya7TO8RlomyiLObpU+gamO0wEQh0aSZ+2z2CoBn5IQeH2OLiWosewZoV9mE7zYBzQpG6mIObWBYj5vcLTNeNqrt3XXruvMIK1vz2KtE4kSTlf2dcR3FdF6xH/BnxqpPIat/AYZ4UxzqfJ7QAeXPw5Crzf7MyzulhjUxAZiAeL6qGKM5pjPL0PbDobV3kM8JA82rEPJK441eP1omacvHEjw1ZXe+W+FrJKnYIQa/22sq8xEX53CumOD+A4O7b0ywLjBMd3NY3AlIEYvKdEHTdkD2TyuMOmITJgev33CL+DU9GlHt8HshWdW8Xrn+QgzReAdINHkN9ZknYSBx8
NBtjr9g3synPZt0jr/NJhDZfVOkXbPEPZ93VhWYqKz0ZuSmhCEJ8Hjz/Etx1CliQhKxRSki2yqYiFEVT5jBAZTnp6fh9gXtVwWMK53fWOfSwSjX5JDjQlbd3i/vaqhb57xUDihOnK5zK+grapEXgQYzqhOUm6AGicKPa8TUKTso/Q/1AQP4dUTFNy8HLkFZcwPZHxNeFUBg9KJPtHSBM83E5M0eIAqRRZgB6Xa2P9wmFjKOlNlZ2lj6fL/Mb1DPFfIkjCGvOOMIoWwuiQ2GAHGYNLkH0bEZyuzvD545z4LgqcNLRA7lEkSng/J+/ZALnnyGCNp6Xshb4DSUKWV7BsdCJ9bvNSHIDYTtbolorW2cT/h+ViqqKvn3LbHYs9XxPQNq/g/hZUYpzQWHDmc2/CDwPu6UOEsIHgxp4RQvWMeZ4vJmzS3VO+xj6OGj8W/OiM5gNM8nYRhkrDmBqZ/iDS3zkJatJPyDXvDxG88B3O124QzVCDQ8owzu0oqG6iBVIJjqRoIV9IqoC4tj7Kax2eIeNEhZyHZDN11ZTzVD9XY3LdPYF5ysRqwYzi1QiCAAQ+JLg4TIQ1bbxua7E0fF1Yg6//BPcPxqlxpDpPhLAktE0w84nN/bE0rdp9wijSssevJZvQzkVS/EyRMCaLRrras8WEqgCzU+wfCfbHObSHOeEYkdzTAmz8NwvzmhWB0SEwGYHeAx2vCWYIp4GPIizyo8Vkc4TiWuUqxxzhcA+nxmRkL8uGgUwrw5X30YHSN9e3E6a5mWM/iN+7NWeWna68sX+v1MuyAmrKTMv3/pns2VN5LmDKvIPiV18qYG/6dkWqkUrGeVCx1umobV4sMaahCDvb+RkF5tWs0FOkAgSVLxCNZR+qH9zouAFBAHknhXGtkc1xkGjc50VgmsXMU+ss8p5IobjmhAhMs/iaw5RMA0y+ubN4bZahi5TaH6wyI8QaExctHDawqEDsK0yVruFnd1N1HZqy1Dpr9nxTHGdwzj1D2UXvkD7/JutDg3EJaJvzKr27YYwTG9VRVJ2whnbCuHEehKwhh1Bth7fAo/NQh/aoN/n7lMYGpnWgCDSatfAa6WqiQnh6RHlNSMJXKi0h3UK+h3lte0V//6QQF/aYzPNMZdtdU16z15G7RQqb7BCqbuL2TBin5GfdlWoYzOhglRuqfJ5b8xytF/L9KHkP4mJ/7m93R23zIr6n5XEZJ/C4SO/VxLbCGHD+sV0NrjdkV3GpHPERRQulcNm8XTFN0cY1HlFbszHsHHsX0mXluYrcjxzgmT5T0W77FNcENMQTHfuYKwLZ5zl+d/uIw08U9CU356m8MM83hW/ERQOFZJri/hDTqS1Gfj4F5ew02ibq31b0Ei+3KcBZZThVP6AeGSagup9aQ9onDsXhNLKesj0ku8FMiz2/T83G71qr8FUl0w8L5dBWIPlLQvOncZpJy5ENVWXGO/aB83iY/j/xcK2uiLlWdovKU2L0+yXlG/cr2zUX6gWrjSYxz36kSwAPnMdMu2K1sErSNM7f4KW03ANGc6WMpxakN2xALlUSEHryVA7uc6miTVvHa0Ko0FT7CRNitlT0AzPtBwnNn+bcKI3sNHs7SP8FrBHm+4qna/VdipcvtX/CvwNeyPm+9qyyXddmtFg8D62XuEYAh5Z7W5QfRjFDwfQFL9jXPHgwJ8kLnGfNEy/SmQ7tsww9yVKKTxJJ1W/cRNHm7wnehybxdZJhD7+gIPwH4UCtHPs620EjyQpTkmScU6bP7R7TajGV8o2ZpLNQlku8frFSANdqm5G0/qjnN+DEOOC+gKpfVBYmynE5XVjIi3qLIyNKO/TEUF4LrYSFVdbYgaQyCMHSA+eWlo79wIFtYg6edxzGuVtpYeYQxAmpeDdBS0V1NILAxKnxau9Ups9FGa0dHPHcGfXHcV4ITApCEm6WGzm0is/oHJGCn8rZ2voDuWXdSCv0xNA844gLeFw2JnR9rdm6hUdziLkYkZPnDY0Ppv4
oFi0cGSFs7r4KayEqHq+RdwaMLq6DXyVB73IKQkvSrD17LjIXpcE4C8AZDjJG7CgM7FDK3nSK68Fku5Uw9DwAZq9BDu3TDD0xJIfNyD0xQK3gHdkfcvGO8sa5kLXIVym6/0H/5hgn9wPhpV9Mpt2+Bp65xmGxbYXnsoLnE34haWXFeo6vEavIiYtkikP+w4V5QQNdmPED2pzp5JwsJmTu/y+H9mmHnhgMSQNJVA4k/z2/XTS/chrlThSkG40CnAs+VSPP/TNFmyhHC7+jIDlKKtpm3AZJmHRgm/+1SNlIsXYrZZd56CxyP39JG8hIg9CTdZTt8xJ6YjAUgDP4QyifxwpxGGcP1oQ2VzDVUkwvl94tZ0glZaqcn56XQteTue/GajDO4pflMQoygsDGjUrwcOKZRunln4XN+988X0jwCnMJSs9L6InBUAAsQY05HTtCKlYmoHXGCUOZUufrJaozGxSQpKM7VPGeaTkRIAAdwduozN1HJgaMFPlLkf80yTCFYzxeED8ht5JUeQo9MRgAWEj+kdfBs/axOibT/xbjZC0UJd72iNHH1DpfMy0iPhuYtJPMb3yv5MX1hnGW4gthpNC+cO6B+DhUs3g6gb5xAO9jXGdHstATQ/0B7yKOa9rm+B7iaIB9xRGoGFAWWsV4z5+3ZRNZsHk4Ib7RRPrsQlVzW8dimURBRhK4dL/r0BdiI7fzcPP4ozBPLSz0xJBXbMF0RY7HH+ecE2UBdyr5Lk785rQoKd4MiWvoyEk7U9vYh3ivRgoqyT/m0McPPHuwiFtzOXu10BND3oEqR4PyOHBmZMj+FCefbul5ZhzHoFoz02oyTEVO9MHa/foUJHB3BdL8HZ9nxlnQQFEw+GNl+608WjjQfi91aG+hJ/5gpU2BE5DwY5Ocjl2Vfo83dkQXfL+OGWcbRZs45ciQrrRDQmMdy89Lk+TEqwwjyyg4A9VKDz4AISfw/Fpb2d5CT/zCapsCJ+AYJa+Wkzjm2l7iEATEMdPOY+32jRp75hpGFElA5TnuzB+nJzhW8A1VLoAWnk26Ngm0Lxk3oGm6mI0t9MQYp29a1wnklkP1AMqnBSWOxgnzZG/5dz2baQvCUlxErc06lvTx8M1hlBS9jgXfkgfMkUmMm1y7jQdj35/ccnJa6Il/0JhqkTml2qWzkqrr+BYFuanfF4FOK2gja9aT5OYEmClYE1wg6fd+GLUJ//7RmBqnMc4AyyJom9uIEJc0YPY9g4Li17llnABS920es826VR4zEre71CvEGe9RZKEnvkGTzWU+BeEItYS/UmANOVvZHoIt6hzuSboi49XUOiMzTgoSnXSM2X/tmCemz4XmvYGiaZR0rRMovVrMZ/DYr0Gu4qgNWng4/xpmXk0njgaRyjd26OPUPEnjdYQFijbdPH2vXIFkJi87tN+dgqIQecLkGL/FEc3QONo8b9Qf19ga6eagLJVjyL344+AUxw3BLlZMp48vuMZj6vMqjhfxlgMc2t/lqK0a0sM8RRtI3bVYHQXWkGMdhVQw351zdM9x0+/FcTSpRTOtNrVopdCf32Qw9l+UyTvsPeOEl5Pm8PfTKo13B9J7AgNzmYYbf/IW7yvbbVej84GA8f90aA9r0u2kKxCeOVgjXEXxstTE8SitRcb5Y2W7D8tom4gF7pXB2CHwXpBXxrl3hpqBK/DyTyKd+zWAvIvHUXaVZAzx8V7G6zgPuJ7pEYf2W2ekQSSFNApMw3mrFr3nD1C2m9UM02zpqJjExWC+5rZ5ZJyDle2qcT4IT8GeDu2xeUwz3uQ14O33T0W7g2p4ThBrjKxALlaeUygogJAHpOHA8wJrs8traVEww/mxCEUaBWJWM38byrSNok+cHWsy0cFf5cK8MU7krNWeFWZdjQEbo4uJdTqlU1vOkDw0oSU9ZT3XKuYL83QBsgp1yMG9IkFB0hatWjTTjtPu3WG5epkRI1JirMNYtI5og/jaFSvb+MI4u1Bw9qEBgtRfz3CsneW
l18KqnuQLLyrbXUTpuc/7gAeZbnB8j67z/SZ5U29KQeustTAUOEj2UzZ/rpnvT5M1EhezmW7i5waB97+VY6poHvaBceI86AXlJAGop5ZVdQHM122OkrKFnuQLk5XtdmO6kvwseZcU4Cg0y6H94aQ/nsmUNyTYFxJkvFQjDLMF01n8z8scunk+pF/sr2cp+xtbpMHC2adJ0cdePIayRTpw+DqMgtyoC+VzkXymmW4MDjVIUQX3bdf4nEczXCvIMNHXoT2cKu5lWj/FMa4kSxWXJGDNgLt8J0VbrG+c+4wS4VADBJRvKMLahkWEDC0d5XMT+TdCm8ZkODewnhwrWoM2mcrVFJz1z/F4DSTpINTIG/uXeX4hpEIJvF1/RdETRIQBTO3hkO9hZtWkUYUQd3uRteAtHityhx+j6GsCt31MimeHMs4by7wUYKAo+bJEPpcW/R+S0yr5XC2/x4F36aJoVfRyb0lBoDCCoZPK9nNvRuvlR0zjHfuAFJO2Fy0W8+XG7xIDXu5JMq8a9BNCMoWZ8rlC/lYQoPAuoDAAPLXbMX1HPuPm0GxXhfnBeT3OorRnXBjzrSKQrvFxAfDmOZ83UQhQ2yfQnc9m2gv4PpsrMLGW7OPdKahGlYQl5VmkNixhyoilPEXZ35gQoQS+JEdS/GMThBoezXRHc4yzObQR+q7HDxov7VsZXAcb2J9JV2vOkH/c4sA4C+hIboXNfQbOcwcy7aFsD+sTql5c6vE9Pl4HjPPAjK8X5tcCAUwT4vcm050hQs+7zIxxnSGKPlF27K4w56W8pwa7NqPrXE5+1fw0ZIvXyEKHygH5Z3FWucyhjwsTYkxpIQmGN5c34bdtuXwFaLa3lWib0PKOU/Y3ujmzKgUWEY0zJiykw8L+kGfG+V5zanTCQNaKE22d1z3OtykoC2RZOtWhfWvZSNf29P6QQcjVd2CKLZOvcQUzus9DLBcaEzAqSzVbkpKvg1jsm5TjDC07lmfGOYqyCenY2Na4gdFI+nqx9QKYtO92aA+Nc7yPN8ab70qKl34vDFNtiXyF2UwTS7RNnHEP1PICCRsqhwlKfrEp0y9rhXHCO3WSrT9DxoCX7FKbhrJAYhCXqh8Icent6b25etca45T3SASRAtOElqlNwwgHo4pZgvg38NrWHu2dKSEyuWacCA0YZmvPUAWAIZjZvjxwdjXEoT02UXjZtvfw3lwY5+ulHqR1igk8D6UhhIjn1VbNiVMODMx5leIaWIsj88w4IaUMcpRoDQYX3AMJ1Kahomb1W4f2qJJ0lYf3hZAULfOz882gQMA3GJ0Uv56g7O8JZsKNUX/Mv/3EQescwWPtkkfGiXhRZN//m60/Q5VxqTDPJpuKZoEg9jcc2sNL9zCfbkjO0bRaZ70zToSZnBxyFgnT/pYZaJsFwAFphaIdyl2enzfGCfs0gsiftP3I4BHzhPVjkU1FKGASQ2iBiycqcuF29uy+NAwQcYBP1+k6+IBpADPMMaVMU7xVRyv7fZj7i52Ni9ugqs/VymsO5TF/VRGrZQ4mHokH4JSxxPYig2d4iIJMWJeIhtRgU/INvC6apzaXKVILIoxgoEfaPTTOuN7V74SEXtQ6UFEG5vqrih2BSoAUptqkIKMdxob39ecU/xwd2Yfg9X2Yz4wT+S+R6PcZ238MHgMltuAMA8cDZL85ioKMW4YA2DyRalJbPWM/EZyv8eFmmAmAIRxsjzUUOE57gII0qA+FZdwp0jY3EcapwT1S/UT7DBfz9ZHURlPa8VBuu4tvjBMH7/dRkD93hq1DQ46A1I8nURDzBUaB2rJIQQfTjsuRCEydyG+8pIQK3y0uojc9nJcmESxw3rmBsg/E/D1BQa5fQ/UB7flDodmyVyP9KTyHo8ZKjlYKmFhPSRQyAOM8jXTe2xc3NDU1Iei4g3SApNPtSgg3h6TTyOyBJNSFhNSwT7eU32BjaCV/rwSk5yoki/9QXgZsOo0UmHaSMMlgXF0U7WD/Lq3Mvh7
lK8foYpH8ioEg3nVi9lPYoF2gmTukzXKtlNGF4h9DhD37JIB3pQcF1VXayXsGwgazQt4HpKr7Qj6Xybwvl38nVelGswY+k/WUBDZ21MQjr0c5O9soZv9f8KZfVW995bi/5HHPDelL8w6UQ6GAx9IIyQai3Kt2fKH3qxxDx4g861v4PwEGAPxb/SZEmJVjAAAAAElFTkSuQmCC\"> \n\n\n</div>\n\n<div class=\"main\">\n<div class=\"canvas\">\nHellow world</div></div></body>\n</html>"
part of my code:
// Converts the HTML held in htmlCode into a PDF written to `file`.
// NOTE(review): `file`, `htmlCode` and `fileOutputStream` are declared outside this excerpt.
// NOTE(review): neither close() call sits in a finally block, so an exception while parsing
// would skip both of them.
fileOutputStream = new FileOutputStream(file);
Document document = new Document();
// Attach a PDF writer so the document's content ends up in fileOutputStream.
PdfWriter.getInstance(document, fileOutputStream);
document.open();
// Feed the HTML string to HTMLWorker, which was constructed on top of the open document.
HTMLWorker htmlWorker = new HTMLWorker(document);
StringReader stringReader = new StringReader(htmlCode);
htmlWorker.parse(stringReader);
// Close the document first, then the underlying file stream.
document.close();
fileOutputStream.close();
Any help would be appreciated.
thanks
Please stop using HTMLWorker, as repeated many times on StackOverflow, the HTMLWorker class has been abandoned in favor of XML Worker a long time ago. We won't invest in further development of HTMLWorker so it's a very bad choice to use it. Please switch to XML Worker.
Also upgrade to the latest iText version, the version you are using dates from February 4, 2011, many bugs have been fixed in the 4 years that have passed. Make sure you have both the iText jar and the XML Worker jar with the same version number.
Base64 images aren't supported yet, but I have made you a very simple Proof of Concept, showing how easy it is to add support for such images. Take a look at the ParseHtml4 example and the resulting PDF: html_4.pdf.
To achieve this, you need to write an implementation of the ImageProvider interface. I have done this by extending the AbstractImageProvider class:
class Base64ImageProvider extends AbstractImageProvider {
#Override
public Image retrieve(String src) {
int pos = src.indexOf("base64,");
try {
if (src.startsWith("data") && pos > 0) {
byte[] img = Base64.decode(src.substring(pos + 7));
return Image.getInstance(img);
}
else {
return Image.getInstance(src);
}
} catch (BadElementException ex) {
return null;
} catch (IOException ex) {
return null;
}
}
#Override
public String getImageRootPath() {
return null;
}
}
As you can see, I check for the existence of "base64," in whatever is passed to XML Worker through the src attribute of the img tag. If that String is present, I decode whatever follows that "base64," and I return an Image object that is created using the resulting bytes.
Once you have this ImageProvider implementation, it's only a matter of passing it to XML Worker.

How to read shapes group as an image from Word document(.doc or .docx) using apachePOI?

I have a simple requirement to extract all the Images and Diagrams drawn in MS Word file.
I am able to extract only images, but not groups of shapes (like a Use Case Diagram or an Activity Diagram). I want to save all the diagrams as images.
I have used apachePOI.
Following code I have written
/**
 * Demo that dumps the pictures of a .docx file to disk: first every picture reported by
 * the document itself (as JPEG), then every package part that can be decoded as an
 * image (as PNG).
 */
public class worddocreader {
    public static void main(String args[]) {
        // try-with-resources makes sure the source file is closed even when parsing fails.
        try (FileInputStream fs = new FileInputStream("F:/1.docx")) {
            XWPFDocument docx = new XWPFDocument(fs);

            int i = 0;
            for (XWPFPictureData pic : docx.getAllPictures()) {
                BufferedImage imag = ImageIO.read(new ByteArrayInputStream(pic.getData()));
                // ImageIO.read returns null when no registered reader understands the data.
                if (imag != null) {
                    // ImageIO.write expects an informal format name ("jpg"), not a MIME type;
                    // with "image/jpeg" no writer is found and nothing is written.
                    ImageIO.write(imag, "jpg", new File("F:/docParsing/imagefromword" + i + ".jpg"));
                }
                i++;
            }

            ArrayList<PackagePart> packArrayList = docx.getPackageRelationship().getPackage().getParts();
            System.out.println("Array List Size : " + packArrayList.size());
            // Walk the parts from last to first; the part index is used in the file name.
            for (int idx = packArrayList.size() - 1; idx >= 0; idx--) {
                PackagePart packagePart = packArrayList.get(idx);
                System.out.println(packagePart.getContentType());
                try (java.io.InputStream partStream = packagePart.getInputStream()) {
                    BufferedImage bfrImage = ImageIO.read(partStream);
                    // Most parts (XML, relationships, ...) are not images and decode to null.
                    if (bfrImage != null) {
                        ImageIO.write(bfrImage, "png", new File("F:/docParsing_emb/size" + idx + ".png"));
                    }
                } catch (Exception e) {
                    // Deliberately best effort: one unreadable part must not stop the others.
                    e.printStackTrace();
                }
            }
            System.out.println("Done");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
It only extracts images, not shapes.
Does anybody know how I can do this?
So you are after the stuff defined in [MS-ODRAW], i.e. so-called OfficeDrawings which can be created directly in Word using its Drawing palette?
Unfortunately, POI offers only little help here. With HWPF (the old binary *.doc file format) you can get a handle to such data like so:
// Looks up a single OfficeDrawing of a binary *.doc file by its character offset.
// NOTE(review): `document` is only declared here; it has to be assigned an opened
// HWPFDocument before the next line can run.
HWPFDocument document;
OfficeDrawings officeDrawings = document.getOfficeDrawingsMain();
OfficeDrawing drawing = officeDrawings.getOfficeDrawingAt(OFFSET);
// OFFSET is a global character offset describing the position of the drawing in question
// i.e. document.getRange().getStartOffset() + x
This drawing can then be further processed into individual records:
// Wrap the drawing's shape container so its individual Escher records can be fetched.
// NOTE(review): EscherRecordManager is not defined in this excerpt; confirm where it comes from.
EscherRecordManager escherRecordManager = new EscherRecordManager(drawing.getOfficeArtSpContainer());
// The Sp and Opt records of the shape, as exposed by the record manager.
EscherSpRecord escherSpRecord = escherRecordManager.getSpRecord();
EscherOptRecord escherOptRecord = escherRecordManager.getOptRecord();
Using the data from all these records you can theoretically render out the original drawing again. But it's rather painful...
So far I've only done this in a single case where I had lots of simple arrows floating around on a page. Those had to be converted to a textual representation (something like: "Positions (x1, y1) and (x2, y2) are connected by an arrow"). Doing this essentially meant to implement a subset of [MS-ODRAW] relevant to those arrows using the above-mentioned records. Not exactly a pleasant task.
MS Word backup solution
If using MS Word itself is an option to you, then there is another pragmatic way:
extract all relevant offsets that contain OfficeDrawings using POI.
Inside Word: Iterate over the document with VBA and copy all the drawings at the given offsets to the clipboard.
Use some other application (I chose Visio) to dump the clipboard contents into a PNG.
The necessary check for a drawing in step 1 is very simple (see below). The rest can be completely automated in Word. If anyone is in need, I can share the respective VBA code.
// Drawing check for step 1: report true as soon as a special character run contains
// the character U+0008. This is a fragment of a boolean method whose header is not shown.
if (characterRun.isSpecialCharacter()) {
for (char currentChar : characterRun.text().toCharArray()) {
if ('\u0008' == currentChar) return true;
}
}
If you mean Office Art objects then
In the class org.apache.poi.hwpf.HWPFDocument
there is a _officeDrawingsMain that contains the office art objects
check this link https://poi.apache.org/apidocs/org/apache/poi/hwpf/HWPFDocument.html

Creating tables in a MS Word file using Java

I want to create a table in a Microsoft Office Word file using Java. Can anybody tell me how to do it with an example?
Have a look at Apache POI
The POI project is the master project
for developing pure Java ports of file
formats based on Microsoft's OLE 2
Compound Document Format. OLE 2
Compound Document Format is used by
Microsoft Office Documents, as well as
by programs using MFC property sets to
serialize their document objects.
I've never seen it done, and I work in Word a lot. If you really want to programmatically do something in a Word document then I'd advise using Microsoft's scripting language VBA, which is specifically designed for this purpose. In fact, I'm working in it right now.
If you're working under Open Office then they have a very similar set of macro-powered tools for doing the same thing.
Office 2003 has an xml format, and the default document format for office 2007 is xml (zipped). So you could just generate xml from java. If you open an existing document it's not too hard to see the xml required.
Alternatively, you could use openoffice's api to generate a document, and save it as a ms-word document.
This snippet can be used to create a table dynamically in MS Word document.
// Builds a two-column table: column 0 holds a node label, column 1 the matching value
// looked up in `tags` under the keys "node1", "node2", ...
// NOTE(review): `Knode1`, `tags` and `nodeList` are defined outside this excerpt.
XWPFDocument document = new XWPFDocument();
XWPFTable tableTwo = document.createTable();

// First row: reuse the table's existing row 0 / cell 0 and append a second cell for the value.
XWPFTableRow tableTwoRowOne = tableTwo.getRow(0);
tableTwoRowOne.getCell(0).setText(Knode1);
tableTwoRowOne.createCell().setText(tags.get("node1").toString());

// Remaining rows: nodeList[i] is paired with the tag named "node" + (i + 1).
for (int i = 1; i < nodeList.length; i++) {
    String node = "node" + (i + 1);
    String nodeVal = tags.get(node).toString();
    XWPFTableRow tr = tableTwo.createRow();
    tr.getCell(0).setText(nodeList[i]);
    tr.getCell(1).setText(nodeVal);
}
Our feature set is to hit a button in our web app and get the page you are looking at back as a Word document. We use the docx schema for description of documents and have a bunch of Java code on the server side which does the document creation and response back to our web client. The formatting itself is done with some compiled xsl-t's from within Java to translate from our own XML persistence tier.
The docx schema is pretty hard to understand. The way we made most progress was to create template docx's in Word with exactly the formatting that we needed but with bogus content. We then fooled around with them until we understood exactly what was going on. There is a huge amount in the docx that you don't really need to worry about. When reading / translating the docx Word is pretty tolerant to a partially complete formatting schema. In fact we chose to strip out pretty much all the formatting because it also means that the user's default formatting takes precedence, which they seem to prefer. It also makes the xsl process faster and the resulting document smaller.
I manage the docx4j project
docx4j contains a class TblFactory, which creates regular tables (ie no row or column spans), with the default settings which Word 2007 would create, and with the dimensions specified by the user.
If you want a more complex table, the easiest approach is to create it in Word, then copy the resulting XML into a String in your IDE, where you can use docx4j's XmlUtils.unmarshalString to create a Tbl object from it.
Using my little zip utility, you can create docx with ease, if you know what you're doing. Word's DOCX file format is simply zip (folders with xml files). By using java zip utilities, you can modify existing docx, just the content part.
For the following sample to work, simply open Word, enter few lines, save document. Then with zip program, remove file word/document.xml (this is file where main content of the Word document is residing) from the zip. Now you have the template prepared. Save modified zip.
Here is what the creation of a new Word file looks like:
/* Opening part of word/document.xml: XML declaration, the root w:document element with its namespace declarations, and the opening w:body tag. */
final String DOCUMENT_XML_HEAD =
"<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\" ?>" +
"<w:document xmlns:wpc=\"http://schemas.microsoft.com/office/word/2010/wordprocessingCanvas\" xmlns:mc=\"http://schemas.openxmlformats.org/markup-compatibility/2006\" xmlns:o=\"urn:schemas-microsoft-com:office:office\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" xmlns:m=\"http://schemas.openxmlformats.org/officeDocument/2006/math\" xmlns:v=\"urn:schemas-microsoft-com:vml\" xmlns:wp14=\"http://schemas.microsoft.com/office/word/2010/wordprocessingDrawing\" xmlns:wp=\"http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing\" xmlns:w10=\"urn:schemas-microsoft-com:office:word\" xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" xmlns:w14=\"http://schemas.microsoft.com/office/word/2010/wordml\" xmlns:w15=\"http://schemas.microsoft.com/office/word/2012/wordml\" xmlns:wpg=\"http://schemas.microsoft.com/office/word/2010/wordprocessingGroup\" xmlns:wpi=\"http://schemas.microsoft.com/office/word/2010/wordprocessingInk\" xmlns:wne=\"http://schemas.microsoft.com/office/word/2006/wordml\" xmlns:wps=\"http://schemas.microsoft.com/office/word/2010/wordprocessingShape\" mc:Ignorable=\"w14 w15 wp14\">" +
"<w:body>";
/* Closing part of word/document.xml: closes the w:body and w:document elements opened by the head. */
final String DOCUMENT_XML_FOOT =
"</w:body>" +
"</w:document>";
// Target archive for the new document; it is handed to the creator below.
final ZipOutputStream zos = new ZipOutputStream(new FileOutputStream("c:\\TEMP\\test.docx"));
// Full document body: head + a single paragraph with one text run + foot.
final String fullDocumentXmlContent = DOCUMENT_XML_HEAD + "<w:p><w:r><w:t>Hey MS Word, hello from java.</w:t></w:r></w:p>" + DOCUMENT_XML_FOOT;
final si.gustinmi.DocxZipCreator creator = new si.gustinmi.DocxZipCreator();
// create new docx file from the prepared template plus the generated document.xml content
creator.createDocxFromExistingDocx(zos, "c:\\TEMP\\existingDocx.docx", fullDocumentXmlContent);
These are zip utilities:
package si.gustinmi;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.zip.CRC32;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import java.util.zip.ZipOutputStream;
/**
 * Creates a new docx archive from an existing one.
 *
 * @author gustinmi [at] gmail [dot] com
 */
public class DocxZipCreator {

    public static final Logger log = Logger.getLogger(DocxZipCreator.class.getCanonicalName());

    private static final int BUFFER_SIZE = 4096;

    /** Name of the archive entry that holds the main document content. */
    private static final String DOCUMENT_XML_ENTRY = "word/document.xml";

    /**
     * On-the-fly zip creator. Traverses the existing docx archive, copies its entries into
     * the new archive, and finally inserts the custom {@code word/document.xml}.
     *
     * <p>A {@code word/document.xml} entry that is still present in the template is skipped,
     * so the supplied content always ends up in the result. Both the template stream and
     * {@code zos} are closed when this method returns, whether it succeeds or fails.
     *
     * @param zos                target archive; closed by this method
     * @param zipFilePath        location of the existing docx template
     * @param documentXmlContent content of the word/document.xml, written as UTF-8
     * @throws IOException if the template cannot be read or the target cannot be written
     */
    public void createDocxFromExistingDocx(ZipOutputStream zos, String zipFilePath, String documentXmlContent) throws IOException {
        log.info("Starting to create new docx zip");
        // Declaring zos as a resource guarantees it is closed even if opening the template fails.
        try (ZipOutputStream out = zos;
             ZipInputStream zipIn = new ZipInputStream(new FileInputStream(zipFilePath))) {
            for (ZipEntry entry = zipIn.getNextEntry(); entry != null; entry = zipIn.getNextEntry()) {
                // Copying the template's own document.xml would clash with the entry added below.
                if (!DOCUMENT_XML_ENTRY.equals(entry.getName())) {
                    writeEntry(zipIn, out, entry.getName());
                }
                zipIn.closeEntry();
            }
            // add document.xml to the new zip
            writeEntry(new ByteArrayInputStream(documentXmlContent.getBytes(StandardCharsets.UTF_8)), out, DOCUMENT_XML_ENTRY);
        } finally {
            log.info("End of docx creation");
        }
    }

    /**
     * Copies a single entry from zip to zip. Best effort: an I/O failure is logged and not
     * propagated.
     *
     * @param is        source archive, positioned at the entry to copy
     * @param zos       target archive
     * @param entryName name of the entry to create in the target
     */
    public void copyEntryfromZipToZip(ZipInputStream is, ZipOutputStream zos, String entryName) {
        try {
            writeEntry(is, zos, entryName);
        } catch (IOException ioe) {
            log.log(Level.SEVERE, "Failed to copy zip entry " + entryName, ioe);
        }
    }

    /**
     * Creates a new zip entry with the given content. Best effort: an I/O failure is logged
     * and not propagated.
     *
     * @param content   content of the new zip entry, written as UTF-8
     * @param zos       target archive
     * @param entryName name of the entry (e.g. word/document.xml)
     */
    public void addZipEntry(String content, ZipOutputStream zos, String entryName) {
        try {
            writeEntry(new ByteArrayInputStream(content.getBytes(StandardCharsets.UTF_8)), zos, entryName);
        } catch (IOException ioe) {
            log.log(Level.SEVERE, "Failed to add zip entry " + entryName, ioe);
        }
    }

    /** Writes everything readable from {@code in} into a new entry called {@code entryName}. */
    private static void writeEntry(InputStream in, ZipOutputStream zos, String entryName) throws IOException {
        final ZipEntry entry = new ZipEntry(entryName);
        // Set the time before putNextEntry so the entry carries it from the start.
        // Size and CRC are left unset: ZipOutputStream records them itself on closeEntry.
        entry.setTime(System.currentTimeMillis());
        zos.putNextEntry(entry);
        final byte[] data = new byte[BUFFER_SIZE];
        int len;
        while ((len = in.read(data)) > -1) {
            zos.write(data, 0, len);
        }
        zos.closeEntry();
    }
}
Office Writer would be a better tool to use than POI for your requirement.
If all you want is a simple table without too much of formatting, I would use this simple trick. Use Java to generate the table as HTML using plain old table,tr,td tags and copy the rendered HTML table into the word document ;)
Click here for a Working example with source code.
This example generates MS-Word docs from Java, based on a template concept.

Categories