How to find blank pages inside a PDF using PDFBox? - java

Here is the challenge I'm currently facing.
I have a lot of PDFs and I have to remove the blank pages inside them and display only the pages with content (text or images).
The problem is that those pdfs are scanned documents.
So the blank pages have some dirty left behind by the scanner.

I did some research and ended up with this code that checks for 99% of the page as white or light gray.
I needed the gray factor as the scanned documents sometimes are not pure white.
private static Boolean isBlank(PDPage pdfPage) throws IOException {
BufferedImage bufferedImage = pdfPage.convertToImage();
long count = 0;
int height = bufferedImage.getHeight();
int width = bufferedImage.getWidth();
Double areaFactor = (width * height) * 0.99;
for (int x = 0; x < width ; x++) {
for (int y = 0; y < height ; y++) {
Color c = new Color(bufferedImage.getRGB(x, y));
// verify light gray and white
if (c.getRed() == c.getGreen() && c.getRed() == c.getBlue()
&& c.getRed() >= 248) {
count++;
}
}
}
if (count >= areaFactor) {
return true;
}
return false;
}

#Shoyo's code works fine for PDFBox version < 2.0. For future readers, there's no much change but, just in case, here is the code for PDFBOX 2.0+ to make your life easier.
In your main (By main, I mean the place where you are loading your PDF into PDDocument) method:
try {
PDDocument document = PDDocument.load(new File("/home/codemantra/Downloads/tetml_ct_access/C.pdf"));
PDFRenderer renderedDoc = new PDFRenderer(document);
for (int pageNumber = 0; pageNumber < document.getNumberOfPages(); pageNumber++) {
if(isBlank(renderedDoc.renderImage(pageNumber))) {
System.out.println("Blank Page Number : " + pageNumber + 1);
}
}
} catch (Exception e) {
e.printStackTrace();
}
And isBlank method will just have BufferedImage passed in:
private static Boolean isBlank(BufferedImage pageImage) throws IOException {
BufferedImage bufferedImage = pageImage;
long count = 0;
int height = bufferedImage.getHeight();
int width = bufferedImage.getWidth();
Double areaFactor = (width * height) * 0.99;
for (int x = 0; x < width; x++) {
for (int y = 0; y < height; y++) {
Color c = new Color(bufferedImage.getRGB(x, y));
if (c.getRed() == c.getGreen() && c.getRed() == c.getBlue() && c.getRed() >= 248) {
count++;
}
}
}
if (count >= areaFactor) {
return true;
}
return false;
}
All the credits goes to #Shoyo
Update:
Some PDFs have "This Page was Intentionally Left Blank" to which the above code considers as blank. If this is your requirement then feel free to use the above code. But, my requirement was only to filter out the pages that were completely blank (No any images present nor consisting of any fonts). So, I ended up using this code (Plus this code runs faster :P) :
public static void main(String[] args) {
try {
PDDocument document = PDDocument.load(new File("/home/codemantra/Downloads/CTP2040.pdf"));
PDPageTree allPages = document.getPages();
Integer pageNumber = 1;
for (PDPage page : allPages) {
Iterable<COSName> xObjects = page.getResources().getXObjectNames();
Iterable<COSName> fonts = page.getResources().getFontNames();
if(xObjects.spliterator().getExactSizeIfKnown() == 0 && fonts.spliterator().getExactSizeIfKnown() == 0) {
System.out.println(pageNumber);
}
pageNumber++;
}
} catch (Exception e) {
e.printStackTrace();
}
}
This will return the page numbers of those pages which are completely blank.
Hope this helps someone! :)

#Pramesh Bajracharya, Your solution to find a blank page in a PDF document is intact!
If in case the requirement is to remove the blank pages the same code can be enhanced as below
List<Integer> blankPageList = new ArrayList<Integer>();
for( PDPage page : allPages )
{
Iterable<COSName> xObjects = page.getResources().getXObjectNames();
Iterable<COSName> fonts = page.getResources().getFontNames();
// condition to determine if the page is a blank page
if( xObjects.spliterator().getExactSizeIfKnown() == 0 && fonts.spliterator().getExactSizeIfKnown() == 0 )
{
pageRemovalList.add( pageNumber );
}
pageNumber++;
}
// remove the blank pages from the pdf document using the blank page numbers list
for( Integer i : blankPageList )
{
document.removePage( i );
}

http://www.rgagnon.com/javadetails/java-detect-and-remove-blank-page-in-pdf.html
import java.io.ByteArrayOutputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import com.itextpdf.text.Document;
import com.itextpdf.text.DocumentException;
import com.itextpdf.text.io.RandomAccessSourceFactory;
import com.itextpdf.text.pdf.PdfCopy;
import com.itextpdf.text.pdf.PdfDictionary;
import com.itextpdf.text.pdf.PdfImportedPage;
import com.itextpdf.text.pdf.PdfName;
import com.itextpdf.text.pdf.PdfReader;
import com.itextpdf.text.pdf.RandomAccessFileOrArray;
public class RemoveBlankPageFromPDF {
// value where we can consider that this is a blank image
// can be much higher or lower depending of what is considered as a blank page
public static final int BLANK_THRESHOLD = 160;
public static void removeBlankPdfPages(String source, String destination)
throws IOException, DocumentException
{
PdfReader r = null;
RandomAccessSourceFactory rasf = null;
RandomAccessFileOrArray raf = null;
Document document = null;
PdfCopy writer = null;
try {
r = new PdfReader(source);
// deprecated
// RandomAccessFileOrArray raf
// = new RandomAccessFileOrArray(pdfSourceFile);
// itext 5.4.1
rasf = new RandomAccessSourceFactory();
raf = new RandomAccessFileOrArray(rasf.createBestSource(source));
document = new Document(r.getPageSizeWithRotation(1));
writer = new PdfCopy(document, new FileOutputStream(destination));
document.open();
PdfImportedPage page = null;
for (int i=1; i<=r.getNumberOfPages(); i++) {
// first check, examine the resource dictionary for /Font or
// /XObject keys. If either are present -> not blank.
PdfDictionary pageDict = r.getPageN(i);
PdfDictionary resDict = (PdfDictionary) pageDict.get( PdfName.RESOURCES );
boolean noFontsOrImages = true;
if (resDict != null) {
noFontsOrImages = resDict.get( PdfName.FONT ) == null &&
resDict.get( PdfName.XOBJECT ) == null;
}
System.out.println(i + " noFontsOrImages " + noFontsOrImages);
if (!noFontsOrImages) {
byte bContent [] = r.getPageContent(i,raf);
ByteArrayOutputStream bs = new ByteArrayOutputStream();
bs.write(bContent);
System.out.println
(i + bs.size() + " > BLANK_THRESHOLD " + (bs.size() > BLANK_THRESHOLD));
if (bs.size() > BLANK_THRESHOLD) {
page = writer.getImportedPage(r, i);
writer.addPage(page);
}
}
}
}
finally {
if (document != null) document.close();
if (writer != null) writer.close();
if (raf != null) raf.close();
if (r != null) r.close();
}
}
public static void main (String ... args) throws Exception {
removeBlankPdfPages
("C://temp//documentwithblank.pdf", "C://temp//documentwithnoblank.pdf");
}
}

Related

PDFBOX 2.0+ java flatten annotations freetext created by foxit

I ran into a very tough issue. We have forms that were supposed to be filled out, but some people used annotation freeform text comments in foxit instead of filling the form fields, so the annotations never flatten. When our render software generates the final document annotations are not included.
The solution I tried is to basically go through the document, get the annotation text content and write it to the pdf so it is on the final document then remove the actual annotation, but I run into an issue where I don't know the font the annotation is using, line space, etc so cannot find out how to get it from a pdfbox to recreate exacactly as the annotation looks on the unflattened form.
Basically I want to flatten annotatations that are freeform created in foxit (The typewriter comment feature)
Here is the code. It is working, but again I am struggling with figuring out how to get the annotations to write to my final pdf document. Again flatten on the acroform is not working because these are not acroform fields! The live code filters out anything that is not a freetext type annotation, but below code should show my issue.
public static void main(String [] args)
{
String startDoc = "C:/test2/test.pdf";
String finalFlat = "C:/test2/test_FLAT.pdf";
try {
// for testing
try {
//BasicConfigurator.configure();
File myFile = new File(startDoc);
PDDocument pdDoc = PDDocument.load( myFile );
PDDocumentCatalog pdCatalog = pdDoc.getDocumentCatalog();
PDAcroForm pdAcroForm = pdCatalog.getAcroForm();
// set the NeedApperances flag
pdAcroForm.setNeedAppearances(false);
// correct the missing page link for the annotations
for (PDPage page : pdDoc.getPages()) {
for (PDAnnotation annot : page.getAnnotations()) {
System.out.println(annot.getContents());
System.out.println(annot.isPrinted());
System.out.println(annot.isLocked());
System.out.println(annot.getAppearance().toString());
PDPageContentStream contentStream = new PDPageContentStream(pdDoc, page, PDPageContentStream.AppendMode.APPEND,true,true);
int fontHeight = 14;
contentStream.setFont(PDType1Font.TIMES_ROMAN, fontHeight);
float height = annot.getRectangle().getLowerLeftY();
String s = annot.getContents().replaceAll("\t", " ");
String ss[] = s.split("\\r");
for(String sss : ss)
{
contentStream.beginText();
contentStream.newLineAtOffset(annot.getRectangle().getLowerLeftX(),height );
contentStream.showText(sss);
height = height + fontHeight * 2 ;
contentStream.endText();
}
contentStream.close();
page.getAnnotations().remove(annot);
}
}
pdAcroForm.flatten();
pdDoc.save(finalFlat);
pdDoc.close();
}
catch (Exception e) {
e.printStackTrace();
}
}
catch (Exception e) {
System.err.println("Exception: " + e.getLocalizedMessage());
}
}
This was not a fun one. After a million different tests, and I STILL do not understand all the nuances, but this is the version that appeas to flatten all pdf files and annotations if they are visible on PDF. Tested about half a dozen pdf creators and if an annotation is visible on a page this hopefully flattens it. I suspect there is a better way by pulling the matrix and transforming it and what not, but this is the only way I got it to work everywhere.
public static void flattenv3(String startDoc, String endDoc) {
org.apache.log4j.Logger.getRootLogger().setLevel(org.apache.log4j.Level.INFO);
String finalFlat = endDoc;
try {
try {
//BasicConfigurator.configure();
File myFile = new File(startDoc);
PDDocument pdDoc = PDDocument.load(myFile);
PDDocumentCatalog pdCatalog = pdDoc.getDocumentCatalog();
PDAcroForm pdAcroForm = pdCatalog.getAcroForm();
if (pdAcroForm != null) {
pdAcroForm.setNeedAppearances(false);
pdAcroForm.flatten();
}
// set the NeedApperances flag
boolean isContentStreamWrapped;
int ii = 0;
for (PDPage page: pdDoc.getPages()) {
PDPageContentStream contentStream;
isContentStreamWrapped = false;
List < PDAnnotation > annotations = new ArrayList < > ();
for (PDAnnotation annotation: page.getAnnotations()) {
if (!annotation.isInvisible() && !annotation.isHidden() && annotation.getNormalAppearanceStream() != null)
{
ii++;
if (ii > 1) {
// contentStream.close();
// continue;
}
if (!isContentStreamWrapped) {
contentStream = new PDPageContentStream(pdDoc, page, AppendMode.APPEND, true, true);
isContentStreamWrapped = true;
} else {
contentStream = new PDPageContentStream(pdDoc, page, AppendMode.APPEND, true);
}
PDAppearanceStream appearanceStream = annotation.getNormalAppearanceStream();
PDFormXObject fieldObject = new PDFormXObject(appearanceStream.getCOSObject());
contentStream.saveGraphicsState();
boolean needsTranslation = resolveNeedsTranslation(appearanceStream);
Matrix transformationMatrix = new Matrix();
boolean transformed = false;
float lowerLeftX = annotation.getNormalAppearanceStream().getBBox().getLowerLeftX();
float lowerLeftY = annotation.getNormalAppearanceStream().getBBox().getLowerLeftY();
PDRectangle bbox = appearanceStream.getBBox();
PDRectangle fieldRect = annotation.getRectangle();
float xScale = fieldRect.getWidth() - bbox.getWidth();
transformed = true;
lowerLeftX = fieldRect.getLowerLeftX();
lowerLeftY = fieldRect.getLowerLeftY();
if (bbox.getLowerLeftX() <= 0 && bbox.getLowerLeftY() < 0 && Math.abs(xScale) < 1) //BASICALLY EQUAL TO 0 WITH ROUNDING
{
lowerLeftY = fieldRect.getLowerLeftY() - bbox.getLowerLeftY();
if (bbox.getLowerLeftX() < 0 && bbox.getLowerLeftY() < 0) //THis is for the o
{
lowerLeftX = lowerLeftX - bbox.getLowerLeftX();
}
} else if (bbox.getLowerLeftX() == 0 && bbox.getLowerLeftY() < 0 && xScale >= 0) {
lowerLeftX = fieldRect.getUpperRightX();
} else if (bbox.getLowerLeftY() <= 0 && xScale >= 0) {
lowerLeftY = fieldRect.getLowerLeftY() - bbox.getLowerLeftY() - xScale;
} else if (bbox.getUpperRightY() <= 0) {
if (annotation.getNormalAppearanceStream().getMatrix().getShearY() < 0) {
lowerLeftY = fieldRect.getUpperRightY();
lowerLeftX = fieldRect.getUpperRightX();
}
} else {
}
transformationMatrix.translate(lowerLeftX,
lowerLeftY);
contentStream.transform(transformationMatrix);
contentStream.drawForm(fieldObject);
contentStream.restoreGraphicsState();
contentStream.close();
}
}
page.setAnnotations(annotations);
}
pdDoc.save(finalFlat);
pdDoc.close();
File file = new File(finalFlat);
// Desktop.getDesktop().browse(file.toURI());
} catch (Exception e) {
e.printStackTrace();
}
} catch (Exception e) {
System.err.println("Exception: " + e.getLocalizedMessage());
}
}
}

PDFBox: put two A4 pages on one A3

I have a pdf document with one or more pages A4 paper.
The resulting pdf document should be A3 paper where each page contains two from the first one (odd on the left, even on the right side).
I already got it to render the A4 pages into images and the odd pages are successfully placed on the first parts of a new A3 pages but I cannot get the even pages to be placed.
public class CreateLandscapePDF {
public void renderPDF(File inputFile, String output) {
PDDocument docIn = null;
PDDocument docOut = null;
float width = 0;
float height = 0;
float posX = 0;
float posY = 0;
try {
docIn = PDDocument.load(inputFile);
PDFRenderer pdfRenderer = new PDFRenderer(docIn);
docOut = new PDDocument();
int pageCounter = 0;
for(PDPage pageIn : docIn.getPages()) {
pageIn.setRotation(270);
BufferedImage bufferedImage = pdfRenderer.renderImage(pageCounter);
width = bufferedImage.getHeight();
height = bufferedImage.getWidth();
PDPage pageOut = new PDPage(PDRectangle.A3);
PDImageXObject image = LosslessFactory.createFromImage(docOut, bufferedImage);
PDPageContentStream contentStream = new PDPageContentStream(docOut, pageOut, AppendMode.APPEND, true, true);
if((pageCounter & 1) == 0) {
pageOut.setRotation(90);
docOut.addPage(pageOut);
posX = 0;
posY = 0;
} else {
posX = 0;
posY = width;
}
contentStream.drawImage(image, posX, posY);
contentStream.close();
bufferedImage.flush();
pageCounter++;
}
docOut.save(output + "\\LandscapeTest.pdf");
docOut.close();
docIn.close();
} catch(IOException io) {
io.printStackTrace();
}
}
}
I'm using Apache PDFBox 2.0.2 (pdfbox-app-2.0.2.jar)
Thank you very much for your help and the link to the other question - I think I already read it but wasn't able to use in in my code yet.
But finally the PDFClown made the job, though I think it's not very nice to use PDFBox and PDFClown in the same program.
Anyway here's my working code to combine A4 pages on A3 paper.
public class CombinePages {
public void run(String input, String output) {
try {
Document source = new File(input).getDocument();
Pages sourcePages = source.getPages();
Document target = new File().getDocument();
Page targetPage = null;
int pageCounter = 0;
double moveByX = .0;
for(Page sourcePage : source.getPages()) {
if((pageCounter & 1) == 0) {
//even page gets a blank page
targetPage = new Page(target);
target.setPageSize(PageFormat.getSize(PageFormat.SizeEnum.A3, PageFormat.OrientationEnum.Landscape));
target.getPages().add(targetPage);
moveByX = .0;
} else {
moveByX = .50;
}
//get content from source page
XObject xObject = sourcePages.get(pageCounter).toXObject(target);
PrimitiveComposer composer = new PrimitiveComposer(targetPage);
Dimension2D targetSize = targetPage.getSize();
Dimension2D sourceSize = xObject.getSize();
composer.showXObject(xObject, new Point2D.Double(targetSize.getWidth() * moveByX, targetSize.getHeight() * .0), new Dimension(sourceSize.getWidth(), sourceSize.getHeight()), XAlignmentEnum.Left, YAlignmentEnum.Top, 0);
composer.flush();
pageCounter++;
}
target.getFile().save(output + "\\CombinePages.pdf", SerializationModeEnum.Standard);
source.getFile().close();
} catch (FileNotFoundException fnf) {
log.error(fnf);
} catch (IOException io) {
log.error(io);
}
}
}

Splitting a large Pdf file with PDFBox gets large result files

I am processing some large pdf files, (up to 100MB and about 2000 pages), with pdfbox. Some of the pages contain a QR code, I want to split those files into smaller ones with the pages from one QR code to the next.
I got this, but the result file sizes are the same as the source file. I mean, if I cut a 100MB pdf file into a ten files I am getting ten files 100MB each.
This is the code:
PDDocument documentoPdf =
PDDocument.loadNonSeq(new File("myFile.pdf"),
new RandomAccessFile(new File("./tmp/temp"), "rw"));
int numPages = documentoPdf.getNumberOfPages();
List pages = documentoPdf.getDocumentCatalog().getAllPages();
int previusQR = 0;
for(int i =0; i<numPages; i++){
PDPage page = (PDPage) pages.get(i);
BufferedImage firstPageImage =
page.convertToImage(BufferedImage.TYPE_USHORT_565_RGB , 200);
String qrText = readQRWithQRCodeMultiReader(firstPageImage, hintMap);
if(qrText != null and i!=0){
PDDocument outputDocument = new PDDocument();
for(int j = previusQR; j<i; j++){
outputDocument.importPage((PDPage)pages.get(j));
}
File f = new File("./splitting_files/"+previusQR+".pdf");
outputDocument.save(f);
outputDocument.close();
documentoPdf.close();
}
I also tried the following code for storing the new file:
PDDocument outputDocument = new PDDocument();
for(int j = previusQR; j<i; j++){
PDStream src = ((PDPage)pages.get(j)).getContents();
PDStream streamD = new PDStream(outputDocument);
streamD.addCompression();
PDPage newPage = new PDPage(new
COSDictionary(((PDPage)pages.get(j)).getCOSDictionary()));
newPage.setContents(streamD);
byte[] buf = new byte[10240];
int amountRead = 0;
InputStream is = null;
OutputStream os = null;
is = src.createInputStream();
os = streamD.createOutputStream();
while((amountRead = is.read(buf,0,10240)) > -1) {
os.write(buf, 0, amountRead);
}
outputDocument.addPage(newPage);
}
File f = new File("./splitting_files/"+previusQR+".pdf");
outputDocument.save(f);
outputDocument.close();
But this code creates files which lacks some content and also have the same size than the original.
How can I create smaller pdfs files from a larger one?
Is it posible with PDFBox? Is there any other library with which I can transform a single page into an image (for qr recognition), and also allows me to split a big pdf file into smaller ones?
Thx!
Thx! Tilman you are right, the PDFSplit command generates smaller files. I checked the PDFSplit code out and found that it removes the page links to avoid not needed resources.
Code extracted from Splitter.class :
private void processAnnotations(PDPage imported) throws IOException
{
List<PDAnnotation> annotations = imported.getAnnotations();
for (PDAnnotation annotation : annotations)
{
if (annotation instanceof PDAnnotationLink)
{
PDAnnotationLink link = (PDAnnotationLink)annotation;
PDDestination destination = link.getDestination();
if (destination == null && link.getAction() != null)
{
PDAction action = link.getAction();
if (action instanceof PDActionGoTo)
{
destination = ((PDActionGoTo)action).getDestination();
}
}
if (destination instanceof PDPageDestination)
{
// TODO preserve links to pages within the splitted result
((PDPageDestination) destination).setPage(null);
}
}
else
{
// TODO preserve links to pages within the splitted result
annotation.setPage(null);
}
}
}
So eventually my code looks like this:
PDDocument documentoPdf =
PDDocument.loadNonSeq(new File("docs_compuestos/50.pdf"), new RandomAccessFile(new File("./tmp/t"), "rw"));
int numPages = documentoPdf.getNumberOfPages();
List pages = documentoPdf.getDocumentCatalog().getAllPages();
int previusQR = 0;
for(int i =0; i<numPages; i++){
PDPage firstPage = (PDPage) pages.get(i);
String qrText ="";
BufferedImage firstPageImage = firstPage.convertToImage(BufferedImage.TYPE_USHORT_565_RGB , 200);
firstPage =null;
try {
qrText = readQRWithQRCodeMultiReader(firstPageImage, hintMap);
} catch (NotFoundException e) {
e.printStackTrace();
} finally {
firstPageImage = null;
}
if(i != 0 && qrText!=null){
PDDocument outputDocument = new PDDocument();
outputDocument.setDocumentInformation(documentoPdf.getDocumentInformation());
outputDocument.getDocumentCatalog().setViewerPreferences(
documentoPdf.getDocumentCatalog().getViewerPreferences());
for(int j = previusQR; j<i; j++){
PDPage importedPage = outputDocument.importPage((PDPage)pages.get(j));
importedPage.setCropBox( ((PDPage)pages.get(j)).findCropBox() );
importedPage.setMediaBox( ((PDPage)pages.get(j)).findMediaBox() );
// only the resources of the page will be copied
importedPage.setResources( ((PDPage)pages.get(j)).getResources() );
importedPage.setRotation( ((PDPage)pages.get(j)).findRotation() );
processAnnotations(importedPage);
}
File f = new File("./splitting_files/"+previusQR+".pdf");
previusQR = i;
outputDocument.save(f);
outputDocument.close();
}
}
}
Thank you very much!!

How to put color to specific cells in ods file using java

Here I am able to merge/span the cell using 'setColumnSpannedNumber()' but could not set background color of the cell and alignment.I am using odfdom-java-0.8.6.jar currently..please suggest me a way to set the color for the cells. Thank you.
try
{
document = OdfSpreadsheetDocument.newSpreadsheetDocument();
OdfOfficeSpreadsheet contentRoot = document.getContentRoot();
Node node = contentRoot.getFirstChild();
while (node != null) {
contentRoot.removeChild(node);
node = contentRoot.getFirstChild();
}
} catch (Exception e) {
signature throws Exception
throw new ReportFileGenerationException("Cannot create new ODF spread sheet document. Error: "+ e.getMessage(), e);
}
OdfTable table = OdfTable.newTable(document);
for (int i = 0; i < report.size(); i++) {
List<String> row = report.get(i);
for (int j = 0; j < row.size(); j++) {
String str= row.get(j);
String newStr = str.replaceAll("[\u0000-\u001f]", "");
OdfTableCell cell = table.getCellByPosition(j, i);
if(i==0 && j==17)
{
cell.setColumnSpannedNumber(4);
cell.setCellBackgroundColor(new Color("#ffff00"));
cell.setHorizontalAlignment("center");
}
else if(i==0 && j==21)
{
cell.setColumnSpannedNumber(4);
}
else if(i==0 && j==25)
{
cell.setColumnSpannedNumber(4);
}
else if(i==0 && j==29)
{
cell.setColumnSpannedNumber(4);
}
else if(i==0 && j==33)
{
cell.setColumnSpannedNumber(4);
}
cell.setStringValue(newStr);
}
}
ByteArrayOutputStream os = new ByteArrayOutputStream();
try {
document.save(os);
return os.toByteArray();
} catch (Exception e) {
throw new ReportFileGenerationException("Cannot save the ODF spread sheet document as byte array. Error: "
+ e.getMessage(), e);
} finally {
Helper.close(os);
}
}
}
I use the API property CellBackColor, not CellBackgroundColor.
Also HoriJustify rather than HorizontalAlignment.
At least in StarBasic, this is how I set background colors:
Dim Yellow As Long : Yellow = 16777113
Dim Blue As Long : Blue = 13434879
Dim White As Long : White = -1
Dim Red As Long : Red = 15425853
cell.setCellBackColor(Yellow)
If I want a new color, I manually recolor the background and then use a macro to read out the Long value associated with that color.
And to center align:
cell.setHoriJustify(2)

Fonts do not display correctly - Java

I have some words in English that have been translated into Tamil. The task requires me to display them. An example line is given below. The first and second lines are in Tamil while the last is in Bengali.
> unpopular ஜனங்கலால் வெறுக்கப்பட்ட ¤µ£»ªÀ»õu
inactive ஜடமான ö\¯»ØÓ
doctor வைத்தியர் ©¸zxÁº
apart வேறாக uµ
If you notice above, the text in some lines does not render correctly because it is written in custom fonts. The custom font can be downloaded from here. My problem:
1. All Tamil fonts (custom and pre-loaded) have been installed. None of the text displays correctly. Why?
2. Is there a problem with the way that I am loading custom fonts?
3. In the above lines, the second column is pre-loaded font while the third column is written in custom fonts. The third column does not seem like it is Unicode, which is why application of any font also fails. What is going on?
import java.awt.Color;
import java.awt.Font;
import java.awt.FontFormatException;
import java.awt.Graphics;
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import javax.imageio.ImageIO;
/* This is the imageMaker class.
* It loads a plain white image, writes text taken from another file
* and creates a new image from it.
*
* Steps:
* 1. A plain white image is loaded.
* 2. Text is taken from a file.
* 3. Text is written to the image.
* 4. New image is created and saved.
*/
public class imgMaker_so {
/**
* #param args
*/
private static String tgtDir = "YOUR_tgt directory to store images goes here";
private static String csvFile = "csv file goes here";
private static int fontSize = 22; //default to a 22 pt font.
private static Font f;
private static String fontName = "WTAM001";
public static void main(String[] args) {
// TODO Auto-generated method stub
//Step 0. Read the image.
//readPlainImage(plainImg);
//Step 0.a: Check if the directory exists. If not, create it.
File tgtDir_file = new File(tgtDir);
if(!tgtDir_file.exists()) { //this directory does not exist.
tgtDir_file.mkdir();
}
Font nf = null;
try {
nf = Font.createFont(Font.TRUETYPE_FONT, new File("C:\\Windows\\Fonts\\" + fontName + ".ttf"));
} catch (FontFormatException | IOException e3) {
// TODO Auto-generated catch block
e3.printStackTrace();
}
if(nf != null) {
f = nf.deriveFont(Font.BOLD, fontSize);
}
if(f == null) {
System.out.println("Font is still null.");
}
//Step 1. Read csv file and get the string.
FileInputStream fis = null;
BufferedReader br = null;
try {
fis = new FileInputStream(new File(csvFile));
} catch (FileNotFoundException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
String temp = "\u0b85";
System.out.println(temp.length());
for(int i = 0; i < temp.length(); i++) {
System.out.print(temp.charAt(i));
}
//SAMPLE CODE ONLY. CHECK IF IT CAN PRINT A SINGLE CHARACTER IN FONT.
BufferedImage img = new BufferedImage(410, 200, BufferedImage.TYPE_INT_RGB);
Graphics g = img.getGraphics();
g.setColor(Color.WHITE);
g.fillRect(0, 0, 410, 200);
System.out.println("String being printed = " + temp.codePointAt(0));
g.setColor(Color.BLACK);
g.setFont(f);
if(f.canDisplay('\u0b85')) {
System.out.println("Can display code = \u0b85");
} else {
System.out.println("Cannot display code = \u0b85");
}
g.drawString(temp, 10, 35);
//g.drawString(translation, 10, fontWidth); //a 22pt font is approx. 35 pixels long.
g.dispose();
try {
ImageIO.write(img, "jpeg", new File(tgtDir + "\\" + "a.jpg"));
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
System.out.println("File written successfully to a");
//System.out.println("Cat,,बिल्ली,,,");
if(fis != null) {
try {
br = new BufferedReader(new InputStreamReader(fis, "UTF-8"));
} catch (UnsupportedEncodingException e2) {
// TODO Auto-generated catch block
e2.printStackTrace();
System.out.print("Unsupported encoding");
}
String line = null;
if(br != null) {
try {
while((line = br.readLine()) != null) {
if(line != null) {
System.out.println("Line = " + line);
List<String> word_translation = new ArrayList<String>();
parseLine(line, word_translation); //function to parse the line.
//printImages(word_translation);
if(word_translation.size() > 0) {
printImages_temp(word_translation);
}
//now that images have been read, read the plain image afresh.
//readPlainImage(plainImg);
word_translation.clear();
}
}
} catch (IOException e1) {
// TODO Auto-generated catch block
e1.printStackTrace();
}
}
}
}
public static void printImages_temp(List<String> list) {
/* Function to print translations contained in list to images.
* Steps:
* 1. Take plain white image.
* 2. Write English word on top.
* 3. Take each translation and print one to each line.
*/
String dest = tgtDir + "\\" + list.get(0) + ".jpg"; //destination file image.
//compute height and width of image.
int img_height = list.size() * 35 + 20;
int img_width = 0;
int max_length = 0;
for(int i = 0; i < list.size(); i++) {
if(list.get(i).length() > max_length) {
max_length = list.get(i).length();
}
}
img_width = max_length * 20;
System.out.println("New dimensions of image = " + img_width + " " + img_height);
BufferedImage img = new BufferedImage(img_width, img_height, BufferedImage.TYPE_INT_RGB);
Graphics g = img.getGraphics();
g.setColor(Color.WHITE);
g.fillRect(0, 0, img_width, img_height);
//image has to be written to another file. Do not write English word, which is why list starts iteration from 1.
for(int i = 1; i < list.size(); i++) {
System.out.println("String being printed = " + list.get(i).codePointAt(0));
g.setColor(Color.BLACK);
g.setFont(f);
g.drawString(list.get(i), 10, (i + 1) * 35);
}
//g.drawString(translation, 10, fontWidth); //a 22pt font is approx. 35 pixels long.
g.dispose();
try {
ImageIO.write(img, "jpeg", new File(dest));
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
System.out.println("File written successfully to " + dest);
}
public static void purge(String line) {
//This removes any inverted commas and tabs from the line apart from trimming it.
System.out.println("Line for purging = " + line);
int fromIndex = line.indexOf("\"");
//System.out.println("from index = " + fromIndex);
if(fromIndex != -1) {
line = line.substring((fromIndex + 1));
int toIndex = line.lastIndexOf("\"", line.length() - 1);
if(toIndex != -1) {
line = line.substring(0, (toIndex));
}
}
line.replaceAll("\t", " ");
line.trim();
System.out.println("Line after purging = " + line);
}
public static void parseLine(String line, List<String> result) {
/*
* This function parses the string and gets the different hindi meanings.
*/
//int index = line.indexOf(",");
//int prev_index = 0;
String[] arr = line.split(",");
List<String> l = new ArrayList<String>(Arrays.asList(arr));
for(int i = 0; i < l.size(); i++) {
if(l.get(i).isEmpty()) { //if the string at position i is empty.
l.remove(i);
}
}
for(int i = 0; i < l.size(); i++) { //inefficient copy but should be short.
String ith = l.get(i).trim();
if(!(ith.isEmpty())) { //add a string to result only if it is non-empty.
//in some entries, there are commas. they have been replaced with !?. find them and replace them.
if(ith.contains("!?")) {
//System.out.println(r + " contains !?");
String r = ith.replace("!?", ",");
result.add(r);
} else if(ith.contains("\n")) {
String r = ith.replace("\n", " ");
System.out.println("found new line in " + ith);
result.add(r);
} else {
result.add(ith);
}
}
}
for(int i = 0; i < result.size(); i++) {
System.out.println("Result[" + i + "] = " + result.get(i));
}
//System.out.println("Line being printed = " + line);
}
}
The above text was written by professional translators. So, is there something here that I am missing?
to test the concrete Font with methods in API Font.canDisplay
required to test in Unicode form (for mixing chars from a few languages (Tamil & Bengali))
please can you post and SSCCE, with used Font and can be based on

Categories