Detecting text field overflow - java

Assuming I have a PDF document with a text field with some font and size defined, is there a way to determine if some text will fit inside the field rectangle using PDFBox?
I'm trying to avoid cases where text is not fully displayed inside the field, so in case the text overflows given the font and size, I would like to change the font size to Auto (0).

This code recreates the appearance stream to be sure that it exists so that there is a bbox (which can be a little bit smaller than the rectangle).
/**
 * Checks whether a sample string fits into the "Name" text field of Testformular1.pdf.
 *
 * The appearance stream is regenerated first so that a bounding box exists; the usable
 * field width is the smaller of the bounding box width and the widget rectangle width.
 * Font name and font size are parsed from the field's default appearance string, and the
 * width of the sample text at that size is printed next to the field width.
 *
 * @param args unused
 * @throws IOException if the PDF cannot be read, the default appearance cannot be parsed,
 *                     or the referenced font cannot be found
 */
public static void main(String[] args) throws IOException
{
    // file can be found at https://issues.apache.org/jira/browse/PDFBOX-142
    // https://issues.apache.org/jira/secure/attachment/12742551/Testformular1.pdf
    try (PDDocument doc = PDDocument.load(new File("Testformular1.pdf")))
    {
        PDAcroForm acroForm = doc.getDocumentCatalog().getAcroForm();
        PDTextField field = (PDTextField) acroForm.getField("Name");
        PDAnnotationWidget widget = field.getWidgets().get(0);

        // force generation of appearance stream
        field.setValue(field.getValue());

        PDRectangle rectangle = widget.getRectangle();
        PDAppearanceEntry ap = widget.getAppearance().getNormalAppearance();
        PDAppearanceStream appearanceStream = ap.getAppearanceStream();
        PDRectangle bbox = appearanceStream.getBBox();
        float fieldWidth = Math.min(bbox.getWidth(), rectangle.getWidth());

        String defaultAppearance = field.getDefaultAppearance();
        System.out.println(defaultAppearance);

        // Matches the "/<font name> <size> Tf" part of a default appearance such as
        // "/Helv 12 Tf 0 g". The name may contain any character that is not white space
        // or a PDF delimiter (so digits are fine); the size may be fractional.
        final Pattern p = Pattern.compile(
                "/([^\\s/\\[\\]()<>{}%]+)\\s+(\\d+(?:\\.\\d*)?|\\.\\d+)\\s+Tf");
        Matcher m = p.matcher(defaultAppearance);
        if (!m.find())
        {
            // Throw instead of System.exit() so that try-with-resources still closes the document.
            throw new IOException("oh-oh: cannot parse font and size from default appearance '"
                    + defaultAppearance + "'");
        }
        String fontName = m.group(1);
        // A size of 0 means "auto size"; the computed string width is then 0 as well.
        float fontSize = Float.parseFloat(m.group(2));

        COSName fontKey = COSName.getPDFName(fontName);
        PDResources resources = appearanceStream.getResources();
        if (resources == null)
        {
            resources = acroForm.getDefaultResources();
        }
        PDFont font = resources == null ? null : resources.getFont(fontKey);
        if (font == null && acroForm.getDefaultResources() != null)
        {
            // The appearance stream has resources but not this font: fall back to the form defaults.
            font = acroForm.getDefaultResources().getFont(fontKey);
        }
        if (font == null)
        {
            throw new IOException("Font '" + fontName + "' not found in the field or form resources");
        }

        // getStringWidth() is in 1/1000 of a text space unit, hence the scaling by fontSize / 1000.
        float stringWidth = font.getStringWidth("Tilman Hausherr Tilman Hausherr");
        System.out.println("stringWidth: " + stringWidth * fontSize / 1000);
        System.out.println("field width: " + fieldWidth);
    }
}
The output is:
/Helv 12 Tf 0 g
stringWidth: 180.7207
field width: 169.29082

Related

Show formatted number and save double value

I have seen PDF forms where you could write numbers in your local format and the PDF stored double values in the background which could be read with PDFBox.
How can I tell the field in my example e.g. take the number 125.5 (double) and show "125,5" (my locale)?
And when the user edits the field, the value in the background should still be a valid double. Is there a built-in mechanism for this, or what would a workaround look like? Thanks in advance.
/**
 * Creates a one-page A4 document with an AcroForm containing a single text field
 * named "SampleField" and saves it as "test.pdf".
 */
public final class CreateSimpleForm
{
    private static final PDFont FONT = PDType1Font.HELVETICA;
    private static final float FONT_SIZE = 12;

    private CreateSimpleForm()
    {
    }

    /**
     * Builds the form and writes it to "test.pdf" in the working directory.
     *
     * @param args unused
     * @throws IOException if the document cannot be built or saved
     */
    public static void main(String[] args) throws IOException
    {
        // try-with-resources: the document is closed even when building or saving fails
        try (PDDocument document = new PDDocument())
        {
            PDPage page = new PDPage(PDRectangle.A4);
            document.addPage(page);

            // The font is registered under the name "Helv" so the appearance strings can refer to it.
            PDResources resources = new PDResources();
            resources.put(COSName.getPDFName("Helv"), FONT);

            PDAcroForm acroForm = new PDAcroForm(document);
            document.getDocumentCatalog().setAcroForm(acroForm);
            acroForm.setDefaultResources(resources);

            // Form-wide default appearance: Helv with font size 0, black (gray) fill
            String defaultAppearanceString = "/Helv 0 Tf 0 g";
            acroForm.setDefaultAppearance(defaultAppearanceString);

            PDTextField textBox = new PDTextField(acroForm);
            textBox.setPartialName("SampleField");
            // Field-specific appearance: fixed font size, black RGB fill
            defaultAppearanceString = "/Helv " + FONT_SIZE + " Tf 0 0 0 rg";
            textBox.setDefaultAppearance(defaultAppearanceString);
            acroForm.getFields().add(textBox);

            // The widget annotation is the visible representation of the field on the page.
            PDAnnotationWidget widget = textBox.getWidgets().get(0);
            PDRectangle rect = new PDRectangle(50, 750, 200, 50);
            widget.setRectangle(rect);
            widget.setPage(page);
            widget.setPrinted(true);
            page.getAnnotations().add(widget);

            textBox.setValue("Sample field");
            document.save("test.pdf");
        }
    }
}
I don't know if this is the best way but for now I'll leave it like that until someone comes up with a better solution.
I create a visible field with the UI representation and a hidden field holding the "background" value, which is updated by JavaScript every time I edit the visible field.
When I try to read the data I have to omit the visible fields and concentrate on the hidden ones.
This is the simplest solution for me (which of course needs to be cleaned up a bit)
/**
 * Creates "test.pdf" with a visible, locale-formatted number field and a hidden companion
 * field ("&lt;name&gt;_hidden") that holds the plain double value. JavaScript attached to the
 * visible field's widget copies the edited text, converted back to a '.'-decimal number,
 * into the hidden field.
 */
public final class CreateSimpleForm {
    private static final PDFont FONT = PDType1Font.HELVETICA;

    /**
     * Locale used to format the visible value. It must use '.' as grouping and ',' as
     * decimal separator, because the JavaScript below converts the text back under
     * exactly that assumption. With the platform default locale (for example en-US) the
     * script would turn "125.50" into "12550".
     */
    private static final java.util.Locale DISPLAY_LOCALE = java.util.Locale.GERMANY;

    private PDAcroForm acroForm;
    private String defaultAppearanceString;
    private PDPage page;

    public static void main(String[] args) throws IOException {
        new CreateSimpleForm();
    }

    /**
     * Builds the document with one formatted field and saves it.
     *
     * @throws IOException if the document cannot be built or saved
     */
    private CreateSimpleForm() throws IOException {
        // try-with-resources: the document is closed even when building or saving fails
        try (PDDocument document = new PDDocument()) {
            page = new PDPage(PDRectangle.A4);
            document.addPage(page);

            PDResources resources = new PDResources();
            resources.put(COSName.getPDFName("Helv"), FONT);

            acroForm = new PDAcroForm(document);
            document.getDocumentCatalog().setAcroForm(acroForm);
            acroForm.setDefaultResources(resources);
            defaultAppearanceString = "/Helv 0 Tf 0 g";
            acroForm.setDefaultAppearance(defaultAppearanceString);

            createFormattedField("myField", 125.5);
            document.save("test.pdf");
        }
    }

    /**
     * Adds a visible field showing {@code value} formatted for {@link #DISPLAY_LOCALE} and a
     * hidden field holding {@code value.toString()}.
     *
     * @param name  partial name of the visible field; the hidden field is {@code name + "_hidden"}
     * @param value the number to show and store
     * @throws IOException if a field value cannot be set
     */
    private void createFormattedField(String name, Double value) throws IOException {
        String nameHidden = name + "_hidden";

        PDTextField textBox = createField(name, false);
        textBox.setValue(String.format(DISPLAY_LOCALE, "%1$,.2f", value));
        createField(nameHidden, true).setValue(value.toString());

        // Strips the '.' grouping separators and replaces the ',' decimal separator with '.'
        PDActionJavaScript tfJs = new PDActionJavaScript("this.getField(\"" + nameHidden + "\").value = this.getField(\"" + name + "\").value.replace(/\\./g,'').replace(/\\,/g,'.');");
        PDAnnotationAdditionalActions actions = new PDAnnotationAdditionalActions();
        // NOTE(review): PC / Bl are presumably the "page closed" and "blur" triggers — confirm
        actions.setPC(tfJs);
        actions.setBl(tfJs);
        textBox.getWidgets().get(0).setActions(actions);
    }

    /**
     * Creates a text field with the form's default appearance and adds its widget to the page.
     * Every field is placed at the same rectangle, so the hidden companion sits underneath the
     * visible one.
     *
     * @param name   partial name of the field
     * @param hidden whether the widget is flagged as hidden
     * @return the new text field
     * @throws IOException if the field cannot be created
     */
    private PDTextField createField(String name, boolean hidden) throws IOException {
        PDTextField textBox = new PDTextField(acroForm);
        textBox.setPartialName(name);
        textBox.setDefaultAppearance(defaultAppearanceString);
        acroForm.getFields().add(textBox);

        PDAnnotationWidget widget = textBox.getWidgets().get(0);
        PDRectangle rect = new PDRectangle(50, 750, 200, 50);
        widget.setRectangle(rect);
        widget.setPage(page);
        widget.setPrinted(true);
        page.getAnnotations().add(widget);
        widget.setHidden(hidden);
        return textBox;
    }
}

How can I get Images coordinates in pdf into JSONfile?

I am writing code that creates an HTML page, including the images, from a page of a PDF document.
Using the PDFBox library I succeeded in extracting the images from the PDF and placing them in the HTML page, but I could not get the coordinates of the images.
So I searched for how to extract image coordinates from a PDF and tried to do it with the PDFBox library.
Below code :
/**
 * Loads the test PDF and runs a PrintImageLocations engine over the content stream of
 * every page, printing the page number before each page is processed.
 *
 * @param args unused
 * @throws Exception if the document cannot be loaded or processed
 */
public static void main(String[] args) throws Exception
{
    PDDocument document = null;
    try
    {
        document = PDDocument.load(
                "/Users/tmdtjq/Downloads/PDFTest/test.pdf" );
        PrintImageLocations printer = new PrintImageLocations();
        List allPages = document.getDocumentCatalog().getAllPages();
        for( int i=0; i<allPages.size(); i++ )
        {
            PDPage page = (PDPage)allPages.get( i );
            int pageNum = i+1;
            System.out.println( "Processing page: " + pageNum );
            printer.processStream( page, page.findResources(),
                    page.getContents().getStream() );
        }
    }
    finally
    {
        // The finally block used to be empty, so the document was never closed.
        if( document != null )
        {
            document.close();
        }
    }
}
/**
 * Prints name, position and size of every image XObject that is painted with the "Do"
 * operator. Position and scale are taken from the current transformation matrix after
 * undoing the page rotation.
 *
 * NOTE(review): operators other than "Do" are not passed on to super.processOperator()
 * here. Presumably this means "cm" never updates the graphics state, which would explain
 * why the CTM stays at identity and every image is reported at 0.0, 0.0 — confirm against
 * the enclosing class. Form XObjects are not descended into either.
 *
 * @param operator  the content stream operator being processed
 * @param arguments the operands of the operator
 * @throws IOException if the page rotation transform cannot be inverted
 */
protected void processOperator( PDFOperator operator, List arguments ) throws IOException
{
    String operation = operator.getOperation();
    if( !operation.equals( "Do" ) )
    {
        return;
    }

    COSName objectName = (COSName)arguments.get( 0 );
    // Typed map: with a raw Map the result of get() is an Object and cannot be assigned to PDXObject.
    Map<String, PDXObject> xobjects = getResources().getXObjects();
    PDXObject xobject = xobjects.get( objectName.getName() );
    if( !(xobject instanceof PDXObjectImage) )
    {
        return;
    }

    try
    {
        PDXObjectImage image = (PDXObjectImage)xobject;
        PDPage page = getCurrentPage();
        Matrix ctm = getGraphicsState().getCurrentTransformationMatrix();

        // Build the inverse of the page rotation and apply it to the CTM.
        double rotationInRadians =(page.findRotation() * Math.PI)/180;
        AffineTransform rotation = new AffineTransform();
        rotation.setToRotation( rotationInRadians );
        AffineTransform rotationInverse = rotation.createInverse();
        Matrix rotationInverseMatrix = new Matrix();
        rotationInverseMatrix.setFromAffineTransform( rotationInverse );
        Matrix unrotatedCTM = ctm.multiply( rotationInverseMatrix );

        float xScale = unrotatedCTM.getXScale();
        float yScale = unrotatedCTM.getYScale();
        float xPosition = unrotatedCTM.getXPosition();
        float yPosition = unrotatedCTM.getYPosition();
        System.out.println( "Found image[" + objectName.getName() + "] " +
                "at " + xPosition + "," + yPosition +
                " size=" + (xScale/100f*image.getWidth()) + "," + (yScale/100f*image.getHeight() ));
    }
    catch( NoninvertibleTransformException e )
    {
        throw new WrappedIOException( e );
    }
}
The X,Y positions printed for the images are all 0.0, 0.0.
I think this is because getGraphicsState() is just a method that returns the graphics state.
But I want the coordinates of each specific image relative to the height and width of the PDF page, in order to create the HTML page.
Maybe a solution would be to export the image coordinates in the PDF as JSON.
Please recommend a tool that exports PDF image coordinates to JSON, or suggest a PDF library.
(I have already used the pdf2json tool from FlexPaper. That tool produces a JSON file from a PDF page that contains only text data (content, coordinates, font, ...), not image data.)
I was able to find the images by looking for the cm operator.
I overrode PDFTextStripper in the following way:
Note: it doesn't take rotation and mirroring into account!
public static class TextFinder extends PDFTextStripper {
public TextFinder() throws IOException {
super();
}
#Override
protected void startPage(PDPage page) throws IOException {
// process start of the page
super.startPage(page);
}
#Override
public void process(PDFOperator operator, List<COSBase> arguments)
throws IOException {
if ("cm".equals(operator.getOperation())) {
float width = ((COSNumber)arguments.get(0)).floatValue();
float height = ((COSNumber)arguments.get(3)).floatValue();
float x = ((COSNumber)arguments.get(4)).floatValue();
float y = ((COSNumber)arguments.get(5)).floatValue();
// process image coordinates
}
super.processOperator(operator, arguments);
}
#Override
protected void writeString(String text,
List<TextPosition> textPositions) throws IOException {
for (TextPosition position : textPositions) {
// process text coordinates
}
super.writeString(text, textPositions);
}
}
Of course, one can use PDFStreamEngine instead of PDFTextStripper, if one is not interested in finding text together with images.

iText : How does add image to top of PDF page

I am converting a TIFF to a PDF file, but the image is displayed at the bottom of the page. How can I make the image start at the top of the PDF page?
/**
 * Converts every page of a TIFF file into one page of a PDF written next to the TIFF
 * (same path with the extension replaced by ".pdf").
 *
 * Each image is scaled to 600 x 250 and placed at (0, 0), i.e. in the lower-left corner
 * of the page.
 *
 * @param tiff path of the TIFF file
 * @return the path of the PDF, or {@code null} if the conversion failed
 */
private static String convertTiff2Pdf(String tiff) {
    // target path PDF; a name without any '.' simply gets ".pdf" appended
    int dot = tiff.lastIndexOf('.');
    String pdf = (dot >= 0 ? tiff.substring(0, dot) : tiff) + ".pdf";

    RandomAccessFileOrArray ra = null;
    // try-with-resources: the output file is closed even if the conversion fails half way
    try (FileOutputStream out = new FileOutputStream(pdf)) {
        // New document with the default page size
        Document document = new Document();
        PdfWriter writer = PdfWriter.getInstance(document, out);
        document.setMarginMirroring(true);
        document.open();
        PdfContentByte cb = writer.getDirectContent();

        ra = new RandomAccessFileOrArray(tiff);
        int comps = TiffImage.getNumberOfPages(ra);
        // Conversion: one PDF page per TIFF page
        for (int c = 0; c < comps; ++c) {
            Image img = TiffImage.getTiffImage(ra, c + 1);
            if (img != null) {
                // NOTE(review): presumably superseded by the scaleAbsolute() call below — confirm
                img.scalePercent(7200f / img.getDpiX(), 7200f / img.getDpiY());
                img.setAbsolutePosition(0, 0);
                img.scaleAbsolute(600, 250);
                cb.addImage(img);
                document.newPage();
            }
        }
        ra.close();
        ra = null;
        document.close();
    } catch (Exception e) {
        System.out.println(e);
        pdf = null;
    } finally {
        if (ra != null) {
            try {
                ra.close();
            } catch (Exception ignored) {
                // best effort: the conversion has already failed and been reported
            }
        }
    }

    // Report success only when a PDF was actually produced.
    if (pdf != null) {
        System.out.println("[" + tiff + "] -> [" + pdf + "] OK");
    } else {
        System.out.println("[" + tiff + "] -> conversion FAILED");
    }
    return pdf;
}
You are creating a new document with A4 pages (as opposed to using the LETTER format). These pages have a width of 595 pt and a height of 842 pt. The origin of the coordinate system (0, 0) is in the lower-left corner, which is exactly where you're adding the image using the method setAbsolutePosition(0, 0);
Surprisingly, you don't adapt the size of the page to the size of the image. Instead you want to add the image at the top of the page. In this case, you need to change the coordinates of the absolute position like this:
img.setAbsolutePosition(0, PageSize.A4.getHeight() - img.getScaledHeight());
If img.getScaledHeight() exceeds PageSize.A4.getHeight() (which is equal to 842), your image will be clipped at the bottom. The image will be clipped on the right if img.getScaledWidth() exceeds PageSize.A4.getWidth() (which is equal to 595).
Based on this answer, the following code centers an image of any size on the page:
// Center the image: half of the space left over horizontally and vertically on an A4 page.
img.setAbsolutePosition((PageSize.A4.getWidth() - img.getScaledWidth())/2, (PageSize.A4.getHeight() - img.getScaledHeight())/2 );

How to generate multiple lines in PDF using Apache pdfbox

I am using PDFBox to generate PDF files with Java. The problem is that when I add long text content to the document, it is not displayed properly: only part of it is shown, and all of it on a single line.
I want text to be in multiple lines.
My code is given below:
// Question snippet: writes one long string at a fixed position on the page.
// NOTE(review): the two boolean arguments are presumably appendContent and compress — confirm.
PDPageContentStream pdfContent=new PDPageContentStream(pdfDocument, pdfPage, true, true);
pdfContent.beginText();
pdfContent.setFont(pdfFont, 11);
// start the text at x=30, y=750
pdfContent.moveTextPositionByAmount(30,750);
// The whole string is drawn in a single call, with no line breaking in this snippet.
pdfContent.drawString("I am trying to create a PDF file with a lot of text contents in the document. I am using PDFBox");
pdfContent.endText();
My output:
Adding to the answer of Mark you might want to know where to split your long string. You can use the PDFont method getStringWidth for that.
Putting everything together you get something like this (with minor differences depending on the PDFBox version):
PDFBox 1.8.x
// Breaks a long string at spaces so that every line fits between the page margins,
// then draws the lines one below the other (PDFBox 1.8.x API).
PDDocument doc = null;
try
{
doc = new PDDocument();
PDPage page = new PDPage();
doc.addPage(page);
PDPageContentStream contentStream = new PDPageContentStream(doc, page);
PDFont pdfFont = PDType1Font.HELVETICA;
float fontSize = 25;
// vertical distance between consecutive lines
float leading = 1.5f * fontSize;
PDRectangle mediabox = page.getMediaBox();
float margin = 72;
// usable line width and the text start position (upper-left corner inside the margins)
float width = mediabox.getWidth() - 2*margin;
float startX = mediabox.getLowerLeftX() + margin;
float startY = mediabox.getUpperRightY() - margin;
String text = "I am trying to create a PDF file with a lot of text contents in the document. I am using PDFBox";
List<String> lines = new ArrayList<String>();
// index of the last space up to which the text still fits on the current line; -1 = none yet
int lastSpace = -1;
// Each iteration extends the candidate line by one word; "text" always holds what is not yet assigned to a line.
while (text.length() > 0)
{
int spaceIndex = text.indexOf(' ', lastSpace + 1);
// no further space: the candidate is the whole remaining text
if (spaceIndex < 0)
spaceIndex = text.length();
String subString = text.substring(0, spaceIndex);
// the string width is divided by 1000 and scaled by the font size
float size = fontSize * pdfFont.getStringWidth(subString) / 1000;
System.out.printf("'%s' - %f of %f\n", subString, size, width);
if (size > width)
{
// candidate too wide: break at the last space that still fit
// (if even the first word is too wide, break right after it anyway)
if (lastSpace < 0)
lastSpace = spaceIndex;
subString = text.substring(0, lastSpace);
lines.add(subString);
text = text.substring(lastSpace).trim();
System.out.printf("'%s' is line\n", subString);
lastSpace = -1;
}
else if (spaceIndex == text.length())
{
// the whole remaining text fits: it becomes the last line
lines.add(text);
System.out.printf("'%s' is line\n", text);
text = "";
}
else
{
// candidate fits: remember this space and try one more word
lastSpace = spaceIndex;
}
}
contentStream.beginText();
contentStream.setFont(pdfFont, fontSize);
contentStream.moveTextPositionByAmount(startX, startY);
for (String line: lines)
{
contentStream.drawString(line);
// move down by one line for the next string
contentStream.moveTextPositionByAmount(0, -leading);
}
contentStream.endText();
contentStream.close();
doc.save("break-long-string.pdf");
}
finally
{
if (doc != null)
{
doc.close();
}
}
(BreakLongString.java test testBreakString for PDFBox 1.8.x)
PDFBox 2.0.x
// Breaks a long string at spaces so that every line fits between the page margins,
// then draws the lines one below the other (PDFBox 2.0.x API).
PDDocument doc = null;
try
{
doc = new PDDocument();
PDPage page = new PDPage();
doc.addPage(page);
PDPageContentStream contentStream = new PDPageContentStream(doc, page);
PDFont pdfFont = PDType1Font.HELVETICA;
float fontSize = 25;
// vertical distance between consecutive lines
float leading = 1.5f * fontSize;
PDRectangle mediabox = page.getMediaBox();
float margin = 72;
// usable line width and the text start position (upper-left corner inside the margins)
float width = mediabox.getWidth() - 2*margin;
float startX = mediabox.getLowerLeftX() + margin;
float startY = mediabox.getUpperRightY() - margin;
String text = "I am trying to create a PDF file with a lot of text contents in the document. I am using PDFBox";
List<String> lines = new ArrayList<String>();
// index of the last space up to which the text still fits on the current line; -1 = none yet
int lastSpace = -1;
// Each iteration extends the candidate line by one word; "text" always holds what is not yet assigned to a line.
while (text.length() > 0)
{
int spaceIndex = text.indexOf(' ', lastSpace + 1);
// no further space: the candidate is the whole remaining text
if (spaceIndex < 0)
spaceIndex = text.length();
String subString = text.substring(0, spaceIndex);
// the string width is divided by 1000 and scaled by the font size
float size = fontSize * pdfFont.getStringWidth(subString) / 1000;
System.out.printf("'%s' - %f of %f\n", subString, size, width);
if (size > width)
{
// candidate too wide: break at the last space that still fit
// (if even the first word is too wide, break right after it anyway)
if (lastSpace < 0)
lastSpace = spaceIndex;
subString = text.substring(0, lastSpace);
lines.add(subString);
text = text.substring(lastSpace).trim();
System.out.printf("'%s' is line\n", subString);
lastSpace = -1;
}
else if (spaceIndex == text.length())
{
// the whole remaining text fits: it becomes the last line
lines.add(text);
System.out.printf("'%s' is line\n", text);
text = "";
}
else
{
// candidate fits: remember this space and try one more word
lastSpace = spaceIndex;
}
}
contentStream.beginText();
contentStream.setFont(pdfFont, fontSize);
contentStream.newLineAtOffset(startX, startY);
for (String line: lines)
{
contentStream.showText(line);
// move down by one line for the next string
contentStream.newLineAtOffset(0, -leading);
}
contentStream.endText();
contentStream.close();
doc.save(new File(RESULT_FOLDER, "break-long-string.pdf"));
}
finally
{
if (doc != null)
{
doc.close();
}
}
(BreakLongString.java test testBreakString for PDFBox 2.0.x)
The result
This looks as expected.
Of course there are numerous improvements to make but this should show how to do it.
Adding unconditional line breaks
In a comment aleskv asked:
could you add line breaks when there are \n in the string?
One can easily extend the solution to unconditionally break at newline characters by first splitting the string at '\n' characters and then iterating over the split result.
E.g. if instead of the long string from above
String text = "I am trying to create a PDF file with a lot of text contents in the document. I am using PDFBox";
you want to process this even longer string with embedded new line characters
String textNL = "I am trying to create a PDF file with a lot of text contents in the document. I am using PDFBox.\nFurthermore, I have added some newline characters to the string at which lines also shall be broken.\nIt should work alright like this...";
you can simply replace
String text = "I am trying to create a PDF file with a lot of text contents in the document. I am using PDFBox";
List<String> lines = new ArrayList<String>();
int lastSpace = -1;
while (text.length() > 0)
{
[...]
}
in the solutions above by
String textNL = "I am trying to create a PDF file with a lot of text contents in the document. I am using PDFBox.\nFurthermore, I have added some newline characters to the string at which lines also shall be broken.\nIt should work alright like this...";
List<String> lines = new ArrayList<String>();
for (String text : textNL.split("\n"))
{
int lastSpace = -1;
while (text.length() > 0)
{
[...]
}
}
(from BreakLongString.java test testBreakStringNL)
The result:
I know it's a bit late, but I had a small problem with mkl's solution: if the last line would contain only one word, the algorithm writes it on the previous line.
For Example: "Lorem ipsum dolor sit amet" is your text and it should add a line break after "sit".
Lorem ipsum dolor sit
amet
But it does this:
Lorem ipsum dolor sit amet
I came up with my own solution i want to share with you.
/**
 * Draws {@code text} as consecutive lines, breaking at spaces so that a line does not
 * exceed {@code allowedWidth}. A single word that is wider than {@code allowedWidth} is
 * still placed on a line of its own.
 *
 * @param text The text to write on the page.
 * @param x The position on the x-axis.
 * @param y The position on the y-axis (of the first line).
 * @param allowedWidth The maximum allowed width of the whole text (e.g. the width of the page - a defined margin).
 * @param page The page for the text (not used by this method).
 * @param contentStream The content stream to set the text properties and write the text.
 * @param font The font used to write the text.
 * @param fontSize The font size used to write the text.
 * @param lineHeight The line height of the font (typically 1.2 * fontSize or 1.5 * fontSize).
 * @throws IOException If the string width cannot be determined or writing to the content stream fails.
 */
private void drawMultiLineText(String text, int x, int y, int allowedWidth, PDPage page, PDPageContentStream contentStream, PDFont font, int fontSize, int lineHeight) throws IOException {
    List<String> lines = new ArrayList<String>();
    String myLine = "";

    // get all words from the text
    // keep in mind that words are separated by spaces -> "Lorem ipsum!!!!:)" -> words are "Lorem" and "ipsum!!!!:)"
    String[] words = text.split(" ");
    for (String word : words) {
        // the current line extended by the current word
        String candidate = myLine.isEmpty() ? word : myLine + " " + word;
        // Compare as float: truncating to int would accept lines up to one unit too wide.
        float size = fontSize * font.getStringWidth(candidate) / 1000;
        if (!myLine.isEmpty() && size > allowedWidth) {
            // the line would be too long with the current word: finish it without the word
            // (and without a trailing space) and start a new line with the word
            lines.add(myLine);
            myLine = word;
        } else {
            // the word fits, or the line is still empty (an over-long word must not
            // produce an empty line in front of it)
            myLine = candidate;
        }
    }
    // add the rest to lines
    lines.add(myLine);

    // every line is its own text object, positioned absolutely one line height below the previous one
    int lineY = y;
    for (String line : lines) {
        contentStream.beginText();
        contentStream.setFont(font, fontSize);
        contentStream.moveTextPositionByAmount(x, lineY);
        contentStream.drawString(line);
        contentStream.endText();
        lineY -= lineHeight;
    }
}
// For PDFBox 2.0.x
// Adds pages dynamically according to the length of the content
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDFont;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
/**
 * Writes a long text into a PDF, wrapping it at spaces to fit between the margins and
 * starting a new page whenever the next line would fall below the bottom margin.
 */
public class Document_Creation {

    /**
     * Creates the document and saves it to a fixed path.
     *
     * @param args unused
     * @throws IOException if the document cannot be written
     */
    public static void main (String args[]) throws IOException {
        // try-with-resources: the document is closed even when writing fails
        try (PDDocument doc = new PDDocument())
        {
            PDPage page = new PDPage();
            doc.addPage(page);

            PDFont pdfFont = PDType1Font.HELVETICA;
            float fontSize = 25;
            float leading = 1.5f * fontSize;
            PDRectangle mediabox = page.getMediaBox();
            float margin = 72;
            float width = mediabox.getWidth() - 2*margin;
            float startX = mediabox.getLowerLeftX() + margin;
            float startY = mediabox.getUpperRightY() - margin;
            String text = "I am trying to create a PDF file with a lot of text contents in the document. I am using PDFBox.An essay is, generally, a piece of writing that gives the author's own argument — but the definition is vague, overlapping with those of an article, a pamphlet, and a short story. Essays have traditionally been sub-classified as formal and informal. Formal essays are characterized by serious purpose, dignity, logical organization, length,whereas the informal essay is characterized by the personal element (self-revelation, individual tastes and experiences, confidential manner), humor, graceful style, rambling structure, unconventionality or novelty of theme.Lastly, one of the most attractive features of cats as housepets is their ease of care. Cats do not have to be walked. They get plenty of exercise in the house as they play, and they do their business in the litter box. Cleaning a litter box is a quick, painless procedure. Cats also take care of their own grooming. Bathing a cat is almost never necessary because under ordinary circumstances cats clean themselves. Cats are more particular about personal cleanliness than people are. In addition, cats can be left home alone for a few hours without fear. Unlike some pets, most cats will not destroy the furnishings when left alone. They are content to go about their usual activities until their owners return.";
            List<String> lines = breakIntoLines(text, pdfFont, fontSize, width);

            PDPageContentStream contentStream = new PDPageContentStream(doc, page);
            try
            {
                contentStream.beginText();
                contentStream.setFont(pdfFont, fontSize);
                contentStream.newLineAtOffset(startX, startY);
                // y position the text cursor will have after the current line has been written
                float currentY = startY;
                for (String line : lines)
                {
                    currentY -= leading;
                    if (currentY <= margin)
                    {
                        // bottom margin reached: finish this page and continue at the top of a new one
                        contentStream.endText();
                        contentStream.close();
                        PDPage new_Page = new PDPage();
                        doc.addPage(new_Page);
                        contentStream = new PDPageContentStream(doc, new_Page);
                        contentStream.beginText();
                        contentStream.setFont(pdfFont, fontSize);
                        contentStream.newLineAtOffset(startX, startY);
                        currentY = startY;
                    }
                    contentStream.showText(line);
                    contentStream.newLineAtOffset(0, -leading);
                }
                contentStream.endText();
            }
            finally
            {
                // closes whichever page stream is current, also when an exception interrupts the loop
                contentStream.close();
            }
            doc.save("C:/Users/VINAYAK/Desktop/docccc/break-long-string.pdf");
        }
    }

    /**
     * Splits {@code text} at spaces into lines that are at most {@code width} wide when
     * rendered with the given font and size. A single word wider than {@code width}
     * becomes a line of its own.
     */
    private static List<String> breakIntoLines(String text, PDFont pdfFont, float fontSize, float width) throws IOException {
        List<String> lines = new ArrayList<String>();
        // index of the last space up to which the text still fits on the current line; -1 = none yet
        int lastSpace = -1;
        while (text.length() > 0)
        {
            int spaceIndex = text.indexOf(' ', lastSpace + 1);
            if (spaceIndex < 0)
                spaceIndex = text.length();
            String subString = text.substring(0, spaceIndex);
            float size = fontSize * pdfFont.getStringWidth(subString) / 1000;
            System.out.printf("'%s' - %f of %f\n", subString, size, width);
            if (size > width)
            {
                // too wide: break at the last space that still fit (or after the first word)
                if (lastSpace < 0)
                    lastSpace = spaceIndex;
                subString = text.substring(0, lastSpace);
                lines.add(subString);
                text = text.substring(lastSpace).trim();
                System.out.printf("'%s' is line\n", subString);
                lastSpace = -1;
            }
            else if (spaceIndex == text.length())
            {
                // the remaining text fits completely: last line
                lines.add(text);
                System.out.printf("'%s' is line\n", text);
                text = "";
            }
            else
            {
                lastSpace = spaceIndex;
            }
        }
        return lines;
    }
}
Just draw the string in a position below, typically done within a loop:
// Draws a two-dimensional array of strings as a grid: one text object per cell.
// textx / texty hold the position of the cell that is drawn next.
float textx = margin+cellMargin;
float texty = y-15;
for(int i = 0; i < content.length; i++){
for(int j = 0 ; j < content[i].length; j++){
String text = content[i][j];
contentStream.beginText();
contentStream.moveTextPositionByAmount(textx,texty);
contentStream.drawString(text);
contentStream.endText();
// next column
textx += colWidth;
}
// next row: move down and return to the first column
texty-=rowHeight;
textx = margin+cellMargin;
}
These are the important lines:
contentStream.beginText();
contentStream.moveTextPositionByAmount(textx,texty);
contentStream.drawString(text);
contentStream.endText();
Just keep drawing new strings in new positions. For an example using a table, see here:
http://fahdshariff.blogspot.ca/2010/10/creating-tables-with-pdfbox.html
contentStream.moveTextPositionByAmount(textx,texty) is the key point.
For example, an A4 page is approximately 595 x 842 points (width x height), so you have to position your text relative to the size of your page.
PDFBox supports various page formats, so the height and width will differ between page formats.
Pdfbox-layout abstracts out all the tedious details of managing the layout. As a complete Kotlin example, here is how to convert a text file to a pdf without worrying about line wrapping and pagination.
import org.apache.pdfbox.pdmodel.font.PDType1Font
import rst.pdfbox.layout.elements.Document
import rst.pdfbox.layout.elements.Paragraph
import java.io.File
/**
 * Converts the text file "input.txt" into "output.pdf": every line of the file is added
 * to a single paragraph (Courier, 12 pt), and the paragraph is added to a document
 * created with the values 40, 50, 40, 60.
 */
fun main() {
    val source = File("input.txt")
    val target = File("output.pdf")

    val layout = Document(40f, 50f, 40f, 60f)
    val body = Paragraph().apply {
        source.forEachLine { line -> addText("$line\n", 12f, PDType1Font.COURIER) }
    }

    layout.add(body)
    layout.save(target)
}

highlight text using pdfbox when it's location in the pdf is known

Does pdfbox provide some utility to highlight the text when I have it's co-ordinates?
Bounds of the text is known.
I know there are other libraries that provide the same functionality like pdfclown etc. But does pdfbox provide something like that?
well i found this out. it is simple.
// Adds a highlight text-markup annotation to page i of an existing document (PDFBox 1.8.x).
// The block comments are placeholders that have to be replaced with real arguments.
PDDocument doc = PDDocument.load(/*path to the file*/);
PDPage page = (PDPage)doc.getDocumentCatalog().getAllPages().get(i);
List annots = page.getAnnotations();
PDAnnotationTextMarkup markup = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
markup.setRectangle(/*your PDRectangle*/);
markup.setQuadPoints(/*float array of size eight with all the vertices of the PDRectangle in anticlockwise order*/);
annots.add(markup);
doc.save(/*path to the output file*/);
doc.close();
This answer extends the first answer here and uses basically the same code as above.
It computes the coordinates with respect to the page size of the current document, and it uses a stronger yellow, because a very light yellow is sometimes difficult to see when the word is short and small.
It also highlights the full word, taking the X, Y coordinates from the top-left to the top-right, i.e. from the first and the last character of the string.
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.List;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.graphics.color.PDColor;
import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotationTextMarkup;
import org.apache.pdfbox.text.PDFTextStripper;
import org.apache.pdfbox.text.TextPosition;
public class MainSource extends PDFTextStripper {
public MainSource() throws IOException {
super();
}
public static void main(String[] args) throws IOException {
PDDocument document = null;
String fileName = "C:/AnyPDFFile.pdf";
try {
document = PDDocument.load( new File(fileName) );
PDFTextStripper stripper = new MainSource();
stripper.setSortByPosition( true );
stripper.setStartPage( 0 );
stripper.setEndPage( document.getNumberOfPages() );
Writer dummy = new OutputStreamWriter(new ByteArrayOutputStream());
stripper.writeText(document, dummy);
File file1 = new File("C:/AnyPDFFile-New.pdf");
document.save(file1);
}
finally {
if( document != null ) {
document.close();
}
}
}
/**
* Override the default functionality of PDFTextStripper.writeString()
*/
#Override
protected void writeString(String string, List<TextPosition> textPositions) throws IOException {
boolean isFound = false;
float posXInit = 0,
posXEnd = 0,
posYInit = 0,
posYEnd = 0,
width = 0,
height = 0,
fontHeight = 0;
String[] criteria = {"Word1", "Word2", "Word3", ....};
for (int i = 0; i < criteria.length; i++) {
if (string.contains(criteria[i])) {
isFound = true;
}
}
if (isFound) {
posXInit = textPositions.get(0).getXDirAdj();
posXEnd = textPositions.get(textPositions.size() - 1).getXDirAdj() + textPositions.get(textPositions.size() - 1).getWidth();
posYInit = textPositions.get(0).getPageHeight() - textPositions.get(0).getYDirAdj();
posYEnd = textPositions.get(0).getPageHeight() - textPositions.get(textPositions.size() - 1).getYDirAdj();
width = textPositions.get(0).getWidthDirAdj();
height = textPositions.get(0).getHeightDir();
System.out.println(string + "X-Init = " + posXInit + "; Y-Init = " + posYInit + "; X-End = " + posXEnd + "; Y-End = " + posYEnd + "; Font-Height = " + fontHeight);
/* numeration is index-based. Starts from 0 */
float quadPoints[] = {posXInit, posYEnd + height + 2, posXEnd, posYEnd + height + 2, posXInit, posYInit - 2, posXEnd, posYEnd - 2};
List<PDAnnotation> annotations = document.getPage(this.getCurrentPageNo() - 1).getAnnotations();
PDAnnotationTextMarkup highlight = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
PDRectangle position = new PDRectangle();
position.setLowerLeftX(posXInit);
position.setLowerLeftY(posYEnd);
position.setUpperRightX(posXEnd);
position.setUpperRightY(posYEnd + height);
highlight.setRectangle(position);
// quadPoints is array of x,y coordinates in Z-like order (top-left, top-right, bottom-left,bottom-right)
// of the area to be highlighted
highlight.setQuadPoints(quadPoints);
PDColor yellow = new PDColor(new float[]{1, 1, 1 / 255F}, PDDeviceRGB.INSTANCE);
highlight.setColor(yellow);
annotations.add(highlight);
}
}
}
This works for pdfbox 2.0.7
// Adds a yellow highlight annotation to one page of an already loaded document.
// yourPageNumber and quadPoints are not defined in this snippet and must be supplied by the caller.
PDDocument document = /* get doc */
/* numeration is index-based. Starts from 0 */
List<PDAnnotation> annotations = document.getPage(yourPageNumber - 1).getAnnotations();
PDAnnotationTextMarkup highlight = new PDAnnotationTextMarkup(PDAnnotationTextMarkup.SUB_TYPE_HIGHLIGHT);
// the annotation rectangle is set to the A4 page rectangle; the highlighted area itself comes from quadPoints
highlight.setRectangle(PDRectangle.A4);
// quadPoints is array of x,y coordinates in Z-like order (top-left, top-right, bottom-left,bottom-right)
// of the area to be highlighted
highlight.setQuadPoints(quadPoints);
// RGB components in the range 0..1
PDColor yellow = new PDColor(new float[]{1, 1, 204 / 255F}, PDDeviceRGB.INSTANCE);
highlight.setColor(yellow);
annotations.add(highlight);
Note: such an annotation will be displayed if you save the document to a file, but it will not appear in an image rendered from the page, since no appearance stream is created for this annotation. I solved this with code drafts from PDFBOX-3353.
Simplest way ... draw a rectangle in the desired location and set the height to 1 and the fill color to BLACK.
or ...
Using PDFBox ...
//create the document and the page
PDDocument doc = new PDDocument();
PDPage page1 = new PDPage();
doc.addPage(page1);
//create the stream
PDPageContentStream stream1 = new PDPageContentStream(doc, page1);
//to simply draw an underscore with the coordinates
//where the first is x start, second y start, third x end, fourth y end
stream1.drawLine(20, 740, 590, 740);
//to draw an underscore thicker than one pixel:
//set the fill color first (no color change between path construction and painting),
//then add a rectangle: x begin, y begin, length, thickness
stream1.setNonStrokingColor(Color.BLACK);
stream1.addRect(345, 568, 70, 2);
stream1.fill();
//close the stream when drawing is finished
stream1.close();
Another solution could be drawing a yellow-ish rectangle with a lower alpha, like in the follow sample code:
// Highlights text by painting a semi-transparent yellow rectangle over it, then paints a
// second rectangle with the alpha constants set back to 1 in a separate content stream.
PDDocument document = new PDDocument();
PDPage page = new PDPage();
document.addPage(page);
PDPageContentStream contentStream = new PDPageContentStream(document, page, AppendMode.APPEND, true, true);
PDFont font = PDType1Font.COURIER;
final int fontSize = 16;
//Writing text
contentStream.beginText();
contentStream.setFont(font, fontSize );
contentStream.newLineAtOffset(25, 250);
contentStream.showText("Hello world");
contentStream.endText();
//Changing alpha mode
PDExtendedGraphicsState gs = new PDExtendedGraphicsState();
gs.setNonStrokingAlphaConstant(0.2f);
gs.setStrokingAlphaConstant(0.2f);
gs.setBlendMode(BlendMode.MULTIPLY);
contentStream.setGraphicsStateParameters(gs);
//Setting color
contentStream.setNonStrokingColor(new Color(255, 255, 0, 100));
//Highlighting (that is, drawing a rectangle)
// The rectangle starts at the text position (25, 250); its width is the string width and its
// height the font bounding box height, both divided by 1000 and scaled by the font size.
contentStream.addRect(25, 250, font.getStringWidth("Hello world")*fontSize/1000, font.getBoundingBox().getHeight()*fontSize/1000);
contentStream.fill();
contentStream.close();
//Resetting alpha means creating a new content stream...
//writing a new rectangle just to test alpha changing
contentStream = new PDPageContentStream(document, page, AppendMode.APPEND, true, true);
gs = new PDExtendedGraphicsState();
// alpha constants back to 1 for this stream
gs.setNonStrokingAlphaConstant(1f);
gs.setStrokingAlphaConstant(1f);
gs.setBlendMode(BlendMode.MULTIPLY);
contentStream.setGraphicsStateParameters(gs);
contentStream.setNonStrokingColor(new Color(255, 255, 0, 100));
contentStream.addRect(50, 50, 50, 50);
contentStream.fill();
contentStream.close();
document.save(Constants.PATH);
document.close();
Producing this as result

Categories