Reading from a word document using java

Reading from a word document using java - java

I'm trying to read data from a docx file using java. The data is is tables. Is there a way to iterate through the table cells and extract the cell data?

You have two choices:
Write a library that can open, read, and manipulate Word documents, for some definitions of "open", "read", and "manipulate", and some definition of "Word document".
Find a third-party library that can do some or all of what is defined in (1).
Search "java docx library" in your favourite web search and see what comes up.

This should help you.
http://svn.apache.org/repos/asf/poi/trunk/src/examples/src/org/apache/poi/xwpf/usermodel/SimpleTable.java
The above was achieved using Apache POI
Here is the code from the link:
package org.apache.poi.xwpf.usermodel;
import java.io.FileOutputStream;
import java.math.BigInteger;
import java.util.List;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHeight;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTShd;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTString;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTblPr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTcPr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTrPr;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTVerticalJc;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STShd;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.STVerticalJc;
/**
* This program creates a simple WordprocessingML table using POI XWPF API, and
* a more complex, styled table using both XWPF and ooxml-schema. It's possible
* that not all referenced wordprocessingml classes are defined in
* poi-ooxml-schemas-3.8-beta4. If this is the case, you'll need to use the full
* ooxml-schemas.jar library.
*
* #author gisella bronzetti (original)
* #author Gregg Morris (styled table)
*/
public class SimpleTable {
public static void main(String[] args) throws Exception {
try {
createSimpleTable();
}
catch(Exception e) {
System.out.println("Error trying to create simple table.");
throw(e);
}
try {
createStyledTable();
}
catch(Exception e) {
System.out.println("Error trying to create styled table.");
throw(e);
}
}
public static void createSimpleTable() throws Exception {
XWPFDocument doc = new XWPFDocument();
XWPFTable table = doc.createTable(3, 3);
table.getRow(1).getCell(1).setText("EXAMPLE OF TABLE");
// table cells have a list of paragraphs; there is an initial
// paragraph created when the cell is created. If you create a
// paragraph in the document to put in the cell, it will also
// appear in the document following the table, which is probably
// not the desired result.
XWPFParagraph p1 = table.getRow(0).getCell(0).getParagraphs().get(0);
XWPFRun r1 = p1.createRun();
r1.setBold(true);
r1.setText("The quick brown fox");
r1.setItalic(true);
r1.setFontFamily("Courier");
r1.setUnderline(UnderlinePatterns.DOT_DOT_DASH);
r1.setTextPosition(100);
table.getRow(2).getCell(2).setText("only text");
FileOutputStream out = new FileOutputStream("simpleTable.docx");
doc.write(out);
out.close();
}
/**
* Create a table with some row and column styling. I "manually" add the
* style name to the table, but don't check to see if the style actually
* exists in the document. Since I'm creating it from scratch, it obviously
* won't exist. When opened in MS Word, the table style becomes "Normal".
* I manually set alternating row colors. This could be done using Themes,
* but that's left as an exercise for the reader. The cells in the last
* column of the table have 10pt. "Courier" font.
* I make no claims that this is the "right" way to do it, but it worked
* for me. Given the scarcity of XWPF examples, I thought this may prove
* instructive and give you ideas for your own solutions.
* #throws Exception
*/
public static void createStyledTable() throws Exception {
// Create a new document from scratch
XWPFDocument doc = new XWPFDocument();
// -- OR --
// open an existing empty document with styles already defined
//XWPFDocument doc = new XWPFDocument(new FileInputStream("base_document.docx"));
// Create a new table with 6 rows and 3 columns
int nRows = 6;
int nCols = 3;
XWPFTable table = doc.createTable(nRows, nCols);
// Set the table style. If the style is not defined, the table style
// will become "Normal".
CTTblPr tblPr = table.getCTTbl().getTblPr();
CTString styleStr = tblPr.addNewTblStyle();
styleStr.setVal("StyledTable");
// Get a list of the rows in the table
List<XWPFTableRow> rows = table.getRows();
int rowCt = 0;
int colCt = 0;
for (XWPFTableRow row : rows) {
// get table row properties (trPr)
CTTrPr trPr = row.getCtRow().addNewTrPr();
// set row height; units = twentieth of a point, 360 = 0.25"
CTHeight ht = trPr.addNewTrHeight();
ht.setVal(BigInteger.valueOf(360));
// get the cells in this row
List<XWPFTableCell> cells = row.getTableCells();
// add content to each cell
for (XWPFTableCell cell : cells) {
// get a table cell properties element (tcPr)
CTTcPr tcpr = cell.getCTTc().addNewTcPr();
// set vertical alignment to "center"
CTVerticalJc va = tcpr.addNewVAlign();
va.setVal(STVerticalJc.CENTER);
// create cell color element
CTShd ctshd = tcpr.addNewShd();
ctshd.setColor("auto");
ctshd.setVal(STShd.CLEAR);
if (rowCt == 0) {
// header row
ctshd.setFill("A7BFDE");
}
else if (rowCt % 2 == 0) {
// even row
ctshd.setFill("D3DFEE");
}
else {
// odd row
ctshd.setFill("EDF2F8");
}
// get 1st paragraph in cell's paragraph list
XWPFParagraph para = cell.getParagraphs().get(0);
// create a run to contain the content
XWPFRun rh = para.createRun();
// style cell as desired
if (colCt == nCols - 1) {
// last column is 10pt Courier
rh.setFontSize(10);
rh.setFontFamily("Courier");
}
if (rowCt == 0) {
// header row
rh.setText("header row, col " + colCt);
rh.setBold(true);
para.setAlignment(ParagraphAlignment.CENTER);
}
else if (rowCt % 2 == 0) {
// even row
rh.setText("row " + rowCt + ", col " + colCt);
para.setAlignment(ParagraphAlignment.LEFT);
}
else {
// odd row
rh.setText("row " + rowCt + ", col " + colCt);
para.setAlignment(ParagraphAlignment.LEFT);
}
colCt++;
} // for cell
colCt = 0;
rowCt++;
} // for row
// write the file
FileOutputStream out = new FileOutputStream("styledTable.docx");
doc.write(out);
out.close();
}
}

Related

How do I ADD bullet points to a word document using Apache POI in Java

I have a word document which is used as a template. Inside this template I have some tables that contain predefined bullet points. Now I'm trying to replace the placeholder string with a set of strings.
I'm totally stuck on this. My simplified methods looks like this.
replaceKeyValue.put("[DescriptionOfItem]", new HashSet<>(Collections.singletonList("This is the description")));
replaceKeyValue.put("[AllowedEntities]", new HashSet<>(Arrays.asList("a", "b")));
replaceKeyValue.put("[OptionalEntities]", new HashSet<>(Arrays.asList("c", "d")));
replaceKeyValue.put("[NotAllowedEntities]", new HashSet<>(Arrays.asList("e", "f")));
try (XWPFDocument template = new XWPFDocument(OPCPackage.open(file))) {
template.getTables().forEach(
xwpfTable -> xwpfTable.getRows().forEach(
xwpfTableRow -> xwpfTableRow.getTableCells().forEach(
xwpfTableCell -> replaceInCell(replaceKeyValue, xwpfTableCell)
)
));
ByteArrayOutputStream baos = new ByteArrayOutputStream();
template.write(baos);
return new ByteArrayResource(baos.toByteArray());
} finally {
if (file.exists()) {
file.delete();
}
}
private void replaceInCell(Map<String, Set<String>> replacementsKeyValuePairs, XWPFTableCell xwpfTableCell) {
for (XWPFParagraph xwpfParagraph : xwpfTableCell.getParagraphs()) {
for (Map.Entry<String, Set<String>> replPair : replacementsKeyValuePairs.entrySet()) {
String keyToFind = replPair.getKey();
Set<String> replacementStrings = replacementsKeyValuePairs.get(keyToFind);
if (xwpfParagraph.getText().contains(keyToFind)) {
replacementStrings.forEach(replacementString -> {
XWPFParagraph paragraph = xwpfTableCell.addParagraph();
XWPFRun run = paragraph.createRun();
run.setText(replacementString);
});
}
}
}
I was expecting that some more bullet points will be added to the current cell. Am I missing something? The paragraph is the one containing the placeholder string and format.
Thanks for any help!
UPDATE: This is how part of the template looks like. I would like to automatically search for the terms and replace them. Searching works so far. But trying to replace the bullet points ends in an unlocatable NullPointer.
Would it be easier to use fields? I need to keep the bullet point style though.
UPDATE 2: added download link and updated the code. Seems I can't alter the paragraphs if I'm iterating through them. I get a null-pointer.
Download link: WordTemplate

Since Microsoft Word is very, very "strange" in how it divides text in different runs in it's storage, such questions are not possible to answer without having a complete example including all code and the Word documents in question. Having a general usable code for adding content to Word documents seems not be possible, except all the adding or replacement is only in fields (form fields or content controls or mail merge fields).
So I downloaded your WordTemplate.docx which looks like so:
Then I runned the following code:
import java.io.*;
import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
import org.apache.xmlbeans.XmlCursor;
import java.util.*;
import java.math.BigInteger;
public class WordReadAndRewrite {
static void addItems(XWPFTableCell cell, XWPFParagraph paragraph, Set<String> items) {
XmlCursor cursor = null;
XWPFRun run = null;
CTR cTR = null; // for a deep copy of the run's low level object
BigInteger numID = paragraph.getNumID();
int indentationLeft = paragraph.getIndentationLeft();
int indentationHanging = paragraph.getIndentationHanging();
boolean first = true;
for (String item : items) {
if (first) {
for (int r = paragraph.getRuns().size()-1; r > 0; r--) {
paragraph.removeRun(r);
}
run = (paragraph.getRuns().size() > 0)?paragraph.getRuns().get(0):null;
if (run == null) run = paragraph.createRun();
run.setText(item, 0);
cTR = (CTR)run.getCTR().copy(); // take a deep copy of the run's low level object
first = false;
} else {
cursor = paragraph.getCTP().newCursor();
boolean thereWasParagraphAfter = cursor.toNextSibling(); // move cursor to next paragraph
// because the new paragraph shall be **after** that paragraph
// thereWasParagraphAfter is true if there is a next paragraph, else false
if (thereWasParagraphAfter) {
paragraph = cell.insertNewParagraph(cursor); // insert new paragraph if there are next paragraphs in cell
} else {
paragraph = cell.addParagraph(); // add new paragraph if there are no other paragraphs present in cell
}
paragraph.setNumID(numID); // set template paragraph's numbering Id
paragraph.setIndentationLeft(indentationLeft); // set template paragraph's indenting from left
if (indentationHanging != -1) paragraph.setIndentationHanging(indentationHanging); // set template paragraph's hanging indenting
run = paragraph.createRun();
if (cTR != null) run.getCTR().set(cTR); // set template paragraph's run formatting
run.setText(item, 0);
}
}
}
public static void main(String[] args) throws Exception {
Map<String, Set<String>> replaceKeyValue = new HashMap<String, Set<String>>();
replaceKeyValue.put("[AllowedEntities]", new HashSet<>(Arrays.asList("allowed 1", "allowed 2", "allowed 3")));
replaceKeyValue.put("[OptionalEntities]", new HashSet<>(Arrays.asList("optional 1", "optional 2", "optional 3")));
replaceKeyValue.put("[NotAllowedEntities]", new HashSet<>(Arrays.asList("not allowed 1", "not allowed 2", "not allowed 3")));
XWPFDocument document = new XWPFDocument(new FileInputStream("WordTemplate.docx"));
List<XWPFTable> tables = document.getTables();
for (XWPFTable table : tables) {
List<XWPFTableRow> rows = table.getRows();
for (XWPFTableRow row : rows) {
List<XWPFTableCell> cells = row.getTableCells();
for (XWPFTableCell cell : cells) {
int countParagraphs = cell.getParagraphs().size();
for (int p = 0; p < countParagraphs; p++) { // do not for each since new paragraphs were added
XWPFParagraph paragraph = cell.getParagraphArray(p);
String placeholder = paragraph.getText();
placeholder = placeholder.trim(); // this is the tricky part to get really the correct placeholder
Set<String> items = replaceKeyValue.get(placeholder);
if (items != null) {
addItems(cell, paragraph, items);
}
}
}
}
}
FileOutputStream out = new FileOutputStream("Result.docx");
document.write(out);
out.close();
document.close();
}
}
The Result.docx looks like so:
The code loops trough the table cells in the Word document and looks for a paragraph which contains exactly the placeholder. This even might be the tricky part since that placeholder might be splitted into differnt text runs by Word. If found it runs a method addItems which takes the found paragraph as a template for numbering and indention (might be incomplter though). Then it sets the first new item in first text run of found paragraph and removes all other text runs which possibly are there. Then it determines wheter new paragraphs must be inserted or added to the cell. For this a XmlCursor is used. In new inserted or added paragrahs the other items are placed and the numbering and indention settings are taken from the placeholder's paragraph.
As said, this is code for showing the principles of how to do. It would must be extended very much to be general usable. In my opinion those trials using text placeholders in Word documents for text replacements are not really good. Placeholders for variable text in Word documents should be fields. This could be form fields, content controls or mail merge fields. Advantage of fields in contrast of text placeholders is that Word knows the fields being entities for variable texts. It will not split them into multiple text runs for multiple strange reasons as it often does with normal text.

Can't change row text in .docx file once row is added to table

I have the problem with the following code:
XWPFTable table = <get table somehow>;
CTRow firstRow = table.getRow(0).getCtRow();
for (int i = 0; i < data.getRowCount(); i++) {
CTRow ctRow = (CTRow) firstRow.copy();
XWPFTableRow row = new XWPFTableRow(ctRow, table);
XWPFRun[] cellRuns = row.getTableCells()
.stream()
.map(c -> c.getParagraphs().get(0))
.map(p -> p.getRuns().isEmpty() ? p.createRun() : p.getRuns().get(0))
.toArray(XWPFRun[]::new);
for (int j = 0; j < cellRuns.length; j++) {
cellRuns[j].setText(data.getValueAt(i, j).toString(), 0);
}
table.addRow(row);
}
table.getRow(1).getTableCells()
.get(0).getParagraphs()
.get(0).getRuns()
.get(0).setText("FooBar", 0); //change text in some added row
This code is copying the first row of the table several times and then copying values from data. Works perfectly fine (except text style) except the last operator, which was supposed to change the text in some added table row. Also, the "FooBar" string doesn't even appear in document.xml of created WORD document. I failed to see any clues from debug, because it seems, that table.addRow(row); operator just copies row pointer to it's internal list of rows. Also, I didn't have problems with altering already existing rows. So do you have any ideas why this could happen?

To reproducing the problem do having a source.docx having a first table having at least two rows.
Then do running following code:
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
public class WordInsertTableRow {
static XWPFTableRow insertNewTableRow(XWPFTableRow sourceTableRow, int pos) throws Exception {
XWPFTable table = sourceTableRow.getTable();
CTRow newCTRrow = CTRow.Factory.parse(sourceTableRow.getCtRow().newInputStream());
XWPFTableRow tableRow = new XWPFTableRow(newCTRrow, table);
table.addRow(tableRow, pos);
return tableRow;
}
static void commitTableRows(XWPFTable table) {
int rowNr = 0;
for (XWPFTableRow tableRow : table.getRows()) {
table.getCTTbl().setTrArray(rowNr++, tableRow.getCtRow());
}
}
public static void main(String[] args) throws Exception {
XWPFDocument doc = new XWPFDocument(new FileInputStream("source.docx"));
boolean weMustCommitTableRows = false;
XWPFTable table = doc.getTableArray(0);
// insert new row, which is a copy of row 2, as new row 3:
XWPFTableRow sourceTableRow = table.getRow(1);
XWPFTableRow newRow3 = insertNewTableRow(sourceTableRow, 2);
// now changing something in that new row:
int i = 1;
for (XWPFTableCell cell : newRow3.getTableCells()) {
for (XWPFParagraph paragraph : cell.getParagraphs()) {
for (XWPFRun run : paragraph.getRuns()) {
run.setText("New row 3 run " + i++, 0);
}
}
}
System.out.println(newRow3.getCtRow()); // was changed
System.out.println(table.getRow(2).getCtRow()); // even this is changed
System.out.println(table.getCTTbl().getTrArray(2)); // but this was not changed, why not?
weMustCommitTableRows = true;
if (weMustCommitTableRows) commitTableRows(table); // now it is changed
FileOutputStream out = new FileOutputStream("result.docx");
doc.write(out);
out.close();
doc.close();
}
}
This code creates a copy of second row and inserts it as third row in the table. Then it does changing something in that new third row.
The issue ist, that the changings do appearing in low level CTRow of the row itself but do not appearing in low Level CTTbl of the table. For me this is not logically and I cannot get the reason of that. It looks as if the new CTRow elements are not part of the CTTbl at all. But they were added to it using ctTbl.setTrArray in XWPFTable.addRow. So I suspect there is something wrong with setTrArray in org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl. It seems updating the XML correctly but losing the object relations in the array (or list) of CTRows in CTTbl. But this is very hard to determining because of the kind of programming the org.openxmlformats.schemas classes. At least I was not able to do so. Maybe another of the professional and enthusiast programmers here may be able?
I am using the same approach for inserting rows having tthe same styling as a given source row. But after I have done this, I am setting boolean weMustCommitTableRows = true; and then I am doing if (weMustCommitTableRows) commitTableRows(table); before writing out the document. Then all changings will be committed.

Expanding an existing table in Excel using Apache POI

I've got an table in excel with formulae I would like to add data to.
My motivation for this is the fact that tables in excel can dynamically expand to the range of data you add to them, meaning that the formula rows automatically keep up with the amount of data rows.
I'm however having a hard time finding out if this is possible using apache-POI.
One thing I was going to try (see code below) was to expand the AreaReference of the table to cover the data, however both AreaReference(CR,CR2); (as used in this example) and AreaReference(CR,CR2, SpreadsheetVersion.EXCEL2007) (seen in the apache docs) give "constructor is undefined".
No idea what is causing that constructor error as I do have org.apache.poi.ss.util imported.
The other option on the apache docs AreaReference(java.lang.String reference) lets me compile and run but instead gives a "NoSuchMethod" error.
List<XSSFTable> tableList = spreadSheet.getTables();
CellReference CR = new CellReference(0, 0);
CellReference CR2 = new CellReference(5, 2);
AreaReference my_data_range = new AreaReference(CR,CR2);
tableList.get(0).setArea(my_data_range);
Any help will be appreciated.

The main problem using apache poi until now is that it is not ready to be used without having detailed knowledge about Microsoft Office as such and about the storage of Microsoft Office files. There are many things only half way ready and there are regressions often in new versions (bugs occur again which were solved already).
So your requirement: "Expanding an existing table in Excel using Apache POI" is not possible only simply using apache poi. One must know that Office Open XML files *.xlsx are simply ZIP archives which can be unzipped. And after unzipping we find /xl/tables/table1.xml for storage of the table. This XML we can analyzing and comparing it with XML which was created using Excel's GUI. So we can find problems which results from shortcomings of apache poi. Same is with the sheet's XML in /xl/tables/sheet1.xml.
Also we need to know that apache poi builds on the low level classes of ooxml-schemas. Partially we need using those classes because of the halfway readiness of apache poi. In the following example we need ooxml-schemas-1.4.jar additionally because apache poi's poi-ooxml-schemas-4.0.0.jar has not included org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableFormula until now. Unfortunately there is no documentation about ooxml-schemas public available. So we need downloading the sources and doing javadoc our own.
The following example works for me using apache poi 4.0.0. If you get problems while compiling or running, the reason might be that multiple different versions of apache poi jars are in class path while compile time and/or run time. Do not mix different apache poi versions. Also, as said already, my code needs the full jar of all of the schemas ooxml-schemas-1.4.jar.
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.*;
import org.apache.poi.ss.util.*;
import org.apache.poi.ss.SpreadsheetVersion;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableColumn;
class ExcelExpandingTable {
static void addRowToTable(XSSFTable table) {
int lastTableRow = table.getEndCellReference().getRow();
int totalsRowCount = table.getTotalsRowCount();
int lastTableDataRow = lastTableRow - totalsRowCount;
// we will add one row in table data
lastTableRow++;
lastTableDataRow++;
// new table area plus one row
AreaReference newTableArea = new AreaReference(
table.getStartCellReference(),
new CellReference(
lastTableRow,
table.getEndCellReference().getCol()
),
SpreadsheetVersion.EXCEL2007
);
// new table data area plus one row
AreaReference newTableDataArea = new AreaReference(
table.getStartCellReference(),
new CellReference(
lastTableDataRow,
table.getEndCellReference().getCol()
),
SpreadsheetVersion.EXCEL2007
);
XSSFSheet sheet = table.getXSSFSheet();
if (totalsRowCount > 0) {
//if we have totals rows, shift totals rows down
sheet.shiftRows(lastTableDataRow, lastTableRow, 1);
// correcting bug that shiftRows does not adjusting references of the cells
// if row 3 is shifted down, then reference in the cells remain r="A3", r="B3", ...
// they must be adjusted to the new row thoug: r="A4", r="B4", ...
// apache poi 3.17 has done this properly but had have other bugs in shiftRows.
for (int r = lastTableDataRow; r < lastTableRow + 1; r++) {
XSSFRow row = sheet.getRow(r);
if (row != null) {
long rRef = row.getCTRow().getR();
for (Cell cell : row) {
String cRef = ((XSSFCell)cell).getCTCell().getR();
((XSSFCell)cell).getCTCell().setR(cRef.replaceAll("[0-9]", "") + rRef);
}
}
}
// end correcting bug
}
// if there are CalculatedColumnFormulas do filling them to the new row
XSSFRow row = sheet.getRow(lastTableDataRow); if (row == null) row = sheet.createRow(lastTableDataRow);
int firstTableCol = table.getStartCellReference().getCol();
for (CTTableColumn tableCol : table.getCTTable().getTableColumns().getTableColumnList()) {
if (tableCol.getCalculatedColumnFormula() != null) {
int id = (int)tableCol.getId();
String formula = tableCol.getCalculatedColumnFormula().getStringValue();
XSSFCell cell = row.getCell(firstTableCol + id - 1); if (cell == null) cell = row.createCell(firstTableCol + id - 1);
cell.setCellFormula(formula);
}
}
table.setArea(newTableArea);
// correcting bug that Autofilter includes possible TotalsRows after setArea new
// Autofilter must only contain data area
table.getCTTable().getAutoFilter().setRef(newTableDataArea.formatAsString());
// end correcting bug
table.updateReferences();
}
public static void main(String[] args) throws Exception {
try (Workbook workbook = WorkbookFactory.create(new FileInputStream("SAMPLE.xlsx"));
FileOutputStream out = new FileOutputStream("SAMPLE_NEW.xlsx")) {
XSSFSheet sheet = ((XSSFWorkbook)workbook).getSheetAt(0);
XSSFTable table = sheet.getTables().get(0);
addRowToTable(table);
workbook.write(out);
}
}
}
The above was 2018. Now we have 2022.
Using current apache poi 5.2.2 the code may be like follows:
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.*;
import org.apache.poi.ss.util.*;
import org.apache.poi.ss.SpreadsheetVersion;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTTableColumn;
class ExcelExpandingTable {
static XSSFRow addRowToTable(XSSFTable table) {
int lastTableRow = table.getEndCellReference().getRow();
int totalsRowCount = table.getTotalsRowCount();
int lastTableDataRow = lastTableRow - totalsRowCount;
int firstTableCol = table.getStartCellReference().getCol();
// we will add one row in table data
lastTableRow++;
lastTableDataRow++;
// new table area plus one row
AreaReference newTableArea = new AreaReference(
table.getStartCellReference(),
new CellReference(
lastTableRow,
table.getEndCellReference().getCol()
),
SpreadsheetVersion.EXCEL2007
);
XSSFSheet sheet = table.getXSSFSheet();
if (totalsRowCount > 0) {
//if we have totals rows, shift totals rows down
sheet.shiftRows(lastTableDataRow, lastTableRow, 1);
//correct all sheet table-reference-formulas which probably got damaged after shift rows
for (CTTableColumn tableCol : table.getCTTable().getTableColumns().getTableColumnList()) {
if (tableCol.getCalculatedColumnFormula() != null) {
int id = (int)tableCol.getId();
String formula = tableCol.getCalculatedColumnFormula().getStringValue();
int rFirst = table.getStartCellReference().getRow() + table.getHeaderRowCount();
int rLast = table.getEndCellReference().getRow() - table.getTotalsRowCount();
int c = table.getStartCellReference().getCol() + id - 1;
sheet.getWorkbook().setCellFormulaValidation(false);
for (int r = rFirst; r <= rLast; r++) {
XSSFRow row = sheet.getRow(r); if (row == null) row = sheet.createRow(r);
XSSFCell cell = row.getCell(c); if (cell == null) cell = row.createCell(c);
cell.setCellFormula(formula);
}
}
}
}
// if there are CalculatedColumnFormulas do filling them to the new row
XSSFRow row = sheet.getRow(lastTableDataRow); if (row == null) row = sheet.createRow(lastTableDataRow);
for (CTTableColumn tableCol : table.getCTTable().getTableColumns().getTableColumnList()) {
if (tableCol.getCalculatedColumnFormula() != null) {
int id = (int)tableCol.getId();
String formula = tableCol.getCalculatedColumnFormula().getStringValue();
XSSFCell cell = row.getCell(firstTableCol + id - 1); if (cell == null) cell = row.createCell(firstTableCol + id - 1);
cell.getSheet().getWorkbook().setCellFormulaValidation(false); // see https://bz.apache.org/bugzilla/show_bug.cgi?id=66039
cell.setCellFormula(formula);
}
}
// copy cell styles to the new row from the row above
row = sheet.getRow(lastTableDataRow); if (row == null) row = sheet.createRow(lastTableDataRow);
XSSFRow rowAbove = sheet.getRow(lastTableDataRow - 1); if (row == null) row = sheet.createRow(lastTableDataRow - 1);
for (CTTableColumn tableCol : table.getCTTable().getTableColumns().getTableColumnList()) {
int id = (int)tableCol.getId();
XSSFCell cellAbove = rowAbove.getCell(firstTableCol + id - 1);
if (cellAbove != null) {
XSSFCellStyle styleAbove = cellAbove.getCellStyle();
XSSFCell cell = row.getCell(firstTableCol + id - 1); if (cell == null) cell = row.createCell(firstTableCol + id - 1);
cell.setCellStyle(styleAbove);
}
}
// set new table area
table.setArea(newTableArea);
// update table references
table.updateReferences();
return sheet.getRow(lastTableDataRow);
}
public static void main(String[] args) throws Exception {
try (Workbook workbook = WorkbookFactory.create(new FileInputStream("SAMPLE.xlsx"));
FileOutputStream out = new FileOutputStream("SAMPLE_NEW.xlsx")) {
XSSFSheet sheet = ((XSSFWorkbook)workbook).getSheetAt(0);
XSSFTable table = sheet.getTables().get(0);
XSSFRow row = addRowToTable(table);
workbook.write(out);
}
}
}
According to FAQ, this code needs poi-ooxml-full-5.2.2.jar instead of ooxml-schemas-1.4.jar now for run using apache poi 5.2.2.
Some bugs are fixed. But there is one new bug when using XSSFCell.setCellFormula when formula contains table references. But XSSFWorkbook.setCellFormulaValidation(false) avoids this.

How to set Multiple default value in Apache POI pivot table report filter

I need some help, I'm working on a project where I need to create a pivot table with report filter. Now I want to set Multiple default value to the report filter. I've gone through this previously posted question Click here!
I couldn't find anything online related this. Thanks in advance.

There has not much changed at creating pivot tables using apache poi since my last answer. So my code in the linked Q/A in your question only can extended a little bit for fulfilling the requirement selecting multiple items as default in page filter. A code which would be general able creating pivot tables from all kind of data, as Excel can do, is further on not possible.
If the need is selecting multiple items, first MultipleItemSelectionAllowed needs to be set in appropriate pivot field.
Then for each item which not shall be selected H(idden) needs set true.
Example:
import org.apache.poi.xssf.usermodel.*;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.ss.util.*;
import org.apache.poi.ss.SpreadsheetVersion;
import java.util.Random;
import java.io.*;
class PivotTableTest4 {
private static void setCellData(Sheet sheet) {
Row row = sheet.createRow(0);
Cell cell = row.createCell(0);
cell.setCellValue("Name");
cell = row.createCell(1);
cell.setCellValue("Value1");
cell = row.createCell(2);
cell.setCellValue("Value2");
cell = row.createCell(3);
cell.setCellValue("City");
for (int r = 1; r < 15; r++) {
row = sheet.createRow(r);
cell = row.createCell(0);
cell.setCellValue("Name " + ((r-1) % 5 + 1));
cell = row.createCell(1);
cell.setCellValue(r * new java.util.Random().nextDouble());
cell = row.createCell(2);
cell.setCellValue(r * new java.util.Random().nextDouble());
cell = row.createCell(3);
cell.setCellValue("City " + ((r-1) % 4 + 1));
}
}
public static void main(String[] args) {
try {
XSSFWorkbook wb = new XSSFWorkbook();
XSSFSheet sheet = wb.createSheet();
//Create some data to build the pivot table on
setCellData(sheet);
XSSFPivotTable pivotTable = sheet.createPivotTable(
new AreaReference(new CellReference("A1"), new CellReference("D15"), SpreadsheetVersion.EXCEL2007), new CellReference("H5"));
//Configure the pivot table
//Use first column as row label
pivotTable.addRowLabel(0);
//Sum up the second column
pivotTable.addColumnLabel(DataConsolidateFunction.SUM, 1);
//Avarage the third column
pivotTable.addColumnLabel(DataConsolidateFunction.AVERAGE, 2);
//Add fourth column as page filter
pivotTable.addReportFilter(3);
/*
Apache poi adds 15 pivot field items of type "default" (<item t="default"/>) here.
This is because there are 15 rows (A1:D15) and, because they don't have a look at the data,
they are assuming max 15 different values. This is fine because Excel will rebuild its pivot cache while opening.
But if we want preselect items, then this is not fine. Then we must know what items there are that can be preselected.
So we need at least as much items as we want preselecting as numbered items: <item x="0"/><item x="1"/><item x="2"/>...
And we must build a cache definition which has shared elements for those items.
*/
for (int i = 0; i < 4; i++) {
//take the first 4 items as numbered items: <item x="0"/><item x="1"/><item x="2"/><item x="3"/>
pivotTable.getCTPivotTableDefinition().getPivotFields().getPivotFieldArray(3).getItems().getItemArray(i).unsetT();
pivotTable.getCTPivotTableDefinition().getPivotFields().getPivotFieldArray(3).getItems().getItemArray(i).setX((long)i);
//build a cache definition which has shared elements for those items
//<sharedItems><s v="City 1"/><s v="City 2"/><s v="City 3"/><s v="City 4"/></sharedItems>
pivotTable.getPivotCacheDefinition().getCTPivotCacheDefinition().getCacheFields().getCacheFieldArray(3).getSharedItems().addNewS().setV("City " + (i+1));
}
//Now we can predefinite a page filter. Second item, which is "City 2", in this case.
//pivotTable.getCTPivotTableDefinition().getPageFields().getPageFieldArray(0).setItem(1);
//If the need is selecting multiple items, first MultipleItemSelectionAllowed needs to be set.
pivotTable.getCTPivotTableDefinition().getPivotFields().getPivotFieldArray(3).setMultipleItemSelectionAllowed(true);
//Then set H(idden) true for all items which not shall be selected. First ("City 1") and fourth ("City 4") in this case.
pivotTable.getCTPivotTableDefinition().getPivotFields().getPivotFieldArray(3).getItems().getItemArray(0).setH(true);
pivotTable.getCTPivotTableDefinition().getPivotFields().getPivotFieldArray(3).getItems().getItemArray(3).setH(true);
FileOutputStream fileOut = new FileOutputStream("PivotTableTest4.xlsx");
wb.write(fileOut);
fileOut.close();
wb.close();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
This needs the full jar of all of the schemas, ooxml-schemas-1.3.jar, as mentioned in the FAQ.

Table creation on separate method using iText in Java

I'm trying to create an PDF document using iText. I followed to THIS nice tutorial and tried to create single page pdf document which has a table. In the tutorial the author keeps table creation of table on separate method such as addMetaData, addTitlePage and addContent. I also would keep them separately, but I'm new to iText and currently I'm stuck. The current code is:
public static void main(String args[]) {
try {
Document document = new Document(PageSize.A4);
PdfWriter.getInstance(document, new FileOutputStream(FILE));
document.open();
addMetaData(document);
addTitlePage(document);
addContent(document);
document.close();
} catch (Exception e) {
e.printStackTrace();
}
}
private static void addTitlePage(Document document) throws DocumentException {
Paragraph preface = new Paragraph();
// Add one empty line
addEmptyLine(preface, 1);
// Header of the document
preface.add(new Paragraph("Title here", capFont));
addEmptyLine(preface, 1);
// Report generated by: _name, _date
preface.add(new Paragraph("Report generated by: " + System.getProperty("user.name") + ", " + new Date(), //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
normFont));
addEmptyLine(preface, 2);
preface.add(new Paragraph("This document describes some kind of price list which is unknown to me.", normFont));
document.add(preface);
}
private static void addContent(Document document) throws DocumentException {
Paragraph content = new Paragraph();
// Add one empty line
addEmptyLine(content, 1);
// Content of the document
content.add(new Paragraph(createTable(subPart), normFont)); // not working line
addEmptyLine(content, 5);
content.add(new Paragraph("This document is a preliminary version and not subject to the license agreement.", redFont));
document.add(content);
}
private static void createTable(Section subPart) throws BadElementException {
PdfPTable table = new PdfPTable(3);
table.setHorizontalAlignment(Element.ALIGN_CENTER);
// Data
table.addCell("1");
table.addCell("2");
table.addCell("3");
subPart.add(table);
}
Any help would be appreciated.

So, after one day literature reading and api homepage visiting I came to my solution:
Instead of: content.add(new Paragraph(createTable(subPart), normFont));
I have now: createTable(content);
and of course I changed the type of variable in createTable method to the paragraph in order to get it working.

package src.AutosysPolicyWriter.Utility;
import java.util.StringTokenizer;
import com.lowagie.text.Cell;
import com.lowagie.text.Document;
import com.lowagie.text.Element;
import com.lowagie.text.Font;
import com.lowagie.text.Phrase;
import com.lowagie.text.Rectangle;
import com.lowagie.text.Table;
/**
* Modification/itext 1.4 - <Modified : September 26, 2013>
* #author Oliver Lundag
* #date 2013-09-06
* Can handle table creation in the PDF
*/
public class PdfTableUtility {
/***
*
* #param titleFont - font of the title of the table
* #param fontHeader - font of the headers
* #param fontData - font of the data
* #param thisReport - Document
* #param tableTitle - Name of the table
* #param headerStrings - headers
* #param data - data
*
* How to use this function;
*
* Example:
*
* // Initialize PdfTableUtility object
* 1. PdfTableUtility tableUtility = new PdfTableUtility();
*
* // Declare the value for headers.
* // Note: Number of columns will depends on how many headers has been declared
* 2. String[] headers = {"Customer Name","Age","Plan","Amount"};
*
* // Declare the data that will be put inside
* // Note that the arrangement of the strings are the actual display in the pdf
* // ; - separator
* 3. String data = "1.Oliver Lundag;26;Plan A;250,000;"+
* "2.Oliver Lundag;26;Plan A;250,000;"+
* "3.Oliver Lundag;26;Plan A;250,000";
*
* //call the function with specified arguments
* //arguments will depend on developers perspective
* 4. tableUtility.displaytable(FontBold11, FontBold9, FontNormal9, thisReport, "Information", 4, headers, data);
*/
public void displaytable(Font titleFont, Font fontHeader, Font fontData, Document thisReport,String tableTitle, String[] headerStrings, String data) {
try{
//START-(Modification/itext 1.4) SR-CS-13035 - OLUND <Modified : September 26, 2013> - title
//1.create table
Table title = new Table(1);
title.setLastHeaderRow(1);
title.setOffset(12f);
title.setSpaceInsideCell(1f);
title.setBorder(Rectangle.NO_BORDER);
//2.create table
Cell celltitle = new Cell(new Phrase(tableTitle, titleFont));
celltitle.setLeading(12);
celltitle.setBorder(Rectangle.NO_BORDER);
celltitle.setHorizontalAlignment(Element.ALIGN_CENTER);
title.addCell(celltitle);
//3.add the title in document
thisReport.add(title);
//END -(Modification/itext 1.4) SR-CS-13035 - OLUND <Modified : September 26, 2013> - title
//START-(Modification/itext 1.4) SR-CS-13035 - OLUND <Modified : September 26, 2013> - data
//1. get the max number of columns
int numColumns = headerStrings.length;
//2. create a table
Table table = new Table(numColumns);
table.setOffset(12f);
table.setLastHeaderRow(1);
table.setSpaceInsideCell(1f);
table.setTableFitsPage(true);
table.setAutoFillEmptyCells(true);
//3.get headers and add it into cells
for (String header : headerStrings) {
Cell headerCell = new Cell(new Phrase(header, fontHeader));
headerCell.setLeading(12);
headerCell.setHorizontalAlignment(Element.ALIGN_CENTER);
table.addCell(headerCell);
}
//4.get data and add it into cells
String strToken;
if (data.length() > 0) {
StringTokenizer stStr = new StringTokenizer(data,";",false);
while(stStr.hasMoreTokens()){
strToken = stStr.nextToken().toString();
Cell cell = new Cell(new Phrase(strToken,fontData));
cell.setHorizontalAlignment(Element.ALIGN_CENTER);
cell.setLeading(12);
table.addCell(cell);
}
}
//5. add the table in the document
thisReport.add(table);
//END - (Modification/itext 1.4) SR-CS-13035 - OLUND <Modified : September 26, 2013> - data
}catch (Exception e) {
e.printStackTrace();
}
}
}

We Keep Coding

Java is a programming language and computing platform first released by Sun Microsystems in 1995.

Reading from a word document using java - java

I'm trying to read data from a docx file using java. The data is is tables. Is there a way to iterate through the table cells and extract the cell data?

Related

How do I ADD bullet points to a word document using Apache POI in Java

Can't change row text in .docx file once row is added to table

Expanding an existing table in Excel using Apache POI

How to set Multiple default value in Apache POI pivot table report filter

Table creation on separate method using iText in Java

Categories

Resources