How to read and replace bookmark values with apache POI

How to read and replace bookmark values with apache POI - java

i'm a complete novice with apache POI and i already tried several things. My problem is that i have a few bookmarks in a docx-File and i want to replace the value of them.
i already got so far that i add the text to the bookmark, but the previous value is still there
my code:
InputStream fis = new FileInputStream(fileName);
XWPFDocument document = new XWPFDocument(fis);
List<XWPFParagraph> paragraphs = document.getParagraphs();
for (XWPFParagraph paragraph : paragraphs)
{
//Here you have your paragraph;
CTP ctp = paragraph.getCTP();
// Get all bookmarks and loop through them
List<CTBookmark> bookmarks = ctp.getBookmarkStartList();
for(CTBookmark bookmark : bookmarks)
{
if(bookmark.getName().equals("Firma1234"))
{
System.out.println(bookmark.getName());
XWPFRun run = paragraph.createRun();
run.setText(lcFirma);
ctp.getDomNode().insertBefore(run.getCTR().getDomNode(), bookmark.getDomNode());
}
}
}
OutputStream out = new FileOutputStream(output);
document.write(out);
document.close();
out.close();
the value of "lcFirma" is "Firma"
the value of the Bookmark is "Testmark"
my docx-File before:
Testmark -> name=Firma1234
my docx-File after:
FirmaTestmark
like i said the text is inserted before the value of the bookmark instead of replacing it, how do i replace the text instead?
Greetings,
Kevin

I also had similar requirement of setting the "Default text" field of a .docx bookmark. I was not able to do so, so, I did this as a workaround : Replaced the entire paragraph containing the bookmark with text. So, instead of the bookmark being populated with a default text, I had a paragraph that held the bookmarked text. In my case, the .docx had to finally converted to a .pdf file, so the absence of bookmark did not matter, but the presence of correct text was more important.
This is how I did it with Apache POI :
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.util.TempFileCreationStrategy;
import org.apache.poi.xdgf.usermodel.section.geometry.RelMoveTo;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBookmark;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
import org.w3c.dom.DOMException;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.UserDataHandler;
/**
*
* #author binita.bharati#gmail.com
*
* This code will replace bookmark with plain text. A bookmark is seen as "Text Form Field" in a .docx file.
*
*/
public class BookmarkReplacer {
public static void main(String[] args) throws Exception {
replaceBookmark();
}
private static String replaceBookmarkedPara(String input, String bookmarkTxt) {
char[] tmp = input.toCharArray();
StringBuilder sb = new StringBuilder();
int bookmarkedCharCount = 0;
for (int i = 0 ; i < tmp.length ; i++) {
int asciiCode = tmp[i];
if (asciiCode == 8194) {
bookmarkedCharCount ++;
if (bookmarkedCharCount == 5) {
sb.append(bookmarkTxt);
}
}
else {
sb.append(tmp[i]);
}
}
return sb.toString();
}
private static void removeAllRuns(XWPFParagraph paragraph) {
int size = paragraph.getRuns().size();
for (int i = 0; i < size; i++) {
paragraph.removeRun(0);
}
}
private static void insertReplacementRuns(XWPFParagraph paragraph, String replacedText) {
String[] replacementTextSplitOnCarriageReturn = StringUtils.split(replacedText, "\n");
for (int j = 0; j < replacementTextSplitOnCarriageReturn.length; j++) {
String part = replacementTextSplitOnCarriageReturn[j];
XWPFRun newRun = paragraph.insertNewRun(j);
newRun.setText(part);
if (j+1 < replacementTextSplitOnCarriageReturn.length) {
newRun.addCarriageReturn();
}
}
}
public static void replaceBookmark () throws Exception
{
InputStream fis = new FileInputStream("C:\\input.docx");
XWPFDocument document = new XWPFDocument(fis);
List<XWPFParagraph> paragraphs = document.getParagraphs();
for (XWPFParagraph paragraph : paragraphs)
{
//Here you have your paragraph;
CTP ctp = paragraph.getCTP();
// Get all bookmarks and loop through them
List<CTBookmark> bookmarks = ctp.getBookmarkStartList();
for(CTBookmark bookmark : bookmarks)
{
if(bookmark.getName().equals("data_incipit") || bookmark.getName().equals("incipit_Codcli")
|| bookmark.getName().equals("Incipit_titolo"))
{
String paraText = paragraph.getText();
System.out.println("paraText = "+paraText +" for bookmark name "+bookmark.getName());
String replacementText = replaceBookmarkedPara(paraText, "haha");
removeAllRuns(paragraph);
insertReplacementRuns(paragraph, replacementText);
}
}
}
OutputStream out = new FileOutputStream("C:\\output.docx");
document.write(out);
document.close();
out.close();
}
}

Try below code
private List<XWPFParagraph> collectParagraphs()
{
List<XWPFParagraph> paragraphs = new ArrayList<>();
paragraphs.addAll(this.document.getParagraphs());
for (XWPFTable table : this.document.getTables())
{
for (XWPFTableRow row : table.getRows())
{
for (XWPFTableCell cell : row.getTableCells())
paragraphs.addAll(cell.getParagraphs());
}
}
return paragraphs;
}
public List<String> getBookmarkNames()
{
List<String> bookmarkNames = new ArrayList<>();
Iterator<XWPFParagraph> paraIter = null;
XWPFParagraph para = null;
List<CTBookmark> bookmarkList = null;
Iterator<CTBookmark> bookmarkIter = null;
CTBookmark bookmark = null;
XWPFRun run = null;
// Get an Iterator for the XWPFParagraph object and step through them
// one at a time.
paraIter = collectParagraphs().iterator();
while (paraIter.hasNext())
{
para = paraIter.next();
// Get a List of the CTBookmark object sthat the paragraph
// 'contains' and step through these one at a time.
bookmarkList = para.getCTP().getBookmarkStartList();
bookmarkIter = bookmarkList.iterator();
while (bookmarkIter.hasNext())
{
bookmark = bookmarkIter.next();
bookmarkNames.add(bookmark.getName());
}
}
return bookmarkNames;
}

Related

Apache poi get table from text box

I'm using apache poi for iteration table in docx file. All works fine but if table in text box, my code don't see table - table.size() = 0
XWPFDocument doc = new XWPFDocument(new FileInputStream(fileName));
List<XWPFTable> table = doc.getTables();
for (XWPFTable xwpfTable : table) {
List<XWPFTableRow> row = xwpfTable.getRows();
for (XWPFTableRow xwpfTableRow : row) {
List<XWPFTableCell> cell = xwpfTableRow.getTableCells();
for (XWPFTableCell xwpfTableCell : cell) {
if(xwpfTableCell != null){
List<XWPFTable> itable = xwpfTableCell.getTables();
if(itable.size()!=0){
for (XWPFTable xwpfiTable : itable) {
List<XWPFTableRow> irow = xwpfiTable.getRows();
for (XWPFTableRow xwpfiTableRow : irow) {
List<XWPFTableCell> icell = xwpfiTableRow.getTableCells();
for (XWPFTableCell xwpfiTableCell : icell) {
if(xwpfiTableCell!=null){
}
}
}
}
}
}
}
}
}

Following code is low level parsing a *.docx document and getting all tables in document body of it.
The approach is using a org.apache.xmlbeans.XmlCursor and searching for all w:tbl elements in document.xml. If found add them to a List<CTTbl>.
Because a text box rectangle shape provides fall-back content in the document.xml, we need to skip the mc:Fallback elements. Else we would have the tables within the text boxes twice.
At last we go through the List<CTTbl> and get the contents of all the tables.
import java.io.*;
import org.apache.poi.xwpf.usermodel.*;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRow;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTc;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
import org.apache.xmlbeans.impl.values.XmlAnyTypeImpl;
import org.apache.xmlbeans.XmlCursor;
import javax.xml.namespace.QName;
import java.util.List;
import java.util.ArrayList;
public class WordReadAllTables {
public static void main(String[] args) throws Exception {
XWPFDocument document = new XWPFDocument(new FileInputStream("22.docx"));
CTBody ctbody = document.getDocument().getBody();
XmlCursor xmlcursor = ctbody.newCursor();
QName qnameTbl = new QName("http://schemas.openxmlformats.org/wordprocessingml/2006/main", "tbl", "w");
QName qnameFallback = new QName("http://schemas.openxmlformats.org/markup-compatibility/2006", "Fallback", "mc");
List<CTTbl> allCTTbls = new ArrayList<CTTbl>();
while (xmlcursor.hasNextToken()) {
XmlCursor.TokenType tokentype = xmlcursor.toNextToken();
if (tokentype.isStart()) {
if (qnameTbl.equals(xmlcursor.getName())) {
if (xmlcursor.getObject() instanceof CTTbl) {
allCTTbls.add((CTTbl)xmlcursor.getObject());
} else if (xmlcursor.getObject() instanceof XmlAnyTypeImpl) {
allCTTbls.add(CTTbl.Factory.parse(xmlcursor.getObject().newInputStream()));
}
} else if (qnameFallback.equals(xmlcursor.getName())) {
xmlcursor.toEndToken();
}
}
}
for (CTTbl cTTbl : allCTTbls) {
StringBuffer tableHTML = new StringBuffer();
tableHTML.append("<table>\n");
for (CTRow cTRow : cTTbl.getTrList()) {
tableHTML.append(" <tr>\n");
for (CTTc cTTc : cTRow.getTcList()) {
tableHTML.append(" <td>");
for (CTP cTP : cTTc.getPList()) {
for (CTR cTR : cTP.getRList()) {
for (CTText cTText : cTR.getTList()) {
tableHTML.append(cTText.getStringValue());
}
}
}
tableHTML.append("</td>");
}
tableHTML.append("\n </tr>\n");
}
tableHTML.append("</table>");
System.out.println(tableHTML);
}
document.close();
}
}
This code needs the full jar of all of the schemas ooxml-schemas-1.3.jar as mentioned in faq-N10025.

Cannot bring all the element from html to docx that have same tag with java

I have a project converting from html to docx with java, in the html document I have 2 paragraph with 2 header as a title, but when converting both of them to docx format, just one paragraph that successfully converted, but the other paragraph doesn't converted even they have same tag. Look the image below
And the code look like this
import java.io.File;
import java.io.FileOutputStream;
import java.util.List;
import java.util.Set;
import static org.apache.poi.hslf.model.textproperties.TextPropCollection.TextPropType.paragraph;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xwpf.usermodel.VerticalAlign;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
import org.apache.poi.xwpf.usermodel.XWPFRun;
import org.zwobble.mammoth.DocumentConverter;
import org.zwobble.mammoth.Result;
/**
*
* #author Alwan
*/
public class TestWord {
/**
* #param args the command line arguments
*/
public static void main(String[] args) {
// TODO code application logic here
try
{
File file = new File("src/test/TEST.docx");
DocumentConverter converter = new DocumentConverter();
Result<String> result = converter.extractRawText(file);
String html = result.getValue(); // The generated HTML
Set<String> warnings = result.getWarnings(); // Any warnings during conversion
String[] part = html.split("<p>");
String[] part2 = html.split("<h1>");
FileOutputStream out = new FileOutputStream(new File("testformat.docx"));
XWPFDocument doc = new XWPFDocument();
XWPFParagraph paragraph = doc.createParagraph();
XWPFRun paragraphOneRunOne = paragraph.createRun();
XWPFRun paragraphOneRunThree = paragraph.createRun();
for (int i = 0; i < html.length(); i++)
{
if (i % 2 != 0)
{
paragraphOneRunOne.setBold(true);
paragraphOneRunOne.setItalic(true);
paragraphOneRunOne.setText(part[i].trim());
paragraphOneRunOne.addBreak();
paragraphOneRunThree.setStrike(true);
paragraphOneRunThree.setFontSize(20);
paragraphOneRunThree.setSubscript(VerticalAlign.SUBSCRIPT);
paragraphOneRunThree.setText(part2[i].trim());
System.out.println(part2[i].trim());
System.out.println(part[i].trim());
doc.write(out);
out.close();
}
System.out.println("testformat.docx written successully");
}
System.out.println("Success");
} catch(Exception e) {
e.printStackTrace();
}
}
}
The question is, how to bring all the paragraph from html into the docx format when its have a same tag? Thank you for your attention before. Sorry for my bad english

Reading an ORC file in Java

How do you read an ORC file in Java? I'm wanting to read in a small file for some unit test output verification, but I can't find a solution.

Came across this and implemented one myself recently
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.ql.io.orc.RecordReader;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import java.util.List;
public class OrcFileDirectReaderExample {
public static void main(String[] argv)
{
try {
Reader reader = OrcFile.createReader(HdfsFactory.getFileSystem(), new Path("/user/hadoop/000000_0"));
StructObjectInspector inspector = (StructObjectInspector)reader.getObjectInspector();
System.out.println(reader.getMetadata());
RecordReader records = reader.rows();
Object row = null;
//These objects are the metadata for each column. They give you the type of each column and can parse it unless you
//want to parse each column yourself
List fields = inspector.getAllStructFieldRefs();
for(int i = 0; i < fields.size(); ++i) {
System.out.print(((StructField)fields.get(i)).getFieldObjectInspector().getTypeName() + '\t');
}
while(records.hasNext())
{
row = records.next(row);
List value_lst = inspector.getStructFieldsDataAsList(row);
StringBuilder builder = new StringBuilder();
//iterate over the fields
//Also fields can be null if a null was passed as the input field when processing wrote this file
for(Object field : value_lst) {
if(field != null)
builder.append(field.toString());
builder.append('\t');
}
//this writes out the row as it would be if this were a Text tab seperated file
System.out.println(builder.toString());
}
}catch (Exception e)
{
e.printStackTrace();
}
}
}

As per Apache Wiki, ORC file format was introduced in Hive 0.11.
So you will need Hive packages in your project source path to read ORC files. The package for the same are
org.apache.hadoop.hive.ql.io.orc.Reader;
org.apache.hadoop.hive.ql.io.orc.OrcFile

read orc testcase
#Test
public void read_orc() throws Exception {
//todo do kerberos auth
String orcPath = "hdfs://user/hive/warehouse/demo.db/orc_path";
//load hdfs conf
Configuration conf = new Configuration();
conf.addResource(getClass().getResource("/hdfs-site.xml"));
conf.addResource(getClass().getResource("/core-site.xml"));
FileSystem fs = FileSystem.get(conf);
// custom read column
List<String> columns = Arrays.asList("id", "title");
final List<Map<String, Object>> maps = OrcUtil.readOrcFile(fs, orcPath, columns);
System.out.println(new Gson().toJson(maps));
}
OrcUtil to read orc path with special columns
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.stream.Collectors;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.hive.ql.io.orc.OrcFile;
import org.apache.hadoop.hive.ql.io.orc.OrcInputFormat;
import org.apache.hadoop.hive.ql.io.orc.OrcSerde;
import org.apache.hadoop.hive.ql.io.orc.OrcSplit;
import org.apache.hadoop.hive.ql.io.orc.OrcStruct;
import org.apache.hadoop.hive.ql.io.orc.Reader;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RecordReader;
import org.apache.hadoop.mapred.Reporter;
public class OrcUtil {
public static List<Map<String, Object>> readOrcFile(FileSystem fs, String orcPath, List<String> readColumns)
throws IOException, SerDeException {
JobConf jobConf = new JobConf();
for (Map.Entry<String, String> entry : fs.getConf()) {
jobConf.set(entry.getKey(), entry.getValue());
}
FileInputFormat.setInputPaths(jobConf, orcPath);
FileInputFormat.setInputPathFilter(jobConf, ((PathFilter) path1 -> true).getClass());
InputSplit[] splits = new OrcInputFormat().getSplits(jobConf, 1);
InputFormat<NullWritable, OrcStruct> orcInputFormat = new OrcInputFormat();
List<Map<String, Object>> rows = new ArrayList<>();
for (InputSplit split : splits) {
OrcSplit orcSplit = (OrcSplit) split;
System.out.printf("read orc split %s%n", ((OrcSplit) split).getPath());
StructObjectInspector inspector = getStructObjectInspector(orcSplit.getPath(), jobConf, fs);
List<? extends StructField> readFields = inspector.getAllStructFieldRefs()
.stream().filter(e -> readColumns.contains(e.getFieldName())).collect(Collectors.toList());
// 49B file is empty
if (orcSplit.getLength() > 49) {
RecordReader<NullWritable, OrcStruct> recordReader = orcInputFormat.getRecordReader(orcSplit, jobConf, Reporter.NULL);
NullWritable key = recordReader.createKey();
OrcStruct value = recordReader.createValue();
while (recordReader.next(key, value)) {
Map<String, Object> entity = new HashMap<>();
for (StructField field : readFields) {
entity.put(field.getFieldName(), inspector.getStructFieldData(value, field));
}
rows.add(entity);
}
}
}
return rows;
}
private static StructObjectInspector getStructObjectInspector(Path path, JobConf jobConf, FileSystem fs)
throws IOException, SerDeException {
OrcFile.ReaderOptions readerOptions = OrcFile.readerOptions(jobConf);
readerOptions.filesystem(fs);
Reader reader = OrcFile.createReader(path, readerOptions);
String typeStruct = reader.getObjectInspector().getTypeName();
System.out.println(typeStruct);
List<String> columnList = parseColumnAndType(typeStruct);
String[] fullColNames = new String[columnList.size()];
String[] fullColTypes = new String[columnList.size()];
for (int i = 0; i < columnList.size(); ++i) {
String[] temp = columnList.get(i).split(":");
fullColNames[i] = temp[0];
fullColTypes[i] = temp[1];
}
Properties p = new Properties();
p.setProperty("columns", StringUtils.join(fullColNames, ","));
p.setProperty("columns.types", StringUtils.join(fullColTypes, ":"));
OrcSerde orcSerde = new OrcSerde();
orcSerde.initialize(jobConf, p);
return (StructObjectInspector) orcSerde.getObjectInspector();
}
private static List<String> parseColumnAndType(String typeStruct) {
int startIndex = typeStruct.indexOf("<") + 1;
int endIndex = typeStruct.lastIndexOf(">");
typeStruct = typeStruct.substring(startIndex, endIndex);
List<String> columnList = new ArrayList<>();
List<String> splitList = Arrays.asList(typeStruct.split(","));
Iterator<String> it = splitList.iterator();
while (it.hasNext()) {
StringBuilder current = new StringBuilder(it.next());
String currentStr = current.toString();
boolean left = currentStr.contains("(");
boolean right = currentStr.contains(")");
if (!left && !right) {
columnList.add(currentStr);
continue;
}
if (left && right) {
columnList.add(currentStr);
continue;
}
if (left && !right) {
while (it.hasNext()) {
String next = it.next();
current.append(",").append(next);
if (next.contains(")")) {
break;
}
}
columnList.add(current.toString());
}
}
return columnList;
}
}

Try this for getting ORCFile rowcount...
private long getRowCount(FileSystem fs, String fName) throws Exception {
long tempCount = 0;
Reader rdr = OrcFile.createReader(fs, new Path(fName));
StructObjectInspector insp = (StructObjectInspector) rdr.getObjectInspector();
Iterable<StripeInformation> iterable = rdr.getStripes();
for(StripeInformation stripe:iterable){
tempCount = tempCount + stripe.getNumberOfRows();
}
return tempCount;
}
//fName is hdfs path to file.
long rowCount = getRowCount(fs,fName);

Generating a .docx document from a .dotx template with docx4j (in an XPages application)

I'm using docx4j in an XPages application to create Word documents containing content from an XPage. The Word document (in .docx format) is created based on a template (also in docx.format). This all works fine. However, when I change the template from a .docx to a .dotx format, the Word document (.docx) which is generated cannot be opened. On trying to open the document, I get an error saying that the content causes problems.
Can anyone tell me how to convert a .dotx file to a .docx file using docx4j?
The code I am currently using is:
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import javax.xml.bind.JAXBElement;
import javax.xml.bind.JAXBException;
import org.docx4j.XmlUtils;
import org.docx4j.openpackaging.exceptions.Docx4JException;
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
import org.docx4j.wml.ContentAccessor;
import org.slf4j.impl.*;
import java.io.FileInputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import org.docx4j.wml.*;
import org.apache.commons.lang3.StringUtils;
import java.util.Enumeration;
import java.util.Map;
import java.util.Iterator;
import java.util.Vector;
import lotus.domino.Document;
import lotus.domino.*;
import org.docx4j.openpackaging.parts.WordprocessingML.DocumentSettingsPart;
import org.docx4j.jaxb.Context;
import org.docx4j.openpackaging.parts.relationships.RelationshipsPart;
import org.docx4j.openpackaging.parts.relationships.Namespaces;
public class JavaTemplateDocument {
public void mainCode(Session session, Document currDoc, String empLang, String templateType, String sArt) throws Exception {
Database dbCurr = session.getCurrentDatabase();
String viewName = "vieTemplateLookup";
View tview = dbCurr.getView(viewName);
Vector viewKey = new Vector();
viewKey.addElement(empLang);
viewKey.addElement(templateType);
Document templateDoc = tview.getDocumentByKey(viewKey);
if (tview.getDocumentByKey(viewKey) == null ) System.out.println("templateDoc is NULL");
Item itmNotesFields = templateDoc.getFirstItem("NotesFieldList");
Item itmWordFields = templateDoc.getFirstItem("WordFieldList");
Vector<String[]> notesFields = itmNotesFields.getValues();
Vector<String[]> wordFields = itmWordFields.getValues();
int z = notesFields.size();
int x = wordFields.size();
Enumeration e1 = notesFields.elements();
Enumeration e2 = wordFields.elements();
WordprocessingMLPackage template = getTemplate("C:\\Temp\\AZG Sample Template.dotx","C:\\Temp\\AZG Sample Template.docx");
for (int y = 0; y < x; y++) {
if (currDoc.hasItem(String.valueOf(notesFields.elementAt(y)))) {
Item itmNotesName = currDoc.getFirstItem(String.valueOf(notesFields.elementAt(y)));
replacePlaceholder(template, itmNotesName.getText(), String.valueOf(wordFields.elementAt(y))); }
else {
replacePlaceholder(template, "", String.valueOf(wordFields.elementAt(y)));
}
}
writeDocxToStream(template, "C:\\Temp\\AZG Sample Document.docx");
createResponseDocument(dbCurr, currDoc, templateDoc, sArt);
}
private void createResponseDocument(Database dbCurr, Document currDoc, Document templateDoc, String sArt) throws NotesException{
Document respDoc = dbCurr.createDocument(); // create the response document
String refVal = currDoc.getUniversalID();
respDoc.appendItemValue("IsDocTemplate", "1");
if (currDoc.hasItem("Name")) {
respDoc.appendItemValue("Name", currDoc.getItemValue("Name"));}
else {System.out.println("Name is not available"); }
if (currDoc.hasItem("Firstname")) {
respDoc.appendItemValue("Firstname", currDoc.getItemValue("Firstname"));}
else {System.out.println("Firstname is not available"); }
if (currDoc.hasItem("ReferenceTypeTexts")) {
respDoc.appendItemValue("ReferenceTypeTexts", currDoc.getItemValue("ReferenceTypeTexts"));}
else {System.out.println("ReferenceTypeTexts is not available"); }
if (currDoc.hasItem("ReferenceType")) {
respDoc.appendItemValue("ReferenceType", currDoc.getItemValue("ReferenceType"));}
else {System.out.println("ReferenceType is not available"); }
System.out.println("Append Form value");
respDoc.appendItemValue("Form", "frmRespTempl");
respDoc.makeResponse(currDoc);
RichTextItem body = respDoc.createRichTextItem("Body");
body.embedObject(1454, "", "C:\\Temp\\AZG Sample Document.docx", null);
respDoc.save();
}
/*
* Create a simple word document that we can use as a template.
* For this just open Word, create a new document and save it as template.docx.
* This is the word template we'll use to add content to.
* The first thing we need to do is load this document with docx4j.
*/
private WordprocessingMLPackage getTemplate(String source, String target) throws Docx4JException, FileNotFoundException, IOException {
String WORDPROCESSINGML_DOCUMENT = "application/vnd.openxmlformats- officedocument.wordprocessingml.document.main+xml";
final ContentType contentType = new ContentType(WORDPROCESSINGML_DOCUMENT);
String templatePath = source;
File sourceFile = new File(source);
File targetFile = new File(target);
copyFileUsingFileChannels(sourceFile, targetFile);
WordprocessingMLPackage template = WordprocessingMLPackage.load(new FileInputStream(targetFile));
ContentTypeManager ctm = wordMLPackage.getContentTypeManager();
ctm.addOverrideContentType(new URI("/word/document.xml"),WORDPROCESSINGML_DOCUMENT);
DocumentSettingsPart dsp = new DocumentSettingsPart();
CTSettings settings = Context.getWmlObjectFactory().createCTSettings();
dsp.setJaxbElement(settings);
wordMLPackage.getMainDocumentPart().addTargetPart(dsp);
// Create external rel
RelationshipsPart rp = RelationshipsPart.createRelationshipsPartForPart(dsp);
org.docx4j.relationships.Relationship rel = new org.docx4j.relationships.ObjectFactory().createRelationship();
rel.setType( Namespaces.ATTACHED_TEMPLATE );
rel.setTarget(templatePath);
rel.setTargetMode("External");
rp.addRelationship(rel); // addRelationship sets the rel's #Id
settings.setAttachedTemplate(
(CTRel)XmlUtils.unmarshalString("<w:attachedTemplate xmlns:w=\"http://schemas.openxmlformats.org/wordprocessingml/2006/main\" xmlns:r=\"http://schemas.openxmlformats.org/officeDocument/2006/relationships\" r:id=\"" + rel.getId() + "\"/>", Context.jc, CTRel.class)
);
return template;
}
private static List<Object> getAllElementFromObject(Object obj, Class<?> toSearch) {
List<Object> result = new ArrayList<Object>();
if (obj instanceof JAXBElement) obj = ((JAXBElement<?>) obj).getValue();
if (obj.getClass().equals(toSearch))
result.add(obj);
else if (obj instanceof ContentAccessor) {
List<?> children = ((ContentAccessor) obj).getContent();
for (Object child : children) {
result.addAll(getAllElementFromObject(child, toSearch));
}
}
return result;
}
/*
* This will look for all the Text elements in the document, and those that match are replaced with the value we specify.
*/
private void replacePlaceholder(WordprocessingMLPackage template, String name, String placeholder ) {
List<Object> texts = getAllElementFromObject(template.getMainDocumentPart(), Text.class);
for (Object text : texts) {
Text textElement = (Text) text;
if (textElement.getValue().equals(placeholder)) {
textElement.setValue(name);
}
}
}
/*
* write the document back to a file
*/
private void writeDocxToStream(WordprocessingMLPackage template, String target) throws IOException, Docx4JException {
File f = new File(target);
template.save(f);
}
/*
* Example code for replaceParagraph
*
String placeholder = "SJ_EX1";
String toAdd = "jos\ndirksen";
replaceParagraph(placeholder, toAdd, template, template.getMainDocumentPart());
*/
private void replaceParagraph(String placeholder, String textToAdd, WordprocessingMLPackage template, ContentAccessor addTo) {
// 1. get the paragraph
List<Object> paragraphs = getAllElementFromObject(template.getMainDocumentPart(), P.class);
P toReplace = null;
for (Object p : paragraphs) {
List<Object> texts = getAllElementFromObject(p, Text.class);
for (Object t : texts) {
Text content = (Text) t;
if (content.getValue().equals(placeholder)) {
toReplace = (P) p;
break;
}
}
}
// we now have the paragraph that contains our placeholder: toReplace
// 2. split into seperate lines
String as[] = StringUtils.splitPreserveAllTokens(textToAdd, '\n');
for (int i = 0; i < as.length; i++) {
String ptext = as[i];
// 3. copy the found paragraph to keep styling correct
P copy = (P) XmlUtils.deepCopy(toReplace);
// replace the text elements from the copy
List<?> texts = getAllElementFromObject(copy, Text.class);
if (texts.size() > 0) {
Text textToReplace = (Text) texts.get(0);
textToReplace.setValue(ptext);
}
// add the paragraph to the document
addTo.getContent().add(copy);
}
// 4. remove the original one
((ContentAccessor)toReplace.getParent()).getContent().remove(toReplace);
}
/*
* A set of hashmaps that contain the name of the placeholder to replace and the value to replace it with.
*
* Map<String,String> repl1 = new HashMap<String, String>();
repl1.put("SJ_FUNCTION", "function1");
repl1.put("SJ_DESC", "desc1");
repl1.put("SJ_PERIOD", "period1");
Map<String,String> repl2 = new HashMap<String, String>();
repl2.put("SJ_FUNCTION", "function2");
repl2.put("SJ_DESC", "desc2");
repl2.put("SJ_PERIOD", "period2");
Map<String,String> repl3 = new HashMap<String, String>();
repl3.put("SJ_FUNCTION", "function3");
repl3.put("SJ_DESC", "desc3");
repl3.put("SJ_PERIOD", "period3");
replaceTable(new String[]{"SJ_FUNCTION","SJ_DESC","SJ_PERIOD"}, Arrays.asList(repl1,repl2,repl3), template);
*/
private void replaceTable(String[] placeholders, List<Map<String, String>> textToAdd,
WordprocessingMLPackage template) throws Docx4JException, JAXBException {
List<Object> tables = getAllElementFromObject(template.getMainDocumentPart(), Tbl.class);
// 1. find the table
Tbl tempTable = getTemplateTable(tables, placeholders[0]);
List<Object> rows = getAllElementFromObject(tempTable, Tr.class);
// first row is header, second row is content
if (rows.size() == 2) {
// this is our template row
Tr templateRow = (Tr) rows.get(1);
for (Map<String, String> replacements : textToAdd) {
// 2 and 3 are done in this method
addRowToTable(tempTable, templateRow, replacements);
}
// 4. remove the template row
tempTable.getContent().remove(templateRow);
}
}
private Tbl getTemplateTable(List<Object> tables, String templateKey) throws Docx4JException, JAXBException {
for (Iterator<Object> iterator = tables.iterator(); iterator.hasNext();) {
Object tbl = iterator.next();
List<?> textElements = getAllElementFromObject(tbl, Text.class);
for (Object text : textElements) {
Text textElement = (Text) text;
if (textElement.getValue() != null && textElement.getValue().equals(templateKey))
return (Tbl) tbl;
}
}
return null;
}
private static void addRowToTable(Tbl reviewtable, Tr templateRow, Map<String, String> replacements) {
Tr workingRow = (Tr) XmlUtils.deepCopy(templateRow);
List<?> textElements = getAllElementFromObject(workingRow, Text.class);
for (Object object : textElements) {
Text text = (Text) object;
String replacementValue = (String) replacements.get(text.getValue());
if (replacementValue != null)
text.setValue(replacementValue);
}
reviewtable.getContent().add(workingRow);
}
private static void copyFileUsingFileChannels(File source, File dest)
throws IOException {
FileChannel inputChannel = null;
FileChannel outputChannel = null;
try {
inputChannel = new FileInputStream(source).getChannel();
outputChannel = new FileOutputStream(dest).getChannel();
outputChannel.transferFrom(inputChannel, 0, inputChannel.size());
} finally {
inputChannel.close();
outputChannel.close();
}
}
}

Broadly, there are a few things that comprise the difference between a template (.dotx) and a document (.docx). This means you have a few things that you need to do -- it's not as simple as just changing the file extension, whether you're saving a doc as a template, or attempting to create a document from a template.
Hopefully this outline will assist:
First do what you've already done: your new document should be a file copy of the template
Change your new WordprocessingMLPackage's document type as appropriate (see WORDPROCESSINGML_TEMPLATE in the ContentTypes class)
Create an attached template and attach it to your document: see the sample code on Github for more detail on that (TemplateAttach.java sample).
Good luck!

Let's hack it.
New office formats are just ZIPs with many XML configurations and data. Try to save identical document as template and document in MS Word. IMHO the core of your problem is in (packed) file [Content_Types].xml.
They differ in the property:
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml"
ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"
I would expect #benpoole's advice should work (it should alter the content of said file). If that is not the case, simply hack the content of it inside the file (it is just ordinary ZIP archive, remember).
Disclaimer: there IS difference in few more files, that might need tweaking to make it work.

I would say that you need to change the returning filename to a dotx from docx
do a filecopy from docx to dotx and change this row
body.embedObject(1454, "", "C:\\Temp\\AZG Sample Document.dotx", null);

How to hide the data between the two tags(<hidden> .. </hidden>) in Word document

I am able to Read the input document using Apache POI and also able to find the data between the tags(What to be hidden) but the problem is i'm unable to write the data in the output file. How can i do the same to write the data and hide it in the output generated file..
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Range;
public class Hidden {
public static void main(String args[]) throws Exception
{
File file = new File("D://me1.doc");
FileInputStream fin = new FileInputStream(file);
FileOutputStream fout = new FileOutputStream("D://Test.doc");
HWPFDocument doc = new HWPFDocument(fin);
Range range = doc.getRange();
WordExtractor extractor = new WordExtractor(doc);
String para[] = extractor.getParagraphText();
String output="";
String hidden="";
for (String p : para) {
String[] w = p.split("[<\\>]");
for(int k=0 ;k<w.length;k++){
if(w[k]!=null && !"".equalsIgnoreCase(w[k])){
if("hidden".equalsIgnoreCase(w[k])){
k++;
CharacterRun run = range.getCharacterRun(k);
hidden= w[k];
k++;
System.out.println(hidden);
run.setVanished(true);
doc.write(fout);
}else{
}
}
}
}
fout.close();
fin.close();
}
}

We Keep Coding

Java is a programming language and computing platform first released by Sun Microsystems in 1995.

How to read and replace bookmark values with apache POI - java

Related

Apache poi get table from text box

Cannot bring all the element from html to docx that have same tag with java

Reading an ORC file in Java

Generating a .docx document from a .dotx template with docx4j (in an XPages application)

How to hide the data between the two tags(<hidden> .. </hidden>) in Word document

Categories

Resources