I get an error when I write an XML file and then read it back and parse it.
These are the functions I use for writing and reading:
/**
 * Writes a small sample XML document (a {@code <spells>} root holding three
 * {@code <spell>} children) to an app-private file.
 *
 * <p>The file is opened with {@code MODE_PRIVATE}, so every call replaces the
 * previous content. Opening it with {@code MODE_APPEND} would add a second
 * complete document, including a second {@code <?xml ...?>} declaration, after
 * the first one; a parser then fails with "processing instructions must not
 * start with xml".
 *
 * @param file_name name of the file inside the app's private files directory
 */
protected void write_xml_file(String file_name) {
    // try-with-resources closes the stream even when serialization fails half way.
    try (FileOutputStream fos = openFileOutput(file_name, Context.MODE_PRIVATE)) {
        XmlSerializer serializer = Xml.newSerializer();
        serializer.setOutput(fos, "UTF-8");
        serializer.startDocument(null, Boolean.valueOf(true));
        serializer.setFeature("http://xmlpull.org/v1/doc/features.html#indent-output", true);
        serializer.startTag(null, "spells");
        for (int j = 0; j < 3; j++) {
            serializer.startTag(null, "spell");
            serializer.text("asd" + j);
            serializer.endTag(null, "spell");
        }
        // Close the root element explicitly instead of relying on endDocument().
        serializer.endTag(null, "spells");
        serializer.endDocument();
        serializer.flush();
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
/**
 * Parses an XML file from the app's private files directory with a DOM
 * parser and walks every {@code <spells>} element it contains.
 *
 * <p>Nothing is extracted from the elements yet; the loop is a skeleton.
 * Parser, SAX and I/O failures are printed to the error stream.
 *
 * @param file_name name of the file inside {@code context.getFilesDir()}
 * @param context   context used to resolve the private files directory
 */
protected void read_xml_file(String file_name,Context context) {
    File xmlFile = new File(context.getFilesDir() + File.separator + file_name);
    try {
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        Document doc = builder.parse(xmlFile);
        doc.getDocumentElement().normalize();

        NodeList spellsNodes = doc.getElementsByTagName("spells");
        int total = spellsNodes.getLength();
        for (int i = 0; i < total; i++) {
            Node current = spellsNodes.item(i);
            if (current.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element element = (Element) current;
            // Attribute / child-element extraction goes here.
        }
    } catch (ParserConfigurationException | SAXException | IOException e) {
        e.printStackTrace();
    }
}
This is the error:
org.xml.sax.SAXParseException: processing instructions must not start
with xml (position:unknown #6:10 in java.io.InputStreamReader#b64434e)
org.xml.sax.SAXParseException: processing instructions must not start with xml (position:unknown #6:10 in java.io.InputStreamReader#b64434e)
It looks like you have <?xml version="1.0" encoding="utf-8"?> in your XML file. Remove it then try it!
This error usually happens when the file content ends up in the stream more than once (at least twice) before it is parsed. Check that the file behind fXmlFile does not contain the document twice.
Related
We have a program in Java that needs to convert CSV file to Hierarchy XML:
the output should be like this:
<?xml version="1.0" encoding="UTF-8"?>
<UteXmlComuniction xmlns="http://www....../data">
<ClientGeneralData>
<Client>
<pfPg></pfPg>
<name>Arnold</name>
<Family>Bordon</Family>
</Client>
<Contract>
<ContractDetail>
<Contract>100020</Contract>
<ContractYear>2019</ContractYear>
</ContractDetail>
</Contract>
</ClientGeneralData>
</UteXmlComuniction>
But for CSV file we are flexible, we can define it as we want. I thought maybe in this way it works:
"UteXmlComuniction/ClientGeneralData/Client/pfpg", "UteXmlComuniction/ClientGeneralData/Client/name" ,
"UteXmlComuniction/ClientGeneralData/Client/Family" , ...```
This is our code, but it just gives me the flat XML. Also I can not insert "/" character in CSV file, because program can not accept this character.
/**
 * Converts a delimited text (CSV) file into a flat XML document: one
 * {@code <row>} element per data line, with one child element per column
 * named after the header line.
 */
public class XMLCreators {
    // Protected Properties
    protected DocumentBuilderFactory domFactory = null;
    protected DocumentBuilder domBuilder = null;

    /**
     * Creates the DOM factory and builder. Failures are reported on
     * {@code System.err} and leave {@link #domBuilder} {@code null}.
     */
    public XMLCreators() {
        try {
            domFactory = DocumentBuilderFactory.newInstance();
            domBuilder = domFactory.newDocumentBuilder();
        } catch (FactoryConfigurationError exp) {
            System.err.println(exp.toString());
        } catch (ParserConfigurationException exp) {
            System.err.println(exp.toString());
        } catch (Exception exp) {
            System.err.println(exp.toString());
        }
    }

    /**
     * Reads {@code csvFileName}, builds the XML document and prints it to
     * standard output.
     *
     * <p>The first line is the header row; its tokens become the element
     * names, so they must be valid XML names. Missing trailing values in a
     * data row are written as empty elements.
     *
     * @param csvFileName path of the delimited input file
     * @param xmlFileName currently unused; the document is only printed to
     *                    standard output
     * @param delimiter   delimiter characters, as understood by
     *                    {@link StringTokenizer}
     * @return the number of data rows converted (0 for a header-only or empty
     *         file), or -1 if the input could not be read or converted
     */
    public int convertFile(String csvFileName, String xmlFileName,
            String delimiter) {
        int rowsCount = 0;
        Document newDoc;
        try {
            newDoc = domBuilder.newDocument();
            // Root element
            Element rootElement = newDoc.createElement("XMLCreators");
            newDoc.appendChild(rootElement);

            // try-with-resources: the reader is closed on success and on failure.
            try (BufferedReader csvReader = new BufferedReader(new FileReader(csvFileName))) {
                List<String> headers = new ArrayList<String>(5);
                boolean headerSeen = false;
                String text;
                while ((text = csvReader.readLine()) != null) {
                    StringTokenizer st = new StringTokenizer(text, delimiter, false);
                    String[] rowValues = new String[st.countTokens()];
                    int index = 0;
                    while (st.hasMoreTokens()) {
                        rowValues[index++] = st.nextToken();
                    }
                    if (!headerSeen) { // Header row
                        for (String col : rowValues) {
                            headers.add(col);
                        }
                        headerSeen = true;
                        continue;
                    }
                    // Data row
                    rowsCount++;
                    Element rowElement = newDoc.createElement("row");
                    rootElement.appendChild(rowElement);
                    for (int col = 0; col < headers.size(); col++) {
                        // Short rows are padded with empty values.
                        String value = col < rowValues.length ? rowValues[col] : "";
                        Element curElement = newDoc.createElement(headers.get(col));
                        curElement.appendChild(newDoc.createTextNode(value));
                        rowElement.appendChild(curElement);
                    }
                }
            }
        } catch (Exception exp) {
            System.err.println(exp.toString());
            return -1;
        }

        // Serialize straight into a byte stream so the bytes always match the
        // encoding written in the XML declaration (UTF-8 by default).
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try {
            TransformerFactory tranFactory = TransformerFactory.newInstance();
            Transformer aTransformer = tranFactory.newTransformer();
            aTransformer.setOutputProperty(OutputKeys.INDENT, "yes");
            aTransformer.setOutputProperty(OutputKeys.METHOD, "xml");
            aTransformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
            Source src = new DOMSource(newDoc);
            Result result = new StreamResult(baos);
            aTransformer.transform(src, result);
            System.out.println(baos.toString("UTF-8"));
        } catch (Exception exp) {
            // Printing the document is best effort; the rows were still converted.
            exp.printStackTrace();
        }
        return rowsCount;
    }
}
Do you have any suggestions for how I should modify the code, or how I could change my CSV, in order to get a hierarchical XML?
csv:
pfPg;name;Family;Contract;ContractYear
There are several libs for reading csv in Java. Store the values in a container e.g. hashmap.
Then create java classes representing your xml structure.
/**
 * Plain data holder for the {@code <Client>} element of the target XML.
 * Field names follow the XML element names.
 */
class Client {
    private String pfPg;
    private String name;
    private String Family;
}
/**
 * Groups the client and contract parts of one record.
 * NOTE(review): the Contract class is not shown in this snippet.
 */
class ClientGenaralData {
// Client details of the record.
private Client client;
// Contract details of the record.
private Contract contract;
}
Do the mapping from csv to your Java classes by writing custom code or a mapper like dozer... Then use xml binding with Jackson or JAXB to create xml from Java objects.
Jackson xml
Dozer HowTo
I have posted this before. I am an absolutely new to this type of situation. I am well versed in the M3/MOVEX proprietary software, but I have never used sockets to connect to a server before. I have gotten to this point: I can connect - I can send an XML String to the server - I can get a response back and
put it in a string. I have tried document builder to parse it, but when I do, the document comes back null.
try {
//TCPSocket = new Socket("10.33.106.29", 10101);
//Open the socket connection
TCPSocket = new Socket("idc-v-pobap02t.ebsco.com", 10101);
//TCPSocket = new Socket("idc-v-pobap02t", 10101);
//Initialize the reader and writer for the socket
is = new BufferedReader(new
InputStreamReader(TCPSocket.getInputStream()));
pw = new PrintWriter(TCPSocket.getOutputStream(), true);
//Send Data to the socket server
if (TCPSocket != null && is != null) {
pw.println(XMLString + "\n");
pw.flush();
}
///The
while((responseLine = is.readLine()) != null){
//Do Something
}
//Hangs so I took it out
//Read the Data from the socket server.
responseLine = is.readLine(); //Has my input XML String
is.close();
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder;
builder = factory.newDocumentBuilder();
doc = builder.newDocument();
doc.setXmlVersion("1.0");
doc = builder.parse(new InputSource(new
StringReader(responseLine)));
I tried this as well earlier:
// Earlier attempt: stream over responseLine with StAX and collect the text
// events into serverResponse.
XMLInputFactory f = XMLInputFactory.newInstance();
XMLStreamReader r;
try {
r = f.createXMLStreamReader(new StringReader(responseLine));
int eventType = r.next();
while(r.hasNext()) {
// require() throws XMLStreamException unless the current event is a
// START_ELEMENT with local name "TagName".
// NOTE(review): this runs on every iteration, whatever event the reader is on — confirm intended.
r.require(XMLStreamReader.START_ELEMENT, null,
"TagName");
if(eventType == XMLStreamConstants.END_ELEMENT){
break;
}
//EventName = r.getName();
// Collect text only for character-like events.
if(eventType == XMLStreamConstants.CHARACTERS
|| eventType == XMLStreamConstants.CDATA
|| eventType == XMLStreamConstants.SPACE
|| eventType == XMLStreamConstants.ENTITY_REFERENCE) {
serverResponse.append(r.getText());
}
// NOTE(review): nextTag() is called twice and only the second result is
// kept, so one tag event is skipped per iteration — confirm intended.
r.nextTag();
eventType = r.nextTag();
}
} catch (XMLStreamException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
Is there something I am missing here? As I said, I am totally new at this, so please don't assume that I know anything if you respond.
I have it figured out. This is what I did - it works, but if there is a better way - let me know.
XML Stream looks like this:
<loads>
<load date="" groupsplitting="allowed" sequencemaxlookahead="0"
usesequenceovercontainers="true" containerselectionrule="7"
numstagingpositions="0" loadproportionally="false"
sequenceiscontainertype="false" customerid="" stage="1" userint1="0"
userstring2="" userstring1="" shipdate="" notes=""
loadid="2000151290:01001072269" id="2000151290:01001072269"/>
</loads>
// Keep the raw response, then parse it as XML.
serverResponse.append(responseLine);
DocumentBuilder builder =
        DocumentBuilderFactory.newInstance().newDocumentBuilder();
doc = builder.parse(new InputSource(new StringReader(responseLine)));
doc.getDocumentElement().normalize();

// Read the "loadid" attribute of every <load> element; the last one wins.
// The attribute is looked up by name rather than by comparing names with
// ==, which tests object identity and is not a reliable string comparison.
NodeList loads = doc.getElementsByTagName("load");
for (int i = 0; i < loads.getLength(); i++) {
    Element load = (Element) loads.item(i);
    if (load.hasAttribute("loadid")) {
        Delivery = load.getAttribute("loadid");
    }
}
I have below code which is going by all XML content.
Now, I open a stream writer at the beginning, but I don't know how to use it inside the method:
bw.write
ReadXML.java
/**
 * Parses {@code C:\test.xml} and dumps every element, its text content and
 * its attributes to standard output.
 */
public class ReadXML {

    /** Entry point: parses the file and prints the whole element tree. */
    public static void main(String[] args) {
        try {
            File xmlFile = new File("C:\\test.xml");
            Document document = DocumentBuilderFactory.newInstance()
                    .newDocumentBuilder()
                    .parse(xmlFile);
            System.out.println("Root element :" + document.getDocumentElement().getNodeName());
            if (document.hasChildNodes()) {
                printNote(document.getChildNodes());
            }
        } catch (Exception e) {
            System.out.println(e.getMessage());
        }
    }

    /**
     * Recursively prints the element nodes of {@code nodeList}; nodes of any
     * other type are ignored.
     */
    private static void printNote(NodeList nodeList) {
        final int size = nodeList.getLength();
        for (int idx = 0; idx < size; idx++) {
            Node current = nodeList.item(idx);
            if (current.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            String tag = current.getNodeName();
            System.out.println("\nNode Name =" + tag + " [OPEN]");
            System.out.println("Node Value =" + current.getTextContent());

            if (current.hasAttributes()) {
                // attribute names and values
                NamedNodeMap attributes = current.getAttributes();
                for (int a = 0; a < attributes.getLength(); a++) {
                    Node attribute = attributes.item(a);
                    System.out.println("attr name : " + attribute.getNodeName());
                    System.out.println("attr value : " + attribute.getNodeValue());
                }
            }

            if (current.hasChildNodes()) {
                // descend into the children
                printNote(current.getChildNodes());
            }
            System.out.println("Node Name =" + tag + " [CLOSE]");
        }
    }
}
can you please help me with it? if you know how to resolve the issue will be great.
thanks!
Okay, still not sure what's exactly your problem but maybe this helps.
First, open the writer:
final BufferedWriter w = new BufferedWriter(new FileWriter(outputFile));
Then pass it to printNote:
printNote(doc.getChildNodes(), w);
Modify the method accordingly:
private static void printNote(final NodeList nodeList, final BufferedWriter w) throws IOException {
// ...
}
When you have the node you want to write to the file do:
w.write(node.getTextContent());
w.newLine();
Don't forget to close your writer after you're done!
Edit
Examples for closing the writer:
Old school
/**
 * Pre-Java-7 variant: parses the XML file, then opens the CSV writer and
 * closes it by hand in a {@code finally} block.
 */
public static void mainv1(String[] args) {
    final File xmlFile = new File("C:\\test.xml");
    final File csvFile = new File("C:\\test.csv");
    BufferedWriter out = null;
    try {
        Document parsed = DocumentBuilderFactory.newInstance()
                .newDocumentBuilder()
                .parse(xmlFile);
        System.out.println("Root element :" + parsed.getDocumentElement().getNodeName());
        // Opened inside the try because the FileWriter constructor throws IOException.
        out = new BufferedWriter(new FileWriter(csvFile));
        if (parsed.hasChildNodes()) {
            printNote(parsed.getChildNodes(), out);
        }
    } catch (Exception e) {
        System.out.println(e.getMessage());
    } finally {
        // The writer is still null when parsing failed before it was opened.
        if (out != null) {
            try {
                out.close();
            } catch (final IOException e) {
                // Log error
            }
        }
    }
}
Java7 and higher
/**
 * Java 7+ variant: the CSV writer is declared in a try-with-resources
 * header, so it is closed automatically when the block exits.
 */
public static void main(String[] args) {
    final File xmlFile = new File("C:\\test.xml");
    final File csvFile = new File("C:\\test.csv");
    try (BufferedWriter out = new BufferedWriter(new FileWriter(csvFile))) {
        Document parsed = DocumentBuilderFactory.newInstance()
                .newDocumentBuilder()
                .parse(xmlFile);
        System.out.println("Root element :" + parsed.getDocumentElement().getNodeName());
        if (parsed.hasChildNodes()) {
            printNote(parsed.getChildNodes(), out);
        }
    } catch (Exception e) {
        System.out.println(e.getMessage());
    }
}
I have an xml file with something like this:
<Verbiage>
The whiskers plots are based on the responses of incarcerated
<Choice>
<Juvenile> juveniles who have committed sexual offenses. </Juvenile>
<Adult> adult sexual offenders. </Adult>
</Choice>
If the respondent is a
<Choice>
<Adult>convicted sexual offender, </Adult>
<Juvenile>juvenile who has sexually offended, </Juvenile>
</Choice>
#his/her_lc# percentile score, which defines #his/her_lc# position
relative to other such offenders, should be taken into account as well as #his/her_lc# T score. Percentile
scores in the top decile (> 90 %ile) of such offenders suggest that the respondent
may be defensive and #his/her_lc# report should be interpreted with this in mind.
</Verbiage>
I am trying to find a way to parse the xml file (I've been using DOM), search for #his/her_lc# and replace that with "her". I've tried using FileReader,BufferedReader, string.replaceAll, FileWriter, but those didn't work.
Is there a way I could do this using XPath?
Ultimately I want to search this xml file for this string and replace it with another string.
do I have to add a tag around the string I want it parse it that way?
Code I tried:
protected void parse() throws ElementNotValidException {
try {
//Parse xml File
File inputXML = new File("template.xml");
DocumentBuilderFactory parser = DocumentBuilderFactory.newInstance(); // new instance of doc builder
DocumentBuilder dParser = parser.newDocumentBuilder(); // calls it
Document doc = dParser.parse(inputXML); // parses file
FileReader reader = new FileReader(inputXML);
String search = "#his/her_lc#";
String newString;
BufferedReader br = new BufferedReader(reader);
while ((newString = br.readLine()) != null){
newString.replaceAll(search, "her");
}
FileWriter writer = new FileWriter(inputXML);
writer.write(newString);
writer.close();
} catch (ParserConfigurationException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (SAXException e) {
e.printStackTrace();
}
Code I was given to fix:
try {
    File inputXML = new File("template.xml"); // template to rewrite in place
    DocumentBuilderFactory parser = DocumentBuilderFactory.newInstance();
    DocumentBuilder dParser = parser.newDocumentBuilder();
    Document doc = dParser.parse(inputXML);
    doc.getDocumentElement().normalize();

    // Placeholder text -> { male form, female form }.
    String[][] pronounForms = {
        {"#he/she_lc#", "he", "she"},
        {"#he/she_caps#", "He", "She"},
        {"#his/her_lc#", "his", "her"},
        {"#his/her_caps#", "His", "Her"},
        {"#him/her_lc#", "him", "her"},
    };

    // 0 = male, 1 = female. Hard coded for testing; it has to be taken from
    // the response file.
    int gender = 1;
    System.out.println("----------------------------");

    // Visit every <Verbiage> element (not just the first one) and rewrite
    // the <pronoun> elements found inside it.
    NodeList pList = doc.getElementsByTagName("Verbiage");
    for (int temp = 0; temp < pList.getLength(); temp++) {
        Node pNode = pList.item(temp);
        if (pNode.getNodeType() != Node.ELEMENT_NODE || (gender != 0 && gender != 1)) {
            continue;
        }
        Element eElement = (Element) pNode;
        NodeList pronounList = eElement.getElementsByTagName("pronoun");
        for (int i = 0; i < pronounList.getLength(); i++) {
            Node pronoun = pronounList.item(i);
            String text = pronoun.getTextContent();
            for (String[] forms : pronounForms) {
                if (forms[0].equals(text)) {
                    pronoun.setTextContent(forms[gender + 1]);
                    break;
                }
            }
        }
    }

    // write the content to file
    TransformerFactory transformerFactory = TransformerFactory.newInstance();
    Transformer transformer = transformerFactory.newTransformer();
    DOMSource source = new DOMSource(doc);
    System.out.println("-----------Modified File-----------");
    // try-with-resources so the output stream is flushed and closed.
    try (FileOutputStream out = new FileOutputStream("template.xml")) {
        transformer.transform(source, new StreamResult(out)); // writes changes to file
    }
} catch (Exception e) {
    e.printStackTrace();
}
}
This code I think would work if I could figure out how to associate the tag Pronoun with the pronounParser that this code is in.
I used this example and your template.xml, and I think it works.
/**
 * Reads {@code template.xml}, replaces every {@code #his/her_lc#}
 * placeholder with {@code "her"} and prints the result to standard output.
 */
public static void main(String[] args) {
    File inputXML = new File("template.xml");
    String search = "#his/her_lc#";
    StringBuilder strTotale = new StringBuilder();
    // try-with-resources closes the reader and, unlike an unconditional
    // br.close() in finally, cannot hit a null reader when the file is missing.
    try (BufferedReader br = new BufferedReader(new FileReader(inputXML))) {
        String newString;
        while ((newString = br.readLine()) != null) {
            // readLine() strips the line terminator, so put one back.
            strTotale.append(newString.replace(search, "her"))
                    .append(System.lineSeparator());
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
    System.out.println(strTotale.toString());
}
First you must reassign the result of replaceAll:
newString = newString.replaceAll(search, "her");
Second, I used a StringBuilder to collect all the lines.
I hope this help.
Since strings are immutable you can not modify them, use
StringBuilder/StringBuffer
instead of String.
// Collect the rewritten file content in memory, then write it back.
String search = "#his/her_lc#";
StringBuffer str = new StringBuffer(); // must be initialised before use
BufferedReader br = new BufferedReader(new FileReader(inputXML));
try {
    String newString;
    while ((newString = br.readLine()) != null) {
        // readLine() strips the line terminator, so put one back.
        str.append(newString.replaceAll(search, "her"))
           .append(System.getProperty("line.separator"));
    }
} finally {
    br.close();
}
// Open the writer only after reading: FileWriter truncates the file.
FileWriter writer = new FileWriter(inputXML);
try {
    // Writer has no write(StringBuffer) overload; convert to String first.
    writer.write(str.toString());
} finally {
    writer.close();
}
Dear Users I am working on apache lucene for indexing and searching .
I have to index HTML files stored on the local disk of the computer. The index should cover both the file name and the contents of the HTML files. I am able to store the file names in the Lucene index, but not the HTML file contents; the index should cover not only the text but the entire page, including images, links and URLs. How can I access the contents of those indexed files?
for indexing i am using the following code:
// Build a fresh index of the files under datapath into indexpath.
File indexDir = new File(indexpath);
File dataDir = new File(datapath);
// NOTE(review): files are matched with endsWith(".htm"), so ".html" files
// are skipped — confirm this is intended.
String suffix = ".htm";
IndexWriter indexWriter = new IndexWriter(
        FSDirectory.open(indexDir),
        new SimpleAnalyzer(),
        true,
        IndexWriter.MaxFieldLength.LIMITED);
try {
    indexWriter.setUseCompoundFile(false);
    indexDirectory(indexWriter, dataDir, suffix);
    numIndexed = indexWriter.maxDoc();
    indexWriter.optimize();
} finally {
    // Close the writer even when indexing fails.
    indexWriter.close();
}
/**
 * Recursively indexes every file below {@code dataDir} whose name ends with
 * {@code suffix}.
 *
 * @param indexWriter writer that receives the documents
 * @param dataDir     directory to walk
 * @param suffix      file-name suffix to accept, or {@code null} for all files
 */
private void indexDirectory(IndexWriter indexWriter, File dataDir, String suffix) throws IOException {
    // listFiles() returns null when dataDir is not a directory or cannot be
    // read; report that instead of failing with a NullPointerException.
    File[] entries = dataDir.listFiles();
    if (entries == null) {
        System.out.println("cannot list directory " + dataDir);
        return;
    }
    try {
        for (File f : entries) {
            if (f.isDirectory()) {
                indexDirectory(indexWriter, f, suffix);
            } else {
                indexFileWithIndexWriter(indexWriter, f, suffix);
            }
        }
    } catch (Exception ex) {
        System.out.println("exception 2 is" + ex);
    }
}
/**
 * Adds one file to the index: its content under "contents" and its name
 * under "filename".
 *
 * <p>Hidden, unreadable or missing files, directories, and files whose name
 * does not end with {@code suffix} are skipped.
 *
 * @param indexWriter writer that receives the document
 * @param f           file to index
 * @param suffix      file-name suffix to accept, or {@code null} for any
 */
private void indexFileWithIndexWriter(IndexWriter indexWriter, File f,
        String suffix) throws IOException {
    try {
        if (f.isHidden() || f.isDirectory() || !f.canRead() || !f.exists()) {
            return;
        }
        if (suffix != null && !f.getName().endsWith(suffix)) {
            return;
        }
        Document doc = new Document();
        doc.add(new Field("contents", new FileReader(f)));
        // java.io.File has no getFileName(); the name accessor is getName().
        doc.add(new Field("filename", f.getName(),
                Field.Store.YES, Field.Index.ANALYZED));
        indexWriter.addDocument(doc);
    } catch (Exception ex) {
        System.out.println("exception 4 is" + ex);
    }
}
thanks in advance
This line of code is the reason why your contents is not being stored:
doc.add(new Field("contents", new FileReader(f)));
This method DOES NOT STORE the contents being indexed.
If you are trying to index HTML files, try using JTidy. It will make the process much easier.
Sample Codes:
/**
 * Turns an HTML input stream into a Lucene document by parsing it with
 * JTidy and indexing the text found inside {@code <body>}.
 */
public class JTidyHTMLHandler {

    /**
     * Parses the HTML stream and builds a Lucene document whose "contents"
     * field holds the body text (indexed, not stored). The field is left out
     * when the body is missing or empty.
     *
     * @param is HTML input
     * @return the Lucene document, possibly without any field
     */
    public org.apache.lucene.document.Document getDocument(InputStream is) throws DocumentHandlerException {
        Tidy tidy = new Tidy();
        tidy.setQuiet(true);
        tidy.setShowWarnings(false);
        org.w3c.dom.Document root = tidy.parseDOM(is, null);
        Element rawDoc = root.getDocumentElement();

        org.apache.lucene.document.Document doc =
                new org.apache.lucene.document.Document();
        String body = getBody(rawDoc);
        if (body != null && !body.isEmpty()) {
            doc.add(new Field("contents", body, Field.Store.NO, Field.Index.ANALYZED));
        }
        return doc;
    }

    /**
     * Returns the text of the first {@code <title>} element.
     *
     * @return the title, "" when there is none, or {@code null} when
     *         {@code rawDoc} is {@code null}
     */
    protected String getTitle(Element rawDoc) {
        if (rawDoc == null) {
            return null;
        }
        String title = "";
        NodeList children = rawDoc.getElementsByTagName("title");
        if (children.getLength() > 0) {
            Node first = children.item(0).getFirstChild();
            // The first child may be absent or not a text node (for example a
            // comment); an unchecked cast would throw ClassCastException.
            if (first instanceof Text) {
                title = ((Text) first).getData();
            }
        }
        return title;
    }

    /**
     * Returns the concatenated text of the first {@code <body>} element.
     *
     * @return the body text, "" when there is no body, or {@code null} when
     *         {@code rawDoc} is {@code null}
     */
    protected String getBody(Element rawDoc) {
        if (rawDoc == null) {
            return null;
        }
        String body = "";
        NodeList children = rawDoc.getElementsByTagName("body");
        if (children.getLength() > 0) {
            body = getText(children.item(0));
        }
        return body;
    }

    /**
     * Collects the text below {@code node} depth-first. A single space is
     * appended after each child element so words of adjacent elements do not
     * run together.
     */
    protected String getText(Node node) {
        NodeList children = node.getChildNodes();
        // Local, single-threaded buffer: StringBuilder is sufficient.
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            switch (child.getNodeType()) {
                case Node.ELEMENT_NODE:
                    sb.append(getText(child));
                    sb.append(" ");
                    break;
                case Node.TEXT_NODE:
                    sb.append(((Text) child).getData());
                    break;
                default:
                    // Comments, processing instructions etc. carry no body text.
                    break;
            }
        }
        return sb.toString();
    }
}
To get an InputStream from a URL:
URL url = new URL(htmlURLlocation);
URLConnection connection = url.openConnection();
InputStream stream = connection.getInputStream();
To get an InputStream from a File:
InputStream stream = new FileInputStream(new File (htmlFile));