Get element text content from xml with <break> using DOM - java

I have the following part from xml file:
<database>
<document form='Record'>
<item name='SystemsList'><text>2000;Generl;All equipment<break/>
2001;General;All equipment<break/>
2002;General;All equipment<break/>
2003;General;All Equipment</text></item>
<item name='RmNumber'><text>001</text></item>
<item name='Reason'><text>Don't know</text></item>
<item name='Something'><text>smth</text></item>
</document>
</database>
For now I use the following code:
Document doc1 = dBuilder.parse(fXmlFile1);
doc1.getDocumentElement().normalize();
NodeList kList1 =doc1.getElementsByTagName("item");
for(int temp=0;temp<kList1.getLength();temp++)
{
Node kNode1=kList1.item(temp);
//System.out.println("\nCurrent Element :" + kNode.getNodeName());
if (kNode1.getNodeType() == Node.ELEMENT_NODE) {
Element eElement = (Element) kNode1;
//System.out.println("node name"+eElement.getNodeName());
Node in=eElement.getFirstChild();
//System.out.println("__________________"+in.getFirstChild().getTextContent());
//System.out.println("IN text content----:"+in.getTextContent()+":--------");
if(eElement.getAttribute("name").equals("SystemsList")==true)
{
NodeList kList2=in.getChildNodes();
//if((in.getTextContent()!=null)&&!(in.getTextContent()).isEmpty()&& !(in.getTextContent().length()==0))
//{
for(int k=0;k<kList2.getLength();k++)
{
Node kNode2 = kList2.item(k);
if((kNode2.getTextContent()!=null)&&!(kNode2.getTextContent()).isEmpty()&& !(kNode2.getTextContent().length()==0))
stringBuilder.append(kNode2.getTextContent()+"\n");
}
//}
}
}
}
String s=new String(stringBuilder);
String sa[]=s.split("\n");
System.out.println("size"+sa.length);
for(String st:sa)
{
System.out.println(st);
}
This code makes the following String="2000;General;All equipment2001;General;All equipment2002;General;All equipment2003;General;All Equipment".
The question is: how can I turn this <break/>-separated XML content into an ArrayList in which each element is one line from the XML above, or into a String array, for example: SystemsListByYear[0]="2000;Generl;All equipment", SystemsListByYear[1]="2001;General;All equipment", etc.?
P.S. I use the DOM library.
Edited-question-to-correct
Edit part:
if (kNode1.getNodeType() == Node.ELEMENT_NODE) {
Element eElement = (Element) kNode1;
//System.out.println("node name"+eElement.getNodeName());
Node in=eElement.getFirstChild();
//System.out.println("__________________"+in.getFirstChild().getTextContent());
//System.out.println("IN text content----:"+in.getTextContent()+":--------");
if(eElement.getAttribute("name").equals("SystemsList")==true)
{
NodeList kList2=in.getChildNodes();
//if((in.getTextContent()!=null)&&!(in.getTextContent()).isEmpty()&& !(in.getTextContent().length()==0))
//{
for(int k=0;k<kList2.getLength();k++)
{
Node kNode2 = kList2.item(k);
if((kNode2.getTextContent()!=null)&&!(kNode2.getTextContent()).isEmpty()&& !(kNode2.getTextContent().length()==0))
stringBuilder.append(kNode2.getTextContent()+"\n");
}
//}
}
}

Then this will solve your problem
package com.test;
import java.io.File;
import java.io.FileInputStream;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Prints every line of the {@code SystemsList} item found in {@code src/file.xml}.
 *
 * <p>Inside the item's {@code <text>} element the lines are separated by
 * {@code <break/>} elements, so the content is split at those elements rather
 * than at whatever newline characters happen to be present in the file.
 */
public class Test {

    /** Value of the {@code name} attribute that identifies the item to split. */
    private static final String SYSTEMS_LIST = "SystemsList";

    /**
     * Parses the file, collects the break-separated lines and prints their count
     * followed by one line per entry.
     *
     * @param args unused
     * @throws Exception if the file cannot be read or is not well-formed XML
     */
    public static void main(String args[]) throws Exception {
        DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = builderFactory.newDocumentBuilder();
        Document doc1;
        // Close the stream as soon as parsing is done; it used to be leaked.
        try (FileInputStream fileInputStream = new FileInputStream(new File("src/file.xml"))) {
            doc1 = builder.parse(fileInputStream);
        }
        doc1.getDocumentElement().normalize();
        NodeList kList1 = doc1.getElementsByTagName("item");
        List<String> alist = new ArrayList<String>();
        for (int temp = 0; temp < kList1.getLength(); temp++) {
            Node kNode1 = kList1.item(temp);
            System.out.println("\nCurrent Element :" + kNode1.getNodeName());
            if (kNode1.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element eElement = (Element) kNode1;
            System.out.println("node name" + eElement.getNodeName());
            // Only the SystemsList item holds the list; other items (RmNumber,
            // Reason, ...) must not be appended to it.
            if (!SYSTEMS_LIST.equals(eElement.getAttribute("name"))) {
                continue;
            }
            // Look the <text> element up by name: getFirstChild() may be a
            // whitespace text node, or null for an empty item.
            NodeList texts = eElement.getElementsByTagName("text");
            for (int t = 0; t < texts.getLength(); t++) {
                alist.addAll(splitAtBreaks(texts.item(t)));
            }
        }
        System.out.println("size" + alist.size());
        for (String st : alist) {
            System.out.println(st);
        }
    }

    /**
     * Splits the content of a {@code <text>} element into lines, starting a new
     * line at every {@code <break/>} child element.
     *
     * @param textElement the element whose children are text interleaved with breaks
     * @return the trimmed, non-empty lines in document order
     */
    private static List<String> splitAtBreaks(Node textElement) {
        List<String> lines = new ArrayList<String>();
        StringBuilder current = new StringBuilder();
        NodeList parts = textElement.getChildNodes();
        for (int i = 0; i < parts.getLength(); i++) {
            Node part = parts.item(i);
            short type = part.getNodeType();
            if (type == Node.ELEMENT_NODE && "break".equals(part.getNodeName())) {
                flushLine(lines, current);
            } else if (type != Node.COMMENT_NODE && type != Node.PROCESSING_INSTRUCTION_NODE) {
                current.append(part.getTextContent());
            }
        }
        flushLine(lines, current);
        return lines;
    }

    /** Adds the trimmed buffer to {@code lines} if it is not empty, then clears the buffer. */
    private static void flushLine(List<String> lines, StringBuilder current) {
        String line = current.toString().trim();
        if (!line.isEmpty()) {
            lines.add(line);
        }
        current.setLength(0);
    }
}
output
node nameitem
size4
2000;Generl;All equipment
2001;General;All equipment
2002;General;All equipment
2003;General;All Equipment

Split the text content at <break/> and add each split element to an ArrayList.

Related

XML parsing by tag-names and attributes method in DOM tree structure

I am trying to parse this XML file, but I'm only getting the root element and not its child nodes.
I need to read some specific values from the nodes, for example by using the .item() method. Since I am not reaching the child nodes, I do not get the expected values. Please help me solve this.
XML file
<Ws>
<Id V='862631039910699'>
<Dt V='08/07/22;11/25'>
<T V='24.3;24.3;24.3'/>
<H V='98.0;98.0;98.0'/>
<W V='1.3;272'/>
<G V='25;2.4'/>
<A V='0.00;468;472;471'/>
<D V='0.00;8.9;8.065;0.0000;0.0000'/>
</Dt>
</Id>
</Ws>
package api;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import java.io.File;
/**
 * Parses the weather-station XML file with the DOM parser and tries to print
 * the {@code V} attribute of several nested elements by positional navigation
 * from the document element.
 */
public class Web{
public static void main(String argv[])
{
try {
File file = new File("C:\\Users\\Prakhar\\OneDrive\\Desktop\\WBE2.xml.txt");
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
Document doc = db.parse(file);
doc.getDocumentElement().normalize();
System.out.println("Root element: " + doc.getDocumentElement().getNodeName());
// "Ws" is the root tag of the sample file, so this list holds the root element.
NodeList nodeList = doc.getElementsByTagName("Ws");
for (int i = 0; i < nodeList.getLength(); ++i) {
Node node = nodeList.item(i);
System.out.println("\nNode Name :" + node.getNodeName());
if (node.getNodeType()== Node.ELEMENT_NODE) {
// tElement is never used below; every lookup starts again from doc.getDocumentElement().
Element tElement = (Element)node;
// All five lookups follow the same path: root -> child 0 -> first child -> child k -> attribute "V".
// NOTE(review): getChildNodes()/getFirstChild() also return whitespace text nodes when the
// file is indented, so child 0 is presumably not the <Id> element -- verify against the file.
System.out.println("IMEI: " +
doc.getDocumentElement().getChildNodes().item(0).getFirstChild().getChildNodes().item(0).getAttributes().getNamedItem("V").getNodeValue());
System.out.println("Date/Time: " +
doc.getDocumentElement().getChildNodes().item(0).getFirstChild().getChildNodes().item(1).getAttributes().getNamedItem("V").getNodeValue());
System.out.println("Temperature: " +
doc.getDocumentElement().getChildNodes().item(0).getFirstChild().getChildNodes().item(2).getAttributes().getNamedItem("V").getNodeValue());
// NOTE(review): Humidity and Wind Speed reuse child indexes 1 and 2, the same indexes as
// Date/Time and Temperature above -- looks like a copy/paste slip, confirm intended indexes.
System.out.println("Humidity: " +
doc.getDocumentElement().getChildNodes().item(0).getFirstChild().getChildNodes().item(1).getAttributes().getNamedItem("V").getNodeValue());
System.out.println("Wind Speed: " +
doc.getDocumentElement().getChildNodes().item(0).getFirstChild().getChildNodes().item(2).getAttributes().getNamedItem("V").getNodeValue());
}
}
}
// Any failure (I/O, parse error, NullPointerException from the chains above) is only printed.
catch (Exception e) {
System.out.println(e);
}
}
}

Reading XML tags getting value from inner tag

I don't know how to explain my situation, I can provide example below.
I have an XML file to be read in Java, something like this:
<Author AffiliationIDS="Aff1">
<AuthorName DisplayOrder="Western">
<GivenName>Wei</GivenName>
<GivenName>Long</GivenName>
<FamilyName>
<Value>Tan</Value>
</FamilyName>
</AuthorName>
</Author>
As you can see, the text inside the <FamilyName> tag is wrapped in a <Value> tag. This is because the XSD declares the element with maxOccurs="unbounded", which means the element can contain more than one value. How should I modify the code so that it reads the <FamilyName> tag and gets every <Value> element, no matter how many occurrences of <Value> exist?
Example:
<FamilyName>
<Value>Sarah</Value>
<Value>Johnson</Value>
</FamilyName>
The code look like this.
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import java.io.File;
/**
 * Prints, for every {@code AuthorName} element of the XML file, the text of its
 * first {@code GivenName} and first {@code FamilyName} descendants.
 */
public class ReadXMLFile {

    /**
     * Parses the file with the DOM parser and writes the names to standard output.
     * Any exception is reported through its stack trace.
     *
     * @param argv unused
     */
    public static void main(String argv[]) {
        try {
            DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document xml = parser.parse(new File("/fileaddress/test-1.xml"));
            xml.getDocumentElement().normalize();
            System.out.println("Root element :" + xml.getDocumentElement().getNodeName());

            NodeList authors = xml.getElementsByTagName("AuthorName");
            System.out.println("----------------------------");

            for (int idx = 0; idx < authors.getLength(); idx++) {
                Node candidate = authors.item(idx);
                System.out.println("\nCurrent Element :" + candidate.getNodeName());
                if (candidate.getNodeType() != Node.ELEMENT_NODE) {
                    continue;
                }
                Element author = (Element) candidate;

                // Each value is looked up right before it is printed, one after the other.
                String givenName = author.getElementsByTagName("GivenName").item(0).getTextContent();
                System.out.println("Given Name : " + givenName);

                String familyName = author.getElementsByTagName("FamilyName").item(0).getTextContent();
                System.out.println("Family Name : " + familyName);
            }
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }
}
Get the FamilyName node by getElementsByTagName("FamilyName").item(0) and loop over its child nodes (.getChildNodes()) and get the value of the textNode
Or,
You can even getElementsByTagName("Value") if you are sure that value tag does not occur anywhere else other than inside FamilyName
Here is a code Sample
NodeList children = doc.getElementsByTagName("FamilyName").item(0).getChildNodes();
for(int i=0;i<children.getLength();i++) {
if(children.item(i).getNodeType()== Node.ELEMENT_NODE) {
Element child = (Element)children.item(i);
System.out.println(child.getTextContent());
}
}

reading data using JAVA from XML files

I know there are a lot of answers to this question, but none of them worked in my case. I would like to read data from the European Central Bank from this link: ECB. For example, how do I read the "rate" of USD where time="2015-02-27", and how do I read the "rate" of USD for all 90 days?
One of the simplest ways to do it is to use a DOM (Document Object Model) parser. It loads your XML document into memory and turns it into a tree of Nodes that you can traverse, so you can get the information of any node at any position. It is memory-consuming and is generally less preferred than a SAX parser.
Here is an example:
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import java.io.File;
/**
 * Lists the {@code currency} and {@code rate} attributes of every {@code Cube}
 * element in the ECB reference-rate file, using the DOM parser.
 */
public class DomParsing {

    /** Location of the downloaded ECB 90-day history file. */
    public static final String ECB_DATAS ="C:\\xml\\eurofxref-hist-90d.xml";

    /**
     * Parses {@link #ECB_DATAS} and prints one line per {@code Cube} element.
     * Any exception is reported through its stack trace.
     *
     * @param argv unused
     */
    public static void main(String argv[]) {
        try {
            DocumentBuilder parser = DocumentBuilderFactory.newInstance().newDocumentBuilder();
            Document tree = parser.parse(new File(ECB_DATAS));
            tree.getDocumentElement().normalize();
            System.out.println("Root element :" + tree.getDocumentElement().getNodeName());

            NodeList cubes = tree.getElementsByTagName("Cube");
            for (int idx = 0; idx < cubes.getLength(); idx++) {
                Node cube = cubes.item(idx);
                System.out.println("\nCurrent Element :" + cube.getNodeName());
                if (cube.getNodeType() != Node.ELEMENT_NODE) {
                    continue;
                }
                Element cubeElement = (Element) cube;
                String currency = cubeElement.getAttribute("currency");
                String rate = cubeElement.getAttribute("rate");
                System.out.println("currency : " + currency + " and rate is " + rate);
            }
        } catch (Exception failure) {
            failure.printStackTrace();
        }
    }
}
Applied to your file produces the following result:
currency : BGN and rate is 1.9558
Current Element :Cube
currency : CZK and rate is 27.797
Current Element :Cube
currency : DKK and rate is 7.444

Trying to parse XML data with Java - Getting error: "The method getNodeType() is undefined for the type NodeList"

I'm just starting out with learning how to process/parse XML data in Java. I'm getting the error, "The method getNodeType() is undefined for the type NodeList" on the line, after my for-loop, that contains:
if (n.getNodeType() == Node.ELEMENT_NODE){
The type of error seems like I forgot to import something, but I believe I got everything. I am using an XML example from microsoft, in the following link:
http://msdn.microsoft.com/en-us/library/ms762271(v=vs.85).aspx
Thanks in advance.
import java.io.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Element;
/**
 * Parses an XML file with the DOM parser and attempts to print several
 * attributes of each matched element. This is the code the question is about:
 * it does not compile as written (see the notes inside the loop).
 */
public class Files {
// NOTE(review): DocumentBuilder.parse also declares org.xml.sax.SAXException, which is
// not listed in this throws clause -- confirm against the javax.xml.parsers Javadoc.
public static void main (String [] args) throws IOException, ParserConfigurationException{
String address = "/home/leo/workspace/Test/Files/src/file.xml";
File xmlFile = new File(address);
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = factory.newDocumentBuilder();
Document doc = dBuilder.parse(xmlFile);
doc.getDocumentElement().normalize();
System.out.println(doc.getDocumentElement().getNodeName());
// NOTE(review): "book id" looks like a tag name plus an attribute name; the lookup is by
// tag name only, so this presumably should be "book" -- verify against the sample XML.
NodeList n = doc.getElementsByTagName("book id");
for (int temp = 0; temp < n.getLength(); temp++){
System.out.println(n.item(temp));
// n is the NodeList itself, not the current node: this call is the line reported as
// "The method getNodeType() is undefined for the type NodeList".
if (n.getNodeType() == Node.ELEMENT_NODE){
// The cast is likewise applied to the NodeList n rather than to n.item(temp).
Element e = (Element) n;
System.out.println("author : " + e.getAttribute("author"));
System.out.println("title : " + e.getAttribute("title") );
System.out.println("genre : " + e.getAttribute("genre"));
System.out.println("price : " + e.getAttribute("price"));
System.out.println("publish_date : " + e.getAttribute("publish_date"));
System.out.println("description : " + e.getAttribute("description"));
}
}
}
}
You are calling getNodeType() on a NodeList object (n).
You need to call this function on a Node object. Example :
n.item(temp).getNodeType();

Getting XML Node text value with Java DOM

I can't fetch text value with Node.getNodeValue(), Node.getFirstChild().getNodeValue() or with Node.getTextContent().
My XML is like
<add job="351">
<tag>foobar</tag>
<tag>foobar2</tag>
</add>
And I'm trying to get the tag value (fetching non-text elements works fine). My Java code looks like this:
Document doc = db.parse(new File(args[0]));
Node n = doc.getFirstChild();
NodeList nl = n.getChildNodes();
Node an,an2;
for (int i=0; i < nl.getLength(); i++) {
an = nl.item(i);
if(an.getNodeType()==Node.ELEMENT_NODE) {
NodeList nl2 = an.getChildNodes();
for(int i2=0; i2<nl2.getLength(); i2++) {
an2 = nl2.item(i2);
// DEBUG PRINTS
System.out.println(an2.getNodeName() + ": type (" + an2.getNodeType() + "):");
if(an2.hasChildNodes())
System.out.println(an2.getFirstChild().getTextContent());
if(an2.hasChildNodes())
System.out.println(an2.getFirstChild().getNodeValue());
System.out.println(an2.getTextContent());
System.out.println(an2.getNodeValue());
}
}
}
It prints out
tag type (1):
tag1
tag1
tag1
null
#text type (3):
_blank line_
_blank line_
...
Thanks for the help.
I'd print out the result of an2.getNodeName() as well for debugging purposes. My guess is that your tree crawling code isn't crawling to the nodes that you think it is. That suspicion is enhanced by the lack of checking for node names in your code.
Other than that, the javadoc for Node defines "getNodeValue()" to return null for Nodes of type Element. Therefore, you really should be using getTextContent(). I'm not sure why that wouldn't give you the text that you want.
Perhaps iterate the children of your tag node and see what types are there?
Tried this code and it works for me:
String xml = "<add job=\"351\">\n" +
" <tag>foobar</tag>\n" +
" <tag>foobar2</tag>\n" +
"</add>";
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
ByteArrayInputStream bis = new ByteArrayInputStream(xml.getBytes());
Document doc = db.parse(bis);
Node n = doc.getFirstChild();
NodeList nl = n.getChildNodes();
Node an,an2;
for (int i=0; i < nl.getLength(); i++) {
an = nl.item(i);
if(an.getNodeType()==Node.ELEMENT_NODE) {
NodeList nl2 = an.getChildNodes();
for(int i2=0; i2<nl2.getLength(); i2++) {
an2 = nl2.item(i2);
// DEBUG PRINTS
System.out.println(an2.getNodeName() + ": type (" + an2.getNodeType() + "):");
if(an2.hasChildNodes()) System.out.println(an2.getFirstChild().getTextContent());
if(an2.hasChildNodes()) System.out.println(an2.getFirstChild().getNodeValue());
System.out.println(an2.getTextContent());
System.out.println(an2.getNodeValue());
}
}
}
Output was:
#text: type (3): foobar foobar
#text: type (3): foobar2 foobar2
If your XML goes quite deep, you might want to consider using XPath, which comes with your JRE, so you can access the contents far more easily using:
String text = xp.evaluate("//add[@job='351']/tag[position()=1]/text()",
document.getDocumentElement());
Full example:
import static org.junit.Assert.assertEquals;
import java.io.StringReader;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathFactory;
import org.junit.Before;
import org.junit.Test;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
public class XPathTest {
private Document document;
#Before
public void setup() throws Exception {
String xml = "<add job=\"351\"><tag>foobar</tag><tag>foobar2</tag></add>";
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
document = db.parse(new InputSource(new StringReader(xml)));
}
#Test
public void testXPath() throws Exception {
XPathFactory xpf = XPathFactory.newInstance();
XPath xp = xpf.newXPath();
String text = xp.evaluate("//add[#job='351']/tag[position()=1]/text()",
document.getDocumentElement());
assertEquals("foobar", text);
}
}
I use a very old Java, JDK 1.4.08, and I had the same issue. The Node class did not have the getTextContent() method for me. I had to use Node.getFirstChild().getNodeValue() instead of Node.getNodeValue() to get the value of the node. This fixed it for me.
If you are open to vtd-xml, which excels at both performance and memory efficiency, below is the code to do what you are looking for, using both XPath and manual navigation. The overall code is much more concise and easier to understand.
import com.ximpleware.*;
/**
 * Prints the text of the {@code <tag>} elements in {@code input.xml} with
 * vtd-xml, first by manual navigation and then with an XPath query.
 */
public class queryText {

    /**
     * Parses {@code input.xml} and prints the tag texts; returns silently if
     * the file cannot be parsed.
     *
     * @param s unused
     * @throws VTDException if navigation or XPath evaluation fails
     */
    public static void main(String[] s) throws VTDException{
        VTDGen vg = new VTDGen();
        if (!vg.parseFile("input.xml", true))
            return;
        VTDNav vn = vg.getNav();
        AutoPilot ap = new AutoPilot(vn);
        // first manually navigate
        if(vn.toElement(VTDNav.FC,"tag")){
            int i= vn.getText();
            if (i!=-1){
                System.out.println("text ===>"+vn.toString(i));
            }
            if (vn.toElement(VTDNav.NS,"tag")){
                i=vn.getText();
                // Same guard as for the first tag: -1 means the element has no text.
                if (i!=-1){
                    System.out.println("text ===>"+vn.toString(i));
                }
            }
        }
        // second version use XPath
        ap.selectXPath("/add/tag/text()");
        int i=0;
        while((i=ap.evalXPath())!= -1){
            System.out.println("text node ====>"+vn.toString(i));
        }
    }
}

Categories