Reading contents of the XML using java - java

I'm trying to read an XML file using Java. I can successfully read the file, but the problem is that I don't know how to read the values inside the column tags.
Since the column tags are not unique, I have no idea how to read them. Can someone help me?
Thanks in advance.
import java.net.URL;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Downloads an XML document from a fixed URL and prints, for every "row"
 * element, the value of the first child node of each nested "column" element.
 */
public class XMLReader {
/**
 * Entry point; the command-line arguments are not used.
 * Any exception raised while downloading or parsing is caught and its stack trace is printed.
 */
public static void main(String argv[]) {
try {
//new code
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
// Parse directly from the HTTP response stream of the export URL.
Document doc = db.parse(new URL("http://www.cse.lk/listedcompanies/overview.htm?d-16544-e=3&6578706f7274=1").openStream());
doc.getDocumentElement().normalize();
System.out.println("Root element " + doc.getDocumentElement().getNodeName());
// Every <row> element in the document, at any depth.
NodeList nodeLst = doc.getElementsByTagName("row");
System.out.println("Information of all Stocks");
for (int s = 0; s < nodeLst.getLength(); s++) {
Node fstNode = nodeLst.item(s);
if (fstNode.getNodeType() == Node.ELEMENT_NODE) {
Element fstElmnt = (Element) fstNode;
//NodeList fstNmElmntLst = fstElmnt.getElementsByTagName("column");
//Element fstNmElmnt = (Element) fstNmElmntLst.item(0);
//NodeList fstNm = fstNmElmnt.getChildNodes();
//System.out.println("First Tag : " + ((Node) fstNm.item(0)).getNodeValue());
// All <column> elements below the current row; the tag name repeats, so they are visited by index.
NodeList lstNmElmntLst = fstElmnt.getElementsByTagName("column");
// Element lstNmElmnt = (Element) lstNmElmntLst.item(0);
for (int columnIndex = 0; columnIndex < lstNmElmntLst.getLength(); columnIndex++) {
Element lstNmElmnt = (Element) lstNmElmntLst.item(columnIndex);
NodeList lstNm = lstNmElmnt.getChildNodes();
// NOTE(review): for an empty element such as <column/> the child list is empty, item(0) is null,
// and getNodeValue() throws NullPointerException.
System.out.println("Last Tag : " + ((Node) lstNm.item(0)).getNodeValue());
}
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
}

This code :
NodeList fstNmElmntLst = fstElmnt.getElementsByTagName("column");
returns a list of column nodes. Why not just use a for loop to iterate over all of them instead of reading only the first one?
for (int columnIndex = 0; columnIndex < fstNmElmntLst.getLength(); columnIndex++) {
Element fstNmElmnt = (Element) fstNmElmntLst.item(columnIndex);
...
}

You now get a NPE on:
<column/>
and you should check your list size before getting element 0:
NodeList lstNm = lstNmElmnt.getChildNodes();
if (lstNm.getLength() > 0) {
System.out.println("Last Tag : " + ((Node)lstNm.item(0)).getNodeValue());
} else {
System.out.println("No content");
}
And as you're processing text content in nodes, have a look at the answer to this SO question. Text nodes are irritating, as:
<foo>
a
b
c
</foo>
can be or are more than one child node of foo, and getTextContent() can ease the pain a bit.

Related

Java DOM Parser reading xml files information - nodes attributes

I have got an xml file and try to read in some information and try to arrange them.
The data in the xml looks like:
<Class code="1-10" kind="category">
<Meta name="P17b-d" value="2"/>
<SuperClass code="1-10...1-10"/>
<SubClass code="1-100"/>
<Rubric kind="preferred">
<Label xml:lang="de" xml:space="default">Klinische Untersuchung</Label>
</Rubric>
</Class>
and my Java class looks like:
import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
/**
 * Reads ops2022.xml and prints the code attribute, the first Label text and the
 * SuperClass lookup result for "Class" elements whose kind attribute is "category".
 */
public class Importer {
/**
 * Entry point; the command-line arguments are not used.
 * Any exception raised while parsing or printing is caught and its stack trace is printed.
 */
public static void main(String[] args) {
try {
File inputFile = new File("ops2022.xml");
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(inputFile);
doc.getDocumentElement().normalize();
NodeList nList = doc.getElementsByTagName("Class");
// Only the first 10 entries are inspected.
// NOTE(review): if the document holds fewer than 10 Class elements, item(temp) returns null
// and the getNodeName() call below throws NullPointerException.
for (int temp = 0; temp < 10; temp++) {
Node nNode = nList.item(temp);
System.out.println("\nCurrent Element :" + nNode.getNodeName() );
// The cast happens before the node-type check in the condition below.
Element iElement = (Element) nNode;
if (nNode.getNodeType() == Node.ELEMENT_NODE && iElement.getAttribute("kind").equals("category") ) {
Element eElement = (Element) nNode;
System.out.println("code : "
+ eElement.getAttribute("code"));
// Text of the first <Label> descendant of this Class element.
System.out.println("Label : "
+ eElement
.getElementsByTagName("Label")
.item(0)
.getTextContent());
// getElementsByTagName returns a NodeList, so this concatenation prints the list
// object itself rather than the SuperClass code attribute.
System.out.println("SuperClass : "
+ eElement
.getElementsByTagName("SuperClass")
//I don't know how to get the attribute code here
);
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
But how do I get the attribute's information of the "SuperClass" Node? I don't know why, but Java handles eElement.getAttributeNode("SuperClass") as a node, although it is an Element. So I can't use getAttribute().
I added the code from your answer (@Hiran Chaudhuri) to get the information I need:
import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
/**
 * Second attempt at the importer: same traversal of "Class" elements as before,
 * with an instanceof-Attr check pasted into the SuperClass output.
 */
public class Importer {
/**
 * Entry point; the command-line arguments are not used.
 * Any exception raised while parsing or printing is caught and its stack trace is printed.
 */
public static void main(String[] args) {
try {
File inputFile = new File("ops2022.xml");
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(inputFile);
doc.getDocumentElement().normalize();
NodeList nList = doc.getElementsByTagName("Class");
// Only the first 10 entries are inspected.
for (int temp = 0; temp < 10; temp++) {
Node nNode = nList.item(temp);
System.out.println("\nCurrent Element :" + nNode.getNodeName() );
Element iElement = (Element) nNode;
if (nNode.getNodeType() == Node.ELEMENT_NODE && iElement.getAttribute("kind").equals("category") ) {
Element eElement = (Element) nNode;
System.out.println("code : "
+ eElement.getAttribute("code"));
System.out.println("Label : "
+ eElement
.getElementsByTagName("Label")
.item(0)
.getTextContent());
// NOTE(review): as pasted, the statements below sit inside the unfinished println(...)
// argument list, which is not valid Java; presumably the println call was closed before
// them in the code that was actually run.
System.out.println("SuperClass : "
+ eElement
.getElementsByTagName("SuperClass")
// n comes from getElementsByTagName, so it is an Element; the instanceof Attr test
// below is therefore false and nothing is printed for it.
Node n = eElement.getElementsByTagName("SuperClass").item(0);
if (n instanceof Attr) {
Attr a = (Attr)n;
System.out.println(a.getName());
System.out.println(a.getValue());
}
);
}
}
} catch (Exception e) {
e.printStackTrace();
}
}
}
And I get the following
----------------------------
Current Element :Class
Current Element :Class
Current Element :Class
code : 1-10
Label : Klinische Untersuchung
and if I add another else clause like
else {
Attr a = (Attr)n;
System.out.println(a.getValue());
}
java throws the following error:
java.lang.ClassCastException: class com.sun.org.apache.xerces.internal.dom.DeferredElementImpl cannot be cast to class org.w3c.dom.Attr (com.sun.org.apache.xerces.internal.dom.DeferredElementImpl and org.w3c.dom.Attr are in module java.xml of loader 'bootstrap')
at Importer.main(Importer.java:46)
.
With Element.getAttributeNode() you do receive a subclass/subinterface of Node called Attr. This Attr has getName() and getValue() methods that you should be interested in.
Using Element.getAttribute() will directly deliver the value of the corresponding attribute.
If you lost the chance to directly obtain the correct type, you can still recover like
Node n = ... // this is the attribute you are interested in
if (n instanceof Attr) {
Attr a = (Attr)n;
System.out.println(a.getName());
System.out.println(a.getValue());
}
So you are wondering how to access the SuperClass' code attribute. This code prints exactly the one value:
// Parse the file, then print the "code" attribute of every <SuperClass>
// element nested inside each <Class> element.
Document doc = dBuilder.parse(inputFile);
NodeList classElements = doc.getElementsByTagName("Class"); // getElementsByTagName yields Element nodes only
for (int classIdx = 0; classIdx < classElements.getLength(); classIdx++) {
    Element classElement = (Element) classElements.item(classIdx); // one 'Class' element
    NodeList superClasses = classElement.getElementsByTagName("SuperClass"); // again Element nodes only
    for (int superIdx = 0; superIdx < superClasses.getLength(); superIdx++) {
        // Read the attribute value straight off the element and print it.
        System.out.println(((Element) superClasses.item(superIdx)).getAttribute("code"));
    }
}
However this code does the same:
// XPath alternative: select the "code" attribute of SuperClass in one expression.
// Attributes are addressed with "@name" in XPath; evaluate(String, Object) returns
// the string value of the first node the expression selects.
// NOTE(review): the leading "/Class" only matches when Class is the document's root
// element; use "//Class/SuperClass/@code" if Class elements are nested deeper.
Document doc = dBuilder.parse(inputFile);
XPath xpath = XPathFactory.newInstance().newXPath();
String code = xpath.evaluate("/Class/SuperClass/@code", doc);
With XPath expressions you can navigate the DOM tree much more efficiently.
The following code did the job for me:
// For each element in nList, look up the "code" attribute of its <SuperClass>
// descendants; supString keeps the default when none of them carries one.
for (int classIdx = 0; classIdx < nList.getLength(); classIdx++) {
    Node candidate = nList.item(classIdx);
    if (candidate.getNodeType() != Node.ELEMENT_NODE) {
        continue; // only elements can have SuperClass descendants
    }
    String supString = "OPS-2022";
    NodeList superClasses = ((Element) candidate).getElementsByTagName("SuperClass");
    for (int superIdx = 0; superIdx < superClasses.getLength(); superIdx++) {
        Node codeAttr = superClasses.item(superIdx).getAttributes().getNamedItem("code");
        if (codeAttr != null) {
            // The last SuperClass that has a code attribute wins.
            supString = codeAttr.getTextContent();
        }
    }
}
Thanks for your help!

Return values from xml, xml Iteration

I have an xml file as such
<?xml version="1.0" encoding="UTF-8"?>
<folder name="c">
<folder name="program files">
<folder name="uninstall information" />
</folder>
<folder name="users"/>
</folder>
I want to print out "c", "program files", "uninstall information" and "users". What I ultimately want is to print only the values of the name attribute that start with "u", i.e. "users" and "uninstall information".
But I have not been able to print all the values out.
Below is my code, where you can see I have tried two ways, with no success so far.
/**
 * Parses src/main/resources/test.xml and prints the text content of every element.
 *
 * NOTE(review): neither parameter is read by the body; the document always comes
 * from the fixed file path and no filtering by letter takes place.
 *
 * @param xml            not used by the current body
 * @param startingLetter not used by the current body
 * @return always the shared empty list
 */
public static Collection<String> folderNames(String xml, char startingLetter) throws Exception {
DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
try {
DocumentBuilder documentBuilder = documentBuilderFactory.newDocumentBuilder();
// The stream is opened here and never closed in this method.
FileInputStream fis = new FileInputStream("src/main/resources/test.xml");
org.xml.sax.InputSource is = new InputSource(fis);
Document doc = documentBuilder.parse(is);
// "*" matches every element in the document.
NodeList nodeList = doc.getElementsByTagName("*");
for(int i =0; i < nodeList.getLength(); i++) {
Node node = nodeList.item(i);
/// Tried this
// getTextContent() returns the text inside the element, not its attribute values.
if(node.getNodeType() == Node.ELEMENT_NODE) {
String value = node.getTextContent();
System.out.println("value:::" +value);
}
/// tried this
// Element element = (Element)nodeList.item(i);
// NamedNodeMap attributes = element.getAttributes();
// Node nodeValue1 = nodeList.item(i);
// System.out.println(nodeValue1.getAttributes().item(i));
}
} catch (Exception e) {
// The message is fetched but discarded, so failures pass silently.
e.getMessage();
}
return Collections.EMPTY_LIST;
}
For quick testing, my imported classes look like this:
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
My approach without using getElementByTagsName
Document doc = documentBuilder.parse(is);
NodeList nodeList = doc.getElementsByTagName("folder");
for(int i =0; i < nodeList.getLength(); i++) {
if (nodeList.item(i).hasChildNodes()) {
for(int i1 = 0; i1 < nodeList.item(i).getChildNodes().getLength(); i1++) {
Node node = nodeList.item(i).getChildNodes().item(i);
System.out.println(node.getAttributes().item(i));
}
}
Node nodeValue1 = nodeList.item(i);
System.out.println(nodeValue1.getAttributes().item(i));
This isn't complete, but it will require a recursive call due to the hierarchy in the XML.
Example of printing all folder names starting with u:
// Sample document; String.join inserts the "\n" separators between the lines.
String xml = String.join("\n",
        "<?xml version=\"1.0\" encoding=\"UTF-8\"?>",
        "<folder name=\"c\">",
        " <folder name=\"program files\">",
        " <folder name=\"uninstall information\" />",
        " </folder>",
        " <folder name=\"users\"/>",
        "</folder>");
DocumentBuilder documentBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
Document doc = documentBuilder.parse(new InputSource(new StringReader(xml)));
// getElementsByTagName searches all depths, so nested folders are included.
NodeList folders = doc.getElementsByTagName("folder");
int folderIdx = 0;
while (folderIdx < folders.getLength()) {
    String folderName = ((Element) folders.item(folderIdx)).getAttribute("name");
    if (folderName.startsWith("u")) {
        System.out.println(folderName);
    }
    folderIdx++;
}
Output
uninstall information
users
You almost had it. First you have to identify the XML element, which you did.
if(node.getNodeType() == Node.ELEMENT_NODE) {
String value = node.getTextContent();
System.out.println("value:::" +value);
}
but instead of invoking getTextContent(), you need to find the attribute in that element. Some variation of the below will work. Of course, if there is more than one attribute you will need to accommodate looking at them all (using node.getAttributes().getLength()):
// Print the name and value of the element's first attribute, if it has one.
if(node.getNodeType() == Node.ELEMENT_NODE) {
    // An element's attribute map may be empty; item(0) would then return null,
    // so the length is checked before the first attribute is read.
    if (node.getAttributes() != null && node.getAttributes().getLength() > 0) {
        String name = node.getAttributes().item(0).getNodeName();
        String value = node.getAttributes().item(0).getNodeValue();
        System.out.println("attribute name:::" +name + " value:::" +value);
    }
}

Java XPath - find tags prefixed with

I have a following HTML
<data-my-tag>
<data-another-tag>
... content ...
</data-another-tag>
<data-my-tag>
... content ...
</data-my-tag>
</data-my-tag>
Now I need to find all tags starting with prefix <data-. I need to find their names and also their contents. I know this is not possible to achieve with regex, so I started to work with javax.xml.parsers. It is easy for me to find some tags according to a particular name, but I am unable to find tags starting with some prefix.
What is the expression or code to find tags starting with prefix?
You can use XPath's starts-with function:
/**
 * Prints the name and trimmed text content of every element whose local
 * name starts with the given prefix.
 *
 * @param source the XML input to search
 * @param prefix the leading characters an element's local name must have
 * @throws RuntimeException wrapping any XPathException raised during evaluation
 */
public void findElements(InputSource source,
                         String prefix) {
    try {
        // "//*" visits every element; starts-with() filters on the local name.
        String expression = "//*[starts-with(local-name(), '" + prefix + "')]";
        XPath evaluator = XPathFactory.newInstance().newXPath();
        NodeList found = (NodeList) evaluator.evaluate(expression, source, XPathConstants.NODESET);
        for (int idx = 0, total = found.getLength(); idx < total; idx++) {
            Node element = found.item(idx);
            String elementName = element.getNodeName();
            String elementText = element.getTextContent().trim();
            System.out.println("Element: " + elementName);
            System.out.println("Text: " + elementText);
            System.out.println();
        }
    } catch (XPathException e) {
        throw new RuntimeException(e);
    }
}
Can we use something like this :
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.File;
/**
 * Prints the text content of every direct child element of the document root
 * whose tag name starts with "data-".
 */
public class Demo {
    /** Tag-name prefix to match; node names never include the opening angle bracket. */
    private static final String TAG_PREFIX = "data-";

    /**
     * Parses input.txt from the working directory and prints the matching elements' text.
     * Any exception raised while parsing is caught and its stack trace is printed.
     *
     * @param args not used
     */
    public static void main(String[] args) {
        try {
            File inputFile = new File("input.txt");
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
            Document doc = dBuilder.parse(inputFile);
            doc.getDocumentElement().normalize();
            // Only the root's direct children are examined; nested matches appear as part of
            // their parent's text content.
            NodeList nList = doc.getDocumentElement().getChildNodes();
            for (int temp = 0; temp < nList.getLength(); temp++) {
                Node nNode = nList.item(temp);
                // Both conditions must hold: the node is an element AND its name has the prefix.
                // (With "||" and a "<data-" prefix every element was printed.)
                if (nNode.getNodeType() == Node.ELEMENT_NODE && nNode.getNodeName().startsWith(TAG_PREFIX)) {
                    System.out.println(nNode.getTextContent());
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

Parsing XML from webpage

If I copy and paste the xml from this site into a xml file I can parse it with java
http://api.indeed.com/ads/apisearch?publisher=8397709210207872&q=java&l=austin%2C+tx&sort&radius&st&jt&start&limit&fromage&filter&latlong=1&chnl&userip=1.2.3.4&v=2
However, I want to parse it directly from a webpage if possible!
Here's my current code:
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import org.xml.sax.SAXException;
import java.io.File;
import java.io.IOException;
/**
 * Parses a job-search result XML file and prints selected fields of every
 * "result" element it contains.
 */
public class XMLParser {

    /**
     * Reads the XML file at the given path and prints, for each "result" element,
     * its job title, company, city, state, country, date, snippet, latitude and longitude.
     * Parser, SAX and I/O failures are caught and their stack trace is printed.
     *
     * @param parse path of the XML file to read
     */
    public void readXML(String parse) {
        File xml = new File(parse);
        DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder dBuilder;
        try {
            dBuilder = dbFactory.newDocumentBuilder();
            Document doc = dBuilder.parse(xml);
            NodeList nList = doc.getElementsByTagName("result");
            System.out.println("----------------------------");
            for (int temp = 0; temp < nList.getLength(); temp++) {
                Node nNode = nList.item(temp);
                if (nNode.getNodeType() != Node.ELEMENT_NODE) {
                    continue;
                }
                Element eElement = (Element) nNode;
                System.out.println("job title : " + textOf(eElement, "jobtitle"));
                System.out.println("Company: " + textOf(eElement, "company"));
                System.out.println("City : " + textOf(eElement, "city"));
                System.out.println("State : " + textOf(eElement, "state"));
                System.out.println("Country : " + textOf(eElement, "country"));
                System.out.println("Date posted : " + textOf(eElement, "date"));
                System.out.println("Job summary : " + textOf(eElement, "snippet"));
                System.out.println("Latitude : " + textOf(eElement, "latitude"));
                System.out.println("longitude : " + textOf(eElement, "longitude"));
            }
        } catch (ParserConfigurationException | SAXException | IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns the text content of the first descendant of {@code parent} with the
     * given tag name, or an empty string when no such descendant exists
     * (item(0) is null in that case).
     */
    private static String textOf(Element parent, String tagName) {
        Node first = parent.getElementsByTagName(tagName).item(0);
        return first == null ? "" : first.getTextContent();
    }

    /** Runs the parser against test.xml in the working directory. */
    public static void main(String[] args) {
        new XMLParser().readXML("test.xml");
    }
}
any help would be appreciated.
Give it the URI instead of the XML. It will download it for you.
Document doc = dBuilder.parse(uriString)
Please find the code snippet like this
String url = "http://api.indeed.com/ads/apisearch?publisher=8397709210207872&q=java&l=austin%2C+tx&sort&radius&st&jt&start&limit&fromage&filter&latlong=1&chnl&userip=1.2.3.4&v=2";
try
{
DocumentBuilderFactory f = DocumentBuilderFactory.newInstance();
DocumentBuilder b = f.newDocumentBuilder();
Document doc = b.parse(url);
}
You need to process the elements/nodes you want inside a for loop, so that it can scan through the XML file and find the node you are searching for.
The following reads the XML file and creates an XML structure:
builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
Document doc = builder.parse(connection.getInputStream());
NodeList nodes = doc.getElementsByTagName("mode");
for (int i = 0; i < nodes.getLength(); i++)
Element element = (Element) nodes.item(i);
//Gets tag from XML and it´s content
NodeList nodeMode = element.getElementsByTagName("mode");
Element elemMode = (Element) nodeMode.item(0);
and after if you want to pick out a value and parse to an int or what you want you do like this:
int currentMode = Integer.parseInt(elemMode.getFirstChild().getTextContent());
That's how I parsed data directly from url http://www.nbp.pl/kursy/xml/+something
/** Plain holder for the two rates that getData reads from one "pozycja" entry. */
static class Kurs {
// Parsed from the kurs_sprzedazy element (presumably the selling rate).
public float kurs_sprzedazy;
// Parsed from the kurs_kupna element (presumably the buying rate).
public float kurs_kupna;
}
private static DocumentBuilder dBuilder;
/**
 * Downloads the table {@code filename} from www.nbp.pl and returns the rates of the
 * first "pozycja" entry whose kod_waluty text equals {@code currency}.
 *
 * @param filename table name, inserted between the base URL and ".xml"
 * @param currency currency code to look for
 * @return the matching entry's rates, or null when no entry matches
 * @throws Exception if downloading, parsing or number conversion fails
 */
private static Kurs getData(String filename, String currency) throws Exception {
    Document doc = dBuilder.parse("http://www.nbp.pl/kursy/xml/"+filename+".xml");
    doc.getDocumentElement().normalize();
    NodeList positions = doc.getElementsByTagName("pozycja");
    for (int idx = 0; idx < positions.getLength(); idx++) {
        Element position = (Element) positions.item(idx);
        String code = position.getElementsByTagName("kod_waluty").item(0).getTextContent();
        if (!code.equals(currency)) {
            continue;
        }
        Kurs result = new Kurs();
        // The feed writes decimals with a comma; Float.parseFloat needs a dot.
        String selling = position.getElementsByTagName("kurs_sprzedazy").item(0).getTextContent()
                .replace(',', '.');
        result.kurs_sprzedazy = Float.parseFloat(selling);
        String buying = position.getElementsByTagName("kurs_kupna").item(0).getTextContent()
                .replace(',', '.');
        result.kurs_kupna = Float.parseFloat(buying);
        return result;
    }
    return null;
}

Possible way to parse the text alone from an xml document using java dom

I need to extract only the text from an XML file. To read a specific tag I use the code below, but I am not sure how to get all the text from the XML: the XML files differ, and I don't know their root node and child nodes in advance, but I need only the text from the XML.
// Parse the file, print the root element's name, then print the first child
// node's value of the first <firstname> inside every <employee> element.
try {
    DocumentBuilder dBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
    Document doc = dBuilder.parse(streamLimiter.getFile());
    Element root = doc.getDocumentElement();
    root.normalize();
    System.out.println("Root element :" + root.getNodeName());
    NodeList employees = doc.getElementsByTagName("employee");
    System.out.println("-----------------------");
    for (int idx = 0; idx < employees.getLength(); idx++) {
        Node employee = employees.item(idx);
        if (employee.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        Node firstNameElement = ((Element) employee).getElementsByTagName("firstname").item(0);
        // First child of <firstname>, i.e. its leading text node when it has one.
        Node firstNameText = firstNameElement.getFirstChild();
        System.out.println("First Name : " + firstNameText.getNodeValue());
    }
} catch (Exception e) {
    e.printStackTrace();
}
Quoting jsight's reply in this post: Getting XML Node text value with Java DOM
import java.io.ByteArrayInputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Parses a small in-memory document and prints, for every child node of each
 * top-level element, its name, type, text content and node value.
 */
class Test {

    /**
     * @param args the command line arguments
     */
    public static void main(String[] args) throws Exception {
        String xml = "<add job=\"351\">\n"
                + " <tag>foobar</tag>\n"
                + " <tag>foobar2</tag>\n"
                + "</add>";
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        org.w3c.dom.Document doc = builder.parse(new ByteArrayInputStream(xml.getBytes()));

        // Children of the root element: whitespace text nodes and the <tag> elements.
        NodeList topLevel = doc.getFirstChild().getChildNodes();
        for (int outer = 0; outer < topLevel.getLength(); outer++) {
            Node child = topLevel.item(outer);
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue; // skip the whitespace between the tags
            }
            NodeList grandChildren = child.getChildNodes();
            for (int inner = 0; inner < grandChildren.getLength(); inner++) {
                Node grandChild = grandChildren.item(inner);
                // DEBUG PRINTS
                System.out.println(grandChild.getNodeName() + ": type (" + grandChild.getNodeType() + "):");
                if (grandChild.hasChildNodes()) {
                    Node first = grandChild.getFirstChild();
                    System.out.println(first.getTextContent());
                    System.out.println(first.getNodeValue());
                }
                System.out.println(grandChild.getTextContent());
                System.out.println(grandChild.getNodeValue());
            }
        }
    }
}
Output:
#text: type (3):
foobar
foobar
#text: type (3):
foobar2
foobar2
Adapt this code to your problem and it should work.

Categories