I want to read XML data using XPath in Java, so for the information I have gathered I am not able to parse XML according to my requirement.
here is what I want to do:
Get XML file from online via its URL, then use XPath to parse it, I want to create two methods in it. One is in which I enter a specific node attribute id, and I get all the child nodes as result, and second is suppose I just want to get a specific child node value only
<?xml version="1.0"?>
<howto>
<topic name="Java">
<url>http://www.rgagnonjavahowto.htm</url>
<car>taxi</car>
</topic>
<topic name="PowerBuilder">
<url>http://www.rgagnon/pbhowto.htm</url>
<url>http://www.rgagnon/pbhowtonew.htm</url>
</topic>
<topic name="Javascript">
<url>http://www.rgagnon/jshowto.htm</url>
</topic>
<topic name="VBScript">
<url>http://www.rgagnon/vbshowto.htm</url>
</topic>
</howto>
In above example I want to read all the elements if I search via #name and also one function in which I just want the url from #name 'Javascript' only return one node element.
You need something along the lines of this:
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(<uri_as_string>);
XPathFactory xPathfactory = XPathFactory.newInstance();
XPath xpath = xPathfactory.newXPath();
XPathExpression expr = xpath.compile(<xpath_expression>);
Then you call expr.evaluate() passing in the document defined in that code and the return type you are expecting, and cast the result to the object type of the result.
If you need help with a specific XPath expressions, you should probably ask it as separate questions (unless that was your question in the first place here - I understood your question to be how to use the API in Java).
Edit: (Response to comment): This XPath expression will get you the text of the first URL element under PowerBuilder:
/howto/topic[#name='PowerBuilder']/url/text()
This will get you the second:
/howto/topic[#name='PowerBuilder']/url[2]/text()
You get that with this code:
expr.evaluate(doc, XPathConstants.STRING);
If you don't know how many URLs are in a given node, then you should rather do something like this:
XPathExpression expr = xpath.compile("/howto/topic[#name='PowerBuilder']/url");
NodeList nl = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
And then loop over the NodeList.
You can try this.
XML Document
Save as employees.xml.
<?xml version="1.0" encoding="UTF-8"?>
<Employees>
<Employee id="1">
<age>29</age>
<name>Pankaj</name>
<gender>Male</gender>
<role>Java Developer</role>
</Employee>
<Employee id="2">
<age>35</age>
<name>Lisa</name>
<gender>Female</gender>
<role>CEO</role>
</Employee>
<Employee id="3">
<age>40</age>
<name>Tom</name>
<gender>Male</gender>
<role>Manager</role>
</Employee>
<Employee id="4">
<age>25</age>
<name>Meghan</name>
<gender>Female</gender>
<role>Manager</role>
</Employee>
</Employees>
Parser class
The class have following methods
List item
A Method that will return the Employee Name for input ID.
A Method that will return list of Employees Name with age greater than the input age.
A Method that will return list of Female Employees Name.
Source Code
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class Parser {
public static void main(String[] args) {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
DocumentBuilder builder;
Document doc = null;
try {
builder = factory.newDocumentBuilder();
doc = builder.parse("employees.xml");
// Create XPathFactory object
XPathFactory xpathFactory = XPathFactory.newInstance();
// Create XPath object
XPath xpath = xpathFactory.newXPath();
String name = getEmployeeNameById(doc, xpath, 4);
System.out.println("Employee Name with ID 4: " + name);
List<String> names = getEmployeeNameWithAge(doc, xpath, 30);
System.out.println("Employees with 'age>30' are:" + Arrays.toString(names.toArray()));
List<String> femaleEmps = getFemaleEmployeesName(doc, xpath);
System.out.println("Female Employees names are:" +
Arrays.toString(femaleEmps.toArray()));
} catch (ParserConfigurationException | SAXException | IOException e) {
e.printStackTrace();
}
}
private static List<String> getFemaleEmployeesName(Document doc, XPath xpath) {
List<String> list = new ArrayList<>();
try {
//create XPathExpression object
XPathExpression expr =
xpath.compile("/Employees/Employee[gender='Female']/name/text()");
//evaluate expression result on XML document
NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
for (int i = 0; i < nodes.getLength(); i++)
list.add(nodes.item(i).getNodeValue());
} catch (XPathExpressionException e) {
e.printStackTrace();
}
return list;
}
private static List<String> getEmployeeNameWithAge(Document doc, XPath xpath, int age) {
List<String> list = new ArrayList<>();
try {
XPathExpression expr =
xpath.compile("/Employees/Employee[age>" + age + "]/name/text()");
NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
for (int i = 0; i < nodes.getLength(); i++)
list.add(nodes.item(i).getNodeValue());
} catch (XPathExpressionException e) {
e.printStackTrace();
}
return list;
}
private static String getEmployeeNameById(Document doc, XPath xpath, int id) {
String name = null;
try {
XPathExpression expr =
xpath.compile("/Employees/Employee[#id='" + id + "']/name/text()");
name = (String) expr.evaluate(doc, XPathConstants.STRING);
} catch (XPathExpressionException e) {
e.printStackTrace();
}
return name;
}
}
Getting started example:
xml file:
<inventory>
<book year="2000">
<title>Snow Crash</title>
<author>Neal Stephenson</author>
<publisher>Spectra</publisher>
<isbn>0553380958</isbn>
<price>14.95</price>
</book>
<book year="2005">
<title>Burning Tower</title>
<author>Larry Niven</author>
<author>Jerry Pournelle</author>
<publisher>Pocket</publisher>
<isbn>0743416910</isbn>
<price>5.99</price>
</book>
<book year="1995">
<title>Zodiac</title>
<author>Neal Stephenson</author>
<publisher>Spectra</publisher>
<isbn>0553573862</isbn>
<price>7.50</price>
</book>
<!-- more books... -->
</inventory>
Java code:
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
try {
DocumentBuilderFactory docBuilderFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder = docBuilderFactory.newDocumentBuilder();
Document doc = docBuilder.parse (new File("c:\\tmp\\my.xml"));
// normalize text representation
doc.getDocumentElement().normalize();
System.out.println ("Root element of the doc is " + doc.getDocumentElement().getNodeName());
NodeList listOfBooks = doc.getElementsByTagName("book");
int totalBooks = listOfBooks.getLength();
System.out.println("Total no of books : " + totalBooks);
for(int i=0; i<listOfBooks.getLength() ; i++) {
Node firstBookNode = listOfBooks.item(i);
if(firstBookNode.getNodeType() == Node.ELEMENT_NODE) {
Element firstElement = (Element)firstBookNode;
System.out.println("Year :"+firstElement.getAttribute("year"));
//-------
NodeList firstNameList = firstElement.getElementsByTagName("title");
Element firstNameElement = (Element)firstNameList.item(0);
NodeList textFNList = firstNameElement.getChildNodes();
System.out.println("title : " + ((Node)textFNList.item(0)).getNodeValue().trim());
}
}//end of for loop with s var
} catch (SAXParseException err) {
System.out.println ("** Parsing error" + ", line " + err.getLineNumber () + ", uri " + err.getSystemId ());
System.out.println(" " + err.getMessage ());
} catch (SAXException e) {
Exception x = e.getException ();
((x == null) ? e : x).printStackTrace ();
} catch (Throwable t) {
t.printStackTrace ();
}
Here is an example of processing xpath with vtd-xml... for heavy duty XML processing it is second to none. here is the a recent paper on this subject Processing XML with Java – A Performance Benchmark
import com.ximpleware.*;
public class changeAttrVal {
public static void main(String s[]) throws VTDException,java.io.UnsupportedEncodingException,java.io.IOException{
VTDGen vg = new VTDGen();
if (!vg.parseFile("input.xml", false))
return;
VTDNav vn = vg.getNav();
AutoPilot ap = new AutoPilot(vn);
XMLModifier xm = new XMLModifier(vn);
ap.selectXPath("/*/place[#id=\"p14\" and #initialMarking=\"2\"]/#initialMarking");
int i=0;
while((i=ap.evalXPath())!=-1){
xm.updateToken(i+1, "499");// change initial marking from 2 to 499
}
xm.output("new.xml");
}
}
If you have a xml like below
<e:Envelope
xmlns:d = "http://www.w3.org/2001/XMLSchema"
xmlns:e = "http://schemas.xmlsoap.org/soap/envelope/"
xmlns:wn0 = "http://systinet.com/xsd/SchemaTypes/"
xmlns:i = "http://www.w3.org/2001/XMLSchema-instance">
<e:Header>
<Friends>
<friend>
<Name>Testabc</Name>
<Age>12121</Age>
<Phone>Testpqr</Phone>
</friend>
</Friends>
</e:Header>
<e:Body>
<n0:ForAnsiHeaderOperResponse xmlns:n0 = "http://systinet.com/wsdl/com/magicsoftware/ibolt/localhost/ForAnsiHeader/ForAnsiHeaderImpl#ForAnsiHeaderOper?KExqYXZhL2xhbmcvU3RyaW5nOylMamF2YS9sYW5nL1N0cmluZzs=">
<response i:type = "d:string">12--abc--pqr</response>
</n0:ForAnsiHeaderOperResponse>
</e:Body>
</e:Envelope>
and wanted to extract the below xml
<e:Header>
<Friends>
<friend>
<Name>Testabc</Name>
<Age>12121</Age>
<Phone>Testpqr</Phone>
</friend>
</Friends>
</e:Header>
The below code helps to achieve the same
public static void main(String[] args) {
File fXmlFile = new File("C://Users//abhijitb//Desktop//Test.xml");
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
Document document;
Node result = null;
try {
document = dbf.newDocumentBuilder().parse(fXmlFile);
XPath xPath = XPathFactory.newInstance().newXPath();
String xpathStr = "//Envelope//Header";
result = (Node) xPath.evaluate(xpathStr, document, XPathConstants.NODE);
System.out.println(nodeToString(result));
} catch (SAXException | IOException | ParserConfigurationException | XPathExpressionException
| TransformerException e) {
e.printStackTrace();
}
}
private static String nodeToString(Node node) throws TransformerException {
StringWriter buf = new StringWriter();
Transformer xform = TransformerFactory.newInstance().newTransformer();
xform.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
xform.transform(new DOMSource(node), new StreamResult(buf));
return (buf.toString());
}
Now if you want only the xml like below
<Friends>
<friend>
<Name>Testabc</Name>
<Age>12121</Age>
<Phone>Testpqr</Phone>
</friend>
</Friends>
You need to change the
String xpathStr = "//Envelope//Header"; to String xpathStr = "//Envelope//Header/*";
This shows you how to
Read in an XML file to a DOM
Filter out a set of Nodes with XPath
Perform a certain action on each of the extracted Nodes.
We will call the code with the following statement
processFilteredXml(xmlIn, xpathExpr,(node) -> {/*Do something...*/;});
In our case we want to print some creatorNames from a book.xml using "//book/creators/creator/creatorName" as xpath to perform a printNode action on each Node that matches the XPath.
Full code
#Test
public void printXml() {
try (InputStream in = readFile("book.xml")) {
processFilteredXml(in, "//book/creators/creator/creatorName", (node) -> {
printNode(node, System.out);
});
} catch (Exception e) {
throw new RuntimeException(e);
}
}
private InputStream readFile(String yourSampleFile) {
return Thread.currentThread().getContextClassLoader().getResourceAsStream(yourSampleFile);
}
private void processFilteredXml(InputStream in, String xpath, Consumer<Node> process) {
Document doc = readXml(in);
NodeList list = filterNodesByXPath(doc, xpath);
for (int i = 0; i < list.getLength(); i++) {
Node node = list.item(i);
process.accept(node);
}
}
public Document readXml(InputStream xmlin) {
try {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
return db.parse(xmlin);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
private NodeList filterNodesByXPath(Document doc, String xpathExpr) {
try {
XPathFactory xPathFactory = XPathFactory.newInstance();
XPath xpath = xPathFactory.newXPath();
XPathExpression expr = xpath.compile(xpathExpr);
Object eval = expr.evaluate(doc, XPathConstants.NODESET);
return (NodeList) eval;
} catch (Exception e) {
throw new RuntimeException(e);
}
}
private void printNode(Node node, PrintStream out) {
try {
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
StreamResult result = new StreamResult(new StringWriter());
DOMSource source = new DOMSource(node);
transformer.transform(source, result);
String xmlString = result.getWriter().toString();
out.println(xmlString);
} catch (Exception e) {
throw new RuntimeException(e);
}
}
Prints
<creatorName>Fosmire, Michael</creatorName>
<creatorName>Wertz, Ruth</creatorName>
<creatorName>Purzer, Senay</creatorName>
For book.xml
<book>
<creators>
<creator>
<creatorName>Fosmire, Michael</creatorName>
<givenName>Michael</givenName>
<familyName>Fosmire</familyName>
</creator>
<creator>
<creatorName>Wertz, Ruth</creatorName>
<givenName>Ruth</givenName>
<familyName>Wertz</familyName>
</creator>
<creator>
<creatorName>Purzer, Senay</creatorName>
<givenName>Senay</givenName>
<familyName>Purzer</familyName>
</creator>
</creators>
<titles>
<title>Critical Engineering Literacy Test (CELT)</title>
</titles>
</book>
Expanding on the excellent answer by #bluish and #Yishai, here is how you make the NodeLists and node attributes support iterators, i.e. the for(Node n: nodelist) interface.
Use it like:
NodeList nl = ...
for(Node n : XmlUtil.asList(nl))
{...}
and
Node n = ...
for(Node attr : XmlUtil.asList(n.getAttributes())
{...}
The code:
/**
* Converts NodeList to an iterable construct.
* From: https://stackoverflow.com/a/19591302/779521
*/
public final class XmlUtil {
private XmlUtil() {}
public static List<Node> asList(NodeList n) {
return n.getLength() == 0 ? Collections.<Node>emptyList() : new NodeListWrapper(n);
}
static final class NodeListWrapper extends AbstractList<Node> implements RandomAccess {
private final NodeList list;
NodeListWrapper(NodeList l) {
this.list = l;
}
public Node get(int index) {
return this.list.item(index);
}
public int size() {
return this.list.getLength();
}
}
public static List<Node> asList(NamedNodeMap n) {
return n.getLength() == 0 ? Collections.<Node>emptyList() : new NodeMapWrapper(n);
}
static final class NodeMapWrapper extends AbstractList<Node> implements RandomAccess {
private final NamedNodeMap list;
NodeMapWrapper(NamedNodeMap l) {
this.list = l;
}
public Node get(int index) {
return this.list.item(index);
}
public int size() {
return this.list.getLength();
}
}
}
Read XML file using XPathFactory, SAXParserFactory and StAX (JSR-173).
Using XPath get node and its child data.
public static void main(String[] args) {
String xml = "<soapenv:Body xmlns:soapenv='http://schemas.xmlsoap.org/soap/envelope/'>"
+ "<Yash:Data xmlns:Yash='http://Yash.stackoverflow.com/Services/Yash'>"
+ "<Yash:Tags>Java</Yash:Tags><Yash:Tags>Javascript</Yash:Tags><Yash:Tags>Selenium</Yash:Tags>"
+ "<Yash:Top>javascript</Yash:Top><Yash:User>Yash-777</Yash:User>"
+ "</Yash:Data></soapenv:Body>";
String jsonNameSpaces = "{'soapenv':'http://schemas.xmlsoap.org/soap/envelope/',"
+ "'Yash':'http://Yash.stackoverflow.com/Services/Yash'}";
String xpathExpression = "//Yash:Data";
Document doc1 = getDocument(false, "fileName", xml);
getNodesFromXpath(doc1, xpathExpression, jsonNameSpaces);
System.out.println("\n===== ***** =====");
Document doc2 = getDocument(true, "./books.xml", xml);
getNodesFromXpath(doc2, "//person", "{}");
}
static Document getDocument( boolean isFileName, String fileName, String xml ) {
Document doc = null;
try {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setValidating(false);
factory.setNamespaceAware(true);
factory.setIgnoringComments(true);
factory.setIgnoringElementContentWhitespace(true);
DocumentBuilder builder = factory.newDocumentBuilder();
if( isFileName ) {
File file = new File( fileName );
FileInputStream stream = new FileInputStream( file );
doc = builder.parse( stream );
} else {
doc = builder.parse( string2Source( xml ) );
}
} catch (SAXException | IOException e) {
e.printStackTrace();
} catch (ParserConfigurationException e) {
e.printStackTrace();
}
return doc;
}
/**
* ELEMENT_NODE[1],ATTRIBUTE_NODE[2],TEXT_NODE[3],CDATA_SECTION_NODE[4],
* ENTITY_REFERENCE_NODE[5],ENTITY_NODE[6],PROCESSING_INSTRUCTION_NODE[7],
* COMMENT_NODE[8],DOCUMENT_NODE[9],DOCUMENT_TYPE_NODE[10],DOCUMENT_FRAGMENT_NODE[11],NOTATION_NODE[12]
*/
public static void getNodesFromXpath( Document doc, String xpathExpression, String jsonNameSpaces ) {
try {
XPathFactory xpf = XPathFactory.newInstance();
XPath xpath = xpf.newXPath();
JSONObject namespaces = getJSONObjectNameSpaces(jsonNameSpaces);
if ( namespaces.size() > 0 ) {
NamespaceContextImpl nsContext = new NamespaceContextImpl();
Iterator<?> key = namespaces.keySet().iterator();
while (key.hasNext()) { // Apache WebServices Common Utilities
String pPrefix = key.next().toString();
String pURI = namespaces.get(pPrefix).toString();
nsContext.startPrefixMapping(pPrefix, pURI);
}
xpath.setNamespaceContext(nsContext );
}
XPathExpression compile = xpath.compile(xpathExpression);
NodeList nodeList = (NodeList) compile.evaluate(doc, XPathConstants.NODESET);
displayNodeList(nodeList);
} catch (XPathExpressionException e) {
e.printStackTrace();
}
}
static void displayNodeList( NodeList nodeList ) {
for (int i = 0; i < nodeList.getLength(); i++) {
Node node = nodeList.item(i);
String NodeName = node.getNodeName();
NodeList childNodes = node.getChildNodes();
if ( childNodes.getLength() > 1 ) {
for (int j = 0; j < childNodes.getLength(); j++) {
Node child = childNodes.item(j);
short nodeType = child.getNodeType();
if ( nodeType == 1 ) {
System.out.format( "\n\t Node Name:[%s], Text[%s] ", child.getNodeName(), child.getTextContent() );
}
}
} else {
System.out.format( "\n Node Name:[%s], Text[%s] ", NodeName, node.getTextContent() );
}
}
}
static InputSource string2Source( String str ) {
InputSource inputSource = new InputSource( new StringReader( str ) );
return inputSource;
}
static JSONObject getJSONObjectNameSpaces( String jsonNameSpaces ) {
if(jsonNameSpaces.indexOf("'") > -1) jsonNameSpaces = jsonNameSpaces.replace("'", "\"");
JSONParser parser = new JSONParser();
JSONObject namespaces = null;
try {
namespaces = (JSONObject) parser.parse(jsonNameSpaces);
} catch (ParseException e) {
e.printStackTrace();
}
return namespaces;
}
XML Document
<?xml version="1.0" encoding="UTF-8"?>
<book>
<person>
<first>Yash</first>
<last>M</last>
<age>22</age>
</person>
<person>
<first>Bill</first>
<last>Gates</last>
<age>46</age>
</person>
<person>
<first>Steve</first>
<last>Jobs</last>
<age>40</age>
</person>
</book>
Out put for the given XPathExpression:
String xpathExpression = "//person/first";
/*OutPut:
Node Name:[first], Text[Yash]
Node Name:[first], Text[Bill]
Node Name:[first], Text[Steve] */
String xpathExpression = "//person";
/*OutPut:
Node Name:[first], Text[Yash]
Node Name:[last], Text[M]
Node Name:[age], Text[22]
Node Name:[first], Text[Bill]
Node Name:[last], Text[Gates]
Node Name:[age], Text[46]
Node Name:[first], Text[Steve]
Node Name:[last], Text[Jobs]
Node Name:[age], Text[40] */
String xpathExpression = "//Yash:Data";
/*OutPut:
Node Name:[Yash:Tags], Text[Java]
Node Name:[Yash:Tags], Text[Javascript]
Node Name:[Yash:Tags], Text[Selenium]
Node Name:[Yash:Top], Text[javascript]
Node Name:[Yash:User], Text[Yash-777] */
See this link for our own Implementation of NamespaceContext
Without using a namespace the XPath example worked fine. A list is printed. With the namespace added, no result is returned.
How can I use namespaces and XPath properly?
My sample (minimalized) xml file is:
<?xml version="1.0" encoding="UTF-8"?>
<nodespace:Employees xmlns:nodespace="my_unique_namespace_name">
<nodespace:Employee id="1">
<nodespace:age>29</nodespace:age>
<nodespace:name>Pankaj</nodespace:name>
<nodespace:gender>Male</nodespace:gender>
<nodespace:role>Java Developer</nodespace:role>
</nodespace:Employee>
<nodespace:Employee id="2">
<nodespace:age>35</nodespace:age>
<nodespace:name>Lisa</nodespace:name>
<nodespace:gender>Female</nodespace:gender>
<nodespace:role>CEO</nodespace:role>
</nodespace:Employee>
</nodespace:Employees>
The XPath Java source code is:
public class XpathNamespaceTest {
public static final String PREFIX_NAME = "bdn";
public static final String NODESPACE_UNIQUE_NAME = "nodespace";
public static void main(String[] args) {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
DocumentBuilder builder;
Document doc = null;
try {
ClassLoader classLoader = new XpathNamespaceTest().getClass().getClassLoader();
URL resource = classLoader.getResource("employees_namespace.xml");
File file = new File( resource.getFile());
builder = factory.newDocumentBuilder();
doc = builder.parse(file);
XPathFactory xpathFactory = XPathFactory.newInstance();
XPath xpath = xpathFactory.newXPath();
xpath.setNamespaceContext(new NamespaceContext() {
#Override
public Iterator getPrefixes(String arg0) { return null; }
#Override
public String getPrefix(String ns) {
if(ns.equals(NODESPACE_UNIQUE_NAME)) {
return PREFIX_NAME;
}
return null;
}
#Override
public String getNamespaceURI(String arg0) {
if (PREFIX_NAME.equals(arg0)) {
return NODESPACE_UNIQUE_NAME;
}
return null;
}
});
List<String> names = getEmployeeNameWithAge(doc, xpath, 30);
System.out.println("Employees with 'age>30' are:" + Arrays.toString(names.toArray()));
} catch (ParserConfigurationException | SAXException | IOException e) {
e.printStackTrace();
}
}
private static List<String> getEmployeeNameWithAge(Document doc, XPath xpath, int age) {
List<String> list = new ArrayList<>();
try {
XPathExpression expr = xpath.compile("/bdn:Employees/bdn:Employee[bdn:age>" + age + "]/bdn:name/text()");
NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
for (int i = 0; i < nodes.getLength(); i++) list.add(nodes.item(i).getNodeValue());
} catch (XPathExpressionException e) {
e.printStackTrace();
}
return list;
}
}
The namespace URI is my_unique_namespace_name so you need
public static final String NODESPACE_UNIQUE_NAME = "my_unique_namespace_name";
I have the below element in a web service response. As you can see, it's escaped XML dumped as CDATA, so the XML parser just looks at it as a string and I'm unable to get the data I need from it through the usual means of XSLT and XPath. I need to turn this ugly string back into XML so that I can read it properly.
I have tried to do a search replace and simply converted all < to < and > to > and this works great, but there is a problem: The message.body element can actually contain HTML which is not valid XML. Might not even be valid HTML for all I know. So if I just replace everything, this will probably crash when I try to turn the string back into an XML document.
How can I unescape this safely? Is there a good way to do the replacement in the whole string except between the message.body open and closing tags for example?
<output><item type="object">
<ticket.id type="string">171</ticket.id>
<ticket.title type="string">SoapUI Test</ticket.title>
<ticket.created_at type="string">2013-12-03 12:50:54</ticket.created_at>
<ticket.status type="string">Open</ticket.status>
<updated type="string">false</updated>
<message type="object">
<message.id type="string">520</message.id>
<message.created_at type="string">2013-12-03 12:50:54.000</message.created_at>
<message.author type="string"/>
<message.body type="string">Just a test message...</message.body>
</message>
<message type="object">
<message.id type="string">521</message.id>
<message.created_at type="string">2013-12-03 13:58:32.000</message.created_at>
<message.author type="string"/>
<message.body type="string">Another message!</message.body>
</message>
</item>
</output>
This is actually lifted from the project i'm working on right now.
private Node stringToNode(String textContent) {
Element node = null;
try {
node = DocumentBuilderFactory.newInstance().newDocumentBuilder()
.parse(new ByteArrayInputStream(textContent.getBytes()))
.getDocumentElement();
} catch (SAXException e) {
logger.error(e.getMessage(), e);
} catch (IOException e) {
logger.error(e.getMessage(), e);
} catch (ParserConfigurationException e) {
logger.error(e.getMessage(), e);
}
return node;
}
This will give you a document object representing the string. I use this to get this back into the original document:
if (textContent.contains(XML_HEADER)) {
textContent = textContent.substring(textContent.indexOf(XML_HEADER) + XML_HEADER.length());
}
Node newNode = stringToNode(textContent);
if (newNode != null) {
Node importedNode = soapBody.getOwnerDocument().importNode(newNode, true);
nextChild.setTextContent(null);
nextChild.appendChild(importedNode);
}
This is my current solution. You give it an XPath for the nodes that are messed up and a set of element names that might include messed up HTML and other problems. Works roughly as follows
Pull out text content of nodes matched by XPATH
Run regex to wrap problematic child elements in CDATA
Wrap text in temporary element (otherwise it crashes if there are multiple root nodes)
Parse text back to DOM
Add child nodes of temporary node back in place of previous text content.
The regex solution in step 2 is probably not fool-proof, but don't really see a better solution at the moment. If you do, let me know!
CDataFixer
import java.util.*;
import javax.xml.xpath.*;
import org.w3c.dom.*;
public class CDataFixer
{
private final XmlHelper xml = XmlHelper.getInstance();
public Document fix(Document document, String nodesToFix, Set<String> excludes) throws XPathExpressionException, XmlException
{
return fix(document, xml.newXPath().compile(nodesToFix), excludes);
}
private Document fix(Document document, XPathExpression nodesToFix, Set<String> excludes) throws XPathExpressionException, XmlException
{
Document wc = xml.copy(document);
NodeList nodes = (NodeList) nodesToFix.evaluate(wc, XPathConstants.NODESET);
int nodeCount = nodes.getLength();
for(int n=0; n<nodeCount; n++)
parse(nodes.item(n), excludes);
return wc;
}
private void parse(Node node, Set<String> excludes) throws XmlException
{
String text = node.getTextContent();
for(String exclude : excludes)
{
String regex = String.format("(?s)(<%1$s\\b[^>]*>)(.*?)(</%1$s>)", Pattern.quote(exclude));
text = text.replaceAll(regex, "$1<![CDATA[$2]]>$3");
}
String randomNode = "tmp_"+UUID.randomUUID().toString();
text = String.format("<%1$s>%2$s</%1$s>", randomNode, text);
NodeList parsed = xml
.parse(text)
.getFirstChild()
.getChildNodes();
node.setTextContent(null);
for(int n=0; n<parsed.getLength(); n++)
node.appendChild(node.getOwnerDocument().importNode(parsed.item(n), true));
}
}
XmlHelper
import java.io.*;
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.sax.*;
import javax.xml.transform.stream.*;
import javax.xml.xpath.*;
import org.w3c.dom.*;
import org.xml.sax.*;
public final class XmlHelper
{
private static final XmlHelper instance = new XmlHelper();
public static XmlHelper getInstance()
{
return instance;
}
private final SAXTransformerFactory transformerFactory;
private final DocumentBuilderFactory documentBuilderFactory;
private final XPathFactory xpathFactory;
private XmlHelper()
{
documentBuilderFactory = DocumentBuilderFactory.newInstance();
documentBuilderFactory.setNamespaceAware(true);
xpathFactory = XPathFactory.newInstance();
TransformerFactory tf = TransformerFactory.newInstance();
if (!tf.getFeature(SAXTransformerFactory.FEATURE))
throw new RuntimeException("Failed to create SAX-compatible TransformerFactory.");
transformerFactory = (SAXTransformerFactory) tf;
}
public DocumentBuilder newDocumentBuilder()
{
try
{
return documentBuilderFactory.newDocumentBuilder();
}
catch (ParserConfigurationException e)
{
throw new RuntimeException("Failed to create new "+DocumentBuilder.class, e);
}
}
public XPath newXPath()
{
return xpathFactory.newXPath();
}
public Transformer newIdentityTransformer(boolean omitXmlDeclaration, boolean indent)
{
try
{
Transformer transformer = transformerFactory.newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, indent ? "yes" : "no");
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, omitXmlDeclaration ? "yes" : "no");
return transformer;
}
catch (TransformerConfigurationException e)
{
throw new RuntimeException("Failed to create Transformer instance: "+e.getMessage(), e);
}
}
public Templates newTemplates(String xslt) throws XmlException
{
try
{
return transformerFactory.newTemplates(new DOMSource(parse(xslt)));
}
catch (TransformerConfigurationException e)
{
throw new RuntimeException("Failed to create templates: "+e.getMessage(), e);
}
}
public Document parse(String xml) throws XmlException
{
return parse(new InputSource(new StringReader(xml)));
}
public Document parse(InputSource xml) throws XmlException
{
try
{
return newDocumentBuilder().parse(xml);
}
catch (SAXException e)
{
throw new XmlException("Failed to parse xml: "+e.getMessage(), e);
}
catch (IOException e)
{
throw new XmlException("Failed to read xml: "+e.getMessage(), e);
}
}
public String toString(Node node)
{
return toString(node, true, false);
}
public String toString(Node node, boolean omitXMLDeclaration, boolean indent)
{
try
{
StringWriter writer = new StringWriter();
newIdentityTransformer(omitXMLDeclaration, indent)
.transform(new DOMSource(node), new StreamResult(writer));
return writer.toString();
}
catch (TransformerException e)
{
throw new RuntimeException("Failed to transform XML into string: " + e.getMessage(), e);
}
}
public Document copy(Document document)
{
DOMSource source = new DOMSource(document);
DOMResult result = new DOMResult();
try
{
newIdentityTransformer(true, false)
.transform(source, result);
return (Document) result.getNode();
}
catch (TransformerException e)
{
throw new RuntimeException("Failed to copy XML: " + e.getMessage(), e);
}
}
}
Does anyone help find the problem why no xml file is not parsing with the following code.... the xml format can be seen here.
I tried to test line by line using log.i("your string goes here", xml); function but unable to see the execution of code when loop starts....
I have used to Splash activity in which AsyncTask() function is executed in the background and then ListActivity is used to display all the DOMParser activity output....
So anyone help me get out from this problem at all....
Thank you in advance
package com.wfwf.everestnewsapp.parser;
import java.net.MalformedURLException;
import java.net.URL;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.jsoup.Jsoup;
import org.jsoup.select.Elements;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import android.widget.Toast;
public class DOMParser {
private RSSFeed _feed = new RSSFeed();
public RSSFeed parseXml(String xml) {
URL url = null;
try {
url = new URL(xml);
} catch (MalformedURLException e1) {
e1.printStackTrace();
}
try {
// Create required instances
DocumentBuilderFactory dbf;
dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
// Parse the xml
Document doc = db.parse(new InputSource(url.openStream()));
doc.getDocumentElement().normalize();
// Get all <item> tags.
NodeList nl = doc.getElementsByTagName("item");
int length = nl.getLength();
for (int i = 0; i < length; i++) {
Node currentNode = nl.item(i);
RSSItem _item = new RSSItem();
NodeList nchild = currentNode.getChildNodes();
int clength = nchild.getLength();
// Get the required elements from each Item
// Ishwor changed the code j=0 and j= j+1
for (int j = 0; j < clength; j = j + 1) {
Node thisNode = nchild.item(j);
String theString = null;
/*//ishwor changed as
if (thisNode != null && thisNode.getFirstChild() != null) {
theString = thisNode.getFirstChild().getNodeValue();
}
*/
String nodeName = thisNode.getNodeName();
//theString = nchild.item(j).getFirstChild().getNodeValue();
if(nchild.item(j).getFirstChild().getNodeValue()!=null){
//if (theString != null) {
//String nodeName = thisNode.getNodeName();
if ("title".equals(nodeName)) {
// Node name is equals to 'title' so set the Node
// value to the Title in the RSSItem.
_item.setTitle(theString);
}
else if ("description".equals(nodeName)) {
_item.setDescription(theString);
// Parse the html description to get the image url
String html = theString;
org.jsoup.nodes.Document docHtml = Jsoup.parse(html);
Elements imgEle = docHtml.select("img");
_item.setImage(imgEle.attr("src"));
}
else if ("pubDate".equals(nodeName)) {
// We replace the plus and zero's in the date with
// empty string
String formatedDate = theString.replace(" +0000",
"");
_item.setDate(formatedDate);
}
}
}
// add item to the list
_feed.addItem(_item);
}
} catch (Exception e) {
}
// Return the final feed once all the Items are added to the RSSFeed
// Object(_feed).
return _feed;
}
}
private Document getDocument(InputStream stream) {
Document doc = null;
try {
DocumentBuilderFactory dbFactory = DocumentBuilderFactory
.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
if (stream != null) {
doc = dBuilder.parse(stream);
doc.getDocumentElement().normalize();
} else {
}
} catch (ParserConfigurationException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (SAXException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
// TODO: handle exception
}
return doc;
}
private String getTagValue(String sTag, Element eElement) {
String value = "";
try {
NodeList nlList = eElement.getElementsByTagName(sTag).item(0)
.getChildNodes();
for (int i = 0; i < nlList.getLength(); i++) {
Node nValue = nlList.item(i);
if (nValue != null) {
value = value + nValue.getNodeValue().trim();
}
}
} catch (Exception e) {
}
return value;
}
public EntityBean parseDoLogin(String requestOutput) {
EntityBean response = null;
InputStream stream = (InputStream) new ByteArrayInputStream(
requestOutput.getBytes());
Document doc = getDocument(stream);
NodeList nodeList = doc.getElementsByTagName("startTag");
for (int i = 0; i < nodeList.getLength(); i++) {
Node nNode = nodeList.item(i);
Element iElement = (Element) nNode;
response = new EntityBean();
String result = new String(getTagValue("result", iElement));
response.setResult(result);
String msg = new String(getTagValue("message", iElement));
response.setMsg(msg);
}
return response;
}
I would like to get all the content in between the tags but I do not know how to do this because of the urn: namespace.
<urn:ResponseStatus version="1.0" xmlns:urn="urn:camera-org">
<urn:requestURL>/CAMERA/Streaming/status</urn:requestURL>
<urn:statusCode>4</urn:statusCode>
<urn:statusString>Invalid Operation</urn:statusString>
<urn:id>0</urn:id>
</urn:ResponseStatus>
Any ideas?
Short answer: use XPath local-name(). Like this: xPathFactory.newXPath().compile("//*[local-name()='requestURL']/text()"); will return /CAMERA/Streaming/status
Or you can implement a NamespaceContext that maps namespaces names and URIs and set it on the XPath object before querying.
Take a look at this blog article, Update: the article is down, you can see it on webarchive
Solution 1 sample:
XPath xpath = XPathFactory.newInstance().newXPath();
String responseStatus = xpath.evaluate("//*[local-name()='ResponseStatus']/text()", document);
System.out.println("-> " + responseStatus);
Solution 2 sample:
// load the Document
Document document = ...;
NamespaceContext ctx = new NamespaceContext() {
public String getNamespaceURI(String prefix) {
return prefix.equals("urn") ? "urn:camera-org" : null;
}
public Iterator getPrefixes(String val) {
return null;
}
public String getPrefix(String uri) {
return null;
}
};
XPath xpath = XPathFactory.newInstance().newXPath();
xpath.setNamespaceContext(ctx);
String responseStatus = xpath.evaluate("//urn:ResponseStatus/text()", document);
System.out.println("-> " + responseStatus);
Edit
This is a complete example, it correctly retrieve the element:
String xml = "<urn:ResponseStatus version=\"1.0\" xmlns:urn=\"urn:camera-org\">\r\n" + //
"\r\n" + //
"<urn:requestURL>/CAMERA/Streaming/status</urn:requestURL>\r\n" + //
"<urn:statusCode>4</urn:statusCode>\r\n" + //
"<urn:statusString>Invalid Operation</urn:statusString>\r\n" + //
"<urn:id>0</urn:id>\r\n" + //
"\r\n" + //
"</urn:ResponseStatus>";
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(new java.io.ByteArrayInputStream(xml.getBytes()));
XPath xpath = XPathFactory.newInstance().newXPath();
xpath.setNamespaceContext(new NamespaceContext() {
public String getNamespaceURI(String prefix) {
return prefix.equals("urn") ? "urn:camera-org" : null;
}
public Iterator<?> getPrefixes(String val) {
return null;
}
public String getPrefix(String uri) {
return null;
}
});
XPathExpression expr = xpath.compile("//urn:ResponseStatus");
Object result = expr.evaluate(doc, XPathConstants.NODESET);
NodeList nodes = (NodeList) result;
for (int i = 0; i < nodes.getLength(); i++) {
Node currentItem = nodes.item(i);
System.out.println("found node -> " + currentItem.getLocalName() + " (namespace: " + currentItem.getNamespaceURI() + ")");
}
XML xpath with Namespace
Simple XML
String namespaceXML = "<?xml version='1.0' ?><information><person id='1'><name>Deep</name><age>34</age><gender>Male</gender></person> <person id='2'><name>Kumar</name><age>24</age><gender>Male</gender></person> <person id='3'><name>Deepali</name><age>19</age><gender>Female</gender></person><!-- more persons... --></information>";
String jsonString = "{}";
String expression = "//information";
Name space XML
String namespaceXML = "<soap:Envelope xmlns:soap=\"http://schemas.xmlsoap.org/soap/envelope/\"><soap:Body><m:NumberToDollarsResponse xmlns:m=\"http://www.dataaccess.com/webservicesserver/\"><m:NumberToDollarsResult>nine hundred and ninety nine dollars</m:NumberToDollarsResult></m:NumberToDollarsResponse></soap:Body></soap:Envelope>";
String jsonString = "{'soap':'http://schemas.xmlsoap.org/soap/envelope/', 'm':'http://www.dataaccess.com/webservicesserver/'}";
String expression = "//m:NumberToDollarsResponse/m:NumberToDollarsResult/text()";
Supply namespace xml file as a string, to asscerionXpath(namespaceXML, jsonString, expression) method and get result in the form of text/node.
text() : nine hundred and ninety nine dollars
node :
<m:NumberToDollarsResult xmlns:m="http://www.dataaccess.com/webservicesserver/">
nine hundred and ninety nine dollars
</m:NumberToDollarsResult>
public static String asscerionXpath(String namespaceXML, String jsonString, String expression){
if(namespaceXML.indexOf("><") > -1) namespaceXML = namespaceXML.replace("><", ">\r\n<");
if(jsonString.indexOf("'") > -1) jsonString = jsonString.replace("'", "\"");
System.out.println("namespaceXML : \n"+namespaceXML);
System.out.println("nsmespaces : \n"+jsonString);
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setValidating(false);
factory.setNamespaceAware(true);
factory.setIgnoringComments(true);
factory.setIgnoringElementContentWhitespace(true);
try {
DocumentBuilder builder = factory.newDocumentBuilder();
Document source = builder.parse( string2Source(namespaceXML) );
XPath xpath = XPathFactory.newInstance().newXPath();
addNameSpaces(jsonString, xpath);
// An XPath expression is not thread-safe. Make sure it is accessible by only one Thread.
XPathExpression expr = xpath.compile(expression);
// The NodeList interface provides the abstraction of an ordered collection of nodes,
NodeList nodes = (org.w3c.dom.NodeList) expr.evaluate(source, XPathConstants.NODESET);;
Node tree_base = nodes.item(0);
return document2String(tree_base);
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (SAXException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (ParserConfigurationException e) {
e.printStackTrace();
} catch (XPathExpressionException e) {
System.out.println("If the expression cannot be evaluated.");
}
return "";
}
static InputSource string2Source( String str ) {
InputSource inputSource = new InputSource( new StringReader( str ) );
return inputSource;
}
static void addNameSpaces(String jsonString, XPath xpath) {
JSONParser parser = new JSONParser();
try {
JSONObject namespaces = (JSONObject) parser.parse(jsonString);
if (namespaces.size() > 0) {
final JSONObject declaredPrefix = namespaces; // To access in Inner-class.
NamespaceContext nameSpace = new NamespaceContext() {
// To get all prefixes bound to a Namespace URI in the current scope, XPath 1.0 specification
// --> "no prefix means no namespace"
public String getNamespaceURI(String prefix) {
Iterator<?> key = declaredPrefix.keySet().iterator();
System.out.println("Keys : "+key.toString());
while (key.hasNext()) {
String name = key.next().toString();
if (prefix.equals(name)) {
System.out.println(declaredPrefix.get(name));
return declaredPrefix.get(name).toString();
}
}
return "";
}
public Iterator<?> getPrefixes(String val) {
return null;
}
public String getPrefix(String uri) {
return null;
}
};// Inner class.
xpath.setNamespaceContext( nameSpace );
}
} catch ( org.json.simple.parser.ParseException e) {
e.printStackTrace();
}
}