XML Parser and xpath expression - java

I am using Java's default DocumentBuilder to parse an XML document that has fewer than 100 lines. Parsing the document takes about 35 milliseconds, and a single XPath expression takes about 15 milliseconds to evaluate. How can I reduce the time taken by both the parsing and the XPath evaluation?
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.namespace.QName;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * Reads an installation descriptor and groups the attributes of every
 * {@code <property>} element by the {@code <file>} element that declares it.
 */
public class XMLParser {
    public static final Logger LOGGER = Logger.getLogger(XMLParser.class.getName());

    /** Property attribute maps, keyed by {@code "<path>:<type>"} of the owning file element. */
    private final Map<String, List<NamedNodeMap>> fileVsProperties = new HashMap<String, List<NamedNodeMap>>();

    /** The parsed descriptor, as returned by {@code XMLUtil.getDocument}. */
    private final Document document;

    /**
     * Parses the given descriptor file.
     *
     * @param file the XML descriptor to load
     */
    public XMLParser(File file) {
        this.document = XMLUtil.getDocument(file);
    }

    /**
     * Records the attributes of all {@code <property>} descendants of a
     * {@code <file>} element. Elements without a {@code path} attribute are
     * logged and skipped.
     *
     * @param file a {@code <file>} element of the descriptor
     */
    public void setProperties(Element file) {
        NodeList properties = file.getElementsByTagName("property");
        List<NamedNodeMap> props = new ArrayList<NamedNodeMap>();
        String type = file.getAttribute("type");
        String path = file.getAttribute("path");
        // getAttribute returns "" for a missing attribute, so this also covers "not present".
        if ("".equals(path)) {
            LOGGER.log(Level.INFO, "Attribute path is required for a file.");
            return;
        }
        path = path + ":" + type;
        for (int i = 0; i < properties.getLength(); i++) {
            Element property = (Element) properties.item(i);
            props.add(property.getAttributes());
        }
        setProperties(props, path);
    }

    /** Appends {@code properties} to the entry for {@code path}, creating the entry if needed. */
    private void setProperties(List<NamedNodeMap> properties, String path) {
        List<NamedNodeMap> previousValue = fileVsProperties.get(path);
        if (previousValue != null) {
            previousValue.addAll(properties);
        } else {
            fileVsProperties.put(path, properties);
        }
    }

    /**
     * Looks up the {@code <configuration>} element whose {@code name}
     * attribute equals {@code branchName}.
     *
     * <p>{@code branchName} is spliced into the XPath expression verbatim, so
     * it must not contain a single quote.
     *
     * @param branchName value of the {@code name} attribute to search for
     * @return the matching element, or {@code null} when nothing matches
     * @throws XPathExpressionException if the expression cannot be evaluated
     */
    public Element getConfiguration(String branchName) throws XPathExpressionException {
        // Attributes are addressed with '@' in XPath; '#' is not a legal token there.
        String expression = "/configurations/configuration[@name='" + branchName + "']";
        return (Element) XMLUtil.getElements(expression, document.getDocumentElement(), XPathConstants.NODE);
    }

    /** Times parsing, the configuration lookup and the property collection for {@code install.xml}. */
    public static void main(String[] args) throws XPathExpressionException {
        long start = System.currentTimeMillis();
        File doc = new File("install.xml");
        XMLParser parser = new XMLParser(doc);
        long end = System.currentTimeMillis();
        System.out.println("Time Taken For Parsing :: " + (end - start) + " milliseconds");
        start = end;
        Element configuration = parser.getConfiguration("BHARATHIKANNAN");
        end = System.currentTimeMillis();
        System.out.println("Time Taken For XPATH Expression TO Finding the Configuration :: " + (end - start) + " milliseconds");
        if (configuration == null) {
            // Without this guard the lookup below would fail with a NullPointerException.
            LOGGER.log(Level.WARNING, "No configuration found for the requested branch.");
            return;
        }
        start = end;
        NodeList files = parser.getFiles(configuration);
        for (int i = 0; i < files.getLength(); i++) {
            parser.setProperties((Element) files.item(i));
        }
        end = System.currentTimeMillis();
        System.out.println(parser.fileVsProperties);
        System.out.println("Time Taken For Setting Properties :: " + (end - start) + " milliseconds");
    }

    /**
     * Returns every {@code <file>} descendant of the given configuration element.
     *
     * @param configuration a {@code <configuration>} element
     * @return the {@code <file>} elements below it
     */
    public NodeList getFiles(Element configuration) {
        return configuration.getElementsByTagName("file");
    }
}
/**
 * Shared JAXP helpers: one {@link DocumentBuilder} and one {@link XPath}
 * instance are created when the class is loaded and reused by every call.
 */
class XMLUtil {
    public static final Logger LOGGER = Logger.getLogger(XMLUtil.class.getName());
    private static final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    private static final XPathFactory xpathFactory = XPathFactory.newInstance();
    private static final DocumentBuilder builder = newBuilder();
    private static final XPath xpath = xpathFactory.newXPath();

    /**
     * Creates the shared builder. A configuration failure aborts class
     * initialization instead of leaving {@code builder} null, which would
     * otherwise only surface later as a NullPointerException in
     * {@link #getDocument(File)}.
     */
    private static DocumentBuilder newBuilder() {
        try {
            return factory.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            LOGGER.log(Level.SEVERE, "Unable to create a DocumentBuilder", e);
            throw new ExceptionInInitializerError(e);
        }
    }

    /**
     * Parses the given file into a DOM document.
     *
     * @param f the XML file to parse
     * @return the parsed document, or {@code null} when the file cannot be
     *         read or is not well-formed (the failure is logged)
     */
    public static Document getDocument(File f) {
        Document doc = null;
        try {
            doc = builder.parse(f);
        } catch (SAXException e) {
            LOGGER.log(Level.WARNING, "Invalid XML Document ", e);
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "No Document Found in the given path", e);
        }
        return doc;
    }

    /**
     * Evaluates an XPath expression with {@code ele} as the context node.
     *
     * @param xpathExpression the expression to evaluate
     * @param ele the context element
     * @param dataType the desired result type, one of the {@code XPathConstants} values
     * @return the evaluation result converted to {@code dataType}
     * @throws XPathExpressionException if the expression cannot be evaluated
     */
    public static Object getElements(String xpathExpression, Element ele, QName dataType) throws XPathExpressionException {
        return xpath.evaluate(xpathExpression, ele, dataType);
    }
}
XML File
<?xml version="1.0"?>
<!--
Note : Default configuration loaded using your current branch name . You can extend configurations using extend attribute in configuration
node .
-->
<configurations>
<configuration name="default">
<files>
<file type="xml" path="conf/server.xml.orig">
<property regex="(port=).*" replace="\18080" xpath="/Server/Connector"></property>
<property regex="(port=).*" replace="\18080"></property>
</file>
<file type="text" path="conf/system_properties.conf">
<property regex="(username=).*" replace="\1root" ></property>
</file>
</files>
</configuration>
<configuration name="BHARATHIKANNAN" extends="default">
<files>
<file type="text" path="conf/system_properties.conf">
<property regex="(username=).*" replace="\1root" ></property>
</file>
</files>
</configuration>
</configurations>
Output :
Time Taken For Parsing :: 24 milliseconds
Time Taken For XPATH Expression TO Finding the Configuration :: 14 milliseconds
{conf/system_properties.conf:text=[com.sun.org.apache.xerces.internal.dom.AttributeMap#75d9fd51]}
Time Taken For Setting Properties :: 0 milliseconds

Someone asked recently about a very similar task but with a much larger document (2Mb), and I gave some Saxon timings here:
https://stackoverflow.com/questions/12497928/xpath-speed-comparision/12508614#12508614
These timings are much faster than you are seeing, on a much larger document. Since you are already using Java, switching to Saxon should be very straightforward.
One caveat though is that you start your timings immediately on entry to main() which means you are mainly measuring class loading time rather than XML processing time. My measurements took care to warm up the Java VM before measurement started.
Note that if you're using Saxon, it's best by far to use Saxon's native tree model rather than DOM or other alternatives. We recently published some measurements here:
http://dev.saxonica.com/blog/mike/2012/09/index.html#000194
DOM comes out 8 times worse than Saxon's native tree on average, 23 times worse in the worst case.

Related

How to keep xml attribute in fasterXml Jackson XmlMapper?

I am writing test cases which test generated xml structures. I am supplying the xml structures via an xml file. I am using currently FasterXMLs Jackson XmlMapper for reading and testing for expected xml.
Java: adoptopenjdk 11
Maven: 3.6.3
JUnit (Jupiter): 5.7.1 (JUnit Jupiter)
Mapper: com.fasterxml.jackson.dataformat.xml.XmlMapper
Dependency: <dependency>
<groupId>com.fasterxml.jackson.dataformat</groupId>
<artifactId>jackson-dataformat-xml</artifactId>
<version>2.11.4</version>
</dependency>
I have an xml file which contains expected xml (e.g.: /test/testcases.xml:
<testcases>
<testcase1>
<response>
<sizegroup-list>
<sizeGroup id="1">
<sizes>
<size>
<technicalSize>38</technicalSize>
<textSize>38</textSize>
</size>
<size>
<technicalSize>705</technicalSize>
<textSize>110cm</textSize>
</size>
</sizes>
</sizeGroup>
</sizegroup-list>
</response>
</testcase1>
</testcases>
My code looks like this (simplified):
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.dataformat.xml.XmlMapper;
import org.apache.commons.lang3.StringUtils;
import org.junit.jupiter.api.Test;
import java.io.FileInputStream;
import java.io.InputStream;
import static org.junit.jupiter.api.Assertions.assertEquals;
class Testcases {
private static final String OBJECT_NODE_START_TAG = "<ObjectNode>";
private static final String OBJECT_NODE_CLOSE_TAG = "</ObjectNode>";
private static final String TESTCASES_XML = "/test/testcases.xml";
private static final XmlMapper XML_MAPPER = new XmlMapper();
#Test
void testcase1() throws Exception {
final String nodePtr = "/testcase1/response";
try (InputStream inputStream = new FileInputStream(TESTCASES_XML)) {
JsonNode rootNode = XML_MAPPER.readTree(inputStream);
JsonNode subNode = rootNode.at(nodePtr);
if (subNode.isMissingNode()) {
throw new IllegalArgumentException(
"Node '" + nodePtr + "' not found in file " + TESTCASES_XML);
}
String expectedXml = XML_MAPPER.writeValueAsString(subNode);
expectedXml = unwrapObjectNode(expectedXml);
// Testcalls, e.g. someService.generateXmlData()
String generatedXml = "...";
assertEquals(expectedXml, generatedXml);
};
}
// FIXME: Ugly: Tell XmlMapper to unwrap ObjectNode automatically
private String unwrapObjectNode(String xmlString) {
if(StringUtils.isBlank(xmlString)) {
return xmlString;
}
if(xmlString.startsWith(OBJECT_NODE_START_TAG)) {
xmlString = xmlString.substring(OBJECT_NODE_START_TAG.length());
if(xmlString.endsWith(OBJECT_NODE_CLOSE_TAG)) {
xmlString = xmlString.substring(0, xmlString.length() - OBJECT_NODE_CLOSE_TAG.length());
}
}
return xmlString;
}
}
But the returned expected xml looks like this:
<sizegroup-list>
<sizeGroup>
<id>1</id>
<sizes>
<size>
<technicalSize>38</technicalSize>
<textSize>38</textSize>
</size>
<size>
<technicalSize>705</technicalSize>
<textSize>110cm</textSize>
</size>
</sizes>
</sizeGroup>
</sizegroup-list>
The former attribute id of the element sizeGroup gets mapped as a sub element and fails my test. How can I tell XmlMapper to keep the attributes of xml elements?
Best regards,
David
I was not able to tell XmlMapper to keep the attributes of XML tags from the loaded XML file, but I have found another way: parsing the XML test data with XPath expressions.
A simple String.equals(...) proved to be unreliable if the expected and actual XML contain different whitespace or a different tag order. Luckily there is a library for comparing XML: XmlUnit!
Additional dependency (seems to be present as transitive dependency as of Spring Boot 2.6.x):
<dependency>
<groupId>org.xmlunit</groupId>
<artifactId>xmlunit-core</artifactId>
<!-- version transitive in spring-boot-starter-parent 2.6.7 -->
<version>2.8.4</version>
<scope>test</scope>
</dependency>
ResourceUtil.java:
import org.apache.commons.lang3.StringUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.IOException;
import java.io.InputStream;
import java.io.StringWriter;
import java.net.URL;
/**
 * Test helper that loads an XML resource stored next to a test class and
 * returns the serialized content found below an XPath location.
 */
public class ResourceUtil {
    private static final DocumentBuilderFactory XML_DOCUMENT_BUILDER_FACTORY = DocumentBuilderFactory.newInstance();
    private static final XPathFactory X_PATH_FACTORY = XPathFactory.newInstance();
    private static final String XSLT_RESOURCE = "xmlstylesheet.xslt";

    private ResourceUtil() {}

    /**
     * Reads an xml file named after the testcase class (e.g. MyTestcase.class
     * -> MyTestcase.xml) and returns the data at the supplied xPath expression.
     *
     * @param testClass the test class whose simple name and package locate the file
     * @param xPathExpression location of the node whose children are returned
     * @return the serialized child nodes of the selected node
     * @throws IllegalArgumentException if the file or the node cannot be found
     */
    public static String xmlData(Class<?> testClass, String xPathExpression) {
        return getXmlDocumentAsString(testClass, testClass.getSimpleName() + ".xml", xPathExpression);
    }

    /**
     * Reads the specified xml file and serializes the children of the node
     * selected by the supplied xPath expression. The xml file is expected in
     * the same package/directory as the testcase class.
     */
    private static String getXmlDocumentAsString(Class<?> ctxtClass, String fileName, String xPathExpression) {
        Document xmlDocument = getXmlDocument(ctxtClass, fileName);
        XPath xPath = X_PATH_FACTORY.newXPath();
        try {
            Node subNode = (Node) xPath.compile(xPathExpression).evaluate(xmlDocument, XPathConstants.NODE);
            if (subNode == null) {
                // An unmatched expression would otherwise fail with a bare NullPointerException.
                throw new IllegalArgumentException("No node matches '" + xPathExpression + "' in file " + fileName);
            }
            return nodeToString(subNode.getChildNodes());
        } catch (TransformerException | XPathExpressionException e) {
            throw new IllegalArgumentException("Unable to read value of '" + xPathExpression + "' from file " + fileName, e);
        }
    }

    /**
     * Reads the specified xml file and returns a Document instance of the
     * xml data. The xml file is expected in the same package/directory as
     * the testcase class. The underlying stream is closed after parsing.
     */
    private static Document getXmlDocument(Class<?> ctxtClass, String xmlFileName) {
        try (InputStream inputStream = getResourceFile(ctxtClass, xmlFileName)) {
            DocumentBuilder builder = XML_DOCUMENT_BUILDER_FACTORY.newDocumentBuilder();
            return builder.parse(inputStream);
        } catch (SAXException | IOException | ParserConfigurationException e) {
            throw new IllegalStateException("Unable to read xml content from file '" + xmlFileName + "'.", e);
        }
    }

    /**
     * Returns an InputStream of the specified xml file. The xml file is
     * expected in the same package/directory as the testcase class.
     *
     * @throws IllegalArgumentException if the resource does not exist
     */
    private static InputStream getResourceFile(Class<?> ctxtClass, String fileName) {
        // A name without a leading '/' is resolved relative to the package of ctxtClass.
        InputStream stream = ctxtClass.getResourceAsStream(fileName);
        if (stream == null) {
            throw new IllegalArgumentException(
                    "Resource file not found: " + fileName + " (expected next to " + ctxtClass.getName() + ")");
        }
        return stream;
    }

    /** Serializes a NodeList to a String of xml, one transformation per node. */
    private static String nodeToString(NodeList nodeList) throws TransformerException {
        StringWriter buf = new StringWriter();
        Transformer xform = TransformerFactory.newInstance().newTransformer(getXsltAsResource());
        xform.setOutputProperty("omit-xml-declaration", "yes");
        xform.setOutputProperty("indent", "no");
        for (int i = 0; i < nodeList.getLength(); ++i) {
            xform.transform(new DOMSource(nodeList.item(i)), new StreamResult(buf));
        }
        return buf.toString().trim();
    }

    /**
     * Returns a Source of the XSLT file used for formatting xml data. The
     * stylesheet is looked up next to this class.
     */
    private static Source getXsltAsResource() {
        InputStream stylesheet = ResourceUtil.class.getResourceAsStream(XSLT_RESOURCE);
        if (stylesheet == null) {
            throw new IllegalStateException("Stylesheet resource not found: " + XSLT_RESOURCE);
        }
        return new StreamSource(stylesheet);
    }
}
xmlstylesheet.xslt (works for me, you may alter to your preferences):
<xsl:stylesheet version="1.0" xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<xsl:strip-space elements="*"/>
<xsl:output method="xml" encoding="UTF-8"/>
<xsl:template match="@*|node()">
<xsl:copy>
<xsl:apply-templates select="@*|node()"/>
</xsl:copy>
</xsl:template>
</xsl:stylesheet>
MyTestcase.java:
import org.xmlunit.builder.DiffBuilder;
import org.xmlunit.diff.DefaultNodeMatcher;
import org.xmlunit.diff.Diff;
import org.xmlunit.diff.ElementSelectors;
import static ResourceUtil.xmldata;
public class MyTestcase {
#Test
void testcase1() {
// Execute logic to generate xml
String xml = ...
assertXmlEquals(xmlData(getClass(), "/test/testcase1/result"), xml);
}
/** Compare xml using XmlUnit assertion. Expected and actual xml need
* to be equal in content (ignoring whitespace and xml tag order) */
void assertXmlEquals(String expectedXml, String testXml) {
Diff diff = DiffBuilder.compare(expectedXml)
.withTest(testXml)
.ignoreWhitespace()
.checkForSimilar()
.withNodeMatcher(new DefaultNodeMatcher(ElementSelectors.byNameAndText, ElementSelectors.byName))
.build();
assertFalse(diff.fullDescription(), diff.hasDifferences());
}
}
MyTestcase.xml:
<test>
<testcase1>
<result>
<myData>
...
</myData>
</result>
</testcase1>
</test>
Best regards,
David

XPathFactoryImpl is not able to identify the root node if the XML document declares a namespace

I am quite new to XML and Saxon API's, Here I am using Saxon 10.3 HE jar to extract the data from the XML file. Here I want to extract the country attribute from the active country_information node where I am using the date functions.
Sample input XML :
<person xmlns="urn:my.poctest.com">
<country_information>
<country>FRA</country>
<end_date>9999-12-31</end_date>
<start_date>2009-12-01</start_date>
</country_information>
<country_information>
<country>FRA</country>
<end_date>9999-12-31</end_date>
<start_date>2009-12-01</start_date>
</country_information>
</person>
Code :
import java.io.IOException;
import java.io.StringReader;
import java.util.Iterator;
import java.util.Map;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import javax.xml.xpath.XPathFactoryConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import net.sf.saxon.xpath.XPathFactoryImpl;
public class SaxonPoc {
public static void main(String[] args) throws SAXException, IOException, ParserConfigurationException,
XPathExpressionException, XPathFactoryConfigurationException {
String xml = " <person xmlns=\"urn:my.poctest.com\">\r\n"
+ " <country_information>\r\n"
+ " <country>FRA</country>\r\n"
+ " <end_date>9999-12-31</end_date>\r\n"
+ " <start_date>2020-02-24</start_date>\r\n"
+ " </country_information>\r\n"
+ " <country_information>\r\n"
+ " <country>USA</country>\r\n"
+ " <end_date>2020-02-23</end_date>\r\n"
+ " <start_date>2009-12-01</start_date>\r\n"
+ " </country_information> \r\n"
+ " </person>";
Document doc = SaxonPoc.getDocument(xml, false);
NodeList matches = (NodeList) SaxonTest.getXpathExpression("//person", null).evaluate(doc,
XPathConstants.NODESET);
if (matches != null) {
Element node = (Element) matches.item(0);
XPath xPath1 = SaxonPoc.getXpath(null);
String xPathStatement = "/person/country_information[xs:date(start_date) le current-date() and xs:date(end_date) ge current-date()]/country";
NodeList childNodes = (NodeList) xPath1.evaluate(xPathStatement, node, XPathConstants.NODESET);
if (childNodes.getLength() > 0) {
String nodeName = childNodes.item(0).getFirstChild().getNodeName();
System.out.println("Node :" + nodeName);
String value = childNodes.item(0).getTextContent();
System.out.println("Country Name :" + value);
}
}
System.out.println("Finished");
}
public static Document getDocument(String xml, boolean isNamespaceAware)
throws SAXException, IOException, ParserConfigurationException {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(isNamespaceAware);
DocumentBuilder builder = factory.newDocumentBuilder();
InputSource is = new InputSource(new StringReader(xml));
return builder.parse(is);
}
public static XPath getXpath(Map<String, String> namespaceMappings) throws XPathFactoryConfigurationException {
XPathFactory xpathFactory = new XPathFactoryImpl();
XPath xpath = xpathFactory.newXPath();
NamespaceContext nsc = new NamespaceContext() {
#Override
public String getNamespaceURI(String prefix) {
return (null != namespaceMappings) ? namespaceMappings.get(prefix) : null;
}
#Override
public String getPrefix(String namespaceURI) {
return null;
}
#Override
public Iterator getPrefixes(String namespaceURI) {
return null;
}
};
xpath.setNamespaceContext(nsc);
return xpath;
}
public static XPathExpression getXpathExpression(String xpathExpr, Map<String, String> namespaceMappings)
throws XPathExpressionException, XPathFactoryConfigurationException {
XPath xpath = getXpath(namespaceMappings);
return xpath.compile(xpathExpr);
}
}
I am facing a null pointer as it is not able to find the root node person an XML doc. If I remove the xmlns="urn:my.poctest.com" then it is able to get the root path but in a later stage, it is failing with javax.xml.xpath.XPathExpressionException: net.sf.saxon.trans.XPathException: Namespace prefix 'xs' has not been declared. If I remove the namespace from XML doc and NamespaceContext implementation from code then it is working fine. But here actually I don't want to remove both things.
Can someone point out me here, what I am doing wrong? Thanks in advance!!
You might like to know that recent versions of Saxon include the option to do
// `xpath` is the javax.xml.xpath.XPath instance created by Saxon's XPathFactoryImpl.
((net.sf.saxon.xpath.XPathEvaluator) xpath).getStaticContext()
        .setUnprefixedElementMatchingPolicy(
                UnprefixedElementMatchingPolicy.ANY_NAMESPACE);
which causes an unprefixed element name in your XPath expression to match on local name alone, regardless of the namespace.
This was mainly introduced for HTML, where there is complete confusion as to whether elements in an HTML DOM are in a namespace or not; but it's useful more generally where you really don't care about the namespaces and just wish they weren't there to make your life a misery.

XPathExpression.evaluate using Node [duplicate]

I want to manipulate xml doc having default namespace but no prefix. Is there a way to use xpath without namespace uri just as if there is no namespace?
I believe it should be possible if we set the namespaceAware property of DocumentBuilderFactory to false, but in my case it is not working.
Is my understanding incorrect, or am I making a mistake in the code?
Here is my code:
// Build a parser with namespace awareness explicitly switched off.
DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
domFactory.setNamespaceAware(false);
try {
DocumentBuilder builder = domFactory.newDocumentBuilder();
Document dDoc = builder.parse("E:/test.xml");
XPath xPath = XPathFactory.newInstance().newXPath();
// Evaluate "//author" against the parsed document and print the size of the resulting node set.
NodeList nl = (NodeList) xPath.evaluate("//author", dDoc, XPathConstants.NODESET);
System.out.println(nl.getLength());
} catch (Exception e) {
// Any parsing or XPath failure is only reported on stderr.
e.printStackTrace();
}
Here is my xml:
<?xml version="1.0" encoding="UTF-8"?>
<root xmlns="http://www.mydomain.com/schema">
<author>
<book title="t1"/>
<book title="t2"/>
</author>
</root>
The XPath processing for a document that uses the default namespace (no prefix) is the same as the XPath processing for a document that uses prefixes:
For namespace qualified documents you can use a NamespaceContext when you execute the XPath. You will need to prefix the fragments in the XPath to match the NamespaceContext. The prefixes you use do not need to match the prefixes used in the document.
http://download.oracle.com/javase/6/docs/api/javax/xml/namespace/NamespaceContext.html
Here is how it looks with your code:
import java.util.Iterator;
import javax.xml.namespace.NamespaceContext;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
/**
 * Counts the {@code author} elements of a namespace-qualified document by
 * binding the document's default namespace to the prefix {@code ns}.
 */
public class Demo {
    public static void main(String[] args) {
        DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance();
        domFactory.setNamespaceAware(true);
        try {
            DocumentBuilder builder = domFactory.newDocumentBuilder();
            Document dDoc = builder.parse("E:/test.xml");
            XPath xPath = XPathFactory.newInstance().newXPath();
            xPath.setNamespaceContext(new MyNamespaceContext());
            NodeList nl = (NodeList) xPath.evaluate("/ns:root/ns:author", dDoc, XPathConstants.NODESET);
            System.out.println(nl.getLength());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /** Namespace context with a single binding: prefix {@code ns} to the document's namespace URI. */
    private static class MyNamespaceContext implements NamespaceContext {
        private static final String PREFIX = "ns";
        private static final String NAMESPACE_URI = "http://www.mydomain.com/schema";

        /** Returns the bound URI for {@code ns}, {@code null} for any other prefix. */
        @Override
        public String getNamespaceURI(String prefix) {
            if (PREFIX.equals(prefix)) {
                return NAMESPACE_URI;
            }
            return null;
        }

        /** Returns {@code ns} for the bound URI, {@code null} for any other URI. */
        @Override
        public String getPrefix(String namespaceURI) {
            return NAMESPACE_URI.equals(namespaceURI) ? PREFIX : null;
        }

        /** Returns an iterator over the prefixes bound to the URI; empty when the URI is unbound. */
        @Override
        public Iterator<String> getPrefixes(String namespaceURI) {
            String prefix = getPrefix(namespaceURI);
            if (prefix == null) {
                return java.util.Collections.<String>emptyIterator();
            }
            return java.util.Collections.singleton(prefix).iterator();
        }
    }
}
Note:
I also used the corrected XPath suggested by Dennis.
The following also appears to work, and is closer to your original question:
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
/**
 * Counts the {@code author} elements selected by {@code /root/author} using a
 * parser left at its default settings and no namespace context.
 */
public class Demo {
    public static void main(String[] args) {
        final DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        try {
            final Document parsed = factory.newDocumentBuilder().parse("E:/test.xml");
            final Object result = XPathFactory.newInstance()
                    .newXPath()
                    .evaluate("/root/author", parsed, XPathConstants.NODESET);
            final NodeList authors = (NodeList) result;
            System.out.println(authors.getLength());
        } catch (Exception failure) {
            // Failures are reported on stderr only.
            failure.printStackTrace();
        }
    }
}
Blaise Doughan is right, attached code is correct.
The problem was somewhere else. I was running all my tests through the application launcher in the Eclipse IDE and nothing was working. Then I discovered that the Eclipse project was the cause of all the grief. I ran my class from the command prompt and it worked. I created a new Eclipse project and pasted the same code there, and it worked there too.
Thank you all guys for your time and efforts.
I've written a simple NamespaceContext implementation (here), that might be of help. It takes a Map<String, String> as input, where the key is a prefix, and the value is a namespace.
It follows the NamespaceContext specification, and you can see how it works in the unit tests.
// Two prefixes ("foo" and "foo2") are bound to the same namespace, "bar" to a different one.
Map<String, String> mappings = new HashMap<>();
mappings.put("foo", "http://foo");
mappings.put("foo2", "http://foo");
mappings.put("bar", "http://bar");
context = new SimpleNamespaceContext(mappings);
context.getNamespaceURI("foo"); // "http://foo"
context.getPrefix("http://foo"); // "foo" or "foo2"
context.getPrefixes("http://foo"); // ["foo", "foo2"]
Note that it has a dependency on Google Guava

read xml nodes using java code

I am connecting to the databases of many servers to retrieve data from all of them. I am using an XML file that contains the connection information of each database server [ip, port, user, passwd], followed by the query to be executed on that server. I read the queries in my code as an ArrayList and process them one by one. What I need is clear, simple code that also reads the database server info, so that I can connect to the server and execute the related query; i.e. for each query I need to get the database IP, port, user and passwd in my code. Here is my XML file structure. Thanks in advance.
<?xml version="1.0"?>
<Database>
<DB>
<ip></ip>
<port></port>
<user></user>
<pass></pass>
</DB>
<Query>
select date from myTable
</Query>
<DB>
<ip></ip>
<port></port>
<user></user>
<pass></pass>
</DB>
<Query>
select time from myTable
</Query>
<DB>
<ip></ip>
<port></port>
<user></user>
<pass></pass>
</DB>
<Query>
select name from myTable
</Query>
</Database>
You can use Java's DOM api for xml processing and xpath.
DOM: https://docs.oracle.com/javase/tutorial/jaxp/dom/index.html
Xpath: http://www.w3schools.com/xsl/xpath_syntax.asp
Below is some sample code that does part of what you want (extracts the ip and port). You will need to modify it to do the rest:
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
/**
 * Parses a small in-memory XML document and prints the {@code ip} and
 * {@code port} of every {@code <DB>} element below {@code <Database>}.
 */
public class DomTester {
    public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException, XPathExpressionException {
        InputStream inStr = null;
        try {
            String xml = "<?xml version=\"1.0\"?>"
                    + "<Database>"
                    + "<DB>"
                    + "<ip>666</ip>"
                    + "<port>7</port>"
                    + "</DB>"
                    + "<DB>"
                    + " <ip>13</ip>"
                    + "<port>1</port>"
                    + "</DB>"
                    + "</Database>";
            inStr = new ByteArrayInputStream(xml.getBytes());
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            DocumentBuilder db = dbf.newDocumentBuilder();
            Document doc = db.parse(inStr);
            XPathFactory xFactory = XPathFactory.newInstance();
            XPath xpath = xFactory.newXPath();
            // Select only the DB elements. Iterating over all child nodes would also visit
            // whitespace text nodes and other elements (e.g. Query), none of which has an
            // ip/port child.
            XPathExpression dbExpr = xpath.compile("/Database/DB");
            // The relative expressions do not change between iterations, so compile them once.
            XPathExpression ipExpr = xpath.compile("ip");
            XPathExpression portExpr = xpath.compile("port");
            NodeList databaseNodeList = (NodeList) dbExpr.evaluate(doc, XPathConstants.NODESET);
            for (int ii = 0; ii < databaseNodeList.getLength(); ++ii) {
                Node dbNode = databaseNodeList.item(ii);
                // Evaluating to a string avoids a null element when a child is missing.
                String ip = ipExpr.evaluate(dbNode);
                String port = portExpr.evaluate(dbNode);
                System.out.println("DB node[" + ii + "] = ip: " + ip + " port: " + port);
            }
        } finally {
            if (null != inStr) {
                inStr.close();
            }
        }
    }
}
Since you are reading from a file instead of using a string, instead of doing this:
inStr = new ByteArrayInputStream(xml.getBytes());
do this:
File f = new File("your/file/path.xml");
inStr = new FileInputStream(f);

Parse Last.Fm XML from API in Java

URL: http://ws.audioscrobbler.com/2.0/?method=chart.gethypedtracks&api_key=1732077d6772048ccc671c754061cb18&limit=10
From the above url I need to somehow remove the Artist name and the track name from the XML file produced from each Song given but I have no Idea how to work with an XML file structured in this way ??
Any help or pointers would be very much appreciated !
Thanks,
Ross
Here's a fully working class that loads the URL you have indicated and parses the Track and artist names.
Basically it reads the xml into a Document, and runs 2 xpath queries in loops to get the data you want.
The document itself is simple xml, if you reformat it, it looks like:
<?xml version="1.0" encoding="utf-8"?>
<lfm status="ok">
<tracks page="1" perPage="10" totalPages="50" total="500">
<track>
<name>Hysterical</name>
<duration>231</duration>
<percentagechange>3626</percentagechange>
<mbid/>
<url>http://www.last.fm/music/Clap+Your+Hands+Say+Yeah/_/Hysterical</url>
<streamable fulltrack="0">0</streamable>
<artist>
<name>Clap Your Hands Say Yeah</name>
...
All I did to clean it up was run it through a re-formatter like xmlstarlet as I mentioned in my comment. Note: you don't have to reformat it for java to read it if it's well formed. Human readable is all a re-format does for you.
The first xpath query gets the track name using a path lfm/tracks/track/name. You can use something like this xpath tester to try out your xpath queries (you can paste your xml in and it will reformat it too). If you don't understand xpath, there are many sources on the net.
The second xpath works relative to the current track name node, and looks for a following-sibling node of type artist with a name sub-node, and then displays the text of the node.
Here's the code
package net.fish;
import java.net.URL;
import java.net.URLConnection;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/** Downloads an XML document from a URL and prints each track name with its artist names. */
public class ParseXML {
    private static final DocumentBuilderFactory DOCUMENT_BUILDER_FACTORY = DocumentBuilderFactory.newInstance();
    private static final XPathFactory XPATH_FACTORY = XPathFactory.newInstance();

    public static void main(String[] args) throws Exception {
        new ParseXML().parseXml("http://ws.audioscrobbler.com/2.0/?method=chart.gethypedtracks&api_key=1732077d6772048ccc671c754061cb18&limit=10");
    }

    /**
     * Loads the XML served at {@code urlPath} and prints, for every
     * {@code lfm/tracks/track/name} node, the track name followed by the
     * names of the sibling {@code artist} elements.
     *
     * @param urlPath the URL to fetch
     * @throws Exception if the document cannot be fetched, parsed or queried
     */
    private void parseXml(String urlPath) throws Exception {
        URL url = new URL(urlPath);
        URLConnection connection = url.openConnection();
        DocumentBuilder db = DOCUMENT_BUILDER_FACTORY.newDocumentBuilder();
        final Document document;
        // Close the response stream once the document has been read.
        try (java.io.InputStream in = connection.getInputStream()) {
            document = db.parse(in);
        }
        XPath xPathEvaluator = XPATH_FACTORY.newXPath();
        XPathExpression nameExpr = xPathEvaluator.compile("lfm/tracks/track/name");
        // Evaluated relative to each track name node; compiled once because it never changes.
        XPathExpression artistNameExpr = xPathEvaluator.compile("following-sibling::artist/name");
        NodeList trackNameNodes = (NodeList) nameExpr.evaluate(document, XPathConstants.NODESET);
        for (int i = 0; i < trackNameNodes.getLength(); i++) {
            Node trackNameNode = trackNameNodes.item(i);
            System.out.println(String.format("Track Name: %s" , trackNameNode.getTextContent()));
            NodeList artistNameNodes = (NodeList) artistNameExpr.evaluate(trackNameNode, XPathConstants.NODESET);
            for (int j = 0; j < artistNameNodes.getLength(); j++) {
                System.out.println(String.format(" - Artist Name: %s", artistNameNodes.item(j).getTextContent()));
            }
        }
    }
}

Categories