handle many XML files in a directory (java)

handle many XML files in a directory (java) - java

I have managed a code to handle a file.
Now I want to use the same code to handle all the XML files which are located in a directory.
Can someone tell me how can I declare the path and how to look for a loop.
Thanks in advance
import org.xml.sax.SAXException;
import org.w3c.dom.*;
import javax.xml.parsers.*;
import java.io.IOException;
public class XmlReadWrite3 {
public static void main(String[] args) {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
try {
DocumentBuilder db = dbf.newDocumentBuilder();
Document doc = db.parse("C:/Users/Desktop/1381.xml");
Element langs = doc.getDocumentElement();
Element filename= getElement(langs, "Filename");
Element beschreibung = getElement(langs, "Beschreibung");
Element name = getElement(langs, "Name");
Element ide = getElement(langs, "IDe");
System.out.println("Filename: " + filename.getTextContent() + "\n" + "Beschreibung: "
+ beschreibung.getTextContent() + "\n" + "Ersteller: " + name.getTextContent() + "\n"
+ "Pnummer: " + ide.getTextContent() + "\n\n");
}catch (ParserConfigurationException pce) {
pce.printStackTrace();
} catch (SAXException se) {
se.printStackTrace();
} catch (IOException ioe) {
ioe.printStackTrace();
}
}
private static Element getElement(Element langs, String tag){
return (Element) langs.getElementsByTagName(tag).item(0);
}
}

Hi you can use the Path and File classes to loop through a directory:
import org.xml.sax.SAXException;
import org.w3c.dom.*;
import javax.xml.parsers.*;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
class XmlReadWrite3 {
public static void main(String[] args) {
// here you enter the path to your directory.
// for example: Path workDir = Paths.get("c:\\workspace\\xml-files")
Path workDir = Paths.get("path/to/dir"); // enter the path to your xml-dir
// the if checks whether the directory truly exists
if (!Files.notExists(workDir)) {
// this part stores all files withn the directory in a list
try (DirectoryStream<Path> directoryStream = Files.newDirectoryStream(workDir)) {
for (Path path : directoryStream) {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
try {
DocumentBuilder db = dbf.newDocumentBuilder();
Document doc = db.parse(path.toString());
Element langs = doc.getDocumentElement();
Element filename = getElement(langs, "Filename");
Element beschreibung = getElement(langs, "Beschreibung");
Element name = getElement(langs, "Name");
Element ide = getElement(langs, "IDe");
System.out.println("Filename: " + filename.getTextContent() + "\n" + "Beschreibung: "
+ beschreibung.getTextContent() + "\n" + "Ersteller: " + name.getTextContent() + "\n"
+ "Pnummer: " + ide.getTextContent() + "\n\n");
} catch (ParserConfigurationException pce) {
pce.printStackTrace();
} catch (SAXException se) {
se.printStackTrace();
} catch (IOException ioe) {
ioe.printStackTrace();
}
}
} catch (Exception e) {
System.out.println(e.getMessage())
}
}
}
private static Element getElement(Element langs, String tag) {
return (Element) langs.getElementsByTagName(tag).item(0);
}
}

Related

Java crawler to get all first and third party cookies

i would like to build a crawler in Java that give me all cookies from a website. This crawler is believed to crawl a list of websites (and obviously the undersides) automatic.
I have used jSoup and Selenium for my plan.
package com.mycompany.app;
import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.openqa.selenium.Cookie;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.*;
public class BasicWebCrawler {
private static Set<String> uniqueURL = new HashSet<String>();
private static List<String> link_list = new ArrayList<String>();
private static Set<String> uniqueCookies = new HashSet<String>();
private static void get_links(String url) {
Connection connection = null;
Connection.Response response = null;
String this_link = null;
try {
connection = Jsoup.connect(url);
response = connection.execute();
//cookies_http = response.cookies();
// fetch the document over HTTP
Document doc = response.parse();
// get all links in page
Elements links = doc.select("a[href]");
if(links.isEmpty()) {
return;
}
for (Element link : links) {
this_link = link.attr("href");
boolean add = uniqueURL.add(this_link);
System.out.println("\n" + this_link + "\n" + "title: " + doc.title());
if (add && (this_link.contains(url))) {
System.out.println("\n" + this_link + "\n" + "title: " + doc.title());
link_list.add(this_link);
get_links(this_link);
}
}
} catch (IOException e) {
e.printStackTrace();
}
}
public static void main(String[] args) {
get_links("https://de.wikipedia.org/wiki/Wikipedia");
/**
* Hier kommt Selenium ins Spiel
*/
WebDriver driver;
System.setProperty("webdriver.chrome.driver", "D:\\crawler\\driver\\chromedriver.exe");
driver = new ChromeDriver();
// create file named Cookies to store Login Information
File file = new File("Cookies.data");
FileWriter fileWrite = null;
BufferedWriter Bwrite = null;
try {
// Delete old file if exists
file.delete();
file.createNewFile();
fileWrite = new FileWriter(file);
Bwrite = new BufferedWriter(fileWrite);
// loop for getting the cookie information
} catch (Exception ex) {
ex.printStackTrace();
}
for(String link : link_list) {
System.out.println("Open Link: " + link);
driver.get(link);
try {
// loop for getting the cookie information
for (Cookie ck : driver.manage().getCookies()) {
String tmp = (ck.getName() + ";" + ck.getValue() + ";" + ck.getDomain() + ";" + ck.getPath() + ";" + ck.getExpiry() + ";" + ck.isSecure());
if(uniqueCookies.add(tmp)) {
Bwrite.write("Link: " + link + "\n" + (ck.getName() + ";" + ck.getValue() + ";" + ck.getDomain() + ";" + ck.getPath() + ";" + ck.getExpiry() + ";" + ck.isSecure())+ "\n\n");
Bwrite.newLine();
}
}
} catch (Exception ex) {
ex.printStackTrace();
}
}
try {
Bwrite.close();
fileWrite.close();
driver.close();
driver.quit();
} catch (Exception ex) {
ex.printStackTrace();
}
}
}
I test this code on a wikipedia page and compare the result with a cookie scanner call CookieMetrix.
My code shows only four cookies:
Link: https://de.wikipedia.org/wiki/Wikipedia:Lizenzbestimmungen_Commons_Attribution-ShareAlike_3.0_Unported
GeoIP;DE:NW:M__nster:51.95:7.54:v4;.wikipedia.org;/;null;true
Link: https://de.wikipedia.org/wiki/Wikipedia:Lizenzbestimmungen_Commons_Attribution-ShareAlike_3.0_Unported
WMF-Last-Access-Global;13-May-2019;.wikipedia.org;/;Mon Jan 19 02:28:33 CET 1970;true
Link: https://de.wikipedia.org/wiki/Wikipedia:Lizenzbestimmungen_Commons_Attribution-ShareAlike_3.0_Unported
WMF-Last-Access;13-May-2019;de.wikipedia.org;/;Mon Jan 19 02:28:33 CET 1970;true
Link: https://de.wikipedia.org/wiki/Wikipedia:Lizenzbestimmungen_Commons_Attribution-ShareAlike_3.0_Unported
mwPhp7Seed;55e;de.wikipedia.org;/;Mon Jan 19 03:09:08 CET 1970;false
But the cookie scanner shows seven. I don't know why my code shows lesser than the CookieMetrix. Can you help me?

JavaDoc for java.util.Set<Cookie> getCookies():
Get all the cookies for the current domain. This is the equivalent of calling "document.cookie" and parsing the result
document.cookie will not return HttpOnly cookies, simply because JavaScript does not allow it.
Also notice that the “CookieMetrix” seems to list cookies from different domains.
Solutions:
To get a listing such as “CookieMetrix” (1+2) you could add a proxy after your browser and sniff the requests.
In case you want to get all cookies for the current domain, including HttpOnly (1), you could try accessing Chrome’s DevTools API directly (afair, it’ll also return HttpOnly cookies)

How do I create individual XML files from the parsed data output of a XML file?

I have written a program (DOM Parser) that parses data from a XMl File. I would like to create an individual file with the corresponding name for each set of data parsed from the xml document. If the parsed output is Single, Double, Triple, I would like to create an individual xml file (Single.xml, Double.xml, Triple.xml)with those corresponding names. How do I create the xml files and give each file the name of my parsed data output? Thanks in advance for your help.
import java.io.IOException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class MyDomParser {
public static void main(String[] args) {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
try {
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse("ENtemplate.xml");
doc.normalize();
NodeList rootNodes = doc.getElementsByTagName("templates");
Node rootNode = rootNodes.item(0);
Element rootElement = (Element) rootNode;
NodeList templateList = rootElement.getElementsByTagName("template");
for(int i=0; i < templateList.getLength(); i++) {
Node theTemplate = templateList.item(i);
Element templateElement = (Element) theTemplate;
System.out.println("Template" + ": " +templateElement.getAttribute("name")+ ".xml");
}
} catch (ParserConfigurationException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (SAXException e) {
// TODO Auto-generated catch block
e.printStackTrace();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}

Just use the File I/O APIs as usual to create your xml files within the loop.
// for every Node in the template List
for(int i=0; i < templateList.getLength(); i++) {
// cast each Node to a template Element
Node theTemplate = templateList.item(i);
Element templateElement = (Element) theTemplate;
// get the xml filename as = template's name attribute + .xml
String fileName = templateElement.getAttribute("name") + ".xml";
// create a Path that points to the current directory
Path filePath = Paths.get(fileName);
// create the xml file at the specified Path
Files.createFile(filePath);
System.out.println("File "+ fileName + ".xml created");
}
The above code would create the xml files in your current working directory. You'll need to handle the checked IOException too.

How to get a specific event/attribute content from an xml string via stAX or SAX

I have a xml POST response which I receive as a string. I need the content of the particular "pnr" (see in xml) to pass it on to another GET request.
I am trying sax and stAX to achieve this but failing miserably.
I used getElementsByTagName and also getAttribute, but no go...
Here's my code and later the xml string that I receive.
Any kind of help will be a gift
package rest;
import javax.xml.parsers.*;
import org.xml.sax.InputSource;
import org.w3c.dom.*;
import java.io.*;
public class ParseXMLString {
public static void main(String arg[]) {
String outputString = RESTClient.postConfirm(); // this is the xml string response I am getting
try {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(outputString));
Document doc = db.parse(is);
String Parentnode = doc.getDocumentElement().getAttribute("pnr");
// Element element = (Element) Parentnode.;
// NodeList name = element.getElementsByTagName("pnr");
// Element line = (Element) name.item(0);
//String IDList = getCharacterDataFromElement(line);
System.out.println(Parentnode);
}
catch (Exception e) {
e.printStackTrace();
}
}
public static String getCharacterDataFromElement(Element e) {
Node child = e.getFirstChild();
if (child instanceof CharacterData) {
CharacterData cd = (CharacterData) child;
return cd.getData();
}
return "?";
}
}
And here is the the XML String I have received:
<?xml version="1.0" encoding="UTF-8"?><Ticket><bookedSeats>3</bookedSeats><bpAddress>Anand Rao Circle</bpAddress><bpLandMark>ganesha temple</bpLandMark><bpLocation> Ghousia College</bpLocation><bpPhoneNo>98798679769</bpPhoneNo><bpTime>1200</bpTime><busServiceName>efdf</busServiceName><busType>Volvo A/C Semi Sleeper (2+2)</busType><commission>66.19</commission><dateOfJourney>2012-10-05</dateOfJourney><destination>Chennai</destination><fare>600.0</fare><issueTime>2012-10-04T15:46:45.073+05:30</issueTime><noOfSeats>1</noOfSeats><passengerMobile>1234567890</passengerMobile><passengerName>Test</passengerName><pnr>RATPKES44974756</pnr><seatDetails><seatDetail><commission>66.19</commission><fare>600.0</fare><gender>MALE</gender><passengerAge>0</passengerAge><passengerMobile>1234567890</passengerMobile><passengerName>Test</passengerName><prime>false</prime><seatName>3</seatName></seatDetail></seatDetails><source>Bangalore</source><status>BOOKED</status><travelsName>Rajratan Travels</travelsName></Ticket>

Instead of SAX or StAX you could do the following with the javax.xml.xpath APIs in the JDK/JRE since Java SE 5:
Demo
import java.io.StringReader;
import javax.xml.xpath.*;
import org.xml.sax.InputSource;
public class Demo {
public static void main(String[] args) throws Exception {
String xml = "<?xml version='1.0' encoding='UTF-8'?><Ticket><bookedSeats>3</bookedSeats><bpAddress>Anand Rao Circle</bpAddress><bpLandMark>ganesha temple</bpLandMark><bpLocation> Ghousia College</bpLocation><bpPhoneNo>98798679769</bpPhoneNo><bpTime>1200</bpTime><busServiceName>efdf</busServiceName><busType>Volvo A/C Semi Sleeper (2+2)</busType><commission>66.19</commission><dateOfJourney>2012-10-05</dateOfJourney><destination>Chennai</destination><fare>600.0</fare><issueTime>2012-10-04T15:46:45.073+05:30</issueTime><noOfSeats>1</noOfSeats><passengerMobile>1234567890</passengerMobile><passengerName>Test</passengerName><pnr>RATPKES44974756</pnr><seatDetails><seatDetail><commission>66.19</commission><fare>600.0</fare><gender>MALE</gender><passengerAge>0</passengerAge><passengerMobile>1234567890</passengerMobile><passengerName>Test</passengerName><prime>false</prime><seatName>3</seatName></seatDetail></seatDetails><source>Bangalore</source><status>BOOKED</status><travelsName>Rajratan Travels</travelsName></Ticket>";
XPathFactory xpf = XPathFactory.newInstance();
XPath xpath = xpf.newXPath();
InputSource inputSource = new InputSource(new StringReader(xml));
String pnr = (String) xpath.evaluate("/Ticket/pnr", inputSource, XPathConstants.STRING);
System.out.println(pnr);
}
}
Output
RATPKES44974756

this bit of code will get you the pnr :
NodeList nodeLst = doc.getElementsByTagName("Ticket");
Node ticket = nodeLst.item(0);
NodeList attr = ticket.getChildNodes();
for (int i = 0; i < attr.getLength(); i++){
if (attr.item(i).getNodeName().equals("pnr"))
System.out.println(attr.item(i).getTextContent());
}

If I were to solve this problem, I'd probably use XPath. But since you specifically asked for StAX, here's an example parser (note that this is just skeleton code to get you started).
import java.io.StringReader;
import java.util.Iterator;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
public class ParseXMLStringStAX {
private static final String PNR = "pnr";
private String characters;
public String parse(String xmlString) throws XMLStreamException, Exception {
XMLEventReader reader = null;
try {
if (xmlString == null || xmlString.isEmpty()) {
throw new IllegalArgumentException("Illegal initializiation (xmlString is null or empty)");
}
StringReader stringReader = new StringReader(xmlString);
XMLInputFactory inputFact = XMLInputFactory.newInstance();
XMLStreamReader streamReader = inputFact.createXMLStreamReader(stringReader);
reader = inputFact.createXMLEventReader(streamReader);
while (reader.hasNext()) {
XMLEvent event = reader.nextEvent();
if (event.isCharacters()) {
characters(event);
}
if (event.isStartElement()) {
startElement(event);
// handle attributes
Iterator<Attribute> attributes = event.asStartElement().getAttributes();
while(attributes.hasNext()) {
attribute(attributes.next());
}
}
if (event.isEndElement()) {
// found what we want?
if (endElement(event)) {
endDocument(null);
break;
}
}
if (event.isStartDocument()) {
startDocument(event);
}
if (event.isEndDocument()) {
endDocument(event);
}
}
} catch (XMLStreamException ex) {
throw ex;
} finally {
try {
if (reader != null) {
reader.close();
}
} catch (XMLStreamException ex) {
}
}
return characters;
}
private void attribute(XMLEvent event) throws Exception {
}
private void characters(XMLEvent event) throws Exception {
Characters asCharacters = event.asCharacters();
if (asCharacters.isWhiteSpace())
return;
if (characters == null) {
characters = asCharacters.getData();
} else {
characters += asCharacters.getData();
}
}
private void startElement(XMLEvent event) throws Exception {
StartElement startElement = event.asStartElement();
String name = startElement.getName().getLocalPart();
characters = null;
}
private boolean endElement(XMLEvent event) throws Exception {
EndElement endElement = event.asEndElement();
String name = endElement.getName().getLocalPart();
if (PNR.equals(name)) {
return true;
}
return false;
}
private void startDocument(XMLEvent event) {
System.out.println("Parsing started");
}
private void endDocument(XMLEvent event) {
System.out.println("Parsing ended");
}
public static void main(String[] argv) throws XMLStreamException, Exception {
String xml = "";
xml += "<Ticket>";
xml += " <bookedSeats>3</bookedSeats>";
xml += " <bpAddress>Anand Rao Circle</bpAddress>";
xml += " <bpLandMark>ganesha temple</bpLandMark>";
xml += " <bpLocation> Ghousia College</bpLocation>";
xml += " <bpPhoneNo>98798679769</bpPhoneNo>";
xml += " <bpTime>1200</bpTime>";
xml += " <busServiceName>efdf</busServiceName>";
xml += " <busType>Volvo A/C Semi Sleeper (2+2)</busType>";
xml += " <commission>66.19</commission>";
xml += " <dateOfJourney>2012-10-05</dateOfJourney>";
xml += " <destination>Chennai</destination>";
xml += " <fare>600.0</fare>";
xml += " <issueTime>2012-10-04T15:46:45.073+05:30</issueTime>";
xml += " <noOfSeats>1</noOfSeats>";
xml += " <passengerMobile>1234567890</passengerMobile>";
xml += " <passengerName>Test</passengerName>";
xml += " <pnr>RATPKES44974756</pnr>";
xml += " <seatDetails>";
xml += " <seatDetail>";
xml += " <commission>66.19</commission>";
xml += " <fare>600.0</fare>";
xml += " <gender>MALE</gender>";
xml += " <passengerAge>0</passengerAge>";
xml += " <passengerMobile>1234567890</passengerMobile>";
xml += " <passengerName>Test</passengerName>";
xml += " <prime>false</prime>";
xml += " <seatName>3</seatName>";
xml += " </seatDetail>";
xml += " </seatDetails>";
xml += " <source>Bangalore</source>";
xml += " <status>BOOKED</status>";
xml += " <travelsName>Rajratan Travels</travelsName>";
xml += "</Ticket>";
ParseXMLStringStAX parser = new ParseXMLStringStAX();
String pnr = parser.parse(xml);
System.out.println("--> Result: " + String.valueOf(pnr));
}
}

XML Document traverser in java

every one knows we can traverse entire xml document using DocumentTraversal's NodeIterator.
my application require some extra work so i decided to write my own XML traverser with the support of java Stack<>.
here is my code (i am not good at coding so the code and logic might look messy).
public class test
{
private static Stack<Node> gStack = new Stack<Node>();
public static void main(String[] args) throws XPathExpressionException
{
String str =
"<section>"
+ "<paragraph>This example combines regular wysiwyg editing of a document with very controlled editing of semantic rich content. The main content can be"
+ "edited like you would in a normal word processor. Though the difference is that the content remains schema valid XML because Xopus will not allow you to perform actions"
+ "on the document that would render it invalid.</paragraph>"
+ "<paragraph>The table is an example of controlled style. The style of the table is controlled by three attributes:</paragraph>"
+ "<unorderedlist>"
+ "<item><paragraph><emphasis>alternaterowcolor</emphasis>, do all rows have the same color, or should the background color alternate?</paragraph></item>"
+ "<item><paragraph><emphasis>border</emphasis>, a limited choice of border styles.</paragraph></item>"
+ "<item><paragraph><emphasis>color</emphasis>, a limited choice of colors.</paragraph></item>"
+ "</unorderedlist>"
+ "<paragraph>You have quite some freedom to style the table, but you can't break the predefined style.</paragraph>"
+ "</section>";
Document domDoc = null;
try
{
DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
ByteArrayInputStream bis = new ByteArrayInputStream(str.getBytes());
domDoc = docBuilder.parse(bis);
}
catch (Exception e)
{
e.printStackTrace();
}
Element root = null;
NodeList list = domDoc.getChildNodes();
for (int i = 0; i < list.getLength(); i++)
{
if (list.item(i) instanceof Element)
{
root = (Element) list.item(i);
break;
}
}
NodeList nlist = root.getChildNodes();
System.out.println("root = " + root.getNodeName() + " child count = " + nlist.getLength());
domTraversor(root);
}
private static void domTraversor(Node node)
{
if (node.getNodeName().equals("#text"))
{
System.out.println("textElem = " + node.getTextContent());
if (node.getNextSibling() != null)
{
gStack.push(node.getNextSibling());
domTraversor(node.getNextSibling());
}
else
{
if (node.getParentNode().getNextSibling() != null)
domTraversor(node.getParentNode().getNextSibling());
}
}
else
{
if (node.getChildNodes().getLength() > 1)
{
gStack.push(node);
Node n = node.getFirstChild();
if (n.getNodeName().equals("#text"))
{
System.out.println("textElem = " + n.getTextContent());
if (n.getNextSibling() != null)
{
gStack.push(n.getNextSibling());
domTraversor(n.getNextSibling());
}
}
else
{
gStack.push(n);
domTraversor(n);
}
}
else if (node.getChildNodes().getLength() == 1)
{
Node fnode = node.getFirstChild();
if (fnode.getChildNodes().getLength() > 1)
{
gStack.push(fnode);
domTraversor(fnode);
}
else
{
if (!fnode.getNodeName().equals("#text"))
{
gStack.push(fnode);
domTraversor(fnode);
}
else
{
System.out.println("textElem = " + fnode.getTextContent());
if (fnode.getNodeName().equals("#text"))
{
if (node.getNextSibling() != null)
{
gStack.push(node.getNextSibling());
domTraversor(node.getNextSibling());
}
else
{
if (!gStack.empty())
{
Node sibPn = gStack.pop();
if (sibPn.getNextSibling() == null)
{
sibPn = gStack.pop();
}
domTraversor(sibPn.getNextSibling());
}
}
}
else
{
if (fnode.getNextSibling() != null)
{
domTraversor(fnode.getNextSibling());
}
else
{
if (!gStack.empty())
{
Node sibPn = gStack.pop().getNextSibling();
domTraversor(sibPn);
}
}
}
}
}
}
}
}
}
and it is working fine with some xml document, but not with the document which has tag like.
<unorderedlist>
<item>
<paragraph>
<emphasis>alternaterowcolor</emphasis>
, do all rows have the same color, or should the background
color
alternate?
</paragraph>
</item>
<item>
<paragraph>
<emphasis>border</emphasis>
, a limited choice of border styles.
</paragraph>
</item>
<item>
<paragraph>
<emphasis>color</emphasis>
, a limited choice of colors.
</paragraph>
</item>
</unorderedlist>
here is the scenario if any element has more than three nested children my code get stopped and not going further.
is there any one has a better implementation, please suggest.

try this way
Element e;
NodeList n;
Document doc=StudyParser.XMLfromString(xmlString);
String starttag=doc.getFirstChild().getNodeName();
Log.e("start",starttag );
n=doc.getElementsByTagName(starttag);
for(int i=0;i<n.getLength();i++){
e=(Element)n.item(i);
NodeList np = e.getElementsByTagName("item");
for(int j=0;j<np.getLength();j++){
e=(Element)n.item(i);
try{
String para=StudyParser.getValue(e, "paragraph");
Log.e("paravalue",para);
String emp=StudyParser.getValue(e, "emphasis");
Log.e("empval",emp);
}catch(Exception e){
e.printStackTrace();
}
}
}
StudyParser Class
import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
public class StudyParser {
public StudyParser() {
}
public final static Document XMLfromString(String xml){
Document doc = null;
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
try {
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(xml));
doc = db.parse(is);
} catch (ParserConfigurationException e) {
System.out.println("XML parse error: " + e.getMessage());
return null;
} catch (SAXException e) {
System.out.println("Wrong XML file structure: " + e.getMessage());
return null;
} catch (IOException e) {
System.out.println("I/O exeption: " + e.getMessage());
return null;
}
return doc;
}
public static String getXMLstring(String xml){
String line = null;
try {
DefaultHttpClient httpClient = new DefaultHttpClient();
HttpPost httpPost = new HttpPost(xml);
HttpResponse httpResponse = httpClient.execute(httpPost);
HttpEntity httpEntity = httpResponse.getEntity();
line = EntityUtils.toString(httpEntity);
} catch (UnsupportedEncodingException e) {
line = "<results status=\"error\"><msg>Can't connect to server</msg></results>";
} catch (MalformedURLException e) {
line = "<results status=\"error\"><msg>Can't connect to server</msg></results>";
} catch (IOException e) {
line = "<results status=\"error\"><msg>Can't connect to server</msg></results>";
}
return line;
}
public static String getXML(InputStream is)throws IOException {
BufferedInputStream bis = new BufferedInputStream(is);
ByteArrayOutputStream buf = new ByteArrayOutputStream();
int result = bis.read();
while(result != -1) {
byte b = (byte)result;
buf.write(b);
result = bis.read();
}
return buf.toString();
}
public final static String getElementValue( Node elem ) {
Node kid;
if( elem != null){
if (elem.hasChildNodes()){
for( kid = elem.getFirstChild(); kid != null; kid = kid.getNextSibling() ){
if( kid.getNodeType() == Node.TEXT_NODE ){
return kid.getNodeValue();
}
}
}
}
return "";
}
public static int numResults(Document doc){
Node results = doc.getDocumentElement();
int res = -1;
try{
res = Integer.valueOf(results.getAttributes().getNamedItem("Categories").getNodeValue());
}catch(Exception e ){
res = -1;
}
return res;
}
public static String getValue(Element item, String str) {
NodeList n = item.getElementsByTagName(str);
return StudyParser.getElementValue(n.item(0));
}
}
Just Normal demo for dynamic xml i have assumed the same xml and but without using getElementByTagName there are many properties you can check accodringlly see
doc = StudyParser.XMLfromString(xml);
String starttag=doc.getFirstChild().getNodeName();
Log.e("start",starttag );
n=doc.getElementsByTagName(starttag);
for(int i=0;i<n.getLength();i++){
e=(Element)n.item(i);
try{
Log.e("1234",""+ e.getTextContent());
}catch(Exception e){
e.printStackTrace();
}
}

Output on namespaced xpath in java

I have the following code and have had some trouble with a specific field and it's output. The namespace is connected but doesn't seem to be outputting on the required field. Any info on this would be great.
import org.w3c.dom.Document;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPathFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathExpressionException;
import java.io.ByteArrayInputStream;
import java.io.IOException;
public class test
{
public static void main(String args[])
{
String xmlStr = "<aws:UrlInfoResponse xmlns:aws=\"http://alexa.amazonaws.com/doc/2005-10-05/\">\n" +
" <aws:Response xmlns:aws=\"http://awis.amazonaws.com/doc/2005-07-11\">\n" +
" <aws:OperationRequest>\n" +
" <aws:RequestId>blah</aws:RequestId>\n" +
" </aws:OperationRequest>\n" +
" <aws:UrlInfoResult>\n" +
" <aws:Alexa>\n" +
" <aws:TrafficData>\n" +
" <aws:DataUrl type=\"canonical\">harvard.edu/</aws:DataUrl>\n" +
" <aws:Rank>1635</aws:Rank>\n" +
" </aws:TrafficData>\n" +
" </aws:Alexa>\n" +
" </aws:UrlInfoResult>\n" +
" <aws:ResponseStatus xmlns:aws=\"http://alexa.amazonaws.com/doc/2005-10-05/\">\n" +
" <aws:StatusCode>Success</aws:StatusCode>\n" +
" </aws:ResponseStatus>\n" +
" </aws:Response>\n" +
"</aws:UrlInfoResponse>";
DocumentBuilderFactory xmlFact = DocumentBuilderFactory.newInstance();
xmlFact.setNamespaceAware(true);
DocumentBuilder builder = null;
try {
builder = xmlFact.newDocumentBuilder();
} catch (ParserConfigurationException e) {
e.printStackTrace(); }
Document doc = null;
try {
doc = builder.parse(
new ByteArrayInputStream( xmlStr.getBytes()));
} catch (SAXException e) {
e.printStackTrace(); } catch (IOException e) {
e.printStackTrace(); }
System.out.println(doc.getDocumentElement().getNamespaceURI());
System.out.println(xmlFact.isNamespaceAware());
String xpathStr = "//aws:OperationRequest";
XPathFactory xpathFact = XPathFactory.newInstance();
XPath xpath = xpathFact.newXPath();
String result = null;
try {
result = xpath.evaluate(xpathStr, doc);
} catch (XPathExpressionException e) {
e.printStackTrace(); //To change body of catch statement use File | Settings | File Templates.
}
System.out.println("XPath result is \"" + result + "\"");
}
}

namespace matching in an Xpath isn't just string matching the prefix. You have to actually define a NamespaceContext object and associate it with the xPath. It doesn't even actually matter at all if the prefixes are the same in the document and in the xPath
private NamespaceContext ns = new NamespaceContext() {
public String getNamespaceURI(String prefix) {
if (prefix.equals("ns1") return "http://alexa.amazonaws.com/doc/2005-10-05/";
else return XMLConstants.NULL_NS_URI;
}
public String getPrefix(String namespace) {
throw new UnsupportedOperationException();
}
public Iterator getPrefixes(String namespace) {
throw new UnsupportedOperationException();
}};
XPathFactory xpfactory = XPathFactory.newInstance();
XPath xpath = xpfactory.newXPath();
xpath.setNamespaceContext(ns);
String xpathStr = "//ns1:OperationRequest";
//and so on

We Keep Coding

Java is a programming language and computing platform first released by Sun Microsystems in 1995.

handle many XML files in a directory (java) - java

Related

Java crawler to get all first and third party cookies

How do I create individual XML files from the parsed data output of a XML file?

How to get a specific event/attribute content from an xml string via stAX or SAX

XML Document traverser in java

Output on namespaced xpath in java

Categories

Resources