XML string parsing in Java - java

I am trying to parse through a XML format String for example;
<params city="SANTA ANA" dateOfBirth="1970-01-01"/>
My goal is to add attributes name in an array list such as {city,dateOfBirth} and values of the attributes in another array list such as {Santa Ana, 1970-01-01}
any advice, please help!

Create SAXParserFactory.
Create SAXParser.
Create YourHandler, which extends DefaultHandler.
Parse your file using SAXParser and YourHandler.
For example:
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser parser = factory.newSAXParser();
parser.parse(yourFile, new YourHandler());
} catch (ParserConfigurationException e) {
System.err.println(e.getMessage());
}
where, yourFile - object of the File class.
In YourHandler class:
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
public class YourHandler extends DefaultHandler {
String tag = "params"; // needed tag
String city = "city"; // name of the attribute
String value; // your value of the city
#Override
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
if(localName.equals(tag)) {
value = attributes.getValue(city);
}
}
public String getValue() {
return value;
}
}`
More information for SAX parser and DefaultHandler here and here respectively.

Using JDOM (http://www.jdom.org/docs/apidocs/):
String myString = "<params city='SANTA ANA' dateOfBirth='1970-01-01'/>";
SAXBuilder builder = new SAXBuilder();
Document myStringAsXML = builder.build(new StringReader(myString));
Element rootElement = myStringAsXML.getRootElement();
ArrayList<String> attributeNames = new ArrayList<String>();
ArrayList<String> values = new ArrayList<String>();
List<Attribute> attributes = new ArrayList<Attribute>();
attributes.addAll(rootElement.getAttributes());
Iterator<Element> childIterator = rootElement.getDescendants();
while (childIterator.hasNext()) {
Element childElement = childIterator.next();
attributes.addAll(childElement.getAttributes());
}
for (Attribute attribute: attributes) {
attributeNames.add(attribute.getName());
values.add(attribute.getValue());
}
System.out.println("Attribute names: " + attributeNames);
System.out.println("Values: " + values);

Related

I am trying to use a url xml parse but it looks like i keep getting an empty xml

I am trying to get info from a weather API called even though when i am making the request i am getting a response, but when i am trying to get only a specific part of the response i get null response every time can someone help? here is the code for my handler :
package weathercalls;
import java.util.ArrayList;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.DefaultHandler;
public class Handler extends DefaultHandler
{
// Create three array lists to store the data
public ArrayList<Integer> lows = new ArrayList<Integer>();
public ArrayList<Integer> highs = new ArrayList<Integer>();
public ArrayList<String> regions = new ArrayList<String>();
// Make sure that the code in DefaultHandler's
// constructor is called:
public Handler()
{
super();
}
/*** Below are the three methods that we are extending ***/
#Override
public void startDocument()
{
System.out.println("Start document");
}
#Override
public void endDocument()
{
System.out.println("End document");
}
// This is where all the work is happening:
#Override
public void startElement(String uri, String name, String qName, Attributes atts)
{
if(qName.compareTo("region") == 0)
{
String region = atts.getLocalName(0);
System.out.println("Day: " + region);
this.regions.add(region);
}
if(qName.compareToIgnoreCase("wind_degree") == 0)
{
int low = atts.getLength();
System.out.println("Low: " + low);
this.lows.add(low);
}
if(qName.compareToIgnoreCase("high") == 0)
{
int high = Integer.parseInt(atts.getValue(0));
System.out.println("High: " + high);
this.highs.add(high);
}
}
}
and here is my main file code :
package weathercalls;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.SAXException;
public class weatherCalls {
public static void main(String[] args) throws Exception {
//Main url
String main_url = "http://api.weatherapi.com/v1/";
//Live or Weekly forecast
String live_weather = "current.xml?key=";
//String sevendays_weather = "orecast.xml?key=";
//API Key + q
String API_Key = "c2e285e55db74def97f151114201701&q=";
//Location Setters
String location = "London";
InputSource inSource = null;
InputStream in = null;
XMLReader xr = null;
/**
URL weather = new URL(main_url + live_weather + API_Key + location);
URLConnection yc = weather.openConnection();
BufferedReader in1 = new BufferedReader(
new InputStreamReader(
yc.getInputStream()));
String inputLine;
while ((inputLine = in1.readLine()) != null)
System.out.println(inputLine);
in1.close();**/
try
{
// Turn the string into a URL object
String complete_url = main_url + live_weather + API_Key + location;
URL urlObject = new URL(complete_url);
// Open the stream (which returns an InputStream):
in = urlObject.openStream();
/** Now parse the data (the stream) that we received back ***/
// Create an XML reader
SAXParserFactory parserFactory = SAXParserFactory.newInstance();
SAXParser parser = parserFactory.newSAXParser();
xr = parser.getXMLReader();
// Tell that XML reader to use our special Google Handler
Handler ourSpecialHandler = new Handler();
xr.setContentHandler(ourSpecialHandler);
// We have an InputStream, but let's just wrap it in
// an InputSource (the SAX parser likes it that way)
inSource = new InputSource(in);
// And parse it!
xr.parse(inSource);
System.out.println(complete_url);
System.out.println(urlObject);
System.out.println(in);
System.out.println(xr);
System.out.println(inSource);
System.out.println(parser);
}
catch(IOException ioe)
{
ioe.printStackTrace();
}
catch(SAXException se)
{
se.printStackTrace();
}
}
}
and this is my console print:
Start document
Day: null
Low: 0
End document
http://api.weatherapi.com/v1/current.xml?key=c2e285e55db74def97f151114201701&q=London
http://api.weatherapi.com/v1/current.xml?key=c2e285e55db74def97f151114201701&q=London
sun.net.www.protocol.http.HttpURLConnection$HttpInputStream#2471cca7
com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl$JAXPSAXParser#5fe5c6f
org.xml.sax.InputSource#6979e8cb
com.sun.org.apache.xerces.internal.jaxp.SAXParserImpl#763d9750
I think you are trying to extract the values from the XML tags and if it is the case then you are doing it wrong. Attributes object contains the attributes of a particular tag and to get the value you have to do some extra work. Similar to the start of a tag, there are separate events for the contents and the end of a tag. current_tag variable will keep track of the current tag being processed. Below is a sample code:
class Handler extends DefaultHandler {
// Create three array lists to store the data
public ArrayList<Integer> lows = new ArrayList<Integer>();
public ArrayList<Integer> highs = new ArrayList<Integer>();
public ArrayList<String> regions = new ArrayList<String>();
// Make sure that the code in DefaultHandler's
// constructor is called:
public Handler() {
super();
}
/*** Below are the three methods that we are extending ***/
#Override
public void startDocument() {
System.out.println("Start document");
}
#Override
public void endDocument() {
System.out.println("End document");
}
//Keeps track of the current tag;
String currentTag = "";
// This is where all the work is happening:
#Override
public void startElement(String uri, String name, String qName, Attributes atts) {
//Save the current tag being handled
currentTag = qName;
}
//Detect end tag
#Override
public void endElement(String uri, String localName, String qName) throws SAXException {
//Reset it
currentTag = "";
}
#Override
public void characters(char[] ch, int start, int length) throws SAXException {
//Rules based on current tag
switch (currentTag) {
case "region":
String region = String.valueOf(ch, start, length);
this.regions.add(region);
System.out.println("Day: " + region);
break;
case "wind_degree":
int low = Integer.parseInt(String.valueOf(ch, start, length));
System.out.println("Low: " + low);
this.lows.add(low);
break;
case "high":
int high = Integer.parseInt(String.valueOf(ch, start, length));
System.out.println("High: " + high);
this.highs.add(high);
break;
}
}}
NOTE: Please refrain from sharing your API keys or passwords on the internet.

Create xml Elements and sub elements in Java using dom [duplicate]

I have to read and write to and from an XML file. What is the easiest way to read and write XML files using Java?
Here is a quick DOM example that shows how to read and write a simple xml file with its dtd:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!DOCTYPE roles SYSTEM "roles.dtd">
<roles>
<role1>User</role1>
<role2>Author</role2>
<role3>Admin</role3>
<role4/>
</roles>
and the dtd:
<?xml version="1.0" encoding="UTF-8"?>
<!ELEMENT roles (role1,role2,role3,role4)>
<!ELEMENT role1 (#PCDATA)>
<!ELEMENT role2 (#PCDATA)>
<!ELEMENT role3 (#PCDATA)>
<!ELEMENT role4 (#PCDATA)>
First import these:
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import org.xml.sax.*;
import org.w3c.dom.*;
Here are a few variables you will need:
private String role1 = null;
private String role2 = null;
private String role3 = null;
private String role4 = null;
private ArrayList<String> rolev;
Here is a reader (String xml is the name of your xml file):
public boolean readXML(String xml) {
rolev = new ArrayList<String>();
Document dom;
// Make an instance of the DocumentBuilderFactory
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
try {
// use the factory to take an instance of the document builder
DocumentBuilder db = dbf.newDocumentBuilder();
// parse using the builder to get the DOM mapping of the
// XML file
dom = db.parse(xml);
Element doc = dom.getDocumentElement();
role1 = getTextValue(role1, doc, "role1");
if (role1 != null) {
if (!role1.isEmpty())
rolev.add(role1);
}
role2 = getTextValue(role2, doc, "role2");
if (role2 != null) {
if (!role2.isEmpty())
rolev.add(role2);
}
role3 = getTextValue(role3, doc, "role3");
if (role3 != null) {
if (!role3.isEmpty())
rolev.add(role3);
}
role4 = getTextValue(role4, doc, "role4");
if ( role4 != null) {
if (!role4.isEmpty())
rolev.add(role4);
}
return true;
} catch (ParserConfigurationException pce) {
System.out.println(pce.getMessage());
} catch (SAXException se) {
System.out.println(se.getMessage());
} catch (IOException ioe) {
System.err.println(ioe.getMessage());
}
return false;
}
And here a writer:
public void saveToXML(String xml) {
Document dom;
Element e = null;
// instance of a DocumentBuilderFactory
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
try {
// use factory to get an instance of document builder
DocumentBuilder db = dbf.newDocumentBuilder();
// create instance of DOM
dom = db.newDocument();
// create the root element
Element rootEle = dom.createElement("roles");
// create data elements and place them under root
e = dom.createElement("role1");
e.appendChild(dom.createTextNode(role1));
rootEle.appendChild(e);
e = dom.createElement("role2");
e.appendChild(dom.createTextNode(role2));
rootEle.appendChild(e);
e = dom.createElement("role3");
e.appendChild(dom.createTextNode(role3));
rootEle.appendChild(e);
e = dom.createElement("role4");
e.appendChild(dom.createTextNode(role4));
rootEle.appendChild(e);
dom.appendChild(rootEle);
try {
Transformer tr = TransformerFactory.newInstance().newTransformer();
tr.setOutputProperty(OutputKeys.INDENT, "yes");
tr.setOutputProperty(OutputKeys.METHOD, "xml");
tr.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
tr.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM, "roles.dtd");
tr.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
// send DOM to file
tr.transform(new DOMSource(dom),
new StreamResult(new FileOutputStream(xml)));
} catch (TransformerException te) {
System.out.println(te.getMessage());
} catch (IOException ioe) {
System.out.println(ioe.getMessage());
}
} catch (ParserConfigurationException pce) {
System.out.println("UsersXML: Error trying to instantiate DocumentBuilder " + pce);
}
}
getTextValue is here:
private String getTextValue(String def, Element doc, String tag) {
String value = def;
NodeList nl;
nl = doc.getElementsByTagName(tag);
if (nl.getLength() > 0 && nl.item(0).hasChildNodes()) {
value = nl.item(0).getFirstChild().getNodeValue();
}
return value;
}
Add a few accessors and mutators and you are done!
Writing XML using JAXB (Java Architecture for XML Binding):
http://www.mkyong.com/java/jaxb-hello-world-example/
package com.mkyong.core;
import javax.xml.bind.annotation.XmlAttribute;
import javax.xml.bind.annotation.XmlElement;
import javax.xml.bind.annotation.XmlRootElement;
#XmlRootElement
public class Customer {
String name;
int age;
int id;
public String getName() {
return name;
}
#XmlElement
public void setName(String name) {
this.name = name;
}
public int getAge() {
return age;
}
#XmlElement
public void setAge(int age) {
this.age = age;
}
public int getId() {
return id;
}
#XmlAttribute
public void setId(int id) {
this.id = id;
}
}
package com.mkyong.core;
import java.io.File;
import javax.xml.bind.JAXBContext;
import javax.xml.bind.JAXBException;
import javax.xml.bind.Marshaller;
public class JAXBExample {
public static void main(String[] args) {
Customer customer = new Customer();
customer.setId(100);
customer.setName("mkyong");
customer.setAge(29);
try {
File file = new File("C:\\file.xml");
JAXBContext jaxbContext = JAXBContext.newInstance(Customer.class);
Marshaller jaxbMarshaller = jaxbContext.createMarshaller();
// output pretty printed
jaxbMarshaller.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
jaxbMarshaller.marshal(customer, file);
jaxbMarshaller.marshal(customer, System.out);
} catch (JAXBException e) {
e.printStackTrace();
}
}
}
The above answer only deal with DOM parser (that normally reads the entire file in memory and parse it, what for a big file is a problem), you could use a SAX parser that uses less memory and is faster (anyway that depends on your code).
SAX parser callback some functions when it find a start of element, end of element, attribute, text between elements, etc, so it can parse the document and at the same time you
get what you need.
Some example code:
http://www.mkyong.com/java/how-to-read-xml-file-in-java-sax-parser/
The answers only cover DOM / SAX and a copy paste implementation of a JAXB example.
However, one big area of when you are using XML is missing. In many projects / programs there is a need to store / retrieve some basic data structures. Your program has already a classes for your nice and shiny business objects / data structures, you just want a comfortable way to convert this data to a XML structure so you can do more magic on it (store, load, send, manipulate with XSLT).
This is where XStream shines. You simply annotate the classes holding your data, or if you do not want to change those classes, you configure a XStream instance for marshalling (objects -> xml) or unmarshalling (xml -> objects).
Internally XStream uses reflection, the readObject and readResolve methods of standard Java object serialization.
You get a good and speedy tutorial here:
To give a short overview of how it works, I also provide some sample code which marshalls and unmarshalls a data structure.
The marshalling / unmarshalling happens all in the main method, the rest is just code to generate some test objects and populate some data to them.
It is super simple to configure the xStream instance and marshalling / unmarshalling is done with one line of code each.
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import com.thoughtworks.xstream.XStream;
public class XStreamIsGreat {
public static void main(String[] args) {
XStream xStream = new XStream();
xStream.alias("good", Good.class);
xStream.alias("pRoDuCeR", Producer.class);
xStream.alias("customer", Customer.class);
Producer a = new Producer("Apple");
Producer s = new Producer("Samsung");
Customer c = new Customer("Someone").add(new Good("S4", 10, new BigDecimal(600), s))
.add(new Good("S4 mini", 5, new BigDecimal(450), s)).add(new Good("I5S", 3, new BigDecimal(875), a));
String xml = xStream.toXML(c); // objects -> xml
System.out.println("Marshalled:\n" + xml);
Customer unmarshalledCustomer = (Customer)xStream.fromXML(xml); // xml -> objects
}
static class Good {
Producer producer;
String name;
int quantity;
BigDecimal price;
Good(String name, int quantity, BigDecimal price, Producer p) {
this.producer = p;
this.name = name;
this.quantity = quantity;
this.price = price;
}
}
static class Producer {
String name;
public Producer(String name) {
this.name = name;
}
}
static class Customer {
String name;
public Customer(String name) {
this.name = name;
}
List<Good> stock = new ArrayList<Good>();
Customer add(Good g) {
stock.add(g);
return this;
}
}
}
Ok, already having DOM, JaxB and XStream in the list of answers, there is still a complete different way to read and write XML: Data projection You can decouple the XML structure and the Java structure by using a library that provides read and writeable views to the XML Data as Java interfaces. From the tutorials:
Given some real world XML:
<weatherdata>
<weather
...
degreetype="F"
lat="50.5520210266113" lon="6.24060010910034"
searchlocation="Monschau, Stadt Aachen, NW, Germany"
... >
<current ... skytext="Clear" temperature="46"/>
</weather>
</weatherdata>
With data projection you can define a projection interface:
public interface WeatherData {
#XBRead("/weatherdata/weather/#searchlocation")
String getLocation();
#XBRead("/weatherdata/weather/current/#temperature")
int getTemperature();
#XBRead("/weatherdata/weather/#degreetype")
String getDegreeType();
#XBRead("/weatherdata/weather/current/#skytext")
String getSkytext();
/**
* This would be our "sub projection". A structure grouping two attribute
* values in one object.
*/
interface Coordinates {
#XBRead("#lon")
double getLongitude();
#XBRead("#lat")
double getLatitude();
}
#XBRead("/weatherdata/weather")
Coordinates getCoordinates();
}
And use instances of this interface just like POJOs:
private void printWeatherData(String location) throws IOException {
final String BaseURL = "http://weather.service.msn.com/find.aspx?outputview=search&weasearchstr=";
// We let the projector fetch the data for us
WeatherData weatherData = new XBProjector().io().url(BaseURL + location).read(WeatherData.class);
// Print some values
System.out.println("The weather in " + weatherData.getLocation() + ":");
System.out.println(weatherData.getSkytext());
System.out.println("Temperature: " + weatherData.getTemperature() + "°"
+ weatherData.getDegreeType());
// Access our sub projection
Coordinates coordinates = weatherData.getCoordinates();
System.out.println("The place is located at " + coordinates.getLatitude() + ","
+ coordinates.getLongitude());
}
This works even for creating XML, the XPath expressions can be writable.
SAX parser is working differently with a DOM parser, it neither load any XML document into memory nor create any object representation of the XML document. Instead, the SAX parser use callback function org.xml.sax.helpers.DefaultHandler to informs clients of the XML document structure.
SAX Parser is faster and uses less memory than DOM parser.
See following SAX callback methods :
startDocument() and endDocument() – Method called at the start and end of an XML document.
startElement() and endElement() – Method called at the start and end of a document element.
characters() – Method called with the text contents in between the start and end tags of an XML document element.
XML file
Create a simple XML file.
<?xml version="1.0"?>
<company>
<staff>
<firstname>yong</firstname>
<lastname>mook kim</lastname>
<nickname>mkyong</nickname>
<salary>100000</salary>
</staff>
<staff>
<firstname>low</firstname>
<lastname>yin fong</lastname>
<nickname>fong fong</nickname>
<salary>200000</salary>
</staff>
</company>
XML parser:
Java file Use SAX parser to parse the XML file.
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
public class ReadXMLFile {
public static void main(String argv[]) {
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser saxParser = factory.newSAXParser();
DefaultHandler handler = new DefaultHandler() {
boolean bfname = false;
boolean blname = false;
boolean bnname = false;
boolean bsalary = false;
public void startElement(String uri, String localName,String qName,
Attributes attributes) throws SAXException {
System.out.println("Start Element :" + qName);
if (qName.equalsIgnoreCase("FIRSTNAME")) {
bfname = true;
}
if (qName.equalsIgnoreCase("LASTNAME")) {
blname = true;
}
if (qName.equalsIgnoreCase("NICKNAME")) {
bnname = true;
}
if (qName.equalsIgnoreCase("SALARY")) {
bsalary = true;
}
}
public void endElement(String uri, String localName,
String qName) throws SAXException {
System.out.println("End Element :" + qName);
}
public void characters(char ch[], int start, int length) throws SAXException {
if (bfname) {
System.out.println("First Name : " + new String(ch, start, length));
bfname = false;
}
if (blname) {
System.out.println("Last Name : " + new String(ch, start, length));
blname = false;
}
if (bnname) {
System.out.println("Nick Name : " + new String(ch, start, length));
bnname = false;
}
if (bsalary) {
System.out.println("Salary : " + new String(ch, start, length));
bsalary = false;
}
}
};
saxParser.parse("c:\\file.xml", handler);
} catch (Exception e) {
e.printStackTrace();
}
}
}
Result
Start Element :company
Start Element :staff
Start Element :firstname
First Name : yong
End Element :firstname
Start Element :lastname
Last Name : mook kim
End Element :lastname
Start Element :nickname
Nick Name : mkyong
End Element :nickname
and so on...
Source(MyKong) - http://www.mkyong.com/java/how-to-read-xml-file-in-java-sax-parser/

Java extracting every HTML content in a String list

I m actually making my first RSS reader with JAVA android and I have a problem.
In fact, I get some RSS informations, but there are HTML tags all around.
What I need is to extract every HTML content in these tags and put them in a string list, but I dont know how to do that.
Can you help me with this ?
Thanks for advance
If your rss is xml format, you will need dom4j.jar
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
public class test {
public static void main(String[] args) throws Exception {
String rssUrl = ""; // paste url here
List<RssDocument> docList = new ArrayList<RssDocument>();
try
{
SAXReader saxReader = new SAXReader();
Document document = saxReader.read(rssUrl);
Element channel = (Element) document.getRootElement().element("channel");
for (Iterator i = channel.elementIterator("item"); i.hasNext();)
{
Element element = (Element) i.next();
String title = element.elementText("title");
String pubDate = element.elementText("pubDate");
String description = element.elementText("description");
RssDocument doc = new RssDocument(title, pubDate, description);
docList.add(doc);
}
}
catch (Exception e)
{
e.printStackTrace();
}
// do something with docList
}
public static class RssDocument {
String title;
String pubDate;
String description;
RssDocument(String title, String pubDate, String description) {
this.title = title;
this.pubDate = pubDate;
this.description = description;
}
}
}
Paste your rss url into variable "rssUrl", and run this main. You will get a list of RSS document, which contains title, published date and description.
If what you need is only the title and description of every rss item, use the following codes.
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
public class test {
public static void main(String[] args) throws Exception {
String rssUrl = ""; // paste url here
List<String> strList = new ArrayList<String>();
try
{
SAXReader saxReader = new SAXReader();
Document document = saxReader.read(rssUrl);
Element channel = (Element) document.getRootElement().element("channel");
for (Iterator i = channel.elementIterator("item"); i.hasNext();)
{
Element element = (Element) i.next();
String title = element.elementText("title").replaceAll("\\<.*?>","");
String description = element.elementText("description").replaceAll("\\<.*?>","");
strList.add(title + " " + description);
}
}
catch (Exception e)
{
e.printStackTrace();
}
}
}
Then strList will be the list of string, which contains title and description.
For example:
{
"title1 description1"
"title2 description2"
"title3 description3"
}
Assume you have a html content called htmlString, you can clean that with regular expressions.
String htmlString = "<tr><td>12345</td></tr>";
String noHTMLString = htmlString.replaceAll("\\<.*?>","");
This should extract a list of all contents between html tags into the list called matches. You should modify the regex in brackets to match your content. The current version only matches text containing digits, letters, dots, commas, brackets, minuses and spaces.
Pattern pattern = Pattern.compile("<\\w+>([\\w\\s\\.,\\-\\(\\)]+)</\\w+>");
Matcher matcher = pattern.matcher(content);
List<String> matches = new ArrayList<String>();
while(matcher.find()){
matches.add(matcher.group(1));
}

Getting Parent Child Hierarchy in Sax XML parser

I'm using SAX (Simple API for XML) to parse an XML document. I'm getting output for all the tags the file have, but i want it to show the tags in parent child hierarchy.
For Example:
This is my output
<dblp>
<www>
<author>
</author><title>
</title><url>
</url><year>
</year></www><inproceedings>
<month>
</month><pages>
</pages><booktitle>
</booktitle><note>
</note><cdrom>
</cdrom></inproceedings><article>
<journal>
</journal><volume>
</volume></article><ee>
</ee><book>
<publisher>
</publisher><isbn>
</isbn></book><incollection>
<crossref>
</crossref></incollection><editor>
</editor><series>
</series></dblp>
But i want it to display the output like this (it displays the children with extra spacing (that's how i want it to be))
<dblp>
<www>
<author>
</author>
<title>
</title>
<url>
</url>
<year>
</year>
</www>
<inproceedings>
<month>
</month>
<pages>
</pages>
<booktitle>
</booktitle>
<note>
</note>
<cdrom>
</cdrom>
</inproceedings>
<article>
<journal>
</journal>
<volume>
</volume>
</article>
<ee>
</ee>
<book>
<publisher>
</publisher>
<isbn>
</isbn>
</book>
<incollection>
<crossref>
</crossref>
</incollection>
<editor>
</editor>
<series>
</series>
</dblp>
But i can't figure out how can i detect that parser is parsing a parent tag or a children.
here is my code:
package com.teamincredibles.sax;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
public class Parser extends DefaultHandler {
public void getXml() {
try {
SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
SAXParser saxParser = saxParserFactory.newSAXParser();
final MySet openingTagList = new MySet();
final MySet closingTagList = new MySet();
DefaultHandler defaultHandler = new DefaultHandler() {
public void startDocument() throws SAXException {
System.out.println("Starting Parsing...\n");
}
public void endDocument() throws SAXException {
System.out.print("\n\nDone Parsing!");
}
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
if (!openingTagList.contains(qName)) {
openingTagList.add(qName);
System.out.print("<" + qName + ">\n");
}
}
public void characters(char ch[], int start, int length)
throws SAXException {
/*for(int i=start; i<(start+length);i++){
System.out.print(ch[i]);
}*/
}
public void endElement(String uri, String localName, String qName)
throws SAXException {
if (!closingTagList.contains(qName)) {
closingTagList.add(qName);
System.out.print("</" + qName + ">");
}
}
};
saxParser.parse("xml/sample.xml", defaultHandler);
} catch (Exception e) {
e.printStackTrace();
}
}
public static void main(String args[]) {
Parser readXml = new Parser();
readXml.getXml();
}
}
You can consider a StAX implementation:
package be.duo.stax;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
public class StaxExample {
public void getXml() {
InputStream is = null;
try {
is = new FileInputStream("c:\\dev\\sample.xml");
XMLInputFactory inputFactory = XMLInputFactory.newInstance();
XMLStreamReader reader = inputFactory.createXMLStreamReader(is);
parse(reader, 0);
} catch(Exception ex) {
System.out.println(ex.getMessage());
} finally {
if(is != null) {
try {
is.close();
} catch(IOException ioe) {
System.out.println(ioe.getMessage());
}
}
}
}
private void parse(XMLStreamReader reader, int depth) throws XMLStreamException {
while(true) {
if(reader.hasNext()) {
switch(reader.next()) {
case XMLStreamConstants.START_ELEMENT:
writeBeginTag(reader.getLocalName(), depth);
parse(reader, depth+1);
break;
case XMLStreamConstants.END_ELEMENT:
writeEndTag(reader.getLocalName(), depth-1);
return;
}
}
}
}
private void writeBeginTag(String tag, int depth) {
for(int i = 0; i < depth; i++) {
System.out.print(" ");
}
System.out.println("<" + tag + ">");
}
private void writeEndTag(String tag, int depth) {
for(int i = 0; i < depth; i++) {
System.out.print(" ");
}
System.out.println("</" + tag + ">");
}
public static void main(String[] args) {
StaxExample app = new StaxExample();
app.getXml();
}
}
There is an idiom for StAX with a loop like this for every tag in the XML:
private MyTagObject parseMyTag(XMLStreamReader reader, String myTag) throws XMLStreamException {
MyTagObject myTagObject = new MyTagObject();
while (true) {
switch (reader.next()) {
case XMLStreamConstants.START_ELEMENT:
String localName = reader.getLocalName();
if(localName.equals("myOtherTag1")) {
myTagObject.setMyOtherTag1(parseMyOtherTag1(reader, localName));
} else if(localName.equals("myOtherTag2")) {
myTagObject.setMyOtherTag2(parseMyOtherTag2(reader, localName));
}
// and so on
break;
case XMLStreamConstants.END_ELEMENT:
if(reader.getLocalName().equals(myTag) {
return myTagObject;
}
break;
}
}
well what have you tried? you should use a transformer found here: How to pretty print XML from Java?
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
//initialize StreamResult with File object to save to file
StreamResult result = new StreamResult(new StringWriter());
DOMSource source = new DOMSource(doc);
transformer.transform(source, result);
String xmlString = result.getWriter().toString();
System.out.println(xmlString);
Almost any useful SAX application needs to maintain a stack. When startElement is called, you push information to the stack, when endElement is called, you pop the stack. Exactly what you put on the stack depends on the application; it's often the element name. For your application, you don't actually need a full stack, you only need to know its depth. You could get by with maintaining this using depth++ in startElement and depth-- in endElement(). Then you just output depth spaces before the element name.

How do I parse my simple XML file with Java and SAX?

I am trying to parse the file below. I want to print the id and name of each passenger. Can you give me code to parse it ?
<?xml version="1.0" encoding="utf-8"?>
<root xmlns:android="www.google.com">
<passenger id = "001">
<name>Tom Cruise</name>
</passenger>
<passenger id = "002">
<name>Tom Hanks</name>
</passenger>
</root>
UPDATE
This is what i had tried. Code, problems etc mentioned here -
Error in output of a simple SAX parser
Here is a working example to start with, though I suggest you to use StAX instead, you will see that SAX is not very convenient
import java.io.File;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
public class SAX2 {
public static void main(String[] args) throws Exception {
SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
parser.parse(new File("test.xml"), new DefaultHandler() {
#Override
public void startElement(String uri, String localName,
String qName, Attributes atts) throws SAXException {
if (qName.equals("passenger")) {
System.out.println("id = " + atts.getValue(0));
}
}
#Override
public void endElement(String uri, String localName, String qName)
throws SAXException {
}
#Override
public void characters(char[] ch, int start, int length)
throws SAXException {
String text = new String(ch, start, length);
if (!text.trim().isEmpty()) {
System.out.println("name " + text);
}
}
});
}
}
output
id = 001
name Tom Cruise
id = 002
name Tom Hanks
Create a DocumentBuilderFactory.
Obtain a DocumentBuilder from the factory.
Use one of the parse() methods of the builder to create a Document.
Once you have a Document, you can get the passenger Elements with Document's getElementsByTagName() method.
I'm sure you'll be able to work out the rest.
SAXParserFactory factory = SAXParserFactory.newInstance();
try {
InputStream xmlInput = new FileInputStream("theFile.xml");
SAXParser saxParser = factory.newSAXParser();
DefaultHandler handler = new SaxHandler();
saxParser.parse(xmlInput, handler);
} catch (Throwable err) {
err.printStackTrace ();
}
String str = "<?xml version=\"1.0\" encoding=\"utf-8\"?> " +
"<root xmlns:android=\"www.google.com\">" +
"<passenger id = \"001\">" +
"<name>Tom Cruise</name>" +
"</passenger>" +
"<passenger id = \"002\">" +
"<name>Tom Hanks</name>" +
"</passenger>" +
"</root>";
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource(new StringReader(str));
final Document document = db.parse(is);
System.out.println("node Name " + document.getChildNodes().item(0).getChildNodes().item(1).getNodeName());

Categories