I'm using SAX (Simple API for XML) to parse an XML document. I'm getting output for all the tags the file have, but i want it to show the tags in parent child hierarchy.
For Example:
This is my output
<dblp>
<www>
<author>
</author><title>
</title><url>
</url><year>
</year></www><inproceedings>
<month>
</month><pages>
</pages><booktitle>
</booktitle><note>
</note><cdrom>
</cdrom></inproceedings><article>
<journal>
</journal><volume>
</volume></article><ee>
</ee><book>
<publisher>
</publisher><isbn>
</isbn></book><incollection>
<crossref>
</crossref></incollection><editor>
</editor><series>
</series></dblp>
But i want it to display the output like this (it displays the children with extra spacing (that's how i want it to be))
<dblp>
<www>
<author>
</author>
<title>
</title>
<url>
</url>
<year>
</year>
</www>
<inproceedings>
<month>
</month>
<pages>
</pages>
<booktitle>
</booktitle>
<note>
</note>
<cdrom>
</cdrom>
</inproceedings>
<article>
<journal>
</journal>
<volume>
</volume>
</article>
<ee>
</ee>
<book>
<publisher>
</publisher>
<isbn>
</isbn>
</book>
<incollection>
<crossref>
</crossref>
</incollection>
<editor>
</editor>
<series>
</series>
</dblp>
But i can't figure out how can i detect that parser is parsing a parent tag or a children.
here is my code:
package com.teamincredibles.sax;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
public class Parser extends DefaultHandler {
public void getXml() {
try {
SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
SAXParser saxParser = saxParserFactory.newSAXParser();
final MySet openingTagList = new MySet();
final MySet closingTagList = new MySet();
DefaultHandler defaultHandler = new DefaultHandler() {
public void startDocument() throws SAXException {
System.out.println("Starting Parsing...\n");
}
public void endDocument() throws SAXException {
System.out.print("\n\nDone Parsing!");
}
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
if (!openingTagList.contains(qName)) {
openingTagList.add(qName);
System.out.print("<" + qName + ">\n");
}
}
public void characters(char ch[], int start, int length)
throws SAXException {
/*for(int i=start; i<(start+length);i++){
System.out.print(ch[i]);
}*/
}
public void endElement(String uri, String localName, String qName)
throws SAXException {
if (!closingTagList.contains(qName)) {
closingTagList.add(qName);
System.out.print("</" + qName + ">");
}
}
};
saxParser.parse("xml/sample.xml", defaultHandler);
} catch (Exception e) {
e.printStackTrace();
}
}
public static void main(String args[]) {
Parser readXml = new Parser();
readXml.getXml();
}
}
You can consider a StAX implementation:
package be.duo.stax;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
public class StaxExample {
public void getXml() {
InputStream is = null;
try {
is = new FileInputStream("c:\\dev\\sample.xml");
XMLInputFactory inputFactory = XMLInputFactory.newInstance();
XMLStreamReader reader = inputFactory.createXMLStreamReader(is);
parse(reader, 0);
} catch(Exception ex) {
System.out.println(ex.getMessage());
} finally {
if(is != null) {
try {
is.close();
} catch(IOException ioe) {
System.out.println(ioe.getMessage());
}
}
}
}
private void parse(XMLStreamReader reader, int depth) throws XMLStreamException {
while(true) {
if(reader.hasNext()) {
switch(reader.next()) {
case XMLStreamConstants.START_ELEMENT:
writeBeginTag(reader.getLocalName(), depth);
parse(reader, depth+1);
break;
case XMLStreamConstants.END_ELEMENT:
writeEndTag(reader.getLocalName(), depth-1);
return;
}
}
}
}
private void writeBeginTag(String tag, int depth) {
for(int i = 0; i < depth; i++) {
System.out.print(" ");
}
System.out.println("<" + tag + ">");
}
private void writeEndTag(String tag, int depth) {
for(int i = 0; i < depth; i++) {
System.out.print(" ");
}
System.out.println("</" + tag + ">");
}
public static void main(String[] args) {
StaxExample app = new StaxExample();
app.getXml();
}
}
There is an idiom for StAX with a loop like this for every tag in the XML:
private MyTagObject parseMyTag(XMLStreamReader reader, String myTag) throws XMLStreamException {
MyTagObject myTagObject = new MyTagObject();
while (true) {
switch (reader.next()) {
case XMLStreamConstants.START_ELEMENT:
String localName = reader.getLocalName();
if(localName.equals("myOtherTag1")) {
myTagObject.setMyOtherTag1(parseMyOtherTag1(reader, localName));
} else if(localName.equals("myOtherTag2")) {
myTagObject.setMyOtherTag2(parseMyOtherTag2(reader, localName));
}
// and so on
break;
case XMLStreamConstants.END_ELEMENT:
if(reader.getLocalName().equals(myTag) {
return myTagObject;
}
break;
}
}
well what have you tried? you should use a transformer found here: How to pretty print XML from Java?
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.setOutputProperty(OutputKeys.INDENT, "yes");
//initialize StreamResult with File object to save to file
StreamResult result = new StreamResult(new StringWriter());
DOMSource source = new DOMSource(doc);
transformer.transform(source, result);
String xmlString = result.getWriter().toString();
System.out.println(xmlString);
Almost any useful SAX application needs to maintain a stack. When startElement is called, you push information to the stack, when endElement is called, you pop the stack. Exactly what you put on the stack depends on the application; it's often the element name. For your application, you don't actually need a full stack, you only need to know its depth. You could get by with maintaining this using depth++ in startElement and depth-- in endElement(). Then you just output depth spaces before the element name.
Related
import java.io.File;
import java.io.IOException;
import java.util.List;
import org.jdom.Document;
import org.jdom.Element;
import org.jdom.JDOMException;
import org.jdom.input.SAXBuilder;
public class ReadXMLFile {
public static void main(String[] args) {
SAXBuilder builder = new SAXBuilder();
File xmlFile = new File("c:\\test.xml");
try {
Document document = (Document) builder.build(xmlFile);
Element rootNode = document.getRootElement();
List list = rootNode.getChildren("raum");
for (int i = 0; i < list.size(); i++) {
Element node = (Element) list.get(i);
System.out.println("ID : " + node.getChildText("ID"));
}
} catch (IOException io) {
System.out.println(io.getMessage());
} catch (JDOMException jdomex) {
System.out.println(jdomex.getMessage());
}
}
}
I don't understand how the step in between has to look like in order to insert the imported coordinates into the polygon.. Maybe someone can help me with this?
You can follow any sample JDOM parser example and do it.
For example, this explains how to read the xml and take the data in a list and iterate over it. Just follow the steps and understand what you are doing, you can easily get it done.
For the sake of completeness?
This is how to achieve it using SAX parser.
Note that it is not clear to me, from your question, which Polygon you are referring to. I presume it is a java class. It can't be java.awt.Polygon because its points are all int whereas your sample XML file contains only double values. The only other class I thought of was javafx.scene.shape.Polygon that contains an array of points where each point is a double. Hence in the below code, I create an instance of javafx.scene.shape.Polygon.
For the situation you describe in your question, I don't see the point (no pun intended) in loading the entire DOM tree into memory. You simply need to create a point every time you encounter a x and a y coordinate in the XML file and add those coordinates to a collection of points.
Here is the code. Note that I created an XML file named polygon0.xml that contains the entire XML from your question. Also note that you can extend class org.xml.sax.helpers.DefaultHandler rather than implement interface ContentHandler.
import java.io.FileReader;
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import javafx.scene.shape.Polygon;
public class Polygons implements ContentHandler {
private boolean isX;
private boolean isY;
private Polygon polygon;
/* Start 'ContentHandler' interface methods. */
#Override // org.xml.sax.ContentHandler
public void setDocumentLocator(Locator locator) {
// Do nothing.
}
#Override // org.xml.sax.ContentHandler
public void startDocument() throws SAXException {
polygon = new Polygon();
}
#Override // org.xml.sax.ContentHandler
public void endDocument() throws SAXException {
// Do nothing.
}
#Override // org.xml.sax.ContentHandler
public void startPrefixMapping(String prefix, String uri) throws SAXException {
// Do nothing.
}
#Override // org.xml.sax.ContentHandler
public void endPrefixMapping(String prefix) throws SAXException {
// Do nothing.
}
#Override // org.xml.sax.ContentHandler
public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
isX = "x".equals(qName);
isY = "y".equals(qName);
}
#Override // org.xml.sax.ContentHandler
public void endElement(String uri, String localName, String qName) throws SAXException {
if (isX) {
isX = false;
}
if (isY) {
isY = false;
}
}
#Override // org.xml.sax.ContentHandler
public void characters(char[] ch, int start, int length) throws SAXException {
if (isX || isY) {
StringBuilder sb = new StringBuilder(length);
int end = start + length;
for (int i = start; i < end; i++) {
sb.append(ch[i]);
}
polygon.getPoints().add(Double.parseDouble(sb.toString()));
}
}
#Override // org.xml.sax.ContentHandler
public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
// Do nothing.
}
#Override // org.xml.sax.ContentHandler
public void processingInstruction(String target, String data) throws SAXException {
// Do nothing.
}
#Override // org.xml.sax.ContentHandler
public void skippedEntity(String name) throws SAXException {
// Do nothing.
}
/* End 'ContentHandler' interface methods. */
public static void main(String[] args) {
Polygons instance = new Polygons();
Path path = Paths.get("polygon0.xml");
SAXParserFactory spf = SAXParserFactory.newInstance();
try (FileReader reader = new FileReader(path.toFile())) { // throws java.io.IOException
SAXParser saxParser = spf.newSAXParser(); // throws javax.xml.parsers.ParserConfigurationException , org.xml.sax.SAXException
XMLReader xmlReader = saxParser.getXMLReader(); // throws org.xml.sax.SAXException
xmlReader.setContentHandler(instance);
InputSource input = new InputSource(reader);
xmlReader.parse(input);
System.out.println(instance.polygon);
}
catch (IOException |
ParserConfigurationException |
SAXException x) {
x.printStackTrace();
}
}
}
Here is the output from running the above code:
Polygon[points=[400.3, 997.2, 400.3, 833.1, 509.9, 833.1, 509.9, 700.0, 242.2, 700.0, 242.2, 600.1, 111.1, 600.1, 111.1, 300.0, 300.0, 300.0, 300.0, 420.0, 600.5, 420.0, 600.5, 101.9, 717.8, 101.9, 717.8, 200.0, 876.5, 200.0, 876.5, 500.8, 1012.1, 500.8, 1012.1, 900.2, 902.0, 900.2, 902.0, 997.2], fill=0x000000ff]
EDIT
As requested, by OP, here is an implementation using JDOM (version 2.0.6)
import java.io.IOException;
import java.nio.file.Path;
import java.nio.file.Paths;
import org.jdom2.Document;
import org.jdom2.Element;
import org.jdom2.JDOMException;
import org.jdom2.filter.ElementFilter;
import org.jdom2.input.SAXBuilder;
import org.jdom2.util.IteratorIterable;
import javafx.scene.shape.Polygon;
public class Polygon2 {
public static void main(String[] args) {
Polygon polygon = new Polygon();
Path path = Paths.get("polygon0.xml");
SAXBuilder builder = new SAXBuilder();
try {
Document jdomDoc = builder.build(path.toFile()); // throws java.io.IOException , org.jdom2.JDOMException
Element root = jdomDoc.getRootElement();
IteratorIterable<Element> iter = root.getDescendants(new ElementFilter("edge"));
while (iter.hasNext()) {
Element elem = iter.next();
Element childX = elem.getChild("x");
polygon.getPoints().add(Double.parseDouble(childX.getText()));
Element childY = elem.getChild("y");
polygon.getPoints().add(Double.parseDouble(childY.getText()));
}
}
catch (IOException | JDOMException x) {
x.printStackTrace();
}
System.out.println(polygon);
}
}
You can read XML files DOM parser library check this article.
I assume you are working on a Desktop application so you might want to use FileChooser for file selection. Here is an example of this.
Also, I think you would need to make some structural changes (for convinience) to your XML file so that it would have something like this:
<xpoints>
<x>5<x/>
...
</xpoints>
<ypoints>
<y>5<y/>
...
</ypoints>
But for existing structure doing something like this would be enogh:
File file = new File("file");
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
Document doc = db.parse(file);
doc.getDocumentElement().normalize();
NodeList nodeList = doc.getElementsByTagName("edge");
// you can iterate over all edges
for (int itr = 0; itr < nodeList.getLength(); itr++)
{
Node node = nodeList.item(itr);
if (node.getNodeType() == Node.ELEMENT_NODE)
{
Element eElement = (Element) node;
//then you can access values, for example, to pass them to an array
array.add(eElement.getElementsByTagName("x").item(0).getTextContent()));
}
}
I'm reading XML file using SAX parser utility.
Here is my sample XML
<?xml version="1.0"?><company><Account AccountNumber="100"><staff><firstname>yong</firstname><firstname>jin</firstname></staff></Account></company>
Here is the code
import java.util.Arrays;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
public class ReadXML {
public static void main(String argv[]) {
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser saxParser = factory.newSAXParser();
DefaultHandler handler = new DefaultHandler() {
boolean bAccount = false;
public void startElement(String uri, String localName, String qName, Attributes attributes)
throws SAXException {
System.out.println("Start Element :" + qName);
if (qName.equalsIgnoreCase("ACCOUNT")) {
bAccount = true;
}
}
public void endElement(String uri, String localName, String qName) throws SAXException {
System.out.println("End Element :" + qName);
}
public void characters(char[] ch, int start, int length) throws SAXException {
System.out.println("Im here:" + bAccount);
if (bAccount) {
System.out.println("Account First Name : " + new String(ch, start, length));
bAccount = false;
StringBuilder Account = new StringBuilder();
for (int i = start; i < ch.length - 1; i--) {
if (String.valueOf(ch[i]).equals("<")) {
System.out.println("Account:" +Account);
break;
} else {
Account.append(ch[i]);
}
}
}
}
};
saxParser.parse("C:\\Lenny\\Work\\XML\\Out_SaxParsing_01.xml", handler);
} catch (Exception e) {
e.printStackTrace();
}
}
}
As you can see in XML, Account tag is something like this Account AccountNumber="100", What I want to do is, I want to capture Tag too as well.
So to achieve that, in characters method, I'm trying to read the array from right to left, So that I could get the Account AccountNumber="100", when Account encountered as event.
But am not able to reach there, The event is getting generated, but its not going to characters method. I think it should go into characters method once Account tag is encountered. But its not..!
May I know please what am missing or doing wrong ?
Any Help please..!
AccountNumber="100" is an attribute of the Account element so inside the startElement handler you have you can read out the attributes parameter to access that value.
I am trying to parse attached xml(Please find attachment) file.
xml document is as given below.check the attachment 1 and 2
sample data of xml file
In order to parse this xml, I used SAX parser. program is as follows.
package com.dom;
import java.io.File;
import java.io.IOException;
import java.util.Enumeration;
import java.util.Hashtable;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
public class DemoXML {
File file;
SAXParserFactory factory;
SAXParser saxParser;
UserHandler handler;
public void loadXML()
{
file = new File("E:/fifthWorkbenchProjects/XMLUtility/src/input/FIXBOND.xml");
System.out.println(file.exists());
}
public void readXML()
{
factory = SAXParserFactory.newInstance();
try {
saxParser = factory.newSAXParser();
handler = new UserHandler();
try {
saxParser.parse(file,handler);
} catch (IOException e) {
e.printStackTrace();
}
} catch (ParserConfigurationException e) {
e.printStackTrace();
} catch (SAXException e) {
e.printStackTrace();
}
}
public static void main(String args[])
{
DemoXML ob = new DemoXML();
ob.loadXML();
ob.readXML();
}
}
class UserHandler extends DefaultHandler
{
Hashtable tags;
#Override
public void startDocument()
{
System.out.println("Document started");
tags = new Hashtable();
}
#Override
public void endDocument()
{
System.out.println("Documents ended");
}
#Override
public void startElement(String namespaceURI,String localName,String qname,Attributes atts) throws SAXException
{
// System.out.println("Element started");
// if(qname.equals("Currency"))
System.out.print(qname+"-->");
}
#Override
public void endElement(String uri,String localName, String qname)
{
}
#Override
public void characters(char[] ch, int start, int length)
{
String str = new String(ch,start,length);
System.out.println(str);
System.out.println();
}
}
I get output in following manner.
true
Document started
FIgovcorpagncy-->InstrumentDescription-->InstrumentType-->FI GOVCORPAGNCY
InstrumentSubType-->FIXDBOND
InstrumentName-->QUEENSNR 0% 07/06/2016
InstrumentDescription-->QUEENSNR 0% 07/06/2016
Currency-->GBP
InstrumentStatus-->ACTIVE
AmountOutstanding-->48384375
AmtOutstandingDate-->2012-06-27T00:00:00.000
PrincipalExchange-->N
CountryOfRisk-->GB
InstrumentCompleteness-->50
CapitalRanking-->1
AtIssuance-->IssueDate-->2012-06-27T00:00:00.000
OriginalIssueAmount-->48384375
PrivatePlacementFlag-->Y
MinimumDenomination-->1000
MinimumIncrement-->0.01
and so on ....I am able to access all nodes but observe one thing over here for first element in tree,complete element address is printed like
FIgovcorpagncy-->InstrumentDescription-->InstrumentType-->FI GOVCORPAGNCY
then for rest of the elements in tree, it prints tag name and corresponding value like
InstrumentSubType-->FIXDBOND
InstrumentName-->QUEENSNR 0% 07/06/2016
InstrumentDescription-->QUEENSNR 0% 07/06/2016
Currency-->GBP
InstrumentStatus-->ACTIVE
AmountOutstanding-->48384375
so on....
here my requirement is I want to print these elements also with full hierarchic manner as the first element.
how to go about it?
class UserHandler extends DefaultHandler
{
List li_elements,li_values;
LinkedHashMap<List<String>,List<String>> hm;
boolean endElementFlag;
#Override
public void startDocument()
{
System.out.println("Document started");
li_elements = new ArrayList<String>();
li_values=new ArrayList<String>();
}
#Override
public void endDocument()
{
System.out.println("Documents ended"+hm.size());
for(Map.Entry m:hm.entrySet())
{
System.out.println(m.getKey()+""+m.getValue());
}
}
#Override
public void startElement(String namespaceURI,String localName,String qname,Attributes atts) throws SAXException
{
li_elements.add(qname);
//System.out.println("Element Started");
//System.out.println(qname+" added in element list");
}
#Override
public void endElement(String uri,String localName, String qname)
{
if(!li_values.isEmpty())
{
System.out.println("Element address list:-"+li_elements+"and Corresponding Value:-"+li_values);
System.out.println();
}
li_elements.remove(li_elements.size()-1);
li_values.clear();
}
#Override
public void characters(char[] ch, int start, int length)
{
String str = new String(ch,start,length);
li_values.add(str);
}
}
I was expecting something like this. this prints the output in a format that I was hoping for.
I am new to JAVA programming, now I in need of JAVA program to read a big XML file that containing .. tags. Sample input as follows.
Input.xml
<row>
<Name>Filename1</Name>
</row>
<row>
<Name>Filename2</Name>
</row>
<row>
<Name>Filename3</Name>
</row>
<row>
<Name>Filename4</Name>
</row>
<row>
<Name>Filename5</Name>
</row>
<row>
<Name>Filename6</Name>
</row>
.
.
I need output as first <row> </row> as a single .xml file with filename as filename1.xml
and second <row>..</row> as filename2.xml and so.
Can anyone tell the steps how to do it in simple way with Java, it will be very useful if you give any sample codes ?
You could do the following with StAX because you said your xml is large
Code for Your Use Case
The following code uses StAX APIs to break up the document as outlined in your question:
import java.io.*;
import java.util.*;
import javax.xml.namespace.QName;
import javax.xml.stream.*;
import javax.xml.stream.events.*;
public class Demo {
public static void main(String[] args) throws Exception {
Demo demo = new Demo();
demo.split("src/forum7408938/input.xml", "nickname");
//demo.split("src/forum7408938/input.xml", null);
}
private void split(String xmlResource, String condition) throws Exception {
XMLEventFactory xef = XMLEventFactory.newFactory();
XMLInputFactory xif = XMLInputFactory.newInstance();
XMLEventReader xer = xif.createXMLEventReader(new FileReader(xmlResource));
StartElement rootStartElement = xer.nextTag().asStartElement(); // Advance to statements element
StartDocument startDocument = xef.createStartDocument();
EndDocument endDocument = xef.createEndDocument();
XMLOutputFactory xof = XMLOutputFactory.newFactory();
while(xer.hasNext() && !xer.peek().isEndDocument()) {
boolean metCondition;
XMLEvent xmlEvent = xer.nextTag();
if(!xmlEvent.isStartElement()) {
break;
}
// Be able to split XML file into n parts with x split elements(from
// the dummy XML example staff is the split element).
StartElement breakStartElement = xmlEvent.asStartElement();
List<XMLEvent> cachedXMLEvents = new ArrayList<XMLEvent>();
// BOUNTY CRITERIA
// I'd like to be able to specify condition that must be in the
// split element i.e. I want only staff which have nickname, I want
// to discard those without nicknames. But be able to also split
// without conditions while running split without conditions.
if(null == condition) {
cachedXMLEvents.add(breakStartElement);
metCondition = true;
} else {
cachedXMLEvents.add(breakStartElement);
xmlEvent = xer.nextEvent();
metCondition = false;
while(!(xmlEvent.isEndElement() && xmlEvent.asEndElement().getName().equals(breakStartElement.getName()))) {
cachedXMLEvents.add(xmlEvent);
if(xmlEvent.isStartElement() && xmlEvent.asStartElement().getName().getLocalPart().equals(condition)) {
metCondition = true;
break;
}
xmlEvent = xer.nextEvent();
}
}
if(metCondition) {
// Create a file for the fragment, the name is derived from the value of the id attribute
FileWriter fileWriter = null;
fileWriter = new FileWriter("src/forum7408938/" + breakStartElement.getAttributeByName(new QName("id")).getValue() + ".xml");
// A StAX XMLEventWriter will be used to write the XML fragment
XMLEventWriter xew = xof.createXMLEventWriter(fileWriter);
xew.add(startDocument);
// BOUNTY CRITERIA
// The content of the spitted files should be wrapped in the
// root element from the original file(like in the dummy example
// company)
xew.add(rootStartElement);
// Write the XMLEvents that were cached while when we were
// checking the fragment to see if it matched our criteria.
for(XMLEvent cachedEvent : cachedXMLEvents) {
xew.add(cachedEvent);
}
// Write the XMLEvents that we still need to parse from this
// fragment
xmlEvent = xer.nextEvent();
while(xer.hasNext() && !(xmlEvent.isEndElement() && xmlEvent.asEndElement().getName().equals(breakStartElement.getName()))) {
xew.add(xmlEvent);
xmlEvent = xer.nextEvent();
}
xew.add(xmlEvent);
// Close everything we opened
xew.add(xef.createEndElement(rootStartElement.getName(), null));
xew.add(endDocument);
fileWriter.close();
}
}
}
}
I can suggest using SAXParser and extending the DefaultHandler class' methods.
You can use a few booleans to keep a track of which tag you are in.
DefaultHandler will let you know when you are in a particular tag by the startElement() method. Then, you will be given the contents of the tag by the characters() method and finally you will be notified of the end of a tag by the endElement() method.
As soon as you are notified of the end of a <row>, you can get the contents of the tag you just saved and create a file out of it.
Looking at your example, you just need a couple of boolean values -- boolean inRow and boolean inName so this should not be a hard task =)
Example from Mykong (I am leaving out the actual code, you must do it on your own. It is fairly trivial):
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
public class ReadXMLFile {
public static void main(String argv[]) {
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser saxParser = factory.newSAXParser();
DefaultHandler handler = new DefaultHandler() {
boolean bfname = false;
boolean blname = false;
boolean bnname = false;
boolean bsalary = false;
public void startElement(String uri, String localName,String qName,
Attributes attributes) throws SAXException {
System.out.println("Start Element :" + qName);
if (qName.equalsIgnoreCase("FIRSTNAME")) {
bfname = true;
}
if (qName.equalsIgnoreCase("LASTNAME")) {
blname = true;
}
if (qName.equalsIgnoreCase("NICKNAME")) {
bnname = true;
}
if (qName.equalsIgnoreCase("SALARY")) {
bsalary = true;
}
}
public void endElement(String uri, String localName,
String qName) throws SAXException {
System.out.println("End Element :" + qName);
}
public void characters(char ch[], int start, int length) throws SAXException {
if (bfname) {
System.out.println("First Name : " + new String(ch, start, length));
bfname = false;
}
if (blname) {
System.out.println("Last Name : " + new String(ch, start, length));
blname = false;
}
if (bnname) {
System.out.println("Nick Name : " + new String(ch, start, length));
bnname = false;
}
if (bsalary) {
System.out.println("Salary : " + new String(ch, start, length));
bsalary = false;
}
}
};
saxParser.parse("c:\\file.xml", handler);
} catch (Exception e) {
e.printStackTrace();
}
}
}
The best approach is JAXB MArshal and unmarshaller to read and create xml fils.
Here is example
Assuming that your file have element that contains those rows:
<root>
<row><Name>Filename1</Name></row>
<row><Name>Filename2</Name></row>
<row><Name>Filename3</Name></row>
<row><Name>Filename4</Name></row>
<row><Name>Filename5</Name></row>
<row><Name>Filename6</Name></row>
</root>
This code will do the trick:
package com.example;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class Main {
public static String readXmlFromFile(String fileName) throws Exception {
BufferedReader reader = new BufferedReader(new FileReader(fileName));
String line = null;
StringBuilder stringBuilder = new StringBuilder();
String lineSeparator = System.getProperty("line.separator");
while ((line = reader.readLine()) != null) {
stringBuilder.append(line);
stringBuilder.append(lineSeparator);
}
return stringBuilder.toString();
}
public static List<String> divideXmlByTag(String xml, String tag) throws Exception {
List<String> list = new ArrayList<String>();
Document document = loadXmlDocument(xml);
TransformerFactory transformerFactory = TransformerFactory.newInstance();
Transformer transformer = transformerFactory.newTransformer();
transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
NodeList rowList = document.getElementsByTagName(tag);
for(int i=0; i<rowList.getLength(); i++) {
Node rowNode = rowList.item(i);
if (rowNode.getNodeType() == Node.ELEMENT_NODE) {
DOMSource source = new DOMSource(rowNode);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
StreamResult streamResult = new StreamResult(baos);
transformer.transform(source, streamResult);
list.add(baos.toString());
}
}
return list;
}
private static Document loadXmlDocument(String xml) throws SAXException, IOException, ParserConfigurationException {
return loadXmlDocument(new ByteArrayInputStream(xml.getBytes()));
}
private static Document loadXmlDocument(InputStream inputStream) throws SAXException, IOException, ParserConfigurationException {
DocumentBuilderFactory documentBuilderFactory = DocumentBuilderFactory.newInstance();
documentBuilderFactory.setNamespaceAware(true);
DocumentBuilder documentBuilder = null;
documentBuilder = documentBuilderFactory.newDocumentBuilder();
Document document = documentBuilder.parse(inputStream);
inputStream.close();
return document;
}
public static void main(String[] args) throws Exception {
String xmlString = readXmlFromFile("d:/test.xml");
System.out.println("original xml:\n" + xmlString + "\n");
System.out.println("divided xml:\n");
List<String> dividedXmls = divideXmlByTag(xmlString, "row");
for (String xmlPart : dividedXmls) {
System.out.println(xmlPart + "\n");
}
}
}
You only need to write this xml parts to separates files.
Since the user requested one more solution posting other way.
use a StAX parser for this situation. It will prevent the entire document from being read into memory at one time.
Advance the XMLStreamReader to the local root element of the sub-fragment.
You can then use the javax.xml.transform APIs to produce a new document from this XML fragment. This will advance the XMLStreamReader to the end of that fragment.
Repeat step 1 for the next fragment.
Code Example
For the following XML, output each "statement" section into a file named after the "account attributes value":
<statements>
<statement account="123">
...stuff...
</statement>
<statement account="456">
...stuff...
</statement>
import java.io.File;
import java.io.FileReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamReader;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stax.StAXSource;
import javax.xml.transform.stream.StreamResult;
public class Demo {
public static void main(String[] args) throws Exception {
XMLInputFactory xif = XMLInputFactory.newInstance();
XMLStreamReader xsr = xif.createXMLStreamReader(new FileReader("input.xml"));
xsr.nextTag(); // Advance to statements element
TransformerFactory tf = TransformerFactory.newInstance();
Transformer t = tf.newTransformer();
while(xsr.nextTag() == XMLStreamConstants.START_ELEMENT) {
File file = new File("out/" + xsr.getAttributeValue(null, "account") + ".xml");
t.transform(new StAXSource(xsr), new StreamResult(file));
}
}
}
If you're new to Java then the people recommending SAX and StAX parsing are throwing you in at the deep end! This is pretty low-level stuff, highly efficient, but not designed for beginners. You said the file is "big" and they've all assumed that to mean "very big", but in my experience an unquantified "big" can mean anything from 1Mb to 20Gb, so designing a solution based on that description is somewhat premature.
It's much easier to do this with XSLT 2.0 than with Java. All it takes is a stylesheet like this:
<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform" version="2.0">
<xsl:template match="row">
<xsl:result-document href="{FileName}">
<xsl:copy-of select="."/>
</xsl:result-document>
</xsl:template>
</xsl:stylesheet>
And if it has to be within a Java application, you can easily invoke the transformation from Java using an API.
Try out this,
import java.io.*;
import javax.xml.parsers.*;
import org.w3c.dom.*;
import org.xml.sax.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
public class Test{
static public void main(String[] arg) throws Exception{
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse("foo.xml");
TransformerFactory tranFactory = TransformerFactory.newInstance();
Transformer aTransformer = tranFactory.newTransformer();
NodeList list = doc.getFirstChild().getChildNodes();
for (int i=0; i<list.getLength(); i++){
Node element = list.item(i).cloneNode(true);
if(element.hasChildNodes()){
Source src = new DOMSource(element);
FileOutputStream fs=new FileOutputStream("k" + i + ".xml");
Result dest = new StreamResult(fs);
aTransformer.transform(src, dest);
fs.close();
}
}
}
}
Source: Related Answer
I have a xml POST response which I receive as a string. I need the content of the particular "pnr" (see in xml) to pass it on to another GET request.
I am trying sax and stAX to achieve this but failing miserably.
I used getElementsByTagName and also getAttribute, but no go...
Here's my code and later the xml string that I receive.
Any kind of help will be a gift
package rest;
import javax.xml.parsers.*;
import org.xml.sax.InputSource;
import org.w3c.dom.*;
import java.io.*;
public class ParseXMLString {
public static void main(String arg[]) {
String outputString = RESTClient.postConfirm(); // this is the xml string response I am getting
try {
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
DocumentBuilder db = dbf.newDocumentBuilder();
InputSource is = new InputSource();
is.setCharacterStream(new StringReader(outputString));
Document doc = db.parse(is);
String Parentnode = doc.getDocumentElement().getAttribute("pnr");
// Element element = (Element) Parentnode.;
// NodeList name = element.getElementsByTagName("pnr");
// Element line = (Element) name.item(0);
//String IDList = getCharacterDataFromElement(line);
System.out.println(Parentnode);
}
catch (Exception e) {
e.printStackTrace();
}
}
public static String getCharacterDataFromElement(Element e) {
Node child = e.getFirstChild();
if (child instanceof CharacterData) {
CharacterData cd = (CharacterData) child;
return cd.getData();
}
return "?";
}
}
And here is the the XML String I have received:
<?xml version="1.0" encoding="UTF-8"?><Ticket><bookedSeats>3</bookedSeats><bpAddress>Anand Rao Circle</bpAddress><bpLandMark>ganesha temple</bpLandMark><bpLocation> Ghousia College</bpLocation><bpPhoneNo>98798679769</bpPhoneNo><bpTime>1200</bpTime><busServiceName>efdf</busServiceName><busType>Volvo A/C Semi Sleeper (2+2)</busType><commission>66.19</commission><dateOfJourney>2012-10-05</dateOfJourney><destination>Chennai</destination><fare>600.0</fare><issueTime>2012-10-04T15:46:45.073+05:30</issueTime><noOfSeats>1</noOfSeats><passengerMobile>1234567890</passengerMobile><passengerName>Test</passengerName><pnr>RATPKES44974756</pnr><seatDetails><seatDetail><commission>66.19</commission><fare>600.0</fare><gender>MALE</gender><passengerAge>0</passengerAge><passengerMobile>1234567890</passengerMobile><passengerName>Test</passengerName><prime>false</prime><seatName>3</seatName></seatDetail></seatDetails><source>Bangalore</source><status>BOOKED</status><travelsName>Rajratan Travels</travelsName></Ticket>
Instead of SAX or StAX you could do the following with the javax.xml.xpath APIs in the JDK/JRE since Java SE 5:
Demo
import java.io.StringReader;
import javax.xml.xpath.*;
import org.xml.sax.InputSource;
public class Demo {
public static void main(String[] args) throws Exception {
String xml = "<?xml version='1.0' encoding='UTF-8'?><Ticket><bookedSeats>3</bookedSeats><bpAddress>Anand Rao Circle</bpAddress><bpLandMark>ganesha temple</bpLandMark><bpLocation> Ghousia College</bpLocation><bpPhoneNo>98798679769</bpPhoneNo><bpTime>1200</bpTime><busServiceName>efdf</busServiceName><busType>Volvo A/C Semi Sleeper (2+2)</busType><commission>66.19</commission><dateOfJourney>2012-10-05</dateOfJourney><destination>Chennai</destination><fare>600.0</fare><issueTime>2012-10-04T15:46:45.073+05:30</issueTime><noOfSeats>1</noOfSeats><passengerMobile>1234567890</passengerMobile><passengerName>Test</passengerName><pnr>RATPKES44974756</pnr><seatDetails><seatDetail><commission>66.19</commission><fare>600.0</fare><gender>MALE</gender><passengerAge>0</passengerAge><passengerMobile>1234567890</passengerMobile><passengerName>Test</passengerName><prime>false</prime><seatName>3</seatName></seatDetail></seatDetails><source>Bangalore</source><status>BOOKED</status><travelsName>Rajratan Travels</travelsName></Ticket>";
XPathFactory xpf = XPathFactory.newInstance();
XPath xpath = xpf.newXPath();
InputSource inputSource = new InputSource(new StringReader(xml));
String pnr = (String) xpath.evaluate("/Ticket/pnr", inputSource, XPathConstants.STRING);
System.out.println(pnr);
}
}
Output
RATPKES44974756
this bit of code will get you the pnr :
NodeList nodeLst = doc.getElementsByTagName("Ticket");
Node ticket = nodeLst.item(0);
NodeList attr = ticket.getChildNodes();
for (int i = 0; i < attr.getLength(); i++){
if (attr.item(i).getNodeName().equals("pnr"))
System.out.println(attr.item(i).getTextContent());
}
If I were to solve this problem, I'd probably use XPath. But since you specifically asked for StAX, here's an example parser (note that this is just skeleton code to get you started).
import java.io.StringReader;
import java.util.Iterator;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.EndElement;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
public class ParseXMLStringStAX {
private static final String PNR = "pnr";
private String characters;
public String parse(String xmlString) throws XMLStreamException, Exception {
XMLEventReader reader = null;
try {
if (xmlString == null || xmlString.isEmpty()) {
throw new IllegalArgumentException("Illegal initializiation (xmlString is null or empty)");
}
StringReader stringReader = new StringReader(xmlString);
XMLInputFactory inputFact = XMLInputFactory.newInstance();
XMLStreamReader streamReader = inputFact.createXMLStreamReader(stringReader);
reader = inputFact.createXMLEventReader(streamReader);
while (reader.hasNext()) {
XMLEvent event = reader.nextEvent();
if (event.isCharacters()) {
characters(event);
}
if (event.isStartElement()) {
startElement(event);
// handle attributes
Iterator<Attribute> attributes = event.asStartElement().getAttributes();
while(attributes.hasNext()) {
attribute(attributes.next());
}
}
if (event.isEndElement()) {
// found what we want?
if (endElement(event)) {
endDocument(null);
break;
}
}
if (event.isStartDocument()) {
startDocument(event);
}
if (event.isEndDocument()) {
endDocument(event);
}
}
} catch (XMLStreamException ex) {
throw ex;
} finally {
try {
if (reader != null) {
reader.close();
}
} catch (XMLStreamException ex) {
}
}
return characters;
}
private void attribute(XMLEvent event) throws Exception {
}
private void characters(XMLEvent event) throws Exception {
Characters asCharacters = event.asCharacters();
if (asCharacters.isWhiteSpace())
return;
if (characters == null) {
characters = asCharacters.getData();
} else {
characters += asCharacters.getData();
}
}
private void startElement(XMLEvent event) throws Exception {
StartElement startElement = event.asStartElement();
String name = startElement.getName().getLocalPart();
characters = null;
}
private boolean endElement(XMLEvent event) throws Exception {
EndElement endElement = event.asEndElement();
String name = endElement.getName().getLocalPart();
if (PNR.equals(name)) {
return true;
}
return false;
}
private void startDocument(XMLEvent event) {
System.out.println("Parsing started");
}
private void endDocument(XMLEvent event) {
System.out.println("Parsing ended");
}
public static void main(String[] argv) throws XMLStreamException, Exception {
String xml = "";
xml += "<Ticket>";
xml += " <bookedSeats>3</bookedSeats>";
xml += " <bpAddress>Anand Rao Circle</bpAddress>";
xml += " <bpLandMark>ganesha temple</bpLandMark>";
xml += " <bpLocation> Ghousia College</bpLocation>";
xml += " <bpPhoneNo>98798679769</bpPhoneNo>";
xml += " <bpTime>1200</bpTime>";
xml += " <busServiceName>efdf</busServiceName>";
xml += " <busType>Volvo A/C Semi Sleeper (2+2)</busType>";
xml += " <commission>66.19</commission>";
xml += " <dateOfJourney>2012-10-05</dateOfJourney>";
xml += " <destination>Chennai</destination>";
xml += " <fare>600.0</fare>";
xml += " <issueTime>2012-10-04T15:46:45.073+05:30</issueTime>";
xml += " <noOfSeats>1</noOfSeats>";
xml += " <passengerMobile>1234567890</passengerMobile>";
xml += " <passengerName>Test</passengerName>";
xml += " <pnr>RATPKES44974756</pnr>";
xml += " <seatDetails>";
xml += " <seatDetail>";
xml += " <commission>66.19</commission>";
xml += " <fare>600.0</fare>";
xml += " <gender>MALE</gender>";
xml += " <passengerAge>0</passengerAge>";
xml += " <passengerMobile>1234567890</passengerMobile>";
xml += " <passengerName>Test</passengerName>";
xml += " <prime>false</prime>";
xml += " <seatName>3</seatName>";
xml += " </seatDetail>";
xml += " </seatDetails>";
xml += " <source>Bangalore</source>";
xml += " <status>BOOKED</status>";
xml += " <travelsName>Rajratan Travels</travelsName>";
xml += "</Ticket>";
ParseXMLStringStAX parser = new ParseXMLStringStAX();
String pnr = parser.parse(xml);
System.out.println("--> Result: " + String.valueOf(pnr));
}
}