Parsing Xml with SAX Parser - java

I am trying to parse an XML file with a SAX parser.
I need to get the attributes of a start element together with their values.
<?xml version="1.0" encoding="ISO-8859-1" ?>
<API type="Connection">
<INFO server="com.com" function="getAccount2" />
<RESULT code="0">Operation Succeeded</RESULT>
<RESPONSE numaccounts="1">
<ACCOUNT login="fa051981" skynum="111111" maxaliases="1" creationdate="Fri Nov 16 00:59:59 2001" password="pass" type="2222" status="open" mnemonic="32051981" ratelimit="0">
<CHECKATTR />
<REPLYATTR>Service-Type = Frames-User, Framed-Protocol = PPP, Framed-Routing = None</REPLYATTR>
<SETTINGS bitval="4" status="open" />
<SETTINGS bitval="8192" status="open" session_timeout="10800" />
<SETTINGS bitval="32768" status="open" cisco_address_pool="thepool" />
<ALIASES numaliases="0" />
</ACCOUNT>
</RESPONSE>
</API>
IN this xml, I need to get Settings tag/start element attributes along with it's values.
These attributes are dynamic, so I am trying to make a map of them. I am new to SAX Parser.
So far my java code:
/**
 * Copies attribute values of recognised start tags into {@code searchRaidusBean}.
 *
 * Tag names are compared case-insensitively against the {@code GenericConstants}
 * names. Each check is independent of the others.
 *
 * @param s           first SAX name argument (unused here)
 * @param s1          second SAX name argument (unused here)
 * @param elementName name used to recognise the tag
 * @param attributes  attributes attached to the start tag
 * @throws SAXException declared by the SAX callback contract
 */
public void startElement(String s, String s1, String elementName, Attributes attributes) throws SAXException {
    if (elementName.equalsIgnoreCase(GenericConstants.INFO)) {
        this.searchRaidusBean.setServer(attributes.getValue(GenericConstants.SERVER));
        this.searchRaidusBean.setFunction(attributes.getValue(GenericConstants.FUNCTION));
    }

    if (elementName.equalsIgnoreCase(GenericConstants.RESULT)) {
        this.searchRaidusBean.setResultCode(attributes.getValue(GenericConstants.CODE));
    }

    // All ACCOUNT attributes are read in one place, in the same order as before.
    if (elementName.equalsIgnoreCase(GenericConstants.ACCOUNT)) {
        this.searchRaidusBean.setLoginId(attributes.getValue(GenericConstants.LOGIN));
        this.searchRaidusBean.setSkyNum(attributes.getValue(GenericConstants.SKYNUM));
        this.searchRaidusBean.setMaxAliases(attributes.getValue(GenericConstants.MAXALIASES));
        this.searchRaidusBean.setCreationDate(attributes.getValue(GenericConstants.CREATION_DATE));
        this.searchRaidusBean.setType(attributes.getValue(GenericConstants.TYPE));
        this.searchRaidusBean.setStatus(attributes.getValue(GenericConstants.STATUS));
        this.searchRaidusBean.setMnemonic(attributes.getValue(GenericConstants.MNEMONIC));
        this.searchRaidusBean.setRateLimit(attributes.getValue(GenericConstants.RATELIMIT));
    }

    if (elementName.equalsIgnoreCase(GenericConstants.SETTINGS)) {
        // The SETTINGS attributes are not mapped yet; the Attributes object is only printed.
        System.out.println(attributes);
    }

    if (elementName.equalsIgnoreCase(GenericConstants.ALIASES)) {
        // Remembered here and consumed when the matching end tag arrives.
        this.tempKey = attributes.getValue(GenericConstants.MNEMONIC);
    }
}
/**
 * Stores the most recently captured text when a RESULT or ALIASES element closes.
 *
 * @param str1    first SAX name argument (unused here)
 * @param str2    second SAX name argument (unused here)
 * @param element name used to recognise the closing tag (case-insensitive)
 * @throws SAXException declared by the SAX callback contract
 */
public void endElement(String str1, String str2, String element) throws SAXException {
    if (element.equalsIgnoreCase(GenericConstants.RESULT)) {
        this.searchRaidusBean.setResultMessage(this.tempValue);
    }

    // An alias is recorded only when a non-blank key was captured earlier.
    final boolean closesAliases = element.equalsIgnoreCase(GenericConstants.ALIASES);
    if (closesAliases && !StringUtils.isBlank(this.tempKey)) {
        this.searchRaidusBean.getAlias().put(this.tempKey, this.tempValue);
    }
}
/**
 * Remembers the text of the current character event in {@code tempValue}.
 *
 * Each call replaces the previous value, so only the last reported chunk is kept.
 *
 * @param charArray buffer holding the characters
 * @param i         offset of the first character in the buffer
 * @param j         number of characters to read
 * @throws SAXException declared by the SAX callback contract
 */
public void characters(char[] charArray, int i, int j) throws SAXException {
    final String chunk = new String(charArray, i, j);
    this.tempValue = chunk;
}

If you are using the DefaultHandler, then you will be receiving a startElement event.
This method carries the Attributes as one of its parameters.
You will need to use getIndex(String) to get the index of the named attribute and getValue(int) to get the value of said attribute.
As Nambari has pointed out, there are hundreds of tutorials on the internet and more than a few posts on the subject on SO (I answered one over the weekend).
UPDATED
I'd suggest it should look something like this (I've not tested it)
/**
 * Copies attribute values of recognised start tags into {@code searchRaidusBean}
 * and collects every attribute of a SETTINGS element into {@code map}.
 *
 * Tag names are compared case-insensitively against the {@code GenericConstants} names.
 *
 * @param uri         namespace URI reported by the parser (unused here)
 * @param localName   local name reported by the parser (unused here)
 * @param elementName name used to recognise the tag
 * @param attributes  attributes attached to the start tag
 * @throws SAXException declared by the SAX callback contract
 */
public void startElement(String uri, String localName, String elementName, Attributes attributes) throws SAXException {
    if (elementName.equalsIgnoreCase(GenericConstants.INFO)) {
        this.searchRaidusBean.setServer(attributes.getValue(GenericConstants.SERVER));
        this.searchRaidusBean.setFunction(attributes.getValue(GenericConstants.FUNCTION));
    }
    if (elementName.equalsIgnoreCase(GenericConstants.RESULT)) {
        this.searchRaidusBean.setResultCode(attributes.getValue(GenericConstants.CODE));
    }
    if (elementName.equalsIgnoreCase(GenericConstants.ACCOUNT)) {
        this.searchRaidusBean.setLoginId(attributes.getValue(GenericConstants.LOGIN));
        this.searchRaidusBean.setSkyNum(attributes.getValue(GenericConstants.SKYNUM));
        this.searchRaidusBean.setMaxAliases(attributes.getValue(GenericConstants.MAXALIASES));
        this.searchRaidusBean.setCreationDate(attributes.getValue(GenericConstants.CREATION_DATE));
        this.searchRaidusBean.setType(attributes.getValue(GenericConstants.TYPE));
        this.searchRaidusBean.setStatus(attributes.getValue(GenericConstants.STATUS));
        this.searchRaidusBean.setMnemonic(attributes.getValue(GenericConstants.MNEMONIC));
        this.searchRaidusBean.setRateLimit(attributes.getValue(GenericConstants.RATELIMIT));
    }
    if (elementName.equalsIgnoreCase(GenericConstants.SETTINGS)) {
        // NOTE(review): map is declared outside this method; if it is a java.util.Map,
        // a later SETTINGS element replaces values stored under the same attribute name.
        for (int index = 0; index < attributes.getLength(); index++) {
            String attName = attributes.getLocalName(index);
            // The local name may be empty when namespace processing is off;
            // the qualified name is the usable key in that case.
            if (attName == null || attName.isEmpty()) {
                attName = attributes.getQName(index);
            }
            map.put(attName, attributes.getValue(index));
        }
    }
    if (elementName.equalsIgnoreCase(GenericConstants.ALIASES)) {
        this.tempKey = attributes.getValue(GenericConstants.MNEMONIC);
    }
}
UPDATED with tested example
I took your data (from the OP) and ran it through the following handler
DefaultHandler handler = new DefaultHandler() {
#Override
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
if (qName.equalsIgnoreCase("settings")) {
System.out.println("Parse settings attributes...");
for (int index = 0; index < attributes.getLength(); index++) {
String aln = attributes.getLocalName(index);
String value = attributes.getValue(index);
System.out.println(" " + aln + " = " + value);
}
}
}
};
And I got the following output
Parse settings attributes...
bitval = 4
status = open
Parse settings attributes...
bitval = 8192
status = open
session_timeout = 10800
Parse settings attributes...
bitval = 32768
status = open
cisco_address_pool = thepool
So I don't know what you're doing.

Related

sax parsing - mapping nested tags into main tag

I want to use sax parser for a large xml file. Handler looks like this:
// Handler that records the text of each element and writes it to documentWriter
// as soon as the element closes.
DefaultHandler handler = new DefaultHandler() {
    // Text reported by the most recent characters() call.
    String temp;
    // Every element name seen so far, in first-seen order.
    HashSet<String> xml_Elements = new LinkedHashSet<String>();
    // Element name -> whether that element is currently open and still unwritten.
    HashMap<String, Boolean> xml_Tags = new LinkedHashMap<String, Boolean>();
    // Element name -> values collected since the last clear().
    HashMap<String, ArrayList<String>> tags_Value = new LinkedHashMap<String, ArrayList<String>>();

    /** Registers the element name and marks it as open. */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        xml_Elements.add(qName);
        // The original scanned the set comparing with ==, which tests String identity.
        // qName is always a member after add(), so the flag is set directly.
        xml_Tags.put(qName, true);
    }

    /** Keeps the text of the current character event, replacing any earlier text. */
    @Override
    public void characters(char ch[], int start, int length) throws SAXException {
        temp = new String(ch, start, length);
    }

    /**
     * Writes the collected values when an open element closes, then empties
     * the collection so the next element starts fresh.
     */
    @Override
    public void endElement(String uri, String localName,
            String qName) throws SAXException {
        // Boolean.TRUE.equals avoids unboxing a null for an unknown name.
        if (Boolean.TRUE.equals(xml_Tags.get(qName))) {
            ArrayList<String> values = tags_Value.get(qName);
            if (values == null) {
                values = new ArrayList<String>();
                tags_Value.put(qName, values);
            }
            values.add(temp);
            for (ArrayList<String> collected : tags_Value.values()) {
                try {
                    documentWriter.write(collected + "\t");
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
            xml_Tags.put(qName, false);
        }
        tags_Value.clear();
    }
};
My xml is like :
<TermInfo>
<A>1/f noise</A>
<B>Random noise</B>
<C>Accepted</C>
<D>Flicker noise</D>
<F>Pink noise</F>
<I>1-f</I>
<I>1/f</I>
<I>1/f noise</I>
<I>1:f</I>
<I>flicker noise</I>
<I>noise</I>
<I>pink noise</I>
<ID>1</ID>
</TermInfo>
<TermInfo>
<A>3D printing</A>
<B>Materials fabrication</B>
<C>Accepted</C>
<D>3d printing</D>
<F>2</F>
<I>three dimension*</I>
<I>three-dimension*</I>
<I>3d</I>
<I>3-d</I>
<I>3d*</I>
</TermInfo>
I wanted to cluster all nested tags under Tag A.
ie for each A.. its B,C,D and I together.. etc. But using the above handler the output is like A-B-C-D-I-I-etc . Can I make one object for each A and add other elements into it. How can I include this..
I think this is along the lines of what you are asking for. It creates a List of HashMap objects. Every time it starts a TermInfo, it creates a new HashMap. Each endElement inside TermInfo puts a value into the Map. When endElement is TermInfo, it sets fieldMap to null so no intermediate tags are added. "TermInfo" represents A from your description.
/**
 * SAX handler that builds one map per {@code TermInfo} element.
 *
 * Each child element closed inside a {@code TermInfo} is stored in that element's
 * map under its tag name. Because the map holds one value per name, a repeated
 * child tag keeps only its last value.
 */
public class TestHandler extends DefaultHandler
{
    /** Map of the TermInfo currently being read, or null outside a TermInfo. */
    Map<String, String> fieldMap = null;
    /** One map per TermInfo element, in document order. */
    List<Map<String, String>> tags_Value = new ArrayList<Map<String, String>>();
    /** Text of the element that closed most recently. */
    String temp;
    /** Accumulates text chunks, since characters() may be called several times per element. */
    private final StringBuilder text = new StringBuilder();

    /** Starts a new map when a TermInfo element opens and resets the text buffer. */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException
    {
        text.setLength(0);
        if ("TermInfo".equals(elementName(localName, qName)))
        {
            fieldMap = new HashMap<String, String>();
            tags_Value.add(fieldMap);
        }
    }

    /** Appends the reported characters to the text of the current element. */
    @Override
    public void characters(char ch[], int start, int length)
            throws SAXException
    {
        text.append(ch, start, length);
    }

    /** Stores the element text in the current map, or closes the map at the end of TermInfo. */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException
    {
        temp = text.toString();
        text.setLength(0);
        if (fieldMap == null)
        {
            return;
        }
        String name = elementName(localName, qName);
        if ("TermInfo".equals(name))
        {
            // End of TermInfo: stop collecting until the next one opens.
            fieldMap = null;
        }
        else
        {
            fieldMap.put(name, temp);
        }
    }

    /** Returns the local name, or the qualified name when the local name is empty. */
    private static String elementName(String localName, String qName)
    {
        return (localName == null || localName.isEmpty()) ? qName : localName;
    }
}

Join two strings retrieved by SAX

I have an XML file like this one:
<?xml version="1.0" encoding="UTF-8"?>
<Article>
<ArticleTitle>Java-SAX Tutorial</ArticleTitle>
<Author>
<FamilyName>Yong</FamilyName>
<GivenName>Mook</GivenName>
<GivenName>Kim</GivenName>
<nickname>mkyong</nickname>
<salary>100000</salary>
</Author>
<Author>
<FamilyName>Low</FamilyName>
<GivenName>Yin</GivenName>
<GivenName>Fong</GivenName>
<nickname>fong fong</nickname>
<salary>200000</salary>
</Author>
</Article>
I have tried the example in mkyong's tutorial here and I can retrieve data perfectly from it using SAX, it gives me:
Article Title : Java-SAX Tutorial
Given Name : Kim
Given Name : Mook
Family Name : Yong
Given Name : Yin
Given Name : Fong
Family Name : Low
But I want it to give me something like this:
Article Title : Java-SAX Tutorial
Author : Kim Mook Yong
Author : Yin Fong Low
In other terms, I would like to retrieve some of the child nodes of the node Author, not all of them, put them in a string variable and display them.
This is the class I use in order to parse the Authors with the modification I have tried to do:
/**
 * Prints one "Full Name" line per {@code Author} element of an article XML file.
 */
public class ReadAuthors {

    /**
     * Parses the file and prints the family and given names of every author,
     * separated by single spaces, in document order.
     *
     * Parsing failures are reported with a stack trace and do not propagate.
     *
     * @param filePath path or URI of the XML file to read
     */
    public void parse(String filePath) {
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            SAXParser saxParser = factory.newSAXParser();
            DefaultHandler handler = new DefaultHandler() {
                boolean bFamilyName = false;
                boolean bGivenName = false;
                // Lives across callbacks so the parts of one author accumulate.
                final StringBuilder fullName = new StringBuilder();

                @Override
                public void startElement(String uri, String localName, String qName,
                        Attributes attributes) throws SAXException {
                    if (qName.equalsIgnoreCase("Author")) {
                        fullName.setLength(0);
                    }
                    if (qName.equalsIgnoreCase("FamilyName")) {
                        separate();
                        bFamilyName = true;
                    }
                    if (qName.equalsIgnoreCase("GivenName")) {
                        separate();
                        bGivenName = true;
                    }
                }

                @Override
                public void endElement(String uri, String localName,
                        String qName) throws SAXException {
                    if (qName.equalsIgnoreCase("FamilyName")) {
                        bFamilyName = false;
                    }
                    if (qName.equalsIgnoreCase("GivenName")) {
                        bGivenName = false;
                    }
                    // Printed once per author, not once per character event.
                    if (qName.equalsIgnoreCase("Author")) {
                        System.out.println("Full Name : " + fullName);
                    }
                }

                @Override
                public void characters(char ch[], int start, int length) throws SAXException {
                    // The flags stay set until the end tag, so text split across calls is kept.
                    if (bFamilyName || bGivenName) {
                        fullName.append(ch, start, length);
                    }
                }

                // Inserts a space before every name part except the first.
                private void separate() {
                    if (fullName.length() > 0) {
                        fullName.append(' ');
                    }
                }
            };
            saxParser.parse(filePath, handler);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
With this modification, it only gives me the ArticleTitle value and it doesn't return anything regarding the authors full names.
I have another class for parsing the ArticleTitle node and they are both called in a Main class.
What did I do wrong? And how can I fix it?
The fullName variable is overwritten every time the characters method is called. I think you should move that variable out into the handler: initialise it with an empty string when Author starts and write it out when Author ends. The concatenation should work as you did it. I haven't tried this out, but something similar should work:
/**
 * Prints one "Full Name" line per {@code Author} element of an article XML file.
 */
public class ReadAuthors {

    /**
     * Parses the file and prints, for each author, the text of its FamilyName and
     * GivenName children joined by single spaces in document order.
     *
     * Parsing failures are reported with a stack trace and do not propagate.
     *
     * @param filePath path or URI of the XML file to read
     */
    public void parse(String filePath) {
        try {
            SAXParserFactory factory = SAXParserFactory.newInstance();
            SAXParser saxParser = factory.newSAXParser();
            DefaultHandler handler = new DefaultHandler() {
                // True while the parser is inside a FamilyName or GivenName element.
                boolean bName = false;
                String fullName = "";

                @Override
                public void startElement(String uri, String localName, String qName,
                        Attributes attributes) throws SAXException {
                    if (qName.equalsIgnoreCase("Author")) {
                        fullName = "";
                    }
                    if (isNamePart(qName)) {
                        // Separate this part from the ones already collected.
                        if (!fullName.isEmpty()) {
                            fullName += " ";
                        }
                        bName = true;
                    }
                }

                @Override
                public void endElement(String uri, String localName,
                        String qName) throws SAXException {
                    if (isNamePart(qName)) {
                        bName = false;
                    }
                    if (qName.equalsIgnoreCase("Author")) {
                        System.out.println("Full Name : " + fullName);
                    }
                }

                @Override
                public void characters(char ch[], int start, int length) throws SAXException {
                    // The flag is cleared at the end tag, so text delivered in
                    // several chunks is appended completely.
                    if (bName) {
                        fullName += new String(ch, start, length);
                    }
                }

                // Tells whether the element contributes to the printed name.
                private boolean isNamePart(String qName) {
                    return qName.equalsIgnoreCase("FamilyName") || qName.equalsIgnoreCase("GivenName");
                }
            };
            saxParser.parse(filePath, handler);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

Java SAX is not parsing properly

I would appreciate any help on this.
This is my first handler I wrote.
I have a REST web service returning XML of links. It has quite a simple structure and is not deep.
I wrote a handler for this:
public class SAXHandlerLnk extends DefaultHandler {
public List<Link> lnkList = new ArrayList();
Link lnk = null;
private StringBuilder content = new StringBuilder();
#Override
//Triggered when the start of tag is found.
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
if (qName.equals("link")) {
lnk = new Link();
}
}
#Override
public void endElement(String uri, String localName, String qName) throws SAXException {
if (qName.equals("link")) {
lnkList.add(lnk);
}
else if (qName.equals("applicationCode")) {
lnk.applicationCode = content.toString();
}
else if (qName.equals("moduleCode")) {
lnk.moduleCode = content.toString();
}
else if (qName.equals("linkCode")) {
lnk.linkCode = content.toString();
}
else if (qName.equals("languageCode")) {
lnk.languageCode = content.toString();
}
else if (qName.equals("value")) {
lnk.value = content.toString();
}
else if (qName.equals("illustrationUrl")) {
lnk.illustrationUrl = content.toString();
}
}
#Override
public void characters(char[] ch, int start, int length) throws SAXException {
content.append(ch, start, length);
}
}
Some elements in the returned XML can be empty, e.g. <moduleCode/> or <illustrationUrl/>. When this happens my handler unfortunately assigns the previous value to the lnk object. So when <illustrationUrl> is empty in the XML, I get lnk.illustrationUrl equal to lnk.value.
Link{applicationCode='onedownload', moduleCode='onedownload',...}
In the above example, I would like moduleCode to be empty or null, because in XML it is an empty tag.
Here is the calling class:
/**
 * Command-line entry point that parses the links XML with {@code SAXHandlerLnk}
 * and prints every collected link.
 */
public class XMLRepositoryRestLinksFilterSAXParser {

    /**
     * Runs the parser and prints the results to standard output.
     *
     * @param args command-line arguments (not read)
     * @throws Exception any failure raised while creating the parser or parsing
     */
    public static void main(String[] args) throws Exception {
        SAXHandlerLnk handler = new SAXHandlerLnk();
        SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
        // {URL} is the placeholder for the service address, kept as in the post.
        parser.parse({URL}, handler);

        for (Link link : handler.lnkList) {
            System.out.println(link);
        }
    }
}
Like stated in my comment, you'd do the following. The callbacks are usually called in startElement, characters, (nested?), characters, endElement order, where (nested?) represents an optional repeat of the entire sequence.
#Override
//Triggered when the start of tag is found.
public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
content = null;
if (qName.equals("link")) {
lnk = new Link();
}
}
Note that characters may be called multiple times per a single XML element in your document, so your current code might fail to capture all content. You'd be better off using a StringBuilder instead of a String object to hold your character content and append to it. See this answer for an example.

Getting SAX Parser attributes

<Details><propname key="workorderid">799</propname>
How do I get 799 from workorderid using SAX parsing?
When I use this code I get "workorderid" but not the value of workorderid:
if(localName.equals("propname")){
String workid = attributes.getValue("key");
if(localName.equals("propname")){
//set one flag here and in endElement() get the value associated with your localname(propname)
String workid = attributes.getValue("key");
I am providing you the code try to understand and customize in your way.
/**
 * SAX handler that captures the text and the {@code key} attribute of a
 * {@code propname} element.
 *
 * If the document contains several such elements, the values of the last one win.
 */
public class ExampleHandler extends DefaultHandler {
    /** Text of the last propname element that was closed. */
    private String item;
    /** Value of the "key" attribute of the last propname element that was opened. */
    private String key;
    /** True while the parser is inside a propname element. */
    private boolean inItem = false;
    /** Text collected for the element currently being read. */
    private final StringBuilder content;

    public ExampleHandler() {
        content = new StringBuilder();
    }

    /** Resets the text buffer and, for propname, remembers the key attribute. */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes atts) throws SAXException {
        content.setLength(0);
        if (elementName(localName, qName).equalsIgnoreCase("propname")) {
            inItem = true;
            key = atts.getValue("key");
        }
    }

    /** Stores the collected text when a propname element closes. */
    @Override
    public void endElement(String uri, String localName, String qName)
            throws SAXException {
        if (elementName(localName, qName).equalsIgnoreCase("propname")) {
            if (inItem) {
                item = content.toString();
            }
            inItem = false;
        }
    }

    /** Appends the reported characters; may be called several times per element. */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        content.append(ch, start, length);
    }

    @Override
    public void endDocument() throws SAXException {
        // Hook for handing the collected values on once parsing has finished.
    }

    /** @return text of the last closed propname element, or null if none was seen */
    public String getItem() {
        return item;
    }

    /** @return "key" attribute of the last opened propname element, or null if absent */
    public String getKey() {
        return key;
    }

    /** Returns the local name, or the qualified name when the local name is empty. */
    private static String elementName(String localName, String qName) {
        return (localName == null || localName.isEmpty()) ? qName : localName;
    }
}
There may be a mistake somewhere, as I wrote this in a hurry. If it helps, an upvote is appreciated.
The following will hold the value of the node.
/**
 * Keeps the text of the current character event in {@code tempVal},
 * replacing whatever was stored before.
 */
public void characters(char[] ch, int start, int length) throws SAXException {
    final String chunk = new String(ch, start, length);
    tempVal = chunk;
}
In the event handler method, you need to get it like this:
if (qName.equals("propname")) {
    // Text most recently stored in tempVal.
    System.out.println(" node value " + tempVal);
    // Value of the "key" attribute on the propname element.
    final String attr = attributes.getValue("key");
}
In propname, the attribute key has the value workorderid, so what you are getting is correct.
What you need is the text value of the propname element.
//Provide you tagname which is propname
// Read the text of the first element with the given tag name, if there is one.
NodeList nl = ele.getElementsByTagName(tagName);
if (nl != null && nl.getLength() > 0) {
    Element el = (Element) nl.item(0);
    // An empty element has no child node; leave textVal untouched in that case.
    if (el.hasChildNodes()) {
        textVal = el.getFirstChild().getNodeValue();
    }
}

Extracting Text Nodes From XML File Using SAX Parser in JAVA

So I am currently using SAX to try to extract some information from a number of XML documents I am working with. Thus far, it has been really easy to extract the attribute values. However, I have no clue how to go about extracting the actual values from a text node.
For example, in the given XML document:
<w:rStyle w:val="Highlight" />
</w:rPr>
</w:pPr>
- <w:r>
<w:t>Text to Extract</w:t>
</w:r>
</w:p>
- <w:p w:rsidR="00B41602" w:rsidRDefault="00B41602" w:rsidP="007C3A42">
- <w:pPr>
<w:pStyle w:val="Copy" />
I can extract "Highlight" no problem by getting the value from val. But I have no idea how to get into that text node and get out "Text to Extract".
Here is my Java code thus far to pull out the attribute values...
/**
 * SAX handler that prints the id and image name of every {@code Relationship}
 * element whose {@code Target} attribute contains "image".
 *
 * Only {@code Relationships} and {@code Relationship} elements are accepted;
 * any other element makes {@link #startElement} throw.
 */
private static final class SaxHandler extends DefaultHandler
{
    /** Invoked when document parsing starts. */
    @Override
    public void startDocument() throws SAXException
    {
        System.out.println("Document processing starting:");
    }

    /** Invoked when document parsing has finished. */
    @Override
    public void endDocument() throws SAXException
    {
        System.out.println("Document processing finished. \n");
    }

    /**
     * Handles the start of element {@code qName}.
     *
     * @throws IllegalArgumentException if the element is neither Relationships nor Relationship
     */
    @Override
    public void startElement(String uri, String localName,
            String qName, Attributes attrs) throws SAXException
    {
        if (qName.equalsIgnoreCase("Relationships"))
        {
            // Container element: nothing to extract.
            return;
        }
        if (!qName.equalsIgnoreCase("Relationship"))
        {
            throw new IllegalArgumentException("Element '" +
                    qName + "' is not allowed here");
        }

        String val = attrs.getValue("Target");
        if (val == null || !val.contains("image"))
        {
            return;
        }
        String id = attrs.getValue("Id");
        // Keep only the part between the last '/' and the extension.
        int begIndex = val.lastIndexOf("/");
        int endIndex = val.lastIndexOf(".");
        if (endIndex <= begIndex)
        {
            // No extension after the last '/': take the rest of the string
            // instead of failing in substring().
            endIndex = val.length();
        }
        System.out.println("Id: " + id + " & Target: " + val.substring(begIndex + 1, endIndex));
    }

    /** Elements are left without any action. */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException
    {
        // do nothing
    }
}
But I have no clue where to start to get into that text node and pull out the values inside. Anyone have some ideas?
Here's some pseudo-code:
private boolean insideElementContainingTextNode;
private StringBuilder textBuilder;
/** Begins collecting text when a "w:t" element opens. */
public void startElement(String uri, String localName, String qName, Attributes attrs) {
    // Matched on qName; whether localName should be used instead is left open.
    if (!"w:t".equals(qName)) {
        return;
    }
    textBuilder = new StringBuilder();
    insideElementContainingTextNode = true;
}
/** Appends the reported characters while inside an element whose text is collected. */
public void characters(char[] ch, int start, int length) {
    if (!insideElementContainingTextNode) {
        return;
    }
    textBuilder.append(ch, start, length);
}
/** Finishes text collection when a "w:t" element closes. */
public void endElement(String uri, String localName, String qName) {
    // Matched on qName; whether localName should be used instead is left open.
    if (!"w:t".equals(qName)) {
        return;
    }
    insideElementContainingTextNode = false;
    // The complete text of the element is available here.
    String theCompleteText = this.textBuilder.toString();
    this.textBuilder = null;
}

Categories