Parsing and updating xml using SAX parser in java - java

I have an xml file with similar tags ->
<properties>
<definition>
<name>IP</name>
<description></description>
<defaultValue>10.1.1.1</defaultValue>
</definition>
<definition>
<name>Name</name>
<description></description>
<defaultValue>MyName</defaultValue>
</definition>
<definition>
<name>Environment</name>
<description></description>
<defaultValue>Production</defaultValue>
</definition>
</properties>
I want to update the default value of the definition with name : Environment.
Is it possible to do that using SAX parser?
Can you please point me to proper documentation?
So far I have parsed the document, but when I update defaultValue it updates all defaultValues. I don't know how to target the exact defaultValue tag.

Anything is possible with SAX, it's just waaaaay harder than it has to be. It's pretty old school and there are many easier ways to do this (JAXB, XQuery, XPath, DOM etc ).
That said, let's do it with SAX.
It sounds like the problem you are having is that you are not tracking the state of your progress through the document. SAX simply works by making callbacks whenever it comes across an event within the document.
This is a fairly crude way of parsing the document and updating the relevant node using SAX. Basically, I check when we hit an element with the value you want to update (Environment) and set a flag, so that when we get to the contents of the defaultValue node, the characters callback lets me remove the existing value and replace it with the new value.
import java.io.StringReader;
import java.util.Arrays;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Demonstrates state tracking in a SAX handler: the {@code defaultValue} of the
 * {@code definition} whose {@code name} is {@code Environment} is replaced.
 *
 * <p>The handler remembers which element it is currently inside and raises a
 * flag once the wanted name has been seen, so that only the following
 * {@code defaultValue} text is touched.
 *
 * <p>NOTE(review): the updated document is rebuilt from the {@code char[]} the
 * parser passes to {@link #characters(char[], int, int)}. SAX only guarantees
 * the {@code start..start+length} slice of that array, so this works only when
 * the parser happens to expose its whole input buffer (small documents). It is
 * kept because it is the technique this example illustrates.
 */
public class Q26897496 extends DefaultHandler {

    /** Sample document parsed by {@link #parse()}. */
    public static String xmlDoc = "<?xml version='1.0'?>"
            + "<properties>"
            + " <definition>"
            + " <name>IP</name>"
            + " <description></description>"
            + " <defaultValue>10.1.1.1</defaultValue>"
            + " </definition>"
            + " <definition>"
            + " <name>Name</name>"
            + " <description></description>"
            + " <defaultValue>MyName</defaultValue>"
            + " </definition>"
            + " <definition>"
            + " <name>Environment</name>"
            + " <description></description>"
            + " <defaultValue>Production</defaultValue>"
            + " </definition>"
            + "</properties>";

    /** Local name of the element currently open, or {@code null} between elements. */
    String elementName;

    /** True once the wanted {@code name} was seen inside the current definition. */
    boolean mark = false;

    /** Parser buffer with the new value spliced in; {@code null} if nothing was updated. */
    char[] updatedDoc;

    public static void main(String[] args) {
        Q26897496 q = new Q26897496();
        try {
            q.parse();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public Q26897496() {
    }

    /**
     * Parses {@link #xmlDoc} with this object as content handler and prints the result.
     *
     * @throws Exception if the parser cannot be created or the document is malformed
     */
    public void parse() throws Exception {
        SAXParserFactory spf = SAXParserFactory.newInstance();
        // Namespace awareness is required for localName to be populated.
        spf.setNamespaceAware(true);
        SAXParser saxParser = spf.newSAXParser();
        XMLReader xml = saxParser.getXMLReader();
        xml.setContentHandler(this);
        xml.parse(new InputSource(new StringReader(xmlDoc)));
        // updatedDoc stays null when no matching defaultValue was found; avoid the NPE.
        String result = (updatedDoc == null) ? "(no defaultValue was updated)" : new String(updatedDoc);
        System.out.println("new xml: \n" + result);
    }

    @Override
    public void startDocument() throws SAXException {
        System.out.println("starting");
        // Reset per-parse state so the handler can be reused.
        elementName = null;
        mark = false;
        updatedDoc = null;
    }

    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        this.elementName = localName;
    }

    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        // Text after a closing tag no longer belongs to that element.
        elementName = null;
        // Never let the flag leak into the next definition (e.g. an empty defaultValue).
        if ("definition".equals(localName)) {
            mark = false;
        }
    }

    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        String value = new String(ch, start, length);
        if ("name".equals(elementName) && value.equals("Environment")) {
            this.mark = true;
        }
        if ("defaultValue".equals(elementName) && mark) {
            // Splice the new value into the parser's buffer (see class note on its limits).
            String tmpDoc = new String(ch);
            String leading = tmpDoc.substring(0, start);
            String trailing = tmpDoc.substring(start + length);
            this.updatedDoc = (leading + "NewValueForDefaulValue" + trailing).toCharArray();
            mark = false;
        }
    }
}

Related

How to read XML declaration with Java SAX

I want to read the XML declaration from an XML file with Java SAX. For example
<?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
I tried using DefaultHandler, but characters and startElement don't get called for the XML declaration. This is my code:
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Parses {@code test.xml} with SAX and echoes every element name and every
 * chunk of character data to standard output.
 */
public class SAXStuff {
    public static void main(String[] args) throws ParserConfigurationException, SAXException, IOException {
        // Handler that simply prints what the parser reports.
        DefaultHandler echoHandler = new DefaultHandler() {
            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes)
                    throws SAXException {
                System.out.println(qName);
            }

            @Override
            public void characters(char[] ch, int start, int length) throws SAXException {
                // Only the start..start+length slice of the buffer is valid.
                System.out.print(new String(ch, start, length));
            }
        };

        SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
        saxParser.parse("test.xml", echoHandler);
    }
}
How can I get the XML declaration using SAX in Java?
Since Java 14, org.xml.sax.ContentHandler has a declaration method for this purpose. DefaultHandler implements ContentHandler, so this method can be overridden to provide a custom action.
This is the method signature:
void declaration(String version, String encoding, String standalone) throws SAXException
version - the version string as in the input document, null if not specified
encoding - the encoding string as in the input document, null if not specified
standalone - the standalone string as in the input document, null if not specified
Example:
SAXParserFactory factory = SAXParserFactory.newInstance();
SAXParser parser = factory.newSAXParser();
DefaultHandler handler = new DefaultHandler() {
    /**
     * Rebuilds the XML declaration from the values reported by the parser and prints it.
     * Each argument is null when the corresponding pseudo-attribute was not specified.
     */
    @Override
    public void declaration(String version, String encoding, String standalone) {
        // Every part carries its own leading space, so a missing part leaves no gap.
        StringBuilder declaration = new StringBuilder("<?xml");
        if (version != null) {
            declaration.append(" version=\"").append(version).append('"');
        }
        if (encoding != null) {
            declaration.append(" encoding=\"").append(encoding).append('"');
        }
        if (standalone != null) {
            declaration.append(" standalone=\"").append(standalone).append('"');
        }
        declaration.append("?>");
        System.out.println(declaration);
    }
};
parser.parse(new File("file.xml"), handler);

Not able to Catch Element using SAX Parser

I'm reading XML file using SAX parser utility.
Here is my sample XML
<?xml version="1.0"?><company><Account AccountNumber="100"><staff><firstname>yong</firstname><firstname>jin</firstname></staff></Account></company>
Here is the code
import java.util.Arrays;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
/**
 * Parses a fixed XML file with SAX, logging every start/end element event and,
 * for the first text chunk reported after an {@code Account} start tag, walking
 * backwards through the parser's character buffer until a {@code '<'} is found.
 */
public class ReadXML {
    public static void main(String argv[]) {
        try {
            DefaultHandler handler = new DefaultHandler() {
                // Set when an Account start tag was seen; cleared by the next text chunk.
                boolean bAccount = false;

                public void startElement(String uri, String localName, String qName, Attributes attributes)
                        throws SAXException {
                    System.out.println("Start Element :" + qName);
                    if (qName.equalsIgnoreCase("ACCOUNT")) {
                        bAccount = true;
                    }
                }

                public void endElement(String uri, String localName, String qName) throws SAXException {
                    System.out.println("End Element :" + qName);
                }

                public void characters(char[] ch, int start, int length) throws SAXException {
                    System.out.println("Im here:" + bAccount);
                    if (!bAccount) {
                        return;
                    }
                    System.out.println("Account First Name : " + new String(ch, start, length));
                    bAccount = false;

                    // Scan right-to-left from 'start'; the collected characters end up reversed.
                    // NOTE(review): the index only decreases, so if no '<' precedes 'start'
                    // the scan runs past the beginning of the array.
                    StringBuilder collected = new StringBuilder();
                    int pos = start;
                    while (pos < ch.length - 1) {
                        char current = ch[pos];
                        if (current == '<') {
                            System.out.println("Account:" + collected);
                            break;
                        }
                        collected.append(current);
                        pos--;
                    }
                }
            };

            SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
            saxParser.parse("C:\\Lenny\\Work\\XML\\Out_SaxParsing_01.xml", handler);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
As you can see in the XML, the Account tag looks like this: Account AccountNumber="100". I want to capture that tag as well.
To achieve that, in the characters method I'm trying to read the array from right to left, so that I can get Account AccountNumber="100" when the Account event is encountered.
But I'm not able to get there. The event is generated, but it does not go to the characters method. I thought it would go into the characters method once the Account tag is encountered, but it doesn't.
May I know please what am missing or doing wrong ?
Any Help please..!
AccountNumber="100" is an attribute of the Account element so inside the startElement handler you have you can read out the attributes parameter to access that value.

Java Replace words within xml

I have the following xml
<some tag>
<some_nested_tag attr="Hello"> Text </some_nested_tag>
Hello world Hello Programming
</some tag>
From the above XML, I want to replace the occurrences of the word "Hello" which are part of the tag content but not part of a tag attribute.
I want the following output (Replacing Hello by HI):
<some tag>
<some_nested_tag attr="Hello"> Text </some_nested_tag>
HI world HI Programming
</some tag>
I tried java regex and also some of the DOM parser tutorials, but without any luck. I am posting here for help as I have limited time available to fix this in my project. Help would be appreciated.
That can be done by using a negative lookbehind.
Try this regex:
(?<!attr=")Hello
It will match Hello that is not preceded by attr=.
So you could try this:
str = str.replaceAll("(?<!attr=\")Hello", "Hi");
It can also be done by negative lookahead:
Hello(?!([^<]+)?>)
string.replaceAll("(?i)\\shello\\s", " HI ");
Regex Explanation:
\sHello\s
Options: Case insensitive
Match a single character that is a “whitespace character” (ASCII space, tab, line feed, carriage return, vertical tab, form feed) «\s»
Match the character string “Hello” literally (case insensitive) «Hello»
Match a single character that is a “whitespace character” (ASCII space, tab, line feed, carriage return, vertical tab, form feed) «\s»
hi
Insert the character string “ HI ” literally « HI »
Regex101 Demo
XSLT is a language for transforming XML documents into other XML documents. You can match all the text nodes containing 'Hello' and replace the content of those particular nodes.
A small example of using XSLT in Java:
import javax.xml.transform.*;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
/**
 * Applies the stylesheet {@code transform.xslt} to {@code input.xml} and writes
 * the result to {@code output.xml}.
 */
public class TestMain {
    public static void main(String[] args) throws IOException, URISyntaxException, TransformerException {
        File stylesheetFile = new File("transform.xslt");
        File inputFile = new File("input.xml");
        File outputFile = new File("output.xml");

        // Compile the stylesheet into a Transformer, then run it over the input.
        Transformer transformer =
                TransformerFactory.newInstance().newTransformer(new StreamSource(stylesheetFile));
        transformer.transform(new StreamSource(inputFile), new StreamResult(outputFile));
    }
}
There was a good question on replacing string using XSLT - you can find an example of XSLT template there:
XSLT string replace
Here is a fully functional example using SAX parser. It is adapted to your case with minimal changes from this example
The actual replacement takes place in MyCopyHandler#endElement() and MyCopyHandler#startElement() and the XML element text content is collected in MyCopyHandler#characters(). Note the buffer maintenance too - it is important in handling mixed element content (text and child elements)
I know XSLT solution is also possible, but it is not that portable.
/**
 * Runs a sample document through a SAX parser whose content handler copies the
 * document to an in-memory stream, then prints the copied document.
 */
public class XMLReplace {
    /**
     * Parses the sample string and prints the handler's output.
     *
     * @param args not used
     * @throws Exception if the parser cannot be created or parsing fails
     */
    public static void main(String[] args) throws Exception {
        final String str = "<root> Hello <nested attr='Hello'> Text </nested> Hello world Hello Programming </root>";

        // The handler writes into this in-memory sink.
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        PrintWriter writer = new PrintWriter(sink);

        XMLReader xmlReader = SAXParserFactory.newInstance().newSAXParser().getXMLReader();
        xmlReader.setErrorHandler(new MyErrorHandler());
        xmlReader.setContentHandler(new MyCopyHandler(writer));
        xmlReader.parse(new InputSource(new StringReader(str)));

        // Closing the writer flushes everything into the sink before it is printed.
        writer.close();
        System.out.println(sink);
    }
}
/**
 * SAX content handler that copies the parsed document to a {@link PrintWriter},
 * replacing every occurrence of "Hello" with "HI" in element text content.
 * Attribute values are copied unchanged.
 *
 * <p>Text is collected in a buffer because the parser may deliver it in several
 * chunks; the buffer is flushed whenever markup has to be written, which keeps
 * mixed content (text interleaved with child elements) in document order.
 *
 * <p>Markup-significant characters are escaped on output so the copy stays
 * well-formed when the input contained entity references such as {@code &amp;}.
 */
class MyCopyHandler implements ContentHandler {
    /** Namespace declarations reported since the last start tag, ready to be written. */
    private final StringBuilder pendingNamespaces = new StringBuilder();
    private Locator locator;
    private final PrintWriter out;
    /** Text content collected since the last piece of markup. */
    private final StringBuilder buffer = new StringBuilder();

    /**
     * @param out destination of the copied document; it is neither flushed nor closed here
     */
    public MyCopyHandler(PrintWriter out) {
        this.out = out;
    }

    @Override
    public void setDocumentLocator(Locator locator) {
        this.locator = locator;
    }

    @Override
    public void startDocument() {
    }

    @Override
    public void endDocument() {
    }

    @Override
    public void startPrefixMapping(String prefix, String uri) {
        // Several mappings can precede one start tag, so accumulate all of them.
        // An empty prefix denotes the default namespace, written as plain "xmlns".
        if (prefix == null || prefix.isEmpty()) {
            pendingNamespaces.append(" xmlns");
        } else {
            pendingNamespaces.append(" xmlns:").append(prefix);
        }
        pendingNamespaces.append("=\"").append(escape(uri, true)).append('"');
    }

    @Override
    public void endPrefixMapping(String prefix) {
    }

    @Override
    public void startElement(String namespaceURI, String localName, String qName, Attributes atts) {
        // Flush buffer - needed in case of mixed content (text + elements)
        flushText();
        out.print("<" + qName);
        out.print(pendingNamespaces);
        pendingNamespaces.setLength(0);
        for (int i = 0; i < atts.getLength(); i++) {
            out.print(" " + atts.getQName(i) + "=\"" + escape(atts.getValue(i), true) + "\"");
        }
        out.print(">");
    }

    @Override
    public void endElement(String namespaceURI, String localName, String qName) {
        // Process text content
        flushText();
        out.print("</" + qName + ">");
    }

    @Override
    public void characters(char[] ch, int start, int length) {
        // Store chunk of text - parser is allowed to provide text content in chunks
        buffer.append(ch, start, length);
    }

    @Override
    public void ignorableWhitespace(char[] ch, int start, int length) {
        flushText();
        out.write(ch, start, length);
    }

    @Override
    public void processingInstruction(String target, String data) {
        flushText();
        out.print("<?" + target + " " + data + "?>");
    }

    @Override
    public void skippedEntity(String name) {
        flushText();
        out.print("&" + name + ";");
    }

    /** Writes the buffered text with the replacement applied and empties the buffer. */
    private void flushText() {
        if (buffer.length() == 0) {
            return;
        }
        // Replace on the raw text first, then escape, so the replacement never sees entities.
        out.print(escape(buffer.toString().replace("Hello", "HI"), false));
        buffer.setLength(0);
    }

    /**
     * Escapes the characters that are not allowed verbatim in XML output.
     *
     * @param text        raw text as reported by the parser
     * @param inAttribute true when the text is written inside a double-quoted attribute value
     * @return the text with {@code & < >} (and {@code "} for attributes) replaced by entity references
     */
    private static String escape(String text, boolean inAttribute) {
        StringBuilder escaped = new StringBuilder(text.length() + 16);
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c == '&') {
                escaped.append("&amp;");
            } else if (c == '<') {
                escaped.append("&lt;");
            } else if (c == '>') {
                escaped.append("&gt;");
            } else if (c == '"' && inAttribute) {
                escaped.append("&quot;");
            } else {
                escaped.append(c);
            }
        }
        return escaped.toString();
    }
}
/**
 * Error handler that prints the details of every reported problem to standard
 * output and then rethrows it, so warnings, errors and fatal errors all abort
 * the parse.
 */
class MyErrorHandler implements ErrorHandler {
    @Override
    public void warning(SAXParseException e) throws SAXException {
        reportAndRethrow("Warning", e);
    }

    @Override
    public void error(SAXParseException e) throws SAXException {
        reportAndRethrow("Error", e);
    }

    @Override
    public void fatalError(SAXParseException e) throws SAXException {
        reportAndRethrow("Fatal Error", e);
    }

    /**
     * Prints severity, message and location of the problem, then throws it unchanged.
     *
     * @param severity label printed in front of the message
     * @param problem  the exception reported by the parser
     * @throws SAXException always; the very same {@code problem} instance
     */
    private void reportAndRethrow(String severity, SAXParseException problem) throws SAXException {
        System.out.println(severity + ": " + problem.getMessage());
        System.out.println("Line " + problem.getLineNumber() + " Column " + problem.getColumnNumber());
        System.out.println("System ID: " + problem.getSystemId());
        throw problem;
    }
}

XML parse section by section in SAX or StAX

A shortened version of my XML file looks like this:
<?xml version="1.0" encoding="UTF-8"?>
<MzIdentML id="MS-GF+">
<SequenceCollection xmlns="http://psidev.info/psi/pi/mzIdentML/1.1">
<DBSequence length="146" id="DBSeq143">
<cvParam cvRef="PSI-MS" accession="MS:1001088"></cvParam>
</DBSequence>
<Peptide id="Pep7">
<PeptideSequence>MFLSFPTTK</PeptideSequence>
<Modification location="1" monoisotopicMassDelta="15.994915">
<cvParam cvRef="UNIMOD" accession="UNIMOD:35" name="Oxidation"></cvParam>
</Modification>
</Peptide>
<PeptideEvidence dBSequence_ref="DBSeq143" id="PepEv_160_1_18"></PeptideEvidence>
<PeptideEvidence dBSequence_ref="DBSeq143" id="PepEv_275_8_133"></PeptideEvidence>
</SequenceCollection>
</MzIdentML>
I want to get the DBSequence, Peptide and PeptideEvidence details separately, each including the attributes of the parent element and of its children (and nested children, if there are any). In other words, I want all the attributes as key-value pairs for each section illustrated below:
----------------------------------------------------------------------
<DBSequence length="146" id="DBSeq143">
<cvParam cvRef="PSI-MS" accession="MS:1001088"></cvParam>
</DBSequence>
----------------------------------------------------------------------
<Peptide id="Pep7">
<PeptideSequence>MFLSFPTTK</PeptideSequence>
<Modification location="1" monoisotopicMassDelta="15.994915">
<cvParam cvRef="UNIMOD" accession="UNIMOD:35" name="Oxidation"></cvParam>
</Modification>
</Peptide>
----------------------------------------------------------------------
<PeptideEvidence dBSequence_ref="DBSeq143" id="PepEv_160_1_18"></PeptideEvidence>
<PeptideEvidence dBSequence_ref="DBSeq143" id="PepEv_275_8_133"></PeptideEvidence>
----------------------------------------------------------------------
For example, if we consider <DBSequence> section:
<DBSequence length="146" id="DBSeq143">
<cvParam cvRef="PSI-MS" accession="MS:1001088"></cvParam>
</DBSequence>
should be output as:
DBSequence=>length=146;id=DBSeq143;cvRef=PSI-MS;accession=MS:1001088;
This is the code I wrote in SAX:
package lucene.parse;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX handler that prints, for every {@code DBSequence} and
 * {@code PeptideEvidence} element, its attributes as {@code name=value;} pairs.
 * For {@code DBSequence} the attributes of nested {@code cvParam} elements are
 * included as well; {@code cvParam} elements elsewhere are ignored.
 *
 * <p>Parsing happens in the constructor; results go to standard output.
 */
public class MzIdentMLSAXParser extends DefaultHandler {
    /** True while the parser is inside a DBSequence element. */
    private boolean isDBsequence = false;
    /** Attribute pairs collected for the DBSequence currently being read. */
    String DBSequenceSection;
    /** Attribute pairs of the most recent PeptideEvidence element. */
    String PeptideEvidenceDocument;

    public static void main(String[] argv) throws SAXException, ParserConfigurationException, IOException {
        new MzIdentMLSAXParser("file_path_here/sample.xml");
    }

    /**
     * Parses the given file immediately, printing one line per matching element.
     *
     * @param dataDir path of the XML file to parse
     * @throws FileNotFoundException        if the file cannot be opened
     * @throws SAXException                 if the document is not well-formed
     * @throws ParserConfigurationException if no SAX parser can be created
     * @throws IOException                  if reading the file fails
     */
    public MzIdentMLSAXParser(String dataDir) throws FileNotFoundException, SAXException, ParserConfigurationException, IOException {
        SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
        // try-with-resources: the stream is closed even when parsing fails.
        try (FileInputStream fis = new FileInputStream(dataDir)) {
            parser.parse(fis, this);
        }
    }

    @Override
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
        if (qName.equals("DBSequence")) {
            // each time we find a new DBSequence, start a fresh section
            DBSequenceSection = formatAttributes(atts);
            isDBsequence = true;
        } else if (qName.equals("cvParam") && isDBsequence) {
            // only cvParam elements nested in a DBSequence belong to the section
            DBSequenceSection += formatAttributes(atts);
        } else if (qName.equals("PeptideEvidence")) {
            // each time we find a new PeptideEvidence, start a fresh document
            PeptideEvidenceDocument = formatAttributes(atts);
        }
    }

    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (qName.equals("DBSequence")) {
            System.out.println(qName + "=>" + DBSequenceSection);
            isDBsequence = false;
        } else if (qName.equals("PeptideEvidence")) {
            System.out.println(qName + "=>" + PeptideEvidenceDocument);
        }
    }

    /** Renders all attributes as consecutive {@code qName=value;} pairs, in reported order. */
    private static String formatAttributes(Attributes atts) {
        StringBuilder pairs = new StringBuilder();
        for (int i = 0; i < atts.getLength(); i++) {
            pairs.append(atts.getQName(i)).append('=').append(atts.getValue(i)).append(';');
        }
        return pairs.toString();
    }
}
Is there any easy way of doing this? because I have lots of tags like this with nested nodes. Challenge here is <cvParam> appears not only in <DBSequence> tag, but in other tags like <Modification> etc. I tried with StAX too. but couldn't make it.
Here is a working example of using StAX. StAX excels when parsing known XML structures, but can be used for dynamic parsing too.
This code relies on knowledge, e.g. knowing that we want the content of DBSequence, Peptide, and PeptideEvidence, and that PeptideSequence has text content, while the others don't.
The methods use recursion to follow the structure of the XML.
/**
 * Parses the embedded sample document with StAX and hands the reader,
 * positioned on the root element, to {@code search}.
 */
public static void main(String[] args) throws Exception {
    String xml = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
                 "<MzIdentML id=\"MS-GF+\">\n" +
                 " <SequenceCollection xmlns=\"http://psidev.info/psi/pi/mzIdentML/1.1\">\n" +
                 " <DBSequence length=\"146\" id=\"DBSeq143\">\n" +
                 " <cvParam cvRef=\"PSI-MS\" accession=\"MS:1001088\"></cvParam>\n" +
                 " </DBSequence>\n" +
                 " <Peptide id=\"Pep7\">\n" +
                 " <PeptideSequence>MFLSFPTTK</PeptideSequence>\n" +
                 " <Modification location=\"1\" monoisotopicMassDelta=\"15.994915\">\n" +
                 " <cvParam cvRef=\"UNIMOD\" accession=\"UNIMOD:35\" name=\"Oxidation\"></cvParam>\n" +
                 " </Modification>\n" +
                 " </Peptide>\n" +
                 " <PeptideEvidence dBSequence_ref=\"DBSeq143\" id=\"PepEv_160_1_18\"></PeptideEvidence>\n" +
                 " <PeptideEvidence dBSequence_ref=\"DBSeq143\" id=\"PepEv_275_8_133\"></PeptideEvidence>\n" +
                 " </SequenceCollection>\n" +
                 "</MzIdentML>";

    XMLInputFactory inputFactory = XMLInputFactory.newFactory();
    XMLStreamReader xmlReader = inputFactory.createXMLStreamReader(new StringReader(xml));
    try {
        // Advance to the root start tag, which is where search expects to begin.
        xmlReader.nextTag();
        search(xmlReader);
    } finally {
        xmlReader.close();
    }
}
/**
 * Walks the children of the current element. Children named DBSequence,
 * Peptide or PeptideEvidence are collected into a property map and printed;
 * any other child is searched recursively.
 *
 * <p>The reader must be on START_ELEMENT upon entry and is on the matching
 * END_ELEMENT on return.
 */
private static void search(XMLStreamReader reader) throws XMLStreamException {
    assert reader.getEventType() == XMLStreamConstants.START_ELEMENT;
    while (reader.nextTag() == XMLStreamConstants.START_ELEMENT) {
        String elementName = reader.getLocalName();
        boolean isSection = "DBSequence".equals(elementName)
                || "Peptide".equals(elementName)
                || "PeptideEvidence".equals(elementName);
        if (isSection) {
            // LinkedHashMap keeps the properties in the order they were encountered.
            Map<String, String> collected = new LinkedHashMap<>();
            collectProps(reader, collected);
            System.out.println(elementName + ": " + collected);
        } else {
            search(reader);
        }
    }
}
/**
 * Adds the attributes of the current element, and recursively of all its
 * descendants, to {@code props}. For a PeptideSequence element the text
 * content is stored under the element name instead of descending further.
 *
 * <p>The reader must be on START_ELEMENT upon entry and is on the matching
 * END_ELEMENT on return.
 */
private static void collectProps(XMLStreamReader reader, Map<String, String> props) throws XMLStreamException {
    assert reader.getEventType() == XMLStreamConstants.START_ELEMENT;

    final int attributeCount = reader.getAttributeCount();
    for (int idx = 0; idx < attributeCount; idx++) {
        props.put(reader.getAttributeLocalName(idx), reader.getAttributeValue(idx));
    }

    String elementName = reader.getLocalName();
    if ("PeptideSequence".equals(elementName)) {
        // getElementText() consumes the text and leaves the reader on the end tag.
        props.put(elementName, reader.getElementText());
        return;
    }

    while (reader.nextTag() == XMLStreamConstants.START_ELEMENT) {
        collectProps(reader, props);
    }
}
OUTPUT
DBSequence: {length=146, id=DBSeq143, cvRef=PSI-MS, accession=MS:1001088}
Peptide: {id=Pep7, PeptideSequence=MFLSFPTTK, location=1, monoisotopicMassDelta=15.994915, cvRef=UNIMOD, accession=UNIMOD:35, name=Oxidation}
PeptideEvidence: {dBSequence_ref=DBSeq143, id=PepEv_160_1_18}
PeptideEvidence: {dBSequence_ref=DBSeq143, id=PepEv_275_8_133}

SaxParseException in XSD validation does not give element name

I have an xsd file and an xml file, I am validating the xml file against the xsd file using the following code
// Validate the XML string against the XSD string while building a DOM.
// Returns true when parsing succeeded, false when any exception was reported.
final String schemaLanguageProperty = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
final String schemaSourceProperty = "http://java.sun.com/xml/jaxp/properties/schemaSource";

DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setValidating(true);
factory.setAttribute(schemaLanguageProperty, "http://www.w3.org/2001/XMLSchema");
factory.setAttribute(schemaSourceProperty, new InputSource(new StringReader(xsd)));
Document doc = null;
try {
    DocumentBuilder builder = factory.newDocumentBuilder();
    // Validation problems are routed through the custom error handler.
    builder.setErrorHandler(new MyErrorHandler());
    doc = builder.parse(new InputSource(new StringReader(xml)));
    return true;
} catch (ParserConfigurationException e) {
    System.out.println("Parser not configured: " + e.getMessage());
} catch (SAXException e) {
    System.out.println("Parsing XML failed due to a " + e.getClass().getName() + ":" + e.getMessage());
} catch (IOException e) {
    System.out.println("IOException thrown");
    e.printStackTrace();
}
return false;
MyErrorHandler is
/**
 * Error handler that logs every reported problem with its location details.
 * Warnings are only logged; errors and fatal errors are additionally rethrown
 * as a {@link SAXException}, which aborts the parse.
 */
private static class MyErrorHandler implements ErrorHandler {
    public void warning(SAXParseException spe) throws SAXException {
        System.out.println("Warning: " + describe(spe));
    }

    public void error(SAXParseException spe) throws SAXException {
        System.out.println("Error: " + describe(spe));
        // Keep the original exception as the cause so line/column information is not lost.
        throw new SAXException("Error: " + spe.getMessage(), spe);
    }

    public void fatalError(SAXParseException spe) throws SAXException {
        System.out.println("Fatal Error: " + describe(spe));
        // Keep the original exception as the cause so line/column information is not lost.
        throw new SAXException("Fatal Error: " + spe.getMessage(), spe);
    }

    /** Formats the message together with the column, line, public id and system id. */
    private static String describe(SAXParseException spe) {
        return spe.getMessage()
                + " getColumnNumber is " + spe.getColumnNumber()
                + " getLineNumber " + spe.getLineNumber()
                + " getPublicId " + spe.getPublicId()
                + " getSystemId " + spe.getSystemId();
    }
}
And when the xml does not comply with xsd I get an exception.. but this exception does not have the name of the xsd element due to which this error has occured .. The message looks like
Parsing XML failed due to a org.xml.sax.SAXException:Error: cvc-minLength-valid: Value '' with length = '0' is not facet-valid with respect to minLength '1' for type 'null'.
Instead of printing the name of the xsd element, the error message just has ''. Because of this I am not able to find and display(to the user) the exact element which is causing the error.
My xsd element looks like this
<xs:element name="FullName_FirstName">
<xs:annotation>
<xs:appinfo>
<ie:label>First Name</ie:label>
<ie:html_element>0</ie:html_element>
</xs:appinfo>
</xs:annotation>
<xs:simpleType>
<xs:restriction base="xs:string">
<xs:minLength value="1"/>
</xs:restriction>
</xs:simpleType>
</xs:element>
Thanks in advance
First of all, some advice. You don't need to build a DOM document just to do validation. This causes a large amount of memory overhead, maybe even running out on large input XML documents. You could just use a SAXParser. If you're using Java 1.5 or later, that isn't even necessary. From that version on, an XML validation API was included in Java SE. Check package javax.xml.validation for more info. The idea is that you first build a Schema object, then obtain a Validator from that which can be used to do validation. It accepts any Source implementation for input. Validators can also be given ErrorHandlers, so you can just reuse your class. Of course, it is possible that you actually will need a DOM, but in that case it's still better to make a Schema instance and register that with your DocumentBuilderFactory.
Now, for the actual problem. This isn't entirely easy, since the SAXParseException doesn't provide you with much context information. Your best bet is to have a ContentHandler hooked up somewhere and keep track of what element you're in, or some other positional information. You could then have that given to the error handler when needed. The class DefaultHandler (in package org.xml.sax.helpers) or DefaultHandler2 (in package org.xml.sax.ext) is a convenient way of combining both error and content handling.
I've put together a test that I'll post below. Now, I do get two lines of output instead of the expected one. If this is because I'm using a Schema, or because I'm not throwing an exception and keep on processing, I'm not certain. The second line does contain the name of the element, so that might be enough. You could have some flag set on errors instead of throwing an exception and ending the parsing.
package jaxb.test;
import java.io.StringReader;
import javax.xml.XMLConstants;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.Source;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;
public class ValidationTest {
public static void main(String[] args) throws Exception {
//Test XML and schema
final String xml = "<?xml version=\"1.0\"?><test><test2></test2></test>";
final String schemaString =
"<?xml version=\"1.0\"?>"
+ "<xsd:schema xmlns:xsd=\"http://www.w3.org/2001/XMLSchema\" elementFormDefault=\"unqualified\" attributeFormDefault=\"unqualified\">"
+ "<xsd:element name=\"test\" type=\"Test\"/>"
+ "<xsd:element name=\"test2\" type=\"Test2\"/>"
+ "<xsd:complexType name=\"Test\">"
+ "<xsd:sequence>"
+ "<xsd:element ref=\"test2\" minOccurs=\"1\" maxOccurs=\"unbounded\"/>"
+ "</xsd:sequence>"
+ "</xsd:complexType>"
+ "<xsd:simpleType name=\"Test2\">"
+ "<xsd:restriction base=\"xsd:string\"><xsd:minLength value=\"1\"/></xsd:restriction>"
+ "</xsd:simpleType>"
+ "</xsd:schema>";
//Building a Schema instance
final Source schemaSource =
new StreamSource(new StringReader(schemaString));
final Schema schema =
SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI).newSchema(schemaSource);
//Creating a SAXParser for our input XML
//First the factory
final SAXParserFactory factory = SAXParserFactory.newInstance();
//Must be namespace aware to receive element names
factory.setNamespaceAware(true);
//Setting the Schema for validation
factory.setSchema(schema);
//Now the parser itself
final SAXParser parser = factory.newSAXParser();
//Creating an instance of our special handler
final MyContentHandler handler = new MyContentHandler();
//Parsing
parser.parse(new InputSource(new StringReader(xml)), handler);
}
private static class MyContentHandler extends DefaultHandler {
private String element = "";
#Override
public void startElement(String uri, String localName, String qName,
Attributes attributes) throws SAXException {
if(localName != null && !localName.isEmpty())
element = localName;
else
element = qName;
}
#Override
public void warning(SAXParseException exception) throws SAXException {
System.out.println(element + ": " + exception.getMessage());
}
#Override
public void error(SAXParseException exception) throws SAXException {
System.out.println(element + ": " + exception.getMessage());
}
#Override
public void fatalError(SAXParseException exception) throws SAXException {
System.out.println(element + ": " + exception.getMessage());
}
public String getElement() {
return element;
}
}
}
It's a bit rough, but you can work on from this to get what you need.

Categories