I trying to parse Lingvo xml dictionary with help of DOM parser.
Problem: DOM parser doesn't see the subnodes of card node (see code below).
Question?: How to pull word and translation nodes from card node
My code:
import entity.Item;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class DOMParser {
public void parseXMLFile(String xmlFilePath) throws IOException, SAXException {
Document document = builder.parse(ClassLoader.getSystemResourceAsStream(xmlFilePath));
List<Item> itemList = new ArrayList<Item>();
NodeList nodeList = document.getDocumentElement().getChildNodes();
//iterates through cards
for (int i = 0; i < nodeList.getLength(); i++) {
Node node = nodeList.item(i);
System.out.println(node.getNodeName());
if (node instanceof Element) {
if ("card".equals(node.getNodeName())) {
// HERE node hasn't got anything!!! I mean attributes, childs etc.
}
}
}
}
}
My xml:
<?xml version="1.0" encoding="UTF-16"?>
<dictionary formatVersion="5" title="User ;vocabulary_user1" sourceLanguageId="1058" destinationLanguageId="1033" nextWordId="611" targetNamespace="http://www.abbyy.com/TutorDictionary">
<statistics readyMeaningsQuantity="90" activeMeaningsQuantity="148" learnedMeaningsQuantity="374" />
<card>
<word>загальна цікавість</word>
<meanings>
<meaning>
<statistics status="4" answered="122914" />
<translations>
<word>genaral wondering</word>
</translations>
</meaning>
</meanings>
</card>
</dictionary>
You can use a recursive approach to read through all your contents without getting into the mess of nested for loops.
For your xml:
public static void main(String[] args) throws ParserConfigurationException,
SAXException, IOException {
InputStream path = new FileInputStream("dom.xml");
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document document = builder.parse(path);
traverse(document.getDocumentElement());
}
public static void traverse(Node node) {
NodeList list = node.getChildNodes();
for (int i = 0; i < list.getLength(); i++) {
Node currentNode = list.item(i);
traverse(currentNode);
}
if (node.getNodeName().equals("word")) {
System.out.println("This -> " + node.getTextContent());
}
}
Gives,
This -> загальна цікавість
This -> genaral wondering
Related
Throw this program I'm trying to unzip my xml file and parse it using SAX, for the parsing part I have an exception which appeared about ClassCastException class com.sun.org.apache.xerces.internal.dom.DeferredTextImpl cannot be cast to class javax.swing.text.Element, here is my code :
import java.io.File;
import java.io.IOException;
import javax.swing.text.Element;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import net.lingala.zip4j.ZipFile;
import net.lingala.zip4j.exception.ZipException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class Parser {
public static void main(String[] args) throws IOException, ZipException {
String source = "C:\\Users\\java_program\\xml.zip";
String destination = "C:\\Users\\java_program\\xml";
ZipFile zipFile = new ZipFile(source);
zipFile.extractAll(destination);
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
try {
DocumentBuilder builder = factory.newDocumentBuilder();
Document document = builder.parse(new File ("C:\\Users\\java_program\\xml\\xml.xml"));
document.getDocumentElement().normalize();
NodeList productList = document.getElementsByTagName("product");
for(int i = 0; i<productList.getLength(); i++) {
Node product = productList.item(i);
if (product.getNodeType() == Node.ELEMENT_NODE) {
Element productElement = (Element) product; // the problem is here
NodeList productDetails = product.getChildNodes();
for(int j=0; j<productDetails.getLength(); j++) {
Node detail = productDetails.item(j);
if (product.getNodeType() == Node.ELEMENT_NODE) {
Element detailElement = (Element) detail; // the problem is here
System.out.println(" " + ((org.w3c.dom.Element) detailElement).getTagName() + ": " + detailElement.getAttributes());
}
}
}
}
} catch (ParserConfigurationException e) {
e.printStackTrace();
} catch (SAXException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}
My XML file have this structure :
<?xml version="1.0" encoding="utf-8"?>
<import xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema">
<products>
<product>
<article_sku>000c66dawt</article_sku>
<brand>HUAWEI</brand>
<brand_reference>HUAWEI-NEXUS-6P-64GO-ARGENT</brand_reference>
<category>SMARTP</category>
<ean13>6901443077359</ean13>
<it_cpu_smartphone_type><![CDATA[Qualcomm Snapdragon S810]]></it_cpu_smartphone_type>
</product>
<product>
<article_sku>000cfxlysl</article_sku>
<brand>HERCULES</brand>
<brand_reference>HERCULES-DJCONTROL-COMPACT</brand_reference>
<category>PLATIN-1</category>
<ean13>3362934745288</ean13>
</product>
</products>
</import>
Any help is really appreciated
You imported the wrong Element class.
Change
import javax.swing.text.Element;
to
import org.w3c.dom.Element;
Also you create detail object which is a node and check that if the previously created product variable is an element and then cast detail to Element.
Node detail = productDetails.item(j);
if (product.getNodeType() == Node.ELEMENT_NODE) {
Element detailElement = (Element) detail;
the product line:
Node product = productList.item(i);
I think you should chacnge to conditional to detail.getNodetype() == Node.ELEMENT_NODE
I am trying to use an xml file to be able to access locations of user specified files however with the one I have created I keep getting a #text for one method and the other gives [Settings: null]
package tests;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import utils.Cryption;
import utils.CryptoException;
public class Test {
public static void main(String[] args) throws CryptoException, SAXException, IOException, ParserConfigurationException {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document document = builder.parse(new File("D:\\Projects\\Utils\\exampleSettings.xml"));
NodeList nodes = document.getElementsByTagName("Settings");
for(int i = 0; i < nodes.getLength(); i++) {
Element element = (Element) nodes.item(i);
System.out.println(element.toString());
}
Element rootElement = document.getDocumentElement();
String hold = getSeting("debugLocation", rootElement);
System.out.println(hold);
}
private static String getSeting(String tagName, Element element) {
NodeList list = element.getElementsByTagName(tagName);
if (list != null && list.getLength() > 0) {
NodeList subList = list.item(0).getChildNodes();
if (subList != null && subList.getLength() > 0) {
return subList.item(0).getNodeName();
}
}
return null;
}
}
This is the xml file
<Settings>
<debugLocation> test </debugLocation>
</Settings>
I thought I should be getting test back but its not.
For Some reason I want to be able to read the Properties in my hibernate Configuration file, say for example I need to know what dialect is used or what database driver class. I have attempted to do this by parsing the xml But I am not getting past the attributes:
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.ParserConfigurationException;
import org.xml.sax.SAXException;
/**
*
* #author User
*/
public class XmlParser {
public XmlParser() {
}
public void readXml() throws SAXException, IOException, ParserConfigurationException {
InputStream in = this.getClass().getResourceAsStream("/hibernate.cfg.xml");
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(in);
doc.getDocumentElement().normalize();
System.out.println("Root element :" + doc.getDocumentElement().getNodeName());
NodeList nList = doc.getElementsByTagName("property");
System.out.println("----------------------------");
for (int temp = 0; temp < nList.getLength(); temp++) {
Node nNode = nList.item(temp);
System.out.println("\nCurrent Element :" + nNode.getNodeName());
if (nNode.getNodeType() == Node.ELEMENT_NODE) {
Element eElement = (Element) nNode;
System.out.println("Driver : " + eElement.getAttribute("name"));
System.out.println("Not Sure : " + eElement.getElementsByTagName("hibernate.connection.url").item(0).getTextContent());
System.out.println(eElement.getElementsByTagNameNS("name", "hibernate.dialect").item(0).getTextContent());
}
}
}
}
My Output is:
Root element :hibernate-configuration
----------------------------
Current Element :property
Driver : hibernate.dialect
null
I need to be able to get the property values any time in my application. Is there another way or what am I missing in this method?
can you try with this
Configuration hibernateConfiguration = new Configuration().configure(new File("/hibernate.cfg.xml"));
String url = configuration.getProperty("hibernate.connection.url");
Like this, you can get all the properties, set the same somewhere in your static hashMap and use throughout your application.
You can use SAXReader to parse the file, here is the code:
import java.io.InputStream;
import java.util.Iterator;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;
public class ReadXMLFile {
public static void main(String argv[]) throws DocumentException {
new ReadXMLFile().readMyXML();
}
private void readMyXML() throws DocumentException {
InputStream in = this.getClass().getResourceAsStream(
"/hibernate.cfg.xml");
Document document = new SAXReader().read(in);
Element root = document.getRootElement();
System.out.println("Root element : " + root.getName());
Element sfNode = document.getRootElement().element("session-factory");
Iterator<Element> itr = sfNode.elementIterator("property");
System.out.println("----------Properties-----------");
while (itr.hasNext()) {
Element node = itr.next();
String name = node.attributeValue("name");
String value = node.getText().trim();
System.out.println(name + " = " + value);
}
}
}
i need to split my xml into multiple xml based on the tag start and close. For this i tried with the following code
public class XmlSplit {
public static void main(String [] args) throws Exception {
File input = new File("/home/dev702/Desktop/cadgraf-test/Data_Entry.xml");
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
Document doc = (Document) dbf.newDocumentBuilder().parse(input);
XPath xpath = XPathFactory.newInstance().newXPath();
NodeList nodes = (NodeList) xpath.evaluate("//Data_x0020_Entry", doc, XPathConstants.NODESET);
int itemsPerFile = 500;
int fileNumber = 0;
Document currentDoc = (Document) dbf.newDocumentBuilder().newDocument();
Node rootNode;
rootNode = currentDoc.createElement("Data_x0020_Entry");
File currentFile = new File(fileNumber+".xml");
for (int i=1; i <= nodes.getLength(); i++) {
Node imported = currentDoc.importNode(nodes.item(i-1), true);
rootNode.appendChild(imported);
if (i % itemsPerFile == 0) {
writeToFile(rootNode, currentFile);
rootNode = currentDoc.createElement("Data_x0020_Entry");
currentFile = new File((++fileNumber)+".xml");
}
}
writeToFile(rootNode, currentFile);
}
private static void writeToFile(Node node, File file) throws Exception {
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.transform(new DOMSource(node), new StreamResult(new FileWriter(file)));
}
}
In this am getting error on currentDoc.createElement. Am not able to compile this code and it is saying createElement not available.
Sample xml file
<?xml version="1.0" encoding="UTF-8"?>
<dataroot
xmlns:od="urn:schemas-microsoft-com:officedata"
xmlns:xsi="w3.org/2001/XMLSchema-instance" ;
xsi:noNamespaceSchemaLocation="Data%20Entry.xsd"
generated="2014-02-12T14:35:47"
>
<Data_x0020_Entry>
<ID>1004</ID>
<User>006Parthiban</User>
<Data_x0020_Entry_x0020_Date>2013-12-26T00:00:00</Data_x0020_Entry_x0020_Date>
<Publication>Daily Thanthi</Publication>
<Edition>CE City Edition</Edition>
<Location>Bangalore</Location>
</Data_x0020_Entry>
</dataroot>
I could not see any compilation issue in this code. Please check your import statements and verify that you have imported proper classes. the list of classes which I import are as below
import java.io.File;
import java.io.FileWriter;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
i think u have imported import javax.swing.text.Document; change that to import org.w3c.dom.Document;
And if ur using Swing too than use fully qualified that for to create Document
like
org.w3c.dom.Document currentDoc = (org.w3c.dom.Document) dbf.newDocumentBuilder().newDocument();
Update
I think the root element should be "dataroot" for splitted xmls. and you need to change your code to generate correct number of xml files (look at below code)..look at the code below..i tested for
int itemsPerFile = 2; ..it is working fine..
NOTE remove ";" from input xml file after namespcae
import java.io.File;
import java.io.FileWriter;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
public class XmlSplit {
public static void main(String [] args) throws Exception {
File input = new File("src/test.xml");
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
Document doc = (Document) dbf.newDocumentBuilder().parse(input);
XPath xpath = XPathFactory.newInstance().newXPath();
NodeList nodes = (NodeList) xpath.evaluate("//Data_x0020_Entry", doc, XPathConstants.NODESET);
int itemsPerFile = 2;
int fileNumber = 0;
Document currentDoc = (Document) dbf.newDocumentBuilder().newDocument();
Node rootNode;
rootNode = currentDoc.createElement("dataroot");
File currentFile = new File(fileNumber+".xml");
for (int i=1; i <= nodes.getLength(); i++) {
Node imported = currentDoc.importNode(nodes.item(i-1), true);
rootNode.appendChild(imported);
if (i % itemsPerFile == 0) {
writeToFile(rootNode, currentFile);
rootNode = currentDoc.createElement("dataroot");
currentFile = new File((++fileNumber)+".xml");
}
else
{
writeToFile(rootNode, currentFile);
}
}
}
private static void writeToFile(Node node, File file) throws Exception {
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.transform(new DOMSource(node), new StreamResult(new FileWriter(file)));
}
}
let me know if u face any issues :)
This is the JAVA XML i'm parsing..
<objects>
<object>...<class>A /<class>...</object>
<object>...<class>B</class>....</object>
<object>...<class>A /<class>...</object>
</objects>
Now i split the XML into 3 XML's based on object tag with the below code.
DocumentBuilder builder = dbf.newDocumentBuilder();
Document doc = builder.parse("xml");
doc.getDocumentElement().normalize();
TransformerFactory tranFactory = TransformerFactory.newInstance();
Transformer aTransformer = tranFactory.newTransformer();
NodeList list =(NodeList) doc.getElementsByTagName("object");
System.out.println("XML SPLITED");
for (int i=0; i<list.getLength(); i++){
Node element = list.item(i).cloneNode(true);
if(element.hasChildNodes()){
Source src = new DOMSource(element);
FileOutputStream fs=new FileOutputStream("XML" + i + ".xml");
Result dest = new StreamResult(fs);
aTransformer.transform(src, dest);
fs.close();
}
My requirement is to get only the files with class tag A.So my output will be only 2 XML's.Please post your answers.
Using your example, you can do that this way:
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;
public class XmlSplitting {
private static final Logger logger = Logger.getLogger(XmlSplitting.class.getName());
private static final String FILE_PATH = "./";
private DocumentBuilder builder;
private Transformer transformer;
public XmlSplitting() throws ParserConfigurationException, TransformerConfigurationException {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
this.builder = factory.newDocumentBuilder();
TransformerFactory transfromerFactory = TransformerFactory.newInstance();
this.transformer = transfromerFactory.newTransformer();
}
public static void main(String[] args) {
try {
XmlSplitting s = new XmlSplitting();
s.run();
} catch (ParserConfigurationException | SAXException | IOException | TransformerException x) {
logger.log(Level.SEVERE, "Error", x);
}
}
private void run() throws ParserConfigurationException, SAXException, IOException, TransformerException {
File file = new File(FILE_PATH + "xml.xml");
if (file.exists()) {
Document document = this.builder.parse(file);
document.normalize();
NodeList list = document.getElementsByTagName("object");
for (int i = 0; i < list.getLength(); i++) {
Node node = list.item(i);
if (Node.ELEMENT_NODE == node.getNodeType()) {
Element object = (Element)node;
NodeList classes = object.getElementsByTagName("class");
if (1 == classes.getLength()) {
Node clazz = classes.item(0);
if (Node.ELEMENT_NODE == clazz.getNodeType()) {
this.copyNodeToNewFile(clazz.getTextContent(), node, i);
}
} else {
logger.log(Level.SEVERE, "Number of class nodes in node object is different than expected. Number of class nodes found: {0}.", classes.getLength());
}
}
}
} else {
logger.log(Level.SEVERE, "You provided wrong path for xml file.");
}
}
private void copyNodeToNewFile(String content, Node node, int i) throws IOException, TransformerException {
boolean copy = this.nodeShouldBeCopied(content);
logger.log(Level.INFO, "Node with content {0} will {1}be moved to separete file.", new Object[]{content, true == copy ? "" : "not "});
if (copy) {
String fileName = String.format("%sxml%d.xml", FILE_PATH, i);
try (FileOutputStream fos = new FileOutputStream(fileName)) {
Source source = new DOMSource(node);
Result destination = new StreamResult(fos);
this.transformer.transform(source, destination);
}
}
}
// here you can change condition to copy given node to file
private boolean nodeShouldBeCopied(String content) {
return content.contains("A");
}
}
How about using a Brazilian framework to parse your XML? it will make your code more exotic and you can tell your friends about it:
http://jcoltrane.sourceforge.net/userguide/extending_parsing_process.html