Making rss feed parser in java - java

I'm trying to make a function that will take url and xpath as arguments, and query the xml file from supplied url and return String results. Here's my code: `package
uforia.tests.daoTests;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.junit.Assert;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
public class XpathHelper {
public static final String NL = System.getProperty("line.separator");
#Test
public void testBelow() {
System.out.println(xmlQuery("http://abcnews.go.com/US/wireStory/10-things-today-19933443", "//*[#id=\"storyText\"]/p[3]"));
Assert.assertTrue(true);
}
public String xmlQuery(String url, String xpath) {
StringBuilder sb = new StringBuilder();
DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance(); // Getting
// the
// instance
// of
// DocumentBuilderFactory
domFactory.setNamespaceAware(true); // true if the parser produced will
// provide support for XML
// namespaces;
try {
DocumentBuilder builder = domFactory.newDocumentBuilder();
// Creating document builder
Document doc = builder.parse(new URL(url).openStream()); // e.g.
XPath xPath = XPathFactory.newInstance().newXPath();
// getting instance of xPath
XPathExpression expr = xPath.compile(xpath);
// e.g. "//#id"
NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
for (int i = 0; i < nodes.getLength(); i++) {
sb.append(nodes.item(i).getNodeValue()).append(NL);
}
}
catch (Exception e) {
e.printStackTrace();
}
// Think of closing connection in finnaly branch...
return sb.toString();
}
}
`
And I'm getting this error:
[Fatal Error] :37:108: The reference to entity "asset" must end with
the ';' delimiter. org.xml.sax.SAXParseException; lineNumber: 37;
columnNumber: 108; The reference to entity "asset" must end with the
';' delimiter.
I think the problem is with escaping ampersands, but I can't get it to work.
Thank for help in advance...

Related

Passing parameters to REST web-service

I'm dealing with the problem with passing parameteres to web-service.
I have created web-service which works OK for the case fromLanguage = "eng"
But, when I test service through Glassfish console and send fromLanguage = "bos" I don't get appropriate result.
package pckgTranslator;
import javax.ws.rs.GET;
import javax.ws.rs.Path;
import javax.ws.rs.PathParam;
#Path("/MyRestService/{wordToTranslate},{fromLanguage},{toLanguage}")
public class clsTranslate {
#GET
public String doGet(#PathParam("wordToTranslate") String wordToTranslate,
#PathParam("fromLanguage") String fromLanguage, #PathParam("toLanguage") String toLanguage)
throws Exception{
Translator translator = new Translator();
return translator.getTranslation(wordToTranslate,fromLanguage, toLanguage);
}
}
This is XML fajl which I try to parse:
<?xml version="1.0" encoding="utf-8" ?>
<gloss>
<word id="001">
<eng>ball</eng>
<bos>lopta</bos>
</word>
<word id="002">
<eng>house</eng>
<bos>kuca</bos>
</word>
<word id="003">
<eng>game</eng>
<bos>igra</bos>
</word>
</gloss>
And this is the class which I'm using for parsing XML.
package pckgTranslator;
import java.io.IOException;
import java.io.InputStream;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
public class Translator {
String translation = null;
String getTranslation(String wordForTransl, String fromLanguage, String toLanguage)
throws ParserConfigurationException, SAXException, IOException, XPathExpressionException {
//fromLanguage = "eng";
//toLanguage = "bos";
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
DocumentBuilder builder = factory.newDocumentBuilder();
InputStream is = Translator.class.getResourceAsStream("/resource/glossary.xml");
Document doc = builder.parse(new InputSource(is));
XPathFactory xpathfactory = XPathFactory.newInstance();
XPath xpath = xpathfactory.newXPath();
//XPathExpression expr = null; //xpath.compile("//word[eng='house']/bos/text()");
XPathExpression expr = xpath.compile("//word['" + wordForTransl + "'='" + wordForTransl + "']/bos/text()");
if (fromLanguage == "eng") {
expr = xpath.compile("//word[eng='" + wordForTransl + "']/bos/text()");
} else if (fromLanguage == "bos") {
expr = xpath.compile("//word[bos='" + wordForTransl + "']/eng/text()");
}
Object result = expr.evaluate(doc, XPathConstants.NODESET);
NodeList nodes = (NodeList) result;
for (int i = 0; i < nodes.getLength(); i++) {
//System.out.println(nodes.item(i).getNodeValue());
translation = nodes.item(i).getNodeValue();
}
//return nodes.item(i).getNodeValue();
if (translation != null) {
return translation;
} else {
return "We are sorry, there is no translation for this word!";
}
}
}
It seems to me that something is wrong with the parameters fromLanguage and toLanguage, but I can't realize what exactly.
Thanks in advance.
As I mentioned in the comment, you have hardcoded fromLanguage and toLanguage variables to eng and bos at the beginning of getTranslation() method. Due to this, the fromLanguage and 'toLangugaevalues passed togetTranslation()` method are lost.
Secondly, instead of separating #PathParm by , separate those by /. It will look like:
#Path("/MyRestService/{wordToTranslate}/{fromLanguage}/{toLanguage}")
#GET
public String doGet(#PathParam("wordToTranslate") String wordToTranslate,
#PathParam("fromLanguage") String fromLanguage, #PathParam("toLanguage") String toLanguage) throws Exception
Invocation: curl -X GET http://localhost:8080/MyRestService/x/y/z
Alternatively use #QueryParam. In that case your path would be:
#Path("/MyRestService")
public String doGet(#QueryParam("wordToTranslate") String wordToTranslate,
#QueryParam("fromLanguage") String fromLanguage, #QueryParam("toLanguage") String toLanguage) throws Exception
Invocation: curl -X GET http://localhost:8080/MyRestService?wordToTranslate=x&fromLanguage=y&toLanguage=z
Remove or comment the below lines in getTranslation() method:
fromLanguage = "eng";
toLanguage = "bos";
Note: To fix your issue the above solution is sufficient. However, to make you code better please see the below suggestions.
In addition to the above I see two more issues:
You are storing translated value in translation instance variable. In case you are using the same Translator object (singleton instance) and the current translation fails, getTranslation() will return the previously translated value.
Why are you initializing expr with the below?
XPathExpression expr = xpath.compile("//word['" + wordForTransl + "'='" + wordForTransl + "']/bos/text()");
Lastly, every time you are calling getTranslation() the XML is being parsed. Instead, parse it once in init() method and then use it in getTranslation() method.
I have modified your Translator class based on the above points:
package org.openapex.samples.misc.parse.xml;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.*;
import java.io.IOException;
import java.io.InputStream;
public class ParseXMLAndTranslate {
public static void main(String[] args) throws Exception{
Translator translator = new Translator();
translator.init();
System.out.println(translator.getTranslation("house","eng", "bos"));
System.out.println(translator.getTranslation("igra","bos", "eng"));
}
private static class Translator {
//String translation = null;
private Document doc;
public void init() throws ParserConfigurationException, SAXException, IOException{
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
factory.setNamespaceAware(true);
DocumentBuilder builder = factory.newDocumentBuilder();
InputStream is = Translator.class.getResourceAsStream("/resource/glossary.xml");
this.doc = builder.parse(new InputSource(is));
}
String getTranslation(String wordForTransl, String fromLanguage, String toLanguage)
throws XPathExpressionException {
//fromLanguage = "eng";
//toLanguage = "bos";
XPathFactory xpathfactory = XPathFactory.newInstance();
XPath xpath = xpathfactory.newXPath();
//XPathExpression expr = null; //xpath.compile("//word[eng='house']/bos/text()");
//XPathExpression expr = xpath.compile("//word['" + wordForTransl + "'='" + wordForTransl + "']/bos/text()");
XPathExpression expr = null;
if (fromLanguage == "eng") {
expr = xpath.compile("//word[eng='" + wordForTransl + "']/bos/text()");
} else if (fromLanguage == "bos") {
expr = xpath.compile("//word[bos='" + wordForTransl + "']/eng/text()");
}
Object result = expr.evaluate(doc, XPathConstants.NODESET);
NodeList nodes = (NodeList) result;
String translation = null;
/*for (int i = 0; i < nodes.getLength(); i++) {
//System.out.println(nodes.item(i).getNodeValue());
translation = nodes.item(i).getNodeValue();
}*/
if(nodes.getLength() > 0){
translation = nodes.item(0).getNodeValue();
}
//return nodes.item(i).getNodeValue();
if (translation != null) {
return translation;
} else {
return "We are sorry, there is no translation for this word!";
}
}
}
}
Here is the output:
kuca
game

getting only #text and null when trying to get value from xml

I am trying to use an xml file to be able to access locations of user specified files however with the one I have created I keep getting a #text for one method and the other gives [Settings: null]
package tests;
import java.io.File;
import java.io.IOException;
import java.io.StringReader;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import utils.Cryption;
import utils.CryptoException;
public class Test {
public static void main(String[] args) throws CryptoException, SAXException, IOException, ParserConfigurationException {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document document = builder.parse(new File("D:\\Projects\\Utils\\exampleSettings.xml"));
NodeList nodes = document.getElementsByTagName("Settings");
for(int i = 0; i < nodes.getLength(); i++) {
Element element = (Element) nodes.item(i);
System.out.println(element.toString());
}
Element rootElement = document.getDocumentElement();
String hold = getSeting("debugLocation", rootElement);
System.out.println(hold);
}
private static String getSeting(String tagName, Element element) {
NodeList list = element.getElementsByTagName(tagName);
if (list != null && list.getLength() > 0) {
NodeList subList = list.item(0).getChildNodes();
if (subList != null && subList.getLength() > 0) {
return subList.item(0).getNodeName();
}
}
return null;
}
}
This is the xml file
<Settings>
<debugLocation> test </debugLocation>
</Settings>
I thought I should be getting test back but its not.

How to add an attribute to the root node

I have an xml as String
<color>
<name>black</name>
<color>
I want to add an attribute to root node and save a xml as String again.
<color id="1">
<name>black</name>
<color>
But I can't. Here is my code
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.xml.sax.InputSource;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
String xml = "<color><name>black</name></color>";
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
InputSource is = new InputSource(new StringReader(xml));
Document document = builder.parse(is);
Element element = (Element) document.getFirstChild();
element.setAttribute("id", "1");
String result = document.toString();
System.out.println(result);
The output is [#document: null]. Help me please resolve my problem
You can not use document.toString() to get output. You need to convert document object back to xml string.
Please check following code. It will help to solve your problem.
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
public class test1 {
public static void main(String[] args) throws SAXException, IOException,
ParserConfigurationException {
final String xmlStr = "<color><name>black</name></color>";
Document doc = convertStringToDocument(xmlStr);
String str = convertDocumentToString(doc);
System.out.println(str);
}
private static Document convertStringToDocument(String xmlStr)
throws SAXException, IOException, ParserConfigurationException {
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
InputSource is = new InputSource(new StringReader(xmlStr));
Document document = builder.parse(is);
Element element = (Element) document.getFirstChild();
element.setAttribute("id", "1");
String result = document.toString();
System.out.println(result);
return document;
}
private static String convertDocumentToString(Document doc) {
TransformerFactory tf = TransformerFactory.newInstance();
Transformer transformer;
try {
transformer = tf.newTransformer();
StringWriter writer = new StringWriter();
transformer.transform(new DOMSource(doc), new StreamResult(writer));
String output = writer.getBuffer().toString();
return output;
} catch (TransformerException e) {
e.printStackTrace();
}
return null;
}
}

splitting xml ino mutiple xmls based on xml tags using java

i need to split my xml into multiple xml based on the tag start and close. For this i tried with the following code
public class XmlSplit {
public static void main(String [] args) throws Exception {
File input = new File("/home/dev702/Desktop/cadgraf-test/Data_Entry.xml");
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
Document doc = (Document) dbf.newDocumentBuilder().parse(input);
XPath xpath = XPathFactory.newInstance().newXPath();
NodeList nodes = (NodeList) xpath.evaluate("//Data_x0020_Entry", doc, XPathConstants.NODESET);
int itemsPerFile = 500;
int fileNumber = 0;
Document currentDoc = (Document) dbf.newDocumentBuilder().newDocument();
Node rootNode;
rootNode = currentDoc.createElement("Data_x0020_Entry");
File currentFile = new File(fileNumber+".xml");
for (int i=1; i <= nodes.getLength(); i++) {
Node imported = currentDoc.importNode(nodes.item(i-1), true);
rootNode.appendChild(imported);
if (i % itemsPerFile == 0) {
writeToFile(rootNode, currentFile);
rootNode = currentDoc.createElement("Data_x0020_Entry");
currentFile = new File((++fileNumber)+".xml");
}
}
writeToFile(rootNode, currentFile);
}
private static void writeToFile(Node node, File file) throws Exception {
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.transform(new DOMSource(node), new StreamResult(new FileWriter(file)));
}
}
In this am getting error on currentDoc.createElement. Am not able to compile this code and it is saying createElement not available.
Sample xml file
<?xml version="1.0" encoding="UTF-8"?>
<dataroot
xmlns:od="urn:schemas-microsoft-com:officedata"
xmlns:xsi="w3.org/2001/XMLSchema-instance" ;
xsi:noNamespaceSchemaLocation="Data%20Entry.xsd"
generated="2014-02-12T14:35:47"
>
<Data_x0020_Entry>
<ID>1004</ID>
<User>006Parthiban</User>
<Data_x0020_Entry_x0020_Date>2013-12-26T00:00:00</Data_x0020_Entry_x0020_Date>
<Publication>Daily Thanthi</Publication>
<Edition>CE City Edition</Edition>
<Location>Bangalore</Location>
</Data_x0020_Entry>
</dataroot>
I could not see any compilation issue in this code. Please check your import statements and verify that you have imported proper classes. the list of classes which I import are as below
import java.io.File;
import java.io.FileWriter;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
i think u have imported import javax.swing.text.Document; change that to import org.w3c.dom.Document;
And if ur using Swing too than use fully qualified that for to create Document
like
org.w3c.dom.Document currentDoc = (org.w3c.dom.Document) dbf.newDocumentBuilder().newDocument();
Update
I think the root element should be "dataroot" for splitted xmls. and you need to change your code to generate correct number of xml files (look at below code)..look at the code below..i tested for
int itemsPerFile = 2; ..it is working fine..
NOTE remove ";" from input xml file after namespcae
import java.io.File;
import java.io.FileWriter;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
public class XmlSplit {
public static void main(String [] args) throws Exception {
File input = new File("src/test.xml");
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
Document doc = (Document) dbf.newDocumentBuilder().parse(input);
XPath xpath = XPathFactory.newInstance().newXPath();
NodeList nodes = (NodeList) xpath.evaluate("//Data_x0020_Entry", doc, XPathConstants.NODESET);
int itemsPerFile = 2;
int fileNumber = 0;
Document currentDoc = (Document) dbf.newDocumentBuilder().newDocument();
Node rootNode;
rootNode = currentDoc.createElement("dataroot");
File currentFile = new File(fileNumber+".xml");
for (int i=1; i <= nodes.getLength(); i++) {
Node imported = currentDoc.importNode(nodes.item(i-1), true);
rootNode.appendChild(imported);
if (i % itemsPerFile == 0) {
writeToFile(rootNode, currentFile);
rootNode = currentDoc.createElement("dataroot");
currentFile = new File((++fileNumber)+".xml");
}
else
{
writeToFile(rootNode, currentFile);
}
}
}
private static void writeToFile(Node node, File file) throws Exception {
Transformer transformer = TransformerFactory.newInstance().newTransformer();
transformer.transform(new DOMSource(node), new StreamResult(new FileWriter(file)));
}
}
let me know if u face any issues :)

Parsing xml document Java "cannot be resolved"

I'm going through a tutorial on how to parse an xml document with java and encountering a problem. I am getting the error "dom cannot be resolved" I know it has something to do with the way I am declaring the variables and being out of scope but I can't figure out how to fix it.
Any help would be greatly appreciated, I will post the relevant parts below:
package com.xmlparse;
import java.io.IOException;
import java.util.Iterator;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import com.entities.Employee;
public class XmlParser
{
private void parseXmlFile(){
//get the factory
DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
try {
//Using factory get an instance of document builder
DocumentBuilder db = dbf.newDocumentBuilder();
//parse using builder to get DOM representation of the XML file
Document dom = db.parse("test.xml");
} catch(ParserConfigurationException pce) {
pce.printStackTrace();
} catch(SAXException se) {
se.printStackTrace();
} catch(IOException ioe) {
ioe.printStackTrace();
}
}
private void parseDocument() {
Document dom = db.parse("test.xml");
//get the root element
Element docEle = dom.getDocumentElement();
//get a nodelist of elements
NodeList nl = docEle.getElementsByTagName("Employee");
if(nl != null && nl.getLength() > 0) {
for(int i = 0 ; i < nl.getLength(); i++) {
//get the employee element
Element el = (Element)nl.item(i);
//get the Employee object
Employee e = getEmployee(el);
//add it to list
myEmpls.add(e);
}
}
}
As you are using DocumentBuilder db in different methods, you could declare db as a class member variable:
private DocumentBuilder db;
and initialize like so in parseXmlFile:
db = dbf.newDocumentBuilder();
You could change method signature like below and when call to it pass the created document builder instance.
private void parseDocument(DocumentBuilder db)

Categories