Help with URL as InputSource - java

So I have been asking questions here trying to work out the answer for myself, but I just can't get it to work without running into a new error. If anyone can help me, I would appreciate it. I want to replace this portion
"<a>\n" +
"<b>\n" +
"<c id=\"00001\" time=\"1:00\" day=\"Friday\" name1=\"John\" name2=\"Mary\"></c>\n" +
"<c id=\"00002\" time=\"2:00\" day=\"Monday\" name1=\"Ed\" name2=\"Kate\"></c>\n" +
"<c id=\"00003\" time=\"3:00\" day=\"Sunday\" name1=\"Mary\" name2=\"Ed\"></c>\n" +
"<c id=\"00004\" time=\"4:00\" day=\"Friday\" name1=\"Kate\" name2=\"John\"></c>\n" +
"</b>\n" +
"</a>"
with an XML URL instead, as that information will be pulled from a server as the data changes.
Here is the source, so you can see what I am trying to accomplish once I have the data from the XML file. It works fine as it is, but whenever I try to implement a URL as the InputSource I get tons of errors, and nothing I've tried resolves the problem.
package com.newxpath;
import java.io.StringReader;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import android.app.Activity;
import android.os.Bundle;
import android.widget.EditText;
public class NewxpathActivity extends Activity {
#Override
public void onCreate(Bundle savedInstanceState) {
super.onCreate(savedInstanceState);
setContentView(R.layout.main);
InputSource xml = new InputSource(new StringReader("<a>\n" +
"<b>\n" +
"<c id=\"00001\" time=\"1:00\" day=\"Friday\" name1=\"John\" name2=\"Mary\"></c>\n" +
"<c id=\"00002\" time=\"2:00\" day=\"Monday\" name1=\"Ed\" name2=\"Kate\"></c>\n" +
"<c id=\"00003\" time=\"3:00\" day=\"Sunday\" name1=\"Mary\" name2=\"Ed\"></c>\n" +
"<c id=\"00004\" time=\"4:00\" day=\"Friday\" name1=\"Kate\" name2=\"John\"></c>\n" +
"</b>\n" +
"</a>"));
String name = "Ed";
XPath xpath = XPathFactory.newInstance().newXPath();
String expr = String.format("//a/b/c[#name2='%s']", name);
Node c = null;
try {
c = (Node) xpath.evaluate(expr, xml, XPathConstants.NODE);
} catch (XPathExpressionException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
NamedNodeMap attribs = c.getAttributes();
String id = attribs.getNamedItem("id").getNodeValue();
String time = attribs.getNamedItem("time").getNodeValue();
// etc.
EditText id2 = (EditText) findViewById(R.id.id2);
EditText time2 = (EditText) findViewById(R.id.time2);
id2.setText(String.valueOf(id));
time2.setText(String.valueOf(time));
}
}

You probably need to add the INTERNET permission to your AndroidManifest.xml file (I assume from the Android imports that this is taking place on Android). Otherwise I don't see why this wouldn't work. I copied your XML to the URL http://pastebin.com/raw.php?i=RF8cL5YZ and then ran it against the following code at a command line and it worked just fine.
import java.io.StringReader;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.xml.sax.InputSource;
import java.net.*;
/**
 * Command-line check: downloads an XML document from a URL, selects the {@code <c>}
 * element whose {@code name2} attribute equals a fixed name, and prints that element's
 * {@code id} and {@code time} attributes.
 */
public class test
{
    /**
     * Fetches the XML, runs the XPath query and prints {@code [id][time]}.
     *
     * @param args unused
     * @throws Exception if the URL cannot be opened or read
     */
    public static void main(String[] args) throws Exception
    {
        URL url = new URL("http://pastebin.com/raw.php?i=RF8cL5YZ");
        String name = "Ed";
        XPath xpath = XPathFactory.newInstance().newXPath();
        // '@' is the XPath abbreviation for the attribute axis; '#' is not valid here.
        String expr = String.format("//a/b/c[@name2='%s']", name);
        Node c = null;
        // try-with-resources closes the network stream even when evaluation fails.
        try (java.io.InputStream in = url.openStream()) {
            InputSource xml = new InputSource(in);
            c = (Node) xpath.evaluate(expr, xml, XPathConstants.NODE);
        } catch (XPathExpressionException e) {
            e.printStackTrace();
        }
        if (c == null) {
            // Nothing matched (or evaluation failed): report it rather than hit a NullPointerException.
            System.err.println("No <c> element found with name2='" + name + "'");
            return;
        }
        NamedNodeMap attribs = c.getAttributes();
        String id = attribs.getNamedItem("id").getNodeValue();
        String time = attribs.getNamedItem("time").getNodeValue();
        // etc.
        System.out.println("["+String.valueOf(id)+"]["+String.valueOf(time)+"]");
    }
}

Related

javax.xml.xpath.XPath.evaluate method could not resolve simple xpath

I need to design a utility which searches node using input xpath in XML document and set its value as per given input.
import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import org.apache.commons.io.FileUtils;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
/**
 * Loads an XML file located in the current working directory ({@code user.dir}) and
 * lets callers overwrite the text content of a node selected by an XPath expression.
 * Changes are applied to the in-memory document only.
 */
public class XMLUtil
{
    private XPath xPath;
    private Document xmlDoc;
    private String xmlName;

    /**
     * Parses the named file from the working directory. Parse failures are printed and
     * leave the instance without a document; later calls then report that and do nothing.
     *
     * @param xmlName file name relative to {@code user.dir}, with or without a leading
     *                separator
     */
    public XMLUtil(String xmlName) {
        this.xmlName = xmlName;
        System.out.println(System.getProperty("user.dir"));
        // Plain string concatenation dropped the path separator ("/work" + "tc01.xml" ->
        // "/worktc01.xml"), so the file was never found and xmlDoc stayed null.
        // File(parent, child) inserts the separator and still accepts "/tc01.xml".
        File srcXMLFile = new File(System.getProperty("user.dir"), xmlName);
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        try {
            xmlDoc = factory.newDocumentBuilder().parse(srcXMLFile);
        } catch (Exception e) {
            System.err.println("Could not parse XML file: " + srcXMLFile);
            e.printStackTrace();
        }
        xPath = XPathFactory.newInstance().newXPath();
    }

    /**
     * Sets the text content of the first node matched by {@code xpath}.
     * Note that {@code setTextContent} replaces all children of the matched node.
     *
     * @param xpath XPath expression selecting the node to change
     * @param query new text content for that node
     */
    public void changeXMLNodeValue(String xpath, String query) {
        if (xmlDoc == null) {
            System.err.println("No XML document loaded; cannot evaluate " + xpath);
            return;
        }
        try {
            Node node = (Node) xPath.compile(xpath).evaluate(xmlDoc, XPathConstants.NODE);
            if (node == null) {
                System.err.println("XPath matched no node: " + xpath);
                return;
            }
            node.setTextContent(query);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    public static void main(String[] args) {
        XMLUtil xmlUtil = new XMLUtil("tc01.xml");
        xmlUtil.changeXMLNodeValue("//Customer/State/City","Hyderabad");
    }
}
XML File:tc01.xml
<?xml version="1.0" encoding="UTF-8"?>
<Customer>
<State>
<value>Andhra</value>
<City>
<value>amravati</value>
</City>
</State>
</Customer>
Running the code throws a NullPointerException in the changeXMLNodeValue method when trying to compile and evaluate the XPath to fetch the node.
Node node = (Node) xPath.compile(xpath).evaluate(xmlDoc, XPathConstants.NODE);
This is a simple xml without any namespace requirement.
I am using this API for the first time, so please let me know if I missed something.

Jsoup Reddit Image Scraper Over 18 issue

I'm working on an image scraper that scrapes the first page of various subreddits using JSOUP. The issue that arises however is when attempting to scrape a NSFW subreddit, reddit redirects to an over 18 authentication page and the scraper scrapes the authentication page instead. I'm new to scraping and understand this is a noob question, but any help would be much appreciated as I am totally lost.
import java.io.IOException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.io.*;
import java.net.URL;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.io.*;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Attributes;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.net.URL;
import java.util.Scanner;
/**
 * Command-line scraper: asks for a target folder and a subreddit name, fetches the
 * subreddit's first page with Jsoup, and downloads every link that looks like an image.
 */
public class javascraper{
    public static final String USER_AGENT = "<User-Agent: github.com/dabeermasood:v1.2.3 (by /u/swedenotswiss)>";

    /**
     * Prompts for the folder and subreddit, then downloads each image-like link.
     * A failed download is reported and skipped so that it does not stop the run.
     *
     * @param args unused
     */
    public static void main (String[]args) throws MalformedURLException
    {
        Scanner scan = new Scanner (System.in);
        System.out.println("Where do you want to store the files?");
        String folderpath = scan.next();
        System.out.println("What subreddit do you want to scrape?");
        String subredditName = scan.next();
        String subreddit = ("http://reddit.com/r/" + subredditName);
        // Build the directory from the bare subreddit name; the original used the full
        // URL as a path segment, and mkdir() cannot create the missing parents.
        // NOTE(review): downloads still go to folderpath itself, as before.
        new File(folderpath, subredditName).mkdirs();
        try{
            //gets http protocol
            Document doc = Jsoup.connect(subreddit).userAgent(USER_AGENT).timeout(0).get();
            //get page title
            String title = doc.title();
            System.out.println("title : " + title);
            //get all links
            Elements links = doc.select("a[href]");
            for(Element link : links){
                //get value from href attribute
                String checkLink = link.attr("href");
                if (!imgCheck(checkLink)){
                    continue;
                }
                System.out.println("link : " + checkLink);
                // Resolve relative hrefs against the page URL; absUrl yields "" when
                // that is not possible.
                String absolute = link.absUrl("href");
                if (absolute.isEmpty()){
                    continue;
                }
                try{
                    downloadImages(absolute, folderpath);
                }
                catch (IOException e){
                    // One broken link must not abort the remaining downloads.
                    e.printStackTrace();
                }
            }
        }
        catch (IOException e){
            e.printStackTrace();
        }
    }

    /**
     * Returns whether a link looks like an image: it contains {@code .png},
     * {@code .jpg}, {@code jpeg}, {@code .gif} or {@code gfycat} anywhere in the text.
     *
     * @param http link text to inspect
     * @return {@code true} if any of the markers occurs in {@code http}
     */
    public static boolean imgCheck(String http){
        return http.contains(".png")
                || http.contains("gfycat")
                || http.contains(".jpg")
                || http.contains("jpeg")
                || http.contains(".gif");
    }

    /**
     * Downloads {@code src} into {@code folderpath}, named after the last path segment
     * of the URL.
     *
     * @param src        absolute URL of the resource
     * @param folderpath directory that receives the file
     * @throws IOException if the URL is malformed, the transfer fails, or the thread is
     *                     interrupted during the rate-limit pause
     */
    private static void downloadImages(String src, String folderpath) throws IOException{
        // Strip one trailing slash so the last segment is not empty. The original test
        // (lastIndexOf == length) could never be true, and substring(1, ...) would also
        // have dropped the first character.
        if (src.endsWith("/")) {
            src = src.substring(0, src.length() - 1);
        }
        // lastIndexOf returns -1 when there is no slash; +1 then keeps the whole string.
        String name = src.substring(src.lastIndexOf("/") + 1);
        System.out.println(name);
        //Open a URL Stream
        URLConnection connection = (new URL(src)).openConnection();
        try {
            Thread.sleep(2000); //Delay to comply with rate limiting
        } catch (InterruptedException e) {
            // Restore the interrupt flag and give up on this download.
            Thread.currentThread().interrupt();
            throw new InterruptedIOException("Interrupted while waiting to download " + src);
        }
        connection.setRequestProperty("User-Agent", USER_AGENT);
        // try-with-resources closes both streams even when the copy fails part-way.
        try (InputStream in = new BufferedInputStream(connection.getInputStream());
             OutputStream out = new BufferedOutputStream(new FileOutputStream(new File(folderpath, name)))) {
            byte[] buffer = new byte[8192];
            for (int n; (n = in.read(buffer)) != -1;) {
                out.write(buffer, 0, n);
            }
        }
    }
}
I've posted an answer to authenticate against the server using Jsoup in this link. Basically you need to POST your login ID & password and other required data to the server using:
Connection.Response res = Jsoup.connect(url).data(...).method(Method.POST).execute();, then save the response cookies from the server to keep your session authenticated.

Making rss feed parser in java

I'm trying to make a function that will take url and xpath as arguments, and query the xml file from supplied url and return String results. Here's my code: `package
uforia.tests.daoTests;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathFactory;
import org.junit.Assert;
import org.junit.Test;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
public class XpathHelper {
public static final String NL = System.getProperty("line.separator");
#Test
public void testBelow() {
System.out.println(xmlQuery("http://abcnews.go.com/US/wireStory/10-things-today-19933443", "//*[#id=\"storyText\"]/p[3]"));
Assert.assertTrue(true);
}
public String xmlQuery(String url, String xpath) {
StringBuilder sb = new StringBuilder();
DocumentBuilderFactory domFactory = DocumentBuilderFactory.newInstance(); // Getting
// the
// instance
// of
// DocumentBuilderFactory
domFactory.setNamespaceAware(true); // true if the parser produced will
// provide support for XML
// namespaces;
try {
DocumentBuilder builder = domFactory.newDocumentBuilder();
// Creating document builder
Document doc = builder.parse(new URL(url).openStream()); // e.g.
XPath xPath = XPathFactory.newInstance().newXPath();
// getting instance of xPath
XPathExpression expr = xPath.compile(xpath);
// e.g. "//#id"
NodeList nodes = (NodeList) expr.evaluate(doc, XPathConstants.NODESET);
for (int i = 0; i < nodes.getLength(); i++) {
sb.append(nodes.item(i).getNodeValue()).append(NL);
}
}
catch (Exception e) {
e.printStackTrace();
}
// Think of closing connection in finnaly branch...
return sb.toString();
}
}
`
And I'm getting this error:
[Fatal Error] :37:108: The reference to entity "asset" must end with
the ';' delimiter. org.xml.sax.SAXParseException; lineNumber: 37;
columnNumber: 108; The reference to entity "asset" must end with the
';' delimiter.
I think the problem is with escaping ampersands, but I can't get it to work.
Thanks for the help in advance...

Regarding XPath using Java

I have a problem getting the value of an element by providing the XPath using Java. I tried a lot of things but could not succeed.
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpression;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import com.dell.logistics.framework.transform.NamespaceContext;
/**
 * Evaluates an XPath expression against an XML string, using a prefix-to-namespace
 * mapping supplied as {@code "prefix=uri,prefix=uri"} text.
 */
public class GetXPath {
    /**
     * Compiles {@code xpathStr} and evaluates it against {@code xml} as a node set.
     *
     * @param xpathStr   XPath expression to evaluate
     * @param xml        XML document text
     * @param namespaces comma-separated {@code prefix=uri} pairs
     * @return the evaluation result requested as {@code XPathConstants.NODESET}
     * @throws XPathExpressionException if the expression cannot be compiled or evaluated
     */
    protected Object evaluate(String xpathStr, String xml, String namespaces) throws XPathExpressionException {
        InputSource inputSource = new InputSource(new StringReader(xml));
        XPathFactory factory = XPathFactory.newInstance();
        XPath xPath = factory.newXPath();
        NamespaceContext nsContext = new NamespaceContext();
        nsContext.setNamespacesMap(getNsMap(namespaces));
        //System.out.println(nsContext.getPrefix(namespaces));
        xPath.setNamespaceContext(nsContext);
        XPathExpression xpExp = xPath.compile(xpathStr);
        return xpExp.evaluate(inputSource, XPathConstants.NODESET);
    }

    /**
     * Parses {@code "prefix=uri,prefix=uri"} into a prefix-to-URI map and prints each pair.
     *
     * @param namespaces comma-separated {@code prefix=uri} pairs
     * @return map from prefix to namespace URI
     * @throws IllegalArgumentException if an entry contains no {@code '='}
     */
    private Map<String, String> getNsMap(String namespaces) {
        Map<String, String> mp = new HashMap<String, String>();
        for (String entry : namespaces.split(",")) {
            // Split once, on the first '=' only: a namespace URI may itself contain '='
            // (for example in a query string) and must not be truncated.
            String[] keyValue = entry.split("=", 2);
            if (keyValue.length < 2) {
                throw new IllegalArgumentException("Namespace entry is not prefix=uri: " + entry);
            }
            mp.put(keyValue[0], keyValue[1]);
            System.out.println(keyValue[0] + keyValue[1]);
        }
        return mp;
    }

    /**
     * Reads a classpath resource into a string, ending every line with {@code "\n"}.
     *
     * @param fileName resource name passed to {@code getResourceAsStream}
     * @return the resource text, or {@code null} if it is missing or unreadable
     */
    public static String readFile(String fileName) {
        // NOTE(review): the resource is resolved through GetWorkOrderDataExtractor, which is
        // declared outside this file - confirm that this is the intended class.
        // The reader (and the stream under it) is closed by try-with-resources; the text is
        // decoded as UTF-8 rather than with the platform default charset.
        try (InputStream is = GetWorkOrderDataExtractor.class.getResourceAsStream(fileName)) {
            if (is == null) {
                System.err.println("Resource not found: " + fileName);
                return null;
            }
            try (BufferedReader br = new BufferedReader(
                    new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8))) {
                StringBuilder sb = new StringBuilder();
                String l = null;
                while ((l = br.readLine()) != null) {
                    sb.append(l).append("\n");
                }
                return sb.toString();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Loads {@code fooewo.xml}, evaluates {@code /demo} against it and prints the text of
     * every matched node plus the match count.
     */
    public static void main(String[] args)
            throws ParserConfigurationException, SAXException,
            IOException, XPathExpressionException {
        GetXPath g = new GetXPath();
        String xml = readFile("fooewo.xml");
        if (xml == null) {
            // readFile has already reported the problem; StringReader would fail on null.
            return;
        }
        String value = null;
        System.out.println(xml);
        // NOTE(review): an unprefixed step such as "/demo" only matches elements in no
        // namespace. If the document declares a default namespace on <demo>, the step
        // probably needs a prefix mapped to that URI (e.g. "/a:demo") - verify.
        NodeList containerNodes = (NodeList) g.evaluate(
                "/demo",xml,
                "a=http://schemas.demo.com/it/WorkOrderChannelAckNackResponse/1.0");
        try{
            for (int i = 0; i < containerNodes.getLength(); i++) {
                // get the node value.
                value = containerNodes.item(i).getTextContent();
                System.out.println(value);
            }
            System.out.println("Node Found : " + containerNodes.getLength() + " times");
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }
}
"
XML file:
<?xml version="1.0" encoding="utf-8"?>
<demo xmlns="try with ur schema">
<test>
<value>10</value>
<color>red</color>
<animal>dog</animal>
<day>13</day>
<age>22</age>
</test>
<test>
<value>20</value>
<color>green</color>
<animal>cat</animal>
<day>12</day>
<age>23</age>
</test>
</demo>
Any help appreciated.
Thanks,
Pradeep
I think the best way to evaluate XPath easily is using AXIOMXPath
Here is an example,
// Build the AXIOM object model from the XML input stream and take its root element.
OMElement documentElement = new StAXOMBuilder(inStreamToXML).getDocumentElement();
AXIOMXPath xpathExpression = new AXIOMXPath ("/demo");
// The result is assigned to a List as-is; the former (OMNode) cast is not
// assignable to List and would not compile.
List nodeList = xpathExpression.selectNodes(documentElement);
By traversing the list you can get the result easily.

DocumentBuilder.parse(String Uri) is returning the IOException

package com.converter;
import java.io.IOException;
import java.net.URI;
import java.net.URL;
import java.util.StringTokenizer;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXParseException;
//import XmlReader.java;
/**
 * Reads an RSS feed of USD exchange rates and extracts the Indian Rupee rate from the
 * {@code description} element that mentions it.
 */
public class XMLReader {
    /**
     * Extracted rate. Starts at 25 and is overwritten with the parsed value; on failure
     * it is set to a marker instead: 30 (XML parse error), 33 (I/O error), 32 (any other error).
     */
    public Float value = 25f;

    /**
     * Downloads and parses the feed, then stores the Indian Rupee rate in {@link #value}.
     * Errors are printed and recorded through the marker values described on {@link #value}.
     */
    public XMLReader(){
        String parseString = "";
        try {
            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            // dbf.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
            DocumentBuilder db = dbf.newDocumentBuilder();
            URI uri = new URI("http://themoneyconverter.com/USD/rss.xml");
            // DocumentBuilder.parse(String) takes the URI of the document to fetch and parse.
            Document doc = db.parse(uri.toString());
            doc.getDocumentElement().normalize();
            NodeList nodeLst = doc.getElementsByTagName("description");
            int length = nodeLst.getLength();
            for (int s = 0; s < length; s++) {
                Node fstNode = nodeLst.item(s);
                parseString = fstNode.getTextContent();
                if(parseString.contains("Indian Rupee")){
                    System.out.println(parseString);
                    // Text looks like "1 US Dollar = 45.92697 Indian Rupee": drop the part
                    // before '=', then take the first whitespace-separated token after it.
                    StringTokenizer parser = new StringTokenizer(parseString,"=");
                    parser.nextToken();
                    StringTokenizer parser1 = new StringTokenizer(parser.nextToken());
                    value = Float.valueOf(parser1.nextToken());
                    System.out.println(value);
                }
            }
        } catch (SAXParseException e) {
            value = 30f;
            e.printStackTrace();
        }catch (IOException e) {
            value = 33f;
            e.printStackTrace();
        }catch (Exception e) {
            value = 32f;
            e.printStackTrace();
        }
    }
}
This one is working for me, outputting:
1 US Dollar = 45.92697 Indian Rupee
45.92697
So I guess you have some network problems accessing the resource. Check your firewall settings, anti virus programs, etc.
Or maybe paste the stack trace here if you need more help ;)
Cheers!

Categories