Not able to take screenShot using HtmlUnitDriver [Selenium WebDriver java] - java

I am wanting to take a screenshot of a page using HtmlUnitDriver I came across this Link where this guy has made a custom HTML unit driver to take the screenshot.
But unfortunately, while implementing that I am getting an exception.
"Exception in thread "main" java.lang.ClassCastException: [B cannot be cast to java.io.File
at Test.main(Test.java:39)"
My code is as follows-
import java.io.File;
import java.io.IOException;
import org.openqa.selenium.OutputType;
import org.openqa.selenium.WebDriver;
import com.gargoylesoftware.htmlunit.BrowserVersion;
public class Test extends ScreenCaptureHtmlUnitDriver {
public static void main(String[] args) throws InterruptedException, IOException {
WebDriver driver = new ScreenCaptureHtmlUnitDriver(BrowserVersion.FIREFOX_38);
driver.get("https://www.google.com/?gws_rd=ssl");
try{
File scrFile = ((ScreenCaptureHtmlUnitDriver) driver).getScreenshotAs(OutputType.FILE);
FileUtils.copyFile(scrFile, new File("D:\\TEMP.PNG"));
}catch (Exception e) {
e.printStackTrace();
}
}
}
HtmlUnit driver which I am using(the one which is in the link) is this-
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URL;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipOutputStream;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.io.IOUtils;
import org.openqa.selenium.Capabilities;
import org.openqa.selenium.OutputType;
import org.openqa.selenium.TakesScreenshot;
import org.openqa.selenium.WebDriverException;
import org.openqa.selenium.htmlunit.HtmlUnitDriver;
import org.openqa.selenium.internal.Base64Encoder;
import org.openqa.selenium.remote.CapabilityType;
import org.openqa.selenium.remote.DesiredCapabilities;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.WebRequest;
import com.gargoylesoftware.htmlunit.WebWindow;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
public class ScreenCaptureHtmlUnitDriver extends HtmlUnitDriver implements TakesScreenshot {
private static Map<String, byte[]> imagesCache = Collections.synchronizedMap(new HashMap<String, byte[]>());
private static Map<String, String> cssjsCache = Collections.synchronizedMap(new HashMap<String, String>());
// http://stackoverflow.com/questions/4652777/java-regex-to-get-the-urls-from-css
private final static Pattern cssUrlPattern = Pattern.compile("background(-image)?[\\s]*:[^url]*url[\\s]*\\([\\s]*([^\\)]*)[\\s]*\\)[\\s]*");// ?<url>
public ScreenCaptureHtmlUnitDriver() {
super();
}
public ScreenCaptureHtmlUnitDriver(boolean enableJavascript) {
super(enableJavascript);
}
public ScreenCaptureHtmlUnitDriver(Capabilities capabilities) {
super(capabilities);
}
public ScreenCaptureHtmlUnitDriver(BrowserVersion version) {
super(version);
DesiredCapabilities var = ((DesiredCapabilities) getCapabilities());
var.setCapability(CapabilityType.TAKES_SCREENSHOT, true);
}
//#Override
#SuppressWarnings("unchecked")
public <X> X getScreenshotAs(OutputType<X> target) throws WebDriverException {
byte[] archive = new byte[0];
try {
archive = downloadCssAndImages(getWebClient(), (HtmlPage) getCurrentWindow().getEnclosedPage());
} catch (Exception e) {
}
if(target.equals(OutputType.BASE64)){
return target.convertFromBase64Png(new Base64Encoder().encode(archive));
}
if(target.equals(OutputType.BYTES)){
return (X) archive;
}
return (X) archive;
}
// http://stackoverflow.com/questions/2244272/how-can-i-tell-htmlunits-webclient-to-download-images-and-css
protected byte[] downloadCssAndImages(WebClient webClient, HtmlPage page) throws Exception {
WebWindow currentWindow = webClient.getCurrentWindow();
Map<String, String> urlMapping = new HashMap<String, String>();
Map<String, byte[]> files = new HashMap<String, byte[]>();
WebWindow window = null;
try {
window = webClient.getWebWindowByName(page.getUrl().toString()+"_screenshot");
webClient.getPage(window, new WebRequest(page.getUrl()));
} catch (Exception e) {
window = webClient.openWindow(page.getUrl(), page.getUrl().toString()+"_screenshot");
}
String xPathExpression = "//*[name() = 'img' or name() = 'link' and (#type = 'text/css' or #type = 'image/x-icon') or #type = 'text/javascript']";
List<?> resultList = page.getByXPath(xPathExpression);
Iterator<?> i = resultList.iterator();
while (i.hasNext()) {
try {
HtmlElement el = (HtmlElement) i.next();
String resourceSourcePath = el.getAttribute("src").equals("") ? el.getAttribute("href") : el
.getAttribute("src");
if (resourceSourcePath == null || resourceSourcePath.equals(""))
continue;
URL resourceRemoteLink = page.getFullyQualifiedUrl(resourceSourcePath);
String resourceLocalPath = mapLocalUrl(page, resourceRemoteLink, resourceSourcePath, urlMapping);
urlMapping.put(resourceSourcePath, resourceLocalPath);
if (!resourceRemoteLink.toString().endsWith(".css")) {
byte[] image = downloadImage(webClient, window, resourceRemoteLink);
files.put(resourceLocalPath, image);
} else {
String css = downloadCss(webClient, window, resourceRemoteLink);
for (String cssImagePath : getLinksFromCss(css)) {
URL cssImagelink = page.getFullyQualifiedUrl(cssImagePath.replace("\"", "").replace("\'", "")
.replace(" ", ""));
String cssImageLocalPath = mapLocalUrl(page, cssImagelink, cssImagePath, urlMapping);
files.put(cssImageLocalPath, downloadImage(webClient, window, cssImagelink));
}
files.put(resourceLocalPath, replaceRemoteUrlsWithLocal(css, urlMapping)
.replace("resources/", "./").getBytes());
}
} catch (Exception e) {
}
}
String pagesrc = replaceRemoteUrlsWithLocal(page.getWebResponse().getContentAsString(), urlMapping);
files.put("page.html", pagesrc.getBytes());
webClient.setCurrentWindow(currentWindow);
return createZip(files);
}
String downloadCss(WebClient webClient, WebWindow window, URL resourceUrl) throws Exception {
if (cssjsCache.get(resourceUrl.toString()) == null) {
cssjsCache.put(resourceUrl.toString(), webClient.getPage(window, new WebRequest(resourceUrl))
.getWebResponse().getContentAsString());
}
return cssjsCache.get(resourceUrl.toString());
}
byte[] downloadImage(WebClient webClient, WebWindow window, URL resourceUrl) throws Exception {
if (imagesCache.get(resourceUrl.toString()) == null) {
imagesCache.put(
resourceUrl.toString(),
IOUtils.toByteArray(webClient.getPage(window, new WebRequest(resourceUrl)).getWebResponse()
.getContentAsStream()));
}
return imagesCache.get(resourceUrl.toString());
}
public static byte[] createZip(Map<String, byte[]> files) throws IOException {
ByteArrayOutputStream bos = new ByteArrayOutputStream();
ZipOutputStream zipfile = new ZipOutputStream(bos);
Iterator<String> i = files.keySet().iterator();
String fileName = null;
ZipEntry zipentry = null;
while (i.hasNext()) {
fileName = i.next();
zipentry = new ZipEntry(fileName);
zipfile.putNextEntry(zipentry);
zipfile.write(files.get(fileName));
}
zipfile.close();
return bos.toByteArray();
}
List<String> getLinksFromCss(String css) {
List<String> result = new LinkedList<String>();
Matcher m = cssUrlPattern.matcher(css);
while (m.find()) { // find next match
result.add( m.group(2));
}
return result;
}
String replaceRemoteUrlsWithLocal(String source, Map<String, String> replacement) {
for (String object : replacement.keySet()) {
// background:url(http://org.com/images/image.gif)
source = source.replace(object, replacement.get(object));
}
return source;
}
String mapLocalUrl(HtmlPage page, URL link, String path, Map<String, String> replacementToAdd) throws Exception {
String resultingFileName = "resources/" + FilenameUtils.getName(link.getFile());
replacementToAdd.put(path, resultingFileName);
return resultingFileName;
}
}
UPDATE
Code provided by Andrew works- but I wanted to know if there is a way by which we can download only selected resources. For eg this website I would like to download only captcha image those id is "//*[#id='cimage']" because downloading all the resources will take a long time. Is there a way by which we can download only the specific resource. Because with the existing code provided
below all the resources get downloaded.
byte[] zipFileBytes = ((ScreenCaptureHtmlUnitDriver) driver).getScreenshotAs(OutputType.BYTES);
FileUtils.writeByteArrayToFile(new File("D:\\TEMP.PNG"), zipFileBytes);

The error says that the code is trying to convert a byte[] to a File. It's easy to see why if you just strip out the unused paths from getScreenshotAs:
public <X> X getScreenshotAs(OutputType<X> target) throws WebDriverException {
byte[] archive = new byte[0];
try {
archive = downloadCssAndImages(getWebClient(), (HtmlPage) getCurrentWindow().getEnclosedPage());
} catch (Exception e) {
}
return (X) archive;
}
There's no way you can get a File out of that. OutputType.FILE is not supported, so you have to handle file output yourself. Luckily, that's easy. You can change your code to:
byte[] zipFileBytes = ((ScreenCaptureHtmlUnitDriver) driver).getScreenshotAs(OutputType.BYTES);
FileUtils.writeByteArrayToFile(new File("D:\\TEMP.PNG"), zipFileBytes);
See FileUtils.writeByteArrayToFile() for more.

Check this out this may helpful for you
File scrFile = ((TakesScreenshot)driver).getScreenshotAs(OutputType.FILE);
FileUtils.copyFile(scrFile, new File("C:/Users/home/Desktop/screenshot.png"));// copy it somewhere

Related

Calling forEach on a stream causes java.lang.IllegalStateException

I have a program which does the following:
Stores a file name in Main method
Passes that file to the below method(StreamParser)from Main
Method StreamParser reads that file as Stream
StreamParser should return Stream
In main method when I call forEach on purchaseEventStream it gives an error in line
purchaseEventStream.forEach(purchaseEvent -> {
Exception in thread "main" java.lang.IllegalStateException: source already consumed or
closed
at java.base/java.util.stream.AbstractPipeline.sourceSpliterator(AbstractPipeline.java:409)
at java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
at java.base/java.util.stream.ReferencePipeline.forEach(ReferencePipeline.java:497)
at com.cognitree.internship.streamprocessing.Main.main(Main.java:22)
StreamParser Class
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.stream.Stream;
public class StreamParser {
public Stream<PurchaseEvent> parser(String fileName) throws IOException {
Stream<PurchaseEvent> purchaseEventStream;
try (Stream<String> lines = Files.lines(Paths.get(fileName))) {
purchaseEventStream= lines.map(line -> {
String[] fields = line.split(",");
PurchaseEvent finalPurchaseEvent = new PurchaseEvent();
finalPurchaseEvent.setSessionId(fields[0]);
finalPurchaseEvent.setTimeStamp(fields[1]);
finalPurchaseEvent.setItemId(fields[2]);
finalPurchaseEvent.setPrice(fields[3]);
finalPurchaseEvent.setQuantity(fields[4]);
return finalPurchaseEvent;
});
return purchaseEventStream;
}
}
}
Main Class
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Stream;
public class Main {
public static void main(String[] args) throws IOException {
OutputStreamWriter outputStream = new OutputStreamWriter(new
FileOutputStream("output1.txt"));
String file = "/Users/mohit/intern-mohit/yoochoose-buys.dat";
StreamParser streamParser = new StreamParser();
List<ReportGenerator> reports = new ArrayList<>();
PurchaseEventCount purchaseEventCount = new PurchaseEventCount();
QuantityPerSession quantityPerSession = new QuantityPerSession();
SessionCount sessionCount = new SessionCount();
reports.add(purchaseEventCount);
reports.add(sessionCount);
reports.add(quantityPerSession);
Stream<PurchaseEvent> purchaseEventStream = streamParser.parser(file);
purchaseEventStream.forEach(purchaseEvent -> {
for (ReportGenerator report : reports) {
report.generateReports(purchaseEvent);
}
});
reports.forEach(report -> {
try {
report.printReports(outputStream);
} catch (IOException e) {
e.printStackTrace();
}
});
}
}
Why am i getting the error?
A stream in java is not a collection. It does not store data. You should create and return a collection from method parser() in class StreamParser and then create a stream from the returned collection.
I rewrote your StreamParser class to return a List.
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
import java.util.stream.Collectors;
public class StreamParser {
public List<PurchaseEvent> parser(String fileName) throws IOException {
List<PurchaseEvent> purchaseEventStream = Files.lines(Paths.get(fileName))
.map(line -> {
String[] fields = line.split(",");
PurchaseEvent finalPurchaseEvent = new PurchaseEvent();
finalPurchaseEvent.setSessionId(fields[0]);
finalPurchaseEvent.setTimeStamp(fields[1]);
finalPurchaseEvent.setItemId(fields[2]);
finalPurchaseEvent.setPrice(fields[3]);
finalPurchaseEvent.setQuantity(fields[4]);
return finalPurchaseEvent;
})
.collect(Collectors.toList());
return purchaseEventStream;
}
}
And I changed your Main class accordingly.
import java.io.*;
import java.util.ArrayList;
import java.util.List;
public class Main {
public static void main(String[] args) throws IOException {
OutputStreamWriter outputStream = new OutputStreamWriter(new FileOutputStream("output1.txt"));
String file = "/Users/mohit/intern-mohit/yoochoose-buys.dat";
StreamParser streamParser = new StreamParser();
List<ReportGenerator> reports = new ArrayList<>();
PurchaseEventCount purchaseEventCount = new PurchaseEventCount();
QuantityPerSession quantityPerSession = new QuantityPerSession();
SessionCount sessionCount = new SessionCount();
reports.add(purchaseEventCount);
reports.add(sessionCount);
reports.add(quantityPerSession);
List<PurchaseEvent> purchaseEventStream = streamParser.parser(file);
purchaseEventStream.forEach(purchaseEvent -> {
for (ReportGenerator report : reports) {
report.generateReports(purchaseEvent);
}
});
reports.forEach(report -> {
try {
report.printReports(outputStream);
}
catch (IOException e) {
e.printStackTrace();
}
});
}
}

How to access many resource images in a folder in a jar in Java?

I have got this class for loading blue images, which works fine in Eclipse but not in the exported jar. How can I access all the blue images in the folder (directory) called "blue" without knowing the names of the images?
public class Blue
{
public static void read() throws Exception
{
File directoryBlueImages = new File(
Blue.class.getResource("blue").getFile());
String[] blueImages = directoryBlueImages.list();
List<BufferedImage> blueImagesList = new ArrayList<>();
for (String blueImage : java.util.Objects.requireNonNull(blueImages))
{
blueImagesList.add(ImageIO
.read(Blue.class.getResourceAsStream("blue/" + blueImage)));
}
ApplicationImages.setBlueImages(blueImagesList);
}
}
UPDATE
I have tried this, but it does not work either. I am getting a NullPointer exception. I tried "/blue" and "blue" and even ".blue".
import java.awt.image.BufferedImage;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.stream.Stream;
import javax.imageio.ImageIO;
import vokabeltrainer.ApplicationImages;
public class Blue
{
public static void read() throws Exception
{
List<BufferedImage> blueImagesList = new ArrayList<>();
try (Stream<Path> pathStream = Files.walk(Paths.get(Blue.class
.getClassLoader().getResource("blue").toURI().toURL().getPath()))
.filter(Files::isRegularFile))
{
for (Path file : (Iterable<Path>) pathStream::iterator)
{
blueImagesList.add(ImageIO
.read(Blue.class.getResourceAsStream(file.toString())));
;
}
}
ApplicationImages.setBlueImages(blueImagesList);
}
}
I adapted an answer from How to list the files inside a JAR file?
First I distinguish wether I am running from jar or Eclipse:
try
{
Blue.readZip(); // when inside jar
}
catch (Exception e)
{
try
{
Blue.read(); // during development
}
catch (Exception e1)
{
System.out.println("Could not read blue.");
e1.printStackTrace();
}
}
Then class Blue looks like this:
public class Blue
{
private static List<BufferedImage> blueImagesList = new ArrayList<>();
public static void read() throws Exception
{
File directoryBlueImages = new File(
Blue.class.getResource("blue").getFile());
String[] blueImages = directoryBlueImages.list();
for (String blueImage : java.util.Objects.requireNonNull(blueImages))
{
blueImagesList.add(ImageIO
.read(Blue.class.getResourceAsStream("blue/" + blueImage)));
}
ApplicationImages.setBlueImages(blueImagesList);
}
public static void readZip() throws Exception
{
CodeSource src = Blue.class.getProtectionDomain().getCodeSource();
if (src != null)
{
URL jar = src.getLocation();
ZipFile zipFile = new ZipFile(jar.getFile());
ZipInputStream zip = new ZipInputStream(jar.openStream());
while (true)
{
ZipEntry ze = zip.getNextEntry();
if (ze == null)
break;
String name = ze.getName();
if (name.startsWith("vokabeltrainer/resources/blue/"))
{
blueImagesList.add(ImageIO.read(zipFile.getInputStream(ze)));
}
}
}
else
{
throw new IOException("can not find code source for blue images");
}
ApplicationImages.setBlueImages(blueImagesList);
}
}

Copied data paste into String function

Can anyone tell me how to store copied data into String in selenium?
example:
driver.findElement(By.xpath("//*[#id="x")).sendKeys(Keys.CONTROL, "a"));
driver.findElement(By.xpath("//*[#id="xy")).sendKeys(Keys.CONTROL, "c"));
driver.findElement(By.xpath("//*[#id="xy")).sendKeys(Keys.CONTROL, "v"));
I need put the copied data into String
Something like this
String text = driver.findElement(By.xpath("//*[#id="xy")).sendKeys(Keys.CONTROL, "v"));
It seems you want to send ctrl + c keys, which will copy some data in clipboard.. want to store that data in String variable..right?
You have to use Clipboard class to do so..See implementation below...
package resources;
import java.awt.Toolkit;
import java.awt.datatransfer.Clipboard;
import java.awt.datatransfer.DataFlavor;
import java.awt.datatransfer.Transferable;
import java.awt.datatransfer.UnsupportedFlavorException;
import java.io.IOException;
import org.openqa.selenium.By;
import org.openqa.selenium.Keys;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.testng.annotations.Test;
public class One {
#Test
public void getClipboardContents() {
String result = "";
System.setProperty("webdriver.chrome.driver", "C://WebDrivers/chromedriver.exe");
WebDriver driver = new ChromeDriver();
driver.get("http://www.google.com");
String copy = Keys.chord(Keys.CONTROL,Keys.chord("c"));
driver.findElement(By.linkText("Images")).sendKeys(copy);
Clipboard clipboard = Toolkit.getDefaultToolkit().getSystemClipboard();
Transferable contents = clipboard.getContents(null);
boolean hasTransferableText = (contents != null)
&& contents.isDataFlavorSupported(DataFlavor.stringFlavor);
if (hasTransferableText) {
try {
result = (String) contents.getTransferData(DataFlavor.stringFlavor);
} catch (UnsupportedFlavorException | IOException ex) {
System.out.println(ex);
ex.printStackTrace();
}
}
System.out.println(result);
}
}
#Starlord ..modify locators as per your need.
#Gaurav Thanks for the code.. I have modified some part of the code.. now I am getting what i want exactly
public void getClipboardContents()
throws UnsupportedFlavorException, IOException {
String result = "google.com";
System.setProperty("webdriver.chrome.driver", "E:\\New folder\\chromedriver.exe");
WebDriver driver = new ChromeDriver();
driver.get("https://www.google.co.in/");
String copy = Keys.chord(Keys.CONTROL,Keys.chord("c"));
driver.findElement(By.xpath("//*[#id=\"lst-ib\"]")).sendKeys("google.com");
driver.findElement(By.xpath("//*[#id=\"lst-ib\"]")).sendKeys(Keys.CONTROL+"a");
driver.findElement(By.xpath("//*[#id=\"lst-ib\"]")).sendKeys(copy);
Clipboard clipboard = Toolkit.getDefaultToolkit().getSystemClipboard();
Transferable contents = clipboard.getContents(null);
String x = (String) contents.getTransferData(DataFlavor.stringFlavor);
System.out.println(x);
int a= result.length();
int b = x.length();
System.out.println(a);
System.out.println(b);
if(a<=b)
{
System.out.println("Matched Character length")
}else
{
System.out.println("Issue In Character length");
}
}
}

Character encoding in com.sun.net.httpserver

I'm trying to write a mock HTTP server for unit tests, I'm using the com.sun.net.httpserver classes for that.
I'm having problem with the encoding of the URL: the query parameters are ISO-8859-1 encoded, but the URI that is passed to the handler (via HttpExchange) is not.
As I can't change the encoding of the original server, I was wondering if there was a way to tell the HttpServer which encoding to use when decoding the URL.
Thanks in advance.
Here is a test program:
package test34;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLEncoder;
import java.util.logging.Level;
import java.util.logging.Logger;
public class Main {
public static void main(String[] args) {
try {
MockServer mock = new MockServer();
mock.start(8642);
URL url = new URL("http://localhost:8642/?p="
+ URLEncoder.encode("téléphone", "ISO-8859-1"));
System.out.println(url);
InputStream in = url.openStream();
while (in.read() > 0) {
}
in.close();
mock.stop();
System.out.println(mock.getLastParams().get("p"));
} catch (IOException ex) {
Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex);
}
}
}
And here is the code of the mock server:
package test34;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.Writer;
import java.net.InetSocketAddress;
import java.net.URI;
import java.net.URLDecoder;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Executors;
import com.sun.net.httpserver.Headers;
import com.sun.net.httpserver.HttpExchange;
import com.sun.net.httpserver.HttpHandler;
import com.sun.net.httpserver.HttpServer;
public class MockServer {
private HttpServer httpServer;
private Map<String, String> params;
public void start(int port) {
if (httpServer == null) {
try {
InetSocketAddress addr = new InetSocketAddress(port);
httpServer = HttpServer.create(addr, 0);
httpServer.createContext("/", new HttpHandler() {
#Override
public void handle(HttpExchange exchange) throws IOException {
try {
handleRoot(exchange);
} catch (RuntimeException e) {
throw e;
} catch (IOException e) {
throw e;
}
}
});
httpServer.setExecutor(Executors.newFixedThreadPool(1));
httpServer.start();
} catch (IOException e) {
throw new RuntimeException(e.getMessage());
}
}
}
public void stop() {
if (httpServer != null) {
httpServer.stop(10);
httpServer = null;
}
}
public Map<String, String> getLastParams() {
Map<String, String> result = new HashMap<String, String>();
if (params != null) {
result.putAll(params);
}
return result;
}
private void handleRoot(HttpExchange exchange) throws IOException {
URI uri = exchange.getRequestURI();
params = parseQuery(uri.getQuery());
Headers responseHeaders = exchange.getResponseHeaders();
responseHeaders.set("Content-Type", "text/plain;charset=ISO-8859-1");
exchange.sendResponseHeaders(200, 0);
OutputStream stream = exchange.getResponseBody();
try {
Writer writer = new OutputStreamWriter(stream, "ISO-8859-1");
try {
PrintWriter out = new PrintWriter(writer);
try {
out.println("OK");
} finally {
out.close();
}
} finally {
writer.close();
}
} finally {
stream.close();
}
}
private static Map<String, String> parseQuery(String qry)
throws IOException {
Map<String, String> result = new HashMap<String, String>();
if (qry != null) {
String defs[] = qry.split("[&]");
for (String def : defs) {
int ix = def.indexOf('=');
if (ix < 0) {
result.put(def, "");
} else {
String name = def.substring(0, ix);
String value = URLDecoder.decode(
def.substring(ix + 1), "ISO-8859-1");
result.put(name, value);
}
}
}
return result;
}
}
The javadoc of HttpExchange.getQueryString() says it returns "undecoded query string of request URI, or null if the request URI doesn't have one."
If it's not decoded, and since http headers have to be in 7 bit ASCII (ietf.org/rfc/rfc2616.txt) , then you can decode later with URLDecoder.decode(... "ISO-8859-1");

Different Result on DBPedia Spotlight by using the code and DBPedia Spotlight endpoint

This is the main class in which query is being fired
package extractKeyword;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.methods.GetMethod;
import org.dbpedia.spotlight.exceptions.AnnotationException;
import org.dbpedia.spotlight.model.DBpediaResource;
import org.dbpedia.spotlight.model.Text;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.File;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.util.LinkedList;
import java.util.List;
public class db extends AnnotationClient {
//private final static String API_URL = "http://jodaiber.dyndns.org:2222/";
private static String API_URL = "http://spotlight.dbpedia.org/";
private static double CONFIDENCE = 0.0;
private static int SUPPORT = 0;
// private static String powered_by ="non";
// private static String spotter ="CoOccurrenceBasedSelector";//"LingPipeSpotter"=Annotate all spots
//AtLeastOneNounSelector"=No verbs and adjs.
//"CoOccurrenceBasedSelector" =No 'common words'
//"NESpotter"=Only Per.,Org.,Loc.
//private static String disambiguator ="Default";//Default ;Occurrences=Occurrence-centric;Document=Document-centric
//private static String showScores ="yes";
#SuppressWarnings("static-access")
public void configiration(double CONFIDENCE,int SUPPORT)
//, String powered_by,String spotter,String disambiguator,String showScores)
{
this.CONFIDENCE=CONFIDENCE;
this.SUPPORT=SUPPORT;
// this.powered_by=powered_by;
//this.spotter=spotter;
//this.disambiguator=disambiguator;
//showScores=showScores;
}
public List<DBpediaResource> extract(Text text) throws AnnotationException {
// LOG.info("Querying API.");
String spotlightResponse;
try {
String Query=API_URL + "rest/annotate/?" +
"confidence=" + CONFIDENCE
+ "&support=" + SUPPORT
// + "&spotter=" + spotter
// + "&disambiguator=" + disambiguator
// + "&showScores=" + showScores
// + "&powered_by=" + powered_by
+ "&text=" + URLEncoder.encode(text.text(), "utf-8");
//LOG.info(Query);
GetMethod getMethod = new GetMethod(Query);
getMethod.addRequestHeader(new Header("Accept", "application/json"));
spotlightResponse = request(getMethod);
} catch (UnsupportedEncodingException e) {
throw new AnnotationException("Could not encode text.", e);
}
assert spotlightResponse != null;
JSONObject resultJSON = null;
JSONArray entities = null;
try {
resultJSON = new JSONObject(spotlightResponse);
entities = resultJSON.getJSONArray("Resources");
} catch (JSONException e) {
//throw new AnnotationException("Received invalid response from DBpedia Spotlight API.");
}
LinkedList<DBpediaResource> resources = new LinkedList<DBpediaResource>();
if(entities!=null)
for(int i = 0; i < entities.length(); i++) {
try {
JSONObject entity = entities.getJSONObject(i);
resources.add(
new DBpediaResource(entity.getString("#URI"),
Integer.parseInt(entity.getString("#support"))));
} catch (JSONException e) {
//((Object) LOG).error("JSON exception "+e);
}
}
return resources;
}
}
The extended class
package extractKeyword;
import org.apache.commons.httpclient.DefaultHttpMethodRetryHandler;
import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpMethodBase;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.dbpedia.spotlight.exceptions.AnnotationException;
import org.dbpedia.spotlight.model.DBpediaResource;
import org.dbpedia.spotlight.model.Text;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.logging.Logger;
import javax.ws.rs.HttpMethod;
/**
* #author pablomendes
*/
public abstract class AnnotationClient {
//public Logger LOG = Logger.getLogger(this.getClass());
private List<String> RES = new ArrayList<String>();
// Create an instance of HttpClient.
private static HttpClient client = new HttpClient();
public List<String> getResu(){
return RES;
}
public String request(GetMethod getMethod) throws AnnotationException {
String response = null;
// Provide custom retry handler is necessary
( getMethod).getParams().setParameter(HttpMethodParams.RETRY_HANDLER,
new DefaultHttpMethodRetryHandler(3, false));
try {
// Execute the method.
int statusCode = client.executeMethod((org.apache.commons.httpclient.HttpMethod) getMethod);
if (statusCode != HttpStatus.SC_OK) {
// LOG.error("Method failed: " + ((HttpMethodBase) method).getStatusLine());
}
// Read the response body.
byte[] responseBody = ((HttpMethodBase) getMethod).getResponseBody(); //TODO Going to buffer response body of large or unknown size. Using getResponseBodyAsStream instead is recommended.
// Deal with the response.
// Use caution: ensure correct character encoding and is not binary data
response = new String(responseBody);
} catch (HttpException e) {
// LOG.error("Fatal protocol violation: " + e.getMessage());
throw new AnnotationException("Protocol error executing HTTP request.",e);
} catch (IOException e) {
//((Object) LOG).error("Fatal transport error: " + e.getMessage());
//((Object) LOG).error(((HttpMethodBase) method).getQueryString());
throw new AnnotationException("Transport error executing HTTP request.",e);
} finally {
// Release the connection.
((HttpMethodBase) getMethod).releaseConnection();
}
return response;
}
protected static String readFileAsString(String filePath) throws java.io.IOException{
return readFileAsString(new File(filePath));
}
protected static String readFileAsString(File file) throws IOException {
byte[] buffer = new byte[(int) file.length()];
#SuppressWarnings("resource")
BufferedInputStream f = new BufferedInputStream(new FileInputStream(file));
f.read(buffer);
return new String(buffer);
}
static abstract class LineParser {
public abstract String parse(String s) throws ParseException;
static class ManualDatasetLineParser extends LineParser {
public String parse(String s) throws ParseException {
return s.trim();
}
}
static class OccTSVLineParser extends LineParser {
public String parse(String s) throws ParseException {
String result = s;
try {
result = s.trim().split("\t")[3];
} catch (ArrayIndexOutOfBoundsException e) {
throw new ParseException(e.getMessage(), 3);
}
return result;
}
}
}
public void saveExtractedEntitiesSet(String Question, LineParser parser, int restartFrom) throws Exception {
String text = Question;
int i=0;
//int correct =0 ; int error = 0;int sum = 0;
for (String snippet: text.split("\n")) {
String s = parser.parse(snippet);
if (s!= null && !s.equals("")) {
i++;
if (i<restartFrom) continue;
List<DBpediaResource> entities = new ArrayList<DBpediaResource>();
try {
entities = extract(new Text(snippet.replaceAll("\\s+"," ")));
System.out.println(entities.get(0).getFullUri());
} catch (AnnotationException e) {
// error++;
//LOG.error(e);
e.printStackTrace();
}
for (DBpediaResource e: entities) {
RES.add(e.uri());
}
}
}
}
public abstract List<DBpediaResource> extract(Text text) throws AnnotationException;
public void evaluate(String Question) throws Exception {
evaluateManual(Question,0);
}
public void evaluateManual(String Question, int restartFrom) throws Exception {
saveExtractedEntitiesSet(Question,new LineParser.ManualDatasetLineParser(), restartFrom);
}
}
The Main Class
package extractKeyword;
public class startAnnonation {
public static void main(String[] args) throws Exception {
String question = "What is the winning chances of BJP in New Delhi elections?";
db c = new db ();
c.configiration(0.25,0);
//, 0, "non", "AtLeastOneNounSelector", "Default", "yes");
c.evaluate(question);
System.out.println("resource : "+c.getResu());
}
}
The main problem is here when I am using DBPedia spotlight using spotlight jar (above code)then i am getting different result as compared to the dbpedia spotlight endpoint(dbpedia-spotlight.github.io/demo/)
Result using the above code:-
Text :-What is the winning chances of BJP in New Delhi elections?
Confidence level:-0.35
resource : [Election]
Result on DBPedia Spotlight endpoint(//dbpedia-spotlight.github.io/demo/)
Text:-What is the winning chances of BJP in New Delhi elections?
Confidence level:-0.35
resource : [Bharatiya_Janata_Party, New_Delhi, Election]
Why also the spotlight now don't have support as a parameter?

Categories