search text only using Java - java

I am using ajax google API to search a string against google. It is returning me all HTML files which have all tags including text.
If I want to get text only, what should I use?
My program is in Java.
Regards
Manjot

I did some googling and found this:
http://www.ajaxlines.com/ajax/stuff/article/using_google_is_ajax_search_api_with_java.php
Here's the example code snippet from there:
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import org.json.JSONArray; // JSON library from http://www.json.org/java/
import org.json.JSONObject;
public class GoogleQuery {
// Put your website here
private final String HTTP_REFERER = "http://www.example.com/";
public GoogleQuery() {
makeQuery("questio verum");
makeQuery("info:http://frankmccown.blogspot.com/");
makeQuery("site:frankmccown.blogspot.com");
}
private void makeQuery(String query) {
System.out.println(" Querying for " + query);
try
{
// Convert spaces to +, etc. to make a valid URL
query = URLEncoder.encode(query, "UTF-8");
URL url = new URL("http://ajax.googleapis.com/ajax/services/search/web?start=0&rsz=large&v=1.0&q=" + query);
URLConnection connection = url.openConnection();
connection.addRequestProperty("Referer", HTTP_REFERER);
// Get the JSON response
String line;
StringBuilder builder = new StringBuilder();
BufferedReader reader = new BufferedReader(
new InputStreamReader(connection.getInputStream()));
while((line = reader.readLine()) != null) {
builder.append(line);
}
String response = builder.toString();
JSONObject json = new JSONObject(response);
System.out.println("Total results = " +
json.getJSONObject("responseData")
.getJSONObject("cursor")
.getString("estimatedResultCount"));
JSONArray ja = json.getJSONObject("responseData")
.getJSONArray("results");
System.out.println(" Results:");
for (int i = 0; i < ja.length(); i++) {
System.out.print((i+1) + ". ");
JSONObject j = ja.getJSONObject(i);
System.out.println(j.getString("titleNoFormatting"));
System.out.println(j.getString("url"));
}
}
catch (Exception e) {
System.err.println("Something went wrong...");
e.printStackTrace();
}
}
public static void main(String args[]) {
new GoogleQuery();
}
}
As a side note, you should be careful not to violate the Google TOS:
"You specifically agree not to access (or attempt to access) any of the Services through any automated means (including use of scripts or web crawlers) and shall ensure that you comply with the instructions set out in any robots.txt"
- http://www.google.com/accounts/TOS

Related

Json data fetching lag problem from jsonbin.io

I'm working on a app that is a simple game, so here's a picture for the UI I have, the problem is when I fetch a level using Json web service, the first time I choose the level from activity_level_list, the picture and level number doesn't show, only the 4 words works but neither the image nor the level number appear like this.
NOTE: The image URL variable is null (because of the lag that happens the first time I open first level activity) so an error happen (that's why it shows ic_launcher instead of showing the level picture).
So, I said the problem was "lag" because when I go back to LevelList and click on level 1, the picture and the level number get fetched and everything becomes good.
How can I make it both the image and level ID appears in the first time I open the activity?
NOTE: I use jsonbin.io for data fetching.
Here's my Json data fetching class:
import android.os.AsyncTask;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
public class FetchData extends AsyncTask<Void,Void,Void> {
String update = "4";
String data ="";
String id;
String img;
String w1;
String w2;
String w3;
String w4;
#Override
protected Void doInBackground(Void... voids) {
try {
URL url = new URL("https://api.jsonbin.io/b/5e42776dd18e4016617690ce/" + update);
HttpURLConnection httpURLConnection = (HttpURLConnection) url.openConnection();
InputStream inputStream = httpURLConnection.getInputStream();
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
String line = "";
while(line != null){
line = bufferedReader.readLine();
data = data + line;
}
JSONArray JA = new JSONArray(data);
// for(int i = 0 ;i < JA.length(); i++) {
JSONObject JO = (JSONObject) JA.get(1);
id = (String) JO.get("id");
img = (String) JO.get("img");
w1 = (String) JO.get("w1");
w2 = (String) JO.get("w2");
w3 = (String) JO.get("w3");
w4 = (String) JO.get("w4");
// }
} catch (MalformedURLException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (JSONException e) {
e.printStackTrace();
}
return null;
}
#Override
protected void onPostExecute(Void aVoid) {
super.onPostExecute(aVoid);
int levelId = Integer.parseInt(id);
levelId++;
LevelActivity.levelID = String.valueOf(levelId);
LevelActivity.imageURL = img;
LevelActivity.button1Word = w1;
LevelActivity.button2Word = w2;
LevelActivity.button3Word = w3;
LevelActivity.button4Word = w4;
}
}
Thanks ^-^
I just needed to update the image and level text view number in onPostExecute method to make it load from the first time.
Answer by NIKHIL AGGARWAL

Finding a specific sequence of characters in a string

I'm trying to write a code to get the latitude and longitude of a user inputted city.
I've written a code which searches a website and then stores its HTML data in a string.
I now want to search the string(the HTML of the website) for the values.
I've looked into API's, and HTML parsers, but they are all too complicated for me (I'm still in school, just starting out), so please don't recommend those unless its absolutely impossible to do without them.
Code:
import java.net.*;
import java.io.*;
import java.util.Scanner;
import static java.lang.System.*;
class websearch {
public static void main(String[] args) {
Scanner sc = new Scanner(System.in);
//gets the city
out.println("enter city, add plus between multiple words");
String term = sc.nextLine();
try {URL url = new URL("http://www.geonames.org/search.html?q=" + term + "&country=");
URLConnection ucl = url.openConnection();
InputStream stream = ucl.getInputStream();
int i;
//the string in which the html code will be stored
String code = " ";
while ((i=stream.read())!= -1) {
code += Character.toString((char)i);
}
//printing the html, only for testing
System.out.print(code);
} catch(Exception e) {
System.out.println("error");
}
}
}
This code prints a string too large to be pasted here, but the values I want to find look like this:
<td nowrap="">N 40° 42' 51''</td>
<td nowrap="">W 74° 0' 21''</td>
How could I find this sequence of characters, and then store only the latitude and longitude in a variable?
This tutorial should be of some use to you. it goes over how to get the geocode data without actually using the google maps API. If you follow what it says, you should have a fairly easy time implementing it.
The end result is that you will enter the street address of the location you want, and it will return the latitude and longitude in searchable variables.
Primary working class
import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import org.apache.log4j.Logger;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.JSONValue;
import org.json.simple.parser.JSONParser;
public class OpenStreetMapUtils {
public final static Logger log = Logger.getLogger("OpenStreeMapUtils");
private static OpenStreetMapUtils instance = null;
private JSONParser jsonParser;
public OpenStreetMapUtils() {
jsonParser = new JSONParser();
}
public static OpenStreetMapUtils getInstance() {
if (instance == null) {
instance = new OpenStreetMapUtils();
}
return instance;
}
private String getRequest(String url) throws Exception {
final URL obj = new URL(url);
final HttpURLConnection con = (HttpURLConnection) obj.openConnection();
con.setRequestMethod("GET");
if (con.getResponseCode() != 200) {
return null;
}
BufferedReader in = new BufferedReader(new InputStreamReader(con.getInputStream()));
String inputLine;
StringBuffer response = new StringBuffer();
while ((inputLine = in.readLine()) != null) {
response.append(inputLine);
}
in.close();
return response.toString();
}
public Map<String, Double> getCoordinates(String address) {
Map<String, Double> res;
StringBuffer query;
String[] split = address.split(" ");
String queryResult = null;
query = new StringBuffer();
res = new HashMap<String, Double>();
query.append("http://nominatim.openstreetmap.org/search?q=");
if (split.length == 0) {
return null;
}
for (int i = 0; i < split.length; i++) {
query.append(split[i]);
if (i < (split.length - 1)) {
query.append("+");
}
}
query.append("&format=json&addressdetails=1");
log.debug("Query:" + query);
try {
queryResult = getRequest(query.toString());
} catch (Exception e) {
log.error("Error when trying to get data with the following query " + query);
}
if (queryResult == null) {
return null;
}
Object obj = JSONValue.parse(queryResult);
log.debug("obj=" + obj);
if (obj instanceof JSONArray) {
JSONArray array = (JSONArray) obj;
if (array.size() > 0) {
JSONObject jsonObject = (JSONObject) array.get(0);
String lon = (String) jsonObject.get("lon");
String lat = (String) jsonObject.get("lat");
log.debug("lon=" + lon);
log.debug("lat=" + lat);
res.put("lon", Double.parseDouble(lon));
res.put("lat", Double.parseDouble(lat));
}
}
return res;
}
}
How to call the above working class:
public class GetCoordinates {
static String address = "The White House, Washington DC";
public static void main(String[] args) {
Map<String, Double> coords;
coords = OpenStreetMapUtils.getInstance().getCoordinates(address);
System.out.println("latitude :" + coords.get("lat"));
System.out.println("longitude:" + coords.get("lon"));
}
}
use XPath is a easy way.
DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
DocumentBuilder builder = factory.newDocumentBuilder();
Document doc = builder.parse(<uri_as_string>);
XPathFactory xPathfactory = XPathFactory.newInstance();
XPath xpath = xPathfactory.newXPath();
XPathExpression expr = xpath.compile(<xpath_expression>);
as for , try something like below:
/xxx/td[#nowrap='']/text()

My HTML fetcher program in java returns incomplete results

My java code is:
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
public class celebGrepper {
static class CelebData {
URL link;
String name;
CelebData(URL link, String name) {
this.link=link;
this.name=name;
}
}
public static String grepper(String url) {
URL source;
String data = null;
try {
source = new URL(url);
HttpURLConnection connection = (HttpURLConnection) source.openConnection();
connection.connect();
InputStream is = connection.getInputStream();
/**
* Attempting to fetch an entire line at a time instead of just a character each time!
*/
StringBuilder str = new StringBuilder();
BufferedReader br = new BufferedReader(new InputStreamReader(is));
while((data = br.readLine()) != null)
str.append(data);
data=str.toString();
} catch (IOException e) {
e.printStackTrace();
}
return data;
}
public static ArrayList<CelebData> parser(String html) throws MalformedURLException {
ArrayList<CelebData> list = new ArrayList<CelebData>();
Pattern p = Pattern.compile("<td class=\"image\".*<img src=\"(.*?)\"[\\s\\S]*<td class=\"name\"><a.*?>([\\w\\s]+)<\\/a>");
Matcher m = p.matcher(html);
while(m.find()) {
CelebData current = new CelebData(new URL(m.group(1)),m.group(2));
list.add(current);
}
return list;
}
public static void main(String... args) throws MalformedURLException {
String html = grepper("https://www.forbes.com/celebrities/list/");
System.out.println("RAW Input: "+html);
System.out.println("Start Grepping...");
ArrayList<CelebData> celebList = parser(html);
for(CelebData item: celebList) {
System.out.println("Name:\t\t "+item.name);
System.out.println("Image URL:\t "+item.link+"\n");
}
System.out.println("Grepping Done!");
}
}
It's supposed to fetch the entire HTML content of https://www.forbes.com/celebrities/list/. However, when I compare the actual result below to the original page, I find the entire table that I need is missing! Is it because the page isn't completely loaded when I start getting the bytes from the page via the input stream? Please help me understand.
The Output of the page:
https://jsfiddle.net/e0771aLz/
What can I do to just extract the Image link and the names of the celebs?
I know it's an extremely bad practice to try to parse HTML using regex and is the stuff of nightmares, but on a certain video training course for android, that's exactly what the guy did, and I just wanna follow along since it's just in this one lesson.

Use getJSONObject(int)

I'm trying to parse some JSON. Here is the code. frc-manual.usfirst.org/a/GetAllItems/ManualID=3 I have been trying for several hours to get it work but every example I have seen online uses getJSONObject(int) but I can only use getJSONObject(String). This is making impossible. Am I overlooking something?
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONObject;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserFactory;
import android.annotation.SuppressLint;
public class JSON {
private String html = "html";
private String version = "version";
private String pageString = null;
private String urlString = "http://frc-manual.usfirst.org/a/GetAllItems/ManualID=3";
public volatile boolean parsingComplete = true;
public JSON(String page){
this.pageString = page;
}
public String getHTML(){
return html;
}
public String getVersion(){
return version;
}
#SuppressLint("NewApi")
public void readAndParseJSON(String in) {
try {
JSONObject reader = new JSONObject(in);
JSONObject head = reader.getJSONObject("data").getJSONObject("SubChapter").getJSONObject("3").getJSONObject("children").getJSONObject(pageString);
if(head != null){
html = head.getString("item_content_text");
html = html + head.length();
for(int i = 0; i < head.length();i++){
JSONObject children = head.getJSONObject(i);
if(children != null){
html = html + children.getString("item_content_text");
}
}
}
//html = html + listFromJsonSorted(head.getJSONObject("children"));
JSONObject main = reader.getJSONObject("data");
version = main.getString("LatestManualUpdate");
parsingComplete = false;
} catch (Exception e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public void fetchJSON(){
Thread thread = new Thread(new Runnable(){
#Override
public void run() {
try {
URL url = new URL(urlString);
HttpURLConnection conn = (HttpURLConnection) url.openConnection();
conn.setReadTimeout(10000 /* milliseconds */);
conn.setConnectTimeout(15000 /* milliseconds */);
conn.setRequestMethod("GET");
conn.setDoInput(true);
// Starts the query
conn.connect();
InputStream stream = conn.getInputStream();
String data = convertStreamToString(stream);
readAndParseJSON(data);
stream.close();
} catch (Exception e) {
e.printStackTrace();
}
}
});
thread.start();
}
static String convertStreamToString(java.io.InputStream is) {
java.util.Scanner s = new java.util.Scanner(is).useDelimiter("\\A");
return s.hasNext() ? s.next() : "";
}
}
Probably what you are seeing in this example is a JSONArray.
JSONArray arr = json.getJSONArray("array");
JSONObject obj = arr.getJSONObject(0);
In your case, i don't see any array in this JSON, head in your code is a JSONObject. To get the item_content_text you just need head.getString("item_content_text");
You can do this to get all childrens in your JSON:
html = head.getString("item_content_text");
JSONObject children = head;
while (children.containsKey("children")) {
children = children.getJSONObject("children");
html += children.getString("item_content_text");
}
You are only able to use getJSONObject(String) not getJSONObject(int) because the method only takes String.. Check documentation here..
The reason that is the because the keys in json are always strings only.. read here
I think you are looking to retrieve an int value from the json but you got confused.. The way to do that would be getInt("key_name") if the json indeed has a key with int value..
Sure you can, just that I think your "head" variable should have a return type of JSONArray instead of JSONObject.
I have this code running in my program -
//response = some http response
final JSONObject object = new JSONObject(response);
final JSONArray array = object.getJSONArray("repeatedStuff"); //$NON-NLS-1$
Assert.assertEquals(2, array.length());
for (int i = 0; i < array.length(); i++) {
final JSONObject element = array.getJSONObject(i);
//do something
}
Also you may refer this link to see what they are doing - android: The method getJSONObject(int) in the type JSONArray is not applicable for the arguments (String)
Let me know if that helped!
You have used JSONObject everywhere in your code. Thus you are only able to pass string parameter in getJSONObject().
I suggest you you change it to JSONArray and then you will be able to pass int parameter in getJSONObject().

Parsing the web page response as Json, in Java

I have a java code:
URL oracle = new URL("https://x.x.x.x.x.x.-001");
System.out.println(oracle.openStream());
BufferedReader in = new BufferedReader(new InputStreamReader(oracle.openStream()));
String inputLine;
while ((inputLine = in.readLine()) != null)
System.out.println(inputLine);
Which is opening the connection and printing the contents of it. The contents are indeed Json. The output is something like:
{
"merchantId": "guest",
"txnId": "guest-1349269250-001",
}
I wish to parse this in json simple jar. I changed the code loop like this:
JSONObject obj = new JSONObject();
while ((inputLine = in.readLine()) != null)
obj.put("Result",inputLine);
But that doesn't seem to be working. The output I'm getting is:
{"Result":"}"}
You should use the JSONParser#Parse() method or the JSONValue#parse() method :
URL oracle = new URL("https://x.x.x.x.x.x.-001");
System.out.println(oracle.openStream());
Reader in = new InputStreamReader(oracle.openStream());
Object json = JSONValue.parse(in);
Are you sure you're following the documentation on how to parse a JSON string?
By the looks of it you have to obtain the entire string and call a JSONParse#parse() on it, but your code is filling up a HashMap (JSONObject's parent class) with each of the lines of the JSON. In fact it stores just the last line because you're calling put() with the same "Result" key on every iteration.
You should read whole contents to String variable first and parse it to json. Be careful of ""(double quote). Java uses \" for double quote. Like.
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
public class JsonSimpleExample3 {
public static void main(String args[]) {
JSONParser parser = new JSONParser();
//String str = "{\"merchantId\": \"guest\",\"txnId\": \"guest-1349269250-001\",}";
//intilize an InputStream
InputStream is = new ByteArrayInputStream("file content".getBytes());
//read it with BufferedReader and create string
BufferedReader br = new BufferedReader(new InputStreamReader(is));// Instead of is, you should use oracle.openStream()
StringBuilder sb = new StringBuilder();
String line;
try {
while ((line = br.readLine()) != null) {
sb.append(line);
}
} catch (IOException e1) {
e1.printStackTrace();
}
// parse string
try {
JSONObject jsonObject = (JSONObject) parser.parse(sb.toString());
String merchantId = (String) jsonObject.get("merchantId");
System.out.println(merchantId);
String txnId = (String) jsonObject.get("txnId");
System.out.println(txnId);
} catch (ParseException e) {
e.printStackTrace();
}
}
}
try this link its really helpful if you are going to be logging in or staff like that
Java Json simple
import java.io.IOException;
import java.net.URL;
import org.apache.commons.io.IOUtils;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.JSONValue;
import org.json.simple.parser.ParseException;
public class ParseJson1 {
public static void main(String[] args) {
String url = "http://freemusicarchive.org/api/get/genres.json?api_key=60BLHNQCAOUFPIBZ&limit=2";
/*
* {"title":"Free Music Archive - Genres","message":"","errors":[],"total" : "161","total_pages":81,"page":1,"limit":"2",
* "dataset":
* [{"genre_id": "1","genre_parent_id":"38","genre_title":"Avant-Garde" ,"genre_handle": "Avant-Garde","genre_color":"#006666"},
* {"genre_id":"2","genre_parent_id" :null,"genre_title":"International","genre_handle":"International","genre_color":"#CC3300"}]}
*/
try {
String genreJson = IOUtils.toString(new URL(url));
JSONObject genreJsonObject = (JSONObject) JSONValue.parseWithException(genreJson);
// get the title
System.out.println(genreJsonObject.get("title"));
// get the data
JSONArray genreArray = (JSONArray) genreJsonObject.get("dataset");
// get the first genre
JSONObject firstGenre = (JSONObject) genreArray.get(0);
System.out.println(firstGenre.get("genre_title"));
} catch (IOException | ParseException e) {
e.printStackTrace();
}
}
}

Categories