Unable to query local version of Linked Movie Database - java

I am trying to query a local copy of the Linked Movie Database using SPARQL. The file is in N-Triples format and is approximately 450 MB in size. I am using servlets for the implementation. When I submit the query, the servlet takes more than five minutes to process it, and in the end I get the following exception:
type Exception report
message
description The server encountered an internal error () that prevented it from fulfilling this request.
exception
javax.servlet.ServletException: Servlet execution threw an exception
root cause
java.lang.OutOfMemoryError: Java heap space
java.util.Arrays.copyOfRange(Arrays.java:3209)
java.lang.String.<init>(String.java:215)
java.lang.StringBuilder.toString(StringBuilder.java:430)
org.openjena.riot.tokens.TokenizerText.allBetween(TokenizerText.java:732)
org.openjena.riot.tokens.TokenizerText.parseToken(TokenizerText.java:152)
org.openjena.riot.tokens.TokenizerText.hasNext(TokenizerText.java:69)
org.openjena.atlas.iterator.PeekIterator.fill(PeekIterator.java:37)
org.openjena.atlas.iterator.PeekIterator.next(PeekIterator.java:77)
org.openjena.riot.lang.LangBase.nextToken(LangBase.java:145)
org.openjena.riot.lang.LangNTriples.parseOne(LangNTriples.java:59)
org.openjena.riot.lang.LangNTriples.parseOne(LangNTriples.java:21)
org.openjena.riot.lang.LangNTuple.runParser(LangNTuple.java:58)
org.openjena.riot.lang.LangBase.parse(LangBase.java:75)
org.openjena.riot.system.JenaReaderNTriples2.readWorker(JenaReaderNTriples2.java:28)
org.openjena.riot.system.JenaReaderRIOT.readImpl(JenaReaderRIOT.java:124)
org.openjena.riot.system.JenaReaderRIOT.read(JenaReaderRIOT.java:79)
com.hp.hpl.jena.rdf.model.impl.ModelCom.read(ModelCom.java:226)
com.hp.hpl.jena.util.FileManager.readModelWorker(FileManager.java:395)
com.hp.hpl.jena.util.FileManager.loadModelWorker(FileManager.java:299)
com.hp.hpl.jena.util.FileManager.loadModel(FileManager.java:250)
ServletExample.runQuery(ServletExample.java:92)
ServletExample.doGet(ServletExample.java:62)
javax.servlet.http.HttpServlet.service(HttpServlet.java:627)
javax.servlet.http.HttpServlet.service(HttpServlet.java:729)
note The full stack trace of the root cause is available in the Apache Tomcat/5.5.31 logs.
My code is:
import java.io.IOException;
import java.io.PrintWriter;
import javax.servlet.ServletException;
import javax.servlet.http.*;
import com.hp.hpl.jena.query.*;
import com.hp.hpl.jena.rdf.model.*;
import com.hp.hpl.jena.util.FileManager;
public class ServletExample
extends HttpServlet
{
/***********************************/
/* Constants */
/***********************************/
private static final long serialVersionUID = 1L;
public static final String SPARQL_ENDPOINT = "http://data.linkedmdb.org/sparql";
public static final String QUERY ="PREFIX m: <http://data.linkedmdb.org/resource/movie/>"
+"SELECT DISTINCT ?actorName WHERE {"+
"?dir1 m:director_name \"Sofia Coppola\"."+
"?dir2 m:director_name \"Francis Ford Coppola\"."+
"?dir1film m:director ?dir1;"+
"m:actor ?actor."+
"?dir2film m:director ?dir2;"+
"m:actor ?actor."+
"?actor m:actor_name ?actorName."+
"}";
/*"PREFIX m: <http://data.linkedmdb.org/resource/movie/>\n" +
"SELECT DISTINCT ?actorName WHERE {\n" +
" ?dir1 m:director_name %dir_name_1%.\n" +
" ?dir2 m:director_name %dir_name_2%.\n" +
" ?dir1film m:director ?dir1;\n" +
" m:actor ?actor.\n" +
" ?dir2film m:director ?dir2;\n" +
" m:actor ?actor.\n" +
" ?actor m:actor_name ?actorName.\n" +
"}\n" +
"";*/
private static final String HEADER = "<html>\n" +
" <head>\n" +
" <title>results</title>\n" +
" <link href=\"simple.css\" type=\"text/css\" rel=\"stylesheet\" />\n" +
" </head>\n" +
" <body>\n" +
"";
private static final String FOOTER = "</body></html>";
/**
* Respond to HTTP GET request. Will need to be mounted against some URL
* pattern in web.xml
*/
#Override
protected void doGet( HttpServletRequest req, HttpServletResponse resp )
throws ServletException, IOException
{
String dir1 = req.getParameter( "dir1" );//"Sofia";
String dir2 = req.getParameter( "dir2" );//"Francis Ford Coppola";
//String dir1 = "Sofia";
//String dir2 = "Francis Ford Coppola";
if (dir1 == null || dir2 == null || dir1.isEmpty() || dir2.isEmpty()) {
noInput( resp );
}
else {
runQuery( resp, dir1, dir2 );
}
}
protected void noInput( HttpServletResponse resp )
throws IOException
{
header( resp );
resp.getWriter().println( "<p>Please select director names as query params <code>dir1</code> and <code>dir2</code></p>" );
footer( resp );
}
protected void footer( HttpServletResponse resp ) throws IOException {
resp.getWriter().println( FOOTER );
}
protected void header( HttpServletResponse resp ) throws IOException {
resp.getWriter().println( HEADER );
}
protected void runQuery( HttpServletResponse resp, String dir1, String dir2 )
throws IOException
{
PrintWriter out = resp.getWriter();
// Set up the query
// String q = QUERY.replace( "%dir_name_1%", "\"" + dir1 + "\"" )
// .replace( "%dir_name_2%", "\"" + dir2 + "\"" );
String q=QUERY;
Query query = QueryFactory.create( q ) ;
Model model = FileManager.get().loadModel( "e:\\applications\\linkedmdb-18-05-2009-dump\\dump.nt" );
// QueryExecution qexec = QueryExecutionFactory.sparqlService( SPARQL_ENDPOINT, query );
//com.hp.hpl.jena.query.Query query = QueryFactory.create(QUERY);
QueryExecution qexec = QueryExecutionFactory.create(query, model);
// perform the query
ResultSet results = qexec.execSelect();
// generate the output
header( resp );
if (!results.hasNext()) {
out.println( "<p>No results, sorry.</p>" );
}
else {
out.println( "<h1>Results</h1>" );
while (results.hasNext()) {
QuerySolution qs = results.next();
String actorName = qs.getLiteral( "actorName" ).getLexicalForm();
out.println( String.format( "<div>Actor named: %s</div>", actorName ) );
}
}
footer( resp );
}
}
Is there any way to resolve this exception?

It seems you're loading all your data into memory using Jena/RIOT. As far as I know, LinkedMDB is large enough to give you problems with this approach: in effect, you are bringing your whole database into memory.
Increasing the heap of your JVM could be one possible solution but it won't scale if your data keeps growing.
The right solution is to use one of the Jena configurations that are designed for datasets of this size. These are:
1. Jena SDB, which uses a relational database as its backend.
2. Jena TDB, which uses native Java storage based on B-tree indexes to speed up queries. It scales better than (1).
Optionally, you could go for a scalable RDF database such as 4store and query your data via Jena ARQ. This is by far the option that will scale and perform best.

You are running out of heap memory in Java Virtual Machine (JVM). Either increase amount of heap memory that is available to JVM or design your software to use less memory, for example process the stuff in smaller chunks.
To increase heap memory, add these parameters to your servlet container's or application server's startup script, somewhere your java binary is executed. This tells JVM that it may use up to 512 megabytes of memory, if that is not enough, try with larger values:
-Xmx512m
It is hard to say how to improve your software to use less memory without seeing the actual code.

Related

How to delete the repeated occurrences of a file in documentum if it exists more than once using dql query in java?

I want to delete repeated occurrence of a file in documentum leaving only one
file if it exists more than once.
Following is my code.
import com.documentum.fc.client.IDfQuery;
import com.documentum.fc.client.IDfSession;
import com.documentum.fc.client.IDfSessionManager;
import com.documentum.fc.common.DfException;
import com.documentum.fc.common.DfId;
import com.documentum.fc.common.IDfLoginInfo;
import com.documentum.operations.IDfDeleteOperation;
/**
 * Command-line program that connects to a Documentum repository, counts the
 * documents named 'abc.pdf' under a folder and, when more than one exists,
 * issues a DQL delete statement for objects with that name.
 */
public class CountFiles {
// Name of the Documentum repository the session is opened against
private static final String REPO_NAME = "rep";
/**
 * Creates a session manager, registers the login identity, opens a session
 * and runs the query. Any exception is printed rather than propagated.
 */
public static void main( String[] args ) throws Exception {
try {
String username = "user";
String password = "pwd";
System.out.println( "Starting to connect ..." );
IDfSessionManager sessMgr = createSessionManager( );
addIdentity( sessMgr, username, password);
// NOTE(review): the session obtained here is never released in this method.
IDfSession sess = sessMgr.getSession(REPO_NAME );
System.out.println( "Successfully connected to the server.");
queryDocumentum(sess);
} catch( Exception ex ) {
System.out.println( ex );
ex.printStackTrace( );
}
}
/**
 * Runs a count query for 'abc.pdf' below '/XXX/YYY' and, for each result row
 * whose count is greater than one, executes a delete statement.
 *
 * @param sess session used to execute both DQL statements
 * @throws DfException declared for the DFC calls made here
 */
private static void queryDocumentum(IDfSession sess) throws DfException {
IDfQuery query = new DfQuery();
String queryStr= "select count(*) from dm_document where folder('/XXX/YYY', DESCEND) and object_name = 'abc.pdf' ";
query.setDQL(queryStr);
IDfCollection coll = query.execute(sess,IDfQuery.DF_EXEC_QUERY);
while(coll.next())
{
// NOTE(review): the result of getValueAt(0) is assigned straight to an int;
// confirm the return type of getValueAt and convert explicitly if needed.
int count = coll.getValueAt(0);
if(count>1)
{
// NOTE(review): this statement has no folder restriction and no "keep one"
// condition, so it targets every object with this name.
String qry = "delete dm_sysobject (all) objects where object_name='abc.pdf';";
// NOTE(review): `q` is created but never used; the delete DQL is set on
// `query` while `coll` (produced by `query`) is still being iterated.
IDfQuery q= new DfQuery();
query.setDQL(qry);
// NOTE(review): `col` is never closed.
IDfCollection col = query.execute(sess,IDfQuery.DF_EXEC_QUERY);
}
}
// Only reached when no exception was thrown above.
coll.close();
}
/**
* Creates a new session manager instance. The session manager does not have
* any identities associated with it.
*
* @return a new session manager object.
* @throws Exception declared by this method; the visible code does not catch anything
*/
private static IDfSessionManager createSessionManager( )
throws Exception {
IDfClientX clientX = new DfClientX( );
IDfClient localClient = clientX.getLocalClient( );
IDfSessionManager sessMgr = localClient.newSessionManager( );
System.out.println( "Created session manager." );
return sessMgr;
}
/**
* Adds a new identity to the session manager, replacing any identity that is
* already registered for {@code REPO_NAME}.
*
* @param sm session manager to register the identity with
* @param username login name stored in the login info
* @param password password stored in the login info
* @throws Exception declared by this method; the visible code does not catch anything
*/
private static void addIdentity( final IDfSessionManager sm,
final String username, final String password )
throws Exception {
IDfClientX clientX = new DfClientX( );
IDfLoginInfo li = clientX.getLoginInfo( );
li.setUser( username );
li.setPassword( password );
// check if session manager already has an identity.
// if yes, remove it.
if( sm.hasIdentity( REPO_NAME ) ) {
sm.clearIdentity( REPO_NAME );
System.out.println( "Cleared identity on :" + REPO_NAME );
}
sm.setIdentity( REPO_NAME, li );
System.out.println( "Set up identity for the user." );
}
}
But something is wrong with the way I am doing the operation: it is not working. I am not giving the path of the file here because I don't know its exact path. Is it possible to delete all occurrences of the file except one without giving its path?
If you are coding the logic in Java anyway, have a look at the IDfOperation interfaces (for example IDfDeleteOperation). This approach to bulk operations is also described well in the guides.
I would suggest the following changes in your DQL and in the logic, this is mostly because you are using DFC API:
DQL:
//Retrieves all the root object ids of the documents version tree where there are duplicates as their name.
//Here I thought the latest created document is the one to keep, you can adapt it to your needs - **see ORDER by clause**.
select i_chronicle_id from dm_document where folder('/XXX/YYYY', DESCEND) and object_name in (select object_name from dm_document where folder('/XXX/YYYY', DESCEND) and object_name = 'abc.pdf' group by object_name having count(*) > 1) order by r_creation_date asc;
Logic:
//"pseudo-code" - deletes the entire version tree of the document that is not the last in the returned list
while 'not last i_chronicle_id from the result collection'
//execute this dql:
`delete dm_document (all) objects where i_chronicle_id='<i_chronicle_id from the list>';`
Hope this helps,
Emilian
P.S. The DQLs I have tested against a CS 7.3

List all attachments stored in cloudant with java

I'm developing a demo and I'm stuck with this.
I want to list all the attachments (PDFs, for example) in a Java web app, but I am not able to retrieve and list them.
I'm only able to retrieve common data (String, Ints).
Is there a standard way to retrieve and show ?
I been reading all the posts but nothing seems to work.
Here is where I add the vendor, with the attachment:
/**
 * Saves the given stream as an attachment named after the vendor, records the
 * attachment response on the vendor, posts the vendor document and prints the
 * ids and revisions of both responses.
 *
 * @param vendor      vendor to store; its {@code _id} must parse as an integer
 * @param inputStream attachment content
 * @param size        attachment size (not used by this method)
 * @param contentType MIME type passed to the attachment call
 */
public void addVendor(final Vendor vendor, final InputStream inputStream, final long size, final String contentType)
{
    final Database database = getDb();

    // The attachment is stored under the vendor's numeric id plus one.
    final int attachmentDocId = Integer.valueOf(vendor.get_id()) + 1;
    final String attachmentName = vendor.getName() + ".txt";
    final Response attachmentResponse =
            database.saveAttachment(inputStream, attachmentName, contentType, String.valueOf(attachmentDocId), null);
    vendor.setAttachment(attachmentResponse);

    final Response vendorResponse = database.post(vendor);

    System.out.println("Vendor created successfully. Id: " + vendorResponse.getId()
            + ", rev: " + vendorResponse.getRev());
    System.out.println("File created successfully. Id: " + attachmentResponse.getId()
            + ", rev: " + attachmentResponse.getRev());
}
Here is where I try to retrieve the data:
/**
 * Loads every document from the database as a {@code Vendor} and, for each
 * vendor that carries a stored attachment response, opens the referenced
 * document as a stream and sets it on the vendor.
 *
 * @return the vendors returned by the {@code _all_docs} view, with their input
 *         streams populated where an attachment response was present
 */
public List<Vendor> getAllVendors()
{
    final Database db = getDb();

    // Get all documents, including their bodies, mapped onto Vendor
    final List<Vendor> vendors = db.view("_all_docs").includeDocs(true).query(Vendor.class);

    for (final Vendor vend : vendors) {
        final Response attachment = vend.getAttachment();
        if (attachment == null) {
            // Nothing was recorded for this document, so there is nothing to look up.
            continue;
        }
        // Look the attachment up by the _id and _rev saved when the vendor was added
        vend.setInput(db.find(attachment.getId(), attachment.getRev()));
    }
    return vendors;
}
In this last piece of code, I intended to create a new list with all my Vendor data plus the blob.
When I add the vendor (in the first part), I save the "response" of the attachment in the vendor object, so that when I try to retrieve it I have the data to work with (_id and _rev).
I'm assuming you want to list all documents that contain attachments. If so, you can create a MapReduce view similar to this:
// Map function for a view: emits one row per document that has an
// `_attachments` property, keyed by the document id with a null value.
function(doc) {
if (doc._attachments) {
emit(doc._id, null);
}
}
You would then call the view using something like this to get a list of document ids of documents that contain attachments:
GET /dbname/_design/designdocname/_view/docswithattachments
The above GET request would look something like this in Java:
List<Foo> list = db.view("designdocname/docswithattachments")
.query(Foo.class);

Error in Lucene text Search

I'm new to text search and I'm studying some examples related to lucene. I found one of the example from this link. http://javatechniques.com/blog/lucene-in-memory-text-search-example/ I tried it in my eclipse IDE. But it gives some errors. I imported all the relevent jar files as well.
Here Is the code :
/**
 * Demo that builds a small in-memory Lucene index of quotes and runs a few
 * keyword searches against it, printing the matches to standard output.
 */
public class InMemoryExample {
/**
 * Indexes four quotes into a RAMDirectory and then searches the index for
 * three query strings. I/O and parse exceptions are printed, not rethrown.
 */
public static void main(String[] args) {
// Construct a RAMDirectory to hold the in-memory representation
// of the index.
RAMDirectory idx = new RAMDirectory();
try {
// Make a writer to create the index
IndexWriter writer =
new IndexWriter(idx, new StandardAnalyzer(Version.LUCENE_48),
IndexWriter.MaxFieldLength.LIMITED);
// Add some Document objects containing quotes
writer.addDocument(createDocument("Theodore Roosevelt",
"It behooves every man to remember that the work of the " +
"critic, is of altogether secondary importance, and that, " +
"in the end, progress is accomplished by the man who does " +
"things."));
writer.addDocument(createDocument("Friedrich Hayek",
"The case for individual freedom rests largely on the " +
"recognition of the inevitable and universal ignorance " +
"of all of us concerning a great many of the factors on " +
"which the achievements of our ends and welfare depend."));
writer.addDocument(createDocument("Ayn Rand",
"There is nothing to take a man's freedom away from " +
"him, save other men. To be free, a man must be free " +
"of his brothers."));
writer.addDocument(createDocument("Mohandas Gandhi",
"Freedom is not worth having if it does not connote " +
"freedom to err."));
// Optimize and close the writer to finish building the index
writer.optimize();
writer.close();
// Build an IndexSearcher using the in-memory index
Searcher searcher = new IndexSearcher(idx);
// Run some queries
search(searcher, "freedom");
search(searcher, "free");
search(searcher, "progress or achievements");
searcher.close();
}
catch (IOException ioe) {
// In this example we aren't really doing any I/O, so this
// exception should never actually be thrown.
ioe.printStackTrace();
}
catch (ParseException pe) {
pe.printStackTrace();
}
}
/**
 * Make a Document object with a stored but un-indexed title field and a
 * stored, analyzed content field.
 *
 * @param title value for the "title" field
 * @param content value for the "content" field
 * @return the populated document
 */
private static Document createDocument(String title, String content) {
Document doc = new Document();
// Add the title as a stored, unindexed field...
doc.add(new Field("title", title, Field.Store.YES, Field.Index.NO));
// ...and the content as a stored, analyzed field. The content string is
// passed to the Field directly (no Reader is involved here).
doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
return doc;
}
/**
 * Searches for the given string in the "content" field and prints the
 * total hit count followed by id, score, title and content of each hit.
 *
 * @param searcher searcher over the index to query
 * @param queryString text handed to the query parser
 * @throws ParseException if the query string cannot be parsed
 * @throws IOException declared for the search and document lookup calls
 */
private static void search(Searcher searcher, String queryString)
throws ParseException, IOException {
// Build a Query object
//Query query = QueryParser.parse(
QueryParser parser = new QueryParser("content", new StandardAnalyzer(Version.LUCENE_48));
Query query = parser.parse(queryString);
int hitsPerPage = 10;
// Search for the query, collecting at most 5 * hitsPerPage top hits
TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);
searcher.search(query, collector);
ScoreDoc[] hits = collector.topDocs().scoreDocs;
int hitCount = collector.getTotalHits();
System.out.println(hitCount + " total matching documents");
// Report whether there were any matches
if (hitCount == 0) {
System.out.println(
"No matches were found for \"" + queryString + "\"");
} else {
System.out.println("Hits for \"" +
queryString + "\" were found in quotes by:");
// Iterate over the collected hits.
// NOTE(review): the loop bound is the total hit count while `hits` comes from
// a collector created with a 5 * hitsPerPage limit - confirm `hits` can never
// be shorter than `hitCount`.
for (int i = 0; i < hitCount; i++) {
// Document doc = hits.doc(i);
ScoreDoc scoreDoc = hits[i];
int docId = scoreDoc.doc;
float docScore = scoreDoc.score;
System.out.println("docId: " + docId + "\t" + "docScore: " + docScore);
Document doc = searcher.doc(docId);
// Print the value stored in the "title" field. The field was added
// with Field.Index.NO, so it is not searchable, but Field.Store.YES
// keeps it retrievable (the "content" field is stored as well).
System.out.println(" " + (i + 1) + ". " + doc.get("title"));
System.out.println("Content: " + doc.get("content"));
}
}
System.out.println();
} }
but it shows few syntax errors in following lines :
Error 1:
IndexWriter writer = underline MaxFieldLength in red
new IndexWriter(idx, new StandardAnalyzer(Version.LUCENE_48),
IndexWriter.MaxFieldLength.LIMITED);
Error 2: underline optimize() in red
writer.optimize();
Error 3: underline new IndexSearcher(idx) in red
Searcher searcher = new IndexSearcher(idx);
Error 4: underline search in red
searcher.search(query, collector);
Could you please help me to get rid of these errors? It will be a great help. Thanks
Modified code:
/**
 * Demo that builds a small in-memory Lucene index of quotes and runs a few
 * keyword searches against it, printing the matches to standard output.
 */
public class InMemoryExample {

    /**
     * Indexes four quotes into a RAMDirectory and then searches the index for
     * three query strings.
     *
     * @throws Exception any failure while indexing or searching is propagated
     */
    public static void main(String[] args) throws Exception {
        // Construct a RAMDirectory to hold the in-memory representation
        // of the index.
        RAMDirectory idx = new RAMDirectory();

        // Make a writer to create the index
        IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_48,
                new StandardAnalyzer(Version.LUCENE_48));
        IndexWriter writer = new IndexWriter(idx, cfg);

        // Add some Document objects containing quotes
        writer.addDocument(createDocument("Theodore Roosevelt",
                "It behooves every man to remember that the work of the " +
                "critic, is of altogether secondary importance, and that, " +
                "in the end, progress is accomplished by the man who does " +
                "things."));
        writer.addDocument(createDocument("Friedrich Hayek",
                "The case for individual freedom rests largely on the " +
                "recognition of the inevitable and universal ignorance " +
                "of all of us concerning a great many of the factors on " +
                "which the achievements of our ends and welfare depend."));
        writer.addDocument(createDocument("Ayn Rand",
                "There is nothing to take a man's freedom away from " +
                "him, save other men. To be free, a man must be free " +
                "of his brothers."));
        writer.addDocument(createDocument("Mohandas Gandhi",
                "Freedom is not worth having if it does not connote " +
                "freedom to err."));

        // Commit and close the writer to finish building the index
        writer.commit();
        writer.close();

        // Build an IndexSearcher over a reader on the in-memory index; the
        // reader is closed once all queries have run.
        DirectoryReader reader = DirectoryReader.open(idx);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);

            // Run some queries
            search(searcher, "freedom");
            search(searcher, "free");
            search(searcher, "progress or achievements");
        } finally {
            reader.close();
        }
    }

    /**
     * Make a Document object with a stored but un-indexed title field and a
     * stored, analyzed content field.
     *
     * @param title value for the "title" field
     * @param content value for the "content" field
     * @return the populated document
     */
    private static Document createDocument(String title, String content) {
        Document doc = new Document();
        // Add the title as a stored, unindexed field...
        doc.add(new Field("title", title, Field.Store.YES, Field.Index.NO));
        // ...and the content as a stored, analyzed field.
        doc.add(new Field("content", content, Field.Store.YES, Field.Index.ANALYZED));
        return doc;
    }

    /**
     * Searches for the given string in the "content" field and prints the
     * total hit count followed by id, score, title and content of each
     * collected hit.
     *
     * @param searcher searcher over the index to query
     * @param queryString text handed to the query parser
     * @throws ParseException if the query string cannot be parsed
     * @throws IOException declared for the search and document lookup calls
     */
    private static void search(IndexSearcher searcher, String queryString)
            throws ParseException, IOException {
        // Build a Query object
        QueryParser parser = new QueryParser("content", new StandardAnalyzer(Version.LUCENE_48));
        Query query = parser.parse(queryString);

        int hitsPerPage = 10;
        // Search for the query, collecting at most 5 * hitsPerPage top hits
        TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false);
        searcher.search(query, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        int hitCount = collector.getTotalHits();
        System.out.println(hitCount + " total matching documents");

        if (hitCount == 0) {
            System.out.println(
                    "No matches were found for \"" + queryString + "\"");
        } else {
            System.out.println("Hits for \"" +
                    queryString + "\" were found in quotes by:");
            // Iterate over the collected hits. The bound is hits.length, not
            // hitCount: the total number of matches can exceed the number of
            // hits the collector was asked to keep.
            for (int i = 0; i < hits.length; i++) {
                ScoreDoc scoreDoc = hits[i];
                int docId = scoreDoc.doc;
                float docScore = scoreDoc.score;
                System.out.println("docId: " + docId + "\t" + "docScore: " + docScore);
                Document doc = searcher.doc(docId);
                // Print the value stored in the "title" field. The field is not
                // indexed (Field.Index.NO) but is stored, so it can be retrieved.
                System.out.println(" " + (i + 1) + ". " + doc.get("title"));
                System.out.println("Content: " + doc.get("content"));
            }
        }
        System.out.println();
    }
}
and this is the output:
Exception in thread "main" java.lang.VerifyError: class
org.apache.lucene.analysis.SimpleAnalyzer overrides final method
tokenStream.(Ljava/lang/String;Ljava/io/Reader;)Lorg/apache/lucene/analysis/TokenStream;
at java.lang.ClassLoader.defineClass1(Native Method) at
java.lang.ClassLoader.defineClass(Unknown Source) at
java.security.SecureClassLoader.defineClass(Unknown Source) at
java.net.URLClassLoader.defineClass(Unknown Source) at
java.net.URLClassLoader.access$100(Unknown Source) at
java.net.URLClassLoader$1.run(Unknown Source) at
java.net.URLClassLoader$1.run(Unknown Source) at
java.security.AccessController.doPrivileged(Native Method) at
java.net.URLClassLoader.findClass(Unknown Source) at
java.lang.ClassLoader.loadClass(Unknown Source) at
sun.misc.Launcher$AppClassLoader.loadClass(Unknown Source) at
java.lang.ClassLoader.loadClass(Unknown Source) at
beehex.inmemeory.textsearch.InMemoryExample.search(InMemoryExample.java:98)
at
beehex.inmemeory.textsearch.InMemoryExample.main(InMemoryExample.java:58)
I don't see a third argument on the IndexWriter constructor. You should modify The code to fit to the new lucene api like so :
IndexWriterConfig cfg = new IndexWriterConfig(Version.LUCENE_48, new StandardAnalyzer(Version.LUCENE_48));
IndexWriter writer = new IndexWriter(idx, cfg);
Also, rather than catching the exception here, I'd make the main method throw Exception and let the program fail altogether.
EDIT :
2) Remove the optimize call, as the IndexWriter class no longer has that method (I think commit will do the trick here).
3) define the IndexSearcher class like so :
IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(idx));

JSoup select from HTML in unix

I have a program that extracts certain elements(article author names) from many articles, from the PubMed site. While the program works correctly in my pc (windows), when i try to run it on unix returns an empty list. I suspect this is because the syntax should be somewhat different in a unix system. The problem is the JSoup documentation does not mention something. Anyone know anything on this? My code is something like this:
Document doc = Jsoup.connect("http://www.ncbi.nlm.nih.gov/pubmed/" + pmidString).timeout(60000).userAgent("Mozilla/25.0").get();
System.out.println("connected");
Elements authors = doc.select("div.auths >*");
System.out.println("number of elements is " + authors.size());
The final System.out.println always says the size is 0 therefore it cannot do anything more.
Thanks in advance
Complete Example:
/**
 * Parses one input line as JSON, fetches the PubMed page for its "pmid" value
 * and records the author names found on that page in both maps.
 *
 * @param authorsMap        author name -> set of pmids the author appears in
 * @param reverseAuthorsMap pmid -> set of author names found for that article
 * @param fileLine          input line; its last character is dropped before
 *                          the remainder is parsed as JSON
 */
protected static void searchLink(
        HashMap<String, HashSet<String>> authorsMap,
        HashMap<String, HashSet<String>> reverseAuthorsMap,
        String fileLine) throws IOException, ParseException, InterruptedException
{
    JSONParser jsonParser = new JSONParser();
    String json = fileLine.substring(0, fileLine.length() - 1);
    JSONObject record = (JSONObject) jsonParser.parse(json);
    String pmidString = (String) record.get("pmid");
    System.out.println(pmidString);

    String articleUrl = "http://www.ncbi.nlm.nih.gov/pubmed/" + pmidString;
    Document doc = Jsoup.connect(articleUrl).timeout(60000).userAgent("Mozilla/25.0").get();
    System.out.println("connected");

    Elements authors = doc.select("div.auths >*");
    System.out.println("found the element");

    HashSet<String> authorsList = new HashSet<>();
    System.out.println("authors list hashSet created");

    final int authorCount = authors.size();
    System.out.println("number of elements is " + authorCount);

    for (int idx = 0; idx < authorCount; idx++)
    {
        // remember the current name for this article
        String authorName = authors.get(idx).text();
        authorsList.add(authorName);
        System.out.println("stage 1");

        HashSet<String> pmidList;
        if (authorsMap.containsKey(authorName))
        {
            // known author: extend the existing set of articles
            System.out.println("Author exists in map");
            pmidList = authorsMap.get(authorName);
            pmidList.add(pmidString);
        }
        else
        {
            // new author: start a fresh set holding the current pmid
            pmidList = new HashSet<>();
            pmidList.add(pmidString);
            System.out.println("made it to searchLink");
        }
        authorsMap.put(authorName, pmidList);

        // finally, register the article's author set under its pmid
        reverseAuthorsMap.put(pmidString, authorsList);
        System.out.println("reverseauthors populated");
    }
}
I have a thread pool, and each thread uses this method to populate two maps. The fileline argument is a single line that I parse as json and keep the "pmid" field. Using this string I access the url of this article, and parse the HTML for the names of the authors. The rest should work (it does work in my pc), but because the authors.size is 0 always, the for directly below the number of elements System.out does not get executed at all.
I've tried an example doing exactly what you're trying:
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
/**
 * Fetches a PubMed article page and prints the elements matched by the
 * author selector, together with the OS name and architecture.
 */
public class Test {

    /**
     * @param args optional; the first argument overrides the default article id
     * @throws IOException if fetching the page fails
     */
    public static void main (String[] args) throws IOException {
        final String docId = (args.length > 0) ? args[0] : "24312906";
        final String url = "http://www.ncbi.nlm.nih.gov/pubmed/" + docId;

        final Document doc = Jsoup.connect(url)
                .timeout(60000)
                .userAgent("Mozilla/25.0")
                .get();
        final Elements authors = doc.select("div.auths >*");

        System.out.println("os.name=" + System.getProperty("os.name"));
        System.out.println("os.arch=" + System.getProperty("os.arch"));
        // Uncomment to dump the whole fetched page:
        // System.out.println("doc=" + doc);
        System.out.println("authors=" + authors);
        System.out.println("authors.length=" + authors.size());

        for (int i = 0; i < authors.size(); i++) {
            System.out.println(" author: " + authors.get(i));
        }
    }
}
My OS is Linux:
# uname -a
Linux graphene 3.11.0-13-generic #20-Ubuntu SMP Wed Oct 23 07:38:26 UTC 2013 x86_64 x86_64 x86_64
GNU/Linux
# lsb_release -a
No LSB modules are available.
Distributor ID: Ubuntu
Description: Ubuntu 13.10
Release: 13.10
Codename: saucy
Running that program produces:
os.name=Linux
os.arch=amd64
authors=Liu W
Chen D
authors.length=2
author: Liu W
author: Chen D
Which seems to work. Perhaps the issue is with fileLine? Can you print out the value of 'url':
System.out.println("url='" + "http://www.ncbi.nlm.nih.gov/pubmed/" + pmidString+ "'");
Since you're not getting an exception from your code, I suspect you're getting a document, just not one your code is anticipating. Printing out the document so you can see what you've gotten back will probably help as well.

How to log exceptions in Java?

There's a common problem I've come across a few times when logging exceptions in Java. There seem to be various different types to deal with. E.g. some wrap other exceptions and some don't have a message at all - only a type.
Most code I've seen logs an exception by using either getMessage() or toString(). But these methods don't always capture all the information needed to pinpoint the problem - other methods such as getCause() and getStackTrace() sometimes provide additional info.
For example, the exception I'm looking at right now in my Eclipse Inspect window is an InvocationTargetException. The exception itself has no cause, no message, no stacktrace ... but the target from getCause() is InvalidUseOfMatchersException, with these details populated.
So my question is: Given an exception of any type as an input, please provide a single method that will output a nicely formatted string containing all relevant information about the Exception (e.g. possibly recursively calling getCause() amongst other things?) Before posting, I was nearly going to have a stab at it myself but really don't want to reinvent the wheel - surely such a thing must have been done many times before...?
The java.util.logging package is standard in Java SE. Its Logger includes an overloaded log method that accepts Throwable objects.
It will log stacktraces of exceptions and their cause for you.
For example:
import java.util.logging.Level;
import java.util.logging.Logger;
[...]
Logger logger = Logger.getAnonymousLogger();
Exception e1 = new Exception();
Exception e2 = new Exception(e1);
logger.log(Level.SEVERE, "an exception was thrown", e2);
Will log:
SEVERE: an exception was thrown
java.lang.Exception: java.lang.Exception
at LogStacktrace.main(LogStacktrace.java:21)
Caused by: java.lang.Exception
at LogStacktrace.main(LogStacktrace.java:20)
Internally, this does exactly what @philipp-wendler suggests, by the way.
See the source code for SimpleFormatter.java. This is just a higher level interface.
What's wrong with the printStackTrace() method provided by Throwable (and thus every exception)? It shows all the info you requested, including the type, message, and stack trace of the root exception and all (nested) causes. In Java 7, it even shows you information about "suppressed" exceptions that might occur in a try-with-resources statement.
Of course you wouldn't want to write to System.err, which the no-argument version of the method does, so instead use one of the available overloads.
In particular, if you just want to get a String:
Exception e = ...
StringWriter sw = new StringWriter();
e.printStackTrace(new PrintWriter(sw));
String exceptionDetails = sw.toString();
If you happen to use the great Guava library, it provides a utility method doing this: com.google.common.base.Throwables#getStackTraceAsString(Throwable).
It should be quite simple if you are using LogBack or SLF4J. I do it as below
// Imports: only the SLF4J API is referenced in the code.
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
// Initialize the logger; replace <classname> with the class that does the logging.
Logger logger = LoggerFactory.getLogger(<classname>.class);
try {
// The code that may throw goes here.
} catch(Exception e){
// Actual logging of the error: the exception object itself is passed as the
// last argument, not just its message.
logger.error("some message", e);
}
A logging script that I have written some time ago might be of help, although it is not exactly what you want. It acts in a way like a System.out.println but with much more information about StackTrace etc. It also provides Clickable text for Eclipse:
private static final SimpleDateFormat extended = new SimpleDateFormat( "dd MMM yyyy (HH:mm:ss) zz" );
public static java.util.logging.Logger initLogger(final String name) {
final java.util.logging.Logger logger = java.util.logging.Logger.getLogger( name );
try {
Handler ch = new ConsoleHandler();
logger.addHandler( ch );
logger.setLevel( Level.ALL ); // Level selbst setzen
logger.setUseParentHandlers( false );
final java.util.logging.SimpleFormatter formatter = new SimpleFormatter() {
#Override
public synchronized String format(final LogRecord record) {
StackTraceElement[] trace = new Throwable().getStackTrace();
String clickable = "(" + trace[ 7 ].getFileName() + ":" + trace[ 7 ].getLineNumber() + ") ";
/* Clickable text in Console. */
for( int i = 8; i < trace.length; i++ ) {
/* 0 - 6 is the logging trace, 7 - x is the trace until log method was called */
if( trace[ i ].getFileName() == null )
continue;
clickable = "(" + trace[ i ].getFileName() + ":" + trace[ i ].getLineNumber() + ") -> " + clickable;
}
final String time = "<" + extended.format( new Date( record.getMillis() ) ) + "> ";
StringBuilder level = new StringBuilder("[" + record.getLevel() + "] ");
while( level.length() < 15 ) /* extend for tabby display */
level.append(" ");
StringBuilder name = new StringBuilder(record.getLoggerName()).append(": ");
while( name.length() < 15 ) /* extend for tabby display */
name.append(" ");
String thread = Thread.currentThread().getName();
if( thread.length() > 18 ) /* trim if too long */
thread = thread.substring( 0, 16 ) + "...";
else {
StringBuilder sb = new StringBuilder(thread);
while( sb.length() < 18 ) /* extend for tabby display */
sb.append(" ");
thread = sb.insert( 0, "Thread " ).toString();
}
final String message = "\"" + record.getMessage() + "\" ";
return level + time + thread + name + clickable + message + "\n";
}
};
ch.setFormatter( formatter );
ch.setLevel( Level.ALL );
} catch( final SecurityException e ) {
e.printStackTrace();
}
return logger;
}
Notice this outputs to the console, you can change that, see http://docs.oracle.com/javase/1.4.2/docs/api/java/util/logging/Logger.html for more information on that.
Now, the following will probably do what you want. It will go through all causes of a Throwable and save it in a String. Note that this does not use StringBuilder, so you can optimize by changing it.
// Placeholder: the Throwable to describe. Note that e is advanced along the
// cause chain below and is null once the loop has finished.
Throwable e = ...
// Header line: exception type and message, followed by one line per stack frame.
String detail = e.getClass().getName() + ": " + e.getMessage();
for( final StackTraceElement s : e.getStackTrace() ) {
detail += "\n\t" + s.toString();
}
while( ( e = e.getCause() ) != null ) {
// Name every cause as well; without its type and message the frames that
// follow cannot be attributed to any exception.
detail += "\nCaused by: " + e.getClass().getName() + ": " + e.getMessage();
for( final StackTraceElement s : e.getStackTrace() ) {
detail += "\n\t" + s.toString();
}
}
Regards,
Danyel
You can also use Apache's ExceptionUtils.
Example:
import org.apache.commons.lang.exception.ExceptionUtils;
import org.apache.log4j.Logger;
/**
 * Minimal demo: provokes a NullPointerException and logs its full stack trace
 * as a single string obtained from ExceptionUtils.
 */
public class Test {

    static Logger logger = Logger.getLogger(Test.class);

    public static void main(String[] args) {
        // Deliberately left null so that reading its length fails.
        final String[] team = null;
        try {
            System.out.println("Size: " + team.length);
        } catch (NullPointerException npe) {
            final String fullTrace = ExceptionUtils.getFullStackTrace(npe);
            logger.info(fullTrace);
        }
    }
}
Console output:
java.lang.NullPointerException
at com.aimlessfist.avengers.ironman.Test.main(Test.java:11)
Something that I do is to have a static method that handles all exceptions and I add the log to a JOptionPane to show it to the user, but you could write the result to a file with a FileWriter wrapped in a BufferedWriter.
For the main static method, to catch the Uncaught Exceptions I do:
// Install the handler first, so that it is already in place when the
// initialization task below starts running on the event dispatch thread.
Thread.setDefaultUncaughtExceptionHandler(
    new Thread.UncaughtExceptionHandler() {
        @Override
        public void uncaughtException( Thread t, Throwable ex ) {
            // Report the failure and shut the application down.
            handleExceptions( ex, true );
        }
    }
);
SwingUtilities.invokeLater( new Runnable() {
    @Override
    public void run() {
        //Initializations...
    }
});
And as for the method:
/**
 * Reports a failure to the user in two dialogs: a short notice, then the full
 * stack trace of {@code ex} and of every exception in its cause chain.
 *
 * @param ex       the exception to report; must not be null
 * @param shutDown if true, the JVM is terminated with a non-zero exit status
 *                 after the dialogs have been dismissed
 */
public static void handleExceptions( Throwable ex, boolean shutDown ) {
    JOptionPane.showMessageDialog( null,
        "A CRITICAL ERROR APPENED!\n",
        "SYSTEM FAIL",
        JOptionPane.ERROR_MESSAGE );

    StringBuilder sb = new StringBuilder( ex.toString() );
    for ( StackTraceElement ste : ex.getStackTrace() ) {
        sb.append( "\n\tat " ).append( ste );
    }
    for ( Throwable cause = ex.getCause(); cause != null; cause = cause.getCause() ) {
        // Name each cause; its frames alone do not say which exception they belong to.
        sb.append( "\nCaused by: " ).append( cause );
        for ( StackTraceElement ste : cause.getStackTrace() ) {
            sb.append( "\n\tat " ).append( ste );
        }
    }
    String trace = sb.toString();

    JOptionPane.showMessageDialog( null,
        "PLEASE SEND ME THIS ERROR SO THAT I CAN FIX IT. \n\n" + trace,
        "SYSTEM FAIL",
        JOptionPane.ERROR_MESSAGE );

    if ( shutDown ) {
        // Exit status 0 would tell the calling process that the run succeeded.
        Runtime.getRuntime().exit( 1 );
    }
}
In your case, instead of "screaming" to the user, you could write a log like I told you before:
String trace = sb.toString();
File file = new File("mylog.txt");
FileWriter myFileWriter = null;
BufferedWriter myBufferedWriter = null;
try {
//with FileWriter(File file, boolean append) you can writer to
//the end of the file
myFileWriter = new FileWriter( file, true );
myBufferedWriter = new BufferedWriter( myFileWriter );
myBufferedWriter.write( trace );
}
catch ( IOException ex1 ) {
//Do as you want. Do you want to use recursive to handle
//this exception? I don't advise that. Trust me...
}
finally {
try {
myBufferedWriter.close();
}
catch ( IOException ex1 ) {
//Idem...
}
try {
myFileWriter.close();
}
catch ( IOException ex1 ) {
//Idem...
}
}
I hope I have helped.
Have a nice day. :)
Best practice is to log the whole exception:
log.error("Our custom message", ex);
By logging the developer message together with the whole exception object, we can see the developer message followed by the exception name and message, then the stack trace.
This gives us a complete picture of what methods were invoked when the exception occurred, down to the library level.
Reference/credits:
https://medium.com/w-logs/how-to-log-exception-properly-6aa80b62ff8a

Categories