Searching in Lucene returns 0 results - Java

I am using Lucene 6.4.0 with the following code, but every query returns 0 results. The code looks correct to me and the index is being built, yet I cannot figure out where I am going wrong.
public void buildIndex(){
Connection con = null;
Statement stmt = null;
ResultSet rs = null;
IndexWriter writer=null;
StandardAnalyzer analyzer = null;
analyzer = new StandardAnalyzer();
IndexWriterConfig config = new IndexWriterConfig(analyzer);
try{
System.out.println("Start indexing");
//get a reference to the index directory file
writer = new IndexWriter(FSDirectory.open(file.toPath()), config);
//initialize the driver class
Class.forName("com.mysql.jdbc.Driver").newInstance();
//get connection object
con = DriverManager.getConnection(
"jdbc:mysql://"+DB_HOST_NAME+"/MTDATABASE",
DB_USER_NAME, DB_PASSWORD);
//create statement object
stmt = con.createStatement();
//execute query
rs = stmt.executeQuery("SELECT * FROM products");
//iterate through result set
while(rs.next()){
productId = rs.getInt("productId");
productImageName = rs.getString("productImageName");
productCategory = rs.getString("productCategory");
productBrandName = rs.getString("productBrandName");
productType = rs.getString("productType");
productName = rs.getString("productName");
prop1 = rs.getString("prop1");
prop2 = rs.getString("prop2");
prop3 = rs.getString("prop3");
prop4 = rs.getString("prop4");
prop5 = rs.getString("prop5");
description = rs.getString("description");
price = rs.getInt("price");
discount = rs.getFloat("discount");
cashback = rs.getFloat("cashback");
availability = rs.getString("availability");
//concatenate all product columns into one full-text string
String fulltext = productId + " " + productImageName +
" " + productCategory+" "+productBrandName+" "+productType+
" " + productName + " "+prop1+" "+prop2+" "+prop3+" "+prop4+" "+prop5+
" "+description+" "+price+" "+discount+" "+cashback+" "+availability;
/*doc.add(new StringField("id",
Integer.toString(id), StringField.Store.YES));
doc.add(new TextField("title", title,
TextField.Store.YES));
w.addDocument(doc);*/
//create document object
addDoc(writer,productId,productImageName,productCategory,productBrandName,productType,
productName,prop1,prop2,prop3,prop4,prop5,description,price,discount,cashback,availability);
writer.close();
}
}catch(Exception e){
e.printStackTrace();
}finally{
try{
if(writer!=null)
writer.close();
if(rs!=null)
rs.close();
if(stmt!=null)
stmt.close();
if(con!=null)
con.close();
System.out.println("Finished indexing");
}catch(Exception ex){
ex.printStackTrace();
}
}
}
public static void main(String[] args) throws Exception {
IndexBuilder builder = new IndexBuilder();
builder.buildIndex();
}
private void addDoc(IndexWriter w, int productId, String productImageName,String productCategory,String productBrandName,String productType,
String productName,String prop1,String prop2,String prop3,String prop4,String prop5,
String description,int price,float discount,float cashback,String availability) throws IOException {
Document doc = new Document();
doc.add(new StringField("produciId",Integer.toString(productId), StringField.Store.YES));
doc.add(new StringField("productImageName",productImageName,StringField.Store.YES));
doc.add(new TextField("productCategory",productCategory,TextField.Store.YES));
doc.add(new TextField("productBrandName",productBrandName,TextField.Store.YES));
doc.add(new TextField("productType",productType,TextField.Store.YES));
doc.add(new TextField("productName",productName,TextField.Store.YES));
doc.add(new TextField("prop1",prop1,TextField.Store.YES));
doc.add(new TextField("prop2",prop2,TextField.Store.YES));
doc.add(new TextField("prop3",prop3,TextField.Store.YES));
doc.add(new TextField("prop4",prop4,TextField.Store.YES));
doc.add(new TextField("prop5",prop5,TextField.Store.YES));
doc.add(new StringField("description",description,StringField.Store.YES));
doc.add(new StringField("price",Integer.toString(price),StringField.Store.YES));
doc.add(new StringField("discount",Float.toString(discount),StringField.Store.YES));
doc.add(new StringField("cashback",Float.toString(cashback),StringField.Store.YES));
doc.add(new StringField("availability",availability,StringField.Store.YES));
w.addDocument(doc);
}
And for searching I am using:
public class Testing {
public static void main(String[] args) {
try{
String LUCENE_INDEX_DIRECTORY = "C:\\lucene";
File file = new File(LUCENE_INDEX_DIRECTORY);
Directory index = FSDirectory.open(file.toPath());
StandardAnalyzer analyzer = new StandardAnalyzer();
String query = "mountain";
QueryParser parser = new QueryParser("productName",analyzer);
Query q = null;
q=parser.parse(query);
int hitsPerPage = 10;
IndexReader reader=null;
TopScoreDocCollector collector = null;
IndexSearcher searcher = null;
reader=DirectoryReader.open(index);
searcher=new IndexSearcher(reader);
collector = TopScoreDocCollector.create(10);
searcher.search(q,collector);
ScoreDoc[] hits=collector.topDocs().scoreDocs;
System.out.println(hits.length);
if(hits.length>0){
for(int i=0; i<hits.length; i++){
int docId = hits[i].doc;
Document document = searcher.doc(docId);
System.out.println("BrandName is: "+document.get("productBrandName")+"and ProductName is: "+document.get("productName")+
"productCategory is: "+document.get("productCategory")+"and prop is:"+document.get("prop1"));
}
}else{
System.out.println("Not Done");
}
}catch(Exception e){
System.out.println("Not done ... ");
}
}
}

If I were you, the first thing I would do is use Luke to open the index and check whether your content was really written to it.
https://github.com/DmitryKey/luke
Also double-check whether the "productName" field actually has values in it.
Once you are certain the index is intact, check whether the search word "mountain" is really a valid term in it.
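A minimal sketch for that last check, assuming Lucene 6.x and the "C:\lucene" index path from your search code above: it dumps every term indexed under "productName", so you can see whether "mountain" is among them.
import java.nio.file.Paths;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;

public class DumpTerms {
    public static void main(String[] args) throws Exception {
        try (IndexReader reader = DirectoryReader.open(
                FSDirectory.open(Paths.get("C:\\lucene")))) {
            System.out.println("docs in index: " + reader.numDocs());
            Terms terms = MultiFields.getTerms(reader, "productName");
            if (terms == null) {
                System.out.println("field 'productName' has no indexed terms");
                return;
            }
            TermsEnum te = terms.iterator();
            BytesRef term;
            while ((term = te.next()) != null) {
                System.out.println(term.utf8ToString()); // is "mountain" listed?
            }
        }
    }
}
If the term list is empty or "mountain" is missing, the problem is on the indexing side, not in the query.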

Related

Lucene issues in the indexing process

I have a problem with Lucene indexing. My documents contain TEXT, HEAD, and DocNo fields, and my queries contain a title and a description. I have relevance judgments. The problem is that when I calculate the MAP it is very small (0.017), whereas my friends found a value of 0.13. I think I have a problem in my IndexFiles class. Can you help me?
public class IndexFiles {
public IndexFiles() {}
public static void main(String[] args) throws IOException, ParseException {
ReadDocuments t = new ReadDocuments();
List<DocumentsParser> docs = new ArrayList<>();
t.readXml(docs, "documents");
final String FIELD_PATH = "path";
final String FIELD_CONTENTS = "contents";
String indexPath = "index1";
Directory dir = FSDirectory.open(new File(indexPath));
Reader r=new FileReader(new File("stopwords.txt"));
StandardAnalyzer analyzer=new StandardAnalyzer(Version.LUCENE_40,r);
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40,analyzer);
/* use BM25 similarity*/
Similarity bm25similarity = new BM25Similarity();
iwc.setSimilarity(bm25similarity);
IndexWriter indexWriter = new IndexWriter(dir, iwc);
for (DocumentsParser doc : docs){
Document document = new Document();
document.add(new StringField("DocNo", doc.getDOCNO(), Field.Store.YES));
document.add(new TextField("TEXT", doc.getTEXT()+" "+doc.getHEAD(),Field.Store.YES));
indexWriter.addDocument(document); }
indexWriter.close();}}
/* class SearchFiles */
public class SearchFiles {
public static void main(String[] args) throws Exception {
SearchFiles ch=new SearchFiles();
searchStemTfidfQLong();
}
SearchFiles() {}
public static void searchStemTfidfQLong() throws ParseException, IOException{
String index = "index1";
String field = "TEXT";
int hitsPerPage = 1000;
IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
IndexSearcher searcher = new IndexSearcher(reader);
/* use BM25 similarity*/
Similarity bm25similarity = new BM25Similarity();
searcher.setSimilarity(bm25similarity);
Reader r=new FileReader(new File("stopwords.txt"));
StandardAnalyzer analyzer=new StandardAnalyzer(Version.LUCENE_40,r);
QueryParser parser = new QueryParser(Version.LUCENE_40,field,analyzer);
int i=0;
File file = new File("fichier.txt");
FileWriter writere=new FileWriter(file.getAbsoluteFile(), true);
for(Topic topic : Parser.getQuerysTopics(Parser.filename)){
/*query chort*/
String queryChort=topic.getTitle();
queryChort=queryChort.replaceAll("([<>\\(\\):/\\\\',\\s\"])", " ").trim();
i++;
//writere.write(queryChort+"\n");
Query query = parser.parse(queryChort);
System.out.println("Query number : "+(i));
searcher.search(query,1000);
doSearch(i, searcher, query, hitsPerPage);
}
reader.close();
writere.close();
}
public static void doSearch(int idReq, IndexSearcher searcher, Query query, int hitsPerPage) throws IOException {
TopDocs results = searcher.search(query, null, hitsPerPage);
System.out.println(query);
ScoreDoc[] hits = results.scoreDocs;
int numTotalHits = results.totalHits;
System.out.println(numTotalHits + " total matching documents");
int start = 1;
int end = Math.min(numTotalHits, hitsPerPage);
File file = new File("file.txt");
FileWriter writer=new FileWriter(file.getAbsoluteFile(), true);
File file1 = new File("fichier.txt");
FileWriter writere=new FileWriter(file1.getAbsoluteFile(), true);
for (int i = start; i < end; i++) {
Document doc = searcher.doc(hits[i].doc);
String DocNo = doc.get("DocNo");
writere.write(DocNo+"\n");
if (DocNo != null) {
writer.write(idReq+" 0 "+DocNo+" "+String.format("%.6f", new Double(hits[i].score))+" "+ i + " "+"ScoreID"+"\n");
} else {
System.out.println((i + 1) + ". " + "No DocNo for this document"); } }
writer.close();
writere.close();}}

How does regex query work on lucene?

I am trying to implement a Lucene search engine in my application.
I am using Lucene 5.4.1.
I have successfully implemented wildcard queries and normal queries in Lucene.
But my main focus is to search for specific text in a text file with regex patterns.
Index Writer code:
public IndexWriter generateIndex(String docsPath) throws IOException {
String indexPath = System.getProperty("java.io.tmpdir") +File.separator+"indexDirectory";
if (indexPath == null) {
throw new IOException("System property 'java.io.tmpdir' does not specify a tmp dir");
}
File tmpDir = new File(indexPath);
if (!tmpDir.exists()) {
boolean created = tmpDir.mkdirs();
if (!created) {
throw new IOException("Unable to create tmp dir " + tmpDir);
}
}
boolean create = true;
final Path docDir = Paths.get(docsPath);
if (!Files.isReadable(docDir)) {
System.out.println("Document directory '" + docDir.toAbsolutePath()
+ "' does not exist or is not readable, please check the path");
System.exit(1);
}
Date start = new Date();
try {
System.out.println("Indexing to directory '" + indexPath + "'...");
Directory dir = FSDirectory.open(Paths.get(indexPath));
Analyzer analyzer = new StandardAnalyzer();
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
if (create) {
iwc.setOpenMode(OpenMode.CREATE);
} else {
iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
}
IndexWriter writer = new IndexWriter(dir, iwc);
indexDocs(writer, docDir);
setIndexWriter(writer);
Date end = new Date();
System.out.println(end.getTime() - start.getTime() + " total milliseconds");
writer.close();
} catch (IOException e) {
System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
}
return getIndexWriter();
}
static void indexDocs(final IndexWriter writer, Path path) throws IOException {
if (Files.isDirectory(path)) {
Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
@Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
try {
indexDoc(writer, file, attrs.lastModifiedTime().toMillis());
} catch (IOException ignore) {
// don't index files that can't be read.
}
return FileVisitResult.CONTINUE;
}
});
} else {
indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis());
}
}
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
try (InputStream stream = Files.newInputStream(file)) {
Document doc = new Document();
Field pathField = new StringField("path", file.toString(), Field.Store.NO);
doc.add(pathField);
doc.add(new LongField("modified", lastModified, Field.Store.NO));
doc.add(new TextField("contents",
new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));
if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
System.out.println("adding " + file);
writer.addDocument(doc);
} else {
System.out.println("updating " + file);
writer.updateDocument(new Term("path", file.toString()), doc);
}
}
}
Index Searching Code:
public IndexReader searchExecutor(String index, String queryString, RegexCapabilities capability) throws Exception {
String field = "contents";
String queries = null;
boolean raw = false;
int hitsPerPage = Integer.MAX_VALUE;
IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
IndexSearcher searcher = new IndexSearcher(reader);
Analyzer analyzer = new StandardAnalyzer();
BufferedReader in = null;
Query q = new RegexpQuery(new Term("text", queryString));
q = q.rewrite(reader);
RegexQuery query = new RegexQuery(new Term("\\s*(FIND|find)"));
if (capability != null)
query.setRegexImplementation(capability);
System.out.println("Searching for: " + query.toString(field));
searcher.search(query, null, 1000);
doSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);
//reader.close();
return reader;
}
public static void doSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw,
boolean interactive)
throws IOException {
TopDocs results = searcher.search(query, 5 * hitsPerPage);
ScoreDoc[] hits = results.scoreDocs;
//generateIndex.deleteDocuments(query);
//generateIndex.getDirectory();
// TermsEnum.totalTermFreq();
int numTotalHits = results.totalHits;
System.out.println(numTotalHits + " total matching documents");
int start = 0;
int end = Math.min(numTotalHits, hitsPerPage);
for (int i = start; i < end; i++) {
Document doc = searcher.doc(hits[i].doc);
String path = doc.get("path");
File file = new File(path);
if (path != null) {
System.out.println((i + 1) + ". " + path);
String title = doc.get("title");
if (title != null) {
System.out.println(" Title: " + doc.get("title"));
}
} else {
System.out.println((i + 1) + ". " + "No path for this document");
}
}
}
Please help.
Your question is about searching with regular expressions in Lucene.
You are using RegexQuery, which is deprecated, so try RegexpQuery instead.
Your regex example starts with \s*, but you do not use KeywordTokenizer; most other tokenizers remove (i.e. split at) whitespace.
Your regex example is not purely lower case, but StandardAnalyzer contains a LowerCaseFilter. Be aware: your regex goes directly against the tokens of your index, not against the original text (see the sketch below).
--> read the supported RegExp syntax, the syntax notes in ES, and TestRegexpRandom (test class), and play with https://github.com/DmitryKey/luke on your index.
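To make the last two points concrete, a minimal sketch, assuming Lucene 5.x and the "contents" field from the indexing code above (the pattern itself is just a hypothetical example):
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RegexpQuery;

public class RegexQueryExample {
    public static Query buildFindQuery() {
        // RegexpQuery is matched token by token: StandardAnalyzer lower-cases
        // tokens and never leaves whitespace inside them, so "\s*(FIND|find)"
        // can never match; a lower-case, single-token pattern can.
        return new RegexpQuery(new Term("contents", "find.*")); // "find", "finds", "finding", ...
    }
}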

Getting TF-IDF values from index

The code below is for getting tf-idf values from an index, but I get an error while running it, on the line with Correct_ME.
Using Lucene 4.8.
DocIndexing.java
public class DocIndexing {
private DocIndexing() {}
/** Index all text files under a directory.
* @param args
* @throws java.io.IOException */
public static void main(String[] args) throws IOException {
String usage = "java org.apache.lucene.demo.IndexFiles"
+ " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
+ "This indexes the documents in DOCS_PATH, creating a Lucene index"
+ "in INDEX_PATH that can be searched with Searching";
String indexPath = "C:/Users/dell/Documents/NetBeansProjects/IndexingSearching/Index";
String docsPath = "C:/Users/dell/Documents/NetBeansProjects/IndexingSearching/ToBeIndexed";
boolean create = true;
for(int i=0;i<args.length;i++) {
if (null != args[i]) switch (args[i]) {
case "-index":
indexPath = args[i+1];
i++;
break;
case "-docs":
docsPath = args[i+1];
i++;
break;
case "-update":
create = false;
break;
}
}
if (docsPath == null) {
System.err.println("Usage: " + usage);
System.exit(1);
}
final File docDir = new File(docsPath);
if (!docDir.canRead() && !docDir.isDirectory() &&
!docDir.isHidden() &&
!docDir.exists()) {
System.out.println("Document directory '" +docDir.getAbsolutePath()+ "' does not exist or is not readable, please check the path");
System.exit(1);
}
Date start = new Date();
try {
System.out.println("Indexing to directory '" + indexPath + "'...");
Directory dir = FSDirectory.open(new File(indexPath));
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
//Filter filter = new PorterStemFilter();
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer);
if (create) {
iwc.setOpenMode(OpenMode.CREATE);
} else {
// Add new documents to an existing index:
iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
}
try (
IndexWriter writer = new IndexWriter(dir, iwc)) {
indexDocs(writer, docDir);
}
Date end = new Date();
System.out.println(end.getTime() - start.getTime() + " total milliseconds");
} catch (IOException e) {
System.out.println(" caught a " + e.getClass() +
"\n with message: " + e.getMessage());
}
Tf_Idf tfidf = new Tf_Idf();
String field = null,term = null;
tfidf.scoreCalculator(field, term);
}
/*
* @param writer Writer to the index where the given file/dir info will be stored
* @param file The file to index, or the directory to recurse into to find files to index
* @throws IOException If there is a low-level I/O error
*/
static void indexDocs(IndexWriter writer, File file)
throws IOException {
// do not try to index files that cannot be read
if (file.canRead()) {
if (file.isDirectory()) {
String[] files = file.list();
// an IO error could occur
if (files != null) {
for (int i = 0; i < files.length; i++) {
indexDocs(writer, new File(file, files[i]));
}
}
} else {
FileInputStream fis;
try {
fis = new FileInputStream(file);
} catch (FileNotFoundException fnfe) {
return;
}
try {
// make a new, empty document
Document doc = new Document();
// Field termV = new LongField("termVector", file.g)
Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
doc.add(pathField);
Field modifiedField = new LongField("modified", file.lastModified(), Field.Store.NO);
doc.add(modifiedField);
Field titleField = new TextField("title", file.getName(), Field.Store.YES);
doc.add(titleField);
Field contentsField = new TextField("contents", new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8)));
doc.add(contentsField);
//contentsField.setBoost((float)0.5);
//titleField.setBoost((float)2.5);
/* doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
doc.add(new TextField("title", file.getName(), Field.Store.YES));
doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8))));
*/
// StringField..setBoost(1.2F);
if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
// New index, so we just add the document (no old document can be there):
System.out.println("adding " + file);
writer.addDocument(doc);
} else {
// Existing index (an old copy of this document may have been indexed) so
// we use updateDocument instead to replace the old one matching the exact
// path, if present:
System.out.println("updating " + file);
writer.updateDocument(new Term("path", file.getPath()), doc);
}
} finally {
fis.close();
}
}
}
}
}
Tf-idf.java
public class Tf_Idf {
static float tf = 1;
static float idf = 0;
private float tfidf_score;
static float [] tfidf = null;
IndexReader indexReader;
public Tf_Idf() throws IOException {
this.indexReader = DirectoryReader.open(FSDirectory.open(new File("C:/Users/dell/Documents/NetBeansProjects/IndexingSearching/Index")));
}
public void scoreCalculator (String field, String term) throws IOException
{
TFIDFSimilarity tfidfSIM = new DefaultSimilarity();
Bits liveDocs = MultiFields.getLiveDocs(indexReader);
TermsEnum termEnum = MultiFields.getTerms(indexReader, field).iterator(null);
BytesRef bytesRef=null;
while ((bytesRef = termEnum.next()) != null) {
if(bytesRef.utf8ToString().trim().equals(term.trim())) {
if(termEnum.seekExact(bytesRef)) {
idf = tfidfSIM.idf(termEnum.docFreq(), indexReader.numDocs());
DocsEnum docsEnum = termEnum.docs(liveDocs, null);
if(docsEnum != null) {
int doc=0;
while((doc = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
tf = tfidfSIM.tf(docsEnum.freq());
tfidf_score = tf * idf ;
System.out.println(" -tfidf_score-" + tfidf_score);
}
}
}
}
}
}
}
It's obvious that you are passing a null IndexReader to the MultiFields method:
IndexReader reader = null;
tfidf.scoreCalculator( reader, field,term);
You need to write something like this:
IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(PATH_TO_LUCENE_INDEX)));
tfidf.scoreCalculator( reader, field,term);
You need to replace PATH_TO_LUCENE_INDEX with the real path, of course.
Another problem that I see: you open an IndexReader in Tf_Idf, but you don't use it anywhere. Maybe it's a good idea to remove it, or to use it inside the scoreCalculator method, e.g.
tfidf.scoreCalculator(field, term);
but inside the method use the field of this class, this.indexReader, instead of a reader that you try to pass into scoreCalculator.
UPD
public Tf_Idf() throws IOException {
this.reader = DirectoryReader.open(FSDirectory.open(new File("Index")));
}
In this code, you need to replace "Index" with the real path to your Lucene index, e.g. /home/user/index or C://index or wherever you have it; a combined sketch follows.
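Putting both fixes together, a minimal sketch, assuming the Lucene 4.8 API from the question (the constructor parameter is an assumption; pass your real index path):
import java.io.File;
import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.store.FSDirectory;

public class Tf_Idf {
    private final IndexReader indexReader;

    public Tf_Idf(String indexPath) throws IOException {
        // open the reader once, here, with the real path
        this.indexReader = DirectoryReader.open(FSDirectory.open(new File(indexPath)));
    }

    public void scoreCalculator(String field, String term) throws IOException {
        TFIDFSimilarity tfidfSIM = new DefaultSimilarity();
        // use the reader this class owns; no reader parameter needed
        Terms terms = MultiFields.getTerms(this.indexReader, field);
        if (terms == null) {
            return; // the field does not exist in this index
        }
        TermsEnum termEnum = terms.iterator(null);
        // ... then iterate exactly as in the question, always via this.indexReader
    }
}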

The barcodes I created with a Java for loop don't all end up in MS Word

I can't seem to put every barcode image I created into the MS Word document; it only contains the last generated barcode. I don't know what to do; I tried a lot of things, but nothing works. Any help would be appreciated.
Here is the code:
import com.aspose.barcode.BarCodeBuilder;
import com.aspose.barcode.Symbology;
import com.aspose.words.Document;
import com.aspose.words.DocumentBuilder;
JButton btnCreate = new JButton("Create Barcode");
btnCreate.addActionListener(new ActionListener() {
public void actionPerformed(ActionEvent arg0) {
String strBaseFolder = "C:\\users\\ronjonathan\\desktop\\barcode\\";
String query = "Select MAX(ProductID) from tblindividualproduct";
try
{
// Generate barcode image
BarCodeBuilder builder = new BarCodeBuilder();
builder.setSymbologyType(Symbology.Code128);
pst=con.prepareStatement(query);
rs=pst.executeQuery();
if(rs.next()){
int MAX = rs.getInt(1);
for(int i = 1;i <= Integer.parseInt(txtBarcode.getText()); i++){
builder.setCodeText(Integer.toString(i+MAX));
String strBarCodeImageSave = ""+(i+MAX)+".jpg";
builder.save(strBaseFolder + strBarCodeImageSave);
// Add the image to a Word doc
Document doc = new Document();
DocumentBuilder docBuilder = new DocumentBuilder(doc);
docBuilder.insertImage(strBaseFolder + strBarCodeImageSave);
String strWordFile = "doc.doc";
doc.save(strBaseFolder + strWordFile);
}
JOptionPane.showMessageDialog(null, "Success!");
}
}catch(Exception e){
e.printStackTrace();
}
}
});
I have solved my problem. I just moved the doc and docBuilder out of the for loop to the top, because otherwise the document was created anew and overwritten on every iteration.
String strBaseFolder = "C:\\users\\ronjonathan\\desktop\\barcode\\";
String query = "Select MAX(ProductID) from tblindividualproduct";
String strBarCodeImageSave = null;
try
{
Document doc = new Document();
DocumentBuilder docBuilder = new DocumentBuilder(doc);
// Generate barcode image
BarCodeBuilder builder = new BarCodeBuilder();
builder.setSymbologyType(Symbology.Code128);
pst=con.prepareStatement(query);
rs=pst.executeQuery();
if(rs.next()){
int MAX = rs.getInt(1);
for(int i = 1;i <= Integer.parseInt(txtBarcode.getText()); i++){
builder.setCodeText(Integer.toString(i+MAX));
strBarCodeImageSave = "" + (i+MAX) + ".jpg";
builder.save(strBaseFolder + strBarCodeImageSave);
// add the image to the single Word doc created before the loop
docBuilder.insertImage(strBaseFolder + strBarCodeImageSave);
String strWordFile = "doc.doc";
doc.save(strBaseFolder + strWordFile);
}
JOptionPane.showMessageDialog(null, "Success!");
}
}catch(Exception e){
e.printStackTrace();
}

PhraseQuery not working in Lucene 4.5.0

I tried to work with PhraseQuery but could not get any hits from my search. I am using Lucene 4.5.0.
My Indexing code
private IndexWriter writer;
public LuceneIndexSF(final String indexDir) throws IOException {
Analyzer analyzer = new KeywordAnalyzer();
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45,
analyzer);
Directory directory = FSDirectory.open(new File(indexDir));
writer = new IndexWriter(directory, config);
}
private Document getDocument(File f, String line, int lineNum)
throws IOException {
Document doc = new Document();
Field field = null;
if (line != null && line.split(DELIMITER).length >= 5) {
String[] lineValues = line.split(DELIMITER);
field = new Field("name", line.split("\t")[1],
TextField.TYPE_STORED);
doc.add(field);
if (lineValues[2] != null && !lineValues[2].trim().isEmpty()) {
field = new Field("ref", lineValues[2], TextField.TYPE_STORED);
doc.add(field);
}
field = new Field("type", lineValues[3], TextField.TYPE_STORED);
doc.add(field);
field = new LongField("code", Long.parseLong(lineValues[4]),
LongField.TYPE_STORED);
doc.add(field);
if (lineValues.length == 7 && lineValues[5] != null
&& !lineValues[5].trim().isEmpty()) {
field = new Field("alias1", lineValues[5],
TextField.TYPE_STORED);
doc.add(field);
}
if (lineValues.length == 7 && lineValues[6] != null
&& !lineValues[6].trim().isEmpty()) {
field = new Field("alias2", lineValues[6],
TextField.TYPE_STORED);
doc.add(field);
}
}
field = new IntField("linenum", lineNum, IntField.TYPE_STORED);
doc.add(field);
return doc;
}
... and other code where I add documents to the writer using writer.addDocument(doc);
My Searching Code
private static void search(String indexDir, String quer) throws IOException,
ParseException {
IndexReader inxRead = DirectoryReader.open(FSDirectory.open(new File(
indexDir)));
IndexSearcher is = new IndexSearcher(inxRead);
String[] termArr = quer.split(" ");
PhraseQuery phraseQuery= new PhraseQuery();
for(int inx = 0; inx < termArr.length; inx++){
phraseQuery.add(new Term("name", termArr[inx]));
}
phraseQuery.setSlop(4);
long start = System.currentTimeMillis();
TopDocs hits = is.search(phraseQuery, 1000);
long end = System.currentTimeMillis();
System.err.println("Parser> Found " + hits.totalHits
+ " document(s) (in " + (end - start)
+ " milliseconds) that matched query '" + multiQuery + "':");
for (ScoreDoc scoreDoc : hits.scoreDocs) {
Document doc = is.doc(scoreDoc.doc);
System.out.println("Parser> " + scoreDoc.score + " :: "
+ doc.get("type") + " - " + doc.get("code") + " - "
+ doc.get("name") + ", " + doc.get("linenum"));
}
inxRead.close();
}
Please tell me if I am doing anything wrong.
Edit
I also tried with StandardAnalyzer; still no results:
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_45);
Solution
According to Arun's answer, for PhraseQuery to work properly the analyzer must tokenize each word in the document's field. In my case I used a LowerCaseFilter (and lower-cased all queries) so that search works without case sensitivity, plus an EdgeNGramTokenFilter for auto-completion purposes.
public LuceneIndexSF(final String indexDir) throws IOException {
Analyzer analyzer = new Analyzer() {
@Override
protected TokenStreamComponents createComponents(String fieldName,
java.io.Reader reader) {
Tokenizer source = new StandardTokenizer(Version.LUCENE_45,
reader);
TokenStream result = new StandardFilter(Version.LUCENE_45,
source);
result = new LowerCaseFilter(Version.LUCENE_45, result);
result = new EdgeNGramTokenFilter(Version.LUCENE_45, result, 1,
20);
return new TokenStreamComponents(source, result);
}
};
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45,
analyzer);
Directory directory = FSDirectory.open(new File(indexDir));
writer = new IndexWriter(directory, config);
}
My final search method
private static void search(String indexDir, String quer) throws IOException,
ParseException {
IndexReader inxRead = DirectoryReader.open(FSDirectory.open(new File(
indexDir)));
IndexSearcher is = new IndexSearcher(inxRead);
String[] termArr = quer.split(" ");
PhraseQuery query1 = new PhraseQuery();
PhraseQuery query2 = new PhraseQuery();
PhraseQuery query3 = new PhraseQuery();
for (int inx = 0; inx < termArr.length; inx++) {
query1.add(new Term(SchoolFinderConstant.ENTITY_NAME,termArr[inx]),inx);
query2.add(new Term(SchoolFinderConstant.ENTITY_ALIAS1,termArr[inx]),inx);
query3.add(new Term(SchoolFinderConstant.ENTITY_ALIAS2,termArr[inx]),inx);
}
BooleanQuery mainQuery = new BooleanQuery();
mainQuery.add(query1, Occur.SHOULD);
mainQuery.add(query2, Occur.SHOULD);
mainQuery.add(query3, Occur.SHOULD);
long start = System.currentTimeMillis();
TopDocs hits = is.search(mainQuery, 1000);
long end = System.currentTimeMillis();
System.err.println("Parser> Found " + hits.totalHits
+ " document(s) (in " + (end - start)
+ " milliseconds) that matched query '" + multiQuery + "':");
for (ScoreDoc scoreDoc : hits.scoreDocs) {
Document doc = is.doc(scoreDoc.doc);
System.out.println("Parser> " + scoreDoc.score + " :: "
+ doc.get("type") + " - " + doc.get("code") + " - "
+ doc.get("name") + ", " + doc.get("linenum"));
}
inxRead.close();
}
I played with your code using KeywordAnalyzer, and obviously it did not work, because KeywordAnalyzer "tokenizes" the entire stream as a single token. That is useful for data like zip codes, ids, and some product names (http://lucene.apache.org/core/4_5_0/analyzers-common/org/apache/lucene/analysis/core/KeywordAnalyzer.html); for it to match, you would have to specify the entire token without any change.
Then I used WhitespaceAnalyzer: it worked and was able to find matches for your PhraseQuery, with no change to the rest of the code. Let me know if this works for you.
The reason search worked with MultiFieldQueryParser is that you are forced to use an analyzer at query time, and it matched the one used for your index. In short, you need to make sure your index-time and query-time analyzers are similar; a sketch follows.
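To illustrate, a minimal self-contained sketch, assuming the Lucene 4.5 API from the question (the field value "Springfield High School" is made up for the example): the same WhitespaceAnalyzer is used at index time, so the hand-built phrase terms must match its tokens, including case.
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class PhraseQueryDemo {
    public static void main(String[] args) throws Exception {
        RAMDirectory dir = new RAMDirectory();
        // WhitespaceAnalyzer splits on whitespace only and keeps case
        Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_45);
        IndexWriter writer = new IndexWriter(dir,
                new IndexWriterConfig(Version.LUCENE_45, analyzer));
        Document doc = new Document();
        doc.add(new TextField("name", "Springfield High School", Field.Store.YES));
        writer.addDocument(doc);
        writer.close();

        PhraseQuery query = new PhraseQuery();
        query.add(new Term("name", "High"));   // same case as the indexed tokens,
        query.add(new Term("name", "School")); // since WhitespaceAnalyzer keeps case
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher searcher = new IndexSearcher(reader);
        System.out.println(searcher.search(query, 10).totalHits); // prints 1
        reader.close();
    }
}
With KeywordAnalyzer instead, the whole value "Springfield High School" would be one token and the two-term phrase would find nothing, which matches the behaviour described above.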
