The background info here is that I have a working Indexer and Search (in java) that indexes and searches a file directory for the filenames and then copies the files to a "Results" Directory.
What I need, and don't have much experience with, is writing JSP files. I need the JSP page to have a search bar for the text and a search button. When text is entered in the bar and the button is clicked, it should run my search program with the entered text as an argument.
I have added the IndexFiles and the SearchFiles classes for reference.
Please explain with a good example if you can help out!
/**
 * Command-line front end that runs one query against a Lucene index and
 * copies every matching file into {@link #searchDirectory}.
 *
 * <p>Recognised options: {@code -index dir}, {@code -field f},
 * {@code -queries file} and {@code -query words...}. Everything that follows
 * {@code -query} up to the next recognised option is joined with single
 * spaces and used as the query text.
 */
public class SearchFiles {
    /** Directory that receives a copy of each hit; it is deleted before every search. */
    static File searchDirectory = new File(
            "C:\\Users\\flood.j.2\\Desktop\\IndexSearch\\Results");
    /** Query text assembled from the command line. */
    static String v = new String();
    /** Value of the "path" field of the most recently visited hit. */
    static String path = null;
    String title = null;
    File addedFile = null;
    OutputStream out = null;
    String dirName = "C:\\Users\\flood.j.2\\Desktop\\IndexSearch\\Results";

    /**
     * Parses the command line, opens the index and executes exactly one query.
     *
     * @param args command-line options, see the class description
     * @throws Exception if the index cannot be opened or the query cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles [-index dir] [-field f] [-repeat n] [-queries file] [-query string]";
        if (args.length > 0
                && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
            System.out.println(usage);
            System.exit(0);
        }

        String index = "index";
        String field = "contents";
        String queries = null;
        boolean raw = false;
        String queryString = null;
        int hits = 100;

        for (int i = 0; i < args.length; i++) {
            String option = args[i];
            if ("-query".equals(option)) {
                // Take the words that actually follow -query instead of
                // assuming they start at a fixed argument position.
                StringBuilder words = new StringBuilder();
                while (i + 1 < args.length && !isOption(args[i + 1])) {
                    if (words.length() > 0) {
                        words.append(' ');
                    }
                    words.append(args[++i]);
                }
                v = words.toString();
                queryString = v;
            } else if (isOption(option)) {
                if (i + 1 >= args.length) {
                    System.err.println("Missing value for " + option);
                    System.err.println(usage);
                    System.exit(1);
                }
                String value = args[++i];
                if ("-index".equals(option)) {
                    index = value;
                } else if ("-field".equals(option)) {
                    field = value;
                } else {
                    queries = value;
                }
            }
        }

        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(
                index)));
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
            BufferedReader in;
            if (queries != null) {
                in = new BufferedReader(new InputStreamReader(new FileInputStream(
                        queries), "UTF-8"));
            } else {
                in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
            }
            try {
                QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
                boolean interactive = queries == null && queryString == null;
                if (interactive) {
                    System.out.println("Enter query: ");
                }
                String line = queryString != null ? queryString : in.readLine();
                if (line != null) {
                    line = line.trim();
                    if (line.length() > 0) {
                        Query query = parser.parse(line);
                        System.out.println("Searching for: " + query.toString(field));
                        // The search runs once; the previous loop executed a
                        // -query search twice because its exit test was inverted.
                        doPagingSearch(in, searcher, query, hits, raw, interactive);
                    }
                }
            } finally {
                // Only close streams this method opened; System.in is left alone.
                if (queries != null) {
                    in.close();
                }
            }
        } finally {
            reader.close();
        }
    }

    /** Returns true when {@code arg} is one of the options understood by {@link #main}. */
    private static boolean isOption(String arg) {
        return "-index".equals(arg) || "-field".equals(arg)
                || "-queries".equals(arg) || "-query".equals(arg);
    }

    /**
     * Runs {@code query}, prints the first page of hits and copies each hit's
     * file into {@link #searchDirectory}, which is deleted first.
     *
     * <p>The method returns normally in every mode; it never terminates the
     * JVM, so the caller can release the index reader.
     *
     * @param in          unused, kept for interface compatibility
     * @param searcher    searcher over the open index
     * @param query       parsed query to execute
     * @param hitsPerPage maximum number of hits to print and copy
     * @param raw         unused, kept for interface compatibility
     * @param interactive unused, kept for interface compatibility
     * @throws IOException if the search fails or the result directory cannot be deleted
     */
    public static void doPagingSearch(BufferedReader in,
            IndexSearcher searcher, Query query, int hitsPerPage, boolean raw,
            boolean interactive) throws IOException {
        // Collect enough docs to show 5 pages
        TopDocs results = searcher.search(query, 5 * hitsPerPage);
        ScoreDoc[] hits = results.scoreDocs;
        int numTotalHits = results.totalHits;
        System.out.println(numTotalHits + " total matching documents");

        int end = Math.min(Math.min(numTotalHits, hitsPerPage), hits.length);
        FileUtils.deleteDirectory(searchDirectory);
        for (int i = 0; i < end; i++) {
            Document doc = searcher.doc(hits[i].doc);
            path = doc.get("path");
            if (path == null) {
                continue;
            }
            System.out.println((i + 1) + ". " + path);
            try {
                FileUtils.copyFileToDirectory(new File(path), searchDirectory);
            } catch (IOException e) {
                // Best effort: one file that cannot be copied must not abort the rest.
                e.printStackTrace();
            }
        }
    }
}
/**
 * Command-line tool that indexes the names and paths of all readable files
 * below a directory into a Lucene index.
 *
 * <p>Options: {@code -index INDEX_PATH} (defaults to {@code "index"}),
 * {@code -docs DOCS_PATH} (required) and {@code -update} (append to / update
 * an existing index instead of recreating it).
 */
public class IndexFiles {
    private IndexFiles() {
    }

    /**
     * Parses the options and indexes the document directory.
     *
     * @param args command-line options, see the class description
     */
    public static void main(String[] args) {
        String usage = "java org.apache.lucene.demo.IndexFiles"
                + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
                + "This indexes the documents in DOCS_PATH, creating a Lucene index"
                + "in INDEX_PATH that can be searched with SearchFiles";
        // Default index location; without it a missing -index option ended in
        // a NullPointerException from new File(null).
        String indexPath = "index";
        String docsPath = null;
        boolean create = true;
        for (int i = 0; i < args.length; i++) {
            boolean hasValue = i + 1 < args.length;
            if ("-index".equals(args[i]) && hasValue) {
                indexPath = args[++i];
            } else if ("-docs".equals(args[i]) && hasValue) {
                docsPath = args[++i];
            } else if ("-update".equals(args[i])) {
                create = false;
            }
        }
        if (docsPath == null) {
            System.err.println("Usage: " + usage);
            System.exit(1);
        }
        final File docDir = new File(docsPath);
        if (!docDir.exists() || !docDir.canRead()) {
            System.out
                    .println("Document directory '"
                            + docDir.getAbsolutePath()
                            + "' does not exist or is not readable, please check the path");
            System.exit(1);
        }
        Date start = new Date();
        try {
            System.out.println("Indexing to directory '" + indexPath + "'...");
            Directory dir = FSDirectory.open(new File(indexPath));
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40,
                    analyzer);
            iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
            IndexWriter writer = new IndexWriter(dir, iwc);
            try {
                indexDocs(writer, docDir);
            } finally {
                // Always release the writer so a failed run does not leave it open.
                writer.close();
            }
            Date end = new Date();
            System.out.println(end.getTime() - start.getTime()
                    + " total milliseconds");
        } catch (IOException e) {
            System.out.println(" caught a " + e.getClass()
                    + "\n with message: " + e.getMessage());
        }
    }

    /**
     * Indexes {@code file}, or recurses into it when it is a directory.
     * Unreadable files and files that cannot be opened are skipped.
     *
     * <p>Each document gets a stored "path" field (absolute path), a
     * "modified" field and an unstored, tokenized "title" field (file name).
     *
     * @param writer writer of the index that receives the documents
     * @param file   file to index or directory to walk
     * @throws IOException if the index cannot be written
     */
    static void indexDocs(IndexWriter writer, File file) throws IOException {
        if (!file.canRead()) {
            return;
        }
        if (file.isDirectory()) {
            String[] files = file.list();
            // list() returns null when an I/O error occurs.
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
            return;
        }

        // Opening the file only verifies that it can really be read.
        FileInputStream fis;
        try {
            fis = new FileInputStream(file);
        } catch (FileNotFoundException fnfe) {
            return;
        }
        try {
            String absolutePath = file.getAbsolutePath();
            Document doc = new Document();
            Field pathField = new StringField("path", absolutePath, Field.Store.YES);
            doc.add(pathField);
            doc.add(new LongField("modified", file.lastModified(),
                    Field.Store.NO));
            doc.add(new TextField("title", file.getName(), Field.Store.NO));
            System.out.println(pathField);
            if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                System.out.println("adding " + file);
                writer.addDocument(doc);
            } else {
                System.out.println("updating " + file);
                // The term must match the stored "path" value exactly,
                // otherwise the old document is never replaced.
                writer.updateDocument(new Term("path", absolutePath), doc);
            }
        } finally {
            fis.close();
        }
    }
}
First, you should definitely do this in a servlet rather than a JSP. Putting lots of logic in JSP is bad practice. (See the servlets info page).
Second, it would probably be better on performance to make a cronjob (Linux) or Task (Windows) to run the search program every hour and store the results in a database and just have your servlet pull from there rather than allow the user to initiate the search program.
Related
I have already uploaded the excel file in my local directory but the problem is that I can't read it from that location.
I am using struts 1.1 ,db2.
package mj.eps.action;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.math.BigDecimal;
import java.sql.Timestamp;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Properties;
import java.util.Vector;
import javax.servlet.ServletConfig;
import javax.servlet.ServletContext;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;
import org.apache.struts.action.ActionError;
import org.apache.struts.action.ActionErrors;
import org.apache.struts.action.ActionForm;
import org.apache.struts.action.ActionForward;
import org.apache.struts.action.ActionMapping;
import com.mj.eps.dto.business.auction.UploadObject;
import com.mj.eps.dto.training.IndexValueReportObject;
import com.mj.eps.framework.util.FileScaner;
import com.mj.eps.framework.util.IConstant;
public class IndexValueAction extends EPSBaseAction{
public IndexValueAction() {
super();
}
#SuppressWarnings("unchecked")
#Override
public ActionForward execute(
ActionMapping mapping,
ActionForm form,
HttpServletRequest request,
HttpServletResponse response)
throws Exception {
Vector<String> vector = new Vector<String>();
ActionErrors errors = new ActionErrors();
ActionForward forward = new ActionForward();
try{
String contentType = request.getContentType();
if ((contentType != null)
&& (contentType.indexOf("multipart/form-data") >= 0)) {
DataInputStream in =
new DataInputStream(request.getInputStream());
int formDataLength = request.getContentLength();
byte dataBytes[] = new byte[formDataLength];
int byteRead = 0;
int totalBytesRead = 0;
while (totalBytesRead < formDataLength) {
byteRead =
in.read(dataBytes, totalBytesRead, formDataLength);
totalBytesRead += byteRead;
}
String file = new String(dataBytes);
try {
String saveFile = file.substring(file.indexOf("filename=\"") + 10);
saveFile = saveFile.substring(0, saveFile.indexOf("\n"));
saveFile = saveFile.substring(saveFile.lastIndexOf("\\") + 1, saveFile.indexOf("\""));
int lastIndex = contentType.lastIndexOf("=");
String boundary =
contentType.substring(
lastIndex + 1,
contentType.length());
int pos;
pos = file.indexOf("filename=\"");
pos = file.indexOf("\n", pos) + 1;
pos = file.indexOf("\n", pos) + 1;
pos = file.indexOf("\n", pos) + 1;
int boundaryLocation = file.indexOf(boundary, pos) - 4;
int startPos = ((file.substring(0, pos)).getBytes()).length;
int endPos =
(
(file.substring(0, boundaryLocation))
.getBytes())
.length;
Vector<IndexValueReportObject> indexObjectVector = new Vector<IndexValueReportObject>();
Date dt = new Date();
SimpleDateFormat df = new SimpleDateFormat();
df.applyPattern("dd-MM-yy hh-mm-ss");
saveFile = saveFile.substring(0, saveFile.indexOf("."))+ "-"+ df.format(dt)+ ".xls";
String uploadFilePath = "";
ServletContext sc1 = request.getSession().getServletContext();
Properties properties = new Properties();
String realPath1 = sc1.getRealPath("serverPath.properties");
FileInputStream fis = new FileInputStream(realPath1);
properties.load(fis);
uploadFilePath = properties.getProperty("fileUpload.path");
FileOutputStream fileOut = new FileOutputStream(uploadFilePath + saveFile);
System.out.println(uploadFilePath);
if(endPos > dataBytes.length )
endPos = dataBytes.length;
fileOut.write(dataBytes, startPos, (endPos - startPos));
fileOut.flush();
fileOut.close();
Vector<String[]> rowVector = new Vector<String[]>();
Vector<Object> errorVector = new Vector<Object>();
UploadObject uploadObject = new UploadObject();
int errorId=0;
try {
File outFile = new File(uploadFilePath + saveFile);
ServletConfig config = getServlet();
ServletContext sc = config.getServletContext();
boolean retVal = false;
boolean exists = true;
String filepath = outFile.getAbsolutePath();
String realPath = sc.getRealPath("virusCheck.properties");
exists = FileScaner.loadProperty(realPath);
if (exists) {
retVal = FileScaner.checkVirus(realPath, filepath);
if (!retVal) {
FileScaner.cleanFile(filepath);
errors.add(
ActionErrors.GLOBAL_ERROR,
new ActionError(
"errors.dynamic",
"<li>Virus found!!"));
}
if (retVal) {
retVal = FileScaner.checkFileSign(filepath);
if (!retVal) {
FileScaner.cleanFile(filepath);
errors.add(
ActionErrors.GLOBAL_ERROR,
new ActionError(
"errors.dynamic",
"<li>File type not supported!!"));
}
}
}
else {
}
if (retVal) {
}
if (errors.isEmpty())
{
***FileInputStream inputStream=new FileInputStream(uploadFilePath + saveFile);
Workbook w = Workbook.getWorkbook(inputStream);
Sheet read_sheet = w.getSheet(0);***
int rows=read_sheet.getRows();
for (int j=1;j<rows;j++){
String[] fields=new String[4];
for(int i=0;i<fields.length;i++)
{
Cell cell=read_sheet.getCell(i,j);
fields[i]=cell.getContents().trim();
if(fields[0] != null ){
try{
if (fields[i].indexOf(".") > 0) {
if (i != 1) {
fields[i] = fields[i];
}else {
fields[i].substring(0, fields[i].indexOf("."));
}
}
}
catch(Exception e1)
{
int k=i+1;
errorVector.add("Invalid data at field "+k);
System.out.println(" err1 :" + e1.toString());
errorId = 1;
}
}
}
if(!(fields[0] == null || (fields[0].trim().equals(""))))
{
rowVector.add(fields);
}
else{
break;
}
}
uploadObject.setErrorId(errorId);
uploadObject.setErrorVector(errorVector);
uploadObject.setRowVector(rowVector);
errorId = uploadObject.getErrorId();
rowVector = uploadObject.getRowVector();
errorVector = uploadObject.getErrorVector();
if (errorId == 0) {
int row = 0;
if (rowVector.size() > 0) {
for (int i = 0; i < rowVector.size(); i++) {
IndexValueReportObject indexValueReportObj =
new IndexValueReportObject();
String[] fields =
rowVector.elementAt(i);
System.out.println("abc>>>>>"+fields[0]);
System.out.println("abc>>>>>>>>>>>>>"+fields[1]);
System.out.println("abc>>>>>>>>>>>>>>>>>"+fields[2]);
System.out.println("abc>>>>>>>>>>>>>>>>>>>>>>>"+fields[3]);
try{
indexValueReportObj
.setDate(
Timestamp.valueOf(fields[0]));
}catch (Exception e1)
{
row = i + 1;
vector.add("Invalid Date at row " + (row+1));
}
try{
indexValueReportObj
.setPlatts(
new BigDecimal(fields[1]));
}catch (Exception e1)
{
row = i + 1;
vector.add("Invalid Platts Value at row " + (row+1));
}
try{
indexValueReportObj
.setArgus(
new BigDecimal(fields[2]));
}catch (Exception e1)
{
row = i + 1;
vector.add("Invalid Argus Value at row " + (row+1));
}
try{
indexValueReportObj
.setTsi(
new BigDecimal(fields[3]));
}catch (Exception e1)
{
row = i + 1;
vector.add("Invalid TSI value at row " + (row+1));
}
indexObjectVector.add(
indexValueReportObj);
}
} else {
vector.add(
"No Bidder is created in the excel file ");
}
} else {
for (int k = 0; k < errorVector.size(); k++) {
String errorDescription =
(String) errorVector.get(k);
vector.add(errorDescription);
}
}
}
} catch (Exception e1) {
errorId = uploadObject.getErrorId();
rowVector = uploadObject.getRowVector();
errorVector = uploadObject.getErrorVector();
for (int k = 0; k < errorVector.size(); k++) {
String errorDescription =
(String) errorVector.get(k);
vector.add(errorDescription);
}
}
String filePathAndName=uploadFilePath + saveFile;
System.out.println("indexObjectVector"+indexObjectVector);
request.setAttribute(
IConstant.INDEX_OBJECT_VECTOR,
indexObjectVector);
request.getSession().setAttribute(
IConstant.FILE_NAME,
filePathAndName);
request.setAttribute(IConstant.UPLOAD_MESSAGE, "uploaded");
} catch (Exception e1) {
vector.add("File name or sheet name error ");
}
} else {
vector.add("File Type mismatch ");
}
} catch (Exception e) {
vector.add("No excel has been selected ");
}
if (!errors.isEmpty()) {
request.setAttribute(IConstant.ERROR_VECTOR, errors);
forward = mapping.findForward("failureUpload");
} else {
forward = mapping.findForward("success");
}
return forward;
}
}
In the Workbook portion I want to read the Excel file from a local directory. When I run this code in debug mode, control goes into the exception block and the request is then forwarded to my success page. Can someone please help me with this? I put a sysout in that portion but nothing is printed. I am using JXL to read and write Excel files. My confusion is about the part marked with three stars (***).
Not an answer, but a partial refactoring.
This is an abomination of an action. There is zero way this should have ever passed any code review:
Actions and business logic should be completely separate.
Methods should be responsible for a single responsibility. This method:
Checks content type (arguably reasonable since that's web-related)
Manually parses the form (inexcusable; use a library)
Processes an Excel file (inexcusable; extract) which in turn:
Validates against business logic
Converts Excel values to BO values
The amount of unrelated work this action does defies explanation, and it makes the code essentially impossible to reason about, debug, or fix. The code below is a very rough start at refactoring; you have another day or two of work ahead to properly isolate the levels of functionality and the error handling. The error handling is wrong, by the way: you never check vector (possibly the worst-named variable in history) for errors, so everything added to it is simply thrown away.
public class IndexValueAction extends EPSBaseAction {
public IndexValueAction() {
super();
}
boolean isValidContentType(contentType) {
return (contentType != null) && (contentType.indexOf("multipart/form-data") >= 0)
}
String readContent(request) throws Exception {
DataInputStream in = new DataInputStream(request.getInputStream());
int formDataLength = request.getContentLength();
byte dataBytes[] = new byte[formDataLength];
int totalBytesRead = 0;
while (totalBytesRead < formDataLength) {
totalBytesRead += in.read(dataBytes, totalBytesRead, formDataLength);
}
return new String(dataBytes);
}
void scanFile(realPath, outFilePath, errors) throws Exception {
if (!FileScanner.loadProperty(realPath)) {
return false;
}
if (!FileScanner.cleanFile(outFilePath)) {
FileScaner.cleanFile(outFilePath);
errors.add(ActionErrors.GLOBAL_ERROR, new ActionError("errors.dynamic", "<li>Virus found!!"));
return;
}
if (!FileScanner.checkFileSign(outFilePath)) {
FileScaner.cleanFile(filepath);
errors.add(ActionErrors.GLOBAL_ERROR, new ActionError("errors.dynamic", "<li>File type not supported!!"));
}
}
IndexValueReportObject loadIndexValueReportObject(String[] fields, int i, Vector<String> vector) {
IndexValueReportObject indexValueReportObj = new IndexValueReportObject();
try {
indexValueReportObj.setDate(Timestamp.valueOf(fields[0]));
} catch (Exception e) {
vector.add("Invalid Date at row " + (i + 2));
}
try {
indexValueReportObj.setPlatts(new BigDecimal(fields[1]));
} catch (Exception e) {
vector.add("Invalid Platts Value at row " + (i + 2));
}
try {
indexValueReportObj.setArgus(new BigDecimal(fields[2]));
} catch (Exception e) {
vector.add("Invalid Argus Value at row " + (i + 2));
}
try {
indexValueReportObj.setTsi(new BigDecimal(fields[3]));
} catch (Exception e) {
vector.add("Invalid TSI value at row " + (i + 2));
}
return indexValueReportObj;
}
#Override
#SuppressWarnings("unchecked")
public ActionForward execute(ActionMapping mapping, ActionForm form, HttpServletRequest request, HttpServletResponse response) throws Exception {
Vector<String> vector = new Vector<String>();
if (!isValidContentType(request.getContentType())) {
vector.add("File Type mismatch");
return mapping.findForward("failureUpload");
}
String file;
try {
file = readContent(request);
} catch (Exception e) {
vector.add("No excel has been selected");
return mapping.findForward("failureUpload");
}
ActionErrors errors = new ActionErrors();
try {
String saveFile = file.substring(file.indexOf("filename=\"") + 10);
saveFile = saveFile.substring(0, saveFile.indexOf("\n"));
saveFile = saveFile.substring(saveFile.lastIndexOf("\\") + 1, saveFile.indexOf("\""));
int lastIndex = contentType.lastIndexOf("=");
String boundary = contentType.substring(lastIndex + 1, contentType.length());
int pos = file.indexOf("filename=\"");
pos = file.indexOf("\n", pos) + 1;
pos = file.indexOf("\n", pos) + 1;
pos = file.indexOf("\n", pos) + 1;
int boundaryLocation = file.indexOf(boundary, pos) - 4;
int startPos = ((file.substring(0, pos)).getBytes()).length;
int endPos = file.substring(0, boundaryLocation).getBytes().length;
Vector<IndexValueReportObject> indexObjectVector = new Vector<IndexValueReportObject>();
SimpleDateFormat df = new SimpleDateFormat("dd-MM-yy hh-mm-ss");
String fileSuffix = "-" + df.format(new Date()) + ".xls";
saveFile = saveFile.substring(0, saveFile.indexOf(".")) + fileSuffix;
ServletContext sc1 = request.getSession().getServletContext();
Properties properties = new Properties();
String realPath1 = sc1.getRealPath("serverPath.properties");
FileInputStream fis = new FileInputStream(realPath1);
properties.load(fis);
String uploadFilePath = properties.getProperty("fileUpload.path");
FileOutputStream fileOut = new FileOutputStream(uploadFilePath + saveFile);
System.out.println(uploadFilePath);
if (endPos > dataBytes.length) {
endPos = dataBytes.length;
}
fileOut.write(dataBytes, startPos, (endPos - startPos));
fileOut.flush();
fileOut.close();
Vector<String []> rowVector = new Vector<String[]>();
Vector<Object> errorVector = new Vector<Object>();
UploadObject uploadObject = new UploadObject();
int errorId = 0;
try {
ServletContext sc = getServlet().getServletContext();
String realPath = sc.getRealPath("virusCheck.properties");
File outFile = new File(uploadFilePath + saveFile);
String outFilePath = outFile.getAbsolutePath();
scanFile(realPath, outFilePath, errors);
if (errors.isEmpty()) {
FileInputStream inputStream = new FileInputStream(uploadFilePath + saveFile);
Workbook w = Workbook.getWorkbook(inputStream);
Sheet read_sheet = w.getSheet(0);***
int rows = read_sheet.getRows();
for (int j = 1; j < rows; j++) {
String[] fields = new String[4];
for(int i = 0; i < fields.length; i++) {
Cell cell = read_sheet.getCell(i, j);
fields[i] = cell.getContents().trim();
if (fields[0] != null) {
try {
if (fields[i].indexOf(".") > 0) {
if (i != 1) {
fields[i] = fields[i];
} else {
fields[i].substring(0, fields[i].indexOf("."));
}
}
} catch(Exception e1) {
int k = i+1;
errorVector.add("Invalid data at field " + k);
errorId = 1;
}
}
}
if (!(fields[0] == null || (fields[0].trim().equals("")))) {
rowVector.add(fields);
} else {
break;
}
}
uploadObject.setErrorId(errorId);
uploadObject.setErrorVector(errorVector);
uploadObject.setRowVector(rowVector);
if (errorId == 0) {
if (rowVector.size() > 0) {
for (int i = 0; i < rowVector.size(); i++) {
String[] fields = rowVector.elementAt(i);
indexObjectVector.add(loadIndexValueReportObject(loadIndexValueReportObject(fields, i, vector)));
}
} else {
vector.add("No Bidder is created in the excel file ");
}
} else {
for (int k = 0; k < errorVector.size(); k++) {
String errorDescription = (String) errorVector.get(k);
vector.add(errorDescription);
}
}
}
} catch (Exception e1) {
errorId = uploadObject.getErrorId();
rowVector = uploadObject.getRowVector();
errorVector = uploadObject.getErrorVector();
for (int k = 0; k < errorVector.size(); k++) {
String errorDescription = (String) errorVector.get(k);
vector.add(errorDescription);
}
}
request.setAttribute(IConstant.UPLOAD_MESSAGE, "uploaded");
request.setAttribute(IConstant.INDEX_OBJECT_VECTOR, indexObjectVector);
request.getSession().setAttribute(IConstant.FILE_NAME, uploadFilePath + saveFile);
} catch (Exception e1) {
vector.add("File name or sheet name error ");
}
if (!errors.isEmpty()) {
request.setAttribute(IConstant.ERROR_VECTOR, errors);
return mapping.findForward("failureUpload");
}
return mapping.findForward("success");
}
}
I have an LWJGL program that loads config files for entities. When I load a 29 KB file, RAM usage increases by about 200 MB over normal (to a total of about 300 MB). I don't know why this is happening or what to do about it.
Here is my full file parsing code:
// Parses the BMEF file CONTAINER + file_name + BMEF into the maps strings,
// int_arrays and float_arrays. Each entry has the form  name;TYPE<value>
// and may span several lines (line breaks are not part of the value).
// The text is accumulated in a StringBuilder: appending to a String and
// calling substring() for every character created a new String per
// character, which is what inflated memory use.
FileReader isr = null;
File file = new File(CONTAINER + file_name + BMEF);
String line = null;
try {
    isr = new FileReader(file);
} catch (FileNotFoundException e) {
    System.err.println("No BMEF file found in the given context of; BMEF:" + BMEF + ", CONTAINER:" + CONTAINER + ", FILE NAME:" + file_name + ", FULL DIRECTORY USED:" + file);
}
strings = new HashMap<String,String>();
float_arrays = new HashMap<String,float[]>();
int_arrays = new HashMap<String,int[]>();
BufferedReader reader = null;
// Without a file there is nothing to parse; the maps simply stay empty
// (previously this case ended in a NullPointerException).
if (isr != null) {
    reader = new BufferedReader(isr);
    try {
        String name = null;
        StringBuilder current = new StringBuilder();
        short type = -1;
        // 0 = reading the name (ends at ';'), 1 = reading the type (ends at
        // '<'), 2 = reading the value (ends at '>').
        short stage = 0;
        while ((line = reader.readLine()) != null) {
            for (int pos = 0; pos < line.length(); pos++) {
                char c = line.charAt(pos);
                if (stage == 0 && c == ';') {
                    name = current.toString();
                    current.setLength(0);
                    stage = 1;
                } else if (stage == 1 && c == '<') {
                    String typeName = current.toString();
                    if (typeName.equals(STRING)) {
                        type = 0;
                    } else if (typeName.equals(INT_ARRAY)) {
                        type = 1;
                    } else if (typeName.equals(FLOAT_ARRAY)) {
                        type = 2;
                    } else {
                        System.err.println("NO SUCH TYPE: '" + typeName + "'");
                        System.exit(-1);
                    }
                    current.setLength(0);
                    stage = 2;
                } else if (stage == 2 && c == '>') {
                    String value = current.toString();
                    if (type == 0) {
                        strings.put(name, value);
                    } else {
                        // An empty value yields an empty array instead of a
                        // NumberFormatException on "".
                        String[] string_arr = value.trim().isEmpty() ? new String[0] : value.split(",");
                        if (type == 1) {
                            int[] out = new int[string_arr.length];
                            for (int i = 0; i < string_arr.length; i++) {
                                // parseInt does not accept surrounding blanks.
                                out[i] = Integer.parseInt(string_arr[i].trim());
                            }
                            int_arrays.put(name, out);
                        } else if (type == 2) {
                            float[] out = new float[string_arr.length];
                            for (int i = 0; i < string_arr.length; i++) {
                                out[i] = Float.parseFloat(string_arr[i]);
                            }
                            float_arrays.put(name, out);
                        }
                    }
                    name = null;
                    current.setLength(0);
                    type = -1;
                    stage = 0;
                } else {
                    current.append(c);
                }
            }
        }
    } catch (IOException e) {
        System.err.println("An error was encounter whilst reading " + file_name + '.' + BMEF + "the line in use durring this encounter was:'" + line + "'");
    } finally {
        try {
            reader.close();
        } catch (IOException ignored) {
            // Nothing useful can be done if closing fails.
        }
    }
}
I am trying to implement the Lucene search engine in my application.
I am using Lucene 5.4.1.
I have successfully implemented Lucene's wildcard queries and normal queries.
But my main focus is to search for specific text in a text file using regex patterns.
Index Writer code:
/**
 * (Re)creates the index for {@code docsPath} in the directory
 * "indexDirectory" below the JVM temp directory.
 *
 * <p>I/O errors raised while indexing are printed and not rethrown. The
 * writer is always closed before this method returns, also when indexing
 * fails, so the returned writer can no longer be used to add documents.
 *
 * @param docsPath file or directory to index
 * @return the value of {@code getIndexWriter()} after the run
 * @throws IOException if the temp directory is unknown or cannot be created,
 *                     or if {@code docsPath} is not readable
 */
public IndexWriter generateIndex(String docsPath) throws IOException {
    // Check the property itself: after concatenation the path is never null.
    String tmpRoot = System.getProperty("java.io.tmpdir");
    if (tmpRoot == null) {
        throw new IOException("System property 'java.io.tmpdir' does not specify a tmp dir");
    }
    String indexPath = tmpRoot + File.separator + "indexDirectory";
    File tmpDir = new File(indexPath);
    if (!tmpDir.exists() && !tmpDir.mkdirs()) {
        throw new IOException("Unable to create tmp dir " + tmpDir);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        // Report the problem to the caller instead of terminating the JVM.
        throw new IOException("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");
        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        // The index is always rebuilt from scratch.
        iwc.setOpenMode(OpenMode.CREATE);
        // try-with-resources releases the writer even when indexing fails.
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir);
            setIndexWriter(writer);
            Date end = new Date();
            System.out.println(end.getTime() - start.getTime() + " total milliseconds");
        }
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
    return getIndexWriter();
}
static void indexDocs(final IndexWriter writer, Path path) throws IOException {
if (Files.isDirectory(path)) {
Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
#Override
public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
try {
indexDoc(writer, file, attrs.lastModifiedTime().toMillis());
} catch (IOException ignore) {
// don't index files that can't be read.
}
return FileVisitResult.CONTINUE;
}
});
} else {
indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis());
}
}
/**
 * Adds (or, when the writer is not in CREATE mode, replaces) the document
 * for one file. The document has the fields "path", "modified" and
 * "contents" (the file text, read as UTF-8).
 *
 * @param writer       writer of the index that receives the document
 * @param file         file to index
 * @param lastModified modification time in milliseconds
 * @throws IOException if the file cannot be read or the index cannot be written
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        String filePath = file.toString();
        Document doc = new Document();
        // The path must be stored: the search code reads it back with
        // doc.get("path"), which returns null for an unstored field.
        doc.add(new StringField("path", filePath, Field.Store.YES));
        doc.add(new LongField("modified", lastModified, Field.Store.NO));
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", filePath), doc);
        }
    }
}
Index Searching Code:
/**
 * Runs {@code queryString} as a regular-expression query against the
 * "contents" field of the index at {@code index} and prints the hits.
 *
 * <p>The expression is matched against the indexed terms, not against the
 * original text, so it has to fit the tokens the analyzer produced.
 *
 * @param index       path of the index directory
 * @param queryString regular expression to search for, must not be null
 * @param capability  regex implementation to use, or null for the default
 * @return the reader the search ran on; it is still open and the caller is
 *         responsible for closing it
 * @throws Exception if the index cannot be opened or searched; the reader is
 *                   closed in that case
 */
public IndexReader searchExecutor(String index, String queryString, RegexCapabilities capability) throws Exception {
    if (queryString == null) {
        throw new IllegalArgumentException("queryString must not be null");
    }
    String field = "contents";
    boolean raw = false;
    int hitsPerPage = Integer.MAX_VALUE;
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    try {
        IndexSearcher searcher = new IndexSearcher(reader);
        // Term(field, text): the one-argument constructor used before put
        // the pattern into the field name and ignored queryString entirely.
        RegexQuery query = new RegexQuery(new Term(field, queryString));
        if (capability != null) {
            query.setRegexImplementation(capability);
        }
        System.out.println("Searching for: " + query.toString(field));
        doSearch(null, searcher, query, hitsPerPage, raw, false);
        return reader;
    } catch (Exception e) {
        // Do not leak the reader when the search fails.
        reader.close();
        throw e;
    }
}
/**
 * Executes {@code query} and prints the path (and the title, if stored) of
 * up to {@code hitsPerPage} hits.
 *
 * @param in          unused, kept for interface compatibility
 * @param searcher    searcher over the open index
 * @param query       query to execute
 * @param hitsPerPage maximum number of hits to print
 * @param raw         unused, kept for interface compatibility
 * @param interactive unused, kept for interface compatibility
 * @throws IOException if the index cannot be read
 */
public static void doSearch(BufferedReader in, IndexSearcher searcher, Query query, int hitsPerPage, boolean raw,
        boolean interactive)
        throws IOException {
    // Computed in long so that a large hitsPerPage cannot overflow int.
    int requested = (int) Math.min(5L * hitsPerPage, Integer.MAX_VALUE);
    TopDocs results = searcher.search(query, requested);
    ScoreDoc[] hits = results.scoreDocs;
    int numTotalHits = results.totalHits;
    System.out.println(numTotalHits + " total matching documents");

    int end = Math.min(Math.min(numTotalHits, hitsPerPage), hits.length);
    for (int i = 0; i < end; i++) {
        Document doc = searcher.doc(hits[i].doc);
        String path = doc.get("path");
        if (path == null) {
            // The field is missing or was indexed without being stored.
            System.out.println((i + 1) + ". " + "No path for this document");
            continue;
        }
        System.out.println((i + 1) + ". " + path);
        String title = doc.get("title");
        if (title != null) {
            System.out.println(" Title: " + title);
        }
    }
}
Please help.
Your question is about search with regular expressions in lucene.
You are using RegexQuery which is deprecated so try RegexpQuery
Your regEx-example starts with \s* but you do not use KeywordTokenizer. Most other tokenizer will remove (aka "split at") whitespace
Your regEx-example is not purely lower case. But standard analyzer contains LowerCaseFilter. Be aware: your regEx will go directly against the tokens of your index (not against the original text)
--> read Supported RegExp syntax and syntax in ES and TestRegexpRandom (test class) and play with https://github.com/DmitryKey/luke on your index.
The below code is for getting tf-idf value from indexes. But I get an error while running it, on the line with Correct_ME.
Using Lucene 4.8.
DocIndexing.java
public class DocIndexing {
private DocIndexing() {}
/** Index all text files under a directory.
* #param args
* #throws java.io.IOException */
public static void main(String[] args) throws IOException {
String usage = "java org.apache.lucene.demo.IndexFiles"
+ " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
+ "This indexes the documents in DOCS_PATH, creating a Lucene index"
+ "in INDEX_PATH that can be searched with Searching";
String indexPath = "C:/Users/dell/Documents/NetBeansProjects/IndexingSearching/Index";
String docsPath = "C:/Users/dell/Documents/NetBeansProjects/IndexingSearching/ToBeIndexed";
boolean create = true;
for(int i=0;i<args.length;i++) {
if (null != args[i]) switch (args[i]) {
case "-index":
indexPath = args[i+1];
i++;
break;
case "-docs":
docsPath = args[i+1];
i++;
break;
case "-update":
create = false;
break;
}
}
if (docsPath == null) {
System.err.println("Usage: " + usage);
System.exit(1);
}
final File docDir = new File(docsPath);
if (!docDir.canRead() && !docDir.isDirectory() &&
!docDir.isHidden() &&
!docDir.exists()) {
System.out.println("Document directory '" +docDir.getAbsolutePath()+ "' does not exist or is not readable, please check the path");
System.exit(1);
}
Date start = new Date();
try {
System.out.println("Indexing to directory '" + indexPath + "'...");
Directory dir = FSDirectory.open(new File(indexPath));
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
//Filter filter = new PorterStemFilter();
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer);
if (create) {
iwc.setOpenMode(OpenMode.CREATE);
} else {
// Add new documents to an existing index:
iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
}
try (
IndexWriter writer = new IndexWriter(dir, iwc)) {
indexDocs(writer, docDir);
}
Date end = new Date();
System.out.println(end.getTime() - start.getTime() + " total milliseconds");
} catch (IOException e) {
System.out.println(" caught a " + e.getClass() +
"\n with message: " + e.getMessage());
}
Tf_Idf tfidf = new Tf_Idf();
String field = null,term = null;
tfidf.scoreCalculator(field, term);
}
/*
* #param writer Writer to the index where the given file/dir info will be stored
* #param file The file to index, or the directory to recurse into to find files to index
* #throws IOException If there is a low-level I/O error
*/
static void indexDocs(IndexWriter writer, File file)
        throws IOException {
    // do not try to index files that cannot be read
    if (!file.canRead()) {
        return;
    }
    if (file.isDirectory()) {
        String[] children = file.list();
        // list() yields null when an IO error occurs; nothing to recurse into then
        if (children != null) {
            for (String child : children) {
                indexDocs(writer, new File(file, child));
            }
        }
        return;
    }
    // Opened outside the try-with-resources on purpose: only a failure to OPEN the
    // file is skipped silently; a FileNotFoundException raised later while writing
    // to the index must still propagate to the caller.
    final FileInputStream opened;
    try {
        opened = new FileInputStream(file);
    } catch (FileNotFoundException fnfe) {
        return;
    }
    // try-with-resources always closes the stream, and a failure in close() is
    // recorded as a suppressed exception instead of replacing an indexing failure.
    try (FileInputStream fis = opened) {
        // make a new, empty document
        Document doc = new Document();
        // the stored path is also the key used by updateDocument below
        doc.add(new StringField("path", file.getPath(), Field.Store.YES));
        doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
        doc.add(new TextField("title", file.getName(), Field.Store.YES));
        // the file body is handed to the index as a UTF-8 reader over the open stream
        doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8))));
        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.getPath()), doc);
        }
    }
}
}
Tf_Idf.java
/**
 * Prints tf * idf scores for a term, using the Lucene index opened by the constructor.
 * Call {@link #close()} when done to release the underlying index reader.
 */
public class Tf_Idf implements java.io.Closeable {
    // Most recent term-frequency and inverse-document-frequency factors computed by
    // scoreCalculator. NOTE(review): these are static, so all instances share them —
    // kept as-is for compatibility with code that may read them.
    static float tf = 1;
    static float idf = 0;
    // Most recent tf * idf product computed by scoreCalculator.
    private float tfidf_score;
    // Not read or written anywhere in this class.
    static float [] tfidf = null;
    // Reader over the index opened in the constructor; released by close().
    IndexReader indexReader;

    /**
     * Opens a reader over the index directory hard-coded below.
     *
     * @throws IOException if the index cannot be opened
     */
    public Tf_Idf() throws IOException {
        this.indexReader = DirectoryReader.open(FSDirectory.open(new File("C:/Users/dell/Documents/NetBeansProjects/IndexingSearching/Index")));
    }

    /**
     * Prints one tf * idf score per document returned for {@code term} in {@code field}.
     * Prints nothing when the field is not in the index or no indexed term matches.
     *
     * @param field name of the indexed field to look in; must not be null
     * @param term  term text to score; compared after trimming surrounding whitespace; must not be null
     * @throws IOException          if reading the index fails
     * @throws NullPointerException if {@code field} or {@code term} is null
     */
    public void scoreCalculator (String field, String term) throws IOException
    {
        if (field == null || term == null) {
            // Fail up front with a clear message instead of deep inside the index lookup.
            throw new NullPointerException("field and term must not be null");
        }
        // getTerms returns null when the index has no such field; there is nothing to
        // score in that case (previously this was a NullPointerException).
        if (MultiFields.getTerms(indexReader, field) == null) {
            return;
        }
        TFIDFSimilarity tfidfSIM = new DefaultSimilarity();
        Bits liveDocs = MultiFields.getLiveDocs(indexReader);
        TermsEnum termEnum = MultiFields.getTerms(indexReader, field).iterator(null);
        String wanted = term.trim();
        BytesRef bytesRef;
        while ((bytesRef = termEnum.next()) != null) {
            if (!bytesRef.utf8ToString().trim().equals(wanted)) {
                continue;
            }
            // next() has already positioned the enum on this term, so its statistics
            // and postings can be read directly without seeking again.
            idf = tfidfSIM.idf(termEnum.docFreq(), indexReader.numDocs());
            DocsEnum docsEnum = termEnum.docs(liveDocs, null);
            if (docsEnum == null) {
                continue;
            }
            while (docsEnum.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
                tf = tfidfSIM.tf(docsEnum.freq());
                tfidf_score = tf * idf ;
                System.out.println(" -tfidf_score-" + tfidf_score);
            }
        }
    }

    /**
     * Closes the index reader opened by the constructor.
     *
     * @throws IOException if closing the reader fails
     */
    @Override
    public void close() throws IOException {
        indexReader.close();
    }
}
It's obvious that you are passing a null IndexReader to the MultiFields method:
IndexReader reader = null;
tfidf.scoreCalculator( reader, field,term);
You need to write something like this:
IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(PATH_TO_LUCENE_INDEX)));
tfidf.scoreCalculator( reader, field,term);
You need to replace PATH_TO_LUCENE_INDEX with the real path, of course.
Another problem that I see: you open an IndexReader in Tf_Idf but don't use it anywhere. It may be a good idea to either remove it or use it inside the scoreCalculator method, e.g.
tfidf.scoreCalculator(field,term);
but inside the method use the field of this class (this.indexReader) instead of the indexReader that you try to pass into scoreCalculator.
UPD
// Opens the Lucene index located at "Index" and stores the resulting reader in this.reader.
public Tf_Idf() throws IOException {
    File indexLocation = new File("Index");
    this.reader = DirectoryReader.open(FSDirectory.open(indexLocation));
}
In this code, you need to replace "Index" with the real path to your Lucene index, e.g. /home/user/index or C://index, or wherever you have it.
This code gets the file name, but I want to get the file path:
/**
 * Returns the names of the entries reported by {@code clients.listFiles()},
 * logging each name as it is collected.
 * If the listing fails, the stack trace is printed and the names gathered so far
 * (possibly none) are returned.
 *
 * @param clients FTP client to list; presumably already connected and logged in — confirm at call site
 * @return the collected entry names, never null
 */
private List <String> checkFiles(FTPClient clients){
    List<String> names = new ArrayList<String>();
    try {
        for (FTPFile entry : clients.listFiles()) {
            // Some Commons Net versions leave a null slot for a listing line that
            // could not be parsed; skip it rather than abort the whole listing.
            if (entry == null) {
                continue;
            }
            String name = entry.getName();
            Log.v("aasd", name);
            names.add(name);
        }
    } catch (Exception e) {
        // Best effort: report the failure and return whatever was collected.
        e.printStackTrace();
    }
    return names;
}
The path is in the client, not the files.
String path = clients.printWorkingDirectory()
If you want a specific path, change to it first:
client.changeWorkingDirectory(PathName), e.g. client.changeWorkingDirectory("folder1/folder2"), where folder2 is inside folder1.
System.out.println(client.printWorkingDirectory());
printWorkingDirectory gives the current path
The code below collects the paths of all files in a folder on the FTP server.
ftpPath looks like "ftpserver/folder". The returned list contains the paths of all files in that folder.
public List<string> GetFilesPath(string ftpPath)
{
FtpWebRequest request;
string FtpServerPath = ftpPath;
List<string> filePathList=new List<string>();
try
{
request = WebRequest.Create(new Uri(FtpServerPath)) as FtpWebRequest;
request.Method = WebRequestMethods.Ftp.ListDirectoryDetails;
request.UseBinary = true;
request.UsePassive = true;
request.KeepAlive = true;
request.Credentials = new NetworkCredential("ftpuser", "ftpPassword");
request.ConnectionGroupName = "group";
Stream rs = (Stream)request.GetResponse().GetResponseStream();
StreamReader sr = new StreamReader(rs);
string strList = sr.ReadToEnd();
string[] lines = null;
if (strList.Contains("\r\n"))
{
lines = strList.Split(new string[] { "\r\n" }, StringSplitOptions.None);
}
else if (strList.Contains("\n"))
{
lines = strList.Split(new string[] { "\n" }, StringSplitOptions.None);
}
if (lines == null || lines.Length == 0)
return null;
else{
foreach (string line in lines)
{
if (line.Length == 0)
continue;
int x=line.LastIndexOf(' ');
int len = line.Length;
var str = line.Substring( (x+1), (len - x - 1));
var filePath = FtpServerPath+"/"+str;
filePathList.Add(filePath);
}
}
catch (Exception ex)
{
MessageBox.Show("Error: " + ex.Message);
}
}