A simple example of full-text search in Java using Lucene
Last Update:2017-02-28
Source: Internet
Author: User
Index Source code:
package Lucene;
/**
* <p>title: </p>
* <p>description: </p>
* <p>copyright:copyright (c) 2003</p>
* <p>company: </p>
* @author Shirley
* @version 1.0
*/
import org.apache.lucene.index.*;
import org.apache.lucene.analysis.*;
import java.io.*;
import org.apache.lucene.document.*;
public class Indexfiles {
//Usage:: indexfiles [index output DIRECTORY] [indexed file list] ...
public static void Main (string[] arg) throws Exception {
string[] args = new string[2];
//Index The path to store index information
Args[0] = System.getproperty ("Java.io.tmpdir", "tmp") + system.getproperty ("file.separator") + "index-1";
//To index file
args[1] = "E:\\appwork\\lucene\\rfc2047.txt";
args[2] = "E:\\appwork\\cyberoffice\\co\\sheldon Java mail.htm";
args[3] = "E:\\appwork\\lucene\\englishtest.doc";
args[4] = "E:\\appwork\\cyberoffice\\co\\xls1.xls";
args[5] = "e:\\appwork\\cyberoffice\\co\\ppt1.ppt";
String indexpath = args[0];
IndexWriter writer;
//Constructs a new write indexer with the specified language parser (the 3rd parameter indicates whether it is an append index)
writer = new IndexWriter (Indexpath, New Simpleanalyzer (), false);
for (int i=1; i<args.length; i++) {
System.out.println ("indexing file" + args[i]);
InputStream is = new FileInputStream (args[i]);
//Constructs a Document object that contains 2 field fields
//One is the path, not index, and only store
//One is the Content body field, full-text indexed, and stored
Document doc = new document ();
Doc.add (field.unindexed ("path", args[i));
Doc.add (Field.text ("Body", (Reader) New InputStreamReader (IS));
//write documents to index
Writer.adddocument (DOC);
Is.close ();
};
//Close Write indexer
Writer.close ();
}
}
Search Source code:
package Lucene;
/**
* <p>title: </p>
* <p>description: </p>
* <p>copyright:copyright (c) 2003</p>
* <p>company: </p>
* @author Shirley
* @version 1.0
*/
import org.apache.lucene.search.*;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.analysis.*;
public class Search {
public static void Main (string[] arg) throws Exception {
string[] args = new string[2];
The path to which index information is stored after the index
Args[0] = System.getproperty ("Java.io.tmpdir", "tmp") + system.getproperty ("file.separator") + "index-1";
//Search keyword
Args[1] = "sending";
String indexpath = args[0];
String querystring = args[1];
//pointing to the index directory of the search engine
Searcher Searcher = new Indexsearcher (Indexpath);
//Query parser: Use and index the same language parser
query query = Queryparser.parse (QueryString, "Body", new Simpleanalyzer ());
//Search results using hits storage
Hits Hits = searcher.search (query);
//The matching degree of the data and query that can access the corresponding field through hits
for (int i=0; i<hits.length (); i++) {
System.out.println (Hits.doc (i). Get ("path") + "; Score: "+ hits.score (i));
};
}
}
Note: Currently the program only supports indexing English text. The file types it can handle are .txt, .doc, .htm, .xls and .ppt.
Indexing of Chinese text and of other file types is still under study ...