通過敲寫這兩個例子,大概瞭解了 Lucene 的核心類以及主要 API 的功能。
package Demo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Command-line tool that indexes all readable files under a directory into a
 * Lucene index stored in the relative directory {@code index}.
 */
public class IndexFiles {

    /** Not instantiable: this class only hosts static entry points. */
    private IndexFiles() {
    }

    /**
     * Indexes every readable file under the directory given as the first argument.
     *
     * <p>Exits with status 1 when no path is supplied or when the path does not
     * exist or is not readable. An {@link IOException} raised while indexing is
     * reported on standard output rather than propagated.
     *
     * @param args command-line arguments; {@code args[0]} is the documents path
     */
    public static void main(String[] args) {
        String usage = "Usage: java Demo.IndexFiles DOCS_PATH\n\n"
                + "This indexes the documents in DOCS_PATH, creating a Lucene index "
                + "in the directory 'index' that can be searched with Searchfiles";
        String indexPath = "index";
        boolean create = true;

        // Validate before touching args[0]: with no arguments the array is empty,
        // so reading index 0 first would throw instead of printing a hint.
        if (args == null || args.length == 0 || args[0] == null || args[0].length() == 0) {
            System.err.println("input the docsPath");
            System.err.println(usage);
            System.exit(1);
        }
        String docsPath = args[0];

        final File docDir = new File(docsPath);
        if (!docDir.exists() || !docDir.canRead()) {
            System.out.println("Document directory '" + docDir.getAbsolutePath()
                    + "' does not exist or is not readable, please check the path");
            System.exit(1);
        }

        Date start = new Date();
        try {
            Directory dir = FSDirectory.open(new File(indexPath));
            try {
                Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
                IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);
                // CREATE replaces any existing index; CREATE_OR_APPEND adds to it.
                iwc.setOpenMode(create ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);

                IndexWriter writer = new IndexWriter(dir, iwc);
                try {
                    indexDocs(writer, docDir);
                } finally {
                    // Close even when indexing fails so the writer is not leaked.
                    writer.close();
                }
            } finally {
                dir.close();
            }

            Date end = new Date();
            System.out.println(end.getTime() - start.getTime() + " total milliseconds");
        } catch (IOException e) {
            System.out.println("caught a " + e.getClass() + "\n with message:" + e.getMessage());
        }
    }

    /**
     * Indexes the given file, or recursively every file below it when it is a
     * directory. Files that cannot be read are skipped.
     *
     * <p>Each indexed file becomes one document with three fields: {@code path}
     * (stored string), {@code modified} (last-modified time, not stored) and
     * {@code contents} (the file body read as UTF-8).
     *
     * @param writer writer for the index the documents are added to
     * @param file   file or directory to index
     * @throws IOException if reading a file or writing to the index fails
     */
    static void indexDocs(IndexWriter writer, File file) throws IOException {
        if (!file.canRead()) {
            return;
        }

        if (file.isDirectory()) {
            String[] files = file.list();
            // File.list() returns null when an I/O error occurs.
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
            return;
        }

        FileInputStream fis;
        try {
            fis = new FileInputStream(file);
        } catch (FileNotFoundException fnfe) {
            // canRead() passed but the file still could not be opened (for example
            // it was removed meanwhile); skip it instead of aborting the whole run.
            System.err.println("skipping unreadable file " + file + ": " + fnfe.getMessage());
            return;
        }

        try {
            // Make a new, empty document.
            Document doc = new Document();

            Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
            doc.add(pathField);
            doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
            doc.add(new TextField("contents",
                    new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

            if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                // New index, so no old copy of the document can exist: just add it.
                System.out.println("adding " + file);
                writer.addDocument(doc);
            } else {
                // Existing index: an old copy may have been indexed, so replace
                // whatever document matches the exact path, if present.
                System.out.println("updating " + file);
                writer.updateDocument(new Term("path", file.getPath()), doc);
            }
        } finally {
            fis.close();
        }
    }
}
package Demo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.Buffer;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.xml.sax.InputSource;

/**
 * Interactive command-line search over the Lucene index stored in the
 * relative directory {@code index}.
 */
public class Searchfiles {

    /**
     * Reads queries line by line, runs each against the {@code contents} field
     * and pages through the hits. Stops on an empty line or end of input.
     *
     * @param args unused
     * @throws IOException    if the index or the query input cannot be read
     * @throws ParseException if a query line cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        String index = "index";
        String field = "contents";
        String queries = null;
        int repeat = 0;
        boolean raw = false;
        String queryString = null;
        int hitsPerPage = 10;

        // Open the directory that holds the index.
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
        try {
            // Build a searcher over the index opened by the IndexReader.
            IndexSearcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);

            BufferedReader in = null;
            if (queries != null) {
                in = new BufferedReader(
                        new InputStreamReader(new FileInputStream(queries), "UTF-8"));
            } else {
                in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
            }

            QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
            while (true) {
                if (queries == null && queryString == null) {
                    System.out.println("Enter query: ");
                }

                String line = queryString != null ? queryString : in.readLine();
                if (line == null) {
                    // End of input.
                    break;
                }
                line = line.trim();
                if (line.length() == 0) {
                    break;
                }

                Query query = parser.parse(line);
                System.out.println("Searching for : " + query.toString(field));

                if (repeat > 0) {
                    // Repeat and time the search as a crude benchmark.
                    Date start = new Date();
                    for (int i = 0; i < repeat; i++) {
                        searcher.search(query, null, 100);
                    }
                    Date end = new Date();
                    System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms.");
                }

                doPagingSearch(in, searcher, query, hitsPerPage, raw,
                        queries == null && queryString == null);

                // A fixed query string is run exactly once; otherwise keep
                // prompting for the next query.
                if (queryString != null) {
                    break;
                }
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Runs {@code query} and prints the hits one page at a time.
     *
     * <p>Initially collects enough hits for five pages. When the user pages
     * beyond them, asks whether to collect all hits. In interactive mode the
     * user can move to the previous/next page, jump to a page number, or quit;
     * end of input is treated as quit. In non-interactive mode only the first
     * page is printed.
     *
     * @param in          source of the user's paging commands
     * @param searcher    searcher to run the query on
     * @param query       query to execute
     * @param hitsPerPage number of hits shown per page
     * @param raw         when true print only document id and score
     * @param interactive when true prompt for paging commands after each page
     * @throws IOException if searching or reading a command fails
     */
    public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query,
            int hitsPerPage, boolean raw, boolean interactive) throws IOException {
        // Collect enough docs to show 5 pages.
        TopDocs results = searcher.search(query, 5 * hitsPerPage);
        ScoreDoc[] hits = results.scoreDocs;

        int numTotalHits = results.totalHits;
        System.out.println(numTotalHits + " total matching documents");

        int start = 0;
        int end = Math.min(numTotalHits, hitsPerPage);

        while (true) {
            if (end > hits.length) {
                System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                        + " total matching documents collected.");
                System.out.println("Collect more (y/n)?");
                String line = in.readLine();
                // readLine() returns null at end of input; treat that as "no".
                if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                    break;
                }
                hits = searcher.search(query, numTotalHits).scoreDocs;
            }

            end = Math.min(hits.length, start + hitsPerPage);

            for (int i = start; i < end; i++) {
                if (raw) {
                    // Output raw format.
                    System.out.println("doc=" + hits[i].doc + " score= " + hits[i].score);
                    continue;
                }

                Document doc = searcher.doc(hits[i].doc);
                String path = doc.get("path");
                if (path != null) {
                    System.out.println((i + 1) + "." + path);
                    String title = doc.get("title");
                    if (title != null) {
                        System.out.println(" Title:" + title);
                    }
                } else {
                    System.out.println((i + 1) + "." + "No path for this document");
                }
            }

            if (!interactive || end == 0) {
                break;
            }

            if (numTotalHits >= end) {
                boolean quit = false;
                while (true) {
                    System.out.print("Press ");
                    if (start - hitsPerPage >= 0) {
                        System.out.print("(p)revious page, ");
                    }
                    if (start + hitsPerPage < numTotalHits) {
                        System.out.print("(n)ext page, ");
                    }
                    System.out.print("(q)uit or enter number to jump to a page.");

                    String line = in.readLine();
                    // End of input is treated like an explicit quit.
                    if (line == null || line.length() == 0 || line.charAt(0) == 'q') {
                        quit = true;
                        break;
                    }
                    if (line.charAt(0) == 'p') {
                        start = Math.max(0, start - hitsPerPage);
                        break;
                    } else if (line.charAt(0) == 'n') {
                        if (start + hitsPerPage < numTotalHits) {
                            start += hitsPerPage;
                        }
                        break;
                    } else {
                        int page;
                        try {
                            page = Integer.parseInt(line.trim());
                        } catch (NumberFormatException nfe) {
                            // Not a command and not a number: ask again.
                            System.out.println("No such page!");
                            continue;
                        }
                        // Pages are 1-based; reject 0 and negatives, which would
                        // otherwise produce a negative start index.
                        if (page >= 1 && (page - 1) * hitsPerPage < numTotalHits) {
                            start = (page - 1) * hitsPerPage;
                            break;
                        } else {
                            System.out.println("No such page!");
                        }
                    }
                }
                if (quit) {
                    break;
                }
                end = Math.min(numTotalHits, start + hitsPerPage);
            }
        }
    }
}