Lucene 入門樣本

最後更新：2018-12-04 來源：互聯網

上載者：User

創建阿里雲帳戶，並獲得超過 40 款產品的免費試用版；而企業帳戶則可以享有總值 $1200 的免費試用版。立即註冊！

通過敲寫這兩個例子，大概瞭解了 Lucene 的核心類以及主要 API 的功能。

package Demo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Command-line demo that indexes every readable file under a directory into a
 * Lucene index. Each file becomes one document with the fields {@code path}
 * (stored), {@code modified} and {@code contents}.
 */
public class IndexFiles {

    /** Not instantiable: this class only hosts static entry points. */
    private IndexFiles() {
    }

    /**
     * Indexes all files under a documents directory.
     *
     * <p>Accepted arguments: {@code [-index INDEX_PATH] [-docs DOCS_PATH] [-update]}.
     * For backward compatibility a bare first argument that is not one of these
     * options is taken as the documents path. The index path defaults to
     * {@code "index"}. Without {@code -update} the index is created from scratch;
     * with it, an existing index is opened and documents are replaced by path.
     *
     * <p>Exits with status 1 when no documents path is given or the path does not
     * exist or is not readable.
     *
     * @param args command-line arguments as described above
     */
    public static void main(String[] args) {
        String usage = "java org.apache.lucene.demo.IndexFiles"
                + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
                + "This indexes the documents in DOCS_PATH, creating a Lucene index"
                + " in INDEX_PATH that can be searched with SearchFiles";

        String indexPath = "index";
        String docsPath = null;
        boolean create = true;

        for (int i = 0; i < args.length; i++) {
            if ("-index".equals(args[i]) && i + 1 < args.length) {
                indexPath = args[++i];
            } else if ("-docs".equals(args[i]) && i + 1 < args.length) {
                docsPath = args[++i];
            } else if ("-update".equals(args[i])) {
                create = false;
            } else if (i == 0) {
                // Legacy form: the first argument is the documents path itself.
                docsPath = args[i];
            }
        }

        // Checking the parsed value (instead of reading args[0] blindly) avoids an
        // ArrayIndexOutOfBoundsException when the program is started without arguments.
        if (docsPath == null) {
            System.err.println("input the docsPath");
            System.err.println("Usage: " + usage);
            System.exit(1);
        }

        final File docDir = new File(docsPath);
        if (!docDir.exists() || !docDir.canRead()) {
            System.out.println("Document directory '" + docDir.getAbsolutePath()
                    + "' does not exist or is not readable, please check the path");
            System.exit(1);
        }

        long startMillis = System.currentTimeMillis();
        try {
            Directory dir = FSDirectory.open(new File(indexPath));
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);

            if (create) {
                // Build a new index, discarding any previously indexed documents.
                iwc.setOpenMode(OpenMode.CREATE);
            } else {
                // Add new documents to an existing index.
                iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
            }

            IndexWriter writer = new IndexWriter(dir, iwc);
            try {
                indexDocs(writer, docDir);
            } finally {
                // Close the writer even when indexing fails part-way through.
                writer.close();
            }

            long elapsedMillis = System.currentTimeMillis() - startMillis;
            System.out.println(elapsedMillis + " total milliseconds");
        } catch (IOException e) {
            System.out.println("caught a " + e.getClass() + "\n with message:" + e.getMessage());
        }
    }

    /**
     * Indexes the given file, or recursively every file below it when it is a
     * directory. Files and directories that cannot be read are skipped.
     *
     * @param writer writer for the index the documents are added to
     * @param file   file or directory to index
     * @throws IOException if reading a file or writing to the index fails
     */
    static void indexDocs(IndexWriter writer, File file) throws IOException {
        // Do not try to index files that cannot be read.
        if (!file.canRead()) {
            return;
        }

        if (file.isDirectory()) {
            String[] files = file.list();
            // list() returns null when an I/O error occurs.
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
            return;
        }

        FileInputStream fis;
        try {
            fis = new FileInputStream(file);
        } catch (java.io.FileNotFoundException e) {
            // canRead() can report true for a file that still cannot be opened
            // (e.g. it was removed or locked in the meantime); skip that file
            // instead of aborting the whole indexing run.
            return;
        }

        try {
            // Make a new, empty document.
            Document doc = new Document();

            // The path is indexed as a single untokenized term and stored so it
            // can be shown in search results.
            Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
            doc.add(pathField);

            // Last-modified time in milliseconds, indexed but not stored.
            doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

            // File contents are read as UTF-8 and tokenized; the text itself is
            // supplied through a Reader rather than held as a String.
            doc.add(new TextField("contents",
                    new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

            if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                // New index, so we just add the document (no old document can be there).
                System.out.println("adding " + file);
                writer.addDocument(doc);
            } else {
                // Existing index (an old copy of the document may have been indexed),
                // so use updateDocument to replace the old one matching the exact
                // path, if present.
                System.out.println("updating " + file);
                writer.updateDocument(new Term("path", file.getPath()), doc);
            }
        } finally {
            fis.close();
        }
    }
}

package Demo;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.Buffer;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.xml.sax.InputSource;

/**
 * Command-line demo that runs queries against the Lucene index stored in the
 * {@code index} directory and pages through the results on the console.
 */
public class Searchfiles {

    /**
     * Opens the index, then reads queries line by line and prints the matching
     * documents. With the defaults set below, queries are read interactively
     * from standard input until an empty line or end of input is reached.
     *
     * @param args command-line arguments (not used)
     * @throws IOException    if the index or the query input cannot be read
     * @throws ParseException if a query line cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        String index = "index";
        String field = "contents";
        String queries = null;
        int repeat = 0;
        boolean raw = false;
        String queryString = null;
        int hitsPerPage = 10;

        // Open the directory that holds the index.
        IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
        try {
            // Build a searcher on top of the index opened by the reader.
            IndexSearcher searcher = new IndexSearcher(reader);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);

            BufferedReader in;
            if (queries != null) {
                in = new BufferedReader(
                        new InputStreamReader(new FileInputStream(queries), "UTF-8"));
            } else {
                in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
            }

            QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
            while (true) {
                if (queries == null && queryString == null) {
                    // Prompt the user only in interactive mode.
                    System.out.println("Enter query: ");
                }

                String line = queryString != null ? queryString : in.readLine();
                if (line == null) {
                    // End of input.
                    break;
                }

                line = line.trim();
                if (line.length() == 0) {
                    break;
                }

                Query query = parser.parse(line);
                System.out.println("Searching for : " + query.toString(field));

                if (repeat > 0) {
                    // Repeat the search and time it as a simple benchmark.
                    Date start = new Date();
                    for (int i = 0; i < repeat; i++) {
                        searcher.search(query, null, 100);
                    }
                    Date end = new Date();
                    System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms.");
                }

                doPagingSearch(in, searcher, query, hitsPerPage, raw,
                        queries == null && queryString == null);

                // A fixed query string is executed exactly once; otherwise keep
                // reading further queries from the input.
                if (queryString != null) {
                    break;
                }
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Runs the query and prints the hits one page at a time.
     *
     * <p>Initially enough hits for five pages are collected. When the user pages
     * beyond them, the user is asked whether the query should be executed again
     * to collect all hits. In non-interactive mode only the first page is printed.
     *
     * @param in          source of the user's paging commands
     * @param searcher    searcher used to execute the query
     * @param query       query to run
     * @param hitsPerPage number of hits shown per page
     * @param raw         if {@code true}, print only document id and score per hit
     * @param interactive if {@code true}, prompt for paging commands after each page
     * @throws IOException if searching the index or reading a command fails
     */
    public static void doPagingSearch(BufferedReader in, IndexSearcher searcher, Query query,
            int hitsPerPage, boolean raw, boolean interactive) throws IOException {
        // Collect enough docs to show 5 pages.
        TopDocs results = searcher.search(query, 5 * hitsPerPage);
        ScoreDoc[] hits = results.scoreDocs;

        int numTotalHits = results.totalHits;
        System.out.println(numTotalHits + " total matching documents");

        int start = 0;
        int end = Math.min(numTotalHits, hitsPerPage);

        while (true) {
            if (end > hits.length) {
                System.out.println("Only results 1 - " + hits.length + " of " + numTotalHits
                        + " total matching documents collected.");
                System.out.println("Collect more (y/n)?");
                String line = in.readLine();
                // A null line means end of input; treat it like "no".
                if (line == null || line.length() == 0 || line.charAt(0) == 'n') {
                    break;
                }
                hits = searcher.search(query, numTotalHits).scoreDocs;
            }

            end = Math.min(hits.length, start + hitsPerPage);

            for (int i = start; i < end; i++) {
                if (raw) {
                    // Output raw format.
                    System.out.println("doc=" + hits[i].doc + " score= " + hits[i].score);
                    continue;
                }

                Document doc = searcher.doc(hits[i].doc);
                String path = doc.get("path");
                if (path != null) {
                    System.out.println((i + 1) + "." + path);
                    String title = doc.get("title");
                    if (title != null) {
                        System.out.println("  Title:" + title);
                    }
                } else {
                    System.out.println((i + 1) + "." + "No path for this document");
                }
            }

            if (!interactive || end == 0) {
                break;
            }

            if (numTotalHits >= end) {
                boolean quit = false;
                while (true) {
                    System.out.print("Press ");
                    if (start - hitsPerPage >= 0) {
                        System.out.print("(p)revious page, ");
                    }
                    if (start + hitsPerPage < numTotalHits) {
                        System.out.print("(n)ext page, ");
                    }
                    System.out.print("(q)uit or enter number to jump to a page.");

                    String line = in.readLine();
                    // A null line means end of input; treat it like "quit".
                    if (line == null || line.length() == 0 || line.charAt(0) == 'q') {
                        quit = true;
                        break;
                    }

                    if (line.charAt(0) == 'p') {
                        start = Math.max(0, start - hitsPerPage);
                        break;
                    } else if (line.charAt(0) == 'n') {
                        if (start + hitsPerPage < numTotalHits) {
                            start += hitsPerPage;
                        }
                        break;
                    } else {
                        int page;
                        try {
                            page = Integer.parseInt(line.trim());
                        } catch (NumberFormatException e) {
                            // Not a command and not a number: ask again.
                            System.out.println("No such page!");
                            continue;
                        }
                        // Pages are 1-based; long arithmetic guards against overflow
                        // for very large page numbers.
                        long firstHit = (long) (page - 1) * hitsPerPage;
                        if (page >= 1 && firstHit < numTotalHits) {
                            start = (int) firstHit;
                            break;
                        } else {
                            System.out.println("No such page!");
                        }
                    }
                }

                if (quit) {
                    break;
                }
                end = Math.min(numTotalHits, start + hitsPerPage);
            }
        }
    }
}

本文章原先以中文撰寫並發佈於 aliyun.com，亦設英文版本，僅作資訊用途。本網站不對文章的準確性，完整性或可靠性或其任何翻譯作出任何明示或暗示的陳述或保證。如對該文章有任何疑慮或投訴，請傳送電郵至 info-contact@alibabacloud.com 並提供相關疑慮或投訴的詳細說明。職員會於 5 個工作天內與您聯絡，一經驗證之後，即會刪除該侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

Get Started for Free

Sales Support

1 on 1 presale consultation

Chat Contact Sales
After-Sales Support

24/7 Technical Support 6 Free Tickets per Quarter Faster Response

Open a Ticket
Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.

Learn More

Lucene 入門樣本

聯繫我們

A Free Trial That Lets You Build Big!

Sales Support

After-Sales Support