package com.jadyer.lucene;

import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * [Lucene 3.6.2 getting-started series] Section 06: advanced search with sorting.
 *
 * <p>The constructor rebuilds a small sample index on disk; {@link #searchBySort(String, Sort)}
 * then queries the "content" field and prints each hit in the requested order.
 * Call {@link #close()} when finished to release the cached reader and the directory.
 *
 * @create Aug 19, 2013 10:38:19 AM
 * @author 玄玉<http://blog.csdn.net/jadyer>
 */
public class AdvancedSearchBySort {
    /** Pattern used both to parse the sample dates and to print the stored "date" field. */
    private static final String DATE_PATTERN = "yyyyMMdd HH:mm:ss";

    /** Location of the on-disk index, relative to the working directory. */
    private static final String INDEX_PATH = "myExample/01_index/";

    /** Maximum number of hits requested from each search. */
    private static final int MAX_HITS = 10;

    private Directory directory;
    private IndexReader reader;

    /**
     * Rebuilds the sample index: deletes every existing document and adds six documents with
     * the fields "size" (int), "name", "content" (not stored) and "date" (long, epoch millis).
     * Failures are printed to stderr and otherwise ignored, matching the best-effort style of
     * this demo.
     */
    public AdvancedSearchBySort() {
        /* file sizes */
        int[] sizes = {90, 10, 20, 10, 60, 50};
        /* file names */
        String[] names = {"Michael.java", "Scofield.ini", "Tbag.txt", "Jack", "Jade", "Jadyer"};
        /* file contents */
        String[] contents = {
            "my java blog is http://blog.csdn.net/jadyer",
            "my Java Website is http://www.jadyer.cn",
            "my name is jadyer",
            "I am a Java Developer",
            "I am from Haerbin",
            "I like java of Lucene"
        };
        /* file dates, in DATE_PATTERN format */
        String[] dateTexts = {
            "20130407 15:25:30",
            "20130407 16:30:45",
            "20130213 11:15:25",
            "20130808 09:30:55",
            "20130526 13:54:22",
            "20130701 17:35:34"
        };

        SimpleDateFormat sdf = new SimpleDateFormat(DATE_PATTERN);
        IndexWriter writer = null;
        try {
            // Parse every date before touching the index so a bad date leaves the index untouched.
            Date[] dates = new Date[sizes.length];
            for (int i = 0; i < dates.length; i++) {
                dates[i] = sdf.parse(dateTexts[i]);
            }

            directory = FSDirectory.open(new File(INDEX_PATH));
            writer = new IndexWriter(directory,
                    new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)));
            writer.deleteAll();
            for (int i = 0; i < sizes.length; i++) {
                Document doc = new Document();
                doc.add(new NumericField("size", Field.Store.YES, true).setIntValue(sizes[i]));
                doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.ANALYZED_NO_NORMS));
                doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));
                doc.add(new NumericField("date", Field.Store.YES, true).setLongValue(dates[i].getTime()));
                writer.addDocument(doc);
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (null != writer) {
                try {
                    writer.close();
                } catch (IOException ce) {
                    ce.printStackTrace();
                }
            }
        }
    }

    /**
     * Returns the cached IndexReader, opening it on first use and swapping in a fresh one when
     * the index has changed since the cached reader was opened.
     *
     * @return the current reader, or {@code null} if opening or refreshing it failed
     */
    private IndexReader getIndexReader() {
        try {
            if (reader == null) {
                reader = IndexReader.open(directory);
            } else {
                // openIfChanged returns a new reader if the index changed since the given reader
                // was opened, otherwise null.
                IndexReader ir = IndexReader.openIfChanged(reader);
                if (ir != null) {
                    reader.close(); // close the stale reader
                    reader = ir;    // keep the fresh one
                }
            }
            return reader;
        } catch (Exception e) {
            e.printStackTrace();
        }
        return null; // an exception occurred
    }

    /**
     * Searches the "content" field and prints up to 10 hits in the given order.
     *
     * <p>Examples of accepted {@code sort} values:
     * <ol>
     *   <li>{@code Sort.INDEXORDER} - ascending document number</li>
     *   <li>{@code Sort.RELEVANCE} - descending score; the default order, equivalent to
     *       {@code search(query, 10)}</li>
     *   <li>{@code new Sort(new SortField("size", SortField.INT))} - ascending file size</li>
     *   <li>{@code new Sort(new SortField("date", SortField.LONG))} - oldest date first</li>
     *   <li>{@code new Sort(new SortField("name", SortField.STRING))} - file name A to Z</li>
     *   <li>{@code new Sort(new SortField("name", SortField.STRING, true))} - file name Z to A</li>
     *   <li>{@code new Sort(new SortField("size", SortField.INT), SortField.FIELD_SCORE)} -
     *       by file size, then by score (several sort keys may be given)</li>
     * </ol>
     *
     * <p>Note: with any of the seven Sort values above the printed score is NaN; only the
     * un-sorted {@code search(query, 10)} call (i.e. {@code sort == null}) prints real scores.
     *
     * <p>If no index reader can be obtained, a message is written to stderr and the method
     * returns without searching. Search or parse failures are printed to stderr.
     *
     * @param expr query expression, parsed with QueryParser against the "content" field
     * @param sort sort order, or {@code null} for the default relevance search
     */
    public void searchBySort(String expr, Sort sort) {
        IndexReader currentReader = this.getIndexReader();
        if (currentReader == null) {
            // getIndexReader() has already printed the underlying failure; building an
            // IndexSearcher on a null reader would only add a NullPointerException.
            System.err.println("Index reader is unavailable; search skipped for: " + expr);
            return;
        }

        IndexSearcher searcher = new IndexSearcher(currentReader);
        QueryParser parser = new QueryParser(Version.LUCENE_36, "content", new StandardAnalyzer(Version.LUCENE_36));
        // One formatter for the whole result list instead of one per hit.
        SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_PATTERN);
        try {
            TopDocs tds;
            if (null == sort) {
                tds = searcher.search(parser.parse(expr), MAX_HITS);
            } else {
                tds = searcher.search(parser.parse(expr), MAX_HITS, sort);
            }
            for (ScoreDoc sd : tds.scoreDocs) {
                Document doc = searcher.doc(sd.doc);
                // NOTE(review): per the Lucene 3.6 Document.getBoost() docs, the boost of a
                // document retrieved from the index is presumably always 1.0 - confirm.
                System.out.print("文檔編號=" + sd.doc + " 文檔權值=" + doc.getBoost() + " 文檔評分=" + sd.score + " ");
                String date = dateFormat.format(new Date(Long.parseLong(doc.get("date"))));
                System.out.println("size=" + doc.get("size") + " date=" + date + " name=" + doc.get("name"));
            }
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            try {
                searcher.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Releases the cached IndexReader and the index Directory, if they were opened.
     * Close failures are printed to stderr. Safe to call more than once; a later
     * {@link #searchBySort(String, Sort)} call will fail to obtain a reader and skip the search.
     */
    public void close() {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
            reader = null;
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
            directory = null;
        }
    }

    /**
     * Demonstrates the sort options; swap in one of the commented calls to try another order.
     */
    public static void main(String[] args) {
        AdvancedSearchBySort advancedSearch = new AdvancedSearchBySort();
        try {
            // // sort by score, highest first (also the default order)
            // advancedSearch.searchBySort("Java", null);
            // advancedSearch.searchBySort("Java", Sort.RELEVANCE);
            // // sort by document number, ascending
            // advancedSearch.searchBySort("Java", Sort.INDEXORDER);
            // // sort by file size, ascending
            // advancedSearch.searchBySort("Java", new Sort(new SortField("size", SortField.INT)));
            // // sort by file date, oldest first
            // advancedSearch.searchBySort("Java", new Sort(new SortField("date", SortField.LONG)));
            // // sort by file name, A to Z
            // advancedSearch.searchBySort("Java", new Sort(new SortField("name", SortField.STRING)));
            // // sort by file name, Z to A
            // advancedSearch.searchBySort("Java", new Sort(new SortField("name", SortField.STRING, true)));
            // sort by file size first, then by score (several sort keys may be given)
            advancedSearch.searchBySort("Java", new Sort(new SortField("size", SortField.INT), SortField.FIELD_SCORE));
        } finally {
            advancedSearch.close();
        }
    }
}