標籤:mod ram append com [] exists 字元 null director
其實接下來就是貼一下代碼,熟悉一下Lucene的正常工作流程,或者說怎麼使用這個API,更深層次的東西這篇文章不會講到。
上一篇文章也說了maven的配置,只要你電腦連網就可以下載下來。我貼一下代碼。
package com.muyi.lucene.mavenlucene.Ltest;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.nio.file.FileSystems;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Range;

import jxl.Cell;
import jxl.Sheet;
import jxl.Workbook;

/**
 * Minimal end-to-end Lucene example: extracts text from the {@code .txt}, {@code .doc} and
 * {@code .xls} files of one directory, indexes them, and runs a query against the index.
 *
 * <p>The class keeps a shared static {@link IndexWriter} and is not safe for concurrent use.
 *
 * @author xinghl
 */
public class IndexManager2 {

    // NOTE(review): IndexManager is not declared in this file; presumably a sibling class in the
    // same package. Confirm it exists, or whether IndexManager2 was intended here.
    private static IndexManager indexManager;

    /** Directory on disk that holds the Lucene index. */
    private static final String INDEX_DIR = "D:\\luceneIndex";

    /** Directory whose files are indexed by {@link #main(String[])}. */
    private static final String DATA_DIR = "D:\\luceneData";

    /** Writer currently open for indexing, or {@code null} when none is open. */
    private static IndexWriter indexWriter = null;

    /**
     * Returns the shared index manager, creating it on first use.
     *
     * @return the shared index manager instance
     */
    public IndexManager getManager() {
        if (indexManager == null) {
            indexManager = new IndexManager();
        }
        return indexManager;
    }

    /**
     * Rebuilds the index from the supported files directly inside {@code path}.
     *
     * <p>The previous index content is cleared once, then one document per file is added with
     * the stored fields {@code filename}, {@code content} and {@code path}. All documents are
     * written through a single writer and committed together, so every file ends up in the
     * index.
     *
     * @param path directory whose files should be indexed
     * @return {@code true} if the index was written and committed, {@code false} if indexing failed
     */
    public static boolean createIndex(String path) {
        long startMillis = System.currentTimeMillis();
        List<File> fileList = getFileList(path);
        boolean succeeded = true;

        File indexFile = new File(INDEX_DIR);
        if (!indexFile.exists()) {
            indexFile.mkdirs();
        }

        try (Directory directory = FSDirectory.open(FileSystems.getDefault().getPath(INDEX_DIR));
                Analyzer analyzer = new StandardAnalyzer()) {
            indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
            try {
                // Clear the old index exactly once; doing this per file would keep only the last file.
                indexWriter.deleteAll();
                for (File file : fileList) {
                    String content = extractContent(file);
                    System.out.println("檔案名稱字:" + file.getPath() + "檔案內容" + content);
                    indexWriter.addDocument(toDocument(file, content));
                }
                indexWriter.commit();
            } finally {
                // Close the writer before the directory and analyzer are released.
                closeWriter();
            }
        } catch (Exception e) {
            reportFailure("build index from", path, e);
            succeeded = false;
        }

        System.out.println("建立索引-----耗時:" + (System.currentTimeMillis() - startMillis) + "ms\n");
        return succeeded;
    }

    /** Dispatches to the text extractor that matches the file's extension. */
    private static String extractContent(File file) {
        switch (extensionOf(file.getName())) {
            case "txt":
                return txt2String(file);
            case "doc":
                return doc2String(file);
            case "xls":
                return xls2String(file);
            default:
                return "";
        }
    }

    /** Builds the Lucene document stored for one file. */
    private static Document toDocument(File file, String content) {
        Document document = new Document();
        document.add(new TextField("filename", file.getName(), Store.YES));
        document.add(new TextField("content", content, Store.YES));
        document.add(new TextField("path", file.getPath(), Store.YES));
        return document;
    }

    /** Returns the lower-cased extension of a file name, or "" when there is no usable one. */
    private static String extensionOf(String fileName) {
        int dot = fileName.lastIndexOf('.');
        if (dot <= 0) {
            // No dot, or a name that is only an extension (for example ".txt").
            return "";
        }
        return fileName.substring(dot + 1).toLowerCase(Locale.ROOT);
    }

    /** Prints a one-line context message followed by the stack trace. */
    private static void reportFailure(String action, Object target, Exception e) {
        System.err.println("Failed to " + action + " " + target);
        e.printStackTrace();
    }

    /**
     * Reads a text file line by line.
     *
     * <p>Each line is prefixed with a newline character, so non-empty results start with
     * {@code "\n"}. The file is decoded with the platform default charset. Reading is
     * best-effort: on failure the error is reported and whatever was read so far is returned.
     *
     * @param file the file to read
     * @return the file content, or the part read before a failure
     */
    public static String txt2String(File file) {
        StringBuilder text = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                text.append('\n').append(line);
            }
        } catch (Exception e) {
            // Deliberately broad: one unreadable file must not abort the whole indexing run.
            reportFailure("read text file", file, e);
        }
        return text.toString();
    }

    /**
     * Extracts the text of a binary Word ({@code .doc}) file.
     *
     * <p>Best-effort: on failure the error is reported and an empty string is returned.
     *
     * @param file the file to read
     * @return the document text, or an empty string if it could not be read
     */
    public static String doc2String(File file) {
        try (FileInputStream input = new FileInputStream(file)) {
            HWPFDocument wordDocument = new HWPFDocument(input);
            Range range = wordDocument.getRange();
            return range.text();
        } catch (Exception e) {
            // Deliberately broad: the parser can also fail with unchecked exceptions.
            reportFailure("read Word file", file, e);
            return "";
        }
    }

    /**
     * Extracts the text of an Excel ({@code .xls}) file by concatenating the contents of every
     * cell, sheet by sheet and row by row, without separators.
     *
     * <p>Best-effort: on failure the error is reported and an empty string is returned.
     *
     * @param file the file to read
     * @return the concatenated cell contents, or an empty string if the file could not be read
     */
    public static String xls2String(File file) {
        try (FileInputStream input = new FileInputStream(file)) {
            Workbook workbook = Workbook.getWorkbook(input);
            try {
                StringBuilder text = new StringBuilder();
                for (Sheet sheet : workbook.getSheets()) {
                    for (int row = 0; row < sheet.getRows(); row++) {
                        for (Cell cell : sheet.getRow(row)) {
                            text.append(cell.getContents());
                        }
                    }
                }
                return text.toString();
            } finally {
                workbook.close();
            }
        } catch (Exception e) {
            // Deliberately broad: the parser can also fail with unchecked exceptions.
            reportFailure("read Excel file", file, e);
            return "";
        }
    }

    /**
     * Searches the {@code content} field of the index and prints the hit count followed by the
     * file name, content and path of up to 1000 matching documents.
     *
     * <p>Failures are reported on standard error and not propagated.
     *
     * @param text the query string, in Lucene classic query-parser syntax
     */
    public static void searchIndex(String text) {
        long startMillis = System.currentTimeMillis();
        try (Directory directory = FSDirectory.open(FileSystems.getDefault().getPath(INDEX_DIR));
                Analyzer analyzer = new StandardAnalyzer();
                DirectoryReader reader = DirectoryReader.open(directory)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            Query query = new QueryParser("content", analyzer).parse(text);
            TopDocs topDocs = searcher.search(query, 1000);
            System.out.println(topDocs.totalHits);

            System.out.println("--------------------尋找結果-----------------------");
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                // Load the stored fields of the hit and print them.
                Document document = searcher.doc(scoreDoc.doc);
                System.out.println(document.get("filename") + document.get("content") + " " + document.get("path"));
            }
            System.out.println("--------------------尋找結果-----------------------");
        } catch (Exception e) {
            reportFailure("search index for", text, e);
        }
        System.out.println("查看索引-----耗時:" + (System.currentTimeMillis() - startMillis) + "ms\n");
    }

    /**
     * Lists the regular files directly inside a directory that have a supported extension.
     *
     * @param dirPath the directory to scan (not recursive)
     * @return the matching files; empty if the directory does not exist or cannot be listed
     */
    public static List<File> getFileList(String dirPath) {
        List<File> fileList = new ArrayList<File>();
        File[] entries = new File(dirPath).listFiles();
        if (entries == null) {
            // listFiles() returns null for a missing path, a non-directory, or an I/O error.
            return fileList;
        }
        for (File entry : entries) {
            if (entry.isFile() && isTxtFile(entry.getName())) {
                fileList.add(entry);
            }
        }
        return fileList;
    }

    /**
     * Tells whether a file name has one of the supported extensions: txt, xls or doc.
     *
     * <p>The comparison is case-insensitive and matches the whole extension, so
     * {@code "a.TXT"} is accepted while {@code "a.txt.bak"}, {@code "a.docx"} and a bare
     * {@code ".txt"} are not.
     *
     * @param fileName the file name to test
     * @return {@code true} if the extension is supported, {@code false} otherwise
     */
    public static boolean isTxtFile(String fileName) {
        String extension = extensionOf(fileName);
        return "txt".equals(extension) || "xls".equals(extension) || "doc".equals(extension);
    }

    /**
     * Closes the shared index writer if one is open.
     *
     * @throws Exception if closing the writer fails
     */
    public static void closeWriter() throws Exception {
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } finally {
                // Drop the reference so a closed writer is never reused or closed twice.
                indexWriter = null;
            }
        }
    }

    /**
     * Deletes a file, or a directory together with everything inside it.
     *
     * @param file the file or directory to delete
     * @return {@code true} only if the entry and all of its children were deleted;
     *         {@code false} otherwise, including when the path does not exist
     */
    public static boolean deleteDir(File file) {
        boolean childrenDeleted = true;
        if (file.isDirectory()) {
            File[] children = file.listFiles();
            if (children != null) {
                for (File child : children) {
                    childrenDeleted &= deleteDir(child);
                }
            }
        }
        // Always attempt the delete itself, then combine it with the children's outcome.
        return file.delete() && childrenDeleted;
    }

    /**
     * Recreates the index directory, indexes {@code DATA_DIR}, and runs a sample query.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        long startMillis = System.currentTimeMillis();

        // Start from an empty index directory whether or not an old one could be removed.
        File fileIndex = new File(INDEX_DIR);
        deleteDir(fileIndex);
        fileIndex.mkdir();

        createIndex(DATA_DIR);
        searchIndex("蒙特內格羅洞");

        System.out.println("執行耗時:" + (System.currentTimeMillis() - startMillis) + "ms\n");
    }
}
其實就是這幾步,建立閱讀器--建立索引--尋找索引--獲得結果--輸出結果。
大概就是這些流程。Lucene先到此為止。我突然想學一些其他東西。
Lucene 個人領悟 (三)