標籤:lucene查詢並高亮顯示
1.匯入jar包
<img src="https://s1.51cto.com/wyfs02/M02/9B/4F/wKioL1lhjZXR8xzCAAAfzPxm9aM717.png-wh_500x0-wm_3-wmp_4-s_3480792273.png" title="QQ20170709095730.png" alt="wKioL1lhjZXR8xzCAAAfzPxm9aM717.png-wh_50" />
2.建立實體Bean
package com.zhishang.lucene;/** * Created by Administrator on 2017/7/8. */public class HtmlBean { private String title; private String content; private String url; public void setTitle(String title) { this.title = title; } public void setContent(String content) { this.content = content; } public void setUrl(String url) { this.url = url; } public String getTitle() { return title; } public String getContent() { return content; } public String getUrl() { return url; }}
3.建立工具Bean
package com.zhishang.lucene;

import net.htmlparser.jericho.Element;
import net.htmlparser.jericho.HTMLElementName;
import net.htmlparser.jericho.Source;
import org.junit.Test;

import java.io.File;
import java.io.IOException;

/**
 * Helper that turns an HTML file into an {@link HtmlBean}.
 *
 * Created by Administrator on 2017/7/8.
 */
public class HtmlBeanUtil {

    /**
     * Parses the given HTML file and copies its title text, its full extracted
     * text and its absolute path into a new {@link HtmlBean}.
     *
     * @param file the HTML file to read
     * @return the populated bean, or {@code null} when the file has no
     *         {@code <title>} element or an {@link IOException} occurs
     *         (the exception's stack trace is printed)
     */
    public static HtmlBean parseHtml(File file) {
        HtmlBean result = null;
        try {
            Source source = new Source(file);
            Element titleElement = source.getFirstElement(HTMLElementName.TITLE);
            boolean titleAvailable = titleElement != null
                    && titleElement.getTextExtractor() != null;
            if (titleAvailable) {
                HtmlBean parsed = new HtmlBean();
                parsed.setTitle(titleElement.getTextExtractor().toString());
                parsed.setContent(source.getTextExtractor().toString());
                parsed.setUrl(file.getAbsolutePath());
                result = parsed;
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
        return result;
    }
}
4.建立操作Bean
package com.zhishang.lucene;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.document.Document;import org.apache.lucene.index.DirectoryReader;import org.apache.lucene.index.IndexReader;import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;import org.apache.lucene.queryparser.classic.ParseException;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.Query;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.search.highlight.Highlighter;import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;import org.apache.lucene.search.highlight.QueryScorer;import org.apache.lucene.search.highlight.SimpleHTMLFormatter;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;import org.junit.Test;import org.wltea.analyzer.lucene.IKAnalyzer;import java.io.File;import java.io.IOException;import java.util.ArrayList;import java.util.Collection;import java.util.List;/** * Created by Administrator on 2017/7/7. 
*/public class SearchIndex { public List<HtmlBean> search(String keyword){ Directory dir = null; try { dir = FSDirectory.open(new File(CreateIndex.indexDir)); IndexReader reader = DirectoryReader.open(dir); IndexSearcher searcher = new IndexSearcher(reader); Analyzer analyzer = new IKAnalyzer(); MultiFieldQueryParser multiFieldQueryParser = new MultiFieldQueryParser(Version.LUCENE_4_9,new String[]{"title","content"},analyzer); Query query = multiFieldQueryParser.parse(keyword); TopDocs search = searcher.search(query,10); ScoreDoc[] scoreDocs = search.scoreDocs; SimpleHTMLFormatter simpleHTMLFormatter = new SimpleHTMLFormatter("<font color=‘red‘>","</font>"); QueryScorer queryScorer = new QueryScorer(query,"title"); Highlighter highlighter = new Highlighter(simpleHTMLFormatter,queryScorer); List<HtmlBean> htmlBeanList = new ArrayList<HtmlBean>(); for (ScoreDoc scoreDoc:scoreDocs){ Document document = reader.document(scoreDoc.doc); String title = highlighter.getBestFragment(new IKAnalyzer(),"title",document.get("title")); String content = highlighter.getBestFragments(new IKAnalyzer().tokenStream("content",document.get("content")),document.get("content"),3,"..."); String url = document.get("url"); HtmlBean htmlBean = new HtmlBean(); htmlBean.setTitle(title); htmlBean.setContent(content); htmlBean.setUrl(url); htmlBeanList.add(htmlBean); } return htmlBeanList;// System.out.println(search.totalHits); } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } catch (InvalidTokenOffsetsException e) { e.printStackTrace(); } return null; }}
5.建立測試Bean
package com.zhishang.lucene;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.standard.StandardAnalyzer;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.store.Directory;import org.apache.lucene.store.FSDirectory;import org.apache.lucene.util.Version;import org.junit.Test;import java.io.File;import java.util.List;/** * Created by Administrator on 2017/7/8. */public class LuceneBean { @Test public void search(){ SearchIndex searchIndex = new SearchIndex(); List<HtmlBean> htmlBeanList = searchIndex.search("java"); for (HtmlBean bean:htmlBeanList){ System.out.println(bean.getTitle()); System.out.println(bean.getContent()); System.out.println(bean.getUrl()); System.out.println("-----------------------------------------------------"); } } /* 建立索引 */ @Test public void createIndex(){ File file = new File(CreateIndex.indexDir); if (file.exists()){ file.delete(); file.mkdirs(); } CreateIndex createIndex = new CreateIndex(); createIndex.createIndex(); }}
本文出自 “素顏” 部落格,請務必保留此出處http://suyanzhu.blog.51cto.com/8050189/1945606
Lucene查詢並高亮顯示