Lucene CREATE Index
How to create, read, update, and delete indexed documents
Copy maven dependencies
http://mvnrepository.com/
<dependencies> <dependency> <groupId>org.apache.lucene</groupId> <artifactId>lucene-core</artifactId> <version>5.3.1</version> </dependency> <dependency> <groupId>junit</groupId> <artifactId>junit</artifactId> <version>4.12</version> </dependency></dependencies>
1 Creating an Index
Indexingtest.java
package com.matrix.lucene;

import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Before;
import org.junit.Test;

/**
 * Demonstrates creating a Lucene index on disk.
 *
 * <p>Each test document has three fields: an {@code id} and a {@code city}
 * (StringField: indexed as a single token, stored) and a {@code desc}
 * (TextField: tokenized by the analyzer, not stored).
 */
public class IndexingTest {

    // Test data: three documents, one per array slot.
    private String ids[] = {"1", "2", "3"};
    private String citys[] = {"Qingdao", "Nanjing", "Shanghai"};
    private String descs[] = {"Qingdao is a beautiful city.",
            "Nanjing are a city of culture",
            "Shanghai is a Bustling City"};

    // Index directory; opened in setUp(), shared by the tests.
    private Directory dir;

    /**
     * Opens the index directory and writes the test documents before each test.
     *
     * @throws Exception if the directory cannot be opened or writing fails
     */
    @Before
    public void setUp() throws Exception {
        dir = FSDirectory.open(Paths.get("E:\\software\\lucene\\demo2"));
        IndexWriter writer = getWriter();
        // Traverse the arrays and add one document per entry.
        for (int i = 0; i < ids.length; i++) {
            Document doc = new Document();
            // Storing the value trades disk space for faster retrieval.
            doc.add(new StringField("id", ids[i], Field.Store.YES));
            doc.add(new StringField("city", citys[i], Field.Store.YES));
            // Use TextField (tokenized, unstored) for large free-text content.
            doc.add(new TextField("desc", descs[i], Field.Store.NO));
            // Buffered in memory; flushed to the directory on close().
            writer.addDocument(doc);
        }
        writer.close();
    }

    /**
     * Builds an {@link IndexWriter} over {@link #dir}.
     *
     * <p>The writer needs an analyzer because text written to the index must
     * be parsed/tokenized; the standard analyzer is used here.
     *
     * @return a new IndexWriter for the shared directory
     * @throws Exception if the writer cannot be created
     */
    private IndexWriter getWriter() throws Exception {
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        return new IndexWriter(dir, iwc);
    }

    /**
     * Reports how many documents were written to the index.
     *
     * @throws Exception if the writer cannot be opened
     */
    @Test
    public void testIndexWriter() throws Exception {
        IndexWriter writer = getWriter();
        System.out.println("Write " + writer.numDocs() + " documents");
        writer.close();
    }
}
E:\software\lucene\demo2
1.1 Using the Luke tool to view the index
Prerequisite: Install JDK1.8, and configure environment variables
2 reading a document
/** Opens an IndexReader on the shared directory and prints the document counts. */
@Test
public void testIndexReader() throws Exception {
    IndexReader reader = DirectoryReader.open(dir);
    // maxDoc counts deleted-but-unmerged slots too; numDocs counts live documents.
    int max = reader.maxDoc();
    int live = reader.numDocs();
    System.out.println("最大文档数:" + max);
    System.out.println("实际文档数:" + live);
    reader.close();
}
3 Deleting a document before merging
// 第一种方式:不合并索引,删除文档,索引不动,只是做标记// 第二种方式:@Testpublic void testDeleteBeforeMerge() throws Exception { IndexWriter writer = getWriter(); System.out.println("删除前:" + writer.numDocs()); writer.deleteDocuments(new Term("id", "1")); writer.commit(); System.out.println("删除后:最大文档数即writer.maxDoc()" + writer.maxDoc()); System.out.println("删除后:实际文档数即writer.numDocs()" + writer.numDocs()); writer.close();}
4 Deleting a document after merging
@Testpublic void testDeleteAfterMerge() throws Exception { IndexWriter writer = getWriter(); System.out.println("删除前:"+writer.numDocs()); writer.deleteDocuments(new Term("id","1")); // 强制删除 writer.forceMergeDeletes(); writer.commit(); System.out.println("writer.maxDocs():"+writer.maxDoc()); System.out.println("writer.numDocs():"+writer.numDocs()); writer.close();}
Use the tool to see if the index is deleted
5 Test Updates
@Testpublic void testUpdate() throws Exception { IndexWriter writer = getWriter(); // 进行更新操作 Document doc = new Document(); doc.add(new StringField("id", "1", Field.Store.YES)); doc.add(new StringField("city", "shenzhen", Field.Store.YES)); doc.add(new TextField("city", "shenzhen is a great city", Field.Store.NO)); writer.updateDocument(new Term("id", "1"), doc); writer.close(); }
6 document Field weighting
After boosting, the document's search ranking improves.
Build Index
@Testpublic void index() throws Exception { // 打开索引目录,该目录存放创建的索引文件 dir = FSDirectory.open(Paths.get("E:\\software\\lucene\\demo3")); IndexWriter writer = getWriter(); // 遍历数组 for (int i = 0; i < ids.length; i++) { Document doc = new Document(); // 存储的话,能提高效率,用空间换时间 doc.add(new StringField("id", ids[i], Field.Store.YES)); doc.add(new StringField("author", authors[i], Field.Store.YES)); doc.add(new StringField("position", positions[i], Field.Store.YES)); // 使用StringField字段则不会进行分词 doc.add(new TextField("title", titles[i], Field.Store.YES)); // 存储内容很多的时候用TextField doc.add(new TextField("content", contents[i], Field.Store.NO)); // 添加文档 writer.addDocument(doc); // 写的时候在内存中有缓存 } // 关闭 writer.close();}
Inquire
@Testpublic void search() throws Exception { dir = FSDirectory.open(Paths.get("E:\\software\\lucene\\demo3")); IndexReader reader = DirectoryReader.open(dir); IndexSearcher is = new IndexSearcher(reader); // 定义要查询的字段 String seachField = "title"; // 定义要查询的值 String q = "java"; Term t = new Term(seachField, q); Query query = new TermQuery(t); TopDocs hits = is.search(query, 10); System.out.println("匹配‘" + q + "‘,总共查询到" + hits.totalHits + "个文档"); // 遍历TopDocs for (ScoreDoc scoreDoc : hits.scoreDocs) { Document doc = is.doc(scoreDoc.doc); System.out.println("document:" + doc.get("author")); } reader.close();}
Weighted operations
Indexingtext2.java
package com.matrix.lucene;

import java.nio.file.Paths;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.junit.Test;

/**
 * Demonstrates per-field boosting: the "title" field of documents whose
 * position is "Boss" gets a higher boost, so they rank earlier in title
 * searches.
 */
public class IndexingText2 {

    // Test data: four documents, one per array slot.
    private String ids[] = {"1", "2", "3", "4"};
    private String authors[] = {"Jack", "Marry", "John", "Json"};
    private String positions[] = {"Accounting", "Technician", "Salesperson", "Boss"};
    private String titles[] = {"Java is a good language.",
            "Java was a cross platform language",
            "Java powerful",
            "You should learn Java"};
    private String contents[] = {
            "If possible, use the same JRE major version at both index and search time.",
            "When upgrading to a different JRE major version, consider re-indexing.",
            "Different JRE major versions may implement different versions of Unicode,",
            "For example: with Java 1.4, 'LetterTokenizer' would split around the character U+02C6,"};

    // Index directory (holds the created index files).
    private Directory dir;

    /**
     * Builds an {@link IndexWriter} over {@link #dir}; the standard analyzer
     * tokenizes text before it is written to the index.
     *
     * @return a new IndexWriter for the shared directory
     * @throws Exception if the writer cannot be created
     */
    public IndexWriter getWriter() throws Exception {
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig wc = new IndexWriterConfig(analyzer);
        return new IndexWriter(dir, wc);
    }

    /**
     * Builds the index, boosting the title field of "Boss" documents.
     *
     * @throws Exception if the index cannot be written
     */
    @Test
    public void index() throws Exception {
        dir = FSDirectory.open(Paths.get("E:\\software\\lucene\\demo3"));
        IndexWriter writer = getWriter();
        for (int i = 0; i < ids.length; i++) {
            Document doc = new Document();
            // StringField values are indexed untokenized; storing trades
            // space for retrieval speed.
            doc.add(new StringField("id", ids[i], Field.Store.YES));
            doc.add(new StringField("author", authors[i], Field.Store.YES));
            doc.add(new StringField("position", positions[i], Field.Store.YES));
            // Title is tokenized so it can be searched term-by-term.
            TextField field = new TextField("title", titles[i], Field.Store.YES);
            // Boost: documents written by the Boss rank higher in title searches.
            if ("Boss".equals(positions[i])) {
                field.setBoost(1.5f);
            }
            doc.add(field);
            // Large content is tokenized but not stored.
            doc.add(new TextField("content", contents[i], Field.Store.NO));
            // Buffered in memory until close().
            writer.addDocument(doc);
        }
        writer.close();
    }

    /**
     * Searches the title field for "Java" and prints the author of each hit;
     * the boosted "Boss" document is expected to come first.
     *
     * @throws Exception if the search fails
     */
    @Test
    public void search() throws Exception {
        dir = FSDirectory.open(Paths.get("E:\\software\\lucene\\demo3"));
        IndexReader reader = DirectoryReader.open(dir);
        IndexSearcher is = new IndexSearcher(reader);
        // The field and the value to look up.
        String searchField = "title";
        String q = "Java";
        Term t = new Term(searchField, q);
        Query query = new TermQuery(t);
        TopDocs hits = is.search(query, 10);
        System.out.println("match '" + q + "', a total of " + hits.totalHits + " documents");
        for (ScoreDoc scoreDoc : hits.scoreDocs) {
            Document doc = is.doc(scoreDoc.doc);
            System.out.println("document:" + doc.get("author"));
        }
        reader.close();
    }

    public static void main(String[] args) {
    }
}
Json comes first because that document's boost (weight) was increased.
Lucene CREATE Index