Recently my company needed fuzzy search for similar keywords, so I decided to use Lucene directly.
Lucene lets you add search functionality to your program. It can index and search any data from which you can extract text; it does not care about the data source or even the language, as long as you convert the content to a text format. That means you can search HTML pages, text documents, Word documents, PDFs, or anything else, provided you can extract the text from it. You can also use Lucene to index data stored in a database and give your users features such as full-text search. In short, Lucene is very powerful, and there are many open-source analyzer plug-ins for different languages.
Here's an example in which I build an index over each line of a TXT file, which means treating each line as a Document object. Roughly speaking, every Document in Lucene corresponds to a database (library) name, and each Field is the equivalent of a table name. Lucene can automatically process the text to remove stop words such as modal particles, and it can index the fields you specify as keywords for use in queries. Lucene is fairly easy to use and fast, but it is not as flexible as a database. Let me illustrate with an example (here I use Lucene 4.7.2, the latest version at the time of writing; note that you need to add the required JAR packages to your project, and with Maven you can simply declare the dependencies listed at http://mvnrepository.com/artifact/org.apache.lucene). I have written an example below that you can use as a reference for learning.
package lucene.home.clq;

/**
 * @author Chenlongquan
 *
 * Copyright Manning Publications Co.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

// Create index
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileFilter;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.LineNumberReader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
Import Java.util.Set; /** * This code is originally build for the index * */public class Indexer {public static void main (S Tring[] args) throws Exception {String Indexdir = "F:\\index"; 1 String datadir = "F:\\baidu";
2 Long start = System.currenttimemillis ();
Indexer Indexer = new Indexer (indexdir);
int numindexed; try {numindexed = Indexer.index (DataDir, New TextfilEsfilter ());
finally {indexer.close ();
Long end = System.currenttimemillis ();
System.out.println ("indexing" + Numindexed + "files took" + (End-start) + "milliseconds");
Private IndexWriter writer;
Public Indexer (String indexdir) throws IOException {Directory dir = fsdirectory.open (new File (Indexdir));
writer = new IndexWriter (Dir,indexwriterconfig ()); Debug here for indexing} public void Close () throws IOException {writer.close (); 4} Private Indexwriterconfig Indexwriterconfig () {Analyzer Analyzer = new Smartchineseanalyzer
(version.lucene_47); More highlights of this column: http://www.bianceng.cnhttp://www.bianceng.cn/webkf/tools/IndexWriterConfig config = new
Indexwriterconfig (version.lucene_47, analyzer);
return config; public int index (String datadir, FileFilter filter) throws Exception {file[] files = new File (datadir). Listfiles (); for (File f:files) {if (!f.isdirectory () &&!f.ishidden () && f.exists ()
&& F.canread () && (filter = null | | filter.accept (f))) {indexfile (f); } return Writer.numdocs (); 5} private Static class Textfilesfilter implements FileFilter {public Boolean accept (File Path) {return Path.getname (). toLowerCase ()//6. EndsWith (". txt"); 6}}/** * traverses each file and reads each line of data in the file, treating it as a document * @param f * @throws exceptio n */private void Indexfile (File f) throws Exception {System.out.println ("indexing" + F.getcanonicalpath ())
;
Document doc = GetDocument (f);
list<string> lists = Readfilenodup (f); for (String list:lists) {Documentdoc = new Document ();
Doc.add (New Field ("Contents", list,textfield.type_stored));
Writer.adddocument (DOC); //10}//read a file private list<string> readFile files (file Filepathan
Dname) throws IOException {FileInputStream fis = new FileInputStream (filepathandname);
InputStreamReader ISR = new InputStreamReader (FIS, "UTF-8");
BufferedReader br = new BufferedReader (ISR);
LineNumberReader LNR = new LineNumberReader (BR);
List<string> returnvalue = new arraylist<string> ();
int cnt = 0;
while (true) {cnt++;
String TempStr = Lnr.readline ();
if (TempStr = null) break;
if (Tempstr.length () < 2) continue;
Returnvalue.add (TEMPSTR);
} lnr.close ();
Br.close ();
Isr.close ();
Fis.close ();
Return returnvalue;
Read a file and then return to the public static list<string> readfilenodup (file filepathandname) throws IOException { FileInputStream FIS = newFileInputStream (Filepathandname);
InputStreamReader ISR = new InputStreamReader (FIS, "UTF-8");
BufferedReader br = new BufferedReader (ISR);
LineNumberReader LNR = new LineNumberReader (BR);
set<string> set = new hashset<string> ();
while (true) {String tempstr = Lnr.readline ();
if (TempStr = null) break;
if (Tempstr.length () < 2) continue;
Set.add (Tempstr.trim ());
} lnr.close ();
Br.close ();
Isr.close ();
Fis.close ();
List<string> returnvalue = new arraylist<string> (Set.size ());
Returnvalue.addall (set);
Return returnvalue; }
}