1. Key point: use a Lucene.Net version below 3.0; otherwise its API is incompatible with the interface expected by the PanGu word segmenter (Lucene.Net.Analysis.PanGu).
The key code is as follows:
using System;
using System.IO;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using Lucene.Net.Analysis;
using Lucene.Net.Documents;
using Lucene.Net.Index;
using Lucene.Net.Search;
using Lucene.Net.Store;
using Lucene.Net.Analysis.PanGu;
using Maticsoft.DBUtility;
using System.Data;
using System.Diagnostics;

namespace SearchSentence
{
    /// <summary>
    /// Console demo: builds a Lucene.Net index over the rows of dbo.Sentences using the
    /// PanGu analyzer, then answers phrase queries typed on the console.
    /// Written against the Lucene.Net 2.9.x API (TokenStream.Next(), TopScoreDocCollector.create,
    /// TopDocs.scoreDocs, ScoreDoc.doc); these members were renamed or removed in 3.0.
    /// </summary>
    class Program
    {
        // Folder that holds the index; used by Query and by the CreateIndex call in Main.
        private const string IndexDirectory = "Ch-eg";

        // Name of the indexed/stored text field; must match between CreateIndex and Query.
        private const string TextField = "Textzh";

        // NOTE(review): the second argument of DbHelperSQL.Query was lost in the published
        // listing; it is presumably the command timeout in seconds. 600 is a placeholder - confirm.
        private const int SqlTimeoutSeconds = 600;

        // NOTE(review): the slop literal was lost in the published listing; 100 is assumed - confirm.
        private const int PhraseSlop = 100;

        // NOTE(review): the collector size literal was lost in the published listing; 1000 is assumed - confirm.
        private const int MaxHits = 1000;

        /// <summary>
        /// Splits <paramref name="content"/> into words with the PanGu analyzer.
        /// </summary>
        /// <param name="content">Text to segment; must not be null.</param>
        /// <returns>The segmented terms, in the order the analyzer produced them.</returns>
        public static string[] SplitWords(string content)
        {
            List<string> words = new List<string>();

            // Use the PanGu segmentation algorithm.
            Analyzer analyzer = new PanGuAnalyzer();
            TokenStream tokenStream = analyzer.TokenStream("", new StringReader(content));

            // Next() keeps returning tokens until the stream is exhausted, then returns null.
            Lucene.Net.Analysis.Token token;
            while ((token = tokenStream.Next()) != null)
            {
                words.Add(token.TermText());
            }

            return words.ToArray();
        }

        /// <summary>
        /// Reads every row of dbo.Sentences and writes one document per row (fields "id" and
        /// "Textzh") into the index stored in <paramref name="strdirectory"/>. Rows whose Id or
        /// Textzh is empty are skipped. The index is created if it does not exist yet,
        /// otherwise documents are appended to it.
        /// </summary>
        /// <param name="strdirectory">Path of the index folder.</param>
        public static void CreateIndex(string strdirectory)
        {
            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(strdirectory), new NativeFSLockFactory());
            try
            {
                // Does an index (folder plus index files) already exist?
                bool isExist = IndexReader.IndexExists(directory);
                if (isExist)
                {
                    // The folder may still be locked, e.g. because the program exited
                    // unexpectedly while indexing; unlock it so a writer can be opened.
                    // Caveat: if another process is legitimately writing right now, forcing
                    // the unlock here will cause a conflict.
                    if (IndexWriter.IsLocked(directory))
                    {
                        IndexWriter.Unlock(directory);
                    }
                }

                // Arguments: index folder, PanGu analyzer, create-new flag, maximum field length.
                IndexWriter writer = new IndexWriter(directory, new PanGuAnalyzer(), !isExist, IndexWriter.MaxFieldLength.UNLIMITED);
                try
                {
                    DataSet ds = DbHelperSQL.Query("SELECT * FROM dbo.Sentences", SqlTimeoutSeconds);
                    DataTable dt = ds.Tables[0];
                    int rowsCount = dt.Rows.Count;

                    for (int n = 0; n < rowsCount; n++)
                    {
                        string id = dt.Rows[n]["Id"].ToString();
                        string text = dt.Rows[n]["Textzh"].ToString();
                        if (id != "" && text != "")
                        {
                            // One Document per data row; every value is stored as a string.
                            Document document = new Document();

                            // Store.YES keeps the original value so it can be read back with
                            // Document.Get; NOT_ANALYZED indexes the id as a single term.
                            document.Add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));

                            // ANALYZED indexes the segmented words (required for phrase search);
                            // WITH_POSITIONS_OFFSETS also records word positions and offsets.
                            document.Add(new Field(TextField, text, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));

                            writer.AddDocument(document);

                            // Progress indicator, rewritten in place on the same console line.
                            Console.Write("{0}\r", n + 1);
                        }
                    }
                }
                finally
                {
                    // Closing the writer also releases the write lock.
                    writer.Close();
                }
            }
            finally
            {
                // Do not forget this Close, otherwise the indexed results cannot be searched.
                directory.Close();
            }
        }

        /// <summary>
        /// Segments <paramref name="strquery"/> with PanGu, runs it as a phrase query against
        /// the "Textzh" field of the "Ch-eg" index, and prints every matching sentence followed
        /// by the elapsed search time.
        /// </summary>
        /// <param name="strquery">Phrase typed by the user; must not be null.</param>
        public static void Query(string strquery)
        {
            Stopwatch sw = new Stopwatch();
            sw.Start();

            FSDirectory directory = FSDirectory.Open(new DirectoryInfo(IndexDirectory), new NoLockFactory());
            IndexReader reader = null;
            IndexSearcher searcher = null;
            try
            {
                reader = IndexReader.Open(directory, true);
                searcher = new IndexSearcher(reader);

                // Search criteria: every word of the segmented user input becomes one phrase term.
                PhraseQuery query = new PhraseQuery();
                foreach (string word in SplitWords(strquery))
                {
                    query.Add(new Term(TextField, word));
                }

                // Maximum distance allowed between the phrase terms.
                query.SetSlop(PhraseSlop);

                // Container that receives the query results.
                TopScoreDocCollector collector = TopScoreDocCollector.create(MaxHits, true);
                searcher.Search(query, null, collector);
                sw.Stop();

                // TopDocs(0, GetTotalHits()) returns all collected hits; TopDocs(start, count)
                // with other values can be used for paging.
                ScoreDoc[] docs = collector.TopDocs(0, collector.GetTotalHits()).scoreDocs;
                for (int i = 0; i < docs.Length; i++)
                {
                    // Lucene's internally assigned document id, used to load the stored document.
                    int docId = docs[i].doc;
                    Document doc = searcher.Doc(docId);
                    Console.Write("{0}\n", doc.Get(TextField));
                }

                TimeSpan ts2 = sw.Elapsed;
                Console.WriteLine("This query costs a total of {0}ms.\n", ts2.TotalMilliseconds);
            }
            finally
            {
                // Release the searcher, reader and directory even when the search throws.
                if (searcher != null)
                {
                    searcher.Close();
                }
                if (reader != null)
                {
                    reader.Close();
                }
                directory.Close();
            }
        }

        /// <summary>
        /// Prompts for phrases and searches each one until an empty line (or end of input) is read.
        /// </summary>
        static void Main(string[] args)
        {
            // Uncomment to (re)build the index before searching:
            // CreateIndex(IndexDirectory);

            Console.Write("Press phrase: \n");
            string strQuery = Console.ReadLine();

            // ReadLine returns null at end of input; treat that like an empty line and stop.
            while (!string.IsNullOrEmpty(strQuery))
            {
                Query(strQuery);
                Console.Write("Press phrase: \n");
                strQuery = Console.ReadLine();
            }
        }
    }
}
Demo download: http://download.csdn.net/detail/lampqiu/8909427
Lucene.Net and PanGu word segmentation