Article directory
- Lucene.Net 2.0: syntax highlighting after Chinese word segmentation
Lucene.Net 2.0 syntax highlighting after Chinese word segmentation — source: birdshover's blog
The Lucene.Net 2.0 source distribution includes Highlighter.Net-2.0.0, which can be used to implement syntax (hit) highlighting.
// Build a multi-condition (boolean) search query over the "title" and
// "content" fields, run it across one or more index directories, and
// highlight the matched terms in the paged results.
// NOTE(review): this fragment relies on names defined elsewhere in the file
// (key, dir, searchType, pageNo, indexSearcher, highlighter, timer) — their
// declarations are outside this view.
BooleanQuery bQuery = new BooleanQuery();
// Analyzer used for tokenization (same analyzer must be reused below when
// re-tokenizing the stored text for the highlighter).
Analyzer analyzer = new StandardAnalyzer();
// Multi-field parser: the user's key is matched against both fields.
MultiFieldQueryParser parser = new MultiFieldQueryParser(new string[] { "title", "content" }, analyzer);
Query query = parser.Parse(key);
bQuery.Add(query, BooleanClause.Occur.MUST);
DateTime now = DateTime.Now;

// Initialize the searcher. Distributed search: open one IndexReader per
// index directory and merge them with a MultiReader.
List<IndexReader> indexReaders = new List<IndexReader>();
string[] dirs = Directory.GetDirectories(dir);
if (searchType == SearchType.None)
{
    // No category selected: search every index under dir.
    foreach (string item in dirs)
    {
        indexReaders.Add(IndexReader.Open(Path.Combine(Path.Combine(dir, item), "save")));
    }
}
else
{
    // Single category: open only that category's index.
    // Fixed: the original nested Path.Combine(dir, searchType.ToString(), "save")
    // inside one call — Lucene.Net 2.0 era Path.Combine takes two arguments.
    indexReaders.Add(IndexReader.Open(Path.Combine(Path.Combine(dir, searchType.ToString()), "save")));
}
MultiReader reader = new MultiReader(indexReaders.ToArray());
indexSearcher = new IndexSearcher(reader);
// Run the search and record elapsed time in milliseconds.
Hits hits = indexSearcher.Search(bQuery);
timer = (DateTime.Now - now).TotalMilliseconds;
int count = hits.Length();

/* Compute the slice of hits to display (10 results per page). */
int start = (pageNo - 1) * 10;
int end = pageNo * 10 > count ? count : pageNo * 10;

/* Syntax-highlighting setup: score fragments against the parsed query and
   emit fragments of at most 100 characters. */
highlighter = new Highlighter(new QueryScorer(query));
highlighter.SetTextFragmenter(new SimpleFragmenter(100));
for (int i = start; i < end; i++)
{
    Lucene.Net.Documents.Document doc = hits.Doc(i);
    string text = doc.Get("content");
    // Append a 10-character sentinel so special characters at the end of the
    // title are not dropped by the fragmenter; it is stripped again below.
    // NOTE(review): the sentinel length must match the Remove(..., 10) calls —
    // the original post's marker was garbled by translation; verify length.
    string title = doc.Get("title") + "aaaaaaaaaa";
    Lucene.Net.Analysis.TokenStream tokenStream = analyzer.TokenStream("content", new System.IO.StringReader(text));
    Lucene.Net.Analysis.TokenStream titleStream = analyzer.TokenStream("title", new System.IO.StringReader(title));
    string result = highlighter.GetBestFragments(tokenStream, text, 2, "...");
    string tResult = highlighter.GetBestFragments(titleStream, title, 0, "..");
    // Strip the sentinel from the highlighted title (or from the raw title
    // when the highlighter produced no fragment).
    if (tResult.Length > 10)
        tResult = tResult.Remove(tResult.Length - 10, 10);
    if (string.IsNullOrEmpty(tResult))
        tResult = title.Remove(title.Length - 10, 10);
    // No highlighted fragment for the body: fall back to a plain prefix.
    if (string.IsNullOrEmpty(result))
    {
        if (text.Length > 100)
            result = text.Substring(0, 100);
        else
            result = text;
    }
    // Indicate truncation when the shown fragment is shorter than the body.
    if (result.Length < text.Length)
        result = result + "...";
}