Use Lucene.NET for Intra-Site Search

Source: Internet
Author: User

Use Lucene.NET for Intra-Site Search

You may well have heard of Lucene: it is an open-source search technology that emerged several years ago, and many websites use it to implement intra-site search. Recently, I have also learned how to use Lucene.NET for data retrieval.

Import the Lucene.NET Development Kit

Lucene is an open-source full-text search engine toolkit from the Apache Software Foundation. It is a full-text search engine architecture that provides a complete query engine and index engine, as well as part of a text analysis engine. Lucene aims to provide software developers with a simple and easy-to-use toolkit for conveniently implementing full-text retrieval in a target system, or for building a complete full-text retrieval engine on top of it. Lucene.Net is the .NET port of Lucene.

You can download the latest Lucene.NET release from the Apache Lucene.NET website.

Create, update, and delete Indexes

Search, search by index

IndexHelper add, update, and delete Indexes

?

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

Using System;

Using Lucene. Net. Store;

Using Lucene. Net. Index;

Using Lucene. Net. Analysis. PanGu;

Using Lucene. Net. Documents;

 

Namespace BLL

{

Class IndexHelper

{

/// <Summary>

/// Log Assistant

/// </Summary>

Static Common. LogHelper logger = new Common. LogHelper (typeof (SearchBLL ));

/// <Summary>

/// Index storage location, which is saved in the configuration file and read from the configuration file

/// </Summary>

Static string indexPath = Common. ConfigurationHelper. deleettingmappath ("IndexPath ");

 

/// <Summary>

/// Create an index file or update an index file

/// </Summary>

/// <Param name = "item"> index information </param>

Public static void CreateIndex (Model. HelperModel. IndexFileHelper item)

{

Try

{

// Index Repository

FSDirectory directory = FSDirectory. Open (new System. IO. DirectoryInfo (indexPath), new NativeFSLockFactory ());

// Determine whether the index exists

Bool isUpdate = IndexReader. IndexExists (directory );

If (isUpdate)

{

// If the index directory is locked (for example, the program unexpectedly exits during the indexing process), unlock the directory first.

If (IndexWriter. IsLocked (directory ))

{

// Unlock the index Library

IndexWriter. Unlock (directory );

}

}

// Create an IndexWriter object and add an index

IndexWriter writer = new IndexWriter (directory, new PanGuAnalyzer (),! IsUpdate, Lucene. Net. Index. IndexWriter. MaxFieldLength. UNLIMITED );

// Obtain the news title

String title = item. FileTitle;

// Obtain the news subject content

String body = item. FileContent;

// To avoid repeated indexes, delete the number = I record and add it again.

// Especially for updates, you must delete the previous index first.

Writer. DeleteDocuments (new Term ("id", item. FileName ));

// Create an index file Document

Document document = new Document ();

// ANALYZED is only available for fields that require full-text search.

// Add the id field

Document. Add (new Field ("id", item. FileName, Field. Store. YES, Field. Index. NOT_ANALYZED ));

// Add the title Field

Document. Add (new Field ("title", title, Field. Store. YES, Field. Index. NOT_ANALYZED ));

// Add the body Field

Document. Add (new Field ("body", body, Field. Store. YES, Field. Index. ANALYZED, Lucene. Net. Documents. Field. TermVector. WITH_POSITIONS_OFFSETS ));

// Add a url Field

Document. Add (new Field ("url", item. FilePath, Field. Store. YES, Field. Index. NOT_ANALYZED ));

// Write to the index database

Writer. AddDocument (document );

// Close the resource

Writer. Close ();

// Do not forget to Close, otherwise the index results cannot be found

Directory. Close ();

// Record logs

Logger. Debug (String. Format ("index {0} created", item. FileName ));

}

Catch (SystemException ex)

{

// Record error logs

Logger. Error (ex );

Throw;

}

Catch (Exception ex)

{

// Record error logs

Logger. Error (ex );

Throw;

}

}

 

/// <Summary>

/// Delete the corresponding index based on the id

/// </Summary>

/// <Param name = "guid"> id of the index to be deleted </param>

Public static void DeleteIndex (string guid)

{

Try

{

//// Index Repository

FSDirectory directory = FSDirectory. Open (new System. IO. DirectoryInfo (indexPath), new NativeFSLockFactory ());

// Determine whether an index exists in the index database

Bool isUpdate = IndexReader. IndexExists (directory );

If (isUpdate)

{

// If the index directory is locked (for example, the program unexpectedly exits during the indexing process), unlock the directory first.

If (IndexWriter. IsLocked (directory ))

{

IndexWriter. Unlock (directory );

}

}

IndexWriter writer = new IndexWriter (directory, new PanGuAnalyzer (),! IsUpdate, Lucene. Net. Index. IndexWriter. MaxFieldLength. UNLIMITED );

// Delete the index file

Writer. DeleteDocuments (new Term ("id", guid ));

Writer. Close ();

Directory. Close (); // do not forget to Close; otherwise, the index results cannot be found.

Logger. Debug (String. Format ("index deleted {0} succeeded", guid ));

}

Catch (Exception ex)

{

// Record logs

Logger. Error (ex );

// Throw an exception

Throw;

}

}

}

}

SearchBLL: searching the index

?

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

Using Lucene. Net. Analysis;

Using Lucene. Net. Analysis. PanGu;

Using Lucene. Net. Documents;

Using Lucene. Net. Index;

Using Lucene. Net. Search;

Using Lucene. Net. Store;

Using Model. HelperModel;

Using System;

Using System. Collections. Generic;

 

Namespace BLL

{

Public static class SearchBLL

{

// A class may be output to logs in multiple places. Logs need to be recorded in multiple places. logger is often made into static variables.

/// <Summary>

/// Log Assistant

/// </Summary>

Static Common. LogHelper logger = new Common. LogHelper (typeof (SearchBLL ));

/// <Summary>

/// Index storage location

/// </Summary>

Static string indexPath = Common. ConfigurationHelper. deleettingmappath ("IndexPath ");

/// <Summary>

/// Search

/// </Summary>

/// <Param name = "keywords"> keywords searched by users </param>

/// <Returns> returns the search result </returns>

Public static List <SearchResult> Search (string keywords)

{

Try

{

// Index Repository

FSDirectory directory = FSDirectory. Open (new System. IO. DirectoryInfo (indexPath), new NoLockFactory ());

// Create an IndexReader object

IndexReader reader = IndexReader. Open (directory, true );

// Create an IndexSearcher object

IndexSearcher searcher = new IndexSearcher (reader );

// Create a PhraseQuery query object

PhraseQuery query = new PhraseQuery ();

// Split the keywords entered by the user

Foreach (string word in SplitWord (keywords ))

{

// Add a search keyword

Query. Add (new Term ("body", word ));

}

// Set the word segmentation interval to within 100 words

Query. SetSlop (100 );

TopScoreDocCollector collector = TopScoreDocCollector. create (1000, true );

// Query results based on query Conditions

Searcher. Search (query, null, collector );

// ScoreDoc result

ScoreDoc [] docs = collector. TopDocs (0, collector. GetTotalHits (). scoreDocs;

// Save the list of search results

List <SearchResult> listResult = new List <SearchResult> ();

For (int I = 0; I <docs. Length; I ++)

{

// Obtain the document number (primary key, which is allocated by Lucene. net)

// Only the Document id is found in the search result. If you want to retrieve the Document, you need the Doc to retrieve it again.

// Reduce content occupation

Int docId = docspolici2.16.doc;

// Search for Document by id

Document doc = searcher. Doc (docId );

String number = doc. Get ("id ");

String title = doc. Get ("title ");

String body = doc. Get ("body ");

String url = doc. Get ("url ");

// Create a search result object

SearchResult result = new SearchResult ();

Result. Number = number;

Result. Title = title;

Result. BodyPreview = Preview (body, keywords );

Result. Url = url;

// Add to result list

ListResult. Add (result );

}

If (listResult. Count = 0)

{

Return null;

}

Else

{

Return listResult;

}

}

Catch (SystemException ex)

{

Logger. Error (ex );

Return null;

}

Catch (Exception ex)

{

Logger. Error (ex );

Return null;

}

}

 

/// <Summary>

/// Get Content preview

/// </Summary>

/// <Param name = "body"> content </param>

/// <Param name = "keyword"> keywords </param>

/// <Returns> </returns>

Private static string Preview (string body, string keyword)

{

// Create an HTMLFormatter. The parameter is the prefix and suffix of the highlighted word.

PanGu. HighLight. SimpleHTMLFormatter simpleHTMLFormatter = new PanGu. HighLight. SimpleHTMLFormatter ("<font color = \" red \ ">", "</font> ");

// Create a Highlighter and input the HTMLFormatter and pangu word segmentation object Semgent

PanGu. HighLight. Highlighter highlighter = new PanGu. HighLight. Highlighter (simpleHTMLFormatter, new PanGu. Segment ());

// Set the number of characters for each abstract segment

Highlighter. FragmentSize = 100;

// Obtain the most matched abstract segment

String bodyPreview = highlighter. GetBestFragment (keyword, body );

Return bodyPreview;

}

 

/// <Summary>

/// Pangu Word Segmentation: Word Segmentation for the search keywords entered by the user

/// </Summary>

/// <Param name = "str"> keywords entered by the user </param>

/// <Returns> An array composed of results after word segmentation </returns>

Private static string [] SplitWord (string str)

{

List <string> list = new List <string> ();

Analyzer analyzer = new PanGuAnalyzer ();

TokenStream tokenStream = analyzer. TokenStream ("", new System. IO. StringReader (str ));

Lucene. Net. Analysis. Token token = null;

While (token = tokenStream. Next ())! = Null)

{

List. Add (token. TermText ());

}

Return list. ToArray ();

}

}

}

SearchResult Model

?

1

2

3

4

5

6

7

8

9

10

11

12

13

namespace Model.HelperModel
{
    /// <summary>
    /// A single search hit returned by SearchBLL.Search.
    /// </summary>
    public class SearchResult
    {
        /// <summary>Document id (the "id" field stored in the index).</summary>
        public string Number { get; set; }

        /// <summary>Document title.</summary>
        public string Title { get; set; }

        /// <summary>Highlighted excerpt of the body that matched the query.</summary>
        public string BodyPreview { get; set; }

        /// <summary>URL of the original document.</summary>
        public string Url { get; set; }
    }
}

The above is all the content of this article. I hope you will like it.

Related Article

Contact Us

The content source of this page is from Internet, which doesn't represent Alibaba Cloud's opinion; products and services mentioned on that page don't have any relationship with Alibaba Cloud. If the content of the page makes you feel confusing, please write us an email, we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.