ASP.NET 基於 HTML 的檔案搜尋引擎實現

來源:互聯網
上載者:User
1.引擎的實現部分(其中涉及到資料庫的表的操作,這裡使用的是預存程序):

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;

using System.Data;
using System.Data.SqlClient;

using System.Text;
using System.Text.RegularExpressions;
using Internship.Data;

namespace Internship.WebSite
{
    /// <summary>
    /// In-memory keyword search over a catalog of <see cref="Entry"/> objects that is
    /// loaded from the database (via stored procedures) when the type is first used.
    /// </summary>
    public static class Searching
    {
        /// <summary>Maximum number of content characters kept in a search hit before "...." is appended.</summary>
        private const int MaxSnippetLength = 200;

        /// <summary>A match in the title counts this many times more than a match in the content.</summary>
        private const int TitleMatchWeight = 20;

        private const string HighlightOpen = "<font color=\"#FF0066\">";
        private const string HighlightClose = "</font>";

        /// <summary>Symbols removed from text before indexing/searching.</summary>
        private static readonly string[] RemovedSymbols =
            { "\\", "|", "(", ")", "[", "]", "*", "?", "}", "{", "^", "+" };

        private static readonly char[] WordSeparators = { ' ', '\t', '\n', '\r' };

        private static readonly Regex STRIP_HTML = new Regex("<[^>]*>", RegexOptions.Compiled);

        private static List<Entry> _Catalog = new List<Entry>();

        /// <summary>
        /// The searchable entries. Assigning <c>null</c> resets the catalog to an empty list
        /// so that later searches do not fail.
        /// </summary>
        public static List<Entry> Catalog
        {
            get { return _Catalog; }
            set { _Catalog = value ?? new List<Entry>(); }
        }

        /// <summary>Adds an entry to the searchable catalog.</summary>
        /// <param name="entry">The entry to add.</param>
        public static void AddToCatalog(Entry entry)
        {
            Catalog.Add(entry);
        }

        /// <summary>Removes an entry from the searchable catalog.</summary>
        /// <param name="entry">The entry to remove.</param>
        public static void RemoveFromCatalog(Entry entry)
        {
            Catalog.Remove(entry);
        }

        /// <summary>Loads the catalog from the database the first time the type is used.</summary>
        static Searching()
        {
            Catalog = BuildCatalog();
        }

        /// <summary>
        /// Searches the catalog and returns the matching entries in the order produced by
        /// sorting the internal result list.
        /// </summary>
        /// <param name="searchItem">The raw text typed by the user; may be null or empty.</param>
        /// <returns>
        /// Copies of the matching entries with the content shortened to a snippet and the
        /// matched terms wrapped in highlight markup. Empty when nothing matches.
        /// </returns>
        public static List<Entry> Hit(String searchItem)
        {
            List<Entry> entries = new List<Entry>();
            if (string.IsNullOrEmpty(searchItem))
            {
                return entries;
            }

            foreach (Result result in BuildResultSet(searchItem, false))
            {
                entries.Add(result.IEntry);
            }

            return entries;
        }

        /// <summary>
        /// Ranks every catalog entry against the search terms and returns the entries whose
        /// rank is greater than zero.
        /// </summary>
        /// <param name="searchTerm">The raw search text.</param>
        /// <param name="includeComments">Not used by this implementation.</param>
        /// <returns>The sorted list of results.</returns>
        private static List<Result> BuildResultSet(string searchTerm, bool includeComments)
        {
            List<Result> results = new List<Result>();
            if (string.IsNullOrEmpty(searchTerm))
            {
                return results;
            }

            string term = CleanContent(searchTerm.ToLowerInvariant().Trim(), false);
            string[] terms = term.Split(new char[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);

            // Without this guard the pattern would be "()", which matches at every position
            // and would return the whole catalog.
            if (terms.Length == 0)
            {
                return results;
            }

            Regex matcher = BuildTermRegex(terms);

            foreach (Entry entry in Catalog)
            {
                if (entry == null)
                {
                    continue;
                }

                string title = entry.Title ?? string.Empty;
                string content = entry.Content ?? string.Empty;

                int titleMatches = matcher.Matches(title).Count;
                int contentMatches = matcher.Matches(content).Count;
                int rank = titleMatches * TitleMatchWeight + contentMatches;
                if (rank <= 0)
                {
                    continue;
                }

                if (content.Length > MaxSnippetLength)
                {
                    content = content.Substring(0, MaxSnippetLength) + "....";
                }

                // Work on a copy: the catalog entry is shared by every search, so truncating
                // or highlighting it in place would corrupt the index for later queries.
                Entry hit = CopyEntry(entry);
                hit.Title = Highlight(title, matcher);
                hit.Content = Highlight(content, matcher);

                Result result = new Result();
                result.Rank = rank;
                result.IEntry = hit;
                results.Add(result);
            }

            // NOTE(review): the ordering is whatever Result's own comparison defines; that
            // type is declared outside this file.
            results.Sort();
            return results;
        }

        /// <summary>
        /// Builds one case-insensitive regex that matches any of the given terms literally.
        /// </summary>
        /// <param name="terms">The non-empty list of search terms.</param>
        /// <returns>A regex of the form <c>(term1|term2|...)</c> with every term escaped.</returns>
        private static Regex BuildTermRegex(string[] terms)
        {
            // Escape so that characters such as '.' or '$' typed by the user are matched
            // literally instead of being interpreted as regex syntax.
            string[] escaped = Array.ConvertAll<string, string>(terms, Regex.Escape);
            string pattern = "(" + string.Join("|", escaped) + ")";
            return new Regex(pattern, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);
        }

        /// <summary>
        /// Creates a new entry carrying the same values as <paramref name="source"/>.
        /// </summary>
        /// <param name="source">The catalog entry to copy.</param>
        /// <returns>A separate instance that can be modified safely.</returns>
        private static Entry CopyEntry(Entry source)
        {
            // NOTE(review): only the members this file populates are copied; if Entry
            // declares further members, extend this method.
            Entry copy = new Entry();
            copy.Kinds = source.Kinds;
            copy.ID = source.ID;
            copy.Title = source.Title;
            copy.Content = source.Content;
            copy.PostDate = source.PostDate;
            return copy;
        }

        /// <summary>
        /// Reads all job postings and all company introductions from the database and
        /// converts them into catalog entries.
        /// </summary>
        /// <returns>The freshly loaded entries (job postings first).</returns>
        public static List<Entry> BuildCatalog()
        {
            List<Entry> entries = new List<Entry>();
            LoadEntries(entries, "GetAllJobPostings", ReadJobPosting);
            LoadEntries(entries, "GetAllCompanyIntroductions", ReadCompanyIntroduction);
            return entries;
        }

        /// <summary>
        /// Executes a stored procedure and appends one entry per returned row.
        /// </summary>
        /// <param name="target">The list that receives the entries.</param>
        /// <param name="procedureName">Name of the stored procedure to execute.</param>
        /// <param name="readEntry">Converts the current row into an entry.</param>
        private static void LoadEntries(List<Entry> target, string procedureName, Func<IDataRecord, Entry> readEntry)
        {
            using (SqlConnection cn = new SqlConnection(InternshipSettings.WebSiteConnectionString))
            using (SqlCommand cmd = new SqlCommand(procedureName, cn))
            {
                cmd.CommandType = CommandType.StoredProcedure;
                cn.Open();

                using (SqlDataReader dr = cmd.ExecuteReader())
                {
                    while (dr.Read())
                    {
                        target.Add(readEntry(dr));
                    }
                }
            }
        }

        /// <summary>Converts one row of GetAllJobPostings into an entry.</summary>
        private static Entry ReadJobPosting(IDataRecord row)
        {
            Entry en = new Entry();
            en.Kinds = 1; // 1 for job posting, 2 for company introduction
            en.ID = row["JobPostingID"].ToString();
            en.Title = row["JobPostingTitle"].ToString();
            en.PostDate = row["JobPostingTime"].ToString();
            en.Content = CleanContent(row["JobPostingText"].ToString(), true);
            return en;
        }

        /// <summary>Converts one row of GetAllCompanyIntroductions into an entry.</summary>
        private static Entry ReadCompanyIntroduction(IDataRecord row)
        {
            Entry en = new Entry();
            en.Kinds = 2; // 1 for job posting, 2 for company introduction
            en.ID = row["IntroductionID"].ToString();
            en.Title = row["CompanyName"].ToString();

            // Join with spaces so the last word of one column does not fuse with the first
            // word of the next one.
            string combined = row["Introduction"].ToString() + " "
                + row["Address"].ToString() + " "
                + row["Phone"].ToString();
            en.Content = CleanContent(combined, true);
            return en;
        }

        /// <summary>
        /// Strips all HTML tags from the specified string.
        /// </summary>
        /// <param name="html">The string containing HTML</param>
        /// <returns>A string in which every tag has been replaced by a single space.</returns>
        public static string StripHtml(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                return string.Empty;
            }

            // A space (not an empty string) keeps the words on both sides of a tag apart.
            return STRIP_HTML.Replace(html, " ");
        }

        /// <summary>
        /// Normalises text for indexing and searching: optionally strips HTML tags, removes a
        /// fixed set of symbols, lower-cases every word and drops one-character words.
        /// </summary>
        /// <param name="content">The text to clean; null is treated as empty.</param>
        /// <param name="removeHtml">When true, HTML tags are stripped first.</param>
        /// <returns>The remaining words, each followed by a single space.</returns>
        public static string CleanContent(string content, bool removeHtml)
        {
            if (removeHtml)
            {
                content = StripHtml(content);
            }

            if (string.IsNullOrEmpty(content))
            {
                return string.Empty;
            }

            foreach (string symbol in RemovedSymbols)
            {
                content = content.Replace(symbol, string.Empty);
            }

            // NOTE(review): the published listing removed " " here, which deletes every word
            // boundary and fuses a multi-word query into a single term. It looks like an
            // HTML-decoded "&nbsp;", so non-breaking spaces are turned into ordinary spaces
            // instead. Confirm against the original project source.
            content = content.Replace("&nbsp;", " ").Replace('\u00A0', ' ');

            string[] words = content.Split(WordSeparators, StringSplitOptions.RemoveEmptyEntries);
            StringBuilder sb = new StringBuilder();
            foreach (string rawWord in words)
            {
                string word = rawWord.ToLowerInvariant().Trim();
                if (word.Length > 1)
                {
                    sb.Append(word).Append(' ');
                }
            }

            return sb.ToString();
        }

        /// <summary>
        /// Wraps every match of <paramref name="matcher"/> in highlight markup.
        /// </summary>
        /// <param name="src">The text to decorate.</param>
        /// <param name="matcher">The regex built from the search terms.</param>
        /// <returns>The decorated text; the matched text keeps its original casing.</returns>
        private static string Highlight(string src, Regex matcher)
        {
            if (string.IsNullOrEmpty(src))
            {
                return string.Empty;
            }

            // One pass over the original text: the inserted markup is never re-scanned, so a
            // term such as "font" cannot match inside a tag added for an earlier term.
            return matcher.Replace(src, m => HighlightOpen + m.Value + HighlightClose);
        }
    }
}

2.如何使用?

// Run a search only when TB.Text is non-empty, then restart paging at page 1.
if (!string.IsNullOrEmpty(TB.Text))
{
    CurPage = 1;
    entry = Searching.Hit(TB.Text);

    // Number of pages: the full pages plus one more when a partial page remains.
    TolPage = entry.Count / PAGE_SIZE + (entry.Count % PAGE_SIZE == 0 ? 0 : 1);

    DataBind(CurPage, TolPage);
}

相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.