標籤:kwic 搜尋引擎搜尋 關鍵字搜尋匹配演算法
下面是基於KWIC 的關鍵字匹配演算法(管道+過濾器模式下實現)
關鍵區段的管道+過濾器 軟體體系下的實現, 在很多的關鍵字搜尋平台都使用了這一 迴圈移位+排序輸出的 關鍵字匹配演算法:
具體需求如下:
1、使用管道-過濾器風格:
每個過濾器處理資料,然後將結果送至下一個過濾器。只要有資料傳入,過濾器即開始工作。過濾器之間的資料共用被嚴格限制在管道傳輸。
四個過濾器:
輸入(Input filter):
從資料來源讀取輸入檔案,解析格式,將行寫入輸出管道
移位(CircularShifter filter):迴圈移位
排序(Alphabetizer filter):
輸出(Output filter)
管道:
in_cs pipe
cs_al pipe
al_ou pipe
例如:
代碼如下:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;

namespace KWIC
{
    /// <summary>
    /// A pipe: holds the list of lines handed from one filter to the next.
    /// </summary>
    public class Pipe
    {
        // Starts out empty so that reading a pipe nobody has written to yet
        // yields an empty list instead of null.
        List<string> word = new List<string>();

        /// <summary>
        /// Returns the list most recently written to this pipe
        /// (an empty list if nothing has been written).
        /// </summary>
        public List<string> read()
        {
            return word;
        }

        /// <summary>
        /// Stores <paramref name="word"/> as the pipe's content.
        /// A null argument is stored as an empty list.
        /// </summary>
        public void write(List<string> word)
        {
            this.word = word ?? new List<string>();
        }
    }

    /// <summary>
    /// Base class of all filters. The default <see cref="Transform"/> does nothing.
    /// </summary>
    public abstract class Filter
    {
        /// <summary>
        /// Performs this filter's processing step.
        /// </summary>
        public virtual void Transform()
        {
        }
    }

    /// <summary>
    /// Input filter: feeds the lines given at construction into the output pipe.
    /// </summary>
    public class InputFilter : Filter
    {
        public Pipe outPipe;
        public List<string> word;

        public InputFilter(List<string> word, Pipe outPipe)
        {
            this.word = word;
            this.outPipe = outPipe;
        }

        /// <summary>
        /// Writes the input lines to the output pipe.
        /// </summary>
        // "override" (not a plain redeclaration) so that calls made through a
        // Filter reference reach this implementation instead of the empty base.
        public override void Transform()
        {
            outPipe.write(word);
        }
    }

    /// <summary>
    /// Circular-shift filter: emits every input line followed by all of its
    /// circular word shifts.
    /// </summary>
    public class CircleShiftFilter : Filter
    {
        public Pipe inputPipe;
        public Pipe outPipe;

        public CircleShiftFilter(Pipe inputPipe, Pipe outPipe)
        {
            this.inputPipe = inputPipe;
            this.outPipe = outPipe;
        }

        /// <summary>
        /// Reads the lines from the input pipe and writes a new list to the
        /// output pipe: first all original lines (in order), then, for each
        /// line in order, its circular shifts.
        /// </summary>
        public override void Transform()
        {
            List<string> source = inputPipe.read();

            // Build a fresh list rather than appending to the list that came
            // out of the pipe: that list is shared with the upstream filter,
            // and mutating it made repeated runs shift already-shifted lines.
            List<string> result = new List<string>(source);
            foreach (string line in source)
            {
                result.AddRange(BuildShifts(line));
            }

            outPipe.write(result);
        }

        /// <summary>
        /// Produces the circular shifts of one line, excluding the line itself.
        /// The k-th shift (k = 1 .. n-1) moves the last k words to the front,
        /// e.g. "I love you" gives "you I love", then "love you I".
        /// </summary>
        private static List<string> BuildShifts(string line)
        {
            // Splitting on a single space keeps empty tokens, so consecutive
            // or trailing spaces yield empty "words" that are rotated like
            // any other token.
            string[] tokens = line.Split(' ');
            int count = tokens.Length;

            List<string> shifts = new List<string>();
            for (int k = 1; k < count; k++)
            {
                string tail = string.Join(" ", tokens, count - k, k);
                string head = string.Join(" ", tokens, 0, count - k);
                shifts.Add(tail + " " + head);
            }

            return shifts;
        }
    }

    /// <summary>
    /// Alphabetizer filter: sorts the lines it reads.
    /// </summary>
    public class AlphaFilter : Filter
    {
        public Pipe inputPipe;
        public Pipe outPipe;

        public AlphaFilter(Pipe inputPipe, Pipe outPipe)
        {
            this.inputPipe = inputPipe;
            this.outPipe = outPipe;
        }

        /// <summary>
        /// Writes a sorted copy of the input pipe's lines to the output pipe,
        /// using the default string comparison of <see cref="List{T}.Sort()"/>.
        /// </summary>
        public override void Transform()
        {
            // Sort a copy so the list held by the input pipe is left untouched.
            List<string> sorted = new List<string>(inputPipe.read());
            sorted.Sort();
            outPipe.write(sorted);
        }
    }

    /// <summary>
    /// Output filter: passes the lines from its input pipe to its output pipe unchanged.
    /// </summary>
    public class OutputFilter : Filter
    {
        public Pipe inputPipe;
        public Pipe outPipe;

        public OutputFilter(Pipe inputPipe, Pipe outPipe)
        {
            this.inputPipe = inputPipe;
            this.outPipe = outPipe;
        }

        /// <summary>
        /// Forwards the input pipe's list to the output pipe.
        /// </summary>
        public override void Transform()
        {
            outPipe.write(inputPipe.read());
        }
    }

    /// <summary>
    /// Wires the four filters together with pipes and runs them in sequence.
    /// </summary>
    public class KWIC_System
    {
        Pipe in_cs; // input filter    -> circular shifter
        Pipe cs_al; // circular shifter -> alphabetizer
        Pipe al_ou; // alphabetizer    -> output filter
        Pipe ou_ui; // output filter   -> caller of GetResult()

        InputFilter inputFilter;
        CircleShiftFilter shifter;
        AlphaFilter alpha;
        OutputFilter output;

        /// <summary>
        /// Builds the pipeline over three sample sentences.
        /// </summary>
        public KWIC_System()
        {
            in_cs = new Pipe();
            cs_al = new Pipe();
            al_ou = new Pipe();
            ou_ui = new Pipe();

            List<string> word = new List<string>();
            word.Add(Normalize("I love you"));
            word.Add(Normalize("me too"));
            word.Add(Normalize("do you know"));

            inputFilter = new InputFilter(word, in_cs);
            shifter = new CircleShiftFilter(in_cs, cs_al);
            alpha = new AlphaFilter(cs_al, al_ou);
            output = new OutputFilter(al_ou, ou_ui);
        }

        /// <summary>
        /// Trims the text and collapses every run of whitespace (spaces, tabs,
        /// newlines, ...) into a single space.
        /// </summary>
        private static string Normalize(string text)
        {
            return Regex.Replace(text.Trim(), @"\s+", " ");
        }

        /// <summary>
        /// Runs input, shift, sort and output in order and returns the final list.
        /// </summary>
        public List<string> GetResult()
        {
            inputFilter.Transform();
            shifter.Transform();
            alpha.Transform();
            output.Transform();
            return ou_ui.read();
        }
    }
}
(備忘:如果想在這裡換行輸出,需要在輸出的每一行結尾加 ‘\r\n’)
在廣泛的搜尋技術中,其實這個關鍵字匹配演算法應用範圍很廣,比如我們常見的Baidu和Google的搜尋索引鍵 提示功能。