A trie, also known as a character search tree or prefix tree, is mainly used for string matching (see http://en.wikipedia.org/wiki/Trie ). It is well suited to keyword search: for example, you can find keywords in an article and turn them into links. Dirty-word filtering works the same way, except that instead of inserting a link, the matched word is replaced with mask characters.
The current code performs only a simple replacement and does not give special treatment to any characters. For example, "I saw your mom yesterday and forced me to buy a house" should not be treated as containing a dirty word, because a comma (,) separates the characters involved; to handle such cases, the range of characters that may be skipped must be restricted in the program.
The skip counter in the program is a simple way to catch disguised variants of dirty words, where extra characters are inserted between the letters, as in "looking for a ** sister". By default, up to three consecutive characters can be skipped; this can be adjusted as needed. In short, this is a trie exercise.
/// <summary>
/// A character trie (prefix tree) used to find keywords in a text and mask
/// them with '*' characters.
/// </summary>
public class TrieTree
{
    /// <summary>Child nodes, keyed by the next character of a keyword.</summary>
    private readonly Dictionary<char, TrieTree> children;

    /// <summary>True when the path from the root to this node spells a complete keyword.</summary>
    public bool End { get; set; }

    /// <summary>Creates an empty node with no children.</summary>
    public TrieTree()
    {
        children = new Dictionary<char, TrieTree>();
    }

    /// <summary>
    /// Adds a keyword to the trie. Null or empty keywords are ignored.
    /// </summary>
    /// <param name="keyword">The keyword to register.</param>
    public void AddKey(string keyword)
    {
        if (string.IsNullOrEmpty(keyword))
        {
            return;
        }

        var node = this;
        foreach (var ch in keyword)
        {
            TrieTree next;
            if (!node.children.TryGetValue(ch, out next))
            {
                next = new TrieTree();
                node.children.Add(ch, next);
            }
            node = next;
        }
        node.End = true;
    }

    /// <summary>
    /// Masks every keyword occurrence in <paramref name="text"/> with '*',
    /// allowing up to three consecutive non-matching characters between
    /// keyword characters.
    /// </summary>
    /// <param name="text">The text to filter; replaced with the masked text when anything matched.</param>
    public void Replace(ref string text)
    {
        Replace(ref text, 3);
    }

    /// <summary>
    /// Masks every keyword occurrence in <paramref name="text"/> with '*'.
    /// The whole matched span, including any skipped characters inside it,
    /// is masked. Matching stops at the shortest keyword found from a given
    /// start position. Null or empty text is left untouched.
    /// </summary>
    /// <param name="text">The text to filter; replaced with the masked text when anything matched.</param>
    /// <param name="maxSkip">
    /// Maximum number of consecutive non-matching characters tolerated
    /// between two keyword characters.
    /// </param>
    public void Replace(ref string text, int maxSkip)
    {
        if (string.IsNullOrEmpty(text))
        {
            return;
        }

        // Work on a char buffer so only the matched span is rewritten,
        // instead of rebuilding and rescanning the whole string per hit.
        var chars = text.ToCharArray();
        var changed = false;

        for (var i = 0; i < chars.Length; i++)
        {
            var len = MatchLength(chars, i, maxSkip);
            if (len == 0)
            {
                continue;
            }

            for (var k = i; k < i + len; k++)
            {
                chars[k] = '*';
            }
            changed = true;

            // The loop's i++ supplies the final step, so advance by len - 1;
            // otherwise the character right after the match is never examined.
            i += len - 1;
        }

        if (changed)
        {
            text = new string(chars);
        }
    }

    /// <summary>
    /// Returns the length of the keyword span starting at <paramref name="start"/>,
    /// or 0 when no keyword starts there.
    /// </summary>
    private int MatchLength(char[] chars, int start, int maxSkip)
    {
        TrieTree node;
        if (!children.TryGetValue(chars[start], out node))
        {
            return 0;
        }

        // A single-character keyword is complete as soon as its first character matches.
        if (node.End)
        {
            return 1;
        }

        var skipped = 0;
        for (var j = start + 1; j < chars.Length; j++)
        {
            TrieTree next;
            if (node.children.TryGetValue(chars[j], out next))
            {
                node = next;
                skipped = 0;
                if (node.End)
                {
                    return j + 1 - start;
                }
            }
            else if (++skipped > maxSkip)
            {
                // Too many consecutive filler characters: give up on this start position.
                return 0;
            }
        }

        return 0;
    }
}
The usage is as follows:
/// <summary>
/// Demo: builds a trie from a comma-separated keyword list, masks the
/// keywords in a sample sentence and prints the result.
/// </summary>
class Program
{
    static void Main(string[] args)
    {
        var trie = new TrieTree();
        var keywords = "fuck, prostitute, fuck".Split(',');
        foreach (var key in keywords)
        {
            // Entries are separated by ", ", so trim the space that Split leaves
            // in front of each keyword; otherwise it becomes part of the keyword.
            trie.AddKey(key.Trim());
        }

        var text = @"I wiped it, Nima, fuck you, you prostitute, bitch. ";
        trie.Replace(ref text);
        Console.WriteLine(text);

        // Wait for a key press so the console window stays open.
        Console.Read();
    }
}
Execution result:
From http://blog.csdn.net/maddemon/article/details/7011699