Sensitive word filtering is essential in website development. It is generally implemented with a DFA (deterministic finite automaton), which is a relatively efficient algorithm for this task.
Reference Link: http://cmsblogs.com/?p=1031
A better code implementation:
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

/**
 * Sensitive-word filter backed by a character trie (the "DFA" of the article).
 *
 * <p>Every keyword is stored as a path of characters starting at the root node;
 * the node reached by the last character of a keyword is flagged as terminal.
 * For the keywords {@code "ab"} and {@code "acd"} the structure is:
 *
 * <pre>
 * root
 *  `- a
 *     |- b (end)
 *     `- c
 *        `- d (end)
 * </pre>
 *
 * <p>Matching is case-sensitive and, at any given start position, stops at the
 * FIRST terminal node reached. If one keyword is a prefix of another, only the
 * shorter one is ever reported.
 */
public class KeywordFilter {

    /** One trie node: its outgoing edges and whether a keyword ends here. */
    private static final class Node {
        private final HashMap<Character, Node> children = new HashMap<Character, Node>();
        private boolean end;
    }

    /** Root of the trie; it represents the empty prefix and is never terminal. */
    private final Node root = new Node();

    /**
     * Adds sensitive words to the filter.
     *
     * <p>Each keyword is trimmed before insertion. {@code null} entries and
     * keywords that are empty after trimming are ignored.
     *
     * @param keywords the words to add; {@code null} is treated as an empty list
     */
    public void addKeywords(List<String> keywords) {
        if (keywords == null) {
            return;
        }
        for (String raw : keywords) {
            if (raw == null) {
                continue;
            }
            String key = raw.trim();
            Node node = root; // every keyword is inserted starting from the outermost layer
            for (int j = 0; j < key.length(); j++) {
                Character ch = Character.valueOf(key.charAt(j));
                Node next = node.children.get(ch);
                if (next == null) {
                    next = new Node();
                    node.children.put(ch, next);
                }
                node = next;
            }
            // An empty keyword never leaves the root, and the root must not be terminal.
            if (node != root) {
                node.end = true;
            }
        }
    }

    /**
     * Checks whether a keyword starts at position {@code begin} of {@code txt}.
     *
     * @param txt   the text to inspect (must not be {@code null})
     * @param begin index of the first character to match
     * @return the length of the first (shortest) keyword starting at
     *         {@code begin}, or 0 if none does. This includes the case where
     *         the text ends in the middle of a keyword, which is not a match.
     */
    private int checkKeywords(String txt, int begin) {
        Node node = root;
        for (int i = begin; i < txt.length(); i++) {
            node = node.children.get(txt.charAt(i));
            if (node == null) {
                return 0; // no keyword continues with this character
            }
            if (node.end) {
                return i - begin + 1; // reached the end of a keyword
            }
        }
        // Ran out of text before reaching a terminal node: only a partial match.
        return 0;
    }

    /**
     * Determines whether {@code txt} contains at least one keyword.
     *
     * @param txt the text to scan; {@code null} is treated as containing nothing
     * @return {@code true} if a keyword occurs anywhere in {@code txt}
     */
    public boolean isContentKeywords(String txt) {
        if (txt == null) {
            return false;
        }
        for (int i = 0; i < txt.length(); i++) {
            if (checkKeywords(txt, i) > 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Returns the keywords found in {@code txt}, in order of appearance.
     *
     * <p>The scan is left to right and non-overlapping: after a match, scanning
     * resumes right after the matched keyword. A keyword occurring several
     * times is listed several times.
     *
     * @param txt the text to scan; {@code null} yields an empty list
     * @return a new mutable list of the matched keywords, never {@code null}
     */
    public List<String> getTxtKeywords(String txt) {
        List<String> found = new ArrayList<String>();
        if (txt == null) {
            return found;
        }
        int length = txt.length();
        int i = 0;
        while (i < length) {
            int len = checkKeywords(txt, i);
            if (len > 0) {
                found.add(txt.substring(i, i + len));
                i += len;
            } else {
                i++;
            }
        }
        return found;
    }

    /**
     * Initializes the filter with a small built-in list of sensitive words.
     */
    public void initFilterCode() {
        List<String> keywords = new ArrayList<String>();
        keywords.add("Chinese");
        keywords.add("Chinese man");
        keywords.add("Falun");
        this.addKeywords(keywords);
    }

    /**
     * Demo entry point: loads the built-in keywords, scans a sample sentence
     * and prints whether it contains keywords and which ones.
     */
    public static void main(String[] args) throws IOException {
        KeywordFilter filter = new KeywordFilter();
        filter.initFilterCode();
        String txt = "haha, but the Falun Gong, the Chinese,";
        boolean boo = filter.isContentKeywords(txt);
        System.out.println(boo);
        List<String> set = filter.getTxtKeywords(txt);
        System.out.println("contains the following sensitive words:" + set);
    }
}
Java Sensitive word filtering