5. Filter class definition:
filter.h:
#include <string>
#include "trie.h"
Class filter {
PRIVATE:
Static bool _ initialized;
Static trie _ trie;
Static void load (const char * filename, int startpos );
Static bool censor (STD: string & source );
Public:
Static bool _ legal;
Static bool _ illegal;
Enum level {
Normal = 1,
High = 2
};
Static void Init (Level level, STD: String LoadFile = "");
Static bool isinitialized ();
Static bool censor (const char * Source, int length );
};
Inline bool filter: isinitialized (){
Return _ initialized;
}
6. Implementation of the filter class:
filter.cpp:
#include "filter.h"

#include <fstream>
#include <iostream>
#include <string>

#include "logger.h"
Bool filter: :__ legal = true;
Bool filter ::__ illegal = false;
Bool filter ::__ initialized = false;
Trie filter ::__ trie (trienode: UTF16 );
Void filter: Init (Level level, STD: String LoadFile ){
If (Level = filter: normal ){
Load ("normal_keywords_utf16", 82); // 175822 bytes
} Else if (Level = filter: High ){
Load ("high_keywords_utf16", 2); // high_keywords_utf16
} Else {
Logger: Error ("filter level error ");
}
_ Initialized = true;
}
Void filter: load (const char * filepath, int startpos ){
STD: ifstream keywordsfile (filepath, STD: IOS: In | STD: IOS: Binary | STD: IOS: ATE );
If (keywordsfile. is_open ()){
STD: ifstream: pos_type ___ totalsize = keywordsfile. tellg ();
Keywordsfile. seekg (0, STD: IOS: Beg );
Char header [startpos]; // Header
Keywordsfile. Read (header, startpos); // Header
Char buffer [256];
Int ___ COUNT = 0;
Int ___ offset = 0;
While (___ count <___ totalsize ){
Keywordsfile. Read (buffer + ___ offset, 2 );
___ Count + = 2;
___ Offset + = 2;
If (buffer [___ offset-4] = '\ x0d' & buffer [___ offset-3] =' \ x00'
& Buffer [___ offset-2] = '\ x0a' & buffer [___ offset-1] =' \ x00 '){
___ Offset-= 4;
STD: String ___ utf16word;
___ Utf16word. Assign (buffer, ___ offset );
_ Trie. insert (___ utf16word );
___ Offset = 0;
}
}
}
Keywordsfile. Close ();
}
Bool filter: censor (const char * Source, int length ){
STD: String ___ source;
___ Source. Assign (source, length );
Return censor (___ source );
}
Bool filter: censor (STD: string & source ){
If (! _ Initialized ){
Logger: Error ("INVOKE Init () before censoring any strings .");
Return _ illegal;
} Else {
Int length = source. Size ();
For (INT I = 0; I <length; I + = 2 ){
STD: string substring = source. substr (I, length-I );
If (_ trie. Find (substring )! = NULL ){
Return _ illegal;
}
}
Return _ legal;
}
}
Note:
1. The key function is `bool filter::censor(std::string& source)`. It filters sensitive words: `source` is the input string. If `source` is a sensitive word or contains one, false is returned; otherwise, true is returned.
2. The `filter::load()` function loads the sensitive-word file.
According to the previous description, if "Gunpowder formula" is a sensitive word, then "fire" is also a sensitive word, "Gunpowder" is also a sensitive word, and "Gunpowder configuration" is also a sensitive word. However, "match" is not a sensitive word, and "Gunpowder diesel" is not a sensitive word, but "Gunpowder formula diesel" is a sensitive word, because of the following check in the trie::find() function:

if (___firstnode->__map.empty()) {
    return ___firstnode;
}

When the match reaches the end of "Gunpowder formula", `___firstnode->__map.empty()` is true, so `___firstnode` is returned; that is, "Gunpowder formula" is treated as an independent sensitive-word unit.
This article is based on an article written by Michael; for details, see: http://my.csdn.net/Poechant