At my team lead's request, I profiled and optimized the blocked-word filter today, and the result is more than 100 times faster: filtering one article used to take more than 130 milliseconds, and now it takes less than 1 millisecond.
The main differences are in how the regular expression is built and in how many times the article content is scanned.
The main code is provided below for your reference.
// Matches every position that is NOT a word boundary, i.e. the gaps between
// adjacent characters inside a word; used to splice filler patterns into a word.
private static readonly Regex reg_b = new Regex(@"\B", RegexOptions.Compiled);
// Matches a run of ASCII letters anywhere in the input.
private static readonly Regex reg_en = new Regex(@"[a-zA-Z]+", RegexOptions.Compiled);
// Matches input that consists only of digits, whitespace, '-' and '.'.
private static readonly Regex reg_num = new Regex(@"^[\-\.\s\d]+$", RegexOptions.Compiled);
// Combined regular expression for all blocked words; null until getregex() builds it.
private static Regex reg_word = null;
/// <summary>
/// Returns the combined blocked-word regular expression, building it from
/// getpattern() on first use and caching it in reg_word afterwards.
/// </summary>
/// <returns>The cached, compiled, case-insensitive regular expression.</returns>
private static Regex getregex()
{
    // Not synchronized: concurrent first calls may each build the regex,
    // and the last assignment wins.
    if (reg_word == null)
    {
        reg_word = new Regex(getpattern(), RegexOptions.Compiled | RegexOptions.IgnoreCase);
    }
    return reg_word;
}
/// <summary>
/// Checks whether the input content contains any blocked word.
/// </summary>
/// <param name="raw">Text to inspect. Null or empty text is treated as containing no blocked words.</param>
/// <returns>true if at least one blocked word is found; otherwise false.</returns>
public static bool hasblockwords(string raw)
{
    if (string.IsNullOrEmpty(raw))
    {
        return false;
    }
    // IsMatch answers the yes/no question without exposing a Match object.
    return getregex().IsMatch(raw);
}
/// <summary>
/// Replaces every blocked word found in the input with "***".
/// </summary>
/// <param name="raw">Text to filter. Null or empty text is returned unchanged.</param>
/// <returns>The text with each blocked-word match replaced by three asterisks.</returns>
public static string wordsfilter(string raw)
{
    if (string.IsNullOrEmpty(raw))
    {
        return raw;
    }
    return getregex().Replace(raw, "***");
}
/// <summary>
/// Enumerates the blocked words that occur in the input content.
/// </summary>
/// <param name="raw">Text to inspect. Null or empty text yields no results.</param>
/// <returns>The matched text of each blocked-word occurrence, in match order.</returns>
public static IEnumerable<string> getblockwords(string raw)
{
    if (string.IsNullOrEmpty(raw))
    {
        yield break;
    }
    // Go through getregex() rather than reading reg_word directly: the field is
    // null until the regex has been built, so a direct read can fail on first use.
    foreach (Match mat in getregex().Matches(raw))
    {
        yield return mat.Value;
    }
}
/// <summary>
/// Builds one alternation pattern that covers every word returned by getblockwords().
/// </summary>
/// <remarks>
/// Single-character words and words made up only of digits, whitespace, '-' and '.'
/// are matched literally. Words containing ASCII letters tolerate up to three
/// non-letter characters between their characters; all other words tolerate up to
/// three characters outside the \u4e00-\u9fa5 range. Each word is regex-escaped so
/// that metacharacters in a word cannot break or alter the combined pattern.
/// </remarks>
/// <returns>
/// The alternation pattern, or "(?!)" (a pattern that never matches) when there are
/// no usable words; an empty pattern would match at every position of any input.
/// </returns>
private static string getpattern()
{
    // Optional filler tolerated between the characters of a blocked word.
    const string letterGap = @"(?:[^a-zA-Z]{0,3})";
    const string cjkGap = @"(?:[^\u4e00-\u9fa5]{0,3})";

    StringBuilder patt = new StringBuilder();
    foreach (string word in getblockwords())
    {
        if (string.IsNullOrEmpty(word))
        {
            continue;
        }

        string s;
        if (word.Length == 1 || reg_num.IsMatch(word))
        {
            // Too short or purely numeric: match literally, no filler allowed.
            s = Regex.Escape(word);
        }
        else if (reg_en.IsMatch(word))
        {
            s = insertgaps(word, letterGap);
        }
        else
        {
            s = insertgaps(word, cjkGap);
        }
        patt.AppendFormat("|({0})", s);
    }

    if (patt.Length == 0)
    {
        return "(?!)";
    }
    // Drop the leading '|' added before the first alternative.
    patt.Remove(0, 1);
    return patt.ToString();
}

// Escapes the word and places the filler pattern at every non-word-boundary position.
private static string insertgaps(string word, string gap)
{
    // Splitting at \B yields the pieces between the positions where the filler goes,
    // so each piece can be escaped on its own without damaging the inserted filler.
    string[] pieces = reg_b.Split(word);
    for (int i = 0; i < pieces.Length; i++)
    {
        pieces[i] = Regex.Escape(pieces[i]);
    }
    return string.Join(gap, pieces);
}
/// <summary>
/// Returns all blocked words.
/// </summary>
/// <returns>The list of blocked words; this sample returns a fixed list.</returns>
public static string[] getblockwords()
{
    // Sample data only: a real application should load the list from the database.
    return new string[] { "Kuomintang", "Fuck", "110" };
}
This program can replace the following content:
Kuomintang
State-civilian-party
Guo o Mino party
Fuck
F. U. C. K
110 (obfuscated variants of 110 are not replaced, because numeric words are matched literally)