Sharing: replacing blocked words in C# with regular expressions — 6 KB of text is checked against 1,500 blocked words in about 1 millisecond.

Source: Internet
Author: User

At my manager's request today, I tested and optimized the blocked-word filter, and the optimized version turned out to be more than 100 times faster! Replacing the blocked words in one article used to take more than 130 milliseconds. Now it takes less than 1 millisecond!

The main differences are in how the regular expression is generated and in the number of passes made over the article content.

The main code below is provided for your reference.
// Matches every non-word-boundary position (for example, between two adjacent letters).
// getpattern() uses these positions as the places where filler characters are tolerated.
private static readonly Regex reg_b = new Regex(@"\B", RegexOptions.Compiled);
// Matches a run of ASCII letters; unanchored, so it detects any word that contains a letter.
private static readonly Regex reg_en = new Regex(@"[a-zA-Z]+", RegexOptions.Compiled);
// Matches a word consisting only of digits, whitespace, '-' and '.'.
private static readonly Regex reg_num = new Regex(@"^[\-\.\s\d]+$", RegexOptions.Compiled);

private static Regex reg_word = null; // combines the regular expressions of all blocked words.
 
/// <summary>
/// Returns the combined blocked-word regex, building it from getpattern() on first use
/// and caching it in reg_word for later calls.
/// </summary>
/// <returns>The compiled, case-insensitive regex covering all blocked words.</returns>
private static Regex getregex()
{
    // Comparison, not assignment: only build the regex when it has not been cached yet.
    // NOTE(review): there is no locking here, so concurrent first calls may each build a regex.
    if (reg_word == null)
    {
        reg_word = new Regex(getpattern(), RegexOptions.Compiled | RegexOptions.IgnoreCase);
    }
    return reg_word;
}
 
/// <summary>
/// Check whether the input content contains dirty words (true is returned if it contains)
/// </summary>
/// <param name="raw">The text to inspect; null or empty text is reported as clean.</param>
/// <returns>true if at least one blocked word is found in <paramref name="raw"/>; otherwise false.</returns>
public static bool hasblockwords(string raw)
{
    // Nothing to scan: avoid handing null to the regex engine, which would throw.
    if (string.IsNullOrEmpty(raw))
    {
        return false;
    }
    return getregex().IsMatch(raw);
}
/// <summary>
/// Replace the dirty word with the * sign
/// </summary>
/// <param name="raw">The text to filter; null or empty text is returned unchanged.</param>
/// <returns><paramref name="raw"/> with every blocked-word match replaced by "***".</returns>
public static string wordsfilter(string raw)
{
    // Nothing to replace: avoid handing null to the regex engine, which would throw.
    if (string.IsNullOrEmpty(raw))
    {
        return raw;
    }
    return getregex().Replace(raw, "***");
}
/// <summary>
/// Obtain the dirty words contained in the content
/// </summary>
/// <param name="raw">The text to scan; null or empty text yields no results.</param>
/// <returns>A lazily evaluated sequence with the matched text of each blocked-word occurrence.</returns>
public static IEnumerable<string> getblockwords(string raw)
{
    if (string.IsNullOrEmpty(raw))
    {
        yield break;
    }
    // Go through getregex() rather than reading reg_word directly: the cached field is
    // still null if no other method has triggered the lazy initialisation yet.
    foreach (Match mat in getregex().Matches(raw))
    {
        yield return mat.Value;
    }
}
/// <summary>
/// Builds one alternation pattern, "(w1)|(w2)|...", from the words returned by getblockwords().
/// Single-character words and words made only of digits, whitespace, '-' and '.' are matched
/// literally. Other words tolerate up to three filler characters at each position matched by
/// reg_b (for example, between adjacent letters), so spaced-out spellings are still caught.
/// </summary>
/// <returns>
/// The combined pattern, or "(?!)" (a pattern that never matches) when there are no usable words.
/// </returns>
private static string getpattern()
{
    // Filler allowed inside words that contain ASCII letters: up to 3 non-letter characters.
    const string letterGap = @"(?:[^a-zA-Z]{0,3})";
    // Filler allowed inside all other words: up to 3 characters outside U+4E00..U+9FA5.
    const string otherGap = @"(?:[^\u4e00-\u9fa5]{0,3})";

    StringBuilder patt = new StringBuilder();
    foreach (string word in getblockwords())
    {
        if (string.IsNullOrEmpty(word))
        {
            continue;
        }

        string s;
        if (word.Length == 1 || reg_num.IsMatch(word))
        {
            // Matched literally; escaping keeps '.', '*', '(' and similar from acting as regex syntax.
            s = Regex.Escape(word);
        }
        else if (reg_en.IsMatch(word))
        {
            s = withgaps(word, letterGap);
        }
        else
        {
            s = withgaps(word, otherGap);
        }

        if (patt.Length > 0)
        {
            patt.Append('|');
        }
        patt.Append('(').Append(s).Append(')');
    }

    // An empty pattern would match at every position of every input, so fall back to
    // an empty negative lookahead, which can never match.
    if (patt.Length == 0)
    {
        return "(?!)";
    }
    return patt.ToString();
}

// Splits word at each reg_b match, escapes every piece, and joins the pieces with gap.
private static string withgaps(string word, string gap)
{
    string[] parts = reg_b.Split(word);
    for (int i = 0; i < parts.Length; i++)
    {
        parts[i] = Regex.Escape(parts[i]);
    }
    return string.Join(gap, parts);
}
 
/// <summary>
/// Obtain all dirty words
/// </summary>
/// <returns>A new array holding the blocked words (hard-coded sample values here).</returns>
public static string[] getblockwords()
{
    return new string[] { "Kuomintang", "Fuck", "110" }; // You should obtain it from the database
}

This program can replace the following content:

Kuomintang

State-civilian-party

Guo o Mino party

Fuck

F. U. C. K

110 (obfuscated variants of 110 are not replaced)

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.