// Options shared by every StripHTML rule: tag and entity names are matched
// case-insensitively and independently of the current thread culture.
private const RegexOptions StripHtmlOptions = RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;

// Ordered rewrite rules used by StripHTML. StripHtmlRules[i] is replaced by
// StripHtmlReplacements[i]; the order matters (e.g. "-->" is turned into a
// line break so that the final "<!--.*\n" rule can consume the whole comment).
// The regexes are built once instead of on every call.
private static readonly Regex[] StripHtmlRules =
{
    // Whole <script> elements. Singleline lets '.' cross line breaks so that
    // multi-line script bodies are removed as well.
    new Regex(@"<script[^>]*?>.*?</script>", StripHtmlOptions | RegexOptions.Singleline),
    // Any opening/closing tag, including quoted attribute values
    // (\7 refers back to the opening quote character).
    new Regex(@"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", StripHtmlOptions),
    // A line break followed by further whitespace.
    new Regex(@"([\r\n])[\s]+", StripHtmlOptions),
    new Regex(@"&(quot|#34);", StripHtmlOptions),
    new Regex(@"&(amp|#38);", StripHtmlOptions),
    new Regex(@"&(lt|#60);", StripHtmlOptions),
    new Regex(@"&(gt|#62);", StripHtmlOptions),
    new Regex(@"&(nbsp|#160);", StripHtmlOptions),
    new Regex(@"&(iexcl|#161);", StripHtmlOptions),
    new Regex(@"&(cent|#162);", StripHtmlOptions),
    new Regex(@"&(pound|#163);", StripHtmlOptions),
    new Regex(@"&(copy|#169);", StripHtmlOptions),
    // Any other numeric entity is dropped, not decoded.
    new Regex(@"&#(\d+);", StripHtmlOptions),
    new Regex(@"-->", StripHtmlOptions),
    new Regex(@"<!--.*\n", StripHtmlOptions)
};

// Replacement text for the rule at the same index in StripHtmlRules.
private static readonly string[] StripHtmlReplacements =
{
    "",
    "",
    "",
    "\"",
    "&",
    "<",
    ">",
    " ",       // a non-breaking space becomes a plain space so adjacent words stay separated
    "\u00A1",  // inverted exclamation mark, chr(161)
    "\u00A2",  // cent sign, chr(162)
    "\u00A3",  // pound sign, chr(163)
    "\u00A9",  // copyright sign, chr(169)
    "",
    "\r\n",
    ""
};

/// <summary>
/// Removes HTML markup from a string and decodes a small set of character entities.
/// </summary>
/// <remarks>
/// The rules are applied in a fixed order: script elements, tags, line breaks followed
/// by whitespace, the entities quot/amp/lt/gt/nbsp/iexcl/cent/pound/copy (named or
/// numeric), any remaining numeric entity (dropped), and finally HTML comments.
/// Afterwards every remaining '&lt;' and '&gt;' character and every CR LF pair is
/// removed, so angle brackets produced by decoding &amp;lt; / &amp;gt; do not survive.
/// </remarks>
/// <param name="strHtml">The HTML source text. May be <c>null</c> or empty.</param>
/// <returns>
/// The text with markup removed; an empty string when <paramref name="strHtml"/> is
/// <c>null</c> or empty.
/// </returns>
public static string StripHTML(string strHtml)
{
    if (string.IsNullOrEmpty(strHtml))
    {
        return string.Empty;
    }

    string strOutput = strHtml;
    for (int i = 0; i < StripHtmlRules.Length; i++)
    {
        strOutput = StripHtmlRules[i].Replace(strOutput, StripHtmlReplacements[i]);
    }

    // string.Replace returns a new string; the result has to be assigned back,
    // otherwise these three clean-up steps have no effect.
    strOutput = strOutput.Replace("<", "");
    strOutput = strOutput.Replace(">", "");
    strOutput = strOutput.Replace("\r\n", "");
    return strOutput;
}