/// <summary>
/// Regex-based helpers that reduce HTML markup to plain text and extract an image URL.
/// </summary>
/// <remarks>
/// These are pattern-matching approximations, not an HTML parser. Type and member names
/// are kept exactly as originally declared so that existing callers are unaffected.
/// </remarks>
public class subhtmlstring
{
    // Singleline lets '.' span line breaks; without it the body of a multi-line
    // <script> element would survive as plain text once its tags are stripped.
    private static readonly Regex ScriptBlock =
        new Regex(@"<script[^>]*?>.*?</script>", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Simple tag matcher used by nohtml: '<', one character, anything up to the next '>'.
    private static readonly Regex SimpleTag = new Regex(@"<(.[^>]*)>", RegexOptions.IgnoreCase);

    // Tag matcher used by striphtml. A tag must start with a word character (after an
    // optional '/' and '!'), and quoted attribute values may contain '>'. Every alternative
    // inside the loop starts with a different character, so a failed match cannot trigger
    // the exponential backtracking that a nested "\w+ inside a repeated group" form allows.
    private static readonly Regex QuoteAwareTag =
        new Regex(@"<\/?\s*!?\w(?:[^""'<>]|""[^""]*""|'[^']*')*>", RegexOptions.IgnoreCase);

    // Used by parsetags: anything between '<' and the next '>'.
    private static readonly Regex AngleBracketSpan = new Regex(@"<[^>]*>");

    // A line break followed by further whitespace (indentation, blank lines).
    private static readonly Regex LineBreakWithIndent = new Regex(@"([\r\n])[\s]+");

    private static readonly Regex CommentClose = new Regex(@"-->");
    private static readonly Regex CommentToEndOfLine = new Regex(@"<!--.*");
    private static readonly Regex CommentThroughNewline = new Regex(@"<!--.*\n");

    // Named/numeric entity patterns and their replacement text, index-aligned.
    private static readonly Regex[] EntityPatterns =
    {
        new Regex(@"&(quot|#34);", RegexOptions.IgnoreCase),
        new Regex(@"&(amp|#38);", RegexOptions.IgnoreCase),
        new Regex(@"&(lt|#60);", RegexOptions.IgnoreCase),
        new Regex(@"&(gt|#62);", RegexOptions.IgnoreCase),
        new Regex(@"&(nbsp|#160);", RegexOptions.IgnoreCase),
        new Regex(@"&(iexcl|#161);", RegexOptions.IgnoreCase),
        new Regex(@"&(cent|#162);", RegexOptions.IgnoreCase),
        new Regex(@"&(pound|#163);", RegexOptions.IgnoreCase),
        new Regex(@"&(copy|#169);", RegexOptions.IgnoreCase)
    };

    private static readonly string[] EntityTexts =
    {
        "\"",
        "&",
        "<",
        ">",
        " ",        // a non-breaking space becomes an ordinary space so adjacent words are not glued together
        "\u00A1",   // inverted exclamation mark (161)
        "\u00A2",   // cent sign (162)
        "\u00A3",   // pound sign (163)
        "\u00A9"    // copyright sign (169)
    };

    // Any numeric entity not handled by the table above is dropped.
    private static readonly Regex NumericEntity = new Regex(@"&#(\d+);");

    // NOTE(review): the replacement text for the curly-quote entities was not fully legible
    // in the previous revision; both are removed here, matching the legible &ldquo; case - confirm.
    private static readonly Regex CurlyQuoteEntity = new Regex(@"&[lr]dquo;", RegexOptions.IgnoreCase);

    // First src attribute of an <img> tag; the value may be double-quoted, single-quoted or bare.
    // Whitespace is required before "src" so attributes such as data-src are not picked up.
    private static readonly Regex ImageSource =
        new Regex(
            @"<img\b[^>]*?\ssrc\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s""'>]+))",
            RegexOptions.IgnoreCase);

    /// <summary>
    /// Removes script blocks, tags and comments from HTML, decodes a small set of entities,
    /// strips any remaining angle brackets and CR/LF pairs, then HTML-encodes and trims the result.
    /// </summary>
    /// <param name="htmlstring">HTML source text; may be null.</param>
    /// <returns>
    /// HTML-encoded plain text, or an empty string when <paramref name="htmlstring"/> is null.
    /// </returns>
    public static string nohtml(string htmlstring)
    {
        if (htmlstring == null)
        {
            return "";
        }

        string text = ScriptBlock.Replace(htmlstring, string.Empty);
        text = SimpleTag.Replace(text, string.Empty);
        text = LineBreakWithIndent.Replace(text, string.Empty);
        text = CommentClose.Replace(text, string.Empty);
        text = CommentToEndOfLine.Replace(text, string.Empty);
        text = DecodeEntities(text);
        text = CurlyQuoteEntity.Replace(text, string.Empty);
        text = RemoveBracketsAndLineBreaks(text);

        // WebUtility.HtmlEncode needs no ambient HTTP request, unlike
        // HttpContext.Current.Server.HtmlEncode, which fails when no request is active.
        return System.Net.WebUtility.HtmlEncode(text).Trim();
    }

    /// <summary>
    /// Removes script blocks, tags and comments from HTML and decodes a small set of entities,
    /// returning the remaining text without HTML-encoding it.
    /// </summary>
    /// <param name="strhtml">HTML source text; may be null.</param>
    /// <returns>
    /// The extracted text with all angle brackets and CR/LF pairs removed, or an empty string
    /// when <paramref name="strhtml"/> is null.
    /// </returns>
    public static string striphtml(string strhtml)
    {
        if (strhtml == null)
        {
            return string.Empty;
        }

        string text = ScriptBlock.Replace(strhtml, string.Empty);
        text = QuoteAwareTag.Replace(text, string.Empty);
        text = LineBreakWithIndent.Replace(text, string.Empty);
        text = DecodeEntities(text);

        // Turning "-->" into a line break lets the next pattern remove a whole
        // single-line comment, which it matches up to and including a newline.
        text = CommentClose.Replace(text, "\r\n");
        text = CommentThroughNewline.Replace(text, string.Empty);

        // Decoding &lt; / &gt; above can re-create angle brackets; they must not
        // reach the caller because this method does not encode its output.
        return RemoveBracketsAndLineBreaks(text);
    }

    #region Remove HTML tags
    /// <summary>
    /// Removes every span that starts with '&lt;' and ends at the next '&gt;'.
    /// </summary>
    /// <param name="htmlstr">HTML source text; may be null.</param>
    /// <returns>
    /// The text with those spans removed, or an empty string when
    /// <paramref name="htmlstr"/> is null.
    /// </returns>
    public static string parsetags(string htmlstr)
    {
        if (htmlstr == null)
        {
            return string.Empty;
        }

        return AngleBracketSpan.Replace(htmlstr, string.Empty);
    }
    #endregion

    #region Retrieve the image address in the text
    /// <summary>
    /// Returns the src value of the first img tag that has one.
    /// </summary>
    /// <param name="htmlstr">HTML source text; may be null.</param>
    /// <returns>
    /// The URL exactly as written in the markup (original letter case, no surrounding quotes),
    /// or an empty string when no such tag is found or <paramref name="htmlstr"/> is null.
    /// </returns>
    public static string getimgurl(string htmlstr)
    {
        if (htmlstr == null)
        {
            return string.Empty;
        }

        // Case-insensitive matching replaces lower-casing the input, which would
        // corrupt case-sensitive URL paths and query strings.
        Match found = ImageSource.Match(htmlstr);
        return found.Success ? found.Groups["url"].Value : string.Empty;
    }
    #endregion

    // Applies the entity table in order, then drops any numeric entity that is left.
    private static string DecodeEntities(string text)
    {
        for (int i = 0; i < EntityPatterns.Length; i++)
        {
            text = EntityPatterns[i].Replace(text, EntityTexts[i]);
        }

        return NumericEntity.Replace(text, string.Empty);
    }

    // Removes stray angle brackets and CR/LF pairs. string.Replace returns a new
    // string, so the result has to be carried forward rather than discarded.
    private static string RemoveBracketsAndLineBreaks(string text)
    {
        return text.Replace("<", string.Empty)
                   .Replace(">", string.Empty)
                   .Replace("\r\n", string.Empty);
    }
}