// The following is a reference clip:
// -----

/// <summary>
/// Removes HTML tags from a string and returns the remaining text,
/// HTML-encoded and trimmed.
/// </summary>
/// <param name="htmlstring">Text that may contain HTML markup.</param>
/// <returns>
/// The text with scripts, tags, comment markers and common entities removed
/// or decoded, then HTML-encoded and trimmed. Returns an empty string for
/// null or empty input.
/// </returns>
public static string nohtml(string htmlstring)
{
    if (string.IsNullOrEmpty(htmlstring))
    {
        return string.Empty;
    }

    // Delete script elements. Singleline lets "." span line breaks so that
    // multi-line script bodies are removed as well.
    htmlstring = Regex.Replace(htmlstring, @"<script[^>]*?>.*?</script>", "",
        RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Delete HTML tags.
    htmlstring = Regex.Replace(htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

    // Drop a line break together with the whitespace that follows it.
    htmlstring = Regex.Replace(htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

    // Remove comment terminators, then comment openers up to the end of the line.
    htmlstring = Regex.Replace(htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode the common character entities.
    htmlstring = Regex.Replace(htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    // A non-breaking space becomes a plain space so adjacent words stay separated.
    htmlstring = Regex.Replace(htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(iexcl|#161);", "\u00A1", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(cent|#162);", "\u00A2", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(pound|#163);", "\u00A3", RegexOptions.IgnoreCase);
    htmlstring = Regex.Replace(htmlstring, @"&(copy|#169);", "\u00A9", RegexOptions.IgnoreCase);

    // Any other numeric entity is dropped.
    htmlstring = Regex.Replace(htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    // Strings are immutable: the result of Replace must be assigned back,
    // otherwise these three calls have no effect.
    htmlstring = htmlstring.Replace("<", "");
    htmlstring = htmlstring.Replace(">", "");
    htmlstring = htmlstring.Replace("\r\n", "");

    // WebUtility.HtmlEncode does not need an active HttpContext, so the
    // method also works outside a web request.
    htmlstring = System.Net.WebUtility.HtmlEncode(htmlstring).Trim();

    return htmlstring;
}

// Extracting the text from HTML code with a C# function

using System;
using System.Text.RegularExpressions;

public class striphtmltest
{
    public static void Main()
    {
        // NOTE(review): the sample markup in the pasted source was truncated;
        // this literal is a stand-in - confirm against the original article.
        string s = striphtml("<html><body><p>Hello &amp; welcome</p></body></html>");
        Console.WriteLine(s);
    }

    /// <summary>
    /// Removes HTML tags from a string and decodes common entities.
    /// </summary>
    /// <param name="strhtml">Text that may contain HTML markup.</param>
    /// <returns>
    /// The text with markup removed. Returns an empty string for null or
    /// empty input. The result is NOT HTML-encoded.
    /// </returns>
    public static string striphtml(string strhtml)
    {
        if (string.IsNullOrEmpty(strhtml))
        {
            return string.Empty;
        }

        // Each row is { pattern, replacement }. Keeping the pair on one row
        // makes it impossible for the two lists to drift out of alignment.
        string[,] rules =
        {
            // (?s) lets "." span line breaks so multi-line scripts are removed.
            { @"(?s)<script[^>]*?>.*?</script>", "" },
            { @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>", "" },
            { @"([\r\n])[\s]+", "" },
            { @"&(quot|#34);", "\"" },
            { @"&(amp|#38);", "&" },
            { @"&(lt|#60);", "<" },
            { @"&(gt|#62);", ">" },
            { @"&(nbsp|#160);", " " },
            { @"&(iexcl|#161);", "\u00A1" }, // chr(161)
            { @"&(cent|#162);", "\u00A2" },  // chr(162)
            { @"&(pound|#163);", "\u00A3" }, // chr(163)
            { @"&(copy|#169);", "\u00A9" },  // chr(169)
            { @"&#(\d+);", "" },
            { @"-->", "\r\n" },
            { @"<!--.*\n", "" }
        };

        string stroutput = strhtml;
        for (int i = 0; i < rules.GetLength(0); i++)
        {
            stroutput = Regex.Replace(stroutput, rules[i, 0], rules[i, 1], RegexOptions.IgnoreCase);
        }

        // Strings are immutable: assign the results back. Removing the
        // remaining angle brackets also neutralises tags that only appeared
        // after entity decoding (for example "&lt;script&gt;").
        stroutput = stroutput.Replace("<", "");
        stroutput = stroutput.Replace(">", "");
        stroutput = stroutput.Replace("\r\n", "");

        return stroutput;
    }
}

// Write a static method to remove HTML tags

#region Remove HTML tags

/// <summary>
/// Removes HTML tags by deleting every span from an opening angle bracket
/// to the next closing angle bracket.
/// </summary>
/// <param name="htmlstr">Text that may contain HTML markup.</param>
/// <returns>The text with all tags deleted; entities are left untouched.</returns>
public static string parsetags(string htmlstr)
{
    return System.Text.RegularExpressions.Regex.Replace(htmlstr, "<[^>]*>", "");
}

#endregion
#region Retrieve the image address in the text

/// <summary>
/// Retrieves the address of the first image referenced in the text.
/// </summary>
/// <param name="htmlstr">Text that may contain an img element.</param>
/// <returns>
/// The value of the src attribute of the first img element, with its
/// original letter case and without surrounding quotes. Returns an empty
/// string when the input is null or empty or contains no img element with
/// a src attribute.
/// </returns>
public static string getimgurl(string htmlstr)
{
    if (string.IsNullOrEmpty(htmlstr))
    {
        return string.Empty;
    }

    // IgnoreCase is used instead of lower-casing the input, so the returned
    // URL keeps its original case (paths and query strings can be
    // case-sensitive). The three alternatives handle double-quoted,
    // single-quoted and unquoted attribute values; the quotes themselves are
    // never part of the captured value.
    Match m = Regex.Match(
        htmlstr,
        @"<img\s+(?:[^>]*?\s)?src\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s>]+))",
        RegexOptions.IgnoreCase);

    return m.Success ? m.Groups["url"].Value : string.Empty;
}

#endregion
|