From: http://www.cnblogs.com/grokyao/archive/2008/12/04/1347512.html
Text obtained from popular rich-text editors such as FreeTextBox is mixed with HTML tags, which sometimes need to be stripped out. The solution presented here filters them with regular expressions. Because HTML tags take the form <...>, and the text may also contain character entities such as &nbsp;, the string is processed in two passes (first the tags, then the entities) to produce text without any HTML formatting.
Simple code:
// Sample input: an HTML fragment as produced by a rich-text editor.
// Inside a verbatim string (@"..."), a literal double quote is written as "".
string html = @"<span lang=""EN-US"">&rdquo;</span> allows you to receive your favorite video and TV content at any time. <span lang=""EN-US""><a target=""_blank"" href=""http://info.tele.hc360.com/list/mobile.shtml""><span lang=""EN-US""><span lang=""EN-US"">mobile phone</span></a></span>";

// Pass 1: remove every tag, i.e. anything of the form <...>.
string strNoHtml = System.Text.RegularExpressions.Regex.Replace(html, "<[^>]+>", "");

// Pass 2: remove every character entity, i.e. anything of the form &...; (such as &nbsp; or &rdquo;).
strNoHtml = System.Text.RegularExpressions.Regex.Replace(strNoHtml, "&[^;]+;", "");

Console.WriteLine(strNoHtml);
Enhanced function:
/// <summary>
/// Strips HTML markup from a string and returns the remaining text, HTML-encoded and trimmed.
/// </summary>
/// <remarks>
/// Processing order:
/// 1. script elements are removed together with their content;
/// 2. all remaining tags and comment delimiters are removed;
/// 3. a fixed set of character entities is decoded, and every other numeric entity is dropped;
/// 4. any angle brackets and CR/LF pairs still present (including brackets produced by
///    decoding the lt/gt entities) are removed;
/// 5. the result is HTML-encoded so it can be written into a page safely, then trimmed.
/// </remarks>
/// <param name="Htmlstring">The HTML text to clean. May be null or empty.</param>
/// <returns>The cleaned, HTML-encoded text; an empty string when the input is null or empty.</returns>
public string Nohtml(string Htmlstring) // Replacing HTML tags
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Delete script blocks. Singleline lets '.' match line breaks, so scripts that
    // span several lines are removed as a whole instead of leaving their body behind.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Delete HTML tags, then line breaks followed by whitespace, then comment leftovers.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode the known character entities (named or numeric form).
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", RegexOptions.IgnoreCase);

    // Drop every other numeric entity.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    // string.Replace returns a new string; the result must be assigned to take effect.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // The static HttpUtility.HtmlEncode does not need an active HttpContext.
    Htmlstring = HttpUtility.HtmlEncode(Htmlstring).Trim();
    return Htmlstring;
}