using System.Net;
using System.Text.RegularExpressions;
// Ordered (pattern, replacement) pairs applied by nohtml. Order matters:
// script blocks go first so their bodies are not left behind once the generic
// tag rule strips the surrounding <script> tags, and "&amp;" is decoded before
// the other entities exactly as in the original sequence.
private static readonly string[][] NoHtmlRules =
{
    // (?s) lets '.' cross line breaks so multi-line script bodies are removed too.
    new[] { @"(?s)<script[^>]*?>.*?</script>", "" },
    // Any remaining tag (this also removes complete <!-- ... --> comments without '>' inside).
    new[] { @"<(.[^>]*)>", "" },
    // A line break followed by further whitespace.
    new[] { @"([\r\n])[\s]+", "" },
    // Dangling comment terminator left behind by the tag rule.
    new[] { @"-->", "" },
    // Unterminated comment: drop from the opener to the end of the line.
    new[] { @"<!--.*", "" },
    new[] { @"&(quot|#34);", "\"" },
    new[] { @"&(amp|#38);", "&" },
    new[] { @"&(lt|#60);", "<" },
    new[] { @"&(gt|#62);", ">" },
    new[] { @"&(nbsp|#160);", " " },
    new[] { @"&(iexcl|#161);", "\u00A1" },
    new[] { @"&(cent|#162);", "\u00A2" },
    new[] { @"&(pound|#163);", "\u00A3" },
    new[] { @"&(copy|#169);", "\u00A9" },
    // Any other numeric character reference is dropped.
    new[] { @"&#(\d+);", "" },
};

/// <summary>
/// Removes HTML markup from a string and returns HTML-encoded plain text.
/// </summary>
/// <param name="htmlstring">Text that may contain HTML source code.</param>
/// <returns>
/// The input with script blocks, tags and comments removed, a fixed set of
/// character entities decoded, remaining angle brackets and CR/LF pairs
/// dropped, and the result HTML-encoded and trimmed. Returns an empty string
/// when <paramref name="htmlstring"/> is null or empty.
/// </returns>
public static string nohtml(string htmlstring)
{
    if (string.IsNullOrEmpty(htmlstring))
    {
        return string.Empty;
    }

    string text = htmlstring;
    foreach (string[] rule in NoHtmlRules)
    {
        text = Regex.Replace(text, rule[0], rule[1], RegexOptions.IgnoreCase);
    }

    // Strings are immutable: the results must be assigned back, otherwise
    // these replacements silently do nothing.
    text = text.Replace("<", "").Replace(">", "").Replace("\r\n", "");

    // WebUtility.HtmlEncode does not need an active HttpContext, so the method
    // also works outside of a web request (background jobs, unit tests).
    return WebUtility.HtmlEncode(text).Trim();
}
// Alternative: a minimal static helper that only strips tags.
#region Removing HTML tags
/// <summary>
/// Removes every HTML tag from a string, leaving the text between tags untouched.
/// </summary>
/// <param name="htmlstr">Text that may contain HTML tags.</param>
/// <returns>
/// The input with every <c>&lt;...&gt;</c> sequence removed. Entities are not
/// decoded. Returns an empty string when <paramref name="htmlstr"/> is null or empty.
/// </returns>
public static string Parsetags(string htmlstr)
{
    if (string.IsNullOrEmpty(htmlstr))
    {
        return string.Empty;
    }

    return System.Text.RegularExpressions.Regex.Replace(htmlstr, "<[^>]*>", "");
}
#endregion
#region Remove the image address from the text
// Matches an <img> tag and captures its src value in the "url" group.
// The three alternatives handle double-quoted, single-quoted and unquoted
// values so the quotes never end up in the capture. Requiring whitespace
// before "src" keeps attributes such as data-src from matching.
// Built once: compiling the expression on every call is wasted work.
private static readonly Regex ImgSrcPattern = new Regex(
    @"<img\b[^>]*?\ssrc\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s>]+))",
    RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Extracts the image address (the <c>src</c> attribute) of the first
/// <c>&lt;img&gt;</c> tag found in the text.
/// </summary>
/// <param name="htmlstr">Text that may contain HTML markup.</param>
/// <returns>
/// The <c>src</c> value with its original casing and without surrounding
/// quotes, or an empty string when the input is null/empty or contains no
/// <c>&lt;img&gt;</c> tag with a <c>src</c> attribute.
/// </returns>
public static string Getimgurl(string htmlstr)
{
    if (string.IsNullOrEmpty(htmlstr))
    {
        return string.Empty;
    }

    // Matching is case-insensitive through RegexOptions.IgnoreCase rather than
    // lower-casing the input, because URL paths can be case-sensitive.
    Match match = ImgSrcPattern.Match(htmlstr);
    return match.Success ? match.Groups["url"].Value : string.Empty;
}
#endregion
// Related: how to remove all HTML markup in ASP.