Noting this down first so there is a backup copy!
/// <summary>
/// Strips HTML markup from a string and returns HTML-encoded plain text.
/// </summary>
/// <param name="htmlString">Text that may contain HTML source code.</param>
/// <returns>
/// The text with script elements, tags, comment markers and numeric character
/// references removed, a fixed set of named entities decoded, and the result
/// HTML-encoded and trimmed. Returns an empty string for null or empty input.
/// </returns>
public static string GetNoHtmlString(string htmlString)
{
    if (string.IsNullOrEmpty(htmlString))
    {
        return string.Empty;
    }

    // Delete script elements. Singleline lets '.' cross line breaks so that
    // multi-line script bodies are removed together with their tags.
    htmlString = Regex.Replace(htmlString, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Delete all remaining tags.
    htmlString = Regex.Replace(htmlString, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

    // Collapse a line break together with the whitespace that follows it.
    htmlString = Regex.Replace(htmlString, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

    // Remove comment leftovers that the tag pattern did not consume
    // (e.g. comments whose body contained a '>').
    htmlString = Regex.Replace(htmlString, @"-->", "", RegexOptions.IgnoreCase);
    htmlString = Regex.Replace(htmlString, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode the supported named entities and their numeric equivalents.
    htmlString = Regex.Replace(htmlString, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    htmlString = Regex.Replace(htmlString, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    htmlString = Regex.Replace(htmlString, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    htmlString = Regex.Replace(htmlString, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    htmlString = Regex.Replace(htmlString, @"&(iexcl|#161);", "\u00a1", RegexOptions.IgnoreCase);
    htmlString = Regex.Replace(htmlString, @"&(cent|#162);", "\u00a2", RegexOptions.IgnoreCase);
    htmlString = Regex.Replace(htmlString, @"&(pound|#163);", "\u00a3", RegexOptions.IgnoreCase);
    htmlString = Regex.Replace(htmlString, @"&(copy|#169);", "\u00a9", RegexOptions.IgnoreCase);

    // Drop any other numeric character reference.
    htmlString = Regex.Replace(htmlString, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    // Decode '&amp;' last so that input such as "&amp;lt;" is decoded exactly
    // once instead of being turned into a '<' by the rules above.
    htmlString = Regex.Replace(htmlString, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);

    // string.Replace returns a new string; the result must be assigned.
    htmlString = htmlString.Replace("<", "");
    htmlString = htmlString.Replace(">", "");
    htmlString = htmlString.Replace("\r\n", "");

    // WebUtility.HtmlEncode does not need an active HttpContext, so the method
    // also works outside of a web request.
    return System.Net.WebUtility.HtmlEncode(htmlString).Trim();
}
/// <summary>
/// Returns a string that may still contain HTML tags for display, with a fixed
/// list of dangerous elements (such as iframe and script) filtered out.
/// </summary>
/// <param name="str">The unprocessed string.</param>
/// <returns>
/// The input with the blocked elements removed, or an empty string when the
/// input is null or empty.
/// </returns>
/// <remarks>
/// This is a tag-name blacklist only. It does not inspect attributes (for
/// example inline event handlers or script URLs), so it should not be treated
/// as a complete defence against script injection.
/// </remarks>
public static string GetSafeHtmlString(string str)
{
    if (string.IsNullOrEmpty(str))
    {
        return string.Empty;
    }

    // NOTE(review): the source listing contained one additional Regex.Replace
    // call with an empty pattern (a no-op) between "frameset" and "iframe";
    // the tag name was lost. Confirm whether another element belongs here.
    string[] blockedTags =
    {
        "applet", "body", "embed", "frame", "script", "frameset", "iframe",
        "style", "layer", "link", "ilayer", "meta", "object"
    };

    // Singleline lets '.' cross line breaks so multi-line elements are matched.
    const RegexOptions options = RegexOptions.IgnoreCase | RegexOptions.Singleline;

    string previous;
    do
    {
        previous = str;
        foreach (string tag in blockedTags)
        {
            // Remove the element together with its content. The \b keeps
            // "frame" from matching "frameset" and "layer" from matching
            // longer names.
            str = Regex.Replace(str, "<" + tag + @"\b[^>]*>.*?</" + tag + @"\s*>", "", options);

            // Remove unpaired opening or closing tags (void elements such as
            // meta and link never have a closing tag).
            str = Regex.Replace(str, "</?" + tag + @"\b[^>]*>", "", options);
        }
    }
    // Repeat until stable: removing one tag can splice a new blocked tag
    // together out of the surrounding text. Each change shortens the string,
    // so the loop terminates.
    while (str != previous);

    return str;
}
Two ways to filter HTML strings in ASP.NET