// Sample markup used to demonstrate the tag filter applied in Page_Load.
protected string str = "<table> <tr> <td> sdasasdsdd </td> </tr> </table> <br> <p> sds </p> aaassss <br> say yes ";

/// <summary>
/// Strips every HTML tag from the <c>str</c> field except img, br and p tags,
/// and stores the filtered text back into <c>str</c>.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Page_Load(object sender, EventArgs e)
{
    // Alternative patterns for other filtering needs:
    //   @"<[^>]*>"                     - remove all tags
    //   @"<script[^>]*?>.*?</script>"  - remove all scripts, including their content
    //   @"<img[^>]*>"                  - remove image tags only
    //   @"<(?!br).*?>"                 - remove all tags except br
    //   @"<table[^>]*?>.*?</table>"    - remove tables, including their content

    // Remove all tags, keeping only img, br and p (opening and closing).
    // The \b after the tag name stops the look-ahead from also sparing tags that
    // merely start with one of these names (pre, param, ...), and [^>]* lets a
    // tag that is broken across several lines be matched as well.
    string regexstr = @"<(?!(?:img|br|p|/p)\b)[^>]*>";
    str = Regex.Replace(str, regexstr, string.Empty, RegexOptions.IgnoreCase);
}
ASP.NET: removing all HTML tags <script type="text/javascript">function StorePage(){d=document;t=d.selection?(d.selection.type!='None'?d.selection.createRange().text:''):(d.getSelection?d.getSelection():'');void(keyit=window.open('http://www.365key.com/storeit.aspx?t='+escape(d.title)+'&u='+escape(d.location.href)+'&c='+escape(t),'keyit','scrollbars=no,width=475,height=575,left=75,top=20,status=no,resizable=yes'));keyit.focus();}</script>
Note: the directive `using System.Text.RegularExpressions;` must be added first.
/// <summary>
/// Removes HTML markup from a string: script blocks, tags, comment markers and
/// a fixed set of character entities. The remaining text is HTML-encoded and trimmed.
/// </summary>
/// <param name="Htmlstring">Text that may contain HTML source code.</param>
/// <returns>
/// The trimmed, HTML-encoded text with the markup removed; an empty string when
/// <paramref name="Htmlstring"/> is null or empty.
/// </returns>
public static string NoHTML(string Htmlstring)
{
    if (string.IsNullOrEmpty(Htmlstring))
    {
        return string.Empty;
    }

    // Delete script blocks. Singleline lets '.' cross line breaks so that
    // multi-line scripts are removed together with their content.
    Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

    // Delete HTML tags.
    Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    // Drop a line break together with the whitespace that follows it.
    Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    // Remove leftover comment markers.
    Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Decode the handled character entities.
    Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    // A non-breaking space becomes a regular space so adjacent words stay separated.
    Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", RegexOptions.IgnoreCase); // inverted exclamation mark
    Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", RegexOptions.IgnoreCase);  // cent sign
    Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", RegexOptions.IgnoreCase); // pound sign
    Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", RegexOptions.IgnoreCase);  // copyright sign
    // Any other numeric entity is dropped.
    Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    // string.Replace returns a new string; the result has to be assigned,
    // otherwise these three clean-up steps have no effect.
    Htmlstring = Htmlstring.Replace("<", "");
    Htmlstring = Htmlstring.Replace(">", "");
    Htmlstring = Htmlstring.Replace("\r\n", "");

    // HttpUtility.HtmlEncode is the encoder behind Server.HtmlEncode and does not
    // need a current HTTP request, so the method also works outside a page.
    Htmlstring = System.Web.HttpUtility.HtmlEncode(Htmlstring).Trim();
    return Htmlstring;
}
// C# function that extracts the text from HTML code (StripHTML below).
// Summary: removes HTML tags.
// Parameter strHtml: the HTML source code to process.
// Returns: the text that remains after the tags have been removed.
using System;
using System.Text.RegularExpressions;
/// <summary>
/// Console demo for <see cref="StripHTML"/>, which extracts the text from HTML code.
/// </summary>
public class StripHTMLTest
{
    // Patterns applied in order by StripHTML; Patterns[i] is replaced by Replacements[i].
    private static readonly string[] Patterns =
    {
        // Script blocks; (?s) lets '.' cross line breaks so multi-line scripts are removed.
        @"(?s)<script[^>]*?>.*?</script>",
        // Opening/closing tags with optional attributes (quoted values may contain '>').
        // NOTE(review): this pattern was garbled in the source and has been reconstructed;
        // verify it against the intended original. Its nested quantifiers can backtrack
        // heavily on malformed tags - consider a Regex match timeout for untrusted input.
        @"<(\/\s*)?!?((\w+:)?\w+)(\w+(\s*=?\s*(([""'])(\\[""'tbnr]|[^\7])*?\7|\w+)|.{0})|\s)*?(\/\s*)?>",
        // A line break together with the whitespace that follows it.
        @"([\r\n])[\s]+",
        @"&(quot|#34);",
        @"&(amp|#38);",
        @"&(lt|#60);",
        @"&(gt|#62);",
        @"&(nbsp|#160);",
        @"&(iexcl|#161);",
        @"&(cent|#162);",
        @"&(pound|#163);",
        @"&(copy|#169);",
        // Any other numeric entity.
        @"&#(\d+);",
        @"-->",
        @"<!--.*\n"
    };

    private static readonly string[] Replacements =
    {
        "",
        "",
        "",
        "\"",
        "&",
        "<",
        ">",
        " ",       // non-breaking space becomes a regular space
        "\u00A1",  // chr(161), inverted exclamation mark
        "\u00A2",  // chr(162), cent sign
        "\u00A3",  // chr(163), pound sign
        "\u00A9",  // chr(169), copyright sign
        "",
        "\r\n",
        ""
    };

    /// <summary>
    /// Strips a sample HTML document and writes the resulting text to the console.
    /// </summary>
    public static void Main()
    {
        string s = StripHTML("<HTML> <HEAD> <TITLE> China shilong Information Platform </TITLE> </HEAD> <BODY> faddfs dragon Information Platform </BODY> </ HTML> ");
        Console.WriteLine(s);
    }

    /// <summary>
    /// Removes HTML tags, script blocks and a fixed set of character entities from HTML source.
    /// </summary>
    /// <param name="strHtml">Text that may contain HTML source code.</param>
    /// <returns>
    /// The text with the markup removed; an empty string when
    /// <paramref name="strHtml"/> is null or empty.
    /// </returns>
    public static string StripHTML(string strHtml)
    {
        if (string.IsNullOrEmpty(strHtml))
        {
            return string.Empty;
        }

        string strOutput = strHtml;
        for (int i = 0; i < Patterns.Length; i++)
        {
            strOutput = Regex.Replace(strOutput, Patterns[i], Replacements[i], RegexOptions.IgnoreCase);
        }

        // string.Replace returns a new string; the result has to be assigned,
        // otherwise these three clean-up steps have no effect.
        strOutput = strOutput.Replace("<", "");
        strOutput = strOutput.Replace(">", "");
        strOutput = strOutput.Replace("\r\n", "");
        return strOutput;
    }
}
Write a static method
#region Remove HTML tags
/// <summary>
/// Removes every HTML tag (any text enclosed in angle brackets) from a string.
/// </summary>
/// <param name="HTMLStr">Text that may contain HTML tags.</param>
/// <returns>
/// The text without tags; an empty string when <paramref name="HTMLStr"/> is null or empty.
/// </returns>
public static string ParseTags(string HTMLStr)
{
    if (string.IsNullOrEmpty(HTMLStr))
    {
        return string.Empty;
    }

    // The pattern must not contain stray whitespace: "<[^>] *> " would only match
    // tags that are followed by a space.
    return System.Text.RegularExpressions.Regex.Replace(HTMLStr, "<[^>]*>", "");
}
#endregion
#region Retrieve the image address in the text
// Matches an img tag and captures the value of its src attribute in the "url" group.
// The value may be double-quoted, single-quoted or unquoted; the quotes themselves are
// not captured. Built once, because constructing a Compiled regex per call is costly.
private static readonly Regex ImgSrcRegex = new Regex(
    @"<img\s+(?:[^>]*?\s)?src\s*=\s*(?:""(?<url>[^""]*)""|'(?<url>[^']*)'|(?<url>[^\s""'>]+))[^>]*>",
    RegexOptions.Compiled | RegexOptions.IgnoreCase);

/// <summary>
/// Retrieves the address of the first image found in the text.
/// </summary>
/// <param name="HTMLStr">Text that may contain HTML img tags.</param>
/// <returns>
/// The src value of the first img tag, with its original letter case; an empty string
/// when no img tag with a src attribute is found or <paramref name="HTMLStr"/> is null or empty.
/// </returns>
public static string GetImgUrl(string HTMLStr)
{
    if (string.IsNullOrEmpty(HTMLStr))
    {
        return string.Empty;
    }

    // Match case-insensitively on the original text instead of lower-casing it,
    // so that case-sensitive URL paths are returned unchanged.
    Match m = ImgSrcRegex.Match(HTMLStr);
    return m.Success ? m.Groups["url"].Value : string.Empty;
}
#endregion