Method One: regular expressions
Add `using System.Text.RegularExpressions;` at the top of the file (requires a reference to that namespace).
Use a regular expression to remove everything between "<" and ">".
/// <summary>
/// Removes every "&lt;...&gt;" span (HTML/XML tags) from the given text.
/// </summary>
/// <param name="strhtml">Markup to strip; may be null or empty.</param>
/// <returns>
/// The input with all tag-like spans removed, or an empty string when the
/// input is null or empty.
/// </returns>
private string Stripht (string strhtml)
{
    if (string.IsNullOrEmpty (strhtml))
    {
        return string.Empty;
    }

    // Singleline lets '.' match '\n' so that a tag whose attributes wrap onto
    // the next line is still removed. The lazy quantifier stops at the first
    // '>' so adjacent tags are matched one at a time. The static overload uses
    // the framework's regex cache instead of building a new Regex per call.
    return Regex.Replace (strhtml, "<.+?>", "", RegexOptions.Singleline);
}
Method Two (I am not sure why, but this method can drive CPU usage to 100%)
/// <summary>
/// Converts an HTML fragment to plain text: drops script blocks, comments and
/// tags, collapses line breaks that are followed by whitespace, and decodes a
/// small set of character entities.
/// </summary>
/// <param name="strhtml">Markup to strip; may be null or empty.</param>
/// <returns>
/// The plain-text result, or an empty string when the input is null or empty.
/// </returns>
public static string drophtml (string strhtml)
{
    if (string.IsNullOrEmpty (strhtml))
    {
        return string.Empty;
    }

    // Ordered { pattern, replacement } steps; the order is significant.
    string[,] steps =
    {
        // Whole <script> elements. (?s) lets '.' cross line breaks so that
        // multi-line scripts do not leak their source into the output.
        { @"(?s)<script[^>]*?>.*?</script>", "" },

        // Whole comments, including multi-line ones and any tags inside them.
        { @"(?s)<!--.*?-->", "" },

        // Opening/closing tags and <!DOCTYPE ...>. Quoted attribute values are
        // skipped as a unit so a '>' inside quotes does not end the tag. The
        // three alternatives start with different characters, so the match is
        // linear per starting '<' -- unlike a pattern that nests quantifiers
        // over alternatives that can match the empty string, which backtracks
        // exponentially on long or malformed tags.
        { @"<(?:/\s*)?!?\w(?:""[^""]*""|'[^']*'|[^>""'])*>", "" },

        // Angle brackets left over from malformed markup. This runs before
        // entity decoding so that "&lt;" / "&gt;" written in the text survive
        // as literal characters.
        { @"[<>]", "" },

        // A line break together with the whitespace that follows it
        // (this also removes every CR LF pair).
        { @"[\r\n]\s+", "" },

        // Character entities, by name or by decimal code.
        { @"&(quot|#34);", "\"" },
        { @"&(lt|#60);", "<" },
        { @"&(gt|#62);", ">" },
        { @"&(nbsp|#160);", " " },
        { @"&(iexcl|#161);", "\u00A1" },
        { @"&(cent|#162);", "\u00A2" },
        { @"&(pound|#163);", "\u00A3" },
        { @"&(copy|#169);", "\u00A9" },

        // Any other numeric entity is dropped. "&#38;" is excluded here
        // because it is decoded by the ampersand step below.
        { @"&#(?!38;)\d+;", "" },

        // The ampersand is decoded last so that "&amp;lt;" yields the text
        // "&lt;" rather than being decoded a second time into "<".
        { @"&(amp|#38);", "&" }
    };

    string output = strhtml;
    for (int i = 0; i < steps.GetLength (0); i++)
    {
        // The static overload uses the framework's regex cache, so the
        // patterns are not re-parsed on every call.
        output = Regex.Replace (output, steps[i, 0], steps[i, 1], RegexOptions.IgnoreCase);
    }

    return output;
}