To remove all HTML tags and keep only the text paragraphs — so that the result looks as if the page had been pasted into Notepad and the leftover markup (links and so on) deleted by hand — you can try the following method.
/// <summary>
/// Strips HTML markup from a string and returns the remaining plain text.
/// </summary>
/// <remarks>
/// Processing order:
/// 1. style blocks, Word paste residue, script blocks and comments are removed
///    together with their content;
/// 2. every remaining tag is removed;
/// 3. a line break followed by whitespace is removed;
/// 4. a small set of character entities is decoded in a single pass, and every
///    other numeric entity is dropped;
/// 5. any angle bracket still present, and every CR/LF pair, is removed, so the
///    result never contains an angle bracket.
/// The non-breaking-space entity is replaced by a regular space so that
/// adjacent words do not fuse together.
/// </remarks>
/// <param name="htmlstring">The HTML text to clean. May be null or empty.</param>
/// <returns>
/// The text with markup removed; an empty string when the input is null or empty.
/// </returns>
public static string delhtml(string htmlstring)
{
    if (string.IsNullOrEmpty(htmlstring))
    {
        return string.Empty;
    }

    const System.Text.RegularExpressions.RegexOptions options =
        System.Text.RegularExpressions.RegexOptions.IgnoreCase;

    // Applied in order; every match is replaced by the empty string.
    string[] removalPatterns =
    {
        // Style blocks, with or without attributes. Lazy, so that text between
        // two separate style blocks survives.
        @"<style\b[^>]*>[\s\S]*?</style\s*>",

        // Word paste residue.
        // NOTE(review): these five patterns are reproduced as published (the
        // first one is applied twice there as well). They look like they lost
        // characters in transcription and are very broad, e.g. the third
        // removes everything from any letter "v" up to the next "}" when no
        // ">" lies in between. Confirm against the original source before
        // tightening them.
        @"Normal[^>]*?MSO",
        @"Normal[^>]*?MSO",
        @"V[^>]*?}",
        @"O[^>]*?Beha",
        @"W[^>]*?Beha",
        @".Shape[^>]*?Beha",

        // Script blocks. [\s\S] instead of "." so that scripts spanning
        // several lines are removed together with their code.
        @"<script\b[^>]*?>[\s\S]*?</script\s*>",

        // Complete comments, removed before tag stripping so that a ">" inside
        // a comment cannot cut it short and leak the rest of the comment.
        @"<!--[\s\S]*?-->",

        // Any remaining tag.
        @"<(.[^>]*)>",

        // A line break followed by whitespace.
        @"([\r\n])[\s]+",

        // Leftovers of unbalanced comments: a stray terminator, or an opener
        // without terminator (removed up to the end of its line).
        @"-->",
        @"<!--.*"
    };

    string text = htmlstring;
    foreach (string pattern in removalPatterns)
    {
        text = System.Text.RegularExpressions.Regex.Replace(text, pattern, "", options);
    }

    // Decode entities in ONE pass. Decoding "&amp;" in a separate, earlier pass
    // would turn "&amp;lt;" into "&lt;" and then into "<" (double decoding).
    text = System.Text.RegularExpressions.Regex.Replace(
        text,
        @"&(quot|amp|lt|gt|nbsp|iexcl|cent|pound|copy|#\d+);",
        new System.Text.RegularExpressions.MatchEvaluator(DecodeHtmlEntity),
        options);

    // Guarantee that no angle bracket reaches the caller (this also discards
    // the ones produced by decoding &lt; / &gt;), then drop CR/LF pairs.
    text = text.Replace("<", "");
    text = text.Replace(">", "");
    text = text.Replace("\r\n", "");

    // Disabled in the original; presumably keeps only Chinese characters.
    // The character range was lost in transcription, \u4e00-\u9fa5 is a guess.
    // text = System.Text.RegularExpressions.Regex.Replace(text, @"[^\u4e00-\u9fa5]", "");

    return text;
}

/// <summary>
/// Maps one matched entity (group 1 is the name, or "#" followed by digits)
/// to its replacement text. Numeric entities that are not listed are dropped.
/// </summary>
private static string DecodeHtmlEntity(System.Text.RegularExpressions.Match match)
{
    switch (match.Groups[1].Value.ToLowerInvariant())
    {
        case "quot":
        case "#34":
            return "\"";
        case "amp":
        case "#38":
            return "&";
        case "lt":
        case "#60":
            return "<";
        case "gt":
        case "#62":
            return ">";
        case "nbsp":
        case "#160":
            return " ";
        case "iexcl":
        case "#161":
            return "\u00A1";
        case "cent":
        case "#162":
            return "\u00A2";
        case "pound":
        case "#163":
            return "\u00A3";
        case "copy":
        case "#169":
            return "\u00A9";
        default:
            return string.Empty;
    }
}
Pass the original HTML string when calling the method; the returned string has the markup removed.
|