using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
// First you need to import the namespace
using System.Text.RegularExpressions;
namespace WindowsFormsApplication1
{
    public class Class1
    {
        /// <summary>
        /// Converts HTML markup to plain text by removing script blocks, tags and
        /// comment markers, decoding a small set of common character entities and
        /// finally stripping any angle brackets that are left over.
        /// </summary>
        /// <param name="Htmlstring">The HTML text to convert. May be null or empty.</param>
        /// <returns>
        /// The text content with HTML removed, or an empty string when
        /// <paramref name="Htmlstring"/> is null or empty.
        /// </returns>
        public string NoHTML(string Htmlstring)
        {
            // Regex.Replace throws on a null input; treat "nothing in" as "nothing out".
            if (string.IsNullOrEmpty(Htmlstring))
            {
                return string.Empty;
            }

            // Delete script blocks. Singleline lets '.' cross line breaks, so scripts
            // that span several lines are removed together with their content.
            Htmlstring = Regex.Replace(Htmlstring, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);

            // Delete HTML tags.
            Htmlstring = Regex.Replace(Htmlstring, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);

            // Delete a line break together with the whitespace that follows it.
            Htmlstring = Regex.Replace(Htmlstring, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);

            // Delete leftover comment markers: a dangling "-->" and an unterminated
            // "<!--" (the latter up to the end of its line).
            Htmlstring = Regex.Replace(Htmlstring, @"-->", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"<!--.*", "", RegexOptions.IgnoreCase);

            // Decode the common named/numeric character entities.
            Htmlstring = Regex.Replace(Htmlstring, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(nbsp|#160);", "", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(iexcl|#161);", "\u00A1", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(cent|#162);", "\u00A2", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(pound|#163);", "\u00A3", RegexOptions.IgnoreCase);
            Htmlstring = Regex.Replace(Htmlstring, @"&(copy|#169);", "\u00A9", RegexOptions.IgnoreCase);

            // Drop every other decimal numeric entity.
            Htmlstring = Regex.Replace(Htmlstring, @"&#(\d+);", "", RegexOptions.IgnoreCase);

            // Remove remaining angle brackets and CR/LF pairs. string.Replace returns
            // a new string, so the result has to be assigned back to take effect.
            Htmlstring = Htmlstring.Replace("<", "");
            Htmlstring = Htmlstring.Replace(">", "");
            Htmlstring = Htmlstring.Replace("\r\n", "");

            // Return the string with HTML removed.
            return Htmlstring;
        }
    }
}
Simple version:
/// <summary>
/// Simple HTML-to-text conversion: removes every tag and every character
/// entity reference from the input.
/// </summary>
/// <param name="html">The HTML text to convert. May be null or empty.</param>
/// <returns>
/// The input with tags and entity references removed, or an empty string
/// when <paramref name="html"/> is null or empty.
/// </returns>
public string NoHtml(string html)
{
    // Regex.Replace throws on a null input; treat "nothing in" as "nothing out".
    if (string.IsNullOrEmpty(html))
    {
        return string.Empty;
    }

    string withoutTags = System.Text.RegularExpressions.Regex.Replace(html, "<[^>]+>", "");

    // Match only well-formed entity references (named, decimal or hex). A bare
    // '&' in ordinary text must not swallow everything up to the next ';'.
    return System.Text.RegularExpressions.Regex.Replace(
        withoutTags,
        "&(?:#[0-9]+|#[xX][0-9a-fA-F]+|[a-zA-Z][a-zA-Z0-9]*);",
        "");
}
Use C# to convert HTML text to plain text by removing all HTML tags (reposted)