Convert HTML code to text

Source: Internet
Author: User
SRC: http://www.cnblogs.com/NoblePaul/archive/2004/09/25/46532.aspx
When scraping HTML pages, you often need to strip the HTML markup from the page source to obtain the plain text. A regular expression can solve this problem:
VB.NET

''' -----------------------------------------------------------------------------
''' <summary>
''' Removes all HTML tags from a string and returns the remaining text.
''' </summary>
''' <param name="Html">HTML source code to strip.</param>
''' <returns>
''' The input with every tag removed, or an empty string when
''' <paramref name="Html"/> is Nothing.
''' </returns>
''' <remarks>
''' A tag is anything from an opening angle bracket up to the next closing
''' angle bracket. Text between tags (including the contents of script and
''' style elements) is kept, character entities are not decoded, and a
''' closing angle bracket inside an attribute value ends the match early.
''' </remarks>
''' <history>
''' [Administrator] 2004-9-25 created
''' </history>
''' -----------------------------------------------------------------------------
Public Function Parsetags(ByVal Html As String) As String
    ' Regex.Replace rejects a null input, so treat Nothing as "no text".
    If Html Is Nothing Then
        Return String.Empty
    End If

    ' Replace is a Shared method, so no Regex instance is needed.
    ' The pattern must not contain spaces: "<", any run of non-">" characters, ">".
    Return System.Text.RegularExpressions.Regex.Replace(Html, "<[^>]*>", "")
End Function

C#

/// <summary>
/// Removes all HTML tags from a string and returns the remaining text.
/// </summary>
/// <param name="Html">HTML source code to strip.</param>
/// <returns>
/// The input with every tag removed, or an empty string when
/// <paramref name="Html"/> is null.
/// </returns>
/// <remarks>
/// A tag is anything from an opening angle bracket up to the next closing
/// angle bracket. Text between tags (including the contents of script and
/// style elements) is kept, character entities are not decoded, and a
/// closing angle bracket inside an attribute value ends the match early.
/// </remarks>
public string Parsetags(string Html)
{
    // Regex.Replace rejects a null input, so treat null as "no text".
    if (Html == null)
    {
        return string.Empty;
    }

    // The pattern must not contain spaces: "<", any run of non-">" characters, ">".
    return System.Text.RegularExpressions.Regex.Replace(Html, "<[^>]*>", "");
}

A simple example is provided as follows:
VB.NET

''' <summary>
''' Demo page handler: builds a small HTML document in memory, strips its
''' tags with Parsetags, and writes the remaining text to the response.
''' </summary>
''' <param name="sender">Object that raised the Load event (unused).</param>
''' <param name="e">Event data (unused).</param>
Private Sub Page_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load
    Dim oStringBuilder As New System.Text.StringBuilder()

    ' Inside a VB string literal an embedded double quote is written as "".
    ' Each fragment is prefixed with a line break, as in the original sample.
    oStringBuilder.Append(ControlChars.CrLf & "<!DOCTYPE HTML PUBLIC ""-//W3C//DTD HTML 4.0 Transitional//EN"">")
    oStringBuilder.Append(ControlChars.CrLf & "<html>")
    oStringBuilder.Append(ControlChars.CrLf & "<head>")
    oStringBuilder.Append(ControlChars.CrLf & "<title>webform1</title>")
    oStringBuilder.Append(ControlChars.CrLf & "<meta name=""generator"" content=""Microsoft Visual Studio .NET 7.1"">")
    oStringBuilder.Append(ControlChars.CrLf & "<meta name=""code_language"" content=""Visual Basic .NET 7.1"">")
    oStringBuilder.Append(ControlChars.CrLf & "<meta name=""vs_defaultclientscript"" content=""JavaScript"">")
    oStringBuilder.Append(ControlChars.CrLf & "<meta name=""vs_targetschema"" content=""http://schemas.microsoft.com/intellisense/ie5"">")
    oStringBuilder.Append(ControlChars.CrLf & "</head>")
    oStringBuilder.Append(ControlChars.CrLf & "<body ms_positioning=""gridlayout"">")
    oStringBuilder.Append(ControlChars.CrLf & "<form id=""form1"" method=""post"" runat=""server"">")
    oStringBuilder.Append(ControlChars.CrLf & "<font face="""">test</font>")
    oStringBuilder.Append(ControlChars.CrLf & "</form>")
    oStringBuilder.Append(ControlChars.CrLf & "</body>")
    oStringBuilder.Append(ControlChars.CrLf & "</html>")

    ' Only the text between tags ("webform1" and "test") plus the line breaks
    ' should remain after the tags are stripped.
    Response.Write(Parsetags(oStringBuilder.ToString()))
End Sub

// C#
/// <summary>
/// Demo page handler: builds a small HTML document in memory, strips its
/// tags with Parsetags, and writes the remaining text to the response.
/// </summary>
/// <param name="sender">Object that raised the Load event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Page_Load(object sender, System.EventArgs e)
{
    // Same value as Microsoft.VisualBasic.ControlChars.CrLf, without needing
    // a reference to the Visual Basic runtime assembly from C#.
    const string CrLf = "\r\n";

    System.Text.StringBuilder oStringBuilder = new System.Text.StringBuilder();

    // Embedded double quotes must be escaped as \" inside a regular C# string literal.
    oStringBuilder.Append(CrLf + "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">");
    oStringBuilder.Append(CrLf + "<html>");
    oStringBuilder.Append(CrLf + "<head>");
    oStringBuilder.Append(CrLf + "<title>webform1</title>");
    oStringBuilder.Append(CrLf + "<meta name=\"generator\" content=\"Microsoft Visual Studio .NET 7.1\">");
    oStringBuilder.Append(CrLf + "<meta name=\"code_language\" content=\"Visual Basic .NET 7.1\">");
    oStringBuilder.Append(CrLf + "<meta name=\"vs_defaultclientscript\" content=\"JavaScript\">");
    oStringBuilder.Append(CrLf + "<meta name=\"vs_targetschema\" content=\"http://schemas.microsoft.com/intellisense/ie5\">");
    oStringBuilder.Append(CrLf + "</head>");
    oStringBuilder.Append(CrLf + "<body ms_positioning=\"gridlayout\">");
    oStringBuilder.Append(CrLf + "<form id=\"form1\" method=\"post\" runat=\"server\">");
    oStringBuilder.Append(CrLf + "<font face=\"\">test</font>");
    oStringBuilder.Append(CrLf + "</form>");
    oStringBuilder.Append(CrLf + "</body>");
    oStringBuilder.Append(CrLf + "</html>");

    // C# is case-sensitive: the helper is declared as Parsetags, not parsetags.
    // Only the text between tags ("webform1" and "test") plus the line breaks
    // should remain after the tags are stripped.
    Response.Write(Parsetags(oStringBuilder.ToString()));
}

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.