SRC: http://www.cnblogs.com/NoblePaul/archive/2004/09/25/46532.aspx
When scraping HTML pages, you often need to strip the HTML markup from the page source and keep only the text. A regular expression solves this problem:
VB.NET
''' -----------------------------------------------------------------------------
''' <summary>
''' Removes all HTML tags from a string.
''' </summary>
''' <param name="Html">HTML source code. May be Nothing.</param>
''' <returns>
''' The input with every angle-bracketed tag removed; an empty string when
''' <paramref name="Html"/> is Nothing.
''' </returns>
''' <remarks>
''' A tag is matched as an opening angle bracket, any run of characters other
''' than a closing angle bracket, and then a closing angle bracket. Text
''' between tags (including script or style bodies) is left in place, and a
''' closing angle bracket inside an attribute value ends the match early.
''' </remarks>
''' <history>
''' [Administrator] 2004-9-25 created
''' </history>
''' -----------------------------------------------------------------------------
Public Function Parsetags(ByVal Html As String) As String
    ' Regex.Replace throws on a null input, so treat Nothing as "no text".
    If Html Is Nothing Then
        Return String.Empty
    End If

    ' Regex.Replace is a Shared method: call it on the type rather than through
    ' an unassigned instance variable. The pattern must not contain stray
    ' spaces, otherwise ordinary tags such as <b> are not matched.
    Return System.Text.RegularExpressions.Regex.Replace(Html, "<[^>]*>", "")
End Function
C#
/// <summary>
/// Removes all HTML tags from a string.
/// </summary>
/// <param name="Html">HTML source code. May be null.</param>
/// <returns>
/// The input with every angle-bracketed tag removed; an empty string when
/// <paramref name="Html"/> is null.
/// </returns>
/// <remarks>
/// A tag is matched as an opening angle bracket, any run of characters other
/// than a closing angle bracket, and then a closing angle bracket. Text
/// between tags (including script or style bodies) is left in place, and a
/// closing angle bracket inside an attribute value ends the match early.
/// </remarks>
public string Parsetags(string Html)
{
    // Regex.Replace throws on a null input, so treat null as "no text".
    if (Html == null)
    {
        return string.Empty;
    }

    // The pattern must not contain stray spaces, otherwise ordinary tags
    // such as <b> are not matched.
    return System.Text.RegularExpressions.Regex.Replace(Html, "<[^>]*>", "");
}
A simple example is provided as follows:
VB.NET
''' <summary>
''' Demo page-load handler: builds a small HTML document in memory, strips its
''' tags with Parsetags and writes the remaining text to the response.
''' </summary>
''' <param name="Sender">Event source (not used).</param>
''' <param name="E">Event data (not used).</param>
Private Sub Page_load(ByVal Sender As System.Object, ByVal E As System.EventArgs) Handles MyBase.Load
    Dim markup As New System.Text.StringBuilder

    ' Each fragment is preceded by CRLF so the document is one tag per line.
    ' A double quote inside a VB string literal is written as two double quotes;
    ' every attribute value below must therefore open AND close with "".
    markup.Append(ControlChars.CrLf & " <! Doctype HTML public ""-// W3C // dtd html 4.0 transitional // en ""> ")
    markup.Append(ControlChars.CrLf & " <HTML> ")
    markup.Append(ControlChars.CrLf & " <Head> ")
    markup.Append(ControlChars.CrLf & " <Title> webform1 </title> ")
    markup.Append(ControlChars.CrLf & " <Meta name = "" generator "" content = "" Microsoft Visual Studio. NET 7.1 ""> ")
    markup.Append(ControlChars.CrLf & " <Meta name = "" code_language "" content = "" Visual Basic. Net 7.1 ""> ")
    markup.Append(ControlChars.CrLf & " <Meta name = "" vs_defaultclientscript "" content = "" JavaScript ""> ")
    markup.Append(ControlChars.CrLf & " <Meta name = "" vs_targetschema "" content = "" http://schemas.microsoft.com/intellisense/ie5 ""> ")
    markup.Append(ControlChars.CrLf & " </Head> ")
    markup.Append(ControlChars.CrLf & " <Body ms_positioning = "" gridlayout ""> ")
    markup.Append(ControlChars.CrLf & " <Form ID = "" form1 "" method = "" Post "" runat = "" server ""> ")
    markup.Append(ControlChars.CrLf & " <Font face = "" ""> test </font> ")
    markup.Append(ControlChars.CrLf & " </Form> ")
    markup.Append(ControlChars.CrLf & " </Body> ")
    markup.Append(ControlChars.CrLf & " </Html> ")

    ' Only the text outside the tags ("webform1" and "test" plus whitespace)
    ' is left after filtering.
    Response.Write(Parsetags(markup.ToString()))
End Sub
// C#
/// <summary>
/// Demo page-load handler: builds a small HTML document in memory, strips its
/// tags with Parsetags and writes the remaining text to the response.
/// </summary>
/// <param name="Sender">Event source (not used).</param>
/// <param name="E">Event data (not used).</param>
private void Page_load(object Sender, System.EventArgs E)
{
    // Same two characters as Microsoft.VisualBasic.ControlChars.CrLf, without
    // requiring a reference to the Visual Basic runtime assembly from C#.
    const string CrLf = "\r\n";

    System.Text.StringBuilder markup = new System.Text.StringBuilder();

    // Each fragment is preceded by CRLF so the document is one tag per line.
    // Double quotes inside a regular C# string literal must be escaped as \".
    markup.Append(CrLf + " <! Doctype HTML public \"-// W3C // dtd html 4.0 transitional // en\"> ");
    markup.Append(CrLf + " <HTML> ");
    markup.Append(CrLf + " <Head> ");
    markup.Append(CrLf + " <Title> webform1 </title> ");
    markup.Append(CrLf + " <Meta name = \"generator\" content = \"Microsoft Visual Studio. NET 7.1\"> ");
    markup.Append(CrLf + " <Meta name = \"code_language\" content = \"Visual Basic. Net 7.1\"> ");
    markup.Append(CrLf + " <Meta name = \"vs_defaultclientscript\" content = \"JavaScript\"> ");
    markup.Append(CrLf + " <Meta name = \"vs_targetschema\" content = \"http://schemas.microsoft.com/intellisense/ie5\"> ");
    markup.Append(CrLf + " </Head> ");
    markup.Append(CrLf + " <Body ms_positioning = \"gridlayout\"> ");
    markup.Append(CrLf + " <Form ID = \"form1\" method = \"Post\" runat = \"server\"> ");
    markup.Append(CrLf + " <Font face = \"\"> test </font> ");
    markup.Append(CrLf + " </Form> ");
    markup.Append(CrLf + " </Body> ");
    markup.Append(CrLf + " </Html> ");

    // C# is case-sensitive: the call must match the declared name Parsetags.
    // Only the text outside the tags ("webform1" and "test" plus whitespace)
    // is left after filtering.
    Response.Write(Parsetags(markup.ToString()));
}