.NET 2.0: capturing all links on a web page
Author: qingyueer
Home page: http://blog.csdn.net/21aspnet/
Time: 2007.4.18
This method has been tested on major portals, with a capture rate of 100%!
Code-behind (Default.aspx.cs):
using System;
using System.Collections;
using System.Collections.Generic;
using System.Configuration;
using System.Data;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.HtmlControls;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
/// <summary>
/// Code-behind for Default.aspx. Downloads the page whose address the user typed
/// into TextBox1 and lists every distinct http/https link found in its source,
/// one per line, in TextBox2. Both text boxes are expected to be declared by the
/// page markup.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    /// <summary>
    /// Matches absolute http/https URLs: a dotted host name followed by an
    /// optional path/query made of word characters and <c>- . / ? % &amp; =</c>.
    /// The hyphen inside the last character class is escaped so it can never be
    /// read as a range operator.
    /// </summary>
    private static readonly Regex LinkPattern = new Regex(
        @"https?://([\w-]+\.)+[\w-]+(/[\w\-./?%&=]*)?",
        RegexOptions.IgnoreCase | RegexOptions.Compiled);

    /// <summary>Page load handler; the page needs no initialisation.</summary>
    /// <param name="sender">The page raising the event.</param>
    /// <param name="e">Unused event data.</param>
    protected void Page_Load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Click handler for the "extract" button: validates the address, downloads
    /// the page and writes the de-duplicated link list to TextBox2.
    /// </summary>
    /// <param name="sender">The button raising the event.</param>
    /// <param name="e">Unused event data.</param>
    protected void Button1_Click(object sender, EventArgs e)
    {
        TextBox2.Text = "";

        // The address comes straight from the user: accept only absolute
        // http/https URLs so that the HttpWebRequest cast below cannot fail and
        // file:// or ftp:// addresses are never requested by the server.
        Uri pageUri;
        bool isAbsolute = Uri.TryCreate(TextBox1.Text.Trim(), UriKind.Absolute, out pageUri);
        if (!isAbsolute
            || (pageUri.Scheme != Uri.UriSchemeHttp && pageUri.Scheme != Uri.UriSchemeHttps))
        {
            TextBox2.Text = "Please enter an absolute http:// or https:// address.";
            return;
        }

        string html;
        try
        {
            html = DownloadPage(pageUri);
        }
        catch (WebException ex)
        {
            // Network/HTTP failures are expected for arbitrary addresses;
            // report them in the result box instead of failing the whole page.
            TextBox2.Text = "Could not download the page: " + ex.Message;
            return;
        }

        TextBox2.Text = ExtractLinks(html);
    }

    /// <summary>Downloads the response body of <paramref name="pageUri"/> as text.</summary>
    /// <param name="pageUri">Absolute http/https address to request.</param>
    /// <returns>The complete response body.</returns>
    /// <exception cref="WebException">The request failed.</exception>
    private static string DownloadPage(Uri pageUri)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(pageUri);

        // using blocks release the connection and the stream even when
        // reading the body throws.
        using (WebResponse response = request.GetResponse())
        using (StreamReader reader = new StreamReader(response.GetResponseStream()))
        {
            return reader.ReadToEnd();
        }
    }

    /// <summary>
    /// Collects every distinct link in <paramref name="html"/>, in order of first
    /// appearance.
    /// </summary>
    /// <param name="html">Page source to scan.</param>
    /// <returns>The links, each followed by a line feed; empty when none are found.</returns>
    private static string ExtractLinks(string html)
    {
        // Dictionary keys give O(1) duplicate detection and are available on .NET 2.0.
        Dictionary<string, bool> seen = new Dictionary<string, bool>();
        StringBuilder links = new StringBuilder();

        foreach (Match match in LinkPattern.Matches(html))
        {
            string link = match.Value;
            if (seen.ContainsKey(link))
            {
                continue;
            }

            seen.Add(link, true);
            links.Append(link).Append("\n");
        }

        return links.ToString();
    }
}
Front-end page (Default.aspx):
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default.aspx.cs" Inherits="_Default" %>
<%-- Link extractor page: TextBox1 takes the address to scan, Button1 raises
     Button1_Click in the code-behind, and TextBox2 shows the extracted links. --%>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
    <title>capture all links on a web page</title>
</head>
<body>
    <form id="form1" runat="server">
        <div>
            <asp:TextBox ID="TextBox1" runat="server" Width="481px"></asp:TextBox>
            <asp:Button ID="Button1" runat="server" OnClick="Button1_Click" Text="extract" />
            <br />
            <asp:TextBox ID="TextBox2" runat="server" Height="304px" TextMode="MultiLine" Width="524px"></asp:TextBox>
        </div>
    </form>
</body>
</html>