.NET 2.0: capturing all links on a web page
Author: qingyueer
Home page: http://blog.csdn.net/21aspnet/
Time: 2007.4.18
This method is tested on major portals with a capture rate of 100%!
Code-behind:
using System;
using System.Data;
using System.Configuration;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
using System.Web.UI.HtmlControls;
using System.Text.RegularExpressions;
using System.Net;
using System.IO;
using System.Collections;

/// <summary>
/// Code-behind for the link-extraction page: downloads the page whose URL is
/// typed into TextBox1 and lists every distinct http:// link found in its
/// source in TextBox2.
/// </summary>
public partial class _Default : System.Web.UI.Page
{
    /// <summary>
    /// Page load handler. Nothing needs to be initialised on first load; the
    /// empty branch is kept as a placeholder.
    /// </summary>
    protected void Page_Load(object sender, EventArgs e)
    {
        if (!IsPostBack)
        {
        }
    }

    /// <summary>
    /// Click handler for the extract button. Fetches the page at the URL in
    /// TextBox1, finds all http:// URLs in the returned text with a regular
    /// expression, and writes each distinct URL to TextBox2, one per line.
    /// </summary>
    /// <remarks>
    /// Only links starting with "http://" (case-insensitive) are matched;
    /// other schemes are not captured by the pattern. Errors raised while
    /// creating the request or reading the response are not caught here.
    /// </remarks>
    protected void Button1_Click(object sender, EventArgs e)
    {
        TextBox2.Text = "";

        string web_url = this.TextBox1.Text.Trim(); // e.g. "http://blog.csdn.net/21aspnet"
        if (web_url.Length == 0)
        {
            // Nothing to fetch; leave the result box empty instead of throwing.
            return;
        }

        // Download the page source. The using blocks release the response and
        // the reader even if reading throws.
        string all_code;
        HttpWebRequest all_codeRequest = (HttpWebRequest)WebRequest.Create(web_url);
        using (WebResponse all_codeResponse = all_codeRequest.GetResponse())
        using (StreamReader the_reader = new StreamReader(all_codeResponse.GetResponseStream()))
        {
            all_code = the_reader.ReadToEnd();
        }

        // URLs already emitted, used to filter out duplicates.
        ArrayList my_list = new ArrayList();

        string p = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";
        Regex re = new Regex(p, RegexOptions.IgnoreCase);
        MatchCollection mc = re.Matches(all_code);

        // Build the output once instead of re-assigning the control's Text
        // for every match.
        System.Text.StringBuilder links = new System.Text.StringBuilder();
        foreach (Match m in mc)
        {
            string name = m.ToString();
            if (my_list.Contains(name))
            {
                continue; // already listed
            }

            // Remember the URL so later repeats are skipped.
            my_list.Add(name);
            links.Append(name).Append("\n");
        }

        TextBox2.Text = links.ToString();
    }
}
Front-end page markup:
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="Default.aspx.cs" Inherits="_Default" %>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
    <title>capture all links on a web page</title>
</head>
<body>
    <form id="form1" runat="server">
    <div>
        <%-- TextBox1: URL of the page to scan. Button1: runs the extraction. --%>
        <asp:TextBox ID="TextBox1" runat="server" Width="481px"></asp:TextBox>
        <asp:Button ID="Button1" runat="server" OnClick="Button1_Click" Text="extract" />
        <br />
        <%-- TextBox2: multi-line box that receives the extracted links. --%>
        <asp:TextBox ID="TextBox2" runat="server" Height="304px" TextMode="MultiLine" Width="524px"></asp:TextBox></div>
    </form>
</body>
</html>
Comments:
The key part is the pattern: string p = @"http://([\w-]+\.)+[\w-]+(/[\w- ./?%&=]*)?";