I used regular expressions for this. For more information, see here. Haha.
The code is still rough. For example, the code has not yet been implemented. The code is written to XML and then displayed.
There are still some things that need to be filtered out. Click 1.1. I am recording it here first so that I do not forget it later.
Default.aspx
<%@ Page Language="C#" AutoEventWireup="true" CodeFile="default.aspx.cs" Inherits="_default" ValidateRequest="false" %>
<%--
    Page that fetches the source of a user-supplied URL and can list the links found in it.
    ValidateRequest is switched off so the page source held in the "contenthtml" box can be
    posted back when a button is clicked.
    Control IDs and OnClick handler names must match the members of the code-behind class.
--%>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head runat="server">
    <title>untitled page</title>
</head>
<body>
    <form id="aspbuffer" method="post" runat="server">
        <div align="center" style="font-weight: bold">obtain the source code of any webpage</div>
        <br />
        <div>
            <%-- URL to download. --%>
            <asp:TextBox ID="urltext" runat="server" Style="z-index: 100; left: 9px; position: absolute; top: 47px" Width="400px"></asp:TextBox>
            &nbsp;
            <%-- Downloads the URL and shows its source in "contenthtml". --%>
            <asp:Button ID="webrequestbutton" runat="server" Text="use webrequest to get" Style="z-index: 101; left: 444px; position: absolute; top: 45px" OnClick="webrequestbutton_click"></asp:Button>
            &nbsp;
            <%-- Shows the downloaded source, or the extracted link list. --%>
            <asp:TextBox ID="contenthtml" runat="server" Width="100%" Height="360px" TextMode="MultiLine" Style="z-index: 102; left: 3px; position: absolute; top: 92px"></asp:TextBox>
            <%-- Replaces the contents of "contenthtml" with the links found in it. --%>
            <asp:Button ID="geturl" runat="server" OnClick="geturl_click" Style="z-index: 104; left: 675px; position: absolute; top: 45px" Text="Get webpage link" />
        </div>
    </form>
</body>
</html>
Default.aspx.cs
using System;
using System.Collections;
using System.Configuration;
using System.Data;
using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;
using System.Web.Security;
using System.Web.UI;
using System.Web.UI.HtmlControls;
using System.Web.UI.WebControls;
using System.Web.UI.WebControls.WebParts;
/// <summary>
/// Code-behind for the page: downloads the document at a user-supplied URL into the
/// <c>contenthtml</c> text box and, on request, replaces that text with the list of
/// links found in it.
/// </summary>
public partial class _default : System.Web.UI.Page
{
    /// <summary>URL text most recently submitted through the "webrequest" button.</summary>
    public string urlpage = "";

    /// <summary>Page load handler; intentionally does nothing.</summary>
    protected void page_load(object sender, EventArgs e)
    {
    }

    /// <summary>
    /// Downloads the document at the URL typed into <c>urltext</c> and shows its
    /// HTML-encoded source in <c>contenthtml</c>.
    /// </summary>
    /// <param name="sender">Control that raised the click event.</param>
    /// <param name="e">Event data (unused).</param>
    protected void webrequestbutton_click(object sender, EventArgs e)
    {
        urlpage = urltext.Text.Trim();

        // Only fetch absolute http/https URLs. WebRequest.Create also accepts other
        // schemes (e.g. file://), which must not be reachable from user input, and it
        // throws on empty or malformed text.
        Uri target;
        bool isWebUrl = Uri.TryCreate(urlpage, UriKind.Absolute, out target)
            && (target.Scheme == Uri.UriSchemeHttp || target.Scheme == Uri.UriSchemeHttps);
        if (!isWebUrl)
        {
            contenthtml.Text = "Please enter an absolute http:// or https:// URL.";
            return;
        }

        try
        {
            WebRequest request = WebRequest.Create(target);

            // using blocks release the response, stream and reader even when reading fails.
            using (WebResponse response = request.GetResponse())
            using (Stream resstream = response.GetResponseStream())
            using (StreamReader sr = new StreamReader(resstream, Encoding.Default))
            {
                // NOTE(review): the body is decoded with the server's default ANSI code page,
                // not the charset announced by the remote page - confirm this is intended.
                // NOTE(review): a multi-line TextBox presumably HTML-encodes its Text when it
                // renders, so this extra encode may show entities literally - confirm.
                contenthtml.Text = Server.HtmlEncode(sr.ReadToEnd());
            }
        }
        catch (WebException ex)
        {
            // Network and HTTP errors are reported in the text box instead of an error page.
            contenthtml.Text = "Request failed: " + ex.Message;
        }
    }

    /// <summary>
    /// Replaces the contents of <c>contenthtml</c> with the distinct links found in it,
    /// one per line.
    /// </summary>
    /// <param name="sender">Control that raised the click event.</param>
    /// <param name="e">Event data (unused).</param>
    protected void geturl_click(object sender, EventArgs e)
    {
        ArrayList alllinks = gethyperlinks(contenthtml.Text);

        // Every link is emitted (the earlier bound of Count - 1 dropped the last one),
        // each on its own line so adjacent links do not run together.
        StringBuilder listing = new StringBuilder();
        foreach (string link in alllinks)
        {
            listing.AppendLine(link);
        }

        contenthtml.Text = listing.ToString();
    }

    /// <summary>
    /// Extracts the distinct http/https URLs that occur in a piece of text.
    /// </summary>
    /// <param name="htmlcode">Text to scan; may be null or empty.</param>
    /// <returns>
    /// The matched URLs as strings, without duplicates, sorted with the default
    /// comparer. Empty when nothing matches.
    /// </returns>
    static ArrayList gethyperlinks(string htmlcode)
    {
        ArrayList myal = new ArrayList();
        if (string.IsNullOrEmpty(htmlcode))
        {
            return myal;
        }

        // A URL here is "http://" or "https://" followed by non-whitespace characters
        // that contain at least one dot.
        // NOTE(review): the pattern in the published listing had lost its backslashes;
        // \S+\.\S+ is the presumed intent - confirm. Matches run to the next whitespace,
        // so trailing markup may be included.
        const string strregex = @"https?://\S+\.\S+";
        Regex rg = new Regex(strregex, RegexOptions.IgnoreCase);

        foreach (Match m in rg.Matches(htmlcode))
        {
            string strnew = m.Value;

            // Filter duplicate URLs.
            if (!myal.Contains(strnew))
            {
                myal.Add(strnew);
            }
        }

        myal.Sort();
        return myal;
    }
}