Catalog: Information collection Entry Series catalogue
The following is my own collation of a C# page-request core class; it mainly contains the following methods:
1.HttpWebRequest GET request for page HTML
2.HttpWebRequest POST request for page HTML
3. Demo login to get cookie content
4. Simulated login to get a cookie string
5. Proxy configuration
6. Use WebBrowser to get the page JS generated
7. Set cookies for WebBrowser, simulate login
8. Usage demo for each method + demo download
HttpWebRequest GET request for page HTML
/// <summary>
/// Issues an HTTP GET request and returns the page's HTML.
/// </summary>
/// <param name="url">The URL to request.</param>
/// <param name="proxy">Proxy to use. Pass null when no proxy is needed; otherwise the
/// system proxy is resolved on every request, which slows it down.</param>
/// <param name="cookies">Cookie container holding the cookies the site requires.</param>
/// <param name="timeout">Request timeout in milliseconds.</param>
/// <returns>The HTML returned by the page, decoded as UTF-8.</returns>
public static string Crawl(string url, WebProxy proxy, CookieContainer cookies, int timeout = 10000)
{
    string result = string.Empty;
    HttpWebRequest request = null;
    WebResponse response = null;
    StreamReader streamReader = null;
    try
    {
        request = (HttpWebRequest)WebRequest.Create(url);
        request.Proxy = proxy;
        request.Timeout = timeout;
        request.AllowAutoRedirect = true;
        // BUG FIX: the original declared the parameter as "cookie" but assigned
        // "cookies" here, which does not compile; the name is unified as "cookies".
        request.CookieContainer = cookies;
        response = (HttpWebResponse)request.GetResponse();
        // NOTE(review): assumes the page is UTF-8; the response's Content-Type
        // charset is not inspected — confirm for non-UTF-8 sites.
        streamReader = new StreamReader(response.GetResponseStream(), Encoding.UTF8);
        result = streamReader.ReadToEnd();
    }
    catch (Exception)
    {
        // BUG FIX: "throw ex" resets the stack trace; bare "throw" preserves it.
        throw;
    }
    finally
    {
        if (request != null) { request.Abort(); }
        if (response != null) { response.Close(); }
        if (streamReader != null) { streamReader.Dispose(); }
    }
    return result;
}
HttpWebRequest POST request for page HTML
/// <summary>
/// Issues an HTTP POST request and returns the page's HTML.
/// </summary>
/// <param name="url">The URL to post to.</param>
/// <param name="postData">Form-encoded body, e.g. "Id=1&amp;name=test".</param>
/// <param name="proxy">Proxy to use, or null for a direct connection.</param>
/// <param name="cookies">Cookie container holding the cookies the site requires.</param>
/// <param name="timeout">Request timeout in milliseconds.</param>
/// <returns>The HTML returned by the page, decoded as UTF-8.</returns>
public static string Crawl(string url, string postData, WebProxy proxy, CookieContainer cookies, int timeout = 10000)
{
    string result = string.Empty;
    HttpWebRequest request = null;
    WebResponse response = null;
    StreamReader streamReader = null;
    try
    {
        request = (HttpWebRequest)WebRequest.Create(url);
        request.Proxy = proxy;
        request.Timeout = timeout;
        request.AllowAutoRedirect = true;
        // BUG FIX: the original declared the parameter as "cookie" but assigned
        // "cookies" here, which does not compile; the name is unified as "cookies".
        request.CookieContainer = cookies;
        // NOTE(review): ASCII encoding silently corrupts non-ASCII characters in
        // the body; callers posting non-ASCII form values should URL-encode first.
        byte[] bodyBytes = Encoding.ASCII.GetBytes(postData);
        request.Method = "POST";
        request.ContentType = "application/x-www-form-urlencoded";
        request.ContentLength = bodyBytes.Length;
        using (Stream requestStream = request.GetRequestStream())
        {
            requestStream.Write(bodyBytes, 0, bodyBytes.Length);
        }
        response = (HttpWebResponse)request.GetResponse();
        streamReader = new StreamReader(response.GetResponseStream(), Encoding.UTF8);
        result = streamReader.ReadToEnd();
    }
    catch (Exception)
    {
        // BUG FIX: "throw ex" resets the stack trace; bare "throw" preserves it.
        // Also removed the unused local "responseData" from the original.
        throw;
    }
    finally
    {
        if (request != null) { request.Abort(); }
        if (response != null) { response.Close(); }
        if (streamReader != null) { streamReader.Dispose(); }
    }
    return result;
}
Demo Login Get Cookie Content
/// <summary>
/// Requests the given URL and captures the cookies the server sets,
/// e.g. the session cookie produced by a simulated login page.
/// </summary>
/// <param name="url">The URL to request.</param>
/// <param name="proxy">Proxy to use, or null for a direct connection.</param>
/// <param name="timeout">Request timeout in milliseconds.</param>
/// <returns>A CookieContainer holding the cookies returned by the server.</returns>
public static CookieContainer GetCookie(string url, WebProxy proxy, int timeout = 10000)
{
    HttpWebRequest request = null;
    HttpWebResponse response = null;
    try
    {
        CookieContainer cookieContainer = new CookieContainer();
        request = (HttpWebRequest)WebRequest.Create(url);
        request.Proxy = proxy;
        request.Timeout = timeout;
        request.AllowAutoRedirect = true;
        request.CookieContainer = cookieContainer;
        response = (HttpWebResponse)request.GetResponse();
        // Copy the container's cookies onto the response so both views agree.
        response.Cookies = request.CookieContainer.GetCookies(request.RequestUri);
        return cookieContainer;
    }
    catch (Exception)
    {
        // BUG FIX: "throw ex" resets the stack trace; bare "throw" preserves it.
        throw;
    }
    finally
    {
        if (request != null) { request.Abort(); }
        if (response != null) { response.Close(); }
    }
}
Impersonation Login Get Cookie string
/// <summary>
/// Requests the given URL and returns the server's cookies as a single
/// "name=value; name2=value2" header string usable by WebBrowser.
/// </summary>
/// <param name="url">The URL to request.</param>
/// <param name="proxy">Proxy to use, or null for a direct connection.</param>
/// <param name="timeout">Request timeout in milliseconds.</param>
/// <returns>The Cookie header string for the requested URI.</returns>
public static string GetCookieString(string url, WebProxy proxy, int timeout = 10000)
{
    HttpWebRequest request = null;
    HttpWebResponse response = null;
    try
    {
        CookieContainer cookieContainer = new CookieContainer();
        request = (HttpWebRequest)WebRequest.Create(url);
        request.Proxy = proxy;
        request.Timeout = timeout;
        request.AllowAutoRedirect = true;
        request.CookieContainer = cookieContainer;
        response = (HttpWebResponse)request.GetResponse();
        response.Cookies = request.CookieContainer.GetCookies(request.RequestUri);
        // GetCookieHeader formats the cookies for the URI as one header string.
        string cookieHeader = request.CookieContainer.GetCookieHeader(request.RequestUri);
        return cookieHeader;
    }
    catch (Exception)
    {
        // BUG FIX: "throw ex" resets the stack trace; bare "throw" preserves it.
        throw;
    }
    finally
    {
        if (request != null) { request.Abort(); }
        if (response != null) { response.Close(); }
    }
}
Settings for Agents
/// <summary>
/// Creates a WebProxy with the given credentials.
/// </summary>
/// <param name="port">Full proxy address URI, e.g. "http://127.0.0.1:8888".
/// DOC FIX: the original doc called this a "Proxy Port", but the value is passed
/// straight to the Uri constructor, so it must be a complete address. The parameter
/// name is kept for compatibility with callers using named arguments.</param>
/// <param name="user">Proxy user name.</param>
/// <param name="password">Proxy password.</param>
/// <returns>A configured WebProxy instance.</returns>
public static WebProxy CreatePorxy(string port, string user, string password)
{
    WebProxy proxy = new WebProxy();
    proxy.Address = new Uri(port);
    proxy.Credentials = new NetworkCredential(user, password);
    return proxy;
}
Using WebBrowser to get JS-generated page content
Set cookies for WebBrowser, impersonate a login
C# web information retrieval — core methods (getting started)