/// <summary>
/// Method one (recommended): downloads the source of a web page with
/// <see cref="HttpWebRequest"/>. The body is decoded with <see cref="Encoding.Default"/>
/// unless it starts with a byte-order mark, in which case the reader switches to the
/// encoding the mark indicates.
/// </summary>
/// <param name="url">Address of the web page.</param>
/// <returns>The source text of the web page.</returns>
/// <remarks>No exception is caught here; request and read failures propagate to the caller.</remarks>
public static string GetHtmlSource2(string url)
{
    HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);
    request.Accept = "*/*"; // accept any content type
    // Present the request as coming from Internet Explorer.
    request.UserAgent = "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.2; .NET CLR 1.1.4322)";
    request.AllowAutoRedirect = true; // follow 302 redirects
    // Assign a CookieContainer to the request here if the target site needs cookies.
    request.Referer = url; // report the page itself as the referrer

    // The using blocks release the response, stream and reader even when reading fails.
    using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
    using (Stream stream = response.GetResponseStream())
    using (StreamReader reader = new StreamReader(stream, Encoding.Default))
    {
        return reader.ReadToEnd();
    }
}

/// <summary>
/// Method two: downloads a resource with <see cref="WebRequest"/> and decodes the body as UTF-8.
/// </summary>
/// <param name="url">Address of the resource.</param>
/// <returns>
/// The response body, or <c>null</c> when obtaining the response fails.
/// </returns>
/// <remarks>
/// Only the failure to obtain a response is swallowed. An invalid <paramref name="url"/>
/// or an error while reading the body still throws.
/// </remarks>
public static string GetHttpData2(string url)
{
    WebRequest request = WebRequest.Create(url);
    request.Timeout = 50000; // milliseconds

    WebResponse response;
    try
    {
        response = request.GetResponse();
    }
    catch (Exception)
    {
        // Deliberate best effort: callers detect failure through the null result.
        return null;
    }

    using (response)
    using (StreamReader reader = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
    {
        return reader.ReadToEnd();
    }
}

/// <summary>
/// Method three: downloads a page with <see cref="WebClient"/> and decodes it with either
/// the caller-supplied charset or the charset declared in the page's <c>&lt;meta&gt;</c> tag.
/// </summary>
/// <param name="url">Address of the web page to fetch.</param>
/// <param name="charsets">
/// Optional encoding name of the target page. When omitted, <c>null</c>, or empty, the
/// charset is read from the page's <c>&lt;meta&gt;</c> declaration. A value is only
/// honoured when exactly one is passed.
/// </param>
/// <returns>
/// The decoded page text, or an empty string when the download fails. UTF-8 is used when
/// no charset is supplied or declared, or when the charset name is not recognised.
/// </returns>
public static string GetHtml(string url, params string[] charsets)
{
    try
    {
        string charSet = null;
        // charsets is null when the caller passes an explicit null for the params array.
        if (charsets != null && charsets.Length == 1)
        {
            charSet = charsets[0];
        }

        // Some pages cannot be fetched this plainly (cookies required, authentication, ...).
        // Add request headers such as "Cookie" to client.Headers, or assign a
        // NetworkCredential to client.Credentials, when a specific site needs them.
        byte[] data;
        using (WebClient client = new WebClient())
        {
            client.Credentials = CredentialCache.DefaultCredentials;
            data = client.DownloadData(url);
        }

        if (string.IsNullOrEmpty(charSet))
        {
            // The declaration itself is plain ASCII, so any ASCII-compatible decode is
            // good enough to locate it before the real encoding is known.
            string preview = Encoding.Default.GetString(data);
            Match declaration = Regex.Match(
                preview,
                "<meta[^>]*?charset\\s*=\\s*[\"']?([A-Za-z0-9_.:-]+)",
                RegexOptions.IgnoreCase);
            if (declaration.Success)
            {
                charSet = declaration.Groups[1].Value;
            }
        }

        return ResolveEncoding(charSet).GetString(data);
    }
    catch (Exception)
    {
        // Deliberate best effort: any download failure yields an empty string.
        return "";
    }
}

/// <summary>Maps a charset name to an encoding, falling back to UTF-8 when it is empty or unknown.</summary>
private static Encoding ResolveEncoding(string charSet)
{
    if (string.IsNullOrEmpty(charSet))
    {
        return Encoding.UTF8;
    }

    try
    {
        return Encoding.GetEncoding(charSet);
    }
    catch (ArgumentException)
    {
        return Encoding.UTF8;
    }
    catch (NotSupportedException)
    {
        return Encoding.UTF8;
    }
}
Three ways to fetch a web page's source code in ASP.NET