/// <summary>
/// Method one (recommended): use HttpWebRequest to fetch a page's HTML source.
/// Effective for pages carrying a BOM; the encoding is identified correctly regardless.
/// </summary>
/// <param name="url">Web address to request</param>
/// <returns>The page's HTML source as a string</returns>
public static string GetHtmlSource2 (string url) {
    HttpWebRequest request = (HttpWebRequest) WebRequest.Create (url);
    request.Accept = "*/*";                 // accept any content type
    // Emulate IE while browsing (some servers reject unknown user agents).
    request.UserAgent = "mozilla/4.0 (compatible; msie 6.0; windows nt 5.2; .net clr 1.1.4322)";
    request.AllowAutoRedirect = true;       // whether to follow 302 redirects
    // request.CookieContainer = new CookieContainer ();  // cookie container, if needed
    request.Referer = url;                  // referer of the current page

    // Dispose response, stream and reader deterministically — the original
    // leaked all three if ReadToEnd threw.
    using (HttpWebResponse response = (HttpWebResponse) request.GetResponse ())
    using (Stream stream = response.GetResponseStream ())
    using (StreamReader reader = new StreamReader (stream, Encoding.Default)) {
        return reader.ReadToEnd ();
    }
}
/// <summary>
/// Method two: fetch a page with WebRequest inside try/catch/finally,
/// reading the response body as UTF-8.
/// </summary>
/// <param name="url">Web address to request</param>
/// <returns>The page body, or null when the request failed</returns>
public static string GetHttpData2 (string url) {
    string sException = null;   // last error text, kept for diagnostics
    string sRslt = null;
    WebResponse oWebRps = null;
    WebRequest oWebRqst = WebRequest.Create (url);
    oWebRqst.Timeout = 50000;   // 50-second timeout
    try {
        oWebRps = oWebRqst.GetResponse ();
    } catch (WebException e) {
        sException = e.Message.ToString ();
    } catch (Exception e) {
        sException = e.ToString ();
    } finally {
        if (oWebRps != null) {
            // BUG FIX: the original passed " Utf-8 " (with surrounding spaces),
            // which makes Encoding.GetEncoding throw ArgumentException at runtime.
            StreamReader oStreamRd = new StreamReader (oWebRps.GetResponseStream (),
                                                       Encoding.GetEncoding ("utf-8"));
            sRslt = oStreamRd.ReadToEnd ();
            oStreamRd.Close ();
            oWebRps.Close ();
        }
    }
    return sRslt;
}
/// <summary>
/// Method three: download a page with WebClient and decode it, auto-detecting
/// the page encoding from the HTML meta charset when none is supplied.
/// </summary>
/// <param name="url">Address of the page to fetch</param>
/// <param name="charSets">Optional target-page encoding; when omitted (or empty)
/// the encoding is inferred from the page's meta tag</param>
/// <returns>The decoded page source, or "" on any failure</returns>
public static string GetHtml (string url, params string[] charSets) {
    try {
        string charSet = null;
        if (charSets.Length == 1) {
            charSet = charSets[0];
        }
        WebClient myWebClient = new WebClient ();   // create WebClient instance
        // NOTE: some pages need extra headers (cookies, etc.) to download, e.g.
        //   myWebClient.Headers.Add ("Cookie", cookie);
        // Use default network credentials to authenticate the request.
        myWebClient.Credentials = CredentialCache.DefaultCredentials;
        // If the server requires a user name / password:
        //   NetworkCredential myCred = new NetworkCredential (strUser, strPassword);
        //   myWebClient.Credentials = myCred;

        // Download the raw bytes, then take a first-pass decode with the
        // system default encoding just to locate the meta charset declaration.
        byte[] myDataBuffer = myWebClient.DownloadData (url);
        string strWebData = Encoding.Default.GetString (myDataBuffer);

        // Extract the charset declared in the page's <meta> tag. The original
        // regex literal was corrupted in transcription; this reconstruction
        // keeps the declared charset in capture group 2.
        Match charSetMatch = Regex.Match (strWebData, "<meta([^<]*)charset=([^<\"']*)",
                                         RegexOptions.IgnoreCase | RegexOptions.Multiline);
        string webCharSet = charSetMatch.Groups[2].Value;

        // Caller-supplied encoding wins; fall back to the page's own declaration.
        if (string.IsNullOrEmpty (charSet)) {
            charSet = webCharSet;
        }
        if (!string.IsNullOrEmpty (charSet) && Encoding.GetEncoding (charSet) != Encoding.Default) {
            strWebData = Encoding.GetEncoding (charSet).GetString (myDataBuffer);
        } else {
            strWebData = Encoding.GetEncoding ("utf-8").GetString (myDataBuffer);
        }
        return strWebData;
    } catch (Exception) {
        // Best-effort: swallow all errors and return "" (original behavior).
        return "";
    }
}
This article comes from the "Rain Wandering" blog; please retain this source link when reposting: http://101779.blog.51cto.com/91779/1543677
Three ways to fetch a web page's source code in ASP.NET