Three ways to fetch a web page's source code in ASP.NET

Source: Internet
Author: User

 <summary> method One: Comparative recommendation         ///  use HttpWebRequest to obtain the Web source         ///  is effective for Web pages with BOMs, no matter what codes are correctly identified          /// </summary>        /// < Param name= "url" > Web address " </param>        ///  <returns> return to Web page source file </returns>        public static  String gethtmlsource2 (String url)         {             //Processing Content              string html =  "";             HttpWebRequest request =  (HttpWebRequest) webrequest.create (URL);      &Nbsp;      request. accept =  "*/*";  //Accept any file              request. useragent =  "mozilla/4.0  (compatible; msie 6.0; windows nt 5.2; . net clr 1.1.4322) "; //  simulation using IE in browsing               request. allowautoredirect = true;//whether to allow 302             //request. Cookiecontainer = new cookiecontainer ();//cookie container,             request. referer = url; //references to the current page              HttpWebResponse response =  (HttpWebResponse) request. GetResponse ();            stream stream  = response. GetrespoNsestream ();             streamreader reader  = new streamreader (Stream, encoding.default);             html = reader. ReadToEnd ();             stream. Close ();            return html;         }
        //Method Two:         public  static string gethttpdata2 (String url)         {             string sException =  null;            string srslt =  null;            webresponse owebrps =  null;            webrequest owebrqst  = webrequest.create (URL);             Owebrqst.timeout = 50000;            try             {           &nbsP;     owebrps = owebrqst.getresponse ();             }             catch  (webexception e)              {                sexception  = e.message.tostring ();             }             catch  (exception e)              {                 sexception = e.tostring ();             }             finally            {                 if  (owebrps != null)                  {                     streamreader  ostreamrd = new streamreader (Owebrps.getresponsestream (),  encoding.getencoding (" Utf-8 "));                     srslt = ostreamrd.readtoend ();                     ostreamrd.close ();                      Owebrps.close ();                 }             }             return srslt;        }
        /// <summary> Method Three:         ///         /// </summary>         /// <param name= "url" >/the address of the website you want to visit </param>         /// <param name= the encoding of the "charsets" > Target page, if the incoming is null or "", Then automatically analyze the page encoding </param>        /// <returns></returns >        public static string gethtml (string  Url, params  string[] charsets)         {             try             {                 string charset = null;                 if  (charsets.length == 1)                  {                     charSet = charSets[0];                 }                 webclient mywebclient = new  webclient ();  //Create WebClient instance mywebclient                 // :           to be aware of       //Some Web pages may not come down, for various reasons such as the need for cookies, coding problems and so on                  //this is about specific problems, such as adding cookie         to the head.         // webclient. Headers.add ("Cookie",  cookie);                 //this may require some overloaded methods. If you need to, just write it.                  //Gets or sets the network credentials that are used to authenticate requests to  Internet  resources .                 myWebClient.Credentials =  credentialcache.defaultcredentials;                 //If the server wants to verify the user name, password                  //networkcredential mycred = new networkcredential (struser,  strpassword);                 //mywebclient.credentials = mycred;                 //downloads data from a resource and returns a byte array. 
(plus @ because there's a "/" symbol in the middle of the URL)                  byte[] mydatabuffer = mywebclient.downloaddata (URL);                 string strWebData =  Encoding.Default.GetString (Mydatabuffer);                 //Get page character encoding description information                  match charsetmatch = regex.match (strWebData,  "<meta" ([^ <]*) charset= ([^<]*) ",  regexoptions.ignorecase | regexoptions.multiline);                  string webcharset = charsetmatch.groups[2]. value;                if  (charset == null | |  charSet ==  "")                      charSet = webCharSet;                 if  (charset != null &&  charSet !=  ""  && encoding.getencoding (CharSet)  != encoding.default )                 {                      Strwebdata = encoding.getencoding (CharSet). GetString (mydatabuffer);                 }                 else                 {                     strwebdata = encoding.getencoding ("Utf-8"). GetString (Mydatabuffer);                 }                 return strwebdata;            }             catch  (exception e)  { return   "";  }        }

This article comes from the "Rain Wandering Blog"; if you republish it, please keep this source link: http://101779.blog.51cto.com/91779/1543677

Three ways to fetch a web page's source code in ASP.NET

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.