ASP.NET: using HttpWebRequest to automatically detect a web page's encoding and fetch its source code

Last Update:2018-12-03 Source: Internet

Author: User

Developer on Alibaba Cloud: Build your first app with APIs, SDKs, and tutorials on Alibaba Cloud. Read more >

/// <summary>
/// Downloads the page at <paramref name="url"/> and returns its body decoded with
/// <paramref name="encoding"/>.
/// </summary>
/// <param name="url">Absolute HTTP or HTTPS address of the page to fetch.</param>
/// <param name="encoding">Encoding used to decode the response body.</param>
/// <returns>
/// The page source, or <see cref="string.Empty"/> when an argument is missing, the URL is not an
/// HTTP(S) address, the request fails, the status is not 200 OK, or the declared
/// Content-Length is 1 MiB or more.
/// </returns>
public static string GetHtml(string url, Encoding encoding)
{
    const int MaxContentLength = 1024 * 1024;

    // This method is best-effort: bad input yields an empty result instead of an exception.
    if (string.IsNullOrEmpty(url) || encoding == null)
    {
        return string.Empty;
    }

    try
    {
        // WebRequest.Create hands back a non-HTTP request type for schemes such as file://,
        // so use a checked conversion instead of a throwing cast.
        HttpWebRequest request = WebRequest.Create(url) as HttpWebRequest;
        if (request == null)
        {
            return string.Empty;
        }

        request.Timeout = 20000; // milliseconds
        request.AllowAutoRedirect = false;

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            // ContentLength is -1 when the server sends no Content-Length header, so such
            // responses pass the size check, exactly as in the original code.
            if (response.StatusCode != HttpStatusCode.OK || response.ContentLength >= MaxContentLength)
            {
                return string.Empty;
            }

            bool isGzip = string.Equals(
                response.ContentEncoding, "gzip", StringComparison.OrdinalIgnoreCase);

            // Disposal order matters: the reader (and gzip wrapper) are disposed before the
            // response they read from.
            using (Stream body = response.GetResponseStream())
            using (Stream content = isGzip ? new GZipStream(body, CompressionMode.Decompress) : body)
            using (StreamReader reader = new StreamReader(content, encoding))
            {
                return reader.ReadToEnd();
            }
        }
    }
    catch (UriFormatException)
    {
        return string.Empty; // malformed URL
    }
    catch (NotSupportedException)
    {
        return string.Empty; // URL scheme has no registered handler
    }
    catch (WebException)
    {
        return string.Empty; // timeout, DNS failure, 4xx/5xx response, ...
    }
    catch (IOException)
    {
        return string.Empty; // connection dropped while reading the body
    }
    catch (InvalidDataException)
    {
        return string.Empty; // body was labelled gzip but is not valid gzip data
    }
}

    /// <summary>
    /// Determines the character set name of the page at <paramref name="url"/>.
    /// </summary>
    /// <remarks>
    /// Lookup order: a <c>charset=...</c> declaration found in the page text, then the charset
    /// reported by the HTTP response, then the system default encoding's body name.
    /// </remarks>
    /// <param name="url">Absolute HTTP or HTTPS address of the page to inspect.</param>
    /// <returns>
    /// A charset name such as <c>utf-8</c>. When the page cannot be fetched or inspected,
    /// <c>Encoding.Default.BodyName</c> is returned so every code path yields a value.
    /// </returns>
    public static string GetEncoding(string url)
    {
        const int MaxContentLength = 1024 * 1024;
        string fallback = Encoding.Default.BodyName;

        if (string.IsNullOrEmpty(url))
        {
            return fallback;
        }

        try
        {
            // WebRequest.Create hands back a non-HTTP request type for schemes such as file://.
            HttpWebRequest request = WebRequest.Create(url) as HttpWebRequest;
            if (request == null)
            {
                return fallback;
            }

            request.Timeout = 20000; // milliseconds
            request.AllowAutoRedirect = false;

            using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
            {
                if (response.StatusCode != HttpStatusCode.OK || response.ContentLength >= MaxContentLength)
                {
                    return fallback;
                }

                bool isGzip = string.Equals(
                    response.ContentEncoding, "gzip", StringComparison.OrdinalIgnoreCase);

                string html;
                using (Stream body = response.GetResponseStream())
                using (Stream content = isGzip ? new GZipStream(body, CompressionMode.Decompress) : body)
                // The charset declaration itself is plain ASCII, so ASCII decoding is enough to
                // find it whatever the real page encoding is; both branches now decode alike.
                using (StreamReader reader = new StreamReader(content, Encoding.ASCII))
                {
                    html = reader.ReadToEnd();
                }

                string declared = FindDeclaredCharset(html);
                if (declared.Length > 0)
                {
                    return declared;
                }

                if (!string.IsNullOrEmpty(response.CharacterSet))
                {
                    return response.CharacterSet;
                }
            }
        }
        catch (UriFormatException)
        {
            // malformed URL: fall through to the default below
        }
        catch (NotSupportedException)
        {
            // URL scheme has no registered handler
        }
        catch (WebException)
        {
            // timeout, DNS failure, 4xx/5xx response, ...
        }
        catch (IOException)
        {
            // connection dropped while reading the body
        }
        catch (InvalidDataException)
        {
            // body was labelled gzip but is not valid gzip data
        }

        return fallback;
    }

    /// <summary>
    /// Extracts the value of the first <c>charset=...</c> declaration in <paramref name="html"/>,
    /// or an empty string when none is present.
    /// </summary>
    private static string FindDeclaredCharset(string html)
    {
        // Accepts both charset=utf-8 (inside a content attribute) and charset="utf-8" / 'utf-8';
        // the capture stops at the first character that cannot be part of a charset name.
        Match match = Regex.Match(
            html,
            @"\bcharset\s*=\s*[""']?(?<charset>[-\w]+)",
            RegexOptions.IgnoreCase);

        return match.Success ? match.Groups["charset"].Value : string.Empty;
    }

The following shows how to obtain the webpage title:

using System;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

/// <summary>
/// Console demo: downloads a few pages and prints each page's detected encoding and title.
/// </summary>
internal class Program
{
    /// <summary>
    /// Downloads the page at <paramref name="url"/>, choosing the text encoding from the
    /// charset the page itself declares.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <returns>The decoded HTML.</returns>
    private static string GetHtml(string url)
    {
        return GetHtml(url, null);
    }

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and decodes it.
    /// </summary>
    /// <param name="url">Address of the page to download.</param>
    /// <param name="encoding">
    /// Encoding to decode with, or <c>null</c> to detect it from the page's charset declaration
    /// (UTF-8 is used when no usable declaration is found).
    /// </param>
    /// <returns>The decoded HTML.</returns>
    private static string GetHtml(string url, Encoding encoding)
    {
        byte[] buf;
        using (WebClient client = new WebClient())
        {
            buf = client.DownloadData(url);
        }

        if (encoding != null)
        {
            return encoding.GetString(buf);
        }

        // First pass as UTF-8: the charset declaration is ASCII, so it survives this decoding
        // even when the rest of the page is in another encoding.
        string html = Encoding.UTF8.GetString(buf);
        encoding = GetEncoding(html);

        // Compare by code page, not by reference, so any UTF-8 instance counts as UTF-8.
        if (encoding == null || encoding.CodePage == Encoding.UTF8.CodePage)
        {
            return html;
        }

        return encoding.GetString(buf);
    }

    /// <summary>
    /// Finds the encoding named by the first <c>charset=...</c> declaration in the HTML.
    /// </summary>
    /// <param name="html">HTML text of the page.</param>
    /// <returns>
    /// The matching <see cref="Encoding"/>, or <c>null</c> when there is no declaration or the
    /// declared name is not a known encoding.
    /// </returns>
    internal static Encoding GetEncoding(string html)
    {
        // Optional whitespace and an optional opening quote let this match both
        // content="text/html; charset=utf-8" and <meta charset="utf-8">.
        const string pattern = @"(?i)\bcharset\s*=\s*[""']?(?<charset>[-a-zA-Z_0-9]+)";

        Match match = Regex.Match(html, pattern);
        if (!match.Success)
        {
            return null;
        }

        try
        {
            return Encoding.GetEncoding(match.Groups["charset"].Value);
        }
        catch (ArgumentException)
        {
            // Declared charset name is not recognised on this platform.
            return null;
        }
    }

    /// <summary>
    /// Extracts the text of the first <c>&lt;title&gt;</c> element in the HTML.
    /// </summary>
    /// <param name="html">HTML text of the page.</param>
    /// <returns>The trimmed title text, or an empty string when there is no title element.</returns>
    internal static string GetTitle(string html)
    {
        // (?si): '.' also matches newlines, and matching is case-insensitive. The optional group
        // after "<title" skips attributes, including quoted values that contain '>'.
        const string pattern =
            @"(?si)<title(?:\s+(?:""[^""]*""|'[^']*'|[^""'>])*)?>(?<title>.*?)</title>";

        return Regex.Match(html, pattern).Groups["title"].Value.Trim();
    }

    /// <summary>
    /// Downloads <paramref name="url"/> and writes its detected encoding and title to the console.
    /// </summary>
    /// <param name="url">Address of the page to inspect.</param>
    private static void PrintEncodingAndTitle(string url)
    {
        string html = GetHtml(url);
        Console.WriteLine("[{0}] [{1}]", GetEncoding(html), GetTitle(html));
    }

    /// <summary>
    /// Program entry point.
    /// </summary>
    private static void Main()
    {
        PrintEncodingAndTitle("http://www.msdn.net/");
        PrintEncodingAndTitle("http://www.cnblogs.com/");
        PrintEncodingAndTitle("http://www.cnblogs.com/skyiv/");
        PrintEncodingAndTitle("http://www.csdn.net/");
        PrintEncodingAndTitle("http://news.163.com/");
    }
}

/* Sample program output as recorded in the original article (page titles are shown in
   translation; the sites may have changed since):
[] [msdn: Microsoft Developer Network]
[System.Text.UTF8Encoding] [blog garden-programmer's online home]
[System.Text.UTF8Encoding] [Space/IV-blog park]
[System.Text.UTF8Encoding] [csdn.net-the largest IT technology community in China, providing the most comprehensive information dissemination and service platform for IT professionals]
[System.Text.DBCSCodePageEncoding] [News center_netease news]
*/

This article is an English version of an article which is originally in the Chinese language on aliyun.com and is provided for information purposes only. This website makes no representation or warranty of any kind, either expressed or implied, as to the accuracy, completeness, ownership or reliability of the article or any translations thereof. If you have any concerns or complaints relating to the article, please send an email, providing a detailed description of the concern or complaint, to info-contact@alibabacloud.com. A staff member will contact you within 5 working days. Once verified, infringing content will be removed immediately.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

Get Started for Free

Sales Support

1 on 1 presale consultation

Chat Contact Sales
After-Sales Support

24/7 Technical Support 6 Free Tickets per Quarter Faster Response

Open a Ticket
Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.

Learn More

ASP.NET: using HttpWebRequest to automatically detect a web page's encoding and fetch its source code

Contact Us

What's Trending

Top 10 Tags

Top 10 Keywords

A Free Trial That Lets You Build Big!

Sales Support

After-Sales Support

ASP.NET: using HttpWebRequest to automatically detect a web page's encoding and fetch its source code

Contact Us

What's Trending

Top 10 Tags

Top 10 Keywords

Trending Topic

A Free Trial That Lets You Build Big!

Sales Support

After-Sales Support