C#: Obtain a webpage's source code and automatically detect its character set encoding

Source: Internet
Author: User

using System.IO;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its content as text,
/// decoding it with the requested encoding or, when none is given, with the encoding
/// the page declares in a <c>&lt;meta ... charset=...&gt;</c> tag.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">
/// Name of the encoding of the target page (for example "utf-8" or "gb2312").
/// Pass <c>null</c> or <c>""</c> to have the encoding detected from the page itself.
/// </param>
/// <returns>
/// The page text. If no encoding was supplied and none could be detected (or the
/// detected name is not a known encoding), the text decoded with
/// <see cref="Encoding.Default"/> is returned.
/// </returns>
/// <exception cref="WebException">The download failed.</exception>
/// <exception cref="ArgumentException">
/// A caller-supplied <paramref name="charSet"/> is not a recognised encoding name.
/// </exception>
/// <remarks>
/// Some pages need extra request state (cookies, explicit credentials). Add it on the
/// client before the download, e.g. <c>client.Headers.Add("Cookie", cookie)</c> or
/// <c>client.Credentials = new NetworkCredential(user, password)</c>.
/// </remarks>
private string getHtml(string url, string charSet)
{
    byte[] pageBytes;

    // WebClient is IDisposable; release it as soon as the bytes are in memory.
    using (WebClient client = new WebClient())
    {
        // Authenticate with the credentials of the current security context.
        client.Credentials = CredentialCache.DefaultCredentials;
        pageBytes = client.DownloadData(url);
    }

    // First pass: decode with the system default encoding. The charset declaration
    // is plain ASCII, so it survives this pass even when the body text does not.
    string defaultDecoded = Encoding.Default.GetString(pageBytes);

    bool autoDetect = string.IsNullOrEmpty(charSet);
    if (autoDetect)
    {
        // Matches both <meta charset="utf-8"> and
        // <meta http-equiv="Content-Type" content="text/html; charset=utf-8">,
        // capturing only the encoding name (no quotes or trailing attributes).
        Match declared = Regex.Match(
            defaultDecoded,
            @"<meta[^>]*?charset\s*=\s*[""']?\s*([\w.:-]+)",
            RegexOptions.IgnoreCase);

        charSet = declared.Success ? declared.Groups[1].Value : null;
    }

    if (string.IsNullOrEmpty(charSet))
    {
        return defaultDecoded;
    }

    Encoding target;
    try
    {
        target = Encoding.GetEncoding(charSet);
    }
    catch (ArgumentException)
    {
        // A bad name passed by the caller is a caller error and must surface;
        // a bad name sniffed from the page should not make the whole fetch fail.
        if (!autoDetect)
        {
            throw;
        }

        return defaultDecoded;
    }

    // Skip the second decode when it would produce the same text.
    if (target.Equals(Encoding.Default))
    {
        return defaultDecoded;
    }

    return target.GetString(pageBytes);
}

Related Article

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.