HttpWebRequest: automatically detecting a web page's character encoding when downloading its source. By reading the charset declared in the page itself and the encoding information in the HTTP response headers, you can determine the page's encoding correctly.
/// <summary>
/// Detects the character encoding name of the page at <paramref name="URL"/>.
/// The page body is downloaded and searched for a <c>charset=...</c> declaration;
/// if none is found, the charset reported by the HTTP response is used; if that
/// is also missing, the system default encoding name is returned.
/// </summary>
/// <param name="URL">Address of the page to inspect.</param>
/// <returns>
/// The charset name found in the page or in the response, or
/// <c>Encoding.Default.BodyName</c> when nothing could be determined (including
/// when the request fails, the status is not 200 OK, or the declared content
/// length is 1 MiB or more).
/// </returns>
static string getencoding(string URL)
{
    // Responses that declare a body of this size or more are not downloaded.
    const long maxContentLength = 1024 * 1024;

    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
        request.Timeout = 20000;            // milliseconds
        request.AllowAutoRedirect = false;  // redirects are not followed; a 3xx answer falls through to the default

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < maxContentLength)
            {
                // Capture header-derived values before the body stream is consumed and closed.
                string headerCharset = response.CharacterSet;
                bool isGzip = response.ContentEncoding != null
                    && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase);

                Stream body = response.GetResponseStream();
                if (isGzip)
                    body = new GZipStream(body, CompressionMode.Decompress);

                // The charset declaration itself is plain ASCII, so ASCII decoding is
                // sufficient for sniffing regardless of the page's real encoding.
                string html;
                using (StreamReader reader = new StreamReader(body, Encoding.ASCII))
                {
                    html = reader.ReadToEnd();
                }

                // Accepts both charset=utf-8 and charset="utf-8" / charset='utf-8', and
                // captures only the charset token (not trailing markup such as "> or />).
                Regex charsetPattern = new Regex(
                    @"charset\b\s*=\s*[""']?(?<charset>[\w.:+\-]+)",
                    RegexOptions.IgnoreCase);
                Match match = charsetPattern.Match(html);
                if (match.Success)
                    return match.Groups["charset"].Value;

                if (!string.IsNullOrEmpty(headerCharset))
                    return headerCharset;
            }
        }
    }
    catch (Exception)
    {
        // Deliberate best effort: any failure (bad URL, network error, timeout,
        // corrupt gzip data, ...) results in the default encoding name below.
    }

    return Encoding.Default.BodyName;
}
/// <summary>
/// Downloads the page at <paramref name="URL"/> and returns its source text,
/// decoded with the supplied <paramref name="encoding"/>.
/// </summary>
/// <param name="URL">Address of the page to download.</param>
/// <param name="encoding">Character encoding used to decode the response body.</param>
/// <returns>
/// The decoded page source, or <c>string.Empty</c> when the request fails, the
/// status is not 200 OK, or the declared content length is 1 MiB or more.
/// </returns>
static string gethtml(string URL, Encoding encoding)
{
    // Responses that declare a body of this size or more are not downloaded.
    const long maxContentLength = 1024 * 1024;

    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(URL);
        request.Timeout = 20000;            // milliseconds
        request.AllowAutoRedirect = false;  // redirects are not followed; a 3xx answer yields an empty result

        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            if (response.StatusCode == HttpStatusCode.OK && response.ContentLength < maxContentLength)
            {
                bool isGzip = response.ContentEncoding != null
                    && response.ContentEncoding.Equals("gzip", StringComparison.InvariantCultureIgnoreCase);

                Stream body = response.GetResponseStream();
                if (isGzip)
                    body = new GZipStream(body, CompressionMode.Decompress);

                // Disposing the reader also disposes the underlying stream(s),
                // and this happens before the response itself is closed.
                using (StreamReader reader = new StreamReader(body, encoding))
                {
                    return reader.ReadToEnd();
                }
            }
        }
    }
    catch (Exception)
    {
        // Deliberate best effort: any failure (bad URL, network error, timeout,
        // corrupt gzip data, null encoding, ...) results in an empty string below.
    }

    return string.Empty;
}