C#: Three methods for obtaining webpage content
In C#, there are three ways to obtain the content of a webpage: WebClient, WebBrowser, or HttpWebRequest/HttpWebResponse.
Method 1: Use WebClient (reference from: http://fbljava.blog.163.com/blog/static/265211742008712105145244)
/// <summary>
/// Downloads the page at http://www.163.com with <see cref="WebClient"/>,
/// prints the HTML to the console, and saves it to c:\test\ouput.html.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
static void Main(string[] args)
{
    try
    {
        // WebClient is IDisposable: release it even if the download throws.
        using (WebClient myWebClient = new WebClient())
        {
            // Network credentials used to authenticate the request to the Internet resource.
            myWebClient.Credentials = CredentialCache.DefaultCredentials;

            // Download the raw bytes of the page.
            byte[] pageData = myWebClient.DownloadData("http://www.163.com");

            // Decode with the system default encoding (the original note recommends this
            // for GB2312 pages). For a UTF-8 page use Encoding.UTF8.GetString(pageData) instead.
            string pageHtml = Encoding.Default.GetString(pageData);

            // Print the downloaded content to the console.
            Console.WriteLine(pageHtml);

            // Write the downloaded content to a file; the writer is flushed and closed by 'using'.
            using (StreamWriter sw = new StreamWriter(@"c:\test\ouput.html"))
            {
                sw.Write(pageHtml);
            }
        }

        // Wait for Enter so the console window stays open.
        Console.ReadLine();
    }
    catch (WebException webEx)
    {
        // Only network-level failures are reported here; other exceptions propagate.
        Console.WriteLine(webEx.Message);
    }
}
Method 2: Use WebBrowser (reference from: http://topic.csdn.net/u/20091225/14/4ea221cd-4c1e-4931-a6db-1fd4ee7398ef.html)
// Create the browser control that will load the page.
WebBrowser web = new WebBrowser();
// Subscribe BEFORE starting navigation so the completion handler is already
// attached when the document finishes loading.
web.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(web_DocumentCompleted);
// Start loading the page; web_DocumentCompleted runs once the document has loaded.
web.Navigate("http://www.xjflcp.com/ssc/");
/// <summary>
/// DocumentCompleted handler: appends the inner text of every "Table" element
/// in the loaded document to the file Kaijiang_xj.txt.
/// </summary>
/// <param name="sender">The <see cref="WebBrowser"/> that raised the event.</param>
/// <param name="e">Event data (not used).</param>
void web_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    // The sender is the browser whose document just finished loading.
    HtmlDocument loadedDocument = ((WebBrowser)sender).Document;

    // Append each table's text to the output file, one write per table.
    foreach (HtmlElement table in loadedDocument.GetElementsByTagName("Table"))
    {
        File.AppendAllText("Kaijiang_xj.txt", table.InnerText);
    }
}
Method 3: Use HttpWebRequest/HttpWebResponse (reference from: http://hi.baidu.com/onlyafar/blog/item/7ac4c6bf92d4810019d81f98.html)
// Request and response objects assigned by WriteStream().
HttpWebRequest httpReq;
HttpWebResponse httpResp;
// Text of the downloaded page.
string strBuff = "";
// Scratch buffer for chunked reads from the response (256 characters).
char[] cbuffer = new char[256];
// Number of characters returned by the most recent read; 0 means end of stream.
int byteRead = 0;
// Log file path; not referenced by the code shown here.
string filename = @"c:\log.txt";
/// <summary>
/// Downloads the page at the URL entered in <c>txtURL</c>, decodes it as UTF-8,
/// stores the text in <c>strBuff</c> and displays it in <c>txtHTML</c>.
/// </summary>
/// <remarks>
/// Exceptions from the request or from reading the response are not caught here;
/// the original article notes that GetResponseStream can fail with
/// ProtocolViolationException and suggests wrapping the call in a try block.
/// </remarks>
public void WriteStream()
{
    Uri httpURL = new Uri(txtURL.Text);

    // HttpWebRequest is obtained through the WebRequest.Create factory and then
    // cast to the HTTP-specific type.
    httpReq = (HttpWebRequest)WebRequest.Create(httpURL);

    // GetResponse() sends the request and returns the response, cast to HttpWebResponse.
    httpResp = (HttpWebResponse)httpReq.GetResponse();

    // Accumulate in a StringBuilder (linear cost, unlike repeated string concatenation).
    // Starting from an empty builder also means a second call replaces the previous
    // page instead of appending to it.
    StringBuilder pageText = new StringBuilder();

    // Closing the response stream releases the connection; 'using' guarantees that
    // happens even if a read throws part-way through.
    using (Stream respStream = httpResp.GetResponseStream())
    using (StreamReader respStreamReader = new StreamReader(respStream, Encoding.UTF8))
    {
        // Read the body in buffer-sized chunks until Read returns 0 (end of stream).
        byteRead = respStreamReader.Read(cbuffer, 0, cbuffer.Length);
        while (byteRead != 0)
        {
            pageText.Append(cbuffer, 0, byteRead);
            byteRead = respStreamReader.Read(cbuffer, 0, cbuffer.Length);
        }
    }

    strBuff = pageText.ToString();
    txtHTML.Text = strBuff;
}