There are many content-collection (web scraping) tools available online. Today I tried writing one myself and found that C# makes it easy to fetch the content of a web page; regular expressions can then be used to extract the data you are interested in. Here is the code that fetches the web page content:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;
using System.Web;

namespace Web
{
    /// <summary>
    /// Public helper methods for downloading web content.
    /// </summary>
    public class WebHandler
    {
        /// <summary>
        /// Downloads the body of a web page and returns it as a string.
        /// </summary>
        /// <param name="url">Link address of the page to download.</param>
        /// <param name="encoding">
        /// Encoding type used to decode the response. The exact string
        /// <c>"UTF8"</c> selects <see cref="Encoding.UTF8"/>; any other value
        /// (including <c>"Default"</c> and <c>null</c>) selects
        /// <see cref="Encoding.Default"/>.
        /// </param>
        /// <returns>
        /// The decoded page content, or an empty string when
        /// <paramref name="url"/> is null or empty, or when the request fails.
        /// </returns>
        public static string GetHtmlStr(string url, string encoding)
        {
            if (string.IsNullOrEmpty(url))
            {
                return string.Empty;
            }

            // Encoding.Default is the fallback for every value other than "UTF8",
            // so a separate "Default" branch is not needed.
            Encoding ec = encoding == "UTF8" ? Encoding.UTF8 : Encoding.Default;

            try
            {
                WebRequest request = WebRequest.Create(url);

                // The using statements release the response, stream and reader
                // even when reading fails part-way through.
                using (WebResponse response = request.GetResponse())
                using (Stream dataStream = response.GetResponseStream())
                {
                    if (dataStream == null)
                    {
                        // Some WebResponse implementations expose no body stream.
                        return string.Empty;
                    }

                    using (StreamReader reader = new StreamReader(dataStream, ec))
                    {
                        return reader.ReadToEnd();
                    }
                }
            }
            catch (UriFormatException)
            {
                // The address is not a well-formed URI.
                return string.Empty;
            }
            catch (NotSupportedException)
            {
                // The URI scheme has no registered request handler.
                return string.Empty;
            }
            catch (InvalidOperationException)
            {
                // Covers WebException and ProtocolViolationException
                // (network, HTTP status and protocol failures).
                return string.Empty;
            }
            catch (IOException)
            {
                // The connection dropped while the body was being read.
                return string.Empty;
            }
            catch (ArgumentException)
            {
                // The response stream was not readable.
                return string.Empty;
            }
            catch (System.Security.SecurityException)
            {
                // The caller lacks permission to connect to the address.
                return string.Empty;
            }
        }
    }
}
This method returns the HTML content of a web page. Once we have the HTML, we can use regular expressions to extract the content we want ...