C#: How do you capture HTML web page data? (Code example)
C#: How do you capture HTML web page data? (Code example)
// Method 1: download the page with WebClient and pick data out of it with a regular expression.
using System.Text.RegularExpressions;

/// <summary>
/// Downloads the Baidu home page as a UTF-8 string, runs a regular expression over it,
/// prints the first capture group of every match to the console, and then waits for a key press.
/// </summary>
public static void webClientMethod1()
{
    string html;

    // WebClient is IDisposable; dispose it as soon as the download has finished.
    using (WebClient wc = new WebClient())
    {
        wc.Encoding = Encoding.UTF8;

        // Return the page content in the form of a string.
        html = wc.DownloadString("https://www.baidu.com/");
    }

    // Match the data in the page string with a regular expression.
    // NOTE(review): the pattern in the published snippet was garbled; only the capture group
    // survived. It presumably had delimiters (e.g. HTML tags) around the group -- confirm and
    // restore them for the data you actually want to extract.
    MatchCollection matches = Regex.Matches(html, "(.*)");

    // Print the captured data of every match.
    foreach (Match item in matches)
    {
        Console.WriteLine(item.Groups[1].Value);
    }

    Console.ReadKey();
}

// Method 2: issue the request with HttpWebRequest and read the response stream.

/// <summary>
/// Sends a request to the Baidu home page and returns the whole response body,
/// decoded as UTF-8.
/// </summary>
/// <returns>The response body read from the current stream position to the end.</returns>
public static string SendRequest()
{
    string url = "https://www.baidu.com/";
    Uri httpURL = new Uri(url);

    // HttpWebRequest is not constructed directly here: the instance is obtained from
    // WebRequest.Create and cast to HttpWebRequest.
    HttpWebRequest httpReq = (HttpWebRequest)WebRequest.Create(httpURL);
    // httpReq.Headers.Add("cityen", "tj");

    // Shorter alternative given in the original article:
    // using (var sr = new System.IO.StreamReader(httpReq.GetResponse().GetResponseStream()))
    // {
    //     var result = sr.ReadToEnd();
    //     Console.WriteLine("--" + DateTime.Now.ToString() + "--" + result);
    // }

    // GetResponse returns a WebResponse, which is cast to HttpWebResponse.
    // GetResponseStream exposes the response body as a System.IO.Stream, and StreamReader
    // decodes it. The using statements release the response, the stream and the reader
    // even when reading throws, replacing the manual Close() calls.
    using (HttpWebResponse httpResp = (HttpWebResponse)httpReq.GetResponse())
    using (System.IO.Stream respStream = httpResp.GetResponseStream())
    using (System.IO.StreamReader respStreamReader = new System.IO.StreamReader(respStream, Encoding.UTF8))
    {
        // Read from the current position of the stream to the end.
        return respStreamReader.ReadToEnd();
    }
}