C#: Using HtmlAgilityPack and ScrapySharp to read a URL and find text

Source: Internet
Author: User

C#: Using HtmlAgilityPack and ScrapySharp to read a URL and find text

Https://github.com/exaphaser/ScrapySharp

Https://github.com/zzzprojects/html-agility-pack

Https://github.com/atifaziz/Fizzler

https://archive.codeplex.com/?p=fizzlerex

Https://github.com/aspnet/blazor

Https://github.com/SteveSanderson/Blazor

Mathematical formula samples (MathJax): https://www.mathjax.org/#samples

 

Https://github.com/robinvanderknaap/MvcJqGrid

Http://www.defenseinnovationmarketplace.mil/strategy.html

Using System; using System. collections. generic; using System. componentModel; using System. data; using System. drawing; using System. linq; using System. text; using System. windows. forms; using System. IO; using System. net; using System. collections; using ScrapySharp. network; using ScrapySharp. core; using HtmlAgilityPack; namespace HtmlAgilityPackDemo {/// <summary> // HtmlAgilityPack /// geovindu // tu juwen // 20180305 /// </summary> public partial class Form1: form {public Form1 () {InitializeComponent ();} /// <summary> //// </summary> /// <param name = "sender"> </param> /// <param name = "e "> </param> private void Form1_Load (object sender, eventArgs e) {this. textBox1.Text = "ln"; // List <CityList> lis = new List <CityList> ();} /// <summary> ///// </summary> /// <param name = "url"> </param> /// <returns> </returns> public static string GetWebClient (string url) {string strHTML = ""; WebClient myWebClient = new WebClient (); Stream myStream = myWebClient. openRead (url); StreamReader sr = new StreamReader (myStream, Encoding. default); // encode strHTML = sr. readToEnd (); myStream. close (); return strHTML ;} /// <summary> /// nl /// </summary> /// <param name = "cityCode"> </param> public string ParsePageByArea (String cityCode, out List <CityList> listcity) {StringBuilder stp = new StringBuilder (); CityList city = null; List <CityList> clits = new List <CityList> (); // URL String url = String in a more URL format and province code structure. format (" http://www.tianqihoubao.com/lishi/ {02.16.htm ", cityCode); // download the webpage source code var docText = GetWebClient (url); // load the source code and obtain the Document Object var doc = new HtmlAgilityPack. htmlDocument (); doc. loadHtml (docText); // obtain the total object through xpath. If it is not empty, select the dl label var res = doc. documentNode. selectSingleNode (@ "/html [1]/body [1]/div [1]/div [6]/div [1]/div [1]/div [3]" ); if (res! = Null) {var list = res. 
selectNodes (@ "dl"); // select the tag array if (list. count <1) {listcity = clits; return "";} foreach (var item in list) {var dd = item. selectSingleNode (@ "dd "). selectNodes ("a"); foreach (var node in dd) {city = new CityList (); var text = node. innerText. trim (); // the pinyin code must be separated from the href attribute to extract var herf = node. attributes ["href"]. value. trim (). split ('/','. '); string str = string. format ("{0 }:{ 1}", text, herf [herf. length-2]); city. cityName = text; city. cityCode = herf [herf. length-2]; stp. append ("\ r \ n" + str); clits. add (city) ;}} listcity = clits; return stp. toString ();} // <summary> /// http://www.tianqihoubao.com/lishi/ Dalian/month/201802.html /// </summary> /// <param name = "cityCode"> </param> /// <param name = "year"> </ param> // <param name = "month"> </param> public string ParsePageByCityMonth (String cityCode, int32 year, Int32 month, out List <WeatherList> wea) {StringBuilder stp = new StringBuilder (); List <WeatherList> wlist = new List <WeatherList> (); weatherList wt = null; // more pinyin code, month Information Construction URL String url = String. format (" http://www.tianqihoubao.com/lishi/ {0}/month/{1} {2: d22.16.html ", cityCode, year, month); // obtain the source code var docText = GetWebClient (url) of the link ); // load the source code and obtain the Page Structure object var doc = new HtmlAgilityPack. htmlDocument (); doc. loadHtml (docText); // obtain the table object var res = doc using Xpath. documentNode. selectSingleNode (@ "/html [1]/body [1]/div [2]/div [6]/div [1]/div [1]/table [1]" ); if (res! = Null) {// obtain all rows var list = res. selectNodes (@ "tr"); list. removeAt (0); // remove the first line, which is the header // traverse each line, get the date, weather conditions, and other information foreach (var item in list) {wt = new WeatherList (); var dd = item. selectNodes (@ "td"); // Date--temperature-wind direction if (dd. count! 
= 4) continue; // obtain the current row date var date1 = dd [0]. innerText. replace ("\ r \ n ",""). replace ("",""). trim (); // obtain the current weather condition var tq = dd [1]. innerText. replace ("\ r \ n ",""). replace ("",""). trim (); // obtain the current row temperature var qw = dd [2]. innerText. replace ("\ r \ n ",""). replace ("",""). trim (); // obtain the current wind direction var fx = dd [3]. innerText. replace ("\ r \ n ",""). replace ("",""). trim (); // output string str = string. format ("{0 }:{ 1}, {2}, {3}", date1, tq, qw, fx); stp. append (str); wt. climate = tq; wt. date = DateTime. parse (date1); wt. temperature = qw; wt. windDirection = fx; wlist. add (wt) ;}} wea = wlist; return stp. toString ();} // <summary> /// http://www.dusystem.com/geovindu.html /// ScrapingBrowser /// obtain the file title /// </summary> /// <param name = "url"> </param> /// <returns> </ returns> public string getHtmlTitle (string url) {StringBuilder titl = new StringBuilder (); var uri = new Uri (url); var browser1 = new ScrapingBrowser (); var html1 = browser1.DownloadString (uri); var doc = new HtmlAgilityPack. htmlDocument (); doc. loadHtml (html1); var html = doc. documentNode; var title = html. selectNodes ("title"); foreach (var htmlNode in title) {titl. append (htmlNode. innerText);} // CssSelect CssSelectAncestors var ps = html. selectNodes ("p "). elements ("div # endText"); foreach (var htmlNode in ps) {titl. append (htmlNode. innerHtml);} return titl. toString ();} /// <summary> //// </summary> /// <param name = "sender"> </param> /// <param name = "e "> </param> private void button#click (object sender, eventArgs e) {List <CityList> list = new List <CityList> (); this. richTextBox1.Text = ParsePageByArea (this. textBox1.Text. trim (), out list); this. comboBox1.DataSource = list; this. comboBox1.DisplayMember = "CityName"; this. 
comboBox1.ValueMember = "CityCode ";} /// <summary> //// </summary> /// <param name = "sender"> </param> /// <param name = "e "> </param> private void button2_Click (object sender, eventArgs e) {List <WeatherList> list = new List <WeatherList> (); int year = DateTime. now. year; int mont = DateTime. now. month-1; this. richTextBox2.Text = ParsePageByCityMonth (this. comboBox1.SelectedValue. toString (), year, mont, out list); this. dataGridView1.DataSource = list ;}} /// <summary> ///// </summary> public class CityList {/// <summary> /// </summary> public string CityName {get; set ;}//< summary >///// </summary> public string CityCode {get; set ;}/// <summary >/// Climate, temperature, wind direction // </summary> public class WeatherList {// <summary> // Climate // </summary> public string Climate {get; set ;}//< summary> /// Temperature /// </summary> public string Temperature {get; set ;} /// <summary> // wind direction // </summary> public string WindDirection {get; set ;} /// <summary >///// </summary> public DateTime Date {get; set ;}}}

  

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.