Reference document: https://www.cnblogs.com/asxinyu/p/CSharp_HtmlAgilityPack_XPath_Weather_Data.html#_label0
HtmlAgilityPack is an open-source class library for parsing HTML; its most notable feature is the ability to query HTML elements through XPath.
It is available at: http://htmlagilitypack.codeplex.com/
XPath Tutorial: http://www.w3school.com.cn/xpath/index.asp
XPath Get method:
Official API: http://html-agility-pack.net/parser
/// <summary>
/// Downloads the December 2017 Taizhou weather-history page, extracts one
/// <c>WeatherReport</c> per table row (date, weather state, temperature,
/// wind) and passes the resulting list to the view.
/// </summary>
/// <returns>The view rendered with the list of parsed weather reports
/// (empty when the expected table or its rows are not found).</returns>
public ActionResult Index()
{
    // Crawl the weather history page.
    HtmlWeb htmlWeb = new HtmlWeb();
    string url = "http://www.tianqihoubao.com/lishi/taizhou/month/201712.html";

    // Force GB2312 decoding; without it the Chinese text comes back garbled.
    htmlWeb.OverrideEncoding = Encoding.GetEncoding("gb2312");
    HtmlAgilityPack.HtmlDocument document = htmlWeb.Load(url);

    List<WeatherReport> weatherReports = new List<WeatherReport>();

    var table = document.DocumentNode.SelectSingleNode(@"//*[@id='content']/table");

    // SelectNodes returns null (not an empty collection) when nothing matches,
    // so every result must be null-checked before use.
    var rows = table == null ? null : table.SelectNodes(@"tr");
    if (rows != null)
    {
        // Start at index 1 to skip the first row, which is the table header.
        // Indexing instead of RemoveAt(0) leaves the parsed document untouched.
        for (int i = 1; i < rows.Count; i++)
        {
            var cells = rows[i].SelectNodes(@"td");

            // A data row has exactly four cells:
            // date, weather state, temperature, wind direction.
            if (cells == null || cells.Count != 4)
                continue;

            var date = CleanCellText(cells[0].InnerText);
            var state = CleanCellText(cells[1].InnerText);
            var temperature = CleanCellText(cells[2].InnerText);
            var wind = CleanCellText(cells[3].InnerText);

            // Output the parsed row for diagnostics.
            Console.WriteLine("{0}:{1},{2},{3}", date, state, temperature, wind);

            WeatherReport weatherReport = new WeatherReport
            {
                Date = date,
                State = state,
                Temperature = temperature,
                Wind = wind
            };
            weatherReports.Add(weatherReport);
        }
    }

    return View(weatherReports);
}

/// <summary>
/// Normalizes the text of a table cell: strips CRLF line breaks and spaces,
/// then trims any remaining leading/trailing whitespace.
/// </summary>
private static string CleanCellText(string text)
{
    return text.Replace("\r\n", "").Replace(" ", "").Trim();
}
View Code
HtmlAgilityPack parsing HTML