"Go" using Htmlagilitypack to crawl Web data in bulk
Related software Click to download
Handling login: some web data can only be extracted after logging in, so ieHTTPHeaders is used here to capture the information that is submitted at login.
Crawling Web pages
// Crawl snippet: load a page (logging in first when a login URL is given),
// then walk every repeating node and read the text of each configured XPath.
// NOTE(review): loginUrl, loginPostData and dataPageUrl stand in for the
// article's placeholders (login URL, submitted authentication data, URL of the
// data page); htmlWeb is the article's page loader - confirm its Load overloads.
HtmlAgilityPack.HtmlDocument htmlDoc;
if (!string.IsNullOrEmpty(loginUrl))
{
    // A login URL was supplied: submit the authentication data, then fetch the data page.
    htmlDoc = htmlWeb.Load(loginUrl, loginPostData, dataPageUrl);
}
else
{
    htmlDoc = htmlWeb.Load(dataPageUrl);
}

// XPath expressions of the data cells to extract from each repeating node.
// NOTE(review): an XPath starting with "//" is evaluated from the document root,
// not from the current node - use ".//" or a relative path if per-row data is intended.
List<string> dataPaths = new List<string>();
dataPaths.Add("//table/tr[1]/td");
dataPaths.Add("//table/tr[2]/td");

// XPath of the repeating (loop) node, for example: //table/tr
HtmlNodeCollection repeatNodes = htmlDoc.DocumentNode.SelectNodes("//table/tr");

// SelectNodes returns null (not an empty collection) when nothing matches.
if (repeatNodes != null)
{
    // Loop over the repeating nodes
    foreach (HtmlNode node in repeatNodes)
    {
        // Fetch each data item from the current node
        foreach (string dataPath in dataPaths)
        {
            HtmlNode dataNode = node.SelectSingleNode(dataPath);
            if (dataNode != null)
            {
                // Extracted text of the matched node; store or process it here.
                string text = dataNode.InnerText;
            }
        }
    }
}
// If the extracted text is garbled, set the encoding to match the page, e.g. "gb2312" or "utf-8".
// NOTE(review): strEncode holds the encoding name; DefaultEncoding is the member used by the
// article's htmlWeb loader - confirm it exists on the HtmlWeb type you are using.
htmlWeb.DefaultEncoding = System.Text.Encoding.GetEncoding(strEncode);
-------------------------------------------------------------------------------------------
using System;
using System.Collections.Generic;
using System.Text;
using Microsoft.VisualStudio.TestTools.WebTesting;
using HtmlAgilityPack;
/// <summary>
/// Coded web test that requests http://www.microsoft.com/ and validates the
/// response by parsing its body with HtmlAgilityPack.
/// </summary>
public class WebTest1Coded : WebTest
{
    /// <summary>
    /// Yields the single request of this test, with a response validation handler attached.
    /// </summary>
    /// <returns>An enumerator over the requests to execute.</returns>
    public override IEnumerator<WebTestRequest> GetRequestEnumerator()
    {
        WebTestRequest request1 = new WebTestRequest("http://www.microsoft.com/");
        request1.ValidateResponse += new EventHandler<ValidationEventArgs>(request1_ValidateResponse);
        yield return request1;
    }

    /// <summary>
    /// Marks the response valid only when the first &lt;li&gt; under the element
    /// with id "Nav" has the inner text "Windows".
    /// </summary>
    /// <param name="sender">The object that raised the event.</param>
    /// <param name="e">Validation arguments carrying the response and the validity flag.</param>
    void request1_ValidateResponse(object sender, ValidationEventArgs e)
    {
        // Load the response body string as an HtmlAgilityPack.HtmlDocument
        HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
        doc.LoadHtml(e.Response.BodyString);

        // Locate the "Nav" element (null when no element has that id)
        HtmlNode navNode = doc.GetElementbyId("Nav");

        // Pick the first <li> element; guard so a missing "Nav" element
        // fails validation instead of throwing a NullReferenceException.
        HtmlNode firstNavItemNode = navNode != null ? navNode.SelectSingleNode(".//li") : null;

        // Validate the first list item in the Nav element says "Windows"
        e.IsValid = firstNavItemNode != null && firstNavItemNode.InnerText == "Windows";
    }
}
Crawling web page data in bulk with HtmlAgilityPack