/// <summary>
/// Demonstrates scraping a web page with HtmlAgilityPack and XPath: loads one
/// chapter page over HTTP and extracts the text of its content container.
/// The two commented-out regions are alternative samples (home-page categories
/// and per-category book details) kept for reference.
/// </summary>
public static void Get()
{
    // string xpathTri = "//*[@id = 'classify-list']/dl/dd/a/cite/span/i";

    #region Get the category of the homepage
    // // HTML address of the home page
    // string urlHome = "http://www.qidian.com/";
    // HtmlWeb web = new HtmlWeb();
    // HtmlAgilityPack.HtmlDocument htmlDoc = web.Load(urlHome);
    // // Create an HTML node
    // HtmlNode rootNode1 = htmlDoc.DocumentNode;
    // string xpathTra = "//*[@id = 'classify-list']/dl/dd";
    // HtmlNodeCollection classList = rootNode1.SelectNodes(xpathTra);
    // List<string> listINode = new List<string>();
    // foreach (HtmlNode item in classList)
    // {
    //     // Get categories
    //     string iNode = item.SelectSingleNode("//a/cite/span/i").InnerText;
    //     listINode.Add(iNode);
    // }
    #endregion

    #region Categories and Details
    // string urlDetail = "http://xuanhuan.qidian.com/";
    // List<string> urlList = new List<string>();
    // urlList.Add("http://xuanhuan.qidian.com/");
    // urlList.Add("http://qihuan.qidian.com/");
    // urlList.Add("http://wuxia.qidian.com/");
    // urlList.Add("http://xianxia.qidian.com/");
    // urlList.Add("http://dushi.qidian.com/");
    // urlList.Add("http://zhichang.qidian.com/");
    // urlList.Add("http://junshi.qidian.com/");
    // urlList.Add("http://lishi.qidian.com/");
    // urlList.Add("http://youxi.qidian.com/");
    // urlList.Add("http://tiyu.qidian.com/");
    // urlList.Add("http://kehuan.qidian.com/");
    // urlList.Add("http://lingyi.qidian.com/");
    // foreach (string url in urlList)
    // {
    //     HtmlAgilityPack.HtmlDocument htmlDetail = web.Load(url);
    //     HtmlNode rootNode2 = htmlDetail.DocumentNode;
    //     string a = "//*[@class = 'book-info']";
    //     HtmlNodeCollection classList2 = rootNode2.SelectNodes(a);
    //     List<string> listINode2 = new List<string>();
    //     foreach (HtmlNode item in classList2)
    //     {
    //         // Get categories
    //         string iNode = item.InnerHtml;
    //         listINode2.Add(iNode);
    //     }
    // }
    #endregion

    #region Article content
    HtmlWeb web = new HtmlWeb();
    string u = "http://read.qidian.com/chapter/zOGI9RYmNdFhO--gcH8iFg2/h3iHSEH1cSpMs5iq0oQwLQ2";
    HtmlAgilityPack.HtmlDocument htmlDocument = web.Load(u);

    // Root node of the parsed HTML document; XPath queries start from here.
    HtmlNode htmlNode = htmlDocument.DocumentNode;

    // The attribute value must match exactly, so no padding spaces inside the quotes.
    // NOTE(review): XPath string comparison is case-sensitive - confirm the class
    // name casing against the live page markup.
    string x = "//*[@class = 'read-content j_readcontent']";
    HtmlNode htmlNodeP = htmlNode.SelectSingleNode(x);

    // SelectSingleNode yields null when nothing matches; fall back to an empty
    // string instead of throwing NullReferenceException.
    // The text is only held in a local (e.g. for inspection in a debugger).
    string htmlD = htmlNodeP != null ? htmlNodeP.InnerText : string.Empty;
    #endregion
}
This is just a simple example.
Using XPath in C# to crawl the contents of a website