/// <summary>
/// Downloads the cnblogs home page and writes the inner HTML of its
/// document node to the console.
/// </summary>
static void InnerText()
{
    HtmlWeb htmlWeb = new HtmlWeb();
    HtmlDocument doc = htmlWeb.Load("http://www.cnblogs.com/", "GET");
    HtmlNode rootNode = doc.DocumentNode;

    // Use rootNode.InnerText instead to print the page text with the tags filtered out.
    Console.WriteLine(rootNode.InnerHtml);
}

/// <summary>
/// Downloads the cnblogs home page, selects every h3 that sits directly under a
/// div whose class is "post_item_body", and prints for each one the text and
/// href of its child a element plus the text of the node two siblings after
/// the h3, followed by a separator line.
/// </summary>
static void GetBlogs()
{
    string url = "http://www.cnblogs.com/";
    HtmlWeb htmlWeb = new HtmlWeb();
    HtmlDocument doc = htmlWeb.Load(url, "GET");
    HtmlNode rootNode = doc.DocumentNode;

    // SelectNodes returns null (not an empty collection) when nothing matches,
    // so guard before iterating.
    HtmlNodeCollection h3Nodes = rootNode.SelectNodes("//div[@class='post_item_body']/h3");
    if (h3Nodes == null)
    {
        return;
    }

    foreach (HtmlNode h3Node in h3Nodes)
    {
        // The link that carries the post title and URL.
        HtmlNode aNode = h3Node.SelectSingleNode("a");
        if (aNode == null)
        {
            continue;
        }

        // Two siblings after the h3; presumably the first sibling is the
        // whitespace text node between the tags - verify against the live markup.
        HtmlNode firstSibling = h3Node.NextSibling;
        HtmlNode pNode = firstSibling != null ? firstSibling.NextSibling : null;

        string blogLink = aNode.GetAttributeValue("href", "");
        string title = aNode.InnerText;
        string content = pNode != null ? pNode.InnerText : "";

        Console.WriteLine(title);
        Console.WriteLine(blogLink);
        Console.WriteLine(content);
        Console.WriteLine("------------------------------------------------------");
    }
}

/// <summary>
/// Loads test.html from the working directory and prints the text of three
/// nodes addressed by absolute XPath expressions: the first h1 of the first
/// div, and the first and second li of the first ul in the second div.
/// </summary>
static void XPathTest()
{
    string path = @"test.html";
    HtmlDocument doc = new HtmlDocument();
    doc.Load(path);

    // Read DocumentNode only after Load: loading builds a fresh document node,
    // so a reference taken earlier would still point at the empty pre-load tree.
    HtmlNode rootNode = doc.DocumentNode;

    // h1 tag
    WriteInnerText(rootNode, "/html/body/div[1]/h1[1]");
    // ul > li "name" entry
    WriteInnerText(rootNode, "/html/body/div[2]/ul[1]/li[1]");
    // ul > li "age" entry
    WriteInnerText(rootNode, "/html/body/div[2]/ul[1]/li[2]");
}

/// <summary>
/// Writes the inner text of the single node matched by <paramref name="xpath"/>
/// to standard output, or a diagnostic to standard error when nothing matches.
/// </summary>
/// <param name="root">Node the XPath expression is evaluated against.</param>
/// <param name="xpath">XPath expression identifying the node to print.</param>
private static void WriteInnerText(HtmlNode root, string xpath)
{
    // SelectSingleNode returns null when the expression matches nothing.
    HtmlNode node = root.SelectSingleNode(xpath);
    if (node == null)
    {
        Console.Error.WriteLine("No node matched XPath: " + xpath);
        return;
    }

    Console.WriteLine(node.InnerText);
}
The test.html code is as follows:
<html>
<head>
</head>
<body>
    <div>
        <h1>Welcome to visit this webpage!</h1>
    </div>

    <div>
        <ul class="user_match clear">
            <li>Name: Zhang San</li>
            <li>Age: 18</li>
        </ul>
    </div>
</body>
</html>
Basic usage of HtmlAgilityPack