This is the content of the test.html file:
<!--hello.html-->
<div>
<ul>
<li class="item-0" text="1"><a href="link1.html">first item</a></li>
<li class="item-1" text="2"><a href="link2.html">second item</a></li>
<li class="item-inactive" text="3"><a href="link3.html"><span class="bold">third item</span></a></li>
<li class="item-1" text="2"><a href="link4.html">fourth item</a></li>
<li class="item-0" text="1"><a href="link5.html">fifth item</a></li>
</ul>
</div>
Here is how XPath is used with lxml to query this file:
# coding: utf-8
# Demonstrates common XPath queries with lxml against the sample file
# "test.html" (a <div> wrapping a <ul> of five <li> items, each holding an <a>).
# Every query result is printed so the output can be compared with the markup.
# Single-argument print(...) is used so the script prints the same on Python 2
# and also runs on Python 3.
import lxml
import lxml.etree

# Parse the file into an element tree. etree.parse uses the XML parser, so the
# sample markup must be well-formed.
html = lxml.etree.parse("test.html")
print(type(html))

# All <li> elements anywhere in the document.
res = html.xpath("//li")
print(res)
print(len(res))      # number of matches
print(type(res))     # xpath() returns a list ...
print(type(res[0]))  # ... whose items are tree elements

# "class" attribute of every <li>.
res1 = html.xpath("//li/@class")
print(res1)

# "text" attribute of every <li>.
res2 = html.xpath("//li/@text")
print(res2)

# <a> elements that are direct children of an <li>.
res3 = html.xpath("//li/a")
print(res3)

# "href" attribute of those <a> elements.
res4 = html.xpath("//li/a/@href")
print(res4)

# Only the <a> whose href is exactly "link3.html".
res5 = html.xpath("//li/a[@href=\"link3.html\"]")
print(res5)

# <span> elements at any depth below an <li> ("//" = any descendant).
res6 = html.xpath("//li//span")
print(res6)

# "class" attribute of those <span> elements.
res6 = html.xpath("//li//span/@class")
print(res6)

# Every "class" attribute found on an <li>'s child <a> or anything nested in it.
res7 = html.xpath("//li/a//@class")
print(res7)

# res8 = html.xpath("//li[1]")  # the first <li> among its siblings
# The last <li> among its siblings.
res8 = html.xpath("//li[last()]")
print(res8)

# href of the <a> inside the last <li>.
res9 = html.xpath("//li[last()]/a/@href")
print(res9)

# href of the <a> inside the second-to-last <li>.
res9 = html.xpath("//li[last()-1]/a/@href")
print(res9)

# Any element, whatever its tag, whose class is exactly "bold".
res10 = html.xpath("//*[@class=\"bold\"]")
print(res10)

# Any element whose "text" attribute is exactly "3".
res11 = html.xpath("//*[@text=\"3\"]")
print(res11)

# "class" attribute of the element(s) whose "text" attribute is "3".
res11 = html.xpath("//*[@text=\"3\"]/@class")
print(res11)