Python 2.7 lxml XPath
1: Download setuptools from https://pypi.python.org/pypi/setuptools. On that page, download the ez_setup.py file listed under the "Windows (Simplified)" section.
2: Install: run "python ez_setup.py" in cmd. If you have several versions of Python installed, go to the folder of the version you want and run ez_setup.py with that folder's python.exe ("python.exe ez_setup.py").
3: Download the desired lxml installation package
https://pypi.python.org/simple/lxml/
Choose the build that matches your Python (64-bit or 32-bit):
lxml-2.3-py2.7-win-amd64.egg (64-bit)
lxml-2.3-py2.7-win32.egg (32-bit)
4: Installation
Open a command prompt and change to C:\Python27\Scripts
Then run:
easy_install D:\downloads\lxml-2.3-py2.7-win-amd64.egg
Example:
# coding: utf-8
# Minimal Python 2.7 example: download a page, parse it with lxml and print
# the text of every node matched by an XPath expression.
import urllib
import urllib2

from lxml import etree as etree

if __name__ == "__main__":
    # urllib2 needs a full URL including the scheme.
    req_url = 'http://www.baidu.com'
    # Send a browser-like User-Agent header with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:26.0) '
                      'Gecko/20100101 Firefox/26.0',
    }
    req = urllib2.Request(req_url, headers=headers)
    content = urllib2.urlopen(req, timeout=60).read()

    # The response body is a byte string; decode it before parsing.
    # NOTE(review): 'gbk' is the encoding used by the original example;
    # confirm it matches the encoding of the page you actually fetch.
    if not isinstance(content, unicode):
        content = content.decode('gbk')

    htmlsource = etree.HTML(content)
    # lxml's HTML parser lower-cases tag names, so the cell is 'td', not 'TD'.
    # NOTE(review): the id/class values are case-sensitive and must match the
    # markup of the target page; adjust them for your own page.
    names = htmlsource.xpath(
        u'//*[@id="Buildhistory"]/div[2]/table'
        u'/*[@class="Finish_mousenone"]/td[2]'
    )
    for node in names:
        print(node.text)
# -*- coding: cp936 -*-
# Python 2.7 example: scrape a build-history table with lxml/XPath and
# collect selected columns of every finished build row.
import urllib
import urllib2

from lxml import etree as etree

# XPath selecting every finished-build row of the build-history table.
# NOTE(review): id/class values are case-sensitive; confirm them against the
# markup of the page being scraped.
_ROW_XPATH = (
    u'//*[@id="Buildhistory"]/div[2]/table/*[@class="Finish_mousenone"]'
)

# (result key, XPath relative to one row) for the columns read as cell text.
_TEXT_FIELDS = (
    ('buildTime', u'td[2]'),
    ('buildVersion', u'td[3]'),
    ('buildBranch', u'td[6]'),
    ('buildAuthor', u'td[7]'),
)

# XPath, relative to one row, of the download link's href attribute.
_DOWNLOAD_URL_XPATH = u'td[12]/a/@href'


def _first_text(row, xpath):
    """Return the text of the first node matching *xpath* under *row*.

    Returns "" when nothing matches or the matched node has no text.
    """
    cells = row.xpath(xpath)
    if cells and cells[0].text is not None:
        return cells[0].text
    return ""


def initsogoubranchinfo(req_url=None):
    """Fetch *req_url* and extract the build-history columns from it.

    Args:
        req_url: URL of the build-history page.

    Returns:
        A dict with the keys 'buildTime', 'buildVersion', 'buildBranch',
        'buildAuthor' and 'buildDownloadUrl'. Each value is a list with one
        entry per matched row; a missing cell is stored as "". Returns None
        (after printing a message) when *req_url* is None or empty.
    """
    if req_url is None or req_url == "":
        print("req_url == None,return")
        return None

    # Send a browser-like User-Agent header with the request.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:26.0) '
                      'Gecko/20100101 Firefox/26.0',
    }
    req = urllib2.Request(req_url, headers=headers)
    content = urllib2.urlopen(req, timeout=60).read()

    # The response body is a byte string; decode it before parsing.
    if not isinstance(content, unicode):
        content = content.decode('gbk')

    htmlsource = etree.HTML(content)

    result = {
        'buildTime': [],
        'buildVersion': [],
        'buildBranch': [],
        'buildAuthor': [],
        'buildDownloadUrl': [],
    }
    for row in htmlsource.xpath(_ROW_XPATH):
        for key, xpath in _TEXT_FIELDS:
            result[key].append(_first_text(row, xpath))
        # '@href' yields the attribute strings directly, not elements.
        hrefs = row.xpath(_DOWNLOAD_URL_XPATH)
        result['buildDownloadUrl'].append(hrefs[0] if hrefs else "")
    return result


if __name__ == "__main__":
    result = initsogoubranchinfo(
        'http://build.sogou-inc.com/system_build/common_module/project.php'
        '?project=ime&class=ime&branch=branch\\pinyindev_r_7_4_update_kernel50'
    )
    # NOTE(review): the original example opens this file but never writes to
    # it. The open is kept (now closed deterministically) so the file is still
    # created/truncated; write the results here if that was the intent.
    with open(r'D:/t.txt', 'w'):
        for download_url in result['buildDownloadUrl']:
            print(download_url)
References:
http://www.cnblogs.com/zhuyp1015/archive/2012/07/17/2596495.html
http://blog.sina.com.cn/s/blog_641289eb0100yf84.html
http://blog.csdn.net/zhaokuo719/article/details/8209496
http://blog.csdn.net/shirdrn/article/details/7030026
http://bbs.csdn.net/topics/390823000
http://www.cnblogs.com/bluescorpio/archive/2010/05/31/1748503.html
Python 2.7 lxml XPath