# Fetch a product listing page and print the text nodes selected by an
# XPath query.
from urllib import request

from lxml import etree

# URL of the page to request.
url = "http://www.dfenqi.cn/Product/Index"

# Request headers: send a browser-like User-Agent with the request.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36"
    )
}

# Create the request object carrying the custom headers.
req = request.Request(url, headers=headers)

# Create the handler object and build an opener around it.
http_handler = request.HTTPHandler()
opener = request.build_opener(http_handler)

# Send the request and read the page source; the ``with`` block closes the
# underlying connection once the body has been read.
with opener.open(req) as response:
    html = response.read().decode("utf-8")

# XPath expression selecting the text of each product entry.
# NOTE(review): the class name casing was reconstructed from garbled text;
# confirm 'liebiao' against the page's actual markup.
xpath = "//div[@class='liebiao']/ul/li/p/text()"
# To get a list of attribute values instead of text, query e.g.
# "//div[@class='liebiao']/ul/li/p/@class".

# Parse the HTML into a queryable element tree.
selector = etree.HTML(html)

# Run the XPath query; the result is a list of matches.
goodslist = selector.xpath(xpath)

# Show each product title.
for goods in goodslist:
    print(goods)
6. Get web page data through XPath