# First exercise: use the crawler API provided by Baidu Nuomi to fetch the
# current day's Beijing group-purchase deals and save the response as numi.html;
# this script then parses that saved file.
import os
import xml.etree.ElementTree as ET
class Nuomi:
    """Parse a Baidu Nuomi group-purchase XML feed into a list of deal dicts.

    Each record may carry the keys 'title', 'businesstitle', 'value' and
    'price' (price converted to float); keys whose element is missing from
    the feed are omitted and a diagnostic line is printed instead.
    """

    def __init__(self):
        # Accumulated deal records across all parse() calls.
        self.numi = []

    @staticmethod
    def _text(parent, tag):
        """Return the text of child element *tag* under *parent*, or None."""
        node = parent.find(tag)
        return node.text if node is not None else None

    def parse(self, filepath):
        """Parse *filepath* (path or file object) and collect one dict per <url>.

        Returns the accumulated list (self.numi).  A record is appended for
        every <url> element, even when no display fields could be extracted,
        matching the original behavior.
        """
        tree = ET.parse(filepath)
        root = tree.getroot()
        # NOTE(review): element/tag casing was garbled in the source text;
        # lowercase is assumed here -- confirm against a real numi.html feed.
        for url in root.iter('url'):
            record = {}
            data = url.find('data')
            if data is not None:
                display = data.find('display')
                if display is not None:
                    title = self._text(display, 'title')
                    if title is not None:
                        record['title'] = title
                    else:
                        print("No title")

                    businesstitle = self._text(display, 'businesstitle')
                    if businesstitle is not None:
                        record['businesstitle'] = businesstitle
                    else:
                        print("No businesstitle")

                    value = self._text(display, 'value')
                    if value is not None:
                        record['value'] = value
                    else:
                        print("No value")

                    price = self._text(display, 'price')
                    if price is not None:
                        try:
                            record['price'] = float(price)
                        except ValueError:
                            # Fixed typo in original message ("No pire").
                            print("No price")
                    else:
                        print("No price")
            self.numi.append(record)
        return self.numi
if __name__ == '__main__':
    # Parse the previously saved feed and report how many deals were found.
    nuomi = Nuomi()
    data = nuomi.parse('numi.html')
    print(len(data))
# Summary: parsing Baidu Nuomi group-purchase information from XML with Python.