"""Grab a picture of today's headline Street"""ImportOSImport TimeImportRequests fromHashlibImportMD5classSpidertoutiao (object):def __init__(self):#Specify download directorySelf.download_dir = Os.path.join (Os.path.dirname (Os.path.abspath (__file__)),"Download") #by analyzing the request, we found the required address as follows, and the paging is controlled by offset + 20Self.url ="https://www.toutiao.com/search_content/" "? offset={0}&format=json&keyword=%e8%a1%97%e6%8b%8d&autoload=true&count=20&cur_tab=3 &from=gallery" #constructs the request header, masquerading as an Ajax requestSelf.headers = { "user-agent":"mozilla/5.0 (Windows NT 10.0; Win64; x64) applewebkit/537.36 (khtml, like Gecko)" "chrome/66.0.3359.139 safari/537.36", "Referer":"Https://www.toutiao.com/search/?keyword=%E8%A1%97%E6%8B%8D", "X-requested-with":"XMLHttpRequest" } defHandler (self, offset=0): whileTrue:response= Requests.get (Self.url.format (offset), headers=self.headers)ifResponse.status_code = = 200: Print("INFO--Current URL: <%s>"%response.url) Json_data= Response.json (). Get ("Data") #Start parsing Data ifJson_data: forIteminchjson_data: _title= Item.get ("title") _imgdict= Item.get ("image_list") #fix the URL, the default image address is a small image, we want a large HD image_imglist = [Str ("http:"+ _.get ("URL"). Replace ("List","Large") for_inch_imgdict]#Create a storage directory_downloaddir =Os.path.join (Self.download_dir, _title)if notos.path.exists (_downloaddir): Os.makedirs (_downloaddir)#download and save the file forImginch_imglist:r=requests.get (img) _file= Os.path.join (_downloaddir, MD5 (r.content). Hexdigest () +". jpg") if notos.path.exists (_file): With open (_file,"WB") as F:f.write (r.content)Else: Print("IG <%s> INFO"%_file)#Description No data, program exit Else: Break #paging self-incrementOffset + = 20#time intervalTime.sleep (. 9) Else: Print(Response.reason) exit (999)if __name__=="__main__": Spider=Spidertoutiao () Spider.handler ( )
Python spider that scrapes street-snap pictures from Toutiao (Today's Headlines)