import urllib2
import re
import os


def process_item(self, item, spider):
    """Download the picture at ``item['Addr']`` with urllib2 and save it to disk.

    The image is written to ``./Down_pic/<prefix>_<Piccount>/<name>.jpg``,
    where ``<prefix>`` is the part of ``item['name']`` before the first
    underscore. The path of the saved file is then appended, one per line,
    to ``./savepath.txt``.

    NOTE: the URL is handed to urllib2 exactly as stored in the item; it is
    not escaped. This is the variant that fails with
    ``HTTP Error 400: Bad Request`` for some pictures.

    Args:
        self: The object this method is attached to (not used in the body).
        item: Mapping that provides the ``'Addr'``, ``'name'`` and
            ``'Piccount'`` entries.
        spider: Not used in the body.

    Raises:
        urllib2.HTTPError: If the server answers with an error status,
            e.g. 400 when the URL contains characters that were not escaped.
    """
    # Browser-like request headers; the Host value is hard-coded to the
    # image server.
    headers = {
        "Host": 'img31.mtime.cn',
        "user-agent": 'mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:53.0) gecko/20100101 firefox/53.0',
        "Accept": 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        "Accept-language": 'zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
        "accept-encoding": 'gzip, deflate',
        "Connection": 'keep-alive',
        "upgrade-insecure-requests": "1",
    }
    req = urllib2.Request(url=item['Addr'], headers=headers)
    res = urllib2.urlopen(req)

    # Target directory: ./Down_pic/<name prefix>_<Piccount>
    savefilepath = os.path.join(
        os.path.curdir,
        "Down_pic",
        item['name'].split("_")[0] + "_" + str(item["Piccount"]),
    )
    if not os.path.exists(savefilepath):
        # os.mkdir() can only create a single directory level, so use
        # os.makedirs() to create the whole nested path.
        os.makedirs(savefilepath)

    file_name = os.path.join(savefilepath, item['name'] + '.jpg')
    # The response body is raw image data, so write it in binary mode.
    with open(file_name, 'wb') as fp:
        fp.write(res.read())

    # Keep a log of every file that was saved.
    with open("./savepath.txt", "a") as fh:
        fh.write(file_name + "\n")
When downloading pictures with urllib2, some of the requests fail with "HTTP Error 400: Bad Request".
Reference: https://stackoverflow.com/questions/8840303/urllib2-http-error-400-bad-request?answertab=active#tab-top
The cause: "you aren't escaping the string for a URL."
Using the requests library instead resolves the issue.
import os
import requests


def process_item(self, item, spider):
    """Download the picture at ``item['Addr']`` with requests and save it to disk.

    The image is written to ``./Down_pic/<prefix>_<Piccount>/<name>.jpg``,
    where ``<prefix>`` is the part of ``item['name']`` before the first
    underscore. The path of the saved file is then appended, one per line,
    to ``./savepath.txt``.

    Args:
        self: The object this method is attached to (not used in the body).
        item: Mapping that provides the ``'Addr'``, ``'name'`` and
            ``'Piccount'`` entries.
        spider: Not used in the body.
    """
    # Browser-like request headers; the Host value is hard-coded to the
    # image server.
    headers = {
        "Host": 'img31.mtime.cn',
        "user-agent": 'mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:53.0) gecko/20100101 firefox/53.0',
        "Accept": 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        "Accept-language": 'zh-cn,zh;q=0.8,en-us;q=0.5,en;q=0.3',
        "accept-encoding": 'gzip, deflate',
        "Connection": 'keep-alive',
        "upgrade-insecure-requests": "1",
    }

    res = requests.get(item['Addr'], headers=headers)
    # res.content holds the response body as binary data (the image bytes).

    # Target directory: ./Down_pic/<name prefix>_<Piccount>
    savefilepath = os.path.join(
        os.path.curdir,
        "Down_pic",
        item['name'].split("_")[0] + "_" + str(item["Piccount"]),
    )
    if not os.path.exists(savefilepath):
        # os.mkdir() can only create a single directory level, so use
        # os.makedirs() to create the whole nested path.
        os.makedirs(savefilepath)

    file_name = os.path.join(savefilepath, item['name'] + '.jpg')
    # Binary mode: the payload is raw image data.
    with open(file_name, 'wb') as fp:
        fp.write(res.content)

    # Keep a log of every file that was saved.
    with open("./savepath.txt", "a") as fh:
        fh.write(file_name + "\n")
urllib2.HTTPError: HTTP Error 400: Bad Request