標籤:bad log err 進位 mac os x api import 解決問題 nec
import os
import re
import urllib
import urllib2


def process_item(self, item, spider):
    """Download the image at ``item['addr']`` with urllib2 and save it to disk.

    The image is written to
    ``./down_pic/<prefix>_<picCount>/<name>.jpg``, where ``<prefix>`` is the
    part of ``item['name']`` before the first underscore, and the resulting
    path is appended as one line to ``./savePath.txt``.

    Args:
        item: Mapping that provides ``'addr'`` (image URL), ``'name'`` (base
            file name) and ``'picCount'`` (used in the directory name).
        spider: Not used by this function.

    Returns:
        The ``item`` that was passed in.

    Raises:
        urllib2.URLError: If the request fails (``urllib2.HTTPError`` for an
            HTTP error status).
        OSError: If the target directory cannot be created.
    """
    # No "Host" entry: urllib2 derives it from the URL, and a fixed value
    # would be wrong for any image served from a different host.
    # No "Accept-Encoding" entry either: urllib2 does not decompress
    # responses, so advertising gzip/deflate could put compressed bytes
    # into the saved .jpg file.
    headers = {
        "User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:53.0) Gecko/20100101 Firefox/53.0',
        "Accept": 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        "Accept-Language": 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        "Connection": 'keep-alive',
        "Upgrade-Insecure-Requests": "1",
    }

    # urllib2 sends the URL exactly as given; spaces or non-ASCII characters
    # in it are answered with "HTTP Error 400: Bad Request". Percent-encode
    # everything except the URL delimiters, and keep "%" safe so sequences
    # that are already escaped are not escaped a second time.
    url = item['addr']
    if not isinstance(url, str):
        # Python 2 unicode object: urllib.quote needs a byte string.
        url = url.encode('utf-8')
    url = urllib.quote(url, safe="/:?=&%#@+,;!$'()*~[]")

    req = urllib2.Request(url=url, headers=headers)
    res = urllib2.urlopen(req)
    try:
        data = res.read()
    finally:
        # Release the connection even when reading the body fails.
        res.close()

    save_dir = os.path.join(
        os.path.curdir,
        "down_pic",
        item['name'].split("_")[0] + "_" + str(item["picCount"]),
    )
    # os.makedirs rather than os.mkdir because the directory is nested and
    # mkdir only creates a single level. Attempt the creation and tolerate
    # "already exists" instead of checking first, which is race-prone.
    try:
        os.makedirs(save_dir)
    except OSError:
        if not os.path.isdir(save_dir):
            raise

    file_name = os.path.join(save_dir, item['name'] + '.jpg')
    with open(file_name, 'wb') as fp:
        fp.write(data)
    with open("./savePath.txt", "a") as fh:
        fh.write(file_name + "\n")

    # NOTE(review): the signature matches a Scrapy item-pipeline hook, which
    # is expected to hand the item back; the original fell off the end and
    # returned None. Confirm against the caller.
    return item
用 urllib2 抓取圖片的時候，部分 request 報 HTTP Error 400: Bad Request。
參照:https://stackoverflow.com/questions/8840303/urllib2-http-error-400-bad-request?answertab=active#tab-top
because you aren't escaping the string for a URL.
改用requests成功解決問題。
import os

import requests


def process_item(self, item, spider):
    """Download the image at ``item['addr']`` with requests and save it to disk.

    The image is written to
    ``./down_pic/<prefix>_<picCount>/<name>.jpg``, where ``<prefix>`` is the
    part of ``item['name']`` before the first underscore, and the resulting
    path is appended as one line to ``./savePath.txt``.

    Args:
        item: Mapping that provides ``'addr'`` (image URL), ``'name'`` (base
            file name) and ``'picCount'`` (used in the directory name).
        spider: Not used by this function.

    Returns:
        The ``item`` that was passed in.

    Raises:
        requests.RequestException: If the request fails
            (``requests.HTTPError`` for a 4xx/5xx status).
        OSError: If the target directory cannot be created.
    """
    # No "Host" entry: it is derived from the URL, and a fixed value would
    # be wrong for any image served from a different host.
    headers = {
        "User-Agent": 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:53.0) Gecko/20100101 Firefox/53.0',
        "Accept": 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
        "Accept-Language": 'zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3',
        "Accept-Encoding": 'gzip, deflate',
        "Connection": 'keep-alive',
        "Upgrade-Insecure-Requests": "1",
    }

    res = requests.get(item['addr'], headers=headers)
    # Fail loudly on an error status instead of saving the server's error
    # page under a .jpg name.
    res.raise_for_status()

    save_dir = os.path.join(
        os.path.curdir,
        "down_pic",
        item['name'].split("_")[0] + "_" + str(item["picCount"]),
    )
    # os.makedirs rather than os.mkdir because the directory is nested and
    # mkdir only creates a single level. Attempt the creation and tolerate
    # "already exists" instead of checking first, which is race-prone.
    try:
        os.makedirs(save_dir)
    except OSError:
        if not os.path.isdir(save_dir):
            raise

    file_name = os.path.join(save_dir, item['name'] + '.jpg')
    with open(file_name, 'wb') as fp:
        # res.content is the response body as raw bytes, with any gzip or
        # deflate transfer encoding already undone by requests.
        fp.write(res.content)
    with open("./savePath.txt", "a") as fh:
        fh.write(file_name + "\n")

    # NOTE(review): the signature matches a Scrapy item-pipeline hook, which
    # is expected to hand the item back; the original fell off the end and
    # returned None. Confirm against the caller.
    return item
urllib2.HTTPError: HTTP Error 400: Bad Request