Some Baidu image results have encoding problems and temporarily cannot be crawled; if that happens, retry a few times.
# -*- coding: utf-8 -*-
"""Download Baidu image-search results for a fixed query.

Idea: fetch the JSON result page, read each image address out of it, and
save the picture under a name derived from the title of the page it came
from.  Difficulty: some responses have encoding problems and cannot be
parsed; those pages are reported and skipped instead of aborting the run.
"""
import json
import os
import re
from urllib import error, request

# Result-page endpoint.  ``{offset}`` is the index of the first result
# (the ``pn`` query parameter); each page carries ``rn=30`` results.
_SEARCH_URL = (
    'http://image.baidu.com/search/acjson?tn=resultjson_com&ipn=rj'
    '&ct=201326592&is=&fp=result'
    '&queryword=%e5%93%88%e5%a3%ab%e5%a5%87&cl=2&lm=-1&ie=utf-8&oe=utf-8'
    '&adpicid=&st=-1&z=&ic=0&word=%e5%93%88%e5%a3%ab%e5%a5%87&s=&se=&tab='
    '&width=&height=&face=0&istype=2&qc=&nc=1&fr='
    '&pn={offset}&rn=30&gsm=1e&1520997014923='
)

_HEADERS = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/64.0.3282.140 Safari/537.36'
    ),
}

_PAGE_SIZE = 30      # results per page; also the step applied to ``page``
_LAST_OFFSET = 30    # highest offset requested (one page, as originally)
_IMAGE_DIR = 'images'

# NOTE(review): key names restored to the casing the Baidu acjson payload
# is expected to use; the scraped listing had them case-mangled.  Confirm
# against a live response.
_URL_KEYS = ('middleURL', 'thumbURL', 'hoverURL')
_TITLE_KEY = 'fromPageTitleEnc'

# Characters removed from titles before they are used as file names.
# NOTE(review): the scraped pattern also showed a space and a second '?',
# which look like transcription artefacts (possibly full-width
# punctuation in the original post) -- confirm.
_UNSAFE_CHARS = re.compile(r'[?\\*|"<>:/!]')


class baiduimg(object):
    """Crawler that pages through the search results and saves each image.

    Attributes:
        page: offset of the next result page to request (``pn``).
    """

    def __init__(self):
        super().__init__()
        print('Start collecting Pictures')
        self.page = 30

    def request(self):
        """Fetch result pages up to the last offset and download the images.

        A page whose body cannot be decoded or parsed as JSON is reported
        and skipped.  Network errors on the result page itself propagate
        to the caller.
        """
        while self.page <= _LAST_OFFSET:
            req = request.Request(
                _SEARCH_URL.format(offset=self.page), headers=_HEADERS
            )
            with request.urlopen(req) as f:
                if f.status == 200:
                    try:
                        content = f.read().decode('utf-8')
                        content_dict = json.loads(content)
                    except ValueError as exc:
                        # UnicodeDecodeError and JSONDecodeError are both
                        # ValueError subclasses.
                        print('Skipping page at offset %d: %s'
                              % (self.page, exc))
                    else:
                        self.download2(content_dict.get('data') or [])
            # Advance unconditionally so a bad page cannot loop forever.
            self.page += _PAGE_SIZE

    def dowload(self, data):
        """Save every image in *data* by reading the response body.

        Args:
            data: iterable of result dicts from the search response.
                Entries without a usable image URL are ignored.
        """
        for index, image in enumerate(data):
            url = _image_url(image)
            if not url:
                continue
            file_name = _target_path(image, index)
            try:
                with request.urlopen(url) as response:
                    payload = response.read()
            except error.URLError as exc:
                print('Failed to fetch %s: %s' % (url, exc))
                continue
            with open(file_name, 'wb') as f:
                f.write(payload)

    def download2(self, data):
        """Save every image in *data* using ``urllib.request.urlretrieve``.

        Args:
            data: iterable of result dicts from the search response.
                Entries without a usable image URL are ignored.
        """
        for index, image in enumerate(data):
            url = _image_url(image)
            if not url:
                continue
            file_path = _target_path(image, index)
            try:
                request.urlretrieve(url, file_path)
            except error.URLError as exc:
                print('Failed to fetch %s: %s' % (url, exc))


def _image_url(image):
    """Return the first non-empty image URL in *image*, or ``''``."""
    for key in _URL_KEYS:
        url = image.get(key)
        if url:
            return url
    return ''


def _target_path(image, index):
    """Return the file path for *image*, creating the image directory."""
    os.makedirs(_IMAGE_DIR, exist_ok=True)
    # Fall back to a positional name when the title is missing or is
    # reduced to nothing by the filter.
    name = strip(image.get(_TITLE_KEY, '')) or 'image_%d' % index
    return os.path.join(_IMAGE_DIR, name + '.jpg')


def strip(path):
    """Return ``str(path)`` with characters unsafe in file names removed."""
    return _UNSAFE_CHARS.sub('', str(path))


if __name__ == '__main__':
    bi = baiduimg()
    bi.request()
Python crawler: Baidu Images