#!/usr/bin/env python3
"""Crawl a listing page of www.xiaohuar.com and print the image URLs on it.

Requires the third-party ``requests`` package (``pip install requests``).
"""
import re


class Crawler(object):
    """Fetch xiaohuar.com listing pages and extract the image URLs they contain."""

    # Site root; also used to turn root-relative image paths into full URLs.
    BASE_URL = 'http://www.xiaohuar.com'

    # Browser-like request headers. The cookie values are a captured browser
    # session and are sent as-is.
    HEADERS = {
        'Host': 'www.xiaohuar.com',
        # The HTTP request header is named "Cookie" (singular).
        'Cookie': (
            '__51cke__=; '
            'Hm_lvt_0dfa94cc970f5368ddbe743609970944=1533890508; '
            'bdshare_firstime=1533890520508; '
            'Hm_lpvt_0dfa94cc970f5368ddbe743609970944=1533891345; '
            '__tins__17172513=%7B%22sid%22%3A%201533890507945%2C%20'
            '%22vd%22%3A%208%2C%20%22expires%22%3A%201533893209290%7D; '
            '__51laig__=8'
        ),
        'User-Agent': (
            'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/67.0.3396.79 Safari/537.36'
        ),
    }

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests`` can block indefinitely on an unresponsive host.
    TIMEOUT = 10

    # Matches the opening of a ``src`` attribute whose value is root-relative
    # ("/path"), but not protocol-relative ("//host/path") or absolute.
    _RELATIVE_SRC = re.compile(r'src="(?=/(?!/))')

    # Captures the double-quoted ``src`` value of every ``<img>`` tag.
    # NOTE(review): this pattern is a reconstruction -- confirm it against the
    # site's actual markup.
    _IMG_SRC = re.compile(r'<img\b[^>]*?\ssrc="([^"]*)"', re.IGNORECASE)

    def request(self, page):
        """Download listing page number ``page`` and return its HTML text.

        Root-relative ``src`` attributes in the returned HTML are rewritten
        to absolute URLs under ``BASE_URL``; other ``src`` values are left
        untouched.

        Args:
            page: Integer page index substituted into the listing URL.

        Returns:
            The page HTML as a string.

        Raises:
            requests.RequestException: On connection failure or timeout.
        """
        # Imported here so the parsing helpers stay usable when the
        # third-party dependency is not installed.
        import requests

        url = '%s/list-1-%d.html' % (self.BASE_URL, page)
        response = requests.get(url, headers=self.HEADERS, timeout=self.TIMEOUT)
        print("is crawling .....")
        print(response.url)
        return self._RELATIVE_SRC.sub('src="' + self.BASE_URL, response.text)

    def getimages(self, html):
        """Return the ``src`` values of all ``<img>`` tags found in ``html``.

        Args:
            html: HTML text, e.g. the value returned by ``request``.

        Returns:
            A list of strings in document order; empty when there are no
            matching tags.
        """
        return self._IMG_SRC.findall(html)


def main():
    """Crawl the first listing page and print each image URL on its own line."""
    crawler = Crawler()
    html = crawler.request(1)
    for image_url in crawler.getimages(html):
        print(image_url)


if __name__ == '__main__':
    main()
Requires the requests package; install it with: pip install requests
Using Python to crawl the xiaohuar.com campus site; a simple example that anyone just starting to learn web crawlers can take a look at.