# coding=utf-8
"""Crawl the picture galleries listed on mzitu.com and save them locally.

The script walks every listing page of the site, opens each gallery linked
from the listing, and downloads every picture of that gallery into a
directory named after the gallery title, below ``path``.
"""
import os

import requests
from bs4 import BeautifulSoup

all_url = 'http://www.mzitu.com'

# HTTP request headers used for the listing and gallery pages.
hostreferer = {
    'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
    'Referer': 'http://www.mzitu.com',
}
# Request headers for the image host; this Referer is what gets past the
# host's hotlink protection.
picreferer = {
    'User-Agent': 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
    'Referer': 'http://i.meizitu.net',
}

# Save location. A raw string keeps the backslashes literal.
path = r'G:\python\sister\liang'

# Prefix of the numbered listing pages: same_url + "1", same_url + "2", ...
same_url = 'http://www.mzitu.com/page/'


def _get_soup(url):
    """Fetch *url* with the site headers and return the parsed HTML tree."""
    response = requests.get(url, headers=hostreferer)
    return BeautifulSoup(response.text, "html.parser")


def _save_gallery(title, href):
    """Download every picture of one gallery into its own directory.

    Args:
        title: Link text of the gallery; used both as the directory name and
            as the ``alt`` text that identifies the picture on each page.
        href: URL of the gallery's first page.
    """
    # Windows cannot create a directory whose name contains '?'.
    # The directory is joined below ``path`` (plain string concatenation
    # would glue the title onto the last path component instead).
    gallery_dir = os.path.join(path, title.strip().replace('?', ''))
    already_existed = os.path.exists(gallery_dir)
    if not already_existed:
        os.makedirs(gallery_dir)

    # Number of pages (one picture per page) in this gallery. Taking the
    # 11th <span> is tied to the site's markup -- NOTE(review): confirm the
    # index still matches the live page layout.
    spans = _get_soup(href).find_all('span')
    pic_max = int(spans[10].text)

    # A pre-existing directory holding at least pic_max files is treated as
    # a gallery that was already downloaded completely.
    if already_existed and len(os.listdir(gallery_dir)) >= pic_max:
        print('already saved, skipped')
        return

    for num in range(1, pic_max + 1):
        pic_page = _get_soup(href + '/' + str(num))
        pic_url = pic_page.find('img', alt=title)
        if pic_url is None:
            # No <img> carries the gallery title as alt text on this page;
            # skip it instead of failing on None['src'].
            print('picture not found, skipped:', num)
            continue
        src = pic_url['src']
        print(src)
        image = requests.get(src, headers=picreferer)
        file_name = src.split('/')[-1]
        # Write by full path rather than os.chdir(), so the process working
        # directory is left untouched.
        with open(os.path.join(gallery_dir, file_name), 'wb') as f:
            f.write(image.content)
    print('Finish')


def main():
    """Walk every listing page and download each gallery linked from it."""
    # Find the highest listing page number: it is read from the
    # second-to-last pagination link on the front page.
    page = _get_soup(all_url).find_all('a', class_='page-numbers')
    max_page = int(page[-2].text)

    for n in range(1, max_page + 1):
        soup = _get_soup(same_url + str(n))
        all_a = soup.find('div', class_='postlist').find_all(
            'a', target='_blank')
        for a in all_a:
            title = a.get_text()  # extract the link text
            if not title:
                # Anchors without text cannot name a gallery directory
                # (presumably these are the thumbnail links).
                continue
            print("Ready to pick:" + title)
            _save_gallery(title, a['href'])
        print('P', n, 'page complete')


if __name__ == "__main__":
    main()
# Python crawler: crawling pictures from the mzitu.com gallery site