I enjoy photography and like browsing the award-winning photographic works on the Xitek ("Shadow Mowgli") forum, so I wrote a small script to download those award-winning pictures. I have tested it myself and it works.
# -*- coding: utf-8 -*-
# Author: Rocky Chen
#
# Command-line script: for every URL given on the command line, fetch that
# page, follow each entry link found on it, and save the JPEG photos found on
# the linked pages into the current working directory.
import os  # noqa: F401  (kept from the original import list; unused here)
import re
import string  # noqa: F401  (kept from the original import list; unused here)
import sys
import time
from contextlib import closing

try:  # Python 3
    import http.cookiejar as cookielib  # noqa: F401  (kept; unused here)
    from urllib.request import Request, urlopen, urlretrieve
except ImportError:  # Python 2, which the script was originally written for
    import cookielib  # noqa: F401  (kept; unused here)
    from urllib import urlretrieve
    from urllib2 import Request, urlopen

# Browser-like User-Agent sent with every page request.
USER_AGENT = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'

# Seconds to wait after each photo download.
DOWNLOAD_DELAY = 4

# Captures the target of each entry link on the index page.
# NOTE(review): the class name is assumed to be lower-case "mshow" -- confirm
# against the live page markup.
_LINK_RE = re.compile(r'<div class="mshow"><a href="(.+?)" target="_blank">')

# Captures absolute http:// URLs of .jpg images referenced by <img> tags.
# NOTE(review): this is a generic pattern chosen to satisfy what the download
# loop needs (an http:// URL ending in .jpg); confirm it selects the intended
# photos on the live entry pages.
_PHOTO_RE = re.compile(r'<img[^>]*\bsrc="(http://[^"]+?\.jpg)"')

# Captures the "<name>.jpg" path segment used as the local file name.
_FILE_NAME_RE = re.compile(r'http://.+?/(\w+\.jpg)')


def _fetch(url, headers):
    """Return the body of *url* as text, closing the connection afterwards.

    The body is decoded as UTF-8 with replacement of undecodable bytes; only
    ASCII URLs are extracted from it, so replaced characters do not matter.
    """
    request = Request(url, headers=headers)
    with closing(urlopen(request)) as response:
        return response.read().decode('utf-8', 'replace')


class download:
    """Downloads the photos reachable from one index page.

    Attributes:
        url: address of the index page whose entry links are followed.
    """

    def __init__(self, url):
        self.url = url

    def getphotos(self):
        """Fetch the pictures on each page linked from ``self.url``.

        A linked page looks like, for example,
        http://vision.xitek.com/monthly/yuesaipingxuan/201404/14-149893.html
        Every photo is written to the current directory under the file name
        taken from its URL. Network errors from urllib propagate to the
        caller.
        """
        header = {'User-Agent': USER_AGENT}
        all_link = _LINK_RE.findall(_fetch(self.url, header))
        print(all_link)
        print("All link done")
        for link in all_link:
            print("one link")
            print(link)
            my_photos = _PHOTO_RE.findall(_fetch(link, header))
            print(my_photos)
            for my_photo in my_photos:
                name_match = _FILE_NAME_RE.search(my_photo)
                if name_match is None:
                    # No usable "<name>.jpg" segment: skip instead of crashing.
                    print("skipping (no file name found): " + my_photo)
                    continue
                file_name = name_match.group(1)
                print(file_name)
                urlretrieve(my_photo, file_name)
                # Pause between downloads to avoid hammering the server.
                time.sleep(DOWNLOAD_DELAY)


def usage():
    """Print a one-line description of the command-line syntax."""
    print("Usage: xitek_month_match.py URL [URL ...]")


if __name__ == "__main__":
    # sys.argv[0] is the script name, so at least two items are required.
    if len(sys.argv) < 2:
        usage()
        sys.exit(1)
    if sys.version_info[0] == 2:
        # Python 2 only: force UTF-8 as the default codec for implicit
        # str/unicode conversions, as the original script did.
        reload(sys)  # noqa: F821
        sys.setdefaultencoding('utf-8')
    for arg in sys.argv[1:]:
        print(arg)
        xitek = download(arg)
        xitek.getphotos()
    print("Done")
Automatically download the award-winning pictures from January through December
A Python crawler that downloads the winning pictures of the Xitek ("Shadow Mowgli") monthly contest