Multi-threaded download of anime-category wallpapers (the script below has known bugs)
# -*- coding: utf-8 -*-
"""Download the 1920x1080 wallpapers of the superbwallpapers.com anime category.

The script works in two phases:

1. Every listing page in ``main_url`` is fetched and scanned for wallpaper
   links.  The results are stored in the parallel module-level lists
   ``pic_page``, ``pic_name``, ``pic_url`` and ``pic_url_number``.
2. A fixed number of ``MyThread`` workers download the collected pictures,
   each worker repeatedly claiming the next unprocessed index under ``lock``.

Nothing is fetched at import time; run the file as a script to start.
"""
import os
import re
import socket
import threading
from contextlib import closing

try:  # Python 3
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2, which this script was originally written for
    from urllib import urlretrieve
    from urllib2 import urlopen

# urlretrieve() takes no timeout argument, so a process-wide socket timeout is
# the only way to keep a stalled connection from blocking a worker forever.
socket.setdefaulttimeout(25)

SITE_ROOT = "http://www.superbwallpapers.com"
CDN_ROOT = "http://cdn.superbwallpapers.com/wallpapers"
SAVE_DIR = "k://pic/"
URL_LIST_FILE = 'K://pic/url.txt'
# The original driver allowed fewer than 7 active threads, main thread included.
WORKER_COUNT = 6

re_link = re.compile(r'/anime/.{0,50}-\d{5}')
# NOTE(review): never referenced anywhere in this script; kept for reference.
re_404 = re.compile(r'Page Not found-please Try some of the popular items below')

end_page = 40
main_url = [
    SITE_ROOT + "/anime/" + str(page_number) + ".html"
    for page_number in range(1, end_page + 1)
]

# Parallel lists: entry ``i`` of each one describes the same wallpaper.
pic_page = []        # URL of the wallpaper's detail page
pic_name = []        # wallpaper slug, used as the local file name
pic_url = []         # direct URL of the 1920x1080 image
pic_url_number = []  # index of the listing page the wallpaper was found on

pic_number = 0       # next index of ``pic_url`` that no worker has claimed yet
url_fail = []        # flat list: failed URL followed by its listing-page index
how_many = 0
lock = threading.Lock()


def extract_wallpaper_paths(hrefs):
    """Return the distinct wallpaper paths found in *hrefs*, in first-seen order.

    *hrefs* is an iterable of ``href`` attribute values; ``None`` and empty
    values are skipped.  Only the part of each value matched by ``re_link``
    is kept.

    The original code took every second match and lost the last wallpaper
    whenever the number of matches was even.  It presumably relied on each
    wallpaper being linked twice in a row (TODO confirm against the site);
    de-duplicating gives the same list without depending on that layout.
    """
    seen = set()
    paths = []
    for href in hrefs:
        if not href:
            continue
        match = re_link.match(href)
        if match is None:
            continue
        path = match.group()
        if path not in seen:
            seen.add(path)
            paths.append(path)
    return paths


def register_wallpapers(paths, page_index):
    """Append one entry per path in *paths* to the module-level picture lists.

    *page_index* is stored in ``pic_url_number`` and later selects the
    sub-folder of ``SAVE_DIR`` the picture is saved into.
    """
    prefix_length = len("/anime/")
    for path in paths:
        pic_page.append(SITE_ROOT + path)
        pic_name.append(path[prefix_length:])
        pic_url.append(CDN_ROOT + path + "-1920x1080.jpg")
        pic_url_number.append(page_index)


def one_page(main_url, page_index=0):
    """Fetch one listing page and register every wallpaper linked from it.

    main_url   -- URL of the listing page to scan.
    page_index -- index recorded for the wallpapers found.  The original read
                  the caller's loop variable ``x`` as an implicit global; it
                  is now an explicit argument with a default.

    Network and parsing errors propagate to the caller.
    """
    # Imported here because bs4 is third-party and only this function needs
    # it; the pure helpers above stay importable without it.
    from bs4 import BeautifulSoup

    with closing(urlopen(main_url)) as response:
        main_page_html = response.read()
    soup = BeautifulSoup(main_page_html, from_encoding="GB18030")
    hrefs = [link.get('href') for link in soup.find_all('a')]
    register_wallpapers(extract_wallpaper_paths(hrefs), page_index)


def picture_path(index):
    """Return the local file path for the picture at *index*."""
    return (SAVE_DIR + str(pic_url_number[index]) + "/"
            + str(pic_name[index]) + ".jpg")


def claim_next_index(limit):
    """Atomically hand out the next unclaimed picture index.

    Returns ``None`` and leaves ``pic_number`` unchanged once *limit* indexes
    have been handed out, so no worker can index past the end of the lists.
    """
    global pic_number
    with lock:
        if pic_number >= limit:
            return None
        index = pic_number
        pic_number += 1
        return index


def download_picture(index):
    """Download picture *index*, retrying once; return True on success.

    After the second failure the URL and its listing-page index are appended
    to ``url_fail`` and False is returned.
    """
    url = pic_url[index]
    target = picture_path(index)
    for _attempt in range(2):
        try:
            urlretrieve(url, target)
        except Exception:
            # Worker boundary: a failed download (timeout, HTTP error, disk
            # error) must not kill the thread.  It is retried, then recorded.
            continue
        print("Picture %s downloaded" % pic_name[index])
        return True
    with lock:  # keep the (url, page index) pair adjacent in the flat list
        url_fail.append(url)
        url_fail.append(pic_url_number[index])
    print("-----Socket timout-----, record ...")
    return False


class MyThread(threading.Thread):
    """Worker that downloads pictures until every index has been claimed."""

    def __init__(self, pic_url):
        threading.Thread.__init__(self)
        self.pic_url = pic_url

    def run(self):
        """Claim indexes one at a time and download them; exit when none are left."""
        name = threading.current_thread().name
        while True:
            print('%s acquire lock ...' % name)
            index = claim_next_index(len(self.pic_url))
            print('%s Release Lock ...' % name)
            if index is None:
                return
            download_picture(index)


def start_new_thread():
    """Start one download worker over ``pic_url`` and return it."""
    thread = MyThread(pic_url)
    thread.start()
    return thread


def download_all(worker_count=WORKER_COUNT):
    """Run *worker_count* workers to completion and return ``url_fail``.

    This replaces the original busy-wait loop, which kept spawning threads
    while ``pic_number <= len(pic_url)`` and could therefore start more
    threads than there were pictures.
    """
    workers = [start_new_thread() for _ in range(worker_count)]
    for worker in workers:
        worker.join()
    return url_fail


def collect_all_pages():
    """Scan every listing page and create one output folder per page."""
    if not os.path.isdir(SAVE_DIR):
        os.makedirs(SAVE_DIR)
    for page_index, page_url in enumerate(main_url):
        one_page(page_url, page_index)
        title = SAVE_DIR + str(page_index)
        if not os.path.isdir(title):
            os.makedirs(title)


def main():
    """Collect all wallpaper URLs, save the list, then download everything."""
    print(main_url)
    collect_all_pages()
    with open(URL_LIST_FILE, 'w+') as output:
        output.write(str(pic_url))
    failures = download_all()
    if failures:
        # The list is flat, so every failed picture contributes two entries.
        print("%d download(s) failed" % (len(failures) // 2))


if __name__ == "__main__":
    main()
Python: downloading the superbwallpapers anime category