Tag: Write user style pre HTTP request headers file Ror folder
Read image URLs and file names from a text file and download each file from its URL. Each row in the file contains a URL and a file name, separated by a tab.
1. Use the requests library to request the URL and download the file.
def download(img_url, img_name):
    """Stream the resource at *img_url* into the output folder as *img_name*.

    The response is requested with ``stream=True`` so the body is written
    to disk in 1024-byte chunks instead of being held in memory at once.
    ``out_dir`` is the output folder defined in the complete listing.
    """
    # closing() guarantees the HTTP connection is released even on error.
    with closing(requests.get(img_url, stream=True)) as r:
        with open(os.path.join(out_dir, img_name), 'wb') as f:
            for data in r.iter_content(1024):
                f.write(data)
2. Read the URLs from the file. Because the file may be large, read it lazily with a generator instead of loading it all at once.
def get_imgurl_generate():
    """Lazily yield ``[url, file_name]`` pairs from ``./example.txt``.

    Each row of the file holds a URL and a file name separated by a tab.
    Iterating the file object reads one line at a time, so even a very
    large list is never loaded into memory as a whole.
    """
    with open('./example.txt', 'r') as f:
        for line in f:
            # Drop the trailing newline, then split into URL and file name.
            imgs = line.strip().split('\t')
            yield imgs
3. Use multiple threads to download the files.
# A generator is not thread-safe: two threads calling next() at the same
# time raise "ValueError: generator already executing", so every next()
# call is serialized through this lock.
lock = threading.Lock()


def loop(imgs):
    """Worker body: pull (url, name) pairs from *imgs* until it is exhausted.

    *imgs* is the single generator shared by all worker threads.
    """
    while True:
        try:
            with lock:
                img_url, img_name = next(imgs)
        except StopIteration:
            # The shared generator is exhausted; this worker is done.
            break
        # Download outside the lock so the workers actually run in parallel.
        download(img_url, img_name)


img_gen = get_imgurl_generate()
for i in range(0, thread_num):
    t = threading.Thread(target=loop, args=(img_gen,))
    t.start()
Complete code, with exception handling added:
# -*- coding: utf-8 -*-
"""Download files listed in a tab-separated file using a pool of threads.

Each row of the input file is ``<url>\t<file name>``.  Rows are produced
lazily by a generator that all worker threads share, guarded by a lock.
Every ``print`` call takes a single pre-formatted string so the script runs
unchanged on both Python 2 and Python 3.
"""
import os
from contextlib import closing
import threading
import requests
import time


headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36'
}

# Output folder
out_dir = './output'
# Number of threads
thread_num = 20
# HTTP request timeout setting (seconds)
timeout = 5

if not os.path.exists(out_dir):
    os.mkdir(out_dir)


def download(img_url, img_name):
    """Download *img_url* into ``out_dir`` under the name *img_name*.

    Files that already exist are skipped, which makes the script resumable.
    Non-2xx responses and responses without a positive Content-Length are
    reported on stdout and skipped.  Network errors raised by the request
    itself propagate to the caller.
    """
    target = os.path.join(out_dir, img_name)
    if os.path.isfile(target):
        return
    # closing() guarantees the streamed connection is released on any exit.
    with closing(requests.get(img_url, stream=True, headers=headers, timeout=timeout)) as r:
        rc = r.status_code
        if 299 < rc or rc < 200:
            print('returncode%s\t%s' % (rc, img_url))
            return
        content_length = int(r.headers.get('Content-length', '0'))
        if content_length == 0:
            print('size0\t%s' % img_url)
            return
        try:
            with open(target, 'wb') as f:
                for data in r.iter_content(1024):
                    f.write(data)
        except (IOError, OSError):
            # Covers disk errors and requests' transfer errors (which derive
            # from IOError).
            print('savefail\t%s' % img_url)
            # Remove the truncated file; otherwise the isfile() check above
            # would treat it as complete and skip it on every later run.
            if os.path.isfile(target):
                try:
                    os.remove(target)
                except OSError:
                    pass


def get_imgurl_generate():
    """Lazily yield ``[url, file_name]`` pairs from ``./final.scp``.

    Malformed rows are reported on stdout and skipped.  Progress is printed
    every 500 input lines.
    """
    with open('./final.scp', 'r') as f:
        for index, line in enumerate(f, 1):
            if index % 500 == 0:
                print('Execute%s line at%s' % (index, time.time()))
            # Strip first: a raw line from file iteration always contains at
            # least the newline, so testing it before strip() never fires.
            line = line.strip()
            if not line:
                print(r'Line %s is empty "\t"' % index)
                continue
            imgs = line.split('\t')
            if len(imgs) != 2:
                print(r'Line %s splite error' % index)
                continue
            if not imgs[0] or not imgs[1]:
                print(r'Line %s img is empty' % index)
                continue
            # No try/except around the yield: a bare except there would also
            # swallow GeneratorExit when the generator is closed.
            yield imgs


# A generator is not thread-safe; next() calls are serialized by this lock.
lock = threading.Lock()


def loop(imgs):
    """Worker body: download every pair pulled from the shared generator."""
    print('thread%s is running ...' % threading.current_thread().name)

    while True:
        try:
            with lock:
                img_url, img_name = next(imgs)
        except StopIteration:
            break
        try:
            # Performed outside the lock so downloads overlap across threads.
            download(img_url, img_name)
        except Exception:
            # Thread boundary: one failed download must not kill the worker.
            print('exceptfail\t%s' % img_url)
    print('thread%s is end ...' % threading.current_thread().name)


img_gen = get_imgurl_generate()

for i in range(0, thread_num):
    t = threading.Thread(target=loop, name='Loopthread%s' % i, args=(img_gen,))
    t.start()
View Code
Python multi-threaded download file