An example implementation of a multi-threaded HTTP download in Python; the details are given below. Testing platform: Ubuntu 13.04 x86_64, Python 2.7.4.
It took nearly two hours. At the beginning, it did not occur to me to pass a single shared file object to each thread. As a result, the MD5 of the downloaded file differed from that of the source file, which wasted a lot of time.
If you are interested, you can add more parameters, improve the code, or add support for resumable downloads.
The code is as follows:
# -*- coding: utf-8 -*-
# Author: ToughGuy
# Email: wj0630@gmail.com
# I wrote this to get a preliminary understanding of Python's multithreading mechanism.
# I do not usually write comments, so I spent some time adding them to this code, because otherwise I might not understand it myself later. Corrections are welcome.
# Testing platform Ubuntu 13.04 X86_64 Python 2.7.4
import sys
import threading

try:
    import urllib2  # Python 2
except ImportError:
    # Python 3: urllib.request provides the Request/urlopen names used below.
    import urllib.request as urllib2
# Upper bound on the number of download threads that main() will start.
max_thread = 10
# Re-entrant lock shared by every Downloader thread. It serialises console
# output and the seek+write pairs on the shared output file object.
lock = threading.RLock()
class Downloader(threading.Thread):
    """Worker thread that downloads one byte range of a URL into a shared file.

    Args:
        url: Address of the resource to download.
        start_size: Offset of the first byte requested; also the position in
            ``fobj`` where the first received block is written.
        end_size: Offset of the last byte requested (sent as the upper bound
            of the HTTP ``Range`` header).
        fobj: File object shared by all workers; must support ``seek`` and
            ``write``.
        buffer: Maximum number of bytes read from the response per call.
    """

    def __init__(self, url, start_size, end_size, fobj, buffer):
        threading.Thread.__init__(self)
        self.url = url
        self.buffer = buffer
        self.start_size = start_size
        self.end_size = end_size
        self.fobj = fobj

    def run(self):
        """Announce the thread on stdout, then download its byte range."""
        with lock:
            print('starting: %s' % self.name)
        self._download()

    def _download(self):
        """Fetch ``start_size``-``end_size`` of ``url`` and write it to ``fobj``.

        The response is read in chunks of at most ``buffer`` bytes. Each chunk
        is written at a running offset that starts at ``start_size``.
        """
        req = urllib2.Request(self.url)
        # Ask the server for this worker's slice of the resource only.
        # NOTE(review): the response status is not checked; if the server
        # ignores Range and sends the whole body, the data written below will
        # not correspond to the requested range.
        req.add_header('Range',
                       'bytes=%s-%s' % (self.start_size, self.end_size))
        response = urllib2.urlopen(req)
        try:
            # Position in the output file where the next chunk belongs.
            offset = self.start_size
            while True:
                block = response.read(self.buffer)
                if not block:
                    # An empty read means the response body is exhausted.
                    break
                # The file object is shared between threads, so seek+write
                # must happen as one unit under the lock.
                with lock:
                    sys.stdout.write('%s saveing block...' % self.name)
                    self.fobj.seek(offset)
                    self.fobj.write(block)
                    offset += len(block)
                    sys.stdout.write('done.\n')
        finally:
            # Release the connection even if reading or writing failed.
            response.close()
        with lock:
            print('%s done.' % self.name)
def _split_ranges(size, parts):
    """Split ``size`` bytes into ``parts`` contiguous inclusive byte ranges.

    Every range covers ``size // parts`` bytes; the remainder is added to the
    last range so that it ends at ``size - 1``.

    Returns:
        A list of ``(start, end)`` tuples with inclusive ``end`` offsets.
    """
    avg_size, pad_size = divmod(size, parts)
    ranges = []
    for index in range(parts):
        start_size = index * avg_size
        end_size = start_size + avg_size - 1
        if index == parts - 1:
            # The last range absorbs the remainder and stops at the final byte.
            end_size += pad_size
        ranges.append((start_size, end_size))
    return ranges


def main(url, thread=3, save_file='', buffer=1024):
    """Download ``url`` into ``save_file`` using several Downloader threads.

    Args:
        url: Address of the resource to download.
        thread: Requested number of threads. It is clamped to at least 1 and
            to at most ``max_thread`` and the resource size in bytes.
        save_file: Path of the output file; it is opened in ``'wb'`` mode.
        buffer: Chunk size in bytes passed to each Downloader.

    Raises:
        ValueError: If the response has no ``Content-Length`` header.
    """
    # Probe the resource once to learn its total size.
    response = urllib2.urlopen(url)
    try:
        length = response.info().get('Content-Length')
    finally:
        response.close()
    if length is None:
        raise ValueError('no Content-Length header returned for %s' % url)
    size = int(length)

    # Never start more threads than max_thread or than there are bytes, and
    # never fewer than one (avoids division by zero and empty ranges).
    thread = max(1, min(thread, max_thread, size))

    # The with-block closes the file even if a worker cannot be started.
    with open(save_file, 'wb') as fobj:
        if size > 0:
            workers = [Downloader(url, start_size, end_size, fobj, buffer)
                       for start_size, end_size in _split_ranges(size, thread)]
            for worker in workers:
                worker.start()
            # Wait until every worker has finished before closing the file.
            for worker in workers:
                worker.join()
    print('download completed!')
# Script entry point: download a sample file with 10 threads and 4 KiB reads.
if __name__ == '__main__':
    url = 'http://192.168.1.2:8082/downloads/10M.zip'
    main(url=url, thread=10, save_file='test.iso', buffer=4096)