To achieve a simple multithreaded download, you need to focus on the following points:
1. File size: it can be extracted from the response header; for example, "Content-Length: 911" indicates that the size is 911 bytes.
2. Task split: specify which piece of the file each thread downloads by adding "Range: bytes=300-400" to the request header (which means "download bytes 300-400 of the content"); note that the range of a file that can be requested is [0, size - 1] bytes.
3. Aggregation of downloaded files: Each thread saves its own downloaded block of files as a temporary file, all threads are completed, and then the temporary files are aggregated into the final file in order.
Implementation code:
The code is as follows:
#!/usr/bin/python
#-*-Coding:utf-8-*-
# filename:paxel.py
# from:http://jb51.net/code/view/58/full/
# Jay modified it a little and save for further potential usage.
' It is a multi-thread downloading tool
It was developed following Axel.
Author:volans
E-MAIL:VOLANSW [at] gmail.com
'''
Import Sys
Import OS
Import time
Import Urllib
From threading Import Thread
# in the case for your want to use Http_proxy
local_proxies = {' http ': ' http://131.139.58.200:8080 '}
class Axelpython(Thread):
    """Worker thread that downloads one byte range of a file.

    Each instance fetches the inclusive byte range ``ranges = (start, end)``
    of ``url`` and appends it to the temporary file ``filename``.  If the
    temp file already exists, the download resumes after its current size.

    NOTE(review): the original inherited urllib.FancyURLopener (Python 2);
    this port uses urllib.request, since the opener classes were deprecated
    and later removed from Python 3.
    """

    def __init__(self, threadname, url, filename, ranges=0, proxies=None):
        """Set up the worker.

        threadname -- name passed to the Thread base class
        url        -- resource to download
        filename   -- temp file this worker appends its block to
        ranges     -- (start, end) inclusive byte range; the default 0 is
                      kept only for signature compatibility and is not a
                      usable value (run() indexes into it)
        proxies    -- optional mapping for urllib.request.ProxyHandler
                      (was a mutable default {} in the original)
        """
        Thread.__init__(self, name=threadname)
        self.url = url
        self.filename = filename
        self.ranges = ranges
        self.proxies = proxies
        self.downloaded = 0  # bytes fetched so far, read by the progress loop

    def run(self):
        """Thread entry point: fetch the assigned byte range."""
        try:
            # Resume support: count bytes already saved by a previous run.
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            self.downloaded = 0

        # Rebuild the start point, skipping what is already on disk.
        self.startpoint = self.ranges[0] + self.downloaded
        if self.startpoint >= self.ranges[1]:
            print('Part %s has been downloaded over.' % self.filename)
            return

        self.onetimesize = 16384  # read 16 KiB per iteration
        print('task %s download from %d to %d' %
              (self.name, self.startpoint, self.ranges[1]))

        request = urllib.request.Request(self.url)
        request.add_header('Range',
                           'bytes=%d-%d' % (self.startpoint, self.ranges[1]))
        if self.proxies:
            opener = urllib.request.build_opener(
                urllib.request.ProxyHandler(self.proxies))
        else:
            opener = urllib.request.build_opener()
        self.urlhandle = opener.open(request)

        # Keep the file open for the whole transfer (the original reopened
        # it for every single chunk) and append chunk by chunk.
        with open(self.filename, 'ab') as filehandle:
            while True:
                data = self.urlhandle.read(self.onetimesize)
                if not data:
                    break
                filehandle.write(data)
                # BUG FIX: the original used '=', so the progress counter
                # only ever held the size of the most recent chunk.
                self.downloaded += len(data)
def geturlfilesize(url, proxies=None):
    """Return the size in bytes of the resource at *url*.

    Reads the Content-Length response header; returns 0 when the server
    does not send one.  *proxies* is an optional mapping handed to
    urllib.request.ProxyHandler (was a mutable default {} originally, and
    the original scanned raw header lines for the substring 'Length').
    """
    if proxies:
        opener = urllib.request.build_opener(
            urllib.request.ProxyHandler(proxies))
    else:
        opener = urllib.request.build_opener()
    with opener.open(url) as urlhandle:
        length = urlhandle.headers.get('Content-Length', 0)
    return int(length)
def spliteblocks(totalsize, blocknumber):
    """Split *totalsize* bytes into *blocknumber* contiguous ranges.

    Returns a list of (start, end) tuples with INCLUSIVE ends.  The first
    blocknumber-1 blocks are of equal size; the last block absorbs the
    remainder so that the ranges exactly cover [0, totalsize - 1].

    Uses floor division ('//'): the original Python 2 '/' would produce
    float offsets under Python 3.
    """
    blocksize = totalsize // blocknumber
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges
def islive(tasks):
    """Return True if any thread in *tasks* is still running.

    Uses Thread.is_alive(): the original's isAlive() alias was removed in
    Python 3.9.
    """
    return any(task.is_alive() for task in tasks)
def paxel(url, output, blocks=6, proxies=local_proxies):
    """Download *url* with *blocks* threads and merge the parts into *output*.

    url     -- resource to download
    output  -- path of the final merged file
    blocks  -- number of worker threads / byte-range blocks
    proxies -- proxy mapping forwarded to the size probe and the workers

    Side effects: creates and then removes temp files tmpfile_0..tmpfile_N
    in the current directory, writes *output*, and prints progress to stdout.
    """
    size = geturlfilesize(url, proxies)
    ranges = spliteblocks(size, blocks)
    threadname = ['thread_%d' % i for i in range(blocks)]
    filename = ['tmpfile_%d' % i for i in range(blocks)]

    tasks = []
    for i in range(blocks):
        task = Axelpython(threadname[i], url, filename[i], ranges[i])
        task.daemon = True  # setDaemon() is deprecated in Python 3
        task.start()
        tasks.append(task)

    # Give the workers a moment to start before polling progress.
    time.sleep(2)
    while islive(tasks):
        downloaded = sum(task.downloaded for task in tasks)
        process = downloaded / float(size) * 100
        show = '\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (
            size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.5)

    # Aggregate the temp files, in order, into the final file.
    with open(output, 'wb') as filehandle:
        for tmpname in filename:
            with open(tmpname, 'rb') as part:
                filehandle.write(part.read())
            try:
                os.remove(tmpname)
            except OSError:
                # Best effort cleanup only: a leftover temp file is harmless.
                pass
if __name__ == '__main__':
    # Demo: fetch a .dmg with 4 threads, no proxy.
    # NOTE(review): the listing's URL was fully upper-cased by the source
    # garbling; the path case below is a best guess — verify before use.
    url = 'http://dldir1.qq.com/qqfile/QQforMac/QQ_V3.1.1.dmg'
    output = 'download.file'
    paxel(url, output, blocks=4, proxies={})