To implement a simple multi-threaded download, pay attention to the following points:
1. File size: it can be read from the response header. For example, "Content-Length: 911" indicates that the size is 911 bytes.
2. Job splitting: specify the part of the file that each thread downloads by adding a Range field to the request header. For example, "Range: bytes=300-400" requests bytes 300 through 400 (both ends inclusive). Note that the valid byte range of the requested file is [0, size-1].
3. Download file aggregation: each thread saves its downloaded block as a temporary file. After all threads have finished, the temporary files are written to the final file in sequence.
Implementation code:
The code is as follows:
#!/usr/bin/python
# -*- coding: utf-8 -*-
# filename: paxel.py
# FROM: http://jb51.net/code/view/58/full/
# Jay modified it a little and saved it for potential future use.
'''It is a multi-thread downloading tool
It was developed following axel.
Author: volans
E-mail: volansw [at] gmail.com
'''
import os
import shutil
import sys
import time
import urllib
from threading import Thread
# Proxy map in the form urllib expects ({scheme: proxy URL}); it is the
# default value of paxel()'s `proxies` argument, for when you want to go
# through an HTTP proxy.
local_proxies = {'http': 'http://131.139.58.200:8080'}
class AxelPython(Thread, urllib.FancyURLopener):
    '''Worker thread that downloads one byte range of a URL into a file.

    The class is both a ``Thread`` (``start()`` executes ``run()`` in the
    background) and a ``urllib.FancyURLopener`` (it adds a ``Range`` request
    header to itself and opens the URL directly).

    Attributes set by ``__init__``:
        name:       thread name, also used in the log messages.
        url:        address of the resource to download.
        filename:   file that receives this worker's bytes.  Data is
                    appended, and whatever the file already contains is
                    treated as the already-downloaded head of the range.
        ranges:     ``(first_byte, last_byte)`` pair, both ends inclusive.
        downloaded: number of bytes of this range stored in ``filename``;
                    updated while ``run()`` is downloading.
    '''

    def __init__(self, threadname, url, filename, ranges=0, proxies={}):
        '''Store the job description; nothing is downloaded yet.

        ``ranges`` must be an indexable ``(first, last)`` pair by the time
        ``run()`` executes; the default of 0 is only a placeholder.
        ``proxies`` is handed to ``FancyURLopener`` unchanged (the default
        dict is never mutated by this class).
        '''
        Thread.__init__(self, name=threadname)
        urllib.FancyURLopener.__init__(self, proxies)
        self.name = threadname
        self.url = url
        self.filename = filename
        self.ranges = ranges
        self.downloaded = 0

    def run(self):
        '''Download the still-missing tail of ``self.ranges``.

        The current size of ``self.filename`` decides where to resume, so a
        re-run continues instead of starting over.
        '''
        try:
            self.downloaded = os.path.getsize(self.filename)
        except OSError:
            # The part file does not exist yet: nothing downloaded so far.
            self.downloaded = 0

        # Rebuild the start point from what is already on disk.
        self.startpoint = self.ranges[0] + self.downloaded

        # ranges[1] is the *inclusive* last byte, so the part is complete
        # only once the start point has moved past it.  (Comparing with >=
        # would drop the final byte when exactly one byte is missing.)
        if self.startpoint > self.ranges[1]:
            print('Part %s has been downloaded over.' % self.filename)
            return

        self.oneTimeSize = 16384  # read 16 KiB per iteration
        print('Task %s will download from %d to %d'
              % (self.name, self.startpoint, self.ranges[1]))
        self.addheader("Range",
                       "bytes=%d-%d" % (self.startpoint, self.ranges[1]))
        self.urlhandle = self.open(self.url)
        try:
            # Open the part file once and append every chunk to it.
            with open(self.filename, 'ab') as filehandle:
                data = self.urlhandle.read(self.oneTimeSize)
                while data:
                    filehandle.write(data)
                    self.downloaded += len(data)
                    data = self.urlhandle.read(self.oneTimeSize)
        finally:
            # Release the connection even if reading or writing failed.
            self.urlhandle.close()
def GetUrlFileSize(url, proxies={}):
    '''Return the size in bytes announced by the server for ``url``.

    Opens ``url`` with ``urllib.urlopen`` (through ``proxies``, a
    ``{scheme: proxy_url}`` dict that is passed on untouched) and reads the
    ``Content-Length`` response header.

    Returns 0 when the server sends no ``Content-Length`` header.
    Raises ``ValueError`` if the header value is not an integer.
    '''
    urlHandler = urllib.urlopen(url, proxies=proxies)
    try:
        # getheader() matches the field name case-insensitively and only
        # the exact field, unlike a substring scan over the raw lines.
        length = urlHandler.info().getheader('Content-Length')
    finally:
        # Only the headers are needed; do not keep the connection open.
        urlHandler.close()
    if not length:
        return 0
    return int(length.strip())
def SpliteBlocks(totalsize, blocknumber):
    '''Split ``[0, totalsize - 1]`` into ``blocknumber`` inclusive ranges.

    Returns a list of ``(first_byte, last_byte)`` tuples.  Every block has
    ``totalsize // blocknumber`` bytes except the last one, which also
    takes the remainder so that the final range always ends at
    ``totalsize - 1``.

    Raises ``ZeroDivisionError`` when ``blocknumber`` is 0.
    '''
    # Floor division keeps the bounds integral regardless of how `/`
    # behaves for ints in the running interpreter.
    blocksize = totalsize // blocknumber
    ranges = [(i * blocksize, (i + 1) * blocksize - 1)
              for i in range(blocknumber - 1)]
    ranges.append((blocksize * (blocknumber - 1), totalsize - 1))
    return ranges
def islive(tasks):
    '''Return True if at least one thread in ``tasks`` is still running.

    ``tasks`` is an iterable of thread objects; an empty iterable yields
    False.
    '''
    # is_alive() is the spelling available on both old and current
    # threading.Thread objects; the camelCase alias is not.
    return any(task.is_alive() for task in tasks)
def paxel(url, output, blocks=6, proxies=local_proxies):
    '''Download ``url`` to ``output`` using ``blocks`` parallel threads.

    Steps:
      1. ask the server for the file size (``GetUrlFileSize``);
      2. split it into ``blocks`` byte ranges (``SpliteBlocks``);
      3. start one ``AxelPython`` worker per range, each writing to
         ``tmpfile_<n>`` in the current directory;
      4. print a progress line until all workers have stopped;
      5. concatenate the part files, in order, into ``output`` and delete
         them.

    ``proxies`` is a ``{scheme: proxy_url}`` dict used for the size request
    and for every worker.

    Raises ``IOError`` if the server does not report a size, or if a part
    file does not have the expected length once the workers have finished.
    In the latter case the part files are kept so a later call can resume.
    '''
    size = GetUrlFileSize(url, proxies)
    if size <= 0:
        # Without a size there is nothing to split (and the percentage
        # below would divide by zero).
        raise IOError('cannot determine the size of %s' % url)

    ranges = SpliteBlocks(size, blocks)
    filename = ["tmpfile_%d" % i for i in range(blocks)]

    tasks = []
    for i in range(blocks):
        # Forward the proxy settings so the workers use the same route as
        # the size request above.
        task = AxelPython("thread_%d" % i, url, filename[i], ranges[i],
                          proxies=proxies)
        task.daemon = True  # do not keep the process alive on Ctrl-C
        task.start()
        tasks.append(task)

    time.sleep(2)
    while islive(tasks):
        downloaded = sum(task.downloaded for task in tasks)
        process = downloaded / float(size) * 100
        show = '\rFilesize:%d Downloaded:%d Completed:%.2f%%' % (
            size, downloaded, process)
        sys.stdout.write(show)
        sys.stdout.flush()
        time.sleep(0.5)

    # A worker ends quietly when its connection drops, so check every part
    # before merging instead of producing a silently truncated file.
    for part, (first, last) in zip(filename, ranges):
        expected = max(last - first + 1, 0)
        actual = os.path.getsize(part) if os.path.exists(part) else 0
        if actual != expected:
            raise IOError('%s has %d bytes, expected %d'
                          % (part, actual, expected))

    with open(output, 'wb') as filehandle:
        for part in filename:
            if not os.path.exists(part):
                # An empty range (more blocks than bytes) never creates
                # its part file.
                continue
            with open(part, 'rb') as f:
                # Stream the part instead of loading it into memory.
                shutil.copyfileobj(f, filehandle)
            try:
                os.remove(part)
            except OSError:
                # Cleanup is best-effort; the merged data is already
                # written.
                pass
if __name__ == '__main__':
    # Example run: fetch a sample file with 4 threads and no proxy.
    url = 'http://dldir1.qq.com/qqfile/QQforMac/QQ_V3.1.1.dmg'
    output = 'download.file'
    paxel(url, output, blocks=4, proxies={})