#! /usr/bin/env python
# coding=utf-8
from __future__ import unicode_literals

import cPickle
import os
import sys
import threading
import time
import urllib2
from collections import namedtuple
from multiprocessing.dummy import Pool as ThreadPool
from urlparse import urlsplit
# Global lock: serializes writes to the shared output file object and
# reads of the shared `blocks` progress list across worker threads.
lock = threading.Lock()

# Default download parameters (tunable via command-line flags below):
#   thread_count - number of concurrent range-request workers
#   buffer_size  - bytes read from the socket per chunk
#   block_size   - size of each byte-range a worker is responsible for
defaults = dict(thread_count=10,
                buffer_size=10 * 1024,
                block_size=1000 * 1024)
def progress (Percent, WIDTH=50):
Print "%s%d%%\r"% ('%%-%ds '% width)% (width * percent/100 * ' = '), percent),
If percent >= 100:
Print
Sys.stdout.flush ()
def write_data(filepath, data):
    """Pickle `data` to `filepath` (binary mode), used to checkpoint
    download progress so an interrupted transfer can resume."""
    with open(filepath, 'wb') as fobj:
        cPickle.dump(data, fobj)
def read_data(filepath):
    """Load and return the pickled checkpoint previously written to
    `filepath` by write_data.

    NOTE(review): unpickling is only safe here because the .inf file is
    produced by this same program; never point this at untrusted input.
    """
    with open(filepath, 'rb') as fobj:
        return cPickle.load(fobj)
# Immutable record describing the remote file, filled in from the
# response headers of a HEAD request (see get_file_info).
FileInfo = namedtuple('FileInfo', 'url name size lastmodified')
def get_file_info(url):
    """Issue an HTTP HEAD request for `url` and return a FileInfo.

    The file name is taken from the Content-Disposition header when
    present (stripping surrounding quotes), otherwise from the last
    path component of the URL.  Size falls back to 0 and last-modified
    to '' when the server omits those headers.
    """
    class HeadRequest(urllib2.Request):
        # urllib2 has no native HEAD support; overriding get_method is
        # the standard trick to turn a Request into a HEAD request.
        def get_method(self):
            return "HEAD"

    res = urllib2.urlopen(HeadRequest(url))
    res.read()  # drain (empty) body so the connection can be reused
    headers = dict(res.headers)
    size = int(headers.get('content-length', 0))
    lastmodified = headers.get('last-modified', '')
    name = None
    if 'content-disposition' in headers:
        name = headers['content-disposition'].split('filename=')[1]
        # strip surrounding single or double quotes, if any
        if name[0] == '"' or name[0] == "'":
            name = name[1:-1]
    else:
        name = os.path.basename(urlsplit(url)[2])
    return FileInfo(url, name, size, lastmodified)
def download (URL, output,
Thread_count = defaults[' Thread_count '],
Buffer_size = defaults[' buffer_size '],
Block_size = defaults[' block_size ']):
# Get Latest File info
File_info = get_file_info (URL)
# init path
If output is None:
Output = File_info.name
Workpath = '%s.ing '% output
InfoPath = '%s.inf '% output
# split file to blocks. Every block are a array [start, offset, end],
# then each greenlet download Filepart according to a block, and
# Update the block ' offset.
blocks = []
if Os.path.exists (InfoPath):
# Load Blocks
_x, blocks = Read_data (InfoPath)
if (_x.url!= URL or
_x.name!= file_info.name or
_x.lastmodified!= file_info.lastmodified):
blocks = []
If len (blocks) = 0:
# set blocks
if block_size > file_info.size:
blocks = [[0, 0, File_info.size]]
Else:
Block_count, remain = Divmod (File_info.size, block_size)
blocks = [[I*block_size, I*block _size, (i+1) *block_size-1] for I in range (Block_count)]
Blocks[-1][-1] + = remain
# Create new blank Workpath
with open (Workpath, ' WB ') as Fobj:
fobj.write (")
print ' downloading%s '% URL
# Start Monitor
Threading. Thread (Target=_monitor, Args= (InfoPath, File_info, blocks)). Start ()
# Start downloading
With open (Workpath, ' rb+ ') as Fobj:
args = [(URL, blocks[i], fobj, buffer_size) for I in range (len (blocks)) if BLOCKS[I][1] < blocks[i][2]]
If Thread_count > Len (args):
Thread_count = Len (args)
Pool = ThreadPool (Thread_count)
Pool.map (_worker, args)
Pool.close ()
Pool.join ()
# rename Workpath to Output
If os.path.exists (output):
Os.remove (Output)
Os.rename (Workpath, Output)
# Delete InfoPath
If Os.path.exists (InfoPath):
Os.remove (InfoPath)
Assert all ([block[1]>=block[2] to block in blocks]) is True
def _worker(task):
    """Download one byte range of the file (runs in a pool thread).

    `task` is a tuple (url, block, fobj, buffer_size) where `block` is
    the shared mutable list [start, offset, end]; advancing block[1]
    in place is how progress is reported to _monitor and persisted for
    resume.  (Tuple unpacked in the body rather than in the signature
    so the function is not tied to Python 2's tuple-parameter syntax.)
    """
    url, block, fobj, buffer_size = task
    req = urllib2.Request(url)
    # Request only this block's remaining bytes.
    req.headers['Range'] = 'bytes=%s-%s' % (block[1], block[2])
    res = urllib2.urlopen(req)
    while 1:
        chunk = res.read(buffer_size)
        if not chunk:
            break
        # Lock guards both the shared file handle (seek+write must be
        # atomic across threads) and the shared block list.
        with lock:
            fobj.seek(block[1])
            fobj.write(chunk)
            block[1] += len(chunk)
def _monitor(infopath, file_info, blocks):
    """Progress/checkpoint loop (runs in its own thread).

    Every 2 seconds: compute overall percent from the shared `blocks`
    offsets, draw the progress bar, and checkpoint state to `infopath`
    so the download can resume after an interruption.  Exits once the
    transfer reaches 100%.
    """
    while 1:
        with lock:
            percent = sum([block[1] - block[0] for block in blocks]) * 100 / file_info.size
        progress(percent)
        if percent >= 100:
            break
        write_data(infopath, (file_info, blocks))
        time.sleep(2)
if __name__ = = ' __main__ ':
Import Argparse
Parser = Argparse. Argumentparser (description= ' Download file by Multi-threads. ')
Parser.add_argument (' url ', type=str, help= ' URL of the download file ')
Parser.add_argument (' O ', Type=str, Default=none, dest= "Output", help= ' output file ')
Parser.add_argument (' t ', type=int, default=defaults[' Thread_count '), dest= ' Thread_count ', help= ' thread counts to Downloading ')
Parser.add_argument (' B ', type=int, default=defaults[' buffer_size '), dest= "Buffer_size", help= ' buffer size ')
Parser.add_argument (' s ', Type=int, default=defaults[' block_size '), dest= "Block_size", help= ' block size ')
argv = sys.argv[1:]
If Len (argv) = = 0:
argv = [' Https://eyes.nasa.gov/eyesproduct/EYES/os/win ']
args = Parser.parse_args (argv)
Start_time = Time.time ()
Download (Args.url, Args.output, Args.thread_count, Args.buffer_size, Args.block_size)
print ' times:%ds '% int (time.time ()-start_time)