YunFile file download script for Linux

Last Update:2017-01-13 Source: Internet

Author: User

Tags flush

Developer on Alibaba Cloud: Build your first app with APIs, SDKs, and tutorials on the Alibaba Cloud. Read more ＞

Script memory and CPU footprint is very low!

When I answered the question, my script was about 60% complete; I finished it yesterday morning while working overtime. It uses OCR to parse the verification code (CAPTCHA) and is configured to recognize digits only!

Several libraries must be installed before use. If you want OCR recognition, you also need to install tesseract-ocr on the system: on Linux you should be able to find a precompiled package, and on a Mac a single command is enough: brew install tesseract.

Requirements.txt

progressbar==2.3
pyquery==1.2.9
requests==2.4.3
Pillow==2.8.2
Optional:

pytesseract==0.1.6
Install all dependencies with a single pip command

pip install requests progressbar pyquery Pillow pytesseract
Test Platform

OS X CentOS
Python 2.6 2.7

How to use

yunfile_downloader -u xxx -p /tmp/download -adb

-u Download address (URL)
-a Auto-upload (requires bypy)
-b Background download
-d Debug
-p Download path (default: current folder)

Background Download View Progress

tail -f /tmp/yunfile.log
#!/usr/bin/env python
# Encoding:utf-8

"""
@version: 0.3
@author: Endoffiht
@file: yunfile_downloader.py
@time: 15/6/29 18:06
"""

import cgi
import getopt
import httplib
import os
import re
import subprocess
import sys
import time

try:
    from cStringIO import StringIO
except ImportError:
    from StringIO import StringIO
from urlparse import urlparse

import requests
from PIL import Image
from progressbar import *
from pyquery import PyQuery as PQ

def yun_download(url, background=False, file_path=None, debug=False, auto=False):
    """Run the complete YunFile download flow for one file page.

    Steps, in order: optionally enable request dumping, create the shared
    session, resolve the download link and the captcha URL, obtain the
    captcha code, wait out the countdown and finally download the file in
    the foreground or in a detached background process.

    Args:
        url: Address of the YunFile file page.
        background: When true, download via ``background_download``.
        file_path: Target directory; ``None`` means the current directory.
        debug: When true, ``patch_send`` is called so outgoing HTTP
            requests are printed.
        auto: Passed through to the download step (upload after download).

    Exits the process with status 2 when no verification code is obtained.
    """
    # Display request headers
    if debug:
        patch_send()

    print('Initiate requests.session')
    init()

    download_link, vcode_url = wait_page(url)

    print('Requseting for Vcode')
    vcode = get_vcode(vcode_url, download_link)
    if not vcode:
        # Without a code the link below cannot be built ('/' + None would raise)
        print('No verification code given, abort')
        sys.exit(2)

    print('Please wait 30s')

    # The code is inserted as an extra path segment in front of the ".html" suffix
    suffix = '.html'
    if download_link.endswith(suffix):
        download_link = download_link[:-len(suffix)]
    download_link = download_link + '/' + vcode + suffix
    print('Download_link with code-->%s' % download_link)

    wait(30)

    print('Begin download process')

    if background:
        background_download(download_link, file_path, auto)
    else:
        download_page(download_link, file_path, auto)

# first step, get to the next page link and verify code picture link
def wait_page(file_url):
    """Fetch the file page and derive the next-step URLs from it.

    Uses the module-level session ``s`` (created by ``init``).

    Args:
        file_url: Address of the YunFile file page.

    Returns:
        A ``(download_link, vcode_url)`` tuple: the absolute URL taken from
        the page's ``#downpage_link`` element and the absolute URL of the
        verification image on the same host.

    Raises:
        ValueError: If the page has no ``#downpage_link`` href.
    """
    r = s.get(file_url)
    # Continue with the final URL of the response (it differs after a redirect)
    file_url = r.url
    # NOTE(review): extra request whose response is unused; presumably the
    # site expects it before the download page is opened - confirm.
    s.get(file_url + '&dr=')

    href = PQ(r.text)('#downpage_link').attr('href')
    if not href:
        raise ValueError('download link not found on page %s' % file_url)

    u = urlparse(file_url)
    base = u.scheme + '://' + u.netloc
    return base + href, base + '/verifyimg/getpcv.html'

def auto_upload(dir_name, cmd='/usr/local/bin/python /usr/bin/bypy.py upload'):
    """Run the upload command inside *dir_name*, print its output and exit.

    Args:
        dir_name: Directory to run the command in; a false value (``None``
            or ``''``) means the current directory.
        cmd: Shell command line to execute.

    The directory is handed to the child process as its working directory
    instead of being pasted into the command line, so characters in the
    path are never interpreted by the shell.

    Always terminates the process: status 0 after the command has run,
    status 1 when the command could not be started (for example because
    the directory does not exist).
    """
    try:
        proc = subprocess.Popen(cmd, shell=True, cwd=dir_name or '.',
                                stdout=subprocess.PIPE)
    except OSError as e:
        print('Cannot run upload command in %s: %s' % (dir_name, e))
        sys.exit(1)

    output = proc.communicate()[0]
    if not isinstance(output, str):
        # Python 3 returns bytes from the pipe
        output = output.decode('utf-8', 'replace')
    print(output)
    sys.exit(0)

def wait(seconds, interval=1):
    """Block for *seconds* countdown steps while printing the time left.

    The remaining count is printed on every fifth step and on each of the
    last four steps. The loop runs exactly *seconds* times; the previous
    ``range(0, seconds - 1)`` stopped one step early.

    Args:
        seconds: Number of countdown steps; zero or negative returns at once.
        interval: Length of one step in seconds (default 1).
    """
    for elapsed in range(seconds):
        remaining = seconds - elapsed
        if elapsed % 5 == 0 or remaining < 5:
            print(remaining)
        time.sleep(interval)

def background_download(link, file_path, auto):
    """Detach from the terminal and run ``download_page`` in the background.

    Forks twice with ``os.setsid()`` in between, then points the standard
    streams of the surviving process at ``/tmp/yunfile.log`` so progress can
    be followed with ``tail -f``. The calling (foreground) process exits with
    status 0 after the first fork.

    Args:
        link: Download page URL, passed to ``download_page``.
        file_path: Target directory, passed to ``download_page``.
        auto: Upload flag, passed to ``download_page``.
    """
    def fork_or_die(label):
        # The parent leaves; only the child returns from this helper
        try:
            if os.fork() > 0:
                sys.exit(0)
        except OSError as e:
            print('fork %s failed: %d (%s)' % (label, e.errno, e.strerror))
            sys.exit(1)

    fork_or_die('#1')

    os.setsid()
    os.umask(0)

    fork_or_die('#2')

    sys.stdout.flush()
    sys.stderr.flush()
    out_filename = "/tmp/yunfile.log"

    # Start each run with an empty log; this also creates the file if missing
    open(out_filename, 'w').close()

    # Nothing is read from stdin in the background, so feed it from the null device
    si = open(os.devnull, 'r')
    so = open(out_filename, 'a+')
    # Unbuffered so error output reaches the log immediately
    se = open(out_filename, 'ab+', 0)
    os.dup2(si.fileno(), sys.stdin.fileno())
    os.dup2(so.fileno(), sys.stdout.fileno())
    os.dup2(se.fileno(), sys.stderr.fileno())

    download_page(link, file_path, auto)
    sys.exit(0)

def patch_send():
    """Make ``httplib.HTTPConnection.send`` print everything it sends.

    Replaces the method with a wrapper that prints the outgoing data between
    start/end markers and then calls the original. Calling this function
    more than once has no further effect, so output is never duplicated.
    """
    old_send = httplib.HTTPConnection.send
    if getattr(old_send, '_yunfile_patched', False):
        return

    def new_send(self, data):
        print('-----start-----')
        print(data)
        print('-----End-----')
        # Return the original result in case the library starts relying on it
        return old_send(self, data)

    new_send._yunfile_patched = True
    httplib.HTTPConnection.send = new_send

def init():
    """Create the module-level ``requests`` session ``s``.

    Installs browser-like request headers on the session and performs one
    GET on the site's front page.
    """
    global s

    origin_url = 'http://www.yunfile.com'

    # NOTE(review): letter case of the header values (in particular the opaque
    # Referer token) was reconstructed from a flattened copy - confirm.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.93 Safari/537.36',
        'Referer': 'http://www.baidu.com/link?url=yRbMCjHoOmVlf-cn9ef'
                   'Zre0vhjkaymuutkdd2o24lyizp2mrsvv_vfdfs4uiprc7&wd='
                   '&eqid=cde1aa8f00001b72000000025587c4ff',
        'Connection': 'keep-alive',
        'Cache-Control': 'max-age=0',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, sdch',
        'Accept-Language': 'zh-CN,zh;q=0.8,en;q=0.6',
    }

    s = requests.session()
    # update() keeps the session's own header mapping, so later lookups such
    # as s.headers['Referer'] do not depend on the exact key spelling
    s.headers.update(headers)
    s.get(origin_url)

# Go to download page
def _page_value(pattern, text, what):
    """Return group 1 of *pattern* in *text*; raise ValueError naming *what* if absent."""
    match = re.search(pattern, text)
    if match is None:
        raise ValueError('cannot find %s in the download page' % what)
    return match.group(1)


def _save_stream(r, file_path):
    """Write the streamed response *r* under *file_path* with a progress bar; return the path."""
    value, params = cgi.parse_header(r.headers['content-disposition'])
    # The name is supplied by the server: keep only its last path component
    # so it cannot point outside the target directory
    file_name = os.path.basename(params['filename'])

    if file_path:
        # Create the target directory when it does not exist yet
        if not os.path.exists(file_path):
            os.makedirs(file_path)
        real_path = os.path.join(file_path, file_name)
    else:
        real_path = file_name

    print('Start downloading ' + real_path)

    # Initialize progress bar
    print('')
    total = int(r.headers['content-length'])
    widgets = ['Downloading ' + file_name, Percentage(), ' ', Bar(marker=RotatingMarker()),
               ' ', ETA(), ' ', FileTransferSpeed()]
    pbar = ProgressBar(widgets=widgets, maxval=total).start()

    # Write the file chunk by chunk
    with open(real_path, 'wb') as fd:
        progress = 0
        for chunk in r.iter_content(1024):
            # Accumulate: the bar expects the total number of bytes so far
            progress += len(chunk)
            fd.write(chunk)
            # Never report more than maxval, even if the length header was too small
            pbar.update(min(progress, total))
    pbar.finish()
    return real_path


def download_page(download_link, file_path, auto):
    """Open the download page, submit its form and save the returned file.

    Uses the module-level session ``s``. The form fields are read from
    ``#d_down_from``; two further values are taken from the page's script.

    Args:
        download_link: URL of the download page (including the captcha code).
        file_path: Target directory; a false value means the current directory.
        auto: When true, ``auto_upload`` is run on the target directory
            after a successful download.

    Raises:
        ValueError: If one of the script values is missing from the page.

    Always ends the process otherwise: status 0 after a successful download
    (``auto_upload`` performs its own exit), status 2 when the page request
    was redirected or saving the file failed.
    """
    r = s.get(download_link)
    # The following requests need the download page as Referer
    s.headers['Referer'] = download_link

    if r.history:
        # A redirect means the download page was not served
        print('Error when downloading')
        print(s.headers)
        sys.exit(2)

    # These URLs have to be visited as well; they may set a new cookie
    for url in re.findall(r"http://www\.yunfile\.com/ckcounter\.jsp[^']*", r.text):
        s.get(url)

    # Collect the form fields with PyQuery
    d = PQ(r.text)
    action = d('#d_down_from').attr('action')
    data = {}
    for x in d('#d_down_from input'):
        x = PQ(x)
        key = x.attr('name')
        if key:
            data[key] = x.attr('value')

    # The following two values are hidden in the page's JS.
    # NOTE(review): the spelling "fileId" (key and pattern) was reconstructed
    # from a case-flattened copy of this script - confirm against the page.
    data['vid'] = _page_value(r'var vericode = "(\w+)"', r.text, 'vericode')
    data['fileId'] = _page_value(r'fileId\.value = "(\w+)";', r.text, 'fileId')

    # POST returns the file; the response must be streamed
    r = s.post(action, data, stream=True)

    try:
        _save_stream(r, file_path)
    except Exception as e:
        print('Download failed: %r' % (e,))
        print(s.headers)
        sys.exit(2)

    print('Download complete')

    if auto:
        print('Start upload')
        auto_upload(file_path or '.')
    else:
        sys.exit(0)

# get Authenticode
def get_vcode(vcode_url, refer):
    """Obtain the verification code, asking the user to confirm or type it.

    Each round downloads the captcha image with the module-level session
    ``s``, prints it as ASCII art and, when ``pytesseract`` is usable,
    suggests an OCR guess reduced to its digits.

    Args:
        vcode_url: URL of the captcha image.
        refer: Value to set as the session's Referer header.

    Returns:
        The confirmed OCR guess or the code typed by the user. Answering
        ``n``, or pressing ENTER when there is no 4-digit guess to confirm,
        fetches a new image and asks again.
    """
    # The image request needs the download link as Referer
    s.headers['Referer'] = refer

    while True:
        r = s.get(vcode_url)
        m_image = Image.open(StringIO(r.content))
        image_to_ascii(m_image)

        # OCR is optional: skipped when pytesseract is missing or fails
        try:
            import pytesseract
            # Keep only the digits of whatever the OCR returned
            guess_code = re.sub(r'\D', '', pytesseract.image_to_string(m_image, config='digits'))
        except Exception:
            guess_code = None

        guess_ok = bool(guess_code) and len(guess_code) == 4
        if guess_ok:
            ask = "Vcode == %s? Press ENTER to Confirm,n for refresh or tell me------>" % guess_code
        else:
            ask = 'Please tell me the code------>'

        code = raw_input(ask).strip()

        if not code:
            if guess_ok:
                return guess_code
            # Nothing to confirm: try again with a fresh image
            continue
        if code.lower() == 'n':
            continue
        return code

# Picture turn to ASCII, http://a-eter.blogspot.com/2010/04/image-to-ascii-art-in-python.html
def image_to_ascii(image):
    """Print *image* as ASCII art, 100 characters per line.

    The image is resized to a width of 100 (height scaled by the same
    factor), converted to grayscale and every pixel is replaced by one of
    eleven characters, darkest first.

    Args:
        image: Object with ``size``, ``resize`` and ``convert`` in the style
            of a PIL image; ``getdata()`` of the converted image must yield
            integer gray values.
    """
    ascii_chars = ['#', 'A', '@', '%', 'S', '+', '<', '*', ':', ',', '.']

    width, height = image.size
    new_width = 100
    new_height = height * new_width // width

    # "L" converts to grayscale
    gray = image.resize((new_width, new_height)).convert('L')

    # Values 0-255 floor-divided by 25 give the indexes 0-10 of the table;
    # floor division keeps the index an int on every Python version
    art = ''.join(ascii_chars[pixel // 25] for pixel in gray.getdata())

    for start in range(0, len(art), new_width):
        print(art[start:start + new_width])

# User Login, pending development
def login():
    """Placeholder for user login; not implemented yet and does nothing."""
    pass

def main(argv=None):
    """Parse the command line and start the download.

    Args:
        argv: Full argument list including the program name; defaults to
            ``sys.argv``.

    Options: ``-u URL`` (required), ``-p PATH``, ``-d`` debug,
    ``-b`` background download, ``-a`` auto-upload.

    Prints the help text and exits with status 2 on an unknown option or a
    missing ``-u``.
    """
    if argv is None:
        argv = sys.argv

    short_opts = 'u:p:dba'
    try:
        optlist, args = getopt.getopt(argv[1:], short_opts)
    except getopt.GetoptError:
        print_help()
        sys.exit(2)

    config = dict()
    config['debug'] = False
    config['backgroud'] = False
    config['path'] = None
    config['auto_upload'] = False

    for k, v in optlist:
        # getopt reports option names with their leading dash
        if k == '-u':
            config['url'] = v
        elif k == '-d':
            config['debug'] = True
        elif k == '-b':
            config['backgroud'] = True
        elif k == '-p':
            config['path'] = v
        elif k == '-a':
            config['auto_upload'] = True

    if 'url' not in config:
        print_help()
        sys.exit(2)

    yun_download(config['url'], config['backgroud'], config['path'],
                 config['debug'], config['auto_upload'])

def print_help():
    """Print the command-line usage of the script."""
    print('Usage: yunfile_downloader.py -u URL [-p PATH] [-a] [-b] [-d]\n'
          '  -u URL   download address (required)\n'
          '  -p PATH  download path (default: current folder)\n'
          '  -a       auto-upload after the download\n'
          '  -b       download in the background\n'
          '  -d       debug: print outgoing requests')

# Run the command-line interface only when executed as a script
if __name__ == '__main__':
    main()

This article is an English version of an article which is originally in the Chinese language on aliyun.com and is provided for information purposes only. This website makes no representation or warranty of any kind, either expressed or implied, as to the accuracy, completeness ownership or reliability of the article or any translations thereof. If you have any concerns or complaints relating to the article, please send an email, providing a detailed description of the concern or complaint, to info-contact@alibabacloud.com. A staff member will contact you within 5 working days. Once verified, infringing content will be removed immediately.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

Get Started for Free

Sales Support

1 on 1 presale consultation

Chat Contact Sales
After-Sales Support

24/7 Technical Support 6 Free Tickets per Quarter Faster Response

Open a Ticket
Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.

Learn More