Downloading JavaScript special-effect templates from the Lazy Library with Python (tags: python, javascript)
This is a simple Python script whose main purpose is to download JavaScript special-effect templates from the Lazy Library site. The script uses the third-party gevent library (and imports BeautifulSoup from bs4), so these must be installed before running it.
#!/usr/bin/python
# -*- coding: UTF-8 -*-
"""Download JavaScript effect templates (zip archives) listed on a web site.

Description: Python crawler that captures the JS script templates
Author: admin
Create-Date: 2015-05-25
Version: 1.0

Each listing page is fetched and parsed with BeautifulSoup; every link found
in the template list is mapped to a zip download URL and fetched in its own
gevent greenlet. Requires the third-party packages ``gevent`` and ``bs4``.
"""
import os
import re
import sys
import urllib
from contextlib import closing

import gevent
from gevent import monkey
from bs4 import BeautifulSoup

try:
    # Python 3 keeps urlopen/urlretrieve in urllib.request.
    import urllib.request as urllib_request
except ImportError:
    # Python 2 exposes them directly on urllib.
    urllib_request = urllib

# Patch the socket module so that network calls made inside greenlets
# cooperate instead of blocking each other.
monkey.patch_socket()

# NOTE(review): the host name is reproduced exactly as published and looks
# garbled by translation -- confirm the real site address before running.
HTTP_URL = 'http://www.lanw.uku.com%s'
# HTTP_URL without its trailing '%s', followed by the archive path template.
DOWNLOAD_URL = HTTP_URL[:-2] + '/js/d%szip'
# One or more digits followed by one or more dots (e.g. "1234."); the matched
# dot doubles as the separator in front of "zip" in DOWNLOAD_URL.
reg = r'\d{1,}\.+'


def encode(text):
    """Return *text* encoded as UTF-8."""
    return text.encode("utf8")


def createDirectory(curPath):
    """Return the path of *curPath* inside the download directory.

    The download directory is created under the current working directory
    on first use.
    """
    myPath = os.path.join(getSubDirectory(), u'js code template')
    if not os.path.exists(myPath):
        os.mkdir(myPath)
    return os.path.join(myPath, curPath)


def getSubDirectory():
    """Return the current working directory."""
    return os.getcwd()


def schedule(a, b, c):
    """Progress hook for ``urlretrieve``: write the percentage to stdout.

    a -- number of blocks transferred so far
    b -- block size in bytes
    c -- total size in bytes
    """
    if c <= 0:
        # urlretrieve reports a non-positive total when the size is unknown;
        # no meaningful percentage exists (and 0 would divide by zero).
        return
    per = min(100.0, 100.0 * a * b / c)
    # '\r' rewinds to the start of the line so the figure updates in place.
    sys.stdout.write('%.1f%%\r' % per)
    sys.stdout.flush()


def geturllist(url):
    """Return ``{absolute detail-page URL: link text}`` for one listing page.

    An empty dict is returned when the page contains no template list.
    Errors raised while fetching *url* propagate to the caller.
    """
    # closing() guarantees the response is released even if read() fails,
    # and works with both the Python 2 and Python 3 response objects.
    with closing(urllib_request.urlopen(url)) as html:
        content = html.read()

    # Name the parser explicitly so results do not depend on which optional
    # parsers happen to be installed.
    decodeHtml = BeautifulSoup(content, 'html.parser')
    # NOTE(review): the class name was published as 'LIST-pngjs'; lower case
    # is assumed here (CSS class matching is case-sensitive) -- confirm.
    containers = decodeHtml.find_all('div', {'class': 'list-pngjs'})
    if not containers:
        print('no template list found in %s' % url)
        return {}

    url_list = {}
    # href=True skips anchors without a target, which would otherwise
    # produce URLs ending in "None".
    for a_tag in containers[0].find_all('a', href=True):
        url_list[HTTP_URL % a_tag.get('href')] = a_tag.get_text()
    return url_list


def download(down_url):
    """Download the archive for one ``(detail-page URL, title)`` pair.

    The numeric id is extracted from the URL with ``reg`` and the archive is
    saved as ``<title>.zip`` in the download directory. Failures are printed
    rather than raised.
    """
    m = re.search(reg, down_url[0])
    if m is None:
        print('no template id found in %s' % down_url[0])
        return
    name = DOWNLOAD_URL % m.group(0)
    try:
        # name[-4:] is the ".zip" extension taken from the download URL.
        urllib_request.urlretrieve(
            name, createDirectory(down_url[1] + name[-4:]), schedule)
    except Exception as e:
        # Best effort: one failed archive must not abort the other
        # greenlets, so report it and carry on.
        print(e)


def getpageurl(xUrl, first_page=1, last_page=48):
    """Return the listing-page URLs obtained by filling *xUrl* with page numbers.

    xUrl -- URL template containing one ``%s`` placeholder
    first_page, last_page -- inclusive page range

    NOTE(review): the published listing lost the bounds of the original
    ``xrange()`` call; 1..48 is an assumption -- confirm against the site.
    """
    return [xUrl % page for page in range(first_page, last_page + 1)]


def main():
    """Crawl every listing page and download all linked templates."""
    jobs = []
    # Reuse HTTP_URL so the host is defined in one place; the inner '%s'
    # survives as the page-number placeholder.
    pageurl = getpageurl(HTTP_URL % '/js/p%s.html')
    for page in pageurl:
        for item in geturllist(page).items():
            jobs.append(gevent.spawn(download, item))
    gevent.joinall(jobs)


if __name__ == '__main__':
    main()
That is all for this article. I hope you find it useful.