Incredibly powerful! A Python script that crawls the Cssmoban site and downloads its templates

Source: Internet
Author: User
Tags: save file

A Python implementation that crawls the templates listed at http://www.cssmoban.com/cssthemes and downloads them


Implementation code

# -*- coding: utf-8 -*-
"""Crawl http://www.cssmoban.com/cssthemes and download every template archive.

The script walks the paginated theme listing, opens each theme's detail page,
and saves the archive linked there under ``template/``.  Progress messages are
appended to ``Info.log`` and failures to ``Error.log``; both are also printed.

Runs on Python 2 (``urllib2`` / ``urlparse``) and on Python 3.
"""
import io
import os
import os.path
import re
import socket

try:  # Python 2 module names, as used by the original script.
    import urllib2
    import urlparse
except ImportError:  # Python 3 equivalents, bound to the same names.
    import urllib.parse as urlparse
    import urllib.request as urllib2

# Listing page the crawl starts from.
URL = 'http://www.cssmoban.com/cssthemes'

# Directory the downloaded archives are written to.
TEMPLATE_DIR = 'template/'

# NOTE(review): the timeout argument was lost from the published listing;
# 60 seconds is a placeholder -- confirm the intended value.
TIMEOUT_SECONDS = 60

# Global timeout for every socket opened through urllib.
socket.setdefaulttimeout(TIMEOUT_SECONDS)

# Anchors on a listing page that point at a theme detail page.
_THEME_LINK_RE = re.compile(r'<a[\s]+href="/cssthemes/\d+\.shtml">.*?/a>')

# NOTE(review): the title pattern was stripped from the published listing;
# an <h1> heading is assumed -- confirm against a real detail page.
_TITLE_RE = re.compile(r'<h1>(.*?)</h1>')

# NOTE(review): reconstructed from the sample anchor quoted in the original
# debug comment (<a href="...zip" target="_blank" class="button btn-down" ...>).
_DOWNLOAD_LINK_RE = re.compile(
    r'<a\b[^>]*\bclass="button btn-down"[^>]*>.*?</a>', re.S)

# The anchor whose text is the "next page" label.  \u4e0b\u4e00\u9875 is the
# Chinese label named in the original comment; "Next page" is the literal the
# translated listing shows.
_NEXT_PAGE_RE = re.compile(
    u'<a\\b[^>]*>[^<]*(?:\u4e0b\u4e00\u9875|Next page)[^<]*</a>')

_HREF_RE = re.compile(r'href="([^"]*)"')

# Characters that must not reach the file system inside a file name.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _read(url):
    """Return the raw bytes served at *url*, closing the response afterwards."""
    response = urllib2.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


def _href(tag):
    """Return the value of the first ``href="..."`` attribute in *tag*.

    Raises:
        ValueError: if *tag* carries no href attribute.
    """
    match = _HREF_RE.search(tag)
    if match is None:
        raise ValueError('no href attribute in %r' % (tag,))
    return match.group(1)


def _safe_filename(name):
    """Replace path separators and other unsafe characters in *name*."""
    return _UNSAFE_FILENAME_CHARS.sub('_', name).strip(' .')


def geturlcontent(url):
    """Fetch *url* and return its body as text (decoded as UTF-8)."""
    # Undecodable bytes are replaced so one bad page cannot abort the crawl.
    return _read(url).decode('utf-8', 'replace')


def getallurl(html):
    """Return every ``<a href="/cssthemes/N.shtml">...</a>`` anchor in *html*."""
    return _THEME_LINK_RE.findall(html)


def getdowntitle(html):
    """Return the candidate titles found on a theme detail page."""
    return _TITLE_RE.findall(html)


def getdownurl(html):
    """Return the download-button anchors found on a theme detail page."""
    return _DOWNLOAD_LINK_RE.findall(html)


def getnexturl(html):
    """Return the "next page" anchors found on a listing page."""
    return _NEXT_PAGE_RE.findall(html)


def download(title, url):
    """Download *url* into ``template/<title>.<extension of url>``.

    Args:
        title: Display name of the template (text, or UTF-8 encoded bytes).
        url: Address of the archive; the text after its last dot becomes the
            file extension.
    """
    if isinstance(title, bytes):
        title = title.decode('utf-8', 'replace')
    payload = _read(url)
    if not os.path.isdir(TEMPLATE_DIR):
        os.makedirs(TEMPLATE_DIR)
    extension = _safe_filename(url[url.rfind('.') + 1:])
    # The title comes from a remote page, so it is sanitised before it is used
    # in a path; if nothing usable remains, fall back to the archive's own name.
    archive_stem = url.rsplit('/', 1)[-1].rsplit('.', 1)[0]
    stem = _safe_filename(title) or _safe_filename(archive_stem) or 'untitled'
    with open(TEMPLATE_DIR + stem + '.' + extension, 'wb') as archive:
        archive.write(payload)


def _log(path, msg):
    """Append *msg* to the log file at *path* and echo it to stdout."""
    if isinstance(msg, bytes):
        msg = msg.decode('utf-8', 'replace')
    with io.open(path, 'a', encoding='utf-8') as log:
        log.write(msg + u'\n')
    print(msg)


def i(msg):
    """Record an informational message."""
    _log('Info.log', msg)


def e(msg):
    """Record an error message."""
    _log('Error.log', msg)


def _download_theme(anchor):
    """Download the template behind one listing *anchor*, logging any failure."""
    detail_url = ''
    try:
        detail_url = urlparse.urljoin(URL + '/', _href(anchor))
        detail_html = geturlcontent(detail_url)
        titles = getdowntitle(detail_html)
        title = titles[0] if titles else ''
        buttons = getdownurl(detail_html)
        if not buttons:
            raise ValueError('no download link found')
        archive_url = _href(buttons[0])
        i('Start download:%s, file name:%s' % (archive_url, title))
        download(title, archive_url)
        i('%s download complete. Save file Name:%s' % (archive_url, title))
    # Deliberately broad: one broken theme must not stop the crawl.  The
    # exception is bound to ``err`` so it does not shadow the ``e`` logger.
    except Exception as err:
        e('address:%s failed to download, failure message:' % detail_url)
        e(str(err))


def main():
    """Crawl every listing page, starting at ``URL``, until no next page exists."""
    page_url = URL
    visited = set()
    i('Start download:%s' % page_url)
    while True:
        visited.add(page_url)
        html = geturlcontent(page_url)
        # Process the page before looking for a successor so that the final
        # listing page is downloaded too.
        for anchor in getallurl(html):
            _download_theme(anchor)
        next_links = getnexturl(html)
        if not next_links:
            e('Address:%s. No next page found, program exit' % page_url)
            break
        page_url = urlparse.urljoin(URL + '/', _href(next_links[0]))
        if page_url in visited:
            # A "next" link that loops back would otherwise crawl forever.
            e('Address:%s. Next page already visited, program exit' % page_url)
            break
        i('-----------------------------------------')
        i('Run next page:%s' % page_url)


if __name__ == '__main__':
    main()



Incredibly powerful! A Python script that crawls the Cssmoban site and downloads its templates

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any of the content on this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.