Python下載百度新歌100的代碼
最後更新:2018-12-07
來源:互聯網
上載者:User
#!/usr/bin/python
# -*- coding: utf-8 -*-
# Copyright (c) 2006 UbuntuChina <http://www.ubuntu.org.cn>
# License: GPLv2
# Author: oneleaf <oneleaf AT gmail.com>
import httplib
import re
import urllib
import os
import locale
def getdownurl(url):
    """Fetch a Baidu MP3 result page and return its candidate download URLs.

    Sends ``GET url`` to ``mp3.baidu.com`` and collects every fragment of the
    response body that sits between ``http://220.181.27.54/m`` and ``</a>``.
    Only every second fragment is inspected, exactly as the original index
    loop did (presumably each result is linked twice on the page -- TODO
    confirm against the real markup).  Inside an inspected fragment the text
    from ``title=`` up to ``onclick`` is searched for the first
    whitespace-free token starting with ``http``.

    Args:
        url: Request path (with query string) on ``mp3.baidu.com``.

    Returns:
        A list of URLs in page order.  Each URL is decoded from GBK when
        possible; otherwise it is returned as the raw byte string.

    Connection and protocol errors raised by ``httplib`` are not caught here.
    """
    conn = httplib.HTTPConnection('mp3.baidu.com')
    try:
        conn.request("GET", url)
        html = conn.getresponse().read()
    finally:
        # Release the socket even when the request or the read fails.
        conn.close()

    urllist = []
    fragments = re.findall('http://220.181.27.54/m(.*)</a>', html)
    for fragment in fragments[::2]:
        title = re.search('title=(.*)onclick', fragment)
        if not title:
            continue
        link = re.search(r'http(\S*)', title.group(0))
        if not link:
            continue
        mp3url = link.group(0)
        try:
            mp3url = mp3url.decode('gbk')
        except UnicodeError:
            # Not valid GBK: keep the raw bytes, the URL may still be usable.
            pass
        urllist.append(mp3url)
    return urllist
def downmp3(url,author,name,filelist):
    """Download one song into the current directory as ``<author>-<name><ext>``.

    The download is skipped when an entry of *filelist* already starts with
    ``<author>-<name>``.  Otherwise every URL returned by ``getdownurl(url)``
    is tried in order until one is fetched and written successfully;
    ``<ext>`` is the last four characters of that URL.

    Args:
        url: Result-page path handed to ``getdownurl``.
        author: Artist name, used as the first part of the file name.
        name: Song title, used as the second part of the file name.
        filelist: Names of the files that already exist.  Byte-string names
            are decoded with the locale's preferred encoding (undecodable
            bytes are replaced); unicode names are used as they are.

    Returns:
        1 if the file was already present or has just been downloaded,
        0 if every candidate URL failed.
    """
    def _flatten(text):
        # author/name/ext come from scraped HTML: never let them point into
        # another directory.
        return text.replace('/', '_').replace('\\', '_')

    filename = _flatten(author + "-" + name)

    encoding = locale.getpreferredencoding()
    for entry in filelist:
        if isinstance(entry, bytes):
            entry = entry.decode(encoding, 'replace')
        if entry.startswith(filename):
            print(u"檔案已經下載,忽略。")
            return 1

    for link in getdownurl(url):
        # getdownurl may hand back raw bytes when GBK decoding failed; decode
        # leniently for display only, the request below uses the raw value.
        if isinstance(link, bytes):
            shown = link.decode('gbk', 'replace')
        else:
            shown = link
        print(u" ".join((u"正在串連", shown)))

        # Built fresh for every candidate so a failed attempt cannot leave a
        # doubled extension behind for the next one.
        target = filename + _flatten(link[-4:])
        try:
            fp = urllib.URLopener().open(link)
            try:
                data = fp.read()
            finally:
                fp.close()
            with open(target, 'wb') as out:
                out.write(data)
        except Exception:
            # Best effort: any failure just moves on to the next mirror, but
            # KeyboardInterrupt/SystemExit still stop the program.
            continue
        print(u"下載成功!")
        return 1
    return 0
def main():
    """Download every song listed on Baidu's new-hits page.

    Fetches ``/list/newhits.html?id=1`` from ``list.mp3.baidu.com``, decodes
    it from GBK and collects the fragments between
    ``<a href="http://mp3.baidu.com/m`` and ``</a>``.  Fragments are consumed
    in pairs: the first supplies the result-page URL and the song title, the
    second supplies the artist.  Each pair is passed to ``downmp3`` together
    with a fresh listing of the current directory.
    """
    conn = httplib.HTTPConnection('list.mp3.baidu.com')
    try:
        conn.request("GET", '/list/newhits.html?id=1')
        html = conn.getresponse().read().decode('gbk')
    finally:
        # Release the socket even when the read or the decode fails.
        conn.close()

    fragments = re.findall('<a href="http://mp3.baidu.com/m(.*)</a>', html)
    # zip() drops a trailing song fragment that has no artist fragment.
    for song, artist in zip(fragments[::2], fragments[1::2]):
        link = re.search('(.*)target', song)
        title = re.search('blank>(.*)', song)
        performer = re.search('blank>(.*)', artist)
        if not (link and title and performer):
            # Unexpected markup: skip this entry instead of aborting the run.
            continue

        # Drop the trailing 8 characters of the match -- presumably the
        # closing quote, one space and "target"; TODO confirm page markup.
        url = '/m' + link.group(0)[:-8]
        # Drop the leading "blank>" (6 characters) of each match.
        name = title.group(0)[6:]
        author = performer.group(0)[6:]

        print(u" ".join((u"開始下載", author, name)))
        # Re-list on every pass so files saved earlier in this run are seen.
        filelist = os.listdir('.')
        if downmp3(url, author, name, filelist) == 0:
            print(u" ".join((u"下載", author, name, u'失敗!')))


if __name__ == "__main__":
    main()