豆瓣電台加心歌曲自動下載（python實現）

最後更新：2018-12-05 來源：互聯網

上載者：User

創建阿里雲帳戶，並獲得超過 40 款產品的免費試用版；而企業帳戶則可以享有總值 $1200 的免費試用版。立即註冊！

最近寫了個豆瓣個人電台自動下載加心歌曲的小程式，基本能夠下載，但需要手動將"http://douban.fm/mine?type=liked"頁面全都下載下來，有點蛋疼，- - !!。由於還沒有實現程式登入豆瓣的功能，暫時先這樣用吧。

#!/usr/bin/python2.7
# -*- coding: utf-8 -*-
"""Download the songs marked as "liked" on a douban.fm personal station.

The liked-songs pages (http://douban.fm/mine?type=liked) have to be saved by
hand as ``0.html``, ``1.html``, ... in the working directory, because logging
in to Douban from the program is not finished yet (see ``logindouban``).
Every saved page is parsed into ``[name, artist]`` pairs; each song is then
searched for on music.yahoo.cn and the first suitable hit is saved under
``./down/``.

Written for Python 2.7. The import shim and the bytes/str helpers below also
let the script run on Python 3 without using any Python-3-only syntax.
"""
import os
import re
import socket

try:  # Python 2 module layout
    import cookielib
    import urllib2
    from urllib import quote_plus, unquote, urlencode, urlopen
except ImportError:  # Python 3 module layout
    import http.cookiejar as cookielib
    import urllib.request as urllib2
    from urllib.parse import quote_plus, unquote, urlencode
    from urllib.request import urlopen

# Every socket created afterwards gives up after one second.
socket.setdefaulttimeout(1)

# On Python 2 ``str`` is a byte string, so page text is handled as raw bytes.
_PY2 = str is bytes

# Number of hand-saved liked-songs pages (0.html .. 16.html).
PAGE_COUNT = 17
DOWNLOAD_DIR = "./down"
YAHOO_SEARCH_URL = "http://music.yahoo.cn/s?q={0}&m=0"


def _native(text):
    """Return unicode *text* as the interpreter's native ``str`` type."""
    return text.encode("utf-8") if _PY2 else text


def _to_text(data):
    """Return *data* as a native ``str``, decoding UTF-8 bytes on Python 3."""
    if isinstance(data, str):
        return data
    return data.decode("utf-8", "replace")


# Full-width parentheses; as UTF-8 bytes on Python 2 these are exactly the
# \xef\xbc\x88 / \xef\xbc\x89 sequences the page text contains.
_FULLWIDTH_OPEN = re.escape(_native(u"\uff08"))
_FULLWIDTH_CLOSE = re.escape(_native(u"\uff09"))

_HTML_NOISE = re.compile(r'(<.*?>)|(&.*?;)', re.S)
_BRACKETED = re.compile(
    r'(?:[\(\[{]|' + _FULLWIDTH_OPEN + r').*?(?:[\)\]}]|' + _FULLWIDTH_CLOSE + r')')
_WHITESPACE_RUN = re.compile(r'\s+')
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/\x00]')

_LIKED_TABLE = re.compile(
    r'<table\s*class="olts"\s*width="100%">.*?</table>', re.S)
_LIKED_TBODY = re.compile(r'<tbody>.*?</tbody>', re.S)
_LIKED_ROW = re.compile(
    r'<tr>\s*<td>(.*?)</td>.*?<span>(.*?)</span>.*?</tr>', re.S)

_YAHOO_RESULTS = re.compile(r'<div class="yst-music">.*?</table>', re.S)
# Groups: 1 download url, 2 title, 3 singer, 4 file type, 5 size in MB.
_YAHOO_ROW = re.compile(
    r'<tr>\s*<td class="m_song">\s*<a href=".*?url=(.*?)"'
    r'.*?>(.*?)</a>'
    r'.*?<td class="m_singer">.*?>(.*?)</a>'
    r'.*?<td.*?<td>(.*?)</td>'
    r'.*?<td>(.*?)[mM][bB]'
    r'.*?</tr>',
    re.S)


def getpag(url):
    """Fetch *url* and return the response body as text.

    Any failure is reported on stdout and yields ``""`` so that one bad
    request does not stop the whole run.
    """
    try:
        response = urlopen(url)
        try:
            body = response.read()
        finally:
            response.close()
    except Exception:
        print("error in getpag({0})".format(url))
        return ""
    return _to_text(body)


def removehtml(s):
    """Return *s* with HTML tags and ``&...;`` entities removed."""
    return _HTML_NOISE.sub("", s)


def removeotherword(s):
    """Return *s* without bracketed remarks and without outer whitespace.

    Text enclosed in ``()``, ``[]``, ``{}`` or full-width parentheses is
    dropped. Whitespace is stripped only after the brackets are gone, so
    ``"Song (live)"`` becomes ``"Song"`` rather than ``"Song "``.
    """
    return _BRACKETED.sub("", s).strip()


# login douban & save cookie
# TODO: unfinished and not called anywhere in this script.
def logindouban():
    """Post the login form to Douban through a cookie-aware opener.

    Prints the url-encoded form data and the URL the login request ends on.
    """
    loginurl = "http://www.douban.com/accounts/login"
    # NOTE(review): credentials are hard-coded placeholders and are echoed to
    # stdout below; replace before real use.
    data = urlencode({
        'source': 'simple',
        'form_email': 'vodmaker@gmail.com',
        'form_password': 'xxx',
        'remember': 'on',
    })
    print(data)
    cj = cookielib.CookieJar()
    opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cj))
    urllib2.install_opener(opener)
    opener.addheaders = [(
        "User-agent",
        "Mozilla/5.0 (X11; Linux i686; rv:2.0.1) Gecko/20100101 Firefox/4.0.1")]
    if not isinstance(data, bytes):
        # Python 3 requires the POST body to be bytes.
        data = data.encode("utf-8")
    req = urllib2.Request(loginurl, data)
    print(urllib2.urlopen(req).geturl())


def matchexact(s1, s2):
    """Return True if *s1* occurs literally in *s2*, ignoring case.

    *s1* is escaped first: it is scraped text, not a pattern, so a title such
    as ``"C++"`` must neither raise nor act as a wildcard.
    """
    return re.search(re.escape(s1), s2, re.I) is not None


def matchmost(s1, s2):
    """Return True if *s1* occurs in *s2*, ignoring case and spacing.

    Each run of whitespace in *s1* may match any amount of whitespace
    (including none) in *s2*; everything else is matched literally.
    """
    pieces = [re.escape(piece) for piece in _WHITESPACE_RUN.split(s1)]
    return re.search(r'\s*'.join(pieces), s2, re.I) is not None


def getmusiclist_perpag(list_pag):
    """Extract ``[[name, artist], ...]`` from one saved liked-songs page.

    Returns an empty list when the page has no liked-songs table instead of
    failing on the missing match.
    """
    table = _LIKED_TABLE.search(list_pag)
    tbody = _LIKED_TBODY.search(table.group()) if table is not None else None
    if tbody is None:
        print("no liked-songs table found in page")
        return []

    musiclist = []
    for row in _LIKED_ROW.finditer(tbody.group()):
        name = removeotherword(row.group(1))
        artist = removeotherword(row.group(2))
        print("music:" + name + "artist:" + artist)
        musiclist.append([name, artist])
    return musiclist


def _parse_candidates(results_html):
    """Return ``(url, title, artist, filetype, size_mb)`` for each result row.

    Rows whose size cell is not a number are skipped.
    """
    candidates = []
    for row in _YAHOO_ROW.finditer(results_html):
        title, singer, filetype, size = [
            removeotherword(removehtml(row.group(index)))
            for index in range(2, 6)]
        try:
            size_mb = float(size)
        except ValueError:
            continue
        candidates.append(
            (unquote(row.group(1)), title, singer, filetype, size_mb))
    return candidates


def _safe_filename(name):
    """Return *name* with path separators and NUL replaced by ``_``."""
    return _UNSAFE_FILENAME_CHARS.sub("_", name)


def _download_first_match(candidates, music, artist, matches, min_size_mb):
    """Save the first candidate accepted by *matches*; return True on success.

    A candidate qualifies when its title and artist match and it is larger
    than *min_size_mb*. A failed download is reported and the next candidate
    is tried.
    """
    for downurl, music_t, artist_t, type_t, size_mb in candidates:
        if not (matches(music_t, music) and matches(artist_t, artist)
                and size_mb > min_size_mb):
            continue
        print("download from :" + downurl + "")
        try:
            response = urlopen(downurl)
            try:
                music_stream = response.read()
            finally:
                response.close()
            if not os.path.isdir(DOWNLOAD_DIR):
                os.makedirs(DOWNLOAD_DIR)
            target = os.path.join(
                DOWNLOAD_DIR, _safe_filename(music + "." + type_t))
            with open(target, "wb") as out:
                out.write(music_stream)
        except Exception as e:
            # Best effort: report the failure, then try the next candidate.
            print(e)
            continue
        print("download success: music:{0}, artist:{1}".format(music, artist))
        return True
    return False


# download music from music.yahoo.cn
# parameter musiclist: [[name, artist], ...]
def downloadfromyahoo(musiclist):
    """Search music.yahoo.cn for every song and save the best hit.

    Exact title/artist matches larger than 2 MB are tried first; if none can
    be downloaded, whitespace-insensitive matches larger than 1 MB are tried.
    """
    for ma in musiclist:
        music = ma[0]
        artist = ma[1]
        print("Music:\t" + music + "\tArtist:\t" + artist + "is Downloading...")
        listpag = getpag(YAHOO_SEARCH_URL.format(quote_plus(music)))
        results = _YAHOO_RESULTS.search(listpag)
        if results is None:
            print("No search result of {0} in yahoo.cn".format(music))
            continue
        candidates = _parse_candidates(results.group())
        found = (
            _download_first_match(candidates, music, artist, matchexact, 2)
            or _download_first_match(candidates, music, artist, matchmost, 1))
        if not found:
            print("download failed: music:{0}, artist:{1}".format(music, artist))


def _read_page(path):
    """Return the content of the saved page *path* as text."""
    with open(path, "rb") as page:
        return _to_text(page.read())


def main(page_count=PAGE_COUNT):
    """Parse ``0.html`` .. ``<page_count - 1>.html`` and download every song."""
    musiclist = []
    for i in range(page_count):
        musiclist += getmusiclist_perpag(_read_page("{0}.html".format(i)))
    downloadfromyahoo(musiclist)


if __name__ == "__main__":
    main()

貌似豆瓣有屏蔽程式訪問頁面的措施，目前仍糾結於如何實現登入的部分，不能保證一定能解決登入豆瓣自動抓取加心頁面的功能，程式更新期限未知。

本文章原先以中文撰寫並發佈於 aliyun.com，亦設英文版本，僅作資訊用途。本網站不對文章的準確性，完整性或可靠性或其任何翻譯作出任何明示或暗示的陳述或保證。如對該文章有任何疑慮或投訴，請傳送電郵至 info-contact@alibabacloud.com 並提供相關疑慮或投訴的詳細說明。職員會於 5 個工作天內與您聯絡，一經驗證之後，即會刪除該侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

Get Started for Free

Sales Support

1 on 1 presale consultation

Chat Contact Sales
After-Sales Support

24/7 Technical Support 6 Free Tickets per Quarter Faster Response

Open a Ticket
Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.

Learn More

豆瓣電台加心歌曲自動下載（python實現）

聯繫我們

A Free Trial That Lets You Build Big!

Sales Support

After-Sales Support