標籤:ref gecko temp ongl soup 尋找 nbsp image sel
擷取酷狗TOP 100
http://www.kugou.com/yy/rank/home/1-8888.html
排名
歌曲名稱 && 歌手
時間長度
效果:
附源碼:
import time
import json

from bs4 import BeautifulSoup
import requests


class Kugou(object):
    """Scraper for Kugou ranking pages.

    Each call to ``getInfo`` downloads one ranking page, prints every entry
    found on it and appends the entries to ``hhh.txt``.
    """

    def __init__(self):
        # Browser-like User-Agent header sent with every request.
        self.header = {
            "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) Gecko/20100101 Firefox/60.0'
        }

    @staticmethod
    def _split_name(text):
        """Split a ``"singer - title"`` link text into ``(singer, title)``.

        The spaced separator is tried first so that hyphens inside a name
        do not cut it short. When no separator is present at all, the
        singer is returned as an empty string and the whole text is the
        title, instead of raising ``IndexError``.
        """
        for separator in (' - ', '-'):
            singer, found, title = text.partition(separator)
            if found:
                return singer.strip(), title.strip()
        return '', text.strip()

    def getInfo(self, url):
        """Fetch one ranking page, print its entries and append them to ``hhh.txt``.

        Args:
            url: Address of the ranking page to download.

        Returns:
            None. Results are printed and appended to ``hhh.txt``.
        """
        # A timeout keeps one stalled connection from hanging the whole crawl.
        html = requests.get(url, headers=self.header, timeout=10)
        soup = BeautifulSoup(html.text, 'html.parser')
        ranks = soup.select('.pc_temp_num')
        # Walk down the nested tags to reach the song links.
        titles = soup.select('.pc_temp_songlist > ul > li > a')
        times = soup.select('.pc_temp_time')

        lines = []
        for rank, title, songTime in zip(ranks, titles, times):
            singer, song = self._split_name(title.get_text())
            data = {
                # get_text() keeps only the text; the tag itself would
                # print together with its HTML markup.
                'rank': rank.get_text().strip(),
                'title': song,
                'singer': singer,
                'songTime': songTime.get_text().strip()
            }
            print('rank:%2s\t' % data['rank'], 'title:%2s\t' % data['title'],
                  'singer:%2s\t' % data['singer'], 'songTime:%2s\t' % data['songTime'])
            lines.append(str(data) + '\n')

        # Open the output file once per page rather than once per row.
        if lines:
            with open('hhh.txt', 'a', encoding='utf8') as f:
                f.writelines(lines)


if __name__ == '__main__':
    # NOTE(review): range(30) requests page numbers 0..29, while the sample
    # URL for this chart starts at page 1 -- confirm the valid page range.
    urls = [
        'http://www.kugou.com/yy/rank/home/{}-8888.html'.format(str(i)) for i in range(30)
    ]
    kugou = Kugou()
    for url in urls:
        kugou.getInfo(url)
        # Pause between requests to avoid hammering the server.
        time.sleep(1)
部分代碼解析
--------------------------------------------------------------------
# Build the ranking-page URLs for pages 1 through 4 and print each one.
urls = ['http://www.kugou.com/yy/rank/home/{}-8888.html'.format(str(i)) for i in range(1, 5)]
for i in urls:
    print(i)
結果列印:
http://www.kugou.com/yy/rank/home/1-8888.html
http://www.kugou.com/yy/rank/home/2-8888.html
http://www.kugou.com/yy/rank/home/3-8888.html
http://www.kugou.com/yy/rank/home/4-8888.html
--------------------------------------------------------------------
for rank, title, songTime in zip(ranks, titles, times):
    data = {
        # get_text() keeps only the text; printing `rank` itself would
        # include its HTML tags.
        'rank': rank.get_text().strip(),
        # The link text reads "singer - title", so the title is the second
        # piece and the singer is the first.
        'title': title.get_text().split('-')[1].strip(),
        'singer': title.get_text().split('-')[0].strip(),
        'songTime': songTime.get_text()
    }
    print(data['rank'])
    print(data['title'])
    print(data['singer'])
    print(data['songTime'])
結果列印:
1
飛馳於你
許嵩
4:04
--------------------------------------------------------------------
for rank, title, songTime in zip(ranks, titles, times):
    data = {
        # Storing the tag objects themselves (no get_text()) means each
        # print below shows the element together with its HTML markup.
        'rank': rank,
        'title': title,
        'songTime': songTime
    }
    print(data['rank'])
    print(data['title'])
    print(data['songTime'])
結果列印:
<span class="pc_temp_num">
<strong>1</strong>
</span>
<a class="pc_temp_songname" data-active="playDwn" data-index="0" hidefocus="true" href="http://www.kugou.com/song/pjn5xaa.html" title="許嵩 - 飛馳於你">許嵩 - 飛馳於你</a>
<span class="pc_temp_time">4:04 </span>
Python 實例---擷取酷狗音樂Top100