Scrape the Kugou Music Top 100
http://www.kugou.com/yy/rank/home/1-8888.html
ranking
Song title && singer
Duration
Effect:
Source code:
import time
import json

from bs4 import BeautifulSoup
import requests


class Kugou(object):
    """Scraper for Kugou ranking pages: rank, title, singer and duration."""

    def __init__(self):
        # Browser-like User-Agent sent with every request.
        self.header = {
            "User-Agent": 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:60.0) '
                          'Gecko/20100101 Firefox/60.0'
        }

    def getInfo(self, url):
        """Fetch one ranking page, print every song and append it to hhh.txt.

        Args:
            url: Address of a single ranking page.

        Each song is printed as one tab-separated line and appended to
        ``hhh.txt`` as the ``str()`` of a dict with the keys ``rank``,
        ``title``, ``singer`` and ``songtime``.
        """
        # A timeout keeps a stalled connection from hanging the whole run.
        html = requests.get(url, headers=self.header, timeout=10)
        soup = BeautifulSoup(html.text, 'html.parser')
        # Debugging aid: print(soup.prettify()) dumps the parsed page.
        ranks = soup.select('.pc_temp_num')
        # Descend level by level to reach the song link.
        titles = soup.select('.pc_temp_songlist > ul > li > a')
        times = soup.select('.pc_temp_time')

        lines = []
        for rank, title, songtime in zip(ranks, titles, times):
            # The link text reads "singer - title". partition() splits on the
            # first '-' only, so a title that itself contains '-' is kept whole
            # and a text without any '-' no longer raises IndexError.
            singer, sep, song = title.get_text().partition('-')
            if not sep:
                # No separator: treat the whole text as the title.
                singer, song = '', singer
            data = {
                # `rank` is a tag; printing it directly shows the surrounding
                # HTML, so keep only its text.
                'rank': rank.get_text().strip(),
                'title': song.strip(),
                'singer': singer.strip(),
                'songtime': songtime.get_text().strip()
            }
            print('rank:%2s\t' % data['rank'],
                  'title:%2s\t' % data['title'],
                  'singer:%2s\t' % data['singer'],
                  'songtime:%2s\t' % data['songtime'])
            lines.append(str(data) + '\n')

        # One append per page instead of reopening the file for every song.
        if lines:
            with open('hhh.txt', 'a', encoding='utf8') as f:
                f.writelines(lines)


if __name__ == '__main__':
    # NOTE(review): the page bounds were lost in the original listing;
    # range(1, 5) is the range used in the walkthrough of this line further
    # down the article. Confirm it covers the whole chart.
    urls = ['http://www.kugou.com/yy/rank/home/{}-8888.html'.format(str(i))
            for i in range(1, 5)]
    kugou = Kugou()
    for url in urls:
        kugou.getInfo(url)
        time.sleep(1)  # pause between pages to go easy on the server
Walkthrough of selected code fragments
--------------------------------------------------------------------
# Build the addresses of ranking pages 1 to 4 and print each one.
urls = ['http://www.kugou.com/yy/rank/home/{}-8888.html'.format(str(i)) for i in range(1, 5)]
for url in urls:
    print(url)
Printed result:
http://www.kugou.com/yy/rank/home/1-8888.html
http://www.kugou.com/yy/rank/home/2-8888.html
http://www.kugou.com/yy/rank/home/3-8888.html
http://www.kugou.com/yy/rank/home/4-8888.html
--------------------------------------------------------------------
# Walk the three selections in lockstep and keep only the text of each tag.
for rank, title, songtime in zip(ranks, titles, times):
    data = {
        # Printing `rank` itself would show the whole HTML tag, so take its text.
        'rank': rank.get_text().strip(),
        # The link text reads "singer - title": the part after '-' is the title,
        # the part before it is the singer.
        'title': title.get_text().split('-')[1].strip(),
        'singer': title.get_text().split('-')[0].strip(),
        'songtime': songtime.get_text()
    }
    print(data['rank'])
    print(data['title'])
    print(data['singer'])
    print(data['songtime'])
Printed result:
1
Flying in your
Xusong
4:04
--------------------------------------------------------------------
# Same loop, but storing the selected tags themselves instead of their text.
for rank, title, songtime in zip(ranks, titles, times):
    data = {
        # Printing a tag directly shows its full HTML markup.
        'rank': rank,
        'title': title,
        'songtime': songtime
    }
    print(data['rank'])
    print(data['title'])
    print(data['songtime'])
Printed result:
<span class="pc_temp_num">
<strong>1</strong>
</span>
<a class="pc_temp_songname" data-active="playdwn" data-index="0" hidefocus="true" href="http://www.kugou.com/song/pjn5xaa.html" title="Xu Song - Flying in your">Xu Song - Flying in your</a>
<span class="pc_temp_time">4:04 </span>
Python example: scraping the Kugou Music Top 100