標籤:[1] tps int [] ext download top 面向 pen
寫了兩個版本:
1、程序導向(面向過程)版本:
"""Procedural version: scrape the Douban Top 250 pages and print titles by score."""

url = 'https://movie.douban.com/top250'
moves = []  # accumulated [title, score] pairs, one per scraped movie


def sec(item):
    """Return the score field (second element) of a ``[title, score]`` entry."""
    return item[1]


def score_value(item):
    """Return the entry's score as a float, for use as a numeric sort key.

    Scores are scraped as text; comparing them as strings would rank
    "10.0" below "9.7". An entry whose score is missing or not numeric
    gets ``-inf`` so it sorts last in a descending sort instead of raising.
    """
    try:
        return float(sec(item))
    except (TypeError, ValueError):
        return float('-inf')


def fetch_page(start):
    """Download one list page beginning at offset ``start`` and return its HTML.

    Raises ``requests.HTTPError`` on a non-2xx response so a failed fetch is
    not silently parsed as an empty page.
    """
    # Imported here so the pure helpers above stay importable (and testable)
    # without the scraping dependencies installed.
    import requests

    response = requests.get(url + "?start=" + str(start), timeout=10)
    response.raise_for_status()
    return response.text


def parse_page(html):
    """Extract ``[title, score]`` pairs from every ``.item`` element in ``html``."""
    from pyquery import PyQuery as pq

    entries = []
    for movie in pq(html).find('.item'):
        node = pq(movie)
        entries.append([node.find('.title').html(),
                        node.find('.rating_num').html()])
    return entries


def main():
    """Scrape all pages into ``moves``, sort by score (highest first), print."""
    # Offsets 0, 25, ..., 225 cover a 250-entry list at 25 entries per page;
    # the previous upper bound of 255 issued an extra request for start=250.
    for start in range(0, 250, 25):
        moves.extend(parse_page(fetch_page(start)))
    moves.sort(key=score_value, reverse=True)
    for move in moves:
        print(move[0], move[1])


if __name__ == '__main__':
    main()
2、物件導向版本:
class Douban:
    """Scraper for the Douban Top 250 pages: collects titles and scores, prints them by score."""

    def __init__(self):
        # One {'title': ..., 'score': ...} dict per scraped movie.
        self.moves = []

    def geturl(self):
        """Return the list-page URLs for offsets 0, 25, ..., 225."""
        url = 'https://movie.douban.com/top250?start=%s'
        return [url % start for start in range(0, 250, 25)]

    def downloader(self, url):
        """Fetch ``url`` and return the response body as text.

        Raises ``requests.HTTPError`` on a non-2xx response so a failed fetch
        is not silently parsed as an empty page.
        """
        # Imported here so the class stays importable (and its pure methods
        # testable) without the scraping dependencies installed.
        import requests

        r = requests.get(url, timeout=10)
        r.raise_for_status()
        return r.text

    def html_parser(self, page):
        """Append a title/score dict to ``self.moves`` for each ``.item`` in ``page``."""
        from pyquery import PyQuery as pq

        for movie in pq(page).find('.item'):
            node = pq(movie)
            self.moves.append({
                'title': node.find('.title').html(),
                'score': node.find('.rating_num').html(),
            })

    @staticmethod
    def _score_key(move):
        """Return ``move['score']`` as a float for numeric sorting.

        Scores are scraped as text; a string comparison would rank "10.0"
        below "9.7". Missing or non-numeric scores map to ``-inf`` so they
        sort last in a descending sort instead of raising.
        """
        try:
            return float(move['score'])
        except (TypeError, ValueError):
            return float('-inf')

    def output(self):
        """Sort ``self.moves`` in place by score (highest first) and print each entry."""
        self.moves.sort(key=self._score_key, reverse=True)
        for move in self.moves:
            print(move['title'], move['score'])

    def start(self):
        """Download and parse every list page, then print the sorted result."""
        for url in self.geturl():
            page = self.downloader(url)
            self.html_parser(page)
        self.output()


if __name__ == '__main__':
    dou = Douban()
    dou.start()
python爬蟲爬取豆瓣電影前250名電影及評分(requests+pyquery)