Crawl the top 12 movies now playing in Chengdu from Douban Movie (http://movie.douban.com/nowplaying/chengdu/), sort them by rating, and save the result to a TXT file.
# -*- coding: utf-8 -*-
# Scrape Douban's "now playing in Chengdu" page with Selenium, keep 12 movie
# entries, sort them by rating (highest first) and save them to a text file.
import unittest
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By

NOW_PLAYING_URL = "https://movie.douban.com/nowplaying/chengdu/"
TOP_N = 12      # number of movies to keep
LI_STRIDE = 5   # per the original author, movie <li> nodes are 5 matches apart


def _score_key(entry):
    """Return the numeric score of a ``[title, score]`` pair for sorting.

    A missing or non-numeric score sorts as 0.0 instead of raising.
    """
    try:
        return float(entry[1])
    except (TypeError, ValueError):
        return 0.0


class doubanmovie(unittest.TestCase):
    """Scrape the Chengdu now-playing list and write it out sorted by rating."""

    def setUp(self):
        """Start Chrome, scrape the movie list and write the output file."""
        self.dr = webdriver.Chrome()
        # Registered right away: unittest does not call tearDown() when
        # setUp() raises, which would leave the browser running if the
        # scraping or file writing below fails.
        self.addCleanup(self.dr.quit)
        self.top_movie_list = self.get_douban_movies_top12()
        self.movie = self.get_movie_top12_file()

    def get_douban_movies_top12(self):
        """Return ``[title, score]`` pairs for the sampled movies, best first.

        Both values are the raw ``data-title`` / ``data-score`` attribute
        values read from the page (``None`` when an attribute is absent).
        """
        self.dr.get(NOW_PLAYING_URL)
        sleep(3)  # fixed pause after navigation, presumably to let the page load
        # Query the DOM once instead of twice per movie.
        items = self.dr.find_elements(By.CSS_SELECTOR, ".lists li")
        # Take every LI_STRIDE-th match among the first 12 * 5 = 60; slicing
        # (unlike indexing up to 60) tolerates a page with fewer entries.
        movie_list = [
            [li.get_attribute("data-title"), li.get_attribute("data-score")]
            for li in items[:TOP_N * LI_STRIDE:LI_STRIDE]
        ]
        # Sort by rating, highest first.
        movie_list.sort(key=_score_key, reverse=True)
        return movie_list

    def get_movie_top12_file(self):
        """Write one line per entry of ``self.top_movie_list`` as UTF-8 text.

        The file is named ``self.file_title + '.txt'`` and is created in the
        current working directory, replacing any existing file.
        """
        self.file_title = ' watercress film the top 12 films in Chengdu '
        lines = [
            ' Movie Name: ' + (title or '') + ' ' + ' movie Rating: ' + (score or '') + '\n'
            for title, score in self.top_movie_list
        ]
        # Binary mode keeps '\n' untranslated on every platform; the context
        # manager closes the file even if the write fails.
        with open(self.file_title + '.txt', 'wb') as out:
            out.write(''.join(lines).encode('utf-8'))

    def test_movie(self):
        """Placeholder test: the real work already happened in setUp()."""
        print(" Get finished ")


if __name__ == '__main__':
    unittest.main()
<img onload="if(this.width>650) this.width=650;" src="http://s5.51cto.com/wyfs02/M00/8B/8B/wKioL1hQ_J-TKejAAAWWd9nc0uU397.png-wh_500x0-wm_3-wmp_4-s_888900667.png" style="float:none;" title="Qq20161214160134.png" alt="wKioL1hQ_J-TKejAAAWWd9nc0uU397.png-wh_50" />
<img onload="if(this.width>650) this.width=650;" src="http://s3.51cto.com/wyfs02/M02/8B/8F/wKiom1hQ_KCj8euSAACED_rYxFg265.png-wh_500x0-wm_3-wmp_4-s_1186924762.png" style="float:none;" title="Qq20161214160148.png" alt="wKiom1hQ_KCj8euSAACED_rYxFg265.png-wh_50" />
Note: A movie listed with a score of 0 has not been rated yet.
This article comes from the blog "No idea, no achievement!"; please keep this attribution when reposting: http://kemixing.blog.51cto.com/10774787/1882707
Use Python + Selenium to crawl the top 12 now-playing movies on Douban Movie and sort them by rating