Task: crawl the top 15 of Weibo's 24-hour hot topics and save the crawled content to a TXT file; the read count of each topic must be crawled as well.
# coding=utf-8
"""Crawl the top Weibo 24-hour hot topics with Selenium and save them to a TXT file."""
import unittest
from time import sleep

from selenium import webdriver
from selenium.webdriver.common.by import By

# Weibo account and password.
# NOTE(review): placeholders only -- the e-mail address was redacted in the
# published listing; supply real credentials before running.
WEIBO_USERNAME = 'your_account@example.com'
WEIBO_PASSWORD = 'kemi_xxxx'


class weibo(unittest.TestCase):
    """Log in to Weibo, scrape the hot-topic ranking and dump it to a text file.

    All of the work happens in ``setUp``; the single test method only reports
    completion.
    """

    def setUp(self):
        """Start Chrome, scrape the hot topics and write them to disk."""
        self.dr = webdriver.Chrome()
        try:
            self.hot_list = self.get_weibo_hot_topic()
            self.weibo_topic = self.get_top_rank_file()
        except Exception:
            # unittest does not call tearDown() when setUp() raises, so close
            # the browser here to avoid leaking a Chrome process.
            self.dr.quit()
            raise

    def get_weibo_hot_topic(self, count=15):
        """Return up to ``count`` hot topics as ``[rank_and_topic, number]`` pairs.

        Args:
            count: Maximum number of topics to collect (defaults to the top 15).

        Returns:
            A list of two-item lists: the text of the rank/topic element and
            the text of the matching read-count element. The list is shorter
            than ``count`` when the page exposes fewer matching elements.
        """
        self.dr.get('http://weibo.com/')
        sleep(5)  # fixed pause, presumably to let the page finish loading
        self.login(WEIBO_USERNAME, WEIBO_PASSWORD)
        self.dr.get('http://d.weibo.com/100803?refer=index_hot_new')  # hot topics URL
        sleep(5)

        # Query each element list once instead of once per topic.
        titles = self.dr.find_elements(By.CSS_SELECTOR, '.title.W_autocut')  # rank and topic
        numbers = self.dr.find_elements(By.CSS_SELECTOR, '.number')  # read count
        return [
            [title.text, number.text]
            for title, number in zip(titles[:count], numbers)
        ]

    def get_top_rank_file(self):
        """Write ``self.hot_list`` to ``<file_title>.txt`` as UTF-8 bytes.

        Each entry is preceded by a separator line and followed by a newline.
        Sets ``self.file_title`` and ``self.file``; returns ``None``.
        """
        self.file_title = 'Weibo 24-hour hot topic'
        separate_line = '~~~~~~~~~~~~~~~~~~~~~~~~\n'.encode('utf-8')
        # The context manager closes the file even if a write fails.
        with open(self.file_title + '.txt', 'wb') as out:
            self.file = out
            for item in self.hot_list:
                out.write(separate_line)
                out.write((item[0] + ' ' + 'reading: ' + item[1] + '\n').encode('utf-8'))

    def login(self, username, password):
        """Fill in the login form on the current page and submit it.

        Args:
            username: Account name typed into the ``username`` field.
            password: Password typed into the ``password`` field.
        """
        user_field = self.dr.find_element(By.NAME, 'username')
        user_field.clear()
        user_field.send_keys(username)
        self.dr.find_element(By.NAME, 'password').send_keys(password)
        self.dr.find_element(By.CSS_SELECTOR, '.info_list.login_btn').click()

    def test_weibo_topic(self):
        """Report completion; the crawl itself already ran in ``setUp``."""
        print('crawl complete')

    def tearDown(self):
        """Close the browser started in ``setUp``."""
        self.dr.quit()


if __name__ == '__main__':
    unittest.main()
The page is as follows:
<img src="http://images2015.cnblogs.com/blog/942023/201612/942023-20161215234614667-550311380.png" alt="942023-20161215234614667-550311380.png" />
Generate the TXT file as follows:
<img src="http://images2015.cnblogs.com/blog/942023/201612/942023-20161215234651292-1790454800.png" alt="942023-20161215234651292-1790454800.png" />
This article comes from the "No idea, no achievement!" blog; please be sure to keep this attribution: http://kemixing.blog.51cto.com/10774787/1883205
Use Python + Selenium to crawl the top 15 of Weibo's 24-hour hot topics and save them to a TXT file