Task: crawl the top 15 entries of Weibo's 24-hour hot-topic list, including each topic's reading count, and save the results to a txt file.
# coding=utf-8
from selenium import webdriver
import unittest
from time import sleep


class Weibo(unittest.TestCase):
    """Crawl the top entries of Weibo's 24-hour hot-topic list and save
    each topic together with its reading count to a UTF-8 txt file.

    NOTE(review): requires a local Chrome + chromedriver and valid Weibo
    credentials; the CSS selectors below match the page layout at the time
    the original article was written and may need updating.
    """

    # Number of hot-topic entries to crawl (was hard-coded inside the loop).
    TOP_N = 15

    def setUp(self):
        # Launch the browser, crawl the list, then write it out.  The whole
        # pipeline runs in setUp so test_weibo_topic only has to report success.
        self.dr = webdriver.Chrome()
        self.hot_list = self.get_weibo_hot_topic()
        self.weibo_topic = self.get_top_rank_file()

    def get_weibo_hot_topic(self):
        """Log in and scrape the hot-topic page.

        Returns a list of [rank_and_topic, reading_count] string pairs,
        at most TOP_N entries.
        """
        self.dr.get('http://weibo.com/')
        sleep(5)  # crude wait for the login form to render
        self.login('[email protected]', 'kemi_xxxx')  # Weibo account and password
        self.dr.get('http://d.weibo.com/100803?refer=index_hot_new')
        sleep(5)  # crude wait for the hot-topic list to render
        # Query each element list once (the original re-queried the DOM on
        # every loop iteration), then pair rank/topic with its reading count.
        topics = self.dr.find_elements_by_css_selector('.title.W_autocut')  # rank + topic text
        numbers = self.dr.find_elements_by_css_selector('.number')          # reading counts
        return [
            [topic.text, number.text]
            for topic, number in list(zip(topics, numbers))[:self.TOP_N]
        ]

    def get_top_rank_file(self):
        """Write self.hot_list to '<title>.txt', one separator + entry per topic."""
        self.file_title = 'Twitter 24-hour Hot Topic'  # NOTE(review): title says Twitter but data is from Weibo
        separate_line = '~~~~~~~~~~~~~~~~~~~~~~~~\n'
        # Context manager guarantees the file is closed even if a write fails
        # (the original opened/closed manually and would leak on exception).
        with open(self.file_title + '.txt', 'wb') as out:
            for item in self.hot_list:
                out.write(separate_line.encode('utf-8'))
                out.write((item[0] + ' ' + 'Number of Readings:' + item[1] + '\n').encode('utf-8'))

    def login(self, username, password):
        """Fill in the Weibo login form and submit it."""
        self.dr.find_element_by_name('username').clear()
        self.dr.find_element_by_name('username').send_keys(username)
        self.dr.find_element_by_name('password').send_keys(password)
        self.dr.find_element_by_css_selector('.info_list.login_btn').click()

    def test_weibo_topic(self):
        # All work already happened in setUp; this just marks completion.
        print('Crawl Complete')

    def tearDown(self):
        # Always shut the browser down, pass or fail.
        self.dr.quit()


if __name__ == '__main__':
    unittest.main()
The page looks as follows:
The generated txt file looks as follows:
Use Python + Selenium to crawl the top 15 of Weibo's 24-hour hot topics and save them to a txt file.