Simulating clicks on a dynamic page
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Walk the Douyu live-room directory page by page with a headless browser.

For every page the script prints each room's viewer count and title, then
clicks the "next page" control until the pager reports that it is disabled.
It is written as a ``unittest`` test case so that browser start-up and
shutdown are handled by ``setUp`` / ``tearDown``.
"""

# Python's unit-test module
import unittest

from bs4 import BeautifulSoup
from selenium import webdriver


class DouyuSelenium(unittest.TestCase):
    """Test case that pages through the Douyu directory and prints its rooms."""

    # Initialization method: runs before each test method.
    def setUp(self):
        """Start the headless browser used by the test."""
        # NOTE(review): PhantomJS and the find_element_by_* helpers used below
        # appear to be unavailable in recent Selenium releases -- confirm the
        # installed Selenium version before running.
        self.driver = webdriver.PhantomJS()

    # Concrete test-case method; its name must start with "test" so that
    # unittest discovers it.
    def testDouyu(self):
        """Print viewer count and title for every room on every page."""
        self.driver.get('http://www.douyu.com/directory/all')

        while True:
            # Take one snapshot of the rendered page so that the rows we
            # print and the last-page check below look at the same content.
            page_source = self.driver.page_source

            # Specify XML parsing.
            soup = BeautifulSoup(page_source, 'xml')

            # All room titles and all viewer counts on the current page.
            titles = soup.find_all('h3', {'class': 'ellipsis'})
            nums = soup.find_all('span', {'class': 'dy-num fr'})

            # zip() pairs the two lists element by element.  The order must
            # be (titles, nums) to match the loop variables; pairing them the
            # other way round prints each value under the wrong label.
            for title, num in zip(titles, nums):
                # A single concatenated argument keeps the output identical
                # on Python 2 and Python 3.
                print(u"number of spectators:" + num.get_text().strip()
                      + u" \tRoom title:" + title.get_text().strip())

            # The pager carries this class only when "next" is disabled,
            # i.e. we are on the last page.
            if 'shark-pager-disable-next' in page_source:
                break

            # Simulate a click on the "next page" control.
            # NOTE(review): there is no explicit wait after the click, so the
            # next iteration may read the page before it has refreshed.
            self.driver.find_element_by_class_name('shark-pager-next').click()

    # Cleanup method: runs after each test method.
    def tearDown(self):
        """Report completion and shut the browser down."""
        print("Loading complete ...")
        self.driver.quit()


if __name__ == "__main__":
    unittest.main()
Crawler dynamic HTML processing (Selenium and PhantomJS): simulating clicks on a dynamic page