The code is as follows:
# coding: utf-8
"""Log in to Zhihu with Selenium, then crawl the home page with requests.

A real Chrome browser performs the sign-in; the cookies it ends up with are
copied into a ``requests.Session`` so the feed can be fetched without a browser.
"""
import json
import sys
import time

import requests
from lxml import etree
from selenium import webdriver
from selenium.webdriver.common.by import By


class Zhihu:
    """Crawler that prints the author and title of answers on a Zhihu page."""

    def __init__(self, homeurl):
        """Remember the URL of the page that ``crawl`` will fetch."""
        self.homeurl = homeurl

    def getcookies(self, username="13060882373", password="XXXXXX"):
        """Sign in through Chrome and return the browser's cookies.

        Args:
            username: Account name typed into the sign-in form.
            password: Password typed into the sign-in form.

        Returns:
            The list of cookie dicts reported by ``browser.get_cookies()``.
        """
        # NOTE(review): the defaults above are the literals from the original
        # listing; real credentials should be passed in by the caller rather
        # than committed to source.
        browser = webdriver.Chrome()
        try:
            browser.get("https://www.zhihu.com/signin")
            # NOTE(review): CSS class names are case-sensitive and were
            # case-mangled in the pasted listing; the casing below is the
            # presumed original -- confirm against the live sign-in page.
            browser.find_element(
                By.CSS_SELECTOR, ".SignFlow-accountInput.Input-wrapper input"
            ).send_keys(username)
            browser.find_element(
                By.CSS_SELECTOR, ".SignFlow-password input"
            ).send_keys(password)
            browser.find_element(
                By.CSS_SELECTOR, ".Button.SignFlow-submitButton"
            ).click()
            # Give the sign-in request time to finish so the session cookies
            # exist before they are read.
            time.sleep(3)
            return browser.get_cookies()
        finally:
            # Always close Chrome, even when a sign-in step raises.
            browser.quit()

    def crawl(self):
        """Fetch ``self.homeurl`` as the signed-in user and print each answer."""
        s = requests.Session()
        s.headers.clear()
        for cookie in self.getcookies():
            s.cookies.set(cookie['name'], cookie['value'])
        html = s.get(self.homeurl).text
        html_tree = etree.HTML(html)
        # NOTE(review): the class value was case-mangled in the pasted
        # listing; "ContentItem AnswerItem" is the presumed original.
        items = html_tree.xpath(
            '//*[@id="root"]/div/main/div/div/div[1]/div[2]/div'
            '//div[@class="ContentItem AnswerItem"]/@data-zop'
        )
        for item in items:
            # The original called eval() on this attribute. It comes from a
            # remote page, so it is parsed as JSON instead: no code execution,
            # and JSON literals such as true/false/null are handled.
            content = json.loads(item)
            # NOTE(review): key casing presumed to be 'authorName' (the
            # listing shows a mangled 'AuthorName') -- confirm against the
            # page's data-zop payload.
            author_name = content['authorName']
            title = content['title']
            print(author_name + "answered the following:" + title)


zhihu = Zhihu('https://www.zhihu.com/')
zhihu.crawl()
Python crawler: simulating a login with Selenium, then reusing the cookies in a requests.Session()