The following example shows how to download an officialdom-genre novel from the web:
The code is as follows:
# coding=utf-8
# ===== File 1: download script (written for Python 2) =====
import os
import re
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
import selenium.webdriver.support.ui as ui
import time
from datetime import datetime
from selenium.webdriver.common.action_chains import ActionChains
# from threading import Thread
from pyquery import PyQuery as pq
import LogFile
import urllib


class Downfile(object):
    """Download every chapter linked from a novel's index page into a text file."""

    def __init__(self, websearch_url, novelname):
        """Open the output file and start a PhantomJS browser.

        :param websearch_url: URL of the novel's chapter index page.
        :param novelname: novel title, used as the output file name; either a
            UTF-8 encoded byte string or a text string.
        """
        # Decode byte strings so that the file name is built from text.
        if isinstance(novelname, bytes):
            novelname = novelname.decode('utf8')
        # Prepare the output location before launching the browser, so a
        # file-system error does not leave a stray PhantomJS process behind.
        novel_dir = os.path.join(os.getcwd(), 'novel')
        if not os.path.isdir(novel_dir):
            os.makedirs(novel_dir)
        logfile = os.path.join(novel_dir, novelname + '.txt')
        self.log = LogFile.LogFile(logfile)
        self.websearch_url = websearch_url
        self.driver = webdriver.PhantomJS()
        # self.driver.set_page_load_timeout(10)
        self.driver.maximize_window()

    def scroll_foot(self):
        """Scroll the current page to the bottom.

        :return: whatever the driver's ``execute_script`` call returns.
        """
        js = ""
        # Chrome and PhantomJS expose the scroll offset on document.body.
        if self.driver.name == "chrome" or self.driver.name == 'phantomjs':
            js = "var q=document.body.scrollTop=10000"
        # Internet Explorer exposes it on document.documentElement.
        elif self.driver.name == 'internet explorer':
            js = "var q=document.documentElement.scrollTop=10000"
        return self.driver.execute_script(js)

    def scrapy_date(self):
        """Visit the index page, then write each chapter title and body to the log file."""
        self.driver.get(self.websearch_url)
        htext = self.driver.execute_script("return document.documentElement.outerHTML")
        dochtml = pq(htext)
        # NOTE(review): selector case was reconstructed from a garbled listing;
        # confirm 'novel_list' / 'novel_content' against the site's markup.
        elements = dochtml('div[class="novel_list"]').find('ul').find('li').find('a')
        for e in elements.items():
            # NOTE(review): the chapter base URL is hard-coded and must match
            # the directory of websearch_url.
            url = 'http://www.shanxixsa.com/sxtvi/21/21051/' + e.attr('href')
            txt = e.text().encode('utf8').strip()
            print(txt)
            self.log.WriteLog(txt)
            self.driver.get(url)
            shtext = self.driver.execute_script("return document.documentElement.outerHTML")
            sdochtml = pq(shtext)
            selements = sdochtml('div[id="novel_content"]')
            for se in selements.items():
                stxt = se.text().encode('utf8').strip()
                self.log.WriteLog(stxt)

    def close(self):
        """Shut down the browser process started in ``__init__``."""
        self.driver.quit()


if __name__ == '__main__':
    obj = Downfile('http://www.shanxixsa.com/sxtvi/21/21051/index.html', 'Officialdom')
    try:
        obj.scrapy_date()
    finally:
        # Always release the PhantomJS process, even if a page fails to load.
        obj.close()


# ===== File 2: LogFile.py (the module imported by the script above) =====
# -*- coding: utf-8 -*-
import os
import codecs
import datetime
import time
import logging


# Thin wrapper around the logging module.
class LogFile:
    """Append plain messages to a file through the root logger."""

    # Earlier implementation without logging, kept for reference:
    # def __init__(self, fileName):
    #     self.filename = os.path.join(os.getcwd(), fileName)
    # def WriteLog(self, message):
    #     strMessage = '\r\n%s:%s' % (time.strftime('%Y-%m-%d_%H-%M-%S'), message)
    #     with open(self.filename, 'a') as f:
    #         f.write(strMessage)

    def __init__(self, fileName, level=logging.INFO):
        """Attach a file handler for ``fileName`` to the root logger.

        :param fileName: path of the file that receives the messages.
        :param level: minimum level the logger lets through.
        """
        fh = logging.FileHandler(fileName)
        self.logger = logging.getLogger()
        self.logger.setLevel(level)
        # formatter = logging.Formatter('%(asctime)s:%(message)s', '%Y-%m-%d %H:%M:%S')
        # Only the bare message is written, so the output reads as the novel text.
        formatter = logging.Formatter('%(message)s', '%Y-%m-%d %H:%M:%S')
        fh.setFormatter(formatter)
        self.logger.addHandler(fh)

    def WriteLog(self, message):
        """Write ``message`` at INFO level."""
        self.logger.info(message)

    def WriteErrorLog(self, message):
        """Write ``message`` at ERROR level.

        NOTE(review): this raises the logger's level to ERROR and never
        restores it, so later ``WriteLog`` calls are filtered out.
        """
        self.logger.setLevel(logging.ERROR)
        self.logger.error(message)
Downloading a novel with Python