Reference: the "Bookworm" (虫师) blog post at http://www.cnblogs.com/fnng/p/3576154.html
#automatically visit a URL fromSeleniumImportWebdriverImportTimem= 100000I=0URL='http://www.yyxxww.com/html/2015/edu_0318/3386.html'Browser= Webdriver. Firefox ()#browser name, subject to native installation whileI <m:browser.get (URL) time.sleep (1) I+ = 1browser.quit ()Print 'this time Python has opened a total of'I'Times' #extract first-level headingsImportUrllib2 fromSgmllibImportSgmlparserurl='http://www.yyxxww.com/html/2015/edu_0318/3386.html' classListName (sgmlparser):def __init__(self): Sgmlparser.__init__(self) self.is_h4=""Self.name= [] defStart_h4 (Self, attrs): Self.is_h4= 1defEnd_h4 (self): Self.is_h4="" defHandle_data (self, text):ifSelf.is_h4 = = 1: Self.name.append (text) content=Urllib2.urlopen (URL). Read () ListName=listname () listname.feed (content) forIteminchListname.name:PrintItem.decode ('GBK'). Encode ('UTF8') #visit Baidu, and fill out the form, Chinese temporarily difficult to solve, English no problem#coding = Utf-8Importsysreload (SYS) sys.setdefaultencoding ('UTF8') fromSeleniumImportWebdriverbrowser=Webdriver. Firefox () Browser.get ("http://www.baidu.com") browser.find_element_by_id ("kw"). Send_keys ("Hello"). Decode ('GBK'). Encode ('gb2312') browser.find_element_by_id ("su"). Click () time.sleep (60L#Sleep 3 secondsBrowser.quit ()
Python Crawler Notes (updated irregularly)