Code:
# _*_ Coding:utf-8 _*_import urllib2import refrom datetime import Datetimeclass qsbk:def __init__ (self): SELF.P Ageindex = 1 self.user_agent = ' mozilla/4.0 (compatible; MSIE 5.5; Windows NT) ' self.headers = {' user-agent ': self.user_agent} self.stories = [] self.enable = False def getpage (self,pageindex): Try:url = ' http://www.qiushibaike.com/hot/page ' +str (pageIndex) Request = Urllib2. Request (Url,headers = self.headers) response = Urllib2.urlopen (request) Pagecode = Response.read (). D Ecode (' Utf-8 ') return Pagecode except Urllib2. Urlerror,e:if hasattr (E, ' reason '): Print U "QSBK connect Error,reason:", E.reason Return None def getpageitems (self,pageindex): Pagecode = Self.getpage (pageIndex) if not pagecode: Print "Page Loading Error ..." return None pattern = re.compile (' <div.*?author CLearfix ">.*?<a.*? (. *?) </a>.*?<a.*?
Python Crawler in Practice (Part 1): Scraping Jokes from Qiushibaike (the "Embarrassing Encyclopedia")