#!/usr/bi/env python#-*-coding:utf-8-*-import urllibimport urllib2import reclass Turtle (object): Def __init__ (self): Self.pageindex = 1 self.stories = [] self.enable = True Self.header = {' user-agent ': ' mozilla/ 4.0 (compatible; MSIE 5.5; Windows NT) ' self.enable = True ' get web content ' def getpage (self,pageindex): Try:ur L = ' http://www.qiushibaike.com/hot/page/' + str (pageIndex) request = Urllib2. Request (URL, headers = self.header) response = Urllib2.urlopen (Request) return Response.read (). decod E (' Utf-8 ') except URLLIB2. Urlerror,e:if hasattr (E, ' code '): Print U ' error code: ', E.code if Hasattr (E, ' reason '): Print U ' ERROR reason: ', E.reason ' gets the page inside the Satin ' def Getpageitem (self,pageindex): PageContent = self. GetPage (PageIndex) if not pagecontent:print U ' page load failed ... ' Return None pattern = Re.Compile (' <div.*?author.*?
Python crawler: a reader for Qiushibaike (the "Embarrassing Things Encyclopedia" site)