# -*- coding: utf-8 -*-
import scrapy
from pyquery import PyQuery as pq

# NOTE(review): package name reconstructed from the garbled import line
# ("Zolphone.items"); confirm it matches the actual Scrapy project name.
from zolphone.items import ZolphoneItem


class PhoneSpider(scrapy.Spider):
    """Spider that walks the ZOL phone listing pages and scrapes each phone's detail page.

    Flow:
      1. ``start_requests`` requests every paginated listing page.
      2. ``parse_index`` extracts the detail-page link of each listed phone.
      3. ``parse_detail`` reads titles, price and release date into a
         ``ZolphoneItem``.
    """

    name = "phone"
    # allowed_domains = ["www.zol.com.cn"]
    # Example of a full listing URL (page 1):
    # start_url = 'http://detail.zol.com.cn/cell_phone_index/subcate57_0_list_1_0_1_1_0_1.html'
    # Listing URL prefix; the page number and '.html' are appended per request.
    start_url = 'http://detail.zol.com.cn/cell_phone_index/subcate57_0_list_1_0_1_1_0_'

    def start_requests(self):
        """Yield one request per listing page (pages 1 through 208)."""
        # The upper bound is hard-coded in the original; presumably it was the
        # number of listing pages when the spider was written -- TODO confirm.
        for page in range(1, 209):
            url = self.start_url + str(page) + '.html'
            yield scrapy.Request(url, callback=self.parse_index)

    def parse_index(self, response):
        """Yield a detail-page request for every phone entry on a listing page.

        :param response: Scrapy response for one listing page.
        """
        base_url = 'http://detail.zol.com.cn'
        doc = pq(response.text)
        # NOTE(review): selectors were reconstructed from garbled text
        # ('. List-box. List-item'); verify them against the live page markup.
        for result in doc('.list-box .list-item').items():
            href = result.find('.pro-intro h3 a').attr('href')
            if not href:
                # attr() returns None when the anchor is missing; skip the
                # entry instead of raising TypeError on str + None.
                continue
            yield scrapy.Request(url=base_url + href, callback=self.parse_detail)

    def parse_detail(self, response):
        """Extract the phone's titles, price and release date from a detail page.

        :param response: Scrapy response for one phone detail page.
        :returns: generator yielding a single populated ``ZolphoneItem``.
        """
        doc = pq(response.text)
        # title1 is read with Scrapy's own selector; the remaining fields use PyQuery.
        title1 = response.css('.page-title h1::text').extract_first()
        title2 = doc('.page-title h2').text()
        price = doc('.product-price .price-type').text()
        release_time = doc('.section div h3 .showdate').text()
        print(title1, title2, price, release_time)

        item = ZolphoneItem()
        item['title1'] = title1
        item['title2'] = title2
        item['price'] = price
        item['release_time'] = release_time

        yield item
import scrapy


class ZolphoneItem(scrapy.Item):
    """Container for the data scraped from one phone detail page."""

    # Define the fields for your item here like:
    # name = scrapy.Field()
    title1 = scrapy.Field()        # text of the page-title <h1>
    title2 = scrapy.Field()        # text of the page-title <h2>
    price = scrapy.Field()         # price text as shown on the page
    release_time = scrapy.Field()  # release-date text as shown on the page
Crawling the Zhongguancun Online (ZOL) mobile phone channel with Scrapy