Get the ID, nickname, and homepage address of all users of the Duitang website
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Created on 2016-06-21 13:57:13
# Project: duitang
#
# pyspider project: starting from one seed user, request that user's fan
# list, schedule the fan list and the profile of every user found in it,
# and emit the username and id read from each profile response.
#
# NOTE(review): the JSON keys below ('data', 'object_list', 'id',
# 'next_start', 'total', 'visit_user', 'user_id', 'username') were restored
# to lowercase on the assumption that the capitalised / space-padded
# variants in the previous text were formatting damage. Confirm against a
# live API response before relying on them.

# The pyspider project template uses this star import; it is expected to
# provide BaseHandler and the ``every`` / ``config`` decorators used below.
from pyspider.libs.base_handler import *  # noqa: F401,F403

# URL templates. The rendered URLs are identical to the literal strings the
# script previously concatenated by hand.
FANS_URL = ('http://www.duitang.com/napi/friendship/fans/'
            '?start={start}&limit=1000&user_id={user_id}')
PROFILE_URL = 'http://www.duitang.com/napi/people/profile/?user_id={user_id}'

# User whose fan list seeds the crawl.
SEED_USER_ID = 116965


class Handler(BaseHandler):
    """Crawl fan lists recursively and collect one record per user profile."""

    crawl_config = {}

    @every(minutes=24 * 60)
    def on_start(self):
        """Schedule the first page of the seed user's fan list."""
        self.crawl(FANS_URL.format(start=0, user_id=SEED_USER_ID),
                   callback=self.index_page)

    @config(age=10 * 24 * 60 * 60)
    def index_page(self, response):
        """Handle one page of a fan list.

        For every entry in ``object_list`` this schedules that user's own
        fan list (handled by this same callback) and that user's profile
        (handled by ``detail_page``). If the response reports more entries
        than have been paged through, the next page for the same user is
        scheduled as well.

        :param response: pyspider response whose ``json`` payload carries
            the fan-list page under the ``data`` key.
        """
        data = response.json['data']

        for fan in data['object_list']:
            # Named fan_id rather than ``id`` to avoid shadowing the builtin.
            fan_id = fan['id']
            self.crawl(FANS_URL.format(start=0, user_id=fan_id),
                       callback=self.index_page)
            self.crawl(PROFILE_URL.format(user_id=fan_id),
                       callback=self.detail_page)

        next_start = data['next_start']
        total = data['total']
        # The user this fan list belongs to, as reported by the response.
        owner_id = data['visit_user']['user_id']
        if next_start < total:
            self.crawl(FANS_URL.format(start=next_start, user_id=owner_id),
                       callback=self.index_page)

    @config(priority=2)
    def detail_page(self, response):
        """Build the result record from a profile response.

        :param response: pyspider response for a profile URL.
        :return: dict with the ``username`` and ``id`` fields taken from the
            ``data`` object of the JSON payload.
        """
        profile = response.json['data']
        return {
            "username": profile['username'],
            "id": profile['id'],
        }
Crawling and parsing JSON strings with Pyspider