# -*- coding: utf-8 -*-
"""Scrapy spider that crawls proxy IP:port pairs from xicidaili.com."""
import scrapy

from Xici.items import XiciItem


class XiciSpider(scrapy.Spider):
    """Scrape the #ip_list table on xicidaili.com/nn for proxy addresses."""

    name = "XiciSpider"
    # OffsiteMiddleware matches domains only — a URL path here would never match,
    # so keep just the hostname.
    allowed_domains = ["www.xicidaili.com"]
    start_urls = ['http://www.xicidaili.com/nn/']

    def parse(self, response):
        """Yield one XiciItem per table row that has an IP address.

        Columns 2 and 3 of each row in the ``#ip_list`` table hold the IP
        and port respectively (column 1 is a flag image, hence the offset).
        """
        for row in response.css('#ip_list tr'):
            ip = row.css('td:nth-child(2)::text').extract_first()
            port = row.css('td:nth-child(3)::text').extract_first()
            if ip:
                # Create a fresh item per row: reusing a single item instance
                # across yields makes every yielded reference point at the
                # same (last-written) data.
                item = XiciItem()
                item['Ip_port'] = ip + ':' + port
                yield item
import pymongo


class XiciPipeline(object):
    """Item pipeline that persists scraped items into a MongoDB collection.

    Connection parameters are pulled from the crawler settings
    (``MONGO_URI`` / ``MONGO_DB``) via :meth:`from_crawler`.
    """

    collection_name = 'Scrapy_items'

    def __init__(self, mongo_uri, mongo_db):
        # Store connection parameters; the actual client is created lazily
        # in open_spider so the pipeline object stays cheap to construct.
        self.mongo_uri = mongo_uri
        self.mongo_db = mongo_db

    @classmethod
    def from_crawler(cls, crawler):
        """Alternate constructor used by Scrapy: read settings off the crawler."""
        return cls(
            mongo_uri=crawler.settings.get('MONGO_URI'),
            mongo_db=crawler.settings.get('MONGO_DB'),
        )

    def open_spider(self, spider):
        """Open the MongoDB connection when the spider starts."""
        self.client = pymongo.MongoClient(self.mongo_uri)
        self.db = self.client[self.mongo_db]

    def close_spider(self, spider):
        """Release the MongoDB connection when the spider finishes."""
        self.client.close()

    def process_item(self, item, spider):
        """Insert the item into MongoDB and pass it on down the pipeline.

        Uses ``insert_one`` — ``Collection.insert`` was deprecated in
        pymongo 3.x and removed in 4.x.
        """
        self.db[self.collection_name].insert_one(dict(item))
        return item
Use Scrapy to crawl proxy IP addresses from a website and store them in MongoDB.