This article shows how a custom Scrapy pipeline class can save scraped data to MongoDB. The example is provided for your reference, as follows:
# Standard Python Library imports

# 3rd party modules
import pymongo
from scrapy import log
from scrapy.conf import settings  # NOTE(review): scrapy.conf is deprecated in modern Scrapy; use crawler.settings / from_crawler instead
from scrapy.exceptions import DropItem


class MongoDBPipeline(object):
    """Scrapy item pipeline that validates items and stores them in MongoDB.

    Connection parameters are read from the project settings:
    MONGODB_SERVER, MONGODB_PORT, MONGODB_DB, MONGODB_COLLECTION.
    """

    def __init__(self):
        # Read connection configuration from the Scrapy settings object.
        self.server = settings['MONGODB_SERVER']
        self.port = settings['MONGODB_PORT']
        self.db = settings['MONGODB_DB']
        self.col = settings['MONGODB_COLLECTION']
        # NOTE(review): pymongo.Connection was removed in pymongo 3.x;
        # use pymongo.MongoClient on current installs.
        connection = pymongo.Connection(self.server, self.port)
        database = connection[self.db]
        self.collection = database[self.col]

    def process_item(self, item, spider):
        """Validate an item and write it to MongoDB.

        Raises DropItem (discarding the item) if any field is empty;
        otherwise inserts the item and returns it unchanged so later
        pipeline stages can process it.
        """
        err_msg = ''
        # Collect one message per missing/empty field before dropping,
        # so the DropItem reason lists every problem at once.
        for field, data in item.items():
            if not data:
                err_msg += 'Missing %s of poem from %s\n' % (field, item['url'])
        if err_msg:
            raise DropItem(err_msg)
        # NOTE(review): collection.insert and scrapy.log are deprecated;
        # insert_one() and spider.logger are the modern equivalents.
        self.collection.insert(dict(item))
        log.msg('Item written to MongoDB database %s/%s' % (self.db, self.col),
                level=log.DEBUG, spider=spider)
        return item
I hope this article helps you with your Python programming.