Scrapy: Save Items to MySQL or MongoDB, and Download Images


# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: https://doc.scrapy.org/en/latest/topics/item-pipeline.html

import pymongo
import pymysql
from scrapy import Request
from scrapy.exceptions import DropItem
from scrapy.pipelines.images import ImagesPipeline


class Images360Pipeline(object):
    def process_item(self, item, spider):
        return item


# MongoDB
class MongoPipeline(object):
    def __init__(self, mongo_url, mongo_db):
        self.mongo_url = mongo_url
        self.mongo_db = mongo_db

    @classmethod
    def from_crawler(cls, crawler):
        return cls(
            mongo_url=crawler.settings.get('MONGO_URL'),
            mongo_db=crawler.settings.get('MONGO_DB')
        )

    def open_spider(self, spider):
        self.client = pymongo.MongoClient(self.mongo_url)
        self.db = self.client[self.mongo_db]

    def process_item(self, item, spider):
        # insert_one() replaces the insert() call that PyMongo 4 removed
        self.db[item.collection].insert_one(dict(item))
        return item

    def close_spider(self, spider):
        self.client.close()


# MySQL
class MysqlPipeline(object):
    def __init__(self, host, database, user, password, port):
        self.host = host
        self.database = database
        self.user = user
        self.password = password
        self.port = port

    @classmethod
    def from_crawler(cls, crawler):
        return cls(
            host=crawler.settings.get('MYSQL_HOST'),
            database=crawler.settings.get('MYSQL_DATABASE'),
            user=crawler.settings.get('MYSQL_USER'),
            password=crawler.settings.get('MYSQL_PASSWORD'),
            port=crawler.settings.get('MYSQL_PORT')
        )

    def open_spider(self, spider):
        self.db = pymysql.connect(host=self.host, user=self.user,
                                  password=self.password, database=self.database,
                                  charset='utf8', port=self.port)
        self.cursor = self.db.cursor()

    def close_spider(self, spider):
        self.db.close()

    def process_item(self, item, spider):
        # Build the INSERT statement dynamically from the item's fields
        data = dict(item)
        keys = ', '.join(data.keys())
        values = ', '.join(['%s'] * len(data))
        sql = 'insert into %s (%s) values (%s)' % (item.table, keys, values)
        self.cursor.execute(sql, tuple(data.values()))
        self.db.commit()
        return item


# Image download
class ImagePipeline(ImagesPipeline):
    def file_path(self, request, response=None, info=None):
        # Save each image under its original file name
        url = request.url
        file_name = url.split('/')[-1]
        return file_name

    # If the image download failed, drop the item so it is not saved to the
    # database. IMAGES_STORE sets the save directory, e.g. './images'.
    def item_completed(self, results, item, info):
        image_paths = [x['path'] for ok, x in results if ok]
        if not image_paths:
            raise DropItem('Image Downloaded Failed')
        return item

    def get_media_requests(self, item, info):
        yield Request(item['url'])
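The Mongo and MySQL pipelines above rely on item.collection and item.table to pick the MongoDB collection and the MySQL table. The item class itself is not shown in the post; a minimal sketch, where the field names are assumptions rather than something given above, would be:

# items.py -- minimal sketch; the field names are assumptions
from scrapy import Item, Field

class ImageItem(Item):
    collection = table = 'images'  # MongoDB collection name / MySQL table name
    id = Field()
    url = Field()
    title = Field()
    thumb = Field()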

settings.py Configuration

# Only part of settings.py is listed. ImagePipeline is executed first because
# it has the lowest order number in ITEM_PIPELINES.
ITEM_PIPELINES = {
    'images360.pipelines.ImagePipeline': 300,
    'images360.pipelines.MongoPipeline': 301,
    'images360.pipelines.MysqlPipeline': 302,
}

MAX_PAGE = 50

MONGO_URL = 'localhost'
MONGO_DB = 'images360'

BOT_NAME = 'images360'

MYSQL_HOST = 'localhost'
MYSQL_DATABASE = 'images360'
MYSQL_USER = 'root'
MYSQL_PASSWORD = '123456'
MYSQL_PORT = 3306

# Directory where downloaded images are saved
IMAGES_STORE = './images'

SPIDER_MODULES = ['images360.spiders']
NEWSPIDER_MODULE = 'images360.spiders'

# Crawl responsibly by identifying yourself (and your website) on the user-agent
# USER_AGENT = 'images360 (+http://www.yourdomain.com)'

# Obey robots.txt rules
ROBOTSTXT_OBEY = False
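MysqlPipeline inserts into an existing table, so the database and table must be created once up front. A one-time setup sketch, reusing the connection values from the settings above and the assumed column names from the item sketch earlier:

# One-time MySQL setup -- a sketch; the column names are assumptions
import pymysql

db = pymysql.connect(host='localhost', user='root', password='123456', port=3306)
cursor = db.cursor()
cursor.execute('CREATE DATABASE IF NOT EXISTS images360 DEFAULT CHARACTER SET utf8')
cursor.execute('CREATE TABLE IF NOT EXISTS images360.images ('
               'id VARCHAR(255) NOT NULL PRIMARY KEY, '
               'url VARCHAR(255), title VARCHAR(255), thumb VARCHAR(255))')
db.close()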

  
