1. Create a database named `scrapy` using the MySQL tools
2. Create a `douban` table in the `scrapy` database:
create table douban(
    id int primary key auto_increment,
    name varchar(100) NOT NULL,
    author varchar(100) NULL,
    press varchar(100) NULL,
    date varchar(30) NULL,
    page varchar(30) NULL,
    price varchar(30) NULL,
    score varchar(30) NULL,
    ISBN varchar(30) NULL,
    author_profile varchar(1500) NULL,
    content_description varchar(1500) NULL,
    link varchar(255) NULL
) default charset=utf8;

(Note: the original column lengths were garbled in this copy; the values above are a reasonable reconstruction — adjust to your data.)
3. Set the database connection parameters in the Scrapy crawler's pipelines.py:
#-*-coding:utf-8-*-# Define Your item pipelines here## Don't forget to add your pipeline to the Item_pipelines setting# See:http://doc.scrapy.org/en/latest/topics/item-pipeline.htmlImport JSON fromtwisted.enterprise Import Adbapi fromscrapy Import logimport mysqldbimport mysqldb.cursorsclassDoubanpipeline (Object): def __init__ (self): Self.file= Open ("./books.json","WB") def process_item (self, item, spider): # encoded conversion forKinchItem:item[k]= Item[k].encode ("UTF8") Line= Json.dumps (Dict (item), Ensure_ascii=false) +"\ n"Self.file.write (line)returnItemclassMysqlpipeline (Object): def __init__ (self): Self.dbpool= Adbapi. ConnectionPool ("MySQLdb", DB="scrapy", # database name user="Root", # Database user name passwd="qmf123456", # password Cursorclass=MySQLdb.cursors.DictCursor, CharSet="UTF8", Use_unicode=False) def process_item (self, item, spider): Query=self.dbpool.runInteraction (Self._conditional_insert, item) query.adderrback (Self.handle_error)returnItem def _conditional_insert (self, TB, item): Tb.execute ("INSERT INTO douban (name, author, press, date, page, price, score, ISBN, author_profile,\Content_description, link) values (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",\(item["name"], item["author"], item[" Press"], item["Date"], item["page"], item[" Price"], item["score"], item["ISBN"], item["Author_profile"], item["content_description"], item["Link"])) log.msg ("Item data in db:%s"% Item, level=log. DEBUG) def handle_error (self, E): Log.err (E)
Then enable the pipeline in the settings.py file.
4. Install the MySQLdb driver
Mysql-python-1.2.3.win-amd64-py2.7.exe
To see if the driver was installed successfully:
5. Query the database through Python's MySQLdb module:
Import Mysqldbconn=mysqldb.connect (host="127.0.0.1", user="Root", passwd="qmf123456", db="scrapy") Cursor=conn.cursor () n= Cursor.execute ("Select COUNT (*) from Douban") forRowinchCursor.fetchall (): forRinchRow:print R
https://my.oschina.net/u/993130/blog/213617
http://www.jb51.net/article/57290.htm
http://www.cnblogs.com/sislcb/archive/2008/11/24/1339913.html
http://drizzlewalk.blog.51cto.com/2203401/448874
Scrapy crawler results into MySQL database