# -*- coding: utf-8 -*-
import re
import sys
import urllib2
from contextlib import closing

import MySQLdb

# Python 2 only: reload(sys) makes sys.setdefaultencoding() available again
# so that implicit str <-> unicode conversions use UTF-8 instead of ASCII.
reload(sys)
sys.setdefaultencoding('utf-8')
class SQL(object):
    """Small helper that writes crawled chapters into a MySQL table.

    The connection is opened once, when the class body is executed, and is
    shared by every instance through the ``conn`` class attribute.  Callers
    are expected to close it themselves (``conn.close()``) when finished.
    """

    # charset is set so text is exchanged as UTF-8; without it non-ASCII
    # content comes out garbled.
    # NOTE(review): credentials are hard-coded and MySQL user names are
    # case-sensitive -- confirm "Root" / "Test" against the real server.
    conn = MySQLdb.connect(
        host="localhost",
        port=3306,
        user="Root",
        passwd="123",
        db="Test",
        charset="UTF8",
    )

    def insert(self, name, content):
        """Insert one ``(name, content)`` row into ``Xiaoshuo`` and commit.

        Args:
            name: Value for the second column of the table.
            content: Value for the third column of the table.

        Raises:
            Whatever the database driver raises when the statement or the
            commit fails; the cursor is closed in either case.
        """
        cur = self.conn.cursor()
        try:
            # Let the driver bind and escape the values.  Interpolating them
            # into the statement text breaks on any quote in the crawled
            # text and allows SQL injection.  The first column is passed as
            # NULL (presumably an auto-increment id -- verify the schema).
            cur.execute(
                "insert INTO Xiaoshuo VALUES (NULL, %s, %s)",
                (name, content),
            )
        finally:
            cur.close()
        self.conn.commit()
# Crawl the chapter index of one book, download every chapter page and store
# each chapter's text in the database.

# Browser-like User-Agent sent with the index-page request.
user_agent = 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:49.0) Gecko/20100101 Firefox/49.0'
url = "http://book.qidian.com/info/3608595"
headers = {'User-Agent': user_agent}

# Both patterns are compiled once, outside the loop.
# NOTE(review): they are matched case-insensitively because the exact casing
# of the site's attribute values could not be confirmed -- verify against
# the live HTML.
# Chapter links on the index page: group 1 is the chapter URL without the
# scheme, group 2 is the chapter title.  Every wildcard is non-greedy so one
# match cannot run across several <li> entries on the same line.
reg = re.compile(
    r'<li data-rid=".*?"><a href="(.*?)" target="_blank" '
    r'data-eid="qd_g55" data-cid=".*?" title=".*?">(.*?)</a>',
    re.IGNORECASE,
)
# Chapter body: everything inside the reading <div>; [\s\S] also matches
# newlines, which "." does not.
regs = re.compile(
    r'<div class="Read-content j_readcontent">\s*([\s\S]*?)\s*</div>',
    re.IGNORECASE,
)

mysql = SQL()  # instantiate the database helper
try:
    request = urllib2.Request(url, headers=headers)
    # urllib2 responses are not context managers in Python 2, hence closing().
    with closing(urllib2.urlopen(request)) as response:
        htmll = response.read()

    for i in reg.finditer(htmll):
        curl_ = i.group(1)
        name = i.group(2)
        print("Crawling%s" % name)
        # The captured href has no scheme, so "http:" is prepended.
        with closing(urllib2.urlopen('http:' + curl_)) as response:
            htmlll = response.read()
        for m in regs.findall(htmlll):
            # Turn paragraph tags into line breaks before storing the text.
            content = m.replace('<p>', '\r\n')
            # Store the crawled chapter title and text in the database.
            mysql.insert(name, content)
            print("completed%s" % name)
finally:
    # Close the database connection even when a download or insert fails.
    mysql.conn.close()
# A few MySQL database notes
# Create a table (the varchar lengths for name and class were lost in the
# source; 20 and 30 are illustrative values):
#cur.execute("create table student(id int, name varchar(20), class varchar(30), age varchar(10))")
# The novel's content column should use the TEXT type, which needs no length
# argument.  VARCHAR requires an explicit length (the author cites 255 as the
# maximum), so use VARCHAR only when the strings are short.
# Insert a row:
#cur.execute("insert into student values('2', 'Tom', '3 Year 2 class', '9')")
# Update the rows that match a condition:
#cur.execute("update student set class='3 Year 1 class' where name = 'Tom'")
# Delete the rows that match a condition:
#cur.execute("delete from student where age='9'")
# Simple usage of the MySQLdb module