from bs4 import BeautifulSoup
import pymysql
import requests
# Open the MySQL connection (via PyMySQL) that the rest of the script shares.
# NOTE(review): the credentials are hard-coded; consider loading them from
# configuration or the environment instead of keeping them in source.
# NOTE(review): host '127.0.1' looks like a typo for '127.0.0.1' -- confirm.
conn = pymysql.connect(host='127.0.1', unix_socket='/tmp/mysql.sock',
                       user='root', passwd='19950311', db='mysql')
cur = conn.cursor()
# Every later statement runs against the `scraping` database.
cur.execute("USE scraping")
# Persist one joke (title + content).
def store(title, content):
    """Insert one row into the ``pages`` table and commit it.

    Args:
        title: Value for the ``title`` column.
        content: Value for the ``content`` column.
    """
    # The placeholders must stay unquoted: the driver quotes and escapes each
    # bound value itself, so wrapping %s in literal quotes would store the
    # driver's quotes as part of the data.
    cur.execute("insert into pages(title,content) values(%s,%s)",
                (title, content))
    cur.connection.commit()
class QiuShi(object):
    """Scrape the jokes on one listing page and persist them.

    The page at ``start_url`` is downloaded, every joke ``div`` on it is
    located, and each (title, content) pair is handed to the module-level
    ``store`` function and echoed to stdout.
    """

    # Seconds to wait for the server before giving up on the download.
    REQUEST_TIMEOUT = 10

    def __init__(self, start_url):
        """Remember the page to scrape.

        Args:
            start_url: URL of the listing page to download.
        """
        self.url = start_url

    def crawing(self):
        """Download the page at ``self.url``.

        Returns:
            The raw response body, or ``''`` when the request fails.
        """
        try:
            # Only the URL is passed: a second positional argument would be
            # sent as query-string ``params``, and the 'lxml' parser name
            # belongs to BeautifulSoup, not to the HTTP request.
            response = requests.get(self.url, timeout=self.REQUEST_TIMEOUT)
        except requests.exceptions.RequestException:
            # requests raises its own exception hierarchy; an empty result
            # tells extract() that there is nothing to parse.
            return ''
        return response.content

    def extract(self, htmlContent):
        """Parse the page and store every joke found in it.

        Args:
            htmlContent: Raw HTML of the listing page. An empty value means
                the download failed; a blank line is printed and nothing
                is stored.
        """
        if not htmlContent:
            print('')
            return

        soup = BeautifulSoup(htmlContent, 'lxml')
        jokes = soup.find_all('div', {'class': 'article block untagged mb15'})
        for joke in jokes:
            heading = joke.find('h2')
            body_tag = joke.find('div', {'class': 'content'})
            # Skip entries whose markup lacks the expected tags instead of
            # failing on an attribute access on None.
            if heading is None or body_tag is None:
                continue
            title = heading.text
            # .string is None unless the tag holds a single text node.
            body = body_tag.string
            if body is None:
                continue
            # The rows are stored as UTF-8 encoded bytes.
            store(title.encode('utf-8'), body.encode('utf-8'))
            print(title, body)
            print('------------------------------------------------ ------------------------------')

    def main(self):
        """Download the page and store the jokes it contains."""
        self.extract(self.crawing())
try:
    qiushi = QiuShi('http://www.qiushibaike.com/')
    qiushi.main()
finally:
    # Close the cursor and the connection whether or not scraping succeeded.
    cur.close()
    conn.close()
# Python crawler learning (2): scraping jokes from Qiushibaike and storing them in a MySQL database