A small exercise with MySQL: crawl the questions, descriptions and answers from http://wufazhuce.com and store them in a local database.
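The crawler below relies on three third-party packages (pymysql, requests, beautifulsoup4), and it helps to confirm the MySQL instance is reachable before starting. A minimal connectivity check, assuming the same host, credentials and database as the script further down:

# pip install pymysql requests beautifulsoup4
import pymysql

# Connection parameters copied from the crawler below; adjust them to your own MySQL instance.
con = pymysql.connect(host='192.168.86.130', user='root', password='letmein',
                      db='0603simon', port=3306, charset='utf8')
with con.cursor() as cur:
    cur.execute('SELECT VERSION()')  # simple round trip to verify the connection works
    print(cur.fetchone())
con.close()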
Data table structure:
CREATE TABLE `questions` (
  `title`       varchar(2000) DEFAULT NULL,   -- column lengths are assumed values
  `description` varchar(2000) DEFAULT NULL,
  `answers`     varchar(2000) DEFAULT NULL,
  `url`         varchar(2000) DEFAULT NULL,
  `daynum`      varchar(2000) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8
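If the table does not exist yet, it can also be created from Python with the same connection settings. A small sketch (the varchar lengths mirror the assumed values in the DDL above):

import pymysql

con = pymysql.connect(host='192.168.86.130', user='root', password='letmein',
                      db='0603simon', port=3306, charset='utf8')
ddl = """
CREATE TABLE IF NOT EXISTS questions (
    title       varchar(2000) DEFAULT NULL,
    description varchar(2000) DEFAULT NULL,
    answers     varchar(2000) DEFAULT NULL,
    url         varchar(2000) DEFAULT NULL,
    daynum      varchar(2000) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8
"""
with con.cursor() as cur:
    cur.execute(ddl)  # DDL statements are committed implicitly by MySQL
con.close()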
Code:
# Author:
import pymysql.cursors
import requests
from bs4 import BeautifulSoup

con = pymysql.connect(host='192.168.86.130', user='root', password='letmein',
                      db='0603simon', port=3306, charset='utf8')

# with con.cursor() as cur:
#     sql = 'show tables'
#     result = cur.execute(sql)
#     print(result)
#     exit()

cur = con.cursor()

for p_num in range(1, 1872):
    # for p_num in range(8, 9):
    url = 'http://wufazhuce.com/question/%s' % p_num
    response = requests.get(url=url)
    response.encoding = response.apparent_encoding
    soup = BeautifulSoup(response.text, features="html.parser")
    # print(soup)
    tar = soup.find('div', class_='one-question')
    # print(tar)
    if not tar:
        # page has no question: store a 404 placeholder row so the day number is not lost
        print('not tar')
        sql = """INSERT INTO questions (title,description,answers,url,daynum) VALUES ('404','404','404','%s','%s')""" % (url, p_num)
        print(sql)
        result = cur.execute(sql)
        con.commit()
        print('execution result: ' + str(result))
        continue

    title = tar.find('h4').text.strip()
    # print(title)
    # exit()
    desc = soup.find('div', class_='question-content').text.strip()
    # print(desc)

    # the first question-content div is the description, the rest are answers
    ans = soup.find_all('div', class_='question-content')
    for index, answer in enumerate(ans):
        if index == 0:
            continue
        # print(answer.text.strip())
        answer = answer.text.strip()
        # print(url)
        if len(answer) > 1800:
            # keep the answer within the column length
            answer = answer[0:1800]
        sql = """INSERT INTO questions (title,description,answers,url,daynum) VALUES (%s,%s,%s,'%s','%s')""" % (con.escape(title), con.escape(desc), con.escape(answer), url, p_num)
        print(sql)
        result = cur.execute(sql)
        con.commit()
        print('execution result: ' + str(result))

cur.close()
con.close()
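The script builds SQL strings by hand and escapes the values with con.escape(); pymysql can instead take the values as query parameters, which avoids manual escaping altogether. A sketch of the same insert, assuming title, desc, answer, url and p_num already hold the values gathered above:

insert_sql = ("INSERT INTO questions (title, description, answers, url, daynum) "
              "VALUES (%s, %s, %s, %s, %s)")
# pymysql quotes and escapes each parameter itself, so no con.escape() calls are needed
cur.execute(insert_sql, (title, desc, answer, url, p_num))
con.commit()

Passing the parameters separately also keeps the printed SQL free of very long answer strings and removes the risk of a quote inside the scraped text breaking the statement.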