"""python3.x: periodically fetch fund page data into the database.

Scrapes fund information from fund.eastmoney.com and inserts new
net-asset-value (NAV) records into the MySQL table PYTHONDB.T_X01_WLPC.

TODO: add the periodic scheduling ("timing") function mentioned in the
original notes — currently the script runs a single fetch-and-store pass.
"""

import urllib.request

import pymysql
from bs4 import BeautifulSoup


def DODATAWLPC(jjdm, jjmc, dwjz, dwjzrq):
    """Insert one fund NAV record unless an identical one already exists.

    Data-warehousing step: checks PYTHONDB.T_X01_WLPC for a row with the
    same unit NAV and NAV date; inserts a new row only when none is found.

    Args:
        jjdm: fund code (string).
        jjmc: fund name (string).
        dwjz: unit net asset value (string, as scraped).
        dwjzrq: date of the unit NAV (string, as scraped).

    Returns:
        int: 0 on success (row inserted, or already present), 1 on failure.
    """
    r_code = 0
    print('Fund Information:' + jjdm + ',' + jjmc + ',' + dwjz + ',' + dwjzrq)
    try:
        # Open the database connection.
        conn = pymysql.connect(host='localhost', user='root', passwd='lizm',
                               db='pythondb', port=3306, charset='utf8')
        try:
            cursor = conn.cursor()
            # The values come from a scraped web page (untrusted input), so
            # use parameterized queries — never build SQL by concatenation.
            sql_check = ("SELECT * FROM PYTHONDB.T_X01_WLPC "
                         "WHERE dwjz = %s AND dwjzrq = %s")
            print('sql_check>>>: ' + sql_check)
            cursor.execute(sql_check, (dwjz, dwjzrq))
            results = cursor.fetchall()
            # Insert only when no matching record exists.
            if len(results) == 0:
                sql = ("INSERT INTO PYTHONDB.T_X01_WLPC "
                       "(jjdm, jjmc, dwjz, dwjzrq, oprdate) "
                       "VALUES (%s, %s, %s, %s, sysdate())")
                try:
                    print('sql>>>: ' + sql)
                    cursor.execute(sql, (jjdm, jjmc, dwjz, dwjzrq))
                    conn.commit()
                    r_code = 0
                except Exception:
                    # Roll back if the insert fails for any reason.
                    conn.rollback()
                    r_code = 1
            else:
                r_code = 0
                print('Fund ' + jjmc + ' data already exists')
            cursor.close()
        finally:
            # Always release the database resources, even on error.
            conn.close()
    except Exception:
        r_code = 1
        print('failed, exception')
    return r_code


def getjjinfor(header_, url_):
    """Scrape one fund detail page and return its key fields as a list.

    Args:
        header_: dict of HTTP request headers (user-agent etc.).
        url_: fund detail page URL on fund.eastmoney.com.

    Returns:
        list of str, in scrape order:
        [fund code, fund name, estimated NAV, unit NAV, accumulated NAV,
         estimate timestamp, unit NAV date].
        NOTE(review): the index layout is inferred from the scraping order
        and the caller's use of indices 0/1/3/6 — confirm against the live
        page markup, which may have changed.
    """
    r_info = []
    req = urllib.request.Request(url=url_, headers=header_)
    res = urllib.request.urlopen(req)
    html = res.read().decode('utf-8')
    soup = BeautifulSoup(html, 'html.parser')
    # Fund code: the span.ui-num inside the title block.
    jjdm = soup.find('div', class_='fundDetail-tit').find('span', class_='ui-num')
    r_info.append(jjdm.get_text())
    # Fund name: title text before the opening parenthesis.
    title_name = soup.find('div', class_='fundDetail-tit')
    r_info.append(title_name.text.split('(')[0])
    # Estimated NAV, unit NAV and accumulated NAV values.
    for datanums in soup.find_all('dd', class_='dataNums'):
        for jzs_ in datanums.find_all('span',
                                      class_='ui-font-large ui-color-red ui-num'):
            r_info.append(jzs_.text)
    # Estimate timestamp, with the surrounding parentheses stripped.
    gz_gztime = soup.find(id='gz_gztime')
    r_info.append(gz_gztime.text.replace('(', '').replace(')', ''))
    # Unit NAV date: first <p> inside dl.dataItem02, between parentheses.
    dwjzrq_s = soup.find('dl', class_='dataItem02').p
    r_info.append(dwjzrq_s.text.split('(')[1].split(')')[0])
    return r_info


# Test driver: fetch one fund page and store its record.
if __name__ == '__main__':
    url = r'http://fund.eastmoney.com/340007.html?spm=search'
    headers = {
        'user-agent': 'mozilla/5.0 (Windows NT 10.0; WOW64) applewebkit/537.36 (khtml, like Gecko) chrome/55.0.2883.87 safari/537.36'
    }
    jj_infor = getjjinfor(headers, url)
    # jj_infor indices: [0]=code, [1]=name, [3]=unit NAV, [6]=unit NAV date.
    return_code = DODATAWLPC(jj_infor[0], jj_infor[1], jj_infor[3], jj_infor[6])
    if return_code == 0:
        print('Successful Execution')
    else:
        print('Execution failed')