#!/usr/local/bin/python
# -*- coding: utf-8 -*-
"""Crawl movie names from the CBO website and write them to the database.

Python: 3.5
Author: wucl, Zhenghai.zhang
Version: 0.1
History: 2017.10.25

Workflow (see ``main``):

1. Download movie-list pages from cbooo.cn.
2. Insert every movie into ``table``; rows MySQL rejects are skipped.
3. Rewrite ``tabledelta`` with the movies inserted in step 2, mirror them
   into ``dbtarget.tablesync`` and queue two ``release_task`` rows.
4. E-mail the new movie names to every address in ``tolist``.

NOTE(review): this file was reconstructed from a copy whose whitespace and
letter case had been mangled.  Case-sensitive values (database/table names,
JSON keys, release-task constants) are kept as they appeared in that copy
and should be confirmed against the real schema and API before use.
"""

import datetime
import html
import re
import time

import pymysql
import requests
from exchangelib import (
    DELEGATE,
    Account,
    Credentials,
    HTMLBody,
    Mailbox,
    Message,
)

# --- Configuration ---------------------------------------------------------
# The 'xxx' values are placeholders and must be replaced before running.
host = 'xxx'
user = 'xxx'
passwd = 'xxx'
dbme = 'Crawl'
dbtarget = 'Back_brace'
table = 'Movie_hotwords'
tabledelta = 'Movie_hotwords_delta'
tablesync = 'Slot_value'
port = 3306
tolist = ['xxx@xxx.com']

# Movie-list endpoint; the page number is appended to this prefix.
MOVIE_LIST_URL = (
    'http://www.cbooo.cn/Mdata/getMdata_movie'
    '?area=50&type=0&year=0&initial=%E5%85%A8%E9%83%A8&pindex='
)

# Seconds to wait for the HTTP server before giving up on a request.
REQUEST_TIMEOUT = 10

# The original script overrode the page count reported by the site with 1.
# That behaviour is kept; set this to None to crawl every reported page.
MAX_PAGES = 1

# strftime pattern for the gmt_create / gmt_modify columns.
TIMESTAMP_FORMAT = "%Y-%m-%d %H:%M:%S"

# Characters removed from movie names before they are stored.
# NOTE(review): the original literal contained characters that were lost
# (rendered as "?") and stray spaces.  Only the legible characters are
# listed, and whitespace is deliberately NOT stripped -- confirm the set.
_PUNCTUATION = (
    "!?\"#$%&()*+,-/:<=>@[\\]^_'{|}~"
    "、〖〗〝〞–—‘’“”﹏.·"
)
# re.escape keeps "]", "\", "^" and "-" from being read as class syntax.
_PUNCTUATION_RE = re.compile("[%s]+" % re.escape(_PUNCTUATION))

_INSERT_TASK_SQL = (
    'INSERT INTO Back_brace.release_task '
    '(app_type, app_status, type, ref_id, status, register_id, '
    'create_by, modify_by, gmt_create, gmt_modify) '
    'VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'
)


# --- Helpers ---------------------------------------------------------------
def _get_ci(mapping, key):
    """Return ``mapping[key]``, falling back to a case-insensitive match.

    The exact letter case of the API's JSON keys could not be confirmed,
    so an exact lookup is tried first and a case-folded scan second.

    Raises:
        KeyError: if no key matches in either way.
    """
    if key in mapping:
        return mapping[key]
    wanted = key.lower()
    for candidate, value in mapping.items():
        if candidate.lower() == wanted:
            return value
    raise KeyError(key)


def _quote_identifier(name):
    """Return ``name`` (``table`` or ``db.table``) wrapped in backticks.

    Identifiers cannot be sent as query parameters, so they are quoted
    here instead of being interpolated raw into the statement.

    Raises:
        ValueError: if a part is empty or contains a backtick.
    """
    parts = name.split(".")
    for part in parts:
        if not part or "`" in part:
            raise ValueError("invalid SQL identifier: %r" % name)
    return ".".join("`%s`" % part for part in parts)


def _strip_punctuation(name):
    """Return ``name`` with every character of ``_PUNCTUATION`` removed."""
    return _PUNCTUATION_RE.sub("", name)


def _now_str():
    """Return the current local time formatted with ``TIMESTAMP_FORMAT``."""
    return datetime.datetime.now().strftime(TIMESTAMP_FORMAT)


def _fetch_page(page):
    """Download one movie-list page and return the decoded JSON document."""
    response = requests.get(MOVIE_LIST_URL + str(page),
                            timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response.json()


def _connect(host, user, passwd, dbme, port):
    """Open a UTF-8 MySQL connection to database ``dbme``."""
    return pymysql.connect(host=host, user=user, passwd=passwd, db=dbme,
                           port=port, charset="utf8")


# --- Crawling --------------------------------------------------------------
def get_info():
    """Return ``(total_pages, total_movies)`` as reported by the website.

    On any download or decoding problem a message is printed and
    ``(0, 0)`` is returned, so callers can always unpack the result.
    """
    try:
        pdata = _fetch_page(1)
        return int(_get_ci(pdata, 'Tpage')), int(_get_ci(pdata, 'TCount'))
    except (requests.RequestException, ValueError, KeyError,
            TypeError, AttributeError):
        print("failed to get total number of pages and total movie Count")
        return 0, 0


def get_movies(page):
    """Return the list of movie dicts found on result page ``page``.

    Each returned dict is a copy of the API's record that is guaranteed
    to carry the keys ``'ID'`` and ``'Moviename'``.  On failure a message
    is printed and an empty list is returned.
    """
    try:
        movies_list = []
        for record in _get_ci(_fetch_page(page), 'PData'):
            movie = dict(record)
            movie['ID'] = _get_ci(record, 'ID')
            movie['Moviename'] = _get_ci(record, 'Moviename')
            movies_list.append(movie)
        return movies_list
    except (requests.RequestException, ValueError, KeyError,
            TypeError, AttributeError):
        print('failed to get movie list for page%s' % page)
        return []


# --- Database --------------------------------------------------------------
def movie_insert(host, user, passwd, dbme, port, table, movies_list):
    """Insert ``movies_list`` into ``table`` and return the rows that were new.

    Each movie's ``'Moviename'`` is stripped of punctuation in place before
    it is stored.  A movie whose INSERT is rejected by MySQL is reported
    and skipped; every successfully inserted movie dict is returned.
    """
    insert_sql = 'INSERT INTO {} (movie_id, movie_name) VALUES (%s, %s)'.format(
        _quote_identifier(table))
    new_movies = []
    conn = _connect(host, user, passwd, dbme, port)
    try:
        with conn.cursor() as cur:
            for movie in movies_list or []:
                movie['Moviename'] = _strip_punctuation(movie['Moviename'])
                try:
                    # Values travel as parameters so that quotes in a
                    # crawled name cannot alter the statement.
                    cur.execute(insert_sql, (movie['ID'], movie['Moviename']))
                except pymysql.IntegrityError:
                    print(" " * 20, movie['Moviename'],
                          "already exists, skip ...")
                except pymysql.Error as err:
                    print(" " * 20, movie['Moviename'],
                          "insert failed, skip ...", err)
                else:
                    new_movies.append(movie)
        conn.commit()
    finally:
        conn.close()
    return new_movies


def movie_new_and_sync(host, user, passwd, dbme, dbtarget, port, tabledelta,
                       movies_list, tablesync):
    """Publish the newly found movies to the delta and sync tables.

    Steps, all committed together at the end:

    * delete every row of ``dbme.tabledelta``;
    * for each movie, insert it into ``dbme.tabledelta`` and into
      ``dbtarget.tablesync`` (a movie rejected by MySQL is skipped);
    * insert two rows into ``Back_brace.release_task``; a failure there
      is reported but does not abort the commit.
    """
    delta_table = _quote_identifier(dbme + "." + tabledelta)
    sync_table = _quote_identifier(dbtarget + "." + tablesync)
    insert_delta = 'INSERT INTO {} (movie_id, movie_name) VALUES (%s, %s)'.format(
        delta_table)
    insert_sync = (
        'INSERT INTO {} (slot_type_id, slot_value, create_by, modify_by, '
        'gmt_create, gmt_modify, out_value) '
        'VALUES (%s, %s, %s, %s, %s, %s, %s)'
    ).format(sync_table)

    conn = _connect(host, user, passwd, dbme, port)
    try:
        with conn.cursor() as cur:
            cur.execute("DELETE FROM {}".format(delta_table))

            for movie in movies_list or []:
                now = _now_str()
                try:
                    cur.execute(insert_delta,
                                (movie['ID'], movie['Moviename']))
                    cur.execute(insert_sync,
                                ("xxxxxx", movie['Moviename'], "system",
                                 "system", now, now, ""))
                except pymysql.IntegrityError:
                    print(" " * 20, movie['Moviename'],
                          "already exists, skip ...")
                except pymysql.Error as err:
                    print(" " * 20, movie['Moviename'],
                          "insert failed, skip ...", err)

            now = _now_str()
            task_rows = (
                ("Backbrace", "Testpass", "SLOT", "xxxxxx", "Init",
                 "Slot_backbrace_testpass", "Zhenghai.zhang",
                 "Zhenghai.zhang", now, now),
                ("SKILL", "Deploy", "SLOT", "xxxxxx", "Init",
                 "Slot_skill_deploy", "Zhenghai.zhang",
                 "Zhenghai.zhang", now, now),
            )
            try:
                for row in task_rows:
                    # mogrify renders the final statement for the log,
                    # as the original script printed each command.
                    print(cur.mogrify(_INSERT_TASK_SQL, row))
                    cur.execute(_INSERT_TASK_SQL, row)
            except pymysql.Error:
                print("write into Back_brace.release_task error!!!")
        conn.commit()
    finally:
        conn.close()


# --- Notification ----------------------------------------------------------
def email(to, subject, body):
    """Send an HTML message with ``subject`` and ``body`` to address ``to``.

    The Exchange credentials and sender address below are placeholders.
    """
    creds = Credentials(username='xxxxxx', password='xxxxxx')
    account = Account(primary_smtp_address='xxx@xxx.com', credentials=creds,
                      autodiscover=True, access_type=DELEGATE)
    message = Message(
        account=account,
        subject=subject,
        body=HTMLBody(body),
        to_recipients=[Mailbox(email_address=to)],
    )
    message.send_and_save()


# --- Entry point -----------------------------------------------------------
def main():
    """Crawl, store, sync and announce the newly discovered movies."""
    update_movies = []

    pages, _ = get_info()
    if MAX_PAGES is not None:
        pages = MAX_PAGES

    for i in range(1, pages + 1):
        print("*" * 30, i, "*" * 30)
        movies_list = get_movies(i)
        new_movies = movie_insert(host, user, passwd, dbme, port, table,
                                  movies_list)
        for new_movie in new_movies:
            print(new_movie['Moviename'], "Added")
            update_movies.append({
                "ID": new_movie["ID"],
                "Moviename": new_movie["Moviename"],
            })
        time.sleep(1)  # pause between page requests

    print(update_movies)

    try:
        # Writes the added movies to the delta table and the sync table.
        movie_new_and_sync(host, user, passwd, dbme, dbtarget, port,
                           tabledelta, update_movies, tablesync)
    except Exception as err:  # top-level boundary: report and still notify
        print("Movie Update and sync error!", err)

    subject = 'This new movie name'
    # Names come from an external site, so escape them before embedding
    # them in the HTML body.
    body = "This new movie name is:" + "".join(
        html.escape(movie["Moviename"]) + "<br>" for movie in update_movies)
    for to in tolist:
        email(to, subject, body)


if __name__ == '__main__':
    main()
# Feedback and suggestions from experienced developers are welcome.
# Python 3.5: crawl cbooo.cn data and sync it to MySQL.