Tags: python, mysql, movie
# coding:utf-8
"""Crawl movie detail pages linked from dytt8.net and store them in MySQL.

The script fetches the site's front page, follows every listed link whose
href contains ``dyzz``, extracts a fixed set of fields from each detail page
and inserts one row per movie into the ``move_info`` table of the ``moves``
database.

NOTE(review): this file was reconstructed from a copy whose whitespace,
identifier casing and string literals had been mangled.  The original appears
to target Python 2 (it imported ``urllib2`` and used the ``print`` statement);
the code below is written to parse under both Python 2 and 3, but the
``.encode('utf-8')`` calls kept from the original yield ``bytes`` on Python 3.
"""
import json
import re
import urllib2
from multiprocessing import Pool

import chardet
import pymysql
import requests
from bs4 import BeautifulSoup

BASE_URL = "http://dytt8.net"

# (result key, label that precedes the value on the detail page).
# NOTE(review): these labels are copied from the mangled copy of the script
# and look machine-translated (e.g. "Watercress score" is presumably the
# Douban rating label).  Restore the labels the site actually uses before
# running -- TODO confirm against a live detail page.
FIELD_LABELS = (
    ("yiming", "translation"),
    ("leibie", "category"),
    ("yuyan", "Language"),
    ("zimu", "subtitle"),
    ("date", "Release date"),
    ("douban", "Watercress score"),
    ("pianchang", "Chip length"),
    ("daoyan", "director"),
    ("zhuyan", "starring"),
)

# Regex (one capture group) for the synopsis.
# TODO(review): the original pattern is unreadable in the copy this file was
# rebuilt from; until it is restored, ``jianjie`` is stored as ''.
JIANJIE_PATTERN = None

# Column order of the INSERT; the keys returned by getmoveinfo() match it.
COLUMNS = (
    "title", "yiming", "leibie", "yuyan", "zimu", "date",
    "douban", "pianchang", "daoyan", "zhuyan", "jianjie", "addres",
)

# Placeholders instead of string formatting: scraped text routinely contains
# quotes, which would otherwise break (or inject into) the statement.
# NOTE(review): table-name casing in the mangled copy was "Move_info";
# lowercase is assumed here -- confirm against the schema.
INSERT_SQL = "INSERT INTO move_info ({0}) VALUES ({1})".format(
    ", ".join(COLUMNS), ", ".join(["%s"] * len(COLUMNS))
)


def _first_match(pattern, text):
    """Return the first capture of *pattern* in *text*, or '' if none."""
    found = re.findall(pattern, text)
    return found[0] if found else ""


def _download_address(page_html):
    """Return the href of the first node in the download cell, or ''."""
    cells = page_html.find_all("td", attrs={"bgcolor": "#fdfddf"})
    if not cells or not cells[0].contents:
        return ""
    first = cells[0].contents[0]
    # Plain text nodes have no .get(); only tags carry attributes.
    href = first.get("href") if hasattr(first, "get") else None
    return href.encode("utf-8") if href else ""


def getmoveinfo(url):
    """Fetch one movie detail page and extract its fields.

    Args:
        url: Absolute URL of the detail page.

    Returns:
        A dict with the keys listed in ``COLUMNS``.  Any field that cannot be
        found on the page is the empty string.
    """
    page = requests.get(url).content
    page_html = BeautifulSoup(page, "lxml")
    # Serialize once; every field regex runs against the same text.
    html_text = str(page_html)

    res = {}

    title = page_html.find_all("font", attrs={"color": "#07519a"})
    if title and title[0].contents:
        res["title"] = title[0].contents[0].encode("utf-8")
    else:
        res["title"] = ""

    for key, label in FIELD_LABELS:
        res[key] = _first_match(re.escape(label) + r"(.*?)<br/>", html_text)

    jianjie = _first_match(JIANJIE_PATTERN, html_text) if JIANJIE_PATTERN else ""
    res["jianjie"] = jianjie.replace("<br/>", "")

    res["addres"] = _download_address(page_html)
    return res


def main():
    """Crawl the front page and insert every matching movie into MySQL."""
    page = requests.get(BASE_URL + "/").content
    page_html = BeautifulSoup(page, "lxml")
    links = page_html.select("td.inddline > a:nth-of-type(2)")

    conn = pymysql.connect(host="localhost", port=3306, user="root",
                           password="root", db="moves", charset="utf8")
    try:
        cursor = conn.cursor()
        try:
            for link in links:
                href = link.get("href") or ""
                # Only follow links into the "dyzz" section.
                # NOTE(review): literal casing was unreliable in the mangled
                # copy; lowercase is assumed -- confirm.
                if "dyzz" not in href:
                    continue
                info = getmoveinfo(BASE_URL + href)
                cursor.execute(INSERT_SQL, [info[col] for col in COLUMNS])
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Release the connection even when a request or insert fails.
        conn.close()
    print("OK")


if __name__ == "__main__":
    main()
Python: crawling movie information from Movie Paradise (dytt8.net) into a MySQL database