python 抓取電影天堂電影資訊放入資料庫

來源:互聯網
上載者:User

標籤:python   mysql   電影   

# coding:utf-8
"""Scrape movie details from dytt8.net and store them in a MySQL table.

The script fetches the site index, follows every link whose href contains
'dyzz', extracts the labelled fields from each detail page and inserts one
row per movie into the `move_info` table of the local `moves` database.

Written for Python 2 (it imports `urllib2` and relies on byte-string
regular expressions over UTF-8 encoded HTML).
"""
import requests
from bs4 import BeautifulSoup
from multiprocessing import Pool
import urllib2
import re
import json
import chardet
import pymysql

BASE_URL = "http://dytt8.net"

# Seconds to wait for the server before giving up; without a timeout
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 30

# (result key, pattern) pairs.  Each pattern captures the text that follows
# a field label in the serialised detail page.
# NOTE(review): these labels appear here in Traditional Chinese; confirm they
# match the exact characters (and spacing) used by the live pages.
_FIELD_PATTERNS = (
    ('yiming', re.compile(r"譯  名(.*?)<br/>")),
    ('leibie', re.compile(r"類  別(.*?)<br/>")),
    ('yuyan', re.compile(r"語  言(.*?)<br/>")),
    ('zimu', re.compile(r"字  幕(.*?)<br/>")),
    ('date', re.compile(r"上映日期(.*?)<br/>")),
    ('douban', re.compile(r"豆瓣評分(.*?)<br/>")),
    ('pianchang', re.compile(r"片  長(.*?)<br/>")),
    ('daoyan', re.compile(r"導  演(.*?)<br/>")),
    ('zhuyan', re.compile(r"主  演(.*?)<br/>")),
    ('jianjie', re.compile(r"簡  介(.*?)【】")),
)

# Column order of the INSERT statement; also the keys of the dict returned
# by getmoveinfo().
_COLUMNS = (
    'title', 'yiming', 'leibie', 'yuyan', 'zimu', 'date',
    'douban', 'pianchang', 'daoyan', 'zhuyan', 'jianjie', 'addres',
)

# Placeholders are filled in by the driver, which escapes every value, so
# quotes inside scraped text can neither break nor inject into the statement.
_INSERT_SQL = (
    "INSERT INTO move_info"
    "(title,yiming,leibie,yuyan,zimu,date,douban,pianchang,daoyan,zhuyan,jianjie,addres)"
    "VALUES(%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s);"
)


def _first_group(pattern, text):
    """Return the first captured group of `pattern` in `text`, or '' if absent."""
    match = pattern.search(text)
    return match.group(1) if match else ''


def getmoveinfo(url):
    """Fetch one movie detail page and return its fields as a dict.

    Args:
        url: Absolute URL of the detail page.

    Returns:
        A dict with the keys 'title', 'yiming', 'leibie', 'yuyan', 'zimu',
        'date', 'douban', 'pianchang', 'daoyan', 'zhuyan', 'jianjie' and
        'addres'.  A field that cannot be found on the page is ''.

    Raises:
        requests.RequestException: if the page cannot be downloaded.
    """
    page = requests.get(url, timeout=REQUEST_TIMEOUT).content
    page_html = BeautifulSoup(page, 'lxml')

    # Serialise the document once; every field regex runs over this string.
    html_text = str(page_html)

    res = {}

    # The title is the first child of the first <font color="#07519a">.
    title_nodes = page_html.find_all("font", attrs={"color": "#07519a"})
    if title_nodes and title_nodes[0].contents:
        res['title'] = title_nodes[0].contents[0].encode("utf-8")
    else:
        res['title'] = ''

    for key, pattern in _FIELD_PATTERNS:
        res[key] = _first_group(pattern, html_text)
    # The synopsis may contain line-break tags; drop them.
    res['jianjie'] = res['jianjie'].replace("<br/>", "")

    # The download link is the first anchor inside the highlighted cell.
    res['addres'] = ''
    cells = page_html.find_all("td", attrs={"bgcolor": "#fdfddf"})
    if cells:
        anchor = cells[0].find("a", href=True)
        if anchor is not None:
            res['addres'] = anchor.get("href").encode('utf-8')

    return res


url = BASE_URL + "/"
page = requests.get(url, timeout=REQUEST_TIMEOUT).content
page_html = BeautifulSoup(page, 'lxml')
name = page_html.select("td.inddline > a:nth-of-type(2)")

conn = pymysql.connect(host='localhost', port=3306, user='root',
                       password='root', db='moves', charset='utf8')
try:
    cursor = conn.cursor()
    try:
        for n in name:
            href = n.get("href") or ""
            # Only links whose href contains 'dyzz' are followed.
            if 'dyzz' not in href:
                continue
            info = getmoveinfo(BASE_URL + href)
            cursor.execute(_INSERT_SQL, tuple(info[col] for col in _COLUMNS))
            # Commit per row so rows already stored survive a later failure.
            conn.commit()
    finally:
        cursor.close()
finally:
    # Always release the connection, even if a request or insert raised.
    conn.close()

print('ok')

python 抓取電影天堂電影資訊放入資料庫

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.