import logging
import re
import time

import pymysql
import requests
from lxml import etree
# Shared MySQL connection and cursor used by the scraper functions below.
# String arguments must not carry padding spaces, and the charset has to be a
# name the driver knows ('utf8'); otherwise the connection cannot be opened.
# NOTE(review): credentials are hard-coded; consider loading them from the
# environment or a config file.
conn = pymysql.connect(
    host='localhost',
    user='root',
    passwd='123456',
    db='mydb',
    port=3306,
    charset='utf8',
)
cursor = conn.cursor()

# Browser-like User-Agent sent with every HTTP request made by this script.
headers = {
    'User-Agent': 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Win64; x64; Trident/5.0)',
}
def get_movie_url(url):
    """Fetch one listing page and scrape every movie it links to.

    Args:
        url: Address of a Top 250 listing page.

    Every ``href`` found under a ``div`` with class ``hd`` is handed to
    ``get_movie_info``.
    """
    response = requests.get(url, headers=headers)
    selector = etree.HTML(response.text)
    movie_hrefs = selector.xpath('//div[@class="hd"]/a/@href')
    for movie_href in movie_hrefs:
        get_movie_info(movie_href)
def get_movie_info(url):
    """Scrape one movie detail page and insert its fields into ``doubanmovie``.

    Args:
        url: Address of the movie's detail page.

    The row is only executed on the shared module-level cursor; committing is
    left to the caller. A page that lacks any of the expected fields is
    skipped with a logged warning so that one bad page does not abort the
    whole crawl.
    """
    response = requests.get(url, headers=headers)
    page = response.text
    selector = etree.HTML(page)
    try:
        name = selector.xpath('//*[@id="content"]/h1/span[1]/text()')[0]
        director = selector.xpath('//*[@id="info"]/span[1]/span[2]/a/text()')[0]
        actors = selector.xpath('//*[@id="info"]/span[3]/span[2]')[0]
        # string(.) flattens the element and all of its children to plain text.
        actor = actors.xpath('string(.)')
        style = re.findall(r'<span property="v:genre">(.*?)</span>', page, re.S)[0]
        # The field labels must match the page text literally; the page is
        # rendered in Chinese, so translated labels would never match.
        # NOTE(review): labels and the rating id below follow Douban's markup
        # -- confirm against a live page.
        country = re.findall(
            r'<span class="pl">制片国家/地区:</span>(.*?)<br/>', page, re.S
        )[0].strip()
        release_time = re.findall(r'上映日期:</span>.*?>(.*?)</span>', page, re.S)[0]
        # Named ``runtime`` rather than ``time`` so the ``time`` module is not
        # shadowed inside this function.
        runtime = re.findall(r'片长:</span>.*?>(.*?)</span>', page, re.S)[0]
        score = selector.xpath(
            '//*[@id="interest_sectl"]/div[1]/div[2]/strong/text()'
        )[0]
    except IndexError:
        # At least one lookup returned no match (layout change, blocked
        # request, missing field): skip this movie but leave a trace.
        logging.getLogger(__name__).warning(
            "Skipping %s: an expected field was not found on the page", url
        )
        return

    # Values are passed separately from the SQL text so the driver escapes
    # them; str() turns lxml's string results into plain ``str`` objects.
    cursor.execute(
        "insert into doubanmovie "
        "(name,director,actor,style,country,release_time,time,score) "
        "values(%s,%s,%s,%s,%s,%s,%s,%s)",
        (
            str(name),
            str(director),
            str(actor),
            str(style),
            str(country),
            str(release_time),
            str(runtime),
            str(score),
        ),
    )
if __name__ == '__main__':
    # The listing is paginated in steps of 25 through the ``start`` query
    # parameter; a misspelled parameter name would return the first page for
    # every request.
    urls = [
        'https://movie.douban.com/top250?start={}'.format(offset)
        for offset in range(0, 250, 25)
    ]
    try:
        for url in urls:
            get_movie_url(url)
            # Pause between listing pages to avoid hammering the site.
            time.sleep(2)
        # Persist all inserted rows once the crawl has finished.
        conn.commit()
    finally:
        # Always release the database resources, even if the crawl fails.
        cursor.close()
        conn.close()
# Problem: the script is unable to connect to the database, so it is not possible to check whether the rest of the code runs.
# Goal: crawl the Douban Top 250 movies and store each movie's name, director, actors, genre, country, release date, running time and score in a MySQL database.