# coding=utf-8
import io

from selenium import webdriver
def _format_movie_line(movie):
    """Serialise one movie record as a single ``{key:value,...}`` text line."""
    fields = []
    # Fixed key order keeps the output deterministic across Python versions.
    for key in (u"cover", u"title", u"rating"):
        value = movie.get(key)
        # A missing attribute is written as an empty value instead of
        # raising TypeError during string formatting.
        fields.append(u"%s:%s" % (key, u"" if value is None else value))
    return u"{" + u",".join(fields) + u"}\n"


def Crawmovie(output_path="E:\\movie_list.txt"):
    """Crawl the Douban movie listing and dump it to a text file.

    Opens https://movie.douban.com/ in a PhantomJS browser, follows the
    first "more-link" anchor, then repeatedly collects every
    ``<a class="item">`` entry that has not been seen yet and clicks the
    "more" anchor until that anchor carries a non-empty ``style``
    attribute.  Progress is printed to stdout for each batch.

    Each movie is written to *output_path* as one UTF-8 line of the form
    ``{cover:<img src>,title:<img alt>,rating:<text of p/strong>}``.

    NOTE(review): this uses the legacy ``webdriver.PhantomJS`` and
    ``find_element(s)_by_*`` API, which is presumably only available in
    older Selenium releases -- confirm against the installed version.

    Args:
        output_path: Destination file; defaults to the path the script
            originally hard-coded.
    """
    driver = webdriver.PhantomJS()
    movie_list = []
    try:
        driver.get("https://movie.douban.com/")

        # NOTE(review): the class name is assumed to be lowercase
        # "more-link" like the other selectors here -- verify on the page.
        more_btn = driver.find_element_by_xpath('(//a[@class="more-link"])[1]')
        more_btn.click()

        while True:
            # Only fetch the items located after the ones already stored.
            start_index = len(movie_list)
            xpath_str = '//a[@class="item"][position()>%d]' % start_index
            item_tags = driver.find_elements_by_xpath(xpath_str)
            print("Start_index: %d" % start_index)
            print(item_tags)
            print("Number: %d" % len(item_tags))

            for item_tag in item_tags:
                img_tag = item_tag.find_element_by_tag_name("img")
                movie_list.append({
                    "cover": img_tag.get_attribute("src"),
                    "title": img_tag.get_attribute("alt"),
                    "rating": item_tag.find_element_by_xpath(".//p/strong").text,
                })
            print("--" * 20)

            load_more_btn = driver.find_element_by_xpath('//a[@class="more"]')
            # A non-empty inline style ends the crawl; presumably the page
            # hides the button this way once everything has been loaded.
            if load_more_btn.get_attribute("style"):
                break
            load_more_btn.click()
    finally:
        # Always shut the browser down so no PhantomJS process is leaked.
        driver.quit()

    # Explicit UTF-8 so non-ASCII titles are written correctly regardless
    # of the platform's default encoding.
    with io.open(output_path, "w", encoding="utf-8") as fp:
        fp.writelines(_format_movie_line(movie) for movie in movie_list)
# Run the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    Crawmovie()
# Selenium + PhantomJS small example: crawl all Douban movies (Python code)