# -*- coding: utf-8 -*-
"""
Crawl the latest movies from a movie website.

Downloads the page at ``URL``, extracts the film titles from its listing
table and writes them, one per line, to ``dytt8_hot.txt`` in the current
working directory.

Created on Wed Oct 16:48:33

@author: Fuzzier
"""

import codecs
import os
import re

import requests
from bs4 import BeautifulSoup

URL = 'http://www.xxxxx.net'

# Browser-like User-Agent sent with every request.
_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_1) '
                  'AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/30.0.1581.2 Safari/537.36',
}

# Matches the href of a film detail link: three word segments followed by
# two numeric segments and a literal ".html" suffix.
_FILM_LINK_RE = re.compile(r'/\w+?/\w+?/\w+?/\d+?/\d+?\.html')

# Placeholder written for a table row that yields no usable title.
_MISSING_TITLE = 'None'

_OUTPUT_NAME = 'dytt8_hot.txt'


def download_page(url, timeout=30):
    """Fetch *url* and return the raw response body.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up; without
            a timeout ``requests`` may block indefinitely.

    Returns:
        The undecoded response content (bytes).

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    response = requests.get(url, headers=_HEADERS, timeout=timeout)
    return response.content


def parser_html(data):
    """Extract the film titles from a downloaded listing page.

    The titles are read from the ``<tr>`` rows found inside
    ``<div class="co_content8">``, itself nested in ``<div class="bd3rl">``.

    Args:
        data: HTML markup of the page (bytes or text).

    Returns:
        A list with one entry per table row: the text of the row's film
        link, or the string ``'None'`` when the row has no film link or the
        link has no single text node. An empty list is returned when the
        expected container elements are not present in the page.
    """
    soup = BeautifulSoup(data, 'html.parser')

    outer = soup.find('div', class_='bd3rl')
    inner = outer.find('div', class_='co_content8') if outer is not None else None
    if inner is None:
        # Page layout differs from what this parser expects: nothing to read.
        return []

    films = []
    for row in inner.find_all('tr'):
        anchor = row.find('a', href=_FILM_LINK_RE)
        # Rows without a film link (e.g. headers or spacers) return None
        # from find(); guard before touching .string.
        title = anchor.string if anchor is not None else None
        films.append(title if title else _MISSING_TITLE)
    return films


if __name__ == '__main__':
    html = download_page(URL)
    film_list = parser_html(html)
    # os.path.join keeps the output inside the working directory on every
    # platform, not just where '\\' is the path separator.
    out_path = os.path.join(os.getcwd(), _OUTPUT_NAME)
    with codecs.open(out_path, 'w', encoding='utf8') as f:
        f.writelines(title + '\r\n' for title in film_list)