Crawling the Douban Movie Top 250 list (code unfinished; to be updated)
import json
import re
from typing import Dict, Iterator, Optional

import requests
from requests.exceptions import RequestException

# NOTE(review): the original post left the regular expression empty (the
# author remarks that it did not fit on the notebook screen). The pattern
# below is a reconstruction. It assumes every list entry contains
# `<em class="">RANK</em>` followed later by `<span class="title">TITLE</span>`
# -- verify against the live page markup before relying on it.
ITEM_PATTERN = re.compile(
    r'<em class="">(\d+)</em>.*?<span class="title">(.*?)</span>',
    re.S,  # let "." span newlines, since one entry covers several lines
)

# NOTE(review): presumably the site rejects the default python-requests
# User-Agent; confirm whether this header is actually required.
DEFAULT_HEADERS = {"User-Agent": "Mozilla/5.0"}


def get_one_page(url: str,
                 headers: Optional[Dict[str, str]] = None,
                 timeout: float = 10) -> Optional[str]:
    """Fetch *url* and return the response body as text.

    Args:
        url: Address of the page to download.
        headers: Optional request headers; ``DEFAULT_HEADERS`` when omitted.
        timeout: Seconds to wait before giving up on the request.

    Returns:
        The page text when the server answers with status 200, otherwise
        ``None`` (also when the request itself fails).
    """
    try:
        response = requests.get(
            url,
            headers=headers or DEFAULT_HEADERS,
            timeout=timeout,  # without a timeout the call can block forever
        )
    except RequestException:
        return None
    # Only a 200 response counts as "the page opened properly".
    if response.status_code == 200:
        return response.text
    return None


def parse_one_page(html: Optional[str],
                   pattern=ITEM_PATTERN) -> Iterator[Dict[str, str]]:
    """Yield one dict per entry matched in *html*.

    Args:
        html: Page text as returned by ``get_one_page``; ``None`` or an
            empty string yields nothing instead of raising.
        pattern: Regular expression (string or compiled) with at least two
            capture groups; the first two become ``rank`` and ``title``.

    Yields:
        ``{"rank": ..., "title": ...}`` for every match.
    """
    if not html:
        return
    for item in re.findall(pattern, html):
        # The original used the empty string for both keys, so the second
        # value silently overwrote the first; distinct keys keep both.
        yield {
            "rank": item[0],
            "title": item[1],
        }


def write_to_file(content) -> None:
    """Append *content* to ``Result.txt`` as one JSON document per line."""
    # ensure_ascii=False keeps non-ASCII text readable in the output file;
    # the ``with`` block closes the file, so no explicit close() is needed.
    with open("Result.txt", "a", encoding="utf-8") as f:
        f.write(json.dumps(content, ensure_ascii=False) + "\n")


def main() -> None:
    """Download the first list page, print each entry and save it."""
    url = "https://movie.douban.com/top250"
    html = get_one_page(url)
    if html is None:
        print(f"Failed to fetch {url}")
        return
    for item in parse_one_page(html):
        print(item)
        write_to_file(item)


if __name__ == "__main__":
    main()

# NOTE: the original post did not include the matching regular expression
# (notebook screen too small); ITEM_PATTERN above is a reconstruction.
# This code only matches the contents of a single page; extending it to the
# remaining pages is left to the reader's imagination.
First Contact with Python Crawlers