# coding=utf-8
"""Download the Douban "hot western movies" collection into a JSON-lines file."""
import json


class Doubanspider:
    """Page through a Douban mobile collection endpoint and save every item.

    Attributes:
        temp_url: URL template; its single ``{}`` placeholder receives the
            zero-based offset of the first item of the page to request.
    """

    # Items requested per page. Must stay equal to the ``count=`` query
    # parameter in ``temp_url``, because it is also the offset step.
    PAGE_SIZE = 18

    def __init__(self):
        self.temp_url = (
            "https://m.douban.com/rexxar/api/v2/subject_collection/"
            "filter_movie_occident_hot/items"
            "?os=android&for_mobile=1&callback=jsonp3"
            "&start={}&count=18&loc_id=108288&_=0"
        )

    @staticmethod
    def _strip_jsonp(text):
        """Return the text between the outermost parentheses of *text*.

        Used to unwrap a ``callback(...)`` JSONP envelope. *text* is returned
        unchanged (decoded, if it was bytes) when no such pair is found.
        """
        if isinstance(text, (bytes, bytearray)):
            text = text.decode("utf-8")
        opening = text.find("(")
        closing = text.rfind(")")
        if opening == -1 or closing < opening:
            return text
        return text[opening + 1:closing]

    def get_content_list(self, html_str):
        """Extract the item list and the reported item count from a response.

        Args:
            html_str: Response body, either plain JSON or JSON wrapped in a
                JSONP callback (the URL template asks for ``callback=jsonp3``).

        Returns:
            A ``(content_list, total)`` tuple taken from the
            ``"subject_collection_items"`` and ``"total"`` keys.

        Raises:
            ValueError: If the body is not JSON even after unwrapping.
            KeyError: If one of the expected keys is missing.
        """
        try:
            dict_data = json.loads(html_str)
        except ValueError:
            # Plain parsing failed; retry on the payload inside the JSONP
            # wrapper. Bodies that already parsed are never touched.
            dict_data = json.loads(self._strip_jsonp(html_str))
        content_list = dict_data["subject_collection_items"]
        total = dict_data["total"]
        return content_list, total

    def save_content_list(self, content_list):
        """Append each item of *content_list* to ``Db.json``, one JSON per line."""
        with open("Db.json", "a", encoding="utf-8") as f:
            # ensure_ascii=False keeps non-ASCII text readable in the file.
            f.writelines(
                json.dumps(content, ensure_ascii=False) + "\n"
                for content in content_list
            )
        print('Add Success')

    def run(self):
        """Fetch and save every page of the collection.

        The item count is only known after the first response, so ``total``
        starts as ``None`` and the loop always runs at least once.
        """
        # Imported here rather than at module level so the parsing and saving
        # methods stay importable when the project-local ``parse`` helper is
        # not available. NOTE(review): ``parse_url`` is presumably a helper
        # returning the response body for a URL - confirm against parse.py.
        from parse import parse_url

        start = 0
        total = None
        while total is None or start < total:
            # 1. Build the URL of the current page.
            start_url = self.temp_url.format(start)
            # 2. Send the request and get the response body.
            html_str = parse_url(start_url)
            # 3. Extract the items and the reported total.
            content_list, total = self.get_content_list(html_str)
            if not content_list:
                # An empty page means there is nothing left to fetch, even
                # if the reported total claims otherwise.
                break
            # 4. Save this page.
            self.save_content_list(content_list)
            # 5. Advance to the next page and repeat steps 1-4.
            start += self.PAGE_SIZE


if __name__ == '__main__':
    douban = Doubanspider()
    douban.run()
# Python example: crawling Douban movie listings