"""Native Python crawler for the Panda TV host (streamer) leaderboard.

Downloads the League of Legends category page, pulls each streamer's name
and viewer count out of the HTML with regular expressions, and prints the
streamers ranked from most to fewest viewers.
"""

import re
from urllib import request

# BeautifulSoup: recommended library for parsing HTML data structures.
# Scrapy: a full crawler framework.
# Crawlers, anti-crawler measures, and counter-anti-crawler measures.
# Sites may ban an IP; a proxy IP pool is the usual workaround.


class Spider():
    """Scrape one category page and print its streamers by viewer count.

    The class attributes hold the page address and the regular expressions
    used to cut the page into per-streamer fragments and to extract the
    name and viewer-count text from each fragment.
    """

    url = 'https://www.panda.tv/cate/lol'
    # ``[\s\S]*?`` matches any character, including newlines, non-greedily.
    root_pattern = r'<div class="video-info">([\s\S]*?)</div>'
    name_pattern = r'</i>([\s\S]*?)</span>'
    number_pattern = r'<span class="video-number">([\s\S]*?)</span>'

    def __fetch_content(self):
        """Download ``Spider.url`` and return the page decoded as UTF-8."""
        # The context manager closes the HTTP response even if read() fails.
        with request.urlopen(Spider.url) as response:
            return response.read().decode('utf-8')

    def __analysis(self, htmls):
        """Split the page into streamer fragments and extract raw fields.

        Returns a list of dicts whose ``'name'`` and ``'number'`` values are
        the (possibly empty) lists of regex matches found in each fragment.
        """
        return [
            {
                'name': re.findall(Spider.name_pattern, fragment),
                'number': re.findall(Spider.number_pattern, fragment),
            }
            for fragment in re.findall(Spider.root_pattern, htmls)
        ]

    def __refine(self, achors):
        """Reduce each raw entry to a single stripped name and number text.

        Entries in which either field produced no regex match are skipped
        instead of raising ``IndexError``.
        """
        return [
            {
                'name': anchor['name'][0].strip(),
                'number': anchor['number'][0],
            }
            for anchor in achors
            if anchor['name'] and anchor['number']
        ]

    def __sort(self, anchors):
        """Return the entries ordered from highest to lowest viewer count."""
        return sorted(anchors, key=self.__sord_seed, reverse=True)

    def __show(self, anchors):
        """Print one line per entry: 1-based rank, name and viewer text."""
        for rank, anchor in enumerate(anchors, start=1):
            print('rank {} : {}    {}'.format(
                rank, anchor['name'], anchor['number']))

    def __sord_seed(self, anchor):
        """Sort key: the numeric viewer count parsed from ``anchor['number']``.

        The first integer or decimal number in the text is used, so a value
        such as "1.5" keeps its fractional part. Text without any digits
        sorts as ``0.0`` instead of raising ``ValueError``.
        """
        text = anchor['number']
        match = re.search(r'\d+(?:\.\d+)?', text)
        if match is None:
            return 0.0
        number = float(match.group())
        # NOTE(review): the original checks for the marker 'million' but
        # scales by 10000, which matches the Chinese unit U+4E07 (ten
        # thousand) -- presumably a translation artefact. Both markers are
        # accepted with the original factor; confirm against the live page.
        if 'million' in text or '\u4e07' in text:
            number *= 10000
        return number

    def go(self):
        """Run the whole pipeline: fetch, parse, refine, sort and print."""
        htmls = self.__fetch_content()
        anchors = self.__analysis(htmls)
        anchors = list(self.__refine(anchors))
        anchors = self.__sort(anchors)
        self.__show(anchors)


if __name__ == '__main__':
    # Only crawl when executed as a script, not when imported.
    splider = Spider()
    splider.go()
# Native Python crawler (crawls the Panda TV streamer leaderboard)