# -*- coding: utf-8 -*-
import json
from urllib import parse

import scrapy


class GoogleTrendsSpider(scrapy.Spider):
    """Fetch Google Trends interest-over-time CSV data for a set of keywords.

    The crawl is a two-step flow:

    1. ``start_requests`` calls the ``explore`` endpoint, whose JSON response
       lists widgets, each carrying a ``token`` and a ``request`` payload.
    2. ``parse_token`` picks the ``TIMESERIES`` widget and uses its token and
       request payload to call the ``multiline/csv`` endpoint; the CSV text
       arrives in ``parse_row``.
    """

    name = 'google_trends'
    allowed_domains = ['google.com']

    # Endpoint that returns the widget tokens.
    GENERAL_URL = 'https://trends.google.com/trends/api/explore?{}'
    # Endpoint that returns the keyword's interest-over-time CSV.
    INTEREST_OVER_TIME_URL = (
        'https://trends.google.com/trends/api/widgetdata/multiline/csv?{}'
    )

    # Enable the random User-Agent and proxy downloader middlewares.
    # NOTE(review): the middleware paths are project-local and their exact
    # spelling was reconstructed from a garbled source -- confirm against
    # blockchain/middlewares.py.
    custom_settings = {
        'DOWNLOADER_MIDDLEWARES': {
            'blockchain.middlewares.RandomUserAgent': 390,
            'blockchain.middlewares.RandomProxy': 544,
        },
        # 'COOKIES_ENABLED': False,
        'DOWNLOAD_DELAY': 1,
    }

    # (coin_id, keyword) pairs to query.  The original snippet read ``row.id``
    # from a ``row`` variable that was never defined and always sent the
    # literal keyword 'keyword'; this default keeps that keyword.
    # NOTE(review): populate this from the real data source.
    search_terms = ((None, 'keyword'),)

    def _build_explore_url(self, keyword):
        """Return the ``explore`` URL that requests widget tokens for *keyword*."""
        comparison = {
            'comparisonItem': [
                {'keyword': keyword, 'time': 'now 7-d', 'geo': ''},
            ],
            'category': 0,
        }
        token_payload = {
            'hl': 'en-US',
            'tz': '-480',
            # The endpoint takes the comparison spec as a JSON string that is
            # then URL-encoded like any other query parameter.
            'req': json.dumps(comparison),
            'property': '',
        }
        return self.GENERAL_URL.format(parse.urlencode(token_payload))

    def start_requests(self):
        """Build one token request per entry in ``search_terms``.

        Returns:
            A list of ``scrapy.Request`` objects handled by ``parse_token``.
            Each carries ``meta['item'] == {'coin_id': <id>}`` so later
            callbacks can tell which entry a response belongs to.
        """
        return [
            scrapy.Request(
                url=self._build_explore_url(keyword),
                callback=self.parse_token,
                meta={'item': {'coin_id': coin_id}},
            )
            for coin_id, keyword in self.search_terms
        ]

    def parse_token(self, response):
        """Extract the TIMESERIES token and request the matching CSV.

        Yields:
            One ``scrapy.Request`` per widget whose ``id`` is ``'TIMESERIES'``,
            handled by ``parse_row`` and carrying the same ``meta['item']``.
        """
        # The first four characters of the body are skipped before parsing;
        # presumably this is the non-JSON prefix Google prepends to API
        # responses -- confirm against a live response.
        payload = json.loads(response.body.decode('utf-8')[4:])
        for widget in payload['widgets']:
            if widget['id'] != 'TIMESERIES':
                continue
            params = {
                'tz': '-480',
                'req': json.dumps(widget['request']),
                'token': widget['token'],
            }
            url = self.INTEREST_OVER_TIME_URL.format(parse.urlencode(params))
            yield scrapy.Request(
                url=url,
                callback=self.parse_row,
                meta={'item': response.meta['item']},
            )

    def parse_row(self, response):
        """Decode the CSV response body as UTF-8 and print it."""
        body_text = response.body.decode('utf-8')
        print(body_text)
# Copied from: pytrends
# Topics: Python, Scrapy, Google Trends