Automatic login with Scrapy against the revised API interface

Source: Internet
Author: User
Tags oauth

I recently used Scrapy to simulate logging in and found that the entire interface has changed; the verification code (CAPTCHA) flow has changed considerably as well. After analysing captured packets, I am recording here what I learned about simulating login against the revised interface. Without further ado, here is the code, which I have personally tested and found to work.

# -*- coding: utf-8 -*-
"""Scrapy spider that logs in to zhihu.com through its OAuth sign-in API.

Request flow (each step is the callback of the previous one):

1. ``start_requests``   GET  /signup                   -> obtain cookies
2. ``post_captchareq``  GET  /api/v3/oauth/captcha     -> is a CAPTCHA needed?
3. ``deal_captchareq``  PUT  /api/v3/oauth/captcha     -> fetch CAPTCHA image
                        (or POST /sign_in directly when none is needed)
4. ``get_captchaimg``   POST /api/v3/oauth/captcha     -> submit typed CAPTCHA
5. ``post_captcha``     POST /api/v3/oauth/sign_in     -> log in
6. ``check_login``      inspect the sign-in response status
"""
from PIL import Image
from scrapy.exceptions import CloseSpider
import scrapy
import json
import base64


class ZhihuSpider(scrapy.Spider):
    """Log in to zhihu.com, prompting on the console when a CAPTCHA is shown."""

    name = 'zhihu'
    allowed_domains = ['www.zhihu.com']
    start_urls = ['http://www.zhihu.com/']
    # Let 401/403 responses reach the callbacks instead of being dropped,
    # so that check_login can report a failed sign-in.
    handle_httpstatus_list = [401, 403]

    client_id = 'c3cef7c66a1843f8b3a9e6a1e3160e20'  # fixed value
    # signature and timestamp belong together; both are copied from a
    # captured sign-in request and must be replaced with your own capture.
    signature = 'b858d0c8b1f2e86c6cb0d93d4055963bcf1121ec'  # from packet capture
    timestamp = '1519567594106'  # from packet capture

    # Account credentials: placeholders, replace before running.
    username = '+86 your mobile number'
    password = 'password'

    signup_url = 'https://www.zhihu.com/signup?next=%2F'
    captcha_url = 'https://www.zhihu.com/api/v3/oauth/captcha?lang=en'
    sign_in_url = 'https://www.zhihu.com/api/v3/oauth/sign_in'
    captcha_file = 'zhihucaptcha.jpg'

    headers = {
        "HOST": "www.zhihu.com",
        "Referer": "https://www.zhihu.com/signup?next=%2F",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) "
                      "AppleWebKit/537.36 (KHTML, like Gecko) "
                      "Chrome/63.0.3239.132 Safari/537.36",
        # Derived from client_id so the two values cannot drift apart.
        "Authorization": "oauth " + client_id,
    }

    def parse(self, response):
        """Unused default callback; every request names its own callback."""
        pass

    def start_requests(self):
        """Fetch the sign-up page so the session cookies get set.

        :return: a one-element list with the initial request
        """
        return [
            scrapy.Request(
                url=self.signup_url,
                headers=self.headers,
                method="GET",
                meta={'cookiejar': 1},
                callback=self.post_captchareq,
                dont_filter=True,
            )
        ]

    def post_captchareq(self, response):
        """Ask the API whether a CAPTCHA is required for this session.

        :param response: response of the sign-up page request
        :return: a one-element list with the CAPTCHA status request
        """
        return [
            scrapy.Request(
                url=self.captcha_url,
                headers=self.headers,
                meta={'cookiejar': response.meta['cookiejar']},
                dont_filter=True,
                callback=self.deal_captchareq,
            )
        ]

    def deal_captchareq(self, response):
        """Decide whether a CAPTCHA is needed and continue accordingly.

        :param response: JSON response of the CAPTCHA status request
        :return: a one-element list with either the request for the CAPTCHA
            image (when ``show_captcha`` is truthy) or the sign-in request
        """
        json_res = json.loads(response.text)
        post_data = {
            "client_id": self.client_id,
            "grant_type": "password",
            "timestamp": self.timestamp,
            "source": "com.zhihu.web",
            "signature": self.signature,
            "username": self.username,
            "password": self.password,
            "captcha": "",
            "lang": "en",
            "ref_source": "homepage",
            "utm_source": "",
        }
        if json_res.get("show_captcha"):
            return [
                scrapy.Request(
                    url=self.captcha_url,
                    headers=self.headers,
                    method='PUT',
                    meta={
                        'cookiejar': response.meta['cookiejar'],
                        'post_data': post_data,
                    },
                    callback=self.get_captchaimg,
                    # Same URL as the status request; never let the
                    # duplicate filter drop it.
                    dont_filter=True,
                )
            ]
        return [self._sign_in_request(response, post_data)]

    def get_captchaimg(self, response):
        """Show the CAPTCHA image and submit the code typed by the user.

        The image arrives base64-encoded in the ``img_base64`` field; it is
        written to ``captcha_file``, displayed, and the user is prompted on
        the console for its text.

        :param response: JSON response carrying the CAPTCHA image
        :return: a one-element list with the request submitting the code
        :raises CloseSpider: if the image cannot be decoded, saved, opened,
            or no code can be read from the console
        """
        try:
            img_bytes = base64.b64decode(json.loads(response.text)["img_base64"])
            with open(self.captcha_file, 'wb') as f:
                f.write(img_bytes)
            # The context manager closes the image even if input() fails.
            with Image.open(self.captcha_file) as img:
                img.show()
                input_captcha = input(
                    "Please enter the verification code in the figure:"
                ).strip()
        except (ValueError, KeyError, TypeError, OSError, EOFError) as e:
            raise CloseSpider(
                'Get captcha error: {error}'.format(error=e)
            ) from e

        # Copy rather than mutate the dict shared through request meta.
        post_data = dict(response.meta['post_data'], captcha=input_captcha)
        return [
            scrapy.FormRequest(
                url=self.captcha_url,
                formdata={"input_text": input_captcha},
                headers=self.headers,
                method='POST',
                meta={
                    'cookiejar': response.meta['cookiejar'],
                    'post_data': post_data,
                },
                callback=self.post_captcha,
                dont_filter=True,
            )
        ]

    def post_captcha(self, response):
        """Send the login form once the CAPTCHA has been accepted.

        :param response: JSON response of the CAPTCHA submission
        :return: a one-element list with the sign-in request
        :raises CloseSpider: if the response does not report ``success``
        """
        if not json.loads(response.text).get('success'):
            raise CloseSpider('Authenticode incorrect')
        return [self._sign_in_request(response, response.meta.get('post_data'))]

    def check_login(self, response):
        """Verify that the sign-in succeeded (HTTP status 201).

        :param response: response of the sign-in request
        :raises CloseSpider: if the status is anything other than 201
        """
        self.logger.info('==============> %s', response.text)
        self.logger.info('%s', response.status)
        if response.status != 201:
            raise CloseSpider('Login info wrong!')
        self.logger.info("Landing success!")

    def _sign_in_request(self, response, post_data):
        """Build the sign-in POST, reusing the cookie jar of ``response``."""
        return scrapy.FormRequest(
            url=self.sign_in_url,
            formdata=post_data,
            method="POST",
            headers=self.headers,
            meta={'cookiejar': response.meta['cookiejar']},
            callback=self.check_login,
            dont_filter=True,
        )

Among these parameters, values such as client_id and the OAuth authorization header are fixed, while signature and timestamp change over time. The signature is the token used to verify that the user is legitimate; in essence it is generated by JavaScript running on the client. For convenience, we simply capture a packet to obtain the signature that corresponds to one fixed timestamp.

First enter incorrect account information on the PC (desktop web) site, capture the packet to obtain the timestamp and signature, and then substitute them for the corresponding values in the code.

Automatic login with Scrapy against the revised API interface

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.