Here is the code:
# Fetch the Douban login page through a cookie-aware urllib opener that sends
# browser-like request headers, save the HTML to disk, and print it.

import http.cookiejar
import urllib.request

try:
    # Not used by the code below; kept available for later HTML parsing.
    from lxml import etree  # noqa: F401
except ImportError:
    # lxml is only needed for parsing, not for downloading the page.
    etree = None

# from spiderimg import getimg

# Request headers imitating Internet Explorer 11 on Windows 8.1.
head = {
    'Connection': 'Keep-Alive',
    'Accept': 'text/html, application/xhtml+xml, */*',
    'Accept-Language': 'en-US,en;q=0.8,zh-Hans-CN;q=0.5,zh-Hans;q=0.3',
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.3; WOW64; Trident/7.0; rv:11.0) like Gecko'
    ),
}

LOGIN_URL = 'https://accounts.douban.com/login'
SAVE_PATH = './doubanlogin.html'
# NOTE(review): the timeout value in the original listing was unreadable;
# 30 seconds is a conservative choice -- adjust if needed.
TIMEOUT_SECONDS = 30


def Makemyopener(head):
    """Build a urllib opener that keeps cookies and sends the given headers.

    Args:
        head: Mapping of HTTP header name to header value.

    Returns:
        A ``urllib.request.OpenerDirector`` with an ``HTTPCookieProcessor``
        installed (backed by a fresh in-memory ``CookieJar``) and whose
        ``addheaders`` list holds one ``(name, value)`` pair per entry of
        ``head``.
    """
    cookie_jar = http.cookiejar.CookieJar()
    opener = urllib.request.build_opener(
        urllib.request.HTTPCookieProcessor(cookie_jar)
    )
    # addheaders expects a list of (name, value) tuples; assigning it replaces
    # urllib's default "User-agent: Python-urllib/x.y" header.
    opener.addheaders = list(head.items())
    return opener


def main():
    """Download the login page, write it to SAVE_PATH, and print it."""
    oper = Makemyopener(head)
    # The response is a context manager; leaving the block closes the socket.
    with oper.open(LOGIN_URL, timeout=TIMEOUT_SECONDS) as uop:
        data = uop.read()
        # Honour the charset declared by the server, defaulting to UTF-8.
        charset = uop.headers.get_content_charset() or 'utf-8'
    html = data.decode(charset)

    with open(SAVE_PATH, 'w', encoding='utf-8') as f:
        f.write(html)
    print(html)


if __name__ == '__main__':
    main()
Python crawler: crawling a login page