baiduclient.py
The code is as follows:
import gzip
import json
import re
import urllib.parse
from http.client import HTTPConnection

from htmlutils import Tiebaparser
import httputils as utils
# Request headers shared by every request this module sends.
# login() overwrites "Host" and utils.savecookies() appends to "Cookie",
# so this dict is mutated in place at runtime.
headers = {
    "Connection": "keep-alive",
    "Cache-control": "max-age=0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "user-agent": "mozilla/5.0 (Windows NT 6.1; WOW64) applewebkit/537.36 (khtml, like Gecko) chrome/32.0.1700.107 safari/537.36 ",
    "Content-type": "application/x-www-form-urlencoded",
    "accept-encoding": "GZIP,DEFLATE,SDCH",
    "accept-language": "zh-cn,zh;q=0.8",
    "Cookie": "",
}

# Cookies collected from login responses.
cookies = []

# Personal information; getuserinfo() replaces this with the decoded JSON reply.
UserInfo = {}
def login(account, password) -> bool:
    """Log in through the Baidu WAP passport endpoint.

    POSTs the credentials to ``wappass.baidu.com/passport/login``, appends the
    cookies found in the response to the module-level ``cookies`` list and
    stores them in the shared ``headers["Cookie"]`` value.

    Args:
        account: User name to log in with.
        password: Password for that account.

    Returns:
        True when the server answers with HTTP 302 (which the original code
        treats as a successful login), otherwise False.
    """
    # The login host is written into the shared headers; getuserinfo()
    # removes it again before talking to tieba.baidu.com.
    headers["Host"] = "wappass.baidu.com"

    # NOTE(review): the pasted form body contained stray spaces and odd casing
    # ("us Ername", "uid% 253d", "& Tn="); they are removed here on the
    # assumption that they are copy/paste artifacts -- confirm against a
    # captured request.
    form = (
        "username={0}&password={1}&submit=%e7%99%bb%e5%bd%95&quick_user=0&isphone=0"
        "&sp_login=waprate&uname_login=&loginmerge=1&vcodestr="
        "&u=http%253a%252f%252fwap.baidu.com%253fuid%253d1392873796936_247"
        "&skin=default_v2&tpl=&ssid=&from=&uid=1392873796936_247&pu=&tn="
        "&bdcm=3f7d51b436d12f2e83389b504fc2d56285356820&type=&bd_page_type="
    )
    # Percent-encode the credentials so characters such as "&", "=" or
    # non-ASCII text cannot corrupt the form body.
    body = form.format(
        urllib.parse.quote_plus(account),
        urllib.parse.quote_plus(password),
    )

    # NOTE(review): the credentials travel over plain HTTP on port 80;
    # consider http.client.HTTPSConnection if the endpoint supports it.
    conn = HTTPConnection("wappass.baidu.com", 80)
    try:
        conn.request("POST", "/passport/login", body, headers)
        resp = conn.getresponse()
        new_cookies = utils.getcookiesfromheaders(resp.getheaders())
        status = resp.status
    finally:
        conn.close()

    cookies.extend(new_cookies)
    # Only the cookies from this response are appended to the header, so a
    # repeated login does not duplicate the ones that are already there.
    utils.savecookies(headers, new_cookies)

    # A successful login answers with a 302 redirect.
    return status == 302
def gettiebalist():
    """Return the list of bars the logged-in user follows.

    Downloads the "favorite" tab of the mobile Tieba site with the shared
    ``headers`` and feeds the page to ``Tiebaparser``.

    Returns:
        The list produced by ``Tiebaparser.gettiebalist()``.
    """
    conn = HTTPConnection("tieba.baidu.com", 80)
    try:
        conn.request("GET", "/mo/m?tn=bdfbw&tab=favorite", "", headers)
        # NOTE(review): unlike getsigninfo(), the body is decoded without
        # gzip decompression; confirm this endpoint never compresses.
        page = conn.getresponse().read().decode()
    finally:
        conn.close()

    # Use a distinct local name: rebinding ``Tiebaparser`` inside the function
    # would make the class name local and fail before it is ever called.
    parser = Tiebaparser()
    parser.feed(page)
    return parser.gettiebalist()
def getsigninfo(tiebaname):
    """Fetch the sign-in information for one bar.

    Requests ``/sign/loadmonth`` on tieba.baidu.com, gunzips the response,
    decodes it as GBK and parses it as JSON.

    Args:
        tiebaname: Name of the bar to query.

    Returns:
        The decoded JSON document.
    """
    # NOTE(review): the key casing ("kw", "ie", "t") and the "utf-8" value were
    # restored from a case-mangled paste; "kw" matches the "m?kw=" links the
    # HTML parser looks for. Confirm against the live API.
    querystr = urllib.parse.urlencode({"kw": tiebaname, "ie": "utf-8", "t": 0.571444})
    conn = HTTPConnection("tieba.baidu.com", 80)
    try:
        conn.request("GET", "/sign/loadmonth?" + querystr, "", headers)
        raw = conn.getresponse().read()
    finally:
        conn.close()
    return json.loads(gzip.decompress(raw).decode("GBK"))
# Matches the hidden "tbs" form field in a bar page. The prefix `"tbs" value="`
# is 13 characters long, so group(0)[13:-1] and group(1) both yield the value.
Tbspattern = re.compile(r'"tbs" value="(.{20,35})"')


def signIn(tiebaname):
    """Sign in to one bar.

    First loads the bar's mobile page to read its ``tbs`` value, then POSTs
    that value to ``/sign/add``.

    Args:
        tiebaname: Name of the bar to sign in to.

    Returns:
        The JSON document returned by ``/sign/add`` (gunzipped and decoded).

    Raises:
        ValueError: If the bar page does not contain a ``tbs`` value.
    """
    # Step 1: get the tbs parameter from the bar page.
    page_conn = HTTPConnection("tieba.baidu.com", 80)
    try:
        page_query = urllib.parse.urlencode({"kw": tiebaname})
        page_conn.request("GET", "/mo/m?" + page_query, "", headers)
        html = page_conn.getresponse().read().decode()
    finally:
        page_conn.close()

    match = Tbspattern.search(html)
    if match is None:
        # Fail with a clear message instead of an AttributeError on None.
        raise ValueError("no tbs value found in the page of bar %r" % (tiebaname,))
    tbs = match.group(1)

    # Step 2: sign in.
    # NOTE(review): key casing ("kw", "tbs", "ie") restored from a
    # case-mangled paste -- confirm against the live API.
    sign_conn = HTTPConnection("tieba.baidu.com", 80)
    try:
        body = urllib.parse.urlencode({"kw": tiebaname, "tbs": tbs, "ie": "utf-8"})
        sign_conn.request("POST", "/sign/add", body, headers)
        raw = sign_conn.getresponse().read()
    finally:
        sign_conn.close()
    return json.loads(gzip.decompress(raw).decode())
def getuserinfo():
    """Fetch the logged-in user's personal information.

    Requests ``/f/user/json_userinfo`` on tieba.baidu.com, gunzips the
    response, decodes it as GBK and stores the parsed JSON in the
    module-level ``UserInfo``. Nothing is returned.
    """
    global UserInfo
    # login() pins "Host" to the passport server; drop it so http.client
    # derives the header from the connection. The default avoids a KeyError
    # when "Host" is not set (for example on a second call).
    headers.pop("Host", None)
    conn = HTTPConnection("tieba.baidu.com", 80)
    try:
        conn.request("GET", "/f/user/json_userinfo", "", headers)
        raw = conn.getresponse().read()
    finally:
        conn.close()
    UserInfo = json.loads(gzip.decompress(raw).decode("GBK"))
if __name__ == "__main__":
    # Interactive entry point: log in, then sign in to every followed bar.
    account = input("Please enter username:")
    password = input("Please enter password:")
    ok = login(account, password)
    if ok:
        getuserinfo()
        # NOTE(review): JSON key casing ("data", "user_name_weak", "no",
        # "uinfo", ...) was restored from a case-mangled paste; confirm
        # against a real response.
        print(UserInfo["data"]["user_name_weak"] + "~ ~ ~ Login Successful", end="\n------\n")
        for tb in gettiebalist():
            print(tb + "bar:")
            signinfo = signIn(tb)
            if signinfo["no"] != 0:
                print("Sign in failed!")
                print(signinfo["error"])
            else:
                uinfo = signinfo["data"]["uinfo"]
                print("Sign in successfully!")
                # "cout_total_sing_num" is spelled this way in the source data.
                print("Sign in days:" + str(uinfo["cout_total_sing_num"]))
                print("Consecutive sign-in days:" + str(uinfo["cont_sign_num"]))
            print("------")
    else:
        print("Login Failed")
htmlutils.py
The code is as follows:
'''
Created on 2014-2-20
@author: Vincent
'''
from html.parser import HTMLParser


class Tiebaparser(HTMLParser):
    """Collect the text of every ``<a>`` tag whose href contains ``m?kw=``.

    Feed a page with ``feed()`` and read the collected strings with
    ``gettiebalist()``.
    """

    def __init__(self):
        super().__init__()
        # Text collected so far, in document order.
        self.tiebalist = []
        # True after a matching <a> start tag, until its text has been stored.
        self.flag = False

    def gettiebalist(self):
        """Return the list of collected link texts."""
        return self.tiebalist

    def handle_starttag(self, tag, attrs):
        """Arm the flag when an ``<a>`` tag links to a ``m?kw=`` URL."""
        if tag != "a":
            return
        for name, value in attrs:
            # value is None for an attribute written without a value
            # (e.g. ``<a href>``); guard so the ``in`` test cannot fail.
            if name == "href" and value and "m?kw=" in value:
                self.flag = True

    def handle_data(self, data):
        """Store the first text chunk that follows a matching ``<a>`` tag."""
        if self.flag:
            self.tiebalist.append(data)
            self.flag = False
httputils.py
The code is as follows:
'''
Created on 2014-2-20
@author: Vincent
'''
def getcookiesfromheaders(headers):
    """Get all cookies from the headers of an HTTP response.

    Args:
        headers: Iterable of ``(name, value)`` pairs, such as the result of
            ``HTTPResponse.getheaders()``.

    Returns:
        A list with one entry per Set-Cookie header: the part of the header
        value before the first ``;``. Empty when there are none.
    """
    # Header names are case-insensitive, so compare in lower case instead of
    # looking for one exact spelling.
    return [
        value.split(";")[0]
        for name, value in headers
        if name.lower() == "set-cookie"
    ]
def savecookies(headers, cookies):
    """Append cookies to the ``Cookie`` entry of a request-header dict.

    Args:
        headers: Mutable mapping of request headers; modified in place.
        cookies: Iterable of cookie strings. Each one is appended followed
            by ``;``.

    A missing ``"Cookie"`` entry is treated as an empty string. When
    ``cookies`` is empty, ``headers`` is left untouched.
    """
    suffix = "".join(cookie + ";" for cookie in cookies)
    if suffix:
        headers["Cookie"] = headers.get("Cookie", "") + suffix
def getcookievalue(cookies, cookiename):
    """Get the value of the named cookie from a list of cookies.

    Args:
        cookies: Iterable of ``"name=value"`` strings.
        cookiename: Exact name of the cookie to look up.

    Returns:
        Everything after the first ``=`` of the first cookie whose name
        equals ``cookiename``, or None when there is no such cookie.
        Entries without ``=`` are skipped.
    """
    for cookie in cookies:
        name, sep, value = cookie.partition("=")
        # Compare the whole name: a substring test would let "ID" match
        # "BAIDUID=...".
        if sep and name.strip() == cookiename:
            return value
    return None
def parsequerystring(queryString):
    """Parse a query string into a dict.

    Args:
        queryString: Text of the form ``"a=1&b=2"``. No percent-decoding is
            performed.

    Returns:
        A dict mapping each name to its value. Each pair is split at its
        first ``=``, so values may contain ``=``. A segment without ``=``
        maps to an empty string, empty segments are ignored, and a repeated
        name keeps its last value.
    """
    result = {}
    for segment in queryString.split("&"):
        if not segment:
            # "" or "a=1&&b=2" produce empty segments; there is nothing to store.
            continue
        name, _, value = segment.partition("=")
        result[name] = value
    return result