baiduclient.py
The code is as follows:
import gzip
import json
import re
import urllib.parse
from http.client import HTTPConnection

import httputils as utils
from htmlutils import TieBaParser
# Request headers shared by every HTTP call in this module.  login() adds a
# "Host" entry and fills in "Cookie" once the server has issued cookies.
headers = {
    "Connection": "keep-alive",
    "Cache-Control": "max-age=0",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/32.0.1700.107 Safari/537.36",
    "Content-Type": "application/x-www-form-urlencoded",
    "Accept-Encoding": "gzip,deflate,sdch",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Cookie": "",
}
# Cookies ("name=value" strings) collected from the login response.
cookies = []
# Personal information of the logged-in user; filled in by getUserInfo().
userInfo = {}
def login(account, password):
    '''Log in to Baidu and remember the cookies the server hands out.

    Posts the login form to wappass.baidu.com, appends the cookies found in
    the response to the module-level ``cookies`` list and adds them to the
    shared ``headers["Cookie"]`` value.

    account  -- account name to log in with
    password -- password for that account

    Returns True when the server answers with HTTP 302, otherwise False.
    '''
    global cookies
    # Kept as a module-level side effect: getUserInfo() removes this entry
    # again before it talks to tieba.baidu.com.
    headers["Host"] = "wappass.baidu.com"
    # urlencode() escapes the credentials, so "&", "=" or non-ASCII
    # characters in them cannot corrupt the form body.
    body = urllib.parse.urlencode({
        "username": account,
        "password": password,
        # Label of the submit button; encodes to %E7%99%BB%E5%BD%95.
        "submit": "\u767b\u5f55",
        "quick_user": "0",
        "isphone": "0",
        "sp_login": "waprate",
        "uname_login": "",
        "loginmerge": "1",
        "vcodestr": "",
        # NOTE(review): this redirect target was garbled in the source this
        # file was recovered from; the value below is a reconstruction and
        # must be confirmed against a real login form.
        "u": "http%3A%2F%2Fwap.baidu.com%3Fuid%3D1392871096936_247",
        "skin": "default_v2",
        "tpl": "",
        "ssid": "",
        "from": "",
        "uid": "1392871096936_247",
        "pu": "",
        "tn": "",
        "bdcm": "3f7d51b436d12f2e83389b504fc2d56285356820",
        "type": "",
        "bd_page_type": "",
    })
    # NOTE(review): the credentials are sent over plain HTTP on port 80.
    conn = HTTPConnection("wappass.baidu.com", 80)
    try:
        conn.request("POST", "/passport/login", body, headers)
        resp = conn.getresponse()
        status = resp.status
        newCookies = utils.getCookiesFromHeaders(resp.getheaders())
    finally:
        conn.close()
    cookies += newCookies
    # Only the cookies from this response are added to the header, so a
    # second login does not append the earlier cookies a second time.
    utils.saveCookies(headers, newCookies)
    # 302 is returned when the login succeeds.
    return status == 302
def getTieBaList():
    '''Retrieve the list of followed bars.

    Requests the "favorite" tab of the mobile Tieba page with the shared
    ``headers`` and feeds the HTML to TieBaParser.

    Returns whatever TieBaParser.getTieBaList() yields for that page.
    '''
    conn = HTTPConnection("tieba.baidu.com", 80)
    try:
        # Send a per-request copy with the right Host so that an entry left
        # behind by login() cannot leak into this request.
        conn.request("GET", "/mo/m?tn=bdFBW&tab=favorite",
                     headers=dict(headers, Host="tieba.baidu.com"))
        resp = conn.getresponse()
        raw = resp.read()
        # The request advertises gzip support, so undo it when it is used.
        if "gzip" in (resp.getheader("Content-Encoding") or "").lower():
            raw = gzip.decompress(raw)
    finally:
        conn.close()
    # A distinct local name keeps the TieBaParser class reachable.
    parser = TieBaParser()
    parser.feed(raw.decode())
    return parser.getTieBaList()
def getSignInfo(tieBaName):
    '''Obtain the sign-in information of a bar.

    tieBaName -- name of the bar, sent as the ``kw`` query parameter

    Returns the JSON document answered by /sign/loadmonth, parsed with
    json.loads().
    '''
    queryStr = urllib.parse.urlencode(
        {"kw": tieBaName, "ie": "UTF-8", "t": 0.571444})
    conn = HTTPConnection("tieba.baidu.com", 80)
    try:
        # Per-request copy with the right Host; see the shared ``headers``.
        conn.request("GET", "/sign/loadmonth?" + queryStr,
                     headers=dict(headers, Host="tieba.baidu.com"))
        resp = conn.getresponse()
        raw = resp.read()
        # Only gunzip when the server says the body is gzip-compressed;
        # an uncompressed reply would otherwise raise inside gzip.
        if "gzip" in (resp.getheader("Content-Encoding") or "").lower():
            raw = gzip.decompress(raw)
    finally:
        conn.close()
    return json.loads(raw.decode("GBK"))
# Matches the hidden "tbs" form field on a bar's mobile page; group 1 is the
# token.  [^"] keeps the match from running past the closing quote.
tbsPattern = re.compile(r'"tbs" value="([^"]{20,35})"')


def _readBody(resp):
    '''Return the response body as bytes, gunzipped when it is gzip-encoded.'''
    raw = resp.read()
    if "gzip" in (resp.getheader("Content-Encoding") or "").lower():
        raw = gzip.decompress(raw)
    return raw


def signIn(tieBaName):
    '''Sign in to a bar.

    First loads the bar's mobile page to read the ``tbs`` token, then posts
    that token to /sign/add.

    tieBaName -- name of the bar, sent as the ``kw`` parameter

    Returns the JSON document answered by /sign/add, parsed with
    json.loads().
    Raises ValueError when the page does not contain a ``tbs`` token.
    '''
    # Per-request copy with the right Host so that an entry left behind by
    # login() cannot leak into these requests.
    requestHeaders = dict(headers, Host="tieba.baidu.com")

    # Get the tbs parameter from the page.
    conn1 = HTTPConnection("tieba.baidu.com", 80)
    try:
        queryStr1 = urllib.parse.urlencode({"kw": tieBaName})
        conn1.request("GET", "/mo/m?" + queryStr1, headers=requestHeaders)
        html = _readBody(conn1.getresponse()).decode()
    finally:
        conn1.close()
    match = tbsPattern.search(html)
    if match is None:
        raise ValueError("no tbs token found on the page of " + repr(tieBaName))
    tbs = match.group(1)

    # Sign in.
    conn2 = HTTPConnection("tieba.baidu.com", 80)
    try:
        body = urllib.parse.urlencode(
            {"kw": tieBaName, "tbs": tbs, "ie": "UTF-8"})
        conn2.request("POST", "/sign/add", body, requestHeaders)
        data = json.loads(_readBody(conn2.getresponse()).decode())
    finally:
        conn2.close()
    return data
def getUserInfo():
    '''Retrieve personal information.

    Requests /f/user/json_userinfo with the shared ``headers`` and stores
    the parsed JSON document in the module-level ``userInfo``.
    '''
    global userInfo
    # Drop the Host entry added by login(); the default keeps this from
    # failing when login() has not run (or the entry is already gone).
    headers.pop("Host", None)
    conn = HTTPConnection("tieba.baidu.com", 80)
    try:
        conn.request("GET", "/f/user/json_userinfo", headers=headers)
        resp = conn.getresponse()
        raw = resp.read()
        # Only gunzip when the server says the body is gzip-compressed.
        if "gzip" in (resp.getheader("Content-Encoding") or "").lower():
            raw = gzip.decompress(raw)
    finally:
        conn.close()
    userInfo = json.loads(raw.decode("GBK"))
# Script entry point: log in, then sign in to every followed bar and print
# the outcome for each one.
if __name__ == "__main__":
    account = input("enter an account:")
    password = input("enter the password:")
    ok = login(account, password)
    if ok:
        getUserInfo()
        print(userInfo["data"]["user_name_weak"] + "~~~ Login successful",
              end="\n------\n")
        for tb in getTieBaList():
            print(tb + ":")
            signInfo = signIn(tb)
            # A non-zero "no" field is treated as a failed sign-in.
            if signInfo["no"] != 0:
                print("failed to sign in!")
                print(signInfo["error"])
            else:
                uinfo = signInfo["data"]["uinfo"]
                print("check in successful!")
                print("check-in days:" + str(uinfo["cout_total_sing_num"]))
                print("consecutive sign-in days:" + str(uinfo["cont_sign_num"]))
            print("------")
    else:
        print("Logon Failed")
htmlutils.py
The code is as follows:
'''
Created on 2014-2-20
@author: Vincent
'''
from html.parser import HTMLParser


class TieBaParser(HTMLParser):
    '''Collect the text of every link whose href contains "m?kw=".

    Feed HTML with feed(); the collected link texts are then available
    from getTieBaList().
    '''

    def __init__(self):
        super().__init__()
        # Link texts collected so far, in document order.
        self.tieBaList = []
        # True between a matching start tag and the next text chunk.
        self.flag = False

    def getTieBaList(self):
        '''Return the list of collected link texts.'''
        return self.tieBaList

    def handle_starttag(self, tag, attrs):
        '''Arm the flag when an <a> tag links to a "m?kw=" address.'''
        # NOTE(review): the tag name was lost in the source this file was
        # recovered from; "a" is assumed because the check is on href.
        if tag != "a":
            return
        for name, value in attrs:
            # value is None for an attribute written without "=...".
            if name == "href" and value and "m?kw=" in value:
                self.flag = True

    def handle_data(self, data):
        '''Store the first text chunk that follows a matching start tag.'''
        if self.flag:
            self.tieBaList.append(data)
            self.flag = False
httputils.py
The code is as follows:
'''
Created on 2014-2-20
@author: Vincent
'''
def getCookiesFromHeaders(headers):
    '''Retrieve all cookies from the headers of an HTTP response.

    headers -- iterable of (name, value) pairs

    Returns a list with the leading "name=value" part of every Set-Cookie
    header (everything from the first ";" on is dropped), in header order.
    '''
    # Header names are case-insensitive, so "set-cookie" must match too.
    return [
        header[1].split(";")[0]
        for header in headers
        if header[0].lower() == "set-cookie"
    ]
def saveCookies(headers, cookies):
    '''Append cookies to the "Cookie" entry of a header dict.

    headers -- dict of request headers; modified in place
    cookies -- iterable of "name=value" strings

    Every cookie is appended followed by ";".  A missing "Cookie" entry is
    treated as empty instead of raising KeyError.
    '''
    appended = "".join(cookie + ";" for cookie in cookies)
    if appended:
        headers["Cookie"] = headers.get("Cookie", "") + appended
def getCookieValue(cookies, cookieName):
    '''Obtain the value of the specified cookie from cookies.

    cookies    -- iterable of "name=value" strings
    cookieName -- exact name of the cookie to look up

    Returns the text after the first "=" of the first cookie whose name
    equals cookieName, or None when there is no such cookie.
    '''
    for cookie in cookies:
        name, sep, value = cookie.partition("=")
        # Compare the whole name: a substring test would let "ID" match
        # "BAIDUID=...", or match text inside another cookie's value.
        if sep and name.strip() == cookieName:
            return value
    return None
def parseQueryString(queryString):
    '''Parse a query string into a dict.

    queryString -- text of the form "name=value&name=value"

    Returns a dict mapping each name to its value as written (no URL
    decoding).  A field without "=" maps to "", empty fields are skipped,
    and a value keeps any further "=" characters it contains.
    '''
    result = {}
    for field in queryString.split("&"):
        if not field:
            continue
        name, _, value = field.partition("=")
        result[name] = value
    return result