Simulating Sina Weibo login with Python

Last Update:2015-02-06 Source: Internet

Author: User

Developer on Alibaba Cloud: Build your first app with APIs, SDKs, and tutorials on Alibaba Cloud. Read more ＞

The main file is as follows:

# coding=utf-8
"""Simulated Sina Weibo (SSO) login plus a few small scraping helpers.

The script logs in through ``login.sina.com.cn`` using the helper modules
``WeiboEncode`` (builds the POST body) and ``WeiboSearch`` (parses the
server replies).  It is written so that it runs on both Python 2 and
Python 3.
"""
import random
import re
import time
import urllib

import requests

import WeiboEncode
import WeiboSearch

try:  # Python 2 module names
    import cookielib
    import httplib
    import urllib2
except ImportError:  # Python 3: the same APIs live under new module paths
    import http.client as httplib
    import http.cookiejar as cookielib
    import urllib.request as urllib2


def _to_text(data):
    """Return *data* as native ``str``; bytes are decoded leniently as UTF-8."""
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


class WeiboLogin(object):
    """Performs the two-step Sina SSO login (prelogin, then credential POST)."""

    def __init__(self, user, pwd, enableProxy=False):
        """Initialize WeiboLogin.

        user        -- account name (e-mail address).
        pwd         -- account password.
        enableProxy -- whether to route HTTP through the proxy server;
                       disabled by default.
        """
        print("Initializing WeiboLogin...")
        self.userName = user
        self.passWord = pwd
        self.enableProxy = enableProxy

        # Prelogin endpoint: returns servertime, nonce, pubkey and rsakv.
        self.serverUrl = "http://login.sina.com.cn/sso/prelogin.php?entry=weibo&callback=sinaSSOController.preloginCallBack&su=&rsakt=mod&client=ssologin.js(v1.4.11)&_=1379834957683"
        # Endpoint the encoded credentials are POSTed to.
        self.loginUrl = "http://login.sina.com.cn/sso/login.php?client=ssologin.js(v1.4.11)"
        # User agent sent with the login POST so the request looks like a browser.
        self.postHeader = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0'}

    def Login(self):
        """Run the login sequence.

        Returns True when the post-login redirect could be followed and
        False on any failure (the failure is printed, not raised).
        """
        # Install the cookie (and optional proxy) handlers first.
        self.EnableCookie(self.enableProxy)

        # Step 1: fetch the values needed to encode the credentials.
        serverInfo = self.GetServerTime()
        if serverInfo is None:
            # GetServerTime already reported the reason; unpacking None
            # here would raise TypeError.
            print('Login error!')
            return False
        serverTime, nonce, pubkey, rsakv = serverInfo

        # Step 2: encode user name and password into the POST body.
        postData = WeiboEncode.PostEncode(self.userName, self.passWord,
                                          serverTime, nonce, pubkey, rsakv)
        print("Post data length:\n %d" % len(postData))
        if not isinstance(postData, bytes):
            # urllib.request on Python 3 only accepts a bytes body.
            postData = postData.encode('ascii')

        try:
            req = urllib2.Request(self.loginUrl, postData, self.postHeader)
            print("Posting request...")
            text = _to_text(urllib2.urlopen(req).read())
            # The reply is a page that redirects via location.replace(...);
            # requesting that target completes the login.
            loginUrl = WeiboSearch.sRedirectData(text)
            urllib2.urlopen(loginUrl)
        except Exception as err:  # deliberate best-effort boundary
            print('Login error! (%s)' % err)
            return False

        print('Login sucess!')
        return True

    def EnableCookie(self, enableProxy):
        """Enable cookie & proxy (if needed).

        Builds an opener with cookie support (and a proxy handler when
        enableProxy is true) and installs it as the global urllib opener.
        """
        cookiejar = cookielib.LWPCookieJar()
        # HTTPCookieProcessor stores received cookies in the given jar.
        cookie_support = urllib2.HTTPCookieProcessor(cookiejar)
        if enableProxy:
            proxy_support = urllib2.ProxyHandler({'http': '59.59.100.123:8118'})
            # build_opener returns an OpenerDirector that opens URLs via the
            # chained handlers.
            opener = urllib2.build_opener(proxy_support, cookie_support,
                                          urllib2.HTTPHandler)
            print("Proxy enabled")
        else:
            opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)
        urllib2.install_opener(opener)

    def GetServerTime(self):
        """Get server time and nonce, which are used to encode the password.

        Returns the tuple (serverTime, nonce, pubkey, rsakv), or None when
        the prelogin request or its parsing fails.
        """
        print("Getting server time and nonce...")
        try:
            serverData = _to_text(urllib2.urlopen(self.serverUrl).read())
            print(serverData)
            return WeiboSearch.sServerData(serverData)
        except Exception as err:  # deliberate best-effort boundary
            print('Get server time & nonce error! (%s)' % err)
            return None


def fetch_weibo(id, filename):
    """Fetch a user's page without the API and append it to *filename*.

    Only the first few posts are present in the page.  The raw page is
    written first, followed by one line per text captured from each
    "WB_detail" fragment.

    id       -- user id as a string (appended to http://weibo.com/u/).
    filename -- output file; opened in binary append mode so the bytes
                are stored exactly as received.
    """
    with open(filename, 'ab') as target:
        page = urllib2.urlopen('http://weibo.com/u/' + id).read()
        target.write(page)
        if re.search(br'\"WB_detail', page):
            print("success")
        for fraction in re.split(br'\"WB_detail\"', page):
            matchObj = re.search(br'nick-name=\".+?\">\\n +(.+?)<', fraction)
            if matchObj:
                target.write(matchObj.group(1))
                target.write(b"\n")


def fetchqueryresult():
    """Collect user ids from pages 37-50 of the people-search results.

    Every id found is printed and appended (one per line) to the file
    named "filename".
    """
    # URL of the people-search page; the page number is appended per request.
    baseUrl = "http://s.weibo.com/user/&auth=ord&age=22y&gender=women&region=custom:33:1&page="
    idPattern = re.compile(br'ids\=(\d+?)\\')
    with open("filename", 'ab') as target:
        for i in range(37, 51):  # first and last+1 page number
            line = urllib2.urlopen(baseUrl + str(i)).read()
            matchObj = idPattern.search(line)
            while matchObj:
                userId = matchObj.group(1)
                print(userId.decode('ascii'))
                target.write(userId)
                target.write(b"\n")
                # Continue after the last occurrence of this id so the same
                # id is not reported again.
                line = re.split(re.escape(userId), line)[-1]
                matchObj = idPattern.search(line)
            print(i)
            # Randomized pause between pages.
            time.sleep(2 + random.random())


def getjson():
    """Call the Weibo API user_timeline endpoint and print the response text."""
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; rv:24.0) Gecko/20100101 Firefox/24.0'}
    url = "https://api.weibo.com/2/statuses/user_timeline.json"
    your_param = {'source': '1675437817'}  # request parameters
    # requests appends the parameters to the URL as a query string.
    result = requests.get(url, params=your_param, headers=headers)
    print(result.text)


if __name__ == '__main__':
    # Runs only when this file is executed as the main program; when the
    # file is imported, __name__ is the module's name instead.
    # Replace the placeholders with a real account (e-mail) and password.
    weiboLogin = WeiboLogin('user@example.com', 'XXXXXXXX')
    if weiboLogin.Login():
        print("Landing success!")
        myurl = "http://api.weibo.com/2/statuses/timeline_batch.json?source=1675437817&uids=5029941840"
        htmlContent = urllib2.urlopen(myurl).read()
        print(htmlContent)

The other two modules it uses are WeiboSearch.py and WeiboEncode.py.

The weiboencode.py code is as follows:

# coding=utf-8
"""Build the form-encoded POST body for the Sina Weibo SSO login request.

Runs on both Python 2 and Python 3.
"""
import base64
import binascii
import urllib

try:  # Python 3
    from urllib.parse import quote, urlencode
except ImportError:  # Python 2
    quote, urlencode = urllib.quote, urllib.urlencode


def _native_str(value):
    """Return *value* as native ``str``, decoding ASCII bytes when needed."""
    if isinstance(value, str):
        return value
    return value.decode('ascii')


def PostEncode(userName, passWord, serverTime, nonce, pubkey, rsakv):
    """Used to generate POST data.

    userName, passWord       -- plain-text credentials.
    serverTime, nonce,
    pubkey, rsakv            -- values obtained from the prelogin reply.

    Returns the URL-encoded form body as a string.
    """
    encodedUserName = GetUserName(userName)  # user name is base64-encoded
    encodedPassWord = get_pwd(passWord, serverTime, nonce, pubkey)  # password is RSA-encrypted
    postPara = {
        'entry': 'weibo',
        'gateway': '1',
        'from': '',
        'savestate': '7',
        'userticket': '1',
        'ssosimplelogin': '1',
        'vsnf': '1',
        'vsnval': '',
        'su': encodedUserName,
        'service': 'miniblog',
        'servertime': serverTime,
        'nonce': nonce,
        'pwencode': 'rsa2',
        'sp': encodedPassWord,
        'encoding': 'UTF-8',
        'prelt': '',
        'rsakv': rsakv,
        'url': 'http://weibo.com/ajaxlogin.php?framelogin=1&callback=parent.sinaSSOController.feedBackUrlCallBack',
        'returntype': 'META',
    }
    return urlencode(postPara)


def GetUserName(userName):
    """Used to encode user name.

    The name is percent-quoted and then base64-encoded.  b64encode is used
    instead of encodestring so that long names do not get newlines
    inserted every 76 characters.
    """
    userNameTemp = quote(userName)
    if not isinstance(userNameTemp, bytes):
        # quote() output is pure ASCII; b64encode needs bytes on Python 3.
        userNameTemp = userNameTemp.encode('ascii')
    return _native_str(base64.b64encode(userNameTemp))


def get_pwd(password, servertime, nonce, pubkey):
    """Return the RSA-encrypted password as a hexadecimal string.

    password   -- plain-text password.
    servertime -- server time from the prelogin reply.
    nonce      -- nonce from the prelogin reply.
    pubkey     -- RSA modulus as a hexadecimal string.
    """
    # Third-party dependency, imported here so the stdlib-only helpers in
    # this module stay importable without it.
    import rsa

    rsaPublickey = int(pubkey, 16)
    key = rsa.PublicKey(rsaPublickey, 65537)  # create the public key
    # Plain-text layout taken from the site's JavaScript encryption code.
    message = str(servertime) + '\t' + str(nonce) + '\n' + str(password)
    if not isinstance(message, bytes):
        message = message.encode('utf-8')  # rsa.encrypt needs bytes on Python 3
    passwd = rsa.encrypt(message, key)
    return _native_str(binascii.b2a_hex(passwd))  # hex-encode the ciphertext

weibosearch.py

# coding=utf-8
"""Parse the replies of the Sina Weibo SSO login endpoints.

Runs on both Python 2 and Python 3.
"""
import re
import json

# JSONP payload: everything between the outermost parentheses.
_JSONP_BODY = re.compile(r'\((.*)\)')
# Target of the JavaScript redirect in the login reply.
_REDIRECT_URL = re.compile(r'''location\.replace\(['"](.*?)['"]\)''')


def _as_text(data):
    """Return *data* as text; bytes are decoded leniently as UTF-8."""
    if isinstance(data, bytes):
        return data.decode('utf-8', 'replace')
    return data


def sServerData(serverData):
    """Search the server time & nonce from server data.

    serverData -- JSONP reply of the prelogin request (text or bytes).

    Returns the tuple (serverTime, nonce, pubkey, rsakv), where serverTime
    is converted to a string.  Raises ValueError when no parenthesized
    payload is found; json.loads / KeyError errors propagate unchanged.
    """
    match = _JSONP_BODY.search(_as_text(serverData))
    if match is None:
        raise ValueError("no JSONP payload found in server data")
    data = json.loads(match.group(1))  # decode the JSON payload
    serverTime = str(data['servertime'])
    nonce = data['nonce']
    pubkey = data['pubkey']
    rsakv = data['rsakv']
    print("Server time is: %s" % serverTime)
    print("Nonce is: %s" % nonce)
    return serverTime, nonce, pubkey, rsakv


def sRedirectData(text):
    """Return the URL passed to location.replace(...) in *text*.

    text -- body of the login reply (text or bytes).

    Raises ValueError when the page contains no such redirect.
    """
    match = _REDIRECT_URL.search(_as_text(text))
    if match is None:
        raise ValueError("no location.replace(...) redirect found")
    loginUrl = match.group(1)
    print("loginUrl: %s" % loginUrl)
    return loginUrl

Currently, the crawler can log in automatically and call the ordinary Sina Weibo APIs. However, bulk retrieval of other people's posts requires a higher level of authorization, which has been applied for.

Sina Weibo simulation landing based on Python

This article is an English version of an article originally written in Chinese on aliyun.com and is provided for information purposes only. This website makes no representation or warranty of any kind, either expressed or implied, as to the accuracy, completeness, ownership, or reliability of the article or any translations thereof. If you have any concerns or complaints relating to the article, please send an email, providing a detailed description of the concern or complaint, to info-contact@alibabacloud.com. A staff member will contact you within 5 working days. Once verified, infringing content will be removed immediately.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

Get Started for Free

Sales Support

1 on 1 presale consultation

Chat Contact Sales
After-Sales Support

24/7 Technical Support 6 Free Tickets per Quarter Faster Response

Open a Ticket
Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.

Learn More

Sina Weibo simulation landing based on Python

Contact Us

What's Trending

Top 10 Tags

Top 10 Keywords

A Free Trial That Lets You Build Big!

Sales Support

After-Sales Support

Sina Weibo simulation landing based on Python

Contact Us

What's Trending

Top 10 Tags

Top 10 Keywords

Trending Topic

A Free Trial That Lets You Build Big!

Sales Support

After-Sales Support