Twitter 的 API 有很多,但總感覺不夠給力,總是摳摳索索,不肯一次把資料給全。我總結了一下,封裝了幾個函數,
程式碼如下:
"""Helpers for collecting tweets through a python-twitter ``Api`` client.

Created on Jun 22, 2013

@author: Yang
"""
import datetime
import json
import math
import time
from email.utils import mktime_tz, parsedate, parsedate_tz

try:
    import twitter
except ImportError:
    # Nothing below references the package directly: every function receives
    # an already-built client, so the helpers stay importable without it.
    twitter = None

# Example client construction. Supply your own credentials from a safe place
# (environment variables, a config file outside version control) and never
# publish real keys alongside the code:
# t = twitter.Api(Consumerkey, Consumersecret, token, tokensecret)

# Page sizes and paging depth requested from the API.
_SEARCH_PAGE_SIZE = 200
_SEARCH_EXTRA_PAGES = 5
_TIMELINE_PAGE_SIZE = 100


def timestamp(str):
    """Convert an RFC 2822-style date string into seconds since the epoch.

    The UTC offset contained in the string (for example the ``+0000`` of
    ``"Wed Aug 27 13:08:45 +0000 2008"``) is honoured, so the result does not
    depend on the time zone of the machine running the code. Strings without
    an offset are interpreted as local time.

    Args:
        str: The date string to convert. The parameter keeps its original
            name (which shadows the builtin) so keyword callers keep working.

    Returns:
        The timestamp as a float.

    Raises:
        ValueError: If the string cannot be parsed as a date.
    """
    parsed = parsedate_tz(str)
    if parsed is None:
        raise ValueError("unrecognised date string: {0!r}".format(str))
    return float(mktime_tz(parsed))


def _status_to_dict(status):
    """Decode one API status object into a dict via its JSON string form."""
    return json.loads(str(status))


def _collect_search_pages(api, search_kwargs, page_size=_SEARCH_PAGE_SIZE,
                          extra_pages=_SEARCH_EXTRA_PAGES):
    """Run ``api.GetSearch`` repeatedly, walking backwards with ``max_id``.

    One initial request is made, followed by at most ``extra_pages`` further
    requests. Paging stops early as soon as a request returns nothing.
    """
    collected = []
    request = dict(search_kwargs, count=page_size)
    for _ in range(extra_pages + 1):
        page = [_status_to_dict(status) for status in api.GetSearch(**request)]
        if not page:
            break
        collected.extend(page)
        # max_id is inclusive, so step just below the lowest id seen to avoid
        # fetching the same tweet again on the next page.
        request['max_id'] = min(tweet['id'] for tweet in page) - 1
    return collected


def SearchQuery(query, t):
    """Search tweets matching a query term.

    Args:
        query: The search term passed to ``GetSearch(term=...)``.
        t: The API client; it must provide a ``GetSearch`` method.

    Returns:
        A list of tweet dicts, empty when nothing matches.
    """
    return _collect_search_pages(t, {'term': query})


def SearchLocation(geo, t):
    """Search tweets by geocode.

    Args:
        geo: The value passed to ``GetSearch(geocode=...)``.
        t: The API client; it must provide a ``GetSearch`` method.

    Returns:
        A list of tweet dicts, empty when nothing matches.
    """
    return _collect_search_pages(t, {'geocode': geo})


def GetUsertweets(id, tweetid, tweettime, delay=24*60*60, t=None):
    """Fetch a user's tweets posted close in time to a reference tweet.

    Two timeline requests are made: one with ``max_id=tweetid`` and one with
    ``since_id=tweetid``. Only tweets whose ``created_at`` lies within
    ``delay`` seconds of ``tweettime`` are kept.

    Args:
        id: The user identifier passed positionally to ``GetUserTimeline``.
        tweetid: Id of the reference tweet.
        tweettime: Creation time of the reference tweet, as a date string
            accepted by ``timestamp``.
        delay: Maximum distance from ``tweettime`` in seconds (default: one
            day).
        t: The API client; it must provide a ``GetUserTimeline`` method. It
            is required even though it carries a default: a parameter without
            a default may not follow ``delay`` in the signature.

    Returns:
        A list of tweet dicts: first those from the ``max_id`` request, then
        those from the ``since_id`` request.

    Raises:
        TypeError: If ``t`` is not supplied.
        ValueError: If ``tweettime`` or a tweet's ``created_at`` cannot be
            parsed.
    """
    if t is None:
        raise TypeError("GetUsertweets() requires the API client argument 't'")
    reference = timestamp(tweettime)
    kept = []
    for bound in ({'max_id': tweetid}, {'since_id': tweetid}):
        statuses = t.GetUserTimeline(id, count=_TIMELINE_PAGE_SIZE, **bound)
        for status in statuses:
            tweet = _status_to_dict(status)
            if abs(timestamp(tweet['created_at']) - reference) <= delay:
                kept.append(tweet)
    return kept
這裡面主要有兩類函數:搜尋(SearchQuery、SearchLocation)和 GetUsertweets。
裡面用到了 max_id 和 since_id,並且反覆地使用,這樣就能儘可能多地抓到 tweets。