This article introduces a simple Python function for querying a site's keyword ranking on Baidu. Its main features are:
1. Random UA: each search request is sent with a User-Agent chosen at random from a built-in list.
2. Simple to use: just call getRank(keyword, domain).
3. Encoding conversion: the keyword is decoded automatically (UTF-8 first, then GB2312), so character encoding should not cause problems.
4. Rich results: besides the ranking, it returns the title, URL, and snapshot (cache) date of the matching search result, which covers typical SEO needs.
5. It is convenient both for building into your own software and for using on its own.
The function is single-threaded, so it is fairly slow; feel free to modify it as needed.
The code is as follows:
# coding=utf-8
import random
import re

try:  # Python 3
    from urllib.parse import quote_plus
except ImportError:  # Python 2
    from urllib import quote_plus

import BeautifulSoup
import requests
def decodeAnyWord(w):
    """Return *w* as text, decoding byte strings as UTF-8 or else GB2312.

    Args:
        w: The keyword, either a byte string or already-decoded text.

    Returns:
        The decoded text. Input that is not a byte string is returned
        unchanged.

    Raises:
        UnicodeDecodeError: If *w* is a byte string that is valid in
            neither UTF-8 nor GB2312.
    """
    if not isinstance(w, bytes):
        # Already text: decoding again would fail (or implicitly re-encode).
        return w
    try:
        return w.decode('utf-8')
    except UnicodeDecodeError:
        # Not valid UTF-8, so fall back to the other supported encoding.
        return w.decode('gb2312')
def createURL(checkWord):
    """Build the Baidu search URL for *checkWord*, asking for 100 results.

    Surrounding whitespace and any newline characters are removed from the
    keyword. The keyword is then percent-encoded (spaces become ``+``) so
    that characters such as ``&``, ``#`` or ``%`` cannot break the query
    string.

    Args:
        checkWord: The search keyword, as text or as a byte string.

    Returns:
        The search URL as a string.
    """
    keyword = checkWord.strip()
    if not isinstance(keyword, bytes):
        # Text keywords are sent as UTF-8 bytes before quoting.
        keyword = keyword.encode('utf-8')
    keyword = keyword.replace(b'\n', b'')
    return 'http://www.baidu.com/s?wd=%s&rn=100' % quote_plus(keyword)
# Pool of User-Agent strings; one is picked at random for every request.
# NOTE(review): the listing these were recovered from had spaces injected
# into the literals, and a few tokens ("tentraveler", "3.0.20.6.2152",
# "Safari/123456", "Firefox/123") look damaged as well -- confirm against
# the original source if exact values matter.
_USER_AGENTS = (
    'Mozilla/4.0+(compatible;+MSIE+6.0;+Windows+NT+5.1;+SV1;+.NET+CLR+1.1.4322;+tentraveler)',
    'Mozilla/4.0+(compatible;+MSIE+6.0;+Windows+NT+5.1;+SV1;+.NET+CLR+2.0.50727;+.NET+CLR+3.0.20.6.2152;+.NET+CLR+3.5.30729)',
    'Mozilla/5.0+(Windows+NT+5.1)+AppleWebKit/537.1+(KHTML,+like+Gecko)+Chrome/21.0.1180.89+Safari/123456',
    'Mozilla/4.0+(compatible;+MSIE+6.0;+Windows+NT+5.1;+SV1)',
    'Mozilla/5.0+(Windows+NT+6.1;+rv:11.0)+Gecko/20100101+Firefox/123',
    'Mozilla/4.0+(compatible;+MSIE+8.0;+Windows+NT+5.1;+Trident/4.0;+SV1)',
    'Mozilla/4.0+(compatible;+MSIE+8.0;+Windows+NT+5.1;+Trident/4.0;+GTB7.1;+.NET+CLR+2.0.50727)',
    'Mozilla/4.0+(compatible;+MSIE+8.0;+Windows+NT+5.1;+Trident/4.0;+KB974489)',
)


def getContent(baiduURL, timeout=10):
    """Fetch the search-result page at *baiduURL* with a random User-Agent.

    Args:
        baiduURL: URL of the search-result page to download.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout a stalled connection would block the caller forever.

    Returns:
        The raw response body (``response.content``).
    """
    headers = {'User-Agent': random.choice(_USER_AGENTS)}
    response = requests.get(baiduURL, headers=headers, timeout=timeout)
    return response.content
def getLastURL(rawurl, timeout=10):
    """Return the final URL reached after requesting *rawurl*.

    Args:
        rawurl: The URL to request (for example a search-result link that
            redirects to the target page).
        timeout: Seconds to wait for the server before giving up, so one
            unresponsive target cannot hang the whole query.

    Returns:
        ``response.url`` of the completed request.
    """
    response = requests.get(rawurl, timeout=timeout)
    return response.url
def getAtext(atext):
    """Return the visible text inside the first ``<a>...</a>`` of *atext*.

    Newlines are removed, and any markup nested inside the anchor is
    stripped so that only the text remains.

    NOTE(review): the listing this was recovered from had lost the tag
    literals in the pattern and in the ``replace`` calls. The anchor
    pattern and the "strip every nested tag" step are a reconstruction;
    confirm against the original source.

    Args:
        atext: HTML source containing an anchor element.

    Returns:
        The anchor's text without tags or newlines, or an empty string when
        *atext* contains no anchor element.
    """
    flattened = atext.replace('\n', '')
    anchor = re.search(r'<a\b[^>]*>(.*?)</a>', flattened, re.IGNORECASE)
    if anchor is None:
        return ''
    # Drop inline markup nested inside the anchor, keeping only the text.
    return re.sub(r'<[^>]+>', '', anchor.group(1))
def getCacheDate(t):
    """Return the first ``YYYY-M-D`` style date found in *t*.

    Args:
        t: Text (for example the HTML of a result's snapshot line).

    Returns:
        The first substring made of four digits, a hyphen, one or two
        digits, a hyphen and one or two digits; an empty string when *t*
        contains no such date.
    """
    found = re.search(r'\d{4}-\d{1,2}-\d{1,2}', t)
    return found.group(0) if found else ''
def getRank(checkWord, domain):
    """Look up where *domain* ranks in Baidu's results for *checkWord*.

    The search page is downloaded, every ``<table class="result">`` entry
    is scanned in page order, and the first entry whose displayed URL
    contains *domain* before its first ``/`` is reported.

    Args:
        checkWord: The search keyword (byte string or text).
        domain: The domain to look for; it is matched literally.

    Returns:
        For a hit, the GB2312-encoded byte string
        ``"<keyword>, <rank> name, <title>, <cache date>, <final URL>"``,
        where the rank is the ``id`` attribute of the matching result
        table. Characters that GB2312 cannot represent are replaced rather
        than raising. If no result matches, the string ``'>100'``.
    """
    text_type = type(u'')  # ``unicode`` on Python 2, ``str`` on Python 3
    checkWord = decodeAnyWord(checkWord).replace(u'\n', u'')
    soup = BeautifulSoup.BeautifulSoup(getContent(createURL(checkWord)))

    # Compiled once, outside the loop. The domain is escaped so that its
    # dots match literally rather than "any character".
    domain_pattern = re.compile(r'^[^/]*%s' % re.escape(domain))
    tag_pattern = re.compile(r'<[^>]+>')

    for result in soup.findAll('table', {'class': 'result'}):
        # NOTE(review): the class name is taken to be 'g'; the recovered
        # listing's letter casing is unreliable -- confirm.
        url_span = result.find('span', {'class': 'g'})
        if url_span is None:
            continue
        # NOTE(review): the listing lost the literal tags that were removed
        # here, so all markup is stripped before matching.
        checkData = tag_pattern.sub('', text_type(url_span))
        if not domain_pattern.match(checkData):
            continue

        nowRank = result['id']  # rank comes from the result table's id
        resLink = result.find('h3').a
        domainURL = getLastURL(resLink['href'])  # follow redirects to the target
        resTitle = getAtext(text_type(resLink))
        cacheDate = getCacheDate(text_type(url_span))

        # NOTE(review): "name" after the rank is what the recovered listing
        # shows; it may be a translation artifact of the original label.
        res = u'%s, %s name, %s, %s, %s' % (
            checkWord, nowRank, resTitle, cacheDate, domainURL)
        return res.encode('gb2312', 'replace')

    return '>100'
domain = 'www.baidu.com'  # set the domain which you want to search.

# Run the sample query only when executed as a script, so that importing
# this module does not trigger network requests.
if __name__ == '__main__':
    print(getRank('Baidu', domain))