Python crawler: collecting search-suggestion (associated) keywords — code
The code is as follows:
# coding: UTF-8
"""Print search-suggestion keywords returned by the 360 (so.com) suggest API.

For every seed keyword the script picks a random HTTP proxy, requests the
suggest endpoint through it, and prints each double-quoted string found in
the response body. It then pauses briefly before the next keyword.

The imports fall back from Python 3 (urllib.request) to Python 2 (urllib2),
so the script runs on either interpreter.
"""
import re
import sys
import time
from contextlib import closing
from random import choice

try:  # Python 3
    from urllib.parse import quote
    from urllib.request import ProxyHandler, Request, build_opener
except ImportError:  # Python 2
    from urllib import quote
    from urllib2 import ProxyHandler, Request, build_opener

# Note: the proxy addresses below may no longer work. Replace them with
# valid "host:port" HTTP proxies before running.
PROXIES = ['27.24.158.153:81', '46.209.70.74:8080', '60.29.20.88:8888']

# Seed keywords whose suggestions are collected.
KEYWORDS = ["group", "technology"]

# Suggest endpoint; the percent-encoded keyword is appended to this prefix.
SUGGEST_URL = (
    "http://sug.so.360.cn/suggest/word"
    "?callback=suggest_so&encodein=UTF-8&encodeout=UTF-8&word="
)

# Headers sent with every request.
HEADERS = {
    "Host": "sug.so.360.cn",
    "Referer": "http://www.so.com/",
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) "
        "AppleWebKit/537.17 (KHTML, like Gecko) "
        "Chrome/24.0.1312.56 Safari/537.17"
    ),
}

# Seconds to wait for a proxy before giving up on a request.
REQUEST_TIMEOUT_SECONDS = 10

# Seconds to pause after each keyword so the endpoint is not hammered.
REQUEST_DELAY_SECONDS = 2

# Non-greedy match of anything enclosed in a pair of double quotes.
_QUOTED = re.compile(r'"(.*?)"')


def build_suggest_url(keyword):
    """Return the suggest-endpoint URL for *keyword*, percent-encoded."""
    return SUGGEST_URL + quote(keyword)


def extract_quoted(text):
    """Return every double-quoted substring of *text*, in order of appearance."""
    return _QUOTED.findall(text)


def fetch_suggestions(keyword, proxy):
    """Request suggestions for *keyword* through *proxy* and parse the body.

    Args:
        keyword: Seed keyword to look up.
        proxy: HTTP proxy as a "host:port" string.

    Returns:
        The list of double-quoted strings found in the response body.

    Raises:
        IOError: If the proxy or the endpoint cannot be reached in time
            (urllib's URLError and socket errors are IOError subclasses).
    """
    # Use a dedicated opener instead of install_opener() so the process-wide
    # default opener is left untouched.
    opener = build_opener(ProxyHandler({'http': 'http://' + proxy}))
    request = Request(build_suggest_url(keyword), headers=HEADERS)
    with closing(opener.open(request, timeout=REQUEST_TIMEOUT_SECONDS)) as response:
        body = response.read()
    # On Python 3 read() yields bytes, which a str pattern cannot search;
    # on Python 2 the body is already a (byte) str and is left as-is.
    if not isinstance(body, str):
        body = body.decode("utf-8")
    return extract_quoted(body)


def main():
    """Print the suggestions for every seed keyword, one string per line."""
    for keyword in KEYWORDS:
        proxy = choice(PROXIES)
        try:
            words = fetch_suggestions(keyword, proxy)
        except IOError as err:
            # A dead proxy should not abort the remaining keywords.
            sys.stderr.write(
                "request for %r via proxy %s failed: %s\n" % (keyword, proxy, err)
            )
        else:
            for word in words:
                print(word)
        time.sleep(REQUEST_DELAY_SECONDS)


if __name__ == "__main__":
    main()