# -*- coding: utf-8 -*-
import urllib2
import urllib
import cookielib
import hashlib
import re
import time
import json


class Spide:
    def __init__(self, proxy_ip, proxy_type, proxy_port, use_proxy=False):
        print 'using the proxy info :', proxy_ip
        self.proxy = urllib2.ProxyHandler({proxy_type: proxy_ip + ":" + proxy_port})
        self.usercode = ""
        self.userid = ""
        self.cj = cookielib.LWPCookieJar()
        # Default opener keeps cookies; swap in the proxy opener when asked to.
        self.opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(self.cj))
        if use_proxy:
            self.opener = urllib2.build_opener(self.proxy)
        urllib2.install_opener(self.opener)

    # Fetch the proxy list
    def get_proxy(self):
        proxy_info_json = None
        # First get the proxy info from the proxy API.
        try:
            reqRequest_proxy = urllib2.Request('http://gXiXmXmXeXpXrXoXxXy.com/api/getProxy')
            reqRequest_proxy.add_header('Accept', '*/*')
            reqRequest_proxy.add_header('Accept-Language', 'zh-CN,zh;q=0.8')
            reqRequest_proxy.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36')
            reqRequest_proxy.add_header('Content-Type', 'application/x-www-form-urlencoded')
            proxy_info = urllib2.urlopen(reqRequest_proxy).read()
            print proxy_info
            proxy_info_json = json.loads(proxy_info)
        except Exception, e:
            print 'proxy has a problem:', e
        return proxy_info_json

    def chrome(self):
        try:
            reqRequest = urllib2.Request('http://www.503error.com')
            reqRequest.add_header('Accept', '*/*')
            reqRequest.add_header('Accept-Language', 'zh-CN,zh;q=0.8')
            reqRequest.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/28.0.1500.72 Safari/537.36')
            reqRequest.add_header('Content-Type', 'application/x-www-form-urlencoded')
            content = urllib2.urlopen(reqRequest).read()
        except Exception, e:
            print 'oppps'
        print 'done'


if __name__ == "__main__":
    for count in range(100):
        print '################################:', count
        print 'Getting the new proxy info:'
        test = Spide(proxy_ip='test', proxy_type='http', proxy_port='3128', use_proxy=False)
        proxy_list = test.get_proxy()
        if not proxy_list:
            # The proxy API call failed; wait and retry instead of crashing.
            time.sleep(5)
            continue
        print 'start to chrome'
        spide1 = Spide(proxy_ip=proxy_list['ip'], proxy_type=proxy_list['protocol'], proxy_port=proxy_list['port'], use_proxy=True)
        spide1.chrome()
        time.sleep(5)
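# Note on the proxy API response: get_proxy() assumes the service returns a
# JSON object carrying at least the three keys read in __main__. A minimal
# sketch of that assumed body (only the key names 'protocol', 'ip', and
# 'port' are confirmed by the code above; everything else about the format
# is an assumption):
#
#   {"protocol": "http", "ip": "123.45.67.89", "port": "8080"}
#
# If the service nests the record differently (for example under a
# hypothetical "data" key), adjust the lookups in get_proxy() and
# __main__ accordingly.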