This example shows how to concurrently check the validity of the addresses in a proxy pool using Python 3. It is shared here for reference:
# encoding=utf-8
# author: walker
# date: 2016-04-14
# summary: Use a thread pool to verify the validity of proxies concurrently.
import logging
import os
import sys
import time
from concurrent import futures

import requests

logger = logging.getLogger(__name__)

cur_dir_fullpath = os.path.dirname(os.path.abspath(__file__))

# Request headers sent with every validation request.
Headers = {
    'Accept': '*/*',
    'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; '
                  'Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; '
                  '.NET CLR 3.0.30729; .NET4.0C; .NET4.0E)',
}


def Check(desturl, proxy, feature, timeout=3):
    """Check whether a single proxy can fetch the destination page.

    Args:
        desturl: URL to request through the proxy.
        proxy: Proxy address without a scheme, e.g. ``'1.2.3.4:8080'``.
        feature: Substring that must appear in the response body for the
            proxy to count as valid.
        timeout: Timeout in seconds passed to ``requests.get``.

    Returns:
        ``proxy`` when the request succeeds with status 200 and the body
        contains ``feature``; otherwise an empty string.
    """
    # Only the 'http' scheme is mapped, so (per the requests proxies
    # mapping) an https:// desturl would not be routed through the proxy.
    proxies = {'http': 'http://' + proxy}
    try:
        response = requests.get(url=desturl, headers=Headers,
                                proxies=proxies, timeout=timeout)
    except Exception:
        # Best effort: any failure (refused connection, timeout, malformed
        # proxy address, ...) simply means this proxy is unusable. Unlike a
        # bare ``except``, this lets KeyboardInterrupt/SystemExit propagate.
        logger.debug('proxy %s failed', proxy, exc_info=True)
        return ''

    try:
        if response.status_code != 200:
            return ''
        if feature not in response.text:
            return ''
        return proxy
    finally:
        # Always release the connection, including for non-2xx responses
        # (a Response object is falsy when its status is 4xx/5xx).
        response.close()


def getvalidproxypool(rawproxypool, desturl, feature, max_workers=8):
    """Validate every proxy in ``rawproxypool`` concurrently.

    Args:
        rawproxypool: Iterable (set/list) of proxy addresses to test.
        desturl: URL to request through each proxy.
        feature: Substring expected in the destination page.
        max_workers: Number of worker threads used for the checks.

    Returns:
        A list of the proxies for which ``Check`` succeeded, in completion
        order.
    """
    validproxylist = []  # proxies that passed the check
    # The ``with`` block shuts the executor down once all checks finish.
    with futures.ThreadPoolExecutor(max_workers) as pool:
        futurelist = [pool.submit(Check, desturl, proxy, feature)
                      for proxy in rawproxypool]
        print('\n submit done, waiting for responses\n')
        for future in futures.as_completed(futurelist):
            proxy = future.result()
            print('proxy: ' + proxy)
            if proxy:  # non-empty string means the proxy is valid
                validproxylist.append(proxy)
    print('validproxylist size: ' + str(len(validproxylist)))
    return validproxylist


def getrawproxypool():
    """Return the raw (unverified) proxy pool.

    Placeholder: fill the set with proxy addresses obtained by whatever
    means is available. As written, it returns an empty set.
    """
    rawproxypool = set()
    # Obtain the raw proxy pool in some way ...
    return rawproxypool


if __name__ == "__main__":
    rawproxypool = getrawproxypool()
    desturl = 'http://...'  # destination address to access through the proxy
    feature = 'xxx'  # signature string expected in the target page
    validproxypool = getvalidproxypool(rawproxypool, desturl, feature)