Checking proxy pool addresses concurrently in Python 3
This article shows, by example, how to check the addresses in a proxy pool concurrently in Python 3. It is shared here for your reference. The details are as follows:
# encoding=utf-8
# author: walker
# date: 2016-04-14
# summary: Use a thread pool to check proxy validity concurrently
"""Concurrently check which addresses in a proxy pool are usable.

Each candidate proxy is used to fetch a target URL; the proxy counts as valid
when the request succeeds with HTTP 200 and the response body contains a
caller-supplied feature string.
"""

import os
import sys
import time

import requests
from concurrent import futures

cur_dir_fullpath = os.path.dirname(os.path.abspath(__file__))

# Request headers sent with every probe request.
Headers = {
    'Accept': '*/*',
    'User-Agent': 'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; '
                  'Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; '
                  '.NET CLR 3.0.30729; .NET4.0C; .NET4.0E)',
}


def Check(desturl, proxy, feature):
    """Verify the validity of a single proxy.

    Args:
        desturl: Target address to fetch through the proxy.
        proxy: Proxy address without scheme; 'http://' is prepended.
            (presumably 'host:port' -- confirm against the pool source)
        feature: Substring that must appear in the response body.

    Returns:
        ``proxy`` if the request returned HTTP 200 and the body contains
        ``feature``; otherwise the empty string.
    """
    # Only the 'http' scheme is mapped, matching the original script.
    proxies = {'http': 'http://' + proxy}
    try:
        response = requests.get(url=desturl, headers=Headers,
                                proxies=proxies, timeout=3)
    except Exception:
        # Best effort: any failure while probing (timeout, refused connection,
        # malformed proxy address, ...) simply means the proxy is unusable.
        # KeyboardInterrupt/SystemExit are deliberately not caught.
        return ''

    try:
        if response.status_code != 200:
            return ''
        if feature not in response.text:
            return ''
        return proxy
    finally:
        # Always release the connection, including for non-200 responses.
        response.close()


def GetValidProxyPool(rawProxyPool, desturl, feature):
    """Check every proxy in ``rawProxyPool`` concurrently.

    Args:
        rawProxyPool: Iterable (set/list) of proxy addresses to check.
        desturl: Target address to fetch through each proxy.
        feature: Substring that must appear in the response body.

    Returns:
        List of the proxies that passed ``Check``, in completion order.
    """
    validProxyList = []  # valid proxy list
    # The context manager shuts the worker threads down once all checks finish.
    with futures.ThreadPoolExecutor(8) as pool:
        futureList = [pool.submit(Check, desturl, proxy, feature)
                      for proxy in rawProxyPool]
        print('\nsubmit done, waiting for responses\n')
        for future in futures.as_completed(futureList):
            proxy = future.result()
            print('proxy: ' + proxy)
            if proxy:  # valid proxy
                validProxyList.append(proxy)
    print('validProxyList size: ' + str(len(validProxyList)))
    return validProxyList


def GetRawProxyPool():
    """Obtain the original (unchecked) proxy pool.

    Returns:
        A set of proxy addresses. This is a placeholder that returns an
        empty set; fill it in with a real proxy source.
    """
    rawProxyPool = set()
    # obtain the original proxy pool in some way ......
    return rawProxyPool


if __name__ == "__main__":
    rawProxyPool = GetRawProxyPool()
    desturl = 'http://...'  # target address to be accessed through a proxy
    feature = 'xxx'  # signature of the target webpage
    validProxyPool = GetValidProxyPool(rawProxyPool, desturl, feature)