# Fetch a fixed set of web pages concurrently through a small thread pool.
#
# This file holds two listings: the fetch script (main / do_get_con) and the
# thread_pool module it relies on (Worker / WorkerManager).

import os
import socket
import sys
import threading
import time
import urllib
from contextlib import closing
from datetime import datetime
from threading import Thread

try:
    import urllib2                       # Python 2
except ImportError:
    import urllib.request as urllib2     # Python 3: same urlopen() entry point

try:
    import Queue                         # Python 2
except ImportError:
    import queue as Queue                # Python 3: module was renamed

try:
    # Original import, kept for the layout where thread_pool is its own module.
    from thread_pool import *
except ImportError:
    # Single-file layout: Worker and WorkerManager are defined further down.
    pass

# Directory that receives one file per fetched URL.
OUTPUT_DIR = "/tmp/ttt"


def main():
    """Fetch every URL in the table below using a pool of 50 worker threads.

    Each page is stored by do_get_con() as OUTPUT_DIR/<name>. The function
    returns once wait_for_complete() has joined every worker thread.
    """
    url_list = {
        "sina": "http://www.sina.com.cn",
        "sohu": "http://www.sohu.com",
        "yahoo": "http://www.yahoo.com",
        "xiaonei": "http://www.xiaonei.com",
        "qihoo": "http://www.qihoo.com",
        "laohan": "http://www.laohan.org",
        "eyou": "http://www.eyou.com",
        "chinaren": "http://www.chinaren.com",
        "douban": "http://www.douban.com",
        "163": "http://www.163.com",
        "daqi": "http://www.daqi.com",
        "qq": "http://www.qq.com",
        "baidu_1": "http://www.baidu.com/s?wd=asdfasdf",
        "baidu_2": "http://www.baidu.com/s?wd=dddddddf",
        # NOTE(review): the two "google" entries point at baidu.com; kept
        # exactly as found - confirm whether that is intentional.
        "google_1": "http://www.baidu.com/s?wd=sadfas",
        "google_2": "http://www.baidu.com/s?wd=sadflasd",
        "hainei": "http://www.hainei.com",
        "microsoft": "http://www.microsoft.com",
        "wlzuojia": "http://www.wlzuojia.com",
    }

    # Use the thread pool.
    socket.setdefaulttimeout(10)  # applies to every socket opened afterwards

    # Without the output directory every download would fail on open().
    if not os.path.isdir(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)

    print('start testing')
    wm = WorkerManager(50)
    for url_name, url_link in url_list.items():
        wm.add_job(do_get_con, url_name, url_link)
    wm.wait_for_complete()
    print('end testing')


def do_get_con(url_name, url_link):
    """Download *url_link* and write the response body to OUTPUT_DIR/<url_name>.

    Best effort: any failure (network error, timeout, unwritable file) is
    reported on stdout and swallowed so that one bad site cannot abort the
    run. Always returns None.
    """
    try:
        # closing() guarantees the HTTP response is released even if read() fails.
        with closing(urllib2.urlopen(url_link)) as response:
            data = response.read()
        # Binary mode: the payload is the raw response bytes and must be
        # written untouched.
        with open(os.path.join(OUTPUT_DIR, url_name), "wb") as out_file:
            out_file.write(data)
    except Exception as exc:
        print("failed to fetch %s (%s): %s" % (url_name, url_link, exc))


# thread_pool code (not original; reproduced from:
# http://blog.daviesliu.net/2006/10/09/234822/)

# working thread
class Worker(Thread):
    """Daemon thread that runs jobs taken from a shared work queue.

    A job is a ``(callable, args, kwds)`` tuple. The value returned by the
    callable is printed and put on the result queue. The thread starts
    itself on construction and exits once the work queue has stayed empty
    for ``Worker.timeout`` seconds.
    """

    worker_count = 0  # number of Worker objects created so far; source of ids
    timeout = 1       # seconds to wait for a job before the thread exits

    def __init__(self, workQueue, resultQueue, **kwds):
        """Create the worker and start it immediately.

        workQueue   -- queue of (callable, args, kwds) job tuples
        resultQueue -- queue that receives each job's return value
        kwds        -- forwarded unchanged to threading.Thread.__init__
        """
        Thread.__init__(self, **kwds)
        self.id = Worker.worker_count
        Worker.worker_count += 1
        self.daemon = True  # do not keep the interpreter alive at exit
        self.workQueue = workQueue
        self.resultQueue = resultQueue
        self.start()

    def run(self):
        """The get-some-work, do-some-work main loop of worker threads."""
        while True:
            try:
                func, args, kwds = self.workQueue.get(timeout=Worker.timeout)
            except Queue.Empty:
                # Nothing arrived within the timeout: this worker is done.
                break
            try:
                res = func(*args, **kwds)
            except Exception:
                # A failing job must not take the worker down with it,
                # otherwise the jobs still queued could be left unprocessed.
                print('worker[%2d] %s' % (self.id, str(sys.exc_info()[:2])))
                continue
            print("worker[%2d]: %s" % (self.id, str(res)))
            self.resultQueue.put(res)


class WorkerManager(object):
    """Owns a work queue, a result queue and the workers that serve them.

    Workers are created (and therefore started) by the constructor. Because
    each worker exits after ``Worker.timeout`` seconds without work, jobs
    should be added promptly after the manager is built.
    """

    def __init__(self, num_of_workers=10, timeout=2):
        """Create both queues and start *num_of_workers* workers.

        timeout is stored on the instance; no method in this class reads it.
        """
        self.workQueue = Queue.Queue()
        self.resultQueue = Queue.Queue()
        self.workers = []
        self.timeout = timeout
        self._recruitThreads(num_of_workers)

    def _recruitThreads(self, num_of_workers):
        """Create *num_of_workers* workers bound to this manager's queues."""
        for _ in range(num_of_workers):
            self.workers.append(Worker(self.workQueue, self.resultQueue))

    def wait_for_complete(self):
        """Block until every worker thread has terminated.

        The worker list is emptied as threads are joined. A worker only
        terminates after the work queue has been empty for Worker.timeout
        seconds, so this returns roughly that long after the last job ends.
        """
        # ...then, wait for each of them to terminate:
        while self.workers:
            worker = self.workers.pop()
            worker.join()
        print("All jobs are are completed.")

    def add_job(self, callable, *args, **kwds):
        """Queue ``callable(*args, **kwds)`` for execution by a worker."""
        self.workQueue.put((callable, args, kwds))

    def get_result(self, *args, **kwds):
        """Return the next job result; arguments go to Queue.get unchanged."""
        return self.resultQueue.get(*args, **kwds)


# Entry point, placed last so every name main() needs is already defined.
if __name__ == "__main__":
    main()