urllib2 is remarkably powerful.
I've tried it for logging in through a proxy to pull cookies, following redirects to grab images, and more...
Documentation: http://docs.python.org/library/urllib2.html
Straight to the demo code.
It covers: plain fetches, Request objects (POST/GET), proxies, cookies, and redirect handling.
#!/usr/bin/python
# -*- coding: utf-8 -*-
# urllib2_test.py
# author: wklken
# 2012-03-17 wklken@yeah.net

import urllib, urllib2, cookielib, socket

url = "http://www.testurl..."  # change this yourself

# the simplest way
def use_urllib2():
    try:
        f = urllib2.urlopen(url, timeout=5).read()
        print len(f)  # printed inside try, so a failed request doesn't hit an unbound name
    except urllib2.URLError as e:
        print e.reason

# using Request
def get_request():
    # a timeout can be set globally
    socket.setdefaulttimeout(5)
    # parameters can be attached [no data means GET, with data it becomes POST]
    params = {"wd": "a", "b": "2"}
    # request headers can be added, e.g. to identify the client
    i_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; "
                               "rv:1.9.1) Gecko/20090624 Firefox/3.5",
                 "Accept": "text/plain"}
    # use POST: the params are posted to the server; if not supported, an exception is thrown
    #req = urllib2.Request(url, data=urllib.urlencode(params), headers=i_headers)
    req = urllib2.Request(url, headers=i_headers)
    # headers can also be added after the Request is created; on a repeated key the later one wins
    #req.add_header('Accept', 'application/json')
    # the HTTP method can be forced
    #req.get_method = lambda: 'PUT'
    try:
        page = urllib2.urlopen(req)
        print len(page.read())
        # a GET with query parameters looks like this:
        #url_params = urllib.urlencode({"a": "1", "b": "2"})
        #final_url = url + "?" + url_params
        #print final_url
        #data = urllib2.urlopen(final_url).read()
        #print "method:get", len(data)
    except urllib2.HTTPError as e:
        print "Error Code:", e.code
    except urllib2.URLError as e:
        print "Error Reason:", e.reason

def use_proxy():
    enable_proxy = False
    proxy_handler = urllib2.ProxyHandler({"http": "http://proxyurlXXXX.com:8080"})
    null_proxy_handler = urllib2.ProxyHandler({})
    if enable_proxy:
        opener = urllib2.build_opener(proxy_handler, urllib2.HTTPHandler)
    else:
        opener = urllib2.build_opener(null_proxy_handler, urllib2.HTTPHandler)
    # this line installs the opener as urllib2's global opener
    urllib2.install_opener(opener)
    content = urllib2.urlopen(url).read()
    print "proxy len:", len(content)

# a cookie processor that returns the response body instead of raising on 400/403/500
class NoExceptionCookieProcesser(urllib2.HTTPCookieProcessor):
    def http_error_403(self, req, fp, code, msg, hdrs):
        return fp
    def http_error_400(self, req, fp, code, msg, hdrs):
        return fp
    def http_error_500(self, req, fp, code, msg, hdrs):
        return fp

def hand_cookie():
    cookie = cookielib.CookieJar()
    #cookie_handler = urllib2.HTTPCookieProcessor(cookie)
    # use the error-tolerant handler defined above instead
    cookie_handler = NoExceptionCookieProcesser(cookie)
    opener = urllib2.build_opener(cookie_handler, urllib2.HTTPHandler)
    url_login = "https://www.yourwebsite/?login"
    params = {"username": "user", "password": "111111"}
    opener.open(url_login, urllib.urlencode(params))
    for item in cookie:
        print item.name, item.value
    #urllib2.install_opener(opener)
    #content = urllib2.urlopen(url).read()
    #print len(content)

# get the URL of the final page after N redirects
def get_request_direct():
    import httplib
    httplib.HTTPConnection.debuglevel = 1
    request = urllib2.Request("http://www.google.com")
    request.add_header("Accept", "text/html,*/*")
    request.add_header("Connection", "Keep-Alive")
    opener = urllib2.build_opener()
    f = opener.open(request)
    print f.url           # final URL after any redirects
    print f.headers.dict
    print len(f.read())

if __name__ == "__main__":
    use_urllib2()
    get_request()
    get_request_direct()
    use_proxy()
    hand_cookie()
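The demo installs handlers one at a time, but build_opener accepts several handlers at once, so proxy routing and cookie handling can share a single opener. A minimal sketch of that combination, reusing the demo's placeholder proxy address (hypothetical, like all the URLs here):

#!/usr/bin/python
# -*- coding: utf-8 -*-
import urllib2, cookielib

# one opener that both routes through a proxy and keeps cookies across requests
cookie = cookielib.CookieJar()
opener = urllib2.build_opener(
    urllib2.ProxyHandler({"http": "http://proxyurlXXXX.com:8080"}),  # placeholder proxy, as in the demo
    urllib2.HTTPCookieProcessor(cookie),
)
# either call opener.open(some_url) on it directly, or install it globally
# so that every urllib2.urlopen() call goes through it:
urllib2.install_opener(opener)

Note that install_opener is process-global, so passing the opener around and calling opener.open() explicitly is the safer choice when only some requests should use the proxy.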