#!/usr/bin/python
# -*- coding: utf-8 -*-
# urllib2_test.py
# Author: wklken 2012-03-17 wklken@yeah.net
#
# Demonstrates the stdlib URL-fetching toolkit: plain urlopen, Request objects
# with custom headers, proxies, cookie handling, and redirect following.
# (Ported from Python 2 urllib2/cookielib/httplib to the Python 3 equivalents:
# urllib.request / urllib.error / urllib.parse / http.cookiejar / http.client.)

import socket
import urllib.error
import urllib.parse
import urllib.request
import http.cookiejar

url = "http://www.testurl..."  # change yourself


def use_urllib2():
    """Simplest fetch: open the module-level `url` and print the body length."""
    try:
        body = urllib.request.urlopen(url, timeout=5).read()
    except urllib.error.URLError as e:
        print(e.reason)
    else:
        # Only reached on success, so `body` is guaranteed to be bound.
        print(len(body))


def get_request():
    """Fetch `url` via a Request object: custom headers, optional POST params."""
    # Default timeout for every new socket (the original had a typo:
    # "setdefatimetimeout" -> setdefaulttimeout).
    socket.setdefaulttimeout(5)
    # Parameters: omitting `data` makes a GET; passing `data` makes a POST.
    params = {"wd": "a", "b": "2"}
    # Request headers used to identify the client.
    i_headers = {
        "User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; "
                      "rv:1.9.1) Gecko/20090624 Firefox/3.5",
        "Accept": "text/plain",
    }
    # POST variant (raises if the server does not support POST):
    # req = urllib.request.Request(
    #     url, data=urllib.parse.urlencode(params).encode(), headers=i_headers)
    req = urllib.request.Request(url, headers=i_headers)
    # Headers can also be added after construction; a duplicate key is
    # overwritten by the later value:
    # req.add_header('Accept', 'application/json')
    # The HTTP method can be forced:
    # req.get_method = lambda: 'PUT'
    try:
        page = urllib.request.urlopen(req)
        print(len(page.read()))
        # GET with query-string parameters:
        # url_params = urllib.parse.urlencode({"a": "1", "b": "2"})
        # final_url = url + "?" + url_params
        # print(final_url)
        # data = urllib.request.urlopen(final_url).read()
        # print("Method: get", len(data))
    except urllib.error.HTTPError as e:
        print("Error Code:", e.code)
    except urllib.error.URLError as e:
        print("Error Reason:", e.reason)


def use_proxy():
    """Fetch `url` through an (optional) HTTP proxy installed globally."""
    enable_proxy = False
    proxy_handler = urllib.request.ProxyHandler(
        {"http": "http://proxyurlXXXX.com:8080"})
    null_proxy_handler = urllib.request.ProxyHandler({})
    if enable_proxy:
        opener = urllib.request.build_opener(
            proxy_handler, urllib.request.HTTPHandler)
    else:
        opener = urllib.request.build_opener(
            null_proxy_handler, urllib.request.HTTPHandler)
    # Install as the global opener so plain urlopen() uses the choice above.
    urllib.request.install_opener(opener)
    content = urllib.request.urlopen(url).read()
    print("proxy len:", len(content))


class NoExceptionCookieProcesser(urllib.request.HTTPCookieProcessor):
    """Cookie processor that returns the response body object instead of
    raising HTTPError on 400 / 403 / 500 responses."""

    def http_error_400(self, req, fp, code, msg, hdrs):
        return fp

    def http_error_403(self, req, fp, code, msg, hdrs):
        return fp

    def http_error_500(self, req, fp, code, msg, hdrs):
        return fp


def hand_cookie():
    """Log in with a POST, capture cookies in a CookieJar, and print them."""
    cookie = http.cookiejar.CookieJar()
    # cookie_handler = urllib.request.HTTPCookieProcessor(cookie)
    # After adding the error-exception handler:
    cookie_handler = NoExceptionCookieProcesser(cookie)
    opener = urllib.request.build_opener(
        cookie_handler, urllib.request.HTTPHandler)
    url_login = "https://www.yourwebsite/?login"
    params = {"username": "user", "password": "111111"}
    # POST data must be bytes in Python 3, hence the .encode().
    opener.open(url_login, urllib.parse.urlencode(params).encode())
    for item in cookie:
        print(item.name, item.value)
    # urllib.request.install_opener(opener)
    # content = urllib.request.urlopen(url).read()
    # print(len(content))


def get_request_direct():
    """Follow redirects; print the final URL, headers, and body length."""
    import http.client
    http.client.HTTPConnection.debuglevel = 1
    request = urllib.request.Request("http://www.google.com")
    request.add_header("Accept", "text/html,*/*")
    request.add_header("Connection", "Keep-Alive")
    opener = urllib.request.build_opener()
    f = opener.open(request)
    print(f.url)
    # Python 2's f.headers.dict attribute is gone; build a dict explicitly.
    print(dict(f.headers))
    print(len(f.read()))


if __name__ == "__main__":
    use_urllib2()
    get_request()
    get_request_direct()
    use_proxy()
    hand_cookie()