#!/usr/bin/python
# -*- coding: utf-8 -*-
# urllib2_test.py
# author: wklken
# 2012-03-17 wklken@yeah.net

import urllib, urllib2, cookielib, socket

url = "http://www.testurl..."  # change yourself


def use_urllib2():
    try:
        f = urllib2.urlopen(url, timeout=5).read()
        print len(f)
    except urllib2.URLError, e:
        print e.reason


# using Request
def get_request():
    # a timeout can be set
    socket.setdefaulttimeout(5)
    # parameters can be added [without data the request is a GET; with data, a POST]
    params = {"wd": "a", "b": "2"}
    # request headers can be added so the client identifies itself
    i_headers = {"User-Agent": "Mozilla/5.0 (Windows; U; Windows NT 5.1; zh-CN; "
                               "rv:1.9.1) Gecko/20090624 Firefox/3.5",
                 "Accept": "text/plain"}
    # use POST: params are posted to the server; if not supported, an exception is thrown
    # req = urllib2.Request(url, data=urllib.urlencode(params), headers=i_headers)
    req = urllib2.Request(url, headers=i_headers)

    # after creating the Request you can still add headers;
    # if a key is repeated, the later value takes effect
    # req.add_header('Accept', 'application/json')
    # the HTTP method can also be overridden
    # req.get_method = lambda: 'PUT'

    try:
        page = urllib2.urlopen(req)
        print len(page.read())
        # like GET
        # url_params = urllib.urlencode({"a": "1", "b": "2"})
        # final_url = url + "?" + url_params
        # print final_url
        # data = urllib2.urlopen(final_url).read()
        # print "Method: GET", len(data)
    except urllib2.HTTPError, e:
        print "Error Code:", e.code
    except urllib2.URLError, e:
        print "Error Reason:", e.reason


def use_proxy():
    enable_proxy = False
    proxy_handler = urllib2.ProxyHandler({"http": "http://proxyurlXXXX.com:8080"})
    null_proxy_handler = urllib2.ProxyHandler({})
    if enable_proxy:
        opener = urllib2.build_opener(proxy_handler, urllib2.HTTPHandler)
    else:
        opener = urllib2.build_opener(null_proxy_handler, urllib2.HTTPHandler)
    # this line installs the opener as urllib2's global opener
    urllib2.install_opener(opener)
    content = urllib2.urlopen(url).read()
    print "proxy len:", len(content)


# a cookie processor that returns the response instead of raising
# on 403/400/500, so cookies can still be read from error responses
class NoExceptionCookieProcesser(urllib2.HTTPCookieProcessor):
    def http_error_403(self, req, fp, code, msg, hdrs):
        return fp

    def http_error_400(self, req, fp, code, msg, hdrs):
        return fp

    def http_error_500(self, req, fp, code, msg, hdrs):
        return fp


def hand_cookie():
    cookie = cookielib.CookieJar()
    # cookie_handler = urllib2.HTTPCookieProcessor(cookie)
    # after adding the error-tolerant handler above:
    cookie_handler = NoExceptionCookieProcesser(cookie)
    opener = urllib2.build_opener(cookie_handler, urllib2.HTTPHandler)

    url_login = "https://www.yourwebsite/?login"
    params = {"username": "user", "password": "111111"}
    opener.open(url_login, urllib.urlencode(params))
    for item in cookie:
        print item.name, item.value
    # urllib2.install_opener(opener)
    # content = urllib2.urlopen(url).read()
    # print len(content)


# get the final page URL after N redirects
def get_request_direct():
    import httplib
    httplib.HTTPConnection.debuglevel = 1  # print the raw request/response
    request = urllib2.Request("http://www.google.com")
    request.add_header("Accept", "text/html,*/*")
    request.add_header("Connection", "Keep-Alive")
    opener = urllib2.build_opener()
    f = opener.open(request)
    print f.url
    print f.headers.dict
    print len(f.read())


if __name__ == "__main__":
    use_urllib2()
    get_request()
    get_request_direct()
    use_proxy()
    hand_cookie()
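
# Note: urllib2 exists only in Python 2; in Python 3 it was merged into
# urllib.request and urllib.error. A minimal sketch of what use_urllib2()
# above looks like under Python 3 (for reference only; this script itself
# targets Python 2):
#
#     import urllib.request, urllib.error
#
#     try:
#         f = urllib.request.urlopen(url, timeout=5).read()
#         print(len(f))
#     except urllib.error.URLError as e:
#         print(e.reason)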