# Basic usage of Python crawler - urllib
#
# Tutorial-style script: each section demonstrates one urllib feature.
# All demo statements are left commented out (or in no-op doc strings) so the
# file can be imported/run without performing network I/O; uncomment a section
# to try it.
from urllib import response, request, parse, error
from http import cookiejar

if __name__ == "__main__":
    # -- Basic GET ------------------------------------------------------------
    # response = request.urlopen("http://www.baidu.com")
    # print(response.read().decode("utf-8"))

    # -- POST: supplying `data` switches urlopen from GET to POST -------------
    # data = bytes(parse.urlencode({"word": "hello"}), encoding="utf-8")
    # response = request.urlopen("http://httpbin.org/post", data=data)
    # print(response.read())

    # -- Timeout --------------------------------------------------------------
    # response = request.urlopen("http://www.baidu.com", timeout=0.01)
    # print(response.read().decode("utf-8"))

    # -- Response inspection: type, status code, headers ----------------------
    # response = request.urlopen("http://www.python.org")
    # print(type(response))
    # print(response.status)
    # print(response.getheaders())
    # print(response.getheader("Server"))

    # -- Request object for more complex requests -----------------------------
    # request_obj = request.Request("http://python.org")
    # response = request.urlopen(request_obj)
    # print(response.read().decode("utf-8"))

    # -- Custom request headers (add_header() works as well) ------------------
    """
    url = "http://httpbin.org/post"
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "Host": "httpbin.org",
    }
    form = {"name": "Germey"}
    data = bytes(parse.urlencode(form), encoding="utf-8")
    req = request.Request(url, data=data, headers=headers, method="POST")
    response = request.urlopen(req)
    print(response.read())
    """

    # -- Proxies, and cookies (to maintain a login session) -------------------
    """
    proxy_handler = request.ProxyHandler({
        # proxy IPs go here, e.g. "http": "http://127.0.0.1:9743"
    })
    opener = request.build_opener(proxy_handler)
    response = opener.open("http://httpbin.org/get")

    cookie = cookiejar.CookieJar()
    handler = request.HTTPCookieProcessor(cookie)
    opener = request.build_opener(handler)
    response = opener.open("http://www.baidu.com")
    """

    # -- Saving cookies to disk: MozillaCookieJar, LWPCookieJar ---------------

    # -- Error handling: exceptions are basically HTTPError or URLError -------
    """
    try:
        response = request.urlopen("http://amojury.github.io")
    except error.URLError as e:
        print(e.reason)
    """

    # -- URL handling: urlparse, urlunparse (inverse), urlencode (dict->query)
    # (original example used a much longer Baidu search URL)
    # result = parse.urlparse("https://www.baidu.com/s?ie=utf-8&wd=python")
    # print(result)