# -*- coding: utf-8 -*-
import urllib
#url = 'http://iplaypython.com/'
#url1 = urllib.urlopen(url)  # open the URL address; signature: urlopen(url, data=None, proxies=None)
#print url1.read()    # read(), readline(), readlines(), fileno(), close(): these methods are used exactly like file objects
#print url1.getcode() # getcode(): returns the HTTP status code; 200 means the request completed successfully, 404 means the URL was not found
#print url1.geturl()  # returns the URL address
#print url1.info()    # returns an httplib.HTTPMessage object representing the header information returned by the remote server
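# A quick runnable check of the methods above (a minimal sketch; assumes the
# site is reachable, and `html` is the same name the decode examples below use):
html = urllib.urlopen('http://iplaypython.com/')
print html.getcode()  # e.g. 200 on success
print html.geturl()   # the final URL, after any redirects
print html.info()     # the httplib.HTTPMessage holding the response headers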
#c = html.read().decode('gbk').encode('utf-8')
#print c
# You can also set the error handling for the decode:
#c = html.read().decode('gbk', 'ignore').encode('utf-8')
#print c
# 'ignore' skips characters that cannot be decoded
# The urlretrieve() method, used with a callback function,
# takes 3 parameters:
# parameter 1: the URL to fetch; must be a string
# parameter 2: the local save path + file name
# parameter 3: a callback; you may define its behavior freely, but it must accept 3 parameters:
"""
(1) the number of data blocks transferred so far
(2) the size of each data block, in bytes
(3) the size of the remote file (sometimes -1)
"""
def cbk(a, b, c):
    """Report download progress as a percentage."""
    abc = 100.0 * a * b / c  # 100.0 forces float division on Python 2
    if abc > 100:
        abc = 100
    print '%.2f%%' % abc
url = 'http://www.qq.com'
locpath = r'C:\Users\Administrator\Desktop\sinaa.html'
print urllib.urlretrieve(url, locpath, cbk)  # returns (local filename, headers)
# Internally, urllib fetches remote data through the URLopener and FancyURLopener classes. For ordinary urllib use you rarely touch these two classes, but if you are interested in how urllib is implemented, or want urllib to support more protocols, you can study them. In the Python manual, urllib's author also lists the module's flaws and weaknesses; interested readers can open the Python manual to learn more.
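# A minimal sketch of those classes (my own example, not from the original):
# subclass FancyURLopener and override its `version` attribute, which urllib
# sends as the User-Agent header.
class MyOpener(urllib.FancyURLopener):
    version = 'MyCrawler/1.0'  # hypothetical User-Agent string

#opener = MyOpener()
#print opener.open('http://www.qq.com').read()[:100]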
"""
urllib url encode, decode. url get When the data is submitted, it is in the Span style= "Font-family:verdana;" >url So the string, so in value is not allowed to have ' = '
* Urllib.quote (string[, safe]): Encodes a string. The parameter safe Specifies a character that does not need to be encoded ;
* Urllib.unquote (String) : Decodes the string;
* Urllib.quote_plus (string [, safe]) : similar to Urllib.quote, but this method is replaced with ' + ' "' , while Quote with '%20 ' to replace "'
* Urllib.unquote_plus (String) : Decodes the string;
* Urllib.urlencode (query[, Doseq]): converts a dict or a list of tuples containing two elements into a URL parameter. For example , the dictionary {' name ': ' Dark-bull ', ' Age ': $} will be converted to ' name=dark-bull&age=200 '
* Urllib.pathname2url (PATH): Converts a local path to a URL path;
* Urllib.url2pathname (PATH): Converts the URL path to the cost ground path;
"""
data = 'name = ~a+3'
data1 = urllib.quote(data)
print data1                       # result: name%20%3D%20%7Ea%2B3
print urllib.unquote(data1)       # result: name = ~a+3
data2 = urllib.quote_plus(data)
print data2                       # result: name+%3D+%7Ea%2B3
print urllib.unquote_plus(data2)  # result: name = ~a+3
data3 = urllib.urlencode({'name': 'dark-bull', 'age': 200})
print data3                       # result: age=200&name=dark-bull
data4 = urllib.pathname2url(r'd:\a\b\c\23.php')
print data4                       # result: ///d|/a/b/c/23.php
print urllib.url2pathname(data4)  # result: d:\a\b\c\23.php
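# A hedged sketch of the doseq parameter mentioned above (my own example
# values): with doseq, each element of a sequence value becomes its own
# key=value pair instead of one quoted repr() string.
data5 = {'tag': ['python', 'urllib']}
print urllib.urlencode(data5)           # tag=%5B%27python%27%2C+%27urllib%27%5D
print urllib.urlencode(data5, doseq=1)  # tag=python&tag=urllib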
------------Introduction 2
# -*- coding: utf-8 -*-
# import urllib
# url = 'http://www.qq.com'
# info = urllib.urlopen(url).info()
# print info
# print info.getparam('charset')  # get the page encoding from the Content-Type header
import chardet  # character-set detection (a module for detecting the encoding of a string or file)
import urllib
# url = 'http://www.jd.com'
# conten = urllib.urlopen(url).read()
# print chardet.detect(conten)
# result = chardet.detect(conten)
# print result['encoding']
# print chardet.detect('I am Chinese')
def auto_chardet(url):
    """Detect the character encoding of the page at url."""
    content = urllib.urlopen(url).read()
    result = chardet.detect(content)
    encoding = result['encoding']
    return encoding
urls = ['http://www.iplaypython.com', 'http://www.baidu.com']
for url in urls:
    print url, auto_chardet(url)
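# A follow-up sketch building on auto_chardet (my own helper, not in the
# original): decode a page to unicode with the detected encoding; 'ignore'
# skips undecodable bytes, and utf-8 is a guessed fallback.
def read_unicode(url):
    content = urllib.urlopen(url).read()
    encoding = chardet.detect(content)['encoding'] or 'utf-8'
    return content.decode(encoding, 'ignore')

#print read_unicode('http://www.baidu.com')[:100]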
----------Introduction 3
# -*- coding: utf-8 -*-
# import urllib
# url = 'http://blog.csdn.net/yuanmeng001'
# html = urllib.urlopen(url)
# print html.read()
# print html.getcode()  # 403 = access forbidden; 404 = page does not exist (example: http://www.jd.com/robots.txt)
import urllib2, random
url = 'http://blog.csdn.net/happydeer'
# my_header = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.3.1.2000 Chrome/30.0.1599.101 Safari/537.36',
#              'GET': url,
#              'Host': 'blog.csdn.net',
#              'Referer': 'http://blog.csdn.net/'}
# https = urllib2.Request(url, headers=my_header)
# print https.head()  # urllib2.HTTPError: HTTP Error 403: Forbidden (access denied)
# req = urllib2.Request(url)  # build a request object
# req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.3.1.2000 Chrome/30.0.1599.101 Safari/537.36')
# # add_header adds header info
# req.add_header('GET', url)
# req.add_header('Host', 'blog.csdn.net')
# req.add_header('Referer', 'http://blog.csdn.net/')
# html = urllib2.urlopen(https)
# print html.read()           # read the page
# print html.headers.items()  # get the response headers
# Pool of User-Agent strings to pick from at random; add more entries to rotate
my1_header = [
    'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Maxthon/4.3.1.2000 Chrome/30.0.1599.101 Safari/537.36',
]
def get_connect(url, heads):
    """Fetch a page that returns 403 Forbidden for the default urllib UA."""
    rand_head = random.choice(heads)
    req = urllib2.Request(url)
    req.add_header('User-Agent', rand_head)
    req.add_header('Host', 'blog.csdn.net')
    req.add_header('Referer', 'http://blog.csdn.net/')
    content = urllib2.urlopen(req).read()
    return content

print get_connect(url, my1_header)
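# An alternative sketch (assumes urllib2 on Python 2): pass the whole header
# dict to Request instead of repeated add_header calls, and catch the 403
# explicitly instead of letting it propagate.
def get_connect2(url, user_agents):
    headers = {'User-Agent': random.choice(user_agents),
               'Referer': 'http://blog.csdn.net/'}
    try:
        return urllib2.urlopen(urllib2.Request(url, headers=headers)).read()
    except urllib2.HTTPError, e:
        print 'HTTP Error %d' % e.code
        return None

#print get_connect2(url, my1_header)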