# Method one of three ways to crawl web pages in Python: use the getparam method of the urllib or urllib2 module
import urllib
import urllib2
fopen1 = urllib.urlopen('http://www.baidu.com').info()
fopen2 = urllib2.urlopen('http://www.sina.com').info()
print fopen1.getparam('charset')
print fopen2.getparam('charset')
# ---- Some websites use anti-crawler measures; for those, send a User-Agent header as below ----
url = 'http://www.qiushibaike.com/hot/page/1'
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}
request = urllib2.Request(url, headers=headers)
c_res = urllib2.urlopen(request).info()
print c_res.getparam('charset')
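# The sketch below is an addition, not part of the original: it shows one way to use
# the detected charset, decoding the raw page bytes into unicode and assuming utf-8
# when the server does not report a charset at all.
charset = c_res.getparam('charset') or 'utf-8'  # fall back to utf-8 (assumption)
html = urllib2.urlopen(request).read().decode(charset, 'ignore')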
# Method two of three ways to crawl web pages in Python: use the chardet module (it feels a little slower than method one)
import chardet
import urllib
import urllib2
# First fetch the page content
data1 = urllib.urlopen('http://www.baidu.com').read()
# Then analyze the content with chardet
chardit1 = chardet.detect(data1)
print chardit1['encoding']
# ---- Some websites use anti-crawler measures; for those, send a User-Agent header as below ----
url = 'http://www.qiushibaike.com/hot/page/1'
user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
headers = {'User-Agent': user_agent}
request = urllib2.Request(url, headers=headers)
response = urllib2.urlopen(request).read()
chardit1 = chardet.detect(response)
print chardit1['encoding']
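# The sketch below is an addition, not part of the original: it uses the encoding that
# chardet guessed to decode the raw bytes into unicode, assuming utf-8 when chardet
# cannot tell (chardet may return None for the encoding).
encoding = chardit1['encoding'] or 'utf-8'  # fall back to utf-8 (assumption)
text = response.decode(encoding, 'ignore')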
# Method three of three ways to crawl web pages in Python: use the BeautifulSoup module
from bs4 import BeautifulSoup
import urllib2
content = urllib2.urlopen('http://www.baidu.com')
soup = BeautifulSoup(content)
print soup.original_encoding  # this output is the page's encoding
# ---- For sites with anti-crawler measures, handle the request the same way as in the two methods above ----
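# The sketch below is an addition, not part of the original: it applies that same
# anti-crawler handling to the BeautifulSoup approach, reusing the example URL and
# User-Agent string from the snippets above (both carried over as assumptions).
request = urllib2.Request('http://www.qiushibaike.com/hot/page/1',
                          headers={'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'})
soup = BeautifulSoup(urllib2.urlopen(request))
print soup.original_encoding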