When scraping web pages, you often need to forge the request headers so that the scraping script works effectively.
Below, we use the header support in urllib2 to fetch a page while sending custom headers.
Method 1: pass a dictionary of headers to urllib2.Request
#!/usr/bin/python
# -*- coding: utf-8 -*-
# encoding=utf-8
# Filename: urllib2-header.py
#
# Fetch a web page with urllib2 (Python 2) while sending custom request
# headers, supplied as a dictionary to urllib2.Request. Prints the response
# headers followed by the page body.
import urllib2
import sys

# Fetch page content - send headers, variant 1
url = "http://www.jb51.net"

# Header names and values are written without padding spaces: whitespace
# inside a header name is sent verbatim and makes the request malformed.
send_headers = {
    'Host': 'www.jb51.net',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Connection': 'keep-alive',
}

req = urllib2.Request(url, headers=send_headers)
r = urllib2.urlopen(req)
try:
    html = r.read()  # page content returned by the server
    receive_header = r.info()  # response header information
finally:
    # Release the underlying socket even if reading fails.
    r.close()

# sys.getfilesystemencoding()
# Transcode to the local encoding to avoid garbled output; undecodable bytes
# are replaced instead of raising UnicodeDecodeError.
html = html.decode('utf-8', 'replace').encode(sys.getfilesystemencoding())
print(receive_header)
# print ' ######################### ########### '
print(html)
Method 2: add headers one at a time with Request.add_header
#!/usr/bin/python
# -*- coding: utf-8 -*-
# encoding=utf-8
# Filename: urllib2-header.py
#
# Fetch a web page with urllib2 (Python 2), attaching custom request headers
# one at a time via Request.add_header. Prints the response headers, a
# separator line, and then the page body.
import urllib2
import sys

url = 'http://www.jb51.net'

req = urllib2.Request(url)
# Header names and values are written without padding spaces: whitespace
# inside a header name is sent verbatim and makes the request malformed.
req.add_header('Referer', 'http://www.jb51.net/')
req.add_header('User-Agent', 'Mozilla/5.0 (Windows NT 6.2; rv:16.0) Gecko/20100101 Firefox/16.0')

r = urllib2.urlopen(req)
try:
    html = r.read()  # page content returned by the server
    receive_header = r.info()  # response header information
finally:
    # Release the underlying socket even if reading fails.
    r.close()

# Transcode to the local encoding to avoid garbled output; 'replace' keeps a
# page that is not valid UTF-8 from raising UnicodeDecodeError.
html = html.decode('utf-8', 'replace').encode(sys.getfilesystemencoding())
print(receive_header)
print(' ############ ######################### ')
print(html)