2018/7/21: some Python crawler code I have been learning over the past few days.
# Minimal fetch: open a URL with urllib2 (Python 2) and print the response body.
import urllib2

response = urllib2.urlopen("http://baidu.com")
html = response.read()
# Single-argument print(...) behaves the same as the Python 2 print statement.
print(html)
Going further, you can build a Request object first and pass it to urlopen:
# Same fetch as a two-step call: build a Request object, then hand it to urlopen.
import urllib2

req = urllib2.Request("http://www.baidu.com")
response = urllib2.urlopen(req)
html = response.read()
print(html)
Disguising the request as a browser (sending a User-Agent header):
# Fetch a page while sending a browser-like User-Agent header with the request.
import urllib2

url = "http://www.baidu.com"
# Internet Explorer 9 style User-Agent string.
user_agent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"
headers = {"User-Agent": user_agent}
req = urllib2.Request(url, headers=headers)
response = urllib2.urlopen(req)
try:
    the_page = response.read()
finally:
    # Release the connection even if reading fails.
    response.close()
print(the_page)
Code: read the URL and page range from input, then print each fetched web page
# -*- coding: utf-8 -*-
# The line above must stay on line 1 or 2 of the script: Python 2 only accepts
# non-ASCII string literals when the source encoding is declared there.
import urllib2
def load_page(url):
    """Fetch ``url`` and return the raw response body.

    The request carries a browser-like User-Agent header.

    :param url: address to request.
    :return: whatever ``response.read()`` yields for the opened URL.
    """
    # Internet Explorer 9 style User-Agent string.
    user_agent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"
    headers = {"User-Agent": user_agent}
    req = urllib2.Request(url, headers=headers)
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
def tieba_spider(url, begin_page, end_page):
    """Fetch and print every list page from ``begin_page`` to ``end_page``.

    Page numbers are 1-based and the range is inclusive. The offset computed
    for each page is appended directly to ``url``, so ``url`` presumably ends
    with the query key that takes the offset (e.g. ``...&pn=``) -- confirm
    against the site being crawled.

    :param url: URL prefix to which the page offset is appended.
    :param begin_page: first page number to fetch.
    :param end_page: last page number to fetch (inclusive).
    """
    # NOTE(review): the multiplier was lost in the source ("PN = * (i-1)").
    # 50 assumes the site advances its offset by 50 posts per page -- confirm.
    posts_per_page = 50
    for i in range(begin_page, end_page + 1):
        pn = posts_per_page * (i - 1)
        my_url = url + str(pn)
        html = load_page(my_url)
        # Non-ASCII literal: under Python 2 this needs the file's coding line.
        print("################# #第%d page ########################" % i)
        print(html)
        print("###############################################")
# Script entry point: read the URL prefix and the page range, then crawl.
if __name__ == "__main__":
    url = raw_input("Please enter the URL address")
    # raw_input returns text, so the page numbers are converted to int.
    begin_page = int(raw_input("Please enter the starting page number"))
    end_page = int(raw_input("Enter end page number"))
    tieba_spider(url, begin_page, end_page)
Code: read the URL and page range from input, then save each fetched web page to a file
# -*- coding: utf-8 -*-
# The line above must stay on line 1 or 2 of the script: Python 2 only accepts
# non-ASCII text in the source when the encoding is declared there.
import urllib2
def load_page(url):
    """Fetch ``url`` and return the raw response body.

    The request carries a browser-like User-Agent header.

    :param url: address to request.
    :return: whatever ``response.read()`` yields for the opened URL.
    """
    # Internet Explorer 9 style User-Agent string.
    user_agent = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"
    headers = {"User-Agent": user_agent}
    req = urllib2.Request(url, headers=headers)
    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        # Release the connection even if reading fails.
        response.close()
def write_to_file(file_name, txt):
    """Write ``txt`` to the file ``file_name``, replacing any existing content.

    :param file_name: path of the file to create or overwrite.
    :param txt: text to store in the file.
    """
    print("Saving files" + file_name)
    # The with-block closes the file even if the write raises.
    with open(file_name, 'w') as f:
        f.write(txt)
def tieba_spider(url, begin_page, end_page):
    """Fetch every list page from ``begin_page`` to ``end_page`` and save each.

    Page numbers are 1-based and the range is inclusive. Page ``i`` is saved
    as ``<i>.html`` in the current working directory. The offset computed for
    each page is appended directly to ``url``, so ``url`` presumably ends with
    the query key that takes the offset (e.g. ``...&pn=``) -- confirm against
    the site being crawled.

    :param url: URL prefix to which the page offset is appended.
    :param begin_page: first page number to fetch.
    :param end_page: last page number to fetch (inclusive).
    """
    # NOTE(review): the multiplier was lost in the source ("PN = * (i-1)").
    # 50 assumes the site advances its offset by 50 posts per page -- confirm.
    posts_per_page = 50
    for i in range(begin_page, end_page + 1):
        pn = posts_per_page * (i - 1)
        my_url = url + str(pn)
        html = load_page(my_url)
        file_name = str(i) + ".html"
        write_to_file(file_name, html)
# Script entry point: read the URL prefix and the page range, then crawl.
if __name__ == "__main__":
    url = raw_input("Please enter the URL address")
    # raw_input returns text, so the page numbers are converted to int.
    begin_page = int(raw_input("Please enter the starting page number"))
    end_page = int(raw_input("Enter end page number"))
    tieba_spider(url, begin_page, end_page)
2018/7/21 Python Crawler Learning