# -*- coding: utf-8 -*-
# ---------------------------------------
# Program:  Baidu Tieba crawler
# Version:  0.1
# Author:   Why
# Date:     2013-05-14
# Language: Python
# Usage:    Enter a thread address including the paging parameter, with the
#           trailing page number removed, then set the start and end pages.
# Function: download every page in the given page range and store each one
# as an HTML file.
# ---------------------------------------
import urllib.request


def baidu_tieba(url, begin_page, end_page):
    """Download pages ``begin_page``..``end_page`` of a Baidu Tieba thread.

    Each page is fetched from ``url + str(page)`` and written to a file
    named after the zero-padded page number, e.g. ``00001.html`` in the
    current working directory.

    Args:
        url: Thread address ending just before the page number, e.g.
            ``'http://tieba.baidu.com/p/2296017831?pn='``.
        begin_page: First page number to fetch (inclusive).
        end_page: Last page number to fetch (inclusive).
    """
    for page in range(begin_page, end_page + 1):
        # Zero-pad to 5 digits so the saved files sort lexicographically.
        # (The original comment claimed six digits, but the code padded to 5.)
        filename = str(page).zfill(5) + '.html'
        print('is downloading the ' + str(page) +
              ' page and storing it as ' + filename + '...')
        # urlopen returns bytes; write in binary mode ('wb') rather than the
        # original text mode 'w+' to avoid encoding corruption, and use a
        # context manager so the file is closed even if the fetch fails.
        data = urllib.request.urlopen(url + str(page)).read()
        with open(filename, 'wb') as f:
            f.write(data)
# -------- Enter parameters here ------------------
# Example: a thread from the Shandong University Baidu Tieba forum.
# bdurl = 'http://tieba.baidu.com/p/2296017831?pn='
# iPostBegin = 1
# iPostEnd = 10

# Guarded so importing this module performs no interactive I/O.
if __name__ == '__main__':
    # The address must end right after 'pn=' — the page number is appended
    # by baidu_tieba for each page in the requested range.
    bdurl = input('Please enter the address of the post, '
                  'remove the number after pn=: \n')
    begin_page = int(input('Please enter the number of pages to start: \n'))
    end_page = int(input('Please enter end page: \n'))
    # Invoke the crawler defined above.
    baidu_tieba(bdurl, begin_page, end_page)