http://blog.csdn.net/pleasecallmewhy/article/details/8927832
# -*- coding: utf-8 -*-
# ---------------------------------------
# Program : Baidu Tieba (forum) crawler
# Version : 0.1 (reconstructed from a garbled Python 2.7 blog paste,
#           ported to Python 3: urllib2 -> urllib.request,
#           raw_input -> input, string.zfill -> str.zfill)
# Function: given a thread URL with the trailing page number removed
#           (i.e. ending in "pn="), download every page in the range
#           [begin_page, end_page] and store each as an HTML file.
# ---------------------------------------
import urllib.request


def baidu_tieba(url, begin_page, end_page, fetch=None):
    """Download pages ``begin_page``..``end_page`` of a Tieba thread.

    Each page is saved in the current directory as a zero-padded
    five-digit filename, e.g. ``00001.html``.

    Parameters
    ----------
    url : str
        Thread URL with the page number stripped off (ends with ``pn=``);
        the page index is appended to it for each request.
    begin_page, end_page : int
        Inclusive page range to download.
    fetch : callable(str) -> bytes, optional
        Override for the HTTP fetch step (useful for testing).
        Defaults to ``urllib.request.urlopen(page_url).read()``.

    Raises
    ------
    urllib.error.URLError
        Propagated from the default fetcher on network failure.
    """
    if fetch is None:
        # Default fetcher: plain blocking HTTP GET returning raw bytes.
        fetch = lambda page_url: urllib.request.urlopen(page_url).read()
    for page in range(begin_page, end_page + 1):
        # NOTE(review): the original comment said "six digits" but the code
        # padded to five (string.zfill(i, 5)); five is preserved here.
        fname = str(page).zfill(5) + '.html'
        print('Downloading page ' + str(page) + ', saving as ' + fname + ' ...')
        data = fetch(url + str(page))
        # Binary mode: write the response bytes verbatim, no re-encoding.
        with open(fname, 'wb') as f:
            f.write(data)


if __name__ == '__main__':
    # -------- parameters are entered here ------------------
    # Example thread (Shandong University board):
    # bdurl = 'http://tieba.baidu.com/p/2296017831?pn='
    bdurl = str(input('Please enter the thread URL, without the number after pn=: \n'))
    begin_page = int(input('Please enter the start page: \n'))
    end_page = int(input('Please enter the end page: \n'))
    baidu_tieba(bdurl, begin_page, end_page)