# -*- coding: utf-8 -*-
# ---------------------------------------
# Program : Baidu Tieba post crawler
# Version : 0.1
# Author  : Why
# Date    : 2013-05-14
# Usage   : Enter the paginated post address with the trailing page
#           number (after "pn=") removed, then the start and end pages.
# Function: Downloads every requested page of the post and stores each
#           one as a local .html file.
# ---------------------------------------
import urllib.request


def baidu_tieba(url, begin_page, end_page):
    """Download pages begin_page..end_page (inclusive) of a post.

    Each page is fetched from ``url + str(page)`` and written to a file
    named after the zero-padded page number, e.g. ``00001.html``.

    :param url: page URL prefix; the page number is appended directly.
    :param begin_page: first page number to fetch (inclusive).
    :param end_page: last page number to fetch (inclusive).
    :raises urllib.error.URLError: if a page cannot be retrieved.
    """
    for page in range(begin_page, end_page + 1):
        # Zero-pad to five digits so the files sort in page order.
        file_name = str(page).zfill(5) + '.html'
        print('Downloading page ' + str(page)
              + ' and storing it as ' + file_name + ' ...')
        # The HTTP response body is raw bytes, so write in binary mode;
        # the context manager guarantees the file is closed.
        data = urllib.request.urlopen(url + str(page)).read()
        with open(file_name, 'wb') as out_file:
            out_file.write(data)


if __name__ == '__main__':
    # -------- read the parameters ------------------
    # Example post from the Shandong University forum on Baidu Tieba:
    # bdurl = 'http://tieba.baidu.com/p/2296017831?pn='
    bdurl = str(input('Enter the post address with the number after '
                      '"pn=" removed:\n'))
    begin_page = int(input('Enter the start page number:\n'))
    end_page = int(input('Enter the end page number:\n'))
    # -------- call the crawler ---------------------
    baidu_tieba(bdurl, begin_page, end_page)