使用 urllib 讀取網頁,然後用 python-excel 專案的 xlwt 模組將擷取到的資料寫入 Excel 檔案。
import datetime
from contextlib import closing

try:  # Python 3
    from urllib.request import urlopen
except ImportError:  # Python 2
    from urllib import urlopen


def FetchData():
    """Scrape 100 daily pages and store one extracted value per day in Excel.

    For each of the 100 consecutive days beginning 2010-01-01, the date is
    written to column 0 of row ``i`` (rows 1..100) of the sheet 'Sheet 2'.
    The page ``http://www.xxx.com/index?date=<date>`` is then read line by
    line as UTF-8, and the value extracted from the first line containing
    'date_west' is written to column 1 of the same row.  After all pages
    have been processed the workbook is saved as 'simple.xls' and
    'finish!' is printed.

    If a page cannot be opened, the error is printed and the function
    returns immediately; in that case the workbook is NOT saved.
    """
    # Imported here because xlwt is a third-party package that is needed
    # only for writing the workbook; getdata() stays usable without it.
    from xlwt import Workbook

    # Original note: the encoding is required when the scraped data contains
    # Chinese.  It tells xlwt how to decode byte strings; scraped text below
    # is passed as unicode, so it no longer depends on the platform encoding.
    book = Workbook(encoding='gbk')
    sheet1 = book.add_sheet('Sheet 2')

    theday = datetime.date(2009, 12, 31)
    one_day = datetime.timedelta(days=1)

    for i in range(1, 101):
        theday += one_day
        print(theday)
        theday_str = str(theday)
        sheet1.write(i, 0, theday_str)

        check_url = r'http://www.xxx.com/index?date=' + theday_str
        try:
            checkfile = urlopen(check_url)
        except Exception as e:
            # Abort the whole run on the first page that cannot be opened.
            print(e)
            return

        # closing() guarantees the connection is released on every path.
        with closing(checkfile):
            for line in checkfile:
                # The pages are UTF-8; decode once and keep working in text.
                line = line.decode("UTF-8")
                date_west = getdata('date_west', line)
                if date_west is not False:
                    sheet1.write(i, 1, date_west)
                    # xlwt refuses to overwrite a cell by default, so only
                    # the first matching line of a page is recorded.
                    break

    book.save('simple.xls')
    print('finish!')


def getdata(keywords, line):
    """Return the text between '>' and '</' if *keywords* occurs in *line*.

    The returned text lies between the first '>' in the line and the first
    '</' that follows it; it may be an empty string.

    Returns False when *keywords* is not in *line*, or when either
    delimiter is missing (previously a missing delimiter produced an
    arbitrary slice of the line).
    """
    if keywords not in line:
        return False
    start = line.find('>')
    if start == -1:
        return False
    end = line.find('</', start)
    if end == -1:
        return False
    return line[start + 1:end]