這幾天需要對實驗室叢集中機器的資料進行處理,藉此機會熟悉下python這個語言。實驗室叢集中機器各異,python版本也很多樣化,總共有四個版本。寫的程式調成了三種樣子,才得以在各個機器上跑完。記錄下常用的代碼。
以下是一個可以在 Python 2.7 中運行的版本:
import glob
import os
import shutil
import re

outlinkPath = "/POOL_Temp_space/xzm/run/"
putPosition = "/POOL_Temp_space/lyn/infoMall/allOutlinksFile/"


def fun(path):
    """Collect the outlinks files found anywhere below *path*.

    *path* is the top directory to traverse.  Every file whose name matches
    the pattern ``outlinks.\\d+$`` (anchored at the start of the name by
    ``re.match``) is reported.

    Returns a list of full paths, each built as ``root + "/" + name``.
    """
    # NOTE(review): the '.' is unescaped, so it matches any single character,
    # not only a literal dot.  Left as in the original.
    patt = re.compile(r'outlinks.\d+$')
    fileList = []
    # Walk the directory that was passed in; the original ignored the
    # argument and always walked the global outlinkPath.
    for root, dirs, files in os.walk(path):
        for fn in files:
            if patt.match(fn):  # filter file names with the regex
                fileList.append(root + "/" + fn)
    return fileList


fileList = fun(outlinkPath)

# Create the destination directory once, and only when there is something
# to copy (the original re-checked this on every iteration).
if fileList and not os.path.exists(putPosition):
    os.makedirs(putPosition)

for filePath in fileList:
    # Flatten the full source path into a single file name: every character
    # that is neither a letter nor a digit becomes '_'.
    fileName = ''.join(
        [c if (c.isalpha() or c.isdigit()) else '_' for c in filePath])
    target = putPosition + fileName
    # Not needed on 2.7 (copyfile creates the file itself), but per the
    # author 2.6 needs the file to exist first.  Close the handle right
    # away so it is not leaked.
    open(target, 'w').close()
    shutil.copyfile(filePath, target)  # copy filePath to its flattened name
較低版本的 python 不支援 os.walk(),所以另外寫了一個可以在 2.2 中運行的版本;如果在 2.3 中運行,需要去掉第一行(from __future__ import generators):
from __future__ import generators  # needed on 2.2 to enable ``yield``; later versions do not need it

import glob
import os
import shutil
import re
import stat
import sys

outlinkPath = "/POOL_Temp_space/xzm/run/"
putPosition = "/POOL_Temp_space/lyn/infoMall/allOutlinksFile/"
fileList = []  # not used by this script; kept from the original


def walktree(top=".", depthfirst=True):
    """Recursively walk the directory tree rooted at *top*.

    Yields ``(directory, names)`` pairs, where *names* is the
    ``os.listdir`` result for that directory (files and sub-directories
    alike).  When *depthfirst* is true a directory is yielded after its
    sub-directories, otherwise before them.  A directory that cannot be
    listed is yielded with an empty list.  Entries are classified with
    ``os.lstat``, so symbolic links to directories are not descended into.
    """
    # Only the listing itself can raise here, so keep the try minimal.
    try:
        names = os.listdir(top)
    except os.error:
        yield top, []
        return

    if not depthfirst:
        yield top, names

    for name in names:
        child = os.path.join(top, name)
        try:
            st = os.lstat(child)
        except os.error:
            continue  # entry vanished or is unreadable: skip it
        if stat.S_ISDIR(st.st_mode):
            for newtop, children in walktree(child, depthfirst):
                yield newtop, children

    if depthfirst:
        yield top, names


# Raw string: same pattern as before, without relying on '\d' surviving as
# an unrecognised escape in a normal string literal.
# NOTE(review): the '.' is unescaped, so it matches any single character.
patt = re.compile(r'outlinks.\d+$')

if not os.path.exists(putPosition):
    os.makedirs(putPosition)

for top, names in walktree(outlinkPath):  # iterate over the (directory, names) tuples
    for name in names:
        if not patt.match(name):
            continue
        filePath = top + '/' + name
        # names comes from os.listdir and therefore includes directories;
        # copyfile would fail on those, so only regular files are copied.
        if not os.path.isfile(filePath):
            continue
        # Flatten the full source path into a single file name: every
        # character that is neither a letter nor a digit becomes '_'.
        chars = []
        for c in filePath:
            if c.isalpha() or c.isdigit():
                chars.append(c)
            else:
                chars.append('_')
        fileName = ''.join(chars)
        # Pre-create the destination file as the original did, but close
        # the handle immediately so it is not leaked.
        open(putPosition + fileName, 'w').close()
        shutil.copyfile(filePath, putPosition + fileName)