#!D:\Python32
# -*- coding: utf-8 -*-
# Strip comments from Java / C / C++ source files.
#
# Every matching file under ``top_dir`` is renamed to ``<name>.bak`` and a
# comment-free copy is written back under the original name.
#
# Known limitations: comments are removed outright (no whitespace is put in
# their place), and multi-line string forms such as C++ raw strings or Java
# text blocks are not recognised.
import os
import re
import io

# Change this directory!!!
top_dir = "E:\\work2\\"

# Scanner states
S_INIT = 0               # ordinary code
S_SLASH = 1              # just saw '/', which may open a comment
S_BLOCK_COMMENT = 2      # inside /* ... */
S_BLOCK_COMMENT_DOT = 3  # inside a block comment, previous char was '*'
S_LINE_COMMENT = 4       # inside // ... up to end of line
S_STR = 5                # inside a "..." string literal
S_STR_ESCAPE = 6         # inside a string literal, right after a backslash
S_CHAR = 7               # inside a '...' character literal
S_CHAR_ESCAPE = 8        # inside a character literal, right after a backslash

# File names that are processed; everything else is reported and skipped.
_SOURCE_FILE_RE = re.compile(r".*?\.(java|c|cpp|h)$")


def trim_dir(path):
    """Strip comments from every source file found below *path*.

    ``os.walk`` already descends into sub-directories, so no explicit
    recursion is needed.
    """
    print("dir:" + path)
    for root, dirs, files in os.walk(path):
        for name in files:
            trim_file(os.path.join(root, name))


def _strip_comments(text):
    """Return *text* with ``//`` and ``/* */`` comments removed.

    Comment markers inside string literals and character literals are left
    untouched.  The newline that terminates a ``//`` comment is kept.
    """
    out = []
    state = S_INIT
    for c in text:
        if state == S_SLASH:
            if c == '*':
                state = S_BLOCK_COMMENT
                continue
            if c == '/':
                state = S_LINE_COMMENT
                continue
            # It was a plain division slash: emit it, then fall through so
            # that c is scanned as ordinary code (it may open a literal).
            out.append('/')
            state = S_INIT

        if state == S_INIT:
            if c == '/':
                state = S_SLASH
            else:
                if c == '"':
                    state = S_STR
                elif c == "'":
                    state = S_CHAR
                out.append(c)
        elif state == S_BLOCK_COMMENT:
            if c == '*':
                state = S_BLOCK_COMMENT_DOT
        elif state == S_BLOCK_COMMENT_DOT:
            if c == '/':
                state = S_INIT
            elif c != '*':
                # A further '*' keeps this state, so "**/" still closes
                # the comment; anything else goes back to the comment body.
                state = S_BLOCK_COMMENT
        elif state == S_LINE_COMMENT:
            if c == '\n':
                state = S_INIT
                out.append(c)
        elif state == S_STR:
            if c == '\\':
                state = S_STR_ESCAPE
            elif c == '"':
                state = S_INIT
            out.append(c)
        elif state == S_STR_ESCAPE:
            # Multi-character escapes (\xHH, \u1234, ...) are not tracked;
            # only the character right after the backslash has to be
            # protected, so this is sufficient.
            state = S_STR
            out.append(c)
        elif state == S_CHAR:
            if c == '\\':
                state = S_CHAR_ESCAPE
            elif c == "'" or c == '\n':
                # A character literal cannot span lines; resetting at the
                # newline limits the damage of a stray apostrophe.
                state = S_INIT
            out.append(c)
        elif state == S_CHAR_ESCAPE:
            state = S_CHAR
            out.append(c)

    if state == S_SLASH:
        # The input ended right after a lone '/': do not lose it.
        out.append('/')
    return "".join(out)


def trim_file(path):
    """Strip comments from the single file *path*, keeping a ``.bak`` copy.

    Files whose name does not end in ``.java``, ``.c``, ``.cpp`` or ``.h``
    are reported as ignored and left alone.  If the backup rename fails the
    file is skipped, so nothing is ever rewritten without a fresh backup.
    Files are read and written with the platform default text encoding.
    """
    print("file:" + path)
    if _SOURCE_FILE_RE.match(path):
        print("process")
    else:
        print("ignore")
        return

    bak_file = path + ".bak"
    try:
        os.rename(path, bak_file)
    except OSError:
        print("bak except", bak_file)
        return

    with open(bak_file) as fp_src:
        text = fp_src.read()
    with open(path, 'w') as fp_dst:
        fp_dst.write(_strip_comments(text))
    # The backup is kept on purpose; remove it by hand once verified.
    # os.remove(bak_file)


if __name__ == "__main__":
    trim_dir(top_dir)
需要說明的是這段程式來自:http://blog.csdn.net/codearhat/article/details/6852483#comments
但是裡面有兩個問題,會引起錯誤。現在經驗證可以在我的項目中使用,但是不保證完全沒有錯誤;如有錯誤,希望和我聯絡,也可以和原作者聯絡。