#! D:\python32
# -*- coding: UTF-8 -*-
"""Strip comments from Java / C / C++ source files under a directory tree.

Every file below ``top_dir`` whose name ends in .java, .c, .cpp or .h is
renamed to ``<name>.bak`` and rewritten in place without its ``//`` line
comments and ``/* ... */`` block comments.  Comment markers that appear
inside string or character literals are left untouched.

Known limitations:
  * A removed block comment is not replaced by whitespace, so the tokens on
    either side of it become adjacent.
  * Line comments continued with a trailing backslash are not recognised.
  * Files are read and written with the platform default text encoding.
"""
import os
import re
import io

# modify this directory !!!
top_dir = "e:\\work2\\"

# File-name suffixes (compared case-insensitively) of the files to process.
_SOURCE_SUFFIXES = (".java", ".c", ".cpp", ".h")

# Scanner states.
s_init = 0               # ordinary code
s_slash = 1              # just saw '/', which may open a comment
s_block_comment = 2      # inside /* ... */
s_block_comment_dot = 3  # inside a block comment, just saw '*'
s_line_comment = 4       # inside // ... up to the end of the line
s_str = 5                # inside a "..." string literal
s_str_escape = 6         # inside a string literal, just saw a backslash
s_char = 7               # inside a '...' character literal
s_char_escape = 8        # inside a character literal, just saw a backslash


def _strip_comments(text):
    """Return *text* with its // and /* */ comments removed."""
    out = []
    state = s_init
    for c in text:
        if state == s_slash:
            if c == '*':
                state = s_block_comment
                continue
            if c == '/':
                state = s_line_comment
                continue
            # The slash was an ordinary character (e.g. division): emit it
            # and fall through so that c is handled as normal code, which
            # lets a quote directly after the slash still open a literal.
            out.append('/')
            state = s_init

        if state == s_init:
            if c == '/':
                state = s_slash  # hold the slash back until we know more
            else:
                if c == '"':
                    state = s_str
                elif c == "'":
                    state = s_char
                out.append(c)
        elif state == s_block_comment:
            if c == '*':
                state = s_block_comment_dot
        elif state == s_block_comment_dot:
            if c == '/':
                state = s_init
            elif c != '*':
                # Any run of '*' keeps us one '/' away from closing the
                # comment; every other character cancels the pending '*'.
                state = s_block_comment
        elif state == s_line_comment:
            if c == '\n':
                # The newline ends the comment but belongs to the code.
                state = s_init
                out.append(c)
        elif state == s_str:
            if c == '\\':
                state = s_str_escape
            elif c == '"':
                state = s_init
            out.append(c)
        elif state == s_str_escape:
            # Only the character right after the backslash is skipped.
            # Longer escapes (octal, \xhh, \uXXXX) need no special care
            # because their remaining characters cannot end the string.
            state = s_str
            out.append(c)
        elif state == s_char:
            if c == '\\':
                state = s_char_escape
            elif c == "'" or c == '\n':
                # A character literal cannot span lines, so a newline also
                # resets the state; this limits the effect of a stray
                # apostrophe to the line it appears on.
                state = s_init
            out.append(c)
        elif state == s_char_escape:
            state = s_char
            out.append(c)

    if state == s_slash:
        # The input ended on a lone slash that was still being held back.
        out.append('/')
    return "".join(out)


def trim_dir(path):
    """Strip comments from every matching file below directory *path*.

    Sub-directories are visited by ``os.walk``, so no explicit recursion
    is needed.
    """
    print("dir:" + path)
    for root, dirs, files in os.walk(path):
        for name in files:
            trim_file(os.path.join(root, name))


def trim_file(path):
    """Strip comments from the file *path* in place.

    Files whose name does not end in one of the recognised source suffixes
    are left alone.  Otherwise the original is kept as ``path + ".bak"``
    and *path* is rewritten without comments.  If the backup cannot be
    created the file is skipped, so the original is never overwritten
    without a backup.
    """
    print("file:" + path)
    if path.lower().endswith(_SOURCE_SUFFIXES):
        print(" process ")
    else:
        print(" Ignore ")
        return

    # Read and convert before touching anything on disk, so that a read or
    # decoding error leaves the original file exactly as it was.
    with open(path) as fp_src:
        stripped = _strip_comments(fp_src.read())

    bak_file = path + ".bak"
    try:
        os.rename(path, bak_file)
    except OSError:
        print("Bak failed t", bak_file)
        return

    with open(path, 'w') as fp_dst:
        fp_dst.write(stripped)
    # os.remove(bak_file)


# Runs as soon as the script is executed: processes everything below top_dir.
trim_dir(top_dir)
Note that this program is adapted from: http://blog.csdn.net/codearhat/article/details/6852483#comments
However, the original contained two problems that caused errors. The version above has been verified to work in my project, but it is not guaranteed to be completely error-free. If you find an error, please contact me or the original author.