#filename Seek.py
import unicodedata
import sys
import os
class Seek():
    """Scan files for CJK (Chinese) characters and record them in a report.

    Every CJK character found outside of ``//`` line comments and
    ``/* ... */`` block comments is written to a report file, followed by
    the number of the line it came from.  Replacing the characters that are
    found is not implemented.

    Usage: run as a script, e.g. ``python Seek.py -d src -t .jsp -sf Y``.

    Command-line options (read from ``sys.argv``):
        -d        : directory to scan, absolute or relative (default '.',
                    i.e. the current working directory)
        -t        : file-name suffix to match, e.g. '.jsp' or '.txt'
                    (default 'ALL', meaning every file)
        -sf       : whether to descend into sub-directories, Y or N
                    (optional, default N)
        -r        : report file name (optional, default
                    'ChineseCharacter.txt'); a relative name is resolved
                    against the current working directory
        -encoding : encoding used to read the scanned files and to write
                    the report (default 'utf-8')
    """

    # Options that take a free-form value, mapped to the attribute they set.
    # '-sf' is handled separately because its value is validated.
    _VALUE_OPTIONS = {'-d': 'd', '-r': 'r', '-t': 't', '-encoding': 'encoding'}

    def __init__(self):
        """Set the default options, then override them from ``sys.argv``.

        Unknown arguments are ignored.  An invalid ``-sf`` value or an option
        given without a value is reported on stdout and the default is kept.
        """
        self.d = '.'
        self.sf = 'N'
        self.t = 'ALL'
        self.r = 'ChineseCharacter.txt'
        self.encoding = 'utf-8'

        args = sys.argv
        pos = 1  # args[0] is the script name
        while pos < len(args):
            flag = args[pos]
            if flag != '-sf' and flag not in self._VALUE_OPTIONS:
                pos += 1
                continue
            if pos + 1 >= len(args):
                print('input error: missing value for %s parameter' % flag)
                break
            value = args[pos + 1]
            if flag == '-sf':
                value = value.upper()
                if value in ('Y', 'N'):
                    self.sf = value
                else:
                    print('input error with sf parameter')
            else:
                setattr(self, self._VALUE_OPTIONS[flag], value)
            # Step over the flag and the value it consumed.
            pos += 2

    def seeking(self):
        """Run the scan and write the report to ``self.r``.

        Errors are reported on stdout instead of being raised, and the report
        file is always closed before returning.
        """
        try:
            # Output file.
            self.rfile = open(self.r, 'w', encoding=self.encoding)
            self.__seekDir(self.d)
        except Exception as error:
            # Top-level boundary: report the failure rather than crash.
            print('seek error %s' % error)
        finally:
            self.__close()

    def __seekDir(self, folder):
        """Scan the matching files in ``folder``.

        Sub-directories are scanned recursively when ``self.sf`` is 'Y'.
        Entries are visited in sorted order so the report is deterministic.
        A file that cannot be read or decoded is reported and skipped.
        """
        for name in sorted(os.listdir(folder)):
            path = os.path.join(folder, name)
            if self.__isFile(path):
                if self.__isReport(path):
                    # Never scan the report that is being written.
                    continue
                if self.t == 'ALL' or name.endswith(self.t):
                    try:
                        self.__seek(path)
                    except (OSError, UnicodeError) as error:
                        print('seek error %s' % error)
            elif self.__isDir(path) and self.sf == 'Y':
                self.__seekDir(path)

    def __isReport(self, path):
        """Return True when ``path`` is the report file itself."""
        return os.path.realpath(path) == os.path.realpath(self.r)

    def __close(self):
        """Close the report file if it was opened."""
        rfile = getattr(self, 'rfile', None)
        if rfile is not None:
            rfile.close()

    def __isFile(self, file):
        """Return True when ``file`` is an existing regular file."""
        return os.path.isfile(file)

    def __isDir(self, path):
        """Return True when ``path`` is an existing directory."""
        return os.path.isdir(path)

    def __openFile(self, file):
        """Open ``file`` for reading as text using ``self.encoding``."""
        return open(file, 'r', encoding=self.encoding)

    def __closeFile(self, file):
        """Close the given file object."""
        file.close()

    def __seek(self, file):
        """Scan one file and append its CJK characters to the report.

        For every line that contains CJK characters outside comments, the
        characters are written, then eight tabs, ``line:<number>`` and a
        newline.  A footer naming the file is always written at the end.

        Raises:
            OSError: the file cannot be opened or read.
            UnicodeError: the file cannot be decoded with ``self.encoding``.
        """
        with self.__openFile(file) as fileObj:
            lineList = fileObj.readlines()

        inBlock = False  # True while inside a /* ... */ comment
        try:
            for lineNo, line in enumerate(lineList, 1):
                line, inBlock = self.__stripComment(line, inBlock)
                if not line:
                    continue
                if self.__writeChinese(line, file):
                    self.__writeFile('\t' * 8)
                    self.__writeFile('line:')
                    self.__writeFile('%d' % lineNo)
                    self.__writeFile('\n')
        finally:
            self.__writeFile('\n')
            self.__writeFile('------------' + file)
            self.__writeFile('\n')

    def __stripComment(self, line, inBlock):
        """Remove comment text found at the start of ``line``.

        Only comments that begin the line (after optional whitespace) or that
        continue from a previous line are recognised; a comment that follows
        code on the same line is left in place.

        Returns:
            A ``(text, inBlock)`` tuple: the text left to scan ('' when the
            whole line is a comment) and whether a block comment is still
            open at the end of the line.
        """
        while True:
            if inBlock:
                end = line.find('*/')
                if end == -1:
                    # Still inside the block comment: skip the whole line.
                    return '', True
                # Block comment ends here; keep what follows '*/'.
                line = line[end + 2:]
                inBlock = False
            stripped = line.lstrip()
            if stripped.startswith('//'):
                # Line comment.
                return '', False
            if stripped.startswith('/*'):
                # Block comment starts; it may close on this same line.
                line = stripped[2:]
                inBlock = True
                continue
            return line, False

    def __writeChinese(self, line, file):
        """Write the CJK characters of ``line`` to the report.

        A character whose Unicode name starts with 'CJK' counts as Chinese.
        A tab is written before a character that lies more than one position
        after the previous match (or after the start of the line), so runs
        that are not contiguous are separated.

        Returns:
            True when at least one CJK character was found.
        """
        found = False
        lastPos = 0
        for pos, ch in enumerate(line):
            # The '' default avoids a ValueError for unnamed characters
            # such as the trailing newline.
            if not unicodedata.name(ch, '').startswith('CJK'):
                continue
            found = True
            try:
                if pos - lastPos > 1:
                    self.__writeFile('\t' + ch)
                else:
                    self.__writeFile(ch)
            except UnicodeError as error:
                # The report encoding cannot represent this character.
                print('seek character error : %s in %s' % (error, file))
            lastPos = pos
        return found

    def __writeFile(self, content):
        """Append ``content`` to the report file."""
        self.rfile.write(content)
if __name__ == '__main__':
    # Script entry point: parse options from sys.argv and run the scan.
    Seek().seeking()