# Filename: seek.py
import os
import sys
import unicodedata
class seek():
    """Find CJK (Chinese) characters in text files and list them in a report.

    Usage: python seek.py [options]

    Options (flag names are matched case-insensitively, each takes a value):
        -d         directory to scan, absolute or relative (default: '.')
        -t         file suffix filter; several suffixes may be separated
                   with ';', e.g. '.jsp;.txt' (default: all files)
        -sf        scan sub-directories as well, Y or N (default: N)
        -r         path of the report file (default: 'chinesecharacter.txt')
        -encoding  encoding used for both the scanned files and the report
                   (default: 'utf-8')

    Report format: for every scanned line that contains CJK characters the
    characters are written (runs that are not adjacent in the source line
    are separated by a tab), followed by eight tabs and the line number.
    After each scanned file a separator line with the file path is written.
    Lines starting with '//' and '/* ... */' block comments are ignored.
    """

    # Command-line flags that are followed by a value.
    _VALUE_FLAGS = ('-d', '-sf', '-r', '-t', '-encoding')

    def __init__(self):
        """Set the defaults and override them from ``sys.argv``."""
        self.d = '.'
        self.sf = 'n'
        self.t = 'all'
        self.r = 'chinesecharacter.txt'
        self.encoding = 'utf-8'

        args = sys.argv[1:]
        pos = 0
        while pos < len(args):
            flag = args[pos].lower()
            pos += 1
            if flag not in self._VALUE_FLAGS:
                continue
            if pos >= len(args):
                # A flag given as the very last argument has no value.
                print('missing value for parameter %s' % flag)
                break
            value = args[pos]
            pos += 1
            if flag == '-d':
                self.d = value
            elif flag == '-sf':
                choice = value.lower()
                if choice in ('y', 'n'):
                    self.sf = choice
                else:
                    print('input error with SF parameter ')
            elif flag == '-r':
                self.r = value
            elif flag == '-t':
                self.t = value
            else:
                self.encoding = value

    def seeking(self):
        """Scan the configured directory and write the report file.

        Any error that stops the scan is printed, not raised; the report
        file is always closed.
        """
        try:
            # Output file
            self.rfile = open(self.r, 'w', encoding=self.encoding)
            self._scan_dir(self.d)
        except Exception as error:
            print('seek error %s' % error)
        finally:
            self._close()

    def _scan_dir(self, directory):
        """Scan the files in *directory*, recursing when ``self.sf`` is 'y'."""
        suffixes = self._suffixes()
        report_path = os.path.abspath(self.r)
        # Sorted so that the report order does not depend on the file system.
        for name in sorted(os.listdir(directory)):
            path = os.path.join(directory, name)
            if self._isfile(path):
                if os.path.abspath(path) == report_path:
                    # Never read the report that is currently being written.
                    continue
                if suffixes is not None and not name.endswith(suffixes):
                    continue
                try:
                    self._seek(path)
                except (OSError, UnicodeError) as error:
                    # One unreadable file must not abort the whole scan.
                    print('seek error %s' % error)
            elif self._isdir(path) and self.sf.lower() == 'y':
                self._scan_dir(path)

    def _suffixes(self):
        """Return the suffix filter as a tuple, or None to accept every file."""
        if self.t.lower() == 'all':
            return None
        parts = tuple(p.strip() for p in self.t.split(';') if p.strip())
        return parts or None

    def _close(self):
        """Close the report file if it was opened."""
        rfile = getattr(self, 'rfile', None)
        if rfile is not None:
            rfile.close()

    def _isfile(self, file):
        """Return True if *file* is an existing regular file."""
        return os.path.isfile(file)

    def _isdir(self, path):
        """Return True if *path* is an existing directory."""
        return os.path.isdir(path)

    def _openfile(self, file):
        """Placeholder kept for interface compatibility; does nothing."""
        pass

    def _closefile(self, file):
        """Close the given file object."""
        file.close()

    def _seek(self, file):
        """Scan one file and append its CJK characters to the report.

        Args:
            file: path of the text file to scan.

        Raises:
            OSError, UnicodeError: if the file cannot be read or decoded;
                in that case nothing is written to the report for it.
        """
        # Read everything first so that a decoding error leaves no partial
        # output, and the handle is closed even when reading fails.
        with open(file, 'r', encoding=self.encoding) as fileobj:
            lines = fileobj.readlines()
            name = fileobj.name
        try:
            in_block = False
            for lineno, line in enumerate(lines, start=1):
                text, in_block = self._strip_comments(line, in_block)
                if self._write_cjk(text):
                    self._writefile('\t' * 8)
                    self._writefile('line :')
                    self._writefile('%d' % lineno)
                    self._writefile('\n')
        finally:
            self._writefile('\n')
            self._writefile('------------' + name)
            self._writefile('\n')

    def _strip_comments(self, line, in_block):
        """Remove leading comments from *line*.

        Args:
            line: the raw source line.
            in_block: True if the previous line ended inside a '/* */' block.

        Returns:
            A ``(text, in_block)`` tuple: the part of the line that should
            be searched (possibly empty) and the block-comment state to
            carry over to the next line.
        """
        while True:
            if in_block:
                end = line.find('*/')
                if end == -1:
                    # Still inside the block comment: skip the whole line.
                    return '', True
                line = line[end + 2:]
                in_block = False
            stripped = line.lstrip()
            if stripped.startswith('//'):
                return '', False
            if not stripped.startswith('/*'):
                return line, False
            # A block comment opens here; loop to look for its end on the
            # same line so that '/* ... */ text' is handled correctly.
            line = stripped[2:]
            in_block = True

    def _write_cjk(self, text):
        """Write the CJK characters of *text*; return True if any was found."""
        found = False
        previous = 0
        for index, char in enumerate(text):
            # name() with a default never raises for unnamed characters
            # such as the trailing newline.
            if not unicodedata.name(char, '').startswith('CJK'):
                continue
            found = True
            # A gap of more than one position means the characters are not
            # consecutive in the source line: separate them with a tab.
            if index - previous > 1:
                self._writefile('\t' + char)
            else:
                self._writefile(char)
            previous = index
        return found

    def _writefile(self, content):
        """Append *content* to the report file."""
        self.rfile.write(content)
if __name__ == '__main__':
    # Parse the command line, then run the scan and write the report.
    # The instance gets its own name so it does not shadow the class.
    seeker = seek()
    seeker.seeking()