Search for Chinese Characters in Python

Source: Internet
Author: User

# Filename: seek.py
import os
import sys
import unicodedata
class seek():
    """Scan text files for Chinese (CJK) characters and write them to a report.

    Options are read from ``sys.argv`` when the object is created. Option
    names are matched case-insensitively and each one takes a single value:

    -d         directory to scan, absolute or relative (default ``'.'``)
    -t         file suffix filter; several suffixes may be separated by
               ``;`` (for example ``.jsp;.txt``); default ``all`` (every file)
    -sf        ``y`` to descend into sub-directories, ``n`` (default) not to
    -r         report file name (default ``chinesecharacter.txt``); a relative
               name is opened relative to the current working directory
    -encoding  encoding used for the scanned files and the report
               (default ``utf-8``)

    Usage: ``python seek.py -d src -t .jsp;.txt -sf y``

    Comments are recognised only when ``//`` or ``/*`` is the first thing on
    a line (or the first thing after a closing ``*/``); their content is not
    reported.
    """

    _OPTIONS = ('-d', '-sf', '-r', '-t', '-encoding')

    def __init__(self):
        """Set the defaults, then override them from the command line."""
        self.d = '.'
        self.sf = 'n'
        self.t = 'all'
        self.r = 'chinesecharacter.txt'
        self.encoding = 'utf-8'
        # Opened by seeking(); kept as None so _close() is always safe.
        self.rfile = None

        args = sys.argv[1:]
        pos = 0
        while pos < len(args):
            option = args[pos].lower()
            pos += 1
            if option not in self._OPTIONS:
                continue
            if pos >= len(args):
                # The option is the last argument: there is no value to read.
                print('missing value for %s parameter' % option)
                break
            value = args[pos]
            pos += 1  # the value is consumed together with its option
            if option == '-d':
                self.d = value
            elif option == '-sf':
                flag = value.lower()
                if flag in ('y', 'n'):
                    self.sf = flag
                else:
                    print('input error with SF parameter')
            elif option == '-r':
                self.r = value
            elif option == '-t':
                self.t = value
            else:
                self.encoding = value

    def seeking(self):
        """Open the report file and scan the configured directory.

        Any error is reported on stdout instead of being raised, and the
        report file is closed in every case.
        """
        try:
            self.rfile = open(self.r, 'w', encoding=self.encoding)
            self._seekdir(self.d)
        except Exception as error:
            # Top-level boundary of the script: report and stop.
            print('seek error %s' % error)
        finally:
            self._close()

    def _seekdir(self, directory):
        """Scan the matching files of *directory*, recursing when sf == 'y'."""
        suffixes = self._suffixes()
        report = os.path.abspath(self.r)
        # Sorted so that the report layout does not depend on listdir order.
        for name in sorted(os.listdir(directory)):
            path = os.path.join(directory, name)
            if self._isfile(path):
                if os.path.abspath(path) == report:
                    # Never scan the report that is currently being written.
                    continue
                if suffixes is None or name.endswith(suffixes):
                    self._seek(path)
            elif self._isdir(path) and self.sf == 'y':
                self._seekdir(path)

    def _suffixes(self):
        """Return the suffix filter as a tuple, or None when all files match."""
        if self.t.lower() == 'all':
            return None
        return tuple(part.strip() for part in self.t.split(';') if part.strip())

    def _close(self):
        """Close the report file if it was opened."""
        rfile = getattr(self, 'rfile', None)
        if rfile is not None:
            rfile.close()

    def _isfile(self, file):
        """Return True when *file* is an existing regular file."""
        return os.path.isfile(file)

    def _isdir(self, path):
        """Return True when *path* is an existing directory."""
        return os.path.isdir(path)

    def _openfile(self, file):
        """Placeholder kept for compatibility; does nothing."""
        pass

    def _closefile(self, file):
        """Close the given file object."""
        file.close()

    def _seek(self, file):
        """Report the CJK characters found in one file.

        For every line that contains CJK characters outside of comments the
        characters are written, followed by eight tabs and ``line:<number>``.
        A trailer naming the file is written after the last line. A file that
        cannot be read or decoded is reported on stdout and skipped.
        """
        try:
            with open(file, 'r', encoding=self.encoding) as fileobj:
                linelist = fileobj.readlines()
        except (OSError, UnicodeError) as error:
            print('seek character error: %s in %s' % (error, file))
            return

        in_block = False
        try:
            # enumerate gives the real line number even for duplicated lines.
            for lineno, line in enumerate(linelist, 1):
                text, in_block = self._stripcomments(line, in_block)
                if self._writechinese(text):
                    self._writefile('\t' * 8)
                    self._writefile('line:')
                    self._writefile('%d' % lineno)
                    self._writefile('\n')
        finally:
            self._writefile('\n')
            self._writefile('------------' + file)
            self._writefile('\n')

    def _stripcomments(self, line, in_block):
        """Drop leading comments from *line*.

        Returns ``(text, in_block)`` where *text* is what remains to be
        searched ('' when the whole line is a comment) and *in_block* tells
        whether a block comment is still open after this line.
        """
        while True:
            if in_block:
                end = line.find('*/')
                if end == -1:
                    # Still inside the block comment: nothing to search.
                    return '', True
                line = line[end + 2:]
                in_block = False
            if line.startswith('//'):
                return '', False
            if line.startswith('/*'):
                # Look for the closing marker on this same line as well.
                line = line[2:]
                in_block = True
                continue
            return line, False

    def _writechinese(self, text):
        """Write the CJK characters of *text*; return True if any was found.

        A tab is written before a character that does not directly follow
        the previously written one, so separate runs stay distinguishable.
        """
        found = False
        last = 0
        for pos, char in enumerate(text):
            # The default '' avoids ValueError for unnamed characters ('\n').
            if not unicodedata.name(char, '').startswith('CJK'):
                continue
            found = True
            if pos - last > 1:
                self._writefile('\t' + char)
            else:
                self._writefile(char)
            last = pos
        return found

    def _writefile(self, content):
        """Append *content* to the report file."""
        self.rfile.write(content)


if __name__ == '__main__':
    # Run as a script: read the options from the command line and scan.
    seeker = seek()
    seeker.seeking()

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.