Text processing often needs to determine whether a character is a letter, a digit, or a Chinese character. Python's built-in str type provides similar methods, but (in Python 2) they do not handle Unicode strings, so the functions below imitate the str methods to provide Chinese-character/digit/letter tests for Unicode strings, as well as full-width/half-width conversion.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Character-class tests and full-width/half-width conversion for Unicode text.

Every single-character function takes exactly one character and raises
``TypeError`` when given a string of any other length.
"""
import sys

if sys.version_info[0] == 2:
    # Python 2 only: make UTF-8 the interpreter-wide default encoding.
    # ``reload`` is needed because ``site`` removes ``setdefaultencoding``
    # from ``sys`` at start-up.  Neither name exists on Python 3, where
    # ``str`` is already Unicode, so the hack is skipped there.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')
    _unichr = unichr  # noqa: F821
else:
    _unichr = chr

# Code-point constants used by the width conversions.
_HALF_SPACE = 0x0020    # ASCII space
_FULL_SPACE = 0x3000    # ideographic (full-width) space
_HALF_LAST = 0x007e     # last printable ASCII character, '~'
_WIDTH_OFFSET = 0xfee0  # full-width code point = half-width code point + offset


def _require_single_char(uchar):
    """Raise ``TypeError`` unless *uchar* has length exactly 1."""
    if len(uchar) != 1:
        raise TypeError('expected a character, but a string found!')


def is_chinese(uchar):
    """Return True if *uchar* lies in the range U+4E00..U+9FA5.

    Raises:
        TypeError: if *uchar* is not exactly one character long.
    """
    _require_single_char(uchar)
    return u'\u4e00' <= uchar <= u'\u9fa5'


def is_all_chinese(ustr):
    """Return True if every character of *ustr* passes ``is_chinese``.

    An empty string yields True, because no character fails the test.
    """
    return all(is_chinese(uchar) for uchar in ustr)


def is_digit(uchar):
    """Return True if *uchar* is an ASCII digit, U+0030..U+0039.

    Raises:
        TypeError: if *uchar* is not exactly one character long.
    """
    _require_single_char(uchar)
    return u'\u0030' <= uchar <= u'\u0039'


def is_all_digit(ustr):
    """Return True if every character of *ustr* passes ``is_digit``.

    An empty string yields True, because no character fails the test.
    """
    return all(is_digit(uchar) for uchar in ustr)


def is_alpha(uchar):
    """Return True if *uchar* is an ASCII letter, A-Z or a-z.

    Raises:
        TypeError: if *uchar* is not exactly one character long.
    """
    _require_single_char(uchar)
    return (u'\u0041' <= uchar <= u'\u005a'
            or u'\u0061' <= uchar <= u'\u007a')


def is_all_alpha(ustr):
    """Return True if every character of *ustr* passes ``is_alpha``.

    An empty string yields True, because no character fails the test.
    """
    return all(is_alpha(uchar) for uchar in ustr)


def b2q(uchar):
    """Convert a half-width character to its full-width counterpart.

    Characters outside U+0020..U+007E are not half-width and are returned
    unchanged.  The space maps to U+3000; every other character is shifted
    up by 0xFEE0.

    Raises:
        TypeError: if *uchar* is not exactly one character long.
    """
    _require_single_char(uchar)
    inner_code = ord(uchar)
    if inner_code < _HALF_SPACE or inner_code > _HALF_LAST:
        # Not a half-width character: hand back the original.
        return uchar
    if inner_code == _HALF_SPACE:
        # The space is the one character that does not follow the offset rule.
        inner_code = _FULL_SPACE
    else:
        inner_code += _WIDTH_OFFSET
    return _unichr(inner_code)


def q2b(uchar):
    """Convert a full-width character to its half-width counterpart.

    U+3000 maps to the ASCII space; every other character is shifted down
    by 0xFEE0.  If the result falls outside U+0020..U+007E, the input was
    not a full-width form and is returned unchanged.

    Raises:
        TypeError: if *uchar* is not exactly one character long.
    """
    _require_single_char(uchar)
    inner_code = ord(uchar)
    if inner_code == _FULL_SPACE:
        inner_code = _HALF_SPACE
    else:
        inner_code -= _WIDTH_OFFSET
    if inner_code < _HALF_SPACE or inner_code > _HALF_LAST:
        # The shifted value is not a half-width character: keep the original.
        return uchar
    return _unichr(inner_code)


# Upper-case spellings, kept because the string converter was written
# against ``Q2B``.
B2Q = b2q
Q2B = q2b


def stringQ2B(ustring):
    """Return *ustring* with every full-width character made half-width."""
    return ''.join(q2b(uchar) for uchar in ustring)


# main function
if __name__ == '__main__':
    pass