Python: checking whether a Unicode character is a Chinese character, a digit, an English letter, or another character

Source: Internet
Author: User
Tags ustring

Python: checking whether a Unicode character is a Chinese character, a digit, an English letter, or another character

The following tool determines whether a Unicode character is a Chinese character, a digit, an English letter, or another character. It also converts between full-width and half-width characters and normalizes Unicode strings.

 

#!/usr/bin/env python

# -*- coding: GBK -*-

 

"""Chinese character processing tool.

Determines whether a unicode character is a Chinese character, a digit, an English letter, or another character.

Converts between full-width and half-width characters.
"""

def is_chinese(uchar):
    """Return True if ``uchar`` is a Chinese character.

    Only the range U+4E00..U+9FA5 (both ends inclusive) is treated as
    Chinese; characters outside that range return False.

    Args:
        uchar: A unicode string, expected to be a single character. The
            check is a plain string comparison, so longer strings are
            compared lexicographically rather than rejected.

    Returns:
        bool: Whether ``uchar`` falls inside U+4E00..U+9FA5.
    """
    return u'\u4e00' <= uchar <= u'\u9fa5'

 

def is_number(uchar):
    """Return True if ``uchar`` is a half-width digit.

    Only U+0030..U+0039 (``0``-``9``) count; full-width digits and other
    numeric characters return False.

    Args:
        uchar: A unicode string, expected to be a single character. The
            check is a plain string comparison, so longer strings are
            compared lexicographically rather than rejected.

    Returns:
        bool: Whether ``uchar`` falls inside U+0030..U+0039.
    """
    return u'\u0030' <= uchar <= u'\u0039'

 

def is_alphabet(uchar):
    """Return True if ``uchar`` is a half-width English letter.

    Accepts U+0041..U+005A (``A``-``Z``) and U+0061..U+007A (``a``-``z``);
    full-width letters return False.

    Args:
        uchar: A unicode string, expected to be a single character. The
            check is a plain string comparison, so longer strings are
            compared lexicographically rather than rejected.

    Returns:
        bool: Whether ``uchar`` is inside either letter range.
    """
    is_upper = u'\u0041' <= uchar <= u'\u005a'
    is_lower = u'\u0061' <= uchar <= u'\u007a'
    return is_upper or is_lower

 

def is_other(uchar):
    """Return True if ``uchar`` is not a Chinese character, digit or letter.

    Args:
        uchar: A unicode string, expected to be a single character.

    Returns:
        bool: True when none of ``is_chinese``, ``is_number`` and
        ``is_alphabet`` accepts ``uchar``; False otherwise.
    """
    return not (is_chinese(uchar) or is_number(uchar) or is_alphabet(uchar))

 

try:
    _unichr = unichr  # Python 2: build a unicode character from a code point
except NameError:
    _unichr = chr  # Python 3: chr() already covers the full Unicode range


def B2Q(uchar):
    """Convert a half-width character to its full-width form.

    Args:
        uchar: A single unicode character (``ord`` is applied to it).

    Returns:
        The full-width counterpart when ``uchar`` lies in the half-width
        range U+0020..U+007E; otherwise ``uchar`` itself, unchanged.
    """
    inside_code = ord(uchar)
    # Not a half-width character: return the original character.
    if inside_code < 0x0020 or inside_code > 0x7e:
        return uchar
    if inside_code == 0x0020:
        # The space is the one exception to the fixed offset below: its
        # full-width form is U+3000.
        inside_code = 0x3000
    else:
        # For everything except the space: full-width = half-width + 0xfee0.
        inside_code += 0xfee0
    return _unichr(inside_code)

 

try:
    _unichr = unichr  # Python 2: build a unicode character from a code point
except NameError:
    _unichr = chr  # Python 3: chr() already covers the full Unicode range


def Q2B(uchar):
    """Convert a full-width character to its half-width form.

    Args:
        uchar: A single unicode character (``ord`` is applied to it).

    Returns:
        A half-width space for U+3000, the half-width counterpart for
        U+FF01..U+FF5E, and ``uchar`` itself, unchanged, for anything else.
    """
    inside_code = ord(uchar)
    if inside_code == 0x3000:
        # The full-width space does not follow the fixed offset.
        return u" "
    # Full-width forms of U+0021..U+007E: half-width = full-width - 0xfee0.
    # The range starts at U+FF01 so that U+FF00, which is not a full-width
    # form, is passed through instead of being turned into a space.
    if 0xff01 <= inside_code <= 0xff5e:
        return _unichr(inside_code - 0xfee0)
    # Anything else is not a full-width character: return it as is.
    return uchar

 

def stringQ2B(ustring):
    """Convert every full-width character in a string to half-width.

    Args:
        ustring: A unicode string (any iterable of single characters).

    Returns:
        A unicode string built by applying ``Q2B`` to each character of
        ``ustring`` in order.
    """
    return u"".join(Q2B(uchar) for uchar in ustring)

 

def uniform(ustring):
    """Normalize a string: full-width to half-width, then lowercase.

    Args:
        ustring: A unicode string.

    Returns:
        The result of ``stringQ2B(ustring)`` converted to lowercase.
    """
    return stringQ2B(ustring).lower()

 

def string2List(ustring):
    """Split a string into runs of Chinese characters, letters and digits.

    Every character for which ``is_other`` returns True acts as a
    separator and is dropped; consecutive separators do not produce
    empty entries.

    Args:
        ustring: A unicode string.

    Returns:
        list: The maximal runs of non-"other" characters, in order of
        appearance. Empty when ``ustring`` holds no such characters.
    """
    ret_list = []
    utmp = []  # characters of the run currently being collected
    for uchar in ustring:
        if is_other(uchar):
            # A separator closes the current run, if there is one.
            if utmp:
                ret_list.append(u"".join(utmp))
                utmp = []
        else:
            utmp.append(uchar)
    # Flush the trailing run when the string does not end on a separator.
    if utmp:
        ret_list.append(u"".join(utmp))
    return ret_list

 

if __name__ == "__main__":
    try:
        to_char = unichr  # Python 2
    except NameError:
        to_char = chr  # Python 3: chr() already returns a unicode character

    # Test Q2B and B2Q: widen each half-width character U+0020..U+007E,
    # then print the narrowed-back character next to its full-width form.
    for i in range(0x0020, 0x007F):
        full = B2Q(to_char(i))
        # A single pre-formatted argument prints the same on Python 2 and 3.
        print(u"%s %s" % (Q2B(full), full))

    # Test uniform
    # NOTE(review): the sample literal was garbled in this copy of the
    # source. This stand-in mixes Chinese characters, a space and
    # full-width letters, written as escapes so it does not depend on the
    # file encoding -- confirm against the original sample.
    ustring = u"\u4e2d\u56fd \u4eba\u540d\uff41\u9ad8\u9891\uff21"
    ustring = uniform(ustring)
    ret = string2List(ustring)
    print(ret)

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.