A set of processing methods for strings containing Chinese Characters

Source: Internet
Author: User
1,
// Detect whether a string contains at least one Chinese character.
// U+4E00..U+9FA5 is the basic CJK Unified Ideographs range.
// The pattern has no `g` flag: a global regex remembers `lastIndex`
// between test() calls, which makes repeated checks unreliable.
var STR = "AAA is ";
var Re = /[\u4e00-\u9fa5]/;
if (Re.test(STR))
{
    alert("with Chinese characters ");
}

2,
Simple Chinese Character judgment
Private Static int ischaracter (string word ){
Byte [] str_byte = NULL;
Str_byte = word. substring (0, 1). getbytes ();
If (str_byte.length = 2 ){
Return 1; // It is a Chinese character
} Else {
Return 0; // not a Chinese character
}
}

3,
<%
' isallgb: returns True when every character of s is a Chinese character
' (U+4E00..U+9FA5) or a full-width form (U+FE30..U+FFA0).
' How it works: each matching character is replaced by two characters
' ("**"), so the result is exactly twice as long as s only when every
' character matched. An empty string therefore yields True.
Public Function isallgb(s)
    Dim P: Set P = New RegExp
    P.Global = True        ' replace every match, not just the first
    P.IgnoreCase = True
    P.MultiLine = True
    P.Pattern = "[\u4e00-\u9fa5\ufe30-\uffa0]"
    Dim Q
    Q = P.Replace(s, "**")
    Set P = Nothing
    If Len(Q) <> Len(s) * 2 Then
        isallgb = False
    Else
        isallgb = True
    End If
End Function
Response.Write isallgb("s hello ")
%>

4,
<%
STRs = "the character to be compared"
If strlen(STRs) <> Len(STRs) Then
    Response.Write "Chinese characters"
Else
    Response.Write "English character"
End If

'**************************************************
' Function name: strlen
' Usage:         Returns the length of a string, counting each
'                double-byte (Chinese) character as 2 and each
'                single-byte character as 1.
' Parameter:     Str - the string to measure
' Return value:  the computed length; 0 for Null or an empty string
'**************************************************
Public Function strlen(Str)
    If IsNull(Str) Or Str = "" Then
        strlen = 0
        Exit Function
    End If
    Dim winnt_chinese
    Dim L, T, I, C
    ' Probe: Len() of a two-character Chinese literal is 2 only when Len
    ' counts characters rather than bytes. The literal must be Chinese;
    ' an ASCII word can never have length 2 here, which would disable
    ' the counting branch below.
    winnt_chinese = (Len("中文") = 2)
    If winnt_chinese Then
        L = Len(Str)
        T = L
        For I = 1 To L
            C = Asc(Mid(Str, I, 1))
            ' Asc returns a negative value for double-byte characters;
            ' fold it into the range 0..65535.
            If C < 0 Then C = C + 65536
            ' Anything above 255 is a double-byte character: count it twice.
            If C > 255 Then T = T + 1
        Next
        strlen = T
    Else
        strlen = Len(Str)
    End If
End Function
%>

5,
' Pseudo-code fragment: yields true when isgb(STR) is true, false otherwise.
' It mixes VB-style If/Else/End if with C-style "Return ...;" and is not valid as written.
' NOTE(review): isgb is not defined in this snippet - presumably a "string is Chinese (GB)" check; confirm.
If isgb (STR) then
Return true;
Else
Return false;
End if

6,
' Walk the string one character at a time and report each character whose
' folded code falls in 45217..62289 (45217 = &HB0A1).
' Adding 65536 folds the negative Asc value of a double-byte character into
' that range; single-byte characters end up above 65536 and never match.
' NOTE(review): the upper bound 62289 (&HF351) is lower than &HF7FE - confirm it is intended.
For I = 1 To Len(STR)
    Char = Mid(STR, I, 1)
    TMP = 65536 + Asc(Char)
    If TMP >= 45217 And TMP <= 62289 Then
        MsgBox "Chinese characters"
    End If
Next

7,
Take the ASCII (byte) value of the character.
If it is between 0x00 and 0x7F, it is an English (ASCII) character, including punctuation marks.

For GB2312-80 Encoding
The encoding range is high: 0xa1-0xfe; low: 0xa1-0xfe
Chinese characters occupy the range 0xB0A1 to 0xF7FE.

8,
Cstring Ss = "Hello! ";
Bool BCH = false; // specifies whether a Chinese character exists.
For (INT I = 0; 1 <(INT) strlen (SS); I ++)
{
If (byte) ss [I]> = 0xa1)
BCH = true;
}

9,
using System;
using System.Text.RegularExpressions;

namespace Test
{
    /// <summary>
    /// Demonstrates detecting Chinese characters with a regular expression.
    /// </summary>
    public class class10
    {
        public class10()
        {
            // No constructor logic is needed.
        }

        /// <summary>
        /// Reports whether the string contains at least one character in
        /// the range U+4E00..U+9FA5 (CJK Unified Ideographs).
        /// </summary>
        /// <param name="pendingstring">The string to inspect.</param>
        /// <returns>
        /// "with Chinese characters " when a match is found,
        /// otherwise "no Chinese characters ".
        /// </returns>
        public static string check(string pendingstring)
        {
            if (Regex.IsMatch(pendingstring, @"[\u4e00-\u9fa5]+"))
            {
                return "with Chinese characters ";
            }
            else
            {
                return "no Chinese characters ";
            }
        }

        /// <summary>
        /// Runs check on two sample strings and waits for Enter.
        /// </summary>
        static void Main()
        {
            string s = "I love my motherland";
            Console.WriteLine(check(s));

            s = @"abcdefodfl980883247) (* (^ & * ^ & $ % ^ $ % # $ ##@% &*&(&*)""::: \ http :";
            Console.WriteLine(check(s));

            Console.ReadLine();
        }
    }
}

10,
<script language="JavaScript">
// Alert when the string contains at least one Chinese character
// (U+4E00..U+9FA5).
STR = "AAA is ";
if (/[\u4e00-\u9fa5]/.test(STR))
{
    alert("with Chinese characters ");
}
</script>

11,
/**
 * Reports whether this string contains at least one Chinese character.
 *
 * [\u4e00-\u9fa5] matches Chinese characters; [\ufe30-\uffa0] would match
 * full-width characters, which this method does not test for.
 *
 * @returns {boolean} true when a Chinese character is present.
 */
String.prototype.existchinese = function ()
{
    return /[\u4e00-\u9fa5]/.test(this);
};

12,
/// <summary>
/// Reports whether the string contains at least one non-ASCII character
/// (code unit 128 or above), which this check treats as Chinese.
/// </summary>
/// <param name="cstring">The string to inspect; null or empty yields false.</param>
/// <returns>true as soon as any character is 128 or above; otherwise false.</returns>
public bool ischina(string cstring)
{
    if (string.IsNullOrEmpty(cstring))
    {
        return false;
    }

    foreach (char ch in cstring)
    {
        // Return on the first hit. Overwriting a flag on every iteration
        // would make the result depend only on the last character.
        if (ch >= 128)
        {
            return true;
        }
    }

    return false;
}

13,
// True when c belongs to the Unicode category OtherLetter (Lo).
char.GetUnicodeCategory(c) == UnicodeCategory.OtherLetter
GetUnicodeCategory can be used to distinguish full-width symbols, Chinese characters, digits, and so on. Many questions of this kind have been asked recently. Everyone likes to test for two bytes, but nobody reads my answers.
One line of code solves the problem.

14,
// Remove every Chinese character (U+4E00..U+9FA5) from str.
// NOTE(review): str is the caller's input string; it is declared outside this snippet.
System.Text.RegularExpressions.Regex regex = new System.Text.RegularExpressions.Regex("[\u4e00-\u9fa5]");
string replacedstring = regex.Replace(str, ""); // every character matching the pattern is filtered out
The regular expression does the filtering. I use this code to filter out Chinese characters; change the pattern to filter something else.

15. Extract Chinese Characters

/// <summary>
/// Click handler: copies into txtout every character of txtin.Text that
/// Encoding.Default encodes to something other than exactly one byte.
/// </summary>
// NOTE(review): the handler was named "button#click" in the source, which is not a
// valid identifier; button1_Click is assumed - confirm against the designer wiring.
private void button1_Click(object sender, System.EventArgs e)
{
    if (txtin.Text != "")
    {
        string strin = txtin.Text;
        // Collect the matches in a builder instead of re-assigning the
        // control's Text for every character.
        System.Text.StringBuilder kept = new System.Text.StringBuilder();
        for (int i = 0; i < strin.Length; i++)
        {
            string temp = strin.Substring(i, 1);
            if (Encoding.Default.GetBytes(temp).Length != 1)
            {
                kept.Append(temp);
            }
        }
        txtout.Text = kept.ToString();
    }
}
When the button is clicked, the Chinese characters in the string in txtin are displayed in txtout.

 

// Print every character of str whose Unicode category is OtherLetter.
// NOTE(review): str is the input string; it is declared outside this snippet.
foreach (char c in str)
{
    if (char.GetUnicodeCategory(c) == UnicodeCategory.OtherLetter)
    {
        Console.Write(c.ToString());
    }
}
Chinese characters belong to the OtherLetter category. If the string also contains characters from other languages, you have to check the Unicode code point (the \u value) to tell them apart.

16,
/// <summary>
/// Calculates the display length of a string: every ASCII character counts
/// as 1 and every other character (Chinese, full-width, ...) counts as 2.
/// </summary>
/// <param name="PARAM">The string to measure.</param>
/// <returns>The computed length.</returns>
/// <exception cref="System.ArgumentNullException">PARAM is null.</exception>
public static int getstringlength(string PARAM)
{
    if (PARAM == null)
    {
        throw new System.ArgumentNullException("PARAM");
    }

    int l = 0; // l is the computed length of the string
    for (int i = 0; i < PARAM.Length; i++)
    {
        char c = PARAM[i];
        // Test the character itself. Encoding to ASCII and looking for
        // byte 63 ('?') would also count a literal question mark as wide.
        if (c > 127)
        {
            l += 2;
            // A surrogate pair is a single wide character: skip its second half.
            if (char.IsHighSurrogate(c) && i + 1 < PARAM.Length && char.IsLowSurrogate(PARAM[i + 1]))
            {
                i++;
            }
        }
        else
        {
            l++;
        }
    }
    return l;
}

/// <summary>
/// Returns a leading part of Param limited by display width, where an ASCII
/// character counts as 1 and any other character counts as 2.
/// </summary>
/// <param name="Param">The source string.</param>
/// <param name="Len">The display width to cut at.</param>
/// <param name="strlen">Receives the display width of the whole string.</param>
/// <returns>
/// The prefix ending at the first character where the running width reaches
/// Len - 1 or Len, or the whole string when that never happens.
/// </returns>
/// <exception cref="System.ArgumentNullException">Param is null.</exception>
public static string getsubstring(string Param, int Len, ref int strlen)
{
    if (Param == null)
    {
        throw new System.ArgumentNullException("Param");
    }

    string returnstr = "";
    bool issub = false;
    int l = 0; // running display width
    for (int i = 0; i < Param.Length; i++)
    {
        char c = Param[i];
        // Test the character itself. Encoding to ASCII and looking for
        // byte 63 ('?') would also count a literal question mark as wide.
        if (c > 127)
        {
            l += 2;
            // Keep both halves of a surrogate pair together in the prefix.
            if (char.IsHighSurrogate(c) && i + 1 < Param.Length && char.IsLowSurrogate(Param[i + 1]))
            {
                i++;
            }
        }
        else
        {
            l++;
        }

        // Cut once, at the first position where the width is Len - 1 or Len.
        if (!issub && (l == Len || l + 1 == Len))
        {
            returnstr = Param.Substring(0, i + 1);
            issub = true;
        }
    }

    strlen = l;
    if (returnstr == "")
    {
        returnstr = Param;
    }
    return returnstr;
}

17,

/// <summary>
/// Click handler: copies into txtout every character of txtin.Text that
/// Encoding.Default encodes to something other than exactly one byte.
/// </summary>
// NOTE(review): the handler was named "button#click" in the source, which is not a
// valid identifier; button1_Click is assumed - confirm against the designer wiring.
private void button1_Click(object sender, System.EventArgs e)
{
    if (txtin.Text != "")
    {
        string strin = txtin.Text;
        // Collect the matches in a builder instead of re-assigning the
        // control's Text for every character.
        System.Text.StringBuilder kept = new System.Text.StringBuilder();
        for (int i = 0; i < strin.Length; i++)
        {
            string temp = strin.Substring(i, 1);
            if (Encoding.Default.GetBytes(temp).Length != 1)
            {
                kept.Append(temp);
            }
        }
        txtout.Text = kept.ToString();
    }
}
When the button is clicked, the Chinese characters in the string in txtin are displayed in txtout.

// Print every character of str whose Unicode category is OtherLetter.
// NOTE(review): str is the input string; it is declared outside this snippet.
foreach (char c in str)
{
    if (char.GetUnicodeCategory(c) == UnicodeCategory.OtherLetter)
    {
        Console.Write(c.ToString());
    }
}
Chinese characters belong to the OtherLetter category. If the string also contains characters from other languages, you have to check the Unicode code point (the \u value) to tell them apart.

18. Regular Expression (Chinese character, letter, and digit mixed verification)
The 1st character is a Chinese character,
characters 2-4 are letters,
the 5th character is a Chinese character,
characters 6-14 are digits,
and the last character is a Chinese character.

^[\u4e00-\u9fa5\uf900-\ufa2d][A-Za-z]{3}[\u4e00-\u9fa5\uf900-\ufa2d]\d{9}[\u4e00-\u9fa5\uf900-\ufa2d]$
Or
^[\u4e00-\u9fa5][A-Za-z]{3}[\u4e00-\u9fa5]\d{9}[\u4e00-\u9fa5]$

Method 1:
/// <summary>
/// Checks whether every character of the string is an ASCII digit '0'..'9'.
/// </summary>
/// <param name="Str">The string to check.</param>
/// <returns>
/// true when all characters are digits (an empty string yields true);
/// false when any character is not a digit or when Str is null.
/// </returns>
public static bool isnum(string Str)
{
    if (Str == null)
    {
        return false;
    }

    for (int i = 0; i < Str.Length; i++)
    {
        // Strict comparisons: '0' and '9' are themselves digits.
        if (Str[i] < '0' || Str[i] > '9')
            return false;
    }
    return true;
}

Method 2:
int.Parse(str);
If it throws an exception, the string is not a number.

Method 3:
/// <summary>
/// Checks whether every character of the string is a number according to
/// char.IsNumber.
/// </summary>
/// <param name="Str">The string to check.</param>
/// <returns>
/// true when all characters are numbers (an empty string yields true);
/// false when any character is not a number or when Str is null.
/// </returns>
public static bool isnum(string Str)
{
    if (Str == null)
    {
        return false;
    }

    for (int i = 0; i < Str.Length; i++)
    {
        // No break here: leaving the loop after the first iteration would
        // mean only the first character is ever checked.
        if (!char.IsNumber(Str, i))
            return false;
    }
    return true;
}

Method 4:
/// <summary>
/// Checks whether the string is a number: an optional minus sign, one or
/// more digits, and optionally a decimal point followed by one or more digits.
/// </summary>
/// <param name="Str">The string to be checked.</param>
/// <returns>true when the string matches; false otherwise or when Str is null.</returns>
public static bool isnumeric(string Str)
{
    if (Str == null)
    {
        return false;
    }

    // Verbatim string so that \d and \. reach the regex engine unchanged.
    return Regex.IsMatch(Str, @"^(-?\d+)(\.\d+)?$");
}

Method 5:
// One or more digits and nothing else: zero or a positive integer.
private Regex regintegral = new Regex(@"^\d+$");

/// <summary>
/// Checks whether the input is an unsigned integer (zero or a positive integer).
/// </summary>
/// <param name="inputdata">The input string.</param>
/// <returns>true when the input consists only of digits.</returns>
public bool isintegral(string inputdata)
{
    return regintegral.IsMatch(inputdata);
}

/// <summary>
/// Checks whether every character of the string is an ASCII digit '0'..'9'.
/// </summary>
/// <param name="Str">The string to check.</param>
/// <returns>
/// true when all characters are digits (an empty string yields true);
/// false when any character is not a digit or when Str is null.
/// </returns>
public static bool isnum(string Str)
{
    if (Str == null)
    {
        return false;
    }

    foreach (char c in Str)
    {
        // Strict comparisons: '0' and '9' are themselves digits.
        if (c < '0' || c > '9')
            return false;
    }
    return true;
}

Method 6:
/// <summary>
/// Reports whether a character is a number. ASCII characters qualify only
/// when they are '0'..'9'; any other character qualifies when its Unicode
/// category is DecimalDigitNumber, LetterNumber or OtherNumber.
/// </summary>
/// <param name="c">The character to classify.</param>
/// <returns>true when the character is a number.</returns>
public static bool isnumber(char c)
{
    // ASCII fast path: only the ten decimal digits are numbers.
    if (c <= '\u007f')
    {
        return c >= '0' && c <= '9';
    }

    // Everything else is decided by Unicode category. This uses the public
    // CharUnicodeInfo API; the Latin-1 and number-category helpers inside
    // System.Char are not accessible to user code.
    switch (CharUnicodeInfo.GetUnicodeCategory(c))
    {
        case UnicodeCategory.DecimalDigitNumber:
        case UnicodeCategory.LetterNumber:
        case UnicodeCategory.OtherNumber:
            return true;
        default:
            return false;
    }
}

Source Code :
/// <summary>
/// Click handler: runs isnum1, isnum2 and isnum3 one million times each on
/// the text of textbox1 and shows the three elapsed times in milliseconds.
/// </summary>
private void button9_click(object sender, EventArgs e)
{
    int count = 1000000;
    bool bl = true;
    string str = textbox1.Text; // change this to test a different input
    DateTime time1 = DateTime.Now;
    for (int i = 0; i < count; i++)
    {
        bl = isnum1(str);
    }
    DateTime time2 = DateTime.Now;
    for (int i = 0; i < count; i++)
    {
        bl = isnum2(str);
    }
    DateTime time3 = DateTime.Now;
    for (int i = 0; i < count; i++)
    {
        bl = isnum3(str);
    }
    DateTime time4 = DateTime.Now;

    TimeSpan span1 = time2 - time1;
    TimeSpan span2 = time3 - time2;
    TimeSpan span3 = time4 - time3;
    MessageBox.Show(span1.TotalMilliseconds + "\r\n"
        + span2.TotalMilliseconds + "\r\n"
        + span3.TotalMilliseconds);
}

// NOTE(review): the source pattern read "^ \ D $"; "^\d+$" (all digits) is assumed
// so that isnum1 checks the same thing as isnum2 and isnum3 - confirm.
Regex Reg = new Regex(@"^\d+$", RegexOptions.Compiled);

/// <summary>All-digits check using the precompiled regular expression.</summary>
bool isnum1(string Str)
{
    return Reg.IsMatch(Str);
}

/// <summary>All-digits check using char.IsNumber on every character.</summary>
bool isnum2(string Str)
{
    foreach (char c in Str)
    {
        if (!char.IsNumber(c))
        {
            return false;
        }
    }
    return true;
}

/// <summary>All-digits check using a direct '0'..'9' range comparison.</summary>
bool isnum3(string Str)
{
    foreach (char c in Str)
    {
        // Strict comparisons: '0' and '9' are themselves digits.
        if (c < '0' || c > '9')
        {
            return false;
        }
    }
    return true;
}

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.