Validating Chinese characters: PHP (and JavaScript) code examples
Script ec (2); script
Encoding range: 1. GBK (GB2312/GB18030)
Note: Korean characters are code points greater than [\u9fa5].
Regular Expression example:
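preg_replace('/([\x80-\xff])/', '', $str);           // GBK: strip every byte in 0x80-0xFF
preg_replace('/([\x{4e00}-\x{9fa5}])/u', '', $str);  // UTF-8: strip U+4E00-U+9FA5 (PCRE has no \u escape; use \x{...} with the /u modifier)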
\x00-\xff: GBK double-byte encoding range
\x20-\x7f: ASCII
\xa1-\xff: Chinese
\x80-\xff: Chinese
2. UTF-8 (Unicode)
\u4e00-\u9fa5 (Chinese)
\x3130-\x318F (Korean)
\xAC00-\xD7A3 (Korean)
\u0800-\u4e00 (Japanese)
// Determine whether the content contains Chinese characters-GBK (PHP)
Function check_is_chinese ($ s ){
Return preg_match ('/[x80-xff]./', $ s );
}
Returns the length of a single character.
// Obtain the string length-GBK (PHP)
Function gb_strlen ($ str ){
$ Count = 0;
For ($ I = 0; $ I $ S = substr ($ str, $ I, 1 );
If (preg_match ("/[x80-xff]/", $ s) ++ $ I;
+ + $ Count;
}
Return $ count;
}
// Count the length of the string-UTF8 (PHP)
Function utf8_strlen ($ str ){
$ Count = 0;
For ($ I = 0; $ I <strlen ($ str); $ I ++ ){
$ Value = ord ($ str [$ I]);
If ($ value & gt; 127 ){
$ Count ++;
If ($ value >=192 & $ value <= 223) $ I ++;
Elseif ($ value >=224 & $ value <= 239) $ I = $ I + 2;
Elseif ($ value >=240 & $ value <= 247) $ I = $ I + 3;
Else die ('not a UTF-8 compatible string ');
}
$ Count ++;
}
Return $ count;
}
// Truncate the string-GBK (PHP)
Function gb_substr ($ str, $ len ){
$ Count = 0;
For ($ I = 0; $ I If ($ count = $ len) break;
If (preg_match ("/[x80-xff]/", substr ($ str, $ I, 1) + + $ I;
+ + $ Count;
}
Return substr ($ str, 0, $ I );
}
// Truncate the string-UTF8 (PHP)
Function utf8_substr ($ str, $ position, $ length ){
$ Start_position = strlen ($ str );
$ Start_byte = 0;
$ End_position = strlen ($ str );
$ Count = 0;
For ($ I = 0; $ I <strlen ($ str); $ I ++ ){
If ($ count >=$ position & $ start_position> $ I ){
$ Start_position = $ I;
$ Start_byte = $ count;
}
If ($ count-$ start_byte)> = $ length ){
$ End_position = $ I;
Break;
}
$ Value = ord ($ str [$ I]);
If ($ value & gt; 127 ){
$ Count ++;
If ($ value >=192 & $ value <= 223) $ I ++;
Elseif ($ value >=224 & $ value <= 239) $ I = $ I + 2;
Elseif ($ value >=240 & $ value <= 247) $ I = $ I + 3;
Else die ('not a UTF-8 compatible string ');
}
$ Count ++;
}
Return (substr ($ str, $ start_position, $ end_position-$ start_position ));
}
// Determine whether a Chinese character-GBK (JavaScript) exists)
Function check_chinese_char (s ){
Return (s. length! = S. replace (/[^ x00-xff]/g, "**"). length );
}
// Determine if there is a Korean-UTF-8 (JavaScript)
Function checkKoreaChar (str ){
For (I = 0; I If (str. charCodeAt (I)> 0x3130 & str. charCodeAt (I) <0x318F) | (str. charCodeAt (I)> = 0xAC00 & str. charCodeAt (I) <= 0xD7A3 ))){
Return true;
}
}
Return false;
}