Several character-encoding conversions are commonly needed in development, such as UTF-8 to GBK, GBK to UTF-8, Traditional to Simplified Chinese, Simplified to Traditional Chinese, Unicode to UTF-8, UTF-8 to Unicode, GBK to pinyin, and ASCII code to pinyin.
<?php
/**
 * Convert a UTF-8 string to GBK.
 *
 * The Unicode-to-GB lookup table is built on first use from a table file in
 * CODETABLEDIR and cached in the global $UC2GBTABLE. Each table line is read
 * as two six-character hexadecimal fields: the GB code at offset 0 and the
 * Unicode code point at offset 7.
 *
 * Output rules:
 *  - ASCII bytes are copied through unchanged.
 *  - A multi-byte character found in the table is written as the two bytes
 *    of (GB code + 0x8080).
 *  - A character missing from the table is written as a decimal numeric
 *    entity ("&#N;").
 *  - A malformed lead byte or a sequence truncated by the end of the input
 *    is replaced with "?".
 * The finished string is passed through trim().
 *
 * @param string $utfstr UTF-8 encoded input.
 * @return string The converted string.
 */
function utf8_to_gbk($utfstr)
{
    global $UC2GBTABLE;

    if (empty($UC2GBTABLE)) {
        $UC2GBTABLE = array();
        // NOTE(review): file name restored from a case/whitespace-mangled
        // literal; confirm it matches the table file on disk.
        $fp = fopen(CODETABLEDIR . 'gb-unicode.table', 'rb');
        if ($fp !== false) {
            while (($line = fgets($fp, 15)) !== false) {
                $UC2GBTABLE[hexdec(substr($line, 7, 6))] = hexdec(substr($line, 0, 6));
            }
            fclose($fp);
        }
    }

    $utfstr = (string) $utfstr;
    $okstr = '';
    $ulen = strlen($utfstr);

    for ($i = 0; $i < $ulen; $i++) {
        $byte = ord($utfstr[$i]);

        if ($byte < 0x80) {
            $okstr .= $utfstr[$i];
            continue;
        }

        // The number of continuation bytes is encoded in the lead byte's
        // high bits (110xxxxx = 1, 1110xxxx = 2, 11110xxx = 3).
        if (($byte & 0xE0) === 0xC0) {
            $extra = 1;
        } elseif (($byte & 0xF0) === 0xE0) {
            $extra = 2;
        } elseif (($byte & 0xF8) === 0xF0) {
            $extra = 3;
        } else {
            $extra = -1; // stray continuation byte or invalid lead byte
        }

        if ($extra < 0 || $i + $extra >= $ulen) {
            $okstr .= '?';
            continue;
        }

        $sequence = substr($utfstr, $i, $extra + 1);
        $i += $extra;

        $codePoint = utf8_to_unicode($sequence);
        if (isset($UC2GBTABLE[$codePoint])) {
            $gb = (int) $UC2GBTABLE[$codePoint] + 0x8080;
            $okstr .= chr(($gb >> 8) & 0xFF) . chr($gb & 0xFF);
        } else {
            $okstr .= '&#' . $codePoint . ';';
        }
    }

    return trim($okstr);
}
/**
 * Convert a GBK string to UTF-8.
 *
 * The GB-to-Unicode lookup table is built on first use from a table file in
 * CODETABLEDIR and cached in the global $CODETABLE. Each table line is read
 * as two six-character fields: the GB code (hex) at offset 0, used as the
 * key, and the Unicode code point (hex text) at offset 7, stored as-is.
 *
 * Bytes up to and including 0x80 are copied through. Any larger byte is
 * taken together with the following byte as one double-byte character; its
 * value minus 0x8080 is looked up in the table and the resulting code point
 * is encoded through unicode_to_utf8(). Characters that are not in the table
 * (including a lead byte cut off by the end of the input) are replaced
 * with "?".
 *
 * @param string $gbstr GBK encoded input.
 * @return string UTF-8 encoded output.
 */
function gbk_to_utf8($gbstr)
{
    global $CODETABLE;

    if (empty($CODETABLE)) {
        $CODETABLE = array();
        // NOTE(review): file name restored from a case/whitespace-mangled
        // literal; confirm it matches the table file on disk.
        $fp = fopen(CODETABLEDIR . 'gb-unicode.table', 'rb');
        if ($fp !== false) {
            while (($line = fgets($fp, 15)) !== false) {
                $CODETABLE[hexdec(substr($line, 0, 6))] = substr($line, 7, 6);
            }
            fclose($fp);
        }
    }

    $gbstr = (string) $gbstr;
    $length = strlen($gbstr);
    $ret = '';

    // Walk by index: testing the remaining string for truthiness would stop
    // early on a remainder of "0", and re-slicing it each step is quadratic.
    for ($i = 0; $i < $length; $i++) {
        if (ord($gbstr[$i]) <= 0x80) {
            $ret .= $gbstr[$i];
            continue;
        }

        $thisW = substr($gbstr, $i, 2);
        $i++; // the trail byte has been consumed as well

        $key = hexdec(bin2hex($thisW)) - 0x8080;
        if (!isset($CODETABLE[$key])) {
            $ret .= '?';
            continue;
        }

        // unicode_to_utf8() returns the byte values as concatenated decimal
        // numbers; every byte of a multi-byte sequence is >= 128 and so is
        // exactly three digits wide.
        $utf8 = unicode_to_utf8(hexdec($CODETABLE[$key]));
        $digits = strlen($utf8);
        for ($j = 0; $j < $digits; $j += 3) {
            $ret .= chr((int) substr($utf8, $j, 3));
        }
    }

    return $ret;
}
/**
 * Convert Traditional Chinese (Big5) bytes to Simplified Chinese (GBK).
 *
 * The mapping table is loaded on first use from a table file in CODETABLEDIR
 * and cached in the global $BIG5_DATA. For a double-byte character with lead
 * byte $h and trail byte $l, the two replacement bytes are read at offset
 * ($h - 160) * 510 + ($l - 1) * 2 of the table.
 *
 * Bytes below 0x80 are left untouched. The pair 0xA1 0x40 is mapped directly
 * to a fixed two-byte replacement instead of going through the table. Pairs
 * whose offset falls outside the table are left unchanged. If the table
 * cannot be loaded, the input is returned unmodified.
 *
 * @param string $Text Big5 encoded input.
 * @return string The converted string (same byte length as the input).
 */
function big5_to_gbk($Text)
{
    global $BIG5_DATA;

    if (empty($BIG5_DATA)) {
        // NOTE(review): file name restored from a case/whitespace-mangled
        // literal; confirm it matches the table file on disk.
        $BIG5_DATA = file_get_contents(CODETABLEDIR . 'big5-gb.table');
        if (empty($BIG5_DATA)) {
            return $Text;
        }
    }

    $tableSize = strlen($BIG5_DATA);
    $max = strlen($Text) - 1;

    for ($i = 0; $i < $max; $i++) {
        $h = ord($Text[$i]);
        if ($h < 0x80) {
            continue;
        }

        $l = ord($Text[$i + 1]);
        if ($h === 161 && $l === 64) {
            // NOTE(review): the original two-byte literal was lost in SOURCE;
            // 0xA1 0xA1 (GBK full-width space) is assumed — confirm.
            $gbstr = "\xA1\xA1";
        } else {
            $p = ($h - 160) * 510 + ($l - 1) * 2;
            if ($p < 0 || $p + 1 >= $tableSize) {
                // Outside the table: keep the original pair rather than read
                // a negative (end-relative) or undefined offset.
                $i++;
                continue;
            }
            $gbstr = $BIG5_DATA[$p] . $BIG5_DATA[$p + 1];
        }

        $Text[$i] = $gbstr[0];
        $Text[$i + 1] = $gbstr[1];
        $i++; // skip the trail byte
    }

    return $Text;
}
/**
 * Convert Simplified Chinese (GBK) bytes to Traditional Chinese (Big5).
 *
 * The mapping table is loaded on first use from a table file in CODETABLEDIR
 * and cached in the global $GB_DATA. For a double-byte character with lead
 * byte $h and trail byte $l, the two replacement bytes are read at offset
 * ($h - 160) * 510 + ($l - 1) * 2 of the table.
 *
 * Bytes below 0x80 are left untouched. The pair 0xA1 0x40 is mapped directly
 * to a fixed two-byte replacement instead of going through the table. Pairs
 * whose offset falls outside the table are left unchanged. If the table
 * cannot be loaded, the input is returned unmodified.
 *
 * @param string $Text GBK encoded input.
 * @return string The converted string (same byte length as the input).
 */
function gbk_to_big5($Text)
{
    global $GB_DATA;

    if (empty($GB_DATA)) {
        // The table must land in $GB_DATA itself, since that is the variable
        // the lookups below read.
        // NOTE(review): file name restored from a case/whitespace-mangled
        // literal; confirm it matches the table file on disk.
        $GB_DATA = file_get_contents(CODETABLEDIR . 'gb-big5.table');
        if (empty($GB_DATA)) {
            return $Text;
        }
    }

    $tableSize = strlen($GB_DATA);
    $max = strlen($Text) - 1;

    for ($i = 0; $i < $max; $i++) {
        $h = ord($Text[$i]);
        if ($h < 0x80) {
            continue;
        }

        $l = ord($Text[$i + 1]);
        if ($h === 161 && $l === 64) {
            // NOTE(review): the original two-byte literal was lost in SOURCE;
            // 0xA1 0x40 (Big5 full-width space) is assumed — confirm.
            $big = "\xA1\x40";
        } else {
            $p = ($h - 160) * 510 + ($l - 1) * 2;
            if ($p < 0 || $p + 1 >= $tableSize) {
                // Outside the table: keep the original pair rather than read
                // a negative (end-relative) or undefined offset.
                $i++;
                continue;
            }
            $big = $GB_DATA[$p] . $GB_DATA[$p + 1];
        }

        $Text[$i] = $big[0];
        $Text[$i + 1] = $big[1];
        $i++; // skip the trail byte
    }

    return $Text;
}
/**
 * Encode a Unicode code point as UTF-8, returned as decimal text.
 *
 * The result is NOT a raw byte string: each UTF-8 byte value is appended as
 * its decimal number, so U+4E2D yields "228184173" (bytes 228, 184, 173).
 * Callers are expected to turn the digits back into bytes themselves. For
 * code points below 0x80 the code point itself is appended.
 *
 * @param int $c Unicode code point.
 * @return string Concatenated decimal byte values, or '' when $c is
 *                0x200000 or larger.
 */
function unicode_to_utf8($c)
{
    $str = '';

    if ($c < 0x80) {
        $str .= $c;
    } elseif ($c < 0x800) {
        $str .= (0xC0 | ($c >> 6));
        $str .= (0x80 | ($c & 0x3F));
    } elseif ($c < 0x10000) {
        $str .= (0xE0 | ($c >> 12));
        $str .= (0x80 | (($c >> 6) & 0x3F));
        $str .= (0x80 | ($c & 0x3F));
    } elseif ($c < 0x200000) {
        $str .= (0xF0 | ($c >> 18));
        $str .= (0x80 | (($c >> 12) & 0x3F));
        $str .= (0x80 | (($c >> 6) & 0x3F));
        $str .= (0x80 | ($c & 0x3F));
    }

    return $str;
}
/**
 * Decode a single UTF-8 encoded character into its Unicode code point.
 *
 * The byte length of $c selects the decoding: one byte is returned as its
 * ordinal; two to four bytes are assembled from the payload bits of the lead
 * byte and the low six bits of each following byte. Continuation bytes are
 * not validated.
 *
 * @param string $c One UTF-8 character (1 to 4 bytes).
 * @return int|null The code point, or null when $c is empty or longer than
 *                  four bytes.
 */
function utf8_to_unicode($c)
{
    switch (strlen($c)) {
        case 1:
            return ord($c);
        case 2:
            $n = (ord($c[0]) & 0x1f) << 6;
            $n += ord($c[1]) & 0x3f;
            return $n;
        case 3:
            $n = (ord($c[0]) & 0x0f) << 12;
            $n += (ord($c[1]) & 0x3f) << 6;
            $n += ord($c[2]) & 0x3f;
            return $n;
        case 4:
            $n = (ord($c[0]) & 0x07) << 18;
            $n += (ord($c[1]) & 0x3f) << 12;
            $n += (ord($c[2]) & 0x3f) << 6;
            $n += ord($c[3]) & 0x3f;
            return $n;
        default:
            return null;
    }
}
/**
 * Map a numeric character code to its pinyin string.
 *
 * Codes below 128 are returned as the corresponding single character. Other
 * codes are looked up in $pyarr: an exact key match wins; otherwise the value
 * of the first key (in the array's iteration order) that is greater than or
 * equal to $asc is returned, so $pyarr is expected to be sorted by key.
 *
 * @param int   $asc   Character code to translate.
 * @param array $pyarr Code => pinyin table (taken by reference, not modified).
 * @return string|null The pinyin / character, or null when no key qualifies.
 */
function asc_to_pinyin($asc, &$pyarr)
{
    if ($asc < 128) {
        return chr($asc);
    }

    if (isset($pyarr[$asc])) {
        return $pyarr[$asc];
    }

    foreach ($pyarr as $id => $p) {
        if ($id >= $asc) {
            return $p;
        }
    }

    return null;
}
/**
 * Convert a string to a list of pinyin syllables (one entry per character).
 *
 * When the CHARSET constant is not 'gbk' the input is first converted to GBK
 * with iconv(). The pinyin table is read from a file in CODETABLEDIR whose
 * lines are split on "-": the first field is the pinyin, the second the
 * numeric code. Lines without a "-" are ignored. If the file cannot be
 * opened the table stays empty.
 *
 * Bytes below 128 are passed to asc_to_pinyin() as-is. A byte of 128 or more
 * is combined with the following byte as abs(byte1 * 256 + byte2 - 65536)
 * before the lookup; a lead byte cut off by the end of the input is combined
 * with 0.
 *
 * @param string $txt Input text in the CHARSET encoding.
 * @return array One asc_to_pinyin() result per character, in order; empty
 *               when the iconv() conversion fails.
 */
function gbk_to_pinyin($txt)
{
    if (CHARSET != 'gbk') {
        $txt = iconv(CHARSET, 'gbk', $txt);
        if ($txt === false) {
            return array();
        }
    }

    $pyarr = array();
    // NOTE(review): file name restored from a case/whitespace-mangled
    // literal; confirm it matches the table file on disk.
    $fp = fopen(CODETABLEDIR . 'gb-pinyin.table', 'r');
    if ($fp !== false) {
        // Looping on fgets() rather than feof() cannot spin on a bad handle
        // and does not process a phantom line at end of file.
        while (($line = fgets($fp, 32)) !== false) {
            $p = explode('-', $line);
            if (isset($p[1])) {
                $pyarr[intval($p[1])] = trim($p[0]);
            }
        }
        fclose($fp);
    }
    ksort($pyarr);

    $py = array();
    $l = strlen($txt);
    $i = 0;
    while ($i < $l) {
        $tmp = ord($txt[$i]);
        if ($tmp >= 128) {
            $next = ($i + 1 < $l) ? ord($txt[$i + 1]) : 0;
            $asc = abs($tmp * 256 + $next - 65536);
            $i++; // the trail byte belongs to this character
        } else {
            $asc = $tmp;
        }
        $py[] = asc_to_pinyin($asc, $pyarr);
        $i++;
    }

    return $py;
}
?>