Chinese encoding conversion class supporting conversion between UTF-8, GBK, and BIG5
Last Update:2014-01-14
Source: Internet
Author: User
The authors of the simplified/traditional and UTF-8/GBK/BIG5 conversion code are unknown: the original code carried no author credit, and I have forgotten which website it came from. Sorry.
<?php
/**
* Name: Chinese
* Version: 0.2.1 Beta
* Copyright: None. you can use it whenever you like.
* Struggling azy
* Mail: supporter@qq.com
* QQ: 8820857
* Pig page: http://www.ourmind.cn/(planned, though planned for 1 year ...)
* Thanks: the code in this class was collected from the Internet, so I cannot state the authors' names with confidence. Sources =>
* The author of the simplified/traditional conversion is unknown. I found it through Google and have forgotten the website; the original code did not credit an author. Sorry.
* GBK to pinyin is from hightman, see: http://cws.twomice.net/py/getpy.php?source
* GBK/BIG5 to UTF-8 and UTF-8 to GBK/BIG5 code is from Wen, see: http://www.wensh.net/archive.php/topic/287.html
* GBK, BIG5 and UTF code tables are from http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/
* UTF-8 to Unicode conversion comes from Zhimeng (I am not sure of the origin myself; this is how search results attribute it)
* Update log: fixed an incorrect operator in the code-table lookup. If you find any bugs or other problems, please send feedback.
* Usage:
* $Conv = new converter;
* $String = 'GB2312 to BIG5 (simplified to traditional Chinese)';
* $String = $Conv->convert('GB', 'BIG', $String);
* $String = 'BIG5 to GB2312 (traditional to simplified Chinese)';
* $String = $Conv->convert('BIG', 'GB', $String);
* $String = 'GBK to UTF-8';
* $String = $Conv->convert('GBK', 'UTF', $String);
* $String = 'BIG5 to UTF-8';
* $String = $Conv->convert('BIG', 'UTF', $String);
* $String = 'UTF-8 to GBK';
* $String = $Conv->convert('UTF', 'GBK', $String);
* $String = 'UTF-8 to BIG5';
* $String = $Conv->convert('UTF', 'BIG', $String);
*/
/**
 * Table-driven converter between GB2312/GBK, BIG5 and UTF-8, plus GBK to
 * pinyin transliteration.
 *
 * Every conversion reads a binary "*.table" file from $TablePath. A table is
 * either loaded completely into memory ('mem' mode) or accessed with
 * fseek/fread on an open handle ('file' mode). When the iconv extension is
 * available, the GBK/BIG5 <-> UTF-8 conversions are delegated to it first.
 */
class converter {
    var $ICONV = false;   // whether the iconv function is available
    var $TablePath;       // directory holding the code tables (with trailing slash)
    // Inputs shorter than this many bytes use 'file' mode, all others load the
    // whole table into memory ('mem' mode: faster, but needs more memory).
    // The default of 0 means 'mem' mode is always chosen.
    var $UseMemSize = 0;

    /**
     * Constructor: stores the code table directory and detects iconv.
     *
     * @param string|null $TablePath Directory containing the *.table files.
     *                               When empty, COMMON . './map/' is used
     *                               (COMMON is a constant defined outside this
     *                               class; if it is not defined, './map/').
     */
    function __construct($TablePath = null) {
        if (empty($TablePath)) {
            $TablePath = (defined('COMMON') ? COMMON : '') . './map/';
        }
        $this->TablePath = $TablePath;
        $this->ICONV = function_exists('iconv');
    }

    /**
     * PHP 4-style constructor name, kept so existing code that calls
     * converter() explicitly keeps working. PHP 8 no longer treats a method
     * named after the class as the constructor, hence __construct() above.
     */
    function converter($TablePath = null) {
        $this->__construct($TablePath);
    }

    /**
     * Converts $String from one encoding to another by dispatching to the
     * method named "<source>2<target>" (for example 'gbk' + 'utf' => gbk2utf).
     *
     * @param string $Source Source encoding name (case-insensitive).
     * @param string $Target Target encoding name (case-insensitive).
     * @param string $String Text to convert.
     * @return mixed The converted string, or false when the conversion is not
     *               supported or the required code table is missing.
     */
    function convert($Source, $Target, $String = '') {
        $Func = strtolower(trim($Source)) . '2' . strtolower(trim($Target));
        // Only dispatch to methods that can be called with a single argument;
        // an arbitrary name would otherwise end in a fatal error.
        $Supported = array(
            'gb2big', 'big2gb', 'gbk2py', 'gbk2pinyin', 'gbk2utf', 'big2utf',
            'utf2gbk', 'utf2big', 'uni2utf', 'utf2uni', 'chs2uni', 'utf2chs',
        );
        if (!in_array($Func, $Supported, true)) {
            return false;
        }
        return $this->$Func($String);
    }

    /**
     * Encodes one Unicode code point as UTF-8.
     * See http://www.linuxforum.net/books/UTF-8-Unicode.html
     *
     * @param int $Char Code point.
     * @return string 1 to 4 bytes, or '' for values of 0x200000 and above.
     */
    function uni2utf($Char) {
        if ($Char < 0x80) {
            // A single byte; the byte itself, not the decimal digits of the number.
            return chr($Char);
        }
        if ($Char < 0x800) {
            return chr(0xC0 | ($Char >> 6))
                 . chr(0x80 | ($Char & 0x3F));
        }
        if ($Char < 0x10000) {
            return chr(0xE0 | ($Char >> 12))
                 . chr(0x80 | (($Char >> 6) & 0x3F))
                 . chr(0x80 | ($Char & 0x3F));
        }
        if ($Char < 0x200000) {
            return chr(0xF0 | ($Char >> 18))
                 . chr(0x80 | (($Char >> 12) & 0x3F))
                 . chr(0x80 | (($Char >> 6) & 0x3F))
                 . chr(0x80 | ($Char & 0x3F));
        }
        return '';
    }

    /**
     * Decodes one UTF-8 encoded character into its Unicode code point.
     *
     * @param string $Char The bytes of exactly one character (1 to 4 bytes).
     * @return int|null The code point, or null for any other length.
     */
    function utf2uni($Char) {
        switch (strlen($Char)) {
            case 1:
                return ord($Char);
            case 2:
                return ((ord($Char[0]) & 0x1F) << 6)
                     + (ord($Char[1]) & 0x3F);
            case 3:
                return ((ord($Char[0]) & 0x0F) << 12)
                     + ((ord($Char[1]) & 0x3F) << 6)
                     + (ord($Char[2]) & 0x3F);
            case 4:
                return ((ord($Char[0]) & 0x07) << 18)
                     + ((ord($Char[1]) & 0x3F) << 12)
                     + ((ord($Char[2]) & 0x3F) << 6)
                     + (ord($Char[3]) & 0x3F);
        }
        return null;
    }

    /**
     * Reads exactly $Length bytes at $Offset from a table that is either an
     * in-memory string or an open file handle.
     *
     * @return string|false The bytes, or false when the offset is negative,
     *                      the seek fails, or fewer bytes are available.
     */
    function _readTable($MapTable, $Offset, $Length, $InMemory) {
        if ($Offset < 0) {
            return false;
        }
        if ($InMemory) {
            $Bytes = substr($MapTable, $Offset, $Length);
        } else {
            if (fseek($MapTable, $Offset, SEEK_SET) !== 0) {
                return false;
            }
            $Bytes = fread($MapTable, $Length);
        }
        return (is_string($Bytes) && strlen($Bytes) === $Length) ? $Bytes : false;
    }

    /**
     * Opens a sorted lookup table (as used by chs2uni and utf2chs) and reads
     * its 2-byte little-endian size header.
     *
     * @return array|false array($MapTable, $MapSize), or false when the file
     *                     is missing, cannot be opened, or has no header.
     */
    function _openSearchTable($MapFile, $InMemory) {
        if (!file_exists($MapFile)) {
            return false;
        }
        $MapTable = $InMemory ? file_get_contents($MapFile) : fopen($MapFile, 'rb');
        if ($MapTable === false) {
            return false;
        }
        $Header = $this->_readTable($MapTable, 0, 2, $InMemory);
        if ($Header === false) {
            if (!$InMemory) {
                fclose($MapTable);
            }
            return false;
        }
        return array($MapTable, ord($Header[0]) + 256 * ord($Header[1]));
    }

    /**
     * Binary-searches a lookup table for $Code.
     *
     * Entries are 4 bytes each and start after the 2-byte header: a 2-byte
     * little-endian key followed by 2 value bytes. Entry numbers are 1-based.
     *
     * @return string|false The 2 raw value bytes of the matching entry, or
     *                      false when there is no match.
     */
    function _searchTable($MapTable, $MapSize, $Code, $InMemory) {
        $SearchStart = 1;
        $SearchEnd = $MapSize;
        while ($SearchStart < $SearchEnd - 1) {
            $SearchMid = (int) (($SearchStart + $SearchEnd) / 2);
            $Entry = $this->_readTable($MapTable, 4 * ($SearchMid - 1) + 2, 4, $InMemory);
            if ($Entry === false) {
                return false;
            }
            $MapEncode = ord($Entry[0]) + 256 * ord($Entry[1]);
            if ($Code == $MapEncode) {
                return substr($Entry, 2, 2);
            }
            if ($Code > $MapEncode) {
                $SearchStart = $SearchMid;
            } else {
                $SearchEnd = $SearchMid;
            }
        }
        // The loop narrows the range without testing $SearchStart itself.
        // NOTE(review): entry number $MapSize is never probed; whether that is
        // intended depends on what the header counts - confirm against the
        // table generator.
        $Entry = $this->_readTable($MapTable, 2 + 4 * ($SearchStart - 1), 4, $InMemory);
        if ($Entry === false) {
            return false;
        }
        $Encode = ord($Entry[0]) + 256 * ord($Entry[1]);
        return ($Code == $Encode) ? substr($Entry, 2, 2) : false;
    }

    /**
     * Simplified <-> traditional Chinese conversion between GB and BIG5
     * using the direct-indexed tables gb2big.table / big2gb.table.
     * Note: GB2312 is a subset of GBK.
     *
     * @param string $String Input bytes in the source encoding.
     * @param string $Target 'GB' to convert BIG5 to GB; anything else
     *                       converts GB to BIG5 (case-insensitive).
     * @param string $Type   'mem' to load the table into memory, anything
     *                       else to read it through a file handle.
     * @return string|false Converted bytes, or false when the table file is
     *                      missing or cannot be opened.
     */
    function chs2chs($String, $Target, $Type) {
        $ToGb = (strtoupper(trim($Target)) === 'GB');
        // Parenthesised on purpose: '.' binds tighter than the comparison.
        $TableFile = $this->TablePath . ($ToGb ? 'big2gb.table' : 'gb2big.table');
        if (!file_exists($TableFile)) {
            return false;
        }
        $InMemory = (trim($Type) == 'mem');
        $MapTable = $InMemory ? file_get_contents($TableFile) : fopen($TableFile, 'rb');
        if ($MapTable === false) {
            return false;
        }
        $StringLenth = strlen($String);
        $ReturnStr = '';
        for ($Foo = 0; $Foo < $StringLenth; $Foo++) {
            if (ord($String[$Foo]) <= 127) {
                // ASCII is copied through unchanged.
                $ReturnStr .= $String[$Foo];
                continue;
            }
            $Str = substr($String, $Foo, 2);
            if (strlen($Str) < 2) {
                // Truncated double-byte character at the end of the input.
                $ReturnStr .= $Str;
                break;
            }
            // Each lead byte owns a 510-byte row; the trail byte selects a
            // 2-byte cell inside that row.
            $MapAddr = ((ord($Str[0]) - 160) * 510) + (ord($Str[1]) - 1) * 2;
            $Mapped = $this->_readTable($MapTable, $MapAddr, 2, $InMemory);
            // Characters outside the table are passed through unchanged.
            $ReturnStr .= ($Mapped === false) ? $Str : $Mapped;
            $Foo++;
        }
        if (!$InMemory) {
            fclose($MapTable);
        }
        return $ReturnStr;
    }

    /**
     * Converts GBK text to pinyin using gbk2py.table (8 bytes per record).
     * Because the code table uses GBK, it also applies to GB2312 input.
     * Based on hightman's implementation, see
     * http://www.hightman.cn/demo/getpy.php?source
     *
     * @param string $String GBK encoded input.
     * @param string $Type   'mem' to load the table into memory, anything
     *                       else to read it through a file handle.
     * @return string|false Input with each double-byte character replaced by
     *                      its table record ('_' when it has none), or false
     *                      when the table file is missing or cannot be opened.
     */
    function GBK2PINYIN($String, $Type = 'file') {
        $TableFile = $this->TablePath . 'gbk2py.table';
        if (!file_exists($TableFile)) {
            return false;
        }
        $InMemory = (trim($Type) == 'mem');
        $MapTable = $InMemory ? file_get_contents($TableFile) : fopen($TableFile, 'rb');
        if ($MapTable === false) {
            return false;
        }
        $StringLenth = strlen($String);
        $ReturnStr = '';
        for ($Foo = 0; $Foo < $StringLenth; $Foo++) {
            $Char = ord($String[$Foo]);
            if ($Char <= 127) {
                $ReturnStr .= $String[$Foo];
                continue;
            }
            $Str = substr($String, $Foo, 2);
            $High = $Char - 129;
            $Low = (strlen($Str) > 1 ? ord($Str[1]) : 0) - 64;
            $Addr = ($High << 8) + $Low - ($High * 64);
            if ($Addr < 0) {
                // Not a table position: mark it and re-examine the next byte.
                $ReturnStr .= '_';
                continue;
            }
            $Record = $this->_readTable($MapTable, $Addr * 8, 8, $InMemory);
            // Records are NUL-padded to 8 bytes; unpack('a8') stopped removing
            // that padding in PHP 5.5, so it is stripped explicitly here.
            $ReturnStr .= ($Record === false) ? '_' : rtrim($Record, "\0");
            $Foo++;
        }
        if (!$InMemory) {
            fclose($MapTable);
        }
        return $ReturnStr;
    }

    /**
     * GBK or BIG5 to Unicode; combined with uni2utf this yields UTF-8.
     * Based on http://www.wensh.net/archive.php/topic/287.html
     *
     * @param string $String Input bytes in the source encoding.
     * @param string $Source 'gbk' selects gbk2uni.table; anything else
     *                       selects big2uni.table (case-insensitive).
     * @param string $Target 'utf' to emit UTF-8; anything else appends each
     *                       code point as a decimal number.
     * @param string $Type   'mem' to load the table into memory, anything
     *                       else to read it through a file handle.
     * @return string|false Converted text ('__' for each unmapped character),
     *                      or false when the table is missing or unreadable.
     */
    function chs2uni($String, $Source = 'gbk', $Target = 'utf', $Type = 'file') {
        $Source = strtolower(trim($Source));
        $ToUtf = (strtolower(trim($Target)) === 'utf');
        // iconv can only produce UTF-8, so it is skipped for other targets.
        if ($ToUtf && $this->ICONV && ($Source === 'gbk' || $Source === 'big')) {
            // '@' silences the notice iconv raises on an illegal sequence; in
            // that case the table lookup below is used instead.
            $Converted = @iconv($Source === 'gbk' ? 'gbk' : 'big5', 'utf-8', $String);
            if ($Converted !== false) {
                return $Converted;
            }
        }
        $MapFile = $this->TablePath . ($Source === 'gbk' ? 'gbk2uni.table' : 'big2uni.table');
        $InMemory = (trim($Type) == 'mem');
        $Opened = $this->_openSearchTable($MapFile, $InMemory);
        if ($Opened === false) {
            return false;
        }
        list($MapTable, $MapSize) = $Opened;
        $ReturnStr = '';
        $StringLenth = strlen($String);
        for ($Foo = 0; $Foo < $StringLenth; $Foo++) {
            if (ord($String[$Foo]) <= 127) {
                $ReturnStr .= $String[$Foo];
                continue;
            }
            // Table keys are the two source bytes read as a big-endian number.
            $Str = substr($String, $Foo, 2);
            $StrEncode = hexdec(bin2hex($Str));
            $Value = $this->_searchTable($MapTable, $MapSize, $StrEncode, $InMemory);
            if ($Value === false) {
                $ReturnStr .= '__';
            } else {
                $StrUni = ord($Value[0]) + 256 * ord($Value[1]);
                $ReturnStr .= $ToUtf ? $this->uni2utf($StrUni) : $StrUni;
            }
            $Foo++;
        }
        if (!$InMemory) {
            fclose($MapTable);
        }
        return $ReturnStr;
    }

    /**
     * UTF-8 to GBK or BIG5.
     *
     * @param string $String UTF-8 encoded input.
     * @param string $Target 'gbk' selects uni2gbk.table; anything else
     *                       selects uni2big.table (case-insensitive).
     * @param string $Type   'mem' to load the table into memory, anything
     *                       else to read it through a file handle.
     * @return string|false Converted bytes ('__' for each unmapped or
     *                      malformed character), or false when the table is
     *                      missing or unreadable.
     */
    function utf2chs($String, $Target = 'gbk', $Type = 'file') {
        $Target = strtolower(trim($Target));
        if ($this->ICONV && ($Target === 'gbk' || $Target === 'big')) {
            // '@' silences the notice iconv raises on an unconvertible
            // character; in that case the table lookup below is used instead.
            $Converted = @iconv('utf-8', $Target === 'gbk' ? 'gbk' : 'big5', $String);
            if ($Converted !== false) {
                return $Converted;
            }
        }
        // Parenthesised on purpose: '.' binds tighter than the comparison.
        $MapFile = $this->TablePath . ($Target === 'gbk' ? 'uni2gbk.table' : 'uni2big.table');
        $InMemory = (trim($Type) == 'mem');
        $Opened = $this->_openSearchTable($MapFile, $InMemory);
        if ($Opened === false) {
            return false;
        }
        list($MapTable, $MapSize) = $Opened;
        $ReturnStr = '';
        $StringLenth = strlen($String);
        for ($Foo = 0; $Foo < $StringLenth; $Foo++) {
            $Lead = ord($String[$Foo]);
            if ($Lead <= 127) {
                $ReturnStr .= $String[$Foo];
                continue;
            }
            // The lead byte determines how many bytes the character occupies;
            // not every non-ASCII character is three bytes long.
            if ($Lead >= 0xF0 && $Lead <= 0xF7) {
                $Length = 4;
            } elseif ($Lead >= 0xE0 && $Lead <= 0xEF) {
                $Length = 3;
            } elseif ($Lead >= 0xC0 && $Lead <= 0xDF) {
                $Length = 2;
            } else {
                $Length = 1; // stray continuation byte or invalid lead byte
            }
            $Str = substr($String, $Foo, $Length);
            $Value = false;
            if ($Length > 1 && strlen($Str) === $Length) {
                $StrEncode = $this->utf2uni($Str);
                $Value = $this->_searchTable($MapTable, $MapSize, $StrEncode, $InMemory);
            }
            // The table stores the low byte first; output is high byte first.
            $ReturnStr .= ($Value === false) ? '__' : $Value[1] . $Value[0];
            $Foo += strlen($Str) - 1;
        }
        if (!$InMemory) {
            fclose($MapTable);
        }
        return $ReturnStr;
    }

    /**
     * Chooses the table access mode for an input: 'file' when it is shorter
     * than $UseMemSize bytes, otherwise 'mem'.
     */
    function _mode($String) {
        return strlen($String) < $this->UseMemSize ? 'file' : 'mem';
    }

    /** GB (simplified) to BIG5 (traditional). */
    function gb2big($String) {
        return $this->chs2chs($String, 'BIG', $this->_mode($String));
    }

    /** BIG5 (traditional) to GB (simplified). */
    function big2gb($String) {
        return $this->chs2chs($String, 'GB', $this->_mode($String));
    }

    /** GBK to pinyin; delegates to GBK2PINYIN. */
    function gbk2py($String) {
        return $this->GBK2PINYIN($String, $this->_mode($String));
    }

    /** GBK to UTF-8. */
    function gbk2utf($String) {
        return $this->chs2uni($String, 'gbk', 'utf', $this->_mode($String));
    }

    /** BIG5 to UTF-8. */
    function big2utf($String) {
        return $this->chs2uni($String, 'big', 'utf', $this->_mode($String));
    }

    /** UTF-8 to GBK. */
    function utf2gbk($String) {
        return $this->utf2chs($String, 'gbk', $this->_mode($String));
    }

    /** UTF-8 to BIG5. */
    function utf2big($String) {
        return $this->utf2chs($String, 'big', $this->_mode($String));
    }
}
?>