Chinese encoding conversion class supporting conversion between UTF-8, GBK, and Big5

Source: Internet
Author: User
Tags unpack
The authors of the simplified/traditional Chinese conversion and of the UTF-8, GBK, and Big5 conversion code are unknown. I no longer remember the website I got it from, and the original code did not credit an author. Sorry. /**
* Name: Chinese
* Version: 0.2.1 Beta
* Copyright: None. you can use it whenever you like.
* Struggling azy
* Mail: supporter@qq.com
* QQ: 8820857
* Pig page: http://www.ourmind.cn/(planned, though planned for 1 year ...)
* Thanks: The code in this class comes from the Internet, so I dare not put my own name on it as the author. The sources are =>
* Simplified/traditional conversion: author unknown. I found it through Google and forgot the website; the original code did not name its author. Sorry.
* GBK to pinyin: from hightman, see http://cws.twomice.net/py/getpy.php?source
* GBK/BIG to UTF and UTF to GBK/BIG: code from Wen, see http://www.wensh.net/archive.php/topic/287.html
* GBK, BIG, and UTF code tables: from http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/
* UTF-to-UNI conversion: from Zhimeng (I am not sure about this, but Google attributes it that way)
* Update log: fixed an incorrect operator in the code-table lookup. If you find any bugs or other problems, please send feedback...
* Usage:
* $Conv = new converter;
* $String = 'gb2312 to BIG (simplified to traditional Chinese)';
* $String = $Conv->convert('GB', 'BIG', $String);
* $String = 'BIG to GB2312 (traditional to simplified)';
* $String = $Conv->convert('BIG', 'GB', $String);
* $String = 'gbk to utf';
* $String = $Conv->convert('gbk', 'utf', $String);
* $String = 'BIG to utf';
* $String = $Conv->convert('BIG', 'utf', $String);
* $String = 'utf to gbk';
* $String = $Conv->convert('utf', 'gbk', $String);
* $String = 'utf to BIG';
* $String = $Conv->convert('utf', 'BIG', $String);
*/
/**
 * Converts Chinese text between GBK/GB2312, Big5 and UTF-8, between
 * simplified and traditional Chinese, and from GBK to pinyin.
 *
 * Conversions to and from UTF-8 use iconv() when it is available; every
 * other conversion (and the UTF-8 ones when iconv is missing) reads binary
 * "*.table" code tables from $TablePath.
 */
class converter
{
    /** @var bool Whether the iconv() function is available. */
    public $ICONV;

    /** @var string Directory prefix (including trailing slash) of the code tables. */
    public $TablePath;

    /**
     * @var int|null Inputs shorter than this many bytes are converted by
     *      seeking inside the table file; all other inputs load the whole
     *      table into memory (faster, but needs more memory). This class
     *      never assigns it; while it is null the comparison is always
     *      false, so every input takes the in-memory path.
     */
    public $UseMemSize;

    /** Conversions reachable through convert(), named "<source>2<target>". */
    private static $Conversions = array(
        'gb2big', 'big2gb', 'gbk2py', 'gbk2utf', 'big2utf', 'utf2gbk', 'utf2big',
    );

    /**
     * Initialises the table path and detects iconv.
     *
     * @param string|null $TablePath Code-table directory with trailing slash.
     *        When empty, COMMON . './map/' is used. COMMON is not defined in
     *        this class (presumably by the host application -- confirm); if
     *        it is undefined the path falls back to './map/'.
     */
    function __construct($TablePath = null)
    {
        if (empty($TablePath)) {
            $TablePath = (defined('COMMON') ? COMMON : '') . './map/';
        }
        $this->TablePath = $TablePath;
        $this->ICONV = function_exists('iconv');
    }

    /**
     * PHP 4-style constructor, kept so existing code that calls it
     * explicitly keeps working. PHP 8 no longer treats a method named after
     * the class as a constructor, hence __construct() above.
     */
    function converter($TablePath = null)
    {
        $this->__construct($TablePath);
    }

    /**
     * Converts $String from encoding $Source to encoding $Target.
     *
     * @param string $Source One of 'gb', 'big', 'gbk', 'utf' (case-insensitive).
     * @param string $Target One of 'gb', 'big', 'gbk', 'utf', 'py' (case-insensitive).
     * @param string $String Text to convert.
     * @return string|false Converted text, or false when the pair is not
     *         supported or the underlying conversion fails.
     */
    function convert($Source, $Target, $String = '')
    {
        $Func = strtolower($Source) . '2' . strtolower($Target);
        // Only dispatch to the known wrappers; never call an arbitrary
        // method whose name was assembled from caller input.
        if (!in_array($Func, self::$Conversions, true)) {
            return false;
        }
        return $this->$Func($String);
    }

    /**
     * Encodes one Unicode code point as UTF-8.
     * See http://www.linuxforum.net/books/UTF-8-Unicode.html
     *
     * @param int $Char Code point.
     * @return string UTF-8 bytes; '' for code points >= 0x200000.
     */
    function uni2utf($Char)
    {
        if ($Char < 0x80) {
            return chr($Char);
        }
        if ($Char < 0x800) {
            return chr(0xC0 | ($Char >> 6))
                 . chr(0x80 | ($Char & 0x3F));
        }
        if ($Char < 0x10000) {
            return chr(0xE0 | ($Char >> 12))
                 . chr(0x80 | (($Char >> 6) & 0x3F))
                 . chr(0x80 | ($Char & 0x3F));
        }
        if ($Char < 0x200000) {
            return chr(0xF0 | ($Char >> 18))
                 . chr(0x80 | (($Char >> 12) & 0x3F))
                 . chr(0x80 | (($Char >> 6) & 0x3F))
                 . chr(0x80 | ($Char & 0x3F));
        }
        return '';
    }

    /**
     * Decodes one UTF-8 sequence into its Unicode code point.
     *
     * @param string $Char Exactly one UTF-8 sequence of 1 to 4 bytes.
     * @return int|null Code point, or null for any other length.
     */
    function utf2uni($Char)
    {
        switch (strlen($Char)) {
            case 1:
                return ord($Char);
            case 2:
                return ((ord($Char[0]) & 0x1F) << 6)
                     +  (ord($Char[1]) & 0x3F);
            case 3:
                return ((ord($Char[0]) & 0x0F) << 12)
                     + ((ord($Char[1]) & 0x3F) << 6)
                     +  (ord($Char[2]) & 0x3F);
            case 4:
                return ((ord($Char[0]) & 0x07) << 18)
                     + ((ord($Char[1]) & 0x3F) << 12)
                     + ((ord($Char[2]) & 0x3F) << 6)
                     +  (ord($Char[3]) & 0x3F);
        }
        return null;
    }

    /**
     * Converts between simplified (GB) and traditional (Big5) Chinese using
     * the big2gb.table / gb2big.table direct-index tables.
     * Note: GB2312 is a subset of GBK.
     *
     * iconv is deliberately not used here: it converts between encodings,
     * not between simplified and traditional characters.
     *
     * @param string $String Double-byte encoded input.
     * @param string $Target 'GB' selects big2gb.table; anything else gb2big.table.
     * @param string $Type   'mem' loads the table into memory; anything else seeks in the file.
     * @return string|false Converted text, or false when the table cannot be opened.
     *         A character with no table entry, and a truncated trailing
     *         byte, are copied through unchanged.
     */
    function chs2chs($String, $Target, $Type)
    {
        $Type = $this->normalizeType($Type);
        // Parentheses matter: '.' binds tighter than '==', so without them
        // the directory would be compared with 'GB' and then discarded.
        $TableFile = $this->TablePath
                   . (strtoupper($Target) == 'GB' ? 'big2gb.table' : 'gb2big.table');
        $MapTable = $this->openTable($TableFile, $Type);
        if ($MapTable === false) {
            return false;
        }
        $StringLenth = strlen($String);
        $ReturnStr = '';
        for ($Foo = 0; $Foo < $StringLenth; $Foo++) {
            if (ord($String[$Foo]) > 127 && $Foo + 1 < $StringLenth) {
                $High = ord($String[$Foo]);
                $Low = ord($String[$Foo + 1]);
                $MapAddr = (($High - 160) * 510) + ($Low - 1) * 2;
                $Mapped = $this->readTableBytes($MapTable, $Type, $MapAddr, 2);
                $ReturnStr .= ($Mapped === false) ? substr($String, $Foo, 2) : $Mapped;
                $Foo++; // the trailing byte has been consumed
            } else {
                $ReturnStr .= $String[$Foo];
            }
        }
        $this->closeTable($MapTable, $Type);
        return $ReturnStr;
    }

    /**
     * Converts GBK text to pinyin using gbk2py.table (8 bytes per entry).
     * Because the table is GBK-based it also works for GB2312 input.
     * Derived from hightman's code, see http://www.hightman.cn/demo/getpy.php?source
     *
     * @param string $String GBK-encoded input.
     * @param string $Type   'mem' loads the table into memory; anything else seeks in the file.
     * @return string|false Text with Chinese characters replaced by pinyin,
     *         or false when the table cannot be opened. A byte pair that
     *         maps to a negative table address yields '_' and only its
     *         first byte is consumed.
     */
    function GBK2PINYIN($String, $Type = 'file')
    {
        $Type = $this->normalizeType($Type);
        $TableFile = $this->TablePath . 'gbk2py.table';
        $MapTable = $this->openTable($TableFile, $Type);
        if ($MapTable === false) {
            return false;
        }
        $StringLenth = strlen($String);
        $ReturnStr = '';
        for ($Foo = 0; $Foo < $StringLenth; $Foo++) {
            if (ord($String[$Foo]) > 127) {
                $High = ord($String[$Foo]) - 129;
                // A truncated trailing byte is treated as 0, as before.
                $Low = ($Foo + 1 < $StringLenth ? ord($String[$Foo + 1]) : 0) - 64;
                $Addr = ($High << 8) + $Low - ($High * 64);
                if ($Addr < 0) {
                    $ReturnStr .= '_';
                } else {
                    $MapStr = $this->readTableBytes($MapTable, $Type, $Addr * 8, 8);
                    if ($MapStr !== false) {
                        $BinStr = unpack('a8py', $MapStr);
                        // Since PHP 5.5 the 'a' format keeps trailing NUL
                        // padding; strip it so the output matches older PHP.
                        $ReturnStr .= rtrim($BinStr['py'], "\0");
                    }
                    $Foo++;
                }
            } else {
                $ReturnStr .= $String[$Foo];
            }
        }
        $this->closeTable($MapTable, $Type);
        return $ReturnStr;
    }

    /**
     * Converts GBK or Big5 text to Unicode code points, emitted as UTF-8
     * when $Target is 'utf'.
     * Derived from http://www.wensh.net/archive.php/topic/287.html
     *
     * @param string $String Double-byte encoded input.
     * @param string $Source 'gbk' uses gbk2uni.table; anything else big2uni.table.
     * @param string $Target 'utf' emits UTF-8; any other value appends the
     *        code point as a decimal number.
     * @param string $Type   'mem' loads the table into memory; anything else seeks in the file.
     * @return string|false Converted text ('__' for unmapped characters), or
     *         false when iconv fails or the table cannot be opened.
     */
    function chs2uni($String, $Source = 'gbk', $Target = 'utf', $Type = 'file')
    {
        $Type = $this->normalizeType($Type);
        $Source = strtolower($Source);
        $ToUtf = (strtolower($Target) == 'utf');
        // iconv can only stand in for the table when UTF-8 output is wanted.
        if ($this->ICONV && $ToUtf && ($Source == 'gbk' || $Source == 'big')) {
            return iconv($Source == 'gbk' ? 'gbk' : 'big5', 'utf-8', $String);
        }
        $MapFile = $this->TablePath . ($Source == 'gbk' ? 'gbk2uni.table' : 'big2uni.table');
        $MapTable = $this->openTable($MapFile, $Type);
        if ($MapTable === false) {
            return false;
        }
        $MapSize = $this->readTableSize($MapTable, $Type);
        $ReturnStr = '';
        $StringLenth = strlen($String);
        for ($Foo = 0; $Foo < $StringLenth; $Foo++) {
            if (ord($String[$Foo]) > 127) {
                // Big-endian value of the two-byte character is the table key.
                $StrEncode = hexdec(bin2hex(substr($String, $Foo, 2)));
                $Value = $this->searchTable($MapTable, $Type, $MapSize, $StrEncode);
                if ($Value === false) {
                    $ReturnStr .= '__';
                } else {
                    $StrUni = ord($Value[0]) + 256 * ord($Value[1]);
                    $ReturnStr .= $ToUtf ? $this->uni2utf($StrUni) : $StrUni;
                }
                $Foo++;
            } else {
                $ReturnStr .= $String[$Foo];
            }
        }
        $this->closeTable($MapTable, $Type);
        return $ReturnStr;
    }

    /**
     * Converts UTF-8 text to GBK or Big5.
     *
     * @param string $String UTF-8 input.
     * @param string $Target 'gbk' uses uni2gbk.table; anything else uni2big.table.
     * @param string $Type   'mem' loads the table into memory; anything else seeks in the file.
     * @return string|false Converted text, or false when iconv fails or the
     *         table cannot be opened. On the table path every unmappable,
     *         malformed or truncated sequence becomes '__'.
     */
    function utf2chs($String, $Target = 'gbk', $Type = 'file')
    {
        $Type = $this->normalizeType($Type);
        $Target = strtolower($Target);
        if ($this->ICONV && ($Target == 'gbk' || $Target == 'big')) {
            return iconv('utf-8', $Target == 'gbk' ? 'gbk' : 'big5', $String);
        }
        $MapFile = $this->TablePath . ($Target == 'gbk' ? 'uni2gbk.table' : 'uni2big.table');
        $MapTable = $this->openTable($MapFile, $Type);
        if ($MapTable === false) {
            return false;
        }
        $MapSize = $this->readTableSize($MapTable, $Type);
        $ReturnStr = '';
        $StringLenth = strlen($String);
        for ($Foo = 0; $Foo < $StringLenth; $Foo++) {
            $Lead = ord($String[$Foo]);
            if ($Lead <= 127) {
                $ReturnStr .= $String[$Foo];
                continue;
            }
            // Sequence length comes from the lead byte; it is not always 3.
            $Length = $this->utfSequenceLength($Lead);
            $Sequence = substr($String, $Foo, max($Length, 1));
            $Value = false;
            if ($Length > 0 && strlen($Sequence) == $Length) {
                $Value = $this->searchTable($MapTable, $Type, $MapSize, $this->utf2uni($Sequence));
            }
            // Table values are stored low byte first; output is high, low.
            $ReturnStr .= ($Value === false) ? '__' : $Value[1] . $Value[0];
            $Foo += strlen($Sequence) - 1;
        }
        $this->closeTable($MapTable, $Type);
        return $ReturnStr;
    }

    /** Simplified (GB) to traditional (Big5) Chinese. */
    function gb2big($String)
    {
        return $this->chs2chs($String, 'BIG', $this->tableMode($String));
    }

    /** Traditional (Big5) to simplified (GB) Chinese. */
    function big2gb($String)
    {
        return $this->chs2chs($String, 'GB', $this->tableMode($String));
    }

    /** GBK text to pinyin. */
    function gbk2py($String)
    {
        // Must delegate to GBK2PINYIN(); calling gbk2py() here would
        // recurse forever.
        return $this->GBK2PINYIN($String, $this->tableMode($String));
    }

    /** GBK to UTF-8. */
    function gbk2utf($String)
    {
        return $this->chs2uni($String, 'gbk', 'utf', $this->tableMode($String));
    }

    /** Big5 to UTF-8. */
    function big2utf($String)
    {
        return $this->chs2uni($String, 'big', 'utf', $this->tableMode($String));
    }

    /** UTF-8 to GBK. */
    function utf2gbk($String)
    {
        return $this->utf2chs($String, 'gbk', $this->tableMode($String));
    }

    /** UTF-8 to Big5. */
    function utf2big($String)
    {
        return $this->utf2chs($String, 'big', $this->tableMode($String));
    }

    /** Picks 'file' for inputs shorter than $UseMemSize bytes, else 'mem'. */
    private function tableMode($String)
    {
        return strlen($String) < $this->UseMemSize ? 'file' : 'mem';
    }

    /** Collapses $Type to 'mem' or 'file' so open/read/close always agree. */
    private function normalizeType($Type)
    {
        return $Type == 'mem' ? 'mem' : 'file';
    }

    /** Opens a table as a string ('mem') or file handle ('file'); false on failure. */
    private function openTable($File, $Type)
    {
        if (!file_exists($File)) {
            return false;
        }
        return $Type == 'mem' ? file_get_contents($File) : fopen($File, 'rb');
    }

    /** Releases a table opened in 'file' mode; nothing to do for 'mem'. */
    private function closeTable($MapTable, $Type)
    {
        if ($Type != 'mem') {
            fclose($MapTable);
        }
    }

    /** Reads exactly $Length bytes at $Offset; false when out of range or short. */
    private function readTableBytes($MapTable, $Type, $Offset, $Length)
    {
        if ($Offset < 0) {
            return false;
        }
        if ($Type == 'mem') {
            $Bytes = substr($MapTable, $Offset, $Length);
        } else {
            if (fseek($MapTable, $Offset) !== 0) {
                return false;
            }
            $Bytes = fread($MapTable, $Length);
        }
        return (is_string($Bytes) && strlen($Bytes) === $Length) ? $Bytes : false;
    }

    /** Reads the little-endian 16-bit entry count at the start of a search table (0 if unreadable). */
    private function readTableSize($MapTable, $Type)
    {
        $Head = $this->readTableBytes($MapTable, $Type, 0, 2);
        return $Head === false ? 0 : ord($Head[0]) + 256 * ord($Head[1]);
    }

    /**
     * Binary-searches a table of 4-byte entries (16-bit little-endian key,
     * 2 value bytes) that starts at byte offset 2, using 1-based indices.
     *
     * @return string|false The 2 raw value bytes for $Code, or false when absent.
     */
    private function searchTable($MapTable, $Type, $MapSize, $Code)
    {
        $SearchStart = 1;
        $SearchEnd = $MapSize;
        while ($SearchStart < $SearchEnd - 1) {
            // Integer midpoint: a float here would be used as a string/seek offset.
            $SearchMid = (int) (($SearchStart + $SearchEnd) / 2);
            $Key = $this->readTableBytes($MapTable, $Type, 4 * ($SearchMid - 1) + 2, 2);
            if ($Key === false) {
                return false;
            }
            $MapEncode = ord($Key[0]) + 256 * ord($Key[1]);
            if ($Code == $MapEncode) {
                $SearchStart = $SearchMid;
                break;
            }
            if ($Code > $MapEncode) {
                $SearchStart = $SearchMid;
            } else {
                $SearchEnd = $SearchMid;
            }
        }
        $Entry = $this->readTableBytes($MapTable, $Type, 4 * ($SearchStart - 1) + 2, 4);
        if ($Entry === false || $Code != ord($Entry[0]) + 256 * ord($Entry[1])) {
            return false;
        }
        return substr($Entry, 2, 2);
    }

    /** Byte length of the UTF-8 sequence introduced by $Lead; 0 if $Lead cannot start a multi-byte sequence. */
    private function utfSequenceLength($Lead)
    {
        if ($Lead < 0xC0) {
            return 0; // ASCII never reaches here; 0x80-0xBF is a stray continuation byte
        }
        if ($Lead < 0xE0) {
            return 2;
        }
        if ($Lead < 0xF0) {
            return 3;
        }
        if ($Lead < 0xF8) {
            return 4;
        }
        return 0;
    }
}
?>

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.