PHP code for a Chinese character-encoding conversion class library
 * Supported conversions: Simplified Chinese / Traditional Chinese -> pinyin (one-way); Simplified Chinese / Traditional Chinese <-> UTF-8; Simplified Chinese / Traditional Chinese -> Unicode (one-way).
 *
 * @author Hessian (solarischan@21cn.com)
 * @version 1.5
 * @copyright All rights reserved, Hessian / NETiS
 * @license GPL (may not be used for any commercial purpose; may be modified without the author's consent, but the modified code must be published under the GPL)
 * @special thanks to unknow (Simplified/Traditional conversion code snippet)
 * @start
 * @last modify
 * @access public
 *
 * UPDATE record
 *
 * Ver 1.7
 *   Fixed a bug caused by the while loop: the string was handled incorrectly when its last character was "0".
 *   Affected methods: CHStoUTF8(), CHStoUNICODE()
 *   (by Zeal Li, http://www.zeali.net/)
 *
 * Ver 1.6
 *   Added a constructor parameter so that the path of the conversion-table directory can be set conveniently.
 *   (by Zeal Li, http://www.zeali.net/)
 *
 * Ver 1.5
 *   Added conversion from UTF-8 to GB2312 and BIG5.
 *
 * Ver 1.4
 *   When converting HTML (isHTML set to true), the charset value in the document is rewritten as well.
 *
 * Ver 1.3
 *   Added conversion from Traditional Chinese to pinyin.
 *
 * Ver 1.2
 *   Merged the functions that convert Simplified and Traditional Chinese to UTF-8.
 *   The Simplified Chinese to pinyin function now returns a string in which the pinyin of each Chinese character is separated by spaces.
 *   Added conversion from Simplified Chinese to UNICODE.
 *   Added conversion from Traditional Chinese to UNICODE.
 *
 * Ver 1.1
 *   Added the OpenFile() function, which supports opening local and remote files.
 *   Added conversion from Simplified Chinese to UTF-8.
 *   Added conversion from Traditional Chinese to UTF-8.
 *
 * Ver 1.0
 *   Initial version of the class library for converting between the various Simplified and Traditional Chinese encodings.
/**
 * Chinese character-set conversion helper.
 *
 * Converts the text held in $SourceText between GB2312 and BIG5, from either
 * of those to pinyin, UTF-8 or hexadecimal HTML entities ("UNICODE"), and from
 * UTF-8 back to GB2312 or BIG5. The conversion tables are read from files in
 * $config['codetable_dir'].
 *
 * Failure handling follows the original design: a message is echoed and the
 * script exits.
 *
 * NOTE(review): this listing was rebuilt from a copy in which operators, loop
 * headers and parts of several method bodies had been lost. Every value that
 * could not be read from that copy is marked with NOTE(review) below and
 * should be confirmed against the shipped table files.
 */
class Chinese
{
    /** Pinyin rows loaded from the pinyin table; each row is array(pinyin, code). */
    public $pinyin_table = array();

    /** Lookup table loaded from one of the *-unicode tables (key and value depend on the direction). */
    public $unicode_table = array();

    /** File handle of the GB2312 <-> BIG5 interchange table. */
    public $ctf;

    /** The string awaiting conversion. */
    public $SourceText = "";

    /**
     * Run-time configuration. Keys are case-sensitive.
     *
     * NOTE(review): confirm the letter case of the table file names against
     * the files actually shipped; it matters on case-sensitive file systems.
     */
    public $config = array(
        'codetable_dir'       => "./config/",          // directory holding the conversion tables
        'sourcelang'          => '',                   // encoding of the input
        'targetlang'          => '',                   // encoding to convert to
        'gbtobig5_table'      => 'GB-big5.table',      // Simplified -> Traditional
        'big5togb_table'      => 'big5-gb.table',      // Traditional -> Simplified
        'gbtopinyin_table'    => 'GB-pinyin.table',    // Simplified -> pinyin
        'gbtounicode_table'   => 'GB-unicode.table',   // Simplified <-> Unicode
        'big5tounicode_table' => 'big5-unicode.table', // Traditional <-> Unicode
    );

    /**
     * Stores the settings and loads the table(s) needed for the conversion.
     *
     * @param string $SourceLang   Encoding of the input ("GB2312", "BIG5" or "UTF8").
     * @param string $TargetLang   Target ("GB2312", "BIG5", "UTF8", "UNICODE" or "PinYin").
     * @param string $SourceString Text to convert (optional; may be set later).
     * @param string $CodetableDir Directory holding the conversion tables (optional).
     */
    public function __construct($SourceLang, $TargetLang, $SourceString = '', $CodetableDir = '')
    {
        if ($SourceLang != '') {
            $this->config['sourcelang'] = $SourceLang;
        }
        if ($TargetLang != '') {
            $this->config['targetlang'] = $TargetLang;
        }
        if ($SourceString != '') {
            $this->SourceText = $SourceString;
        }
        if ($CodetableDir != '') {
            if (!is_dir($CodetableDir)) {
                $this->_fail("configuration directory [" . $CodetableDir . "] not exists! ");
            }
            // File names are appended directly to this value, so make sure it
            // ends with a directory separator.
            $this->config['codetable_dir'] = rtrim($CodetableDir, "/\\") . '/';
        }
        $this->OpenTable();
    }

    /**
     * Old-style constructor name, kept so that code calling it explicitly
     * (for example parent::Chinese(...)) continues to work.
     */
    public function Chinese($SourceLang, $TargetLang, $SourceString = '', $CodetableDir = '')
    {
        $this->__construct($SourceLang, $TargetLang, $SourceString, $CodetableDir);
    }

    /**
     * Converts a string of hexadecimal digits into the corresponding bytes.
     *
     * @param string $hexdata Hexadecimal digits, two per output byte.
     * @return string
     */
    public function _hex2bin($hexdata)
    {
        $bindata = "";
        $length = strlen($hexdata);
        for ($i = 0; $i < $length; $i += 2) {
            $bindata .= chr(hexdec(substr($hexdata, $i, 2)));
        }
        return $bindata;
    }

    /**
     * Loads the table required by the configured source/target pair.
     *
     * NOTE(review): the column offsets for the UTF8 cases (0,6 / 7,6) were
     * not legible in the copy this was rebuilt from; only the UNICODE case
     * (0,6 / 9,4) was. They presumably match tables laid out as
     * "0xXXXX<sep>0xXXXX ..." - confirm against the table files.
     */
    public function OpenTable()
    {
        $source = $this->config['sourcelang'];
        $target = $this->config['targetlang'];

        if ($source === "GB2312") {
            if ($target === "BIG5") {
                $this->ctf = $this->_openInterchangeTable('gbtobig5_table');
            }
            if ($target === "PinYin") {
                $this->_loadPinyinTable();
            }
            if ($target === "UTF8") {
                $this->_loadUnicodeTable('gbtounicode_table', 0, 6, 7, 6);
            }
            if ($target === "UNICODE") {
                $this->_loadUnicodeTable('gbtounicode_table', 0, 6, 9, 4);
            }
        }

        if ($source === "BIG5") {
            if ($target === "GB2312") {
                $this->ctf = $this->_openInterchangeTable('big5togb_table');
            }
            if ($target === "UTF8") {
                $this->_loadUnicodeTable('big5tounicode_table', 0, 6, 7, 6);
            }
            if ($target === "UNICODE") {
                $this->_loadUnicodeTable('big5tounicode_table', 0, 6, 9, 4);
            }
            if ($target === "PinYin") {
                $this->_loadPinyinTable();
            }
        }

        if ($source === "UTF8") {
            // Reverse direction: key on the Unicode column, store the legacy code.
            if ($target === "GB2312") {
                $this->_loadUnicodeTable('gbtounicode_table', 7, 6, 0, 6);
            }
            if ($target === "BIG5") {
                $this->_loadUnicodeTable('big5tounicode_table', 7, 6, 0, 6);
            }
        }
    }

    /**
     * Reads a local or remote file into $SourceText.
     *
     * @param string $position Path or URL of the file.
     * @param bool   $isHTML   When true, the charset declaration is rewritten to
     *                         the target encoding and line breaks are removed.
     */
    public function OpenFile($position, $isHTML = false)
    {
        $this->SourceText = $this->_readSource($position);
        if ($isHTML) {
            $this->SourceText = $this->_rewriteCharset($this->SourceText);
            $this->SourceText = str_replace(array("\n", "\r"), "", $this->SourceText);
        }
    }

    /**
     * Reads a local or remote page into $SourceText and rewrites its charset
     * declaration to the target encoding.
     *
     * @param string $position Path or URL of the file.
     */
    public function SiteOpen($position)
    {
        $this->SourceText = $this->_rewriteCharset($this->_readSource($position));
    }

    /**
     * Sets one entry of $config.
     *
     * @param string $parameter Configuration key (case-sensitive).
     * @param mixed  $value     New value.
     * @return mixed The unchanged $parameter when it is empty, otherwise nothing.
     */
    public function setvar($parameter, $value)
    {
        if (!trim($parameter)) {
            return $parameter;
        }
        $this->config[$parameter] = $value;
    }

    /**
     * Encodes a Unicode code point as UTF-8, returned as concatenated DECIMAL
     * byte values rather than raw bytes (e.g. 0x4E2D gives "228184173").
     *
     * Every byte of a multi-byte sequence is >= 128 and therefore exactly three
     * digits long, which is what lets CHStoUTF8() split the result into
     * three-character groups.
     *
     * @param int $c Unicode code point.
     * @return string
     */
    public function CHSUtoUTF8($c)
    {
        $str = "";
        if ($c < 0x80) {
            $str .= $c;
        } elseif ($c < 0x800) {
            $str .= (0xC0 | ($c >> 6));
            $str .= (0x80 | ($c & 0x3F));
        } elseif ($c < 0x10000) {
            $str .= (0xE0 | ($c >> 12));
            $str .= (0x80 | (($c >> 6) & 0x3F));
            $str .= (0x80 | ($c & 0x3F));
        } elseif ($c < 0x200000) {
            $str .= (0xF0 | ($c >> 18));
            $str .= (0x80 | (($c >> 12) & 0x3F));
            $str .= (0x80 | (($c >> 6) & 0x3F));
            $str .= (0x80 | ($c & 0x3F));
        }
        return $str;
    }

    /**
     * Converts $SourceText between GB2312/BIG5 and UTF-8, in the direction
     * given by the configured source and target encodings.
     *
     * Characters that have no entry in the loaded table are omitted from the
     * result.
     *
     * @return string|null The converted text, or null when the source encoding
     *                     is none of GB2312, BIG5 and UTF8.
     */
    public function CHStoUTF8()
    {
        $source = $this->config['sourcelang'];

        if ($source === "BIG5" || $source === "GB2312") {
            $text = $this->SourceText;
            $length = strlen($text);
            $ret = "";
            // Walk the string by index; testing the remaining string for
            // emptiness is what mis-handled a trailing "0" before ver 1.7.
            $i = 0;
            while ($i < $length) {
                if (ord($text[$i]) > 127) {
                    $key = $this->_tableKey(substr($text, $i, 2));
                    $i += 2;
                    if ($key !== null && isset($this->unicode_table[$key])) {
                        $utf8 = $this->CHSUtoUTF8(hexdec($this->unicode_table[$key]));
                        foreach (str_split($utf8, 3) as $byte) {
                            $ret .= chr((int) $byte);
                        }
                    }
                } else {
                    $ret .= $text[$i];
                    $i++;
                }
            }
            $this->unicode_table = array();
            $this->SourceText = "";
            return $ret;
        }

        if ($source === "UTF8") {
            $text = $this->SourceText;
            $len = strlen($text);
            $out = "";
            $i = 0;
            while ($i < $len) {
                $c = ord(substr($text, $i++, 1));
                switch ($c >> 4) {
                    case 0: case 1: case 2: case 3:
                    case 4: case 5: case 6: case 7:
                        // 0xxxxxxx: plain ASCII, copied unchanged
                        $out .= chr($c);
                        break;
                    case 12: case 13:
                        // 110x xxxx  10xx xxxx
                        $char2 = ord(substr($text, $i++, 1));
                        $out .= $this->_encodeLegacy((($c & 0x1F) << 6) | ($char2 & 0x3F));
                        break;
                    case 14:
                        // 1110 xxxx  10xx xxxx  10xx xxxx
                        $char2 = ord(substr($text, $i++, 1));
                        $char3 = ord(substr($text, $i++, 1));
                        $out .= $this->_encodeLegacy(
                            (($c & 0x0F) << 12) | (($char2 & 0x3F) << 6) | ($char3 & 0x3F)
                        );
                        break;
                }
            }
            return $out;
        }
    }

    /**
     * Converts GB2312/BIG5 text in $SourceText to ASCII with one "&#xHHHH;"
     * entity per Chinese character.
     *
     * Characters that have no entry in the loaded table are omitted.
     *
     * @return string
     */
    public function CHStoUNICODE()
    {
        $text = $this->SourceText;
        $length = strlen($text);
        $utf = "";
        $i = 0;
        while ($i < $length) {
            if (ord($text[$i]) > 127) {
                $key = $this->_tableKey(substr($text, $i, 2));
                $i += 2;
                if ($key !== null && isset($this->unicode_table[$key])) {
                    $utf .= "&#x" . $this->unicode_table[$key] . ";";
                }
            } else {
                $utf .= $text[$i];
                $i++;
            }
        }
        $this->SourceText = "";
        return $utf;
    }

    /**
     * Converts $SourceText between GB2312 and BIG5 (the direction is decided by
     * whichever interchange table is open in $ctf), then closes the table.
     *
     * @return string The converted text.
     */
    public function GB2312toBIG5()
    {
        if (!is_resource($this->ctf)) {
            $this->_fail("failed to open the conversion table file! ");
        }

        // A double-byte character needs a following byte, so stop one short.
        $max = strlen($this->SourceText) - 1;
        for ($i = 0; $i < $max; $i++) {
            $h = ord($this->SourceText[$i]);
            if ($h >= 160) {
                $l = ord($this->SourceText[$i + 1]);
                if ($h == 161 && $l == 64) {
                    // NOTE(review): the replacement literal was not legible in the
                    // copy this was rebuilt from; two bytes are required, two
                    // spaces assumed - confirm.
                    $gb = "  ";
                } else {
                    fseek($this->ctf, ($h - 160) * 510 + ($l - 1) * 2);
                    $gb = fread($this->ctf, 2);
                }
                if (is_string($gb) && strlen($gb) == 2) {
                    $this->SourceText[$i] = $gb[0];
                    $this->SourceText[$i + 1] = $gb[1];
                }
                // Skip the second byte of the pair.
                $i++;
            }
        }
        fclose($this->ctf);
        $this->ctf = null;

        $result = $this->SourceText;
        $this->SourceText = "";
        return $result;
    }

    /**
     * Returns the pinyin for one code as computed by CHStoPinYin().
     *
     * @param int $num A byte value (1..159) or a GB2312 code minus 65536.
     * @return string The character itself for 1..159, the pinyin for codes in
     *                range, otherwise an empty string.
     */
    public function PinYinSearch($num)
    {
        if ($num > 0 && $num < 160) {
            return chr($num);
        }
        // NOTE(review): the range bounds were partly lost in the copy this was
        // rebuilt from (only -10247 was legible); -20319 is assumed - confirm.
        if ($num < -20319 || $num > -10247) {
            return "";
        }
        // Scan from the end for the last row whose code is <= $num.
        for ($i = count($this->pinyin_table) - 1; $i >= 0; $i--) {
            if ($this->pinyin_table[$i][1] <= $num) {
                return $this->pinyin_table[$i][0];
            }
        }
        return "";
    }

    /**
     * Converts GB2312 or BIG5 text in $SourceText to pinyin. BIG5 input is
     * first converted to GB2312 through the BIG5 -> GB table.
     *
     * @return string One entry per character, separated by spaces.
     */
    public function CHStoPinYin()
    {
        if ($this->config['sourcelang'] === "BIG5") {
            $this->ctf = $this->_openInterchangeTable('big5togb_table');
            $this->SourceText = $this->GB2312toBIG5();
            $this->config['targetlang'] = "PinYin";
        }

        $ret = array();
        $length = strlen($this->SourceText);
        for ($i = 0; $i < $length; $i++) {
            $p = ord(substr($this->SourceText, $i, 1));
            if ($p > 160) {
                // Double-byte character: combine both bytes into the offset
                // code that the pinyin table is keyed on.
                $q = ord(substr($this->SourceText, ++$i, 1));
                $p = $p * 256 + $q - 65536;
            }
            $ret[] = $this->PinYinSearch($p);
        }

        $this->SourceText = "";
        $this->pinyin_table = array();
        return implode(" ", $ret);
    }

    /**
     * Runs the conversion selected by the configured source/target encodings.
     *
     * @return string|null The converted text, or null when the combination is
     *                     not supported.
     */
    public function ConvertIT()
    {
        $source = $this->config['sourcelang'];
        $target = $this->config['targetlang'];
        $fromChinese = ($source === "GB2312" || $source === "BIG5");

        // Simplified <-> Traditional
        if ($fromChinese && ($target === "GB2312" || $target === "BIG5")) {
            return $this->GB2312toBIG5();
        }
        // Simplified / Traditional -> pinyin
        if ($fromChinese && $target === "PinYin") {
            return $this->CHStoPinYin();
        }
        // Simplified / Traditional <-> UTF-8
        if (($fromChinese || $source === "UTF8")
            && ($target === "UTF8" || $target === "GB2312" || $target === "BIG5")
        ) {
            return $this->CHStoUTF8();
        }
        // Simplified / Traditional -> hexadecimal entities
        if ($fromChinese && $target === "UNICODE") {
            return $this->CHStoUNICODE();
        }
        return null;
    }

    /** Echoes $message and terminates the script (the library's error convention). */
    private function _fail($message)
    {
        echo $message;
        exit;
    }

    /** Returns the full path of the table file registered under $tableKey in $config. */
    private function _tablePath($tableKey)
    {
        return $this->config['codetable_dir'] . $this->config[$tableKey];
    }

    /** Opens a GB2312 <-> BIG5 interchange table for reading; exits on failure. */
    private function _openInterchangeTable($tableKey)
    {
        // fopen() reports failure with false, never null.
        $handle = @fopen($this->_tablePath($tableKey), "r");
        if ($handle === false) {
            $this->_fail("failed to open the conversion table file! ");
        }
        return $handle;
    }

    /** Reads the lines of a text table; exits when it is missing or empty. */
    private function _readTableLines($tableKey)
    {
        $lines = @file($this->_tablePath($tableKey));
        if (!$lines) {
            $this->_fail("an error occurred while opening the conversion table file! ");
        }
        return $lines;
    }

    /**
     * Fills $unicode_table from a text table: the key is the hexadecimal field
     * at ($keyStart, $keyLength), the value is the field at
     * ($valueStart, $valueLength) with surrounding whitespace removed.
     */
    private function _loadUnicodeTable($tableKey, $keyStart, $keyLength, $valueStart, $valueLength)
    {
        $lines = $this->_readTableLines($tableKey);
        $this->unicode_table = array();
        foreach ($lines as $line) {
            $key = trim((string) substr($line, $keyStart, $keyLength));
            $value = trim((string) substr($line, $valueStart, $valueLength));
            // Skip rows whose key field is not a hexadecimal literal, so that
            // hexdec() is never fed invalid characters.
            if ($value === '' || !preg_match('/^(0x)?[0-9a-f]+$/i', $key)) {
                continue;
            }
            $this->unicode_table[hexdec($key)] = $value;
        }
    }

    /**
     * Fills $pinyin_table from the pinyin table, one array(pinyin, code) row
     * per line.
     *
     * NOTE(review): the field separator was not legible in the copy this was
     * rebuilt from, so any run of whitespace is accepted - confirm.
     */
    private function _loadPinyinTable()
    {
        $lines = $this->_readTableLines('gbtopinyin_table');
        $this->pinyin_table = array();
        foreach ($lines as $line) {
            $fields = preg_split('/\s+/', trim($line));
            if (count($fields) < 2) {
                continue;
            }
            $this->pinyin_table[] = array($fields[0], (int) $fields[1]);
        }
    }

    /** Reads a whole local or remote file; exits when it cannot be read or is empty. */
    private function _readSource($position)
    {
        $tempcontent = @file($position);
        if (!$tempcontent) {
            $this->_fail("failed to open the file! ");
        }
        return implode("", $tempcontent);
    }

    /** Replaces "charset=<source>" with "charset=<target>", ignoring letter case. */
    private function _rewriteCharset($text)
    {
        return str_ireplace(
            "charset=" . $this->config['sourcelang'],
            "charset=" . $this->config['targetlang'],
            $text
        );
    }

    /**
     * Maps one double-byte GB2312/BIG5 character to its key in $unicode_table.
     * GB2312 keys have 0x8080 subtracted from the byte pair; BIG5 keys are the
     * byte pair itself.
     *
     * @return int|null Null when the source encoding is neither GB2312 nor BIG5.
     */
    private function _tableKey($pair)
    {
        $code = hexdec(bin2hex($pair));
        if ($this->config['sourcelang'] === "GB2312") {
            return $code - 0x8080;
        }
        if ($this->config['sourcelang'] === "BIG5") {
            return $code;
        }
        return null;
    }

    /**
     * Returns the GB2312 or BIG5 bytes for a Unicode code point, using the
     * table loaded for a UTF8 source; empty string when the code point is not
     * in the table or the target is neither GB2312 nor BIG5.
     */
    private function _encodeLegacy($codePoint)
    {
        if (!isset($this->unicode_table[$codePoint])) {
            return "";
        }
        // Table values are hexadecimal strings; convert explicitly instead of
        // relying on PHP treating "0x..." strings as numbers.
        $code = hexdec($this->unicode_table[$codePoint]);
        if ($this->config['targetlang'] === "GB2312") {
            return $this->_hex2bin(sprintf('%04x', $code + 0x8080));
        }
        if ($this->config['targetlang'] === "BIG5") {
            return $this->_hex2bin(sprintf('%04x', $code));
        }
        return "";
    }
}