<?php
/**
 * Static helpers for converting text between UTF-8, GB2312/GBK and Big5.
 *
 * Every converter prefers the iconv extension when it is available. The
 * pure-PHP fallbacks rely on lookup tables read from data files and cached
 * in global variables, so each table is loaded at most once per request.
 *
 * NOTE(review): the data-file paths are relative and keep the "Data/"
 * directory casing found in the original source — confirm the casing on
 * case-sensitive filesystems.
 */
class Charset
{
    /**
     * Converts a UTF-8 string to GB encoding.
     *
     * Fallback behaviour (no iconv): characters missing from the lookup table
     * are emitted as decimal numeric character references ("&#N;"), malformed
     * or truncated UTF-8 sequences are dropped, and the result is trimmed.
     *
     * @param string $utfstr UTF-8 encoded input.
     * @return string|false Converted string; false when iconv() itself fails.
     */
    public static function UTF82GB($utfstr)
    {
        if (function_exists('iconv')) {
            return iconv('UTF-8', 'GBK//IGNORE', $utfstr);
        }

        global $UC2GBTABLE;

        if (trim($utfstr) === '') {
            return $utfstr;
        }

        if (empty($UC2GBTABLE)) {
            // Table maps Unicode code point => GB code (without the 0x8080 offset).
            $UC2GBTABLE = array();
            $fp = fopen('Data/gb2312-utf8.dat', 'r');
            if ($fp !== false) {
                while (($line = fgets($fp, 15)) !== false) {
                    if (strlen($line) < 13) {
                        continue; // blank or partial line
                    }
                    $UC2GBTABLE[self::hexToInt(substr($line, 7, 6))] = self::hexToInt(substr($line, 0, 6));
                }
                fclose($fp);
            }
        }

        $okstr = '';
        $ulen = strlen($utfstr);
        for ($i = 0; $i < $ulen; $i++) {
            $byte = ord($utfstr[$i]);
            if ($byte < 0x80) {
                // Plain ASCII is identical in both encodings.
                $okstr .= $utfstr[$i];
                continue;
            }

            $seqLen = self::utf8SequenceLength($byte);
            if ($seqLen === 0) {
                continue; // stray continuation byte or invalid lead byte
            }

            $c = substr($utfstr, $i, $seqLen);
            if (strlen($c) !== $seqLen) {
                break; // sequence truncated at the end of the input
            }
            $i += $seqLen - 1;

            $code = self::utf82u($c);
            if (isset($UC2GBTABLE[$code])) {
                $gb = $UC2GBTABLE[$code] + 0x8080;
                $okstr .= chr(($gb >> 8) & 0xFF) . chr($gb & 0xFF);
            } else {
                $okstr .= '&#' . $code . ';';
            }
        }

        return trim($okstr);
    }

    /**
     * Converts a GB encoded string to UTF-8.
     *
     * Fallback behaviour (no iconv): double-byte characters that are missing
     * from the lookup table, and a dangling lead byte at the end of the input,
     * are dropped.
     *
     * @param string $gbstr GB2312/GBK encoded input.
     * @return string|false Converted string; false when iconv() itself fails.
     */
    public static function Gb2utf8($gbstr)
    {
        if (function_exists('iconv')) {
            return iconv('GBK', 'UTF-8//IGNORE', $gbstr);
        }

        global $CODETABLE;

        if (trim($gbstr) === '') {
            return $gbstr;
        }

        if (empty($CODETABLE)) {
            // Table maps GB code (without the 0x8080 offset) => hex Unicode string.
            $CODETABLE = array();
            $fp = fopen('Data/gb2312-utf8.dat', 'r');
            if ($fp !== false) {
                while (($line = fgets($fp, 15)) !== false) {
                    if (strlen($line) < 13) {
                        continue; // blank or partial line
                    }
                    $CODETABLE[self::hexToInt(substr($line, 0, 6))] = substr($line, 7, 6);
                }
                fclose($fp);
            }
        }

        $ret = '';
        $len = strlen($gbstr);
        for ($i = 0; $i < $len; $i++) {
            $lead = ord($gbstr[$i]);
            if ($lead <= 0x80) {
                $ret .= $gbstr[$i];
                continue;
            }
            if ($i + 1 >= $len) {
                break; // dangling lead byte
            }

            $key = (($lead << 8) | ord($gbstr[$i + 1])) - 0x8080;
            $i++;
            if (!isset($CODETABLE[$key])) {
                continue;
            }
            foreach (self::utf8ByteValues(self::hexToInt($CODETABLE[$key])) as $value) {
                $ret .= chr($value);
            }
        }

        return $ret;
    }

    /**
     * Encodes a Unicode code point as UTF-8 byte values.
     *
     * The return value is NOT a byte string: it is the decimal value of each
     * UTF-8 byte concatenated together (e.g. 0x4E2D => "228184173"). Code
     * points at or above 0x200000 yield an empty string.
     *
     * @param int $c Unicode code point.
     * @return string Concatenated decimal byte values.
     */
    public static function U2utf8($c)
    {
        return implode('', self::utf8ByteValues($c));
    }

    /**
     * Decodes a single UTF-8 encoded character into its Unicode code point.
     *
     * @param string $c One UTF-8 sequence of 1 to 4 bytes.
     * @return int|null Code point, or null when the length is not 1-4 bytes.
     */
    public static function utf82u($c)
    {
        switch (strlen($c)) {
            case 1:
                return ord($c);
            case 2:
                return ((ord($c[0]) & 0x1f) << 6)
                    + (ord($c[1]) & 0x3f);
            case 3:
                return ((ord($c[0]) & 0x0f) << 12)
                    + ((ord($c[1]) & 0x3f) << 6)
                    + (ord($c[2]) & 0x3f);
            case 4:
                return ((ord($c[0]) & 0x07) << 18)
                    + ((ord($c[1]) & 0x3f) << 12)
                    + ((ord($c[2]) & 0x3f) << 6)
                    + (ord($c[3]) & 0x3f);
        }

        return null;
    }

    /**
     * Converts a Big5 encoded string to GB encoding.
     *
     * @param string $Text Big5 encoded input.
     * @return string|false Converted string; false when iconv() itself fails.
     */
    public static function BIG52GB($Text)
    {
        if (function_exists('iconv')) {
            return iconv('BIG5', 'GBK//IGNORE', $Text);
        }

        global $BIG5_data;

        if (empty($BIG5_data)) {
            $BIG5_data = self::readTableFile('Data/big5-gb.dat');
        }

        // Big5 0xA140 (ideographic space) becomes the GB ideographic space 0xA1A1.
        return self::mapDoubleByte($Text, $BIG5_data, "\xA1\xA1");
    }

    /**
     * Converts a GB encoded string to Big5 encoding.
     *
     * @param string $Text GB2312/GBK encoded input.
     * @return string|false Converted string; false when iconv() itself fails.
     */
    public static function Gb2big5($Text)
    {
        if (function_exists('iconv')) {
            return iconv('GBK', 'BIG5//IGNORE', $Text);
        }

        global $GB_data;

        if (empty($GB_data)) {
            $GB_data = self::readTableFile('Data/gb-big5.dat');
        }

        // NOTE(review): the replacement literal for the 0xA1 0x40 special case
        // was lost from the original source; the Big5 ideographic space
        // (0xA1 0x40) is assumed here — confirm against the upstream code.
        return self::mapDoubleByte($Text, $GB_data, "\xA1\x40");
    }

    /**
     * Decodes "%uXXXX" Unicode URL escapes into GBK bytes.
     *
     * The marker "$#$" is first replaced by "+". Each complete "%u" escape
     * followed by four hex digits is looked up in the dictionary cached in
     * $GLOBALS['gbkunidic']; unknown code points become decimal numeric
     * character references ("&#N;"). Anything else, including incomplete
     * escapes, is copied through unchanged.
     *
     * @param string $str Input containing "%uXXXX" escapes.
     * @return string Decoded string.
     */
    public static function UNICODEURL2GBK($str)
    {
        // Load the lookup dictionary: lowercase hex of a 2-byte key => 2-byte value.
        if (!isset($GLOBALS['gbkunidic'])) {
            $GLOBALS['gbkunidic'] = array();
            $fp = fopen('Data/gbk-unicode.dat', 'rb');
            // Guarding the handle matters: feof() on a failed fopen() never
            // reports end-of-file.
            if ($fp !== false) {
                while (!feof($fp)) {
                    $key = fread($fp, 2);
                    $value = fread($fp, 2);
                    if (is_string($key) && strlen($key) === 2
                        && is_string($value) && strlen($value) === 2) {
                        $GLOBALS['gbkunidic'][bin2hex($key)] = $value;
                    }
                }
                fclose($fp);
            }
        }

        // Process the string.
        $str = str_replace('$#$', '+', $str);
        $glen = strlen($str);
        $okstr = '';
        for ($i = 0; $i < $glen; $i++) {
            // A complete escape is 6 bytes: '%', 'u' and four hex digits.
            if ($glen - $i >= 6 && $str[$i] === '%' && $str[$i + 1] === 'u') {
                $uni = strtolower(substr($str, $i + 2, 4));
                if (strspn($uni, '0123456789abcdef') === 4) {
                    $i += 5;
                    if (isset($GLOBALS['gbkunidic'][$uni])) {
                        $okstr .= $GLOBALS['gbkunidic'][$uni];
                    } else {
                        $okstr .= '&#' . hexdec($uni) . ';';
                    }
                    continue;
                }
            }
            $okstr .= $str[$i];
        }

        return $okstr;
    }

    /**
     * Returns the UTF-8 byte values (as integers) that encode a code point.
     *
     * Values below 0x80 are returned as-is; code points at or above 0x200000
     * produce an empty list.
     */
    private static function utf8ByteValues($c)
    {
        if ($c < 0x80) {
            return array($c);
        }
        if ($c < 0x800) {
            return array(
                0xC0 | ($c >> 6),
                0x80 | ($c & 0x3F),
            );
        }
        if ($c < 0x10000) {
            return array(
                0xE0 | ($c >> 12),
                0x80 | (($c >> 6) & 0x3F),
                0x80 | ($c & 0x3F),
            );
        }
        if ($c < 0x200000) {
            return array(
                0xF0 | ($c >> 18),
                0x80 | (($c >> 12) & 0x3F),
                0x80 | (($c >> 6) & 0x3F),
                0x80 | ($c & 0x3F),
            );
        }

        return array();
    }

    /**
     * Returns the total length of the UTF-8 sequence introduced by a lead
     * byte (2, 3 or 4), or 0 when the byte cannot start a multi-byte sequence.
     */
    private static function utf8SequenceLength($byte)
    {
        if (($byte & 0xE0) === 0xC0) {
            return 2;
        }
        if (($byte & 0xF0) === 0xE0) {
            return 3;
        }
        if (($byte & 0xF8) === 0xF0) {
            return 4;
        }

        return 0;
    }

    /**
     * Parses a hexadecimal token, tolerating surrounding whitespace and an
     * optional "0x" prefix (hexdec() complains about non-hex characters).
     */
    private static function hexToInt($hex)
    {
        $hex = trim((string) $hex);
        if (strncasecmp($hex, '0x', 2) === 0) {
            $hex = (string) substr($hex, 2);
        }

        return hexdec($hex);
    }

    /**
     * Reads a whole binary table file; returns '' when it cannot be read.
     */
    private static function readTableFile($filename)
    {
        $data = file_get_contents($filename);

        return ($data === false) ? '' : $data;
    }

    /**
     * Rewrites every double-byte character of $Text in place using a table of
     * two-byte entries indexed by (lead - 160) * 510 + (trail - 1) * 2.
     *
     * The pair 0xA1 0x40 is replaced by $space. Pairs whose offset falls
     * outside the table are left untouched.
     */
    private static function mapDoubleByte($Text, $table, $space)
    {
        $max = strlen($Text) - 1;
        for ($i = 0; $i < $max; $i++) {
            $h = ord($Text[$i]);
            if ($h < 0x80) {
                continue;
            }

            $l = ord($Text[$i + 1]);
            $pair = null;
            if ($h === 161 && $l === 64) {
                $pair = $space;
            } else {
                $p = ($h - 160) * 510 + ($l - 1) * 2;
                if ($p >= 0 && isset($table[$p + 1])) {
                    $pair = $table[$p] . $table[$p + 1];
                }
            }

            if ($pair !== null) {
                $Text[$i] = $pair[0];
                $Text[$i + 1] = $pair[1];
            }
            $i++; // skip the trail byte
        }

        return $Text;
    }
}
?>
PHP transcoding functions