Encoding conversion between GB2312 and Unicode in PHP
The following example converts GB2312 text to HTML numeric character references (the "&#NNNNN;" form).
Since PHP 4.3.1 the iconv function does most of the work; you only need to write a conversion function from UTF-8 to Unicode code points yourself.
Check the table (gb2312.txt).
// Demo: print every character of a GB2312 string as an HTML numeric
// character reference ("&#NNNNN;").
$text = "e-book library ";

// The pattern is an optional high byte (0x80-0xFF) followed by any one byte,
// so each match is either a two-byte sequence or a single byte.
preg_match_all("/[\x80-\xff]?./", $text, $ar);

foreach ($ar[0] as $v) {
    // Re-encode the token as UTF-8, then turn it into its code point number.
    echo "&#" . utf8_unicode(iconv("GB2312", "UTF-8", $v)) . ";";
}
?>
/**
 * Convert one UTF-8 encoded character to its Unicode code point.
 *
 * The sequence length is taken from strlen($c), not from the lead byte, so
 * the caller must pass exactly one character (1 to 4 bytes).
 *
 * @param string $c A single UTF-8 encoded character.
 * @return int|null The code point, or null when $c is not 1-4 bytes long.
 */
function utf8_unicode($c) {
    switch (strlen($c)) {
        case 1:
            return ord($c);
        case 2:
            // 110xxxxx 10xxxxxx: 5 payload bits in the lead byte.
            return ((ord($c[0]) & 0x1f) << 6)
                 |  (ord($c[1]) & 0x3f);
        case 3:
            // 1110xxxx 10xxxxxx 10xxxxxx: 4 payload bits in the lead byte.
            return ((ord($c[0]) & 0x0f) << 12)
                 | ((ord($c[1]) & 0x3f) << 6)
                 |  (ord($c[2]) & 0x3f);
        case 4:
            // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx: 3 payload bits in the lead byte.
            return ((ord($c[0]) & 0x07) << 18)
                 | ((ord($c[1]) & 0x3f) << 12)
                 | ((ord($c[2]) & 0x3f) << 6)
                 |  (ord($c[3]) & 0x3f);
        default:
            // Empty input or more than four bytes: not a single character.
            return null;
    }
}
?>
The following example uses PHP to convert the "&#NNNNN;" numeric character reference form back to GB2312.
// Demo: replace every decimal "&#NNNNN;" reference in a string with the
// GB2312 bytes of that character and print the result.
// NOTE(review): the sample string presumably contained "&#NNNNN;" references
// before this text was translated -- confirm against the original article.
$str = "TTL auto-focus around the clock ";

// Callback for preg_replace_callback(): $m[1] is the decimal code point
// captured from one "&#NNNNN;" reference.
function u2utf82gb_reference($m) {
    return u2utf82gb((int) $m[1]);
}

// A callback is used instead of building PHP source from $str and eval()ing
// it: eval() would execute any quote, backslash or "$" sequence in the input.
$str = preg_replace_callback('/&#([0-9]{1,5});/', 'u2utf82gb_reference', $str);

echo $str;
/**
 * Convert a Unicode code point to the GB2312 encoding of that character.
 *
 * The code point is first encoded as UTF-8 by hand and then handed to
 * iconv() for the UTF-8 -> GB2312 step.
 *
 * @param int|string $c Code point (a numeric string is accepted).
 * @return string|false GB2312 bytes; an empty string for code points of
 *                      0x200000 and above; whatever iconv() returns
 *                      (false) when the conversion fails.
 */
function u2utf82gb($c) {
    $c = (int) $c;
    $str = "";
    if ($c < 0x80) {
        // ASCII: emit the character itself, not its decimal number.
        $str .= chr($c);
    } elseif ($c < 0x800) {
        $str .= chr(0xC0 | ($c >> 6));
        $str .= chr(0x80 | ($c & 0x3F));
    } elseif ($c < 0x10000) {
        $str .= chr(0xE0 | ($c >> 12));
        $str .= chr(0x80 | (($c >> 6) & 0x3F));
        $str .= chr(0x80 | ($c & 0x3F));
    } elseif ($c < 0x200000) {
        $str .= chr(0xF0 | ($c >> 18));
        $str .= chr(0x80 | (($c >> 12) & 0x3F));
        $str .= chr(0x80 | (($c >> 6) & 0x3F));
        $str .= chr(0x80 | ($c & 0x3F));
    }
    // The encoding name must not carry stray whitespace.
    return iconv('utf-8', 'gb2312', $str);
}
?>
Or
/**
 * Decode escaped Unicode characters in a string to GB2312.
 *
 * After rawurldecode(), three escape forms are recognised:
 *   %uXXXX    four hex digits
 *   &#xXXXX;  four hex digits
 *   &#NNNN;   decimal, up to 0xFFFF
 * Everything else is copied through unchanged. A token that cannot be
 * converted is kept as it was written.
 *
 * @param string $str Input text.
 * @return string The text with recognised escapes replaced by GB2312 bytes.
 */
function unescape($str) {
    $str = rawurldecode($str);

    // /U makes ".+" lazy, so ordinary text is tokenised one byte at a time;
    // /s lets "." match newlines so they are not dropped from the result.
    preg_match_all('/%u[0-9a-fA-F]{4}|&#x[0-9a-fA-F]{4};|&#\d+;|.+/sU', $str, $r);
    $ar = $r[0];

    foreach ($ar as $k => $v) {
        $ucs2 = null; // big-endian UCS-2 bytes of the escaped character

        if (substr($v, 0, 2) === "%u") {
            $ucs2 = pack("H4", substr($v, -4));
        } elseif (substr($v, 0, 3) === "&#x") {
            $ucs2 = pack("H4", substr($v, 3, -1));
        } elseif (substr($v, 0, 2) === "&#") {
            $code = (int) substr($v, 2, -1);
            // pack("n") holds 16 bits only; larger values would wrap.
            if ($code <= 0xFFFF) {
                $ucs2 = pack("n", $code);
            }
        }

        if ($ucs2 !== null) {
            // pack() produced big-endian bytes, so name the byte order
            // explicitly instead of relying on iconv's default for "UCS-2".
            $gb = iconv("UCS-2BE", "GB2312", $ucs2);
            if ($gb !== false) {
                $ar[$k] = $gb;
            }
        }
    }

    return join("", $ar);
}
// Demo: run a sample string through unescape() and print the result.
$str = "TTL auto-focus around the clock ";
echo unescape($str); // prints the decoded string
Use javascript for conversion