A function that decodes a UTF-8 encoded string to Unicode (the code below is JavaScript, although the original title says PHP)
/**
 * Decodes a UTF-8 byte string into a JavaScript string.
 *
 * Every character of strUtf8 is read with charCodeAt() and interpreted as a
 * single UTF-8 byte (assumes each character code is in the range 0-255 -
 * TODO confirm against callers). Only 1-, 2- and 3-byte sequences are
 * decoded; overlong encodings are not rejected.
 *
 * @param {string} strUtf8 String whose characters carry the UTF-8 bytes.
 * @returns {string} The decoded text, or "" when decoding stopped early
 *     because the input was truncated, a continuation byte did not match
 *     the 10xxxxxx pattern, or a lead byte announced an unsupported
 *     (4-byte or longer / stray continuation) sequence.
 */
function Utf8ToUnicode(strUtf8)
{
    var bstr = "";
    var nTotalChars = strUtf8.length;   // total bytes to be processed
    var nOffset = 0;                    // read position in strUtf8
    var nRemainingBytes = nTotalChars;  // bytes not yet converted
    var iCode, iCode1, iCode2;          // lead byte and continuation bytes

    while (nOffset < nTotalChars)
    {
        iCode = strUtf8.charCodeAt(nOffset);

        if ((iCode & 0x80) === 0)            // 0xxxxxxx: 1 byte
        {
            bstr += String.fromCharCode(iCode & 0x7F);
            nOffset += 1;
            nRemainingBytes -= 1;
        }
        else if ((iCode & 0xE0) === 0xC0)    // 110xxxxx: 2 bytes
        {
            // charCodeAt() yields NaN past the end; NaN & 0xC0 is 0, so a
            // truncated sequence also fails the continuation-byte test.
            iCode1 = strUtf8.charCodeAt(nOffset + 1);
            if (nRemainingBytes < 2 ||           // not enough data
                (iCode1 & 0xC0) !== 0x80)        // invalid pattern
            {
                break;
            }
            // Bit 5 of a 110xxxxx lead byte is always 0, so masking with
            // 0x3F (as the original did) keeps exactly the 5 payload bits.
            bstr += String.fromCharCode(((iCode & 0x3F) << 6) |
                                        (iCode1 & 0x3F));
            nOffset += 2;
            nRemainingBytes -= 2;
        }
        else if ((iCode & 0xF0) === 0xE0)    // 1110xxxx: 3 bytes
        {
            iCode1 = strUtf8.charCodeAt(nOffset + 1);
            iCode2 = strUtf8.charCodeAt(nOffset + 2);
            if (nRemainingBytes < 3 ||           // not enough data
                (iCode1 & 0xC0) !== 0x80 ||      // invalid pattern
                (iCode2 & 0xC0) !== 0x80)
            {
                break;
            }
            bstr += String.fromCharCode(((iCode & 0x0F) << 12) |
                                        ((iCode1 & 0x3F) << 6) |
                                        (iCode2 & 0x3F));
            nOffset += 3;
            nRemainingBytes -= 3;
        }
        else                                 // 4 or more bytes - unsupported
        {
            break;
        }
    }

    // Any early break leaves unconsumed bytes behind: bad UTF-8 string.
    if (nRemainingBytes !== 0)
    {
        return "";
    }
    return bstr;
}
- ?>
|