Conversion between UTF-8 and Unicode is done here with bit operations on the raw bytes rather than with string-conversion routines. A Chinese character occupies 3 bytes in UTF-8 and 2 bytes in Unicode; the bits of the two forms correspond as follows:
UTF-8 encoding (3 bytes):
[1, 1, 1, 0, A5, A6, A7, A8], [1, 0, B3, B4, B5, B6, B7, B8],
[1, 0, C3, C4, C5, C6, C7, C8];
Corresponding Unicode encoding (2 bytes, high byte first):
[A5, A6, A7, A8, B3, B4, B5, B6],
[B7, B8, C3, C4, C5, C6, C7, C8]
Therefore, a few bit operations are all that is needed to convert between the two. For example:
// Convert the UTF-8 to Unicode
Void cchinesecodelib: utf_8tounicode (wchar * pout, char * ptext)
{
Char * uchar = (char *) pout;
Uchar [1] = (ptext [0] & 0x0f) <4) + (ptext [1]> 2) & 0x0f );
Uchar [0] = (ptext [1] & 0x03) <6) + (ptext [2] & 0x3f );
Return;
}
// Unicode to UTF-8
Void cchinesecodelib: unicodetoutf_8 (char * pout, wchar * ptext)
{
// Pay attention to the order of wchar high and low characters. The lower byte is in the front and the higher byte is in the back
Char * pchar = (char *) ptext;
Pout [0] = (0xe0 | (pchar [1] & 0xf0)> 4 ));
Pout [1] = (0x80 | (pchar [1] & 0x0f) <2) + (pchar [0] & 0xc0)> 6 );
Pout [2] = (0x80 | (pchar [0] & 0x3f ));
Return;
}
// Convert Unicode to gb2312
Void cchinesecodelib: unicodetogb2312 (char * pout, unsigned short udata)
{
Widechartomultibyte (cp_acp, null, & udata, 1, pout, sizeof (wchar), null, null );
Return;
}
// Convert gb2312 to Unicode
Void cchinesecodelib: gb2312tounicode (wchar * pout, char * gbbuffer)
{
: Multibytetowidechar (cp_acp, mb_precomposed, gbbuffer, 2, pout, 1 );
Return;
}
// Gb2312 into UTF-8
Void cchinesecodelib: gb2312toutf_8 (string & pout, char * ptext, int Plen)
{
Char Buf [4];
Char * rst = new char [Plen + (Plen> 2) + 2];
Memset (BUF, 0, 4 );
Memset (RST, 0, Plen + (Plen> 2) + 2 );
Int I = 0;
Int J = 0;
While (I <Plen)
{
// Directly copy data in English
If (* (ptext + I)> = 0)
{
RST [J ++] = ptext [I ++];
}
Else
{
Wchar pbuffer;
Gb2312tounicode (& pbuffer, ptext + I );
Unicodetoutf_8 (BUF, & pbuffer );
Unsigned short int TMP = 0;
TMP = rst [J] = Buf [0];
TMP = rst [J + 1] = Buf [1];
TMP = rst [J + 2] = Buf [2];
J + = 3;
I + = 2;
}
}
RST [J] = ''/0 '';
// Return results
Pout = RST;
Delete [] RST;
Return;
}
// Convert UTF-8 to gb2312
Void cchinesecodelib: utf_8togb2312 (string & pout, char * ptext, int Plen)
{
Char * newbuf = new char [Plen];
Char ctemp [4];
Memset (ctemp, 0, 4 );
Int I = 0;
Int J = 0;
While (I <Plen)
{
If (ptext [I]> 0)
{
Newbuf [J ++] = ptext [I ++];
}
Else
{
Wchar wtemp;
Utf_8tounicode (& wtemp, ptext + I );
Unicodetogb2312 (ctemp, wtemp );
Newbuf [J] = ctemp [0];
Newbuf [J + 1] = ctemp [1];
I + = 3;
J + = 2;
}
}
Newbuf [J] = ''/0 '';
Pout = newbuf;
Delete [] newbuf;
Return;
}