This article describes how to convert between UTF-8 and GB2312 and provides code for it. However, the originally published code contains errors. The corrected version follows:
Class cchinesecodelib
{
Public:
Static void utf_8togb2312 (string & pout, char * ptext, int Plen );
Static void gb2312toutf_8 (string & pout, char * ptext, int Plen );
// Unicode to UTF-8
Static void unicodetoutf_8 (char * pout, wchar * ptext );
// Convert gb2312 to Unicode
Static void gb2312tounicode (wchar * pout, char * gbbuffer );
// Convert Unicode to gb2312
Static void unicodetogb2312 (char * pout, wchar udata );
// Convert the UTF-8 to Unicode
Static void utf_8tounicode (wchar * pout, char * ptext );
Cchinesecodelib ();
Virtual ~ Cchinesecodelib ();
};
Void cchinesecodelib: utf_8tounicode (wchar * pout, char * ptext)
{
Char * uchar = (char *) pout;
Uchar [1] = (ptext [0] & 0x0f) <4) + (ptext [1]> 2) & 0x0f );
Uchar [0] = (ptext [1] & 0x03) <6) + (ptext [2] & 0x3f );
Return;
}
Void cchinesecodelib: unicodetogb2312 (char * pout, wchar udata)
{
Widechartomultibyte (cp_acp, null, & udata, 1, pout, sizeof (wchar), null, null );
Return;
}
Void cchinesecodelib: gb2312tounicode (wchar * pout, char * gbbuffer)
{
: Multibytetowidechar (cp_acp, mb_precomposed, gbbuffer, 2, pout, 1 );
Return;
}
Void cchinesecodelib: unicodetoutf_8 (char * pout, wchar * ptext)
{
// Pay attention to the order of wchar high and low characters. The lower byte is in the front and the higher byte is in the back
Char * pchar = (char *) ptext;
Pout [0] = (0xe0 | (pchar [1] & 0xf0)> 4 ));
Pout [1] = (0x80 | (pchar [1] & 0x0f) <2) + (pchar [0] & 0xc0)> 6 ); // there is also an error here. The author of this article has already raised it!
Pout [2] = (0x80 | (pchar [0] & 0x3f ));
Return;
}
Void cchinesecodelib: gb2312toutf_8 (string & pout, char * ptext, int Plen)
{
Char Buf [4];
Char * rst = new char [(Plen/2) * 3 + 1]; // the original code does not allocate enough space, leading to an out-of-bounds!
Memset (BUF, 0, 4 );
Memset (RST, 0, sizeof (RST ));
Int I = 0;
Int J = 0;
While (I <Plen)
{
// Directly copy data in English
If (* (ptext + I)> = 0)
{
RST [J ++] = ptext [I ++];
}
Else
{
Wchar pbuffer;
Gb2312tounicode (& pbuffer, ptext + I );
Unicodetoutf_8 (BUF, & pbuffer );
Unsigned short int TMP = 0;
TMP = rst [J] = Buf [0];
TMP = rst [J + 1] = Buf [1];
TMP = rst [J + 2] = Buf [2];
J + = 3;
I + = 2;
}
}
RST [J] = '\ 0 ';
// Return results
Pout = RST;
Delete [] RST;
Return;
}
Void cchinesecodelib: utf_8togb2312 (string & pout, char * ptext, int Plen)
{
Char * newbuf = new char [Plen + 1]; // the original code does not allocate enough space. An error occurs when all ptext is in English. Plen is the length of the ptext character // string, excluding the Terminator
Char ctemp [4];
Memset (ctemp, 0, 4 );
Int I = 0;
Int J = 0;
While (I <Plen)
{
If (ptext [I]> 0)
{
Newbuf [J ++] = ptext [I ++];
}
Else
{
Wchar wtemp;
Utf_8tounicode (& wtemp, ptext + I );
Unicodetogb2312 (ctemp, wtemp );
Newbuf [J] = ctemp [0];
Newbuf [J + 1] = ctemp [1];
I + = 3;
J + = 2;
}
}
Newbuf [J] = '\ 0 ';
Pout = newbuf;
Delete [] newbuf;
Return;
}
Karlson, 13:42:35
Practice
-data << gitem.m_gmessage; // text for gossip item
+ // Karlson solves vs2005 Chinese problems >
+ std::string str = gitem.m_gmessage;
+ // A leading space marks text that must be converted from GB2312 to UTF-8.
+ // Check for an empty string first so str[0] is never read past the end.
+ if (!str.empty() && str[0] == ' ') {
+     // str is both the input buffer and the output; this relies on
+     // gb2312toutf_8 assigning to its output only after it has finished reading.
+     char *chr = (char *)str.c_str();
+     cchinesecodelib::gb2312toutf_8(str, chr, (int)strlen(chr));
+ }
+ data << str;
+ // < Karlson
+ // data << gitem.m_gmessage;

Special knowledge about character sets: UTF-8, GB2312, Unicode
Karlson, 13:39:39