Notes on character sets: UTF-8, GB2312 and Unicode

Source: Internet
Author: User

This article describes how to convert between UTF-8 and GB2312 and provides code. However, the originally published code was incorrect. The corrected version follows:

Class cchinesecodelib
{
Public:
Static void utf_8togb2312 (string & pout, char * ptext, int Plen );
Static void gb2312toutf_8 (string & pout, char * ptext, int Plen );
// Unicode to UTF-8
Static void unicodetoutf_8 (char * pout, wchar * ptext );
// Convert gb2312 to Unicode
Static void gb2312tounicode (wchar * pout, char * gbbuffer );
// Convert Unicode to gb2312
Static void unicodetogb2312 (char * pout, wchar udata );
// Convert the UTF-8 to Unicode
Static void utf_8tounicode (wchar * pout, char * ptext );

Cchinesecodelib ();
Virtual ~ Cchinesecodelib ();
};

Void cchinesecodelib: utf_8tounicode (wchar * pout, char * ptext)
{
Char * uchar = (char *) pout;

Uchar [1] = (ptext [0] & 0x0f) <4) + (ptext [1]> 2) & 0x0f );
Uchar [0] = (ptext [1] & 0x03) <6) + (ptext [2] & 0x3f );

Return;
}

Void cchinesecodelib: unicodetogb2312 (char * pout, wchar udata)
{
Widechartomultibyte (cp_acp, null, & udata, 1, pout, sizeof (wchar), null, null );
Return;
}

Void cchinesecodelib: gb2312tounicode (wchar * pout, char * gbbuffer)
{
: Multibytetowidechar (cp_acp, mb_precomposed, gbbuffer, 2, pout, 1 );
Return;
}

Void cchinesecodelib: unicodetoutf_8 (char * pout, wchar * ptext)
{
// Pay attention to the order of wchar high and low characters. The lower byte is in the front and the higher byte is in the back
Char * pchar = (char *) ptext;

Pout [0] = (0xe0 | (pchar [1] & 0xf0)> 4 ));
Pout [1] = (0x80 | (pchar [1] & 0x0f) <2) + (pchar [0] & 0xc0)> 6 ); // there is also an error here. The author of this article has already raised it!
Pout [2] = (0x80 | (pchar [0] & 0x3f ));

Return;
}

Void cchinesecodelib: gb2312toutf_8 (string & pout, char * ptext, int Plen)
{
Char Buf [4];
Char * rst = new char [(Plen/2) * 3 + 1]; // the original code does not allocate enough space, leading to an out-of-bounds!

Memset (BUF, 0, 4 );
Memset (RST, 0, sizeof (RST ));

Int I = 0;
Int J = 0;
While (I <Plen)
{
// Directly copy data in English
If (* (ptext + I)> = 0)
{
RST [J ++] = ptext [I ++];
}
Else
{
Wchar pbuffer;
Gb2312tounicode (& pbuffer, ptext + I );

Unicodetoutf_8 (BUF, & pbuffer );

Unsigned short int TMP = 0;
TMP = rst [J] = Buf [0];
TMP = rst [J + 1] = Buf [1];
TMP = rst [J + 2] = Buf [2];


J + = 3;
I + = 2;
}
}
RST [J] = '\ 0 ';

// Return results
Pout = RST;
Delete [] RST;

Return;
}

Void cchinesecodelib: utf_8togb2312 (string & pout, char * ptext, int Plen)
{
Char * newbuf = new char [Plen + 1]; // the original code does not allocate enough space. An error occurs when all ptext is in English. Plen is the length of the ptext character // string, excluding the Terminator
Char ctemp [4];
Memset (ctemp, 0, 4 );

Int I = 0;
Int J = 0;

While (I <Plen)
{
If (ptext [I]> 0)
{
Newbuf [J ++] = ptext [I ++];
}
Else
{
Wchar wtemp;
Utf_8tounicode (& wtemp, ptext + I );

Unicodetogb2312 (ctemp, wtemp );

Newbuf [J] = ctemp [0];
Newbuf [J + 1] = ctemp [1];

I + = 3;
J + = 2;
}
}

Newbuf [J] = '\ 0 ';

Pout = newbuf;
Delete [] newbuf;

Return;
}

 

Karlson, 13:42:35

Practice
// NOTE(review): this is a patch excerpt ('-' marks a removed line, '+' an added line) against a caller that is not shown here.
// The added lines copy gitem.m_gmessage into a local string, conditionally pass it through gb2312toutf_8, and stream the local string instead of the member.
// NOTE(review): operators look mangled by extraction ("<" is presumably "<<", and "= ''" presumably a comparison) - confirm against the original patch.
// NOTE(review): the class is spelled "Cchinesecode" here but "cchinesecodelib" in the listing above - confirm which name the caller's project uses.
// NOTE(review): CHR points into STR's own buffer while STR is also the output argument of the call, so the callee must finish reading CHR before it assigns to STR - verify.
-Data <gitem. m_gmessage; // text for gossip item
+ // Karlson solves vs2005 Chinese problems>
+ STD: String STR = gitem. m_gmessage;
+ If (STR [0] = '') {// determines whether to convert the string based on whether the first character is a space.
+ Char * CHR = (char *) Str. c_str ();
+ Cchinesecode: gb2312toutf_8 (STR, CHR, strlen (CHR ));
+}
+ Data <STR;
+ // <Karlson
+ // Data <gitem. m_gmessage;

Special knowledge about character sets UTF-8 gb2312 Unicode

Karlson, 13:39:39

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please send us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.