Special knowledge about character sets UTF-8 gb2312 Unicode

Last Update:2018-12-03 Source: Internet

Author: User

Developer on Alibaba Cloud: Build your first app with APIs, SDKs, and tutorials on Alibaba Cloud. Read more >

This article describes how to convert text between UTF-8 and GB2312 and provides code for doing so. However, the originally published code contains errors. The corrected version follows:

Class cchinesecodelib
{
Public:
Static void utf_8togb2312 (string & pout, char * ptext, int Plen );
Static void gb2312toutf_8 (string & pout, char * ptext, int Plen );
// Unicode to UTF-8
Static void unicodetoutf_8 (char * pout, wchar * ptext );
// Convert gb2312 to Unicode
Static void gb2312tounicode (wchar * pout, char * gbbuffer );
// Convert Unicode to gb2312
Static void unicodetogb2312 (char * pout, wchar udata );
// Convert the UTF-8 to Unicode
Static void utf_8tounicode (wchar * pout, char * ptext );

Cchinesecodelib ();
Virtual ~ Cchinesecodelib ();
};

Void cchinesecodelib: utf_8tounicode (wchar * pout, char * ptext)
{
Char * uchar = (char *) pout;

Uchar [1] = (ptext [0] & 0x0f) <4) + (ptext [1]> 2) & 0x0f );
Uchar [0] = (ptext [1] & 0x03) <6) + (ptext [2] & 0x3f );

Return;
}

Void cchinesecodelib: unicodetogb2312 (char * pout, wchar udata)
{
Widechartomultibyte (cp_acp, null, & udata, 1, pout, sizeof (wchar), null, null );
Return;
}

Void cchinesecodelib: gb2312tounicode (wchar * pout, char * gbbuffer)
{
: Multibytetowidechar (cp_acp, mb_precomposed, gbbuffer, 2, pout, 1 );
Return;
}

Void cchinesecodelib: unicodetoutf_8 (char * pout, wchar * ptext)
{
// Pay attention to the order of wchar high and low characters. The lower byte is in the front and the higher byte is in the back
Char * pchar = (char *) ptext;

Pout [0] = (0xe0 | (pchar [1] & 0xf0)> 4 ));
Pout [1] = (0x80 | (pchar [1] & 0x0f) <2) + (pchar [0] & 0xc0)> 6 ); // there is also an error here. The author of this article has already raised it!
Pout [2] = (0x80 | (pchar [0] & 0x3f ));

Return;
}

Void cchinesecodelib: gb2312toutf_8 (string & pout, char * ptext, int Plen)
{
Char Buf [4];
Char * rst = new char [(Plen/2) * 3 + 1]; // the original code does not allocate enough space, leading to an out-of-bounds!

Memset (BUF, 0, 4 );
Memset (RST, 0, sizeof (RST ));

Int I = 0;
Int J = 0;
While (I <Plen)
{
// Directly copy data in English
If (* (ptext + I)> = 0)
{
RST [J ++] = ptext [I ++];
}
Else
{
Wchar pbuffer;
Gb2312tounicode (& pbuffer, ptext + I );

Unicodetoutf_8 (BUF, & pbuffer );

Unsigned short int TMP = 0;
TMP = rst [J] = Buf [0];
TMP = rst [J + 1] = Buf [1];
TMP = rst [J + 2] = Buf [2];

J + = 3;
I + = 2;
}
}
RST [J] = '\ 0 ';

// Return results
Pout = RST;
Delete [] RST;

Return;
}

Void cchinesecodelib: utf_8togb2312 (string & pout, char * ptext, int Plen)
{
Char * newbuf = new char [Plen + 1]; // the original code does not allocate enough space. An error occurs when all ptext is in English. Plen is the length of the ptext character // string, excluding the Terminator
Char ctemp [4];
Memset (ctemp, 0, 4 );

Int I = 0;
Int J = 0;

While (I <Plen)
{
If (ptext [I]> 0)
{
Newbuf [J ++] = ptext [I ++];
}
Else
{
Wchar wtemp;
Utf_8tounicode (& wtemp, ptext + I );

Unicodetogb2312 (ctemp, wtemp );

Newbuf [J] = ctemp [0];
Newbuf [J + 1] = ctemp [1];

I + = 3;
J + = 2;
}
}

Newbuf [J] = '\ 0 ';

Pout = newbuf;
Delete [] newbuf;

Return;
}

Karlson, 13:42:35

Practice
- data << gitem.m_gmessage; // text for gossip item
+ // Karlson: work around the VS2005 Chinese text problem >
+ std::string str = gitem.m_gmessage;
+ if (str[0] == ' ') { // decide whether to convert the string based on whether its first character is a space
+     char *chr = (char *)str.c_str();
+     cchinesecodelib::gb2312toutf_8(str, chr, strlen(chr));
+ }
+ data << str;
+ // < Karlson
+ // data << gitem.m_gmessage;

Special knowledge about character sets UTF-8 gb2312 Unicode

Karlson, 13:39:39

This article is an English version of an article that was originally written in Chinese on aliyun.com and is provided for information purposes only. This website makes no representation or warranty of any kind, either expressed or implied, as to the accuracy, completeness, ownership, or reliability of the article or any translations thereof. If you have any concerns or complaints relating to the article, please send an email, providing a detailed description of the concern or complaint, to info-contact@alibabacloud.com. A staff member will contact you within 5 working days. Once verified, infringing content will be removed immediately.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

Get Started for Free

Sales Support

1 on 1 presale consultation

Chat Contact Sales
After-Sales Support

24/7 Technical Support 6 Free Tickets per Quarter Faster Response

Open a Ticket
Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.

Learn More

Special knowledge about character sets UTF-8 gb2312 Unicode

Contact Us

What's Trending

Top 10 Tags

Top 10 Keywords

A Free Trial That Lets You Build Big!

Sales Support

After-Sales Support

Special knowledge about character sets UTF-8 gb2312 Unicode

Contact Us

What's Trending

Top 10 Tags

Top 10 Keywords

Trending Topic

A Free Trial That Lets You Build Big!

Sales Support

After-Sales Support