Conversion between Unicode, UTF8, GB2312, UCS2, and GBK

Source: Internet
Author: User

Conversion between Unicode, UTF8, GB2312, UCS2, and GBK

This article collects conversion routines between several character encodings. They are rarely needed in everyday work, but they come up often when implementing text-message (SMS) protocols. I wrote an SMS platform interface recently and gathered the routines below; the collection is not complete.

 

//////////////////////////////////////// //////////////////////////////////////

/*
 * URL-encode a NUL-terminated byte string (e.g. Chinese text becomes %HH%HH).
 *
 * Alphanumeric bytes are copied unchanged, bytes classified as whitespace by
 * isspace() are written as '+', and every other byte is written as '%'
 * followed by two characters obtained from toHex() for the high and the low
 * nibble.
 *
 * pInBuf  NUL-terminated input, processed one byte at a time.
 * szOut   destination buffer. No size is passed in, so the caller must
 *         provide at least 3 * (input length) + 1 bytes (worst case: every
 *         byte is escaped).
 *
 * Returns the number of bytes written to szOut, not counting the
 * terminating NUL.
 *
 * NOTE(review): toHex() is defined elsewhere; presumably it maps a value in
 * 0..15 to its hexadecimal digit character -- confirm.
 * NOTE(review): the byte-wise walk assumes an ANSI build where TCHAR is char.
 * NOTE(review): every isspace() byte (tab, newline, ...) becomes '+', which a
 * decoder will turn back into a plain space; confirm this lossy mapping is
 * what callers want.
 */
int URLEncode(LPCTSTR pInBuf, LPTSTR szOut)
{
    LPBYTE pInTmp = (LPBYTE)pInBuf;
    LPBYTE pOutTmp = (LPBYTE)szOut;

    while (*pInTmp) {
        if (isalnum(*pInTmp)) {
            *pOutTmp++ = *pInTmp;
        } else if (isspace(*pInTmp)) {
            *pOutTmp++ = '+';
        } else {
            /* Escape as %HH: high nibble first, then low nibble. */
            *pOutTmp++ = '%';
            *pOutTmp++ = toHex(*pInTmp >> 4);
            *pOutTmp++ = toHex(*pInTmp & 0xF);
        }
        pInTmp++;
    }
    *pOutTmp = '\0';

    return (int)(pOutTmp - (LPBYTE)szOut);
}

 

 

//////////////////////////////////////// ///////////////////////////

/*
 * Encode one UTF-16 code unit as a three-byte UTF-8 sequence.
 *
 * pOut    destination with room for at least 4 elements; receives the three
 *         encoded bytes followed by a terminating NUL.
 * wcText  the code unit to encode.
 *
 * Returns pOut.
 *
 * The output is ALWAYS the three-byte form (1110xxxx 10xxxxxx 10xxxxxx),
 * because callers in this file copy exactly three bytes from the result.
 * That form is only well-formed UTF-8 for values in U+0800..U+FFFF; smaller
 * values come out as an overlong sequence.
 *
 * The bits are taken from the value with shifts rather than by reading the
 * WCHAR through a byte pointer, so the result does not depend on byte order
 * or on whether TCHAR is one byte wide.
 */
LPCTSTR UnicodeToUTF8Char(LPTSTR pOut, WCHAR wcText)
{
    pOut[0] = (TCHAR)(0xE0 | ((wcText >> 12) & 0x0F)); /* top 4 bits    */
    pOut[1] = (TCHAR)(0x80 | ((wcText >> 6) & 0x3F));  /* middle 6 bits */
    pOut[2] = (TCHAR)(0x80 | (wcText & 0x3F));         /* low 6 bits    */
    pOut[3] = '\0';
    return pOut;
}

 

 

 

/*
 * Convert a GB2312 byte string to UTF-8.
 *
 * pUTF8Out      destination buffer. No size is passed in; the caller must
 *               provide at least GB2312Len * 3 / 2 + 2 bytes (each two-byte
 *               input character produces at most three output bytes).
 * pGB2312Input  input bytes; need not be NUL-terminated.
 * GB2312Len     number of input bytes to convert.
 *
 * Returns pUTF8Out (the start of the NUL-terminated result).
 *
 * Bytes with the high bit clear (including NUL) are copied through as ASCII.
 * A byte with the high bit set starts a two-byte character that is decoded
 * with MultiByteToWideChar and re-encoded as UTF-8 using the shortest form
 * (two bytes below U+0800, three bytes otherwise). If the pair is truncated
 * by the end of the input or cannot be decoded to a single code unit, a '?'
 * is emitted and one input byte is skipped.
 *
 * NOTE(review): decoding uses CP_ACP, so the result is only GB2312/GBK when
 * the system ANSI code page is a Chinese one (e.g. 936) -- confirm.
 * NOTE(review): assumes an ANSI build where TCHAR is char.
 */
LPCTSTR GB2312ToUTF8(LPTSTR pUTF8Out, LPCTSTR pGB2312Input, int GB2312Len)
{
    LPCTSTR lpReturn = (LPCTSTR)pUTF8Out;
    LPCTSTR pGB2312Cursor = pGB2312Input;
    LPCTSTR pGB2312InputEnd = pGB2312Cursor + GB2312Len;

    while (pGB2312Cursor < pGB2312InputEnd) {
        WCHAR wcBuffer = 0;

        /* ASCII (high bit clear) is identical in UTF-8: copy it directly. */
        if (((unsigned char)*pGB2312Cursor & 0x80) == 0) {
            *pUTF8Out++ = *pGB2312Cursor++;
            continue;
        }

        /* A lead byte needs a trail byte inside the buffer and must decode
         * to exactly one UTF-16 code unit. */
        if (pGB2312InputEnd - pGB2312Cursor < 2 ||
            MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
                                pGB2312Cursor, 2, &wcBuffer, 1) != 1) {
            *pUTF8Out++ = '?';
            pGB2312Cursor++;
            continue;
        }
        pGB2312Cursor += 2;

        if (wcBuffer < 0x80) {
            *pUTF8Out++ = (TCHAR)wcBuffer;
        } else if (wcBuffer < 0x800) {
            /* e.g. Greek and Cyrillic letters: two-byte form. */
            *pUTF8Out++ = (TCHAR)(0xC0 | (wcBuffer >> 6));
            *pUTF8Out++ = (TCHAR)(0x80 | (wcBuffer & 0x3F));
        } else {
            *pUTF8Out++ = (TCHAR)(0xE0 | (wcBuffer >> 12));
            *pUTF8Out++ = (TCHAR)(0x80 | ((wcBuffer >> 6) & 0x3F));
            *pUTF8Out++ = (TCHAR)(0x80 | (wcBuffer & 0x3F));
        }
    }
    *pUTF8Out = '\0';

    return lpReturn;
}

 

 

 

 

Int UTF8ToGB (const char * str, char * out)
{
WCHAR * strSrc;
TCHAR * szRes;
Int len;
 
// Obtain the size of the Temporary Variable
Int I = MultiByteToWideChar (CP_UTF8, 0, str,-1, NULL, 0 );
StrSrc = new WCHAR [I + 1];
MultiByteToWideChar (CP_UTF8, 0, str,-1, strSrc, I );
 
// Obtain the size of the Temporary Variable
I = WideCharToMultiByte (CP_ACP, 0, strSrc,-1, NULL, 0, NULL, NULL );
SzRes = new TCHAR [I + 1];
WideCharToMultiByte (CP_ACP, 0, strSrc,-1, szRes, I, NULL, NULL );
 
Len = (I + 1) * sizeof (CHAR );
Memcpy (out, szRes, len );
Out [len + 1] = '/0 ';
 
Delete [] strSrc;
Delete [] szRes;
 
Return len;
}

 

 

 

/*
 * Convert a GB2312 byte string to web form, where every two-byte character
 * is written as a decimal numeric character reference ("&#NNNNN;").
 *
 * pWebGB2312Out  destination buffer. No size is passed in; each two-byte
 *                input character can expand to 8 bytes ("&#65535;"), so the
 *                caller must provide at least GB2312Len * 4 + 1 bytes.
 * pGB2312Input   input bytes; need not be NUL-terminated.
 * GB2312Len      number of input bytes to convert.
 *
 * Returns pWebGB2312Out (the start of the NUL-terminated result).
 *
 * Bytes with the high bit clear (including NUL) are copied through as ASCII.
 * A byte with the high bit set starts a two-byte character that is decoded
 * with MultiByteToWideChar. If the pair is truncated by the end of the input
 * or cannot be decoded to a single code unit, a '?' is emitted and one input
 * byte is skipped.
 *
 * NOTE(review): decoding uses CP_ACP, so the result is only correct when the
 * system ANSI code page is a Chinese one (e.g. 936) -- confirm.
 * NOTE(review): assumes an ANSI build where TCHAR is char.
 */
LPCTSTR GB2312ToWebGB2312(LPTSTR pWebGB2312Out, LPCTSTR pGB2312Input, int GB2312Len)
{
    LPCTSTR lpReturn = (LPCTSTR)pWebGB2312Out;
    LPCTSTR pGB2312Cursor = pGB2312Input;
    LPCTSTR pGB2312InputEnd = pGB2312Cursor + GB2312Len;

    while (pGB2312Cursor < pGB2312InputEnd) {
        WCHAR wcBuffer = 0;

        /* ASCII (high bit clear) needs no escaping here: copy it directly. */
        if (((unsigned char)*pGB2312Cursor & 0x80) == 0) {
            *pWebGB2312Out++ = *pGB2312Cursor++;
            continue;
        }

        /* A lead byte needs a trail byte inside the buffer and must decode
         * to exactly one UTF-16 code unit. */
        if (pGB2312InputEnd - pGB2312Cursor < 2 ||
            MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED,
                                pGB2312Cursor, 2, &wcBuffer, 1) != 1) {
            *pWebGB2312Out++ = '?';
            pGB2312Cursor++;
            continue;
        }

        /* sprintf returns the number of characters written, which advances
         * the output cursor past the reference. */
        pWebGB2312Out += sprintf(pWebGB2312Out, "&#%d;", (int)wcBuffer);
        pGB2312Cursor += 2;
    }
    *pWebGB2312Out = '\0';

    return lpReturn;
}

 

 

 

/*
 * Convert an array of UCS-2 code units to a NUL-terminated UTF-8 string.
 *
 * ucs2   input code units.
 * count  number of code units to convert; zero or a negative value yields
 *        an empty string.
 * utf8   destination buffer. No size is passed in; each code unit produces
 *        at most three bytes, so the caller must provide at least
 *        count * 3 + 1 bytes.
 *
 * Nothing is written when either pointer is NULL.
 *
 * Each code unit is encoded on its own: values below 0x80 take one byte,
 * values below 0x800 take two, everything else takes three. Surrogate pairs
 * are not combined.
 */
void UCS2toUTF8(unsigned short *ucs2, int count, char *utf8)
{
    int len = 0;
    int i;

    if (ucs2 == NULL || utf8 == NULL) {
        return;
    }

    for (i = 0; i < count; i++) {
        unsigned int unicode = ucs2[i];

        if (unicode < 0x80) {
            /* 0xxxxxxx */
            utf8[len++] = (char)unicode;
        } else if (unicode < 0x800) {
            /* 110xxxxx 10xxxxxx */
            utf8[len++] = (char)(0xC0 | (unicode >> 6));
            utf8[len++] = (char)(0x80 | (unicode & 0x3F));
        } else {
            /* 1110xxxx 10xxxxxx 10xxxxxx */
            utf8[len++] = (char)(0xE0 | ((unicode >> 12) & 0x0F));
            utf8[len++] = (char)(0x80 | ((unicode >> 6) & 0x3F));
            utf8[len++] = (char)(0x80 | (unicode & 0x3F));
        }
    }

    utf8[len] = '\0';
}

 

 

//////////////////////////////////////// ///////////////////////////

// Convert UCS2 to UTF8

//////////////////////////////////////// ///////////////////////////

//////////////////////////////////////// ///////////////////////////

// Convert UTF8 to GB

//////////////////////////////////////// ///////////////////////////

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on the page have no relationship with Alibaba Cloud. If you find the content of the page confusing, please write us an email and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.