Conversion between Unicode, UTF8, GB2312, UCS2, and GBK
Conversions between several character encodings. They are not needed often in everyday work, but they come up frequently when dealing with text-message (SMS) protocols. I recently built an SMS platform interface and collected a few conversion routines along the way; the collection is not complete.
//////////////////////////////////////// //////////////////////////////////////
// Format Chinese characters to % HH
Int URLEncode (LPCTSTR pInBuf, LPTSTR szOut)
{
LPBYTE pInTmp, pOutTmp;
PInTmp = (LPBYTE) pInBuf;
POutTmp = (LPBYTE) szOut;
While (* pInTmp ){
If (isalnum (* pInTmp )){
* POutTmp ++ = * pInTmp;
} Else {
If (isspace (* pInTmp )){
* POutTmp ++ = '+ ';
} Else {
* POutTmp ++ = '% ';
* POutTmp ++ = toHex (* pInTmp> 4 );
* POutTmp ++ = toHex (* pInTmp & 0xF );
}
}
PInTmp ++;
}
* POutTmp = '/0 ';
Return (int) (pOutTmp-(LPBYTE) szOut );
}
//////////////////////////////////////// ///////////////////////////
/*
 * Encode one 16-bit Unicode code unit as a three-byte UTF-8 sequence.
 *
 * pOut    buffer of at least four elements; receives the three encoded
 *         bytes followed by a NUL terminator.
 * wcText  the code unit to encode.
 *
 * Returns pOut.
 *
 * The three-byte form (1110xxxx 10xxxxxx 10xxxxxx) is always produced, so
 * the result is well-formed UTF-8 only for values in U+0800..U+FFFF.
 * Callers that may pass smaller values have to emit the one- or two-byte
 * forms themselves.
 */
LPCTSTR UnicodeToUTF8Char(LPTSTR pOut, WCHAR wcText)
{
    /* Work on the numeric value instead of on the bytes of wcText in
     * memory, so the result does not depend on the machine's byte order. */
    pOut[0] = (TCHAR)(0xE0 | ((wcText >> 12) & 0x0F)); /* bits 15..12 */
    pOut[1] = (TCHAR)(0x80 | ((wcText >> 6) & 0x3F));  /* bits 11..6  */
    pOut[2] = (TCHAR)(0x80 | (wcText & 0x3F));         /* bits  5..0  */
    pOut[3] = '\0';
    return pOut;
}
/*
 * Convert a GB2312 byte string to UTF-8.
 *
 * pUTF8Out       output buffer; receives the UTF-8 text and a terminating
 *                NUL. Each two-byte input character produces at most three
 *                output bytes, so GB2312Len * 3 / 2 + 1 bytes (rounded up)
 *                is always enough. No bounds check is done.
 * pGB2312Input   input bytes; need not be NUL-terminated.
 * GB2312Len      number of input bytes to convert.
 *
 * Returns the start of the output buffer.
 *
 * Bytes below 0x80 are copied through unchanged. Any other byte is taken
 * as the lead byte of a two-byte character and converted with
 * MultiByteToWideChar(CP_ACP, ...). If that conversion fails, or the lead
 * byte is the last byte of the input, a '?' is written and only that one
 * byte is skipped.
 *
 * NOTE(review): CP_ACP is the system ANSI code page, so the input is only
 * decoded as GB2312/GBK when the system locale says so - confirm this is
 * intended rather than an explicit code page.
 */
LPCTSTR GB2312ToUTF8(LPTSTR pUTF8Out, LPCTSTR pGB2312Input, int GB2312Len)
{
    CHAR utf8Buf[4];
    WCHAR wc;
    LPCTSTR pResult = (LPCTSTR)pUTF8Out;
    LPCTSTR pCursor = pGB2312Input;
    LPCTSTR pEnd;

    if (GB2312Len < 0) {
        GB2312Len = 0;
    }
    pEnd = pCursor + GB2312Len;

    while (pCursor < pEnd) {
        /* Compare as unsigned so the test does not depend on whether plain
         * char is signed, and so that a zero byte is copied as-is. */
        if ((unsigned char)*pCursor < 0x80) {
            *pUTF8Out++ = *pCursor++;
            continue;
        }

        /* A double-byte character needs two input bytes; never read past
         * the end, and never use wc unless the conversion succeeded. */
        if (pEnd - pCursor < 2 ||
            MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, pCursor, 2, &wc, 1) != 1) {
            *pUTF8Out++ = '?';
            pCursor++;
            continue;
        }

        if (wc < 0x80) {
            *pUTF8Out++ = (TCHAR)wc;
        } else if (wc < 0x800) {
            /* Some double-byte characters (Greek, Cyrillic, a few symbols)
             * map below U+0800 and must use the two-byte UTF-8 form. */
            *pUTF8Out++ = (TCHAR)(0xC0 | (wc >> 6));
            *pUTF8Out++ = (TCHAR)(0x80 | (wc & 0x3F));
        } else {
            memcpy(pUTF8Out, UnicodeToUTF8Char(utf8Buf, wc), 3);
            pUTF8Out += 3;
        }
        pCursor += 2;
    }
    *pUTF8Out = '\0';
    return pResult;
}
Int UTF8ToGB (const char * str, char * out)
{
WCHAR * strSrc;
TCHAR * szRes;
Int len;
// Obtain the size of the Temporary Variable
Int I = MultiByteToWideChar (CP_UTF8, 0, str,-1, NULL, 0 );
StrSrc = new WCHAR [I + 1];
MultiByteToWideChar (CP_UTF8, 0, str,-1, strSrc, I );
// Obtain the size of the Temporary Variable
I = WideCharToMultiByte (CP_ACP, 0, strSrc,-1, NULL, 0, NULL, NULL );
SzRes = new TCHAR [I + 1];
WideCharToMultiByte (CP_ACP, 0, strSrc,-1, szRes, I, NULL, NULL );
Len = (I + 1) * sizeof (CHAR );
Memcpy (out, szRes, len );
Out [len + 1] = '/0 ';
Delete [] strSrc;
Delete [] szRes;
Return len;
}
/*
 * Convert a GB2312 byte string to its web-page form, in which every
 * double-byte character is written as a decimal numeric character
 * reference ("&#NNNNN;") of its Unicode value.
 *
 * pWebGB2312Out  output buffer; receives the text and a terminating NUL.
 *                A two-byte input character expands to at most eight
 *                characters ("&#65535;"), so GB2312Len * 4 + 1 elements is
 *                always enough. No bounds check is done.
 * pGB2312Input   input bytes; need not be NUL-terminated.
 * GB2312Len      number of input bytes to convert.
 *
 * Returns the start of the output buffer.
 *
 * Bytes below 0x80 are copied through unchanged. Any other byte is taken
 * as the lead byte of a two-byte character and converted with
 * MultiByteToWideChar(CP_ACP, ...). If that conversion fails, or the lead
 * byte is the last byte of the input, a '?' is written and only that one
 * byte is skipped.
 *
 * NOTE(review): CP_ACP is the system ANSI code page, so the input is only
 * decoded as GB2312/GBK when the system locale says so - confirm this is
 * intended rather than an explicit code page.
 */
LPCTSTR GB2312ToWebGB2312(LPTSTR pWebGB2312Out, LPCTSTR pGB2312Input, int GB2312Len)
{
    WCHAR wc;
    LPCTSTR pResult = (LPCTSTR)pWebGB2312Out;
    LPCTSTR pCursor = pGB2312Input;
    LPCTSTR pEnd;

    if (GB2312Len < 0) {
        GB2312Len = 0;
    }
    pEnd = pCursor + GB2312Len;

    while (pCursor < pEnd) {
        /* Compare as unsigned so the test does not depend on whether plain
         * char is signed, and so that a zero byte is copied as-is. */
        if ((unsigned char)*pCursor < 0x80) {
            *pWebGB2312Out++ = *pCursor++;
            continue;
        }

        /* A double-byte character needs two input bytes; never read past
         * the end, and never print wc unless the conversion succeeded. */
        if (pEnd - pCursor < 2 ||
            MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, pCursor, 2, &wc, 1) != 1) {
            *pWebGB2312Out++ = '?';
            pCursor++;
            continue;
        }

        /* sprintf returns the number of characters written, which is
         * exactly how far the output cursor has to advance. */
        pWebGB2312Out += sprintf(pWebGB2312Out, "&#%d;", (int)wc);
        pCursor += 2;
    }
    *pWebGB2312Out = '\0';
    return pResult;
}
/*
 * Convert an array of UCS-2 code units to a NUL-terminated UTF-8 string.
 *
 * ucs2   input array of 16-bit code units.
 * count  number of code units to convert; zero or a negative value
 *        produces an empty string.
 * utf8   output buffer. Every code unit yields one to three bytes, so
 *        count * 3 + 1 bytes is always enough. No bounds check is done.
 *
 * Nothing is written when either pointer is NULL.
 *
 * Each code unit is encoded on its own:
 *   below 0x80   -> 1 byte   0xxxxxxx
 *   below 0x800  -> 2 bytes  110xxxxx 10xxxxxx
 *   otherwise    -> 3 bytes  1110xxxx 10xxxxxx 10xxxxxx
 * Surrogate values are not combined into pairs; they are encoded like any
 * other 16-bit value.
 */
void UCS2toUTF8(unsigned short *ucs2, int count, char *utf8)
{
    int len = 0;

    if (ucs2 == NULL || utf8 == NULL) {
        return;
    }

    for (int i = 0; i < count; i++) {
        /* Mask to 16 bits in case unsigned short is wider on this target. */
        unsigned int cp = ucs2[i] & 0xFFFFu;

        if (cp < 0x80) {
            utf8[len++] = (char)cp;
        } else if (cp < 0x800) {
            utf8[len++] = (char)(0xC0 | (cp >> 6));
            utf8[len++] = (char)(0x80 | (cp & 0x3F));
        } else {
            utf8[len++] = (char)(0xE0 | (cp >> 12));
            utf8[len++] = (char)(0x80 | ((cp >> 6) & 0x3F));
            utf8[len++] = (char)(0x80 | (cp & 0x3F));
        }
    }
    utf8[len] = '\0';
}
//////////////////////////////////////// ///////////////////////////
// Convert UCS-2 to UTF-8
//////////////////////////////////////// ///////////////////////////
//////////////////////////////////////// ///////////////////////////
// Convert UTF8 to GB
//////////////////////////////////////// ///////////////////////////