Two Windows API functions are used: MultiByteToWideChar and WideCharToMultiByte. Converting a string between any two character encodings with these APIs is simple: first convert the source string to Unicode (UTF-16) with MultiByteToWideChar, then convert the resulting Unicode string to the target encoding with WideCharToMultiByte. The following is a demo program:
#include <windows.h>
#include <tchar.h>

#include <ctype.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
/**
 * Dump a memory region to stdout, up to 16 bytes per row.
 *
 * Every row is printed as two lines: the byte values in hexadecimal, then the
 * same bytes as characters with each non-printable byte shown as '.'. Each
 * cell is left-aligned in a 4-column field so the two lines line up.
 *
 * Side effect: switches the process-wide locale (LC_ALL) to ".ACP" so that
 * isprint() classifies bytes according to that locale. ".ACP" is the
 * Microsoft CRT name for the system ANSI code page.
 *
 * @param addr   start of the region to dump; nothing is printed if NULL
 * @param bytes  number of bytes to dump; nothing is printed if <= 0
 */
void hexdump(const void *addr, int bytes)
{
    enum { BYTES_PER_ROW = 16 };
    const unsigned char *pmem = (const unsigned char *)addr;
    int offset = 0;

    if (pmem == NULL || bytes <= 0) {
        return;
    }

    setlocale(LC_ALL, ".ACP");

    while (offset < bytes) {
        int remaining = bytes - offset;
        int count = remaining < BYTES_PER_ROW ? remaining : BYTES_PER_ROW;
        int j;

        /* First line of the row: hexadecimal byte values. */
        for (j = 0; j < count; j++) {
            printf("%-4x", (unsigned)pmem[offset + j]);
        }
        printf("\n");

        /* Second line: printable characters. The byte stays unsigned because
         * passing a negative value to isprint() is undefined behaviour. */
        for (j = 0; j < count; j++) {
            unsigned char ch = pmem[offset + j];
            printf("%-4c", isprint(ch) ? ch : '.');
        }
        printf("\n");

        /* count <= remaining, so offset never exceeds bytes (no overflow). */
        offset += count;
    }
}
- //////////////////////////////////////// ///////////////////////
- Bool callback EnumCodePagesProc (
- LPTSTR lpCodePageString // code page identifier string
- ){
- UINT uCodePage = atoi (lpCodePageString );
- Printf ("uCodePage: % d/n", uCodePage );
- CPINFOEX cpinfo;
- GetCPInfoEx (uCodePage, 0, & cpinfo );
- Printf ("MaxCharSize: % d/n"
- "CodePage: % d/n"
- "CodePageName: % s/n ",
- Cpinfo. MaxCharSize,
- Cpinfo. CodePage,
- Cpinfo. CodePageName );
- Printf ("DefaultChar :");
- For (int I = 0; I <MAX_DEFAULTCHAR; I ++)
- Printf ("% 3x", cpinfo. DefaultChar [I]);
- Printf ("/nLeadByte :");
- For (I = 0; I <MAX_LEADBYTES; I ++)
- Printf ("% 3x", cpinfo. LeadByte [I]);
- Printf ("/nUnicodeDefaultChar :");
- Wprintf (L "% x", cpinfo. unicodedefachar char );
- Puts ("/n ");
- Return TRUE;
- }
- //////////////////////////////////////// ////////////////////////
- Int main (){
- Char szTest [] = "this ² â zookeeper test ";
- Printf ("local settings: % s/n", setlocale (LC_CTYPE ,""));
- Puts ("original string ");
- Hexdump (szTest, sizeof (szTest ));
- Puts ("To Unicode ");
- Wchar_t out [MAX_PATH] = {0 };
- MultiByteToWideChar (
- CP_ACP,
- 0,
- SzTest,
- Sizeof (szTest ),
- Out,
- MAX_PATH );
- Hexdump (out, wcslen (out) * sizeof (wchar_t ));
- Puts ("To UTF-8 ");
- Char utf8 [max_path] = {0 };
- Widechartomultibyte (
- // 54936, // GB18030--54936 cp_utf8,
- // 950, // big5
- 65001, // cp_utf8, // 65001, UTF-7: 65000
- 0,
- Out,
- Wcslen (out ),
- Utf8,
- Max_path,
- Null,
- Null
- );
- Hexdump (utf8, lstrlen (utf8 ));
- // EnumSystemCodePages (EnumCodePagesProc, CP_INSTALLED );
- Return 0;
- }
The output result is as follows:
local settings: Chinese_People's Republic of China.936
original string
74 68 69 73 b2 e2 ca d4 74 65 73 74 0
t h i s . . . . t e s t .
To Unicode
74 0 68 0 69 0 73 0 4b 6d d5 8b 74 0 65 0
t . h . i . s . K m . . t . e .
73 0 74 0
s . t .
To UTF-8
74 68 69 73 e6 b5 8b e8 af 95 74 65 73 74
t h i s . . . . . . t e s t
We can see that the two Chinese characters 测试 ("test") are encoded in GBK (code page 936) as the hexadecimal bytes b2 e2 ca d4. In Unicode (UTF-16, little-endian) they are the bytes 4b 6d d5 8b, i.e. the code points U+6D4B and U+8BD5, and in UTF-8 they are the bytes e6 b5 8b e8 af 95.