Win32 programming (2) Unicode

Source: Internet
Author: User
1. History of encodings

Single-byte encoding

2.1.1 ASCII 0-127 7-bit representation
2.1.2 ASCII extended code 0-255 8-bit representation
Code page: the code page is used to switch to the corresponding character set.

Multi-byte encoding

2.1.3 dual-Byte Character Set DBCS
One or two bytes are used to represent characters.

"Country A and country B"
12 1 2
A: 0x41 medium: 0x8051
B: 0x42 countries: 0x8253

1 2 3 4 5 60x41 0x80 0x51 0x42 0x82 0x53 A in Country B

An encoding like this, which mixes single-byte and double-byte characters, is a multi-byte encoding.

Wide-byte encoding

2.1.4 Unicode
All characters are expressed in 2 bytes.
"Country A and country B"
2 2 2 2
A: 0x0041 medium: 0x8051
B: 0x0042 countries: 0x8253
1 2 3 4 5 6 7 8
41 00 51 80 42 00 53 82

The cost: memory, hard-disk, and other resource usage becomes larger.
The benefit: better support for programming.

2. The C language

1. Single-byte characters and strings

char cText = 'a';        /* one single-byte character */
char *pszText = "ABCD";  /* a NUL-terminated string of single-byte characters */

2. Wide (two-byte) characters and strings

wchar_t cText = 'A';         /* a char constant converts directly to wchar_t */
wchar_t *pszText = L"ABCD";  /* the L prefix makes this a wide string literal */

3 related functions

Each single-byte string function has corresponding wide-character and multi-byte versions:
strlen / wcslen / _mbslen
printf / wprintf

4. TCHAR

So that a program can easily support both Unicode and multi-byte characters, TCHAR is used to define characters and strings.
Depending on the _UNICODE macro switch, TCHAR is compiled into a different character type.

/*
 * Selects the character type from the _UNICODE switch.
 * Without _UNICODE: TCHAR is char and __T(x) leaves the literal unchanged.
 * With _UNICODE:    TCHAR is wchar_t and __T(x) pastes an L prefix onto the
 *                   literal, turning it into a wide literal.
 */
#ifndef _UNICODE
typedef char TCHAR;
#define __T(x) x
#else
typedef wchar_t TCHAR;
#define __T(x) L##x
#endif

To use it, include the tchar.h header file and enable the _UNICODE macro switch,
either on the compiler command line:

cl window.c /D_UNICODE

or in the source file, before the header is included:

#define _UNICODE
#include "tchar.h"
 
Definition method:
TCHAR *pszText = _T("abcdef");


Code usage: use the _UNICODE macro switch to tell the compiler which code to compile.

#ifndef _UNICODEint nLen = strlen( pszText );#elseint nLen = wcslen( pszText );#endif

2.5 Unicode console Printing
Bool writeconsole (handle hconsoleoutput, // console output stream handle const void * lpbuffer, // output string pointer DWORD nnumberofcharstowrite, // output String Length lpdword lpnumberofcharswritten, // return the number of output characters (lpvoid lpreserved); // reserved value

Code example:

# Include "stdafx. H "# include <stdio. h> # include <stdlib. h> # include <string. h> # include <windows. h> # include <tchar. h >/// tcharvoid ASCII () {for (INT I = 0; I <255; I ++) {printf ("% C", I );} printf ("\ n"); // because it is a Chinese Character Set and the Chinese character set is dubyte, question marks will be displayed after 128} void codePage (INT ncodepage) {/* bool setconsolecp (uint wcodepageid // code page to set); 437 us 936 China */setconsoleoutputcp (ncodepage); For (INT I = 0; I <255; I ++) {printf ("% C", I);} PR INTF ("\ n");} void c_char () {char * psztext = "Hello World"; int nlen = strlen (psztext); printf ("% d, % s \ n ", nlen, psztext);} void c_wchar () {setconsoleoutputcp (936); wchar_t ctext = 'a '; // char can be directly converted to wchar_t // wchar_t * psztext = "ABCD"; the error cannot be converted from const char [5] to wchar_t * psztext = l "ABCD "; // L tells the compiler that the string is a double byte string int nlen = wcslen (psztext ); // here, 4 is the same as before, indicating that there are 4 dual-byte // printf ("% s \ n", psztext); // It is found that only a is printed because a's ACI Yes, it is 41 bytes and then 41 00 in the small-end mode memory. Therefore, when printing by single byte, only a is displayed. view the memory method debugging-window-memory wprintf (L "% d, % s \ n ", nlen, psztext); // before formatting the output, write an L // wide byte string wchar_t * pszchs = l" I Am a programmer "; nlen = wcslen (pszchs); wprintf (L "% d, % s \ n", nlen, pszchs); // 5 ???? Printf does not support Unicode, so Chinese characters cannot be printed. 
// multi-byte string char * PTR = "I Am a programmer"; nlen = strlen (PTR); printf ("% d, % s \ n ", nlen, PTR); // 10 I am a programmer} void tchar () {tchar * psztext = _ T (" I Am a programmer "); // _ t add L # ifndef _ unicodeint Len = strlen (psztext) before Unicode; # elseint Len = wcslen (psztext ); # endifprintf ("% d \ n", Len);} void printunicode (wchar_t * pszstr) {// how to print Unicode Windows API/* bool writeconsole (handle hconsoleoutput, // handle to a console screen bufferconst void * lpbuffer, // pointer to buffer to write from DWORD nnumberofcharstowrite, // number of characters to writelpdword lpnumberofcharswritten, // pointer to number of characters writtenlpvoid lpreserved // reserved); */handle Hout = getstdhandle (std_output_handle); // obtain the console handle int nlen = wcslen (pszstr ); writeconsole (Hout, pszstr, nlen, null, null); // print all Unicode codes out wchar_t sztext [2] = {0}; For (byte nhigh = 0; nhigh <0xff; nhigh ++) {for (byte nlow = 0; nlow <0xff; nlow ++) {sztext [0] = makeword (nlow, nhigh ); // macro makeword: writeconsole (Hout, sztext, wcslen (sztext), null, null);} */} int _ tmain (INT argc, _ tchar * argv []) {printf ("Hello world \ n"); c_char (); ASCII (); printf ("---------------------- \ n"); codePage (437 ); c_wchar (); tchar (); printunicode (L "I Am a programmer \ n"); System ("pause"); Return 0 ;}

3. Win32 programs and encoding

1. Win32 API definitions

Each API has a multi-byte version and a Unicode version.
For example, MessageBox:
MessageBoxA - multi-byte characters
MessageBoxW - Unicode characters
 

2. Character definitions: use the TEXT macro, which is defined in winnt.h

/*
 * With UNICODE defined, __TEXT pastes an L prefix onto the literal to make
 * it a wide string; otherwise the literal is left unchanged.
 */
#ifdef UNICODE
#define __TEXT(quote) L##quote
#else   /* UNICODE */
#define __TEXT(quote) quote
#endif  /* UNICODE */
/* TEXT is the name programs use; it forwards to __TEXT. */
#define TEXT(quote) __TEXT(quote)

TCHAR *pszText = TEXT("ABCD");

3. Character conversion

Wide byte to multi-byte

Int widechartomultibyte (
Uint codePage, // code page cp_acp ASCII code
DWORD dwflags, // conversion method 0
Lpcwstr lpwidecharstr, // The wchar address to be converted
Int cchwidechar, // The length of the wchar to be converted
Lpstr lpmultibytestr, // used to store the converted result buff
Int cchmultibyte, // buff Length
Lpstr lpdefaultchar, // the address of the default string used is null
Lpbool lpuseddefaultchar // ID 0 of the default string used
);

Multi-byte to wide byte

Int multibytetowidechar (
Uint codePage, // code page
DWORD dwflags, // Conversion Method
Lpcstr lpmultibytestr, // The Char address to be converted
Int cchmultibyte, // The length of the char to be converted
Lpwstr lpwidecharstr, // used to store the converted result buff
Int cchwidechar); // The length of the buff

Usage:
  
1. Pass the string to be converted to the function with the output buffer length set to 0 (cchWideChar for MultiByteToWideChar, cchMultiByte for WideCharToMultiByte); the return value is the number of elements in the converted string.
2. Allocate space for the string.
3. Call the function again, passing the allocated space, to obtain the result.

4 Examples

Void mymessagebox () {MessageBox (null, text ("hellowide"), text ("wide"), mb_ OK);} void multi2wide () {char * psztext = "multibyte"; // calculate the length of int nlen = multibytetowidechar (cp_acp, 0, psztext, strlen (psztext), null, 0 ); // after obtaining the length, allocate the space wchar_t * pwsztext = (wchar_t *) malloc (nlen + 1) * sizeof (wchar_t )); // One More wchar space to store \ 0 \ 0 memset (pwsztext, 0, (nlen + 1) * sizeof (wchar_t); // convert multibytetowidechar (cp_acp, 0, psztext, strlen (psztext), pwsztext, nlen); messageboxw (null, pwsztext, text ("multi2wide"), mb_ OK); free (pwsztext );}; void wide2multi () {wchar * pwsztext = text ("widebyte"); int nlen = widechartomultibyte (cp_acp, 0, pwsztext, wcslen (pwsztext), null, 0, null, 0); char * psztext = (char *) malloc (nlen + 1) * sizeof (char); memset (psztext, 0, (nlen + 1) * sizeof (char); widechartomultibyte (cp_acp, 0, pwsztext, wcslen (pwsztext), psztext, nlen, null, 0); messageboxa (null, psztext, "wide2multi ", mb_ OK );};

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find the content of this page confusing, please write us an email, and we will handle the problem within 5 days after receiving your email.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.