1. Relationships between TCHAR, Unicode, char, and wchar_t
Some programmers prefer the standard ANSI functions such as strcpy, while others prefer the _tcsxxx functions, and mixing the two is a frequent source of confusion. To keep code consistent, it is necessary to clarify the relationship between them.
To understand these functions, you first need to know the character types involved. char needs no introduction, so let's start with wchar_t. wchar_t is the data type for Unicode (wide) characters. It is actually defined in <string.h>:
typedef unsigned short wchar_t;
You cannot use ANSI C string functions such as strcpy to process wchar_t strings; you must use the functions prefixed with wcs, such as wcscpy. To make the compiler treat a string literal as a Unicode string, you must put an "L" in front of it. For example:
wchar_t *szTest = L"This is a Unicode string.";
Next, let's look at TCHAR. If you want the same source code to compile for both ANSI and Unicode, you need to include tchar.h. TCHAR is a macro defined in that header; it expands to char or wchar_t depending on whether the _UNICODE macro is defined. If you use TCHAR, you should use neither the ANSI strxxx functions nor the Unicode wcsxxx functions; instead, use the _tcsxxx functions defined in tchar.h. In addition, to solve the "L" prefix problem mentioned above, tchar.h defines the macro _TEXT.
Take the strcpy function as an example to summarize:
. If you want to use an ANSI string, use this set:
char szString[100];
strcpy(szString, "test");
. If you want to use a Unicode string, use this set:
wchar_t szString[100];
wcscpy(szString, L"test");
. If you want the same code to compile as either ANSI or Unicode depending on whether the _UNICODE macro is defined, use this set:
TCHAR szString[100];
_tcscpy(szString, _TEXT("test"));
2. To build for Unicode, add the UNICODE and _UNICODE macros to the project's preprocessor definitions.
3. To display Unicode strings while debugging, check the "Display Unicode strings" option on the Tools -> Options -> Debug page of the VC development tool.
4. Use wWinMainCRTStartup as the entry point of a Unicode program
Project -> Settings -> Link: in "Category" select "Output", then in "Entry-point symbol" enter wWinMainCRTStartup.
5. Conversion between several encodings
// Convert the utf8 format to the GB format
Cstring convertutf8togbk (cstring strutf8)
{
Int Len = multibytetowidechar (cp_utf8, 0, (lpcstr) strutf8.getbuffer (0),-1, null, 0 );
Unsigned short * wszgbk = new unsigned short [Len + 1];
Memset (wszgbk, 0, Len * 2 + 2 );
Multibytetowidechar (cp_utf8, 0, (lpcstr) strutf8.getbuffer (0),-1, wszgbk, Len );
Len = widechartomultibyte (cp_acp, 0, wszgbk,-1, null, 0, null, null );
Char * szgbk = new char [Len + 1];
Memset (szgbk, 0, Len + 1 );
Widechartomultibyte (cp_acp, 0, wszgbk,-1, szgbk, Len, null, null );
Cstring strgbk;
Strgbk = szgbk;
Delete [] szgbk;
Delete [] wszgbk;
Return strgbk;
}
// Convert the GB format to utf8 format
Cstring convertgbktoutf8 (cstring strgbk)
{
Int Len = multibytetowidechar (cp_acp, 0, (lpcstr) strgbk. getbuffer (0),-1, null, 0 );
Unsigned short * wszutf8 = new unsigned short [Len + 1];
Memset (wszutf8, 0, Len * 2 + 2 );
Multibytetowidechar (cp_acp, 0, (lpcstr) strgbk. getbuffer (0),-1, wszutf8, Len );
Len = widechartomultibyte (cp_utf8, 0, wszutf8,-1, null, 0, null, null );
Char * szutf8 = new char [Len + 1];
Memset (szutf8, 0, Len + 1 );
Widechartomultibyte (cp_utf8, 0, wszutf8,-1, szutf8, Len, null, null );
Cstring sutsf_8;
Sutsf_8 = szutf8;
Delete [] szutf8;
Delete [] wszutf8;
Return sutsf_8;
}
// Float for String Conversion
Float strtofloat (cstring Str)
{
Char A [max_path];
Memset (A, 0, max_path );
Widechartomultibyte (cp_acp, 0, (lpcwstr) STR,-1, A, max_path, null, null );
Float F = (float) atof ();
Return F;
}
// Utf8 to unicdoe
Wchar_t * u8tounicode (const char * szu8)
{
Int wcslen =: multibytetowidechar (cp_utf8, null, szu8, strlen (szu8), null, 0 );
Wchar_t * wszstring = new wchar_t [wcslen + 1];
: Multibytetowidechar (cp_utf8, null, szu8, strlen (szu8), wszstring, wcslen );
Wszstring [wcslen] = '/0 ';
Return wszstring;
}
// Convert Unicode to utf8
Char * unicodetou8 (wchar_t * wszstring)
{
Int u8len =: widechartomultibyte (cp_utf8, null, wszstring, wcslen (wszstring), null, 0, null, null );
Char * szu8 = new char [u8len + 1];
: Widechartomultibyte (cp_utf8, null, wszstring, wcslen (wszstring), szu8, u8len, null, null );
Szu8 [u8len] = '/0 ';
Return szu8;
}
// Convert Unicode to ANSI
Char * unicodetoansi (wchar_t * wszstring)
{
Int ansilen =: widechartomultibyte (cp_acp, null, wszstring, wcslen (wszstring), null, 0, null, null );
Char * szansi = new char [ansilen + 1];
: Widechartomultibyte (cp_acp, null, wszstring, wcslen (wszstring), szansi, ansilen, null, null );
Szansi [ansilen] = '/0 ';
Return szansi;
}
This article is from the CSDN blog. If you reproduce it, please indicate the source: http://blog.csdn.net/lantian22/archive/2009/07/04/4317839.aspx