This article describes how to convert the encoding of strings and files in the VC (Visual C++) compiler environment; under Linux, please use StrConv instead. The specific methods are as follows:
I. File encoding format conversion
Converting a GB2312-encoded file to Unicode:
// Convert a GB2312-encoded file into a Unicode (UTF-16 little-endian) file.
// The variables used here (file_handle, filenam, filenewname, str_buf_pool,
// pool_buff_size, str_unicode_buf_pool, unicode_little_file_header, numread,
// numwrite, nlen) are expected to be declared by the surrounding code.
// The assignment is parenthesized so that file_handle receives the FILE*
// itself, not the result of the != comparison.
if ((file_handle = fopen(filenam, "rb")) != NULL)
{
    // Read the GB2312 source file into the buffer in binary mode.
    numread = fread(str_buf_pool, sizeof(char), pool_buff_size, file_handle);
    fclose(file_handle);

    // Convert the GB2312 buffer to Unicode. The number of bytes actually read
    // is passed instead of -1, because fread() does not NUL-terminate the
    // buffer. A length of 0 makes MultiByteToWideChar fail, so an empty file
    // is handled explicitly.
    // NOTE: CP_ACP is the system ANSI code page; it is GB2312/GBK only on
    // Simplified Chinese Windows. Use code page 936 to force GB2312 elsewhere.
    nlen = 0;
    if (numread > 0)
    {
        nlen = MultiByteToWideChar(CP_ACP, 0, str_buf_pool, (int)numread, NULL, 0);
    }
    if (nlen > 0)
    {
        // NOTE(review): str_unicode_buf_pool must have room for at least nlen
        // wide characters - confirm against its declaration.
        MultiByteToWideChar(CP_ACP, 0, str_buf_pool, (int)numread,
                            (LPWSTR)str_unicode_buf_pool, nlen);
    }

    // Assemble the Unicode little-endian file header (BOM) "0xFF 0xFE".
    // Memo: the Unicode big-endian file header is "0xFE 0xFF".
    // The differences between little-endian and big-endian encoding are not
    // discussed here.
    unicode_little_file_header[0] = 0xFF;
    unicode_little_file_header[1] = 0xFE;

    // Store the target file.
    if ((file_handle = fopen(filenewname, "wb+")) != NULL)
    {
        fwrite(unicode_little_file_header, sizeof(char), 2, file_handle);
        // Each element is one wide character (sizeof(wchar_t)), not a pointer
        // (sizeof(LPWSTR)); using the pointer size would write past the data.
        numwrite = fwrite(str_unicode_buf_pool, sizeof(wchar_t), (size_t)nlen, file_handle);
        fclose(file_handle);
    }
}
II. String encoding format conversion
GB2312 into unicode:wchar_t* gb2312tounicode (const char* szgbstring) {UINT ncodepage = 936;//gb2312 int Nleng
Th=multibytetowidechar (ncodepage,0,szgbstring,-1,null,0);
wchar_t* pbuffer = new Wchar_t[nlength+1];
MultiByteToWideChar (ncodepage,0,szgbstring,-1,pbuffer,nlength);
pbuffer[nlength]=0;
return pbuffer; //big5 Convert to unicode:wchar_t* big5tounicode (const char* szbig5string) {UINT ncodepage = 950;//big5 int nlength=
MultiByteToWideChar (ncodepage,0,szbig5string,-1,null,0);
wchar_t* pbuffer = new Wchar_t[nlength+1];
MultiByteToWideChar (ncodepage,0,szbig5string,-1,pbuffer,nlength);
pbuffer[nlength]=0;
return pbuffer; //unicode Convert to gb2312:char* UnicodeToGB2312 (const wchar_t* szunicodestring) {UINT ncodepage = 936;//gb2312 int
Nlength=widechartomultibyte (Ncodepage,0,szunicodestring,-1,null,0,null,null);
char* Pbuffer=new char[nlength+1]; WideCharToMultiByte (Ncodepage,0,szunicodestring,-1,pbuffer,nlength,null,null);
pbuffer[nlength]=0;
return pbuffer; //unicode Convert to big5:char* UnicodeToBIG5 (const wchar_t* szunicodestring) {UINT ncodepage = 950;//big5 int Nleng
Th=widechartomultibyte (Ncodepage,0,szunicodestring,-1,null,0,null,null);
char* Pbuffer=new char[nlength+1];
WideCharToMultiByte (Ncodepage,0,szunicodestring,-1,pbuffer,nlength,null,null);
pbuffer[nlength]=0;
return pbuffer; //Traditional Chinese BIG5 convert to Simplified Chinese GB2312 char* big5togb2312 (const char* szbig5string) {LCID LCID = makelcid (Makelangid (lang_chine
se,sublang_chinese_simplified), SORT_CHINESE_PRC);
wchar_t* Szunicodebuff = Big5tounicode (szbig5string);
char* Szgb2312buff = UnicodeToGB2312 (Szunicodebuff);
int nlength = lcmapstring (Lcid,lcmap_simplified_chinese, szgb2312buff,-1,null,0);
char* pbuffer = new Char[nlength + 1];
LCMapString (0x0804,lcmap_simplified_chinese,szgb2312buff,-1,pbuffer,nlength);
Pbuffer[nlength] = 0;
Delete[] Szunicodebuff;
Delete[] Szgb2312buff; Return pbuffer; }//Simplified Chinese GB2312 convert to Traditional Chinese BIG5 char* gb2312tobig5 (const char* szgbstring) {LCID LCID = makelcid (Makelangid (Lang_chinese
, sublang_chinese_simplified), SORT_CHINESE_PRC);
int nlength = lcmapstring (lcid,lcmap_traditional_chinese,szgbstring,-1,null,0);
char* Pbuffer=new char[nlength+1];
LCMapString (lcid,lcmap_traditional_chinese,szgbstring,-1,pbuffer,nlength);
pbuffer[nlength]=0;
wchar_t* Punicodebuff = Gb2312tounicode (pbuffer);
char* Pbig5buff = UnicodeToBIG5 (Punicodebuff);
Delete[] pbuffer;
Delete[] Punicodebuff;
return pbig5buff;
}
III. API function: MultiByteToWideChar parameter description
The first parameter is the code page. The GetLocaleInfo function can be used to get the code page of the current system; 936 is Simplified Chinese and 950 is Traditional Chinese.
The second parameter is a set of option flags; 0 is generally sufficient.
The third parameter is the address of the ANSI string (ansistring) to convert, encoded in the code page specified by the first parameter.
The fourth parameter is the length of the ANSI string in bytes; if it is -1, the string is treated as terminated by 0 (null-terminated).
The fifth parameter is the address of the buffer that receives the converted Unicode string (widestring). It may be NULL when the sixth parameter is 0, in which case the function only computes and returns the length of the string that would be generated.
The sixth parameter is the capacity of the buffer that receives the converted Unicode string, that is, how many Unicode (wide) characters it can hold, not its size in bytes.