Conversion interface between Unicode characters and multibyte characters

Source: Internet
Author: User

Author: Zhu Jinchan

Source: http://blog.csdn.net/clever101

There is a wealth of usable open-source code available. I extracted several character-conversion interfaces from the sqlite3 source and found that, after slight modification, they are quite handy to use. Here is the implementation code:


* * * * * Convert a UTF-8 string to Microsoft Unicode (UTF-16?).
* * * * * * Hold the returned string is obtained from malloc.
	*/Static WCHAR *utf8tounicode (const char *zfilename) {int nChar;

	WCHAR *zwidefilename;
	NChar = MultiByteToWideChar (Cp_utf8, 0, Zfilename,-1, NULL, 0);
	Zwidefilename = Static_cast<wchar *> (malloc (nchar*sizeof (zwidefilename[0)));

	if (zwidefilename==0) {return 0;
	} NChar = MultiByteToWideChar (Cp_utf8, 0, Zfilename,-1, Zwidefilename, NChar);
		if (nchar==0) {free (zwidefilename);

	Zwidefilename = 0;

return zwidefilename;  /* * * * Convert Microsoft Unicode to UTF-8.
Spaces to hold the returned string is * * obtained from malloc ().
	*/Static char *unicodetoutf8 (const WCHAR *zwidefilename) {int nbyte;

	Char *zfilename;
	Nbyte = WideCharToMultiByte (Cp_utf8, 0, Zwidefilename,-1, 0, 0, 0, 0);
	Zfilename = static_cast<char*> (malloc (nbyte));

	if (zfilename==0) {return 0; } nbyte = WideCharToMultiByte (Cp_utf8, 0, ZwidefilenAme,-1, Zfilename, nbyte, 0, 0);
		if (Nbyte = = 0) {free (zfilename);
	Zfilename = 0;

return zfilename;
/* * * * Convert an ANSI string to Microsoft Unicode, based on the * * Current codepage settings for file APIs.
* * * * * * Hold the returned string is obtained * * from malloc.
	*/Static WCHAR *mbcstounicode (const char *zfilename) {int nbyte;
	WCHAR *zmbcsfilename; int codepage = Arefileapisansi ()?

	CP_ACP:CP_OEMCP;
	Nbyte = MultiByteToWideChar (codepage, 0, Zfilename,-1, null,0) *sizeof (WCHAR);
	Zmbcsfilename = static_cast<wchar*> (malloc (nbyte*sizeof (zmbcsfilename[0)));

	if (zmbcsfilename==0) {return 0;
	} Nbyte = MultiByteToWideChar (codepage, 0, Zfilename,-1, Zmbcsfilename, nbyte);
		if (nbyte==0) {free (zmbcsfilename);
	Zmbcsfilename = 0;

return zmbcsfilename;
}/* * * * Convert Microsoft Unicode to multibyte character string, based on the * * user's Ansi codepage.
* * * * * * Hold the returned string is obtained from * * malloc (). * * STATIC char* Unicodetombcs (const wchar* zwidefilename) {int nbyte;
	Char *zfilename; int codepage = Arefileapisansi ()?

	CP_ACP:CP_OEMCP;
	Nbyte = WideCharToMultiByte (codepage, 0, Zwidefilename,-1, 0, 0, 0, 0);
	Zfilename = static_cast<char*> (malloc (nbyte));

	if (zfilename==0) {return 0;
	Nbyte = WideCharToMultiByte (codepage, 0, Zwidefilename,-1, Zfilename, nbyte, 0, 0);
		if (Nbyte = = 0) {free (zfilename);
	Zfilename = 0;

return zfilename;  } * * * * Convert multibyte character string to UTF-8.
Spaces to hold the * * returned string is obtained from malloc ().
	*/Static char* MbcsToUtf8 (const char *zfilename) {char *zfilenameutf8;

	WCHAR *ztmpwide;
	Ztmpwide = Mbcstounicode (zfilename);

	if (ztmpwide==0) {return 0;
	} ZFilenameUtf8 = UnicodeToUtf8 (ztmpwide);
	Free (ztmpwide);
return ZFilenameUtf8;  } * * * * Convert UTF-8 to multibyte character string.
Spaces to hold the * * returned string is obtained from malloc (). */Static char* Utf8tombcs (const char *zfilename) {char *zfilenamembcs;

	wchar* Ztmpwide;
	Ztmpwide = Utf8tounicode (zfilename);

	if (ztmpwide==0) {return 0;
	} Zfilenamembcs = Unicodetombcs (ztmpwide);
	Free (ztmpwide);
return ZFILENAMEMBCS;
	} std::string MbcsToUtf8 (const char* PSZMBCS) {std::string str;
	WCHAR *pwchar=0;
	CHAR *pchar=0;
	int len=0; int codepage = Arefileapisansi ()?
	CP_ACP:CP_OEMCP;
	Len=multibytetowidechar (codepage, 0, Pszmbcs,-1, null,0);
	Pwchar=new Wchar[len];
		if (pwchar!=0) {len = MultiByteToWideChar (codepage, 0, Pszmbcs,-1, Pwchar, Len);
			if (len!=0) {len = WideCharToMultiByte (Cp_utf8, 0, Pwchar,-1, 0, 0, 0, 0);
			Pchar=new Char[len];
				if (pchar!=0) {len = WideCharToMultiByte (Cp_utf8, 0, Pwchar,-1, Pchar, len,0, 0);                   
				if (len!=0) {str = Pchar;
			} Delete Pchar;
		} Delete Pwchar;
} return str; }

To test these interfaces, I wrote a test project that reads an XML file and converts the characters in it. The code for the test project can be downloaded from the following address:

Conversion interfaces between Unicode and multibyte characters, together with the test project

Contact Us

The content of this page comes from the Internet and does not represent Alibaba Cloud's opinion; the products and services mentioned on this page have no relationship with Alibaba Cloud. If you find any content on this page confusing, please write us an email, and we will handle the problem within 5 days of receiving it.

If you find any instances of plagiarism from the community, please send an email to: info-contact@alibabacloud.com and provide relevant evidence. A staff member will contact you within 5 working days.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support 6 Free Tickets per Quarter Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.