Read and Write files of different encoding formats

Last Update:2018-12-05 Source: Internet

Author: User

Tags: string to file

Developer on Alibaba Cloud: Build your first app with APIs, SDKs, and tutorials on Alibaba Cloud. Read more >

BOOL CStdioFileEx::ReadString(CString& rString){const intnMAX_LINE_CHARS = 4096;BOOLbReadData = FALSE;LPTSTRlpsz;intnLen = 0;wchar_t*pszUnicodeString = NULL;wchar_t*pszMultiByteString= NULL;intnChars = 0;try{// If at position 0, discard byte-order mark before readingif (!m_pStream || (GetPosition() == 0 && m_bIsUnicodeText)){wchar_tcDummy;//Read(&cDummy, sizeof(_TCHAR));Read(&cDummy, sizeof(wchar_t));}// If compiled for Unicode#ifdef _UNICODEif (m_bIsUnicodeText){// Do standard stuff - Unicode to Unicode. Seems to work OK.bReadData = CStdioFile::ReadString(rString);}else{   //MultiByte(ASCII)--WideChar(UNICODE)pszUnicodeString= new wchar_t[nMAX_LINE_CHARS]; pszMultiByteString  = new wchar_t[nMAX_LINE_CHARS]; // Initialise to something safememset(pszUnicodeString, 0, sizeof(wchar_t) * nMAX_LINE_CHARS);memset(pszMultiByteString, 0, sizeof(wchar_t) * nMAX_LINE_CHARS);// Read the string from the file pointer to a temporary array//bReadData = (NULL != fgetws(pszMultiByteString, nMAX_LINE_CHARS, m_pStream));char *pszTemp=new char[nMAX_LINE_CHARS];ZeroMemory(pszTemp,nMAX_LINE_CHARS);    bReadData=(NULL != fgets(pszTemp, nMAX_LINE_CHARS, m_pStream));/*if(fgets( pszTemp, nMAX_LINE_CHARS, m_pStream )==NULL){bReadData=0;}else{bReadData=1;}*///open the file of UTF_8 and change to the UNICODE default coding            MultiByteToWideChar( CP_UTF8, 0, pszTemp, /*sizeof(pszTemp)*/ -1, pszMultiByteString, nMAX_LINE_CHARS );delete [] pszTemp;            //bReadData = (NULL != fgetws(pszMultiByteString, nMAX_LINE_CHARS, m_pStream));//if (bReadData)//{// Convert multibyte to Unicode, using the specified code page//nChars = GetUnicodeStringFromMultiByteString((LPCSTR)pszMultiByteString, pszUnicodeString, nMAX_LINE_CHARS, m_nFileCodePage);memcpy( pszUnicodeString, pszMultiByteString, sizeof(wchar_t) * nMAX_LINE_CHARS );//if (nChars > 0)//{rString = (CString)pszUnicodeString;//}//}}#elseif (!m_bIsUnicodeText){// Do standard stuff -- read ANSI in ANSIbReadData = 
CStdioFile::ReadString(rString);// Get the current code pageUINT nLocaleCodePage = GetCurrentLocaleCodePage();// If we got it OK...if (nLocaleCodePage > 0){// if file code page does not match the system code page, we need to do a double conversion!if (nLocaleCodePage != (UINT)m_nFileCodePage){int nStringBufferChars = rString.GetLength() + 1;pszUnicodeString= new wchar_t[nStringBufferChars]; // Initialise to something safememset(pszUnicodeString, 0, sizeof(wchar_t) * nStringBufferChars);// Convert to Unicode using the file code pagenChars = GetUnicodeStringFromMultiByteString(rString, pszUnicodeString, nStringBufferChars, m_nFileCodePage);// Convert back to multibyte using the system code page// (This doesn't really confer huge advantages except to avoid "mangling" of non-convertible special// characters. So, if a file in the E.European code page is displayed on a system using the // western European code page, special accented characters which the system cannot display will be// replaced by the default character (a hash or something), rather than being incorrectly mapped to// other, western European accented characters).if (nChars > 0){// Calculate how much we need for the MB buffer (it might be larger)nStringBufferChars = GetRequiredMultiByteLengthForUnicodeString(pszUnicodeString,nLocaleCodePage);pszMultiByteString= new char[nStringBufferChars];  nChars = GetMultiByteStringFromUnicodeString(pszUnicodeString, pszMultiByteString, nStringBufferChars, nLocaleCodePage);rString = (CString)pszMultiByteString;}}}}else{pszUnicodeString= new wchar_t[nMAX_LINE_CHARS]; // Initialise to something safememset(pszUnicodeString, 0, sizeof(wchar_t) * nMAX_LINE_CHARS);// Read as Unicode, convert to ANSI// Bug fix by Dennis Jeryd 06/07/2003: initialise bReadDatabReadData = (NULL != fgetws(pszUnicodeString, nMAX_LINE_CHARS, m_pStream));if (bReadData){// Calculate how much we need for the multibyte stringint nRequiredMBBuffer = 
GetRequiredMultiByteLengthForUnicodeString(pszUnicodeString,m_nFileCodePage);pszMultiByteString= new char[nRequiredMBBuffer];  nChars = GetMultiByteStringFromUnicodeString(pszUnicodeString, pszMultiByteString, nRequiredMBBuffer, m_nFileCodePage);if (nChars > 0){rString = (CString)pszMultiByteString;}}}#endif// Then remove end-of-line character if in Unicode text modeif (bReadData){// Copied from FileTxt.cpp but adapted to Unicode and then adapted for end-of-line being just '\r'. nLen = rString.GetLength();if (nLen > 1 && rString.Mid(nLen-2) == sNEWLINE){rString.GetBufferSetLength(nLen-2);}else{lpsz = rString.GetBuffer(0);if (nLen != 0 && (lpsz[nLen-1] == _T('\r') || lpsz[nLen-1] == _T('\n'))){rString.GetBufferSetLength(nLen-1);}}}}// Ensure we always delete in case of exceptioncatch(...){if (pszUnicodeString)delete [] pszUnicodeString;if (pszMultiByteString) delete [] pszMultiByteString;throw;}if (pszUnicodeString)delete [] pszUnicodeString;if (pszMultiByteString)delete [] pszMultiByteString;return bReadData;}// --------------------------------------------------------------------------------------------////CStdioFileEx::WriteString()//// --------------------------------------------------------------------------------------------// Returns:    void// Parameters: LPCTSTR lpsz//// Purpose:Writes string to file either in Unicode or multibyte, depending on whether the caller specified the//CStdioFileEx::modeWriteUnicode flag. 
Override of base class function.// Notes:If writing in Unicode we need to://a) Write the Byte-order-mark at the beginning of the file//b) Write all strings in byte-mode//-If we were compiled in Unicode, we need to convert Unicode to multibyte if //we want to write in multibyte//-If we were compiled in multi-byte, we need to convert multibyte to Unicode if //we want to write in Unicode.// Exceptions:None.//void CStdioFileEx::WriteString(LPCTSTR lpsz){wchar_t*pszUnicodeString= NULL; char*pszMultiByteString= NULL;try{// If writing Unicode and at the start of the file, need to write byte markif (m_nFlags & CStdioFileEx::modeWriteUnicode){// If at position 0, write byte-order mark before writing anything elseif (!m_pStream || GetPosition() == 0){wchar_t cBOM = (wchar_t)nUNICODE_BOM;CFile::Write(&cBOM, sizeof(wchar_t));}}// If compiled in Unicode...#ifdef _UNICODE// If writing Unicode, no conversion neededif (m_nFlags & CStdioFileEx::modeWriteUnicode){// Write in byte modeCFile::Write(lpsz, lstrlen(lpsz) * sizeof(wchar_t));}// Else if we don't want to write Unicode, need to convertelse{intnChars = lstrlen(lpsz);// Why plus 1? 
Because yes//intnBufferSize = nChars * sizeof(char);// leave space for multi-byte charsintnCharsWritten = 0;intnBufferSize = 0;pszUnicodeString= new wchar_t[nChars + 1]; ZeroMemory(pszUnicodeString,nChars + 1);// Copy string to Unicode bufferlstrcpy(pszUnicodeString, lpsz);// Work out how much space we need for the multibyte conversion //nBufferSize= GetRequiredMultiByteLengthForUnicodeString(pszUnicodeString, m_nFileCodePage);// Get multibyte string//nCharsWritten = GetMultiByteStringFromUnicodeString(pszUnicodeString,pszMultiByteString , nBufferSize, m_nFileCodePage);int u8len=::WideCharToMultiByte(CP_UTF8,NULL,pszUnicodeString,wcslen(pszUnicodeString),NULL,0,NULL,NULL);char*szU8=new char[u8len+1];ZeroMemory(szU8,u8len+1);::WideCharToMultiByte(CP_UTF8,NULL,pszUnicodeString,wcslen(pszUnicodeString),szU8,u8len,NULL,NULL);szU8[u8len]='\0';if(pszUnicodeString[0]==0xfeff){//CFile::Write(/*(const void*)*/pszUnicodeString,nChars*sizeof(wchar_t));                  CFile::Write(/*(const void*)*/szU8,u8len*sizeof(char));}else{                //CFile::Write(/*(const void*)*/pszUnicodeString,nChars*sizeof(wchar_t));  CFile::Write(/*(const void*)*/szU8,u8len*sizeof(char));}}// Else if *not* compiled in Unicode#else// If writing Unicode, need to convertif (m_nFlags & CStdioFileEx::modeWriteUnicode){intnChars = lstrlen(lpsz) + 1; // Why plus 1? 
Because yesintnBufferSize = nChars * sizeof(wchar_t);intnCharsWritten = 0;pszUnicodeString= new wchar_t[nChars];pszMultiByteString= new char[nChars]; // Copy string to multibyte bufferlstrcpy(pszMultiByteString, lpsz);nCharsWritten = GetUnicodeStringFromMultiByteString(pszMultiByteString, pszUnicodeString, nChars, m_nFileCodePage);if (nCharsWritten > 0){// Do byte-mode write using actual chars written (fix by Howard J Oh)//CFile::Write(pszUnicodeString, lstrlen(lpsz) * sizeof(wchar_t));CFile::Write(pszUnicodeString, nCharsWritten*sizeof(wchar_t));}else{ASSERT(false);}}// Else if we don't want to write Unicode, no conversion needed, unless the code page differselse{//// Do standard stuff//CStdioFile::WriteString(lpsz);// Get the current code pageUINT nLocaleCodePage = GetCurrentLocaleCodePage();// If we got it OK, and if file code page does not match the system code page, we need to do a double conversion!if (nLocaleCodePage > 0 && nLocaleCodePage != (UINT)m_nFileCodePage){intnChars = lstrlen(lpsz) + 1; // Why plus 1? 
Because yespszUnicodeString= new wchar_t[nChars]; // Initialise to something safememset(pszUnicodeString, 0, sizeof(wchar_t) * nChars);// Convert to Unicode using the locale code page (the code page we are using in memory)nChars = GetUnicodeStringFromMultiByteString((LPCSTR)(const char*)lpsz, pszUnicodeString, nChars, nLocaleCodePage);// Convert back to multibyte using the file code page// (Note that you can't reliably read a non-Unicode file written in code page A on a system using a code page B,// modify the file and write it back using code page A, unless you disable all this double-conversion code.// In effect, you have to choose between a mangled character display and mangled file writing).if (nChars > 0){// Calculate how much we need for the MB buffer (it might be larger)nChars = GetRequiredMultiByteLengthForUnicodeString(pszUnicodeString, m_nFileCodePage);pszMultiByteString= new char[nChars];  memset(pszMultiByteString, 0, sizeof(char) * nChars);nChars = GetMultiByteStringFromUnicodeString(pszUnicodeString, pszMultiByteString, nChars, m_nFileCodePage);// Do byte-mode write. This avoids annoying "interpretation" of \n's as \r\nCFile::Write((const void*)pszMultiByteString, nChars * sizeof(char));}}else{// Do byte-mode write. This avoids annoying "interpretation" of \n's as \r\nCFile::Write((const void*)lpsz, lstrlen(lpsz)*sizeof(char));}}#endif}// Ensure we always clean upcatch(...){if (pszUnicodeString)delete [] pszUnicodeString;if (pszMultiByteString)delete [] pszMultiByteString;throw;}if (pszUnicodeString)delete [] pszUnicodeString;if (pszMultiByteString)delete [] pszMultiByteString;}

This article is an English version of an article which is originally in the Chinese language on aliyun.com and is provided for information purposes only. This website makes no representation or warranty of any kind, either expressed or implied, as to the accuracy, completeness, ownership or reliability of the article or any translations thereof. If you have any concerns or complaints relating to the article, please send an email, providing a detailed description of the concern or complaint, to info-contact@alibabacloud.com. A staff member will contact you within 5 working days. Once verified, infringing content will be removed immediately.

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

Get Started for Free

Sales Support

1 on 1 presale consultation

Chat Contact Sales
After-Sales Support

24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

Open a Ticket
Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.

Learn More