Windows Notepad 判斷字元集的方法

來源:互聯網
上載者:User
/* IsTextUTF8
 *
 * UTF-8 is the encoding of Unicode first specified by Internet Society
 * RFC 2279 and later restricted by RFC 3629.
 * ( See http://www.cis.ohio-state.edu/htbin/rfc/rfc2279.html )
 *
 * Basically:
 * 0000 0000-0000 007F - 0xxxxxxx  (ascii converts to 1 octet!)
 * 0000 0080-0000 07FF - 110xxxxx 10xxxxxx    ( 2 octet format)
 * 0000 0800-0000 FFFF - 1110xxxx 10xxxxxx 10xxxxxx (3 octet format)
 * 0001 0000-0010 FFFF - 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx (4 octet format)
 *
 * Besides the octet layout above, the following are rejected because they
 * are not well-formed UTF-8:
 *   - lead octets C0 and C1 and the pairs E0 80..9F / F0 80..8F (overlong forms)
 *   - ED A0..BF (UTF-16 surrogates D800-DFFF)
 *   - F4 90..BF and lead octets F5..FF (beyond U+10FFFF, or not a lead octet)
 *   - a continuation octet (10xxxxxx) where a lead octet is expected
 *   - a sequence that is cut short by the end of the buffer
 *
 * Parameters:    lpstrInputStream - buffer of octets to examine; it does not
 *                                   have to be NUL terminated.
 *                iLen             - number of octets in the buffer.  Zero or a
 *                                   negative value examines nothing.
 *
 * Return value:  TRUE, if the text is in UTF-8 format.
 *                FALSE, if the text is not in UTF-8 format.
 *                We will also return FALSE if it is only 7-bit ascii, so the right code page
 *                will be used.
 *
 *                Actually for 7 bit ascii, it doesn't matter which code page we use, but
 *                notepad will remember that it is utf-8 and "save" or "save as" will store
 *                the file with a UTF-8 BOM.  Not cool.
 */
INT IsTextUTF8( LPSTR lpstrInputStream, INT iLen )
{
    INT   i;
    DWORD cOctets= 0;       // continuation octets still expected for this character
    UCHAR chr;
    UCHAR chLow= 0x80;      // smallest value allowed for the next continuation octet
    UCHAR chHigh= 0xBF;     // largest value allowed for the next continuation octet
    BOOL  bAllAscii= TRUE;

    for( i=0; i < iLen; i++ ) {
        chr= (UCHAR)lpstrInputStream[i];

        if( cOctets == 0 ) {
            //
            // 7 bit ascii after 7 bit ascii is just fine.
            //
            if( chr < 0x80 ) {
                continue;
            }

            bAllAscii= FALSE;

            //
            // Start of an encoding.  The lead octet fixes the length, and for
            // E0, ED, F0 and F4 it also narrows the range of the first
            // continuation octet (overlong forms, surrogates, > U+10FFFF).
            //
            chLow=  0x80;
            chHigh= 0xBF;

            if( chr >= 0xC2 && chr <= 0xDF ) {
                cOctets= 1;
            }
            else if( chr >= 0xE0 && chr <= 0xEF ) {
                cOctets= 2;
                if( chr == 0xE0 ) {
                    chLow= 0xA0;                 // below U+0800 would be overlong
                }
                else if( chr == 0xED ) {
                    chHigh= 0x9F;                // U+D800-U+DFFF are surrogates
                }
            }
            else if( chr >= 0xF0 && chr <= 0xF4 ) {
                cOctets= 3;
                if( chr == 0xF0 ) {
                    chLow= 0x90;                 // below U+10000 would be overlong
                }
                else if( chr == 0xF4 ) {
                    chHigh= 0x8F;                // above U+10FFFF is not Unicode
                }
            }
            else {
                // 80..BF: stray continuation octet
                // C0, C1: always overlong
                // F5..FF: never a valid lead octet
                return FALSE;
            }
        }
        else {
            // non-leading bytes must start as 10xxxxxx and stay inside the
            // range selected by the lead octet
            if( chr < chLow || chr > chHigh ) {
                return FALSE;
            }

            // only the first continuation octet is range restricted
            chLow=  0x80;
            chHigh= 0xBF;

            cOctets--;                           // processed another octet in encoding
        }
    }

    //
    // End of text.  Check for consistency.
    //

    if( cOctets > 0 ) {   // anything left over at the end is an error
        return FALSE;
    }

    if( bAllAscii ) {     // Not utf-8 if all ascii.  Forces caller to use code pages for conversion
        return FALSE;
    }

    return TRUE;
}

/* IsInputTextUnicode
 *
 * Verify if the input stream is in Unicode format.
 *
 * Calls IsTextUnicode with every test bit requested.  When that call reports
 * Unicode and the only bit left set in the result is
 * IS_TEXT_UNICODE_STATISTICS, the buffer is scanned for DBCS lead bytes; if
 * the ANSI code page is multi-byte and any octet is a lead byte, the verdict
 * is overridden to FALSE.
 *
 * Parameters:    lpstrInputStream - buffer of octets to examine.
 *                iLen             - number of octets in the buffer.
 *
 * Return value:  TRUE, if the text is in Unicode format.
 *                FALSE otherwise.
 *
 * 29 June 1998
 */
INT IsInputTextUnicode( LPSTR lpstrInputStream, INT iLen )
{
    INT  iResult= ~0; // all bits set: turns on IS_TEXT_UNICODE_DBCS_LEADBYTE too
    BOOL bUnicode;

    // We would like to check the possibility
    // of IS_TEXT_UNICODE_DBCS_LEADBYTE.
    //
    bUnicode= IsTextUnicode( lpstrInputStream, iLen, &iResult );

    if( bUnicode                                          &&
        ((iResult & IS_TEXT_UNICODE_STATISTICS)    != 0 ) &&
        ((iResult & (~IS_TEXT_UNICODE_STATISTICS)) == 0 ) )
    {
        CPINFO cpiInfo;
        INT    cb;

        //
        // If the result depends only upon statistics, check
        // to see if there is a possibility of DBCS.
        // Only do this check if the ansi code page is DBCS.
        //
        // GetCPInfo must succeed before cpiInfo is read; if it fails the
        // structure is not filled in, so the DBCS check is skipped and the
        // IsTextUnicode verdict is kept.
        //
        if( GetCPInfo( CP_ACP, &cpiInfo ) && cpiInfo.MaxCharSize > 1 )
        {
            for( cb=0; cb<iLen; cb++ )
            {
                if( IsDBCSLeadByte( (UCHAR)lpstrInputStream[cb] ) )
                {
                    return FALSE;
                }
            }
        }
    }

    return bUnicode;
}
相關文章

聯繫我們

該頁面正文內容均來源於網絡整理,並不代表阿里雲官方的觀點,該頁面所提到的產品和服務也與阿里雲無關,如果該頁面內容對您造成了困擾,歡迎寫郵件給我們,收到郵件我們將在5個工作日內處理。

如果您發現本社區中有涉嫌抄襲的內容,歡迎發送郵件至: info-contact@alibabacloud.com 進行舉報並提供相關證據,工作人員會在 5 個工作天內聯絡您,一經查實,本站將立刻刪除涉嫌侵權內容。

A Free Trial That Lets You Build Big!

Start building with 50+ products and up to 12 months usage for Elastic Compute Service

  • Sales Support

    1 on 1 presale consultation

  • After-Sales Support

    24/7 Technical Support · 6 Free Tickets per Quarter · Faster Response

  • Alibaba Cloud offers highly flexible support services tailored to meet your exact needs.