The Tesseract-OCR DLL package used here is tesseract-3.02.02-win32-lib-include-dirs:
Google-Tesseract-OCR
Decompress the package, which contains an include folder and a lib folder. Add the include folder to the project's header search path and the lib folder to its library search path, then create a project for testing.
#include "baseapi.h"
#include "strngs.h"
#pragma comment(lib, "libtesseract302.lib")
Char * STR = "test.jpg"; tesseract: tessbaseapi API; API. init (null, "chi_sim", tesseract: oem_default); // initialization, set the Language Pack, simplified Chinese: chi_sim; English: ENG; you can also train the Language Pack by yourself // API. setvariable ("tessedit_char_whitelist", "0123456789 abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"); string text_out; If (! Api. processpages (STR, null, 0, & text_out) {return 0 ;}
The recognized text is available through text_out.string(), and it is encoded as UTF-8. It therefore has to be transcoded before it can be displayed, so the following function converts it to GBK:
string UTF8ToGBK(const std::string& strUTF8){int len = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, NULL, 0);unsigned short * wszGBK = new unsigned short[len + 1];memset(wszGBK, 0, len * 2 + 2);MultiByteToWideChar(CP_UTF8, 0,LPCSTR(strUTF8.c_str()), -1, LPWSTR(wszGBK), len);len = WideCharToMultiByte(CP_ACP, 0,LPCTSTR(wszGBK), -1, NULL, 0, NULL, NULL);char *szGBK = new char[len + 1];memset(szGBK, 0, len + 1);WideCharToMultiByte(CP_ACP,0, LPCTSTR(wszGBK), -1, szGBK, len, NULL, NULL);//strUTF8 = szGBK;std::string strTemp(szGBK);delete[]szGBK;delete[]wszGBK;return strTemp;}
Because the result is used in an MFC application, I also wrote a snippet that converts it to Unicode (UTF-16) for MFC:
Wchar_t * result_str; cstring result; // convert UTF-8 to unicodeint Len = multibytetowidechar (cp_utf8, 0, text_out.string (),-1, null, 0 ); result_str = new wchar_t [Len + 1]; memset (result_str, 0, Len + 1); multibytetowidechar (cp_utf8, 0, text_out.string (),-1, result_str, Len ); // recognition result = result_str;