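1. Download the packages used in the steps below (tesseract-ocr-3.02.02, tesseract-ocr-3.02-vs2008 and leptonica-1.68-win32-lib-include-dirs; the download link is missing from this step);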
2. Store all the downloaded files in the E:\OCR\tesseract_ocr3.02 folder and decompress them;
3. Open the Tesseract.sln solution under the tesseract-ocr-3.02-vs2008 folder;
4. Copy the corresponding folders (such as api and ccmain) from the tesseract-ocr-3.02.02 folder to the E:\OCR\tesseract_ocr3.02\tesseract-ocr-3.02-vs2008\Tesseract-OCR folder; copy the include folder from the leptonica-1.68-win32-lib-include-dirs folder to the E:\OCR\tesseract_ocr3.02\tesseract-ocr-3.02-vs2008 folder; copy the files under the tesseract_ocr3.02\tesseract-ocr-3.02.02\Tesseract-OCR\vs2008\port folder to the E:\OCR\tesseract_ocr3.02\tesseract-ocr-3.02-vs2008\Tesseract-OCR\vs2008\port folder; and copy the lib folder from the leptonica-1.68-win32-lib-include-dirs folder to the tesseract_ocr3.02\tesseract-ocr-3.02-vs2008 folder;
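5. Edit the equationdetect source file in the ccmain folder. Find the declaration that begins
static const STRING kCharsToEx[] = {"'", "`", "\"", "\\", ",", ".", ...
and replace the non-ASCII (full-width) bracket and quotation characters that follow in that list with plain ASCII strings such as "<", ">", "<<", ">>", keeping the terminating "" entry, so that the file compiles under VS2008. (The exact original list was garbled in this text; check it against your copy of the source. This file does not exist in version 3.01, so compiling 3.01 does not require any changes to the source files.)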
6. Recompile the entire solution;
7. Create an empty console project and configure it (include directories, library directories and linked libraries) in the same way as the Tesseract project. Create a tessdata folder under the folder that contains the executable and put the chi_sim.traineddata file in it. The sample code is as follows:
// Minimal console driver for Tesseract 3.02: recognises one hard-coded image
// with the chi_sim language data and writes the recognised text to a file.
//
// NOTE: this listing was previously collapsed onto two physical lines, which
// made every `//` comment swallow the code after it and fused the #include
// directives together. It is laid out one statement per line here so that it
// compiles as intended; the runtime behaviour is unchanged.
#include <stdio.h>   // fprintf, fopen, fwrite, fclose
#include <stdlib.h>  // exit

#include "allheaders.h"
#include "baseapi.h"
#include "basedir.h"
#include "strngs.h"
#include "tesseractmain.h"
#include "tprintf.h"

// Entry point.
//
// argv[0] is used to locate the data directory; any further command-line
// arguments (argv[1..]) are forwarded to TessBaseAPI::Init as its
// configs/configs_size arguments.
//
// Exit codes: 0 on normal completion, 1 if Tesseract cannot be initialised or
// the output file cannot be created, 2 if the input image cannot be opened,
// 3 if Leptonica cannot read the image.
int main(int argc, char **argv) {
  tesseract::TessBaseAPI api;

  // Derive the data path from the executable path.
  // NOTE(review): presumably this yields the directory that contains the
  // tessdata folder -- confirm against truncate_path in basedir.h.
  STRING tessdata_dir;
  truncate_path(argv[0], &tessdata_dir);

  // First initialisation without a language, immediately torn down again.
  int rc = api.Init(tessdata_dir.string(), NULL);
  if (rc) {
    fprintf(stderr, ("Could not initialize tesseract.\n"));
    exit(1);
  }
  api.End();

  // Make the order of args a bit more forgiving than it used to be.
  const char* lang = "chi_sim";  // eng
  const char* image = "E:\\OCR\\tesseract_ocr3.02\\tesseract-ocr-3.02-vs2008\\tesseract-ocr\\vs2008\\Debug\\ABC.tif";  // NULL;
  const char* output = "E:\\OCR\\tesseract_ocr3.02\\tesseract-ocr-3.02-vs2008\\tesseract-ocr\\vs2008\\Debug\\xxxxx";  // NULL;
  tesseract::PageSegMode pagesegmode = tesseract::PSM_AUTO;
  int arg = 1;  // index of the first argument forwarded to Init below

  api.SetOutputName(output);

  // Real initialisation with the chosen language.
  rc = api.Init(tessdata_dir.string(), lang, tesseract::OEM_DEFAULT,
                &(argv[arg]), argc - arg, NULL, NULL, false);
  if (rc) {
    fprintf(stderr, ("Could not initialize tesseract.\n"));
    exit(1);
  }

  // Only override the segmentation mode when it is still PSM_SINGLE_BLOCK,
  // so a mode set by a config passed on the command line is left alone.
  if (api.GetPageSegMode() == tesseract::PSM_SINGLE_BLOCK) {
    api.SetPageSegMode(pagesegmode);
  }

  tprintf("Tesseract Open Source OCR Engine v%s with Leptonica\n",
          tesseract::TessBaseAPI::Version());

  // Check that the input file can be opened at all.
  FILE* fin = fopen(image, "rb");
  if (fin == NULL) {
    fprintf(stderr, ("Cannot open input file: %s\n"), image);
    exit(2);
  }
  fclose(fin);

  // Check that Leptonica can decode the image. The Pix is only a probe and is
  // released straight away; ProcessPages is given the file name, not the Pix.
  PIX *pixs;
  if ((pixs = pixRead(image)) == NULL) {
    fprintf(stderr, ("Unsupported image type.\n"));
    exit(3);
  }
  pixDestroy(&pixs);

  // Run recognition. A failure is reported, but whatever text was produced is
  // still written out below.
  STRING text_out;
  if (!api.ProcessPages(image, NULL, 0, &text_out)) {
    fprintf(stderr, ("Error during processing.\n"));
  }

  // Choose the output extension from the engine's output-format variables:
  // hOCR takes precedence over box file, plain text is the fallback.
  bool output_hocr = false;
  api.GetBoolVariable("tessedit_create_hocr", &output_hocr);
  bool output_box = false;
  api.GetBoolVariable("tessedit_create_boxfile", &output_box);

  STRING outfile = output;
  outfile += output_hocr ? ".html" : output_box ? ".box" : ".txt";

  FILE* fout = fopen(outfile.string(), "wb");
  if (fout == NULL) {
    fprintf(stderr, ("Cannot create output file %s\n"), outfile.string());
    exit(1);
  }
  fwrite(text_out.string(), 1, text_out.length(), fout);
  fclose(fout);

  return 0;  // Normal exit
}
The code above compiles and runs successfully. It takes a whole image as input and performs character recognition on it directly; the recognition quality is only average.