C++实例调用Tesseract OCR的API

- 1. 前言
- 2. 模式
- 3. 调用方式C++ Examples【转自官网】
  - `3.1 Basic_example`
  - `3.2 SetRectangle_example`
  - `3.3 GetComponentImages_example`
  - `3.4 ResultIterator_example`
  - `3.5 OSD_example`
  - `3.6 LSTM_Choices_example`
  - `3.7 OpenCV_example`
  - `3.8 UserPatterns_example`

1. 前言

Tesseract OCR支持不同调用方式(详情请看具体实例)，同一种调用方式也可以设置不同模式。
调用方法或模式不同，对OCR识别结果的精度有一定影响。模式设置不同，输出的结果格式也不一致。
实际项目中，需要根据需求比较各方法的优劣从而选择最合适的。

2. 模式

枚举定义
// Granularity levels at which page results can be iterated, listed from the
// coarsest unit (a block) down to the finest (a single symbol).
enum PageIteratorLevel {
    RIL_BLOCK,     // Block of text/image/separator line.
    RIL_PARA,      // Paragraph within a block.
    RIL_TEXTLINE,  // Line within a paragraph.
    RIL_WORD,      // Word within a textline.
    RIL_SYMBOL     // Symbol/character within a word.
};

RIL_BLOCK：把原稿分割成不同区域，按区域识别文字，OCR结果是每个区域的字符串
RIL_PARA：按段落识别文字，OCR结果是每个段落的字符串
RIL_TEXTLINE：按行识别文字，OCR结果是一行一行的字符串
RIL_WORD：按单词识别文字，OCR结果是一个一个的单词
RIL_SYMBOL：按字符识别文字，OCR结果是一个一个的字符

3. 调用方式C++ Examples【转自官网】

C++API实例：https://tesseract-ocr.github.io/tessdoc/Examples_C++.html
API实例：https://tesseract-ocr.github.io/tessdoc/#api-examples

注意
如果C++的实例代码编译不通过，可能是需要添加以下头文件

#include <leptonica/pix_internal.h>

`3.1 Basic_example`

#include <cstdio>
#include <memory>

#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>

// Basic example: run OCR over a whole image and print the recognized text.
// Returns 0 on success, 1 if Tesseract cannot be initialized or the input
// image cannot be read.
int main()
{
    // Owning pointer so the engine object is deleted on every return path.
    auto api = std::make_unique<tesseract::TessBaseAPI>();

    // Initialize tesseract-ocr with English, without specifying tessdata path
    if (api->Init(nullptr, "eng")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        return 1;
    }

    // Open input image with leptonica library
    Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
    if (image == nullptr) {
        fprintf(stderr, "Could not read input image.\n");
        return 1;
    }
    api->SetImage(image);

    // Get OCR result; the buffer is released with delete[] by the unique_ptr
    const std::unique_ptr<char[]> outText(api->GetUTF8Text());
    printf("OCR output:\n%s", outText ? outText.get() : "");

    // Destroy used object and release memory
    api->End();
    pixDestroy(&image);
    return 0;
}

`3.2 SetRectangle_example`

如果只想识别特定区域的文字，可以用这个方法。需提前设定指定区域的坐标。

#include <cstdio>
#include <memory>

#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>

// SetRectangle example: run OCR on a sub-rectangle of the image only and
// print the recognized text.
// Returns 0 on success, 1 if Tesseract cannot be initialized or the input
// image cannot be read.
int main()
{
    // Owning pointer so the engine object is deleted on every return path.
    auto api = std::make_unique<tesseract::TessBaseAPI>();

    // Initialize tesseract-ocr with English, without specifying tessdata path
    if (api->Init(nullptr, "eng")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        return 1;
    }

    // Open input image with leptonica library
    Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
    if (image == nullptr) {
        fprintf(stderr, "Could not read input image.\n");
        return 1;
    }
    api->SetImage(image);

    // Restrict recognition to a sub-rectangle of the image
    // SetRectangle(left, top, width, height)
    api->SetRectangle(30, 86, 590, 100);

    // Get OCR result; the buffer is released with delete[] by the unique_ptr
    const std::unique_ptr<char[]> outText(api->GetUTF8Text());
    printf("OCR output:\n%s", outText ? outText.get() : "");

    // Destroy used object and release memory
    api->End();
    pixDestroy(&image);
    return 0;
}

`3.3 GetComponentImages_example`

以Box的形式返回OCR结果

#include <cstdio>
#include <memory>

#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>

// GetComponentImages example: find the text-line components of an image,
// then OCR each component's bounding box separately and print its geometry,
// mean confidence and text.
// Returns 0 on success, 1 if Tesseract cannot be initialized, the input image
// cannot be read, or no component list is returned.
int main()
{
    // Owning pointer so the engine object is deleted on every return path.
    auto api = std::make_unique<tesseract::TessBaseAPI>();

    // Initialize tesseract-ocr with English, without specifying tessdata path
    if (api->Init(nullptr, "eng")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        return 1;
    }

    Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
    if (image == nullptr) {
        fprintf(stderr, "Could not read input image.\n");
        return 1;
    }
    api->SetImage(image);

    Boxa *boxes = api->GetComponentImages(tesseract::RIL_TEXTLINE, true, nullptr, nullptr);
    if (boxes == nullptr) {
        fprintf(stderr, "Could not get component images.\n");
        api->End();
        pixDestroy(&image);
        return 1;
    }

    // Accessor functions are used instead of reading the Boxa/Box struct
    // fields directly, so the leptonica internal header is not required.
    const int box_count = boxaGetCount(boxes);
    printf("Found %d textline image components.\n", box_count);
    for (int i = 0; i < box_count; i++) {
        BOX *box = boxaGetBox(boxes, i, L_CLONE);
        if (box == nullptr) {
            continue;
        }
        l_int32 x = 0, y = 0, w = 0, h = 0;
        boxGetGeometry(box, &x, &y, &w, &h);

        api->SetRectangle(x, y, w, h);
        // Released with delete[] at the end of each iteration
        const std::unique_ptr<char[]> ocrResult(api->GetUTF8Text());
        const int conf = api->MeanTextConf();
        fprintf(stdout, "Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s",
                i, x, y, w, h, conf, ocrResult ? ocrResult.get() : "");
        boxDestroy(&box);
    }

    // Destroy used object and release memory
    boxaDestroy(&boxes);
    api->End();
    pixDestroy(&image);
    return 0;
}

`3.4 ResultIterator_example`

以迭代器的形式返回OCR结果

#include <cstdio>
#include <memory>

#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>

// ResultIterator example: recognize an image, then walk the result word by
// word and print each word with its confidence and bounding box.
// Returns 0 on success, 1 if Tesseract cannot be initialized or the input
// image cannot be read.
int main()
{
    // Owning pointer so the engine object is deleted on every return path.
    auto api = std::make_unique<tesseract::TessBaseAPI>();

    // Initialize tesseract-ocr with English, without specifying tessdata path
    if (api->Init(nullptr, "eng")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        return 1;
    }

    Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
    if (image == nullptr) {
        fprintf(stderr, "Could not read input image.\n");
        return 1;
    }
    api->SetImage(image);
    api->Recognize(nullptr);

    {
        // Scoped so the iterator is deleted before the engine is shut down.
        const std::unique_ptr<tesseract::ResultIterator> ri(api->GetIterator());
        const tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
        if (ri) {
            do {
                // Released with delete[] at the end of each iteration
                const std::unique_ptr<char[]> word(ri->GetUTF8Text(level));
                const float conf = ri->Confidence(level);
                int x1, y1, x2, y2;
                ri->BoundingBox(level, &x1, &y1, &x2, &y2);
                printf("word: '%s';  \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n",
                       word ? word.get() : "", conf, x1, y1, x2, y2);
            } while (ri->Next(level));
        }
    }

    // Destroy used object and release memory
    api->End();
    pixDestroy(&image);
    return 0;
}

`3.5 OSD_example`

如果需要判断文字的方向，可以参考这个方法

#include <cstdio>
#include <memory>

#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>

// OSD example: detect the orientation and script of an image and print them.
// Returns 0 on success, 1 if the image cannot be read, Tesseract cannot be
// initialized, or detection fails.
int main()
{
    const char *inputfile = "/tesseract/testing/devatest-rotated-270.png";
    PIX *image = pixRead(inputfile);
    if (image == nullptr) {
        fprintf(stderr, "Could not read input image.\n");
        return 1;
    }

    // Owning pointer so the engine object is deleted on every return path.
    auto api = std::make_unique<tesseract::TessBaseAPI>();
    if (api->Init(nullptr, "osd")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        pixDestroy(&image);
        return 1;
    }
    api->SetPageSegMode(tesseract::PSM_OSD_ONLY);
    api->SetImage(image);

    int orient_deg = 0;
    float orient_conf = 0.0f;
    const char *script_name = nullptr;
    float script_conf = 0.0f;
    int status = 0;
    // Only print the results when detection reports success; otherwise the
    // output variables hold no meaningful values.
    if (api->DetectOrientationScript(&orient_deg, &orient_conf, &script_name, &script_conf)) {
        printf("************\n Orientation in degrees: %d\n Orientation confidence: %.2f\n"
               " Script: %s\n Script confidence: %.2f\n",
               orient_deg, orient_conf,
               script_name, script_conf);
    } else {
        fprintf(stderr, "Could not detect orientation and script.\n");
        status = 1;
    }

    // Destroy used object and release memory
    api->End();
    pixDestroy(&image);
    return status;
}

`3.6 LSTM_Choices_example`

#include <cstdio>
#include <memory>
#include <utility>
#include <vector>

#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>

// LSTM choices example: for every word, print the alternative symbol choices
// per character together with their confidence and the word's bounding box.
// Returns 0 on success, 1 if Tesseract cannot be initialized or the input
// image cannot be read.
int main()
{
    // Owning pointer so the engine object is deleted on every return path.
    auto api = std::make_unique<tesseract::TessBaseAPI>();

    // Initialize tesseract-ocr with English, without specifying tessdata path
    if (api->Init(nullptr, "eng")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        return 1;
    }

    // Open input image with leptonica library
    Pix *image = pixRead("choices.png");
    if (image == nullptr) {
        fprintf(stderr, "Could not read input image.\n");
        return 1;
    }
    api->SetImage(image);

    // Set lstm_choice_mode to alternative symbol choices per character, bbox is at word level.
    api->SetVariable("lstm_choice_mode", "2");
    api->Recognize(nullptr);

    {
        // Scoped so the iterator is deleted before the engine is shut down.
        const tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
        const std::unique_ptr<tesseract::ResultIterator> res_it(api->GetIterator());

        // Get confidence level for alternative symbol choices. Code is based on
        // https://github.com/tesseract-ocr/tesseract/blob/main/src/api/hocrrenderer.cpp#L325-L344
        if (res_it) {
            do {
                int x1, y1, x2, y2;
                int wcnt = 0;
                res_it->BoundingBox(level, &x1, &y1, &x2, &y2);

                const std::vector<std::vector<std::pair<const char *, float>>> *choiceMap =
                    res_it->GetBestLSTMSymbolChoices();
                if (choiceMap == nullptr) {
                    continue;  // no choices for this word; move on to the next one
                }

                // Iterate by reference to avoid copying each timestep vector.
                for (const auto &timestep : *choiceMap) {
                    for (const auto &choice : timestep) {
                        // Confidence is scaled to a percentage and truncated to a whole number.
                        const float conf = static_cast<float>(static_cast<int>(choice.second * 100));
                        const char *word = choice.first;
                        printf("%d  symbol: '%s';  \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n",
                               wcnt, word, conf, x1, y1, x2, y2);
                    }
                    wcnt++;
                    printf("\n");
                }
            } while (res_it->Next(level));
        }
    }

    // Destroy used object and release memory
    api->End();
    pixDestroy(&image);
    return 0;
}

`3.7 OpenCV_example`

/*
Windows compile example:

SET TESS_INSTALATION=C:/win64
SET OPENCV_INSTALATION=C:/opencv/build
cl OpenCV_example.cc -I %TESS_INSTALATION%/include -I %OPENCV_INSTALATION%/include /link /LIBPATH:%TESS_INSTALATION%/lib /LIBPATH:%OPENCV_INSTALATION%/x64/vc14/lib tesseract51.lib leptonica-1.83.0.lib opencv_world460.lib /machine:x64

#include <leptonica/allheaders.h>
#include <opencv2/opencv.hpp>
#include
#include <tesseract/baseapi.h>

int main(int argc, char *argv[]) {

std::string outText, imPath = argv[1];
cv::Mat im = cv::imread(imPath, cv::IMREAD_COLOR);
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();

api->Init(NULL, “eng”, tesseract::OEM_LSTM_ONLY);
api->SetPageSegMode(tesseract::PSM_AUTO);
api->SetImage(im.data, im.cols, im.rows, 3, im.step);
outText = std::string(api->GetUTF8Text());
std::cout << outText;
api->End();
delete api;
return 0;
}

`3.8 UserPatterns_example`

#include <cstdio>
#include <memory>

#include <leptonica/allheaders.h>
#include <tesseract/baseapi.h>

// UserPatterns example: initialize Tesseract with a config file (which is
// expected to point at a user patterns file), then OCR an image and print
// the recognized text.
// Returns 0 on success, 1 if Tesseract cannot be initialized or the input
// image cannot be read.
int main()
{
    // Init() takes a mutable char** for the config list, so the path is kept
    // in a writable array rather than pointing at a string literal.
    char config_path[] = "path/to/my.patterns.config";
    char *configs[] = {config_path};
    const int configs_size = 1;

    // Owning pointer so the engine object is deleted on every return path.
    auto api = std::make_unique<tesseract::TessBaseAPI>();
    if (api->Init(nullptr, "eng", tesseract::OEM_LSTM_ONLY, configs, configs_size,
                  nullptr, nullptr, false)) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        return 1;
    }

    Pix *image = pixRead("Arial.png");
    if (image == nullptr) {
        fprintf(stderr, "Could not read input image.\n");
        return 1;
    }
    api->SetImage(image);

    // The buffer is released with delete[] by the unique_ptr
    const std::unique_ptr<char[]> outText(api->GetUTF8Text());
    // Print through an explicit "%s" so '%' characters in the recognized
    // text are not interpreted as format directives.
    printf("%s", outText ? outText.get() : "");

    // Destroy used object and release memory
    api->End();
    pixDestroy(&image);
    return 0;
}