文字检测
方案
先利用 OpenCV 对图像做二值化等预处理,最后使用 Google 的开源库 libtesseract 识别文字。
tesseract安装
apt install libtesseract-dev
# 前面那个是英文,后面那个是中文
apt install tesseract-ocr tesseract-ocr-chi-sim
手册
手册
qt creator中使用
pro文件中加入下面内容
CONFIG += link_pkgconfig
PKGCONFIG += tesseract opencv4 lept
使用
// 前面是opencv的内容,不明白请自行学习
#include <iostream>
#include <memory>

#include <opencv2/highgui.hpp>
#include <opencv2/opencv.hpp>
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

using namespace std;
using namespace cv;static char* get_picture_text(char *file_name);int main(int argc, char **argv)
{Mat srcImage = imread("../../image/chineseTest.png", 1);if(!srcImage.data){cout<<"Open chineseTest error!!!"<<endl;return -1;}Mat imageGuussian;GaussianBlur(srcImage, imageGuussian, Size(3,3), 0);Mat thresholdImage;threshold(imageGuussian, thresholdImage, 150, 255, ADAPTIVE_THRESH_GAUSSIAN_C);Mat distImage;erode(thresholdImage, distImage, Mat::ones(5,5,CV_8U));dilate(distImage, distImage, Mat::ones(5,5,CV_8U));char *temp_file = "temp_file.png";imwrite(temp_file, distImage);cout<<"Get the image text:"<<get_picture_text(temp_file)<<endl;while(waitKey(-1) != 27);return 0;
}static char* get_picture_text(char *file_name)
{char *ret_text;tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();// 英文用"eng",中文简体用"chi_sim"if(api->Init(NULL, "chi_sim")){cout<<"Couldn't init tesseract!"<<endl;return NULL;}Pix *image = pixRead(file_name);api->SetImage(image);ret_text = api->GetUTF8Text();api->End();delete api;pixDestory(&image);return ret_text;
}