C# PaddleOCR 单字识别效果
效果
说明
根据《百度办公文档识别C++离线SDKV1.2用户接入文档.pdf》,使用C++封装DLL,C#调用。
背景
为使客户、第三方开发者等能够更快速、方便地接入使用百度办公文档识别 SDK,促进百度 OCR 产品赋能更多客户,特设计支持 C++ 语言的 Windows 高精通用文字识别 SDK。该 SDK 提供 pdf 转图文的能力,以及通过 pdf 识别文字并转存成 word 的能力。
SDK 简介
本 SDK 适用于 Windows 平台下的文字识别系统,开发者可在 vs2015 下面进行开发(推荐使用,不保证其他版本 vs 都兼容)。SDK 采用 C++ 的动态库 dll 的方式。上层 UI 框架支持主流框架,如 QT、MFC 等。
自动批量授权
鉴权采用自动激活的方式进行授权,可参考 SDK 示例中,把申请到的授权 key 串码(仅支持批量授权)填入到 license 文件夹的 license.key 文件中,运行 SDK,即可自动激活生成授权文件 license.ini 在license 文件夹中。SDK 授权是通过接口方法 auth_from_file 实现,该方法参数分别是传入授权 key 的串码和授权文件 license.ini 的绝对路径。确保参数正确后,在 SDK 中运行了该方法,就会生成授权license.ini 文件。若授权正确,该方法的返回值为 0,若非 0,则为授权失败,错误原因可根据错误码参考后续文档查看。
离线授权
离线授权,采用从 sdk 附带的 license_tool 工具,bin 文件夹的 license_tool 下,双击 LicenseTool.exe,再点击拷贝,把设备指纹拷贝到剪贴板中,到百度 OCR 官网进行离线激活,填入得到的设备指纹后,从官网下载离线授权文件,解压,形成 license.key 和 license.ini 两个文件,替换到 SDK 中的 license 文件夹中,运行 SDK,若在 SDK 的授权方法 auth_from_file 中返回 0,则为通过了授权。(具体可参考SDK 中的授权代码示例)
项目
代码
using HightOCRTest.Common;
using Newtonsoft.Json;
using OpenCvSharp;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Text;
using System.Windows.Forms;
namespace HightOCRTest
{
public partial class Form1 : Form
{
/// <summary>
/// Creates the form. The only work done here is the call to
/// InitializeComponent, which is defined outside this view
/// (presumably the designer-generated control setup — confirm).
/// </summary>
public Form1()
{
InitializeComponent();
}
// OpenFileDialog filter in "label|patterns" form; the label shown to the user is "*.*".
// The pattern list previously named *.tiff twice; the second entry is now *.tif so
// that both common TIFF extensions are selectable.
string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";
// Path of the image currently displayed; the empty string means "no image selected".
string image_path = "";
// Not read by any code visible in this file.
bool isDraw = false;
// Handle returned by Native.create(). Static, so it is shared by every Form1 instance.
static IntPtr engine;
/// <summary>
/// Checks the license and initializes the OCR engine.
/// Reads the key from license\license.key under the application directory,
/// calls Native.init_license, Native.create and Native.init in that order,
/// and enables the recognition/drawing buttons once every step succeeds.
/// Any failing step shows a message box and leaves the buttons untouched.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button6_Click(object sender, EventArgs e)
{
    string licenseDir = Path.Combine(Application.StartupPath, "license");
    string licenseKeyPath = Path.Combine(licenseDir, "license.key");
    string licenseFile = Path.Combine(licenseDir, "license.ini");

    // A missing or unreadable key file used to surface as an unhandled exception.
    string key;
    try
    {
        key = File.ReadAllText(licenseKeyPath);
    }
    catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
    {
        MessageBox.Show("读取授权文件失败: " + ex.Message);
        return;
    }

    int res = Native.init_license(key, licenseFile);
    if (res != 0)
    {
        MessageBox.Show(res.ToString());
        return;
    }

    engine = Native.create();
    // IntPtr is a value type: comparing it with null is always false, so a failed
    // create() was never detected. IntPtr.Zero is the "no handle" value.
    if (engine == IntPtr.Zero)
    {
        MessageBox.Show("创建引擎失败!");
        return;
    }

    // NOTE(review): the original computed Application.StartupPath + "\\resource"
    // into a local that was never used and passed "" here instead. The empty
    // string is kept to preserve behavior — confirm which path init() expects.
    res = Native.init(engine, "", 6);
    if (res != 0)
    {
        MessageBox.Show(res.ToString());
        return;
    }

    MessageBox.Show("初始化成功!");
    button1.Enabled = true;
    button3.Enabled = true;
    button4.Enabled = true;
    button6.Enabled = false;
}
/// <summary>
/// Shows the bundled sample image (test2.jpg next to the executable) when the
/// form loads. If the sample is not present the form starts without an image
/// instead of throwing from the Bitmap constructor.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void Form1_Load(object sender, EventArgs e)
{
    // Alternative sample kept from the original: Application.StartupPath + "\\images\\1.jpg"
    string samplePath = Path.Combine(Application.StartupPath, "test2.jpg");
    if (!File.Exists(samplePath))
    {
        // Leave image_path empty so the recognition handler's guard skips the run.
        image_path = "";
        return;
    }

    image_path = samplePath;
    pictureBox1.Image = new Bitmap(image_path);
}
/// <summary>
/// Lets the user pick an image file, displays it, and resets everything that
/// belonged to the previously selected image (result text and OCR buffers).
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button2_Click(object sender, EventArgs e)
{
    // The dialog is IDisposable; release it as soon as the choice is read.
    using (OpenFileDialog ofd = new OpenFileDialog())
    {
        ofd.Filter = fileFilter;
        if (ofd.ShowDialog() != DialogResult.OK)
        {
            return;
        }

        // Load first so a file that cannot be opened leaves the current state intact.
        Bitmap loaded = new Bitmap(ofd.FileName);
        var previous = pictureBox1.Image;
        pictureBox1.Image = loaded;
        // The old bitmap was previously dropped without being disposed.
        previous?.Dispose();
        image_path = ofd.FileName;
    }

    // Results of the previous image must not be drawn onto the new one.
    ocr_result_texts.Clear();
    ocr_result_words.Clear();
    textBox1.Text = "";
}
// Buffer passed to Native.ocr as its third argument and later parsed as JSON
// into List<OcrResTexts>. Created with a capacity of 1024 * 10 characters.
// NOTE(review): presumably the native side writes into this buffer and must
// not exceed its capacity — confirm against the Native.ocr declaration.
StringBuilder ocr_result_texts = new StringBuilder(1024 * 10);
// Buffer passed to Native.ocr as its fourth argument and later parsed as JSON
// into List<OcrResWords>. Created with a capacity of 1024 * 100 characters.
StringBuilder ocr_result_words = new StringBuilder(1024 * 100);
/// <summary>
/// Runs OCR on the current image and shows the elapsed time followed by the
/// text-line result and the single-character result, both as indented JSON.
/// Shows "识别失败" when the image cannot be decoded or Native.ocr returns non-zero.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    if (image_path == "")
    {
        return;
    }

    textBox1.Text = "";
    // Let the cleared text box repaint before the blocking native call.
    Application.DoEvents();
    ocr_result_texts.Clear();
    ocr_result_words.Clear();

    int res;
    double totalTime;
    // Mat wraps native memory; the original never disposed it, leaking one image per click.
    using (Mat image = new Mat(image_path))
    {
        // An undecodable file yields an empty Mat; do not hand that to the native engine.
        if (image.Empty())
        {
            textBox1.Text = "识别失败";
            return;
        }

        Stopwatch stopwatch = Stopwatch.StartNew();
        res = Native.ocr(engine, image.CvPtr, ocr_result_texts, ocr_result_words);
        stopwatch.Stop();
        totalTime = stopwatch.Elapsed.TotalSeconds;
    }

    if (res != 0)
    {
        textBox1.Text = "识别失败";
        return;
    }

    const string separator = "\r\n-------------------\r\n";
    string textsJson = JsonConvert.SerializeObject(
        JsonConvert.DeserializeObject(ocr_result_texts.ToString()),
        Newtonsoft.Json.Formatting.Indented);
    string wordsJson = JsonConvert.SerializeObject(
        JsonConvert.DeserializeObject(ocr_result_words.ToString()),
        Newtonsoft.Json.Formatting.Indented);

    // Assign once instead of appending to the control repeatedly.
    textBox1.Text = $"耗时: {totalTime:F2}s" + separator + textsJson + separator + wordsJson;
}
/// <summary>
/// Draws the text-line regions from the last OCR run onto a fresh copy of the
/// current image and shows the annotated image. Each entry's coordinator is
/// read as eight space-separated numbers forming four (x, y) vertices.
/// Entries with a missing or incomplete coordinator are skipped.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button3_Click(object sender, EventArgs e)
{
    if (ocr_result_texts.Length == 0)
    {
        return;
    }

    List<OcrResTexts> lines = JsonConvert.DeserializeObject<List<OcrResTexts>>(ocr_result_texts.ToString());
    // A JSON "null" payload deserializes to null; there is nothing to draw then.
    if (lines == null)
    {
        return;
    }

    // Coordinates are machine-generated; parse them independently of the UI culture
    // so "12.5" is not misread where ',' is the decimal separator.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Dispose the Mat even when parsing or drawing throws.
    using (Mat image = new Mat(image_path))
    {
        foreach (OcrResTexts item in lines)
        {
            if (item == null || item.coordinator == null)
            {
                continue;
            }

            string[] pts = item.coordinator.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            if (pts.Length < 8)
            {
                continue;
            }

            // Polygon vertices: (pts[0], pts[1]) ... (pts[6], pts[7]).
            OpenCvSharp.Point[] points = new OpenCvSharp.Point[4];
            for (int i = 0; i < points.Length; i++)
            {
                points[i] = new OpenCvSharp.Point(
                    Convert.ToDouble(pts[2 * i], invariant),
                    Convert.ToDouble(pts[2 * i + 1], invariant));
            }

            // Draw the closed polygon.
            Cv2.Polylines(image, new OpenCvSharp.Point[][] { points }, isClosed: true, color: new Scalar(0, 255, 0), thickness: 2);
        }

        // Build the new bitmap before releasing the old one so a failure keeps the display intact.
        Bitmap annotated = new Bitmap(image.ToMemoryStream());
        var previous = pictureBox1.Image;
        pictureBox1.Image = annotated;
        previous?.Dispose();
    }
}
/// <summary>
/// Draws the single-character regions from the last OCR run onto a fresh copy
/// of the current image and shows the annotated image. Each entry's coordinator
/// is read as four space-separated numbers: left, top, width, height.
/// Entries with a missing or incomplete coordinator are skipped.
/// </summary>
/// <param name="sender">Control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
private void button4_Click(object sender, EventArgs e)
{
    if (ocr_result_words.Length == 0)
    {
        return;
    }

    List<OcrResWords> words = JsonConvert.DeserializeObject<List<OcrResWords>>(ocr_result_words.ToString());
    // A JSON "null" payload deserializes to null; there is nothing to draw then.
    if (words == null)
    {
        return;
    }

    // Coordinates are machine-generated; parse them independently of the UI culture.
    var invariant = System.Globalization.CultureInfo.InvariantCulture;

    // Dispose the Mat even when parsing or drawing throws.
    using (Mat image = new Mat(image_path))
    {
        foreach (OcrResWords item in words)
        {
            if (item == null || item.coordinator == null)
            {
                continue;
            }

            string[] pts = item.coordinator.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
            if (pts.Length < 4)
            {
                continue;
            }

            // left, top, width, height — truncated to whole pixels as before.
            int left = (int)Convert.ToDouble(pts[0], invariant);
            int top = (int)Convert.ToDouble(pts[1], invariant);
            int width = (int)Convert.ToDouble(pts[2], invariant);
            int height = (int)Convert.ToDouble(pts[3], invariant);

            OpenCvSharp.Rect rect = new Rect(left, top, width, height);
            Cv2.Rectangle(image, rect, color: new Scalar(255, 0, 0), thickness: 1);
        }

        // Build the new bitmap before releasing the old one so a failure keeps the display intact.
        Bitmap annotated = new Bitmap(image.ToMemoryStream());
        var previous = pictureBox1.Image;
        pictureBox1.Image = annotated;
        previous?.Dispose();
    }
}
/// <summary>
/// Click handler intended for recognizing minor languages. It currently does
/// nothing: the whole body is commented out. The disabled code mirrors
/// button1_Click but calls Native.ocr_other instead of Native.ocr.
/// </summary>
private void button5_Click(object sender, EventArgs e)
{
//if (image_path == "")
//{
// return;
//}
//textBox1.Text = "";
//Application.DoEvents();
//ocr_result_texts.Clear();
//ocr_result_words.Clear();
//Mat image = new Mat(image_path);
//Stopwatch stopwatch = new Stopwatch();
//stopwatch.Start();
//int res = Native.ocr_other(engine, image.CvPtr, ocr_result_texts, ocr_result_words);
//stopwatch.Stop();
//double totalTime = stopwatch.Elapsed.TotalSeconds;
//textBox1.Text += $"耗时: {totalTime:F2}s";
//textBox1.Text += "\r\n-------------------\r\n";
//if (res == 0)
//{
// textBox1.Text += JsonConvert.SerializeObject(JsonConvert.DeserializeObject(ocr_result_texts.ToString()), Newtonsoft.Json.Formatting.Indented);
// textBox1.Text += "\r\n-------------------\r\n";
// textBox1.Text += JsonConvert.SerializeObject(JsonConvert.DeserializeObject(ocr_result_words.ToString()), Newtonsoft.Json.Formatting.Indented);
//}
//else
//{
// textBox1.Text = "识别失败";
//}
}
}
}
using HightOCRTest.Common;
using Newtonsoft.Json;
using OpenCvSharp;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Drawing;
using System.IO;
using System.Text;
using System.Windows.Forms;

namespace HightOCRTest
{
    /// <summary>
    /// Demo form for the OCR SDK wrapper: initializes the engine, runs
    /// recognition on a chosen image and draws the detected regions.
    /// This copy had lost its line breaks, so every line comment swallowed the
    /// code that followed it; the structure is restored here.
    /// </summary>
    public partial class Form1 : Form
    {
        public Form1()
        {
            InitializeComponent();
        }

        // OpenFileDialog filter in "label|patterns" form; *.tif replaces a duplicated *.tiff.
        string fileFilter = "*.*|*.bmp;*.jpg;*.jpeg;*.tif;*.tiff;*.png";
        // Path of the image currently displayed; empty means none selected.
        string image_path = "";
        // Not read by any code visible in this file.
        bool isDraw = false;
        // Handle returned by Native.create(); static, so shared by all instances.
        static IntPtr engine;

        /// <summary>
        /// Checks the license and initializes the OCR engine, then enables the
        /// recognition and drawing buttons. Each failing step shows a message box.
        /// </summary>
        private void button6_Click(object sender, EventArgs e)
        {
            string licenseDir = Path.Combine(Application.StartupPath, "license");
            string licenseKeyPath = Path.Combine(licenseDir, "license.key");
            string licenseFile = Path.Combine(licenseDir, "license.ini");

            // A missing or unreadable key file used to surface as an unhandled exception.
            string key;
            try
            {
                key = File.ReadAllText(licenseKeyPath);
            }
            catch (Exception ex) when (ex is IOException || ex is UnauthorizedAccessException)
            {
                MessageBox.Show("读取授权文件失败: " + ex.Message);
                return;
            }

            int res = Native.init_license(key, licenseFile);
            if (res != 0)
            {
                MessageBox.Show(res.ToString());
                return;
            }

            engine = Native.create();
            // IntPtr is a value type; comparing it with null is always false.
            if (engine == IntPtr.Zero)
            {
                MessageBox.Show("创建引擎失败!");
                return;
            }

            // NOTE(review): the original computed StartupPath + "\\resource" into an
            // unused local and passed "" here; confirm which path init() expects.
            res = Native.init(engine, "", 6);
            if (res != 0)
            {
                MessageBox.Show(res.ToString());
                return;
            }

            MessageBox.Show("初始化成功!");
            button1.Enabled = true;
            button3.Enabled = true;
            button4.Enabled = true;
            button6.Enabled = false;
        }

        /// <summary>
        /// Shows the bundled sample image (test2.jpg) on load when it exists.
        /// </summary>
        private void Form1_Load(object sender, EventArgs e)
        {
            // Alternative sample kept from the original: Application.StartupPath + "\\images\\1.jpg"
            string samplePath = Path.Combine(Application.StartupPath, "test2.jpg");
            if (!File.Exists(samplePath))
            {
                image_path = "";
                return;
            }

            image_path = samplePath;
            pictureBox1.Image = new Bitmap(image_path);
        }

        /// <summary>
        /// Lets the user pick an image, displays it and clears results of the previous image.
        /// </summary>
        private void button2_Click(object sender, EventArgs e)
        {
            using (OpenFileDialog ofd = new OpenFileDialog())
            {
                ofd.Filter = fileFilter;
                if (ofd.ShowDialog() != DialogResult.OK)
                {
                    return;
                }

                // Load first so a file that cannot be opened leaves the current state intact.
                Bitmap loaded = new Bitmap(ofd.FileName);
                var previous = pictureBox1.Image;
                pictureBox1.Image = loaded;
                previous?.Dispose();
                image_path = ofd.FileName;
            }

            // Results of the previous image must not be drawn onto the new one.
            ocr_result_texts.Clear();
            ocr_result_words.Clear();
            textBox1.Text = "";
        }

        // Buffers passed to Native.ocr as its third and fourth arguments.
        // NOTE(review): presumably the native side writes JSON into them and must
        // stay within these capacities — confirm against the Native.ocr declaration.
        StringBuilder ocr_result_texts = new StringBuilder(1024 * 10);
        StringBuilder ocr_result_words = new StringBuilder(1024 * 100);

        /// <summary>
        /// Runs OCR on the current image and shows the elapsed time plus both
        /// results as indented JSON, or "识别失败" on failure.
        /// </summary>
        private void button1_Click(object sender, EventArgs e)
        {
            if (image_path == "")
            {
                return;
            }

            textBox1.Text = "";
            // Let the cleared text box repaint before the blocking native call.
            Application.DoEvents();
            ocr_result_texts.Clear();
            ocr_result_words.Clear();

            int res;
            double totalTime;
            // Mat wraps native memory and must be disposed.
            using (Mat image = new Mat(image_path))
            {
                // An undecodable file yields an empty Mat; do not pass it to the engine.
                if (image.Empty())
                {
                    textBox1.Text = "识别失败";
                    return;
                }

                Stopwatch stopwatch = Stopwatch.StartNew();
                res = Native.ocr(engine, image.CvPtr, ocr_result_texts, ocr_result_words);
                stopwatch.Stop();
                totalTime = stopwatch.Elapsed.TotalSeconds;
            }

            if (res != 0)
            {
                textBox1.Text = "识别失败";
                return;
            }

            const string separator = "\r\n-------------------\r\n";
            string textsJson = JsonConvert.SerializeObject(
                JsonConvert.DeserializeObject(ocr_result_texts.ToString()),
                Newtonsoft.Json.Formatting.Indented);
            string wordsJson = JsonConvert.SerializeObject(
                JsonConvert.DeserializeObject(ocr_result_words.ToString()),
                Newtonsoft.Json.Formatting.Indented);

            textBox1.Text = $"耗时: {totalTime:F2}s" + separator + textsJson + separator + wordsJson;
        }

        /// <summary>
        /// Draws the text-line regions (four vertices from eight space-separated
        /// numbers) onto the current image. Incomplete entries are skipped.
        /// </summary>
        private void button3_Click(object sender, EventArgs e)
        {
            if (ocr_result_texts.Length == 0)
            {
                return;
            }

            List<OcrResTexts> lines = JsonConvert.DeserializeObject<List<OcrResTexts>>(ocr_result_texts.ToString());
            if (lines == null)
            {
                return;
            }

            // Parse coordinates independently of the UI culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            using (Mat image = new Mat(image_path))
            {
                foreach (OcrResTexts item in lines)
                {
                    if (item == null || item.coordinator == null)
                    {
                        continue;
                    }

                    string[] pts = item.coordinator.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                    if (pts.Length < 8)
                    {
                        continue;
                    }

                    // Polygon vertices: (pts[0], pts[1]) ... (pts[6], pts[7]).
                    OpenCvSharp.Point[] points = new OpenCvSharp.Point[4];
                    for (int i = 0; i < points.Length; i++)
                    {
                        points[i] = new OpenCvSharp.Point(
                            Convert.ToDouble(pts[2 * i], invariant),
                            Convert.ToDouble(pts[2 * i + 1], invariant));
                    }

                    // Draw the closed polygon.
                    Cv2.Polylines(image, new OpenCvSharp.Point[][] { points }, isClosed: true, color: new Scalar(0, 255, 0), thickness: 2);
                }

                Bitmap annotated = new Bitmap(image.ToMemoryStream());
                var previous = pictureBox1.Image;
                pictureBox1.Image = annotated;
                previous?.Dispose();
            }
        }

        /// <summary>
        /// Draws the single-character regions (left, top, width, height) onto the
        /// current image. Incomplete entries are skipped.
        /// </summary>
        private void button4_Click(object sender, EventArgs e)
        {
            if (ocr_result_words.Length == 0)
            {
                return;
            }

            List<OcrResWords> words = JsonConvert.DeserializeObject<List<OcrResWords>>(ocr_result_words.ToString());
            if (words == null)
            {
                return;
            }

            // Parse coordinates independently of the UI culture.
            var invariant = System.Globalization.CultureInfo.InvariantCulture;

            using (Mat image = new Mat(image_path))
            {
                foreach (OcrResWords item in words)
                {
                    if (item == null || item.coordinator == null)
                    {
                        continue;
                    }

                    string[] pts = item.coordinator.Split(new[] { ' ' }, StringSplitOptions.RemoveEmptyEntries);
                    if (pts.Length < 4)
                    {
                        continue;
                    }

                    // left, top, width, height — truncated to whole pixels.
                    int left = (int)Convert.ToDouble(pts[0], invariant);
                    int top = (int)Convert.ToDouble(pts[1], invariant);
                    int width = (int)Convert.ToDouble(pts[2], invariant);
                    int height = (int)Convert.ToDouble(pts[3], invariant);

                    OpenCvSharp.Rect rect = new Rect(left, top, width, height);
                    Cv2.Rectangle(image, rect, color: new Scalar(255, 0, 0), thickness: 1);
                }

                Bitmap annotated = new Bitmap(image.ToMemoryStream());
                var previous = pictureBox1.Image;
                pictureBox1.Image = annotated;
                previous?.Dispose();
            }
        }

        /// <summary>
        /// Click handler intended for recognizing minor languages. Currently a
        /// no-op: the disabled code below mirrors button1_Click but calls Native.ocr_other.
        /// </summary>
        private void button5_Click(object sender, EventArgs e)
        {
            //if (image_path == "")
            //{
            // return;
            //}
            //textBox1.Text = "";
            //Application.DoEvents();
            //ocr_result_texts.Clear();
            //ocr_result_words.Clear();
            //Mat image = new Mat(image_path);
            //Stopwatch stopwatch = new Stopwatch();
            //stopwatch.Start();
            //int res = Native.ocr_other(engine, image.CvPtr, ocr_result_texts, ocr_result_words);
            //stopwatch.Stop();
            //double totalTime = stopwatch.Elapsed.TotalSeconds;
            //textBox1.Text += $"耗时: {totalTime:F2}s";
            //textBox1.Text += "\r\n-------------------\r\n";
            //if (res == 0)
            //{
            // textBox1.Text += JsonConvert.SerializeObject(JsonConvert.DeserializeObject(ocr_result_texts.ToString()), Newtonsoft.Json.Formatting.Indented);
            // textBox1.Text += "\r\n-------------------\r\n";
            // textBox1.Text += JsonConvert.SerializeObject(JsonConvert.DeserializeObject(ocr_result_words.ToString()), Newtonsoft.Json.Formatting.Indented);
            //}
            //else
            //{
            // textBox1.Text = "识别失败";
            //}
        }
    }
}