代码分析:
头文件
#include <fstream> //文件
#include <sstream> //流
#include <iostream>
#include <opencv2/dnn.hpp> //深度学习模块-仅提供推理功能
#include <opencv2/imgproc.hpp> //图像处理模块
#include <opencv2/highgui.hpp> //媒体的输入输出/视频捕捉/图像和视频的编码解码/图形界面的接口
命名空间
using namespace cv;
using namespace dnn;
using namespace std;
结构体 Net_config
struct Net_config{float confThreshold; // 置信度阈值float nmsThreshold; // 非最大抑制阈值float objThreshold; // 对象置信度阈值string modelpath;
};
里面存了三个阈值和模型地址,其中置信度,顾名思义,看检测出来的物体的精准度。以测量值为中心,在一定范围内,真值出现在该范围内的几率。
endsWith()函数 判断sub是不是s的子串
int endsWith(string s, string sub) {return s.rfind(sub) == (s.length() - sub.length()) ? 1 : 0;
}
anchors_640图像接收数组
const float anchors_640[3][6] = { {10.0, 13.0, 16.0, 30.0, 33.0, 23.0},{30.0, 61.0, 62.0, 45.0, 59.0, 119.0},{116.0, 90.0, 156.0, 198.0, 373.0, 326.0} };
根据图像大小,选择相应长度的二维数组。深度为3
,每层6
个位置。
YOLO类
class YOLO{
public:YOLO(Net_config config); //构造函数void detect(Mat& frame); //通过图像参数,进行目标检测
private:float* anchors;int num_stride;int inpWidth;int inpHeight;vector<string> class_names;int num_class;float confThreshold;float nmsThreshold;float objThreshold;const bool keep_ratio = true;Net net;void drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid);Mat resize_image(Mat srcimg, int* newh, int* neww, int* top, int* left);
};
YOLO类构造函数的重载
YOLO::YOLO(Net_config config){this->confThreshold = config.confThreshold;this->nmsThreshold = config.nmsThreshold;this->objThreshold = config.objThreshold;this->net = readNet(config.modelpath);ifstream ifs("class.names"); //class.name中写入标签内容,当前只有'person',位置与当前.cpp文件同级string line;while (getline(ifs, line)) this->class_names.push_back(line);this->num_class = class_names.size();if (endsWith(config.modelpath, "6.onnx")){ //根据onnx的输入图像格式 选择分辨率 当前为1280x1280 可灵活调整anchors = (float*)anchors_1280;this->num_stride = 4; //深度this->inpHeight = 1280; //高this->inpWidth = 1280; //宽}else{ //当前为640x640 可以resize满足onnx需求 也可以调整数组或if中的onnx判断anchors = (float*)anchors_640;this->num_stride = 3;this->inpHeight = 640;this->inpWidth = 640;}
}
重新规定图像大小函数 resize_image()
Mat YOLO::resize_image(Mat srcimg, int* newh, int* neww, int* top, int* left){//传入图像以及需要改变的参数int srch = srcimg.rows, srcw = srcimg.cols;*newh = this->inpHeight;*neww = this->inpWidth;Mat dstimg;if (this->keep_ratio && srch != srcw) {float hw_scale = (float)srch / srcw;if (hw_scale > 1) {*newh = this->inpHeight;*neww = int(this->inpWidth / hw_scale);resize(srcimg, dstimg, Size(*neww, *newh), INTER_AREA);*left = int((this->inpWidth - *neww) * 0.5);copyMakeBorder(dstimg, dstimg, 0, 0, *left, this->inpWidth - *neww - *left, BORDER_CONSTANT, 114);}else {*newh = (int)this->inpHeight * hw_scale;*neww = this->inpWidth;resize(srcimg, dstimg, Size(*neww, *newh), INTER_AREA);*top = (int)(this->inpHeight - *newh) * 0.5;copyMakeBorder(dstimg, dstimg, *top, this->inpHeight - *newh - *top, 0, 0, BORDER_CONSTANT, 114);}}else {resize(srcimg, dstimg, Size(*neww, *newh), INTER_AREA);}return dstimg;
}
绘制预测的边界框函数 drawPred()
void YOLO::drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid){//绘制一个显示边界框的矩形rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 2);//获取类名的标签及其置信度string label = format("%.2f", conf);label = this->class_names[classid] + ":" + label;//在边界框顶部显示标签int baseLine;Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);top = max(top, labelSize.height);//rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED);putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
}
【核心代码】检测函数 detect()
void YOLO::detect(Mat& frame){int newh = 0, neww = 0, padh = 0, padw = 0;Mat dstimg = this->resize_image(frame, &newh, &neww, &padh, &padw);Mat blob = blobFromImage(dstimg, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false);this->net.setInput(blob);vector<Mat> outs;this->net.forward(outs, this->net.getUnconnectedOutLayersNames());int num_proposal = outs[0].size[1];int nout = outs[0].size[2];if (outs[0].dims > 2){outs[0] = outs[0].reshape(0, num_proposal);}//生成提案vector<float> confidences;vector<Rect> boxes;vector<int> classIds;float ratioh = (float)frame.rows / newh, ratiow = (float)frame.cols / neww;int n = 0, q = 0, i = 0, j = 0, row_ind = 0; //xmin,ymin,xamx,ymax,box_score,class_scorefloat* pdata = (float*)outs[0].data;for (n = 0; n < this->num_stride; n++){ //特征图尺度const float stride = pow(2, n + 3);int num_grid_x = (int)ceil((this->inpWidth / stride));int num_grid_y = (int)ceil((this->inpHeight / stride));for (q = 0; q < 3; q++){const float anchor_w = this->anchors[n * 6 + q * 2];const float anchor_h = this->anchors[n * 6 + q * 2 + 1];for (i = 0; i < num_grid_y; i++){for (j = 0; j < num_grid_x; j++){float box_score = pdata[4];if (box_score > this->objThreshold){Mat scores = outs[0].row(row_ind).colRange(5, nout);Point classIdPoint;double max_class_socre;//获取最高分的值和位置minMaxLoc(scores, 0, &max_class_socre, 0, &classIdPoint);max_class_socre *= box_score;if (max_class_socre > this->confThreshold){const int class_idx = classIdPoint.x;float cx = pdata[0]; //cxfloat cy = pdata[1]; //cyfloat w = pdata[2]; //wfloat h = pdata[3]; //hint left = int((cx - padw - 0.5 * w) * ratiow);int top = int((cy - padh - 0.5 * h) * ratioh);confidences.push_back((float)max_class_socre);boxes.push_back(Rect(left, top, (int)(w * ratiow), (int)(h * ratioh)));classIds.push_back(class_idx);}}row_ind++;pdata += nout;}}}}// 执行非最大抑制以消除冗余重叠框// 置信度较低vector<int> indices;dnn::NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);for (size_t i = 0; i < indices.size(); ++i){int idx = indices[i];Rect box = boxes[idx];this->drawPred(confidences[idx], box.x, box.y,box.x + box.width, box.y + box.height, frame, classIds[idx]);}
}
主函数
int main(){//加载onnx模型Net_config yolo_nets = { 0.3, 0.5, 0.3, "yolov5n_person_320.onnx" };YOLO yolo_model(yolo_nets);//加载单张图片string imgpath = "112.png";Mat srcimg = imread(imgpath);//开始检测yolo_model.detect(srcimg);static const string kWinName = "Deep learning object detection in OpenCV";namedWindow(kWinName, WINDOW_NORMAL);imshow(kWinName, srcimg); //显示图片waitKey(0); //保持停留destroyAllWindows(); //关闭窗口并取消分配任何相关的内存使用
}
完整代码
#include <fstream>
#include <sstream>
#include <iostream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>using namespace cv;
using namespace dnn;
using namespace std;struct Net_config
{float confThreshold; // Confidence thresholdfloat nmsThreshold; // Non-maximum suppression thresholdfloat objThreshold; //Object Confidence thresholdstring modelpath;
};int endsWith(string s, string sub) {return s.rfind(sub) == (s.length() - sub.length()) ? 1 : 0;
}const float anchors_640[3][6] = { {10.0, 13.0, 16.0, 30.0, 33.0, 23.0},{30.0, 61.0, 62.0, 45.0, 59.0, 119.0},{116.0, 90.0, 156.0, 198.0, 373.0, 326.0} };const float anchors_1280[4][6] = { {19, 27, 44, 40, 38, 94},{96, 68, 86, 152, 180, 137},{140, 301, 303, 264, 238, 542},{436, 615, 739, 380, 925, 792} };class YOLO
{
public:YOLO(Net_config config);void detect(Mat& frame);
private:float* anchors;int num_stride;int inpWidth;int inpHeight;vector<string> class_names;int num_class;float confThreshold;float nmsThreshold;float objThreshold;const bool keep_ratio = true;Net net;void drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid);Mat resize_image(Mat srcimg, int* newh, int* neww, int* top, int* left);
};YOLO::YOLO(Net_config config)
{this->confThreshold = config.confThreshold;this->nmsThreshold = config.nmsThreshold;this->objThreshold = config.objThreshold;this->net = readNet(config.modelpath);ifstream ifs("class.names");string line;while (getline(ifs, line)) this->class_names.push_back(line);this->num_class = class_names.size();if (endsWith(config.modelpath, "6.onnx")){anchors = (float*)anchors_1280;this->num_stride = 4;this->inpHeight = 1280;this->inpWidth = 1280;}else{anchors = (float*)anchors_640;this->num_stride = 3;this->inpHeight = 640;this->inpWidth = 640;}
}Mat YOLO::resize_image(Mat srcimg, int* newh, int* neww, int* top, int* left)
{int srch = srcimg.rows, srcw = srcimg.cols;*newh = this->inpHeight;*neww = this->inpWidth;Mat dstimg;if (this->keep_ratio && srch != srcw) {float hw_scale = (float)srch / srcw;if (hw_scale > 1) {*newh = this->inpHeight;*neww = int(this->inpWidth / hw_scale);resize(srcimg, dstimg, Size(*neww, *newh), INTER_AREA);*left = int((this->inpWidth - *neww) * 0.5);copyMakeBorder(dstimg, dstimg, 0, 0, *left, this->inpWidth - *neww - *left, BORDER_CONSTANT, 114);}else {*newh = (int)this->inpHeight * hw_scale;*neww = this->inpWidth;resize(srcimg, dstimg, Size(*neww, *newh), INTER_AREA);*top = (int)(this->inpHeight - *newh) * 0.5;copyMakeBorder(dstimg, dstimg, *top, this->inpHeight - *newh - *top, 0, 0, BORDER_CONSTANT, 114);}}else {resize(srcimg, dstimg, Size(*neww, *newh), INTER_AREA);}return dstimg;
}void YOLO::drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid) // Draw the predicted bounding box
{//Draw a rectangle displaying the bounding boxrectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 2);//Get the label for the class name and its confidencestring label = format("%.2f", conf);label = this->class_names[classid] + ":" + label;//Display the label at the top of the bounding boxint baseLine;Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);top = max(top, labelSize.height);//rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED);putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
}void YOLO::detect(Mat& frame)
{int newh = 0, neww = 0, padh = 0, padw = 0;Mat dstimg = this->resize_image(frame, &newh, &neww, &padh, &padw);Mat blob = blobFromImage(dstimg, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false);this->net.setInput(blob);vector<Mat> outs;this->net.forward(outs, this->net.getUnconnectedOutLayersNames());int num_proposal = outs[0].size[1];int nout = outs[0].size[2];if (outs[0].dims > 2){outs[0] = outs[0].reshape(0, num_proposal);}/generate proposalsvector<float> confidences;vector<Rect> boxes;vector<int> classIds;float ratioh = (float)frame.rows / newh, ratiow = (float)frame.cols / neww;int n = 0, q = 0, i = 0, j = 0, row_ind = 0; ///xmin,ymin,xamx,ymax,box_score,class_scorefloat* pdata = (float*)outs[0].data;for (n = 0; n < this->num_stride; n++) ///特征图尺度{const float stride = pow(2, n + 3);int num_grid_x = (int)ceil((this->inpWidth / stride));int num_grid_y = (int)ceil((this->inpHeight / stride));for (q = 0; q < 3; q++) ///anchor{const float anchor_w = this->anchors[n * 6 + q * 2];const float anchor_h = this->anchors[n * 6 + q * 2 + 1];for (i = 0; i < num_grid_y; i++){for (j = 0; j < num_grid_x; j++){float box_score = pdata[4];if (box_score > this->objThreshold){Mat scores = outs[0].row(row_ind).colRange(5, nout);Point classIdPoint;double max_class_socre;// Get the value and location of the maximum scoreminMaxLoc(scores, 0, &max_class_socre, 0, &classIdPoint);max_class_socre *= box_score;if (max_class_socre > this->confThreshold){const int class_idx = classIdPoint.x;//float cx = (pdata[0] * 2.f - 0.5f + j) * stride; ///cx//float cy = (pdata[1] * 2.f - 0.5f + i) * stride; ///cy//float w = powf(pdata[2] * 2.f, 2.f) * anchor_w; ///w//float h = powf(pdata[3] * 2.f, 2.f) * anchor_h; ///hfloat cx = pdata[0]; ///cxfloat cy = pdata[1]; ///cyfloat w = pdata[2]; ///wfloat h = pdata[3]; ///hint left = int((cx - padw - 0.5 * w) * ratiow);int top = int((cy - padh - 0.5 * h) * ratioh);confidences.push_back((float)max_class_socre);boxes.push_back(Rect(left, top, (int)(w * ratiow), (int)(h * ratioh)));classIds.push_back(class_idx);}}row_ind++;pdata += nout;}}}}// Perform non maximum suppression to eliminate redundant overlapping boxes with// lower confidencesvector<int> indices;dnn::NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);for (size_t i = 0; i < indices.size(); ++i){int idx = indices[i];Rect box = boxes[idx];this->drawPred(confidences[idx], box.x, box.y,box.x + box.width, box.y + box.height, frame, classIds[idx]);}
}int main()
{Net_config yolo_nets = { 0.3, 0.5, 0.3, "yolov5n_person_320.onnx" };YOLO yolo_model(yolo_nets);//string imgpath = "112.png";//Mat srcimg = imread(imgpath);//yolo_model.detect(srcimg);int n = 588;for (int i = 1; i <= n; i++) {string s = to_string(i) + ".png";string imgpath = "F://test//p1//yanfa2//bh//cc//" + s;cout << imgpath << endl;Mat srcimg = imread(imgpath);yolo_model.detect(srcimg);imwrite("F://test//p2//yanfa2//bh//cc//" + s, srcimg);}//static const string kWinName = "Deep learning object detection in OpenCV";//namedWindow(kWinName, WINDOW_NORMAL);//imshow(kWinName, srcimg);//waitKey(0);//destroyAllWindows();
}