VS+QT+Opencv使用YOLOv4对视频流进行目标检测

对单张图像的检测，请参考：https://blog.csdn.net/qq_45445740/article/details/109659938

#include <fstream>
#include <sstream>
#include <iostream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>using namespace cv;
using namespace dnn;
using namespace std;// 初始化参数
float confThreshold = 0.5; // 置信度
float nmsThreshold = 0.4;  // NMS
int inpWidth = 416;  // 网络输入图像的宽度
int inpHeight = 416; // 网络输入图像的高度
vector<string> classes;// 使用非最大值抑制去除低置信度的边界框
void postprocess(Mat& frame, const vector<Mat>& out);// 绘制预测的边界框
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);// 获取输出层的名称
vector<String> getOutputsNames(const Net& net);
void detect_image(string image_path, string modelWeights, string modelConfiguration, string classesFile);
void detect_video(string video_path, string modelWeights, string modelConfiguration, string classesFile);int main(int argc, char** argv)
{// 给出模型的配置和权重文件String modelConfiguration = "E:/00000000E/QT/pracitce/PcbDetectv2/01图像识别/model/yolo-obj.cfg";String modelWeights = "E:/00000000E/QT/pracitce/PcbDetectv2/01图像识别/model/yolo-obj_4000.weights";string image_path = "E:/00000000E/QT/pracitce/PcbDetectv2/01图像识别/image/01.jpg";string classesFile = "E:/00000000E/QT/pracitce/PcbDetectv2/01图像识别/model/classes.names";// "coco.names";// detect_image(image_path, modelWeights, modelConfiguration, classesFile);string video_path = "E:/00000000E/QT/pracitce/PcbDetectv2/02视频检测/video/test.mp4";detect_video(video_path, modelWeights, modelConfiguration, classesFile);cv::waitKey(0);return 0;
}void detect_image(string image_path, string modelWeights, string modelConfiguration, string classesFile) 
{// 加载分类类别ifstream ifs(classesFile.c_str());string line;while (getline(ifs, line)) classes.push_back(line);// 加载网络Net net = readNetFromDarknet(modelConfiguration, modelWeights);net.setPreferableBackend(DNN_BACKEND_OPENCV);net.setPreferableTarget(DNN_TARGET_OPENCL);// 打开视频文件或图像文件或摄像机流string str, outputFile;cv::Mat frame = cv::imread(image_path);// Create a windowstatic const string kWinName = "Deep learning object detection in OpenCV";namedWindow(kWinName, WINDOW_NORMAL);Mat blob;blobFromImage(frame, blob, 1 / 255.0, Size(inpWidth, inpHeight), Scalar(0, 0, 0), true, false);// 设置网络输入net.setInput(blob);// 运行前向传递以获得输出层的输出vector<Mat> outs;net.forward(outs, getOutputsNames(net));// 移除低置信度的边界框postprocess(frame, outs);// 返回推断的总时间(t)和每个层的计时(以layersTimes表示)vector<double> layersTimes;double freq = getTickFrequency() / 1000;double t = net.getPerfProfile(layersTimes) / freq;string label = format("Inference time for a frame : %.2f ms", t);putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 255));// 写入带有检测框的帧imshow(kWinName, frame);cv::waitKey(30);
}void detect_video(string video_path, string modelWeights, string modelConfiguration, string classesFile) 
{string outputFile = "./yolo_out_cpp.avi";;ifstream ifs(classesFile.c_str());string line;while (getline(ifs, line)) classes.push_back(line);Net net = readNetFromDarknet(modelConfiguration, modelWeights);net.setPreferableBackend(DNN_BACKEND_OPENCV);net.setPreferableTarget(DNN_TARGET_CPU);VideoCapture cap;Mat frame, blob;try {// 打开视频文件ifstream ifile(video_path);if (!ifile) throw("error");cap.open(video_path);}catch (...) {cout << "Could not open the input image/video stream" << endl;return;}// Get the video writer initialized to save the output video//  video.open(outputFile, //	VideoWriter::fourcc('M', 'J', 'P', 'G'), 28, Size(cap.get(CAP_PROP_FRAME_WIDTH), cap.get(CAP_PROP_FRAME_HEIGHT)));static const string kWinName = "Deep learning object detection in OpenCV";namedWindow(kWinName, WINDOW_NORMAL);while (waitKey(1) < 0){cap >> frame;if (frame.empty()) {cout << "Done processing !!!" << endl;cout << "Output file is stored as " << outputFile << endl;waitKey(3000);break;}blobFromImage(frame, blob, 1 / 255.0, Size(inpWidth, inpHeight), Scalar(0, 0, 0), true, false);net.setInput(blob);vector<Mat> outs;net.forward(outs, getOutputsNames(net));postprocess(frame, outs);vector<double> layersTimes;double freq = getTickFrequency() / 1000;double t = net.getPerfProfile(layersTimes) / freq;string label = format("Inference time for a frame : %.2f ms", t);putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 0, 255));Mat detectedFrame;frame.convertTo(detectedFrame, CV_8U);imshow(kWinName, frame);}cap.release();
}void postprocess(Mat& frame, const vector<Mat>& outs)
{vector<int> classIds;vector<float> confidences;vector<Rect> boxes;for (size_t i = 0; i < outs.size(); ++i){// 扫描网络输出的所有边界框，只保留置信度高的边界框。将盒子的类标签指定为盒子得分最高的类float* data = (float*)outs[i].data;for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols){Mat scores = outs[i].row(j).colRange(5, outs[i].cols);Point classIdPoint;double confidence;// 获取最高分的值和位置minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);if (confidence > confThreshold){int centerX = (int)(data[0] * frame.cols);int centerY = (int)(data[1] * frame.rows);int width = (int)(data[2] * frame.cols);int height = (int)(data[3] * frame.rows);int left = centerX - width / 2;int top = centerY - height / 2;classIds.push_back(classIdPoint.x);confidences.push_back((float)confidence);boxes.push_back(Rect(left, top, width, height));}}}// 执行非最大抑制以消除置信度较低的冗余重叠框vector<int> indices;NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, indices);for (size_t i = 0; i < indices.size(); ++i){int idx = indices[i];Rect box = boxes[idx];drawPred(classIds[idx], confidences[idx], box.x, box.y,box.x + box.width, box.y + box.height, frame);}}// 绘制预测框
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)
{// 绘制显示边界框的矩形rectangle(frame, Point(left, top), Point(right, bottom), Scalar(255, 178, 50), 3);// 获取类名及其置信度的标签string label = format("%.2f", conf);if (!classes.empty()){CV_Assert(classId < (int)classes.size());label = classes[classId] + ":" + label;}// 在边界框的顶部显示标签int baseLine;Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);top = max(top, labelSize.height);rectangle(frame, Point(left, top - round(1.5 * labelSize.height)), Point(left + round(1.5 * labelSize.width), top + baseLine), Scalar(255, 255, 255), FILLED);putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 0, 0), 1);
}vector<String> getOutputsNames(const Net& net)
{static vector<String> names;if (names.empty()){// 获取输出层的索引，即输出不相连的层vector<int> outLayers = net.getUnconnectedOutLayers();vector<String> layersNames = net.getLayerNames();names.resize(outLayers.size());for (size_t i = 0; i < outLayers.size(); ++i)names[i] = layersNames[outLayers[i] - 1];}return names;
}