libtorch推理
#include <iostream>
#include <algorithm>
#include <opencv2/opencv.hpp>
#include <torch/script.h>
#include <torch/torch.h>std::vector<torch::Tensor> non_max_suppression(torch::Tensor preds, float score_thresh = 0.01, float iou_thresh = 0.35)
{std::vector<torch::Tensor> output;for (size_t i = 0; i < preds.sizes()[0]; ++i){torch::Tensor pred = preds.select(0, i);pred = pred.to(at::kCPU); torch::Tensor scores = pred.select(1, 4) * std::get<0>(torch::max(pred.slice(1, 5, pred.sizes()[1]), 1));pred = torch::index_select(pred, 0, torch::nonzero(scores > score_thresh).select(1, 0));if (pred.sizes()[0] == 0) continue;pred.select(1, 0) = pred.select(1, 0) - pred.select(1, 2) / 2;pred.select(1, 1) = pred.select(1, 1) - pred.select(1, 3) / 2;pred.select(1, 2) = pred.select(1, 0) + pred.select(1, 2);pred.select(1, 3) = pred.select(1, 1) + pred.select(1, 3);std::tuple<torch::Tensor, torch::Tensor> max_tuple = torch::max(pred.slice(1, 5, pred.sizes()[1]), 1);pred.select(1, 4) = pred.select(1, 4) * std::get<0>(max_tuple);pred.select(1, 5) = std::get<1>(max_tuple);torch::Tensor dets = pred.slice(1, 0, 6);torch::Tensor keep = torch::empty({ dets.sizes()[0] });torch::Tensor areas = (dets.select(1, 3) - dets.select(1, 1)) * (dets.select(1, 2) - dets.select(1, 0));std::tuple<torch::Tensor, torch::Tensor> indexes_tuple = torch::sort(dets.select(1, 4), 0, 1);torch::Tensor v = std::get<0>(indexes_tuple);torch::Tensor indexes = std::get<1>(indexes_tuple);int count = 0;while (indexes.sizes()[0] > 0){keep[count] = (indexes[0].item().toInt());count += 1;torch::Tensor lefts = torch::empty(indexes.sizes()[0] - 1);torch::Tensor tops = torch::empty(indexes.sizes()[0] - 1);torch::Tensor rights = torch::empty(indexes.sizes()[0] - 1);torch::Tensor bottoms = torch::empty(indexes.sizes()[0] - 1);torch::Tensor widths = torch::empty(indexes.sizes()[0] - 1);torch::Tensor heights = torch::empty(indexes.sizes()[0] - 1);for (size_t i = 0; i < indexes.sizes()[0] - 1; ++i){lefts[i] = std::max(dets[indexes[0]][0].item().toFloat(), dets[indexes[i + 1]][0].item().toFloat());tops[i] = std::max(dets[indexes[0]][1].item().toFloat(), dets[indexes[i + 1]][1].item().toFloat());rights[i] = std::min(dets[indexes[0]][2].item().toFloat(), dets[indexes[i + 1]][2].item().toFloat());bottoms[i] = std::min(dets[indexes[0]][3].item().toFloat(), dets[indexes[i + 1]][3].item().toFloat());widths[i] = std::max(float(0), rights[i].item().toFloat() - lefts[i].item().toFloat());heights[i] = std::max(float(0), bottoms[i].item().toFloat() - tops[i].item().toFloat());}torch::Tensor overlaps = widths * heights;torch::Tensor ious = overlaps / (areas.select(0, indexes[0].item().toInt()) + torch::index_select(areas, 0, indexes.slice(0, 1, indexes.sizes()[0])) - overlaps);indexes = torch::index_select(indexes, 0, torch::nonzero(ious <= iou_thresh).select(1, 0) + 1);}keep = keep.toType(torch::kInt64);output.push_back(torch::index_select(dets, 0, keep.slice(0, 0, count)));}return output;
}int main(int argc, char* argv[])
{torch::DeviceType device_type = at::kCPU; if (torch::cuda::is_available()){device_type = at::kCUDA;std::cout << "cuda::is_available():" << torch::cuda::is_available() << std::endl;}torch::jit::script::Module module = torch::jit::load("yolov5n-gpu.torchscript");module.to(device_type); std::vector<std::string> classnames;std::ifstream f("class_det.txt");std::string name;while (std::getline(f, name)){classnames.push_back(name);}cv::Mat frame = cv::imread("bus.jpg"), img;cv::resize(frame, img, cv::Size(640, 640));cv::cvtColor(img, img, cv::COLOR_BGR2RGB); img.convertTo(img, CV_32FC3, 1.0f / 255.0f); auto imgTensor = torch::from_blob(img.data, { 1, img.rows, img.cols, img.channels() }).to(device_type);imgTensor = imgTensor.permute({ 0, 3, 1, 2 }).contiguous(); std::vector<torch::jit::IValue> inputs;inputs.emplace_back(imgTensor);torch::jit::IValue output = module.forward(inputs);auto preds = output.toTuple()->elements()[0].toTensor();std::vector<torch::Tensor> dets = non_max_suppression(preds, 0.35, 0.5);if (dets.size() > 0){for (size_t i = 0; i < dets[0].sizes()[0]; ++i){float left = dets[0][i][0].item().toFloat() * frame.cols / 640;float top = dets[0][i][1].item().toFloat() * frame.rows / 640;float right = dets[0][i][2].item().toFloat() * frame.cols / 640;float bottom = dets[0][i][3].item().toFloat() * frame.rows / 640;float score = dets[0][i][4].item().toFloat();int classID = dets[0][i][5].item().toInt();cv::rectangle(frame, cv::Rect(left, top, (right - left), (bottom - top)), cv::Scalar(0, 255, 0), 2);cv::putText(frame, classnames[classID] + ": " + cv::format("%.2f", score),cv::Point(left, top), cv::FONT_HERSHEY_SIMPLEX, (right - left) / 200, cv::Scalar(0, 255, 0), 2);}}cv::imshow("", frame);cv::waitKey();return 0;
}
onnxruntime推理
#include <iostream>
#include <fstream>
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>void LetterBox(const cv::Mat& image, cv::Mat& outImage,cv::Vec4d& params, const cv::Size& newShape = cv::Size(640, 640),bool autoShape = false,bool scaleFill = false,bool scaleUp = true,int stride = 32,const cv::Scalar& color = cv::Scalar(114, 114, 114))
{cv::Size shape = image.size();float r = std::min((float)newShape.height / (float)shape.height, (float)newShape.width / (float)shape.width);if (!scaleUp){r = std::min(r, 1.0f);}float ratio[2]{ r, r };int new_un_pad[2] = { (int)std::round((float)shape.width * r),(int)std::round((float)shape.height * r) };auto dw = (float)(newShape.width - new_un_pad[0]);auto dh = (float)(newShape.height - new_un_pad[1]);if (autoShape){dw = (float)((int)dw % stride);dh = (float)((int)dh % stride);}else if (scaleFill){dw = 0.0f;dh = 0.0f;new_un_pad[0] = newShape.width;new_un_pad[1] = newShape.height;ratio[0] = (float)newShape.width / (float)shape.width;ratio[1] = (float)newShape.height / (float)shape.height;}dw /= 2.0f;dh /= 2.0f;if (shape.width != new_un_pad[0] && shape.height != new_un_pad[1])cv::resize(image, outImage, cv::Size(new_un_pad[0], new_un_pad[1]));elseoutImage = image.clone();int top = int(std::round(dh - 0.1f));int bottom = int(std::round(dh + 0.1f));int left = int(std::round(dw - 0.1f));int right = int(std::round(dw + 0.1f));params[0] = ratio[0];params[1] = ratio[1];params[2] = left;params[3] = top;cv::copyMakeBorder(outImage, outImage, top, bottom, left, right, cv::BORDER_CONSTANT, color);
}void nms(std::vector<cv::Rect>& boxes, std::vector<float>& confs, std::vector<int>& classIds, float threshold)
{assert(boxes.size() == confs.size());struct BoxConf{cv::Rect box;float conf;int id;};std::vector<BoxConf> boxes_confs;for (size_t i = 0; i < boxes.size(); i++){BoxConf box_conf;box_conf.box = boxes[i];box_conf.conf = confs[i];box_conf.id = classIds[i];boxes_confs.push_back(box_conf);}std::sort(boxes_confs.begin(), boxes_confs.end(), [](BoxConf a, BoxConf b) { return a.conf > b.conf; });std::vector<float> area(boxes_confs.size());for (size_t i = 0; i < boxes_confs.size(); ++i){area[i] = boxes_confs[i].box.width * boxes_confs[i].box.height;}std::vector<bool> isSuppressed(boxes_confs.size(), false);for (size_t i = 0; i < boxes_confs.size(); ++i){if (isSuppressed[i]) { continue; }for (size_t j = i + 1; j < boxes_confs.size(); ++j){if (isSuppressed[j]) { continue; }float x1 = (std::max)(boxes_confs[i].box.x, boxes_confs[j].box.x);float y1 = (std::max)(boxes_confs[i].box.y, boxes_confs[j].box.y);float x2 = (std::min)(boxes_confs[i].box.x + boxes_confs[i].box.width, boxes_confs[j].box.x + boxes_confs[j].box.width);float y2 = (std::min)(boxes_confs[i].box.y + boxes_confs[i].box.height, boxes_confs[j].box.y + boxes_confs[j].box.height);float w = (std::max)(0.0f, x2 - x1);float h = (std::max)(0.0f, y2 - y1);float inter = w * h;float ovr = inter / (area[i] + area[j] - inter);if (ovr >= threshold) { isSuppressed[j] = true; }}}boxes.clear();confs.clear();classIds.clear();for (int i = 0; i < boxes_confs.size(); ++i){if (!isSuppressed[i]){boxes.push_back(boxes_confs[i].box);confs.push_back(boxes_confs[i].conf);classIds.push_back(boxes_confs[i].id);}}
}int main(int argc, char* argv[])
{std::string imgpath = "zidane.jpg";Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "yolov5s");Ort::SessionOptions session_options;session_options.SetIntraOpNumThreads(1);session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED);OrtCUDAProviderOptions cuda_option;cuda_option.device_id = 0;cuda_option.arena_extend_strategy = 0;cuda_option.cudnn_conv_algo_search = OrtCudnnConvAlgoSearchExhaustive;cuda_option.gpu_mem_limit = SIZE_MAX;cuda_option.do_copy_in_default_stream = 1;session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_ALL);session_options.AppendExecutionProvider_CUDA(cuda_option);const wchar_t* model_path = L"yolov5s-det.onnx"; std::vector<std::string> class_names;std::string classesFile = "coco.names";std::ifstream ifs(classesFile.c_str());std::string line;while (getline(ifs, line)) class_names.push_back(line);Ort::Session session(env, model_path, session_options);Ort::AllocatorWithDefaultOptions allocator;size_t num_input_nodes = session.GetInputCount();std::vector<const char*> input_node_names = { "images" };std::vector<const char*> output_node_names = { "output" };const size_t input_tensor_size = 3 * 640 * 640;std::vector<float> input_tensor_values(input_tensor_size);cv::Mat srcimg = cv::imread(imgpath);int newh = 0, neww = 0, padh = 0, padw = 0;cv::Mat dstimg;cv::Vec4d params;cv::Mat letterbox;LetterBox(srcimg, dstimg, params);for (int c = 0; c < 3; c++){for (int i = 0; i < 640; i++){for (int j = 0; j < 640; j++){float pix = dstimg.ptr<uchar>(i)[j * 3 + 2 - c];input_tensor_values[c * 640 * 640 + i * 640 + size_t(j)] = pix / 255.0;}}}std::vector<int64_t> input_node_dims = { 1, 3, 640, 640 };auto memory_info = Ort::MemoryInfo::CreateCpu(OrtArenaAllocator, OrtMemTypeDefault);Ort::Value input_tensor = Ort::Value::CreateTensor<float>(memory_info, input_tensor_values.data(), input_tensor_size, input_node_dims.data(), input_node_dims.size());std::vector<Ort::Value> ort_inputs;ort_inputs.push_back(std::move(input_tensor));std::vector<Ort::Value> output_tensors;for (int i = 0; i < 10; i++){clock_t start_time = clock();output_tensors = session.Run(Ort::RunOptions{ nullptr }, input_node_names.data(), ort_inputs.data(), input_node_names.size(), output_node_names.data(), output_node_names.size());clock_t end_time = clock();std::cout << "inference time: " << static_cast<double>(end_time - start_time) / CLOCKS_PER_SEC << " seconds" << std::endl;}const float* rawOutput = output_tensors[0].GetTensorData<float>();std::vector<int64_t> outputShape = output_tensors[0].GetTensorTypeAndShapeInfo().GetShape();size_t count = output_tensors[0].GetTensorTypeAndShapeInfo().GetElementCount();std::vector<float> output(rawOutput, rawOutput + count);std::vector<cv::Rect> boxes;std::vector<float> confs;std::vector<int> classIds;int numClasses = (int)outputShape[2] - 5;int elementsInBatch = (int)(outputShape[1] * outputShape[2]);float confThreshold = 0.5; float iouThreshold = 0.5; for (auto it = output.begin(); it != output.begin() + elementsInBatch; it += outputShape[2]){float clsConf = *(it + 4);if (clsConf > confThreshold){int centerX = (int)(*it);int centerY = (int)(*(it + 1));int width = (int)(*(it + 2));int height = (int)(*(it + 3));int x1 = centerX - width / 2;int y1 = centerY - height / 2;boxes.emplace_back(cv::Rect(x1, y1, width, height));int bestClassId = -1;float bestConf = 0.0;for (int i = 5; i < numClasses + 5; i++){if ((*(it + i)) > bestConf){bestConf = it[i];bestClassId = i - 5;}}confs.push_back(clsConf);classIds.push_back(bestClassId);}}nms(boxes, confs, classIds, iouThreshold);for (size_t i = 0; i < boxes.size(); ++i){cv::rectangle(dstimg, cv::Point(boxes[i].tl().x, boxes[i].tl().y), cv::Point(boxes[i].br().x, boxes[i].br().y), cv::Scalar(0, 0, 255), 1);cv::putText(dstimg, class_names[classIds[i]] + " " + std::to_string(confs[i]), cv::Point(boxes[i].tl().x, boxes[i].tl().y - 5), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(255, 0, 0), 1);}cv::imshow("result", dstimg);cv::waitKey();return 0;
}