YOLOv10部署教程:使用TensorRT部署,包含模型转换和推理代码
- 一、使用平台
- 1. 转化onnx模型
- 转化trt模型
- 模型推理
- 全部的代码
论文题目:YOLOv10: Real-Time End-to-End Object Detection
研究单位:清华大学
论文链接:http://arxiv.org/abs/2405.14458
代码链接:https://github.com/THU-MIG/yolov10
作者提供的模型性能评价图,如下:
YOLOv10-N:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10n.pt
YOLOv10-S:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10s.pt
YOLOv10-M:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10m.pt
YOLOv10-B:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10b.pt
YOLOv10-L:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10l.pt
YOLOv10-X:https://github.com/jameslahm/yolov10/releases/download/v1.0/yolov10x.pt
推理速度很快,最主要的优点是不需要后处理(无需NMS);缺点是网络比较难训练:由于引入了PSA(部分自注意力)模块,训练时会多占用几GB显存,并且收敛较慢
一、使用平台
Windows 10、TensorRT 8.6.1
1. 转化onnx模型
git clone https://github.com/THU-MIG/yolov10.git
conda create -n YOLO python=3.9
conda activate YOLO
cd yolov10
pip install -r requirements.txt
下载pt模型
用下面代码转化
# -*- coding: utf-8 -*-
# @Time : 2024/6/13 10:54
# @Site :
# @File : export.py
# @Comment :
from ultralytics import YOLOv10

# Export a YOLOv10 checkpoint to ONNX so it can later be converted into a
# TensorRT engine.

# Load an official model.
model = YOLOv10(r"yolov10s.pt")

# Export the model: ONNX format, GPU 0, batch size 2, opset 12, FP16.
model.export(format="onnx", device='0', batch=2, opset=12, half=True)

# Reference table of the export arguments:
#
# Argument   Type          Default        Description
# format     str           'torchscript'  Target format for the exported model,
#                                         such as 'onnx', 'torchscript',
#                                         'tensorflow', or others, defining
#                                         compatibility with various deployment
#                                         environments.
# imgsz      int or tuple  640            Desired image size for the model
#                                         input. Can be an integer for square
#                                         images or a tuple (height, width) for
#                                         specific dimensions.
# keras      bool          False          Enables export to Keras format for
#                                         TensorFlow SavedModel, providing
#                                         compatibility with TensorFlow serving
#                                         and APIs.
# optimize   bool          False          Applies optimization for mobile
#                                         devices when exporting to
#                                         TorchScript, potentially reducing
#                                         model size and improving performance.
# half       bool          False          Enables FP16 (half-precision)
#                                         quantization, reducing model size and
#                                         potentially speeding up inference on
#                                         supported hardware.
# int8       bool          False          Activates INT8 quantization, further
#                                         compressing the model and speeding up
#                                         inference with minimal accuracy loss,
#                                         primarily for edge devices.
# dynamic    bool          False          Allows dynamic input sizes for ONNX
#                                         and TensorRT exports, enhancing
#                                         flexibility in handling varying image
#                                         dimensions.
# simplify   bool          False          Simplifies the model graph for ONNX
#                                         exports with onnxslim, potentially
#                                         improving performance and
#                                         compatibility.
# opset      int           None           Specifies the ONNX opset version for
#                                         compatibility with different ONNX
#                                         parsers and runtimes. If not set,
#                                         uses the latest supported version.
# workspace  float         4.0            Sets the maximum workspace size in
#                                         GiB for TensorRT optimizations,
#                                         balancing memory usage and
#                                         performance.
# nms        bool          False          Adds Non-Maximum Suppression (NMS) to
#                                         the CoreML export, essential for
#                                         accurate and efficient detection
#                                         post-processing.
# batch      int           1              Specifies export model batch
#                                         inference size or the max number of
#                                         images the exported model will
#                                         process concurrently in predict mode.
转化trt模型
import onnx
import tensorrt as trt
# import sys
# sys.setrecursionlimit(500000)


def onnx_export_engine(workspace, onnx_path, trt_path):
    """Build a serialized TensorRT engine from an ONNX file and save it.

    Args:
        workspace: Maximum builder workspace size, in GiB.
        onnx_path: Path of the ONNX model to parse.
        trt_path: Path the serialized engine is written to.

    Raises:
        RuntimeError: If the ONNX file cannot be parsed, or if the builder
            fails to produce a serialized engine.
    """
    # Create the builder.
    logger = trt.Logger(trt.Logger.WARNING)
    builder = trt.Builder(logger)

    # Create a build configuration and cap the workspace memory.
    config = builder.create_builder_config()
    workspace_bytes = int(workspace * (1 << 30))
    if hasattr(config, 'set_memory_pool_limit'):
        config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE,
                                     workspace_bytes)
    else:
        # Older TensorRT releases only expose the attribute form.
        config.max_workspace_size = workspace_bytes

    # Create the network definition with an explicit batch dimension.
    flag = 1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
    network = builder.create_network(flag)

    # Import the ONNX model into the network.
    parser = trt.OnnxParser(network, logger)
    if not parser.parse_from_file(str(onnx_path)):
        raise RuntimeError(f'failed to load ONNX file: {onnx_path}')

    # The FP16 builder flag is left disabled, as in the original snippet.
    # config.set_flag(trt.BuilderFlag.FP16)

    # build_serialized_network returns None when the build fails, so check
    # the result before writing anything to disk.
    serialized_engine = builder.build_serialized_network(network, config)
    if serialized_engine is None:
        raise RuntimeError(
            f'failed to build TensorRT engine from ONNX file: {onnx_path}')

    with open(trt_path, 'wb') as t:
        t.write(serialized_engine)
    print('转化完成')


if __name__ == '__main__':
    onnx_path = 'weights2/best.onnx'
    trt_path = 'end2end.engine'
    onnx_export_engine(4, onnx_path, trt_path)
模型推理
- 定义变量
from models import TRTModule # isort:skip
import argparse
import cv2
from numpy import ndarray
import time
import random
import numpy as np
import os
import pickle
from collections import defaultdict, namedtuple
from pathlib import Path
from typing import List, Optional, Tuple, Union
import onnx
import tensorrt as trt
import torch

# Set CUDA_MODULE_LOADING to 'LAZY' for this process.
os.environ['CUDA_MODULE_LOADING'] = 'LAZY'

# Fixed seed so the randomly drawn per-class colors are the same on every run.
random.seed(0)

# Detection model classes.
CLASSES = (
    'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
    'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
    'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
    'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
    'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
    'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
    'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
    'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
    'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
    'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
    'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
    'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
    'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
    'scissors', 'teddy bear', 'hair drier', 'toothbrush',
)
# Alternative three-class setup:
# CLASSES = (
#     'person', 'sports ball', 'car'
# )

# One random color (three ints in 0..255) per class, keyed by class name.
COLORS = {
    cls: [random.randint(0, 255) for _ in range(3)]
    for cls in CLASSES
}

# Image file suffixes.
SUFFIXS = (
    '.bmp', '.dng', '.jpeg', '.jpg', '.mpo', '.png', '.tif', '.tiff',
    '.webp', '.pfm',
)
- 定义模型加载类
class TRTModule(torch.nn.Module):dtypeMapping = {trt.bool: torch.bool,trt.int8: torch.int8,trt.int32: torch.int32,trt.float16: torch.float16,trt.float32: torch.float32}def __init__(self, weight: Union[str, Path],device: Optional[torch.device]) -> None:super(TRTModule, self).__init__()self.weight = Path(weight) if isinstance(weight, str) else weightself.device = device if device is not None else torch.device('cuda:0')self.stream = torch.cuda.Stream(device=device)self.__init_engine()self.__init_bindings()def __init_engine(self) -> None:logger = trt.Logger(trt.Logger.WARNING)trt.init_libnvinfer_plugins(logger, namespace='')with trt.Runtime(logger) as runtime:model = runtime.deserialize_cuda_engine(self.weight.read_bytes())context = model.create_execution_context()num_bindings = model.num_bindingsnames = [model.get_binding_name(i) for i in range(num_bindings)]self.bindings: List[int] = [0] * num_bindingsnum_inputs, num_outputs = 0, 0for i in range(num_bindings):if model.binding_is_input(i):num_inputs += 1else:num_outputs += 1self.num_bindings = num_bindingsself.num_inputs = num_inputsself.num_outputs = num_outputsself.model = modelself.context = contextself.input_names = names[:num_inputs]self.output_names = names[num_inputs:]self.idx = list(range(self.num_outputs))def __init_bindings(self) -> None:idynamic = odynamic = FalseTensor = namedtuple('Tensor', ('name', 'dtype', 'shape'))inp_info = []out_info = []for i, name in enumerate(self.input_names):assert self.model.get_binding_name(i) == namedtype = self.dtypeMapping[self.model.get_binding_dtype(i)]shape = tuple(self.model.get_binding_shape(i))if -1 in shape:idynamic |= Trueinp_info.append(Tensor(name, dtype, shape))for i, name in enumerate(self.output_names):i += self.num_inputsassert self.model.get_binding_name(i) == namedtype = self.dtypeMapping[self.model.get_binding_dtype(i)]shape = tuple(self.model.get_binding_shape(i))if -1 in shape:odynamic |= Trueout_info.append(Tensor(name, dtype, shape))if not 
odynamic:self.output_tensor = [torch.empty(info.shape, dtype=info.dtype, device=self.device)for info in out_info]self.idynamic = idynamicself.odynamic = odynamicself.inp_info = inp_infoself.out_info