yolov8-seg分割模型TensorRt部署，去掉torch

已完成的yolov8-seg分割模型TensorRt部署

准备
下载yolov8-seg模型
转化为onnx和trt
推理
- 写好的推理接口

准备

从 https://github.com/songjiahao-wq/yolov8_seg_trtinference.git 下载代码。
安装 TensorRT 8.6 版本，并执行 `pip install -r requirements.txt` 安装其余依赖。

下载yolov8-seg模型

转化为onnx和trt

转化方法如下：

# tensorRT==8.6
## yolov8-seg CLI指令
### 转化ONNX模型
`python export-seg.py --weights yolov8m-seg.pt --opset 14 --sim --input-shape 1 3 640 640 --device cuda:0`

`python export-seg.py --weights yolov8m-seg.pt --opset 14 --sim --input-shape 1 3 448 512 --device cuda:0`
### 导出trt模型
`python build.py --weights yolov8m-seg.onnx --fp16  --device cuda:0 --seg`
### 采用trtexec导出trt模型
`E:\Download\TensorRT-10.0.1.6\bin\trtexec --onnx=yolov8m-seg.onnx --saveEngine=yolov8m-seg.engine --fp16`
### 不需要torch环境推理
`python infer-seg-without-torch.py --engine yolov8m-seg.engine --imgs data --show --out-dir outputs --method cudart`
### 需要torch环境推理
`python infer-seg.py`

- [x] infer-seg-without-torch-port.py 调用接口，每次只保存mask.txt
- [x] infer-seg-without-torch.py 不需要torch调用，有cuda和pycuda

首先转化为onnx模型

python export-seg.py --weights yolov8m-seg.pt --opset 14 --sim --input-shape 1 3 640 640 --device cuda:0

然后转化为trt模型
有两种转化方式：
代码转化：python build.py --weights yolov8m-seg.onnx --fp16 --device cuda:0 --seg
trtexec转化：trtexec --onnx=yolov8m-seg.onnx --saveEngine=yolov8m-seg.engine --fp16

推理

推理方法有两种：
cudart推理，不包含torch

python infer-seg-without-torch.py --engine yolov8m-seg.engine --imgs data --show --out-dir outputs --method cudart
pycuda推理，不包含torch
python infer-seg-without-torch.py --engine yolov8m-seg.engine --imgs data --show --out-dir outputs --method pycuda
带torch的推理
python infer-seg.py

写好的推理接口

import argparse
import time
from pathlib import Path

import cv2
import numpy as np
import torch

from config import ALPHA, CLASSES, COLORS, MASK_COLORS
from models.utils import blob, letterbox, path_to_list, seg_postprocess


def clip_segments(segments, shape):
    """Clip segment coordinates (xy1, xy2, ...) in place to an image's bounds.

    Args:
        segments: (n, 2) ``torch.Tensor`` or ``np.ndarray`` of x, y points.
        shape: Image shape as (height, width[, ...]).
    """
    if isinstance(segments, torch.Tensor):  # faster individually
        segments[:, 0].clamp_(0, shape[1])  # x
        segments[:, 1].clamp_(0, shape[0])  # y
    else:  # np.array (faster grouped)
        segments[:, 0] = segments[:, 0].clip(0, shape[1])  # x
        segments[:, 1] = segments[:, 1].clip(0, shape[0])  # y


def scale_segments(img1_shape, segments, img0_shape, ratio_pad=None, normalize=False):
    """Rescale segment coordinates from ``img1_shape`` to ``img0_shape``.

    The array is modified in place and also returned.

    Args:
        img1_shape: (height, width) of the image the segments currently refer to.
        segments: (n, 2) array of x, y points.
        img0_shape: (height, width[, ...]) of the target image.
        ratio_pad: Optional ``((gain, ...), (pad_x, pad_y))``; when omitted the
            gain and padding are derived from the two shapes.
        normalize: When true, divide x by the target width and y by the target
            height after clipping.

    Returns:
        The rescaled (and optionally normalised) segments.
    """
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = (
            (img1_shape[1] - img0_shape[1] * gain) / 2,
            (img1_shape[0] - img0_shape[0] * gain) / 2,
        )  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    segments[:, 0] -= pad[0]  # x padding
    segments[:, 1] -= pad[1]  # y padding
    segments /= gain
    clip_segments(segments, img0_shape)
    if normalize:
        segments[:, 0] /= img0_shape[1]  # width
        segments[:, 1] /= img0_shape[0]  # height
    return segments


def masks2segments(masks, strategy="largest"):
    """Convert a stack of binary masks into polygon segments.

    Args:
        masks: ``torch.Tensor`` or ``np.ndarray`` whose first axis indexes the
            individual masks.
        strategy: ``"concat"`` joins every external contour of a mask into one
            polygon; ``"largest"`` keeps the contour with the most points.

    Returns:
        A list with one float32 (k, 2) array per mask, in mask order. A mask
        without any contour yields an empty (0, 2) array.
    """
    if isinstance(masks, torch.Tensor):
        masks = masks.int().cpu().numpy()
    else:
        # Accept plain NumPy input so callers do not need torch for this step.
        masks = np.asarray(masks).astype(np.int32)
    # cv2.findContours needs contiguous uint8 images.
    masks = np.ascontiguousarray(masks.astype("uint8"))

    segments = []
    for mask in masks:
        contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[0]
        if contours:
            if strategy == "concat":  # concatenate all segments
                polygon = np.concatenate([c.reshape(-1, 2) for c in contours])
            elif strategy == "largest":  # select largest segment
                longest = np.array([len(c) for c in contours]).argmax()
                polygon = np.array(contours[longest]).reshape(-1, 2)
            else:
                # Same outcome as before (an AttributeError on a tuple further
                # down), but reported clearly.
                raise AttributeError(f"unsupported strategy: {strategy!r}")
        else:
            polygon = np.zeros((0, 2))  # no segments found
        segments.append(polygon.astype("float32"))
    return segments


def keep_highest_conf_per_class(bboxes, scores, labels, segments, classes=0):
    """Keep only the best detection of class ``classes``.

    The ranking score is ``0.1 * confidence + 1.0 * box_area``, so the box area
    dominates and the confidence mostly breaks ties.
    NOTE(review): the original comment described the opposite weighting
    (confidence plus a small share of the area); the weights are left as coded.

    Args:
        bboxes: (n, 4) array of x1, y1, x2, y2 boxes.
        scores: (n,) array of confidences.
        labels: (n,) array of class ids.
        segments: Sequence of n polygons, aligned with ``bboxes``.
        classes: Class id to keep.

    Returns:
        ``(bboxes, scores, labels, segments, indices)`` restricted to the
        selected detection. ``segments`` is a 1-D object array and ``indices``
        a list of original row indices. All five are empty when no detection of
        the requested class exists.
    """
    bboxes = np.asarray(bboxes)
    scores = np.asarray(scores)
    labels = np.asarray(labels)

    candidates = np.flatnonzero(labels == classes)
    if candidates.size == 0:
        # Always return the 5-tuple so callers can unpack unconditionally.
        return bboxes[:0], scores[:0], labels[:0], np.empty(0, dtype=object), []

    cls_boxes = bboxes[candidates]
    areas = (cls_boxes[:, 2] - cls_boxes[:, 0]) * (cls_boxes[:, 3] - cls_boxes[:, 1])
    combined = scores[candidates] * 0.1 + 1.0 * areas
    best = int(candidates[np.argmax(combined)])
    keep = [best]

    # Polygons have differing lengths; store them in an object array instead of
    # stacking them with the numeric columns (ragged stacking fails in NumPy).
    kept_segments = np.empty(1, dtype=object)
    kept_segments[0] = segments[best]
    return bboxes[keep], scores[keep], labels[keep], kept_segments, keep


class YOLOv8_seg_main:
    """YOLOv8-seg TensorRT inference wrapper that saves one mask polygon per image."""

    def __init__(self, args: argparse.Namespace):
        """Load the TensorRT engine with the backend selected by ``args.method``.

        Raises:
            NotImplementedError: If ``args.method`` is neither ``'cudart'`` nor
                ``'pycuda'``.
        """
        # Import lazily so only the chosen CUDA binding has to be installed.
        if args.method == 'cudart':
            from models.cudart_api import TRTEngine
        elif args.method == 'pycuda':
            from models.pycuda_api import TRTEngine
        else:
            raise NotImplementedError(f"unsupported method: {args.method!r}")

        self.Engine = TRTEngine(args.engine)
        self.H, self.W = self.Engine.inp_info[0].shape[-2:]
        self.args = args

    def main(self, bgr, imagename, outtxtdir) -> None:
        """Run segmentation on one BGR image and save the requested outputs.

        Args:
            bgr: Input image as read by ``cv2.imread``.
            imagename: Path or name of the image; its stem/name is used for the
                output file names.
            outtxtdir: Directory that receives ``<stem>.txt`` when
                ``args.save_txt`` is set.
        """
        # Use the namespace given to the constructor, not a module-level global,
        # so the class also works when imported from another module.
        args = self.args
        outtxtdir = Path(outtxtdir)
        save_path = Path(args.out_dir)
        # Images are written when --show is set and labels when --save_txt is
        # set; make sure both destinations exist before writing.
        save_path.mkdir(parents=True, exist_ok=True)
        if args.save_txt:
            outtxtdir.mkdir(parents=True, exist_ok=True)

        draw = bgr.copy()
        bgr, ratio, dwdh = letterbox(bgr, (self.W, self.H))
        dw, dh = int(dwdh[0]), int(dwdh[1])
        rgb = cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)
        tensor, seg_img = blob(rgb, return_seg=True)
        dwdh = np.array(dwdh * 2, dtype=np.float32)  # (dw, dh, dw, dh)
        tensor = np.ascontiguousarray(tensor)

        # inference
        data = self.Engine(tensor)

        # Drop the letterbox padding and swap the channel order.
        seg_img = seg_img[dh:self.H - dh, dw:self.W - dw, [2, 1, 0]]
        bboxes, scores, labels, masks = seg_postprocess(
            data, bgr.shape[:2], args.conf_thres, args.iou_thres)
        if bboxes.size == 0:
            # No bounding box: report and skip instead of failing an assert.
            print(f'image: no object!')
            return

        masks = masks[:, dh:self.H - dh, dw:self.W - dw, :]
        # Keep the segments in mask order so that index i refers to the same
        # detection in bboxes, masks and segments.
        # NOTE(review): normalisation uses the letterboxed size (rgb.shape)
        # although the masks were cropped by the padding — confirm this is the
        # intended base.
        segments = [
            scale_segments(tensor.shape[2:], x, rgb.shape, normalize=True)
            for x in masks2segments(torch.from_numpy(masks))
        ]
        bboxes -= dwdh
        bboxes /= ratio

        # Keep only the best detection of class 0.
        bboxes, scores, labels, segments, max_conf_indices = keep_highest_conf_per_class(
            bboxes, scores, labels, segments, classes=0)
        if not max_conf_indices:
            print(f'image: no object!')
            return

        if args.show:
            masks = masks[max_conf_indices]
            mask_colors = MASK_COLORS[0]
            mask_colors = mask_colors.reshape(-1, 1, 1, 3) * ALPHA
            mask_colors = masks @ mask_colors
            inv_alph_masks = (1 - masks * 0.5).cumprod(0)
            mcs = (mask_colors * inv_alph_masks).sum(0) * 2
            seg_img = (seg_img * inv_alph_masks[-1] + mcs) * 255
            draw = cv2.resize(seg_img.astype(np.uint8), draw.shape[:2][::-1])

        if args.save_txt:
            seg = segments[0].reshape(-1)  # (n,2) to (n*2)
            line = (int(labels[0]), *seg)  # label format
            with open(outtxtdir / f"{Path(imagename).stem}.txt", "w") as f:
                f.write(("%g " * len(line)).rstrip() % line + "\n")

        if args.show:
            save_image = save_path / Path(imagename).name
            cv2.imwrite(str(save_image), draw)


def parse_args():
    """Parse the command-line options of the inference script."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--engine', type=str, default="../yolov8l-seg.engine", help='Engine file')
    parser.add_argument('--imgs', type=str, default="data", help='Images file')
    parser.add_argument('--show',
                        action='store_true',
                        default=False,
                        help='Show the detection results')
    # NOTE(review): store_true combined with default=True means this flag can
    # never be switched off from the command line.
    parser.add_argument('--save_txt',
                        action='store_true',
                        default=True,
                        help='save_txt the detection results')
    parser.add_argument('--out-dir',
                        type=str,
                        default='./output',
                        help='Path to output file')
    parser.add_argument('--conf-thres',
                        type=float,
                        default=0.25,
                        help='Confidence threshold')
    parser.add_argument('--iou-thres',
                        type=float,
                        default=0.25,
                        help='IoU threshold')
    parser.add_argument('--method',
                        type=str,
                        default='cudart',
                        help='CUDART pipeline')
    args = parser.parse_args()
    return args


if __name__ == '__main__':
    args = parse_args()
    # Do not rebind the class name to its instance.
    detector = YOLOv8_seg_main(args)
    imgpath = './data/1.jpg'
    outtxtdir = './output'
    bgr_img = cv2.imread(imgpath)
    if bgr_img is None:
        # cv2.imread returns None instead of raising on a missing/unreadable file.
        raise FileNotFoundError(f"cannot read image: {imgpath}")
    # Simple timing loop: run the same image 100 times.
    t1 = time.time()
    for _ in range(100):
        detector.main(bgr_img, imgpath, outtxtdir)
    print(time.time() - t1)