To fit the C++ engineering code, whenever I export the ONNX model I change the forward function in models/yolo.py to the following:
# export version of Detect.forward
def forward(self, x):
    z = []  # inference output
    for i in range(self.nl):
        x[i] = self.m[i](x[i])  # conv
        bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
        x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

        if not self.training:  # inference
            if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

            y = x[i].sigmoid()
            if self.inplace:
                y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                z.append(y.view(bs, -1, self.no))
            else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
                xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                anchor, conf, prob = torch.split(y, [4, 1, self.nc], dim=4)
                # add an idx column (label ids before prob)
                # ori
                idxs = torch.argmax(prob, dim=-1).unsqueeze(dim=-1).type(x[i].dtype)
                # new
                # idxs = torch.max(prob, dim=-1)[1].data.unsqueeze(dim=-1).type(x[i].dtype)
                y = torch.cat((xy, wh, conf, idxs, prob), -1)
                z.append(y.view(bs, -1, self.no + 1))

    return x if self.training else torch.cat(z, 1)
That is, the index of the largest class score is computed inside the model and written out as idxs. The original YOLOv5 output per box is x y w h box_score label1_confidence label2_confidence ... labeln_confidence. After my change, the output becomes x y w h box_score idxs label1_confidence label2_confidence ... labeln_confidence. (Only the else branch adds the idxs column; export.py below sets Detect.inplace = False, so that is the branch traced during export.)
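To confirm that the extra column lands where the C++ side expects it, the exported model can be probed with onnxruntime. A minimal sanity-check sketch, assuming the export produced yolov5s.onnx with a static 640x640 input (both the file name and the input size are placeholders):

import numpy as np
import onnxruntime as ort

sess = ort.InferenceSession('yolov5s.onnx')
im = np.zeros((1, 3, 640, 640), dtype=np.float32)  # dummy BCHW input
pred = sess.run(None, {sess.get_inputs()[0].name: im})[0]  # (1, N, nc + 6)

nc = pred.shape[-1] - 6  # layout: x y w h box_score idxs prob_1..prob_nc
idxs = pred[..., 5]  # class ids pre-computed inside the model
probs = pred[..., 6:]  # per-class confidences
assert np.allclose(idxs, probs.argmax(-1))  # idxs matches the argmax of probs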
Previously I would edit the code by hand before converting to ONNX and then revert it afterwards, because train and detect also go through this forward function in yolo.py. Later, a project required automatic training, automatic detection, and automatic model conversion, so manual editing was no longer an option. My first idea was to copy yolo.py to yolo_onnx.py and have export.py do from models.yolo_onnx import Detect. That does not work, because many other places also do from models.yolo import Detect. The approach I ended up with is as follows.
First, add an export member to the Detect class in yolo.py:
class Detect(nn.Module):
    stride = None  # strides computed during build
    onnx_dynamic = False  # ONNX export parameter
    export = False  # new member for export branching
    ...
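Note that export = False here is a class attribute, like stride and onnx_dynamic: assigning model.model[-1].export = True later shadows it with an instance attribute on that one Detect instance only, so other models loaded in the same process keep the default. A toy sketch of this Python behavior:

class Detect:  # stand-in for models.yolo.Detect, illustration only
    export = False  # class attribute: the shared default

a, b = Detect(), Detect()
a.export = True  # instance attribute shadows the class default on a only
print(a.export, b.export, Detect.export)  # True False False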
Then, in the run function of export.py, set this flag to True:
@torch.no_grad()
def run(data=ROOT / 'data/coco128.yaml',  # 'dataset.yaml path'
        weights=ROOT / 'yolov5s.pt',  # weights path
        imgsz=(640, 640),  # image (height, width)
        batch_size=1,  # batch size
        device='cpu',  # cuda device, i.e. 0 or 0,1,2,3 or cpu
        include=('torchscript', 'onnx'),  # include formats
        half=False,  # FP16 half-precision export
        inplace=False,  # set YOLOv5 Detect() inplace=True
        train=False,  # model.train() mode
        optimize=False,  # TorchScript: optimize for mobile
        int8=False,  # CoreML/TF INT8 quantization
        dynamic=False,  # ONNX/TF: dynamic axes
        simplify=False,  # ONNX: simplify model
        opset=12,  # ONNX: opset version
        verbose=False,  # TensorRT: verbose log
        workspace=4,  # TensorRT: workspace size (GB)
        nms=False,  # TF: add NMS to model
        agnostic_nms=False,  # TF: add agnostic NMS to model
        topk_per_class=100,  # TF.js NMS: topk per class to keep
        topk_all=100,  # TF.js NMS: topk for all classes to keep
        iou_thres=0.45,  # TF.js NMS: IoU threshold
        conf_thres=0.25  # TF.js NMS: confidence threshold
        ):
    t = time.time()
    include = [x.lower() for x in include]  # to lowercase
    formats = tuple(export_formats()['Argument'][1:])  # --include arguments
    flags = [x in include for x in formats]
    assert sum(flags) == len(include), f'ERROR: Invalid --include {include}, valid --include arguments are {formats}'
    jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs = flags  # export booleans
    file = Path(url2file(weights) if str(weights).startswith(('http:/', 'https:/')) else weights)  # PyTorch weights

    # Load PyTorch model
    device = select_device(device)
    assert not (device.type == 'cpu' and half), '--half only compatible with GPU export, i.e. use --device 0'
    model = attempt_load(weights, map_location=device, inplace=True, fuse=True)  # load FP32 model
    nc, names = model.nc, model.names  # number of classes, class names
    model.model[-1].export = True  # added: enable the export branch in Detect.forward

    # Checks
    imgsz *= 2 if len(imgsz) == 1 else 1  # expand
    opset = 12 if ('openvino' in include) else opset  # OpenVINO requires opset <= 12
    assert nc == len(names), f'Model class count {nc} != len(names) {len(names)}'

    # Input
    gs = int(max(model.stride))  # grid size (max stride)
    imgsz = [check_img_size(x, gs) for x in imgsz]  # verify img_size are gs-multiples
    im = torch.zeros(batch_size, 3, *imgsz).to(device)  # image size(1,3,320,192) BCHW iDetection

    # Update model
    if half:
        im, model = im.half(), model.half()  # to FP16
    model.train() if train else model.eval()  # training mode = no Detect() layer grid construction
    for k, m in model.named_modules():
        if isinstance(m, Conv):  # assign export-friendly activations
            if isinstance(m.act, nn.SiLU):
                m.act = SiLU()
        elif isinstance(m, Detect):
            m.inplace = inplace
            m.onnx_dynamic = dynamic
            if hasattr(m, 'forward_export'):
                m.forward = m.forward_export  # assign custom forward (optional)

    for _ in range(2):
        y = model(im)  # dry runs
    shape = tuple(y[0].shape)  # model output shape
    LOGGER.info(f"\n{colorstr('PyTorch:')} starting from {file} with output shape {shape} ({file_size(file):.1f} MB)")

    # Exports
    f = [''] * 10  # exported filenames
    warnings.filterwarnings(action='ignore', category=torch.jit.TracerWarning)  # suppress TracerWarning
    if jit:
        f[0] = export_torchscript(model, im, file, optimize)
    if engine:  # TensorRT required before ONNX
        f[1] = export_engine(model, im, file, train, half, simplify, workspace, verbose)
    if onnx or xml:  # OpenVINO requires ONNX
        f[2] = export_onnx(model, im, file, opset, train, dynamic, simplify)
    if xml:  # OpenVINO
        f[3] = export_openvino(model, im, file)
    if coreml:
        _, f[4] = export_coreml(model, im, file)

    # TensorFlow Exports
    if any((saved_model, pb, tflite, edgetpu, tfjs)):
        if int8 or edgetpu:  # TFLite --int8 bug https://github.com/ultralytics/yolov5/issues/5707
            check_requirements(('flatbuffers==1.12',))  # required before `import tensorflow`
        assert not (tflite and tfjs), 'TFLite and TF.js models must be exported separately, please pass only one type.'
        model, f[5] = export_saved_model(model, im, file, dynamic, tf_nms=nms or agnostic_nms or tfjs,
                                         agnostic_nms=agnostic_nms or tfjs, topk_per_class=topk_per_class,
                                         topk_all=topk_all, conf_thres=conf_thres, iou_thres=iou_thres)  # keras model
        if pb or tfjs:  # pb prerequisite to tfjs
            f[6] = export_pb(model, im, file)
        if tflite or edgetpu:
            f[7] = export_tflite(model, im, file, int8=int8 or edgetpu, data=data, ncalib=100)
        if edgetpu:
            f[8] = export_edgetpu(model, im, file)
        if tfjs:
            f[9] = export_tfjs(model, im, file)

    # Finish
    f = [str(x) for x in f if x]  # filter out '' and None
    if any(f):
        LOGGER.info(f'\nExport complete ({time.time() - t:.2f}s)'
                    f"\nResults saved to {colorstr('bold', file.parent.resolve())}"
                    f"\nDetect: python detect.py --weights {f[-1]}"
                    f"\nPyTorch Hub: model = torch.hub.load('ultralytics/yolov5', 'custom', '{f[-1]}')"
                    f"\nValidate: python val.py --weights {f[-1]}"
                    f"\nVisualize: https://netron.app")
    return f  # return list of exported files/dirs
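Once the branching forward below is also in place, the whole conversion can be driven from Python rather than the command line, which is what the automatic pipeline needs. A minimal sketch, assuming export.py is importable from the YOLOv5 root and the trained weights landed at runs/train/exp/weights/best.pt (both paths are placeholders):

from export import run  # the modified export.py above

# Export ONNX only; run() flips model.model[-1].export = True internally.
files = run(weights='runs/train/exp/weights/best.pt',
            imgsz=(640, 640),
            include=('onnx',),
            opset=12,
            simplify=True)
print(files)  # e.g. ['runs/train/exp/weights/best.onnx']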
Then modify the forward function in yolo.py to branch on this flag:
def forward(self, x):
    if self.export:
        print("self.export===============", self.export)
        z = []  # inference output
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                y = x[i].sigmoid()
                if self.inplace:
                    y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                    z.append(y.view(bs, -1, self.no))
                else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
                    xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                    anchor, conf, prob = torch.split(y, [4, 1, self.nc], dim=4)
                    # add an idx column (label ids before prob)
                    # ori
                    idxs = torch.argmax(prob, dim=-1).unsqueeze(dim=-1).type(x[i].dtype)
                    # new
                    # idxs = torch.max(prob, dim=-1)[1].data.unsqueeze(dim=-1).type(x[i].dtype)
                    y = torch.cat((xy, wh, conf, idxs, prob), -1)
                    z.append(y.view(bs, -1, self.no + 1))

        return x if self.training else torch.cat(z, 1)
    else:
        print("self.export===============", self.export)
        z = []  # inference output
        for i in range(self.nl):
            x[i] = self.m[i](x[i])  # conv
            bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

            if not self.training:  # inference
                if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                y = x[i].sigmoid()
                if self.inplace:
                    y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
                    xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, y[..., 4:]), -1)
                z.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(z, 1), x)
With this in place, train/detect and export automatically run different code paths, with no manual editing required.
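The payoff on the deployment side is that post-processing no longer needs a per-box argmax over the class scores. A sketch of the decode step the C++ code would mirror, assuming a (1, N, nc + 6) prediction array; the 25200 boxes and 80 classes below are just the usual values for a 640x640 COCO model:

import numpy as np

def decode(pred, conf_thres=0.25):
    # pred: (1, N, nc + 6) laid out as x y w h box_score idx prob_1..prob_nc
    p = pred[0]  # drop the batch dimension
    cls = p[:, 5].astype(int)  # class ids, already computed inside the model
    cls_conf = p[np.arange(len(p)), 6 + cls]  # confidence of the chosen class
    score = p[:, 4] * cls_conf  # obj_conf * cls_conf, as YOLOv5's NMS uses
    keep = score > conf_thres
    return p[keep, :4], score[keep], cls[keep]  # xywh boxes, scores, class ids

boxes, scores, classes = decode(np.zeros((1, 25200, 86), dtype=np.float32))
# NMS would follow here, exactly as with the stock output.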