发现 cfg/default.yaml 中参数 mask_ratio 默认等于 4。直接用该配置训练如下边缘分割标签时,推理得到的 mask 稀疏,训练分数偏低;将 mask_ratio 改为 1 后,训练时打印的 mask P 指标一直为 0;将 imgsz 设为原图尺寸,训练分数也不高。
标注用的是labelme多边形
阅读源码后发现,问题可能是由 mask 缩放(先按原图尺寸 fillPoly,再 resize 到小尺寸)导致的。
且出现上边缘mask被box过度剪裁的情况
修改了源码中的两处,仍保持 mask_ratio 等于 4,重新训练。推理结果如下:虽然 mask 较粗糙,但几乎不再产生断裂。
修改如下:
1.ultralytics/data/utils.py
从
def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1):
    """Rasterize polygons into a full-resolution binary mask, then resize it down.

    Args:
        imgsz: (height, width) shape of the full-resolution mask to draw into.
        polygons: array-like of flattened point sequences; each is reshaped to
            (-1, 2) (x, y) pairs — presumably in full-image pixel coordinates.
        color: fill value written inside each polygon.
        downsample_ratio: integer factor by which the final mask is shrunk
            (e.g. 4 -> output is H/4 x W/4).

    Returns:
        np.uint8 mask of shape (imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio).

    NOTE(review): resizing after rasterization interpolates/erodes thin filled
    regions, which is the suspected cause of broken masks for edge-like labels.
    """
    mask = np.zeros(imgsz, dtype=np.uint8)
    polygons = np.asarray(polygons, dtype=np.int32)
    polygons = polygons.reshape((polygons.shape[0], -1, 2))
    cv2.fillPoly(mask, polygons, color=color)
    nh, nw = (imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio)
    # Note: fillPoly first then resize is trying to keep the same loss calculation method when mask-ratio=1
    return cv2.resize(mask, (nw, nh))
到
def polygon2mask(imgsz, polygons, color=1, downsample_ratio=1):
    """Rasterize polygons directly at the downsampled mask resolution.

    Drawing at the target size (instead of drawing at full size and resizing,
    which erodes thin structures) keeps thin edge-like masks from breaking up.

    Args:
        imgsz: (height, width) of the full-resolution image.
        polygons: array-like of flattened point sequences; each is reshaped to
            (-1, 2) (x, y) pairs in full-image pixel coordinates.
        color: fill value written inside each polygon.
        downsample_ratio: integer factor by which the output mask is smaller
            than imgsz.

    Returns:
        np.uint8 mask of shape
        (imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio).
    """
    mask = np.zeros((imgsz[0] // downsample_ratio, imgsz[1] // downsample_ratio), dtype=np.uint8)
    # BUG FIX: the original hard-coded a 0.25 coordinate scale, which is only
    # correct when downsample_ratio == 4. Scale by 1 / downsample_ratio so the
    # function works for any ratio (identical behavior at ratio 4).
    scale = 1.0 / downsample_ratio
    polygons = [[point * scale for point in poly] for poly in polygons]
    polygons = np.asarray(polygons, dtype=np.int32)
    polygons = polygons.reshape((polygons.shape[0], -1, 2))
    cv2.fillPoly(mask, polygons, color=color)
    return mask
2.ultralytics/models/yolo/segment/predict.py
在 SegmentationPredictor 类的 postprocess 方法中,将检测框向外扩 1 个 proto 像素(即网络输入尺寸与 proto 尺寸之比),从而让裁剪出的 mask 外扩约 1 个像素,避免被框边缘过度剪裁。
def postprocess(self, preds, img, orig_imgs):
    """Applies non-max suppression and processes detections for each image in an input batch."""
    # Run NMS on the raw detection head output with the user-configured thresholds.
    p = ops.non_max_suppression(
        preds[0],
        self.args.conf,
        self.args.iou,
        agnostic=self.args.agnostic_nms,
        max_det=self.args.max_det,
        nc=len(self.model.names),
        classes=self.args.classes,
    )
    if not isinstance(orig_imgs, list):  # input images are a torch.Tensor, not a list
        orig_imgs = ops.convert_torch2numpy_batch(orig_imgs)
    results = []
    proto = preds[1][-1] if len(preds[1]) == 3 else preds[1]  # second output is len 3 if pt, but only 1 if exported
    for i, pred in enumerate(p):
        orig_img = orig_imgs[i]
        img_path = self.batch[0][i]
        if not len(pred):  # save empty boxes
            masks = None
        elif self.args.retina_masks:
            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
            masks = ops.process_mask_native(proto[i], pred[:, 6:], pred[:, :4], orig_img.shape[:2])  # HWC
        else:
            # Expand each box outward by one proto cell (iw/mw, ih/mh network-input
            # pixels per proto pixel) BEFORE mask cropping, so the mask is not
            # clipped flush against the box edge. Boxes are modified in place and
            # the expanded coordinates are also what gets scaled/returned below.
            c, mh, mw = proto[i].shape  # CHW
            ih, iw = img.shape[2:]
            pred[:, :4][:, 0] -= iw / mw * 1  # x1 left by one proto cell
            pred[:, :4][:, 1] -= ih / mh * 1  # y1 up by one proto cell
            pred[:, :4][:, 2] += iw / mw * 1  # x2 right by one proto cell
            pred[:, :4][:, 3] += ih / mh * 1  # y2 down by one proto cell
            masks = ops.process_mask(proto[i], pred[:, 6:], pred[:, :4], img.shape[2:], upsample=True)  # HWC
            pred[:, :4] = ops.scale_boxes(img.shape[2:], pred[:, :4], orig_img.shape)
        results.append(Results(orig_img, path=img_path, names=self.model.names, boxes=pred[:, :6], masks=masks))
    return results