1. LetterBox增强
当输入图片的尺寸与模型实际接收的尺寸不一致时,通常需要使用LetterBox增强技术。具体步骤是:先将图片按比例缩放,使较长的边缩放到设定的尺寸,再对较短的边进行填充。默认情况下(auto=False)会填充到设定的尺寸;当auto=True时,只需填充到短边长度为stride的倍数即可。这种方法可以保留原始图像的纵横比,同时使图像更加适合作为目标检测算法的输入。
在YOLOv8代码中,`ultralytics/data/augment.py` 中的 `LetterBox` 类实现了该功能。
import cv2
import numpy as np


class LetterBox:
    """Resize an image with its aspect ratio preserved, then pad it to a target shape.

    The image is scaled by a single factor so that it fits inside ``new_shape``
    and the remaining area is filled with a constant gray border
    (value ``(114, 114, 114)``).

    Attributes:
        new_shape: Target ``(height, width)``; an ``int`` is treated as a square.
        auto: If true, pad only by the remainder modulo ``stride`` (minimum
            rectangle) instead of padding all the way to ``new_shape``.
        scaleFill: If true (and ``auto`` is false), stretch the image to
            ``new_shape`` without padding; the aspect ratio is not preserved.
        scaleup: If false, the image is only ever scaled down, never up.
        center: If true, split the padding between both sides; otherwise the
            image stays in the top-left corner and padding goes bottom/right.
        stride: Modulus used for the padding when ``auto`` is true.
    """

    def __init__(self, new_shape=(640, 640), auto=False, scaleFill=False, scaleup=True, center=True, stride=32):
        """Store the letterbox configuration; see the class docstring for each option."""
        self.new_shape = new_shape
        self.auto = auto
        self.scaleFill = scaleFill
        self.scaleup = scaleup
        self.stride = stride
        self.center = center  # Put the image in the middle or top-left

    def __call__(self, labels=None, image=None):
        """Letterbox an image and return it, or the updated labels dict.

        Args:
            labels: Optional dict. ``labels["img"]`` is used as the input when
                ``image`` is None. An optional ``"rect_shape"`` entry overrides
                ``self.new_shape`` and is removed from the dict. A truthy
                ``"ratio_pad"`` entry is replaced by
                ``(old_ratio_pad, (left, top))``.
            image: Optional image array indexed as ``[height, width, ...]``;
                takes precedence over ``labels["img"]``.

        Returns:
            The padded image when ``labels`` is empty or None. Otherwise the
            ``labels`` dict, passed through ``self._update_labels`` and with
            ``"img"`` and ``"resized_shape"`` set.

        Note:
            ``_update_labels`` is called for a non-empty ``labels`` dict but is
            not defined in this excerpt, so that path needs the method to be
            supplied elsewhere (e.g. by a subclass). This method also relies on
            ``cv2`` being imported at module level.
        """
        if labels is None:
            labels = {}
        img = labels.get("img") if image is None else image
        shape = img.shape[:2]  # current shape [height, width]
        new_shape = labels.pop("rect_shape", self.new_shape)
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        # Scale ratio (new / old): the smaller factor makes both sides fit.
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        if not self.scaleup:  # only scale down, do not scale up (for better val mAP)
            r = min(r, 1.0)

        # Compute padding
        ratio = r, r  # width, height ratios
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))  # (width, height)
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
        if self.auto:  # minimum rectangle
            dw, dh = np.mod(dw, self.stride), np.mod(dh, self.stride)  # wh padding
        elif self.scaleFill:  # stretch
            dw, dh = 0.0, 0.0
            new_unpad = (new_shape[1], new_shape[0])
            ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

        if self.center:
            dw /= 2  # divide padding into 2 sides
            dh /= 2

        if shape[::-1] != new_unpad:  # resize
            img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)

        # The -0.1 / +0.1 offsets make a half-pixel split round down on the
        # top/left and up on the bottom/right, so an odd total padding is
        # still fully distributed.
        top, bottom = int(round(dh - 0.1)) if self.center else 0, int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)) if self.center else 0, int(round(dw + 0.1))
        img = cv2.copyMakeBorder(
            img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=(114, 114, 114)
        )  # add border
        if labels.get("ratio_pad"):
            labels["ratio_pad"] = (labels["ratio_pad"], (left, top))  # for evaluation

        if len(labels):
            labels = self._update_labels(labels, ratio, dw, dh)
            labels["img"] = img
            labels["resized_shape"] = new_shape
            return labels
        else:
            return img


new_shape = (640, 640)
# Demo: letterbox one image with centered padding and show the result.
aug = LetterBox(new_shape, center=True)
labels = None  # no labels dict, so the call returns just the padded image

# Flag -1 is cv2.IMREAD_UNCHANGED: the file is loaded as stored.
img = cv2.imread("./2.png", -1)
if img is None:
    # cv2.imread does not raise on a missing/unreadable file; it returns None,
    # which would otherwise fail later with an unrelated AttributeError.
    raise FileNotFoundError("Could not read image './2.png'")

lettered_img = aug(labels, img)
cv2.imshow('v8 letter_box', lettered_img)
cv2.waitKey(0)  # block until a key is pressed
center=True
center=False