1. Cropping
import torch
from torchvision import transforms
import cv2
import numpy as np
import types
from numpy import random
# IoU helpers (shared with the full pipeline listing in part 3)
def intersect(box_a, box_b):
    # intersection area between each box in box_a and the single box_b
    max_xy = np.minimum(box_a[:, 2:], box_b[2:])
    min_xy = np.maximum(box_a[:, :2], box_b[:2])
    inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf)
    return inter[:, 0] * inter[:, 1]


def jaccard_numpy(box_a, box_b):
    """IoU between multiple boxes (box_a, shape [N, 4]) and one box (box_b, shape [4])."""
    inter = intersect(box_a, box_b)
    area_a = (box_a[:, 2] - box_a[:, 0]) * (box_a[:, 3] - box_a[:, 1])
    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])
    union = area_a + area_b - inter
    return inter / union


class RandomSampleCrop(object):
    """Randomly crop the image, keeping only boxes whose centers fall inside the patch.

    Arguments:
        img (ndarray): the image being input during training
        boxes (ndarray): the original bounding boxes in pt form
        labels (ndarray): the class labels for each bbox
        mode (float tuple): the min and max jaccard overlaps
    Return:
        (img, boxes, classes)
            img (ndarray): the cropped image
            boxes (ndarray): the adjusted bounding boxes in pt form
            labels (ndarray): the class labels for each bbox
    """
    def __init__(self):
        self.sample_options = (
            # use the entire original input image
            None,
            # sample a patch s.t. MIN jaccard w/ obj is .1, .3, .7 or .9
            (0.1, None),
            (0.3, None),
            (0.7, None),
            (0.9, None),
            # randomly sample a patch
            (None, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        height, width, _ = image.shape
        while True:
            # randomly choose a mode (index first: np.random.choice cannot
            # handle a tuple of mixed None/tuple entries on recent NumPy)
            mode = self.sample_options[random.randint(len(self.sample_options))]
            if mode is None:
                return image, boxes, labels

            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')

            # max trials (50)
            for _ in range(50):
                current_image = image

                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)

                # aspect ratio constraint b/t .5 & 2
                if h / w < 0.5 or h / w > 2:
                    continue

                left = random.uniform(0, width - w)
                top = random.uniform(0, height - h)

                # convert to integer rect x1, y1, x2, y2
                rect = np.array([int(left), int(top), int(left + w), int(top + h)])

                # calculate IoU (jaccard overlap) b/t the crop and gt boxes
                overlap = jaccard_numpy(boxes, rect)

                # reject if no gt box satisfies the min/max overlap constraint
                # (fixes the original `and` test, which never rejects when max_iou is inf)
                if overlap.max() < min_iou or overlap.min() > max_iou:
                    continue

                # cut the crop from the image
                current_image = current_image[rect[1]:rect[3], rect[0]:rect[2], :]

                # keep a gt box only if its center lies inside the sampled patch
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0

                # mask in all gt boxes whose centers are below/right of the rect's top-left
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
                # mask in all gt boxes whose centers are above/left of the rect's bottom-right
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])

                # both m1 and m2 must be true
                mask = m1 * m2

                # any valid boxes? try again if not
                if not mask.any():
                    continue

                # take only matching gt boxes
                current_boxes = boxes[mask, :].copy()
                # take only matching gt labels
                current_labels = labels[mask]

                # use the larger of the box's and the crop's top-left corner
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2], rect[:2])
                # adjust to crop (by subtracting the crop's left, top)
                current_boxes[:, :2] -= rect[:2]

                # use the smaller of the box's and the crop's bottom-right corner
                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], rect[2:])
                # adjust to crop (by subtracting the crop's left, top)
                current_boxes[:, 2:] -= rect[:2]

                return current_image, current_boxes, current_labels


def debug_random_crop():
    random_crop = RandomSampleCrop()
    path = './test.jpg'
    img = cv2.imread(path)
    print(img.shape)
    boxes = np.array([[68, 62, 311, 523],
                      [276, 235, 498, 535],
                      [480, 160, 701, 510]])
    labels = np.array([[1], [1], [1]])
    current_image, current_boxes, current_labels = random_crop(img, boxes, labels)
    print('==current_image.shape:', current_image.shape)
    print('==current_boxes:', current_boxes)
    print('==current_labels:', current_labels)
    for box in current_boxes:
        x1, y1, x2, y2 = map(int, box)  # cv2.rectangle needs plain ints
        cv2.rectangle(current_image, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
    cv2.imwrite('./draw_current_image.jpg', current_image)
if __name__ == '__main__':
    debug_random_crop()
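The IoU gate is the heart of the sampler: a crop survives only when its jaccard overlap with the ground-truth boxes satisfies the chosen `(min_iou, max_iou)` mode. A toy check with made-up coordinates (not from the demo image) shows the arithmetic; `jaccard_numpy(box_a, rect)` returns the same value:

import numpy as np

# One 100x100 gt box and a 100x100 crop that overlap in a 50x50 region.
box_a = np.array([[50, 50, 150, 150]])  # gt boxes, shape [N, 4], pt form
rect = np.array([100, 100, 200, 200])   # sampled crop, shape [4]

inter = 50 * 50                          # overlap region is 50x50
union = 100 * 100 + 100 * 100 - inter    # area_a + area_b - inter
print(inter / union)                     # 0.1428... -> passes (0.1, None), fails (0.3, None)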
(Figure: original image → cropped result)
2. Expansion
import random

import cv2
import numpy as np
import torch
import torchvision.transforms.functional as FT
from PIL import Image


def expand(image, boxes, filler):
    """Perform a zooming-out operation by placing the image in a larger canvas of filler material.

    Helps the model learn to detect smaller objects.

    :param image: image, a tensor of dimensions (3, original_h, original_w)
    :param boxes: bounding boxes in boundary coordinates, a tensor of dimensions (n_objects, 4)
    :param filler: RGB values of the filler material, a list like [R, G, B]
    :return: expanded image, updated bounding box coordinates
    """
    # Calculate dimensions of the proposed expanded (zoomed-out) image
    original_h = image.size(1)
    original_w = image.size(2)
    max_scale = 4
    scale = random.uniform(1, max_scale)
    new_h = int(scale * original_h)
    new_w = int(scale * original_w)

    # Create such an image with the filler
    filler = torch.FloatTensor(filler)  # (3)
    new_image = torch.ones((3, new_h, new_w), dtype=torch.float) * filler.unsqueeze(1).unsqueeze(1)  # (3, new_h, new_w)
    # Note - do not use expand() like
    #   new_image = filler.unsqueeze(1).unsqueeze(1).expand(3, new_h, new_w)
    # because all expanded values will share the same memory, so changing one pixel will change all

    # Place the original image at random coordinates in this new image (origin at top-left of image)
    left = random.randint(0, new_w - original_w)
    right = left + original_w
    top = random.randint(0, new_h - original_h)
    bottom = top + original_h
    new_image[:, top:bottom, left:right] = image
    print('==boxes:', boxes)

    # Adjust bounding boxes' coordinates accordingly
    new_boxes = boxes + torch.FloatTensor([left, top, left, top]).unsqueeze(0)  # (n_objects, 4)
    print('===new_boxes:', new_boxes)
    return new_image, new_boxes


def torch_cutout():
    # despite its name, this demo exercises expand() above
    info = {"boxes": [[52, 86, 470, 419], [157, 43, 288, 166]],
            "labels": [13, 15],
            "difficulties": [0, 0]}
    image = Image.open('./2008_000008.jpg', mode='r')
    image = image.convert('RGB')
    bboxs = info['boxes']

    img = np.array(image)[..., ::-1].copy()  # RGB -> BGR for cv2
    for box in bboxs:
        x1, y1, x2, y2 = box
        print('x1, y1, x2, y2:', x1, y1, x2, y2)
        cv2.rectangle(img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
    cv2.imwrite('./img_rect.jpg', img)

    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    new_image = FT.to_tensor(image)
    boxes = torch.FloatTensor(bboxs)
    labels = torch.LongTensor(info['labels'])  # (n_objects)
    difficulties = torch.ByteTensor(info['difficulties'])  # (n_objects)

    # new_image, new_boxes, new_labels, new_difficulties = random_crop(new_image, boxes, labels, difficulties)
    # print('new_image, new_boxes, new_labels, new_difficulties', new_image.shape, new_boxes, new_labels, new_difficulties)
    new_image, new_boxes = expand(new_image, boxes, filler=mean)
    fin_img = new_image.permute(1, 2, 0).numpy() * 255.
    fin_img = fin_img[..., ::-1].copy()  # RGB -> BGR
    print('fin_img.shape:', fin_img.shape)
    fin_boxes = new_boxes.numpy()
    print(fin_boxes)
    for box in fin_boxes:
        x1, y1, x2, y2 = map(int, box)  # cv2.rectangle needs integer coordinates
        print('x1, y1, x2, y2:', x1, y1, x2, y2)
        cv2.rectangle(fin_img, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
    cv2.imwrite('./fin_img_rect.jpg', fin_img)
if __name__ == '__main__':
    torch_cutout()
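A quick sanity check of the box arithmetic in `expand()` (a minimal sketch with made-up numbers): pasting the original image at offset `(left, top)` inside the larger canvas shifts every box corner by exactly that offset.

import torch

# A box at (10, 10)-(50, 50); the image is pasted at left=30, top=20 in the canvas.
boxes = torch.FloatTensor([[10., 10., 50., 50.]])
left, top = 30, 20
new_boxes = boxes + torch.FloatTensor([left, top, left, top]).unsqueeze(0)
print(new_boxes)  # tensor([[40., 30., 80., 70.]])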
(Figure: original image → expanded image)
The complete transform pipeline for detection includes random cropping, expansion, resizing, and so on; a short usage sketch follows the listing below.
import torch
from torchvision import transforms
import cv2
import numpy as np
import types
from numpy import random


def intersect(box_a, box_b):
    max_xy = np.minimum(box_a[:, 2:], box_b[2:])
    min_xy = np.maximum(box_a[:, :2], box_b[:2])
    inter = np.clip((max_xy - min_xy), a_min=0, a_max=np.inf)
    return inter[:, 0] * inter[:, 1]


def jaccard_numpy(box_a, box_b):
    """Compute the jaccard overlap of two sets of boxes. The jaccard overlap
    is simply the intersection over union of two boxes.
    E.g.:
        A ∩ B / A ∪ B = A ∩ B / (area(A) + area(B) - A ∩ B)
    Args:
        box_a: Multiple bounding boxes, Shape: [num_boxes, 4]
        box_b: Single bounding box, Shape: [4]
    Return:
        jaccard overlap: Shape: [box_a.shape[0]]
    """
    inter = intersect(box_a, box_b)
    area_a = ((box_a[:, 2] - box_a[:, 0]) *
              (box_a[:, 3] - box_a[:, 1]))
    area_b = ((box_b[2] - box_b[0]) *
              (box_b[3] - box_b[1]))
    union = area_a + area_b - inter
    return inter / union


class Compose(object):
    """Composes several augmentations together.
    Args:
        transforms (List[Transform]): list of transforms to compose.
    Example:
        >>> augmentations.Compose([
        >>>     transforms.CenterCrop(10),
        >>>     transforms.ToTensor(),
        >>> ])
    """
    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, img, boxes=None, labels=None):
        for t in self.transforms:
            img, boxes, labels = t(img, boxes, labels)
        return img, boxes, labels


class Lambda(object):
    """Applies a lambda as a transform."""
    def __init__(self, lambd):
        assert isinstance(lambd, types.LambdaType)
        self.lambd = lambd

    def __call__(self, img, boxes=None, labels=None):
        return self.lambd(img, boxes, labels)


class ConvertFromInts(object):
    def __call__(self, image, boxes=None, labels=None):
        return image.astype(np.float32), boxes, labels


class Normalize(object):
    def __init__(self, mean=None, std=None):
        self.mean = np.array(mean, dtype=np.float32)
        self.std = np.array(std, dtype=np.float32)

    def __call__(self, image, boxes=None, labels=None):
        image = image.astype(np.float32)
        image /= 255.
        image -= self.mean
        image /= self.std
        return image, boxes, labels


class ToAbsoluteCoords(object):
    def __call__(self, image, boxes=None, labels=None):
        height, width, channels = image.shape
        boxes[:, 0] *= width
        boxes[:, 2] *= width
        boxes[:, 1] *= height
        boxes[:, 3] *= height
        return image, boxes, labels


class ToPercentCoords(object):
    def __call__(self, image, boxes=None, labels=None):
        height, width, channels = image.shape
        boxes[:, 0] /= width
        boxes[:, 2] /= width
        boxes[:, 1] /= height
        boxes[:, 3] /= height
        return image, boxes, labels


class Resize(object):
    def __init__(self, size=300):
        self.size = size

    def __call__(self, image, boxes=None, labels=None):
        image = cv2.resize(image, (self.size, self.size))
        return image, boxes, labels


class RandomSaturation(object):
    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "saturation upper must be >= lower."
        assert self.lower >= 0, "saturation lower must be non-negative."

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            image[:, :, 1] *= random.uniform(self.lower, self.upper)
        return image, boxes, labels


class RandomHue(object):
    def __init__(self, delta=18.0):
        assert 0.0 <= delta <= 360.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            image[:, :, 0] += random.uniform(-self.delta, self.delta)
            image[:, :, 0][image[:, :, 0] > 360.0] -= 360.0
            image[:, :, 0][image[:, :, 0] < 0.0] += 360.0
        return image, boxes, labels


class RandomLightingNoise(object):
    def __init__(self):
        self.perms = ((0, 1, 2), (0, 2, 1),
                      (1, 0, 2), (1, 2, 0),
                      (2, 0, 1), (2, 1, 0))

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            swap = self.perms[random.randint(len(self.perms))]
            shuffle = SwapChannels(swap)  # shuffle channels
            image = shuffle(image)
        return image, boxes, labels


class ConvertColor(object):
    def __init__(self, current='BGR', transform='HSV'):
        self.transform = transform
        self.current = current

    def __call__(self, image, boxes=None, labels=None):
        if self.current == 'BGR' and self.transform == 'HSV':
            image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
        elif self.current == 'HSV' and self.transform == 'BGR':
            image = cv2.cvtColor(image, cv2.COLOR_HSV2BGR)
        else:
            raise NotImplementedError
        return image, boxes, labels


class RandomContrast(object):
    def __init__(self, lower=0.5, upper=1.5):
        self.lower = lower
        self.upper = upper
        assert self.upper >= self.lower, "contrast upper must be >= lower."
        assert self.lower >= 0, "contrast lower must be non-negative."

    # expects float image
    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            alpha = random.uniform(self.lower, self.upper)
            image *= alpha
        return image, boxes, labels


class RandomBrightness(object):
    def __init__(self, delta=32):
        assert 0.0 <= delta <= 255.0
        self.delta = delta

    def __call__(self, image, boxes=None, labels=None):
        if random.randint(2):
            delta = random.uniform(-self.delta, self.delta)
            image += delta
        return image, boxes, labels


class ToCV2Image(object):
    def __call__(self, tensor, boxes=None, labels=None):
        return tensor.cpu().numpy().astype(np.float32).transpose((1, 2, 0)), boxes, labels


class ToTensor(object):
    def __call__(self, cvimage, boxes=None, labels=None):
        return torch.from_numpy(cvimage.astype(np.float32)).permute(2, 0, 1), boxes, labels


class RandomSampleCrop(object):
    """Randomly crop the image, keeping only boxes whose centers fall inside the patch.

    Arguments:
        img (ndarray): the image being input during training
        boxes (ndarray): the original bounding boxes in pt form
        labels (ndarray): the class labels for each bbox
        mode (float tuple): the min and max jaccard overlaps
    Return:
        (img, boxes, classes)
            img (ndarray): the cropped image
            boxes (ndarray): the adjusted bounding boxes in pt form
            labels (ndarray): the class labels for each bbox
    """
    def __init__(self):
        self.sample_options = (
            # use the entire original input image
            None,
            # sample a patch s.t. MIN jaccard w/ obj is .1, .3, .7 or .9
            (0.1, None),
            (0.3, None),
            (0.7, None),
            (0.9, None),
            # randomly sample a patch
            (None, None),
        )

    def __call__(self, image, boxes=None, labels=None):
        height, width, _ = image.shape
        while True:
            # randomly choose a mode (index first: np.random.choice cannot
            # handle a tuple of mixed None/tuple entries on recent NumPy)
            mode = self.sample_options[random.randint(len(self.sample_options))]
            if mode is None:
                return image, boxes, labels

            min_iou, max_iou = mode
            if min_iou is None:
                min_iou = float('-inf')
            if max_iou is None:
                max_iou = float('inf')

            # max trials (50)
            for _ in range(50):
                current_image = image

                w = random.uniform(0.3 * width, width)
                h = random.uniform(0.3 * height, height)

                # aspect ratio constraint b/t .5 & 2
                if h / w < 0.5 or h / w > 2:
                    continue

                left = random.uniform(0, width - w)
                top = random.uniform(0, height - h)

                # convert to integer rect x1, y1, x2, y2
                rect = np.array([int(left), int(top), int(left + w), int(top + h)])

                # calculate IoU (jaccard overlap) b/t the crop and gt boxes
                overlap = jaccard_numpy(boxes, rect)

                # reject if no gt box satisfies the min/max overlap constraint
                # (fixes the original `and` test, which never rejects when max_iou is inf)
                if overlap.max() < min_iou or overlap.min() > max_iou:
                    continue

                # cut the crop from the image
                current_image = current_image[rect[1]:rect[3], rect[0]:rect[2], :]

                # keep a gt box only if its center lies inside the sampled patch
                centers = (boxes[:, :2] + boxes[:, 2:]) / 2.0

                # mask in all gt boxes whose centers are below/right of the rect's top-left
                m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
                # mask in all gt boxes whose centers are above/left of the rect's bottom-right
                m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])

                # both m1 and m2 must be true
                mask = m1 * m2

                # any valid boxes? try again if not
                if not mask.any():
                    continue

                # take only matching gt boxes
                current_boxes = boxes[mask, :].copy()
                # take only matching gt labels
                current_labels = labels[mask]

                # use the larger of the box's and the crop's top-left corner
                current_boxes[:, :2] = np.maximum(current_boxes[:, :2], rect[:2])
                # adjust to crop (by subtracting the crop's left, top)
                current_boxes[:, :2] -= rect[:2]

                # use the smaller of the box's and the crop's bottom-right corner
                current_boxes[:, 2:] = np.minimum(current_boxes[:, 2:], rect[2:])
                # adjust to crop (by subtracting the crop's left, top)
                current_boxes[:, 2:] -= rect[:2]

                return current_image, current_boxes, current_labels


class Expand(object):
    def __init__(self, mean):
        self.mean = mean

    def __call__(self, image, boxes, labels):
        if random.randint(2):
            return image, boxes, labels

        height, width, depth = image.shape
        ratio = random.uniform(1, 4)
        left = random.uniform(0, width * ratio - width)
        top = random.uniform(0, height * ratio - height)

        expand_image = np.zeros(
            (int(height * ratio), int(width * ratio), depth),
            dtype=image.dtype)
        expand_image[:, :, :] = self.mean
        expand_image[int(top):int(top + height),
                     int(left):int(left + width)] = image
        image = expand_image

        boxes = boxes.copy()
        boxes[:, :2] += (int(left), int(top))
        boxes[:, 2:] += (int(left), int(top))

        return image, boxes, labels


class RandomMirror(object):
    def __call__(self, image, boxes, classes):
        _, width, _ = image.shape
        if random.randint(2):
            image = image[:, ::-1]
            boxes = boxes.copy()
            # columns 0 and 2 (x1, x2) become width - (x2, x1)
            boxes[:, 0::2] = width - boxes[:, 2::-2]
        return image, boxes, classes


class SwapChannels(object):
    """Transforms a tensorized image by swapping the channels in the order
    specified in the swap tuple.
    Args:
        swaps (int triple): final order of channels, e.g. (2, 1, 0)
    """
    def __init__(self, swaps):
        self.swaps = swaps

    def __call__(self, image):
        """
        Args:
            image (Tensor): image tensor to be transformed
        Return:
            a tensor with channels swapped according to swap
        """
        image = image[:, :, self.swaps]
        return image


class PhotometricDistort(object):
    def __init__(self):
        self.pd = [
            RandomContrast(),
            ConvertColor(transform='HSV'),
            RandomSaturation(),
            RandomHue(),
            ConvertColor(current='HSV', transform='BGR'),
            RandomContrast()
        ]
        self.rand_brightness = RandomBrightness()
        # self.rand_light_noise = RandomLightingNoise()

    def __call__(self, image, boxes, labels):
        im = image.copy()
        im, boxes, labels = self.rand_brightness(im, boxes, labels)
        # randomly apply the contrast jitter either before or after the HSV distortions
        if random.randint(2):
            distort = Compose(self.pd[:-1])
        else:
            distort = Compose(self.pd[1:])
        im, boxes, labels = distort(im, boxes, labels)
        return im, boxes, labels
        # return self.rand_light_noise(im, boxes, labels)


class SSDAugmentation(object):
    def __init__(self, size=300, mean=(0.406, 0.456, 0.485), std=(0.225, 0.224, 0.229)):
        self.mean = mean
        self.size = size
        self.std = std
        # note: Expand fills with self.mean while the image is still in the
        # 0-255 range at that point in the chain (Normalize divides by 255 later)
        self.augment = Compose([
            ConvertFromInts(),
            ToAbsoluteCoords(),
            PhotometricDistort(),
            Expand(self.mean),
            RandomSampleCrop(),
            RandomMirror(),
            ToPercentCoords(),
            Resize(self.size),
            Normalize(self.mean, self.std)
        ])

    def __call__(self, img, boxes, labels):
        return self.augment(img, boxes, labels)


def debug_random_crop():
    random_crop = RandomSampleCrop()
    path = './test.jpg'
    img = cv2.imread(path)
    print(img.shape)
    boxes = np.array([[68, 62, 311, 523],
                      [276, 235, 498, 535],
                      [480, 160, 701, 510]])
    labels = np.array([[1], [1], [1]])
    current_image, current_boxes, current_labels = random_crop(img, boxes, labels)
    print('==current_image.shape:', current_image.shape)
    print('==current_boxes:', current_boxes)
    print('==current_labels:', current_labels)
    for box in current_boxes:
        x1, y1, x2, y2 = map(int, box)  # cv2.rectangle needs plain ints
        cv2.rectangle(current_image, (x1, y1), (x2, y2), color=(0, 0, 255), thickness=2)
    cv2.imwrite('./draw_current_image.jpg', current_image)


if __name__ == '__main__':
    debug_random_crop()
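As mentioned above, here is a minimal usage sketch of the assembled pipeline (it assumes `./test.jpg` exists, as in the debug function, and that the classes above are in scope). One thing to keep in mind: `SSDAugmentation` expects boxes in percent coordinates, because the chain opens with `ToAbsoluteCoords` and closes with `ToPercentCoords`.

import cv2
import numpy as np

img = cv2.imread('./test.jpg')
h, w = img.shape[:2]

boxes_abs = np.array([[68., 62., 311., 523.]], dtype=np.float32)
boxes_pct = boxes_abs / np.array([w, h, w, h], dtype=np.float32)  # -> percent coords
labels = np.array([1])

aug = SSDAugmentation(size=300)
out_img, out_boxes, out_labels = aug(img, boxes_pct, labels)
print(out_img.shape, out_boxes, out_labels)  # (300, 300, 3), boxes back in percent coords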
3. Rotation
See the imgaug documentation for details.
import os
import cv2
import numpy as np
import json
import imgaug as ia
from imgaug import augmenters as iaa


def may_augment_poly(aug, img_shape, poly):
    # run the polygon's corner points through the same (deterministic) augmenter
    keypoints = [ia.Keypoint(p[0], p[1]) for p in poly]
    keypoints = aug.augment_keypoints(
        [ia.KeypointsOnImage(keypoints, shape=img_shape)])[0].keypoints
    poly = [(p.x, p.y) for p in keypoints]
    return poly


def get_express_code_txt():
    path = './标好快递单二维码数据'
    imgs_list_path = [os.path.join(path, i) for i in os.listdir(path) if '.jpg' in i]
    for i, img_list_path in enumerate(imgs_list_path):
        if i < 1:
            print('==img_list_path:', img_list_path)
            img = cv2.imread(img_list_path)
            json_list_path = img_list_path.replace('.jpg', '.json')
            with open(json_list_path, 'r') as file:
                json_info = json.load(file)
            shapes = json_info['shapes']
            output_points = []
            for shape in shapes:
                points = np.array(shape['points']).astype(np.int32)  # np.int is removed in recent NumPy
                # cal_stand_points / polygon_area1 are helpers (defined elsewhere)
                # that put the four corners into a standard clockwise order
                points = cal_stand_points(points)
                points = polygon_area1(points)
                output_points.append(list(map(int, points.reshape(-1).tolist())))
            print('==output_points:', output_points)

            seq = iaa.Sequential([
                # iaa.Multiply((1.2, 1.5)),  # change brightness, doesn't affect keypoints
                iaa.Fliplr(0.5),
                iaa.Affine(
                    rotate=(0, 360),   # random rotation in [0, 360] degrees
                    scale=(0.7, 1.0),  # shrink the image (adds black borders)
                ),  # rotation and scaling affect keypoints too
                # iaa.Resize(0.5, 3)
            ])
            # freeze the random state so image and polygons get the same transform
            seq_def = seq.to_deterministic()
            image_aug = seq_def.augment_image(img)
            print('==image_aug.shape:', image_aug.shape)

            line_polys = []
            polys = np.array(output_points).reshape(-1, 4, 2).astype(np.int32)
            print('==polys:', polys.shape)
            for poly in polys:
                new_poly = may_augment_poly(seq_def, img.shape, poly)
                line_polys.append(new_poly)
            print('=line_polys:', line_polys)

            # debug: draw the augmented polygons on the augmented image
            for line_poly in line_polys:
                cv2.polylines(image_aug,
                              [np.array(line_poly).reshape(-1, 1, 2).astype(np.int32)],
                              True, (0, 0, 255), thickness=2)
            cv2.imwrite('./image_aug.jpg', image_aug)


if __name__ == '__main__':
    get_express_code_txt()
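The core trick here is `to_deterministic()`: it freezes the augmenter's sampled parameters, so the image and its keypoints receive exactly the same geometric transform. A minimal self-contained sketch (toy image and point, for illustration only):

import numpy as np
import imgaug as ia
from imgaug import augmenters as iaa

# Toy 100x100 image with a single keypoint at (20, 30).
image = np.zeros((100, 100, 3), dtype=np.uint8)
kps = ia.KeypointsOnImage([ia.Keypoint(x=20, y=30)], shape=image.shape)

seq = iaa.Affine(rotate=90).to_deterministic()  # same params for image and points
image_aug = seq.augment_image(image)
kps_aug = seq.augment_keypoints([kps])[0]
print(kps_aug.keypoints[0].x, kps_aug.keypoints[0].y)  # the point rotated with the image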
The JSON file:
{"version": "4.2.10","shapes": [{"shape_type": "polygon","group_id": null,"label": "code","points": [[207.6190476190476,689.2857142857143],[613.5714285714286,545.2380952380953],[654.047619047619,635.7142857142858],[254.04761904761904,777.3809523809524]],"flags": {}},{"shape_type": "polygon","group_id": null,"label": "code","points": [[500.4761904761905,883.3333333333334],[858.8095238095239,757.1428571428572],[881.4285714285716,796.4285714285714],[513.5714285714286,925.0]],"flags": {}},{"shape_type": "polygon","group_id": null,"label": "code","points": [[595.7142857142858,1059.5238095238096],[960.0,933.3333333333334],[981.4285714285716,973.8095238095239],[606.4285714285714,1101.1904761904761]],"flags": {}}],"lineColor": [0,255,0,128],"fillColor": [255,0,0,128],"imageHeight": 1422,"imageData": null,"imageWidth": 1152,"imagePath": "72.jpg","flags": {}
}
(Figures: original image and augmented image)