项目背景
https://blog.csdn.net/x1131230123/article/details/140606459
似乎这个任务是简单的,利用目标检测是否可以完成得好呢?
生成数据集
利用这个代码产生数据集:
为了将标签转换为YOLOv5格式,需要将左上角和右下角的坐标转换为YOLO格式的中心点坐标和宽高。YOLOv5标签格式是这样的:
$$\text{class} \;\; \text{x\_center} \;\; \text{y\_center} \;\; \text{width} \;\; \text{height}$$
其中,$\text{x\_center}$、$\text{y\_center}$、$\text{width}$ 和 $\text{height}$ 都是相对于图片宽度和高度的归一化值(取值范围为 0~1),各字段之间用空格分隔。以下是修改后的代码:
import os
import random
from PIL import Image


def list_path_all_files(dirname):
    """Recursively collect the paths of all ``.jpg`` files under *dirname*.

    The extension check is case-insensitive, so ``.JPG`` files are included.
    """
    result = []
    for maindir, _subdirs, file_name_list in os.walk(dirname):
        for filename in file_name_list:
            if filename.lower().endswith('.jpg'):
                result.append(os.path.join(maindir, filename))
    return result


def resize_image(image, target_size, resize_by='height'):
    """Resize *image* so that one side equals *target_size*, keeping aspect ratio.

    Args:
        image: A PIL image.
        target_size: Desired length in pixels of the side named by *resize_by*.
        resize_by: ``'height'`` or ``'width'``; any other value returns the
            image unchanged.

    Returns:
        The resized image, or the original object if it already has the
        requested size.
    """
    w, h = image.size
    if resize_by == 'height':
        if h != target_size:
            ratio = target_size / h
            # max(1, ...) keeps very elongated images from collapsing to 0 px.
            new_width = max(1, int(w * ratio))
            # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
            image = image.resize((new_width, target_size), Image.LANCZOS)
    elif resize_by == 'width':
        if w != target_size:
            ratio = target_size / w
            new_height = max(1, int(h * ratio))
            image = image.resize((target_size, new_height), Image.LANCZOS)
    return image


def create_2x2_image(images):
    """Paste up to four images into a 1280x1280 canvas as a 2x2 grid.

    Every image is stretched to 640x640 (aspect ratio is not preserved).

    Returns:
        ``(canvas, coords)`` where ``coords`` holds one
        ``(x_min, y_min, x_max, y_max)`` pixel box per pasted image, in the
        order top-left, top-right, bottom-left, bottom-right.
    """
    cell = 640
    new_image = Image.new('RGB', (2 * cell, 2 * cell))
    offsets = [(0, 0), (cell, 0), (0, cell), (cell, cell)]
    coords = []
    # zip() stops after four images, matching the four grid cells.
    for img, (x, y) in zip(images, offsets):
        tile = img.resize((cell, cell), Image.LANCZOS)
        new_image.paste(tile, (x, y))
        coords.append((x, y, x + cell, y + cell))
    return new_image, coords


def concatenate_images(image_list, mode='horizontal', target_size=768):
    """Concatenate images side by side or top to bottom.

    Args:
        image_list: PIL images to concatenate, in order.
        mode: ``'horizontal'`` (all images resized to height *target_size*)
            or ``'vertical'`` (all images resized to width *target_size*).
        target_size: Shared height (horizontal) or width (vertical) in pixels.

    Returns:
        ``(canvas, coords)`` where ``coords`` holds one
        ``(x_min, y_min, x_max, y_max)`` pixel box per input image.

    Raises:
        ValueError: If *mode* is neither ``'horizontal'`` nor ``'vertical'``.
    """
    if mode not in ('horizontal', 'vertical'):
        raise ValueError(f"mode must be 'horizontal' or 'vertical', got {mode!r}")

    coords = []
    if mode == 'horizontal':
        resized_images = [resize_image(image, target_size, 'height') for image in image_list]
        total_width = sum(image.size[0] for image in resized_images)
        new_image = Image.new('RGB', (total_width, target_size))
        x_offset = 0
        for image in resized_images:
            new_image.paste(image, (x_offset, 0))
            coords.append((x_offset, 0, x_offset + image.size[0], target_size))
            x_offset += image.size[0]
    else:
        resized_images = [resize_image(image, target_size, 'width') for image in image_list]
        total_height = sum(image.size[1] for image in resized_images)
        new_image = Image.new('RGB', (target_size, total_height))
        y_offset = 0
        for image in resized_images:
            new_image.paste(image, (0, y_offset))
            coords.append((0, y_offset, target_size, y_offset + image.size[1]))
            y_offset += image.size[1]
    return new_image, coords


def generate_labels(coords, image_size):
    """Convert pixel boxes to YOLO label lines.

    Args:
        coords: Iterable of ``(x_min, y_min, x_max, y_max)`` pixel boxes.
        image_size: ``(width, height)`` of the image the boxes refer to.

    Returns:
        A list of strings ``"0 x_center y_center width height"`` with all four
        values normalised by the image width/height. The class id is always 0.
    """
    labels = []
    width, height = image_size
    for x_min, y_min, x_max, y_max in coords:
        x_center = (x_min + x_max) / 2.0 / width
        y_center = (y_min + y_max) / 2.0 / height
        box_width = (x_max - x_min) / width
        box_height = (y_max - y_min) / height
        labels.append(f"0 {x_center} {y_center} {box_width} {box_height}")
    return labels


def _open_rgb(path):
    """Load the image at *path* as an RGB copy and close the underlying file."""
    with Image.open(path) as img:
        # convert() returns a fully loaded copy, so the file handle can be
        # released immediately; RGB also guarantees the result can be saved
        # as JPEG (RGBA / palette images cannot).
        return img.convert('RGB')


def generate_dataset(image_folder, output_folder, label_folder, num_images):
    """Generate composite images and matching YOLO label files.

    For each sample one of six layouts is chosen uniformly at random:
    2 or 3 images side by side, 2 or 3 images stacked, a 2x2 grid, or a
    single untouched image. Source images are drawn at random (with
    replacement) from the ``.jpg`` files under *image_folder*.

    Args:
        image_folder: Directory searched recursively for source ``.jpg`` files.
        output_folder: Directory for ``composite_image_XXXXXX.jpg`` files.
        label_folder: Directory for ``composite_image_XXXXXX.txt`` label files.
        num_images: Number of composite samples to generate.

    Raises:
        ValueError: If samples are requested but *image_folder* has no
            ``.jpg`` files.
    """
    image_paths = list_path_all_files(image_folder)
    if num_images > 0 and not image_paths:
        raise ValueError(f"no .jpg images found under {image_folder!r}")

    os.makedirs(output_folder, exist_ok=True)
    os.makedirs(label_folder, exist_ok=True)

    # (number of source images, layout) indexed by random_choice - 1.
    layouts = (
        (2, 'horizontal'),
        (3, 'horizontal'),
        (2, 'vertical'),
        (3, 'vertical'),
        (4, 'grid'),
        (1, 'single'),
    )

    for i in range(num_images):
        random_choice = random.randint(1, 6)
        count, layout = layouts[random_choice - 1]
        selected_images = [_open_rgb(random.choice(image_paths)) for _ in range(count)]

        if layout == 'grid':
            new_image, coords = create_2x2_image(selected_images)
        elif layout == 'single':
            # Single image case: the whole image is one box.
            new_image = selected_images[0]
            coords = [(0, 0, new_image.size[0], new_image.size[1])]
        else:
            new_image, coords = concatenate_images(selected_images, mode=layout)

        output_image_path = os.path.join(output_folder, f'composite_image_{i + 1:06d}.jpg')
        new_image.save(output_image_path, 'JPEG')

        label_path = os.path.join(label_folder, f'composite_image_{i + 1:06d}.txt')
        labels = generate_labels(coords, new_image.size)
        with open(label_path, 'w') as label_file:
            label_file.writelines(label + '\n' for label in labels)


# Example usage
# Source photos: searched recursively for .jpg files.
image_folder = '/ssd/xiedong/datasets/multilabelsTask/multilabels_new/'
# Destination for the composite training images.
output_folder = '/ssd/xiedong/yolov8detdir/composite_images/train/images/'
# Destination for the label .txt files (one per composite image).
label_folder = '/ssd/xiedong/yolov8detdir/composite_images/train/labels/'
# Number of composite samples to generate.
num_images = 8000

generate_dataset(
    image_folder=image_folder,
    output_folder=output_folder,
    label_folder=label_folder,
    num_images=num_images,
)
修改后的 `generate_labels` 函数将坐标转换为 YOLOv5 标签格式。生成的标签文件会包含每个图像中的标签,每行一个目标框,格式为 `0 x_center y_center width height`。
训练
data_det.yaml:
path: /ssd/xiedong/yolov8detdir/composite_images
train: train/images # train images (relative to 'path') 128 images
val: val/images # val images (relative to 'path') 128 images
test: # test images (optional)

# Classes
names:
  0: paper
x03train_det.py:
from ultralytics import YOLO

# Load a pretrained model (recommended for training)
model = YOLO("yolov8m.pt")

# Root directory for training outputs; must be a real assignment because it
# is passed to model.train() below.
project = "/ssd/xiedong/yolov8detdir/paperdet"

# Train the model with 2 GPUs (CUDA devices 2 and 3)
results = model.train(data="data_det.yaml", epochs=50, imgsz=640, device=[2, 3], batch=180, project=project)
启动容器:
docker run -it --gpus all --shm-size=8g -v /ssd/xiedong/yolov8detdir:/ssd/xiedong/yolov8detdir ultralytics/ultralytics:8.2.62 bash
启动训练:
cd /ssd/xiedong/yolov8detdir
python -m torch.distributed.run --nproc_per_node 2 x03train_det.py
推理
/ssd/xiedong/yolov8detdir/paperdet/train2/weights/best.pt
from ultralytics import YOLO

# Load the weights produced by the training run above
model = YOLO("/ssd/xiedong/yolov8detdir/paperdet/train2/weights/best.pt")

# Run batched inference on a list of images
results = model(["composite_image_000006.jpg"])  # return a list of Results objects

# Process results list
for result in results:
    boxes = result.boxes  # Boxes object for bounding box outputs
    masks = result.masks  # Masks object for segmentation masks outputs
    keypoints = result.keypoints  # Keypoints object for pose outputs
    probs = result.probs  # Probs object for classification outputs
    obb = result.obb  # Oriented boxes object for OBB outputs
    # NOTE: every result is written to the same file name, so with more than
    # one input image only the last annotated image is kept.
    result.save(filename="result.jpg")  # save to disk
推理效果是很好的: