YOLO格式数据集转COCO格式

网上找了很久的YOLO格式数据集转COCO格式的代码，但是没有一个成功的，费尽千辛万苦终于找到一个能用的，因此记录一下。

一、首先YOLO格式数据集文件布局

其中 images 和 labels 中的内容布局如下，只不过一个存放图片，一个存放标签

二、COCO数据集布局

下面的代码生成的是下图的第一个文件夹，存放标注文件json，其他三个文件夹都是图片

三、转换代码

import json
import os
import shutil

import cv2

# info and licenses are not used for now
# Dataset-level metadata that is written verbatim into every generated JSON.
info = {
    "year": 2024,
    "version": '1.0',
    # Kept as an ISO date string: the bare expression 2024 - 03 - 29 is not
    # valid Python 3, which rejects integer literals with a leading zero.
    "date_created": "2024-03-29",
}

# The COCO data format defines "licenses" as a list of license records.
licenses = [
    {
        "id": 1,
        "name": "yiquan",
        "url": "null",
    },
]

# Your own label classes; the ids must match the class indices used in the
# YOLO label files.
categories = [
    {"id": 0, "name": 'Eating', "supercategory": 'lines'},
    {"id": 1, "name": 'Raising_a_hand', "supercategory": 'lines'},
    {"id": 2, "name": 'Reading', "supercategory": 'lines'},
    {"id": 3, "name": 'Sleeping_At_a_desk', "supercategory": 'lines'},
    {"id": 4, "name": 'Writing', "supercategory": 'lines'},
]

# Initialise the train / test / val data dictionaries.
# info, licenses and categories are identical across train, test and val;
# only the 'images' and 'annotations' lists differ per split.
train_data = {
    'info': info,
    'licenses': licenses,
    'categories': categories,
    'images': [],
    'annotations': [],
}
test_data = {
    'info': info,
    'licenses': licenses,
    'categories': categories,
    'images': [],
    'annotations': [],
}
# image_path is the YOLO image directory, e.g. images/train;
# label_path is the matching YOLO label directory, e.g. labels/train; its
# file names must correspond to the ones under images.
def v5_covert_coco_format(image_path, label_path, image_exts=('.jpg',)):
    """Build the COCO ``images`` and ``annotations`` lists for one YOLO split.

    Every image in ``image_path`` is opened with ``cv2.imread`` to obtain its
    pixel size, then the label file with the same stem in ``label_path`` is
    parsed. Each label line ``class xc yc w h`` (normalised centre/size) is
    converted into an absolute ``[xmin, ymin, width, height]`` box.

    Args:
        image_path: Directory holding the images of the split.
        label_path: Directory holding the YOLO ``.txt`` label files.
        image_exts: Lower-case file extensions (with the dot) that count as
            images; matching ignores case. Defaults to ``('.jpg',)``.

    Returns:
        A tuple ``(images, annotations)`` of lists of dicts, ready to be
        stored under the ``images`` and ``annotations`` keys of a COCO JSON.

    Raises:
        ValueError: If a non-empty label line does not hold exactly five
            whitespace-separated fields, or a field is not numeric.
    """
    images = []
    annotations = []
    extensions = tuple(ext.lower() for ext in image_exts)

    # A single running counter keeps annotation ids unique no matter how many
    # boxes an image has (index * 100 + idx collides from the 100th box on).
    # NOTE(review): counting starts at 1 because some COCO evaluation code
    # (pycocotools' COCOeval, as far as I can tell) uses 0 as its "unmatched"
    # marker - confirm against the consumer of these files.
    next_annotation_id = 1

    # os.listdir order is arbitrary; sorting makes the image ids reproducible.
    for index, img_file in enumerate(sorted(os.listdir(image_path))):
        if not img_file.lower().endswith(extensions):
            continue

        img = cv2.imread(os.path.join(image_path, img_file))
        if img is None:
            # cv2.imread signals an unreadable/corrupt file by returning None.
            print(f'skip unreadable image: {img_file}')
            continue
        height, width = img.shape[:2]

        images.append({
            'id': index,
            'file_name': img_file,
            'width': width,
            'height': height,
        })

        # Handle the label information. splitext only swaps the real
        # extension, unlike str.replace which rewrites every '.jpg' substring.
        label_file = os.path.join(
            label_path, os.path.splitext(img_file)[0] + '.txt')
        if not os.path.isfile(label_file):
            # An image without a label file simply contributes no annotations.
            continue

        with open(label_file, 'r') as f:
            for line_no, line in enumerate(f, start=1):
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines
                if len(fields) != 5:
                    raise ValueError(
                        f'{label_file}:{line_no}: expected 5 fields '
                        f'(class xc yc w h), got {len(fields)}')
                annotations.append(_yolo_fields_to_annotation(
                    fields, width, height, index, next_annotation_id))
                next_annotation_id += 1

    return images, annotations


def _yolo_fields_to_annotation(fields, width, height, image_id, annotation_id):
    """Turn the five fields of one YOLO label line into a COCO annotation dict."""
    class_id = int(float(fields[0]))
    xc, yc, w, h = (float(value) for value in fields[1:])

    xmin = (xc - w / 2) * width
    ymin = (yc - h / 2) * height
    xmax = (xc + w / 2) * width
    ymax = (yc + h / 2) * height
    # Width/height stay floats so that xmin + bbox_w == xmax; truncating them
    # to int made the box, its area and the segmentation disagree.
    bbox_w = w * width
    bbox_h = h * height

    return {
        'category_id': class_id,  # class id
        'bbox': [xmin, ymin, bbox_w, bbox_h],  # box as x, y, width, height
        'area': bbox_w * bbox_h,
        'image_id': image_id,  # id of the image the box belongs to
        'id': annotation_id,  # id of the box itself
        # The four corners of the box, clockwise from the top-left one.
        'segmentation': [[xmin, ymin, xmax, ymin, xmax, ymax, xmin, ymax]],
        'iscrowd': 0,  # single instance
    }


# key is one of train, test, val
# key names the JSON file to generate, e.g. instances_train.json,
# instances_test.json, instances_val.json
def gen_json_file(yolov5_data_path, coco_format_path, key):
    """Convert one split of a YOLO dataset and write its COCO annotation file.

    Reads ``<yolov5_data_path>/images/<key>`` and
    ``<yolov5_data_path>/labels/<key>`` and writes
    ``<coco_format_path>/annotations/instances_<key>.json``.

    Args:
        yolov5_data_path: Root directory of the YOLO dataset.
        coco_format_path: Root directory of the COCO output.
        key: Split name, e.g. ``'train'``, ``'val'`` or ``'test'``.

    Returns:
        None. A split whose image directory does not exist is reported and
        skipped without writing anything.
    """
    data_path = os.path.join(yolov5_data_path, f'images/{key}')
    label_path = os.path.join(yolov5_data_path, f'labels/{key}')
    if not os.path.isdir(data_path):
        # Not every dataset ships all three splits; skip instead of crashing.
        print(f'skip {key}: {data_path} does not exist')
        return

    images, anns = v5_covert_coco_format(data_path, label_path)

    # 'train' and 'val' keep filling the module-level dictionaries, as before.
    # Any other split (e.g. 'test') used to be converted and then silently
    # dropped while still reporting success; it now gets its own dictionary.
    if key == 'train':
        data = train_data
    elif key == 'val':
        data = test_data
    else:
        data = {'info': info, 'licenses': licenses, 'categories': categories}
    data['images'] = images
    data['annotations'] = anns

    # json path
    json_path = os.path.join(coco_format_path, f'annotations/instances_{key}.json')
    os.makedirs(os.path.dirname(json_path), exist_ok=True)
    with open(json_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2)
    print(f'generate {key} json success!')


if __name__ == '__main__':
    # Directory that holds the YOLO dataset
    yolov5_data_path = 'D:/deep_learn/yolov8_20230701/ClassroomBehavior'
    # Directory that receives the converted COCO dataset (the json files)
    coco_format_path = 'D:/deep_learn/yolov8_20230701/COCO'
    for split in ('train', 'val', 'test'):
        gen_json_file(yolov5_data_path, coco_format_path, key=split)

运行后会在 COCO 目录的 annotations 文件夹下生成对应的 json 标注文件（如 instances_train.json），至此任务完成。如果帮到你，麻烦帮忙点点赞