ChestX-Det-Dataset数据集网址:https://github.com/Deepwise-AILab/ChestX-Det-Dataset/tree/main
数据集JSON内容:
[{"file_name": "36199.png","syms": [],"boxes": [],"polygons": []},{"file_name": "36302.png","syms": ["Effusion"],"boxes": [[799,666,937,761]],"polygons": [[[799,678],[799,678],[799,680],[801,681],[805,684],[807,684],[809,685],[811,686],[813,686],[814,686],[817,687],[820,687],[824,690],[827,690],[830,691],[832,691],[833,691],[836,693],[837,693],[840,695],[844,696],[848,696],[851,697],[854,697],[855,697],[856,698],[861,699],[864,699],[870,701],[872,703],[875,704],[878,705],[881,705],[886,707],[890,709],[894,711],[896,713],[897,714],[899,714],[902,716],[903,717],[906,720],[908,721],[910,725],[912,726],[914,728],[916,731],[916,732],[917,733],[918,734],[921,738],[922,740],[924,741],[925,744],[927,745],[929,747],[930,751],[931,752],[934,753],[935,755],[935,756],[935,757],[936,758],[937,759],[937,761],[937,759],[937,757],[937,756],[937,752],[937,750],[937,747],[937,745],[937,744],[937,743],[937,741],[937,740],[937,739],[937,738],[937,737],[937,735],[937,733],[937,731],[937,729],[937,728],[937,726],[937,723],[937,720],[937,717],[937,716],[936,714],[935,710],[935,709],[935,708],[934,705],[934,704],[934,703],[934,702],[933,701],[933,698],[933,696],[931,695],[931,692],[931,691],[930,690],[930,686],[930,685],[929,681],[929,680],[929,679],[929,677],[928,674],[928,673],[927,672],[927,671],[925,671],[924,668],[924,666],[924,667],[924,669],[924,672],[924,674],[923,677],[923,678],[923,679],[923,680],[922,681],[921,683],[921,684],[920,685],[918,685],[918,686],[915,689],[912,690],[910,691],[909,692],[908,692],[906,692],[905,693],[904,693],[902,695],[900,695],[900,695],[899,695],[898,695],[896,695],[894,693],[891,693],[887,693],[886,693],[884,693],[881,692],[879,692],[876,691],[874,691],[870,690],[867,690],[866,690],[863,690],[861,689],[860,689],[857,689],[856,687],[854,687],[851,687],[848,686],[845,686],[842,686],[840,686],[839,685],[837,685],[834,684],[828,683],[825,683],[822,681],[819,680],[815,679],[814,679],[812,679],[811,679],[811,678],[808,677],[806,675],[803,675],[802,675],[801
,675]]]},
转化后coco格式样本json:
使用的python代码如下:
import json
import os
import sys
import cv2
from tqdm import tqdm
import math

# Active configuration: convert the training split.
che_json = './chetrain.json'        # ChestX-Det annotation file that is read
dst_json = './chestrain_coco.json'  # COCO-format JSON file that is written
test_img = './train_data/train'     # directory containing the images named in che_json

# To convert the test split instead, swap in the commented values below.
# che_json = './chetest.json'
# dst_json = './chetest_coco.json'
# test_img = './test_data/test'


def polygon_area(vertices):
    """Return the area enclosed by a polygon (shoelace formula).

    Args:
        vertices: Indexable sequence of ``(x, y)`` pairs listed in order
            around the polygon. The last vertex is implicitly connected
            back to the first.

    Returns:
        The non-negative area as a float; ``0.0`` for an empty sequence.
    """
    n = len(vertices)
    twice_area = 0.0
    for i, (x1, y1) in enumerate(vertices):
        # Wrap around so the final edge closes the polygon.
        x2, y2 = vertices[(i + 1) % n]
        twice_area += x1 * y2 - x2 * y1
    # The sign only encodes winding direction, so drop it.
    return abs(twice_area) / 2.0


def _read_image_size(image_path):
    """Return ``(height, width)`` of the image stored at *image_path*.

    Raises:
        FileNotFoundError: If OpenCV cannot read the file.
    """
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None
        # instead of raising, which would otherwise surface as an opaque
        # AttributeError on ``.shape``.
        raise FileNotFoundError('cannot read image: ' + image_path)
    return img.shape[0], img.shape[1]


def build_coco(records, image_size, category_mapping=None):
    """Convert ChestX-Det records into a COCO-style dictionary.

    Args:
        records: Iterable of dicts with the keys ``file_name``, ``syms``,
            ``boxes`` and ``polygons``. Entry ``i`` of ``syms``, ``boxes``
            and ``polygons`` is treated as describing the same object;
            each box is ``[x_left, y_top, x_right, y_bottom]`` and each
            polygon is a list of ``[x, y]`` points.
        image_size: Callable taking a file name and returning
            ``(height, width)`` for that image.
        category_mapping: Optional ``{name: id}`` dict to start from.
            Passing the mapping produced for one split when converting
            another keeps category ids identical across splits. Names not
            present in it receive the next unused ids. The dict passed in
            is not modified.

    Returns:
        A dict with the keys ``info``, ``licenses``, ``categories``,
        ``images`` and ``annotations``. Image and annotation ids start
        at 1.

    Raises:
        ValueError: If a record has fewer labels or polygons than boxes.
    """
    mapping = dict(category_mapping) if category_mapping else {}
    categories = [
        {"supercategory": name, "id": cat_id, "name": name}
        for name, cat_id in sorted(mapping.items(), key=lambda item: item[1])
    ]
    next_category_id = max(mapping.values(), default=0) + 1

    coco_data = {
        "info": {},
        "licenses": [],
        "categories": categories,
        "images": [],
        "annotations": [],
    }

    annotation_id = 0
    for image_id, record in enumerate(records, start=1):
        file_name = record['file_name']
        height, width = image_size(file_name)
        coco_data["images"].append({
            'file_name': file_name,
            'height': height,
            'width': width,
            'id': image_id,
        })

        labels = record['syms']
        boxes = record['boxes']
        polygons = record['polygons']

        # Register categories in order of first appearance.
        for label in labels:
            if label not in mapping:
                mapping[label] = next_category_id
                categories.append({
                    "supercategory": label,
                    "id": next_category_id,
                    "name": label,
                })
                next_category_id += 1

        if len(labels) < len(boxes) or len(polygons) < len(boxes):
            raise ValueError(
                'record for ' + str(file_name)
                + ' has fewer labels or polygons than boxes'
            )

        for label, box, polygon in zip(labels, boxes, polygons):
            annotation_id += 1
            # COCO stores a polygon as one flat [x0, y0, x1, y1, ...] list.
            flat_points = [coord for point in polygon for coord in point[:2]]
            x_left, y_top, x_right, y_bottom = box
            coco_data["annotations"].append({
                'segmentation': [flat_points],
                'image_id': image_id,
                'area': polygon_area(polygon),
                # COCO boxes are [x, y, width, height], not two corners.
                'bbox': [x_left, y_top, x_right - x_left, y_bottom - y_top],
                'category_id': mapping[label],
                'id': annotation_id,
            })

    return coco_data


def main(category_mapping=None):
    """Convert the annotation file ``che_json`` to COCO format.

    Reads ``che_json``, looks up every image's size under ``test_img``
    and writes the converted result to ``dst_json``.

    Args:
        category_mapping: Optional ``{name: id}`` dict forwarded to
            ``build_coco`` so that several splits share the same category
            ids. When omitted, ids are assigned in order of first
            appearance in ``che_json``.
    """
    with open(che_json, 'r', encoding='utf-8') as js:
        json_info = json.load(js)

    def image_size(file_name):
        return _read_image_size(test_img + '/' + file_name)

    coco_data = build_coco(tqdm(json_info), image_size, category_mapping)

    with open(dst_json, 'w', encoding='utf-8') as jsout:
        json.dump(coco_data, jsout)


if __name__ == '__main__':
    main()
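注意:category_mapping 是按类别在数据中首次出现的顺序分配 id 的,因此训练集和验证集分别转换时,同一类别可能得到不同的 id。做法是先对其中一个数据集(训练集或验证集均可)运行脚本,得到 category_mapping 字典的内容;之后转换另一个数据集时统一使用这同一份字典,就可以保证训练集和验证集的标签一致。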