Input: a single-channel grayscale image whose pixel values encode semantic masks.
Goal: convert the semantic masks in the grayscale image into a COCO-format JSON file.
Output: a JSON file in COCO dataset format.
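For reference, the JSON produced by the script below has roughly the following shape. This is a minimal sketch with placeholder values, mirroring the dataset dict built in the code; it is not real data.

coco_skeleton = {
    "info": {"year": 2023, "version": "2023", "description": "", "url": ""},
    "license": {},
    "images": [
        {"file_name": "0001.png", "id": 0, "width": 512, "height": 512},
    ],
    "annotations": [
        {
            "segmentation": [[10.0, 10.0, 40.0, 10.0, 40.0, 30.0, 10.0, 30.0]],  # flattened x, y polygon vertices
            "iscrowd": 0,
            "image_id": 0,
            "category_id": 1,
            "id": 0,
            "bbox": [10.0, 10.0, 30.0, 20.0],  # x, y, width, height
            "area": 600.0,
        },
    ],
    "categories": [
        {"id": 1, "name": "cate1", "supercategory": "xxx"},
    ],
}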
Problem encountered along the way:
Some masks contain masks of other classes nested inside them, i.e. a mask within a mask. How can such a mask be represented with only one array?
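COCO's polygon format cannot express a hole, so one standard way to store a region with another mask punched out of it as a single structure is run-length encoding (RLE) via pycocotools, usually with iscrowd set to 1. A minimal sketch on a toy mask (pycocotools is assumed to be installed and is not used by the scripts below):

import numpy as np
from pycocotools import mask as mask_utils

# Toy 6x6 binary mask: an outer object with the nested object's pixels removed,
# i.e. a region with a hole that plain polygons cannot represent.
outer = np.zeros((6, 6), dtype=np.uint8)
outer[1:5, 1:5] = 1   # outer object
outer[2:4, 2:4] = 0   # hole left by the nested mask

# Encode the whole region, hole included, as one RLE structure
rle = mask_utils.encode(np.asfortranarray(outer))
area = float(mask_utils.area(rle))
bbox = mask_utils.toBbox(rle).tolist()          # [x, y, width, height]
rle["counts"] = rle["counts"].decode("ascii")   # make it JSON-serializable

annotation_fragment = {"segmentation": rle, "iscrowd": 1, "bbox": bbox, "area": area}

The scripts below take a different route: the image is split into one sub-mask per pixel color, and each color gets polygon annotations of its own class. That works when the nesting is simple; RLE is the fallback when a single annotation genuinely needs a hole.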
Below is usable code I found:
from PIL import Image
import numpy as np
from skimage import measure
from shapely.geometry import Polygon, MultiPolygon
import json
import os
from tqdm import tqdm


def create_sub_masks(mask_image):
    width, height = mask_image.size
    # Initialize a dictionary of sub-masks indexed by RGB colors
    sub_masks = {}
    for x in range(width):
        for y in range(height):
            # Get the RGB values of the pixel
            pixel = mask_image.getpixel((x, y))[:3]
            # If the pixel is not black...
            if pixel != (0, 0, 0):
                # Check to see if we've created a sub-mask...
                pixel_str = str(pixel)
                sub_mask = sub_masks.get(pixel_str)
                if sub_mask is None:
                    # Create a sub-mask (one bit per pixel) and add to the dictionary.
                    # Note: we add 1 pixel of padding in each direction
                    # because the contours module doesn't handle cases
                    # where pixels bleed to the edge of the image.
                    sub_masks[pixel_str] = Image.new('1', (width + 2, height + 2))
                # Set the pixel value to 1 (default is 0), accounting for padding
                sub_masks[pixel_str].putpixel((x + 1, y + 1), 1)
    return sub_masks


def create_sub_mask_annotation(sub_mask, image_id, category_id, annotation_id, is_crowd):
    # Find contours (boundary lines) around each sub-mask.
    # Note: there could be multiple contours if the object
    # is partially occluded (e.g. an elephant behind a tree).
    contours = measure.find_contours(np.array(sub_mask), 0.5, positive_orientation='low')

    segmentations = []
    polygons = []
    for contour in contours:
        # Flip from (row, col) representation to (x, y)
        # and subtract the padding pixel
        for i in range(len(contour)):
            row, col = contour[i]
            contour[i] = (col - 1, row - 1)

        # Make a polygon and simplify it
        poly = Polygon(contour)
        poly = poly.simplify(1.0, preserve_topology=False)
        polygons.append(poly)
        segmentation = np.array(poly.exterior.coords)
        segmentation = np.maximum(segmentation, 0).ravel().tolist()
        # if segmentation == []:
        #     continue
        segmentations.append(segmentation)

    # Combine the polygons to calculate the bounding box and area
    multi_poly = MultiPolygon(polygons)
    if multi_poly.bounds == ():
        return "skip"
    x, y, max_x, max_y = multi_poly.bounds
    width = max_x - x
    height = max_y - y
    bbox = (x, y, width, height)
    area = multi_poly.area

    annotation = {
        'segmentation': segmentations,
        'iscrowd': is_crowd,
        'image_id': image_id,
        'category_id': category_id,
        'id': annotation_id,
        'bbox': bbox,
        'area': area
    }
    return annotation


def get_name(root, mode_folder=True):
    for root, dirs, file in os.walk(root):
        if mode_folder:
            return sorted(dirs)
        else:
            return sorted(file)


def get_annotation(mask_image_root):
    dataset = {
        "info": {"year": 2023, "version": "2023", "description": "", "url": ""},
        "license": {},
        "images": [],
        "annotations": [],
        "categories": []
    }
    class_index = {0: "background", 1: 'cate1', 2: 'cate2'}
    for k, name in class_index.items():
        dataset["categories"].append({"id": k, "name": name, "supercategory": "xxx"})
    is_crowd = 0
    # These ids will be automatically increased as we go
    annotation_id = 0
    image_id = 0
    # Create the annotations; maskdir is the global defined in the __main__ block below
    rrr = maskdir
    for i, root in tqdm(enumerate(mask_image_root)):
        mask_image = Image.open(rrr + root).convert('RGB')
        width, height = mask_image.size
        file_name = mask_image_root[i]
        print(file_name)
        dataset["images"].append({"file_name": file_name, "id": i, "width": width, "height": height})
        sub_masks = create_sub_masks(mask_image)
        for color, sub_mask in sub_masks.items():
            category_id = 1
            annotation = create_sub_mask_annotation(sub_mask, image_id, category_id, annotation_id, is_crowd)
            if annotation == "skip":
                continue
            dataset["annotations"].append(annotation)
            annotation_id += 1
        image_id += 1
    with open("trainmask.json", "w") as f:
        json.dump(dataset, f)


# rrr = "./InstanceSegmentation/"
# all_root = get_name(rrr, mode_folder=False)
# get_annotation(all_root)
if __name__ == '__main__':
    maskdir = './mask/trainmask/'
    maskimglist = os.listdir(maskdir)
    get_annotation(maskimglist)
Problems:
The code above still has shortcomings. Some masks are so small that their segmentation comes out as []; this needs a check, and skipping the annotation when the segmentation is empty avoids the problem. However, the bbox and related fields can still come out as NaN, which you have to detect and handle yourself.
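A minimal sketch of the kind of check meant here (the helper name is_valid_annotation is mine, not part of the script): drop an annotation whose segmentation is empty or whose bbox/area contain NaN before appending it to dataset["annotations"].

import math

def is_valid_annotation(annotation):
    # Reject annotations whose segmentation is empty
    if not annotation['segmentation'] or not any(annotation['segmentation']):
        return False
    # Reject annotations whose bbox or area contain NaN
    values = list(annotation['bbox']) + [annotation['area']]
    return not any(math.isnan(float(v)) for v in values)

# Usage inside get_annotation, instead of appending unconditionally:
# if is_valid_annotation(annotation):
#     dataset["annotations"].append(annotation)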
Overall, though, this code works quite well.
One more note: in some extreme cases of nested inner masks, running the code tends to fail. Either delete the problematic images or track down the issue and modify the code yourself.
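If you prefer to locate the offending images rather than patch the geometry handling, one option (an assumption, not something the original script does) is to wrap the per-image work in try/except and collect the file names that fail:

def process_one_image(mask_path):
    # Hypothetical stand-in for the per-image body of get_annotation:
    # open the mask, build sub-masks, and create the annotations.
    ...

mask_files = ['0001.png', '0002.png']   # placeholder file names
failed = []
for mask_path in mask_files:
    try:
        process_one_image(mask_path)
    except Exception as exc:            # extreme nested masks can raise here
        failed.append((mask_path, repr(exc)))

print("images to inspect or delete:", failed)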
Update: in actual testing some of the output format turned out to be wrong; here is the revised version of the code:
from PIL import Image
import numpy as np
from skimage import measure
from shapely.geometry import Polygon, MultiPolygon
import json
import os
from tqdm import tqdm


def create_sub_masks(mask_image):
    width, height = mask_image.size
    # Initialize a dictionary of sub-masks indexed by RGB colors
    sub_masks = {}
    for x in range(width):
        for y in range(height):
            # Get the RGB values of the pixel
            pixel = mask_image.getpixel((x, y))[:3]
            # If the pixel is not black...
            if pixel != (0, 0, 0):
                # Check to see if we've created a sub-mask...
                pixel_str = str(pixel)
                sub_mask = sub_masks.get(pixel_str)
                if sub_mask is None:
                    # Create a sub-mask (one bit per pixel) and add to the dictionary.
                    # Note: we add 1 pixel of padding in each direction
                    # because the contours module doesn't handle cases
                    # where pixels bleed to the edge of the image.
                    sub_masks[pixel_str] = Image.new('1', (width + 2, height + 2))
                # Set the pixel value to 1 (default is 0), accounting for padding
                sub_masks[pixel_str].putpixel((x + 1, y + 1), 1)
    return sub_masks


def create_sub_mask_annotation(sub_mask, image_id, category_id, annotation_id, is_crowd):
    # Find contours (boundary lines) around each sub-mask.
    # Note: there could be multiple contours if the object
    # is partially occluded (e.g. an elephant behind a tree).
    contours = measure.find_contours(np.array(sub_mask), 0.5, positive_orientation='low')

    # Unlike the first version, every contour becomes its own annotation,
    # so this function returns a list of annotations plus the next free id.
    annotations = []
    for contour in contours:
        # Flip from (row, col) representation to (x, y)
        # and subtract the padding pixel
        for i in range(len(contour)):
            row, col = contour[i]
            contour[i] = (col - 1, row - 1)

        # Make a polygon and simplify it
        poly = Polygon(contour)
        poly = poly.simplify(1.0, preserve_topology=False)
        segmentation = np.array(poly.exterior.coords)
        segmentation = np.maximum(segmentation, 0).ravel().tolist()
        # Tiny contours can simplify to an empty polygon; skip them
        if segmentation == []:
            continue

        x, y, max_x, max_y = poly.bounds
        width = max_x - x
        height = max_y - y
        bbox = (x, y, width, height)
        area = poly.area

        annotation = {
            'segmentation': [segmentation],
            'iscrowd': is_crowd,
            'image_id': image_id,
            'category_id': category_id,
            'id': annotation_id,
            'bbox': bbox,
            'area': area
        }
        annotations.append(annotation)
        annotation_id = annotation_id + 1
    return annotations, annotation_id


def get_name(root, mode_folder=True):
    for root, dirs, file in os.walk(root):
        if mode_folder:
            return sorted(dirs)
        else:
            return sorted(file)


def get_annotation(mask_image_root):
    dataset = {
        "info": {"year": 2023, "version": "2023", "description": "", "url": ""},
        "license": {},
        "images": [],
        "annotations": [],
        "categories": []
    }
    class_index = {0: "background", 1: 'junban', 2: 'yachi'}
    for k, name in class_index.items():
        dataset["categories"].append({"id": k, "name": name, "supercategory": "yachi"})
    is_crowd = 0
    # These ids will be automatically increased as we go
    annotation_id = 0
    image_id = 0
    # Create the annotations; maskdir is the global defined in the __main__ block below
    rrr = maskdir
    for i, root in tqdm(enumerate(mask_image_root)):
        mask_image = Image.open(rrr + root).convert('RGB')
        width, height = mask_image.size
        file_name = mask_image_root[i]
        print(file_name)
        dataset["images"].append({"file_name": file_name, "id": i, "width": width, "height": height})
        sub_masks = create_sub_masks(mask_image)
        for color, sub_mask in sub_masks.items():
            # Map mask pixel values to category ids; adjust for your own labels
            if color == '(1, 1, 1)':
                category_id = 1
            elif color == '(2, 2, 2)':
                category_id = 2
            annotations, annotation_id = create_sub_mask_annotation(
                sub_mask, image_id, category_id, annotation_id, is_crowd)
            for anno in annotations:
                dataset["annotations"].append(anno)
        image_id += 1
    with open("post_val.json", "w") as f:
        json.dump(dataset, f)


# rrr = "./InstanceSegmentation/"
# all_root = get_name(rrr, mode_folder=False)
# get_annotation(all_root)
if __name__ == '__main__':
    maskdir = './mask/valmask/'
    # maskdir = './mask/posttest/mask/'
    maskimglist = os.listdir(maskdir)
    get_annotation(maskimglist)
The part that determines the category from the mask pixel values has to be adjusted manually for your own label values; one possible rewrite is sketched below.
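One way to make that adjustment less error-prone (a sketch; the table values are examples, not fixed) is to replace the if/elif chain with a lookup keyed by the color strings that create_sub_masks produces:

# Map the color strings produced by create_sub_masks to COCO category ids.
# The pixel values below are examples; adjust them to your own label encoding.
COLOR_TO_CATEGORY = {
    '(1, 1, 1)': 1,   # junban
    '(2, 2, 2)': 2,   # yachi
}

def category_for_color(color):
    category_id = COLOR_TO_CATEGORY.get(color)
    if category_id is None:
        # Unknown pixel value: fail loudly instead of silently reusing the
        # previous category_id, which is what the if/elif chain can do.
        raise KeyError(f"no category configured for mask color {color}")
    return category_id

Inside get_annotation this would replace the if/elif block with category_id = category_for_color(color).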