一、标签转换
我们使用labelme标注工具标注完数据后,会得到json文件。标注结束后,需要通过标签转换操作,生成jpg格式的原始图片和png格式的mask标签图。
1.1 使用img_b64_to_arr将json标签中base64编码的图像数据解码成numpy格式数据,再保存成jpg图像
/************ Anaconda3\Lib\site-packages\labelme\utils\image.py *************/
def img_b64_to_arr(img_b64):
    """Decode a base64-encoded image and convert it to an array.

    The base64 payload is decoded to raw bytes, which are then handed to
    ``img_data_to_arr`` for the actual conversion.

    Args:
        img_b64: Base64-encoded image data.

    Returns:
        Whatever ``img_data_to_arr`` produces for the decoded bytes
        (presumably a numpy image array -- confirm against that helper).
    """
    return img_data_to_arr(base64.b64decode(img_b64))
# Save the decoded image as a JPEG in out_jpgs_path, named after the part of
# file_name that precedes its first ".".
PIL.Image.fromarray(img).save(osp.join(out_jpgs_path, file_name.split(".")[0]+'.jpg'))
1.2 使用shapes_to_label将分割标签数据变成单通道png mask标签图
/************ Anaconda3\Lib\site-packages\labelme\utils\shape.py *************/
def shapes_to_label(img_shape, shapes, label_name_to_value):
    """Rasterize annotated shapes into a class map and an instance map.

    Args:
        img_shape: Shape of the source image; only the first two entries
            are used as the size of the output maps.
        shapes: Iterable of shape dicts. Each must provide ``"points"`` and
            ``"label"``; ``"group_id"`` and ``"shape_type"`` are optional.
        label_name_to_value: Mapping from label name to integer class id.

    Returns:
        A ``(cls, ins)`` tuple of int32 arrays. ``cls`` holds the class id
        and ``ins`` the 1-based instance id of the last shape that covered
        each pixel; untouched pixels stay 0.

    Raises:
        KeyError: If a shape lacks ``"points"``/``"label"`` or its label is
            missing from ``label_name_to_value``.
    """
    map_size = img_shape[:2]
    class_map = np.zeros(map_size, dtype=np.int32)
    instance_map = np.zeros_like(class_map)

    # Distinct (label, group_id) pairs in first-seen order; the position in
    # this list (plus one) is the instance id.
    known_instances = []

    for shape in shapes:
        outline = shape["points"]
        class_name = shape["label"]

        group = shape.get("group_id")
        if group is None:
            # Shapes without a group each get a unique id, so each becomes
            # its own instance.
            group = uuid.uuid1()
        kind = shape.get("shape_type", None)

        key = (class_name, group)
        if key not in known_instances:
            known_instances.append(key)
        instance_id = known_instances.index(key) + 1
        class_id = label_name_to_value[class_name]

        # shape_to_mask is defined elsewhere; presumably it returns a
        # boolean mask of the pixels covered by the shape.
        covered = shape_to_mask(map_size, outline, kind)
        class_map[covered] = class_id
        instance_map[covered] = instance_id

    return class_map, instance_map
1.3 语义分割标签转换完整代码
def json2mask(json_file, img_file, filter_cls: list, out_jpgs_path, out_mask_path):
    """Convert one labelme JSON annotation into a JPEG image and a PNG mask.

    The image is taken from the JSON's embedded ``imageData`` when present,
    otherwise it is read from ``img_file``. Shapes whose label appears in
    ``filter_cls`` are dropped, the remaining shapes are rasterized with
    ``utils.shapes_to_label`` using the module-level
    ``label_name_2_id_sample`` mapping, and both outputs are written to disk.

    Args:
        json_file: Path to the annotation file; must end with ``.json``.
        img_file: Path to the source image, used only when the JSON carries
            no ``imageData``.
        filter_cls: Label names to exclude. Empty or ``None`` keeps all.
        out_jpgs_path: Directory that receives ``<name>.jpg``.
        out_mask_path: Directory that receives ``<name>.png``.

    Returns:
        None. If ``json_file`` does not exist, nothing is written.

    Raises:
        AssertionError: If ``json_file`` does not end with ``.json``.
    """
    assert json_file.endswith(".json")
    file_name = Path(json_file).stem

    # A missing annotation file is skipped silently (best-effort batch use).
    if not os.path.isfile(json_file):
        return

    # Read as UTF-8 explicitly so non-ASCII labels do not depend on the
    # platform's default encoding, and close the handle deterministically.
    with open(json_file, encoding="utf-8") as f:
        data = json.load(f)

    # Prefer the image embedded in the JSON; fall back to the image file.
    image_data = data.get("imageData")
    if not image_data:
        with open(img_file, "rb") as f:
            image_data = base64.b64encode(f.read()).decode("utf-8")

    img = utils.img_b64_to_arr(image_data)

    if filter_cls:
        data["shapes"] = [
            shape for shape in data["shapes"] if shape["label"] not in filter_cls
        ]

    # Rasterize the shapes into a single-channel class-id mask.
    lbl, _ = utils.shapes_to_label(img.shape, data["shapes"], label_name_2_id_sample)

    # Output names keep only the part of the stem before its first ".".
    base_name = file_name.split(".")[0]

    image = PIL.Image.fromarray(img)
    if image.mode in ("RGBA", "LA"):
        # JPEG has no alpha channel; Pillow refuses to write these modes.
        image = image.convert("RGB")
    image.save(osp.join(out_jpgs_path, base_name + '.jpg'))
    utils.lblsave(osp.join(out_mask_path, "%s.png" % base_name), lbl)
二、验证、训练数据集划分
经过标签转换后,会生成原始jpg图片和png mask标签图。我们还需要进一步进行验证集、训练集的划分。
def split_mask(mask_path_list, Base_mask, trainval_percent, train_percent):
    """Randomly split mask files into train/val/test lists written as txt files.

    ``int(len(mask_path_list) * trainval_percent)`` items are sampled as the
    train+val pool; ``int(pool_size * train_percent)`` of those become the
    training set, the rest of the pool is validation, and everything outside
    the pool is test. Four files are (over)written in ``Base_mask``:
    ``trainval.txt``, ``train.txt``, ``val.txt`` and ``test.txt``, one name
    per line in the order of ``mask_path_list``.

    Args:
        mask_path_list: Mask file names or paths; each is written without
            its file extension.
        Base_mask: Existing directory in which the txt files are created.
        trainval_percent: Fraction of all items that go to train+val.
        train_percent: Fraction of the train+val pool that goes to train.

    Returns:
        None.

    Raises:
        ValueError: From ``random.sample`` when a requested sample size is
            negative or larger than the population.
    """
    num = len(mask_path_list)
    _indexes = range(num)
    tv = int(num * trainval_percent)
    tr = int(tv * train_percent)

    trainval = random.sample(_indexes, tv)
    # Sets give O(1) membership tests in the write loop below.
    train = set(random.sample(trainval, tr))
    trainval = set(trainval)

    print("train and val size", tv)
    print("train size", tr)

    # The context manager closes all four files, even if a write fails.
    with open(os.path.join(Base_mask, 'trainval.txt'), 'w') as ftrainval, \
            open(os.path.join(Base_mask, 'test.txt'), 'w') as ftest, \
            open(os.path.join(Base_mask, 'train.txt'), 'w') as ftrain, \
            open(os.path.join(Base_mask, 'val.txt'), 'w') as fval:
        for i in _indexes:
            # Strip only the final extension so dots elsewhere in the path
            # or file name do not truncate the entry.
            name = os.path.splitext(mask_path_list[i])[0] + '\n'
            if i in trainval:
                ftrainval.write(name)
                if i in train:
                    ftrain.write(name)
                else:
                    fval.write(name)
            else:
                ftest.write(name)
生成的验证集、训练集路径txt文件