- 摘要
- 模型详解
- 模型实战
- 训练COCO数据集
- 下载数据集
- COCO转yolo格式数据集(适用V4,V5,V6,V7,V8)
- 配置yolov10环境
- 训练
- 断点训练
- 测试
- 训练自定义数据集
- Labelme数据集
- 格式转换
- 训练
- 测试
- 总结
- 2017 Train images [118K/18GB] :http://images.cocodataset.org/zips/train2017.zip
- 2017 Val images [5K/1GB]:http://images.cocodataset.org/zips/val2017.zip
- 2017 Test images [41K/6GB]:http://images.cocodataset.org/zips/unlabeled2017.zip
- 2017 annotations_trainval2017 [241MB]:http://images.cocodataset.org/annotations/annotations_trainval2017.zip
1 | person | person | person | person |
2 | bicycle | bicycle | bicycle | vehicle |
3 | car | car | car | vehicle |
4 | motorcycle | motorcycle | motorcycle | vehicle |
5 | airplane | airplane | airplane | vehicle |
6 | bus | bus | bus | vehicle |
7 | train | train | train | vehicle |
8 | truck | truck | truck | vehicle |
9 | boat | boat | boat | vehicle |
10 | traffic light | traffic light | traffic light | outdoor |
11 | fire hydrant | fire hydrant | fire hydrant | outdoor |
12 | street sign | - | - | outdoor |
13 | stop sign | stop sign | stop sign | outdoor |
14 | parking meter | parking meter | parking meter | outdoor |
15 | bench | bench | bench | outdoor |
16 | bird | bird | bird | animal |
17 | cat | cat | cat | animal |
18 | dog | dog | dog | animal |
19 | horse | horse | horse | animal |
20 | sheep | sheep | sheep | animal |
21 | cow | cow | cow | animal |
22 | elephant | elephant | elephant | animal |
23 | bear | bear | bear | animal |
24 | zebra | zebra | zebra | animal |
25 | giraffe | giraffe | giraffe | animal |
26 | hat | - | - | accessory |
27 | backpack | backpack | backpack | accessory |
28 | umbrella | umbrella | umbrella | accessory |
29 | shoe | - | - | accessory |
30 | eye glasses | - | - | accessory |
31 | handbag | handbag | handbag | accessory |
32 | tie | tie | tie | accessory |
33 | suitcase | suitcase | suitcase | accessory |
34 | frisbee | frisbee | frisbee | sports |
35 | skis | skis | skis | sports |
36 | snowboard | snowboard | snowboard | sports |
37 | sports ball | sports ball | sports ball | sports |
38 | kite | kite | kite | sports |
39 | baseball bat | baseball bat | baseball bat | sports |
40 | baseball glove | baseball glove | baseball glove | sports |
41 | skateboard | skateboard | skateboard | sports |
42 | surfboard | surfboard | surfboard | sports |
43 | tennis racket | tennis racket | tennis racket | sports |
44 | bottle | bottle | bottle | kitchen |
45 | plate | - | - | kitchen |
46 | wine glass | wine glass | wine glass | kitchen |
47 | cup | cup | cup | kitchen |
48 | fork | fork | fork | kitchen |
49 | knife | knife | knife | kitchen |
50 | spoon | spoon | spoon | kitchen |
51 | bowl | bowl | bowl | kitchen |
52 | banana | banana | banana | food |
53 | apple | apple | apple | food |
54 | sandwich | sandwich | sandwich | food |
55 | orange | orange | orange | food |
56 | broccoli | broccoli | broccoli | food |
57 | carrot | carrot | carrot | food |
58 | hot dog | hot dog | hot dog | food |
59 | pizza | pizza | pizza | food |
60 | donut | donut | donut | food |
61 | cake | cake | cake | food |
62 | chair | chair | chair | furniture |
63 | couch | couch | couch | furniture |
64 | potted plant | potted plant | potted plant | furniture |
65 | bed | bed | bed | furniture |
66 | mirror | - | - | furniture |
67 | dining table | dining table | dining table | furniture |
68 | window | - | - | furniture |
69 | desk | - | - | furniture |
70 | toilet | toilet | toilet | furniture |
71 | door | - | - | furniture |
72 | tv | tv | tv | electronic |
73 | laptop | laptop | laptop | electronic |
74 | mouse | mouse | mouse | electronic |
75 | remote | remote | remote | electronic |
76 | keyboard | keyboard | keyboard | electronic |
77 | cell phone | cell phone | cell phone | electronic |
78 | microwave | microwave | microwave | appliance |
79 | oven | oven | oven | appliance |
80 | toaster | toaster | toaster | appliance |
81 | sink | sink | sink | appliance |
82 | refrigerator | refrigerator | refrigerator | appliance |
83 | blender | - | - | appliance |
84 | book | book | book | indoor |
85 | clock | clock | clock | indoor |
86 | vase | vase | vase | indoor |
87 | scissors | scissors | scissors | indoor |
88 | teddy bear | teddy bear | teddy bear | indoor |
89 | hair drier | hair drier | hair drier | indoor |
90 | toothbrush | toothbrush | toothbrush | indoor |
91 | hair brush | - | - | indoor |
def coco91_to_coco80_class():
    """Return the lookup table from 91-class COCO paper ids to 80-class indices.

    Entry ``i`` of the returned list is the zero-based 80-class index for the
    paper category whose id is ``i + 1``. Categories that have no counterpart
    in the 80-class list map to ``None``.

    Returns:
        list: 91 entries, each an ``int`` in ``0..79`` or ``None``.
    """
    # Category table reference:
    # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
    # The table below can be regenerated from the two name lists with:
    # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
    # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
    # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)]  # darknet to coco
    # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)]  # coco to darknet
    x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, None, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, None, 24, 25, None,
         None, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, None, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
         51, 52, 53, 54, 55, 56, 57, 58, 59, None, 60, None, None, 61, None, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
         None, 73, 74, 75, 76, 77, 78, 79, None]
    return x
- coco:存放解压后的数据集。
import json
import glob
import os
import shutil
from pathlib import Path
import numpy as np
from tqdm import tqdm


def make_folders(path='../out/'):
    """Create a fresh output directory with ``labels`` and ``images`` subfolders.

    Any existing directory at ``path`` is deleted first.

    Args:
        path: Root of the output tree.

    Returns:
        The ``path`` argument, unchanged.
    """
    if os.path.exists(path):
        shutil.rmtree(path)  # delete output folder
    os.makedirs(path)  # make new output folder
    os.makedirs(path + os.sep + 'labels')  # make new labels folder
    os.makedirs(path + os.sep + 'images')  # make new images folder
    return path


def convert_coco_json(json_dir='./coco/annotations_trainval2017/annotations/'):
    """Convert COCO instance annotations to YOLO label files and copy the images.

    For every annotation file, one ``.txt`` per image is written under
    ``out/labels/<split>/`` with lines ``class x_center y_center width height``
    (box values normalised by the image size), and the image is copied from
    ``coco/<split>/`` to ``out/images/<split>/``.

    Label files are opened in append mode, so running the conversion twice
    without clearing ``out/`` duplicates every label line.

    Args:
        json_dir: Directory (with trailing separator) holding the COCO JSON files.
    """
    # The official annotation archive also ships captions_*.json and
    # person_keypoints_*.json. Caption annotations have neither 'iscrowd' nor
    # 'bbox', so prefer the instances files and only fall back to every JSON
    # when none are present.
    jsons = glob.glob(json_dir + 'instances_*.json') or glob.glob(json_dir + '*.json')
    coco80 = coco91_to_coco80_class()

    for json_file in sorted(jsons):
        split_name = Path(json_file).stem.replace('instances_', '')
        fn = 'out/labels/%s/' % split_name  # label folder
        fn_images = 'out/images/%s/' % split_name  # image folder
        os.makedirs(fn, exist_ok=True)
        os.makedirs(fn_images, exist_ok=True)
        with open(json_file) as fp:
            data = json.load(fp)
        print(fn)

        # Key by the raw id: formatting with '%g' keeps only six significant
        # digits, so distinct ids >= 1e6 would collapse onto the same key.
        images = {x['id']: x for x in data['images']}
        copied = set()  # file names already copied for this split

        for x in tqdm(data['annotations'], desc='Annotations %s' % json_file):
            if x['iscrowd']:
                continue

            img = images[x['image_id']]
            h, w, file_name = img['height'], img['width'], img['file_name']
            file_path = 'coco/' + fn.split('/')[-2] + "/" + file_name

            # COCO boxes are [top-left x, top-left y, width, height]
            box = np.array(x['bbox'], dtype=np.float64)
            box[:2] += box[2:] / 2  # xy top-left corner to center
            box[[0, 2]] /= w  # normalize x
            box[[1, 3]] /= h  # normalize y

            if (box[2] > 0.) and (box[3] > 0.):  # if w > 0 and h > 0
                with open(fn + Path(file_name).stem + '.txt', 'a') as file:
                    file.write('%g %.6f %.6f %.6f %.6f\n' % (coco80[x['category_id'] - 1], *box))
                # Copy each image once instead of once per annotation.
                if file_name not in copied:
                    file_path_t = fn_images + file_name
                    print(file_path, file_path_t)
                    shutil.copy(file_path, file_path_t)
                    copied.add(file_name)


def coco91_to_coco80_class():
    """Return the lookup table from 91-class COCO paper ids to 80-class indices.

    Entry ``i`` is the zero-based 80-class index for the paper category with
    id ``i + 1``; categories without an 80-class counterpart map to ``None``.
    """
    # Category table reference:
    # https://tech.amikelive.com/node-718/what-object-categories-labels-are-in-coco-dataset/
    # The table below can be regenerated from the two name lists with:
    # a = np.loadtxt('data/coco.names', dtype='str', delimiter='\n')
    # b = np.loadtxt('data/coco_paper.names', dtype='str', delimiter='\n')
    # x1 = [list(a[i] == b).index(True) + 1 for i in range(80)]  # darknet to coco
    # x2 = [list(b[i] == a).index(True) if any(b[i] == a) else None for i in range(91)]  # coco to darknet
    x = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, None, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, None, 24, 25, None,
         None, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, None, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50,
         51, 52, 53, 54, 55, 56, 57, 58, 59, None, 60, None, None, 61, None, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72,
         None, 73, 74, 75, 76, 77, 78, 79, None]
    return x


convert_coco_json()
import cv2
import os


def draw_box_in_single_image(image_path, txt_path):
    """Draw the YOLO-format boxes from ``txt_path`` onto the image and save it.

    Each label line is read as ``class x_center y_center width height`` with
    the box values relative to the image size. The annotated image is written
    to ``./Data/see_images/<image stem>.png``; when the label file contains no
    boxes, ``None`` is printed and nothing is saved.

    Args:
        image_path: Path of the image to annotate.
        txt_path: Path of the matching label file.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError('cannot read image: {}'.format(image_path))

    def read_list(txt_path):
        """Parse the label file into a list of float rows."""
        pos = []
        with open(txt_path, 'r') as file_to_read:
            for line in file_to_read:
                fields = line.split()  # split on any whitespace
                if not fields:
                    continue  # a blank line would otherwise crash float()
                pos.append([float(i) for i in fields])
        return pos

    def convert(size, box):
        """Turn a relative (cls, cx, cy, w, h) row into pixel (xmin, ymin, xmax, ymax)."""
        xmin = (box[1] - box[3] / 2.) * size[1]
        xmax = (box[1] + box[3] / 2.) * size[1]
        ymin = (box[2] - box[4] / 2.) * size[0]
        ymax = (box[2] + box[4] / 2.) * size[0]
        return (int(xmin), int(ymin), int(xmax), int(ymax))

    pos = read_list(txt_path)
    print(pos)

    for row in pos:
        label = str(int(row[0]))
        print('label is ' + label)
        box = convert(image.shape, row)
        image = cv2.rectangle(image, (box[0], box[1]), (box[2], box[3]), (0, 0, 255), 2)
        cv2.putText(image, label, (box[0], box[1] - 2), 0, 1, [0, 0, 255], thickness=2, lineType=cv2.LINE_AA)

    if pos:
        # Derive the stem with os.path so it works with any path separator.
        stem = os.path.splitext(os.path.basename(image_path))[0]
        cv2.imwrite('./Data/see_images/{}.png'.format(stem), image)
    else:
        print('None')


img_folder = "./out/images/val2017"
img_list = os.listdir(img_folder)
img_list.sort()
label_folder = "./out/labels/val2017"
label_list = os.listdir(label_folder)
if not os.path.exists('./Data/see_images'):
    os.makedirs('./Data/see_images')

# Pair each image with the label file of the same stem. Pairing by list index
# is unreliable because os.listdir order is arbitrary and label_list is not
# sorted.
available_labels = set(label_list)
for img_name in img_list:
    label_name = os.path.splitext(img_name)[0] + '.txt'
    if label_name not in available_labels:
        continue  # no label file for this image
    image_path = os.path.join(img_folder, img_name)
    txt_path = os.path.join(label_folder, label_name)
    draw_box_in_single_image(image_path, txt_path)
pip install -r requirements.txt
from ultralytics import YOLOv10
if __name__ == '__main__':
    # Build a new model from scratch from the architecture definition.
    model = YOLOv10(model="ultralytics/cfg/models/v10/yolov10l.yaml")
    # If you want to finetune the model with pretrained weights, you could load the
    # pretrained weights like below
    # model = YOLOv10.from_pretrained('jameslahm/yolov10{n/s/m/b/l/x}')
    # or
    # wget https://github.com/THU-MIG/yolov10/releases/download/v1.1/yolov10{n/s/m/b/l/x}.pt
    # model = YOLOv10('yolov10{n/s/m/b/l/x}.pt')

    # Use the model. This call used to be swallowed by the inline comment
    # above, so the script built the model but never trained it.
    results = model.train(data="VOC.yaml", patience=0, epochs=150, device='0', batch=8, seed=42)  # train the model
# Create the model from the 'yolov10n.pt' weights file.
model = YOLOv10('yolov10n.pt')
# Use the model
model.train(data="coco128.yaml", epochs=3) # train the model
第一步 找到ultralytics/cfg/datasets/coco.yaml
# Ultralytics YOLO 🚀, GPL-3.0 license
# COCO 2017 dataset http://cocodataset.org by Microsoft
# Example usage: yolo train data=coco.yaml
# parent
# ├── ultralytics
# └── datasets
#     └── coco ← downloads here (20.1 GB)

# Train/val/test sets as 1) dir: path/to/imgs, 2) file: path/to/imgs.txt, or 3) list: [path/to/imgs1, path/to/imgs2, ..]
train: ./coco/images/train2017 # train images (relative to 'path') 118287 images
val: ./coco/images/val2017 # val images (relative to 'path') 5000 images
test: test-dev2017.txt # 20288 of 40670 images, submit to https://competitions.codalab.org/competitions/20794
第二步 新建train.py
from ultralytics import YOLOv10
if __name__ == '__main__':
    # Build a new model from scratch from the architecture definition.
    model = YOLOv10(model="ultralytics/cfg/models/v10/yolov10l.yaml")
    # If you want to finetune the model with pretrained weights, you could load the
    # pretrained weights like below
    # model = YOLOv10.from_pretrained('jameslahm/yolov10{n/s/m/b/l/x}')
    # or
    # wget https://github.com/THU-MIG/yolov10/releases/download/v1.1/yolov10{n/s/m/b/l/x}.pt
    # model = YOLOv10('yolov10{n/s/m/b/l/x}.pt')

    # Use the model. This call used to be swallowed by the inline comment
    # above, so the script built the model but never trained it.
    results = model.train(data="coco.yaml", epochs=3, device='3')  # train the model
# Same training call, but with device given as the list string '0,1,2,3'.
results = model.train(data="coco.yaml", epochs=3,device='0,1,2,3') # train the model
第三步 修改参数,在ultralytics/cfg/default.yaml
# Train settings -------------------------------------------------------------------------------------------------------
model: # path to model file, i.e. yolov8n.pt, yolov8n.yaml
data: # path to data file, i.e. coco128.yaml
epochs: 100 # number of epochs to train for
patience: 50 # epochs to wait for no observable improvement for early stopping of training
batch: 16 # number of images per batch (-1 for AutoBatch)
imgsz: 640 # size of input images as integer or w,h
save: True # save train checkpoints and predict results
save_period: -1 # Save checkpoint every x epochs (disabled if < 1)
cache: False # True/ram, disk or False. Use cache for data loading
device: # device to run on, i.e. cuda device=0 or device=0,1,2,3 or device=cpu
workers: 8 # number of worker threads for data loading (per RANK if DDP)
project: # project name
name: # experiment name, results saved to 'project/name' directory
exist_ok: False # whether to overwrite existing experiment
pretrained: False # whether to use a pretrained model
optimizer: SGD # optimizer to use, choices=['SGD', 'Adam', 'AdamW', 'RMSProp']
verbose: True # whether to print verbose output
seed: 0 # random seed for reproducibility
deterministic: True # whether to enable deterministic mode
single_cls: False # train multi-class data as single-class
image_weights: False # use weighted image selection for training
rect: False # support rectangular training if mode='train', support rectangular evaluation if mode='val'
cos_lr: False # use cosine learning rate scheduler
close_mosaic: 10 # disable mosaic augmentation for final 10 epochs
resume: False # resume training from last checkpoint
from ultralytics import YOLOv10
if __name__ == '__main__':
    # Load the model from the last checkpoint of the earlier run (this
    # resumes that run; it does not build a new model from scratch).
    model = YOLOv10("runs/detect/train8/weights/last.pt")
    print(model.model)
    # Use the model; resume=True is passed so training continues from the checkpoint.
    results = model.train(data="VOC.yaml", epochs=100, device='0', batch=16, workers=0, resume=True)  # train the model
from ultralytics import YOLOv10# Load a model
# Load the trained weights produced by the training run.
model = YOLOv10("runs/detect/train11/weights/best.pt")
# Run prediction. This call used to be swallowed by the inline comment on the
# line above, so nothing was ever predicted.
results = model.predict(source="ultralytics/assets", device='3')
# Prediction settings --------------------------------------------------------------------------------------------------
source: # source directory for images or videos
show: False # show results if possible
save_txt: False # save results as .txt file
save_conf: False # save results with confidence scores
save_crop: False # save cropped images with results
hide_labels: False # hide labels
hide_conf: False # hide confidence scores
vid_stride: 1 # video frame-rate stride
line_thickness: 3 # bounding box thickness (pixels)
visualize: False # visualize model features
augment: False # apply image augmentation to prediction sources
agnostic_nms: False # class-agnostic NMS
classes: # filter results by class, i.e. class=0, or class=[0,2,3]
retina_masks: False # use high-resolution segmentation masks
boxes: True # Show boxes in segmentation predictions
类别如下: [‘c17’, ‘c5’, ‘helicopter’, ‘c130’, ‘f16’, ‘b2’,
‘other’, ‘b52’, ‘kc10’, ‘command’, ‘f15’, ‘kc135’, ‘a10’,
‘b1’, ‘aew’, ‘f22’, ‘p3’, ‘p8’, ‘f35’, ‘f18’, ‘v22’, ‘f4’,
‘globalhawk’, ‘u2’, ‘su-27’, ‘il-38’, ‘tu-134’, ‘su-33’,
‘an-70’, ‘su-24’, ‘tu-22’, ‘il-76’]
import json
import os
import shutil
from glob import glob

import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from os import getcwd


def convert(size, box):
    """Convert a pixel-space box to normalised YOLO ``(x, y, w, h)``.

    Args:
        size: ``(width, height)`` of the image in pixels.
        box: ``(xmin, xmax, ymin, ymax)`` in pixels.

    Returns:
        tuple: ``(x_center, y_center, width, height)``; x values are divided
        by the image width and y values by the image height.
    """
    dw = 1. / (size[0])
    dh = 1. / (size[1])
    # No "- 1" shift here: that offset comes from converters written for
    # 1-based VOC XML coordinates. Labelme points are zero-based, so shifting
    # would move every box centre by one pixel.
    x = (box[0] + box[1]) / 2.0
    y = (box[2] + box[3]) / 2.0
    w = box[1] - box[0]
    h = box[3] - box[2]
    return (x * dw, y * dh, w * dw, h * dh)


def change_2_yolo5(files, txt_Name):
    """Write a YOLO label file next to each Labelme JSON and list the image names.

    Reads ``<labelme_path><name>.json`` and ``<labelme_path><name>.jpg`` and
    writes ``<labelme_path>/<name>.txt`` with one ``class x y w h`` line per
    non-degenerate shape. Uses the module-level ``labelme_path`` and
    ``classes`` set by the script entry point.

    Args:
        files: Base names (no extension) of the Labelme JSON files.
        txt_Name: Unused; kept for call compatibility.

    Returns:
        list: ``<name>.jpg`` for every entry of ``files``.

    Raises:
        FileNotFoundError: If the image belonging to a JSON file cannot be read.
        ValueError: If a shape label is not present in ``classes``.
    """
    imag_name = []
    for json_file_ in files:
        json_filename = labelme_path + json_file_ + ".json"
        with open(json_filename, "r", encoding="utf-8") as fp:
            json_file = json.load(fp)
        imag_name.append(json_file_ + '.jpg')

        image_file = labelme_path + json_file_ + ".jpg"
        image = cv2.imread(image_file)
        if image is None:
            raise FileNotFoundError('cannot read image: %s' % image_file)
        height, width = image.shape[:2]

        # The context manager guarantees the label file is flushed and closed.
        with open('%s/%s.txt' % (labelme_path, json_file_), 'w') as out_file:
            for multi in json_file["shapes"]:
                points = np.array(multi["points"])
                # Clamp the extent to the image so normalised values stay in [0, 1].
                xmin = min(max(float(points[:, 0].min()), 0.0), float(width))
                xmax = min(max(float(points[:, 0].max()), 0.0), float(width))
                ymin = min(max(float(points[:, 1].min()), 0.0), float(height))
                ymax = min(max(float(points[:, 1].max()), 0.0), float(height))
                label = multi["label"].lower()
                if xmax <= xmin or ymax <= ymin:
                    continue  # degenerate box, nothing to write
                cls_id = classes.index(label)
                bb = convert((width, height), (xmin, xmax, ymin, ymax))
                out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
    return imag_name


def image_txt_copy(files, scr_path, dst_img_path, dst_txt_path):
    """Copy each image and its same-named ``.txt`` label file to the dataset folders.

    Args:
        files: List of image file names.
        scr_path: Source directory (with trailing separator) of images and labels.
        dst_img_path: Destination directory (with trailing separator) for images.
        dst_txt_path: Destination directory (with trailing separator) for labels.
    """
    for file in files:
        img_path = scr_path + file
        print(file)
        shutil.copy(img_path, dst_img_path + file)
        # splitext keeps dots inside the base name; split('.')[0] truncated at the first one.
        txt_name = os.path.splitext(file)[0] + '.txt'
        shutil.copy(scr_path + txt_name, dst_txt_path + txt_name)


if __name__ == '__main__':
    classes = ['c17', 'c5', 'helicopter', 'c130', 'f16', 'b2',
               'other', 'b52', 'kc10', 'command', 'f15', 'kc135', 'a10',
               'b1', 'aew', 'f22', 'p3', 'p8', 'f35', 'f18', 'v22', 'f4',
               'globalhawk', 'u2', 'su-27', 'il-38', 'tu-134', 'su-33',
               'an-70', 'su-24', 'tu-22', 'il-76']
    # 1. Path of the Labelme annotations
    labelme_path = "USA-Labelme/"
    isUseTest = True  # whether to create a test split (not read anywhere below)
    # 3. Collect the files to process
    files = glob(labelme_path + "*.json")
    files = [i.replace("\\", "/").split("/")[-1].split(".json")[0] for i in files]
    for i in files:
        print(i)
    trainval_files, test_files = train_test_split(files, test_size=0.1, random_state=55)
    # split
    train_files, val_files = train_test_split(trainval_files, test_size=0.1, random_state=55)
    train_name_list = change_2_yolo5(train_files, "train")
    print(train_name_list)
    val_name_list = change_2_yolo5(val_files, "val")
    test_name_list = change_2_yolo5(test_files, "test")
    # Create the dataset folders.
    file_List = ["train", "val", "test"]
    for file in file_List:
        if not os.path.exists('./VOC/images/%s' % file):
            os.makedirs('./VOC/images/%s' % file)
        if not os.path.exists('./VOC/labels/%s' % file):
            os.makedirs('./VOC/labels/%s' % file)
    image_txt_copy(train_name_list, labelme_path, './VOC/images/train/', './VOC/labels/train/')
    image_txt_copy(val_name_list, labelme_path, './VOC/images/val/', './VOC/labels/val/')
    image_txt_copy(test_name_list, labelme_path, './VOC/images/test/', './VOC/labels/test/')
train: ./VOC/images/train # train images
val: VOC/images/val # val images
test: VOC/images/test # test images (optional)

names: ['c17', 'c5', 'helicopter', 'c130', 'f16', 'b2','other', 'b52', 'kc10', 'command', 'f15', 'kc135', 'a10','b1', 'aew', 'f22', 'p3', 'p8', 'f35', 'f18', 'v22', 'f4','globalhawk', 'u2', 'su-27', 'il-38', 'tu-134', 'su-33','an-70', 'su-24', 'tu-22', 'il-76']
from ultralytics import YOLOv10
if __name__ == '__main__':
    # Load the model: build a new one from scratch from the architecture
    # definition. The path now matches the config location used by the other
    # training scripts in this tutorial (was 'ultralytics/models/v108/...').
    model = YOLOv10("ultralytics/cfg/models/v10/yolov10l.yaml")
    print(model.model)
    # Use the model
    results = model.train(data="VOC.yaml", epochs=100, device='0', batch=16, workers=0)  # train the model
YOLOv10l summary (fused): 461 layers, 25765712 parameters, 0 gradients, 126.6 GFLOPs
Class Images Instances Box(P R mAP50 mAP50-95): 100%|██████████| 15/15 [00:02<00:00, 7.06it/s]
all 230 1412 0.847 0.94 0.978 0.711
c17 230 131 0.842 0.977 0.98 0.79
c5 230 68 0.835 0.941 0.958 0.788
helicopter 230 43 0.804 0.953 0.948 0.564
c130 230 85 0.967 0.953 0.984 0.642
f16 230 57 0.732 0.912 0.918 0.589
b2 230 2 0.397 1 0.995 0.696
other 230 86 0.805 0.93 0.954 0.51
b52 230 70 0.893 0.957 0.962 0.795
kc10 230 62 0.985 0.968 0.983 0.803
command 230 40 0.831 1 0.987 0.778
f15 230 123 0.837 0.959 0.98 0.648
kc135 230 91 0.879 0.989 0.969 0.662
a10 230 27 0.885 0.963 0.93 0.435
b1 230 20 0.666 1 0.985 0.699
aew 230 25 0.816 1 0.989 0.789
f22 230 17 0.844 1 0.99 0.692
p3 230 105 0.957 0.971 0.993 0.803
p8 230 1 0.902 1 0.995 0.697
f35 230 32 0.82 0.938 0.967 0.558
f18 230 125 0.938 0.984 0.986 0.796
v22 230 41 0.959 1 0.995 0.714
su-27 230 31 0.915 1 0.995 0.853
il-38 230 27 0.94 1 0.994 0.817
tu-134 230 1 1 0 0.995 0.895
su-33 230 2 0.785 1 0.995 0.697
an-70 230 2 0.728 1 0.995 0.697
tu-22 230 98 0.91 0.99 0.986 0.783
from ultralytics import YOLOv10# Load a model
model = YOLOv10("runs/detect/train/weights/best.pt") # load the weights file best.pt from the runs/detect/train folder
results = model.predict(source="datasets/VOC/images/test",device='0',save=True) # predict on the test image folder, with save=True
# Prediction settings --------------------------------------------------------------------------------------------------
source: # source directory for images or videos
show: False # show results if possible
save_txt: False # save results as .txt file
save_conf: False # save results with confidence scores
save_crop: False # save cropped images with results
hide_labels: False # hide labels
hide_conf: False # hide confidence scores
vid_stride: 1 # video frame-rate stride
line_thickness: 3 # bounding box thickness (pixels)
visualize: False # visualize model features
augment: False # apply image augmentation to prediction sources
agnostic_nms: False # class-agnostic NMS
classes: # filter results by class, i.e. class=0, or class=[0,2,3]
retina_masks: False # use high-resolution segmentation masks
boxes: True # Show boxes in segmentation predictions
import cv2
import time
import random
import numpy as np
import torch
import torchvision


def load_model(model_path):
    """Load a checkpoint and return the fused model in eval mode on ``cuda:0``.

    The checkpoint is read as a dict: the EMA weights under ``'ema'`` are used
    when present, otherwise ``'model'``. The class names are attached to the
    returned model as ``model.CLASSES``.

    Args:
        model_path: Path of the ``.pt`` checkpoint.

    Returns:
        The FP32 model, fused and switched to eval mode.
    """
    # NOTE(review): torch.load unpickles arbitrary objects; only load trusted checkpoints.
    model = torch.load(model_path, map_location='cpu')
    category_list = model.get('CLASSES', model.get('model').names)
    model = (model.get('ema') or model['model']).to("cuda:0").float()  # FP32 model
    model.__setattr__('CLASSES', category_list)
    model.fuse().eval()
    return model


def data_preprocess(model, img, img_scale):
    """Turn a BGR HWC image into a normalised, batched tensor on ``cuda:0``.

    Args:
        model: Loaded model; its ``stride`` attribute sets the padding multiple.
        img: Image array as returned by ``cv2.imread``.
        img_scale: Target shape passed to ``letterbox`` as ``new_shape``.

    Returns:
        torch.Tensor: Float tensor with a leading batch dimension, values in 0.0-1.0.
    """
    # Use the model's largest stride, but never less than 32.
    stride = max(int(model.stride.max()), 32)
    # Resize and pad the image for the model input.
    img = letterbox(img, new_shape=img_scale, stride=stride, auto=True)[0]  # padded resize
    # HWC to CHW, BGR to RGB, contiguous in memory.
    img = np.ascontiguousarray(img.transpose((2, 0, 1))[::-1])
    img = torch.from_numpy(img).to("cuda:0")  # ndarray to tensor on the GPU
    img = img.float()  # uint8 to fp32
    img /= 255  # 0 - 255 to 0.0 - 1.0
    # A 3-D tensor has no batch dimension yet; add one.
    if len(img.shape) == 3:
        img = img[None]  # expand for batch dim
    return img


def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    """Resize ``im`` keeping its aspect ratio and pad it with ``color``.

    Args:
        im: Input image array, height and width first.
        new_shape: Target ``(height, width)``, or one int used for both.
        color: Border colour.
        auto: Pad only up to the next multiple of ``stride`` (minimum rectangle).
        scaleFill: When ``auto`` is False, stretch to ``new_shape`` without padding.
        scaleup: Allow enlarging the image; when False it is only shrunk.
        stride: Padding multiple used when ``auto`` is True.

    Returns:
        tuple: ``(image, (ratio_w, ratio_h), (dw, dh))`` with the per-side padding.
    """
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute the resized size and the padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    # Divide the padding between the two sides
    dw /= 2
    dh /= 2

    if shape[::-1] != new_unpad:  # resize only when the size changes
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)


def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False,
                        labels=(), max_det=300, nc=0, max_time_img=0.05, max_nms=30000, max_wh=7680, ):
    """Filter raw predictions by confidence and run NMS per image.

    Args:
        prediction: Raw model output, or a list/tuple whose first item is that output.
        conf_thres: Confidence threshold in [0, 1].
        iou_thres: IoU threshold passed to NMS, in [0, 1].
        classes: Optional class indices to keep.
        agnostic: When True, NMS ignores the class of each box.
        multi_label: Allow several labels per box (only if more than one class).
        labels: Optional per-image apriori labels appended before NMS.
        max_det: Maximum detections kept per image.
        nc: Number of classes; derived from the prediction shape when 0.
        max_time_img: Per-image allowance added to the NMS time limit, in seconds.
        max_nms: Maximum boxes fed into NMS.
        max_wh: Offset multiplier used to separate classes during batched NMS.

    Returns:
        list: One tensor per image with columns (x1, y1, x2, y2, conf, cls, *mask).
    """
    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
    if isinstance(prediction, (list, tuple)):  # YOLOv8 model in validation model, output = (inference_out, loss_out)
        prediction = prediction[0]  # select only inference output

    device = prediction.device
    mps = 'mps' in device.type  # Apple MPS
    if mps:  # MPS not fully supported yet, convert tensors to CPU before NMS
        prediction = prediction.cpu()
    bs = prediction.shape[0]  # batch size
    nc = nc or (prediction.shape[1] - 4)  # number of classes
    nm = prediction.shape[1] - nc - 4
    mi = 4 + nc  # mask start index
    xc = prediction[:, 4:mi].amax(1) > conf_thres  # candidates

    # Settings
    # min_wh = 2  # (pixels) minimum box width and height
    time_limit = 0.5 + max_time_img * bs  # seconds to quit after
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)

    prediction = prediction.transpose(-1, -2)  # shape(1,84,6300) to shape(1,6300,84)
    prediction[..., :4] = xywh2xyxy(prediction[..., :4])  # xywh to xyxy

    t = time.time()
    output = [torch.zeros((0, 6 + nm), device=prediction.device)] * bs
    for xi, x in enumerate(prediction):  # image index, image inference
        # Apply constraints
        # x[((x[:, 2:4] < min_wh) | (x[:, 2:4] > max_wh)).any(1), 4] = 0  # width-height
        x = x[xc[xi]]  # confidence

        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            lb = labels[xi]
            v = torch.zeros((len(lb), nc + nm + 4), device=x.device)
            v[:, :4] = xywh2xyxy(lb[:, 1:5])  # box
            v[range(len(lb)), lb[:, 0].long() + 4] = 1.0  # cls
            x = torch.cat((x, v), 0)

        # If none remain process next image
        if not x.shape[0]:
            continue

        # Detections matrix nx6 (xyxy, conf, cls)
        box, cls, mask = x.split((4, nc, nm), 1)

        if multi_label:
            i, j = torch.where(cls > conf_thres)
            x = torch.cat((box[i], x[i, 4 + j, None], j[:, None].float(), mask[i]), 1)
        else:  # best class only
            conf, j = cls.max(1, keepdim=True)
            x = torch.cat((box, conf, j.float(), mask), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        if n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence and remove excess boxes

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        i = i[:max_det]  # limit detections

        output[xi] = x[i]
        if mps:
            output[xi] = output[xi].to(device)
        if (time.time() - t) > time_limit:
            print(f'WARNING ⚠️ NMS time limit {time_limit:.3f}s exceeded')
            break  # time limit exceeded

    return output


def xywh2xyxy(x):
    """
    Convert bounding box coordinates from (x, y, width, height) format to (x1, y1, x2, y2) format where (x1, y1) is the
    top-left corner and (x2, y2) is the bottom-right corner.

    Args:
        x (np.ndarray | torch.Tensor): The input bounding box coordinates in (x, y, width, height) format.

    Returns:
        y (np.ndarray | torch.Tensor): The bounding box coordinates in (x1, y1, x2, y2) format.
    """
    assert x.shape[-1] == 4, f'input shape last dimension expected 4 but input shape is {x.shape}'
    y = torch.empty_like(x) if isinstance(x, torch.Tensor) else np.empty_like(x)  # faster than clone/copy
    dw = x[..., 2] / 2  # half-width
    dh = x[..., 3] / 2  # half-height
    y[..., 0] = x[..., 0] - dw  # top left x
    y[..., 1] = x[..., 1] - dh  # top left y
    y[..., 2] = x[..., 0] + dw  # bottom right x
    y[..., 3] = x[..., 1] + dh  # bottom right y
    return y


def scale_boxes(img1_shape, boxes, img0_shape, ratio_pad=None, padding=True):
    """
    Rescales bounding boxes (in the format of xyxy) from the shape of the image they were originally specified in
    (img1_shape) to the shape of a different image (img0_shape).

    Args:
        img1_shape (tuple): The shape of the image that the bounding boxes are for, in the format of (height, width).
        boxes (torch.Tensor): the bounding boxes of the objects in the image, in the format of (x1, y1, x2, y2)
        img0_shape (tuple): the shape of the target image, in the format of (height, width).
        ratio_pad (tuple): a tuple of (ratio, pad) for scaling the boxes. If not provided, the ratio and pad will be
            calculated based on the size difference between the two images.
        padding (bool): If True, assuming the boxes is based on image augmented by yolo style. If False then do regular
            rescaling.

    Returns:
        boxes (torch.Tensor): The scaled bounding boxes, in the format of (x1, y1, x2, y2)
    """
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain = old / new
        pad = round((img1_shape[1] - img0_shape[1] * gain) / 2 - 0.1), round(
            (img1_shape[0] - img0_shape[0] * gain) / 2 - 0.1)  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    if padding:
        boxes[..., [0, 2]] -= pad[0]  # x padding
        boxes[..., [1, 3]] -= pad[1]  # y padding
    boxes[..., :4] /= gain
    clip_boxes(boxes, img0_shape)
    return boxes


def clip_boxes(boxes, shape):
    """
    Takes a list of bounding boxes and a shape (height, width) and clips the bounding boxes to the shape.

    Args:
        boxes (torch.Tensor): the bounding boxes to clip
        shape (tuple): the shape of the image
    """
    if isinstance(boxes, torch.Tensor):  # faster individually
        boxes[..., 0].clamp_(0, shape[1])  # x1
        boxes[..., 1].clamp_(0, shape[0])  # y1
        boxes[..., 2].clamp_(0, shape[1])  # x2
        boxes[..., 3].clamp_(0, shape[0])  # y2
    else:  # np.array (faster grouped)
        boxes[..., [0, 2]] = boxes[..., [0, 2]].clip(0, shape[1])  # x1, x2
        boxes[..., [1, 3]] = boxes[..., [1, 3]].clip(0, shape[0])  # y1, y2


def plot_result(det_cpu, dst_img, category_names, image_name):
    """Draw every detection on ``dst_img``, show it, and write it to ``image_name``.

    Args:
        det_cpu: Array of detections with columns (x1, y1, x2, y2, score, class).
        dst_img: Image to draw on; modified in place.
        category_names: Class names indexed by class id.
        image_name: Output path handed to ``cv2.imwrite``.
    """
    for item in det_cpu:
        rand_color = (random.randint(0, 255), random.randint(0, 255), random.randint(0, 255))
        # Draw the box. Coordinates are converted to built-in ints because
        # NumPy integer scalars are not accepted as points by every
        # OpenCV/NumPy combination.
        box_x1, box_y1, box_x2, box_y2 = item[0:4].astype(np.int32).tolist()
        cv2.rectangle(dst_img, (box_x1, box_y1), (box_x2, box_y2), color=rand_color, thickness=2)
        # Draw the label
        label = category_names[int(item[5])]
        score = item[4]
        org = (min(box_x1, box_x2), min(box_y1, box_y2) - 8)
        text = '{}|{:.2f}'.format(label, score)
        cv2.putText(dst_img, text, org=org, fontFace=cv2.FONT_HERSHEY_SIMPLEX, fontScale=0.8, color=rand_color,
                    thickness=2)
    cv2.imshow('result', dst_img)
    cv2.waitKey()
    cv2.imwrite(image_name, dst_img)


if __name__ == '__main__':
    img_path = "./ultralytics/assets/bus.jpg"
    image_name = img_path.split('/')[-1]
    ori_img = cv2.imread(img_path)
    if ori_img is None:
        raise FileNotFoundError('cannot read image: {}'.format(img_path))

    # load model
    model = load_model("runs/detect/train2/weights/best.pt")

    # Data preprocessing
    img = data_preprocess(model, ori_img, [640, 640])

    # Inference. Gradients are not needed, and disabling them lets the
    # detections be converted to NumPy afterwards.
    with torch.no_grad():
        result = model(img, augment=False)
        preds = result[0]

        # NMS
        det = non_max_suppression(preds, conf_thres=0.35, iou_thres=0.45, nc=len(model.CLASSES))[0]

        # Map the boxes back to the original image size
        det[:, :4] = scale_boxes(img.shape[2:], det[:, :4], ori_img.shape)

    category_names = model.CLASSES

    # show
    plot_result(det.cpu().numpy(), ori_img, category_names, image_name)