【PyTorch】基于YOLO的多目标检测项目(一)
【PyTorch】基于YOLO的多目标检测项目(二)
YOLO-v3网络由步长(stride)为2的卷积层、跳跃连接(shortcut)层和上采样层组成,没有池化层。网络接收一幅416 * 416的图像作为输入,并在三个不同尺度上各给出一个YOLO输出。
目录
准备配置文件
搭建YOLO模型
搭建PyTorch模块
搭建DarkNet模型
定义损失函数
训练模型
部署模型
准备配置文件
新建一个py文件,命名为myutils.py,并写入以下代码。该文件是辅助模块:负责解析模型配置文件(.cfg),并据此创建网络层,辅助构建模型。
import torch
from torch import nn

# Default device used when a caller does not say where tensors should live.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


def parse_model_config(path2file):
    """Parse a Darknet-style ``.cfg`` file into a list of block dictionaries.

    A line starting with ``[`` opens a new block whose ``"type"`` is the text
    between the brackets. Every other non-empty, non-comment line is read as
    ``key=value`` and stored (as strings) in the most recently opened block.

    Args:
        path2file: Path of the configuration file.

    Returns:
        A list of dicts, one per block, in file order.
    """
    # The context manager guarantees the handle is closed even on errors.
    with open(path2file, "r") as cfg_file:
        raw_lines = cfg_file.read().split("\n")

    # Strip first, then filter, so whitespace-only lines and indented
    # comments are discarded instead of reaching the key=value split.
    lines = [line.strip() for line in raw_lines]
    lines = [line for line in lines if line and not line.startswith("#")]

    blocks_list = []
    for line in lines:
        if line.startswith("["):
            blocks_list.append({"type": line[1:-1].rstrip()})
        else:
            # Split on the first "=" only so values may themselves contain "=".
            key, value = line.split("=", 1)
            blocks_list[-1][key.rstrip()] = value.strip()
    return blocks_list


def create_layers(blocks_list):
    """Turn parsed config blocks into PyTorch modules.

    The first block is treated as the hyper-parameter block; each following
    block becomes one ``nn.Sequential`` appended to the returned module list.

    Args:
        blocks_list: Output of :func:`parse_model_config`.

    Returns:
        A tuple ``(hyperparams, module_list)`` where ``hyperparams`` is
        ``blocks_list[0]`` and ``module_list`` is an ``nn.ModuleList``.
    """
    hyperparams = blocks_list[0]
    # channels_list[0] is the input channel count; entry i + 1 is the output
    # channel count of layer i.
    channels_list = [int(hyperparams["channels"])]
    module_list = nn.ModuleList()

    for layer_ind, layer_dict in enumerate(blocks_list[1:]):
        modules = nn.Sequential()
        layer_type = layer_dict["type"]
        # Layers that do not change the channel count (upsample, yolo, ...)
        # simply propagate the previous layer's channel count.
        filters = channels_list[-1]

        if layer_type == "convolutional":
            filters = int(layer_dict["filters"])
            kernel_size = int(layer_dict["size"])
            pad = (kernel_size - 1) // 2
            # Config values are strings: "0" must be parsed, because a
            # non-empty string is always truthy.
            bn = bool(int(layer_dict.get("batch_normalize", 0)))

            conv2d = nn.Conv2d(
                in_channels=channels_list[-1],
                out_channels=filters,
                kernel_size=kernel_size,
                stride=int(layer_dict["stride"]),
                padding=pad,
                bias=not bn,  # batch norm supplies its own shift term
            )
            modules.add_module("conv_{0}".format(layer_ind), conv2d)

            if bn:
                bn_layer = nn.BatchNorm2d(filters, momentum=0.9, eps=1e-5)
                modules.add_module("batch_norm_{0}".format(layer_ind), bn_layer)

            if layer_dict["activation"] == "leaky":
                activn = nn.LeakyReLU(0.1)
                modules.add_module("leaky_{0}".format(layer_ind), activn)

        elif layer_type == "upsample":
            stride = int(layer_dict["stride"])
            upsample = nn.Upsample(scale_factor=stride)
            modules.add_module("upsample_{}".format(layer_ind), upsample)

        elif layer_type == "shortcut":
            backwards = int(layer_dict["from"])
            filters = channels_list[1:][backwards]
            # Placeholder only: the addition is done by the model's forward.
            modules.add_module("shortcut_{}".format(layer_ind), EmptyLayer())

        elif layer_type == "route":
            layers = [int(x) for x in layer_dict["layers"].split(",")]
            # Routed feature maps are concatenated, so channels add up.
            filters = sum(channels_list[1:][l] for l in layers)
            modules.add_module("route_{}".format(layer_ind), EmptyLayer())

        elif layer_type == "yolo":
            flat = [int(a) for a in layer_dict["anchors"].split(",")]
            anchors = [(flat[i], flat[i + 1]) for i in range(0, len(flat), 2)]
            mask = [int(m) for m in layer_dict["mask"].split(",")]
            # Keep only the anchors selected by this layer's mask.
            anchors = [anchors[i] for i in mask]

            num_classes = int(layer_dict["classes"])
            img_size = int(hyperparams["height"])
            yolo_layer = YOLOLayer(anchors, num_classes, img_size)
            modules.add_module("yolo_{}".format(layer_ind), yolo_layer)

        module_list.append(modules)
        channels_list.append(filters)

    return hyperparams, module_list


class EmptyLayer(nn.Module):
    """Parameter-free placeholder used for ``shortcut`` and ``route`` blocks."""

    def __init__(self):
        super(EmptyLayer, self).__init__()


class YOLOLayer(nn.Module):
    """Detection head that decodes a raw feature map into box predictions.

    Args:
        anchors: Sequence of ``(width, height)`` anchor pairs.
        num_classes: Number of object classes.
        img_dim: Image size used to derive the stride of the grid.
    """

    def __init__(self, anchors, num_classes, img_dim=416):
        super(YOLOLayer, self).__init__()
        self.anchors = anchors
        self.num_anchors = len(anchors)
        self.num_classes = num_classes
        self.img_dim = img_dim
        # 0 means "grid offsets not computed yet".
        self.grid_size = 0

    def forward(self, x_in):
        """Decode ``x_in`` into boxes, objectness and class scores.

        ``x_in`` is viewed as
        ``(batch, num_anchors, num_classes + 5, grid, grid)``.

        Returns:
            A tensor of shape
            ``(batch, num_anchors * grid * grid, 5 + num_classes)`` holding,
            along the last axis, the 4 box values, the objectness score and
            the per-class scores.
        """
        batch_size = x_in.size(0)
        grid_size = x_in.size(2)

        prediction = x_in.view(
            batch_size,
            self.num_anchors,
            self.num_classes + 5,
            grid_size,
            grid_size,
        )
        # Move the per-box attribute axis last.
        prediction = prediction.permute(0, 1, 3, 4, 2).contiguous()

        obj_score = torch.sigmoid(prediction[..., 4])
        pred_cls = torch.sigmoid(prediction[..., 5:])

        # Rebuild the cached grid when the resolution or the device changes.
        if grid_size != self.grid_size or self.grid_x.device != x_in.device:
            self.compute_grid_offsets(
                grid_size, cuda=x_in.is_cuda, on_device=x_in.device
            )

        pred_boxes = self.transform_outputs(prediction)

        output = torch.cat(
            (
                pred_boxes.view(batch_size, -1, 4),
                obj_score.view(batch_size, -1, 1),
                pred_cls.view(batch_size, -1, self.num_classes),
            ),
            -1,
        )
        return output

    def compute_grid_offsets(self, grid_size, cuda=True, on_device=None):
        """Cache the cell offsets and stride-scaled anchors for ``grid_size``.

        Args:
            grid_size: Number of cells along one side of the grid.
            cuda: Unused; kept so existing callers keep working.
            on_device: Device for the cached tensors. Defaults to the
                module-level ``device``.
        """
        target = device if on_device is None else on_device

        self.grid_size = grid_size
        self.stride = self.img_dim / self.grid_size

        # Both grids have shape (1, 1, grid, grid); grid_y is the transpose.
        self.grid_x = (
            torch.arange(grid_size, device=target)
            .repeat(1, 1, grid_size, 1)
            .type(torch.float32)
        )
        self.grid_y = (
            torch.arange(grid_size, device=target)
            .repeat(1, 1, grid_size, 1)
            .transpose(3, 2)
            .type(torch.float32)
        )

        # Express the anchors in grid-cell units.
        scaled_anchors = [
            (a_w / self.stride, a_h / self.stride) for a_w, a_h in self.anchors
        ]
        self.scaled_anchors = torch.tensor(scaled_anchors, device=target)
        self.anchor_w = self.scaled_anchors[:, 0:1].view((1, self.num_anchors, 1, 1))
        self.anchor_h = self.scaled_anchors[:, 1:2].view((1, self.num_anchors, 1, 1))

    def transform_outputs(self, prediction):
        """Convert raw predictions to boxes scaled by ``self.stride``.

        NOTE(review): the box terms are detached from the autograd graph here
        (the original used ``.data``), so a loss computed on the returned
        boxes does not back-propagate into the x/y/w/h predictions. Confirm
        this is intended before relying on it for training.
        """
        x = torch.sigmoid(prediction[..., 0])  # centre x, offset within cell
        y = torch.sigmoid(prediction[..., 1])  # centre y, offset within cell
        w = prediction[..., 2]  # width (log-space w.r.t. the anchor)
        h = prediction[..., 3]  # height (log-space w.r.t. the anchor)

        # zeros_like already allocates on the prediction's device.
        pred_boxes = torch.zeros_like(prediction[..., :4])
        pred_boxes[..., 0] = x.detach() + self.grid_x
        pred_boxes[..., 1] = y.detach() + self.grid_y
        pred_boxes[..., 2] = torch.exp(w.detach()) * self.anchor_w
        pred_boxes[..., 3] = torch.exp(h.detach()) * self.anchor_h

        return pred_boxes * self.stride
搭建YOLO模型
解析配置文件,使用parse_model_config助手读取并打印
from myutils import parse_model_config

# Location of the YOLO-v3 network description.
path2config = "./config/yolov3.cfg"

# Parse the file into a list of block dictionaries.
blocks_list = parse_model_config(path2config)

# Notebook-style inspection of the first two parsed blocks.
blocks_list[:2]
搭建PyTorch模块
基于解析的配置文件创建PyTorch模块,调用 create_layers 辅助函数进行转换并获取 PyTorch 模块的列表
from myutils import create_layers

# Convert the parsed blocks into the hyper-parameter dict and module list.
hy_pa, m_l = create_layers(blocks_list)

print(m_l)
print(hy_pa)
搭建DarkNet模型
import torch
from torch import nn


class Darknet(nn.Module):
    """YOLO-v3 network assembled from a Darknet configuration file.

    Args:
        config_path: Path of the ``.cfg`` file describing the network.
        img_size: Stored on the instance as ``self.img_size``.
    """

    def __init__(self, config_path, img_size=416):
        super(Darknet, self).__init__()
        self.blocks_list = parse_model_config(config_path)
        self.hyperparams, self.module_list = create_layers(self.blocks_list)
        self.img_size = img_size

    def forward(self, x):
        """Run the network on ``x``.

        Returns:
            A tuple ``(yolo_out_cat, yolo_outputs)``: the outputs of all
            ``yolo`` blocks concatenated along dimension 1, and the list of
            the individual outputs.
        """
        # layer_outputs keeps every block's output because shortcut and
        # route blocks refer back to earlier layers by index.
        layer_outputs, yolo_outputs = [], []

        # blocks_list[0] is the hyper-parameter block and has no module.
        for block, module in zip(self.blocks_list[1:], self.module_list):
            block_type = block["type"]

            if block_type in ["convolutional", "upsample", "maxpool"]:
                x = module(x)
            elif block_type == "shortcut":
                layer_ind = int(block["from"])
                x = layer_outputs[-1] + layer_outputs[layer_ind]
            elif block_type == "yolo":
                x = module[0](x)
                yolo_outputs.append(x)
            elif block_type == "route":
                routed = [
                    layer_outputs[int(l_i)] for l_i in block["layers"].split(",")
                ]
                x = torch.cat(routed, 1)

            layer_outputs.append(x)

        yolo_out_cat = torch.cat(yolo_outputs, 1)
        return yolo_out_cat, yolo_outputs


model = Darknet(path2config).to(device)
print(model)
# Create a random dummy image of size 1x3x416x416 on the target device.
dummy_img = torch.rand(1, 3, 416, 416).to(device)

# Run a forward pass without tracking gradients.
with torch.no_grad():
    # Call the module itself rather than .forward() so that nn.Module's
    # call machinery (e.g. registered hooks) is not bypassed.
    dummy_out_cat, dummy_out = model(dummy_img)
    # Shape of the concatenated output.
    print(dummy_out_cat.shape)
    # Shape of each individual YOLO output.
    print(dummy_out[0].shape, dummy_out[1].shape, dummy_out[2].shape)
定义损失函数
YOLO通常使用组合损失函数:下面的实现把边界框坐标(x、y、w、h)的均方误差损失、目标置信度的二元交叉熵损失以及类别的二元交叉熵损失相加,作为总损失。
def get_loss_batch(output, targets, params_loss, opt=None):
    """Compute the combined YOLO loss for one batch, optionally stepping ``opt``.

    Args:
        output: Indexable collection of per-YOLO-layer outputs; each entry's
            shape is unpacked as ``(batch_size, num_boxes, attributes)``.
        targets: Ground-truth tensor forwarded to ``get_yolo_targets``.
        params_loss: Dict with the keys ``"ignore_thres"``,
            ``"scaled_anchors"``, ``"mse_loss"``, ``"bce_loss"``,
            ``"num_yolos"``, ``"num_anchors"``, ``"obj_scale"`` and
            ``"noobj_scale"``.
        opt: Optional optimizer. When given, gradients are zeroed, the loss is
            back-propagated and one optimizer step is taken.

    Returns:
        The accumulated loss as a Python float.
    """
    import math

    # Loss configuration.
    ignore_thres = params_loss["ignore_thres"]
    scaled_anchors = params_loss["scaled_anchors"]
    mse_loss = params_loss["mse_loss"]
    bce_loss = params_loss["bce_loss"]

    # YOLO head configuration.
    num_yolos = params_loss["num_yolos"]
    num_anchors = params_loss["num_anchors"]
    obj_scale = params_loss["obj_scale"]
    noobj_scale = params_loss["noobj_scale"]

    loss = 0.0
    for yolo_ind in range(num_yolos):
        yolo_out = output[yolo_ind]
        batch_size, num_bbxs, _ = yolo_out.shape

        # Each anchor contributes grid_size ** 2 boxes, so the grid side is
        # the integer square root of boxes-per-anchor.
        grid_size = math.isqrt(num_bbxs // num_anchors)

        yolo_out = yolo_out.view(batch_size, num_anchors, grid_size, grid_size, -1)

        # Split the attribute axis into boxes, objectness and class scores.
        pred_boxes = yolo_out[:, :, :, :, :4]
        x, y, w, h = transform_bbox(pred_boxes, scaled_anchors[yolo_ind])
        pred_conf = yolo_out[:, :, :, :, 4]
        pred_cls_prob = yolo_out[:, :, :, :, 5:]

        yolo_targets = get_yolo_targets({
            "pred_cls_prob": pred_cls_prob,
            "pred_boxes": pred_boxes,
            "targets": targets,
            "anchors": scaled_anchors[yolo_ind],
            "ignore_thres": ignore_thres,
        })

        # Boolean masks: indexing with uint8 masks is deprecated in PyTorch.
        obj_mask = yolo_targets["obj_mask"].bool()
        noobj_mask = yolo_targets["noobj_mask"].bool()
        tx = yolo_targets["tx"]
        ty = yolo_targets["ty"]
        tw = yolo_targets["tw"]
        th = yolo_targets["th"]
        tcls = yolo_targets["tcls"]
        t_conf = yolo_targets["t_conf"]

        # Box regression terms, evaluated only where an object is assigned.
        loss_x = mse_loss(x[obj_mask], tx[obj_mask])
        loss_y = mse_loss(y[obj_mask], ty[obj_mask])
        loss_w = mse_loss(w[obj_mask], tw[obj_mask])
        loss_h = mse_loss(h[obj_mask], th[obj_mask])

        # Objectness terms, weighted separately for object / no-object cells.
        loss_conf_obj = bce_loss(pred_conf[obj_mask], t_conf[obj_mask])
        loss_conf_noobj = bce_loss(pred_conf[noobj_mask], t_conf[noobj_mask])
        loss_conf = obj_scale * loss_conf_obj + noobj_scale * loss_conf_noobj

        # Classification term.
        loss_cls = bce_loss(pred_cls_prob[obj_mask], tcls[obj_mask])

        loss += loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

    # Training mode: update the weights when an optimizer is supplied.
    if opt is not None:
        opt.zero_grad()
        loss.backward()
        opt.step()

    return loss.item()
def transform_bbox(bbox, anchors):
    """Map boxes back to the regression parameterisation used by the loss.

    Args:
        bbox: 5-D tensor whose last axis holds ``x, y, w, h``.
        anchors: 2-D tensor with one ``(width, height)`` row per anchor; the
            number of rows must match ``bbox``'s anchor axis (dimension 1).

    Returns:
        A tuple ``(x, y, w, h)``: the fractional parts of ``x`` and ``y`` and
        the logarithms of ``w`` and ``h`` divided by the anchor size.
    """
    x = bbox[:, :, :, :, 0]
    y = bbox[:, :, :, :, 1]
    w = bbox[:, :, :, :, 2]
    h = bbox[:, :, :, :, 3]

    # -1 lets the anchor axis follow the number of anchors supplied instead
    # of being fixed to three.
    anchor_w = anchors[:, 0].view((1, -1, 1, 1))
    anchor_h = anchors[:, 1].view((1, -1, 1, 1))

    # Keep only the offset inside the cell.
    x = x - x.floor()
    y = y - y.floor()

    # The small epsilon avoids log(0).
    w = torch.log(w / anchor_w + 1e-16)
    h = torch.log(h / anchor_h + 1e-16)
    return x, y, w, h


def get_yolo_targets(params):
    """Build the target tensors and masks for one YOLO output.

    Args:
        params: Dict with the keys ``"pred_boxes"``, ``"pred_cls_prob"``,
            ``"targets"``, ``"anchors"`` and ``"ignore_thres"``. Each row of
            ``targets`` is read as ``(batch index, class label, x, y, w, h)``;
            the box values are multiplied by the grid size, so they are
            presumably normalised to ``[0, 1]`` (confirm against the dataset).

    Returns:
        Dict with boolean ``"obj_mask"`` / ``"noobj_mask"``, the float targets
        ``"tx"``, ``"ty"``, ``"tw"``, ``"th"``, the one-hot ``"tcls"`` and
        ``"t_conf"`` (``obj_mask`` as float).
    """
    pred_boxes = params["pred_boxes"]
    pred_cls_prob = params["pred_cls_prob"]
    target = params["targets"]
    anchors = params["anchors"]
    ignore_thres = params["ignore_thres"]

    batch_size = pred_boxes.size(0)
    num_anchors = pred_boxes.size(1)
    grid_size = pred_boxes.size(2)
    num_cls = pred_cls_prob.size(-1)

    # Allocate everything next to the predictions so the masks can index them.
    dev = pred_boxes.device

    sizeT = batch_size, num_anchors, grid_size, grid_size
    obj_mask = torch.zeros(sizeT, device=dev, dtype=torch.bool)
    noobj_mask = torch.ones(sizeT, device=dev, dtype=torch.bool)
    tx = torch.zeros(sizeT, device=dev, dtype=torch.float32)
    ty = torch.zeros(sizeT, device=dev, dtype=torch.float32)
    tw = torch.zeros(sizeT, device=dev, dtype=torch.float32)
    th = torch.zeros(sizeT, device=dev, dtype=torch.float32)

    sizeT = batch_size, num_anchors, grid_size, grid_size, num_cls
    tcls = torch.zeros(sizeT, device=dev, dtype=torch.float32)

    # Express the ground-truth boxes in grid-cell units.
    target_bboxes = target[:, 2:] * grid_size
    t_xy = target_bboxes[:, :2]
    t_wh = target_bboxes[:, 2:]
    t_x, t_y = t_xy.t()
    t_w, t_h = t_wh.t()

    # Cell that contains each box centre. A centre lying exactly on the
    # right/bottom border would index one past the grid, so clamp it.
    grid_i, grid_j = t_xy.long().t()
    grid_i = grid_i.clamp(0, grid_size - 1)
    grid_j = grid_j.clamp(0, grid_size - 1)

    # Pick, for every target, the anchor whose shape overlaps it best.
    iou_with_anchors = [get_iou_WH(anchor, t_wh) for anchor in anchors]
    iou_with_anchors = torch.stack(iou_with_anchors)
    best_iou_wa, best_anchor_ind = iou_with_anchors.max(0)

    batch_inds, target_labels = target[:, :2].long().t()

    obj_mask[batch_inds, best_anchor_ind, grid_j, grid_i] = True
    noobj_mask[batch_inds, best_anchor_ind, grid_j, grid_i] = False

    # Anchors that overlap a target well, but are not the best match, are
    # excluded from the no-object term as well.
    for ind, iou_wa in enumerate(iou_with_anchors.t()):
        noobj_mask[batch_inds[ind], iou_wa > ignore_thres, grid_j[ind], grid_i[ind]] = False

    # Centre offsets relative to the containing cell.
    tx[batch_inds, best_anchor_ind, grid_j, grid_i] = t_x - t_x.floor()
    ty[batch_inds, best_anchor_ind, grid_j, grid_i] = t_y - t_y.floor()

    # Width and height as log-ratios to the matched anchor.
    anchor_w = anchors[best_anchor_ind][:, 0]
    tw[batch_inds, best_anchor_ind, grid_j, grid_i] = torch.log(t_w / anchor_w + 1e-16)
    anchor_h = anchors[best_anchor_ind][:, 1]
    th[batch_inds, best_anchor_ind, grid_j, grid_i] = torch.log(t_h / anchor_h + 1e-16)

    # One-hot class targets.
    tcls[batch_inds, best_anchor_ind, grid_j, grid_i, target_labels] = 1

    output = {
        "obj_mask": obj_mask,
        "noobj_mask": noobj_mask,
        "tx": tx,
        "ty": ty,
        "tw": tw,
        "th": th,
        "tcls": tcls,
        "t_conf": obj_mask.float(),
    }
    return output


def get_iou_WH(wh1, wh2):
    """IoU between one ``(w, h)`` pair and a batch of pairs, ignoring position.

    Args:
        wh1: Indexable ``(width, height)`` of a single box.
        wh2: 2-D tensor with one ``(width, height)`` row per box.

    Returns:
        Tensor with one IoU value per row of ``wh2``.
    """
    wh2 = wh2.t()
    w1, h1 = wh1[0], wh1[1]
    w2, h2 = wh2[0], wh2[1]

    inter_area = torch.min(w1, w2) * torch.min(h1, h2)
    # The epsilon keeps the union strictly positive.
    union_area = (w1 * h1 + 1e-16) + w2 * h2 - inter_area
    return inter_area / union_area
训练模型
在训练数据上训练模型,并在验证数据上对其进行评估。训练过程遵循标准的小批量梯度下降流程:逐批计算损失、反向传播并更新参数(下面的代码使用的是Adam优化器,并配合ReduceLROnPlateau学习率调度器)。
import copy


def loss_epoch(model, params_loss, dataset_dl, sanity_check=False, opt=None):
    """Run one pass over ``dataset_dl`` and return the loss per dataset item.

    Args:
        model: Callable returning a pair whose second element is passed to
            ``get_loss_batch`` as the network output.
        params_loss: Loss configuration forwarded to ``get_loss_batch``.
        dataset_dl: Iterable yielding ``(inputs, targets, _)`` batches and
            exposing a ``dataset`` attribute that supports ``len``.
        sanity_check: When truthy, stop after the first batch.
        opt: Optional optimizer forwarded to ``get_loss_batch``.

    Returns:
        The summed batch losses divided by ``len(dataset_dl.dataset)``.
    """
    running_loss = 0.0
    len_data = len(dataset_dl.dataset)

    for xb, yb, _ in dataset_dl:
        yb = yb.to(device)
        _, output = model(xb.to(device))
        running_loss += get_loss_batch(output, yb, params_loss, opt)

        # A quick smoke test only needs a single batch.
        if sanity_check:
            break

    # The divisor is the full dataset size even when the loop stopped early.
    loss = running_loss / float(len_data)
    return loss
def train_val(model, params):
    """Train ``model`` and evaluate it after every epoch.

    ``params`` supplies ``"num_epochs"``, ``"params_loss"``, ``"optimizer"``,
    ``"train_dl"``, ``"val_dl"``, ``"sanity_check"``, ``"lr_scheduler"`` and
    ``"path2weights"``. Whenever the validation loss improves, the weights are
    snapshotted in memory and saved to ``path2weights``; whenever the
    scheduler changes the learning rate, the best snapshot is reloaded.

    Returns:
        A tuple ``(model, loss_history)``; the model carries the best weights
        and ``loss_history`` has the per-epoch ``"train"`` / ``"val"`` losses.
    """
    epochs = params["num_epochs"]
    loss_cfg = params["params_loss"]
    optimizer = params["optimizer"]
    train_loader = params["train_dl"]
    val_loader = params["val_dl"]
    quick_run = params["sanity_check"]
    scheduler = params["lr_scheduler"]
    weights_path = params["path2weights"]

    loss_history = {
        "train": [],
        "val": [],
    }

    best_weights = copy.deepcopy(model.state_dict())
    lowest_val = float('inf')

    for epoch in range(epochs):
        lr_before = get_lr(optimizer)
        print('Epoch {}/{}, current lr={}'.format(epoch, epochs - 1, lr_before))

        # Training phase.
        model.train()
        train_loss = loss_epoch(model, loss_cfg, train_loader, quick_run, optimizer)
        loss_history["train"].append(train_loss)
        print("train loss: %.6f" % (train_loss))

        # Validation phase: no optimizer, no gradient tracking.
        model.eval()
        with torch.no_grad():
            val_loss = loss_epoch(model, loss_cfg, val_loader, quick_run)
        loss_history["val"].append(val_loss)
        print("val loss: %.6f" % (val_loss))

        improved = val_loss < lowest_val
        if improved:
            lowest_val = val_loss
            best_weights = copy.deepcopy(model.state_dict())
            torch.save(model.state_dict(), weights_path)
            print("Copied best model weights!")

        scheduler.step(val_loss)
        # A changed learning rate means the scheduler just reduced it, so
        # restart from the best weights seen so far.
        if lr_before != get_lr(optimizer):
            print("Loading best model weights!")
            model.load_state_dict(best_weights)

        print("-" * 10)

    model.load_state_dict(best_weights)
    return model, loss_history


def get_lr(opt):
    """Return the ``'lr'`` of the first parameter group, or ``None`` if none."""
    return next((group['lr'] for group in opt.param_groups), None)
from torch import optim
import os

from torch.optim.lr_scheduler import ReduceLROnPlateau

opt = optim.Adam(model.parameters(), lr=1e-3)
# NOTE(review): `verbose` is reported as deprecated in recent PyTorch
# releases; confirm against the installed version.
lr_scheduler = ReduceLROnPlateau(opt, mode='min', factor=0.5, patience=20, verbose=1)

# Create the output directory; exist_ok avoids the check-then-create race.
path2models = "./models/"
os.makedirs(path2models, exist_ok=True)

# Collect the stride-scaled anchors of every YOLO head by block type instead
# of hard-coding the layer indices. `scaled_anchors` is set by the layer's
# compute_grid_offsets, so the model must have run a forward pass already.
scaled_anchors = [
    module[0].scaled_anchors
    for block, module in zip(model.blocks_list[1:], model.module_list)
    if block["type"] == "yolo"
]

mse_loss = nn.MSELoss(reduction="sum")
bce_loss = nn.BCELoss(reduction="sum")

params_loss = {
    "scaled_anchors": scaled_anchors,
    "ignore_thres": 0.5,
    "mse_loss": mse_loss,
    "bce_loss": bce_loss,
    "num_yolos": len(scaled_anchors),
    "num_anchors": 3,
    "obj_scale": 1,
    "noobj_scale": 100,
}

params_train = {
    "num_epochs": 5,
    "optimizer": opt,
    "params_loss": params_loss,
    "train_dl": train_dl,
    "val_dl": val_dl,
    "sanity_check": True,
    "lr_scheduler": lr_scheduler,
    "path2weights": path2models + "weights.pt",
}

model, loss_hist = train_val(model, params_train)
部署模型
将训练后的权重加载到模型中
path2weights = "./models/weights.pt"

# map_location lets weights saved on a GPU load on a machine without one.
model.load_state_dict(torch.load(path2weights, map_location=device))

# Fetch one validation sample and display it with its bounding boxes.
img, tg, _ = coco_val[11]
print(img.shape)
print(tg.shape)
show_img_bbox(img, tg)