SSD300网络结构(pytorch)+多尺度训练与测试

一.SSD300

1.如图是预测框的相应feature map 

这里smin是0.2,表示最底层的scale是0.2;smax是0.9,表示最高层的scale是0.9,m代表产生尺度预测的feature map个数。

其中anchor的长宽关系,s就是上图中的scale,a就是上图中的anchor ratio

2.代码

主要由三部分组成

1.vgg作为基础网络

要注意的是作者对38*38*512进行L2正则化,并用一个可学习参数调节通道权重

2.增加大目标检测网络

3.输出包括预测框的偏移量输出与分类

偏移量计算,神经网络学习偏移量即可。

误检的HEM(hard negative mine)loss函数,用于分类

1.回归量与坐标的转换

def cxcy_to_gcxgcy(cxcy, priors_cxcy):
    """Encode center-size boxes as offsets w.r.t. prior boxes.

    See https://github.com/weiliu89/caffe/issues/155 for the 10/5 variance
    scaling used by the original SSD Caffe implementation.

    :param cxcy: boxes in (c_x, c_y, w, h) form, a tensor of shape (n, 4)
    :param priors_cxcy: priors in the same form, shape (n, 4)
    :return: encoded offsets (g_c_x, g_c_y, g_w, g_h), shape (n, 4)
    """
    return torch.cat([(cxcy[:, :2] - priors_cxcy[:, :2]) / (priors_cxcy[:, 2:] / 10),  # g_c_x, g_c_y
                      torch.log(cxcy[:, 2:] / priors_cxcy[:, 2:]) * 5], 1)  # g_w, g_h


def gcxgcy_to_cxcy(gcxgcy, priors_cxcy):
    """Decode predicted offsets back to center-size boxes (inverse of cxcy_to_gcxgcy).

    :param gcxgcy: encoded offsets, shape (n, 4)
    :param priors_cxcy: priors in (c_x, c_y, w, h) form, shape (n, 4)
    :return: boxes in (c_x, c_y, w, h) form, shape (n, 4)
    """
    return torch.cat([gcxgcy[:, :2] * priors_cxcy[:, 2:] / 10 + priors_cxcy[:, :2],  # c_x, c_y
                      torch.exp(gcxgcy[:, 2:] / 5) * priors_cxcy[:, 2:]], 1)  # w, h

2.anchor与gt框匹配示例,保证每个gt至少有一个anchor


# Box-assignment demo: 2 ground-truth boxes, 3 anchors (priors).
import torch

objects = 2
overlap = torch.tensor([[0.4, 0.5, 0.6],
                        [0.8, 0.9, 0.7]])  # IoU matrix: (objects, priors)
# For each prior: the best-overlapping gt box (IoU and its index).
iou_for_each_prior, index_for_each_prior = torch.max(overlap, dim=0)
print(iou_for_each_prior, index_for_each_prior)
# For each gt box: the best-overlapping prior (IoU and its index).
iou_for_each_box, index_for_each_box = torch.max(overlap, dim=1)
print(iou_for_each_box, index_for_each_box)
# Force-assign each gt box to its best prior, so every gt box
# is matched by at least one anchor.
index_for_each_prior[index_for_each_box] = torch.LongTensor(range(objects))
print(index_for_each_prior)

3.gt框与对应anchor框做回归的示例,其中的true_classes是两个样本,每一个样本有3个box框的类别示例,0代表背景


# Box-assignment demo: 2 ground-truth boxes, 3 anchors, followed by the
# positive-prior masking used when gathering regression targets.
import torch

objects = 2
overlap = torch.tensor([[0.4, 0.5, 0.6],
                        [0.8, 0.9, 0.7]])  # IoU matrix: (objects, priors)
# Best gt box for each prior / best prior for each gt box.
iou_for_each_prior, index_for_each_prior = torch.max(overlap, dim=0)
print(iou_for_each_prior, index_for_each_prior)
iou_for_each_box, index_for_each_box = torch.max(overlap, dim=1)
print(iou_for_each_box, index_for_each_box)
# Guarantee every gt box gets at least one anchor.
index_for_each_prior[index_for_each_box] = torch.LongTensor(range(objects))
print(index_for_each_prior)

batch_size = 2
# Per sample: class id of each of the 3 boxes; 0 means background.
true_classes = torch.tensor([[0, 1, 3],
                             [2, 4, 5]])
# Boolean mask of non-background (positive) priors.
positive_priors = true_classes != 0
print('=positive_priors:\n', positive_priors)
# Masking a (batch, priors, 4) tensor flattens to (n_positive, 4).
pre_locs = torch.rand((batch_size, 3, 4))
print('==pre_locs[positive_priors].shape:\n', pre_locs[positive_priors].shape)
true_locs = torch.rand((batch_size, 3, 4))
print('==true_locs[positive_priors].shape:\n', true_locs[positive_priors].shape)

4.总体代码:

import torch
import os
from torch import nn
import torch.nn.functional as F
import torchvision
from torchvision import models
from utils import decimate, find_jaccard_overlap, cxcy_to_xy, xy_to_cxcy
from utils import cxcy_to_gcxgcy as cx_cy_dxdy
from math import sqrt
# vgg16 = models.vgg16(pretrained=True)
# print(vgg16)
# vgg16_state_dict = vgg16.state_dict()
# print(list(vgg16_state_dict.keys()))
# print(vgg16_state_dict.values())
# for key, value in vgg16.named_parameters():
#     print('key:', key)device = torch.device("cuda" if torch.cuda.is_available() else "cpu")class VGGbase(nn.Module):"""vgg 主干网络"""def __init__(self):super(VGGbase, self).__init__()self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)self.pool4 = nn.MaxPool2d(kernel_size=2, stride=2)self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)self.pool5 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)#为了保证尺寸不在减少self.conv6 = nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=6, dilation=6)#空洞卷积扩大感受野self.conv7 = nn.Conv2d(1024, 1024, kernel_size=1)self.load_pretrained_layers()#载入预训练权重#(BS, 3, 300, 300)def forward(self, image):out = F.relu(self.conv1_1(image))out = F.relu(self.conv1_2(out))out = self.pool1(out)#(B,64, 150, 150)out = F.relu(self.conv2_1(out))out = F.relu(self.conv2_2(out))out = self.pool2(out)  #(B, 128, 75, 75)out = F.relu(self.conv3_1(out))out = F.relu(self.conv3_2(out))out = F.relu(self.conv3_3(out))out = self.pool3(out)  # (B, 256, 38, 38)out = F.relu(self.conv4_1(out))out = F.relu(self.conv4_2(out))out = 
F.relu(self.conv4_3(out))conv4_3feats = out     # (B, 512, 38, 38)out = self.pool4(out)  # (B, 512, 19, 19)out = F.relu(self.conv5_1(out))out = F.relu(self.conv5_2(out))out = F.relu(self.conv5_3(out))out = self.pool5(out)  # (B, 512, 19, 19)out = F.relu(self.conv6(out))conv7_feats = F.relu(self.conv7(out))# (B, 1024, 19, 19)# print(out.shape)return conv4_3feats, conv7_featsdef load_pretrained_layers(self):state_dict = self.state_dict()param_name = list(state_dict.keys())print('param_name', param_name)pretrained_state_dict = models.vgg16(pretrained=True).state_dict()pretrained_param_name = list(pretrained_state_dict.keys())print('pretrained_param_name', pretrained_param_name)#由于最后两层与原vgg网络相比多出来的,故权重和偏置要点到为止for i, param in enumerate(param_name[:-4]):# print('pretrained_state_dict[pretrained_param_name[i]].shape', pretrained_state_dict[pretrained_param_name[i]].shape)state_dict[param] = pretrained_state_dict[pretrained_param_name[i]]# #最后两层的权重由分类器权重修改而来# print("pretrained_state_dict['classifier.0.weight'].shape",pretrained_state_dict['classifier.0.weight'].shape)conv_fc6_weight = pretrained_state_dict['classifier.0.weight'].reshape(4096, 512, 7, 7)# print('===conv_fc6_weight.dim()==', conv_fc6_weight.dim())state_dict['conv6.weight'] = decimate(conv_fc6_weight, m=[4, None, 3, 3])#(1024, 512, 3, 3)conv_fc6_bias = pretrained_state_dict['classifier.0.bias']#(4096)state_dict['conv6.bias'] = decimate(conv_fc6_bias, m=[4])#(1024)# print(pretrained_state_dict['classifier.3.weight'].shape)# print(pretrained_state_dict['classifier.6.weight'].shape)conv_fc7_weight = pretrained_state_dict['classifier.3.weight'].reshape(4096, 4096, 1, 1)state_dict['conv7.weight'] = decimate(conv_fc7_weight, m=[4, 4, None, None])  # (1024, 1024, 1, 1)conv_fc7_bias = pretrained_state_dict['classifier.3.bias']  # (4096)state_dict['conv7.bias'] = decimate(conv_fc7_bias, m=[4])  # (1024)self.load_state_dict(state_dict)class AuxiliaryConvolutions(nn.Module):"继续在vgg基础上添加conv网络"def 
__init__(self):super(AuxiliaryConvolutions, self).__init__()#调用父类初始化self.conv8_1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1)self.conv8_2 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1)self.conv8_1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1)self.conv8_2 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1)self.conv9_1 = nn.Conv2d(512, 128, kernel_size=1, stride=1)self.conv9_2 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)self.conv10_1 = nn.Conv2d(256, 128, kernel_size=1, stride=1)self.conv10_2 = nn.Conv2d(128, 256, kernel_size=3, stride=1)self.conv11_1 = nn.Conv2d(256, 128, kernel_size=1, stride=1)self.conv11_2 = nn.Conv2d(128, 256, kernel_size=3, stride=1)self.init_conv2d()def init_conv2d(self):for c in self.children():if isinstance(c, nn.Conv2d):nn.init.xavier_uniform_(c.weight)# nn.init.kaiming_normal_(c.weight)nn.init.constant_(c.bias, 0)def forward(self, input):out = F.relu(self.conv8_1(input))#(B,1024,19,19)out = F.relu(self.conv8_2(out))  #(B,512,19,19)conv8_2feats = outout = F.relu(self.conv9_1(out))  #(B,512,10,10)out = F.relu(self.conv9_2(out))  ##(B,256,5,5)conv9_2feats = outout = F.relu(self.conv10_1(out))  # (B,128,5,5)out = F.relu(self.conv10_2(out))  ##(B,256,3,3)conv10_2feats = outout = F.relu(self.conv11_1(out))  # (B,128,3,3)out = F.relu(self.conv11_2(out))  ##(B,256,1,1)conv11_2feats = out# print(out.size())return conv8_2feats, conv9_2feats, conv10_2feats, conv11_2featsclass PredictionConvolutions(nn.Module):"""卷积层输出框偏移量与分类"""def __init__(self, n_classes):super(PredictionConvolutions, self).__init__()self.n_classes = n_classesbboxs={'conv4_3': 4,'conv7': 6,'conv8_2': 6,'conv9_2': 6,'conv10_2': 4,'conv11_2': 4}self.loc_conv4_3 = nn.Conv2d(512, bboxs['conv4_3']*4, kernel_size=3, padding=1)self.loc_conv7 = nn.Conv2d(1024, bboxs['conv7'] * 4, kernel_size=3, padding=1)self.loc_conv8_2 = nn.Conv2d(512, bboxs['conv8_2'] * 4, kernel_size=3, padding=1)self.loc_conv9_2 = nn.Conv2d(256, bboxs['conv9_2'] * 4, kernel_size=3, 
padding=1)self.loc_conv10_2 = nn.Conv2d(256, bboxs['conv10_2'] * 4, kernel_size=3, padding=1)self.loc_conv11_2 = nn.Conv2d(256, bboxs['conv11_2'] * 4, kernel_size=3, padding=1)self.cl_conv4_3 = nn.Conv2d(512, bboxs['conv4_3'] * n_classes, kernel_size=3, padding=1)self.cl_conv7 = nn.Conv2d(1024, bboxs['conv7'] * n_classes, kernel_size=3, padding=1)self.cl_conv8_2 = nn.Conv2d(512, bboxs['conv8_2'] * n_classes, kernel_size=3, padding=1)self.cl_conv9_2 = nn.Conv2d(256, bboxs['conv9_2'] * n_classes, kernel_size=3, padding=1)self.cl_conv10_2 = nn.Conv2d(256, bboxs['conv10_2'] * n_classes, kernel_size=3, padding=1)self.cl_conv11_2 = nn.Conv2d(256, bboxs['conv11_2'] * n_classes, kernel_size=3, padding=1)self.init_conv2d()def init_conv2d(self):for c in self.children():if isinstance(c, nn.Conv2d):nn.init.xavier_uniform_(c.weight)# nn.init.kaiming_normal_(c.weight)nn.init.constant_(c.bias, 0)def forward(self, conv4_3feats,conv7_feats,conv8_2feats, conv9_2feats, conv10_2feats, conv11_2feats):batch_size = conv4_3feats.size(0)loc_conv4_3 = self.loc_conv4_3(conv4_3feats)#(N, 4*4, 38, 38)loc_conv4_3 = loc_conv4_3.permute(0, 2, 3, 1)#(N, 38, 38, 4*4)loc_conv4_3 = loc_conv4_3.reshape(batch_size, -1, 4)# print(loc_conv4_3.shape)loc_conv7 = self.loc_conv7(conv7_feats)  # (N, 6*4, 19, 19)loc_conv7 = loc_conv7.permute(0, 2, 3, 1)loc_conv7 = loc_conv7.reshape(batch_size, -1, 4)loc_conv8_2 = self.loc_conv8_2(conv8_2feats)  # (N, 6*4, 10, 10)loc_conv8_2 = loc_conv8_2.permute(0, 2, 3, 1)loc_conv8_2 = loc_conv8_2.reshape(batch_size, -1, 4)loc_conv9_2 = self.loc_conv9_2(conv9_2feats)  # (N, 6*4, 5, 5)loc_conv9_2 = loc_conv9_2.permute(0, 2, 3, 1)loc_conv9_2 = loc_conv9_2.reshape(batch_size, -1, 4)loc_conv10_2 = self.loc_conv10_2(conv10_2feats)  # (N, 4*4, 3, 3)loc_conv10_2 = loc_conv10_2.permute(0, 2, 3, 1)loc_conv10_2 = loc_conv10_2.reshape(batch_size, -1, 4)loc_conv11_2 = self.loc_conv11_2(conv11_2feats)  # (N, 4*4, 1, 1)loc_conv11_2 = loc_conv11_2.permute(0, 2, 3, 1)loc_conv11_2 = 
loc_conv11_2.reshape(batch_size, -1, 4)cl_conv4_3 = self.cl_conv4_3(conv4_3feats)  # (N, 4*n_classes, 38, 38)cl_conv4_3 = cl_conv4_3.permute(0, 2, 3, 1)cl_conv4_3 = cl_conv4_3.reshape(batch_size, -1, self.n_classes)cl_conv7 = self.cl_conv7(conv7_feats)  # (N, 6*n_classes, 19, 19)cl_conv7 = cl_conv7.permute(0, 2, 3, 1)cl_conv7 = cl_conv7.reshape(batch_size, -1, self.n_classes)cl_conv8_2 = self.cl_conv8_2(conv8_2feats)  # (N, 6*n_classes, 10, 10)cl_conv8_2 = cl_conv8_2.permute(0, 2, 3, 1)cl_conv8_2 = cl_conv8_2.reshape(batch_size, -1, self.n_classes)cl_conv9_2 = self.cl_conv9_2(conv9_2feats)  # (N, 6*n_classes, 5, 5)cl_conv9_2 = cl_conv9_2.permute(0, 2, 3, 1)cl_conv9_2 = cl_conv9_2.reshape(batch_size, -1, self.n_classes)cl_conv10_2 = self.cl_conv10_2(conv10_2feats)  # (N, 4*n_classes, 3, 3)cl_conv10_2 = cl_conv10_2.permute(0, 2, 3, 1)cl_conv10_2 = cl_conv10_2.reshape(batch_size, -1, self.n_classes)cl_conv11_2 = self.cl_conv11_2(conv11_2feats)  # (N, 4*n_classes, 1, 1)cl_conv11_2 = cl_conv11_2.permute(0, 2, 3, 1)cl_conv11_2 = cl_conv11_2.reshape(batch_size, -1, self.n_classes)# return loc_conv4_3, loc_conv7, loc_conv8_2, loc_conv9_2, loc_conv10_2, loc_conv11_2,\#        cl_conv4_3, cl_conv7, cl_conv8_2, cl_conv9_2, cl_conv10_2, cl_conv11_2locs = torch.cat((loc_conv4_3, loc_conv7, loc_conv8_2, loc_conv9_2, loc_conv10_2, loc_conv11_2),dim=1)class_scores = torch.cat((cl_conv4_3, cl_conv7, cl_conv8_2, cl_conv9_2, cl_conv10_2, cl_conv11_2),dim=1)return locs,class_scores#(10, 8732, 4) (10, 8732, 21)class SSD300(nn.Module):def __init__(self, n_classes):super(SSD300, self).__init__()self.n_classes = n_classesself.base_vgg = VGGbase()self.aux_convs = AuxiliaryConvolutions()self.pre_convs = PredictionConvolutions(self.n_classes)#对conv4_3添加每个通道添加可学习参数,并进行L2正则化self.rescale_factors = nn.Parameter(torch.FloatTensor(1, 512, 1, 1))nn.init.constant_(self.rescale_factors, 20)self.create_prior_boxes()def forward(self, input):conv4_3feats, conv7_feats = self.base_vgg(input)#(N,512,38,38) 
(N,1024,19,19)norm = torch.pow(conv4_3feats, 2).sum(dim=1, keepdim=True).sqrt()#(B, 1, 38, 38)对所有通道的每一行求平方和L2正则 开更号conv4_3feats = conv4_3feats/norm*self.rescale_factorsconv8_2feats, conv9_2feats, conv10_2feats, conv11_2feats = self.aux_convs(conv7_feats)locs, class_scores = self.pre_convs(conv4_3feats, conv7_feats, conv8_2feats, conv9_2feats, conv10_2feats, conv11_2feats)return locs, class_scores#(10, 8732, 4) (10, 8732, 21)def create_prior_boxes(self):"""创建SSD300的先验框(cx, cy, w, h)(8372,4)个box"""fmap_size = {'conv4_3': 38, 'conv7': 19, 'conv8_2': 10,'conv9_2': 5, 'conv10_2': 3, 'conv11_2': 1}anchor_scale = {'conv4_3': 0.1, 'conv7': 0.2, 'conv8_2': 0.375,'conv9_2': 0.55, 'conv10_2': 0.725, 'conv11_2': 0.9}anchor_ratio = {'conv4_3': [1, 2, 0.5], 'conv7': [1, 2, 3, 0.5, 0.33], 'conv8_2': [1, 2, 3, 0.5, 0.33],'conv9_2': [1, 2, 3, 0.5, 0.33], 'conv10_2': [1, 2, 0.5], 'conv11_2': [1, 2, 0.5]}prior_boxes = []for index, fmap in enumerate(fmap_size):for i in range(fmap_size[fmap]):for j in range(fmap_size[fmap]):cy, cx = (i + 0.5) / fmap_size[fmap], (j + 0.5) / fmap_size[fmap]for ratio in anchor_ratio[fmap]:prior_boxes.append([cx, cy, anchor_scale[fmap] * sqrt(ratio), anchor_scale[fmap] / sqrt(ratio)])if ratio == 1:  # 添加额外框try:extra_scale = sqrt(anchor_scale[fmap] * anchor_scale[fmap_size[index + 1]])except:extra_scale = 1.prior_boxes.append([cx, cy, extra_scale, extra_scale])# print('len(prior_boxes)',len(prior_boxes))# prior_boxes = [[1,2,3,4],#                [3,4,5,6]]prior_boxes = torch.FloatTensor(prior_boxes).to(device)prior_boxes.clamp_(0, 1)  # 防止越界print('prior_boxes.shape', prior_boxes.shape)# print(prior_boxes)return prior_boxes#(8732, 4)class MultiBoxLoss(nn.Module):"""定位loss和分类loss,其中定位loss采用Hard Negative Mining."""def __init__(self, prior_cxcy, threshold=0.5, neg_pos_ratio=3, alph=1.):super(MultiBoxLoss, self).__init__()self.prior_cxcy = prior_cxcy#(8732,4)self.priors_xy = cxcy_to_xy(prior_cxcy)self.threshold = thresholdself.neg_pos_ratio = 
neg_pos_ratioself.alph = alphself.smooth_l1 = nn.L1Loss()self.cross_entropy = nn.CrossEntropyLoss(reduce=False)#不计算batch的平均loss因为要用到hard mine模式def forward(self, prediction_locs, prediction_scores, boxes, labels):"""prediction_locs,(N, 8732, 4)prediction_scores,(N, 8732, n_classes)boxes,[[],[[],[]]]labels[[],[]]"""batch_size = prediction_locs.shape[0]#(N,)n_priors = self.prior_cxcy.shape[0]#(8732,)n_classes = prediction_scores.shape[-1]#(n_classes)# print('==batch_size', batch_size)assert batch_size == len(boxes)assert n_priors == prediction_locs.shape[1] == prediction_scores.shape[1]true_locs = torch.zeros((batch_size, n_priors, 4),dtype=torch.float)#(N, 8732, 4)true_classes = torch.zeros((batch_size, n_priors),dtype=torch.long)#(N, 8732)for i in range(batch_size):# print('===boxes[i]', boxes[i])objects = boxes[i].shape[0]   #(objects, 4)  (8732, 4)overlap = find_jaccard_overlap(boxes[i], self.priors_xy)#(objects, 8732)# 每个先验框与gt框的最大IOU 以及索引iou_for_each_prior, index_for_each_prior = overlap.max(dim=0)# 每个gt框与先验框的最大IOU 以及索引iou_for_each_box, index_for_each_box = overlap.max(dim=1)#为了防止没有相应的先验框与gt相交index_for_each_prior[index_for_each_box] = torch.LongTensor(range(objects)).to(device)iou_for_each_prior[index_for_each_box] = 1.label_for_each_prior = labels[i][index_for_each_prior]#得到对应的每个先验框的标签label_for_each_prior[iou_for_each_prior<self.threshold] = 0#将小于阈值的置为背景#依次存储batchsizetrue_classes[i] = label_for_each_priortrue_locs[i] = cx_cy_dxdy(xy_to_cxcy(boxes[i][index_for_each_prior]), self.prior_cxcy)#得到偏移量print('true_classes.dtype',true_classes.dtype)positive_priors = true_classes != 0#batch_size 正样本(N,8732)print('positive_priors.dtype',positive_priors.dtype)print('==positive_priors.shape', positive_priors.shape)print('==positive_priors', positive_priors)loc_loss = self.smooth_l1(prediction_locs[positive_priors], true_locs[positive_priors])n_postives = positive_priors.sum(dim=1)#(N,)n_hard_negatives = self.neg_pos_ratio*n_postives#(N,)confidence_loss_all = 
self.cross_entropy(prediction_scores.reshape(-1, n_classes), true_classes.reshape(-1))confidence_loss_all = confidence_loss_all.reshape(batch_size, n_priors)print('==confidence_loss_all.shape', confidence_loss_all.shape)confidence_loss_pos = confidence_loss_all[positive_priors]#print('==confidence_loss_pos.shape', confidence_loss_pos.shape)confidence_loss_neg = confidence_loss_all.clone()#(N, 8732)confidence_loss_neg[positive_priors] = 0.#(N, 8732)#把正样本loss清零再去做HEMconfidence_loss_neg, _ = confidence_loss_neg.sort(dim=1, descending=True)#(N,8732)按行从大到小hardness_ranks = torch.LongTensor(range(n_priors)).unsqueeze(0).expand_as(confidence_loss_neg) # (N, 8732)hard_negatives = hardness_ranks < n_hard_negatives.unsqueeze(1)  # (N, 8732)confidence_loss_hard = confidence_loss_all[hard_negatives]# print('==confidence_loss_hard.shape', confidence_loss_hard.shape)confidence_loss = (confidence_loss_pos.sum()+confidence_loss_hard.sum())/n_postives.sum().float()return loc_loss+self.alph*confidence_lossdef test_vgg_base():model = VGGbase()x = torch.rand((10, 3, 300, 300))conv4_3feats, conv7_feats = model(x)print('conv4_3feats.shape:', conv4_3feats.shape)print('conv7_feats.shape:', conv7_feats.shape)
def test_AUx_conv():
    """Smoke-test AuxiliaryConvolutions with a conv7-shaped (B, 1024, 19, 19) input."""
    model = AuxiliaryConvolutions()
    x = torch.rand((10, 1024, 19, 19))
    conv8_2feats, conv9_2feats, conv10_2feats, conv11_2feats = model(x)
    print('conv8_2feats.shape:', conv8_2feats.shape)
    print('conv9_2feats.shape:', conv9_2feats.shape)
    print('conv10_2feats.shape:', conv10_2feats.shape)
    print('conv11_2feats.shape:', conv11_2feats.shape)


def test_pre_conv():
    """Smoke-test PredictionConvolutions with random feature maps of the six SSD scales."""
    n_classes = 21
    model = PredictionConvolutions(n_classes)
    conv4_3feats = torch.rand((10, 512, 38, 38))
    conv7_feats = torch.rand((10, 1024, 19, 19))
    conv8_2feats = torch.rand((10, 512, 10, 10))
    conv9_2feats = torch.rand((10, 256, 5, 5))
    conv10_2feats = torch.rand((10, 256, 3, 3))
    conv11_2feats = torch.rand((10, 256, 1, 1))
    locs, class_scores = model(conv4_3feats, conv7_feats, conv8_2feats,
                               conv9_2feats, conv10_2feats, conv11_2feats)
    print(locs.shape)          # expected (10, 8732, 4)
    print(class_scores.shape)  # expected (10, 8732, n_classes)
def test_SSD300():os.environ["CUDA_VISIBLE_DEVICES"] = '0'n_classes = 21model = SSD300(n_classes)print('==model', model)x = torch.rand((10, 3, 300, 300))locs, class_scores = model(x)print('locs.shape', locs.shape)print('class_scores.shape', class_scores.shape)def test_mutiboxloss():prior_boxes = create_prior_boxes()loss_model = MultiBoxLoss(prior_boxes)prediction_locs = torch.rand(2, 8732, 4)prediction_scores = torch.rand(2, 8732, 21)boxes = [torch.tensor([[0.1040, 0.1946, 0.9400, 0.9480],[0.3140, 0.0973, 0.5760, 0.3756]]).to(device),torch.tensor([[0.0000, 0.6107, 0.8540, 0.7787]]).to(device)]labels = [torch.tensor([13,  15]).to(device),torch.tensor([4]).to(device)]# boxes = torch.tensor([[[1, 2, 3, 4]],#                       [[7, 8, 9, 10],#                       [4, 5, 6, 7]]])# labels = torch.tensor([[1],#                     [1, 3]])loss_sclar = loss_model(prediction_locs, prediction_scores, boxes, labels)print('==loss_sclar',loss_sclar)def create_prior_boxes():"""创建SSD300的先验框(cx, cy, w, h)(8prediction_locs, prediction_scores, boxes, labels372,4)个box"""os.environ["CUDA_VISIBLE_DEVICES"] = '0'device = torch.device("cuda" if torch.cuda.is_available() else "cpu")from math import sqrtfmap_size = {'conv4_3':38, 'conv7':19, 'conv8_2':10,'conv9_2':5, 'conv10_2':3, 'conv11_2':1}anchor_scale = {'conv4_3':0.1,'conv7':0.2,'conv8_2':0.375,'conv9_2':0.55,'conv10_2':0.725,'conv11_2':0.9}anchor_ratio = {'conv4_3':[1,2,0.5], 'conv7':[1,2,3,0.5,0.33], 'conv8_2':[1,2,3,0.5,0.33],'conv9_2':[1,2,3,0.5,0.33], 'conv10_2':[1,2,0.5], 'conv11_2':[1,2,0.5]}prior_boxes = []for index,fmap in enumerate(fmap_size):for i in range(fmap_size[fmap]):for j in range(fmap_size[fmap]):cy,cx = (i+0.5)/fmap_size[fmap], (j+0.5)/fmap_size[fmap]for ratio in anchor_ratio[fmap]:prior_boxes.append([cx, cy, anchor_scale[fmap]*sqrt(ratio), anchor_scale[fmap]/sqrt(ratio)])if ratio==1:#添加额外框try:extra_scale = sqrt(anchor_scale[fmap]*anchor_scale[fmap_size[index+1]])except:extra_scale = 
1.prior_boxes.append([cx, cy, extra_scale, extra_scale])# print('len(prior_boxes)',len(prior_boxes))# prior_boxes = [[1,2,3,4],#                [3,4,5,6]]prior_boxes = torch.FloatTensor(prior_boxes).to(device)prior_boxes.clamp_(0,1)#防止越界print('prior_boxes.shape', prior_boxes.shape)# print(prior_boxes)return prior_boxesdef decimate(tensor, m):"""Decimate a tensor by a factor 'm', i.e. downsample by keeping every 'm'th value.This is used when we convert FC layers to equivalent Convolutional layers, BUT of a smaller size.:param tensor: tensor to be decimated:param m: list of decimation factors for each dimension of the tensor; None if not to be decimated along a dimension:return: decimated tensor"""assert tensor.dim() == len(m)for d in range(tensor.dim()):if m[d] is not None:tensor = tensor.index_select(dim=d,index=torch.arange(start=0, end=tensor.size(d), step=m[d]).long())# print('==tensor.shape:', tensor.shape)return tensor
def test_fc_conv():"""fc (4096,25088)-->conv (1024,512,3,3)"""fc_weight_init = torch.rand(4096, 25088)fc_weight = fc_weight_init.reshape(4096, 512, 7, 7)m = [4, None, 3, 3]conv_weight = decimate(fc_weight, m)print('==conv_weight.shape', conv_weight.shape)def index_select():x = torch.linspace(1, 12, steps=12, requires_grad=True).reshape(3, 4)print('==x', x)print(x.dtype)print(x.data)print(x.data.dtype)# indices = torch.LongTensor([0, 2])# y = torch.index_select(x, 0, indices)  # 对行操作# print('==y', y)## z = torch.index_select(x, 1, indices)  # 对列操作# print('==z', z)## z = torch.index_select(y, 1, indices)  # 对列操作# print('==z', z)if __name__ == '__main__':os.environ["CUDA_VISIBLE_DEVICES"] = '0'# test_vgg_base()# test_AUx_conv()# test_pre_conv()# test_fc_conv()# index_select()# create_prior_boxes()# test_SSD300()test_mutiboxloss()

二.多尺度训练与测试

1.多尺度训练

目的:用不同的尺度去帮助模型适应各种大小的目标,获得对尺寸的鲁棒性。一般是每个batch随机选择一个合适的尺度进行训练即可.

2.多尺度测试

2.1 one-stage 多尺度测试

对单个尺度的结果先进行NMS,再resize成同一个尺度大小再进行一次NMS.先对单个尺度结果进行NMS可以减少推理时间.

2.2 two-stage 多尺度测试

(1) 不同尺度图,通过Backbone+RPN和各自的NMS之后,会得到各自的proposals。再把尺度统一到同一张图的大小上去,然后合并到一起做阈值为0.7的NMS,得到Proposals。

(2) R-CNN阶段依然希望用多尺度,所以需要把proposals分别resize到橙色和绿色的图的尺寸上去,然后各自过R-CNN。后面的步骤与RPN和one stage是一样的,先各自做NMS,然后Resize到统一尺寸后再合并做阈值为0.5的NMS。

参考:
https://mp.weixin.qq.com/s/lBhPjOiT_05WXwxFCXj2mQ

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.mzph.cn/news/493196.shtml

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

01-08-02【Nhibernate (版本3.3.1.4000) 出入江湖】二级缓存:NHibernate自带的HashtableProvider...

第一步骤&#xff1a;hibernate.cfg.xml文件补上如下配置&#xff1a; <?xml version"1.0" encoding"utf-8"?> <!-- This template was written to work with NHibernate.Test. Copy the template to your NHibernate.Test project folder and…

2018, 自动驾驶异常艰难的一年

编译&#xff1a;张玺 &#xff0c;编辑&#xff1a;宇多田摘要&#xff1a;虽然文章几乎聚焦于美国硅谷的技术公司&#xff0c;但这并不意味着作者提出的种种问题不存在于中国的技术公司身上。有意思的是&#xff0c;作者批评了各大公司此前疯狂立 flag&#xff0c;却最后纷纷…

目标检测矩形框与polygon数据增加--裁剪,拓展,旋转

1.裁剪 import torch from torchvision import transforms import cv2 import numpy as np import types from numpy import random class RandomSampleCrop(object):"""CropArguments:img (Image): the image being input during trainingboxes (Tensor): th…

医生们说,AI不会取代我们!

来源&#xff1a;IEEE电气电子工程师学会每次人工智能在医疗任务中与医生进行竞争&#xff08;对此我们已经报道过很多次&#xff09;时&#xff0c;一个问题不可避免地浮出水面&#xff1a;人工智能会取代医生吗&#xff1f;如果你与AI 专家或硅谷投资者交谈&#xff0c;答案往…

ubuntu安装python3.5+pycharm+anaconda+opencv+docker+nvidia-docker+tensorflow+pytorch+Cmake3.8

一&#xff0c;切换python版本为3.5 装好ubuntu&#xff0c;python版本是2.7的 我自己安装并更改打开为python3.5 sudo apt-get install python3.5 设置优先级和默认环境&#xff1a; sudo update-alternatives --install /usr/bin/python python /usr/bin/python2 100 su…

学界 | 量化深度强化学习算法的泛化能力

来源&#xff1a;AI 科技评论OpenAI 近期发布了一个新的训练环境 CoinRun&#xff0c;它提供了一个度量智能体将其学习经验活学活用到新情况的能力指标&#xff0c;而且还可以解决一项长期存在于强化学习中的疑难问题——即使是广受赞誉的强化算法在训练过程中也总是没有运用监…

《科学》评出2018年度十大科学突破事件

来源&#xff1a;科学大院《科学》杂志每年会评出在即将过去的一年里最为重要的十大科学突破&#xff08;Science Breakthrough&#xff09;。今年&#xff0c;夺得年度突破桂冠的是“单细胞水平细胞谱系追踪技术”&#xff0c;帮助破获多起悬案的法医系谱技术、#MeToo 运动等也…

递归理解以及时间复杂度计算

一.复杂度分析&#xff1a; 可以理解为递归的深度就是空间复杂度&#xff0c;时间复杂度就是O(T*depth),其中&#xff34;是每个递归函数的时间复杂度&#xff0c;depth是递归深度&#xff0e; #空间复杂度O(1) def sum1_(n):res 0for i in range(n1):resireturn res#递归 空…

性价比高出英特尔45%,亚马逊的云服务器芯片如何做到?| 解读

来源&#xff1a;TheNextPlatform编译&#xff1a;机器之能 张玺摘要&#xff1a;到目前为止&#xff0c;亚马逊和其他大型云运营商几乎全部使用英特尔的 Xeon 芯片。虽然在服务器芯片市场&#xff0c;英特尔市场占有率非常高&#xff0c;但亚马逊正使用折扣策略来赢得客户。亚…

GIOU loss+DIOU loss+CIOU loss

一.IOU 1.GIOU解决没有交集的框,IOU为0,其损失函数导数为0,无法优化的问题。 图1 GIOU,IOU,l2范数差异 a)可看出 l2值一样,IOU值是不一样的,说明L1,L2这些Loss用于回归任务时&#xff0c;不能等价于最后用于评测检测的IoU. b)可看出当框有包含关系,GIOU就退化为IOU 其是找…

《科学》十大年度科学突破反映的新动向

来源&#xff1a;新华网摘要&#xff1a;从测定分子结构到宇宙探索&#xff0c;从发现远古动物到揭示细胞的秘密&#xff0c;美国权威学术刊物《科学》杂志评选的2018年十大科学突破&#xff0c;在时间和空间尺度上拓宽着人类认知的边界&#xff0c;也反映了近年来科学发展的三…

ctpn论文阅读与代码

代码地址: https://github.com/zonghaofan/ctpn_torch 1.通用的目标检测是封闭的,而文字是封闭且连续 2. 构造一系列宽度相等的小文本,回归中心y坐标和高度 3. 对于边界回归x坐标,在进一次修正 4.整个模型就是backbone提取特征,将每个像素点的相邻3*3像素拉成行向量,利用空间…

yum配置与使用

yum配置与使用(很详细) yum的配置一般有两种方式&#xff0c;一种是直接配置/etc目录下的yum.conf文件&#xff0c;另外一种是在/etc/yum.repos.d目录下增加.repo文件。一、yum的配置文件$ cat /etc/yum.conf [main]cachedir/var/cache/yum #yum下载的RPM包的缓存目录k…

新技术不断涌现,下一代云计算的突破口在哪里?

来源&#xff1a;日知录技术社区这是一个IT技术飞速发展的时代&#xff0c;在硬件基础设施的不断升级以及虚拟化网络等技术的日益成熟下&#xff0c;云厂商也正面临着各种新技术带来的巨大挑战。从数据中心的基础建设到云平台的系统构建再到产品底层的技术改革&#xff0c;该如…

生成高斯热力图(craft中有使用)+2d heatmap+3d heatmap

一.生成高斯热力图 from math import exp import numpy as np import cv2 import osclass GaussianTransformer(object):def __init__(self, imgSize512, region_threshold0.4,affinity_threshold0.2):distanceRatio 3.34scaledGaussian lambda x: exp(-(1 / 2) * (x ** 2))…

POP动画[1]

POP动画[1] pop动画是facebook扩展CoreAnimation的,使用及其方便:) 1:Spring系列的弹簧效果(两个动画kPOPLayerBounds与kPOPLayerCornerRadius同时运行) #import "RootViewController.h" #import "YXEasing.h" #import "POP.h" #import "YX…

远比5G发展凶猛!物联网2018白皮书,国内规模已达1.2万亿

来源&#xff1a;智东西摘要&#xff1a;研判物联网的技术产业进展情况&#xff0c;梳理消费物联网、智慧城市物联网、生产性物联网三类物联网应用现状及驱动因素 。在供给侧和需求侧的双重推动下&#xff0c;物联网进入以基础性行业和规模消费为代表的第三次发展浪潮。 5G、 低…

收缩分割多边形(PSENet中有使用)

目的:为了解决密集文本的分割问题 代码: # -*- codingutf-8 -*- import os import cv2 import Polygon as plg import pyclipper import numpy as npdef dist(a, b):return np.sqrt(np.sum((a - b) ** 2))#计算周长 def perimeter(bbox):peri 0.0for i in range(bbox.shape[…

Android 3D emulation 架构理解

Android Emulator 给用户提供 GPU on 选项&#xff0c;意思是利用 Host ( 就是执行 Emulator 的PC机) 的 GPU. 当然PC机必须把 OpenGL 的驱动装好 在实现上就是把 libGLESv1_CM.so libGLESv2.so 替换掉&#xff0c;当system调用 gl的函数的时候&#xff0c;把调用打包为strea…

年度回顾:2018年的人工智能/机器学习惊喜及预测19年的走势

来源&#xff1a;网络大数据考虑到技术变革的速度&#xff0c;我认为让专业IT人士分享他们对2018年最大惊喜及2019年预测的看法会很有趣。以下是他们对人工智能(AI)&#xff0c;机器学习( ML)和其他数据科学迭代的看法&#xff1a;CLARA分析公司首席执行官兼创始人&#xff1a;…