Inception_V2_V3_pytorch
在上一节我们已经精度了Inception_V2_V3这篇论文,本篇我们将用pyorch复现论文中的网络结构!
从论文中我们可以知道InceptionV3的主要改进为:
- 5 * 5卷积分解为2个3 * 3卷积核
- 分解为不对称卷积
- 滤波器组
我们可将GoogLeNetv3分解为以下5个模块:
- InceptionV3模块
- 辅助分类器模块
- 非对称分离式卷积
- 滤波器组
- 用2个3 * 3卷积核代替1个5 * 5 卷积核
接下来我们将在Layers.py中定义Separable_Conv2d,Concat_Separable_Conv2d,Conv2d,Flatten,Squeeze这些类。
实现的是下面这中非对称分离式卷积结构!
实现的是下面滤波器组的结构
Layers.py中代码如下:
import torch
import torch.nn as nn# 非对称分离式卷积
class Separable_Conv2d(nn.Module):def __init__ (self,in_channels,out_channels,kernel_size,stride = 1,padding = 0):super(Separable_Conv2d,self).__init__()self.conv_h = nn.Conv2d(in_channels,in_channels,(kernel_size,1),stride=(stride,1),padding=(padding,0))self.conv_w = nn.Conv2d(in_channels,out_channels,(1,kernel_size),stride=(1,stride),padding=(0,padding))self.bn = nn.BatchNorm2d(out_channels)self.relu = nn.ReLU(inplace=True)def forward(self,x):x = self.conv_h(x)x = self.conv_w(x)x = self.bn(x)x = self.relu(x)return x# 滤波器组
class Concat_Separable_Conv2d(nn.Module):def __init__(self,in_channels,out_channels,kernel_size,stride = 1,padding=0):super(Concat_Separable_Conv2d,self).__init__()self.conv_h = nn.Conv2d(in_channels,in_channels,(kernel_size,1),stride=(stride,1),padding=(padding,0))self.conv_w = nn.Conv2d(in_channels,out_channels,(1,kernel_size),stride=(1,stride),padding=(0,padding))self.bn = nn.BatchNorm2d(out_channels * 2)self.relu = nn.ReLU(inplace=True)def forward(self,x):x_h = self.conv_h(x)x_w = self.conv_w(x)x = torch.cat([x_h,x_w],dim=1)x = self.bn(x)x = self.relu(x)return x# 定义基础的卷积模块添加BN和ReLU
class Conv2d(nn.Module):def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, output=False):super(Conv2d, self).__init__()self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)self.output = outputif self.output == False:self.bn = nn.BatchNorm2d(out_channels)self.relu = nn.ReLU(inplace=True)def forward(self, x):x = self.conv(x)if self.output:return xelse:x = self.bn(x)x = self.relu(x)return xclass Flatten(nn.Module):def __init__(self):super(Flatten, self).__init__()def forward(self, x):return torch.flatten(x, 1)class Squeeze(nn.Module):def __init__(self):super(Squeeze, self).__init__()def forward(self, x):return torch.squeeze(x)
import torch
import torch.nn as nn
from Layers import *
import torch.nn.functional as F
from functools import partialclass Inceptionv3(nn.Module):def __init__(self,input_channels,conv1_channel,conv3_reduce_channel,conv3_channel,conv3_double_reduce_channel,conv3_double_channel,pool_reduce_channel,stride = 1,pool_type = 'AVG',mode = 1):super(Inceptionv3,self).__init__()self.stride = strideif stride == 2:padding_conv3 = 0padding_conv7 = 2else:padding_conv3 = 1padding_conv7 = 3if conv1_channel != 0:self.conv1 = Conv2d(input_channels,conv1_channel,kernel_size = 1)else:self.conv1 = Noneself.conv3_reduce = Conv2d(input_channels,conv3_reduce_channel,kernel_size = 1)match mode:case '1': # 用两个3*3卷积代替一个5*5卷积self.conv3 = Conv2d(conv3_reduce_channel,conv3_channel,kernel_size = 3,stride = stride,padding = padding_conv3)self.conv3_double1 = Conv2d(conv3_double_reduce_channel,conv3_double_channel,kernel_size = 3,padding = 1)self.conv3_double2 = Conv2d(conv3_double_channel,conv3_double_channel,kernel_size = 3,stride=stride,padding = padding_conv3)case '2': # 非对称分离式卷积self.conv3 = Separable_Conv2d(conv3_reduce_channel,conv3_channel,kernel_size = 7,stride = stride,padding = padding_conv7)self.conv3_double1 = Separable_Conv2d(conv3_double_reduce_channel,conv3_double_channel,kernel_size = 7,padding = 3)self.conv3_double2 = Separable_Conv2d(conv3_double_channel,conv3_double_channel,kernel_size = 7,stride=stride,padding = padding_conv7)case '3': # 滤波器组self.conv3 = Concat_Separable_Conv2d(conv3_reduce_channel,conv3_channel,kernel_size = 3,stride = stride,padding = 1)self.conv3_double1 = Conv2d(conv3_double_reduce_channel,conv3_double_channel,kernel_size = 3,padding = 1)self.conv3_double2 = Concat_Separable_Conv2d(conv3_double_channel,conv3_double_channel,kernel_size = 3,stride=stride,padding = 1)self.conv3_double_reduce = Conv2d(input_channels,conv3_double_reduce_channel,kernel_size = 1)if pool_type == 'MAX':self.pool = nn.MaxPool2d(kernel_size = 3,stride = stride,padding = padding_conv3)elif pool_type == 'AVG':self.pool = nn.AvgPool2d(kernel_size = 3,stride = stride,padding = padding_conv3)if pool_reduce_channel != 0:self.pool_reduce = Conv2d(input_channels,pool_reduce_channel,kernel_size = 1)else:self.pool_reduce = Nonedef forward(self,x):output_conv3 = self.conv3(self.conv3_reduce(x))output_conv3_double = self.conv3_double2(self.conv3_double1(self.conv3_double_reduce(x)))if self.pool_reduce != None:output_pool = self.pool_reduce(self.pool(x))else:output_pool = self.pool(x)if self.conv1 != None:output_conv1 = self.conv1(x)outputs = torch.cat([output_conv1,output_conv3,output_conv3_double,output_pool],dim = 1)else:outputs = torch.cat([output_conv3,output_conv3_double,output_pool],dim = 1)return outputs
# 辅助分类器
class InceptionAux(nn.Module):def __init__(self,input_channels,num_classes):super(InceptionAux,self).__init__()self.aux = nn.Sequential(nn.AvgPool2d(kernel_size = 5,stride = 3),Conv2d(input_channels,128,kernel_size = 1),Conv2d(128,1024,kernel_size = 5),Conv2d(1024,num_classes,kernel_size = 1,output = True),Squeeze())def forward(self,x):x = self.aux(x)return x
class GoogLeNetv3(nn.Module):def __init__(self,num_classes,mode = 'train'):super(GoogLeNetv3,self).__init__()self.num_classes = num_classesself.mode = modeself.layers = nn.Sequential(Conv2d(3,32,3,stride=2),Conv2d(32,32,3,stride = 1),Conv2d(32,64,kernel_size = 3,stride=1,padding = 1),nn.MaxPool2d(kernel_size= 3 ,stride = 2),Conv2d(64,80,kernel_size = 3),Conv2d(80,192,kernel_size=3,stride=2),Conv2d(192,288,kernel_size=3,stride=1,padding=1),Inceptionv3(288,64,48,64,64,96,64,mode='1'), #3aInceptionv3(288,64,48,64,64,96,64,mode='1'), #3bInceptionv3(288,0,128,384,64,96,0,stride=2,pool_type='MAX',mode='1'), #3cInceptionv3(768,192,128,192,128,192,192,mode='2'), #4aInceptionv3(768,192,160,192,160,192,192,mode='2'), #4bInceptionv3(768,192,160,192,160,192,192,mode='2'), #4cInceptionv3(768,192,192,192,192,192,192,mode='2'), #4dInceptionv3(768,0,192,320,192,192,0,stride=2,pool_type='MAX',mode='2'), #4eInceptionv3(1280,320,384,384,448,384,192,mode='3'), #5aInceptionv3(2048,320,384,384,448,384,192,pool_type='MAX',mode='3'), #5bnn.AvgPool2d(8,1),Conv2d(2048,num_classes,kernel_size=1,output=True),Squeeze(),)if mode == 'train':self.aux = InceptionAux(768,num_classes)def forward(self,x):for idx,layer in enumerate(self.layers):if(idx == 14 and self.mode == 'train'):aux = self.aux(x)x = layer(x)if self.mode == 'train':return x,auxelse:return xdef init_weights(self,init_mode = 'VGG'):def init_function(m,init_mode):if type(m) == nn.Linear or type(m) == nn.Conv2d:if init_mode == 'VGG':torch.nn.init.normal_(m.weight,mean=0.0,std=0.01)elif init_mode == 'XAVIER':fan_in,fan_out = torch.nn.init._calculate_fan_in_and_fan_out(m.weight)std = (2.0 / float(fan_in + fan_out)) ** 0.5a = (3.0)**0.5 * stdwith torch.no_grad():m.weight.uniform_(-a,a)elif init_mode == 'KAMING':torch.nn.init.kaiming_uniform(m.weight)m.bias.data.fill_(0)_ = self.apply(partial(init_function,init_mode = init_mode))
from torchsummary import summary
net = GoogLeNetv3(1000).cuda()
summary(net, (3, 299, 299))
----------------------------------------------------------------Layer (type) Output Shape Param #
================================================================Conv2d-1 [-1, 32, 149, 149] 896BatchNorm2d-2 [-1, 32, 149, 149] 64ReLU-3 [-1, 32, 149, 149] 0Conv2d-4 [-1, 32, 149, 149] 0Conv2d-5 [-1, 32, 147, 147] 9,248BatchNorm2d-6 [-1, 32, 147, 147] 64ReLU-7 [-1, 32, 147, 147] 0Conv2d-8 [-1, 32, 147, 147] 0
...
Concat_Separable_Conv2d-331 [-1, 768, 8, 8] 0MaxPool2d-332 [-1, 2048, 8, 8] 0Conv2d-333 [-1, 192, 8, 8] 393,408BatchNorm2d-334 [-1, 192, 8, 8] 384ReLU-335 [-1, 192, 8, 8] 0Conv2d-336 [-1, 192, 8, 8] 0Conv2d-337 [-1, 320, 8, 8] 655,680BatchNorm2d-338 [-1, 320, 8, 8] 640ReLU-339 [-1, 320, 8, 8] 0Conv2d-340 [-1, 320, 8, 8] 0Inceptionv3-341 [-1, 2048, 8, 8] 0AvgPool2d-342 [-1, 2048, 1, 1] 0Conv2d-343 [-1, 1000, 1, 1] 2,049,000Conv2d-344 [-1, 1000, 1, 1] 0Squeeze-345 [-1, 1000] 0
================================================================
Total params: 28,850,400
Trainable params: 28,850,400
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 1.02
Forward/backward pass size (MB): 270.86
Params size (MB): 110.06
Estimated Total Size (MB): 381.94
----------------------------------------------------------------
1, 1000, 1, 1] 2,049,000
Conv2d-344 [-1, 1000, 1, 1] 0
Squeeze-345 [-1, 1000] 0
================================================================
Total params: 28,850,400
Trainable params: 28,850,400
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 1.02
Forward/backward pass size (MB): 270.86
Params size (MB): 110.06
Estimated Total Size (MB): 381.94
----------------------------------------------------------------