卷积的计算 - numpy的实现 2
flyfish
PyTorch计算
用于验证结果
import torch

# Reference result computed with PyTorch, used to verify the NumPy version.
# A single-input-channel, single-output-channel 2x2 convolution with stride 1.
conv = torch.nn.Conv2d(in_channels=1, out_channels=1, kernel_size=2, stride=1)

# Define the convolution kernel: values 1..4 viewed as (1, 1, 2, 2).
W = torch.arange(1, 5).view(-1, 1, 2, 2).float()

# Create a 2-D image (4x4 pixels): values 1..16 viewed as (1, 1, 4, 4).
image = torch.arange(1, 17).view(-1, 1, 4, 4).float()
print("input:",image)

# Overwrite the randomly initialised parameters so the result is deterministic:
# the kernel defined above and a zero bias.
conv.weight.data = W
conv.bias.data = torch.zeros([1])
print("weight:",conv.weight.data)
print("bias:",conv.bias.data )

# Apply the convolution; a 4x4 input with a 2x2 kernel and stride 1 gives 3x3.
r = conv(image)
print(r)
输出
input: tensor([[[[ 1., 2., 3., 4.],[ 5., 6., 7., 8.],[ 9., 10., 11., 12.],[13., 14., 15., 16.]]]])
weight: tensor([[[[1., 2.],[3., 4.]]]])
bias: tensor([0.])
tensor([[[[ 44., 54., 64.],[ 84., 94., 104.],[124., 134., 144.]]]], grad_fn=<ConvolutionBackward0>)
计算输出大小的公式
简化版
$$\text{output} = \frac{\text{input} + 2 \times \text{padding} - \text{kernel}}{\text{stride}} + 1$$
详细版
$$H_{out} = \left\lfloor\frac{H_{in} + 2 \times \text{padding}[0] - \text{dilation}[0] \times (\text{kernel\_size}[0] - 1) - 1}{\text{stride}[0]} + 1\right\rfloor$$
$$W_{out} = \left\lfloor\frac{W_{in} + 2 \times \text{padding}[1] - \text{dilation}[1] \times (\text{kernel\_size}[1] - 1) - 1}{\text{stride}[1]} + 1\right\rfloor$$
使用纯numpy实现
import numpy as np
# Create a 2-D image (4x4 pixels).
image = np.array(
    [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12], [13, 14, 15, 16]],
    dtype=np.float32,
)

# Define the convolution kernel.
kernel = np.array([[1, 2], [3, 4]], dtype=np.float32)

image_height, image_width = image.shape
# The kernel is square, so one side length describes it.
kernel_size = kernel.shape[0]


def output_size(image_dim, padding, kernel_size, stride):
    """Return the number of output positions along one axis.

    Computes ``floor((image_dim - kernel_size + 2 * padding) / stride) + 1``.

    Args:
        image_dim: Input length along the axis.
        padding: Padding added to each side of the axis.
        kernel_size: Kernel length along the axis.
        stride: Step between consecutive kernel positions.

    Returns:
        The output length. Floor division keeps it an integer for integer
        arguments, so it can be used directly as an array dimension even
        when the stride does not divide the span evenly.
    """
    return (image_dim - kernel_size + 2 * padding) // stride + 1


# The image is square, so a single size is used for both output axes.
out_size = output_size(image_dim=image_height, kernel_size=kernel_size, padding=0, stride=1)
print(out_size)

# Initialise the output image.
output_image = np.zeros((out_size, out_size), dtype=np.float32)

# Implement the convolution with plain nested loops so that the individual
# multiplications and additions are visible. The kernel is applied without
# flipping: each window is multiplied element-wise by the kernel and summed.
for i in range(out_size):
    for j in range(out_size):
        output_image[i, j] = np.sum(image[i:i+kernel_size, j:j+kernel_size] * kernel)

print(output_image)
输出
[[ 44. 54. 64.][ 84. 94. 104.][124. 134. 144.]]