yolov10模块

- 1 C2f
- 2 C2fCIB
- - 2.1 CIB
  - 2.2 RepVGGDW
- 3 PSA
- 4 SCDown
- 5 v10Detect

论文代码：https://github.com/THU-MIG/yolov10
论文链接：https://arxiv.org/abs/2405.14458

Conv是Conv2d+BN+SiLU
PW是Pointwise Convolution(逐点卷积)
DW是Depthwise Convolution(逐深度卷积)
ch_in是输入通道 ch_out是输出通道
下图所有的C都是根据代码设定以及yolov10n.yaml、yolov10s.yaml、yolov10m.yaml、yolov10b.yaml、yolov10l.yaml、yolov10x.yaml的默认值计算得到

1 C2f

n(n=1)表示n默认为1，C1是ch_in，C2是ch_out
在这里插入图片描述

class C2f(nn.Module):"""Faster Implementation of CSP Bottleneck with 2 convolutions."""def __init__(self, c1, c2, n=1, shortcut=False, g=1, e=0.5):"""Initialize CSP bottleneck layer with two convolutions with arguments ch_in, ch_out, number, shortcut, groups,expansion."""super().__init__()self.c = int(c2 * e)  # hidden channelsself.cv1 = Conv(c1, 2 * self.c, 1, 1)self.cv2 = Conv((2 + n) * self.c, c2, 1)  # optional act=FReLU(c2)self.m = nn.ModuleList(Bottleneck(self.c, self.c, shortcut, g, k=((3, 3), (3, 3)), e=1.0) for _ in range(n))def forward(self, x):"""Forward pass through C2f layer."""y = list(self.cv1(x).chunk(2, 1))y.extend(m(y[-1]) for m in self.m)return self.cv2(torch.cat(y, 1))def forward_split(self, x):"""Forward pass using split() instead of chunk()."""y = list(self.cv1(x).split((self.c, self.c), 1))y.extend(m(y[-1]) for m in self.m)return self.cv2(torch.cat(y, 1))

2 C2fCIB

C2fCIB与C2f的区别只是使用CIB代替了Bottleneck
在这里插入图片描述

class C2fCIB(C2f):"""Faster Implementation of CSP Bottleneck with 2 convolutions."""def __init__(self, c1, c2, n=1, shortcut=False, lk=False, g=1, e=0.5):"""Initialize CSP bottleneck layer with two convolutions with arguments ch_in, ch_out, number, shortcut, groups,expansion."""super().__init__(c1, c2, n, shortcut, g, e)self.m = nn.ModuleList(CIB(self.c, self.c, shortcut, e=1.0, lk=lk) for _ in range(n))

2.1 CIB

CIB是中间的这个模块，小模型时(n和s)，CIB是下面的n s，提供更大的感受野，如果是较大的模型(m、b、l、x)时，CIB是上面的m b l x。C1=ch_cin C2=ch_out
在这里插入图片描述

class CIB(nn.Module):"""Standard bottleneck."""def __init__(self, c1, c2, shortcut=True, e=0.5, lk=False):"""Initializes a bottleneck module with given input/output channels, shortcut option, group, kernels, andexpansion."""super().__init__()c_ = int(c2 * e)  # hidden channelsself.cv1 = nn.Sequential(Conv(c1, c1, 3, g=c1),Conv(c1, 2 * c_, 1),Conv(2 * c_, 2 * c_, 3, g=2 * c_) if not lk else RepVGGDW(2 * c_),Conv(2 * c_, c2, 1),Conv(c2, c2, 3, g=c2),)self.add = shortcut and c1 == c2def forward(self, x):"""'forward()' applies the YOLO FPN to input data."""return x + self.cv1(x) if self.add else self.cv1(x)

2.2 RepVGGDW

下图中的卷积都是DW，且都不使用SiLu, ed=ch_in
在这里插入图片描述

class RepVGGDW(torch.nn.Module):def __init__(self, ed) -> None:super().__init__()self.conv = Conv(ed, ed, 7, 1, 3, g=ed, act=False)self.conv1 = Conv(ed, ed, 3, 1, 1, g=ed, act=False)self.dim = edself.act = nn.SiLU()def forward(self, x):return self.act(self.conv(x) + self.conv1(x))def forward_fuse(self, x):return self.act(self.conv(x))@torch.no_grad()def fuse(self):conv = fuse_conv_and_bn(self.conv.conv, self.conv.bn)conv1 = fuse_conv_and_bn(self.conv1.conv, self.conv1.bn)conv_w = conv.weightconv_b = conv.biasconv1_w = conv1.weightconv1_b = conv1.biasconv1_w = torch.nn.functional.pad(conv1_w, [2,2,2,2])final_conv_w = conv_w + conv1_wfinal_conv_b = conv_b + conv1_bconv.weight.data.copy_(final_conv_w)conv.bias.data.copy_(final_conv_b)self.conv = convdel self.conv1

3 PSA

在这里插入图片描述

class PSA(nn.Module):def __init__(self, c1, c2, e=0.5):super().__init__()assert(c1 == c2)self.c = int(c1 * e)self.cv1 = Conv(c1, 2 * self.c, 1, 1)self.cv2 = Conv(2 * self.c, c1, 1)self.attn = Attention(self.c, attn_ratio=0.5, num_heads=self.c // 64)self.ffn = nn.Sequential(Conv(self.c, self.c*2, 1),Conv(self.c*2, self.c, 1, act=False))def forward(self, x):a, b = self.cv1(x).split((self.c, self.c), dim=1)b = b + self.attn(b)b = b + self.ffn(b)return self.cv2(torch.cat((a, b), 1))

4 SCDown

C1=ch_in C2=ch_out，第一个Conv是PW，第二个Conv是DW，且不需要激活函数
在这里插入图片描述

class SCDown(nn.Module):def __init__(self, c1, c2, k, s):super().__init__()self.cv1 = Conv(c1, c2, 1, 1)self.cv2 = Conv(c2, c2, k=k, s=s, g=c2, act=False)def forward(self, x):return self.cv2(self.cv1(x))

5 v10Detect

v10Detect只有分类头上与v8Detect有改变
在这里插入图片描述

class v10Detect(Detect):max_det = -1def __init__(self, nc=80, ch=()):super().__init__(nc, ch)c3 = max(ch[0], min(self.nc, 100))  # channelsself.cv3 = nn.ModuleList(nn.Sequential(nn.Sequential(Conv(x, x, 3, g=x), Conv(x, c3, 1)), \nn.Sequential(Conv(c3, c3, 3, g=c3), Conv(c3, c3, 1)), \nn.Conv2d(c3, self.nc, 1)) for i, x in enumerate(ch))self.one2one_cv2 = copy.deepcopy(self.cv2)self.one2one_cv3 = copy.deepcopy(self.cv3)def forward(self, x):one2one = self.forward_feat([xi.detach() for xi in x], self.one2one_cv2, self.one2one_cv3)if not self.export:one2many = super().forward(x)if not self.training:one2one = self.inference(one2one)if not self.export:return {"one2many": one2many, "one2one": one2one}else:assert(self.max_det != -1)boxes, scores, labels = ops.v10postprocess(one2one.permute(0, 2, 1), self.max_det, self.nc)return torch.cat([boxes, scores.unsqueeze(-1), labels.unsqueeze(-1)], dim=-1)else:return {"one2many": one2many, "one2one": one2one}def bias_init(self):super().bias_init()"""Initialize Detect() biases, WARNING: requires stride availability."""m = self  # self.model[-1]  # Detect() module# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1# ncf = math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # nominal class frequencyfor a, b, s in zip(m.one2one_cv2, m.one2one_cv3, m.stride):  # froma[-1].bias.data[:] = 1.0  # boxb[-1].bias.data[: m.nc] = math.log(5 / m.nc / (640 / s) ** 2)  # cls (.01 objects, 80 classes, 640 img)