YOLOv5之Common.py

文章目录

    • 1.学习目的
    • 2.网络模型![在这里插入图片描述](https://i-blog.csdnimg.cn/direct/67b8dbd00c9b4034ba370fc8b8a6031a.jpeg)
    • 3.common.py分析

1.学习目的

common.py 是 YOLOv5 中最关键的模块文件之一,定义了搭建网络所需的各个基础组件(如 Conv、Bottleneck、C3、SPPF 等)。本文的学习目的是逐个读懂这些模块的实现。

2.网络模型

![YOLOv5 网络模型结构图](https://i-blog.csdnimg.cn/direct/67b8dbd00c9b4034ba370fc8b8a6031a.jpeg)

3.common.py分析

# Ultralytics YOLOv5 🚀, AGPL-3.0 license
"""Common modules."""import ast
import contextlib
import json
import math
import platform
import warnings
import zipfile
from collections import OrderedDict, namedtuple
from copy import copy
from pathlib import Path
from urllib.parse import urlparseimport cv2
import numpy as np
import pandas as pd
import requests
import torch
import torch.nn as nn
from PIL import Image
from torch.cuda import amp# Import 'ultralytics' package or install if missing
try:import ultralyticsassert hasattr(ultralytics, "__version__")  # verify package is not directory
except (ImportError, AssertionError):import osos.system("pip install -U ultralytics")import ultralyticsfrom ultralytics.utils.plotting import Annotator, colors, save_one_boxfrom utils import TryExcept
from utils.dataloaders import exif_transpose, letterbox
from utils.general import (LOGGER,ROOT,Profile,check_requirements,check_suffix,check_version,colorstr,increment_path,is_jupyter,make_divisible,non_max_suppression,scale_boxes,xywh2xyxy,xyxy2xywh,yaml_load,
)
from utils.torch_utils import copy_attr, smart_inference_mode# 实现合适的p,使得输出形状和输入一致
def autopad(k, p=None, d=1):
    """
    Return the padding that keeps a stride-1 convolution's output the same shape as its input.

    Args:
        k (int | sequence of int): kernel size, scalar or per-dimension.
        p (int | sequence of int | None): explicit padding; returned unchanged when not None.
        d (int): dilation factor.

    Returns:
        `p` if it was supplied, otherwise half of the (dilation-adjusted) kernel size, floored.
        A scalar kernel yields an int, a sequence kernel yields a list.
    """
    scalar_kernel = isinstance(k, int)
    if d > 1:
        # A dilated kernel covers d * (k - 1) + 1 input positions.
        k = d * (k - 1) + 1 if scalar_kernel else [d * (x - 1) + 1 for x in k]
    if p is not None:
        return p
    # Auto-pad: half of the effective kernel size.
    return k // 2 if scalar_kernel else [x // 2 for x in k]


# Conv: standard convolution block, subclass of nn.Module
class Conv(nn.Module):
    """Convolution -> batch normalization -> activation, the basic building block of the network."""

    default_act = nn.SiLU()  # activation used when `act=True`

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
        """
        Build the conv / BN / activation triple.

        Args:
            c1: input channels.
            c2: output channels.
            k: kernel size.
            s: stride.
            p: padding; when None it is derived by `autopad(k, p, d)`.
            g: convolution groups.
            d: dilation.
            act: True selects `default_act`, an nn.Module instance is used as given,
                anything else disables the activation (nn.Identity).
        """
        super().__init__()
        padding = autopad(k, p, d)
        # bias is disabled because the following BatchNorm has its own shift term
        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, dilation=d, bias=False)
        self.bn = nn.BatchNorm2d(c2)
        if act is True:
            self.act = self.default_act
        elif isinstance(act, nn.Module):
            self.act = act
        else:
            self.act = nn.Identity()

    def forward(self, x):
        """Run `x` through convolution, batch normalization and activation, in that order."""
        normalized = self.bn(self.conv(x))
        return self.act(normalized)

    def forward_fuse(self, x):
        """Run `x` through convolution and activation only, skipping the BatchNorm layer."""
        convolved = self.conv(x)
        return self.act(convolved)


# DWConv: depth-wise convolution
class DWConv(Conv):
    """Depth-wise convolution: a `Conv` whose group count is gcd(c1, c2)."""

    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):
        """
        Build a depth-wise Conv.

        Args:
            c1: input channels.
            c2: output channels.
            k: kernel size.
            s: stride.
            d: dilation.
            act: activation selector, forwarded to `Conv`.
        """
        groups = math.gcd(c1, c2)
        super().__init__(c1, c2, k, s, g=groups, d=d, act=act)


# DWConvTranspose2d: depth-wise transposed convolution (upsampling)
class DWConvTranspose2d(nn.ConvTranspose2d):
    """Depth-wise transposed convolution: nn.ConvTranspose2d with groups set to gcd(c1, c2)."""

    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):
        """
        Build the transposed convolution.

        Args:
            c1: input channels.
            c2: output channels.
            k: kernel size.
            s: stride.
            p1: input padding (`padding` of nn.ConvTranspose2d).
            p2: output padding (`output_padding` of nn.ConvTranspose2d).
        """
        groups = math.gcd(c1, c2)
        super().__init__(c1, c2, k, s, padding=p1, output_padding=p2, groups=groups)


# TransformerLayer: transformer layer with multi-head attention
class TransformerLayer(nn.Module):
    """Transformer layer without LayerNorm: multi-head self-attention plus a two-layer linear block, both residual."""

    def __init__(self, c, num_heads):
        """
        Build the layer (see https://arxiv.org/abs/2010.11929).

        Args:
            c: embedding width; every linear layer maps c -> c without bias.
            num_heads: number of attention heads.
        """
        super().__init__()
        # query / key / value projections feeding the attention module
        self.q = nn.Linear(c, c, bias=False)
        self.k = nn.Linear(c, c, bias=False)
        self.v = nn.Linear(c, c, bias=False)
        self.ma = nn.MultiheadAttention(embed_dim=c, num_heads=num_heads)
        # feed-forward part
        self.fc1 = nn.Linear(c, c, bias=False)
        self.fc2 = nn.Linear(c, c, bias=False)

    def forward(self, x):
        """Apply self-attention and the two linear layers, adding a residual connection after each stage."""
        attended, _ = self.ma(self.q(x), self.k(x), self.v(x))  # attention weights are discarded
        x = attended + x
        return self.fc2(self.fc1(x)) + x


# TransformerBlock: optional conv, position embedding and stacked transformer layers
class TransformerBlock(nn.Module):
    """Transformer block for feature maps: optional channel-matching Conv, learnable position embedding, stacked TransformerLayers."""

    def __init__(self, c1, c2, num_heads, num_layers):
        """
        Build the block.

        Args:
            c1: input channels.
            c2: output channels; a `Conv(c1, c2)` is inserted only when c1 != c2.
            num_heads: attention heads per TransformerLayer.
            num_layers: number of stacked TransformerLayers.
        """
        super().__init__()
        self.conv = Conv(c1, c2) if c1 != c2 else None
        self.linear = nn.Linear(c2, c2)  # learnable position embedding
        layers = [TransformerLayer(c2, num_heads) for _ in range(num_layers)]
        self.tr = nn.Sequential(*layers)
        self.c2 = c2

    def forward(self, x):
        """Flatten the two trailing dims into a sequence, run the transformer stack, and restore the 4-D layout."""
        if self.conv is not None:
            x = self.conv(x)
        batch, _, dim2, dim3 = x.shape
        # (b, c, d2, d3) -> (b, c, d2*d3) -> (d2*d3, b, c)
        tokens = x.flatten(2).permute(2, 0, 1)
        encoded = self.tr(tokens + self.linear(tokens))
        # (d2*d3, b, c) -> (b, c, d2*d3) -> (b, c2, d2, d3)
        return encoded.permute(1, 2, 0).reshape(batch, self.c2, dim2, dim3)


# Bottleneck: the core residual bottleneck block
class Bottleneck(nn.Module):
    """Bottleneck block: 1x1 Conv followed by 3x3 Conv, with an optional residual shortcut."""

    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
        """
        Build the bottleneck.

        Args:
            c1: input channels.
            c2: output channels.
            shortcut: request a residual connection (only used when c1 == c2).
            g: groups of the 3x3 convolution.
            e: expansion ratio; hidden channels are int(c2 * e).
        """
        super().__init__()
        hidden = int(c2 * e)
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden, c2, 3, 1, g=g)
        # the residual add is only shape-compatible when input and output channels match
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Apply both convolutions and add the input back when the shortcut is active."""
        out = self.cv2(self.cv1(x))
        return x + out if self.add else out


# BottleneckCSP: CSP bottleneck with cross-stage partial connections
class BottleneckCSP(nn.Module):
    """CSP bottleneck: a Bottleneck branch and a plain 1x1 branch, concatenated, normalized, activated and fused."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """
        Build the CSP bottleneck.

        Args:
            c1: input channels.
            c2: output channels.
            n: number of stacked Bottleneck blocks.
            shortcut: forwarded to each Bottleneck.
            g: groups, forwarded to each Bottleneck.
            e: expansion ratio; hidden channels are int(c2 * e).
        """
        super().__init__()
        hidden = int(c2 * e)
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = nn.Conv2d(c1, hidden, 1, 1, bias=False)
        self.cv3 = nn.Conv2d(hidden, hidden, 1, 1, bias=False)
        self.cv4 = Conv(2 * hidden, c2, 1, 1)
        self.bn = nn.BatchNorm2d(2 * hidden)  # applied to cat(cv3 branch, cv2 branch)
        self.act = nn.SiLU()
        blocks = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*blocks)

    def forward(self, x):
        """Run both branches, concatenate on dim 1, then apply BN, SiLU and the final Conv."""
        deep = self.cv3(self.m(self.cv1(x)))  # cv1 -> bottlenecks -> cv3
        skip = self.cv2(x)  # plain 1x1 convolution of the input
        merged = torch.cat((deep, skip), 1)
        return self.cv4(self.act(self.bn(merged)))


# CrossConv: cross convolution (1xk followed by kx1) with optional shortcut
class CrossConv(nn.Module):
    """Cross convolution: a (1, k) Conv followed by a (k, 1) Conv, with an optional residual shortcut."""

    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
        """
        Build the cross convolution.

        Args:
            c1: input channels.
            c2: output channels.
            k: kernel length used along one axis per convolution.
            s: stride, applied along the same axis as the kernel length.
            g: groups of the second convolution.
            e: expansion ratio; hidden channels are int(c2 * e).
            shortcut: request a residual connection (only used when c1 == c2).
        """
        super().__init__()
        hidden = int(c2 * e)
        self.cv1 = Conv(c1, hidden, (1, k), (1, s))
        self.cv2 = Conv(hidden, c2, (k, 1), (s, 1), g=g)
        self.add = shortcut and c1 == c2

    def forward(self, x):
        """Apply both convolutions and add the input back when the shortcut is active."""
        out = self.cv2(self.cv1(x))
        return x + out if self.add else out


# C3: CSP bottleneck with three convolutions
class C3(nn.Module):
    """CSP bottleneck built from three Conv layers and a stack of Bottleneck blocks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """
        Build the C3 module.

        Args:
            c1: input channels.
            c2: output channels.
            n: number of stacked Bottleneck blocks.
            shortcut: forwarded to each Bottleneck.
            g: groups, forwarded to each Bottleneck.
            e: expansion ratio; hidden channels are int(c2 * e).
        """
        super().__init__()
        hidden = int(c2 * e)
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(c1, hidden, 1, 1)
        self.cv3 = Conv(2 * hidden, c2, 1)  # optional act=FReLU(c2)
        blocks = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
        self.m = nn.Sequential(*blocks)

    def forward(self, x):
        """Concatenate the bottleneck branch (cv1 -> m) with the cv2 branch on dim 1 and fuse with cv3."""
        deep = self.m(self.cv1(x))
        skip = self.cv2(x)
        return self.cv3(torch.cat((deep, skip), 1))


# C3x: C3 variant that uses cross-convolutions
class C3x(C3):
    """C3 variant whose inner stack `m` is made of CrossConv blocks instead of Bottlenecks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """
        Build a regular C3 and then replace `self.m` with `n` CrossConv blocks.

        Args:
            c1: input channels.
            c2: output channels.
            n: number of stacked CrossConv blocks.
            shortcut: forwarded to each CrossConv.
            g: groups, forwarded to each CrossConv.
            e: expansion ratio; hidden channels are int(c2 * e).
        """
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        blocks = [CrossConv(hidden, hidden, 3, 1, g, 1.0, shortcut) for _ in range(n)]
        self.m = nn.Sequential(*blocks)


# C3TR: C3 variant that uses a TransformerBlock
class C3TR(C3):
    """C3 variant whose inner module `m` is a TransformerBlock with 4 attention heads."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """
        Build a regular C3 and then replace `self.m` with a TransformerBlock.

        Args:
            c1: input channels.
            c2: output channels.
            n: number of transformer layers inside the TransformerBlock.
            shortcut: forwarded to the C3 constructor.
            g: forwarded to the C3 constructor.
            e: expansion ratio; hidden channels are int(c2 * e).
        """
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        self.m = TransformerBlock(hidden, hidden, 4, n)


# C3SPP: C3 variant whose inner module is an SPP layer
class C3SPP(C3):
    """C3 variant whose inner module `m` is an SPP (spatial pyramid pooling) layer."""

    def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
        """
        Build a regular C3 and then replace `self.m` with an SPP layer.

        Args:
            c1: input channels.
            c2: output channels.
            k: pooling kernel sizes handed to SPP.
            n: forwarded to the C3 constructor.
            shortcut: forwarded to the C3 constructor.
            g: forwarded to the C3 constructor.
            e: expansion ratio; hidden channels are int(c2 * e).
        """
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        self.m = SPP(hidden, hidden, k)  # SPP is defined further down in this module


# C3Ghost: C3 variant built from GhostBottleneck blocks
class C3Ghost(C3):
    """C3 variant whose inner stack `m` is made of GhostBottleneck blocks."""

    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
        """
        Build a regular C3 and then replace `self.m` with `n` GhostBottleneck blocks.

        Args:
            c1: input channels.
            c2: output channels.
            n: number of stacked GhostBottleneck blocks.
            shortcut: forwarded to the C3 constructor.
            g: forwarded to the C3 constructor.
            e: expansion ratio; hidden channels are int(c2 * e).
        """
        super().__init__(c1, c2, n, shortcut, g, e)
        hidden = int(c2 * e)
        blocks = [GhostBottleneck(hidden, hidden) for _ in range(n)]
        self.m = nn.Sequential(*blocks)


# SPP: spatial pyramid pooling
class SPP(nn.Module):
    """Spatial Pyramid Pooling layer, ref: https://arxiv.org/abs/1406.4729."""

    def __init__(self, c1, c2, k=(5, 9, 13)):
        """
        Build the SPP layer.

        Args:
            c1: input channels.
            c2: output channels.
            k: kernel sizes; one stride-1 max-pool with padding k // 2 is created per entry.
        """
        super().__init__()
        hidden = c1 // 2
        self.cv1 = Conv(c1, hidden, 1, 1)
        # the input of cv2 is the cv1 output plus one pooled copy per kernel size
        self.cv2 = Conv(hidden * (len(k) + 1), c2, 1, 1)
        pools = [nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k]
        self.m = nn.ModuleList(pools)

    def forward(self, x):
        """Reduce channels with cv1, max-pool at every kernel size, concatenate on dim 1 and fuse with cv2."""
        x = self.cv1(x)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # suppress torch 1.9.0 max_pool2d() warning
            pooled = [pool(x) for pool in self.m]
            return self.cv2(torch.cat([x, *pooled], 1))


# SPPF: fast spatial pyramid pooling
class SPPF(nn.Module):
    """Fast Spatial Pyramid Pooling: one max-pool applied three times in sequence."""

    def __init__(self, c1, c2, k=5):
        """
        Build the SPPF layer; with k=5 it is equivalent to SPP(k=(5, 9, 13)).

        Args:
            c1: input channels.
            c2: output channels.
            k: kernel size of the single shared max-pool (stride 1, padding k // 2).
        """
        super().__init__()
        hidden = c1 // 2
        self.cv1 = Conv(c1, hidden, 1, 1)
        self.cv2 = Conv(hidden * 4, c2, 1, 1)  # cv1 output + three pooled maps
        self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2)

    def forward(self, x):
        """Reduce channels with cv1, chain the max-pool three times, concatenate all four maps and fuse with cv2."""
        features = [self.cv1(x)]
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")  # suppress torch 1.9.0 max_pool2d() warning
            for _ in range(3):
                # each pooling step consumes the result of the previous one
                features.append(self.m(features[-1]))
            return self.cv2(torch.cat(features, 1))


# Focus: moves spatial information into the channel dimension by slicing, then convolves
class Focus(nn.Module):
    """Folds spatial information into channels by strided slicing, then applies a Conv."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
        """
        Build the Focus module.

        Args:
            c1: input channels (the internal Conv receives c1 * 4 channels).
            c2: output channels.
            k: kernel size of the Conv.
            s: stride of the Conv.
            p: padding of the Conv (None lets Conv choose it).
            g: groups of the Conv.
            act: activation selector, forwarded to Conv.
        """
        super().__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)

    def forward(self, x):
        """Turn (b, c, w, h) into (b, 4c, w/2, h/2) by taking every second element of the last two dims, then convolve."""
        # four interleaved sub-grids, named by the parity of the index on the last two dims
        even_even = x[..., ::2, ::2]
        odd_even = x[..., 1::2, ::2]
        even_odd = x[..., ::2, 1::2]
        odd_odd = x[..., 1::2, 1::2]
        stacked = torch.cat((even_even, odd_even, even_odd, odd_odd), 1)
        return self.conv(stacked)


# GhostConv: ghost convolution
class GhostConv(nn.Module):
    """Ghost convolution, see https://github.com/huawei-noah/ghostnet."""

    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
        """
        Build the ghost convolution.

        Args:
            c1: input channels.
            c2: output channels; each of the two internal Convs produces c2 // 2 channels.
            k: kernel size of the first Conv.
            s: stride of the first Conv.
            g: groups of the first Conv.
            act: activation selector, forwarded to both Convs.
        """
        super().__init__()
        half = c2 // 2
        self.cv1 = Conv(c1, half, k, s, None, g, act=act)
        # 5x5 Conv with groups == channels, applied to the output of cv1
        self.cv2 = Conv(half, half, 5, 1, None, half, act=act)

    def forward(self, x):
        """Return cv1(x) concatenated on dim 1 with cv2 applied to that same cv1 output."""
        primary = self.cv1(x)
        ghost = self.cv2(primary)
        return torch.cat((primary, ghost), 1)


# GhostBottleneck: bottleneck built from ghost convolutions
class GhostBottleneck(nn.Module):
    """Bottleneck built from Ghost convolutions, see https://github.com/huawei-noah/ghostnet."""

    def __init__(self, c1, c2, k=3, s=1):
        """
        Build the ghost bottleneck.

        Args:
            c1: input channels.
            c2: output channels.
            k: kernel size of the depth-wise convolutions (only created when s == 2).
            s: stride; s == 2 inserts depth-wise Convs in both the main path and the shortcut.
        """
        super().__init__()
        hidden = c2 // 2
        strided = s == 2
        self.conv = nn.Sequential(
            GhostConv(c1, hidden, 1, 1),  # pw
            DWConv(hidden, hidden, k, s, act=False) if strided else nn.Identity(),  # dw
            GhostConv(hidden, c2, 1, 1, act=False),  # pw-linear
        )
        if strided:
            # the shortcut has to apply the same stride (and channel change) as the main path
            self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False), Conv(c1, c2, 1, 1, act=False))
        else:
            self.shortcut = nn.Identity()

    def forward(self, x):
        """Return the sum of the main path and the shortcut path."""
        main = self.conv(x)
        return main + self.shortcut(x)


# Contract: folds spatial dimensions into channels
class Contract(nn.Module):
    """Folds spatial dimensions into channels, e.g. (1,64,80,80) -> (1,256,40,40) for gain=2."""

    def __init__(self, gain=2):
        """
        Store the contraction factor.

        Args:
            gain: factor by which both trailing dims shrink; channels grow by gain**2.
        """
        super().__init__()
        self.gain = gain

    def forward(self, x):
        """Rearrange `x` of shape (b, c, h, w) into (b, c*s*s, h//s, w//s) where s is the gain."""
        b, c, h, w = x.shape  # h and w are expected to be divisible by the gain
        s = self.gain
        hs, ws = h // s, w // s
        tiles = x.view(b, c, hs, s, ws, s)  # x(1,64,40,2,40,2)
        # bring both within-tile offsets in front of the channel axis; contiguous() is required before view()
        tiles = tiles.permute(0, 3, 5, 1, 2, 4).contiguous()  # x(1,2,2,64,40,40)
        return tiles.view(b, c * s * s, hs, ws)  # x(1,256,40,40)


# Expand: redistributes channels into spatial dimensions
class Expand(nn.Module):
    """Redistributes channels into spatial dimensions, e.g. (1,64,80,80) -> (1,16,160,160) for gain=2."""

    def __init__(self, gain=2):
        """
        Store the expansion factor.

        Args:
            gain: factor by which both trailing dims grow; channels shrink by gain**2.
        """
        super().__init__()
        self.gain = gain

    def forward(self, x):
        """Rearrange `x` of shape (b, c, h, w) into (b, c // s**2, h*s, w*s) where s is the gain."""
        b, c, h, w = x.shape  # c is expected to be divisible by gain ** 2
        s = self.gain
        c_out = c // s**2
        groups = x.view(b, s, s, c_out, h, w)  # x(1,2,2,16,80,80)
        # interleave the two gain axes with the spatial axes; contiguous() is required before view()
        groups = groups.permute(0, 3, 4, 1, 5, 2).contiguous()  # x(1,16,80,2,80,2)
        return groups.view(b, c_out, h * s, w * s)  # x(1,16,160,160)


# Concat: concatenation along a chosen dimension
class Concat(nn.Module):
    """Concatenates a list of tensors along a fixed dimension."""

    def __init__(self, dimension=1):
        """
        Store the concatenation axis.

        Args:
            dimension: axis passed to `torch.cat`.
        """
        super().__init__()
        self.d = dimension

    def forward(self, x):
        """Concatenate the tensors in `x` (a list of tensors) along `self.d`."""
        return torch.cat(x, dim=self.d)


# DetectMultiBackend: inference wrapper for multiple model backends
class DetectMultiBackend(nn.Module):
    """
    YOLOv5 inference wrapper that hides the differences between model backends.

    The backend is chosen from the weights path/URL by `_model_type()`; `__init__` loads the matching runtime and
    `forward()` dispatches to it, always handing results back as torch tensors via `from_numpy()`.
    """

    def __init__(self, weights="yolov5s.pt", device=torch.device("cpu"), dnn=False, data=None, fp16=False, fuse=True):
        """
        Load a model for the backend implied by `weights`.

        Args:
            weights: path/URL of the model, or a list of them (only the first entry selects the backend).
            device: torch device used for loading and for tensors returned by `from_numpy()`.
            dnn: use OpenCV DNN for ONNX models instead of ONNX Runtime.
            data: optional dataset YAML path used to read class names when the model carries none.
            fp16: request half precision; only kept for PyTorch, TorchScript, ONNX, TensorRT and Triton.
            fuse: forwarded to `attempt_load` for PyTorch weights.

        Raises:
            NotImplementedError: for TF.js models and for unrecognized formats.

        Note:
            The method ends with `self.__dict__.update(locals())`, so EVERY local variable defined here becomes an
            instance attribute (e.g. `session`, `bindings`, `interpreter`, `is_trt10`). `forward()` and `warmup()`
            depend on those names; do not rename locals in this method.
        """
        #   PyTorch:              weights = *.pt
        #   TorchScript:                    *.torchscript
        #   ONNX Runtime:                   *.onnx
        #   ONNX OpenCV DNN:                *.onnx --dnn
        #   OpenVINO:                       *_openvino_model
        #   CoreML:                         *.mlpackage
        #   TensorRT:                       *.engine
        #   TensorFlow SavedModel:          *_saved_model
        #   TensorFlow GraphDef:            *.pb
        #   TensorFlow Lite:                *.tflite
        #   TensorFlow Edge TPU:            *_edgetpu.tflite
        #   PaddlePaddle:                   *_paddle_model
        from models.experimental import attempt_download, attempt_load  # scoped to avoid circular import

        super().__init__()
        # weights path as a string (first entry when a list is given)
        w = str(weights[0] if isinstance(weights, list) else weights)
        pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle, triton = self._model_type(w)
        fp16 &= pt or jit or onnx or engine or triton  # FP16
        nhwc = coreml or saved_model or pb or tflite or edgetpu  # BHWC formats (vs torch BCWH)
        stride = 32  # default stride
        cuda = torch.cuda.is_available() and device.type != "cpu"  # use CUDA
        if not (pt or triton):
            w = attempt_download(w)  # download if not local

        if pt:  # PyTorch
            model = attempt_load(weights if isinstance(weights, list) else w, device=device, inplace=True, fuse=fuse)
            stride = max(int(model.stride.max()), 32)  # model stride
            names = model.module.names if hasattr(model, "module") else model.names  # get class names
            model.half() if fp16 else model.float()
            self.model = model  # explicitly assign for to(), cpu(), cuda(), half()
        elif jit:  # TorchScript
            LOGGER.info(f"Loading {w} for TorchScript inference...")
            extra_files = {"config.txt": ""}  # model metadata
            model = torch.jit.load(w, _extra_files=extra_files, map_location=device)
            model.half() if fp16 else model.float()
            if extra_files["config.txt"]:  # load metadata dict
                d = json.loads(
                    extra_files["config.txt"],
                    object_hook=lambda d: {int(k) if k.isdigit() else k: v for k, v in d.items()},
                )
                stride, names = int(d["stride"]), d["names"]
        elif dnn:  # ONNX OpenCV DNN
            LOGGER.info(f"Loading {w} for ONNX OpenCV DNN inference...")
            check_requirements("opencv-python>=4.5.4")
            net = cv2.dnn.readNetFromONNX(w)
        elif onnx:  # ONNX Runtime
            LOGGER.info(f"Loading {w} for ONNX Runtime inference...")
            check_requirements(("onnx", "onnxruntime-gpu" if cuda else "onnxruntime"))
            import onnxruntime

            providers = ["CUDAExecutionProvider", "CPUExecutionProvider"] if cuda else ["CPUExecutionProvider"]
            session = onnxruntime.InferenceSession(w, providers=providers)
            output_names = [x.name for x in session.get_outputs()]
            meta = session.get_modelmeta().custom_metadata_map  # metadata
            if "stride" in meta:
                # NOTE(review): eval() executes text stored inside the model file; only load ONNX models from
                # trusted sources. Consider ast.literal_eval once the metadata format is confirmed to be a literal.
                stride, names = int(meta["stride"]), eval(meta["names"])
        elif xml:  # OpenVINO
            LOGGER.info(f"Loading {w} for OpenVINO inference...")
            check_requirements("openvino>=2023.0")  # requires openvino-dev: https://pypi.org/project/openvino-dev/
            from openvino.runtime import Core, Layout, get_batch

            core = Core()
            if not Path(w).is_file():  # if not *.xml
                w = next(Path(w).glob("*.xml"))  # get *.xml file from *_openvino_model dir
            ov_model = core.read_model(model=w, weights=Path(w).with_suffix(".bin"))
            if ov_model.get_parameters()[0].get_layout().empty:
                ov_model.get_parameters()[0].set_layout(Layout("NCHW"))
            batch_dim = get_batch(ov_model)
            if batch_dim.is_static:
                batch_size = batch_dim.get_length()
            ov_compiled_model = core.compile_model(ov_model, device_name="AUTO")  # AUTO selects best available device
            stride, names = self._load_metadata(Path(w).with_suffix(".yaml"))  # load metadata
        elif engine:  # TensorRT
            LOGGER.info(f"Loading {w} for TensorRT inference...")
            import tensorrt as trt  # https://developer.nvidia.com/nvidia-tensorrt-download

            check_version(trt.__version__, "7.0.0", hard=True)  # require tensorrt>=7.0.0
            if device.type == "cpu":
                device = torch.device("cuda:0")
            Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
            logger = trt.Logger(trt.Logger.INFO)
            with open(w, "rb") as f, trt.Runtime(logger) as runtime:
                model = runtime.deserialize_cuda_engine(f.read())
            context = model.create_execution_context()
            bindings = OrderedDict()
            output_names = []
            fp16 = False  # default updated below
            dynamic = False
            # engines without `num_bindings` are treated as TensorRT 10 (tensor-name based API)
            is_trt10 = not hasattr(model, "num_bindings")
            num = range(model.num_io_tensors) if is_trt10 else range(model.num_bindings)
            for i in num:
                if is_trt10:
                    name = model.get_tensor_name(i)
                    dtype = trt.nptype(model.get_tensor_dtype(name))
                    is_input = model.get_tensor_mode(name) == trt.TensorIOMode.INPUT
                    if is_input:
                        if -1 in tuple(model.get_tensor_shape(name)):  # dynamic
                            dynamic = True
                            context.set_input_shape(name, tuple(model.get_profile_shape(name, 0)[2]))
                        if dtype == np.float16:
                            fp16 = True
                    else:  # output
                        output_names.append(name)
                    shape = tuple(context.get_tensor_shape(name))
                else:
                    name = model.get_binding_name(i)
                    dtype = trt.nptype(model.get_binding_dtype(i))
                    if model.binding_is_input(i):
                        if -1 in tuple(model.get_binding_shape(i)):  # dynamic
                            dynamic = True
                            context.set_binding_shape(i, tuple(model.get_profile_shape(0, i)[2]))
                        if dtype == np.float16:
                            fp16 = True
                    else:  # output
                        output_names.append(name)
                    shape = tuple(context.get_binding_shape(i))
                im = torch.from_numpy(np.empty(shape, dtype=dtype)).to(device)
                bindings[name] = Binding(name, dtype, shape, im, int(im.data_ptr()))
            binding_addrs = OrderedDict((n, d.ptr) for n, d in bindings.items())
            batch_size = bindings["images"].shape[0]  # if dynamic, this is instead max batch size
        elif coreml:  # CoreML
            LOGGER.info(f"Loading {w} for CoreML inference...")
            import coremltools as ct

            model = ct.models.MLModel(w)
        elif saved_model:  # TF SavedModel
            LOGGER.info(f"Loading {w} for TensorFlow SavedModel inference...")
            import tensorflow as tf

            keras = False  # assume TF1 saved_model
            model = tf.keras.models.load_model(w) if keras else tf.saved_model.load(w)
        elif pb:  # GraphDef https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
            LOGGER.info(f"Loading {w} for TensorFlow GraphDef inference...")
            import tensorflow as tf

            def wrap_frozen_graph(gd, inputs, outputs):
                """Wraps a TensorFlow GraphDef for inference, returning a pruned function."""
                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
                ge = x.graph.as_graph_element
                return x.prune(tf.nest.map_structure(ge, inputs), tf.nest.map_structure(ge, outputs))

            def gd_outputs(gd):
                """Generates a sorted list of graph outputs excluding NoOp nodes and inputs, formatted as '<name>:0'."""
                name_list, input_list = [], []
                for node in gd.node:  # tensorflow.core.framework.node_def_pb2.NodeDef
                    name_list.append(node.name)
                    input_list.extend(node.input)
                return sorted(f"{x}:0" for x in list(set(name_list) - set(input_list)) if not x.startswith("NoOp"))

            gd = tf.Graph().as_graph_def()  # TF GraphDef
            with open(w, "rb") as f:
                gd.ParseFromString(f.read())
            frozen_func = wrap_frozen_graph(gd, inputs="x:0", outputs=gd_outputs(gd))
        elif tflite or edgetpu:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
            try:  # https://coral.ai/docs/edgetpu/tflite-python/#update-existing-tf-lite-code-for-the-edge-tpu
                from tflite_runtime.interpreter import Interpreter, load_delegate
            except ImportError:
                import tensorflow as tf

                Interpreter, load_delegate = (
                    tf.lite.Interpreter,
                    tf.lite.experimental.load_delegate,
                )
            if edgetpu:  # TF Edge TPU https://coral.ai/software/#edgetpu-runtime
                LOGGER.info(f"Loading {w} for TensorFlow Lite Edge TPU inference...")
                delegate = {"Linux": "libedgetpu.so.1", "Darwin": "libedgetpu.1.dylib", "Windows": "edgetpu.dll"}[
                    platform.system()
                ]
                interpreter = Interpreter(model_path=w, experimental_delegates=[load_delegate(delegate)])
            else:  # TFLite
                LOGGER.info(f"Loading {w} for TensorFlow Lite inference...")
                interpreter = Interpreter(model_path=w)  # load TFLite model
            interpreter.allocate_tensors()  # allocate
            input_details = interpreter.get_input_details()  # inputs
            output_details = interpreter.get_output_details()  # outputs
            # load metadata (a .tflite file that is not a zip archive simply has none)
            with contextlib.suppress(zipfile.BadZipFile):
                with zipfile.ZipFile(w, "r") as model:
                    meta_file = model.namelist()[0]
                    meta = ast.literal_eval(model.read(meta_file).decode("utf-8"))
                    stride, names = int(meta["stride"]), meta["names"]
        elif tfjs:  # TF.js
            raise NotImplementedError("ERROR: YOLOv5 TF.js inference is not supported")
        elif paddle:  # PaddlePaddle
            LOGGER.info(f"Loading {w} for PaddlePaddle inference...")
            check_requirements("paddlepaddle-gpu" if cuda else "paddlepaddle")
            import paddle.inference as pdi

            if not Path(w).is_file():  # if not *.pdmodel
                w = next(Path(w).rglob("*.pdmodel"))  # get *.pdmodel file from *_paddle_model dir
            weights = Path(w).with_suffix(".pdiparams")
            config = pdi.Config(str(w), str(weights))
            if cuda:
                config.enable_use_gpu(memory_pool_init_size_mb=2048, device_id=0)
            predictor = pdi.create_predictor(config)
            input_handle = predictor.get_input_handle(predictor.get_input_names()[0])
            output_names = predictor.get_output_names()
        elif triton:  # NVIDIA Triton Inference Server
            LOGGER.info(f"Using {w} as Triton Inference Server...")
            check_requirements("tritonclient[all]")
            from utils.triton import TritonRemoteModel

            model = TritonRemoteModel(url=w)
            nhwc = model.runtime.startswith("tensorflow")
        else:
            raise NotImplementedError(f"ERROR: {w} is not a supported format")

        # class names: fall back to the data YAML, or to placeholder names, when the backend supplied none
        if "names" not in locals():
            names = yaml_load(data)["names"] if data else {i: f"class{i}" for i in range(999)}
        if names[0] == "n01440764" and len(names) == 1000:  # ImageNet
            names = yaml_load(ROOT / "data/ImageNet.yaml")["names"]  # human-readable names

        self.__dict__.update(locals())  # assign all variables to self

    def forward(self, im, augment=False, visualize=False):
        """
        Run inference on a batch of images with whichever backend was loaded.

        Args:
            im: torch tensor unpacked as (batch, channel, height, width).
            augment: forwarded to the PyTorch model only.
            visualize: forwarded to the PyTorch model only.

        Returns:
            A single result when the backend produced one output, otherwise a list of results;
            numpy outputs are converted to torch tensors on `self.device`.
        """
        b, ch, h, w = im.shape  # batch, channel, height, width
        if self.fp16 and im.dtype != torch.float16:
            im = im.half()  # to FP16
        if self.nhwc:
            im = im.permute(0, 2, 3, 1)  # torch BCHW to numpy BHWC shape(1,320,192,3)

        if self.pt:  # PyTorch
            y = self.model(im, augment=augment, visualize=visualize) if augment or visualize else self.model(im)
        elif self.jit:  # TorchScript
            y = self.model(im)
        elif self.dnn:  # ONNX OpenCV DNN
            im = im.cpu().numpy()  # torch to numpy
            self.net.setInput(im)
            y = self.net.forward()
        elif self.onnx:  # ONNX Runtime
            im = im.cpu().numpy()  # torch to numpy
            y = self.session.run(self.output_names, {self.session.get_inputs()[0].name: im})
        elif self.xml:  # OpenVINO
            im = im.cpu().numpy()  # FP32
            y = list(self.ov_compiled_model(im).values())
        elif self.engine:  # TensorRT
            if self.dynamic and im.shape != self.bindings["images"].shape:
                # reshape input and output buffers if dynamic
                if self.is_trt10:
                    # TensorRT 10 engines have no binding-index API; address tensors by name,
                    # mirroring what __init__ does for such engines.
                    self.context.set_input_shape("images", im.shape)
                    self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape)
                    for name in self.output_names:
                        self.bindings[name].data.resize_(tuple(self.context.get_tensor_shape(name)))
                else:
                    i = self.model.get_binding_index("images")
                    self.context.set_binding_shape(i, im.shape)
                    self.bindings["images"] = self.bindings["images"]._replace(shape=im.shape)
                    for name in self.output_names:
                        i = self.model.get_binding_index(name)
                        self.bindings[name].data.resize_(tuple(self.context.get_binding_shape(i)))
            s = self.bindings["images"].shape
            assert im.shape == s, f"input size {im.shape} {'>' if self.dynamic else 'not equal to'} max model size {s}"
            self.binding_addrs["images"] = int(im.data_ptr())
            self.context.execute_v2(list(self.binding_addrs.values()))
            y = [self.bindings[x].data for x in sorted(self.output_names)]
        elif self.coreml:  # CoreML
            im = im.cpu().numpy()
            im = Image.fromarray((im[0] * 255).astype("uint8"))
            # im = im.resize((192, 320), Image.BILINEAR)
            y = self.model.predict({"image": im})  # coordinates are xywh normalized
            if "confidence" in y:
                box = xywh2xyxy(y["coordinates"] * [[w, h, w, h]])  # xyxy pixels
                # builtin float replaces the np.float alias, which was removed in NumPy 1.24
                conf, cls = y["confidence"].max(1), y["confidence"].argmax(1).astype(float)
                y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
            else:
                y = list(reversed(y.values()))  # reversed for segmentation models (pred, proto)
        elif self.paddle:  # PaddlePaddle
            im = im.cpu().numpy().astype(np.float32)
            self.input_handle.copy_from_cpu(im)
            self.predictor.run()
            y = [self.predictor.get_output_handle(x).copy_to_cpu() for x in self.output_names]
        elif self.triton:  # NVIDIA Triton Inference Server
            y = self.model(im)
        else:  # TensorFlow (SavedModel, GraphDef, Lite, Edge TPU)
            im = im.cpu().numpy()
            if self.saved_model:  # SavedModel
                y = self.model(im, training=False) if self.keras else self.model(im)
            elif self.pb:  # GraphDef
                y = self.frozen_func(x=self.tf.constant(im))
            else:  # Lite or Edge TPU
                input_detail = self.input_details[0]
                int8 = input_detail["dtype"] == np.uint8  # is TFLite quantized uint8 model
                if int8:
                    scale, zero_point = input_detail["quantization"]
                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
                self.interpreter.set_tensor(input_detail["index"], im)
                self.interpreter.invoke()
                y = []
                for output in self.output_details:
                    x = self.interpreter.get_tensor(output["index"])
                    if int8:
                        scale, zero_point = output["quantization"]
                        x = (x.astype(np.float32) - zero_point) * scale  # re-scale
                    y.append(x)
            y = [x if isinstance(x, np.ndarray) else x.numpy() for x in y]
            y[0][..., :4] *= [w, h, w, h]  # xywh normalized to pixels

        if isinstance(y, (list, tuple)):
            return self.from_numpy(y[0]) if len(y) == 1 else [self.from_numpy(x) for x in y]
        return self.from_numpy(y)

    def from_numpy(self, x):
        """Convert a NumPy array to a torch tensor on `self.device`; any other object is returned unchanged."""
        return torch.from_numpy(x).to(self.device) if isinstance(x, np.ndarray) else x

    def warmup(self, imgsz=(1, 3, 640, 640)):
        """
        Run dummy inference once (twice for TorchScript) so the first real call is not slowed by initialization.

        Only done for PyTorch, TorchScript, ONNX, TensorRT, SavedModel, GraphDef and Triton backends, and only
        when the device is not the CPU (Triton is always warmed up).

        Args:
            imgsz: shape of the dummy input tensor.
        """
        warmup_types = self.pt, self.jit, self.onnx, self.engine, self.saved_model, self.pb, self.triton
        if any(warmup_types) and (self.device.type != "cpu" or self.triton):
            im = torch.empty(*imgsz, dtype=torch.half if self.fp16 else torch.float, device=self.device)  # input
            for _ in range(2 if self.jit else 1):
                self.forward(im)  # warmup

    @staticmethod
    def _model_type(p="path/to/model.pt"):
        """
        Determine the model type from a file path or URL.

        Example: path='path/to/model.onnx' -> type=onnx

        Returns:
            A list of booleans, one per export format (in the order given by `export_formats()`), with a final
            extra entry that is truthy when `p` looks like a Triton server URL (http/grpc scheme with a netloc
            and no matching export suffix).
        """
        # types = [pt, jit, onnx, xml, engine, coreml, saved_model, pb, tflite, edgetpu, tfjs, paddle]
        from export import export_formats
        from utils.downloads import is_url

        sf = list(export_formats().Suffix)  # export suffixes
        if not is_url(p, check=False):
            check_suffix(p, sf)  # checks
        url = urlparse(p)  # if url may be Triton inference server
        types = [s in Path(p).name for s in sf]
        types[8] &= not types[9]  # tflite &= not edgetpu
        triton = not any(types) and all([any(s in url.scheme for s in ["http", "grpc"]), url.netloc])
        return types + [triton]

    @staticmethod
    def _load_metadata(f=Path("path/to/meta.yaml")):
        """Load `(stride, names)` from a YAML metadata file; returns `(None, None)` when the file does not exist."""
        if f.exists():
            d = yaml_load(f)
            return d["stride"], d["names"]  # assign stride, names
        return None, None


# AutoShape: input pre-processing wrapper
class AutoShape(nn.Module):
    """Input-robust inference wrapper: pre-processes images, runs the wrapped model, and applies NMS.

    The class attributes below are the NMS settings read by `forward()`; override them on an instance to change
    post-processing.
    """

    conf = 0.25  # NMS confidence threshold
    iou = 0.45  # NMS IoU threshold
    agnostic = False  # NMS class-agnostic
    multi_label = False  # NMS multiple labels per box
    classes = None  # (optional list) filter by class, i.e. = [0, 15, 16] for COCO persons, cats and dogs
    max_det = 1000  # maximum number of detections per image
    amp = False  # Automatic Mixed Precision (AMP) inference

    def __init__(self, model, verbose=True):
        """Wraps `model` for inference and switches it to eval mode.

        Args:
            model: The model to wrap; either a `DetectMultiBackend` instance or a plain PyTorch model.
            verbose: When true, logs that AutoShape is being added.
        """
        super().__init__()
        if verbose:
            LOGGER.info("Adding AutoShape... ")
        copy_attr(self, model, include=("yaml", "nc", "hyp", "names", "stride", "abc"), exclude=())  # copy attributes
        self.dmb = isinstance(model, DetectMultiBackend)  # DetectMultiBackend() instance
        self.pt = not self.dmb or model.pt  # PyTorch model
        self.model = model.eval()
        if self.pt:
            # A DetectMultiBackend adds one more `.model` level around the underlying network.
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.inplace = False  # Detect.inplace=False for safe multithread inference
            m.export = True  # do not output loss values

    def _apply(self, fn):
        """Applies to(), cpu(), cuda(), half() etc. to model tensors that are not parameters or registered buffers.

        Args:
            fn: The tensor-to-tensor function forwarded by `nn.Module._apply`.

        Returns:
            This module, after `fn` has also been applied to the last layer's `stride`, `grid` and `anchor_grid`.
        """
        self = super()._apply(fn)
        if self.pt:
            m = self.model.model.model[-1] if self.dmb else self.model.model[-1]  # Detect()
            m.stride = fn(m.stride)
            m.grid = list(map(fn, m.grid))
            if isinstance(m.anchor_grid, list):
                m.anchor_grid = list(map(fn, m.anchor_grid))
        return self

    @smart_inference_mode()
    def forward(self, ims, size=640, augment=False, profile=False):
        """Runs inference on one or more inputs given as file, URI, OpenCV, PIL, numpy or torch data.

        Args:
            ims: A single input or a list/tuple of inputs (see the examples in the body).
            size: Target inference size; an int is expanded to `(size, size)`.
            augment: Forwarded to the wrapped model as `augment=`.
            profile: Accepted for interface compatibility; not read by this method.

        Returns:
            The wrapped model's raw output when `ims` is a `torch.Tensor`; otherwise a `Detections` object built
            from the NMS results.
        """
        # For size(height=640, width=1280), RGB images example inputs are:
        #   file:        ims = 'data/images/zidane.jpg'  # str or PosixPath
        #   URI:             = 'https://ultralytics.com/images/zidane.jpg'
        #   OpenCV:          = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(640,1280,3)
        #   PIL:             = Image.open('image.jpg') or ImageGrab.grab()  # HWC x(640,1280,3)
        #   numpy:           = np.zeros((640,1280,3))  # HWC
        #   torch:           = torch.zeros(16,3,320,640)  # BCHW (scaled to size=640, 0-1 values)
        #   multiple:        = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images

        dt = (Profile(), Profile(), Profile())
        with dt[0]:
            if isinstance(size, int):  # expand
                size = (size, size)
            p = next(self.model.parameters()) if self.pt else torch.empty(1, device=self.model.device)  # param
            autocast = self.amp and (p.device.type != "cpu")  # Automatic Mixed Precision (AMP) inference
            if isinstance(ims, torch.Tensor):  # torch
                # Tensors skip pre-processing and NMS entirely.
                with amp.autocast(autocast):
                    return self.model(ims.to(p.device).type_as(p), augment=augment)  # inference

            # Pre-process: normalise the input to a count and a list of images
            n, ims = (len(ims), list(ims)) if isinstance(ims, (list, tuple)) else (1, [ims])  # number, list of images
            shape0, shape1, files = [], [], []  # image and inference shapes, filenames
            for i, im in enumerate(ims):
                f = f"image{i}"  # filename
                if isinstance(im, (str, Path)):  # filename or uri
                    im, f = Image.open(requests.get(im, stream=True).raw if str(im).startswith("http") else im), im
                    im = np.asarray(exif_transpose(im))
                elif isinstance(im, Image.Image):  # PIL Image
                    im, f = np.asarray(exif_transpose(im)), getattr(im, "filename", f) or f
                files.append(Path(f).with_suffix(".jpg").name)
                if im.shape[0] < 5:  # image in CHW
                    im = im.transpose((1, 2, 0))  # reverse dataloader .transpose(2, 0, 1)
                # Keep the first three channels of a 3-D array; otherwise expand grayscale to three channels.
                im = im[..., :3] if im.ndim == 3 else cv2.cvtColor(im, cv2.COLOR_GRAY2BGR)  # enforce 3ch input
                s = im.shape[:2]  # HWC
                shape0.append(s)  # original image shape
                g = max(size) / max(s)  # gain: largest requested side over largest image side
                shape1.append([int(y * g) for y in s])  # scaled shape for this image
                ims[i] = im if im.data.contiguous else np.ascontiguousarray(im)  # update
            shape1 = [make_divisible(x, self.stride) for x in np.array(shape1).max(0)]  # inf shape
            x = [letterbox(im, shape1, auto=False)[0] for im in ims]  # pad
            x = np.ascontiguousarray(np.array(x).transpose((0, 3, 1, 2)))  # stack and BHWC to BCHW
            x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32

        with amp.autocast(autocast):
            # Inference
            with dt[1]:
                y = self.model(x, augment=augment)  # forward

            # Post-process
            with dt[2]:
                y = non_max_suppression(
                    y if self.dmb else y[0],
                    self.conf,
                    self.iou,
                    self.classes,
                    self.agnostic,
                    self.multi_label,
                    max_det=self.max_det,
                )  # NMS
                for i in range(n):
                    scale_boxes(shape1, y[i][:, :4], shape0[i])

            return Detections(ims, y, files, dt, self.names, x.shape)


class Detections:
    """Holds YOLOv5 detection results and provides display, save, crop, render and export helpers."""

    def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
        """Stores images and predictions and derives xywh and normalised box variants.

        Args:
            ims: List of images as numpy arrays (indexed as HWC here — presumably what `AutoShape.forward` passes).
            pred: List of per-image prediction tensors, pred[0] = (xyxy, conf, cls).
            files: List of image filenames.
            times: Three profiling entries; either objects exposing a `.t` attribute or plain numbers.
            names: Class names, indexed by class id.
            shape: Inference BCHW shape; `None` is stored as an empty tuple.
        """
        super().__init__()
        d = pred[0].device  # device
        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in ims]  # normalizations
        self.ims = ims  # list of images as numpy arrays
        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
        self.names = names  # class names
        self.files = files  # image filenames
        self.times = times  # profiling times
        self.xyxy = pred  # xyxy pixels
        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
        self.n = len(self.pred)  # number of images (batch size)
        # The advertised default `times=(0, 0, 0)` holds plain ints, which have no `.t`; accept both forms.
        self.t = tuple(getattr(x, "t", x) / self.n * 1e3 for x in times)  # timestamps (ms)
        # The advertised default `shape=None` cannot be passed to tuple(); store an empty shape instead.
        self.s = tuple(shape) if shape is not None else ()  # inference BCHW shape

    def _run(self, pprint=False, show=False, save=False, crop=False, render=False, labels=True, save_dir=Path("")):
        """Walks every image once, building a summary string and optionally showing, saving, cropping or rendering.

        Args:
            pprint: Return the summary string (including the speed line).
            show: Display each annotated image.
            save: Write each annotated image (or each crop, together with `crop`) under `save_dir`.
            crop: Collect one crop dict per detection instead of drawing boxes.
            render: Write the annotated image back into `self.ims`.
            labels: Draw label text on boxes.
            save_dir: Output directory used by `save` and `crop`.

        Returns:
            The summary string when `pprint` is true, else the list of crop dicts when `crop` is true, else `None`.
        """
        s, crops = "", []
        for i, (im, pred) in enumerate(zip(self.ims, self.pred)):
            s += f"\nimage {i + 1}/{len(self.pred)}: {im.shape[0]}x{im.shape[1]} "  # string
            if pred.shape[0]:
                for c in pred[:, -1].unique():
                    n = (pred[:, -1] == c).sum()  # detections per class
                    s += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, "  # add to string
                s = s.rstrip(", ")
                if show or save or render or crop:
                    annotator = Annotator(im, example=str(self.names))
                    for *box, conf, cls in reversed(pred):  # xyxy, confidence, class
                        label = f"{self.names[int(cls)]} {conf:.2f}"
                        if crop:
                            file = save_dir / "crops" / self.names[int(cls)] / self.files[i] if save else None
                            crops.append(
                                {
                                    "box": box,
                                    "conf": conf,
                                    "cls": cls,
                                    "label": label,
                                    "im": save_one_box(box, im, file=file, save=save),
                                }
                            )
                        else:  # all others: draw the box and its label
                            annotator.box_label(box, label if labels else "", color=colors(cls))
                    im = annotator.im
            else:
                s += "(no detections)"

            im = Image.fromarray(im.astype(np.uint8)) if isinstance(im, np.ndarray) else im  # from np
            if show:
                if is_jupyter():
                    from IPython.display import display

                    display(im)
                else:
                    im.show(self.files[i])
            if save:
                f = self.files[i]
                im.save(save_dir / f)  # save
                if i == self.n - 1:
                    LOGGER.info(f"Saved {self.n} image{'s' * (self.n > 1)} to {colorstr('bold', save_dir)}")
            if render:
                self.ims[i] = np.asarray(im)
        if pprint:
            s = s.lstrip("\n")
            return f"{s}\nSpeed: %.1fms pre-process, %.1fms inference, %.1fms NMS per image at shape {self.s}" % self.t
        if crop:
            if save:
                LOGGER.info(f"Saved results to {save_dir}\n")
            return crops

    @TryExcept("Showing images is not supported in this environment")
    def show(self, labels=True):
        """Displays detection results with optional labels.

        Usage: show(labels=True)
        """
        self._run(show=True, labels=labels)  # show results

    def save(self, labels=True, save_dir="runs/detect/exp", exist_ok=False):
        """Saves detection results with optional labels to a specified directory.

        Usage: save(labels=True, save_dir='runs/detect/exp', exist_ok=False)
        """
        save_dir = increment_path(save_dir, exist_ok, mkdir=True)  # increment save_dir
        self._run(save=True, labels=labels, save_dir=save_dir)  # save results

    def crop(self, save=True, save_dir="runs/detect/exp", exist_ok=False):
        """Crops detection results and optionally saves them to a directory.

        Args:
            save: Whether to write the crops to disk; when false no directory is created.
            save_dir: Base output directory.
            exist_ok: Forwarded to `increment_path`.

        Returns:
            The list of crop dicts produced by `_run`.
        """
        save_dir = increment_path(save_dir, exist_ok, mkdir=True) if save else None
        return self._run(crop=True, save=save, save_dir=save_dir)  # crop results

    def render(self, labels=True):
        """Draws detections onto the stored images (optionally with labels) and returns `self.ims`."""
        self._run(render=True, labels=labels)  # render results
        return self.ims

    def pandas(self):
        """Returns a copy whose xyxy, xyxyn, xywh and xywhn attributes are lists of pandas DataFrames.

        Example: print(results.pandas().xyxy[0]).
        """
        new = copy(self)  # return copy
        ca = "xmin", "ymin", "xmax", "ymax", "confidence", "class", "name"  # xyxy columns
        cb = "xcenter", "ycenter", "width", "height", "confidence", "class", "name"  # xywh columns
        for k, c in zip(["xyxy", "xyxyn", "xywh", "xywhn"], [ca, ca, cb, cb]):
            # Each row becomes: four box values, confidence, integer class id, class name.
            a = [[x[:5] + [int(x[5]), self.names[int(x[5])]] for x in x.tolist()] for x in getattr(self, k)]  # update
            setattr(new, k, [pd.DataFrame(x, columns=c) for x in a])
        return new

    def tolist(self):
        """Splits this object into a list of single-image `Detections` objects for iteration.

        Example: for result in results.tolist():
        """
        r = range(self.n)  # iterable
        return [
            Detections(
                [self.ims[i]],
                [self.pred[i]],
                [self.files[i]],
                self.times,
                self.names,
                self.s,
            )
            for i in r
        ]

    def print(self):
        """Logs the string representation of the results via the LOGGER."""
        LOGGER.info(self.__str__())

    def __len__(self):
        """Returns the number of images in the results; backs len(results)."""
        return self.n

    def __str__(self):
        """Returns the printable results summary; backs print(results)."""
        return self._run(pprint=True)  # print results

    def __repr__(self):
        """Returns the class of this object followed by the printable results summary."""
        return f"YOLOv5 {self.__class__} instance\n" + self.__str__()


class Proto(nn.Module):
    """YOLOv5 mask Proto module for segmentation models: conv, 2x upsample, conv, conv."""

    def __init__(self, c1, c_=256, c2=32):
        """Builds the Proto layers.

        Args:
            c1: Input channels.
            c_: Channels used by the two intermediate 3x3 convolutions.
            c2: Output (mask) channels.
        """
        super().__init__()
        self.cv1 = Conv(c1, c_, k=3)
        self.upsample = nn.Upsample(scale_factor=2, mode="nearest")
        self.cv2 = Conv(c_, c_, k=3)
        self.cv3 = Conv(c_, c2)

    def forward(self, x):
        """Applies cv1, nearest-neighbour 2x upsampling, cv2 and cv3 to `x`, in that order."""
        return self.cv3(self.cv2(self.upsample(self.cv1(x))))


# Classification module
class Classify(nn.Module):
    """YOLOv5 classification head: convolution, global average pooling, dropout, then a linear layer."""

    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, dropout_p=0.0):
        """Builds the classification head's layers.

        Args:
            c1: Input channels.
            c2: Output channels (output size of the final linear layer).
            k: Convolution kernel size.
            s: Convolution stride.
            p: Convolution padding; passed through `autopad(k, p)`.
            g: Convolution groups.
            dropout_p: Dropout probability.
        """
        super().__init__()
        c_ = 1280  # efficientnet_b0 size
        self.conv = Conv(c1, c_, k, s, autopad(k, p), g)
        self.pool = nn.AdaptiveAvgPool2d(1)  # to x(b,c_,1,1)
        self.drop = nn.Dropout(p=dropout_p, inplace=True)
        self.linear = nn.Linear(c_, c2)  # to x(b,c2)

    def forward(self, x):
        """Runs conv, pool, flatten, dropout and linear on `x`; a list input is first concatenated on dim 1."""
        if isinstance(x, list):
            x = torch.cat(x, 1)
        return self.linear(self.drop(self.pool(self.conv(x)).flatten(1)))

总的来说没那么复杂

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如若转载,请注明出处:http://www.mzph.cn/web/57838.shtml

如若内容造成侵权/违法违规/事实不符,请联系多彩编程网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

【设计模式】如何用C++实现依赖倒置

【设计模式】如何用C++实现依赖倒置 一、什么是依赖倒置？ 依赖倒置原则（Dependency Inversion Principle，DIP）是SOLID面向对象设计原则中的一项。它的核心思想是： 高层模块不应该依赖于低层模块，两者都应该…

2、Linux网络基础

一、TCP/IP协议概述 Linux服务器默认网卡配置文件在/etc/sysconfig/network-scripts/下，命名的名称一般为:ifcfg-eth0 ifcfg-eth1 ，eth0表示第一块网卡，eth1表示第二块网卡，依次类推。一般DELL R720标配有4块千兆网卡。 TCP/IP&…

MySQL的SQL语句之触发器的创建和应用

触发器 Trigger 一.触发器 作用：当检测到某种数据表发生数据变化时，自动执行操作，保证数据的完整性，保证数据的一致性。 1.创建一个触发器 如上图所示，查看这个create的帮助信息的时候，这个create trig…

2016年ATom-1活动期间航班每10秒一次的一氧化碳（CO）观测数据

目录 简介 摘要 代码 引用 网址推荐 知识星球 机器学习 ATom: Observed and GEOS-5 Simulated CO Concentrations with Tagged Tracers for ATom-1 简介 该数据集包含2016年ATom-1活动期间航班每10秒一次的一氧化碳（CO）观测数据，以及来自戈达德地球观…

Ambari里面添加hive组件

1.创建hive数据库 在添加hive组件之前需要做的事情，先在master这个虚拟机里面创建好hive 先进入虚拟机里面进入mysql 然后输入这个命令看看有没有自己创建的hive数据库 show databases;有的话会显示下面这个样子 没有的同学使用以下命令可以在MySQL中创建hive数…

Java 集合一口气讲完!(上)||o(*°▽°*)o|Ю [有人吗?]

Java 集合遍历 Java集合教程 - Java集合遍历 在Java Collections Framework中，不同类型的集合使用不同类型的数据结构以不同的方式存储它们的元素。 一些集合对它们的元素有排序，有些没有。集合框架提供了遍历集合的以下方法： 使用迭代器使…

Oracle视频基础1.1.3练习

1.1.3 需求： 完整格式查看所有用户进程里的oracle后台进程 查看物理网卡，虚拟网卡的ip地址 ps -ef | grep oracle /sbin/ifconfig 要以完整格式查看所有用户进程中的 Oracle 后台进程，并查看物理和虚拟网卡的 IP 地址，可以使用以下…

akshare股票涨跌板与资金流向相关分析

## akshare文档 涨跌板 https://akshare.akfamily.xyz/data/stock/stock.html#id374 资金流向 https://akshare.akfamily.xyz/data/stock/stock.html#id154 涨跌板：https://quote.eastmoney.com/ztb/detail#typeztgc 资金流向：https://data.eastmoney.com/…

11 P4995 跳跳!

题目： 核心： 贪心策略：每次都跳距离自己最远的石头 收获：双指针（下标）的使用 代码： #include<iostream> using namespace std; # define M 100 #include<algorithm> #include<ma…

热点聚焦:AI在医疗保健领域的深度渗透与变革

内容概要 随着人工智能技术的不断进步，我们正在见证一个充满奇迹的转变，尤其是在医疗保健领域。这种转变不仅仅涉及到提高效率，更在于重新定义我们对疾病诊断和治疗方案的理解。通过智能算法，AI能够在早期识别潜在的健康问题…

计算机网络八股文个人总结

1.TCP/IP模型和OSI模型的区别 在计算机网络中，TCP/IP 模型和 OSI 模型是两个重要的网络协议模型。它们帮助我们理解计算机通信的工作原理。以下是它们的主要区别，以通俗易懂的方式进行解释： 1. 模型层数 OSI 模型：有 7 层…

使用onnxruntime-web 运行yolov8-nano推理

ONNX（Open Neural Network Exchange）模型具有以下两个特点，使我们可以使用onnxruntime-web 直接在web端上运行推理模型，为了让这个推理更直观，我选择了试验下yolov8 识别预览图片： 1. 跨平台兼容性 ONNX 是一种开…

Qt 练习做一个登录界面

练习做一个登录界面 效果 UI图 UI代码 <?xml version="1.0" encoding="UTF-8"?> <ui version="4.0"><class>Dialog</class><widget class="QDialog" name="Dialog"><property name="ge…

ArcGIS软件之“新建中学最适合地址”地图制作

目录 最终效果图（全文图中的颜色类似即可，形状一样为标准）第一步、设置现有中学的欧式距离第二步、将计算好的欧式距离 进行重分类第三步、进行核密度分析第四步、进行人口密度的重分类第五步、进行土地使用的要素转栅格第六步、对上一步进行重分类第七步…

就业市场变革:AI时代,我们将如何评估人才?

内容概要 在这个充满变革的时代，就业市场正被人工智能（AI）技术深刻改变。随着技术的进步，传统的人才评估方式逐渐显示出其局限性。例如，过去依赖于纸质简历和面试评估的方式在快速变化的环境中难以准确识别真实的人才…

Linux Kernel Programming (个人读书笔记)

目录 Before everything begins 笔者的环境 关于如何在Arch Linux下载Virtual Box 下载一个镜像，然后开启一个简单的虚拟机 在Ubuntu虚拟机下东西 配置我们的内核 啥是KConfig和KBuild? 构建内核配置选择 启动！一个好的内核配置的开始 使用分发…

【传知代码】智慧医疗:纹理特征VS卷积特征

🍑个人主页：Jupiter. 🚀 所属专栏：传知代码 欢迎大家点赞收藏评论😊 目录 论文概述纹理特征和深度卷积特征算法流程数据预处理方法纹理特征提取深度卷积特征提取分类网络搭建代码复现BLS_Model.py文件——分类器搭建py…

算法深度剖析:前缀和

文章目录 前言一、一维前缀和模板二、二维前缀和模板三、寻找数组的中心下标四、除自身以外数组的乘积五、和为 K 的子数组六、和可被 K 整除的子数组七、连续数组八、矩阵区域和 前言 本章将深度剖析前缀和，以及总结前缀和模板。 前缀和是一种在算法和数据处理中…

leetcode双指针题目总结

文章目录 283. 移动零题目描述解题 11. 盛最多水的容器题目描述解法解释时间复杂度 15. 三数之和题目描述解法思路解释优势 42. 接雨水题目描述解答具体思路 283. 移动零 题目描述 给定一个数组 nums，编写一个函数将所有 0 移动到数组的末尾，同时保持非…

Android——显式/隐式Intent

概述 在Android中，Intent是各个组件之间信息通信的桥梁，它用于Android各组件的通信。 Intent 的组成部分 一、显式 Intent 第一种方式 Intent intent = new Intent(this, ActFinishActivity.class);startActivity(intent);第二种方式 Intent intent …