yolov5中head修改為decouple head詳解

更新時間：2022年06月11日 15:12:49 作者：qq_34496674

現成的YOLOv5代碼真的很香，不管口碑怎麼樣，我用著反正是挺爽的。下面這篇文章主要給大家介紹了關於yolov5中head修改為decouple head的相關資料，需要的朋友可以參考下

yolox的decoupled head結構

本來想將yolov5的head修改為decoupled head，與yolox的decouple head對齊，但是沒注意，改成了如下結構：

感謝少年肩上楊柳依依的指出，如還有問題歡迎指出

1.修改models下的yolo.py文件中的Detect

class Detect(nn.Module):
    """YOLOv5 detection layer with a decoupled (YOLOX-style) head.

    For every input feature level the head runs:

    * ``base_conv``  - 1x1 ``BaseConv`` projecting the level to 256 channels;
    * ``reg_convs``  - 3x3 ``BaseConv`` feeding the box (``m_box``) and
      objectness (``m_conf``) 1x1 prediction convs;
    * ``cls_convs``  - 3x3 ``BaseConv`` feeding the class (``m_labels``)
      1x1 prediction conv.

    The three predictions are assembled per anchor as
    ``[box(4), objectness(1), classes(nc)]`` so the result has the same
    ``(bs, na, ny, nx, no)`` layout the stock YOLOv5 ``Detect`` produces.

    NOTE(review): inference calls ``self._make_grid``, which is not defined in
    this class body as shown; presumably it is kept from the upstream YOLOv5
    ``Detect`` class - confirm before use.
    """

    stride = None  # strides computed during build
    onnx_dynamic = False  # ONNX export parameter

    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):  # detection layer
        """Build the per-level branches and prediction convs.

        Args:
            nc: number of classes.
            anchors: one flat ``(w0, h0, w1, h1, ...)`` sequence per detection
                level; its length defines the number of levels.
            ch: input channel count of each feature level.
            inplace: use in-place slice assignment when decoding boxes.
        """
        super().__init__()
        hidden = 256  # channel width shared by all branches of the head
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        self.grid = [torch.zeros(1)] * self.nl  # init grid
        self.anchor_grid = [torch.zeros(1)] * self.nl  # init anchor grid
        self.register_buffer('anchors', torch.tensor(anchors).float().view(self.nl, -1, 2))  # shape(nl,na,2)
        # 1x1 prediction convs, one per detection level
        self.m_box = nn.ModuleList(nn.Conv2d(hidden, 4 * self.na, 1) for _ in ch)
        self.m_conf = nn.ModuleList(nn.Conv2d(hidden, 1 * self.na, 1) for _ in ch)
        self.m_labels = nn.ModuleList(nn.Conv2d(hidden, self.nc * self.na, 1) for _ in ch)
        # shared stem followed by separate classification / regression branches
        self.base_conv = nn.ModuleList(
            BaseConv(in_channels=x, out_channels=hidden, ksize=1, stride=1) for x in ch)
        self.cls_convs = nn.ModuleList(
            BaseConv(in_channels=hidden, out_channels=hidden, ksize=3, stride=1) for _ in ch)
        self.reg_convs = nn.ModuleList(
            BaseConv(in_channels=hidden, out_channels=hidden, ksize=3, stride=1) for _ in ch)
        self.inplace = inplace  # use in-place ops (e.g. slice assignment)

    def forward(self, x):
        """Run the head on a list of feature maps (the list is updated in place).

        Args:
            x: list with one ``(bs, ch[i], ny, nx)`` tensor per detection level.

        Returns:
            In training mode, ``x`` with every entry replaced by the raw
            ``(bs, na, ny, nx, no)`` prediction. Otherwise a tuple
            ``(decoded, x)`` where ``decoded`` concatenates the decoded
            predictions of all levels as ``(bs, total_anchors, no)``.
        """
        z = []  # inference output
        for i in range(self.nl):
            x_feature = self.base_conv[i](x[i])
            cls_feature = self.cls_convs[i](x_feature)
            reg_feature = self.reg_convs[i](x_feature)

            m_box = self.m_box[i](reg_feature)
            m_conf = self.m_conf[i](reg_feature)
            m_labels = self.m_labels[i](cls_feature)

            bs, _, ny, nx = m_box.shape
            # Split the anchor axis out of every branch BEFORE concatenating,
            # then join on the per-anchor output axis. Concatenating the flat
            # conv outputs on the channel axis and viewing the result as
            # (na, no) would scatter the branch outputs across anchors, e.g.
            # anchor 0's class logits would partly come from the box conv.
            # NOTE: weights trained with such a flat-concatenation layout map
            # channels differently and are not compatible with this layout.
            x[i] = torch.cat(
                (
                    m_box.view(bs, self.na, 4, ny, nx),
                    m_conf.view(bs, self.na, 1, ny, nx),
                    m_labels.view(bs, self.na, self.nc, ny, nx),
                ),
                2,
            ).permute(0, 1, 3, 4, 2).contiguous()  # x(bs,3,85,20,20) to x(bs,3,20,20,85)

            if not self.training:  # inference
                if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                    self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                y = x[i].sigmoid()
                if self.inplace:
                    y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
                    xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                    wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                    y = torch.cat((xy, wh, y[..., 4:]), -1)
                z.append(y.view(bs, -1, self.no))

        return x if self.training else (torch.cat(z, 1), x)

2.在yolo.py中添加

def get_activation(name="silu", inplace=True):
    """Return a freshly built activation module selected by ``name``.

    Args:
        name: ``"silu"`` for ``nn.SiLU``, ``"relu"`` for ``nn.ReLU`` or
            ``"lrelu"`` for ``nn.LeakyReLU`` with a negative slope of 0.1.
        inplace: forwarded to the module's ``inplace`` argument.

    Raises:
        AttributeError: if ``name`` is none of the supported identifiers.
    """
    if name == "silu":
        return nn.SiLU(inplace=inplace)
    if name == "relu":
        return nn.ReLU(inplace=inplace)
    if name == "lrelu":
        return nn.LeakyReLU(0.1, inplace=inplace)
    # Nothing matched: report the offending identifier to the caller.
    raise AttributeError("Unsupported act type: {}".format(name))



class BaseConv(nn.Module):
    """Convolution block: ``Conv2d`` followed by ``BatchNorm2d`` and an activation."""

    def __init__(
        self, in_channels, out_channels, ksize, stride, groups=1, bias=False, act="silu"
    ):
        """Create the conv, batch-norm and activation sub-modules.

        Args:
            in_channels: channels of the input tensor.
            out_channels: channels produced by the convolution.
            ksize: convolution kernel size.
            stride: convolution stride.
            groups: convolution groups.
            bias: whether the convolution has a bias term.
            act: activation identifier passed to ``get_activation``.
        """
        super().__init__()
        self.conv = nn.Conv2d(
            in_channels,
            out_channels,
            ksize,
            stride,
            (ksize - 1) // 2,  # padding: half the kernel extent on each side
            groups=groups,
            bias=bias,
        )
        self.bn = nn.BatchNorm2d(out_channels)
        self.act = get_activation(act, inplace=True)

    def forward(self, x):
        """Apply convolution, batch normalisation, then the activation."""
        normalized = self.bn(self.conv(x))
        return self.act(normalized)

    def fuseforward(self, x):
        """Apply convolution then the activation, skipping ``self.bn``.

        NOTE(review): presumably intended for use once the batch-norm has been
        folded into ``self.conv`` - confirm against the caller.
        """
        convolved = self.conv(x)
        return self.act(convolved)

decouple head的特點：

由於訓練模型時，應該是channels = 256的地方改成了channels = x（失誤），所以decoupled head部分的參數量比yolox要大一些，以下的結果是在channels = x的情況下得出

比yolov5s參數(shù)多，計算量大，在我自己的2.5萬的數(shù)據(jù)量下map提升了3%多

1.模型給出的目標cls較高，需要將conf的閾值設置較大（0.5），不然準確率較低

# Register --conf-thres with a default confidence threshold of 0.5.
parser.add_argument(
    '--conf-thres',
    type=float,
    default=0.5,
    help='confidence threshold',
)

2.對于少樣本的檢測效果較好，召回率的提升比準確率多

3.在conf設置為0.25時，召回率比yolov5s高，但是準確率低；在conf設置為0.5時，召回率與準確率比yolov5s高

4.比yolov5s參數多，計算量大，在2.5萬的數據量下map提升了3%多

對于decouple head的改進

改進：

1.將紅色框中的conv去掉，縮小參數量和計算量；

2.channels = 256、512、1024是考慮不增加參數，不進行featuremap的信息壓縮

class Detect(nn.Module):
    """YOLOv5 detection layer: a single 1x1 convolution per feature level.

    Each level's convolution emits ``na * no`` channels, which are reshaped to
    ``(bs, na, ny, nx, no)``. Outside training mode the predictions are also
    passed through a sigmoid and decoded with the cached grid, anchor grid and
    ``stride``.

    NOTE(review): inference calls ``self._make_grid``, which is not defined in
    this class body as shown; presumably it lives elsewhere in the real class -
    confirm before use.
    """

    stride = None  # strides computed during build
    onnx_dynamic = False  # ONNX export parameter

    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):  # detection layer
        """Set up anchors, grid caches and the output convolutions.

        Args:
            nc: number of classes.
            anchors: one flat ``(w0, h0, w1, h1, ...)`` sequence per detection
                level; its length defines the number of levels.
            ch: input channel count of each feature level.
            inplace: use in-place slice assignment when decoding boxes.
        """
        super().__init__()
        self.nc = nc  # number of classes
        self.no = nc + 5  # number of outputs per anchor
        self.nl = len(anchors)  # number of detection layers
        self.na = len(anchors[0]) // 2  # number of anchors
        # Placeholder caches; replaced per level on the first inference pass.
        self.grid = [torch.zeros(1)] * self.nl
        self.anchor_grid = [torch.zeros(1)] * self.nl
        anchor_table = torch.tensor(anchors).float().view(self.nl, -1, 2)  # shape(nl,na,2)
        self.register_buffer('anchors', anchor_table)
        out_channels = self.no * self.na
        self.m = nn.ModuleList([nn.Conv2d(c_in, out_channels, 1) for c_in in ch])  # output conv
        self.inplace = inplace  # use in-place ops (e.g. slice assignment)

    def forward(self, x):
        """Run the head on a list of feature maps (the list is updated in place).

        Args:
            x: list with one ``(bs, ch[i], ny, nx)`` tensor per detection level.

        Returns:
            In training mode, ``x`` with every entry replaced by the raw
            ``(bs, na, ny, nx, no)`` prediction. Otherwise a tuple
            ``(decoded, x)`` where ``decoded`` concatenates the decoded
            predictions of all levels as ``(bs, total_anchors, no)``.
        """
        decoded = []  # inference output
        for level in range(self.nl):
            x[level] = self.m[level](x[level])  # conv
            batch, _, rows, cols = x[level].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
            x[level] = (
                x[level]
                .view(batch, self.na, self.no, rows, cols)
                .permute(0, 1, 3, 4, 2)
                .contiguous()
            )

            if self.training:
                continue  # raw predictions only; decoding is inference-only

            # Rebuild the cached grids when the feature-map size has changed.
            if self.onnx_dynamic or self.grid[level].shape[2:4] != x[level].shape[2:4]:
                self.grid[level], self.anchor_grid[level] = self._make_grid(cols, rows, level)

            pred = x[level].sigmoid()
            if self.inplace:
                pred[..., 0:2] = (pred[..., 0:2] * 2 - 0.5 + self.grid[level]) * self.stride[level]  # xy
                pred[..., 2:4] = (pred[..., 2:4] * 2) ** 2 * self.anchor_grid[level]  # wh
            else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
                centers = (pred[..., 0:2] * 2 - 0.5 + self.grid[level]) * self.stride[level]  # xy
                sizes = (pred[..., 2:4] * 2) ** 2 * self.anchor_grid[level]  # wh
                pred = torch.cat((centers, sizes, pred[..., 4:]), -1)
            decoded.append(pred.view(batch, -1, self.no))

        if self.training:
            return x
        return torch.cat(decoded, 1), x