Spaces:
Runtime error
Runtime error
# Copyright (c) OpenMMLab. All rights reserved.
# Copyright (c) 2019 Western Digital Corporation or its affiliates.
import warnings | |
import torch.nn as nn | |
from mmcv.cnn import ConvModule | |
from mmengine.model import BaseModule | |
from torch.nn.modules.batchnorm import _BatchNorm | |
from mmdet.registry import MODELS | |
class ResBlock(BaseModule):
    """Residual unit of the Darknet backbone.

    The block chains two ``ConvModule`` layers and adds the block input to
    the result (identity shortcut). The first layer is a 1x1 convolution
    that halves the channel count; the second is a 3x3 convolution
    (``padding=1``) that restores the original channel count, so the input
    and output have the same number of channels.

    Args:
        in_channels (int): Number of input (and output) channels. Must be
            even, because the bottleneck uses ``in_channels // 2`` channels.
        conv_cfg (dict): Config dict for the convolution layers.
            Default: None.
        norm_cfg (dict): Config dict for the normalization layers.
            Default: dict(type='BN', requires_grad=True)
        act_cfg (dict): Config dict for the activation layers.
            Default: dict(type='LeakyReLU', negative_slope=0.1).
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None
    """

    def __init__(self,
                 in_channels,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
                 init_cfg=None):
        super().__init__(init_cfg)
        # The bottleneck width is exactly half of the input width.
        assert in_channels % 2 == 0  # ensure the in_channels is even
        bottleneck_channels = in_channels // 2

        # Both convolutions share the same conv/norm/activation settings.
        shared_cfg = {
            'conv_cfg': conv_cfg,
            'norm_cfg': norm_cfg,
            'act_cfg': act_cfg,
        }
        # 1x1 squeeze followed by a 3x3 expand back to ``in_channels``.
        self.conv1 = ConvModule(
            in_channels, bottleneck_channels, 1, **shared_cfg)
        self.conv2 = ConvModule(
            bottleneck_channels, in_channels, 3, padding=1, **shared_cfg)

    def forward(self, x):
        """Apply both convolutions and add the input as a shortcut."""
        return self.conv2(self.conv1(x)) + x
# NOTE(review): ``MODELS`` is imported at the top of this file but is not
# referenced anywhere in the visible source. A ``@MODELS.register_module()``
# decorator on this class may have been lost in the same way as the
# ``@staticmethod`` restored on ``make_conv_res_block`` below -- confirm
# before relying on registry/config-based construction.
class Darknet(BaseModule):
    """Darknet backbone.

    The network is a stem convolution (``conv1``) followed by one
    "conv + residual blocks" stage per entry of ``arch_settings[depth]``.
    Stage names are collected in ``self.cr_blocks`` in execution order, so
    index 0 refers to ``conv1`` and index ``i`` (``i >= 1``) refers to
    ``conv_res_block{i}``.

    Args:
        depth (int): Depth of Darknet. Currently only support 53.
        out_indices (Sequence[int]): Output from which stages. Indices refer
            to positions in ``self.cr_blocks`` (0 is the stem ``conv1``).
        frozen_stages (int): Stages to be frozen (stop grad and set eval mode).
            The first ``frozen_stages`` entries of ``self.cr_blocks`` are
            frozen. -1 means not freezing any parameters. Default: -1.
        conv_cfg (dict): Config dict for convolution layer. Default: None.
        norm_cfg (dict): Dictionary to construct and config norm layer.
            Default: dict(type='BN', requires_grad=True)
        act_cfg (dict): Config dict for activation layer.
            Default: dict(type='LeakyReLU', negative_slope=0.1).
        norm_eval (bool): Whether to set norm layers to eval mode, namely,
            freeze running stats (mean and var). Note: Effect on Batch Norm
            and its variants only.
        pretrained (str, optional): model pretrained path. Default: None
        init_cfg (dict or list[dict], optional): Initialization config dict.
            Default: None

    Raises:
        KeyError: If ``depth`` is not a key of ``arch_settings``.
        TypeError: If ``pretrained`` is neither a str nor None.

    Example:
        >>> from mmdet.models import Darknet
        >>> import torch
        >>> self = Darknet(depth=53)
        >>> self.eval()
        >>> inputs = torch.rand(1, 3, 416, 416)
        >>> level_outputs = self.forward(inputs)
        >>> for level_out in level_outputs:
        ...     print(tuple(level_out.shape))
        ...
        (1, 256, 52, 52)
        (1, 512, 26, 26)
        (1, 1024, 13, 13)
    """

    # Dict(depth: (layers, channels))
    arch_settings = {
        53: ((1, 2, 8, 8, 4), ((32, 64), (64, 128), (128, 256), (256, 512),
                               (512, 1024)))
    }

    def __init__(self,
                 depth=53,
                 out_indices=(3, 4, 5),
                 frozen_stages=-1,
                 conv_cfg=None,
                 norm_cfg=dict(type='BN', requires_grad=True),
                 act_cfg=dict(type='LeakyReLU', negative_slope=0.1),
                 norm_eval=True,
                 pretrained=None,
                 init_cfg=None):
        super(Darknet, self).__init__(init_cfg)
        if depth not in self.arch_settings:
            raise KeyError(f'invalid depth {depth} for darknet')

        self.depth = depth
        self.out_indices = out_indices
        self.frozen_stages = frozen_stages
        self.layers, self.channels = self.arch_settings[depth]

        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

        # Stem convolution: 3 input channels -> 32 channels.
        self.conv1 = ConvModule(3, 32, 3, padding=1, **cfg)

        # ``cr_blocks`` keeps the attribute names of all stages in the order
        # in which ``forward`` runs them.
        self.cr_blocks = ['conv1']
        for i, n_layers in enumerate(self.layers):
            layer_name = f'conv_res_block{i + 1}'
            in_c, out_c = self.channels[i]
            self.add_module(
                layer_name,
                self.make_conv_res_block(in_c, out_c, n_layers, **cfg))
            self.cr_blocks.append(layer_name)

        self.norm_eval = norm_eval

        assert not (init_cfg and pretrained), \
            'init_cfg and pretrained cannot be specified at the same time'
        if isinstance(pretrained, str):
            warnings.warn('DeprecationWarning: pretrained is deprecated, '
                          'please use "init_cfg" instead')
            self.init_cfg = dict(type='Pretrained', checkpoint=pretrained)
        elif pretrained is None:
            # Fall back to a default initialization only when the caller
            # supplied neither ``pretrained`` nor ``init_cfg``.
            if init_cfg is None:
                self.init_cfg = [
                    dict(type='Kaiming', layer='Conv2d'),
                    dict(
                        type='Constant',
                        val=1,
                        layer=['_BatchNorm', 'GroupNorm'])
                ]
        else:
            raise TypeError('pretrained must be a str or None')

    def forward(self, x):
        """Run all stages in order and collect the requested outputs.

        Args:
            x: Input passed to the stem convolution.

        Returns:
            tuple: Outputs of the stages whose position in
            ``self.cr_blocks`` is contained in ``self.out_indices``, in
            execution order.
        """
        outs = []
        for i, layer_name in enumerate(self.cr_blocks):
            cr_block = getattr(self, layer_name)
            x = cr_block(x)
            if i in self.out_indices:
                outs.append(x)

        return tuple(outs)

    def _freeze_stages(self):
        """Put the first ``frozen_stages`` stages in eval mode and disable
        gradients for their parameters.

        Nothing is frozen when ``frozen_stages`` is negative or zero.
        """
        if self.frozen_stages >= 0:
            for i in range(self.frozen_stages):
                m = getattr(self, self.cr_blocks[i])
                m.eval()
                for param in m.parameters():
                    param.requires_grad = False

    def train(self, mode=True):
        """Switch train/eval mode while keeping frozen parts frozen.

        After delegating to the parent ``train``, the frozen stages are
        re-applied (the parent call would otherwise put them back in train
        mode). When training with ``norm_eval`` enabled, every
        ``_BatchNorm`` module is additionally set to eval mode.

        Args:
            mode (bool): Whether to set training mode. Default: True.
        """
        super(Darknet, self).train(mode)
        self._freeze_stages()
        if mode and self.norm_eval:
            for m in self.modules():
                if isinstance(m, _BatchNorm):
                    m.eval()

    # ``__init__`` calls this as ``self.make_conv_res_block(in_c, out_c, ...)``
    # and the signature has no ``self``; it must be a static method, otherwise
    # the instance would be bound to ``in_channels`` and the call would fail.
    @staticmethod
    def make_conv_res_block(in_channels,
                            out_channels,
                            res_repeat,
                            conv_cfg=None,
                            norm_cfg=dict(type='BN', requires_grad=True),
                            act_cfg=dict(type='LeakyReLU',
                                         negative_slope=0.1)):
        """In Darknet backbone, ConvLayer is usually followed by ResBlock. This
        function will make that. The Conv layers always have 3x3 filters with
        stride=2. The number of the filters in Conv layer is the same as the
        out channels of the ResBlock.

        Args:
            in_channels (int): The number of input channels.
            out_channels (int): The number of output channels.
            res_repeat (int): The number of ResBlocks.
            conv_cfg (dict): Config dict for convolution layer. Default: None.
            norm_cfg (dict): Dictionary to construct and config norm layer.
                Default: dict(type='BN', requires_grad=True)
            act_cfg (dict): Config dict for activation layer.
                Default: dict(type='LeakyReLU', negative_slope=0.1).

        Returns:
            nn.Sequential: A module named ``'conv'`` (the stride-2
            ConvModule) followed by ``res_repeat`` ResBlocks named
            ``'res0'``, ``'res1'``, ...
        """
        cfg = dict(conv_cfg=conv_cfg, norm_cfg=norm_cfg, act_cfg=act_cfg)

        model = nn.Sequential()
        model.add_module(
            'conv',
            ConvModule(
                in_channels, out_channels, 3, stride=2, padding=1, **cfg))
        for idx in range(res_repeat):
            model.add_module(f'res{idx}', ResBlock(out_channels, **cfg))
        return model