Spaces:

KyanChen
/

ai-photo-gallery

Runtime error

App Files Files Community

ai-photo-gallery / mmcls /models /backbones /replknet.py

KyanChen

init

f549064 over 1 year ago

raw

history blame contribute delete

No virus

25.3 kB

	# Copyright (c) OpenMMLab. All rights reserved.
	import torch
	import torch.nn as nn
	import torch.utils.checkpoint as checkpoint
	from mmcv.cnn import build_activation_layer, build_norm_layer
	from mmcv.cnn.bricks import DropPath
	from mmengine.model import BaseModule
	from mmengine.utils.dl_utils.parrots_wrapper import _BatchNorm

	from mmcls.registry import MODELS
	from .base_backbone import BaseBackbone


	def conv_bn(in_channels, out_channels, kernel_size, stride, padding, groups,
	            dilation=1, norm_cfg=dict(type='BN')):
	    """Build a bias-free ``nn.Conv2d`` followed by a normalization layer.

	    Args:
	        in_channels (int): Number of input channels of the convolution.
	        out_channels (int): Number of output channels of the convolution;
	            also the feature count handed to the norm layer.
	        kernel_size (int): Kernel size of the convolution.
	        stride (int): Stride of the convolution.
	        padding (int | None): Padding of the convolution. ``None`` selects
	            ``kernel_size // 2``.
	        groups (int): Number of groups of the convolution.
	        dilation (int): Dilation of the convolution. Defaults to 1.
	        norm_cfg (dict): Config forwarded to ``build_norm_layer``.
	            Defaults to ``dict(type='BN')``.

	    Returns:
	        nn.Sequential: A container whose sub-modules are registered under
	        the names ``'conv'`` and ``'bn'``, in that order.
	    """
	    if padding is None:
	        padding = kernel_size // 2

	    # The convolution carries no bias of its own.
	    conv = nn.Conv2d(
	        in_channels,
	        out_channels,
	        kernel_size,
	        stride=stride,
	        padding=padding,
	        dilation=dilation,
	        groups=groups,
	        bias=False)
	    # Only the element at index 1 of the builder's result is the layer.
	    norm = build_norm_layer(norm_cfg, out_channels)[1]

	    block = nn.Sequential()
	    block.add_module('conv', conv)
	    block.add_module('bn', norm)
	    return block


	def conv_bn_relu(in_channels, out_channels, kernel_size, stride, padding,
	                 groups, dilation=1):
	    """Build a conv + norm block (via ``conv_bn``) with a trailing ReLU.

	    Args:
	        in_channels (int): Number of input channels of the convolution.
	        out_channels (int): Number of output channels of the convolution.
	        kernel_size (int): Kernel size of the convolution.
	        stride (int): Stride of the convolution.
	        padding (int | None): Padding of the convolution. ``None`` selects
	            ``kernel_size // 2``.
	        groups (int): Number of groups of the convolution.
	        dilation (int): Dilation of the convolution. Defaults to 1.

	    Returns:
	        nn.Sequential: The ``conv_bn`` container with an extra ``nn.ReLU``
	        registered under the name ``'nonlinear'``.
	    """
	    resolved_padding = kernel_size // 2 if padding is None else padding
	    block = conv_bn(
	        in_channels,
	        out_channels,
	        kernel_size,
	        stride,
	        resolved_padding,
	        groups,
	        dilation=dilation)
	    block.add_module('nonlinear', nn.ReLU())
	    return block


	def fuse_bn(conv, bn):
	    """Fold a batch norm (using its running statistics) into a convolution.

	    The returned pair parameterizes a single convolution that computes
	    ``bn(conv(x))`` when ``bn`` normalizes with its running mean/variance.
	    If ``conv`` has a bias it is folded into the fused bias as well; a
	    bias-free convolution yields exactly the same result as before.

	    Args:
	        conv (nn.Conv2d): The convolution module to fuse.
	        bn (nn.BatchNorm2d): The batch normalization to fuse. Its
	            ``weight``, ``bias``, ``running_mean``, ``running_var`` and
	            ``eps`` attributes are read.

	    Returns:
	        tuple[torch.Tensor, torch.Tensor]: The fused weight (same shape as
	        ``conv.weight``) and the fused per-output-channel bias.
	    """
	    kernel = conv.weight
	    gamma = bn.weight
	    beta = bn.bias
	    std = (bn.running_var + bn.eps).sqrt()

	    # bn(conv(x) + b) = gamma * (W*x + b - mean) / std + beta, so a conv bias
	    # simply shifts the mean that gets subtracted.
	    shift = bn.running_mean
	    conv_bias = getattr(conv, 'bias', None)
	    if conv_bias is not None:
	        shift = shift - conv_bias

	    # One scale factor per output channel, broadcast over the kernel.
	    scale = (gamma / std).reshape(-1, 1, 1, 1)
	    return kernel * scale, beta - shift * gamma / std

	class ReparamLargeKernelConv(BaseModule):
	    """Large-kernel convolution with an optional parallel small kernel.

	    In training form the module holds a large conv + norm branch
	    (``lkb_origin``) and, when ``small_kernel`` is given, a small conv + norm
	    branch (``small_conv``) whose outputs are summed. In deployment form it
	    holds one biased convolution (``lkb_reparam``).

	    Input: Tensor with shape [B, C, H, W].
	    Output: Tensor with shape [B, C, H, W].

	    Args:
	        in_channels (int): Dimension of input features.
	        out_channels (int): Dimension of output features.
	        kernel_size (int): Kernel size of the large convolution.
	        stride (int): Stride of the large (and small) convolution.
	        groups (int): Groups of the large (and small) convolution.
	        small_kernel (int | None): Kernel size of the small convolution.
	            ``None`` disables the small branch. Must not exceed
	            ``kernel_size``.
	        small_kernel_merged (bool): Whether to build the module directly
	            in deployment form (a single large convolution).
	            Defaults to False.
	        init_cfg (dict or list[dict], optional): Initialization config dict.
	            Defaults to None.
	    """

	    def __init__(self,
	                 in_channels,
	                 out_channels,
	                 kernel_size,
	                 stride,
	                 groups,
	                 small_kernel,
	                 small_kernel_merged=False,
	                 init_cfg=None):
	        super().__init__(init_cfg)
	        self.kernel_size = kernel_size
	        self.small_kernel = small_kernel
	        self.small_kernel_merged = small_kernel_merged

	        # The conv is assumed not to change the feature map size, hence
	        # padding = k // 2. With another padding, the small branch's
	        # padding has to be adapted accordingly.
	        same_padding = kernel_size // 2

	        if small_kernel_merged:
	            # Deployment form: one plain convolution with bias.
	            self.lkb_reparam = nn.Conv2d(
	                in_channels,
	                out_channels,
	                kernel_size,
	                stride=stride,
	                padding=same_padding,
	                dilation=1,
	                groups=groups,
	                bias=True)
	            return

	        # Training form: large branch plus an optional small branch.
	        self.lkb_origin = conv_bn(
	            in_channels,
	            out_channels,
	            kernel_size,
	            stride,
	            same_padding,
	            groups,
	            dilation=1)
	        if small_kernel is None:
	            return
	        assert small_kernel <= kernel_size
	        self.small_conv = conv_bn(
	            in_channels,
	            out_channels,
	            small_kernel,
	            stride,
	            small_kernel // 2,
	            groups,
	            dilation=1)

	    def forward(self, inputs):
	        """Apply the merged conv, or the sum of the training branches."""
	        if hasattr(self, 'lkb_reparam'):
	            return self.lkb_reparam(inputs)
	        out = self.lkb_origin(inputs)
	        if hasattr(self, 'small_conv'):
	            out += self.small_conv(inputs)
	        return out

	    def get_equivalent_kernel_bias(self):
	        """Return the fused ``(kernel, bias)`` of the training branches.

	        Each branch is folded with ``fuse_bn``; the small kernel is
	        zero-padded on all four sides and added onto the large one.
	        """
	        large_branch = self.lkb_origin
	        eq_k, eq_b = fuse_bn(large_branch.conv, large_branch.bn)
	        if not hasattr(self, 'small_conv'):
	            return eq_k, eq_b

	        small_branch = self.small_conv
	        small_k, small_b = fuse_bn(small_branch.conv, small_branch.bn)
	        eq_b += small_b
	        # Pad the small kernel so it lands on the central part.
	        border = (self.kernel_size - self.small_kernel) // 2
	        eq_k += nn.functional.pad(small_k, [border] * 4)
	        return eq_k, eq_b

	    def merge_kernel(self):
	        """Switch the model structure from training mode to deployment mode."""
	        if self.small_kernel_merged:
	            return

	        eq_k, eq_b = self.get_equivalent_kernel_bias()
	        # The merged conv copies its geometry from the large branch.
	        template = self.lkb_origin.conv
	        merged = nn.Conv2d(
	            in_channels=template.in_channels,
	            out_channels=template.out_channels,
	            kernel_size=template.kernel_size,
	            stride=template.stride,
	            padding=template.padding,
	            dilation=template.dilation,
	            groups=template.groups,
	            bias=True)
	        self.lkb_reparam = merged
	        merged.weight.data = eq_k
	        merged.bias.data = eq_b

	        # Drop the training-only branches.
	        del self.lkb_origin
	        if hasattr(self, 'small_conv'):
	            del self.small_conv

	        self.small_kernel_merged = True


	class ConvFFN(BaseModule):
	    """Residual feed-forward block built from 1x1 convolutions.

	    The input is normalized, passed through a 1x1 conv + norm, an
	    activation, and a second 1x1 conv + norm; the result (after drop path)
	    is added back to the input.

	    Input: Tensor with shape [B, C, H, W].
	    Output: Tensor with shape [B, C, H, W].

	    Args:
	        in_channels (int): Dimension of input features.
	        internal_channels (int): Dimension of hidden features.
	        out_channels (int): Dimension of output features.
	        drop_path (float): Stochastic depth rate. A value that is not
	            greater than 0 replaces drop path with ``nn.Identity``.
	        norm_cfg (dict): Config of the norm layer applied before the first
	            pointwise convolution. Defaults to ``dict(type='BN')``.
	        act_cfg (dict): The config dict for activation between pointwise
	            convolution. Defaults to ``dict(type='GELU')``.
	        init_cfg (dict or list[dict], optional): Initialization config dict.
	            Defaults to None.
	    """

	    def __init__(self,
	                 in_channels,
	                 internal_channels,
	                 out_channels,
	                 drop_path,
	                 norm_cfg=dict(type='BN'),
	                 act_cfg=dict(type='GELU'),
	                 init_cfg=None):
	        super().__init__(init_cfg)
	        if drop_path > 0.:
	            self.drop_path = DropPath(drop_prob=drop_path)
	        else:
	            self.drop_path = nn.Identity()
	        self.preffn_bn = build_norm_layer(norm_cfg, in_channels)[1]
	        # Expand to the hidden width, then project to the output width.
	        self.pw1 = conv_bn(in_channels, internal_channels, 1, 1, 0, groups=1)
	        self.pw2 = conv_bn(internal_channels, out_channels, 1, 1, 0, groups=1)
	        self.nonlinear = build_activation_layer(act_cfg)

	    def forward(self, x):
	        """Return ``x`` plus the (drop-path) feed-forward branch output."""
	        hidden = self.pw1(self.preffn_bn(x))
	        hidden = self.pw2(self.nonlinear(hidden))
	        return x + self.drop_path(hidden)


	class RepLKBlock(BaseModule):
	    """RepLKBlock for RepLKNet backbone.

	    Residual block: norm -> 1x1 conv/norm/ReLU -> large-kernel conv (one
	    group per channel) -> activation -> 1x1 conv/norm, added to the input
	    after drop path.

	    Args:
	        in_channels (int): The input channels of the block.
	        dw_channels (int): The intermediate channels of the block,
	            i.e., input channels of the large kernel convolution.
	        block_lk_size (int): Size of the super large kernel.
	        small_kernel (int | None): Size of the parallel small kernel.
	        drop_path (float): Stochastic depth rate. A value that is not
	            greater than 0 replaces drop path with ``nn.Identity``.
	        small_kernel_merged (bool): Whether to switch the model structure to
	            deployment mode (merge the small kernel to the large kernel).
	            Defaults to False.
	        norm_cfg (dict): Config of the norm layer applied at the block
	            entry. Defaults to ``dict(type='BN')``.
	        act_cfg (dict): Config dict for the activation that follows the
	            large kernel convolution. Defaults to ``dict(type='ReLU')``.
	        init_cfg (dict or list[dict], optional): Initialization config dict.
	            Defaults to None.
	    """

	    def __init__(self,
	                 in_channels,
	                 dw_channels,
	                 block_lk_size,
	                 small_kernel,
	                 drop_path,
	                 small_kernel_merged=False,
	                 norm_cfg=dict(type='BN'),
	                 act_cfg=dict(type='ReLU'),
	                 init_cfg=None):
	        super().__init__(init_cfg)
	        # Pointwise convs into and out of the large-kernel width.
	        self.pw1 = conv_bn_relu(
	            in_channels=in_channels,
	            out_channels=dw_channels,
	            kernel_size=1,
	            stride=1,
	            padding=0,
	            groups=1)
	        self.pw2 = conv_bn(
	            in_channels=dw_channels,
	            out_channels=in_channels,
	            kernel_size=1,
	            stride=1,
	            padding=0,
	            groups=1)
	        # groups == channels: every channel gets its own large kernel.
	        self.large_kernel = ReparamLargeKernelConv(
	            dw_channels,
	            dw_channels,
	            block_lk_size,
	            stride=1,
	            groups=dw_channels,
	            small_kernel=small_kernel,
	            small_kernel_merged=small_kernel_merged)
	        self.lk_nonlinear = build_activation_layer(act_cfg)
	        self.prelkb_bn = build_norm_layer(norm_cfg, in_channels)[1]
	        if drop_path > 0.:
	            self.drop_path = DropPath(drop_prob=drop_path)
	        else:
	            self.drop_path = nn.Identity()

	    def forward(self, x):
	        """Return ``x`` plus the (drop-path) large-kernel branch output."""
	        branch = self.pw1(self.prelkb_bn(x))
	        branch = self.lk_nonlinear(self.large_kernel(branch))
	        branch = self.pw2(branch)
	        return x + self.drop_path(branch)


	class RepLKNetStage(BaseModule):
	    """One RepLKNet stage: alternating ``RepLKBlock`` and ``ConvFFN`` blocks.

	    For every one of the ``num_blocks`` steps a ``RepLKBlock`` followed by a
	    ``ConvFFN`` is appended, so ``self.blocks`` holds ``2 * num_blocks``
	    modules. ``self.norm`` is created here but is NOT applied in
	    :meth:`forward`; ``RepLKNet.forward_features`` calls it on the stage
	    output it collects.

	    Args:
	        channels (int): The input channels of the stage.
	        num_blocks (int): The number of blocks of the stage.
	        stage_lk_size (int): Size of the super large kernel shared by all
	            blocks of the stage.
	        drop_path (float | list[float] | tuple[float]): Stochastic depth
	            rate. A list or tuple is indexed per block (it needs at least
	            ``num_blocks`` entries); a scalar is used for every block.
	        small_kernel (int | None): Size of the parallel small kernel.
	        dw_ratio (float): The intermediate channels expansion ratio of the
	            block. Defaults to 1.
	        ffn_ratio (float): Mlp expansion ratio. Defaults to 4.
	        with_cp (bool): Use checkpoint or not. Using checkpoint will save
	            some memory while slowing down the training speed.
	            Defaults to False.
	        small_kernel_merged (bool): Whether to switch the model structure to
	            deployment mode (merge the small kernel to the large kernel).
	            Defaults to False.
	        norm_intermediate_features (bool): If True ``self.norm`` is a norm
	            layer built from ``norm_cfg``, otherwise ``nn.Identity``.
	            Used to normalize the intermediate features for downstream
	            dense prediction tasks. Defaults to False.
	        norm_cfg (dict): Config of ``self.norm``. It is not forwarded to the
	            blocks. Defaults to ``dict(type='BN')``.
	        init_cfg (dict or list[dict], optional): Initialization config dict.
	            Defaults to None.
	    """

	    def __init__(
	            self,
	            channels,
	            num_blocks,
	            stage_lk_size,
	            drop_path,
	            small_kernel,
	            dw_ratio=1,
	            ffn_ratio=4,
	            with_cp=False,  # train with torch.utils.checkpoint to save memory
	            small_kernel_merged=False,
	            norm_intermediate_features=False,
	            norm_cfg=dict(type='BN'),
	            init_cfg=None):
	        super(RepLKNetStage, self).__init__(init_cfg)
	        self.with_cp = with_cp

	        # Tuples are accepted alongside lists; treating a tuple as a scalar
	        # would only fail later inside the blocks' ``drop_path > 0.`` test.
	        per_block_rates = isinstance(drop_path, (list, tuple))

	        blks = []
	        for i in range(num_blocks):
	            block_drop_path = drop_path[i] if per_block_rates else drop_path
	            # Assume all RepLK Blocks within a stage share the same lk_size.
	            # You may tune it on your own model.
	            blks.append(
	                RepLKBlock(
	                    in_channels=channels,
	                    dw_channels=int(channels * dw_ratio),
	                    block_lk_size=stage_lk_size,
	                    small_kernel=small_kernel,
	                    drop_path=block_drop_path,
	                    small_kernel_merged=small_kernel_merged))
	            blks.append(
	                ConvFFN(
	                    in_channels=channels,
	                    internal_channels=int(channels * ffn_ratio),
	                    out_channels=channels,
	                    drop_path=block_drop_path))
	        self.blocks = nn.ModuleList(blks)

	        if norm_intermediate_features:
	            self.norm = build_norm_layer(norm_cfg, channels)[1]
	        else:
	            self.norm = nn.Identity()

	    def forward(self, x):
	        """Run ``x`` through all blocks in order (``self.norm`` not applied)."""
	        for blk in self.blocks:
	            if self.with_cp:
	                x = checkpoint.checkpoint(blk, x)  # Save training memory
	            else:
	                x = blk(x)
	        return x


	@MODELS.register_module()
	class RepLKNet(BaseBackbone):
	    """RepLKNet backbone.

	    A PyTorch impl of :
	    `Scaling Up Your Kernels to 31x31: Revisiting Large Kernel Design in CNNs
	    <https://arxiv.org/abs/2203.06717>`_

	    Args:
	        arch (str | dict): The parameter of RepLKNet. A string must be a key
	            of ``arch_settings``.
	            If it's a dict, it should contain the following keys:

	            - large_kernel_sizes (Sequence[int]):
	              Large kernel size in each stage.
	            - layers (Sequence[int]): Number of blocks in each stage.
	            - channels (Sequence[int]): Number of channels in each stage.
	            - small_kernel (int | None): size of the parallel small kernel.
	            - dw_ratio (float): The intermediate channels
	              expansion ratio of the block.
	        in_channels (int): Number of input image channels. Defaults to 3.
	        ffn_ratio (float): Mlp expansion ratio. Defaults to 4.
	        out_indices (Sequence[int]): Output from which stages.
	            Defaults to (3, ).
	        strides (Sequence[int]): Only length-checked against the number of
	            stages and stored; the stem and transition strides are fixed
	            in this implementation. Defaults to (2, 2, 2, 2).
	        dilations (Sequence[int]): Only length-checked against the number of
	            stages and stored. Defaults to (1, 1, 1, 1).
	        frozen_stages (int): A value >= 0 freezes the stem, and every stage
	            whose index is smaller than ``frozen_stages``. -1 means not
	            freezing any parameters. Defaults to -1.
	        conv_cfg (dict | None): Stored on the instance only.
	            Defaults to None.
	        norm_cfg (dict): Stored on the instance only.
	            Defaults to ``dict(type='BN')``.
	        act_cfg (dict): Stored on the instance only.
	            Defaults to ``dict(type='ReLU')``.
	        with_cp (bool): Use checkpoint or not. Using checkpoint will save some
	            memory while slowing down the training speed. Defaults to False.
	        drop_path_rate (float): Upper end of the stochastic depth rate; the
	            per-block rate grows linearly from 0 to this value over all
	            blocks. Defaults to 0.3.
	        small_kernel_merged (bool): Whether to build the model directly in
	            deployment mode. Defaults to False.
	        norm_intermediate_features (bool): Stored on the instance. Note that
	            each stage is built with a norm layer exactly when its index is
	            in ``out_indices``, independent of this flag.
	            Defaults to False.
	        norm_eval (bool): Whether to set norm layers to eval mode, namely,
	            freeze running stats (mean and var). Note: Effect on Batch Norm
	            and its variants only. Defaults to False.
	        init_cfg (dict or list[dict], optional): Initialization config dict.
	    """

	    arch_settings = {
	        '31B':
	        dict(
	            large_kernel_sizes=[31, 29, 27, 13],
	            layers=[2, 2, 18, 2],
	            channels=[128, 256, 512, 1024],
	            small_kernel=5,
	            dw_ratio=1),
	        '31L':
	        dict(
	            large_kernel_sizes=[31, 29, 27, 13],
	            layers=[2, 2, 18, 2],
	            channels=[192, 384, 768, 1536],
	            small_kernel=5,
	            dw_ratio=1),
	        'XL':
	        dict(
	            large_kernel_sizes=[27, 27, 27, 13],
	            layers=[2, 2, 18, 2],
	            channels=[256, 512, 1024, 2048],
	            small_kernel=None,
	            dw_ratio=1.5),
	    }

	    def __init__(self,
	                 arch,
	                 in_channels=3,
	                 ffn_ratio=4,
	                 out_indices=(3, ),
	                 strides=(2, 2, 2, 2),
	                 dilations=(1, 1, 1, 1),
	                 frozen_stages=-1,
	                 conv_cfg=None,
	                 norm_cfg=dict(type='BN'),
	                 act_cfg=dict(type='ReLU'),
	                 with_cp=False,
	                 drop_path_rate=0.3,
	                 small_kernel_merged=False,
	                 norm_intermediate_features=False,
	                 norm_eval=False,
	                 init_cfg=[
	                     dict(type='Kaiming', layer=['Conv2d']),
	                     dict(
	                         type='Constant',
	                         val=1,
	                         layer=['_BatchNorm', 'GroupNorm'])
	                 ]):
	        super(RepLKNet, self).__init__(init_cfg)

	        if isinstance(arch, str):
	            assert arch in self.arch_settings, \
	                f'"arch": "{arch}" is not one of the arch_settings'
	            arch = self.arch_settings[arch]
	        elif not isinstance(arch, dict):
	            raise TypeError('Expect "arch" to be either a string '
	                            f'or a dict, got {type(arch)}')

	        assert len(arch['layers']) == len(
	            arch['channels']) == len(strides) == len(dilations)
	        assert max(out_indices) < len(arch['layers'])

	        self.arch = arch
	        self.in_channels = in_channels
	        self.out_indices = out_indices
	        self.strides = strides
	        self.dilations = dilations
	        self.frozen_stages = frozen_stages
	        self.conv_cfg = conv_cfg
	        self.norm_cfg = norm_cfg
	        self.act_cfg = act_cfg
	        self.with_cp = with_cp
	        self.drop_path_rate = drop_path_rate
	        self.small_kernel_merged = small_kernel_merged
	        self.norm_eval = norm_eval
	        self.norm_intermediate_features = norm_intermediate_features

	        depths = self.arch['layers']
	        widths = self.arch['channels']
	        base_width = widths[0]
	        self.num_stages = len(depths)

	        # Stem: two stride-2 layers, with a stride-1 3x3 (one group per
	        # channel) and a 1x1 layer in between.
	        self.stem = nn.ModuleList([
	            conv_bn_relu(
	                in_channels=in_channels,
	                out_channels=base_width,
	                kernel_size=3,
	                stride=2,
	                padding=1,
	                groups=1),
	            conv_bn_relu(
	                in_channels=base_width,
	                out_channels=base_width,
	                kernel_size=3,
	                stride=1,
	                padding=1,
	                groups=base_width),
	            conv_bn_relu(
	                in_channels=base_width,
	                out_channels=base_width,
	                kernel_size=1,
	                stride=1,
	                padding=0,
	                groups=1),
	            conv_bn_relu(
	                in_channels=base_width,
	                out_channels=base_width,
	                kernel_size=3,
	                stride=2,
	                padding=1,
	                groups=base_width)
	        ])

	        # stochastic depth. We set block-wise drop-path rate.
	        # The higher level blocks are more likely to be dropped.
	        # This implementation follows Swin.
	        dpr = [
	            x.item()
	            for x in torch.linspace(0, drop_path_rate, sum(depths))
	        ]

	        self.stages = nn.ModuleList()
	        self.transitions = nn.ModuleList()
	        first_block = 0  # index into dpr of the current stage's first block
	        for stage_idx in range(self.num_stages):
	            depth = depths[stage_idx]
	            layer = RepLKNetStage(
	                channels=widths[stage_idx],
	                num_blocks=depth,
	                stage_lk_size=self.arch['large_kernel_sizes'][stage_idx],
	                drop_path=dpr[first_block:first_block + depth],
	                small_kernel=self.arch['small_kernel'],
	                dw_ratio=self.arch['dw_ratio'],
	                ffn_ratio=ffn_ratio,
	                with_cp=with_cp,
	                small_kernel_merged=small_kernel_merged,
	                norm_intermediate_features=(stage_idx in out_indices))
	            first_block += depth
	            self.stages.append(layer)

	            # Every stage but the last is followed by a transition that
	            # changes the width (1x1) and then downsamples (3x3, stride 2).
	            if stage_idx < self.num_stages - 1:
	                next_width = widths[stage_idx + 1]
	                transition = nn.Sequential(
	                    conv_bn_relu(
	                        widths[stage_idx], next_width, 1, 1, 0, groups=1),
	                    conv_bn_relu(
	                        next_width,
	                        next_width,
	                        3,
	                        stride=2,
	                        padding=1,
	                        groups=next_width))
	                self.transitions.append(transition)

	    def forward_features(self, x):
	        """Return the list of (normalized) outputs of the selected stages."""
	        # The first stem layer is never checkpointed.
	        x = self.stem[0](x)
	        for stem_layer in self.stem[1:]:
	            if self.with_cp:
	                x = checkpoint.checkpoint(stem_layer, x)  # save memory
	            else:
	                x = stem_layer(x)

	        # Need the intermediate feature maps
	        outs = []
	        for stage_idx in range(self.num_stages):
	            x = self.stages[stage_idx](x)
	            if stage_idx in self.out_indices:
	                # For RepLKNet-XL normalize the features
	                # before feeding them into the heads
	                outs.append(self.stages[stage_idx].norm(x))
	            if stage_idx < self.num_stages - 1:
	                x = self.transitions[stage_idx](x)
	        return outs

	    def forward(self, x):
	        """Return the selected stage outputs as a tuple."""
	        x = self.forward_features(x)
	        return tuple(x)

	    def _freeze_stages(self):
	        """Put the stem and the first ``frozen_stages`` stages in eval mode
	        and disable gradients of their parameters."""
	        if self.frozen_stages >= 0:
	            self.stem.eval()
	            for param in self.stem.parameters():
	                param.requires_grad = False
	        for i in range(self.frozen_stages):
	            stage = self.stages[i]
	            stage.eval()
	            for param in stage.parameters():
	                param.requires_grad = False

	    def train(self, mode=True):
	        """Set the training mode while keeping frozen parts frozen.

	        Args:
	            mode (bool): Whether to set training mode (True) or evaluation
	                mode (False). Defaults to True.

	        Returns:
	            RepLKNet: ``self``, matching ``nn.Module.train`` so that calls
	            can be chained.
	        """
	        super(RepLKNet, self).train(mode)
	        self._freeze_stages()
	        if mode and self.norm_eval:
	            for m in self.modules():
	                if isinstance(m, _BatchNorm):
	                    m.eval()
	        return self

	    def switch_to_deploy(self):
	        """Call ``merge_kernel`` on every sub-module that provides it."""
	        for m in self.modules():
	            if hasattr(m, 'merge_kernel'):
	                m.merge_kernel()
	        self.small_kernel_merged = True