EG

Running

EG / upscaler /RealESRGAN /arch_utils.py

Harisreedhar

update

71c9afb over 1 year ago

7.14 kB

	import math
	import torch
	from torch import nn as nn
	from torch.nn import functional as F
	from torch.nn import init as init
	from torch.nn.modules.batchnorm import _BatchNorm

	@torch.no_grad()
	def default_init_weights(module_list, scale=1, bias_fill=0, **kwargs):
	"""Initialize network weights.

	Args:
	module_list (list[nn.Module] \| nn.Module): Modules to be initialized.
	scale (float): Scale initialized weights, especially for residual
	blocks. Default: 1.
	bias_fill (float): The value to fill bias. Default: 0
	kwargs (dict): Other arguments for initialization function.
	"""
	if not isinstance(module_list, list):
	module_list = [module_list]
	for module in module_list:
	for m in module.modules():
	if isinstance(m, nn.Conv2d):
	init.kaiming_normal_(m.weight, **kwargs)
	m.weight.data *= scale
	if m.bias is not None:
	m.bias.data.fill_(bias_fill)
	elif isinstance(m, nn.Linear):
	init.kaiming_normal_(m.weight, **kwargs)
	m.weight.data *= scale
	if m.bias is not None:
	m.bias.data.fill_(bias_fill)
	elif isinstance(m, _BatchNorm):
	init.constant_(m.weight, 1)
	if m.bias is not None:
	m.bias.data.fill_(bias_fill)


	def make_layer(basic_block, num_basic_block, **kwarg):
	"""Make layers by stacking the same blocks.

	Args:
	basic_block (nn.module): nn.module class for basic block.
	num_basic_block (int): number of blocks.

	Returns:
	nn.Sequential: Stacked blocks in nn.Sequential.
	"""
	layers = []
	for _ in range(num_basic_block):
	layers.append(basic_block(**kwarg))
	return nn.Sequential(*layers)


	class ResidualBlockNoBN(nn.Module):
	"""Residual block without BN.

	It has a style of:
	---Conv-ReLU-Conv-+-
	\|________________\|

	Args:
	num_feat (int): Channel number of intermediate features.
	Default: 64.
	res_scale (float): Residual scale. Default: 1.
	pytorch_init (bool): If set to True, use pytorch default init,
	otherwise, use default_init_weights. Default: False.
	"""

	def __init__(self, num_feat=64, res_scale=1, pytorch_init=False):
	super(ResidualBlockNoBN, self).__init__()
	self.res_scale = res_scale
	self.conv1 = nn.Conv2d(num_feat, num_feat, 3, 1, 1, bias=True)
	self.conv2 = nn.Conv2d(num_feat, num_feat, 3, 1, 1, bias=True)
	self.relu = nn.ReLU(inplace=True)

	if not pytorch_init:
	default_init_weights([self.conv1, self.conv2], 0.1)

	def forward(self, x):
	identity = x
	out = self.conv2(self.relu(self.conv1(x)))
	return identity + out * self.res_scale


	class Upsample(nn.Sequential):
	"""Upsample module.

	Args:
	scale (int): Scale factor. Supported scales: 2^n and 3.
	num_feat (int): Channel number of intermediate features.
	"""

	def __init__(self, scale, num_feat):
	m = []
	if (scale & (scale - 1)) == 0: # scale = 2^n
	for _ in range(int(math.log(scale, 2))):
	m.append(nn.Conv2d(num_feat, 4 * num_feat, 3, 1, 1))
	m.append(nn.PixelShuffle(2))
	elif scale == 3:
	m.append(nn.Conv2d(num_feat, 9 * num_feat, 3, 1, 1))
	m.append(nn.PixelShuffle(3))
	else:
	raise ValueError(f'scale {scale} is not supported. ' 'Supported scales: 2^n and 3.')
	super(Upsample, self).__init__(*m)


	def flow_warp(x, flow, interp_mode='bilinear', padding_mode='zeros', align_corners=True):
	"""Warp an image or feature map with optical flow.

	Args:
	x (Tensor): Tensor with size (n, c, h, w).
	flow (Tensor): Tensor with size (n, h, w, 2), normal value.
	interp_mode (str): 'nearest' or 'bilinear'. Default: 'bilinear'.
	padding_mode (str): 'zeros' or 'border' or 'reflection'.
	Default: 'zeros'.
	align_corners (bool): Before pytorch 1.3, the default value is
	align_corners=True. After pytorch 1.3, the default value is
	align_corners=False. Here, we use the True as default.

	Returns:
	Tensor: Warped image or feature map.
	"""
	assert x.size()[-2:] == flow.size()[1:3]
	_, _, h, w = x.size()
	# create mesh grid
	grid_y, grid_x = torch.meshgrid(torch.arange(0, h).type_as(x), torch.arange(0, w).type_as(x))
	grid = torch.stack((grid_x, grid_y), 2).float() # W(x), H(y), 2
	grid.requires_grad = False

	vgrid = grid + flow
	# scale grid to [-1,1]
	vgrid_x = 2.0 * vgrid[:, :, :, 0] / max(w - 1, 1) - 1.0
	vgrid_y = 2.0 * vgrid[:, :, :, 1] / max(h - 1, 1) - 1.0
	vgrid_scaled = torch.stack((vgrid_x, vgrid_y), dim=3)
	output = F.grid_sample(x, vgrid_scaled, mode=interp_mode, padding_mode=padding_mode, align_corners=align_corners)

	# TODO, what if align_corners=False
	return output


	def resize_flow(flow, size_type, sizes, interp_mode='bilinear', align_corners=False):
	"""Resize a flow according to ratio or shape.

	Args:
	flow (Tensor): Precomputed flow. shape [N, 2, H, W].
	size_type (str): 'ratio' or 'shape'.
	sizes (list[int \| float]): the ratio for resizing or the final output
	shape.
	1) The order of ratio should be [ratio_h, ratio_w]. For
	downsampling, the ratio should be smaller than 1.0 (i.e., ratio
	< 1.0). For upsampling, the ratio should be larger than 1.0 (i.e.,
	ratio > 1.0).
	2) The order of output_size should be [out_h, out_w].
	interp_mode (str): The mode of interpolation for resizing.
	Default: 'bilinear'.
	align_corners (bool): Whether align corners. Default: False.

	Returns:
	Tensor: Resized flow.
	"""
	_, _, flow_h, flow_w = flow.size()
	if size_type == 'ratio':
	output_h, output_w = int(flow_h * sizes[0]), int(flow_w * sizes[1])
	elif size_type == 'shape':
	output_h, output_w = sizes[0], sizes[1]
	else:
	raise ValueError(f'Size type should be ratio or shape, but got type {size_type}.')

	input_flow = flow.clone()
	ratio_h = output_h / flow_h
	ratio_w = output_w / flow_w
	input_flow[:, 0, :, :] *= ratio_w
	input_flow[:, 1, :, :] *= ratio_h
	resized_flow = F.interpolate(
	input=input_flow, size=(output_h, output_w), mode=interp_mode, align_corners=align_corners)
	return resized_flow


	# TODO: may write a cpp file
	def pixel_unshuffle(x, scale):
	""" Pixel unshuffle.

	Args:
	x (Tensor): Input feature with shape (b, c, hh, hw).
	scale (int): Downsample ratio.

	Returns:
	Tensor: the pixel unshuffled feature.
	"""
	b, c, hh, hw = x.size()
	out_channel = c * (scale**2)
	assert hh % scale == 0 and hw % scale == 0
	h = hh // scale
	w = hw // scale
	x_view = x.view(b, c, h, scale, w, scale)
	return x_view.permute(0, 1, 3, 5, 2, 4).reshape(b, out_channel, h, w)