# Dynamic ReLU (DY-ReLU) activation modules.
import torch
import torch.nn as nn
import torch.nn.functional as F
def _make_divisible(v, divisor, min_value=None):
if min_value is None:
min_value = divisor
new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
# Make sure that round down does not go down by more than 10%.
if new_v < 0.9 * v:
new_v += divisor
return new_v
class swish(nn.Module):
    """Swish activation: ``x * sigmoid(x)`` (equivalent to SiLU)."""

    def forward(self, x):
        # Gate the input by its own sigmoid.
        gate = torch.sigmoid(x)
        return gate * x
class h_swish(nn.Module):
    """Hard-swish: ``x * relu6(x + 3) / 6``, a piecewise-linear swish."""

    def __init__(self, inplace=False):
        super(h_swish, self).__init__()
        # Whether relu6 may write into its (temporary) input tensor.
        self.inplace = inplace

    def forward(self, x):
        gate = F.relu6(x + 3.0, inplace=self.inplace)
        # Same operation order as x * relu6(x + 3) / 6 — bit-identical result.
        return x * gate / 6.0
class h_sigmoid(nn.Module):
    """Hard sigmoid: ``relu6(x + 3) * h_max / 6`` — saturates at ``h_max``."""

    def __init__(self, inplace=True, h_max=1):
        super(h_sigmoid, self).__init__()
        self.relu = nn.ReLU6(inplace=inplace)
        # Upper saturation value of the gate.
        self.h_max = h_max

    def forward(self, x):
        shifted = x + 3
        return self.relu(shifted) * self.h_max / 6
class DYReLU(nn.Module):
    """Dynamic ReLU: activation slopes/intercepts are predicted per sample.

    A squeeze-and-excite style branch (global average pool + two-layer FC +
    hard sigmoid) predicts ``exp`` coefficients per output channel; the
    activation is then a max over one or two affine pieces built from them.

    Args:
        inp: channel count of the attention input ``x_in``.
        oup: channel count of the tensor being activated (``x_out``).
        reduction: squeeze ratio of the hidden FC layer.
        lambda_a: scale of the dynamic slope range (stored doubled so the
            raw FC output 0.5 maps exactly onto ``init_a``).
        K2: if True, predict two linear pieces (output is their max).
        use_bias: if True, also predict intercepts.
        use_spatial: if True, additionally predict a spatial attention map.
        init_a: initial slopes of the two pieces (indexed, never mutated).
        init_b: initial intercepts of the two pieces.

    ``forward`` accepts either a single tensor or a two-element list
    ``[x_in, x_out]``: ``x_in`` drives the coefficient prediction, ``x_out``
    is the tensor the dynamic activation is applied to.
    """

    def __init__(
        self,
        inp,
        oup,
        reduction=4,
        lambda_a=1.0,
        K2=True,
        use_bias=True,
        use_spatial=False,
        init_a=(1.0, 0.0),  # tuples: avoid the mutable-default-argument pitfall
        init_b=(0.0, 0.0),
    ):
        super().__init__()
        self.oup = oup
        # Doubled so (y - 0.5) * lambda_a spans [-lambda_a, +lambda_a].
        self.lambda_a = lambda_a * 2
        self.K2 = K2
        self.avg_pool = nn.AdaptiveAvgPool2d(1)
        self.use_bias = use_bias
        # Coefficients predicted per output channel:
        # K2 adds a second (slope, bias) piece; use_bias adds intercepts.
        if K2:
            self.exp = 4 if use_bias else 2
        else:
            self.exp = 2 if use_bias else 1
        self.init_a = init_a
        self.init_b = init_b
        # Hidden width of the squeeze FC; for non-default reductions keep it
        # divisible by 4 (matches the reference implementation).
        if reduction == 4:
            squeeze = inp // reduction
        else:
            squeeze = _make_divisible(inp // reduction, 4)
        self.fc = nn.Sequential(
            nn.Linear(inp, squeeze), nn.ReLU(inplace=True), nn.Linear(squeeze, oup * self.exp), h_sigmoid()
        )
        if use_spatial:
            self.spa = nn.Sequential(
                nn.Conv2d(inp, 1, kernel_size=1),
                nn.BatchNorm2d(1),
            )
        else:
            self.spa = None

    def forward(self, x):
        """Apply the dynamic activation.

        ``x`` is a tensor, or a list ``[x_in, x_out]`` (attention source,
        tensor to activate). Returns a tensor shaped like ``x_out``.
        """
        if isinstance(x, list):
            x_in, x_out = x[0], x[1]
        else:
            x_in = x_out = x
        b, c, h, w = x_in.size()
        # Predict per-sample coefficients from globally pooled features;
        # h_sigmoid keeps the raw outputs in [0, 1], centered at 0.5.
        y = self.avg_pool(x_in).view(b, c)
        y = self.fc(y).view(b, self.oup * self.exp, 1, 1)
        if self.exp == 4:
            # Two affine pieces with bias: max(a1*x + b1, a2*x + b2).
            a1, b1, a2, b2 = torch.split(y, self.oup, dim=1)
            a1 = (a1 - 0.5) * self.lambda_a + self.init_a[0]
            a2 = (a2 - 0.5) * self.lambda_a + self.init_a[1]
            b1 = b1 - 0.5 + self.init_b[0]
            b2 = b2 - 0.5 + self.init_b[1]
            out = torch.max(x_out * a1 + b1, x_out * a2 + b2)
        elif self.exp == 2:
            if self.use_bias:  # one affine piece with bias
                a1, b1 = torch.split(y, self.oup, dim=1)
                a1 = (a1 - 0.5) * self.lambda_a + self.init_a[0]
                b1 = b1 - 0.5 + self.init_b[0]
                out = x_out * a1 + b1
            else:  # two slopes, no bias: max(a1*x, a2*x)
                a1, a2 = torch.split(y, self.oup, dim=1)
                a1 = (a1 - 0.5) * self.lambda_a + self.init_a[0]
                a2 = (a2 - 0.5) * self.lambda_a + self.init_a[1]
                out = torch.max(x_out * a1, x_out * a2)
        elif self.exp == 1:
            # Single dynamic slope.
            a1 = (y - 0.5) * self.lambda_a + self.init_a[0]
            out = x_out * a1
        else:
            # Unreachable given __init__, but fail loudly instead of
            # raising UnboundLocalError on `out` below.
            raise RuntimeError(f"unexpected number of coefficients: exp={self.exp}")
        if self.spa is not None:  # explicit None check, not Module truthiness
            # Spatial attention: softmax over positions rescaled to mean 1,
            # then clipped to [0, 1] via hardtanh(., 0, 3) / 3.
            ys = self.spa(x_in).view(b, -1)
            ys = F.softmax(ys, dim=1).view(b, 1, h, w) * h * w
            ys = F.hardtanh(ys, 0, 3, inplace=True) / 3
            out = out * ys
        return out