Spaces:
Sleeping
Sleeping
Staticaliza
committed on
Upload 3 files
Browse files- modules/campplus/DTDNN.py +115 -0
- modules/campplus/classifier.py +70 -0
- modules/campplus/layers.py +253 -0
modules/campplus/DTDNN.py
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 3D-Speaker (https://github.com/alibaba-damo-academy/3D-Speaker). All Rights Reserved.
|
2 |
+
# Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
|
3 |
+
|
4 |
+
from collections import OrderedDict
|
5 |
+
|
6 |
+
import torch
|
7 |
+
from torch import nn
|
8 |
+
import torch.nn.functional as F
|
9 |
+
|
10 |
+
from modules.campplus.layers import DenseLayer, StatsPool, TDNNLayer, CAMDenseTDNNBlock, TransitLayer, BasicResBlock, get_nonlinear
|
11 |
+
|
12 |
+
|
13 |
+
class FCM(nn.Module):
    """2-D convolutional front-end that flattens its output into a 1-D channel sequence.

    The input gets a singleton channel axis, goes through a stem convolution,
    two stacks of residual blocks and a final strided convolution, and the
    channel and height axes of the result are then merged into one.

    Args:
        block: Residual block class. It is instantiated as
            ``block(in_planes, planes, stride)`` and must expose an
            ``expansion`` class attribute.
        num_blocks: Two-element sequence giving the number of residual blocks
            in ``layer1`` and ``layer2``.
        m_channels: Channel count used by every convolution in this module.
        feat_dim: Size of the input feature (height) axis; only used to
            compute ``out_channels``.

    Attributes:
        out_channels: Size of the merged channel axis returned by ``forward``.
    """

    def __init__(self,
                 block=BasicResBlock,
                 num_blocks=(2, 2),
                 m_channels=32,
                 feat_dim=80):
        super(FCM, self).__init__()
        self.in_planes = m_channels
        self.conv1 = nn.Conv2d(1, m_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(m_channels)

        self.layer1 = self._make_layer(block, m_channels, num_blocks[0], stride=2)
        self.layer2 = self._make_layer(block, m_channels, num_blocks[1], stride=2)

        self.conv2 = nn.Conv2d(m_channels, m_channels, kernel_size=3, stride=(2, 1), padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(m_channels)
        # layer1, layer2 (with the default BasicResBlock) and conv2 each halve
        # the height with a stride of 2, a 3x3 kernel and padding 1, which
        # yields ceil(H / 2) rows per stage and ceil(feat_dim / 8) overall.
        # Floor division is only right when feat_dim is a multiple of 8.
        self.out_channels = m_channels * ((feat_dim + 7) // 8)

    def _make_layer(self, block, planes, num_blocks, stride):
        """Build a sequential stack of ``num_blocks`` blocks.

        Only the first block uses ``stride``; the remaining ones use stride 1.
        ``self.in_planes`` is updated after every block so the next block is
        created with the matching number of input channels.
        """
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for block_stride in strides:
            layers.append(block(self.in_planes, planes, block_stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        """Run the front-end and merge the channel and height axes.

        Args:
            x: Input tensor without a channel axis; one is inserted at
                position 1. Presumably (batch, feat, time) - confirm against
                the caller.

        Returns:
            Tensor of shape ``(N, C * H, W)`` built from the ``(N, C, H, W)``
            output of the last convolution.
        """
        x = x.unsqueeze(1)
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = F.relu(self.bn2(self.conv2(out)))

        # Fold channels and height together so 1-D layers can consume the result.
        shape = out.shape
        out = out.reshape(shape[0], shape[1] * shape[2], shape[3])
        return out
|
49 |
+
|
50 |
+
class CAMPPlus(nn.Module):
    """Embedding network made of an FCM front-end followed by a densely connected TDNN stack.

    The ``xvector`` container holds, in order: an initial TDNN layer, three
    (dense block, transit layer) pairs, a final non-linearity, statistics
    pooling and a dense projection to ``embedding_size``.

    Args:
        feat_dim: Feature dimension forwarded to ``FCM``.
        embedding_size: Output size of the final dense layer.
        growth_rate: Channels added by every layer of a dense block.
        bn_size: Multiplier applied to ``growth_rate`` to get the bottleneck width.
        init_channels: Output channels of the first TDNN layer.
        config_str: Non-linearity specification passed to the sub-layers.
        memory_efficient: Forwarded to every ``CAMDenseTDNNBlock``.
    """

    def __init__(self,
                 feat_dim=80,
                 embedding_size=512,
                 growth_rate=32,
                 bn_size=4,
                 init_channels=128,
                 config_str='batchnorm-relu',
                 memory_efficient=True):
        super().__init__()

        self.head = FCM(feat_dim=feat_dim)

        stem = TDNNLayer(self.head.out_channels,
                         init_channels,
                         5,
                         stride=2,
                         dilation=1,
                         padding=-1,
                         config_str=config_str)
        self.xvector = nn.Sequential(OrderedDict([('tdnn', stem)]))

        # (number of layers, kernel size, dilation) for each dense block.
        stage_specs = zip((12, 24, 16), (3, 3, 3), (1, 2, 2))
        width = init_channels
        for stage, (depth, ksize, dil) in enumerate(stage_specs, start=1):
            dense_block = CAMDenseTDNNBlock(num_layers=depth,
                                            in_channels=width,
                                            out_channels=growth_rate,
                                            bn_channels=bn_size * growth_rate,
                                            kernel_size=ksize,
                                            dilation=dil,
                                            config_str=config_str,
                                            memory_efficient=memory_efficient)
            self.xvector.add_module('block%d' % stage, dense_block)
            # Every layer of the block concatenates growth_rate new channels.
            width = width + depth * growth_rate

            # The transit layer halves the channel count again.
            halved = width // 2
            transit = TransitLayer(width,
                                   halved,
                                   bias=False,
                                   config_str=config_str)
            self.xvector.add_module('transit%d' % stage, transit)
            width = halved

        self.xvector.add_module('out_nonlinear',
                                get_nonlinear(config_str, width))
        self.xvector.add_module('stats', StatsPool())
        # Pooling concatenates mean and std, hence twice the channel count.
        self.xvector.add_module(
            'dense',
            DenseLayer(width * 2, embedding_size, config_str='batchnorm_'))

        # Re-initialise every 1-D convolution and linear layer in the model.
        for module in self.modules():
            if not isinstance(module, (nn.Conv1d, nn.Linear)):
                continue
            nn.init.kaiming_normal_(module.weight.data)
            if module.bias is not None:
                nn.init.zeros_(module.bias)

    def forward(self, x):
        """Compute the output of the ``xvector`` stack for a (B, T, F) input."""
        feats = x.permute(0, 2, 1)  # (B,T,F) => (B,F,T)
        return self.xvector(self.head(feats))
|
modules/campplus/classifier.py
ADDED
@@ -0,0 +1,70 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 3D-Speaker (https://github.com/alibaba-damo-academy/3D-Speaker). All Rights Reserved.
|
2 |
+
# Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
|
3 |
+
|
4 |
+
import torch
|
5 |
+
import torch.nn as nn
|
6 |
+
import torch.nn.functional as F
|
7 |
+
|
8 |
+
from modules.campplus.layers import DenseLayer
|
9 |
+
|
10 |
+
|
11 |
+
class CosineClassifier(nn.Module):
    """Classifier whose outputs are cosine similarities to learned class vectors.

    Args:
        input_dim: Size of the incoming feature vectors.
        num_blocks: Number of ``DenseLayer`` blocks applied before scoring.
        inter_dim: Output size of each of those blocks.
        out_neurons: Number of class vectors, i.e. the size of the output.
    """

    def __init__(
        self,
        input_dim,
        num_blocks=0,
        inter_dim=512,
        out_neurons=1000,
    ):
        super().__init__()
        self.blocks = nn.ModuleList()

        width = input_dim
        for _ in range(num_blocks):
            self.blocks.append(
                DenseLayer(width, inter_dim, config_str='batchnorm'))
            width = inter_dim

        # One row per class; the values are filled in by the Xavier init below.
        self.weight = nn.Parameter(torch.FloatTensor(out_neurons, width))
        nn.init.xavier_uniform_(self.weight)

    def forward(self, x):
        """Return the cosine similarity between ``x`` ([B, dim]) and every class vector."""
        feats = x
        for block in self.blocks:
            feats = block(feats)

        # Normalise both operands so the linear product is a cosine.
        unit_feats = F.normalize(feats)
        unit_weight = F.normalize(self.weight)
        return F.linear(unit_feats, unit_weight)
|
42 |
+
|
43 |
+
class LinearClassifier(nn.Module):
    """Classifier that applies ReLU, optional dense blocks and a final linear layer.

    Args:
        input_dim: Size of the incoming feature vectors.
        num_blocks: Number of ``DenseLayer`` blocks placed before the output layer.
        inter_dim: Output size of each of those blocks.
        out_neurons: Size of the output produced by the final linear layer.
    """

    def __init__(
        self,
        input_dim,
        num_blocks=0,
        inter_dim=512,
        out_neurons=1000,
    ):
        super().__init__()
        self.blocks = nn.ModuleList()

        # Out-of-place on purpose: this activation is applied directly to the
        # tensor passed to forward(). An in-place ReLU would overwrite the
        # caller's tensor and fails on leaf tensors that require grad.
        self.nonlinear = nn.ReLU()
        for _ in range(num_blocks):
            self.blocks.append(
                DenseLayer(input_dim, inter_dim, bias=True)
            )
            input_dim = inter_dim

        self.linear = nn.Linear(input_dim, out_neurons, bias=True)

    def forward(self, x):
        """Return the class scores for ``x`` ([B, dim]) without modifying ``x``."""
        x = self.nonlinear(x)
        for layer in self.blocks:
            x = layer(x)
        x = self.linear(x)
        return x
|
modules/campplus/layers.py
ADDED
@@ -0,0 +1,253 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Copyright 3D-Speaker (https://github.com/alibaba-damo-academy/3D-Speaker). All Rights Reserved.
|
2 |
+
# Licensed under the Apache License, Version 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
|
3 |
+
|
4 |
+
import torch
|
5 |
+
import torch.nn.functional as F
|
6 |
+
import torch.utils.checkpoint as cp
|
7 |
+
from torch import nn
|
8 |
+
|
9 |
+
|
10 |
+
def get_nonlinear(config_str, channels):
    """Build an ``nn.Sequential`` from a dash-separated specification.

    Recognised tokens are ``relu`` (in-place ReLU), ``prelu``, ``batchnorm``
    and ``batchnorm_`` (batch norm with ``affine=False``). Both batch-norm
    variants are registered under the name ``batchnorm``.

    Args:
        config_str: Tokens joined by ``-``, e.g. ``'batchnorm-relu'``.
        channels: Channel count handed to PReLU and BatchNorm1d.

    Returns:
        The assembled ``nn.Sequential``.

    Raises:
        ValueError: If a token is not one of the recognised names.
    """
    # Token -> factory returning (registered name, module). Factories keep
    # construction lazy so only the requested modules are created.
    builders = {
        'relu': lambda: ('relu', nn.ReLU(inplace=True)),
        'prelu': lambda: ('prelu', nn.PReLU(channels)),
        'batchnorm': lambda: ('batchnorm', nn.BatchNorm1d(channels)),
        'batchnorm_': lambda: ('batchnorm',
                               nn.BatchNorm1d(channels, affine=False)),
    }

    stack = nn.Sequential()
    for token in config_str.split('-'):
        build = builders.get(token)
        if build is None:
            raise ValueError('Unexpected module ({}).'.format(token))
        key, module = build()
        stack.add_module(key, module)
    return stack
|
25 |
+
|
26 |
+
def statistics_pooling(x, dim=-1, keepdim=False, unbiased=True, eps=1e-2):
    """Concatenate the mean and standard deviation of ``x`` computed along ``dim``.

    Args:
        x: Input tensor.
        dim: Axis that is reduced.
        keepdim: If true, a singleton axis is re-inserted at ``dim``.
        unbiased: Passed through to ``Tensor.std``.
        eps: Accepted for interface compatibility; not used by this function.

    Returns:
        The mean and the standard deviation joined along the last axis of the
        reduced tensors.
    """
    moments = [x.mean(dim=dim), x.std(dim=dim, unbiased=unbiased)]
    pooled = torch.cat(moments, dim=-1)
    return pooled.unsqueeze(dim=dim) if keepdim else pooled
|
33 |
+
|
34 |
+
|
35 |
+
class StatsPool(nn.Module):
    """Module wrapper that applies ``statistics_pooling`` with its default arguments."""

    def forward(self, x):
        """Return the pooled statistics of ``x``."""
        pooled = statistics_pooling(x)
        return pooled
|
38 |
+
|
39 |
+
|
40 |
+
class TDNNLayer(nn.Module):
    """1-D convolution followed by the non-linearity described by ``config_str``.

    Args:
        in_channels: Input channels of the convolution.
        out_channels: Output channels of the convolution.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Convolution padding. A negative value requests symmetric
            padding of ``(kernel_size - 1) // 2 * dilation``, which requires
            an odd ``kernel_size``.
        dilation: Convolution dilation.
        bias: Whether the convolution has a bias term.
        config_str: Specification passed to ``get_nonlinear``.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 kernel_size,
                 stride=1,
                 padding=0,
                 dilation=1,
                 bias=False,
                 config_str='batchnorm-relu'):
        super().__init__()
        if padding < 0:
            # Symmetric padding can only be derived for an odd kernel.
            assert kernel_size % 2 == 1, 'Expect equal paddings, but got even kernel size ({})'.format(
                kernel_size)
            padding = (kernel_size - 1) // 2 * dilation

        conv_options = dict(stride=stride,
                            padding=padding,
                            dilation=dilation,
                            bias=bias)
        self.linear = nn.Conv1d(in_channels, out_channels, kernel_size,
                                **conv_options)
        self.nonlinear = get_nonlinear(config_str, out_channels)

    def forward(self, x):
        """Apply the convolution and then the non-linearity."""
        return self.nonlinear(self.linear(x))
|
68 |
+
|
69 |
+
|
70 |
+
class CAMLayer(nn.Module):
    """Convolution whose output is scaled by a sigmoid mask derived from pooled context.

    Args:
        bn_channels: Input channels.
        out_channels: Output channels of the local convolution and of the mask.
        kernel_size: Kernel size of the local convolution.
        stride: Stride of the local convolution.
        padding: Padding of the local convolution.
        dilation: Dilation of the local convolution.
        bias: Whether the local convolution has a bias term.
        reduction: Divisor applied to ``bn_channels`` for the hidden width of
            the mask branch.
    """

    def __init__(self,
                 bn_channels,
                 out_channels,
                 kernel_size,
                 stride,
                 padding,
                 dilation,
                 bias,
                 reduction=2):
        super().__init__()
        self.linear_local = nn.Conv1d(bn_channels,
                                      out_channels,
                                      kernel_size,
                                      stride=stride,
                                      padding=padding,
                                      dilation=dilation,
                                      bias=bias)
        hidden_channels = bn_channels // reduction
        self.linear1 = nn.Conv1d(bn_channels, hidden_channels, 1)
        self.relu = nn.ReLU(inplace=True)
        self.linear2 = nn.Conv1d(hidden_channels, out_channels, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return the local convolution of ``x`` multiplied by the context mask."""
        local = self.linear_local(x)
        # Context is the mean over the last axis plus the per-segment pooling.
        pooled = x.mean(-1, keepdim=True) + self.seg_pooling(x)
        hidden = self.relu(self.linear1(pooled))
        mask = self.sigmoid(self.linear2(hidden))
        return local * mask

    def seg_pooling(self, x, seg_len=100, stype='avg'):
        """Pool ``x`` over consecutive segments and expand back to the input length.

        Args:
            x: Input tensor; the last axis is the one that is segmented.
            seg_len: Segment length, used as both kernel size and stride.
            stype: ``'avg'`` or ``'max'``.

        Returns:
            Tensor with the same last-axis length as ``x`` in which every
            position holds the pooled value of its segment.

        Raises:
            ValueError: If ``stype`` is neither ``'avg'`` nor ``'max'``.
        """
        if stype == 'avg':
            pool = F.avg_pool1d
        elif stype == 'max':
            pool = F.max_pool1d
        else:
            raise ValueError('Wrong segment pooling type.')

        # ceil_mode keeps the trailing partial segment.
        pooled = pool(x, kernel_size=seg_len, stride=seg_len, ceil_mode=True)
        dims = pooled.shape
        # Repeat every pooled value seg_len times, then flatten the repeats.
        tiled = pooled.unsqueeze(-1).expand(*dims, seg_len)
        flat = tiled.reshape(*dims[:-1], -1)
        # Drop the positions that overshoot the original length.
        return flat[..., :x.shape[-1]]
|
111 |
+
|
112 |
+
|
113 |
+
class CAMDenseTDNNLayer(nn.Module):
    """Bottleneck (non-linearity + 1x1 convolution) followed by a ``CAMLayer``.

    Args:
        in_channels: Input channels.
        out_channels: Output channels of the ``CAMLayer``.
        bn_channels: Width of the bottleneck between the two stages.
        kernel_size: Kernel size of the ``CAMLayer``; must be odd.
        stride: Stride of the ``CAMLayer``.
        dilation: Dilation of the ``CAMLayer``.
        bias: Whether the ``CAMLayer`` local convolution has a bias term.
        config_str: Specification passed to ``get_nonlinear``.
        memory_efficient: If true, the bottleneck is run through
            ``torch.utils.checkpoint`` while the module is in training mode.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 bn_channels,
                 kernel_size,
                 stride=1,
                 dilation=1,
                 bias=False,
                 config_str='batchnorm-relu',
                 memory_efficient=False):
        super().__init__()
        # Symmetric padding can only be derived for an odd kernel.
        assert kernel_size % 2 == 1, 'Expect equal paddings, but got even kernel size ({})'.format(
            kernel_size)
        same_padding = (kernel_size - 1) // 2 * dilation

        self.memory_efficient = memory_efficient
        self.nonlinear1 = get_nonlinear(config_str, in_channels)
        self.linear1 = nn.Conv1d(in_channels, bn_channels, 1, bias=False)
        self.nonlinear2 = get_nonlinear(config_str, bn_channels)
        self.cam_layer = CAMLayer(bn_channels,
                                  out_channels,
                                  kernel_size,
                                  stride=stride,
                                  padding=same_padding,
                                  dilation=dilation,
                                  bias=bias)

    def bn_function(self, x):
        """Bottleneck stage: first non-linearity, then the 1x1 convolution."""
        activated = self.nonlinear1(x)
        return self.linear1(activated)

    def forward(self, x):
        """Run the bottleneck (optionally checkpointed) and then the CAM layer."""
        use_checkpoint = self.training and self.memory_efficient
        if use_checkpoint:
            bottleneck = cp.checkpoint(self.bn_function, x)
        else:
            bottleneck = self.bn_function(x)
        return self.cam_layer(self.nonlinear2(bottleneck))
|
150 |
+
|
151 |
+
|
152 |
+
class CAMDenseTDNNBlock(nn.ModuleList):
    """Densely connected stack of ``CAMDenseTDNNLayer`` modules.

    Layer ``i`` (1-based) is registered as ``tdnnd<i>`` and receives
    ``in_channels + (i - 1) * out_channels`` input channels, because the
    output of every layer is concatenated to its input along axis 1.

    Args:
        num_layers: Number of layers in the block.
        in_channels: Channels of the block input.
        out_channels: Channels produced (and appended) by each layer.
        bn_channels: Bottleneck width of each layer.
        kernel_size: Kernel size of each layer.
        stride: Stride of each layer.
        dilation: Dilation of each layer.
        bias: Bias flag forwarded to each layer.
        config_str: Non-linearity specification forwarded to each layer.
        memory_efficient: Checkpointing flag forwarded to each layer.
    """

    def __init__(self,
                 num_layers,
                 in_channels,
                 out_channels,
                 bn_channels,
                 kernel_size,
                 stride=1,
                 dilation=1,
                 bias=False,
                 config_str='batchnorm-relu',
                 memory_efficient=False):
        super().__init__()
        for index in range(1, num_layers + 1):
            # Each earlier layer has already contributed out_channels channels.
            layer_inputs = in_channels + (index - 1) * out_channels
            self.add_module(
                'tdnnd%d' % index,
                CAMDenseTDNNLayer(in_channels=layer_inputs,
                                  out_channels=out_channels,
                                  bn_channels=bn_channels,
                                  kernel_size=kernel_size,
                                  stride=stride,
                                  dilation=dilation,
                                  bias=bias,
                                  config_str=config_str,
                                  memory_efficient=memory_efficient))

    def forward(self, x):
        """Feed ``x`` through every layer, concatenating each output onto the running tensor."""
        features = x
        for layer in self:
            grown = layer(features)
            features = torch.cat([features, grown], dim=1)
        return features
|
181 |
+
|
182 |
+
|
183 |
+
class TransitLayer(nn.Module):
    """Non-linearity followed by a 1x1 convolution that changes the channel count.

    Args:
        in_channels: Input channels (also the width of the non-linearity).
        out_channels: Output channels of the convolution.
        bias: Whether the convolution has a bias term.
        config_str: Specification passed to ``get_nonlinear``.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 bias=True,
                 config_str='batchnorm-relu'):
        super().__init__()
        self.nonlinear = get_nonlinear(config_str, in_channels)
        self.linear = nn.Conv1d(in_channels, out_channels, 1, bias=bias)

    def forward(self, x):
        """Apply the non-linearity first, then the convolution."""
        return self.linear(self.nonlinear(x))
|
197 |
+
|
198 |
+
|
199 |
+
class DenseLayer(nn.Module):
    """1x1 convolution followed by a non-linearity; accepts 2-D inputs as well.

    Args:
        in_channels: Input channels of the convolution.
        out_channels: Output channels (also the width of the non-linearity).
        bias: Whether the convolution has a bias term.
        config_str: Specification passed to ``get_nonlinear``.
    """

    def __init__(self,
                 in_channels,
                 out_channels,
                 bias=False,
                 config_str='batchnorm-relu'):
        super().__init__()
        self.linear = nn.Conv1d(in_channels, out_channels, 1, bias=bias)
        self.nonlinear = get_nonlinear(config_str, out_channels)

    def forward(self, x):
        """Project ``x`` and apply the non-linearity.

        A 2-D input gets a temporary trailing axis so the 1-D convolution can
        process it; that axis is removed again before the non-linearity.
        """
        if x.dim() == 2:
            expanded = x.unsqueeze(dim=-1)
            projected = self.linear(expanded).squeeze(dim=-1)
        else:
            projected = self.linear(x)
        return self.nonlinear(projected)
|
216 |
+
|
217 |
+
|
218 |
+
class BasicResBlock(nn.Module):
    """Two 3x3 convolutions with batch norm and a residual shortcut.

    ``stride`` is applied to the first spatial axis only (the stride along the
    second spatial axis is always 1). When the stride is not 1 or the channel
    counts differ, the shortcut is a strided 1x1 convolution plus batch norm;
    otherwise it is an empty ``nn.Sequential``.

    Args:
        in_planes: Input channels.
        planes: Channels of both convolutions (times ``expansion`` for the output).
        stride: Stride along the first spatial axis.
    """

    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super().__init__()
        out_planes = self.expansion * planes

        self.conv1 = nn.Conv2d(in_planes,
                               planes,
                               kernel_size=3,
                               stride=(stride, 1),
                               padding=1,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes,
                               planes,
                               kernel_size=3,
                               stride=1,
                               padding=1,
                               bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        needs_projection = stride != 1 or in_planes != out_planes
        if needs_projection:
            # Match the main branch in both resolution and channel count.
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes,
                          out_planes,
                          kernel_size=1,
                          stride=(stride, 1),
                          bias=False),
                nn.BatchNorm2d(out_planes))
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        main = F.relu(self.bn1(self.conv1(x)))
        main = self.bn2(self.conv2(main))
        main += self.shortcut(x)
        return F.relu(main)
|