KingNish committed on
Commit
5af1f06
·
verified ·
1 Parent(s): b3b0dda

Upload ./RepCodec/repcodec/modules/decoder.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. RepCodec/repcodec/modules/decoder.py +109 -0
RepCodec/repcodec/modules/decoder.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) ByteDance, Inc. and its affiliates.
2
+ # Copyright (c) Chutong Meng
3
+ #
4
+ # This source code is licensed under the CC BY-NC license found in the
5
+ # LICENSE file in the root directory of this source tree.
6
+ # Based on AudioDec (https://github.com/facebookresearch/AudioDec)
7
+
8
+ import torch
9
+ import torch.nn as nn
10
+
11
+ from RepCodec.repcodec.layers.conv_layer import Conv1d, ConvTranspose1d
12
+ from RepCodec.repcodec.modules.residual_unit import ResidualUnit
13
+
14
+
15
class DecoderBlock(nn.Module):
    """Single decoder stage: one convolution followed by residual units.

    When ``stride`` equals 1 the stage uses a ``Conv1d`` with a fixed kernel
    size of 3; for any other stride it uses a ``ConvTranspose1d`` whose kernel
    size is ``2 * stride`` (presumably up-sampling the time axis by ``stride``
    -- this depends on the project's ``ConvTranspose1d`` wrapper).

    Args:
        in_channels: Channel count of the incoming feature map.
        out_channels: Channel count produced by the convolution and kept by
            every residual unit.
        stride: Stride of the convolution; also selects which convolution
            type is built (see above).
        dilations: One dilation value per residual unit; its length sets the
            number of residual units.
        unit_kernel_size: Kernel size passed to each residual unit.
        bias: Whether the convolution uses a bias term.
    """

    def __init__(
            self,
            in_channels: int,
            out_channels: int,
            stride: int,
            dilations=(1, 1),
            unit_kernel_size=3,
            bias=True
    ):
        super().__init__()

        if stride != 1:
            # Transposed convolution; the kernel spans two strides.
            self.conv = ConvTranspose1d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=(2 * stride),
                stride=stride,
                bias=bias,
            )
        else:
            # Kernel is pinned to 3 at stride 1 so the shape stays unchanged.
            self.conv = Conv1d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=3,
                stride=stride,
                bias=bias,
            )

        # One residual unit per requested dilation, all at out_channels width.
        self.res_units = torch.nn.ModuleList([
            ResidualUnit(
                out_channels,
                out_channels,
                kernel_size=unit_kernel_size,
                dilation=unit_dilation,
            )
            for unit_dilation in dilations
        ])
        self.num_res = len(self.res_units)

    def forward(self, x):
        """Apply the convolution, then every residual unit in order."""
        out = self.conv(x)
        for unit in self.res_units:
            out = unit(out)
        return out
60
+
61
+
62
class Decoder(nn.Module):
    """Convolutional decoder: input conv -> decoder blocks -> output conv.

    The input convolution maps ``code_dim`` channels to
    ``decode_channels * channel_ratios[0]``. Block ``i`` then maps
    ``decode_channels * channel_ratios[i]`` channels to
    ``decode_channels * channel_ratios[i + 1]``, except the last block, which
    always outputs ``decode_channels``. The output convolution finally maps
    to ``output_channels``. Both outer convolutions are built without bias.

    Args:
        code_dim: Channel count of the input codes.
        output_channels: Channel count of the decoder output.
        decode_channels: Base channel width, scaled by ``channel_ratios``.
        channel_ratios: Width multiplier for the input of each block; must
            have the same length as ``strides``.
        strides: Stride of each decoder block.
        kernel_size: Kernel size of the input and output convolutions.
        bias: Whether the decoder blocks' convolutions use a bias term.
        block_dilations: Dilations forwarded to every decoder block.
        unit_kernel_size: Residual-unit kernel size forwarded to every block.
    """

    def __init__(
            self,
            code_dim: int,
            output_channels: int,
            decode_channels: int,
            channel_ratios=(1, 1),
            strides=(1, 1),
            kernel_size=3,
            bias=True,
            block_dilations=(1, 1),
            unit_kernel_size=3,
    ):
        super().__init__()
        assert len(channel_ratios) == len(strides)

        self.conv1 = Conv1d(
            in_channels=code_dim,
            out_channels=int(decode_channels * channel_ratios[0]),
            kernel_size=kernel_size,
            stride=1,
            bias=False
        )

        final_idx = len(channel_ratios) - 1
        stages = []
        for idx, stride in enumerate(strides):
            block_in = int(decode_channels * channel_ratios[idx])
            # The last block always lands on the base width; earlier blocks
            # hand over the width expected by their successor.
            block_out = (
                int(decode_channels * channel_ratios[idx + 1])
                if idx < final_idx
                else decode_channels
            )
            stages.append(
                DecoderBlock(
                    block_in,
                    block_out,
                    stride,
                    dilations=block_dilations,
                    unit_kernel_size=unit_kernel_size,
                    bias=bias,
                )
            )
        self.conv_blocks = torch.nn.ModuleList(stages)
        self.num_blocks = len(self.conv_blocks)

        # Output projection consumes the width produced by the last block.
        self.conv2 = Conv1d(block_out, output_channels, kernel_size, 1, bias=False)

    def forward(self, z):
        """Decode ``z`` through the input conv, all blocks, and the output conv."""
        hidden = self.conv1(z)
        for block in self.conv_blocks:
            hidden = block(hidden)
        return self.conv2(hidden)