Spaces:

innova-ai
/

YuE-music-generator-demo

Running on Zero

App Files Files Community

KingNish committed on Jan 28

Commit

5af1f06

verified ·

1 Parent(s): b3b0dda

Upload ./RepCodec/repcodec/modules/decoder.py with huggingface_hub

Browse files

Files changed (1) hide show

RepCodec/repcodec/modules/decoder.py +109 -0

RepCodec/repcodec/modules/decoder.py ADDED Viewed

	@@ -0,0 +1,109 @@

+# Copyright (c) ByteDance, Inc. and its affiliates.
+# Copyright (c) Chutong Meng
+#
+# This source code is licensed under the CC BY-NC license found in the
+# LICENSE file in the root directory of this source tree.
+# Based on AudioDec (https://github.com/facebookresearch/AudioDec)
+import torch
+import torch.nn as nn
+from RepCodec.repcodec.layers.conv_layer import Conv1d, ConvTranspose1d
+from RepCodec.repcodec.modules.residual_unit import ResidualUnit
+class DecoderBlock(nn.Module):
+    """ Decoder block (no up-sampling) """
+    def __init__(
+            self,
+            in_channels: int,
+            out_channels: int,
+            stride: int,
+            dilations=(1, 1),
+            unit_kernel_size=3,
+            bias=True
+    ):
+        super().__init__()
+        if stride == 1:
+            self.conv = Conv1d(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=3,  # fix kernel=3 when stride=1 for unchanged shape
+                stride=stride,
+                bias=bias,
+            )
+        else:
+            self.conv = ConvTranspose1d(
+                in_channels=in_channels,
+                out_channels=out_channels,
+                kernel_size=(2 * stride),
+                stride=stride,
+                bias=bias,
+            )
+        self.res_units = torch.nn.ModuleList()
+        for idx, dilation in enumerate(dilations):
+            self.res_units += [
+                ResidualUnit(out_channels, out_channels,
+                             kernel_size=unit_kernel_size,
+                             dilation=dilation)
+            ]
+        self.num_res = len(self.res_units)
+    def forward(self, x):
+        x = self.conv(x)
+        for idx in range(self.num_res):
+            x = self.res_units[idx](x)
+        return x
+class Decoder(nn.Module):
+    def __init__(
+            self,
+            code_dim: int,
+            output_channels: int,
+            decode_channels: int,
+            channel_ratios=(1, 1),
+            strides=(1, 1),
+            kernel_size=3,
+            bias=True,
+            block_dilations=(1, 1),
+            unit_kernel_size=3,
+    ):
+        super().__init__()
+        assert len(channel_ratios) == len(strides)
+        self.conv1 = Conv1d(
+            in_channels=code_dim,
+            out_channels=int(decode_channels * channel_ratios[0]),
+            kernel_size=kernel_size,
+            stride=1,
+            bias=False
+        )
+        self.conv_blocks = torch.nn.ModuleList()
+        for idx, stride in enumerate(strides):
+            in_channels = int(decode_channels * channel_ratios[idx])
+            if idx < (len(channel_ratios) - 1):
+                out_channels = int(decode_channels * channel_ratios[idx + 1])
+            else:
+                out_channels = decode_channels
+            self.conv_blocks += [
+                DecoderBlock(
+                    in_channels, out_channels, stride,
+                    dilations=block_dilations, unit_kernel_size=unit_kernel_size,
+                    bias=bias
+                )
+            ]
+        self.num_blocks = len(self.conv_blocks)
+        self.conv2 = Conv1d(out_channels, output_channels, kernel_size, 1, bias=False)
+    def forward(self, z):
+        x = self.conv1(z)
+        for i in range(self.num_blocks):
+            x = self.conv_blocks[i](x)
+        x = self.conv2(x)
+        return x