chore: upload model & code
- config.json +48 -0
- configuration_hifigan.py +22 -0
- modeling_hifigan.py +184 -0
- pytorch_model.bin +3 -0
config.json
ADDED
@@ -0,0 +1,48 @@
+{
+  "architectures": [
+    "HiFiGAN"
+  ],
+  "auto_map": {
+    "AutoConfig": "configuration_hifigan.HiFiGANConfig",
+    "AutoModel": "modeling_hifigan.HiFiGAN"
+  },
+  "model_in_dim": 80,
+  "model_type": "hifigan",
+  "resblock_dilation_sizes": [
+    [
+      1,
+      3,
+      5
+    ],
+    [
+      1,
+      3,
+      5
+    ],
+    [
+      1,
+      3,
+      5
+    ]
+  ],
+  "resblock_kernel_sizes": [
+    3,
+    7,
+    11
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.17.0.dev0",
+  "upsample_initial_channel": 512,
+  "upsample_kernel_sizes": [
+    16,
+    16,
+    4,
+    4
+  ],
+  "upsample_rates": [
+    8,
+    8,
+    2,
+    2
+  ]
+}
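The "upsample_rates" above determine how many audio samples the vocoder emits per input mel frame: their product is 8 * 8 * 2 * 2 = 256, which typically matches the hop length used when the mel spectrogram was computed. A minimal sketch (plain Python, not part of the uploaded files) that reads this off the config:

import json
import math

with open("config.json") as f:
    cfg = json.load(f)

# Product of the per-stage upsampling rates = audio samples generated per mel frame.
samples_per_frame = math.prod(cfg["upsample_rates"])
print(samples_per_frame)  # 256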
configuration_hifigan.py
ADDED
@@ -0,0 +1,22 @@
+from transformers import PretrainedConfig
+
+
+class HiFiGANConfig(PretrainedConfig):
+    model_type = "hifigan"
+
+    def __init__(
+        self,
+        resblock_kernel_sizes=[3, 7, 11],
+        resblock_dilation_sizes=[[1, 3, 5], [1, 3, 5], [1, 3, 5]],
+        upsample_rates=[8, 8, 2, 2],
+        upsample_initial_channel=512,
+        upsample_kernel_sizes=[16, 16, 4, 4],
+        model_in_dim=80,
+    ):
+        self.resblock_kernel_sizes = resblock_kernel_sizes
+        self.resblock_dilation_sizes = resblock_dilation_sizes
+        self.upsample_rates = upsample_rates
+        self.model_in_dim = model_in_dim
+        self.upsample_initial_channel = upsample_initial_channel
+        self.upsample_kernel_sizes = upsample_kernel_sizes
+        super().__init__()
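The constructor defaults above mirror config.json exactly, so a bare HiFiGANConfig() reproduces the shipped hyperparameters. A small sketch (assuming configuration_hifigan.py is importable from the working directory):

from configuration_hifigan import HiFiGANConfig

# No arguments needed: the defaults match the uploaded config.json.
config = HiFiGANConfig()
print(config.model_in_dim)    # 80 mel bins
print(config.upsample_rates)  # [8, 8, 2, 2]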
modeling_hifigan.py
ADDED
@@ -0,0 +1,184 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.nn import Conv1d, ConvTranspose1d
+from torch.nn.utils import remove_weight_norm, weight_norm
+from transformers import PreTrainedModel
+
+from configuration_hifigan import HiFiGANConfig
+
+LRELU_SLOPE = 0.1
+
+
+def init_weights(m, mean=0.0, std=0.01):
+    classname = m.__class__.__name__
+    if classname.find("Conv") != -1:
+        m.weight.data.normal_(mean, std)
+
+
+def get_padding(kernel_size, dilation=1):
+    return (kernel_size * dilation - dilation) // 2
+
+
+class ResBlock(torch.nn.Module):
+    def __init__(self, channels, kernel_size=3, dilation=(1, 3, 5)):
+        super(ResBlock, self).__init__()
+        self.convs1 = nn.ModuleList(
+            [
+                weight_norm(
+                    Conv1d(
+                        channels,
+                        channels,
+                        kernel_size,
+                        1,
+                        dilation=dilation[0],
+                        padding=get_padding(kernel_size, dilation[0]),
+                    )
+                ),
+                weight_norm(
+                    Conv1d(
+                        channels,
+                        channels,
+                        kernel_size,
+                        1,
+                        dilation=dilation[1],
+                        padding=get_padding(kernel_size, dilation[1]),
+                    )
+                ),
+                weight_norm(
+                    Conv1d(
+                        channels,
+                        channels,
+                        kernel_size,
+                        1,
+                        dilation=dilation[2],
+                        padding=get_padding(kernel_size, dilation[2]),
+                    )
+                ),
+            ]
+        )
+        self.convs1.apply(init_weights)
+
+        self.convs2 = nn.ModuleList(
+            [
+                weight_norm(
+                    Conv1d(
+                        channels,
+                        channels,
+                        kernel_size,
+                        1,
+                        dilation=1,
+                        padding=get_padding(kernel_size, 1),
+                    )
+                ),
+                weight_norm(
+                    Conv1d(
+                        channels,
+                        channels,
+                        kernel_size,
+                        1,
+                        dilation=1,
+                        padding=get_padding(kernel_size, 1),
+                    )
+                ),
+                weight_norm(
+                    Conv1d(
+                        channels,
+                        channels,
+                        kernel_size,
+                        1,
+                        dilation=1,
+                        padding=get_padding(kernel_size, 1),
+                    )
+                ),
+            ]
+        )
+        self.convs2.apply(init_weights)
+
+    def forward(self, x):
+        for c1, c2 in zip(self.convs1, self.convs2):
+            xt = F.leaky_relu(x, LRELU_SLOPE)
+            xt = c1(xt)
+            xt = F.leaky_relu(xt, LRELU_SLOPE)
+            xt = c2(xt)
+            x = xt + x
+        return x
+
+    def remove_weight_norm(self):
+        for layer in self.convs1:
+            remove_weight_norm(layer)
+        for layer in self.convs2:
+            remove_weight_norm(layer)
+
+
+class HiFiGAN(PreTrainedModel):
+    config_class = HiFiGANConfig
+
+    def __init__(self, config):
+        super().__init__(config)
+        self.num_kernels = len(config.resblock_kernel_sizes)
+        self.num_upsamples = len(config.upsample_rates)
+        self.conv_pre = weight_norm(
+            Conv1d(
+                config.model_in_dim,
+                config.upsample_initial_channel,
+                7,
+                1,
+                padding=3,
+            )
+        )
+
+        self.ups = nn.ModuleList()
+        for i, (u, k) in enumerate(
+            zip(config.upsample_rates, config.upsample_kernel_sizes)
+        ):
+            self.ups.append(
+                weight_norm(
+                    ConvTranspose1d(
+                        config.upsample_initial_channel // (2**i),
+                        config.upsample_initial_channel // (2 ** (i + 1)),
+                        k,
+                        u,
+                        padding=(k - u) // 2,
+                    )
+                )
+            )
+
+        self.resblocks = nn.ModuleList()
+        for i in range(len(self.ups)):
+            ch = config.upsample_initial_channel // (2 ** (i + 1))
+            for k, d in zip(
+                config.resblock_kernel_sizes, config.resblock_dilation_sizes
+            ):
+                self.resblocks.append(ResBlock(ch, k, d))
+
+        self.conv_post = weight_norm(Conv1d(ch, 1, 7, 1, padding=3))
+        self.ups.apply(init_weights)
+        self.conv_post.apply(init_weights)
+
+    def forward(self, x):
+        x = self.conv_pre(x)
+        for i in range(self.num_upsamples):
+            x = F.leaky_relu(x, LRELU_SLOPE)
+            x = self.ups[i](x)
+            xs = None
+            for j in range(self.num_kernels):
+                if xs is None:
+                    xs = self.resblocks[i * self.num_kernels + j](x)
+                else:
+                    xs += self.resblocks[i * self.num_kernels + j](x)
+            x = xs / self.num_kernels
+        x = F.leaky_relu(x)
+        x = self.conv_post(x)
+        x = torch.tanh(x)
+
+        return x
+
+    def remove_weight_norm(self):
+        print("Removing weight norm...")
+        for layer in self.ups:
+            remove_weight_norm(layer)
+        for layer in self.resblocks:
+            layer.remove_weight_norm()
+        remove_weight_norm(self.conv_pre)
+        remove_weight_norm(self.conv_post)
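The auto_map in config.json wires these files up for AutoConfig / AutoModel with trust_remote_code=True; the sketch below instead builds the generator directly from the two Python files and loads the released checkpoint, keeping the assumptions explicit. It assumes the repository has been cloned with the LFS weights pulled and that the files are importable from the working directory; the mel input is random, standing in for real features.

import torch
from configuration_hifigan import HiFiGANConfig
from modeling_hifigan import HiFiGAN

# Build the generator with the shipped defaults and load the released checkpoint.
config = HiFiGANConfig()
model = HiFiGAN(config)
model.load_state_dict(torch.load("pytorch_model.bin", map_location="cpu"))
model.eval()
model.remove_weight_norm()  # fold weight norm into the conv weights for inference

# Input: (batch, 80 mel bins, frames) -> output: (batch, 1, frames * 256), tanh-bounded to [-1, 1].
mel = torch.randn(1, 80, 100)
with torch.no_grad():
    audio = model(mel)
print(audio.shape)  # torch.Size([1, 1, 25600])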
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6265dc8996049cfe01c4e148d6ad6db1e9a1784b1978bda56fc29d74daddb5a2
+size 55819885