# Generator settings: the model is named SoundStream, and the keys after
# `config:` appear to be its hyperparameters.
# NOTE(review): nesting indentation is not visible in this view and every line
# ends with a stray `| |` (looks like table-extraction residue) — confirm the
# structure against the original file before relying on it.
generator: | |
name: SoundStream | |
config: | |
n_filters: 32 | |
D: 256 | |
# Five target bandwidth values; units are not stated here
# (presumably kbps — TODO confirm against the model code).
target_bandwidths: | |
- 0.5 | |
- 1 | |
- 1.5 | |
- 2 | |
- 4 | |
# The four ratios multiply to 8 * 5 * 4 * 2 = 320; presumably per-stage
# resampling strides — TODO confirm.
ratios: | |
- 8 | |
- 5 | |
- 4 | |
- 2 | |
sample_rate: 16000 | |
bins: 1024 | |
# NOTE(review): the key is spelled `semantic_techer` (probably meant "teacher").
# It is left unchanged because whatever reads this config would look it up by
# this exact spelling — rename only together with the consuming code.
semantic_techer: hubert_base_general | |
# d_list names a single entry, `mfd`, which matches the key of the
# MultiFrequencyDiscriminator section below.
# NOTE(review): presumably only the discriminators listed in d_list are
# instantiated — confirm against the training code.
d_list: | |
- mfd | |
mfd: | |
name: MultiFrequencyDiscriminator | |
config: | |
# hop_lengths and hidden_channels both have six entries; presumably they are
# paired position-by-position (one sub-discriminator each) — TODO confirm.
hop_lengths: | |
- 32 | |
- 64 | |
- 128 | |
- 256 | |
- 512 | |
- 1024 | |
hidden_channels: | |
- 64 | |
- 128 | |
- 256 | |
- 512 | |
- 512 | |
- 512 | |
domain: double | |
mel_scale: true | |
# NOTE(review): with indentation lost it is not visible whether this
# sample_rate belongs to the mfd config or to an outer mapping; the value
# matches the generator's sample_rate (16000).
sample_rate: 16000 | |
# Settings for a MultiPeriodDiscriminator with five period sizes.
# NOTE(review): `mpd` is defined here but is not listed in d_list (which only
# contains `mfd`), so this section is presumably unused unless d_list is
# extended — confirm against the training code.
mpd: | |
name: MultiPeriodDiscriminator | |
config: | |
period_sizes: | |
- 2 | |
- 3 | |
- 5 | |
- 7 | |
- 11 | |
period_kernel_size: 5 | |
# Settings for a MultiScaleDiscriminator: three scales plus pooling kernel and
# stride values.
# NOTE(review): `msd` is defined here but is not listed in d_list (which only
# contains `mfd`), so this section is presumably unused unless d_list is
# extended — confirm against the training code.
msd: | |
name: MultiScaleDiscriminator | |
config: | |
num_scales: 3 | |
pool_kernel_size: 4 | |
pool_stride: 2 | |
# Optimizer settings with two entries, `g` and `d` (presumably generator and
# discriminator — matches the g_/d_ naming used in the criterion section).
# Both entries use AdamW with identical lr, betas and eps values.
optimizer: | |
g: | |
name: AdamW | |
config: | |
lr: 0.0002 | |
betas: | |
- 0.8 | |
- 0.99 | |
eps: 1.0e-06 | |
d: | |
name: AdamW | |
config: | |
lr: 0.0002 | |
betas: | |
- 0.8 | |
- 0.99 | |
eps: 1.0e-06 | |
# Learning-rate scheduler settings, one entry per optimizer key (`g` and `d`).
# Both use ExponentialLR with the same gamma of 0.999.
# NOTE(review): whether the scheduler steps per epoch or per iteration is not
# visible here — confirm in the training loop.
lr_scheduler: | |
g: | |
name: ExponentialLR | |
config: | |
gamma: 0.999 | |
d: | |
name: ExponentialLR | |
config: | |
gamma: 0.999 | |
# Loss settings. g_criterion selects losses.generator_loss.GeneratorSTFTLoss
# and carries its switches, weights and per-loss sub-settings.
criterion: | |
g_criterion: | |
name: losses.generator_loss.GeneratorSTFTLoss | |
config: | |
# NOTE(review): use_mel_loss is false, so mel_loss_weight and the
# mel_scale_loss settings below are presumably ignored — confirm in the loss
# implementation.
use_mel_loss: false | |
adv_criterion: MSEGLoss | |
mel_loss_weight: 45 | |
use_feature_match: true | |
feat_match_loss_weight: 20 | |
# Both the full-band and sub-band STFT losses are switched on, each with
# weight 1.
use_full_stft_loss: true | |
use_sub_stft_loss: true | |
full_stft_loss_weight: 1 | |
sub_stft_loss_weight: 1 | |
# Mel-spectrogram settings; sampling_rate matches the 16000 used elsewhere in
# this file.
mel_scale_loss: | |
sampling_rate: 16000 | |
n_fft: 1024 | |
num_mels: 80 | |
hop_size: 160 | |
win_size: 800 | |
fmin: 0 | |
# Three resolutions; fft_sizes, win_sizes and hop_sizes each have three entries
# and are presumably paired by position — TODO confirm.
full_multi_scale_stft_loss: | |
fft_sizes: | |
- 512 | |
- 1024 | |
- 2048 | |
win_sizes: | |
- 480 | |
- 960 | |
- 1200 | |
hop_sizes: | |
- 120 | |
- 240 | |
- 300 | |
# Sub-band variant with num_bands: 6 and three resolutions.
# NOTE(review): fft size 256 appears twice (with different win/hop sizes);
# verify this is intentional.
sub_multi_scale_stft_loss: | |
num_bands: 6 | |
fft_sizes: | |
- 128 | |
- 256 | |
- 256 | |
win_sizes: | |
- 80 | |
- 120 | |
- 200 | |
hop_sizes: | |
- 20 | |
- 40 | |
- 50 | |
# d_criterion selects losses.discriminator_loss.MSEDiscriminatorLoss and
# passes no configuration (config is null).
d_criterion: | |
name: losses.discriminator_loss.MSEDiscriminatorLoss | |
config: null | |
# NOTE(review): with indentation lost, it is not visible which mapping the
# three scalar settings below belong to (they cannot sit under the null
# `config`) — confirm against the original file.
commit_loss_weight: 1.0 | |
codebook_loss_weight: 100 | |
audio_norm_scale: 0.95 | |