# pre_trained_model / config.yaml
# Dongchao's picture
# Upload config.yaml
# ac13e2e verified
# Generator section: `name` holds the model identifier and `config` the
# settings passed along with it.
# NOTE(review): nesting is reconstructed from the name/config pattern used
# throughout this file — confirm against the code that loads it.
generator:
  name: ScalarModel
  config:
    num_bands: 1
    sample_rate: 48000
    causal: true
    num_samples: 2
    # Four downsampling stages; the factors multiply to 4*5*5*5 = 500.
    downsample_factors:
      - 4
      - 5
      - 5
      - 5
    # One kernel size per downsampling stage (each is 2x its factor).
    downsample_kernel_sizes:
      - 8
      - 10
      - 10
      - 10
    # Same factors as the downsampling path, listed in reverse order.
    upsample_factors:
      - 5
      - 5
      - 5
      - 4
    # One kernel size per upsampling stage (each is 2x its factor).
    upsample_kernel_sizes:
      - 10
      - 10
      - 10
      - 8
    latent_hidden_dim: 32
    default_kernel_size: 7
    delay_kernel_size: 5
    init_channel: 64
    res_kernel_size: 7
# Names of the discriminator sections in use; only `mfd` is listed.
# NOTE(review): `mpd` and `msd` sections also exist in this file but are not
# listed here — presumably inactive; confirm in the training code.
d_list:
  - mfd
# Discriminator section `mfd` (the one entry named in `d_list`).
# NOTE(review): nesting is reconstructed from the name/config pattern —
# confirm that `domain`, `mel_scale` and `sample_rate` belong under `config`.
mfd:
  name: MultiFrequencyDiscriminator
  config:
    # Six hop lengths, doubling from 32 up to 1024.
    hop_lengths:
      - 32
      - 64
      - 128
      - 256
      - 512
      - 1024
    # Six values, one per hop length above — presumably the channel width
    # used at each resolution; verify against the discriminator class.
    hidden_channels:
      - 64
      - 128
      - 256
      - 512
      - 512
      - 512
    domain: double
    mel_scale: true
    sample_rate: 48000
# Discriminator section `mpd`.
# NOTE(review): not named in `d_list` in this file; nesting reconstructed
# from the name/config pattern — confirm against the loader.
mpd:
  name: MultiPeriodDiscriminator
  config:
    # Five periods, all prime numbers.
    period_sizes:
      - 2
      - 3
      - 5
      - 7
      - 11
    period_kernel_size: 5
# Discriminator section `msd`.
# NOTE(review): not named in `d_list` in this file; nesting reconstructed
# from the name/config pattern — confirm against the loader.
msd:
  name: MultiScaleDiscriminator
  config:
    num_scales: 3
    pool_kernel_size: 4
    pool_stride: 2
# Optimizer settings, one entry each under `g` and `d`; both entries carry
# identical values.
# NOTE(review): `g`/`d` presumably mean generator/discriminator — confirm.
optimizer:
  g:
    name: AdamW
    config:
      lr: 0.0002
      betas:
        - 0.8
        - 0.99
      eps: 1.0e-06
  d:
    name: AdamW
    config:
      lr: 0.0002
      betas:
        - 0.8
        - 0.99
      eps: 1.0e-06
# Learning-rate scheduler settings, keyed by `g` and `d` like the `optimizer`
# section; both entries use the same scheduler name and gamma.
lr_scheduler:
  g:
    name: ExponentialLR
    config:
      gamma: 0.999
  d:
    name: ExponentialLR
    config:
      gamma: 0.999
# Loss configuration: `g_criterion` and `d_criterion` each give a dotted
# name plus a `config` (null for `d_criterion`).
# NOTE(review): nesting is reconstructed; in particular, placing the three
# *_loss sub-sections under `g_criterion.config` is an assumption — confirm
# against the loss class that reads them.
criterion:
  g_criterion:
    name: losses.generator_loss.GeneratorSTFTLoss
    config:
      # use_mel_loss is false, so mel_loss_weight and mel_scale_loss are
      # presumably unused in this run — verify in the loss implementation.
      use_mel_loss: false
      adv_criterion: MSEGLoss
      mel_loss_weight: 45
      use_feature_match: true
      feat_match_loss_weight: 20
      use_full_stft_loss: true
      use_sub_stft_loss: true
      full_stft_loss_weight: 1
      sub_stft_loss_weight: 1
      mel_scale_loss:
        sampling_rate: 48000
        n_fft: 1024
        num_mels: 80
        hop_size: 160
        win_size: 800
        fmin: 0
      # Three entries per list; entry i of each list presumably forms one
      # (fft, win, hop) resolution — confirm.
      full_multi_scale_stft_loss:
        fft_sizes:
          - 512
          - 1024
          - 2048
        win_sizes:
          - 480
          - 960
          - 1200
        hop_sizes:
          - 120
          - 240
          - 300
      sub_multi_scale_stft_loss:
        num_bands: 6
        # NOTE(review): 256 appears twice here while the matching win/hop
        # entries differ (120/40 vs 200/50) — confirm this is intentional.
        fft_sizes:
          - 128
          - 256
          - 256
        win_sizes:
          - 80
          - 120
          - 200
        hop_sizes:
          - 20
          - 40
          - 50
  d_criterion:
    name: losses.discriminator_loss.MSEDiscriminatorLoss
    config: null
# Run-level settings: plain scalar keys, kept exactly as found.
# NOTE(review): commit_loss_weight directly follows the `criterion` section;
# it is left at this level as it appears — confirm where the consumer reads it.
commit_loss_weight: 1.0
# Relative file names for the training and validation lists.
training_file: train.scp
validation_file: val.scp
seed: 2333
cudnn_deterministic: false
tensorboard: true
# Intervals and counters; units (steps vs. epochs) are not stated here —
# verify against the training loop.
checkpoint_interval: 5000
summary_interval: 100
validation_interval: 5000
# NOTE(review): key is spelled `num_epoches`; the consumer reads this exact
# name, so do not rename it without changing the code as well.
num_epoches: 500
print_freq: 10
discriminator_iter_start: 0
num_ckpt_keep: 10
# Same numeric value as sample_rate (48000) — presumably one second of audio
# if measured in samples; confirm.
segment_size: 48000
audio_norm_scale: 0.95
batch_size: 12
num_workers: 8
num_plots: 8
local_rank: -1
basic_model_config: config/scalar48k.yaml
exp_model_config: null
# NOTE(review): absolute path that looks site-specific — expect to override
# it when running elsewhere.
log_dir: /apdcephfs/share_1316500/lavenywang/exp_data/codec/48k
# NOTE(review): 2000 differs from the product of the generator's
# downsample_factors (500) — confirm how the two relate.
hop_length: 2000
ngpus_per_node: 8
sample_rate: 48000
# Sub-directory of log_dir above; same site-specific caveat applies.
model_ckpt_dir: /apdcephfs/share_1316500/lavenywang/exp_data/codec/48k/model_ckpts