---
# Training / experiment configuration.
#
# NOTE(review): this file was reconstructed from a flattened copy in which
# all indentation had been lost and every line carried a stray "|" column
# separator. The nesting below is inferred from the key grouping (each
# `name:` / `config:` pair belongs to the section key that precedes it).
# Confirm against the code that consumes this file before relying on it.

# Generator model selection and its hyper-parameters.
generator:
  name: ScalarModel
  config:
    num_bands: 1
    sample_rate: 48000
    causal: true
    num_samples: 2
    # Upsample factors mirror the downsample factors in reverse order
    # (4*5*5*5 = 500 in both directions).
    downsample_factors: [4, 5, 5, 5]
    # Each kernel size is twice the factor at the same position.
    downsample_kernel_sizes: [8, 10, 10, 10]
    upsample_factors: [5, 5, 5, 4]
    upsample_kernel_sizes: [10, 10, 10, 8]
    latent_hidden_dim: 32
    default_kernel_size: 7
    delay_kernel_size: 5
    init_channel: 64
    res_kernel_size: 7

# Names of the discriminator sections to use. Only `mfd` is listed; `mpd`
# and `msd` are still defined below.
# NOTE(review): presumably sections not listed here are ignored - confirm.
d_list: [mfd]

mfd:
  name: MultiFrequencyDiscriminator
  config:
    # One entry per hop length / hidden-channel pair (six of each).
    hop_lengths: [32, 64, 128, 256, 512, 1024]
    hidden_channels: [64, 128, 256, 512, 512, 512]
    domain: double
    mel_scale: true
    sample_rate: 48000

mpd:
  name: MultiPeriodDiscriminator
  config:
    period_sizes: [2, 3, 5, 7, 11]
    period_kernel_size: 5

msd:
  name: MultiScaleDiscriminator
  config:
    num_scales: 3
    pool_kernel_size: 4
    pool_stride: 2

# Optimizer settings; `g` and `d` currently hold identical values.
# NOTE(review): presumably g = generator, d = discriminator - confirm.
optimizer:
  g:
    name: AdamW
    config:
      lr: 0.0002
      betas: [0.8, 0.99]
      eps: 1.0e-06
  d:
    name: AdamW
    config:
      lr: 0.0002
      betas: [0.8, 0.99]
      eps: 1.0e-06

# Learning-rate schedulers, keyed the same way as `optimizer`.
lr_scheduler:
  g:
    name: ExponentialLR
    config:
      gamma: 0.999
  d:
    name: ExponentialLR
    config:
      gamma: 0.999

# Loss definitions.
criterion:
  g_criterion:
    name: losses.generator_loss.GeneratorSTFTLoss
    config:
      # NOTE(review): with use_mel_loss false, mel_loss_weight and
      # mel_scale_loss are presumably unused - confirm.
      use_mel_loss: false
      adv_criterion: MSEGLoss
      mel_loss_weight: 45
      use_feature_match: true
      feat_match_loss_weight: 20
      use_full_stft_loss: true
      use_sub_stft_loss: true
      full_stft_loss_weight: 1
      sub_stft_loss_weight: 1
      mel_scale_loss:
        sampling_rate: 48000
        n_fft: 1024
        num_mels: 80
        hop_size: 160
        win_size: 800
        fmin: 0
      # Three resolutions; entries at the same index belong together.
      full_multi_scale_stft_loss:
        fft_sizes: [512, 1024, 2048]
        win_sizes: [480, 960, 1200]
        hop_sizes: [120, 240, 300]
      sub_multi_scale_stft_loss:
        num_bands: 6
        fft_sizes: [128, 256, 256]
        win_sizes: [80, 120, 200]
        hop_sizes: [20, 40, 50]
  d_criterion:
    name: losses.discriminator_loss.MSEDiscriminatorLoss
    config: null
  # NOTE(review): the flattened source does not show whether this key
  # belongs under `criterion` or at the top level; it directly followed
  # `d_criterion`, so it is kept here. Verify against the consumer.
  commit_loss_weight: 1.0

# Data lists.
training_file: train.scp
validation_file: val.scp

# Run settings.
seed: 2333
cudnn_deterministic: false
tensorboard: true
checkpoint_interval: 5000
summary_interval: 100
validation_interval: 5000
num_epoches: 500
print_freq: 10
discriminator_iter_start: 0
num_ckpt_keep: 10
segment_size: 48000
audio_norm_scale: 0.95
batch_size: 12
num_workers: 8
num_plots: 8

# NOTE(review): the remaining keys look like resolved launch arguments
# (basic_model_config points at another YAML file) - confirm. The two
# directory values are absolute paths and will likely need overriding
# per environment.
local_rank: -1
basic_model_config: config/scalar48k.yaml
exp_model_config: null
log_dir: /apdcephfs/share_1316500/lavenywang/exp_data/codec/48k
hop_length: 2000
ngpus_per_node: 8
sample_rate: 48000
model_ckpt_dir: /apdcephfs/share_1316500/lavenywang/exp_data/codec/48k/model_ckpts
|