---
# Diffusion model configuration (see `model.type` below).
#
# Layout: eight top-level keys -- data, device, env, infer, model, spk,
# train, vocoder. Keys are kept in alphabetical order at every level so
# that diffs stay stable.
#
# NOTE(review): the nesting below was restored from a flattened copy of
# this file (no indentation, stray `|` separator lines). Parent/child
# relationships were derived from the empty-valued parent keys and the
# alphabetical ordering of each run of children -- confirm against the
# schema expected by the code that loads this file.

data:
  block_size: 512
  cnhubertsoft_gate: 10
  duration: 2
  encoder: vec768l12
  encoder_hop_size: 320
  encoder_out_channels: 768
  encoder_sample_rate: 16000
  # File extensions listed for the dataset; currently only `wav`.
  extensions:
    - wav
  sampling_rate: 44100
  training_files: filelists/train.txt
  unit_interpolate_mode: nearest
  validation_files: filelists/val.txt

device: cuda

env:
  expdir: logs/44k/diffusion
  gpu_id: 0

infer:
  method: dpm-solver++
  speedup: 10

model:
  k_step_max: 0
  n_chans: 512
  n_hidden: 256
  n_layers: 20
  # NOTE(review): presumably the speaker count; the `spk` mapping below
  # has exactly one entry -- keep the two in sync if that is the contract.
  n_spk: 1
  timesteps: 1000
  type: Diffusion
  use_pitch_aug: true

# NOTE(review): presumably maps speaker name -> integer id; verify
# against the loader.
spk:
  Hanser: 0

train:
  amp_dtype: fp32
  batch_size: 48
  cache_all_data: true
  cache_device: cpu
  cache_fp16: true
  decay_step: 100000
  epochs: 100000
  gamma: 0.5
  interval_force_save: 5000
  interval_log: 10
  interval_val: 2000
  lr: 0.0001
  num_workers: 4
  save_opt: false
  weight_decay: 0

vocoder:
  ckpt: pretrain/nsf_hifigan/model
  type: nsf-hifigan