data:
  block_size: 512
  cnhubertsoft_gate: 10
  duration: 2
  encoder: vec768l12
  encoder_hop_size: 320
  encoder_out_channels: 768
  encoder_sample_rate: 16000
  extensions:
    - wav
  sampling_rate: 44100
  training_files: filelists/train.txt
  unit_interpolate_mode: nearest
  validation_files: filelists/val.txt
device: cuda
env:
  expdir: logs/44k/diffusion
  gpu_id: 0
infer:
  method: dpm-solver++
  speedup: 10
model:
  k_step_max: 0
  n_chans: 512
  n_hidden: 256
  n_layers: 20
  n_spk: 3
  timesteps: 1000
  type: Diffusion
  use_pitch_aug: true
spk:
  ferocious: 0
  husky: 1
  puppy: 2
train:
  amp_dtype: fp32
  batch_size: 48
  cache_all_data: true
  cache_device: cpu
  cache_fp16: true
  decay_step: 100000
  epochs: 100000
  gamma: 0.5
  interval_force_save: 5000
  interval_log: 10
  interval_val: 2000
  lr: 0.0001
  num_workers: 4
  save_opt: false
  weight_decay: 0
vocoder:
  ckpt: pretrain/nsf_hifigan/model
  type: nsf-hifigan