maskgct-audio-lab / egs /vocoder /diffusion /exp_config_base.json
Hecheng0625's picture
Upload 167 files
8c92a11 verified
raw
history blame
1.65 kB
{
    // Experiment settings for the diffusion vocoder recipe.
    // This file contains // comments, so it must be read with a
    // comment-tolerant loader (JSON5 / JSONC), not a strict JSON parser.
    // NOTE(review): "base_config" presumably names a parent config that this
    // file is layered on top of -- confirm against the config loader.
    "base_config": "config/vocoder.json",
    "model_type": "DiffusionVocoder",

    // TODO: Keep only the datasets you need.
    "dataset": [
        "csd",
        "kising",
        "m4singer",
        "nus48e",
        "opencpop",
        "opensinger",
        "opera",
        "pjs",
        "popbutfy",
        "popcs",
        "ljspeech",
        "vctk",
        "libritts"
    ],

    // TODO: Replace every "[dataset path]" placeholder with the path of the
    // corresponding dataset listed in "dataset" above.
    "dataset_path": {
        "csd": "[dataset path]",
        "kising": "[dataset path]",
        "m4singer": "[dataset path]",
        "nus48e": "[dataset path]",
        "opencpop": "[dataset path]",
        "opensinger": "[dataset path]",
        "opera": "[dataset path]",
        "pjs": "[dataset path]",
        "popbutfy": "[dataset path]",
        "popcs": "[dataset path]",
        "ljspeech": "[dataset path]",
        "vctk": "[dataset path]",
        "libritts": "[dataset path]"
    },

    // TODO: Set the output log path.
    "log_dir": "ckpts/vocoder",

    "preprocess": {
        // Acoustic features to extract
        "extract_mel": true,
        "extract_audio": true,
        "extract_pitch": false,
        "extract_uv": false,
        "pitch_extractor": "parselmouth",

        // Features used when training the model
        "use_mel": true,
        "use_frame_pitch": false,
        "use_uv": false,
        "use_audio": true,

        // TODO: Set the output path for the processed data.
        "processed_dir": "data/",
        "n_mel": 100,
        "sample_rate": 24000
    },

    "train": {
        // TODO: Pick a batch size, number of training epochs, and checkpoint
        // save stride that suit your setup.
        "batch_size": 32,
        "max_epoch": 1000000,
        "save_checkpoint_stride": [20],
        "adamw": {
            "lr": 2.0e-4,
            "adam_b1": 0.8,
            "adam_b2": 0.99
        },
        "exponential_lr": {
            "lr_decay": 0.999
        }
    }
}