maskgct-audio-lab / config /facodec.json
Hecheng0625's picture
Upload 167 files
8c92a11 verified
raw
history blame
1.83 kB
{
  "exp_name": "facodec",
  "model_type": "FAcodec",
  "log_dir": "./runs/",
  "log_interval": 10,
  "save_interval": 1000,
  "device": "cuda",
  "epochs": 1000,
  "batch_size": 4,
  "batch_length": 100,
  "max_len": 80,
  "pretrained_model": "",
  "load_only_params": false,
  "F0_path": "modules/JDC/bst.t7",
  "dataset": "dummy",
  "preprocess_params": {
    "sr": 24000,
    "frame_rate": 80,
    "duration_range": [1.0, 25.0],
    "spect_params": {
      "n_fft": 2048,
      "win_length": 1200,
      "hop_length": 300,
      "n_mels": 80
    }
  },
  "train": {
    "gradient_accumulation_step": 1,
    "batch_size": 1,
    "save_checkpoint_stride": [20],
    "random_seed": 1234,
    "max_epoch": -1,
    "max_frame_len": 80,
    "tracker": ["tensorboard"],
    "run_eval": [false],
    "sampler": {"holistic_shuffle": true, "drop_last": true},
    "dataloader": {"num_worker": 0, "pin_memory": true}
  },
  "model_params": {
    "causal": true,
    "lstm": 2,
    "norm_f0": true,
    "use_gr_content_f0": false,
    "use_gr_prosody_phone": false,
    "use_gr_timbre_prosody": false,
    "separate_prosody_encoder": true,
    "n_c_codebooks": 2,
    "timbre_norm": true,
    "use_gr_content_global_f0": true,
    "DAC": {
      "encoder_dim": 64,
      "encoder_rates": [2, 5, 5, 6],
      "decoder_dim": 1536,
      "decoder_rates": [6, 5, 5, 2],
      "sr": 24000
    }
  },
  "loss_params": {
    "base_lr": 0.0001,
    "warmup_steps": 200,
    "discriminator_iter_start": 2000,
    "lambda_spk": 1.0,
    "lambda_mel": 45,
    "lambda_f0": 1.0,
    "lambda_uv": 1.0
  }
}