{ "output_path": "./models/11-ZA_multilingual", "logger_uri": null, "run_name": "11_ZA", "project_name": null, "run_description": "\ud83d\udc38Coqui trainer run.", "print_step": 25, "plot_step": 100, "model_param_stats": false, "wandb_entity": null, "dashboard_logger": "tensorboard", "log_model_step": 5000, "save_step": 5000, "save_n_checkpoints": 5, "save_checkpoints": true, "save_all_best": false, "save_best_after": 10000, "target_loss": null, "print_eval": false, "test_delay_epochs": -1, "run_eval": true, "run_eval_steps": null, "distributed_backend": "nccl", "distributed_url": "tcp://localhost:54321", "mixed_precision": true, "epochs": 1000, "batch_size": 16, "eval_batch_size": 16, "grad_clip": [ 1000, 1000 ], "scheduler_after_epoch": true, "lr": 0.001, "optimizer": "AdamW", "optimizer_params": { "betas": [ 0.8, 0.99 ], "eps": 1e-09, "weight_decay": 0.01 }, "lr_scheduler": null, "lr_scheduler_params": {}, "use_grad_scaler": false, "cudnn_enable": true, "cudnn_deterministic": false, "cudnn_benchmark": false, "training_seed": 54321, "model": "vits", "num_loader_workers": 4, "num_eval_loader_workers": 4, "use_noise_augment": false, "audio": { "fft_size": 1024, "sample_rate": 16000, "win_length": 1024, "hop_length": 256, "num_mels": 80, "mel_fmin": 0, "mel_fmax": null }, "use_phonemes": false, "phonemizer": null, "phoneme_language": null, "compute_input_seq_cache": true, "text_cleaner": "multilingual_cleaners", "enable_eos_bos_chars": false, "test_sentences_file": "", "phoneme_cache_path": null, "characters": { "characters_class": "TTS.tts.models.vits.VitsCharacters", "vocab_dict": null, "pad": "", "eos": "", "bos": "", "blank": "", "characters": "\u014b\u0266\u0254ma\u02d0spfut\u031dkr\u026ad\u025b!\u00a1'(),\u2014-.:;\u00bf?abcdefghijklmnopqrstuvwxyz\u00b5\u00df\u00e0\u00e1\u00e2\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f1\u00f2\u00f3\u00f4\u00f6\u00f9\u00fa\u00fb\u00fc\u0105\u0107\u0119\u0142\u0144\u0153\u015b\u015f\u017a\u017c\u0192\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u044f\u0451\u0454\u0456\u0457\u0491\u04e7 \u00ab\u00b0\u00b1\u00b5\u00bb$%&\u2018\u2019\u201a\u201c`\u201d\u201e", "punctuations": "!\u00a1'(),-.:;\u00bf? ", "phonemes": "\u014b\u0266\u0254ma\u02d0spfut\u031dkr\u026ad\u025baa\u02d0a\u0303\u02d0bcdefii\u02d0jklmnoprstuu\u02d0vwxyy\u02d0\u00e6\u02d0\u00f8\u014b\u0153\u0153\u02d0\u0254\u0254\u02d0\u0254\u0303\u02d0\u0258\u025b\u025b\u02d0\u025b\u0303\u02d0\u0261\u0266\u026a\u031d", "is_unique": true, "is_sorted": true }, "add_blank": false, "batch_group_size": 0, "loss_masking": null, "min_audio_len": 32768, "max_audio_len": 160000, "min_text_len": 1, "max_text_len": Infinity, "compute_f0": false, "compute_energy": false, "compute_linear_spec": true, "precompute_num_workers": 0, "start_by_longest": false, "shuffle": false, "drop_last": false, "datasets": [ { "formatter": "nchlt", "dataset_name": "tts_eng", "path": "", "meta_file_train": "eng_metadata.txt", "ignored_speakers": null, "language": "eng", "phonemizer": "", "meta_file_val": "", "meta_file_attn_mask": "" }, { "formatter": "nchlt", "dataset_name": "tts_afr", "path": "", "meta_file_train": "afr_metadata.txt", "ignored_speakers": null, "language": "afr", "phonemizer": "", "meta_file_val": "", "meta_file_attn_mask": "" }, { "formatter": "nchlt", "dataset_name": "tts_zul", "path": "", "meta_file_train": "zul_metadata.txt", "ignored_speakers": null, "language": "zul", "phonemizer": "", "meta_file_val": "", "meta_file_attn_mask": "" }, { "formatter": "nchlt", "dataset_name": "tts_zho", "path": "", "meta_file_train": "xho_metadata.txt", "ignored_speakers": null, "language": "xho", "phonemizer": "", "meta_file_val": "", "meta_file_attn_mask": "" }, { "formatter": "nchlt", "dataset_name": "tts_ven", "path": "", "meta_file_train": "ven_metadata.txt", "ignored_speakers": null, "language": "ven", "phonemizer": "", "meta_file_val": "", "meta_file_attn_mask": "" }, { "formatter": "nchlt", "dataset_name": "tts_tso", "path": "", "meta_file_train": "tso_metadata.txt", "ignored_speakers": null, "language": "tso", "phonemizer": "", "meta_file_val": "", "meta_file_attn_mask": "" }, { "formatter": "nchlt", "dataset_name": "tts_tsn", "path": "", "meta_file_train": "tsn_metadata.txt", "ignored_speakers": null, "language": "tsn", "phonemizer": "", "meta_file_val": "", "meta_file_attn_mask": "" }, { "formatter": "nchlt", "dataset_name": "tts_sot", "path": "", "meta_file_train": "sot_metadata.txt", "ignored_speakers": null, "language": "sot", "phonemizer": "", "meta_file_val": "", "meta_file_attn_mask": "" }, { "formatter": "nchlt", "dataset_name": "tts_ssw", "path": "", "meta_file_train": "ssw_metadata.txt", "ignored_speakers": null, "language": "ssw", "phonemizer": "", "meta_file_val": "", "meta_file_attn_mask": "" }, { "formatter": "nchlt", "dataset_name": "tts_nbl", "path": "", "meta_file_train": "nbl_metadata.txt", "ignored_speakers": null, "language": "nbl", "phonemizer": "", "meta_file_val": "", "meta_file_attn_mask": "" }, { "formatter": "nchlt", "dataset_name": "tts_nso", "path": "", "meta_file_train": "nso_metadata.txt", "ignored_speakers": null, "language": "nso", "phonemizer": "", "meta_file_val": "", "meta_file_attn_mask": "" } ], "test_sentences": [ [ "Something has to be put in place.", "p244", null, "eng" ], [ "The gaseous matters (carbon dioxide, oxygen, nitrogen) enter the plant mainly by diffusion from the atmosphere,", "lq_eng_f", null, "eng" ], [ "Anyone who was familiar with that area of Dallas would have known that the motorcade would probably pass the Texas School Book Depository to get from Main Street", "Leah", null, "eng" ], [ "verbruik gedurende dieselfde", "lq_afr_f", null, "afr" ], [ "In ons howe wil ons 'n regering van wette h\u00ea en nie van mans nie.", "Willem", null, "afr" ], [ "persvrystellings aanbiedings tentoon", "adri", null, "afr" ], [ "siphunyuke olakeni lukamama", "lq_zul_f", null, "zul" ], [ "Abampofu bezwe elizungezile bahlala izigodlo zalo ezihlakazekile.", "thando", null, "zul" ], [ "Futhi eyodwa enikeze izaba zokuphikisana nephakheji elinobuningi ekuseni ngakusasa ekuseni.", "Themba", null, "zul" ], [ "le tjhabile le monatjana ho", "lq_sot_f", null, "sot" ], [ "ha o ne o mpolella", "lq_sot_f", null, "sot" ], [ "Ho sheba eufrate e kholo e koahelang phuleng ka tlase.", "lq_sot_f", null, "sot" ], [ "pheji leyi ku fanele ku", "lq_tso_f", null, "tso" ], [ "kota ku hlayela no langutisela", "lq_tso_f", null, "tso" ], [ "vanwa milenge va kotaku tsutsuma", "lq_tso_f", null, "tso" ], [ "motsu o o mo letshwaong", "lq_tsn_f", null, "tsn" ], [ "itshokodisetsa eng tota ausi thando", "lq_tsn_f", null, "tsn" ], [ "la kwa bogodimong jwa bolaodi", "lq_tsn_f", null, "tsn" ], [ "kesinye nesinye isikhathi", "lq_nbl_m", null, "nbl" ], [ "kibo boke abatlolisiweko", "lq_nbl_m", null, "nbl" ], [ "kwamalungelo wabantu kanye", "lq_nbl_m", null, "nbl" ], [ "nga thusa vhathu kana vhafarisi", "lq_ven_m", null, "ven" ], [ "shumiselwaho vhutali i do tavhanyisa", "lq_ven_m", null, "ven" ], [ "badela mushumi we a lova", "lq_ven_m", null, "ven" ], [ "nanobe kuneluhlelo kwakhiwe", "lq_ssw_f", null, "ssw" ], [ "umcimbi wekugidza umnandzi", "lq_ssw_f", null, "ssw" ], [ "lomunye sitijabulisa ngawo", "lq_ssw_f", null, "ssw" ], [ "ga setso se se itsego", "lq_nso_m", null, "nso" ], [ "motho a ka ba a", "lq_nso_m", null, "nso" ], [ "O ile a bolela gape gore o tla diri\u0161a sethunya ge e ba sebaka se ka t\u0161welela.", "lq_nso_m", null, "nso" ], [ "ezigqibeleleyo zezemidlalo nezokuziphumza", "lq_xho_m", null, "xho" ], [ "Ngeendlela zokufumana impilo ngokunyaniseka ukuba ilahlekile.", "lq_xho_m", null, "xho" ], [ "nemeko yokuqesha yahlukene", "lq_xho_f", null, "xho" ] ], "eval_split_max_size": null, "eval_split_size": 0.01, "use_speaker_weighted_sampler": true, "speaker_weighted_sampler_alpha": 1.0, "use_language_weighted_sampler": true, "language_weighted_sampler_alpha": 1.0, "use_length_weighted_sampler": false, "length_weighted_sampler_alpha": 1.0, "model_args": { "num_chars": 236, "out_channels": 513, "spec_segment_size": 32, "hidden_channels": 192, "hidden_channels_ffn_text_encoder": 768, "num_heads_text_encoder": 2, "num_layers_text_encoder": 6, "kernel_size_text_encoder": 3, "dropout_p_text_encoder": 0.1, "dropout_p_duration_predictor": 0.5, "kernel_size_posterior_encoder": 5, "dilation_rate_posterior_encoder": 1, "num_layers_posterior_encoder": 16, "kernel_size_flow": 5, "dilation_rate_flow": 1, "num_layers_flow": 4, "resblock_type_decoder": "1", "resblock_kernel_sizes_decoder": [ 3, 7, 11 ], "resblock_dilation_sizes_decoder": [ [ 1, 3, 5 ], [ 1, 3, 5 ], [ 1, 3, 5 ] ], "upsample_rates_decoder": [ 8, 8, 2, 2 ], "upsample_initial_channel_decoder": 512, "upsample_kernel_sizes_decoder": [ 16, 16, 4, 4 ], "periods_multi_period_discriminator": [ 2, 3, 5, 7, 11 ], "use_sdp": false, "noise_scale": 1.0, "inference_noise_scale": 0.667, "length_scale": 1, "noise_scale_dp": 1.0, "inference_noise_scale_dp": 1.0, "max_inference_len": null, "init_discriminator": false, "use_spectral_norm_disriminator": false, "use_speaker_embedding": true, "num_speakers": 130, "speakers_file": "./models/11-ZA_multilingual/11_ZA-May-16-2023_12+09PM-0000000/speakers.pth", "d_vector_file": null, "speaker_embedding_channels": 256, "use_d_vector_file": false, "d_vector_dim": 0, "detach_dp_input": true, "use_language_embedding": true, "embedded_language_dim": 12, "num_languages": 11, "language_ids_file": "./models/11-ZA_multilingual/11_ZA-May-16-2023_12+09PM-0000000/language_ids.json", "use_speaker_encoder_as_loss": false, "speaker_encoder_config_path": "", "speaker_encoder_model_path": "", "condition_dp_on_speaker": true, "freeze_encoder": false, "freeze_DP": false, "freeze_PE": false, "freeze_flow_decoder": false, "freeze_waveform_decoder": false, "encoder_sample_rate": null, "interpolate_z": true, "reinit_DP": false, "reinit_text_encoder": false }, "lr_gen": 0.0002, "lr_disc": 0.0002, "lr_scheduler_gen": "ExponentialLR", "lr_scheduler_gen_params": { "gamma": 0.999875, "last_epoch": -1 }, "lr_scheduler_disc": "ExponentialLR", "lr_scheduler_disc_params": { "gamma": 0.999875, "last_epoch": -1 }, "kl_loss_alpha": 1.0, "disc_loss_alpha": 1.0, "gen_loss_alpha": 1.0, "feat_loss_alpha": 1.0, "mel_loss_alpha": 45.0, "dur_loss_alpha": 1.0, "speaker_encoder_loss_alpha": 1.0, "return_wav": true, "use_weighted_sampler": false, "weighted_sampler_attrs": {}, "weighted_sampler_multipliers": {}, "r": 1, "num_speakers": 0, "use_speaker_embedding": true, "speakers_file": "./models/11-ZA_multilingual/11_ZA-May-16-2023_12+09PM-0000000/speakers.pth", "speaker_embedding_channels": 256, "language_ids_file": "./models/11-ZA_multilingual/11_ZA-May-16-2023_12+09PM-0000000/language_ids.json", "use_language_embedding": true, "use_d_vector_file": false, "d_vector_file": null, "d_vector_dim": 0 }