AOLCDROM committed on
Commit
a5aabf2
1 Parent(s): f601266

Upload 8 files

Browse files
config.json ADDED
@@ -0,0 +1,835 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "output_path": "/home/nano/TTS/models/vctk-yourtts2-ft-16k-ds/traineroutput/",
3
+ "logger_uri": null,
4
+ "run_name": "VITS-ml-ms",
5
+ "project_name": "YourTTS",
6
+ "run_description": "YourTTS EN-DE-ES-FR",
7
+ "print_step": 50,
8
+ "plot_step": 100,
9
+ "model_param_stats": false,
10
+ "wandb_entity": null,
11
+ "dashboard_logger": "tensorboard",
12
+ "save_on_interrupt": true,
13
+ "log_model_step": 500,
14
+ "save_step": 1000,
15
+ "save_n_checkpoints": 3,
16
+ "save_checkpoints": true,
17
+ "save_all_best": true,
18
+ "save_best_after": 10000,
19
+ "target_loss": "loss_1",
20
+ "print_eval": true,
21
+ "test_delay_epochs": -1,
22
+ "run_eval": true,
23
+ "run_eval_steps": null,
24
+ "distributed_backend": "nccl",
25
+ "distributed_url": "tcp://localhost:54321",
26
+ "mixed_precision": false,
27
+ "precision": "fp16",
28
+ "epochs": 1000,
29
+ "batch_size": 16,
30
+ "eval_batch_size": 16,
31
+ "grad_clip": [
32
+ 1000,
33
+ 1000
34
+ ],
35
+ "scheduler_after_epoch": true,
36
+ "lr": 0.001,
37
+ "optimizer": "AdamW",
38
+ "optimizer_params": {
39
+ "betas": [
40
+ 0.8,
41
+ 0.99
42
+ ],
43
+ "eps": 1e-09,
44
+ "weight_decay": 0.01
45
+ },
46
+ "lr_scheduler": null,
47
+ "lr_scheduler_params": {},
48
+ "use_grad_scaler": false,
49
+ "allow_tf32": false,
50
+ "cudnn_enable": true,
51
+ "cudnn_deterministic": false,
52
+ "cudnn_benchmark": false,
53
+ "training_seed": 54321,
54
+ "model": "vits",
55
+ "num_loader_workers": 4,
56
+ "num_eval_loader_workers": 4,
57
+ "use_noise_augment": false,
58
+ "audio": {
59
+ "fft_size": 1024,
60
+ "sample_rate": 16000,
61
+ "win_length": 1024,
62
+ "hop_length": 256,
63
+ "num_mels": 80,
64
+ "mel_fmin": 0.0,
65
+ "mel_fmax": null
66
+ },
67
+ "use_phonemes": false,
68
+ "phonemizer": "espeak",
69
+ "phoneme_language": "en-us",
70
+ "compute_input_seq_cache": true,
71
+ "text_cleaner": "basic_cleaners",
72
+ "enable_eos_bos_chars": false,
73
+ "test_sentences_file": "",
74
+ "phoneme_cache_path": "/home/nano/TTS/models/vctk-yourtts2-ft-16k-ds/traineroutput/phoneme_cache",
75
+ "characters": {
76
+ "characters_class": "TTS.tts.models.vits.VitsCharacters",
77
+ "vocab_dict": null,
78
+ "pad": "_",
79
+ "eos": "&",
80
+ "bos": "*",
81
+ "blank": null,
82
+ "characters": "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\u00af\u00b7\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f9\u00fa\u00fb\u00fc\u00ff\u0101\u0105\u0107\u0113\u0119\u011b\u012b\u0131\u0142\u0144\u014d\u0151\u0153\u015b\u016b\u0171\u017a\u017c\u01ce\u01d0\u01d2\u01d4\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u044f\u0451\u0454\u0456\u0457\u0491\u2013!'(),-.:;? ",
83
+ "punctuations": "!'(),-.:;? ",
84
+ "phonemes": "",
85
+ "is_unique": true,
86
+ "is_sorted": true
87
+ },
88
+ "add_blank": true,
89
+ "batch_group_size": 0,
90
+ "loss_masking": null,
91
+ "min_audio_len": 1,
92
+ "max_audio_len": 160000,
93
+ "min_text_len": 1,
94
+ "max_text_len": 525,
95
+ "compute_f0": false,
96
+ "compute_energy": false,
97
+ "compute_linear_spec": true,
98
+ "precompute_num_workers": 0,
99
+ "start_by_longest": false,
100
+ "shuffle": false,
101
+ "drop_last": false,
102
+ "datasets": [
103
+ {
104
+ "formatter": "vctk",
105
+ "dataset_name": "vctk-en-us-16k-ds",
106
+ "path": "/home/nano/TTS/training-datasets/vctk-en-us-16k-ds/",
107
+ "meta_file_train": "",
108
+ "ignored_speakers": [],
109
+ "language": "en-us",
110
+ "phonemizer": "espeak",
111
+ "meta_file_val": "",
112
+ "meta_file_attn_mask": ""
113
+ },
114
+ {
115
+ "formatter": "vctk",
116
+ "dataset_name": "vctk_es",
117
+ "path": "/home/nano/TTS/training-datasets/vctk-es-16k-ds/",
118
+ "meta_file_train": "",
119
+ "ignored_speakers": [],
120
+ "language": "es",
121
+ "phonemizer": "espeak",
122
+ "meta_file_val": "",
123
+ "meta_file_attn_mask": ""
124
+ },
125
+ {
126
+ "formatter": "vctk",
127
+ "dataset_name": "vctk-en-gb-16k-ds",
128
+ "path": "/home/nano/TTS/training-datasets/vctk-en-gb-16k-ds/",
129
+ "meta_file_train": "",
130
+ "ignored_speakers": [],
131
+ "language": "en-gb",
132
+ "phonemizer": "espeak",
133
+ "meta_file_val": "",
134
+ "meta_file_attn_mask": ""
135
+ },
136
+ {
137
+ "formatter": "vctk",
138
+ "dataset_name": "vctk_de",
139
+ "path": "/home/nano/TTS/training-datasets/vctk-de-16k-ds/",
140
+ "meta_file_train": "",
141
+ "ignored_speakers": [],
142
+ "language": "de",
143
+ "phonemizer": "espeak",
144
+ "meta_file_val": "",
145
+ "meta_file_attn_mask": ""
146
+ },
147
+ {
148
+ "formatter": "vctk",
149
+ "dataset_name": "vctk_fr",
150
+ "path": "/home/nano/TTS/training-datasets/vctk-fr-16k-ds/",
151
+ "meta_file_train": "",
152
+ "ignored_speakers": [],
153
+ "language": "fr",
154
+ "phonemizer": "espeak",
155
+ "meta_file_val": "",
156
+ "meta_file_attn_mask": ""
157
+ }
158
+ ],
159
+ "test_sentences": [
160
+ [
161
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
162
+ "VCTK_johnw",
163
+ null,
164
+ "en-us"
165
+ ],
166
+ [
167
+ "A pot of tea helps to pass the evening.",
168
+ "VCTK_johnw",
169
+ null,
170
+ "en-us"
171
+ ],
172
+ [
173
+ "Smoky fires lack flame and heat.",
174
+ "VCTK_johnw",
175
+ null,
176
+ "en-us"
177
+ ],
178
+ [
179
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
180
+ "VCTK_lah",
181
+ null,
182
+ "en-us"
183
+ ],
184
+ [
185
+ "A pot of tea helps to pass the evening.",
186
+ "VCTK_lah",
187
+ null,
188
+ "en-us"
189
+ ],
190
+ [
191
+ "Smoky fires lack flame and heat.",
192
+ "VCTK_lah",
193
+ null,
194
+ "en-us"
195
+ ],
196
+ [
197
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
198
+ "VCTK_ljs",
199
+ null,
200
+ "en-us"
201
+ ],
202
+ [
203
+ "A pot of tea helps to pass the evening.",
204
+ "VCTK_ljs",
205
+ null,
206
+ "en-us"
207
+ ],
208
+ [
209
+ "Smoky fires lack flame and heat.",
210
+ "VCTK_ljs",
211
+ null,
212
+ "en-us"
213
+ ],
214
+ [
215
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
216
+ "VCTK_p294",
217
+ null,
218
+ "en-us"
219
+ ],
220
+ [
221
+ "A pot of tea helps to pass the evening.",
222
+ "VCTK_p294",
223
+ null,
224
+ "en-us"
225
+ ],
226
+ [
227
+ "Smoky fires lack flame and heat.",
228
+ "VCTK_p294",
229
+ null,
230
+ "en-us"
231
+ ],
232
+ [
233
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
234
+ "VCTK_p297",
235
+ null,
236
+ "en-us"
237
+ ],
238
+ [
239
+ "A pot of tea helps to pass the evening.",
240
+ "VCTK_p297",
241
+ null,
242
+ "en-us"
243
+ ],
244
+ [
245
+ "Smoky fires lack flame and heat.",
246
+ "VCTK_p297",
247
+ null,
248
+ "en-us"
249
+ ],
250
+ [
251
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
252
+ "VCTK_p299",
253
+ null,
254
+ "en-us"
255
+ ],
256
+ [
257
+ "A pot of tea helps to pass the evening.",
258
+ "VCTK_p299",
259
+ null,
260
+ "en-us"
261
+ ],
262
+ [
263
+ "Smoky fires lack flame and heat.",
264
+ "VCTK_p299",
265
+ null,
266
+ "en-us"
267
+ ],
268
+ [
269
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
270
+ "VCTK_p300",
271
+ null,
272
+ "en-us"
273
+ ],
274
+ [
275
+ "A pot of tea helps to pass the evening.",
276
+ "VCTK_p300",
277
+ null,
278
+ "en-us"
279
+ ],
280
+ [
281
+ "Smoky fires lack flame and heat.",
282
+ "VCTK_p300",
283
+ null,
284
+ "en-us"
285
+ ],
286
+ [
287
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
288
+ "VCTK_p301",
289
+ null,
290
+ "en-us"
291
+ ],
292
+ [
293
+ "A pot of tea helps to pass the evening.",
294
+ "VCTK_p301",
295
+ null,
296
+ "en-us"
297
+ ],
298
+ [
299
+ "Smoky fires lack flame and heat.",
300
+ "VCTK_p301",
301
+ null,
302
+ "en-us"
303
+ ],
304
+ [
305
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
306
+ "VCTK_p305",
307
+ null,
308
+ "en-us"
309
+ ],
310
+ [
311
+ "A pot of tea helps to pass the evening.",
312
+ "VCTK_p305",
313
+ null,
314
+ "en-us"
315
+ ],
316
+ [
317
+ "Smoky fires lack flame and heat.",
318
+ "VCTK_p305",
319
+ null,
320
+ "en-us"
321
+ ],
322
+ [
323
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
324
+ "VCTK_p306",
325
+ null,
326
+ "en-us"
327
+ ],
328
+ [
329
+ "A pot of tea helps to pass the evening.",
330
+ "VCTK_p306",
331
+ null,
332
+ "en-us"
333
+ ],
334
+ [
335
+ "Smoky fires lack flame and heat.",
336
+ "VCTK_p306",
337
+ null,
338
+ "en-us"
339
+ ],
340
+ [
341
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
342
+ "VCTK_p308",
343
+ null,
344
+ "en-us"
345
+ ],
346
+ [
347
+ "A pot of tea helps to pass the evening.",
348
+ "VCTK_p308",
349
+ null,
350
+ "en-us"
351
+ ],
352
+ [
353
+ "Smoky fires lack flame and heat.",
354
+ "VCTK_p308",
355
+ null,
356
+ "en-us"
357
+ ],
358
+ [
359
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
360
+ "VCTK_p310",
361
+ null,
362
+ "en-us"
363
+ ],
364
+ [
365
+ "A pot of tea helps to pass the evening.",
366
+ "VCTK_p310",
367
+ null,
368
+ "en-us"
369
+ ],
370
+ [
371
+ "Smoky fires lack flame and heat.",
372
+ "VCTK_p310",
373
+ null,
374
+ "en-us"
375
+ ],
376
+ [
377
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
378
+ "VCTK_p311",
379
+ null,
380
+ "en-us"
381
+ ],
382
+ [
383
+ "A pot of tea helps to pass the evening.",
384
+ "VCTK_p311",
385
+ null,
386
+ "en-us"
387
+ ],
388
+ [
389
+ "Smoky fires lack flame and heat.",
390
+ "VCTK_p311",
391
+ null,
392
+ "en-us"
393
+ ],
394
+ [
395
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
396
+ "VCTK_p318",
397
+ null,
398
+ "en-us"
399
+ ],
400
+ [
401
+ "A pot of tea helps to pass the evening.",
402
+ "VCTK_p318",
403
+ null,
404
+ "en-us"
405
+ ],
406
+ [
407
+ "Smoky fires lack flame and heat.",
408
+ "VCTK_p318",
409
+ null,
410
+ "en-us"
411
+ ],
412
+ [
413
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
414
+ "VCTK_p329",
415
+ null,
416
+ "en-us"
417
+ ],
418
+ [
419
+ "A pot of tea helps to pass the evening.",
420
+ "VCTK_p329",
421
+ null,
422
+ "en-us"
423
+ ],
424
+ [
425
+ "Smoky fires lack flame and heat.",
426
+ "VCTK_p329",
427
+ null,
428
+ "en-us"
429
+ ],
430
+ [
431
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
432
+ "VCTK_p330",
433
+ null,
434
+ "en-us"
435
+ ],
436
+ [
437
+ "A pot of tea helps to pass the evening.",
438
+ "VCTK_p330",
439
+ null,
440
+ "en-us"
441
+ ],
442
+ [
443
+ "Smoky fires lack flame and heat.",
444
+ "VCTK_p330",
445
+ null,
446
+ "en-us"
447
+ ],
448
+ [
449
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
450
+ "VCTK_p333",
451
+ null,
452
+ "en-us"
453
+ ],
454
+ [
455
+ "A pot of tea helps to pass the evening.",
456
+ "VCTK_p333",
457
+ null,
458
+ "en-us"
459
+ ],
460
+ [
461
+ "Smoky fires lack flame and heat.",
462
+ "VCTK_p333",
463
+ null,
464
+ "en-us"
465
+ ],
466
+ [
467
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
468
+ "VCTK_p334",
469
+ null,
470
+ "en-us"
471
+ ],
472
+ [
473
+ "A pot of tea helps to pass the evening.",
474
+ "VCTK_p334",
475
+ null,
476
+ "en-us"
477
+ ],
478
+ [
479
+ "Smoky fires lack flame and heat.",
480
+ "VCTK_p334",
481
+ null,
482
+ "en-us"
483
+ ],
484
+ [
485
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
486
+ "VCTK_p339",
487
+ null,
488
+ "en-us"
489
+ ],
490
+ [
491
+ "A pot of tea helps to pass the evening.",
492
+ "VCTK_p339",
493
+ null,
494
+ "en-us"
495
+ ],
496
+ [
497
+ "Smoky fires lack flame and heat.",
498
+ "VCTK_p339",
499
+ null,
500
+ "en-us"
501
+ ],
502
+ [
503
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
504
+ "VCTK_p341",
505
+ null,
506
+ "en-us"
507
+ ],
508
+ [
509
+ "A pot of tea helps to pass the evening.",
510
+ "VCTK_p341",
511
+ null,
512
+ "en-us"
513
+ ],
514
+ [
515
+ "Smoky fires lack flame and heat.",
516
+ "VCTK_p341",
517
+ null,
518
+ "en-us"
519
+ ],
520
+ [
521
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
522
+ "VCTK_p345",
523
+ null,
524
+ "en-us"
525
+ ],
526
+ [
527
+ "A pot of tea helps to pass the evening.",
528
+ "VCTK_p345",
529
+ null,
530
+ "en-us"
531
+ ],
532
+ [
533
+ "Smoky fires lack flame and heat.",
534
+ "VCTK_p345",
535
+ null,
536
+ "en-us"
537
+ ],
538
+ [
539
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
540
+ "VCTK_tomh",
541
+ null,
542
+ "en-us"
543
+ ],
544
+ [
545
+ "A pot of tea helps to pass the evening.",
546
+ "VCTK_tomh",
547
+ null,
548
+ "en-us"
549
+ ],
550
+ [
551
+ "Smoky fires lack flame and heat.",
552
+ "VCTK_tomh",
553
+ null,
554
+ "en-us"
555
+ ],
556
+ [
557
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
558
+ "VCTK_es1",
559
+ null,
560
+ "en-us"
561
+ ],
562
+ [
563
+ "A pot of tea helps to pass the evening.",
564
+ "VCTK_es1",
565
+ null,
566
+ "en-us"
567
+ ],
568
+ [
569
+ "Smoky fires lack flame and heat.",
570
+ "VCTK_es1",
571
+ null,
572
+ "en-us"
573
+ ],
574
+ [
575
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
576
+ "VCTK_es2",
577
+ null,
578
+ "en-us"
579
+ ],
580
+ [
581
+ "A pot of tea helps to pass the evening.",
582
+ "VCTK_es2",
583
+ null,
584
+ "en-us"
585
+ ],
586
+ [
587
+ "Smoky fires lack flame and heat.",
588
+ "VCTK_es2",
589
+ null,
590
+ "en-us"
591
+ ],
592
+ [
593
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
594
+ "VCTK_ruthg",
595
+ null,
596
+ "en-us"
597
+ ],
598
+ [
599
+ "A pot of tea helps to pass the evening.",
600
+ "VCTK_ruthg",
601
+ null,
602
+ "en-us"
603
+ ],
604
+ [
605
+ "Smoky fires lack flame and heat.",
606
+ "VCTK_ruthg",
607
+ null,
608
+ "en-us"
609
+ ],
610
+ [
611
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
612
+ "VCTK_evak",
613
+ null,
614
+ "en-us"
615
+ ],
616
+ [
617
+ "A pot of tea helps to pass the evening.",
618
+ "VCTK_evak",
619
+ null,
620
+ "en-us"
621
+ ],
622
+ [
623
+ "Smoky fires lack flame and heat.",
624
+ "VCTK_evak",
625
+ null,
626
+ "en-us"
627
+ ],
628
+ [
629
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
630
+ "VCTK_hok",
631
+ null,
632
+ "en-us"
633
+ ],
634
+ [
635
+ "A pot of tea helps to pass the evening.",
636
+ "VCTK_hok",
637
+ null,
638
+ "en-us"
639
+ ],
640
+ [
641
+ "Smoky fires lack flame and heat.",
642
+ "VCTK_hok",
643
+ null,
644
+ "en-us"
645
+ ],
646
+ [
647
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
648
+ "VCTK_bern",
649
+ null,
650
+ "en-us"
651
+ ],
652
+ [
653
+ "A pot of tea helps to pass the evening.",
654
+ "VCTK_bern",
655
+ null,
656
+ "en-us"
657
+ ],
658
+ [
659
+ "Smoky fires lack flame and heat.",
660
+ "VCTK_bern",
661
+ null,
662
+ "en-us"
663
+ ],
664
+ [
665
+ "I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order categorical.",
666
+ "VCTK_gilles",
667
+ null,
668
+ "en-us"
669
+ ],
670
+ [
671
+ "A pot of tea helps to pass the evening.",
672
+ "VCTK_gilles",
673
+ null,
674
+ "en-us"
675
+ ],
676
+ [
677
+ "Smoky fires lack flame and heat.",
678
+ "VCTK_gilles",
679
+ null,
680
+ "en-us"
681
+ ]
682
+ ],
683
+ "eval_split_max_size": null,
684
+ "eval_split_size": 0.01,
685
+ "use_speaker_weighted_sampler": false,
686
+ "speaker_weighted_sampler_alpha": 1.0,
687
+ "use_language_weighted_sampler": true,
688
+ "language_weighted_sampler_alpha": 1.0,
689
+ "use_length_weighted_sampler": false,
690
+ "length_weighted_sampler_alpha": 1.0,
691
+ "model_args": {
692
+ "num_chars": 165,
693
+ "out_channels": 513,
694
+ "spec_segment_size": 32,
695
+ "hidden_channels": 192,
696
+ "hidden_channels_ffn_text_encoder": 768,
697
+ "num_heads_text_encoder": 2,
698
+ "num_layers_text_encoder": 10,
699
+ "kernel_size_text_encoder": 3,
700
+ "dropout_p_text_encoder": 0.1,
701
+ "dropout_p_duration_predictor": 0.5,
702
+ "kernel_size_posterior_encoder": 5,
703
+ "dilation_rate_posterior_encoder": 1,
704
+ "num_layers_posterior_encoder": 16,
705
+ "kernel_size_flow": 5,
706
+ "dilation_rate_flow": 1,
707
+ "num_layers_flow": 4,
708
+ "resblock_type_decoder": "2",
709
+ "resblock_kernel_sizes_decoder": [
710
+ 3,
711
+ 7,
712
+ 11
713
+ ],
714
+ "resblock_dilation_sizes_decoder": [
715
+ [
716
+ 1,
717
+ 3,
718
+ 5
719
+ ],
720
+ [
721
+ 1,
722
+ 3,
723
+ 5
724
+ ],
725
+ [
726
+ 1,
727
+ 3,
728
+ 5
729
+ ]
730
+ ],
731
+ "upsample_rates_decoder": [
732
+ 8,
733
+ 8,
734
+ 2,
735
+ 2
736
+ ],
737
+ "upsample_initial_channel_decoder": 512,
738
+ "upsample_kernel_sizes_decoder": [
739
+ 16,
740
+ 16,
741
+ 4,
742
+ 4
743
+ ],
744
+ "periods_multi_period_discriminator": [
745
+ 2,
746
+ 3,
747
+ 5,
748
+ 7,
749
+ 11
750
+ ],
751
+ "use_sdp": true,
752
+ "noise_scale": 1.0,
753
+ "inference_noise_scale": 0.667,
754
+ "length_scale": 1,
755
+ "noise_scale_dp": 1.0,
756
+ "inference_noise_scale_dp": 1.0,
757
+ "max_inference_len": null,
758
+ "init_discriminator": true,
759
+ "use_spectral_norm_disriminator": false,
760
+ "use_speaker_embedding": false,
761
+ "num_speakers": 0,
762
+ "speakers_file": "speakers.pth",
763
+ "d_vector_file": [
764
+ "models/test/vctk-en-us-16k-ds_speakers.pth",
765
+ "models/test/vctk_es_speakers.pth",
766
+ "models/test/vctk-en-gb-16k-ds_speakers.pth",
767
+ "models/test/vctk_de_speakers.pth",
768
+ "models/test/vctk_fr_speakers.pth"
769
+ ],
770
+ "speaker_embedding_channels": 256,
771
+ "use_d_vector_file": true,
772
+ "d_vector_dim": 512,
773
+ "detach_dp_input": true,
774
+ "use_language_embedding": true,
775
+ "embedded_language_dim": 4,
776
+ "num_languages": 5,
777
+ "language_ids_file": "models/test/language_ids.json",
778
+ "use_speaker_encoder_as_loss": true,
779
+ "speaker_encoder_config_path": "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/config_se.json",
780
+ "speaker_encoder_model_path": "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/model_se.pth.tar",
781
+ "condition_dp_on_speaker": true,
782
+ "freeze_encoder": false,
783
+ "freeze_DP": false,
784
+ "freeze_PE": false,
785
+ "freeze_flow_decoder": false,
786
+ "freeze_waveform_decoder": false,
787
+ "encoder_sample_rate": null,
788
+ "interpolate_z": true,
789
+ "reinit_DP": false,
790
+ "reinit_text_encoder": false
791
+ },
792
+ "lr_gen": 0.0002,
793
+ "lr_disc": 0.0002,
794
+ "lr_scheduler_gen": "ExponentialLR",
795
+ "lr_scheduler_gen_params": {
796
+ "gamma": 0.999875,
797
+ "last_epoch": -1
798
+ },
799
+ "lr_scheduler_disc": "ExponentialLR",
800
+ "lr_scheduler_disc_params": {
801
+ "gamma": 0.999875,
802
+ "last_epoch": -1
803
+ },
804
+ "kl_loss_alpha": 1.0,
805
+ "disc_loss_alpha": 1.0,
806
+ "gen_loss_alpha": 1.0,
807
+ "feat_loss_alpha": 1.0,
808
+ "mel_loss_alpha": 45.0,
809
+ "dur_loss_alpha": 1.0,
810
+ "speaker_encoder_loss_alpha": 9.0,
811
+ "return_wav": true,
812
+ "use_weighted_sampler": true,
813
+ "weighted_sampler_attrs": {
814
+ "language": 1.0
815
+ },
816
+ "weighted_sampler_multipliers": {
817
+ "language": {}
818
+ },
819
+ "r": 1,
820
+ "num_speakers": 0,
821
+ "use_speaker_embedding": false,
822
+ "speakers_file": "speakers.pth",
823
+ "speaker_embedding_channels": 256,
824
+ "language_ids_file": "models/test/language_ids.json",
825
+ "use_language_embedding": true,
826
+ "use_d_vector_file": true,
827
+ "d_vector_file": [
828
+ "models/test/vctk-en-us-16k-ds_speakers.pth",
829
+ "models/test/vctk_es_speakers.pth",
830
+ "models/test/vctk-en-gb-16k-ds_speakers.pth",
831
+ "models/test/vctk_de_speakers.pth",
832
+ "models/test/vctk_fr_speakers.pth"
833
+ ],
834
+ "d_vector_dim": 512
835
+ }
language_ids.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "de": 0,
3
+ "en-gb": 1,
4
+ "en-us": 2,
5
+ "es": 3,
6
+ "fr": 4
7
+ }
speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b297c35f15a162e3fd428614b07da08b4b0e6d8da20b415a88a4cca92a20288
3
+ size 943
vctk-en-gb-16k-ds_speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57f5562a5aa1da6700735a575bd3b700bd49fe7e6b06e3d4b53532ea7fcb998b
3
+ size 16725999
vctk-en-us-16k-ds_speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef29f3c881f3d6159b476a81a3e198224c0fb907b69da23e2ea2b47990726e87
3
+ size 184978287
vctk_de_speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8009c9fa1e18a52f67be306c7f9349aaee1e0f337dfadb7b8952f81b80ffaf99
3
+ size 61151663
vctk_es_speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc6df4d1774311d3db05fc2a09bb477dcad48481febceb5969f2caed9cfb733e
3
+ size 72457839
vctk_fr_speakers.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b2ce000fbce2555111e432aa1b2ff5e08cd56bfebd1381a917543625db513c6
3
+ size 73762543