iamanigeeit committed on
Commit
7f0b33f
1 Parent(s): ed4d11a

Upload 3 files

Browse files
exp/ljspeech_tts_fastspeech2_mfa/checkpoint.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4a4d55f75674e7ff2357b2d59fab46214207ba798fb8001b9c2fa833f690a2e
3
+ size 446104753
exp/ljspeech_tts_fastspeech2_mfa/config.yaml ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_fastspeech2.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/ljspeech_tts_fastspeech2_mfa
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 0
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 400
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - loss
39
+ - min
40
+ - - train
41
+ - loss
42
+ - min
43
+ keep_nbest_models: 1
44
+ nbest_averaging_interval: 0
45
+ grad_clip: 1.0
46
+ grad_clip_type: 2.0
47
+ grad_noise: false
48
+ accum_grad: 8
49
+ no_forward_run: false
50
+ resume: true
51
+ train_dtype: float32
52
+ use_amp: false
53
+ log_interval: null
54
+ use_matplotlib: true
55
+ use_tensorboard: true
56
+ val_teacher_forcing: true
57
+ save_epochs: []
58
+ create_graph_in_tensorboard: false
59
+ use_wandb: false
60
+ wandb_project: null
61
+ wandb_id: null
62
+ wandb_entity: null
63
+ wandb_name: null
64
+ wandb_model_log_interval: -1
65
+ detect_anomaly: false
66
+ pretrain_path: null
67
+ init_param: []
68
+ ignore_init_mismatch: false
69
+ freeze_param: []
70
+ num_iters_per_epoch: 800
71
+ batch_size: 20
72
+ valid_batch_size: null
73
+ batch_bins: 3000000
74
+ valid_batch_bins: null
75
+ train_shape_file:
76
+ - data/stats/train/text_shape.phn
77
+ - data/stats/train/speech_shape
78
+ valid_shape_file:
79
+ - data/stats/valid/text_shape.phn
80
+ - data/stats/valid/speech_shape
81
+ batch_type: numel
82
+ valid_batch_type: null
83
+ fold_length:
84
+ - 150
85
+ - 204800
86
+ sort_in_batch: descending
87
+ sort_batch: descending
88
+ multiple_iterator: false
89
+ chunk_length: 500
90
+ chunk_shift_ratio: 0.5
91
+ num_cache_chunks: 1024
92
+ train_data_path_and_name_and_type:
93
+ - - dump/raw/tr_no_dev/text
94
+ - text
95
+ - text
96
+ - - data/tr_no_dev/durations
97
+ - durations
98
+ - text_int
99
+ - - dump/raw/tr_no_dev/wav.scp
100
+ - speech
101
+ - sound
102
+ - - data/stats/train/collect_feats/pitch.scp
103
+ - pitch
104
+ - npy
105
+ - - data/stats/train/collect_feats/energy.scp
106
+ - energy
107
+ - npy
108
+ valid_data_path_and_name_and_type:
109
+ - - dump/raw/dev/text
110
+ - text
111
+ - text
112
+ - - data/dev/durations
113
+ - durations
114
+ - text_int
115
+ - - dump/raw/dev/wav.scp
116
+ - speech
117
+ - sound
118
+ - - data/stats/valid/collect_feats/pitch.scp
119
+ - pitch
120
+ - npy
121
+ - - data/stats/valid/collect_feats/energy.scp
122
+ - energy
123
+ - npy
124
+ allow_variable_data_keys: false
125
+ max_cache_size: 0.0
126
+ max_cache_fd: 32
127
+ valid_max_cache_size: null
128
+ optim: adam
129
+ optim_conf:
130
+ lr: 1.0
131
+ scheduler: noamlr
132
+ scheduler_conf:
133
+ model_size: 384
134
+ warmup_steps: 4000
135
+ token_list:
136
+ - <blank>
137
+ - <unk>
138
+ - ə
139
+ - ɪ
140
+ - n
141
+ - s
142
+ - ɹ
143
+ - t
144
+ - d
145
+ - ð
146
+ - ɛ
147
+ - z
148
+ - æ
149
+ - w
150
+ - m
151
+ - sil
152
+ - v
153
+ - ɚ
154
+ - ej
155
+ - iː
156
+ - f
157
+ - i
158
+ - ','
159
+ - k
160
+ - ɐ
161
+ - ɑ
162
+ - aj
163
+ - p
164
+ - b
165
+ - ɒ
166
+ - l
167
+ - ow
168
+ - tʲ
169
+ - ʔ
170
+ - tʰ
171
+ - ɫ
172
+ - .
173
+ - ʎ
174
+ - ʉː
175
+ - h
176
+ - ʃ
177
+ - ŋ
178
+ - pʰ
179
+ - ɲ
180
+ - kʰ
181
+ - dʲ
182
+ - ɫ̩
183
+ - ɝ
184
+ - ç
185
+ - tʃ
186
+ - dʒ
187
+ - bʲ
188
+ - aw
189
+ - θ
190
+ - c
191
+ - cʰ
192
+ - mʲ
193
+ - ʉ
194
+ - ɟ
195
+ - ʊ
196
+ - ɡ
197
+ - n̩
198
+ - fʲ
199
+ - ɒː
200
+ - vʲ
201
+ - j
202
+ - pʲ
203
+ - ɑː
204
+ - ɾ
205
+ - ɱ
206
+ - ɔj
207
+ - m̩
208
+ - ʒ
209
+ - ɾʲ
210
+ - '?'
211
+ - '"'
212
+ - '!'
213
+ - t̪
214
+ - d̪
215
+ - ''''
216
+ - <sos/eos>
217
+ odim: null
218
+ model_conf: {}
219
+ use_preprocessor: true
220
+ token_type: phn
221
+ bpemodel: null
222
+ non_linguistic_symbols: null
223
+ cleaner: null
224
+ g2p: null
225
+ feats_extract: fbank
226
+ feats_extract_conf:
227
+ n_fft: 1024
228
+ hop_length: 256
229
+ win_length: null
230
+ fs: 22050
231
+ fmin: 80
232
+ fmax: 7600
233
+ n_mels: 80
234
+ normalize: global_mvn
235
+ normalize_conf:
236
+ stats_file: data/stats/train/feats_stats.npz
237
+ tts: fastspeech2
238
+ tts_conf:
239
+ adim: 384
240
+ aheads: 2
241
+ elayers: 4
242
+ eunits: 1536
243
+ dlayers: 4
244
+ dunits: 1536
245
+ positionwise_layer_type: conv1d
246
+ positionwise_conv_kernel_size: 3
247
+ duration_predictor_layers: 2
248
+ duration_predictor_chans: 256
249
+ duration_predictor_kernel_size: 3
250
+ postnet_layers: 5
251
+ postnet_filts: 5
252
+ postnet_chans: 256
253
+ use_masking: true
254
+ use_scaled_pos_enc: true
255
+ encoder_normalize_before: true
256
+ decoder_normalize_before: true
257
+ reduction_factor: 1
258
+ init_type: xavier_uniform
259
+ init_enc_alpha: 1.0
260
+ init_dec_alpha: 1.0
261
+ transformer_enc_dropout_rate: 0.2
262
+ transformer_enc_positional_dropout_rate: 0.2
263
+ transformer_enc_attn_dropout_rate: 0.2
264
+ transformer_dec_dropout_rate: 0.2
265
+ transformer_dec_positional_dropout_rate: 0.2
266
+ transformer_dec_attn_dropout_rate: 0.2
267
+ pitch_predictor_layers: 5
268
+ pitch_predictor_chans: 256
269
+ pitch_predictor_kernel_size: 5
270
+ pitch_predictor_dropout: 0.5
271
+ pitch_embed_kernel_size: 1
272
+ pitch_embed_dropout: 0.0
273
+ stop_gradient_from_pitch_predictor: true
274
+ energy_predictor_layers: 2
275
+ energy_predictor_chans: 256
276
+ energy_predictor_kernel_size: 3
277
+ energy_predictor_dropout: 0.5
278
+ energy_embed_kernel_size: 1
279
+ energy_embed_dropout: 0.0
280
+ stop_gradient_from_energy_predictor: false
281
+ pitch_extract: dio
282
+ pitch_extract_conf:
283
+ fs: 22050
284
+ n_fft: 1024
285
+ hop_length: 256
286
+ f0max: 400
287
+ f0min: 80
288
+ reduction_factor: 1
289
+ pitch_normalize: global_mvn
290
+ pitch_normalize_conf:
291
+ stats_file: data/stats/train/pitch_stats.npz
292
+ energy_extract: energy
293
+ energy_extract_conf:
294
+ fs: 22050
295
+ n_fft: 1024
296
+ hop_length: 256
297
+ win_length: null
298
+ reduction_factor: 1
299
+ energy_normalize: global_mvn
300
+ energy_normalize_conf:
301
+ stats_file: data/stats/train/energy_stats.npz
302
+ required:
303
+ - output_dir
304
+ - token_list
305
+ version: '202207'
306
+ distributed: false
exp/ljspeech_tts_fastspeech2_mfa/valid.loss.best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fee2c763c2deb2160e0ec8dc97db85e95145a6d1a569fdcf9df3ceef47ceaa1
3
+ size 148729545