ftshijt committed on
Commit
627190b
·
1 Parent(s): b33aaff

Update model

Browse files
Files changed (39) hide show
  1. README.md +789 -0
  2. exp/universa_train_universa_raw_fs16000/96epoch.pth +3 -0
  3. exp/universa_train_universa_raw_fs16000/config.yaml +714 -0
  4. exp/universa_train_universa_raw_fs16000/images/backward_time.png +0 -0
  5. exp/universa_train_universa_raw_fs16000/images/clip.png +0 -0
  6. exp/universa_train_universa_raw_fs16000/images/dns_overall_l1.png +0 -0
  7. exp/universa_train_universa_raw_fs16000/images/dns_overall_overall.png +0 -0
  8. exp/universa_train_universa_raw_fs16000/images/f0corr_l1.png +0 -0
  9. exp/universa_train_universa_raw_fs16000/images/f0corr_overall.png +0 -0
  10. exp/universa_train_universa_raw_fs16000/images/forward_time.png +0 -0
  11. exp/universa_train_universa_raw_fs16000/images/gpu_max_cached_mem_GB.png +0 -0
  12. exp/universa_train_universa_raw_fs16000/images/grad_norm.png +0 -0
  13. exp/universa_train_universa_raw_fs16000/images/iter_time.png +0 -0
  14. exp/universa_train_universa_raw_fs16000/images/loss.png +0 -0
  15. exp/universa_train_universa_raw_fs16000/images/loss_scale.png +0 -0
  16. exp/universa_train_universa_raw_fs16000/images/mcd_l1.png +0 -0
  17. exp/universa_train_universa_raw_fs16000/images/mcd_overall.png +0 -0
  18. exp/universa_train_universa_raw_fs16000/images/mos_l1.png +0 -0
  19. exp/universa_train_universa_raw_fs16000/images/mos_overall.png +0 -0
  20. exp/universa_train_universa_raw_fs16000/images/optim0_lr0.png +0 -0
  21. exp/universa_train_universa_raw_fs16000/images/optim_step_time.png +0 -0
  22. exp/universa_train_universa_raw_fs16000/images/pesq_l1.png +0 -0
  23. exp/universa_train_universa_raw_fs16000/images/pesq_overall.png +0 -0
  24. exp/universa_train_universa_raw_fs16000/images/sheet_ssqa_l1.png +0 -0
  25. exp/universa_train_universa_raw_fs16000/images/sheet_ssqa_overall.png +0 -0
  26. exp/universa_train_universa_raw_fs16000/images/si_snr_l1.png +0 -0
  27. exp/universa_train_universa_raw_fs16000/images/si_snr_overall.png +0 -0
  28. exp/universa_train_universa_raw_fs16000/images/speech_bert_l1.png +0 -0
  29. exp/universa_train_universa_raw_fs16000/images/speech_bert_overall.png +0 -0
  30. exp/universa_train_universa_raw_fs16000/images/spk_similarity_l1.png +0 -0
  31. exp/universa_train_universa_raw_fs16000/images/spk_similarity_overall.png +0 -0
  32. exp/universa_train_universa_raw_fs16000/images/stoi_l1.png +0 -0
  33. exp/universa_train_universa_raw_fs16000/images/stoi_overall.png +0 -0
  34. exp/universa_train_universa_raw_fs16000/images/train_time.png +0 -0
  35. exp/universa_train_universa_raw_fs16000/images/utmos_l1.png +0 -0
  36. exp/universa_train_universa_raw_fs16000/images/utmos_overall.png +0 -0
  37. exp/universa_train_universa_raw_fs16000/images/wer_l1.png +0 -0
  38. exp/universa_train_universa_raw_fs16000/images/wer_overall.png +0 -0
  39. meta.yaml +8 -0
README.md ADDED
@@ -0,0 +1,789 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - universa
6
+ language: multilingual
7
+ datasets:
8
+ - urgent24
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 universa model
13
+
14
+ ### `espnet/universa-base_urgent24_multi-metric`
15
+
16
+ This model was trained by ftshijt using the urgent24 recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ Follow the [ESPnet installation instructions](https://espnet.github.io/espnet/installation.html)
21
+ if you haven't done that already.
22
+
23
+ ```bash
24
+ cd espnet
25
+ git checkout ab8e929b3d605aaf8c766e28c8080a50aeb92312
26
+ pip install -e .
27
+ cd egs2/urgent24/uni_versa1
28
+ ./run.sh --skip_data_prep false --skip_train true --download_model espnet/universa-base_urgent24_multi-metric
29
+ ```
30
+
31
+
32
+
33
+ ## universa config
34
+
35
+ <details><summary>expand</summary>
36
+
37
+ ```
38
+ config: conf/train_universa.yaml
39
+ print_config: false
40
+ log_level: INFO
41
+ drop_last_iter: false
42
+ dry_run: false
43
+ iterator_type: sequence
44
+ valid_iterator_type: null
45
+ output_dir: exp/universa_train_universa_raw_fs16000
46
+ ngpu: 1
47
+ seed: 777
48
+ num_workers: 1
49
+ num_att_plot: 0
50
+ dist_backend: nccl
51
+ dist_init_method: env://
52
+ dist_world_size: null
53
+ dist_rank: null
54
+ local_rank: 0
55
+ dist_master_addr: null
56
+ dist_master_port: null
57
+ dist_launcher: null
58
+ multiprocessing_distributed: false
59
+ unused_parameters: false
60
+ sharded_ddp: false
61
+ use_deepspeed: false
62
+ deepspeed_config: null
63
+ cudnn_enabled: true
64
+ cudnn_benchmark: false
65
+ cudnn_deterministic: false
66
+ use_tf32: false
67
+ collect_stats: false
68
+ write_collected_feats: false
69
+ max_epoch: 100
70
+ patience: null
71
+ val_scheduler_criterion:
72
+ - valid
73
+ - loss
74
+ early_stopping_criterion:
75
+ - valid
76
+ - loss
77
+ - min
78
+ best_model_criterion:
79
+ - - train
80
+ - loss
81
+ - min
82
+ - - valid
83
+ - loss
84
+ - min
85
+ - - train
86
+ - acc
87
+ - max
88
+ - - valid
89
+ - acc
90
+ - max
91
+ keep_nbest_models: 1
92
+ nbest_averaging_interval: 0
93
+ grad_clip: -1
94
+ grad_clip_type: 2.0
95
+ grad_noise: false
96
+ accum_grad: 1
97
+ no_forward_run: false
98
+ resume: true
99
+ train_dtype: float32
100
+ use_amp: false
101
+ log_interval: 50
102
+ use_matplotlib: true
103
+ use_tensorboard: true
104
+ create_graph_in_tensorboard: false
105
+ use_wandb: false
106
+ wandb_project: null
107
+ wandb_id: null
108
+ wandb_entity: null
109
+ wandb_name: null
110
+ wandb_model_log_interval: -1
111
+ detect_anomaly: false
112
+ use_adapter: false
113
+ adapter: lora
114
+ save_strategy: all
115
+ adapter_conf: {}
116
+ pretrain_path: null
117
+ init_param: []
118
+ ignore_init_mismatch: false
119
+ freeze_param: []
120
+ num_iters_per_epoch: null
121
+ batch_size: 16
122
+ valid_batch_size: null
123
+ batch_bins: 1000000
124
+ valid_batch_bins: null
125
+ category_sample_size: 10
126
+ train_shape_file:
127
+ - exp/universa_stats_raw/train/audio_shape
128
+ - exp/universa_stats_raw/train/ref_audio_shape
129
+ - exp/universa_stats_raw/train/ref_text_shape
130
+ valid_shape_file:
131
+ - exp/universa_stats_raw/valid/audio_shape
132
+ - exp/universa_stats_raw/valid/ref_audio_shape
133
+ - exp/universa_stats_raw/valid/ref_text_shape
134
+ batch_type: sorted
135
+ valid_batch_type: null
136
+ fold_length:
137
+ - 256000
138
+ sort_in_batch: descending
139
+ shuffle_within_batch: false
140
+ sort_batch: descending
141
+ multiple_iterator: false
142
+ chunk_length: 500
143
+ chunk_shift_ratio: 0.5
144
+ num_cache_chunks: 1024
145
+ chunk_excluded_key_prefixes: []
146
+ chunk_default_fs: null
147
+ chunk_max_abs_length: null
148
+ chunk_discard_short_samples: true
149
+ train_data_path_and_name_and_type:
150
+ - - dump/raw/train/wav.scp
151
+ - audio
152
+ - sound
153
+ - - dump/raw/train/metric.scp
154
+ - metrics
155
+ - metric
156
+ - - dump/raw/train/ref_wav.scp
157
+ - ref_audio
158
+ - sound
159
+ - - dump/raw/train/text
160
+ - ref_text
161
+ - text
162
+ valid_data_path_and_name_and_type:
163
+ - - dump/raw/dev/wav.scp
164
+ - audio
165
+ - sound
166
+ - - dump/raw/dev/metric.scp
167
+ - metrics
168
+ - metric
169
+ - - dump/raw/dev/ref_wav.scp
170
+ - ref_audio
171
+ - sound
172
+ - - dump/raw/dev/text
173
+ - ref_text
174
+ - text
175
+ multi_task_dataset: false
176
+ allow_variable_data_keys: false
177
+ max_cache_size: 0.0
178
+ max_cache_fd: 32
179
+ allow_multi_rates: false
180
+ valid_max_cache_size: null
181
+ exclude_weight_decay: false
182
+ exclude_weight_decay_conf: {}
183
+ optim: adamw
184
+ optim_conf:
185
+ lr: 0.001
186
+ scheduler: warmuplr
187
+ scheduler_conf:
188
+ warmup_steps: 25000
189
+ metric2id: dump/raw/train/metric2id
190
+ metric2type: null
191
+ metric_pad_value: -100
192
+ token_list:
193
+ - <blank>
194
+ - <unk>
195
+ - s
196
+ - ▁
197
+ - t
198
+ - e
199
+ - ▁the
200
+ - i
201
+ - a
202
+ - o
203
+ - ▁a
204
+ - r
205
+ - ▁to
206
+ - d
207
+ - ▁and
208
+ - ''''
209
+ - m
210
+ - n
211
+ - ing
212
+ - u
213
+ - y
214
+ - p
215
+ - c
216
+ - ▁of
217
+ - l
218
+ - ed
219
+ - ▁I
220
+ - ▁in
221
+ - er
222
+ - re
223
+ - ▁it
224
+ - ▁you
225
+ - ar
226
+ - ▁f
227
+ - ▁is
228
+ - ▁that
229
+ - ','
230
+ - .
231
+ - in
232
+ - al
233
+ - g
234
+ - 'on'
235
+ - ▁b
236
+ - b
237
+ - or
238
+ - ▁c
239
+ - ▁s
240
+ - f
241
+ - h
242
+ - ▁we
243
+ - an
244
+ - en
245
+ - ▁for
246
+ - le
247
+ - ▁p
248
+ - ly
249
+ - es
250
+ - w
251
+ - ▁re
252
+ - ▁on
253
+ - ▁m
254
+ - ▁be
255
+ - ic
256
+ - ll
257
+ - th
258
+ - ▁he
259
+ - k
260
+ - ur
261
+ - ve
262
+ - ▁with
263
+ - ▁so
264
+ - ▁from
265
+ - ▁was
266
+ - v
267
+ - ch
268
+ - st
269
+ - ▁w
270
+ - ▁i
271
+ - ▁this
272
+ - ▁de
273
+ - ▁like
274
+ - ▁do
275
+ - ce
276
+ - at
277
+ - il
278
+ - ck
279
+ - ▁A
280
+ - ▁have
281
+ - ▁not
282
+ - ad
283
+ - ▁st
284
+ - ow
285
+ - ro
286
+ - ne
287
+ - ▁me
288
+ - ▁my
289
+ - ▁but
290
+ - ation
291
+ - ▁at
292
+ - ▁or
293
+ - '-'
294
+ - ter
295
+ - ent
296
+ - ▁B
297
+ - ▁n
298
+ - ▁know
299
+ - ▁t
300
+ - out
301
+ - ▁are
302
+ - nd
303
+ - ▁one
304
+ - ▁li
305
+ - ▁g
306
+ - ▁The
307
+ - ol
308
+ - ion
309
+ - te
310
+ - ▁go
311
+ - ut
312
+ - ▁as
313
+ - ▁just
314
+ - as
315
+ - ▁sh
316
+ - ▁they
317
+ - is
318
+ - ▁C
319
+ - et
320
+ - ▁h
321
+ - ▁an
322
+ - ▁there
323
+ - ▁up
324
+ - ▁S
325
+ - ▁M
326
+ - ▁she
327
+ - ▁by
328
+ - ▁su
329
+ - om
330
+ - ▁can
331
+ - us
332
+ - ▁your
333
+ - ng
334
+ - ▁con
335
+ - el
336
+ - ▁us
337
+ - ment
338
+ - z
339
+ - ▁see
340
+ - ▁ab
341
+ - ▁what
342
+ - ▁out
343
+ - ▁her
344
+ - me
345
+ - ate
346
+ - ▁all
347
+ - ▁th
348
+ - ▁if
349
+ - ▁right
350
+ - ▁his
351
+ - ▁ma
352
+ - ▁lo
353
+ - ▁which
354
+ - ide
355
+ - ▁P
356
+ - ▁more
357
+ - ▁then
358
+ - ul
359
+ - ast
360
+ - x
361
+ - ight
362
+ - ill
363
+ - ▁So
364
+ - ▁sp
365
+ - ▁going
366
+ - ▁some
367
+ - ure
368
+ - ▁their
369
+ - ig
370
+ - ▁no
371
+ - ▁ro
372
+ - ▁think
373
+ - ▁who
374
+ - ▁pro
375
+ - ver
376
+ - ive
377
+ - est
378
+ - ▁co
379
+ - ▁di
380
+ - '0'
381
+ - ist
382
+ - ▁k
383
+ - age
384
+ - ▁d
385
+ - ▁time
386
+ - ▁L
387
+ - ies
388
+ - ▁will
389
+ - ▁man
390
+ - ▁when
391
+ - ▁D
392
+ - les
393
+ - ▁F
394
+ - ▁want
395
+ - ff
396
+ - ity
397
+ - ▁un
398
+ - '?'
399
+ - ▁start
400
+ - ▁G
401
+ - ▁uh
402
+ - ▁get
403
+ - ok
404
+ - ▁take
405
+ - ▁po
406
+ - li
407
+ - ▁ho
408
+ - ▁way
409
+ - ▁don
410
+ - ▁yeah
411
+ - ▁really
412
+ - ▁say
413
+ - ▁look
414
+ - ▁good
415
+ - ▁ra
416
+ - ▁pr
417
+ - ▁had
418
+ - ttle
419
+ - ▁comp
420
+ - ort
421
+ - ish
422
+ - ▁ex
423
+ - ally
424
+ - ▁sa
425
+ - ▁how
426
+ - end
427
+ - ant
428
+ - ▁O
429
+ - ▁um
430
+ - way
431
+ - ance
432
+ - ▁other
433
+ - ▁two
434
+ - ine
435
+ - ever
436
+ - able
437
+ - ▁com
438
+ - other
439
+ - ▁first
440
+ - ▁back
441
+ - ▁al
442
+ - ers
443
+ - ions
444
+ - ▁now
445
+ - ▁off
446
+ - ning
447
+ - ▁down
448
+ - ▁has
449
+ - ▁than
450
+ - ▁car
451
+ - ▁Th
452
+ - very
453
+ - ice
454
+ - ▁dr
455
+ - ▁been
456
+ - ▁him
457
+ - ▁here
458
+ - ated
459
+ - '5'
460
+ - ▁hand
461
+ - ▁day
462
+ - ▁hear
463
+ - each
464
+ - ▁would
465
+ - ▁over
466
+ - ▁oh
467
+ - ▁cha
468
+ - ood
469
+ - ▁did
470
+ - ugh
471
+ - ▁per
472
+ - ▁let
473
+ - ▁str
474
+ - ▁tra
475
+ - ▁got
476
+ - ext
477
+ - '1'
478
+ - ▁We
479
+ - ▁Shields
480
+ - ▁come
481
+ - ▁should
482
+ - ▁could
483
+ - light
484
+ - '2'
485
+ - ▁people
486
+ - ▁again
487
+ - ▁year
488
+ - ▁app
489
+ - ▁into
490
+ - ▁any
491
+ - ▁N
492
+ - ▁mean
493
+ - ▁o
494
+ - ▁mus
495
+ - ▁lot
496
+ - ▁said
497
+ - ▁long
498
+ - ▁these
499
+ - ▁lea
500
+ - sh
501
+ - ▁vi
502
+ - ▁part
503
+ - ▁every
504
+ - ▁our
505
+ - ▁You
506
+ - ious
507
+ - ▁fight
508
+ - ▁Ch
509
+ - ark
510
+ - ▁may
511
+ - ▁Hammer
512
+ - ▁because
513
+ - ▁most
514
+ - ▁came
515
+ - ▁four
516
+ - ful
517
+ - ▁No
518
+ - ize
519
+ - ▁where
520
+ - ▁okay
521
+ - ▁much
522
+ - ▁ask
523
+ - ▁through
524
+ - ▁before
525
+ - ▁work
526
+ - ▁even
527
+ - ▁three
528
+ - mber
529
+ - ▁win
530
+ - ▁flight
531
+ - ake
532
+ - K
533
+ - ▁place
534
+ - ▁play
535
+ - ▁though
536
+ - ▁pound
537
+ - ▁bit
538
+ - land
539
+ - ▁va
540
+ - ▁talk
541
+ - ▁kind
542
+ - ▁Line
543
+ - ▁make
544
+ - hap
545
+ - ▁big
546
+ - ▁leav
547
+ - ▁something
548
+ - ▁game
549
+ - ▁under
550
+ - ▁feel
551
+ - self
552
+ - ▁give
553
+ - ▁includ
554
+ - U
555
+ - ▁twenty
556
+ - ▁guard
557
+ - ▁left
558
+ - ▁round
559
+ - ▁great
560
+ - body
561
+ - ▁gra
562
+ - ress
563
+ - lso
564
+ - '3'
565
+ - ▁everything
566
+ - ▁those
567
+ - ▁after
568
+ - ▁tell
569
+ - ▁need
570
+ - ▁yes
571
+ - qua
572
+ - ham
573
+ - ▁minutes
574
+ - ▁question
575
+ - ▁around
576
+ - ▁punch
577
+ - ▁course
578
+ - ▁gonna
579
+ - ▁person
580
+ - ▁move
581
+ - ▁plan
582
+ - ▁ear
583
+ - ept
584
+ - ▁Airport
585
+ - ▁Okay
586
+ - ▁found
587
+ - ▁seven
588
+ - ▁help
589
+ - que
590
+ - ▁qui
591
+ - ▁keep
592
+ - ▁guys
593
+ - ▁house
594
+ - ▁run
595
+ - ▁turn
596
+ - ▁better
597
+ - ▁stop
598
+ - ward
599
+ - ddle
600
+ - ▁second
601
+ - ground
602
+ - ▁world
603
+ - ▁high
604
+ - ▁point
605
+ - ▁hold
606
+ - ▁call
607
+ - '6'
608
+ - ▁actually
609
+ - ▁probably
610
+ - ▁heaven
611
+ - ▁speci
612
+ - ▁everyone
613
+ - ▁why
614
+ - ▁presen
615
+ - ▁thir
616
+ - lright
617
+ - ▁eye
618
+ - eath
619
+ - ▁Tak
620
+ - '!'
621
+ - '"'
622
+ - '4'
623
+ - ▁hundred
624
+ - ▁answer
625
+ - ▁small
626
+ - ▁wait
627
+ - ▁nothing
628
+ - q
629
+ - '8'
630
+ - V
631
+ - ▁countr
632
+ - ▁problem
633
+ - ▁continu
634
+ - ▁close
635
+ - ▁priva
636
+ - ▁20
637
+ - ▁pleas
638
+ - ▁walk
639
+ - ▁open
640
+ - ▁lay
641
+ - ▁Station
642
+ - ▁moment
643
+ - ▁Yeah
644
+ - ▁public
645
+ - possibl
646
+ - ▁happen
647
+ - together
648
+ - ▁while
649
+ - asically
650
+ - ▁money
651
+ - ▁wrong
652
+ - B
653
+ - ▁puzzle
654
+ - '7'
655
+ - ▁journ
656
+ - ▁rainbow
657
+ - ▁thousand
658
+ - I
659
+ - '9'
660
+ - S
661
+ - P
662
+ - '%'
663
+ - A
664
+ - D
665
+ - L
666
+ - F
667
+ - ’
668
+ - O
669
+ - G
670
+ - N
671
+ - á
672
+ - C
673
+ - $
674
+ - Z
675
+ - Y
676
+ - R
677
+ - E
678
+ - J
679
+ - W
680
+ - M
681
+ - H
682
+ - j
683
+ - –
684
+ - ;
685
+ - Q
686
+ - X
687
+ - ']'
688
+ - −
689
+ - '&'
690
+ - T
691
+ - '['
692
+ - <sos/eos>
693
+ init: xavier_uniform
694
+ model_conf: {}
695
+ use_ref_audio: true
696
+ use_ref_text: true
697
+ use_preprocessor: true
698
+ token_type: bpe
699
+ bpemodel: data/token_list/bpe_unigram500/bpe.model
700
+ non_linguistic_symbols: null
701
+ cleaner: null
702
+ g2p: null
703
+ frontend: default
704
+ frontend_conf: {}
705
+ universa: base
706
+ universa_conf:
707
+ embedding_dim: 256
708
+ audio_encoder_type: transformer
709
+ audio_encoder_params:
710
+ num_blocks: 4
711
+ attention_heads: 4
712
+ linear_units: 1024
713
+ dropout_rate: 0.1
714
+ positional_dropout_rate: 0.1
715
+ attention_dropout_rate: 0.1
716
+ input_layer: conv2d
717
+ normalize_before: true
718
+ concat_after: false
719
+ positionwise_layer_type: linear
720
+ positionwise_conv_kernel_size: 1
721
+ layer_drop_rate: 0.1
722
+ qk_norm: false
723
+ use_flash_attn: false
724
+ text_encoder_type: transformer
725
+ text_encoder_params:
726
+ num_blocks: 4
727
+ attention_heads: 4
728
+ linear_units: 1024
729
+ dropout_rate: 0.1
730
+ positional_dropout_rate: 0.1
731
+ attention_dropout_rate: 0.1
732
+ input_layer: linear
733
+ normalize_before: true
734
+ concat_after: false
735
+ positionwise_layer_type: linear
736
+ positionwise_conv_kernel_size: 1
737
+ layer_drop_rate: 0.1
738
+ qk_norm: false
739
+ use_flash_attn: false
740
+ cross_attention_type: multihead
741
+ cross_attention_params:
742
+ n_head: 4
743
+ dropout_rate: 0.1
744
+ pooling_type: mean
745
+ projector_type: linear
746
+ multi_branch: true
747
+ required:
748
+ - output_dir
749
+ - metric2id
750
+ version: '202409'
751
+ distributed: false
752
+ ```
753
+
754
+ </details>
755
+
756
+
757
+
758
+ ### Citing ESPnet
759
+
760
+ ```bibtex
761
+ @inproceedings{watanabe2018espnet,
762
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
763
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
764
+ year={2018},
765
+ booktitle={Proceedings of Interspeech},
766
+ pages={2207--2211},
767
+ doi={10.21437/Interspeech.2018-1456},
768
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
769
+ }
770
+
771
+
772
+
773
+
774
+
775
+
776
+ ```
777
+
778
+ or arXiv:
779
+
780
+ ```bibtex
781
+ @misc{watanabe2018espnet,
782
+ title={ESPnet: End-to-End Speech Processing Toolkit},
783
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
784
+ year={2018},
785
+ eprint={1804.00015},
786
+ archivePrefix={arXiv},
787
+ primaryClass={cs.CL}
788
+ }
789
+ ```
exp/universa_train_universa_raw_fs16000/96epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:978fe21afaaa7903c7d12a858b8f7347eba73e87510b00fb5883de94b01621fc
3
+ size 166255490
exp/universa_train_universa_raw_fs16000/config.yaml ADDED
@@ -0,0 +1,714 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/train_universa.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ drop_last_iter: false
5
+ dry_run: false
6
+ iterator_type: sequence
7
+ valid_iterator_type: null
8
+ output_dir: exp/universa_train_universa_raw_fs16000
9
+ ngpu: 1
10
+ seed: 777
11
+ num_workers: 1
12
+ num_att_plot: 0
13
+ dist_backend: nccl
14
+ dist_init_method: env://
15
+ dist_world_size: null
16
+ dist_rank: null
17
+ local_rank: 0
18
+ dist_master_addr: null
19
+ dist_master_port: null
20
+ dist_launcher: null
21
+ multiprocessing_distributed: false
22
+ unused_parameters: false
23
+ sharded_ddp: false
24
+ use_deepspeed: false
25
+ deepspeed_config: null
26
+ cudnn_enabled: true
27
+ cudnn_benchmark: false
28
+ cudnn_deterministic: false
29
+ use_tf32: false
30
+ collect_stats: false
31
+ write_collected_feats: false
32
+ max_epoch: 100
33
+ patience: null
34
+ val_scheduler_criterion:
35
+ - valid
36
+ - loss
37
+ early_stopping_criterion:
38
+ - valid
39
+ - loss
40
+ - min
41
+ best_model_criterion:
42
+ - - train
43
+ - loss
44
+ - min
45
+ - - valid
46
+ - loss
47
+ - min
48
+ - - train
49
+ - acc
50
+ - max
51
+ - - valid
52
+ - acc
53
+ - max
54
+ keep_nbest_models: 1
55
+ nbest_averaging_interval: 0
56
+ grad_clip: -1
57
+ grad_clip_type: 2.0
58
+ grad_noise: false
59
+ accum_grad: 1
60
+ no_forward_run: false
61
+ resume: true
62
+ train_dtype: float32
63
+ use_amp: false
64
+ log_interval: 50
65
+ use_matplotlib: true
66
+ use_tensorboard: true
67
+ create_graph_in_tensorboard: false
68
+ use_wandb: false
69
+ wandb_project: null
70
+ wandb_id: null
71
+ wandb_entity: null
72
+ wandb_name: null
73
+ wandb_model_log_interval: -1
74
+ detect_anomaly: false
75
+ use_adapter: false
76
+ adapter: lora
77
+ save_strategy: all
78
+ adapter_conf: {}
79
+ pretrain_path: null
80
+ init_param: []
81
+ ignore_init_mismatch: false
82
+ freeze_param: []
83
+ num_iters_per_epoch: null
84
+ batch_size: 16
85
+ valid_batch_size: null
86
+ batch_bins: 1000000
87
+ valid_batch_bins: null
88
+ category_sample_size: 10
89
+ train_shape_file:
90
+ - exp/universa_stats_raw/train/audio_shape
91
+ - exp/universa_stats_raw/train/ref_audio_shape
92
+ - exp/universa_stats_raw/train/ref_text_shape
93
+ valid_shape_file:
94
+ - exp/universa_stats_raw/valid/audio_shape
95
+ - exp/universa_stats_raw/valid/ref_audio_shape
96
+ - exp/universa_stats_raw/valid/ref_text_shape
97
+ batch_type: sorted
98
+ valid_batch_type: null
99
+ fold_length:
100
+ - 256000
101
+ sort_in_batch: descending
102
+ shuffle_within_batch: false
103
+ sort_batch: descending
104
+ multiple_iterator: false
105
+ chunk_length: 500
106
+ chunk_shift_ratio: 0.5
107
+ num_cache_chunks: 1024
108
+ chunk_excluded_key_prefixes: []
109
+ chunk_default_fs: null
110
+ chunk_max_abs_length: null
111
+ chunk_discard_short_samples: true
112
+ train_data_path_and_name_and_type:
113
+ - - dump/raw/train/wav.scp
114
+ - audio
115
+ - sound
116
+ - - dump/raw/train/metric.scp
117
+ - metrics
118
+ - metric
119
+ - - dump/raw/train/ref_wav.scp
120
+ - ref_audio
121
+ - sound
122
+ - - dump/raw/train/text
123
+ - ref_text
124
+ - text
125
+ valid_data_path_and_name_and_type:
126
+ - - dump/raw/dev/wav.scp
127
+ - audio
128
+ - sound
129
+ - - dump/raw/dev/metric.scp
130
+ - metrics
131
+ - metric
132
+ - - dump/raw/dev/ref_wav.scp
133
+ - ref_audio
134
+ - sound
135
+ - - dump/raw/dev/text
136
+ - ref_text
137
+ - text
138
+ multi_task_dataset: false
139
+ allow_variable_data_keys: false
140
+ max_cache_size: 0.0
141
+ max_cache_fd: 32
142
+ allow_multi_rates: false
143
+ valid_max_cache_size: null
144
+ exclude_weight_decay: false
145
+ exclude_weight_decay_conf: {}
146
+ optim: adamw
147
+ optim_conf:
148
+ lr: 0.001
149
+ scheduler: warmuplr
150
+ scheduler_conf:
151
+ warmup_steps: 25000
152
+ metric2id: dump/raw/train/metric2id
153
+ metric2type: null
154
+ metric_pad_value: -100
155
+ token_list:
156
+ - <blank>
157
+ - <unk>
158
+ - s
159
+ - ▁
160
+ - t
161
+ - e
162
+ - ▁the
163
+ - i
164
+ - a
165
+ - o
166
+ - ▁a
167
+ - r
168
+ - ▁to
169
+ - d
170
+ - ▁and
171
+ - ''''
172
+ - m
173
+ - n
174
+ - ing
175
+ - u
176
+ - y
177
+ - p
178
+ - c
179
+ - ▁of
180
+ - l
181
+ - ed
182
+ - ▁I
183
+ - ▁in
184
+ - er
185
+ - re
186
+ - ▁it
187
+ - ▁you
188
+ - ar
189
+ - ▁f
190
+ - ▁is
191
+ - ▁that
192
+ - ','
193
+ - .
194
+ - in
195
+ - al
196
+ - g
197
+ - 'on'
198
+ - ▁b
199
+ - b
200
+ - or
201
+ - ▁c
202
+ - ▁s
203
+ - f
204
+ - h
205
+ - ▁we
206
+ - an
207
+ - en
208
+ - ▁for
209
+ - le
210
+ - ▁p
211
+ - ly
212
+ - es
213
+ - w
214
+ - ▁re
215
+ - ▁on
216
+ - ▁m
217
+ - ▁be
218
+ - ic
219
+ - ll
220
+ - th
221
+ - ▁he
222
+ - k
223
+ - ur
224
+ - ve
225
+ - ▁with
226
+ - ▁so
227
+ - ▁from
228
+ - ▁was
229
+ - v
230
+ - ch
231
+ - st
232
+ - ▁w
233
+ - ▁i
234
+ - ▁this
235
+ - ▁de
236
+ - ▁like
237
+ - ▁do
238
+ - ce
239
+ - at
240
+ - il
241
+ - ck
242
+ - ▁A
243
+ - ▁have
244
+ - ▁not
245
+ - ad
246
+ - ▁st
247
+ - ow
248
+ - ro
249
+ - ne
250
+ - ▁me
251
+ - ▁my
252
+ - ▁but
253
+ - ation
254
+ - ▁at
255
+ - ▁or
256
+ - '-'
257
+ - ter
258
+ - ent
259
+ - ▁B
260
+ - ▁n
261
+ - ▁know
262
+ - ▁t
263
+ - out
264
+ - ▁are
265
+ - nd
266
+ - ▁one
267
+ - ▁li
268
+ - ▁g
269
+ - ▁The
270
+ - ol
271
+ - ion
272
+ - te
273
+ - ▁go
274
+ - ut
275
+ - ▁as
276
+ - ▁just
277
+ - as
278
+ - ▁sh
279
+ - ▁they
280
+ - is
281
+ - ▁C
282
+ - et
283
+ - ▁h
284
+ - ▁an
285
+ - ▁there
286
+ - ▁up
287
+ - ▁S
288
+ - ▁M
289
+ - ▁she
290
+ - ▁by
291
+ - ▁su
292
+ - om
293
+ - ▁can
294
+ - us
295
+ - ▁your
296
+ - ng
297
+ - ▁con
298
+ - el
299
+ - ▁us
300
+ - ment
301
+ - z
302
+ - ▁see
303
+ - ▁ab
304
+ - ▁what
305
+ - ▁out
306
+ - ▁her
307
+ - me
308
+ - ate
309
+ - ▁all
310
+ - ▁th
311
+ - ▁if
312
+ - ▁right
313
+ - ▁his
314
+ - ▁ma
315
+ - ▁lo
316
+ - ▁which
317
+ - ide
318
+ - ▁P
319
+ - ▁more
320
+ - ▁then
321
+ - ul
322
+ - ast
323
+ - x
324
+ - ight
325
+ - ill
326
+ - ▁So
327
+ - ▁sp
328
+ - ▁going
329
+ - ▁some
330
+ - ure
331
+ - ▁their
332
+ - ig
333
+ - ▁no
334
+ - ▁ro
335
+ - ▁think
336
+ - ▁who
337
+ - ▁pro
338
+ - ver
339
+ - ive
340
+ - est
341
+ - ▁co
342
+ - ▁di
343
+ - '0'
344
+ - ist
345
+ - ▁k
346
+ - age
347
+ - ▁d
348
+ - ▁time
349
+ - ▁L
350
+ - ies
351
+ - ▁will
352
+ - ▁man
353
+ - ▁when
354
+ - ▁D
355
+ - les
356
+ - ▁F
357
+ - ▁want
358
+ - ff
359
+ - ity
360
+ - ▁un
361
+ - '?'
362
+ - ▁start
363
+ - ▁G
364
+ - ▁uh
365
+ - ▁get
366
+ - ok
367
+ - ▁take
368
+ - ▁po
369
+ - li
370
+ - ▁ho
371
+ - ▁way
372
+ - ▁don
373
+ - ▁yeah
374
+ - ▁really
375
+ - ▁say
376
+ - ▁look
377
+ - ▁good
378
+ - ▁ra
379
+ - ▁pr
380
+ - ▁had
381
+ - ttle
382
+ - ▁comp
383
+ - ort
384
+ - ish
385
+ - ▁ex
386
+ - ally
387
+ - ▁sa
388
+ - ▁how
389
+ - end
390
+ - ant
391
+ - ▁O
392
+ - ▁um
393
+ - way
394
+ - ance
395
+ - ▁other
396
+ - ▁two
397
+ - ine
398
+ - ever
399
+ - able
400
+ - ▁com
401
+ - other
402
+ - ▁first
403
+ - ▁back
404
+ - ▁al
405
+ - ers
406
+ - ions
407
+ - ▁now
408
+ - ▁off
409
+ - ning
410
+ - ▁down
411
+ - ▁has
412
+ - ▁than
413
+ - ▁car
414
+ - ▁Th
415
+ - very
416
+ - ice
417
+ - ▁dr
418
+ - ▁been
419
+ - ▁him
420
+ - ▁here
421
+ - ated
422
+ - '5'
423
+ - ▁hand
424
+ - ▁day
425
+ - ▁hear
426
+ - each
427
+ - ▁would
428
+ - ▁over
429
+ - ▁oh
430
+ - ▁cha
431
+ - ood
432
+ - ▁did
433
+ - ugh
434
+ - ▁per
435
+ - ▁let
436
+ - ▁str
437
+ - ▁tra
438
+ - ▁got
439
+ - ext
440
+ - '1'
441
+ - ▁We
442
+ - ▁Shields
443
+ - ▁come
444
+ - ▁should
445
+ - ▁could
446
+ - light
447
+ - '2'
448
+ - ▁people
449
+ - ▁again
450
+ - ▁year
451
+ - ▁app
452
+ - ▁into
453
+ - ▁any
454
+ - ▁N
455
+ - ▁mean
456
+ - ▁o
457
+ - ▁mus
458
+ - ▁lot
459
+ - ▁said
460
+ - ▁long
461
+ - ▁these
462
+ - ▁lea
463
+ - sh
464
+ - ▁vi
465
+ - ▁part
466
+ - ▁every
467
+ - ▁our
468
+ - ▁You
469
+ - ious
470
+ - ▁fight
471
+ - ▁Ch
472
+ - ark
473
+ - ▁may
474
+ - ▁Hammer
475
+ - ▁because
476
+ - ▁most
477
+ - ▁came
478
+ - ▁four
479
+ - ful
480
+ - ▁No
481
+ - ize
482
+ - ▁where
483
+ - ▁okay
484
+ - ▁much
485
+ - ▁ask
486
+ - ▁through
487
+ - ▁before
488
+ - ▁work
489
+ - ▁even
490
+ - ▁three
491
+ - mber
492
+ - ▁win
493
+ - ▁flight
494
+ - ake
495
+ - K
496
+ - ▁place
497
+ - ▁play
498
+ - ▁though
499
+ - ▁pound
500
+ - ▁bit
501
+ - land
502
+ - ▁va
503
+ - ▁talk
504
+ - ▁kind
505
+ - ▁Line
506
+ - ▁make
507
+ - hap
508
+ - ▁big
509
+ - ▁leav
510
+ - ▁something
511
+ - ▁game
512
+ - ▁under
513
+ - ▁feel
514
+ - self
515
+ - ▁give
516
+ - ▁includ
517
+ - U
518
+ - ▁twenty
519
+ - ▁guard
520
+ - ▁left
521
+ - ▁round
522
+ - ▁great
523
+ - body
524
+ - ▁gra
525
+ - ress
526
+ - lso
527
+ - '3'
528
+ - ▁everything
529
+ - ▁those
530
+ - ▁after
531
+ - ▁tell
532
+ - ▁need
533
+ - ▁yes
534
+ - qua
535
+ - ham
536
+ - ▁minutes
537
+ - ▁question
538
+ - ▁around
539
+ - ▁punch
540
+ - ▁course
541
+ - ▁gonna
542
+ - ▁person
543
+ - ▁move
544
+ - ▁plan
545
+ - ▁ear
546
+ - ept
547
+ - ▁Airport
548
+ - ▁Okay
549
+ - ▁found
550
+ - ▁seven
551
+ - ▁help
552
+ - que
553
+ - ▁qui
554
+ - ▁keep
555
+ - ▁guys
556
+ - ▁house
557
+ - ▁run
558
+ - ▁turn
559
+ - ▁better
560
+ - ▁stop
561
+ - ward
562
+ - ddle
563
+ - ▁second
564
+ - ground
565
+ - ▁world
566
+ - ▁high
567
+ - ▁point
568
+ - ▁hold
569
+ - ▁call
570
+ - '6'
571
+ - ▁actually
572
+ - ▁probably
573
+ - ▁heaven
574
+ - ▁speci
575
+ - ▁everyone
576
+ - ▁why
577
+ - ▁presen
578
+ - ▁thir
579
+ - lright
580
+ - ▁eye
581
+ - eath
582
+ - ▁Tak
583
+ - '!'
584
+ - '"'
585
+ - '4'
586
+ - ▁hundred
587
+ - ▁answer
588
+ - ▁small
589
+ - ▁wait
590
+ - ▁nothing
591
+ - q
592
+ - '8'
593
+ - V
594
+ - ▁countr
595
+ - ▁problem
596
+ - ▁continu
597
+ - ▁close
598
+ - ▁priva
599
+ - ▁20
600
+ - ▁pleas
601
+ - ▁walk
602
+ - ▁open
603
+ - ▁lay
604
+ - ▁Station
605
+ - ▁moment
606
+ - ▁Yeah
607
+ - ▁public
608
+ - possibl
609
+ - ▁happen
610
+ - together
611
+ - ▁while
612
+ - asically
613
+ - ▁money
614
+ - ▁wrong
615
+ - B
616
+ - ▁puzzle
617
+ - '7'
618
+ - ▁journ
619
+ - ▁rainbow
620
+ - ▁thousand
621
+ - I
622
+ - '9'
623
+ - S
624
+ - P
625
+ - '%'
626
+ - A
627
+ - D
628
+ - L
629
+ - F
630
+ - ’
631
+ - O
632
+ - G
633
+ - N
634
+ - á
635
+ - C
636
+ - $
637
+ - Z
638
+ - Y
639
+ - R
640
+ - E
641
+ - J
642
+ - W
643
+ - M
644
+ - H
645
+ - j
646
+ - –
647
+ - ;
648
+ - Q
649
+ - X
650
+ - ']'
651
+ - −
652
+ - '&'
653
+ - T
654
+ - '['
655
+ - <sos/eos>
656
+ init: xavier_uniform
657
+ model_conf: {}
658
+ use_ref_audio: true
659
+ use_ref_text: true
660
+ use_preprocessor: true
661
+ token_type: bpe
662
+ bpemodel: data/token_list/bpe_unigram500/bpe.model
663
+ non_linguistic_symbols: null
664
+ cleaner: null
665
+ g2p: null
666
+ frontend: default
667
+ frontend_conf: {}
668
+ universa: base
669
+ universa_conf:
670
+ embedding_dim: 256
671
+ audio_encoder_type: transformer
672
+ audio_encoder_params:
673
+ num_blocks: 4
674
+ attention_heads: 4
675
+ linear_units: 1024
676
+ dropout_rate: 0.1
677
+ positional_dropout_rate: 0.1
678
+ attention_dropout_rate: 0.1
679
+ input_layer: conv2d
680
+ normalize_before: true
681
+ concat_after: false
682
+ positionwise_layer_type: linear
683
+ positionwise_conv_kernel_size: 1
684
+ layer_drop_rate: 0.1
685
+ qk_norm: false
686
+ use_flash_attn: false
687
+ text_encoder_type: transformer
688
+ text_encoder_params:
689
+ num_blocks: 4
690
+ attention_heads: 4
691
+ linear_units: 1024
692
+ dropout_rate: 0.1
693
+ positional_dropout_rate: 0.1
694
+ attention_dropout_rate: 0.1
695
+ input_layer: linear
696
+ normalize_before: true
697
+ concat_after: false
698
+ positionwise_layer_type: linear
699
+ positionwise_conv_kernel_size: 1
700
+ layer_drop_rate: 0.1
701
+ qk_norm: false
702
+ use_flash_attn: false
703
+ cross_attention_type: multihead
704
+ cross_attention_params:
705
+ n_head: 4
706
+ dropout_rate: 0.1
707
+ pooling_type: mean
708
+ projector_type: linear
709
+ multi_branch: true
710
+ required:
711
+ - output_dir
712
+ - metric2id
713
+ version: '202409'
714
+ distributed: false
exp/universa_train_universa_raw_fs16000/images/backward_time.png ADDED
exp/universa_train_universa_raw_fs16000/images/clip.png ADDED
exp/universa_train_universa_raw_fs16000/images/dns_overall_l1.png ADDED
exp/universa_train_universa_raw_fs16000/images/dns_overall_overall.png ADDED
exp/universa_train_universa_raw_fs16000/images/f0corr_l1.png ADDED
exp/universa_train_universa_raw_fs16000/images/f0corr_overall.png ADDED
exp/universa_train_universa_raw_fs16000/images/forward_time.png ADDED
exp/universa_train_universa_raw_fs16000/images/gpu_max_cached_mem_GB.png ADDED
exp/universa_train_universa_raw_fs16000/images/grad_norm.png ADDED
exp/universa_train_universa_raw_fs16000/images/iter_time.png ADDED
exp/universa_train_universa_raw_fs16000/images/loss.png ADDED
exp/universa_train_universa_raw_fs16000/images/loss_scale.png ADDED
exp/universa_train_universa_raw_fs16000/images/mcd_l1.png ADDED
exp/universa_train_universa_raw_fs16000/images/mcd_overall.png ADDED
exp/universa_train_universa_raw_fs16000/images/mos_l1.png ADDED
exp/universa_train_universa_raw_fs16000/images/mos_overall.png ADDED
exp/universa_train_universa_raw_fs16000/images/optim0_lr0.png ADDED
exp/universa_train_universa_raw_fs16000/images/optim_step_time.png ADDED
exp/universa_train_universa_raw_fs16000/images/pesq_l1.png ADDED
exp/universa_train_universa_raw_fs16000/images/pesq_overall.png ADDED
exp/universa_train_universa_raw_fs16000/images/sheet_ssqa_l1.png ADDED
exp/universa_train_universa_raw_fs16000/images/sheet_ssqa_overall.png ADDED
exp/universa_train_universa_raw_fs16000/images/si_snr_l1.png ADDED
exp/universa_train_universa_raw_fs16000/images/si_snr_overall.png ADDED
exp/universa_train_universa_raw_fs16000/images/speech_bert_l1.png ADDED
exp/universa_train_universa_raw_fs16000/images/speech_bert_overall.png ADDED
exp/universa_train_universa_raw_fs16000/images/spk_similarity_l1.png ADDED
exp/universa_train_universa_raw_fs16000/images/spk_similarity_overall.png ADDED
exp/universa_train_universa_raw_fs16000/images/stoi_l1.png ADDED
exp/universa_train_universa_raw_fs16000/images/stoi_overall.png ADDED
exp/universa_train_universa_raw_fs16000/images/train_time.png ADDED
exp/universa_train_universa_raw_fs16000/images/utmos_l1.png ADDED
exp/universa_train_universa_raw_fs16000/images/utmos_overall.png ADDED
exp/universa_train_universa_raw_fs16000/images/wer_l1.png ADDED
exp/universa_train_universa_raw_fs16000/images/wer_overall.png ADDED
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202409'
2
+ files:
3
+ model_file: exp/universa_train_universa_raw_fs16000/96epoch.pth
4
+ python: 3.10.15 | packaged by conda-forge | (main, Oct 16 2024, 01:24:24) [GCC 13.3.0]
5
+ timestamp: 1737373782.018871
6
+ torch: 2.5.1+cu124
7
+ yaml_files:
8
+ train_config: exp/universa_train_universa_raw_fs16000/config.yaml