YushiUeda committed on
Commit
5b22d8d
·
1 Parent(s): 5d7ec8c

Update model

Browse files
README.md ADDED
@@ -0,0 +1,257 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - espnet
4
+ - audio
5
+ - diarization
6
+ language: noinfo
7
+ datasets:
8
+ - callhome
9
+ license: cc-by-4.0
10
+ ---
11
+
12
+ ## ESPnet2 DIAR model
13
+
14
+ ### `YushiUeda/callhome_adapt_real`
15
+
16
+ This model was trained by YushiUeda using the callhome recipe in [espnet](https://github.com/espnet/espnet/).
17
+
18
+ ### Demo: How to use in ESPnet2
19
+
20
+ ```bash
21
+ cd espnet
22
+ git checkout 0cabe65afd362122e77b04e2e967986a91de0fd8
23
+ pip install -e .
24
+ cd egs2/callhome/diar1
25
+ ./run.sh --skip_data_prep false --skip_train true --download_model YushiUeda/callhome_adapt_real
26
+ ```
27
+
28
+ <!-- Generated by scripts/utils/show_diar_result.sh -->
29
+ # RESULTS
30
+ ## Environments
31
+ - date: `Mon Jun 20 10:30:23 EDT 2022`
32
+ - python version: `3.7.11 (default, Jul 27 2021, 14:32:16) [GCC 7.5.0]`
33
+ - espnet version: `espnet 202205`
34
+ - pytorch version: `pytorch 1.9.1+cu102`
35
+ - Git hash: `fc62b1ce3e50c5ef8a2ac8cedb0d92ac41df54ca`
36
+ - Commit date: `Thu Jun 9 16:29:52 2022 +0900`
37
+
38
+ ## diar_train_diar_eda_adapt_real_lr0001
39
+ ### DER
40
+ diarized_callhome2_spkall
41
+ |threshold_median_collar|DER|
42
+ |---|---|
43
+ |result_th0.3_med11_collar0.25|22.29|
44
+ |result_th0.3_med1_collar0.25|23.27|
45
+ |result_th0.4_med11_collar0.25|19.85|
46
+ |result_th0.4_med1_collar0.25|20.80|
47
+ |result_th0.5_med11_collar0.25|19.26|
48
+ |result_th0.5_med1_collar0.25|20.18|
49
+ |result_th0.6_med11_collar0.25|20.24|
50
+ |result_th0.6_med1_collar0.25|21.08|
51
+ |result_th0.7_med11_collar0.25|22.38|
52
+ |result_th0.7_med1_collar0.25|23.17|
53
+
54
+ ## DIAR config
55
+
56
+ <details><summary>expand</summary>
57
+
58
+ ```
59
+ config: conf/tuning/train_diar_eda_adapt.yaml
60
+ print_config: false
61
+ log_level: INFO
62
+ dry_run: false
63
+ iterator_type: sequence
64
+ output_dir: exp/diar_train_diar_eda_adapt_real_lr0001
65
+ ngpu: 1
66
+ seed: 0
67
+ num_workers: 1
68
+ num_att_plot: 3
69
+ dist_backend: nccl
70
+ dist_init_method: env://
71
+ dist_world_size: null
72
+ dist_rank: null
73
+ local_rank: 0
74
+ dist_master_addr: null
75
+ dist_master_port: null
76
+ dist_launcher: null
77
+ multiprocessing_distributed: false
78
+ unused_parameters: false
79
+ sharded_ddp: false
80
+ cudnn_enabled: true
81
+ cudnn_benchmark: false
82
+ cudnn_deterministic: true
83
+ collect_stats: false
84
+ write_collected_feats: false
85
+ max_epoch: 50
86
+ patience: null
87
+ val_scheduler_criterion:
88
+ - valid
89
+ - loss
90
+ early_stopping_criterion:
91
+ - valid
92
+ - loss
93
+ - min
94
+ best_model_criterion:
95
+ - - valid
96
+ - acc
97
+ - max
98
+ - - train
99
+ - acc
100
+ - max
101
+ keep_nbest_models: 10
102
+ nbest_averaging_interval: 0
103
+ grad_clip: 5
104
+ grad_clip_type: 2.0
105
+ grad_noise: false
106
+ accum_grad: 16
107
+ no_forward_run: false
108
+ resume: true
109
+ train_dtype: float32
110
+ use_amp: false
111
+ log_interval: null
112
+ use_matplotlib: true
113
+ use_tensorboard: true
114
+ use_wandb: false
115
+ wandb_project: null
116
+ wandb_id: null
117
+ wandb_entity: null
118
+ wandb_name: null
119
+ wandb_model_log_interval: -1
120
+ detect_anomaly: false
121
+ pretrain_path: null
122
+ init_param:
123
+ - exp/diar_train_diar_eda_adapt_simu/latest.pth
124
+ ignore_init_mismatch: false
125
+ freeze_param: []
126
+ num_iters_per_epoch: null
127
+ batch_size: 1
128
+ valid_batch_size: null
129
+ batch_bins: 1000000
130
+ valid_batch_bins: null
131
+ train_shape_file:
132
+ - exp/diar_stats_8k/train/speech_shape
133
+ - exp/diar_stats_8k/train/spk_labels_shape
134
+ valid_shape_file:
135
+ - exp/diar_stats_8k/valid/speech_shape
136
+ - exp/diar_stats_8k/valid/spk_labels_shape
137
+ batch_type: sorted
138
+ valid_batch_type: null
139
+ fold_length:
140
+ - 80000
141
+ - 800
142
+ sort_in_batch: descending
143
+ sort_batch: descending
144
+ multiple_iterator: false
145
+ chunk_length: 500
146
+ chunk_shift_ratio: 0.5
147
+ num_cache_chunks: 1024
148
+ train_data_path_and_name_and_type:
149
+ - - dump/raw/callhome1_spkall/wav.scp
150
+ - speech
151
+ - sound
152
+ - - dump/raw/callhome1_spkall/espnet_rttm
153
+ - spk_labels
154
+ - rttm
155
+ valid_data_path_and_name_and_type:
156
+ - - dump/raw/callhome2_spkall/wav.scp
157
+ - speech
158
+ - sound
159
+ - - dump/raw/callhome2_spkall/espnet_rttm
160
+ - spk_labels
161
+ - rttm
162
+ allow_variable_data_keys: false
163
+ max_cache_size: 0.0
164
+ max_cache_fd: 32
165
+ valid_max_cache_size: null
166
+ optim: adam
167
+ optim_conf:
168
+ lr: 0.001
169
+ scheduler: null
170
+ scheduler_conf: {}
171
+ num_spk: 7
172
+ init: null
173
+ input_size: null
174
+ model_conf:
175
+ attractor_weight: 1.0
176
+ use_preprocessor: true
177
+ frontend: default
178
+ frontend_conf:
179
+ fs: 8k
180
+ hop_length: 128
181
+ specaug: specaug
182
+ specaug_conf:
183
+ apply_time_warp: false
184
+ apply_freq_mask: true
185
+ freq_mask_width_range:
186
+ - 0
187
+ - 30
188
+ num_freq_mask: 2
189
+ apply_time_mask: true
190
+ time_mask_width_range:
191
+ - 0
192
+ - 40
193
+ num_time_mask: 2
194
+ normalize: global_mvn
195
+ normalize_conf:
196
+ stats_file: exp/diar_stats_8k/train/feats_stats.npz
197
+ encoder: transformer
198
+ encoder_conf:
199
+ input_layer: conv2d
200
+ num_blocks: 4
201
+ linear_units: 512
202
+ dropout_rate: 0.1
203
+ output_size: 256
204
+ attention_heads: 4
205
+ attention_dropout_rate: 0.1
206
+ decoder: linear
207
+ decoder_conf: {}
208
+ label_aggregator: label_aggregator
209
+ label_aggregator_conf:
210
+ win_length: 1024
211
+ hop_length: 512
212
+ attractor: rnn
213
+ attractor_conf:
214
+ unit: 256
215
+ layer: 1
216
+ dropout: 0.0
217
+ attractor_grad: false
218
+ required:
219
+ - output_dir
220
+ version: '202204'
221
+ distributed: false
222
+ ```
223
+
224
+ </details>
225
+
226
+
227
+
228
+ ### Citing ESPnet
229
+
230
+ ```bibtex
231
+ @inproceedings{watanabe2018espnet,
232
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
233
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
234
+ year={2018},
235
+ booktitle={Proceedings of Interspeech},
236
+ pages={2207--2211},
237
+ doi={10.21437/Interspeech.2018-1456},
238
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
239
+ }
240
+
241
+
242
+
243
+
244
+ ```
245
+
246
+ or arXiv:
247
+
248
+ ```bibtex
249
+ @misc{watanabe2018espnet,
250
+ title={ESPnet: End-to-End Speech Processing Toolkit},
251
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
252
+ year={2018},
253
+ eprint={1804.00015},
254
+ archivePrefix={arXiv},
255
+ primaryClass={cs.CL}
256
+ }
257
+ ```
exp/diar_stats_8k/train/feats_stats.npz ADDED
Binary file (1.4 kB). View file
 
exp/diar_train_diar_eda_adapt_real_lr0001/25epoch.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:335acf767e99da4d31b81e45c65b48e4b40334517b0c6f74401e74cb2372e1d9
3
+ size 20113016
exp/diar_train_diar_eda_adapt_real_lr0001/RESULTS.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Generated by scripts/utils/show_diar_result.sh -->
2
+ # RESULTS
3
+ ## Environments
4
+ - date: `Mon Jun 20 10:30:23 EDT 2022`
5
+ - python version: `3.7.11 (default, Jul 27 2021, 14:32:16) [GCC 7.5.0]`
6
+ - espnet version: `espnet 202205`
7
+ - pytorch version: `pytorch 1.9.1+cu102`
8
+ - Git hash: `fc62b1ce3e50c5ef8a2ac8cedb0d92ac41df54ca`
9
+ - Commit date: `Thu Jun 9 16:29:52 2022 +0900`
10
+
11
+ ## diar_train_diar_eda_adapt_real_lr0001
12
+ ### DER
13
+ diarized_callhome2_spkall
14
+ |threshold_median_collar|DER|
15
+ |---|---|
16
+ |result_th0.3_med11_collar0.25|22.29|
17
+ |result_th0.3_med1_collar0.25|23.27|
18
+ |result_th0.4_med11_collar0.25|19.85|
19
+ |result_th0.4_med1_collar0.25|20.80|
20
+ |result_th0.5_med11_collar0.25|19.26|
21
+ |result_th0.5_med1_collar0.25|20.18|
22
+ |result_th0.6_med11_collar0.25|20.24|
23
+ |result_th0.6_med1_collar0.25|21.08|
24
+ |result_th0.7_med11_collar0.25|22.38|
25
+ |result_th0.7_med1_collar0.25|23.17|
exp/diar_train_diar_eda_adapt_real_lr0001/config.yaml ADDED
@@ -0,0 +1,163 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_diar_eda_adapt.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/diar_train_diar_eda_adapt_real_lr0001
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: 0
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 50
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - acc
39
+ - max
40
+ - - train
41
+ - acc
42
+ - max
43
+ keep_nbest_models: 10
44
+ nbest_averaging_interval: 0
45
+ grad_clip: 5
46
+ grad_clip_type: 2.0
47
+ grad_noise: false
48
+ accum_grad: 16
49
+ no_forward_run: false
50
+ resume: true
51
+ train_dtype: float32
52
+ use_amp: false
53
+ log_interval: null
54
+ use_matplotlib: true
55
+ use_tensorboard: true
56
+ use_wandb: false
57
+ wandb_project: null
58
+ wandb_id: null
59
+ wandb_entity: null
60
+ wandb_name: null
61
+ wandb_model_log_interval: -1
62
+ detect_anomaly: false
63
+ pretrain_path: null
64
+ init_param:
65
+ - exp/diar_train_diar_eda_adapt_simu/latest.pth
66
+ ignore_init_mismatch: false
67
+ freeze_param: []
68
+ num_iters_per_epoch: null
69
+ batch_size: 1
70
+ valid_batch_size: null
71
+ batch_bins: 1000000
72
+ valid_batch_bins: null
73
+ train_shape_file:
74
+ - exp/diar_stats_8k/train/speech_shape
75
+ - exp/diar_stats_8k/train/spk_labels_shape
76
+ valid_shape_file:
77
+ - exp/diar_stats_8k/valid/speech_shape
78
+ - exp/diar_stats_8k/valid/spk_labels_shape
79
+ batch_type: sorted
80
+ valid_batch_type: null
81
+ fold_length:
82
+ - 80000
83
+ - 800
84
+ sort_in_batch: descending
85
+ sort_batch: descending
86
+ multiple_iterator: false
87
+ chunk_length: 500
88
+ chunk_shift_ratio: 0.5
89
+ num_cache_chunks: 1024
90
+ train_data_path_and_name_and_type:
91
+ - - dump/raw/callhome1_spkall/wav.scp
92
+ - speech
93
+ - sound
94
+ - - dump/raw/callhome1_spkall/espnet_rttm
95
+ - spk_labels
96
+ - rttm
97
+ valid_data_path_and_name_and_type:
98
+ - - dump/raw/callhome2_spkall/wav.scp
99
+ - speech
100
+ - sound
101
+ - - dump/raw/callhome2_spkall/espnet_rttm
102
+ - spk_labels
103
+ - rttm
104
+ allow_variable_data_keys: false
105
+ max_cache_size: 0.0
106
+ max_cache_fd: 32
107
+ valid_max_cache_size: null
108
+ optim: adam
109
+ optim_conf:
110
+ lr: 0.001
111
+ scheduler: null
112
+ scheduler_conf: {}
113
+ num_spk: 7
114
+ init: null
115
+ input_size: null
116
+ model_conf:
117
+ attractor_weight: 1.0
118
+ use_preprocessor: true
119
+ frontend: default
120
+ frontend_conf:
121
+ fs: 8k
122
+ hop_length: 128
123
+ specaug: specaug
124
+ specaug_conf:
125
+ apply_time_warp: false
126
+ apply_freq_mask: true
127
+ freq_mask_width_range:
128
+ - 0
129
+ - 30
130
+ num_freq_mask: 2
131
+ apply_time_mask: true
132
+ time_mask_width_range:
133
+ - 0
134
+ - 40
135
+ num_time_mask: 2
136
+ normalize: global_mvn
137
+ normalize_conf:
138
+ stats_file: exp/diar_stats_8k/train/feats_stats.npz
139
+ encoder: transformer
140
+ encoder_conf:
141
+ input_layer: conv2d
142
+ num_blocks: 4
143
+ linear_units: 512
144
+ dropout_rate: 0.1
145
+ output_size: 256
146
+ attention_heads: 4
147
+ attention_dropout_rate: 0.1
148
+ decoder: linear
149
+ decoder_conf: {}
150
+ label_aggregator: label_aggregator
151
+ label_aggregator_conf:
152
+ win_length: 1024
153
+ hop_length: 512
154
+ attractor: rnn
155
+ attractor_conf:
156
+ unit: 256
157
+ layer: 1
158
+ dropout: 0.0
159
+ attractor_grad: false
160
+ required:
161
+ - output_dir
162
+ version: '202204'
163
+ distributed: false
exp/diar_train_diar_eda_adapt_real_lr0001/images/acc.png ADDED
exp/diar_train_diar_eda_adapt_real_lr0001/images/backward_time.png ADDED
exp/diar_train_diar_eda_adapt_real_lr0001/images/cf.png ADDED
exp/diar_train_diar_eda_adapt_real_lr0001/images/der.png ADDED
exp/diar_train_diar_eda_adapt_real_lr0001/images/fa.png ADDED
exp/diar_train_diar_eda_adapt_real_lr0001/images/forward_time.png ADDED
exp/diar_train_diar_eda_adapt_real_lr0001/images/gpu_max_cached_mem_GB.png ADDED
exp/diar_train_diar_eda_adapt_real_lr0001/images/iter_time.png ADDED
exp/diar_train_diar_eda_adapt_real_lr0001/images/loss.png ADDED
exp/diar_train_diar_eda_adapt_real_lr0001/images/loss_att.png ADDED
exp/diar_train_diar_eda_adapt_real_lr0001/images/loss_pit.png ADDED
exp/diar_train_diar_eda_adapt_real_lr0001/images/mi.png ADDED
exp/diar_train_diar_eda_adapt_real_lr0001/images/optim0_lr0.png ADDED
exp/diar_train_diar_eda_adapt_real_lr0001/images/optim_step_time.png ADDED
exp/diar_train_diar_eda_adapt_real_lr0001/images/sad_fr.png ADDED
exp/diar_train_diar_eda_adapt_real_lr0001/images/sad_mr.png ADDED
exp/diar_train_diar_eda_adapt_real_lr0001/images/train_time.png ADDED
meta.yaml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ espnet: '202205'
2
+ files:
3
+ model_file: exp/diar_train_diar_eda_adapt_real_lr0001/25epoch.pth
4
+ python: "3.7.11 (default, Jul 27 2021, 14:32:16) \n[GCC 7.5.0]"
5
+ timestamp: 1656444860.039342
6
+ torch: 1.9.1+cu102
7
+ yaml_files:
8
+ train_config: exp/diar_train_diar_eda_adapt_real_lr0001/config.yaml