huseinzol05 committed on
Commit
c8cace6
·
1 Parent(s): 1113752

Upload model_config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. model_config.yaml +70 -33
model_config.yaml CHANGED
@@ -1,115 +1,152 @@
1
  decoder:
2
  _target_: nemo.collections.asr.modules.SpeakerDecoder
3
  angular: false
4
- emb_sizes: 256
5
- feat_in: 1500
6
  num_classes: 7
7
- pool_mode: xvector
8
  encoder:
9
  _target_: nemo.collections.asr.modules.ConvASREncoder
10
  activation: relu
11
  conv_mask: true
12
- feat_in: 64
13
  jasper:
14
  - dilation:
15
  - 1
16
- dropout: 0.5
17
- filters: 512
18
  kernel:
19
  - 3
20
  repeat: 1
21
- residual: true
 
 
22
  separable: true
23
  stride:
24
  - 1
25
  - dilation:
26
  - 1
27
- dropout: 0.5
28
- filters: 512
29
  kernel:
30
  - 7
31
- repeat: 2
32
  residual: true
 
 
33
  separable: true
34
  stride:
35
  - 1
36
  - dilation:
37
  - 1
38
- dropout: 0.5
39
- filters: 512
40
  kernel:
41
  - 11
42
- repeat: 2
43
  residual: true
 
 
44
  separable: true
45
  stride:
46
  - 1
47
  - dilation:
48
  - 1
49
- dropout: 0.5
50
- filters: 512
51
  kernel:
52
  - 15
53
- repeat: 2
54
  residual: true
 
 
55
  separable: true
56
  stride:
57
  - 1
58
  - dilation:
59
  - 1
60
  dropout: 0.0
61
- filters: 1500
62
  kernel:
63
  - 1
64
  repeat: 1
65
  residual: false
 
 
66
  separable: true
67
  stride:
68
  - 1
69
  loss:
70
  margin: 0.2
71
  scale: 30
 
 
 
 
 
 
 
 
 
 
 
72
  optim:
73
- lr: 0.006
74
  momentum: 0.9
75
  name: sgd
76
  sched:
77
- min_lr: 0.0001
78
  name: CosineAnnealing
79
  warmup_ratio: 0.1
80
- weight_decay: 0.001
81
  preprocessor:
82
  _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
83
  dither: 1.0e-05
84
- features: 64
85
  frame_splicing: 1
86
  n_fft: 512
87
  normalize: per_feature
88
  sample_rate: 16000
89
- stft_conv: false
90
  window: hann
91
- window_size: 0.02
92
  window_stride: 0.01
 
 
 
 
 
 
93
  target: nemo.collections.asr.models.label_models.EncDecSpeakerLabelModel
94
  train_ds:
95
  augmentor:
96
  noise:
97
- manifest_path: /ws/manifests/raid/musan/musan_music_noise_manifest_dur8.json
98
  max_snr_db: 15
99
- min_snr_db: 5
100
- prob: 0.2
 
 
 
 
 
 
101
  batch_size: 64
 
102
  labels: null
103
- manifest_filepath: /ws/manifests/raid/combined/train_manifest.json
104
- num_workers: 4
 
105
  sample_rate: 16000
106
  shuffle: true
107
- time_length: 8
 
 
108
  validation_ds:
109
- batch_size: 64
110
  labels: null
111
- manifest_filepath: /ws/manifests/raid/voxceleb/small_manifest.json
112
- num_workers: 1
 
113
  sample_rate: 16000
114
  shuffle: false
115
- time_length: 8
 
1
  decoder:
2
  _target_: nemo.collections.asr.modules.SpeakerDecoder
3
  angular: false
4
+ emb_sizes: 192
5
+ feat_in: 3072
6
  num_classes: 7
7
+ pool_mode: attention
8
  encoder:
9
  _target_: nemo.collections.asr.modules.ConvASREncoder
10
  activation: relu
11
  conv_mask: true
12
+ feat_in: 80
13
  jasper:
14
  - dilation:
15
  - 1
16
+ dropout: 0.0
17
+ filters: 1024
18
  kernel:
19
  - 3
20
  repeat: 1
21
+ residual: false
22
+ se: true
23
+ se_context_size: -1
24
  separable: true
25
  stride:
26
  - 1
27
  - dilation:
28
  - 1
29
+ dropout: 0.1
30
+ filters: 1024
31
  kernel:
32
  - 7
33
+ repeat: 3
34
  residual: true
35
+ se: true
36
+ se_context_size: -1
37
  separable: true
38
  stride:
39
  - 1
40
  - dilation:
41
  - 1
42
+ dropout: 0.1
43
+ filters: 1024
44
  kernel:
45
  - 11
46
+ repeat: 3
47
  residual: true
48
+ se: true
49
+ se_context_size: -1
50
  separable: true
51
  stride:
52
  - 1
53
  - dilation:
54
  - 1
55
+ dropout: 0.1
56
+ filters: 1024
57
  kernel:
58
  - 15
59
+ repeat: 3
60
  residual: true
61
+ se: true
62
+ se_context_size: -1
63
  separable: true
64
  stride:
65
  - 1
66
  - dilation:
67
  - 1
68
  dropout: 0.0
69
+ filters: 3072
70
  kernel:
71
  - 1
72
  repeat: 1
73
  residual: false
74
+ se: true
75
+ se_context_size: -1
76
  separable: true
77
  stride:
78
  - 1
79
  loss:
80
  margin: 0.2
81
  scale: 30
82
+ model_defaults:
83
+ dropout: 0.1
84
+ enc_hidden: 640
85
+ filters: 1024
86
+ joint_hidden: 640
87
+ kernel_size_factor: 1.0
88
+ pred_hidden: 640
89
+ repeat: 3
90
+ se: true
91
+ se_context_size: -1
92
+ separable: true
93
  optim:
94
+ lr: 0.08
95
  momentum: 0.9
96
  name: sgd
97
  sched:
98
+ min_lr: 0.0
99
  name: CosineAnnealing
100
  warmup_ratio: 0.1
101
+ weight_decay: 0.0002
102
  preprocessor:
103
  _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
104
  dither: 1.0e-05
105
+ features: 80
106
  frame_splicing: 1
107
  n_fft: 512
108
  normalize: per_feature
109
  sample_rate: 16000
 
110
  window: hann
111
+ window_size: 0.025
112
  window_stride: 0.01
113
+ spec_augment:
114
+ _target_: nemo.collections.asr.modules.SpectrogramAugmentation
115
+ freq_masks: 3
116
+ freq_width: 4
117
+ time_masks: 5
118
+ time_width: 0.03
119
  target: nemo.collections.asr.models.label_models.EncDecSpeakerLabelModel
120
  train_ds:
121
  augmentor:
122
  noise:
123
+ manifest_path: /manifests/noise/rir_noise_manifest.json
124
  max_snr_db: 15
125
+ min_snr_db: 0
126
+ prob: 0.5
127
+ speed:
128
+ max_speed_rate: 1.05
129
+ min_speed_rate: 0.95
130
+ prob: 0.5
131
+ resample_type: kaiser_fast
132
+ sr: 16000
133
  batch_size: 64
134
+ is_tarred: false
135
  labels: null
136
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/train.json
137
+ num_workers: 15
138
+ pin_memory: true
139
  sample_rate: 16000
140
  shuffle: true
141
+ tarred_audio_filepaths: null
142
+ tarred_shard_strategy: scatter
143
+ time_length: 3
144
  validation_ds:
145
+ batch_size: 128
146
  labels: null
147
+ manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/dev.json
148
+ num_workers: 15
149
+ pin_memory: true
150
  sample_rate: 16000
151
  shuffle: false
152
+ time_length: 3