# File size: 2,866 Bytes
# (File-viewer gutter residue removed: alternating short hashes 135750e / c8cace6 and line numbers 1-153.)
# Decoder section; `_target_` names the class path for this component.
decoder:
  _target_: nemo.collections.asr.modules.SpeakerDecoder
  angular: false
  emb_sizes: 192
  # Equals `filters: 3072` of the last `encoder.jasper` entry.
  feat_in: 3072
  # NOTE(review): only 7 classes, although the dataset manifests are named
  # after large combined corpora - confirm this is the intended label count.
  num_classes: 7
  pool_mode: attention
# Encoder section: a list of five `jasper` entries. Every entry uses
# dilation [1], stride [1], se: true, se_context_size: -1, separable: true.
encoder:
  _target_: nemo.collections.asr.modules.ConvASREncoder
  activation: relu
  conv_mask: true
  # Equals `preprocessor.features` (80).
  feat_in: 80
  jasper:
    # Entry 1: kernel 3, one repeat, no residual, no dropout.
    - dilation:
        - 1
      dropout: 0.0
      filters: 1024
      kernel:
        - 3
      repeat: 1
      residual: false
      se: true
      se_context_size: -1
      separable: true
      stride:
        - 1
    # Entries 2-4 differ only in kernel size (7, 11, 15); each has
    # three repeats, residual: true and dropout 0.1.
    - dilation:
        - 1
      dropout: 0.1
      filters: 1024
      kernel:
        - 7
      repeat: 3
      residual: true
      se: true
      se_context_size: -1
      separable: true
      stride:
        - 1
    - dilation:
        - 1
      dropout: 0.1
      filters: 1024
      kernel:
        - 11
      repeat: 3
      residual: true
      se: true
      se_context_size: -1
      separable: true
      stride:
        - 1
    - dilation:
        - 1
      dropout: 0.1
      filters: 1024
      kernel:
        - 15
      repeat: 3
      residual: true
      se: true
      se_context_size: -1
      separable: true
      stride:
        - 1
    # Entry 5: kernel 1, widens to 3072 filters, which equals
    # `decoder.feat_in`.
    - dilation:
        - 1
      dropout: 0.0
      filters: 3072
      kernel:
        - 1
      repeat: 1
      residual: false
      se: true
      se_context_size: -1
      separable: true
      stride:
        - 1
# Loss hyper-parameters.
# NOTE(review): `decoder.angular` is false in this file; margin/scale
# presumably only matter for an angular-margin loss - confirm they are used.
loss:
  margin: 0.2
  scale: 30
# Shared default values. dropout/filters/repeat/se/se_context_size/separable
# repeat the literals used by the middle `encoder.jasper` entries; no key in
# this file references them by interpolation.
model_defaults:
  dropout: 0.1
  # NOTE(review): enc_hidden / joint_hidden / pred_hidden are not mirrored by
  # any other value visible in this file - confirm they are still needed.
  enc_hidden: 640
  filters: 1024
  joint_hidden: 640
  kernel_size_factor: 1.0
  pred_hidden: 640
  repeat: 3
  se: true
  se_context_size: -1
  separable: true
# Optimizer settings with a nested learning-rate schedule (`sched`).
optim:
  lr: 0.08
  momentum: 0.9
  name: sgd
  sched:
    min_lr: 0.0
    name: CosineAnnealing
    warmup_ratio: 0.1
  # NOTE(review): placed under `optim` rather than `sched`; the sorted key
  # order is consistent with either parent - confirm against the consumer.
  weight_decay: 0.0002
# Audio preprocessor section; `_target_` names the class path.
preprocessor:
  _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
  dither: 1.0e-05
  # Equals `encoder.feat_in` (80).
  features: 80
  frame_splicing: 1
  n_fft: 512
  normalize: per_feature
  # Same value as `sample_rate` in train_ds / validation_ds.
  sample_rate: 16000
  window: hann
  # NOTE(review): presumably seconds (25 ms window, 10 ms hop) - confirm.
  window_size: 0.025
  window_stride: 0.01
# Spectrogram augmentation section: mask counts and widths.
spec_augment:
  _target_: nemo.collections.asr.modules.SpectrogramAugmentation
  freq_masks: 3
  freq_width: 4
  time_masks: 5
  # NOTE(review): fractional while freq_width is an integer; presumably a
  # ratio of the sequence length - confirm against the target class.
  time_width: 0.03
# Top-level model class path. Note the key is `target` (no underscores),
# unlike the `_target_` keys used inside the component sections.
target: nemo.collections.asr.models.label_models.EncDecSpeakerLabelModel
# Training dataset settings, including on-the-fly augmentation.
train_ds:
  augmentor:
    noise:
      # NOTE(review): absolute path; confirm or override per environment.
      manifest_path: /manifests/noise/rir_noise_manifest.json
      max_snr_db: 15
      min_snr_db: 0
      prob: 0.5
    speed:
      max_speed_rate: 1.05
      min_speed_rate: 0.95
      prob: 0.5
      resample_type: kaiser_fast
      # Same value as `train_ds.sample_rate` below.
      sr: 16000
  batch_size: 64
  is_tarred: false
  labels: null
  # NOTE(review): absolute path; confirm or override per environment.
  manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/train.json
  num_workers: 15
  pin_memory: true
  sample_rate: 16000
  shuffle: true
  tarred_audio_filepaths: null
  tarred_shard_strategy: scatter
  time_length: 3
# Validation dataset settings: no augmentor, no shuffling, and a larger
# batch size (128) than train_ds (64).
validation_ds:
  batch_size: 128
  labels: null
  # NOTE(review): absolute path; confirm or override per environment.
  manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/dev.json
  num_workers: 15
  pin_memory: true
  sample_rate: 16000
  shuffle: false
  time_length: 3