# Decoder section. `_target_` names the class this section is meant for.
# Indentation was reconstructed from a flattened listing - confirm the nesting
# against the original config before relying on it.
decoder:
  _target_: nemo.collections.asr.modules.SpeakerDecoder
  angular: false
  emb_sizes: 192
  # Same value as `filters` of the last encoder block (3072).
  feat_in: 3072
  num_classes: 7
  pool_mode: attention
# Encoder section. `jasper` is a list of five block mappings; each item starts
# at its `- dilation:` entry, and `dilation` / `kernel` / `stride` are
# themselves single-element lists.
# Indentation was reconstructed from a flattened listing (keys inside each
# mapping are alphabetically ordered, which was used to infer where each
# nested list ends) - confirm against the original config.
encoder:
  _target_: nemo.collections.asr.modules.ConvASREncoder
  activation: relu
  conv_mask: true
  # Same value as `preprocessor.features` (80).
  feat_in: 80
  jasper:
  # Block 1: kernel 3, no residual, repeat 1.
  - dilation:
    - 1
    dropout: 0.0
    filters: 1024
    kernel:
    - 3
    repeat: 1
    residual: false
    se: true
    se_context_size: -1
    separable: true
    stride:
    - 1
  # Block 2: kernel 7, residual, repeat 3.
  - dilation:
    - 1
    dropout: 0.1
    filters: 1024
    kernel:
    - 7
    repeat: 3
    residual: true
    se: true
    se_context_size: -1
    separable: true
    stride:
    - 1
  # Block 3: kernel 11, residual, repeat 3.
  - dilation:
    - 1
    dropout: 0.1
    filters: 1024
    kernel:
    - 11
    repeat: 3
    residual: true
    se: true
    se_context_size: -1
    separable: true
    stride:
    - 1
  # Block 4: kernel 15, residual, repeat 3.
  - dilation:
    - 1
    dropout: 0.1
    filters: 1024
    kernel:
    - 15
    repeat: 3
    residual: true
    se: true
    se_context_size: -1
    separable: true
    stride:
    - 1
  # Block 5: kernel 1, no residual, repeat 1; widens to 3072 filters,
  # the same value as `decoder.feat_in`.
  - dilation:
    - 1
    dropout: 0.0
    filters: 3072
    kernel:
    - 1
    repeat: 1
    residual: false
    se: true
    se_context_size: -1
    separable: true
    stride:
    - 1
# Loss parameters.
# NOTE(review): `decoder.angular` is false in this file, while `margin` and
# `scale` look like angular-margin loss settings - confirm whether this
# section is actually consumed with the current decoder setting.
loss:
  margin: 0.2
  scale: 30
# Shared default values. `dropout`, `filters`, `repeat`, `se`,
# `se_context_size` and `separable` carry the same values as encoder blocks
# 2-4 (0.1 / 1024 / 3 / true / -1 / true).
# NOTE(review): nothing in the visible config refers to `enc_hidden`,
# `joint_hidden`, `pred_hidden` or `kernel_size_factor` - confirm they are
# still needed.
model_defaults:
  dropout: 0.1
  enc_hidden: 640
  filters: 1024
  joint_hidden: 640
  kernel_size_factor: 1.0
  pred_hidden: 640
  repeat: 3
  se: true
  se_context_size: -1
  separable: true
# Optimizer settings with a nested scheduler (`sched`) mapping.
optim:
  lr: 0.08
  momentum: 0.9
  name: sgd
  sched:
    min_lr: 0.0
    name: CosineAnnealing
    warmup_ratio: 0.1
  # NOTE(review): the flattened source does not show whether `weight_decay`
  # belongs to `optim` or to `sched`; it is placed on the optimizer here -
  # confirm against the original config.
  weight_decay: 0.0002
# Audio front-end section. `_target_` names the preprocessor class.
preprocessor:
  _target_: nemo.collections.asr.modules.AudioToMelSpectrogramPreprocessor
  dither: 1.0e-05
  # Same value as `encoder.feat_in` (80).
  features: 80
  frame_splicing: 1
  n_fft: 512
  normalize: per_feature
  # Same value as `sample_rate` in train_ds / validation_ds (16000).
  sample_rate: 16000
  window: hann
  # presumably seconds (0.025 / 0.01) - TODO confirm units
  window_size: 0.025
  window_stride: 0.01
# Spectrogram augmentation section. `_target_` names the augmentation class.
spec_augment:
  _target_: nemo.collections.asr.modules.SpectrogramAugmentation
  freq_masks: 3
  freq_width: 4
  time_masks: 5
  # NOTE(review): fractional value while `freq_width` is an integer -
  # presumably interpreted as a fraction of the utterance length; confirm.
  time_width: 0.03
# Fully qualified model class path (plain `target`, not `_target_`, as in the source).
target: nemo.collections.asr.models.label_models.EncDecSpeakerLabelModel
# Training dataset section with a nested `augmentor` (noise + speed).
# Indentation was reconstructed from a flattened listing using the
# alphabetical key order - confirm against the original config.
train_ds:
  augmentor:
    noise:
      manifest_path: /manifests/noise/rir_noise_manifest.json
      max_snr_db: 15
      min_snr_db: 0
      prob: 0.5
    speed:
      max_speed_rate: 1.05
      min_speed_rate: 0.95
      prob: 0.5
      resample_type: kaiser_fast
      # NOTE(review): `sr` could also sit directly under `augmentor`; the
      # flattened source does not disambiguate - confirm.
      sr: 16000
  batch_size: 64
  is_tarred: false
  labels: null
  manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/train.json
  num_workers: 15
  pin_memory: true
  sample_rate: 16000
  shuffle: true
  tarred_audio_filepaths: null
  tarred_shard_strategy: scatter
  # presumably seconds per training segment - TODO confirm units
  time_length: 3
# Validation dataset section; unlike train_ds it has no augmentor and
# `shuffle` is false.
validation_ds:
  batch_size: 128
  labels: null
  manifest_filepath: /manifests/combined_fisher_swbd_voxceleb12_librispeech/dev.json
  num_workers: 15
  pin_memory: true
  sample_rate: 16000
  shuffle: false
  # presumably seconds per segment - TODO confirm units
  time_length: 3