Adel-Moumen committed
Commit
8f98994
1 Parent(s): a22900d

Update hyperparams.yaml

Files changed (1)
  1. hyperparams.yaml +23 -9

hyperparams.yaml CHANGED
@@ -12,6 +12,8 @@
 sample_rate: 16000
 n_fft: 400
 n_mels: 80
+win_length: 25
+n_time_mask: 7
 
 ####################### Model parameters ###########################
 # Transformer
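
The two new entries extend the feature front-end: win_length sets the analysis window of the filterbank (in milliseconds), and n_time_mask presumably controls the number of time masks in the recipe's SpecAugment-style augmentation. A minimal sketch of how win_length enters the Fbank front-end, assuming speechbrain is installed (values mirror this file; the waveform is fake):

    import torch
    from speechbrain.lobes.features import Fbank

    # Same front-end settings as hyperparams.yaml; win_length is in ms.
    compute_features = Fbank(
        sample_rate=16000, n_fft=400, n_mels=80, win_length=25
    )

    wavs = torch.randn(2, 16000)    # fake batch: two 1 s waveforms at 16 kHz
    feats = compute_features(wavs)  # -> (batch, frames, n_mels)
    print(feats.shape)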
@@ -83,8 +85,6 @@ decoder: !new:speechbrain.decoders.S2STransformerBeamSearch
     max_decode_ratio: !ref <max_decode_ratio>
     beam_size: !ref <beam_size>
     ctc_weight: !ref <ctc_weight_decode>
-    lm_weight: !ref <lm_weight>
-    lm_modules: !ref <lm_model>
     temperature: 1.15
     temperature_lm: 1.15
     using_eos_threshold: False
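
Dropping lm_weight and lm_modules disables shallow fusion with an external language model during beam search: hypotheses are now scored by the seq2seq decoder alone, optionally combined with CTC scores via ctc_weight_decode. A toy sketch of the log-linear combination that the removed options used to add (tensors and the weight value are illustrative, not from this file):

    import torch

    log_p_s2s = torch.log_softmax(torch.randn(5), dim=-1)  # seq2seq token scores
    log_p_lm = torch.log_softmax(torch.randn(5), dim=-1)   # external LM token scores
    lm_weight = 0.60                                       # what <lm_weight> controlled

    fused = log_p_s2s + lm_weight * log_p_lm  # shallow fusion (removed by this commit)
    plain = log_p_s2s                         # scoring after the change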
@@ -93,7 +93,7 @@ decoder: !new:speechbrain.decoders.S2STransformerBeamSearch
 log_softmax: !new:torch.nn.LogSoftmax
     dim: -1
 
-normalize: !new:speechbrain.processing.features.InputNormalization
+normalizer: !new:speechbrain.processing.features.InputNormalization
     norm_type: global
     update_until_epoch: 4
 
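Renaming normalize to normalizer frees the shorter name for use as a key inside the new encoder pipeline below; the object itself is unchanged: global mean/variance normalization whose statistics are updated until epoch 4 and frozen afterwards. A toy sketch of that idea (not the SpeechBrain internals; batch stats stand in for the running estimate):

    import torch

    def global_normalize(feats, mean, std, epoch, update_until_epoch=4):
        """Normalize with global stats; stop updating them after a few epochs."""
        if epoch < update_until_epoch:
            # Stand-in for updating the running global estimate from this batch.
            mean = feats.mean(dim=(0, 1))
            std = feats.std(dim=(0, 1))
        return (feats - mean) / (std + 1e-10), mean, std

    feats = torch.randn(2, 100, 80)  # (batch, frames, n_mels)
    normed, mean, std = global_normalize(
        feats, torch.zeros(80), torch.ones(80), epoch=1
    )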
@@ -105,16 +105,30 @@ compute_features: !new:speechbrain.lobes.features.Fbank
 
 tokenizer: !new:sentencepiece.SentencePieceProcessor
 
-modules:
-    CNN: !ref <CNN>
-    Transformer: !ref <Transformer>
-    seq_lin: !ref <seq_lin>
-    ctc_lin: !ref <ctc_lin>
-    normalize: !ref <normalize>
+Tencoder: !new:speechbrain.lobes.models.transformer.TransformerASR.EncoderWrapper
+    transformer: !ref <Transformer>
+
+encoder: !new:speechbrain.nnet.containers.LengthsCapableSequential
+    input_shape: [null, null, !ref <n_mels>]
+    compute_features: !ref <compute_features>
+    normalize: !ref <normalizer>
+    cnn: !ref <CNN>
+    transformer_encoder: !ref <Tencoder>
 
 model: !new:torch.nn.ModuleList
     - [!ref <CNN>, !ref <Transformer>, !ref <seq_lin>, !ref <ctc_lin>]
 
+modules:
+    pre_transformer: !ref <CNN>
+    transformer: !ref <Transformer>
+    seq_lin: !ref <seq_lin>
+    ctc_lin: !ref <ctc_lin>
+    normalizer: !ref <normalizer>
+    encoder: !ref <encoder>
+    compute_features: !ref <compute_features>
+    model: !ref <model>
+    decoder: !ref <decoder>
+
 # The pretrainer allows a mapping between pretrained files and instances that
 # are declared in the yaml.
 pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
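
This is the core of the commit: the file now exposes a self-contained encoder (features, then normalization, then the CNN front-end, then the wrapped Transformer encoder) and a modules mapping that covers everything the inference interfaces expect, including the decoder. A minimal usage sketch, assuming the full recipe file resolves as-is and that pretrained weights are loaded separately (the waveform is fake):

    import torch
    from hyperpyyaml import load_hyperpyyaml

    with open("hyperparams.yaml") as f:
        hparams = load_hyperpyyaml(f)  # instantiates CNN, Transformer, encoder, ...

    wavs = torch.randn(1, 16000)  # 1 s of fake audio at 16 kHz
    wav_lens = torch.ones(1)      # relative lengths in [0, 1]

    # LengthsCapableSequential forwards wav_lens to the layers that accept lengths.
    enc_out = hparams["encoder"](wavs, wav_lens)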
 
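As the closing comment says, the Pretrainer maps pretrained checkpoint files onto instances declared in the yaml. A hedged sketch of how such a mapping is typically wired and triggered; the loadables and checkpoint paths here are hypothetical placeholders, since the real ones follow this line in the full file:

    from hyperpyyaml import load_hyperpyyaml
    from speechbrain.utils.parameter_transfer import Pretrainer

    with open("hyperparams.yaml") as f:
        hparams = load_hyperpyyaml(f)

    # Hypothetical mapping and paths, for illustration only.
    pretrainer = Pretrainer(
        loadables={"model": hparams["model"], "normalizer": hparams["normalizer"]},
        paths={"model": "path/to/asr.ckpt", "normalizer": "path/to/normalizer.ckpt"},
    )
    pretrainer.collect_files()   # gather the checkpoint files locally
    pretrainer.load_collected()  # load parameters into the declared instances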