Porjaz committed on
Commit
125efcc
1 Parent(s): b835215

Update hyperparams.yalm

Browse files
Files changed (1) hide show
  1. hyperparams.yalm +24 -94
hyperparams.yalm CHANGED
@@ -1,66 +1,28 @@
1
- # Generated 2022-01-19 from:
2
- # /scratch/elec/t405-puhe/p/porjazd1/Metadata_Classification/TCN/asr_topic_speechbrain/mgb_asr/hyperparams.yaml
3
- # yamllint disable
4
- # Seed needs to be set at top of yaml, before objects with parameters are made
5
- seed: 1234
6
- __set_seed: !apply:torch.manual_seed [1234]
7
 
8
- skip_training: True
9
 
10
- output_folder: output_folder_wavlm_base_full_data
11
- label_encoder_file: !ref <output_folder>/label_encoder.txt
12
-
13
- train_log: !ref <output_folder>/train_log.txt
14
- train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
15
- save_file: !ref <output_folder>/train_log.txt
16
- save_folder: !ref <output_folder>/save
17
 
 
18
  wav2vec2_hub: microsoft/wavlm-base-plus-sv
19
 
 
20
  pretrained_path: Porjaz/wavlm-base-emo-fi
21
 
22
- wav2vec2_folder: !ref <save_folder>/wav2vec2_checkpoint
23
-
24
- # Feature parameters
25
- sample_rate: 22050
26
- new_sample_rate: 16000
27
- window_size: 25
28
- n_mfcc: 23
29
-
30
- # Training params
31
- n_epochs: 28
32
- stopping_factor: 10
33
-
34
- dataloader_options:
35
- batch_size: 10
36
- shuffle: false
37
-
38
- test_dataloader_options:
39
- batch_size: 1
40
- shuffle: false
41
 
42
- lr: 0.0001
43
- lr_wav2vec2: 0.00001
44
-
45
- #freeze all wav2vec2
46
- freeze_wav2vec2: False
47
- #set to true to freeze the CONV part of the wav2vec2 model
48
- # We see an improvement of 2% with freezing CNNs
49
- freeze_wav2vec2_conv: True
50
-
51
- label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
52
-
53
- encoder_dims: 768
54
- n_classes: 5
55
-
56
- # Wav2vec2 encoder
57
- wav2vec2: !new:speechbrain.lobes.models.huggingface_wav2vec.HuggingFaceWav2Vec2
58
  source: !ref <wav2vec2_hub>
59
  output_norm: True
60
- freeze: !ref <freeze_wav2vec2>
61
- freeze_feature_extractor: !ref <freeze_wav2vec2_conv>
62
- save_path: !ref <wav2vec2_folder>
63
- output_all_hiddens: True
64
 
65
  avg_pool: !new:speechbrain.nnet.pooling.StatisticsPooling
66
  return_std: False
@@ -70,52 +32,18 @@ label_lin: !new:speechbrain.nnet.linear.Linear
70
  n_neurons: !ref <n_classes>
71
  bias: False
72
 
73
- log_softmax: !new:speechbrain.nnet.activations.Softmax
74
- apply_log: True
75
-
76
- opt_class: !name:torch.optim.Adam
77
- lr: !ref <lr>
78
-
79
- wav2vec2_opt_class: !name:torch.optim.Adam
80
- lr: !ref <lr_wav2vec2>
81
-
82
- epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
83
- limit: !ref <n_epochs>
84
-
85
- # Functions that compute the statistics to track during the validation step.
86
- accuracy_computer: !name:speechbrain.utils.Accuracy.AccuracyStats
87
-
88
- compute_cost: !name:speechbrain.nnet.losses.nll_loss
89
 
90
- error_stats: !name:speechbrain.utils.metric_stats.MetricStats
91
- metric: !name:speechbrain.nnet.losses.classification_error
92
- reduction: batch
93
  modules:
94
  wav2vec2: !ref <wav2vec2>
95
  label_lin: !ref <label_lin>
 
96
 
97
- model: !new:torch.nn.ModuleList
98
- - [!ref <label_lin>]
99
-
100
- lr_annealing: !new:speechbrain.nnet.schedulers.NewBobScheduler
101
- initial_value: !ref <lr>
102
- improvement_threshold: 0.0025
103
- annealing_factor: 0.9
104
- patient: 0
105
-
106
- lr_annealing_wav2vec2: !new:speechbrain.nnet.schedulers.NewBobScheduler
107
- initial_value: !ref <lr_wav2vec2>
108
- improvement_threshold: 0.0025
109
- annealing_factor: 0.9
110
-
111
- checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
112
- checkpoints_dir: !ref <save_folder>
113
- recoverables:
114
- model: !ref <model>
115
- wav2vec2: !ref <wav2vec2>
116
- lr_annealing_output: !ref <lr_annealing>
117
- lr_annealing_wav2vec2: !ref <lr_annealing_wav2vec2>
118
- counter: !ref <epoch_counter>
119
 
120
 
121
  pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
@@ -127,3 +55,5 @@ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
127
  wav2vec2: !ref <pretrained_path>/wav2vec2.ckpt
128
  model: !ref <pretrained_path>/model.ckpt
129
  label_encoder: !ref <pretrained_path>/label_encoder.txt
 
 
 
1
+ # ############################################################################
2
+ # Model: WavLM base (loaded through the wav2vec2 wrapper) for Emotion Recognition
3
+ # ############################################################################
 
 
 
4
 
 
5
 
6
+ # Hparams needed
7
+ HPARAMS_NEEDED: ["encoder_dim", "out_n_neurons", "label_encoder", "softmax"]
8
+ # Modules needed
9
+ MODULES_NEEDED: ["wav2vec2", "avg_pool", "output_mlp"]
 
 
 
10
 
11
+ # Pretrained encoder source (HuggingFace hub)
12
  wav2vec2_hub: microsoft/wavlm-base-plus-sv
13
 
14
+ # Pretrain folder (HuggingFace)
15
  pretrained_path: Porjaz/wavlm-base-emo-fi
16
 
17
+ # Model parameters
18
+ encoder_dim: 768
19
+ out_n_neurons: 5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
+ wav2vec2: !new:speechbrain.lobes.models.huggingface_transformers.wav2vec2.Wav2Vec2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  source: !ref <wav2vec2_hub>
23
  output_norm: True
24
+ freeze: True
25
+ save_path: wav2vec2_checkpoints
 
 
26
 
27
  avg_pool: !new:speechbrain.nnet.pooling.StatisticsPooling
28
  return_std: False
 
32
  n_neurons: !ref <n_classes>
33
  bias: False
34
 
35
+ model: !new:torch.nn.ModuleList
36
+ - [!ref <label_lin>]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
 
 
 
38
  modules:
39
  wav2vec2: !ref <wav2vec2>
40
  label_lin: !ref <label_lin>
41
+ avg_pool: !ref <avg_pool>
42
 
43
+ softmax: !new:speechbrain.nnet.activations.Softmax
44
+
45
+
46
+ label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
 
48
 
49
  pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
 
55
  wav2vec2: !ref <pretrained_path>/wav2vec2.ckpt
56
  model: !ref <pretrained_path>/model.ckpt
57
  label_encoder: !ref <pretrained_path>/label_encoder.txt
58
+
59
+