Porjaz committed on
Commit 3f3ce46 · verified · 1 Parent(s): be9d6d0

Create hyperparams.yaml

Files changed (1):
  1. hyperparams.yaml +165 -0
hyperparams.yaml ADDED
# Seed needs to be set at top of yaml, before objects with parameters are made
seed: 2024
__set_seed: !apply:torch.manual_seed [!ref <seed>]

skip_training: True
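The !apply: and !new: tags in this file are executed while the YAML is parsed, which is why the seed must sit above every object definition. A minimal Python sketch of how a script would typically load this file with HyperPyYAML (the file path and override value below are illustrative, not part of the recipe):

from hyperpyyaml import load_hyperpyyaml

# Hypothetical override; any key in the YAML can be replaced at load time.
overrides = {"data_folder": "/path/to/speechbrain_splits"}

with open("hyperparams.yaml") as fin:
    # Parsing runs torch.manual_seed(2024) first, then builds the objects defined below it.
    hparams = load_hyperpyyaml(fin, overrides)

whisper = hparams["whisper"]        # instantiated SpeechBrain Whisper wrapper
counter = hparams["epoch_counter"]  # EpochCounter with limit=50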

# Hparams NEEDED
HPARAMS_NEEDED: ["log_softmax"]
# Modules Needed
MODULES_NEEDED: ["whisper"]

output_folder: !ref output_folder_whisper
pretrained_path: Macedonian-ASR/buki-whisper-capitalised-2.0
output_wer_folder: !ref <output_folder>/
save_folder: !ref <output_folder>/save
train_log: !ref <output_folder>/train_log.txt

# HuggingFace hub ID of the multilingual Whisper large-v3 model used as the base.
whisper_hub: openai/whisper-large-v3

# Normalize inputs with the same normalization done in the paper (https://cdn.openai.com/papers/whisper.pdf). Refer to Appendix C for further information.
normalized_transcripts: False
restore_capitalization: False

# Data files
language: "macedonian"
data_folder: "../../data/combined_data/speechbrain_splits"
accented_letters: True

ckpt_interval_minutes: 30 # save checkpoint every N min

####################### Training Parameters ####################################
freeze_whisper: False
freeze_encoder: True
number_of_epochs: 50
weight_decay: 0.01
lr_whisper: 1e-5
warmup_steps: 500
max_grad_norm: 2.0
precision: fp16 # bf16, fp16 or fp32
eval_precision: fp16
sample_rate: 16000

# With data_parallel batch_size is split into N jobs
batch_size: 6
test_batch_size: 1
grad_accumulation_factor: 2
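Note that with batch_size: 6 and grad_accumulation_factor: 2, gradients are accumulated over two mini-batches before each optimizer update, so the effective batch size per update works out to 6 × 2 = 12 utterances (per process when data_parallel splits the batch across jobs, as the comment above notes).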

# Decoding parameters
min_decode_ratio: 0.0
max_decode_ratio: 1.0
test_beam_size: 8

####################### Model Parameters #######################################
train_dataloader_opts:
    batch_size: !ref <batch_size>

valid_dataloader_opts:
    batch_size: !ref <batch_size>

test_dataloader_opts:
    batch_size: !ref <test_batch_size>

epoch_counter: !new:speechbrain.utils.epoch_loop.EpochCounter
    limit: !ref <number_of_epochs>

############################## Augmentations ###################################

# Speed perturbation
speed_perturb: !new:speechbrain.augment.time_domain.SpeedPerturb
    orig_freq: 16000
    speeds: [95, 100, 105]

# Frequency drop: randomly drops a number of frequency bands to zero.
drop_freq: !new:speechbrain.augment.time_domain.DropFreq
    drop_freq_low: 0
    drop_freq_high: 1
    drop_freq_count_low: 1
    drop_freq_count_high: 3
    drop_freq_width: 0.05

# Time drop: randomly drops a number of temporal chunks.
drop_chunk: !new:speechbrain.augment.time_domain.DropChunk
    drop_length_low: 1000
    drop_length_high: 2000
    drop_count_low: 1
    drop_count_high: 5

# Augmenter: Combines previously defined augmentations to perform data augmentation
wav_augment: !new:speechbrain.augment.augmenter.Augmenter
    concat_original: False
    min_augmentations: 1
    max_augmentations: 3
    augment_prob: 0.5
    augmentations: [
        !ref <speed_perturb>,
        !ref <drop_freq>,
        !ref <drop_chunk>]
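In typical SpeechBrain 1.0 recipes, the augmenter defined above is applied to the raw waveforms inside the forward pass during training only. A minimal Python sketch of that pattern, assuming the hparams dict produced by load_hyperpyyaml (the function name and training flag are illustrative):

def maybe_augment(hparams, wavs, wav_lens, is_training):
    # With augment_prob: 0.5, roughly half of the training batches pass through
    # 1-3 of the augmentations above (speed perturbation, frequency drop, chunk drop);
    # concat_original: False means the clean copies are not kept alongside.
    if is_training and "wav_augment" in hparams:
        wavs, wav_lens = hparams["wav_augment"](wavs, wav_lens)
    return wavs, wav_lens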

############################## Models ##########################################

whisper: !new:speechbrain.lobes.models.huggingface_transformers.whisper.Whisper
    source: !ref <whisper_hub>
    freeze: !ref <freeze_whisper>
    freeze_encoder: !ref <freeze_encoder>
    save_path: !ref <save_folder>/whisper_checkpoint
    language: !ref <language>
    task: "transcribe"

log_softmax: !new:speechbrain.nnet.activations.Softmax
    apply_log: True

nll_loss: !name:speechbrain.nnet.losses.nll_loss

modules:
    whisper: !ref <whisper>
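The modules mapping is what the training/inference class wraps, and log_softmax turns the decoder logits into log-probabilities for nll_loss. A hedged sketch of a forward step in the spirit of SpeechBrain's Whisper fine-tuning recipes; the exact return signature of the Whisper wrapper is an assumption and may differ between SpeechBrain versions:

def forward_step(modules, hparams, wavs, bos_tokens):
    # Assumed signature: the wrapper returns encoder states, decoder logits,
    # and attention-related outputs (ignored here).
    enc_out, logits, _ = modules["whisper"](wavs, bos_tokens)
    # Log-probabilities consumed by nll_loss against the shifted target tokens.
    log_probs = hparams["log_softmax"](logits)
    return enc_out, log_probs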

############################## Decoding & optimiser ############################

whisper_opt_class: !name:torch.optim.AdamW
    lr: !ref <lr_whisper>
    weight_decay: !ref <weight_decay>

valid_search: !new:speechbrain.decoders.seq2seq.S2SWhisperGreedySearcher
    model: !ref <whisper>
    min_decode_ratio: !ref <min_decode_ratio>
    max_decode_ratio: !ref <max_decode_ratio>

test_search: !new:speechbrain.decoders.seq2seq.S2SWhisperBeamSearcher
    module: [!ref <whisper>]
    min_decode_ratio: !ref <min_decode_ratio>
    max_decode_ratio: !ref <max_decode_ratio>
    beam_size: !ref <test_beam_size>

lr_annealing_whisper: !new:speechbrain.nnet.schedulers.NoamScheduler
    lr_initial: !ref <lr_whisper>
    n_warmup_steps: !ref <warmup_steps>
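Greedy search (valid_search) is intended for the validation loop and beam search (test_search) for the final evaluation, while the Noam scheduler warms the AdamW learning rate up over warmup_steps and decays it afterwards. A rough sketch under those assumptions; the searcher's return values follow recent SpeechBrain releases and may differ in older ones:

def validate_and_anneal(hparams, enc_out, wav_lens, optimizer):
    # Greedy decoding over the detached encoder states; test_search is called
    # the same way, with beam_size: 8, for the final test pass.
    hyps, _, _, _ = hparams["valid_search"](enc_out.detach(), wav_lens)

    # Noam warm-up/decay: the scheduler adjusts the optimizer's learning rate in place.
    hparams["lr_annealing_whisper"](optimizer)
    return hyps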

############################## Logging and Pretrainer ##########################

checkpointer: !new:speechbrain.utils.checkpoints.Checkpointer
    checkpoints_dir: !ref <save_folder>
    recoverables:
        whisper: !ref <whisper>
        scheduler_whisper: !ref <lr_annealing_whisper>
        counter: !ref <epoch_counter>

pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
    loadables:
        whisper: !ref <whisper>
    paths:
        whisper: !ref <pretrained_path>/model.ckpt
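When this file is used through a pretrained interface, the Pretrainer is what fetches model.ckpt from the Macedonian-ASR/buki-whisper-capitalised-2.0 repo and copies its parameters into the whisper module before decoding. A minimal sketch of that flow (function name and device handling are illustrative):

def load_pretrained_whisper(hparams, device="cpu"):
    # Resolve every entry under `paths` (downloading from the hub if needed),
    # then load the collected checkpoints into the objects listed under `loadables`.
    pretrainer = hparams["pretrainer"]
    pretrainer.collect_files()
    pretrainer.load_collected()
    return hparams["modules"]["whisper"].to(device)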

train_logger: !new:speechbrain.utils.train_logger.FileTrainLogger
    save_file: !ref <train_log>

error_rate_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats

cer_computer: !name:speechbrain.utils.metric_stats.ErrorRateStats
    split_tokens: True
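Both metric factories point at ErrorRateStats; the CER variant only differs in split_tokens: True, which scores at the character level instead of the word level. A small usage sketch of how WER/CER are typically reported from these factories (the Macedonian example tokens are made up):

def score_batch(hparams, ids, predictions, targets):
    # !name: makes these entries factories, so calling them builds fresh trackers.
    wer_metric = hparams["error_rate_computer"]()
    cer_metric = hparams["cer_computer"]()          # ErrorRateStats(split_tokens=True)

    # predictions / targets are lists of word lists, e.g. [["добар", "ден"]].
    wer_metric.append(ids, predictions, targets)
    cer_metric.append(ids, predictions, targets)

    return wer_metric.summarize("error_rate"), cer_metric.summarize("error_rate")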