shreyasdesaisuperU committed on
Commit
1945e5b
·
1 Parent(s): f2ff8d7

Uploading checkpoints

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. checkpoints/README.md +82 -0
  2. checkpoints/checkpoint-100/config.json +61 -0
  3. checkpoints/checkpoint-100/generation_config.json +240 -0
  4. checkpoints/checkpoint-100/model.safetensors +3 -0
  5. checkpoints/checkpoint-100/optimizer.pt +3 -0
  6. checkpoints/checkpoint-100/preprocessor_config.json +14 -0
  7. checkpoints/checkpoint-100/rng_state.pth +3 -0
  8. checkpoints/checkpoint-100/scheduler.pt +3 -0
  9. checkpoints/checkpoint-100/trainer_state.json +70 -0
  10. checkpoints/checkpoint-100/training_args.bin +3 -0
  11. checkpoints/checkpoint-1000/config.json +61 -0
  12. checkpoints/checkpoint-1000/generation_config.json +240 -0
  13. checkpoints/checkpoint-1000/model.safetensors +3 -0
  14. checkpoints/checkpoint-1000/optimizer.pt +3 -0
  15. checkpoints/checkpoint-1000/preprocessor_config.json +14 -0
  16. checkpoints/checkpoint-1000/rng_state.pth +3 -0
  17. checkpoints/checkpoint-1000/scheduler.pt +3 -0
  18. checkpoints/checkpoint-1000/trainer_state.json +403 -0
  19. checkpoints/checkpoint-1000/training_args.bin +3 -0
  20. checkpoints/checkpoint-1100/config.json +61 -0
  21. checkpoints/checkpoint-1100/generation_config.json +240 -0
  22. checkpoints/checkpoint-1100/model.safetensors +3 -0
  23. checkpoints/checkpoint-1100/optimizer.pt +3 -0
  24. checkpoints/checkpoint-1100/preprocessor_config.json +14 -0
  25. checkpoints/checkpoint-1100/rng_state.pth +3 -0
  26. checkpoints/checkpoint-1100/scheduler.pt +3 -0
  27. checkpoints/checkpoint-1100/trainer_state.json +440 -0
  28. checkpoints/checkpoint-1100/training_args.bin +3 -0
  29. checkpoints/checkpoint-1200/config.json +61 -0
  30. checkpoints/checkpoint-1200/generation_config.json +240 -0
  31. checkpoints/checkpoint-1200/model.safetensors +3 -0
  32. checkpoints/checkpoint-1200/optimizer.pt +3 -0
  33. checkpoints/checkpoint-1200/preprocessor_config.json +14 -0
  34. checkpoints/checkpoint-1200/rng_state.pth +3 -0
  35. checkpoints/checkpoint-1200/scheduler.pt +3 -0
  36. checkpoints/checkpoint-1200/trainer_state.json +477 -0
  37. checkpoints/checkpoint-1200/training_args.bin +3 -0
  38. checkpoints/checkpoint-1300/config.json +61 -0
  39. checkpoints/checkpoint-1300/generation_config.json +240 -0
  40. checkpoints/checkpoint-1300/model.safetensors +3 -0
  41. checkpoints/checkpoint-1300/optimizer.pt +3 -0
  42. checkpoints/checkpoint-1300/preprocessor_config.json +14 -0
  43. checkpoints/checkpoint-1300/rng_state.pth +3 -0
  44. checkpoints/checkpoint-1300/scheduler.pt +3 -0
  45. checkpoints/checkpoint-1300/trainer_state.json +514 -0
  46. checkpoints/checkpoint-1300/training_args.bin +3 -0
  47. checkpoints/checkpoint-1400/config.json +61 -0
  48. checkpoints/checkpoint-1400/generation_config.json +240 -0
  49. checkpoints/checkpoint-1400/model.safetensors +3 -0
  50. checkpoints/checkpoint-1400/optimizer.pt +3 -0
checkpoints/README.md ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model: shreyasdesaisuperU/whisper-medium-attempt2-1000-orders
5
+ tags:
6
+ - generated_from_trainer
7
+ metrics:
8
+ - wer
9
+ model-index:
10
+ - name: Whisper Medium 1000 orders Eleven Labs SSD superU
11
+ results: []
12
+ ---
13
+
14
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
15
+ should probably proofread and complete it, then remove this comment. -->
16
+
17
+ # Whisper Medium 1000 orders Eleven Labs SSD superU
18
+
19
+ This model is a fine-tuned version of [shreyasdesaisuperU/whisper-medium-attempt2-1000-orders](https://huggingface.co/shreyasdesaisuperU/whisper-medium-attempt2-1000-orders) on an unspecified dataset.
20
+ It achieves the following results on the evaluation set:
21
+ - Loss: 0.0128
22
+ - Wer: 0.8606
23
+
24
+ ## Model description
25
+
26
+ More information needed
27
+
28
+ ## Intended uses & limitations
29
+
30
+ More information needed
31
+
32
+ ## Training and evaluation data
33
+
34
+ More information needed
35
+
36
+ ## Training procedure
37
+
38
+ ### Training hyperparameters
39
+
40
+ The following hyperparameters were used during training:
41
+ - learning_rate: 1e-05
42
+ - train_batch_size: 16
43
+ - eval_batch_size: 8
44
+ - seed: 42
45
+ - optimizer: Use OptimizerNames.ADAMW_TORCH with betas=(0.9,0.999) and epsilon=1e-08 and optimizer_args=No additional optimizer arguments
46
+ - lr_scheduler_type: linear
47
+ - lr_scheduler_warmup_steps: 500
48
+ - training_steps: 2000
49
+ - mixed_precision_training: Native AMP
50
+
51
+ ### Training results
52
+
53
+ | Training Loss | Epoch | Step | Validation Loss | Wer |
54
+ |:-------------:|:------:|:----:|:---------------:|:-------:|
55
+ | 0.0668 | 0.4032 | 100 | 0.0388 | 17.3838 |
56
+ | 0.0142 | 0.8065 | 200 | 0.0061 | 11.3597 |
57
+ | 0.0075 | 1.2097 | 300 | 0.0075 | 9.6386 |
58
+ | 0.0073 | 1.6129 | 400 | 0.0104 | 7.7453 |
59
+ | 0.0087 | 2.0161 | 500 | 0.0125 | 2.9260 |
60
+ | 0.0046 | 2.4194 | 600 | 0.0080 | 1.5491 |
61
+ | 0.0087 | 2.8226 | 700 | 0.0039 | 1.7212 |
62
+ | 0.0066 | 3.2258 | 800 | 0.0042 | 1.3769 |
63
+ | 0.0032 | 3.6290 | 900 | 0.0095 | 1.0327 |
64
+ | 0.0027 | 4.0323 | 1000 | 0.0114 | 1.5491 |
65
+ | 0.0021 | 4.4355 | 1100 | 0.0099 | 1.7212 |
66
+ | 0.0039 | 4.8387 | 1200 | 0.0121 | 1.8933 |
67
+ | 0.0017 | 5.2419 | 1300 | 0.0126 | 1.3769 |
68
+ | 0.0033 | 5.6452 | 1400 | 0.0093 | 1.8933 |
69
+ | 0.0037 | 6.0484 | 1500 | 0.0126 | 1.2048 |
70
+ | 0.0013 | 6.4516 | 1600 | 0.0090 | 1.2048 |
71
+ | 0.0014 | 6.8548 | 1700 | 0.0102 | 1.2048 |
72
+ | 0.0002 | 7.2581 | 1800 | 0.0115 | 0.8606 |
73
+ | 0.0005 | 7.6613 | 1900 | 0.0142 | 1.0327 |
74
+ | 0.0002 | 8.0645 | 2000 | 0.0128 | 0.8606 |
75
+
76
+
77
+ ### Framework versions
78
+
79
+ - Transformers 4.46.2
80
+ - Pytorch 2.2.2+cu121
81
+ - Datasets 3.1.0
82
+ - Tokenizers 0.20.3
checkpoints/checkpoint-100/config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "shreyasdesaisuperU/whisper-medium-attempt2-1000-orders",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
+ "architectures": [
7
+ "WhisperForConditionalGeneration"
8
+ ],
9
+ "attention_dropout": 0.0,
10
+ "begin_suppress_tokens": null,
11
+ "bos_token_id": 50257,
12
+ "classifier_proj_size": 256,
13
+ "d_model": 1024,
14
+ "decoder_attention_heads": 16,
15
+ "decoder_ffn_dim": 4096,
16
+ "decoder_layerdrop": 0.0,
17
+ "decoder_layers": 24,
18
+ "decoder_start_token_id": 50258,
19
+ "dropout": 0.0,
20
+ "encoder_attention_heads": 16,
21
+ "encoder_ffn_dim": 4096,
22
+ "encoder_layerdrop": 0.0,
23
+ "encoder_layers": 24,
24
+ "eos_token_id": 50257,
25
+ "forced_decoder_ids": [
26
+ [
27
+ 1,
28
+ 50259
29
+ ],
30
+ [
31
+ 2,
32
+ 50359
33
+ ],
34
+ [
35
+ 3,
36
+ 50363
37
+ ]
38
+ ],
39
+ "init_std": 0.02,
40
+ "is_encoder_decoder": true,
41
+ "mask_feature_length": 10,
42
+ "mask_feature_min_masks": 0,
43
+ "mask_feature_prob": 0.0,
44
+ "mask_time_length": 10,
45
+ "mask_time_min_masks": 2,
46
+ "mask_time_prob": 0.05,
47
+ "max_length": null,
48
+ "max_source_positions": 1500,
49
+ "max_target_positions": 448,
50
+ "median_filter_width": 7,
51
+ "model_type": "whisper",
52
+ "num_hidden_layers": 24,
53
+ "num_mel_bins": 80,
54
+ "pad_token_id": 50257,
55
+ "scale_embedding": false,
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.46.2",
58
+ "use_cache": true,
59
+ "use_weighted_layer_sum": false,
60
+ "vocab_size": 51865
61
+ }
checkpoints/checkpoint-100/generation_config.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alignment_heads": [
3
+ [
4
+ 13,
5
+ 15
6
+ ],
7
+ [
8
+ 15,
9
+ 4
10
+ ],
11
+ [
12
+ 15,
13
+ 15
14
+ ],
15
+ [
16
+ 16,
17
+ 1
18
+ ],
19
+ [
20
+ 20,
21
+ 0
22
+ ],
23
+ [
24
+ 23,
25
+ 4
26
+ ]
27
+ ],
28
+ "begin_suppress_tokens": [
29
+ 220,
30
+ 50257
31
+ ],
32
+ "bos_token_id": 50257,
33
+ "decoder_start_token_id": 50258,
34
+ "eos_token_id": 50257,
35
+ "is_multilingual": true,
36
+ "lang_to_id": {
37
+ "<|af|>": 50327,
38
+ "<|am|>": 50334,
39
+ "<|ar|>": 50272,
40
+ "<|as|>": 50350,
41
+ "<|az|>": 50304,
42
+ "<|ba|>": 50355,
43
+ "<|be|>": 50330,
44
+ "<|bg|>": 50292,
45
+ "<|bn|>": 50302,
46
+ "<|bo|>": 50347,
47
+ "<|br|>": 50309,
48
+ "<|bs|>": 50315,
49
+ "<|ca|>": 50270,
50
+ "<|cs|>": 50283,
51
+ "<|cy|>": 50297,
52
+ "<|da|>": 50285,
53
+ "<|de|>": 50261,
54
+ "<|el|>": 50281,
55
+ "<|en|>": 50259,
56
+ "<|es|>": 50262,
57
+ "<|et|>": 50307,
58
+ "<|eu|>": 50310,
59
+ "<|fa|>": 50300,
60
+ "<|fi|>": 50277,
61
+ "<|fo|>": 50338,
62
+ "<|fr|>": 50265,
63
+ "<|gl|>": 50319,
64
+ "<|gu|>": 50333,
65
+ "<|haw|>": 50352,
66
+ "<|ha|>": 50354,
67
+ "<|he|>": 50279,
68
+ "<|hi|>": 50276,
69
+ "<|hr|>": 50291,
70
+ "<|ht|>": 50339,
71
+ "<|hu|>": 50286,
72
+ "<|hy|>": 50312,
73
+ "<|id|>": 50275,
74
+ "<|is|>": 50311,
75
+ "<|it|>": 50274,
76
+ "<|ja|>": 50266,
77
+ "<|jw|>": 50356,
78
+ "<|ka|>": 50329,
79
+ "<|kk|>": 50316,
80
+ "<|km|>": 50323,
81
+ "<|kn|>": 50306,
82
+ "<|ko|>": 50264,
83
+ "<|la|>": 50294,
84
+ "<|lb|>": 50345,
85
+ "<|ln|>": 50353,
86
+ "<|lo|>": 50336,
87
+ "<|lt|>": 50293,
88
+ "<|lv|>": 50301,
89
+ "<|mg|>": 50349,
90
+ "<|mi|>": 50295,
91
+ "<|mk|>": 50308,
92
+ "<|ml|>": 50296,
93
+ "<|mn|>": 50314,
94
+ "<|mr|>": 50320,
95
+ "<|ms|>": 50282,
96
+ "<|mt|>": 50343,
97
+ "<|my|>": 50346,
98
+ "<|ne|>": 50313,
99
+ "<|nl|>": 50271,
100
+ "<|nn|>": 50342,
101
+ "<|no|>": 50288,
102
+ "<|oc|>": 50328,
103
+ "<|pa|>": 50321,
104
+ "<|pl|>": 50269,
105
+ "<|ps|>": 50340,
106
+ "<|pt|>": 50267,
107
+ "<|ro|>": 50284,
108
+ "<|ru|>": 50263,
109
+ "<|sa|>": 50344,
110
+ "<|sd|>": 50332,
111
+ "<|si|>": 50322,
112
+ "<|sk|>": 50298,
113
+ "<|sl|>": 50305,
114
+ "<|sn|>": 50324,
115
+ "<|so|>": 50326,
116
+ "<|sq|>": 50317,
117
+ "<|sr|>": 50303,
118
+ "<|su|>": 50357,
119
+ "<|sv|>": 50273,
120
+ "<|sw|>": 50318,
121
+ "<|ta|>": 50287,
122
+ "<|te|>": 50299,
123
+ "<|tg|>": 50331,
124
+ "<|th|>": 50289,
125
+ "<|tk|>": 50341,
126
+ "<|tl|>": 50348,
127
+ "<|tr|>": 50268,
128
+ "<|tt|>": 50351,
129
+ "<|uk|>": 50280,
130
+ "<|ur|>": 50290,
131
+ "<|uz|>": 50337,
132
+ "<|vi|>": 50278,
133
+ "<|yi|>": 50335,
134
+ "<|yo|>": 50325,
135
+ "<|zh|>": 50260
136
+ },
137
+ "language": "english",
138
+ "max_initial_timestamp_index": 50,
139
+ "max_length": 448,
140
+ "no_timestamps_token_id": 50363,
141
+ "pad_token_id": 50257,
142
+ "prev_sot_token_id": 50361,
143
+ "return_timestamps": false,
144
+ "suppress_tokens": [
145
+ 1,
146
+ 2,
147
+ 7,
148
+ 8,
149
+ 9,
150
+ 10,
151
+ 14,
152
+ 25,
153
+ 26,
154
+ 27,
155
+ 28,
156
+ 29,
157
+ 31,
158
+ 58,
159
+ 59,
160
+ 60,
161
+ 61,
162
+ 62,
163
+ 63,
164
+ 90,
165
+ 91,
166
+ 92,
167
+ 93,
168
+ 359,
169
+ 503,
170
+ 522,
171
+ 542,
172
+ 873,
173
+ 893,
174
+ 902,
175
+ 918,
176
+ 922,
177
+ 931,
178
+ 1350,
179
+ 1853,
180
+ 1982,
181
+ 2460,
182
+ 2627,
183
+ 3246,
184
+ 3253,
185
+ 3268,
186
+ 3536,
187
+ 3846,
188
+ 3961,
189
+ 4183,
190
+ 4667,
191
+ 6585,
192
+ 6647,
193
+ 7273,
194
+ 9061,
195
+ 9383,
196
+ 10428,
197
+ 10929,
198
+ 11938,
199
+ 12033,
200
+ 12331,
201
+ 12562,
202
+ 13793,
203
+ 14157,
204
+ 14635,
205
+ 15265,
206
+ 15618,
207
+ 16553,
208
+ 16604,
209
+ 18362,
210
+ 18956,
211
+ 20075,
212
+ 21675,
213
+ 22520,
214
+ 26130,
215
+ 26161,
216
+ 26435,
217
+ 28279,
218
+ 29464,
219
+ 31650,
220
+ 32302,
221
+ 32470,
222
+ 36865,
223
+ 42863,
224
+ 47425,
225
+ 49870,
226
+ 50254,
227
+ 50258,
228
+ 50358,
229
+ 50359,
230
+ 50360,
231
+ 50361,
232
+ 50362
233
+ ],
234
+ "task": "transcribe",
235
+ "task_to_id": {
236
+ "transcribe": 50359,
237
+ "translate": 50358
238
+ },
239
+ "transformers_version": "4.46.2"
240
+ }
checkpoints/checkpoint-100/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ec943efacb75601634d541c31b658f1cbe1bb0c308e9b6559c3be586f7a0c3f
3
+ size 3055544304
checkpoints/checkpoint-100/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:077d2fa0c4c528fa90cb6f1abdf61c84b51a511652d5a65d2c97c2b3ede45716
3
+ size 6111664103
checkpoints/checkpoint-100/preprocessor_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunk_length": 30,
3
+ "feature_extractor_type": "WhisperFeatureExtractor",
4
+ "feature_size": 80,
5
+ "hop_length": 160,
6
+ "n_fft": 400,
7
+ "n_samples": 480000,
8
+ "nb_max_frames": 3000,
9
+ "padding_side": "right",
10
+ "padding_value": 0.0,
11
+ "processor_class": "WhisperProcessor",
12
+ "return_attention_mask": false,
13
+ "sampling_rate": 16000
14
+ }
checkpoints/checkpoint-100/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fd91acef56149d62ef60f134f6bef8f3143b1426e8731dcb1c5449312d3ea8c
3
+ size 14244
checkpoints/checkpoint-100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e144495668e3e93894bea4f11cf77f19996bcabde650cbfd5074044c2f1f718f
3
+ size 1064
checkpoints/checkpoint-100/trainer_state.json ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 17.38382099827883,
3
+ "best_model_checkpoint": "./whisper-medium-attempt2-1000-orders-eleven-labs/checkpoint-100",
4
+ "epoch": 0.4032258064516129,
5
+ "eval_steps": 100,
6
+ "global_step": 100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.10080645161290322,
13
+ "grad_norm": 40.95878982543945,
14
+ "learning_rate": 4.2000000000000006e-07,
15
+ "loss": 3.6067,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.20161290322580644,
20
+ "grad_norm": 28.143661499023438,
21
+ "learning_rate": 9.200000000000001e-07,
22
+ "loss": 2.7859,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.3024193548387097,
27
+ "grad_norm": 10.631092071533203,
28
+ "learning_rate": 1.42e-06,
29
+ "loss": 0.9512,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.4032258064516129,
34
+ "grad_norm": 1.839168906211853,
35
+ "learning_rate": 1.9200000000000003e-06,
36
+ "loss": 0.0668,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.4032258064516129,
41
+ "eval_loss": 0.03882083296775818,
42
+ "eval_runtime": 54.3208,
43
+ "eval_samples_per_second": 3.848,
44
+ "eval_steps_per_second": 0.497,
45
+ "eval_wer": 17.38382099827883,
46
+ "step": 100
47
+ }
48
+ ],
49
+ "logging_steps": 25,
50
+ "max_steps": 2000,
51
+ "num_input_tokens_seen": 0,
52
+ "num_train_epochs": 9,
53
+ "save_steps": 100,
54
+ "stateful_callbacks": {
55
+ "TrainerControl": {
56
+ "args": {
57
+ "should_epoch_stop": false,
58
+ "should_evaluate": false,
59
+ "should_log": false,
60
+ "should_save": true,
61
+ "should_training_stop": false
62
+ },
63
+ "attributes": {}
64
+ }
65
+ },
66
+ "total_flos": 1.632967852032e+18,
67
+ "train_batch_size": 16,
68
+ "trial_name": null,
69
+ "trial_params": null
70
+ }
checkpoints/checkpoint-100/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f7a1fc1f027287c9d1d723e42e6556d0cf6728657bd6a51fde9e5c2d703ba34
3
+ size 5496
checkpoints/checkpoint-1000/config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "shreyasdesaisuperU/whisper-medium-attempt2-1000-orders",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
+ "architectures": [
7
+ "WhisperForConditionalGeneration"
8
+ ],
9
+ "attention_dropout": 0.0,
10
+ "begin_suppress_tokens": null,
11
+ "bos_token_id": 50257,
12
+ "classifier_proj_size": 256,
13
+ "d_model": 1024,
14
+ "decoder_attention_heads": 16,
15
+ "decoder_ffn_dim": 4096,
16
+ "decoder_layerdrop": 0.0,
17
+ "decoder_layers": 24,
18
+ "decoder_start_token_id": 50258,
19
+ "dropout": 0.0,
20
+ "encoder_attention_heads": 16,
21
+ "encoder_ffn_dim": 4096,
22
+ "encoder_layerdrop": 0.0,
23
+ "encoder_layers": 24,
24
+ "eos_token_id": 50257,
25
+ "forced_decoder_ids": [
26
+ [
27
+ 1,
28
+ 50259
29
+ ],
30
+ [
31
+ 2,
32
+ 50359
33
+ ],
34
+ [
35
+ 3,
36
+ 50363
37
+ ]
38
+ ],
39
+ "init_std": 0.02,
40
+ "is_encoder_decoder": true,
41
+ "mask_feature_length": 10,
42
+ "mask_feature_min_masks": 0,
43
+ "mask_feature_prob": 0.0,
44
+ "mask_time_length": 10,
45
+ "mask_time_min_masks": 2,
46
+ "mask_time_prob": 0.05,
47
+ "max_length": null,
48
+ "max_source_positions": 1500,
49
+ "max_target_positions": 448,
50
+ "median_filter_width": 7,
51
+ "model_type": "whisper",
52
+ "num_hidden_layers": 24,
53
+ "num_mel_bins": 80,
54
+ "pad_token_id": 50257,
55
+ "scale_embedding": false,
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.46.2",
58
+ "use_cache": true,
59
+ "use_weighted_layer_sum": false,
60
+ "vocab_size": 51865
61
+ }
checkpoints/checkpoint-1000/generation_config.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alignment_heads": [
3
+ [
4
+ 13,
5
+ 15
6
+ ],
7
+ [
8
+ 15,
9
+ 4
10
+ ],
11
+ [
12
+ 15,
13
+ 15
14
+ ],
15
+ [
16
+ 16,
17
+ 1
18
+ ],
19
+ [
20
+ 20,
21
+ 0
22
+ ],
23
+ [
24
+ 23,
25
+ 4
26
+ ]
27
+ ],
28
+ "begin_suppress_tokens": [
29
+ 220,
30
+ 50257
31
+ ],
32
+ "bos_token_id": 50257,
33
+ "decoder_start_token_id": 50258,
34
+ "eos_token_id": 50257,
35
+ "is_multilingual": true,
36
+ "lang_to_id": {
37
+ "<|af|>": 50327,
38
+ "<|am|>": 50334,
39
+ "<|ar|>": 50272,
40
+ "<|as|>": 50350,
41
+ "<|az|>": 50304,
42
+ "<|ba|>": 50355,
43
+ "<|be|>": 50330,
44
+ "<|bg|>": 50292,
45
+ "<|bn|>": 50302,
46
+ "<|bo|>": 50347,
47
+ "<|br|>": 50309,
48
+ "<|bs|>": 50315,
49
+ "<|ca|>": 50270,
50
+ "<|cs|>": 50283,
51
+ "<|cy|>": 50297,
52
+ "<|da|>": 50285,
53
+ "<|de|>": 50261,
54
+ "<|el|>": 50281,
55
+ "<|en|>": 50259,
56
+ "<|es|>": 50262,
57
+ "<|et|>": 50307,
58
+ "<|eu|>": 50310,
59
+ "<|fa|>": 50300,
60
+ "<|fi|>": 50277,
61
+ "<|fo|>": 50338,
62
+ "<|fr|>": 50265,
63
+ "<|gl|>": 50319,
64
+ "<|gu|>": 50333,
65
+ "<|haw|>": 50352,
66
+ "<|ha|>": 50354,
67
+ "<|he|>": 50279,
68
+ "<|hi|>": 50276,
69
+ "<|hr|>": 50291,
70
+ "<|ht|>": 50339,
71
+ "<|hu|>": 50286,
72
+ "<|hy|>": 50312,
73
+ "<|id|>": 50275,
74
+ "<|is|>": 50311,
75
+ "<|it|>": 50274,
76
+ "<|ja|>": 50266,
77
+ "<|jw|>": 50356,
78
+ "<|ka|>": 50329,
79
+ "<|kk|>": 50316,
80
+ "<|km|>": 50323,
81
+ "<|kn|>": 50306,
82
+ "<|ko|>": 50264,
83
+ "<|la|>": 50294,
84
+ "<|lb|>": 50345,
85
+ "<|ln|>": 50353,
86
+ "<|lo|>": 50336,
87
+ "<|lt|>": 50293,
88
+ "<|lv|>": 50301,
89
+ "<|mg|>": 50349,
90
+ "<|mi|>": 50295,
91
+ "<|mk|>": 50308,
92
+ "<|ml|>": 50296,
93
+ "<|mn|>": 50314,
94
+ "<|mr|>": 50320,
95
+ "<|ms|>": 50282,
96
+ "<|mt|>": 50343,
97
+ "<|my|>": 50346,
98
+ "<|ne|>": 50313,
99
+ "<|nl|>": 50271,
100
+ "<|nn|>": 50342,
101
+ "<|no|>": 50288,
102
+ "<|oc|>": 50328,
103
+ "<|pa|>": 50321,
104
+ "<|pl|>": 50269,
105
+ "<|ps|>": 50340,
106
+ "<|pt|>": 50267,
107
+ "<|ro|>": 50284,
108
+ "<|ru|>": 50263,
109
+ "<|sa|>": 50344,
110
+ "<|sd|>": 50332,
111
+ "<|si|>": 50322,
112
+ "<|sk|>": 50298,
113
+ "<|sl|>": 50305,
114
+ "<|sn|>": 50324,
115
+ "<|so|>": 50326,
116
+ "<|sq|>": 50317,
117
+ "<|sr|>": 50303,
118
+ "<|su|>": 50357,
119
+ "<|sv|>": 50273,
120
+ "<|sw|>": 50318,
121
+ "<|ta|>": 50287,
122
+ "<|te|>": 50299,
123
+ "<|tg|>": 50331,
124
+ "<|th|>": 50289,
125
+ "<|tk|>": 50341,
126
+ "<|tl|>": 50348,
127
+ "<|tr|>": 50268,
128
+ "<|tt|>": 50351,
129
+ "<|uk|>": 50280,
130
+ "<|ur|>": 50290,
131
+ "<|uz|>": 50337,
132
+ "<|vi|>": 50278,
133
+ "<|yi|>": 50335,
134
+ "<|yo|>": 50325,
135
+ "<|zh|>": 50260
136
+ },
137
+ "language": "english",
138
+ "max_initial_timestamp_index": 50,
139
+ "max_length": 448,
140
+ "no_timestamps_token_id": 50363,
141
+ "pad_token_id": 50257,
142
+ "prev_sot_token_id": 50361,
143
+ "return_timestamps": false,
144
+ "suppress_tokens": [
145
+ 1,
146
+ 2,
147
+ 7,
148
+ 8,
149
+ 9,
150
+ 10,
151
+ 14,
152
+ 25,
153
+ 26,
154
+ 27,
155
+ 28,
156
+ 29,
157
+ 31,
158
+ 58,
159
+ 59,
160
+ 60,
161
+ 61,
162
+ 62,
163
+ 63,
164
+ 90,
165
+ 91,
166
+ 92,
167
+ 93,
168
+ 359,
169
+ 503,
170
+ 522,
171
+ 542,
172
+ 873,
173
+ 893,
174
+ 902,
175
+ 918,
176
+ 922,
177
+ 931,
178
+ 1350,
179
+ 1853,
180
+ 1982,
181
+ 2460,
182
+ 2627,
183
+ 3246,
184
+ 3253,
185
+ 3268,
186
+ 3536,
187
+ 3846,
188
+ 3961,
189
+ 4183,
190
+ 4667,
191
+ 6585,
192
+ 6647,
193
+ 7273,
194
+ 9061,
195
+ 9383,
196
+ 10428,
197
+ 10929,
198
+ 11938,
199
+ 12033,
200
+ 12331,
201
+ 12562,
202
+ 13793,
203
+ 14157,
204
+ 14635,
205
+ 15265,
206
+ 15618,
207
+ 16553,
208
+ 16604,
209
+ 18362,
210
+ 18956,
211
+ 20075,
212
+ 21675,
213
+ 22520,
214
+ 26130,
215
+ 26161,
216
+ 26435,
217
+ 28279,
218
+ 29464,
219
+ 31650,
220
+ 32302,
221
+ 32470,
222
+ 36865,
223
+ 42863,
224
+ 47425,
225
+ 49870,
226
+ 50254,
227
+ 50258,
228
+ 50358,
229
+ 50359,
230
+ 50360,
231
+ 50361,
232
+ 50362
233
+ ],
234
+ "task": "transcribe",
235
+ "task_to_id": {
236
+ "transcribe": 50359,
237
+ "translate": 50358
238
+ },
239
+ "transformers_version": "4.46.2"
240
+ }
checkpoints/checkpoint-1000/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:493ff23d90f1a4436dbd6aa0f3a919438ada7cb3a71e9629c5dda8521bae3d30
3
+ size 3055544304
checkpoints/checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fcb41f1bca0945558cb3ce8b9e2b35082b354211336ec60faad4e4c1cedbd84
3
+ size 6111664103
checkpoints/checkpoint-1000/preprocessor_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunk_length": 30,
3
+ "feature_extractor_type": "WhisperFeatureExtractor",
4
+ "feature_size": 80,
5
+ "hop_length": 160,
6
+ "n_fft": 400,
7
+ "n_samples": 480000,
8
+ "nb_max_frames": 3000,
9
+ "padding_side": "right",
10
+ "padding_value": 0.0,
11
+ "processor_class": "WhisperProcessor",
12
+ "return_attention_mask": false,
13
+ "sampling_rate": 16000
14
+ }
checkpoints/checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c0a60a604e57f9db73d42dc98cbfa48c91bc6104e402f5e9c890e6c38209f6d
3
+ size 14244
checkpoints/checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd39c420b98e7b8f6f97d29a9fa11555599f3a2c8c219f2d2777fb99cfc517a6
3
+ size 1064
checkpoints/checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,403 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.0327022375215147,
3
+ "best_model_checkpoint": "./whisper-medium-attempt2-1000-orders-eleven-labs/checkpoint-900",
4
+ "epoch": 4.032258064516129,
5
+ "eval_steps": 100,
6
+ "global_step": 1000,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.10080645161290322,
13
+ "grad_norm": 40.95878982543945,
14
+ "learning_rate": 4.2000000000000006e-07,
15
+ "loss": 3.6067,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.20161290322580644,
20
+ "grad_norm": 28.143661499023438,
21
+ "learning_rate": 9.200000000000001e-07,
22
+ "loss": 2.7859,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.3024193548387097,
27
+ "grad_norm": 10.631092071533203,
28
+ "learning_rate": 1.42e-06,
29
+ "loss": 0.9512,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.4032258064516129,
34
+ "grad_norm": 1.839168906211853,
35
+ "learning_rate": 1.9200000000000003e-06,
36
+ "loss": 0.0668,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.4032258064516129,
41
+ "eval_loss": 0.03882083296775818,
42
+ "eval_runtime": 54.3208,
43
+ "eval_samples_per_second": 3.848,
44
+ "eval_steps_per_second": 0.497,
45
+ "eval_wer": 17.38382099827883,
46
+ "step": 100
47
+ },
48
+ {
49
+ "epoch": 0.5040322580645161,
50
+ "grad_norm": 5.860921859741211,
51
+ "learning_rate": 2.42e-06,
52
+ "loss": 0.0347,
53
+ "step": 125
54
+ },
55
+ {
56
+ "epoch": 0.6048387096774194,
57
+ "grad_norm": 4.118633270263672,
58
+ "learning_rate": 2.92e-06,
59
+ "loss": 0.0292,
60
+ "step": 150
61
+ },
62
+ {
63
+ "epoch": 0.7056451612903226,
64
+ "grad_norm": 0.7270166873931885,
65
+ "learning_rate": 3.4200000000000007e-06,
66
+ "loss": 0.0204,
67
+ "step": 175
68
+ },
69
+ {
70
+ "epoch": 0.8064516129032258,
71
+ "grad_norm": 0.20527507364749908,
72
+ "learning_rate": 3.920000000000001e-06,
73
+ "loss": 0.0142,
74
+ "step": 200
75
+ },
76
+ {
77
+ "epoch": 0.8064516129032258,
78
+ "eval_loss": 0.00608229311183095,
79
+ "eval_runtime": 54.4284,
80
+ "eval_samples_per_second": 3.84,
81
+ "eval_steps_per_second": 0.496,
82
+ "eval_wer": 11.359724612736661,
83
+ "step": 200
84
+ },
85
+ {
86
+ "epoch": 0.907258064516129,
87
+ "grad_norm": 3.9435720443725586,
88
+ "learning_rate": 4.42e-06,
89
+ "loss": 0.0091,
90
+ "step": 225
91
+ },
92
+ {
93
+ "epoch": 1.0080645161290323,
94
+ "grad_norm": 0.173879012465477,
95
+ "learning_rate": 4.92e-06,
96
+ "loss": 0.0112,
97
+ "step": 250
98
+ },
99
+ {
100
+ "epoch": 1.1088709677419355,
101
+ "grad_norm": 0.018560703843832016,
102
+ "learning_rate": 5.420000000000001e-06,
103
+ "loss": 0.0063,
104
+ "step": 275
105
+ },
106
+ {
107
+ "epoch": 1.2096774193548387,
108
+ "grad_norm": 4.80478048324585,
109
+ "learning_rate": 5.92e-06,
110
+ "loss": 0.0075,
111
+ "step": 300
112
+ },
113
+ {
114
+ "epoch": 1.2096774193548387,
115
+ "eval_loss": 0.007460152264684439,
116
+ "eval_runtime": 54.3247,
117
+ "eval_samples_per_second": 3.847,
118
+ "eval_steps_per_second": 0.497,
119
+ "eval_wer": 9.63855421686747,
120
+ "step": 300
121
+ },
122
+ {
123
+ "epoch": 1.310483870967742,
124
+ "grad_norm": 0.022897183895111084,
125
+ "learning_rate": 6.42e-06,
126
+ "loss": 0.0163,
127
+ "step": 325
128
+ },
129
+ {
130
+ "epoch": 1.4112903225806452,
131
+ "grad_norm": 0.06287501752376556,
132
+ "learning_rate": 6.92e-06,
133
+ "loss": 0.0078,
134
+ "step": 350
135
+ },
136
+ {
137
+ "epoch": 1.5120967741935485,
138
+ "grad_norm": 0.019055448472499847,
139
+ "learning_rate": 7.420000000000001e-06,
140
+ "loss": 0.0057,
141
+ "step": 375
142
+ },
143
+ {
144
+ "epoch": 1.6129032258064515,
145
+ "grad_norm": 0.04413852468132973,
146
+ "learning_rate": 7.92e-06,
147
+ "loss": 0.0073,
148
+ "step": 400
149
+ },
150
+ {
151
+ "epoch": 1.6129032258064515,
152
+ "eval_loss": 0.010435141623020172,
153
+ "eval_runtime": 54.1901,
154
+ "eval_samples_per_second": 3.857,
155
+ "eval_steps_per_second": 0.498,
156
+ "eval_wer": 7.74526678141136,
157
+ "step": 400
158
+ },
159
+ {
160
+ "epoch": 1.713709677419355,
161
+ "grad_norm": 0.6023885607719421,
162
+ "learning_rate": 8.42e-06,
163
+ "loss": 0.0173,
164
+ "step": 425
165
+ },
166
+ {
167
+ "epoch": 1.814516129032258,
168
+ "grad_norm": 0.062626414000988,
169
+ "learning_rate": 8.920000000000001e-06,
170
+ "loss": 0.0114,
171
+ "step": 450
172
+ },
173
+ {
174
+ "epoch": 1.9153225806451613,
175
+ "grad_norm": 4.65421724319458,
176
+ "learning_rate": 9.42e-06,
177
+ "loss": 0.0046,
178
+ "step": 475
179
+ },
180
+ {
181
+ "epoch": 2.0161290322580645,
182
+ "grad_norm": 0.11005760729312897,
183
+ "learning_rate": 9.920000000000002e-06,
184
+ "loss": 0.0087,
185
+ "step": 500
186
+ },
187
+ {
188
+ "epoch": 2.0161290322580645,
189
+ "eval_loss": 0.012454940006136894,
190
+ "eval_runtime": 54.5944,
191
+ "eval_samples_per_second": 3.828,
192
+ "eval_steps_per_second": 0.495,
193
+ "eval_wer": 2.9259896729776247,
194
+ "step": 500
195
+ },
196
+ {
197
+ "epoch": 2.1169354838709675,
198
+ "grad_norm": 0.016795210540294647,
199
+ "learning_rate": 9.86e-06,
200
+ "loss": 0.0076,
201
+ "step": 525
202
+ },
203
+ {
204
+ "epoch": 2.217741935483871,
205
+ "grad_norm": 0.1811501681804657,
206
+ "learning_rate": 9.693333333333334e-06,
207
+ "loss": 0.007,
208
+ "step": 550
209
+ },
210
+ {
211
+ "epoch": 2.318548387096774,
212
+ "grad_norm": 0.0245444867759943,
213
+ "learning_rate": 9.526666666666668e-06,
214
+ "loss": 0.0099,
215
+ "step": 575
216
+ },
217
+ {
218
+ "epoch": 2.4193548387096775,
219
+ "grad_norm": 0.06346794962882996,
220
+ "learning_rate": 9.360000000000002e-06,
221
+ "loss": 0.0046,
222
+ "step": 600
223
+ },
224
+ {
225
+ "epoch": 2.4193548387096775,
226
+ "eval_loss": 0.00803467072546482,
227
+ "eval_runtime": 54.4661,
228
+ "eval_samples_per_second": 3.837,
229
+ "eval_steps_per_second": 0.496,
230
+ "eval_wer": 1.549053356282272,
231
+ "step": 600
232
+ },
233
+ {
234
+ "epoch": 2.5201612903225805,
235
+ "grad_norm": 0.037542328238487244,
236
+ "learning_rate": 9.193333333333334e-06,
237
+ "loss": 0.0065,
238
+ "step": 625
239
+ },
240
+ {
241
+ "epoch": 2.620967741935484,
242
+ "grad_norm": 2.8664350509643555,
243
+ "learning_rate": 9.026666666666666e-06,
244
+ "loss": 0.0092,
245
+ "step": 650
246
+ },
247
+ {
248
+ "epoch": 2.721774193548387,
249
+ "grad_norm": 4.69785737991333,
250
+ "learning_rate": 8.860000000000002e-06,
251
+ "loss": 0.0052,
252
+ "step": 675
253
+ },
254
+ {
255
+ "epoch": 2.8225806451612905,
256
+ "grad_norm": 0.005957436747848988,
257
+ "learning_rate": 8.693333333333334e-06,
258
+ "loss": 0.0087,
259
+ "step": 700
260
+ },
261
+ {
262
+ "epoch": 2.8225806451612905,
263
+ "eval_loss": 0.003936439286917448,
264
+ "eval_runtime": 54.4976,
265
+ "eval_samples_per_second": 3.835,
266
+ "eval_steps_per_second": 0.495,
267
+ "eval_wer": 1.7211703958691909,
268
+ "step": 700
269
+ },
270
+ {
271
+ "epoch": 2.9233870967741935,
272
+ "grad_norm": 0.008426151238381863,
273
+ "learning_rate": 8.526666666666667e-06,
274
+ "loss": 0.0059,
275
+ "step": 725
276
+ },
277
+ {
278
+ "epoch": 3.024193548387097,
279
+ "grad_norm": 0.004362072329968214,
280
+ "learning_rate": 8.36e-06,
281
+ "loss": 0.0054,
282
+ "step": 750
283
+ },
284
+ {
285
+ "epoch": 3.125,
286
+ "grad_norm": 1.2939238548278809,
287
+ "learning_rate": 8.193333333333335e-06,
288
+ "loss": 0.005,
289
+ "step": 775
290
+ },
291
+ {
292
+ "epoch": 3.225806451612903,
293
+ "grad_norm": 0.0022686896845698357,
294
+ "learning_rate": 8.026666666666667e-06,
295
+ "loss": 0.0066,
296
+ "step": 800
297
+ },
298
+ {
299
+ "epoch": 3.225806451612903,
300
+ "eval_loss": 0.00420153234153986,
301
+ "eval_runtime": 54.3822,
302
+ "eval_samples_per_second": 3.843,
303
+ "eval_steps_per_second": 0.496,
304
+ "eval_wer": 1.376936316695353,
305
+ "step": 800
306
+ },
307
+ {
308
+ "epoch": 3.3266129032258065,
309
+ "grad_norm": 0.0034014617558568716,
310
+ "learning_rate": 7.860000000000001e-06,
311
+ "loss": 0.0063,
312
+ "step": 825
313
+ },
314
+ {
315
+ "epoch": 3.4274193548387095,
316
+ "grad_norm": 7.198966026306152,
317
+ "learning_rate": 7.693333333333333e-06,
318
+ "loss": 0.0052,
319
+ "step": 850
320
+ },
321
+ {
322
+ "epoch": 3.528225806451613,
323
+ "grad_norm": 0.0018629450350999832,
324
+ "learning_rate": 7.526666666666668e-06,
325
+ "loss": 0.003,
326
+ "step": 875
327
+ },
328
+ {
329
+ "epoch": 3.629032258064516,
330
+ "grad_norm": 3.725576877593994,
331
+ "learning_rate": 7.360000000000001e-06,
332
+ "loss": 0.0032,
333
+ "step": 900
334
+ },
335
+ {
336
+ "epoch": 3.629032258064516,
337
+ "eval_loss": 0.009456031955778599,
338
+ "eval_runtime": 54.5839,
339
+ "eval_samples_per_second": 3.829,
340
+ "eval_steps_per_second": 0.495,
341
+ "eval_wer": 1.0327022375215147,
342
+ "step": 900
343
+ },
344
+ {
345
+ "epoch": 3.7298387096774195,
346
+ "grad_norm": 0.3921663761138916,
347
+ "learning_rate": 7.1933333333333345e-06,
348
+ "loss": 0.0041,
349
+ "step": 925
350
+ },
351
+ {
352
+ "epoch": 3.8306451612903225,
353
+ "grad_norm": 2.3925461769104004,
354
+ "learning_rate": 7.0266666666666674e-06,
355
+ "loss": 0.0017,
356
+ "step": 950
357
+ },
358
+ {
359
+ "epoch": 3.931451612903226,
360
+ "grad_norm": 0.002618137514218688,
361
+ "learning_rate": 6.860000000000001e-06,
362
+ "loss": 0.0047,
363
+ "step": 975
364
+ },
365
+ {
366
+ "epoch": 4.032258064516129,
367
+ "grad_norm": 3.606403112411499,
368
+ "learning_rate": 6.693333333333334e-06,
369
+ "loss": 0.0027,
370
+ "step": 1000
371
+ },
372
+ {
373
+ "epoch": 4.032258064516129,
374
+ "eval_loss": 0.01142825372517109,
375
+ "eval_runtime": 54.5807,
376
+ "eval_samples_per_second": 3.829,
377
+ "eval_steps_per_second": 0.495,
378
+ "eval_wer": 1.549053356282272,
379
+ "step": 1000
380
+ }
381
+ ],
382
+ "logging_steps": 25,
383
+ "max_steps": 2000,
384
+ "num_input_tokens_seen": 0,
385
+ "num_train_epochs": 9,
386
+ "save_steps": 100,
387
+ "stateful_callbacks": {
388
+ "TrainerControl": {
389
+ "args": {
390
+ "should_epoch_stop": false,
391
+ "should_evaluate": false,
392
+ "should_log": false,
393
+ "should_save": true,
394
+ "should_training_stop": false
395
+ },
396
+ "attributes": {}
397
+ }
398
+ },
399
+ "total_flos": 1.62684422258688e+19,
400
+ "train_batch_size": 16,
401
+ "trial_name": null,
402
+ "trial_params": null
403
+ }
checkpoints/checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f7a1fc1f027287c9d1d723e42e6556d0cf6728657bd6a51fde9e5c2d703ba34
3
+ size 5496
checkpoints/checkpoint-1100/config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "shreyasdesaisuperU/whisper-medium-attempt2-1000-orders",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
+ "architectures": [
7
+ "WhisperForConditionalGeneration"
8
+ ],
9
+ "attention_dropout": 0.0,
10
+ "begin_suppress_tokens": null,
11
+ "bos_token_id": 50257,
12
+ "classifier_proj_size": 256,
13
+ "d_model": 1024,
14
+ "decoder_attention_heads": 16,
15
+ "decoder_ffn_dim": 4096,
16
+ "decoder_layerdrop": 0.0,
17
+ "decoder_layers": 24,
18
+ "decoder_start_token_id": 50258,
19
+ "dropout": 0.0,
20
+ "encoder_attention_heads": 16,
21
+ "encoder_ffn_dim": 4096,
22
+ "encoder_layerdrop": 0.0,
23
+ "encoder_layers": 24,
24
+ "eos_token_id": 50257,
25
+ "forced_decoder_ids": [
26
+ [
27
+ 1,
28
+ 50259
29
+ ],
30
+ [
31
+ 2,
32
+ 50359
33
+ ],
34
+ [
35
+ 3,
36
+ 50363
37
+ ]
38
+ ],
39
+ "init_std": 0.02,
40
+ "is_encoder_decoder": true,
41
+ "mask_feature_length": 10,
42
+ "mask_feature_min_masks": 0,
43
+ "mask_feature_prob": 0.0,
44
+ "mask_time_length": 10,
45
+ "mask_time_min_masks": 2,
46
+ "mask_time_prob": 0.05,
47
+ "max_length": null,
48
+ "max_source_positions": 1500,
49
+ "max_target_positions": 448,
50
+ "median_filter_width": 7,
51
+ "model_type": "whisper",
52
+ "num_hidden_layers": 24,
53
+ "num_mel_bins": 80,
54
+ "pad_token_id": 50257,
55
+ "scale_embedding": false,
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.46.2",
58
+ "use_cache": true,
59
+ "use_weighted_layer_sum": false,
60
+ "vocab_size": 51865
61
+ }
checkpoints/checkpoint-1100/generation_config.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alignment_heads": [
3
+ [
4
+ 13,
5
+ 15
6
+ ],
7
+ [
8
+ 15,
9
+ 4
10
+ ],
11
+ [
12
+ 15,
13
+ 15
14
+ ],
15
+ [
16
+ 16,
17
+ 1
18
+ ],
19
+ [
20
+ 20,
21
+ 0
22
+ ],
23
+ [
24
+ 23,
25
+ 4
26
+ ]
27
+ ],
28
+ "begin_suppress_tokens": [
29
+ 220,
30
+ 50257
31
+ ],
32
+ "bos_token_id": 50257,
33
+ "decoder_start_token_id": 50258,
34
+ "eos_token_id": 50257,
35
+ "is_multilingual": true,
36
+ "lang_to_id": {
37
+ "<|af|>": 50327,
38
+ "<|am|>": 50334,
39
+ "<|ar|>": 50272,
40
+ "<|as|>": 50350,
41
+ "<|az|>": 50304,
42
+ "<|ba|>": 50355,
43
+ "<|be|>": 50330,
44
+ "<|bg|>": 50292,
45
+ "<|bn|>": 50302,
46
+ "<|bo|>": 50347,
47
+ "<|br|>": 50309,
48
+ "<|bs|>": 50315,
49
+ "<|ca|>": 50270,
50
+ "<|cs|>": 50283,
51
+ "<|cy|>": 50297,
52
+ "<|da|>": 50285,
53
+ "<|de|>": 50261,
54
+ "<|el|>": 50281,
55
+ "<|en|>": 50259,
56
+ "<|es|>": 50262,
57
+ "<|et|>": 50307,
58
+ "<|eu|>": 50310,
59
+ "<|fa|>": 50300,
60
+ "<|fi|>": 50277,
61
+ "<|fo|>": 50338,
62
+ "<|fr|>": 50265,
63
+ "<|gl|>": 50319,
64
+ "<|gu|>": 50333,
65
+ "<|haw|>": 50352,
66
+ "<|ha|>": 50354,
67
+ "<|he|>": 50279,
68
+ "<|hi|>": 50276,
69
+ "<|hr|>": 50291,
70
+ "<|ht|>": 50339,
71
+ "<|hu|>": 50286,
72
+ "<|hy|>": 50312,
73
+ "<|id|>": 50275,
74
+ "<|is|>": 50311,
75
+ "<|it|>": 50274,
76
+ "<|ja|>": 50266,
77
+ "<|jw|>": 50356,
78
+ "<|ka|>": 50329,
79
+ "<|kk|>": 50316,
80
+ "<|km|>": 50323,
81
+ "<|kn|>": 50306,
82
+ "<|ko|>": 50264,
83
+ "<|la|>": 50294,
84
+ "<|lb|>": 50345,
85
+ "<|ln|>": 50353,
86
+ "<|lo|>": 50336,
87
+ "<|lt|>": 50293,
88
+ "<|lv|>": 50301,
89
+ "<|mg|>": 50349,
90
+ "<|mi|>": 50295,
91
+ "<|mk|>": 50308,
92
+ "<|ml|>": 50296,
93
+ "<|mn|>": 50314,
94
+ "<|mr|>": 50320,
95
+ "<|ms|>": 50282,
96
+ "<|mt|>": 50343,
97
+ "<|my|>": 50346,
98
+ "<|ne|>": 50313,
99
+ "<|nl|>": 50271,
100
+ "<|nn|>": 50342,
101
+ "<|no|>": 50288,
102
+ "<|oc|>": 50328,
103
+ "<|pa|>": 50321,
104
+ "<|pl|>": 50269,
105
+ "<|ps|>": 50340,
106
+ "<|pt|>": 50267,
107
+ "<|ro|>": 50284,
108
+ "<|ru|>": 50263,
109
+ "<|sa|>": 50344,
110
+ "<|sd|>": 50332,
111
+ "<|si|>": 50322,
112
+ "<|sk|>": 50298,
113
+ "<|sl|>": 50305,
114
+ "<|sn|>": 50324,
115
+ "<|so|>": 50326,
116
+ "<|sq|>": 50317,
117
+ "<|sr|>": 50303,
118
+ "<|su|>": 50357,
119
+ "<|sv|>": 50273,
120
+ "<|sw|>": 50318,
121
+ "<|ta|>": 50287,
122
+ "<|te|>": 50299,
123
+ "<|tg|>": 50331,
124
+ "<|th|>": 50289,
125
+ "<|tk|>": 50341,
126
+ "<|tl|>": 50348,
127
+ "<|tr|>": 50268,
128
+ "<|tt|>": 50351,
129
+ "<|uk|>": 50280,
130
+ "<|ur|>": 50290,
131
+ "<|uz|>": 50337,
132
+ "<|vi|>": 50278,
133
+ "<|yi|>": 50335,
134
+ "<|yo|>": 50325,
135
+ "<|zh|>": 50260
136
+ },
137
+ "language": "english",
138
+ "max_initial_timestamp_index": 50,
139
+ "max_length": 448,
140
+ "no_timestamps_token_id": 50363,
141
+ "pad_token_id": 50257,
142
+ "prev_sot_token_id": 50361,
143
+ "return_timestamps": false,
144
+ "suppress_tokens": [
145
+ 1,
146
+ 2,
147
+ 7,
148
+ 8,
149
+ 9,
150
+ 10,
151
+ 14,
152
+ 25,
153
+ 26,
154
+ 27,
155
+ 28,
156
+ 29,
157
+ 31,
158
+ 58,
159
+ 59,
160
+ 60,
161
+ 61,
162
+ 62,
163
+ 63,
164
+ 90,
165
+ 91,
166
+ 92,
167
+ 93,
168
+ 359,
169
+ 503,
170
+ 522,
171
+ 542,
172
+ 873,
173
+ 893,
174
+ 902,
175
+ 918,
176
+ 922,
177
+ 931,
178
+ 1350,
179
+ 1853,
180
+ 1982,
181
+ 2460,
182
+ 2627,
183
+ 3246,
184
+ 3253,
185
+ 3268,
186
+ 3536,
187
+ 3846,
188
+ 3961,
189
+ 4183,
190
+ 4667,
191
+ 6585,
192
+ 6647,
193
+ 7273,
194
+ 9061,
195
+ 9383,
196
+ 10428,
197
+ 10929,
198
+ 11938,
199
+ 12033,
200
+ 12331,
201
+ 12562,
202
+ 13793,
203
+ 14157,
204
+ 14635,
205
+ 15265,
206
+ 15618,
207
+ 16553,
208
+ 16604,
209
+ 18362,
210
+ 18956,
211
+ 20075,
212
+ 21675,
213
+ 22520,
214
+ 26130,
215
+ 26161,
216
+ 26435,
217
+ 28279,
218
+ 29464,
219
+ 31650,
220
+ 32302,
221
+ 32470,
222
+ 36865,
223
+ 42863,
224
+ 47425,
225
+ 49870,
226
+ 50254,
227
+ 50258,
228
+ 50358,
229
+ 50359,
230
+ 50360,
231
+ 50361,
232
+ 50362
233
+ ],
234
+ "task": "transcribe",
235
+ "task_to_id": {
236
+ "transcribe": 50359,
237
+ "translate": 50358
238
+ },
239
+ "transformers_version": "4.46.2"
240
+ }
checkpoints/checkpoint-1100/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b09c7ffa17bceecb108825e0ce251b86f6c35ace1092ad13f64f509de80c4c92
3
+ size 3055544304
checkpoints/checkpoint-1100/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58272003346b25c8cfb16dd7935caa70ac2372066bfef39734c447ad32f8be4f
3
+ size 6111664103
checkpoints/checkpoint-1100/preprocessor_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunk_length": 30,
3
+ "feature_extractor_type": "WhisperFeatureExtractor",
4
+ "feature_size": 80,
5
+ "hop_length": 160,
6
+ "n_fft": 400,
7
+ "n_samples": 480000,
8
+ "nb_max_frames": 3000,
9
+ "padding_side": "right",
10
+ "padding_value": 0.0,
11
+ "processor_class": "WhisperProcessor",
12
+ "return_attention_mask": false,
13
+ "sampling_rate": 16000
14
+ }
checkpoints/checkpoint-1100/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ce66231dede098237b096e7d2fb5ba7b3452a82c1861ef2527195c89ac204a0
3
+ size 14244
checkpoints/checkpoint-1100/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:902c88c15e18f179fd5237d522191ed53c448962513c1017b32e0cecdc8ef217
3
+ size 1064
checkpoints/checkpoint-1100/trainer_state.json ADDED
@@ -0,0 +1,440 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.0327022375215147,
3
+ "best_model_checkpoint": "./whisper-medium-attempt2-1000-orders-eleven-labs/checkpoint-900",
4
+ "epoch": 4.435483870967742,
5
+ "eval_steps": 100,
6
+ "global_step": 1100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.10080645161290322,
13
+ "grad_norm": 40.95878982543945,
14
+ "learning_rate": 4.2000000000000006e-07,
15
+ "loss": 3.6067,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.20161290322580644,
20
+ "grad_norm": 28.143661499023438,
21
+ "learning_rate": 9.200000000000001e-07,
22
+ "loss": 2.7859,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.3024193548387097,
27
+ "grad_norm": 10.631092071533203,
28
+ "learning_rate": 1.42e-06,
29
+ "loss": 0.9512,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.4032258064516129,
34
+ "grad_norm": 1.839168906211853,
35
+ "learning_rate": 1.9200000000000003e-06,
36
+ "loss": 0.0668,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.4032258064516129,
41
+ "eval_loss": 0.03882083296775818,
42
+ "eval_runtime": 54.3208,
43
+ "eval_samples_per_second": 3.848,
44
+ "eval_steps_per_second": 0.497,
45
+ "eval_wer": 17.38382099827883,
46
+ "step": 100
47
+ },
48
+ {
49
+ "epoch": 0.5040322580645161,
50
+ "grad_norm": 5.860921859741211,
51
+ "learning_rate": 2.42e-06,
52
+ "loss": 0.0347,
53
+ "step": 125
54
+ },
55
+ {
56
+ "epoch": 0.6048387096774194,
57
+ "grad_norm": 4.118633270263672,
58
+ "learning_rate": 2.92e-06,
59
+ "loss": 0.0292,
60
+ "step": 150
61
+ },
62
+ {
63
+ "epoch": 0.7056451612903226,
64
+ "grad_norm": 0.7270166873931885,
65
+ "learning_rate": 3.4200000000000007e-06,
66
+ "loss": 0.0204,
67
+ "step": 175
68
+ },
69
+ {
70
+ "epoch": 0.8064516129032258,
71
+ "grad_norm": 0.20527507364749908,
72
+ "learning_rate": 3.920000000000001e-06,
73
+ "loss": 0.0142,
74
+ "step": 200
75
+ },
76
+ {
77
+ "epoch": 0.8064516129032258,
78
+ "eval_loss": 0.00608229311183095,
79
+ "eval_runtime": 54.4284,
80
+ "eval_samples_per_second": 3.84,
81
+ "eval_steps_per_second": 0.496,
82
+ "eval_wer": 11.359724612736661,
83
+ "step": 200
84
+ },
85
+ {
86
+ "epoch": 0.907258064516129,
87
+ "grad_norm": 3.9435720443725586,
88
+ "learning_rate": 4.42e-06,
89
+ "loss": 0.0091,
90
+ "step": 225
91
+ },
92
+ {
93
+ "epoch": 1.0080645161290323,
94
+ "grad_norm": 0.173879012465477,
95
+ "learning_rate": 4.92e-06,
96
+ "loss": 0.0112,
97
+ "step": 250
98
+ },
99
+ {
100
+ "epoch": 1.1088709677419355,
101
+ "grad_norm": 0.018560703843832016,
102
+ "learning_rate": 5.420000000000001e-06,
103
+ "loss": 0.0063,
104
+ "step": 275
105
+ },
106
+ {
107
+ "epoch": 1.2096774193548387,
108
+ "grad_norm": 4.80478048324585,
109
+ "learning_rate": 5.92e-06,
110
+ "loss": 0.0075,
111
+ "step": 300
112
+ },
113
+ {
114
+ "epoch": 1.2096774193548387,
115
+ "eval_loss": 0.007460152264684439,
116
+ "eval_runtime": 54.3247,
117
+ "eval_samples_per_second": 3.847,
118
+ "eval_steps_per_second": 0.497,
119
+ "eval_wer": 9.63855421686747,
120
+ "step": 300
121
+ },
122
+ {
123
+ "epoch": 1.310483870967742,
124
+ "grad_norm": 0.022897183895111084,
125
+ "learning_rate": 6.42e-06,
126
+ "loss": 0.0163,
127
+ "step": 325
128
+ },
129
+ {
130
+ "epoch": 1.4112903225806452,
131
+ "grad_norm": 0.06287501752376556,
132
+ "learning_rate": 6.92e-06,
133
+ "loss": 0.0078,
134
+ "step": 350
135
+ },
136
+ {
137
+ "epoch": 1.5120967741935485,
138
+ "grad_norm": 0.019055448472499847,
139
+ "learning_rate": 7.420000000000001e-06,
140
+ "loss": 0.0057,
141
+ "step": 375
142
+ },
143
+ {
144
+ "epoch": 1.6129032258064515,
145
+ "grad_norm": 0.04413852468132973,
146
+ "learning_rate": 7.92e-06,
147
+ "loss": 0.0073,
148
+ "step": 400
149
+ },
150
+ {
151
+ "epoch": 1.6129032258064515,
152
+ "eval_loss": 0.010435141623020172,
153
+ "eval_runtime": 54.1901,
154
+ "eval_samples_per_second": 3.857,
155
+ "eval_steps_per_second": 0.498,
156
+ "eval_wer": 7.74526678141136,
157
+ "step": 400
158
+ },
159
+ {
160
+ "epoch": 1.713709677419355,
161
+ "grad_norm": 0.6023885607719421,
162
+ "learning_rate": 8.42e-06,
163
+ "loss": 0.0173,
164
+ "step": 425
165
+ },
166
+ {
167
+ "epoch": 1.814516129032258,
168
+ "grad_norm": 0.062626414000988,
169
+ "learning_rate": 8.920000000000001e-06,
170
+ "loss": 0.0114,
171
+ "step": 450
172
+ },
173
+ {
174
+ "epoch": 1.9153225806451613,
175
+ "grad_norm": 4.65421724319458,
176
+ "learning_rate": 9.42e-06,
177
+ "loss": 0.0046,
178
+ "step": 475
179
+ },
180
+ {
181
+ "epoch": 2.0161290322580645,
182
+ "grad_norm": 0.11005760729312897,
183
+ "learning_rate": 9.920000000000002e-06,
184
+ "loss": 0.0087,
185
+ "step": 500
186
+ },
187
+ {
188
+ "epoch": 2.0161290322580645,
189
+ "eval_loss": 0.012454940006136894,
190
+ "eval_runtime": 54.5944,
191
+ "eval_samples_per_second": 3.828,
192
+ "eval_steps_per_second": 0.495,
193
+ "eval_wer": 2.9259896729776247,
194
+ "step": 500
195
+ },
196
+ {
197
+ "epoch": 2.1169354838709675,
198
+ "grad_norm": 0.016795210540294647,
199
+ "learning_rate": 9.86e-06,
200
+ "loss": 0.0076,
201
+ "step": 525
202
+ },
203
+ {
204
+ "epoch": 2.217741935483871,
205
+ "grad_norm": 0.1811501681804657,
206
+ "learning_rate": 9.693333333333334e-06,
207
+ "loss": 0.007,
208
+ "step": 550
209
+ },
210
+ {
211
+ "epoch": 2.318548387096774,
212
+ "grad_norm": 0.0245444867759943,
213
+ "learning_rate": 9.526666666666668e-06,
214
+ "loss": 0.0099,
215
+ "step": 575
216
+ },
217
+ {
218
+ "epoch": 2.4193548387096775,
219
+ "grad_norm": 0.06346794962882996,
220
+ "learning_rate": 9.360000000000002e-06,
221
+ "loss": 0.0046,
222
+ "step": 600
223
+ },
224
+ {
225
+ "epoch": 2.4193548387096775,
226
+ "eval_loss": 0.00803467072546482,
227
+ "eval_runtime": 54.4661,
228
+ "eval_samples_per_second": 3.837,
229
+ "eval_steps_per_second": 0.496,
230
+ "eval_wer": 1.549053356282272,
231
+ "step": 600
232
+ },
233
+ {
234
+ "epoch": 2.5201612903225805,
235
+ "grad_norm": 0.037542328238487244,
236
+ "learning_rate": 9.193333333333334e-06,
237
+ "loss": 0.0065,
238
+ "step": 625
239
+ },
240
+ {
241
+ "epoch": 2.620967741935484,
242
+ "grad_norm": 2.8664350509643555,
243
+ "learning_rate": 9.026666666666666e-06,
244
+ "loss": 0.0092,
245
+ "step": 650
246
+ },
247
+ {
248
+ "epoch": 2.721774193548387,
249
+ "grad_norm": 4.69785737991333,
250
+ "learning_rate": 8.860000000000002e-06,
251
+ "loss": 0.0052,
252
+ "step": 675
253
+ },
254
+ {
255
+ "epoch": 2.8225806451612905,
256
+ "grad_norm": 0.005957436747848988,
257
+ "learning_rate": 8.693333333333334e-06,
258
+ "loss": 0.0087,
259
+ "step": 700
260
+ },
261
+ {
262
+ "epoch": 2.8225806451612905,
263
+ "eval_loss": 0.003936439286917448,
264
+ "eval_runtime": 54.4976,
265
+ "eval_samples_per_second": 3.835,
266
+ "eval_steps_per_second": 0.495,
267
+ "eval_wer": 1.7211703958691909,
268
+ "step": 700
269
+ },
270
+ {
271
+ "epoch": 2.9233870967741935,
272
+ "grad_norm": 0.008426151238381863,
273
+ "learning_rate": 8.526666666666667e-06,
274
+ "loss": 0.0059,
275
+ "step": 725
276
+ },
277
+ {
278
+ "epoch": 3.024193548387097,
279
+ "grad_norm": 0.004362072329968214,
280
+ "learning_rate": 8.36e-06,
281
+ "loss": 0.0054,
282
+ "step": 750
283
+ },
284
+ {
285
+ "epoch": 3.125,
286
+ "grad_norm": 1.2939238548278809,
287
+ "learning_rate": 8.193333333333335e-06,
288
+ "loss": 0.005,
289
+ "step": 775
290
+ },
291
+ {
292
+ "epoch": 3.225806451612903,
293
+ "grad_norm": 0.0022686896845698357,
294
+ "learning_rate": 8.026666666666667e-06,
295
+ "loss": 0.0066,
296
+ "step": 800
297
+ },
298
+ {
299
+ "epoch": 3.225806451612903,
300
+ "eval_loss": 0.00420153234153986,
301
+ "eval_runtime": 54.3822,
302
+ "eval_samples_per_second": 3.843,
303
+ "eval_steps_per_second": 0.496,
304
+ "eval_wer": 1.376936316695353,
305
+ "step": 800
306
+ },
307
+ {
308
+ "epoch": 3.3266129032258065,
309
+ "grad_norm": 0.0034014617558568716,
310
+ "learning_rate": 7.860000000000001e-06,
311
+ "loss": 0.0063,
312
+ "step": 825
313
+ },
314
+ {
315
+ "epoch": 3.4274193548387095,
316
+ "grad_norm": 7.198966026306152,
317
+ "learning_rate": 7.693333333333333e-06,
318
+ "loss": 0.0052,
319
+ "step": 850
320
+ },
321
+ {
322
+ "epoch": 3.528225806451613,
323
+ "grad_norm": 0.0018629450350999832,
324
+ "learning_rate": 7.526666666666668e-06,
325
+ "loss": 0.003,
326
+ "step": 875
327
+ },
328
+ {
329
+ "epoch": 3.629032258064516,
330
+ "grad_norm": 3.725576877593994,
331
+ "learning_rate": 7.360000000000001e-06,
332
+ "loss": 0.0032,
333
+ "step": 900
334
+ },
335
+ {
336
+ "epoch": 3.629032258064516,
337
+ "eval_loss": 0.009456031955778599,
338
+ "eval_runtime": 54.5839,
339
+ "eval_samples_per_second": 3.829,
340
+ "eval_steps_per_second": 0.495,
341
+ "eval_wer": 1.0327022375215147,
342
+ "step": 900
343
+ },
344
+ {
345
+ "epoch": 3.7298387096774195,
346
+ "grad_norm": 0.3921663761138916,
347
+ "learning_rate": 7.1933333333333345e-06,
348
+ "loss": 0.0041,
349
+ "step": 925
350
+ },
351
+ {
352
+ "epoch": 3.8306451612903225,
353
+ "grad_norm": 2.3925461769104004,
354
+ "learning_rate": 7.0266666666666674e-06,
355
+ "loss": 0.0017,
356
+ "step": 950
357
+ },
358
+ {
359
+ "epoch": 3.931451612903226,
360
+ "grad_norm": 0.002618137514218688,
361
+ "learning_rate": 6.860000000000001e-06,
362
+ "loss": 0.0047,
363
+ "step": 975
364
+ },
365
+ {
366
+ "epoch": 4.032258064516129,
367
+ "grad_norm": 3.606403112411499,
368
+ "learning_rate": 6.693333333333334e-06,
369
+ "loss": 0.0027,
370
+ "step": 1000
371
+ },
372
+ {
373
+ "epoch": 4.032258064516129,
374
+ "eval_loss": 0.01142825372517109,
375
+ "eval_runtime": 54.5807,
376
+ "eval_samples_per_second": 3.829,
377
+ "eval_steps_per_second": 0.495,
378
+ "eval_wer": 1.549053356282272,
379
+ "step": 1000
380
+ },
381
+ {
382
+ "epoch": 4.133064516129032,
383
+ "grad_norm": 2.7271430492401123,
384
+ "learning_rate": 6.526666666666666e-06,
385
+ "loss": 0.0078,
386
+ "step": 1025
387
+ },
388
+ {
389
+ "epoch": 4.233870967741935,
390
+ "grad_norm": 3.484804153442383,
391
+ "learning_rate": 6.360000000000001e-06,
392
+ "loss": 0.0026,
393
+ "step": 1050
394
+ },
395
+ {
396
+ "epoch": 4.334677419354839,
397
+ "grad_norm": 0.0015495093539357185,
398
+ "learning_rate": 6.193333333333333e-06,
399
+ "loss": 0.0017,
400
+ "step": 1075
401
+ },
402
+ {
403
+ "epoch": 4.435483870967742,
404
+ "grad_norm": 0.0017610510112717748,
405
+ "learning_rate": 6.026666666666668e-06,
406
+ "loss": 0.0021,
407
+ "step": 1100
408
+ },
409
+ {
410
+ "epoch": 4.435483870967742,
411
+ "eval_loss": 0.009943087585270405,
412
+ "eval_runtime": 54.465,
413
+ "eval_samples_per_second": 3.837,
414
+ "eval_steps_per_second": 0.496,
415
+ "eval_wer": 1.7211703958691909,
416
+ "step": 1100
417
+ }
418
+ ],
419
+ "logging_steps": 25,
420
+ "max_steps": 2000,
421
+ "num_input_tokens_seen": 0,
422
+ "num_train_epochs": 9,
423
+ "save_steps": 100,
424
+ "stateful_callbacks": {
425
+ "TrainerControl": {
426
+ "args": {
427
+ "should_epoch_stop": false,
428
+ "should_evaluate": false,
429
+ "should_log": false,
430
+ "should_save": true,
431
+ "should_training_stop": false
432
+ },
433
+ "attributes": {}
434
+ }
435
+ },
436
+ "total_flos": 1.79014100779008e+19,
437
+ "train_batch_size": 16,
438
+ "trial_name": null,
439
+ "trial_params": null
440
+ }
checkpoints/checkpoint-1100/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f7a1fc1f027287c9d1d723e42e6556d0cf6728657bd6a51fde9e5c2d703ba34
3
+ size 5496
checkpoints/checkpoint-1200/config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "shreyasdesaisuperU/whisper-medium-attempt2-1000-orders",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
+ "architectures": [
7
+ "WhisperForConditionalGeneration"
8
+ ],
9
+ "attention_dropout": 0.0,
10
+ "begin_suppress_tokens": null,
11
+ "bos_token_id": 50257,
12
+ "classifier_proj_size": 256,
13
+ "d_model": 1024,
14
+ "decoder_attention_heads": 16,
15
+ "decoder_ffn_dim": 4096,
16
+ "decoder_layerdrop": 0.0,
17
+ "decoder_layers": 24,
18
+ "decoder_start_token_id": 50258,
19
+ "dropout": 0.0,
20
+ "encoder_attention_heads": 16,
21
+ "encoder_ffn_dim": 4096,
22
+ "encoder_layerdrop": 0.0,
23
+ "encoder_layers": 24,
24
+ "eos_token_id": 50257,
25
+ "forced_decoder_ids": [
26
+ [
27
+ 1,
28
+ 50259
29
+ ],
30
+ [
31
+ 2,
32
+ 50359
33
+ ],
34
+ [
35
+ 3,
36
+ 50363
37
+ ]
38
+ ],
39
+ "init_std": 0.02,
40
+ "is_encoder_decoder": true,
41
+ "mask_feature_length": 10,
42
+ "mask_feature_min_masks": 0,
43
+ "mask_feature_prob": 0.0,
44
+ "mask_time_length": 10,
45
+ "mask_time_min_masks": 2,
46
+ "mask_time_prob": 0.05,
47
+ "max_length": null,
48
+ "max_source_positions": 1500,
49
+ "max_target_positions": 448,
50
+ "median_filter_width": 7,
51
+ "model_type": "whisper",
52
+ "num_hidden_layers": 24,
53
+ "num_mel_bins": 80,
54
+ "pad_token_id": 50257,
55
+ "scale_embedding": false,
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.46.2",
58
+ "use_cache": true,
59
+ "use_weighted_layer_sum": false,
60
+ "vocab_size": 51865
61
+ }
checkpoints/checkpoint-1200/generation_config.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alignment_heads": [
3
+ [
4
+ 13,
5
+ 15
6
+ ],
7
+ [
8
+ 15,
9
+ 4
10
+ ],
11
+ [
12
+ 15,
13
+ 15
14
+ ],
15
+ [
16
+ 16,
17
+ 1
18
+ ],
19
+ [
20
+ 20,
21
+ 0
22
+ ],
23
+ [
24
+ 23,
25
+ 4
26
+ ]
27
+ ],
28
+ "begin_suppress_tokens": [
29
+ 220,
30
+ 50257
31
+ ],
32
+ "bos_token_id": 50257,
33
+ "decoder_start_token_id": 50258,
34
+ "eos_token_id": 50257,
35
+ "is_multilingual": true,
36
+ "lang_to_id": {
37
+ "<|af|>": 50327,
38
+ "<|am|>": 50334,
39
+ "<|ar|>": 50272,
40
+ "<|as|>": 50350,
41
+ "<|az|>": 50304,
42
+ "<|ba|>": 50355,
43
+ "<|be|>": 50330,
44
+ "<|bg|>": 50292,
45
+ "<|bn|>": 50302,
46
+ "<|bo|>": 50347,
47
+ "<|br|>": 50309,
48
+ "<|bs|>": 50315,
49
+ "<|ca|>": 50270,
50
+ "<|cs|>": 50283,
51
+ "<|cy|>": 50297,
52
+ "<|da|>": 50285,
53
+ "<|de|>": 50261,
54
+ "<|el|>": 50281,
55
+ "<|en|>": 50259,
56
+ "<|es|>": 50262,
57
+ "<|et|>": 50307,
58
+ "<|eu|>": 50310,
59
+ "<|fa|>": 50300,
60
+ "<|fi|>": 50277,
61
+ "<|fo|>": 50338,
62
+ "<|fr|>": 50265,
63
+ "<|gl|>": 50319,
64
+ "<|gu|>": 50333,
65
+ "<|haw|>": 50352,
66
+ "<|ha|>": 50354,
67
+ "<|he|>": 50279,
68
+ "<|hi|>": 50276,
69
+ "<|hr|>": 50291,
70
+ "<|ht|>": 50339,
71
+ "<|hu|>": 50286,
72
+ "<|hy|>": 50312,
73
+ "<|id|>": 50275,
74
+ "<|is|>": 50311,
75
+ "<|it|>": 50274,
76
+ "<|ja|>": 50266,
77
+ "<|jw|>": 50356,
78
+ "<|ka|>": 50329,
79
+ "<|kk|>": 50316,
80
+ "<|km|>": 50323,
81
+ "<|kn|>": 50306,
82
+ "<|ko|>": 50264,
83
+ "<|la|>": 50294,
84
+ "<|lb|>": 50345,
85
+ "<|ln|>": 50353,
86
+ "<|lo|>": 50336,
87
+ "<|lt|>": 50293,
88
+ "<|lv|>": 50301,
89
+ "<|mg|>": 50349,
90
+ "<|mi|>": 50295,
91
+ "<|mk|>": 50308,
92
+ "<|ml|>": 50296,
93
+ "<|mn|>": 50314,
94
+ "<|mr|>": 50320,
95
+ "<|ms|>": 50282,
96
+ "<|mt|>": 50343,
97
+ "<|my|>": 50346,
98
+ "<|ne|>": 50313,
99
+ "<|nl|>": 50271,
100
+ "<|nn|>": 50342,
101
+ "<|no|>": 50288,
102
+ "<|oc|>": 50328,
103
+ "<|pa|>": 50321,
104
+ "<|pl|>": 50269,
105
+ "<|ps|>": 50340,
106
+ "<|pt|>": 50267,
107
+ "<|ro|>": 50284,
108
+ "<|ru|>": 50263,
109
+ "<|sa|>": 50344,
110
+ "<|sd|>": 50332,
111
+ "<|si|>": 50322,
112
+ "<|sk|>": 50298,
113
+ "<|sl|>": 50305,
114
+ "<|sn|>": 50324,
115
+ "<|so|>": 50326,
116
+ "<|sq|>": 50317,
117
+ "<|sr|>": 50303,
118
+ "<|su|>": 50357,
119
+ "<|sv|>": 50273,
120
+ "<|sw|>": 50318,
121
+ "<|ta|>": 50287,
122
+ "<|te|>": 50299,
123
+ "<|tg|>": 50331,
124
+ "<|th|>": 50289,
125
+ "<|tk|>": 50341,
126
+ "<|tl|>": 50348,
127
+ "<|tr|>": 50268,
128
+ "<|tt|>": 50351,
129
+ "<|uk|>": 50280,
130
+ "<|ur|>": 50290,
131
+ "<|uz|>": 50337,
132
+ "<|vi|>": 50278,
133
+ "<|yi|>": 50335,
134
+ "<|yo|>": 50325,
135
+ "<|zh|>": 50260
136
+ },
137
+ "language": "english",
138
+ "max_initial_timestamp_index": 50,
139
+ "max_length": 448,
140
+ "no_timestamps_token_id": 50363,
141
+ "pad_token_id": 50257,
142
+ "prev_sot_token_id": 50361,
143
+ "return_timestamps": false,
144
+ "suppress_tokens": [
145
+ 1,
146
+ 2,
147
+ 7,
148
+ 8,
149
+ 9,
150
+ 10,
151
+ 14,
152
+ 25,
153
+ 26,
154
+ 27,
155
+ 28,
156
+ 29,
157
+ 31,
158
+ 58,
159
+ 59,
160
+ 60,
161
+ 61,
162
+ 62,
163
+ 63,
164
+ 90,
165
+ 91,
166
+ 92,
167
+ 93,
168
+ 359,
169
+ 503,
170
+ 522,
171
+ 542,
172
+ 873,
173
+ 893,
174
+ 902,
175
+ 918,
176
+ 922,
177
+ 931,
178
+ 1350,
179
+ 1853,
180
+ 1982,
181
+ 2460,
182
+ 2627,
183
+ 3246,
184
+ 3253,
185
+ 3268,
186
+ 3536,
187
+ 3846,
188
+ 3961,
189
+ 4183,
190
+ 4667,
191
+ 6585,
192
+ 6647,
193
+ 7273,
194
+ 9061,
195
+ 9383,
196
+ 10428,
197
+ 10929,
198
+ 11938,
199
+ 12033,
200
+ 12331,
201
+ 12562,
202
+ 13793,
203
+ 14157,
204
+ 14635,
205
+ 15265,
206
+ 15618,
207
+ 16553,
208
+ 16604,
209
+ 18362,
210
+ 18956,
211
+ 20075,
212
+ 21675,
213
+ 22520,
214
+ 26130,
215
+ 26161,
216
+ 26435,
217
+ 28279,
218
+ 29464,
219
+ 31650,
220
+ 32302,
221
+ 32470,
222
+ 36865,
223
+ 42863,
224
+ 47425,
225
+ 49870,
226
+ 50254,
227
+ 50258,
228
+ 50358,
229
+ 50359,
230
+ 50360,
231
+ 50361,
232
+ 50362
233
+ ],
234
+ "task": "transcribe",
235
+ "task_to_id": {
236
+ "transcribe": 50359,
237
+ "translate": 50358
238
+ },
239
+ "transformers_version": "4.46.2"
240
+ }
checkpoints/checkpoint-1200/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0f7fa6557f8e4908003bbd387d2f15c3a70beac66263cd571631680599d7ce6
3
+ size 3055544304
checkpoints/checkpoint-1200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cbff3f828b7a44d52de0c95e7a118141a04242d0315e54546d7c0459753c46d
3
+ size 6111664103
checkpoints/checkpoint-1200/preprocessor_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunk_length": 30,
3
+ "feature_extractor_type": "WhisperFeatureExtractor",
4
+ "feature_size": 80,
5
+ "hop_length": 160,
6
+ "n_fft": 400,
7
+ "n_samples": 480000,
8
+ "nb_max_frames": 3000,
9
+ "padding_side": "right",
10
+ "padding_value": 0.0,
11
+ "processor_class": "WhisperProcessor",
12
+ "return_attention_mask": false,
13
+ "sampling_rate": 16000
14
+ }
checkpoints/checkpoint-1200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:912b97097f1d4c3f98c785ebdbd9946964bae1da32026e20ab38d5198ef7b02a
3
+ size 14244
checkpoints/checkpoint-1200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:067f667bf1a48411b0331ba139009caedf69bb78407a00ee5dc1d45a8b6f0de4
3
+ size 1064
checkpoints/checkpoint-1200/trainer_state.json ADDED
@@ -0,0 +1,477 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.0327022375215147,
3
+ "best_model_checkpoint": "./whisper-medium-attempt2-1000-orders-eleven-labs/checkpoint-900",
4
+ "epoch": 4.838709677419355,
5
+ "eval_steps": 100,
6
+ "global_step": 1200,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.10080645161290322,
13
+ "grad_norm": 40.95878982543945,
14
+ "learning_rate": 4.2000000000000006e-07,
15
+ "loss": 3.6067,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.20161290322580644,
20
+ "grad_norm": 28.143661499023438,
21
+ "learning_rate": 9.200000000000001e-07,
22
+ "loss": 2.7859,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.3024193548387097,
27
+ "grad_norm": 10.631092071533203,
28
+ "learning_rate": 1.42e-06,
29
+ "loss": 0.9512,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.4032258064516129,
34
+ "grad_norm": 1.839168906211853,
35
+ "learning_rate": 1.9200000000000003e-06,
36
+ "loss": 0.0668,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.4032258064516129,
41
+ "eval_loss": 0.03882083296775818,
42
+ "eval_runtime": 54.3208,
43
+ "eval_samples_per_second": 3.848,
44
+ "eval_steps_per_second": 0.497,
45
+ "eval_wer": 17.38382099827883,
46
+ "step": 100
47
+ },
48
+ {
49
+ "epoch": 0.5040322580645161,
50
+ "grad_norm": 5.860921859741211,
51
+ "learning_rate": 2.42e-06,
52
+ "loss": 0.0347,
53
+ "step": 125
54
+ },
55
+ {
56
+ "epoch": 0.6048387096774194,
57
+ "grad_norm": 4.118633270263672,
58
+ "learning_rate": 2.92e-06,
59
+ "loss": 0.0292,
60
+ "step": 150
61
+ },
62
+ {
63
+ "epoch": 0.7056451612903226,
64
+ "grad_norm": 0.7270166873931885,
65
+ "learning_rate": 3.4200000000000007e-06,
66
+ "loss": 0.0204,
67
+ "step": 175
68
+ },
69
+ {
70
+ "epoch": 0.8064516129032258,
71
+ "grad_norm": 0.20527507364749908,
72
+ "learning_rate": 3.920000000000001e-06,
73
+ "loss": 0.0142,
74
+ "step": 200
75
+ },
76
+ {
77
+ "epoch": 0.8064516129032258,
78
+ "eval_loss": 0.00608229311183095,
79
+ "eval_runtime": 54.4284,
80
+ "eval_samples_per_second": 3.84,
81
+ "eval_steps_per_second": 0.496,
82
+ "eval_wer": 11.359724612736661,
83
+ "step": 200
84
+ },
85
+ {
86
+ "epoch": 0.907258064516129,
87
+ "grad_norm": 3.9435720443725586,
88
+ "learning_rate": 4.42e-06,
89
+ "loss": 0.0091,
90
+ "step": 225
91
+ },
92
+ {
93
+ "epoch": 1.0080645161290323,
94
+ "grad_norm": 0.173879012465477,
95
+ "learning_rate": 4.92e-06,
96
+ "loss": 0.0112,
97
+ "step": 250
98
+ },
99
+ {
100
+ "epoch": 1.1088709677419355,
101
+ "grad_norm": 0.018560703843832016,
102
+ "learning_rate": 5.420000000000001e-06,
103
+ "loss": 0.0063,
104
+ "step": 275
105
+ },
106
+ {
107
+ "epoch": 1.2096774193548387,
108
+ "grad_norm": 4.80478048324585,
109
+ "learning_rate": 5.92e-06,
110
+ "loss": 0.0075,
111
+ "step": 300
112
+ },
113
+ {
114
+ "epoch": 1.2096774193548387,
115
+ "eval_loss": 0.007460152264684439,
116
+ "eval_runtime": 54.3247,
117
+ "eval_samples_per_second": 3.847,
118
+ "eval_steps_per_second": 0.497,
119
+ "eval_wer": 9.63855421686747,
120
+ "step": 300
121
+ },
122
+ {
123
+ "epoch": 1.310483870967742,
124
+ "grad_norm": 0.022897183895111084,
125
+ "learning_rate": 6.42e-06,
126
+ "loss": 0.0163,
127
+ "step": 325
128
+ },
129
+ {
130
+ "epoch": 1.4112903225806452,
131
+ "grad_norm": 0.06287501752376556,
132
+ "learning_rate": 6.92e-06,
133
+ "loss": 0.0078,
134
+ "step": 350
135
+ },
136
+ {
137
+ "epoch": 1.5120967741935485,
138
+ "grad_norm": 0.019055448472499847,
139
+ "learning_rate": 7.420000000000001e-06,
140
+ "loss": 0.0057,
141
+ "step": 375
142
+ },
143
+ {
144
+ "epoch": 1.6129032258064515,
145
+ "grad_norm": 0.04413852468132973,
146
+ "learning_rate": 7.92e-06,
147
+ "loss": 0.0073,
148
+ "step": 400
149
+ },
150
+ {
151
+ "epoch": 1.6129032258064515,
152
+ "eval_loss": 0.010435141623020172,
153
+ "eval_runtime": 54.1901,
154
+ "eval_samples_per_second": 3.857,
155
+ "eval_steps_per_second": 0.498,
156
+ "eval_wer": 7.74526678141136,
157
+ "step": 400
158
+ },
159
+ {
160
+ "epoch": 1.713709677419355,
161
+ "grad_norm": 0.6023885607719421,
162
+ "learning_rate": 8.42e-06,
163
+ "loss": 0.0173,
164
+ "step": 425
165
+ },
166
+ {
167
+ "epoch": 1.814516129032258,
168
+ "grad_norm": 0.062626414000988,
169
+ "learning_rate": 8.920000000000001e-06,
170
+ "loss": 0.0114,
171
+ "step": 450
172
+ },
173
+ {
174
+ "epoch": 1.9153225806451613,
175
+ "grad_norm": 4.65421724319458,
176
+ "learning_rate": 9.42e-06,
177
+ "loss": 0.0046,
178
+ "step": 475
179
+ },
180
+ {
181
+ "epoch": 2.0161290322580645,
182
+ "grad_norm": 0.11005760729312897,
183
+ "learning_rate": 9.920000000000002e-06,
184
+ "loss": 0.0087,
185
+ "step": 500
186
+ },
187
+ {
188
+ "epoch": 2.0161290322580645,
189
+ "eval_loss": 0.012454940006136894,
190
+ "eval_runtime": 54.5944,
191
+ "eval_samples_per_second": 3.828,
192
+ "eval_steps_per_second": 0.495,
193
+ "eval_wer": 2.9259896729776247,
194
+ "step": 500
195
+ },
196
+ {
197
+ "epoch": 2.1169354838709675,
198
+ "grad_norm": 0.016795210540294647,
199
+ "learning_rate": 9.86e-06,
200
+ "loss": 0.0076,
201
+ "step": 525
202
+ },
203
+ {
204
+ "epoch": 2.217741935483871,
205
+ "grad_norm": 0.1811501681804657,
206
+ "learning_rate": 9.693333333333334e-06,
207
+ "loss": 0.007,
208
+ "step": 550
209
+ },
210
+ {
211
+ "epoch": 2.318548387096774,
212
+ "grad_norm": 0.0245444867759943,
213
+ "learning_rate": 9.526666666666668e-06,
214
+ "loss": 0.0099,
215
+ "step": 575
216
+ },
217
+ {
218
+ "epoch": 2.4193548387096775,
219
+ "grad_norm": 0.06346794962882996,
220
+ "learning_rate": 9.360000000000002e-06,
221
+ "loss": 0.0046,
222
+ "step": 600
223
+ },
224
+ {
225
+ "epoch": 2.4193548387096775,
226
+ "eval_loss": 0.00803467072546482,
227
+ "eval_runtime": 54.4661,
228
+ "eval_samples_per_second": 3.837,
229
+ "eval_steps_per_second": 0.496,
230
+ "eval_wer": 1.549053356282272,
231
+ "step": 600
232
+ },
233
+ {
234
+ "epoch": 2.5201612903225805,
235
+ "grad_norm": 0.037542328238487244,
236
+ "learning_rate": 9.193333333333334e-06,
237
+ "loss": 0.0065,
238
+ "step": 625
239
+ },
240
+ {
241
+ "epoch": 2.620967741935484,
242
+ "grad_norm": 2.8664350509643555,
243
+ "learning_rate": 9.026666666666666e-06,
244
+ "loss": 0.0092,
245
+ "step": 650
246
+ },
247
+ {
248
+ "epoch": 2.721774193548387,
249
+ "grad_norm": 4.69785737991333,
250
+ "learning_rate": 8.860000000000002e-06,
251
+ "loss": 0.0052,
252
+ "step": 675
253
+ },
254
+ {
255
+ "epoch": 2.8225806451612905,
256
+ "grad_norm": 0.005957436747848988,
257
+ "learning_rate": 8.693333333333334e-06,
258
+ "loss": 0.0087,
259
+ "step": 700
260
+ },
261
+ {
262
+ "epoch": 2.8225806451612905,
263
+ "eval_loss": 0.003936439286917448,
264
+ "eval_runtime": 54.4976,
265
+ "eval_samples_per_second": 3.835,
266
+ "eval_steps_per_second": 0.495,
267
+ "eval_wer": 1.7211703958691909,
268
+ "step": 700
269
+ },
270
+ {
271
+ "epoch": 2.9233870967741935,
272
+ "grad_norm": 0.008426151238381863,
273
+ "learning_rate": 8.526666666666667e-06,
274
+ "loss": 0.0059,
275
+ "step": 725
276
+ },
277
+ {
278
+ "epoch": 3.024193548387097,
279
+ "grad_norm": 0.004362072329968214,
280
+ "learning_rate": 8.36e-06,
281
+ "loss": 0.0054,
282
+ "step": 750
283
+ },
284
+ {
285
+ "epoch": 3.125,
286
+ "grad_norm": 1.2939238548278809,
287
+ "learning_rate": 8.193333333333335e-06,
288
+ "loss": 0.005,
289
+ "step": 775
290
+ },
291
+ {
292
+ "epoch": 3.225806451612903,
293
+ "grad_norm": 0.0022686896845698357,
294
+ "learning_rate": 8.026666666666667e-06,
295
+ "loss": 0.0066,
296
+ "step": 800
297
+ },
298
+ {
299
+ "epoch": 3.225806451612903,
300
+ "eval_loss": 0.00420153234153986,
301
+ "eval_runtime": 54.3822,
302
+ "eval_samples_per_second": 3.843,
303
+ "eval_steps_per_second": 0.496,
304
+ "eval_wer": 1.376936316695353,
305
+ "step": 800
306
+ },
307
+ {
308
+ "epoch": 3.3266129032258065,
309
+ "grad_norm": 0.0034014617558568716,
310
+ "learning_rate": 7.860000000000001e-06,
311
+ "loss": 0.0063,
312
+ "step": 825
313
+ },
314
+ {
315
+ "epoch": 3.4274193548387095,
316
+ "grad_norm": 7.198966026306152,
317
+ "learning_rate": 7.693333333333333e-06,
318
+ "loss": 0.0052,
319
+ "step": 850
320
+ },
321
+ {
322
+ "epoch": 3.528225806451613,
323
+ "grad_norm": 0.0018629450350999832,
324
+ "learning_rate": 7.526666666666668e-06,
325
+ "loss": 0.003,
326
+ "step": 875
327
+ },
328
+ {
329
+ "epoch": 3.629032258064516,
330
+ "grad_norm": 3.725576877593994,
331
+ "learning_rate": 7.360000000000001e-06,
332
+ "loss": 0.0032,
333
+ "step": 900
334
+ },
335
+ {
336
+ "epoch": 3.629032258064516,
337
+ "eval_loss": 0.009456031955778599,
338
+ "eval_runtime": 54.5839,
339
+ "eval_samples_per_second": 3.829,
340
+ "eval_steps_per_second": 0.495,
341
+ "eval_wer": 1.0327022375215147,
342
+ "step": 900
343
+ },
344
+ {
345
+ "epoch": 3.7298387096774195,
346
+ "grad_norm": 0.3921663761138916,
347
+ "learning_rate": 7.1933333333333345e-06,
348
+ "loss": 0.0041,
349
+ "step": 925
350
+ },
351
+ {
352
+ "epoch": 3.8306451612903225,
353
+ "grad_norm": 2.3925461769104004,
354
+ "learning_rate": 7.0266666666666674e-06,
355
+ "loss": 0.0017,
356
+ "step": 950
357
+ },
358
+ {
359
+ "epoch": 3.931451612903226,
360
+ "grad_norm": 0.002618137514218688,
361
+ "learning_rate": 6.860000000000001e-06,
362
+ "loss": 0.0047,
363
+ "step": 975
364
+ },
365
+ {
366
+ "epoch": 4.032258064516129,
367
+ "grad_norm": 3.606403112411499,
368
+ "learning_rate": 6.693333333333334e-06,
369
+ "loss": 0.0027,
370
+ "step": 1000
371
+ },
372
+ {
373
+ "epoch": 4.032258064516129,
374
+ "eval_loss": 0.01142825372517109,
375
+ "eval_runtime": 54.5807,
376
+ "eval_samples_per_second": 3.829,
377
+ "eval_steps_per_second": 0.495,
378
+ "eval_wer": 1.549053356282272,
379
+ "step": 1000
380
+ },
381
+ {
382
+ "epoch": 4.133064516129032,
383
+ "grad_norm": 2.7271430492401123,
384
+ "learning_rate": 6.526666666666666e-06,
385
+ "loss": 0.0078,
386
+ "step": 1025
387
+ },
388
+ {
389
+ "epoch": 4.233870967741935,
390
+ "grad_norm": 3.484804153442383,
391
+ "learning_rate": 6.360000000000001e-06,
392
+ "loss": 0.0026,
393
+ "step": 1050
394
+ },
395
+ {
396
+ "epoch": 4.334677419354839,
397
+ "grad_norm": 0.0015495093539357185,
398
+ "learning_rate": 6.193333333333333e-06,
399
+ "loss": 0.0017,
400
+ "step": 1075
401
+ },
402
+ {
403
+ "epoch": 4.435483870967742,
404
+ "grad_norm": 0.0017610510112717748,
405
+ "learning_rate": 6.026666666666668e-06,
406
+ "loss": 0.0021,
407
+ "step": 1100
408
+ },
409
+ {
410
+ "epoch": 4.435483870967742,
411
+ "eval_loss": 0.009943087585270405,
412
+ "eval_runtime": 54.465,
413
+ "eval_samples_per_second": 3.837,
414
+ "eval_steps_per_second": 0.496,
415
+ "eval_wer": 1.7211703958691909,
416
+ "step": 1100
417
+ },
418
+ {
419
+ "epoch": 4.536290322580645,
420
+ "grad_norm": 0.0015177098102867603,
421
+ "learning_rate": 5.86e-06,
422
+ "loss": 0.0022,
423
+ "step": 1125
424
+ },
425
+ {
426
+ "epoch": 4.637096774193548,
427
+ "grad_norm": 0.06703988462686539,
428
+ "learning_rate": 5.6933333333333344e-06,
429
+ "loss": 0.0099,
430
+ "step": 1150
431
+ },
432
+ {
433
+ "epoch": 4.737903225806452,
434
+ "grad_norm": 0.0013143372489139438,
435
+ "learning_rate": 5.5266666666666666e-06,
436
+ "loss": 0.0028,
437
+ "step": 1175
438
+ },
439
+ {
440
+ "epoch": 4.838709677419355,
441
+ "grad_norm": 3.9103496074676514,
442
+ "learning_rate": 5.36e-06,
443
+ "loss": 0.0039,
444
+ "step": 1200
445
+ },
446
+ {
447
+ "epoch": 4.838709677419355,
448
+ "eval_loss": 0.0121237151324749,
449
+ "eval_runtime": 54.615,
450
+ "eval_samples_per_second": 3.827,
451
+ "eval_steps_per_second": 0.494,
452
+ "eval_wer": 1.8932874354561102,
453
+ "step": 1200
454
+ }
455
+ ],
456
+ "logging_steps": 25,
457
+ "max_steps": 2000,
458
+ "num_input_tokens_seen": 0,
459
+ "num_train_epochs": 9,
460
+ "save_steps": 100,
461
+ "stateful_callbacks": {
462
+ "TrainerControl": {
463
+ "args": {
464
+ "should_epoch_stop": false,
465
+ "should_evaluate": false,
466
+ "should_log": false,
467
+ "should_save": true,
468
+ "should_training_stop": false
469
+ },
470
+ "attributes": {}
471
+ }
472
+ },
473
+ "total_flos": 1.95343779299328e+19,
474
+ "train_batch_size": 16,
475
+ "trial_name": null,
476
+ "trial_params": null
477
+ }
checkpoints/checkpoint-1200/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f7a1fc1f027287c9d1d723e42e6556d0cf6728657bd6a51fde9e5c2d703ba34
3
+ size 5496
checkpoints/checkpoint-1300/config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "shreyasdesaisuperU/whisper-medium-attempt2-1000-orders",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
+ "architectures": [
7
+ "WhisperForConditionalGeneration"
8
+ ],
9
+ "attention_dropout": 0.0,
10
+ "begin_suppress_tokens": null,
11
+ "bos_token_id": 50257,
12
+ "classifier_proj_size": 256,
13
+ "d_model": 1024,
14
+ "decoder_attention_heads": 16,
15
+ "decoder_ffn_dim": 4096,
16
+ "decoder_layerdrop": 0.0,
17
+ "decoder_layers": 24,
18
+ "decoder_start_token_id": 50258,
19
+ "dropout": 0.0,
20
+ "encoder_attention_heads": 16,
21
+ "encoder_ffn_dim": 4096,
22
+ "encoder_layerdrop": 0.0,
23
+ "encoder_layers": 24,
24
+ "eos_token_id": 50257,
25
+ "forced_decoder_ids": [
26
+ [
27
+ 1,
28
+ 50259
29
+ ],
30
+ [
31
+ 2,
32
+ 50359
33
+ ],
34
+ [
35
+ 3,
36
+ 50363
37
+ ]
38
+ ],
39
+ "init_std": 0.02,
40
+ "is_encoder_decoder": true,
41
+ "mask_feature_length": 10,
42
+ "mask_feature_min_masks": 0,
43
+ "mask_feature_prob": 0.0,
44
+ "mask_time_length": 10,
45
+ "mask_time_min_masks": 2,
46
+ "mask_time_prob": 0.05,
47
+ "max_length": null,
48
+ "max_source_positions": 1500,
49
+ "max_target_positions": 448,
50
+ "median_filter_width": 7,
51
+ "model_type": "whisper",
52
+ "num_hidden_layers": 24,
53
+ "num_mel_bins": 80,
54
+ "pad_token_id": 50257,
55
+ "scale_embedding": false,
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.46.2",
58
+ "use_cache": true,
59
+ "use_weighted_layer_sum": false,
60
+ "vocab_size": 51865
61
+ }
checkpoints/checkpoint-1300/generation_config.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alignment_heads": [
3
+ [
4
+ 13,
5
+ 15
6
+ ],
7
+ [
8
+ 15,
9
+ 4
10
+ ],
11
+ [
12
+ 15,
13
+ 15
14
+ ],
15
+ [
16
+ 16,
17
+ 1
18
+ ],
19
+ [
20
+ 20,
21
+ 0
22
+ ],
23
+ [
24
+ 23,
25
+ 4
26
+ ]
27
+ ],
28
+ "begin_suppress_tokens": [
29
+ 220,
30
+ 50257
31
+ ],
32
+ "bos_token_id": 50257,
33
+ "decoder_start_token_id": 50258,
34
+ "eos_token_id": 50257,
35
+ "is_multilingual": true,
36
+ "lang_to_id": {
37
+ "<|af|>": 50327,
38
+ "<|am|>": 50334,
39
+ "<|ar|>": 50272,
40
+ "<|as|>": 50350,
41
+ "<|az|>": 50304,
42
+ "<|ba|>": 50355,
43
+ "<|be|>": 50330,
44
+ "<|bg|>": 50292,
45
+ "<|bn|>": 50302,
46
+ "<|bo|>": 50347,
47
+ "<|br|>": 50309,
48
+ "<|bs|>": 50315,
49
+ "<|ca|>": 50270,
50
+ "<|cs|>": 50283,
51
+ "<|cy|>": 50297,
52
+ "<|da|>": 50285,
53
+ "<|de|>": 50261,
54
+ "<|el|>": 50281,
55
+ "<|en|>": 50259,
56
+ "<|es|>": 50262,
57
+ "<|et|>": 50307,
58
+ "<|eu|>": 50310,
59
+ "<|fa|>": 50300,
60
+ "<|fi|>": 50277,
61
+ "<|fo|>": 50338,
62
+ "<|fr|>": 50265,
63
+ "<|gl|>": 50319,
64
+ "<|gu|>": 50333,
65
+ "<|haw|>": 50352,
66
+ "<|ha|>": 50354,
67
+ "<|he|>": 50279,
68
+ "<|hi|>": 50276,
69
+ "<|hr|>": 50291,
70
+ "<|ht|>": 50339,
71
+ "<|hu|>": 50286,
72
+ "<|hy|>": 50312,
73
+ "<|id|>": 50275,
74
+ "<|is|>": 50311,
75
+ "<|it|>": 50274,
76
+ "<|ja|>": 50266,
77
+ "<|jw|>": 50356,
78
+ "<|ka|>": 50329,
79
+ "<|kk|>": 50316,
80
+ "<|km|>": 50323,
81
+ "<|kn|>": 50306,
82
+ "<|ko|>": 50264,
83
+ "<|la|>": 50294,
84
+ "<|lb|>": 50345,
85
+ "<|ln|>": 50353,
86
+ "<|lo|>": 50336,
87
+ "<|lt|>": 50293,
88
+ "<|lv|>": 50301,
89
+ "<|mg|>": 50349,
90
+ "<|mi|>": 50295,
91
+ "<|mk|>": 50308,
92
+ "<|ml|>": 50296,
93
+ "<|mn|>": 50314,
94
+ "<|mr|>": 50320,
95
+ "<|ms|>": 50282,
96
+ "<|mt|>": 50343,
97
+ "<|my|>": 50346,
98
+ "<|ne|>": 50313,
99
+ "<|nl|>": 50271,
100
+ "<|nn|>": 50342,
101
+ "<|no|>": 50288,
102
+ "<|oc|>": 50328,
103
+ "<|pa|>": 50321,
104
+ "<|pl|>": 50269,
105
+ "<|ps|>": 50340,
106
+ "<|pt|>": 50267,
107
+ "<|ro|>": 50284,
108
+ "<|ru|>": 50263,
109
+ "<|sa|>": 50344,
110
+ "<|sd|>": 50332,
111
+ "<|si|>": 50322,
112
+ "<|sk|>": 50298,
113
+ "<|sl|>": 50305,
114
+ "<|sn|>": 50324,
115
+ "<|so|>": 50326,
116
+ "<|sq|>": 50317,
117
+ "<|sr|>": 50303,
118
+ "<|su|>": 50357,
119
+ "<|sv|>": 50273,
120
+ "<|sw|>": 50318,
121
+ "<|ta|>": 50287,
122
+ "<|te|>": 50299,
123
+ "<|tg|>": 50331,
124
+ "<|th|>": 50289,
125
+ "<|tk|>": 50341,
126
+ "<|tl|>": 50348,
127
+ "<|tr|>": 50268,
128
+ "<|tt|>": 50351,
129
+ "<|uk|>": 50280,
130
+ "<|ur|>": 50290,
131
+ "<|uz|>": 50337,
132
+ "<|vi|>": 50278,
133
+ "<|yi|>": 50335,
134
+ "<|yo|>": 50325,
135
+ "<|zh|>": 50260
136
+ },
137
+ "language": "english",
138
+ "max_initial_timestamp_index": 50,
139
+ "max_length": 448,
140
+ "no_timestamps_token_id": 50363,
141
+ "pad_token_id": 50257,
142
+ "prev_sot_token_id": 50361,
143
+ "return_timestamps": false,
144
+ "suppress_tokens": [
145
+ 1,
146
+ 2,
147
+ 7,
148
+ 8,
149
+ 9,
150
+ 10,
151
+ 14,
152
+ 25,
153
+ 26,
154
+ 27,
155
+ 28,
156
+ 29,
157
+ 31,
158
+ 58,
159
+ 59,
160
+ 60,
161
+ 61,
162
+ 62,
163
+ 63,
164
+ 90,
165
+ 91,
166
+ 92,
167
+ 93,
168
+ 359,
169
+ 503,
170
+ 522,
171
+ 542,
172
+ 873,
173
+ 893,
174
+ 902,
175
+ 918,
176
+ 922,
177
+ 931,
178
+ 1350,
179
+ 1853,
180
+ 1982,
181
+ 2460,
182
+ 2627,
183
+ 3246,
184
+ 3253,
185
+ 3268,
186
+ 3536,
187
+ 3846,
188
+ 3961,
189
+ 4183,
190
+ 4667,
191
+ 6585,
192
+ 6647,
193
+ 7273,
194
+ 9061,
195
+ 9383,
196
+ 10428,
197
+ 10929,
198
+ 11938,
199
+ 12033,
200
+ 12331,
201
+ 12562,
202
+ 13793,
203
+ 14157,
204
+ 14635,
205
+ 15265,
206
+ 15618,
207
+ 16553,
208
+ 16604,
209
+ 18362,
210
+ 18956,
211
+ 20075,
212
+ 21675,
213
+ 22520,
214
+ 26130,
215
+ 26161,
216
+ 26435,
217
+ 28279,
218
+ 29464,
219
+ 31650,
220
+ 32302,
221
+ 32470,
222
+ 36865,
223
+ 42863,
224
+ 47425,
225
+ 49870,
226
+ 50254,
227
+ 50258,
228
+ 50358,
229
+ 50359,
230
+ 50360,
231
+ 50361,
232
+ 50362
233
+ ],
234
+ "task": "transcribe",
235
+ "task_to_id": {
236
+ "transcribe": 50359,
237
+ "translate": 50358
238
+ },
239
+ "transformers_version": "4.46.2"
240
+ }
checkpoints/checkpoint-1300/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c45ba4d1b239ea7f9766425a3d0f8ff60f11f166e1aadf47d7a4d571cf90b3d4
3
+ size 3055544304
checkpoints/checkpoint-1300/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80d82c152ae23952a2a7336bc197bd3f5453046878347db533536b013e79a81d
3
+ size 6111664103
checkpoints/checkpoint-1300/preprocessor_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "chunk_length": 30,
3
+ "feature_extractor_type": "WhisperFeatureExtractor",
4
+ "feature_size": 80,
5
+ "hop_length": 160,
6
+ "n_fft": 400,
7
+ "n_samples": 480000,
8
+ "nb_max_frames": 3000,
9
+ "padding_side": "right",
10
+ "padding_value": 0.0,
11
+ "processor_class": "WhisperProcessor",
12
+ "return_attention_mask": false,
13
+ "sampling_rate": 16000
14
+ }
checkpoints/checkpoint-1300/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fb75196bc20f6a6c3cb0bb2eb12be8bb3853cca8737ce6bbfa1ef07b1ece09d
3
+ size 14244
checkpoints/checkpoint-1300/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b45e1ef697e353e493c116c498700947a2f57225666098bd0afd3e051b66541a
3
+ size 1064
checkpoints/checkpoint-1300/trainer_state.json ADDED
@@ -0,0 +1,514 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.0327022375215147,
3
+ "best_model_checkpoint": "./whisper-medium-attempt2-1000-orders-eleven-labs/checkpoint-900",
4
+ "epoch": 5.241935483870968,
5
+ "eval_steps": 100,
6
+ "global_step": 1300,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.10080645161290322,
13
+ "grad_norm": 40.95878982543945,
14
+ "learning_rate": 4.2000000000000006e-07,
15
+ "loss": 3.6067,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.20161290322580644,
20
+ "grad_norm": 28.143661499023438,
21
+ "learning_rate": 9.200000000000001e-07,
22
+ "loss": 2.7859,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.3024193548387097,
27
+ "grad_norm": 10.631092071533203,
28
+ "learning_rate": 1.42e-06,
29
+ "loss": 0.9512,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.4032258064516129,
34
+ "grad_norm": 1.839168906211853,
35
+ "learning_rate": 1.9200000000000003e-06,
36
+ "loss": 0.0668,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.4032258064516129,
41
+ "eval_loss": 0.03882083296775818,
42
+ "eval_runtime": 54.3208,
43
+ "eval_samples_per_second": 3.848,
44
+ "eval_steps_per_second": 0.497,
45
+ "eval_wer": 17.38382099827883,
46
+ "step": 100
47
+ },
48
+ {
49
+ "epoch": 0.5040322580645161,
50
+ "grad_norm": 5.860921859741211,
51
+ "learning_rate": 2.42e-06,
52
+ "loss": 0.0347,
53
+ "step": 125
54
+ },
55
+ {
56
+ "epoch": 0.6048387096774194,
57
+ "grad_norm": 4.118633270263672,
58
+ "learning_rate": 2.92e-06,
59
+ "loss": 0.0292,
60
+ "step": 150
61
+ },
62
+ {
63
+ "epoch": 0.7056451612903226,
64
+ "grad_norm": 0.7270166873931885,
65
+ "learning_rate": 3.4200000000000007e-06,
66
+ "loss": 0.0204,
67
+ "step": 175
68
+ },
69
+ {
70
+ "epoch": 0.8064516129032258,
71
+ "grad_norm": 0.20527507364749908,
72
+ "learning_rate": 3.920000000000001e-06,
73
+ "loss": 0.0142,
74
+ "step": 200
75
+ },
76
+ {
77
+ "epoch": 0.8064516129032258,
78
+ "eval_loss": 0.00608229311183095,
79
+ "eval_runtime": 54.4284,
80
+ "eval_samples_per_second": 3.84,
81
+ "eval_steps_per_second": 0.496,
82
+ "eval_wer": 11.359724612736661,
83
+ "step": 200
84
+ },
85
+ {
86
+ "epoch": 0.907258064516129,
87
+ "grad_norm": 3.9435720443725586,
88
+ "learning_rate": 4.42e-06,
89
+ "loss": 0.0091,
90
+ "step": 225
91
+ },
92
+ {
93
+ "epoch": 1.0080645161290323,
94
+ "grad_norm": 0.173879012465477,
95
+ "learning_rate": 4.92e-06,
96
+ "loss": 0.0112,
97
+ "step": 250
98
+ },
99
+ {
100
+ "epoch": 1.1088709677419355,
101
+ "grad_norm": 0.018560703843832016,
102
+ "learning_rate": 5.420000000000001e-06,
103
+ "loss": 0.0063,
104
+ "step": 275
105
+ },
106
+ {
107
+ "epoch": 1.2096774193548387,
108
+ "grad_norm": 4.80478048324585,
109
+ "learning_rate": 5.92e-06,
110
+ "loss": 0.0075,
111
+ "step": 300
112
+ },
113
+ {
114
+ "epoch": 1.2096774193548387,
115
+ "eval_loss": 0.007460152264684439,
116
+ "eval_runtime": 54.3247,
117
+ "eval_samples_per_second": 3.847,
118
+ "eval_steps_per_second": 0.497,
119
+ "eval_wer": 9.63855421686747,
120
+ "step": 300
121
+ },
122
+ {
123
+ "epoch": 1.310483870967742,
124
+ "grad_norm": 0.022897183895111084,
125
+ "learning_rate": 6.42e-06,
126
+ "loss": 0.0163,
127
+ "step": 325
128
+ },
129
+ {
130
+ "epoch": 1.4112903225806452,
131
+ "grad_norm": 0.06287501752376556,
132
+ "learning_rate": 6.92e-06,
133
+ "loss": 0.0078,
134
+ "step": 350
135
+ },
136
+ {
137
+ "epoch": 1.5120967741935485,
138
+ "grad_norm": 0.019055448472499847,
139
+ "learning_rate": 7.420000000000001e-06,
140
+ "loss": 0.0057,
141
+ "step": 375
142
+ },
143
+ {
144
+ "epoch": 1.6129032258064515,
145
+ "grad_norm": 0.04413852468132973,
146
+ "learning_rate": 7.92e-06,
147
+ "loss": 0.0073,
148
+ "step": 400
149
+ },
150
+ {
151
+ "epoch": 1.6129032258064515,
152
+ "eval_loss": 0.010435141623020172,
153
+ "eval_runtime": 54.1901,
154
+ "eval_samples_per_second": 3.857,
155
+ "eval_steps_per_second": 0.498,
156
+ "eval_wer": 7.74526678141136,
157
+ "step": 400
158
+ },
159
+ {
160
+ "epoch": 1.713709677419355,
161
+ "grad_norm": 0.6023885607719421,
162
+ "learning_rate": 8.42e-06,
163
+ "loss": 0.0173,
164
+ "step": 425
165
+ },
166
+ {
167
+ "epoch": 1.814516129032258,
168
+ "grad_norm": 0.062626414000988,
169
+ "learning_rate": 8.920000000000001e-06,
170
+ "loss": 0.0114,
171
+ "step": 450
172
+ },
173
+ {
174
+ "epoch": 1.9153225806451613,
175
+ "grad_norm": 4.65421724319458,
176
+ "learning_rate": 9.42e-06,
177
+ "loss": 0.0046,
178
+ "step": 475
179
+ },
180
+ {
181
+ "epoch": 2.0161290322580645,
182
+ "grad_norm": 0.11005760729312897,
183
+ "learning_rate": 9.920000000000002e-06,
184
+ "loss": 0.0087,
185
+ "step": 500
186
+ },
187
+ {
188
+ "epoch": 2.0161290322580645,
189
+ "eval_loss": 0.012454940006136894,
190
+ "eval_runtime": 54.5944,
191
+ "eval_samples_per_second": 3.828,
192
+ "eval_steps_per_second": 0.495,
193
+ "eval_wer": 2.9259896729776247,
194
+ "step": 500
195
+ },
196
+ {
197
+ "epoch": 2.1169354838709675,
198
+ "grad_norm": 0.016795210540294647,
199
+ "learning_rate": 9.86e-06,
200
+ "loss": 0.0076,
201
+ "step": 525
202
+ },
203
+ {
204
+ "epoch": 2.217741935483871,
205
+ "grad_norm": 0.1811501681804657,
206
+ "learning_rate": 9.693333333333334e-06,
207
+ "loss": 0.007,
208
+ "step": 550
209
+ },
210
+ {
211
+ "epoch": 2.318548387096774,
212
+ "grad_norm": 0.0245444867759943,
213
+ "learning_rate": 9.526666666666668e-06,
214
+ "loss": 0.0099,
215
+ "step": 575
216
+ },
217
+ {
218
+ "epoch": 2.4193548387096775,
219
+ "grad_norm": 0.06346794962882996,
220
+ "learning_rate": 9.360000000000002e-06,
221
+ "loss": 0.0046,
222
+ "step": 600
223
+ },
224
+ {
225
+ "epoch": 2.4193548387096775,
226
+ "eval_loss": 0.00803467072546482,
227
+ "eval_runtime": 54.4661,
228
+ "eval_samples_per_second": 3.837,
229
+ "eval_steps_per_second": 0.496,
230
+ "eval_wer": 1.549053356282272,
231
+ "step": 600
232
+ },
233
+ {
234
+ "epoch": 2.5201612903225805,
235
+ "grad_norm": 0.037542328238487244,
236
+ "learning_rate": 9.193333333333334e-06,
237
+ "loss": 0.0065,
238
+ "step": 625
239
+ },
240
+ {
241
+ "epoch": 2.620967741935484,
242
+ "grad_norm": 2.8664350509643555,
243
+ "learning_rate": 9.026666666666666e-06,
244
+ "loss": 0.0092,
245
+ "step": 650
246
+ },
247
+ {
248
+ "epoch": 2.721774193548387,
249
+ "grad_norm": 4.69785737991333,
250
+ "learning_rate": 8.860000000000002e-06,
251
+ "loss": 0.0052,
252
+ "step": 675
253
+ },
254
+ {
255
+ "epoch": 2.8225806451612905,
256
+ "grad_norm": 0.005957436747848988,
257
+ "learning_rate": 8.693333333333334e-06,
258
+ "loss": 0.0087,
259
+ "step": 700
260
+ },
261
+ {
262
+ "epoch": 2.8225806451612905,
263
+ "eval_loss": 0.003936439286917448,
264
+ "eval_runtime": 54.4976,
265
+ "eval_samples_per_second": 3.835,
266
+ "eval_steps_per_second": 0.495,
267
+ "eval_wer": 1.7211703958691909,
268
+ "step": 700
269
+ },
270
+ {
271
+ "epoch": 2.9233870967741935,
272
+ "grad_norm": 0.008426151238381863,
273
+ "learning_rate": 8.526666666666667e-06,
274
+ "loss": 0.0059,
275
+ "step": 725
276
+ },
277
+ {
278
+ "epoch": 3.024193548387097,
279
+ "grad_norm": 0.004362072329968214,
280
+ "learning_rate": 8.36e-06,
281
+ "loss": 0.0054,
282
+ "step": 750
283
+ },
284
+ {
285
+ "epoch": 3.125,
286
+ "grad_norm": 1.2939238548278809,
287
+ "learning_rate": 8.193333333333335e-06,
288
+ "loss": 0.005,
289
+ "step": 775
290
+ },
291
+ {
292
+ "epoch": 3.225806451612903,
293
+ "grad_norm": 0.0022686896845698357,
294
+ "learning_rate": 8.026666666666667e-06,
295
+ "loss": 0.0066,
296
+ "step": 800
297
+ },
298
+ {
299
+ "epoch": 3.225806451612903,
300
+ "eval_loss": 0.00420153234153986,
301
+ "eval_runtime": 54.3822,
302
+ "eval_samples_per_second": 3.843,
303
+ "eval_steps_per_second": 0.496,
304
+ "eval_wer": 1.376936316695353,
305
+ "step": 800
306
+ },
307
+ {
308
+ "epoch": 3.3266129032258065,
309
+ "grad_norm": 0.0034014617558568716,
310
+ "learning_rate": 7.860000000000001e-06,
311
+ "loss": 0.0063,
312
+ "step": 825
313
+ },
314
+ {
315
+ "epoch": 3.4274193548387095,
316
+ "grad_norm": 7.198966026306152,
317
+ "learning_rate": 7.693333333333333e-06,
318
+ "loss": 0.0052,
319
+ "step": 850
320
+ },
321
+ {
322
+ "epoch": 3.528225806451613,
323
+ "grad_norm": 0.0018629450350999832,
324
+ "learning_rate": 7.526666666666668e-06,
325
+ "loss": 0.003,
326
+ "step": 875
327
+ },
328
+ {
329
+ "epoch": 3.629032258064516,
330
+ "grad_norm": 3.725576877593994,
331
+ "learning_rate": 7.360000000000001e-06,
332
+ "loss": 0.0032,
333
+ "step": 900
334
+ },
335
+ {
336
+ "epoch": 3.629032258064516,
337
+ "eval_loss": 0.009456031955778599,
338
+ "eval_runtime": 54.5839,
339
+ "eval_samples_per_second": 3.829,
340
+ "eval_steps_per_second": 0.495,
341
+ "eval_wer": 1.0327022375215147,
342
+ "step": 900
343
+ },
344
+ {
345
+ "epoch": 3.7298387096774195,
346
+ "grad_norm": 0.3921663761138916,
347
+ "learning_rate": 7.1933333333333345e-06,
348
+ "loss": 0.0041,
349
+ "step": 925
350
+ },
351
+ {
352
+ "epoch": 3.8306451612903225,
353
+ "grad_norm": 2.3925461769104004,
354
+ "learning_rate": 7.0266666666666674e-06,
355
+ "loss": 0.0017,
356
+ "step": 950
357
+ },
358
+ {
359
+ "epoch": 3.931451612903226,
360
+ "grad_norm": 0.002618137514218688,
361
+ "learning_rate": 6.860000000000001e-06,
362
+ "loss": 0.0047,
363
+ "step": 975
364
+ },
365
+ {
366
+ "epoch": 4.032258064516129,
367
+ "grad_norm": 3.606403112411499,
368
+ "learning_rate": 6.693333333333334e-06,
369
+ "loss": 0.0027,
370
+ "step": 1000
371
+ },
372
+ {
373
+ "epoch": 4.032258064516129,
374
+ "eval_loss": 0.01142825372517109,
375
+ "eval_runtime": 54.5807,
376
+ "eval_samples_per_second": 3.829,
377
+ "eval_steps_per_second": 0.495,
378
+ "eval_wer": 1.549053356282272,
379
+ "step": 1000
380
+ },
381
+ {
382
+ "epoch": 4.133064516129032,
383
+ "grad_norm": 2.7271430492401123,
384
+ "learning_rate": 6.526666666666666e-06,
385
+ "loss": 0.0078,
386
+ "step": 1025
387
+ },
388
+ {
389
+ "epoch": 4.233870967741935,
390
+ "grad_norm": 3.484804153442383,
391
+ "learning_rate": 6.360000000000001e-06,
392
+ "loss": 0.0026,
393
+ "step": 1050
394
+ },
395
+ {
396
+ "epoch": 4.334677419354839,
397
+ "grad_norm": 0.0015495093539357185,
398
+ "learning_rate": 6.193333333333333e-06,
399
+ "loss": 0.0017,
400
+ "step": 1075
401
+ },
402
+ {
403
+ "epoch": 4.435483870967742,
404
+ "grad_norm": 0.0017610510112717748,
405
+ "learning_rate": 6.026666666666668e-06,
406
+ "loss": 0.0021,
407
+ "step": 1100
408
+ },
409
+ {
410
+ "epoch": 4.435483870967742,
411
+ "eval_loss": 0.009943087585270405,
412
+ "eval_runtime": 54.465,
413
+ "eval_samples_per_second": 3.837,
414
+ "eval_steps_per_second": 0.496,
415
+ "eval_wer": 1.7211703958691909,
416
+ "step": 1100
417
+ },
418
+ {
419
+ "epoch": 4.536290322580645,
420
+ "grad_norm": 0.0015177098102867603,
421
+ "learning_rate": 5.86e-06,
422
+ "loss": 0.0022,
423
+ "step": 1125
424
+ },
425
+ {
426
+ "epoch": 4.637096774193548,
427
+ "grad_norm": 0.06703988462686539,
428
+ "learning_rate": 5.6933333333333344e-06,
429
+ "loss": 0.0099,
430
+ "step": 1150
431
+ },
432
+ {
433
+ "epoch": 4.737903225806452,
434
+ "grad_norm": 0.0013143372489139438,
435
+ "learning_rate": 5.5266666666666666e-06,
436
+ "loss": 0.0028,
437
+ "step": 1175
438
+ },
439
+ {
440
+ "epoch": 4.838709677419355,
441
+ "grad_norm": 3.9103496074676514,
442
+ "learning_rate": 5.36e-06,
443
+ "loss": 0.0039,
444
+ "step": 1200
445
+ },
446
+ {
447
+ "epoch": 4.838709677419355,
448
+ "eval_loss": 0.0121237151324749,
449
+ "eval_runtime": 54.615,
450
+ "eval_samples_per_second": 3.827,
451
+ "eval_steps_per_second": 0.494,
452
+ "eval_wer": 1.8932874354561102,
453
+ "step": 1200
454
+ },
455
+ {
456
+ "epoch": 4.939516129032258,
457
+ "grad_norm": 0.015102783218026161,
458
+ "learning_rate": 5.193333333333333e-06,
459
+ "loss": 0.0042,
460
+ "step": 1225
461
+ },
462
+ {
463
+ "epoch": 5.040322580645161,
464
+ "grad_norm": 0.0012233111774548888,
465
+ "learning_rate": 5.026666666666667e-06,
466
+ "loss": 0.0013,
467
+ "step": 1250
468
+ },
469
+ {
470
+ "epoch": 5.141129032258065,
471
+ "grad_norm": 0.004319190047681332,
472
+ "learning_rate": 4.86e-06,
473
+ "loss": 0.002,
474
+ "step": 1275
475
+ },
476
+ {
477
+ "epoch": 5.241935483870968,
478
+ "grad_norm": 0.008857857435941696,
479
+ "learning_rate": 4.693333333333334e-06,
480
+ "loss": 0.0017,
481
+ "step": 1300
482
+ },
483
+ {
484
+ "epoch": 5.241935483870968,
485
+ "eval_loss": 0.012567833997309208,
486
+ "eval_runtime": 54.7115,
487
+ "eval_samples_per_second": 3.82,
488
+ "eval_steps_per_second": 0.493,
489
+ "eval_wer": 1.376936316695353,
490
+ "step": 1300
491
+ }
492
+ ],
493
+ "logging_steps": 25,
494
+ "max_steps": 2000,
495
+ "num_input_tokens_seen": 0,
496
+ "num_train_epochs": 9,
497
+ "save_steps": 100,
498
+ "stateful_callbacks": {
499
+ "TrainerControl": {
500
+ "args": {
501
+ "should_epoch_stop": false,
502
+ "should_evaluate": false,
503
+ "should_log": false,
504
+ "should_save": true,
505
+ "should_training_stop": false
506
+ },
507
+ "attributes": {}
508
+ }
509
+ },
510
+ "total_flos": 2.1152036708352e+19,
511
+ "train_batch_size": 16,
512
+ "trial_name": null,
513
+ "trial_params": null
514
+ }
checkpoints/checkpoint-1300/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f7a1fc1f027287c9d1d723e42e6556d0cf6728657bd6a51fde9e5c2d703ba34
3
+ size 5496
checkpoints/checkpoint-1400/config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "shreyasdesaisuperU/whisper-medium-attempt2-1000-orders",
3
+ "activation_dropout": 0.0,
4
+ "activation_function": "gelu",
5
+ "apply_spec_augment": false,
6
+ "architectures": [
7
+ "WhisperForConditionalGeneration"
8
+ ],
9
+ "attention_dropout": 0.0,
10
+ "begin_suppress_tokens": null,
11
+ "bos_token_id": 50257,
12
+ "classifier_proj_size": 256,
13
+ "d_model": 1024,
14
+ "decoder_attention_heads": 16,
15
+ "decoder_ffn_dim": 4096,
16
+ "decoder_layerdrop": 0.0,
17
+ "decoder_layers": 24,
18
+ "decoder_start_token_id": 50258,
19
+ "dropout": 0.0,
20
+ "encoder_attention_heads": 16,
21
+ "encoder_ffn_dim": 4096,
22
+ "encoder_layerdrop": 0.0,
23
+ "encoder_layers": 24,
24
+ "eos_token_id": 50257,
25
+ "forced_decoder_ids": [
26
+ [
27
+ 1,
28
+ 50259
29
+ ],
30
+ [
31
+ 2,
32
+ 50359
33
+ ],
34
+ [
35
+ 3,
36
+ 50363
37
+ ]
38
+ ],
39
+ "init_std": 0.02,
40
+ "is_encoder_decoder": true,
41
+ "mask_feature_length": 10,
42
+ "mask_feature_min_masks": 0,
43
+ "mask_feature_prob": 0.0,
44
+ "mask_time_length": 10,
45
+ "mask_time_min_masks": 2,
46
+ "mask_time_prob": 0.05,
47
+ "max_length": null,
48
+ "max_source_positions": 1500,
49
+ "max_target_positions": 448,
50
+ "median_filter_width": 7,
51
+ "model_type": "whisper",
52
+ "num_hidden_layers": 24,
53
+ "num_mel_bins": 80,
54
+ "pad_token_id": 50257,
55
+ "scale_embedding": false,
56
+ "torch_dtype": "float32",
57
+ "transformers_version": "4.46.2",
58
+ "use_cache": true,
59
+ "use_weighted_layer_sum": false,
60
+ "vocab_size": 51865
61
+ }
checkpoints/checkpoint-1400/generation_config.json ADDED
@@ -0,0 +1,240 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alignment_heads": [
3
+ [
4
+ 13,
5
+ 15
6
+ ],
7
+ [
8
+ 15,
9
+ 4
10
+ ],
11
+ [
12
+ 15,
13
+ 15
14
+ ],
15
+ [
16
+ 16,
17
+ 1
18
+ ],
19
+ [
20
+ 20,
21
+ 0
22
+ ],
23
+ [
24
+ 23,
25
+ 4
26
+ ]
27
+ ],
28
+ "begin_suppress_tokens": [
29
+ 220,
30
+ 50257
31
+ ],
32
+ "bos_token_id": 50257,
33
+ "decoder_start_token_id": 50258,
34
+ "eos_token_id": 50257,
35
+ "is_multilingual": true,
36
+ "lang_to_id": {
37
+ "<|af|>": 50327,
38
+ "<|am|>": 50334,
39
+ "<|ar|>": 50272,
40
+ "<|as|>": 50350,
41
+ "<|az|>": 50304,
42
+ "<|ba|>": 50355,
43
+ "<|be|>": 50330,
44
+ "<|bg|>": 50292,
45
+ "<|bn|>": 50302,
46
+ "<|bo|>": 50347,
47
+ "<|br|>": 50309,
48
+ "<|bs|>": 50315,
49
+ "<|ca|>": 50270,
50
+ "<|cs|>": 50283,
51
+ "<|cy|>": 50297,
52
+ "<|da|>": 50285,
53
+ "<|de|>": 50261,
54
+ "<|el|>": 50281,
55
+ "<|en|>": 50259,
56
+ "<|es|>": 50262,
57
+ "<|et|>": 50307,
58
+ "<|eu|>": 50310,
59
+ "<|fa|>": 50300,
60
+ "<|fi|>": 50277,
61
+ "<|fo|>": 50338,
62
+ "<|fr|>": 50265,
63
+ "<|gl|>": 50319,
64
+ "<|gu|>": 50333,
65
+ "<|haw|>": 50352,
66
+ "<|ha|>": 50354,
67
+ "<|he|>": 50279,
68
+ "<|hi|>": 50276,
69
+ "<|hr|>": 50291,
70
+ "<|ht|>": 50339,
71
+ "<|hu|>": 50286,
72
+ "<|hy|>": 50312,
73
+ "<|id|>": 50275,
74
+ "<|is|>": 50311,
75
+ "<|it|>": 50274,
76
+ "<|ja|>": 50266,
77
+ "<|jw|>": 50356,
78
+ "<|ka|>": 50329,
79
+ "<|kk|>": 50316,
80
+ "<|km|>": 50323,
81
+ "<|kn|>": 50306,
82
+ "<|ko|>": 50264,
83
+ "<|la|>": 50294,
84
+ "<|lb|>": 50345,
85
+ "<|ln|>": 50353,
86
+ "<|lo|>": 50336,
87
+ "<|lt|>": 50293,
88
+ "<|lv|>": 50301,
89
+ "<|mg|>": 50349,
90
+ "<|mi|>": 50295,
91
+ "<|mk|>": 50308,
92
+ "<|ml|>": 50296,
93
+ "<|mn|>": 50314,
94
+ "<|mr|>": 50320,
95
+ "<|ms|>": 50282,
96
+ "<|mt|>": 50343,
97
+ "<|my|>": 50346,
98
+ "<|ne|>": 50313,
99
+ "<|nl|>": 50271,
100
+ "<|nn|>": 50342,
101
+ "<|no|>": 50288,
102
+ "<|oc|>": 50328,
103
+ "<|pa|>": 50321,
104
+ "<|pl|>": 50269,
105
+ "<|ps|>": 50340,
106
+ "<|pt|>": 50267,
107
+ "<|ro|>": 50284,
108
+ "<|ru|>": 50263,
109
+ "<|sa|>": 50344,
110
+ "<|sd|>": 50332,
111
+ "<|si|>": 50322,
112
+ "<|sk|>": 50298,
113
+ "<|sl|>": 50305,
114
+ "<|sn|>": 50324,
115
+ "<|so|>": 50326,
116
+ "<|sq|>": 50317,
117
+ "<|sr|>": 50303,
118
+ "<|su|>": 50357,
119
+ "<|sv|>": 50273,
120
+ "<|sw|>": 50318,
121
+ "<|ta|>": 50287,
122
+ "<|te|>": 50299,
123
+ "<|tg|>": 50331,
124
+ "<|th|>": 50289,
125
+ "<|tk|>": 50341,
126
+ "<|tl|>": 50348,
127
+ "<|tr|>": 50268,
128
+ "<|tt|>": 50351,
129
+ "<|uk|>": 50280,
130
+ "<|ur|>": 50290,
131
+ "<|uz|>": 50337,
132
+ "<|vi|>": 50278,
133
+ "<|yi|>": 50335,
134
+ "<|yo|>": 50325,
135
+ "<|zh|>": 50260
136
+ },
137
+ "language": "english",
138
+ "max_initial_timestamp_index": 50,
139
+ "max_length": 448,
140
+ "no_timestamps_token_id": 50363,
141
+ "pad_token_id": 50257,
142
+ "prev_sot_token_id": 50361,
143
+ "return_timestamps": false,
144
+ "suppress_tokens": [
145
+ 1,
146
+ 2,
147
+ 7,
148
+ 8,
149
+ 9,
150
+ 10,
151
+ 14,
152
+ 25,
153
+ 26,
154
+ 27,
155
+ 28,
156
+ 29,
157
+ 31,
158
+ 58,
159
+ 59,
160
+ 60,
161
+ 61,
162
+ 62,
163
+ 63,
164
+ 90,
165
+ 91,
166
+ 92,
167
+ 93,
168
+ 359,
169
+ 503,
170
+ 522,
171
+ 542,
172
+ 873,
173
+ 893,
174
+ 902,
175
+ 918,
176
+ 922,
177
+ 931,
178
+ 1350,
179
+ 1853,
180
+ 1982,
181
+ 2460,
182
+ 2627,
183
+ 3246,
184
+ 3253,
185
+ 3268,
186
+ 3536,
187
+ 3846,
188
+ 3961,
189
+ 4183,
190
+ 4667,
191
+ 6585,
192
+ 6647,
193
+ 7273,
194
+ 9061,
195
+ 9383,
196
+ 10428,
197
+ 10929,
198
+ 11938,
199
+ 12033,
200
+ 12331,
201
+ 12562,
202
+ 13793,
203
+ 14157,
204
+ 14635,
205
+ 15265,
206
+ 15618,
207
+ 16553,
208
+ 16604,
209
+ 18362,
210
+ 18956,
211
+ 20075,
212
+ 21675,
213
+ 22520,
214
+ 26130,
215
+ 26161,
216
+ 26435,
217
+ 28279,
218
+ 29464,
219
+ 31650,
220
+ 32302,
221
+ 32470,
222
+ 36865,
223
+ 42863,
224
+ 47425,
225
+ 49870,
226
+ 50254,
227
+ 50258,
228
+ 50358,
229
+ 50359,
230
+ 50360,
231
+ 50361,
232
+ 50362
233
+ ],
234
+ "task": "transcribe",
235
+ "task_to_id": {
236
+ "transcribe": 50359,
237
+ "translate": 50358
238
+ },
239
+ "transformers_version": "4.46.2"
240
+ }
checkpoints/checkpoint-1400/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e5812665f6cc9021c27a7f1dc523833ff158d05a34b1fccd137a0d86fdb3e1f
3
+ size 3055544304
checkpoints/checkpoint-1400/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49569f452c0e16d36e1b8f06c76ead34c7010e0184d6fc8a6afe998e3e212699
3
+ size 6111664103