riteshkr commited on
Commit
0c946e7
1 Parent(s): 41f99a8

Upload checkpoint-600/trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. checkpoint-600/trainer_state.json +237 -0
checkpoint-600/trainer_state.json ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.5542770299609205,
3
+ "best_model_checkpoint": "./whisper-large-v3-quantized/checkpoint-200",
4
+ "epoch": 0.36877688998156116,
5
+ "eval_steps": 200,
6
+ "global_step": 600,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.015365703749231715,
13
+ "grad_norm": 0.47920289635658264,
14
+ "learning_rate": 5.000000000000001e-07,
15
+ "loss": 1.5655,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.03073140749846343,
20
+ "grad_norm": 0.15030227601528168,
21
+ "learning_rate": 1.0000000000000002e-06,
22
+ "loss": 1.0487,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.046097111247695145,
27
+ "grad_norm": 0.5764341354370117,
28
+ "learning_rate": 1.5e-06,
29
+ "loss": 1.6787,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.06146281499692686,
34
+ "grad_norm": 0.6326006650924683,
35
+ "learning_rate": 2.0000000000000003e-06,
36
+ "loss": 1.3895,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.07682851874615858,
41
+ "grad_norm": 0.5107858180999756,
42
+ "learning_rate": 2.5e-06,
43
+ "loss": 1.5308,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.09219422249539029,
48
+ "grad_norm": 0.4186854362487793,
49
+ "learning_rate": 3e-06,
50
+ "loss": 1.1231,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.107559926244622,
55
+ "grad_norm": 0.580566942691803,
56
+ "learning_rate": 3.5e-06,
57
+ "loss": 1.8924,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.12292562999385372,
62
+ "grad_norm": 0.3107517957687378,
63
+ "learning_rate": 4.000000000000001e-06,
64
+ "loss": 1.3077,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.12292562999385372,
69
+ "eval_loss": 1.5075420141220093,
70
+ "eval_runtime": 620.2783,
71
+ "eval_samples_per_second": 0.656,
72
+ "eval_steps_per_second": 0.082,
73
+ "eval_wer": 0.5542770299609205,
74
+ "step": 200
75
+ },
76
+ {
77
+ "epoch": 0.13829133374308544,
78
+ "grad_norm": 0.6910067796707153,
79
+ "learning_rate": 4.5e-06,
80
+ "loss": 1.8306,
81
+ "step": 225
82
+ },
83
+ {
84
+ "epoch": 0.15365703749231716,
85
+ "grad_norm": 0.04023272544145584,
86
+ "learning_rate": 5e-06,
87
+ "loss": 1.5791,
88
+ "step": 250
89
+ },
90
+ {
91
+ "epoch": 0.16902274124154887,
92
+ "grad_norm": 0.700638473033905,
93
+ "learning_rate": 5.500000000000001e-06,
94
+ "loss": 1.4698,
95
+ "step": 275
96
+ },
97
+ {
98
+ "epoch": 0.18438844499078058,
99
+ "grad_norm": 0.697817325592041,
100
+ "learning_rate": 6e-06,
101
+ "loss": 1.7872,
102
+ "step": 300
103
+ },
104
+ {
105
+ "epoch": 0.1997541487400123,
106
+ "grad_norm": 0.5358330011367798,
107
+ "learning_rate": 6.5000000000000004e-06,
108
+ "loss": 1.1349,
109
+ "step": 325
110
+ },
111
+ {
112
+ "epoch": 0.215119852489244,
113
+ "grad_norm": 0.7219414710998535,
114
+ "learning_rate": 7e-06,
115
+ "loss": 1.4458,
116
+ "step": 350
117
+ },
118
+ {
119
+ "epoch": 0.23048555623847572,
120
+ "grad_norm": 0.5983926653862,
121
+ "learning_rate": 7.500000000000001e-06,
122
+ "loss": 1.3574,
123
+ "step": 375
124
+ },
125
+ {
126
+ "epoch": 0.24585125998770743,
127
+ "grad_norm": 1.1959525346755981,
128
+ "learning_rate": 8.000000000000001e-06,
129
+ "loss": 1.4669,
130
+ "step": 400
131
+ },
132
+ {
133
+ "epoch": 0.24585125998770743,
134
+ "eval_loss": 1.4504855871200562,
135
+ "eval_runtime": 634.3044,
136
+ "eval_samples_per_second": 0.642,
137
+ "eval_steps_per_second": 0.08,
138
+ "eval_wer": 0.5688232739904473,
139
+ "step": 400
140
+ },
141
+ {
142
+ "epoch": 0.26121696373693915,
143
+ "grad_norm": 0.9689226746559143,
144
+ "learning_rate": 8.5e-06,
145
+ "loss": 1.2762,
146
+ "step": 425
147
+ },
148
+ {
149
+ "epoch": 0.2765826674861709,
150
+ "grad_norm": 0.2560756504535675,
151
+ "learning_rate": 9e-06,
152
+ "loss": 1.2975,
153
+ "step": 450
154
+ },
155
+ {
156
+ "epoch": 0.29194837123540257,
157
+ "grad_norm": 1.208335518836975,
158
+ "learning_rate": 9.5e-06,
159
+ "loss": 1.3111,
160
+ "step": 475
161
+ },
162
+ {
163
+ "epoch": 0.3073140749846343,
164
+ "grad_norm": 0.11639931052923203,
165
+ "learning_rate": 1e-05,
166
+ "loss": 1.2754,
167
+ "step": 500
168
+ },
169
+ {
170
+ "epoch": 0.322679778733866,
171
+ "grad_norm": 3.112171173095703,
172
+ "learning_rate": 9.5e-06,
173
+ "loss": 1.4163,
174
+ "step": 525
175
+ },
176
+ {
177
+ "epoch": 0.33804548248309774,
178
+ "grad_norm": 1.0575857162475586,
179
+ "learning_rate": 9e-06,
180
+ "loss": 1.1581,
181
+ "step": 550
182
+ },
183
+ {
184
+ "epoch": 0.3534111862323294,
185
+ "grad_norm": 1.2147361040115356,
186
+ "learning_rate": 8.5e-06,
187
+ "loss": 1.1211,
188
+ "step": 575
189
+ },
190
+ {
191
+ "epoch": 0.36877688998156116,
192
+ "grad_norm": 3.342254161834717,
193
+ "learning_rate": 8.000000000000001e-06,
194
+ "loss": 0.7042,
195
+ "step": 600
196
+ },
197
+ {
198
+ "epoch": 0.36877688998156116,
199
+ "eval_loss": 1.2238162755966187,
200
+ "eval_runtime": 654.1777,
201
+ "eval_samples_per_second": 0.622,
202
+ "eval_steps_per_second": 0.078,
203
+ "eval_wer": 0.5605731654363874,
204
+ "step": 600
205
+ }
206
+ ],
207
+ "logging_steps": 25,
208
+ "max_steps": 1000,
209
+ "num_input_tokens_seen": 0,
210
+ "num_train_epochs": 1,
211
+ "save_steps": 200,
212
+ "stateful_callbacks": {
213
+ "EarlyStoppingCallback": {
214
+ "args": {
215
+ "early_stopping_patience": 3,
216
+ "early_stopping_threshold": 0.0
217
+ },
218
+ "attributes": {
219
+ "early_stopping_patience_counter": 0
220
+ }
221
+ },
222
+ "TrainerControl": {
223
+ "args": {
224
+ "should_epoch_stop": false,
225
+ "should_evaluate": false,
226
+ "should_log": false,
227
+ "should_save": true,
228
+ "should_training_stop": false
229
+ },
230
+ "attributes": {}
231
+ }
232
+ },
233
+ "total_flos": 2.1979496448e+18,
234
+ "train_batch_size": 1,
235
+ "trial_name": null,
236
+ "trial_params": null
237
+ }