fsicoli commited on
Commit
a0d0d95
1 Parent(s): 2a26cc1

End of training

Browse files
README.md CHANGED
@@ -5,7 +5,7 @@ base_model: openai/whisper-large-v3
5
  tags:
6
  - generated_from_trainer
7
  datasets:
8
- - common_voice_18_0
9
  metrics:
10
  - wer
11
  model-index:
@@ -15,15 +15,15 @@ model-index:
15
  name: Automatic Speech Recognition
16
  type: automatic-speech-recognition
17
  dataset:
18
- name: common_voice_18_0
19
- type: common_voice_18_0
20
  config: pt
21
  split: None
22
  args: pt
23
  metrics:
24
  - name: Wer
25
  type: wer
26
- value: 0.10419602818705957
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -31,10 +31,10 @@ should probably proofread and complete it, then remove this comment. -->
31
 
32
  # whisper-large-v3-pt-3000h-3
33
 
34
- This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on the common_voice_18_0 dataset.
35
  It achieves the following results on the evaluation set:
36
- - Loss: 0.2671
37
- - Wer: 0.1042
38
 
39
  ## Model description
40
 
 
5
  tags:
6
  - generated_from_trainer
7
  datasets:
8
+ - fsicoli/common_voice_18_0
9
  metrics:
10
  - wer
11
  model-index:
 
15
  name: Automatic Speech Recognition
16
  type: automatic-speech-recognition
17
  dataset:
18
+ name: fsicoli/common_voice_18_0 pt
19
+ type: fsicoli/common_voice_18_0
20
  config: pt
21
  split: None
22
  args: pt
23
  metrics:
24
  - name: Wer
25
  type: wer
26
+ value: 0.10736707238949392
27
  ---
28
 
29
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
31
 
32
  # whisper-large-v3-pt-3000h-3
33
 
34
+ This model is a fine-tuned version of [openai/whisper-large-v3](https://huggingface.co/openai/whisper-large-v3) on the fsicoli/common_voice_18_0 pt dataset.
35
  It achieves the following results on the evaluation set:
36
+ - Loss: 0.1501
37
+ - Wer: 0.1074
38
 
39
  ## Model description
40
 
all_results.json CHANGED
@@ -1,15 +1,15 @@
1
  {
2
- "epoch": 1.999819135467535,
3
- "eval_loss": 0.14780554175376892,
4
- "eval_runtime": 18406.7999,
5
  "eval_samples": 9494,
6
- "eval_samples_per_second": 0.516,
7
- "eval_steps_per_second": 0.516,
8
- "eval_wer": 0.10174567584881486,
9
- "total_flos": 1.5025096510930944e+20,
10
- "train_loss": 0.0,
11
- "train_runtime": 0.2812,
12
  "train_samples": 22116,
13
- "train_samples_per_second": 157281.733,
14
- "train_steps_per_second": 4914.165
15
  }
 
1
  {
2
+ "epoch": 9.99638336347197,
3
+ "eval_loss": 0.1500573754310608,
4
+ "eval_runtime": 50421.184,
5
  "eval_samples": 9494,
6
+ "eval_samples_per_second": 0.188,
7
+ "eval_steps_per_second": 0.024,
8
+ "eval_wer": 0.10736707238949392,
9
+ "total_flos": 7.51132515607511e+20,
10
+ "train_loss": 0.05691683961323822,
11
+ "train_runtime": 650193.1322,
12
  "train_samples": 22116,
13
+ "train_samples_per_second": 0.34,
14
+ "train_steps_per_second": 0.011
15
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.999819135467535,
3
- "eval_loss": 0.14780554175376892,
4
- "eval_runtime": 18406.7999,
5
  "eval_samples": 9494,
6
- "eval_samples_per_second": 0.516,
7
- "eval_steps_per_second": 0.516,
8
- "eval_wer": 0.10174567584881486
9
  }
 
1
  {
2
+ "epoch": 9.99638336347197,
3
+ "eval_loss": 0.1500573754310608,
4
+ "eval_runtime": 50421.184,
5
  "eval_samples": 9494,
6
+ "eval_samples_per_second": 0.188,
7
+ "eval_steps_per_second": 0.024,
8
+ "eval_wer": 0.10736707238949392
9
  }
runs/Aug20_11-34-00_DITEC2014063010/events.out.tfevents.1724865406.DITEC2014063010.27248.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e41817bbf37be9c0b8489bfb806165aa585307cc8379119344925e309d89fc8
3
+ size 406
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.999819135467535,
3
- "total_flos": 1.5025096510930944e+20,
4
- "train_loss": 0.0,
5
- "train_runtime": 0.2812,
6
  "train_samples": 22116,
7
- "train_samples_per_second": 157281.733,
8
- "train_steps_per_second": 4914.165
9
  }
 
1
  {
2
+ "epoch": 9.99638336347197,
3
+ "total_flos": 7.51132515607511e+20,
4
+ "train_loss": 0.05691683961323822,
5
+ "train_runtime": 650193.1322,
6
  "train_samples": 22116,
7
+ "train_samples_per_second": 0.34,
8
+ "train_steps_per_second": 0.011
9
  }
trainer_state.json CHANGED
@@ -1,421 +1,2049 @@
1
  {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 1.999819135467535,
5
  "eval_steps": 500,
6
- "global_step": 1382,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.03617290649303671,
13
- "grad_norm": 5.598344326019287,
14
- "learning_rate": 1.0000000000000001e-07,
15
- "loss": 0.8052,
16
  "step": 25
17
  },
18
  {
19
- "epoch": 0.07234581298607343,
20
- "grad_norm": 15.454778671264648,
21
- "learning_rate": 2.0000000000000002e-07,
22
- "loss": 1.3834,
23
  "step": 50
24
  },
25
  {
26
- "epoch": 0.10851871947911014,
27
- "grad_norm": 6.023103713989258,
28
- "learning_rate": 3.0000000000000004e-07,
29
- "loss": 0.6989,
30
  "step": 75
31
  },
32
  {
33
- "epoch": 0.14469162597214685,
34
- "grad_norm": 14.213159561157227,
35
- "learning_rate": 4.0000000000000003e-07,
36
- "loss": 0.8097,
37
  "step": 100
38
  },
39
  {
40
- "epoch": 0.18086453246518358,
41
- "grad_norm": 3.366466999053955,
42
- "learning_rate": 5.000000000000001e-07,
43
- "loss": 0.2367,
44
  "step": 125
45
  },
46
  {
47
- "epoch": 0.21703743895822028,
48
- "grad_norm": 15.736528396606445,
49
- "learning_rate": 6.000000000000001e-07,
50
- "loss": 0.3249,
51
  "step": 150
52
  },
53
  {
54
- "epoch": 0.25321034545125704,
55
- "grad_norm": 2.5983786582946777,
56
- "learning_rate": 7.000000000000001e-07,
57
- "loss": 0.1757,
58
  "step": 175
59
  },
60
  {
61
- "epoch": 0.2893832519442937,
62
- "grad_norm": 10.108122825622559,
63
- "learning_rate": 8.000000000000001e-07,
64
- "loss": 0.2398,
65
  "step": 200
66
  },
67
  {
68
- "epoch": 0.32555615843733043,
69
- "grad_norm": 2.5293455123901367,
70
- "learning_rate": 9.000000000000001e-07,
71
- "loss": 0.1667,
72
  "step": 225
73
  },
74
  {
75
- "epoch": 0.36172906493036716,
76
- "grad_norm": 9.013643264770508,
77
- "learning_rate": 1.0000000000000002e-06,
78
- "loss": 0.22,
79
  "step": 250
80
  },
81
  {
82
- "epoch": 0.3979019714234039,
83
- "grad_norm": 3.5457277297973633,
84
- "learning_rate": 1.1e-06,
85
- "loss": 0.168,
86
  "step": 275
87
  },
88
  {
89
- "epoch": 0.43407487791644056,
90
- "grad_norm": 6.44632625579834,
91
- "learning_rate": 1.2000000000000002e-06,
92
- "loss": 0.2021,
93
  "step": 300
94
  },
95
  {
96
- "epoch": 0.4702477844094773,
97
- "grad_norm": 2.5131373405456543,
98
- "learning_rate": 1.3e-06,
99
- "loss": 0.1573,
100
  "step": 325
101
  },
102
  {
103
- "epoch": 0.5064206909025141,
104
- "grad_norm": 10.056652069091797,
105
- "learning_rate": 1.4000000000000001e-06,
106
- "loss": 0.1839,
107
  "step": 350
108
  },
109
  {
110
- "epoch": 0.5425935973955507,
111
- "grad_norm": 1.9256174564361572,
112
- "learning_rate": 1.5e-06,
113
- "loss": 0.1432,
114
  "step": 375
115
  },
116
  {
117
- "epoch": 0.5787665038885874,
118
- "grad_norm": 5.919682025909424,
119
- "learning_rate": 1.6000000000000001e-06,
120
- "loss": 0.178,
121
  "step": 400
122
  },
123
  {
124
- "epoch": 0.6149394103816241,
125
- "grad_norm": 2.477151393890381,
126
- "learning_rate": 1.7000000000000002e-06,
127
- "loss": 0.1377,
128
  "step": 425
129
  },
130
  {
131
- "epoch": 0.6511123168746609,
132
- "grad_norm": 6.950107097625732,
133
- "learning_rate": 1.8000000000000001e-06,
134
- "loss": 0.1865,
135
  "step": 450
136
  },
137
  {
138
- "epoch": 0.6872852233676976,
139
- "grad_norm": 2.580976963043213,
140
- "learning_rate": 1.9000000000000002e-06,
141
- "loss": 0.1296,
142
  "step": 475
143
  },
144
  {
145
- "epoch": 0.7234581298607343,
146
- "grad_norm": 6.907202243804932,
147
- "learning_rate": 2.0000000000000003e-06,
148
- "loss": 0.1769,
149
  "step": 500
150
  },
151
  {
152
- "epoch": 0.759631036353771,
153
- "grad_norm": 2.5415403842926025,
154
- "learning_rate": 2.1000000000000002e-06,
155
- "loss": 0.1345,
156
  "step": 525
157
  },
158
  {
159
- "epoch": 0.7958039428468078,
160
- "grad_norm": 8.191935539245605,
161
- "learning_rate": 2.2e-06,
162
- "loss": 0.1727,
163
  "step": 550
164
  },
165
  {
166
- "epoch": 0.8319768493398445,
167
- "grad_norm": 4.426870822906494,
168
- "learning_rate": 2.3000000000000004e-06,
169
- "loss": 0.1267,
170
  "step": 575
171
  },
172
  {
173
- "epoch": 0.8681497558328811,
174
- "grad_norm": 5.906033515930176,
175
- "learning_rate": 2.4000000000000003e-06,
176
- "loss": 0.1617,
177
  "step": 600
178
  },
179
  {
180
- "epoch": 0.9043226623259178,
181
- "grad_norm": 2.5345969200134277,
182
- "learning_rate": 2.5e-06,
183
- "loss": 0.1444,
184
  "step": 625
185
  },
186
  {
187
- "epoch": 0.9404955688189546,
188
- "grad_norm": 5.994137287139893,
189
- "learning_rate": 2.6e-06,
190
- "loss": 0.1766,
191
  "step": 650
192
  },
193
  {
194
- "epoch": 0.9766684753119913,
195
- "grad_norm": 3.034348249435425,
196
- "learning_rate": 2.7000000000000004e-06,
197
- "loss": 0.13,
198
  "step": 675
199
  },
200
  {
201
- "epoch": 0.9998191354675349,
202
- "eval_loss": 0.1486499309539795,
203
- "eval_runtime": 18037.1186,
204
- "eval_samples_per_second": 0.526,
205
- "eval_steps_per_second": 0.526,
206
- "eval_wer": 0.10366752081998719,
207
  "step": 691
208
  },
209
  {
210
- "epoch": 1.0128413818050281,
211
- "grad_norm": 2.1960830688476562,
212
- "learning_rate": 2.8000000000000003e-06,
213
- "loss": 0.1467,
214
  "step": 700
215
  },
216
  {
217
- "epoch": 1.0490142882980646,
218
- "grad_norm": 2.957355260848999,
219
- "learning_rate": 2.9e-06,
220
- "loss": 0.1113,
221
  "step": 725
222
  },
223
  {
224
- "epoch": 1.0851871947911014,
225
- "grad_norm": 1.6470922231674194,
226
- "learning_rate": 3e-06,
227
- "loss": 0.1469,
228
  "step": 750
229
  },
230
  {
231
- "epoch": 1.121360101284138,
232
- "grad_norm": 3.09590220451355,
233
- "learning_rate": 3.1000000000000004e-06,
234
- "loss": 0.1034,
235
  "step": 775
236
  },
237
  {
238
- "epoch": 1.1575330077771748,
239
- "grad_norm": 2.3706791400909424,
240
- "learning_rate": 3.2000000000000003e-06,
241
- "loss": 0.1463,
242
  "step": 800
243
  },
244
  {
245
- "epoch": 1.1937059142702116,
246
- "grad_norm": 1.995174765586853,
247
- "learning_rate": 3.3000000000000006e-06,
248
- "loss": 0.1004,
249
  "step": 825
250
  },
251
  {
252
- "epoch": 1.2298788207632483,
253
- "grad_norm": 1.665616750717163,
254
- "learning_rate": 3.4000000000000005e-06,
255
- "loss": 0.1297,
256
  "step": 850
257
  },
258
  {
259
- "epoch": 1.266051727256285,
260
- "grad_norm": 2.44356369972229,
261
- "learning_rate": 3.5e-06,
262
- "loss": 0.1112,
263
  "step": 875
264
  },
265
  {
266
- "epoch": 1.3022246337493217,
267
- "grad_norm": 1.8907896280288696,
268
- "learning_rate": 3.6000000000000003e-06,
269
- "loss": 0.1379,
270
  "step": 900
271
  },
272
  {
273
- "epoch": 1.3383975402423585,
274
- "grad_norm": 2.632768154144287,
275
- "learning_rate": 3.7e-06,
276
- "loss": 0.1011,
277
  "step": 925
278
  },
279
  {
280
- "epoch": 1.3745704467353952,
281
- "grad_norm": 1.6871304512023926,
282
- "learning_rate": 3.8000000000000005e-06,
283
- "loss": 0.133,
284
  "step": 950
285
  },
286
  {
287
- "epoch": 1.410743353228432,
288
- "grad_norm": 2.9852051734924316,
289
- "learning_rate": 3.900000000000001e-06,
290
- "loss": 0.1126,
291
  "step": 975
292
  },
293
  {
294
- "epoch": 1.4469162597214686,
295
- "grad_norm": 2.0160508155822754,
296
- "learning_rate": 4.000000000000001e-06,
297
- "loss": 0.1361,
298
  "step": 1000
299
  },
300
  {
301
- "epoch": 1.4832700307469704,
302
- "grad_norm": 2.4528961181640625,
303
- "learning_rate": 9.345549738219896e-06,
304
- "loss": 0.1073,
305
  "step": 1025
306
  },
307
  {
308
- "epoch": 1.5194429372400071,
309
- "grad_norm": 3.373425006866455,
310
- "learning_rate": 8.691099476439791e-06,
311
- "loss": 0.1388,
312
  "step": 1050
313
  },
314
  {
315
- "epoch": 1.5556158437330438,
316
- "grad_norm": 2.545165538787842,
317
- "learning_rate": 8.036649214659686e-06,
318
- "loss": 0.1101,
319
  "step": 1075
320
  },
321
  {
322
- "epoch": 1.5917887502260806,
323
- "grad_norm": 1.6243386268615723,
324
- "learning_rate": 7.382198952879581e-06,
325
- "loss": 0.1339,
326
  "step": 1100
327
  },
328
  {
329
- "epoch": 1.6279616567191173,
330
- "grad_norm": 3.5856447219848633,
331
- "learning_rate": 6.727748691099477e-06,
332
- "loss": 0.1069,
333
  "step": 1125
334
  },
335
  {
336
- "epoch": 1.664134563212154,
337
- "grad_norm": 1.4985790252685547,
338
- "learning_rate": 6.073298429319372e-06,
339
- "loss": 0.1331,
340
  "step": 1150
341
  },
342
  {
343
- "epoch": 1.7003074697051908,
344
- "grad_norm": 3.4537761211395264,
345
- "learning_rate": 5.418848167539268e-06,
346
- "loss": 0.0903,
347
  "step": 1175
348
  },
349
  {
350
- "epoch": 1.7364803761982275,
351
- "grad_norm": 2.7335891723632812,
352
- "learning_rate": 4.764397905759163e-06,
353
- "loss": 0.1207,
354
  "step": 1200
355
  },
356
  {
357
- "epoch": 1.7726532826912642,
358
- "grad_norm": 2.135629892349243,
359
- "learning_rate": 4.109947643979058e-06,
360
- "loss": 0.0948,
361
  "step": 1225
362
  },
363
  {
364
- "epoch": 1.808826189184301,
365
- "grad_norm": 1.6300333738327026,
366
- "learning_rate": 3.455497382198953e-06,
367
- "loss": 0.1099,
368
  "step": 1250
369
  },
370
  {
371
- "epoch": 1.8449990956773377,
372
- "grad_norm": 2.582153797149658,
373
- "learning_rate": 2.8010471204188483e-06,
374
- "loss": 0.0896,
375
  "step": 1275
376
  },
377
  {
378
- "epoch": 1.8811720021703744,
379
- "grad_norm": 1.5254755020141602,
380
- "learning_rate": 2.1465968586387435e-06,
381
- "loss": 0.0942,
382
  "step": 1300
383
  },
384
  {
385
- "epoch": 1.9173449086634111,
386
- "grad_norm": 3.2325875759124756,
387
- "learning_rate": 1.4921465968586387e-06,
388
- "loss": 0.0932,
389
  "step": 1325
390
  },
391
  {
392
- "epoch": 1.9535178151564478,
393
- "grad_norm": 1.1424560546875,
394
- "learning_rate": 8.376963350785341e-07,
395
- "loss": 0.1005,
396
  "step": 1350
397
  },
398
  {
399
- "epoch": 1.9896907216494846,
400
- "grad_norm": 3.229748010635376,
401
- "learning_rate": 1.8324607329842932e-07,
402
- "loss": 0.0844,
403
  "step": 1375
404
  },
405
  {
406
- "epoch": 1.999819135467535,
407
- "step": 1382,
408
- "total_flos": 1.5025096510930944e+20,
409
- "train_loss": 0.0,
410
- "train_runtime": 0.2812,
411
- "train_samples_per_second": 157281.733,
412
- "train_steps_per_second": 4914.165
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
413
  }
414
  ],
415
  "logging_steps": 25,
416
- "max_steps": 1382,
417
  "num_input_tokens_seen": 0,
418
- "num_train_epochs": 2,
419
  "save_steps": 500,
420
  "stateful_callbacks": {
421
  "TrainerControl": {
@@ -429,8 +2057,8 @@
429
  "attributes": {}
430
  }
431
  },
432
- "total_flos": 1.5025096510930944e+20,
433
- "train_batch_size": 1,
434
  "trial_name": null,
435
  "trial_params": null
436
  }
 
1
  {
2
+ "best_metric": 0.1500573754310608,
3
+ "best_model_checkpoint": "d:\\\\whisper-large-v3-pt-3000h-3\\checkpoint-691",
4
+ "epoch": 9.99638336347197,
5
  "eval_steps": 500,
6
+ "global_step": 6910,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.03616636528028933,
13
+ "grad_norm": 5.811695098876953,
14
+ "learning_rate": 2.5000000000000004e-07,
15
+ "loss": 0.7755,
16
  "step": 25
17
  },
18
  {
19
+ "epoch": 0.07233273056057866,
20
+ "grad_norm": Infinity,
21
+ "learning_rate": 4.900000000000001e-07,
22
+ "loss": 1.2265,
23
  "step": 50
24
  },
25
  {
26
+ "epoch": 0.10849909584086799,
27
+ "grad_norm": 3.2242519855499268,
28
+ "learning_rate": 7.4e-07,
29
+ "loss": 0.3546,
30
  "step": 75
31
  },
32
  {
33
+ "epoch": 0.14466546112115733,
34
+ "grad_norm": 11.40543270111084,
35
+ "learning_rate": 9.9e-07,
36
+ "loss": 0.2988,
37
  "step": 100
38
  },
39
  {
40
+ "epoch": 0.18083182640144665,
41
+ "grad_norm": 2.457425117492676,
42
+ "learning_rate": 1.2400000000000002e-06,
43
+ "loss": 0.174,
44
  "step": 125
45
  },
46
  {
47
+ "epoch": 0.21699819168173598,
48
+ "grad_norm": 10.275086402893066,
49
+ "learning_rate": 1.4900000000000001e-06,
50
+ "loss": 0.2317,
51
  "step": 150
52
  },
53
  {
54
+ "epoch": 0.25316455696202533,
55
+ "grad_norm": 2.2328078746795654,
56
+ "learning_rate": 1.74e-06,
57
+ "loss": 0.1487,
58
  "step": 175
59
  },
60
  {
61
+ "epoch": 0.28933092224231466,
62
+ "grad_norm": 8.097187995910645,
63
+ "learning_rate": 1.9900000000000004e-06,
64
+ "loss": 0.205,
65
  "step": 200
66
  },
67
  {
68
+ "epoch": 0.325497287522604,
69
+ "grad_norm": 2.458644390106201,
70
+ "learning_rate": 2.24e-06,
71
+ "loss": 0.1509,
72
  "step": 225
73
  },
74
  {
75
+ "epoch": 0.3616636528028933,
76
+ "grad_norm": 8.181588172912598,
77
+ "learning_rate": 2.4900000000000003e-06,
78
+ "loss": 0.2057,
79
  "step": 250
80
  },
81
  {
82
+ "epoch": 0.39783001808318263,
83
+ "grad_norm": 2.0923259258270264,
84
+ "learning_rate": 2.7400000000000004e-06,
85
+ "loss": 0.1579,
86
  "step": 275
87
  },
88
  {
89
+ "epoch": 0.43399638336347196,
90
+ "grad_norm": 5.871222496032715,
91
+ "learning_rate": 2.99e-06,
92
+ "loss": 0.188,
93
  "step": 300
94
  },
95
  {
96
+ "epoch": 0.4701627486437613,
97
+ "grad_norm": 2.132258653640747,
98
+ "learning_rate": 3.2400000000000003e-06,
99
+ "loss": 0.1513,
100
  "step": 325
101
  },
102
  {
103
+ "epoch": 0.5063291139240507,
104
+ "grad_norm": 9.259086608886719,
105
+ "learning_rate": 3.49e-06,
106
+ "loss": 0.1773,
107
  "step": 350
108
  },
109
  {
110
+ "epoch": 0.5424954792043399,
111
+ "grad_norm": 2.059004783630371,
112
+ "learning_rate": 3.74e-06,
113
+ "loss": 0.1469,
114
  "step": 375
115
  },
116
  {
117
+ "epoch": 0.5786618444846293,
118
+ "grad_norm": 6.2740302085876465,
119
+ "learning_rate": 3.990000000000001e-06,
120
+ "loss": 0.1765,
121
  "step": 400
122
  },
123
  {
124
+ "epoch": 0.6148282097649186,
125
+ "grad_norm": 2.315974473953247,
126
+ "learning_rate": 4.24e-06,
127
+ "loss": 0.1364,
128
  "step": 425
129
  },
130
  {
131
+ "epoch": 0.650994575045208,
132
+ "grad_norm": 6.675321102142334,
133
+ "learning_rate": 4.49e-06,
134
+ "loss": 0.1921,
135
  "step": 450
136
  },
137
  {
138
+ "epoch": 0.6871609403254972,
139
+ "grad_norm": 2.4726405143737793,
140
+ "learning_rate": 4.74e-06,
141
+ "loss": 0.1365,
142
  "step": 475
143
  },
144
  {
145
+ "epoch": 0.7233273056057866,
146
+ "grad_norm": 7.037698745727539,
147
+ "learning_rate": 4.9900000000000005e-06,
148
+ "loss": 0.1792,
149
  "step": 500
150
  },
151
  {
152
+ "epoch": 0.759493670886076,
153
+ "grad_norm": 2.1146976947784424,
154
+ "learning_rate": 5.240000000000001e-06,
155
+ "loss": 0.1347,
156
  "step": 525
157
  },
158
  {
159
+ "epoch": 0.7956600361663653,
160
+ "grad_norm": 4.760638236999512,
161
+ "learning_rate": 5.490000000000001e-06,
162
+ "loss": 0.1702,
163
  "step": 550
164
  },
165
  {
166
+ "epoch": 0.8318264014466547,
167
+ "grad_norm": 3.498173952102661,
168
+ "learning_rate": 5.74e-06,
169
+ "loss": 0.132,
170
  "step": 575
171
  },
172
  {
173
+ "epoch": 0.8679927667269439,
174
+ "grad_norm": 6.879450798034668,
175
+ "learning_rate": 5.99e-06,
176
+ "loss": 0.1589,
177
  "step": 600
178
  },
179
  {
180
+ "epoch": 0.9041591320072333,
181
+ "grad_norm": 2.3761587142944336,
182
+ "learning_rate": 6.24e-06,
183
+ "loss": 0.15,
184
  "step": 625
185
  },
186
  {
187
+ "epoch": 0.9403254972875226,
188
+ "grad_norm": 5.599714756011963,
189
+ "learning_rate": 6.4900000000000005e-06,
190
+ "loss": 0.1801,
191
  "step": 650
192
  },
193
  {
194
+ "epoch": 0.976491862567812,
195
+ "grad_norm": 3.146120071411133,
196
+ "learning_rate": 6.740000000000001e-06,
197
+ "loss": 0.1388,
198
  "step": 675
199
  },
200
  {
201
+ "epoch": 0.9996383363471971,
202
+ "eval_loss": 0.1500573754310608,
203
+ "eval_runtime": 40633.1057,
204
+ "eval_samples_per_second": 0.234,
205
+ "eval_steps_per_second": 0.029,
206
+ "eval_wer": 0.10736707238949392,
207
  "step": 691
208
  },
209
  {
210
+ "epoch": 1.0126582278481013,
211
+ "grad_norm": 1.3991518020629883,
212
+ "learning_rate": 6.99e-06,
213
+ "loss": 0.1468,
214
  "step": 700
215
  },
216
  {
217
+ "epoch": 1.0488245931283906,
218
+ "grad_norm": 2.3540124893188477,
219
+ "learning_rate": 7.24e-06,
220
+ "loss": 0.0971,
221
  "step": 725
222
  },
223
  {
224
+ "epoch": 1.0849909584086799,
225
+ "grad_norm": 1.6329786777496338,
226
+ "learning_rate": 7.49e-06,
227
+ "loss": 0.1155,
228
  "step": 750
229
  },
230
  {
231
+ "epoch": 1.1211573236889691,
232
+ "grad_norm": 2.5000386238098145,
233
+ "learning_rate": 7.74e-06,
234
+ "loss": 0.0903,
235
  "step": 775
236
  },
237
  {
238
+ "epoch": 1.1573236889692586,
239
+ "grad_norm": 1.3056849241256714,
240
+ "learning_rate": 7.990000000000001e-06,
241
+ "loss": 0.1255,
242
  "step": 800
243
  },
244
  {
245
+ "epoch": 1.193490054249548,
246
+ "grad_norm": 2.1964187622070312,
247
+ "learning_rate": 8.24e-06,
248
+ "loss": 0.0987,
249
  "step": 825
250
  },
251
  {
252
+ "epoch": 1.2296564195298372,
253
+ "grad_norm": 1.5802552700042725,
254
+ "learning_rate": 8.49e-06,
255
+ "loss": 0.1393,
256
  "step": 850
257
  },
258
  {
259
+ "epoch": 1.2658227848101267,
260
+ "grad_norm": 1.7570548057556152,
261
+ "learning_rate": 8.740000000000001e-06,
262
+ "loss": 0.1094,
263
  "step": 875
264
  },
265
  {
266
+ "epoch": 1.301989150090416,
267
+ "grad_norm": 1.5590579509735107,
268
+ "learning_rate": 8.99e-06,
269
+ "loss": 0.1276,
270
  "step": 900
271
  },
272
  {
273
+ "epoch": 1.3381555153707052,
274
+ "grad_norm": 2.429781913757324,
275
+ "learning_rate": 9.240000000000001e-06,
276
+ "loss": 0.1023,
277
  "step": 925
278
  },
279
  {
280
+ "epoch": 1.3743218806509945,
281
+ "grad_norm": 2.3684589862823486,
282
+ "learning_rate": 9.49e-06,
283
+ "loss": 0.1372,
284
  "step": 950
285
  },
286
  {
287
+ "epoch": 1.410488245931284,
288
+ "grad_norm": 5.570235252380371,
289
+ "learning_rate": 9.74e-06,
290
+ "loss": 0.1122,
291
  "step": 975
292
  },
293
  {
294
+ "epoch": 1.4466546112115732,
295
+ "grad_norm": 2.2822372913360596,
296
+ "learning_rate": 9.990000000000001e-06,
297
+ "loss": 0.1463,
298
  "step": 1000
299
  },
300
  {
301
+ "epoch": 1.4828209764918625,
302
+ "grad_norm": 2.185589075088501,
303
+ "learning_rate": 9.959390862944164e-06,
304
+ "loss": 0.1055,
305
  "step": 1025
306
  },
307
  {
308
+ "epoch": 1.518987341772152,
309
+ "grad_norm": 2.054370164871216,
310
+ "learning_rate": 9.917089678511e-06,
311
+ "loss": 0.1426,
312
  "step": 1050
313
  },
314
  {
315
+ "epoch": 1.5551537070524413,
316
+ "grad_norm": 2.4509952068328857,
317
+ "learning_rate": 9.874788494077835e-06,
318
+ "loss": 0.0985,
319
  "step": 1075
320
  },
321
  {
322
+ "epoch": 1.5913200723327305,
323
+ "grad_norm": 1.7492316961288452,
324
+ "learning_rate": 9.83248730964467e-06,
325
+ "loss": 0.1553,
326
  "step": 1100
327
  },
328
  {
329
+ "epoch": 1.6274864376130198,
330
+ "grad_norm": 2.5610837936401367,
331
+ "learning_rate": 9.790186125211507e-06,
332
+ "loss": 0.1235,
333
  "step": 1125
334
  },
335
  {
336
+ "epoch": 1.663652802893309,
337
+ "grad_norm": 1.8614284992218018,
338
+ "learning_rate": 9.747884940778342e-06,
339
+ "loss": 0.1524,
340
  "step": 1150
341
  },
342
  {
343
+ "epoch": 1.6998191681735986,
344
+ "grad_norm": 3.679600954055786,
345
+ "learning_rate": 9.705583756345178e-06,
346
+ "loss": 0.1237,
347
  "step": 1175
348
  },
349
  {
350
+ "epoch": 1.7359855334538878,
351
+ "grad_norm": 1.617661952972412,
352
+ "learning_rate": 9.663282571912015e-06,
353
+ "loss": 0.1341,
354
  "step": 1200
355
  },
356
  {
357
+ "epoch": 1.7721518987341773,
358
+ "grad_norm": 2.3510446548461914,
359
+ "learning_rate": 9.62098138747885e-06,
360
+ "loss": 0.1232,
361
  "step": 1225
362
  },
363
  {
364
+ "epoch": 1.8083182640144666,
365
+ "grad_norm": 1.9561399221420288,
366
+ "learning_rate": 9.578680203045685e-06,
367
+ "loss": 0.1294,
368
  "step": 1250
369
  },
370
  {
371
+ "epoch": 1.8444846292947559,
372
+ "grad_norm": 3.439204216003418,
373
+ "learning_rate": 9.536379018612522e-06,
374
+ "loss": 0.1119,
375
  "step": 1275
376
  },
377
  {
378
+ "epoch": 1.8806509945750451,
379
+ "grad_norm": 2.9221997261047363,
380
+ "learning_rate": 9.494077834179358e-06,
381
+ "loss": 0.1494,
382
  "step": 1300
383
  },
384
  {
385
+ "epoch": 1.9168173598553344,
386
+ "grad_norm": 2.623828411102295,
387
+ "learning_rate": 9.451776649746193e-06,
388
+ "loss": 0.1109,
389
  "step": 1325
390
  },
391
  {
392
+ "epoch": 1.952983725135624,
393
+ "grad_norm": 1.5950006246566772,
394
+ "learning_rate": 9.40947546531303e-06,
395
+ "loss": 0.134,
396
  "step": 1350
397
  },
398
  {
399
+ "epoch": 1.9891500904159132,
400
+ "grad_norm": 2.305098533630371,
401
+ "learning_rate": 9.367174280879865e-06,
402
+ "loss": 0.108,
403
  "step": 1375
404
  },
405
  {
406
+ "epoch": 1.9992766726943942,
407
+ "eval_loss": 0.16191039979457855,
408
+ "eval_runtime": 48873.7373,
409
+ "eval_samples_per_second": 0.194,
410
+ "eval_steps_per_second": 0.024,
411
+ "eval_wer": 0.11531069827033953,
412
+ "step": 1382
413
+ },
414
+ {
415
+ "epoch": 2.0253164556962027,
416
+ "grad_norm": 1.2765154838562012,
417
+ "learning_rate": 9.324873096446702e-06,
418
+ "loss": 0.1064,
419
+ "step": 1400
420
+ },
421
+ {
422
+ "epoch": 2.061482820976492,
423
+ "grad_norm": 2.579493999481201,
424
+ "learning_rate": 9.282571912013538e-06,
425
+ "loss": 0.0676,
426
+ "step": 1425
427
+ },
428
+ {
429
+ "epoch": 2.097649186256781,
430
+ "grad_norm": 1.4855302572250366,
431
+ "learning_rate": 9.240270727580373e-06,
432
+ "loss": 0.0733,
433
+ "step": 1450
434
+ },
435
+ {
436
+ "epoch": 2.1338155515370705,
437
+ "grad_norm": 1.7606139183044434,
438
+ "learning_rate": 9.19796954314721e-06,
439
+ "loss": 0.069,
440
+ "step": 1475
441
+ },
442
+ {
443
+ "epoch": 2.1699819168173597,
444
+ "grad_norm": 1.383319616317749,
445
+ "learning_rate": 9.155668358714045e-06,
446
+ "loss": 0.069,
447
+ "step": 1500
448
+ },
449
+ {
450
+ "epoch": 2.206148282097649,
451
+ "grad_norm": 3.2015483379364014,
452
+ "learning_rate": 9.11336717428088e-06,
453
+ "loss": 0.0725,
454
+ "step": 1525
455
+ },
456
+ {
457
+ "epoch": 2.2423146473779383,
458
+ "grad_norm": 2.2851688861846924,
459
+ "learning_rate": 9.071065989847716e-06,
460
+ "loss": 0.0743,
461
+ "step": 1550
462
+ },
463
+ {
464
+ "epoch": 2.278481012658228,
465
+ "grad_norm": 3.721728801727295,
466
+ "learning_rate": 9.028764805414553e-06,
467
+ "loss": 0.0704,
468
+ "step": 1575
469
+ },
470
+ {
471
+ "epoch": 2.3146473779385173,
472
+ "grad_norm": 1.077505111694336,
473
+ "learning_rate": 8.986463620981388e-06,
474
+ "loss": 0.0743,
475
+ "step": 1600
476
+ },
477
+ {
478
+ "epoch": 2.3508137432188065,
479
+ "grad_norm": 2.2788355350494385,
480
+ "learning_rate": 8.944162436548224e-06,
481
+ "loss": 0.071,
482
+ "step": 1625
483
+ },
484
+ {
485
+ "epoch": 2.386980108499096,
486
+ "grad_norm": 3.0096864700317383,
487
+ "learning_rate": 8.901861252115061e-06,
488
+ "loss": 0.0808,
489
+ "step": 1650
490
+ },
491
+ {
492
+ "epoch": 2.423146473779385,
493
+ "grad_norm": 3.3069839477539062,
494
+ "learning_rate": 8.859560067681896e-06,
495
+ "loss": 0.0731,
496
+ "step": 1675
497
+ },
498
+ {
499
+ "epoch": 2.4593128390596743,
500
+ "grad_norm": 1.791975498199463,
501
+ "learning_rate": 8.817258883248732e-06,
502
+ "loss": 0.0788,
503
+ "step": 1700
504
+ },
505
+ {
506
+ "epoch": 2.495479204339964,
507
+ "grad_norm": 1.9154149293899536,
508
+ "learning_rate": 8.774957698815567e-06,
509
+ "loss": 0.0737,
510
+ "step": 1725
511
+ },
512
+ {
513
+ "epoch": 2.5316455696202533,
514
+ "grad_norm": 1.7026249170303345,
515
+ "learning_rate": 8.732656514382404e-06,
516
+ "loss": 0.0821,
517
+ "step": 1750
518
+ },
519
+ {
520
+ "epoch": 2.5678119349005426,
521
+ "grad_norm": 1.5672781467437744,
522
+ "learning_rate": 8.69035532994924e-06,
523
+ "loss": 0.0708,
524
+ "step": 1775
525
+ },
526
+ {
527
+ "epoch": 2.603978300180832,
528
+ "grad_norm": 1.1428169012069702,
529
+ "learning_rate": 8.648054145516075e-06,
530
+ "loss": 0.0825,
531
+ "step": 1800
532
+ },
533
+ {
534
+ "epoch": 2.640144665461121,
535
+ "grad_norm": 4.432084560394287,
536
+ "learning_rate": 8.60575296108291e-06,
537
+ "loss": 0.0749,
538
+ "step": 1825
539
+ },
540
+ {
541
+ "epoch": 2.6763110307414104,
542
+ "grad_norm": 2.4532995223999023,
543
+ "learning_rate": 8.563451776649747e-06,
544
+ "loss": 0.0856,
545
+ "step": 1850
546
+ },
547
+ {
548
+ "epoch": 2.7124773960216997,
549
+ "grad_norm": 2.286349296569824,
550
+ "learning_rate": 8.521150592216582e-06,
551
+ "loss": 0.075,
552
+ "step": 1875
553
+ },
554
+ {
555
+ "epoch": 2.748643761301989,
556
+ "grad_norm": 1.524868130683899,
557
+ "learning_rate": 8.478849407783418e-06,
558
+ "loss": 0.0829,
559
+ "step": 1900
560
+ },
561
+ {
562
+ "epoch": 2.7848101265822782,
563
+ "grad_norm": 3.2034952640533447,
564
+ "learning_rate": 8.436548223350255e-06,
565
+ "loss": 0.0749,
566
+ "step": 1925
567
+ },
568
+ {
569
+ "epoch": 2.820976491862568,
570
+ "grad_norm": 2.6012070178985596,
571
+ "learning_rate": 8.39424703891709e-06,
572
+ "loss": 0.089,
573
+ "step": 1950
574
+ },
575
+ {
576
+ "epoch": 2.857142857142857,
577
+ "grad_norm": 2.747398614883423,
578
+ "learning_rate": 8.351945854483925e-06,
579
+ "loss": 0.0774,
580
+ "step": 1975
581
+ },
582
+ {
583
+ "epoch": 2.8933092224231465,
584
+ "grad_norm": 1.3775649070739746,
585
+ "learning_rate": 8.309644670050762e-06,
586
+ "loss": 0.0795,
587
+ "step": 2000
588
+ },
589
+ {
590
+ "epoch": 2.9294755877034357,
591
+ "grad_norm": 6.074094295501709,
592
+ "learning_rate": 8.267343485617598e-06,
593
+ "loss": 0.0658,
594
+ "step": 2025
595
+ },
596
+ {
597
+ "epoch": 2.965641952983725,
598
+ "grad_norm": 2.2065982818603516,
599
+ "learning_rate": 8.225042301184433e-06,
600
+ "loss": 0.091,
601
+ "step": 2050
602
+ },
603
+ {
604
+ "epoch": 2.9989150090415913,
605
+ "eval_loss": 0.1697043776512146,
606
+ "eval_runtime": 49669.5944,
607
+ "eval_samples_per_second": 0.191,
608
+ "eval_steps_per_second": 0.024,
609
+ "eval_wer": 0.11237988468930173,
610
+ "step": 2073
611
+ },
612
+ {
613
+ "epoch": 3.0018083182640143,
614
+ "grad_norm": 1.5902258157730103,
615
+ "learning_rate": 8.18274111675127e-06,
616
+ "loss": 0.0754,
617
+ "step": 2075
618
+ },
619
+ {
620
+ "epoch": 3.037974683544304,
621
+ "grad_norm": 1.6310546398162842,
622
+ "learning_rate": 8.140439932318105e-06,
623
+ "loss": 0.0409,
624
+ "step": 2100
625
+ },
626
+ {
627
+ "epoch": 3.0741410488245933,
628
+ "grad_norm": 1.9858086109161377,
629
+ "learning_rate": 8.098138747884942e-06,
630
+ "loss": 0.0529,
631
+ "step": 2125
632
+ },
633
+ {
634
+ "epoch": 3.1103074141048825,
635
+ "grad_norm": 0.8269912004470825,
636
+ "learning_rate": 8.055837563451778e-06,
637
+ "loss": 0.0376,
638
+ "step": 2150
639
+ },
640
+ {
641
+ "epoch": 3.146473779385172,
642
+ "grad_norm": 0.8812470436096191,
643
+ "learning_rate": 8.013536379018613e-06,
644
+ "loss": 0.0522,
645
+ "step": 2175
646
+ },
647
+ {
648
+ "epoch": 3.182640144665461,
649
+ "grad_norm": 1.5196110010147095,
650
+ "learning_rate": 7.97123519458545e-06,
651
+ "loss": 0.043,
652
+ "step": 2200
653
+ },
654
+ {
655
+ "epoch": 3.2188065099457503,
656
+ "grad_norm": 1.6323350667953491,
657
+ "learning_rate": 7.928934010152285e-06,
658
+ "loss": 0.0577,
659
+ "step": 2225
660
+ },
661
+ {
662
+ "epoch": 3.2549728752260396,
663
+ "grad_norm": 1.475954532623291,
664
+ "learning_rate": 7.88663282571912e-06,
665
+ "loss": 0.0396,
666
+ "step": 2250
667
+ },
668
+ {
669
+ "epoch": 3.291139240506329,
670
+ "grad_norm": 1.9558379650115967,
671
+ "learning_rate": 7.844331641285958e-06,
672
+ "loss": 0.0433,
673
+ "step": 2275
674
+ },
675
+ {
676
+ "epoch": 3.3273056057866186,
677
+ "grad_norm": 1.1998658180236816,
678
+ "learning_rate": 7.802030456852793e-06,
679
+ "loss": 0.0383,
680
+ "step": 2300
681
+ },
682
+ {
683
+ "epoch": 3.363471971066908,
684
+ "grad_norm": 1.4807426929473877,
685
+ "learning_rate": 7.759729272419628e-06,
686
+ "loss": 0.06,
687
+ "step": 2325
688
+ },
689
+ {
690
+ "epoch": 3.399638336347197,
691
+ "grad_norm": 0.8246263265609741,
692
+ "learning_rate": 7.717428087986464e-06,
693
+ "loss": 0.0443,
694
+ "step": 2350
695
+ },
696
+ {
697
+ "epoch": 3.4358047016274864,
698
+ "grad_norm": 0.9691349267959595,
699
+ "learning_rate": 7.675126903553301e-06,
700
+ "loss": 0.0535,
701
+ "step": 2375
702
+ },
703
+ {
704
+ "epoch": 3.4719710669077757,
705
+ "grad_norm": 0.7182671427726746,
706
+ "learning_rate": 7.632825719120136e-06,
707
+ "loss": 0.0417,
708
+ "step": 2400
709
+ },
710
+ {
711
+ "epoch": 3.508137432188065,
712
+ "grad_norm": 1.2362239360809326,
713
+ "learning_rate": 7.5905245346869715e-06,
714
+ "loss": 0.0529,
715
+ "step": 2425
716
+ },
717
+ {
718
+ "epoch": 3.5443037974683547,
719
+ "grad_norm": 1.6033586263656616,
720
+ "learning_rate": 7.5482233502538086e-06,
721
+ "loss": 0.0377,
722
+ "step": 2450
723
+ },
724
+ {
725
+ "epoch": 3.580470162748644,
726
+ "grad_norm": 1.3885324001312256,
727
+ "learning_rate": 7.505922165820644e-06,
728
+ "loss": 0.056,
729
+ "step": 2475
730
+ },
731
+ {
732
+ "epoch": 3.616636528028933,
733
+ "grad_norm": 0.925649881362915,
734
+ "learning_rate": 7.463620981387479e-06,
735
+ "loss": 0.0399,
736
+ "step": 2500
737
+ },
738
+ {
739
+ "epoch": 3.6528028933092225,
740
+ "grad_norm": 2.046786308288574,
741
+ "learning_rate": 7.4213197969543146e-06,
742
+ "loss": 0.064,
743
+ "step": 2525
744
+ },
745
+ {
746
+ "epoch": 3.6889692585895117,
747
+ "grad_norm": 2.5204615592956543,
748
+ "learning_rate": 7.379018612521152e-06,
749
+ "loss": 0.0392,
750
+ "step": 2550
751
+ },
752
+ {
753
+ "epoch": 3.725135623869801,
754
+ "grad_norm": 1.6770093441009521,
755
+ "learning_rate": 7.336717428087987e-06,
756
+ "loss": 0.0508,
757
+ "step": 2575
758
+ },
759
+ {
760
+ "epoch": 3.7613019891500903,
761
+ "grad_norm": 1.5132670402526855,
762
+ "learning_rate": 7.294416243654823e-06,
763
+ "loss": 0.0442,
764
+ "step": 2600
765
+ },
766
+ {
767
+ "epoch": 3.7974683544303796,
768
+ "grad_norm": 1.3414489030838013,
769
+ "learning_rate": 7.2521150592216584e-06,
770
+ "loss": 0.0551,
771
+ "step": 2625
772
+ },
773
+ {
774
+ "epoch": 3.833634719710669,
775
+ "grad_norm": 1.41124427318573,
776
+ "learning_rate": 7.209813874788495e-06,
777
+ "loss": 0.0382,
778
+ "step": 2650
779
+ },
780
+ {
781
+ "epoch": 3.8698010849909585,
782
+ "grad_norm": 1.133798360824585,
783
+ "learning_rate": 7.167512690355331e-06,
784
+ "loss": 0.0585,
785
+ "step": 2675
786
+ },
787
+ {
788
+ "epoch": 3.905967450271248,
789
+ "grad_norm": 2.0255684852600098,
790
+ "learning_rate": 7.125211505922166e-06,
791
+ "loss": 0.0417,
792
+ "step": 2700
793
+ },
794
+ {
795
+ "epoch": 3.942133815551537,
796
+ "grad_norm": 1.0678452253341675,
797
+ "learning_rate": 7.082910321489002e-06,
798
+ "loss": 0.0578,
799
+ "step": 2725
800
+ },
801
+ {
802
+ "epoch": 3.9783001808318263,
803
+ "grad_norm": 1.9355206489562988,
804
+ "learning_rate": 7.0406091370558385e-06,
805
+ "loss": 0.0461,
806
+ "step": 2750
807
+ },
808
+ {
809
+ "epoch": 4.0,
810
+ "eval_loss": 0.17637395858764648,
811
+ "eval_runtime": 49043.1261,
812
+ "eval_samples_per_second": 0.194,
813
+ "eval_steps_per_second": 0.024,
814
+ "eval_wer": 0.1119795003203075,
815
+ "step": 2765
816
+ },
817
+ {
818
+ "epoch": 4.014466546112116,
819
+ "grad_norm": 0.5305593013763428,
820
+ "learning_rate": 6.998307952622674e-06,
821
+ "loss": 0.0582,
822
+ "step": 2775
823
+ },
824
+ {
825
+ "epoch": 4.050632911392405,
826
+ "grad_norm": 1.1978957653045654,
827
+ "learning_rate": 6.956006768189509e-06,
828
+ "loss": 0.0205,
829
+ "step": 2800
830
+ },
831
+ {
832
+ "epoch": 4.086799276672695,
833
+ "grad_norm": 1.0628869533538818,
834
+ "learning_rate": 6.913705583756346e-06,
835
+ "loss": 0.024,
836
+ "step": 2825
837
+ },
838
+ {
839
+ "epoch": 4.122965641952984,
840
+ "grad_norm": 3.1139473915100098,
841
+ "learning_rate": 6.8714043993231816e-06,
842
+ "loss": 0.0242,
843
+ "step": 2850
844
+ },
845
+ {
846
+ "epoch": 4.159132007233273,
847
+ "grad_norm": 0.7646371126174927,
848
+ "learning_rate": 6.829103214890017e-06,
849
+ "loss": 0.0363,
850
+ "step": 2875
851
+ },
852
+ {
853
+ "epoch": 4.195298372513562,
854
+ "grad_norm": 1.3536927700042725,
855
+ "learning_rate": 6.786802030456854e-06,
856
+ "loss": 0.0254,
857
+ "step": 2900
858
+ },
859
+ {
860
+ "epoch": 4.231464737793852,
861
+ "grad_norm": 1.744105339050293,
862
+ "learning_rate": 6.744500846023689e-06,
863
+ "loss": 0.0362,
864
+ "step": 2925
865
+ },
866
+ {
867
+ "epoch": 4.267631103074141,
868
+ "grad_norm": 0.8838434815406799,
869
+ "learning_rate": 6.702199661590525e-06,
870
+ "loss": 0.0236,
871
+ "step": 2950
872
+ },
873
+ {
874
+ "epoch": 4.30379746835443,
875
+ "grad_norm": 1.056674838066101,
876
+ "learning_rate": 6.659898477157361e-06,
877
+ "loss": 0.0311,
878
+ "step": 2975
879
+ },
880
+ {
881
+ "epoch": 4.3399638336347195,
882
+ "grad_norm": 1.42670476436615,
883
+ "learning_rate": 6.617597292724197e-06,
884
+ "loss": 0.0277,
885
+ "step": 3000
886
+ },
887
+ {
888
+ "epoch": 4.376130198915009,
889
+ "grad_norm": 1.0685067176818848,
890
+ "learning_rate": 6.575296108291032e-06,
891
+ "loss": 0.0346,
892
+ "step": 3025
893
+ },
894
+ {
895
+ "epoch": 4.412296564195298,
896
+ "grad_norm": 0.6285632252693176,
897
+ "learning_rate": 6.5329949238578685e-06,
898
+ "loss": 0.0271,
899
+ "step": 3050
900
+ },
901
+ {
902
+ "epoch": 4.448462929475587,
903
+ "grad_norm": 1.1495152711868286,
904
+ "learning_rate": 6.490693739424705e-06,
905
+ "loss": 0.0339,
906
+ "step": 3075
907
+ },
908
+ {
909
+ "epoch": 4.484629294755877,
910
+ "grad_norm": 1.4339568614959717,
911
+ "learning_rate": 6.448392554991541e-06,
912
+ "loss": 0.0218,
913
+ "step": 3100
914
+ },
915
+ {
916
+ "epoch": 4.520795660036167,
917
+ "grad_norm": 0.9594871997833252,
918
+ "learning_rate": 6.406091370558376e-06,
919
+ "loss": 0.029,
920
+ "step": 3125
921
+ },
922
+ {
923
+ "epoch": 4.556962025316456,
924
+ "grad_norm": 1.1738754510879517,
925
+ "learning_rate": 6.3637901861252115e-06,
926
+ "loss": 0.0312,
927
+ "step": 3150
928
+ },
929
+ {
930
+ "epoch": 4.593128390596745,
931
+ "grad_norm": 1.202968716621399,
932
+ "learning_rate": 6.3214890016920485e-06,
933
+ "loss": 0.032,
934
+ "step": 3175
935
+ },
936
+ {
937
+ "epoch": 4.6292947558770345,
938
+ "grad_norm": 1.722782850265503,
939
+ "learning_rate": 6.279187817258884e-06,
940
+ "loss": 0.0327,
941
+ "step": 3200
942
+ },
943
+ {
944
+ "epoch": 4.665461121157324,
945
+ "grad_norm": 0.8393648862838745,
946
+ "learning_rate": 6.236886632825719e-06,
947
+ "loss": 0.0356,
948
+ "step": 3225
949
+ },
950
+ {
951
+ "epoch": 4.701627486437613,
952
+ "grad_norm": 2.255413770675659,
953
+ "learning_rate": 6.194585448392556e-06,
954
+ "loss": 0.0298,
955
+ "step": 3250
956
+ },
957
+ {
958
+ "epoch": 4.737793851717902,
959
+ "grad_norm": 1.082092523574829,
960
+ "learning_rate": 6.152284263959392e-06,
961
+ "loss": 0.035,
962
+ "step": 3275
963
+ },
964
+ {
965
+ "epoch": 4.773960216998192,
966
+ "grad_norm": 1.9351752996444702,
967
+ "learning_rate": 6.109983079526227e-06,
968
+ "loss": 0.025,
969
+ "step": 3300
970
+ },
971
+ {
972
+ "epoch": 4.810126582278481,
973
+ "grad_norm": 1.7391856908798218,
974
+ "learning_rate": 6.067681895093063e-06,
975
+ "loss": 0.0398,
976
+ "step": 3325
977
+ },
978
+ {
979
+ "epoch": 4.84629294755877,
980
+ "grad_norm": 1.332651972770691,
981
+ "learning_rate": 6.025380710659899e-06,
982
+ "loss": 0.0269,
983
+ "step": 3350
984
+ },
985
+ {
986
+ "epoch": 4.882459312839059,
987
+ "grad_norm": 1.1322509050369263,
988
+ "learning_rate": 5.983079526226735e-06,
989
+ "loss": 0.0296,
990
+ "step": 3375
991
+ },
992
+ {
993
+ "epoch": 4.918625678119349,
994
+ "grad_norm": 1.640367865562439,
995
+ "learning_rate": 5.940778341793571e-06,
996
+ "loss": 0.0245,
997
+ "step": 3400
998
+ },
999
+ {
1000
+ "epoch": 4.954792043399638,
1001
+ "grad_norm": 1.5555723905563354,
1002
+ "learning_rate": 5.898477157360406e-06,
1003
+ "loss": 0.0455,
1004
+ "step": 3425
1005
+ },
1006
+ {
1007
+ "epoch": 4.990958408679928,
1008
+ "grad_norm": 2.142411947250366,
1009
+ "learning_rate": 5.856175972927242e-06,
1010
+ "loss": 0.0264,
1011
+ "step": 3450
1012
+ },
1013
+ {
1014
+ "epoch": 4.999638336347197,
1015
+ "eval_loss": 0.20242072641849518,
1016
+ "eval_runtime": 49328.761,
1017
+ "eval_samples_per_second": 0.192,
1018
+ "eval_steps_per_second": 0.024,
1019
+ "eval_wer": 0.11330877642536835,
1020
+ "step": 3456
1021
+ },
1022
+ {
1023
+ "epoch": 5.027124773960217,
1024
+ "grad_norm": 2.6065480709075928,
1025
+ "learning_rate": 5.8138747884940785e-06,
1026
+ "loss": 0.0223,
1027
+ "step": 3475
1028
+ },
1029
+ {
1030
+ "epoch": 5.063291139240507,
1031
+ "grad_norm": 6.660514831542969,
1032
+ "learning_rate": 5.771573604060914e-06,
1033
+ "loss": 0.0196,
1034
+ "step": 3500
1035
+ },
1036
+ {
1037
+ "epoch": 5.099457504520796,
1038
+ "grad_norm": 0.8771660923957825,
1039
+ "learning_rate": 5.729272419627751e-06,
1040
+ "loss": 0.0211,
1041
+ "step": 3525
1042
+ },
1043
+ {
1044
+ "epoch": 5.135623869801085,
1045
+ "grad_norm": 5.249636650085449,
1046
+ "learning_rate": 5.686971235194586e-06,
1047
+ "loss": 0.0212,
1048
+ "step": 3550
1049
+ },
1050
+ {
1051
+ "epoch": 5.1717902350813745,
1052
+ "grad_norm": 0.7272325754165649,
1053
+ "learning_rate": 5.6446700507614216e-06,
1054
+ "loss": 0.018,
1055
+ "step": 3575
1056
+ },
1057
+ {
1058
+ "epoch": 5.207956600361664,
1059
+ "grad_norm": 1.247758388519287,
1060
+ "learning_rate": 5.602368866328257e-06,
1061
+ "loss": 0.0174,
1062
+ "step": 3600
1063
+ },
1064
+ {
1065
+ "epoch": 5.244122965641953,
1066
+ "grad_norm": 1.6683036088943481,
1067
+ "learning_rate": 5.560067681895094e-06,
1068
+ "loss": 0.0208,
1069
+ "step": 3625
1070
+ },
1071
+ {
1072
+ "epoch": 5.280289330922242,
1073
+ "grad_norm": 0.9653752446174622,
1074
+ "learning_rate": 5.517766497461929e-06,
1075
+ "loss": 0.0158,
1076
+ "step": 3650
1077
+ },
1078
+ {
1079
+ "epoch": 5.3164556962025316,
1080
+ "grad_norm": 2.857499361038208,
1081
+ "learning_rate": 5.475465313028765e-06,
1082
+ "loss": 0.0215,
1083
+ "step": 3675
1084
+ },
1085
+ {
1086
+ "epoch": 5.352622061482821,
1087
+ "grad_norm": 2.953227996826172,
1088
+ "learning_rate": 5.433164128595602e-06,
1089
+ "loss": 0.017,
1090
+ "step": 3700
1091
+ },
1092
+ {
1093
+ "epoch": 5.38878842676311,
1094
+ "grad_norm": 2.159450054168701,
1095
+ "learning_rate": 5.390862944162437e-06,
1096
+ "loss": 0.0199,
1097
+ "step": 3725
1098
+ },
1099
+ {
1100
+ "epoch": 5.424954792043399,
1101
+ "grad_norm": 2.423708200454712,
1102
+ "learning_rate": 5.348561759729273e-06,
1103
+ "loss": 0.0185,
1104
+ "step": 3750
1105
+ },
1106
+ {
1107
+ "epoch": 5.461121157323689,
1108
+ "grad_norm": 1.3024959564208984,
1109
+ "learning_rate": 5.3062605752961085e-06,
1110
+ "loss": 0.0212,
1111
+ "step": 3775
1112
+ },
1113
+ {
1114
+ "epoch": 5.497287522603978,
1115
+ "grad_norm": 1.6463954448699951,
1116
+ "learning_rate": 5.263959390862945e-06,
1117
+ "loss": 0.018,
1118
+ "step": 3800
1119
+ },
1120
+ {
1121
+ "epoch": 5.533453887884267,
1122
+ "grad_norm": 0.8385624885559082,
1123
+ "learning_rate": 5.221658206429781e-06,
1124
+ "loss": 0.0191,
1125
+ "step": 3825
1126
+ },
1127
+ {
1128
+ "epoch": 5.569620253164557,
1129
+ "grad_norm": 1.0305800437927246,
1130
+ "learning_rate": 5.179357021996616e-06,
1131
+ "loss": 0.0179,
1132
+ "step": 3850
1133
+ },
1134
+ {
1135
+ "epoch": 5.605786618444847,
1136
+ "grad_norm": 1.5330723524093628,
1137
+ "learning_rate": 5.137055837563452e-06,
1138
+ "loss": 0.0203,
1139
+ "step": 3875
1140
+ },
1141
+ {
1142
+ "epoch": 5.641952983725136,
1143
+ "grad_norm": 1.5121113061904907,
1144
+ "learning_rate": 5.0947546531302885e-06,
1145
+ "loss": 0.0187,
1146
+ "step": 3900
1147
+ },
1148
+ {
1149
+ "epoch": 5.678119349005425,
1150
+ "grad_norm": 0.41953858733177185,
1151
+ "learning_rate": 5.052453468697124e-06,
1152
+ "loss": 0.0207,
1153
+ "step": 3925
1154
+ },
1155
+ {
1156
+ "epoch": 5.714285714285714,
1157
+ "grad_norm": 3.4192609786987305,
1158
+ "learning_rate": 5.010152284263959e-06,
1159
+ "loss": 0.0176,
1160
+ "step": 3950
1161
+ },
1162
+ {
1163
+ "epoch": 5.750452079566004,
1164
+ "grad_norm": 3.3937628269195557,
1165
+ "learning_rate": 4.967851099830795e-06,
1166
+ "loss": 0.0207,
1167
+ "step": 3975
1168
+ },
1169
+ {
1170
+ "epoch": 5.786618444846293,
1171
+ "grad_norm": 1.6074724197387695,
1172
+ "learning_rate": 4.925549915397632e-06,
1173
+ "loss": 0.0207,
1174
+ "step": 4000
1175
+ },
1176
+ {
1177
+ "epoch": 5.822784810126582,
1178
+ "grad_norm": 0.9468584656715393,
1179
+ "learning_rate": 4.883248730964467e-06,
1180
+ "loss": 0.018,
1181
+ "step": 4025
1182
+ },
1183
+ {
1184
+ "epoch": 5.8589511754068715,
1185
+ "grad_norm": 1.2002973556518555,
1186
+ "learning_rate": 4.840947546531303e-06,
1187
+ "loss": 0.0151,
1188
+ "step": 4050
1189
+ },
1190
+ {
1191
+ "epoch": 5.895117540687161,
1192
+ "grad_norm": 1.0356348752975464,
1193
+ "learning_rate": 4.798646362098139e-06,
1194
+ "loss": 0.0201,
1195
+ "step": 4075
1196
+ },
1197
+ {
1198
+ "epoch": 5.93128390596745,
1199
+ "grad_norm": 1.657277226448059,
1200
+ "learning_rate": 4.756345177664975e-06,
1201
+ "loss": 0.0202,
1202
+ "step": 4100
1203
+ },
1204
+ {
1205
+ "epoch": 5.967450271247739,
1206
+ "grad_norm": 1.1826387643814087,
1207
+ "learning_rate": 4.714043993231811e-06,
1208
+ "loss": 0.0203,
1209
+ "step": 4125
1210
+ },
1211
+ {
1212
+ "epoch": 5.9992766726943945,
1213
+ "eval_loss": 0.22004875540733337,
1214
+ "eval_runtime": 49062.4528,
1215
+ "eval_samples_per_second": 0.194,
1216
+ "eval_steps_per_second": 0.024,
1217
+ "eval_wer": 0.10991351697629724,
1218
+ "step": 4147
1219
+ },
1220
+ {
1221
+ "epoch": 6.003616636528029,
1222
+ "grad_norm": 0.2818962335586548,
1223
+ "learning_rate": 4.671742808798647e-06,
1224
+ "loss": 0.0216,
1225
+ "step": 4150
1226
+ },
1227
+ {
1228
+ "epoch": 6.039783001808318,
1229
+ "grad_norm": 0.8754785656929016,
1230
+ "learning_rate": 4.631133671742809e-06,
1231
+ "loss": 0.0117,
1232
+ "step": 4175
1233
+ },
1234
+ {
1235
+ "epoch": 6.075949367088608,
1236
+ "grad_norm": 0.6040045619010925,
1237
+ "learning_rate": 4.588832487309645e-06,
1238
+ "loss": 0.0185,
1239
+ "step": 4200
1240
+ },
1241
+ {
1242
+ "epoch": 6.112115732368897,
1243
+ "grad_norm": 0.5590214729309082,
1244
+ "learning_rate": 4.546531302876481e-06,
1245
+ "loss": 0.0095,
1246
+ "step": 4225
1247
+ },
1248
+ {
1249
+ "epoch": 6.1482820976491865,
1250
+ "grad_norm": 1.0092438459396362,
1251
+ "learning_rate": 4.504230118443317e-06,
1252
+ "loss": 0.0132,
1253
+ "step": 4250
1254
+ },
1255
+ {
1256
+ "epoch": 6.184448462929476,
1257
+ "grad_norm": 0.68355393409729,
1258
+ "learning_rate": 4.461928934010153e-06,
1259
+ "loss": 0.0073,
1260
+ "step": 4275
1261
+ },
1262
+ {
1263
+ "epoch": 6.220614828209765,
1264
+ "grad_norm": 0.854202926158905,
1265
+ "learning_rate": 4.4196277495769884e-06,
1266
+ "loss": 0.0208,
1267
+ "step": 4300
1268
+ },
1269
+ {
1270
+ "epoch": 6.256781193490054,
1271
+ "grad_norm": 0.3510783314704895,
1272
+ "learning_rate": 4.377326565143825e-06,
1273
+ "loss": 0.0112,
1274
+ "step": 4325
1275
+ },
1276
+ {
1277
+ "epoch": 6.292947558770344,
1278
+ "grad_norm": 0.36134082078933716,
1279
+ "learning_rate": 4.33502538071066e-06,
1280
+ "loss": 0.0136,
1281
+ "step": 4350
1282
+ },
1283
+ {
1284
+ "epoch": 6.329113924050633,
1285
+ "grad_norm": 1.4613643884658813,
1286
+ "learning_rate": 4.292724196277496e-06,
1287
+ "loss": 0.0109,
1288
+ "step": 4375
1289
+ },
1290
+ {
1291
+ "epoch": 6.365280289330922,
1292
+ "grad_norm": 0.7109003067016602,
1293
+ "learning_rate": 4.2504230118443315e-06,
1294
+ "loss": 0.0179,
1295
+ "step": 4400
1296
+ },
1297
+ {
1298
+ "epoch": 6.401446654611211,
1299
+ "grad_norm": 0.505111813545227,
1300
+ "learning_rate": 4.208121827411168e-06,
1301
+ "loss": 0.0094,
1302
+ "step": 4425
1303
+ },
1304
+ {
1305
+ "epoch": 6.437613019891501,
1306
+ "grad_norm": 1.2527493238449097,
1307
+ "learning_rate": 4.165820642978004e-06,
1308
+ "loss": 0.0195,
1309
+ "step": 4450
1310
+ },
1311
+ {
1312
+ "epoch": 6.47377938517179,
1313
+ "grad_norm": 1.6427637338638306,
1314
+ "learning_rate": 4.123519458544839e-06,
1315
+ "loss": 0.0102,
1316
+ "step": 4475
1317
+ },
1318
+ {
1319
+ "epoch": 6.509945750452079,
1320
+ "grad_norm": 0.24043454229831696,
1321
+ "learning_rate": 4.081218274111675e-06,
1322
+ "loss": 0.0181,
1323
+ "step": 4500
1324
+ },
1325
+ {
1326
+ "epoch": 6.5461121157323685,
1327
+ "grad_norm": 0.9318906664848328,
1328
+ "learning_rate": 4.0389170896785115e-06,
1329
+ "loss": 0.011,
1330
+ "step": 4525
1331
+ },
1332
+ {
1333
+ "epoch": 6.582278481012658,
1334
+ "grad_norm": 0.46116721630096436,
1335
+ "learning_rate": 3.996615905245348e-06,
1336
+ "loss": 0.0208,
1337
+ "step": 4550
1338
+ },
1339
+ {
1340
+ "epoch": 6.618444846292948,
1341
+ "grad_norm": 1.371081829071045,
1342
+ "learning_rate": 3.954314720812183e-06,
1343
+ "loss": 0.0092,
1344
+ "step": 4575
1345
+ },
1346
+ {
1347
+ "epoch": 6.654611211573237,
1348
+ "grad_norm": 1.1018704175949097,
1349
+ "learning_rate": 3.912013536379019e-06,
1350
+ "loss": 0.014,
1351
+ "step": 4600
1352
+ },
1353
+ {
1354
+ "epoch": 6.6907775768535265,
1355
+ "grad_norm": 1.1697708368301392,
1356
+ "learning_rate": 3.869712351945855e-06,
1357
+ "loss": 0.0126,
1358
+ "step": 4625
1359
+ },
1360
+ {
1361
+ "epoch": 6.726943942133816,
1362
+ "grad_norm": 0.5917466878890991,
1363
+ "learning_rate": 3.827411167512691e-06,
1364
+ "loss": 0.0168,
1365
+ "step": 4650
1366
+ },
1367
+ {
1368
+ "epoch": 6.763110307414105,
1369
+ "grad_norm": 0.7262076735496521,
1370
+ "learning_rate": 3.785109983079526e-06,
1371
+ "loss": 0.0077,
1372
+ "step": 4675
1373
+ },
1374
+ {
1375
+ "epoch": 6.799276672694394,
1376
+ "grad_norm": 0.16528570652008057,
1377
+ "learning_rate": 3.7428087986463623e-06,
1378
+ "loss": 0.0142,
1379
+ "step": 4700
1380
+ },
1381
+ {
1382
+ "epoch": 6.8354430379746836,
1383
+ "grad_norm": 2.227794885635376,
1384
+ "learning_rate": 3.7005076142131985e-06,
1385
+ "loss": 0.0077,
1386
+ "step": 4725
1387
+ },
1388
+ {
1389
+ "epoch": 6.871609403254973,
1390
+ "grad_norm": 0.8085231184959412,
1391
+ "learning_rate": 3.6582064297800342e-06,
1392
+ "loss": 0.0154,
1393
+ "step": 4750
1394
+ },
1395
+ {
1396
+ "epoch": 6.907775768535262,
1397
+ "grad_norm": 1.447237253189087,
1398
+ "learning_rate": 3.61590524534687e-06,
1399
+ "loss": 0.0087,
1400
+ "step": 4775
1401
+ },
1402
+ {
1403
+ "epoch": 6.943942133815551,
1404
+ "grad_norm": 0.4593859910964966,
1405
+ "learning_rate": 3.5736040609137058e-06,
1406
+ "loss": 0.0131,
1407
+ "step": 4800
1408
+ },
1409
+ {
1410
+ "epoch": 6.980108499095841,
1411
+ "grad_norm": 1.0502936840057373,
1412
+ "learning_rate": 3.531302876480542e-06,
1413
+ "loss": 0.0129,
1414
+ "step": 4825
1415
+ },
1416
+ {
1417
+ "epoch": 6.998915009041592,
1418
+ "eval_loss": 0.22768299281597137,
1419
+ "eval_runtime": 48990.2447,
1420
+ "eval_samples_per_second": 0.194,
1421
+ "eval_steps_per_second": 0.024,
1422
+ "eval_wer": 0.11143497757847534,
1423
+ "step": 4838
1424
+ },
1425
+ {
1426
+ "epoch": 7.01627486437613,
1427
+ "grad_norm": 0.29863396286964417,
1428
+ "learning_rate": 3.4890016920473773e-06,
1429
+ "loss": 0.0131,
1430
+ "step": 4850
1431
+ },
1432
+ {
1433
+ "epoch": 7.052441229656419,
1434
+ "grad_norm": 1.6121262311935425,
1435
+ "learning_rate": 3.4467005076142135e-06,
1436
+ "loss": 0.0067,
1437
+ "step": 4875
1438
+ },
1439
+ {
1440
+ "epoch": 7.0886075949367084,
1441
+ "grad_norm": 0.682396411895752,
1442
+ "learning_rate": 3.4043993231810496e-06,
1443
+ "loss": 0.0104,
1444
+ "step": 4900
1445
+ },
1446
+ {
1447
+ "epoch": 7.124773960216999,
1448
+ "grad_norm": 1.451640009880066,
1449
+ "learning_rate": 3.362098138747885e-06,
1450
+ "loss": 0.0061,
1451
+ "step": 4925
1452
+ },
1453
+ {
1454
+ "epoch": 7.160940325497288,
1455
+ "grad_norm": 1.0819129943847656,
1456
+ "learning_rate": 3.319796954314721e-06,
1457
+ "loss": 0.0098,
1458
+ "step": 4950
1459
+ },
1460
+ {
1461
+ "epoch": 7.197106690777577,
1462
+ "grad_norm": 0.8002307415008545,
1463
+ "learning_rate": 3.277495769881557e-06,
1464
+ "loss": 0.0076,
1465
+ "step": 4975
1466
+ },
1467
+ {
1468
+ "epoch": 7.233273056057866,
1469
+ "grad_norm": 0.34655046463012695,
1470
+ "learning_rate": 3.235194585448393e-06,
1471
+ "loss": 0.0119,
1472
+ "step": 5000
1473
+ },
1474
+ {
1475
+ "epoch": 7.269439421338156,
1476
+ "grad_norm": 0.7425087690353394,
1477
+ "learning_rate": 3.1928934010152284e-06,
1478
+ "loss": 0.0051,
1479
+ "step": 5025
1480
+ },
1481
+ {
1482
+ "epoch": 7.305605786618445,
1483
+ "grad_norm": 0.48938363790512085,
1484
+ "learning_rate": 3.1505922165820646e-06,
1485
+ "loss": 0.0103,
1486
+ "step": 5050
1487
+ },
1488
+ {
1489
+ "epoch": 7.341772151898734,
1490
+ "grad_norm": 1.4168592691421509,
1491
+ "learning_rate": 3.1082910321489e-06,
1492
+ "loss": 0.0053,
1493
+ "step": 5075
1494
+ },
1495
+ {
1496
+ "epoch": 7.3779385171790235,
1497
+ "grad_norm": 0.7530602812767029,
1498
+ "learning_rate": 3.065989847715736e-06,
1499
+ "loss": 0.0124,
1500
+ "step": 5100
1501
+ },
1502
+ {
1503
+ "epoch": 7.414104882459313,
1504
+ "grad_norm": 0.81525719165802,
1505
+ "learning_rate": 3.0236886632825723e-06,
1506
+ "loss": 0.0047,
1507
+ "step": 5125
1508
+ },
1509
+ {
1510
+ "epoch": 7.450271247739602,
1511
+ "grad_norm": 0.5934809446334839,
1512
+ "learning_rate": 2.981387478849408e-06,
1513
+ "loss": 0.0073,
1514
+ "step": 5150
1515
+ },
1516
+ {
1517
+ "epoch": 7.486437613019891,
1518
+ "grad_norm": 1.5411005020141602,
1519
+ "learning_rate": 2.939086294416244e-06,
1520
+ "loss": 0.0076,
1521
+ "step": 5175
1522
+ },
1523
+ {
1524
+ "epoch": 7.522603978300181,
1525
+ "grad_norm": 3.0440473556518555,
1526
+ "learning_rate": 2.8967851099830796e-06,
1527
+ "loss": 0.0119,
1528
+ "step": 5200
1529
+ },
1530
+ {
1531
+ "epoch": 7.55877034358047,
1532
+ "grad_norm": 0.15616849064826965,
1533
+ "learning_rate": 2.8544839255499158e-06,
1534
+ "loss": 0.0071,
1535
+ "step": 5225
1536
+ },
1537
+ {
1538
+ "epoch": 7.594936708860759,
1539
+ "grad_norm": 0.6418558955192566,
1540
+ "learning_rate": 2.812182741116751e-06,
1541
+ "loss": 0.0125,
1542
+ "step": 5250
1543
+ },
1544
+ {
1545
+ "epoch": 7.631103074141048,
1546
+ "grad_norm": 1.245208978652954,
1547
+ "learning_rate": 2.7698815566835873e-06,
1548
+ "loss": 0.009,
1549
+ "step": 5275
1550
+ },
1551
+ {
1552
+ "epoch": 7.6672694394213385,
1553
+ "grad_norm": 0.6857640743255615,
1554
+ "learning_rate": 2.7275803722504235e-06,
1555
+ "loss": 0.0143,
1556
+ "step": 5300
1557
+ },
1558
+ {
1559
+ "epoch": 7.703435804701628,
1560
+ "grad_norm": 0.6764671802520752,
1561
+ "learning_rate": 2.6852791878172592e-06,
1562
+ "loss": 0.0081,
1563
+ "step": 5325
1564
+ },
1565
+ {
1566
+ "epoch": 7.739602169981917,
1567
+ "grad_norm": 0.6338791251182556,
1568
+ "learning_rate": 2.642978003384095e-06,
1569
+ "loss": 0.0128,
1570
+ "step": 5350
1571
+ },
1572
+ {
1573
+ "epoch": 7.775768535262206,
1574
+ "grad_norm": 1.4596633911132812,
1575
+ "learning_rate": 2.6006768189509308e-06,
1576
+ "loss": 0.0075,
1577
+ "step": 5375
1578
+ },
1579
+ {
1580
+ "epoch": 7.811934900542496,
1581
+ "grad_norm": 0.14179235696792603,
1582
+ "learning_rate": 2.558375634517767e-06,
1583
+ "loss": 0.0142,
1584
+ "step": 5400
1585
+ },
1586
+ {
1587
+ "epoch": 7.848101265822785,
1588
+ "grad_norm": 0.7647730112075806,
1589
+ "learning_rate": 2.5160744500846023e-06,
1590
+ "loss": 0.0068,
1591
+ "step": 5425
1592
+ },
1593
+ {
1594
+ "epoch": 7.884267631103074,
1595
+ "grad_norm": 0.1921517550945282,
1596
+ "learning_rate": 2.4737732656514385e-06,
1597
+ "loss": 0.0078,
1598
+ "step": 5450
1599
+ },
1600
+ {
1601
+ "epoch": 7.920433996383363,
1602
+ "grad_norm": 1.385634183883667,
1603
+ "learning_rate": 2.4314720812182742e-06,
1604
+ "loss": 0.0067,
1605
+ "step": 5475
1606
+ },
1607
+ {
1608
+ "epoch": 7.956600361663653,
1609
+ "grad_norm": 0.08770379424095154,
1610
+ "learning_rate": 2.38917089678511e-06,
1611
+ "loss": 0.007,
1612
+ "step": 5500
1613
+ },
1614
+ {
1615
+ "epoch": 7.992766726943942,
1616
+ "grad_norm": 2.0286593437194824,
1617
+ "learning_rate": 2.346869712351946e-06,
1618
+ "loss": 0.0091,
1619
+ "step": 5525
1620
+ },
1621
+ {
1622
+ "epoch": 8.0,
1623
+ "eval_loss": 0.2551898956298828,
1624
+ "eval_runtime": 49008.1266,
1625
+ "eval_samples_per_second": 0.194,
1626
+ "eval_steps_per_second": 0.024,
1627
+ "eval_wer": 0.10667841127482383,
1628
+ "step": 5530
1629
+ },
1630
+ {
1631
+ "epoch": 8.028933092224232,
1632
+ "grad_norm": 0.40499478578567505,
1633
+ "learning_rate": 2.304568527918782e-06,
1634
+ "loss": 0.011,
1635
+ "step": 5550
1636
+ },
1637
+ {
1638
+ "epoch": 8.065099457504521,
1639
+ "grad_norm": 1.5098172426223755,
1640
+ "learning_rate": 2.2622673434856177e-06,
1641
+ "loss": 0.0074,
1642
+ "step": 5575
1643
+ },
1644
+ {
1645
+ "epoch": 8.10126582278481,
1646
+ "grad_norm": 2.1074013710021973,
1647
+ "learning_rate": 2.219966159052454e-06,
1648
+ "loss": 0.0079,
1649
+ "step": 5600
1650
+ },
1651
+ {
1652
+ "epoch": 8.1374321880651,
1653
+ "grad_norm": 1.3512495756149292,
1654
+ "learning_rate": 2.1776649746192896e-06,
1655
+ "loss": 0.0049,
1656
+ "step": 5625
1657
+ },
1658
+ {
1659
+ "epoch": 8.17359855334539,
1660
+ "grad_norm": 0.27601808309555054,
1661
+ "learning_rate": 2.1353637901861254e-06,
1662
+ "loss": 0.0037,
1663
+ "step": 5650
1664
+ },
1665
+ {
1666
+ "epoch": 8.209764918625678,
1667
+ "grad_norm": 0.38552117347717285,
1668
+ "learning_rate": 2.093062605752961e-06,
1669
+ "loss": 0.0054,
1670
+ "step": 5675
1671
+ },
1672
+ {
1673
+ "epoch": 8.245931283905968,
1674
+ "grad_norm": 0.6290847063064575,
1675
+ "learning_rate": 2.050761421319797e-06,
1676
+ "loss": 0.0075,
1677
+ "step": 5700
1678
+ },
1679
+ {
1680
+ "epoch": 8.282097649186257,
1681
+ "grad_norm": 0.34640949964523315,
1682
+ "learning_rate": 2.008460236886633e-06,
1683
+ "loss": 0.0038,
1684
+ "step": 5725
1685
+ },
1686
+ {
1687
+ "epoch": 8.318264014466546,
1688
+ "grad_norm": 0.47082650661468506,
1689
+ "learning_rate": 1.966159052453469e-06,
1690
+ "loss": 0.0072,
1691
+ "step": 5750
1692
+ },
1693
+ {
1694
+ "epoch": 8.354430379746836,
1695
+ "grad_norm": 0.7971699237823486,
1696
+ "learning_rate": 1.9238578680203046e-06,
1697
+ "loss": 0.0074,
1698
+ "step": 5775
1699
+ },
1700
+ {
1701
+ "epoch": 8.390596745027125,
1702
+ "grad_norm": 1.3511334657669067,
1703
+ "learning_rate": 1.8815566835871408e-06,
1704
+ "loss": 0.0065,
1705
+ "step": 5800
1706
+ },
1707
+ {
1708
+ "epoch": 8.426763110307414,
1709
+ "grad_norm": 0.04755210131406784,
1710
+ "learning_rate": 1.8392554991539766e-06,
1711
+ "loss": 0.0054,
1712
+ "step": 5825
1713
+ },
1714
+ {
1715
+ "epoch": 8.462929475587703,
1716
+ "grad_norm": 0.7288948893547058,
1717
+ "learning_rate": 1.7969543147208123e-06,
1718
+ "loss": 0.0075,
1719
+ "step": 5850
1720
+ },
1721
+ {
1722
+ "epoch": 8.499095840867993,
1723
+ "grad_norm": 2.372441291809082,
1724
+ "learning_rate": 1.7546531302876483e-06,
1725
+ "loss": 0.0071,
1726
+ "step": 5875
1727
+ },
1728
+ {
1729
+ "epoch": 8.535262206148282,
1730
+ "grad_norm": 0.029580192640423775,
1731
+ "learning_rate": 1.712351945854484e-06,
1732
+ "loss": 0.0057,
1733
+ "step": 5900
1734
+ },
1735
+ {
1736
+ "epoch": 8.571428571428571,
1737
+ "grad_norm": 3.4771149158477783,
1738
+ "learning_rate": 1.6700507614213198e-06,
1739
+ "loss": 0.0063,
1740
+ "step": 5925
1741
+ },
1742
+ {
1743
+ "epoch": 8.60759493670886,
1744
+ "grad_norm": 0.47946012020111084,
1745
+ "learning_rate": 1.6277495769881558e-06,
1746
+ "loss": 0.0095,
1747
+ "step": 5950
1748
+ },
1749
+ {
1750
+ "epoch": 8.64376130198915,
1751
+ "grad_norm": 1.6082369089126587,
1752
+ "learning_rate": 1.5854483925549915e-06,
1753
+ "loss": 0.0061,
1754
+ "step": 5975
1755
+ },
1756
+ {
1757
+ "epoch": 8.679927667269439,
1758
+ "grad_norm": 0.8396530747413635,
1759
+ "learning_rate": 1.5431472081218277e-06,
1760
+ "loss": 0.005,
1761
+ "step": 6000
1762
+ },
1763
+ {
1764
+ "epoch": 8.716094032549728,
1765
+ "grad_norm": 0.683506965637207,
1766
+ "learning_rate": 1.5008460236886635e-06,
1767
+ "loss": 0.0042,
1768
+ "step": 6025
1769
+ },
1770
+ {
1771
+ "epoch": 8.752260397830018,
1772
+ "grad_norm": 0.23053808510303497,
1773
+ "learning_rate": 1.4585448392554992e-06,
1774
+ "loss": 0.0085,
1775
+ "step": 6050
1776
+ },
1777
+ {
1778
+ "epoch": 8.788426763110307,
1779
+ "grad_norm": 1.4277093410491943,
1780
+ "learning_rate": 1.4162436548223352e-06,
1781
+ "loss": 0.0057,
1782
+ "step": 6075
1783
+ },
1784
+ {
1785
+ "epoch": 8.824593128390596,
1786
+ "grad_norm": 0.8418155908584595,
1787
+ "learning_rate": 1.373942470389171e-06,
1788
+ "loss": 0.0085,
1789
+ "step": 6100
1790
+ },
1791
+ {
1792
+ "epoch": 8.860759493670885,
1793
+ "grad_norm": 0.16715645790100098,
1794
+ "learning_rate": 1.3316412859560067e-06,
1795
+ "loss": 0.0039,
1796
+ "step": 6125
1797
+ },
1798
+ {
1799
+ "epoch": 8.896925858951175,
1800
+ "grad_norm": 1.1576683521270752,
1801
+ "learning_rate": 1.2893401015228427e-06,
1802
+ "loss": 0.0056,
1803
+ "step": 6150
1804
+ },
1805
+ {
1806
+ "epoch": 8.933092224231466,
1807
+ "grad_norm": 4.466509819030762,
1808
+ "learning_rate": 1.2470389170896787e-06,
1809
+ "loss": 0.008,
1810
+ "step": 6175
1811
+ },
1812
+ {
1813
+ "epoch": 8.969258589511753,
1814
+ "grad_norm": 0.09006431698799133,
1815
+ "learning_rate": 1.2047377326565144e-06,
1816
+ "loss": 0.0063,
1817
+ "step": 6200
1818
+ },
1819
+ {
1820
+ "epoch": 8.999638336347196,
1821
+ "eval_loss": 0.25654470920562744,
1822
+ "eval_runtime": 49994.6568,
1823
+ "eval_samples_per_second": 0.19,
1824
+ "eval_steps_per_second": 0.024,
1825
+ "eval_wer": 0.10539718129404228,
1826
+ "step": 6221
1827
+ },
1828
+ {
1829
+ "epoch": 9.005424954792044,
1830
+ "grad_norm": 0.04043040797114372,
1831
+ "learning_rate": 1.1624365482233504e-06,
1832
+ "loss": 0.0099,
1833
+ "step": 6225
1834
+ },
1835
+ {
1836
+ "epoch": 9.041591320072333,
1837
+ "grad_norm": 0.31738966703414917,
1838
+ "learning_rate": 1.1201353637901864e-06,
1839
+ "loss": 0.0031,
1840
+ "step": 6250
1841
+ },
1842
+ {
1843
+ "epoch": 9.077757685352623,
1844
+ "grad_norm": 0.8276641368865967,
1845
+ "learning_rate": 1.0778341793570221e-06,
1846
+ "loss": 0.0064,
1847
+ "step": 6275
1848
+ },
1849
+ {
1850
+ "epoch": 9.113924050632912,
1851
+ "grad_norm": 0.3469080626964569,
1852
+ "learning_rate": 1.035532994923858e-06,
1853
+ "loss": 0.0032,
1854
+ "step": 6300
1855
+ },
1856
+ {
1857
+ "epoch": 9.150090415913201,
1858
+ "grad_norm": 0.6140421628952026,
1859
+ "learning_rate": 9.932318104906939e-07,
1860
+ "loss": 0.0088,
1861
+ "step": 6325
1862
+ },
1863
+ {
1864
+ "epoch": 9.18625678119349,
1865
+ "grad_norm": 1.1443873643875122,
1866
+ "learning_rate": 9.509306260575297e-07,
1867
+ "loss": 0.0032,
1868
+ "step": 6350
1869
+ },
1870
+ {
1871
+ "epoch": 9.22242314647378,
1872
+ "grad_norm": 0.21027229726314545,
1873
+ "learning_rate": 9.086294416243656e-07,
1874
+ "loss": 0.0105,
1875
+ "step": 6375
1876
+ },
1877
+ {
1878
+ "epoch": 9.258589511754069,
1879
+ "grad_norm": 0.2232620120048523,
1880
+ "learning_rate": 8.663282571912015e-07,
1881
+ "loss": 0.0047,
1882
+ "step": 6400
1883
+ },
1884
+ {
1885
+ "epoch": 9.294755877034358,
1886
+ "grad_norm": 0.5159393548965454,
1887
+ "learning_rate": 8.240270727580372e-07,
1888
+ "loss": 0.0069,
1889
+ "step": 6425
1890
+ },
1891
+ {
1892
+ "epoch": 9.330922242314648,
1893
+ "grad_norm": 0.5648090839385986,
1894
+ "learning_rate": 7.817258883248732e-07,
1895
+ "loss": 0.0031,
1896
+ "step": 6450
1897
+ },
1898
+ {
1899
+ "epoch": 9.367088607594937,
1900
+ "grad_norm": 0.24160954356193542,
1901
+ "learning_rate": 7.394247038917091e-07,
1902
+ "loss": 0.0076,
1903
+ "step": 6475
1904
+ },
1905
+ {
1906
+ "epoch": 9.403254972875226,
1907
+ "grad_norm": 0.21812845766544342,
1908
+ "learning_rate": 6.971235194585449e-07,
1909
+ "loss": 0.0036,
1910
+ "step": 6500
1911
+ },
1912
+ {
1913
+ "epoch": 9.439421338155515,
1914
+ "grad_norm": 0.3133035898208618,
1915
+ "learning_rate": 6.548223350253807e-07,
1916
+ "loss": 0.0086,
1917
+ "step": 6525
1918
+ },
1919
+ {
1920
+ "epoch": 9.475587703435805,
1921
+ "grad_norm": 0.16532699763774872,
1922
+ "learning_rate": 6.125211505922167e-07,
1923
+ "loss": 0.0029,
1924
+ "step": 6550
1925
+ },
1926
+ {
1927
+ "epoch": 9.511754068716094,
1928
+ "grad_norm": 0.5517584681510925,
1929
+ "learning_rate": 5.702199661590524e-07,
1930
+ "loss": 0.0096,
1931
+ "step": 6575
1932
+ },
1933
+ {
1934
+ "epoch": 9.547920433996383,
1935
+ "grad_norm": 0.2974016070365906,
1936
+ "learning_rate": 5.279187817258884e-07,
1937
+ "loss": 0.0018,
1938
+ "step": 6600
1939
+ },
1940
+ {
1941
+ "epoch": 9.584086799276673,
1942
+ "grad_norm": 0.7380957007408142,
1943
+ "learning_rate": 4.856175972927242e-07,
1944
+ "loss": 0.0091,
1945
+ "step": 6625
1946
+ },
1947
+ {
1948
+ "epoch": 9.620253164556962,
1949
+ "grad_norm": 0.2390100508928299,
1950
+ "learning_rate": 4.433164128595601e-07,
1951
+ "loss": 0.0024,
1952
+ "step": 6650
1953
+ },
1954
+ {
1955
+ "epoch": 9.656419529837251,
1956
+ "grad_norm": 0.22240670025348663,
1957
+ "learning_rate": 4.0101522842639594e-07,
1958
+ "loss": 0.0054,
1959
+ "step": 6675
1960
+ },
1961
+ {
1962
+ "epoch": 9.69258589511754,
1963
+ "grad_norm": 0.03914798051118851,
1964
+ "learning_rate": 3.5871404399323186e-07,
1965
+ "loss": 0.0029,
1966
+ "step": 6700
1967
+ },
1968
+ {
1969
+ "epoch": 9.72875226039783,
1970
+ "grad_norm": 0.5454278588294983,
1971
+ "learning_rate": 3.1641285956006767e-07,
1972
+ "loss": 0.0053,
1973
+ "step": 6725
1974
+ },
1975
+ {
1976
+ "epoch": 9.764918625678119,
1977
+ "grad_norm": 0.21154353022575378,
1978
+ "learning_rate": 2.741116751269036e-07,
1979
+ "loss": 0.0024,
1980
+ "step": 6750
1981
+ },
1982
+ {
1983
+ "epoch": 9.801084990958408,
1984
+ "grad_norm": 0.47212332487106323,
1985
+ "learning_rate": 2.3181049069373945e-07,
1986
+ "loss": 0.0081,
1987
+ "step": 6775
1988
+ },
1989
+ {
1990
+ "epoch": 9.837251356238697,
1991
+ "grad_norm": 0.09104613214731216,
1992
+ "learning_rate": 1.8950930626057532e-07,
1993
+ "loss": 0.0048,
1994
+ "step": 6800
1995
+ },
1996
+ {
1997
+ "epoch": 9.873417721518987,
1998
+ "grad_norm": 0.35835790634155273,
1999
+ "learning_rate": 1.4720812182741118e-07,
2000
+ "loss": 0.0085,
2001
+ "step": 6825
2002
+ },
2003
+ {
2004
+ "epoch": 9.909584086799276,
2005
+ "grad_norm": 0.13305124640464783,
2006
+ "learning_rate": 1.0490693739424705e-07,
2007
+ "loss": 0.0028,
2008
+ "step": 6850
2009
+ },
2010
+ {
2011
+ "epoch": 9.945750452079565,
2012
+ "grad_norm": 0.4060196280479431,
2013
+ "learning_rate": 6.260575296108291e-08,
2014
+ "loss": 0.0096,
2015
+ "step": 6875
2016
+ },
2017
+ {
2018
+ "epoch": 9.981916817359856,
2019
+ "grad_norm": 1.0367088317871094,
2020
+ "learning_rate": 2.0304568527918784e-08,
2021
+ "loss": 0.0019,
2022
+ "step": 6900
2023
+ },
2024
+ {
2025
+ "epoch": 9.99638336347197,
2026
+ "eval_loss": 0.26707419753074646,
2027
+ "eval_runtime": 51487.4721,
2028
+ "eval_samples_per_second": 0.184,
2029
+ "eval_steps_per_second": 0.023,
2030
+ "eval_wer": 0.10419602818705957,
2031
+ "step": 6910
2032
+ },
2033
+ {
2034
+ "epoch": 9.99638336347197,
2035
+ "step": 6910,
2036
+ "total_flos": 7.51132515607511e+20,
2037
+ "train_loss": 0.05691683961323822,
2038
+ "train_runtime": 650193.1322,
2039
+ "train_samples_per_second": 0.34,
2040
+ "train_steps_per_second": 0.011
2041
  }
2042
  ],
2043
  "logging_steps": 25,
2044
+ "max_steps": 6910,
2045
  "num_input_tokens_seen": 0,
2046
+ "num_train_epochs": 10,
2047
  "save_steps": 500,
2048
  "stateful_callbacks": {
2049
  "TrainerControl": {
 
2057
  "attributes": {}
2058
  }
2059
  },
2060
+ "total_flos": 7.51132515607511e+20,
2061
+ "train_batch_size": 8,
2062
  "trial_name": null,
2063
  "trial_params": null
2064
  }