anuragshas committed on
Commit
e9f741c
1 Parent(s): 7cf5aec

End of training

Browse files
Files changed (4) hide show
  1. all_results.json +15 -0
  2. eval_results.json +10 -0
  3. train_results.json +8 -0
  4. trainer_state.json +451 -0
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 100.16,
3
+ "eval_cer": 0.10498671252149445,
4
+ "eval_loss": 0.4146759808063507,
5
+ "eval_runtime": 116.6347,
6
+ "eval_samples": 2796,
7
+ "eval_samples_per_second": 23.972,
8
+ "eval_steps_per_second": 0.377,
9
+ "eval_wer": 0.3171619442524764,
10
+ "train_loss": 1.6543282796931436,
11
+ "train_runtime": 29157.3737,
12
+ "train_samples": 6538,
13
+ "train_samples_per_second": 22.424,
14
+ "train_steps_per_second": 0.175
15
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 100.16,
3
+ "eval_cer": 0.10498671252149445,
4
+ "eval_loss": 0.4146759808063507,
5
+ "eval_runtime": 116.6347,
6
+ "eval_samples": 2796,
7
+ "eval_samples_per_second": 23.972,
8
+ "eval_steps_per_second": 0.377,
9
+ "eval_wer": 0.3171619442524764
10
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 100.16,
3
+ "train_loss": 1.6543282796931436,
4
+ "train_runtime": 29157.3737,
5
+ "train_samples": 6538,
6
+ "train_samples_per_second": 22.424,
7
+ "train_steps_per_second": 0.175
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,451 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 100.15533980582525,
5
+ "global_step": 5108,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.95,
12
+ "learning_rate": 1.487205882352941e-05,
13
+ "loss": 16.4084,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 3.91,
18
+ "learning_rate": 2.9430882352941174e-05,
19
+ "loss": 6.2142,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 5.87,
24
+ "learning_rate": 4.398970588235294e-05,
25
+ "loss": 4.1261,
26
+ "step": 300
27
+ },
28
+ {
29
+ "epoch": 7.83,
30
+ "learning_rate": 5.8548529411764694e-05,
31
+ "loss": 3.2894,
32
+ "step": 400
33
+ },
34
+ {
35
+ "epoch": 7.83,
36
+ "eval_cer": 1.0,
37
+ "eval_loss": 3.1501412391662598,
38
+ "eval_runtime": 111.2121,
39
+ "eval_samples_per_second": 25.141,
40
+ "eval_steps_per_second": 0.396,
41
+ "eval_wer": 1.0,
42
+ "step": 400
43
+ },
44
+ {
45
+ "epoch": 9.8,
46
+ "learning_rate": 7.310735294117646e-05,
47
+ "loss": 3.083,
48
+ "step": 500
49
+ },
50
+ {
51
+ "epoch": 11.76,
52
+ "learning_rate": 7.5e-05,
53
+ "loss": 2.9638,
54
+ "step": 600
55
+ },
56
+ {
57
+ "epoch": 13.72,
58
+ "learning_rate": 7.5e-05,
59
+ "loss": 2.534,
60
+ "step": 700
61
+ },
62
+ {
63
+ "epoch": 15.68,
64
+ "learning_rate": 7.5e-05,
65
+ "loss": 1.8586,
66
+ "step": 800
67
+ },
68
+ {
69
+ "epoch": 15.68,
70
+ "eval_cer": 0.24023761138033453,
71
+ "eval_loss": 0.8871385455131531,
72
+ "eval_runtime": 114.0709,
73
+ "eval_samples_per_second": 24.511,
74
+ "eval_steps_per_second": 0.386,
75
+ "eval_wer": 0.6721492743607463,
76
+ "step": 800
77
+ },
78
+ {
79
+ "epoch": 17.64,
80
+ "learning_rate": 7.5e-05,
81
+ "loss": 1.6008,
82
+ "step": 900
83
+ },
84
+ {
85
+ "epoch": 19.6,
86
+ "learning_rate": 7.5e-05,
87
+ "loss": 1.4898,
88
+ "step": 1000
89
+ },
90
+ {
91
+ "epoch": 21.56,
92
+ "learning_rate": 7.5e-05,
93
+ "loss": 1.4105,
94
+ "step": 1100
95
+ },
96
+ {
97
+ "epoch": 23.52,
98
+ "learning_rate": 7.5e-05,
99
+ "loss": 1.3431,
100
+ "step": 1200
101
+ },
102
+ {
103
+ "epoch": 23.52,
104
+ "eval_cer": 0.19387212755979366,
105
+ "eval_loss": 0.5813264846801758,
106
+ "eval_runtime": 110.7438,
107
+ "eval_samples_per_second": 25.247,
108
+ "eval_steps_per_second": 0.397,
109
+ "eval_wer": 0.5502418797512094,
110
+ "step": 1200
111
+ },
112
+ {
113
+ "epoch": 25.49,
114
+ "learning_rate": 7.5e-05,
115
+ "loss": 1.2993,
116
+ "step": 1300
117
+ },
118
+ {
119
+ "epoch": 27.45,
120
+ "learning_rate": 7.5e-05,
121
+ "loss": 1.2563,
122
+ "step": 1400
123
+ },
124
+ {
125
+ "epoch": 29.41,
126
+ "learning_rate": 7.5e-05,
127
+ "loss": 1.2298,
128
+ "step": 1500
129
+ },
130
+ {
131
+ "epoch": 31.37,
132
+ "learning_rate": 7.5e-05,
133
+ "loss": 1.2052,
134
+ "step": 1600
135
+ },
136
+ {
137
+ "epoch": 31.37,
138
+ "eval_cer": 0.1664738679589391,
139
+ "eval_loss": 0.49559420347213745,
140
+ "eval_runtime": 111.374,
141
+ "eval_samples_per_second": 25.105,
142
+ "eval_steps_per_second": 0.395,
143
+ "eval_wer": 0.47878369039391844,
144
+ "step": 1600
145
+ },
146
+ {
147
+ "epoch": 33.33,
148
+ "learning_rate": 7.5e-05,
149
+ "loss": 1.186,
150
+ "step": 1700
151
+ },
152
+ {
153
+ "epoch": 35.29,
154
+ "learning_rate": 7.5e-05,
155
+ "loss": 1.1473,
156
+ "step": 1800
157
+ },
158
+ {
159
+ "epoch": 37.25,
160
+ "learning_rate": 7.5e-05,
161
+ "loss": 1.1235,
162
+ "step": 1900
163
+ },
164
+ {
165
+ "epoch": 39.21,
166
+ "learning_rate": 7.5e-05,
167
+ "loss": 1.1097,
168
+ "step": 2000
169
+ },
170
+ {
171
+ "epoch": 39.21,
172
+ "eval_cer": 0.1397321661195352,
173
+ "eval_loss": 0.44468843936920166,
174
+ "eval_runtime": 109.918,
175
+ "eval_samples_per_second": 25.437,
176
+ "eval_steps_per_second": 0.4,
177
+ "eval_wer": 0.41428242340474547,
178
+ "step": 2000
179
+ },
180
+ {
181
+ "epoch": 41.17,
182
+ "learning_rate": 7.5e-05,
183
+ "loss": 1.0951,
184
+ "step": 2100
185
+ },
186
+ {
187
+ "epoch": 43.14,
188
+ "learning_rate": 7.5e-05,
189
+ "loss": 1.0785,
190
+ "step": 2200
191
+ },
192
+ {
193
+ "epoch": 45.1,
194
+ "learning_rate": 7.5e-05,
195
+ "loss": 1.0575,
196
+ "step": 2300
197
+ },
198
+ {
199
+ "epoch": 47.06,
200
+ "learning_rate": 7.5e-05,
201
+ "loss": 1.0528,
202
+ "step": 2400
203
+ },
204
+ {
205
+ "epoch": 47.06,
206
+ "eval_cer": 0.13331249022979522,
207
+ "eval_loss": 0.4439217448234558,
208
+ "eval_runtime": 111.2432,
209
+ "eval_samples_per_second": 25.134,
210
+ "eval_steps_per_second": 0.396,
211
+ "eval_wer": 0.3960838516470859,
212
+ "step": 2400
213
+ },
214
+ {
215
+ "epoch": 49.02,
216
+ "learning_rate": 7.5e-05,
217
+ "loss": 1.0359,
218
+ "step": 2500
219
+ },
220
+ {
221
+ "epoch": 50.97,
222
+ "learning_rate": 7.377299412915851e-05,
223
+ "loss": 1.0118,
224
+ "step": 2600
225
+ },
226
+ {
227
+ "epoch": 52.93,
228
+ "learning_rate": 7.098434442270058e-05,
229
+ "loss": 1.0083,
230
+ "step": 2700
231
+ },
232
+ {
233
+ "epoch": 54.89,
234
+ "learning_rate": 6.819569471624266e-05,
235
+ "loss": 0.9939,
236
+ "step": 2800
237
+ },
238
+ {
239
+ "epoch": 54.89,
240
+ "eval_cer": 0.13785628680110468,
241
+ "eval_loss": 0.43479105830192566,
242
+ "eval_runtime": 112.3814,
243
+ "eval_samples_per_second": 24.88,
244
+ "eval_steps_per_second": 0.392,
245
+ "eval_wer": 0.40138217000691084,
246
+ "step": 2800
247
+ },
248
+ {
249
+ "epoch": 56.85,
250
+ "learning_rate": 6.540704500978474e-05,
251
+ "loss": 0.9884,
252
+ "step": 2900
253
+ },
254
+ {
255
+ "epoch": 58.82,
256
+ "learning_rate": 6.26183953033268e-05,
257
+ "loss": 0.9771,
258
+ "step": 3000
259
+ },
260
+ {
261
+ "epoch": 60.78,
262
+ "learning_rate": 5.982974559686888e-05,
263
+ "loss": 0.9577,
264
+ "step": 3100
265
+ },
266
+ {
267
+ "epoch": 62.74,
268
+ "learning_rate": 5.704109589041095e-05,
269
+ "loss": 0.9441,
270
+ "step": 3200
271
+ },
272
+ {
273
+ "epoch": 62.74,
274
+ "eval_cer": 0.12230733156166954,
275
+ "eval_loss": 0.42357733845710754,
276
+ "eval_runtime": 114.6173,
277
+ "eval_samples_per_second": 24.394,
278
+ "eval_steps_per_second": 0.384,
279
+ "eval_wer": 0.365261460492974,
280
+ "step": 3200
281
+ },
282
+ {
283
+ "epoch": 64.7,
284
+ "learning_rate": 5.425244618395303e-05,
285
+ "loss": 0.9296,
286
+ "step": 3300
287
+ },
288
+ {
289
+ "epoch": 66.66,
290
+ "learning_rate": 5.14637964774951e-05,
291
+ "loss": 0.9203,
292
+ "step": 3400
293
+ },
294
+ {
295
+ "epoch": 68.62,
296
+ "learning_rate": 4.867514677103718e-05,
297
+ "loss": 0.9215,
298
+ "step": 3500
299
+ },
300
+ {
301
+ "epoch": 70.58,
302
+ "learning_rate": 4.5886497064579256e-05,
303
+ "loss": 0.913,
304
+ "step": 3600
305
+ },
306
+ {
307
+ "epoch": 70.58,
308
+ "eval_cer": 0.11566880308477932,
309
+ "eval_loss": 0.43085047602653503,
310
+ "eval_runtime": 112.648,
311
+ "eval_samples_per_second": 24.821,
312
+ "eval_steps_per_second": 0.391,
313
+ "eval_wer": 0.3475236120709514,
314
+ "step": 3600
315
+ },
316
+ {
317
+ "epoch": 72.54,
318
+ "learning_rate": 4.309784735812133e-05,
319
+ "loss": 0.9033,
320
+ "step": 3700
321
+ },
322
+ {
323
+ "epoch": 74.5,
324
+ "learning_rate": 4.03091976516634e-05,
325
+ "loss": 0.8945,
326
+ "step": 3800
327
+ },
328
+ {
329
+ "epoch": 76.47,
330
+ "learning_rate": 3.752054794520548e-05,
331
+ "loss": 0.8714,
332
+ "step": 3900
333
+ },
334
+ {
335
+ "epoch": 78.43,
336
+ "learning_rate": 3.473189823874755e-05,
337
+ "loss": 0.8678,
338
+ "step": 4000
339
+ },
340
+ {
341
+ "epoch": 78.43,
342
+ "eval_cer": 0.1109895263404721,
343
+ "eval_loss": 0.42695942521095276,
344
+ "eval_runtime": 109.8691,
345
+ "eval_samples_per_second": 25.448,
346
+ "eval_steps_per_second": 0.4,
347
+ "eval_wer": 0.3337479843354066,
348
+ "step": 4000
349
+ },
350
+ {
351
+ "epoch": 80.39,
352
+ "learning_rate": 3.1943248532289626e-05,
353
+ "loss": 0.8612,
354
+ "step": 4100
355
+ },
356
+ {
357
+ "epoch": 82.35,
358
+ "learning_rate": 2.9154598825831697e-05,
359
+ "loss": 0.8603,
360
+ "step": 4200
361
+ },
362
+ {
363
+ "epoch": 84.31,
364
+ "learning_rate": 2.6365949119373778e-05,
365
+ "loss": 0.8451,
366
+ "step": 4300
367
+ },
368
+ {
369
+ "epoch": 86.27,
370
+ "learning_rate": 2.357729941291585e-05,
371
+ "loss": 0.8414,
372
+ "step": 4400
373
+ },
374
+ {
375
+ "epoch": 86.27,
376
+ "eval_cer": 0.10695638580584649,
377
+ "eval_loss": 0.41582536697387695,
378
+ "eval_runtime": 110.6892,
379
+ "eval_samples_per_second": 25.26,
380
+ "eval_steps_per_second": 0.398,
381
+ "eval_wer": 0.32204561161022804,
382
+ "step": 4400
383
+ },
384
+ {
385
+ "epoch": 88.23,
386
+ "learning_rate": 2.0788649706457926e-05,
387
+ "loss": 0.8394,
388
+ "step": 4500
389
+ },
390
+ {
391
+ "epoch": 90.19,
392
+ "learning_rate": 1.7999999999999997e-05,
393
+ "loss": 0.8304,
394
+ "step": 4600
395
+ },
396
+ {
397
+ "epoch": 92.16,
398
+ "learning_rate": 1.5211350293542076e-05,
399
+ "loss": 0.8188,
400
+ "step": 4700
401
+ },
402
+ {
403
+ "epoch": 94.12,
404
+ "learning_rate": 1.2422700587084147e-05,
405
+ "loss": 0.817,
406
+ "step": 4800
407
+ },
408
+ {
409
+ "epoch": 94.12,
410
+ "eval_cer": 0.10721692460007295,
411
+ "eval_loss": 0.4184626638889313,
412
+ "eval_runtime": 112.2012,
413
+ "eval_samples_per_second": 24.92,
414
+ "eval_steps_per_second": 0.392,
415
+ "eval_wer": 0.323105275282193,
416
+ "step": 4800
417
+ },
418
+ {
419
+ "epoch": 96.08,
420
+ "learning_rate": 9.634050880626226e-06,
421
+ "loss": 0.8126,
422
+ "step": 4900
423
+ },
424
+ {
425
+ "epoch": 98.04,
426
+ "learning_rate": 6.845401174168297e-06,
427
+ "loss": 0.8105,
428
+ "step": 5000
429
+ },
430
+ {
431
+ "epoch": 99.99,
432
+ "learning_rate": 4.056751467710376e-06,
433
+ "loss": 0.7959,
434
+ "step": 5100
435
+ },
436
+ {
437
+ "epoch": 100.16,
438
+ "step": 5108,
439
+ "total_flos": 8.553278225483388e+19,
440
+ "train_loss": 1.6543282796931436,
441
+ "train_runtime": 29157.3737,
442
+ "train_samples_per_second": 22.424,
443
+ "train_steps_per_second": 0.175
444
+ }
445
+ ],
446
+ "max_steps": 5108,
447
+ "num_train_epochs": 101,
448
+ "total_flos": 8.553278225483388e+19,
449
+ "trial_name": null,
450
+ "trial_params": null
451
+ }