HorcruxNo13 commited on
Commit
c4b45d7
·
verified ·
1 Parent(s): 2d317e6

End of training

Browse files
Files changed (3) hide show
  1. all_results.json +8 -0
  2. train_results.json +8 -0
  3. trainer_state.json +516 -0
all_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 36.0,
3
+ "total_flos": 1.2659877490145034e+18,
4
+ "train_loss": 0.10912525819407569,
5
+ "train_runtime": 949.2365,
6
+ "train_samples_per_second": 21.523,
7
+ "train_steps_per_second": 0.095
8
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 36.0,
3
+ "total_flos": 1.2659877490145034e+18,
4
+ "train_loss": 0.10912525819407569,
5
+ "train_runtime": 949.2365,
6
+ "train_samples_per_second": 21.523,
7
+ "train_steps_per_second": 0.095
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,516 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.8627450980392157,
3
+ "best_model_checkpoint": "beit-base-patch16-224/checkpoint-47",
4
+ "epoch": 36.0,
5
+ "eval_steps": 500,
6
+ "global_step": 90,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.8,
13
+ "eval_accuracy": 0.5882352941176471,
14
+ "eval_f1_score": 0.554074074074074,
15
+ "eval_loss": 0.6992508172988892,
16
+ "eval_precision": 0.5390243902439025,
17
+ "eval_recall": 0.5882352941176471,
18
+ "eval_runtime": 0.8655,
19
+ "eval_samples_per_second": 58.922,
20
+ "eval_steps_per_second": 2.311,
21
+ "step": 2
22
+ },
23
+ {
24
+ "epoch": 2.0,
25
+ "eval_accuracy": 0.6862745098039216,
26
+ "eval_f1_score": 0.6032520325203252,
27
+ "eval_loss": 0.5970537662506104,
28
+ "eval_precision": 0.6805555555555555,
29
+ "eval_recall": 0.6862745098039216,
30
+ "eval_runtime": 0.8959,
31
+ "eval_samples_per_second": 56.925,
32
+ "eval_steps_per_second": 2.232,
33
+ "step": 5
34
+ },
35
+ {
36
+ "epoch": 2.8,
37
+ "eval_accuracy": 0.803921568627451,
38
+ "eval_f1_score": 0.800595238095238,
39
+ "eval_loss": 0.5305531024932861,
40
+ "eval_precision": 0.7999999999999999,
41
+ "eval_recall": 0.803921568627451,
42
+ "eval_runtime": 0.9046,
43
+ "eval_samples_per_second": 56.379,
44
+ "eval_steps_per_second": 2.211,
45
+ "step": 7
46
+ },
47
+ {
48
+ "epoch": 4.0,
49
+ "eval_accuracy": 0.7254901960784313,
50
+ "eval_f1_score": 0.6858974358974359,
51
+ "eval_loss": 0.48283636569976807,
52
+ "eval_precision": 0.722943722943723,
53
+ "eval_recall": 0.7254901960784313,
54
+ "eval_runtime": 0.9029,
55
+ "eval_samples_per_second": 56.482,
56
+ "eval_steps_per_second": 2.215,
57
+ "step": 10
58
+ },
59
+ {
60
+ "epoch": 4.8,
61
+ "eval_accuracy": 0.7843137254901961,
62
+ "eval_f1_score": 0.7784340451310011,
63
+ "eval_loss": 0.3811856508255005,
64
+ "eval_precision": 0.7786357786357786,
65
+ "eval_recall": 0.7843137254901961,
66
+ "eval_runtime": 0.92,
67
+ "eval_samples_per_second": 55.436,
68
+ "eval_steps_per_second": 2.174,
69
+ "step": 12
70
+ },
71
+ {
72
+ "epoch": 6.0,
73
+ "grad_norm": 4.578022480010986,
74
+ "learning_rate": 4.62962962962963e-05,
75
+ "loss": 0.5413,
76
+ "step": 15
77
+ },
78
+ {
79
+ "epoch": 6.0,
80
+ "eval_accuracy": 0.7450980392156863,
81
+ "eval_f1_score": 0.7141125541125543,
82
+ "eval_loss": 0.5268120765686035,
83
+ "eval_precision": 0.7461240310077519,
84
+ "eval_recall": 0.7450980392156863,
85
+ "eval_runtime": 0.9096,
86
+ "eval_samples_per_second": 56.066,
87
+ "eval_steps_per_second": 2.199,
88
+ "step": 15
89
+ },
90
+ {
91
+ "epoch": 6.8,
92
+ "eval_accuracy": 0.7450980392156863,
93
+ "eval_f1_score": 0.7502256608639587,
94
+ "eval_loss": 0.5349109768867493,
95
+ "eval_precision": 0.8555555555555555,
96
+ "eval_recall": 0.7450980392156863,
97
+ "eval_runtime": 0.9137,
98
+ "eval_samples_per_second": 55.818,
99
+ "eval_steps_per_second": 2.189,
100
+ "step": 17
101
+ },
102
+ {
103
+ "epoch": 8.0,
104
+ "eval_accuracy": 0.803921568627451,
105
+ "eval_f1_score": 0.7756410256410257,
106
+ "eval_loss": 0.4119790494441986,
107
+ "eval_precision": 0.8484848484848485,
108
+ "eval_recall": 0.803921568627451,
109
+ "eval_runtime": 0.9237,
110
+ "eval_samples_per_second": 55.215,
111
+ "eval_steps_per_second": 2.165,
112
+ "step": 20
113
+ },
114
+ {
115
+ "epoch": 8.8,
116
+ "eval_accuracy": 0.803921568627451,
117
+ "eval_f1_score": 0.7962962962962962,
118
+ "eval_loss": 0.3156317472457886,
119
+ "eval_precision": 0.8002699055330634,
120
+ "eval_recall": 0.803921568627451,
121
+ "eval_runtime": 0.9335,
122
+ "eval_samples_per_second": 54.63,
123
+ "eval_steps_per_second": 2.142,
124
+ "step": 22
125
+ },
126
+ {
127
+ "epoch": 10.0,
128
+ "eval_accuracy": 0.803921568627451,
129
+ "eval_f1_score": 0.7908622908622909,
130
+ "eval_loss": 0.3216821253299713,
131
+ "eval_precision": 0.806060606060606,
132
+ "eval_recall": 0.803921568627451,
133
+ "eval_runtime": 0.9256,
134
+ "eval_samples_per_second": 55.1,
135
+ "eval_steps_per_second": 2.161,
136
+ "step": 25
137
+ },
138
+ {
139
+ "epoch": 10.8,
140
+ "eval_accuracy": 0.7843137254901961,
141
+ "eval_f1_score": 0.7664197530864199,
142
+ "eval_loss": 0.5160595774650574,
143
+ "eval_precision": 0.7869918699186993,
144
+ "eval_recall": 0.7843137254901961,
145
+ "eval_runtime": 0.9267,
146
+ "eval_samples_per_second": 55.031,
147
+ "eval_steps_per_second": 2.158,
148
+ "step": 27
149
+ },
150
+ {
151
+ "epoch": 12.0,
152
+ "grad_norm": 3.5482540130615234,
153
+ "learning_rate": 3.7037037037037037e-05,
154
+ "loss": 0.0919,
155
+ "step": 30
156
+ },
157
+ {
158
+ "epoch": 12.0,
159
+ "eval_accuracy": 0.8431372549019608,
160
+ "eval_f1_score": 0.845117845117845,
161
+ "eval_loss": 0.36771491169929504,
162
+ "eval_precision": 0.849780701754386,
163
+ "eval_recall": 0.8431372549019608,
164
+ "eval_runtime": 0.942,
165
+ "eval_samples_per_second": 54.142,
166
+ "eval_steps_per_second": 2.123,
167
+ "step": 30
168
+ },
169
+ {
170
+ "epoch": 12.8,
171
+ "eval_accuracy": 0.8431372549019608,
172
+ "eval_f1_score": 0.8404761904761906,
173
+ "eval_loss": 0.46310773491859436,
174
+ "eval_precision": 0.8407407407407408,
175
+ "eval_recall": 0.8431372549019608,
176
+ "eval_runtime": 0.9403,
177
+ "eval_samples_per_second": 54.24,
178
+ "eval_steps_per_second": 2.127,
179
+ "step": 32
180
+ },
181
+ {
182
+ "epoch": 14.0,
183
+ "eval_accuracy": 0.8235294117647058,
184
+ "eval_f1_score": 0.8221343873517787,
185
+ "eval_loss": 0.5000560879707336,
186
+ "eval_precision": 0.8214285714285714,
187
+ "eval_recall": 0.8235294117647058,
188
+ "eval_runtime": 0.9615,
189
+ "eval_samples_per_second": 53.039,
190
+ "eval_steps_per_second": 2.08,
191
+ "step": 35
192
+ },
193
+ {
194
+ "epoch": 14.8,
195
+ "eval_accuracy": 0.8431372549019608,
196
+ "eval_f1_score": 0.8431372549019608,
197
+ "eval_loss": 0.4489041268825531,
198
+ "eval_precision": 0.8431372549019608,
199
+ "eval_recall": 0.8431372549019608,
200
+ "eval_runtime": 0.9337,
201
+ "eval_samples_per_second": 54.621,
202
+ "eval_steps_per_second": 2.142,
203
+ "step": 37
204
+ },
205
+ {
206
+ "epoch": 16.0,
207
+ "eval_accuracy": 0.7843137254901961,
208
+ "eval_f1_score": 0.7731065973862385,
209
+ "eval_loss": 0.5892294049263,
210
+ "eval_precision": 0.7799145299145298,
211
+ "eval_recall": 0.7843137254901961,
212
+ "eval_runtime": 1.0872,
213
+ "eval_samples_per_second": 46.909,
214
+ "eval_steps_per_second": 1.84,
215
+ "step": 40
216
+ },
217
+ {
218
+ "epoch": 16.8,
219
+ "eval_accuracy": 0.7843137254901961,
220
+ "eval_f1_score": 0.7731065973862385,
221
+ "eval_loss": 0.6578794717788696,
222
+ "eval_precision": 0.7799145299145298,
223
+ "eval_recall": 0.7843137254901961,
224
+ "eval_runtime": 0.9215,
225
+ "eval_samples_per_second": 55.345,
226
+ "eval_steps_per_second": 2.17,
227
+ "step": 42
228
+ },
229
+ {
230
+ "epoch": 18.0,
231
+ "grad_norm": 3.25277042388916,
232
+ "learning_rate": 2.777777777777778e-05,
233
+ "loss": 0.006,
234
+ "step": 45
235
+ },
236
+ {
237
+ "epoch": 18.0,
238
+ "eval_accuracy": 0.7843137254901961,
239
+ "eval_f1_score": 0.7731065973862385,
240
+ "eval_loss": 0.703818678855896,
241
+ "eval_precision": 0.7799145299145298,
242
+ "eval_recall": 0.7843137254901961,
243
+ "eval_runtime": 1.0077,
244
+ "eval_samples_per_second": 50.61,
245
+ "eval_steps_per_second": 1.985,
246
+ "step": 45
247
+ },
248
+ {
249
+ "epoch": 18.8,
250
+ "eval_accuracy": 0.8627450980392157,
251
+ "eval_f1_score": 0.865142065142065,
252
+ "eval_loss": 0.5864243507385254,
253
+ "eval_precision": 0.8736559139784946,
254
+ "eval_recall": 0.8627450980392157,
255
+ "eval_runtime": 0.9259,
256
+ "eval_samples_per_second": 55.08,
257
+ "eval_steps_per_second": 2.16,
258
+ "step": 47
259
+ },
260
+ {
261
+ "epoch": 20.0,
262
+ "eval_accuracy": 0.8627450980392157,
263
+ "eval_f1_score": 0.865142065142065,
264
+ "eval_loss": 0.5488199591636658,
265
+ "eval_precision": 0.8736559139784946,
266
+ "eval_recall": 0.8627450980392157,
267
+ "eval_runtime": 0.9318,
268
+ "eval_samples_per_second": 54.735,
269
+ "eval_steps_per_second": 2.146,
270
+ "step": 50
271
+ },
272
+ {
273
+ "epoch": 20.8,
274
+ "eval_accuracy": 0.803921568627451,
275
+ "eval_f1_score": 0.7962962962962962,
276
+ "eval_loss": 0.6650967597961426,
277
+ "eval_precision": 0.8002699055330634,
278
+ "eval_recall": 0.803921568627451,
279
+ "eval_runtime": 0.9328,
280
+ "eval_samples_per_second": 54.677,
281
+ "eval_steps_per_second": 2.144,
282
+ "step": 52
283
+ },
284
+ {
285
+ "epoch": 22.0,
286
+ "eval_accuracy": 0.803921568627451,
287
+ "eval_f1_score": 0.800595238095238,
288
+ "eval_loss": 0.6264931559562683,
289
+ "eval_precision": 0.7999999999999999,
290
+ "eval_recall": 0.803921568627451,
291
+ "eval_runtime": 0.9317,
292
+ "eval_samples_per_second": 54.741,
293
+ "eval_steps_per_second": 2.147,
294
+ "step": 55
295
+ },
296
+ {
297
+ "epoch": 22.8,
298
+ "eval_accuracy": 0.8627450980392157,
299
+ "eval_f1_score": 0.8636815920398009,
300
+ "eval_loss": 0.5228903889656067,
301
+ "eval_precision": 0.8653198653198653,
302
+ "eval_recall": 0.8627450980392157,
303
+ "eval_runtime": 0.9295,
304
+ "eval_samples_per_second": 54.868,
305
+ "eval_steps_per_second": 2.152,
306
+ "step": 57
307
+ },
308
+ {
309
+ "epoch": 24.0,
310
+ "grad_norm": 0.0452270582318306,
311
+ "learning_rate": 1.8518518518518518e-05,
312
+ "loss": 0.0048,
313
+ "step": 60
314
+ },
315
+ {
316
+ "epoch": 24.0,
317
+ "eval_accuracy": 0.8627450980392157,
318
+ "eval_f1_score": 0.8636815920398009,
319
+ "eval_loss": 0.542142927646637,
320
+ "eval_precision": 0.8653198653198653,
321
+ "eval_recall": 0.8627450980392157,
322
+ "eval_runtime": 0.9409,
323
+ "eval_samples_per_second": 54.206,
324
+ "eval_steps_per_second": 2.126,
325
+ "step": 60
326
+ },
327
+ {
328
+ "epoch": 24.8,
329
+ "eval_accuracy": 0.8235294117647058,
330
+ "eval_f1_score": 0.8187187641980918,
331
+ "eval_loss": 0.6334545016288757,
332
+ "eval_precision": 0.8204633204633205,
333
+ "eval_recall": 0.8235294117647058,
334
+ "eval_runtime": 0.9368,
335
+ "eval_samples_per_second": 54.438,
336
+ "eval_steps_per_second": 2.135,
337
+ "step": 62
338
+ },
339
+ {
340
+ "epoch": 26.0,
341
+ "eval_accuracy": 0.803921568627451,
342
+ "eval_f1_score": 0.7840755735492576,
343
+ "eval_loss": 1.0379055738449097,
344
+ "eval_precision": 0.82010582010582,
345
+ "eval_recall": 0.803921568627451,
346
+ "eval_runtime": 0.927,
347
+ "eval_samples_per_second": 55.015,
348
+ "eval_steps_per_second": 2.157,
349
+ "step": 65
350
+ },
351
+ {
352
+ "epoch": 26.8,
353
+ "eval_accuracy": 0.8235294117647058,
354
+ "eval_f1_score": 0.808888888888889,
355
+ "eval_loss": 0.9758451581001282,
356
+ "eval_precision": 0.8365853658536586,
357
+ "eval_recall": 0.8235294117647058,
358
+ "eval_runtime": 0.927,
359
+ "eval_samples_per_second": 55.017,
360
+ "eval_steps_per_second": 2.158,
361
+ "step": 67
362
+ },
363
+ {
364
+ "epoch": 28.0,
365
+ "eval_accuracy": 0.8235294117647058,
366
+ "eval_f1_score": 0.8187187641980918,
367
+ "eval_loss": 0.6116669774055481,
368
+ "eval_precision": 0.8204633204633205,
369
+ "eval_recall": 0.8235294117647058,
370
+ "eval_runtime": 0.9261,
371
+ "eval_samples_per_second": 55.07,
372
+ "eval_steps_per_second": 2.16,
373
+ "step": 70
374
+ },
375
+ {
376
+ "epoch": 28.8,
377
+ "eval_accuracy": 0.8627450980392157,
378
+ "eval_f1_score": 0.8616600790513834,
379
+ "eval_loss": 0.540273904800415,
380
+ "eval_precision": 0.8613095238095237,
381
+ "eval_recall": 0.8627450980392157,
382
+ "eval_runtime": 0.9247,
383
+ "eval_samples_per_second": 55.15,
384
+ "eval_steps_per_second": 2.163,
385
+ "step": 72
386
+ },
387
+ {
388
+ "epoch": 30.0,
389
+ "grad_norm": 0.026938632130622864,
390
+ "learning_rate": 9.259259259259259e-06,
391
+ "loss": 0.0063,
392
+ "step": 75
393
+ },
394
+ {
395
+ "epoch": 30.0,
396
+ "eval_accuracy": 0.8431372549019608,
397
+ "eval_f1_score": 0.8404761904761906,
398
+ "eval_loss": 0.6468568444252014,
399
+ "eval_precision": 0.8407407407407408,
400
+ "eval_recall": 0.8431372549019608,
401
+ "eval_runtime": 0.9235,
402
+ "eval_samples_per_second": 55.223,
403
+ "eval_steps_per_second": 2.166,
404
+ "step": 75
405
+ },
406
+ {
407
+ "epoch": 30.8,
408
+ "eval_accuracy": 0.8235294117647058,
409
+ "eval_f1_score": 0.8187187641980918,
410
+ "eval_loss": 0.7013790607452393,
411
+ "eval_precision": 0.8204633204633205,
412
+ "eval_recall": 0.8235294117647058,
413
+ "eval_runtime": 1.1388,
414
+ "eval_samples_per_second": 44.785,
415
+ "eval_steps_per_second": 1.756,
416
+ "step": 77
417
+ },
418
+ {
419
+ "epoch": 32.0,
420
+ "eval_accuracy": 0.8235294117647058,
421
+ "eval_f1_score": 0.8187187641980918,
422
+ "eval_loss": 0.7514360547065735,
423
+ "eval_precision": 0.8204633204633205,
424
+ "eval_recall": 0.8235294117647058,
425
+ "eval_runtime": 0.9424,
426
+ "eval_samples_per_second": 54.118,
427
+ "eval_steps_per_second": 2.122,
428
+ "step": 80
429
+ },
430
+ {
431
+ "epoch": 32.8,
432
+ "eval_accuracy": 0.8235294117647058,
433
+ "eval_f1_score": 0.8143599433160132,
434
+ "eval_loss": 0.7771488428115845,
435
+ "eval_precision": 0.8247863247863249,
436
+ "eval_recall": 0.8235294117647058,
437
+ "eval_runtime": 0.9338,
438
+ "eval_samples_per_second": 54.616,
439
+ "eval_steps_per_second": 2.142,
440
+ "step": 82
441
+ },
442
+ {
443
+ "epoch": 34.0,
444
+ "eval_accuracy": 0.803921568627451,
445
+ "eval_f1_score": 0.7962962962962962,
446
+ "eval_loss": 0.7598747611045837,
447
+ "eval_precision": 0.8002699055330634,
448
+ "eval_recall": 0.803921568627451,
449
+ "eval_runtime": 0.9331,
450
+ "eval_samples_per_second": 54.655,
451
+ "eval_steps_per_second": 2.143,
452
+ "step": 85
453
+ },
454
+ {
455
+ "epoch": 34.8,
456
+ "eval_accuracy": 0.803921568627451,
457
+ "eval_f1_score": 0.7962962962962962,
458
+ "eval_loss": 0.7554459571838379,
459
+ "eval_precision": 0.8002699055330634,
460
+ "eval_recall": 0.803921568627451,
461
+ "eval_runtime": 0.9307,
462
+ "eval_samples_per_second": 54.796,
463
+ "eval_steps_per_second": 2.149,
464
+ "step": 87
465
+ },
466
+ {
467
+ "epoch": 36.0,
468
+ "grad_norm": 0.014645076356828213,
469
+ "learning_rate": 0.0,
470
+ "loss": 0.0045,
471
+ "step": 90
472
+ },
473
+ {
474
+ "epoch": 36.0,
475
+ "eval_accuracy": 0.803921568627451,
476
+ "eval_f1_score": 0.7962962962962962,
477
+ "eval_loss": 0.7308478951454163,
478
+ "eval_precision": 0.8002699055330634,
479
+ "eval_recall": 0.803921568627451,
480
+ "eval_runtime": 0.9231,
481
+ "eval_samples_per_second": 55.246,
482
+ "eval_steps_per_second": 2.167,
483
+ "step": 90
484
+ },
485
+ {
486
+ "epoch": 36.0,
487
+ "step": 90,
488
+ "total_flos": 1.2659877490145034e+18,
489
+ "train_loss": 0.10912525819407569,
490
+ "train_runtime": 949.2365,
491
+ "train_samples_per_second": 21.523,
492
+ "train_steps_per_second": 0.095
493
+ },
494
+ {
495
+ "epoch": 36.0,
496
+ "eval_accuracy": 0.8267716535433071,
497
+ "eval_f1_score": 0.8283048858023182,
498
+ "eval_loss": 0.8527529239654541,
499
+ "eval_precision": 0.8302904444636728,
500
+ "eval_recall": 0.8267716535433071,
501
+ "eval_runtime": 2.5545,
502
+ "eval_samples_per_second": 49.716,
503
+ "eval_steps_per_second": 1.174,
504
+ "step": 90
505
+ }
506
+ ],
507
+ "logging_steps": 15,
508
+ "max_steps": 90,
509
+ "num_input_tokens_seen": 0,
510
+ "num_train_epochs": 45,
511
+ "save_steps": 500,
512
+ "total_flos": 1.2659877490145034e+18,
513
+ "train_batch_size": 48,
514
+ "trial_name": null,
515
+ "trial_params": null
516
+ }