bert_uncased_tiny-multi-emails-hq / trainer_state.json
{
"best_metric": 0.47660093674971465,
"best_model_checkpoint": "./runtime-masked/bert_uncased_L-2_H-128_A-2-mlm-multi-emails-hq/checkpoint-987",
"epoch": 7.994708994708994,
"global_step": 1128,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"learning_rate": 1.5789473684210522e-05,
"loss": 5.0353,
"step": 3
},
{
"epoch": 0.04,
"learning_rate": 3.1578947368421045e-05,
"loss": 5.0947,
"step": 6
},
{
"epoch": 0.06,
"learning_rate": 4.7368421052631574e-05,
"loss": 5.0512,
"step": 9
},
{
"epoch": 0.08,
"learning_rate": 6.315789473684209e-05,
"loss": 4.9084,
"step": 12
},
{
"epoch": 0.11,
"learning_rate": 7.894736842105262e-05,
"loss": 4.9194,
"step": 15
},
{
"epoch": 0.13,
"learning_rate": 9.473684210526315e-05,
"loss": 4.8347,
"step": 18
},
{
"epoch": 0.15,
"learning_rate": 0.00011052631578947366,
"loss": 4.7759,
"step": 21
},
{
"epoch": 0.17,
"learning_rate": 0.00012631578947368418,
"loss": 4.7372,
"step": 24
},
{
"epoch": 0.19,
"learning_rate": 0.0001421052631578947,
"loss": 4.7002,
"step": 27
},
{
"epoch": 0.21,
"learning_rate": 0.00015789473684210524,
"loss": 4.6299,
"step": 30
},
{
"epoch": 0.23,
"learning_rate": 0.0001736842105263158,
"loss": 4.5503,
"step": 33
},
{
"epoch": 0.25,
"learning_rate": 0.0001894736842105263,
"loss": 4.5794,
"step": 36
},
{
"epoch": 0.28,
"learning_rate": 0.00020526315789473683,
"loss": 4.4404,
"step": 39
},
{
"epoch": 0.3,
"learning_rate": 0.00022105263157894733,
"loss": 4.5015,
"step": 42
},
{
"epoch": 0.32,
"learning_rate": 0.00023684210526315788,
"loss": 4.3954,
"step": 45
},
{
"epoch": 0.34,
"learning_rate": 0.00025263157894736836,
"loss": 4.3344,
"step": 48
},
{
"epoch": 0.36,
"learning_rate": 0.0002684210526315789,
"loss": 4.3077,
"step": 51
},
{
"epoch": 0.38,
"learning_rate": 0.0002842105263157894,
"loss": 4.275,
"step": 54
},
{
"epoch": 0.4,
"learning_rate": 0.0003,
"loss": 4.2386,
"step": 57
},
{
"epoch": 0.42,
"learning_rate": 0.00029999419206464364,
"loss": 4.2538,
"step": 60
},
{
"epoch": 0.44,
"learning_rate": 0.0002999767687083362,
"loss": 4.1491,
"step": 63
},
{
"epoch": 0.47,
"learning_rate": 0.0002999477312803273,
"loss": 4.1288,
"step": 66
},
{
"epoch": 0.49,
"learning_rate": 0.00029990708202925034,
"loss": 4.1081,
"step": 69
},
{
"epoch": 0.51,
"learning_rate": 0.0002998548241029483,
"loss": 4.1276,
"step": 72
},
{
"epoch": 0.53,
"learning_rate": 0.00029979096154823,
"loss": 4.0664,
"step": 75
},
{
"epoch": 0.55,
"learning_rate": 0.0002997154993105566,
"loss": 4.0946,
"step": 78
},
{
"epoch": 0.57,
"learning_rate": 0.0002996284432336587,
"loss": 4.0326,
"step": 81
},
{
"epoch": 0.59,
"learning_rate": 0.00029952980005908385,
"loss": 4.1449,
"step": 84
},
{
"epoch": 0.61,
"learning_rate": 0.0002994195774256746,
"loss": 4.0859,
"step": 87
},
{
"epoch": 0.63,
"learning_rate": 0.0002992977838689765,
"loss": 4.052,
"step": 90
},
{
"epoch": 0.66,
"learning_rate": 0.0002991644288205777,
"loss": 3.9777,
"step": 93
},
{
"epoch": 0.68,
"learning_rate": 0.00029901952260737825,
"loss": 4.0233,
"step": 96
},
{
"epoch": 0.7,
"learning_rate": 0.0002988630764507904,
"loss": 3.9186,
"step": 99
},
{
"epoch": 0.72,
"learning_rate": 0.0002986951024658695,
"loss": 3.9352,
"step": 102
},
{
"epoch": 0.74,
"learning_rate": 0.0002985156136603764,
"loss": 3.9825,
"step": 105
},
{
"epoch": 0.76,
"learning_rate": 0.0002983246239337692,
"loss": 3.9961,
"step": 108
},
{
"epoch": 0.78,
"learning_rate": 0.0002981221480761281,
"loss": 3.9801,
"step": 111
},
{
"epoch": 0.8,
"learning_rate": 0.0002979082017670087,
"loss": 3.9136,
"step": 114
},
{
"epoch": 0.83,
"learning_rate": 0.00029768280157422883,
"loss": 3.8618,
"step": 117
},
{
"epoch": 0.85,
"learning_rate": 0.00029744596495258525,
"loss": 3.8953,
"step": 120
},
{
"epoch": 0.87,
"learning_rate": 0.00029719771024250166,
"loss": 3.95,
"step": 123
},
{
"epoch": 0.89,
"learning_rate": 0.000296938056668609,
"loss": 3.8829,
"step": 126
},
{
"epoch": 0.91,
"learning_rate": 0.0002966670243382561,
"loss": 3.8937,
"step": 129
},
{
"epoch": 0.93,
"learning_rate": 0.00029638463423995305,
"loss": 3.8596,
"step": 132
},
{
"epoch": 0.95,
"learning_rate": 0.0002960909082417457,
"loss": 3.8494,
"step": 135
},
{
"epoch": 0.97,
"learning_rate": 0.0002957858690895221,
"loss": 3.8545,
"step": 138
},
{
"epoch": 0.99,
"learning_rate": 0.0002954695404052514,
"loss": 3.8974,
"step": 141
},
{
"epoch": 0.99,
"eval_accuracy": 0.42182521025962494,
"eval_loss": 3.5129101276397705,
"eval_runtime": 7.1205,
"eval_samples_per_second": 283.265,
"eval_steps_per_second": 141.703,
"step": 141
},
{
"epoch": 1.02,
"learning_rate": 0.00029514194668515416,
"loss": 4.7932,
"step": 144
},
{
"epoch": 1.04,
"learning_rate": 0.00029480311329780576,
"loss": 3.8152,
"step": 147
},
{
"epoch": 1.06,
"learning_rate": 0.0002944530664821717,
"loss": 3.7853,
"step": 150
},
{
"epoch": 1.08,
"learning_rate": 0.00029409183334557556,
"loss": 3.7879,
"step": 153
},
{
"epoch": 1.11,
"learning_rate": 0.00029371944186160016,
"loss": 3.8584,
"step": 156
},
{
"epoch": 1.13,
"learning_rate": 0.00029333592086792107,
"loss": 3.8126,
"step": 159
},
{
"epoch": 1.15,
"learning_rate": 0.0002929413000640735,
"loss": 3.8505,
"step": 162
},
{
"epoch": 1.17,
"learning_rate": 0.0002925356100091522,
"loss": 3.8638,
"step": 165
},
{
"epoch": 1.19,
"learning_rate": 0.00029211888211944553,
"loss": 3.8271,
"step": 168
},
{
"epoch": 1.21,
"learning_rate": 0.0002916911486660021,
"loss": 3.76,
"step": 171
},
{
"epoch": 1.23,
"learning_rate": 0.0002912524427721317,
"loss": 3.7647,
"step": 174
},
{
"epoch": 1.25,
"learning_rate": 0.00029080279841084075,
"loss": 3.7656,
"step": 177
},
{
"epoch": 1.28,
"learning_rate": 0.000290342250402201,
"loss": 3.7425,
"step": 180
},
{
"epoch": 1.3,
"learning_rate": 0.0002898708344106533,
"loss": 3.7809,
"step": 183
},
{
"epoch": 1.32,
"learning_rate": 0.00028938858694224574,
"loss": 3.8349,
"step": 186
},
{
"epoch": 1.34,
"learning_rate": 0.0002888955453418066,
"loss": 3.7542,
"step": 189
},
{
"epoch": 1.36,
"learning_rate": 0.0002883917477900524,
"loss": 3.7652,
"step": 192
},
{
"epoch": 1.38,
"learning_rate": 0.0002878772333006314,
"loss": 3.7324,
"step": 195
},
{
"epoch": 1.4,
"learning_rate": 0.0002873520417171021,
"loss": 3.7123,
"step": 198
},
{
"epoch": 1.42,
"learning_rate": 0.00028681621370984817,
"loss": 3.7046,
"step": 201
},
{
"epoch": 1.44,
"learning_rate": 0.0002862697907729285,
"loss": 3.6747,
"step": 204
},
{
"epoch": 1.47,
"learning_rate": 0.00028571281522086453,
"loss": 3.6714,
"step": 207
},
{
"epoch": 1.49,
"learning_rate": 0.0002851453301853628,
"loss": 3.7639,
"step": 210
},
{
"epoch": 1.51,
"learning_rate": 0.0002845673796119755,
"loss": 3.811,
"step": 213
},
{
"epoch": 1.53,
"learning_rate": 0.0002839790082566967,
"loss": 3.7407,
"step": 216
},
{
"epoch": 1.55,
"learning_rate": 0.00028338026168249714,
"loss": 3.7508,
"step": 219
},
{
"epoch": 1.57,
"learning_rate": 0.0002827711862557954,
"loss": 3.6624,
"step": 222
},
{
"epoch": 1.59,
"learning_rate": 0.00028215182914286766,
"loss": 3.7081,
"step": 225
},
{
"epoch": 1.61,
"learning_rate": 0.0002815222383061948,
"loss": 3.7027,
"step": 228
},
{
"epoch": 1.63,
"learning_rate": 0.00028088246250074857,
"loss": 3.7365,
"step": 231
},
{
"epoch": 1.66,
"learning_rate": 0.0002802325512702159,
"loss": 3.6906,
"step": 234
},
{
"epoch": 1.68,
"learning_rate": 0.0002795725549431624,
"loss": 3.7296,
"step": 237
},
{
"epoch": 1.7,
"learning_rate": 0.0002789025246291347,
"loss": 3.6572,
"step": 240
},
{
"epoch": 1.72,
"learning_rate": 0.0002782225122147029,
"loss": 3.7676,
"step": 243
},
{
"epoch": 1.74,
"learning_rate": 0.0002775325703594421,
"loss": 3.7506,
"step": 246
},
{
"epoch": 1.76,
"learning_rate": 0.00027683275249185504,
"loss": 3.6966,
"step": 249
},
{
"epoch": 1.78,
"learning_rate": 0.0002761231128052341,
"loss": 3.6229,
"step": 252
},
{
"epoch": 1.8,
"learning_rate": 0.00027540370625346507,
"loss": 3.6964,
"step": 255
},
{
"epoch": 1.83,
"learning_rate": 0.0002746745885467712,
"loss": 3.6367,
"step": 258
},
{
"epoch": 1.85,
"learning_rate": 0.00027393581614739923,
"loss": 3.6899,
"step": 261
},
{
"epoch": 1.87,
"learning_rate": 0.000273187446265247,
"loss": 3.661,
"step": 264
},
{
"epoch": 1.89,
"learning_rate": 0.00027242953685343327,
"loss": 3.7046,
"step": 267
},
{
"epoch": 1.91,
"learning_rate": 0.0002716621466038095,
"loss": 3.6271,
"step": 270
},
{
"epoch": 1.93,
"learning_rate": 0.0002708853349424152,
"loss": 3.5838,
"step": 273
},
{
"epoch": 1.95,
"learning_rate": 0.000270099162024876,
"loss": 3.6223,
"step": 276
},
{
"epoch": 1.97,
"learning_rate": 0.0002693036887317449,
"loss": 3.6492,
"step": 279
},
{
"epoch": 1.99,
"learning_rate": 0.0002684989766637882,
"loss": 3.7009,
"step": 282
},
{
"epoch": 1.99,
"eval_accuracy": 0.44521651429020515,
"eval_loss": 3.329479217529297,
"eval_runtime": 7.098,
"eval_samples_per_second": 284.164,
"eval_steps_per_second": 142.153,
"step": 282
},
{
"epoch": 2.02,
"learning_rate": 0.0002676850881372147,
"loss": 4.5343,
"step": 285
},
{
"epoch": 2.04,
"learning_rate": 0.00026686208617885055,
"loss": 3.6713,
"step": 288
},
{
"epoch": 2.06,
"learning_rate": 0.0002660300345212579,
"loss": 3.5313,
"step": 291
},
{
"epoch": 2.08,
"learning_rate": 0.0002651889975978001,
"loss": 3.6284,
"step": 294
},
{
"epoch": 2.11,
"learning_rate": 0.0002643390405376515,
"loss": 3.6498,
"step": 297
},
{
"epoch": 2.13,
"learning_rate": 0.00026348022916075436,
"loss": 3.5984,
"step": 300
},
{
"epoch": 2.15,
"learning_rate": 0.00026261262997272135,
"loss": 3.6395,
"step": 303
},
{
"epoch": 2.17,
"learning_rate": 0.00026173631015968585,
"loss": 3.6397,
"step": 306
},
{
"epoch": 2.19,
"learning_rate": 0.00026085133758309883,
"loss": 3.6392,
"step": 309
},
{
"epoch": 2.21,
"learning_rate": 0.0002599577807744739,
"loss": 3.6054,
"step": 312
},
{
"epoch": 2.23,
"learning_rate": 0.0002590557089300802,
"loss": 3.649,
"step": 315
},
{
"epoch": 2.25,
"learning_rate": 0.0002581451919055837,
"loss": 3.5249,
"step": 318
},
{
"epoch": 2.28,
"learning_rate": 0.0002572263002106381,
"loss": 3.599,
"step": 321
},
{
"epoch": 2.3,
"learning_rate": 0.00025629910500342423,
"loss": 3.6279,
"step": 324
},
{
"epoch": 2.32,
"learning_rate": 0.00025536367808513973,
"loss": 3.5986,
"step": 327
},
{
"epoch": 2.34,
"learning_rate": 0.000254420091894439,
"loss": 3.613,
"step": 330
},
{
"epoch": 2.36,
"learning_rate": 0.0002534684195018232,
"loss": 3.5667,
"step": 333
},
{
"epoch": 2.38,
"learning_rate": 0.0002525087346039822,
"loss": 3.5672,
"step": 336
},
{
"epoch": 2.4,
"learning_rate": 0.0002515411115180875,
"loss": 3.5449,
"step": 339
},
{
"epoch": 2.42,
"learning_rate": 0.00025056562517603666,
"loss": 3.5743,
"step": 342
},
{
"epoch": 2.44,
"learning_rate": 0.0002495823511186512,
"loss": 3.5829,
"step": 345
},
{
"epoch": 2.47,
"learning_rate": 0.0002485913654898268,
"loss": 3.6092,
"step": 348
},
{
"epoch": 2.49,
"learning_rate": 0.0002475927450306363,
"loss": 3.5628,
"step": 351
},
{
"epoch": 2.51,
"learning_rate": 0.0002465865670733873,
"loss": 3.613,
"step": 354
},
{
"epoch": 2.53,
"learning_rate": 0.00024557290953563366,
"loss": 3.5926,
"step": 357
},
{
"epoch": 2.55,
"learning_rate": 0.00024455185091414136,
"loss": 3.6004,
"step": 360
},
{
"epoch": 2.57,
"learning_rate": 0.00024352347027881003,
"loss": 3.6115,
"step": 363
},
{
"epoch": 2.59,
"learning_rate": 0.00024248784726654961,
"loss": 3.6296,
"step": 366
},
{
"epoch": 2.61,
"learning_rate": 0.0002414450620751136,
"loss": 3.567,
"step": 369
},
{
"epoch": 2.63,
"learning_rate": 0.00024039519545688846,
"loss": 3.5447,
"step": 372
},
{
"epoch": 2.66,
"learning_rate": 0.00023933832871264016,
"loss": 3.5445,
"step": 375
},
{
"epoch": 2.68,
"learning_rate": 0.00023827454368521844,
"loss": 3.602,
"step": 378
},
{
"epoch": 2.7,
"learning_rate": 0.00023720392275321895,
"loss": 3.5547,
"step": 381
},
{
"epoch": 2.72,
"learning_rate": 0.0002361265488246039,
"loss": 3.5888,
"step": 384
},
{
"epoch": 2.74,
"learning_rate": 0.0002350425053302817,
"loss": 3.555,
"step": 387
},
{
"epoch": 2.76,
"learning_rate": 0.0002339518762176462,
"loss": 3.5593,
"step": 390
},
{
"epoch": 2.78,
"learning_rate": 0.00023285474594407585,
"loss": 3.5636,
"step": 393
},
{
"epoch": 2.8,
"learning_rate": 0.00023175119947039342,
"loss": 3.5646,
"step": 396
},
{
"epoch": 2.83,
"learning_rate": 0.00023064132225428659,
"loss": 3.5013,
"step": 399
},
{
"epoch": 2.85,
"learning_rate": 0.0002295252002436904,
"loss": 3.4809,
"step": 402
},
{
"epoch": 2.87,
"learning_rate": 0.00022840291987013143,
"loss": 3.4732,
"step": 405
},
{
"epoch": 2.89,
"learning_rate": 0.00022727456804203436,
"loss": 3.4971,
"step": 408
},
{
"epoch": 2.91,
"learning_rate": 0.00022614023213799234,
"loss": 3.502,
"step": 411
},
{
"epoch": 2.93,
"learning_rate": 0.000225,
"loss": 3.5042,
"step": 414
},
{
"epoch": 2.95,
"learning_rate": 0.00022385395992665128,
"loss": 3.6605,
"step": 417
},
{
"epoch": 2.97,
"learning_rate": 0.0002227022006663018,
"loss": 3.5878,
"step": 420
},
{
"epoch": 2.99,
"learning_rate": 0.00022154481141019585,
"loss": 3.5845,
"step": 423
},
{
"epoch": 2.99,
"eval_accuracy": 0.45894501850558433,
"eval_loss": 3.2218551635742188,
"eval_runtime": 7.1194,
"eval_samples_per_second": 283.31,
"eval_steps_per_second": 141.725,
"step": 423
},
{
"epoch": 3.02,
"learning_rate": 0.00022038188178555992,
"loss": 4.4562,
"step": 426
},
{
"epoch": 3.04,
"learning_rate": 0.00021921350184866178,
"loss": 3.5152,
"step": 429
},
{
"epoch": 3.06,
"learning_rate": 0.0002180397620778366,
"loss": 3.5466,
"step": 432
},
{
"epoch": 3.08,
"learning_rate": 0.00021686075336648075,
"loss": 3.5805,
"step": 435
},
{
"epoch": 3.11,
"learning_rate": 0.00021567656701601247,
"loss": 3.4612,
"step": 438
},
{
"epoch": 3.13,
"learning_rate": 0.00021448729472880215,
"loss": 3.5251,
"step": 441
},
{
"epoch": 3.15,
"learning_rate": 0.00021329302860107063,
"loss": 3.5363,
"step": 444
},
{
"epoch": 3.17,
"learning_rate": 0.00021209386111575753,
"loss": 3.5013,
"step": 447
},
{
"epoch": 3.19,
"learning_rate": 0.00021088988513535931,
"loss": 3.542,
"step": 450
},
{
"epoch": 3.21,
"learning_rate": 0.0002096811938947381,
"loss": 3.5095,
"step": 453
},
{
"epoch": 3.23,
"learning_rate": 0.00020846788099390188,
"loss": 3.5267,
"step": 456
},
{
"epoch": 3.25,
"learning_rate": 0.00020725004039075587,
"loss": 3.4362,
"step": 459
},
{
"epoch": 3.28,
"learning_rate": 0.00020602776639382672,
"loss": 3.5144,
"step": 462
},
{
"epoch": 3.3,
"learning_rate": 0.00020480115365495926,
"loss": 3.4616,
"step": 465
},
{
"epoch": 3.32,
"learning_rate": 0.0002035702971619867,
"loss": 3.5137,
"step": 468
},
{
"epoch": 3.34,
"learning_rate": 0.00020233529223137502,
"loss": 3.4768,
"step": 471
},
{
"epoch": 3.36,
"learning_rate": 0.00020109623450084154,
"loss": 3.5287,
"step": 474
},
{
"epoch": 3.38,
"learning_rate": 0.00019985321992194892,
"loss": 3.5211,
"step": 477
},
{
"epoch": 3.4,
"learning_rate": 0.00019860634475267488,
"loss": 3.5589,
"step": 480
},
{
"epoch": 3.42,
"learning_rate": 0.00019735570554995776,
"loss": 3.4963,
"step": 483
},
{
"epoch": 3.44,
"learning_rate": 0.00019610139916221943,
"loss": 3.5406,
"step": 486
},
{
"epoch": 3.47,
"learning_rate": 0.0001948435227218655,
"loss": 3.5183,
"step": 489
},
{
"epoch": 3.49,
"learning_rate": 0.0001935821736377634,
"loss": 3.5413,
"step": 492
},
{
"epoch": 3.51,
"learning_rate": 0.0001923174495876989,
"loss": 3.4245,
"step": 495
},
{
"epoch": 3.53,
"learning_rate": 0.00019104944851081244,
"loss": 3.5263,
"step": 498
},
{
"epoch": 3.55,
"learning_rate": 0.00018977826860001442,
"loss": 3.4864,
"step": 501
},
{
"epoch": 3.57,
"learning_rate": 0.00018850400829438155,
"loss": 3.5034,
"step": 504
},
{
"epoch": 3.59,
"learning_rate": 0.00018722676627153358,
"loss": 3.4382,
"step": 507
},
{
"epoch": 3.61,
"learning_rate": 0.00018594664143999187,
"loss": 3.5283,
"step": 510
},
{
"epoch": 3.63,
"learning_rate": 0.00018466373293152,
"loss": 3.5367,
"step": 513
},
{
"epoch": 3.66,
"learning_rate": 0.00018337814009344714,
"loss": 3.5376,
"step": 516
},
{
"epoch": 3.68,
"learning_rate": 0.00018208996248097458,
"loss": 3.5159,
"step": 519
},
{
"epoch": 3.7,
"learning_rate": 0.00018079929984946637,
"loss": 3.489,
"step": 522
},
{
"epoch": 3.72,
"learning_rate": 0.0001795062521467242,
"loss": 3.4791,
"step": 525
},
{
"epoch": 3.74,
"learning_rate": 0.00017821091950524768,
"loss": 3.4478,
"step": 528
},
{
"epoch": 3.76,
"learning_rate": 0.00017691340223448016,
"loss": 3.4824,
"step": 531
},
{
"epoch": 3.78,
"learning_rate": 0.00017561380081304058,
"loss": 3.4876,
"step": 534
},
{
"epoch": 3.8,
"learning_rate": 0.00017431221588094307,
"loss": 3.5219,
"step": 537
},
{
"epoch": 3.83,
"learning_rate": 0.00017300874823180282,
"loss": 3.4727,
"step": 540
},
{
"epoch": 3.85,
"learning_rate": 0.00017170349880503107,
"loss": 3.4474,
"step": 543
},
{
"epoch": 3.87,
"learning_rate": 0.00017039656867801845,
"loss": 3.4553,
"step": 546
},
{
"epoch": 3.89,
"learning_rate": 0.00016908805905830752,
"loss": 3.4837,
"step": 549
},
{
"epoch": 3.91,
"learning_rate": 0.00016777807127575542,
"loss": 3.4804,
"step": 552
},
{
"epoch": 3.93,
"learning_rate": 0.00016646670677468686,
"loss": 3.4665,
"step": 555
},
{
"epoch": 3.95,
"learning_rate": 0.00016515406710603867,
"loss": 3.4832,
"step": 558
},
{
"epoch": 3.97,
"learning_rate": 0.0001638402539194953,
"loss": 3.428,
"step": 561
},
{
"epoch": 3.99,
"learning_rate": 0.00016252536895561753,
"loss": 3.4976,
"step": 564
},
{
"epoch": 3.99,
"eval_accuracy": 0.46655492116704206,
"eval_loss": 3.1618497371673584,
"eval_runtime": 7.1255,
"eval_samples_per_second": 283.069,
"eval_steps_per_second": 141.605,
"step": 564
},
{
"epoch": 4.02,
"learning_rate": 0.00016120951403796364,
"loss": 4.4347,
"step": 567
},
{
"epoch": 4.04,
"learning_rate": 0.00015989279106520427,
"loss": 3.4532,
"step": 570
},
{
"epoch": 4.06,
"learning_rate": 0.00015857530200323163,
"loss": 3.4226,
"step": 573
},
{
"epoch": 4.08,
"learning_rate": 0.000157257148877263,
"loss": 3.4455,
"step": 576
},
{
"epoch": 4.11,
"learning_rate": 0.00015593843376394043,
"loss": 3.4435,
"step": 579
},
{
"epoch": 4.13,
"learning_rate": 0.00015461925878342556,
"loss": 3.4103,
"step": 582
},
{
"epoch": 4.15,
"learning_rate": 0.00015329972609149191,
"loss": 3.4701,
"step": 585
},
{
"epoch": 4.17,
"learning_rate": 0.00015197993787161384,
"loss": 3.4195,
"step": 588
},
{
"epoch": 4.19,
"learning_rate": 0.00015065999632705353,
"loss": 3.4592,
"step": 591
},
{
"epoch": 4.21,
"learning_rate": 0.00014934000367294647,
"loss": 3.4675,
"step": 594
},
{
"epoch": 4.23,
"learning_rate": 0.00014802006212838616,
"loss": 3.5138,
"step": 597
},
{
"epoch": 4.25,
"learning_rate": 0.00014670027390850809,
"loss": 3.485,
"step": 600
},
{
"epoch": 4.28,
"learning_rate": 0.00014538074121657447,
"loss": 3.4549,
"step": 603
},
{
"epoch": 4.3,
"learning_rate": 0.0001440615662360596,
"loss": 3.4515,
"step": 606
},
{
"epoch": 4.32,
"learning_rate": 0.000142742851122737,
"loss": 3.4109,
"step": 609
},
{
"epoch": 4.34,
"learning_rate": 0.00014142469799676831,
"loss": 3.4207,
"step": 612
},
{
"epoch": 4.36,
"learning_rate": 0.00014010720893479567,
"loss": 3.4871,
"step": 615
},
{
"epoch": 4.38,
"learning_rate": 0.00013879048596203636,
"loss": 3.4328,
"step": 618
},
{
"epoch": 4.4,
"learning_rate": 0.00013747463104438247,
"loss": 3.4716,
"step": 621
},
{
"epoch": 4.42,
"learning_rate": 0.0001361597460805047,
"loss": 3.432,
"step": 624
},
{
"epoch": 4.44,
"learning_rate": 0.00013484593289396133,
"loss": 3.4307,
"step": 627
},
{
"epoch": 4.47,
"learning_rate": 0.00013353329322531311,
"loss": 3.5387,
"step": 630
},
{
"epoch": 4.49,
"learning_rate": 0.0001322219287242446,
"loss": 3.466,
"step": 633
},
{
"epoch": 4.51,
"learning_rate": 0.0001309119409416925,
"loss": 3.4677,
"step": 636
},
{
"epoch": 4.53,
"learning_rate": 0.00012960343132198158,
"loss": 3.3951,
"step": 639
},
{
"epoch": 4.55,
"learning_rate": 0.00012829650119496896,
"loss": 3.433,
"step": 642
},
{
"epoch": 4.57,
"learning_rate": 0.00012699125176819716,
"loss": 3.4642,
"step": 645
},
{
"epoch": 4.59,
"learning_rate": 0.00012568778411905688,
"loss": 3.429,
"step": 648
},
{
"epoch": 4.61,
"learning_rate": 0.00012438619918695934,
"loss": 3.4187,
"step": 651
},
{
"epoch": 4.63,
"learning_rate": 0.00012308659776551984,
"loss": 3.466,
"step": 654
},
{
"epoch": 4.66,
"learning_rate": 0.00012178908049475228,
"loss": 3.4039,
"step": 657
},
{
"epoch": 4.68,
"learning_rate": 0.00012049374785327578,
"loss": 3.4337,
"step": 660
},
{
"epoch": 4.7,
"learning_rate": 0.00011920070015053363,
"loss": 3.4728,
"step": 663
},
{
"epoch": 4.72,
"learning_rate": 0.00011791003751902542,
"loss": 3.5019,
"step": 666
},
{
"epoch": 4.74,
"learning_rate": 0.00011662185990655284,
"loss": 3.3873,
"step": 669
},
{
"epoch": 4.76,
"learning_rate": 0.00011533626706848,
"loss": 3.3441,
"step": 672
},
{
"epoch": 4.78,
"learning_rate": 0.00011405335856000816,
"loss": 3.4262,
"step": 675
},
{
"epoch": 4.8,
"learning_rate": 0.00011277323372846643,
"loss": 3.4619,
"step": 678
},
{
"epoch": 4.83,
"learning_rate": 0.00011149599170561842,
"loss": 3.4034,
"step": 681
},
{
"epoch": 4.85,
"learning_rate": 0.00011022173139998555,
"loss": 3.5172,
"step": 684
},
{
"epoch": 4.87,
"learning_rate": 0.00010895055148918756,
"loss": 3.4081,
"step": 687
},
{
"epoch": 4.89,
"learning_rate": 0.00010768255041230107,
"loss": 3.4555,
"step": 690
},
{
"epoch": 4.91,
"learning_rate": 0.00010641782636223658,
"loss": 3.4756,
"step": 693
},
{
"epoch": 4.93,
"learning_rate": 0.00010515647727813445,
"loss": 3.3765,
"step": 696
},
{
"epoch": 4.95,
"learning_rate": 0.00010389860083778056,
"loss": 3.4485,
"step": 699
},
{
"epoch": 4.97,
"learning_rate": 0.00010264429445004229,
"loss": 3.4879,
"step": 702
},
{
"epoch": 4.99,
"learning_rate": 0.00010139365524732513,
"loss": 3.4356,
"step": 705
},
{
"epoch": 4.99,
"eval_accuracy": 0.4739487637146307,
"eval_loss": 3.100182056427002,
"eval_runtime": 7.093,
"eval_samples_per_second": 284.366,
"eval_steps_per_second": 142.254,
"step": 705
},
{
"epoch": 5.02,
"learning_rate": 0.00010014678007805106,
"loss": 4.224,
"step": 708
},
{
"epoch": 5.04,
"learning_rate": 9.890376549915847e-05,
"loss": 3.4427,
"step": 711
},
{
"epoch": 5.06,
"learning_rate": 9.766470776862499e-05,
"loss": 3.4274,
"step": 714
},
{
"epoch": 5.08,
"learning_rate": 9.642970283801327e-05,
"loss": 3.4132,
"step": 717
},
{
"epoch": 5.11,
"learning_rate": 9.519884634504074e-05,
"loss": 3.3957,
"step": 720
},
{
"epoch": 5.13,
"learning_rate": 9.397223360617325e-05,
"loss": 3.4081,
"step": 723
},
{
"epoch": 5.15,
"learning_rate": 9.274995960924409e-05,
"loss": 3.4419,
"step": 726
},
{
"epoch": 5.17,
"learning_rate": 9.15321190060981e-05,
"loss": 3.4608,
"step": 729
},
{
"epoch": 5.19,
"learning_rate": 9.031880610526189e-05,
"loss": 3.401,
"step": 732
},
{
"epoch": 5.21,
"learning_rate": 8.91101148646407e-05,
"loss": 3.3991,
"step": 735
},
{
"epoch": 5.23,
"learning_rate": 8.790613888424248e-05,
"loss": 3.4329,
"step": 738
},
{
"epoch": 5.25,
"learning_rate": 8.670697139892933e-05,
"loss": 3.4085,
"step": 741
},
{
"epoch": 5.28,
"learning_rate": 8.551270527119782e-05,
"loss": 3.4269,
"step": 744
},
{
"epoch": 5.3,
"learning_rate": 8.432343298398748e-05,
"loss": 3.3984,
"step": 747
},
{
"epoch": 5.32,
"learning_rate": 8.313924663351926e-05,
"loss": 3.3986,
"step": 750
},
{
"epoch": 5.34,
"learning_rate": 8.196023792216334e-05,
"loss": 3.4602,
"step": 753
},
{
"epoch": 5.36,
"learning_rate": 8.078649815133826e-05,
"loss": 3.4649,
"step": 756
},
{
"epoch": 5.38,
"learning_rate": 7.961811821444008e-05,
"loss": 3.3909,
"step": 759
},
{
"epoch": 5.4,
"learning_rate": 7.845518858980415e-05,
"loss": 3.3896,
"step": 762
},
{
"epoch": 5.42,
"learning_rate": 7.729779933369818e-05,
"loss": 3.3746,
"step": 765
},
{
"epoch": 5.44,
"learning_rate": 7.614604007334866e-05,
"loss": 3.4258,
"step": 768
},
{
"epoch": 5.47,
"learning_rate": 7.500000000000002e-05,
"loss": 3.3834,
"step": 771
},
{
"epoch": 5.49,
"learning_rate": 7.385976786200765e-05,
"loss": 3.3626,
"step": 774
},
{
"epoch": 5.51,
"learning_rate": 7.272543195796558e-05,
"loss": 3.4169,
"step": 777
},
{
"epoch": 5.53,
"learning_rate": 7.15970801298685e-05,
"loss": 3.4288,
"step": 780
},
{
"epoch": 5.55,
"learning_rate": 7.047479975630955e-05,
"loss": 3.3825,
"step": 783
},
{
"epoch": 5.57,
"learning_rate": 6.935867774571337e-05,
"loss": 3.3973,
"step": 786
},
{
"epoch": 5.59,
"learning_rate": 6.82488005296066e-05,
"loss": 3.4212,
"step": 789
},
{
"epoch": 5.61,
"learning_rate": 6.714525405592412e-05,
"loss": 3.4038,
"step": 792
},
{
"epoch": 5.63,
"learning_rate": 6.604812378235381e-05,
"loss": 3.404,
"step": 795
},
{
"epoch": 5.66,
"learning_rate": 6.495749466971827e-05,
"loss": 3.4048,
"step": 798
},
{
"epoch": 5.68,
"learning_rate": 6.38734511753961e-05,
"loss": 3.4066,
"step": 801
},
{
"epoch": 5.7,
"learning_rate": 6.279607724678103e-05,
"loss": 3.3839,
"step": 804
},
{
"epoch": 5.72,
"learning_rate": 6.172545631478156e-05,
"loss": 3.4525,
"step": 807
},
{
"epoch": 5.74,
"learning_rate": 6.066167128735988e-05,
"loss": 3.3788,
"step": 810
},
{
"epoch": 5.76,
"learning_rate": 5.960480454311155e-05,
"loss": 3.3995,
"step": 813
},
{
"epoch": 5.78,
"learning_rate": 5.855493792488637e-05,
"loss": 3.4489,
"step": 816
},
{
"epoch": 5.8,
"learning_rate": 5.751215273345036e-05,
"loss": 3.3495,
"step": 819
},
{
"epoch": 5.83,
"learning_rate": 5.6476529721189974e-05,
"loss": 3.4403,
"step": 822
},
{
"epoch": 5.85,
"learning_rate": 5.5448149085858596e-05,
"loss": 3.3715,
"step": 825
},
{
"epoch": 5.87,
"learning_rate": 5.4427090464366323e-05,
"loss": 3.3936,
"step": 828
},
{
"epoch": 5.89,
"learning_rate": 5.3413432926612655e-05,
"loss": 3.3675,
"step": 831
},
{
"epoch": 5.91,
"learning_rate": 5.240725496936372e-05,
"loss": 3.405,
"step": 834
},
{
"epoch": 5.93,
"learning_rate": 5.140863451017318e-05,
"loss": 3.4457,
"step": 837
},
{
"epoch": 5.95,
"learning_rate": 5.0417648881348774e-05,
"loss": 3.4159,
"step": 840
},
{
"epoch": 5.97,
"learning_rate": 4.943437482396336e-05,
"loss": 3.3632,
"step": 843
},
{
"epoch": 5.99,
"learning_rate": 4.845888848191248e-05,
"loss": 3.4493,
"step": 846
},
{
"epoch": 5.99,
"eval_accuracy": 0.4745788157886148,
"eval_loss": 3.1028223037719727,
"eval_runtime": 7.1484,
"eval_samples_per_second": 282.16,
"eval_steps_per_second": 141.15,
"step": 846
},
{
"epoch": 6.02,
"learning_rate": 4.74912653960177e-05,
"loss": 4.272,
"step": 849
},
{
"epoch": 6.04,
"learning_rate": 4.65315804981768e-05,
"loss": 3.4332,
"step": 852
},
{
"epoch": 6.06,
"learning_rate": 4.5579908105561016e-05,
"loss": 3.3962,
"step": 855
},
{
"epoch": 6.08,
"learning_rate": 4.463632191486022e-05,
"loss": 3.3835,
"step": 858
},
{
"epoch": 6.11,
"learning_rate": 4.3700894996575764e-05,
"loss": 3.3852,
"step": 861
},
{
"epoch": 6.13,
"learning_rate": 4.277369978936187e-05,
"loss": 3.4379,
"step": 864
},
{
"epoch": 6.15,
"learning_rate": 4.185480809441631e-05,
"loss": 3.4083,
"step": 867
},
{
"epoch": 6.17,
"learning_rate": 4.094429106991981e-05,
"loss": 3.447,
"step": 870
},
{
"epoch": 6.19,
"learning_rate": 4.004221922552608e-05,
"loss": 3.3643,
"step": 873
},
{
"epoch": 6.21,
"learning_rate": 3.914866241690115e-05,
"loss": 3.4416,
"step": 876
},
{
"epoch": 6.23,
"learning_rate": 3.826368984031414e-05,
"loss": 3.4144,
"step": 879
},
{
"epoch": 6.25,
"learning_rate": 3.738737002727863e-05,
"loss": 3.3951,
"step": 882
},
{
"epoch": 6.28,
"learning_rate": 3.651977083924563e-05,
"loss": 3.39,
"step": 885
},
{
"epoch": 6.3,
"learning_rate": 3.566095946234842e-05,
"loss": 3.4053,
"step": 888
},
{
"epoch": 6.32,
"learning_rate": 3.4811002402199874e-05,
"loss": 3.4033,
"step": 891
},
{
"epoch": 6.34,
"learning_rate": 3.396996547874203e-05,
"loss": 3.2884,
"step": 894
},
{
"epoch": 6.36,
"learning_rate": 3.3137913821149425e-05,
"loss": 3.415,
"step": 897
},
{
"epoch": 6.38,
"learning_rate": 3.2314911862785275e-05,
"loss": 3.3832,
"step": 900
},
{
"epoch": 6.4,
"learning_rate": 3.150102333621181e-05,
"loss": 3.3488,
"step": 903
},
{
"epoch": 6.42,
"learning_rate": 3.0696311268255093e-05,
"loss": 3.3449,
"step": 906
},
{
"epoch": 6.44,
"learning_rate": 2.990083797512401e-05,
"loss": 3.3563,
"step": 909
},
{
"epoch": 6.47,
"learning_rate": 2.9114665057584768e-05,
"loss": 3.4071,
"step": 912
},
{
"epoch": 6.49,
"learning_rate": 2.8337853396190503e-05,
"loss": 3.456,
"step": 915
},
{
"epoch": 6.51,
"learning_rate": 2.7570463146566758e-05,
"loss": 3.4042,
"step": 918
},
{
"epoch": 6.53,
"learning_rate": 2.6812553734752956e-05,
"loss": 3.375,
"step": 921
},
{
"epoch": 6.55,
"learning_rate": 2.6064183852600797e-05,
"loss": 3.3807,
"step": 924
},
{
"epoch": 6.57,
"learning_rate": 2.532541145322881e-05,
"loss": 3.3436,
"step": 927
},
{
"epoch": 6.59,
"learning_rate": 2.4596293746534905e-05,
"loss": 3.2923,
"step": 930
},
{
"epoch": 6.61,
"learning_rate": 2.3876887194765874e-05,
"loss": 3.4021,
"step": 933
},
{
"epoch": 6.63,
"learning_rate": 2.3167247508144954e-05,
"loss": 3.3814,
"step": 936
},
{
"epoch": 6.66,
"learning_rate": 2.24674296405579e-05,
"loss": 3.4269,
"step": 939
},
{
"epoch": 6.68,
"learning_rate": 2.1777487785297114e-05,
"loss": 3.3474,
"step": 942
},
{
"epoch": 6.7,
"learning_rate": 2.1097475370865242e-05,
"loss": 3.4056,
"step": 945
},
{
"epoch": 6.72,
"learning_rate": 2.0427445056837544e-05,
"loss": 3.4254,
"step": 948
},
{
"epoch": 6.74,
"learning_rate": 1.9767448729784046e-05,
"loss": 3.3823,
"step": 951
},
{
"epoch": 6.76,
"learning_rate": 1.9117537499251413e-05,
"loss": 3.3903,
"step": 954
},
{
"epoch": 6.78,
"learning_rate": 1.8477761693805203e-05,
"loss": 3.3717,
"step": 957
},
{
"epoch": 6.8,
"learning_rate": 1.7848170857132325e-05,
"loss": 3.3791,
"step": 960
},
{
"epoch": 6.83,
"learning_rate": 1.7228813744204556e-05,
"loss": 3.3805,
"step": 963
},
{
"epoch": 6.85,
"learning_rate": 1.661973831750283e-05,
"loss": 3.3905,
"step": 966
},
{
"epoch": 6.87,
"learning_rate": 1.6020991743303263e-05,
"loss": 3.4504,
"step": 969
},
{
"epoch": 6.89,
"learning_rate": 1.5432620388024512e-05,
"loss": 3.3748,
"step": 972
},
{
"epoch": 6.91,
"learning_rate": 1.4854669814637143e-05,
"loss": 3.4381,
"step": 975
},
{
"epoch": 6.93,
"learning_rate": 1.428718477913543e-05,
"loss": 3.3435,
"step": 978
},
{
"epoch": 6.95,
"learning_rate": 1.3730209227071436e-05,
"loss": 3.3974,
"step": 981
},
{
"epoch": 6.97,
"learning_rate": 1.3183786290151838e-05,
"loss": 3.3399,
"step": 984
},
{
"epoch": 6.99,
"learning_rate": 1.264795828289787e-05,
"loss": 3.4199,
"step": 987
},
{
"epoch": 6.99,
"eval_accuracy": 0.47660093674971465,
"eval_loss": 3.0857138633728027,
"eval_runtime": 7.115,
"eval_samples_per_second": 283.486,
"eval_steps_per_second": 141.813,
"step": 987
},
{
"epoch": 7.02,
"learning_rate": 1.2122766699368603e-05,
"loss": 4.2135,
"step": 990
},
{
"epoch": 7.04,
"learning_rate": 1.1608252209947566e-05,
"loss": 3.4164,
"step": 993
},
{
"epoch": 7.06,
"learning_rate": 1.1104454658193395e-05,
"loss": 3.4037,
"step": 996
},
{
"epoch": 7.08,
"learning_rate": 1.061141305775422e-05,
"loss": 3.3729,
"step": 999
},
{
"epoch": 7.11,
"learning_rate": 1.0129165589346643e-05,
"loss": 3.4295,
"step": 1002
},
{
"epoch": 7.13,
"learning_rate": 9.657749597798947e-06,
"loss": 3.4044,
"step": 1005
},
{
"epoch": 7.15,
"learning_rate": 9.197201589159215e-06,
"loss": 3.4028,
"step": 1008
},
{
"epoch": 7.17,
"learning_rate": 8.747557227868263e-06,
"loss": 3.3725,
"step": 1011
},
{
"epoch": 7.19,
"learning_rate": 8.308851333997917e-06,
"loss": 3.33,
"step": 1014
},
{
"epoch": 7.21,
"learning_rate": 7.881117880554421e-06,
"loss": 3.4139,
"step": 1017
},
{
"epoch": 7.23,
"learning_rate": 7.4643899908477525e-06,
"loss": 3.4329,
"step": 1020
},
{
"epoch": 7.25,
"learning_rate": 7.058699935926526e-06,
"loss": 3.3783,
"step": 1023
},
{
"epoch": 7.28,
"learning_rate": 6.664079132078881e-06,
"loss": 3.3888,
"step": 1026
},
{
"epoch": 7.3,
"learning_rate": 6.280558138399805e-06,
"loss": 3.3738,
"step": 1029
},
{
"epoch": 7.32,
"learning_rate": 5.908166654424412e-06,
"loss": 3.3097,
"step": 1032
},
{
"epoch": 7.34,
"learning_rate": 5.546933517828317e-06,
"loss": 3.4202,
"step": 1035
},
{
"epoch": 7.36,
"learning_rate": 5.196886702194203e-06,
"loss": 3.3502,
"step": 1038
},
{
"epoch": 7.38,
"learning_rate": 4.8580533148458225e-06,
"loss": 3.3874,
"step": 1041
},
{
"epoch": 7.4,
"learning_rate": 4.530459594748592e-06,
"loss": 3.3369,
"step": 1044
},
{
"epoch": 7.42,
"learning_rate": 4.214130910477853e-06,
"loss": 3.3929,
"step": 1047
},
{
"epoch": 7.44,
"learning_rate": 3.90909175825429e-06,
"loss": 3.362,
"step": 1050
},
{
"epoch": 7.47,
"learning_rate": 3.6153657600469176e-06,
"loss": 3.3281,
"step": 1053
},
{
"epoch": 7.49,
"learning_rate": 3.3329756617438917e-06,
"loss": 3.352,
"step": 1056
},
{
"epoch": 7.51,
"learning_rate": 3.06194333139097e-06,
"loss": 3.3818,
"step": 1059
},
{
"epoch": 7.53,
"learning_rate": 2.8022897574982826e-06,
"loss": 3.4417,
"step": 1062
},
{
"epoch": 7.55,
"learning_rate": 2.554035047414732e-06,
"loss": 3.3567,
"step": 1065
},
{
"epoch": 7.57,
"learning_rate": 2.3171984257711385e-06,
"loss": 3.4005,
"step": 1068
},
{
"epoch": 7.59,
"learning_rate": 2.0917982329912987e-06,
"loss": 3.3674,
"step": 1071
},
{
"epoch": 7.61,
"learning_rate": 1.8778519238719203e-06,
"loss": 3.3507,
"step": 1074
},
{
"epoch": 7.63,
"learning_rate": 1.6753760662307215e-06,
"loss": 3.3754,
"step": 1077
},
{
"epoch": 7.66,
"learning_rate": 1.4843863396236388e-06,
"loss": 3.4043,
"step": 1080
},
{
"epoch": 7.68,
"learning_rate": 1.3048975341304835e-06,
"loss": 3.4084,
"step": 1083
},
{
"epoch": 7.7,
"learning_rate": 1.1369235492096397e-06,
"loss": 3.4293,
"step": 1086
},
{
"epoch": 7.72,
"learning_rate": 9.804773926217092e-07,
"loss": 3.4001,
"step": 1089
},
{
"epoch": 7.74,
"learning_rate": 8.355711794222342e-07,
"loss": 3.3856,
"step": 1092
},
{
"epoch": 7.76,
"learning_rate": 7.022161310234642e-07,
"loss": 3.3659,
"step": 1095
},
{
"epoch": 7.78,
"learning_rate": 5.80422574325401e-07,
"loss": 3.3653,
"step": 1098
},
{
"epoch": 7.8,
"learning_rate": 4.70199940916105e-07,
"loss": 3.437,
"step": 1101
},
{
"epoch": 7.83,
"learning_rate": 3.715567663412966e-07,
"loss": 3.382,
"step": 1104
},
{
"epoch": 7.85,
"learning_rate": 2.845006894433843e-07,
"loss": 3.3835,
"step": 1107
},
{
"epoch": 7.87,
"learning_rate": 2.0903845176995503e-07,
"loss": 3.3901,
"step": 1110
},
{
"epoch": 7.89,
"learning_rate": 1.4517589705164145e-07,
"loss": 3.3445,
"step": 1113
},
{
"epoch": 7.91,
"learning_rate": 9.291797074961748e-08,
"loss": 3.4404,
"step": 1116
},
{
"epoch": 7.93,
"learning_rate": 5.226871967267121e-08,
"loss": 3.404,
"step": 1119
},
{
"epoch": 7.95,
"learning_rate": 2.3231291663772245e-08,
"loss": 3.3689,
"step": 1122
},
{
"epoch": 7.97,
"learning_rate": 5.80793535631674e-09,
"loss": 3.3911,
"step": 1125
},
{
"epoch": 7.99,
"learning_rate": 0.0,
"loss": 3.4086,
"step": 1128
},
{
"epoch": 7.99,
"eval_accuracy": 0.47283051057151976,
"eval_loss": 3.0980591773986816,
"eval_runtime": 7.1137,
"eval_samples_per_second": 283.538,
"eval_steps_per_second": 141.839,
"step": 1128
},
{
"epoch": 7.99,
"step": 1128,
"total_flos": 197844098719744.0,
"train_loss": 3.6050218030916037,
"train_runtime": 557.9113,
"train_samples_per_second": 260.099,
"train_steps_per_second": 2.022
}
],
"max_steps": 1128,
"num_train_epochs": 8,
"total_flos": 197844098719744.0,
"trial_name": null,
"trial_params": null
}
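
For reference, a minimal sketch of how this log could be inspected offline, assuming the JSON above is saved locally as trainer_state.json (the filename and script are illustrative, not part of the training run):

# Parse a Hugging Face Trainer trainer_state.json and summarize the logged metrics.
# Uses only the standard library; assumes "trainer_state.json" sits in the working directory.
import json

with open("trainer_state.json", "r", encoding="utf-8") as f:
    state = json.load(f)

# Entries with "loss" are periodic training logs; entries with "eval_loss" are per-epoch evals.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print("best checkpoint:", state["best_model_checkpoint"])
print("best metric:", state["best_metric"])

# Print one line per evaluation pass (epoch, step, eval loss, eval accuracy).
for e in eval_logs:
    print(f"epoch {e['epoch']:.2f}  step {e['step']:>5}  "
          f"eval_loss {e['eval_loss']:.4f}  eval_accuracy {e['eval_accuracy']:.4f}")

Run directly with python, e.g. `python inspect_trainer_state.py`; the eval rows printed should match the eval_* entries embedded in log_history above.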