|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 13.0, |
|
"global_step": 84409, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 1.9897325324708663e-05, |
|
"loss": 0.0613, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 1.9794650649417323e-05, |
|
"loss": 0.0384, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 1.9691975974125984e-05, |
|
"loss": 0.0347, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 1.9589301298834645e-05, |
|
"loss": 0.0304, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 1.9486626623543306e-05, |
|
"loss": 0.0317, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 1.9383951948251964e-05, |
|
"loss": 0.0293, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 1.9281277272960624e-05, |
|
"loss": 0.0264, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.9178602597669285e-05, |
|
"loss": 0.0267, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.907592792237795e-05, |
|
"loss": 0.0276, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 1.8973253247086607e-05, |
|
"loss": 0.0246, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 1.8870578571795268e-05, |
|
"loss": 0.0247, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.876790389650393e-05, |
|
"loss": 0.0231, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.9923576779814453, |
|
"eval_f1": 0.9008350730688935, |
|
"eval_loss": 0.021180663257837296, |
|
"eval_precision": 0.8908472243462303, |
|
"eval_recall": 0.9110494213324992, |
|
"eval_runtime": 51.6414, |
|
"eval_samples_per_second": 430.469, |
|
"eval_steps_per_second": 26.916, |
|
"step": 6493 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.866522922121259e-05, |
|
"loss": 0.0246, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 1.856255454592125e-05, |
|
"loss": 0.0166, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 1.845987987062991e-05, |
|
"loss": 0.0169, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 1.8357205195338572e-05, |
|
"loss": 0.0155, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.825453052004723e-05, |
|
"loss": 0.0157, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 1.8151855844755894e-05, |
|
"loss": 0.0165, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.8049181169464555e-05, |
|
"loss": 0.0165, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.7946506494173212e-05, |
|
"loss": 0.0157, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 1.7843831818881873e-05, |
|
"loss": 0.0161, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 1.7741157143590534e-05, |
|
"loss": 0.0166, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 1.7638482468299195e-05, |
|
"loss": 0.0163, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 1.7535807793007856e-05, |
|
"loss": 0.0147, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.7433133117716517e-05, |
|
"loss": 0.0154, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9932060344168305, |
|
"eval_f1": 0.9151427443757401, |
|
"eval_loss": 0.020859336480498314, |
|
"eval_precision": 0.881111794723318, |
|
"eval_recall": 0.9519080387863622, |
|
"eval_runtime": 51.2977, |
|
"eval_samples_per_second": 433.353, |
|
"eval_steps_per_second": 27.097, |
|
"step": 12986 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 1.7330458442425178e-05, |
|
"loss": 0.0167, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 1.722778376713384e-05, |
|
"loss": 0.0097, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 1.71251090918425e-05, |
|
"loss": 0.0096, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 1.702243441655116e-05, |
|
"loss": 0.0092, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 1.691975974125982e-05, |
|
"loss": 0.0106, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 1.681708506596848e-05, |
|
"loss": 0.0102, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 1.671441039067714e-05, |
|
"loss": 0.0103, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 1.66117357153858e-05, |
|
"loss": 0.0094, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.650906104009446e-05, |
|
"loss": 0.0103, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.6406386364803122e-05, |
|
"loss": 0.0096, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 1.6303711689511783e-05, |
|
"loss": 0.0103, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 1.6201037014220444e-05, |
|
"loss": 0.01, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 1.6098362338929105e-05, |
|
"loss": 0.0099, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9953580496931748, |
|
"eval_f1": 0.9364340787171451, |
|
"eval_loss": 0.017423413693904877, |
|
"eval_precision": 0.9212537358604774, |
|
"eval_recall": 0.9521230841413826, |
|
"eval_runtime": 51.2859, |
|
"eval_samples_per_second": 433.453, |
|
"eval_steps_per_second": 27.103, |
|
"step": 19479 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 1.5995687663637766e-05, |
|
"loss": 0.0097, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 3.08, |
|
"learning_rate": 1.5893012988346427e-05, |
|
"loss": 0.0067, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 3.16, |
|
"learning_rate": 1.5790338313055087e-05, |
|
"loss": 0.0072, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 3.23, |
|
"learning_rate": 1.5687663637763745e-05, |
|
"loss": 0.0065, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 3.31, |
|
"learning_rate": 1.5584988962472406e-05, |
|
"loss": 0.0063, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 3.39, |
|
"learning_rate": 1.5482314287181067e-05, |
|
"loss": 0.0082, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 3.47, |
|
"learning_rate": 1.537963961188973e-05, |
|
"loss": 0.0063, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 3.54, |
|
"learning_rate": 1.527696493659839e-05, |
|
"loss": 0.0073, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 3.62, |
|
"learning_rate": 1.517429026130705e-05, |
|
"loss": 0.0067, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 3.7, |
|
"learning_rate": 1.507161558601571e-05, |
|
"loss": 0.0066, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 1.4968940910724373e-05, |
|
"loss": 0.0073, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 3.85, |
|
"learning_rate": 1.4866266235433032e-05, |
|
"loss": 0.0063, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.93, |
|
"learning_rate": 1.4763591560141693e-05, |
|
"loss": 0.0066, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9954778037113075, |
|
"eval_f1": 0.9395088840087842, |
|
"eval_loss": 0.018721075728535652, |
|
"eval_precision": 0.9144290023502415, |
|
"eval_recall": 0.9660032843290585, |
|
"eval_runtime": 51.1646, |
|
"eval_samples_per_second": 434.48, |
|
"eval_steps_per_second": 27.167, |
|
"step": 25972 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 1.4660916884850354e-05, |
|
"loss": 0.007, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 4.08, |
|
"learning_rate": 1.4558242209559013e-05, |
|
"loss": 0.0045, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 4.16, |
|
"learning_rate": 1.4455567534267674e-05, |
|
"loss": 0.0055, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 4.24, |
|
"learning_rate": 1.4352892858976335e-05, |
|
"loss": 0.0048, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 4.31, |
|
"learning_rate": 1.4250218183684995e-05, |
|
"loss": 0.0047, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 4.39, |
|
"learning_rate": 1.4147543508393655e-05, |
|
"loss": 0.0046, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 4.47, |
|
"learning_rate": 1.4044868833102317e-05, |
|
"loss": 0.0046, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 4.54, |
|
"learning_rate": 1.3942194157810978e-05, |
|
"loss": 0.0049, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 4.62, |
|
"learning_rate": 1.3839519482519637e-05, |
|
"loss": 0.0056, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 4.7, |
|
"learning_rate": 1.3736844807228298e-05, |
|
"loss": 0.0054, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 1.3634170131936959e-05, |
|
"loss": 0.0055, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 4.85, |
|
"learning_rate": 1.353149545664562e-05, |
|
"loss": 0.0052, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 4.93, |
|
"learning_rate": 1.3428820781354279e-05, |
|
"loss": 0.0047, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.996386037155759, |
|
"eval_f1": 0.9499301079443971, |
|
"eval_loss": 0.01743621565401554, |
|
"eval_precision": 0.9434096868733611, |
|
"eval_recall": 0.9565412887081639, |
|
"eval_runtime": 50.9241, |
|
"eval_samples_per_second": 436.532, |
|
"eval_steps_per_second": 27.296, |
|
"step": 32465 |
|
}, |
|
{ |
|
"epoch": 5.01, |
|
"learning_rate": 1.332614610606294e-05, |
|
"loss": 0.0049, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 5.08, |
|
"learning_rate": 1.3223471430771601e-05, |
|
"loss": 0.0032, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 5.16, |
|
"learning_rate": 1.3120796755480263e-05, |
|
"loss": 0.0035, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 5.24, |
|
"learning_rate": 1.3018122080188923e-05, |
|
"loss": 0.0033, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 5.31, |
|
"learning_rate": 1.2915447404897584e-05, |
|
"loss": 0.0031, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 5.39, |
|
"learning_rate": 1.2812772729606244e-05, |
|
"loss": 0.0037, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 5.47, |
|
"learning_rate": 1.2710098054314904e-05, |
|
"loss": 0.0037, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 5.54, |
|
"learning_rate": 1.2607423379023564e-05, |
|
"loss": 0.0043, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 5.62, |
|
"learning_rate": 1.2504748703732225e-05, |
|
"loss": 0.0034, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 5.7, |
|
"learning_rate": 1.2402074028440886e-05, |
|
"loss": 0.0036, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 5.78, |
|
"learning_rate": 1.2299399353149545e-05, |
|
"loss": 0.003, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 5.85, |
|
"learning_rate": 1.2196724677858208e-05, |
|
"loss": 0.0037, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 5.93, |
|
"learning_rate": 1.2094050002566869e-05, |
|
"loss": 0.0032, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9964631065733692, |
|
"eval_f1": 0.9498019389642716, |
|
"eval_loss": 0.020042115822434425, |
|
"eval_precision": 0.9411696961553965, |
|
"eval_recall": 0.9585939943697216, |
|
"eval_runtime": 50.9154, |
|
"eval_samples_per_second": 436.607, |
|
"eval_steps_per_second": 27.3, |
|
"step": 38958 |
|
}, |
|
{ |
|
"epoch": 6.01, |
|
"learning_rate": 1.199137532727553e-05, |
|
"loss": 0.0034, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 6.08, |
|
"learning_rate": 1.1888700651984189e-05, |
|
"loss": 0.0023, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 6.16, |
|
"learning_rate": 1.178602597669285e-05, |
|
"loss": 0.0022, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 6.24, |
|
"learning_rate": 1.168335130140151e-05, |
|
"loss": 0.0026, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 6.31, |
|
"learning_rate": 1.158067662611017e-05, |
|
"loss": 0.003, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 6.39, |
|
"learning_rate": 1.147800195081883e-05, |
|
"loss": 0.0024, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 6.47, |
|
"learning_rate": 1.1375327275527492e-05, |
|
"loss": 0.0026, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 6.55, |
|
"learning_rate": 1.1272652600236154e-05, |
|
"loss": 0.0029, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 6.62, |
|
"learning_rate": 1.1169977924944813e-05, |
|
"loss": 0.0028, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 6.7, |
|
"learning_rate": 1.1067303249653474e-05, |
|
"loss": 0.0022, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 6.78, |
|
"learning_rate": 1.0964628574362135e-05, |
|
"loss": 0.0026, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 6.85, |
|
"learning_rate": 1.0861953899070794e-05, |
|
"loss": 0.0026, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 6.93, |
|
"learning_rate": 1.0759279223779455e-05, |
|
"loss": 0.0028, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9966623013758077, |
|
"eval_f1": 0.9540412662796937, |
|
"eval_loss": 0.02194616012275219, |
|
"eval_precision": 0.9521741670399003, |
|
"eval_recall": 0.955915702220832, |
|
"eval_runtime": 51.3344, |
|
"eval_samples_per_second": 433.043, |
|
"eval_steps_per_second": 27.077, |
|
"step": 45451 |
|
}, |
|
{ |
|
"epoch": 7.01, |
|
"learning_rate": 1.0656604548488116e-05, |
|
"loss": 0.0024, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 7.08, |
|
"learning_rate": 1.0553929873196777e-05, |
|
"loss": 0.0017, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 7.16, |
|
"learning_rate": 1.0451255197905436e-05, |
|
"loss": 0.0017, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 7.24, |
|
"learning_rate": 1.0348580522614099e-05, |
|
"loss": 0.0017, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 7.32, |
|
"learning_rate": 1.024590584732276e-05, |
|
"loss": 0.0018, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 7.39, |
|
"learning_rate": 1.014323117203142e-05, |
|
"loss": 0.0019, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 7.47, |
|
"learning_rate": 1.004055649674008e-05, |
|
"loss": 0.0019, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 7.55, |
|
"learning_rate": 9.93788182144874e-06, |
|
"loss": 0.0021, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 7.62, |
|
"learning_rate": 9.835207146157401e-06, |
|
"loss": 0.0021, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 7.7, |
|
"learning_rate": 9.732532470866062e-06, |
|
"loss": 0.0024, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 7.78, |
|
"learning_rate": 9.629857795574721e-06, |
|
"loss": 0.0024, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 7.85, |
|
"learning_rate": 9.527183120283382e-06, |
|
"loss": 0.0022, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 7.93, |
|
"learning_rate": 9.424508444992043e-06, |
|
"loss": 0.002, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9969071449871383, |
|
"eval_f1": 0.95708591545198, |
|
"eval_loss": 0.02347610704600811, |
|
"eval_precision": 0.9496353034006274, |
|
"eval_recall": 0.9646543634657492, |
|
"eval_runtime": 51.2371, |
|
"eval_samples_per_second": 433.865, |
|
"eval_steps_per_second": 27.129, |
|
"step": 51944 |
|
}, |
|
{ |
|
"epoch": 8.01, |
|
"learning_rate": 9.321833769700704e-06, |
|
"loss": 0.0014, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 8.09, |
|
"learning_rate": 9.219159094409365e-06, |
|
"loss": 0.0014, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 8.16, |
|
"learning_rate": 9.116484419118026e-06, |
|
"loss": 0.0013, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 8.24, |
|
"learning_rate": 9.013809743826687e-06, |
|
"loss": 0.0016, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 8.32, |
|
"learning_rate": 8.911135068535346e-06, |
|
"loss": 0.0014, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 8.39, |
|
"learning_rate": 8.808460393244007e-06, |
|
"loss": 0.0016, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 8.47, |
|
"learning_rate": 8.705785717952668e-06, |
|
"loss": 0.0013, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 8.55, |
|
"learning_rate": 8.603111042661328e-06, |
|
"loss": 0.0013, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 8.62, |
|
"learning_rate": 8.50043636736999e-06, |
|
"loss": 0.0011, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 8.7, |
|
"learning_rate": 8.39776169207865e-06, |
|
"loss": 0.0014, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 8.78, |
|
"learning_rate": 8.29508701678731e-06, |
|
"loss": 0.0016, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 8.86, |
|
"learning_rate": 8.19241234149597e-06, |
|
"loss": 0.0015, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 8.93, |
|
"learning_rate": 8.089737666204631e-06, |
|
"loss": 0.0019, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9969735432546178, |
|
"eval_f1": 0.957957696804553, |
|
"eval_loss": 0.02377239800989628, |
|
"eval_precision": 0.9470949005521695, |
|
"eval_recall": 0.9690725680325305, |
|
"eval_runtime": 57.1242, |
|
"eval_samples_per_second": 389.152, |
|
"eval_steps_per_second": 24.333, |
|
"step": 58437 |
|
}, |
|
{ |
|
"epoch": 9.01, |
|
"learning_rate": 7.987062990913292e-06, |
|
"loss": 0.0014, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 9.09, |
|
"learning_rate": 7.884388315621953e-06, |
|
"loss": 0.0014, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 9.16, |
|
"learning_rate": 7.781713640330612e-06, |
|
"loss": 0.0013, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 9.24, |
|
"learning_rate": 7.679038965039273e-06, |
|
"loss": 0.0013, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 9.32, |
|
"learning_rate": 7.576364289747934e-06, |
|
"loss": 0.0012, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 9.39, |
|
"learning_rate": 7.473689614456595e-06, |
|
"loss": 0.0011, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 9.47, |
|
"learning_rate": 7.371014939165256e-06, |
|
"loss": 0.001, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 9.55, |
|
"learning_rate": 7.2683402638739165e-06, |
|
"loss": 0.0014, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 9.63, |
|
"learning_rate": 7.1656655885825765e-06, |
|
"loss": 0.0011, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 9.7, |
|
"learning_rate": 7.062990913291237e-06, |
|
"loss": 0.0014, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 9.78, |
|
"learning_rate": 6.960316237999898e-06, |
|
"loss": 0.0009, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 9.86, |
|
"learning_rate": 6.857641562708558e-06, |
|
"loss": 0.0011, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 9.93, |
|
"learning_rate": 6.754966887417219e-06, |
|
"loss": 0.0012, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9970482413055323, |
|
"eval_f1": 0.9589748568100184, |
|
"eval_loss": 0.02672559767961502, |
|
"eval_precision": 0.9524470669906282, |
|
"eval_recall": 0.9655927431967469, |
|
"eval_runtime": 50.0835, |
|
"eval_samples_per_second": 443.858, |
|
"eval_steps_per_second": 27.754, |
|
"step": 64930 |
|
}, |
|
{ |
|
"epoch": 10.01, |
|
"learning_rate": 6.652292212125879e-06, |
|
"loss": 0.0014, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 10.09, |
|
"learning_rate": 6.54961753683454e-06, |
|
"loss": 0.001, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 10.16, |
|
"learning_rate": 6.446942861543201e-06, |
|
"loss": 0.0009, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 10.24, |
|
"learning_rate": 6.344268186251862e-06, |
|
"loss": 0.0007, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 10.32, |
|
"learning_rate": 6.241593510960522e-06, |
|
"loss": 0.0009, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 10.4, |
|
"learning_rate": 6.138918835669183e-06, |
|
"loss": 0.001, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 10.47, |
|
"learning_rate": 6.036244160377844e-06, |
|
"loss": 0.0011, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 10.55, |
|
"learning_rate": 5.9335694850865045e-06, |
|
"loss": 0.0011, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 10.63, |
|
"learning_rate": 5.8308948097951645e-06, |
|
"loss": 0.001, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 10.7, |
|
"learning_rate": 5.7282201345038246e-06, |
|
"loss": 0.0011, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 10.78, |
|
"learning_rate": 5.6255454592124854e-06, |
|
"loss": 0.001, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 10.86, |
|
"learning_rate": 5.522870783921146e-06, |
|
"loss": 0.0007, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 10.93, |
|
"learning_rate": 5.420196108629807e-06, |
|
"loss": 0.0012, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9972326150661226, |
|
"eval_f1": 0.9617262499878666, |
|
"eval_loss": 0.026968594640493393, |
|
"eval_precision": 0.9550791416838574, |
|
"eval_recall": 0.9684665311229278, |
|
"eval_runtime": 49.8699, |
|
"eval_samples_per_second": 445.76, |
|
"eval_steps_per_second": 27.873, |
|
"step": 71423 |
|
}, |
|
{ |
|
"epoch": 11.01, |
|
"learning_rate": 5.317521433338467e-06, |
|
"loss": 0.0008, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 11.09, |
|
"learning_rate": 5.214846758047128e-06, |
|
"loss": 0.0008, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 11.17, |
|
"learning_rate": 5.112172082755789e-06, |
|
"loss": 0.0008, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 11.24, |
|
"learning_rate": 5.00949740746445e-06, |
|
"loss": 0.0007, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 11.32, |
|
"learning_rate": 4.90682273217311e-06, |
|
"loss": 0.0007, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 11.4, |
|
"learning_rate": 4.804148056881771e-06, |
|
"loss": 0.0007, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 11.47, |
|
"learning_rate": 4.701473381590432e-06, |
|
"loss": 0.0009, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 11.55, |
|
"learning_rate": 4.598798706299092e-06, |
|
"loss": 0.0007, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 11.63, |
|
"learning_rate": 4.4961240310077525e-06, |
|
"loss": 0.0008, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 11.7, |
|
"learning_rate": 4.393449355716413e-06, |
|
"loss": 0.0007, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 11.78, |
|
"learning_rate": 4.2907746804250734e-06, |
|
"loss": 0.0007, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 11.86, |
|
"learning_rate": 4.188100005133734e-06, |
|
"loss": 0.0007, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 11.94, |
|
"learning_rate": 4.085425329842394e-06, |
|
"loss": 0.0007, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9971656239569692, |
|
"eval_f1": 0.9604331205247123, |
|
"eval_loss": 0.02845791168510914, |
|
"eval_precision": 0.953384443502716, |
|
"eval_recall": 0.9675868001251173, |
|
"eval_runtime": 49.6251, |
|
"eval_samples_per_second": 447.959, |
|
"eval_steps_per_second": 28.01, |
|
"step": 77916 |
|
}, |
|
{ |
|
"epoch": 12.01, |
|
"learning_rate": 3.982750654551055e-06, |
|
"loss": 0.0007, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 12.09, |
|
"learning_rate": 3.880075979259716e-06, |
|
"loss": 0.0007, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 12.17, |
|
"learning_rate": 3.7774013039683766e-06, |
|
"loss": 0.0008, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 12.24, |
|
"learning_rate": 3.6747266286770374e-06, |
|
"loss": 0.0008, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 12.32, |
|
"learning_rate": 3.572051953385698e-06, |
|
"loss": 0.0007, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 12.4, |
|
"learning_rate": 3.4693772780943583e-06, |
|
"loss": 0.0007, |
|
"step": 80500 |
|
}, |
|
{ |
|
"epoch": 12.47, |
|
"learning_rate": 3.366702602803019e-06, |
|
"loss": 0.0007, |
|
"step": 81000 |
|
}, |
|
{ |
|
"epoch": 12.55, |
|
"learning_rate": 3.2640279275116792e-06, |
|
"loss": 0.0006, |
|
"step": 81500 |
|
}, |
|
{ |
|
"epoch": 12.63, |
|
"learning_rate": 3.16135325222034e-06, |
|
"loss": 0.0005, |
|
"step": 82000 |
|
}, |
|
{ |
|
"epoch": 12.71, |
|
"learning_rate": 3.0586785769290006e-06, |
|
"loss": 0.0007, |
|
"step": 82500 |
|
}, |
|
{ |
|
"epoch": 12.78, |
|
"learning_rate": 2.9560039016376615e-06, |
|
"loss": 0.0006, |
|
"step": 83000 |
|
}, |
|
{ |
|
"epoch": 12.86, |
|
"learning_rate": 2.853329226346322e-06, |
|
"loss": 0.0006, |
|
"step": 83500 |
|
}, |
|
{ |
|
"epoch": 12.94, |
|
"learning_rate": 2.7506545510549828e-06, |
|
"loss": 0.0006, |
|
"step": 84000 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9972806352417104, |
|
"eval_f1": 0.9628492298722122, |
|
"eval_loss": 0.02913038246333599, |
|
"eval_precision": 0.9572585164145073, |
|
"eval_recall": 0.968505630278386, |
|
"eval_runtime": 49.7978, |
|
"eval_samples_per_second": 446.405, |
|
"eval_steps_per_second": 27.913, |
|
"step": 84409 |
|
} |
|
], |
|
"max_steps": 97395, |
|
"num_train_epochs": 15, |
|
"total_flos": 3.5286877927060275e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|