|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.190901512444921, |
|
"global_step": 50000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.9999999999999997e-06, |
|
"loss": 1.0412, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.999999999999999e-06, |
|
"loss": 0.835, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.999999999999999e-06, |
|
"loss": 0.7822, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.1999999999999999e-05, |
|
"loss": 0.7718, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.4999999999999999e-05, |
|
"loss": 0.7707, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 1.7999999999999997e-05, |
|
"loss": 0.7697, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.1e-05, |
|
"loss": 0.769, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.3999999999999997e-05, |
|
"loss": 0.7682, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.6999999999999996e-05, |
|
"loss": 0.7674, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 2.9999999999999997e-05, |
|
"loss": 0.767, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"eval_runtime": 45.7675, |
|
"eval_samples_per_second": 235.975, |
|
"eval_steps_per_second": 7.385, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.2999999999999996e-05, |
|
"loss": 0.7665, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.5999999999999994e-05, |
|
"loss": 0.7662, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 3.9e-05, |
|
"loss": 0.7661, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 4.2e-05, |
|
"loss": 0.766, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.4999999999999996e-05, |
|
"loss": 0.7659, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 4.7999999999999994e-05, |
|
"loss": 0.7656, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.1e-05, |
|
"loss": 0.7655, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5.399999999999999e-05, |
|
"loss": 0.7655, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.6999999999999996e-05, |
|
"loss": 0.7653, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 5.9999999999999995e-05, |
|
"loss": 0.7655, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_runtime": 45.5917, |
|
"eval_samples_per_second": 236.885, |
|
"eval_steps_per_second": 7.414, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 6.299999999999999e-05, |
|
"loss": 0.7651, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 6.599999999999999e-05, |
|
"loss": 0.7653, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 6.9e-05, |
|
"loss": 0.7654, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.199999999999999e-05, |
|
"loss": 0.765, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.7649, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 7.8e-05, |
|
"loss": 0.7648, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 8.1e-05, |
|
"loss": 0.7647, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 8.4e-05, |
|
"loss": 0.7645, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 8.699999999999999e-05, |
|
"loss": 0.7645, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 8.999999999999999e-05, |
|
"loss": 0.7644, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_runtime": 45.7281, |
|
"eval_samples_per_second": 236.179, |
|
"eval_steps_per_second": 7.392, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 9.3e-05, |
|
"loss": 0.7641, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.599999999999999e-05, |
|
"loss": 0.764, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 9.9e-05, |
|
"loss": 0.7638, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000102, |
|
"loss": 0.763, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00010499999999999999, |
|
"loss": 0.7665, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00010799999999999998, |
|
"loss": 0.7669, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00011099999999999999, |
|
"loss": 0.7653, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00011399999999999999, |
|
"loss": 0.7535, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000117, |
|
"loss": 0.7218, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00011999999999999999, |
|
"loss": 0.6956, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"eval_runtime": 45.9121, |
|
"eval_samples_per_second": 235.232, |
|
"eval_steps_per_second": 7.362, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00012299999999999998, |
|
"loss": 0.6758, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00012599999999999997, |
|
"loss": 0.6557, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000129, |
|
"loss": 0.6402, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00013199999999999998, |
|
"loss": 0.6302, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000135, |
|
"loss": 0.623, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000138, |
|
"loss": 0.6169, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00014099999999999998, |
|
"loss": 0.6121, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00014399999999999998, |
|
"loss": 0.607, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.000147, |
|
"loss": 0.6039, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00015, |
|
"loss": 0.6012, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"eval_runtime": 46.0979, |
|
"eval_samples_per_second": 234.284, |
|
"eval_steps_per_second": 7.332, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0001499996172456075, |
|
"loss": 0.5981, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00014999846898661572, |
|
"loss": 0.5954, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00014999655523558183, |
|
"loss": 0.5935, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00014999387601343436, |
|
"loss": 0.5911, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00014999043134947282, |
|
"loss": 0.5895, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00014998622128136748, |
|
"loss": 0.5877, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.000149981245855159, |
|
"loss": 0.5866, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00014997550512525784, |
|
"loss": 0.5845, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001499689991544437, |
|
"loss": 0.5784, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00014996172801386482, |
|
"loss": 0.5684, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_runtime": 46.0154, |
|
"eval_samples_per_second": 234.704, |
|
"eval_steps_per_second": 7.345, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00014995369178303722, |
|
"loss": 0.5642, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001499448905498439, |
|
"loss": 0.5625, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00014993532441053364, |
|
"loss": 0.5601, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001499249934697203, |
|
"loss": 0.5581, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001499138978403813, |
|
"loss": 0.554, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00014990203764385677, |
|
"loss": 0.5462, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00014988941300984784, |
|
"loss": 0.5284, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001498760240764155, |
|
"loss": 0.5032, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000149861870989979, |
|
"loss": 0.4751, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0001498469539053142, |
|
"loss": 0.4574, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"eval_runtime": 45.9402, |
|
"eval_samples_per_second": 235.088, |
|
"eval_steps_per_second": 7.357, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00014983127298555198, |
|
"loss": 0.4453, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00014981482840217632, |
|
"loss": 0.437, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00014979762033502262, |
|
"loss": 0.4306, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00014977964897227547, |
|
"loss": 0.4254, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00014976091451046687, |
|
"loss": 0.4204, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00014974141715447386, |
|
"loss": 0.4178, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00014972115711751644, |
|
"loss": 0.4135, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00014970013462115505, |
|
"loss": 0.4099, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00014967834989528843, |
|
"loss": 0.4077, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00014965580317815078, |
|
"loss": 0.405, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_runtime": 45.7648, |
|
"eval_samples_per_second": 235.989, |
|
"eval_steps_per_second": 7.386, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00014963249471630944, |
|
"loss": 0.4017, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.000149608424764662, |
|
"loss": 0.4006, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001495835935864336, |
|
"loss": 0.3977, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00014955800145317397, |
|
"loss": 0.3964, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00014953164864475466, |
|
"loss": 0.3949, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001495045354493657, |
|
"loss": 0.3961, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00014947666216351272, |
|
"loss": 0.398, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00014944802909201344, |
|
"loss": 0.3924, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00014941863654799456, |
|
"loss": 0.3938, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00014938848485288825, |
|
"loss": 0.3885, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_runtime": 45.9868, |
|
"eval_samples_per_second": 234.85, |
|
"eval_steps_per_second": 7.35, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001493575743364286, |
|
"loss": 0.391, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00014932590533664808, |
|
"loss": 0.3884, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001492934781998738, |
|
"loss": 0.3856, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0001492602932807237, |
|
"loss": 0.3843, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00014922635094210277, |
|
"loss": 0.3848, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.000149191651555199, |
|
"loss": 0.3795, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001491561954994793, |
|
"loss": 0.3735, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00014911998316268537, |
|
"loss": 0.3658, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00014908301494082963, |
|
"loss": 0.362, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00014904529123819054, |
|
"loss": 0.3595, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_runtime": 46.3224, |
|
"eval_samples_per_second": 233.148, |
|
"eval_steps_per_second": 7.297, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00014900681246730852, |
|
"loss": 0.3585, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00014896757904898125, |
|
"loss": 0.3578, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00014892759141225904, |
|
"loss": 0.3568, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00014888684999444035, |
|
"loss": 0.355, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00014884535524106675, |
|
"loss": 0.3537, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00014880310760591824, |
|
"loss": 0.3523, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0001487601075510082, |
|
"loss": 0.3524, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001487163555465783, |
|
"loss": 0.3515, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001486718520710935, |
|
"loss": 0.3508, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00014862659761123663, |
|
"loss": 0.3493, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_runtime": 46.1625, |
|
"eval_samples_per_second": 233.956, |
|
"eval_steps_per_second": 7.322, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00014858059266190327, |
|
"loss": 0.3472, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014853383772619612, |
|
"loss": 0.3463, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014848633331541967, |
|
"loss": 0.3363, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0001484380799490746, |
|
"loss": 0.3265, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00014838907815485194, |
|
"loss": 0.3235, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00014833932846862748, |
|
"loss": 0.3218, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00014828883143445582, |
|
"loss": 0.3203, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001482375876045644, |
|
"loss": 0.3204, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001481855975393476, |
|
"loss": 0.3184, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001481328618073604, |
|
"loss": 0.318, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_runtime": 46.1354, |
|
"eval_samples_per_second": 234.094, |
|
"eval_steps_per_second": 7.326, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001480793809853123, |
|
"loss": 0.3163, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00014802515565806107, |
|
"loss": 0.3155, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00014797018641860612, |
|
"loss": 0.314, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001479144738680823, |
|
"loss": 0.3136, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00014785801861575312, |
|
"loss": 0.3117, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00014780082127900416, |
|
"loss": 0.3086, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00014774288248333635, |
|
"loss": 0.3074, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00014768420286235908, |
|
"loss": 0.3074, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00014762478305778328, |
|
"loss": 0.3064, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001475646237194144, |
|
"loss": 0.3057, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"eval_runtime": 46.1242, |
|
"eval_samples_per_second": 234.15, |
|
"eval_steps_per_second": 7.328, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00014750372550514533, |
|
"loss": 0.3048, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001474420890809492, |
|
"loss": 0.3037, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00014737971512087202, |
|
"loss": 0.3029, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00014731660430702552, |
|
"loss": 0.3024, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00014725275732957937, |
|
"loss": 0.3011, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00014718817488675387, |
|
"loss": 0.3006, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00014712285768481235, |
|
"loss": 0.3009, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00014705680643805323, |
|
"loss": 0.2991, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00014699002186880232, |
|
"loss": 0.2991, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00014692250470740503, |
|
"loss": 0.2979, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_runtime": 46.2531, |
|
"eval_samples_per_second": 233.498, |
|
"eval_steps_per_second": 7.308, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00014685425569221819, |
|
"loss": 0.2975, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00014678527556960207, |
|
"loss": 0.2955, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0001467155650939123, |
|
"loss": 0.295, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00014664512502749141, |
|
"loss": 0.2941, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00014657395614066075, |
|
"loss": 0.2931, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001465020592117118, |
|
"loss": 0.2921, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001464294350268979, |
|
"loss": 0.2918, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00014635608438042546, |
|
"loss": 0.2907, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00014628200807444543, |
|
"loss": 0.2899, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0001462072069190444, |
|
"loss": 0.2898, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_runtime": 46.2774, |
|
"eval_samples_per_second": 233.375, |
|
"eval_steps_per_second": 7.304, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00014613168173223585, |
|
"loss": 0.2885, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00014605543333995113, |
|
"loss": 0.288, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00014597846257603038, |
|
"loss": 0.2875, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001459007702822136, |
|
"loss": 0.2876, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00014582235730813128, |
|
"loss": 0.2862, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00014574322451129507, |
|
"loss": 0.2849, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00014566337275708863, |
|
"loss": 0.2852, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001455828029187579, |
|
"loss": 0.2833, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00014550151587740178, |
|
"loss": 0.2836, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00014541951252196225, |
|
"loss": 0.2817, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"eval_runtime": 46.1169, |
|
"eval_samples_per_second": 234.187, |
|
"eval_steps_per_second": 7.329, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00014533679374921493, |
|
"loss": 0.2824, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00014525336046375905, |
|
"loss": 0.2817, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00014516921357800766, |
|
"loss": 0.2812, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00014508435401217759, |
|
"loss": 0.2812, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00014499878269427948, |
|
"loss": 0.2795, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00014491250056010758, |
|
"loss": 0.2788, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00014482550855322943, |
|
"loss": 0.2775, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001447378076249757, |
|
"loss": 0.2773, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00014464939873442973, |
|
"loss": 0.2769, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00014456028284841693, |
|
"loss": 0.2765, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_runtime": 46.3516, |
|
"eval_samples_per_second": 233.002, |
|
"eval_steps_per_second": 7.292, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00014447046094149437, |
|
"loss": 0.2752, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00014437993399594003, |
|
"loss": 0.2765, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001442887030017421, |
|
"loss": 0.2752, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00014419676895658807, |
|
"loss": 0.2748, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.000144104132865854, |
|
"loss": 0.2739, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001440107957425933, |
|
"loss": 0.2729, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001439167586075258, |
|
"loss": 0.2722, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001438220224890265, |
|
"loss": 0.2725, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00014372658842311449, |
|
"loss": 0.2726, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00014363045745344137, |
|
"loss": 0.2715, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_runtime": 46.2247, |
|
"eval_samples_per_second": 233.641, |
|
"eval_steps_per_second": 7.312, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00014353363063128005, |
|
"loss": 0.2705, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001434361090155131, |
|
"loss": 0.2706, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00014333789367262136, |
|
"loss": 0.2701, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00014323898567667202, |
|
"loss": 0.2693, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00014313938610930712, |
|
"loss": 0.2693, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00014303909605973154, |
|
"loss": 0.2691, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001429381166247012, |
|
"loss": 0.2681, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00014283644890851103, |
|
"loss": 0.2672, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00014273409402298291, |
|
"loss": 0.2671, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00014263105308745343, |
|
"loss": 0.2676, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_runtime": 46.3331, |
|
"eval_samples_per_second": 233.095, |
|
"eval_steps_per_second": 7.295, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00014252732722876176, |
|
"loss": 0.2654, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001424229175812373, |
|
"loss": 0.2649, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00014231782528668717, |
|
"loss": 0.2647, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00014221205149438394, |
|
"loss": 0.2649, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001421055973610528, |
|
"loss": 0.264, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00014199846405085913, |
|
"loss": 0.2647, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00014189065273539564, |
|
"loss": 0.2635, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00014178216459366958, |
|
"loss": 0.2623, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00014167300081208988, |
|
"loss": 0.2627, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00014156316258445421, |
|
"loss": 0.2932, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_runtime": 46.169, |
|
"eval_samples_per_second": 233.923, |
|
"eval_steps_per_second": 7.321, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00014145265111193583, |
|
"loss": 0.2645, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00014134146760307043, |
|
"loss": 0.2625, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00014122961327374313, |
|
"loss": 0.2615, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001411170893471749, |
|
"loss": 0.2605, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00014100389705390938, |
|
"loss": 0.26, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001408900376317994, |
|
"loss": 0.2583, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001407755123259933, |
|
"loss": 0.258, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00014066032238892152, |
|
"loss": 0.2569, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00014054446908028272, |
|
"loss": 0.2568, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00014042795366703018, |
|
"loss": 0.2563, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_runtime": 46.2726, |
|
"eval_samples_per_second": 233.4, |
|
"eval_steps_per_second": 7.305, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001403107774233577, |
|
"loss": 0.256, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00014019294163068597, |
|
"loss": 0.2548, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00014007444757764835, |
|
"loss": 0.2543, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001399552965600768, |
|
"loss": 0.2537, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001398354898809877, |
|
"loss": 0.2531, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001397150288505678, |
|
"loss": 0.2531, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00013959391478615959, |
|
"loss": 0.2526, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00013947214901224706, |
|
"loss": 0.2522, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0001393497328604412, |
|
"loss": 0.2515, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00013922666766946545, |
|
"loss": 0.2513, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_runtime": 46.224, |
|
"eval_samples_per_second": 233.645, |
|
"eval_steps_per_second": 7.312, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00013910295478514106, |
|
"loss": 0.2504, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001389785955603722, |
|
"loss": 0.2503, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00013885359135513154, |
|
"loss": 0.2501, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.000138727943536445, |
|
"loss": 0.2488, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013860165347837698, |
|
"loss": 0.2492, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013847472256201535, |
|
"loss": 0.2483, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013834715217545625, |
|
"loss": 0.248, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.000138218943713789, |
|
"loss": 0.2479, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001380900985790808, |
|
"loss": 0.2485, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013796061818036138, |
|
"loss": 0.2467, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_runtime": 46.1546, |
|
"eval_samples_per_second": 233.996, |
|
"eval_steps_per_second": 7.323, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013783050393360768, |
|
"loss": 0.2468, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001376997572617282, |
|
"loss": 0.2463, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013756837959454766, |
|
"loss": 0.2456, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001374363723687911, |
|
"loss": 0.2459, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013730373702806846, |
|
"loss": 0.2447, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013717047502285855, |
|
"loss": 0.245, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001370365878104933, |
|
"loss": 0.2446, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00013690207685514185, |
|
"loss": 0.2442, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001367669436277944, |
|
"loss": 0.2439, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001366311896062463, |
|
"loss": 0.2438, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"eval_runtime": 46.5558, |
|
"eval_samples_per_second": 231.98, |
|
"eval_steps_per_second": 7.26, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00013649481627508181, |
|
"loss": 0.2436, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001363578251256578, |
|
"loss": 0.2429, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00013622021765608754, |
|
"loss": 0.2424, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00013608199537122425, |
|
"loss": 0.242, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001359431597826447, |
|
"loss": 0.2422, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001358037124086327, |
|
"loss": 0.2418, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00013566365477416233, |
|
"loss": 0.2407, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00013552298841088144, |
|
"loss": 0.2416, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00013538171485709486, |
|
"loss": 0.2411, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00013523983565774753, |
|
"loss": 0.2401, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_runtime": 46.0773, |
|
"eval_samples_per_second": 234.389, |
|
"eval_steps_per_second": 7.336, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00013509735236440766, |
|
"loss": 0.2401, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00013495426653524972, |
|
"loss": 0.2402, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00013481057973503742, |
|
"loss": 0.24, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00013466629353510651, |
|
"loss": 0.239, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00013452140951334787, |
|
"loss": 0.239, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00013437592925418985, |
|
"loss": 0.2388, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00013422985434858133, |
|
"loss": 0.238, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00013408318639397405, |
|
"loss": 0.2387, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00013393592699430525, |
|
"loss": 0.2372, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00013378807775998012, |
|
"loss": 0.2377, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_runtime": 46.2501, |
|
"eval_samples_per_second": 233.513, |
|
"eval_steps_per_second": 7.308, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00013363964030785422, |
|
"loss": 0.2373, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00013349061626121578, |
|
"loss": 0.238, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00013334100724976783, |
|
"loss": 0.2367, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001331908149096106, |
|
"loss": 0.2367, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00013304004088322342, |
|
"loss": 0.2356, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00013288868681944692, |
|
"loss": 0.2365, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00013273675437346487, |
|
"loss": 0.236, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00013258424520678618, |
|
"loss": 0.2356, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00013243116098722663, |
|
"loss": 0.2363, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00013227750338889077, |
|
"loss": 0.2345, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_runtime": 46.2738, |
|
"eval_samples_per_second": 233.394, |
|
"eval_steps_per_second": 7.304, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00013212327409215343, |
|
"loss": 0.2351, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0001319684747836415, |
|
"loss": 0.2351, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0001318131071562154, |
|
"loss": 0.2342, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00013165717290895067, |
|
"loss": 0.2338, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.0001315006737471192, |
|
"loss": 0.234, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0001313436113821708, |
|
"loss": 0.233, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00013118598753171425, |
|
"loss": 0.2331, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0001310278039194988, |
|
"loss": 0.2329, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00013086906227539506, |
|
"loss": 0.2332, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00013070976433537623, |
|
"loss": 0.2338, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_runtime": 46.2625, |
|
"eval_samples_per_second": 233.45, |
|
"eval_steps_per_second": 7.306, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00013054991184149905, |
|
"loss": 0.2325, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00013038950654188476, |
|
"loss": 0.2312, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00013022855019070005, |
|
"loss": 0.2323, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001300670445481378, |
|
"loss": 0.2319, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001299049913803978, |
|
"loss": 0.2324, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00012974239245966754, |
|
"loss": 0.2313, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001295792495641028, |
|
"loss": 0.2318, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00012941556447780813, |
|
"loss": 0.2309, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0001292513389908174, |
|
"loss": 0.231, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.0001290865748990742, |
|
"loss": 0.2298, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_runtime": 46.1555, |
|
"eval_samples_per_second": 233.992, |
|
"eval_steps_per_second": 7.323, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00012892127400441228, |
|
"loss": 0.2302, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00012875543811453576, |
|
"loss": 0.2305, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0001285890690429993, |
|
"loss": 0.2293, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00012842216860918846, |
|
"loss": 0.2298, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.0001282547386382996, |
|
"loss": 0.2296, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0001280867809613201, |
|
"loss": 0.2291, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0001279182974150082, |
|
"loss": 0.2279, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00012774928984187297, |
|
"loss": 0.2278, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00012757976009015413, |
|
"loss": 0.228, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.0001274097100138019, |
|
"loss": 0.2282, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"eval_runtime": 46.6895, |
|
"eval_samples_per_second": 231.315, |
|
"eval_steps_per_second": 7.239, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00012723914147245663, |
|
"loss": 0.2276, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00012706805633142863, |
|
"loss": 0.2276, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00012689645646167755, |
|
"loss": 0.2281, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00012672434373979207, |
|
"loss": 0.2265, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00012655172004796936, |
|
"loss": 0.2286, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00012637858727399448, |
|
"loss": 0.227, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00012620494731121966, |
|
"loss": 0.2267, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00012603080205854372, |
|
"loss": 0.2266, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00012585615342039126, |
|
"loss": 0.2258, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001256810033066918, |
|
"loss": 0.226, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"eval_runtime": 47.0689, |
|
"eval_samples_per_second": 229.451, |
|
"eval_steps_per_second": 7.181, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001255053536328589, |
|
"loss": 0.2257, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001253292063197693, |
|
"loss": 0.2256, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.0001251525632937418, |
|
"loss": 0.2257, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00012497542648651615, |
|
"loss": 0.2248, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00012479779783523216, |
|
"loss": 0.225, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00012461967928240828, |
|
"loss": 0.2246, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00012444107277592047, |
|
"loss": 0.2247, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0001242619802689809, |
|
"loss": 0.2246, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00012408240372011647, |
|
"loss": 0.2238, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0001239023450931476, |
|
"loss": 0.2243, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"eval_runtime": 47.1954, |
|
"eval_samples_per_second": 228.836, |
|
"eval_steps_per_second": 7.162, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00012372180635716656, |
|
"loss": 0.2235, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00012354078948651604, |
|
"loss": 0.2239, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00012335929646076758, |
|
"loss": 0.2231, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00012317732926469976, |
|
"loss": 0.2225, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00012299488988827675, |
|
"loss": 0.2233, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0001228119803266263, |
|
"loss": 0.223, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0001226286025800181, |
|
"loss": 0.2229, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00012244475865384177, |
|
"loss": 0.222, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00012226045055858505, |
|
"loss": 0.2217, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00012207568030981174, |
|
"loss": 0.2222, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"eval_runtime": 47.0101, |
|
"eval_samples_per_second": 229.738, |
|
"eval_steps_per_second": 7.19, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00012189044992813972, |
|
"loss": 0.2213, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0001217047614392187, |
|
"loss": 0.2206, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00012151861687370828, |
|
"loss": 0.2221, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00012133201826725558, |
|
"loss": 0.2209, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0001211449676604731, |
|
"loss": 0.2211, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00012095746709891632, |
|
"loss": 0.2205, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00012076951863306127, |
|
"loss": 0.2203, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0001205811243182823, |
|
"loss": 0.22, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00012039228621482949, |
|
"loss": 0.2192, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00012020300638780604, |
|
"loss": 0.219, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"eval_runtime": 47.0946, |
|
"eval_samples_per_second": 229.325, |
|
"eval_steps_per_second": 7.177, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00012001328690714582, |
|
"loss": 0.2194, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00011982312984759068, |
|
"loss": 0.2194, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00011963253728866778, |
|
"loss": 0.2189, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00011944151131466675, |
|
"loss": 0.219, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00011925005401461709, |
|
"loss": 0.2184, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00011905816748226513, |
|
"loss": 0.2182, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00011886585381605125, |
|
"loss": 0.2188, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00011867311511908693, |
|
"loss": 0.2179, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00011847995349913162, |
|
"loss": 0.218, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00011828637106856989, |
|
"loss": 0.2173, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_runtime": 46.7598, |
|
"eval_samples_per_second": 230.968, |
|
"eval_steps_per_second": 7.228, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00011809236994438816, |
|
"loss": 0.2171, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00011789795224815164, |
|
"loss": 0.2175, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00011770312010598116, |
|
"loss": 0.2167, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00011750787564852973, |
|
"loss": 0.2167, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00011731222101095955, |
|
"loss": 0.2171, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00011711615833291833, |
|
"loss": 0.2161, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.0001169196897585161, |
|
"loss": 0.2168, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00011672281743630175, |
|
"loss": 0.2162, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0001165255435192394, |
|
"loss": 0.2152, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00011632787016468506, |
|
"loss": 0.216, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_runtime": 47.0992, |
|
"eval_samples_per_second": 229.303, |
|
"eval_steps_per_second": 7.176, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0001161297995343628, |
|
"loss": 0.2157, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00011593133379434138, |
|
"loss": 0.215, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00011573247511501028, |
|
"loss": 0.2154, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00011553322567105619, |
|
"loss": 0.2155, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00011533358764143905, |
|
"loss": 0.2149, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00011513356320936841, |
|
"loss": 0.2144, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00011493315456227943, |
|
"loss": 0.2147, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00011473236389180894, |
|
"loss": 0.2145, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00011453119339377154, |
|
"loss": 0.2146, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00011432964526813558, |
|
"loss": 0.2145, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_runtime": 46.8321, |
|
"eval_samples_per_second": 230.611, |
|
"eval_steps_per_second": 7.217, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00011412772171899904, |
|
"loss": 0.2132, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00011392542495456556, |
|
"loss": 0.2133, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00011372275718712006, |
|
"loss": 0.2125, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00011351972063300484, |
|
"loss": 0.2135, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00011331631751259515, |
|
"loss": 0.213, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00011311255005027487, |
|
"loss": 0.2132, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00011290842047441232, |
|
"loss": 0.2125, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00011270393101733585, |
|
"loss": 0.2122, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00011249908391530946, |
|
"loss": 0.2113, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00011229388140850814, |
|
"loss": 0.2119, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_runtime": 46.8036, |
|
"eval_samples_per_second": 230.751, |
|
"eval_steps_per_second": 7.222, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00011208832574099368, |
|
"loss": 0.2113, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00011188241916068993, |
|
"loss": 0.2111, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00011167616391935826, |
|
"loss": 0.2111, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00011146956227257293, |
|
"loss": 0.2119, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00011126261647969645, |
|
"loss": 0.2115, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00011105532880385487, |
|
"loss": 0.2104, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00011084770151191299, |
|
"loss": 0.2107, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00011063973687444962, |
|
"loss": 0.2097, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00011043143716573272, |
|
"loss": 0.2107, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00011022280466369448, |
|
"loss": 0.2113, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_runtime": 47.0898, |
|
"eval_samples_per_second": 229.349, |
|
"eval_steps_per_second": 7.178, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00011001384164990662, |
|
"loss": 0.2099, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00010980455040955506, |
|
"loss": 0.21, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00010959493323141538, |
|
"loss": 0.2091, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00010938499240782739, |
|
"loss": 0.2098, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00010917473023467032, |
|
"loss": 0.2096, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00010896414901133761, |
|
"loss": 0.2085, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00010875325104071177, |
|
"loss": 0.2093, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00010854203862913927, |
|
"loss": 0.2084, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00010833051408640509, |
|
"loss": 0.2083, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00010811867972570786, |
|
"loss": 0.2084, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_runtime": 46.8854, |
|
"eval_samples_per_second": 230.349, |
|
"eval_steps_per_second": 7.209, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00010790653786363416, |
|
"loss": 0.2082, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00010769409082013337, |
|
"loss": 0.2081, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00010748134091849238, |
|
"loss": 0.2077, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00010726829048531, |
|
"loss": 0.2078, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00010705494185047165, |
|
"loss": 0.2077, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0001068412973471238, |
|
"loss": 0.2073, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00010662735931164853, |
|
"loss": 0.2076, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0001064131300836379, |
|
"loss": 0.2069, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0001061986120058684, |
|
"loss": 0.2067, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00010598380742427543, |
|
"loss": 0.206, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"eval_runtime": 46.6481, |
|
"eval_samples_per_second": 231.521, |
|
"eval_steps_per_second": 7.246, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00010576871868792746, |
|
"loss": 0.206, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0001055533481490004, |
|
"loss": 0.2058, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.000105337698162752, |
|
"loss": 0.206, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00010512177108749594, |
|
"loss": 0.2057, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00010490556928457616, |
|
"loss": 0.2039, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00010468909511834088, |
|
"loss": 0.205, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00010447235095611692, |
|
"loss": 0.2045, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00010425533916818376, |
|
"loss": 0.2047, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00010403806212774747, |
|
"loss": 0.205, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.000103820522210915, |
|
"loss": 0.2042, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_runtime": 46.7967, |
|
"eval_samples_per_second": 230.786, |
|
"eval_steps_per_second": 7.223, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00010360272179666802, |
|
"loss": 0.204, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00010338466326683697, |
|
"loss": 0.2037, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00010316634900607497, |
|
"loss": 0.2033, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00010294778140183182, |
|
"loss": 0.2035, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00010272896284432785, |
|
"loss": 0.2037, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00010250989572652766, |
|
"loss": 0.2028, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00010229058244411427, |
|
"loss": 0.2019, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00010207102539546251, |
|
"loss": 0.2032, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00010185122698161311, |
|
"loss": 0.2026, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00010163118960624632, |
|
"loss": 0.2024, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_runtime": 46.9319, |
|
"eval_samples_per_second": 230.121, |
|
"eval_steps_per_second": 7.202, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00010141091567565561, |
|
"loss": 0.2028, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00010119040759872142, |
|
"loss": 0.2018, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00010096966778688472, |
|
"loss": 0.2016, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00010074869865412074, |
|
"loss": 0.2024, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00010052750261691254, |
|
"loss": 0.2017, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.0001003060820942245, |
|
"loss": 0.2015, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00010008443950747599, |
|
"loss": 0.2014, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 9.986257728051483e-05, |
|
"loss": 0.2014, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.964049783959082e-05, |
|
"loss": 0.2012, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.94182036133291e-05, |
|
"loss": 0.201, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"eval_runtime": 47.2136, |
|
"eval_samples_per_second": 228.748, |
|
"eval_steps_per_second": 7.159, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.919569703270376e-05, |
|
"loss": 0.1998, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 9.89729805310111e-05, |
|
"loss": 0.2004, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.875005654384307e-05, |
|
"loss": 0.2009, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.852692750906071e-05, |
|
"loss": 0.1999, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.830359586676737e-05, |
|
"loss": 0.1997, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.808006405928215e-05, |
|
"loss": 0.2006, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 9.785633453111306e-05, |
|
"loss": 0.1999, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.763240972893037e-05, |
|
"loss": 0.1992, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.740829210153984e-05, |
|
"loss": 0.1991, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.718398409985593e-05, |
|
"loss": 0.199, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_runtime": 46.9221, |
|
"eval_samples_per_second": 230.169, |
|
"eval_steps_per_second": 7.203, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 9.695948817687504e-05, |
|
"loss": 0.1987, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.673480678764858e-05, |
|
"loss": 0.1982, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.650994238925626e-05, |
|
"loss": 0.1989, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.628489744077911e-05, |
|
"loss": 0.1985, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 9.60596744032726e-05, |
|
"loss": 0.1981, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.583427573973982e-05, |
|
"loss": 0.1976, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.560870391510441e-05, |
|
"loss": 0.1981, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.538296139618371e-05, |
|
"loss": 0.1978, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 9.515705065166178e-05, |
|
"loss": 0.1977, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.493097415206228e-05, |
|
"loss": 0.1974, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"eval_runtime": 47.1161, |
|
"eval_samples_per_second": 229.221, |
|
"eval_steps_per_second": 7.174, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.47047343697216e-05, |
|
"loss": 0.1978, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.447833377876176e-05, |
|
"loss": 0.1974, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 9.425177485506336e-05, |
|
"loss": 0.1971, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.402506007623848e-05, |
|
"loss": 0.1968, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.379819192160362e-05, |
|
"loss": 0.1969, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.357117287215258e-05, |
|
"loss": 0.1966, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.334400541052928e-05, |
|
"loss": 0.1971, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 9.311669202100073e-05, |
|
"loss": 0.1962, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.288923518942968e-05, |
|
"loss": 0.1959, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.26616374032477e-05, |
|
"loss": 0.1964, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"eval_runtime": 46.7963, |
|
"eval_samples_per_second": 230.788, |
|
"eval_steps_per_second": 7.223, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.243390115142761e-05, |
|
"loss": 0.196, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 9.220602892445661e-05, |
|
"loss": 0.1955, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.197802321430889e-05, |
|
"loss": 0.1958, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.174988651441833e-05, |
|
"loss": 0.1951, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.152162131965137e-05, |
|
"loss": 0.1954, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 9.129323012627956e-05, |
|
"loss": 0.1948, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.106471543195244e-05, |
|
"loss": 0.1954, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.08360797356701e-05, |
|
"loss": 0.1953, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.060732553775582e-05, |
|
"loss": 0.1949, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 9.037845533982892e-05, |
|
"loss": 0.1947, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"eval_runtime": 46.9646, |
|
"eval_samples_per_second": 229.96, |
|
"eval_steps_per_second": 7.197, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 9.014947164477721e-05, |
|
"loss": 0.1946, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.992037695672967e-05, |
|
"loss": 0.1938, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.969117378102912e-05, |
|
"loss": 0.1946, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 8.946186462420478e-05, |
|
"loss": 0.1942, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.923245199394482e-05, |
|
"loss": 0.1934, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.900293839906903e-05, |
|
"loss": 0.194, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.87733263495013e-05, |
|
"loss": 0.1936, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.85436183562422e-05, |
|
"loss": 0.1933, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 8.83138169313416e-05, |
|
"loss": 0.1933, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.808392458787103e-05, |
|
"loss": 0.1931, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_runtime": 46.9712, |
|
"eval_samples_per_second": 229.928, |
|
"eval_steps_per_second": 7.196, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.78539438398963e-05, |
|
"loss": 0.1922, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.762387720245008e-05, |
|
"loss": 0.1922, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 8.73937271915042e-05, |
|
"loss": 0.1926, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.716349632394235e-05, |
|
"loss": 0.1924, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.69331871175324e-05, |
|
"loss": 0.1927, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.67028020908989e-05, |
|
"loss": 0.1924, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 8.647234376349565e-05, |
|
"loss": 0.1921, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.624181465557794e-05, |
|
"loss": 0.1914, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.601121728817519e-05, |
|
"loss": 0.1917, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 8.578055418306327e-05, |
|
"loss": 0.1918, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"eval_runtime": 47.0452, |
|
"eval_samples_per_second": 229.566, |
|
"eval_steps_per_second": 7.185, |
|
"step": 50000 |
|
} |
|
], |
|
"max_steps": 100000, |
|
"num_train_epochs": 3, |
|
"total_flos": 3.504974211922538e+21, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|