AlienKevin's picture
Upload 113 files
a54c5b6
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.38198354265916,
"global_step": 11000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04,
"learning_rate": 5e-06,
"loss": 1.7639,
"step": 200
},
{
"epoch": 0.09,
"learning_rate": 1e-05,
"loss": 0.4886,
"step": 400
},
{
"epoch": 0.13,
"learning_rate": 1.5e-05,
"loss": 0.2481,
"step": 600
},
{
"epoch": 0.17,
"learning_rate": 2e-05,
"loss": 0.1724,
"step": 800
},
{
"epoch": 0.22,
"learning_rate": 2.5e-05,
"loss": 0.1417,
"step": 1000
},
{
"epoch": 0.22,
"eval_bleu": 15.49859933982253,
"eval_chrf": 14.312711083470898,
"eval_loss": 0.37249916791915894,
"eval_runtime": 140.7211,
"eval_samples_per_second": 4.548,
"eval_steps_per_second": 0.071,
"step": 1000
},
{
"epoch": 0.26,
"learning_rate": 3e-05,
"loss": 0.1238,
"step": 1200
},
{
"epoch": 0.3,
"learning_rate": 3.5e-05,
"loss": 0.1094,
"step": 1400
},
{
"epoch": 0.35,
"learning_rate": 4e-05,
"loss": 0.1004,
"step": 1600
},
{
"epoch": 0.39,
"learning_rate": 4.5e-05,
"loss": 0.0889,
"step": 1800
},
{
"epoch": 0.43,
"learning_rate": 5e-05,
"loss": 0.0843,
"step": 2000
},
{
"epoch": 0.43,
"eval_bleu": 13.930224375278131,
"eval_chrf": 15.927172187426772,
"eval_loss": 0.3602243661880493,
"eval_runtime": 171.4695,
"eval_samples_per_second": 3.732,
"eval_steps_per_second": 0.058,
"step": 2000
},
{
"epoch": 0.48,
"learning_rate": 4.952584163110479e-05,
"loss": 0.0795,
"step": 2200
},
{
"epoch": 0.52,
"learning_rate": 4.905168326220958e-05,
"loss": 0.0748,
"step": 2400
},
{
"epoch": 0.56,
"learning_rate": 4.8577524893314366e-05,
"loss": 0.0705,
"step": 2600
},
{
"epoch": 0.61,
"learning_rate": 4.8103366524419156e-05,
"loss": 0.0665,
"step": 2800
},
{
"epoch": 0.65,
"learning_rate": 4.7629208155523946e-05,
"loss": 0.0625,
"step": 3000
},
{
"epoch": 0.65,
"eval_bleu": 20.903227705509728,
"eval_chrf": 19.94358238391718,
"eval_loss": 0.363214910030365,
"eval_runtime": 179.3923,
"eval_samples_per_second": 3.568,
"eval_steps_per_second": 0.056,
"step": 3000
},
{
"epoch": 0.69,
"learning_rate": 4.7155049786628736e-05,
"loss": 0.0607,
"step": 3200
},
{
"epoch": 0.74,
"learning_rate": 4.6680891417733527e-05,
"loss": 0.0584,
"step": 3400
},
{
"epoch": 0.78,
"learning_rate": 4.620673304883831e-05,
"loss": 0.0578,
"step": 3600
},
{
"epoch": 0.82,
"learning_rate": 4.57325746799431e-05,
"loss": 0.0549,
"step": 3800
},
{
"epoch": 0.87,
"learning_rate": 4.525841631104789e-05,
"loss": 0.0531,
"step": 4000
},
{
"epoch": 0.87,
"eval_bleu": 17.338938784234383,
"eval_chrf": 15.930568457831859,
"eval_loss": 0.36546987295150757,
"eval_runtime": 170.3063,
"eval_samples_per_second": 3.758,
"eval_steps_per_second": 0.059,
"step": 4000
},
{
"epoch": 0.91,
"learning_rate": 4.478425794215268e-05,
"loss": 0.0521,
"step": 4200
},
{
"epoch": 0.95,
"learning_rate": 4.431009957325747e-05,
"loss": 0.0513,
"step": 4400
},
{
"epoch": 1.0,
"learning_rate": 4.383594120436226e-05,
"loss": 0.05,
"step": 4600
},
{
"epoch": 1.04,
"learning_rate": 4.3361782835467044e-05,
"loss": 0.0462,
"step": 4800
},
{
"epoch": 1.08,
"learning_rate": 4.2887624466571834e-05,
"loss": 0.0442,
"step": 5000
},
{
"epoch": 1.08,
"eval_bleu": 35.5303321748609,
"eval_chrf": 30.398609275779588,
"eval_loss": 0.3768843710422516,
"eval_runtime": 147.673,
"eval_samples_per_second": 4.334,
"eval_steps_per_second": 0.068,
"step": 5000
},
{
"epoch": 1.13,
"learning_rate": 4.2413466097676624e-05,
"loss": 0.0437,
"step": 5200
},
{
"epoch": 1.17,
"learning_rate": 4.1939307728781414e-05,
"loss": 0.0436,
"step": 5400
},
{
"epoch": 1.21,
"learning_rate": 4.1465149359886204e-05,
"loss": 0.0428,
"step": 5600
},
{
"epoch": 1.26,
"learning_rate": 4.099099099099099e-05,
"loss": 0.0418,
"step": 5800
},
{
"epoch": 1.3,
"learning_rate": 4.051683262209578e-05,
"loss": 0.0408,
"step": 6000
},
{
"epoch": 1.3,
"eval_bleu": 40.96986293672358,
"eval_chrf": 35.0063576863817,
"eval_loss": 0.38005733489990234,
"eval_runtime": 82.4647,
"eval_samples_per_second": 7.761,
"eval_steps_per_second": 0.121,
"step": 6000
},
{
"epoch": 1.34,
"learning_rate": 4.004267425320057e-05,
"loss": 0.041,
"step": 6200
},
{
"epoch": 1.39,
"learning_rate": 3.956851588430536e-05,
"loss": 0.0385,
"step": 6400
},
{
"epoch": 1.43,
"learning_rate": 3.909435751541015e-05,
"loss": 0.0397,
"step": 6600
},
{
"epoch": 1.47,
"learning_rate": 3.862019914651494e-05,
"loss": 0.0384,
"step": 6800
},
{
"epoch": 1.52,
"learning_rate": 3.814604077761973e-05,
"loss": 0.0389,
"step": 7000
},
{
"epoch": 1.52,
"eval_bleu": 41.51574989819788,
"eval_chrf": 35.55197531009423,
"eval_loss": 0.38628411293029785,
"eval_runtime": 89.9685,
"eval_samples_per_second": 7.114,
"eval_steps_per_second": 0.111,
"step": 7000
},
{
"epoch": 1.56,
"learning_rate": 3.767188240872452e-05,
"loss": 0.038,
"step": 7200
},
{
"epoch": 1.6,
"learning_rate": 3.719772403982931e-05,
"loss": 0.0374,
"step": 7400
},
{
"epoch": 1.65,
"learning_rate": 3.67235656709341e-05,
"loss": 0.0359,
"step": 7600
},
{
"epoch": 1.69,
"learning_rate": 3.624940730203888e-05,
"loss": 0.0358,
"step": 7800
},
{
"epoch": 1.73,
"learning_rate": 3.577524893314367e-05,
"loss": 0.0359,
"step": 8000
},
{
"epoch": 1.73,
"eval_bleu": 23.208736406312035,
"eval_chrf": 23.97795821953749,
"eval_loss": 0.3921656310558319,
"eval_runtime": 182.5523,
"eval_samples_per_second": 3.506,
"eval_steps_per_second": 0.055,
"step": 8000
},
{
"epoch": 1.78,
"learning_rate": 3.530109056424846e-05,
"loss": 0.0348,
"step": 8200
},
{
"epoch": 1.82,
"learning_rate": 3.482693219535325e-05,
"loss": 0.0352,
"step": 8400
},
{
"epoch": 1.86,
"learning_rate": 3.435277382645804e-05,
"loss": 0.0351,
"step": 8600
},
{
"epoch": 1.91,
"learning_rate": 3.3878615457562826e-05,
"loss": 0.0345,
"step": 8800
},
{
"epoch": 1.95,
"learning_rate": 3.3404457088667616e-05,
"loss": 0.0337,
"step": 9000
},
{
"epoch": 1.95,
"eval_bleu": 41.547921684162176,
"eval_chrf": 35.46471050376956,
"eval_loss": 0.40451329946517944,
"eval_runtime": 89.4039,
"eval_samples_per_second": 7.159,
"eval_steps_per_second": 0.112,
"step": 9000
},
{
"epoch": 1.99,
"learning_rate": 3.2930298719772407e-05,
"loss": 0.0343,
"step": 9200
},
{
"epoch": 2.04,
"learning_rate": 3.24561403508772e-05,
"loss": 0.0309,
"step": 9400
},
{
"epoch": 2.08,
"learning_rate": 3.198198198198199e-05,
"loss": 0.0296,
"step": 9600
},
{
"epoch": 2.12,
"learning_rate": 3.150782361308677e-05,
"loss": 0.0291,
"step": 9800
},
{
"epoch": 2.17,
"learning_rate": 3.103366524419156e-05,
"loss": 0.0295,
"step": 10000
},
{
"epoch": 2.17,
"eval_bleu": 41.51485442459467,
"eval_chrf": 35.46553158852993,
"eval_loss": 0.4056099057197571,
"eval_runtime": 89.2092,
"eval_samples_per_second": 7.174,
"eval_steps_per_second": 0.112,
"step": 10000
},
{
"epoch": 2.21,
"learning_rate": 3.055950687529635e-05,
"loss": 0.0285,
"step": 10200
},
{
"epoch": 2.25,
"learning_rate": 3.008534850640114e-05,
"loss": 0.0293,
"step": 10400
},
{
"epoch": 2.3,
"learning_rate": 2.9611190137505927e-05,
"loss": 0.0285,
"step": 10600
},
{
"epoch": 2.34,
"learning_rate": 2.9137031768610717e-05,
"loss": 0.0288,
"step": 10800
},
{
"epoch": 2.38,
"learning_rate": 2.8662873399715508e-05,
"loss": 0.0279,
"step": 11000
},
{
"epoch": 2.38,
"eval_bleu": 41.75820932324433,
"eval_chrf": 35.69247581900476,
"eval_loss": 0.41462868452072144,
"eval_runtime": 152.578,
"eval_samples_per_second": 4.195,
"eval_steps_per_second": 0.066,
"step": 11000
}
],
"max_steps": 23090,
"num_train_epochs": 5,
"total_flos": 2.682581278261248e+16,
"trial_name": null,
"trial_params": null
}