|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.38198354265916, |
|
"global_step": 11000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 5e-06, |
|
"loss": 1.7639, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 1e-05, |
|
"loss": 0.4886, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 1.5e-05, |
|
"loss": 0.2481, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2e-05, |
|
"loss": 0.1724, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 2.5e-05, |
|
"loss": 0.1417, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_bleu": 15.49859933982253, |
|
"eval_chrf": 14.312711083470898, |
|
"eval_loss": 0.37249916791915894, |
|
"eval_runtime": 140.7211, |
|
"eval_samples_per_second": 4.548, |
|
"eval_steps_per_second": 0.071, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 3e-05, |
|
"loss": 0.1238, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 3.5e-05, |
|
"loss": 0.1094, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4e-05, |
|
"loss": 0.1004, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.5e-05, |
|
"loss": 0.0889, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0843, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_bleu": 13.930224375278131, |
|
"eval_chrf": 15.927172187426772, |
|
"eval_loss": 0.3602243661880493, |
|
"eval_runtime": 171.4695, |
|
"eval_samples_per_second": 3.732, |
|
"eval_steps_per_second": 0.058, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.952584163110479e-05, |
|
"loss": 0.0795, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.905168326220958e-05, |
|
"loss": 0.0748, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 4.8577524893314366e-05, |
|
"loss": 0.0705, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 4.8103366524419156e-05, |
|
"loss": 0.0665, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 4.7629208155523946e-05, |
|
"loss": 0.0625, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"eval_bleu": 20.903227705509728, |
|
"eval_chrf": 19.94358238391718, |
|
"eval_loss": 0.363214910030365, |
|
"eval_runtime": 179.3923, |
|
"eval_samples_per_second": 3.568, |
|
"eval_steps_per_second": 0.056, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 4.7155049786628736e-05, |
|
"loss": 0.0607, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 4.6680891417733527e-05, |
|
"loss": 0.0584, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 4.620673304883831e-05, |
|
"loss": 0.0578, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.57325746799431e-05, |
|
"loss": 0.0549, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 4.525841631104789e-05, |
|
"loss": 0.0531, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_bleu": 17.338938784234383, |
|
"eval_chrf": 15.930568457831859, |
|
"eval_loss": 0.36546987295150757, |
|
"eval_runtime": 170.3063, |
|
"eval_samples_per_second": 3.758, |
|
"eval_steps_per_second": 0.059, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.478425794215268e-05, |
|
"loss": 0.0521, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 4.431009957325747e-05, |
|
"loss": 0.0513, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 4.383594120436226e-05, |
|
"loss": 0.05, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.3361782835467044e-05, |
|
"loss": 0.0462, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 4.2887624466571834e-05, |
|
"loss": 0.0442, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"eval_bleu": 35.5303321748609, |
|
"eval_chrf": 30.398609275779588, |
|
"eval_loss": 0.3768843710422516, |
|
"eval_runtime": 147.673, |
|
"eval_samples_per_second": 4.334, |
|
"eval_steps_per_second": 0.068, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 4.2413466097676624e-05, |
|
"loss": 0.0437, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.1939307728781414e-05, |
|
"loss": 0.0436, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 4.1465149359886204e-05, |
|
"loss": 0.0428, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 4.099099099099099e-05, |
|
"loss": 0.0418, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.051683262209578e-05, |
|
"loss": 0.0408, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_bleu": 40.96986293672358, |
|
"eval_chrf": 35.0063576863817, |
|
"eval_loss": 0.38005733489990234, |
|
"eval_runtime": 82.4647, |
|
"eval_samples_per_second": 7.761, |
|
"eval_steps_per_second": 0.121, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 4.004267425320057e-05, |
|
"loss": 0.041, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.956851588430536e-05, |
|
"loss": 0.0385, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 3.909435751541015e-05, |
|
"loss": 0.0397, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 3.862019914651494e-05, |
|
"loss": 0.0384, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 3.814604077761973e-05, |
|
"loss": 0.0389, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"eval_bleu": 41.51574989819788, |
|
"eval_chrf": 35.55197531009423, |
|
"eval_loss": 0.38628411293029785, |
|
"eval_runtime": 89.9685, |
|
"eval_samples_per_second": 7.114, |
|
"eval_steps_per_second": 0.111, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 3.767188240872452e-05, |
|
"loss": 0.038, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 3.719772403982931e-05, |
|
"loss": 0.0374, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 3.67235656709341e-05, |
|
"loss": 0.0359, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.624940730203888e-05, |
|
"loss": 0.0358, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.577524893314367e-05, |
|
"loss": 0.0359, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_bleu": 23.208736406312035, |
|
"eval_chrf": 23.97795821953749, |
|
"eval_loss": 0.3921656310558319, |
|
"eval_runtime": 182.5523, |
|
"eval_samples_per_second": 3.506, |
|
"eval_steps_per_second": 0.055, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 3.530109056424846e-05, |
|
"loss": 0.0348, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.482693219535325e-05, |
|
"loss": 0.0352, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.435277382645804e-05, |
|
"loss": 0.0351, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 3.3878615457562826e-05, |
|
"loss": 0.0345, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.3404457088667616e-05, |
|
"loss": 0.0337, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"eval_bleu": 41.547921684162176, |
|
"eval_chrf": 35.46471050376956, |
|
"eval_loss": 0.40451329946517944, |
|
"eval_runtime": 89.4039, |
|
"eval_samples_per_second": 7.159, |
|
"eval_steps_per_second": 0.112, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.2930298719772407e-05, |
|
"loss": 0.0343, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 3.24561403508772e-05, |
|
"loss": 0.0309, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 3.198198198198199e-05, |
|
"loss": 0.0296, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 3.150782361308677e-05, |
|
"loss": 0.0291, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 3.103366524419156e-05, |
|
"loss": 0.0295, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"eval_bleu": 41.51485442459467, |
|
"eval_chrf": 35.46553158852993, |
|
"eval_loss": 0.4056099057197571, |
|
"eval_runtime": 89.2092, |
|
"eval_samples_per_second": 7.174, |
|
"eval_steps_per_second": 0.112, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 3.055950687529635e-05, |
|
"loss": 0.0285, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 3.008534850640114e-05, |
|
"loss": 0.0293, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 2.9611190137505927e-05, |
|
"loss": 0.0285, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.9137031768610717e-05, |
|
"loss": 0.0288, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 2.8662873399715508e-05, |
|
"loss": 0.0279, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"eval_bleu": 41.75820932324433, |
|
"eval_chrf": 35.69247581900476, |
|
"eval_loss": 0.41462868452072144, |
|
"eval_runtime": 152.578, |
|
"eval_samples_per_second": 4.195, |
|
"eval_steps_per_second": 0.066, |
|
"step": 11000 |
|
} |
|
], |
|
"max_steps": 23090, |
|
"num_train_epochs": 5, |
|
"total_flos": 2.682581278261248e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|