|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 5.0, |
|
"eval_steps": 2000, |
|
"global_step": 12575, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.801192842942346e-05, |
|
"loss": 0.2793, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.602385685884692e-05, |
|
"loss": 0.167, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 4.403578528827038e-05, |
|
"loss": 0.1142, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 4.204771371769384e-05, |
|
"loss": 0.1172, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_f1": 0.8002317832826308, |
|
"eval_loss": 0.6091572739421863, |
|
"eval_runtime": 11.3587, |
|
"eval_samples_per_second": 7.571, |
|
"eval_steps_per_second": 7.571, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.00596421471173e-05, |
|
"loss": 0.1046, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 3.8071570576540756e-05, |
|
"loss": 0.1103, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 3.6083499005964215e-05, |
|
"loss": 0.1216, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 3.409542743538768e-05, |
|
"loss": 0.1201, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"eval_f1": 0.797415004214667, |
|
"eval_loss": 0.570012144717545, |
|
"eval_runtime": 11.2237, |
|
"eval_samples_per_second": 7.662, |
|
"eval_steps_per_second": 7.662, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.210735586481113e-05, |
|
"loss": 0.1239, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.0119284294234595e-05, |
|
"loss": 0.1259, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 2.813121272365805e-05, |
|
"loss": 0.1254, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.6143141153081513e-05, |
|
"loss": 0.1738, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"eval_f1": 0.785323775968898, |
|
"eval_loss": 1.3082878860688905, |
|
"eval_runtime": 11.1911, |
|
"eval_samples_per_second": 7.685, |
|
"eval_steps_per_second": 7.685, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 2.415506958250497e-05, |
|
"loss": 0.1656, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 2.216699801192843e-05, |
|
"loss": 0.1316, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 2.017892644135189e-05, |
|
"loss": 0.1284, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"learning_rate": 1.8190854870775348e-05, |
|
"loss": 0.1431, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.18, |
|
"eval_f1": 0.7811158798283263, |
|
"eval_loss": 0.6317664683518408, |
|
"eval_runtime": 11.2067, |
|
"eval_samples_per_second": 7.674, |
|
"eval_steps_per_second": 7.674, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 1.6202783300198807e-05, |
|
"loss": 0.1353, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 1.4214711729622268e-05, |
|
"loss": 0.1013, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 3.78, |
|
"learning_rate": 1.2226640159045727e-05, |
|
"loss": 0.1174, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"learning_rate": 1.0238568588469186e-05, |
|
"loss": 0.1213, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 3.98, |
|
"eval_f1": 0.8029407524866656, |
|
"eval_loss": 0.4704339896264236, |
|
"eval_runtime": 11.4371, |
|
"eval_samples_per_second": 7.519, |
|
"eval_steps_per_second": 7.519, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 4.17, |
|
"learning_rate": 8.250497017892645e-06, |
|
"loss": 0.0851, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 4.37, |
|
"learning_rate": 6.2624254473161034e-06, |
|
"loss": 0.096, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 4.57, |
|
"learning_rate": 4.274353876739562e-06, |
|
"loss": 0.0727, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"learning_rate": 2.286282306163022e-06, |
|
"loss": 0.0764, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.77, |
|
"eval_f1": 0.8055235903337169, |
|
"eval_loss": 0.4711939039551899, |
|
"eval_runtime": 11.2703, |
|
"eval_samples_per_second": 7.631, |
|
"eval_steps_per_second": 7.631, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 4.97, |
|
"learning_rate": 2.982107355864811e-07, |
|
"loss": 0.09, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"step": 12575, |
|
"total_flos": 1.933020837593007e+16, |
|
"train_loss": 0.12557823169776508, |
|
"train_runtime": 2775.7723, |
|
"train_samples_per_second": 4.53, |
|
"train_steps_per_second": 4.53 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 12575, |
|
"num_train_epochs": 5, |
|
"save_steps": 500, |
|
"total_flos": 1.933020837593007e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|