|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"global_step": 56, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 6e-06, |
|
"loss": 2.5647, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 6e-06, |
|
"loss": 2.6377, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_accuracy": 0.3015994648178556, |
|
"eval_loss": 2.46875, |
|
"eval_runtime": 64.0845, |
|
"eval_samples_per_second": 4.915, |
|
"eval_steps_per_second": 0.624, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 6e-06, |
|
"loss": 2.5046, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_accuracy": 0.3096454418293499, |
|
"eval_loss": 2.384765625, |
|
"eval_runtime": 64.5855, |
|
"eval_samples_per_second": 4.877, |
|
"eval_steps_per_second": 0.619, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 6e-06, |
|
"loss": 2.4755, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_accuracy": 0.315568935109165, |
|
"eval_loss": 2.322265625, |
|
"eval_runtime": 64.83, |
|
"eval_samples_per_second": 4.859, |
|
"eval_steps_per_second": 0.617, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 6e-06, |
|
"loss": 2.459, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_accuracy": 0.32014839141275925, |
|
"eval_loss": 2.271484375, |
|
"eval_runtime": 64.7722, |
|
"eval_samples_per_second": 4.863, |
|
"eval_steps_per_second": 0.618, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 6e-06, |
|
"loss": 2.3602, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"eval_accuracy": 0.3243203794927933, |
|
"eval_loss": 2.224609375, |
|
"eval_runtime": 63.7938, |
|
"eval_samples_per_second": 4.938, |
|
"eval_steps_per_second": 0.627, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 6e-06, |
|
"loss": 2.3829, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_accuracy": 0.3275071458979505, |
|
"eval_loss": 2.189453125, |
|
"eval_runtime": 64.8117, |
|
"eval_samples_per_second": 4.86, |
|
"eval_steps_per_second": 0.617, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 6e-06, |
|
"loss": 2.3188, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_accuracy": 0.331490603904397, |
|
"eval_loss": 2.146484375, |
|
"eval_runtime": 63.7849, |
|
"eval_samples_per_second": 4.938, |
|
"eval_steps_per_second": 0.627, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 6e-06, |
|
"loss": 2.2895, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"eval_accuracy": 0.3365140181232135, |
|
"eval_loss": 2.103515625, |
|
"eval_runtime": 64.7342, |
|
"eval_samples_per_second": 4.866, |
|
"eval_steps_per_second": 0.618, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 6e-06, |
|
"loss": 2.3062, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_accuracy": 0.341166453810132, |
|
"eval_loss": 2.05859375, |
|
"eval_runtime": 64.7609, |
|
"eval_samples_per_second": 4.864, |
|
"eval_steps_per_second": 0.618, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 56, |
|
"total_flos": 3701894676480.0, |
|
"train_loss": 2.4000658307756697, |
|
"train_runtime": 10898.1431, |
|
"train_samples_per_second": 0.041, |
|
"train_steps_per_second": 0.005 |
|
} |
|
], |
|
"max_steps": 56, |
|
"num_train_epochs": 1, |
|
"total_flos": 3701894676480.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|