|
{ |
|
"best_metric": 0.029607130214571953, |
|
"best_model_checkpoint": "model/checkpoint-6500", |
|
"epoch": 2.138157894736842, |
|
"eval_steps": 500, |
|
"global_step": 6500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.7944078947368425e-05, |
|
"loss": 0.1494, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_accuracy": 0.9837171052631579, |
|
"eval_loss": 0.0790267065167427, |
|
"eval_runtime": 113.0978, |
|
"eval_samples_per_second": 53.759, |
|
"eval_steps_per_second": 6.72, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.588815789473684e-05, |
|
"loss": 0.1072, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.9824013157894737, |
|
"eval_loss": 0.06455110013484955, |
|
"eval_runtime": 113.235, |
|
"eval_samples_per_second": 53.694, |
|
"eval_steps_per_second": 6.712, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.383223684210527e-05, |
|
"loss": 0.0765, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_accuracy": 0.9899671052631579, |
|
"eval_loss": 0.04877911135554314, |
|
"eval_runtime": 113.1524, |
|
"eval_samples_per_second": 53.733, |
|
"eval_steps_per_second": 6.717, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 4.177631578947369e-05, |
|
"loss": 0.0742, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_accuracy": 0.9886513157894737, |
|
"eval_loss": 0.054583221673965454, |
|
"eval_runtime": 113.2769, |
|
"eval_samples_per_second": 53.674, |
|
"eval_steps_per_second": 6.709, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.972039473684211e-05, |
|
"loss": 0.0748, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"eval_accuracy": 0.990625, |
|
"eval_loss": 0.04383059963583946, |
|
"eval_runtime": 113.3147, |
|
"eval_samples_per_second": 53.656, |
|
"eval_steps_per_second": 6.707, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 3.7664473684210526e-05, |
|
"loss": 0.0437, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_accuracy": 0.9901315789473685, |
|
"eval_loss": 0.05416030064225197, |
|
"eval_runtime": 113.1123, |
|
"eval_samples_per_second": 53.752, |
|
"eval_steps_per_second": 6.719, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 3.560855263157895e-05, |
|
"loss": 0.0134, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"eval_accuracy": 0.990953947368421, |
|
"eval_loss": 0.05749928951263428, |
|
"eval_runtime": 113.4787, |
|
"eval_samples_per_second": 53.578, |
|
"eval_steps_per_second": 6.697, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 3.355263157894737e-05, |
|
"loss": 0.0277, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"eval_accuracy": 0.9904605263157895, |
|
"eval_loss": 0.06314379721879959, |
|
"eval_runtime": 113.4629, |
|
"eval_samples_per_second": 53.586, |
|
"eval_steps_per_second": 6.698, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.1496710526315794e-05, |
|
"loss": 0.0231, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_accuracy": 0.9912828947368421, |
|
"eval_loss": 0.04834901914000511, |
|
"eval_runtime": 113.5144, |
|
"eval_samples_per_second": 53.561, |
|
"eval_steps_per_second": 6.695, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 2.944078947368421e-05, |
|
"loss": 0.0243, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"eval_accuracy": 0.9901315789473685, |
|
"eval_loss": 0.06605446338653564, |
|
"eval_runtime": 113.6185, |
|
"eval_samples_per_second": 53.512, |
|
"eval_steps_per_second": 6.689, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 2.7384868421052633e-05, |
|
"loss": 0.0232, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"eval_accuracy": 0.9932565789473684, |
|
"eval_loss": 0.037462268024683, |
|
"eval_runtime": 112.9475, |
|
"eval_samples_per_second": 53.83, |
|
"eval_steps_per_second": 6.729, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.5328947368421052e-05, |
|
"loss": 0.0198, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_accuracy": 0.9945723684210527, |
|
"eval_loss": 0.034890029579401016, |
|
"eval_runtime": 112.9353, |
|
"eval_samples_per_second": 53.836, |
|
"eval_steps_per_second": 6.73, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 2.3273026315789475e-05, |
|
"loss": 0.0122, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"eval_accuracy": 0.9962171052631579, |
|
"eval_loss": 0.029607130214571953, |
|
"eval_runtime": 112.9987, |
|
"eval_samples_per_second": 53.806, |
|
"eval_steps_per_second": 6.726, |
|
"step": 6500 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 12160, |
|
"num_train_epochs": 4, |
|
"save_steps": 500, |
|
"total_flos": 1.368177487872e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|