|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"eval_steps": 500, |
|
"global_step": 15915, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 6.056573390960693, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 2.4245, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_loss": 1.744064211845398, |
|
"eval_runtime": 51.3994, |
|
"eval_samples_per_second": 17.393, |
|
"eval_steps_per_second": 1.09, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 16.25078773498535, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 2.0113, |
|
"step": 2122 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_loss": 1.7332948446273804, |
|
"eval_runtime": 51.7885, |
|
"eval_samples_per_second": 17.263, |
|
"eval_steps_per_second": 1.081, |
|
"step": 2122 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 6.307107925415039, |
|
"learning_rate": 4e-05, |
|
"loss": 1.869, |
|
"step": 3183 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_loss": 1.7425885200500488, |
|
"eval_runtime": 51.851, |
|
"eval_samples_per_second": 17.242, |
|
"eval_steps_per_second": 1.08, |
|
"step": 3183 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 5.612180233001709, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 1.7756, |
|
"step": 4244 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_loss": 1.7457906007766724, |
|
"eval_runtime": 51.5711, |
|
"eval_samples_per_second": 17.335, |
|
"eval_steps_per_second": 1.086, |
|
"step": 4244 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"grad_norm": 5.046538352966309, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.7034, |
|
"step": 5305 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_loss": 1.7198069095611572, |
|
"eval_runtime": 52.1824, |
|
"eval_samples_per_second": 17.132, |
|
"eval_steps_per_second": 1.073, |
|
"step": 5305 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 4.726966381072998, |
|
"learning_rate": 3e-05, |
|
"loss": 1.6286, |
|
"step": 6366 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_loss": 1.7528493404388428, |
|
"eval_runtime": 52.0378, |
|
"eval_samples_per_second": 17.18, |
|
"eval_steps_per_second": 1.076, |
|
"step": 6366 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"grad_norm": 4.87706184387207, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 1.5654, |
|
"step": 7427 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_loss": 1.7497186660766602, |
|
"eval_runtime": 52.0226, |
|
"eval_samples_per_second": 17.185, |
|
"eval_steps_per_second": 1.076, |
|
"step": 7427 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 4.516915798187256, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 1.5124, |
|
"step": 8488 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.7384775876998901, |
|
"eval_runtime": 52.0172, |
|
"eval_samples_per_second": 17.187, |
|
"eval_steps_per_second": 1.077, |
|
"step": 8488 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"grad_norm": 4.607875347137451, |
|
"learning_rate": 2e-05, |
|
"loss": 1.4688, |
|
"step": 9549 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_loss": 1.6982604265213013, |
|
"eval_runtime": 51.9662, |
|
"eval_samples_per_second": 17.203, |
|
"eval_steps_per_second": 1.078, |
|
"step": 9549 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 4.221675395965576, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 1.4296, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_loss": 1.7344964742660522, |
|
"eval_runtime": 51.2487, |
|
"eval_samples_per_second": 17.444, |
|
"eval_steps_per_second": 1.093, |
|
"step": 10610 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"grad_norm": 4.773486137390137, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 1.3963, |
|
"step": 11671 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_loss": 1.7325119972229004, |
|
"eval_runtime": 51.5417, |
|
"eval_samples_per_second": 17.345, |
|
"eval_steps_per_second": 1.086, |
|
"step": 11671 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"grad_norm": 4.954598426818848, |
|
"learning_rate": 1e-05, |
|
"loss": 1.3703, |
|
"step": 12732 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_loss": 1.7416211366653442, |
|
"eval_runtime": 51.7473, |
|
"eval_samples_per_second": 17.276, |
|
"eval_steps_per_second": 1.082, |
|
"step": 12732 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"grad_norm": 4.158045768737793, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 1.3447, |
|
"step": 13793 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_loss": 1.7334271669387817, |
|
"eval_runtime": 51.5979, |
|
"eval_samples_per_second": 17.326, |
|
"eval_steps_per_second": 1.085, |
|
"step": 13793 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"grad_norm": 6.926255226135254, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 1.3276, |
|
"step": 14854 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_loss": 1.7397375106811523, |
|
"eval_runtime": 51.6734, |
|
"eval_samples_per_second": 17.301, |
|
"eval_steps_per_second": 1.084, |
|
"step": 14854 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 15915, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.970105106825216e+17, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|