|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 59.80732177263969, |
|
"eval_steps": 1164, |
|
"global_step": 11640, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 5.980732177263969, |
|
"grad_norm": 1.6488584280014038, |
|
"learning_rate": 9.00171821305842e-06, |
|
"loss": 2.4506, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 5.980732177263969, |
|
"eval_accuracy": 0.07356671740233384, |
|
"eval_loss": 2.987065315246582, |
|
"eval_runtime": 32.1198, |
|
"eval_samples_per_second": 245.456, |
|
"eval_steps_per_second": 12.298, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 11.961464354527939, |
|
"grad_norm": 4.022498607635498, |
|
"learning_rate": 8.00257731958763e-06, |
|
"loss": 2.3229, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 11.961464354527939, |
|
"eval_accuracy": 0.07331303906646372, |
|
"eval_loss": 3.1384434700012207, |
|
"eval_runtime": 28.9812, |
|
"eval_samples_per_second": 272.038, |
|
"eval_steps_per_second": 13.63, |
|
"step": 2328 |
|
}, |
|
{ |
|
"epoch": 17.942196531791907, |
|
"grad_norm": 3.224060535430908, |
|
"learning_rate": 7.002577319587629e-06, |
|
"loss": 2.2364, |
|
"step": 3492 |
|
}, |
|
{ |
|
"epoch": 17.942196531791907, |
|
"eval_accuracy": 0.07952815829528158, |
|
"eval_loss": 3.1919777393341064, |
|
"eval_runtime": 28.8853, |
|
"eval_samples_per_second": 272.942, |
|
"eval_steps_per_second": 13.675, |
|
"step": 3492 |
|
}, |
|
{ |
|
"epoch": 23.922928709055878, |
|
"grad_norm": 3.2662484645843506, |
|
"learning_rate": 6.003436426116839e-06, |
|
"loss": 2.1589, |
|
"step": 4656 |
|
}, |
|
{ |
|
"epoch": 23.922928709055878, |
|
"eval_accuracy": 0.0824771689497717, |
|
"eval_loss": 3.2666664123535156, |
|
"eval_runtime": 28.8936, |
|
"eval_samples_per_second": 272.863, |
|
"eval_steps_per_second": 13.671, |
|
"step": 4656 |
|
}, |
|
{ |
|
"epoch": 29.903660886319845, |
|
"grad_norm": 4.750115394592285, |
|
"learning_rate": 5.003436426116839e-06, |
|
"loss": 2.1034, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 29.903660886319845, |
|
"eval_accuracy": 0.08351090816844242, |
|
"eval_loss": 3.2883238792419434, |
|
"eval_runtime": 28.8169, |
|
"eval_samples_per_second": 273.589, |
|
"eval_steps_per_second": 13.707, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 35.884393063583815, |
|
"grad_norm": 4.091363430023193, |
|
"learning_rate": 4.004295532646048e-06, |
|
"loss": 2.0555, |
|
"step": 6984 |
|
}, |
|
{ |
|
"epoch": 35.884393063583815, |
|
"eval_accuracy": 0.08365043125317098, |
|
"eval_loss": 3.351201295852661, |
|
"eval_runtime": 28.9642, |
|
"eval_samples_per_second": 272.198, |
|
"eval_steps_per_second": 13.638, |
|
"step": 6984 |
|
}, |
|
{ |
|
"epoch": 41.86512524084778, |
|
"grad_norm": 3.1545722484588623, |
|
"learning_rate": 3.004295532646048e-06, |
|
"loss": 2.0231, |
|
"step": 8148 |
|
}, |
|
{ |
|
"epoch": 41.86512524084778, |
|
"eval_accuracy": 0.08360513155033703, |
|
"eval_loss": 3.4304280281066895, |
|
"eval_runtime": 28.8426, |
|
"eval_samples_per_second": 273.345, |
|
"eval_steps_per_second": 13.695, |
|
"step": 8148 |
|
}, |
|
{ |
|
"epoch": 47.845857418111756, |
|
"grad_norm": 3.3551816940307617, |
|
"learning_rate": 2.005154639175258e-06, |
|
"loss": 2.0056, |
|
"step": 9312 |
|
}, |
|
{ |
|
"epoch": 47.845857418111756, |
|
"eval_accuracy": 0.08306380010147134, |
|
"eval_loss": 3.553959369659424, |
|
"eval_runtime": 29.0501, |
|
"eval_samples_per_second": 271.393, |
|
"eval_steps_per_second": 13.597, |
|
"step": 9312 |
|
}, |
|
{ |
|
"epoch": 53.82658959537572, |
|
"grad_norm": 3.364650249481201, |
|
"learning_rate": 1.005154639175258e-06, |
|
"loss": 1.9964, |
|
"step": 10476 |
|
}, |
|
{ |
|
"epoch": 53.82658959537572, |
|
"eval_accuracy": 0.08203675517221941, |
|
"eval_loss": 3.589355230331421, |
|
"eval_runtime": 28.9839, |
|
"eval_samples_per_second": 272.014, |
|
"eval_steps_per_second": 13.628, |
|
"step": 10476 |
|
}, |
|
{ |
|
"epoch": 59.80732177263969, |
|
"grad_norm": 5.201988697052002, |
|
"learning_rate": 6.0137457044673545e-09, |
|
"loss": 1.9848, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 59.80732177263969, |
|
"eval_accuracy": 0.08131659056316591, |
|
"eval_loss": 3.626406192779541, |
|
"eval_runtime": 29.0612, |
|
"eval_samples_per_second": 271.29, |
|
"eval_steps_per_second": 13.592, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 59.80732177263969, |
|
"step": 11640, |
|
"total_flos": 1.472280663793582e+18, |
|
"train_loss": 2.133767731656733, |
|
"train_runtime": 32669.9946, |
|
"train_samples_per_second": 114.321, |
|
"train_steps_per_second": 0.356 |
|
} |
|
], |
|
"logging_steps": 1164, |
|
"max_steps": 11640, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 60, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.472280663793582e+18, |
|
"train_batch_size": 40, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|