thomnis's picture
Training in progress, step 3180
e4a7887 verified
raw
history blame
4.91 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 3180,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9968553459119497,
"grad_norm": 0.26659590005874634,
"learning_rate": 0.00016775241359960757,
"loss": 0.1876,
"step": 317
},
{
"epoch": 1.0,
"eval_accuracy": 0.9,
"eval_loss": 0.04595751687884331,
"eval_runtime": 5.5039,
"eval_samples_per_second": 563.242,
"eval_steps_per_second": 11.81,
"step": 318
},
{
"epoch": 1.9937106918238994,
"grad_norm": 0.1902252733707428,
"learning_rate": 0.00015561522064766448,
"loss": 0.0427,
"step": 634
},
{
"epoch": 2.0,
"eval_accuracy": 0.9351612903225807,
"eval_loss": 0.030528072267770767,
"eval_runtime": 5.4415,
"eval_samples_per_second": 569.694,
"eval_steps_per_second": 11.945,
"step": 636
},
{
"epoch": 2.990566037735849,
"grad_norm": 0.21384550631046295,
"learning_rate": 0.00013670282173226981,
"loss": 0.0291,
"step": 951
},
{
"epoch": 3.0,
"eval_accuracy": 0.9364516129032258,
"eval_loss": 0.02449769712984562,
"eval_runtime": 5.4302,
"eval_samples_per_second": 570.878,
"eval_steps_per_second": 11.97,
"step": 954
},
{
"epoch": 3.9874213836477987,
"grad_norm": 0.10294859856367111,
"learning_rate": 0.00011285496445235415,
"loss": 0.023,
"step": 1268
},
{
"epoch": 4.0,
"eval_accuracy": 0.9435483870967742,
"eval_loss": 0.021032005548477173,
"eval_runtime": 5.5078,
"eval_samples_per_second": 562.834,
"eval_steps_per_second": 11.801,
"step": 1272
},
{
"epoch": 4.984276729559748,
"grad_norm": 0.06512714922428131,
"learning_rate": 8.639150460022785e-05,
"loss": 0.0201,
"step": 1585
},
{
"epoch": 5.0,
"eval_accuracy": 0.9490322580645161,
"eval_loss": 0.01880320906639099,
"eval_runtime": 5.4592,
"eval_samples_per_second": 567.844,
"eval_steps_per_second": 11.906,
"step": 1590
},
{
"epoch": 5.981132075471698,
"grad_norm": 0.09834893047809601,
"learning_rate": 5.988673679288492e-05,
"loss": 0.018,
"step": 1902
},
{
"epoch": 6.0,
"eval_accuracy": 0.9480645161290323,
"eval_loss": 0.018181176856160164,
"eval_runtime": 5.4267,
"eval_samples_per_second": 571.246,
"eval_steps_per_second": 11.978,
"step": 1908
},
{
"epoch": 6.977987421383648,
"grad_norm": 0.09281986206769943,
"learning_rate": 3.591897397475451e-05,
"loss": 0.0167,
"step": 2219
},
{
"epoch": 7.0,
"eval_accuracy": 0.9474193548387096,
"eval_loss": 0.017256448045372963,
"eval_runtime": 5.5033,
"eval_samples_per_second": 563.302,
"eval_steps_per_second": 11.811,
"step": 2226
},
{
"epoch": 7.9748427672955975,
"grad_norm": 0.0637020543217659,
"learning_rate": 1.681973602830377e-05,
"loss": 0.0158,
"step": 2536
},
{
"epoch": 8.0,
"eval_accuracy": 0.9483870967741935,
"eval_loss": 0.01661858893930912,
"eval_runtime": 5.4493,
"eval_samples_per_second": 568.879,
"eval_steps_per_second": 11.928,
"step": 2544
},
{
"epoch": 8.971698113207546,
"grad_norm": 0.0715932622551918,
"learning_rate": 4.446945753838205e-06,
"loss": 0.0152,
"step": 2853
},
{
"epoch": 9.0,
"eval_accuracy": 0.9487096774193549,
"eval_loss": 0.016348928213119507,
"eval_runtime": 5.423,
"eval_samples_per_second": 571.636,
"eval_steps_per_second": 11.986,
"step": 2862
},
{
"epoch": 9.968553459119496,
"grad_norm": 0.057582587003707886,
"learning_rate": 4.195107161476372e-09,
"loss": 0.0149,
"step": 3170
}
],
"logging_steps": 317,
"max_steps": 3180,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000000000.0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 825404033099184.0,
"train_batch_size": 48,
"trial_name": null,
"trial_params": {
"alpha": 0.2541194692185785,
"learning_rate": 0.00017193372690989549,
"lr_scheduler_type": "cosine",
"num_train_epochs": 10,
"temperature": 11.572807433105293,
"weight_decay": 0.16868839944734676
}
}