thomnis's picture
Training in progress, step 4770
29cfbc7 verified
raw
history blame
7.01 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.0,
"eval_steps": 500,
"global_step": 4770,
"is_hyper_param_search": true,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.9968553459119497,
"grad_norm": 0.36009669303894043,
"learning_rate": 0.00018598534310647178,
"loss": 0.1867,
"step": 317
},
{
"epoch": 1.0,
"eval_accuracy": 0.8990322580645161,
"eval_loss": 0.04464339092373848,
"eval_runtime": 5.5111,
"eval_samples_per_second": 562.5,
"eval_steps_per_second": 11.794,
"step": 318
},
{
"epoch": 1.9937106918238994,
"grad_norm": 0.34826424717903137,
"learning_rate": 0.00017994931344688276,
"loss": 0.0427,
"step": 634
},
{
"epoch": 2.0,
"eval_accuracy": 0.927741935483871,
"eval_loss": 0.03450721129775047,
"eval_runtime": 5.4344,
"eval_samples_per_second": 570.443,
"eval_steps_per_second": 11.961,
"step": 636
},
{
"epoch": 2.990566037735849,
"grad_norm": 0.19493499398231506,
"learning_rate": 0.0001701809729547082,
"loss": 0.0298,
"step": 951
},
{
"epoch": 3.0,
"eval_accuracy": 0.94,
"eval_loss": 0.02594602108001709,
"eval_runtime": 5.4515,
"eval_samples_per_second": 568.654,
"eval_steps_per_second": 11.923,
"step": 954
},
{
"epoch": 3.9874213836477987,
"grad_norm": 0.12020555883646011,
"learning_rate": 0.00015710457389761306,
"loss": 0.0235,
"step": 1268
},
{
"epoch": 4.0,
"eval_accuracy": 0.9393548387096774,
"eval_loss": 0.023855317384004593,
"eval_runtime": 5.4198,
"eval_samples_per_second": 571.973,
"eval_steps_per_second": 11.993,
"step": 1272
},
{
"epoch": 4.984276729559748,
"grad_norm": 0.23387576639652252,
"learning_rate": 0.00014128804201050683,
"loss": 0.0209,
"step": 1585
},
{
"epoch": 5.0,
"eval_accuracy": 0.9441935483870968,
"eval_loss": 0.02296462096273899,
"eval_runtime": 5.3896,
"eval_samples_per_second": 575.184,
"eval_steps_per_second": 12.06,
"step": 1590
},
{
"epoch": 5.981132075471698,
"grad_norm": 0.0913698598742485,
"learning_rate": 0.00012341831071110512,
"loss": 0.0193,
"step": 1902
},
{
"epoch": 6.0,
"eval_accuracy": 0.9429032258064516,
"eval_loss": 0.0211862213909626,
"eval_runtime": 5.4057,
"eval_samples_per_second": 573.472,
"eval_steps_per_second": 12.024,
"step": 1908
},
{
"epoch": 6.977987421383648,
"grad_norm": 0.07358141243457794,
"learning_rate": 0.0001042714866505231,
"loss": 0.0176,
"step": 2219
},
{
"epoch": 7.0,
"eval_accuracy": 0.9435483870967742,
"eval_loss": 0.02031989023089409,
"eval_runtime": 5.4481,
"eval_samples_per_second": 569.006,
"eval_steps_per_second": 11.931,
"step": 2226
},
{
"epoch": 7.9748427672955975,
"grad_norm": 0.06623157858848572,
"learning_rate": 8.46791423494761e-05,
"loss": 0.0164,
"step": 2536
},
{
"epoch": 8.0,
"eval_accuracy": 0.9438709677419355,
"eval_loss": 0.01943557895720005,
"eval_runtime": 5.431,
"eval_samples_per_second": 570.797,
"eval_steps_per_second": 11.968,
"step": 2544
},
{
"epoch": 8.971698113207546,
"grad_norm": 0.07008218765258789,
"learning_rate": 6.549219987658473e-05,
"loss": 0.0152,
"step": 2853
},
{
"epoch": 9.0,
"eval_accuracy": 0.9470967741935484,
"eval_loss": 0.018487755209207535,
"eval_runtime": 5.4068,
"eval_samples_per_second": 573.354,
"eval_steps_per_second": 12.022,
"step": 2862
},
{
"epoch": 9.968553459119496,
"grad_norm": 0.053510431200265884,
"learning_rate": 4.754397414958767e-05,
"loss": 0.0143,
"step": 3170
},
{
"epoch": 10.0,
"eval_accuracy": 0.9496774193548387,
"eval_loss": 0.017804542556405067,
"eval_runtime": 5.4089,
"eval_samples_per_second": 573.127,
"eval_steps_per_second": 12.017,
"step": 3180
},
{
"epoch": 10.965408805031446,
"grad_norm": 0.06439082324504852,
"learning_rate": 3.161398093898263e-05,
"loss": 0.0136,
"step": 3487
},
{
"epoch": 11.0,
"eval_accuracy": 0.95,
"eval_loss": 0.017272653058171272,
"eval_runtime": 5.4681,
"eval_samples_per_second": 566.924,
"eval_steps_per_second": 11.887,
"step": 3498
},
{
"epoch": 11.962264150943396,
"grad_norm": 0.05270359292626381,
"learning_rate": 1.8394081441551166e-05,
"loss": 0.0131,
"step": 3804
},
{
"epoch": 12.0,
"eval_accuracy": 0.9480645161290323,
"eval_loss": 0.01707734353840351,
"eval_runtime": 5.4779,
"eval_samples_per_second": 565.907,
"eval_steps_per_second": 11.866,
"step": 3816
},
{
"epoch": 12.959119496855346,
"grad_norm": 0.05668620765209198,
"learning_rate": 8.458433810828254e-06,
"loss": 0.0128,
"step": 4121
},
{
"epoch": 13.0,
"eval_accuracy": 0.947741935483871,
"eval_loss": 0.016805246472358704,
"eval_runtime": 5.433,
"eval_samples_per_second": 570.591,
"eval_steps_per_second": 11.964,
"step": 4134
},
{
"epoch": 13.955974842767295,
"grad_norm": 0.05400459095835686,
"learning_rate": 2.2385566902754444e-06,
"loss": 0.0125,
"step": 4438
},
{
"epoch": 14.0,
"eval_accuracy": 0.9490322580645161,
"eval_loss": 0.016659492626786232,
"eval_runtime": 5.4145,
"eval_samples_per_second": 572.537,
"eval_steps_per_second": 12.005,
"step": 4452
},
{
"epoch": 14.952830188679245,
"grad_norm": 0.04551490768790245,
"learning_rate": 4.587773711453001e-09,
"loss": 0.0124,
"step": 4755
}
],
"logging_steps": 317,
"max_steps": 4770,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 1000000000.0,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1240673718275628.0,
"train_batch_size": 48,
"trial_name": null,
"trial_params": {
"alpha": 0.8625117920088357,
"learning_rate": 0.0001880269089840747,
"lr_scheduler_type": "cosine",
"num_train_epochs": 15,
"temperature": 9.794825072054605,
"weight_decay": 0.2689344090657866
}
}