|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 15.0, |
|
"eval_steps": 500, |
|
"global_step": 4770, |
|
"is_hyper_param_search": true, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9968553459119497, |
|
"grad_norm": 0.42379891872406006, |
|
"learning_rate": 0.00013801466204863748, |
|
"loss": 0.2007, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.8958064516129032, |
|
"eval_loss": 0.04874483495950699, |
|
"eval_runtime": 5.4128, |
|
"eval_samples_per_second": 572.716, |
|
"eval_steps_per_second": 12.009, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 1.9937106918238994, |
|
"grad_norm": 0.2129104882478714, |
|
"learning_rate": 0.0001281896793696754, |
|
"loss": 0.0453, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.9332258064516129, |
|
"eval_loss": 0.031727299094200134, |
|
"eval_runtime": 5.5097, |
|
"eval_samples_per_second": 562.647, |
|
"eval_steps_per_second": 11.797, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 2.990566037735849, |
|
"grad_norm": 0.2193145602941513, |
|
"learning_rate": 0.00011836469669071336, |
|
"loss": 0.0301, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_accuracy": 0.9374193548387096, |
|
"eval_loss": 0.023718850687146187, |
|
"eval_runtime": 5.4248, |
|
"eval_samples_per_second": 571.454, |
|
"eval_steps_per_second": 11.982, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 3.9874213836477987, |
|
"grad_norm": 0.14876721799373627, |
|
"learning_rate": 0.00010853971401175128, |
|
"loss": 0.0243, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.9406451612903226, |
|
"eval_loss": 0.021157393231987953, |
|
"eval_runtime": 5.4343, |
|
"eval_samples_per_second": 570.447, |
|
"eval_steps_per_second": 11.961, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 4.984276729559748, |
|
"grad_norm": 0.07504376024007797, |
|
"learning_rate": 9.871473133278923e-05, |
|
"loss": 0.0215, |
|
"step": 1585 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.9429032258064516, |
|
"eval_loss": 0.020281489938497543, |
|
"eval_runtime": 5.5277, |
|
"eval_samples_per_second": 560.811, |
|
"eval_steps_per_second": 11.759, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 5.981132075471698, |
|
"grad_norm": 0.11960328370332718, |
|
"learning_rate": 8.888974865382715e-05, |
|
"loss": 0.0197, |
|
"step": 1902 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_accuracy": 0.9454838709677419, |
|
"eval_loss": 0.019814670085906982, |
|
"eval_runtime": 5.4509, |
|
"eval_samples_per_second": 568.713, |
|
"eval_steps_per_second": 11.925, |
|
"step": 1908 |
|
}, |
|
{ |
|
"epoch": 6.977987421383648, |
|
"grad_norm": 0.08081384003162384, |
|
"learning_rate": 7.906476597486509e-05, |
|
"loss": 0.0184, |
|
"step": 2219 |
|
}, |
|
{ |
|
"epoch": 7.0, |
|
"eval_accuracy": 0.9435483870967742, |
|
"eval_loss": 0.01884547807276249, |
|
"eval_runtime": 5.4199, |
|
"eval_samples_per_second": 571.969, |
|
"eval_steps_per_second": 11.993, |
|
"step": 2226 |
|
}, |
|
{ |
|
"epoch": 7.9748427672955975, |
|
"grad_norm": 0.07167127728462219, |
|
"learning_rate": 6.923978329590303e-05, |
|
"loss": 0.0173, |
|
"step": 2536 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.9451612903225807, |
|
"eval_loss": 0.018296979367733, |
|
"eval_runtime": 5.4972, |
|
"eval_samples_per_second": 563.924, |
|
"eval_steps_per_second": 11.824, |
|
"step": 2544 |
|
}, |
|
{ |
|
"epoch": 8.971698113207546, |
|
"grad_norm": 0.08106838166713715, |
|
"learning_rate": 5.941480061694095e-05, |
|
"loss": 0.0164, |
|
"step": 2853 |
|
}, |
|
{ |
|
"epoch": 9.0, |
|
"eval_accuracy": 0.9419354838709677, |
|
"eval_loss": 0.018038731068372726, |
|
"eval_runtime": 5.4283, |
|
"eval_samples_per_second": 571.085, |
|
"eval_steps_per_second": 11.974, |
|
"step": 2862 |
|
}, |
|
{ |
|
"epoch": 9.968553459119496, |
|
"grad_norm": 0.05913994088768959, |
|
"learning_rate": 4.9589817937978886e-05, |
|
"loss": 0.0157, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.9487096774193549, |
|
"eval_loss": 0.017092861235141754, |
|
"eval_runtime": 5.4238, |
|
"eval_samples_per_second": 571.555, |
|
"eval_steps_per_second": 11.984, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 10.965408805031446, |
|
"grad_norm": 0.0745585560798645, |
|
"learning_rate": 3.976483525901682e-05, |
|
"loss": 0.015, |
|
"step": 3487 |
|
}, |
|
{ |
|
"epoch": 11.0, |
|
"eval_accuracy": 0.9480645161290323, |
|
"eval_loss": 0.01634313352406025, |
|
"eval_runtime": 5.4783, |
|
"eval_samples_per_second": 565.874, |
|
"eval_steps_per_second": 11.865, |
|
"step": 3498 |
|
}, |
|
{ |
|
"epoch": 11.962264150943396, |
|
"grad_norm": 0.05740603059530258, |
|
"learning_rate": 2.9939852580054752e-05, |
|
"loss": 0.0144, |
|
"step": 3804 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.9474193548387096, |
|
"eval_loss": 0.016295555979013443, |
|
"eval_runtime": 5.4332, |
|
"eval_samples_per_second": 570.57, |
|
"eval_steps_per_second": 11.964, |
|
"step": 3816 |
|
}, |
|
{ |
|
"epoch": 12.959119496855346, |
|
"grad_norm": 0.07131999731063843, |
|
"learning_rate": 2.0114869901092685e-05, |
|
"loss": 0.0139, |
|
"step": 4121 |
|
}, |
|
{ |
|
"epoch": 13.0, |
|
"eval_accuracy": 0.9470967741935484, |
|
"eval_loss": 0.015728095546364784, |
|
"eval_runtime": 5.4211, |
|
"eval_samples_per_second": 571.838, |
|
"eval_steps_per_second": 11.99, |
|
"step": 4134 |
|
}, |
|
{ |
|
"epoch": 13.955974842767295, |
|
"grad_norm": 0.05492794141173363, |
|
"learning_rate": 1.0289887222130618e-05, |
|
"loss": 0.0134, |
|
"step": 4438 |
|
}, |
|
{ |
|
"epoch": 14.0, |
|
"eval_accuracy": 0.9493548387096774, |
|
"eval_loss": 0.01537332870066166, |
|
"eval_runtime": 5.4466, |
|
"eval_samples_per_second": 569.159, |
|
"eval_steps_per_second": 11.934, |
|
"step": 4452 |
|
}, |
|
{ |
|
"epoch": 14.952830188679245, |
|
"grad_norm": 0.04989900812506676, |
|
"learning_rate": 4.6490454316855204e-07, |
|
"loss": 0.0131, |
|
"step": 4755 |
|
} |
|
], |
|
"logging_steps": 317, |
|
"max_steps": 4770, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 15, |
|
"save_steps": 1000000000.0, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1236646073993904.0, |
|
"train_batch_size": 48, |
|
"trial_name": null, |
|
"trial_params": { |
|
"alpha": 0.37288483884424406, |
|
"learning_rate": 0.00014783964472759955, |
|
"lr_scheduler_type": "linear", |
|
"num_train_epochs": 15, |
|
"temperature": 9.186082756077344, |
|
"weight_decay": 0.11993053192781417 |
|
} |
|
} |
|
|