{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 14.0,
  "eval_steps": 500,
  "global_step": 4452,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9968553459119497,
      "grad_norm": 0.47506219148635864,
      "learning_rate": 9.46923688245487e-06,
      "loss": 0.4817,
      "step": 317
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.33064516129032256,
      "eval_loss": 0.3162553906440735,
      "eval_runtime": 2.713,
      "eval_samples_per_second": 1142.656,
      "eval_steps_per_second": 23.959,
      "step": 318
    },
    {
      "epoch": 1.9937106918238994,
      "grad_norm": 0.5554618239402771,
      "learning_rate": 8.743300221816854e-06,
      "loss": 0.2672,
      "step": 634
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6270967741935484,
      "eval_loss": 0.18112796545028687,
      "eval_runtime": 2.624,
      "eval_samples_per_second": 1181.409,
      "eval_steps_per_second": 24.771,
      "step": 636
    },
    {
      "epoch": 2.990566037735849,
      "grad_norm": 0.6009519100189209,
      "learning_rate": 8.017363561178839e-06,
      "loss": 0.1758,
      "step": 951
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7761290322580645,
      "eval_loss": 0.12200149148702621,
      "eval_runtime": 2.6358,
      "eval_samples_per_second": 1176.11,
      "eval_steps_per_second": 24.66,
      "step": 954
    },
    {
      "epoch": 3.9874213836477987,
      "grad_norm": 0.5635930299758911,
      "learning_rate": 7.291426900540823e-06,
      "loss": 0.1322,
      "step": 1268
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.8258064516129032,
      "eval_loss": 0.09276342391967773,
      "eval_runtime": 2.6813,
      "eval_samples_per_second": 1156.15,
      "eval_steps_per_second": 24.242,
      "step": 1272
    },
    {
      "epoch": 4.984276729559748,
      "grad_norm": 0.3592129349708557,
      "learning_rate": 6.5654902399028074e-06,
      "loss": 0.1073,
      "step": 1585
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.8583870967741936,
      "eval_loss": 0.07522393763065338,
      "eval_runtime": 2.6613,
      "eval_samples_per_second": 1164.858,
      "eval_steps_per_second": 24.424,
      "step": 1590
    },
    {
      "epoch": 5.981132075471698,
      "grad_norm": 0.5057026147842407,
      "learning_rate": 5.839553579264793e-06,
      "loss": 0.0919,
      "step": 1902
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8806451612903226,
      "eval_loss": 0.06308811157941818,
      "eval_runtime": 2.6307,
      "eval_samples_per_second": 1178.407,
      "eval_steps_per_second": 24.709,
      "step": 1908
    },
    {
      "epoch": 6.977987421383648,
      "grad_norm": 0.35764896869659424,
      "learning_rate": 5.1136169186267775e-06,
      "loss": 0.0813,
      "step": 2219
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.8877419354838709,
      "eval_loss": 0.05563005059957504,
      "eval_runtime": 2.6575,
      "eval_samples_per_second": 1166.531,
      "eval_steps_per_second": 24.46,
      "step": 2226
    },
    {
      "epoch": 7.9748427672955975,
      "grad_norm": 0.3608967363834381,
      "learning_rate": 4.387680257988761e-06,
      "loss": 0.0733,
      "step": 2536
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.8977419354838709,
      "eval_loss": 0.05038844048976898,
      "eval_runtime": 2.6676,
      "eval_samples_per_second": 1162.09,
      "eval_steps_per_second": 24.366,
      "step": 2544
    },
    {
      "epoch": 8.971698113207546,
      "grad_norm": 0.43566691875457764,
      "learning_rate": 3.661743597350746e-06,
      "loss": 0.0679,
      "step": 2853
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9019354838709678,
      "eval_loss": 0.045959629118442535,
      "eval_runtime": 2.69,
      "eval_samples_per_second": 1152.43,
      "eval_steps_per_second": 24.164,
      "step": 2862
    },
    {
      "epoch": 9.968553459119496,
      "grad_norm": 0.3841456472873688,
      "learning_rate": 2.935806936712731e-06,
      "loss": 0.0636,
      "step": 3170
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9038709677419355,
      "eval_loss": 0.04300825670361519,
      "eval_runtime": 2.6725,
      "eval_samples_per_second": 1159.945,
      "eval_steps_per_second": 24.321,
      "step": 3180
    },
    {
      "epoch": 10.965408805031446,
      "grad_norm": 0.3714827597141266,
      "learning_rate": 2.2098702760747155e-06,
      "loss": 0.0605,
      "step": 3487
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.905483870967742,
      "eval_loss": 0.04122327268123627,
      "eval_runtime": 2.6669,
      "eval_samples_per_second": 1162.413,
      "eval_steps_per_second": 24.373,
      "step": 3498
    },
    {
      "epoch": 11.962264150943396,
      "grad_norm": 0.28599628806114197,
      "learning_rate": 1.4839336154367003e-06,
      "loss": 0.0584,
      "step": 3804
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.9080645161290323,
      "eval_loss": 0.039800167083740234,
      "eval_runtime": 2.6582,
      "eval_samples_per_second": 1166.221,
      "eval_steps_per_second": 24.453,
      "step": 3816
    },
    {
      "epoch": 12.959119496855346,
      "grad_norm": 0.3374980390071869,
      "learning_rate": 7.579969547986848e-07,
      "loss": 0.0568,
      "step": 4121
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.9103225806451613,
      "eval_loss": 0.03894847258925438,
      "eval_runtime": 2.6631,
      "eval_samples_per_second": 1164.046,
      "eval_steps_per_second": 24.407,
      "step": 4134
    },
    {
      "epoch": 13.955974842767295,
      "grad_norm": 0.2531619071960449,
      "learning_rate": 3.206029416066945e-08,
      "loss": 0.0562,
      "step": 4438
    }
  ],
  "logging_steps": 317,
  "max_steps": 4452,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 14,
  "save_steps": 1000000000.0,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1234918384253064.0,
  "train_batch_size": 48,
  "trial_name": null,
  "trial_params": {
    "alpha": 0.6907680425307066,
    "fp16": true,
    "learning_rate": 1.015395316488631e-05,
    "lr_scheduler": "cosine",
    "num_train_epochs": 14.721187464037747,
    "temperature": 13,
    "warmup_steps": 18,
    "weight_decay": 0.2870223156122959
  }
}