|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"global_step": 30, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3e-05, |
|
"loss": 2.17, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_accuracy": 0.06213930632535284, |
|
"eval_loss": 2.0546875, |
|
"eval_runtime": 3.2113, |
|
"eval_samples_per_second": 26.781, |
|
"eval_steps_per_second": 0.934, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3e-05, |
|
"loss": 2.1814, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_accuracy": 0.06213930632535284, |
|
"eval_loss": 2.0546875, |
|
"eval_runtime": 3.0319, |
|
"eval_samples_per_second": 28.365, |
|
"eval_steps_per_second": 0.989, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.99178284305241e-05, |
|
"loss": 2.0963, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_accuracy": 0.06251071367350437, |
|
"eval_loss": 2.0234375, |
|
"eval_runtime": 3.4167, |
|
"eval_samples_per_second": 25.17, |
|
"eval_steps_per_second": 0.878, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.9672214011007087e-05, |
|
"loss": 2.1383, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_accuracy": 0.062453574081481056, |
|
"eval_loss": 2.01953125, |
|
"eval_runtime": 3.4275, |
|
"eval_samples_per_second": 25.091, |
|
"eval_steps_per_second": 0.875, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.9265847744427305e-05, |
|
"loss": 2.1625, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_accuracy": 0.062453574081481056, |
|
"eval_loss": 2.01953125, |
|
"eval_runtime": 3.4165, |
|
"eval_samples_per_second": 25.172, |
|
"eval_steps_per_second": 0.878, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 2.8703181864639013e-05, |
|
"loss": 2.1808, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_accuracy": 0.06241071938746357, |
|
"eval_loss": 2.015625, |
|
"eval_runtime": 3.0274, |
|
"eval_samples_per_second": 28.408, |
|
"eval_steps_per_second": 0.991, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 2.7990381056766583e-05, |
|
"loss": 2.1587, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"eval_accuracy": 0.06255356836752185, |
|
"eval_loss": 2.017578125, |
|
"eval_runtime": 3.4415, |
|
"eval_samples_per_second": 24.989, |
|
"eval_steps_per_second": 0.872, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.7135254915624213e-05, |
|
"loss": 2.0847, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_accuracy": 0.06273927204159763, |
|
"eval_loss": 2.013671875, |
|
"eval_runtime": 3.4339, |
|
"eval_samples_per_second": 25.045, |
|
"eval_steps_per_second": 0.874, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.7135254915624213e-05, |
|
"loss": 2.0336, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"eval_accuracy": 0.06273927204159763, |
|
"eval_loss": 2.013671875, |
|
"eval_runtime": 3.4267, |
|
"eval_samples_per_second": 25.097, |
|
"eval_steps_per_second": 0.875, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 2.6147172382160913e-05, |
|
"loss": 2.1777, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_accuracy": 0.06286783612365007, |
|
"eval_loss": 2.005859375, |
|
"eval_runtime": 3.4314, |
|
"eval_samples_per_second": 25.062, |
|
"eval_steps_per_second": 0.874, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 2.5036959095382875e-05, |
|
"loss": 2.2034, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_accuracy": 0.06299640020570253, |
|
"eval_loss": 2.0, |
|
"eval_runtime": 3.0112, |
|
"eval_samples_per_second": 28.56, |
|
"eval_steps_per_second": 0.996, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 2.3816778784387097e-05, |
|
"loss": 2.1665, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_accuracy": 0.06283926632763842, |
|
"eval_loss": 1.994140625, |
|
"eval_runtime": 2.8048, |
|
"eval_samples_per_second": 30.661, |
|
"eval_steps_per_second": 1.07, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.25e-05, |
|
"loss": 2.0352, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_accuracy": 0.0628821210216559, |
|
"eval_loss": 1.98828125, |
|
"eval_runtime": 2.7144, |
|
"eval_samples_per_second": 31.682, |
|
"eval_steps_per_second": 1.105, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 2.1101049646137008e-05, |
|
"loss": 2.1263, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"eval_accuracy": 0.06281069653162677, |
|
"eval_loss": 1.9833984375, |
|
"eval_runtime": 3.4127, |
|
"eval_samples_per_second": 25.2, |
|
"eval_steps_per_second": 0.879, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 1.963525491562421e-05, |
|
"loss": 2.1282, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_accuracy": 0.06315353408376664, |
|
"eval_loss": 1.978515625, |
|
"eval_runtime": 3.4158, |
|
"eval_samples_per_second": 25.177, |
|
"eval_steps_per_second": 0.878, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 1.8118675362266388e-05, |
|
"loss": 1.7159, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"eval_accuracy": 0.0633392377578424, |
|
"eval_loss": 1.9765625, |
|
"eval_runtime": 3.3102, |
|
"eval_samples_per_second": 25.98, |
|
"eval_steps_per_second": 0.906, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 1.6567926949014805e-05, |
|
"loss": 1.8346, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_accuracy": 0.06351065653391234, |
|
"eval_loss": 1.9775390625, |
|
"eval_runtime": 2.7366, |
|
"eval_samples_per_second": 31.426, |
|
"eval_steps_per_second": 1.096, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 1.5e-05, |
|
"loss": 1.7183, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_accuracy": 0.06342494714587738, |
|
"eval_loss": 1.982421875, |
|
"eval_runtime": 3.4207, |
|
"eval_samples_per_second": 25.141, |
|
"eval_steps_per_second": 0.877, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 1.3432073050985201e-05, |
|
"loss": 1.6086, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_accuracy": 0.06346780183989487, |
|
"eval_loss": 1.98828125, |
|
"eval_runtime": 3.4214, |
|
"eval_samples_per_second": 25.136, |
|
"eval_steps_per_second": 0.877, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.1881324637733613e-05, |
|
"loss": 1.6497, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"eval_accuracy": 0.0634392320438832, |
|
"eval_loss": 1.9892578125, |
|
"eval_runtime": 3.3147, |
|
"eval_samples_per_second": 25.945, |
|
"eval_steps_per_second": 0.905, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.036474508437579e-05, |
|
"loss": 1.6267, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"eval_accuracy": 0.06368207530998228, |
|
"eval_loss": 1.9853515625, |
|
"eval_runtime": 2.7501, |
|
"eval_samples_per_second": 31.272, |
|
"eval_steps_per_second": 1.091, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 8.898950353863e-06, |
|
"loss": 1.5962, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"eval_accuracy": 0.06368207530998228, |
|
"eval_loss": 1.9765625, |
|
"eval_runtime": 3.1007, |
|
"eval_samples_per_second": 27.736, |
|
"eval_steps_per_second": 0.968, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 7.500000000000004e-06, |
|
"loss": 1.5168, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_accuracy": 0.06372493000399977, |
|
"eval_loss": 1.9697265625, |
|
"eval_runtime": 3.0137, |
|
"eval_samples_per_second": 28.536, |
|
"eval_steps_per_second": 0.995, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 6.1832212156129045e-06, |
|
"loss": 1.6213, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_accuracy": 0.06372493000399977, |
|
"eval_loss": 1.9619140625, |
|
"eval_runtime": 3.4172, |
|
"eval_samples_per_second": 25.167, |
|
"eval_steps_per_second": 0.878, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 4.963040904617131e-06, |
|
"loss": 1.4789, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"eval_accuracy": 0.06382492429004057, |
|
"eval_loss": 1.9580078125, |
|
"eval_runtime": 2.7014, |
|
"eval_samples_per_second": 31.836, |
|
"eval_steps_per_second": 1.111, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 3.852827617839085e-06, |
|
"loss": 1.6796, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"eval_accuracy": 0.06378206959602309, |
|
"eval_loss": 1.955078125, |
|
"eval_runtime": 3.0318, |
|
"eval_samples_per_second": 28.366, |
|
"eval_steps_per_second": 0.99, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 2.86474508437579e-06, |
|
"loss": 1.5964, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"eval_accuracy": 0.06383920918804639, |
|
"eval_loss": 1.953125, |
|
"eval_runtime": 3.4212, |
|
"eval_samples_per_second": 25.137, |
|
"eval_steps_per_second": 0.877, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 2.0096189432334194e-06, |
|
"loss": 1.787, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"eval_accuracy": 0.0639392034740872, |
|
"eval_loss": 1.951171875, |
|
"eval_runtime": 3.4776, |
|
"eval_samples_per_second": 24.729, |
|
"eval_steps_per_second": 0.863, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.2968181353609854e-06, |
|
"loss": 1.6536, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"eval_accuracy": 0.0639963430661105, |
|
"eval_loss": 1.94921875, |
|
"eval_runtime": 3.3251, |
|
"eval_samples_per_second": 25.864, |
|
"eval_steps_per_second": 0.902, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 7.341522555726971e-07, |
|
"loss": 1.7178, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_accuracy": 0.06395348837209303, |
|
"eval_loss": 1.9482421875, |
|
"eval_runtime": 3.013, |
|
"eval_samples_per_second": 28.543, |
|
"eval_steps_per_second": 0.996, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 30, |
|
"total_flos": 3114896719872.0, |
|
"train_loss": 1.8948323567708334, |
|
"train_runtime": 176.8477, |
|
"train_samples_per_second": 5.191, |
|
"train_steps_per_second": 0.17 |
|
} |
|
], |
|
"max_steps": 30, |
|
"num_train_epochs": 2, |
|
"total_flos": 3114896719872.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|