|
{ |
|
"best_metric": 1.6590510606765747, |
|
"best_model_checkpoint": "outputs/checkpoint-408", |
|
"epoch": 11.698924731182796, |
|
"eval_steps": 500, |
|
"global_step": 408, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.5734767025089605, |
|
"grad_norm": 1.2473632097244263, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"loss": 2.9249, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.974910394265233, |
|
"eval_loss": 2.817430019378662, |
|
"eval_runtime": 11.5884, |
|
"eval_samples_per_second": 32.101, |
|
"eval_steps_per_second": 4.056, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 1.146953405017921, |
|
"grad_norm": 1.330991268157959, |
|
"learning_rate": 4.000000000000001e-06, |
|
"loss": 2.8932, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.7204301075268817, |
|
"grad_norm": 1.5549895763397217, |
|
"learning_rate": 6e-06, |
|
"loss": 2.8287, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.978494623655914, |
|
"eval_loss": 2.5992982387542725, |
|
"eval_runtime": 11.5924, |
|
"eval_samples_per_second": 32.09, |
|
"eval_steps_per_second": 4.054, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 2.293906810035842, |
|
"grad_norm": 1.9673638343811035, |
|
"learning_rate": 8.000000000000001e-06, |
|
"loss": 2.6659, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.867383512544803, |
|
"grad_norm": 1.759926199913025, |
|
"learning_rate": 1e-05, |
|
"loss": 2.3145, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.982078853046595, |
|
"eval_loss": 2.0121684074401855, |
|
"eval_runtime": 11.5907, |
|
"eval_samples_per_second": 32.095, |
|
"eval_steps_per_second": 4.055, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 3.4408602150537635, |
|
"grad_norm": 0.7806220650672913, |
|
"learning_rate": 9.896320793787106e-06, |
|
"loss": 2.0158, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 3.985663082437276, |
|
"eval_loss": 1.8575435876846313, |
|
"eval_runtime": 11.5927, |
|
"eval_samples_per_second": 32.089, |
|
"eval_steps_per_second": 4.054, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 4.014336917562724, |
|
"grad_norm": 0.6744422316551208, |
|
"learning_rate": 9.589582926268798e-06, |
|
"loss": 1.9297, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 4.587813620071684, |
|
"grad_norm": 0.636353075504303, |
|
"learning_rate": 9.092507332892968e-06, |
|
"loss": 1.8752, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 4.989247311827957, |
|
"eval_loss": 1.7877445220947266, |
|
"eval_runtime": 11.5902, |
|
"eval_samples_per_second": 32.096, |
|
"eval_steps_per_second": 4.055, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 5.161290322580645, |
|
"grad_norm": 0.6430570483207703, |
|
"learning_rate": 8.425708574839221e-06, |
|
"loss": 1.813, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 5.734767025089606, |
|
"grad_norm": 0.6457915306091309, |
|
"learning_rate": 7.616839918483061e-06, |
|
"loss": 1.7878, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 5.992831541218638, |
|
"eval_loss": 1.7403738498687744, |
|
"eval_runtime": 11.5876, |
|
"eval_samples_per_second": 32.103, |
|
"eval_steps_per_second": 4.056, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 6.308243727598566, |
|
"grad_norm": 0.6910040974617004, |
|
"learning_rate": 6.699446507913083e-06, |
|
"loss": 1.7346, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 6.881720430107527, |
|
"grad_norm": 0.673575758934021, |
|
"learning_rate": 5.711574191366427e-06, |
|
"loss": 1.7293, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 6.996415770609319, |
|
"eval_loss": 1.705881118774414, |
|
"eval_runtime": 11.588, |
|
"eval_samples_per_second": 32.102, |
|
"eval_steps_per_second": 4.056, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 7.455197132616488, |
|
"grad_norm": 0.6435667276382446, |
|
"learning_rate": 4.694191695890788e-06, |
|
"loss": 1.7172, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_loss": 1.6822781562805176, |
|
"eval_runtime": 11.5832, |
|
"eval_samples_per_second": 32.115, |
|
"eval_steps_per_second": 4.058, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 8.028673835125447, |
|
"grad_norm": 0.5970215201377869, |
|
"learning_rate": 3.689491585304491e-06, |
|
"loss": 1.6713, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 8.602150537634408, |
|
"grad_norm": 0.734722375869751, |
|
"learning_rate": 2.7391404635865725e-06, |
|
"loss": 1.6777, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 8.974910394265233, |
|
"eval_loss": 1.6685205698013306, |
|
"eval_runtime": 11.5892, |
|
"eval_samples_per_second": 32.099, |
|
"eval_steps_per_second": 4.056, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 9.175627240143369, |
|
"grad_norm": 0.6750782132148743, |
|
"learning_rate": 1.8825509907063328e-06, |
|
"loss": 1.6782, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 9.74910394265233, |
|
"grad_norm": 0.8320801854133606, |
|
"learning_rate": 1.1552473733031893e-06, |
|
"loss": 1.6629, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 9.978494623655914, |
|
"eval_loss": 1.6614694595336914, |
|
"eval_runtime": 11.5856, |
|
"eval_samples_per_second": 32.109, |
|
"eval_steps_per_second": 4.057, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 10.32258064516129, |
|
"grad_norm": 0.6981084942817688, |
|
"learning_rate": 5.873921160683943e-07, |
|
"loss": 1.6662, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 10.89605734767025, |
|
"grad_norm": 0.7434980273246765, |
|
"learning_rate": 2.0253513192751374e-07, |
|
"loss": 1.6533, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 10.982078853046595, |
|
"eval_loss": 1.6592113971710205, |
|
"eval_runtime": 11.5971, |
|
"eval_samples_per_second": 32.077, |
|
"eval_steps_per_second": 4.053, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 11.469534050179211, |
|
"grad_norm": 0.723169207572937, |
|
"learning_rate": 1.6637087529033925e-08, |
|
"loss": 1.6614, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 11.698924731182796, |
|
"eval_loss": 1.6590510606765747, |
|
"eval_runtime": 11.6001, |
|
"eval_samples_per_second": 32.069, |
|
"eval_steps_per_second": 4.052, |
|
"step": 408 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 408, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 12, |
|
"save_steps": 500, |
|
"total_flos": 2.147562637683917e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|