|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.017770597738287562, |
|
"eval_steps": 1, |
|
"global_step": 11, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0016155088852988692, |
|
"grad_norm": 10.710270881652832, |
|
"learning_rate": 2.6666666666666664e-06, |
|
"loss": 1.8319, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0016155088852988692, |
|
"eval_loss": 1.8248138427734375, |
|
"eval_runtime": 220.146, |
|
"eval_samples_per_second": 0.454, |
|
"eval_steps_per_second": 0.059, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0032310177705977385, |
|
"grad_norm": 9.534977912902832, |
|
"learning_rate": 5.333333333333333e-06, |
|
"loss": 1.8296, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0032310177705977385, |
|
"eval_loss": 1.816626787185669, |
|
"eval_runtime": 220.6901, |
|
"eval_samples_per_second": 0.453, |
|
"eval_steps_per_second": 0.059, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.004846526655896607, |
|
"grad_norm": 9.70173168182373, |
|
"learning_rate": 8e-06, |
|
"loss": 1.7468, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.004846526655896607, |
|
"eval_loss": 1.8004165887832642, |
|
"eval_runtime": 220.5949, |
|
"eval_samples_per_second": 0.453, |
|
"eval_steps_per_second": 0.059, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.006462035541195477, |
|
"grad_norm": 9.628801345825195, |
|
"learning_rate": 7.529411764705882e-06, |
|
"loss": 1.4906, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.006462035541195477, |
|
"eval_loss": 1.7767361402511597, |
|
"eval_runtime": 220.4948, |
|
"eval_samples_per_second": 0.454, |
|
"eval_steps_per_second": 0.059, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.008077544426494346, |
|
"grad_norm": 9.02491283416748, |
|
"learning_rate": 7.058823529411764e-06, |
|
"loss": 1.7261, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.008077544426494346, |
|
"eval_loss": 1.7553186416625977, |
|
"eval_runtime": 220.7639, |
|
"eval_samples_per_second": 0.453, |
|
"eval_steps_per_second": 0.059, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.009693053311793215, |
|
"grad_norm": 8.989361763000488, |
|
"learning_rate": 6.588235294117646e-06, |
|
"loss": 1.8311, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.009693053311793215, |
|
"eval_loss": 1.7363536357879639, |
|
"eval_runtime": 220.3852, |
|
"eval_samples_per_second": 0.454, |
|
"eval_steps_per_second": 0.059, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.011308562197092083, |
|
"grad_norm": 8.91972541809082, |
|
"learning_rate": 6.1176470588235285e-06, |
|
"loss": 1.8441, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.011308562197092083, |
|
"eval_loss": 1.720198631286621, |
|
"eval_runtime": 220.8055, |
|
"eval_samples_per_second": 0.453, |
|
"eval_steps_per_second": 0.059, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.012924071082390954, |
|
"grad_norm": 5.602985858917236, |
|
"learning_rate": 5.647058823529412e-06, |
|
"loss": 1.6051, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.012924071082390954, |
|
"eval_loss": 1.7061126232147217, |
|
"eval_runtime": 220.3213, |
|
"eval_samples_per_second": 0.454, |
|
"eval_steps_per_second": 0.059, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.014539579967689823, |
|
"grad_norm": 4.554469108581543, |
|
"learning_rate": 5.176470588235294e-06, |
|
"loss": 1.6065, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.014539579967689823, |
|
"eval_loss": 1.69382643699646, |
|
"eval_runtime": 220.1928, |
|
"eval_samples_per_second": 0.454, |
|
"eval_steps_per_second": 0.059, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01615508885298869, |
|
"grad_norm": 4.634820461273193, |
|
"learning_rate": 4.705882352941176e-06, |
|
"loss": 1.6142, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01615508885298869, |
|
"eval_loss": 1.6832895278930664, |
|
"eval_runtime": 220.9396, |
|
"eval_samples_per_second": 0.453, |
|
"eval_steps_per_second": 0.059, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.017770597738287562, |
|
"grad_norm": 3.6815476417541504, |
|
"learning_rate": 4.235294117647058e-06, |
|
"loss": 1.3256, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.017770597738287562, |
|
"eval_loss": 1.6742274761199951, |
|
"eval_runtime": 220.8408, |
|
"eval_samples_per_second": 0.453, |
|
"eval_steps_per_second": 0.059, |
|
"step": 11 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 20, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1, |
|
"total_flos": 4063376418275328.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|