|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 4.0,
  "global_step": 13600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.15,
      "learning_rate": 4.816176470588236e-05,
      "loss": 2.6313,
      "step": 500
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.632352941176471e-05,
      "loss": 2.2069,
      "step": 1000
    },
    {
      "epoch": 0.44,
      "learning_rate": 4.448529411764706e-05,
      "loss": 2.035,
      "step": 1500
    },
    {
      "epoch": 0.59,
      "learning_rate": 4.2647058823529415e-05,
      "loss": 1.9491,
      "step": 2000
    },
    {
      "epoch": 0.74,
      "learning_rate": 4.08125e-05,
      "loss": 1.8742,
      "step": 2500
    },
    {
      "epoch": 0.88,
      "learning_rate": 3.897426470588236e-05,
      "loss": 1.8387,
      "step": 3000
    },
    {
      "epoch": 1.03,
      "learning_rate": 3.713602941176471e-05,
      "loss": 1.6941,
      "step": 3500
    },
    {
      "epoch": 1.18,
      "learning_rate": 3.529779411764706e-05,
      "loss": 1.5224,
      "step": 4000
    },
    {
      "epoch": 1.32,
      "learning_rate": 3.3459558823529415e-05,
      "loss": 1.4897,
      "step": 4500
    },
    {
      "epoch": 1.47,
      "learning_rate": 3.1621323529411765e-05,
      "loss": 1.4445,
      "step": 5000
    },
    {
      "epoch": 1.62,
      "learning_rate": 2.978308823529412e-05,
      "loss": 1.4593,
      "step": 5500
    },
    {
      "epoch": 1.76,
      "learning_rate": 2.7944852941176468e-05,
      "loss": 1.4251,
      "step": 6000
    },
    {
      "epoch": 1.91,
      "learning_rate": 2.6113970588235297e-05,
      "loss": 1.39,
      "step": 6500
    },
    {
      "epoch": 2.06,
      "learning_rate": 2.427573529411765e-05,
      "loss": 1.2959,
      "step": 7000
    },
    {
      "epoch": 2.21,
      "learning_rate": 2.24375e-05,
      "loss": 1.1621,
      "step": 7500
    },
    {
      "epoch": 2.35,
      "learning_rate": 2.0599264705882353e-05,
      "loss": 1.1374,
      "step": 8000
    },
    {
      "epoch": 2.5,
      "learning_rate": 1.876102941176471e-05,
      "loss": 1.1649,
      "step": 8500
    },
    {
      "epoch": 2.65,
      "learning_rate": 1.6926470588235294e-05,
      "loss": 1.1513,
      "step": 9000
    },
    {
      "epoch": 2.79,
      "learning_rate": 1.5088235294117647e-05,
      "loss": 1.1463,
      "step": 9500
    },
    {
      "epoch": 2.94,
      "learning_rate": 1.3250000000000002e-05,
      "loss": 1.1466,
      "step": 10000
    },
    {
      "epoch": 3.09,
      "learning_rate": 1.1411764705882353e-05,
      "loss": 1.0411,
      "step": 10500
    },
    {
      "epoch": 3.24,
      "learning_rate": 9.573529411764706e-06,
      "loss": 0.9581,
      "step": 11000
    },
    {
      "epoch": 3.38,
      "learning_rate": 7.735294117647058e-06,
      "loss": 0.9514,
      "step": 11500
    },
    {
      "epoch": 3.53,
      "learning_rate": 5.897058823529412e-06,
      "loss": 0.9429,
      "step": 12000
    },
    {
      "epoch": 3.68,
      "learning_rate": 4.058823529411765e-06,
      "loss": 0.9676,
      "step": 12500
    },
    {
      "epoch": 3.82,
      "learning_rate": 2.2205882352941175e-06,
      "loss": 0.9324,
      "step": 13000
    },
    {
      "epoch": 3.97,
      "learning_rate": 3.8235294117647064e-07,
      "loss": 0.9555,
      "step": 13500
    },
    {
      "epoch": 4.0,
      "step": 13600,
      "total_flos": 3.918346910230118e+16,
      "train_loss": 1.4005291703168083,
      "train_runtime": 11994.4751,
      "train_samples_per_second": 18.139,
      "train_steps_per_second": 1.134
    }
  ],
  "max_steps": 13600,
  "num_train_epochs": 4,
  "total_flos": 3.918346910230118e+16,
  "trial_name": null,
  "trial_params": null
}
|
|