{
  "best_metric": 1.2222630977630615,
  "best_model_checkpoint": "./outputs/checkpoint-4000",
  "epoch": 2.914754098360656,
  "eval_steps": 100,
  "global_step": 4000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.07,
      "learning_rate": 0.0002,
      "loss": 2.1822,
      "step": 100
    },
    {
      "epoch": 0.07,
      "eval_loss": 2.0121946334838867,
      "eval_runtime": 144.2461,
      "eval_samples_per_second": 43.495,
      "eval_steps_per_second": 5.442,
      "step": 100
    },
    {
      "epoch": 0.15,
      "learning_rate": 0.0002,
      "loss": 1.9624,
      "step": 200
    },
    {
      "epoch": 0.15,
      "eval_loss": 1.9195032119750977,
      "eval_runtime": 144.1813,
      "eval_samples_per_second": 43.515,
      "eval_steps_per_second": 5.445,
      "step": 200
    },
    {
      "epoch": 0.22,
      "learning_rate": 0.0002,
      "loss": 1.8883,
      "step": 300
    },
    {
      "epoch": 0.22,
      "eval_loss": 1.8597155809402466,
      "eval_runtime": 144.1877,
      "eval_samples_per_second": 43.513,
      "eval_steps_per_second": 5.444,
      "step": 300
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.0002,
      "loss": 1.8371,
      "step": 400
    },
    {
      "epoch": 0.29,
      "eval_loss": 1.8130855560302734,
      "eval_runtime": 144.1652,
      "eval_samples_per_second": 43.52,
      "eval_steps_per_second": 5.445,
      "step": 400
    },
    {
      "epoch": 0.36,
      "learning_rate": 0.0002,
      "loss": 1.7855,
      "step": 500
    },
    {
      "epoch": 0.36,
      "eval_loss": 1.772993564605713,
      "eval_runtime": 144.0971,
      "eval_samples_per_second": 43.54,
      "eval_steps_per_second": 5.448,
      "step": 500
    },
    {
      "epoch": 0.44,
      "learning_rate": 0.0002,
      "loss": 1.7573,
      "step": 600
    },
    {
      "epoch": 0.44,
      "eval_loss": 1.7435261011123657,
      "eval_runtime": 144.1427,
      "eval_samples_per_second": 43.526,
      "eval_steps_per_second": 5.446,
      "step": 600
    },
    {
      "epoch": 0.51,
      "learning_rate": 0.0002,
      "loss": 1.7188,
      "step": 700
    },
    {
      "epoch": 0.51,
      "eval_loss": 1.7126474380493164,
      "eval_runtime": 144.1825,
      "eval_samples_per_second": 43.514,
      "eval_steps_per_second": 5.444,
      "step": 700
    },
    {
      "epoch": 0.58,
      "learning_rate": 0.0002,
      "loss": 1.6978,
      "step": 800
    },
    {
      "epoch": 0.58,
      "eval_loss": 1.6834746599197388,
      "eval_runtime": 144.2172,
      "eval_samples_per_second": 43.504,
      "eval_steps_per_second": 5.443,
      "step": 800
    },
    {
      "epoch": 0.66,
      "learning_rate": 0.0002,
      "loss": 1.6627,
      "step": 900
    },
    {
      "epoch": 0.66,
      "eval_loss": 1.6580406427383423,
      "eval_runtime": 144.1973,
      "eval_samples_per_second": 43.51,
      "eval_steps_per_second": 5.444,
      "step": 900
    },
    {
      "epoch": 0.73,
      "learning_rate": 0.0002,
      "loss": 1.6478,
      "step": 1000
    },
    {
      "epoch": 0.73,
      "eval_loss": 1.6355735063552856,
      "eval_runtime": 144.1901,
      "eval_samples_per_second": 43.512,
      "eval_steps_per_second": 5.444,
      "step": 1000
    },
    {
      "epoch": 0.8,
      "learning_rate": 0.0002,
      "loss": 1.6278,
      "step": 1100
    },
    {
      "epoch": 0.8,
      "eval_loss": 1.6143633127212524,
      "eval_runtime": 144.1411,
      "eval_samples_per_second": 43.527,
      "eval_steps_per_second": 5.446,
      "step": 1100
    },
    {
      "epoch": 0.87,
      "learning_rate": 0.0002,
      "loss": 1.5927,
      "step": 1200
    },
    {
      "epoch": 0.87,
      "eval_loss": 1.5936414003372192,
      "eval_runtime": 144.3064,
      "eval_samples_per_second": 43.477,
      "eval_steps_per_second": 5.44,
      "step": 1200
    },
    {
      "epoch": 0.95,
      "learning_rate": 0.0002,
      "loss": 1.6005,
      "step": 1300
    },
    {
      "epoch": 0.95,
      "eval_loss": 1.573447346687317,
      "eval_runtime": 144.216,
      "eval_samples_per_second": 43.504,
      "eval_steps_per_second": 5.443,
      "step": 1300
    },
    {
      "epoch": 1.02,
      "learning_rate": 0.0002,
      "loss": 1.5531,
      "step": 1400
    },
    {
      "epoch": 1.02,
      "eval_loss": 1.5534614324569702,
      "eval_runtime": 144.2247,
      "eval_samples_per_second": 43.502,
      "eval_steps_per_second": 5.443,
      "step": 1400
    },
    {
      "epoch": 1.09,
      "learning_rate": 0.0002,
      "loss": 1.525,
      "step": 1500
    },
    {
      "epoch": 1.09,
      "eval_loss": 1.5355420112609863,
      "eval_runtime": 144.287,
      "eval_samples_per_second": 43.483,
      "eval_steps_per_second": 5.441,
      "step": 1500
    },
    {
      "epoch": 1.17,
      "learning_rate": 0.0002,
      "loss": 1.5066,
      "step": 1600
    },
    {
      "epoch": 1.17,
      "eval_loss": 1.5180158615112305,
      "eval_runtime": 144.1973,
      "eval_samples_per_second": 43.51,
      "eval_steps_per_second": 5.444,
      "step": 1600
    },
    {
      "epoch": 1.24,
      "learning_rate": 0.0002,
      "loss": 1.5049,
      "step": 1700
    },
    {
      "epoch": 1.24,
      "eval_loss": 1.5018609762191772,
      "eval_runtime": 144.11,
      "eval_samples_per_second": 43.536,
      "eval_steps_per_second": 5.447,
      "step": 1700
    },
    {
      "epoch": 1.31,
      "learning_rate": 0.0002,
      "loss": 1.4781,
      "step": 1800
    },
    {
      "epoch": 1.31,
      "eval_loss": 1.4871549606323242,
      "eval_runtime": 148.0177,
      "eval_samples_per_second": 42.387,
      "eval_steps_per_second": 5.303,
      "step": 1800
    },
    {
      "epoch": 1.38,
      "learning_rate": 0.0002,
      "loss": 1.4858,
      "step": 1900
    },
    {
      "epoch": 1.38,
      "eval_loss": 1.498619794845581,
      "eval_runtime": 133.9486,
      "eval_samples_per_second": 46.839,
      "eval_steps_per_second": 5.86,
      "step": 1900
    },
    {
      "epoch": 1.46,
      "learning_rate": 0.0002,
      "loss": 1.4665,
      "step": 2000
    },
    {
      "epoch": 1.46,
      "eval_loss": 1.475897192955017,
      "eval_runtime": 133.9594,
      "eval_samples_per_second": 46.835,
      "eval_steps_per_second": 5.86,
      "step": 2000
    },
    {
      "epoch": 1.53,
      "learning_rate": 0.0002,
      "loss": 1.4389,
      "step": 2100
    },
    {
      "epoch": 1.53,
      "eval_loss": 1.458662509918213,
      "eval_runtime": 136.2607,
      "eval_samples_per_second": 46.044,
      "eval_steps_per_second": 5.761,
      "step": 2100
    },
    {
      "epoch": 1.6,
      "learning_rate": 0.0002,
      "loss": 1.4404,
      "step": 2200
    },
    {
      "epoch": 1.6,
      "eval_loss": 1.4420702457427979,
      "eval_runtime": 133.798,
      "eval_samples_per_second": 46.892,
      "eval_steps_per_second": 5.867,
      "step": 2200
    },
    {
      "epoch": 1.68,
      "learning_rate": 0.0002,
      "loss": 1.4162,
      "step": 2300
    },
    {
      "epoch": 1.68,
      "eval_loss": 1.4285529851913452,
      "eval_runtime": 133.9432,
      "eval_samples_per_second": 46.841,
      "eval_steps_per_second": 5.861,
      "step": 2300
    },
    {
      "epoch": 1.75,
      "learning_rate": 0.0002,
      "loss": 1.4165,
      "step": 2400
    },
    {
      "epoch": 1.75,
      "eval_loss": 1.4121776819229126,
      "eval_runtime": 133.8876,
      "eval_samples_per_second": 46.86,
      "eval_steps_per_second": 5.863,
      "step": 2400
    },
    {
      "epoch": 1.82,
      "learning_rate": 0.0002,
      "loss": 1.3912,
      "step": 2500
    },
    {
      "epoch": 1.82,
      "eval_loss": 1.3997336626052856,
      "eval_runtime": 133.7984,
      "eval_samples_per_second": 46.891,
      "eval_steps_per_second": 5.867,
      "step": 2500
    },
    {
      "epoch": 1.89,
      "learning_rate": 0.0002,
      "loss": 1.3742,
      "step": 2600
    },
    {
      "epoch": 1.89,
      "eval_loss": 1.3850334882736206,
      "eval_runtime": 133.6811,
      "eval_samples_per_second": 46.933,
      "eval_steps_per_second": 5.872,
      "step": 2600
    },
    {
      "epoch": 1.97,
      "learning_rate": 0.0002,
      "loss": 1.3701,
      "step": 2700
    },
    {
      "epoch": 1.97,
      "eval_loss": 1.3720322847366333,
      "eval_runtime": 133.6791,
      "eval_samples_per_second": 46.933,
      "eval_steps_per_second": 5.872,
      "step": 2700
    },
    {
      "epoch": 2.04,
      "learning_rate": 0.0002,
      "loss": 1.3339,
      "step": 2800
    },
    {
      "epoch": 2.04,
      "eval_loss": 1.3605430126190186,
      "eval_runtime": 133.753,
      "eval_samples_per_second": 46.907,
      "eval_steps_per_second": 5.869,
      "step": 2800
    },
    {
      "epoch": 2.11,
      "learning_rate": 0.0002,
      "loss": 1.3132,
      "step": 2900
    },
    {
      "epoch": 2.11,
      "eval_loss": 1.3472238779067993,
      "eval_runtime": 133.6686,
      "eval_samples_per_second": 46.937,
      "eval_steps_per_second": 5.873,
      "step": 2900
    },
    {
      "epoch": 2.19,
      "learning_rate": 0.0002,
      "loss": 1.3122,
      "step": 3000
    },
    {
      "epoch": 2.19,
      "eval_loss": 1.3349665403366089,
      "eval_runtime": 133.7348,
      "eval_samples_per_second": 46.914,
      "eval_steps_per_second": 5.87,
      "step": 3000
    },
    {
      "epoch": 2.26,
      "learning_rate": 0.0002,
      "loss": 1.2997,
      "step": 3100
    },
    {
      "epoch": 2.26,
      "eval_loss": 1.3228119611740112,
      "eval_runtime": 133.8158,
      "eval_samples_per_second": 46.885,
      "eval_steps_per_second": 5.866,
      "step": 3100
    },
    {
      "epoch": 2.33,
      "learning_rate": 0.0002,
      "loss": 1.2979,
      "step": 3200
    },
    {
      "epoch": 2.33,
      "eval_loss": 1.3118646144866943,
      "eval_runtime": 133.8382,
      "eval_samples_per_second": 46.878,
      "eval_steps_per_second": 5.865,
      "step": 3200
    },
    {
      "epoch": 2.4,
      "learning_rate": 0.0002,
      "loss": 1.2821,
      "step": 3300
    },
    {
      "epoch": 2.4,
      "eval_loss": 1.2987654209136963,
      "eval_runtime": 133.6905,
      "eval_samples_per_second": 46.929,
      "eval_steps_per_second": 5.872,
      "step": 3300
    },
    {
      "epoch": 2.48,
      "learning_rate": 0.0002,
      "loss": 1.2616,
      "step": 3400
    },
    {
      "epoch": 2.48,
      "eval_loss": 1.288960576057434,
      "eval_runtime": 133.7641,
      "eval_samples_per_second": 46.903,
      "eval_steps_per_second": 5.869,
      "step": 3400
    },
    {
      "epoch": 2.55,
      "learning_rate": 0.0002,
      "loss": 1.2611,
      "step": 3500
    },
    {
      "epoch": 2.55,
      "eval_loss": 1.278650164604187,
      "eval_runtime": 133.8143,
      "eval_samples_per_second": 46.886,
      "eval_steps_per_second": 5.866,
      "step": 3500
    },
    {
      "epoch": 2.62,
      "learning_rate": 0.0002,
      "loss": 1.2484,
      "step": 3600
    },
    {
      "epoch": 2.62,
      "eval_loss": 1.265230655670166,
      "eval_runtime": 133.7663,
      "eval_samples_per_second": 46.903,
      "eval_steps_per_second": 5.868,
      "step": 3600
    },
    {
      "epoch": 2.7,
      "learning_rate": 0.0002,
      "loss": 1.2333,
      "step": 3700
    },
    {
      "epoch": 2.7,
      "eval_loss": 1.2536190748214722,
      "eval_runtime": 133.8137,
      "eval_samples_per_second": 46.886,
      "eval_steps_per_second": 5.866,
      "step": 3700
    },
    {
      "epoch": 2.77,
      "learning_rate": 0.0002,
      "loss": 1.2116,
      "step": 3800
    },
    {
      "epoch": 2.77,
      "eval_loss": 1.245966911315918,
      "eval_runtime": 133.7977,
      "eval_samples_per_second": 46.892,
      "eval_steps_per_second": 5.867,
      "step": 3800
    },
    {
      "epoch": 2.84,
      "learning_rate": 0.0002,
      "loss": 1.2198,
      "step": 3900
    },
    {
      "epoch": 2.84,
      "eval_loss": 1.2307173013687134,
      "eval_runtime": 133.8147,
      "eval_samples_per_second": 46.886,
      "eval_steps_per_second": 5.866,
      "step": 3900
    },
    {
      "epoch": 2.91,
      "learning_rate": 0.0002,
      "loss": 1.2025,
      "step": 4000
    },
    {
      "epoch": 2.91,
      "eval_loss": 1.2222630977630615,
      "eval_runtime": 133.8615,
      "eval_samples_per_second": 46.869,
      "eval_steps_per_second": 5.864,
      "step": 4000
    }
  ],
  "logging_steps": 100,
  "max_steps": 4116,
  "num_train_epochs": 3,
  "save_steps": 100,
  "total_flos": 2.3841325139747635e+17,
  "trial_name": null,
  "trial_params": null
}