{ "best_metric": 1.2222630977630615, "best_model_checkpoint": "./outputs/checkpoint-4000", "epoch": 2.914754098360656, "eval_steps": 100, "global_step": 4000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07, "learning_rate": 0.0002, "loss": 2.1822, "step": 100 }, { "epoch": 0.07, "eval_loss": 2.0121946334838867, "eval_runtime": 144.2461, "eval_samples_per_second": 43.495, "eval_steps_per_second": 5.442, "step": 100 }, { "epoch": 0.15, "learning_rate": 0.0002, "loss": 1.9624, "step": 200 }, { "epoch": 0.15, "eval_loss": 1.9195032119750977, "eval_runtime": 144.1813, "eval_samples_per_second": 43.515, "eval_steps_per_second": 5.445, "step": 200 }, { "epoch": 0.22, "learning_rate": 0.0002, "loss": 1.8883, "step": 300 }, { "epoch": 0.22, "eval_loss": 1.8597155809402466, "eval_runtime": 144.1877, "eval_samples_per_second": 43.513, "eval_steps_per_second": 5.444, "step": 300 }, { "epoch": 0.29, "learning_rate": 0.0002, "loss": 1.8371, "step": 400 }, { "epoch": 0.29, "eval_loss": 1.8130855560302734, "eval_runtime": 144.1652, "eval_samples_per_second": 43.52, "eval_steps_per_second": 5.445, "step": 400 }, { "epoch": 0.36, "learning_rate": 0.0002, "loss": 1.7855, "step": 500 }, { "epoch": 0.36, "eval_loss": 1.772993564605713, "eval_runtime": 144.0971, "eval_samples_per_second": 43.54, "eval_steps_per_second": 5.448, "step": 500 }, { "epoch": 0.44, "learning_rate": 0.0002, "loss": 1.7573, "step": 600 }, { "epoch": 0.44, "eval_loss": 1.7435261011123657, "eval_runtime": 144.1427, "eval_samples_per_second": 43.526, "eval_steps_per_second": 5.446, "step": 600 }, { "epoch": 0.51, "learning_rate": 0.0002, "loss": 1.7188, "step": 700 }, { "epoch": 0.51, "eval_loss": 1.7126474380493164, "eval_runtime": 144.1825, "eval_samples_per_second": 43.514, "eval_steps_per_second": 5.444, "step": 700 }, { "epoch": 0.58, "learning_rate": 0.0002, "loss": 1.6978, "step": 800 }, { "epoch": 0.58, "eval_loss": 1.6834746599197388, "eval_runtime": 144.2172, "eval_samples_per_second": 43.504, "eval_steps_per_second": 5.443, "step": 800 }, { "epoch": 0.66, "learning_rate": 0.0002, "loss": 1.6627, "step": 900 }, { "epoch": 0.66, "eval_loss": 1.6580406427383423, "eval_runtime": 144.1973, "eval_samples_per_second": 43.51, "eval_steps_per_second": 5.444, "step": 900 }, { "epoch": 0.73, "learning_rate": 0.0002, "loss": 1.6478, "step": 1000 }, { "epoch": 0.73, "eval_loss": 1.6355735063552856, "eval_runtime": 144.1901, "eval_samples_per_second": 43.512, "eval_steps_per_second": 5.444, "step": 1000 }, { "epoch": 0.8, "learning_rate": 0.0002, "loss": 1.6278, "step": 1100 }, { "epoch": 0.8, "eval_loss": 1.6143633127212524, "eval_runtime": 144.1411, "eval_samples_per_second": 43.527, "eval_steps_per_second": 5.446, "step": 1100 }, { "epoch": 0.87, "learning_rate": 0.0002, "loss": 1.5927, "step": 1200 }, { "epoch": 0.87, "eval_loss": 1.5936414003372192, "eval_runtime": 144.3064, "eval_samples_per_second": 43.477, "eval_steps_per_second": 5.44, "step": 1200 }, { "epoch": 0.95, "learning_rate": 0.0002, "loss": 1.6005, "step": 1300 }, { "epoch": 0.95, "eval_loss": 1.573447346687317, "eval_runtime": 144.216, "eval_samples_per_second": 43.504, "eval_steps_per_second": 5.443, "step": 1300 }, { "epoch": 1.02, "learning_rate": 0.0002, "loss": 1.5531, "step": 1400 }, { "epoch": 1.02, "eval_loss": 1.5534614324569702, "eval_runtime": 144.2247, "eval_samples_per_second": 43.502, "eval_steps_per_second": 5.443, "step": 1400 }, { "epoch": 1.09, "learning_rate": 0.0002, "loss": 1.525, "step": 1500 }, { "epoch": 1.09, "eval_loss": 1.5355420112609863, "eval_runtime": 144.287, "eval_samples_per_second": 43.483, "eval_steps_per_second": 5.441, "step": 1500 }, { "epoch": 1.17, "learning_rate": 0.0002, "loss": 1.5066, "step": 1600 }, { "epoch": 1.17, "eval_loss": 1.5180158615112305, "eval_runtime": 144.1973, "eval_samples_per_second": 43.51, "eval_steps_per_second": 5.444, "step": 1600 }, { "epoch": 1.24, "learning_rate": 0.0002, "loss": 1.5049, "step": 1700 }, { "epoch": 1.24, "eval_loss": 1.5018609762191772, "eval_runtime": 144.11, "eval_samples_per_second": 43.536, "eval_steps_per_second": 5.447, "step": 1700 }, { "epoch": 1.31, "learning_rate": 0.0002, "loss": 1.4781, "step": 1800 }, { "epoch": 1.31, "eval_loss": 1.4871549606323242, "eval_runtime": 148.0177, "eval_samples_per_second": 42.387, "eval_steps_per_second": 5.303, "step": 1800 }, { "epoch": 1.38, "learning_rate": 0.0002, "loss": 1.4858, "step": 1900 }, { "epoch": 1.38, "eval_loss": 1.498619794845581, "eval_runtime": 133.9486, "eval_samples_per_second": 46.839, "eval_steps_per_second": 5.86, "step": 1900 }, { "epoch": 1.46, "learning_rate": 0.0002, "loss": 1.4665, "step": 2000 }, { "epoch": 1.46, "eval_loss": 1.475897192955017, "eval_runtime": 133.9594, "eval_samples_per_second": 46.835, "eval_steps_per_second": 5.86, "step": 2000 }, { "epoch": 1.53, "learning_rate": 0.0002, "loss": 1.4389, "step": 2100 }, { "epoch": 1.53, "eval_loss": 1.458662509918213, "eval_runtime": 136.2607, "eval_samples_per_second": 46.044, "eval_steps_per_second": 5.761, "step": 2100 }, { "epoch": 1.6, "learning_rate": 0.0002, "loss": 1.4404, "step": 2200 }, { "epoch": 1.6, "eval_loss": 1.4420702457427979, "eval_runtime": 133.798, "eval_samples_per_second": 46.892, "eval_steps_per_second": 5.867, "step": 2200 }, { "epoch": 1.68, "learning_rate": 0.0002, "loss": 1.4162, "step": 2300 }, { "epoch": 1.68, "eval_loss": 1.4285529851913452, "eval_runtime": 133.9432, "eval_samples_per_second": 46.841, "eval_steps_per_second": 5.861, "step": 2300 }, { "epoch": 1.75, "learning_rate": 0.0002, "loss": 1.4165, "step": 2400 }, { "epoch": 1.75, "eval_loss": 1.4121776819229126, "eval_runtime": 133.8876, "eval_samples_per_second": 46.86, "eval_steps_per_second": 5.863, "step": 2400 }, { "epoch": 1.82, "learning_rate": 0.0002, "loss": 1.3912, "step": 2500 }, { "epoch": 1.82, "eval_loss": 1.3997336626052856, "eval_runtime": 133.7984, "eval_samples_per_second": 46.891, "eval_steps_per_second": 5.867, "step": 2500 }, { "epoch": 1.89, "learning_rate": 0.0002, "loss": 1.3742, "step": 2600 }, { "epoch": 1.89, "eval_loss": 1.3850334882736206, "eval_runtime": 133.6811, "eval_samples_per_second": 46.933, "eval_steps_per_second": 5.872, "step": 2600 }, { "epoch": 1.97, "learning_rate": 0.0002, "loss": 1.3701, "step": 2700 }, { "epoch": 1.97, "eval_loss": 1.3720322847366333, "eval_runtime": 133.6791, "eval_samples_per_second": 46.933, "eval_steps_per_second": 5.872, "step": 2700 }, { "epoch": 2.04, "learning_rate": 0.0002, "loss": 1.3339, "step": 2800 }, { "epoch": 2.04, "eval_loss": 1.3605430126190186, "eval_runtime": 133.753, "eval_samples_per_second": 46.907, "eval_steps_per_second": 5.869, "step": 2800 }, { "epoch": 2.11, "learning_rate": 0.0002, "loss": 1.3132, "step": 2900 }, { "epoch": 2.11, "eval_loss": 1.3472238779067993, "eval_runtime": 133.6686, "eval_samples_per_second": 46.937, "eval_steps_per_second": 5.873, "step": 2900 }, { "epoch": 2.19, "learning_rate": 0.0002, "loss": 1.3122, "step": 3000 }, { "epoch": 2.19, "eval_loss": 1.3349665403366089, "eval_runtime": 133.7348, "eval_samples_per_second": 46.914, "eval_steps_per_second": 5.87, "step": 3000 }, { "epoch": 2.26, "learning_rate": 0.0002, "loss": 1.2997, "step": 3100 }, { "epoch": 2.26, "eval_loss": 1.3228119611740112, "eval_runtime": 133.8158, "eval_samples_per_second": 46.885, "eval_steps_per_second": 5.866, "step": 3100 }, { "epoch": 2.33, "learning_rate": 0.0002, "loss": 1.2979, "step": 3200 }, { "epoch": 2.33, "eval_loss": 1.3118646144866943, "eval_runtime": 133.8382, "eval_samples_per_second": 46.878, "eval_steps_per_second": 5.865, "step": 3200 }, { "epoch": 2.4, "learning_rate": 0.0002, "loss": 1.2821, "step": 3300 }, { "epoch": 2.4, "eval_loss": 1.2987654209136963, "eval_runtime": 133.6905, "eval_samples_per_second": 46.929, "eval_steps_per_second": 5.872, "step": 3300 }, { "epoch": 2.48, "learning_rate": 0.0002, "loss": 1.2616, "step": 3400 }, { "epoch": 2.48, "eval_loss": 1.288960576057434, "eval_runtime": 133.7641, "eval_samples_per_second": 46.903, "eval_steps_per_second": 5.869, "step": 3400 }, { "epoch": 2.55, "learning_rate": 0.0002, "loss": 1.2611, "step": 3500 }, { "epoch": 2.55, "eval_loss": 1.278650164604187, "eval_runtime": 133.8143, "eval_samples_per_second": 46.886, "eval_steps_per_second": 5.866, "step": 3500 }, { "epoch": 2.62, "learning_rate": 0.0002, "loss": 1.2484, "step": 3600 }, { "epoch": 2.62, "eval_loss": 1.265230655670166, "eval_runtime": 133.7663, "eval_samples_per_second": 46.903, "eval_steps_per_second": 5.868, "step": 3600 }, { "epoch": 2.7, "learning_rate": 0.0002, "loss": 1.2333, "step": 3700 }, { "epoch": 2.7, "eval_loss": 1.2536190748214722, "eval_runtime": 133.8137, "eval_samples_per_second": 46.886, "eval_steps_per_second": 5.866, "step": 3700 }, { "epoch": 2.77, "learning_rate": 0.0002, "loss": 1.2116, "step": 3800 }, { "epoch": 2.77, "eval_loss": 1.245966911315918, "eval_runtime": 133.7977, "eval_samples_per_second": 46.892, "eval_steps_per_second": 5.867, "step": 3800 }, { "epoch": 2.84, "learning_rate": 0.0002, "loss": 1.2198, "step": 3900 }, { "epoch": 2.84, "eval_loss": 1.2307173013687134, "eval_runtime": 133.8147, "eval_samples_per_second": 46.886, "eval_steps_per_second": 5.866, "step": 3900 }, { "epoch": 2.91, "learning_rate": 0.0002, "loss": 1.2025, "step": 4000 }, { "epoch": 2.91, "eval_loss": 1.2222630977630615, "eval_runtime": 133.8615, "eval_samples_per_second": 46.869, "eval_steps_per_second": 5.864, "step": 4000 } ], "logging_steps": 100, "max_steps": 4116, "num_train_epochs": 3, "save_steps": 100, "total_flos": 2.3841325139747635e+17, "trial_name": null, "trial_params": null }