|
{ |
|
"best_metric": 1.2287747859954834, |
|
"best_model_checkpoint": "./outputs/checkpoint-3900", |
|
"epoch": 2.841894353369763, |
|
"eval_steps": 100, |
|
"global_step": 3900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002, |
|
"loss": 2.1823, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 2.0118842124938965, |
|
"eval_runtime": 144.2983, |
|
"eval_samples_per_second": 43.479, |
|
"eval_steps_per_second": 5.44, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002, |
|
"loss": 1.962, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_loss": 1.9193025827407837, |
|
"eval_runtime": 144.1022, |
|
"eval_samples_per_second": 43.539, |
|
"eval_steps_per_second": 5.448, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8883, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"eval_loss": 1.8596361875534058, |
|
"eval_runtime": 144.0831, |
|
"eval_samples_per_second": 43.544, |
|
"eval_steps_per_second": 5.448, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002, |
|
"loss": 1.8371, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_loss": 1.813263177871704, |
|
"eval_runtime": 144.1028, |
|
"eval_samples_per_second": 43.538, |
|
"eval_steps_per_second": 5.447, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7855, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"eval_loss": 1.772437572479248, |
|
"eval_runtime": 144.0494, |
|
"eval_samples_per_second": 43.555, |
|
"eval_steps_per_second": 5.45, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0002, |
|
"loss": 1.757, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_loss": 1.7428079843521118, |
|
"eval_runtime": 144.0319, |
|
"eval_samples_per_second": 43.56, |
|
"eval_steps_per_second": 5.45, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0002, |
|
"loss": 1.7183, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_loss": 1.7120596170425415, |
|
"eval_runtime": 144.1455, |
|
"eval_samples_per_second": 43.525, |
|
"eval_steps_per_second": 5.446, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6973, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_loss": 1.6833879947662354, |
|
"eval_runtime": 144.034, |
|
"eval_samples_per_second": 43.559, |
|
"eval_steps_per_second": 5.45, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0002, |
|
"loss": 1.662, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"eval_loss": 1.6580077409744263, |
|
"eval_runtime": 144.0204, |
|
"eval_samples_per_second": 43.563, |
|
"eval_steps_per_second": 5.451, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6473, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_loss": 1.6349676847457886, |
|
"eval_runtime": 144.1987, |
|
"eval_samples_per_second": 43.509, |
|
"eval_steps_per_second": 5.444, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.0002, |
|
"loss": 1.6273, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_loss": 1.6135053634643555, |
|
"eval_runtime": 144.1005, |
|
"eval_samples_per_second": 43.539, |
|
"eval_steps_per_second": 5.448, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5919, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"eval_loss": 1.5944637060165405, |
|
"eval_runtime": 144.0899, |
|
"eval_samples_per_second": 43.542, |
|
"eval_steps_per_second": 5.448, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5994, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"eval_loss": 1.5728504657745361, |
|
"eval_runtime": 144.043, |
|
"eval_samples_per_second": 43.556, |
|
"eval_steps_per_second": 5.45, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5528, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"eval_loss": 1.552846074104309, |
|
"eval_runtime": 144.0891, |
|
"eval_samples_per_second": 43.543, |
|
"eval_steps_per_second": 5.448, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5246, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"eval_loss": 1.5355615615844727, |
|
"eval_runtime": 144.0408, |
|
"eval_samples_per_second": 43.557, |
|
"eval_steps_per_second": 5.45, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5062, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"eval_loss": 1.5179370641708374, |
|
"eval_runtime": 144.0242, |
|
"eval_samples_per_second": 43.562, |
|
"eval_steps_per_second": 5.45, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5038, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"eval_loss": 1.5012134313583374, |
|
"eval_runtime": 144.0166, |
|
"eval_samples_per_second": 43.564, |
|
"eval_steps_per_second": 5.451, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0002, |
|
"loss": 1.5144, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"eval_loss": 1.511275291442871, |
|
"eval_runtime": 133.6708, |
|
"eval_samples_per_second": 46.936, |
|
"eval_steps_per_second": 5.873, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4715, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"eval_loss": 1.48880934715271, |
|
"eval_runtime": 133.6301, |
|
"eval_samples_per_second": 46.95, |
|
"eval_steps_per_second": 5.874, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4621, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"eval_loss": 1.4694921970367432, |
|
"eval_runtime": 133.7845, |
|
"eval_samples_per_second": 46.896, |
|
"eval_steps_per_second": 5.868, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4364, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"eval_loss": 1.4534579515457153, |
|
"eval_runtime": 133.5069, |
|
"eval_samples_per_second": 46.994, |
|
"eval_steps_per_second": 5.88, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4388, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_loss": 1.4382610321044922, |
|
"eval_runtime": 133.6146, |
|
"eval_samples_per_second": 46.956, |
|
"eval_steps_per_second": 5.875, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4139, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"eval_loss": 1.4250658750534058, |
|
"eval_runtime": 133.6118, |
|
"eval_samples_per_second": 46.957, |
|
"eval_steps_per_second": 5.875, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.0002, |
|
"loss": 1.4145, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"eval_loss": 1.408768892288208, |
|
"eval_runtime": 133.5488, |
|
"eval_samples_per_second": 46.979, |
|
"eval_steps_per_second": 5.878, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3897, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_loss": 1.3950382471084595, |
|
"eval_runtime": 133.6055, |
|
"eval_samples_per_second": 46.959, |
|
"eval_steps_per_second": 5.876, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3718, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"eval_loss": 1.38124418258667, |
|
"eval_runtime": 133.4827, |
|
"eval_samples_per_second": 47.002, |
|
"eval_steps_per_second": 5.881, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3685, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_loss": 1.3680918216705322, |
|
"eval_runtime": 133.3719, |
|
"eval_samples_per_second": 47.041, |
|
"eval_steps_per_second": 5.886, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3321, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"eval_loss": 1.356438159942627, |
|
"eval_runtime": 133.4175, |
|
"eval_samples_per_second": 47.025, |
|
"eval_steps_per_second": 5.884, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3105, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"eval_loss": 1.3440583944320679, |
|
"eval_runtime": 133.4445, |
|
"eval_samples_per_second": 47.016, |
|
"eval_steps_per_second": 5.883, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.0002, |
|
"loss": 1.3096, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"eval_loss": 1.3319122791290283, |
|
"eval_runtime": 133.4813, |
|
"eval_samples_per_second": 47.003, |
|
"eval_steps_per_second": 5.881, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2963, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"eval_loss": 1.320979356765747, |
|
"eval_runtime": 133.3932, |
|
"eval_samples_per_second": 47.034, |
|
"eval_steps_per_second": 5.885, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2953, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"eval_loss": 1.3095366954803467, |
|
"eval_runtime": 138.3583, |
|
"eval_samples_per_second": 45.346, |
|
"eval_steps_per_second": 5.674, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2786, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_loss": 1.2959859371185303, |
|
"eval_runtime": 133.5245, |
|
"eval_samples_per_second": 46.988, |
|
"eval_steps_per_second": 5.879, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2585, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"eval_loss": 1.2855974435806274, |
|
"eval_runtime": 133.542, |
|
"eval_samples_per_second": 46.981, |
|
"eval_steps_per_second": 5.878, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2586, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"eval_loss": 1.2743042707443237, |
|
"eval_runtime": 133.3944, |
|
"eval_samples_per_second": 47.033, |
|
"eval_steps_per_second": 5.885, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2466, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"eval_loss": 1.2626945972442627, |
|
"eval_runtime": 133.3673, |
|
"eval_samples_per_second": 47.043, |
|
"eval_steps_per_second": 5.886, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2303, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"eval_loss": 1.2512463331222534, |
|
"eval_runtime": 133.4502, |
|
"eval_samples_per_second": 47.014, |
|
"eval_steps_per_second": 5.882, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2088, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"eval_loss": 1.2429386377334595, |
|
"eval_runtime": 133.4443, |
|
"eval_samples_per_second": 47.016, |
|
"eval_steps_per_second": 5.883, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.0002, |
|
"loss": 1.2171, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"eval_loss": 1.2287747859954834, |
|
"eval_runtime": 133.4716, |
|
"eval_samples_per_second": 47.006, |
|
"eval_steps_per_second": 5.881, |
|
"step": 3900 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 4116, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 2.3246378200834867e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|