{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.002146082027547257, "global_step": 29000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.6876, "step": 500 }, { "epoch": 0.0, "eval_loss": 1.4209991693496704, "eval_runtime": 29774.1937, "eval_samples_per_second": 24.175, "eval_steps_per_second": 6.044, "step": 500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.4255, "step": 1000 }, { "epoch": 0.0, "eval_loss": 1.4117300510406494, "eval_runtime": 29117.2533, "eval_samples_per_second": 24.72, "eval_steps_per_second": 6.18, "step": 1000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2975, "step": 1500 }, { "epoch": 0.0, "eval_loss": 1.3992412090301514, "eval_runtime": 29718.2883, "eval_samples_per_second": 24.22, "eval_steps_per_second": 6.055, "step": 1500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.3514, "step": 2000 }, { "epoch": 0.0, "eval_loss": 1.4060639142990112, "eval_runtime": 28900.2542, "eval_samples_per_second": 24.906, "eval_steps_per_second": 6.226, "step": 2000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.3757, "step": 2500 }, { "epoch": 0.0, "eval_loss": 1.399295687675476, "eval_runtime": 29509.4537, "eval_samples_per_second": 24.392, "eval_steps_per_second": 6.098, "step": 2500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.295, "step": 3000 }, { "epoch": 0.0, "eval_loss": 1.393278956413269, "eval_runtime": 28449.3173, "eval_samples_per_second": 25.301, "eval_steps_per_second": 6.325, "step": 3000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2565, "step": 3500 }, { "epoch": 0.0, "eval_loss": 1.3863052129745483, "eval_runtime": 28789.7552, "eval_samples_per_second": 25.001, "eval_steps_per_second": 6.25, "step": 3500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.3095, "step": 4000 }, { "epoch": 0.0, "eval_loss": 1.3882980346679688, "eval_runtime": 28567.3896, "eval_samples_per_second": 25.196, "eval_steps_per_second": 6.299, "step": 4000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.3021, "step": 4500 }, { "epoch": 0.0, "eval_loss": 1.3863459825515747, "eval_runtime": 28424.5337, "eval_samples_per_second": 25.323, "eval_steps_per_second": 6.331, "step": 4500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2299, "step": 5000 }, { "epoch": 0.0, "eval_loss": 1.3776334524154663, "eval_runtime": 28753.4447, "eval_samples_per_second": 25.033, "eval_steps_per_second": 6.258, "step": 5000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2306, "step": 5500 }, { "epoch": 0.0, "eval_loss": 1.3795045614242554, "eval_runtime": 28113.3911, "eval_samples_per_second": 25.603, "eval_steps_per_second": 6.401, "step": 5500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2425, "step": 6000 }, { "epoch": 0.0, "eval_loss": 1.3737467527389526, "eval_runtime": 28752.4369, "eval_samples_per_second": 25.034, "eval_steps_per_second": 6.258, "step": 6000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1932, "step": 6500 }, { "epoch": 0.0, "eval_loss": 1.3799411058425903, "eval_runtime": 27577.5799, "eval_samples_per_second": 26.1, "eval_steps_per_second": 6.525, "step": 6500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2312, "step": 7000 }, { "epoch": 0.0, "eval_loss": 1.3715393543243408, "eval_runtime": 28034.4873, "eval_samples_per_second": 25.675, "eval_steps_per_second": 6.419, "step": 7000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2841, "step": 7500 }, { "epoch": 0.0, "eval_loss": 1.3804839849472046, "eval_runtime": 28127.7804, "eval_samples_per_second": 25.59, "eval_steps_per_second": 6.397, "step": 7500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1463, "step": 8000 }, { "epoch": 0.0, "eval_loss": 1.3776183128356934, "eval_runtime": 27577.1163, "eval_samples_per_second": 26.101, "eval_steps_per_second": 6.525, "step": 8000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1313, "step": 8500 }, { "epoch": 0.0, "eval_loss": 1.3699731826782227, "eval_runtime": 28190.8614, "eval_samples_per_second": 25.533, "eval_steps_per_second": 6.383, "step": 8500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2267, "step": 9000 }, { "epoch": 0.0, "eval_loss": 1.366495132446289, "eval_runtime": 28103.2881, "eval_samples_per_second": 25.612, "eval_steps_per_second": 6.403, "step": 9000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1866, "step": 9500 }, { "epoch": 0.0, "eval_loss": 1.3763595819473267, "eval_runtime": 27696.0614, "eval_samples_per_second": 25.989, "eval_steps_per_second": 6.497, "step": 9500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2347, "step": 10000 }, { "epoch": 0.0, "eval_loss": 1.3777934312820435, "eval_runtime": 28118.8859, "eval_samples_per_second": 25.598, "eval_steps_per_second": 6.399, "step": 10000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1514, "step": 10500 }, { "epoch": 0.0, "eval_loss": 1.370295763015747, "eval_runtime": 27665.6655, "eval_samples_per_second": 26.017, "eval_steps_per_second": 6.504, "step": 10500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2867, "step": 11000 }, { "epoch": 0.0, "eval_loss": 1.372216820716858, "eval_runtime": 28029.069, "eval_samples_per_second": 25.68, "eval_steps_per_second": 6.42, "step": 11000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.3031, "step": 11500 }, { "epoch": 0.0, "eval_loss": 1.3675533533096313, "eval_runtime": 28060.4111, "eval_samples_per_second": 25.651, "eval_steps_per_second": 6.413, "step": 11500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2353, "step": 12000 }, { "epoch": 0.0, "eval_loss": 1.3742448091506958, "eval_runtime": 27439.4619, "eval_samples_per_second": 26.232, "eval_steps_per_second": 6.558, "step": 12000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2022, "step": 12500 }, { "epoch": 0.0, "eval_loss": 1.3690038919448853, "eval_runtime": 28177.5616, "eval_samples_per_second": 25.545, "eval_steps_per_second": 6.386, "step": 12500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1925, "step": 13000 }, { "epoch": 0.0, "eval_loss": 1.3660128116607666, "eval_runtime": 28183.7194, "eval_samples_per_second": 25.539, "eval_steps_per_second": 6.385, "step": 13000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2097, "step": 13500 }, { "epoch": 0.0, "eval_loss": 1.3706327676773071, "eval_runtime": 27638.252, "eval_samples_per_second": 26.043, "eval_steps_per_second": 6.511, "step": 13500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1606, "step": 14000 }, { "epoch": 0.0, "eval_loss": 1.369661569595337, "eval_runtime": 28259.5204, "eval_samples_per_second": 25.47, "eval_steps_per_second": 6.368, "step": 14000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2216, "step": 14500 }, { "epoch": 0.0, "eval_loss": 1.368302822113037, "eval_runtime": 28149.9769, "eval_samples_per_second": 25.57, "eval_steps_per_second": 6.392, "step": 14500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1916, "step": 15000 }, { "epoch": 0.0, "eval_loss": 1.3689770698547363, "eval_runtime": 27702.5821, "eval_samples_per_second": 25.983, "eval_steps_per_second": 6.496, "step": 15000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2369, "step": 15500 }, { "epoch": 0.0, "eval_loss": 1.3695650100708008, "eval_runtime": 28082.9192, "eval_samples_per_second": 25.631, "eval_steps_per_second": 6.408, "step": 15500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1862, "step": 16000 }, { "epoch": 0.0, "eval_loss": 1.3672432899475098, "eval_runtime": 27790.3978, "eval_samples_per_second": 25.9, "eval_steps_per_second": 6.475, "step": 16000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1875, "step": 16500 }, { "epoch": 0.0, "eval_loss": 1.372326135635376, "eval_runtime": 27957.8295, "eval_samples_per_second": 25.745, "eval_steps_per_second": 6.436, "step": 16500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2237, "step": 17000 }, { "epoch": 0.0, "eval_loss": 1.3708332777023315, "eval_runtime": 28123.0687, "eval_samples_per_second": 25.594, "eval_steps_per_second": 6.399, "step": 17000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1123, "step": 17500 }, { "epoch": 0.0, "eval_loss": 1.3687807321548462, "eval_runtime": 27597.4681, "eval_samples_per_second": 26.082, "eval_steps_per_second": 6.52, "step": 17500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1219, "step": 18000 }, { "epoch": 0.0, "eval_loss": 1.376206874847412, "eval_runtime": 28127.0268, "eval_samples_per_second": 25.59, "eval_steps_per_second": 6.398, "step": 18000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1776, "step": 18500 }, { "epoch": 0.0, "eval_loss": 1.368283748626709, "eval_runtime": 28116.6088, "eval_samples_per_second": 25.6, "eval_steps_per_second": 6.4, "step": 18500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1627, "step": 19000 }, { "epoch": 0.0, "eval_loss": 1.3710017204284668, "eval_runtime": 27585.1366, "eval_samples_per_second": 26.093, "eval_steps_per_second": 6.523, "step": 19000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.0627, "step": 19500 }, { "epoch": 0.0, "eval_loss": 1.3697084188461304, "eval_runtime": 28272.5185, "eval_samples_per_second": 25.459, "eval_steps_per_second": 6.365, "step": 19500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.0632, "step": 20000 }, { "epoch": 0.0, "eval_loss": 1.3792474269866943, "eval_runtime": 28381.2307, "eval_samples_per_second": 25.361, "eval_steps_per_second": 6.34, "step": 20000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.2426, "step": 20500 }, { "epoch": 0.0, "eval_loss": 1.373166799545288, "eval_runtime": 28634.6137, "eval_samples_per_second": 25.137, "eval_steps_per_second": 6.284, "step": 20500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1263, "step": 21000 }, { "epoch": 0.0, "eval_loss": 1.3743404150009155, "eval_runtime": 28674.6776, "eval_samples_per_second": 25.102, "eval_steps_per_second": 6.275, "step": 21000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1131, "step": 21500 }, { "epoch": 0.0, "eval_loss": 1.3682280778884888, "eval_runtime": 28099.2766, "eval_samples_per_second": 25.616, "eval_steps_per_second": 6.404, "step": 21500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.0595, "step": 22000 }, { "epoch": 0.0, "eval_loss": 1.3719111680984497, "eval_runtime": 28695.4529, "eval_samples_per_second": 25.084, "eval_steps_per_second": 6.271, "step": 22000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1468, "step": 22500 }, { "epoch": 0.0, "eval_loss": 1.3663983345031738, "eval_runtime": 28026.4183, "eval_samples_per_second": 25.682, "eval_steps_per_second": 6.421, "step": 22500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.184, "step": 23000 }, { "epoch": 0.0, "eval_loss": 1.3723489046096802, "eval_runtime": 28690.9854, "eval_samples_per_second": 25.087, "eval_steps_per_second": 6.272, "step": 23000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1262, "step": 23500 }, { "epoch": 0.0, "eval_loss": 1.3714051246643066, "eval_runtime": 28156.2291, "eval_samples_per_second": 25.564, "eval_steps_per_second": 6.391, "step": 23500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1758, "step": 24000 }, { "epoch": 0.0, "eval_loss": 1.368726134300232, "eval_runtime": 28657.2462, "eval_samples_per_second": 25.117, "eval_steps_per_second": 6.279, "step": 24000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.0438, "step": 24500 }, { "epoch": 0.0, "eval_loss": 1.3730684518814087, "eval_runtime": 28686.5378, "eval_samples_per_second": 25.091, "eval_steps_per_second": 6.273, "step": 24500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1404, "step": 25000 }, { "epoch": 0.0, "eval_loss": 1.3706409931182861, "eval_runtime": 28123.1244, "eval_samples_per_second": 25.594, "eval_steps_per_second": 6.399, "step": 25000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1135, "step": 25500 }, { "epoch": 0.0, "eval_loss": 1.3763220310211182, "eval_runtime": 28682.6176, "eval_samples_per_second": 25.095, "eval_steps_per_second": 6.274, "step": 25500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.0536, "step": 26000 }, { "epoch": 0.0, "eval_loss": 1.3709115982055664, "eval_runtime": 28032.6358, "eval_samples_per_second": 25.677, "eval_steps_per_second": 6.419, "step": 26000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1203, "step": 26500 }, { "epoch": 0.0, "eval_loss": 1.3710169792175293, "eval_runtime": 28678.3157, "eval_samples_per_second": 25.099, "eval_steps_per_second": 6.275, "step": 26500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.0784, "step": 27000 }, { "epoch": 0.0, "eval_loss": 1.370123028755188, "eval_runtime": 28008.2176, "eval_samples_per_second": 25.699, "eval_steps_per_second": 6.425, "step": 27000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.0521, "step": 27500 }, { "epoch": 0.0, "eval_loss": 1.3733536005020142, "eval_runtime": 17990.0293, "eval_samples_per_second": 40.01, "eval_steps_per_second": 10.003, "step": 27500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1773, "step": 28000 }, { "epoch": 0.0, "eval_loss": 1.3757646083831787, "eval_runtime": 18000.6521, "eval_samples_per_second": 39.987, "eval_steps_per_second": 9.997, "step": 28000 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.1688, "step": 28500 }, { "epoch": 0.0, "eval_loss": 1.3776638507843018, "eval_runtime": 17989.8584, "eval_samples_per_second": 40.011, "eval_steps_per_second": 10.003, "step": 28500 }, { "epoch": 0.0, "learning_rate": 5e-05, "loss": 2.0855, "step": 29000 }, { "epoch": 0.0, "eval_loss": 1.3749734163284302, "eval_runtime": 17990.6544, "eval_samples_per_second": 40.009, "eval_steps_per_second": 10.002, "step": 29000 } ], "max_steps": 30000, "num_train_epochs": 1, "total_flos": 1.5154937856e+16, "trial_name": null, "trial_params": null }