|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.002146082027547257, |
|
"global_step": 29000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.6876, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.4209991693496704, |
|
"eval_runtime": 29774.1937, |
|
"eval_samples_per_second": 24.175, |
|
"eval_steps_per_second": 6.044, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.4255, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.4117300510406494, |
|
"eval_runtime": 29117.2533, |
|
"eval_samples_per_second": 24.72, |
|
"eval_steps_per_second": 6.18, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2975, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3992412090301514, |
|
"eval_runtime": 29718.2883, |
|
"eval_samples_per_second": 24.22, |
|
"eval_steps_per_second": 6.055, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3514, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.4060639142990112, |
|
"eval_runtime": 28900.2542, |
|
"eval_samples_per_second": 24.906, |
|
"eval_steps_per_second": 6.226, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3757, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.399295687675476, |
|
"eval_runtime": 29509.4537, |
|
"eval_samples_per_second": 24.392, |
|
"eval_steps_per_second": 6.098, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.295, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.393278956413269, |
|
"eval_runtime": 28449.3173, |
|
"eval_samples_per_second": 25.301, |
|
"eval_steps_per_second": 6.325, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2565, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3863052129745483, |
|
"eval_runtime": 28789.7552, |
|
"eval_samples_per_second": 25.001, |
|
"eval_steps_per_second": 6.25, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3095, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3882980346679688, |
|
"eval_runtime": 28567.3896, |
|
"eval_samples_per_second": 25.196, |
|
"eval_steps_per_second": 6.299, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3021, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3863459825515747, |
|
"eval_runtime": 28424.5337, |
|
"eval_samples_per_second": 25.323, |
|
"eval_steps_per_second": 6.331, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2299, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3776334524154663, |
|
"eval_runtime": 28753.4447, |
|
"eval_samples_per_second": 25.033, |
|
"eval_steps_per_second": 6.258, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2306, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3795045614242554, |
|
"eval_runtime": 28113.3911, |
|
"eval_samples_per_second": 25.603, |
|
"eval_steps_per_second": 6.401, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2425, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3737467527389526, |
|
"eval_runtime": 28752.4369, |
|
"eval_samples_per_second": 25.034, |
|
"eval_steps_per_second": 6.258, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1932, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3799411058425903, |
|
"eval_runtime": 27577.5799, |
|
"eval_samples_per_second": 26.1, |
|
"eval_steps_per_second": 6.525, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2312, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3715393543243408, |
|
"eval_runtime": 28034.4873, |
|
"eval_samples_per_second": 25.675, |
|
"eval_steps_per_second": 6.419, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2841, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3804839849472046, |
|
"eval_runtime": 28127.7804, |
|
"eval_samples_per_second": 25.59, |
|
"eval_steps_per_second": 6.397, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1463, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3776183128356934, |
|
"eval_runtime": 27577.1163, |
|
"eval_samples_per_second": 26.101, |
|
"eval_steps_per_second": 6.525, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1313, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3699731826782227, |
|
"eval_runtime": 28190.8614, |
|
"eval_samples_per_second": 25.533, |
|
"eval_steps_per_second": 6.383, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2267, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.366495132446289, |
|
"eval_runtime": 28103.2881, |
|
"eval_samples_per_second": 25.612, |
|
"eval_steps_per_second": 6.403, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1866, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3763595819473267, |
|
"eval_runtime": 27696.0614, |
|
"eval_samples_per_second": 25.989, |
|
"eval_steps_per_second": 6.497, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2347, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3777934312820435, |
|
"eval_runtime": 28118.8859, |
|
"eval_samples_per_second": 25.598, |
|
"eval_steps_per_second": 6.399, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1514, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.370295763015747, |
|
"eval_runtime": 27665.6655, |
|
"eval_samples_per_second": 26.017, |
|
"eval_steps_per_second": 6.504, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2867, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.372216820716858, |
|
"eval_runtime": 28029.069, |
|
"eval_samples_per_second": 25.68, |
|
"eval_steps_per_second": 6.42, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.3031, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3675533533096313, |
|
"eval_runtime": 28060.4111, |
|
"eval_samples_per_second": 25.651, |
|
"eval_steps_per_second": 6.413, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2353, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3742448091506958, |
|
"eval_runtime": 27439.4619, |
|
"eval_samples_per_second": 26.232, |
|
"eval_steps_per_second": 6.558, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2022, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3690038919448853, |
|
"eval_runtime": 28177.5616, |
|
"eval_samples_per_second": 25.545, |
|
"eval_steps_per_second": 6.386, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1925, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3660128116607666, |
|
"eval_runtime": 28183.7194, |
|
"eval_samples_per_second": 25.539, |
|
"eval_steps_per_second": 6.385, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2097, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3706327676773071, |
|
"eval_runtime": 27638.252, |
|
"eval_samples_per_second": 26.043, |
|
"eval_steps_per_second": 6.511, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1606, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.369661569595337, |
|
"eval_runtime": 28259.5204, |
|
"eval_samples_per_second": 25.47, |
|
"eval_steps_per_second": 6.368, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2216, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.368302822113037, |
|
"eval_runtime": 28149.9769, |
|
"eval_samples_per_second": 25.57, |
|
"eval_steps_per_second": 6.392, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1916, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3689770698547363, |
|
"eval_runtime": 27702.5821, |
|
"eval_samples_per_second": 25.983, |
|
"eval_steps_per_second": 6.496, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2369, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3695650100708008, |
|
"eval_runtime": 28082.9192, |
|
"eval_samples_per_second": 25.631, |
|
"eval_steps_per_second": 6.408, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1862, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3672432899475098, |
|
"eval_runtime": 27790.3978, |
|
"eval_samples_per_second": 25.9, |
|
"eval_steps_per_second": 6.475, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1875, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.372326135635376, |
|
"eval_runtime": 27957.8295, |
|
"eval_samples_per_second": 25.745, |
|
"eval_steps_per_second": 6.436, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2237, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3708332777023315, |
|
"eval_runtime": 28123.0687, |
|
"eval_samples_per_second": 25.594, |
|
"eval_steps_per_second": 6.399, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1123, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3687807321548462, |
|
"eval_runtime": 27597.4681, |
|
"eval_samples_per_second": 26.082, |
|
"eval_steps_per_second": 6.52, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1219, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.376206874847412, |
|
"eval_runtime": 28127.0268, |
|
"eval_samples_per_second": 25.59, |
|
"eval_steps_per_second": 6.398, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1776, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.368283748626709, |
|
"eval_runtime": 28116.6088, |
|
"eval_samples_per_second": 25.6, |
|
"eval_steps_per_second": 6.4, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1627, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3710017204284668, |
|
"eval_runtime": 27585.1366, |
|
"eval_samples_per_second": 26.093, |
|
"eval_steps_per_second": 6.523, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0627, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3697084188461304, |
|
"eval_runtime": 28272.5185, |
|
"eval_samples_per_second": 25.459, |
|
"eval_steps_per_second": 6.365, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0632, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3792474269866943, |
|
"eval_runtime": 28381.2307, |
|
"eval_samples_per_second": 25.361, |
|
"eval_steps_per_second": 6.34, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2426, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.373166799545288, |
|
"eval_runtime": 28634.6137, |
|
"eval_samples_per_second": 25.137, |
|
"eval_steps_per_second": 6.284, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1263, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3743404150009155, |
|
"eval_runtime": 28674.6776, |
|
"eval_samples_per_second": 25.102, |
|
"eval_steps_per_second": 6.275, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1131, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3682280778884888, |
|
"eval_runtime": 28099.2766, |
|
"eval_samples_per_second": 25.616, |
|
"eval_steps_per_second": 6.404, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0595, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3719111680984497, |
|
"eval_runtime": 28695.4529, |
|
"eval_samples_per_second": 25.084, |
|
"eval_steps_per_second": 6.271, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1468, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3663983345031738, |
|
"eval_runtime": 28026.4183, |
|
"eval_samples_per_second": 25.682, |
|
"eval_steps_per_second": 6.421, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.184, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3723489046096802, |
|
"eval_runtime": 28690.9854, |
|
"eval_samples_per_second": 25.087, |
|
"eval_steps_per_second": 6.272, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1262, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3714051246643066, |
|
"eval_runtime": 28156.2291, |
|
"eval_samples_per_second": 25.564, |
|
"eval_steps_per_second": 6.391, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1758, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.368726134300232, |
|
"eval_runtime": 28657.2462, |
|
"eval_samples_per_second": 25.117, |
|
"eval_steps_per_second": 6.279, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0438, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3730684518814087, |
|
"eval_runtime": 28686.5378, |
|
"eval_samples_per_second": 25.091, |
|
"eval_steps_per_second": 6.273, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1404, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3706409931182861, |
|
"eval_runtime": 28123.1244, |
|
"eval_samples_per_second": 25.594, |
|
"eval_steps_per_second": 6.399, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1135, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3763220310211182, |
|
"eval_runtime": 28682.6176, |
|
"eval_samples_per_second": 25.095, |
|
"eval_steps_per_second": 6.274, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0536, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3709115982055664, |
|
"eval_runtime": 28032.6358, |
|
"eval_samples_per_second": 25.677, |
|
"eval_steps_per_second": 6.419, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1203, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3710169792175293, |
|
"eval_runtime": 28678.3157, |
|
"eval_samples_per_second": 25.099, |
|
"eval_steps_per_second": 6.275, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0784, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.370123028755188, |
|
"eval_runtime": 28008.2176, |
|
"eval_samples_per_second": 25.699, |
|
"eval_steps_per_second": 6.425, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0521, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3733536005020142, |
|
"eval_runtime": 17990.0293, |
|
"eval_samples_per_second": 40.01, |
|
"eval_steps_per_second": 10.003, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1773, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3757646083831787, |
|
"eval_runtime": 18000.6521, |
|
"eval_samples_per_second": 39.987, |
|
"eval_steps_per_second": 9.997, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.1688, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3776638507843018, |
|
"eval_runtime": 17989.8584, |
|
"eval_samples_per_second": 40.011, |
|
"eval_steps_per_second": 10.003, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5e-05, |
|
"loss": 2.0855, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"eval_loss": 1.3749734163284302, |
|
"eval_runtime": 17990.6544, |
|
"eval_samples_per_second": 40.009, |
|
"eval_steps_per_second": 10.002, |
|
"step": 29000 |
|
} |
|
], |
|
"max_steps": 30000, |
|
"num_train_epochs": 1, |
|
"total_flos": 1.5154937856e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|