|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 400.0, |
|
"global_step": 15200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 5.26, |
|
"eval_loss": 1.7080078125, |
|
"eval_runtime": 3.7763, |
|
"eval_samples_per_second": 68.321, |
|
"eval_steps_per_second": 8.739, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 10.53, |
|
"eval_loss": 1.7001953125, |
|
"eval_runtime": 3.7858, |
|
"eval_samples_per_second": 68.149, |
|
"eval_steps_per_second": 8.717, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 13.16, |
|
"learning_rate": 5e-05, |
|
"loss": 1.671, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 15.79, |
|
"eval_loss": 1.7138671875, |
|
"eval_runtime": 3.7865, |
|
"eval_samples_per_second": 68.137, |
|
"eval_steps_per_second": 8.715, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 21.05, |
|
"eval_loss": 1.744140625, |
|
"eval_runtime": 3.7879, |
|
"eval_samples_per_second": 68.112, |
|
"eval_steps_per_second": 8.712, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"learning_rate": 5e-05, |
|
"loss": 1.4438, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 26.32, |
|
"eval_loss": 1.794921875, |
|
"eval_runtime": 3.7869, |
|
"eval_samples_per_second": 68.13, |
|
"eval_steps_per_second": 8.714, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 31.58, |
|
"eval_loss": 1.84375, |
|
"eval_runtime": 3.7879, |
|
"eval_samples_per_second": 68.112, |
|
"eval_steps_per_second": 8.712, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 36.84, |
|
"eval_loss": 1.896484375, |
|
"eval_runtime": 3.7905, |
|
"eval_samples_per_second": 68.066, |
|
"eval_steps_per_second": 8.706, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 39.47, |
|
"learning_rate": 5e-05, |
|
"loss": 1.2806, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 42.11, |
|
"eval_loss": 1.9619140625, |
|
"eval_runtime": 3.7916, |
|
"eval_samples_per_second": 68.044, |
|
"eval_steps_per_second": 8.703, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 47.37, |
|
"eval_loss": 2.01953125, |
|
"eval_runtime": 3.7897, |
|
"eval_samples_per_second": 68.08, |
|
"eval_steps_per_second": 8.708, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 52.63, |
|
"learning_rate": 5e-05, |
|
"loss": 1.1433, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 52.63, |
|
"eval_loss": 2.068359375, |
|
"eval_runtime": 3.7891, |
|
"eval_samples_per_second": 68.091, |
|
"eval_steps_per_second": 8.709, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 57.89, |
|
"eval_loss": 2.1171875, |
|
"eval_runtime": 3.7902, |
|
"eval_samples_per_second": 68.069, |
|
"eval_steps_per_second": 8.707, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 63.16, |
|
"eval_loss": 2.1953125, |
|
"eval_runtime": 3.7898, |
|
"eval_samples_per_second": 68.077, |
|
"eval_steps_per_second": 8.708, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 65.79, |
|
"learning_rate": 5e-05, |
|
"loss": 1.027, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 68.42, |
|
"eval_loss": 2.25, |
|
"eval_runtime": 3.7881, |
|
"eval_samples_per_second": 68.108, |
|
"eval_steps_per_second": 8.711, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 73.68, |
|
"eval_loss": 2.291015625, |
|
"eval_runtime": 3.7876, |
|
"eval_samples_per_second": 68.118, |
|
"eval_steps_per_second": 8.713, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 78.95, |
|
"learning_rate": 5e-05, |
|
"loss": 0.9216, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 78.95, |
|
"eval_loss": 2.34765625, |
|
"eval_runtime": 3.7885, |
|
"eval_samples_per_second": 68.101, |
|
"eval_steps_per_second": 8.711, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 84.21, |
|
"eval_loss": 2.423828125, |
|
"eval_runtime": 3.7907, |
|
"eval_samples_per_second": 68.062, |
|
"eval_steps_per_second": 8.706, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 89.47, |
|
"eval_loss": 2.482421875, |
|
"eval_runtime": 3.7903, |
|
"eval_samples_per_second": 68.068, |
|
"eval_steps_per_second": 8.706, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 92.11, |
|
"learning_rate": 5e-05, |
|
"loss": 0.8209, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 94.74, |
|
"eval_loss": 2.529296875, |
|
"eval_runtime": 3.7863, |
|
"eval_samples_per_second": 68.14, |
|
"eval_steps_per_second": 8.716, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 100.0, |
|
"eval_loss": 2.5859375, |
|
"eval_runtime": 3.785, |
|
"eval_samples_per_second": 68.164, |
|
"eval_steps_per_second": 8.719, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 105.26, |
|
"learning_rate": 5e-05, |
|
"loss": 0.7231, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 105.26, |
|
"eval_loss": 2.6640625, |
|
"eval_runtime": 3.7856, |
|
"eval_samples_per_second": 68.153, |
|
"eval_steps_per_second": 8.717, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 110.53, |
|
"eval_loss": 2.703125, |
|
"eval_runtime": 3.7862, |
|
"eval_samples_per_second": 68.142, |
|
"eval_steps_per_second": 8.716, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 115.79, |
|
"eval_loss": 2.78515625, |
|
"eval_runtime": 3.7894, |
|
"eval_samples_per_second": 68.084, |
|
"eval_steps_per_second": 8.708, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 118.42, |
|
"learning_rate": 5e-05, |
|
"loss": 0.6281, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 121.05, |
|
"eval_loss": 2.84375, |
|
"eval_runtime": 3.7883, |
|
"eval_samples_per_second": 68.105, |
|
"eval_steps_per_second": 8.711, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 126.32, |
|
"eval_loss": 2.921875, |
|
"eval_runtime": 3.79, |
|
"eval_samples_per_second": 68.074, |
|
"eval_steps_per_second": 8.707, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 131.58, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5384, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 131.58, |
|
"eval_loss": 2.994140625, |
|
"eval_runtime": 3.7895, |
|
"eval_samples_per_second": 68.082, |
|
"eval_steps_per_second": 8.708, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 136.84, |
|
"eval_loss": 3.048828125, |
|
"eval_runtime": 3.7912, |
|
"eval_samples_per_second": 68.053, |
|
"eval_steps_per_second": 8.704, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 142.11, |
|
"eval_loss": 3.107421875, |
|
"eval_runtime": 3.7872, |
|
"eval_samples_per_second": 68.123, |
|
"eval_steps_per_second": 8.713, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 144.74, |
|
"learning_rate": 5e-05, |
|
"loss": 0.4574, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 147.37, |
|
"eval_loss": 3.169921875, |
|
"eval_runtime": 3.7886, |
|
"eval_samples_per_second": 68.1, |
|
"eval_steps_per_second": 8.71, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 152.63, |
|
"eval_loss": 3.2265625, |
|
"eval_runtime": 3.7924, |
|
"eval_samples_per_second": 68.03, |
|
"eval_steps_per_second": 8.702, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 157.89, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3848, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 157.89, |
|
"eval_loss": 3.291015625, |
|
"eval_runtime": 3.7859, |
|
"eval_samples_per_second": 68.148, |
|
"eval_steps_per_second": 8.717, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 163.16, |
|
"eval_loss": 3.376953125, |
|
"eval_runtime": 3.7886, |
|
"eval_samples_per_second": 68.099, |
|
"eval_steps_per_second": 8.71, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 168.42, |
|
"eval_loss": 3.408203125, |
|
"eval_runtime": 3.7885, |
|
"eval_samples_per_second": 68.1, |
|
"eval_steps_per_second": 8.71, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 171.05, |
|
"learning_rate": 5e-05, |
|
"loss": 0.3224, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 173.68, |
|
"eval_loss": 3.4765625, |
|
"eval_runtime": 3.7922, |
|
"eval_samples_per_second": 68.034, |
|
"eval_steps_per_second": 8.702, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 178.95, |
|
"eval_loss": 3.529296875, |
|
"eval_runtime": 3.7898, |
|
"eval_samples_per_second": 68.077, |
|
"eval_steps_per_second": 8.708, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 184.21, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2697, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 184.21, |
|
"eval_loss": 3.591796875, |
|
"eval_runtime": 3.7854, |
|
"eval_samples_per_second": 68.157, |
|
"eval_steps_per_second": 8.718, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 189.47, |
|
"eval_loss": 3.634765625, |
|
"eval_runtime": 3.7918, |
|
"eval_samples_per_second": 68.041, |
|
"eval_steps_per_second": 8.703, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 194.74, |
|
"eval_loss": 3.68359375, |
|
"eval_runtime": 3.7891, |
|
"eval_samples_per_second": 68.09, |
|
"eval_steps_per_second": 8.709, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 197.37, |
|
"learning_rate": 5e-05, |
|
"loss": 0.2258, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 200.0, |
|
"eval_loss": 3.7265625, |
|
"eval_runtime": 3.7895, |
|
"eval_samples_per_second": 68.083, |
|
"eval_steps_per_second": 8.708, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 205.26, |
|
"eval_loss": 3.79296875, |
|
"eval_runtime": 3.7901, |
|
"eval_samples_per_second": 68.073, |
|
"eval_steps_per_second": 8.707, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 210.53, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1893, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 210.53, |
|
"eval_loss": 3.828125, |
|
"eval_runtime": 3.7891, |
|
"eval_samples_per_second": 68.09, |
|
"eval_steps_per_second": 8.709, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 215.79, |
|
"eval_loss": 3.880859375, |
|
"eval_runtime": 3.7907, |
|
"eval_samples_per_second": 68.062, |
|
"eval_steps_per_second": 8.706, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 221.05, |
|
"eval_loss": 3.923828125, |
|
"eval_runtime": 3.7895, |
|
"eval_samples_per_second": 68.082, |
|
"eval_steps_per_second": 8.708, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 223.68, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1602, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 226.32, |
|
"eval_loss": 3.974609375, |
|
"eval_runtime": 3.7894, |
|
"eval_samples_per_second": 68.084, |
|
"eval_steps_per_second": 8.708, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 231.58, |
|
"eval_loss": 4.00390625, |
|
"eval_runtime": 3.7923, |
|
"eval_samples_per_second": 68.032, |
|
"eval_steps_per_second": 8.702, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 236.84, |
|
"learning_rate": 5e-05, |
|
"loss": 0.137, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 236.84, |
|
"eval_loss": 4.046875, |
|
"eval_runtime": 3.7922, |
|
"eval_samples_per_second": 68.034, |
|
"eval_steps_per_second": 8.702, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 242.11, |
|
"eval_loss": 4.07421875, |
|
"eval_runtime": 3.7901, |
|
"eval_samples_per_second": 68.072, |
|
"eval_steps_per_second": 8.707, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 247.37, |
|
"eval_loss": 4.12109375, |
|
"eval_runtime": 3.7896, |
|
"eval_samples_per_second": 68.08, |
|
"eval_steps_per_second": 8.708, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 250.0, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1179, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 252.63, |
|
"eval_loss": 4.15625, |
|
"eval_runtime": 3.7912, |
|
"eval_samples_per_second": 68.053, |
|
"eval_steps_per_second": 8.704, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 257.89, |
|
"eval_loss": 4.203125, |
|
"eval_runtime": 3.7923, |
|
"eval_samples_per_second": 68.032, |
|
"eval_steps_per_second": 8.702, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 263.16, |
|
"learning_rate": 5e-05, |
|
"loss": 0.1024, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 263.16, |
|
"eval_loss": 4.234375, |
|
"eval_runtime": 3.7852, |
|
"eval_samples_per_second": 68.159, |
|
"eval_steps_per_second": 8.718, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 268.42, |
|
"eval_loss": 4.2734375, |
|
"eval_runtime": 3.7869, |
|
"eval_samples_per_second": 68.129, |
|
"eval_steps_per_second": 8.714, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 273.68, |
|
"eval_loss": 4.3046875, |
|
"eval_runtime": 3.7892, |
|
"eval_samples_per_second": 68.088, |
|
"eval_steps_per_second": 8.709, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 276.32, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0901, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 278.95, |
|
"eval_loss": 4.3125, |
|
"eval_runtime": 3.7869, |
|
"eval_samples_per_second": 68.129, |
|
"eval_steps_per_second": 8.714, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 284.21, |
|
"eval_loss": 4.375, |
|
"eval_runtime": 3.7872, |
|
"eval_samples_per_second": 68.125, |
|
"eval_steps_per_second": 8.714, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 289.47, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0796, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 289.47, |
|
"eval_loss": 4.390625, |
|
"eval_runtime": 3.7843, |
|
"eval_samples_per_second": 68.177, |
|
"eval_steps_per_second": 8.72, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 294.74, |
|
"eval_loss": 4.4375, |
|
"eval_runtime": 3.7881, |
|
"eval_samples_per_second": 68.107, |
|
"eval_steps_per_second": 8.711, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 300.0, |
|
"eval_loss": 4.453125, |
|
"eval_runtime": 3.7869, |
|
"eval_samples_per_second": 68.129, |
|
"eval_steps_per_second": 8.714, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 302.63, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0706, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 305.26, |
|
"eval_loss": 4.5078125, |
|
"eval_runtime": 3.7854, |
|
"eval_samples_per_second": 68.156, |
|
"eval_steps_per_second": 8.718, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 310.53, |
|
"eval_loss": 4.515625, |
|
"eval_runtime": 3.787, |
|
"eval_samples_per_second": 68.128, |
|
"eval_steps_per_second": 8.714, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 315.79, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0631, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 315.79, |
|
"eval_loss": 4.53515625, |
|
"eval_runtime": 3.7837, |
|
"eval_samples_per_second": 68.187, |
|
"eval_steps_per_second": 8.722, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 321.05, |
|
"eval_loss": 4.5859375, |
|
"eval_runtime": 3.7869, |
|
"eval_samples_per_second": 68.13, |
|
"eval_steps_per_second": 8.714, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 326.32, |
|
"eval_loss": 4.609375, |
|
"eval_runtime": 3.788, |
|
"eval_samples_per_second": 68.11, |
|
"eval_steps_per_second": 8.712, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 328.95, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0573, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 331.58, |
|
"eval_loss": 4.63671875, |
|
"eval_runtime": 3.7891, |
|
"eval_samples_per_second": 68.09, |
|
"eval_steps_per_second": 8.709, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 336.84, |
|
"eval_loss": 4.63671875, |
|
"eval_runtime": 3.7855, |
|
"eval_samples_per_second": 68.154, |
|
"eval_steps_per_second": 8.717, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 342.11, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0521, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 342.11, |
|
"eval_loss": 4.6640625, |
|
"eval_runtime": 3.7838, |
|
"eval_samples_per_second": 68.185, |
|
"eval_steps_per_second": 8.721, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 347.37, |
|
"eval_loss": 4.70703125, |
|
"eval_runtime": 3.7834, |
|
"eval_samples_per_second": 68.192, |
|
"eval_steps_per_second": 8.722, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 352.63, |
|
"eval_loss": 4.69921875, |
|
"eval_runtime": 3.789, |
|
"eval_samples_per_second": 68.092, |
|
"eval_steps_per_second": 8.709, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 355.26, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0475, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 357.89, |
|
"eval_loss": 4.75390625, |
|
"eval_runtime": 3.7901, |
|
"eval_samples_per_second": 68.073, |
|
"eval_steps_per_second": 8.707, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 363.16, |
|
"eval_loss": 4.765625, |
|
"eval_runtime": 3.7877, |
|
"eval_samples_per_second": 68.116, |
|
"eval_steps_per_second": 8.712, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 368.42, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0437, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 368.42, |
|
"eval_loss": 4.80078125, |
|
"eval_runtime": 3.7858, |
|
"eval_samples_per_second": 68.15, |
|
"eval_steps_per_second": 8.717, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 373.68, |
|
"eval_loss": 4.83203125, |
|
"eval_runtime": 3.7888, |
|
"eval_samples_per_second": 68.095, |
|
"eval_steps_per_second": 8.71, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 378.95, |
|
"eval_loss": 4.8515625, |
|
"eval_runtime": 3.7901, |
|
"eval_samples_per_second": 68.073, |
|
"eval_steps_per_second": 8.707, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 381.58, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0399, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 384.21, |
|
"eval_loss": 4.86328125, |
|
"eval_runtime": 3.7938, |
|
"eval_samples_per_second": 68.006, |
|
"eval_steps_per_second": 8.698, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 389.47, |
|
"eval_loss": 4.89453125, |
|
"eval_runtime": 3.7887, |
|
"eval_samples_per_second": 68.098, |
|
"eval_steps_per_second": 8.71, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 394.74, |
|
"learning_rate": 5e-05, |
|
"loss": 0.0367, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 394.74, |
|
"eval_loss": 4.90625, |
|
"eval_runtime": 3.7864, |
|
"eval_samples_per_second": 68.138, |
|
"eval_steps_per_second": 8.715, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"eval_loss": 4.94140625, |
|
"eval_runtime": 3.791, |
|
"eval_samples_per_second": 68.057, |
|
"eval_steps_per_second": 8.705, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 400.0, |
|
"step": 15200, |
|
"total_flos": 1.2604727427386573e+17, |
|
"train_loss": 0.4328666927939967, |
|
"train_runtime": 22235.2418, |
|
"train_samples_per_second": 10.848, |
|
"train_steps_per_second": 0.684 |
|
} |
|
], |
|
"max_steps": 15200, |
|
"num_train_epochs": 400, |
|
"total_flos": 1.2604727427386573e+17, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|