gpt2-waiting / trainer_state.json
louis030195's picture
first commit
fe556ee
raw
history blame
18.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 400.0,
"global_step": 15200,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 5.26,
"eval_loss": 1.7080078125,
"eval_runtime": 3.7763,
"eval_samples_per_second": 68.321,
"eval_steps_per_second": 8.739,
"step": 200
},
{
"epoch": 10.53,
"eval_loss": 1.7001953125,
"eval_runtime": 3.7858,
"eval_samples_per_second": 68.149,
"eval_steps_per_second": 8.717,
"step": 400
},
{
"epoch": 13.16,
"learning_rate": 5e-05,
"loss": 1.671,
"step": 500
},
{
"epoch": 15.79,
"eval_loss": 1.7138671875,
"eval_runtime": 3.7865,
"eval_samples_per_second": 68.137,
"eval_steps_per_second": 8.715,
"step": 600
},
{
"epoch": 21.05,
"eval_loss": 1.744140625,
"eval_runtime": 3.7879,
"eval_samples_per_second": 68.112,
"eval_steps_per_second": 8.712,
"step": 800
},
{
"epoch": 26.32,
"learning_rate": 5e-05,
"loss": 1.4438,
"step": 1000
},
{
"epoch": 26.32,
"eval_loss": 1.794921875,
"eval_runtime": 3.7869,
"eval_samples_per_second": 68.13,
"eval_steps_per_second": 8.714,
"step": 1000
},
{
"epoch": 31.58,
"eval_loss": 1.84375,
"eval_runtime": 3.7879,
"eval_samples_per_second": 68.112,
"eval_steps_per_second": 8.712,
"step": 1200
},
{
"epoch": 36.84,
"eval_loss": 1.896484375,
"eval_runtime": 3.7905,
"eval_samples_per_second": 68.066,
"eval_steps_per_second": 8.706,
"step": 1400
},
{
"epoch": 39.47,
"learning_rate": 5e-05,
"loss": 1.2806,
"step": 1500
},
{
"epoch": 42.11,
"eval_loss": 1.9619140625,
"eval_runtime": 3.7916,
"eval_samples_per_second": 68.044,
"eval_steps_per_second": 8.703,
"step": 1600
},
{
"epoch": 47.37,
"eval_loss": 2.01953125,
"eval_runtime": 3.7897,
"eval_samples_per_second": 68.08,
"eval_steps_per_second": 8.708,
"step": 1800
},
{
"epoch": 52.63,
"learning_rate": 5e-05,
"loss": 1.1433,
"step": 2000
},
{
"epoch": 52.63,
"eval_loss": 2.068359375,
"eval_runtime": 3.7891,
"eval_samples_per_second": 68.091,
"eval_steps_per_second": 8.709,
"step": 2000
},
{
"epoch": 57.89,
"eval_loss": 2.1171875,
"eval_runtime": 3.7902,
"eval_samples_per_second": 68.069,
"eval_steps_per_second": 8.707,
"step": 2200
},
{
"epoch": 63.16,
"eval_loss": 2.1953125,
"eval_runtime": 3.7898,
"eval_samples_per_second": 68.077,
"eval_steps_per_second": 8.708,
"step": 2400
},
{
"epoch": 65.79,
"learning_rate": 5e-05,
"loss": 1.027,
"step": 2500
},
{
"epoch": 68.42,
"eval_loss": 2.25,
"eval_runtime": 3.7881,
"eval_samples_per_second": 68.108,
"eval_steps_per_second": 8.711,
"step": 2600
},
{
"epoch": 73.68,
"eval_loss": 2.291015625,
"eval_runtime": 3.7876,
"eval_samples_per_second": 68.118,
"eval_steps_per_second": 8.713,
"step": 2800
},
{
"epoch": 78.95,
"learning_rate": 5e-05,
"loss": 0.9216,
"step": 3000
},
{
"epoch": 78.95,
"eval_loss": 2.34765625,
"eval_runtime": 3.7885,
"eval_samples_per_second": 68.101,
"eval_steps_per_second": 8.711,
"step": 3000
},
{
"epoch": 84.21,
"eval_loss": 2.423828125,
"eval_runtime": 3.7907,
"eval_samples_per_second": 68.062,
"eval_steps_per_second": 8.706,
"step": 3200
},
{
"epoch": 89.47,
"eval_loss": 2.482421875,
"eval_runtime": 3.7903,
"eval_samples_per_second": 68.068,
"eval_steps_per_second": 8.706,
"step": 3400
},
{
"epoch": 92.11,
"learning_rate": 5e-05,
"loss": 0.8209,
"step": 3500
},
{
"epoch": 94.74,
"eval_loss": 2.529296875,
"eval_runtime": 3.7863,
"eval_samples_per_second": 68.14,
"eval_steps_per_second": 8.716,
"step": 3600
},
{
"epoch": 100.0,
"eval_loss": 2.5859375,
"eval_runtime": 3.785,
"eval_samples_per_second": 68.164,
"eval_steps_per_second": 8.719,
"step": 3800
},
{
"epoch": 105.26,
"learning_rate": 5e-05,
"loss": 0.7231,
"step": 4000
},
{
"epoch": 105.26,
"eval_loss": 2.6640625,
"eval_runtime": 3.7856,
"eval_samples_per_second": 68.153,
"eval_steps_per_second": 8.717,
"step": 4000
},
{
"epoch": 110.53,
"eval_loss": 2.703125,
"eval_runtime": 3.7862,
"eval_samples_per_second": 68.142,
"eval_steps_per_second": 8.716,
"step": 4200
},
{
"epoch": 115.79,
"eval_loss": 2.78515625,
"eval_runtime": 3.7894,
"eval_samples_per_second": 68.084,
"eval_steps_per_second": 8.708,
"step": 4400
},
{
"epoch": 118.42,
"learning_rate": 5e-05,
"loss": 0.6281,
"step": 4500
},
{
"epoch": 121.05,
"eval_loss": 2.84375,
"eval_runtime": 3.7883,
"eval_samples_per_second": 68.105,
"eval_steps_per_second": 8.711,
"step": 4600
},
{
"epoch": 126.32,
"eval_loss": 2.921875,
"eval_runtime": 3.79,
"eval_samples_per_second": 68.074,
"eval_steps_per_second": 8.707,
"step": 4800
},
{
"epoch": 131.58,
"learning_rate": 5e-05,
"loss": 0.5384,
"step": 5000
},
{
"epoch": 131.58,
"eval_loss": 2.994140625,
"eval_runtime": 3.7895,
"eval_samples_per_second": 68.082,
"eval_steps_per_second": 8.708,
"step": 5000
},
{
"epoch": 136.84,
"eval_loss": 3.048828125,
"eval_runtime": 3.7912,
"eval_samples_per_second": 68.053,
"eval_steps_per_second": 8.704,
"step": 5200
},
{
"epoch": 142.11,
"eval_loss": 3.107421875,
"eval_runtime": 3.7872,
"eval_samples_per_second": 68.123,
"eval_steps_per_second": 8.713,
"step": 5400
},
{
"epoch": 144.74,
"learning_rate": 5e-05,
"loss": 0.4574,
"step": 5500
},
{
"epoch": 147.37,
"eval_loss": 3.169921875,
"eval_runtime": 3.7886,
"eval_samples_per_second": 68.1,
"eval_steps_per_second": 8.71,
"step": 5600
},
{
"epoch": 152.63,
"eval_loss": 3.2265625,
"eval_runtime": 3.7924,
"eval_samples_per_second": 68.03,
"eval_steps_per_second": 8.702,
"step": 5800
},
{
"epoch": 157.89,
"learning_rate": 5e-05,
"loss": 0.3848,
"step": 6000
},
{
"epoch": 157.89,
"eval_loss": 3.291015625,
"eval_runtime": 3.7859,
"eval_samples_per_second": 68.148,
"eval_steps_per_second": 8.717,
"step": 6000
},
{
"epoch": 163.16,
"eval_loss": 3.376953125,
"eval_runtime": 3.7886,
"eval_samples_per_second": 68.099,
"eval_steps_per_second": 8.71,
"step": 6200
},
{
"epoch": 168.42,
"eval_loss": 3.408203125,
"eval_runtime": 3.7885,
"eval_samples_per_second": 68.1,
"eval_steps_per_second": 8.71,
"step": 6400
},
{
"epoch": 171.05,
"learning_rate": 5e-05,
"loss": 0.3224,
"step": 6500
},
{
"epoch": 173.68,
"eval_loss": 3.4765625,
"eval_runtime": 3.7922,
"eval_samples_per_second": 68.034,
"eval_steps_per_second": 8.702,
"step": 6600
},
{
"epoch": 178.95,
"eval_loss": 3.529296875,
"eval_runtime": 3.7898,
"eval_samples_per_second": 68.077,
"eval_steps_per_second": 8.708,
"step": 6800
},
{
"epoch": 184.21,
"learning_rate": 5e-05,
"loss": 0.2697,
"step": 7000
},
{
"epoch": 184.21,
"eval_loss": 3.591796875,
"eval_runtime": 3.7854,
"eval_samples_per_second": 68.157,
"eval_steps_per_second": 8.718,
"step": 7000
},
{
"epoch": 189.47,
"eval_loss": 3.634765625,
"eval_runtime": 3.7918,
"eval_samples_per_second": 68.041,
"eval_steps_per_second": 8.703,
"step": 7200
},
{
"epoch": 194.74,
"eval_loss": 3.68359375,
"eval_runtime": 3.7891,
"eval_samples_per_second": 68.09,
"eval_steps_per_second": 8.709,
"step": 7400
},
{
"epoch": 197.37,
"learning_rate": 5e-05,
"loss": 0.2258,
"step": 7500
},
{
"epoch": 200.0,
"eval_loss": 3.7265625,
"eval_runtime": 3.7895,
"eval_samples_per_second": 68.083,
"eval_steps_per_second": 8.708,
"step": 7600
},
{
"epoch": 205.26,
"eval_loss": 3.79296875,
"eval_runtime": 3.7901,
"eval_samples_per_second": 68.073,
"eval_steps_per_second": 8.707,
"step": 7800
},
{
"epoch": 210.53,
"learning_rate": 5e-05,
"loss": 0.1893,
"step": 8000
},
{
"epoch": 210.53,
"eval_loss": 3.828125,
"eval_runtime": 3.7891,
"eval_samples_per_second": 68.09,
"eval_steps_per_second": 8.709,
"step": 8000
},
{
"epoch": 215.79,
"eval_loss": 3.880859375,
"eval_runtime": 3.7907,
"eval_samples_per_second": 68.062,
"eval_steps_per_second": 8.706,
"step": 8200
},
{
"epoch": 221.05,
"eval_loss": 3.923828125,
"eval_runtime": 3.7895,
"eval_samples_per_second": 68.082,
"eval_steps_per_second": 8.708,
"step": 8400
},
{
"epoch": 223.68,
"learning_rate": 5e-05,
"loss": 0.1602,
"step": 8500
},
{
"epoch": 226.32,
"eval_loss": 3.974609375,
"eval_runtime": 3.7894,
"eval_samples_per_second": 68.084,
"eval_steps_per_second": 8.708,
"step": 8600
},
{
"epoch": 231.58,
"eval_loss": 4.00390625,
"eval_runtime": 3.7923,
"eval_samples_per_second": 68.032,
"eval_steps_per_second": 8.702,
"step": 8800
},
{
"epoch": 236.84,
"learning_rate": 5e-05,
"loss": 0.137,
"step": 9000
},
{
"epoch": 236.84,
"eval_loss": 4.046875,
"eval_runtime": 3.7922,
"eval_samples_per_second": 68.034,
"eval_steps_per_second": 8.702,
"step": 9000
},
{
"epoch": 242.11,
"eval_loss": 4.07421875,
"eval_runtime": 3.7901,
"eval_samples_per_second": 68.072,
"eval_steps_per_second": 8.707,
"step": 9200
},
{
"epoch": 247.37,
"eval_loss": 4.12109375,
"eval_runtime": 3.7896,
"eval_samples_per_second": 68.08,
"eval_steps_per_second": 8.708,
"step": 9400
},
{
"epoch": 250.0,
"learning_rate": 5e-05,
"loss": 0.1179,
"step": 9500
},
{
"epoch": 252.63,
"eval_loss": 4.15625,
"eval_runtime": 3.7912,
"eval_samples_per_second": 68.053,
"eval_steps_per_second": 8.704,
"step": 9600
},
{
"epoch": 257.89,
"eval_loss": 4.203125,
"eval_runtime": 3.7923,
"eval_samples_per_second": 68.032,
"eval_steps_per_second": 8.702,
"step": 9800
},
{
"epoch": 263.16,
"learning_rate": 5e-05,
"loss": 0.1024,
"step": 10000
},
{
"epoch": 263.16,
"eval_loss": 4.234375,
"eval_runtime": 3.7852,
"eval_samples_per_second": 68.159,
"eval_steps_per_second": 8.718,
"step": 10000
},
{
"epoch": 268.42,
"eval_loss": 4.2734375,
"eval_runtime": 3.7869,
"eval_samples_per_second": 68.129,
"eval_steps_per_second": 8.714,
"step": 10200
},
{
"epoch": 273.68,
"eval_loss": 4.3046875,
"eval_runtime": 3.7892,
"eval_samples_per_second": 68.088,
"eval_steps_per_second": 8.709,
"step": 10400
},
{
"epoch": 276.32,
"learning_rate": 5e-05,
"loss": 0.0901,
"step": 10500
},
{
"epoch": 278.95,
"eval_loss": 4.3125,
"eval_runtime": 3.7869,
"eval_samples_per_second": 68.129,
"eval_steps_per_second": 8.714,
"step": 10600
},
{
"epoch": 284.21,
"eval_loss": 4.375,
"eval_runtime": 3.7872,
"eval_samples_per_second": 68.125,
"eval_steps_per_second": 8.714,
"step": 10800
},
{
"epoch": 289.47,
"learning_rate": 5e-05,
"loss": 0.0796,
"step": 11000
},
{
"epoch": 289.47,
"eval_loss": 4.390625,
"eval_runtime": 3.7843,
"eval_samples_per_second": 68.177,
"eval_steps_per_second": 8.72,
"step": 11000
},
{
"epoch": 294.74,
"eval_loss": 4.4375,
"eval_runtime": 3.7881,
"eval_samples_per_second": 68.107,
"eval_steps_per_second": 8.711,
"step": 11200
},
{
"epoch": 300.0,
"eval_loss": 4.453125,
"eval_runtime": 3.7869,
"eval_samples_per_second": 68.129,
"eval_steps_per_second": 8.714,
"step": 11400
},
{
"epoch": 302.63,
"learning_rate": 5e-05,
"loss": 0.0706,
"step": 11500
},
{
"epoch": 305.26,
"eval_loss": 4.5078125,
"eval_runtime": 3.7854,
"eval_samples_per_second": 68.156,
"eval_steps_per_second": 8.718,
"step": 11600
},
{
"epoch": 310.53,
"eval_loss": 4.515625,
"eval_runtime": 3.787,
"eval_samples_per_second": 68.128,
"eval_steps_per_second": 8.714,
"step": 11800
},
{
"epoch": 315.79,
"learning_rate": 5e-05,
"loss": 0.0631,
"step": 12000
},
{
"epoch": 315.79,
"eval_loss": 4.53515625,
"eval_runtime": 3.7837,
"eval_samples_per_second": 68.187,
"eval_steps_per_second": 8.722,
"step": 12000
},
{
"epoch": 321.05,
"eval_loss": 4.5859375,
"eval_runtime": 3.7869,
"eval_samples_per_second": 68.13,
"eval_steps_per_second": 8.714,
"step": 12200
},
{
"epoch": 326.32,
"eval_loss": 4.609375,
"eval_runtime": 3.788,
"eval_samples_per_second": 68.11,
"eval_steps_per_second": 8.712,
"step": 12400
},
{
"epoch": 328.95,
"learning_rate": 5e-05,
"loss": 0.0573,
"step": 12500
},
{
"epoch": 331.58,
"eval_loss": 4.63671875,
"eval_runtime": 3.7891,
"eval_samples_per_second": 68.09,
"eval_steps_per_second": 8.709,
"step": 12600
},
{
"epoch": 336.84,
"eval_loss": 4.63671875,
"eval_runtime": 3.7855,
"eval_samples_per_second": 68.154,
"eval_steps_per_second": 8.717,
"step": 12800
},
{
"epoch": 342.11,
"learning_rate": 5e-05,
"loss": 0.0521,
"step": 13000
},
{
"epoch": 342.11,
"eval_loss": 4.6640625,
"eval_runtime": 3.7838,
"eval_samples_per_second": 68.185,
"eval_steps_per_second": 8.721,
"step": 13000
},
{
"epoch": 347.37,
"eval_loss": 4.70703125,
"eval_runtime": 3.7834,
"eval_samples_per_second": 68.192,
"eval_steps_per_second": 8.722,
"step": 13200
},
{
"epoch": 352.63,
"eval_loss": 4.69921875,
"eval_runtime": 3.789,
"eval_samples_per_second": 68.092,
"eval_steps_per_second": 8.709,
"step": 13400
},
{
"epoch": 355.26,
"learning_rate": 5e-05,
"loss": 0.0475,
"step": 13500
},
{
"epoch": 357.89,
"eval_loss": 4.75390625,
"eval_runtime": 3.7901,
"eval_samples_per_second": 68.073,
"eval_steps_per_second": 8.707,
"step": 13600
},
{
"epoch": 363.16,
"eval_loss": 4.765625,
"eval_runtime": 3.7877,
"eval_samples_per_second": 68.116,
"eval_steps_per_second": 8.712,
"step": 13800
},
{
"epoch": 368.42,
"learning_rate": 5e-05,
"loss": 0.0437,
"step": 14000
},
{
"epoch": 368.42,
"eval_loss": 4.80078125,
"eval_runtime": 3.7858,
"eval_samples_per_second": 68.15,
"eval_steps_per_second": 8.717,
"step": 14000
},
{
"epoch": 373.68,
"eval_loss": 4.83203125,
"eval_runtime": 3.7888,
"eval_samples_per_second": 68.095,
"eval_steps_per_second": 8.71,
"step": 14200
},
{
"epoch": 378.95,
"eval_loss": 4.8515625,
"eval_runtime": 3.7901,
"eval_samples_per_second": 68.073,
"eval_steps_per_second": 8.707,
"step": 14400
},
{
"epoch": 381.58,
"learning_rate": 5e-05,
"loss": 0.0399,
"step": 14500
},
{
"epoch": 384.21,
"eval_loss": 4.86328125,
"eval_runtime": 3.7938,
"eval_samples_per_second": 68.006,
"eval_steps_per_second": 8.698,
"step": 14600
},
{
"epoch": 389.47,
"eval_loss": 4.89453125,
"eval_runtime": 3.7887,
"eval_samples_per_second": 68.098,
"eval_steps_per_second": 8.71,
"step": 14800
},
{
"epoch": 394.74,
"learning_rate": 5e-05,
"loss": 0.0367,
"step": 15000
},
{
"epoch": 394.74,
"eval_loss": 4.90625,
"eval_runtime": 3.7864,
"eval_samples_per_second": 68.138,
"eval_steps_per_second": 8.715,
"step": 15000
},
{
"epoch": 400.0,
"eval_loss": 4.94140625,
"eval_runtime": 3.791,
"eval_samples_per_second": 68.057,
"eval_steps_per_second": 8.705,
"step": 15200
},
{
"epoch": 400.0,
"step": 15200,
"total_flos": 1.2604727427386573e+17,
"train_loss": 0.4328666927939967,
"train_runtime": 22235.2418,
"train_samples_per_second": 10.848,
"train_steps_per_second": 0.684
}
],
"max_steps": 15200,
"num_train_epochs": 400,
"total_flos": 1.2604727427386573e+17,
"trial_name": null,
"trial_params": null
}