newsSum / checkpoint-1500 /trainer_state.json
Jorkieboe's picture
Upload 40 files
add959d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 109.0909090909091,
"eval_steps": 500,
"global_step": 1500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.95,
"eval_gen_len": 13.1091,
"eval_loss": 21.40757179260254,
"eval_rouge1": 0.0745,
"eval_rouge2": 0.0159,
"eval_rougeL": 0.0584,
"eval_rougeLsum": 0.0582,
"eval_runtime": 11.5492,
"eval_samples_per_second": 9.524,
"eval_steps_per_second": 1.212,
"step": 13
},
{
"epoch": 1.96,
"eval_gen_len": 12.9636,
"eval_loss": 21.254241943359375,
"eval_rouge1": 0.0726,
"eval_rouge2": 0.0153,
"eval_rougeL": 0.0568,
"eval_rougeLsum": 0.0569,
"eval_runtime": 9.5424,
"eval_samples_per_second": 11.528,
"eval_steps_per_second": 1.467,
"step": 27
},
{
"epoch": 2.98,
"eval_gen_len": 12.9636,
"eval_loss": 21.031164169311523,
"eval_rouge1": 0.0724,
"eval_rouge2": 0.0176,
"eval_rougeL": 0.0568,
"eval_rougeLsum": 0.0569,
"eval_runtime": 9.5313,
"eval_samples_per_second": 11.541,
"eval_steps_per_second": 1.469,
"step": 41
},
{
"epoch": 4.0,
"eval_gen_len": 12.6727,
"eval_loss": 20.743330001831055,
"eval_rouge1": 0.0722,
"eval_rouge2": 0.0175,
"eval_rougeL": 0.055,
"eval_rougeLsum": 0.0551,
"eval_runtime": 9.5638,
"eval_samples_per_second": 11.502,
"eval_steps_per_second": 1.464,
"step": 55
},
{
"epoch": 4.95,
"eval_gen_len": 12.5273,
"eval_loss": 20.430522918701172,
"eval_rouge1": 0.0708,
"eval_rouge2": 0.0177,
"eval_rougeL": 0.0545,
"eval_rougeLsum": 0.0547,
"eval_runtime": 9.583,
"eval_samples_per_second": 11.479,
"eval_steps_per_second": 1.461,
"step": 68
},
{
"epoch": 5.96,
"eval_gen_len": 12.6091,
"eval_loss": 20.10591697692871,
"eval_rouge1": 0.0725,
"eval_rouge2": 0.0185,
"eval_rougeL": 0.0559,
"eval_rougeLsum": 0.0559,
"eval_runtime": 9.5892,
"eval_samples_per_second": 11.471,
"eval_steps_per_second": 1.46,
"step": 82
},
{
"epoch": 6.98,
"eval_gen_len": 12.6091,
"eval_loss": 19.850391387939453,
"eval_rouge1": 0.0727,
"eval_rouge2": 0.0169,
"eval_rougeL": 0.0551,
"eval_rougeLsum": 0.0547,
"eval_runtime": 9.6928,
"eval_samples_per_second": 11.349,
"eval_steps_per_second": 1.444,
"step": 96
},
{
"epoch": 8.0,
"eval_gen_len": 13.2909,
"eval_loss": 19.628494262695312,
"eval_rouge1": 0.0816,
"eval_rouge2": 0.0228,
"eval_rougeL": 0.062,
"eval_rougeLsum": 0.0622,
"eval_runtime": 9.7244,
"eval_samples_per_second": 11.312,
"eval_steps_per_second": 1.44,
"step": 110
},
{
"epoch": 8.95,
"eval_gen_len": 13.2909,
"eval_loss": 19.41258430480957,
"eval_rouge1": 0.0811,
"eval_rouge2": 0.0214,
"eval_rougeL": 0.0613,
"eval_rougeLsum": 0.0614,
"eval_runtime": 9.5996,
"eval_samples_per_second": 11.459,
"eval_steps_per_second": 1.458,
"step": 123
},
{
"epoch": 9.96,
"eval_gen_len": 13.5182,
"eval_loss": 19.160032272338867,
"eval_rouge1": 0.0818,
"eval_rouge2": 0.0208,
"eval_rougeL": 0.0632,
"eval_rougeLsum": 0.0633,
"eval_runtime": 9.5959,
"eval_samples_per_second": 11.463,
"eval_steps_per_second": 1.459,
"step": 137
},
{
"epoch": 10.98,
"eval_gen_len": 13.8909,
"eval_loss": 18.8905086517334,
"eval_rouge1": 0.09,
"eval_rouge2": 0.024,
"eval_rougeL": 0.0698,
"eval_rougeLsum": 0.0696,
"eval_runtime": 9.6532,
"eval_samples_per_second": 11.395,
"eval_steps_per_second": 1.45,
"step": 151
},
{
"epoch": 12.0,
"eval_gen_len": 14.1818,
"eval_loss": 18.593591690063477,
"eval_rouge1": 0.094,
"eval_rouge2": 0.0324,
"eval_rougeL": 0.0735,
"eval_rougeLsum": 0.0732,
"eval_runtime": 9.6893,
"eval_samples_per_second": 11.353,
"eval_steps_per_second": 1.445,
"step": 165
},
{
"epoch": 12.95,
"eval_gen_len": 14.4364,
"eval_loss": 18.286243438720703,
"eval_rouge1": 0.0928,
"eval_rouge2": 0.0329,
"eval_rougeL": 0.0746,
"eval_rougeLsum": 0.0749,
"eval_runtime": 9.7163,
"eval_samples_per_second": 11.321,
"eval_steps_per_second": 1.441,
"step": 178
},
{
"epoch": 13.96,
"eval_gen_len": 15.0727,
"eval_loss": 17.896913528442383,
"eval_rouge1": 0.096,
"eval_rouge2": 0.0328,
"eval_rougeL": 0.0788,
"eval_rougeLsum": 0.0792,
"eval_runtime": 9.6929,
"eval_samples_per_second": 11.348,
"eval_steps_per_second": 1.444,
"step": 192
},
{
"epoch": 14.98,
"eval_gen_len": 15.7364,
"eval_loss": 17.442358016967773,
"eval_rouge1": 0.1015,
"eval_rouge2": 0.0334,
"eval_rougeL": 0.0816,
"eval_rougeLsum": 0.0818,
"eval_runtime": 9.6767,
"eval_samples_per_second": 11.368,
"eval_steps_per_second": 1.447,
"step": 206
},
{
"epoch": 16.0,
"eval_gen_len": 16.2364,
"eval_loss": 16.91552734375,
"eval_rouge1": 0.1001,
"eval_rouge2": 0.0337,
"eval_rougeL": 0.0811,
"eval_rougeLsum": 0.0814,
"eval_runtime": 9.5645,
"eval_samples_per_second": 11.501,
"eval_steps_per_second": 1.464,
"step": 220
},
{
"epoch": 16.95,
"eval_gen_len": 16.6818,
"eval_loss": 16.372838973999023,
"eval_rouge1": 0.102,
"eval_rouge2": 0.0336,
"eval_rougeL": 0.081,
"eval_rougeLsum": 0.0809,
"eval_runtime": 9.7488,
"eval_samples_per_second": 11.283,
"eval_steps_per_second": 1.436,
"step": 233
},
{
"epoch": 17.96,
"eval_gen_len": 16.5364,
"eval_loss": 15.727865219116211,
"eval_rouge1": 0.0901,
"eval_rouge2": 0.0301,
"eval_rougeL": 0.0739,
"eval_rougeLsum": 0.0741,
"eval_runtime": 9.6417,
"eval_samples_per_second": 11.409,
"eval_steps_per_second": 1.452,
"step": 247
},
{
"epoch": 18.98,
"eval_gen_len": 16.9818,
"eval_loss": 14.980103492736816,
"eval_rouge1": 0.0828,
"eval_rouge2": 0.0258,
"eval_rougeL": 0.0673,
"eval_rougeLsum": 0.0671,
"eval_runtime": 9.6417,
"eval_samples_per_second": 11.409,
"eval_steps_per_second": 1.452,
"step": 261
},
{
"epoch": 20.0,
"eval_gen_len": 16.4727,
"eval_loss": 14.11096477508545,
"eval_rouge1": 0.0687,
"eval_rouge2": 0.0146,
"eval_rougeL": 0.0549,
"eval_rougeLsum": 0.0547,
"eval_runtime": 9.556,
"eval_samples_per_second": 11.511,
"eval_steps_per_second": 1.465,
"step": 275
},
{
"epoch": 20.95,
"eval_gen_len": 14.2364,
"eval_loss": 13.337847709655762,
"eval_rouge1": 0.051,
"eval_rouge2": 0.0102,
"eval_rougeL": 0.0418,
"eval_rougeLsum": 0.0414,
"eval_runtime": 9.7157,
"eval_samples_per_second": 11.322,
"eval_steps_per_second": 1.441,
"step": 288
},
{
"epoch": 21.96,
"eval_gen_len": 12.3818,
"eval_loss": 12.585112571716309,
"eval_rouge1": 0.0377,
"eval_rouge2": 0.007,
"eval_rougeL": 0.0317,
"eval_rougeLsum": 0.0317,
"eval_runtime": 9.5457,
"eval_samples_per_second": 11.524,
"eval_steps_per_second": 1.467,
"step": 302
},
{
"epoch": 22.98,
"eval_gen_len": 12.0455,
"eval_loss": 11.860977172851562,
"eval_rouge1": 0.023,
"eval_rouge2": 0.0048,
"eval_rougeL": 0.0204,
"eval_rougeLsum": 0.0204,
"eval_runtime": 9.5524,
"eval_samples_per_second": 11.515,
"eval_steps_per_second": 1.466,
"step": 316
},
{
"epoch": 24.0,
"eval_gen_len": 10.8545,
"eval_loss": 11.155168533325195,
"eval_rouge1": 0.012,
"eval_rouge2": 0.001,
"eval_rougeL": 0.0108,
"eval_rougeLsum": 0.0108,
"eval_runtime": 9.5834,
"eval_samples_per_second": 11.478,
"eval_steps_per_second": 1.461,
"step": 330
},
{
"epoch": 24.95,
"eval_gen_len": 10.0273,
"eval_loss": 10.512735366821289,
"eval_rouge1": 0.0037,
"eval_rouge2": 0.0005,
"eval_rougeL": 0.0035,
"eval_rougeLsum": 0.0036,
"eval_runtime": 9.613,
"eval_samples_per_second": 11.443,
"eval_steps_per_second": 1.456,
"step": 343
},
{
"epoch": 25.96,
"eval_gen_len": 11.6,
"eval_loss": 9.83348560333252,
"eval_rouge1": 0.0039,
"eval_rouge2": 0.0002,
"eval_rougeL": 0.0038,
"eval_rougeLsum": 0.0039,
"eval_runtime": 9.5253,
"eval_samples_per_second": 11.548,
"eval_steps_per_second": 1.47,
"step": 357
},
{
"epoch": 26.98,
"eval_gen_len": 13.0455,
"eval_loss": 9.216172218322754,
"eval_rouge1": 0.0016,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0016,
"eval_rougeLsum": 0.0016,
"eval_runtime": 9.5441,
"eval_samples_per_second": 11.525,
"eval_steps_per_second": 1.467,
"step": 371
},
{
"epoch": 28.0,
"eval_gen_len": 14.6818,
"eval_loss": 8.572382926940918,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.6018,
"eval_samples_per_second": 11.456,
"eval_steps_per_second": 1.458,
"step": 385
},
{
"epoch": 28.95,
"eval_gen_len": 15.2727,
"eval_loss": 8.037731170654297,
"eval_rouge1": 0.0009,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0009,
"eval_rougeLsum": 0.0009,
"eval_runtime": 9.5646,
"eval_samples_per_second": 11.501,
"eval_steps_per_second": 1.464,
"step": 398
},
{
"epoch": 29.96,
"eval_gen_len": 16.3909,
"eval_loss": 7.415070056915283,
"eval_rouge1": 0.0007,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0007,
"eval_runtime": 9.5548,
"eval_samples_per_second": 11.513,
"eval_steps_per_second": 1.465,
"step": 412
},
{
"epoch": 30.98,
"eval_gen_len": 17.8364,
"eval_loss": 6.802865028381348,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.5629,
"eval_samples_per_second": 11.503,
"eval_steps_per_second": 1.464,
"step": 426
},
{
"epoch": 32.0,
"eval_gen_len": 18.2818,
"eval_loss": 6.211207866668701,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.5163,
"eval_samples_per_second": 11.559,
"eval_steps_per_second": 1.471,
"step": 440
},
{
"epoch": 32.95,
"eval_gen_len": 18.7091,
"eval_loss": 5.717328071594238,
"eval_rouge1": 0.0003,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0003,
"eval_rougeLsum": 0.0003,
"eval_runtime": 9.505,
"eval_samples_per_second": 11.573,
"eval_steps_per_second": 1.473,
"step": 453
},
{
"epoch": 33.96,
"eval_gen_len": 18.7091,
"eval_loss": 5.17288064956665,
"eval_rouge1": 0.0008,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0008,
"eval_rougeLsum": 0.0008,
"eval_runtime": 9.5443,
"eval_samples_per_second": 11.525,
"eval_steps_per_second": 1.467,
"step": 467
},
{
"epoch": 34.98,
"eval_gen_len": 19.0,
"eval_loss": 4.669548034667969,
"eval_rouge1": 0.0008,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0008,
"eval_runtime": 9.4845,
"eval_samples_per_second": 11.598,
"eval_steps_per_second": 1.476,
"step": 481
},
{
"epoch": 36.0,
"eval_gen_len": 19.0,
"eval_loss": 4.216309547424316,
"eval_rouge1": 0.0006,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0006,
"eval_rougeLsum": 0.0006,
"eval_runtime": 9.5515,
"eval_samples_per_second": 11.516,
"eval_steps_per_second": 1.466,
"step": 495
},
{
"epoch": 36.36,
"grad_norm": 5.376431465148926,
"learning_rate": 1.660854700854701e-05,
"loss": 14.4939,
"step": 500
},
{
"epoch": 36.95,
"eval_gen_len": 19.0,
"eval_loss": 3.8450570106506348,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5449,
"eval_samples_per_second": 11.525,
"eval_steps_per_second": 1.467,
"step": 508
},
{
"epoch": 37.96,
"eval_gen_len": 19.0,
"eval_loss": 3.507812261581421,
"eval_rouge1": 0.0006,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0006,
"eval_rougeLsum": 0.0006,
"eval_runtime": 9.5902,
"eval_samples_per_second": 11.47,
"eval_steps_per_second": 1.46,
"step": 522
},
{
"epoch": 38.98,
"eval_gen_len": 19.0,
"eval_loss": 3.231505870819092,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.5074,
"eval_samples_per_second": 11.57,
"eval_steps_per_second": 1.473,
"step": 536
},
{
"epoch": 40.0,
"eval_gen_len": 19.0,
"eval_loss": 2.9967288970947266,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.5195,
"eval_samples_per_second": 11.555,
"eval_steps_per_second": 1.471,
"step": 550
},
{
"epoch": 40.95,
"eval_gen_len": 18.9545,
"eval_loss": 2.8089849948883057,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.5224,
"eval_samples_per_second": 11.552,
"eval_steps_per_second": 1.47,
"step": 563
},
{
"epoch": 41.96,
"eval_gen_len": 18.3727,
"eval_loss": 2.6384663581848145,
"eval_rouge1": 0.0006,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0006,
"eval_rougeLsum": 0.0006,
"eval_runtime": 9.5129,
"eval_samples_per_second": 11.563,
"eval_steps_per_second": 1.472,
"step": 577
},
{
"epoch": 42.98,
"eval_gen_len": 16.7273,
"eval_loss": 2.492598295211792,
"eval_rouge1": 0.0003,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0003,
"eval_rougeLsum": 0.0003,
"eval_runtime": 9.5174,
"eval_samples_per_second": 11.558,
"eval_steps_per_second": 1.471,
"step": 591
},
{
"epoch": 44.0,
"eval_gen_len": 11.9636,
"eval_loss": 2.3677501678466797,
"eval_rouge1": 0.0003,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0003,
"eval_rougeLsum": 0.0003,
"eval_runtime": 9.5796,
"eval_samples_per_second": 11.483,
"eval_steps_per_second": 1.461,
"step": 605
},
{
"epoch": 44.95,
"eval_gen_len": 9.2455,
"eval_loss": 2.2777955532073975,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6685,
"eval_samples_per_second": 11.377,
"eval_steps_per_second": 1.448,
"step": 618
},
{
"epoch": 45.96,
"eval_gen_len": 7.9455,
"eval_loss": 2.198147773742676,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.585,
"eval_samples_per_second": 11.476,
"eval_steps_per_second": 1.461,
"step": 632
},
{
"epoch": 46.98,
"eval_gen_len": 7.5909,
"eval_loss": 2.1306562423706055,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6008,
"eval_samples_per_second": 11.457,
"eval_steps_per_second": 1.458,
"step": 646
},
{
"epoch": 48.0,
"eval_gen_len": 7.4091,
"eval_loss": 2.0773117542266846,
"eval_rouge1": 0.0003,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0003,
"eval_rougeLsum": 0.0003,
"eval_runtime": 9.6506,
"eval_samples_per_second": 11.398,
"eval_steps_per_second": 1.451,
"step": 660
},
{
"epoch": 48.95,
"eval_gen_len": 7.2909,
"eval_loss": 2.036808729171753,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5097,
"eval_samples_per_second": 11.567,
"eval_steps_per_second": 1.472,
"step": 673
},
{
"epoch": 49.96,
"eval_gen_len": 6.8364,
"eval_loss": 1.9949748516082764,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6031,
"eval_samples_per_second": 11.455,
"eval_steps_per_second": 1.458,
"step": 687
},
{
"epoch": 50.98,
"eval_gen_len": 7.8273,
"eval_loss": 1.957520842552185,
"eval_rouge1": 0.0002,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0002,
"eval_rougeLsum": 0.0002,
"eval_runtime": 9.5071,
"eval_samples_per_second": 11.57,
"eval_steps_per_second": 1.473,
"step": 701
},
{
"epoch": 52.0,
"eval_gen_len": 7.5545,
"eval_loss": 1.9219788312911987,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5836,
"eval_samples_per_second": 11.478,
"eval_steps_per_second": 1.461,
"step": 715
},
{
"epoch": 52.95,
"eval_gen_len": 7.5364,
"eval_loss": 1.8916202783584595,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6019,
"eval_samples_per_second": 11.456,
"eval_steps_per_second": 1.458,
"step": 728
},
{
"epoch": 53.96,
"eval_gen_len": 7.1182,
"eval_loss": 1.8674402236938477,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5024,
"eval_samples_per_second": 11.576,
"eval_steps_per_second": 1.473,
"step": 742
},
{
"epoch": 54.98,
"eval_gen_len": 7.0364,
"eval_loss": 1.846158742904663,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6139,
"eval_samples_per_second": 11.442,
"eval_steps_per_second": 1.456,
"step": 756
},
{
"epoch": 56.0,
"eval_gen_len": 7.0,
"eval_loss": 1.827086329460144,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.591,
"eval_samples_per_second": 11.469,
"eval_steps_per_second": 1.46,
"step": 770
},
{
"epoch": 56.95,
"eval_gen_len": 7.5455,
"eval_loss": 1.8088210821151733,
"eval_rouge1": 0.0006,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0006,
"eval_rougeLsum": 0.0006,
"eval_runtime": 9.572,
"eval_samples_per_second": 11.492,
"eval_steps_per_second": 1.463,
"step": 783
},
{
"epoch": 57.96,
"eval_gen_len": 7.9,
"eval_loss": 1.789602279663086,
"eval_rouge1": 0.0001,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0001,
"eval_rougeLsum": 0.0001,
"eval_runtime": 9.5837,
"eval_samples_per_second": 11.478,
"eval_steps_per_second": 1.461,
"step": 797
},
{
"epoch": 58.98,
"eval_gen_len": 8.2545,
"eval_loss": 1.7687013149261475,
"eval_rouge1": 0.0007,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0007,
"eval_runtime": 9.6152,
"eval_samples_per_second": 11.44,
"eval_steps_per_second": 1.456,
"step": 811
},
{
"epoch": 60.0,
"eval_gen_len": 8.3636,
"eval_loss": 1.7496564388275146,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.4902,
"eval_samples_per_second": 11.591,
"eval_steps_per_second": 1.475,
"step": 825
},
{
"epoch": 60.95,
"eval_gen_len": 9.1455,
"eval_loss": 1.7332907915115356,
"eval_rouge1": 0.0002,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0002,
"eval_rougeLsum": 0.0002,
"eval_runtime": 9.5867,
"eval_samples_per_second": 11.474,
"eval_steps_per_second": 1.46,
"step": 838
},
{
"epoch": 61.96,
"eval_gen_len": 8.9,
"eval_loss": 1.7185932397842407,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.4797,
"eval_samples_per_second": 11.604,
"eval_steps_per_second": 1.477,
"step": 852
},
{
"epoch": 62.98,
"eval_gen_len": 9.7545,
"eval_loss": 1.7047526836395264,
"eval_rouge1": 0.0002,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0002,
"eval_rougeLsum": 0.0002,
"eval_runtime": 9.618,
"eval_samples_per_second": 11.437,
"eval_steps_per_second": 1.456,
"step": 866
},
{
"epoch": 64.0,
"eval_gen_len": 9.9818,
"eval_loss": 1.6921414136886597,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6264,
"eval_samples_per_second": 11.427,
"eval_steps_per_second": 1.454,
"step": 880
},
{
"epoch": 64.95,
"eval_gen_len": 9.6909,
"eval_loss": 1.6816327571868896,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5134,
"eval_samples_per_second": 11.563,
"eval_steps_per_second": 1.472,
"step": 893
},
{
"epoch": 65.96,
"eval_gen_len": 8.9545,
"eval_loss": 1.6697723865509033,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5506,
"eval_samples_per_second": 11.518,
"eval_steps_per_second": 1.466,
"step": 907
},
{
"epoch": 66.98,
"eval_gen_len": 9.6818,
"eval_loss": 1.6568113565444946,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5388,
"eval_samples_per_second": 11.532,
"eval_steps_per_second": 1.468,
"step": 921
},
{
"epoch": 68.0,
"eval_gen_len": 9.9455,
"eval_loss": 1.6469463109970093,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.4825,
"eval_samples_per_second": 11.6,
"eval_steps_per_second": 1.476,
"step": 935
},
{
"epoch": 68.95,
"eval_gen_len": 9.3545,
"eval_loss": 1.6408612728118896,
"eval_rouge1": 0.0002,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0002,
"eval_rougeLsum": 0.0002,
"eval_runtime": 9.5212,
"eval_samples_per_second": 11.553,
"eval_steps_per_second": 1.47,
"step": 948
},
{
"epoch": 69.96,
"eval_gen_len": 9.1545,
"eval_loss": 1.6316603422164917,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.4986,
"eval_samples_per_second": 11.581,
"eval_steps_per_second": 1.474,
"step": 962
},
{
"epoch": 70.98,
"eval_gen_len": 9.7818,
"eval_loss": 1.623169183731079,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5276,
"eval_samples_per_second": 11.545,
"eval_steps_per_second": 1.469,
"step": 976
},
{
"epoch": 72.0,
"eval_gen_len": 10.0273,
"eval_loss": 1.6152759790420532,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5801,
"eval_samples_per_second": 11.482,
"eval_steps_per_second": 1.461,
"step": 990
},
{
"epoch": 72.73,
"grad_norm": 3.344996213912964,
"learning_rate": 1.3196581196581197e-05,
"loss": 2.6089,
"step": 1000
},
{
"epoch": 72.95,
"eval_gen_len": 9.1727,
"eval_loss": 1.6071548461914062,
"eval_rouge1": 0.0002,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0002,
"eval_rougeLsum": 0.0002,
"eval_runtime": 9.6124,
"eval_samples_per_second": 11.444,
"eval_steps_per_second": 1.456,
"step": 1003
},
{
"epoch": 73.96,
"eval_gen_len": 9.1545,
"eval_loss": 1.599768042564392,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.498,
"eval_samples_per_second": 11.581,
"eval_steps_per_second": 1.474,
"step": 1017
},
{
"epoch": 74.98,
"eval_gen_len": 9.5273,
"eval_loss": 1.5934444665908813,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5664,
"eval_samples_per_second": 11.499,
"eval_steps_per_second": 1.463,
"step": 1031
},
{
"epoch": 76.0,
"eval_gen_len": 10.4091,
"eval_loss": 1.5867650508880615,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.579,
"eval_samples_per_second": 11.483,
"eval_steps_per_second": 1.462,
"step": 1045
},
{
"epoch": 76.95,
"eval_gen_len": 10.3,
"eval_loss": 1.5827070474624634,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0003,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.4875,
"eval_samples_per_second": 11.594,
"eval_steps_per_second": 1.476,
"step": 1058
},
{
"epoch": 77.96,
"eval_gen_len": 9.6182,
"eval_loss": 1.5748482942581177,
"eval_rouge1": 0.0008,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0008,
"eval_rougeLsum": 0.0008,
"eval_runtime": 9.4782,
"eval_samples_per_second": 11.606,
"eval_steps_per_second": 1.477,
"step": 1072
},
{
"epoch": 78.98,
"eval_gen_len": 9.6273,
"eval_loss": 1.5662298202514648,
"eval_rouge1": 0.0008,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0008,
"eval_rougeLsum": 0.0008,
"eval_runtime": 9.4778,
"eval_samples_per_second": 11.606,
"eval_steps_per_second": 1.477,
"step": 1086
},
{
"epoch": 80.0,
"eval_gen_len": 9.5273,
"eval_loss": 1.5585095882415771,
"eval_rouge1": 0.0003,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0003,
"eval_rougeLsum": 0.0003,
"eval_runtime": 9.4851,
"eval_samples_per_second": 11.597,
"eval_steps_per_second": 1.476,
"step": 1100
},
{
"epoch": 80.95,
"eval_gen_len": 10.1364,
"eval_loss": 1.5539778470993042,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6048,
"eval_samples_per_second": 11.453,
"eval_steps_per_second": 1.458,
"step": 1113
},
{
"epoch": 81.96,
"eval_gen_len": 9.6182,
"eval_loss": 1.541092038154602,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.4829,
"eval_samples_per_second": 11.6,
"eval_steps_per_second": 1.476,
"step": 1127
},
{
"epoch": 82.98,
"eval_gen_len": 9.6091,
"eval_loss": 1.5330486297607422,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5558,
"eval_samples_per_second": 11.511,
"eval_steps_per_second": 1.465,
"step": 1141
},
{
"epoch": 84.0,
"eval_gen_len": 9.0818,
"eval_loss": 1.5241070985794067,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6145,
"eval_samples_per_second": 11.441,
"eval_steps_per_second": 1.456,
"step": 1155
},
{
"epoch": 84.95,
"eval_gen_len": 9.3,
"eval_loss": 1.5192241668701172,
"eval_rouge1": 0.0004,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0004,
"eval_rougeLsum": 0.0004,
"eval_runtime": 9.4781,
"eval_samples_per_second": 11.606,
"eval_steps_per_second": 1.477,
"step": 1168
},
{
"epoch": 85.96,
"eval_gen_len": 9.5364,
"eval_loss": 1.5117179155349731,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5291,
"eval_samples_per_second": 11.544,
"eval_steps_per_second": 1.469,
"step": 1182
},
{
"epoch": 86.98,
"eval_gen_len": 9.4545,
"eval_loss": 1.4990766048431396,
"eval_rouge1": 0.0006,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0006,
"eval_rougeLsum": 0.0006,
"eval_runtime": 9.6213,
"eval_samples_per_second": 11.433,
"eval_steps_per_second": 1.455,
"step": 1196
},
{
"epoch": 88.0,
"eval_gen_len": 9.4182,
"eval_loss": 1.4878661632537842,
"eval_rouge1": 0.0011,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0008,
"eval_rougeLsum": 0.0008,
"eval_runtime": 9.4886,
"eval_samples_per_second": 11.593,
"eval_steps_per_second": 1.475,
"step": 1210
},
{
"epoch": 88.95,
"eval_gen_len": 9.5727,
"eval_loss": 1.479432463645935,
"eval_rouge1": 0.0009,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0007,
"eval_runtime": 9.5867,
"eval_samples_per_second": 11.474,
"eval_steps_per_second": 1.46,
"step": 1223
},
{
"epoch": 89.96,
"eval_gen_len": 9.3909,
"eval_loss": 1.471197247505188,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6139,
"eval_samples_per_second": 11.442,
"eval_steps_per_second": 1.456,
"step": 1237
},
{
"epoch": 90.98,
"eval_gen_len": 9.5,
"eval_loss": 1.4649511575698853,
"eval_rouge1": 0.0002,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0002,
"eval_rougeLsum": 0.0002,
"eval_runtime": 9.5947,
"eval_samples_per_second": 11.465,
"eval_steps_per_second": 1.459,
"step": 1251
},
{
"epoch": 92.0,
"eval_gen_len": 9.5273,
"eval_loss": 1.4548052549362183,
"eval_rouge1": 0.0016,
"eval_rouge2": 0.0004,
"eval_rougeL": 0.0016,
"eval_rougeLsum": 0.0017,
"eval_runtime": 9.6868,
"eval_samples_per_second": 11.356,
"eval_steps_per_second": 1.445,
"step": 1265
},
{
"epoch": 92.95,
"eval_gen_len": 9.3727,
"eval_loss": 1.444838285446167,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.5071,
"eval_samples_per_second": 11.57,
"eval_steps_per_second": 1.473,
"step": 1278
},
{
"epoch": 93.96,
"eval_gen_len": 9.6,
"eval_loss": 1.4365838766098022,
"eval_rouge1": 0.0014,
"eval_rouge2": 0.0004,
"eval_rougeL": 0.0014,
"eval_rougeLsum": 0.0015,
"eval_runtime": 9.5507,
"eval_samples_per_second": 11.517,
"eval_steps_per_second": 1.466,
"step": 1292
},
{
"epoch": 94.98,
"eval_gen_len": 9.3364,
"eval_loss": 1.4285393953323364,
"eval_rouge1": 0.0006,
"eval_rouge2": 0.0003,
"eval_rougeL": 0.0006,
"eval_rougeLsum": 0.0006,
"eval_runtime": 9.6323,
"eval_samples_per_second": 11.42,
"eval_steps_per_second": 1.453,
"step": 1306
},
{
"epoch": 96.0,
"eval_gen_len": 9.3455,
"eval_loss": 1.4242411851882935,
"eval_rouge1": 0.0008,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0008,
"eval_runtime": 9.5163,
"eval_samples_per_second": 11.559,
"eval_steps_per_second": 1.471,
"step": 1320
},
{
"epoch": 96.95,
"eval_gen_len": 9.4,
"eval_loss": 1.4160754680633545,
"eval_rouge1": 0.0007,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0007,
"eval_runtime": 9.5868,
"eval_samples_per_second": 11.474,
"eval_steps_per_second": 1.46,
"step": 1333
},
{
"epoch": 97.96,
"eval_gen_len": 9.4455,
"eval_loss": 1.4052343368530273,
"eval_rouge1": 0.0007,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0007,
"eval_runtime": 9.6109,
"eval_samples_per_second": 11.445,
"eval_steps_per_second": 1.457,
"step": 1347
},
{
"epoch": 98.98,
"eval_gen_len": 9.5273,
"eval_loss": 1.3928413391113281,
"eval_rouge1": 0.0007,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0007,
"eval_runtime": 9.6591,
"eval_samples_per_second": 11.388,
"eval_steps_per_second": 1.449,
"step": 1361
},
{
"epoch": 100.0,
"eval_gen_len": 9.5182,
"eval_loss": 1.3840864896774292,
"eval_rouge1": 0.0011,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0011,
"eval_rougeLsum": 0.0011,
"eval_runtime": 9.6661,
"eval_samples_per_second": 11.38,
"eval_steps_per_second": 1.448,
"step": 1375
},
{
"epoch": 100.95,
"eval_gen_len": 9.3,
"eval_loss": 1.381872296333313,
"eval_rouge1": 0.0006,
"eval_rouge2": 0.0001,
"eval_rougeL": 0.0006,
"eval_rougeLsum": 0.0006,
"eval_runtime": 9.6513,
"eval_samples_per_second": 11.397,
"eval_steps_per_second": 1.451,
"step": 1388
},
{
"epoch": 101.96,
"eval_gen_len": 9.3455,
"eval_loss": 1.37588632106781,
"eval_rouge1": 0.0004,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0004,
"eval_rougeLsum": 0.0004,
"eval_runtime": 9.5607,
"eval_samples_per_second": 11.505,
"eval_steps_per_second": 1.464,
"step": 1402
},
{
"epoch": 102.98,
"eval_gen_len": 9.2636,
"eval_loss": 1.3675447702407837,
"eval_rouge1": 0.0007,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.5449,
"eval_samples_per_second": 11.524,
"eval_steps_per_second": 1.467,
"step": 1416
},
{
"epoch": 104.0,
"eval_gen_len": 9.2455,
"eval_loss": 1.358955979347229,
"eval_rouge1": 0.0012,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0012,
"eval_rougeLsum": 0.0012,
"eval_runtime": 9.5868,
"eval_samples_per_second": 11.474,
"eval_steps_per_second": 1.46,
"step": 1430
},
{
"epoch": 104.95,
"eval_gen_len": 9.1455,
"eval_loss": 1.3501193523406982,
"eval_rouge1": 0.0008,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.6684,
"eval_samples_per_second": 11.377,
"eval_steps_per_second": 1.448,
"step": 1443
},
{
"epoch": 105.96,
"eval_gen_len": 9.1636,
"eval_loss": 1.344258189201355,
"eval_rouge1": 0.0007,
"eval_rouge2": 0.0003,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0007,
"eval_runtime": 9.5388,
"eval_samples_per_second": 11.532,
"eval_steps_per_second": 1.468,
"step": 1457
},
{
"epoch": 106.98,
"eval_gen_len": 9.8,
"eval_loss": 1.3355817794799805,
"eval_rouge1": 0.001,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0008,
"eval_rougeLsum": 0.0008,
"eval_runtime": 9.5263,
"eval_samples_per_second": 11.547,
"eval_steps_per_second": 1.47,
"step": 1471
},
{
"epoch": 108.0,
"eval_gen_len": 9.9182,
"eval_loss": 1.3305474519729614,
"eval_rouge1": 0.0008,
"eval_rouge2": 0.0005,
"eval_rougeL": 0.0008,
"eval_rougeLsum": 0.0008,
"eval_runtime": 9.6315,
"eval_samples_per_second": 11.421,
"eval_steps_per_second": 1.454,
"step": 1485
},
{
"epoch": 108.95,
"eval_gen_len": 10.1636,
"eval_loss": 1.3212946653366089,
"eval_rouge1": 0.0028,
"eval_rouge2": 0.001,
"eval_rougeL": 0.0025,
"eval_rougeLsum": 0.0026,
"eval_runtime": 9.5797,
"eval_samples_per_second": 11.483,
"eval_steps_per_second": 1.461,
"step": 1498
},
{
"epoch": 109.09,
"grad_norm": 5.480063438415527,
"learning_rate": 9.784615384615387e-06,
"loss": 1.7753,
"step": 1500
}
],
"logging_steps": 500,
"max_steps": 2925,
"num_input_tokens_seen": 0,
"num_train_epochs": 225,
"save_steps": 500,
"total_flos": 5.805822083648717e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}