newsSum / checkpoint-2500 /trainer_state.json
Jorkieboe's picture
Upload 40 files
add959d verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 181.8181818181818,
"eval_steps": 500,
"global_step": 2500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.95,
"eval_gen_len": 13.1091,
"eval_loss": 21.40757179260254,
"eval_rouge1": 0.0745,
"eval_rouge2": 0.0159,
"eval_rougeL": 0.0584,
"eval_rougeLsum": 0.0582,
"eval_runtime": 11.5492,
"eval_samples_per_second": 9.524,
"eval_steps_per_second": 1.212,
"step": 13
},
{
"epoch": 1.96,
"eval_gen_len": 12.9636,
"eval_loss": 21.254241943359375,
"eval_rouge1": 0.0726,
"eval_rouge2": 0.0153,
"eval_rougeL": 0.0568,
"eval_rougeLsum": 0.0569,
"eval_runtime": 9.5424,
"eval_samples_per_second": 11.528,
"eval_steps_per_second": 1.467,
"step": 27
},
{
"epoch": 2.98,
"eval_gen_len": 12.9636,
"eval_loss": 21.031164169311523,
"eval_rouge1": 0.0724,
"eval_rouge2": 0.0176,
"eval_rougeL": 0.0568,
"eval_rougeLsum": 0.0569,
"eval_runtime": 9.5313,
"eval_samples_per_second": 11.541,
"eval_steps_per_second": 1.469,
"step": 41
},
{
"epoch": 4.0,
"eval_gen_len": 12.6727,
"eval_loss": 20.743330001831055,
"eval_rouge1": 0.0722,
"eval_rouge2": 0.0175,
"eval_rougeL": 0.055,
"eval_rougeLsum": 0.0551,
"eval_runtime": 9.5638,
"eval_samples_per_second": 11.502,
"eval_steps_per_second": 1.464,
"step": 55
},
{
"epoch": 4.95,
"eval_gen_len": 12.5273,
"eval_loss": 20.430522918701172,
"eval_rouge1": 0.0708,
"eval_rouge2": 0.0177,
"eval_rougeL": 0.0545,
"eval_rougeLsum": 0.0547,
"eval_runtime": 9.583,
"eval_samples_per_second": 11.479,
"eval_steps_per_second": 1.461,
"step": 68
},
{
"epoch": 5.96,
"eval_gen_len": 12.6091,
"eval_loss": 20.10591697692871,
"eval_rouge1": 0.0725,
"eval_rouge2": 0.0185,
"eval_rougeL": 0.0559,
"eval_rougeLsum": 0.0559,
"eval_runtime": 9.5892,
"eval_samples_per_second": 11.471,
"eval_steps_per_second": 1.46,
"step": 82
},
{
"epoch": 6.98,
"eval_gen_len": 12.6091,
"eval_loss": 19.850391387939453,
"eval_rouge1": 0.0727,
"eval_rouge2": 0.0169,
"eval_rougeL": 0.0551,
"eval_rougeLsum": 0.0547,
"eval_runtime": 9.6928,
"eval_samples_per_second": 11.349,
"eval_steps_per_second": 1.444,
"step": 96
},
{
"epoch": 8.0,
"eval_gen_len": 13.2909,
"eval_loss": 19.628494262695312,
"eval_rouge1": 0.0816,
"eval_rouge2": 0.0228,
"eval_rougeL": 0.062,
"eval_rougeLsum": 0.0622,
"eval_runtime": 9.7244,
"eval_samples_per_second": 11.312,
"eval_steps_per_second": 1.44,
"step": 110
},
{
"epoch": 8.95,
"eval_gen_len": 13.2909,
"eval_loss": 19.41258430480957,
"eval_rouge1": 0.0811,
"eval_rouge2": 0.0214,
"eval_rougeL": 0.0613,
"eval_rougeLsum": 0.0614,
"eval_runtime": 9.5996,
"eval_samples_per_second": 11.459,
"eval_steps_per_second": 1.458,
"step": 123
},
{
"epoch": 9.96,
"eval_gen_len": 13.5182,
"eval_loss": 19.160032272338867,
"eval_rouge1": 0.0818,
"eval_rouge2": 0.0208,
"eval_rougeL": 0.0632,
"eval_rougeLsum": 0.0633,
"eval_runtime": 9.5959,
"eval_samples_per_second": 11.463,
"eval_steps_per_second": 1.459,
"step": 137
},
{
"epoch": 10.98,
"eval_gen_len": 13.8909,
"eval_loss": 18.8905086517334,
"eval_rouge1": 0.09,
"eval_rouge2": 0.024,
"eval_rougeL": 0.0698,
"eval_rougeLsum": 0.0696,
"eval_runtime": 9.6532,
"eval_samples_per_second": 11.395,
"eval_steps_per_second": 1.45,
"step": 151
},
{
"epoch": 12.0,
"eval_gen_len": 14.1818,
"eval_loss": 18.593591690063477,
"eval_rouge1": 0.094,
"eval_rouge2": 0.0324,
"eval_rougeL": 0.0735,
"eval_rougeLsum": 0.0732,
"eval_runtime": 9.6893,
"eval_samples_per_second": 11.353,
"eval_steps_per_second": 1.445,
"step": 165
},
{
"epoch": 12.95,
"eval_gen_len": 14.4364,
"eval_loss": 18.286243438720703,
"eval_rouge1": 0.0928,
"eval_rouge2": 0.0329,
"eval_rougeL": 0.0746,
"eval_rougeLsum": 0.0749,
"eval_runtime": 9.7163,
"eval_samples_per_second": 11.321,
"eval_steps_per_second": 1.441,
"step": 178
},
{
"epoch": 13.96,
"eval_gen_len": 15.0727,
"eval_loss": 17.896913528442383,
"eval_rouge1": 0.096,
"eval_rouge2": 0.0328,
"eval_rougeL": 0.0788,
"eval_rougeLsum": 0.0792,
"eval_runtime": 9.6929,
"eval_samples_per_second": 11.348,
"eval_steps_per_second": 1.444,
"step": 192
},
{
"epoch": 14.98,
"eval_gen_len": 15.7364,
"eval_loss": 17.442358016967773,
"eval_rouge1": 0.1015,
"eval_rouge2": 0.0334,
"eval_rougeL": 0.0816,
"eval_rougeLsum": 0.0818,
"eval_runtime": 9.6767,
"eval_samples_per_second": 11.368,
"eval_steps_per_second": 1.447,
"step": 206
},
{
"epoch": 16.0,
"eval_gen_len": 16.2364,
"eval_loss": 16.91552734375,
"eval_rouge1": 0.1001,
"eval_rouge2": 0.0337,
"eval_rougeL": 0.0811,
"eval_rougeLsum": 0.0814,
"eval_runtime": 9.5645,
"eval_samples_per_second": 11.501,
"eval_steps_per_second": 1.464,
"step": 220
},
{
"epoch": 16.95,
"eval_gen_len": 16.6818,
"eval_loss": 16.372838973999023,
"eval_rouge1": 0.102,
"eval_rouge2": 0.0336,
"eval_rougeL": 0.081,
"eval_rougeLsum": 0.0809,
"eval_runtime": 9.7488,
"eval_samples_per_second": 11.283,
"eval_steps_per_second": 1.436,
"step": 233
},
{
"epoch": 17.96,
"eval_gen_len": 16.5364,
"eval_loss": 15.727865219116211,
"eval_rouge1": 0.0901,
"eval_rouge2": 0.0301,
"eval_rougeL": 0.0739,
"eval_rougeLsum": 0.0741,
"eval_runtime": 9.6417,
"eval_samples_per_second": 11.409,
"eval_steps_per_second": 1.452,
"step": 247
},
{
"epoch": 18.98,
"eval_gen_len": 16.9818,
"eval_loss": 14.980103492736816,
"eval_rouge1": 0.0828,
"eval_rouge2": 0.0258,
"eval_rougeL": 0.0673,
"eval_rougeLsum": 0.0671,
"eval_runtime": 9.6417,
"eval_samples_per_second": 11.409,
"eval_steps_per_second": 1.452,
"step": 261
},
{
"epoch": 20.0,
"eval_gen_len": 16.4727,
"eval_loss": 14.11096477508545,
"eval_rouge1": 0.0687,
"eval_rouge2": 0.0146,
"eval_rougeL": 0.0549,
"eval_rougeLsum": 0.0547,
"eval_runtime": 9.556,
"eval_samples_per_second": 11.511,
"eval_steps_per_second": 1.465,
"step": 275
},
{
"epoch": 20.95,
"eval_gen_len": 14.2364,
"eval_loss": 13.337847709655762,
"eval_rouge1": 0.051,
"eval_rouge2": 0.0102,
"eval_rougeL": 0.0418,
"eval_rougeLsum": 0.0414,
"eval_runtime": 9.7157,
"eval_samples_per_second": 11.322,
"eval_steps_per_second": 1.441,
"step": 288
},
{
"epoch": 21.96,
"eval_gen_len": 12.3818,
"eval_loss": 12.585112571716309,
"eval_rouge1": 0.0377,
"eval_rouge2": 0.007,
"eval_rougeL": 0.0317,
"eval_rougeLsum": 0.0317,
"eval_runtime": 9.5457,
"eval_samples_per_second": 11.524,
"eval_steps_per_second": 1.467,
"step": 302
},
{
"epoch": 22.98,
"eval_gen_len": 12.0455,
"eval_loss": 11.860977172851562,
"eval_rouge1": 0.023,
"eval_rouge2": 0.0048,
"eval_rougeL": 0.0204,
"eval_rougeLsum": 0.0204,
"eval_runtime": 9.5524,
"eval_samples_per_second": 11.515,
"eval_steps_per_second": 1.466,
"step": 316
},
{
"epoch": 24.0,
"eval_gen_len": 10.8545,
"eval_loss": 11.155168533325195,
"eval_rouge1": 0.012,
"eval_rouge2": 0.001,
"eval_rougeL": 0.0108,
"eval_rougeLsum": 0.0108,
"eval_runtime": 9.5834,
"eval_samples_per_second": 11.478,
"eval_steps_per_second": 1.461,
"step": 330
},
{
"epoch": 24.95,
"eval_gen_len": 10.0273,
"eval_loss": 10.512735366821289,
"eval_rouge1": 0.0037,
"eval_rouge2": 0.0005,
"eval_rougeL": 0.0035,
"eval_rougeLsum": 0.0036,
"eval_runtime": 9.613,
"eval_samples_per_second": 11.443,
"eval_steps_per_second": 1.456,
"step": 343
},
{
"epoch": 25.96,
"eval_gen_len": 11.6,
"eval_loss": 9.83348560333252,
"eval_rouge1": 0.0039,
"eval_rouge2": 0.0002,
"eval_rougeL": 0.0038,
"eval_rougeLsum": 0.0039,
"eval_runtime": 9.5253,
"eval_samples_per_second": 11.548,
"eval_steps_per_second": 1.47,
"step": 357
},
{
"epoch": 26.98,
"eval_gen_len": 13.0455,
"eval_loss": 9.216172218322754,
"eval_rouge1": 0.0016,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0016,
"eval_rougeLsum": 0.0016,
"eval_runtime": 9.5441,
"eval_samples_per_second": 11.525,
"eval_steps_per_second": 1.467,
"step": 371
},
{
"epoch": 28.0,
"eval_gen_len": 14.6818,
"eval_loss": 8.572382926940918,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.6018,
"eval_samples_per_second": 11.456,
"eval_steps_per_second": 1.458,
"step": 385
},
{
"epoch": 28.95,
"eval_gen_len": 15.2727,
"eval_loss": 8.037731170654297,
"eval_rouge1": 0.0009,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0009,
"eval_rougeLsum": 0.0009,
"eval_runtime": 9.5646,
"eval_samples_per_second": 11.501,
"eval_steps_per_second": 1.464,
"step": 398
},
{
"epoch": 29.96,
"eval_gen_len": 16.3909,
"eval_loss": 7.415070056915283,
"eval_rouge1": 0.0007,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0007,
"eval_runtime": 9.5548,
"eval_samples_per_second": 11.513,
"eval_steps_per_second": 1.465,
"step": 412
},
{
"epoch": 30.98,
"eval_gen_len": 17.8364,
"eval_loss": 6.802865028381348,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.5629,
"eval_samples_per_second": 11.503,
"eval_steps_per_second": 1.464,
"step": 426
},
{
"epoch": 32.0,
"eval_gen_len": 18.2818,
"eval_loss": 6.211207866668701,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.5163,
"eval_samples_per_second": 11.559,
"eval_steps_per_second": 1.471,
"step": 440
},
{
"epoch": 32.95,
"eval_gen_len": 18.7091,
"eval_loss": 5.717328071594238,
"eval_rouge1": 0.0003,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0003,
"eval_rougeLsum": 0.0003,
"eval_runtime": 9.505,
"eval_samples_per_second": 11.573,
"eval_steps_per_second": 1.473,
"step": 453
},
{
"epoch": 33.96,
"eval_gen_len": 18.7091,
"eval_loss": 5.17288064956665,
"eval_rouge1": 0.0008,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0008,
"eval_rougeLsum": 0.0008,
"eval_runtime": 9.5443,
"eval_samples_per_second": 11.525,
"eval_steps_per_second": 1.467,
"step": 467
},
{
"epoch": 34.98,
"eval_gen_len": 19.0,
"eval_loss": 4.669548034667969,
"eval_rouge1": 0.0008,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0008,
"eval_runtime": 9.4845,
"eval_samples_per_second": 11.598,
"eval_steps_per_second": 1.476,
"step": 481
},
{
"epoch": 36.0,
"eval_gen_len": 19.0,
"eval_loss": 4.216309547424316,
"eval_rouge1": 0.0006,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0006,
"eval_rougeLsum": 0.0006,
"eval_runtime": 9.5515,
"eval_samples_per_second": 11.516,
"eval_steps_per_second": 1.466,
"step": 495
},
{
"epoch": 36.36,
"grad_norm": 5.376431465148926,
"learning_rate": 1.660854700854701e-05,
"loss": 14.4939,
"step": 500
},
{
"epoch": 36.95,
"eval_gen_len": 19.0,
"eval_loss": 3.8450570106506348,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5449,
"eval_samples_per_second": 11.525,
"eval_steps_per_second": 1.467,
"step": 508
},
{
"epoch": 37.96,
"eval_gen_len": 19.0,
"eval_loss": 3.507812261581421,
"eval_rouge1": 0.0006,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0006,
"eval_rougeLsum": 0.0006,
"eval_runtime": 9.5902,
"eval_samples_per_second": 11.47,
"eval_steps_per_second": 1.46,
"step": 522
},
{
"epoch": 38.98,
"eval_gen_len": 19.0,
"eval_loss": 3.231505870819092,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.5074,
"eval_samples_per_second": 11.57,
"eval_steps_per_second": 1.473,
"step": 536
},
{
"epoch": 40.0,
"eval_gen_len": 19.0,
"eval_loss": 2.9967288970947266,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.5195,
"eval_samples_per_second": 11.555,
"eval_steps_per_second": 1.471,
"step": 550
},
{
"epoch": 40.95,
"eval_gen_len": 18.9545,
"eval_loss": 2.8089849948883057,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.5224,
"eval_samples_per_second": 11.552,
"eval_steps_per_second": 1.47,
"step": 563
},
{
"epoch": 41.96,
"eval_gen_len": 18.3727,
"eval_loss": 2.6384663581848145,
"eval_rouge1": 0.0006,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0006,
"eval_rougeLsum": 0.0006,
"eval_runtime": 9.5129,
"eval_samples_per_second": 11.563,
"eval_steps_per_second": 1.472,
"step": 577
},
{
"epoch": 42.98,
"eval_gen_len": 16.7273,
"eval_loss": 2.492598295211792,
"eval_rouge1": 0.0003,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0003,
"eval_rougeLsum": 0.0003,
"eval_runtime": 9.5174,
"eval_samples_per_second": 11.558,
"eval_steps_per_second": 1.471,
"step": 591
},
{
"epoch": 44.0,
"eval_gen_len": 11.9636,
"eval_loss": 2.3677501678466797,
"eval_rouge1": 0.0003,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0003,
"eval_rougeLsum": 0.0003,
"eval_runtime": 9.5796,
"eval_samples_per_second": 11.483,
"eval_steps_per_second": 1.461,
"step": 605
},
{
"epoch": 44.95,
"eval_gen_len": 9.2455,
"eval_loss": 2.2777955532073975,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6685,
"eval_samples_per_second": 11.377,
"eval_steps_per_second": 1.448,
"step": 618
},
{
"epoch": 45.96,
"eval_gen_len": 7.9455,
"eval_loss": 2.198147773742676,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.585,
"eval_samples_per_second": 11.476,
"eval_steps_per_second": 1.461,
"step": 632
},
{
"epoch": 46.98,
"eval_gen_len": 7.5909,
"eval_loss": 2.1306562423706055,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6008,
"eval_samples_per_second": 11.457,
"eval_steps_per_second": 1.458,
"step": 646
},
{
"epoch": 48.0,
"eval_gen_len": 7.4091,
"eval_loss": 2.0773117542266846,
"eval_rouge1": 0.0003,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0003,
"eval_rougeLsum": 0.0003,
"eval_runtime": 9.6506,
"eval_samples_per_second": 11.398,
"eval_steps_per_second": 1.451,
"step": 660
},
{
"epoch": 48.95,
"eval_gen_len": 7.2909,
"eval_loss": 2.036808729171753,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5097,
"eval_samples_per_second": 11.567,
"eval_steps_per_second": 1.472,
"step": 673
},
{
"epoch": 49.96,
"eval_gen_len": 6.8364,
"eval_loss": 1.9949748516082764,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6031,
"eval_samples_per_second": 11.455,
"eval_steps_per_second": 1.458,
"step": 687
},
{
"epoch": 50.98,
"eval_gen_len": 7.8273,
"eval_loss": 1.957520842552185,
"eval_rouge1": 0.0002,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0002,
"eval_rougeLsum": 0.0002,
"eval_runtime": 9.5071,
"eval_samples_per_second": 11.57,
"eval_steps_per_second": 1.473,
"step": 701
},
{
"epoch": 52.0,
"eval_gen_len": 7.5545,
"eval_loss": 1.9219788312911987,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5836,
"eval_samples_per_second": 11.478,
"eval_steps_per_second": 1.461,
"step": 715
},
{
"epoch": 52.95,
"eval_gen_len": 7.5364,
"eval_loss": 1.8916202783584595,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6019,
"eval_samples_per_second": 11.456,
"eval_steps_per_second": 1.458,
"step": 728
},
{
"epoch": 53.96,
"eval_gen_len": 7.1182,
"eval_loss": 1.8674402236938477,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5024,
"eval_samples_per_second": 11.576,
"eval_steps_per_second": 1.473,
"step": 742
},
{
"epoch": 54.98,
"eval_gen_len": 7.0364,
"eval_loss": 1.846158742904663,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6139,
"eval_samples_per_second": 11.442,
"eval_steps_per_second": 1.456,
"step": 756
},
{
"epoch": 56.0,
"eval_gen_len": 7.0,
"eval_loss": 1.827086329460144,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.591,
"eval_samples_per_second": 11.469,
"eval_steps_per_second": 1.46,
"step": 770
},
{
"epoch": 56.95,
"eval_gen_len": 7.5455,
"eval_loss": 1.8088210821151733,
"eval_rouge1": 0.0006,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0006,
"eval_rougeLsum": 0.0006,
"eval_runtime": 9.572,
"eval_samples_per_second": 11.492,
"eval_steps_per_second": 1.463,
"step": 783
},
{
"epoch": 57.96,
"eval_gen_len": 7.9,
"eval_loss": 1.789602279663086,
"eval_rouge1": 0.0001,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0001,
"eval_rougeLsum": 0.0001,
"eval_runtime": 9.5837,
"eval_samples_per_second": 11.478,
"eval_steps_per_second": 1.461,
"step": 797
},
{
"epoch": 58.98,
"eval_gen_len": 8.2545,
"eval_loss": 1.7687013149261475,
"eval_rouge1": 0.0007,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0007,
"eval_runtime": 9.6152,
"eval_samples_per_second": 11.44,
"eval_steps_per_second": 1.456,
"step": 811
},
{
"epoch": 60.0,
"eval_gen_len": 8.3636,
"eval_loss": 1.7496564388275146,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.4902,
"eval_samples_per_second": 11.591,
"eval_steps_per_second": 1.475,
"step": 825
},
{
"epoch": 60.95,
"eval_gen_len": 9.1455,
"eval_loss": 1.7332907915115356,
"eval_rouge1": 0.0002,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0002,
"eval_rougeLsum": 0.0002,
"eval_runtime": 9.5867,
"eval_samples_per_second": 11.474,
"eval_steps_per_second": 1.46,
"step": 838
},
{
"epoch": 61.96,
"eval_gen_len": 8.9,
"eval_loss": 1.7185932397842407,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.4797,
"eval_samples_per_second": 11.604,
"eval_steps_per_second": 1.477,
"step": 852
},
{
"epoch": 62.98,
"eval_gen_len": 9.7545,
"eval_loss": 1.7047526836395264,
"eval_rouge1": 0.0002,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0002,
"eval_rougeLsum": 0.0002,
"eval_runtime": 9.618,
"eval_samples_per_second": 11.437,
"eval_steps_per_second": 1.456,
"step": 866
},
{
"epoch": 64.0,
"eval_gen_len": 9.9818,
"eval_loss": 1.6921414136886597,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6264,
"eval_samples_per_second": 11.427,
"eval_steps_per_second": 1.454,
"step": 880
},
{
"epoch": 64.95,
"eval_gen_len": 9.6909,
"eval_loss": 1.6816327571868896,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5134,
"eval_samples_per_second": 11.563,
"eval_steps_per_second": 1.472,
"step": 893
},
{
"epoch": 65.96,
"eval_gen_len": 8.9545,
"eval_loss": 1.6697723865509033,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5506,
"eval_samples_per_second": 11.518,
"eval_steps_per_second": 1.466,
"step": 907
},
{
"epoch": 66.98,
"eval_gen_len": 9.6818,
"eval_loss": 1.6568113565444946,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5388,
"eval_samples_per_second": 11.532,
"eval_steps_per_second": 1.468,
"step": 921
},
{
"epoch": 68.0,
"eval_gen_len": 9.9455,
"eval_loss": 1.6469463109970093,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.4825,
"eval_samples_per_second": 11.6,
"eval_steps_per_second": 1.476,
"step": 935
},
{
"epoch": 68.95,
"eval_gen_len": 9.3545,
"eval_loss": 1.6408612728118896,
"eval_rouge1": 0.0002,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0002,
"eval_rougeLsum": 0.0002,
"eval_runtime": 9.5212,
"eval_samples_per_second": 11.553,
"eval_steps_per_second": 1.47,
"step": 948
},
{
"epoch": 69.96,
"eval_gen_len": 9.1545,
"eval_loss": 1.6316603422164917,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.4986,
"eval_samples_per_second": 11.581,
"eval_steps_per_second": 1.474,
"step": 962
},
{
"epoch": 70.98,
"eval_gen_len": 9.7818,
"eval_loss": 1.623169183731079,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5276,
"eval_samples_per_second": 11.545,
"eval_steps_per_second": 1.469,
"step": 976
},
{
"epoch": 72.0,
"eval_gen_len": 10.0273,
"eval_loss": 1.6152759790420532,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5801,
"eval_samples_per_second": 11.482,
"eval_steps_per_second": 1.461,
"step": 990
},
{
"epoch": 72.73,
"grad_norm": 3.344996213912964,
"learning_rate": 1.3196581196581197e-05,
"loss": 2.6089,
"step": 1000
},
{
"epoch": 72.95,
"eval_gen_len": 9.1727,
"eval_loss": 1.6071548461914062,
"eval_rouge1": 0.0002,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0002,
"eval_rougeLsum": 0.0002,
"eval_runtime": 9.6124,
"eval_samples_per_second": 11.444,
"eval_steps_per_second": 1.456,
"step": 1003
},
{
"epoch": 73.96,
"eval_gen_len": 9.1545,
"eval_loss": 1.599768042564392,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.498,
"eval_samples_per_second": 11.581,
"eval_steps_per_second": 1.474,
"step": 1017
},
{
"epoch": 74.98,
"eval_gen_len": 9.5273,
"eval_loss": 1.5934444665908813,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5664,
"eval_samples_per_second": 11.499,
"eval_steps_per_second": 1.463,
"step": 1031
},
{
"epoch": 76.0,
"eval_gen_len": 10.4091,
"eval_loss": 1.5867650508880615,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.579,
"eval_samples_per_second": 11.483,
"eval_steps_per_second": 1.462,
"step": 1045
},
{
"epoch": 76.95,
"eval_gen_len": 10.3,
"eval_loss": 1.5827070474624634,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0003,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.4875,
"eval_samples_per_second": 11.594,
"eval_steps_per_second": 1.476,
"step": 1058
},
{
"epoch": 77.96,
"eval_gen_len": 9.6182,
"eval_loss": 1.5748482942581177,
"eval_rouge1": 0.0008,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0008,
"eval_rougeLsum": 0.0008,
"eval_runtime": 9.4782,
"eval_samples_per_second": 11.606,
"eval_steps_per_second": 1.477,
"step": 1072
},
{
"epoch": 78.98,
"eval_gen_len": 9.6273,
"eval_loss": 1.5662298202514648,
"eval_rouge1": 0.0008,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0008,
"eval_rougeLsum": 0.0008,
"eval_runtime": 9.4778,
"eval_samples_per_second": 11.606,
"eval_steps_per_second": 1.477,
"step": 1086
},
{
"epoch": 80.0,
"eval_gen_len": 9.5273,
"eval_loss": 1.5585095882415771,
"eval_rouge1": 0.0003,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0003,
"eval_rougeLsum": 0.0003,
"eval_runtime": 9.4851,
"eval_samples_per_second": 11.597,
"eval_steps_per_second": 1.476,
"step": 1100
},
{
"epoch": 80.95,
"eval_gen_len": 10.1364,
"eval_loss": 1.5539778470993042,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6048,
"eval_samples_per_second": 11.453,
"eval_steps_per_second": 1.458,
"step": 1113
},
{
"epoch": 81.96,
"eval_gen_len": 9.6182,
"eval_loss": 1.541092038154602,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.4829,
"eval_samples_per_second": 11.6,
"eval_steps_per_second": 1.476,
"step": 1127
},
{
"epoch": 82.98,
"eval_gen_len": 9.6091,
"eval_loss": 1.5330486297607422,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5558,
"eval_samples_per_second": 11.511,
"eval_steps_per_second": 1.465,
"step": 1141
},
{
"epoch": 84.0,
"eval_gen_len": 9.0818,
"eval_loss": 1.5241070985794067,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6145,
"eval_samples_per_second": 11.441,
"eval_steps_per_second": 1.456,
"step": 1155
},
{
"epoch": 84.95,
"eval_gen_len": 9.3,
"eval_loss": 1.5192241668701172,
"eval_rouge1": 0.0004,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0004,
"eval_rougeLsum": 0.0004,
"eval_runtime": 9.4781,
"eval_samples_per_second": 11.606,
"eval_steps_per_second": 1.477,
"step": 1168
},
{
"epoch": 85.96,
"eval_gen_len": 9.5364,
"eval_loss": 1.5117179155349731,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5291,
"eval_samples_per_second": 11.544,
"eval_steps_per_second": 1.469,
"step": 1182
},
{
"epoch": 86.98,
"eval_gen_len": 9.4545,
"eval_loss": 1.4990766048431396,
"eval_rouge1": 0.0006,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0006,
"eval_rougeLsum": 0.0006,
"eval_runtime": 9.6213,
"eval_samples_per_second": 11.433,
"eval_steps_per_second": 1.455,
"step": 1196
},
{
"epoch": 88.0,
"eval_gen_len": 9.4182,
"eval_loss": 1.4878661632537842,
"eval_rouge1": 0.0011,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0008,
"eval_rougeLsum": 0.0008,
"eval_runtime": 9.4886,
"eval_samples_per_second": 11.593,
"eval_steps_per_second": 1.475,
"step": 1210
},
{
"epoch": 88.95,
"eval_gen_len": 9.5727,
"eval_loss": 1.479432463645935,
"eval_rouge1": 0.0009,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0007,
"eval_runtime": 9.5867,
"eval_samples_per_second": 11.474,
"eval_steps_per_second": 1.46,
"step": 1223
},
{
"epoch": 89.96,
"eval_gen_len": 9.3909,
"eval_loss": 1.471197247505188,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6139,
"eval_samples_per_second": 11.442,
"eval_steps_per_second": 1.456,
"step": 1237
},
{
"epoch": 90.98,
"eval_gen_len": 9.5,
"eval_loss": 1.4649511575698853,
"eval_rouge1": 0.0002,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0002,
"eval_rougeLsum": 0.0002,
"eval_runtime": 9.5947,
"eval_samples_per_second": 11.465,
"eval_steps_per_second": 1.459,
"step": 1251
},
{
"epoch": 92.0,
"eval_gen_len": 9.5273,
"eval_loss": 1.4548052549362183,
"eval_rouge1": 0.0016,
"eval_rouge2": 0.0004,
"eval_rougeL": 0.0016,
"eval_rougeLsum": 0.0017,
"eval_runtime": 9.6868,
"eval_samples_per_second": 11.356,
"eval_steps_per_second": 1.445,
"step": 1265
},
{
"epoch": 92.95,
"eval_gen_len": 9.3727,
"eval_loss": 1.444838285446167,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.5071,
"eval_samples_per_second": 11.57,
"eval_steps_per_second": 1.473,
"step": 1278
},
{
"epoch": 93.96,
"eval_gen_len": 9.6,
"eval_loss": 1.4365838766098022,
"eval_rouge1": 0.0014,
"eval_rouge2": 0.0004,
"eval_rougeL": 0.0014,
"eval_rougeLsum": 0.0015,
"eval_runtime": 9.5507,
"eval_samples_per_second": 11.517,
"eval_steps_per_second": 1.466,
"step": 1292
},
{
"epoch": 94.98,
"eval_gen_len": 9.3364,
"eval_loss": 1.4285393953323364,
"eval_rouge1": 0.0006,
"eval_rouge2": 0.0003,
"eval_rougeL": 0.0006,
"eval_rougeLsum": 0.0006,
"eval_runtime": 9.6323,
"eval_samples_per_second": 11.42,
"eval_steps_per_second": 1.453,
"step": 1306
},
{
"epoch": 96.0,
"eval_gen_len": 9.3455,
"eval_loss": 1.4242411851882935,
"eval_rouge1": 0.0008,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0008,
"eval_runtime": 9.5163,
"eval_samples_per_second": 11.559,
"eval_steps_per_second": 1.471,
"step": 1320
},
{
"epoch": 96.95,
"eval_gen_len": 9.4,
"eval_loss": 1.4160754680633545,
"eval_rouge1": 0.0007,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0007,
"eval_runtime": 9.5868,
"eval_samples_per_second": 11.474,
"eval_steps_per_second": 1.46,
"step": 1333
},
{
"epoch": 97.96,
"eval_gen_len": 9.4455,
"eval_loss": 1.4052343368530273,
"eval_rouge1": 0.0007,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0007,
"eval_runtime": 9.6109,
"eval_samples_per_second": 11.445,
"eval_steps_per_second": 1.457,
"step": 1347
},
{
"epoch": 98.98,
"eval_gen_len": 9.5273,
"eval_loss": 1.3928413391113281,
"eval_rouge1": 0.0007,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0007,
"eval_runtime": 9.6591,
"eval_samples_per_second": 11.388,
"eval_steps_per_second": 1.449,
"step": 1361
},
{
"epoch": 100.0,
"eval_gen_len": 9.5182,
"eval_loss": 1.3840864896774292,
"eval_rouge1": 0.0011,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0011,
"eval_rougeLsum": 0.0011,
"eval_runtime": 9.6661,
"eval_samples_per_second": 11.38,
"eval_steps_per_second": 1.448,
"step": 1375
},
{
"epoch": 100.95,
"eval_gen_len": 9.3,
"eval_loss": 1.381872296333313,
"eval_rouge1": 0.0006,
"eval_rouge2": 0.0001,
"eval_rougeL": 0.0006,
"eval_rougeLsum": 0.0006,
"eval_runtime": 9.6513,
"eval_samples_per_second": 11.397,
"eval_steps_per_second": 1.451,
"step": 1388
},
{
"epoch": 101.96,
"eval_gen_len": 9.3455,
"eval_loss": 1.37588632106781,
"eval_rouge1": 0.0004,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0004,
"eval_rougeLsum": 0.0004,
"eval_runtime": 9.5607,
"eval_samples_per_second": 11.505,
"eval_steps_per_second": 1.464,
"step": 1402
},
{
"epoch": 102.98,
"eval_gen_len": 9.2636,
"eval_loss": 1.3675447702407837,
"eval_rouge1": 0.0007,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.5449,
"eval_samples_per_second": 11.524,
"eval_steps_per_second": 1.467,
"step": 1416
},
{
"epoch": 104.0,
"eval_gen_len": 9.2455,
"eval_loss": 1.358955979347229,
"eval_rouge1": 0.0012,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0012,
"eval_rougeLsum": 0.0012,
"eval_runtime": 9.5868,
"eval_samples_per_second": 11.474,
"eval_steps_per_second": 1.46,
"step": 1430
},
{
"epoch": 104.95,
"eval_gen_len": 9.1455,
"eval_loss": 1.3501193523406982,
"eval_rouge1": 0.0008,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 9.6684,
"eval_samples_per_second": 11.377,
"eval_steps_per_second": 1.448,
"step": 1443
},
{
"epoch": 105.96,
"eval_gen_len": 9.1636,
"eval_loss": 1.344258189201355,
"eval_rouge1": 0.0007,
"eval_rouge2": 0.0003,
"eval_rougeL": 0.0007,
"eval_rougeLsum": 0.0007,
"eval_runtime": 9.5388,
"eval_samples_per_second": 11.532,
"eval_steps_per_second": 1.468,
"step": 1457
},
{
"epoch": 106.98,
"eval_gen_len": 9.8,
"eval_loss": 1.3355817794799805,
"eval_rouge1": 0.001,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0008,
"eval_rougeLsum": 0.0008,
"eval_runtime": 9.5263,
"eval_samples_per_second": 11.547,
"eval_steps_per_second": 1.47,
"step": 1471
},
{
"epoch": 108.0,
"eval_gen_len": 9.9182,
"eval_loss": 1.3305474519729614,
"eval_rouge1": 0.0008,
"eval_rouge2": 0.0005,
"eval_rougeL": 0.0008,
"eval_rougeLsum": 0.0008,
"eval_runtime": 9.6315,
"eval_samples_per_second": 11.421,
"eval_steps_per_second": 1.454,
"step": 1485
},
{
"epoch": 108.95,
"eval_gen_len": 10.1636,
"eval_loss": 1.3212946653366089,
"eval_rouge1": 0.0028,
"eval_rouge2": 0.001,
"eval_rougeL": 0.0025,
"eval_rougeLsum": 0.0026,
"eval_runtime": 9.5797,
"eval_samples_per_second": 11.483,
"eval_steps_per_second": 1.461,
"step": 1498
},
{
"epoch": 109.09,
"grad_norm": 5.480063438415527,
"learning_rate": 9.784615384615387e-06,
"loss": 1.7753,
"step": 1500
},
{
"epoch": 109.96,
"eval_gen_len": 9.9091,
"eval_loss": 1.31065833568573,
"eval_rouge1": 0.0019,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0015,
"eval_rougeLsum": 0.0016,
"eval_runtime": 9.57,
"eval_samples_per_second": 11.494,
"eval_steps_per_second": 1.463,
"step": 1512
},
{
"epoch": 110.98,
"eval_gen_len": 10.2,
"eval_loss": 1.301637053489685,
"eval_rouge1": 0.0015,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0013,
"eval_rougeLsum": 0.0013,
"eval_runtime": 9.5009,
"eval_samples_per_second": 11.578,
"eval_steps_per_second": 1.474,
"step": 1526
},
{
"epoch": 112.0,
"eval_gen_len": 9.7091,
"eval_loss": 1.2922732830047607,
"eval_rouge1": 0.0014,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0013,
"eval_rougeLsum": 0.0014,
"eval_runtime": 9.5171,
"eval_samples_per_second": 11.558,
"eval_steps_per_second": 1.471,
"step": 1540
},
{
"epoch": 112.95,
"eval_gen_len": 9.6273,
"eval_loss": 1.2817051410675049,
"eval_rouge1": 0.001,
"eval_rouge2": 0.0,
"eval_rougeL": 0.001,
"eval_rougeLsum": 0.001,
"eval_runtime": 9.5296,
"eval_samples_per_second": 11.543,
"eval_steps_per_second": 1.469,
"step": 1553
},
{
"epoch": 113.96,
"eval_gen_len": 9.9818,
"eval_loss": 1.2726068496704102,
"eval_rouge1": 0.001,
"eval_rouge2": 0.0,
"eval_rougeL": 0.001,
"eval_rougeLsum": 0.001,
"eval_runtime": 9.6467,
"eval_samples_per_second": 11.403,
"eval_steps_per_second": 1.451,
"step": 1567
},
{
"epoch": 114.98,
"eval_gen_len": 9.5273,
"eval_loss": 1.2626111507415771,
"eval_rouge1": 0.0009,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0006,
"eval_rougeLsum": 0.0006,
"eval_runtime": 9.5006,
"eval_samples_per_second": 11.578,
"eval_steps_per_second": 1.474,
"step": 1581
},
{
"epoch": 116.0,
"eval_gen_len": 9.8,
"eval_loss": 1.2526050806045532,
"eval_rouge1": 0.0026,
"eval_rouge2": 0.0006,
"eval_rougeL": 0.0019,
"eval_rougeLsum": 0.0019,
"eval_runtime": 9.5945,
"eval_samples_per_second": 11.465,
"eval_steps_per_second": 1.459,
"step": 1595
},
{
"epoch": 116.95,
"eval_gen_len": 9.6182,
"eval_loss": 1.2438002824783325,
"eval_rouge1": 0.0012,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0012,
"eval_rougeLsum": 0.0012,
"eval_runtime": 9.582,
"eval_samples_per_second": 11.48,
"eval_steps_per_second": 1.461,
"step": 1608
},
{
"epoch": 117.96,
"eval_gen_len": 9.5727,
"eval_loss": 1.235589861869812,
"eval_rouge1": 0.0016,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0013,
"eval_rougeLsum": 0.0013,
"eval_runtime": 9.5074,
"eval_samples_per_second": 11.57,
"eval_steps_per_second": 1.473,
"step": 1622
},
{
"epoch": 118.98,
"eval_gen_len": 9.5727,
"eval_loss": 1.2265100479125977,
"eval_rouge1": 0.0017,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0017,
"eval_rougeLsum": 0.0017,
"eval_runtime": 9.6164,
"eval_samples_per_second": 11.439,
"eval_steps_per_second": 1.456,
"step": 1636
},
{
"epoch": 120.0,
"eval_gen_len": 9.7182,
"eval_loss": 1.21653151512146,
"eval_rouge1": 0.0013,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0011,
"eval_rougeLsum": 0.0011,
"eval_runtime": 9.5387,
"eval_samples_per_second": 11.532,
"eval_steps_per_second": 1.468,
"step": 1650
},
{
"epoch": 120.95,
"eval_gen_len": 9.7182,
"eval_loss": 1.208518624305725,
"eval_rouge1": 0.0018,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0014,
"eval_rougeLsum": 0.0015,
"eval_runtime": 9.6008,
"eval_samples_per_second": 11.457,
"eval_steps_per_second": 1.458,
"step": 1663
},
{
"epoch": 121.96,
"eval_gen_len": 9.4182,
"eval_loss": 1.1974164247512817,
"eval_rouge1": 0.0024,
"eval_rouge2": 0.0007,
"eval_rougeL": 0.0022,
"eval_rougeLsum": 0.0022,
"eval_runtime": 9.5182,
"eval_samples_per_second": 11.557,
"eval_steps_per_second": 1.471,
"step": 1677
},
{
"epoch": 122.98,
"eval_gen_len": 9.3273,
"eval_loss": 1.188578486442566,
"eval_rouge1": 0.0013,
"eval_rouge2": 0.0003,
"eval_rougeL": 0.0013,
"eval_rougeLsum": 0.0009,
"eval_runtime": 9.5875,
"eval_samples_per_second": 11.473,
"eval_steps_per_second": 1.46,
"step": 1691
},
{
"epoch": 124.0,
"eval_gen_len": 9.3727,
"eval_loss": 1.1796928644180298,
"eval_rouge1": 0.0018,
"eval_rouge2": 0.0006,
"eval_rougeL": 0.0018,
"eval_rougeLsum": 0.0018,
"eval_runtime": 9.5197,
"eval_samples_per_second": 11.555,
"eval_steps_per_second": 1.471,
"step": 1705
},
{
"epoch": 124.95,
"eval_gen_len": 9.4091,
"eval_loss": 1.1718236207962036,
"eval_rouge1": 0.0021,
"eval_rouge2": 0.0006,
"eval_rougeL": 0.0018,
"eval_rougeLsum": 0.0018,
"eval_runtime": 9.5197,
"eval_samples_per_second": 11.555,
"eval_steps_per_second": 1.471,
"step": 1718
},
{
"epoch": 125.96,
"eval_gen_len": 9.3364,
"eval_loss": 1.1624401807785034,
"eval_rouge1": 0.0026,
"eval_rouge2": 0.0007,
"eval_rougeL": 0.0023,
"eval_rougeLsum": 0.0023,
"eval_runtime": 9.5347,
"eval_samples_per_second": 11.537,
"eval_steps_per_second": 1.468,
"step": 1732
},
{
"epoch": 126.98,
"eval_gen_len": 9.6364,
"eval_loss": 1.1539288759231567,
"eval_rouge1": 0.0041,
"eval_rouge2": 0.0011,
"eval_rougeL": 0.0032,
"eval_rougeLsum": 0.0032,
"eval_runtime": 9.4835,
"eval_samples_per_second": 11.599,
"eval_steps_per_second": 1.476,
"step": 1746
},
{
"epoch": 128.0,
"eval_gen_len": 9.5364,
"eval_loss": 1.1447480916976929,
"eval_rouge1": 0.0018,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0017,
"eval_rougeLsum": 0.0016,
"eval_runtime": 9.5263,
"eval_samples_per_second": 11.547,
"eval_steps_per_second": 1.47,
"step": 1760
},
{
"epoch": 128.95,
"eval_gen_len": 9.4545,
"eval_loss": 1.1359179019927979,
"eval_rouge1": 0.0026,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0021,
"eval_rougeLsum": 0.0021,
"eval_runtime": 9.5577,
"eval_samples_per_second": 11.509,
"eval_steps_per_second": 1.465,
"step": 1773
},
{
"epoch": 129.96,
"eval_gen_len": 9.3636,
"eval_loss": 1.1250239610671997,
"eval_rouge1": 0.0029,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0026,
"eval_rougeLsum": 0.0025,
"eval_runtime": 9.5118,
"eval_samples_per_second": 11.565,
"eval_steps_per_second": 1.472,
"step": 1787
},
{
"epoch": 130.98,
"eval_gen_len": 9.4364,
"eval_loss": 1.1156790256500244,
"eval_rouge1": 0.0034,
"eval_rouge2": 0.0005,
"eval_rougeL": 0.0031,
"eval_rougeLsum": 0.003,
"eval_runtime": 9.4869,
"eval_samples_per_second": 11.595,
"eval_steps_per_second": 1.476,
"step": 1801
},
{
"epoch": 132.0,
"eval_gen_len": 9.5182,
"eval_loss": 1.1065722703933716,
"eval_rouge1": 0.0029,
"eval_rouge2": 0.0003,
"eval_rougeL": 0.0027,
"eval_rougeLsum": 0.0026,
"eval_runtime": 9.6053,
"eval_samples_per_second": 11.452,
"eval_steps_per_second": 1.458,
"step": 1815
},
{
"epoch": 132.95,
"eval_gen_len": 9.6636,
"eval_loss": 1.0981847047805786,
"eval_rouge1": 0.0032,
"eval_rouge2": 0.0003,
"eval_rougeL": 0.0034,
"eval_rougeLsum": 0.0032,
"eval_runtime": 9.6558,
"eval_samples_per_second": 11.392,
"eval_steps_per_second": 1.45,
"step": 1828
},
{
"epoch": 133.96,
"eval_gen_len": 9.7273,
"eval_loss": 1.090613603591919,
"eval_rouge1": 0.0032,
"eval_rouge2": 0.0006,
"eval_rougeL": 0.0031,
"eval_rougeLsum": 0.0029,
"eval_runtime": 9.6549,
"eval_samples_per_second": 11.393,
"eval_steps_per_second": 1.45,
"step": 1842
},
{
"epoch": 134.98,
"eval_gen_len": 10.0818,
"eval_loss": 1.0842803716659546,
"eval_rouge1": 0.0061,
"eval_rouge2": 0.0008,
"eval_rougeL": 0.0052,
"eval_rougeLsum": 0.0052,
"eval_runtime": 9.4978,
"eval_samples_per_second": 11.582,
"eval_steps_per_second": 1.474,
"step": 1856
},
{
"epoch": 136.0,
"eval_gen_len": 9.5818,
"eval_loss": 1.075701117515564,
"eval_rouge1": 0.0046,
"eval_rouge2": 0.0011,
"eval_rougeL": 0.0042,
"eval_rougeLsum": 0.0042,
"eval_runtime": 9.6101,
"eval_samples_per_second": 11.446,
"eval_steps_per_second": 1.457,
"step": 1870
},
{
"epoch": 136.95,
"eval_gen_len": 9.8273,
"eval_loss": 1.0663608312606812,
"eval_rouge1": 0.0048,
"eval_rouge2": 0.0009,
"eval_rougeL": 0.0043,
"eval_rougeLsum": 0.0043,
"eval_runtime": 9.6501,
"eval_samples_per_second": 11.399,
"eval_steps_per_second": 1.451,
"step": 1883
},
{
"epoch": 137.96,
"eval_gen_len": 9.8545,
"eval_loss": 1.0555903911590576,
"eval_rouge1": 0.0055,
"eval_rouge2": 0.0007,
"eval_rougeL": 0.0046,
"eval_rougeLsum": 0.0045,
"eval_runtime": 9.5822,
"eval_samples_per_second": 11.48,
"eval_steps_per_second": 1.461,
"step": 1897
},
{
"epoch": 138.98,
"eval_gen_len": 9.9182,
"eval_loss": 1.0459803342819214,
"eval_rouge1": 0.0067,
"eval_rouge2": 0.0007,
"eval_rougeL": 0.0063,
"eval_rougeLsum": 0.0062,
"eval_runtime": 9.659,
"eval_samples_per_second": 11.388,
"eval_steps_per_second": 1.449,
"step": 1911
},
{
"epoch": 140.0,
"eval_gen_len": 10.1636,
"eval_loss": 1.0374290943145752,
"eval_rouge1": 0.0067,
"eval_rouge2": 0.0017,
"eval_rougeL": 0.0062,
"eval_rougeLsum": 0.0061,
"eval_runtime": 9.5201,
"eval_samples_per_second": 11.555,
"eval_steps_per_second": 1.471,
"step": 1925
},
{
"epoch": 140.95,
"eval_gen_len": 9.8909,
"eval_loss": 1.0285921096801758,
"eval_rouge1": 0.0037,
"eval_rouge2": 0.001,
"eval_rougeL": 0.0034,
"eval_rougeLsum": 0.0034,
"eval_runtime": 9.5989,
"eval_samples_per_second": 11.46,
"eval_steps_per_second": 1.458,
"step": 1938
},
{
"epoch": 141.96,
"eval_gen_len": 9.6455,
"eval_loss": 1.0195242166519165,
"eval_rouge1": 0.0033,
"eval_rouge2": 0.0009,
"eval_rougeL": 0.0028,
"eval_rougeLsum": 0.0027,
"eval_runtime": 9.5173,
"eval_samples_per_second": 11.558,
"eval_steps_per_second": 1.471,
"step": 1952
},
{
"epoch": 142.98,
"eval_gen_len": 9.8182,
"eval_loss": 1.0105475187301636,
"eval_rouge1": 0.0031,
"eval_rouge2": 0.0009,
"eval_rougeL": 0.0028,
"eval_rougeLsum": 0.0028,
"eval_runtime": 9.5853,
"eval_samples_per_second": 11.476,
"eval_steps_per_second": 1.461,
"step": 1966
},
{
"epoch": 144.0,
"eval_gen_len": 9.5091,
"eval_loss": 1.001856803894043,
"eval_rouge1": 0.0025,
"eval_rouge2": 0.0005,
"eval_rougeL": 0.0022,
"eval_rougeLsum": 0.002,
"eval_runtime": 9.5828,
"eval_samples_per_second": 11.479,
"eval_steps_per_second": 1.461,
"step": 1980
},
{
"epoch": 144.95,
"eval_gen_len": 9.6909,
"eval_loss": 0.9937859773635864,
"eval_rouge1": 0.0023,
"eval_rouge2": 0.0009,
"eval_rougeL": 0.0023,
"eval_rougeLsum": 0.0023,
"eval_runtime": 9.5054,
"eval_samples_per_second": 11.572,
"eval_steps_per_second": 1.473,
"step": 1993
},
{
"epoch": 145.45,
"grad_norm": 3.3901820182800293,
"learning_rate": 6.365811965811967e-06,
"loss": 1.4532,
"step": 2000
},
{
"epoch": 145.96,
"eval_gen_len": 9.4182,
"eval_loss": 0.985722541809082,
"eval_rouge1": 0.0023,
"eval_rouge2": 0.0009,
"eval_rougeL": 0.0023,
"eval_rougeLsum": 0.0023,
"eval_runtime": 9.5248,
"eval_samples_per_second": 11.549,
"eval_steps_per_second": 1.47,
"step": 2007
},
{
"epoch": 146.98,
"eval_gen_len": 9.6818,
"eval_loss": 0.978095531463623,
"eval_rouge1": 0.0028,
"eval_rouge2": 0.0009,
"eval_rougeL": 0.0028,
"eval_rougeLsum": 0.0028,
"eval_runtime": 9.5123,
"eval_samples_per_second": 11.564,
"eval_steps_per_second": 1.472,
"step": 2021
},
{
"epoch": 148.0,
"eval_gen_len": 9.7636,
"eval_loss": 0.9693424105644226,
"eval_rouge1": 0.0023,
"eval_rouge2": 0.0009,
"eval_rougeL": 0.0023,
"eval_rougeLsum": 0.0023,
"eval_runtime": 9.6266,
"eval_samples_per_second": 11.427,
"eval_steps_per_second": 1.454,
"step": 2035
},
{
"epoch": 148.95,
"eval_gen_len": 9.7545,
"eval_loss": 0.9614344835281372,
"eval_rouge1": 0.0016,
"eval_rouge2": 0.0002,
"eval_rougeL": 0.0017,
"eval_rougeLsum": 0.0016,
"eval_runtime": 9.5312,
"eval_samples_per_second": 11.541,
"eval_steps_per_second": 1.469,
"step": 2048
},
{
"epoch": 149.96,
"eval_gen_len": 9.7545,
"eval_loss": 0.9523000121116638,
"eval_rouge1": 0.0025,
"eval_rouge2": 0.0009,
"eval_rougeL": 0.0026,
"eval_rougeLsum": 0.0028,
"eval_runtime": 9.5972,
"eval_samples_per_second": 11.462,
"eval_steps_per_second": 1.459,
"step": 2062
},
{
"epoch": 150.98,
"eval_gen_len": 9.7,
"eval_loss": 0.9436053037643433,
"eval_rouge1": 0.0009,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0009,
"eval_rougeLsum": 0.001,
"eval_runtime": 9.5824,
"eval_samples_per_second": 11.479,
"eval_steps_per_second": 1.461,
"step": 2076
},
{
"epoch": 152.0,
"eval_gen_len": 9.8,
"eval_loss": 0.9361644983291626,
"eval_rouge1": 0.0024,
"eval_rouge2": 0.0009,
"eval_rougeL": 0.0024,
"eval_rougeLsum": 0.0024,
"eval_runtime": 9.5387,
"eval_samples_per_second": 11.532,
"eval_steps_per_second": 1.468,
"step": 2090
},
{
"epoch": 152.95,
"eval_gen_len": 9.8091,
"eval_loss": 0.9287785887718201,
"eval_rouge1": 0.0028,
"eval_rouge2": 0.0011,
"eval_rougeL": 0.0028,
"eval_rougeLsum": 0.0028,
"eval_runtime": 9.5888,
"eval_samples_per_second": 11.472,
"eval_steps_per_second": 1.46,
"step": 2103
},
{
"epoch": 153.96,
"eval_gen_len": 9.9273,
"eval_loss": 0.9205007553100586,
"eval_rouge1": 0.0036,
"eval_rouge2": 0.0013,
"eval_rougeL": 0.0036,
"eval_rougeLsum": 0.0036,
"eval_runtime": 9.5824,
"eval_samples_per_second": 11.479,
"eval_steps_per_second": 1.461,
"step": 2117
},
{
"epoch": 154.98,
"eval_gen_len": 10.1,
"eval_loss": 0.9119426608085632,
"eval_rouge1": 0.0037,
"eval_rouge2": 0.0011,
"eval_rougeL": 0.0038,
"eval_rougeLsum": 0.0037,
"eval_runtime": 9.5407,
"eval_samples_per_second": 11.53,
"eval_steps_per_second": 1.467,
"step": 2131
},
{
"epoch": 156.0,
"eval_gen_len": 9.9364,
"eval_loss": 0.9043306112289429,
"eval_rouge1": 0.0034,
"eval_rouge2": 0.0011,
"eval_rougeL": 0.0034,
"eval_rougeLsum": 0.0034,
"eval_runtime": 9.5334,
"eval_samples_per_second": 11.538,
"eval_steps_per_second": 1.469,
"step": 2145
},
{
"epoch": 156.95,
"eval_gen_len": 9.7818,
"eval_loss": 0.8976907730102539,
"eval_rouge1": 0.0033,
"eval_rouge2": 0.0007,
"eval_rougeL": 0.003,
"eval_rougeLsum": 0.0031,
"eval_runtime": 9.5568,
"eval_samples_per_second": 11.51,
"eval_steps_per_second": 1.465,
"step": 2158
},
{
"epoch": 157.96,
"eval_gen_len": 9.7364,
"eval_loss": 0.8908756971359253,
"eval_rouge1": 0.0033,
"eval_rouge2": 0.0007,
"eval_rougeL": 0.003,
"eval_rougeLsum": 0.0031,
"eval_runtime": 9.5534,
"eval_samples_per_second": 11.514,
"eval_steps_per_second": 1.465,
"step": 2172
},
{
"epoch": 158.98,
"eval_gen_len": 9.6273,
"eval_loss": 0.8828199505805969,
"eval_rouge1": 0.0017,
"eval_rouge2": 0.0002,
"eval_rougeL": 0.002,
"eval_rougeLsum": 0.0019,
"eval_runtime": 9.5064,
"eval_samples_per_second": 11.571,
"eval_steps_per_second": 1.473,
"step": 2186
},
{
"epoch": 160.0,
"eval_gen_len": 9.8273,
"eval_loss": 0.8748722076416016,
"eval_rouge1": 0.0045,
"eval_rouge2": 0.0015,
"eval_rougeL": 0.0036,
"eval_rougeLsum": 0.0036,
"eval_runtime": 9.6814,
"eval_samples_per_second": 11.362,
"eval_steps_per_second": 1.446,
"step": 2200
},
{
"epoch": 160.95,
"eval_gen_len": 9.9455,
"eval_loss": 0.8685693740844727,
"eval_rouge1": 0.0061,
"eval_rouge2": 0.0022,
"eval_rougeL": 0.0057,
"eval_rougeLsum": 0.0057,
"eval_runtime": 9.4971,
"eval_samples_per_second": 11.583,
"eval_steps_per_second": 1.474,
"step": 2213
},
{
"epoch": 161.96,
"eval_gen_len": 9.9364,
"eval_loss": 0.8622080087661743,
"eval_rouge1": 0.0056,
"eval_rouge2": 0.0017,
"eval_rougeL": 0.005,
"eval_rougeLsum": 0.0051,
"eval_runtime": 9.5035,
"eval_samples_per_second": 11.575,
"eval_steps_per_second": 1.473,
"step": 2227
},
{
"epoch": 162.98,
"eval_gen_len": 9.8636,
"eval_loss": 0.8555266261100769,
"eval_rouge1": 0.0049,
"eval_rouge2": 0.0016,
"eval_rougeL": 0.0046,
"eval_rougeLsum": 0.0047,
"eval_runtime": 9.5934,
"eval_samples_per_second": 11.466,
"eval_steps_per_second": 1.459,
"step": 2241
},
{
"epoch": 164.0,
"eval_gen_len": 9.9455,
"eval_loss": 0.8489392399787903,
"eval_rouge1": 0.0065,
"eval_rouge2": 0.0025,
"eval_rougeL": 0.0062,
"eval_rougeLsum": 0.0062,
"eval_runtime": 9.588,
"eval_samples_per_second": 11.473,
"eval_steps_per_second": 1.46,
"step": 2255
},
{
"epoch": 164.95,
"eval_gen_len": 10.1273,
"eval_loss": 0.8434127569198608,
"eval_rouge1": 0.0078,
"eval_rouge2": 0.0022,
"eval_rougeL": 0.0073,
"eval_rougeLsum": 0.0073,
"eval_runtime": 9.5692,
"eval_samples_per_second": 11.495,
"eval_steps_per_second": 1.463,
"step": 2268
},
{
"epoch": 165.96,
"eval_gen_len": 10.1,
"eval_loss": 0.8369239568710327,
"eval_rouge1": 0.0072,
"eval_rouge2": 0.0019,
"eval_rougeL": 0.0068,
"eval_rougeLsum": 0.0067,
"eval_runtime": 9.5983,
"eval_samples_per_second": 11.46,
"eval_steps_per_second": 1.459,
"step": 2282
},
{
"epoch": 166.98,
"eval_gen_len": 10.0636,
"eval_loss": 0.8303181529045105,
"eval_rouge1": 0.0068,
"eval_rouge2": 0.0022,
"eval_rougeL": 0.0062,
"eval_rougeLsum": 0.0061,
"eval_runtime": 9.5017,
"eval_samples_per_second": 11.577,
"eval_steps_per_second": 1.473,
"step": 2296
},
{
"epoch": 168.0,
"eval_gen_len": 9.9,
"eval_loss": 0.8243575096130371,
"eval_rouge1": 0.0044,
"eval_rouge2": 0.0013,
"eval_rougeL": 0.0039,
"eval_rougeLsum": 0.004,
"eval_runtime": 9.5856,
"eval_samples_per_second": 11.476,
"eval_steps_per_second": 1.461,
"step": 2310
},
{
"epoch": 168.95,
"eval_gen_len": 9.9818,
"eval_loss": 0.8190615773200989,
"eval_rouge1": 0.0063,
"eval_rouge2": 0.0022,
"eval_rougeL": 0.0058,
"eval_rougeLsum": 0.0057,
"eval_runtime": 9.5114,
"eval_samples_per_second": 11.565,
"eval_steps_per_second": 1.472,
"step": 2323
},
{
"epoch": 169.96,
"eval_gen_len": 9.9818,
"eval_loss": 0.8129807114601135,
"eval_rouge1": 0.0054,
"eval_rouge2": 0.0017,
"eval_rougeL": 0.0047,
"eval_rougeLsum": 0.0047,
"eval_runtime": 9.625,
"eval_samples_per_second": 11.429,
"eval_steps_per_second": 1.455,
"step": 2337
},
{
"epoch": 170.98,
"eval_gen_len": 9.9727,
"eval_loss": 0.8074091672897339,
"eval_rouge1": 0.0059,
"eval_rouge2": 0.0017,
"eval_rougeL": 0.0052,
"eval_rougeLsum": 0.0052,
"eval_runtime": 9.5111,
"eval_samples_per_second": 11.565,
"eval_steps_per_second": 1.472,
"step": 2351
},
{
"epoch": 172.0,
"eval_gen_len": 9.9,
"eval_loss": 0.8017935752868652,
"eval_rouge1": 0.0045,
"eval_rouge2": 0.0008,
"eval_rougeL": 0.0038,
"eval_rougeLsum": 0.0039,
"eval_runtime": 9.5064,
"eval_samples_per_second": 11.571,
"eval_steps_per_second": 1.473,
"step": 2365
},
{
"epoch": 172.95,
"eval_gen_len": 9.7091,
"eval_loss": 0.7968164086341858,
"eval_rouge1": 0.0031,
"eval_rouge2": 0.0005,
"eval_rougeL": 0.0027,
"eval_rougeLsum": 0.0026,
"eval_runtime": 9.5948,
"eval_samples_per_second": 11.465,
"eval_steps_per_second": 1.459,
"step": 2378
},
{
"epoch": 173.96,
"eval_gen_len": 9.7364,
"eval_loss": 0.7917037010192871,
"eval_rouge1": 0.0025,
"eval_rouge2": 0.0005,
"eval_rougeL": 0.0022,
"eval_rougeLsum": 0.0022,
"eval_runtime": 9.636,
"eval_samples_per_second": 11.416,
"eval_steps_per_second": 1.453,
"step": 2392
},
{
"epoch": 174.98,
"eval_gen_len": 9.7455,
"eval_loss": 0.787342369556427,
"eval_rouge1": 0.0033,
"eval_rouge2": 0.0016,
"eval_rougeL": 0.0027,
"eval_rougeLsum": 0.0027,
"eval_runtime": 9.5895,
"eval_samples_per_second": 11.471,
"eval_steps_per_second": 1.46,
"step": 2406
},
{
"epoch": 176.0,
"eval_gen_len": 9.7,
"eval_loss": 0.7829388976097107,
"eval_rouge1": 0.0033,
"eval_rouge2": 0.0016,
"eval_rougeL": 0.0028,
"eval_rougeLsum": 0.0028,
"eval_runtime": 9.5727,
"eval_samples_per_second": 11.491,
"eval_steps_per_second": 1.462,
"step": 2420
},
{
"epoch": 176.95,
"eval_gen_len": 9.6091,
"eval_loss": 0.7783145904541016,
"eval_rouge1": 0.0033,
"eval_rouge2": 0.0016,
"eval_rougeL": 0.0028,
"eval_rougeLsum": 0.0028,
"eval_runtime": 9.6033,
"eval_samples_per_second": 11.454,
"eval_steps_per_second": 1.458,
"step": 2433
},
{
"epoch": 177.96,
"eval_gen_len": 9.7091,
"eval_loss": 0.7735804319381714,
"eval_rouge1": 0.0033,
"eval_rouge2": 0.0016,
"eval_rougeL": 0.0028,
"eval_rougeLsum": 0.0028,
"eval_runtime": 9.7017,
"eval_samples_per_second": 11.338,
"eval_steps_per_second": 1.443,
"step": 2447
},
{
"epoch": 178.98,
"eval_gen_len": 9.6364,
"eval_loss": 0.7691650986671448,
"eval_rouge1": 0.0026,
"eval_rouge2": 0.0016,
"eval_rougeL": 0.0028,
"eval_rougeLsum": 0.0028,
"eval_runtime": 9.6163,
"eval_samples_per_second": 11.439,
"eval_steps_per_second": 1.456,
"step": 2461
},
{
"epoch": 180.0,
"eval_gen_len": 9.4818,
"eval_loss": 0.7652955055236816,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.5613,
"eval_samples_per_second": 11.505,
"eval_steps_per_second": 1.464,
"step": 2475
},
{
"epoch": 180.95,
"eval_gen_len": 9.4818,
"eval_loss": 0.7611756920814514,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 9.6858,
"eval_samples_per_second": 11.357,
"eval_steps_per_second": 1.445,
"step": 2488
},
{
"epoch": 181.82,
"grad_norm": 1.818055272102356,
"learning_rate": 2.9470085470085475e-06,
"loss": 1.1581,
"step": 2500
}
],
"logging_steps": 500,
"max_steps": 2925,
"num_input_tokens_seen": 0,
"num_train_epochs": 225,
"save_steps": 500,
"total_flos": 9.676695641731891e+16,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}