newsSum / checkpoint-8000 /trainer_state.json
Jorkieboe's picture
initial
1bf7d4a verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 438.35616438356163,
"eval_steps": 500,
"global_step": 8000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.99,
"eval_gen_len": 14.6545,
"eval_loss": 21.487245559692383,
"eval_rouge1": 0.0812,
"eval_rouge2": 0.0183,
"eval_rougeL": 0.0668,
"eval_rougeLsum": 0.0663,
"eval_runtime": 5.8972,
"eval_samples_per_second": 18.653,
"eval_steps_per_second": 3.222,
"step": 18
},
{
"epoch": 1.97,
"eval_gen_len": 14.5091,
"eval_loss": 21.332395553588867,
"eval_rouge1": 0.0808,
"eval_rouge2": 0.0191,
"eval_rougeL": 0.0657,
"eval_rougeLsum": 0.0652,
"eval_runtime": 5.8919,
"eval_samples_per_second": 18.67,
"eval_steps_per_second": 3.225,
"step": 36
},
{
"epoch": 2.96,
"eval_gen_len": 14.0727,
"eval_loss": 21.108753204345703,
"eval_rouge1": 0.0819,
"eval_rouge2": 0.0203,
"eval_rougeL": 0.0671,
"eval_rougeLsum": 0.0665,
"eval_runtime": 5.8908,
"eval_samples_per_second": 18.673,
"eval_steps_per_second": 3.225,
"step": 54
},
{
"epoch": 4.0,
"eval_gen_len": 14.0727,
"eval_loss": 20.799583435058594,
"eval_rouge1": 0.0824,
"eval_rouge2": 0.0206,
"eval_rougeL": 0.0665,
"eval_rougeLsum": 0.0659,
"eval_runtime": 5.8851,
"eval_samples_per_second": 18.691,
"eval_steps_per_second": 3.228,
"step": 73
},
{
"epoch": 4.99,
"eval_gen_len": 14.2182,
"eval_loss": 20.4609317779541,
"eval_rouge1": 0.0814,
"eval_rouge2": 0.0212,
"eval_rougeL": 0.0669,
"eval_rougeLsum": 0.0664,
"eval_runtime": 5.8894,
"eval_samples_per_second": 18.677,
"eval_steps_per_second": 3.226,
"step": 91
},
{
"epoch": 5.97,
"eval_gen_len": 14.2182,
"eval_loss": 20.145660400390625,
"eval_rouge1": 0.0847,
"eval_rouge2": 0.0208,
"eval_rougeL": 0.067,
"eval_rougeLsum": 0.0666,
"eval_runtime": 5.8963,
"eval_samples_per_second": 18.656,
"eval_steps_per_second": 3.222,
"step": 109
},
{
"epoch": 6.96,
"eval_gen_len": 14.0727,
"eval_loss": 19.89990234375,
"eval_rouge1": 0.084,
"eval_rouge2": 0.0184,
"eval_rougeL": 0.0662,
"eval_rougeLsum": 0.0657,
"eval_runtime": 5.8939,
"eval_samples_per_second": 18.663,
"eval_steps_per_second": 3.224,
"step": 127
},
{
"epoch": 8.0,
"eval_gen_len": 14.1455,
"eval_loss": 19.661205291748047,
"eval_rouge1": 0.0882,
"eval_rouge2": 0.0219,
"eval_rougeL": 0.0703,
"eval_rougeLsum": 0.0699,
"eval_runtime": 5.9009,
"eval_samples_per_second": 18.641,
"eval_steps_per_second": 3.22,
"step": 146
},
{
"epoch": 8.99,
"eval_gen_len": 14.9273,
"eval_loss": 19.420059204101562,
"eval_rouge1": 0.0958,
"eval_rouge2": 0.0243,
"eval_rougeL": 0.0775,
"eval_rougeLsum": 0.0772,
"eval_runtime": 5.8976,
"eval_samples_per_second": 18.652,
"eval_steps_per_second": 3.222,
"step": 164
},
{
"epoch": 9.97,
"eval_gen_len": 14.9273,
"eval_loss": 19.1621036529541,
"eval_rouge1": 0.0915,
"eval_rouge2": 0.0256,
"eval_rougeL": 0.0769,
"eval_rougeLsum": 0.0763,
"eval_runtime": 5.937,
"eval_samples_per_second": 18.528,
"eval_steps_per_second": 3.2,
"step": 182
},
{
"epoch": 10.96,
"eval_gen_len": 14.7818,
"eval_loss": 18.873458862304688,
"eval_rouge1": 0.0968,
"eval_rouge2": 0.0284,
"eval_rougeL": 0.0786,
"eval_rougeLsum": 0.0786,
"eval_runtime": 5.8869,
"eval_samples_per_second": 18.686,
"eval_steps_per_second": 3.228,
"step": 200
},
{
"epoch": 12.0,
"eval_gen_len": 14.6364,
"eval_loss": 18.52387237548828,
"eval_rouge1": 0.0901,
"eval_rouge2": 0.0262,
"eval_rougeL": 0.0738,
"eval_rougeLsum": 0.0741,
"eval_runtime": 5.9384,
"eval_samples_per_second": 18.524,
"eval_steps_per_second": 3.2,
"step": 219
},
{
"epoch": 12.99,
"eval_gen_len": 14.4909,
"eval_loss": 18.13555145263672,
"eval_rouge1": 0.0886,
"eval_rouge2": 0.0263,
"eval_rougeL": 0.0714,
"eval_rougeLsum": 0.0717,
"eval_runtime": 5.9432,
"eval_samples_per_second": 18.509,
"eval_steps_per_second": 3.197,
"step": 237
},
{
"epoch": 13.97,
"eval_gen_len": 14.4909,
"eval_loss": 17.66015625,
"eval_rouge1": 0.0915,
"eval_rouge2": 0.03,
"eval_rougeL": 0.0712,
"eval_rougeLsum": 0.0713,
"eval_runtime": 5.9005,
"eval_samples_per_second": 18.643,
"eval_steps_per_second": 3.22,
"step": 255
},
{
"epoch": 14.96,
"eval_gen_len": 15.2182,
"eval_loss": 17.107126235961914,
"eval_rouge1": 0.0933,
"eval_rouge2": 0.0295,
"eval_rougeL": 0.0733,
"eval_rougeLsum": 0.0734,
"eval_runtime": 5.9126,
"eval_samples_per_second": 18.604,
"eval_steps_per_second": 3.213,
"step": 273
},
{
"epoch": 16.0,
"eval_gen_len": 15.2182,
"eval_loss": 16.4465389251709,
"eval_rouge1": 0.0792,
"eval_rouge2": 0.02,
"eval_rougeL": 0.0637,
"eval_rougeLsum": 0.0639,
"eval_runtime": 5.893,
"eval_samples_per_second": 18.666,
"eval_steps_per_second": 3.224,
"step": 292
},
{
"epoch": 16.99,
"eval_gen_len": 14.7818,
"eval_loss": 15.7671480178833,
"eval_rouge1": 0.069,
"eval_rouge2": 0.0176,
"eval_rougeL": 0.0534,
"eval_rougeLsum": 0.0537,
"eval_runtime": 5.8934,
"eval_samples_per_second": 18.665,
"eval_steps_per_second": 3.224,
"step": 310
},
{
"epoch": 17.97,
"eval_gen_len": 13.1545,
"eval_loss": 14.998970031738281,
"eval_rouge1": 0.0566,
"eval_rouge2": 0.0124,
"eval_rougeL": 0.0449,
"eval_rougeLsum": 0.0451,
"eval_runtime": 5.8867,
"eval_samples_per_second": 18.686,
"eval_steps_per_second": 3.228,
"step": 328
},
{
"epoch": 18.96,
"eval_gen_len": 11.1091,
"eval_loss": 14.060928344726562,
"eval_rouge1": 0.0498,
"eval_rouge2": 0.0171,
"eval_rougeL": 0.0368,
"eval_rougeLsum": 0.0367,
"eval_runtime": 5.6651,
"eval_samples_per_second": 19.417,
"eval_steps_per_second": 3.354,
"step": 346
},
{
"epoch": 20.0,
"eval_gen_len": 8.7273,
"eval_loss": 13.174235343933105,
"eval_rouge1": 0.0205,
"eval_rouge2": 0.0044,
"eval_rougeL": 0.0165,
"eval_rougeLsum": 0.0164,
"eval_runtime": 5.7609,
"eval_samples_per_second": 19.094,
"eval_steps_per_second": 3.298,
"step": 365
},
{
"epoch": 20.99,
"eval_gen_len": 6.8636,
"eval_loss": 12.371671676635742,
"eval_rouge1": 0.0139,
"eval_rouge2": 0.0028,
"eval_rougeL": 0.0124,
"eval_rougeLsum": 0.0123,
"eval_runtime": 5.2236,
"eval_samples_per_second": 21.058,
"eval_steps_per_second": 3.637,
"step": 383
},
{
"epoch": 21.97,
"eval_gen_len": 6.8727,
"eval_loss": 11.577987670898438,
"eval_rouge1": 0.0053,
"eval_rouge2": 0.0005,
"eval_rougeL": 0.0047,
"eval_rougeLsum": 0.0048,
"eval_runtime": 5.7472,
"eval_samples_per_second": 19.14,
"eval_steps_per_second": 3.306,
"step": 401
},
{
"epoch": 22.96,
"eval_gen_len": 7.3818,
"eval_loss": 10.78397274017334,
"eval_rouge1": 0.0021,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0021,
"eval_rougeLsum": 0.0021,
"eval_runtime": 5.7621,
"eval_samples_per_second": 19.09,
"eval_steps_per_second": 3.297,
"step": 419
},
{
"epoch": 24.0,
"eval_gen_len": 6.8273,
"eval_loss": 9.95447826385498,
"eval_rouge1": 0.0009,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0008,
"eval_rougeLsum": 0.0008,
"eval_runtime": 5.4394,
"eval_samples_per_second": 20.223,
"eval_steps_per_second": 3.493,
"step": 438
},
{
"epoch": 24.99,
"eval_gen_len": 7.0909,
"eval_loss": 9.179459571838379,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.4343,
"eval_samples_per_second": 20.242,
"eval_steps_per_second": 3.496,
"step": 456
},
{
"epoch": 25.97,
"eval_gen_len": 8.1,
"eval_loss": 8.421984672546387,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 5.7592,
"eval_samples_per_second": 19.1,
"eval_steps_per_second": 3.299,
"step": 474
},
{
"epoch": 26.96,
"eval_gen_len": 8.2636,
"eval_loss": 7.694218158721924,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8511,
"eval_samples_per_second": 18.8,
"eval_steps_per_second": 3.247,
"step": 492
},
{
"epoch": 27.4,
"grad_norm": 5.483399868011475,
"learning_rate": 1.8893333333333334e-05,
"loss": 16.3522,
"step": 500
},
{
"epoch": 28.0,
"eval_gen_len": 11.3818,
"eval_loss": 6.939992427825928,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.887,
"eval_samples_per_second": 18.685,
"eval_steps_per_second": 3.227,
"step": 511
},
{
"epoch": 28.99,
"eval_gen_len": 12.6273,
"eval_loss": 6.2829270362854,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.905,
"eval_samples_per_second": 18.628,
"eval_steps_per_second": 3.218,
"step": 529
},
{
"epoch": 29.97,
"eval_gen_len": 15.3091,
"eval_loss": 5.604813575744629,
"eval_rouge1": 0.0002,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0002,
"eval_rougeLsum": 0.0002,
"eval_runtime": 5.8732,
"eval_samples_per_second": 18.729,
"eval_steps_per_second": 3.235,
"step": 547
},
{
"epoch": 30.96,
"eval_gen_len": 17.3182,
"eval_loss": 4.977880954742432,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9021,
"eval_samples_per_second": 18.637,
"eval_steps_per_second": 3.219,
"step": 565
},
{
"epoch": 32.0,
"eval_gen_len": 18.3273,
"eval_loss": 4.37266731262207,
"eval_rouge1": 0.0002,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0002,
"eval_rougeLsum": 0.0002,
"eval_runtime": 5.9191,
"eval_samples_per_second": 18.584,
"eval_steps_per_second": 3.21,
"step": 584
},
{
"epoch": 32.99,
"eval_gen_len": 18.7091,
"eval_loss": 3.8430399894714355,
"eval_rouge1": 0.0002,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0002,
"eval_rougeLsum": 0.0002,
"eval_runtime": 5.8717,
"eval_samples_per_second": 18.734,
"eval_steps_per_second": 3.236,
"step": 602
},
{
"epoch": 33.97,
"eval_gen_len": 18.8545,
"eval_loss": 3.400059938430786,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 5.9022,
"eval_samples_per_second": 18.637,
"eval_steps_per_second": 3.219,
"step": 620
},
{
"epoch": 34.96,
"eval_gen_len": 19.0,
"eval_loss": 3.0639424324035645,
"eval_rouge1": 0.0003,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0003,
"eval_rougeLsum": 0.0003,
"eval_runtime": 5.8543,
"eval_samples_per_second": 18.79,
"eval_steps_per_second": 3.245,
"step": 638
},
{
"epoch": 36.0,
"eval_gen_len": 19.0,
"eval_loss": 2.758321762084961,
"eval_rouge1": 0.0003,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0003,
"eval_rougeLsum": 0.0003,
"eval_runtime": 5.9112,
"eval_samples_per_second": 18.609,
"eval_steps_per_second": 3.214,
"step": 657
},
{
"epoch": 36.99,
"eval_gen_len": 18.4364,
"eval_loss": 2.5352485179901123,
"eval_rouge1": 0.0003,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0003,
"eval_rougeLsum": 0.0003,
"eval_runtime": 5.8968,
"eval_samples_per_second": 18.654,
"eval_steps_per_second": 3.222,
"step": 675
},
{
"epoch": 37.97,
"eval_gen_len": 14.8909,
"eval_loss": 2.3651320934295654,
"eval_rouge1": 0.0005,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0005,
"eval_rougeLsum": 0.0005,
"eval_runtime": 5.8625,
"eval_samples_per_second": 18.763,
"eval_steps_per_second": 3.241,
"step": 693
},
{
"epoch": 38.96,
"eval_gen_len": 10.8273,
"eval_loss": 2.230059862136841,
"eval_rouge1": 0.0014,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0014,
"eval_rougeLsum": 0.0014,
"eval_runtime": 5.852,
"eval_samples_per_second": 18.797,
"eval_steps_per_second": 3.247,
"step": 711
},
{
"epoch": 40.0,
"eval_gen_len": 7.9545,
"eval_loss": 2.111604928970337,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8625,
"eval_samples_per_second": 18.763,
"eval_steps_per_second": 3.241,
"step": 730
},
{
"epoch": 40.99,
"eval_gen_len": 6.8364,
"eval_loss": 2.019117832183838,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8494,
"eval_samples_per_second": 18.805,
"eval_steps_per_second": 3.248,
"step": 748
},
{
"epoch": 41.97,
"eval_gen_len": 6.1727,
"eval_loss": 1.950454831123352,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8618,
"eval_samples_per_second": 18.765,
"eval_steps_per_second": 3.241,
"step": 766
},
{
"epoch": 42.96,
"eval_gen_len": 5.6,
"eval_loss": 1.9009199142456055,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8848,
"eval_samples_per_second": 18.692,
"eval_steps_per_second": 3.229,
"step": 784
},
{
"epoch": 44.0,
"eval_gen_len": 5.4455,
"eval_loss": 1.8568826913833618,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8517,
"eval_samples_per_second": 18.798,
"eval_steps_per_second": 3.247,
"step": 803
},
{
"epoch": 44.99,
"eval_gen_len": 5.1909,
"eval_loss": 1.8185267448425293,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8682,
"eval_samples_per_second": 18.745,
"eval_steps_per_second": 3.238,
"step": 821
},
{
"epoch": 45.97,
"eval_gen_len": 5.1182,
"eval_loss": 1.7847113609313965,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8488,
"eval_samples_per_second": 18.807,
"eval_steps_per_second": 3.249,
"step": 839
},
{
"epoch": 46.96,
"eval_gen_len": 4.9455,
"eval_loss": 1.7547551393508911,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8518,
"eval_samples_per_second": 18.798,
"eval_steps_per_second": 3.247,
"step": 857
},
{
"epoch": 48.0,
"eval_gen_len": 4.9455,
"eval_loss": 1.7266695499420166,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8585,
"eval_samples_per_second": 18.776,
"eval_steps_per_second": 3.243,
"step": 876
},
{
"epoch": 48.99,
"eval_gen_len": 4.7455,
"eval_loss": 1.7055079936981201,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8511,
"eval_samples_per_second": 18.8,
"eval_steps_per_second": 3.247,
"step": 894
},
{
"epoch": 49.97,
"eval_gen_len": 4.4727,
"eval_loss": 1.6864752769470215,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.848,
"eval_samples_per_second": 18.81,
"eval_steps_per_second": 3.249,
"step": 912
},
{
"epoch": 50.96,
"eval_gen_len": 4.4,
"eval_loss": 1.6679636240005493,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8942,
"eval_samples_per_second": 18.662,
"eval_steps_per_second": 3.223,
"step": 930
},
{
"epoch": 52.0,
"eval_gen_len": 4.8273,
"eval_loss": 1.6500035524368286,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8572,
"eval_samples_per_second": 18.78,
"eval_steps_per_second": 3.244,
"step": 949
},
{
"epoch": 52.99,
"eval_gen_len": 5.4273,
"eval_loss": 1.6347858905792236,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8874,
"eval_samples_per_second": 18.684,
"eval_steps_per_second": 3.227,
"step": 967
},
{
"epoch": 53.97,
"eval_gen_len": 5.4727,
"eval_loss": 1.620485782623291,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8561,
"eval_samples_per_second": 18.784,
"eval_steps_per_second": 3.244,
"step": 985
},
{
"epoch": 54.79,
"grad_norm": 3.8756470680236816,
"learning_rate": 1.7786666666666667e-05,
"loss": 3.4786,
"step": 1000
},
{
"epoch": 54.96,
"eval_gen_len": 4.9909,
"eval_loss": 1.6101189851760864,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9013,
"eval_samples_per_second": 18.64,
"eval_steps_per_second": 3.22,
"step": 1003
},
{
"epoch": 56.0,
"eval_gen_len": 4.9727,
"eval_loss": 1.5964934825897217,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8515,
"eval_samples_per_second": 18.799,
"eval_steps_per_second": 3.247,
"step": 1022
},
{
"epoch": 56.99,
"eval_gen_len": 5.3,
"eval_loss": 1.583103060722351,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8488,
"eval_samples_per_second": 18.807,
"eval_steps_per_second": 3.249,
"step": 1040
},
{
"epoch": 57.97,
"eval_gen_len": 5.2273,
"eval_loss": 1.5738121271133423,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8912,
"eval_samples_per_second": 18.672,
"eval_steps_per_second": 3.225,
"step": 1058
},
{
"epoch": 58.96,
"eval_gen_len": 5.5273,
"eval_loss": 1.5626448392868042,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8527,
"eval_samples_per_second": 18.795,
"eval_steps_per_second": 3.246,
"step": 1076
},
{
"epoch": 60.0,
"eval_gen_len": 6.3273,
"eval_loss": 1.5520726442337036,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8887,
"eval_samples_per_second": 18.68,
"eval_steps_per_second": 3.227,
"step": 1095
},
{
"epoch": 60.99,
"eval_gen_len": 7.1091,
"eval_loss": 1.5398296117782593,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8838,
"eval_samples_per_second": 18.695,
"eval_steps_per_second": 3.229,
"step": 1113
},
{
"epoch": 61.97,
"eval_gen_len": 7.9182,
"eval_loss": 1.5261036157608032,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8837,
"eval_samples_per_second": 18.696,
"eval_steps_per_second": 3.229,
"step": 1131
},
{
"epoch": 62.96,
"eval_gen_len": 8.6,
"eval_loss": 1.5135173797607422,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8878,
"eval_samples_per_second": 18.683,
"eval_steps_per_second": 3.227,
"step": 1149
},
{
"epoch": 64.0,
"eval_gen_len": 8.9727,
"eval_loss": 1.5019876956939697,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8993,
"eval_samples_per_second": 18.646,
"eval_steps_per_second": 3.221,
"step": 1168
},
{
"epoch": 64.99,
"eval_gen_len": 9.1455,
"eval_loss": 1.4927572011947632,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8898,
"eval_samples_per_second": 18.676,
"eval_steps_per_second": 3.226,
"step": 1186
},
{
"epoch": 65.97,
"eval_gen_len": 9.3636,
"eval_loss": 1.4839699268341064,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8485,
"eval_samples_per_second": 18.808,
"eval_steps_per_second": 3.249,
"step": 1204
},
{
"epoch": 66.96,
"eval_gen_len": 9.6727,
"eval_loss": 1.4724147319793701,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8498,
"eval_samples_per_second": 18.804,
"eval_steps_per_second": 3.248,
"step": 1222
},
{
"epoch": 68.0,
"eval_gen_len": 9.6545,
"eval_loss": 1.4610724449157715,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.857,
"eval_samples_per_second": 18.781,
"eval_steps_per_second": 3.244,
"step": 1241
},
{
"epoch": 68.99,
"eval_gen_len": 9.7182,
"eval_loss": 1.4491708278656006,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8545,
"eval_samples_per_second": 18.789,
"eval_steps_per_second": 3.245,
"step": 1259
},
{
"epoch": 69.97,
"eval_gen_len": 9.6727,
"eval_loss": 1.4401447772979736,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8617,
"eval_samples_per_second": 18.766,
"eval_steps_per_second": 3.241,
"step": 1277
},
{
"epoch": 70.96,
"eval_gen_len": 9.6818,
"eval_loss": 1.4306913614273071,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.861,
"eval_samples_per_second": 18.768,
"eval_steps_per_second": 3.242,
"step": 1295
},
{
"epoch": 72.0,
"eval_gen_len": 9.7636,
"eval_loss": 1.4177192449569702,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8583,
"eval_samples_per_second": 18.777,
"eval_steps_per_second": 3.243,
"step": 1314
},
{
"epoch": 72.99,
"eval_gen_len": 9.8182,
"eval_loss": 1.4081608057022095,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8574,
"eval_samples_per_second": 18.78,
"eval_steps_per_second": 3.244,
"step": 1332
},
{
"epoch": 73.97,
"eval_gen_len": 9.8,
"eval_loss": 1.3982936143875122,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8596,
"eval_samples_per_second": 18.773,
"eval_steps_per_second": 3.243,
"step": 1350
},
{
"epoch": 74.96,
"eval_gen_len": 9.7545,
"eval_loss": 1.385299563407898,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8543,
"eval_samples_per_second": 18.79,
"eval_steps_per_second": 3.245,
"step": 1368
},
{
"epoch": 76.0,
"eval_gen_len": 9.8727,
"eval_loss": 1.3723993301391602,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9071,
"eval_samples_per_second": 18.622,
"eval_steps_per_second": 3.216,
"step": 1387
},
{
"epoch": 76.99,
"eval_gen_len": 9.8636,
"eval_loss": 1.3635698556900024,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8955,
"eval_samples_per_second": 18.658,
"eval_steps_per_second": 3.223,
"step": 1405
},
{
"epoch": 77.97,
"eval_gen_len": 9.7727,
"eval_loss": 1.3577702045440674,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8571,
"eval_samples_per_second": 18.781,
"eval_steps_per_second": 3.244,
"step": 1423
},
{
"epoch": 78.96,
"eval_gen_len": 9.8455,
"eval_loss": 1.350039005279541,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8893,
"eval_samples_per_second": 18.678,
"eval_steps_per_second": 3.226,
"step": 1441
},
{
"epoch": 80.0,
"eval_gen_len": 9.8,
"eval_loss": 1.3370468616485596,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9191,
"eval_samples_per_second": 18.584,
"eval_steps_per_second": 3.21,
"step": 1460
},
{
"epoch": 80.99,
"eval_gen_len": 9.7909,
"eval_loss": 1.3282612562179565,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8938,
"eval_samples_per_second": 18.664,
"eval_steps_per_second": 3.224,
"step": 1478
},
{
"epoch": 81.97,
"eval_gen_len": 9.7273,
"eval_loss": 1.3168359994888306,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8597,
"eval_samples_per_second": 18.772,
"eval_steps_per_second": 3.242,
"step": 1496
},
{
"epoch": 82.19,
"grad_norm": 2.8337929248809814,
"learning_rate": 1.667777777777778e-05,
"loss": 1.7958,
"step": 1500
},
{
"epoch": 82.96,
"eval_gen_len": 9.8727,
"eval_loss": 1.3036466836929321,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8558,
"eval_samples_per_second": 18.785,
"eval_steps_per_second": 3.245,
"step": 1514
},
{
"epoch": 84.0,
"eval_gen_len": 9.9455,
"eval_loss": 1.2935236692428589,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8683,
"eval_samples_per_second": 18.745,
"eval_steps_per_second": 3.238,
"step": 1533
},
{
"epoch": 84.99,
"eval_gen_len": 9.9182,
"eval_loss": 1.2810677289962769,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8603,
"eval_samples_per_second": 18.77,
"eval_steps_per_second": 3.242,
"step": 1551
},
{
"epoch": 85.97,
"eval_gen_len": 9.9364,
"eval_loss": 1.2679041624069214,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8561,
"eval_samples_per_second": 18.784,
"eval_steps_per_second": 3.244,
"step": 1569
},
{
"epoch": 86.96,
"eval_gen_len": 9.9091,
"eval_loss": 1.259030818939209,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.855,
"eval_samples_per_second": 18.787,
"eval_steps_per_second": 3.245,
"step": 1587
},
{
"epoch": 88.0,
"eval_gen_len": 9.9636,
"eval_loss": 1.2463934421539307,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8568,
"eval_samples_per_second": 18.782,
"eval_steps_per_second": 3.244,
"step": 1606
},
{
"epoch": 88.99,
"eval_gen_len": 9.9091,
"eval_loss": 1.2380764484405518,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8541,
"eval_samples_per_second": 18.79,
"eval_steps_per_second": 3.246,
"step": 1624
},
{
"epoch": 89.97,
"eval_gen_len": 9.9091,
"eval_loss": 1.2272734642028809,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8679,
"eval_samples_per_second": 18.746,
"eval_steps_per_second": 3.238,
"step": 1642
},
{
"epoch": 90.96,
"eval_gen_len": 9.9091,
"eval_loss": 1.2162261009216309,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8611,
"eval_samples_per_second": 18.768,
"eval_steps_per_second": 3.242,
"step": 1660
},
{
"epoch": 92.0,
"eval_gen_len": 9.9273,
"eval_loss": 1.2035719156265259,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8669,
"eval_samples_per_second": 18.749,
"eval_steps_per_second": 3.239,
"step": 1679
},
{
"epoch": 92.99,
"eval_gen_len": 9.9727,
"eval_loss": 1.1865819692611694,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8764,
"eval_samples_per_second": 18.719,
"eval_steps_per_second": 3.233,
"step": 1697
},
{
"epoch": 93.97,
"eval_gen_len": 9.9273,
"eval_loss": 1.1713649034500122,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8739,
"eval_samples_per_second": 18.727,
"eval_steps_per_second": 3.235,
"step": 1715
},
{
"epoch": 94.96,
"eval_gen_len": 9.8818,
"eval_loss": 1.1566345691680908,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8775,
"eval_samples_per_second": 18.715,
"eval_steps_per_second": 3.233,
"step": 1733
},
{
"epoch": 96.0,
"eval_gen_len": 9.6818,
"eval_loss": 1.141555905342102,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8697,
"eval_samples_per_second": 18.74,
"eval_steps_per_second": 3.237,
"step": 1752
},
{
"epoch": 96.99,
"eval_gen_len": 9.5,
"eval_loss": 1.1269311904907227,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8514,
"eval_samples_per_second": 18.799,
"eval_steps_per_second": 3.247,
"step": 1770
},
{
"epoch": 97.97,
"eval_gen_len": 9.6545,
"eval_loss": 1.1134684085845947,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8582,
"eval_samples_per_second": 18.777,
"eval_steps_per_second": 3.243,
"step": 1788
},
{
"epoch": 98.96,
"eval_gen_len": 9.7,
"eval_loss": 1.099327564239502,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8567,
"eval_samples_per_second": 18.782,
"eval_steps_per_second": 3.244,
"step": 1806
},
{
"epoch": 100.0,
"eval_gen_len": 9.7909,
"eval_loss": 1.0843485593795776,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8693,
"eval_samples_per_second": 18.741,
"eval_steps_per_second": 3.237,
"step": 1825
},
{
"epoch": 100.99,
"eval_gen_len": 9.8909,
"eval_loss": 1.0678842067718506,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8524,
"eval_samples_per_second": 18.796,
"eval_steps_per_second": 3.247,
"step": 1843
},
{
"epoch": 101.97,
"eval_gen_len": 9.8636,
"eval_loss": 1.0531669855117798,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8548,
"eval_samples_per_second": 18.788,
"eval_steps_per_second": 3.245,
"step": 1861
},
{
"epoch": 102.96,
"eval_gen_len": 9.8364,
"eval_loss": 1.0373491048812866,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8537,
"eval_samples_per_second": 18.792,
"eval_steps_per_second": 3.246,
"step": 1879
},
{
"epoch": 104.0,
"eval_gen_len": 9.8636,
"eval_loss": 1.0185768604278564,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8656,
"eval_samples_per_second": 18.754,
"eval_steps_per_second": 3.239,
"step": 1898
},
{
"epoch": 104.99,
"eval_gen_len": 9.9091,
"eval_loss": 1.0003846883773804,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8601,
"eval_samples_per_second": 18.771,
"eval_steps_per_second": 3.242,
"step": 1916
},
{
"epoch": 105.97,
"eval_gen_len": 9.9364,
"eval_loss": 0.9851242899894714,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8522,
"eval_samples_per_second": 18.796,
"eval_steps_per_second": 3.247,
"step": 1934
},
{
"epoch": 106.96,
"eval_gen_len": 9.9545,
"eval_loss": 0.9700939059257507,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8548,
"eval_samples_per_second": 18.788,
"eval_steps_per_second": 3.245,
"step": 1952
},
{
"epoch": 108.0,
"eval_gen_len": 9.9182,
"eval_loss": 0.9482754468917847,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8554,
"eval_samples_per_second": 18.786,
"eval_steps_per_second": 3.245,
"step": 1971
},
{
"epoch": 108.99,
"eval_gen_len": 9.9273,
"eval_loss": 0.9286762475967407,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8532,
"eval_samples_per_second": 18.793,
"eval_steps_per_second": 3.246,
"step": 1989
},
{
"epoch": 109.59,
"grad_norm": 1.9006233215332031,
"learning_rate": 1.5566666666666668e-05,
"loss": 1.4343,
"step": 2000
},
{
"epoch": 109.97,
"eval_gen_len": 9.8364,
"eval_loss": 0.906920850276947,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8648,
"eval_samples_per_second": 18.756,
"eval_steps_per_second": 3.24,
"step": 2007
},
{
"epoch": 110.96,
"eval_gen_len": 9.9273,
"eval_loss": 0.8903268575668335,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8436,
"eval_samples_per_second": 18.824,
"eval_steps_per_second": 3.251,
"step": 2025
},
{
"epoch": 112.0,
"eval_gen_len": 9.8818,
"eval_loss": 0.8693720102310181,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8617,
"eval_samples_per_second": 18.766,
"eval_steps_per_second": 3.241,
"step": 2044
},
{
"epoch": 112.99,
"eval_gen_len": 9.8182,
"eval_loss": 0.8487831354141235,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8484,
"eval_samples_per_second": 18.808,
"eval_steps_per_second": 3.249,
"step": 2062
},
{
"epoch": 113.97,
"eval_gen_len": 9.8,
"eval_loss": 0.8259497284889221,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8508,
"eval_samples_per_second": 18.801,
"eval_steps_per_second": 3.247,
"step": 2080
},
{
"epoch": 114.96,
"eval_gen_len": 9.7818,
"eval_loss": 0.8031529784202576,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8573,
"eval_samples_per_second": 18.78,
"eval_steps_per_second": 3.244,
"step": 2098
},
{
"epoch": 116.0,
"eval_gen_len": 9.7273,
"eval_loss": 0.7789543271064758,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8581,
"eval_samples_per_second": 18.777,
"eval_steps_per_second": 3.243,
"step": 2117
},
{
"epoch": 116.99,
"eval_gen_len": 9.5909,
"eval_loss": 0.7579861283302307,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8897,
"eval_samples_per_second": 18.677,
"eval_steps_per_second": 3.226,
"step": 2135
},
{
"epoch": 117.97,
"eval_gen_len": 9.7273,
"eval_loss": 0.7319938540458679,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8872,
"eval_samples_per_second": 18.684,
"eval_steps_per_second": 3.227,
"step": 2153
},
{
"epoch": 118.96,
"eval_gen_len": 9.7455,
"eval_loss": 0.710875928401947,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8965,
"eval_samples_per_second": 18.655,
"eval_steps_per_second": 3.222,
"step": 2171
},
{
"epoch": 120.0,
"eval_gen_len": 9.8182,
"eval_loss": 0.6845319271087646,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8634,
"eval_samples_per_second": 18.76,
"eval_steps_per_second": 3.24,
"step": 2190
},
{
"epoch": 120.99,
"eval_gen_len": 9.8455,
"eval_loss": 0.6633948683738708,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8707,
"eval_samples_per_second": 18.737,
"eval_steps_per_second": 3.236,
"step": 2208
},
{
"epoch": 121.97,
"eval_gen_len": 9.8364,
"eval_loss": 0.6423484683036804,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8549,
"eval_samples_per_second": 18.788,
"eval_steps_per_second": 3.245,
"step": 2226
},
{
"epoch": 122.96,
"eval_gen_len": 9.8273,
"eval_loss": 0.6260173916816711,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8887,
"eval_samples_per_second": 18.68,
"eval_steps_per_second": 3.227,
"step": 2244
},
{
"epoch": 124.0,
"eval_gen_len": 9.7636,
"eval_loss": 0.604343056678772,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8959,
"eval_samples_per_second": 18.657,
"eval_steps_per_second": 3.223,
"step": 2263
},
{
"epoch": 124.99,
"eval_gen_len": 9.7273,
"eval_loss": 0.5872541666030884,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8541,
"eval_samples_per_second": 18.79,
"eval_steps_per_second": 3.246,
"step": 2281
},
{
"epoch": 125.97,
"eval_gen_len": 9.7818,
"eval_loss": 0.5708852410316467,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8564,
"eval_samples_per_second": 18.783,
"eval_steps_per_second": 3.244,
"step": 2299
},
{
"epoch": 126.96,
"eval_gen_len": 9.8364,
"eval_loss": 0.5527102947235107,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.854,
"eval_samples_per_second": 18.79,
"eval_steps_per_second": 3.246,
"step": 2317
},
{
"epoch": 128.0,
"eval_gen_len": 9.7545,
"eval_loss": 0.537294864654541,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8561,
"eval_samples_per_second": 18.784,
"eval_steps_per_second": 3.244,
"step": 2336
},
{
"epoch": 128.99,
"eval_gen_len": 9.5636,
"eval_loss": 0.5231500864028931,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8429,
"eval_samples_per_second": 18.826,
"eval_steps_per_second": 3.252,
"step": 2354
},
{
"epoch": 129.97,
"eval_gen_len": 9.7091,
"eval_loss": 0.5122325420379639,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8959,
"eval_samples_per_second": 18.657,
"eval_steps_per_second": 3.223,
"step": 2372
},
{
"epoch": 130.96,
"eval_gen_len": 9.6,
"eval_loss": 0.501021683216095,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8801,
"eval_samples_per_second": 18.707,
"eval_steps_per_second": 3.231,
"step": 2390
},
{
"epoch": 132.0,
"eval_gen_len": 9.7,
"eval_loss": 0.4930221140384674,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8944,
"eval_samples_per_second": 18.662,
"eval_steps_per_second": 3.223,
"step": 2409
},
{
"epoch": 132.99,
"eval_gen_len": 9.5909,
"eval_loss": 0.48477092385292053,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8544,
"eval_samples_per_second": 18.789,
"eval_steps_per_second": 3.245,
"step": 2427
},
{
"epoch": 133.97,
"eval_gen_len": 9.4818,
"eval_loss": 0.47620585560798645,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8504,
"eval_samples_per_second": 18.802,
"eval_steps_per_second": 3.248,
"step": 2445
},
{
"epoch": 134.96,
"eval_gen_len": 9.4727,
"eval_loss": 0.4678414463996887,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8619,
"eval_samples_per_second": 18.765,
"eval_steps_per_second": 3.241,
"step": 2463
},
{
"epoch": 136.0,
"eval_gen_len": 8.9727,
"eval_loss": 0.46089962124824524,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9034,
"eval_samples_per_second": 18.633,
"eval_steps_per_second": 3.219,
"step": 2482
},
{
"epoch": 136.99,
"grad_norm": 0.9683671593666077,
"learning_rate": 1.4455555555555555e-05,
"loss": 0.904,
"step": 2500
},
{
"epoch": 136.99,
"eval_gen_len": 8.8182,
"eval_loss": 0.45609110593795776,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8499,
"eval_samples_per_second": 18.804,
"eval_steps_per_second": 3.248,
"step": 2500
},
{
"epoch": 137.97,
"eval_gen_len": 8.8,
"eval_loss": 0.448975533246994,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8901,
"eval_samples_per_second": 18.676,
"eval_steps_per_second": 3.226,
"step": 2518
},
{
"epoch": 138.96,
"eval_gen_len": 8.7091,
"eval_loss": 0.44343459606170654,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8897,
"eval_samples_per_second": 18.677,
"eval_steps_per_second": 3.226,
"step": 2536
},
{
"epoch": 140.0,
"eval_gen_len": 8.6818,
"eval_loss": 0.4378666877746582,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9063,
"eval_samples_per_second": 18.624,
"eval_steps_per_second": 3.217,
"step": 2555
},
{
"epoch": 140.99,
"eval_gen_len": 8.1182,
"eval_loss": 0.4321661591529846,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8879,
"eval_samples_per_second": 18.682,
"eval_steps_per_second": 3.227,
"step": 2573
},
{
"epoch": 141.97,
"eval_gen_len": 8.0455,
"eval_loss": 0.42785531282424927,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8597,
"eval_samples_per_second": 18.772,
"eval_steps_per_second": 3.242,
"step": 2591
},
{
"epoch": 142.96,
"eval_gen_len": 7.8909,
"eval_loss": 0.42234906554222107,
"eval_rouge1": 0.0026,
"eval_rouge2": 0.002,
"eval_rougeL": 0.0021,
"eval_rougeLsum": 0.0021,
"eval_runtime": 5.8591,
"eval_samples_per_second": 18.774,
"eval_steps_per_second": 3.243,
"step": 2609
},
{
"epoch": 144.0,
"eval_gen_len": 7.5727,
"eval_loss": 0.4168229401111603,
"eval_rouge1": 0.0026,
"eval_rouge2": 0.002,
"eval_rougeL": 0.0021,
"eval_rougeLsum": 0.0021,
"eval_runtime": 5.862,
"eval_samples_per_second": 18.765,
"eval_steps_per_second": 3.241,
"step": 2628
},
{
"epoch": 144.99,
"eval_gen_len": 7.4182,
"eval_loss": 0.4125988185405731,
"eval_rouge1": 0.0026,
"eval_rouge2": 0.002,
"eval_rougeL": 0.0021,
"eval_rougeLsum": 0.0021,
"eval_runtime": 5.855,
"eval_samples_per_second": 18.787,
"eval_steps_per_second": 3.245,
"step": 2646
},
{
"epoch": 145.97,
"eval_gen_len": 6.8545,
"eval_loss": 0.40698733925819397,
"eval_rouge1": 0.0026,
"eval_rouge2": 0.002,
"eval_rougeL": 0.0021,
"eval_rougeLsum": 0.0021,
"eval_runtime": 5.8558,
"eval_samples_per_second": 18.785,
"eval_steps_per_second": 3.245,
"step": 2664
},
{
"epoch": 146.96,
"eval_gen_len": 6.6,
"eval_loss": 0.4030015468597412,
"eval_rouge1": 0.0026,
"eval_rouge2": 0.002,
"eval_rougeL": 0.0021,
"eval_rougeLsum": 0.0021,
"eval_runtime": 5.8557,
"eval_samples_per_second": 18.785,
"eval_steps_per_second": 3.245,
"step": 2682
},
{
"epoch": 148.0,
"eval_gen_len": 6.5273,
"eval_loss": 0.39865967631340027,
"eval_rouge1": 0.0044,
"eval_rouge2": 0.0035,
"eval_rougeL": 0.0039,
"eval_rougeLsum": 0.0031,
"eval_runtime": 5.863,
"eval_samples_per_second": 18.762,
"eval_steps_per_second": 3.241,
"step": 2701
},
{
"epoch": 148.99,
"eval_gen_len": 6.5273,
"eval_loss": 0.3959006071090698,
"eval_rouge1": 0.0041,
"eval_rouge2": 0.0035,
"eval_rougeL": 0.0041,
"eval_rougeLsum": 0.0035,
"eval_runtime": 5.857,
"eval_samples_per_second": 18.781,
"eval_steps_per_second": 3.244,
"step": 2719
},
{
"epoch": 149.97,
"eval_gen_len": 6.3,
"eval_loss": 0.3902026414871216,
"eval_rouge1": 0.0049,
"eval_rouge2": 0.0041,
"eval_rougeL": 0.004,
"eval_rougeLsum": 0.0033,
"eval_runtime": 5.855,
"eval_samples_per_second": 18.787,
"eval_steps_per_second": 3.245,
"step": 2737
},
{
"epoch": 150.96,
"eval_gen_len": 6.1909,
"eval_loss": 0.3883272707462311,
"eval_rouge1": 0.0049,
"eval_rouge2": 0.0041,
"eval_rougeL": 0.004,
"eval_rougeLsum": 0.0033,
"eval_runtime": 5.853,
"eval_samples_per_second": 18.794,
"eval_steps_per_second": 3.246,
"step": 2755
},
{
"epoch": 152.0,
"eval_gen_len": 6.1182,
"eval_loss": 0.38500654697418213,
"eval_rouge1": 0.006,
"eval_rouge2": 0.0046,
"eval_rougeL": 0.0058,
"eval_rougeLsum": 0.0049,
"eval_runtime": 5.8626,
"eval_samples_per_second": 18.763,
"eval_steps_per_second": 3.241,
"step": 2774
},
{
"epoch": 152.99,
"eval_gen_len": 6.3818,
"eval_loss": 0.38074272871017456,
"eval_rouge1": 0.0065,
"eval_rouge2": 0.0043,
"eval_rougeL": 0.0058,
"eval_rougeLsum": 0.0058,
"eval_runtime": 5.8533,
"eval_samples_per_second": 18.793,
"eval_steps_per_second": 3.246,
"step": 2792
},
{
"epoch": 153.97,
"eval_gen_len": 6.2,
"eval_loss": 0.3779795467853546,
"eval_rouge1": 0.0045,
"eval_rouge2": 0.0022,
"eval_rougeL": 0.0039,
"eval_rougeLsum": 0.0038,
"eval_runtime": 5.8616,
"eval_samples_per_second": 18.766,
"eval_steps_per_second": 3.241,
"step": 2810
},
{
"epoch": 154.96,
"eval_gen_len": 6.3636,
"eval_loss": 0.37300601601600647,
"eval_rouge1": 0.0136,
"eval_rouge2": 0.0099,
"eval_rougeL": 0.0122,
"eval_rougeLsum": 0.0117,
"eval_runtime": 5.8638,
"eval_samples_per_second": 18.759,
"eval_steps_per_second": 3.24,
"step": 2828
},
{
"epoch": 156.0,
"eval_gen_len": 6.4909,
"eval_loss": 0.36999648809432983,
"eval_rouge1": 0.0195,
"eval_rouge2": 0.0152,
"eval_rougeL": 0.0179,
"eval_rougeLsum": 0.0173,
"eval_runtime": 5.8779,
"eval_samples_per_second": 18.714,
"eval_steps_per_second": 3.232,
"step": 2847
},
{
"epoch": 156.99,
"eval_gen_len": 6.5455,
"eval_loss": 0.3666529059410095,
"eval_rouge1": 0.0193,
"eval_rouge2": 0.0135,
"eval_rougeL": 0.0174,
"eval_rougeLsum": 0.0171,
"eval_runtime": 5.8596,
"eval_samples_per_second": 18.773,
"eval_steps_per_second": 3.243,
"step": 2865
},
{
"epoch": 157.97,
"eval_gen_len": 6.1909,
"eval_loss": 0.3615466058254242,
"eval_rouge1": 0.019,
"eval_rouge2": 0.0133,
"eval_rougeL": 0.0169,
"eval_rougeLsum": 0.0168,
"eval_runtime": 5.8587,
"eval_samples_per_second": 18.776,
"eval_steps_per_second": 3.243,
"step": 2883
},
{
"epoch": 158.96,
"eval_gen_len": 7.5,
"eval_loss": 0.3599731922149658,
"eval_rouge1": 0.0283,
"eval_rouge2": 0.0195,
"eval_rougeL": 0.0269,
"eval_rougeLsum": 0.0268,
"eval_runtime": 5.8597,
"eval_samples_per_second": 18.772,
"eval_steps_per_second": 3.242,
"step": 2901
},
{
"epoch": 160.0,
"eval_gen_len": 6.8,
"eval_loss": 0.3567432165145874,
"eval_rouge1": 0.0241,
"eval_rouge2": 0.0168,
"eval_rougeL": 0.022,
"eval_rougeLsum": 0.0216,
"eval_runtime": 5.8734,
"eval_samples_per_second": 18.729,
"eval_steps_per_second": 3.235,
"step": 2920
},
{
"epoch": 160.99,
"eval_gen_len": 6.6,
"eval_loss": 0.35366886854171753,
"eval_rouge1": 0.021,
"eval_rouge2": 0.0135,
"eval_rougeL": 0.0189,
"eval_rougeLsum": 0.0184,
"eval_runtime": 5.8665,
"eval_samples_per_second": 18.751,
"eval_steps_per_second": 3.239,
"step": 2938
},
{
"epoch": 161.97,
"eval_gen_len": 8.6818,
"eval_loss": 0.35154151916503906,
"eval_rouge1": 0.0438,
"eval_rouge2": 0.0277,
"eval_rougeL": 0.0401,
"eval_rougeLsum": 0.0401,
"eval_runtime": 5.8694,
"eval_samples_per_second": 18.741,
"eval_steps_per_second": 3.237,
"step": 2956
},
{
"epoch": 162.96,
"eval_gen_len": 8.2636,
"eval_loss": 0.3467026948928833,
"eval_rouge1": 0.0374,
"eval_rouge2": 0.023,
"eval_rougeL": 0.0346,
"eval_rougeLsum": 0.0341,
"eval_runtime": 5.8684,
"eval_samples_per_second": 18.745,
"eval_steps_per_second": 3.238,
"step": 2974
},
{
"epoch": 164.0,
"eval_gen_len": 7.4636,
"eval_loss": 0.343766987323761,
"eval_rouge1": 0.0284,
"eval_rouge2": 0.0184,
"eval_rougeL": 0.0267,
"eval_rougeLsum": 0.027,
"eval_runtime": 5.876,
"eval_samples_per_second": 18.72,
"eval_steps_per_second": 3.233,
"step": 2993
},
{
"epoch": 164.38,
"grad_norm": 0.7337635159492493,
"learning_rate": 1.3346666666666667e-05,
"loss": 0.5395,
"step": 3000
},
{
"epoch": 164.99,
"eval_gen_len": 8.9909,
"eval_loss": 0.3419288694858551,
"eval_rouge1": 0.0445,
"eval_rouge2": 0.0276,
"eval_rougeL": 0.0414,
"eval_rougeLsum": 0.0408,
"eval_runtime": 5.8707,
"eval_samples_per_second": 18.737,
"eval_steps_per_second": 3.236,
"step": 3011
},
{
"epoch": 165.97,
"eval_gen_len": 8.8909,
"eval_loss": 0.33906012773513794,
"eval_rouge1": 0.0446,
"eval_rouge2": 0.0294,
"eval_rougeL": 0.0415,
"eval_rougeLsum": 0.0412,
"eval_runtime": 5.8733,
"eval_samples_per_second": 18.729,
"eval_steps_per_second": 3.235,
"step": 3029
},
{
"epoch": 166.96,
"eval_gen_len": 9.5455,
"eval_loss": 0.3354407548904419,
"eval_rouge1": 0.0498,
"eval_rouge2": 0.032,
"eval_rougeL": 0.0452,
"eval_rougeLsum": 0.0453,
"eval_runtime": 5.8714,
"eval_samples_per_second": 18.735,
"eval_steps_per_second": 3.236,
"step": 3047
},
{
"epoch": 168.0,
"eval_gen_len": 10.0818,
"eval_loss": 0.33422771096229553,
"eval_rouge1": 0.0579,
"eval_rouge2": 0.0388,
"eval_rougeL": 0.0549,
"eval_rougeLsum": 0.0553,
"eval_runtime": 5.8768,
"eval_samples_per_second": 18.718,
"eval_steps_per_second": 3.233,
"step": 3066
},
{
"epoch": 168.99,
"eval_gen_len": 10.2,
"eval_loss": 0.331695020198822,
"eval_rouge1": 0.0542,
"eval_rouge2": 0.0353,
"eval_rougeL": 0.0508,
"eval_rougeLsum": 0.051,
"eval_runtime": 5.8711,
"eval_samples_per_second": 18.736,
"eval_steps_per_second": 3.236,
"step": 3084
},
{
"epoch": 169.97,
"eval_gen_len": 10.1364,
"eval_loss": 0.3284001052379608,
"eval_rouge1": 0.0555,
"eval_rouge2": 0.0363,
"eval_rougeL": 0.0523,
"eval_rougeLsum": 0.0521,
"eval_runtime": 5.8689,
"eval_samples_per_second": 18.743,
"eval_steps_per_second": 3.237,
"step": 3102
},
{
"epoch": 170.96,
"eval_gen_len": 10.6727,
"eval_loss": 0.32654786109924316,
"eval_rouge1": 0.0562,
"eval_rouge2": 0.0353,
"eval_rougeL": 0.0519,
"eval_rougeLsum": 0.0521,
"eval_runtime": 5.8762,
"eval_samples_per_second": 18.72,
"eval_steps_per_second": 3.233,
"step": 3120
},
{
"epoch": 172.0,
"eval_gen_len": 10.7818,
"eval_loss": 0.3229809105396271,
"eval_rouge1": 0.0551,
"eval_rouge2": 0.0346,
"eval_rougeL": 0.0523,
"eval_rougeLsum": 0.0525,
"eval_runtime": 5.8825,
"eval_samples_per_second": 18.699,
"eval_steps_per_second": 3.23,
"step": 3139
},
{
"epoch": 172.99,
"eval_gen_len": 11.3727,
"eval_loss": 0.3223954439163208,
"eval_rouge1": 0.0614,
"eval_rouge2": 0.0388,
"eval_rougeL": 0.0579,
"eval_rougeLsum": 0.0585,
"eval_runtime": 5.8822,
"eval_samples_per_second": 18.701,
"eval_steps_per_second": 3.23,
"step": 3157
},
{
"epoch": 173.97,
"eval_gen_len": 11.2909,
"eval_loss": 0.31792977452278137,
"eval_rouge1": 0.0584,
"eval_rouge2": 0.0328,
"eval_rougeL": 0.055,
"eval_rougeLsum": 0.0553,
"eval_runtime": 5.8754,
"eval_samples_per_second": 18.722,
"eval_steps_per_second": 3.234,
"step": 3175
},
{
"epoch": 174.96,
"eval_gen_len": 12.2455,
"eval_loss": 0.31657084822654724,
"eval_rouge1": 0.0649,
"eval_rouge2": 0.0392,
"eval_rougeL": 0.0615,
"eval_rougeLsum": 0.0619,
"eval_runtime": 5.9198,
"eval_samples_per_second": 18.582,
"eval_steps_per_second": 3.21,
"step": 3193
},
{
"epoch": 176.0,
"eval_gen_len": 11.7545,
"eval_loss": 0.3131771981716156,
"eval_rouge1": 0.0605,
"eval_rouge2": 0.0341,
"eval_rougeL": 0.0568,
"eval_rougeLsum": 0.0571,
"eval_runtime": 5.8941,
"eval_samples_per_second": 18.663,
"eval_steps_per_second": 3.224,
"step": 3212
},
{
"epoch": 176.99,
"eval_gen_len": 12.2364,
"eval_loss": 0.3109656870365143,
"eval_rouge1": 0.0639,
"eval_rouge2": 0.0396,
"eval_rougeL": 0.0614,
"eval_rougeLsum": 0.0619,
"eval_runtime": 5.8859,
"eval_samples_per_second": 18.689,
"eval_steps_per_second": 3.228,
"step": 3230
},
{
"epoch": 177.97,
"eval_gen_len": 11.9273,
"eval_loss": 0.3089582324028015,
"eval_rouge1": 0.0664,
"eval_rouge2": 0.04,
"eval_rougeL": 0.0619,
"eval_rougeLsum": 0.0626,
"eval_runtime": 5.873,
"eval_samples_per_second": 18.73,
"eval_steps_per_second": 3.235,
"step": 3248
},
{
"epoch": 178.96,
"eval_gen_len": 12.3364,
"eval_loss": 0.30663853883743286,
"eval_rouge1": 0.0721,
"eval_rouge2": 0.0455,
"eval_rougeL": 0.0685,
"eval_rougeLsum": 0.0688,
"eval_runtime": 5.8871,
"eval_samples_per_second": 18.685,
"eval_steps_per_second": 3.227,
"step": 3266
},
{
"epoch": 180.0,
"eval_gen_len": 12.6,
"eval_loss": 0.3049904704093933,
"eval_rouge1": 0.0727,
"eval_rouge2": 0.0447,
"eval_rougeL": 0.0691,
"eval_rougeLsum": 0.0693,
"eval_runtime": 5.8952,
"eval_samples_per_second": 18.659,
"eval_steps_per_second": 3.223,
"step": 3285
},
{
"epoch": 180.99,
"eval_gen_len": 12.2636,
"eval_loss": 0.30238986015319824,
"eval_rouge1": 0.0716,
"eval_rouge2": 0.0436,
"eval_rougeL": 0.0684,
"eval_rougeLsum": 0.0688,
"eval_runtime": 5.8902,
"eval_samples_per_second": 18.675,
"eval_steps_per_second": 3.226,
"step": 3303
},
{
"epoch": 181.97,
"eval_gen_len": 15.2,
"eval_loss": 0.29928120970726013,
"eval_rouge1": 0.0901,
"eval_rouge2": 0.0567,
"eval_rougeL": 0.0848,
"eval_rougeLsum": 0.0851,
"eval_runtime": 5.9062,
"eval_samples_per_second": 18.624,
"eval_steps_per_second": 3.217,
"step": 3321
},
{
"epoch": 182.96,
"eval_gen_len": 13.8182,
"eval_loss": 0.2973878085613251,
"eval_rouge1": 0.0874,
"eval_rouge2": 0.0583,
"eval_rougeL": 0.084,
"eval_rougeLsum": 0.0838,
"eval_runtime": 5.892,
"eval_samples_per_second": 18.67,
"eval_steps_per_second": 3.225,
"step": 3339
},
{
"epoch": 184.0,
"eval_gen_len": 14.8091,
"eval_loss": 0.29529863595962524,
"eval_rouge1": 0.0924,
"eval_rouge2": 0.0616,
"eval_rougeL": 0.0892,
"eval_rougeLsum": 0.0893,
"eval_runtime": 5.9216,
"eval_samples_per_second": 18.576,
"eval_steps_per_second": 3.209,
"step": 3358
},
{
"epoch": 184.99,
"eval_gen_len": 16.4,
"eval_loss": 0.29348161816596985,
"eval_rouge1": 0.1029,
"eval_rouge2": 0.0663,
"eval_rougeL": 0.0987,
"eval_rougeLsum": 0.0984,
"eval_runtime": 5.8929,
"eval_samples_per_second": 18.666,
"eval_steps_per_second": 3.224,
"step": 3376
},
{
"epoch": 185.97,
"eval_gen_len": 15.0364,
"eval_loss": 0.2902657389640808,
"eval_rouge1": 0.0923,
"eval_rouge2": 0.0603,
"eval_rougeL": 0.089,
"eval_rougeLsum": 0.0887,
"eval_runtime": 5.8988,
"eval_samples_per_second": 18.648,
"eval_steps_per_second": 3.221,
"step": 3394
},
{
"epoch": 186.96,
"eval_gen_len": 16.7,
"eval_loss": 0.2885950207710266,
"eval_rouge1": 0.1027,
"eval_rouge2": 0.0661,
"eval_rougeL": 0.0985,
"eval_rougeLsum": 0.099,
"eval_runtime": 5.9087,
"eval_samples_per_second": 18.617,
"eval_steps_per_second": 3.216,
"step": 3412
},
{
"epoch": 188.0,
"eval_gen_len": 15.0455,
"eval_loss": 0.2858668267726898,
"eval_rouge1": 0.0998,
"eval_rouge2": 0.0686,
"eval_rougeL": 0.0979,
"eval_rougeLsum": 0.0973,
"eval_runtime": 5.9128,
"eval_samples_per_second": 18.604,
"eval_steps_per_second": 3.213,
"step": 3431
},
{
"epoch": 188.99,
"eval_gen_len": 16.7,
"eval_loss": 0.28371575474739075,
"eval_rouge1": 0.1081,
"eval_rouge2": 0.0733,
"eval_rougeL": 0.105,
"eval_rougeLsum": 0.1051,
"eval_runtime": 5.9179,
"eval_samples_per_second": 18.588,
"eval_steps_per_second": 3.211,
"step": 3449
},
{
"epoch": 189.97,
"eval_gen_len": 16.9364,
"eval_loss": 0.28239956498146057,
"eval_rouge1": 0.1176,
"eval_rouge2": 0.0809,
"eval_rougeL": 0.1142,
"eval_rougeLsum": 0.1136,
"eval_runtime": 5.9392,
"eval_samples_per_second": 18.521,
"eval_steps_per_second": 3.199,
"step": 3467
},
{
"epoch": 190.96,
"eval_gen_len": 17.1545,
"eval_loss": 0.28112414479255676,
"eval_rouge1": 0.1281,
"eval_rouge2": 0.0923,
"eval_rougeL": 0.1251,
"eval_rougeLsum": 0.1247,
"eval_runtime": 5.9439,
"eval_samples_per_second": 18.506,
"eval_steps_per_second": 3.197,
"step": 3485
},
{
"epoch": 191.78,
"grad_norm": 0.6559975743293762,
"learning_rate": 1.2235555555555556e-05,
"loss": 0.4165,
"step": 3500
},
{
"epoch": 192.0,
"eval_gen_len": 16.9909,
"eval_loss": 0.2794179916381836,
"eval_rouge1": 0.1326,
"eval_rouge2": 0.0983,
"eval_rougeL": 0.1308,
"eval_rougeLsum": 0.1303,
"eval_runtime": 5.9583,
"eval_samples_per_second": 18.462,
"eval_steps_per_second": 3.189,
"step": 3504
},
{
"epoch": 192.99,
"eval_gen_len": 17.2818,
"eval_loss": 0.2769763767719269,
"eval_rouge1": 0.1377,
"eval_rouge2": 0.1021,
"eval_rougeL": 0.1352,
"eval_rougeLsum": 0.1352,
"eval_runtime": 5.9436,
"eval_samples_per_second": 18.507,
"eval_steps_per_second": 3.197,
"step": 3522
},
{
"epoch": 193.97,
"eval_gen_len": 16.6727,
"eval_loss": 0.27478569746017456,
"eval_rouge1": 0.1421,
"eval_rouge2": 0.1072,
"eval_rougeL": 0.1391,
"eval_rougeLsum": 0.1389,
"eval_runtime": 5.9492,
"eval_samples_per_second": 18.49,
"eval_steps_per_second": 3.194,
"step": 3540
},
{
"epoch": 194.96,
"eval_gen_len": 17.6273,
"eval_loss": 0.273343026638031,
"eval_rouge1": 0.1536,
"eval_rouge2": 0.1166,
"eval_rougeL": 0.1499,
"eval_rougeLsum": 0.15,
"eval_runtime": 5.9258,
"eval_samples_per_second": 18.563,
"eval_steps_per_second": 3.206,
"step": 3558
},
{
"epoch": 196.0,
"eval_gen_len": 17.6182,
"eval_loss": 0.27078884840011597,
"eval_rouge1": 0.1575,
"eval_rouge2": 0.1196,
"eval_rougeL": 0.1531,
"eval_rougeLsum": 0.1529,
"eval_runtime": 5.9196,
"eval_samples_per_second": 18.582,
"eval_steps_per_second": 3.21,
"step": 3577
},
{
"epoch": 196.99,
"eval_gen_len": 18.1727,
"eval_loss": 0.2679530382156372,
"eval_rouge1": 0.1544,
"eval_rouge2": 0.1152,
"eval_rougeL": 0.1492,
"eval_rougeLsum": 0.1492,
"eval_runtime": 5.9115,
"eval_samples_per_second": 18.608,
"eval_steps_per_second": 3.214,
"step": 3595
},
{
"epoch": 197.97,
"eval_gen_len": 17.6636,
"eval_loss": 0.26689696311950684,
"eval_rouge1": 0.1615,
"eval_rouge2": 0.1264,
"eval_rougeL": 0.1579,
"eval_rougeLsum": 0.1577,
"eval_runtime": 5.911,
"eval_samples_per_second": 18.609,
"eval_steps_per_second": 3.214,
"step": 3613
},
{
"epoch": 198.96,
"eval_gen_len": 18.3182,
"eval_loss": 0.26582667231559753,
"eval_rouge1": 0.1687,
"eval_rouge2": 0.1322,
"eval_rougeL": 0.165,
"eval_rougeLsum": 0.1646,
"eval_runtime": 5.9133,
"eval_samples_per_second": 18.602,
"eval_steps_per_second": 3.213,
"step": 3631
},
{
"epoch": 200.0,
"eval_gen_len": 18.3091,
"eval_loss": 0.262999951839447,
"eval_rouge1": 0.1753,
"eval_rouge2": 0.1408,
"eval_rougeL": 0.1726,
"eval_rougeLsum": 0.1721,
"eval_runtime": 5.9621,
"eval_samples_per_second": 18.45,
"eval_steps_per_second": 3.187,
"step": 3650
},
{
"epoch": 200.99,
"eval_gen_len": 18.6182,
"eval_loss": 0.2615664601325989,
"eval_rouge1": 0.1803,
"eval_rouge2": 0.1452,
"eval_rougeL": 0.1776,
"eval_rougeLsum": 0.1767,
"eval_runtime": 5.9566,
"eval_samples_per_second": 18.467,
"eval_steps_per_second": 3.19,
"step": 3668
},
{
"epoch": 201.97,
"eval_gen_len": 18.4818,
"eval_loss": 0.26064223051071167,
"eval_rouge1": 0.1883,
"eval_rouge2": 0.1542,
"eval_rougeL": 0.1857,
"eval_rougeLsum": 0.1854,
"eval_runtime": 5.9305,
"eval_samples_per_second": 18.548,
"eval_steps_per_second": 3.204,
"step": 3686
},
{
"epoch": 202.96,
"eval_gen_len": 18.6364,
"eval_loss": 0.2593740224838257,
"eval_rouge1": 0.1822,
"eval_rouge2": 0.1461,
"eval_rougeL": 0.179,
"eval_rougeLsum": 0.1784,
"eval_runtime": 5.9634,
"eval_samples_per_second": 18.446,
"eval_steps_per_second": 3.186,
"step": 3704
},
{
"epoch": 204.0,
"eval_gen_len": 18.5909,
"eval_loss": 0.2574484348297119,
"eval_rouge1": 0.1785,
"eval_rouge2": 0.1424,
"eval_rougeL": 0.1754,
"eval_rougeLsum": 0.1741,
"eval_runtime": 5.9326,
"eval_samples_per_second": 18.542,
"eval_steps_per_second": 3.203,
"step": 3723
},
{
"epoch": 204.99,
"eval_gen_len": 18.5909,
"eval_loss": 0.255962997674942,
"eval_rouge1": 0.182,
"eval_rouge2": 0.1475,
"eval_rougeL": 0.1799,
"eval_rougeLsum": 0.1789,
"eval_runtime": 5.9645,
"eval_samples_per_second": 18.442,
"eval_steps_per_second": 3.186,
"step": 3741
},
{
"epoch": 205.97,
"eval_gen_len": 18.3818,
"eval_loss": 0.25388580560684204,
"eval_rouge1": 0.1899,
"eval_rouge2": 0.1557,
"eval_rougeL": 0.1862,
"eval_rougeLsum": 0.1861,
"eval_runtime": 5.9231,
"eval_samples_per_second": 18.571,
"eval_steps_per_second": 3.208,
"step": 3759
},
{
"epoch": 206.96,
"eval_gen_len": 18.3727,
"eval_loss": 0.251926988363266,
"eval_rouge1": 0.1962,
"eval_rouge2": 0.1635,
"eval_rougeL": 0.1935,
"eval_rougeLsum": 0.1929,
"eval_runtime": 5.9351,
"eval_samples_per_second": 18.534,
"eval_steps_per_second": 3.201,
"step": 3777
},
{
"epoch": 208.0,
"eval_gen_len": 18.5818,
"eval_loss": 0.25200676918029785,
"eval_rouge1": 0.2002,
"eval_rouge2": 0.1666,
"eval_rougeL": 0.197,
"eval_rougeLsum": 0.1964,
"eval_runtime": 5.9349,
"eval_samples_per_second": 18.535,
"eval_steps_per_second": 3.201,
"step": 3796
},
{
"epoch": 208.99,
"eval_gen_len": 18.4182,
"eval_loss": 0.24954193830490112,
"eval_rouge1": 0.2016,
"eval_rouge2": 0.1708,
"eval_rougeL": 0.1997,
"eval_rougeLsum": 0.1994,
"eval_runtime": 5.9236,
"eval_samples_per_second": 18.57,
"eval_steps_per_second": 3.208,
"step": 3814
},
{
"epoch": 209.97,
"eval_gen_len": 18.5182,
"eval_loss": 0.2487880438566208,
"eval_rouge1": 0.2029,
"eval_rouge2": 0.172,
"eval_rougeL": 0.2008,
"eval_rougeLsum": 0.2005,
"eval_runtime": 5.9666,
"eval_samples_per_second": 18.436,
"eval_steps_per_second": 3.184,
"step": 3832
},
{
"epoch": 210.96,
"eval_gen_len": 18.7455,
"eval_loss": 0.246944397687912,
"eval_rouge1": 0.2126,
"eval_rouge2": 0.183,
"eval_rougeL": 0.2107,
"eval_rougeLsum": 0.2102,
"eval_runtime": 5.9334,
"eval_samples_per_second": 18.539,
"eval_steps_per_second": 3.202,
"step": 3850
},
{
"epoch": 212.0,
"eval_gen_len": 18.7455,
"eval_loss": 0.24503479897975922,
"eval_rouge1": 0.2153,
"eval_rouge2": 0.1832,
"eval_rougeL": 0.213,
"eval_rougeLsum": 0.2126,
"eval_runtime": 5.9384,
"eval_samples_per_second": 18.524,
"eval_steps_per_second": 3.2,
"step": 3869
},
{
"epoch": 212.99,
"eval_gen_len": 18.8,
"eval_loss": 0.24539507925510406,
"eval_rouge1": 0.2199,
"eval_rouge2": 0.1891,
"eval_rougeL": 0.2176,
"eval_rougeLsum": 0.2173,
"eval_runtime": 5.9326,
"eval_samples_per_second": 18.542,
"eval_steps_per_second": 3.203,
"step": 3887
},
{
"epoch": 213.97,
"eval_gen_len": 18.7455,
"eval_loss": 0.24339380860328674,
"eval_rouge1": 0.2253,
"eval_rouge2": 0.1972,
"eval_rougeL": 0.2239,
"eval_rougeLsum": 0.2238,
"eval_runtime": 5.9368,
"eval_samples_per_second": 18.529,
"eval_steps_per_second": 3.2,
"step": 3905
},
{
"epoch": 214.96,
"eval_gen_len": 18.8,
"eval_loss": 0.24222548305988312,
"eval_rouge1": 0.2245,
"eval_rouge2": 0.1953,
"eval_rougeL": 0.2234,
"eval_rougeLsum": 0.2227,
"eval_runtime": 5.9303,
"eval_samples_per_second": 18.549,
"eval_steps_per_second": 3.204,
"step": 3923
},
{
"epoch": 216.0,
"eval_gen_len": 18.8,
"eval_loss": 0.24038065969944,
"eval_rouge1": 0.2269,
"eval_rouge2": 0.1974,
"eval_rougeL": 0.2255,
"eval_rougeLsum": 0.2251,
"eval_runtime": 5.9423,
"eval_samples_per_second": 18.511,
"eval_steps_per_second": 3.197,
"step": 3942
},
{
"epoch": 216.99,
"eval_gen_len": 18.8,
"eval_loss": 0.24085824191570282,
"eval_rouge1": 0.2324,
"eval_rouge2": 0.203,
"eval_rougeL": 0.2305,
"eval_rougeLsum": 0.2302,
"eval_runtime": 5.9374,
"eval_samples_per_second": 18.527,
"eval_steps_per_second": 3.2,
"step": 3960
},
{
"epoch": 217.97,
"eval_gen_len": 18.8,
"eval_loss": 0.2390824556350708,
"eval_rouge1": 0.2328,
"eval_rouge2": 0.204,
"eval_rougeL": 0.2309,
"eval_rougeLsum": 0.2307,
"eval_runtime": 5.9716,
"eval_samples_per_second": 18.421,
"eval_steps_per_second": 3.182,
"step": 3978
},
{
"epoch": 218.96,
"eval_gen_len": 18.9636,
"eval_loss": 0.23841167986392975,
"eval_rouge1": 0.2394,
"eval_rouge2": 0.2109,
"eval_rougeL": 0.2381,
"eval_rougeLsum": 0.238,
"eval_runtime": 5.9528,
"eval_samples_per_second": 18.479,
"eval_steps_per_second": 3.192,
"step": 3996
},
{
"epoch": 219.18,
"grad_norm": 1.037503957748413,
"learning_rate": 1.1124444444444444e-05,
"loss": 0.3439,
"step": 4000
},
{
"epoch": 220.0,
"eval_gen_len": 18.9636,
"eval_loss": 0.2358517199754715,
"eval_rouge1": 0.2413,
"eval_rouge2": 0.2128,
"eval_rougeL": 0.2404,
"eval_rougeLsum": 0.2403,
"eval_runtime": 5.9869,
"eval_samples_per_second": 18.373,
"eval_steps_per_second": 3.174,
"step": 4015
},
{
"epoch": 220.99,
"eval_gen_len": 18.9636,
"eval_loss": 0.2353217452764511,
"eval_rouge1": 0.2411,
"eval_rouge2": 0.2122,
"eval_rougeL": 0.2401,
"eval_rougeLsum": 0.2398,
"eval_runtime": 5.9744,
"eval_samples_per_second": 18.412,
"eval_steps_per_second": 3.18,
"step": 4033
},
{
"epoch": 221.97,
"eval_gen_len": 18.9636,
"eval_loss": 0.23452366888523102,
"eval_rouge1": 0.2423,
"eval_rouge2": 0.2131,
"eval_rougeL": 0.2414,
"eval_rougeLsum": 0.2409,
"eval_runtime": 5.9345,
"eval_samples_per_second": 18.536,
"eval_steps_per_second": 3.202,
"step": 4051
},
{
"epoch": 222.96,
"eval_gen_len": 18.9636,
"eval_loss": 0.23285672068595886,
"eval_rouge1": 0.2417,
"eval_rouge2": 0.2133,
"eval_rougeL": 0.2407,
"eval_rougeLsum": 0.2404,
"eval_runtime": 5.9399,
"eval_samples_per_second": 18.519,
"eval_steps_per_second": 3.199,
"step": 4069
},
{
"epoch": 224.0,
"eval_gen_len": 18.9636,
"eval_loss": 0.23224669694900513,
"eval_rouge1": 0.2439,
"eval_rouge2": 0.2166,
"eval_rougeL": 0.2434,
"eval_rougeLsum": 0.2431,
"eval_runtime": 5.9565,
"eval_samples_per_second": 18.467,
"eval_steps_per_second": 3.19,
"step": 4088
},
{
"epoch": 224.99,
"eval_gen_len": 18.9636,
"eval_loss": 0.231593519449234,
"eval_rouge1": 0.2416,
"eval_rouge2": 0.2138,
"eval_rougeL": 0.2413,
"eval_rougeLsum": 0.2409,
"eval_runtime": 5.9359,
"eval_samples_per_second": 18.531,
"eval_steps_per_second": 3.201,
"step": 4106
},
{
"epoch": 225.97,
"eval_gen_len": 18.9636,
"eval_loss": 0.23053088784217834,
"eval_rouge1": 0.2423,
"eval_rouge2": 0.2147,
"eval_rougeL": 0.242,
"eval_rougeLsum": 0.2414,
"eval_runtime": 5.9359,
"eval_samples_per_second": 18.531,
"eval_steps_per_second": 3.201,
"step": 4124
},
{
"epoch": 226.96,
"eval_gen_len": 18.9636,
"eval_loss": 0.22999462485313416,
"eval_rouge1": 0.243,
"eval_rouge2": 0.215,
"eval_rougeL": 0.2427,
"eval_rougeLsum": 0.242,
"eval_runtime": 5.939,
"eval_samples_per_second": 18.522,
"eval_steps_per_second": 3.199,
"step": 4142
},
{
"epoch": 228.0,
"eval_gen_len": 18.9182,
"eval_loss": 0.22881445288658142,
"eval_rouge1": 0.2472,
"eval_rouge2": 0.2201,
"eval_rougeL": 0.2469,
"eval_rougeLsum": 0.2466,
"eval_runtime": 5.9542,
"eval_samples_per_second": 18.474,
"eval_steps_per_second": 3.191,
"step": 4161
},
{
"epoch": 228.99,
"eval_gen_len": 18.9636,
"eval_loss": 0.2282283455133438,
"eval_rouge1": 0.247,
"eval_rouge2": 0.2195,
"eval_rougeL": 0.2468,
"eval_rougeLsum": 0.2464,
"eval_runtime": 5.9427,
"eval_samples_per_second": 18.51,
"eval_steps_per_second": 3.197,
"step": 4179
},
{
"epoch": 229.97,
"eval_gen_len": 18.9182,
"eval_loss": 0.22686214745044708,
"eval_rouge1": 0.2437,
"eval_rouge2": 0.2163,
"eval_rougeL": 0.2436,
"eval_rougeLsum": 0.2434,
"eval_runtime": 5.9383,
"eval_samples_per_second": 18.524,
"eval_steps_per_second": 3.2,
"step": 4197
},
{
"epoch": 230.96,
"eval_gen_len": 18.9636,
"eval_loss": 0.22540777921676636,
"eval_rouge1": 0.2485,
"eval_rouge2": 0.2218,
"eval_rougeL": 0.2484,
"eval_rougeLsum": 0.2478,
"eval_runtime": 5.9379,
"eval_samples_per_second": 18.525,
"eval_steps_per_second": 3.2,
"step": 4215
},
{
"epoch": 232.0,
"eval_gen_len": 18.7455,
"eval_loss": 0.22596728801727295,
"eval_rouge1": 0.2458,
"eval_rouge2": 0.2194,
"eval_rougeL": 0.2456,
"eval_rougeLsum": 0.2455,
"eval_runtime": 5.9457,
"eval_samples_per_second": 18.501,
"eval_steps_per_second": 3.196,
"step": 4234
},
{
"epoch": 232.99,
"eval_gen_len": 18.6182,
"eval_loss": 0.22396238148212433,
"eval_rouge1": 0.2482,
"eval_rouge2": 0.2227,
"eval_rougeL": 0.2481,
"eval_rougeLsum": 0.248,
"eval_runtime": 5.9411,
"eval_samples_per_second": 18.515,
"eval_steps_per_second": 3.198,
"step": 4252
},
{
"epoch": 233.97,
"eval_gen_len": 18.6182,
"eval_loss": 0.22270821034908295,
"eval_rouge1": 0.2442,
"eval_rouge2": 0.2178,
"eval_rougeL": 0.2438,
"eval_rougeLsum": 0.2435,
"eval_runtime": 5.9443,
"eval_samples_per_second": 18.505,
"eval_steps_per_second": 3.196,
"step": 4270
},
{
"epoch": 234.96,
"eval_gen_len": 18.6182,
"eval_loss": 0.222365602850914,
"eval_rouge1": 0.2491,
"eval_rouge2": 0.2241,
"eval_rougeL": 0.2487,
"eval_rougeLsum": 0.2488,
"eval_runtime": 5.9399,
"eval_samples_per_second": 18.519,
"eval_steps_per_second": 3.199,
"step": 4288
},
{
"epoch": 236.0,
"eval_gen_len": 18.6182,
"eval_loss": 0.22221778333187103,
"eval_rouge1": 0.2486,
"eval_rouge2": 0.2233,
"eval_rougeL": 0.2484,
"eval_rougeLsum": 0.2484,
"eval_runtime": 5.9418,
"eval_samples_per_second": 18.513,
"eval_steps_per_second": 3.198,
"step": 4307
},
{
"epoch": 236.99,
"eval_gen_len": 18.2727,
"eval_loss": 0.2206820845603943,
"eval_rouge1": 0.2443,
"eval_rouge2": 0.219,
"eval_rougeL": 0.2434,
"eval_rougeLsum": 0.2434,
"eval_runtime": 5.9282,
"eval_samples_per_second": 18.555,
"eval_steps_per_second": 3.205,
"step": 4325
},
{
"epoch": 237.97,
"eval_gen_len": 17.4091,
"eval_loss": 0.22046072781085968,
"eval_rouge1": 0.2327,
"eval_rouge2": 0.2091,
"eval_rougeL": 0.2321,
"eval_rougeLsum": 0.2325,
"eval_runtime": 5.9336,
"eval_samples_per_second": 18.539,
"eval_steps_per_second": 3.202,
"step": 4343
},
{
"epoch": 238.96,
"eval_gen_len": 15.1636,
"eval_loss": 0.21863390505313873,
"eval_rouge1": 0.1989,
"eval_rouge2": 0.1774,
"eval_rougeL": 0.1978,
"eval_rougeLsum": 0.1979,
"eval_runtime": 5.9311,
"eval_samples_per_second": 18.546,
"eval_steps_per_second": 3.203,
"step": 4361
},
{
"epoch": 240.0,
"eval_gen_len": 16.1909,
"eval_loss": 0.2192383110523224,
"eval_rouge1": 0.2148,
"eval_rouge2": 0.1923,
"eval_rougeL": 0.2137,
"eval_rougeLsum": 0.2144,
"eval_runtime": 5.9365,
"eval_samples_per_second": 18.529,
"eval_steps_per_second": 3.201,
"step": 4380
},
{
"epoch": 240.99,
"eval_gen_len": 13.0909,
"eval_loss": 0.2176760584115982,
"eval_rouge1": 0.1742,
"eval_rouge2": 0.1548,
"eval_rougeL": 0.1725,
"eval_rougeLsum": 0.1723,
"eval_runtime": 5.9161,
"eval_samples_per_second": 18.593,
"eval_steps_per_second": 3.212,
"step": 4398
},
{
"epoch": 241.97,
"eval_gen_len": 11.7091,
"eval_loss": 0.21764642000198364,
"eval_rouge1": 0.1541,
"eval_rouge2": 0.1376,
"eval_rougeL": 0.1529,
"eval_rougeLsum": 0.1537,
"eval_runtime": 5.9162,
"eval_samples_per_second": 18.593,
"eval_steps_per_second": 3.212,
"step": 4416
},
{
"epoch": 242.96,
"eval_gen_len": 12.9,
"eval_loss": 0.21736116707324982,
"eval_rouge1": 0.1671,
"eval_rouge2": 0.1495,
"eval_rougeL": 0.1661,
"eval_rougeLsum": 0.1671,
"eval_runtime": 5.9221,
"eval_samples_per_second": 18.574,
"eval_steps_per_second": 3.208,
"step": 4434
},
{
"epoch": 244.0,
"eval_gen_len": 10.4818,
"eval_loss": 0.21571263670921326,
"eval_rouge1": 0.1364,
"eval_rouge2": 0.1201,
"eval_rougeL": 0.1354,
"eval_rougeLsum": 0.135,
"eval_runtime": 5.9422,
"eval_samples_per_second": 18.512,
"eval_steps_per_second": 3.197,
"step": 4453
},
{
"epoch": 244.99,
"eval_gen_len": 8.9273,
"eval_loss": 0.21506664156913757,
"eval_rouge1": 0.1149,
"eval_rouge2": 0.101,
"eval_rougeL": 0.1133,
"eval_rougeLsum": 0.1136,
"eval_runtime": 5.9071,
"eval_samples_per_second": 18.622,
"eval_steps_per_second": 3.216,
"step": 4471
},
{
"epoch": 245.97,
"eval_gen_len": 7.7182,
"eval_loss": 0.21382498741149902,
"eval_rouge1": 0.0989,
"eval_rouge2": 0.0866,
"eval_rougeL": 0.0976,
"eval_rougeLsum": 0.0981,
"eval_runtime": 5.9086,
"eval_samples_per_second": 18.617,
"eval_steps_per_second": 3.216,
"step": 4489
},
{
"epoch": 246.58,
"grad_norm": 0.4568144977092743,
"learning_rate": 1.0013333333333335e-05,
"loss": 0.2977,
"step": 4500
},
{
"epoch": 246.96,
"eval_gen_len": 7.3727,
"eval_loss": 0.21434056758880615,
"eval_rouge1": 0.0942,
"eval_rouge2": 0.0823,
"eval_rougeL": 0.093,
"eval_rougeLsum": 0.0937,
"eval_runtime": 5.9102,
"eval_samples_per_second": 18.612,
"eval_steps_per_second": 3.215,
"step": 4507
},
{
"epoch": 248.0,
"eval_gen_len": 6.8636,
"eval_loss": 0.2125895619392395,
"eval_rouge1": 0.0884,
"eval_rouge2": 0.0777,
"eval_rougeL": 0.0876,
"eval_rougeLsum": 0.0884,
"eval_runtime": 5.9078,
"eval_samples_per_second": 18.619,
"eval_steps_per_second": 3.216,
"step": 4526
},
{
"epoch": 248.99,
"eval_gen_len": 6.6909,
"eval_loss": 0.21264444291591644,
"eval_rouge1": 0.0856,
"eval_rouge2": 0.0753,
"eval_rougeL": 0.0851,
"eval_rougeLsum": 0.0853,
"eval_runtime": 5.9128,
"eval_samples_per_second": 18.604,
"eval_steps_per_second": 3.213,
"step": 4544
},
{
"epoch": 249.97,
"eval_gen_len": 6.8636,
"eval_loss": 0.2110782116651535,
"eval_rouge1": 0.0871,
"eval_rouge2": 0.0764,
"eval_rougeL": 0.0865,
"eval_rougeLsum": 0.0866,
"eval_runtime": 5.911,
"eval_samples_per_second": 18.609,
"eval_steps_per_second": 3.214,
"step": 4562
},
{
"epoch": 250.96,
"eval_gen_len": 6.3455,
"eval_loss": 0.2114747315645218,
"eval_rouge1": 0.0813,
"eval_rouge2": 0.071,
"eval_rougeL": 0.0805,
"eval_rougeLsum": 0.0808,
"eval_runtime": 5.9048,
"eval_samples_per_second": 18.629,
"eval_steps_per_second": 3.218,
"step": 4580
},
{
"epoch": 252.0,
"eval_gen_len": 5.1364,
"eval_loss": 0.21088837087154388,
"eval_rouge1": 0.0658,
"eval_rouge2": 0.0587,
"eval_rougeL": 0.0647,
"eval_rougeLsum": 0.0656,
"eval_runtime": 5.9122,
"eval_samples_per_second": 18.606,
"eval_steps_per_second": 3.214,
"step": 4599
},
{
"epoch": 252.99,
"eval_gen_len": 4.1,
"eval_loss": 0.21026724576950073,
"eval_rouge1": 0.0525,
"eval_rouge2": 0.0474,
"eval_rougeL": 0.0523,
"eval_rougeLsum": 0.0531,
"eval_runtime": 5.8924,
"eval_samples_per_second": 18.668,
"eval_steps_per_second": 3.225,
"step": 4617
},
{
"epoch": 253.97,
"eval_gen_len": 3.4091,
"eval_loss": 0.20923300087451935,
"eval_rouge1": 0.0422,
"eval_rouge2": 0.0371,
"eval_rougeL": 0.0417,
"eval_rougeLsum": 0.0427,
"eval_runtime": 5.9069,
"eval_samples_per_second": 18.622,
"eval_steps_per_second": 3.217,
"step": 4635
},
{
"epoch": 254.96,
"eval_gen_len": 3.4091,
"eval_loss": 0.20883551239967346,
"eval_rouge1": 0.0425,
"eval_rouge2": 0.0373,
"eval_rougeL": 0.0421,
"eval_rougeLsum": 0.0432,
"eval_runtime": 5.8899,
"eval_samples_per_second": 18.676,
"eval_steps_per_second": 3.226,
"step": 4653
},
{
"epoch": 256.0,
"eval_gen_len": 3.0545,
"eval_loss": 0.20799440145492554,
"eval_rouge1": 0.0383,
"eval_rouge2": 0.0339,
"eval_rougeL": 0.0381,
"eval_rougeLsum": 0.0386,
"eval_runtime": 5.8948,
"eval_samples_per_second": 18.66,
"eval_steps_per_second": 3.223,
"step": 4672
},
{
"epoch": 256.99,
"eval_gen_len": 2.0364,
"eval_loss": 0.20746435225009918,
"eval_rouge1": 0.0256,
"eval_rouge2": 0.0228,
"eval_rougeL": 0.0255,
"eval_rougeLsum": 0.0259,
"eval_runtime": 5.8826,
"eval_samples_per_second": 18.699,
"eval_steps_per_second": 3.23,
"step": 4690
},
{
"epoch": 257.97,
"eval_gen_len": 2.0364,
"eval_loss": 0.2079101800918579,
"eval_rouge1": 0.026,
"eval_rouge2": 0.0231,
"eval_rougeL": 0.0258,
"eval_rougeLsum": 0.0263,
"eval_runtime": 5.9214,
"eval_samples_per_second": 18.577,
"eval_steps_per_second": 3.209,
"step": 4708
},
{
"epoch": 258.96,
"eval_gen_len": 2.0182,
"eval_loss": 0.20639775693416595,
"eval_rouge1": 0.0232,
"eval_rouge2": 0.0203,
"eval_rougeL": 0.0231,
"eval_rougeLsum": 0.0233,
"eval_runtime": 5.9298,
"eval_samples_per_second": 18.55,
"eval_steps_per_second": 3.204,
"step": 4726
},
{
"epoch": 260.0,
"eval_gen_len": 2.0182,
"eval_loss": 0.20615024864673615,
"eval_rouge1": 0.0238,
"eval_rouge2": 0.0202,
"eval_rougeL": 0.0237,
"eval_rougeLsum": 0.0238,
"eval_runtime": 5.9324,
"eval_samples_per_second": 18.542,
"eval_steps_per_second": 3.203,
"step": 4745
},
{
"epoch": 260.99,
"eval_gen_len": 1.7091,
"eval_loss": 0.20586760342121124,
"eval_rouge1": 0.02,
"eval_rouge2": 0.0175,
"eval_rougeL": 0.0202,
"eval_rougeLsum": 0.0201,
"eval_runtime": 5.8796,
"eval_samples_per_second": 18.709,
"eval_steps_per_second": 3.231,
"step": 4763
},
{
"epoch": 261.97,
"eval_gen_len": 0.6909,
"eval_loss": 0.20486456155776978,
"eval_rouge1": 0.0091,
"eval_rouge2": 0.0079,
"eval_rougeL": 0.0091,
"eval_rougeLsum": 0.0091,
"eval_runtime": 5.8759,
"eval_samples_per_second": 18.721,
"eval_steps_per_second": 3.234,
"step": 4781
},
{
"epoch": 262.96,
"eval_gen_len": 0.6909,
"eval_loss": 0.20466168224811554,
"eval_rouge1": 0.0091,
"eval_rouge2": 0.0079,
"eval_rougeL": 0.0091,
"eval_rougeLsum": 0.0091,
"eval_runtime": 5.9163,
"eval_samples_per_second": 18.593,
"eval_steps_per_second": 3.211,
"step": 4799
},
{
"epoch": 264.0,
"eval_gen_len": 0.6818,
"eval_loss": 0.20416179299354553,
"eval_rouge1": 0.0082,
"eval_rouge2": 0.0071,
"eval_rougeL": 0.0081,
"eval_rougeLsum": 0.0082,
"eval_runtime": 5.8989,
"eval_samples_per_second": 18.647,
"eval_steps_per_second": 3.221,
"step": 4818
},
{
"epoch": 264.99,
"eval_gen_len": 0.3364,
"eval_loss": 0.20311486721038818,
"eval_rouge1": 0.0044,
"eval_rouge2": 0.0038,
"eval_rougeL": 0.0044,
"eval_rougeLsum": 0.0046,
"eval_runtime": 5.8639,
"eval_samples_per_second": 18.759,
"eval_steps_per_second": 3.24,
"step": 4836
},
{
"epoch": 265.97,
"eval_gen_len": 0.3455,
"eval_loss": 0.20284703373908997,
"eval_rouge1": 0.0057,
"eval_rouge2": 0.0051,
"eval_rougeL": 0.0057,
"eval_rougeLsum": 0.0057,
"eval_runtime": 5.874,
"eval_samples_per_second": 18.726,
"eval_steps_per_second": 3.235,
"step": 4854
},
{
"epoch": 266.96,
"eval_gen_len": 0.3455,
"eval_loss": 0.20207703113555908,
"eval_rouge1": 0.0057,
"eval_rouge2": 0.0051,
"eval_rougeL": 0.0057,
"eval_rougeLsum": 0.0057,
"eval_runtime": 5.871,
"eval_samples_per_second": 18.736,
"eval_steps_per_second": 3.236,
"step": 4872
},
{
"epoch": 268.0,
"eval_gen_len": 0.3455,
"eval_loss": 0.20189516246318817,
"eval_rouge1": 0.0057,
"eval_rouge2": 0.0051,
"eval_rougeL": 0.0057,
"eval_rougeLsum": 0.0057,
"eval_runtime": 5.8784,
"eval_samples_per_second": 18.713,
"eval_steps_per_second": 3.232,
"step": 4891
},
{
"epoch": 268.99,
"eval_gen_len": 0.3455,
"eval_loss": 0.20159202814102173,
"eval_rouge1": 0.0057,
"eval_rouge2": 0.0051,
"eval_rougeL": 0.0057,
"eval_rougeLsum": 0.0057,
"eval_runtime": 5.8675,
"eval_samples_per_second": 18.747,
"eval_steps_per_second": 3.238,
"step": 4909
},
{
"epoch": 269.97,
"eval_gen_len": 0.3455,
"eval_loss": 0.20119339227676392,
"eval_rouge1": 0.0057,
"eval_rouge2": 0.0051,
"eval_rougeL": 0.0057,
"eval_rougeLsum": 0.0057,
"eval_runtime": 5.8693,
"eval_samples_per_second": 18.741,
"eval_steps_per_second": 3.237,
"step": 4927
},
{
"epoch": 270.96,
"eval_gen_len": 0.3455,
"eval_loss": 0.20063255727291107,
"eval_rouge1": 0.0057,
"eval_rouge2": 0.0051,
"eval_rougeL": 0.0057,
"eval_rougeLsum": 0.0057,
"eval_runtime": 5.8634,
"eval_samples_per_second": 18.76,
"eval_steps_per_second": 3.24,
"step": 4945
},
{
"epoch": 272.0,
"eval_gen_len": 0.3455,
"eval_loss": 0.20042632520198822,
"eval_rouge1": 0.0057,
"eval_rouge2": 0.0051,
"eval_rougeL": 0.0057,
"eval_rougeLsum": 0.0057,
"eval_runtime": 5.9002,
"eval_samples_per_second": 18.644,
"eval_steps_per_second": 3.22,
"step": 4964
},
{
"epoch": 272.99,
"eval_gen_len": 0.3455,
"eval_loss": 0.19937776029109955,
"eval_rouge1": 0.0057,
"eval_rouge2": 0.0051,
"eval_rougeL": 0.0057,
"eval_rougeLsum": 0.0057,
"eval_runtime": 5.866,
"eval_samples_per_second": 18.752,
"eval_steps_per_second": 3.239,
"step": 4982
},
{
"epoch": 273.97,
"grad_norm": 0.47718000411987305,
"learning_rate": 8.902222222222224e-06,
"loss": 0.2666,
"step": 5000
},
{
"epoch": 273.97,
"eval_gen_len": 0.5182,
"eval_loss": 0.199092298746109,
"eval_rouge1": 0.008,
"eval_rouge2": 0.0071,
"eval_rougeL": 0.008,
"eval_rougeLsum": 0.008,
"eval_runtime": 5.8818,
"eval_samples_per_second": 18.702,
"eval_steps_per_second": 3.23,
"step": 5000
},
{
"epoch": 274.96,
"eval_gen_len": 0.5182,
"eval_loss": 0.1990521252155304,
"eval_rouge1": 0.008,
"eval_rouge2": 0.0071,
"eval_rougeL": 0.008,
"eval_rougeLsum": 0.008,
"eval_runtime": 5.8794,
"eval_samples_per_second": 18.709,
"eval_steps_per_second": 3.232,
"step": 5018
},
{
"epoch": 276.0,
"eval_gen_len": 0.5182,
"eval_loss": 0.19846394658088684,
"eval_rouge1": 0.008,
"eval_rouge2": 0.0071,
"eval_rougeL": 0.008,
"eval_rougeLsum": 0.008,
"eval_runtime": 5.8793,
"eval_samples_per_second": 18.71,
"eval_steps_per_second": 3.232,
"step": 5037
},
{
"epoch": 276.99,
"eval_gen_len": 0.5182,
"eval_loss": 0.1975831538438797,
"eval_rouge1": 0.008,
"eval_rouge2": 0.0071,
"eval_rougeL": 0.008,
"eval_rougeLsum": 0.008,
"eval_runtime": 5.8684,
"eval_samples_per_second": 18.744,
"eval_steps_per_second": 3.238,
"step": 5055
},
{
"epoch": 277.97,
"eval_gen_len": 0.1727,
"eval_loss": 0.19685131311416626,
"eval_rouge1": 0.0025,
"eval_rouge2": 0.0024,
"eval_rougeL": 0.0025,
"eval_rougeLsum": 0.0025,
"eval_runtime": 5.8685,
"eval_samples_per_second": 18.744,
"eval_steps_per_second": 3.238,
"step": 5073
},
{
"epoch": 278.96,
"eval_gen_len": 0.1727,
"eval_loss": 0.19685351848602295,
"eval_rouge1": 0.0025,
"eval_rouge2": 0.0024,
"eval_rougeL": 0.0025,
"eval_rougeLsum": 0.0025,
"eval_runtime": 5.87,
"eval_samples_per_second": 18.739,
"eval_steps_per_second": 3.237,
"step": 5091
},
{
"epoch": 280.0,
"eval_gen_len": 0.3455,
"eval_loss": 0.19706358015537262,
"eval_rouge1": 0.0057,
"eval_rouge2": 0.0051,
"eval_rougeL": 0.0057,
"eval_rougeLsum": 0.0057,
"eval_runtime": 5.8807,
"eval_samples_per_second": 18.705,
"eval_steps_per_second": 3.231,
"step": 5110
},
{
"epoch": 280.99,
"eval_gen_len": 0.1727,
"eval_loss": 0.19580155611038208,
"eval_rouge1": 0.0025,
"eval_rouge2": 0.0024,
"eval_rougeL": 0.0025,
"eval_rougeLsum": 0.0025,
"eval_runtime": 5.8696,
"eval_samples_per_second": 18.741,
"eval_steps_per_second": 3.237,
"step": 5128
},
{
"epoch": 281.97,
"eval_gen_len": 0.0,
"eval_loss": 0.1954393833875656,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8668,
"eval_samples_per_second": 18.75,
"eval_steps_per_second": 3.239,
"step": 5146
},
{
"epoch": 282.96,
"eval_gen_len": 0.0,
"eval_loss": 0.19552475214004517,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8798,
"eval_samples_per_second": 18.708,
"eval_steps_per_second": 3.231,
"step": 5164
},
{
"epoch": 284.0,
"eval_gen_len": 0.1727,
"eval_loss": 0.1950557678937912,
"eval_rouge1": 0.0025,
"eval_rouge2": 0.0024,
"eval_rougeL": 0.0025,
"eval_rougeLsum": 0.0025,
"eval_runtime": 5.8764,
"eval_samples_per_second": 18.719,
"eval_steps_per_second": 3.233,
"step": 5183
},
{
"epoch": 284.99,
"eval_gen_len": 0.0,
"eval_loss": 0.1939947009086609,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8768,
"eval_samples_per_second": 18.718,
"eval_steps_per_second": 3.233,
"step": 5201
},
{
"epoch": 285.97,
"eval_gen_len": 0.0,
"eval_loss": 0.19393964111804962,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8764,
"eval_samples_per_second": 18.719,
"eval_steps_per_second": 3.233,
"step": 5219
},
{
"epoch": 286.96,
"eval_gen_len": 0.0,
"eval_loss": 0.19383706152439117,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8701,
"eval_samples_per_second": 18.739,
"eval_steps_per_second": 3.237,
"step": 5237
},
{
"epoch": 288.0,
"eval_gen_len": 0.0,
"eval_loss": 0.1930641084909439,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8828,
"eval_samples_per_second": 18.699,
"eval_steps_per_second": 3.23,
"step": 5256
},
{
"epoch": 288.99,
"eval_gen_len": 0.0,
"eval_loss": 0.19216330349445343,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8721,
"eval_samples_per_second": 18.733,
"eval_steps_per_second": 3.236,
"step": 5274
},
{
"epoch": 289.97,
"eval_gen_len": 0.0,
"eval_loss": 0.1919858455657959,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.886,
"eval_samples_per_second": 18.688,
"eval_steps_per_second": 3.228,
"step": 5292
},
{
"epoch": 290.96,
"eval_gen_len": 0.0,
"eval_loss": 0.19181759655475616,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8804,
"eval_samples_per_second": 18.706,
"eval_steps_per_second": 3.231,
"step": 5310
},
{
"epoch": 292.0,
"eval_gen_len": 0.0,
"eval_loss": 0.1913154274225235,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8887,
"eval_samples_per_second": 18.68,
"eval_steps_per_second": 3.227,
"step": 5329
},
{
"epoch": 292.99,
"eval_gen_len": 0.0,
"eval_loss": 0.19096632301807404,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8809,
"eval_samples_per_second": 18.704,
"eval_steps_per_second": 3.231,
"step": 5347
},
{
"epoch": 293.97,
"eval_gen_len": 0.0,
"eval_loss": 0.19034302234649658,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.886,
"eval_samples_per_second": 18.688,
"eval_steps_per_second": 3.228,
"step": 5365
},
{
"epoch": 294.96,
"eval_gen_len": 0.0,
"eval_loss": 0.18984819948673248,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8905,
"eval_samples_per_second": 18.674,
"eval_steps_per_second": 3.226,
"step": 5383
},
{
"epoch": 296.0,
"eval_gen_len": 0.0,
"eval_loss": 0.1901622861623764,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8956,
"eval_samples_per_second": 18.658,
"eval_steps_per_second": 3.223,
"step": 5402
},
{
"epoch": 296.99,
"eval_gen_len": 0.0,
"eval_loss": 0.18923012912273407,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8738,
"eval_samples_per_second": 18.727,
"eval_steps_per_second": 3.235,
"step": 5420
},
{
"epoch": 297.97,
"eval_gen_len": 0.0,
"eval_loss": 0.18847127258777618,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8804,
"eval_samples_per_second": 18.706,
"eval_steps_per_second": 3.231,
"step": 5438
},
{
"epoch": 298.96,
"eval_gen_len": 0.0,
"eval_loss": 0.1884116530418396,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8771,
"eval_samples_per_second": 18.717,
"eval_steps_per_second": 3.233,
"step": 5456
},
{
"epoch": 300.0,
"eval_gen_len": 0.0,
"eval_loss": 0.18776828050613403,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8882,
"eval_samples_per_second": 18.681,
"eval_steps_per_second": 3.227,
"step": 5475
},
{
"epoch": 300.99,
"eval_gen_len": 0.0,
"eval_loss": 0.18751147389411926,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8894,
"eval_samples_per_second": 18.677,
"eval_steps_per_second": 3.226,
"step": 5493
},
{
"epoch": 301.37,
"grad_norm": 0.6001901030540466,
"learning_rate": 7.791111111111111e-06,
"loss": 0.2463,
"step": 5500
},
{
"epoch": 301.97,
"eval_gen_len": 0.0,
"eval_loss": 0.18754757940769196,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8862,
"eval_samples_per_second": 18.688,
"eval_steps_per_second": 3.228,
"step": 5511
},
{
"epoch": 302.96,
"eval_gen_len": 0.0,
"eval_loss": 0.18714185059070587,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8804,
"eval_samples_per_second": 18.706,
"eval_steps_per_second": 3.231,
"step": 5529
},
{
"epoch": 304.0,
"eval_gen_len": 0.0,
"eval_loss": 0.18668492138385773,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8924,
"eval_samples_per_second": 18.668,
"eval_steps_per_second": 3.225,
"step": 5548
},
{
"epoch": 304.99,
"eval_gen_len": 0.0,
"eval_loss": 0.18612505495548248,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8787,
"eval_samples_per_second": 18.712,
"eval_steps_per_second": 3.232,
"step": 5566
},
{
"epoch": 305.97,
"eval_gen_len": 0.0,
"eval_loss": 0.18622124195098877,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8963,
"eval_samples_per_second": 18.656,
"eval_steps_per_second": 3.222,
"step": 5584
},
{
"epoch": 306.96,
"eval_gen_len": 0.0,
"eval_loss": 0.18581855297088623,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8851,
"eval_samples_per_second": 18.691,
"eval_steps_per_second": 3.228,
"step": 5602
},
{
"epoch": 308.0,
"eval_gen_len": 0.0,
"eval_loss": 0.1851491779088974,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8834,
"eval_samples_per_second": 18.697,
"eval_steps_per_second": 3.229,
"step": 5621
},
{
"epoch": 308.99,
"eval_gen_len": 0.0,
"eval_loss": 0.18541742861270905,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8802,
"eval_samples_per_second": 18.707,
"eval_steps_per_second": 3.231,
"step": 5639
},
{
"epoch": 309.97,
"eval_gen_len": 0.0,
"eval_loss": 0.1846253126859665,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.88,
"eval_samples_per_second": 18.708,
"eval_steps_per_second": 3.231,
"step": 5657
},
{
"epoch": 310.96,
"eval_gen_len": 0.0,
"eval_loss": 0.1842205971479416,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8908,
"eval_samples_per_second": 18.673,
"eval_steps_per_second": 3.225,
"step": 5675
},
{
"epoch": 312.0,
"eval_gen_len": 0.0,
"eval_loss": 0.1838139295578003,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8826,
"eval_samples_per_second": 18.699,
"eval_steps_per_second": 3.23,
"step": 5694
},
{
"epoch": 312.99,
"eval_gen_len": 0.0,
"eval_loss": 0.18346160650253296,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8686,
"eval_samples_per_second": 18.744,
"eval_steps_per_second": 3.238,
"step": 5712
},
{
"epoch": 313.97,
"eval_gen_len": 0.0,
"eval_loss": 0.18300552666187286,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8811,
"eval_samples_per_second": 18.704,
"eval_steps_per_second": 3.231,
"step": 5730
},
{
"epoch": 314.96,
"eval_gen_len": 0.0,
"eval_loss": 0.18304497003555298,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8736,
"eval_samples_per_second": 18.728,
"eval_steps_per_second": 3.235,
"step": 5748
},
{
"epoch": 316.0,
"eval_gen_len": 0.0,
"eval_loss": 0.1824423372745514,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9117,
"eval_samples_per_second": 18.607,
"eval_steps_per_second": 3.214,
"step": 5767
},
{
"epoch": 316.99,
"eval_gen_len": 0.0,
"eval_loss": 0.18214993178844452,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8772,
"eval_samples_per_second": 18.716,
"eval_steps_per_second": 3.233,
"step": 5785
},
{
"epoch": 317.97,
"eval_gen_len": 0.0,
"eval_loss": 0.1819440871477127,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8707,
"eval_samples_per_second": 18.737,
"eval_steps_per_second": 3.236,
"step": 5803
},
{
"epoch": 318.96,
"eval_gen_len": 0.0,
"eval_loss": 0.18124094605445862,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9162,
"eval_samples_per_second": 18.593,
"eval_steps_per_second": 3.212,
"step": 5821
},
{
"epoch": 320.0,
"eval_gen_len": 0.0,
"eval_loss": 0.18139633536338806,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.874,
"eval_samples_per_second": 18.726,
"eval_steps_per_second": 3.235,
"step": 5840
},
{
"epoch": 320.99,
"eval_gen_len": 0.0,
"eval_loss": 0.1810206174850464,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8919,
"eval_samples_per_second": 18.67,
"eval_steps_per_second": 3.225,
"step": 5858
},
{
"epoch": 321.97,
"eval_gen_len": 0.0,
"eval_loss": 0.1808764487504959,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9173,
"eval_samples_per_second": 18.589,
"eval_steps_per_second": 3.211,
"step": 5876
},
{
"epoch": 322.96,
"eval_gen_len": 0.0,
"eval_loss": 0.1801535189151764,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9176,
"eval_samples_per_second": 18.589,
"eval_steps_per_second": 3.211,
"step": 5894
},
{
"epoch": 324.0,
"eval_gen_len": 0.0,
"eval_loss": 0.1799170821905136,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9289,
"eval_samples_per_second": 18.553,
"eval_steps_per_second": 3.205,
"step": 5913
},
{
"epoch": 324.99,
"eval_gen_len": 0.0,
"eval_loss": 0.1797485649585724,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9221,
"eval_samples_per_second": 18.575,
"eval_steps_per_second": 3.208,
"step": 5931
},
{
"epoch": 325.97,
"eval_gen_len": 0.0,
"eval_loss": 0.17966806888580322,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9096,
"eval_samples_per_second": 18.614,
"eval_steps_per_second": 3.215,
"step": 5949
},
{
"epoch": 326.96,
"eval_gen_len": 0.0,
"eval_loss": 0.17945848405361176,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8715,
"eval_samples_per_second": 18.735,
"eval_steps_per_second": 3.236,
"step": 5967
},
{
"epoch": 328.0,
"eval_gen_len": 0.0,
"eval_loss": 0.17859123647212982,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8842,
"eval_samples_per_second": 18.694,
"eval_steps_per_second": 3.229,
"step": 5986
},
{
"epoch": 328.77,
"grad_norm": 0.45411407947540283,
"learning_rate": 6.680000000000001e-06,
"loss": 0.2311,
"step": 6000
},
{
"epoch": 328.99,
"eval_gen_len": 0.0,
"eval_loss": 0.17845258116722107,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8879,
"eval_samples_per_second": 18.682,
"eval_steps_per_second": 3.227,
"step": 6004
},
{
"epoch": 329.97,
"eval_gen_len": 0.0,
"eval_loss": 0.17820703983306885,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8859,
"eval_samples_per_second": 18.689,
"eval_steps_per_second": 3.228,
"step": 6022
},
{
"epoch": 330.96,
"eval_gen_len": 0.0,
"eval_loss": 0.17832966148853302,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8753,
"eval_samples_per_second": 18.723,
"eval_steps_per_second": 3.234,
"step": 6040
},
{
"epoch": 332.0,
"eval_gen_len": 0.0,
"eval_loss": 0.17780448496341705,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8819,
"eval_samples_per_second": 18.702,
"eval_steps_per_second": 3.23,
"step": 6059
},
{
"epoch": 332.99,
"eval_gen_len": 0.0,
"eval_loss": 0.17747904360294342,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8701,
"eval_samples_per_second": 18.739,
"eval_steps_per_second": 3.237,
"step": 6077
},
{
"epoch": 333.97,
"eval_gen_len": 0.0,
"eval_loss": 0.17767922580242157,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8742,
"eval_samples_per_second": 18.726,
"eval_steps_per_second": 3.234,
"step": 6095
},
{
"epoch": 334.96,
"eval_gen_len": 0.0,
"eval_loss": 0.1771049201488495,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9091,
"eval_samples_per_second": 18.615,
"eval_steps_per_second": 3.215,
"step": 6113
},
{
"epoch": 336.0,
"eval_gen_len": 0.0,
"eval_loss": 0.1770164966583252,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8812,
"eval_samples_per_second": 18.704,
"eval_steps_per_second": 3.231,
"step": 6132
},
{
"epoch": 336.99,
"eval_gen_len": 0.0,
"eval_loss": 0.17682689428329468,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8773,
"eval_samples_per_second": 18.716,
"eval_steps_per_second": 3.233,
"step": 6150
},
{
"epoch": 337.97,
"eval_gen_len": 0.0,
"eval_loss": 0.1767437607049942,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8745,
"eval_samples_per_second": 18.725,
"eval_steps_per_second": 3.234,
"step": 6168
},
{
"epoch": 338.96,
"eval_gen_len": 0.0,
"eval_loss": 0.17660827934741974,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.876,
"eval_samples_per_second": 18.72,
"eval_steps_per_second": 3.233,
"step": 6186
},
{
"epoch": 340.0,
"eval_gen_len": 0.0,
"eval_loss": 0.1762937307357788,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8747,
"eval_samples_per_second": 18.724,
"eval_steps_per_second": 3.234,
"step": 6205
},
{
"epoch": 340.99,
"eval_gen_len": 0.0,
"eval_loss": 0.17532125115394592,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.877,
"eval_samples_per_second": 18.717,
"eval_steps_per_second": 3.233,
"step": 6223
},
{
"epoch": 341.97,
"eval_gen_len": 0.0,
"eval_loss": 0.17553770542144775,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8818,
"eval_samples_per_second": 18.702,
"eval_steps_per_second": 3.23,
"step": 6241
},
{
"epoch": 342.96,
"eval_gen_len": 0.0,
"eval_loss": 0.17563851177692413,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8839,
"eval_samples_per_second": 18.695,
"eval_steps_per_second": 3.229,
"step": 6259
},
{
"epoch": 344.0,
"eval_gen_len": 0.0,
"eval_loss": 0.17437517642974854,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8841,
"eval_samples_per_second": 18.694,
"eval_steps_per_second": 3.229,
"step": 6278
},
{
"epoch": 344.99,
"eval_gen_len": 0.0,
"eval_loss": 0.17455054819583893,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8746,
"eval_samples_per_second": 18.725,
"eval_steps_per_second": 3.234,
"step": 6296
},
{
"epoch": 345.97,
"eval_gen_len": 0.0,
"eval_loss": 0.17481422424316406,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8787,
"eval_samples_per_second": 18.712,
"eval_steps_per_second": 3.232,
"step": 6314
},
{
"epoch": 346.96,
"eval_gen_len": 0.0,
"eval_loss": 0.1744970828294754,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8781,
"eval_samples_per_second": 18.713,
"eval_steps_per_second": 3.232,
"step": 6332
},
{
"epoch": 348.0,
"eval_gen_len": 0.0,
"eval_loss": 0.17412132024765015,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8833,
"eval_samples_per_second": 18.697,
"eval_steps_per_second": 3.229,
"step": 6351
},
{
"epoch": 348.99,
"eval_gen_len": 0.0,
"eval_loss": 0.1738380789756775,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9075,
"eval_samples_per_second": 18.621,
"eval_steps_per_second": 3.216,
"step": 6369
},
{
"epoch": 349.97,
"eval_gen_len": 0.0,
"eval_loss": 0.17379747331142426,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.876,
"eval_samples_per_second": 18.72,
"eval_steps_per_second": 3.234,
"step": 6387
},
{
"epoch": 350.96,
"eval_gen_len": 0.0,
"eval_loss": 0.17336434125900269,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8785,
"eval_samples_per_second": 18.712,
"eval_steps_per_second": 3.232,
"step": 6405
},
{
"epoch": 352.0,
"eval_gen_len": 0.0,
"eval_loss": 0.17307358980178833,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9197,
"eval_samples_per_second": 18.582,
"eval_steps_per_second": 3.21,
"step": 6424
},
{
"epoch": 352.99,
"eval_gen_len": 0.0,
"eval_loss": 0.17290958762168884,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8928,
"eval_samples_per_second": 18.667,
"eval_steps_per_second": 3.224,
"step": 6442
},
{
"epoch": 353.97,
"eval_gen_len": 0.0,
"eval_loss": 0.1726769059896469,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8785,
"eval_samples_per_second": 18.712,
"eval_steps_per_second": 3.232,
"step": 6460
},
{
"epoch": 354.96,
"eval_gen_len": 0.0,
"eval_loss": 0.17268246412277222,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8721,
"eval_samples_per_second": 18.733,
"eval_steps_per_second": 3.236,
"step": 6478
},
{
"epoch": 356.0,
"eval_gen_len": 0.0,
"eval_loss": 0.17257879674434662,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8803,
"eval_samples_per_second": 18.706,
"eval_steps_per_second": 3.231,
"step": 6497
},
{
"epoch": 356.16,
"grad_norm": 0.3692683279514313,
"learning_rate": 5.56888888888889e-06,
"loss": 0.2192,
"step": 6500
},
{
"epoch": 356.99,
"eval_gen_len": 0.0,
"eval_loss": 0.17185170948505402,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8845,
"eval_samples_per_second": 18.693,
"eval_steps_per_second": 3.229,
"step": 6515
},
{
"epoch": 357.97,
"eval_gen_len": 0.0,
"eval_loss": 0.17126674950122833,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8816,
"eval_samples_per_second": 18.702,
"eval_steps_per_second": 3.23,
"step": 6533
},
{
"epoch": 358.96,
"eval_gen_len": 0.0,
"eval_loss": 0.17136740684509277,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8802,
"eval_samples_per_second": 18.707,
"eval_steps_per_second": 3.231,
"step": 6551
},
{
"epoch": 360.0,
"eval_gen_len": 0.0,
"eval_loss": 0.1711340844631195,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8839,
"eval_samples_per_second": 18.695,
"eval_steps_per_second": 3.229,
"step": 6570
},
{
"epoch": 360.99,
"eval_gen_len": 0.0,
"eval_loss": 0.1712319403886795,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8782,
"eval_samples_per_second": 18.713,
"eval_steps_per_second": 3.232,
"step": 6588
},
{
"epoch": 361.97,
"eval_gen_len": 0.0,
"eval_loss": 0.1710105836391449,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8829,
"eval_samples_per_second": 18.698,
"eval_steps_per_second": 3.23,
"step": 6606
},
{
"epoch": 362.96,
"eval_gen_len": 0.0,
"eval_loss": 0.17070402204990387,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9043,
"eval_samples_per_second": 18.631,
"eval_steps_per_second": 3.218,
"step": 6624
},
{
"epoch": 364.0,
"eval_gen_len": 0.0,
"eval_loss": 0.1703125238418579,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9174,
"eval_samples_per_second": 18.589,
"eval_steps_per_second": 3.211,
"step": 6643
},
{
"epoch": 364.99,
"eval_gen_len": 0.0,
"eval_loss": 0.17009203135967255,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9165,
"eval_samples_per_second": 18.592,
"eval_steps_per_second": 3.211,
"step": 6661
},
{
"epoch": 365.97,
"eval_gen_len": 0.0,
"eval_loss": 0.1701081246137619,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8801,
"eval_samples_per_second": 18.707,
"eval_steps_per_second": 3.231,
"step": 6679
},
{
"epoch": 366.96,
"eval_gen_len": 0.0,
"eval_loss": 0.16997285187244415,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8816,
"eval_samples_per_second": 18.703,
"eval_steps_per_second": 3.23,
"step": 6697
},
{
"epoch": 368.0,
"eval_gen_len": 0.0,
"eval_loss": 0.16970933973789215,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8906,
"eval_samples_per_second": 18.674,
"eval_steps_per_second": 3.225,
"step": 6716
},
{
"epoch": 368.99,
"eval_gen_len": 0.0,
"eval_loss": 0.1696108877658844,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8896,
"eval_samples_per_second": 18.677,
"eval_steps_per_second": 3.226,
"step": 6734
},
{
"epoch": 369.97,
"eval_gen_len": 0.0,
"eval_loss": 0.1694546341896057,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9145,
"eval_samples_per_second": 18.598,
"eval_steps_per_second": 3.212,
"step": 6752
},
{
"epoch": 370.96,
"eval_gen_len": 0.0,
"eval_loss": 0.16927814483642578,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9179,
"eval_samples_per_second": 18.588,
"eval_steps_per_second": 3.211,
"step": 6770
},
{
"epoch": 372.0,
"eval_gen_len": 0.0,
"eval_loss": 0.16911160945892334,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8804,
"eval_samples_per_second": 18.706,
"eval_steps_per_second": 3.231,
"step": 6789
},
{
"epoch": 372.99,
"eval_gen_len": 0.0,
"eval_loss": 0.16869549453258514,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8847,
"eval_samples_per_second": 18.693,
"eval_steps_per_second": 3.229,
"step": 6807
},
{
"epoch": 373.97,
"eval_gen_len": 0.0,
"eval_loss": 0.16840766370296478,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9184,
"eval_samples_per_second": 18.586,
"eval_steps_per_second": 3.21,
"step": 6825
},
{
"epoch": 374.96,
"eval_gen_len": 0.0,
"eval_loss": 0.16836071014404297,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9124,
"eval_samples_per_second": 18.605,
"eval_steps_per_second": 3.214,
"step": 6843
},
{
"epoch": 376.0,
"eval_gen_len": 0.0,
"eval_loss": 0.168260395526886,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8823,
"eval_samples_per_second": 18.7,
"eval_steps_per_second": 3.23,
"step": 6862
},
{
"epoch": 376.99,
"eval_gen_len": 0.0,
"eval_loss": 0.16808076202869415,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8791,
"eval_samples_per_second": 18.71,
"eval_steps_per_second": 3.232,
"step": 6880
},
{
"epoch": 377.97,
"eval_gen_len": 0.0,
"eval_loss": 0.16791433095932007,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8871,
"eval_samples_per_second": 18.685,
"eval_steps_per_second": 3.227,
"step": 6898
},
{
"epoch": 378.96,
"eval_gen_len": 0.0,
"eval_loss": 0.16784635186195374,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8948,
"eval_samples_per_second": 18.661,
"eval_steps_per_second": 3.223,
"step": 6916
},
{
"epoch": 380.0,
"eval_gen_len": 0.0,
"eval_loss": 0.16753236949443817,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8837,
"eval_samples_per_second": 18.696,
"eval_steps_per_second": 3.229,
"step": 6935
},
{
"epoch": 380.99,
"eval_gen_len": 0.0,
"eval_loss": 0.16738325357437134,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8709,
"eval_samples_per_second": 18.736,
"eval_steps_per_second": 3.236,
"step": 6953
},
{
"epoch": 381.97,
"eval_gen_len": 0.0,
"eval_loss": 0.16733896732330322,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8784,
"eval_samples_per_second": 18.713,
"eval_steps_per_second": 3.232,
"step": 6971
},
{
"epoch": 382.96,
"eval_gen_len": 0.0,
"eval_loss": 0.16720926761627197,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8818,
"eval_samples_per_second": 18.702,
"eval_steps_per_second": 3.23,
"step": 6989
},
{
"epoch": 383.56,
"grad_norm": 0.32142043113708496,
"learning_rate": 4.457777777777778e-06,
"loss": 0.211,
"step": 7000
},
{
"epoch": 384.0,
"eval_gen_len": 0.0,
"eval_loss": 0.16671238839626312,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8769,
"eval_samples_per_second": 18.717,
"eval_steps_per_second": 3.233,
"step": 7008
},
{
"epoch": 384.99,
"eval_gen_len": 0.0,
"eval_loss": 0.16662339866161346,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8678,
"eval_samples_per_second": 18.746,
"eval_steps_per_second": 3.238,
"step": 7026
},
{
"epoch": 385.97,
"eval_gen_len": 0.0,
"eval_loss": 0.16629952192306519,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8786,
"eval_samples_per_second": 18.712,
"eval_steps_per_second": 3.232,
"step": 7044
},
{
"epoch": 386.96,
"eval_gen_len": 0.0,
"eval_loss": 0.16644792258739471,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8767,
"eval_samples_per_second": 18.718,
"eval_steps_per_second": 3.233,
"step": 7062
},
{
"epoch": 388.0,
"eval_gen_len": 0.0,
"eval_loss": 0.1665712594985962,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8895,
"eval_samples_per_second": 18.677,
"eval_steps_per_second": 3.226,
"step": 7081
},
{
"epoch": 388.99,
"eval_gen_len": 0.0,
"eval_loss": 0.16607053577899933,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8751,
"eval_samples_per_second": 18.723,
"eval_steps_per_second": 3.234,
"step": 7099
},
{
"epoch": 389.97,
"eval_gen_len": 0.0,
"eval_loss": 0.16597412526607513,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8717,
"eval_samples_per_second": 18.734,
"eval_steps_per_second": 3.236,
"step": 7117
},
{
"epoch": 390.96,
"eval_gen_len": 0.0,
"eval_loss": 0.16590653359889984,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8807,
"eval_samples_per_second": 18.705,
"eval_steps_per_second": 3.231,
"step": 7135
},
{
"epoch": 392.0,
"eval_gen_len": 0.0,
"eval_loss": 0.16565540432929993,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8901,
"eval_samples_per_second": 18.675,
"eval_steps_per_second": 3.226,
"step": 7154
},
{
"epoch": 392.99,
"eval_gen_len": 0.0,
"eval_loss": 0.1656540036201477,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8795,
"eval_samples_per_second": 18.709,
"eval_steps_per_second": 3.232,
"step": 7172
},
{
"epoch": 393.97,
"eval_gen_len": 0.0,
"eval_loss": 0.16561686992645264,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.891,
"eval_samples_per_second": 18.672,
"eval_steps_per_second": 3.225,
"step": 7190
},
{
"epoch": 394.96,
"eval_gen_len": 0.0,
"eval_loss": 0.16566209495067596,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8782,
"eval_samples_per_second": 18.713,
"eval_steps_per_second": 3.232,
"step": 7208
},
{
"epoch": 396.0,
"eval_gen_len": 0.0,
"eval_loss": 0.16528266668319702,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9003,
"eval_samples_per_second": 18.643,
"eval_steps_per_second": 3.22,
"step": 7227
},
{
"epoch": 396.99,
"eval_gen_len": 0.0,
"eval_loss": 0.16509102284908295,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8787,
"eval_samples_per_second": 18.712,
"eval_steps_per_second": 3.232,
"step": 7245
},
{
"epoch": 397.97,
"eval_gen_len": 0.0,
"eval_loss": 0.16473665833473206,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8817,
"eval_samples_per_second": 18.702,
"eval_steps_per_second": 3.23,
"step": 7263
},
{
"epoch": 398.96,
"eval_gen_len": 0.0,
"eval_loss": 0.16476485133171082,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8783,
"eval_samples_per_second": 18.713,
"eval_steps_per_second": 3.232,
"step": 7281
},
{
"epoch": 400.0,
"eval_gen_len": 0.0,
"eval_loss": 0.1648998111486435,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8847,
"eval_samples_per_second": 18.692,
"eval_steps_per_second": 3.229,
"step": 7300
},
{
"epoch": 400.99,
"eval_gen_len": 0.0,
"eval_loss": 0.16450464725494385,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8833,
"eval_samples_per_second": 18.697,
"eval_steps_per_second": 3.23,
"step": 7318
},
{
"epoch": 401.97,
"eval_gen_len": 0.0,
"eval_loss": 0.16408170759677887,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9273,
"eval_samples_per_second": 18.558,
"eval_steps_per_second": 3.206,
"step": 7336
},
{
"epoch": 402.96,
"eval_gen_len": 0.0,
"eval_loss": 0.16408471763134003,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9148,
"eval_samples_per_second": 18.597,
"eval_steps_per_second": 3.212,
"step": 7354
},
{
"epoch": 404.0,
"eval_gen_len": 0.0,
"eval_loss": 0.1640195995569229,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.924,
"eval_samples_per_second": 18.569,
"eval_steps_per_second": 3.207,
"step": 7373
},
{
"epoch": 404.99,
"eval_gen_len": 0.0,
"eval_loss": 0.16400323808193207,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9183,
"eval_samples_per_second": 18.586,
"eval_steps_per_second": 3.21,
"step": 7391
},
{
"epoch": 405.97,
"eval_gen_len": 0.0,
"eval_loss": 0.16385148465633392,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9116,
"eval_samples_per_second": 18.608,
"eval_steps_per_second": 3.214,
"step": 7409
},
{
"epoch": 406.96,
"eval_gen_len": 0.0,
"eval_loss": 0.163739874958992,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9025,
"eval_samples_per_second": 18.636,
"eval_steps_per_second": 3.219,
"step": 7427
},
{
"epoch": 408.0,
"eval_gen_len": 0.0,
"eval_loss": 0.1632901281118393,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8717,
"eval_samples_per_second": 18.734,
"eval_steps_per_second": 3.236,
"step": 7446
},
{
"epoch": 408.99,
"eval_gen_len": 0.0,
"eval_loss": 0.1632470041513443,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8869,
"eval_samples_per_second": 18.686,
"eval_steps_per_second": 3.228,
"step": 7464
},
{
"epoch": 409.97,
"eval_gen_len": 0.0,
"eval_loss": 0.16338156163692474,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.903,
"eval_samples_per_second": 18.635,
"eval_steps_per_second": 3.219,
"step": 7482
},
{
"epoch": 410.96,
"grad_norm": 0.4017387628555298,
"learning_rate": 3.346666666666667e-06,
"loss": 0.2061,
"step": 7500
},
{
"epoch": 410.96,
"eval_gen_len": 0.0,
"eval_loss": 0.16326506435871124,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8869,
"eval_samples_per_second": 18.686,
"eval_steps_per_second": 3.228,
"step": 7500
},
{
"epoch": 412.0,
"eval_gen_len": 0.0,
"eval_loss": 0.1629171222448349,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8877,
"eval_samples_per_second": 18.683,
"eval_steps_per_second": 3.227,
"step": 7519
},
{
"epoch": 412.99,
"eval_gen_len": 0.0,
"eval_loss": 0.1628817468881607,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9117,
"eval_samples_per_second": 18.607,
"eval_steps_per_second": 3.214,
"step": 7537
},
{
"epoch": 413.97,
"eval_gen_len": 0.0,
"eval_loss": 0.16285060346126556,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9244,
"eval_samples_per_second": 18.567,
"eval_steps_per_second": 3.207,
"step": 7555
},
{
"epoch": 414.96,
"eval_gen_len": 0.0,
"eval_loss": 0.16274811327457428,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8821,
"eval_samples_per_second": 18.701,
"eval_steps_per_second": 3.23,
"step": 7573
},
{
"epoch": 416.0,
"eval_gen_len": 0.0,
"eval_loss": 0.16286291182041168,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.918,
"eval_samples_per_second": 18.587,
"eval_steps_per_second": 3.211,
"step": 7592
},
{
"epoch": 416.99,
"eval_gen_len": 0.0,
"eval_loss": 0.16281896829605103,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8847,
"eval_samples_per_second": 18.693,
"eval_steps_per_second": 3.229,
"step": 7610
},
{
"epoch": 417.97,
"eval_gen_len": 0.0,
"eval_loss": 0.16268208622932434,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9077,
"eval_samples_per_second": 18.62,
"eval_steps_per_second": 3.216,
"step": 7628
},
{
"epoch": 418.96,
"eval_gen_len": 0.0,
"eval_loss": 0.16232354938983917,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9153,
"eval_samples_per_second": 18.596,
"eval_steps_per_second": 3.212,
"step": 7646
},
{
"epoch": 420.0,
"eval_gen_len": 0.0,
"eval_loss": 0.16238705813884735,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9249,
"eval_samples_per_second": 18.566,
"eval_steps_per_second": 3.207,
"step": 7665
},
{
"epoch": 420.99,
"eval_gen_len": 0.0,
"eval_loss": 0.16214394569396973,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8853,
"eval_samples_per_second": 18.691,
"eval_steps_per_second": 3.228,
"step": 7683
},
{
"epoch": 421.97,
"eval_gen_len": 0.0,
"eval_loss": 0.16213367879390717,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.869,
"eval_samples_per_second": 18.743,
"eval_steps_per_second": 3.237,
"step": 7701
},
{
"epoch": 422.96,
"eval_gen_len": 0.0,
"eval_loss": 0.1621711403131485,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.869,
"eval_samples_per_second": 18.743,
"eval_steps_per_second": 3.237,
"step": 7719
},
{
"epoch": 424.0,
"eval_gen_len": 0.0,
"eval_loss": 0.16199961304664612,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8763,
"eval_samples_per_second": 18.719,
"eval_steps_per_second": 3.233,
"step": 7738
},
{
"epoch": 424.99,
"eval_gen_len": 0.0,
"eval_loss": 0.1616300344467163,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9176,
"eval_samples_per_second": 18.589,
"eval_steps_per_second": 3.211,
"step": 7756
},
{
"epoch": 425.97,
"eval_gen_len": 0.0,
"eval_loss": 0.16134707629680634,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9055,
"eval_samples_per_second": 18.627,
"eval_steps_per_second": 3.217,
"step": 7774
},
{
"epoch": 426.96,
"eval_gen_len": 0.0,
"eval_loss": 0.1613784283399582,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8741,
"eval_samples_per_second": 18.726,
"eval_steps_per_second": 3.235,
"step": 7792
},
{
"epoch": 428.0,
"eval_gen_len": 0.0,
"eval_loss": 0.1613391935825348,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.9229,
"eval_samples_per_second": 18.572,
"eval_steps_per_second": 3.208,
"step": 7811
},
{
"epoch": 428.99,
"eval_gen_len": 0.0,
"eval_loss": 0.1613980233669281,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8737,
"eval_samples_per_second": 18.728,
"eval_steps_per_second": 3.235,
"step": 7829
},
{
"epoch": 429.97,
"eval_gen_len": 0.0,
"eval_loss": 0.16145525872707367,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8935,
"eval_samples_per_second": 18.664,
"eval_steps_per_second": 3.224,
"step": 7847
},
{
"epoch": 430.96,
"eval_gen_len": 0.0,
"eval_loss": 0.16126497089862823,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8806,
"eval_samples_per_second": 18.705,
"eval_steps_per_second": 3.231,
"step": 7865
},
{
"epoch": 432.0,
"eval_gen_len": 0.0,
"eval_loss": 0.16113270819187164,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8882,
"eval_samples_per_second": 18.681,
"eval_steps_per_second": 3.227,
"step": 7884
},
{
"epoch": 432.99,
"eval_gen_len": 0.0,
"eval_loss": 0.16119304299354553,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8803,
"eval_samples_per_second": 18.707,
"eval_steps_per_second": 3.231,
"step": 7902
},
{
"epoch": 433.97,
"eval_gen_len": 0.0,
"eval_loss": 0.16113385558128357,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8923,
"eval_samples_per_second": 18.668,
"eval_steps_per_second": 3.225,
"step": 7920
},
{
"epoch": 434.96,
"eval_gen_len": 0.0,
"eval_loss": 0.1608574539422989,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8855,
"eval_samples_per_second": 18.69,
"eval_steps_per_second": 3.228,
"step": 7938
},
{
"epoch": 436.0,
"eval_gen_len": 0.0,
"eval_loss": 0.16086578369140625,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8752,
"eval_samples_per_second": 18.723,
"eval_steps_per_second": 3.234,
"step": 7957
},
{
"epoch": 436.99,
"eval_gen_len": 0.0,
"eval_loss": 0.1609336882829666,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8764,
"eval_samples_per_second": 18.719,
"eval_steps_per_second": 3.233,
"step": 7975
},
{
"epoch": 437.97,
"eval_gen_len": 0.0,
"eval_loss": 0.16086000204086304,
"eval_rouge1": 0.0,
"eval_rouge2": 0.0,
"eval_rougeL": 0.0,
"eval_rougeLsum": 0.0,
"eval_runtime": 5.8862,
"eval_samples_per_second": 18.688,
"eval_steps_per_second": 3.228,
"step": 7993
},
{
"epoch": 438.36,
"grad_norm": 0.3427538573741913,
"learning_rate": 2.235555555555556e-06,
"loss": 0.2001,
"step": 8000
}
],
"logging_steps": 500,
"max_steps": 9000,
"num_input_tokens_seen": 0,
"num_train_epochs": 500,
"save_steps": 500,
"total_flos": 2.332923933889659e+17,
"train_batch_size": 6,
"trial_name": null,
"trial_params": null
}