{ "best_metric": null, "best_model_checkpoint": null, "epoch": 438.35616438356163, "eval_steps": 500, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.99, "eval_gen_len": 14.6545, "eval_loss": 21.487245559692383, "eval_rouge1": 0.0812, "eval_rouge2": 0.0183, "eval_rougeL": 0.0668, "eval_rougeLsum": 0.0663, "eval_runtime": 5.8972, "eval_samples_per_second": 18.653, "eval_steps_per_second": 3.222, "step": 18 }, { "epoch": 1.97, "eval_gen_len": 14.5091, "eval_loss": 21.332395553588867, "eval_rouge1": 0.0808, "eval_rouge2": 0.0191, "eval_rougeL": 0.0657, "eval_rougeLsum": 0.0652, "eval_runtime": 5.8919, "eval_samples_per_second": 18.67, "eval_steps_per_second": 3.225, "step": 36 }, { "epoch": 2.96, "eval_gen_len": 14.0727, "eval_loss": 21.108753204345703, "eval_rouge1": 0.0819, "eval_rouge2": 0.0203, "eval_rougeL": 0.0671, "eval_rougeLsum": 0.0665, "eval_runtime": 5.8908, "eval_samples_per_second": 18.673, "eval_steps_per_second": 3.225, "step": 54 }, { "epoch": 4.0, "eval_gen_len": 14.0727, "eval_loss": 20.799583435058594, "eval_rouge1": 0.0824, "eval_rouge2": 0.0206, "eval_rougeL": 0.0665, "eval_rougeLsum": 0.0659, "eval_runtime": 5.8851, "eval_samples_per_second": 18.691, "eval_steps_per_second": 3.228, "step": 73 }, { "epoch": 4.99, "eval_gen_len": 14.2182, "eval_loss": 20.4609317779541, "eval_rouge1": 0.0814, "eval_rouge2": 0.0212, "eval_rougeL": 0.0669, "eval_rougeLsum": 0.0664, "eval_runtime": 5.8894, "eval_samples_per_second": 18.677, "eval_steps_per_second": 3.226, "step": 91 }, { "epoch": 5.97, "eval_gen_len": 14.2182, "eval_loss": 20.145660400390625, "eval_rouge1": 0.0847, "eval_rouge2": 0.0208, "eval_rougeL": 0.067, "eval_rougeLsum": 0.0666, "eval_runtime": 5.8963, "eval_samples_per_second": 18.656, "eval_steps_per_second": 3.222, "step": 109 }, { "epoch": 6.96, "eval_gen_len": 14.0727, "eval_loss": 19.89990234375, "eval_rouge1": 0.084, "eval_rouge2": 0.0184, "eval_rougeL": 0.0662, "eval_rougeLsum": 0.0657, "eval_runtime": 5.8939, "eval_samples_per_second": 18.663, "eval_steps_per_second": 3.224, "step": 127 }, { "epoch": 8.0, "eval_gen_len": 14.1455, "eval_loss": 19.661205291748047, "eval_rouge1": 0.0882, "eval_rouge2": 0.0219, "eval_rougeL": 0.0703, "eval_rougeLsum": 0.0699, "eval_runtime": 5.9009, "eval_samples_per_second": 18.641, "eval_steps_per_second": 3.22, "step": 146 }, { "epoch": 8.99, "eval_gen_len": 14.9273, "eval_loss": 19.420059204101562, "eval_rouge1": 0.0958, "eval_rouge2": 0.0243, "eval_rougeL": 0.0775, "eval_rougeLsum": 0.0772, "eval_runtime": 5.8976, "eval_samples_per_second": 18.652, "eval_steps_per_second": 3.222, "step": 164 }, { "epoch": 9.97, "eval_gen_len": 14.9273, "eval_loss": 19.1621036529541, "eval_rouge1": 0.0915, "eval_rouge2": 0.0256, "eval_rougeL": 0.0769, "eval_rougeLsum": 0.0763, "eval_runtime": 5.937, "eval_samples_per_second": 18.528, "eval_steps_per_second": 3.2, "step": 182 }, { "epoch": 10.96, "eval_gen_len": 14.7818, "eval_loss": 18.873458862304688, "eval_rouge1": 0.0968, "eval_rouge2": 0.0284, "eval_rougeL": 0.0786, "eval_rougeLsum": 0.0786, "eval_runtime": 5.8869, "eval_samples_per_second": 18.686, "eval_steps_per_second": 3.228, "step": 200 }, { "epoch": 12.0, "eval_gen_len": 14.6364, "eval_loss": 18.52387237548828, "eval_rouge1": 0.0901, "eval_rouge2": 0.0262, "eval_rougeL": 0.0738, "eval_rougeLsum": 0.0741, "eval_runtime": 5.9384, "eval_samples_per_second": 18.524, "eval_steps_per_second": 3.2, "step": 219 }, { "epoch": 12.99, "eval_gen_len": 14.4909, "eval_loss": 18.13555145263672, "eval_rouge1": 0.0886, "eval_rouge2": 0.0263, "eval_rougeL": 0.0714, "eval_rougeLsum": 0.0717, "eval_runtime": 5.9432, "eval_samples_per_second": 18.509, "eval_steps_per_second": 3.197, "step": 237 }, { "epoch": 13.97, "eval_gen_len": 14.4909, "eval_loss": 17.66015625, "eval_rouge1": 0.0915, "eval_rouge2": 0.03, "eval_rougeL": 0.0712, "eval_rougeLsum": 0.0713, "eval_runtime": 5.9005, "eval_samples_per_second": 18.643, "eval_steps_per_second": 3.22, "step": 255 }, { "epoch": 14.96, "eval_gen_len": 15.2182, "eval_loss": 17.107126235961914, "eval_rouge1": 0.0933, "eval_rouge2": 0.0295, "eval_rougeL": 0.0733, "eval_rougeLsum": 0.0734, "eval_runtime": 5.9126, "eval_samples_per_second": 18.604, "eval_steps_per_second": 3.213, "step": 273 }, { "epoch": 16.0, "eval_gen_len": 15.2182, "eval_loss": 16.4465389251709, "eval_rouge1": 0.0792, "eval_rouge2": 0.02, "eval_rougeL": 0.0637, "eval_rougeLsum": 0.0639, "eval_runtime": 5.893, "eval_samples_per_second": 18.666, "eval_steps_per_second": 3.224, "step": 292 }, { "epoch": 16.99, "eval_gen_len": 14.7818, "eval_loss": 15.7671480178833, "eval_rouge1": 0.069, "eval_rouge2": 0.0176, "eval_rougeL": 0.0534, "eval_rougeLsum": 0.0537, "eval_runtime": 5.8934, "eval_samples_per_second": 18.665, "eval_steps_per_second": 3.224, "step": 310 }, { "epoch": 17.97, "eval_gen_len": 13.1545, "eval_loss": 14.998970031738281, "eval_rouge1": 0.0566, "eval_rouge2": 0.0124, "eval_rougeL": 0.0449, "eval_rougeLsum": 0.0451, "eval_runtime": 5.8867, "eval_samples_per_second": 18.686, "eval_steps_per_second": 3.228, "step": 328 }, { "epoch": 18.96, "eval_gen_len": 11.1091, "eval_loss": 14.060928344726562, "eval_rouge1": 0.0498, "eval_rouge2": 0.0171, "eval_rougeL": 0.0368, "eval_rougeLsum": 0.0367, "eval_runtime": 5.6651, "eval_samples_per_second": 19.417, "eval_steps_per_second": 3.354, "step": 346 }, { "epoch": 20.0, "eval_gen_len": 8.7273, "eval_loss": 13.174235343933105, "eval_rouge1": 0.0205, "eval_rouge2": 0.0044, "eval_rougeL": 0.0165, "eval_rougeLsum": 0.0164, "eval_runtime": 5.7609, "eval_samples_per_second": 19.094, "eval_steps_per_second": 3.298, "step": 365 }, { "epoch": 20.99, "eval_gen_len": 6.8636, "eval_loss": 12.371671676635742, "eval_rouge1": 0.0139, "eval_rouge2": 0.0028, "eval_rougeL": 0.0124, "eval_rougeLsum": 0.0123, "eval_runtime": 5.2236, "eval_samples_per_second": 21.058, "eval_steps_per_second": 3.637, "step": 383 }, { "epoch": 21.97, "eval_gen_len": 6.8727, "eval_loss": 11.577987670898438, "eval_rouge1": 0.0053, "eval_rouge2": 0.0005, "eval_rougeL": 0.0047, "eval_rougeLsum": 0.0048, "eval_runtime": 5.7472, "eval_samples_per_second": 19.14, "eval_steps_per_second": 3.306, "step": 401 }, { "epoch": 22.96, "eval_gen_len": 7.3818, "eval_loss": 10.78397274017334, "eval_rouge1": 0.0021, "eval_rouge2": 0.0, "eval_rougeL": 0.0021, "eval_rougeLsum": 0.0021, "eval_runtime": 5.7621, "eval_samples_per_second": 19.09, "eval_steps_per_second": 3.297, "step": 419 }, { "epoch": 24.0, "eval_gen_len": 6.8273, "eval_loss": 9.95447826385498, "eval_rouge1": 0.0009, "eval_rouge2": 0.0, "eval_rougeL": 0.0008, "eval_rougeLsum": 0.0008, "eval_runtime": 5.4394, "eval_samples_per_second": 20.223, "eval_steps_per_second": 3.493, "step": 438 }, { "epoch": 24.99, "eval_gen_len": 7.0909, "eval_loss": 9.179459571838379, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.4343, "eval_samples_per_second": 20.242, "eval_steps_per_second": 3.496, "step": 456 }, { "epoch": 25.97, "eval_gen_len": 8.1, "eval_loss": 8.421984672546387, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 5.7592, "eval_samples_per_second": 19.1, "eval_steps_per_second": 3.299, "step": 474 }, { "epoch": 26.96, "eval_gen_len": 8.2636, "eval_loss": 7.694218158721924, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8511, "eval_samples_per_second": 18.8, "eval_steps_per_second": 3.247, "step": 492 }, { "epoch": 27.4, "grad_norm": 5.483399868011475, "learning_rate": 1.8893333333333334e-05, "loss": 16.3522, "step": 500 }, { "epoch": 28.0, "eval_gen_len": 11.3818, "eval_loss": 6.939992427825928, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.887, "eval_samples_per_second": 18.685, "eval_steps_per_second": 3.227, "step": 511 }, { "epoch": 28.99, "eval_gen_len": 12.6273, "eval_loss": 6.2829270362854, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.905, "eval_samples_per_second": 18.628, "eval_steps_per_second": 3.218, "step": 529 }, { "epoch": 29.97, "eval_gen_len": 15.3091, "eval_loss": 5.604813575744629, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 5.8732, "eval_samples_per_second": 18.729, "eval_steps_per_second": 3.235, "step": 547 }, { "epoch": 30.96, "eval_gen_len": 17.3182, "eval_loss": 4.977880954742432, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9021, "eval_samples_per_second": 18.637, "eval_steps_per_second": 3.219, "step": 565 }, { "epoch": 32.0, "eval_gen_len": 18.3273, "eval_loss": 4.37266731262207, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 5.9191, "eval_samples_per_second": 18.584, "eval_steps_per_second": 3.21, "step": 584 }, { "epoch": 32.99, "eval_gen_len": 18.7091, "eval_loss": 3.8430399894714355, "eval_rouge1": 0.0002, "eval_rouge2": 0.0, "eval_rougeL": 0.0002, "eval_rougeLsum": 0.0002, "eval_runtime": 5.8717, "eval_samples_per_second": 18.734, "eval_steps_per_second": 3.236, "step": 602 }, { "epoch": 33.97, "eval_gen_len": 18.8545, "eval_loss": 3.400059938430786, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 5.9022, "eval_samples_per_second": 18.637, "eval_steps_per_second": 3.219, "step": 620 }, { "epoch": 34.96, "eval_gen_len": 19.0, "eval_loss": 3.0639424324035645, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 5.8543, "eval_samples_per_second": 18.79, "eval_steps_per_second": 3.245, "step": 638 }, { "epoch": 36.0, "eval_gen_len": 19.0, "eval_loss": 2.758321762084961, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 5.9112, "eval_samples_per_second": 18.609, "eval_steps_per_second": 3.214, "step": 657 }, { "epoch": 36.99, "eval_gen_len": 18.4364, "eval_loss": 2.5352485179901123, "eval_rouge1": 0.0003, "eval_rouge2": 0.0, "eval_rougeL": 0.0003, "eval_rougeLsum": 0.0003, "eval_runtime": 5.8968, "eval_samples_per_second": 18.654, "eval_steps_per_second": 3.222, "step": 675 }, { "epoch": 37.97, "eval_gen_len": 14.8909, "eval_loss": 2.3651320934295654, "eval_rouge1": 0.0005, "eval_rouge2": 0.0, "eval_rougeL": 0.0005, "eval_rougeLsum": 0.0005, "eval_runtime": 5.8625, "eval_samples_per_second": 18.763, "eval_steps_per_second": 3.241, "step": 693 }, { "epoch": 38.96, "eval_gen_len": 10.8273, "eval_loss": 2.230059862136841, "eval_rouge1": 0.0014, "eval_rouge2": 0.0, "eval_rougeL": 0.0014, "eval_rougeLsum": 0.0014, "eval_runtime": 5.852, "eval_samples_per_second": 18.797, "eval_steps_per_second": 3.247, "step": 711 }, { "epoch": 40.0, "eval_gen_len": 7.9545, "eval_loss": 2.111604928970337, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8625, "eval_samples_per_second": 18.763, "eval_steps_per_second": 3.241, "step": 730 }, { "epoch": 40.99, "eval_gen_len": 6.8364, "eval_loss": 2.019117832183838, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8494, "eval_samples_per_second": 18.805, "eval_steps_per_second": 3.248, "step": 748 }, { "epoch": 41.97, "eval_gen_len": 6.1727, "eval_loss": 1.950454831123352, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8618, "eval_samples_per_second": 18.765, "eval_steps_per_second": 3.241, "step": 766 }, { "epoch": 42.96, "eval_gen_len": 5.6, "eval_loss": 1.9009199142456055, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8848, "eval_samples_per_second": 18.692, "eval_steps_per_second": 3.229, "step": 784 }, { "epoch": 44.0, "eval_gen_len": 5.4455, "eval_loss": 1.8568826913833618, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8517, "eval_samples_per_second": 18.798, "eval_steps_per_second": 3.247, "step": 803 }, { "epoch": 44.99, "eval_gen_len": 5.1909, "eval_loss": 1.8185267448425293, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8682, "eval_samples_per_second": 18.745, "eval_steps_per_second": 3.238, "step": 821 }, { "epoch": 45.97, "eval_gen_len": 5.1182, "eval_loss": 1.7847113609313965, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8488, "eval_samples_per_second": 18.807, "eval_steps_per_second": 3.249, "step": 839 }, { "epoch": 46.96, "eval_gen_len": 4.9455, "eval_loss": 1.7547551393508911, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8518, "eval_samples_per_second": 18.798, "eval_steps_per_second": 3.247, "step": 857 }, { "epoch": 48.0, "eval_gen_len": 4.9455, "eval_loss": 1.7266695499420166, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8585, "eval_samples_per_second": 18.776, "eval_steps_per_second": 3.243, "step": 876 }, { "epoch": 48.99, "eval_gen_len": 4.7455, "eval_loss": 1.7055079936981201, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8511, "eval_samples_per_second": 18.8, "eval_steps_per_second": 3.247, "step": 894 }, { "epoch": 49.97, "eval_gen_len": 4.4727, "eval_loss": 1.6864752769470215, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.848, "eval_samples_per_second": 18.81, "eval_steps_per_second": 3.249, "step": 912 }, { "epoch": 50.96, "eval_gen_len": 4.4, "eval_loss": 1.6679636240005493, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8942, "eval_samples_per_second": 18.662, "eval_steps_per_second": 3.223, "step": 930 }, { "epoch": 52.0, "eval_gen_len": 4.8273, "eval_loss": 1.6500035524368286, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8572, "eval_samples_per_second": 18.78, "eval_steps_per_second": 3.244, "step": 949 }, { "epoch": 52.99, "eval_gen_len": 5.4273, "eval_loss": 1.6347858905792236, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8874, "eval_samples_per_second": 18.684, "eval_steps_per_second": 3.227, "step": 967 }, { "epoch": 53.97, "eval_gen_len": 5.4727, "eval_loss": 1.620485782623291, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8561, "eval_samples_per_second": 18.784, "eval_steps_per_second": 3.244, "step": 985 }, { "epoch": 54.79, "grad_norm": 3.8756470680236816, "learning_rate": 1.7786666666666667e-05, "loss": 3.4786, "step": 1000 }, { "epoch": 54.96, "eval_gen_len": 4.9909, "eval_loss": 1.6101189851760864, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9013, "eval_samples_per_second": 18.64, "eval_steps_per_second": 3.22, "step": 1003 }, { "epoch": 56.0, "eval_gen_len": 4.9727, "eval_loss": 1.5964934825897217, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8515, "eval_samples_per_second": 18.799, "eval_steps_per_second": 3.247, "step": 1022 }, { "epoch": 56.99, "eval_gen_len": 5.3, "eval_loss": 1.583103060722351, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8488, "eval_samples_per_second": 18.807, "eval_steps_per_second": 3.249, "step": 1040 }, { "epoch": 57.97, "eval_gen_len": 5.2273, "eval_loss": 1.5738121271133423, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8912, "eval_samples_per_second": 18.672, "eval_steps_per_second": 3.225, "step": 1058 }, { "epoch": 58.96, "eval_gen_len": 5.5273, "eval_loss": 1.5626448392868042, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8527, "eval_samples_per_second": 18.795, "eval_steps_per_second": 3.246, "step": 1076 }, { "epoch": 60.0, "eval_gen_len": 6.3273, "eval_loss": 1.5520726442337036, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8887, "eval_samples_per_second": 18.68, "eval_steps_per_second": 3.227, "step": 1095 }, { "epoch": 60.99, "eval_gen_len": 7.1091, "eval_loss": 1.5398296117782593, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8838, "eval_samples_per_second": 18.695, "eval_steps_per_second": 3.229, "step": 1113 }, { "epoch": 61.97, "eval_gen_len": 7.9182, "eval_loss": 1.5261036157608032, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8837, "eval_samples_per_second": 18.696, "eval_steps_per_second": 3.229, "step": 1131 }, { "epoch": 62.96, "eval_gen_len": 8.6, "eval_loss": 1.5135173797607422, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8878, "eval_samples_per_second": 18.683, "eval_steps_per_second": 3.227, "step": 1149 }, { "epoch": 64.0, "eval_gen_len": 8.9727, "eval_loss": 1.5019876956939697, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8993, "eval_samples_per_second": 18.646, "eval_steps_per_second": 3.221, "step": 1168 }, { "epoch": 64.99, "eval_gen_len": 9.1455, "eval_loss": 1.4927572011947632, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8898, "eval_samples_per_second": 18.676, "eval_steps_per_second": 3.226, "step": 1186 }, { "epoch": 65.97, "eval_gen_len": 9.3636, "eval_loss": 1.4839699268341064, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8485, "eval_samples_per_second": 18.808, "eval_steps_per_second": 3.249, "step": 1204 }, { "epoch": 66.96, "eval_gen_len": 9.6727, "eval_loss": 1.4724147319793701, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8498, "eval_samples_per_second": 18.804, "eval_steps_per_second": 3.248, "step": 1222 }, { "epoch": 68.0, "eval_gen_len": 9.6545, "eval_loss": 1.4610724449157715, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.857, "eval_samples_per_second": 18.781, "eval_steps_per_second": 3.244, "step": 1241 }, { "epoch": 68.99, "eval_gen_len": 9.7182, "eval_loss": 1.4491708278656006, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8545, "eval_samples_per_second": 18.789, "eval_steps_per_second": 3.245, "step": 1259 }, { "epoch": 69.97, "eval_gen_len": 9.6727, "eval_loss": 1.4401447772979736, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8617, "eval_samples_per_second": 18.766, "eval_steps_per_second": 3.241, "step": 1277 }, { "epoch": 70.96, "eval_gen_len": 9.6818, "eval_loss": 1.4306913614273071, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.861, "eval_samples_per_second": 18.768, "eval_steps_per_second": 3.242, "step": 1295 }, { "epoch": 72.0, "eval_gen_len": 9.7636, "eval_loss": 1.4177192449569702, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8583, "eval_samples_per_second": 18.777, "eval_steps_per_second": 3.243, "step": 1314 }, { "epoch": 72.99, "eval_gen_len": 9.8182, "eval_loss": 1.4081608057022095, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8574, "eval_samples_per_second": 18.78, "eval_steps_per_second": 3.244, "step": 1332 }, { "epoch": 73.97, "eval_gen_len": 9.8, "eval_loss": 1.3982936143875122, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8596, "eval_samples_per_second": 18.773, "eval_steps_per_second": 3.243, "step": 1350 }, { "epoch": 74.96, "eval_gen_len": 9.7545, "eval_loss": 1.385299563407898, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8543, "eval_samples_per_second": 18.79, "eval_steps_per_second": 3.245, "step": 1368 }, { "epoch": 76.0, "eval_gen_len": 9.8727, "eval_loss": 1.3723993301391602, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9071, "eval_samples_per_second": 18.622, "eval_steps_per_second": 3.216, "step": 1387 }, { "epoch": 76.99, "eval_gen_len": 9.8636, "eval_loss": 1.3635698556900024, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8955, "eval_samples_per_second": 18.658, "eval_steps_per_second": 3.223, "step": 1405 }, { "epoch": 77.97, "eval_gen_len": 9.7727, "eval_loss": 1.3577702045440674, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8571, "eval_samples_per_second": 18.781, "eval_steps_per_second": 3.244, "step": 1423 }, { "epoch": 78.96, "eval_gen_len": 9.8455, "eval_loss": 1.350039005279541, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8893, "eval_samples_per_second": 18.678, "eval_steps_per_second": 3.226, "step": 1441 }, { "epoch": 80.0, "eval_gen_len": 9.8, "eval_loss": 1.3370468616485596, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9191, "eval_samples_per_second": 18.584, "eval_steps_per_second": 3.21, "step": 1460 }, { "epoch": 80.99, "eval_gen_len": 9.7909, "eval_loss": 1.3282612562179565, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8938, "eval_samples_per_second": 18.664, "eval_steps_per_second": 3.224, "step": 1478 }, { "epoch": 81.97, "eval_gen_len": 9.7273, "eval_loss": 1.3168359994888306, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8597, "eval_samples_per_second": 18.772, "eval_steps_per_second": 3.242, "step": 1496 }, { "epoch": 82.19, "grad_norm": 2.8337929248809814, "learning_rate": 1.667777777777778e-05, "loss": 1.7958, "step": 1500 }, { "epoch": 82.96, "eval_gen_len": 9.8727, "eval_loss": 1.3036466836929321, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8558, "eval_samples_per_second": 18.785, "eval_steps_per_second": 3.245, "step": 1514 }, { "epoch": 84.0, "eval_gen_len": 9.9455, "eval_loss": 1.2935236692428589, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8683, "eval_samples_per_second": 18.745, "eval_steps_per_second": 3.238, "step": 1533 }, { "epoch": 84.99, "eval_gen_len": 9.9182, "eval_loss": 1.2810677289962769, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8603, "eval_samples_per_second": 18.77, "eval_steps_per_second": 3.242, "step": 1551 }, { "epoch": 85.97, "eval_gen_len": 9.9364, "eval_loss": 1.2679041624069214, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8561, "eval_samples_per_second": 18.784, "eval_steps_per_second": 3.244, "step": 1569 }, { "epoch": 86.96, "eval_gen_len": 9.9091, "eval_loss": 1.259030818939209, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.855, "eval_samples_per_second": 18.787, "eval_steps_per_second": 3.245, "step": 1587 }, { "epoch": 88.0, "eval_gen_len": 9.9636, "eval_loss": 1.2463934421539307, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8568, "eval_samples_per_second": 18.782, "eval_steps_per_second": 3.244, "step": 1606 }, { "epoch": 88.99, "eval_gen_len": 9.9091, "eval_loss": 1.2380764484405518, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8541, "eval_samples_per_second": 18.79, "eval_steps_per_second": 3.246, "step": 1624 }, { "epoch": 89.97, "eval_gen_len": 9.9091, "eval_loss": 1.2272734642028809, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8679, "eval_samples_per_second": 18.746, "eval_steps_per_second": 3.238, "step": 1642 }, { "epoch": 90.96, "eval_gen_len": 9.9091, "eval_loss": 1.2162261009216309, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8611, "eval_samples_per_second": 18.768, "eval_steps_per_second": 3.242, "step": 1660 }, { "epoch": 92.0, "eval_gen_len": 9.9273, "eval_loss": 1.2035719156265259, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8669, "eval_samples_per_second": 18.749, "eval_steps_per_second": 3.239, "step": 1679 }, { "epoch": 92.99, "eval_gen_len": 9.9727, "eval_loss": 1.1865819692611694, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8764, "eval_samples_per_second": 18.719, "eval_steps_per_second": 3.233, "step": 1697 }, { "epoch": 93.97, "eval_gen_len": 9.9273, "eval_loss": 1.1713649034500122, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8739, "eval_samples_per_second": 18.727, "eval_steps_per_second": 3.235, "step": 1715 }, { "epoch": 94.96, "eval_gen_len": 9.8818, "eval_loss": 1.1566345691680908, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8775, "eval_samples_per_second": 18.715, "eval_steps_per_second": 3.233, "step": 1733 }, { "epoch": 96.0, "eval_gen_len": 9.6818, "eval_loss": 1.141555905342102, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8697, "eval_samples_per_second": 18.74, "eval_steps_per_second": 3.237, "step": 1752 }, { "epoch": 96.99, "eval_gen_len": 9.5, "eval_loss": 1.1269311904907227, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8514, "eval_samples_per_second": 18.799, "eval_steps_per_second": 3.247, "step": 1770 }, { "epoch": 97.97, "eval_gen_len": 9.6545, "eval_loss": 1.1134684085845947, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8582, "eval_samples_per_second": 18.777, "eval_steps_per_second": 3.243, "step": 1788 }, { "epoch": 98.96, "eval_gen_len": 9.7, "eval_loss": 1.099327564239502, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8567, "eval_samples_per_second": 18.782, "eval_steps_per_second": 3.244, "step": 1806 }, { "epoch": 100.0, "eval_gen_len": 9.7909, "eval_loss": 1.0843485593795776, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8693, "eval_samples_per_second": 18.741, "eval_steps_per_second": 3.237, "step": 1825 }, { "epoch": 100.99, "eval_gen_len": 9.8909, "eval_loss": 1.0678842067718506, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8524, "eval_samples_per_second": 18.796, "eval_steps_per_second": 3.247, "step": 1843 }, { "epoch": 101.97, "eval_gen_len": 9.8636, "eval_loss": 1.0531669855117798, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8548, "eval_samples_per_second": 18.788, "eval_steps_per_second": 3.245, "step": 1861 }, { "epoch": 102.96, "eval_gen_len": 9.8364, "eval_loss": 1.0373491048812866, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8537, "eval_samples_per_second": 18.792, "eval_steps_per_second": 3.246, "step": 1879 }, { "epoch": 104.0, "eval_gen_len": 9.8636, "eval_loss": 1.0185768604278564, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8656, "eval_samples_per_second": 18.754, "eval_steps_per_second": 3.239, "step": 1898 }, { "epoch": 104.99, "eval_gen_len": 9.9091, "eval_loss": 1.0003846883773804, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8601, "eval_samples_per_second": 18.771, "eval_steps_per_second": 3.242, "step": 1916 }, { "epoch": 105.97, "eval_gen_len": 9.9364, "eval_loss": 0.9851242899894714, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8522, "eval_samples_per_second": 18.796, "eval_steps_per_second": 3.247, "step": 1934 }, { "epoch": 106.96, "eval_gen_len": 9.9545, "eval_loss": 0.9700939059257507, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8548, "eval_samples_per_second": 18.788, "eval_steps_per_second": 3.245, "step": 1952 }, { "epoch": 108.0, "eval_gen_len": 9.9182, "eval_loss": 0.9482754468917847, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8554, "eval_samples_per_second": 18.786, "eval_steps_per_second": 3.245, "step": 1971 }, { "epoch": 108.99, "eval_gen_len": 9.9273, "eval_loss": 0.9286762475967407, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8532, "eval_samples_per_second": 18.793, "eval_steps_per_second": 3.246, "step": 1989 }, { "epoch": 109.59, "grad_norm": 1.9006233215332031, "learning_rate": 1.5566666666666668e-05, "loss": 1.4343, "step": 2000 }, { "epoch": 109.97, "eval_gen_len": 9.8364, "eval_loss": 0.906920850276947, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8648, "eval_samples_per_second": 18.756, "eval_steps_per_second": 3.24, "step": 2007 }, { "epoch": 110.96, "eval_gen_len": 9.9273, "eval_loss": 0.8903268575668335, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8436, "eval_samples_per_second": 18.824, "eval_steps_per_second": 3.251, "step": 2025 }, { "epoch": 112.0, "eval_gen_len": 9.8818, "eval_loss": 0.8693720102310181, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8617, "eval_samples_per_second": 18.766, "eval_steps_per_second": 3.241, "step": 2044 }, { "epoch": 112.99, "eval_gen_len": 9.8182, "eval_loss": 0.8487831354141235, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8484, "eval_samples_per_second": 18.808, "eval_steps_per_second": 3.249, "step": 2062 }, { "epoch": 113.97, "eval_gen_len": 9.8, "eval_loss": 0.8259497284889221, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8508, "eval_samples_per_second": 18.801, "eval_steps_per_second": 3.247, "step": 2080 }, { "epoch": 114.96, "eval_gen_len": 9.7818, "eval_loss": 0.8031529784202576, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8573, "eval_samples_per_second": 18.78, "eval_steps_per_second": 3.244, "step": 2098 }, { "epoch": 116.0, "eval_gen_len": 9.7273, "eval_loss": 0.7789543271064758, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8581, "eval_samples_per_second": 18.777, "eval_steps_per_second": 3.243, "step": 2117 }, { "epoch": 116.99, "eval_gen_len": 9.5909, "eval_loss": 0.7579861283302307, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8897, "eval_samples_per_second": 18.677, "eval_steps_per_second": 3.226, "step": 2135 }, { "epoch": 117.97, "eval_gen_len": 9.7273, "eval_loss": 0.7319938540458679, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8872, "eval_samples_per_second": 18.684, "eval_steps_per_second": 3.227, "step": 2153 }, { "epoch": 118.96, "eval_gen_len": 9.7455, "eval_loss": 0.710875928401947, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8965, "eval_samples_per_second": 18.655, "eval_steps_per_second": 3.222, "step": 2171 }, { "epoch": 120.0, "eval_gen_len": 9.8182, "eval_loss": 0.6845319271087646, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8634, "eval_samples_per_second": 18.76, "eval_steps_per_second": 3.24, "step": 2190 }, { "epoch": 120.99, "eval_gen_len": 9.8455, "eval_loss": 0.6633948683738708, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8707, "eval_samples_per_second": 18.737, "eval_steps_per_second": 3.236, "step": 2208 }, { "epoch": 121.97, "eval_gen_len": 9.8364, "eval_loss": 0.6423484683036804, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8549, "eval_samples_per_second": 18.788, "eval_steps_per_second": 3.245, "step": 2226 }, { "epoch": 122.96, "eval_gen_len": 9.8273, "eval_loss": 0.6260173916816711, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8887, "eval_samples_per_second": 18.68, "eval_steps_per_second": 3.227, "step": 2244 }, { "epoch": 124.0, "eval_gen_len": 9.7636, "eval_loss": 0.604343056678772, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8959, "eval_samples_per_second": 18.657, "eval_steps_per_second": 3.223, "step": 2263 }, { "epoch": 124.99, "eval_gen_len": 9.7273, "eval_loss": 0.5872541666030884, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8541, "eval_samples_per_second": 18.79, "eval_steps_per_second": 3.246, "step": 2281 }, { "epoch": 125.97, "eval_gen_len": 9.7818, "eval_loss": 0.5708852410316467, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8564, "eval_samples_per_second": 18.783, "eval_steps_per_second": 3.244, "step": 2299 }, { "epoch": 126.96, "eval_gen_len": 9.8364, "eval_loss": 0.5527102947235107, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.854, "eval_samples_per_second": 18.79, "eval_steps_per_second": 3.246, "step": 2317 }, { "epoch": 128.0, "eval_gen_len": 9.7545, "eval_loss": 0.537294864654541, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8561, "eval_samples_per_second": 18.784, "eval_steps_per_second": 3.244, "step": 2336 }, { "epoch": 128.99, "eval_gen_len": 9.5636, "eval_loss": 0.5231500864028931, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8429, "eval_samples_per_second": 18.826, "eval_steps_per_second": 3.252, "step": 2354 }, { "epoch": 129.97, "eval_gen_len": 9.7091, "eval_loss": 0.5122325420379639, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8959, "eval_samples_per_second": 18.657, "eval_steps_per_second": 3.223, "step": 2372 }, { "epoch": 130.96, "eval_gen_len": 9.6, "eval_loss": 0.501021683216095, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8801, "eval_samples_per_second": 18.707, "eval_steps_per_second": 3.231, "step": 2390 }, { "epoch": 132.0, "eval_gen_len": 9.7, "eval_loss": 0.4930221140384674, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8944, "eval_samples_per_second": 18.662, "eval_steps_per_second": 3.223, "step": 2409 }, { "epoch": 132.99, "eval_gen_len": 9.5909, "eval_loss": 0.48477092385292053, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8544, "eval_samples_per_second": 18.789, "eval_steps_per_second": 3.245, "step": 2427 }, { "epoch": 133.97, "eval_gen_len": 9.4818, "eval_loss": 0.47620585560798645, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8504, "eval_samples_per_second": 18.802, "eval_steps_per_second": 3.248, "step": 2445 }, { "epoch": 134.96, "eval_gen_len": 9.4727, "eval_loss": 0.4678414463996887, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8619, "eval_samples_per_second": 18.765, "eval_steps_per_second": 3.241, "step": 2463 }, { "epoch": 136.0, "eval_gen_len": 8.9727, "eval_loss": 0.46089962124824524, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9034, "eval_samples_per_second": 18.633, "eval_steps_per_second": 3.219, "step": 2482 }, { "epoch": 136.99, "grad_norm": 0.9683671593666077, "learning_rate": 1.4455555555555555e-05, "loss": 0.904, "step": 2500 }, { "epoch": 136.99, "eval_gen_len": 8.8182, "eval_loss": 0.45609110593795776, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8499, "eval_samples_per_second": 18.804, "eval_steps_per_second": 3.248, "step": 2500 }, { "epoch": 137.97, "eval_gen_len": 8.8, "eval_loss": 0.448975533246994, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8901, "eval_samples_per_second": 18.676, "eval_steps_per_second": 3.226, "step": 2518 }, { "epoch": 138.96, "eval_gen_len": 8.7091, "eval_loss": 0.44343459606170654, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8897, "eval_samples_per_second": 18.677, "eval_steps_per_second": 3.226, "step": 2536 }, { "epoch": 140.0, "eval_gen_len": 8.6818, "eval_loss": 0.4378666877746582, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9063, "eval_samples_per_second": 18.624, "eval_steps_per_second": 3.217, "step": 2555 }, { "epoch": 140.99, "eval_gen_len": 8.1182, "eval_loss": 0.4321661591529846, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8879, "eval_samples_per_second": 18.682, "eval_steps_per_second": 3.227, "step": 2573 }, { "epoch": 141.97, "eval_gen_len": 8.0455, "eval_loss": 0.42785531282424927, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8597, "eval_samples_per_second": 18.772, "eval_steps_per_second": 3.242, "step": 2591 }, { "epoch": 142.96, "eval_gen_len": 7.8909, "eval_loss": 0.42234906554222107, "eval_rouge1": 0.0026, "eval_rouge2": 0.002, "eval_rougeL": 0.0021, "eval_rougeLsum": 0.0021, "eval_runtime": 5.8591, "eval_samples_per_second": 18.774, "eval_steps_per_second": 3.243, "step": 2609 }, { "epoch": 144.0, "eval_gen_len": 7.5727, "eval_loss": 0.4168229401111603, "eval_rouge1": 0.0026, "eval_rouge2": 0.002, "eval_rougeL": 0.0021, "eval_rougeLsum": 0.0021, "eval_runtime": 5.862, "eval_samples_per_second": 18.765, "eval_steps_per_second": 3.241, "step": 2628 }, { "epoch": 144.99, "eval_gen_len": 7.4182, "eval_loss": 0.4125988185405731, "eval_rouge1": 0.0026, "eval_rouge2": 0.002, "eval_rougeL": 0.0021, "eval_rougeLsum": 0.0021, "eval_runtime": 5.855, "eval_samples_per_second": 18.787, "eval_steps_per_second": 3.245, "step": 2646 }, { "epoch": 145.97, "eval_gen_len": 6.8545, "eval_loss": 0.40698733925819397, "eval_rouge1": 0.0026, "eval_rouge2": 0.002, "eval_rougeL": 0.0021, "eval_rougeLsum": 0.0021, "eval_runtime": 5.8558, "eval_samples_per_second": 18.785, "eval_steps_per_second": 3.245, "step": 2664 }, { "epoch": 146.96, "eval_gen_len": 6.6, "eval_loss": 0.4030015468597412, "eval_rouge1": 0.0026, "eval_rouge2": 0.002, "eval_rougeL": 0.0021, "eval_rougeLsum": 0.0021, "eval_runtime": 5.8557, "eval_samples_per_second": 18.785, "eval_steps_per_second": 3.245, "step": 2682 }, { "epoch": 148.0, "eval_gen_len": 6.5273, "eval_loss": 0.39865967631340027, "eval_rouge1": 0.0044, "eval_rouge2": 0.0035, "eval_rougeL": 0.0039, "eval_rougeLsum": 0.0031, "eval_runtime": 5.863, "eval_samples_per_second": 18.762, "eval_steps_per_second": 3.241, "step": 2701 }, { "epoch": 148.99, "eval_gen_len": 6.5273, "eval_loss": 0.3959006071090698, "eval_rouge1": 0.0041, "eval_rouge2": 0.0035, "eval_rougeL": 0.0041, "eval_rougeLsum": 0.0035, "eval_runtime": 5.857, "eval_samples_per_second": 18.781, "eval_steps_per_second": 3.244, "step": 2719 }, { "epoch": 149.97, "eval_gen_len": 6.3, "eval_loss": 0.3902026414871216, "eval_rouge1": 0.0049, "eval_rouge2": 0.0041, "eval_rougeL": 0.004, "eval_rougeLsum": 0.0033, "eval_runtime": 5.855, "eval_samples_per_second": 18.787, "eval_steps_per_second": 3.245, "step": 2737 }, { "epoch": 150.96, "eval_gen_len": 6.1909, "eval_loss": 0.3883272707462311, "eval_rouge1": 0.0049, "eval_rouge2": 0.0041, "eval_rougeL": 0.004, "eval_rougeLsum": 0.0033, "eval_runtime": 5.853, "eval_samples_per_second": 18.794, "eval_steps_per_second": 3.246, "step": 2755 }, { "epoch": 152.0, "eval_gen_len": 6.1182, "eval_loss": 0.38500654697418213, "eval_rouge1": 0.006, "eval_rouge2": 0.0046, "eval_rougeL": 0.0058, "eval_rougeLsum": 0.0049, "eval_runtime": 5.8626, "eval_samples_per_second": 18.763, "eval_steps_per_second": 3.241, "step": 2774 }, { "epoch": 152.99, "eval_gen_len": 6.3818, "eval_loss": 0.38074272871017456, "eval_rouge1": 0.0065, "eval_rouge2": 0.0043, "eval_rougeL": 0.0058, "eval_rougeLsum": 0.0058, "eval_runtime": 5.8533, "eval_samples_per_second": 18.793, "eval_steps_per_second": 3.246, "step": 2792 }, { "epoch": 153.97, "eval_gen_len": 6.2, "eval_loss": 0.3779795467853546, "eval_rouge1": 0.0045, "eval_rouge2": 0.0022, "eval_rougeL": 0.0039, "eval_rougeLsum": 0.0038, "eval_runtime": 5.8616, "eval_samples_per_second": 18.766, "eval_steps_per_second": 3.241, "step": 2810 }, { "epoch": 154.96, "eval_gen_len": 6.3636, "eval_loss": 0.37300601601600647, "eval_rouge1": 0.0136, "eval_rouge2": 0.0099, "eval_rougeL": 0.0122, "eval_rougeLsum": 0.0117, "eval_runtime": 5.8638, "eval_samples_per_second": 18.759, "eval_steps_per_second": 3.24, "step": 2828 }, { "epoch": 156.0, "eval_gen_len": 6.4909, "eval_loss": 0.36999648809432983, "eval_rouge1": 0.0195, "eval_rouge2": 0.0152, "eval_rougeL": 0.0179, "eval_rougeLsum": 0.0173, "eval_runtime": 5.8779, "eval_samples_per_second": 18.714, "eval_steps_per_second": 3.232, "step": 2847 }, { "epoch": 156.99, "eval_gen_len": 6.5455, "eval_loss": 0.3666529059410095, "eval_rouge1": 0.0193, "eval_rouge2": 0.0135, "eval_rougeL": 0.0174, "eval_rougeLsum": 0.0171, "eval_runtime": 5.8596, "eval_samples_per_second": 18.773, "eval_steps_per_second": 3.243, "step": 2865 }, { "epoch": 157.97, "eval_gen_len": 6.1909, "eval_loss": 0.3615466058254242, "eval_rouge1": 0.019, "eval_rouge2": 0.0133, "eval_rougeL": 0.0169, "eval_rougeLsum": 0.0168, "eval_runtime": 5.8587, "eval_samples_per_second": 18.776, "eval_steps_per_second": 3.243, "step": 2883 }, { "epoch": 158.96, "eval_gen_len": 7.5, "eval_loss": 0.3599731922149658, "eval_rouge1": 0.0283, "eval_rouge2": 0.0195, "eval_rougeL": 0.0269, "eval_rougeLsum": 0.0268, "eval_runtime": 5.8597, "eval_samples_per_second": 18.772, "eval_steps_per_second": 3.242, "step": 2901 }, { "epoch": 160.0, "eval_gen_len": 6.8, "eval_loss": 0.3567432165145874, "eval_rouge1": 0.0241, "eval_rouge2": 0.0168, "eval_rougeL": 0.022, "eval_rougeLsum": 0.0216, "eval_runtime": 5.8734, "eval_samples_per_second": 18.729, "eval_steps_per_second": 3.235, "step": 2920 }, { "epoch": 160.99, "eval_gen_len": 6.6, "eval_loss": 0.35366886854171753, "eval_rouge1": 0.021, "eval_rouge2": 0.0135, "eval_rougeL": 0.0189, "eval_rougeLsum": 0.0184, "eval_runtime": 5.8665, "eval_samples_per_second": 18.751, "eval_steps_per_second": 3.239, "step": 2938 }, { "epoch": 161.97, "eval_gen_len": 8.6818, "eval_loss": 0.35154151916503906, "eval_rouge1": 0.0438, "eval_rouge2": 0.0277, "eval_rougeL": 0.0401, "eval_rougeLsum": 0.0401, "eval_runtime": 5.8694, "eval_samples_per_second": 18.741, "eval_steps_per_second": 3.237, "step": 2956 }, { "epoch": 162.96, "eval_gen_len": 8.2636, "eval_loss": 0.3467026948928833, "eval_rouge1": 0.0374, "eval_rouge2": 0.023, "eval_rougeL": 0.0346, "eval_rougeLsum": 0.0341, "eval_runtime": 5.8684, "eval_samples_per_second": 18.745, "eval_steps_per_second": 3.238, "step": 2974 }, { "epoch": 164.0, "eval_gen_len": 7.4636, "eval_loss": 0.343766987323761, "eval_rouge1": 0.0284, "eval_rouge2": 0.0184, "eval_rougeL": 0.0267, "eval_rougeLsum": 0.027, "eval_runtime": 5.876, "eval_samples_per_second": 18.72, "eval_steps_per_second": 3.233, "step": 2993 }, { "epoch": 164.38, "grad_norm": 0.7337635159492493, "learning_rate": 1.3346666666666667e-05, "loss": 0.5395, "step": 3000 }, { "epoch": 164.99, "eval_gen_len": 8.9909, "eval_loss": 0.3419288694858551, "eval_rouge1": 0.0445, "eval_rouge2": 0.0276, "eval_rougeL": 0.0414, "eval_rougeLsum": 0.0408, "eval_runtime": 5.8707, "eval_samples_per_second": 18.737, "eval_steps_per_second": 3.236, "step": 3011 }, { "epoch": 165.97, "eval_gen_len": 8.8909, "eval_loss": 0.33906012773513794, "eval_rouge1": 0.0446, "eval_rouge2": 0.0294, "eval_rougeL": 0.0415, "eval_rougeLsum": 0.0412, "eval_runtime": 5.8733, "eval_samples_per_second": 18.729, "eval_steps_per_second": 3.235, "step": 3029 }, { "epoch": 166.96, "eval_gen_len": 9.5455, "eval_loss": 0.3354407548904419, "eval_rouge1": 0.0498, "eval_rouge2": 0.032, "eval_rougeL": 0.0452, "eval_rougeLsum": 0.0453, "eval_runtime": 5.8714, "eval_samples_per_second": 18.735, "eval_steps_per_second": 3.236, "step": 3047 }, { "epoch": 168.0, "eval_gen_len": 10.0818, "eval_loss": 0.33422771096229553, "eval_rouge1": 0.0579, "eval_rouge2": 0.0388, "eval_rougeL": 0.0549, "eval_rougeLsum": 0.0553, "eval_runtime": 5.8768, "eval_samples_per_second": 18.718, "eval_steps_per_second": 3.233, "step": 3066 }, { "epoch": 168.99, "eval_gen_len": 10.2, "eval_loss": 0.331695020198822, "eval_rouge1": 0.0542, "eval_rouge2": 0.0353, "eval_rougeL": 0.0508, "eval_rougeLsum": 0.051, "eval_runtime": 5.8711, "eval_samples_per_second": 18.736, "eval_steps_per_second": 3.236, "step": 3084 }, { "epoch": 169.97, "eval_gen_len": 10.1364, "eval_loss": 0.3284001052379608, "eval_rouge1": 0.0555, "eval_rouge2": 0.0363, "eval_rougeL": 0.0523, "eval_rougeLsum": 0.0521, "eval_runtime": 5.8689, "eval_samples_per_second": 18.743, "eval_steps_per_second": 3.237, "step": 3102 }, { "epoch": 170.96, "eval_gen_len": 10.6727, "eval_loss": 0.32654786109924316, "eval_rouge1": 0.0562, "eval_rouge2": 0.0353, "eval_rougeL": 0.0519, "eval_rougeLsum": 0.0521, "eval_runtime": 5.8762, "eval_samples_per_second": 18.72, "eval_steps_per_second": 3.233, "step": 3120 }, { "epoch": 172.0, "eval_gen_len": 10.7818, "eval_loss": 0.3229809105396271, "eval_rouge1": 0.0551, "eval_rouge2": 0.0346, "eval_rougeL": 0.0523, "eval_rougeLsum": 0.0525, "eval_runtime": 5.8825, "eval_samples_per_second": 18.699, "eval_steps_per_second": 3.23, "step": 3139 }, { "epoch": 172.99, "eval_gen_len": 11.3727, "eval_loss": 0.3223954439163208, "eval_rouge1": 0.0614, "eval_rouge2": 0.0388, "eval_rougeL": 0.0579, "eval_rougeLsum": 0.0585, "eval_runtime": 5.8822, "eval_samples_per_second": 18.701, "eval_steps_per_second": 3.23, "step": 3157 }, { "epoch": 173.97, "eval_gen_len": 11.2909, "eval_loss": 0.31792977452278137, "eval_rouge1": 0.0584, "eval_rouge2": 0.0328, "eval_rougeL": 0.055, "eval_rougeLsum": 0.0553, "eval_runtime": 5.8754, "eval_samples_per_second": 18.722, "eval_steps_per_second": 3.234, "step": 3175 }, { "epoch": 174.96, "eval_gen_len": 12.2455, "eval_loss": 0.31657084822654724, "eval_rouge1": 0.0649, "eval_rouge2": 0.0392, "eval_rougeL": 0.0615, "eval_rougeLsum": 0.0619, "eval_runtime": 5.9198, "eval_samples_per_second": 18.582, "eval_steps_per_second": 3.21, "step": 3193 }, { "epoch": 176.0, "eval_gen_len": 11.7545, "eval_loss": 0.3131771981716156, "eval_rouge1": 0.0605, "eval_rouge2": 0.0341, "eval_rougeL": 0.0568, "eval_rougeLsum": 0.0571, "eval_runtime": 5.8941, "eval_samples_per_second": 18.663, "eval_steps_per_second": 3.224, "step": 3212 }, { "epoch": 176.99, "eval_gen_len": 12.2364, "eval_loss": 0.3109656870365143, "eval_rouge1": 0.0639, "eval_rouge2": 0.0396, "eval_rougeL": 0.0614, "eval_rougeLsum": 0.0619, "eval_runtime": 5.8859, "eval_samples_per_second": 18.689, "eval_steps_per_second": 3.228, "step": 3230 }, { "epoch": 177.97, "eval_gen_len": 11.9273, "eval_loss": 0.3089582324028015, "eval_rouge1": 0.0664, "eval_rouge2": 0.04, "eval_rougeL": 0.0619, "eval_rougeLsum": 0.0626, "eval_runtime": 5.873, "eval_samples_per_second": 18.73, "eval_steps_per_second": 3.235, "step": 3248 }, { "epoch": 178.96, "eval_gen_len": 12.3364, "eval_loss": 0.30663853883743286, "eval_rouge1": 0.0721, "eval_rouge2": 0.0455, "eval_rougeL": 0.0685, "eval_rougeLsum": 0.0688, "eval_runtime": 5.8871, "eval_samples_per_second": 18.685, "eval_steps_per_second": 3.227, "step": 3266 }, { "epoch": 180.0, "eval_gen_len": 12.6, "eval_loss": 0.3049904704093933, "eval_rouge1": 0.0727, "eval_rouge2": 0.0447, "eval_rougeL": 0.0691, "eval_rougeLsum": 0.0693, "eval_runtime": 5.8952, "eval_samples_per_second": 18.659, "eval_steps_per_second": 3.223, "step": 3285 }, { "epoch": 180.99, "eval_gen_len": 12.2636, "eval_loss": 0.30238986015319824, "eval_rouge1": 0.0716, "eval_rouge2": 0.0436, "eval_rougeL": 0.0684, "eval_rougeLsum": 0.0688, "eval_runtime": 5.8902, "eval_samples_per_second": 18.675, "eval_steps_per_second": 3.226, "step": 3303 }, { "epoch": 181.97, "eval_gen_len": 15.2, "eval_loss": 0.29928120970726013, "eval_rouge1": 0.0901, "eval_rouge2": 0.0567, "eval_rougeL": 0.0848, "eval_rougeLsum": 0.0851, "eval_runtime": 5.9062, "eval_samples_per_second": 18.624, "eval_steps_per_second": 3.217, "step": 3321 }, { "epoch": 182.96, "eval_gen_len": 13.8182, "eval_loss": 0.2973878085613251, "eval_rouge1": 0.0874, "eval_rouge2": 0.0583, "eval_rougeL": 0.084, "eval_rougeLsum": 0.0838, "eval_runtime": 5.892, "eval_samples_per_second": 18.67, "eval_steps_per_second": 3.225, "step": 3339 }, { "epoch": 184.0, "eval_gen_len": 14.8091, "eval_loss": 0.29529863595962524, "eval_rouge1": 0.0924, "eval_rouge2": 0.0616, "eval_rougeL": 0.0892, "eval_rougeLsum": 0.0893, "eval_runtime": 5.9216, "eval_samples_per_second": 18.576, "eval_steps_per_second": 3.209, "step": 3358 }, { "epoch": 184.99, "eval_gen_len": 16.4, "eval_loss": 0.29348161816596985, "eval_rouge1": 0.1029, "eval_rouge2": 0.0663, "eval_rougeL": 0.0987, "eval_rougeLsum": 0.0984, "eval_runtime": 5.8929, "eval_samples_per_second": 18.666, "eval_steps_per_second": 3.224, "step": 3376 }, { "epoch": 185.97, "eval_gen_len": 15.0364, "eval_loss": 0.2902657389640808, "eval_rouge1": 0.0923, "eval_rouge2": 0.0603, "eval_rougeL": 0.089, "eval_rougeLsum": 0.0887, "eval_runtime": 5.8988, "eval_samples_per_second": 18.648, "eval_steps_per_second": 3.221, "step": 3394 }, { "epoch": 186.96, "eval_gen_len": 16.7, "eval_loss": 0.2885950207710266, "eval_rouge1": 0.1027, "eval_rouge2": 0.0661, "eval_rougeL": 0.0985, "eval_rougeLsum": 0.099, "eval_runtime": 5.9087, "eval_samples_per_second": 18.617, "eval_steps_per_second": 3.216, "step": 3412 }, { "epoch": 188.0, "eval_gen_len": 15.0455, "eval_loss": 0.2858668267726898, "eval_rouge1": 0.0998, "eval_rouge2": 0.0686, "eval_rougeL": 0.0979, "eval_rougeLsum": 0.0973, "eval_runtime": 5.9128, "eval_samples_per_second": 18.604, "eval_steps_per_second": 3.213, "step": 3431 }, { "epoch": 188.99, "eval_gen_len": 16.7, "eval_loss": 0.28371575474739075, "eval_rouge1": 0.1081, "eval_rouge2": 0.0733, "eval_rougeL": 0.105, "eval_rougeLsum": 0.1051, "eval_runtime": 5.9179, "eval_samples_per_second": 18.588, "eval_steps_per_second": 3.211, "step": 3449 }, { "epoch": 189.97, "eval_gen_len": 16.9364, "eval_loss": 0.28239956498146057, "eval_rouge1": 0.1176, "eval_rouge2": 0.0809, "eval_rougeL": 0.1142, "eval_rougeLsum": 0.1136, "eval_runtime": 5.9392, "eval_samples_per_second": 18.521, "eval_steps_per_second": 3.199, "step": 3467 }, { "epoch": 190.96, "eval_gen_len": 17.1545, "eval_loss": 0.28112414479255676, "eval_rouge1": 0.1281, "eval_rouge2": 0.0923, "eval_rougeL": 0.1251, "eval_rougeLsum": 0.1247, "eval_runtime": 5.9439, "eval_samples_per_second": 18.506, "eval_steps_per_second": 3.197, "step": 3485 }, { "epoch": 191.78, "grad_norm": 0.6559975743293762, "learning_rate": 1.2235555555555556e-05, "loss": 0.4165, "step": 3500 }, { "epoch": 192.0, "eval_gen_len": 16.9909, "eval_loss": 0.2794179916381836, "eval_rouge1": 0.1326, "eval_rouge2": 0.0983, "eval_rougeL": 0.1308, "eval_rougeLsum": 0.1303, "eval_runtime": 5.9583, "eval_samples_per_second": 18.462, "eval_steps_per_second": 3.189, "step": 3504 }, { "epoch": 192.99, "eval_gen_len": 17.2818, "eval_loss": 0.2769763767719269, "eval_rouge1": 0.1377, "eval_rouge2": 0.1021, "eval_rougeL": 0.1352, "eval_rougeLsum": 0.1352, "eval_runtime": 5.9436, "eval_samples_per_second": 18.507, "eval_steps_per_second": 3.197, "step": 3522 }, { "epoch": 193.97, "eval_gen_len": 16.6727, "eval_loss": 0.27478569746017456, "eval_rouge1": 0.1421, "eval_rouge2": 0.1072, "eval_rougeL": 0.1391, "eval_rougeLsum": 0.1389, "eval_runtime": 5.9492, "eval_samples_per_second": 18.49, "eval_steps_per_second": 3.194, "step": 3540 }, { "epoch": 194.96, "eval_gen_len": 17.6273, "eval_loss": 0.273343026638031, "eval_rouge1": 0.1536, "eval_rouge2": 0.1166, "eval_rougeL": 0.1499, "eval_rougeLsum": 0.15, "eval_runtime": 5.9258, "eval_samples_per_second": 18.563, "eval_steps_per_second": 3.206, "step": 3558 }, { "epoch": 196.0, "eval_gen_len": 17.6182, "eval_loss": 0.27078884840011597, "eval_rouge1": 0.1575, "eval_rouge2": 0.1196, "eval_rougeL": 0.1531, "eval_rougeLsum": 0.1529, "eval_runtime": 5.9196, "eval_samples_per_second": 18.582, "eval_steps_per_second": 3.21, "step": 3577 }, { "epoch": 196.99, "eval_gen_len": 18.1727, "eval_loss": 0.2679530382156372, "eval_rouge1": 0.1544, "eval_rouge2": 0.1152, "eval_rougeL": 0.1492, "eval_rougeLsum": 0.1492, "eval_runtime": 5.9115, "eval_samples_per_second": 18.608, "eval_steps_per_second": 3.214, "step": 3595 }, { "epoch": 197.97, "eval_gen_len": 17.6636, "eval_loss": 0.26689696311950684, "eval_rouge1": 0.1615, "eval_rouge2": 0.1264, "eval_rougeL": 0.1579, "eval_rougeLsum": 0.1577, "eval_runtime": 5.911, "eval_samples_per_second": 18.609, "eval_steps_per_second": 3.214, "step": 3613 }, { "epoch": 198.96, "eval_gen_len": 18.3182, "eval_loss": 0.26582667231559753, "eval_rouge1": 0.1687, "eval_rouge2": 0.1322, "eval_rougeL": 0.165, "eval_rougeLsum": 0.1646, "eval_runtime": 5.9133, "eval_samples_per_second": 18.602, "eval_steps_per_second": 3.213, "step": 3631 }, { "epoch": 200.0, "eval_gen_len": 18.3091, "eval_loss": 0.262999951839447, "eval_rouge1": 0.1753, "eval_rouge2": 0.1408, "eval_rougeL": 0.1726, "eval_rougeLsum": 0.1721, "eval_runtime": 5.9621, "eval_samples_per_second": 18.45, "eval_steps_per_second": 3.187, "step": 3650 }, { "epoch": 200.99, "eval_gen_len": 18.6182, "eval_loss": 0.2615664601325989, "eval_rouge1": 0.1803, "eval_rouge2": 0.1452, "eval_rougeL": 0.1776, "eval_rougeLsum": 0.1767, "eval_runtime": 5.9566, "eval_samples_per_second": 18.467, "eval_steps_per_second": 3.19, "step": 3668 }, { "epoch": 201.97, "eval_gen_len": 18.4818, "eval_loss": 0.26064223051071167, "eval_rouge1": 0.1883, "eval_rouge2": 0.1542, "eval_rougeL": 0.1857, "eval_rougeLsum": 0.1854, "eval_runtime": 5.9305, "eval_samples_per_second": 18.548, "eval_steps_per_second": 3.204, "step": 3686 }, { "epoch": 202.96, "eval_gen_len": 18.6364, "eval_loss": 0.2593740224838257, "eval_rouge1": 0.1822, "eval_rouge2": 0.1461, "eval_rougeL": 0.179, "eval_rougeLsum": 0.1784, "eval_runtime": 5.9634, "eval_samples_per_second": 18.446, "eval_steps_per_second": 3.186, "step": 3704 }, { "epoch": 204.0, "eval_gen_len": 18.5909, "eval_loss": 0.2574484348297119, "eval_rouge1": 0.1785, "eval_rouge2": 0.1424, "eval_rougeL": 0.1754, "eval_rougeLsum": 0.1741, "eval_runtime": 5.9326, "eval_samples_per_second": 18.542, "eval_steps_per_second": 3.203, "step": 3723 }, { "epoch": 204.99, "eval_gen_len": 18.5909, "eval_loss": 0.255962997674942, "eval_rouge1": 0.182, "eval_rouge2": 0.1475, "eval_rougeL": 0.1799, "eval_rougeLsum": 0.1789, "eval_runtime": 5.9645, "eval_samples_per_second": 18.442, "eval_steps_per_second": 3.186, "step": 3741 }, { "epoch": 205.97, "eval_gen_len": 18.3818, "eval_loss": 0.25388580560684204, "eval_rouge1": 0.1899, "eval_rouge2": 0.1557, "eval_rougeL": 0.1862, "eval_rougeLsum": 0.1861, "eval_runtime": 5.9231, "eval_samples_per_second": 18.571, "eval_steps_per_second": 3.208, "step": 3759 }, { "epoch": 206.96, "eval_gen_len": 18.3727, "eval_loss": 0.251926988363266, "eval_rouge1": 0.1962, "eval_rouge2": 0.1635, "eval_rougeL": 0.1935, "eval_rougeLsum": 0.1929, "eval_runtime": 5.9351, "eval_samples_per_second": 18.534, "eval_steps_per_second": 3.201, "step": 3777 }, { "epoch": 208.0, "eval_gen_len": 18.5818, "eval_loss": 0.25200676918029785, "eval_rouge1": 0.2002, "eval_rouge2": 0.1666, "eval_rougeL": 0.197, "eval_rougeLsum": 0.1964, "eval_runtime": 5.9349, "eval_samples_per_second": 18.535, "eval_steps_per_second": 3.201, "step": 3796 }, { "epoch": 208.99, "eval_gen_len": 18.4182, "eval_loss": 0.24954193830490112, "eval_rouge1": 0.2016, "eval_rouge2": 0.1708, "eval_rougeL": 0.1997, "eval_rougeLsum": 0.1994, "eval_runtime": 5.9236, "eval_samples_per_second": 18.57, "eval_steps_per_second": 3.208, "step": 3814 }, { "epoch": 209.97, "eval_gen_len": 18.5182, "eval_loss": 0.2487880438566208, "eval_rouge1": 0.2029, "eval_rouge2": 0.172, "eval_rougeL": 0.2008, "eval_rougeLsum": 0.2005, "eval_runtime": 5.9666, "eval_samples_per_second": 18.436, "eval_steps_per_second": 3.184, "step": 3832 }, { "epoch": 210.96, "eval_gen_len": 18.7455, "eval_loss": 0.246944397687912, "eval_rouge1": 0.2126, "eval_rouge2": 0.183, "eval_rougeL": 0.2107, "eval_rougeLsum": 0.2102, "eval_runtime": 5.9334, "eval_samples_per_second": 18.539, "eval_steps_per_second": 3.202, "step": 3850 }, { "epoch": 212.0, "eval_gen_len": 18.7455, "eval_loss": 0.24503479897975922, "eval_rouge1": 0.2153, "eval_rouge2": 0.1832, "eval_rougeL": 0.213, "eval_rougeLsum": 0.2126, "eval_runtime": 5.9384, "eval_samples_per_second": 18.524, "eval_steps_per_second": 3.2, "step": 3869 }, { "epoch": 212.99, "eval_gen_len": 18.8, "eval_loss": 0.24539507925510406, "eval_rouge1": 0.2199, "eval_rouge2": 0.1891, "eval_rougeL": 0.2176, "eval_rougeLsum": 0.2173, "eval_runtime": 5.9326, "eval_samples_per_second": 18.542, "eval_steps_per_second": 3.203, "step": 3887 }, { "epoch": 213.97, "eval_gen_len": 18.7455, "eval_loss": 0.24339380860328674, "eval_rouge1": 0.2253, "eval_rouge2": 0.1972, "eval_rougeL": 0.2239, "eval_rougeLsum": 0.2238, "eval_runtime": 5.9368, "eval_samples_per_second": 18.529, "eval_steps_per_second": 3.2, "step": 3905 }, { "epoch": 214.96, "eval_gen_len": 18.8, "eval_loss": 0.24222548305988312, "eval_rouge1": 0.2245, "eval_rouge2": 0.1953, "eval_rougeL": 0.2234, "eval_rougeLsum": 0.2227, "eval_runtime": 5.9303, "eval_samples_per_second": 18.549, "eval_steps_per_second": 3.204, "step": 3923 }, { "epoch": 216.0, "eval_gen_len": 18.8, "eval_loss": 0.24038065969944, "eval_rouge1": 0.2269, "eval_rouge2": 0.1974, "eval_rougeL": 0.2255, "eval_rougeLsum": 0.2251, "eval_runtime": 5.9423, "eval_samples_per_second": 18.511, "eval_steps_per_second": 3.197, "step": 3942 }, { "epoch": 216.99, "eval_gen_len": 18.8, "eval_loss": 0.24085824191570282, "eval_rouge1": 0.2324, "eval_rouge2": 0.203, "eval_rougeL": 0.2305, "eval_rougeLsum": 0.2302, "eval_runtime": 5.9374, "eval_samples_per_second": 18.527, "eval_steps_per_second": 3.2, "step": 3960 }, { "epoch": 217.97, "eval_gen_len": 18.8, "eval_loss": 0.2390824556350708, "eval_rouge1": 0.2328, "eval_rouge2": 0.204, "eval_rougeL": 0.2309, "eval_rougeLsum": 0.2307, "eval_runtime": 5.9716, "eval_samples_per_second": 18.421, "eval_steps_per_second": 3.182, "step": 3978 }, { "epoch": 218.96, "eval_gen_len": 18.9636, "eval_loss": 0.23841167986392975, "eval_rouge1": 0.2394, "eval_rouge2": 0.2109, "eval_rougeL": 0.2381, "eval_rougeLsum": 0.238, "eval_runtime": 5.9528, "eval_samples_per_second": 18.479, "eval_steps_per_second": 3.192, "step": 3996 }, { "epoch": 219.18, "grad_norm": 1.037503957748413, "learning_rate": 1.1124444444444444e-05, "loss": 0.3439, "step": 4000 }, { "epoch": 220.0, "eval_gen_len": 18.9636, "eval_loss": 0.2358517199754715, "eval_rouge1": 0.2413, "eval_rouge2": 0.2128, "eval_rougeL": 0.2404, "eval_rougeLsum": 0.2403, "eval_runtime": 5.9869, "eval_samples_per_second": 18.373, "eval_steps_per_second": 3.174, "step": 4015 }, { "epoch": 220.99, "eval_gen_len": 18.9636, "eval_loss": 0.2353217452764511, "eval_rouge1": 0.2411, "eval_rouge2": 0.2122, "eval_rougeL": 0.2401, "eval_rougeLsum": 0.2398, "eval_runtime": 5.9744, "eval_samples_per_second": 18.412, "eval_steps_per_second": 3.18, "step": 4033 }, { "epoch": 221.97, "eval_gen_len": 18.9636, "eval_loss": 0.23452366888523102, "eval_rouge1": 0.2423, "eval_rouge2": 0.2131, "eval_rougeL": 0.2414, "eval_rougeLsum": 0.2409, "eval_runtime": 5.9345, "eval_samples_per_second": 18.536, "eval_steps_per_second": 3.202, "step": 4051 }, { "epoch": 222.96, "eval_gen_len": 18.9636, "eval_loss": 0.23285672068595886, "eval_rouge1": 0.2417, "eval_rouge2": 0.2133, "eval_rougeL": 0.2407, "eval_rougeLsum": 0.2404, "eval_runtime": 5.9399, "eval_samples_per_second": 18.519, "eval_steps_per_second": 3.199, "step": 4069 }, { "epoch": 224.0, "eval_gen_len": 18.9636, "eval_loss": 0.23224669694900513, "eval_rouge1": 0.2439, "eval_rouge2": 0.2166, "eval_rougeL": 0.2434, "eval_rougeLsum": 0.2431, "eval_runtime": 5.9565, "eval_samples_per_second": 18.467, "eval_steps_per_second": 3.19, "step": 4088 }, { "epoch": 224.99, "eval_gen_len": 18.9636, "eval_loss": 0.231593519449234, "eval_rouge1": 0.2416, "eval_rouge2": 0.2138, "eval_rougeL": 0.2413, "eval_rougeLsum": 0.2409, "eval_runtime": 5.9359, "eval_samples_per_second": 18.531, "eval_steps_per_second": 3.201, "step": 4106 }, { "epoch": 225.97, "eval_gen_len": 18.9636, "eval_loss": 0.23053088784217834, "eval_rouge1": 0.2423, "eval_rouge2": 0.2147, "eval_rougeL": 0.242, "eval_rougeLsum": 0.2414, "eval_runtime": 5.9359, "eval_samples_per_second": 18.531, "eval_steps_per_second": 3.201, "step": 4124 }, { "epoch": 226.96, "eval_gen_len": 18.9636, "eval_loss": 0.22999462485313416, "eval_rouge1": 0.243, "eval_rouge2": 0.215, "eval_rougeL": 0.2427, "eval_rougeLsum": 0.242, "eval_runtime": 5.939, "eval_samples_per_second": 18.522, "eval_steps_per_second": 3.199, "step": 4142 }, { "epoch": 228.0, "eval_gen_len": 18.9182, "eval_loss": 0.22881445288658142, "eval_rouge1": 0.2472, "eval_rouge2": 0.2201, "eval_rougeL": 0.2469, "eval_rougeLsum": 0.2466, "eval_runtime": 5.9542, "eval_samples_per_second": 18.474, "eval_steps_per_second": 3.191, "step": 4161 }, { "epoch": 228.99, "eval_gen_len": 18.9636, "eval_loss": 0.2282283455133438, "eval_rouge1": 0.247, "eval_rouge2": 0.2195, "eval_rougeL": 0.2468, "eval_rougeLsum": 0.2464, "eval_runtime": 5.9427, "eval_samples_per_second": 18.51, "eval_steps_per_second": 3.197, "step": 4179 }, { "epoch": 229.97, "eval_gen_len": 18.9182, "eval_loss": 0.22686214745044708, "eval_rouge1": 0.2437, "eval_rouge2": 0.2163, "eval_rougeL": 0.2436, "eval_rougeLsum": 0.2434, "eval_runtime": 5.9383, "eval_samples_per_second": 18.524, "eval_steps_per_second": 3.2, "step": 4197 }, { "epoch": 230.96, "eval_gen_len": 18.9636, "eval_loss": 0.22540777921676636, "eval_rouge1": 0.2485, "eval_rouge2": 0.2218, "eval_rougeL": 0.2484, "eval_rougeLsum": 0.2478, "eval_runtime": 5.9379, "eval_samples_per_second": 18.525, "eval_steps_per_second": 3.2, "step": 4215 }, { "epoch": 232.0, "eval_gen_len": 18.7455, "eval_loss": 0.22596728801727295, "eval_rouge1": 0.2458, "eval_rouge2": 0.2194, "eval_rougeL": 0.2456, "eval_rougeLsum": 0.2455, "eval_runtime": 5.9457, "eval_samples_per_second": 18.501, "eval_steps_per_second": 3.196, "step": 4234 }, { "epoch": 232.99, "eval_gen_len": 18.6182, "eval_loss": 0.22396238148212433, "eval_rouge1": 0.2482, "eval_rouge2": 0.2227, "eval_rougeL": 0.2481, "eval_rougeLsum": 0.248, "eval_runtime": 5.9411, "eval_samples_per_second": 18.515, "eval_steps_per_second": 3.198, "step": 4252 }, { "epoch": 233.97, "eval_gen_len": 18.6182, "eval_loss": 0.22270821034908295, "eval_rouge1": 0.2442, "eval_rouge2": 0.2178, "eval_rougeL": 0.2438, "eval_rougeLsum": 0.2435, "eval_runtime": 5.9443, "eval_samples_per_second": 18.505, "eval_steps_per_second": 3.196, "step": 4270 }, { "epoch": 234.96, "eval_gen_len": 18.6182, "eval_loss": 0.222365602850914, "eval_rouge1": 0.2491, "eval_rouge2": 0.2241, "eval_rougeL": 0.2487, "eval_rougeLsum": 0.2488, "eval_runtime": 5.9399, "eval_samples_per_second": 18.519, "eval_steps_per_second": 3.199, "step": 4288 }, { "epoch": 236.0, "eval_gen_len": 18.6182, "eval_loss": 0.22221778333187103, "eval_rouge1": 0.2486, "eval_rouge2": 0.2233, "eval_rougeL": 0.2484, "eval_rougeLsum": 0.2484, "eval_runtime": 5.9418, "eval_samples_per_second": 18.513, "eval_steps_per_second": 3.198, "step": 4307 }, { "epoch": 236.99, "eval_gen_len": 18.2727, "eval_loss": 0.2206820845603943, "eval_rouge1": 0.2443, "eval_rouge2": 0.219, "eval_rougeL": 0.2434, "eval_rougeLsum": 0.2434, "eval_runtime": 5.9282, "eval_samples_per_second": 18.555, "eval_steps_per_second": 3.205, "step": 4325 }, { "epoch": 237.97, "eval_gen_len": 17.4091, "eval_loss": 0.22046072781085968, "eval_rouge1": 0.2327, "eval_rouge2": 0.2091, "eval_rougeL": 0.2321, "eval_rougeLsum": 0.2325, "eval_runtime": 5.9336, "eval_samples_per_second": 18.539, "eval_steps_per_second": 3.202, "step": 4343 }, { "epoch": 238.96, "eval_gen_len": 15.1636, "eval_loss": 0.21863390505313873, "eval_rouge1": 0.1989, "eval_rouge2": 0.1774, "eval_rougeL": 0.1978, "eval_rougeLsum": 0.1979, "eval_runtime": 5.9311, "eval_samples_per_second": 18.546, "eval_steps_per_second": 3.203, "step": 4361 }, { "epoch": 240.0, "eval_gen_len": 16.1909, "eval_loss": 0.2192383110523224, "eval_rouge1": 0.2148, "eval_rouge2": 0.1923, "eval_rougeL": 0.2137, "eval_rougeLsum": 0.2144, "eval_runtime": 5.9365, "eval_samples_per_second": 18.529, "eval_steps_per_second": 3.201, "step": 4380 }, { "epoch": 240.99, "eval_gen_len": 13.0909, "eval_loss": 0.2176760584115982, "eval_rouge1": 0.1742, "eval_rouge2": 0.1548, "eval_rougeL": 0.1725, "eval_rougeLsum": 0.1723, "eval_runtime": 5.9161, "eval_samples_per_second": 18.593, "eval_steps_per_second": 3.212, "step": 4398 }, { "epoch": 241.97, "eval_gen_len": 11.7091, "eval_loss": 0.21764642000198364, "eval_rouge1": 0.1541, "eval_rouge2": 0.1376, "eval_rougeL": 0.1529, "eval_rougeLsum": 0.1537, "eval_runtime": 5.9162, "eval_samples_per_second": 18.593, "eval_steps_per_second": 3.212, "step": 4416 }, { "epoch": 242.96, "eval_gen_len": 12.9, "eval_loss": 0.21736116707324982, "eval_rouge1": 0.1671, "eval_rouge2": 0.1495, "eval_rougeL": 0.1661, "eval_rougeLsum": 0.1671, "eval_runtime": 5.9221, "eval_samples_per_second": 18.574, "eval_steps_per_second": 3.208, "step": 4434 }, { "epoch": 244.0, "eval_gen_len": 10.4818, "eval_loss": 0.21571263670921326, "eval_rouge1": 0.1364, "eval_rouge2": 0.1201, "eval_rougeL": 0.1354, "eval_rougeLsum": 0.135, "eval_runtime": 5.9422, "eval_samples_per_second": 18.512, "eval_steps_per_second": 3.197, "step": 4453 }, { "epoch": 244.99, "eval_gen_len": 8.9273, "eval_loss": 0.21506664156913757, "eval_rouge1": 0.1149, "eval_rouge2": 0.101, "eval_rougeL": 0.1133, "eval_rougeLsum": 0.1136, "eval_runtime": 5.9071, "eval_samples_per_second": 18.622, "eval_steps_per_second": 3.216, "step": 4471 }, { "epoch": 245.97, "eval_gen_len": 7.7182, "eval_loss": 0.21382498741149902, "eval_rouge1": 0.0989, "eval_rouge2": 0.0866, "eval_rougeL": 0.0976, "eval_rougeLsum": 0.0981, "eval_runtime": 5.9086, "eval_samples_per_second": 18.617, "eval_steps_per_second": 3.216, "step": 4489 }, { "epoch": 246.58, "grad_norm": 0.4568144977092743, "learning_rate": 1.0013333333333335e-05, "loss": 0.2977, "step": 4500 }, { "epoch": 246.96, "eval_gen_len": 7.3727, "eval_loss": 0.21434056758880615, "eval_rouge1": 0.0942, "eval_rouge2": 0.0823, "eval_rougeL": 0.093, "eval_rougeLsum": 0.0937, "eval_runtime": 5.9102, "eval_samples_per_second": 18.612, "eval_steps_per_second": 3.215, "step": 4507 }, { "epoch": 248.0, "eval_gen_len": 6.8636, "eval_loss": 0.2125895619392395, "eval_rouge1": 0.0884, "eval_rouge2": 0.0777, "eval_rougeL": 0.0876, "eval_rougeLsum": 0.0884, "eval_runtime": 5.9078, "eval_samples_per_second": 18.619, "eval_steps_per_second": 3.216, "step": 4526 }, { "epoch": 248.99, "eval_gen_len": 6.6909, "eval_loss": 0.21264444291591644, "eval_rouge1": 0.0856, "eval_rouge2": 0.0753, "eval_rougeL": 0.0851, "eval_rougeLsum": 0.0853, "eval_runtime": 5.9128, "eval_samples_per_second": 18.604, "eval_steps_per_second": 3.213, "step": 4544 }, { "epoch": 249.97, "eval_gen_len": 6.8636, "eval_loss": 0.2110782116651535, "eval_rouge1": 0.0871, "eval_rouge2": 0.0764, "eval_rougeL": 0.0865, "eval_rougeLsum": 0.0866, "eval_runtime": 5.911, "eval_samples_per_second": 18.609, "eval_steps_per_second": 3.214, "step": 4562 }, { "epoch": 250.96, "eval_gen_len": 6.3455, "eval_loss": 0.2114747315645218, "eval_rouge1": 0.0813, "eval_rouge2": 0.071, "eval_rougeL": 0.0805, "eval_rougeLsum": 0.0808, "eval_runtime": 5.9048, "eval_samples_per_second": 18.629, "eval_steps_per_second": 3.218, "step": 4580 }, { "epoch": 252.0, "eval_gen_len": 5.1364, "eval_loss": 0.21088837087154388, "eval_rouge1": 0.0658, "eval_rouge2": 0.0587, "eval_rougeL": 0.0647, "eval_rougeLsum": 0.0656, "eval_runtime": 5.9122, "eval_samples_per_second": 18.606, "eval_steps_per_second": 3.214, "step": 4599 }, { "epoch": 252.99, "eval_gen_len": 4.1, "eval_loss": 0.21026724576950073, "eval_rouge1": 0.0525, "eval_rouge2": 0.0474, "eval_rougeL": 0.0523, "eval_rougeLsum": 0.0531, "eval_runtime": 5.8924, "eval_samples_per_second": 18.668, "eval_steps_per_second": 3.225, "step": 4617 }, { "epoch": 253.97, "eval_gen_len": 3.4091, "eval_loss": 0.20923300087451935, "eval_rouge1": 0.0422, "eval_rouge2": 0.0371, "eval_rougeL": 0.0417, "eval_rougeLsum": 0.0427, "eval_runtime": 5.9069, "eval_samples_per_second": 18.622, "eval_steps_per_second": 3.217, "step": 4635 }, { "epoch": 254.96, "eval_gen_len": 3.4091, "eval_loss": 0.20883551239967346, "eval_rouge1": 0.0425, "eval_rouge2": 0.0373, "eval_rougeL": 0.0421, "eval_rougeLsum": 0.0432, "eval_runtime": 5.8899, "eval_samples_per_second": 18.676, "eval_steps_per_second": 3.226, "step": 4653 }, { "epoch": 256.0, "eval_gen_len": 3.0545, "eval_loss": 0.20799440145492554, "eval_rouge1": 0.0383, "eval_rouge2": 0.0339, "eval_rougeL": 0.0381, "eval_rougeLsum": 0.0386, "eval_runtime": 5.8948, "eval_samples_per_second": 18.66, "eval_steps_per_second": 3.223, "step": 4672 }, { "epoch": 256.99, "eval_gen_len": 2.0364, "eval_loss": 0.20746435225009918, "eval_rouge1": 0.0256, "eval_rouge2": 0.0228, "eval_rougeL": 0.0255, "eval_rougeLsum": 0.0259, "eval_runtime": 5.8826, "eval_samples_per_second": 18.699, "eval_steps_per_second": 3.23, "step": 4690 }, { "epoch": 257.97, "eval_gen_len": 2.0364, "eval_loss": 0.2079101800918579, "eval_rouge1": 0.026, "eval_rouge2": 0.0231, "eval_rougeL": 0.0258, "eval_rougeLsum": 0.0263, "eval_runtime": 5.9214, "eval_samples_per_second": 18.577, "eval_steps_per_second": 3.209, "step": 4708 }, { "epoch": 258.96, "eval_gen_len": 2.0182, "eval_loss": 0.20639775693416595, "eval_rouge1": 0.0232, "eval_rouge2": 0.0203, "eval_rougeL": 0.0231, "eval_rougeLsum": 0.0233, "eval_runtime": 5.9298, "eval_samples_per_second": 18.55, "eval_steps_per_second": 3.204, "step": 4726 }, { "epoch": 260.0, "eval_gen_len": 2.0182, "eval_loss": 0.20615024864673615, "eval_rouge1": 0.0238, "eval_rouge2": 0.0202, "eval_rougeL": 0.0237, "eval_rougeLsum": 0.0238, "eval_runtime": 5.9324, "eval_samples_per_second": 18.542, "eval_steps_per_second": 3.203, "step": 4745 }, { "epoch": 260.99, "eval_gen_len": 1.7091, "eval_loss": 0.20586760342121124, "eval_rouge1": 0.02, "eval_rouge2": 0.0175, "eval_rougeL": 0.0202, "eval_rougeLsum": 0.0201, "eval_runtime": 5.8796, "eval_samples_per_second": 18.709, "eval_steps_per_second": 3.231, "step": 4763 }, { "epoch": 261.97, "eval_gen_len": 0.6909, "eval_loss": 0.20486456155776978, "eval_rouge1": 0.0091, "eval_rouge2": 0.0079, "eval_rougeL": 0.0091, "eval_rougeLsum": 0.0091, "eval_runtime": 5.8759, "eval_samples_per_second": 18.721, "eval_steps_per_second": 3.234, "step": 4781 }, { "epoch": 262.96, "eval_gen_len": 0.6909, "eval_loss": 0.20466168224811554, "eval_rouge1": 0.0091, "eval_rouge2": 0.0079, "eval_rougeL": 0.0091, "eval_rougeLsum": 0.0091, "eval_runtime": 5.9163, "eval_samples_per_second": 18.593, "eval_steps_per_second": 3.211, "step": 4799 }, { "epoch": 264.0, "eval_gen_len": 0.6818, "eval_loss": 0.20416179299354553, "eval_rouge1": 0.0082, "eval_rouge2": 0.0071, "eval_rougeL": 0.0081, "eval_rougeLsum": 0.0082, "eval_runtime": 5.8989, "eval_samples_per_second": 18.647, "eval_steps_per_second": 3.221, "step": 4818 }, { "epoch": 264.99, "eval_gen_len": 0.3364, "eval_loss": 0.20311486721038818, "eval_rouge1": 0.0044, "eval_rouge2": 0.0038, "eval_rougeL": 0.0044, "eval_rougeLsum": 0.0046, "eval_runtime": 5.8639, "eval_samples_per_second": 18.759, "eval_steps_per_second": 3.24, "step": 4836 }, { "epoch": 265.97, "eval_gen_len": 0.3455, "eval_loss": 0.20284703373908997, "eval_rouge1": 0.0057, "eval_rouge2": 0.0051, "eval_rougeL": 0.0057, "eval_rougeLsum": 0.0057, "eval_runtime": 5.874, "eval_samples_per_second": 18.726, "eval_steps_per_second": 3.235, "step": 4854 }, { "epoch": 266.96, "eval_gen_len": 0.3455, "eval_loss": 0.20207703113555908, "eval_rouge1": 0.0057, "eval_rouge2": 0.0051, "eval_rougeL": 0.0057, "eval_rougeLsum": 0.0057, "eval_runtime": 5.871, "eval_samples_per_second": 18.736, "eval_steps_per_second": 3.236, "step": 4872 }, { "epoch": 268.0, "eval_gen_len": 0.3455, "eval_loss": 0.20189516246318817, "eval_rouge1": 0.0057, "eval_rouge2": 0.0051, "eval_rougeL": 0.0057, "eval_rougeLsum": 0.0057, "eval_runtime": 5.8784, "eval_samples_per_second": 18.713, "eval_steps_per_second": 3.232, "step": 4891 }, { "epoch": 268.99, "eval_gen_len": 0.3455, "eval_loss": 0.20159202814102173, "eval_rouge1": 0.0057, "eval_rouge2": 0.0051, "eval_rougeL": 0.0057, "eval_rougeLsum": 0.0057, "eval_runtime": 5.8675, "eval_samples_per_second": 18.747, "eval_steps_per_second": 3.238, "step": 4909 }, { "epoch": 269.97, "eval_gen_len": 0.3455, "eval_loss": 0.20119339227676392, "eval_rouge1": 0.0057, "eval_rouge2": 0.0051, "eval_rougeL": 0.0057, "eval_rougeLsum": 0.0057, "eval_runtime": 5.8693, "eval_samples_per_second": 18.741, "eval_steps_per_second": 3.237, "step": 4927 }, { "epoch": 270.96, "eval_gen_len": 0.3455, "eval_loss": 0.20063255727291107, "eval_rouge1": 0.0057, "eval_rouge2": 0.0051, "eval_rougeL": 0.0057, "eval_rougeLsum": 0.0057, "eval_runtime": 5.8634, "eval_samples_per_second": 18.76, "eval_steps_per_second": 3.24, "step": 4945 }, { "epoch": 272.0, "eval_gen_len": 0.3455, "eval_loss": 0.20042632520198822, "eval_rouge1": 0.0057, "eval_rouge2": 0.0051, "eval_rougeL": 0.0057, "eval_rougeLsum": 0.0057, "eval_runtime": 5.9002, "eval_samples_per_second": 18.644, "eval_steps_per_second": 3.22, "step": 4964 }, { "epoch": 272.99, "eval_gen_len": 0.3455, "eval_loss": 0.19937776029109955, "eval_rouge1": 0.0057, "eval_rouge2": 0.0051, "eval_rougeL": 0.0057, "eval_rougeLsum": 0.0057, "eval_runtime": 5.866, "eval_samples_per_second": 18.752, "eval_steps_per_second": 3.239, "step": 4982 }, { "epoch": 273.97, "grad_norm": 0.47718000411987305, "learning_rate": 8.902222222222224e-06, "loss": 0.2666, "step": 5000 }, { "epoch": 273.97, "eval_gen_len": 0.5182, "eval_loss": 0.199092298746109, "eval_rouge1": 0.008, "eval_rouge2": 0.0071, "eval_rougeL": 0.008, "eval_rougeLsum": 0.008, "eval_runtime": 5.8818, "eval_samples_per_second": 18.702, "eval_steps_per_second": 3.23, "step": 5000 }, { "epoch": 274.96, "eval_gen_len": 0.5182, "eval_loss": 0.1990521252155304, "eval_rouge1": 0.008, "eval_rouge2": 0.0071, "eval_rougeL": 0.008, "eval_rougeLsum": 0.008, "eval_runtime": 5.8794, "eval_samples_per_second": 18.709, "eval_steps_per_second": 3.232, "step": 5018 }, { "epoch": 276.0, "eval_gen_len": 0.5182, "eval_loss": 0.19846394658088684, "eval_rouge1": 0.008, "eval_rouge2": 0.0071, "eval_rougeL": 0.008, "eval_rougeLsum": 0.008, "eval_runtime": 5.8793, "eval_samples_per_second": 18.71, "eval_steps_per_second": 3.232, "step": 5037 }, { "epoch": 276.99, "eval_gen_len": 0.5182, "eval_loss": 0.1975831538438797, "eval_rouge1": 0.008, "eval_rouge2": 0.0071, "eval_rougeL": 0.008, "eval_rougeLsum": 0.008, "eval_runtime": 5.8684, "eval_samples_per_second": 18.744, "eval_steps_per_second": 3.238, "step": 5055 }, { "epoch": 277.97, "eval_gen_len": 0.1727, "eval_loss": 0.19685131311416626, "eval_rouge1": 0.0025, "eval_rouge2": 0.0024, "eval_rougeL": 0.0025, "eval_rougeLsum": 0.0025, "eval_runtime": 5.8685, "eval_samples_per_second": 18.744, "eval_steps_per_second": 3.238, "step": 5073 }, { "epoch": 278.96, "eval_gen_len": 0.1727, "eval_loss": 0.19685351848602295, "eval_rouge1": 0.0025, "eval_rouge2": 0.0024, "eval_rougeL": 0.0025, "eval_rougeLsum": 0.0025, "eval_runtime": 5.87, "eval_samples_per_second": 18.739, "eval_steps_per_second": 3.237, "step": 5091 }, { "epoch": 280.0, "eval_gen_len": 0.3455, "eval_loss": 0.19706358015537262, "eval_rouge1": 0.0057, "eval_rouge2": 0.0051, "eval_rougeL": 0.0057, "eval_rougeLsum": 0.0057, "eval_runtime": 5.8807, "eval_samples_per_second": 18.705, "eval_steps_per_second": 3.231, "step": 5110 }, { "epoch": 280.99, "eval_gen_len": 0.1727, "eval_loss": 0.19580155611038208, "eval_rouge1": 0.0025, "eval_rouge2": 0.0024, "eval_rougeL": 0.0025, "eval_rougeLsum": 0.0025, "eval_runtime": 5.8696, "eval_samples_per_second": 18.741, "eval_steps_per_second": 3.237, "step": 5128 }, { "epoch": 281.97, "eval_gen_len": 0.0, "eval_loss": 0.1954393833875656, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8668, "eval_samples_per_second": 18.75, "eval_steps_per_second": 3.239, "step": 5146 }, { "epoch": 282.96, "eval_gen_len": 0.0, "eval_loss": 0.19552475214004517, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8798, "eval_samples_per_second": 18.708, "eval_steps_per_second": 3.231, "step": 5164 }, { "epoch": 284.0, "eval_gen_len": 0.1727, "eval_loss": 0.1950557678937912, "eval_rouge1": 0.0025, "eval_rouge2": 0.0024, "eval_rougeL": 0.0025, "eval_rougeLsum": 0.0025, "eval_runtime": 5.8764, "eval_samples_per_second": 18.719, "eval_steps_per_second": 3.233, "step": 5183 }, { "epoch": 284.99, "eval_gen_len": 0.0, "eval_loss": 0.1939947009086609, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8768, "eval_samples_per_second": 18.718, "eval_steps_per_second": 3.233, "step": 5201 }, { "epoch": 285.97, "eval_gen_len": 0.0, "eval_loss": 0.19393964111804962, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8764, "eval_samples_per_second": 18.719, "eval_steps_per_second": 3.233, "step": 5219 }, { "epoch": 286.96, "eval_gen_len": 0.0, "eval_loss": 0.19383706152439117, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8701, "eval_samples_per_second": 18.739, "eval_steps_per_second": 3.237, "step": 5237 }, { "epoch": 288.0, "eval_gen_len": 0.0, "eval_loss": 0.1930641084909439, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8828, "eval_samples_per_second": 18.699, "eval_steps_per_second": 3.23, "step": 5256 }, { "epoch": 288.99, "eval_gen_len": 0.0, "eval_loss": 0.19216330349445343, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8721, "eval_samples_per_second": 18.733, "eval_steps_per_second": 3.236, "step": 5274 }, { "epoch": 289.97, "eval_gen_len": 0.0, "eval_loss": 0.1919858455657959, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.886, "eval_samples_per_second": 18.688, "eval_steps_per_second": 3.228, "step": 5292 }, { "epoch": 290.96, "eval_gen_len": 0.0, "eval_loss": 0.19181759655475616, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8804, "eval_samples_per_second": 18.706, "eval_steps_per_second": 3.231, "step": 5310 }, { "epoch": 292.0, "eval_gen_len": 0.0, "eval_loss": 0.1913154274225235, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8887, "eval_samples_per_second": 18.68, "eval_steps_per_second": 3.227, "step": 5329 }, { "epoch": 292.99, "eval_gen_len": 0.0, "eval_loss": 0.19096632301807404, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8809, "eval_samples_per_second": 18.704, "eval_steps_per_second": 3.231, "step": 5347 }, { "epoch": 293.97, "eval_gen_len": 0.0, "eval_loss": 0.19034302234649658, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.886, "eval_samples_per_second": 18.688, "eval_steps_per_second": 3.228, "step": 5365 }, { "epoch": 294.96, "eval_gen_len": 0.0, "eval_loss": 0.18984819948673248, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8905, "eval_samples_per_second": 18.674, "eval_steps_per_second": 3.226, "step": 5383 }, { "epoch": 296.0, "eval_gen_len": 0.0, "eval_loss": 0.1901622861623764, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8956, "eval_samples_per_second": 18.658, "eval_steps_per_second": 3.223, "step": 5402 }, { "epoch": 296.99, "eval_gen_len": 0.0, "eval_loss": 0.18923012912273407, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8738, "eval_samples_per_second": 18.727, "eval_steps_per_second": 3.235, "step": 5420 }, { "epoch": 297.97, "eval_gen_len": 0.0, "eval_loss": 0.18847127258777618, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8804, "eval_samples_per_second": 18.706, "eval_steps_per_second": 3.231, "step": 5438 }, { "epoch": 298.96, "eval_gen_len": 0.0, "eval_loss": 0.1884116530418396, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8771, "eval_samples_per_second": 18.717, "eval_steps_per_second": 3.233, "step": 5456 }, { "epoch": 300.0, "eval_gen_len": 0.0, "eval_loss": 0.18776828050613403, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8882, "eval_samples_per_second": 18.681, "eval_steps_per_second": 3.227, "step": 5475 }, { "epoch": 300.99, "eval_gen_len": 0.0, "eval_loss": 0.18751147389411926, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8894, "eval_samples_per_second": 18.677, "eval_steps_per_second": 3.226, "step": 5493 }, { "epoch": 301.37, "grad_norm": 0.6001901030540466, "learning_rate": 7.791111111111111e-06, "loss": 0.2463, "step": 5500 }, { "epoch": 301.97, "eval_gen_len": 0.0, "eval_loss": 0.18754757940769196, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8862, "eval_samples_per_second": 18.688, "eval_steps_per_second": 3.228, "step": 5511 }, { "epoch": 302.96, "eval_gen_len": 0.0, "eval_loss": 0.18714185059070587, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8804, "eval_samples_per_second": 18.706, "eval_steps_per_second": 3.231, "step": 5529 }, { "epoch": 304.0, "eval_gen_len": 0.0, "eval_loss": 0.18668492138385773, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8924, "eval_samples_per_second": 18.668, "eval_steps_per_second": 3.225, "step": 5548 }, { "epoch": 304.99, "eval_gen_len": 0.0, "eval_loss": 0.18612505495548248, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8787, "eval_samples_per_second": 18.712, "eval_steps_per_second": 3.232, "step": 5566 }, { "epoch": 305.97, "eval_gen_len": 0.0, "eval_loss": 0.18622124195098877, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8963, "eval_samples_per_second": 18.656, "eval_steps_per_second": 3.222, "step": 5584 }, { "epoch": 306.96, "eval_gen_len": 0.0, "eval_loss": 0.18581855297088623, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8851, "eval_samples_per_second": 18.691, "eval_steps_per_second": 3.228, "step": 5602 }, { "epoch": 308.0, "eval_gen_len": 0.0, "eval_loss": 0.1851491779088974, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8834, "eval_samples_per_second": 18.697, "eval_steps_per_second": 3.229, "step": 5621 }, { "epoch": 308.99, "eval_gen_len": 0.0, "eval_loss": 0.18541742861270905, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8802, "eval_samples_per_second": 18.707, "eval_steps_per_second": 3.231, "step": 5639 }, { "epoch": 309.97, "eval_gen_len": 0.0, "eval_loss": 0.1846253126859665, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.88, "eval_samples_per_second": 18.708, "eval_steps_per_second": 3.231, "step": 5657 }, { "epoch": 310.96, "eval_gen_len": 0.0, "eval_loss": 0.1842205971479416, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8908, "eval_samples_per_second": 18.673, "eval_steps_per_second": 3.225, "step": 5675 }, { "epoch": 312.0, "eval_gen_len": 0.0, "eval_loss": 0.1838139295578003, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8826, "eval_samples_per_second": 18.699, "eval_steps_per_second": 3.23, "step": 5694 }, { "epoch": 312.99, "eval_gen_len": 0.0, "eval_loss": 0.18346160650253296, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8686, "eval_samples_per_second": 18.744, "eval_steps_per_second": 3.238, "step": 5712 }, { "epoch": 313.97, "eval_gen_len": 0.0, "eval_loss": 0.18300552666187286, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8811, "eval_samples_per_second": 18.704, "eval_steps_per_second": 3.231, "step": 5730 }, { "epoch": 314.96, "eval_gen_len": 0.0, "eval_loss": 0.18304497003555298, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8736, "eval_samples_per_second": 18.728, "eval_steps_per_second": 3.235, "step": 5748 }, { "epoch": 316.0, "eval_gen_len": 0.0, "eval_loss": 0.1824423372745514, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9117, "eval_samples_per_second": 18.607, "eval_steps_per_second": 3.214, "step": 5767 }, { "epoch": 316.99, "eval_gen_len": 0.0, "eval_loss": 0.18214993178844452, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8772, "eval_samples_per_second": 18.716, "eval_steps_per_second": 3.233, "step": 5785 }, { "epoch": 317.97, "eval_gen_len": 0.0, "eval_loss": 0.1819440871477127, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8707, "eval_samples_per_second": 18.737, "eval_steps_per_second": 3.236, "step": 5803 }, { "epoch": 318.96, "eval_gen_len": 0.0, "eval_loss": 0.18124094605445862, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9162, "eval_samples_per_second": 18.593, "eval_steps_per_second": 3.212, "step": 5821 }, { "epoch": 320.0, "eval_gen_len": 0.0, "eval_loss": 0.18139633536338806, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.874, "eval_samples_per_second": 18.726, "eval_steps_per_second": 3.235, "step": 5840 }, { "epoch": 320.99, "eval_gen_len": 0.0, "eval_loss": 0.1810206174850464, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8919, "eval_samples_per_second": 18.67, "eval_steps_per_second": 3.225, "step": 5858 }, { "epoch": 321.97, "eval_gen_len": 0.0, "eval_loss": 0.1808764487504959, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9173, "eval_samples_per_second": 18.589, "eval_steps_per_second": 3.211, "step": 5876 }, { "epoch": 322.96, "eval_gen_len": 0.0, "eval_loss": 0.1801535189151764, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9176, "eval_samples_per_second": 18.589, "eval_steps_per_second": 3.211, "step": 5894 }, { "epoch": 324.0, "eval_gen_len": 0.0, "eval_loss": 0.1799170821905136, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9289, "eval_samples_per_second": 18.553, "eval_steps_per_second": 3.205, "step": 5913 }, { "epoch": 324.99, "eval_gen_len": 0.0, "eval_loss": 0.1797485649585724, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9221, "eval_samples_per_second": 18.575, "eval_steps_per_second": 3.208, "step": 5931 }, { "epoch": 325.97, "eval_gen_len": 0.0, "eval_loss": 0.17966806888580322, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9096, "eval_samples_per_second": 18.614, "eval_steps_per_second": 3.215, "step": 5949 }, { "epoch": 326.96, "eval_gen_len": 0.0, "eval_loss": 0.17945848405361176, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8715, "eval_samples_per_second": 18.735, "eval_steps_per_second": 3.236, "step": 5967 }, { "epoch": 328.0, "eval_gen_len": 0.0, "eval_loss": 0.17859123647212982, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8842, "eval_samples_per_second": 18.694, "eval_steps_per_second": 3.229, "step": 5986 }, { "epoch": 328.77, "grad_norm": 0.45411407947540283, "learning_rate": 6.680000000000001e-06, "loss": 0.2311, "step": 6000 }, { "epoch": 328.99, "eval_gen_len": 0.0, "eval_loss": 0.17845258116722107, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8879, "eval_samples_per_second": 18.682, "eval_steps_per_second": 3.227, "step": 6004 }, { "epoch": 329.97, "eval_gen_len": 0.0, "eval_loss": 0.17820703983306885, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8859, "eval_samples_per_second": 18.689, "eval_steps_per_second": 3.228, "step": 6022 }, { "epoch": 330.96, "eval_gen_len": 0.0, "eval_loss": 0.17832966148853302, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8753, "eval_samples_per_second": 18.723, "eval_steps_per_second": 3.234, "step": 6040 }, { "epoch": 332.0, "eval_gen_len": 0.0, "eval_loss": 0.17780448496341705, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8819, "eval_samples_per_second": 18.702, "eval_steps_per_second": 3.23, "step": 6059 }, { "epoch": 332.99, "eval_gen_len": 0.0, "eval_loss": 0.17747904360294342, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8701, "eval_samples_per_second": 18.739, "eval_steps_per_second": 3.237, "step": 6077 }, { "epoch": 333.97, "eval_gen_len": 0.0, "eval_loss": 0.17767922580242157, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8742, "eval_samples_per_second": 18.726, "eval_steps_per_second": 3.234, "step": 6095 }, { "epoch": 334.96, "eval_gen_len": 0.0, "eval_loss": 0.1771049201488495, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9091, "eval_samples_per_second": 18.615, "eval_steps_per_second": 3.215, "step": 6113 }, { "epoch": 336.0, "eval_gen_len": 0.0, "eval_loss": 0.1770164966583252, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8812, "eval_samples_per_second": 18.704, "eval_steps_per_second": 3.231, "step": 6132 }, { "epoch": 336.99, "eval_gen_len": 0.0, "eval_loss": 0.17682689428329468, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8773, "eval_samples_per_second": 18.716, "eval_steps_per_second": 3.233, "step": 6150 }, { "epoch": 337.97, "eval_gen_len": 0.0, "eval_loss": 0.1767437607049942, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8745, "eval_samples_per_second": 18.725, "eval_steps_per_second": 3.234, "step": 6168 }, { "epoch": 338.96, "eval_gen_len": 0.0, "eval_loss": 0.17660827934741974, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.876, "eval_samples_per_second": 18.72, "eval_steps_per_second": 3.233, "step": 6186 }, { "epoch": 340.0, "eval_gen_len": 0.0, "eval_loss": 0.1762937307357788, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8747, "eval_samples_per_second": 18.724, "eval_steps_per_second": 3.234, "step": 6205 }, { "epoch": 340.99, "eval_gen_len": 0.0, "eval_loss": 0.17532125115394592, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.877, "eval_samples_per_second": 18.717, "eval_steps_per_second": 3.233, "step": 6223 }, { "epoch": 341.97, "eval_gen_len": 0.0, "eval_loss": 0.17553770542144775, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8818, "eval_samples_per_second": 18.702, "eval_steps_per_second": 3.23, "step": 6241 }, { "epoch": 342.96, "eval_gen_len": 0.0, "eval_loss": 0.17563851177692413, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8839, "eval_samples_per_second": 18.695, "eval_steps_per_second": 3.229, "step": 6259 }, { "epoch": 344.0, "eval_gen_len": 0.0, "eval_loss": 0.17437517642974854, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8841, "eval_samples_per_second": 18.694, "eval_steps_per_second": 3.229, "step": 6278 }, { "epoch": 344.99, "eval_gen_len": 0.0, "eval_loss": 0.17455054819583893, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8746, "eval_samples_per_second": 18.725, "eval_steps_per_second": 3.234, "step": 6296 }, { "epoch": 345.97, "eval_gen_len": 0.0, "eval_loss": 0.17481422424316406, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8787, "eval_samples_per_second": 18.712, "eval_steps_per_second": 3.232, "step": 6314 }, { "epoch": 346.96, "eval_gen_len": 0.0, "eval_loss": 0.1744970828294754, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8781, "eval_samples_per_second": 18.713, "eval_steps_per_second": 3.232, "step": 6332 }, { "epoch": 348.0, "eval_gen_len": 0.0, "eval_loss": 0.17412132024765015, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8833, "eval_samples_per_second": 18.697, "eval_steps_per_second": 3.229, "step": 6351 }, { "epoch": 348.99, "eval_gen_len": 0.0, "eval_loss": 0.1738380789756775, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9075, "eval_samples_per_second": 18.621, "eval_steps_per_second": 3.216, "step": 6369 }, { "epoch": 349.97, "eval_gen_len": 0.0, "eval_loss": 0.17379747331142426, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.876, "eval_samples_per_second": 18.72, "eval_steps_per_second": 3.234, "step": 6387 }, { "epoch": 350.96, "eval_gen_len": 0.0, "eval_loss": 0.17336434125900269, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8785, "eval_samples_per_second": 18.712, "eval_steps_per_second": 3.232, "step": 6405 }, { "epoch": 352.0, "eval_gen_len": 0.0, "eval_loss": 0.17307358980178833, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9197, "eval_samples_per_second": 18.582, "eval_steps_per_second": 3.21, "step": 6424 }, { "epoch": 352.99, "eval_gen_len": 0.0, "eval_loss": 0.17290958762168884, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8928, "eval_samples_per_second": 18.667, "eval_steps_per_second": 3.224, "step": 6442 }, { "epoch": 353.97, "eval_gen_len": 0.0, "eval_loss": 0.1726769059896469, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8785, "eval_samples_per_second": 18.712, "eval_steps_per_second": 3.232, "step": 6460 }, { "epoch": 354.96, "eval_gen_len": 0.0, "eval_loss": 0.17268246412277222, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8721, "eval_samples_per_second": 18.733, "eval_steps_per_second": 3.236, "step": 6478 }, { "epoch": 356.0, "eval_gen_len": 0.0, "eval_loss": 0.17257879674434662, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8803, "eval_samples_per_second": 18.706, "eval_steps_per_second": 3.231, "step": 6497 }, { "epoch": 356.16, "grad_norm": 0.3692683279514313, "learning_rate": 5.56888888888889e-06, "loss": 0.2192, "step": 6500 }, { "epoch": 356.99, "eval_gen_len": 0.0, "eval_loss": 0.17185170948505402, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8845, "eval_samples_per_second": 18.693, "eval_steps_per_second": 3.229, "step": 6515 }, { "epoch": 357.97, "eval_gen_len": 0.0, "eval_loss": 0.17126674950122833, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8816, "eval_samples_per_second": 18.702, "eval_steps_per_second": 3.23, "step": 6533 }, { "epoch": 358.96, "eval_gen_len": 0.0, "eval_loss": 0.17136740684509277, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8802, "eval_samples_per_second": 18.707, "eval_steps_per_second": 3.231, "step": 6551 }, { "epoch": 360.0, "eval_gen_len": 0.0, "eval_loss": 0.1711340844631195, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8839, "eval_samples_per_second": 18.695, "eval_steps_per_second": 3.229, "step": 6570 }, { "epoch": 360.99, "eval_gen_len": 0.0, "eval_loss": 0.1712319403886795, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8782, "eval_samples_per_second": 18.713, "eval_steps_per_second": 3.232, "step": 6588 }, { "epoch": 361.97, "eval_gen_len": 0.0, "eval_loss": 0.1710105836391449, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8829, "eval_samples_per_second": 18.698, "eval_steps_per_second": 3.23, "step": 6606 }, { "epoch": 362.96, "eval_gen_len": 0.0, "eval_loss": 0.17070402204990387, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9043, "eval_samples_per_second": 18.631, "eval_steps_per_second": 3.218, "step": 6624 }, { "epoch": 364.0, "eval_gen_len": 0.0, "eval_loss": 0.1703125238418579, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9174, "eval_samples_per_second": 18.589, "eval_steps_per_second": 3.211, "step": 6643 }, { "epoch": 364.99, "eval_gen_len": 0.0, "eval_loss": 0.17009203135967255, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9165, "eval_samples_per_second": 18.592, "eval_steps_per_second": 3.211, "step": 6661 }, { "epoch": 365.97, "eval_gen_len": 0.0, "eval_loss": 0.1701081246137619, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8801, "eval_samples_per_second": 18.707, "eval_steps_per_second": 3.231, "step": 6679 }, { "epoch": 366.96, "eval_gen_len": 0.0, "eval_loss": 0.16997285187244415, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8816, "eval_samples_per_second": 18.703, "eval_steps_per_second": 3.23, "step": 6697 }, { "epoch": 368.0, "eval_gen_len": 0.0, "eval_loss": 0.16970933973789215, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8906, "eval_samples_per_second": 18.674, "eval_steps_per_second": 3.225, "step": 6716 }, { "epoch": 368.99, "eval_gen_len": 0.0, "eval_loss": 0.1696108877658844, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8896, "eval_samples_per_second": 18.677, "eval_steps_per_second": 3.226, "step": 6734 }, { "epoch": 369.97, "eval_gen_len": 0.0, "eval_loss": 0.1694546341896057, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9145, "eval_samples_per_second": 18.598, "eval_steps_per_second": 3.212, "step": 6752 }, { "epoch": 370.96, "eval_gen_len": 0.0, "eval_loss": 0.16927814483642578, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9179, "eval_samples_per_second": 18.588, "eval_steps_per_second": 3.211, "step": 6770 }, { "epoch": 372.0, "eval_gen_len": 0.0, "eval_loss": 0.16911160945892334, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8804, "eval_samples_per_second": 18.706, "eval_steps_per_second": 3.231, "step": 6789 }, { "epoch": 372.99, "eval_gen_len": 0.0, "eval_loss": 0.16869549453258514, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8847, "eval_samples_per_second": 18.693, "eval_steps_per_second": 3.229, "step": 6807 }, { "epoch": 373.97, "eval_gen_len": 0.0, "eval_loss": 0.16840766370296478, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9184, "eval_samples_per_second": 18.586, "eval_steps_per_second": 3.21, "step": 6825 }, { "epoch": 374.96, "eval_gen_len": 0.0, "eval_loss": 0.16836071014404297, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9124, "eval_samples_per_second": 18.605, "eval_steps_per_second": 3.214, "step": 6843 }, { "epoch": 376.0, "eval_gen_len": 0.0, "eval_loss": 0.168260395526886, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8823, "eval_samples_per_second": 18.7, "eval_steps_per_second": 3.23, "step": 6862 }, { "epoch": 376.99, "eval_gen_len": 0.0, "eval_loss": 0.16808076202869415, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8791, "eval_samples_per_second": 18.71, "eval_steps_per_second": 3.232, "step": 6880 }, { "epoch": 377.97, "eval_gen_len": 0.0, "eval_loss": 0.16791433095932007, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8871, "eval_samples_per_second": 18.685, "eval_steps_per_second": 3.227, "step": 6898 }, { "epoch": 378.96, "eval_gen_len": 0.0, "eval_loss": 0.16784635186195374, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8948, "eval_samples_per_second": 18.661, "eval_steps_per_second": 3.223, "step": 6916 }, { "epoch": 380.0, "eval_gen_len": 0.0, "eval_loss": 0.16753236949443817, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8837, "eval_samples_per_second": 18.696, "eval_steps_per_second": 3.229, "step": 6935 }, { "epoch": 380.99, "eval_gen_len": 0.0, "eval_loss": 0.16738325357437134, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8709, "eval_samples_per_second": 18.736, "eval_steps_per_second": 3.236, "step": 6953 }, { "epoch": 381.97, "eval_gen_len": 0.0, "eval_loss": 0.16733896732330322, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8784, "eval_samples_per_second": 18.713, "eval_steps_per_second": 3.232, "step": 6971 }, { "epoch": 382.96, "eval_gen_len": 0.0, "eval_loss": 0.16720926761627197, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8818, "eval_samples_per_second": 18.702, "eval_steps_per_second": 3.23, "step": 6989 }, { "epoch": 383.56, "grad_norm": 0.32142043113708496, "learning_rate": 4.457777777777778e-06, "loss": 0.211, "step": 7000 }, { "epoch": 384.0, "eval_gen_len": 0.0, "eval_loss": 0.16671238839626312, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8769, "eval_samples_per_second": 18.717, "eval_steps_per_second": 3.233, "step": 7008 }, { "epoch": 384.99, "eval_gen_len": 0.0, "eval_loss": 0.16662339866161346, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8678, "eval_samples_per_second": 18.746, "eval_steps_per_second": 3.238, "step": 7026 }, { "epoch": 385.97, "eval_gen_len": 0.0, "eval_loss": 0.16629952192306519, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8786, "eval_samples_per_second": 18.712, "eval_steps_per_second": 3.232, "step": 7044 }, { "epoch": 386.96, "eval_gen_len": 0.0, "eval_loss": 0.16644792258739471, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8767, "eval_samples_per_second": 18.718, "eval_steps_per_second": 3.233, "step": 7062 }, { "epoch": 388.0, "eval_gen_len": 0.0, "eval_loss": 0.1665712594985962, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8895, "eval_samples_per_second": 18.677, "eval_steps_per_second": 3.226, "step": 7081 }, { "epoch": 388.99, "eval_gen_len": 0.0, "eval_loss": 0.16607053577899933, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8751, "eval_samples_per_second": 18.723, "eval_steps_per_second": 3.234, "step": 7099 }, { "epoch": 389.97, "eval_gen_len": 0.0, "eval_loss": 0.16597412526607513, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8717, "eval_samples_per_second": 18.734, "eval_steps_per_second": 3.236, "step": 7117 }, { "epoch": 390.96, "eval_gen_len": 0.0, "eval_loss": 0.16590653359889984, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8807, "eval_samples_per_second": 18.705, "eval_steps_per_second": 3.231, "step": 7135 }, { "epoch": 392.0, "eval_gen_len": 0.0, "eval_loss": 0.16565540432929993, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8901, "eval_samples_per_second": 18.675, "eval_steps_per_second": 3.226, "step": 7154 }, { "epoch": 392.99, "eval_gen_len": 0.0, "eval_loss": 0.1656540036201477, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8795, "eval_samples_per_second": 18.709, "eval_steps_per_second": 3.232, "step": 7172 }, { "epoch": 393.97, "eval_gen_len": 0.0, "eval_loss": 0.16561686992645264, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.891, "eval_samples_per_second": 18.672, "eval_steps_per_second": 3.225, "step": 7190 }, { "epoch": 394.96, "eval_gen_len": 0.0, "eval_loss": 0.16566209495067596, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8782, "eval_samples_per_second": 18.713, "eval_steps_per_second": 3.232, "step": 7208 }, { "epoch": 396.0, "eval_gen_len": 0.0, "eval_loss": 0.16528266668319702, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9003, "eval_samples_per_second": 18.643, "eval_steps_per_second": 3.22, "step": 7227 }, { "epoch": 396.99, "eval_gen_len": 0.0, "eval_loss": 0.16509102284908295, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8787, "eval_samples_per_second": 18.712, "eval_steps_per_second": 3.232, "step": 7245 }, { "epoch": 397.97, "eval_gen_len": 0.0, "eval_loss": 0.16473665833473206, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8817, "eval_samples_per_second": 18.702, "eval_steps_per_second": 3.23, "step": 7263 }, { "epoch": 398.96, "eval_gen_len": 0.0, "eval_loss": 0.16476485133171082, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8783, "eval_samples_per_second": 18.713, "eval_steps_per_second": 3.232, "step": 7281 }, { "epoch": 400.0, "eval_gen_len": 0.0, "eval_loss": 0.1648998111486435, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8847, "eval_samples_per_second": 18.692, "eval_steps_per_second": 3.229, "step": 7300 }, { "epoch": 400.99, "eval_gen_len": 0.0, "eval_loss": 0.16450464725494385, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8833, "eval_samples_per_second": 18.697, "eval_steps_per_second": 3.23, "step": 7318 }, { "epoch": 401.97, "eval_gen_len": 0.0, "eval_loss": 0.16408170759677887, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9273, "eval_samples_per_second": 18.558, "eval_steps_per_second": 3.206, "step": 7336 }, { "epoch": 402.96, "eval_gen_len": 0.0, "eval_loss": 0.16408471763134003, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9148, "eval_samples_per_second": 18.597, "eval_steps_per_second": 3.212, "step": 7354 }, { "epoch": 404.0, "eval_gen_len": 0.0, "eval_loss": 0.1640195995569229, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.924, "eval_samples_per_second": 18.569, "eval_steps_per_second": 3.207, "step": 7373 }, { "epoch": 404.99, "eval_gen_len": 0.0, "eval_loss": 0.16400323808193207, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9183, "eval_samples_per_second": 18.586, "eval_steps_per_second": 3.21, "step": 7391 }, { "epoch": 405.97, "eval_gen_len": 0.0, "eval_loss": 0.16385148465633392, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9116, "eval_samples_per_second": 18.608, "eval_steps_per_second": 3.214, "step": 7409 }, { "epoch": 406.96, "eval_gen_len": 0.0, "eval_loss": 0.163739874958992, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9025, "eval_samples_per_second": 18.636, "eval_steps_per_second": 3.219, "step": 7427 }, { "epoch": 408.0, "eval_gen_len": 0.0, "eval_loss": 0.1632901281118393, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8717, "eval_samples_per_second": 18.734, "eval_steps_per_second": 3.236, "step": 7446 }, { "epoch": 408.99, "eval_gen_len": 0.0, "eval_loss": 0.1632470041513443, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8869, "eval_samples_per_second": 18.686, "eval_steps_per_second": 3.228, "step": 7464 }, { "epoch": 409.97, "eval_gen_len": 0.0, "eval_loss": 0.16338156163692474, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.903, "eval_samples_per_second": 18.635, "eval_steps_per_second": 3.219, "step": 7482 }, { "epoch": 410.96, "grad_norm": 0.4017387628555298, "learning_rate": 3.346666666666667e-06, "loss": 0.2061, "step": 7500 }, { "epoch": 410.96, "eval_gen_len": 0.0, "eval_loss": 0.16326506435871124, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8869, "eval_samples_per_second": 18.686, "eval_steps_per_second": 3.228, "step": 7500 }, { "epoch": 412.0, "eval_gen_len": 0.0, "eval_loss": 0.1629171222448349, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8877, "eval_samples_per_second": 18.683, "eval_steps_per_second": 3.227, "step": 7519 }, { "epoch": 412.99, "eval_gen_len": 0.0, "eval_loss": 0.1628817468881607, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9117, "eval_samples_per_second": 18.607, "eval_steps_per_second": 3.214, "step": 7537 }, { "epoch": 413.97, "eval_gen_len": 0.0, "eval_loss": 0.16285060346126556, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9244, "eval_samples_per_second": 18.567, "eval_steps_per_second": 3.207, "step": 7555 }, { "epoch": 414.96, "eval_gen_len": 0.0, "eval_loss": 0.16274811327457428, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8821, "eval_samples_per_second": 18.701, "eval_steps_per_second": 3.23, "step": 7573 }, { "epoch": 416.0, "eval_gen_len": 0.0, "eval_loss": 0.16286291182041168, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.918, "eval_samples_per_second": 18.587, "eval_steps_per_second": 3.211, "step": 7592 }, { "epoch": 416.99, "eval_gen_len": 0.0, "eval_loss": 0.16281896829605103, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8847, "eval_samples_per_second": 18.693, "eval_steps_per_second": 3.229, "step": 7610 }, { "epoch": 417.97, "eval_gen_len": 0.0, "eval_loss": 0.16268208622932434, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9077, "eval_samples_per_second": 18.62, "eval_steps_per_second": 3.216, "step": 7628 }, { "epoch": 418.96, "eval_gen_len": 0.0, "eval_loss": 0.16232354938983917, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9153, "eval_samples_per_second": 18.596, "eval_steps_per_second": 3.212, "step": 7646 }, { "epoch": 420.0, "eval_gen_len": 0.0, "eval_loss": 0.16238705813884735, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9249, "eval_samples_per_second": 18.566, "eval_steps_per_second": 3.207, "step": 7665 }, { "epoch": 420.99, "eval_gen_len": 0.0, "eval_loss": 0.16214394569396973, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8853, "eval_samples_per_second": 18.691, "eval_steps_per_second": 3.228, "step": 7683 }, { "epoch": 421.97, "eval_gen_len": 0.0, "eval_loss": 0.16213367879390717, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.869, "eval_samples_per_second": 18.743, "eval_steps_per_second": 3.237, "step": 7701 }, { "epoch": 422.96, "eval_gen_len": 0.0, "eval_loss": 0.1621711403131485, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.869, "eval_samples_per_second": 18.743, "eval_steps_per_second": 3.237, "step": 7719 }, { "epoch": 424.0, "eval_gen_len": 0.0, "eval_loss": 0.16199961304664612, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8763, "eval_samples_per_second": 18.719, "eval_steps_per_second": 3.233, "step": 7738 }, { "epoch": 424.99, "eval_gen_len": 0.0, "eval_loss": 0.1616300344467163, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9176, "eval_samples_per_second": 18.589, "eval_steps_per_second": 3.211, "step": 7756 }, { "epoch": 425.97, "eval_gen_len": 0.0, "eval_loss": 0.16134707629680634, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9055, "eval_samples_per_second": 18.627, "eval_steps_per_second": 3.217, "step": 7774 }, { "epoch": 426.96, "eval_gen_len": 0.0, "eval_loss": 0.1613784283399582, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8741, "eval_samples_per_second": 18.726, "eval_steps_per_second": 3.235, "step": 7792 }, { "epoch": 428.0, "eval_gen_len": 0.0, "eval_loss": 0.1613391935825348, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.9229, "eval_samples_per_second": 18.572, "eval_steps_per_second": 3.208, "step": 7811 }, { "epoch": 428.99, "eval_gen_len": 0.0, "eval_loss": 0.1613980233669281, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8737, "eval_samples_per_second": 18.728, "eval_steps_per_second": 3.235, "step": 7829 }, { "epoch": 429.97, "eval_gen_len": 0.0, "eval_loss": 0.16145525872707367, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8935, "eval_samples_per_second": 18.664, "eval_steps_per_second": 3.224, "step": 7847 }, { "epoch": 430.96, "eval_gen_len": 0.0, "eval_loss": 0.16126497089862823, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8806, "eval_samples_per_second": 18.705, "eval_steps_per_second": 3.231, "step": 7865 }, { "epoch": 432.0, "eval_gen_len": 0.0, "eval_loss": 0.16113270819187164, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8882, "eval_samples_per_second": 18.681, "eval_steps_per_second": 3.227, "step": 7884 }, { "epoch": 432.99, "eval_gen_len": 0.0, "eval_loss": 0.16119304299354553, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8803, "eval_samples_per_second": 18.707, "eval_steps_per_second": 3.231, "step": 7902 }, { "epoch": 433.97, "eval_gen_len": 0.0, "eval_loss": 0.16113385558128357, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8923, "eval_samples_per_second": 18.668, "eval_steps_per_second": 3.225, "step": 7920 }, { "epoch": 434.96, "eval_gen_len": 0.0, "eval_loss": 0.1608574539422989, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8855, "eval_samples_per_second": 18.69, "eval_steps_per_second": 3.228, "step": 7938 }, { "epoch": 436.0, "eval_gen_len": 0.0, "eval_loss": 0.16086578369140625, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8752, "eval_samples_per_second": 18.723, "eval_steps_per_second": 3.234, "step": 7957 }, { "epoch": 436.99, "eval_gen_len": 0.0, "eval_loss": 0.1609336882829666, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8764, "eval_samples_per_second": 18.719, "eval_steps_per_second": 3.233, "step": 7975 }, { "epoch": 437.97, "eval_gen_len": 0.0, "eval_loss": 0.16086000204086304, "eval_rouge1": 0.0, "eval_rouge2": 0.0, "eval_rougeL": 0.0, "eval_rougeLsum": 0.0, "eval_runtime": 5.8862, "eval_samples_per_second": 18.688, "eval_steps_per_second": 3.228, "step": 7993 }, { "epoch": 438.36, "grad_norm": 0.3427538573741913, "learning_rate": 2.235555555555556e-06, "loss": 0.2001, "step": 8000 } ], "logging_steps": 500, "max_steps": 9000, "num_input_tokens_seen": 0, "num_train_epochs": 500, "save_steps": 500, "total_flos": 2.332923933889659e+17, "train_batch_size": 6, "trial_name": null, "trial_params": null }