diff --git "a/checkpoint-8500/trainer_state.json" "b/checkpoint-8500/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-8500/trainer_state.json" @@ -0,0 +1,6185 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 465.75342465753425, + "eval_steps": 500, + "global_step": 8500, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.99, + "eval_gen_len": 14.6545, + "eval_loss": 21.487245559692383, + "eval_rouge1": 0.0812, + "eval_rouge2": 0.0183, + "eval_rougeL": 0.0668, + "eval_rougeLsum": 0.0663, + "eval_runtime": 5.8972, + "eval_samples_per_second": 18.653, + "eval_steps_per_second": 3.222, + "step": 18 + }, + { + "epoch": 1.97, + "eval_gen_len": 14.5091, + "eval_loss": 21.332395553588867, + "eval_rouge1": 0.0808, + "eval_rouge2": 0.0191, + "eval_rougeL": 0.0657, + "eval_rougeLsum": 0.0652, + "eval_runtime": 5.8919, + "eval_samples_per_second": 18.67, + "eval_steps_per_second": 3.225, + "step": 36 + }, + { + "epoch": 2.96, + "eval_gen_len": 14.0727, + "eval_loss": 21.108753204345703, + "eval_rouge1": 0.0819, + "eval_rouge2": 0.0203, + "eval_rougeL": 0.0671, + "eval_rougeLsum": 0.0665, + "eval_runtime": 5.8908, + "eval_samples_per_second": 18.673, + "eval_steps_per_second": 3.225, + "step": 54 + }, + { + "epoch": 4.0, + "eval_gen_len": 14.0727, + "eval_loss": 20.799583435058594, + "eval_rouge1": 0.0824, + "eval_rouge2": 0.0206, + "eval_rougeL": 0.0665, + "eval_rougeLsum": 0.0659, + "eval_runtime": 5.8851, + "eval_samples_per_second": 18.691, + "eval_steps_per_second": 3.228, + "step": 73 + }, + { + "epoch": 4.99, + "eval_gen_len": 14.2182, + "eval_loss": 20.4609317779541, + "eval_rouge1": 0.0814, + "eval_rouge2": 0.0212, + "eval_rougeL": 0.0669, + "eval_rougeLsum": 0.0664, + "eval_runtime": 5.8894, + "eval_samples_per_second": 18.677, + "eval_steps_per_second": 3.226, + "step": 91 + }, + { + "epoch": 5.97, + "eval_gen_len": 14.2182, + "eval_loss": 20.145660400390625, + "eval_rouge1": 0.0847, + "eval_rouge2": 0.0208, + "eval_rougeL": 0.067, + "eval_rougeLsum": 0.0666, + "eval_runtime": 5.8963, + "eval_samples_per_second": 18.656, + "eval_steps_per_second": 3.222, + "step": 109 + }, + { + "epoch": 6.96, + "eval_gen_len": 14.0727, + "eval_loss": 19.89990234375, + "eval_rouge1": 0.084, + "eval_rouge2": 0.0184, + "eval_rougeL": 0.0662, + "eval_rougeLsum": 0.0657, + "eval_runtime": 5.8939, + "eval_samples_per_second": 18.663, + "eval_steps_per_second": 3.224, + "step": 127 + }, + { + "epoch": 8.0, + "eval_gen_len": 14.1455, + "eval_loss": 19.661205291748047, + "eval_rouge1": 0.0882, + "eval_rouge2": 0.0219, + "eval_rougeL": 0.0703, + "eval_rougeLsum": 0.0699, + "eval_runtime": 5.9009, + "eval_samples_per_second": 18.641, + "eval_steps_per_second": 3.22, + "step": 146 + }, + { + "epoch": 8.99, + "eval_gen_len": 14.9273, + "eval_loss": 19.420059204101562, + "eval_rouge1": 0.0958, + "eval_rouge2": 0.0243, + "eval_rougeL": 0.0775, + "eval_rougeLsum": 0.0772, + "eval_runtime": 5.8976, + "eval_samples_per_second": 18.652, + "eval_steps_per_second": 3.222, + "step": 164 + }, + { + "epoch": 9.97, + "eval_gen_len": 14.9273, + "eval_loss": 19.1621036529541, + "eval_rouge1": 0.0915, + "eval_rouge2": 0.0256, + "eval_rougeL": 0.0769, + "eval_rougeLsum": 0.0763, + "eval_runtime": 5.937, + "eval_samples_per_second": 18.528, + "eval_steps_per_second": 3.2, + "step": 182 + }, + { + "epoch": 10.96, + "eval_gen_len": 14.7818, + "eval_loss": 18.873458862304688, + "eval_rouge1": 0.0968, + "eval_rouge2": 0.0284, + "eval_rougeL": 0.0786, + "eval_rougeLsum": 0.0786, + "eval_runtime": 5.8869, + "eval_samples_per_second": 18.686, + "eval_steps_per_second": 3.228, + "step": 200 + }, + { + "epoch": 12.0, + "eval_gen_len": 14.6364, + "eval_loss": 18.52387237548828, + "eval_rouge1": 0.0901, + "eval_rouge2": 0.0262, + "eval_rougeL": 0.0738, + "eval_rougeLsum": 0.0741, + "eval_runtime": 5.9384, + "eval_samples_per_second": 18.524, + "eval_steps_per_second": 3.2, + "step": 219 + }, + { + "epoch": 12.99, + "eval_gen_len": 14.4909, + "eval_loss": 18.13555145263672, + "eval_rouge1": 0.0886, + "eval_rouge2": 0.0263, + "eval_rougeL": 0.0714, + "eval_rougeLsum": 0.0717, + "eval_runtime": 5.9432, + "eval_samples_per_second": 18.509, + "eval_steps_per_second": 3.197, + "step": 237 + }, + { + "epoch": 13.97, + "eval_gen_len": 14.4909, + "eval_loss": 17.66015625, + "eval_rouge1": 0.0915, + "eval_rouge2": 0.03, + "eval_rougeL": 0.0712, + "eval_rougeLsum": 0.0713, + "eval_runtime": 5.9005, + "eval_samples_per_second": 18.643, + "eval_steps_per_second": 3.22, + "step": 255 + }, + { + "epoch": 14.96, + "eval_gen_len": 15.2182, + "eval_loss": 17.107126235961914, + "eval_rouge1": 0.0933, + "eval_rouge2": 0.0295, + "eval_rougeL": 0.0733, + "eval_rougeLsum": 0.0734, + "eval_runtime": 5.9126, + "eval_samples_per_second": 18.604, + "eval_steps_per_second": 3.213, + "step": 273 + }, + { + "epoch": 16.0, + "eval_gen_len": 15.2182, + "eval_loss": 16.4465389251709, + "eval_rouge1": 0.0792, + "eval_rouge2": 0.02, + "eval_rougeL": 0.0637, + "eval_rougeLsum": 0.0639, + "eval_runtime": 5.893, + "eval_samples_per_second": 18.666, + "eval_steps_per_second": 3.224, + "step": 292 + }, + { + "epoch": 16.99, + "eval_gen_len": 14.7818, + "eval_loss": 15.7671480178833, + "eval_rouge1": 0.069, + "eval_rouge2": 0.0176, + "eval_rougeL": 0.0534, + "eval_rougeLsum": 0.0537, + "eval_runtime": 5.8934, + "eval_samples_per_second": 18.665, + "eval_steps_per_second": 3.224, + "step": 310 + }, + { + "epoch": 17.97, + "eval_gen_len": 13.1545, + "eval_loss": 14.998970031738281, + "eval_rouge1": 0.0566, + "eval_rouge2": 0.0124, + "eval_rougeL": 0.0449, + "eval_rougeLsum": 0.0451, + "eval_runtime": 5.8867, + "eval_samples_per_second": 18.686, + "eval_steps_per_second": 3.228, + "step": 328 + }, + { + "epoch": 18.96, + "eval_gen_len": 11.1091, + "eval_loss": 14.060928344726562, + "eval_rouge1": 0.0498, + "eval_rouge2": 0.0171, + "eval_rougeL": 0.0368, + "eval_rougeLsum": 0.0367, + "eval_runtime": 5.6651, + "eval_samples_per_second": 19.417, + "eval_steps_per_second": 3.354, + "step": 346 + }, + { + "epoch": 20.0, + "eval_gen_len": 8.7273, + "eval_loss": 13.174235343933105, + "eval_rouge1": 0.0205, + "eval_rouge2": 0.0044, + "eval_rougeL": 0.0165, + "eval_rougeLsum": 0.0164, + "eval_runtime": 5.7609, + "eval_samples_per_second": 19.094, + "eval_steps_per_second": 3.298, + "step": 365 + }, + { + "epoch": 20.99, + "eval_gen_len": 6.8636, + "eval_loss": 12.371671676635742, + "eval_rouge1": 0.0139, + "eval_rouge2": 0.0028, + "eval_rougeL": 0.0124, + "eval_rougeLsum": 0.0123, + "eval_runtime": 5.2236, + "eval_samples_per_second": 21.058, + "eval_steps_per_second": 3.637, + "step": 383 + }, + { + "epoch": 21.97, + "eval_gen_len": 6.8727, + "eval_loss": 11.577987670898438, + "eval_rouge1": 0.0053, + "eval_rouge2": 0.0005, + "eval_rougeL": 0.0047, + "eval_rougeLsum": 0.0048, + "eval_runtime": 5.7472, + "eval_samples_per_second": 19.14, + "eval_steps_per_second": 3.306, + "step": 401 + }, + { + "epoch": 22.96, + "eval_gen_len": 7.3818, + "eval_loss": 10.78397274017334, + "eval_rouge1": 0.0021, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0021, + "eval_rougeLsum": 0.0021, + "eval_runtime": 5.7621, + "eval_samples_per_second": 19.09, + "eval_steps_per_second": 3.297, + "step": 419 + }, + { + "epoch": 24.0, + "eval_gen_len": 6.8273, + "eval_loss": 9.95447826385498, + "eval_rouge1": 0.0009, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0008, + "eval_rougeLsum": 0.0008, + "eval_runtime": 5.4394, + "eval_samples_per_second": 20.223, + "eval_steps_per_second": 3.493, + "step": 438 + }, + { + "epoch": 24.99, + "eval_gen_len": 7.0909, + "eval_loss": 9.179459571838379, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.4343, + "eval_samples_per_second": 20.242, + "eval_steps_per_second": 3.496, + "step": 456 + }, + { + "epoch": 25.97, + "eval_gen_len": 8.1, + "eval_loss": 8.421984672546387, + "eval_rouge1": 0.0005, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0005, + "eval_rougeLsum": 0.0005, + "eval_runtime": 5.7592, + "eval_samples_per_second": 19.1, + "eval_steps_per_second": 3.299, + "step": 474 + }, + { + "epoch": 26.96, + "eval_gen_len": 8.2636, + "eval_loss": 7.694218158721924, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8511, + "eval_samples_per_second": 18.8, + "eval_steps_per_second": 3.247, + "step": 492 + }, + { + "epoch": 27.4, + "grad_norm": 5.483399868011475, + "learning_rate": 1.8893333333333334e-05, + "loss": 16.3522, + "step": 500 + }, + { + "epoch": 28.0, + "eval_gen_len": 11.3818, + "eval_loss": 6.939992427825928, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.887, + "eval_samples_per_second": 18.685, + "eval_steps_per_second": 3.227, + "step": 511 + }, + { + "epoch": 28.99, + "eval_gen_len": 12.6273, + "eval_loss": 6.2829270362854, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.905, + "eval_samples_per_second": 18.628, + "eval_steps_per_second": 3.218, + "step": 529 + }, + { + "epoch": 29.97, + "eval_gen_len": 15.3091, + "eval_loss": 5.604813575744629, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 5.8732, + "eval_samples_per_second": 18.729, + "eval_steps_per_second": 3.235, + "step": 547 + }, + { + "epoch": 30.96, + "eval_gen_len": 17.3182, + "eval_loss": 4.977880954742432, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9021, + "eval_samples_per_second": 18.637, + "eval_steps_per_second": 3.219, + "step": 565 + }, + { + "epoch": 32.0, + "eval_gen_len": 18.3273, + "eval_loss": 4.37266731262207, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 5.9191, + "eval_samples_per_second": 18.584, + "eval_steps_per_second": 3.21, + "step": 584 + }, + { + "epoch": 32.99, + "eval_gen_len": 18.7091, + "eval_loss": 3.8430399894714355, + "eval_rouge1": 0.0002, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0002, + "eval_rougeLsum": 0.0002, + "eval_runtime": 5.8717, + "eval_samples_per_second": 18.734, + "eval_steps_per_second": 3.236, + "step": 602 + }, + { + "epoch": 33.97, + "eval_gen_len": 18.8545, + "eval_loss": 3.400059938430786, + "eval_rouge1": 0.0005, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0005, + "eval_rougeLsum": 0.0005, + "eval_runtime": 5.9022, + "eval_samples_per_second": 18.637, + "eval_steps_per_second": 3.219, + "step": 620 + }, + { + "epoch": 34.96, + "eval_gen_len": 19.0, + "eval_loss": 3.0639424324035645, + "eval_rouge1": 0.0003, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0003, + "eval_rougeLsum": 0.0003, + "eval_runtime": 5.8543, + "eval_samples_per_second": 18.79, + "eval_steps_per_second": 3.245, + "step": 638 + }, + { + "epoch": 36.0, + "eval_gen_len": 19.0, + "eval_loss": 2.758321762084961, + "eval_rouge1": 0.0003, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0003, + "eval_rougeLsum": 0.0003, + "eval_runtime": 5.9112, + "eval_samples_per_second": 18.609, + "eval_steps_per_second": 3.214, + "step": 657 + }, + { + "epoch": 36.99, + "eval_gen_len": 18.4364, + "eval_loss": 2.5352485179901123, + "eval_rouge1": 0.0003, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0003, + "eval_rougeLsum": 0.0003, + "eval_runtime": 5.8968, + "eval_samples_per_second": 18.654, + "eval_steps_per_second": 3.222, + "step": 675 + }, + { + "epoch": 37.97, + "eval_gen_len": 14.8909, + "eval_loss": 2.3651320934295654, + "eval_rouge1": 0.0005, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0005, + "eval_rougeLsum": 0.0005, + "eval_runtime": 5.8625, + "eval_samples_per_second": 18.763, + "eval_steps_per_second": 3.241, + "step": 693 + }, + { + "epoch": 38.96, + "eval_gen_len": 10.8273, + "eval_loss": 2.230059862136841, + "eval_rouge1": 0.0014, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0014, + "eval_rougeLsum": 0.0014, + "eval_runtime": 5.852, + "eval_samples_per_second": 18.797, + "eval_steps_per_second": 3.247, + "step": 711 + }, + { + "epoch": 40.0, + "eval_gen_len": 7.9545, + "eval_loss": 2.111604928970337, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8625, + "eval_samples_per_second": 18.763, + "eval_steps_per_second": 3.241, + "step": 730 + }, + { + "epoch": 40.99, + "eval_gen_len": 6.8364, + "eval_loss": 2.019117832183838, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8494, + "eval_samples_per_second": 18.805, + "eval_steps_per_second": 3.248, + "step": 748 + }, + { + "epoch": 41.97, + "eval_gen_len": 6.1727, + "eval_loss": 1.950454831123352, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8618, + "eval_samples_per_second": 18.765, + "eval_steps_per_second": 3.241, + "step": 766 + }, + { + "epoch": 42.96, + "eval_gen_len": 5.6, + "eval_loss": 1.9009199142456055, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8848, + "eval_samples_per_second": 18.692, + "eval_steps_per_second": 3.229, + "step": 784 + }, + { + "epoch": 44.0, + "eval_gen_len": 5.4455, + "eval_loss": 1.8568826913833618, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8517, + "eval_samples_per_second": 18.798, + "eval_steps_per_second": 3.247, + "step": 803 + }, + { + "epoch": 44.99, + "eval_gen_len": 5.1909, + "eval_loss": 1.8185267448425293, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8682, + "eval_samples_per_second": 18.745, + "eval_steps_per_second": 3.238, + "step": 821 + }, + { + "epoch": 45.97, + "eval_gen_len": 5.1182, + "eval_loss": 1.7847113609313965, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8488, + "eval_samples_per_second": 18.807, + "eval_steps_per_second": 3.249, + "step": 839 + }, + { + "epoch": 46.96, + "eval_gen_len": 4.9455, + "eval_loss": 1.7547551393508911, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8518, + "eval_samples_per_second": 18.798, + "eval_steps_per_second": 3.247, + "step": 857 + }, + { + "epoch": 48.0, + "eval_gen_len": 4.9455, + "eval_loss": 1.7266695499420166, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8585, + "eval_samples_per_second": 18.776, + "eval_steps_per_second": 3.243, + "step": 876 + }, + { + "epoch": 48.99, + "eval_gen_len": 4.7455, + "eval_loss": 1.7055079936981201, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8511, + "eval_samples_per_second": 18.8, + "eval_steps_per_second": 3.247, + "step": 894 + }, + { + "epoch": 49.97, + "eval_gen_len": 4.4727, + "eval_loss": 1.6864752769470215, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.848, + "eval_samples_per_second": 18.81, + "eval_steps_per_second": 3.249, + "step": 912 + }, + { + "epoch": 50.96, + "eval_gen_len": 4.4, + "eval_loss": 1.6679636240005493, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8942, + "eval_samples_per_second": 18.662, + "eval_steps_per_second": 3.223, + "step": 930 + }, + { + "epoch": 52.0, + "eval_gen_len": 4.8273, + "eval_loss": 1.6500035524368286, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8572, + "eval_samples_per_second": 18.78, + "eval_steps_per_second": 3.244, + "step": 949 + }, + { + "epoch": 52.99, + "eval_gen_len": 5.4273, + "eval_loss": 1.6347858905792236, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8874, + "eval_samples_per_second": 18.684, + "eval_steps_per_second": 3.227, + "step": 967 + }, + { + "epoch": 53.97, + "eval_gen_len": 5.4727, + "eval_loss": 1.620485782623291, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8561, + "eval_samples_per_second": 18.784, + "eval_steps_per_second": 3.244, + "step": 985 + }, + { + "epoch": 54.79, + "grad_norm": 3.8756470680236816, + "learning_rate": 1.7786666666666667e-05, + "loss": 3.4786, + "step": 1000 + }, + { + "epoch": 54.96, + "eval_gen_len": 4.9909, + "eval_loss": 1.6101189851760864, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9013, + "eval_samples_per_second": 18.64, + "eval_steps_per_second": 3.22, + "step": 1003 + }, + { + "epoch": 56.0, + "eval_gen_len": 4.9727, + "eval_loss": 1.5964934825897217, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8515, + "eval_samples_per_second": 18.799, + "eval_steps_per_second": 3.247, + "step": 1022 + }, + { + "epoch": 56.99, + "eval_gen_len": 5.3, + "eval_loss": 1.583103060722351, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8488, + "eval_samples_per_second": 18.807, + "eval_steps_per_second": 3.249, + "step": 1040 + }, + { + "epoch": 57.97, + "eval_gen_len": 5.2273, + "eval_loss": 1.5738121271133423, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8912, + "eval_samples_per_second": 18.672, + "eval_steps_per_second": 3.225, + "step": 1058 + }, + { + "epoch": 58.96, + "eval_gen_len": 5.5273, + "eval_loss": 1.5626448392868042, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8527, + "eval_samples_per_second": 18.795, + "eval_steps_per_second": 3.246, + "step": 1076 + }, + { + "epoch": 60.0, + "eval_gen_len": 6.3273, + "eval_loss": 1.5520726442337036, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8887, + "eval_samples_per_second": 18.68, + "eval_steps_per_second": 3.227, + "step": 1095 + }, + { + "epoch": 60.99, + "eval_gen_len": 7.1091, + "eval_loss": 1.5398296117782593, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8838, + "eval_samples_per_second": 18.695, + "eval_steps_per_second": 3.229, + "step": 1113 + }, + { + "epoch": 61.97, + "eval_gen_len": 7.9182, + "eval_loss": 1.5261036157608032, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8837, + "eval_samples_per_second": 18.696, + "eval_steps_per_second": 3.229, + "step": 1131 + }, + { + "epoch": 62.96, + "eval_gen_len": 8.6, + "eval_loss": 1.5135173797607422, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8878, + "eval_samples_per_second": 18.683, + "eval_steps_per_second": 3.227, + "step": 1149 + }, + { + "epoch": 64.0, + "eval_gen_len": 8.9727, + "eval_loss": 1.5019876956939697, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8993, + "eval_samples_per_second": 18.646, + "eval_steps_per_second": 3.221, + "step": 1168 + }, + { + "epoch": 64.99, + "eval_gen_len": 9.1455, + "eval_loss": 1.4927572011947632, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8898, + "eval_samples_per_second": 18.676, + "eval_steps_per_second": 3.226, + "step": 1186 + }, + { + "epoch": 65.97, + "eval_gen_len": 9.3636, + "eval_loss": 1.4839699268341064, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8485, + "eval_samples_per_second": 18.808, + "eval_steps_per_second": 3.249, + "step": 1204 + }, + { + "epoch": 66.96, + "eval_gen_len": 9.6727, + "eval_loss": 1.4724147319793701, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8498, + "eval_samples_per_second": 18.804, + "eval_steps_per_second": 3.248, + "step": 1222 + }, + { + "epoch": 68.0, + "eval_gen_len": 9.6545, + "eval_loss": 1.4610724449157715, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.857, + "eval_samples_per_second": 18.781, + "eval_steps_per_second": 3.244, + "step": 1241 + }, + { + "epoch": 68.99, + "eval_gen_len": 9.7182, + "eval_loss": 1.4491708278656006, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8545, + "eval_samples_per_second": 18.789, + "eval_steps_per_second": 3.245, + "step": 1259 + }, + { + "epoch": 69.97, + "eval_gen_len": 9.6727, + "eval_loss": 1.4401447772979736, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8617, + "eval_samples_per_second": 18.766, + "eval_steps_per_second": 3.241, + "step": 1277 + }, + { + "epoch": 70.96, + "eval_gen_len": 9.6818, + "eval_loss": 1.4306913614273071, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.861, + "eval_samples_per_second": 18.768, + "eval_steps_per_second": 3.242, + "step": 1295 + }, + { + "epoch": 72.0, + "eval_gen_len": 9.7636, + "eval_loss": 1.4177192449569702, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8583, + "eval_samples_per_second": 18.777, + "eval_steps_per_second": 3.243, + "step": 1314 + }, + { + "epoch": 72.99, + "eval_gen_len": 9.8182, + "eval_loss": 1.4081608057022095, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8574, + "eval_samples_per_second": 18.78, + "eval_steps_per_second": 3.244, + "step": 1332 + }, + { + "epoch": 73.97, + "eval_gen_len": 9.8, + "eval_loss": 1.3982936143875122, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8596, + "eval_samples_per_second": 18.773, + "eval_steps_per_second": 3.243, + "step": 1350 + }, + { + "epoch": 74.96, + "eval_gen_len": 9.7545, + "eval_loss": 1.385299563407898, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8543, + "eval_samples_per_second": 18.79, + "eval_steps_per_second": 3.245, + "step": 1368 + }, + { + "epoch": 76.0, + "eval_gen_len": 9.8727, + "eval_loss": 1.3723993301391602, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9071, + "eval_samples_per_second": 18.622, + "eval_steps_per_second": 3.216, + "step": 1387 + }, + { + "epoch": 76.99, + "eval_gen_len": 9.8636, + "eval_loss": 1.3635698556900024, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8955, + "eval_samples_per_second": 18.658, + "eval_steps_per_second": 3.223, + "step": 1405 + }, + { + "epoch": 77.97, + "eval_gen_len": 9.7727, + "eval_loss": 1.3577702045440674, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8571, + "eval_samples_per_second": 18.781, + "eval_steps_per_second": 3.244, + "step": 1423 + }, + { + "epoch": 78.96, + "eval_gen_len": 9.8455, + "eval_loss": 1.350039005279541, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8893, + "eval_samples_per_second": 18.678, + "eval_steps_per_second": 3.226, + "step": 1441 + }, + { + "epoch": 80.0, + "eval_gen_len": 9.8, + "eval_loss": 1.3370468616485596, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9191, + "eval_samples_per_second": 18.584, + "eval_steps_per_second": 3.21, + "step": 1460 + }, + { + "epoch": 80.99, + "eval_gen_len": 9.7909, + "eval_loss": 1.3282612562179565, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8938, + "eval_samples_per_second": 18.664, + "eval_steps_per_second": 3.224, + "step": 1478 + }, + { + "epoch": 81.97, + "eval_gen_len": 9.7273, + "eval_loss": 1.3168359994888306, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8597, + "eval_samples_per_second": 18.772, + "eval_steps_per_second": 3.242, + "step": 1496 + }, + { + "epoch": 82.19, + "grad_norm": 2.8337929248809814, + "learning_rate": 1.667777777777778e-05, + "loss": 1.7958, + "step": 1500 + }, + { + "epoch": 82.96, + "eval_gen_len": 9.8727, + "eval_loss": 1.3036466836929321, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8558, + "eval_samples_per_second": 18.785, + "eval_steps_per_second": 3.245, + "step": 1514 + }, + { + "epoch": 84.0, + "eval_gen_len": 9.9455, + "eval_loss": 1.2935236692428589, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8683, + "eval_samples_per_second": 18.745, + "eval_steps_per_second": 3.238, + "step": 1533 + }, + { + "epoch": 84.99, + "eval_gen_len": 9.9182, + "eval_loss": 1.2810677289962769, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8603, + "eval_samples_per_second": 18.77, + "eval_steps_per_second": 3.242, + "step": 1551 + }, + { + "epoch": 85.97, + "eval_gen_len": 9.9364, + "eval_loss": 1.2679041624069214, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8561, + "eval_samples_per_second": 18.784, + "eval_steps_per_second": 3.244, + "step": 1569 + }, + { + "epoch": 86.96, + "eval_gen_len": 9.9091, + "eval_loss": 1.259030818939209, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.855, + "eval_samples_per_second": 18.787, + "eval_steps_per_second": 3.245, + "step": 1587 + }, + { + "epoch": 88.0, + "eval_gen_len": 9.9636, + "eval_loss": 1.2463934421539307, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8568, + "eval_samples_per_second": 18.782, + "eval_steps_per_second": 3.244, + "step": 1606 + }, + { + "epoch": 88.99, + "eval_gen_len": 9.9091, + "eval_loss": 1.2380764484405518, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8541, + "eval_samples_per_second": 18.79, + "eval_steps_per_second": 3.246, + "step": 1624 + }, + { + "epoch": 89.97, + "eval_gen_len": 9.9091, + "eval_loss": 1.2272734642028809, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8679, + "eval_samples_per_second": 18.746, + "eval_steps_per_second": 3.238, + "step": 1642 + }, + { + "epoch": 90.96, + "eval_gen_len": 9.9091, + "eval_loss": 1.2162261009216309, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8611, + "eval_samples_per_second": 18.768, + "eval_steps_per_second": 3.242, + "step": 1660 + }, + { + "epoch": 92.0, + "eval_gen_len": 9.9273, + "eval_loss": 1.2035719156265259, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8669, + "eval_samples_per_second": 18.749, + "eval_steps_per_second": 3.239, + "step": 1679 + }, + { + "epoch": 92.99, + "eval_gen_len": 9.9727, + "eval_loss": 1.1865819692611694, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8764, + "eval_samples_per_second": 18.719, + "eval_steps_per_second": 3.233, + "step": 1697 + }, + { + "epoch": 93.97, + "eval_gen_len": 9.9273, + "eval_loss": 1.1713649034500122, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8739, + "eval_samples_per_second": 18.727, + "eval_steps_per_second": 3.235, + "step": 1715 + }, + { + "epoch": 94.96, + "eval_gen_len": 9.8818, + "eval_loss": 1.1566345691680908, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8775, + "eval_samples_per_second": 18.715, + "eval_steps_per_second": 3.233, + "step": 1733 + }, + { + "epoch": 96.0, + "eval_gen_len": 9.6818, + "eval_loss": 1.141555905342102, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8697, + "eval_samples_per_second": 18.74, + "eval_steps_per_second": 3.237, + "step": 1752 + }, + { + "epoch": 96.99, + "eval_gen_len": 9.5, + "eval_loss": 1.1269311904907227, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8514, + "eval_samples_per_second": 18.799, + "eval_steps_per_second": 3.247, + "step": 1770 + }, + { + "epoch": 97.97, + "eval_gen_len": 9.6545, + "eval_loss": 1.1134684085845947, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8582, + "eval_samples_per_second": 18.777, + "eval_steps_per_second": 3.243, + "step": 1788 + }, + { + "epoch": 98.96, + "eval_gen_len": 9.7, + "eval_loss": 1.099327564239502, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8567, + "eval_samples_per_second": 18.782, + "eval_steps_per_second": 3.244, + "step": 1806 + }, + { + "epoch": 100.0, + "eval_gen_len": 9.7909, + "eval_loss": 1.0843485593795776, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8693, + "eval_samples_per_second": 18.741, + "eval_steps_per_second": 3.237, + "step": 1825 + }, + { + "epoch": 100.99, + "eval_gen_len": 9.8909, + "eval_loss": 1.0678842067718506, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8524, + "eval_samples_per_second": 18.796, + "eval_steps_per_second": 3.247, + "step": 1843 + }, + { + "epoch": 101.97, + "eval_gen_len": 9.8636, + "eval_loss": 1.0531669855117798, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8548, + "eval_samples_per_second": 18.788, + "eval_steps_per_second": 3.245, + "step": 1861 + }, + { + "epoch": 102.96, + "eval_gen_len": 9.8364, + "eval_loss": 1.0373491048812866, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8537, + "eval_samples_per_second": 18.792, + "eval_steps_per_second": 3.246, + "step": 1879 + }, + { + "epoch": 104.0, + "eval_gen_len": 9.8636, + "eval_loss": 1.0185768604278564, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8656, + "eval_samples_per_second": 18.754, + "eval_steps_per_second": 3.239, + "step": 1898 + }, + { + "epoch": 104.99, + "eval_gen_len": 9.9091, + "eval_loss": 1.0003846883773804, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8601, + "eval_samples_per_second": 18.771, + "eval_steps_per_second": 3.242, + "step": 1916 + }, + { + "epoch": 105.97, + "eval_gen_len": 9.9364, + "eval_loss": 0.9851242899894714, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8522, + "eval_samples_per_second": 18.796, + "eval_steps_per_second": 3.247, + "step": 1934 + }, + { + "epoch": 106.96, + "eval_gen_len": 9.9545, + "eval_loss": 0.9700939059257507, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8548, + "eval_samples_per_second": 18.788, + "eval_steps_per_second": 3.245, + "step": 1952 + }, + { + "epoch": 108.0, + "eval_gen_len": 9.9182, + "eval_loss": 0.9482754468917847, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8554, + "eval_samples_per_second": 18.786, + "eval_steps_per_second": 3.245, + "step": 1971 + }, + { + "epoch": 108.99, + "eval_gen_len": 9.9273, + "eval_loss": 0.9286762475967407, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8532, + "eval_samples_per_second": 18.793, + "eval_steps_per_second": 3.246, + "step": 1989 + }, + { + "epoch": 109.59, + "grad_norm": 1.9006233215332031, + "learning_rate": 1.5566666666666668e-05, + "loss": 1.4343, + "step": 2000 + }, + { + "epoch": 109.97, + "eval_gen_len": 9.8364, + "eval_loss": 0.906920850276947, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8648, + "eval_samples_per_second": 18.756, + "eval_steps_per_second": 3.24, + "step": 2007 + }, + { + "epoch": 110.96, + "eval_gen_len": 9.9273, + "eval_loss": 0.8903268575668335, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8436, + "eval_samples_per_second": 18.824, + "eval_steps_per_second": 3.251, + "step": 2025 + }, + { + "epoch": 112.0, + "eval_gen_len": 9.8818, + "eval_loss": 0.8693720102310181, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8617, + "eval_samples_per_second": 18.766, + "eval_steps_per_second": 3.241, + "step": 2044 + }, + { + "epoch": 112.99, + "eval_gen_len": 9.8182, + "eval_loss": 0.8487831354141235, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8484, + "eval_samples_per_second": 18.808, + "eval_steps_per_second": 3.249, + "step": 2062 + }, + { + "epoch": 113.97, + "eval_gen_len": 9.8, + "eval_loss": 0.8259497284889221, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8508, + "eval_samples_per_second": 18.801, + "eval_steps_per_second": 3.247, + "step": 2080 + }, + { + "epoch": 114.96, + "eval_gen_len": 9.7818, + "eval_loss": 0.8031529784202576, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8573, + "eval_samples_per_second": 18.78, + "eval_steps_per_second": 3.244, + "step": 2098 + }, + { + "epoch": 116.0, + "eval_gen_len": 9.7273, + "eval_loss": 0.7789543271064758, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8581, + "eval_samples_per_second": 18.777, + "eval_steps_per_second": 3.243, + "step": 2117 + }, + { + "epoch": 116.99, + "eval_gen_len": 9.5909, + "eval_loss": 0.7579861283302307, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8897, + "eval_samples_per_second": 18.677, + "eval_steps_per_second": 3.226, + "step": 2135 + }, + { + "epoch": 117.97, + "eval_gen_len": 9.7273, + "eval_loss": 0.7319938540458679, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8872, + "eval_samples_per_second": 18.684, + "eval_steps_per_second": 3.227, + "step": 2153 + }, + { + "epoch": 118.96, + "eval_gen_len": 9.7455, + "eval_loss": 0.710875928401947, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8965, + "eval_samples_per_second": 18.655, + "eval_steps_per_second": 3.222, + "step": 2171 + }, + { + "epoch": 120.0, + "eval_gen_len": 9.8182, + "eval_loss": 0.6845319271087646, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8634, + "eval_samples_per_second": 18.76, + "eval_steps_per_second": 3.24, + "step": 2190 + }, + { + "epoch": 120.99, + "eval_gen_len": 9.8455, + "eval_loss": 0.6633948683738708, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8707, + "eval_samples_per_second": 18.737, + "eval_steps_per_second": 3.236, + "step": 2208 + }, + { + "epoch": 121.97, + "eval_gen_len": 9.8364, + "eval_loss": 0.6423484683036804, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8549, + "eval_samples_per_second": 18.788, + "eval_steps_per_second": 3.245, + "step": 2226 + }, + { + "epoch": 122.96, + "eval_gen_len": 9.8273, + "eval_loss": 0.6260173916816711, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8887, + "eval_samples_per_second": 18.68, + "eval_steps_per_second": 3.227, + "step": 2244 + }, + { + "epoch": 124.0, + "eval_gen_len": 9.7636, + "eval_loss": 0.604343056678772, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8959, + "eval_samples_per_second": 18.657, + "eval_steps_per_second": 3.223, + "step": 2263 + }, + { + "epoch": 124.99, + "eval_gen_len": 9.7273, + "eval_loss": 0.5872541666030884, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8541, + "eval_samples_per_second": 18.79, + "eval_steps_per_second": 3.246, + "step": 2281 + }, + { + "epoch": 125.97, + "eval_gen_len": 9.7818, + "eval_loss": 0.5708852410316467, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8564, + "eval_samples_per_second": 18.783, + "eval_steps_per_second": 3.244, + "step": 2299 + }, + { + "epoch": 126.96, + "eval_gen_len": 9.8364, + "eval_loss": 0.5527102947235107, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.854, + "eval_samples_per_second": 18.79, + "eval_steps_per_second": 3.246, + "step": 2317 + }, + { + "epoch": 128.0, + "eval_gen_len": 9.7545, + "eval_loss": 0.537294864654541, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8561, + "eval_samples_per_second": 18.784, + "eval_steps_per_second": 3.244, + "step": 2336 + }, + { + "epoch": 128.99, + "eval_gen_len": 9.5636, + "eval_loss": 0.5231500864028931, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8429, + "eval_samples_per_second": 18.826, + "eval_steps_per_second": 3.252, + "step": 2354 + }, + { + "epoch": 129.97, + "eval_gen_len": 9.7091, + "eval_loss": 0.5122325420379639, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8959, + "eval_samples_per_second": 18.657, + "eval_steps_per_second": 3.223, + "step": 2372 + }, + { + "epoch": 130.96, + "eval_gen_len": 9.6, + "eval_loss": 0.501021683216095, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8801, + "eval_samples_per_second": 18.707, + "eval_steps_per_second": 3.231, + "step": 2390 + }, + { + "epoch": 132.0, + "eval_gen_len": 9.7, + "eval_loss": 0.4930221140384674, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8944, + "eval_samples_per_second": 18.662, + "eval_steps_per_second": 3.223, + "step": 2409 + }, + { + "epoch": 132.99, + "eval_gen_len": 9.5909, + "eval_loss": 0.48477092385292053, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8544, + "eval_samples_per_second": 18.789, + "eval_steps_per_second": 3.245, + "step": 2427 + }, + { + "epoch": 133.97, + "eval_gen_len": 9.4818, + "eval_loss": 0.47620585560798645, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8504, + "eval_samples_per_second": 18.802, + "eval_steps_per_second": 3.248, + "step": 2445 + }, + { + "epoch": 134.96, + "eval_gen_len": 9.4727, + "eval_loss": 0.4678414463996887, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8619, + "eval_samples_per_second": 18.765, + "eval_steps_per_second": 3.241, + "step": 2463 + }, + { + "epoch": 136.0, + "eval_gen_len": 8.9727, + "eval_loss": 0.46089962124824524, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9034, + "eval_samples_per_second": 18.633, + "eval_steps_per_second": 3.219, + "step": 2482 + }, + { + "epoch": 136.99, + "grad_norm": 0.9683671593666077, + "learning_rate": 1.4455555555555555e-05, + "loss": 0.904, + "step": 2500 + }, + { + "epoch": 136.99, + "eval_gen_len": 8.8182, + "eval_loss": 0.45609110593795776, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8499, + "eval_samples_per_second": 18.804, + "eval_steps_per_second": 3.248, + "step": 2500 + }, + { + "epoch": 137.97, + "eval_gen_len": 8.8, + "eval_loss": 0.448975533246994, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8901, + "eval_samples_per_second": 18.676, + "eval_steps_per_second": 3.226, + "step": 2518 + }, + { + "epoch": 138.96, + "eval_gen_len": 8.7091, + "eval_loss": 0.44343459606170654, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8897, + "eval_samples_per_second": 18.677, + "eval_steps_per_second": 3.226, + "step": 2536 + }, + { + "epoch": 140.0, + "eval_gen_len": 8.6818, + "eval_loss": 0.4378666877746582, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9063, + "eval_samples_per_second": 18.624, + "eval_steps_per_second": 3.217, + "step": 2555 + }, + { + "epoch": 140.99, + "eval_gen_len": 8.1182, + "eval_loss": 0.4321661591529846, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8879, + "eval_samples_per_second": 18.682, + "eval_steps_per_second": 3.227, + "step": 2573 + }, + { + "epoch": 141.97, + "eval_gen_len": 8.0455, + "eval_loss": 0.42785531282424927, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8597, + "eval_samples_per_second": 18.772, + "eval_steps_per_second": 3.242, + "step": 2591 + }, + { + "epoch": 142.96, + "eval_gen_len": 7.8909, + "eval_loss": 0.42234906554222107, + "eval_rouge1": 0.0026, + "eval_rouge2": 0.002, + "eval_rougeL": 0.0021, + "eval_rougeLsum": 0.0021, + "eval_runtime": 5.8591, + "eval_samples_per_second": 18.774, + "eval_steps_per_second": 3.243, + "step": 2609 + }, + { + "epoch": 144.0, + "eval_gen_len": 7.5727, + "eval_loss": 0.4168229401111603, + "eval_rouge1": 0.0026, + "eval_rouge2": 0.002, + "eval_rougeL": 0.0021, + "eval_rougeLsum": 0.0021, + "eval_runtime": 5.862, + "eval_samples_per_second": 18.765, + "eval_steps_per_second": 3.241, + "step": 2628 + }, + { + "epoch": 144.99, + "eval_gen_len": 7.4182, + "eval_loss": 0.4125988185405731, + "eval_rouge1": 0.0026, + "eval_rouge2": 0.002, + "eval_rougeL": 0.0021, + "eval_rougeLsum": 0.0021, + "eval_runtime": 5.855, + "eval_samples_per_second": 18.787, + "eval_steps_per_second": 3.245, + "step": 2646 + }, + { + "epoch": 145.97, + "eval_gen_len": 6.8545, + "eval_loss": 0.40698733925819397, + "eval_rouge1": 0.0026, + "eval_rouge2": 0.002, + "eval_rougeL": 0.0021, + "eval_rougeLsum": 0.0021, + "eval_runtime": 5.8558, + "eval_samples_per_second": 18.785, + "eval_steps_per_second": 3.245, + "step": 2664 + }, + { + "epoch": 146.96, + "eval_gen_len": 6.6, + "eval_loss": 0.4030015468597412, + "eval_rouge1": 0.0026, + "eval_rouge2": 0.002, + "eval_rougeL": 0.0021, + "eval_rougeLsum": 0.0021, + "eval_runtime": 5.8557, + "eval_samples_per_second": 18.785, + "eval_steps_per_second": 3.245, + "step": 2682 + }, + { + "epoch": 148.0, + "eval_gen_len": 6.5273, + "eval_loss": 0.39865967631340027, + "eval_rouge1": 0.0044, + "eval_rouge2": 0.0035, + "eval_rougeL": 0.0039, + "eval_rougeLsum": 0.0031, + "eval_runtime": 5.863, + "eval_samples_per_second": 18.762, + "eval_steps_per_second": 3.241, + "step": 2701 + }, + { + "epoch": 148.99, + "eval_gen_len": 6.5273, + "eval_loss": 0.3959006071090698, + "eval_rouge1": 0.0041, + "eval_rouge2": 0.0035, + "eval_rougeL": 0.0041, + "eval_rougeLsum": 0.0035, + "eval_runtime": 5.857, + "eval_samples_per_second": 18.781, + "eval_steps_per_second": 3.244, + "step": 2719 + }, + { + "epoch": 149.97, + "eval_gen_len": 6.3, + "eval_loss": 0.3902026414871216, + "eval_rouge1": 0.0049, + "eval_rouge2": 0.0041, + "eval_rougeL": 0.004, + "eval_rougeLsum": 0.0033, + "eval_runtime": 5.855, + "eval_samples_per_second": 18.787, + "eval_steps_per_second": 3.245, + "step": 2737 + }, + { + "epoch": 150.96, + "eval_gen_len": 6.1909, + "eval_loss": 0.3883272707462311, + "eval_rouge1": 0.0049, + "eval_rouge2": 0.0041, + "eval_rougeL": 0.004, + "eval_rougeLsum": 0.0033, + "eval_runtime": 5.853, + "eval_samples_per_second": 18.794, + "eval_steps_per_second": 3.246, + "step": 2755 + }, + { + "epoch": 152.0, + "eval_gen_len": 6.1182, + "eval_loss": 0.38500654697418213, + "eval_rouge1": 0.006, + "eval_rouge2": 0.0046, + "eval_rougeL": 0.0058, + "eval_rougeLsum": 0.0049, + "eval_runtime": 5.8626, + "eval_samples_per_second": 18.763, + "eval_steps_per_second": 3.241, + "step": 2774 + }, + { + "epoch": 152.99, + "eval_gen_len": 6.3818, + "eval_loss": 0.38074272871017456, + "eval_rouge1": 0.0065, + "eval_rouge2": 0.0043, + "eval_rougeL": 0.0058, + "eval_rougeLsum": 0.0058, + "eval_runtime": 5.8533, + "eval_samples_per_second": 18.793, + "eval_steps_per_second": 3.246, + "step": 2792 + }, + { + "epoch": 153.97, + "eval_gen_len": 6.2, + "eval_loss": 0.3779795467853546, + "eval_rouge1": 0.0045, + "eval_rouge2": 0.0022, + "eval_rougeL": 0.0039, + "eval_rougeLsum": 0.0038, + "eval_runtime": 5.8616, + "eval_samples_per_second": 18.766, + "eval_steps_per_second": 3.241, + "step": 2810 + }, + { + "epoch": 154.96, + "eval_gen_len": 6.3636, + "eval_loss": 0.37300601601600647, + "eval_rouge1": 0.0136, + "eval_rouge2": 0.0099, + "eval_rougeL": 0.0122, + "eval_rougeLsum": 0.0117, + "eval_runtime": 5.8638, + "eval_samples_per_second": 18.759, + "eval_steps_per_second": 3.24, + "step": 2828 + }, + { + "epoch": 156.0, + "eval_gen_len": 6.4909, + "eval_loss": 0.36999648809432983, + "eval_rouge1": 0.0195, + "eval_rouge2": 0.0152, + "eval_rougeL": 0.0179, + "eval_rougeLsum": 0.0173, + "eval_runtime": 5.8779, + "eval_samples_per_second": 18.714, + "eval_steps_per_second": 3.232, + "step": 2847 + }, + { + "epoch": 156.99, + "eval_gen_len": 6.5455, + "eval_loss": 0.3666529059410095, + "eval_rouge1": 0.0193, + "eval_rouge2": 0.0135, + "eval_rougeL": 0.0174, + "eval_rougeLsum": 0.0171, + "eval_runtime": 5.8596, + "eval_samples_per_second": 18.773, + "eval_steps_per_second": 3.243, + "step": 2865 + }, + { + "epoch": 157.97, + "eval_gen_len": 6.1909, + "eval_loss": 0.3615466058254242, + "eval_rouge1": 0.019, + "eval_rouge2": 0.0133, + "eval_rougeL": 0.0169, + "eval_rougeLsum": 0.0168, + "eval_runtime": 5.8587, + "eval_samples_per_second": 18.776, + "eval_steps_per_second": 3.243, + "step": 2883 + }, + { + "epoch": 158.96, + "eval_gen_len": 7.5, + "eval_loss": 0.3599731922149658, + "eval_rouge1": 0.0283, + "eval_rouge2": 0.0195, + "eval_rougeL": 0.0269, + "eval_rougeLsum": 0.0268, + "eval_runtime": 5.8597, + "eval_samples_per_second": 18.772, + "eval_steps_per_second": 3.242, + "step": 2901 + }, + { + "epoch": 160.0, + "eval_gen_len": 6.8, + "eval_loss": 0.3567432165145874, + "eval_rouge1": 0.0241, + "eval_rouge2": 0.0168, + "eval_rougeL": 0.022, + "eval_rougeLsum": 0.0216, + "eval_runtime": 5.8734, + "eval_samples_per_second": 18.729, + "eval_steps_per_second": 3.235, + "step": 2920 + }, + { + "epoch": 160.99, + "eval_gen_len": 6.6, + "eval_loss": 0.35366886854171753, + "eval_rouge1": 0.021, + "eval_rouge2": 0.0135, + "eval_rougeL": 0.0189, + "eval_rougeLsum": 0.0184, + "eval_runtime": 5.8665, + "eval_samples_per_second": 18.751, + "eval_steps_per_second": 3.239, + "step": 2938 + }, + { + "epoch": 161.97, + "eval_gen_len": 8.6818, + "eval_loss": 0.35154151916503906, + "eval_rouge1": 0.0438, + "eval_rouge2": 0.0277, + "eval_rougeL": 0.0401, + "eval_rougeLsum": 0.0401, + "eval_runtime": 5.8694, + "eval_samples_per_second": 18.741, + "eval_steps_per_second": 3.237, + "step": 2956 + }, + { + "epoch": 162.96, + "eval_gen_len": 8.2636, + "eval_loss": 0.3467026948928833, + "eval_rouge1": 0.0374, + "eval_rouge2": 0.023, + "eval_rougeL": 0.0346, + "eval_rougeLsum": 0.0341, + "eval_runtime": 5.8684, + "eval_samples_per_second": 18.745, + "eval_steps_per_second": 3.238, + "step": 2974 + }, + { + "epoch": 164.0, + "eval_gen_len": 7.4636, + "eval_loss": 0.343766987323761, + "eval_rouge1": 0.0284, + "eval_rouge2": 0.0184, + "eval_rougeL": 0.0267, + "eval_rougeLsum": 0.027, + "eval_runtime": 5.876, + "eval_samples_per_second": 18.72, + "eval_steps_per_second": 3.233, + "step": 2993 + }, + { + "epoch": 164.38, + "grad_norm": 0.7337635159492493, + "learning_rate": 1.3346666666666667e-05, + "loss": 0.5395, + "step": 3000 + }, + { + "epoch": 164.99, + "eval_gen_len": 8.9909, + "eval_loss": 0.3419288694858551, + "eval_rouge1": 0.0445, + "eval_rouge2": 0.0276, + "eval_rougeL": 0.0414, + "eval_rougeLsum": 0.0408, + "eval_runtime": 5.8707, + "eval_samples_per_second": 18.737, + "eval_steps_per_second": 3.236, + "step": 3011 + }, + { + "epoch": 165.97, + "eval_gen_len": 8.8909, + "eval_loss": 0.33906012773513794, + "eval_rouge1": 0.0446, + "eval_rouge2": 0.0294, + "eval_rougeL": 0.0415, + "eval_rougeLsum": 0.0412, + "eval_runtime": 5.8733, + "eval_samples_per_second": 18.729, + "eval_steps_per_second": 3.235, + "step": 3029 + }, + { + "epoch": 166.96, + "eval_gen_len": 9.5455, + "eval_loss": 0.3354407548904419, + "eval_rouge1": 0.0498, + "eval_rouge2": 0.032, + "eval_rougeL": 0.0452, + "eval_rougeLsum": 0.0453, + "eval_runtime": 5.8714, + "eval_samples_per_second": 18.735, + "eval_steps_per_second": 3.236, + "step": 3047 + }, + { + "epoch": 168.0, + "eval_gen_len": 10.0818, + "eval_loss": 0.33422771096229553, + "eval_rouge1": 0.0579, + "eval_rouge2": 0.0388, + "eval_rougeL": 0.0549, + "eval_rougeLsum": 0.0553, + "eval_runtime": 5.8768, + "eval_samples_per_second": 18.718, + "eval_steps_per_second": 3.233, + "step": 3066 + }, + { + "epoch": 168.99, + "eval_gen_len": 10.2, + "eval_loss": 0.331695020198822, + "eval_rouge1": 0.0542, + "eval_rouge2": 0.0353, + "eval_rougeL": 0.0508, + "eval_rougeLsum": 0.051, + "eval_runtime": 5.8711, + "eval_samples_per_second": 18.736, + "eval_steps_per_second": 3.236, + "step": 3084 + }, + { + "epoch": 169.97, + "eval_gen_len": 10.1364, + "eval_loss": 0.3284001052379608, + "eval_rouge1": 0.0555, + "eval_rouge2": 0.0363, + "eval_rougeL": 0.0523, + "eval_rougeLsum": 0.0521, + "eval_runtime": 5.8689, + "eval_samples_per_second": 18.743, + "eval_steps_per_second": 3.237, + "step": 3102 + }, + { + "epoch": 170.96, + "eval_gen_len": 10.6727, + "eval_loss": 0.32654786109924316, + "eval_rouge1": 0.0562, + "eval_rouge2": 0.0353, + "eval_rougeL": 0.0519, + "eval_rougeLsum": 0.0521, + "eval_runtime": 5.8762, + "eval_samples_per_second": 18.72, + "eval_steps_per_second": 3.233, + "step": 3120 + }, + { + "epoch": 172.0, + "eval_gen_len": 10.7818, + "eval_loss": 0.3229809105396271, + "eval_rouge1": 0.0551, + "eval_rouge2": 0.0346, + "eval_rougeL": 0.0523, + "eval_rougeLsum": 0.0525, + "eval_runtime": 5.8825, + "eval_samples_per_second": 18.699, + "eval_steps_per_second": 3.23, + "step": 3139 + }, + { + "epoch": 172.99, + "eval_gen_len": 11.3727, + "eval_loss": 0.3223954439163208, + "eval_rouge1": 0.0614, + "eval_rouge2": 0.0388, + "eval_rougeL": 0.0579, + "eval_rougeLsum": 0.0585, + "eval_runtime": 5.8822, + "eval_samples_per_second": 18.701, + "eval_steps_per_second": 3.23, + "step": 3157 + }, + { + "epoch": 173.97, + "eval_gen_len": 11.2909, + "eval_loss": 0.31792977452278137, + "eval_rouge1": 0.0584, + "eval_rouge2": 0.0328, + "eval_rougeL": 0.055, + "eval_rougeLsum": 0.0553, + "eval_runtime": 5.8754, + "eval_samples_per_second": 18.722, + "eval_steps_per_second": 3.234, + "step": 3175 + }, + { + "epoch": 174.96, + "eval_gen_len": 12.2455, + "eval_loss": 0.31657084822654724, + "eval_rouge1": 0.0649, + "eval_rouge2": 0.0392, + "eval_rougeL": 0.0615, + "eval_rougeLsum": 0.0619, + "eval_runtime": 5.9198, + "eval_samples_per_second": 18.582, + "eval_steps_per_second": 3.21, + "step": 3193 + }, + { + "epoch": 176.0, + "eval_gen_len": 11.7545, + "eval_loss": 0.3131771981716156, + "eval_rouge1": 0.0605, + "eval_rouge2": 0.0341, + "eval_rougeL": 0.0568, + "eval_rougeLsum": 0.0571, + "eval_runtime": 5.8941, + "eval_samples_per_second": 18.663, + "eval_steps_per_second": 3.224, + "step": 3212 + }, + { + "epoch": 176.99, + "eval_gen_len": 12.2364, + "eval_loss": 0.3109656870365143, + "eval_rouge1": 0.0639, + "eval_rouge2": 0.0396, + "eval_rougeL": 0.0614, + "eval_rougeLsum": 0.0619, + "eval_runtime": 5.8859, + "eval_samples_per_second": 18.689, + "eval_steps_per_second": 3.228, + "step": 3230 + }, + { + "epoch": 177.97, + "eval_gen_len": 11.9273, + "eval_loss": 0.3089582324028015, + "eval_rouge1": 0.0664, + "eval_rouge2": 0.04, + "eval_rougeL": 0.0619, + "eval_rougeLsum": 0.0626, + "eval_runtime": 5.873, + "eval_samples_per_second": 18.73, + "eval_steps_per_second": 3.235, + "step": 3248 + }, + { + "epoch": 178.96, + "eval_gen_len": 12.3364, + "eval_loss": 0.30663853883743286, + "eval_rouge1": 0.0721, + "eval_rouge2": 0.0455, + "eval_rougeL": 0.0685, + "eval_rougeLsum": 0.0688, + "eval_runtime": 5.8871, + "eval_samples_per_second": 18.685, + "eval_steps_per_second": 3.227, + "step": 3266 + }, + { + "epoch": 180.0, + "eval_gen_len": 12.6, + "eval_loss": 0.3049904704093933, + "eval_rouge1": 0.0727, + "eval_rouge2": 0.0447, + "eval_rougeL": 0.0691, + "eval_rougeLsum": 0.0693, + "eval_runtime": 5.8952, + "eval_samples_per_second": 18.659, + "eval_steps_per_second": 3.223, + "step": 3285 + }, + { + "epoch": 180.99, + "eval_gen_len": 12.2636, + "eval_loss": 0.30238986015319824, + "eval_rouge1": 0.0716, + "eval_rouge2": 0.0436, + "eval_rougeL": 0.0684, + "eval_rougeLsum": 0.0688, + "eval_runtime": 5.8902, + "eval_samples_per_second": 18.675, + "eval_steps_per_second": 3.226, + "step": 3303 + }, + { + "epoch": 181.97, + "eval_gen_len": 15.2, + "eval_loss": 0.29928120970726013, + "eval_rouge1": 0.0901, + "eval_rouge2": 0.0567, + "eval_rougeL": 0.0848, + "eval_rougeLsum": 0.0851, + "eval_runtime": 5.9062, + "eval_samples_per_second": 18.624, + "eval_steps_per_second": 3.217, + "step": 3321 + }, + { + "epoch": 182.96, + "eval_gen_len": 13.8182, + "eval_loss": 0.2973878085613251, + "eval_rouge1": 0.0874, + "eval_rouge2": 0.0583, + "eval_rougeL": 0.084, + "eval_rougeLsum": 0.0838, + "eval_runtime": 5.892, + "eval_samples_per_second": 18.67, + "eval_steps_per_second": 3.225, + "step": 3339 + }, + { + "epoch": 184.0, + "eval_gen_len": 14.8091, + "eval_loss": 0.29529863595962524, + "eval_rouge1": 0.0924, + "eval_rouge2": 0.0616, + "eval_rougeL": 0.0892, + "eval_rougeLsum": 0.0893, + "eval_runtime": 5.9216, + "eval_samples_per_second": 18.576, + "eval_steps_per_second": 3.209, + "step": 3358 + }, + { + "epoch": 184.99, + "eval_gen_len": 16.4, + "eval_loss": 0.29348161816596985, + "eval_rouge1": 0.1029, + "eval_rouge2": 0.0663, + "eval_rougeL": 0.0987, + "eval_rougeLsum": 0.0984, + "eval_runtime": 5.8929, + "eval_samples_per_second": 18.666, + "eval_steps_per_second": 3.224, + "step": 3376 + }, + { + "epoch": 185.97, + "eval_gen_len": 15.0364, + "eval_loss": 0.2902657389640808, + "eval_rouge1": 0.0923, + "eval_rouge2": 0.0603, + "eval_rougeL": 0.089, + "eval_rougeLsum": 0.0887, + "eval_runtime": 5.8988, + "eval_samples_per_second": 18.648, + "eval_steps_per_second": 3.221, + "step": 3394 + }, + { + "epoch": 186.96, + "eval_gen_len": 16.7, + "eval_loss": 0.2885950207710266, + "eval_rouge1": 0.1027, + "eval_rouge2": 0.0661, + "eval_rougeL": 0.0985, + "eval_rougeLsum": 0.099, + "eval_runtime": 5.9087, + "eval_samples_per_second": 18.617, + "eval_steps_per_second": 3.216, + "step": 3412 + }, + { + "epoch": 188.0, + "eval_gen_len": 15.0455, + "eval_loss": 0.2858668267726898, + "eval_rouge1": 0.0998, + "eval_rouge2": 0.0686, + "eval_rougeL": 0.0979, + "eval_rougeLsum": 0.0973, + "eval_runtime": 5.9128, + "eval_samples_per_second": 18.604, + "eval_steps_per_second": 3.213, + "step": 3431 + }, + { + "epoch": 188.99, + "eval_gen_len": 16.7, + "eval_loss": 0.28371575474739075, + "eval_rouge1": 0.1081, + "eval_rouge2": 0.0733, + "eval_rougeL": 0.105, + "eval_rougeLsum": 0.1051, + "eval_runtime": 5.9179, + "eval_samples_per_second": 18.588, + "eval_steps_per_second": 3.211, + "step": 3449 + }, + { + "epoch": 189.97, + "eval_gen_len": 16.9364, + "eval_loss": 0.28239956498146057, + "eval_rouge1": 0.1176, + "eval_rouge2": 0.0809, + "eval_rougeL": 0.1142, + "eval_rougeLsum": 0.1136, + "eval_runtime": 5.9392, + "eval_samples_per_second": 18.521, + "eval_steps_per_second": 3.199, + "step": 3467 + }, + { + "epoch": 190.96, + "eval_gen_len": 17.1545, + "eval_loss": 0.28112414479255676, + "eval_rouge1": 0.1281, + "eval_rouge2": 0.0923, + "eval_rougeL": 0.1251, + "eval_rougeLsum": 0.1247, + "eval_runtime": 5.9439, + "eval_samples_per_second": 18.506, + "eval_steps_per_second": 3.197, + "step": 3485 + }, + { + "epoch": 191.78, + "grad_norm": 0.6559975743293762, + "learning_rate": 1.2235555555555556e-05, + "loss": 0.4165, + "step": 3500 + }, + { + "epoch": 192.0, + "eval_gen_len": 16.9909, + "eval_loss": 0.2794179916381836, + "eval_rouge1": 0.1326, + "eval_rouge2": 0.0983, + "eval_rougeL": 0.1308, + "eval_rougeLsum": 0.1303, + "eval_runtime": 5.9583, + "eval_samples_per_second": 18.462, + "eval_steps_per_second": 3.189, + "step": 3504 + }, + { + "epoch": 192.99, + "eval_gen_len": 17.2818, + "eval_loss": 0.2769763767719269, + "eval_rouge1": 0.1377, + "eval_rouge2": 0.1021, + "eval_rougeL": 0.1352, + "eval_rougeLsum": 0.1352, + "eval_runtime": 5.9436, + "eval_samples_per_second": 18.507, + "eval_steps_per_second": 3.197, + "step": 3522 + }, + { + "epoch": 193.97, + "eval_gen_len": 16.6727, + "eval_loss": 0.27478569746017456, + "eval_rouge1": 0.1421, + "eval_rouge2": 0.1072, + "eval_rougeL": 0.1391, + "eval_rougeLsum": 0.1389, + "eval_runtime": 5.9492, + "eval_samples_per_second": 18.49, + "eval_steps_per_second": 3.194, + "step": 3540 + }, + { + "epoch": 194.96, + "eval_gen_len": 17.6273, + "eval_loss": 0.273343026638031, + "eval_rouge1": 0.1536, + "eval_rouge2": 0.1166, + "eval_rougeL": 0.1499, + "eval_rougeLsum": 0.15, + "eval_runtime": 5.9258, + "eval_samples_per_second": 18.563, + "eval_steps_per_second": 3.206, + "step": 3558 + }, + { + "epoch": 196.0, + "eval_gen_len": 17.6182, + "eval_loss": 0.27078884840011597, + "eval_rouge1": 0.1575, + "eval_rouge2": 0.1196, + "eval_rougeL": 0.1531, + "eval_rougeLsum": 0.1529, + "eval_runtime": 5.9196, + "eval_samples_per_second": 18.582, + "eval_steps_per_second": 3.21, + "step": 3577 + }, + { + "epoch": 196.99, + "eval_gen_len": 18.1727, + "eval_loss": 0.2679530382156372, + "eval_rouge1": 0.1544, + "eval_rouge2": 0.1152, + "eval_rougeL": 0.1492, + "eval_rougeLsum": 0.1492, + "eval_runtime": 5.9115, + "eval_samples_per_second": 18.608, + "eval_steps_per_second": 3.214, + "step": 3595 + }, + { + "epoch": 197.97, + "eval_gen_len": 17.6636, + "eval_loss": 0.26689696311950684, + "eval_rouge1": 0.1615, + "eval_rouge2": 0.1264, + "eval_rougeL": 0.1579, + "eval_rougeLsum": 0.1577, + "eval_runtime": 5.911, + "eval_samples_per_second": 18.609, + "eval_steps_per_second": 3.214, + "step": 3613 + }, + { + "epoch": 198.96, + "eval_gen_len": 18.3182, + "eval_loss": 0.26582667231559753, + "eval_rouge1": 0.1687, + "eval_rouge2": 0.1322, + "eval_rougeL": 0.165, + "eval_rougeLsum": 0.1646, + "eval_runtime": 5.9133, + "eval_samples_per_second": 18.602, + "eval_steps_per_second": 3.213, + "step": 3631 + }, + { + "epoch": 200.0, + "eval_gen_len": 18.3091, + "eval_loss": 0.262999951839447, + "eval_rouge1": 0.1753, + "eval_rouge2": 0.1408, + "eval_rougeL": 0.1726, + "eval_rougeLsum": 0.1721, + "eval_runtime": 5.9621, + "eval_samples_per_second": 18.45, + "eval_steps_per_second": 3.187, + "step": 3650 + }, + { + "epoch": 200.99, + "eval_gen_len": 18.6182, + "eval_loss": 0.2615664601325989, + "eval_rouge1": 0.1803, + "eval_rouge2": 0.1452, + "eval_rougeL": 0.1776, + "eval_rougeLsum": 0.1767, + "eval_runtime": 5.9566, + "eval_samples_per_second": 18.467, + "eval_steps_per_second": 3.19, + "step": 3668 + }, + { + "epoch": 201.97, + "eval_gen_len": 18.4818, + "eval_loss": 0.26064223051071167, + "eval_rouge1": 0.1883, + "eval_rouge2": 0.1542, + "eval_rougeL": 0.1857, + "eval_rougeLsum": 0.1854, + "eval_runtime": 5.9305, + "eval_samples_per_second": 18.548, + "eval_steps_per_second": 3.204, + "step": 3686 + }, + { + "epoch": 202.96, + "eval_gen_len": 18.6364, + "eval_loss": 0.2593740224838257, + "eval_rouge1": 0.1822, + "eval_rouge2": 0.1461, + "eval_rougeL": 0.179, + "eval_rougeLsum": 0.1784, + "eval_runtime": 5.9634, + "eval_samples_per_second": 18.446, + "eval_steps_per_second": 3.186, + "step": 3704 + }, + { + "epoch": 204.0, + "eval_gen_len": 18.5909, + "eval_loss": 0.2574484348297119, + "eval_rouge1": 0.1785, + "eval_rouge2": 0.1424, + "eval_rougeL": 0.1754, + "eval_rougeLsum": 0.1741, + "eval_runtime": 5.9326, + "eval_samples_per_second": 18.542, + "eval_steps_per_second": 3.203, + "step": 3723 + }, + { + "epoch": 204.99, + "eval_gen_len": 18.5909, + "eval_loss": 0.255962997674942, + "eval_rouge1": 0.182, + "eval_rouge2": 0.1475, + "eval_rougeL": 0.1799, + "eval_rougeLsum": 0.1789, + "eval_runtime": 5.9645, + "eval_samples_per_second": 18.442, + "eval_steps_per_second": 3.186, + "step": 3741 + }, + { + "epoch": 205.97, + "eval_gen_len": 18.3818, + "eval_loss": 0.25388580560684204, + "eval_rouge1": 0.1899, + "eval_rouge2": 0.1557, + "eval_rougeL": 0.1862, + "eval_rougeLsum": 0.1861, + "eval_runtime": 5.9231, + "eval_samples_per_second": 18.571, + "eval_steps_per_second": 3.208, + "step": 3759 + }, + { + "epoch": 206.96, + "eval_gen_len": 18.3727, + "eval_loss": 0.251926988363266, + "eval_rouge1": 0.1962, + "eval_rouge2": 0.1635, + "eval_rougeL": 0.1935, + "eval_rougeLsum": 0.1929, + "eval_runtime": 5.9351, + "eval_samples_per_second": 18.534, + "eval_steps_per_second": 3.201, + "step": 3777 + }, + { + "epoch": 208.0, + "eval_gen_len": 18.5818, + "eval_loss": 0.25200676918029785, + "eval_rouge1": 0.2002, + "eval_rouge2": 0.1666, + "eval_rougeL": 0.197, + "eval_rougeLsum": 0.1964, + "eval_runtime": 5.9349, + "eval_samples_per_second": 18.535, + "eval_steps_per_second": 3.201, + "step": 3796 + }, + { + "epoch": 208.99, + "eval_gen_len": 18.4182, + "eval_loss": 0.24954193830490112, + "eval_rouge1": 0.2016, + "eval_rouge2": 0.1708, + "eval_rougeL": 0.1997, + "eval_rougeLsum": 0.1994, + "eval_runtime": 5.9236, + "eval_samples_per_second": 18.57, + "eval_steps_per_second": 3.208, + "step": 3814 + }, + { + "epoch": 209.97, + "eval_gen_len": 18.5182, + "eval_loss": 0.2487880438566208, + "eval_rouge1": 0.2029, + "eval_rouge2": 0.172, + "eval_rougeL": 0.2008, + "eval_rougeLsum": 0.2005, + "eval_runtime": 5.9666, + "eval_samples_per_second": 18.436, + "eval_steps_per_second": 3.184, + "step": 3832 + }, + { + "epoch": 210.96, + "eval_gen_len": 18.7455, + "eval_loss": 0.246944397687912, + "eval_rouge1": 0.2126, + "eval_rouge2": 0.183, + "eval_rougeL": 0.2107, + "eval_rougeLsum": 0.2102, + "eval_runtime": 5.9334, + "eval_samples_per_second": 18.539, + "eval_steps_per_second": 3.202, + "step": 3850 + }, + { + "epoch": 212.0, + "eval_gen_len": 18.7455, + "eval_loss": 0.24503479897975922, + "eval_rouge1": 0.2153, + "eval_rouge2": 0.1832, + "eval_rougeL": 0.213, + "eval_rougeLsum": 0.2126, + "eval_runtime": 5.9384, + "eval_samples_per_second": 18.524, + "eval_steps_per_second": 3.2, + "step": 3869 + }, + { + "epoch": 212.99, + "eval_gen_len": 18.8, + "eval_loss": 0.24539507925510406, + "eval_rouge1": 0.2199, + "eval_rouge2": 0.1891, + "eval_rougeL": 0.2176, + "eval_rougeLsum": 0.2173, + "eval_runtime": 5.9326, + "eval_samples_per_second": 18.542, + "eval_steps_per_second": 3.203, + "step": 3887 + }, + { + "epoch": 213.97, + "eval_gen_len": 18.7455, + "eval_loss": 0.24339380860328674, + "eval_rouge1": 0.2253, + "eval_rouge2": 0.1972, + "eval_rougeL": 0.2239, + "eval_rougeLsum": 0.2238, + "eval_runtime": 5.9368, + "eval_samples_per_second": 18.529, + "eval_steps_per_second": 3.2, + "step": 3905 + }, + { + "epoch": 214.96, + "eval_gen_len": 18.8, + "eval_loss": 0.24222548305988312, + "eval_rouge1": 0.2245, + "eval_rouge2": 0.1953, + "eval_rougeL": 0.2234, + "eval_rougeLsum": 0.2227, + "eval_runtime": 5.9303, + "eval_samples_per_second": 18.549, + "eval_steps_per_second": 3.204, + "step": 3923 + }, + { + "epoch": 216.0, + "eval_gen_len": 18.8, + "eval_loss": 0.24038065969944, + "eval_rouge1": 0.2269, + "eval_rouge2": 0.1974, + "eval_rougeL": 0.2255, + "eval_rougeLsum": 0.2251, + "eval_runtime": 5.9423, + "eval_samples_per_second": 18.511, + "eval_steps_per_second": 3.197, + "step": 3942 + }, + { + "epoch": 216.99, + "eval_gen_len": 18.8, + "eval_loss": 0.24085824191570282, + "eval_rouge1": 0.2324, + "eval_rouge2": 0.203, + "eval_rougeL": 0.2305, + "eval_rougeLsum": 0.2302, + "eval_runtime": 5.9374, + "eval_samples_per_second": 18.527, + "eval_steps_per_second": 3.2, + "step": 3960 + }, + { + "epoch": 217.97, + "eval_gen_len": 18.8, + "eval_loss": 0.2390824556350708, + "eval_rouge1": 0.2328, + "eval_rouge2": 0.204, + "eval_rougeL": 0.2309, + "eval_rougeLsum": 0.2307, + "eval_runtime": 5.9716, + "eval_samples_per_second": 18.421, + "eval_steps_per_second": 3.182, + "step": 3978 + }, + { + "epoch": 218.96, + "eval_gen_len": 18.9636, + "eval_loss": 0.23841167986392975, + "eval_rouge1": 0.2394, + "eval_rouge2": 0.2109, + "eval_rougeL": 0.2381, + "eval_rougeLsum": 0.238, + "eval_runtime": 5.9528, + "eval_samples_per_second": 18.479, + "eval_steps_per_second": 3.192, + "step": 3996 + }, + { + "epoch": 219.18, + "grad_norm": 1.037503957748413, + "learning_rate": 1.1124444444444444e-05, + "loss": 0.3439, + "step": 4000 + }, + { + "epoch": 220.0, + "eval_gen_len": 18.9636, + "eval_loss": 0.2358517199754715, + "eval_rouge1": 0.2413, + "eval_rouge2": 0.2128, + "eval_rougeL": 0.2404, + "eval_rougeLsum": 0.2403, + "eval_runtime": 5.9869, + "eval_samples_per_second": 18.373, + "eval_steps_per_second": 3.174, + "step": 4015 + }, + { + "epoch": 220.99, + "eval_gen_len": 18.9636, + "eval_loss": 0.2353217452764511, + "eval_rouge1": 0.2411, + "eval_rouge2": 0.2122, + "eval_rougeL": 0.2401, + "eval_rougeLsum": 0.2398, + "eval_runtime": 5.9744, + "eval_samples_per_second": 18.412, + "eval_steps_per_second": 3.18, + "step": 4033 + }, + { + "epoch": 221.97, + "eval_gen_len": 18.9636, + "eval_loss": 0.23452366888523102, + "eval_rouge1": 0.2423, + "eval_rouge2": 0.2131, + "eval_rougeL": 0.2414, + "eval_rougeLsum": 0.2409, + "eval_runtime": 5.9345, + "eval_samples_per_second": 18.536, + "eval_steps_per_second": 3.202, + "step": 4051 + }, + { + "epoch": 222.96, + "eval_gen_len": 18.9636, + "eval_loss": 0.23285672068595886, + "eval_rouge1": 0.2417, + "eval_rouge2": 0.2133, + "eval_rougeL": 0.2407, + "eval_rougeLsum": 0.2404, + "eval_runtime": 5.9399, + "eval_samples_per_second": 18.519, + "eval_steps_per_second": 3.199, + "step": 4069 + }, + { + "epoch": 224.0, + "eval_gen_len": 18.9636, + "eval_loss": 0.23224669694900513, + "eval_rouge1": 0.2439, + "eval_rouge2": 0.2166, + "eval_rougeL": 0.2434, + "eval_rougeLsum": 0.2431, + "eval_runtime": 5.9565, + "eval_samples_per_second": 18.467, + "eval_steps_per_second": 3.19, + "step": 4088 + }, + { + "epoch": 224.99, + "eval_gen_len": 18.9636, + "eval_loss": 0.231593519449234, + "eval_rouge1": 0.2416, + "eval_rouge2": 0.2138, + "eval_rougeL": 0.2413, + "eval_rougeLsum": 0.2409, + "eval_runtime": 5.9359, + "eval_samples_per_second": 18.531, + "eval_steps_per_second": 3.201, + "step": 4106 + }, + { + "epoch": 225.97, + "eval_gen_len": 18.9636, + "eval_loss": 0.23053088784217834, + "eval_rouge1": 0.2423, + "eval_rouge2": 0.2147, + "eval_rougeL": 0.242, + "eval_rougeLsum": 0.2414, + "eval_runtime": 5.9359, + "eval_samples_per_second": 18.531, + "eval_steps_per_second": 3.201, + "step": 4124 + }, + { + "epoch": 226.96, + "eval_gen_len": 18.9636, + "eval_loss": 0.22999462485313416, + "eval_rouge1": 0.243, + "eval_rouge2": 0.215, + "eval_rougeL": 0.2427, + "eval_rougeLsum": 0.242, + "eval_runtime": 5.939, + "eval_samples_per_second": 18.522, + "eval_steps_per_second": 3.199, + "step": 4142 + }, + { + "epoch": 228.0, + "eval_gen_len": 18.9182, + "eval_loss": 0.22881445288658142, + "eval_rouge1": 0.2472, + "eval_rouge2": 0.2201, + "eval_rougeL": 0.2469, + "eval_rougeLsum": 0.2466, + "eval_runtime": 5.9542, + "eval_samples_per_second": 18.474, + "eval_steps_per_second": 3.191, + "step": 4161 + }, + { + "epoch": 228.99, + "eval_gen_len": 18.9636, + "eval_loss": 0.2282283455133438, + "eval_rouge1": 0.247, + "eval_rouge2": 0.2195, + "eval_rougeL": 0.2468, + "eval_rougeLsum": 0.2464, + "eval_runtime": 5.9427, + "eval_samples_per_second": 18.51, + "eval_steps_per_second": 3.197, + "step": 4179 + }, + { + "epoch": 229.97, + "eval_gen_len": 18.9182, + "eval_loss": 0.22686214745044708, + "eval_rouge1": 0.2437, + "eval_rouge2": 0.2163, + "eval_rougeL": 0.2436, + "eval_rougeLsum": 0.2434, + "eval_runtime": 5.9383, + "eval_samples_per_second": 18.524, + "eval_steps_per_second": 3.2, + "step": 4197 + }, + { + "epoch": 230.96, + "eval_gen_len": 18.9636, + "eval_loss": 0.22540777921676636, + "eval_rouge1": 0.2485, + "eval_rouge2": 0.2218, + "eval_rougeL": 0.2484, + "eval_rougeLsum": 0.2478, + "eval_runtime": 5.9379, + "eval_samples_per_second": 18.525, + "eval_steps_per_second": 3.2, + "step": 4215 + }, + { + "epoch": 232.0, + "eval_gen_len": 18.7455, + "eval_loss": 0.22596728801727295, + "eval_rouge1": 0.2458, + "eval_rouge2": 0.2194, + "eval_rougeL": 0.2456, + "eval_rougeLsum": 0.2455, + "eval_runtime": 5.9457, + "eval_samples_per_second": 18.501, + "eval_steps_per_second": 3.196, + "step": 4234 + }, + { + "epoch": 232.99, + "eval_gen_len": 18.6182, + "eval_loss": 0.22396238148212433, + "eval_rouge1": 0.2482, + "eval_rouge2": 0.2227, + "eval_rougeL": 0.2481, + "eval_rougeLsum": 0.248, + "eval_runtime": 5.9411, + "eval_samples_per_second": 18.515, + "eval_steps_per_second": 3.198, + "step": 4252 + }, + { + "epoch": 233.97, + "eval_gen_len": 18.6182, + "eval_loss": 0.22270821034908295, + "eval_rouge1": 0.2442, + "eval_rouge2": 0.2178, + "eval_rougeL": 0.2438, + "eval_rougeLsum": 0.2435, + "eval_runtime": 5.9443, + "eval_samples_per_second": 18.505, + "eval_steps_per_second": 3.196, + "step": 4270 + }, + { + "epoch": 234.96, + "eval_gen_len": 18.6182, + "eval_loss": 0.222365602850914, + "eval_rouge1": 0.2491, + "eval_rouge2": 0.2241, + "eval_rougeL": 0.2487, + "eval_rougeLsum": 0.2488, + "eval_runtime": 5.9399, + "eval_samples_per_second": 18.519, + "eval_steps_per_second": 3.199, + "step": 4288 + }, + { + "epoch": 236.0, + "eval_gen_len": 18.6182, + "eval_loss": 0.22221778333187103, + "eval_rouge1": 0.2486, + "eval_rouge2": 0.2233, + "eval_rougeL": 0.2484, + "eval_rougeLsum": 0.2484, + "eval_runtime": 5.9418, + "eval_samples_per_second": 18.513, + "eval_steps_per_second": 3.198, + "step": 4307 + }, + { + "epoch": 236.99, + "eval_gen_len": 18.2727, + "eval_loss": 0.2206820845603943, + "eval_rouge1": 0.2443, + "eval_rouge2": 0.219, + "eval_rougeL": 0.2434, + "eval_rougeLsum": 0.2434, + "eval_runtime": 5.9282, + "eval_samples_per_second": 18.555, + "eval_steps_per_second": 3.205, + "step": 4325 + }, + { + "epoch": 237.97, + "eval_gen_len": 17.4091, + "eval_loss": 0.22046072781085968, + "eval_rouge1": 0.2327, + "eval_rouge2": 0.2091, + "eval_rougeL": 0.2321, + "eval_rougeLsum": 0.2325, + "eval_runtime": 5.9336, + "eval_samples_per_second": 18.539, + "eval_steps_per_second": 3.202, + "step": 4343 + }, + { + "epoch": 238.96, + "eval_gen_len": 15.1636, + "eval_loss": 0.21863390505313873, + "eval_rouge1": 0.1989, + "eval_rouge2": 0.1774, + "eval_rougeL": 0.1978, + "eval_rougeLsum": 0.1979, + "eval_runtime": 5.9311, + "eval_samples_per_second": 18.546, + "eval_steps_per_second": 3.203, + "step": 4361 + }, + { + "epoch": 240.0, + "eval_gen_len": 16.1909, + "eval_loss": 0.2192383110523224, + "eval_rouge1": 0.2148, + "eval_rouge2": 0.1923, + "eval_rougeL": 0.2137, + "eval_rougeLsum": 0.2144, + "eval_runtime": 5.9365, + "eval_samples_per_second": 18.529, + "eval_steps_per_second": 3.201, + "step": 4380 + }, + { + "epoch": 240.99, + "eval_gen_len": 13.0909, + "eval_loss": 0.2176760584115982, + "eval_rouge1": 0.1742, + "eval_rouge2": 0.1548, + "eval_rougeL": 0.1725, + "eval_rougeLsum": 0.1723, + "eval_runtime": 5.9161, + "eval_samples_per_second": 18.593, + "eval_steps_per_second": 3.212, + "step": 4398 + }, + { + "epoch": 241.97, + "eval_gen_len": 11.7091, + "eval_loss": 0.21764642000198364, + "eval_rouge1": 0.1541, + "eval_rouge2": 0.1376, + "eval_rougeL": 0.1529, + "eval_rougeLsum": 0.1537, + "eval_runtime": 5.9162, + "eval_samples_per_second": 18.593, + "eval_steps_per_second": 3.212, + "step": 4416 + }, + { + "epoch": 242.96, + "eval_gen_len": 12.9, + "eval_loss": 0.21736116707324982, + "eval_rouge1": 0.1671, + "eval_rouge2": 0.1495, + "eval_rougeL": 0.1661, + "eval_rougeLsum": 0.1671, + "eval_runtime": 5.9221, + "eval_samples_per_second": 18.574, + "eval_steps_per_second": 3.208, + "step": 4434 + }, + { + "epoch": 244.0, + "eval_gen_len": 10.4818, + "eval_loss": 0.21571263670921326, + "eval_rouge1": 0.1364, + "eval_rouge2": 0.1201, + "eval_rougeL": 0.1354, + "eval_rougeLsum": 0.135, + "eval_runtime": 5.9422, + "eval_samples_per_second": 18.512, + "eval_steps_per_second": 3.197, + "step": 4453 + }, + { + "epoch": 244.99, + "eval_gen_len": 8.9273, + "eval_loss": 0.21506664156913757, + "eval_rouge1": 0.1149, + "eval_rouge2": 0.101, + "eval_rougeL": 0.1133, + "eval_rougeLsum": 0.1136, + "eval_runtime": 5.9071, + "eval_samples_per_second": 18.622, + "eval_steps_per_second": 3.216, + "step": 4471 + }, + { + "epoch": 245.97, + "eval_gen_len": 7.7182, + "eval_loss": 0.21382498741149902, + "eval_rouge1": 0.0989, + "eval_rouge2": 0.0866, + "eval_rougeL": 0.0976, + "eval_rougeLsum": 0.0981, + "eval_runtime": 5.9086, + "eval_samples_per_second": 18.617, + "eval_steps_per_second": 3.216, + "step": 4489 + }, + { + "epoch": 246.58, + "grad_norm": 0.4568144977092743, + "learning_rate": 1.0013333333333335e-05, + "loss": 0.2977, + "step": 4500 + }, + { + "epoch": 246.96, + "eval_gen_len": 7.3727, + "eval_loss": 0.21434056758880615, + "eval_rouge1": 0.0942, + "eval_rouge2": 0.0823, + "eval_rougeL": 0.093, + "eval_rougeLsum": 0.0937, + "eval_runtime": 5.9102, + "eval_samples_per_second": 18.612, + "eval_steps_per_second": 3.215, + "step": 4507 + }, + { + "epoch": 248.0, + "eval_gen_len": 6.8636, + "eval_loss": 0.2125895619392395, + "eval_rouge1": 0.0884, + "eval_rouge2": 0.0777, + "eval_rougeL": 0.0876, + "eval_rougeLsum": 0.0884, + "eval_runtime": 5.9078, + "eval_samples_per_second": 18.619, + "eval_steps_per_second": 3.216, + "step": 4526 + }, + { + "epoch": 248.99, + "eval_gen_len": 6.6909, + "eval_loss": 0.21264444291591644, + "eval_rouge1": 0.0856, + "eval_rouge2": 0.0753, + "eval_rougeL": 0.0851, + "eval_rougeLsum": 0.0853, + "eval_runtime": 5.9128, + "eval_samples_per_second": 18.604, + "eval_steps_per_second": 3.213, + "step": 4544 + }, + { + "epoch": 249.97, + "eval_gen_len": 6.8636, + "eval_loss": 0.2110782116651535, + "eval_rouge1": 0.0871, + "eval_rouge2": 0.0764, + "eval_rougeL": 0.0865, + "eval_rougeLsum": 0.0866, + "eval_runtime": 5.911, + "eval_samples_per_second": 18.609, + "eval_steps_per_second": 3.214, + "step": 4562 + }, + { + "epoch": 250.96, + "eval_gen_len": 6.3455, + "eval_loss": 0.2114747315645218, + "eval_rouge1": 0.0813, + "eval_rouge2": 0.071, + "eval_rougeL": 0.0805, + "eval_rougeLsum": 0.0808, + "eval_runtime": 5.9048, + "eval_samples_per_second": 18.629, + "eval_steps_per_second": 3.218, + "step": 4580 + }, + { + "epoch": 252.0, + "eval_gen_len": 5.1364, + "eval_loss": 0.21088837087154388, + "eval_rouge1": 0.0658, + "eval_rouge2": 0.0587, + "eval_rougeL": 0.0647, + "eval_rougeLsum": 0.0656, + "eval_runtime": 5.9122, + "eval_samples_per_second": 18.606, + "eval_steps_per_second": 3.214, + "step": 4599 + }, + { + "epoch": 252.99, + "eval_gen_len": 4.1, + "eval_loss": 0.21026724576950073, + "eval_rouge1": 0.0525, + "eval_rouge2": 0.0474, + "eval_rougeL": 0.0523, + "eval_rougeLsum": 0.0531, + "eval_runtime": 5.8924, + "eval_samples_per_second": 18.668, + "eval_steps_per_second": 3.225, + "step": 4617 + }, + { + "epoch": 253.97, + "eval_gen_len": 3.4091, + "eval_loss": 0.20923300087451935, + "eval_rouge1": 0.0422, + "eval_rouge2": 0.0371, + "eval_rougeL": 0.0417, + "eval_rougeLsum": 0.0427, + "eval_runtime": 5.9069, + "eval_samples_per_second": 18.622, + "eval_steps_per_second": 3.217, + "step": 4635 + }, + { + "epoch": 254.96, + "eval_gen_len": 3.4091, + "eval_loss": 0.20883551239967346, + "eval_rouge1": 0.0425, + "eval_rouge2": 0.0373, + "eval_rougeL": 0.0421, + "eval_rougeLsum": 0.0432, + "eval_runtime": 5.8899, + "eval_samples_per_second": 18.676, + "eval_steps_per_second": 3.226, + "step": 4653 + }, + { + "epoch": 256.0, + "eval_gen_len": 3.0545, + "eval_loss": 0.20799440145492554, + "eval_rouge1": 0.0383, + "eval_rouge2": 0.0339, + "eval_rougeL": 0.0381, + "eval_rougeLsum": 0.0386, + "eval_runtime": 5.8948, + "eval_samples_per_second": 18.66, + "eval_steps_per_second": 3.223, + "step": 4672 + }, + { + "epoch": 256.99, + "eval_gen_len": 2.0364, + "eval_loss": 0.20746435225009918, + "eval_rouge1": 0.0256, + "eval_rouge2": 0.0228, + "eval_rougeL": 0.0255, + "eval_rougeLsum": 0.0259, + "eval_runtime": 5.8826, + "eval_samples_per_second": 18.699, + "eval_steps_per_second": 3.23, + "step": 4690 + }, + { + "epoch": 257.97, + "eval_gen_len": 2.0364, + "eval_loss": 0.2079101800918579, + "eval_rouge1": 0.026, + "eval_rouge2": 0.0231, + "eval_rougeL": 0.0258, + "eval_rougeLsum": 0.0263, + "eval_runtime": 5.9214, + "eval_samples_per_second": 18.577, + "eval_steps_per_second": 3.209, + "step": 4708 + }, + { + "epoch": 258.96, + "eval_gen_len": 2.0182, + "eval_loss": 0.20639775693416595, + "eval_rouge1": 0.0232, + "eval_rouge2": 0.0203, + "eval_rougeL": 0.0231, + "eval_rougeLsum": 0.0233, + "eval_runtime": 5.9298, + "eval_samples_per_second": 18.55, + "eval_steps_per_second": 3.204, + "step": 4726 + }, + { + "epoch": 260.0, + "eval_gen_len": 2.0182, + "eval_loss": 0.20615024864673615, + "eval_rouge1": 0.0238, + "eval_rouge2": 0.0202, + "eval_rougeL": 0.0237, + "eval_rougeLsum": 0.0238, + "eval_runtime": 5.9324, + "eval_samples_per_second": 18.542, + "eval_steps_per_second": 3.203, + "step": 4745 + }, + { + "epoch": 260.99, + "eval_gen_len": 1.7091, + "eval_loss": 0.20586760342121124, + "eval_rouge1": 0.02, + "eval_rouge2": 0.0175, + "eval_rougeL": 0.0202, + "eval_rougeLsum": 0.0201, + "eval_runtime": 5.8796, + "eval_samples_per_second": 18.709, + "eval_steps_per_second": 3.231, + "step": 4763 + }, + { + "epoch": 261.97, + "eval_gen_len": 0.6909, + "eval_loss": 0.20486456155776978, + "eval_rouge1": 0.0091, + "eval_rouge2": 0.0079, + "eval_rougeL": 0.0091, + "eval_rougeLsum": 0.0091, + "eval_runtime": 5.8759, + "eval_samples_per_second": 18.721, + "eval_steps_per_second": 3.234, + "step": 4781 + }, + { + "epoch": 262.96, + "eval_gen_len": 0.6909, + "eval_loss": 0.20466168224811554, + "eval_rouge1": 0.0091, + "eval_rouge2": 0.0079, + "eval_rougeL": 0.0091, + "eval_rougeLsum": 0.0091, + "eval_runtime": 5.9163, + "eval_samples_per_second": 18.593, + "eval_steps_per_second": 3.211, + "step": 4799 + }, + { + "epoch": 264.0, + "eval_gen_len": 0.6818, + "eval_loss": 0.20416179299354553, + "eval_rouge1": 0.0082, + "eval_rouge2": 0.0071, + "eval_rougeL": 0.0081, + "eval_rougeLsum": 0.0082, + "eval_runtime": 5.8989, + "eval_samples_per_second": 18.647, + "eval_steps_per_second": 3.221, + "step": 4818 + }, + { + "epoch": 264.99, + "eval_gen_len": 0.3364, + "eval_loss": 0.20311486721038818, + "eval_rouge1": 0.0044, + "eval_rouge2": 0.0038, + "eval_rougeL": 0.0044, + "eval_rougeLsum": 0.0046, + "eval_runtime": 5.8639, + "eval_samples_per_second": 18.759, + "eval_steps_per_second": 3.24, + "step": 4836 + }, + { + "epoch": 265.97, + "eval_gen_len": 0.3455, + "eval_loss": 0.20284703373908997, + "eval_rouge1": 0.0057, + "eval_rouge2": 0.0051, + "eval_rougeL": 0.0057, + "eval_rougeLsum": 0.0057, + "eval_runtime": 5.874, + "eval_samples_per_second": 18.726, + "eval_steps_per_second": 3.235, + "step": 4854 + }, + { + "epoch": 266.96, + "eval_gen_len": 0.3455, + "eval_loss": 0.20207703113555908, + "eval_rouge1": 0.0057, + "eval_rouge2": 0.0051, + "eval_rougeL": 0.0057, + "eval_rougeLsum": 0.0057, + "eval_runtime": 5.871, + "eval_samples_per_second": 18.736, + "eval_steps_per_second": 3.236, + "step": 4872 + }, + { + "epoch": 268.0, + "eval_gen_len": 0.3455, + "eval_loss": 0.20189516246318817, + "eval_rouge1": 0.0057, + "eval_rouge2": 0.0051, + "eval_rougeL": 0.0057, + "eval_rougeLsum": 0.0057, + "eval_runtime": 5.8784, + "eval_samples_per_second": 18.713, + "eval_steps_per_second": 3.232, + "step": 4891 + }, + { + "epoch": 268.99, + "eval_gen_len": 0.3455, + "eval_loss": 0.20159202814102173, + "eval_rouge1": 0.0057, + "eval_rouge2": 0.0051, + "eval_rougeL": 0.0057, + "eval_rougeLsum": 0.0057, + "eval_runtime": 5.8675, + "eval_samples_per_second": 18.747, + "eval_steps_per_second": 3.238, + "step": 4909 + }, + { + "epoch": 269.97, + "eval_gen_len": 0.3455, + "eval_loss": 0.20119339227676392, + "eval_rouge1": 0.0057, + "eval_rouge2": 0.0051, + "eval_rougeL": 0.0057, + "eval_rougeLsum": 0.0057, + "eval_runtime": 5.8693, + "eval_samples_per_second": 18.741, + "eval_steps_per_second": 3.237, + "step": 4927 + }, + { + "epoch": 270.96, + "eval_gen_len": 0.3455, + "eval_loss": 0.20063255727291107, + "eval_rouge1": 0.0057, + "eval_rouge2": 0.0051, + "eval_rougeL": 0.0057, + "eval_rougeLsum": 0.0057, + "eval_runtime": 5.8634, + "eval_samples_per_second": 18.76, + "eval_steps_per_second": 3.24, + "step": 4945 + }, + { + "epoch": 272.0, + "eval_gen_len": 0.3455, + "eval_loss": 0.20042632520198822, + "eval_rouge1": 0.0057, + "eval_rouge2": 0.0051, + "eval_rougeL": 0.0057, + "eval_rougeLsum": 0.0057, + "eval_runtime": 5.9002, + "eval_samples_per_second": 18.644, + "eval_steps_per_second": 3.22, + "step": 4964 + }, + { + "epoch": 272.99, + "eval_gen_len": 0.3455, + "eval_loss": 0.19937776029109955, + "eval_rouge1": 0.0057, + "eval_rouge2": 0.0051, + "eval_rougeL": 0.0057, + "eval_rougeLsum": 0.0057, + "eval_runtime": 5.866, + "eval_samples_per_second": 18.752, + "eval_steps_per_second": 3.239, + "step": 4982 + }, + { + "epoch": 273.97, + "grad_norm": 0.47718000411987305, + "learning_rate": 8.902222222222224e-06, + "loss": 0.2666, + "step": 5000 + }, + { + "epoch": 273.97, + "eval_gen_len": 0.5182, + "eval_loss": 0.199092298746109, + "eval_rouge1": 0.008, + "eval_rouge2": 0.0071, + "eval_rougeL": 0.008, + "eval_rougeLsum": 0.008, + "eval_runtime": 5.8818, + "eval_samples_per_second": 18.702, + "eval_steps_per_second": 3.23, + "step": 5000 + }, + { + "epoch": 274.96, + "eval_gen_len": 0.5182, + "eval_loss": 0.1990521252155304, + "eval_rouge1": 0.008, + "eval_rouge2": 0.0071, + "eval_rougeL": 0.008, + "eval_rougeLsum": 0.008, + "eval_runtime": 5.8794, + "eval_samples_per_second": 18.709, + "eval_steps_per_second": 3.232, + "step": 5018 + }, + { + "epoch": 276.0, + "eval_gen_len": 0.5182, + "eval_loss": 0.19846394658088684, + "eval_rouge1": 0.008, + "eval_rouge2": 0.0071, + "eval_rougeL": 0.008, + "eval_rougeLsum": 0.008, + "eval_runtime": 5.8793, + "eval_samples_per_second": 18.71, + "eval_steps_per_second": 3.232, + "step": 5037 + }, + { + "epoch": 276.99, + "eval_gen_len": 0.5182, + "eval_loss": 0.1975831538438797, + "eval_rouge1": 0.008, + "eval_rouge2": 0.0071, + "eval_rougeL": 0.008, + "eval_rougeLsum": 0.008, + "eval_runtime": 5.8684, + "eval_samples_per_second": 18.744, + "eval_steps_per_second": 3.238, + "step": 5055 + }, + { + "epoch": 277.97, + "eval_gen_len": 0.1727, + "eval_loss": 0.19685131311416626, + "eval_rouge1": 0.0025, + "eval_rouge2": 0.0024, + "eval_rougeL": 0.0025, + "eval_rougeLsum": 0.0025, + "eval_runtime": 5.8685, + "eval_samples_per_second": 18.744, + "eval_steps_per_second": 3.238, + "step": 5073 + }, + { + "epoch": 278.96, + "eval_gen_len": 0.1727, + "eval_loss": 0.19685351848602295, + "eval_rouge1": 0.0025, + "eval_rouge2": 0.0024, + "eval_rougeL": 0.0025, + "eval_rougeLsum": 0.0025, + "eval_runtime": 5.87, + "eval_samples_per_second": 18.739, + "eval_steps_per_second": 3.237, + "step": 5091 + }, + { + "epoch": 280.0, + "eval_gen_len": 0.3455, + "eval_loss": 0.19706358015537262, + "eval_rouge1": 0.0057, + "eval_rouge2": 0.0051, + "eval_rougeL": 0.0057, + "eval_rougeLsum": 0.0057, + "eval_runtime": 5.8807, + "eval_samples_per_second": 18.705, + "eval_steps_per_second": 3.231, + "step": 5110 + }, + { + "epoch": 280.99, + "eval_gen_len": 0.1727, + "eval_loss": 0.19580155611038208, + "eval_rouge1": 0.0025, + "eval_rouge2": 0.0024, + "eval_rougeL": 0.0025, + "eval_rougeLsum": 0.0025, + "eval_runtime": 5.8696, + "eval_samples_per_second": 18.741, + "eval_steps_per_second": 3.237, + "step": 5128 + }, + { + "epoch": 281.97, + "eval_gen_len": 0.0, + "eval_loss": 0.1954393833875656, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8668, + "eval_samples_per_second": 18.75, + "eval_steps_per_second": 3.239, + "step": 5146 + }, + { + "epoch": 282.96, + "eval_gen_len": 0.0, + "eval_loss": 0.19552475214004517, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8798, + "eval_samples_per_second": 18.708, + "eval_steps_per_second": 3.231, + "step": 5164 + }, + { + "epoch": 284.0, + "eval_gen_len": 0.1727, + "eval_loss": 0.1950557678937912, + "eval_rouge1": 0.0025, + "eval_rouge2": 0.0024, + "eval_rougeL": 0.0025, + "eval_rougeLsum": 0.0025, + "eval_runtime": 5.8764, + "eval_samples_per_second": 18.719, + "eval_steps_per_second": 3.233, + "step": 5183 + }, + { + "epoch": 284.99, + "eval_gen_len": 0.0, + "eval_loss": 0.1939947009086609, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8768, + "eval_samples_per_second": 18.718, + "eval_steps_per_second": 3.233, + "step": 5201 + }, + { + "epoch": 285.97, + "eval_gen_len": 0.0, + "eval_loss": 0.19393964111804962, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8764, + "eval_samples_per_second": 18.719, + "eval_steps_per_second": 3.233, + "step": 5219 + }, + { + "epoch": 286.96, + "eval_gen_len": 0.0, + "eval_loss": 0.19383706152439117, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8701, + "eval_samples_per_second": 18.739, + "eval_steps_per_second": 3.237, + "step": 5237 + }, + { + "epoch": 288.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1930641084909439, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8828, + "eval_samples_per_second": 18.699, + "eval_steps_per_second": 3.23, + "step": 5256 + }, + { + "epoch": 288.99, + "eval_gen_len": 0.0, + "eval_loss": 0.19216330349445343, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8721, + "eval_samples_per_second": 18.733, + "eval_steps_per_second": 3.236, + "step": 5274 + }, + { + "epoch": 289.97, + "eval_gen_len": 0.0, + "eval_loss": 0.1919858455657959, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.886, + "eval_samples_per_second": 18.688, + "eval_steps_per_second": 3.228, + "step": 5292 + }, + { + "epoch": 290.96, + "eval_gen_len": 0.0, + "eval_loss": 0.19181759655475616, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8804, + "eval_samples_per_second": 18.706, + "eval_steps_per_second": 3.231, + "step": 5310 + }, + { + "epoch": 292.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1913154274225235, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8887, + "eval_samples_per_second": 18.68, + "eval_steps_per_second": 3.227, + "step": 5329 + }, + { + "epoch": 292.99, + "eval_gen_len": 0.0, + "eval_loss": 0.19096632301807404, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8809, + "eval_samples_per_second": 18.704, + "eval_steps_per_second": 3.231, + "step": 5347 + }, + { + "epoch": 293.97, + "eval_gen_len": 0.0, + "eval_loss": 0.19034302234649658, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.886, + "eval_samples_per_second": 18.688, + "eval_steps_per_second": 3.228, + "step": 5365 + }, + { + "epoch": 294.96, + "eval_gen_len": 0.0, + "eval_loss": 0.18984819948673248, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8905, + "eval_samples_per_second": 18.674, + "eval_steps_per_second": 3.226, + "step": 5383 + }, + { + "epoch": 296.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1901622861623764, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8956, + "eval_samples_per_second": 18.658, + "eval_steps_per_second": 3.223, + "step": 5402 + }, + { + "epoch": 296.99, + "eval_gen_len": 0.0, + "eval_loss": 0.18923012912273407, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8738, + "eval_samples_per_second": 18.727, + "eval_steps_per_second": 3.235, + "step": 5420 + }, + { + "epoch": 297.97, + "eval_gen_len": 0.0, + "eval_loss": 0.18847127258777618, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8804, + "eval_samples_per_second": 18.706, + "eval_steps_per_second": 3.231, + "step": 5438 + }, + { + "epoch": 298.96, + "eval_gen_len": 0.0, + "eval_loss": 0.1884116530418396, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8771, + "eval_samples_per_second": 18.717, + "eval_steps_per_second": 3.233, + "step": 5456 + }, + { + "epoch": 300.0, + "eval_gen_len": 0.0, + "eval_loss": 0.18776828050613403, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8882, + "eval_samples_per_second": 18.681, + "eval_steps_per_second": 3.227, + "step": 5475 + }, + { + "epoch": 300.99, + "eval_gen_len": 0.0, + "eval_loss": 0.18751147389411926, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8894, + "eval_samples_per_second": 18.677, + "eval_steps_per_second": 3.226, + "step": 5493 + }, + { + "epoch": 301.37, + "grad_norm": 0.6001901030540466, + "learning_rate": 7.791111111111111e-06, + "loss": 0.2463, + "step": 5500 + }, + { + "epoch": 301.97, + "eval_gen_len": 0.0, + "eval_loss": 0.18754757940769196, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8862, + "eval_samples_per_second": 18.688, + "eval_steps_per_second": 3.228, + "step": 5511 + }, + { + "epoch": 302.96, + "eval_gen_len": 0.0, + "eval_loss": 0.18714185059070587, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8804, + "eval_samples_per_second": 18.706, + "eval_steps_per_second": 3.231, + "step": 5529 + }, + { + "epoch": 304.0, + "eval_gen_len": 0.0, + "eval_loss": 0.18668492138385773, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8924, + "eval_samples_per_second": 18.668, + "eval_steps_per_second": 3.225, + "step": 5548 + }, + { + "epoch": 304.99, + "eval_gen_len": 0.0, + "eval_loss": 0.18612505495548248, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8787, + "eval_samples_per_second": 18.712, + "eval_steps_per_second": 3.232, + "step": 5566 + }, + { + "epoch": 305.97, + "eval_gen_len": 0.0, + "eval_loss": 0.18622124195098877, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8963, + "eval_samples_per_second": 18.656, + "eval_steps_per_second": 3.222, + "step": 5584 + }, + { + "epoch": 306.96, + "eval_gen_len": 0.0, + "eval_loss": 0.18581855297088623, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8851, + "eval_samples_per_second": 18.691, + "eval_steps_per_second": 3.228, + "step": 5602 + }, + { + "epoch": 308.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1851491779088974, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8834, + "eval_samples_per_second": 18.697, + "eval_steps_per_second": 3.229, + "step": 5621 + }, + { + "epoch": 308.99, + "eval_gen_len": 0.0, + "eval_loss": 0.18541742861270905, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8802, + "eval_samples_per_second": 18.707, + "eval_steps_per_second": 3.231, + "step": 5639 + }, + { + "epoch": 309.97, + "eval_gen_len": 0.0, + "eval_loss": 0.1846253126859665, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.88, + "eval_samples_per_second": 18.708, + "eval_steps_per_second": 3.231, + "step": 5657 + }, + { + "epoch": 310.96, + "eval_gen_len": 0.0, + "eval_loss": 0.1842205971479416, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8908, + "eval_samples_per_second": 18.673, + "eval_steps_per_second": 3.225, + "step": 5675 + }, + { + "epoch": 312.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1838139295578003, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8826, + "eval_samples_per_second": 18.699, + "eval_steps_per_second": 3.23, + "step": 5694 + }, + { + "epoch": 312.99, + "eval_gen_len": 0.0, + "eval_loss": 0.18346160650253296, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8686, + "eval_samples_per_second": 18.744, + "eval_steps_per_second": 3.238, + "step": 5712 + }, + { + "epoch": 313.97, + "eval_gen_len": 0.0, + "eval_loss": 0.18300552666187286, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8811, + "eval_samples_per_second": 18.704, + "eval_steps_per_second": 3.231, + "step": 5730 + }, + { + "epoch": 314.96, + "eval_gen_len": 0.0, + "eval_loss": 0.18304497003555298, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8736, + "eval_samples_per_second": 18.728, + "eval_steps_per_second": 3.235, + "step": 5748 + }, + { + "epoch": 316.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1824423372745514, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9117, + "eval_samples_per_second": 18.607, + "eval_steps_per_second": 3.214, + "step": 5767 + }, + { + "epoch": 316.99, + "eval_gen_len": 0.0, + "eval_loss": 0.18214993178844452, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8772, + "eval_samples_per_second": 18.716, + "eval_steps_per_second": 3.233, + "step": 5785 + }, + { + "epoch": 317.97, + "eval_gen_len": 0.0, + "eval_loss": 0.1819440871477127, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8707, + "eval_samples_per_second": 18.737, + "eval_steps_per_second": 3.236, + "step": 5803 + }, + { + "epoch": 318.96, + "eval_gen_len": 0.0, + "eval_loss": 0.18124094605445862, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9162, + "eval_samples_per_second": 18.593, + "eval_steps_per_second": 3.212, + "step": 5821 + }, + { + "epoch": 320.0, + "eval_gen_len": 0.0, + "eval_loss": 0.18139633536338806, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.874, + "eval_samples_per_second": 18.726, + "eval_steps_per_second": 3.235, + "step": 5840 + }, + { + "epoch": 320.99, + "eval_gen_len": 0.0, + "eval_loss": 0.1810206174850464, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8919, + "eval_samples_per_second": 18.67, + "eval_steps_per_second": 3.225, + "step": 5858 + }, + { + "epoch": 321.97, + "eval_gen_len": 0.0, + "eval_loss": 0.1808764487504959, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9173, + "eval_samples_per_second": 18.589, + "eval_steps_per_second": 3.211, + "step": 5876 + }, + { + "epoch": 322.96, + "eval_gen_len": 0.0, + "eval_loss": 0.1801535189151764, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9176, + "eval_samples_per_second": 18.589, + "eval_steps_per_second": 3.211, + "step": 5894 + }, + { + "epoch": 324.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1799170821905136, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9289, + "eval_samples_per_second": 18.553, + "eval_steps_per_second": 3.205, + "step": 5913 + }, + { + "epoch": 324.99, + "eval_gen_len": 0.0, + "eval_loss": 0.1797485649585724, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9221, + "eval_samples_per_second": 18.575, + "eval_steps_per_second": 3.208, + "step": 5931 + }, + { + "epoch": 325.97, + "eval_gen_len": 0.0, + "eval_loss": 0.17966806888580322, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9096, + "eval_samples_per_second": 18.614, + "eval_steps_per_second": 3.215, + "step": 5949 + }, + { + "epoch": 326.96, + "eval_gen_len": 0.0, + "eval_loss": 0.17945848405361176, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8715, + "eval_samples_per_second": 18.735, + "eval_steps_per_second": 3.236, + "step": 5967 + }, + { + "epoch": 328.0, + "eval_gen_len": 0.0, + "eval_loss": 0.17859123647212982, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8842, + "eval_samples_per_second": 18.694, + "eval_steps_per_second": 3.229, + "step": 5986 + }, + { + "epoch": 328.77, + "grad_norm": 0.45411407947540283, + "learning_rate": 6.680000000000001e-06, + "loss": 0.2311, + "step": 6000 + }, + { + "epoch": 328.99, + "eval_gen_len": 0.0, + "eval_loss": 0.17845258116722107, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8879, + "eval_samples_per_second": 18.682, + "eval_steps_per_second": 3.227, + "step": 6004 + }, + { + "epoch": 329.97, + "eval_gen_len": 0.0, + "eval_loss": 0.17820703983306885, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8859, + "eval_samples_per_second": 18.689, + "eval_steps_per_second": 3.228, + "step": 6022 + }, + { + "epoch": 330.96, + "eval_gen_len": 0.0, + "eval_loss": 0.17832966148853302, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8753, + "eval_samples_per_second": 18.723, + "eval_steps_per_second": 3.234, + "step": 6040 + }, + { + "epoch": 332.0, + "eval_gen_len": 0.0, + "eval_loss": 0.17780448496341705, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8819, + "eval_samples_per_second": 18.702, + "eval_steps_per_second": 3.23, + "step": 6059 + }, + { + "epoch": 332.99, + "eval_gen_len": 0.0, + "eval_loss": 0.17747904360294342, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8701, + "eval_samples_per_second": 18.739, + "eval_steps_per_second": 3.237, + "step": 6077 + }, + { + "epoch": 333.97, + "eval_gen_len": 0.0, + "eval_loss": 0.17767922580242157, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8742, + "eval_samples_per_second": 18.726, + "eval_steps_per_second": 3.234, + "step": 6095 + }, + { + "epoch": 334.96, + "eval_gen_len": 0.0, + "eval_loss": 0.1771049201488495, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9091, + "eval_samples_per_second": 18.615, + "eval_steps_per_second": 3.215, + "step": 6113 + }, + { + "epoch": 336.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1770164966583252, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8812, + "eval_samples_per_second": 18.704, + "eval_steps_per_second": 3.231, + "step": 6132 + }, + { + "epoch": 336.99, + "eval_gen_len": 0.0, + "eval_loss": 0.17682689428329468, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8773, + "eval_samples_per_second": 18.716, + "eval_steps_per_second": 3.233, + "step": 6150 + }, + { + "epoch": 337.97, + "eval_gen_len": 0.0, + "eval_loss": 0.1767437607049942, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8745, + "eval_samples_per_second": 18.725, + "eval_steps_per_second": 3.234, + "step": 6168 + }, + { + "epoch": 338.96, + "eval_gen_len": 0.0, + "eval_loss": 0.17660827934741974, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.876, + "eval_samples_per_second": 18.72, + "eval_steps_per_second": 3.233, + "step": 6186 + }, + { + "epoch": 340.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1762937307357788, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8747, + "eval_samples_per_second": 18.724, + "eval_steps_per_second": 3.234, + "step": 6205 + }, + { + "epoch": 340.99, + "eval_gen_len": 0.0, + "eval_loss": 0.17532125115394592, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.877, + "eval_samples_per_second": 18.717, + "eval_steps_per_second": 3.233, + "step": 6223 + }, + { + "epoch": 341.97, + "eval_gen_len": 0.0, + "eval_loss": 0.17553770542144775, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8818, + "eval_samples_per_second": 18.702, + "eval_steps_per_second": 3.23, + "step": 6241 + }, + { + "epoch": 342.96, + "eval_gen_len": 0.0, + "eval_loss": 0.17563851177692413, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8839, + "eval_samples_per_second": 18.695, + "eval_steps_per_second": 3.229, + "step": 6259 + }, + { + "epoch": 344.0, + "eval_gen_len": 0.0, + "eval_loss": 0.17437517642974854, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8841, + "eval_samples_per_second": 18.694, + "eval_steps_per_second": 3.229, + "step": 6278 + }, + { + "epoch": 344.99, + "eval_gen_len": 0.0, + "eval_loss": 0.17455054819583893, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8746, + "eval_samples_per_second": 18.725, + "eval_steps_per_second": 3.234, + "step": 6296 + }, + { + "epoch": 345.97, + "eval_gen_len": 0.0, + "eval_loss": 0.17481422424316406, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8787, + "eval_samples_per_second": 18.712, + "eval_steps_per_second": 3.232, + "step": 6314 + }, + { + "epoch": 346.96, + "eval_gen_len": 0.0, + "eval_loss": 0.1744970828294754, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8781, + "eval_samples_per_second": 18.713, + "eval_steps_per_second": 3.232, + "step": 6332 + }, + { + "epoch": 348.0, + "eval_gen_len": 0.0, + "eval_loss": 0.17412132024765015, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8833, + "eval_samples_per_second": 18.697, + "eval_steps_per_second": 3.229, + "step": 6351 + }, + { + "epoch": 348.99, + "eval_gen_len": 0.0, + "eval_loss": 0.1738380789756775, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9075, + "eval_samples_per_second": 18.621, + "eval_steps_per_second": 3.216, + "step": 6369 + }, + { + "epoch": 349.97, + "eval_gen_len": 0.0, + "eval_loss": 0.17379747331142426, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.876, + "eval_samples_per_second": 18.72, + "eval_steps_per_second": 3.234, + "step": 6387 + }, + { + "epoch": 350.96, + "eval_gen_len": 0.0, + "eval_loss": 0.17336434125900269, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8785, + "eval_samples_per_second": 18.712, + "eval_steps_per_second": 3.232, + "step": 6405 + }, + { + "epoch": 352.0, + "eval_gen_len": 0.0, + "eval_loss": 0.17307358980178833, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9197, + "eval_samples_per_second": 18.582, + "eval_steps_per_second": 3.21, + "step": 6424 + }, + { + "epoch": 352.99, + "eval_gen_len": 0.0, + "eval_loss": 0.17290958762168884, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8928, + "eval_samples_per_second": 18.667, + "eval_steps_per_second": 3.224, + "step": 6442 + }, + { + "epoch": 353.97, + "eval_gen_len": 0.0, + "eval_loss": 0.1726769059896469, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8785, + "eval_samples_per_second": 18.712, + "eval_steps_per_second": 3.232, + "step": 6460 + }, + { + "epoch": 354.96, + "eval_gen_len": 0.0, + "eval_loss": 0.17268246412277222, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8721, + "eval_samples_per_second": 18.733, + "eval_steps_per_second": 3.236, + "step": 6478 + }, + { + "epoch": 356.0, + "eval_gen_len": 0.0, + "eval_loss": 0.17257879674434662, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8803, + "eval_samples_per_second": 18.706, + "eval_steps_per_second": 3.231, + "step": 6497 + }, + { + "epoch": 356.16, + "grad_norm": 0.3692683279514313, + "learning_rate": 5.56888888888889e-06, + "loss": 0.2192, + "step": 6500 + }, + { + "epoch": 356.99, + "eval_gen_len": 0.0, + "eval_loss": 0.17185170948505402, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8845, + "eval_samples_per_second": 18.693, + "eval_steps_per_second": 3.229, + "step": 6515 + }, + { + "epoch": 357.97, + "eval_gen_len": 0.0, + "eval_loss": 0.17126674950122833, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8816, + "eval_samples_per_second": 18.702, + "eval_steps_per_second": 3.23, + "step": 6533 + }, + { + "epoch": 358.96, + "eval_gen_len": 0.0, + "eval_loss": 0.17136740684509277, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8802, + "eval_samples_per_second": 18.707, + "eval_steps_per_second": 3.231, + "step": 6551 + }, + { + "epoch": 360.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1711340844631195, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8839, + "eval_samples_per_second": 18.695, + "eval_steps_per_second": 3.229, + "step": 6570 + }, + { + "epoch": 360.99, + "eval_gen_len": 0.0, + "eval_loss": 0.1712319403886795, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8782, + "eval_samples_per_second": 18.713, + "eval_steps_per_second": 3.232, + "step": 6588 + }, + { + "epoch": 361.97, + "eval_gen_len": 0.0, + "eval_loss": 0.1710105836391449, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8829, + "eval_samples_per_second": 18.698, + "eval_steps_per_second": 3.23, + "step": 6606 + }, + { + "epoch": 362.96, + "eval_gen_len": 0.0, + "eval_loss": 0.17070402204990387, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9043, + "eval_samples_per_second": 18.631, + "eval_steps_per_second": 3.218, + "step": 6624 + }, + { + "epoch": 364.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1703125238418579, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9174, + "eval_samples_per_second": 18.589, + "eval_steps_per_second": 3.211, + "step": 6643 + }, + { + "epoch": 364.99, + "eval_gen_len": 0.0, + "eval_loss": 0.17009203135967255, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9165, + "eval_samples_per_second": 18.592, + "eval_steps_per_second": 3.211, + "step": 6661 + }, + { + "epoch": 365.97, + "eval_gen_len": 0.0, + "eval_loss": 0.1701081246137619, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8801, + "eval_samples_per_second": 18.707, + "eval_steps_per_second": 3.231, + "step": 6679 + }, + { + "epoch": 366.96, + "eval_gen_len": 0.0, + "eval_loss": 0.16997285187244415, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8816, + "eval_samples_per_second": 18.703, + "eval_steps_per_second": 3.23, + "step": 6697 + }, + { + "epoch": 368.0, + "eval_gen_len": 0.0, + "eval_loss": 0.16970933973789215, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8906, + "eval_samples_per_second": 18.674, + "eval_steps_per_second": 3.225, + "step": 6716 + }, + { + "epoch": 368.99, + "eval_gen_len": 0.0, + "eval_loss": 0.1696108877658844, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8896, + "eval_samples_per_second": 18.677, + "eval_steps_per_second": 3.226, + "step": 6734 + }, + { + "epoch": 369.97, + "eval_gen_len": 0.0, + "eval_loss": 0.1694546341896057, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9145, + "eval_samples_per_second": 18.598, + "eval_steps_per_second": 3.212, + "step": 6752 + }, + { + "epoch": 370.96, + "eval_gen_len": 0.0, + "eval_loss": 0.16927814483642578, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9179, + "eval_samples_per_second": 18.588, + "eval_steps_per_second": 3.211, + "step": 6770 + }, + { + "epoch": 372.0, + "eval_gen_len": 0.0, + "eval_loss": 0.16911160945892334, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8804, + "eval_samples_per_second": 18.706, + "eval_steps_per_second": 3.231, + "step": 6789 + }, + { + "epoch": 372.99, + "eval_gen_len": 0.0, + "eval_loss": 0.16869549453258514, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8847, + "eval_samples_per_second": 18.693, + "eval_steps_per_second": 3.229, + "step": 6807 + }, + { + "epoch": 373.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16840766370296478, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9184, + "eval_samples_per_second": 18.586, + "eval_steps_per_second": 3.21, + "step": 6825 + }, + { + "epoch": 374.96, + "eval_gen_len": 0.0, + "eval_loss": 0.16836071014404297, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9124, + "eval_samples_per_second": 18.605, + "eval_steps_per_second": 3.214, + "step": 6843 + }, + { + "epoch": 376.0, + "eval_gen_len": 0.0, + "eval_loss": 0.168260395526886, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8823, + "eval_samples_per_second": 18.7, + "eval_steps_per_second": 3.23, + "step": 6862 + }, + { + "epoch": 376.99, + "eval_gen_len": 0.0, + "eval_loss": 0.16808076202869415, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8791, + "eval_samples_per_second": 18.71, + "eval_steps_per_second": 3.232, + "step": 6880 + }, + { + "epoch": 377.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16791433095932007, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8871, + "eval_samples_per_second": 18.685, + "eval_steps_per_second": 3.227, + "step": 6898 + }, + { + "epoch": 378.96, + "eval_gen_len": 0.0, + "eval_loss": 0.16784635186195374, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8948, + "eval_samples_per_second": 18.661, + "eval_steps_per_second": 3.223, + "step": 6916 + }, + { + "epoch": 380.0, + "eval_gen_len": 0.0, + "eval_loss": 0.16753236949443817, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8837, + "eval_samples_per_second": 18.696, + "eval_steps_per_second": 3.229, + "step": 6935 + }, + { + "epoch": 380.99, + "eval_gen_len": 0.0, + "eval_loss": 0.16738325357437134, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8709, + "eval_samples_per_second": 18.736, + "eval_steps_per_second": 3.236, + "step": 6953 + }, + { + "epoch": 381.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16733896732330322, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8784, + "eval_samples_per_second": 18.713, + "eval_steps_per_second": 3.232, + "step": 6971 + }, + { + "epoch": 382.96, + "eval_gen_len": 0.0, + "eval_loss": 0.16720926761627197, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8818, + "eval_samples_per_second": 18.702, + "eval_steps_per_second": 3.23, + "step": 6989 + }, + { + "epoch": 383.56, + "grad_norm": 0.32142043113708496, + "learning_rate": 4.457777777777778e-06, + "loss": 0.211, + "step": 7000 + }, + { + "epoch": 384.0, + "eval_gen_len": 0.0, + "eval_loss": 0.16671238839626312, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8769, + "eval_samples_per_second": 18.717, + "eval_steps_per_second": 3.233, + "step": 7008 + }, + { + "epoch": 384.99, + "eval_gen_len": 0.0, + "eval_loss": 0.16662339866161346, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8678, + "eval_samples_per_second": 18.746, + "eval_steps_per_second": 3.238, + "step": 7026 + }, + { + "epoch": 385.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16629952192306519, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8786, + "eval_samples_per_second": 18.712, + "eval_steps_per_second": 3.232, + "step": 7044 + }, + { + "epoch": 386.96, + "eval_gen_len": 0.0, + "eval_loss": 0.16644792258739471, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8767, + "eval_samples_per_second": 18.718, + "eval_steps_per_second": 3.233, + "step": 7062 + }, + { + "epoch": 388.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1665712594985962, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8895, + "eval_samples_per_second": 18.677, + "eval_steps_per_second": 3.226, + "step": 7081 + }, + { + "epoch": 388.99, + "eval_gen_len": 0.0, + "eval_loss": 0.16607053577899933, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8751, + "eval_samples_per_second": 18.723, + "eval_steps_per_second": 3.234, + "step": 7099 + }, + { + "epoch": 389.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16597412526607513, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8717, + "eval_samples_per_second": 18.734, + "eval_steps_per_second": 3.236, + "step": 7117 + }, + { + "epoch": 390.96, + "eval_gen_len": 0.0, + "eval_loss": 0.16590653359889984, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8807, + "eval_samples_per_second": 18.705, + "eval_steps_per_second": 3.231, + "step": 7135 + }, + { + "epoch": 392.0, + "eval_gen_len": 0.0, + "eval_loss": 0.16565540432929993, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8901, + "eval_samples_per_second": 18.675, + "eval_steps_per_second": 3.226, + "step": 7154 + }, + { + "epoch": 392.99, + "eval_gen_len": 0.0, + "eval_loss": 0.1656540036201477, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8795, + "eval_samples_per_second": 18.709, + "eval_steps_per_second": 3.232, + "step": 7172 + }, + { + "epoch": 393.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16561686992645264, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.891, + "eval_samples_per_second": 18.672, + "eval_steps_per_second": 3.225, + "step": 7190 + }, + { + "epoch": 394.96, + "eval_gen_len": 0.0, + "eval_loss": 0.16566209495067596, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8782, + "eval_samples_per_second": 18.713, + "eval_steps_per_second": 3.232, + "step": 7208 + }, + { + "epoch": 396.0, + "eval_gen_len": 0.0, + "eval_loss": 0.16528266668319702, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9003, + "eval_samples_per_second": 18.643, + "eval_steps_per_second": 3.22, + "step": 7227 + }, + { + "epoch": 396.99, + "eval_gen_len": 0.0, + "eval_loss": 0.16509102284908295, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8787, + "eval_samples_per_second": 18.712, + "eval_steps_per_second": 3.232, + "step": 7245 + }, + { + "epoch": 397.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16473665833473206, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8817, + "eval_samples_per_second": 18.702, + "eval_steps_per_second": 3.23, + "step": 7263 + }, + { + "epoch": 398.96, + "eval_gen_len": 0.0, + "eval_loss": 0.16476485133171082, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8783, + "eval_samples_per_second": 18.713, + "eval_steps_per_second": 3.232, + "step": 7281 + }, + { + "epoch": 400.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1648998111486435, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8847, + "eval_samples_per_second": 18.692, + "eval_steps_per_second": 3.229, + "step": 7300 + }, + { + "epoch": 400.99, + "eval_gen_len": 0.0, + "eval_loss": 0.16450464725494385, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8833, + "eval_samples_per_second": 18.697, + "eval_steps_per_second": 3.23, + "step": 7318 + }, + { + "epoch": 401.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16408170759677887, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9273, + "eval_samples_per_second": 18.558, + "eval_steps_per_second": 3.206, + "step": 7336 + }, + { + "epoch": 402.96, + "eval_gen_len": 0.0, + "eval_loss": 0.16408471763134003, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9148, + "eval_samples_per_second": 18.597, + "eval_steps_per_second": 3.212, + "step": 7354 + }, + { + "epoch": 404.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1640195995569229, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.924, + "eval_samples_per_second": 18.569, + "eval_steps_per_second": 3.207, + "step": 7373 + }, + { + "epoch": 404.99, + "eval_gen_len": 0.0, + "eval_loss": 0.16400323808193207, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9183, + "eval_samples_per_second": 18.586, + "eval_steps_per_second": 3.21, + "step": 7391 + }, + { + "epoch": 405.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16385148465633392, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9116, + "eval_samples_per_second": 18.608, + "eval_steps_per_second": 3.214, + "step": 7409 + }, + { + "epoch": 406.96, + "eval_gen_len": 0.0, + "eval_loss": 0.163739874958992, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9025, + "eval_samples_per_second": 18.636, + "eval_steps_per_second": 3.219, + "step": 7427 + }, + { + "epoch": 408.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1632901281118393, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8717, + "eval_samples_per_second": 18.734, + "eval_steps_per_second": 3.236, + "step": 7446 + }, + { + "epoch": 408.99, + "eval_gen_len": 0.0, + "eval_loss": 0.1632470041513443, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8869, + "eval_samples_per_second": 18.686, + "eval_steps_per_second": 3.228, + "step": 7464 + }, + { + "epoch": 409.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16338156163692474, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.903, + "eval_samples_per_second": 18.635, + "eval_steps_per_second": 3.219, + "step": 7482 + }, + { + "epoch": 410.96, + "grad_norm": 0.4017387628555298, + "learning_rate": 3.346666666666667e-06, + "loss": 0.2061, + "step": 7500 + }, + { + "epoch": 410.96, + "eval_gen_len": 0.0, + "eval_loss": 0.16326506435871124, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8869, + "eval_samples_per_second": 18.686, + "eval_steps_per_second": 3.228, + "step": 7500 + }, + { + "epoch": 412.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1629171222448349, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8877, + "eval_samples_per_second": 18.683, + "eval_steps_per_second": 3.227, + "step": 7519 + }, + { + "epoch": 412.99, + "eval_gen_len": 0.0, + "eval_loss": 0.1628817468881607, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9117, + "eval_samples_per_second": 18.607, + "eval_steps_per_second": 3.214, + "step": 7537 + }, + { + "epoch": 413.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16285060346126556, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9244, + "eval_samples_per_second": 18.567, + "eval_steps_per_second": 3.207, + "step": 7555 + }, + { + "epoch": 414.96, + "eval_gen_len": 0.0, + "eval_loss": 0.16274811327457428, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8821, + "eval_samples_per_second": 18.701, + "eval_steps_per_second": 3.23, + "step": 7573 + }, + { + "epoch": 416.0, + "eval_gen_len": 0.0, + "eval_loss": 0.16286291182041168, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.918, + "eval_samples_per_second": 18.587, + "eval_steps_per_second": 3.211, + "step": 7592 + }, + { + "epoch": 416.99, + "eval_gen_len": 0.0, + "eval_loss": 0.16281896829605103, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8847, + "eval_samples_per_second": 18.693, + "eval_steps_per_second": 3.229, + "step": 7610 + }, + { + "epoch": 417.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16268208622932434, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9077, + "eval_samples_per_second": 18.62, + "eval_steps_per_second": 3.216, + "step": 7628 + }, + { + "epoch": 418.96, + "eval_gen_len": 0.0, + "eval_loss": 0.16232354938983917, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9153, + "eval_samples_per_second": 18.596, + "eval_steps_per_second": 3.212, + "step": 7646 + }, + { + "epoch": 420.0, + "eval_gen_len": 0.0, + "eval_loss": 0.16238705813884735, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9249, + "eval_samples_per_second": 18.566, + "eval_steps_per_second": 3.207, + "step": 7665 + }, + { + "epoch": 420.99, + "eval_gen_len": 0.0, + "eval_loss": 0.16214394569396973, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8853, + "eval_samples_per_second": 18.691, + "eval_steps_per_second": 3.228, + "step": 7683 + }, + { + "epoch": 421.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16213367879390717, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.869, + "eval_samples_per_second": 18.743, + "eval_steps_per_second": 3.237, + "step": 7701 + }, + { + "epoch": 422.96, + "eval_gen_len": 0.0, + "eval_loss": 0.1621711403131485, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.869, + "eval_samples_per_second": 18.743, + "eval_steps_per_second": 3.237, + "step": 7719 + }, + { + "epoch": 424.0, + "eval_gen_len": 0.0, + "eval_loss": 0.16199961304664612, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8763, + "eval_samples_per_second": 18.719, + "eval_steps_per_second": 3.233, + "step": 7738 + }, + { + "epoch": 424.99, + "eval_gen_len": 0.0, + "eval_loss": 0.1616300344467163, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9176, + "eval_samples_per_second": 18.589, + "eval_steps_per_second": 3.211, + "step": 7756 + }, + { + "epoch": 425.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16134707629680634, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9055, + "eval_samples_per_second": 18.627, + "eval_steps_per_second": 3.217, + "step": 7774 + }, + { + "epoch": 426.96, + "eval_gen_len": 0.0, + "eval_loss": 0.1613784283399582, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8741, + "eval_samples_per_second": 18.726, + "eval_steps_per_second": 3.235, + "step": 7792 + }, + { + "epoch": 428.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1613391935825348, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9229, + "eval_samples_per_second": 18.572, + "eval_steps_per_second": 3.208, + "step": 7811 + }, + { + "epoch": 428.99, + "eval_gen_len": 0.0, + "eval_loss": 0.1613980233669281, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8737, + "eval_samples_per_second": 18.728, + "eval_steps_per_second": 3.235, + "step": 7829 + }, + { + "epoch": 429.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16145525872707367, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8935, + "eval_samples_per_second": 18.664, + "eval_steps_per_second": 3.224, + "step": 7847 + }, + { + "epoch": 430.96, + "eval_gen_len": 0.0, + "eval_loss": 0.16126497089862823, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8806, + "eval_samples_per_second": 18.705, + "eval_steps_per_second": 3.231, + "step": 7865 + }, + { + "epoch": 432.0, + "eval_gen_len": 0.0, + "eval_loss": 0.16113270819187164, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8882, + "eval_samples_per_second": 18.681, + "eval_steps_per_second": 3.227, + "step": 7884 + }, + { + "epoch": 432.99, + "eval_gen_len": 0.0, + "eval_loss": 0.16119304299354553, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8803, + "eval_samples_per_second": 18.707, + "eval_steps_per_second": 3.231, + "step": 7902 + }, + { + "epoch": 433.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16113385558128357, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8923, + "eval_samples_per_second": 18.668, + "eval_steps_per_second": 3.225, + "step": 7920 + }, + { + "epoch": 434.96, + "eval_gen_len": 0.0, + "eval_loss": 0.1608574539422989, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8855, + "eval_samples_per_second": 18.69, + "eval_steps_per_second": 3.228, + "step": 7938 + }, + { + "epoch": 436.0, + "eval_gen_len": 0.0, + "eval_loss": 0.16086578369140625, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8752, + "eval_samples_per_second": 18.723, + "eval_steps_per_second": 3.234, + "step": 7957 + }, + { + "epoch": 436.99, + "eval_gen_len": 0.0, + "eval_loss": 0.1609336882829666, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8764, + "eval_samples_per_second": 18.719, + "eval_steps_per_second": 3.233, + "step": 7975 + }, + { + "epoch": 437.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16086000204086304, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8862, + "eval_samples_per_second": 18.688, + "eval_steps_per_second": 3.228, + "step": 7993 + }, + { + "epoch": 438.36, + "grad_norm": 0.3427538573741913, + "learning_rate": 2.235555555555556e-06, + "loss": 0.2001, + "step": 8000 + }, + { + "epoch": 438.96, + "eval_gen_len": 0.0, + "eval_loss": 0.16061532497406006, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8915, + "eval_samples_per_second": 18.671, + "eval_steps_per_second": 3.225, + "step": 8011 + }, + { + "epoch": 440.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1605236977338791, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8957, + "eval_samples_per_second": 18.658, + "eval_steps_per_second": 3.223, + "step": 8030 + }, + { + "epoch": 440.99, + "eval_gen_len": 0.0, + "eval_loss": 0.16052110493183136, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9116, + "eval_samples_per_second": 18.608, + "eval_steps_per_second": 3.214, + "step": 8048 + }, + { + "epoch": 441.97, + "eval_gen_len": 0.0, + "eval_loss": 0.1603657305240631, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8965, + "eval_samples_per_second": 18.655, + "eval_steps_per_second": 3.222, + "step": 8066 + }, + { + "epoch": 442.96, + "eval_gen_len": 0.0, + "eval_loss": 0.1602524369955063, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.881, + "eval_samples_per_second": 18.704, + "eval_steps_per_second": 3.231, + "step": 8084 + }, + { + "epoch": 444.0, + "eval_gen_len": 0.0, + "eval_loss": 0.16014447808265686, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8914, + "eval_samples_per_second": 18.671, + "eval_steps_per_second": 3.225, + "step": 8103 + }, + { + "epoch": 444.99, + "eval_gen_len": 0.0, + "eval_loss": 0.1601133793592453, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8836, + "eval_samples_per_second": 18.696, + "eval_steps_per_second": 3.229, + "step": 8121 + }, + { + "epoch": 445.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16021089255809784, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8829, + "eval_samples_per_second": 18.698, + "eval_steps_per_second": 3.23, + "step": 8139 + }, + { + "epoch": 446.96, + "eval_gen_len": 0.0, + "eval_loss": 0.1603224277496338, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8753, + "eval_samples_per_second": 18.723, + "eval_steps_per_second": 3.234, + "step": 8157 + }, + { + "epoch": 448.0, + "eval_gen_len": 0.0, + "eval_loss": 0.16021238267421722, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9018, + "eval_samples_per_second": 18.638, + "eval_steps_per_second": 3.219, + "step": 8176 + }, + { + "epoch": 448.99, + "eval_gen_len": 0.0, + "eval_loss": 0.16013209521770477, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.9114, + "eval_samples_per_second": 18.608, + "eval_steps_per_second": 3.214, + "step": 8194 + }, + { + "epoch": 449.97, + "eval_gen_len": 0.0, + "eval_loss": 0.16003312170505524, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8898, + "eval_samples_per_second": 18.676, + "eval_steps_per_second": 3.226, + "step": 8212 + }, + { + "epoch": 450.96, + "eval_gen_len": 0.0, + "eval_loss": 0.15982508659362793, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8842, + "eval_samples_per_second": 18.694, + "eval_steps_per_second": 3.229, + "step": 8230 + }, + { + "epoch": 452.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1597803682088852, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8948, + "eval_samples_per_second": 18.661, + "eval_steps_per_second": 3.223, + "step": 8249 + }, + { + "epoch": 452.99, + "eval_gen_len": 0.0, + "eval_loss": 0.15985353291034698, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.875, + "eval_samples_per_second": 18.723, + "eval_steps_per_second": 3.234, + "step": 8267 + }, + { + "epoch": 453.97, + "eval_gen_len": 0.0, + "eval_loss": 0.1597578376531601, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8791, + "eval_samples_per_second": 18.71, + "eval_steps_per_second": 3.232, + "step": 8285 + }, + { + "epoch": 454.96, + "eval_gen_len": 0.0, + "eval_loss": 0.15968413650989532, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8847, + "eval_samples_per_second": 18.693, + "eval_steps_per_second": 3.229, + "step": 8303 + }, + { + "epoch": 456.0, + "eval_gen_len": 0.0, + "eval_loss": 0.15952864289283752, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8965, + "eval_samples_per_second": 18.655, + "eval_steps_per_second": 3.222, + "step": 8322 + }, + { + "epoch": 456.99, + "eval_gen_len": 0.0, + "eval_loss": 0.15957479178905487, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8822, + "eval_samples_per_second": 18.701, + "eval_steps_per_second": 3.23, + "step": 8340 + }, + { + "epoch": 457.97, + "eval_gen_len": 0.0, + "eval_loss": 0.15952877700328827, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8842, + "eval_samples_per_second": 18.694, + "eval_steps_per_second": 3.229, + "step": 8358 + }, + { + "epoch": 458.96, + "eval_gen_len": 0.0, + "eval_loss": 0.1594904661178589, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8814, + "eval_samples_per_second": 18.703, + "eval_steps_per_second": 3.231, + "step": 8376 + }, + { + "epoch": 460.0, + "eval_gen_len": 0.0, + "eval_loss": 0.15939652919769287, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8837, + "eval_samples_per_second": 18.696, + "eval_steps_per_second": 3.229, + "step": 8395 + }, + { + "epoch": 460.99, + "eval_gen_len": 0.0, + "eval_loss": 0.15931101143360138, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8785, + "eval_samples_per_second": 18.712, + "eval_steps_per_second": 3.232, + "step": 8413 + }, + { + "epoch": 461.97, + "eval_gen_len": 0.0, + "eval_loss": 0.15929743647575378, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8954, + "eval_samples_per_second": 18.659, + "eval_steps_per_second": 3.223, + "step": 8431 + }, + { + "epoch": 462.96, + "eval_gen_len": 0.0, + "eval_loss": 0.15925699472427368, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8822, + "eval_samples_per_second": 18.7, + "eval_steps_per_second": 3.23, + "step": 8449 + }, + { + "epoch": 464.0, + "eval_gen_len": 0.0, + "eval_loss": 0.1592811495065689, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8854, + "eval_samples_per_second": 18.69, + "eval_steps_per_second": 3.228, + "step": 8468 + }, + { + "epoch": 464.99, + "eval_gen_len": 0.0, + "eval_loss": 0.15929646790027618, + "eval_rouge1": 0.0, + "eval_rouge2": 0.0, + "eval_rougeL": 0.0, + "eval_rougeLsum": 0.0, + "eval_runtime": 5.8852, + "eval_samples_per_second": 18.691, + "eval_steps_per_second": 3.228, + "step": 8486 + }, + { + "epoch": 465.75, + "grad_norm": 0.3651351034641266, + "learning_rate": 1.1266666666666667e-06, + "loss": 0.1976, + "step": 8500 + } + ], + "logging_steps": 500, + "max_steps": 9000, + "num_input_tokens_seen": 0, + "num_train_epochs": 500, + "save_steps": 500, + "total_flos": 2.4787408345104384e+17, + "train_batch_size": 6, + "trial_name": null, + "trial_params": null +}