{ "best_metric": 54.4377, "best_model_checkpoint": "/uoa/home/s02sd1/Desktop/Project data/pytorch_project/facebook_28.03/checkpoint-53000", "epoch": 78.98448519040903, "global_step": 56000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.71, "learning_rate": 2.978843441466855e-06, "loss": 0.6224, "step": 500 }, { "epoch": 1.41, "learning_rate": 2.9576868829337095e-06, "loss": 0.5049, "step": 1000 }, { "epoch": 1.41, "eval_gen_len": 167.2707, "eval_loss": 0.75015789270401, "eval_rouge1": 61.2286, "eval_rouge2": 43.2506, "eval_rougeL": 42.2051, "eval_rougeLsum": 46.3795, "eval_runtime": 5804.0101, "eval_samples_per_second": 0.06, "eval_steps_per_second": 0.003, "step": 1000 }, { "epoch": 2.12, "learning_rate": 2.9365303244005645e-06, "loss": 0.4992, "step": 1500 }, { "epoch": 2.82, "learning_rate": 2.915373765867419e-06, "loss": 0.4523, "step": 2000 }, { "epoch": 2.82, "eval_gen_len": 169.4872, "eval_loss": 0.7313751578330994, "eval_rouge1": 63.5466, "eval_rouge2": 45.7831, "eval_rougeL": 44.4885, "eval_rougeLsum": 48.6313, "eval_runtime": 4918.4156, "eval_samples_per_second": 0.071, "eval_steps_per_second": 0.004, "step": 2000 }, { "epoch": 3.53, "learning_rate": 2.894217207334274e-06, "loss": 0.4422, "step": 2500 }, { "epoch": 4.23, "learning_rate": 2.8730606488011285e-06, "loss": 0.4036, "step": 3000 }, { "epoch": 4.23, "eval_gen_len": 160.4074, "eval_loss": 0.7353909611701965, "eval_rouge1": 64.7439, "eval_rouge2": 47.1723, "eval_rougeL": 45.9479, "eval_rougeLsum": 50.2341, "eval_runtime": 4241.5946, "eval_samples_per_second": 0.083, "eval_steps_per_second": 0.004, "step": 3000 }, { "epoch": 4.94, "learning_rate": 2.851904090267983e-06, "loss": 0.4028, "step": 3500 }, { "epoch": 5.64, "learning_rate": 2.830747531734838e-06, "loss": 0.3722, "step": 4000 }, { "epoch": 5.64, "eval_gen_len": 162.0085, "eval_loss": 0.72761070728302, "eval_rouge1": 65.6958, "eval_rouge2": 48.5007, "eval_rougeL": 46.6737, "eval_rougeLsum": 50.9835, "eval_runtime": 4232.4341, "eval_samples_per_second": 0.083, "eval_steps_per_second": 0.004, "step": 4000 }, { "epoch": 6.35, "learning_rate": 2.8095909732016925e-06, "loss": 0.3749, "step": 4500 }, { "epoch": 7.05, "learning_rate": 2.7884344146685474e-06, "loss": 0.3482, "step": 5000 }, { "epoch": 7.05, "eval_gen_len": 163.9231, "eval_loss": 0.7417466640472412, "eval_rouge1": 65.933, "eval_rouge2": 48.921, "eval_rougeL": 47.5877, "eval_rougeLsum": 52.0114, "eval_runtime": 4836.5569, "eval_samples_per_second": 0.073, "eval_steps_per_second": 0.004, "step": 5000 }, { "epoch": 7.76, "learning_rate": 2.767277856135402e-06, "loss": 0.3312, "step": 5500 }, { "epoch": 8.46, "learning_rate": 2.746121297602257e-06, "loss": 0.3174, "step": 6000 }, { "epoch": 8.46, "eval_gen_len": 163.5527, "eval_loss": 0.7546955943107605, "eval_rouge1": 65.9445, "eval_rouge2": 48.9117, "eval_rougeL": 48.312, "eval_rougeLsum": 52.4923, "eval_runtime": 5417.8435, "eval_samples_per_second": 0.065, "eval_steps_per_second": 0.003, "step": 6000 }, { "epoch": 9.17, "learning_rate": 2.7249647390691114e-06, "loss": 0.3074, "step": 6500 }, { "epoch": 9.87, "learning_rate": 2.7038081805359663e-06, "loss": 0.2922, "step": 7000 }, { "epoch": 9.87, "eval_gen_len": 163.1311, "eval_loss": 0.754240095615387, "eval_rouge1": 66.6619, "eval_rouge2": 49.9256, "eval_rougeL": 48.7757, "eval_rougeLsum": 53.1916, "eval_runtime": 5340.7776, "eval_samples_per_second": 0.066, "eval_steps_per_second": 0.003, "step": 7000 }, { "epoch": 10.58, "learning_rate": 2.6826516220028213e-06, "loss": 0.281, "step": 7500 }, { "epoch": 11.28, "learning_rate": 2.6614950634696754e-06, "loss": 0.276, "step": 8000 }, { "epoch": 11.28, "eval_gen_len": 165.0826, "eval_loss": 0.7793148756027222, "eval_rouge1": 66.8911, "eval_rouge2": 50.331, "eval_rougeL": 49.8396, "eval_rougeLsum": 54.0617, "eval_runtime": 6037.4348, "eval_samples_per_second": 0.058, "eval_steps_per_second": 0.003, "step": 8000 }, { "epoch": 11.99, "learning_rate": 2.6403385049365303e-06, "loss": 0.2557, "step": 8500 }, { "epoch": 12.69, "learning_rate": 2.6191819464033853e-06, "loss": 0.2464, "step": 9000 }, { "epoch": 12.69, "eval_gen_len": 164.9943, "eval_loss": 0.7877324819564819, "eval_rouge1": 67.1466, "eval_rouge2": 50.939, "eval_rougeL": 50.2255, "eval_rougeLsum": 54.5145, "eval_runtime": 5837.726, "eval_samples_per_second": 0.06, "eval_steps_per_second": 0.003, "step": 9000 }, { "epoch": 13.4, "learning_rate": 2.5980253878702398e-06, "loss": 0.2369, "step": 9500 }, { "epoch": 14.1, "learning_rate": 2.5768688293370947e-06, "loss": 0.2265, "step": 10000 }, { "epoch": 14.1, "eval_gen_len": 162.1966, "eval_loss": 0.806422233581543, "eval_rouge1": 67.5137, "eval_rouge2": 51.2801, "eval_rougeL": 51.2323, "eval_rougeLsum": 55.426, "eval_runtime": 5856.0164, "eval_samples_per_second": 0.06, "eval_steps_per_second": 0.003, "step": 10000 }, { "epoch": 14.81, "learning_rate": 2.5557122708039492e-06, "loss": 0.2215, "step": 10500 }, { "epoch": 15.51, "learning_rate": 2.534555712270804e-06, "loss": 0.2004, "step": 11000 }, { "epoch": 15.51, "eval_gen_len": 161.2365, "eval_loss": 0.8361101746559143, "eval_rouge1": 67.9515, "eval_rouge2": 51.5857, "eval_rougeL": 51.5796, "eval_rougeLsum": 55.9738, "eval_runtime": 5461.4431, "eval_samples_per_second": 0.064, "eval_steps_per_second": 0.003, "step": 11000 }, { "epoch": 16.22, "learning_rate": 2.5133991537376587e-06, "loss": 0.2039, "step": 11500 }, { "epoch": 16.93, "learning_rate": 2.4922425952045136e-06, "loss": 0.1903, "step": 12000 }, { "epoch": 16.93, "eval_gen_len": 163.8234, "eval_loss": 0.8543212413787842, "eval_rouge1": 67.8403, "eval_rouge2": 51.7839, "eval_rougeL": 51.8869, "eval_rougeLsum": 56.2681, "eval_runtime": 5935.4542, "eval_samples_per_second": 0.059, "eval_steps_per_second": 0.003, "step": 12000 }, { "epoch": 17.63, "learning_rate": 2.471086036671368e-06, "loss": 0.1816, "step": 12500 }, { "epoch": 18.34, "learning_rate": 2.4499294781382227e-06, "loss": 0.1782, "step": 13000 }, { "epoch": 18.34, "eval_gen_len": 161.2137, "eval_loss": 0.8672023415565491, "eval_rouge1": 68.2574, "eval_rouge2": 52.2649, "eval_rougeL": 52.3755, "eval_rougeLsum": 56.4509, "eval_runtime": 5698.6716, "eval_samples_per_second": 0.062, "eval_steps_per_second": 0.003, "step": 13000 }, { "epoch": 19.04, "learning_rate": 2.4287729196050776e-06, "loss": 0.1686, "step": 13500 }, { "epoch": 19.75, "learning_rate": 2.407616361071932e-06, "loss": 0.1581, "step": 14000 }, { "epoch": 19.75, "eval_gen_len": 163.0057, "eval_loss": 0.9022919535636902, "eval_rouge1": 67.8924, "eval_rouge2": 51.8802, "eval_rougeL": 52.5074, "eval_rougeLsum": 56.5946, "eval_runtime": 5838.2781, "eval_samples_per_second": 0.06, "eval_steps_per_second": 0.003, "step": 14000 }, { "epoch": 20.45, "learning_rate": 2.386459802538787e-06, "loss": 0.1525, "step": 14500 }, { "epoch": 21.16, "learning_rate": 2.3653032440056416e-06, "loss": 0.1516, "step": 15000 }, { "epoch": 21.16, "eval_gen_len": 163.0883, "eval_loss": 0.9338411092758179, "eval_rouge1": 68.0101, "eval_rouge2": 51.7535, "eval_rougeL": 52.2777, "eval_rougeLsum": 56.692, "eval_runtime": 6063.036, "eval_samples_per_second": 0.058, "eval_steps_per_second": 0.003, "step": 15000 }, { "epoch": 21.86, "learning_rate": 2.3441466854724966e-06, "loss": 0.143, "step": 15500 }, { "epoch": 22.57, "learning_rate": 2.3229901269393515e-06, "loss": 0.1358, "step": 16000 }, { "epoch": 22.57, "eval_gen_len": 162.3504, "eval_loss": 0.9403561353683472, "eval_rouge1": 68.4997, "eval_rouge2": 52.385, "eval_rougeL": 52.7137, "eval_rougeLsum": 57.2165, "eval_runtime": 5678.9898, "eval_samples_per_second": 0.062, "eval_steps_per_second": 0.003, "step": 16000 }, { "epoch": 23.27, "learning_rate": 2.301833568406206e-06, "loss": 0.1268, "step": 16500 }, { "epoch": 23.98, "learning_rate": 2.280677009873061e-06, "loss": 0.1256, "step": 17000 }, { "epoch": 23.98, "eval_gen_len": 163.6467, "eval_loss": 0.9707176685333252, "eval_rouge1": 68.3426, "eval_rouge2": 52.1326, "eval_rougeL": 52.6279, "eval_rougeLsum": 57.0803, "eval_runtime": 6169.909, "eval_samples_per_second": 0.057, "eval_steps_per_second": 0.003, "step": 17000 }, { "epoch": 24.68, "learning_rate": 2.2595204513399155e-06, "loss": 0.1158, "step": 17500 }, { "epoch": 25.39, "learning_rate": 2.23836389280677e-06, "loss": 0.1119, "step": 18000 }, { "epoch": 25.39, "eval_gen_len": 160.9316, "eval_loss": 1.0022860765457153, "eval_rouge1": 69.1266, "eval_rouge2": 52.9311, "eval_rougeL": 53.9465, "eval_rougeLsum": 58.0804, "eval_runtime": 5772.1926, "eval_samples_per_second": 0.061, "eval_steps_per_second": 0.003, "step": 18000 }, { "epoch": 26.09, "learning_rate": 2.217207334273625e-06, "loss": 0.1125, "step": 18500 }, { "epoch": 26.8, "learning_rate": 2.1960507757404795e-06, "loss": 0.1038, "step": 19000 }, { "epoch": 26.8, "eval_gen_len": 161.8746, "eval_loss": 1.019147515296936, "eval_rouge1": 68.6629, "eval_rouge2": 52.6518, "eval_rougeL": 53.1635, "eval_rougeLsum": 57.4514, "eval_runtime": 5939.4826, "eval_samples_per_second": 0.059, "eval_steps_per_second": 0.003, "step": 19000 }, { "epoch": 27.5, "learning_rate": 2.1748942172073344e-06, "loss": 0.098, "step": 19500 }, { "epoch": 28.21, "learning_rate": 2.153737658674189e-06, "loss": 0.0944, "step": 20000 }, { "epoch": 28.21, "eval_gen_len": 161.4359, "eval_loss": 1.0427296161651611, "eval_rouge1": 69.1063, "eval_rouge2": 53.0226, "eval_rougeL": 53.5979, "eval_rougeLsum": 57.8602, "eval_runtime": 6011.7516, "eval_samples_per_second": 0.058, "eval_steps_per_second": 0.003, "step": 20000 }, { "epoch": 28.91, "learning_rate": 2.132581100141044e-06, "loss": 0.0929, "step": 20500 }, { "epoch": 29.62, "learning_rate": 2.1114245416078984e-06, "loss": 0.0843, "step": 21000 }, { "epoch": 29.62, "eval_gen_len": 161.849, "eval_loss": 1.0581331253051758, "eval_rouge1": 68.0982, "eval_rouge2": 52.0153, "eval_rougeL": 53.1935, "eval_rougeLsum": 57.443, "eval_runtime": 5905.725, "eval_samples_per_second": 0.059, "eval_steps_per_second": 0.003, "step": 21000 }, { "epoch": 30.32, "learning_rate": 2.0902679830747533e-06, "loss": 0.0846, "step": 21500 }, { "epoch": 31.03, "learning_rate": 2.069111424541608e-06, "loss": 0.082, "step": 22000 }, { "epoch": 31.03, "eval_gen_len": 161.0684, "eval_loss": 1.0757286548614502, "eval_rouge1": 68.6318, "eval_rouge2": 52.8093, "eval_rougeL": 53.7214, "eval_rougeLsum": 57.775, "eval_runtime": 6004.4709, "eval_samples_per_second": 0.058, "eval_steps_per_second": 0.003, "step": 22000 }, { "epoch": 31.73, "learning_rate": 2.047954866008463e-06, "loss": 0.0735, "step": 22500 }, { "epoch": 32.44, "learning_rate": 2.0267983074753173e-06, "loss": 0.0756, "step": 23000 }, { "epoch": 32.44, "eval_gen_len": 162.4245, "eval_loss": 1.0798823833465576, "eval_rouge1": 68.557, "eval_rouge2": 52.4644, "eval_rougeL": 53.7606, "eval_rougeLsum": 57.8967, "eval_runtime": 6130.0883, "eval_samples_per_second": 0.057, "eval_steps_per_second": 0.003, "step": 23000 }, { "epoch": 33.15, "learning_rate": 2.005641748942172e-06, "loss": 0.0706, "step": 23500 }, { "epoch": 33.85, "learning_rate": 1.9844851904090268e-06, "loss": 0.0674, "step": 24000 }, { "epoch": 33.85, "eval_gen_len": 162.963, "eval_loss": 1.1047182083129883, "eval_rouge1": 68.6765, "eval_rouge2": 52.713, "eval_rougeL": 53.5339, "eval_rougeLsum": 57.7006, "eval_runtime": 6102.6451, "eval_samples_per_second": 0.058, "eval_steps_per_second": 0.003, "step": 24000 }, { "epoch": 34.56, "learning_rate": 1.9633286318758817e-06, "loss": 0.0647, "step": 24500 }, { "epoch": 35.26, "learning_rate": 1.9421720733427362e-06, "loss": 0.063, "step": 25000 }, { "epoch": 35.26, "eval_gen_len": 161.9003, "eval_loss": 1.1172865629196167, "eval_rouge1": 69.1911, "eval_rouge2": 53.1382, "eval_rougeL": 53.8971, "eval_rougeLsum": 58.2079, "eval_runtime": 5980.899, "eval_samples_per_second": 0.059, "eval_steps_per_second": 0.003, "step": 25000 }, { "epoch": 35.97, "learning_rate": 1.921015514809591e-06, "loss": 0.0599, "step": 25500 }, { "epoch": 36.67, "learning_rate": 1.8998589562764457e-06, "loss": 0.0566, "step": 26000 }, { "epoch": 36.67, "eval_gen_len": 162.3362, "eval_loss": 1.131187081336975, "eval_rouge1": 69.0624, "eval_rouge2": 53.101, "eval_rougeL": 54.2012, "eval_rougeLsum": 58.5665, "eval_runtime": 5792.9256, "eval_samples_per_second": 0.061, "eval_steps_per_second": 0.003, "step": 26000 }, { "epoch": 37.38, "learning_rate": 1.8787023977433004e-06, "loss": 0.0557, "step": 26500 }, { "epoch": 38.08, "learning_rate": 1.8575458392101554e-06, "loss": 0.0532, "step": 27000 }, { "epoch": 38.08, "eval_gen_len": 161.7009, "eval_loss": 1.1492334604263306, "eval_rouge1": 68.8232, "eval_rouge2": 52.7547, "eval_rougeL": 53.914, "eval_rougeLsum": 58.1061, "eval_runtime": 6017.4685, "eval_samples_per_second": 0.058, "eval_steps_per_second": 0.003, "step": 27000 }, { "epoch": 38.79, "learning_rate": 1.8363892806770101e-06, "loss": 0.0518, "step": 27500 }, { "epoch": 39.49, "learning_rate": 1.8152327221438644e-06, "loss": 0.0482, "step": 28000 }, { "epoch": 39.49, "eval_gen_len": 161.4046, "eval_loss": 1.1425426006317139, "eval_rouge1": 68.8641, "eval_rouge2": 52.7579, "eval_rougeL": 54.0604, "eval_rougeLsum": 58.3338, "eval_runtime": 5870.7466, "eval_samples_per_second": 0.06, "eval_steps_per_second": 0.003, "step": 28000 }, { "epoch": 40.2, "learning_rate": 1.7940761636107192e-06, "loss": 0.0459, "step": 28500 }, { "epoch": 40.9, "learning_rate": 1.772919605077574e-06, "loss": 0.0458, "step": 29000 }, { "epoch": 40.9, "eval_gen_len": 156.8718, "eval_loss": 1.1709474325180054, "eval_rouge1": 69.9022, "eval_rouge2": 53.8817, "eval_rougeL": 55.3029, "eval_rougeLsum": 59.6658, "eval_runtime": 5805.0163, "eval_samples_per_second": 0.06, "eval_steps_per_second": 0.003, "step": 29000 }, { "epoch": 41.61, "learning_rate": 1.7517630465444288e-06, "loss": 0.0417, "step": 29500 }, { "epoch": 42.31, "learning_rate": 1.7306064880112836e-06, "loss": 0.0421, "step": 30000 }, { "epoch": 42.31, "eval_gen_len": 161.1595, "eval_loss": 1.1962451934814453, "eval_rouge1": 69.8385, "eval_rouge2": 53.9521, "eval_rougeL": 55.043, "eval_rougeLsum": 59.4319, "eval_runtime": 5621.1651, "eval_samples_per_second": 0.062, "eval_steps_per_second": 0.003, "step": 30000 }, { "epoch": 43.02, "learning_rate": 1.7094499294781383e-06, "loss": 0.0415, "step": 30500 }, { "epoch": 43.72, "learning_rate": 1.688293370944993e-06, "loss": 0.0378, "step": 31000 }, { "epoch": 43.72, "eval_gen_len": 161.1368, "eval_loss": 1.1894584894180298, "eval_rouge1": 69.2784, "eval_rouge2": 53.3896, "eval_rougeL": 54.7309, "eval_rougeLsum": 58.9909, "eval_runtime": 5848.8931, "eval_samples_per_second": 0.06, "eval_steps_per_second": 0.003, "step": 31000 }, { "epoch": 44.43, "learning_rate": 1.6671368124118478e-06, "loss": 0.0382, "step": 31500 }, { "epoch": 45.13, "learning_rate": 1.6459802538787025e-06, "loss": 0.0351, "step": 32000 }, { "epoch": 45.13, "eval_gen_len": 161.0627, "eval_loss": 1.2170298099517822, "eval_rouge1": 69.2182, "eval_rouge2": 53.2021, "eval_rougeL": 54.8192, "eval_rougeLsum": 58.858, "eval_runtime": 5747.8165, "eval_samples_per_second": 0.061, "eval_steps_per_second": 0.003, "step": 32000 }, { "epoch": 45.84, "learning_rate": 1.6248236953455572e-06, "loss": 0.0348, "step": 32500 }, { "epoch": 46.54, "learning_rate": 1.6036671368124117e-06, "loss": 0.0328, "step": 33000 }, { "epoch": 46.54, "eval_gen_len": 162.3647, "eval_loss": 1.2242525815963745, "eval_rouge1": 69.2151, "eval_rouge2": 53.5006, "eval_rougeL": 54.6811, "eval_rougeLsum": 58.899, "eval_runtime": 5815.6916, "eval_samples_per_second": 0.06, "eval_steps_per_second": 0.003, "step": 33000 }, { "epoch": 47.25, "learning_rate": 1.5825105782792665e-06, "loss": 0.0334, "step": 33500 }, { "epoch": 47.95, "learning_rate": 1.5613540197461212e-06, "loss": 0.0316, "step": 34000 }, { "epoch": 47.95, "eval_gen_len": 163.4872, "eval_loss": 1.2345460653305054, "eval_rouge1": 68.9079, "eval_rouge2": 52.7923, "eval_rougeL": 54.7024, "eval_rougeLsum": 59.1333, "eval_runtime": 5738.4019, "eval_samples_per_second": 0.061, "eval_steps_per_second": 0.003, "step": 34000 }, { "epoch": 48.66, "learning_rate": 1.540197461212976e-06, "loss": 0.0303, "step": 34500 }, { "epoch": 49.37, "learning_rate": 1.5190409026798307e-06, "loss": 0.0289, "step": 35000 }, { "epoch": 49.37, "eval_gen_len": 161.2165, "eval_loss": 1.2448240518569946, "eval_rouge1": 69.3435, "eval_rouge2": 53.5007, "eval_rougeL": 54.6771, "eval_rougeLsum": 59.067, "eval_runtime": 5523.8322, "eval_samples_per_second": 0.064, "eval_steps_per_second": 0.003, "step": 35000 }, { "epoch": 50.07, "learning_rate": 1.4978843441466856e-06, "loss": 0.0287, "step": 35500 }, { "epoch": 50.78, "learning_rate": 1.4767277856135403e-06, "loss": 0.0279, "step": 36000 }, { "epoch": 50.78, "eval_gen_len": 163.7664, "eval_loss": 1.254445195198059, "eval_rouge1": 68.8472, "eval_rouge2": 53.1054, "eval_rougeL": 54.3767, "eval_rougeLsum": 58.4479, "eval_runtime": 5626.7741, "eval_samples_per_second": 0.062, "eval_steps_per_second": 0.003, "step": 36000 }, { "epoch": 51.48, "learning_rate": 1.455571227080395e-06, "loss": 0.0266, "step": 36500 }, { "epoch": 52.19, "learning_rate": 1.4344146685472496e-06, "loss": 0.0266, "step": 37000 }, { "epoch": 52.19, "eval_gen_len": 163.3675, "eval_loss": 1.267912745475769, "eval_rouge1": 69.1523, "eval_rouge2": 53.3283, "eval_rougeL": 54.8059, "eval_rougeLsum": 58.9981, "eval_runtime": 5635.0806, "eval_samples_per_second": 0.062, "eval_steps_per_second": 0.003, "step": 37000 }, { "epoch": 52.89, "learning_rate": 1.4132581100141043e-06, "loss": 0.025, "step": 37500 }, { "epoch": 53.6, "learning_rate": 1.392101551480959e-06, "loss": 0.0237, "step": 38000 }, { "epoch": 53.6, "eval_gen_len": 161.943, "eval_loss": 1.278545618057251, "eval_rouge1": 69.7623, "eval_rouge2": 53.9089, "eval_rougeL": 55.4188, "eval_rougeLsum": 59.7174, "eval_runtime": 5517.6551, "eval_samples_per_second": 0.064, "eval_steps_per_second": 0.003, "step": 38000 }, { "epoch": 54.3, "learning_rate": 1.3709449929478138e-06, "loss": 0.0243, "step": 38500 }, { "epoch": 55.01, "learning_rate": 1.3497884344146687e-06, "loss": 0.0225, "step": 39000 }, { "epoch": 55.01, "eval_gen_len": 162.8234, "eval_loss": 1.2910034656524658, "eval_rouge1": 69.4568, "eval_rouge2": 53.5993, "eval_rougeL": 55.3547, "eval_rougeLsum": 59.7055, "eval_runtime": 5493.2949, "eval_samples_per_second": 0.064, "eval_steps_per_second": 0.003, "step": 39000 }, { "epoch": 55.71, "learning_rate": 1.3286318758815233e-06, "loss": 0.0222, "step": 39500 }, { "epoch": 56.42, "learning_rate": 1.307475317348378e-06, "loss": 0.0207, "step": 40000 }, { "epoch": 56.42, "eval_gen_len": 161.9145, "eval_loss": 1.302707314491272, "eval_rouge1": 69.3521, "eval_rouge2": 53.657, "eval_rougeL": 55.4158, "eval_rougeLsum": 59.7595, "eval_runtime": 5493.618, "eval_samples_per_second": 0.064, "eval_steps_per_second": 0.003, "step": 40000 }, { "epoch": 57.12, "learning_rate": 1.2863187588152327e-06, "loss": 0.0216, "step": 40500 }, { "epoch": 57.83, "learning_rate": 1.2651622002820875e-06, "loss": 0.0207, "step": 41000 }, { "epoch": 57.83, "eval_gen_len": 161.4017, "eval_loss": 1.2961533069610596, "eval_rouge1": 69.5378, "eval_rouge2": 53.6568, "eval_rougeL": 55.4833, "eval_rougeLsum": 59.8609, "eval_runtime": 5729.7323, "eval_samples_per_second": 0.061, "eval_steps_per_second": 0.003, "step": 41000 }, { "epoch": 58.53, "learning_rate": 1.2440056417489422e-06, "loss": 0.0202, "step": 41500 }, { "epoch": 59.24, "learning_rate": 1.222849083215797e-06, "loss": 0.019, "step": 42000 }, { "epoch": 59.24, "eval_gen_len": 161.5954, "eval_loss": 1.3159691095352173, "eval_rouge1": 69.6692, "eval_rouge2": 53.8824, "eval_rougeL": 56.153, "eval_rougeLsum": 60.5724, "eval_runtime": 5770.1856, "eval_samples_per_second": 0.061, "eval_steps_per_second": 0.003, "step": 42000 }, { "epoch": 59.94, "learning_rate": 1.2016925246826517e-06, "loss": 0.0193, "step": 42500 }, { "epoch": 60.65, "learning_rate": 1.1805359661495064e-06, "loss": 0.0183, "step": 43000 }, { "epoch": 60.65, "eval_gen_len": 159.6097, "eval_loss": 1.3009204864501953, "eval_rouge1": 69.8056, "eval_rouge2": 54.3016, "eval_rougeL": 56.2054, "eval_rougeLsum": 60.4544, "eval_runtime": 5689.5789, "eval_samples_per_second": 0.062, "eval_steps_per_second": 0.003, "step": 43000 }, { "epoch": 61.35, "learning_rate": 1.1593794076163611e-06, "loss": 0.0186, "step": 43500 }, { "epoch": 62.06, "learning_rate": 1.1382228490832158e-06, "loss": 0.0179, "step": 44000 }, { "epoch": 62.06, "eval_gen_len": 159.4046, "eval_loss": 1.3091576099395752, "eval_rouge1": 69.9596, "eval_rouge2": 54.1221, "eval_rougeL": 56.3196, "eval_rougeLsum": 60.6711, "eval_runtime": 5735.0694, "eval_samples_per_second": 0.061, "eval_steps_per_second": 0.003, "step": 44000 }, { "epoch": 62.76, "learning_rate": 1.1170662905500706e-06, "loss": 0.0179, "step": 44500 }, { "epoch": 63.47, "learning_rate": 1.0959097320169253e-06, "loss": 0.0162, "step": 45000 }, { "epoch": 63.47, "eval_gen_len": 161.7721, "eval_loss": 1.3298344612121582, "eval_rouge1": 69.7464, "eval_rouge2": 54.0211, "eval_rougeL": 56.0314, "eval_rougeLsum": 60.4478, "eval_runtime": 5746.6407, "eval_samples_per_second": 0.061, "eval_steps_per_second": 0.003, "step": 45000 }, { "epoch": 64.17, "learning_rate": 1.07475317348378e-06, "loss": 0.0177, "step": 45500 }, { "epoch": 64.88, "learning_rate": 1.0535966149506348e-06, "loss": 0.0161, "step": 46000 }, { "epoch": 64.88, "eval_gen_len": 161.3932, "eval_loss": 1.3238565921783447, "eval_rouge1": 69.6076, "eval_rouge2": 53.7339, "eval_rougeL": 55.9025, "eval_rougeLsum": 60.3293, "eval_runtime": 5658.4636, "eval_samples_per_second": 0.062, "eval_steps_per_second": 0.003, "step": 46000 }, { "epoch": 65.59, "learning_rate": 1.0324400564174895e-06, "loss": 0.0157, "step": 46500 }, { "epoch": 66.29, "learning_rate": 1.011283497884344e-06, "loss": 0.0152, "step": 47000 }, { "epoch": 66.29, "eval_gen_len": 162.8547, "eval_loss": 1.3345621824264526, "eval_rouge1": 69.2641, "eval_rouge2": 53.4141, "eval_rougeL": 55.3861, "eval_rougeLsum": 59.6768, "eval_runtime": 5757.5178, "eval_samples_per_second": 0.061, "eval_steps_per_second": 0.003, "step": 47000 }, { "epoch": 67.0, "learning_rate": 9.901269393511988e-07, "loss": 0.0157, "step": 47500 }, { "epoch": 67.7, "learning_rate": 9.689703808180537e-07, "loss": 0.0149, "step": 48000 }, { "epoch": 67.7, "eval_gen_len": 163.0684, "eval_loss": 1.3516221046447754, "eval_rouge1": 69.3874, "eval_rouge2": 53.707, "eval_rougeL": 56.0318, "eval_rougeLsum": 60.1116, "eval_runtime": 5778.0873, "eval_samples_per_second": 0.061, "eval_steps_per_second": 0.003, "step": 48000 }, { "epoch": 68.41, "learning_rate": 9.478138222849083e-07, "loss": 0.014, "step": 48500 }, { "epoch": 69.11, "learning_rate": 9.266572637517632e-07, "loss": 0.0143, "step": 49000 }, { "epoch": 69.11, "eval_gen_len": 163.4387, "eval_loss": 1.3576686382293701, "eval_rouge1": 69.5713, "eval_rouge2": 53.8362, "eval_rougeL": 55.9625, "eval_rougeLsum": 60.2185, "eval_runtime": 6313.8347, "eval_samples_per_second": 0.056, "eval_steps_per_second": 0.003, "step": 49000 }, { "epoch": 69.82, "learning_rate": 9.055007052186177e-07, "loss": 0.0141, "step": 49500 }, { "epoch": 70.52, "learning_rate": 8.843441466854725e-07, "loss": 0.0139, "step": 50000 }, { "epoch": 70.52, "eval_gen_len": 161.9259, "eval_loss": 1.346762776374817, "eval_rouge1": 69.5609, "eval_rouge2": 53.9709, "eval_rougeL": 56.0658, "eval_rougeLsum": 60.0989, "eval_runtime": 5992.2258, "eval_samples_per_second": 0.059, "eval_steps_per_second": 0.003, "step": 50000 }, { "epoch": 71.23, "learning_rate": 8.631875881523273e-07, "loss": 0.0136, "step": 50500 }, { "epoch": 71.93, "learning_rate": 8.42031029619182e-07, "loss": 0.0132, "step": 51000 }, { "epoch": 71.93, "eval_gen_len": 161.208, "eval_loss": 1.3650970458984375, "eval_rouge1": 69.5418, "eval_rouge2": 53.7154, "eval_rougeL": 55.7361, "eval_rougeLsum": 60.0613, "eval_runtime": 7093.4012, "eval_samples_per_second": 0.049, "eval_steps_per_second": 0.003, "step": 51000 }, { "epoch": 72.64, "learning_rate": 8.208744710860367e-07, "loss": 0.0131, "step": 51500 }, { "epoch": 73.34, "learning_rate": 7.997179125528913e-07, "loss": 0.0127, "step": 52000 }, { "epoch": 73.34, "eval_gen_len": 162.2849, "eval_loss": 1.3561229705810547, "eval_rouge1": 69.4318, "eval_rouge2": 53.5887, "eval_rougeL": 55.4159, "eval_rougeLsum": 59.6162, "eval_runtime": 6162.4321, "eval_samples_per_second": 0.057, "eval_steps_per_second": 0.003, "step": 52000 }, { "epoch": 74.05, "learning_rate": 7.785613540197461e-07, "loss": 0.0127, "step": 52500 }, { "epoch": 74.75, "learning_rate": 7.574047954866008e-07, "loss": 0.0123, "step": 53000 }, { "epoch": 74.75, "eval_gen_len": 159.567, "eval_loss": 1.3640151023864746, "eval_rouge1": 69.9555, "eval_rouge2": 54.4377, "eval_rougeL": 56.2914, "eval_rougeLsum": 60.5544, "eval_runtime": 6099.51, "eval_samples_per_second": 0.058, "eval_steps_per_second": 0.003, "step": 53000 }, { "epoch": 75.46, "learning_rate": 7.362482369534556e-07, "loss": 0.0123, "step": 53500 }, { "epoch": 76.16, "learning_rate": 7.150916784203103e-07, "loss": 0.0119, "step": 54000 }, { "epoch": 76.16, "eval_gen_len": 162.0228, "eval_loss": 1.3737815618515015, "eval_rouge1": 69.6472, "eval_rouge2": 53.9313, "eval_rougeL": 55.9586, "eval_rougeLsum": 59.9692, "eval_runtime": 6462.5491, "eval_samples_per_second": 0.054, "eval_steps_per_second": 0.003, "step": 54000 }, { "epoch": 76.87, "learning_rate": 6.93935119887165e-07, "loss": 0.0118, "step": 54500 }, { "epoch": 77.57, "learning_rate": 6.727785613540198e-07, "loss": 0.0117, "step": 55000 }, { "epoch": 77.57, "eval_gen_len": 162.3789, "eval_loss": 1.3787622451782227, "eval_rouge1": 69.5463, "eval_rouge2": 53.7206, "eval_rougeL": 56.0721, "eval_rougeLsum": 60.3372, "eval_runtime": 6234.7587, "eval_samples_per_second": 0.056, "eval_steps_per_second": 0.003, "step": 55000 }, { "epoch": 78.28, "learning_rate": 6.516220028208745e-07, "loss": 0.0114, "step": 55500 }, { "epoch": 78.98, "learning_rate": 6.304654442877292e-07, "loss": 0.0114, "step": 56000 }, { "epoch": 78.98, "eval_gen_len": 162.1083, "eval_loss": 1.3712390661239624, "eval_rouge1": 69.5879, "eval_rouge2": 53.9682, "eval_rougeL": 56.2965, "eval_rougeLsum": 60.5547, "eval_runtime": 6229.6909, "eval_samples_per_second": 0.056, "eval_steps_per_second": 0.003, "step": 56000 } ], "max_steps": 70900, "num_train_epochs": 100, "total_flos": 9.43193629959635e+17, "trial_name": null, "trial_params": null }