{
  "best_metric": 2.225323438644409,
  "best_model_checkpoint": "autotrain-x906d-mvlef/checkpoint-2460",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 2460,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03048780487804878,
      "grad_norm": 135.88742065429688,
      "learning_rate": 1.016260162601626e-05,
      "loss": 30.3396,
      "step": 25
    },
    {
      "epoch": 0.06097560975609756,
      "grad_norm": 93.93063354492188,
      "learning_rate": 2.032520325203252e-05,
      "loss": 24.435,
      "step": 50
    },
    {
      "epoch": 0.09146341463414634,
      "grad_norm": 104.44407653808594,
      "learning_rate": 3.048780487804878e-05,
      "loss": 14.4152,
      "step": 75
    },
    {
      "epoch": 0.12195121951219512,
      "grad_norm": 15.064167976379395,
      "learning_rate": 4.065040650406504e-05,
      "loss": 8.6678,
      "step": 100
    },
    {
      "epoch": 0.1524390243902439,
      "grad_norm": 8.59422492980957,
      "learning_rate": 5.081300813008131e-05,
      "loss": 6.2719,
      "step": 125
    },
    {
      "epoch": 0.18292682926829268,
      "grad_norm": 6.402270793914795,
      "learning_rate": 6.097560975609756e-05,
      "loss": 5.391,
      "step": 150
    },
    {
      "epoch": 0.21341463414634146,
      "grad_norm": 7.405852794647217,
      "learning_rate": 7.113821138211383e-05,
      "loss": 4.9132,
      "step": 175
    },
    {
      "epoch": 0.24390243902439024,
      "grad_norm": 6.045359134674072,
      "learning_rate": 8.130081300813008e-05,
      "loss": 4.4981,
      "step": 200
    },
    {
      "epoch": 0.27439024390243905,
      "grad_norm": 4.673328876495361,
      "learning_rate": 9.146341463414635e-05,
      "loss": 4.267,
      "step": 225
    },
    {
      "epoch": 0.3048780487804878,
      "grad_norm": 5.1413774490356445,
      "learning_rate": 9.981933152664861e-05,
      "loss": 4.0875,
      "step": 250
    },
    {
      "epoch": 0.3353658536585366,
      "grad_norm": 4.102631092071533,
      "learning_rate": 9.869015356820235e-05,
      "loss": 3.9819,
      "step": 275
    },
    {
      "epoch": 0.36585365853658536,
      "grad_norm": 3.946659803390503,
      "learning_rate": 9.75609756097561e-05,
      "loss": 4.0086,
      "step": 300
    },
    {
      "epoch": 0.39634146341463417,
      "grad_norm": 5.377846717834473,
      "learning_rate": 9.643179765130986e-05,
      "loss": 3.7839,
      "step": 325
    },
    {
      "epoch": 0.4268292682926829,
      "grad_norm": 3.9124577045440674,
      "learning_rate": 9.530261969286361e-05,
      "loss": 3.6614,
      "step": 350
    },
    {
      "epoch": 0.4573170731707317,
      "grad_norm": 3.9939420223236084,
      "learning_rate": 9.417344173441735e-05,
      "loss": 3.7353,
      "step": 375
    },
    {
      "epoch": 0.4878048780487805,
      "grad_norm": 4.000846862792969,
      "learning_rate": 9.30442637759711e-05,
      "loss": 3.6826,
      "step": 400
    },
    {
      "epoch": 0.5182926829268293,
      "grad_norm": 4.859684944152832,
      "learning_rate": 9.191508581752484e-05,
      "loss": 3.5672,
      "step": 425
    },
    {
      "epoch": 0.5487804878048781,
      "grad_norm": 3.8994104862213135,
      "learning_rate": 9.07859078590786e-05,
      "loss": 3.4385,
      "step": 450
    },
    {
      "epoch": 0.5792682926829268,
      "grad_norm": 5.588184356689453,
      "learning_rate": 8.965672990063234e-05,
      "loss": 3.4864,
      "step": 475
    },
    {
      "epoch": 0.6097560975609756,
      "grad_norm": 4.631311416625977,
      "learning_rate": 8.852755194218609e-05,
      "loss": 3.4949,
      "step": 500
    },
    {
      "epoch": 0.6402439024390244,
      "grad_norm": 4.662644386291504,
      "learning_rate": 8.739837398373984e-05,
      "loss": 3.4993,
      "step": 525
    },
    {
      "epoch": 0.6707317073170732,
      "grad_norm": 4.6329874992370605,
      "learning_rate": 8.626919602529358e-05,
      "loss": 3.4153,
      "step": 550
    },
    {
      "epoch": 0.7012195121951219,
      "grad_norm": 5.302168846130371,
      "learning_rate": 8.514001806684734e-05,
      "loss": 3.4356,
      "step": 575
    },
    {
      "epoch": 0.7317073170731707,
      "grad_norm": 4.273636341094971,
      "learning_rate": 8.401084010840109e-05,
      "loss": 3.479,
      "step": 600
    },
    {
      "epoch": 0.7621951219512195,
      "grad_norm": 3.5382657051086426,
      "learning_rate": 8.288166214995484e-05,
      "loss": 3.4343,
      "step": 625
    },
    {
      "epoch": 0.7926829268292683,
      "grad_norm": 3.272901773452759,
      "learning_rate": 8.175248419150858e-05,
      "loss": 3.3167,
      "step": 650
    },
    {
      "epoch": 0.823170731707317,
      "grad_norm": 3.156431198120117,
      "learning_rate": 8.062330623306234e-05,
      "loss": 3.2939,
      "step": 675
    },
    {
      "epoch": 0.8536585365853658,
      "grad_norm": 4.288792610168457,
      "learning_rate": 7.949412827461609e-05,
      "loss": 3.2499,
      "step": 700
    },
    {
      "epoch": 0.8841463414634146,
      "grad_norm": 3.8503577709198,
      "learning_rate": 7.836495031616983e-05,
      "loss": 3.3326,
      "step": 725
    },
    {
      "epoch": 0.9146341463414634,
      "grad_norm": 3.282243251800537,
      "learning_rate": 7.723577235772358e-05,
      "loss": 3.3124,
      "step": 750
    },
    {
      "epoch": 0.9451219512195121,
      "grad_norm": 3.938842296600342,
      "learning_rate": 7.610659439927734e-05,
      "loss": 3.2749,
      "step": 775
    },
    {
      "epoch": 0.975609756097561,
      "grad_norm": 4.175856590270996,
      "learning_rate": 7.497741644083109e-05,
      "loss": 3.2124,
      "step": 800
    },
    {
      "epoch": 1.0,
      "eval_gen_len": 19.0,
      "eval_loss": 2.420257329940796,
      "eval_rouge1": 25.1635,
      "eval_rouge2": 23.2382,
      "eval_rougeL": 24.6426,
      "eval_rougeLsum": 25.0666,
      "eval_runtime": 54.8826,
      "eval_samples_per_second": 7.47,
      "eval_steps_per_second": 1.877,
      "step": 820
    },
    {
      "epoch": 1.0060975609756098,
      "grad_norm": 3.5674939155578613,
      "learning_rate": 7.384823848238483e-05,
      "loss": 3.2418,
      "step": 825
    },
    {
      "epoch": 1.0365853658536586,
      "grad_norm": 4.070037841796875,
      "learning_rate": 7.271906052393858e-05,
      "loss": 3.2318,
      "step": 850
    },
    {
      "epoch": 1.0670731707317074,
      "grad_norm": 4.293022632598877,
      "learning_rate": 7.158988256549232e-05,
      "loss": 3.1837,
      "step": 875
    },
    {
      "epoch": 1.0975609756097562,
      "grad_norm": 3.463479995727539,
      "learning_rate": 7.046070460704606e-05,
      "loss": 3.228,
      "step": 900
    },
    {
      "epoch": 1.1280487804878048,
      "grad_norm": 7.417110443115234,
      "learning_rate": 6.933152664859982e-05,
      "loss": 3.1462,
      "step": 925
    },
    {
      "epoch": 1.1585365853658536,
      "grad_norm": 5.171451091766357,
      "learning_rate": 6.820234869015357e-05,
      "loss": 3.0788,
      "step": 950
    },
    {
      "epoch": 1.1890243902439024,
      "grad_norm": 3.9318175315856934,
      "learning_rate": 6.707317073170732e-05,
      "loss": 3.207,
      "step": 975
    },
    {
      "epoch": 1.2195121951219512,
      "grad_norm": 5.252470016479492,
      "learning_rate": 6.594399277326106e-05,
      "loss": 3.0539,
      "step": 1000
    },
    {
      "epoch": 1.25,
      "grad_norm": 3.6664767265319824,
      "learning_rate": 6.481481481481482e-05,
      "loss": 3.1699,
      "step": 1025
    },
    {
      "epoch": 1.2804878048780488,
      "grad_norm": 2.665335178375244,
      "learning_rate": 6.368563685636857e-05,
      "loss": 3.0474,
      "step": 1050
    },
    {
      "epoch": 1.3109756097560976,
      "grad_norm": 3.2224156856536865,
      "learning_rate": 6.255645889792232e-05,
      "loss": 3.1378,
      "step": 1075
    },
    {
      "epoch": 1.3414634146341464,
      "grad_norm": 3.4332261085510254,
      "learning_rate": 6.142728093947606e-05,
      "loss": 3.1683,
      "step": 1100
    },
    {
      "epoch": 1.3719512195121952,
      "grad_norm": 4.648336887359619,
      "learning_rate": 6.0298102981029816e-05,
      "loss": 3.1224,
      "step": 1125
    },
    {
      "epoch": 1.4024390243902438,
      "grad_norm": 3.557978391647339,
      "learning_rate": 5.916892502258356e-05,
      "loss": 3.1106,
      "step": 1150
    },
    {
      "epoch": 1.4329268292682926,
      "grad_norm": 3.8348541259765625,
      "learning_rate": 5.803974706413731e-05,
      "loss": 3.1733,
      "step": 1175
    },
    {
      "epoch": 1.4634146341463414,
      "grad_norm": 4.132476329803467,
      "learning_rate": 5.6910569105691056e-05,
      "loss": 3.0671,
      "step": 1200
    },
    {
      "epoch": 1.4939024390243902,
      "grad_norm": 3.8571975231170654,
      "learning_rate": 5.578139114724481e-05,
      "loss": 3.1065,
      "step": 1225
    },
    {
      "epoch": 1.524390243902439,
      "grad_norm": 3.408566951751709,
      "learning_rate": 5.465221318879856e-05,
      "loss": 3.1613,
      "step": 1250
    },
    {
      "epoch": 1.5548780487804879,
      "grad_norm": 3.7324483394622803,
      "learning_rate": 5.35230352303523e-05,
      "loss": 2.9743,
      "step": 1275
    },
    {
      "epoch": 1.5853658536585367,
      "grad_norm": 3.1098973751068115,
      "learning_rate": 5.2393857271906056e-05,
      "loss": 3.1296,
      "step": 1300
    },
    {
      "epoch": 1.6158536585365852,
      "grad_norm": 5.050591468811035,
      "learning_rate": 5.126467931345981e-05,
      "loss": 2.9844,
      "step": 1325
    },
    {
      "epoch": 1.6463414634146343,
      "grad_norm": 3.30641508102417,
      "learning_rate": 5.013550135501355e-05,
      "loss": 3.0301,
      "step": 1350
    },
    {
      "epoch": 1.6768292682926829,
      "grad_norm": 4.2086358070373535,
      "learning_rate": 4.90063233965673e-05,
      "loss": 3.0881,
      "step": 1375
    },
    {
      "epoch": 1.7073170731707317,
      "grad_norm": 4.049353122711182,
      "learning_rate": 4.787714543812105e-05,
      "loss": 3.0382,
      "step": 1400
    },
    {
      "epoch": 1.7378048780487805,
      "grad_norm": 3.4281935691833496,
      "learning_rate": 4.6747967479674795e-05,
      "loss": 3.0105,
      "step": 1425
    },
    {
      "epoch": 1.7682926829268293,
      "grad_norm": 3.7564141750335693,
      "learning_rate": 4.561878952122855e-05,
      "loss": 3.0319,
      "step": 1450
    },
    {
      "epoch": 1.798780487804878,
      "grad_norm": 4.241165637969971,
      "learning_rate": 4.4489611562782295e-05,
      "loss": 3.0275,
      "step": 1475
    },
    {
      "epoch": 1.8292682926829267,
      "grad_norm": 3.024312734603882,
      "learning_rate": 4.336043360433605e-05,
      "loss": 3.004,
      "step": 1500
    },
    {
      "epoch": 1.8597560975609757,
      "grad_norm": 3.0586462020874023,
      "learning_rate": 4.2231255645889795e-05,
      "loss": 2.8741,
      "step": 1525
    },
    {
      "epoch": 1.8902439024390243,
      "grad_norm": 3.032233953475952,
      "learning_rate": 4.110207768744354e-05,
      "loss": 3.0305,
      "step": 1550
    },
    {
      "epoch": 1.9207317073170733,
      "grad_norm": 3.4631378650665283,
      "learning_rate": 3.9972899728997295e-05,
      "loss": 2.9829,
      "step": 1575
    },
    {
      "epoch": 1.951219512195122,
      "grad_norm": 3.462908983230591,
      "learning_rate": 3.884372177055104e-05,
      "loss": 2.9073,
      "step": 1600
    },
    {
      "epoch": 1.9817073170731707,
      "grad_norm": 3.844022512435913,
      "learning_rate": 3.771454381210479e-05,
      "loss": 2.943,
      "step": 1625
    },
    {
      "epoch": 2.0,
      "eval_gen_len": 17.9268,
      "eval_loss": 2.2693333625793457,
      "eval_rouge1": 24.8213,
      "eval_rouge2": 22.8064,
      "eval_rougeL": 24.126,
      "eval_rougeLsum": 24.7561,
      "eval_runtime": 54.0619,
      "eval_samples_per_second": 7.584,
      "eval_steps_per_second": 1.905,
      "step": 1640
    },
    {
      "epoch": 2.0121951219512195,
      "grad_norm": 3.2110657691955566,
      "learning_rate": 3.6585365853658535e-05,
      "loss": 2.9109,
      "step": 1650
    },
    {
      "epoch": 2.042682926829268,
      "grad_norm": 5.20732307434082,
      "learning_rate": 3.545618789521229e-05,
      "loss": 2.907,
      "step": 1675
    },
    {
      "epoch": 2.073170731707317,
      "grad_norm": 3.6366891860961914,
      "learning_rate": 3.4327009936766035e-05,
      "loss": 2.7614,
      "step": 1700
    },
    {
      "epoch": 2.1036585365853657,
      "grad_norm": 3.012146472930908,
      "learning_rate": 3.319783197831978e-05,
      "loss": 2.8866,
      "step": 1725
    },
    {
      "epoch": 2.1341463414634148,
      "grad_norm": 5.310023307800293,
      "learning_rate": 3.2068654019873535e-05,
      "loss": 2.9738,
      "step": 1750
    },
    {
      "epoch": 2.1646341463414633,
      "grad_norm": 3.908693313598633,
      "learning_rate": 3.093947606142728e-05,
      "loss": 2.989,
      "step": 1775
    },
    {
      "epoch": 2.1951219512195124,
      "grad_norm": 3.6812493801116943,
      "learning_rate": 2.9810298102981032e-05,
      "loss": 2.9844,
      "step": 1800
    },
    {
      "epoch": 2.225609756097561,
      "grad_norm": 3.1762988567352295,
      "learning_rate": 2.868112014453478e-05,
      "loss": 2.9518,
      "step": 1825
    },
    {
      "epoch": 2.2560975609756095,
      "grad_norm": 3.526785373687744,
      "learning_rate": 2.7551942186088532e-05,
      "loss": 3.0312,
      "step": 1850
    },
    {
      "epoch": 2.2865853658536586,
      "grad_norm": 3.4673197269439697,
      "learning_rate": 2.642276422764228e-05,
      "loss": 2.9413,
      "step": 1875
    },
    {
      "epoch": 2.317073170731707,
      "grad_norm": 3.7603211402893066,
      "learning_rate": 2.529358626919603e-05,
      "loss": 2.8524,
      "step": 1900
    },
    {
      "epoch": 2.347560975609756,
      "grad_norm": 3.039940357208252,
      "learning_rate": 2.4164408310749775e-05,
      "loss": 2.9403,
      "step": 1925
    },
    {
      "epoch": 2.3780487804878048,
      "grad_norm": 4.610980987548828,
      "learning_rate": 2.3035230352303525e-05,
      "loss": 2.892,
      "step": 1950
    },
    {
      "epoch": 2.408536585365854,
      "grad_norm": 3.3454501628875732,
      "learning_rate": 2.1906052393857275e-05,
      "loss": 2.905,
      "step": 1975
    },
    {
      "epoch": 2.4390243902439024,
      "grad_norm": 2.9850571155548096,
      "learning_rate": 2.077687443541102e-05,
      "loss": 2.9325,
      "step": 2000
    },
    {
      "epoch": 2.4695121951219514,
      "grad_norm": 4.394461631774902,
      "learning_rate": 1.9647696476964768e-05,
      "loss": 2.9112,
      "step": 2025
    },
    {
      "epoch": 2.5,
      "grad_norm": 4.401582717895508,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 2.8956,
      "step": 2050
    },
    {
      "epoch": 2.5304878048780486,
      "grad_norm": 3.5677547454833984,
      "learning_rate": 1.7389340560072268e-05,
      "loss": 2.9706,
      "step": 2075
    },
    {
      "epoch": 2.5609756097560976,
      "grad_norm": 3.1517820358276367,
      "learning_rate": 1.6260162601626018e-05,
      "loss": 2.896,
      "step": 2100
    },
    {
      "epoch": 2.591463414634146,
      "grad_norm": 5.237459182739258,
      "learning_rate": 1.5130984643179767e-05,
      "loss": 2.9137,
      "step": 2125
    },
    {
      "epoch": 2.6219512195121952,
      "grad_norm": 3.8501670360565186,
      "learning_rate": 1.4001806684733515e-05,
      "loss": 2.9322,
      "step": 2150
    },
    {
      "epoch": 2.652439024390244,
      "grad_norm": 6.259314060211182,
      "learning_rate": 1.2872628726287265e-05,
      "loss": 2.8803,
      "step": 2175
    },
    {
      "epoch": 2.682926829268293,
      "grad_norm": 3.369128942489624,
      "learning_rate": 1.1743450767841012e-05,
      "loss": 2.9141,
      "step": 2200
    },
    {
      "epoch": 2.7134146341463414,
      "grad_norm": 3.4941694736480713,
      "learning_rate": 1.0614272809394762e-05,
      "loss": 2.9613,
      "step": 2225
    },
    {
      "epoch": 2.7439024390243905,
      "grad_norm": 4.332786560058594,
      "learning_rate": 9.48509485094851e-06,
      "loss": 2.9518,
      "step": 2250
    },
    {
      "epoch": 2.774390243902439,
      "grad_norm": 3.981058359146118,
      "learning_rate": 8.35591689250226e-06,
      "loss": 2.8848,
      "step": 2275
    },
    {
      "epoch": 2.8048780487804876,
      "grad_norm": 3.464871644973755,
      "learning_rate": 7.226738934056007e-06,
      "loss": 2.9663,
      "step": 2300
    },
    {
      "epoch": 2.8353658536585367,
      "grad_norm": 3.324209690093994,
      "learning_rate": 6.0975609756097564e-06,
      "loss": 2.8891,
      "step": 2325
    },
    {
      "epoch": 2.8658536585365852,
      "grad_norm": 3.896878480911255,
      "learning_rate": 4.968383017163506e-06,
      "loss": 2.952,
      "step": 2350
    },
    {
      "epoch": 2.8963414634146343,
      "grad_norm": 3.644209384918213,
      "learning_rate": 3.839205058717254e-06,
      "loss": 2.8857,
      "step": 2375
    },
    {
      "epoch": 2.926829268292683,
      "grad_norm": 3.039807081222534,
      "learning_rate": 2.710027100271003e-06,
      "loss": 2.8308,
      "step": 2400
    },
    {
      "epoch": 2.9573170731707314,
      "grad_norm": 3.2632334232330322,
      "learning_rate": 1.5808491418247518e-06,
      "loss": 2.9224,
      "step": 2425
    },
    {
      "epoch": 2.9878048780487805,
      "grad_norm": 4.359528064727783,
      "learning_rate": 4.5167118337850045e-07,
      "loss": 2.901,
      "step": 2450
    },
    {
      "epoch": 3.0,
      "eval_gen_len": 18.8,
      "eval_loss": 2.225323438644409,
      "eval_rouge1": 26.024,
      "eval_rouge2": 24.1663,
      "eval_rougeL": 25.432,
      "eval_rougeLsum": 25.9929,
      "eval_runtime": 55.2175,
      "eval_samples_per_second": 7.425,
      "eval_steps_per_second": 1.865,
      "step": 2460
    }
  ],
  "logging_steps": 25,
  "max_steps": 2460,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 27756032163840.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}