{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 1320,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0022727272727272726,
      "grad_norm": NaN,
      "learning_rate": 0.0001,
      "loss": 3.1855,
      "step": 1
    },
    {
      "epoch": 0.004545454545454545,
      "grad_norm": Infinity,
      "learning_rate": 0.0001,
      "loss": 4.3007,
      "step": 2
    },
    {
      "epoch": 0.006818181818181818,
      "grad_norm": Infinity,
      "learning_rate": 0.0001,
      "loss": 4.3188,
      "step": 3
    },
    {
      "epoch": 0.00909090909090909,
      "grad_norm": 26.366512298583984,
      "learning_rate": 9.992424242424244e-05,
      "loss": 3.6582,
      "step": 4
    },
    {
      "epoch": 0.011363636363636364,
      "grad_norm": 29.344751358032227,
      "learning_rate": 9.984848484848486e-05,
      "loss": 4.2964,
      "step": 5
    },
    {
      "epoch": 0.013636363636363636,
      "grad_norm": 29.519277572631836,
      "learning_rate": 9.977272727272728e-05,
      "loss": 4.0004,
      "step": 6
    },
    {
      "epoch": 0.015909090909090907,
      "grad_norm": 24.204898834228516,
      "learning_rate": 9.96969696969697e-05,
      "loss": 3.2453,
      "step": 7
    },
    {
      "epoch": 0.01818181818181818,
      "grad_norm": 23.69887351989746,
      "learning_rate": 9.962121212121213e-05,
      "loss": 2.7972,
      "step": 8
    },
    {
      "epoch": 0.020454545454545454,
      "grad_norm": 52.371498107910156,
      "learning_rate": 9.954545454545455e-05,
      "loss": 2.5971,
      "step": 9
    },
    {
      "epoch": 0.022727272727272728,
      "grad_norm": 41.59567642211914,
      "learning_rate": 9.946969696969698e-05,
      "loss": 3.3081,
      "step": 10
    },
    {
      "epoch": 0.025,
      "grad_norm": 27.913963317871094,
      "learning_rate": 9.939393939393939e-05,
      "loss": 3.5977,
      "step": 11
    },
    {
      "epoch": 0.02727272727272727,
      "grad_norm": 21.261117935180664,
      "learning_rate": 9.931818181818182e-05,
      "loss": 3.3403,
      "step": 12
    },
    {
      "epoch": 0.029545454545454545,
      "grad_norm": 20.344589233398438,
      "learning_rate": 9.924242424242425e-05,
      "loss": 2.0478,
      "step": 13
    },
    {
      "epoch": 0.031818181818181815,
      "grad_norm": 32.50373077392578,
      "learning_rate": 9.916666666666667e-05,
      "loss": 3.0773,
      "step": 14
    },
    {
      "epoch": 0.03409090909090909,
      "grad_norm": 21.426048278808594,
      "learning_rate": 9.909090909090911e-05,
      "loss": 2.8572,
      "step": 15
    },
    {
      "epoch": 0.03636363636363636,
      "grad_norm": 27.847314834594727,
      "learning_rate": 9.901515151515151e-05,
      "loss": 3.129,
      "step": 16
    },
    {
      "epoch": 0.038636363636363635,
      "grad_norm": 23.516616821289062,
      "learning_rate": 9.893939393939395e-05,
      "loss": 3.3971,
      "step": 17
    },
    {
      "epoch": 0.04090909090909091,
      "grad_norm": 29.170352935791016,
      "learning_rate": 9.886363636363637e-05,
      "loss": 3.6325,
      "step": 18
    },
    {
      "epoch": 0.04318181818181818,
      "grad_norm": 21.103153228759766,
      "learning_rate": 9.87878787878788e-05,
      "loss": 2.7935,
      "step": 19
    },
    {
      "epoch": 0.045454545454545456,
      "grad_norm": 25.863285064697266,
      "learning_rate": 9.871212121212122e-05,
      "loss": 2.0675,
      "step": 20
    },
    {
      "epoch": 0.04772727272727273,
      "grad_norm": 25.554828643798828,
      "learning_rate": 9.863636363636364e-05,
      "loss": 2.8331,
      "step": 21
    },
    {
      "epoch": 0.05,
      "grad_norm": 26.424827575683594,
      "learning_rate": 9.856060606060607e-05,
      "loss": 4.0934,
      "step": 22
    },
    {
      "epoch": 0.05227272727272727,
      "grad_norm": 40.84152603149414,
      "learning_rate": 9.848484848484849e-05,
      "loss": 2.7315,
      "step": 23
    },
    {
      "epoch": 0.05454545454545454,
      "grad_norm": 17.789630889892578,
      "learning_rate": 9.840909090909092e-05,
      "loss": 2.5798,
      "step": 24
    },
    {
      "epoch": 0.056818181818181816,
      "grad_norm": 15.23817253112793,
      "learning_rate": 9.833333333333333e-05,
      "loss": 2.3981,
      "step": 25
    },
    {
      "epoch": 0.05909090909090909,
      "grad_norm": 17.333356857299805,
      "learning_rate": 9.825757575757576e-05,
      "loss": 2.0097,
      "step": 26
    },
    {
      "epoch": 0.06136363636363636,
      "grad_norm": 17.358461380004883,
      "learning_rate": 9.818181818181818e-05,
      "loss": 1.5636,
      "step": 27
    },
    {
      "epoch": 0.06363636363636363,
      "grad_norm": 15.479598999023438,
      "learning_rate": 9.810606060606061e-05,
      "loss": 2.3064,
      "step": 28
    },
    {
      "epoch": 0.0659090909090909,
      "grad_norm": 18.889394760131836,
      "learning_rate": 9.803030303030303e-05,
      "loss": 1.6592,
      "step": 29
    },
    {
      "epoch": 0.06818181818181818,
      "grad_norm": 19.264772415161133,
      "learning_rate": 9.795454545454545e-05,
      "loss": 2.9327,
      "step": 30
    },
    {
      "epoch": 0.07045454545454545,
      "grad_norm": 19.369556427001953,
      "learning_rate": 9.787878787878789e-05,
      "loss": 3.2685,
      "step": 31
    },
    {
      "epoch": 0.07272727272727272,
      "grad_norm": 20.017459869384766,
      "learning_rate": 9.78030303030303e-05,
      "loss": 3.4532,
      "step": 32
    },
    {
      "epoch": 0.075,
      "grad_norm": 18.956012725830078,
      "learning_rate": 9.772727272727274e-05,
      "loss": 2.2143,
      "step": 33
    },
    {
      "epoch": 0.07727272727272727,
      "grad_norm": 15.438785552978516,
      "learning_rate": 9.765151515151516e-05,
      "loss": 2.407,
      "step": 34
    },
    {
      "epoch": 0.07954545454545454,
      "grad_norm": 22.79155921936035,
      "learning_rate": 9.757575757575758e-05,
      "loss": 3.1064,
      "step": 35
    },
    {
      "epoch": 0.08181818181818182,
      "grad_norm": 15.908382415771484,
      "learning_rate": 9.75e-05,
      "loss": 2.9192,
      "step": 36
    },
    {
      "epoch": 0.08409090909090909,
      "grad_norm": 21.536775588989258,
      "learning_rate": 9.742424242424243e-05,
      "loss": 2.8127,
      "step": 37
    },
    {
      "epoch": 0.08636363636363636,
      "grad_norm": 19.644390106201172,
      "learning_rate": 9.734848484848485e-05,
      "loss": 1.704,
      "step": 38
    },
    {
      "epoch": 0.08863636363636364,
      "grad_norm": 20.067602157592773,
      "learning_rate": 9.727272727272728e-05,
      "loss": 2.3733,
      "step": 39
    },
    {
      "epoch": 0.09090909090909091,
      "grad_norm": 16.551055908203125,
      "learning_rate": 9.71969696969697e-05,
      "loss": 2.2413,
      "step": 40
    },
    {
      "epoch": 0.09318181818181819,
      "grad_norm": 18.292987823486328,
      "learning_rate": 9.712121212121212e-05,
      "loss": 2.6103,
      "step": 41
    },
    {
      "epoch": 0.09545454545454546,
      "grad_norm": 15.751124382019043,
      "learning_rate": 9.704545454545456e-05,
      "loss": 1.5648,
      "step": 42
    },
    {
      "epoch": 0.09772727272727273,
      "grad_norm": 25.068395614624023,
      "learning_rate": 9.696969696969698e-05,
      "loss": 1.8226,
      "step": 43
    },
    {
      "epoch": 0.1,
      "grad_norm": 25.069040298461914,
      "learning_rate": 9.689393939393941e-05,
      "loss": 3.8825,
      "step": 44
    },
    {
      "epoch": 0.10227272727272728,
      "grad_norm": 20.751232147216797,
      "learning_rate": 9.681818181818181e-05,
      "loss": 2.9331,
      "step": 45
    },
    {
      "epoch": 0.10454545454545454,
      "grad_norm": 23.918386459350586,
      "learning_rate": 9.674242424242425e-05,
      "loss": 3.0365,
      "step": 46
    },
    {
      "epoch": 0.10681818181818181,
      "grad_norm": 16.94843864440918,
      "learning_rate": 9.666666666666667e-05,
      "loss": 2.0012,
      "step": 47
    },
    {
      "epoch": 0.10909090909090909,
      "grad_norm": 38.2060432434082,
      "learning_rate": 9.65909090909091e-05,
      "loss": 2.4087,
      "step": 48
    },
    {
      "epoch": 0.11136363636363636,
      "grad_norm": 15.836068153381348,
      "learning_rate": 9.651515151515152e-05,
      "loss": 2.7204,
      "step": 49
    },
    {
      "epoch": 0.11363636363636363,
      "grad_norm": 20.13130760192871,
      "learning_rate": 9.643939393939394e-05,
      "loss": 1.8803,
      "step": 50
    },
    {
      "epoch": 0.1159090909090909,
      "grad_norm": 21.58964729309082,
      "learning_rate": 9.636363636363637e-05,
      "loss": 2.2448,
      "step": 51
    },
    {
      "epoch": 0.11818181818181818,
      "grad_norm": 15.996927261352539,
      "learning_rate": 9.628787878787879e-05,
      "loss": 2.456,
      "step": 52
    },
    {
      "epoch": 0.12045454545454545,
      "grad_norm": 15.738017082214355,
      "learning_rate": 9.621212121212123e-05,
      "loss": 2.0494,
      "step": 53
    },
    {
      "epoch": 0.12272727272727273,
      "grad_norm": 20.54029655456543,
      "learning_rate": 9.613636363636363e-05,
      "loss": 2.8584,
      "step": 54
    },
    {
      "epoch": 0.125,
      "grad_norm": 20.11783790588379,
      "learning_rate": 9.606060606060606e-05,
      "loss": 2.9836,
      "step": 55
    },
    {
      "epoch": 0.12727272727272726,
      "grad_norm": 15.297281265258789,
      "learning_rate": 9.598484848484848e-05,
      "loss": 1.8828,
      "step": 56
    },
    {
      "epoch": 0.12954545454545455,
      "grad_norm": 15.26744270324707,
      "learning_rate": 9.590909090909092e-05,
      "loss": 1.2548,
      "step": 57
    },
    {
      "epoch": 0.1318181818181818,
      "grad_norm": 18.839954376220703,
      "learning_rate": 9.583333333333334e-05,
      "loss": 3.7553,
      "step": 58
    },
    {
      "epoch": 0.1340909090909091,
      "grad_norm": 17.30214500427246,
      "learning_rate": 9.575757575757576e-05,
      "loss": 2.2297,
      "step": 59
    },
    {
      "epoch": 0.13636363636363635,
      "grad_norm": 25.153942108154297,
      "learning_rate": 9.568181818181819e-05,
      "loss": 2.6817,
      "step": 60
    },
    {
      "epoch": 0.13863636363636364,
      "grad_norm": 17.55406379699707,
      "learning_rate": 9.560606060606061e-05,
      "loss": 2.8551,
      "step": 61
    },
    {
      "epoch": 0.1409090909090909,
      "grad_norm": NaN,
      "learning_rate": 9.560606060606061e-05,
      "loss": 2.4352,
      "step": 62
    },
    {
      "epoch": 0.1431818181818182,
      "grad_norm": 18.4881649017334,
      "learning_rate": 9.553030303030304e-05,
      "loss": 2.1839,
      "step": 63
    },
    {
      "epoch": 0.14545454545454545,
      "grad_norm": 15.114643096923828,
      "learning_rate": 9.545454545454546e-05,
      "loss": 1.768,
      "step": 64
    },
    {
      "epoch": 0.14772727272727273,
      "grad_norm": 17.272735595703125,
      "learning_rate": 9.537878787878788e-05,
      "loss": 2.4241,
      "step": 65
    },
    {
      "epoch": 0.15,
      "grad_norm": 18.25682258605957,
      "learning_rate": 9.53030303030303e-05,
      "loss": 1.8703,
      "step": 66
    },
    {
      "epoch": 0.15227272727272728,
      "grad_norm": 20.255084991455078,
      "learning_rate": 9.522727272727273e-05,
      "loss": 2.3706,
      "step": 67
    },
    {
      "epoch": 0.15454545454545454,
      "grad_norm": 16.153093338012695,
      "learning_rate": 9.515151515151515e-05,
      "loss": 2.3896,
      "step": 68
    },
    {
      "epoch": 0.15681818181818183,
      "grad_norm": 14.229001998901367,
      "learning_rate": 9.507575757575759e-05,
      "loss": 2.5261,
      "step": 69
    },
    {
      "epoch": 0.1590909090909091,
      "grad_norm": 14.036202430725098,
      "learning_rate": 9.5e-05,
      "loss": 1.8918,
      "step": 70
    },
    {
      "epoch": 0.16136363636363638,
      "grad_norm": 16.262582778930664,
      "learning_rate": 9.492424242424242e-05,
      "loss": 2.7854,
      "step": 71
    },
    {
      "epoch": 0.16363636363636364,
      "grad_norm": 17.119918823242188,
      "learning_rate": 9.484848484848486e-05,
      "loss": 2.1371,
      "step": 72
    },
    {
      "epoch": 0.16590909090909092,
      "grad_norm": 19.72575569152832,
      "learning_rate": 9.477272727272728e-05,
      "loss": 2.6801,
      "step": 73
    },
    {
      "epoch": 0.16818181818181818,
      "grad_norm": 17.036550521850586,
      "learning_rate": 9.469696969696971e-05,
      "loss": 2.6403,
      "step": 74
    },
    {
      "epoch": 0.17045454545454544,
      "grad_norm": 14.31810188293457,
      "learning_rate": 9.462121212121212e-05,
      "loss": 1.9865,
      "step": 75
    },
    {
      "epoch": 0.17272727272727273,
      "grad_norm": 18.39834213256836,
      "learning_rate": 9.454545454545455e-05,
      "loss": 2.418,
      "step": 76
    },
    {
      "epoch": 0.175,
      "grad_norm": 18.37046241760254,
      "learning_rate": 9.446969696969697e-05,
      "loss": 2.2905,
      "step": 77
    },
    {
      "epoch": 0.17727272727272728,
      "grad_norm": 14.999472618103027,
      "learning_rate": 9.43939393939394e-05,
      "loss": 2.2521,
      "step": 78
    },
    {
      "epoch": 0.17954545454545454,
      "grad_norm": 11.88487434387207,
      "learning_rate": 9.431818181818182e-05,
      "loss": 2.2871,
      "step": 79
    },
    {
      "epoch": 0.18181818181818182,
      "grad_norm": 21.745532989501953,
      "learning_rate": 9.424242424242424e-05,
      "loss": 2.6415,
      "step": 80
    },
    {
      "epoch": 0.18409090909090908,
      "grad_norm": 13.109172821044922,
      "learning_rate": 9.416666666666667e-05,
      "loss": 1.9554,
      "step": 81
    },
    {
      "epoch": 0.18636363636363637,
      "grad_norm": 17.222652435302734,
      "learning_rate": 9.40909090909091e-05,
      "loss": 2.0914,
      "step": 82
    },
    {
      "epoch": 0.18863636363636363,
      "grad_norm": 17.833839416503906,
      "learning_rate": 9.401515151515153e-05,
      "loss": 1.6032,
      "step": 83
    },
    {
      "epoch": 0.19090909090909092,
      "grad_norm": 22.737525939941406,
      "learning_rate": 9.393939393939395e-05,
      "loss": 3.5915,
      "step": 84
    },
    {
      "epoch": 0.19318181818181818,
      "grad_norm": 14.926959037780762,
      "learning_rate": 9.386363636363637e-05,
      "loss": 2.2499,
      "step": 85
    },
    {
      "epoch": 0.19545454545454546,
      "grad_norm": 13.586040496826172,
      "learning_rate": 9.378787878787879e-05,
      "loss": 1.8228,
      "step": 86
    },
    {
      "epoch": 0.19772727272727272,
      "grad_norm": 19.175617218017578,
      "learning_rate": 9.371212121212122e-05,
      "loss": 2.7846,
      "step": 87
    },
    {
      "epoch": 0.2,
      "grad_norm": 21.078235626220703,
      "learning_rate": 9.363636363636364e-05,
      "loss": 2.7906,
      "step": 88
    },
    {
      "epoch": 0.20227272727272727,
      "grad_norm": 17.618940353393555,
      "learning_rate": 9.356060606060606e-05,
      "loss": 2.3022,
      "step": 89
    },
    {
      "epoch": 0.20454545454545456,
      "grad_norm": 16.79983139038086,
      "learning_rate": 9.348484848484849e-05,
      "loss": 1.8126,
      "step": 90
    },
    {
      "epoch": 0.20681818181818182,
      "grad_norm": 20.444580078125,
      "learning_rate": 9.340909090909091e-05,
      "loss": 2.055,
      "step": 91
    },
    {
      "epoch": 0.20909090909090908,
      "grad_norm": 18.694856643676758,
      "learning_rate": 9.333333333333334e-05,
      "loss": 2.6534,
      "step": 92
    },
    {
      "epoch": 0.21136363636363636,
      "grad_norm": 11.254834175109863,
      "learning_rate": 9.325757575757576e-05,
      "loss": 1.6695,
      "step": 93
    },
    {
      "epoch": 0.21363636363636362,
      "grad_norm": 14.369203567504883,
      "learning_rate": 9.318181818181818e-05,
      "loss": 2.3469,
      "step": 94
    },
    {
      "epoch": 0.2159090909090909,
      "grad_norm": 17.27039909362793,
      "learning_rate": 9.31060606060606e-05,
      "loss": 1.9188,
      "step": 95
    },
    {
      "epoch": 0.21818181818181817,
      "grad_norm": 12.644415855407715,
      "learning_rate": 9.303030303030303e-05,
      "loss": 1.3295,
      "step": 96
    },
    {
      "epoch": 0.22045454545454546,
      "grad_norm": 20.46677589416504,
      "learning_rate": 9.295454545454545e-05,
      "loss": 2.4697,
      "step": 97
    },
    {
      "epoch": 0.22272727272727272,
      "grad_norm": 15.218058586120605,
      "learning_rate": 9.287878787878789e-05,
      "loss": 2.4472,
      "step": 98
    },
    {
      "epoch": 0.225,
      "grad_norm": 14.982362747192383,
      "learning_rate": 9.280303030303031e-05,
      "loss": 1.881,
      "step": 99
    },
    {
      "epoch": 0.22727272727272727,
      "grad_norm": 20.168306350708008,
      "learning_rate": 9.272727272727273e-05,
      "loss": 1.6077,
      "step": 100
    },
    {
      "epoch": 0.22954545454545455,
      "grad_norm": 13.462889671325684,
      "learning_rate": 9.265151515151516e-05,
      "loss": 1.6057,
      "step": 101
    },
    {
      "epoch": 0.2318181818181818,
      "grad_norm": 12.3695068359375,
      "learning_rate": 9.257575757575758e-05,
      "loss": 2.0871,
      "step": 102
    },
    {
      "epoch": 0.2340909090909091,
      "grad_norm": 15.381841659545898,
      "learning_rate": 9.250000000000001e-05,
      "loss": 2.0592,
      "step": 103
    },
    {
      "epoch": 0.23636363636363636,
      "grad_norm": 18.213014602661133,
      "learning_rate": 9.242424242424242e-05,
      "loss": 2.2397,
      "step": 104
    },
    {
      "epoch": 0.23863636363636365,
      "grad_norm": 19.589962005615234,
      "learning_rate": 9.234848484848485e-05,
      "loss": 2.8305,
      "step": 105
    },
    {
      "epoch": 0.2409090909090909,
      "grad_norm": 21.765127182006836,
      "learning_rate": 9.227272727272727e-05,
      "loss": 1.8691,
      "step": 106
    },
    {
      "epoch": 0.2431818181818182,
      "grad_norm": 21.66250228881836,
      "learning_rate": 9.21969696969697e-05,
      "loss": 2.7176,
      "step": 107
    },
    {
      "epoch": 0.24545454545454545,
      "grad_norm": 16.438037872314453,
      "learning_rate": 9.212121212121214e-05,
      "loss": 3.0262,
      "step": 108
    },
    {
      "epoch": 0.24772727272727274,
      "grad_norm": 18.32391357421875,
      "learning_rate": 9.204545454545454e-05,
      "loss": 2.4011,
      "step": 109
    },
    {
      "epoch": 0.25,
      "grad_norm": 18.3424015045166,
      "learning_rate": 9.196969696969698e-05,
      "loss": 3.3481,
      "step": 110
    },
    {
      "epoch": 0.25227272727272726,
      "grad_norm": 12.168206214904785,
      "learning_rate": 9.18939393939394e-05,
      "loss": 1.5084,
      "step": 111
    },
    {
      "epoch": 0.2545454545454545,
      "grad_norm": 16.183521270751953,
      "learning_rate": 9.181818181818183e-05,
      "loss": 3.3444,
      "step": 112
    },
    {
      "epoch": 0.25681818181818183,
      "grad_norm": 17.887187957763672,
      "learning_rate": 9.174242424242425e-05,
      "loss": 2.4529,
      "step": 113
    },
    {
      "epoch": 0.2590909090909091,
      "grad_norm": 18.000579833984375,
      "learning_rate": 9.166666666666667e-05,
      "loss": 2.3228,
      "step": 114
    },
    {
      "epoch": 0.26136363636363635,
      "grad_norm": 15.579062461853027,
      "learning_rate": 9.159090909090909e-05,
      "loss": 3.2008,
      "step": 115
    },
    {
      "epoch": 0.2636363636363636,
      "grad_norm": 14.111518859863281,
      "learning_rate": 9.151515151515152e-05,
      "loss": 2.2286,
      "step": 116
    },
    {
      "epoch": 0.26590909090909093,
      "grad_norm": 13.755249977111816,
      "learning_rate": 9.143939393939395e-05,
      "loss": 1.9561,
      "step": 117
    },
    {
      "epoch": 0.2681818181818182,
      "grad_norm": 14.665258407592773,
      "learning_rate": 9.136363636363637e-05,
      "loss": 2.5016,
      "step": 118
    },
    {
      "epoch": 0.27045454545454545,
      "grad_norm": 14.470067024230957,
      "learning_rate": 9.128787878787879e-05,
      "loss": 2.3301,
      "step": 119
    },
    {
      "epoch": 0.2727272727272727,
      "grad_norm": 15.108169555664062,
      "learning_rate": 9.121212121212121e-05,
      "loss": 2.6079,
      "step": 120
    },
    {
      "epoch": 0.275,
      "grad_norm": 15.080549240112305,
      "learning_rate": 9.113636363636365e-05,
      "loss": 2.6349,
      "step": 121
    },
    {
      "epoch": 0.2772727272727273,
      "grad_norm": 17.71773910522461,
      "learning_rate": 9.106060606060606e-05,
      "loss": 1.9447,
      "step": 122
    },
    {
      "epoch": 0.27954545454545454,
      "grad_norm": 11.128664016723633,
      "learning_rate": 9.098484848484848e-05,
      "loss": 2.2076,
      "step": 123
    },
    {
      "epoch": 0.2818181818181818,
      "grad_norm": 19.131866455078125,
      "learning_rate": 9.090909090909092e-05,
      "loss": 1.5932,
      "step": 124
    },
    {
      "epoch": 0.2840909090909091,
      "grad_norm": 11.3361177444458,
      "learning_rate": 9.083333333333334e-05,
      "loss": 2.5923,
      "step": 125
    },
    {
      "epoch": 0.2863636363636364,
      "grad_norm": 16.97115707397461,
      "learning_rate": 9.075757575757577e-05,
      "loss": 1.828,
      "step": 126
    },
    {
      "epoch": 0.28863636363636364,
      "grad_norm": 11.52206802368164,
      "learning_rate": 9.068181818181819e-05,
      "loss": 2.3389,
      "step": 127
    },
    {
      "epoch": 0.2909090909090909,
      "grad_norm": 18.27076530456543,
      "learning_rate": 9.060606060606061e-05,
      "loss": 3.1892,
      "step": 128
    },
    {
      "epoch": 0.29318181818181815,
      "grad_norm": 15.098003387451172,
      "learning_rate": 9.053030303030303e-05,
      "loss": 2.3429,
      "step": 129
    },
    {
      "epoch": 0.29545454545454547,
      "grad_norm": 13.432772636413574,
      "learning_rate": 9.045454545454546e-05,
      "loss": 1.7032,
      "step": 130
    },
    {
      "epoch": 0.29772727272727273,
      "grad_norm": 21.96811866760254,
      "learning_rate": 9.037878787878788e-05,
      "loss": 3.3135,
      "step": 131
    },
    {
      "epoch": 0.3,
      "grad_norm": 17.522789001464844,
      "learning_rate": 9.030303030303031e-05,
      "loss": 2.0827,
      "step": 132
    },
    {
      "epoch": 0.30227272727272725,
      "grad_norm": 16.18021011352539,
      "learning_rate": 9.022727272727273e-05,
      "loss": 2.6956,
      "step": 133
    },
    {
      "epoch": 0.30454545454545456,
      "grad_norm": 17.834138870239258,
      "learning_rate": 9.015151515151515e-05,
      "loss": 2.3929,
      "step": 134
    },
    {
      "epoch": 0.3068181818181818,
      "grad_norm": 18.146596908569336,
      "learning_rate": 9.007575757575759e-05,
      "loss": 3.0074,
      "step": 135
    },
    {
      "epoch": 0.3090909090909091,
      "grad_norm": 11.941591262817383,
      "learning_rate": 9e-05,
      "loss": 1.6793,
      "step": 136
    },
    {
      "epoch": 0.31136363636363634,
      "grad_norm": 15.524669647216797,
      "learning_rate": 8.992424242424244e-05,
      "loss": 2.3193,
      "step": 137
    },
    {
      "epoch": 0.31363636363636366,
      "grad_norm": 17.986879348754883,
      "learning_rate": 8.984848484848484e-05,
      "loss": 3.1335,
      "step": 138
    },
    {
      "epoch": 0.3159090909090909,
      "grad_norm": 19.568361282348633,
      "learning_rate": 8.977272727272728e-05,
      "loss": 2.6232,
      "step": 139
    },
    {
      "epoch": 0.3181818181818182,
      "grad_norm": 15.213788986206055,
      "learning_rate": 8.96969696969697e-05,
      "loss": 1.6936,
      "step": 140
    },
    {
      "epoch": 0.32045454545454544,
      "grad_norm": 16.093795776367188,
      "learning_rate": 8.962121212121213e-05,
      "loss": 2.38,
      "step": 141
    },
    {
      "epoch": 0.32272727272727275,
      "grad_norm": 17.010087966918945,
      "learning_rate": 8.954545454545455e-05,
      "loss": 2.0467,
      "step": 142
    },
    {
      "epoch": 0.325,
      "grad_norm": 20.31732749938965,
      "learning_rate": 8.946969696969697e-05,
      "loss": 2.062,
      "step": 143
    },
    {
      "epoch": 0.32727272727272727,
      "grad_norm": 15.800658226013184,
      "learning_rate": 8.93939393939394e-05,
      "loss": 1.4575,
      "step": 144
    },
    {
      "epoch": 0.32954545454545453,
      "grad_norm": 15.116626739501953,
      "learning_rate": 8.931818181818182e-05,
      "loss": 2.314,
      "step": 145
    },
    {
      "epoch": 0.33181818181818185,
      "grad_norm": 25.464197158813477,
      "learning_rate": 8.924242424242426e-05,
      "loss": 2.0073,
      "step": 146
    },
    {
      "epoch": 0.3340909090909091,
      "grad_norm": 13.291275978088379,
      "learning_rate": 8.916666666666667e-05,
      "loss": 2.151,
      "step": 147
    },
    {
      "epoch": 0.33636363636363636,
      "grad_norm": 13.530828475952148,
      "learning_rate": 8.90909090909091e-05,
      "loss": 2.3051,
      "step": 148
    },
    {
      "epoch": 0.3386363636363636,
      "grad_norm": 15.941877365112305,
      "learning_rate": 8.901515151515151e-05,
      "loss": 2.6671,
      "step": 149
    },
    {
      "epoch": 0.3409090909090909,
      "grad_norm": 16.19255828857422,
      "learning_rate": 8.893939393939395e-05,
      "loss": 2.4137,
      "step": 150
    },
    {
      "epoch": 0.3431818181818182,
      "grad_norm": 25.39113998413086,
      "learning_rate": 8.886363636363637e-05,
      "loss": 3.1836,
      "step": 151
    },
    {
      "epoch": 0.34545454545454546,
      "grad_norm": 14.128908157348633,
      "learning_rate": 8.87878787878788e-05,
      "loss": 2.4864,
      "step": 152
    },
    {
      "epoch": 0.3477272727272727,
      "grad_norm": 14.206392288208008,
      "learning_rate": 8.871212121212122e-05,
      "loss": 1.3842,
      "step": 153
    },
    {
      "epoch": 0.35,
      "grad_norm": 11.746234893798828,
      "learning_rate": 8.863636363636364e-05,
      "loss": 1.69,
      "step": 154
    },
    {
      "epoch": 0.3522727272727273,
      "grad_norm": 14.249229431152344,
      "learning_rate": 8.856060606060607e-05,
      "loss": 2.962,
      "step": 155
    },
    {
      "epoch": 0.35454545454545455,
      "grad_norm": 13.884110450744629,
      "learning_rate": 8.848484848484849e-05,
      "loss": 1.9429,
      "step": 156
    },
    {
      "epoch": 0.3568181818181818,
      "grad_norm": 15.577651023864746,
      "learning_rate": 8.840909090909091e-05,
      "loss": 2.0814,
      "step": 157
    },
    {
      "epoch": 0.35909090909090907,
      "grad_norm": 13.055503845214844,
      "learning_rate": 8.833333333333333e-05,
      "loss": 2.286,
      "step": 158
    },
    {
      "epoch": 0.3613636363636364,
      "grad_norm": 14.148711204528809,
      "learning_rate": 8.825757575757576e-05,
      "loss": 1.7243,
      "step": 159
    },
    {
      "epoch": 0.36363636363636365,
      "grad_norm": 18.32880210876465,
      "learning_rate": 8.818181818181818e-05,
      "loss": 2.0912,
      "step": 160
    },
    {
      "epoch": 0.3659090909090909,
      "grad_norm": 19.306982040405273,
      "learning_rate": 8.810606060606062e-05,
      "loss": 2.1032,
      "step": 161
    },
    {
      "epoch": 0.36818181818181817,
      "grad_norm": 18.99219512939453,
      "learning_rate": 8.803030303030304e-05,
      "loss": 2.3527,
      "step": 162
    },
    {
      "epoch": 0.3704545454545455,
      "grad_norm": 14.297601699829102,
      "learning_rate": 8.795454545454545e-05,
      "loss": 2.8786,
      "step": 163
    },
    {
      "epoch": 0.37272727272727274,
      "grad_norm": 19.273303985595703,
      "learning_rate": 8.787878787878789e-05,
      "loss": 2.4364,
      "step": 164
    },
    {
      "epoch": 0.375,
      "grad_norm": 11.870357513427734,
      "learning_rate": 8.780303030303031e-05,
      "loss": 2.1716,
      "step": 165
    },
    {
      "epoch": 0.37727272727272726,
      "grad_norm": 11.26362133026123,
      "learning_rate": 8.772727272727274e-05,
      "loss": 3.1212,
      "step": 166
    },
    {
      "epoch": 0.3795454545454545,
      "grad_norm": 12.994135856628418,
      "learning_rate": 8.765151515151515e-05,
      "loss": 2.4722,
      "step": 167
    },
    {
      "epoch": 0.38181818181818183,
      "grad_norm": 13.474489212036133,
      "learning_rate": 8.757575757575758e-05,
      "loss": 2.9132,
      "step": 168
    },
    {
      "epoch": 0.3840909090909091,
      "grad_norm": 16.456457138061523,
      "learning_rate": 8.75e-05,
      "loss": 2.1006,
      "step": 169
    },
    {
      "epoch": 0.38636363636363635,
      "grad_norm": 16.236146926879883,
      "learning_rate": 8.742424242424243e-05,
      "loss": 2.1458,
      "step": 170
    },
    {
      "epoch": 0.3886363636363636,
      "grad_norm": 13.122529983520508,
      "learning_rate": 8.734848484848485e-05,
      "loss": 2.7045,
      "step": 171
    },
    {
      "epoch": 0.39090909090909093,
      "grad_norm": 12.385522842407227,
      "learning_rate": 8.727272727272727e-05,
      "loss": 2.2677,
      "step": 172
    },
    {
      "epoch": 0.3931818181818182,
      "grad_norm": 14.4050931930542,
      "learning_rate": 8.71969696969697e-05,
      "loss": 1.3401,
      "step": 173
    },
    {
      "epoch": 0.39545454545454545,
      "grad_norm": 21.25592803955078,
      "learning_rate": 8.712121212121212e-05,
      "loss": 1.8591,
      "step": 174
    },
    {
      "epoch": 0.3977272727272727,
      "grad_norm": 13.744414329528809,
      "learning_rate": 8.704545454545456e-05,
      "loss": 1.8915,
      "step": 175
    },
    {
      "epoch": 0.4,
      "grad_norm": 14.040199279785156,
      "learning_rate": 8.696969696969698e-05,
      "loss": 2.1142,
      "step": 176
    },
    {
      "epoch": 0.4022727272727273,
      "grad_norm": 13.779399871826172,
      "learning_rate": 8.68939393939394e-05,
      "loss": 1.6946,
      "step": 177
    },
    {
      "epoch": 0.40454545454545454,
      "grad_norm": 12.878482818603516,
      "learning_rate": 8.681818181818182e-05,
      "loss": 2.0229,
      "step": 178
    },
    {
      "epoch": 0.4068181818181818,
      "grad_norm": 10.951014518737793,
      "learning_rate": 8.674242424242425e-05,
      "loss": 2.2302,
      "step": 179
    },
    {
      "epoch": 0.4090909090909091,
      "grad_norm": 15.133676528930664,
      "learning_rate": 8.666666666666667e-05,
      "loss": 1.7796,
      "step": 180
    },
    {
      "epoch": 0.4113636363636364,
      "grad_norm": 11.56503677368164,
      "learning_rate": 8.65909090909091e-05,
      "loss": 2.0587,
      "step": 181
    },
    {
      "epoch": 0.41363636363636364,
      "grad_norm": 12.170353889465332,
      "learning_rate": 8.651515151515152e-05,
      "loss": 1.9297,
      "step": 182
    },
    {
      "epoch": 0.4159090909090909,
      "grad_norm": 14.984827995300293,
      "learning_rate": 8.643939393939394e-05,
      "loss": 1.3361,
      "step": 183
    },
    {
      "epoch": 0.41818181818181815,
      "grad_norm": 12.686882972717285,
      "learning_rate": 8.636363636363637e-05,
      "loss": 2.3203,
      "step": 184
    },
    {
      "epoch": 0.42045454545454547,
      "grad_norm": 19.53303337097168,
      "learning_rate": 8.628787878787879e-05,
      "loss": 2.1686,
      "step": 185
    },
    {
      "epoch": 0.42272727272727273,
      "grad_norm": 13.246541976928711,
      "learning_rate": 8.621212121212121e-05,
      "loss": 2.154,
      "step": 186
    },
    {
      "epoch": 0.425,
      "grad_norm": 18.38794708251953,
      "learning_rate": 8.613636363636363e-05,
      "loss": 2.3975,
      "step": 187
    },
    {
      "epoch": 0.42727272727272725,
      "grad_norm": 19.281801223754883,
      "learning_rate": 8.606060606060606e-05,
      "loss": 3.1559,
      "step": 188
    },
    {
      "epoch": 0.42954545454545456,
      "grad_norm": 16.43345069885254,
      "learning_rate": 8.598484848484848e-05,
      "loss": 2.4324,
      "step": 189
    },
    {
      "epoch": 0.4318181818181818,
      "grad_norm": 22.686885833740234,
      "learning_rate": 8.590909090909092e-05,
      "loss": 2.4541,
      "step": 190
    },
    {
      "epoch": 0.4340909090909091,
      "grad_norm": 16.799205780029297,
      "learning_rate": 8.583333333333334e-05,
      "loss": 1.9834,
      "step": 191
    },
    {
      "epoch": 0.43636363636363634,
      "grad_norm": 12.861906051635742,
      "learning_rate": 8.575757575757576e-05,
      "loss": 1.4132,
      "step": 192
    },
    {
      "epoch": 0.43863636363636366,
      "grad_norm": 14.350102424621582,
      "learning_rate": 8.568181818181819e-05,
      "loss": 2.5181,
      "step": 193
    },
    {
      "epoch": 0.4409090909090909,
      "grad_norm": 9.91285228729248,
      "learning_rate": 8.560606060606061e-05,
      "loss": 1.1131,
      "step": 194
    },
    {
      "epoch": 0.4431818181818182,
      "grad_norm": 12.768558502197266,
      "learning_rate": 8.553030303030304e-05,
      "loss": 1.6889,
      "step": 195
    },
    {
      "epoch": 0.44545454545454544,
      "grad_norm": 11.671558380126953,
      "learning_rate": 8.545454545454545e-05,
      "loss": 2.4559,
      "step": 196
    },
    {
      "epoch": 0.44772727272727275,
      "grad_norm": 12.10418701171875,
      "learning_rate": 8.537878787878788e-05,
      "loss": 2.2951,
      "step": 197
    },
    {
      "epoch": 0.45,
      "grad_norm": 12.047237396240234,
      "learning_rate": 8.53030303030303e-05,
      "loss": 1.7895,
      "step": 198
    },
    {
      "epoch": 0.45227272727272727,
      "grad_norm": 13.83714485168457,
      "learning_rate": 8.522727272727273e-05,
      "loss": 2.1267,
      "step": 199
    },
    {
      "epoch": 0.45454545454545453,
      "grad_norm": 17.289377212524414,
      "learning_rate": 8.515151515151515e-05,
      "loss": 3.4595,
      "step": 200
    },
    {
      "epoch": 0.45681818181818185,
      "grad_norm": 16.056198120117188,
      "learning_rate": 8.507575757575757e-05,
      "loss": 2.2333,
      "step": 201
    },
    {
      "epoch": 0.4590909090909091,
      "grad_norm": 12.874887466430664,
      "learning_rate": 8.5e-05,
      "loss": 2.3555,
      "step": 202
    },
    {
      "epoch": 0.46136363636363636,
      "grad_norm": 11.859071731567383,
      "learning_rate": 8.492424242424243e-05,
      "loss": 2.0893,
      "step": 203
    },
    {
      "epoch": 0.4636363636363636,
      "grad_norm": 11.99448013305664,
      "learning_rate": 8.484848484848486e-05,
      "loss": 2.4165,
      "step": 204
    },
    {
      "epoch": 0.4659090909090909,
      "grad_norm": 14.352676391601562,
      "learning_rate": 8.477272727272728e-05,
      "loss": 2.58,
      "step": 205
    },
    {
      "epoch": 0.4681818181818182,
      "grad_norm": 10.942952156066895,
      "learning_rate": 8.46969696969697e-05,
      "loss": 2.1313,
      "step": 206
    },
    {
      "epoch": 0.47045454545454546,
      "grad_norm": 13.232431411743164,
      "learning_rate": 8.462121212121212e-05,
      "loss": 2.8598,
      "step": 207
    },
    {
      "epoch": 0.4727272727272727,
      "grad_norm": 14.74603271484375,
      "learning_rate": 8.454545454545455e-05,
      "loss": 2.5221,
      "step": 208
    },
    {
      "epoch": 0.475,
      "grad_norm": 11.541604042053223,
      "learning_rate": 8.446969696969697e-05,
      "loss": 2.6656,
      "step": 209
    },
    {
      "epoch": 0.4772727272727273,
      "grad_norm": 22.731273651123047,
      "learning_rate": 8.43939393939394e-05,
      "loss": 1.9391,
      "step": 210
    },
    {
      "epoch": 0.47954545454545455,
      "grad_norm": 16.327220916748047,
      "learning_rate": 8.431818181818182e-05,
      "loss": 2.1225,
      "step": 211
    },
    {
      "epoch": 0.4818181818181818,
      "grad_norm": 15.646464347839355,
      "learning_rate": 8.424242424242424e-05,
      "loss": 2.1468,
      "step": 212
    },
    {
      "epoch": 0.48409090909090907,
      "grad_norm": 16.69521141052246,
      "learning_rate": 8.416666666666668e-05,
      "loss": 2.4979,
      "step": 213
    },
    {
      "epoch": 0.4863636363636364,
      "grad_norm": 12.17435073852539,
      "learning_rate": 8.40909090909091e-05,
      "loss": 1.915,
      "step": 214
    },
    {
      "epoch": 0.48863636363636365,
      "grad_norm": 15.295214653015137,
      "learning_rate": 8.401515151515153e-05,
      "loss": 2.6765,
      "step": 215
    },
    {
      "epoch": 0.4909090909090909,
      "grad_norm": 14.532336235046387,
      "learning_rate": 8.393939393939393e-05,
      "loss": 2.1649,
      "step": 216
    },
    {
      "epoch": 0.49318181818181817,
      "grad_norm": 9.738990783691406,
      "learning_rate": 8.386363636363637e-05,
      "loss": 1.7751,
      "step": 217
    },
    {
      "epoch": 0.4954545454545455,
      "grad_norm": 13.893047332763672,
      "learning_rate": 8.378787878787879e-05,
      "loss": 2.3839,
      "step": 218
    },
    {
      "epoch": 0.49772727272727274,
      "grad_norm": 10.604107856750488,
      "learning_rate": 8.371212121212122e-05,
      "loss": 1.839,
      "step": 219
    },
    {
      "epoch": 0.5,
      "grad_norm": 14.21572208404541,
      "learning_rate": 8.363636363636364e-05,
      "loss": 2.4181,
      "step": 220
    },
    {
      "epoch": 0.5022727272727273,
      "grad_norm": 12.247942924499512,
      "learning_rate": 8.356060606060606e-05,
      "loss": 1.6214,
      "step": 221
    },
    {
      "epoch": 0.5045454545454545,
      "grad_norm": 11.43807601928711,
      "learning_rate": 8.348484848484849e-05,
      "loss": 1.7002,
      "step": 222
    },
    {
      "epoch": 0.5068181818181818,
      "grad_norm": 12.532363891601562,
      "learning_rate": 8.340909090909091e-05,
      "loss": 1.6798,
      "step": 223
    },
    {
      "epoch": 0.509090909090909,
      "grad_norm": 21.122955322265625,
      "learning_rate": 8.333333333333334e-05,
      "loss": 2.3791,
      "step": 224
    },
    {
      "epoch": 0.5113636363636364,
      "grad_norm": 15.643569946289062,
      "learning_rate": 8.325757575757575e-05,
      "loss": 2.2841,
      "step": 225
    },
    {
      "epoch": 0.5136363636363637,
      "grad_norm": 13.66476821899414,
      "learning_rate": 8.318181818181818e-05,
      "loss": 2.7105,
      "step": 226
    },
    {
      "epoch": 0.5159090909090909,
      "grad_norm": 15.538378715515137,
      "learning_rate": 8.310606060606062e-05,
      "loss": 2.5573,
      "step": 227
    },
    {
      "epoch": 0.5181818181818182,
      "grad_norm": 14.432341575622559,
      "learning_rate": 8.303030303030304e-05,
      "loss": 1.6926,
      "step": 228
    },
    {
      "epoch": 0.5204545454545455,
      "grad_norm": 14.326302528381348,
      "learning_rate": 8.295454545454547e-05,
      "loss": 1.9976,
      "step": 229
    },
    {
      "epoch": 0.5227272727272727,
      "grad_norm": 16.38084602355957,
      "learning_rate": 8.287878787878787e-05,
      "loss": 2.8438,
      "step": 230
    },
    {
      "epoch": 0.525,
      "grad_norm": 14.56826114654541,
      "learning_rate": 8.280303030303031e-05,
      "loss": 2.3643,
      "step": 231
    },
    {
      "epoch": 0.5272727272727272,
      "grad_norm": 10.183893203735352,
      "learning_rate": 8.272727272727273e-05,
      "loss": 1.9476,
      "step": 232
    },
    {
      "epoch": 0.5295454545454545,
      "grad_norm": 15.575922012329102,
      "learning_rate": 8.265151515151516e-05,
      "loss": 2.3493,
      "step": 233
    },
    {
      "epoch": 0.5318181818181819,
      "grad_norm": 12.653141021728516,
      "learning_rate": 8.257575757575758e-05,
      "loss": 2.0519,
      "step": 234
    },
    {
      "epoch": 0.5340909090909091,
      "grad_norm": 12.279047966003418,
      "learning_rate": 8.25e-05,
      "loss": 2.0694,
      "step": 235
    },
    {
      "epoch": 0.5363636363636364,
      "grad_norm": 12.395997047424316,
      "learning_rate": 8.242424242424243e-05,
      "loss": 2.1307,
      "step": 236
    },
    {
      "epoch": 0.5386363636363637,
      "grad_norm": 10.851142883300781,
      "learning_rate": 8.234848484848485e-05,
      "loss": 1.9883,
      "step": 237
    },
    {
      "epoch": 0.5409090909090909,
      "grad_norm": 14.103243827819824,
      "learning_rate": 8.227272727272729e-05,
      "loss": 2.6901,
      "step": 238
    },
    {
      "epoch": 0.5431818181818182,
      "grad_norm": 9.63924789428711,
      "learning_rate": 8.21969696969697e-05,
      "loss": 1.2228,
      "step": 239
    },
    {
      "epoch": 0.5454545454545454,
      "grad_norm": 13.430061340332031,
      "learning_rate": 8.212121212121212e-05,
      "loss": 1.7877,
      "step": 240
    },
    {
      "epoch": 0.5477272727272727,
      "grad_norm": 15.428567886352539,
      "learning_rate": 8.204545454545454e-05,
      "loss": 2.0201,
      "step": 241
    },
    {
      "epoch": 0.55,
      "grad_norm": 15.405593872070312,
      "learning_rate": 8.196969696969698e-05,
      "loss": 2.8325,
      "step": 242
    },
    {
      "epoch": 0.5522727272727272,
      "grad_norm": 22.855867385864258,
      "learning_rate": 8.18939393939394e-05,
      "loss": 3.045,
      "step": 243
    },
    {
      "epoch": 0.5545454545454546,
      "grad_norm": 14.374544143676758,
      "learning_rate": 8.181818181818183e-05,
      "loss": 2.0002,
      "step": 244
    },
    {
      "epoch": 0.5568181818181818,
      "grad_norm": 13.37702465057373,
      "learning_rate": 8.174242424242425e-05,
      "loss": 1.6496,
      "step": 245
    },
    {
      "epoch": 0.5590909090909091,
      "grad_norm": 13.321274757385254,
      "learning_rate": 8.166666666666667e-05,
      "loss": 1.9746,
      "step": 246
    },
    {
      "epoch": 0.5613636363636364,
      "grad_norm": 13.79466438293457,
      "learning_rate": 8.15909090909091e-05,
      "loss": 2.0699,
      "step": 247
    },
    {
      "epoch": 0.5636363636363636,
      "grad_norm": 12.355722427368164,
      "learning_rate": 8.151515151515152e-05,
      "loss": 2.2207,
      "step": 248
    },
    {
      "epoch": 0.5659090909090909,
      "grad_norm": 14.220561981201172,
      "learning_rate": 8.143939393939395e-05,
      "loss": 2.1695,
      "step": 249
    },
    {
      "epoch": 0.5681818181818182,
      "grad_norm": 12.587940216064453,
      "learning_rate": 8.136363636363636e-05,
      "loss": 1.8604,
      "step": 250
    },
    {
      "epoch": 0.5704545454545454,
      "grad_norm": 9.54430103302002,
      "learning_rate": 8.12878787878788e-05,
      "loss": 1.6446,
      "step": 251
    },
    {
      "epoch": 0.5727272727272728,
      "grad_norm": 14.440407752990723,
      "learning_rate": 8.121212121212121e-05,
      "loss": 2.4646,
      "step": 252
    },
    {
      "epoch": 0.575,
      "grad_norm": 14.50412368774414,
      "learning_rate": 8.113636363636365e-05,
      "loss": 1.5263,
      "step": 253
    },
    {
      "epoch": 0.5772727272727273,
      "grad_norm": 18.535612106323242,
      "learning_rate": 8.106060606060607e-05,
      "loss": 2.7942,
      "step": 254
    },
    {
      "epoch": 0.5795454545454546,
      "grad_norm": 11.250702857971191,
      "learning_rate": 8.098484848484848e-05,
      "loss": 1.5575,
      "step": 255
    },
    {
      "epoch": 0.5818181818181818,
      "grad_norm": 12.534632682800293,
      "learning_rate": 8.090909090909092e-05,
      "loss": 1.9031,
      "step": 256
    },
    {
      "epoch": 0.5840909090909091,
      "grad_norm": 14.82848834991455,
      "learning_rate": 8.083333333333334e-05,
      "loss": 1.4666,
      "step": 257
    },
    {
      "epoch": 0.5863636363636363,
      "grad_norm": 15.74230670928955,
      "learning_rate": 8.075757575757577e-05,
      "loss": 2.3956,
      "step": 258
    },
    {
      "epoch": 0.5886363636363636,
      "grad_norm": 13.576948165893555,
      "learning_rate": 8.068181818181818e-05,
      "loss": 1.9797,
      "step": 259
    },
    {
      "epoch": 0.5909090909090909,
      "grad_norm": 12.77927303314209,
      "learning_rate": 8.060606060606061e-05,
      "loss": 2.0894,
      "step": 260
    },
    {
      "epoch": 0.5931818181818181,
      "grad_norm": 17.75493621826172,
      "learning_rate": 8.053030303030303e-05,
      "loss": 2.6691,
      "step": 261
    },
    {
      "epoch": 0.5954545454545455,
      "grad_norm": 12.445291519165039,
      "learning_rate": 8.045454545454546e-05,
      "loss": 1.9188,
      "step": 262
    },
    {
      "epoch": 0.5977272727272728,
      "grad_norm": 12.350727081298828,
      "learning_rate": 8.037878787878788e-05,
      "loss": 1.9648,
      "step": 263
    },
    {
      "epoch": 0.6,
      "grad_norm": 10.37759780883789,
      "learning_rate": 8.03030303030303e-05,
      "loss": 1.5221,
      "step": 264
    },
    {
      "epoch": 0.6022727272727273,
      "grad_norm": 13.281451225280762,
      "learning_rate": 8.022727272727273e-05,
      "loss": 3.2337,
      "step": 265
    },
    {
      "epoch": 0.6045454545454545,
      "grad_norm": 11.684523582458496,
      "learning_rate": 8.015151515151515e-05,
      "loss": 1.7641,
      "step": 266
    },
    {
      "epoch": 0.6068181818181818,
      "grad_norm": 15.161863327026367,
      "learning_rate": 8.007575757575759e-05,
      "loss": 3.5694,
      "step": 267
    },
    {
      "epoch": 0.6090909090909091,
      "grad_norm": 13.221097946166992,
      "learning_rate": 8e-05,
      "loss": 2.5334,
      "step": 268
    },
    {
      "epoch": 0.6113636363636363,
      "grad_norm": 15.834603309631348,
      "learning_rate": 7.992424242424243e-05,
      "loss": 2.5292,
      "step": 269
    },
    {
      "epoch": 0.6136363636363636,
      "grad_norm": 15.016695976257324,
      "learning_rate": 7.984848484848485e-05,
      "loss": 1.9177,
      "step": 270
    },
    {
      "epoch": 0.615909090909091,
      "grad_norm": 18.896211624145508,
      "learning_rate": 7.977272727272728e-05,
      "loss": 2.2495,
      "step": 271
    },
    {
      "epoch": 0.6181818181818182,
      "grad_norm": 17.597623825073242,
      "learning_rate": 7.96969696969697e-05,
      "loss": 2.1252,
      "step": 272
    },
    {
      "epoch": 0.6204545454545455,
      "grad_norm": 14.346769332885742,
      "learning_rate": 7.962121212121213e-05,
      "loss": 2.0273,
      "step": 273
    },
    {
      "epoch": 0.6227272727272727,
      "grad_norm": 13.852729797363281,
      "learning_rate": 7.954545454545455e-05,
      "loss": 2.7319,
      "step": 274
    },
    {
      "epoch": 0.625,
      "grad_norm": 12.906790733337402,
      "learning_rate": 7.946969696969697e-05,
      "loss": 1.6674,
      "step": 275
    },
    {
      "epoch": 0.6272727272727273,
      "grad_norm": 10.031960487365723,
      "learning_rate": 7.93939393939394e-05,
      "loss": 1.5017,
      "step": 276
    },
    {
      "epoch": 0.6295454545454545,
      "grad_norm": 12.02971363067627,
      "learning_rate": 7.931818181818182e-05,
      "loss": 2.1617,
      "step": 277
    },
    {
      "epoch": 0.6318181818181818,
      "grad_norm": 12.239229202270508,
      "learning_rate": 7.924242424242426e-05,
      "loss": 1.285,
      "step": 278
    },
    {
      "epoch": 0.634090909090909,
      "grad_norm": 12.207528114318848,
      "learning_rate": 7.916666666666666e-05,
      "loss": 1.4661,
      "step": 279
    },
    {
      "epoch": 0.6363636363636364,
      "grad_norm": 21.659215927124023,
      "learning_rate": 7.90909090909091e-05,
      "loss": 1.8808,
      "step": 280
    },
    {
      "epoch": 0.6386363636363637,
      "grad_norm": 14.419612884521484,
      "learning_rate": 7.901515151515151e-05,
      "loss": 2.6502,
      "step": 281
    },
    {
      "epoch": 0.6409090909090909,
      "grad_norm": NaN,
      "learning_rate": 7.901515151515151e-05,
      "loss": 0.0,
      "step": 282
    },
    {
      "epoch": 0.6431818181818182,
      "grad_norm": 11.444130897521973,
      "learning_rate": 7.893939393939395e-05,
      "loss": 1.5987,
      "step": 283
    },
    {
      "epoch": 0.6454545454545455,
      "grad_norm": 10.316890716552734,
      "learning_rate": 7.886363636363637e-05,
      "loss": 1.5173,
      "step": 284
    },
    {
      "epoch": 0.6477272727272727,
      "grad_norm": 13.772204399108887,
      "learning_rate": 7.878787878787879e-05,
      "loss": 3.0357,
      "step": 285
    },
    {
      "epoch": 0.65,
      "grad_norm": 12.452784538269043,
      "learning_rate": 7.871212121212122e-05,
      "loss": 2.2077,
      "step": 286
    },
    {
      "epoch": 0.6522727272727272,
      "grad_norm": 15.323153495788574,
      "learning_rate": 7.863636363636364e-05,
      "loss": 1.8941,
      "step": 287
    },
    {
      "epoch": 0.6545454545454545,
      "grad_norm": 10.558858871459961,
      "learning_rate": 7.856060606060607e-05,
      "loss": 1.5262,
      "step": 288
    },
    {
      "epoch": 0.6568181818181819,
      "grad_norm": 15.232844352722168,
      "learning_rate": 7.848484848484848e-05,
      "loss": 3.1486,
      "step": 289
    },
    {
      "epoch": 0.6590909090909091,
      "grad_norm": 11.309487342834473,
      "learning_rate": 7.840909090909091e-05,
      "loss": 1.8324,
      "step": 290
    },
    {
      "epoch": 0.6613636363636364,
      "grad_norm": 11.427604675292969,
      "learning_rate": 7.833333333333333e-05,
      "loss": 1.0609,
      "step": 291
    },
    {
      "epoch": 0.6636363636363637,
      "grad_norm": 15.115833282470703,
      "learning_rate": 7.825757575757576e-05,
      "loss": 2.8888,
      "step": 292
    },
    {
      "epoch": 0.6659090909090909,
      "grad_norm": 14.701318740844727,
      "learning_rate": 7.818181818181818e-05,
      "loss": 2.823,
      "step": 293
    },
    {
      "epoch": 0.6681818181818182,
      "grad_norm": 10.650053024291992,
      "learning_rate": 7.81060606060606e-05,
      "loss": 1.8724,
      "step": 294
    },
    {
      "epoch": 0.6704545454545454,
      "grad_norm": 12.72999382019043,
      "learning_rate": 7.803030303030304e-05,
      "loss": 1.9267,
      "step": 295
    },
    {
      "epoch": 0.6727272727272727,
      "grad_norm": 16.98598861694336,
      "learning_rate": 7.795454545454546e-05,
      "loss": 2.325,
      "step": 296
    },
    {
      "epoch": 0.675,
      "grad_norm": 12.848193168640137,
      "learning_rate": 7.787878787878789e-05,
      "loss": 3.1965,
      "step": 297
    },
    {
      "epoch": 0.6772727272727272,
      "grad_norm": 8.765904426574707,
      "learning_rate": 7.780303030303031e-05,
      "loss": 1.8081,
      "step": 298
    },
    {
      "epoch": 0.6795454545454546,
      "grad_norm": 14.633967399597168,
      "learning_rate": 7.772727272727273e-05,
      "loss": 1.8056,
      "step": 299
    },
    {
      "epoch": 0.6818181818181818,
      "grad_norm": 9.972925186157227,
      "learning_rate": 7.765151515151515e-05,
      "loss": 1.8835,
      "step": 300
    },
    {
      "epoch": 0.6840909090909091,
      "grad_norm": 11.186135292053223,
      "learning_rate": 7.757575757575758e-05,
      "loss": 1.6734,
      "step": 301
    },
    {
      "epoch": 0.6863636363636364,
      "grad_norm": 15.052450180053711,
      "learning_rate": 7.75e-05,
      "loss": 2.2574,
      "step": 302
    },
    {
      "epoch": 0.6886363636363636,
      "grad_norm": 12.664848327636719,
      "learning_rate": 7.742424242424243e-05,
      "loss": 1.5916,
      "step": 303
    },
    {
      "epoch": 0.6909090909090909,
      "grad_norm": 14.287535667419434,
      "learning_rate": 7.734848484848485e-05,
      "loss": 1.8552,
      "step": 304
    },
    {
      "epoch": 0.6931818181818182,
      "grad_norm": 14.354594230651855,
      "learning_rate": 7.727272727272727e-05,
      "loss": 3.0925,
      "step": 305
    },
    {
      "epoch": 0.6954545454545454,
      "grad_norm": 12.003613471984863,
      "learning_rate": 7.71969696969697e-05,
      "loss": 1.6642,
      "step": 306
    },
    {
      "epoch": 0.6977272727272728,
      "grad_norm": 11.559938430786133,
      "learning_rate": 7.712121212121212e-05,
      "loss": 1.5997,
      "step": 307
    },
    {
      "epoch": 0.7,
      "grad_norm": 13.42446517944336,
      "learning_rate": 7.704545454545456e-05,
      "loss": 1.7934,
      "step": 308
    },
    {
      "epoch": 0.7022727272727273,
      "grad_norm": 11.831766128540039,
      "learning_rate": 7.696969696969696e-05,
      "loss": 1.7729,
      "step": 309
    },
    {
      "epoch": 0.7045454545454546,
      "grad_norm": 11.884734153747559,
      "learning_rate": 7.68939393939394e-05,
      "loss": 1.9489,
      "step": 310
    },
    {
      "epoch": 0.7068181818181818,
      "grad_norm": 15.816669464111328,
      "learning_rate": 7.681818181818182e-05,
      "loss": 2.4105,
      "step": 311
    },
    {
      "epoch": 0.7090909090909091,
      "grad_norm": 12.010058403015137,
      "learning_rate": 7.674242424242425e-05,
      "loss": 1.9247,
      "step": 312
    },
    {
      "epoch": 0.7113636363636363,
      "grad_norm": 9.436304092407227,
      "learning_rate": 7.666666666666667e-05,
      "loss": 1.9038,
      "step": 313
    },
    {
      "epoch": 0.7136363636363636,
      "grad_norm": 9.153775215148926,
      "learning_rate": 7.659090909090909e-05,
      "loss": 1.241,
      "step": 314
    },
    {
      "epoch": 0.7159090909090909,
      "grad_norm": 13.067652702331543,
      "learning_rate": 7.651515151515152e-05,
      "loss": 2.7662,
      "step": 315
    },
    {
      "epoch": 0.7181818181818181,
      "grad_norm": 16.106948852539062,
      "learning_rate": 7.643939393939394e-05,
      "loss": 2.0783,
      "step": 316
    },
    {
      "epoch": 0.7204545454545455,
      "grad_norm": 13.585596084594727,
      "learning_rate": 7.636363636363637e-05,
      "loss": 1.919,
      "step": 317
    },
    {
      "epoch": 0.7227272727272728,
      "grad_norm": 13.833767890930176,
      "learning_rate": 7.62878787878788e-05,
      "loss": 1.1069,
      "step": 318
    },
    {
      "epoch": 0.725,
      "grad_norm": 12.201956748962402,
      "learning_rate": 7.621212121212121e-05,
      "loss": 1.9548,
      "step": 319
    },
    {
      "epoch": 0.7272727272727273,
      "grad_norm": 15.562934875488281,
      "learning_rate": 7.613636363636363e-05,
      "loss": 1.9211,
      "step": 320
    },
    {
      "epoch": 0.7295454545454545,
      "grad_norm": 14.389630317687988,
      "learning_rate": 7.606060606060607e-05,
      "loss": 1.821,
      "step": 321
    },
    {
      "epoch": 0.7318181818181818,
      "grad_norm": 14.584891319274902,
      "learning_rate": 7.598484848484849e-05,
      "loss": 2.5068,
      "step": 322
    },
    {
      "epoch": 0.7340909090909091,
      "grad_norm": 14.5166654586792,
      "learning_rate": 7.59090909090909e-05,
      "loss": 1.9124,
      "step": 323
    },
    {
      "epoch": 0.7363636363636363,
      "grad_norm": 46.67388916015625,
      "learning_rate": 7.583333333333334e-05,
      "loss": 1.6895,
      "step": 324
    },
    {
      "epoch": 0.7386363636363636,
      "grad_norm": 12.92702865600586,
      "learning_rate": 7.575757575757576e-05,
      "loss": 1.7526,
      "step": 325
    },
    {
      "epoch": 0.740909090909091,
      "grad_norm": 8.52035140991211,
      "learning_rate": 7.568181818181819e-05,
      "loss": 1.4144,
      "step": 326
    },
    {
      "epoch": 0.7431818181818182,
      "grad_norm": 13.630702018737793,
      "learning_rate": 7.560606060606061e-05,
      "loss": 2.2018,
      "step": 327
    },
    {
      "epoch": 0.7454545454545455,
      "grad_norm": 14.379950523376465,
      "learning_rate": 7.553030303030303e-05,
      "loss": 2.8618,
      "step": 328
    },
    {
      "epoch": 0.7477272727272727,
      "grad_norm": 14.78795051574707,
      "learning_rate": 7.545454545454545e-05,
      "loss": 1.9749,
      "step": 329
    },
    {
      "epoch": 0.75,
      "grad_norm": 10.462140083312988,
      "learning_rate": 7.537878787878788e-05,
      "loss": 2.3666,
      "step": 330
    },
    {
      "epoch": 0.7522727272727273,
      "grad_norm": 11.336270332336426,
      "learning_rate": 7.530303030303032e-05,
      "loss": 1.4712,
      "step": 331
    },
    {
      "epoch": 0.7545454545454545,
      "grad_norm": 17.15682029724121,
      "learning_rate": 7.522727272727273e-05,
      "loss": 3.2442,
      "step": 332
    },
    {
      "epoch": 0.7568181818181818,
      "grad_norm": 14.129326820373535,
      "learning_rate": 7.515151515151515e-05,
      "loss": 1.9768,
      "step": 333
    },
    {
      "epoch": 0.759090909090909,
      "grad_norm": 14.239521026611328,
      "learning_rate": 7.507575757575757e-05,
      "loss": 1.9933,
      "step": 334
    },
    {
      "epoch": 0.7613636363636364,
      "grad_norm": 10.573707580566406,
      "learning_rate": 7.500000000000001e-05,
      "loss": 1.3049,
      "step": 335
    },
    {
      "epoch": 0.7636363636363637,
      "grad_norm": 15.881331443786621,
      "learning_rate": 7.492424242424243e-05,
      "loss": 2.7249,
      "step": 336
    },
    {
      "epoch": 0.7659090909090909,
      "grad_norm": 11.606864929199219,
      "learning_rate": 7.484848484848486e-05,
      "loss": 1.4883,
      "step": 337
    },
    {
      "epoch": 0.7681818181818182,
      "grad_norm": 8.834245681762695,
      "learning_rate": 7.477272727272727e-05,
      "loss": 1.3757,
      "step": 338
    },
    {
      "epoch": 0.7704545454545455,
      "grad_norm": 10.011686325073242,
      "learning_rate": 7.46969696969697e-05,
      "loss": 1.4306,
      "step": 339
    },
    {
      "epoch": 0.7727272727272727,
      "grad_norm": 13.084802627563477,
      "learning_rate": 7.462121212121213e-05,
      "loss": 2.1676,
      "step": 340
    },
    {
      "epoch": 0.775,
      "grad_norm": 12.480827331542969,
      "learning_rate": 7.454545454545455e-05,
      "loss": 2.2564,
      "step": 341
    },
    {
      "epoch": 0.7772727272727272,
      "grad_norm": 12.32083797454834,
      "learning_rate": 7.446969696969698e-05,
      "loss": 1.4576,
      "step": 342
    },
    {
      "epoch": 0.7795454545454545,
      "grad_norm": 13.759376525878906,
      "learning_rate": 7.439393939393939e-05,
      "loss": 2.5308,
      "step": 343
    },
    {
      "epoch": 0.7818181818181819,
      "grad_norm": 17.70578384399414,
      "learning_rate": 7.431818181818182e-05,
      "loss": 3.0816,
      "step": 344
    },
    {
      "epoch": 0.7840909090909091,
      "grad_norm": 13.809745788574219,
      "learning_rate": 7.424242424242424e-05,
      "loss": 2.6903,
      "step": 345
    },
    {
      "epoch": 0.7863636363636364,
      "grad_norm": 13.484768867492676,
      "learning_rate": 7.416666666666668e-05,
      "loss": 1.6094,
      "step": 346
    },
    {
      "epoch": 0.7886363636363637,
      "grad_norm": 10.424938201904297,
      "learning_rate": 7.40909090909091e-05,
      "loss": 1.3566,
      "step": 347
    },
    {
      "epoch": 0.7909090909090909,
      "grad_norm": 15.058128356933594,
      "learning_rate": 7.401515151515152e-05,
      "loss": 1.945,
      "step": 348
    },
    {
      "epoch": 0.7931818181818182,
      "grad_norm": 11.48098373413086,
      "learning_rate": 7.393939393939395e-05,
      "loss": 2.9329,
      "step": 349
    },
    {
      "epoch": 0.7954545454545454,
      "grad_norm": 15.027339935302734,
      "learning_rate": 7.386363636363637e-05,
      "loss": 3.3324,
      "step": 350
    },
    {
      "epoch": 0.7977272727272727,
      "grad_norm": 12.786996841430664,
      "learning_rate": 7.37878787878788e-05,
      "loss": 2.7898,
      "step": 351
    },
    {
      "epoch": 0.8,
      "grad_norm": 14.68897819519043,
      "learning_rate": 7.37121212121212e-05,
      "loss": 2.1318,
      "step": 352
    },
    {
      "epoch": 0.8022727272727272,
      "grad_norm": 15.081788063049316,
      "learning_rate": 7.363636363636364e-05,
      "loss": 2.544,
      "step": 353
    },
    {
      "epoch": 0.8045454545454546,
      "grad_norm": 13.604434967041016,
      "learning_rate": 7.356060606060606e-05,
      "loss": 3.242,
      "step": 354
    },
    {
      "epoch": 0.8068181818181818,
      "grad_norm": 10.167998313903809,
      "learning_rate": 7.348484848484849e-05,
      "loss": 1.7378,
      "step": 355
    },
    {
      "epoch": 0.8090909090909091,
      "grad_norm": 11.878591537475586,
      "learning_rate": 7.340909090909091e-05,
      "loss": 1.9651,
      "step": 356
    },
    {
      "epoch": 0.8113636363636364,
      "grad_norm": 10.606021881103516,
      "learning_rate": 7.333333333333333e-05,
      "loss": 1.6922,
      "step": 357
    },
    {
      "epoch": 0.8136363636363636,
      "grad_norm": 36.99083709716797,
      "learning_rate": 7.325757575757576e-05,
      "loss": 2.7004,
      "step": 358
    },
    {
      "epoch": 0.8159090909090909,
      "grad_norm": 12.748845100402832,
      "learning_rate": 7.318181818181818e-05,
      "loss": 2.0722,
      "step": 359
    },
    {
      "epoch": 0.8181818181818182,
      "grad_norm": 13.374279975891113,
      "learning_rate": 7.310606060606062e-05,
      "loss": 2.3361,
      "step": 360
    },
    {
      "epoch": 0.8204545454545454,
      "grad_norm": 10.289033889770508,
      "learning_rate": 7.303030303030304e-05,
      "loss": 1.6377,
      "step": 361
    },
    {
      "epoch": 0.8227272727272728,
      "grad_norm": 10.585772514343262,
      "learning_rate": 7.295454545454546e-05,
      "loss": 1.6941,
      "step": 362
    },
    {
      "epoch": 0.825,
      "grad_norm": 13.439225196838379,
      "learning_rate": 7.287878787878788e-05,
      "loss": 1.9242,
      "step": 363
    },
    {
      "epoch": 0.8272727272727273,
      "grad_norm": 12.649117469787598,
      "learning_rate": 7.280303030303031e-05,
      "loss": 3.5932,
      "step": 364
    },
    {
      "epoch": 0.8295454545454546,
      "grad_norm": 13.014269828796387,
      "learning_rate": 7.272727272727273e-05,
      "loss": 1.6747,
      "step": 365
    },
    {
      "epoch": 0.8318181818181818,
      "grad_norm": 10.855698585510254,
      "learning_rate": 7.265151515151516e-05,
      "loss": 2.2644,
      "step": 366
    },
    {
      "epoch": 0.8340909090909091,
      "grad_norm": 9.967236518859863,
      "learning_rate": 7.257575757575758e-05,
      "loss": 1.7373,
      "step": 367
    },
    {
      "epoch": 0.8363636363636363,
      "grad_norm": 12.029590606689453,
      "learning_rate": 7.25e-05,
      "loss": 1.7012,
      "step": 368
    },
    {
      "epoch": 0.8386363636363636,
      "grad_norm": 18.046247482299805,
      "learning_rate": 7.242424242424243e-05,
      "loss": 2.7507,
      "step": 369
    },
    {
      "epoch": 0.8409090909090909,
      "grad_norm": 12.02083969116211,
      "learning_rate": 7.234848484848485e-05,
      "loss": 1.4928,
      "step": 370
    },
    {
      "epoch": 0.8431818181818181,
      "grad_norm": 14.034537315368652,
      "learning_rate": 7.227272727272729e-05,
      "loss": 1.5557,
      "step": 371
    },
    {
      "epoch": 0.8454545454545455,
      "grad_norm": 11.5894775390625,
      "learning_rate": 7.219696969696969e-05,
      "loss": 2.0848,
      "step": 372
    },
    {
      "epoch": 0.8477272727272728,
      "grad_norm": 10.489690780639648,
      "learning_rate": 7.212121212121213e-05,
      "loss": 2.1963,
      "step": 373
    },
    {
      "epoch": 0.85,
      "grad_norm": 14.684807777404785,
      "learning_rate": 7.204545454545454e-05,
      "loss": 1.6653,
      "step": 374
    },
    {
      "epoch": 0.8522727272727273,
      "grad_norm": 10.650580406188965,
      "learning_rate": 7.196969696969698e-05,
|
"loss": 1.5813, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8545454545454545, |
|
"grad_norm": 14.406346321105957, |
|
"learning_rate": 7.18939393939394e-05, |
|
"loss": 1.6018, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.8568181818181818, |
|
"grad_norm": 10.684210777282715, |
|
"learning_rate": 7.181818181818182e-05, |
|
"loss": 1.16, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.8590909090909091, |
|
"grad_norm": 11.588654518127441, |
|
"learning_rate": 7.174242424242425e-05, |
|
"loss": 1.52, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.8613636363636363, |
|
"grad_norm": 13.342896461486816, |
|
"learning_rate": 7.166666666666667e-05, |
|
"loss": 1.3069, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.8636363636363636, |
|
"grad_norm": 10.33123779296875, |
|
"learning_rate": 7.15909090909091e-05, |
|
"loss": 2.097, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.865909090909091, |
|
"grad_norm": 13.286327362060547, |
|
"learning_rate": 7.151515151515152e-05, |
|
"loss": 1.6996, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.8681818181818182, |
|
"grad_norm": 12.737727165222168, |
|
"learning_rate": 7.143939393939394e-05, |
|
"loss": 1.8533, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.8704545454545455, |
|
"grad_norm": 10.602120399475098, |
|
"learning_rate": 7.136363636363636e-05, |
|
"loss": 0.9764, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.8727272727272727, |
|
"grad_norm": 13.362771034240723, |
|
"learning_rate": 7.12878787878788e-05, |
|
"loss": 2.6888, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 15.875019073486328, |
|
"learning_rate": 7.121212121212121e-05, |
|
"loss": 1.3865, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8772727272727273, |
|
"grad_norm": 11.602843284606934, |
|
"learning_rate": 7.113636363636363e-05, |
|
"loss": 1.489, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.8795454545454545, |
|
"grad_norm": 10.052959442138672, |
|
"learning_rate": 7.106060606060607e-05, |
|
"loss": 1.423, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.8818181818181818, |
|
"grad_norm": 15.898283004760742, |
|
"learning_rate": 7.098484848484849e-05, |
|
"loss": 2.0401, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.884090909090909, |
|
"grad_norm": 14.83981990814209, |
|
"learning_rate": 7.090909090909092e-05, |
|
"loss": 2.9656, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.8863636363636364, |
|
"grad_norm": 12.542622566223145, |
|
"learning_rate": 7.083333333333334e-05, |
|
"loss": 1.7818, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8886363636363637, |
|
"grad_norm": 10.65149974822998, |
|
"learning_rate": 7.075757575757576e-05, |
|
"loss": 1.4115, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.8909090909090909, |
|
"grad_norm": 14.208708763122559, |
|
"learning_rate": 7.068181818181818e-05, |
|
"loss": 2.5107, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.8931818181818182, |
|
"grad_norm": 13.435481071472168, |
|
"learning_rate": 7.060606060606061e-05, |
|
"loss": 2.0141, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.8954545454545455, |
|
"grad_norm": 14.987428665161133, |
|
"learning_rate": 7.053030303030303e-05, |
|
"loss": 1.6295, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.8977272727272727, |
|
"grad_norm": 15.590865135192871, |
|
"learning_rate": 7.045454545454546e-05, |
|
"loss": 2.5029, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 12.00338077545166, |
|
"learning_rate": 7.037878787878788e-05, |
|
"loss": 1.5399, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.9022727272727272, |
|
"grad_norm": 10.2390718460083, |
|
"learning_rate": 7.03030303030303e-05, |
|
"loss": 1.2943, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.9045454545454545, |
|
"grad_norm": 13.09786319732666, |
|
"learning_rate": 7.022727272727274e-05, |
|
"loss": 1.951, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.9068181818181819, |
|
"grad_norm": 14.016656875610352, |
|
"learning_rate": 7.015151515151515e-05, |
|
"loss": 2.4783, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 14.135820388793945, |
|
"learning_rate": 7.007575757575759e-05, |
|
"loss": 1.8109, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9113636363636364, |
|
"grad_norm": 15.545958518981934, |
|
"learning_rate": 7e-05, |
|
"loss": 2.2156, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.9136363636363637, |
|
"grad_norm": 15.512310028076172, |
|
"learning_rate": 6.992424242424243e-05, |
|
"loss": 1.8199, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.9159090909090909, |
|
"grad_norm": 12.54996109008789, |
|
"learning_rate": 6.984848484848485e-05, |
|
"loss": 2.0134, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.9181818181818182, |
|
"grad_norm": 10.554512023925781, |
|
"learning_rate": 6.977272727272728e-05, |
|
"loss": 1.5173, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.9204545454545454, |
|
"grad_norm": 13.31303882598877, |
|
"learning_rate": 6.96969696969697e-05, |
|
"loss": 1.7694, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.9227272727272727, |
|
"grad_norm": 18.840511322021484, |
|
"learning_rate": 6.962121212121212e-05, |
|
"loss": 3.0551, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.925, |
|
"grad_norm": 13.331717491149902, |
|
"learning_rate": 6.954545454545455e-05, |
|
"loss": 2.0296, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.9272727272727272, |
|
"grad_norm": 11.75788688659668, |
|
"learning_rate": 6.946969696969697e-05, |
|
"loss": 1.8544, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.9295454545454546, |
|
"grad_norm": 14.479559898376465, |
|
"learning_rate": 6.93939393939394e-05, |
|
"loss": 2.4435, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.9318181818181818, |
|
"grad_norm": 14.522322654724121, |
|
"learning_rate": 6.931818181818182e-05, |
|
"loss": 2.3013, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.9340909090909091, |
|
"grad_norm": 12.853972434997559, |
|
"learning_rate": 6.924242424242424e-05, |
|
"loss": 2.4637, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.9363636363636364, |
|
"grad_norm": 10.978107452392578, |
|
"learning_rate": 6.916666666666666e-05, |
|
"loss": 1.5277, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.9386363636363636, |
|
"grad_norm": 14.109042167663574, |
|
"learning_rate": 6.90909090909091e-05, |
|
"loss": 1.9601, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.9409090909090909, |
|
"grad_norm": 10.699783325195312, |
|
"learning_rate": 6.901515151515152e-05, |
|
"loss": 2.2143, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.9431818181818182, |
|
"grad_norm": 10.57825756072998, |
|
"learning_rate": 6.893939393939395e-05, |
|
"loss": 2.0557, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.9454545454545454, |
|
"grad_norm": 12.432737350463867, |
|
"learning_rate": 6.886363636363637e-05, |
|
"loss": 1.7554, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.9477272727272728, |
|
"grad_norm": 12.157960891723633, |
|
"learning_rate": 6.878787878787879e-05, |
|
"loss": 2.1302, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 15.89067554473877, |
|
"learning_rate": 6.871212121212122e-05, |
|
"loss": 2.1424, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.9522727272727273, |
|
"grad_norm": 10.453248977661133, |
|
"learning_rate": 6.863636363636364e-05, |
|
"loss": 1.8215, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.9545454545454546, |
|
"grad_norm": 8.481575012207031, |
|
"learning_rate": 6.856060606060606e-05, |
|
"loss": 1.5999, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9568181818181818, |
|
"grad_norm": 10.795332908630371, |
|
"learning_rate": 6.848484848484848e-05, |
|
"loss": 1.4623, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.9590909090909091, |
|
"grad_norm": 18.586315155029297, |
|
"learning_rate": 6.840909090909091e-05, |
|
"loss": 2.1875, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.9613636363636363, |
|
"grad_norm": 15.387242317199707, |
|
"learning_rate": 6.833333333333333e-05, |
|
"loss": 2.1544, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.9636363636363636, |
|
"grad_norm": 11.277326583862305, |
|
"learning_rate": 6.825757575757576e-05, |
|
"loss": 1.8575, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.9659090909090909, |
|
"grad_norm": 9.451603889465332, |
|
"learning_rate": 6.818181818181818e-05, |
|
"loss": 1.6149, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9681818181818181, |
|
"grad_norm": 14.108964920043945, |
|
"learning_rate": 6.81060606060606e-05, |
|
"loss": 2.0166, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.9704545454545455, |
|
"grad_norm": 8.922270774841309, |
|
"learning_rate": 6.803030303030304e-05, |
|
"loss": 1.3486, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.9727272727272728, |
|
"grad_norm": 9.383979797363281, |
|
"learning_rate": 6.795454545454546e-05, |
|
"loss": 1.0425, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.975, |
|
"grad_norm": 13.076512336730957, |
|
"learning_rate": 6.787878787878789e-05, |
|
"loss": 1.7828, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.9772727272727273, |
|
"grad_norm": 14.815391540527344, |
|
"learning_rate": 6.78030303030303e-05, |
|
"loss": 1.893, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9795454545454545, |
|
"grad_norm": 10.523706436157227, |
|
"learning_rate": 6.772727272727273e-05, |
|
"loss": 1.5307, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.9818181818181818, |
|
"grad_norm": 16.938919067382812, |
|
"learning_rate": 6.765151515151515e-05, |
|
"loss": 1.9001, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.9840909090909091, |
|
"grad_norm": 11.781875610351562, |
|
"learning_rate": 6.757575757575758e-05, |
|
"loss": 2.183, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.9863636363636363, |
|
"grad_norm": 14.539305686950684, |
|
"learning_rate": 6.750000000000001e-05, |
|
"loss": 2.2021, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.9886363636363636, |
|
"grad_norm": 15.532546997070312, |
|
"learning_rate": 6.742424242424242e-05, |
|
"loss": 2.1856, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.990909090909091, |
|
"grad_norm": 12.917964935302734, |
|
"learning_rate": 6.734848484848485e-05, |
|
"loss": 2.8732, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.9931818181818182, |
|
"grad_norm": 12.498353958129883, |
|
"learning_rate": 6.727272727272727e-05, |
|
"loss": 1.9246, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.9954545454545455, |
|
"grad_norm": 14.181402206420898, |
|
"learning_rate": 6.71969696969697e-05, |
|
"loss": 2.3863, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.9977272727272727, |
|
"grad_norm": 12.139135360717773, |
|
"learning_rate": 6.712121212121213e-05, |
|
"loss": 2.5505, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 18.971040725708008, |
|
"learning_rate": 6.704545454545455e-05, |
|
"loss": 2.3566, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_f1": 0.8942, |
|
"eval_gen_len": 41.6727, |
|
"eval_loss": 1.852333426475525, |
|
"eval_precision": 0.8938, |
|
"eval_recall": 0.8947, |
|
"eval_rouge1": 0.4801, |
|
"eval_rouge2": 0.2302, |
|
"eval_rougeL": 0.4078, |
|
"eval_rougeLsum": 0.4472, |
|
"eval_runtime": 28.5976, |
|
"eval_samples_per_second": 3.846, |
|
"eval_steps_per_second": 0.49, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.0022727272727272, |
|
"grad_norm": 9.610616683959961, |
|
"learning_rate": 6.696969696969696e-05, |
|
"loss": 1.3656, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.0045454545454546, |
|
"grad_norm": 13.653773307800293, |
|
"learning_rate": 6.68939393939394e-05, |
|
"loss": 3.0115, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.0068181818181818, |
|
"grad_norm": 10.243281364440918, |
|
"learning_rate": 6.681818181818183e-05, |
|
"loss": 1.7598, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.009090909090909, |
|
"grad_norm": 12.79389762878418, |
|
"learning_rate": 6.674242424242425e-05, |
|
"loss": 1.7768, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.0113636363636365, |
|
"grad_norm": 8.748100280761719, |
|
"learning_rate": 6.666666666666667e-05, |
|
"loss": 1.4368, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.0136363636363637, |
|
"grad_norm": 9.42500114440918, |
|
"learning_rate": 6.659090909090909e-05, |
|
"loss": 1.0754, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.0159090909090909, |
|
"grad_norm": 11.976570129394531, |
|
"learning_rate": 6.651515151515152e-05, |
|
"loss": 2.07, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.018181818181818, |
|
"grad_norm": 9.448553085327148, |
|
"learning_rate": 6.643939393939394e-05, |
|
"loss": 1.5004, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.0204545454545455, |
|
"grad_norm": 10.295342445373535, |
|
"learning_rate": 6.636363636363638e-05, |
|
"loss": 1.6393, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.0227272727272727, |
|
"grad_norm": 9.445040702819824, |
|
"learning_rate": 6.628787878787878e-05, |
|
"loss": 1.7432, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.025, |
|
"grad_norm": 16.851524353027344, |
|
"learning_rate": 6.621212121212121e-05, |
|
"loss": 2.2318, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.0272727272727273, |
|
"grad_norm": 10.721171379089355, |
|
"learning_rate": 6.613636363636365e-05, |
|
"loss": 1.7857, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.0295454545454545, |
|
"grad_norm": 10.074830055236816, |
|
"learning_rate": 6.606060606060607e-05, |
|
"loss": 1.5901, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.0318181818181817, |
|
"grad_norm": 20.14990234375, |
|
"learning_rate": 6.598484848484849e-05, |
|
"loss": 2.6518, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.0340909090909092, |
|
"grad_norm": 10.911235809326172, |
|
"learning_rate": 6.59090909090909e-05, |
|
"loss": 2.1865, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.0363636363636364, |
|
"grad_norm": 18.03226089477539, |
|
"learning_rate": 6.583333333333334e-05, |
|
"loss": 2.4383, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.0386363636363636, |
|
"grad_norm": 9.279253959655762, |
|
"learning_rate": 6.575757575757576e-05, |
|
"loss": 0.9629, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.040909090909091, |
|
"grad_norm": 11.864253997802734, |
|
"learning_rate": 6.568181818181819e-05, |
|
"loss": 2.1734, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.0431818181818182, |
|
"grad_norm": 13.346138954162598, |
|
"learning_rate": 6.560606060606061e-05, |
|
"loss": 1.4337, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.0454545454545454, |
|
"grad_norm": 8.396434783935547, |
|
"learning_rate": 6.553030303030303e-05, |
|
"loss": 1.54, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.0477272727272728, |
|
"grad_norm": 9.705253601074219, |
|
"learning_rate": 6.545454545454546e-05, |
|
"loss": 1.9016, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"grad_norm": 9.6156005859375, |
|
"learning_rate": 6.537878787878788e-05, |
|
"loss": 1.3029, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.0522727272727272, |
|
"grad_norm": 16.548994064331055, |
|
"learning_rate": 6.530303030303032e-05, |
|
"loss": 3.5641, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.0545454545454545, |
|
"grad_norm": 11.045211791992188, |
|
"learning_rate": 6.522727272727272e-05, |
|
"loss": 1.3876, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.0568181818181819, |
|
"grad_norm": 10.465343475341797, |
|
"learning_rate": 6.515151515151516e-05, |
|
"loss": 1.5871, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.059090909090909, |
|
"grad_norm": 10.053452491760254, |
|
"learning_rate": 6.507575757575757e-05, |
|
"loss": 1.4177, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.0613636363636363, |
|
"grad_norm": 12.043208122253418, |
|
"learning_rate": 6.500000000000001e-05, |
|
"loss": 1.5364, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.0636363636363637, |
|
"grad_norm": 11.853958129882812, |
|
"learning_rate": 6.492424242424243e-05, |
|
"loss": 1.3952, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.065909090909091, |
|
"grad_norm": 8.25589656829834, |
|
"learning_rate": 6.484848484848485e-05, |
|
"loss": 1.5497, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.0681818181818181, |
|
"grad_norm": 13.430974960327148, |
|
"learning_rate": 6.477272727272728e-05, |
|
"loss": 2.4184, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0704545454545455, |
|
"grad_norm": 10.576482772827148, |
|
"learning_rate": 6.46969696969697e-05, |
|
"loss": 1.4223, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.0727272727272728, |
|
"grad_norm": 11.786113739013672, |
|
"learning_rate": 6.462121212121213e-05, |
|
"loss": 2.0499, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.075, |
|
"grad_norm": 12.00688362121582, |
|
"learning_rate": 6.454545454545455e-05, |
|
"loss": 2.9764, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.0772727272727272, |
|
"grad_norm": 10.834086418151855, |
|
"learning_rate": 6.446969696969697e-05, |
|
"loss": 2.0765, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.0795454545454546, |
|
"grad_norm": 10.710877418518066, |
|
"learning_rate": 6.439393939393939e-05, |
|
"loss": 1.4314, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.0818181818181818, |
|
"grad_norm": 12.800888061523438, |
|
"learning_rate": 6.431818181818182e-05, |
|
"loss": 1.4847, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.084090909090909, |
|
"grad_norm": 10.365299224853516, |
|
"learning_rate": 6.424242424242424e-05, |
|
"loss": 1.6775, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.0863636363636364, |
|
"grad_norm": 10.344579696655273, |
|
"learning_rate": 6.416666666666668e-05, |
|
"loss": 2.3473, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.0886363636363636, |
|
"grad_norm": 13.791784286499023, |
|
"learning_rate": 6.40909090909091e-05, |
|
"loss": 2.5763, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.0909090909090908, |
|
"grad_norm": 13.133481979370117, |
|
"learning_rate": 6.401515151515152e-05, |
|
"loss": 1.7025, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.0931818181818183, |
|
"grad_norm": 13.444737434387207, |
|
"learning_rate": 6.393939393939395e-05, |
|
"loss": 2.341, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.0954545454545455, |
|
"grad_norm": 15.245584487915039, |
|
"learning_rate": 6.386363636363637e-05, |
|
"loss": 1.929, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.0977272727272727, |
|
"grad_norm": 10.724458694458008, |
|
"learning_rate": 6.37878787878788e-05, |
|
"loss": 1.4099, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 11.243814468383789, |
|
"learning_rate": 6.371212121212121e-05, |
|
"loss": 1.5886, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.1022727272727273, |
|
"grad_norm": 11.731426239013672, |
|
"learning_rate": 6.363636363636364e-05, |
|
"loss": 1.9571, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.1045454545454545, |
|
"grad_norm": 10.820639610290527, |
|
"learning_rate": 6.356060606060606e-05, |
|
"loss": 1.113, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.106818181818182, |
|
"grad_norm": 14.63482666015625, |
|
"learning_rate": 6.34848484848485e-05, |
|
"loss": 1.9765, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.1090909090909091, |
|
"grad_norm": 12.746257781982422, |
|
"learning_rate": 6.340909090909091e-05, |
|
"loss": 1.5906, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.1113636363636363, |
|
"grad_norm": 14.916450500488281, |
|
"learning_rate": 6.333333333333333e-05, |
|
"loss": 1.6616, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.1136363636363635, |
|
"grad_norm": 11.509872436523438, |
|
"learning_rate": 6.325757575757577e-05, |
|
"loss": 2.5105, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.115909090909091, |
|
"grad_norm": 11.517654418945312, |
|
"learning_rate": 6.318181818181818e-05, |
|
"loss": 1.3542, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.1181818181818182, |
|
"grad_norm": 13.984039306640625, |
|
"learning_rate": 6.310606060606062e-05, |
|
"loss": 2.1356, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.1204545454545454, |
|
"grad_norm": 13.018148422241211, |
|
"learning_rate": 6.303030303030302e-05, |
|
"loss": 1.5024, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.1227272727272728, |
|
"grad_norm": 13.609540939331055, |
|
"learning_rate": 6.295454545454546e-05, |
|
"loss": 2.1359, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.125, |
|
"grad_norm": 13.505942344665527, |
|
"learning_rate": 6.287878787878788e-05, |
|
"loss": 2.8486, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.1272727272727272, |
|
"grad_norm": 11.420187950134277, |
|
"learning_rate": 6.280303030303031e-05, |
|
"loss": 1.5044, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.1295454545454546, |
|
"grad_norm": 14.127695083618164, |
|
"learning_rate": 6.272727272727273e-05, |
|
"loss": 2.6676, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.1318181818181818, |
|
"grad_norm": 9.813878059387207, |
|
"learning_rate": 6.265151515151515e-05, |
|
"loss": 1.4169, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.134090909090909, |
|
"grad_norm": 9.80479621887207, |
|
"learning_rate": 6.257575757575758e-05, |
|
"loss": 1.5349, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.1363636363636362, |
|
"grad_norm": 10.739019393920898, |
|
"learning_rate": 6.25e-05, |
|
"loss": 1.5255, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.1386363636363637, |
|
"grad_norm": 11.327676773071289, |
|
"learning_rate": 6.242424242424243e-05, |
|
"loss": 1.3854, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.1409090909090909, |
|
"grad_norm": 9.645312309265137, |
|
"learning_rate": 6.234848484848485e-05, |
|
"loss": 1.6148, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.143181818181818, |
|
"grad_norm": 12.285623550415039, |
|
"learning_rate": 6.227272727272727e-05, |
|
"loss": 1.9336, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.1454545454545455, |
|
"grad_norm": 15.579854011535645, |
|
"learning_rate": 6.219696969696969e-05, |
|
"loss": 2.1064, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.1477272727272727, |
|
"grad_norm": 17.76817512512207, |
|
"learning_rate": 6.212121212121213e-05, |
|
"loss": 1.4266, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 10.037004470825195, |
|
"learning_rate": 6.204545454545455e-05, |
|
"loss": 1.5432, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.1522727272727273, |
|
"grad_norm": 10.46380615234375, |
|
"learning_rate": 6.196969696969698e-05, |
|
"loss": 2.1057, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.1545454545454545, |
|
"grad_norm": 12.883086204528809, |
|
"learning_rate": 6.18939393939394e-05, |
|
"loss": 2.1955, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.1568181818181817, |
|
"grad_norm": 10.667054176330566, |
|
"learning_rate": 6.181818181818182e-05, |
|
"loss": 1.8041, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.1590909090909092, |
|
"grad_norm": 13.076772689819336, |
|
"learning_rate": 6.174242424242425e-05, |
|
"loss": 1.9923, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.1613636363636364, |
|
"grad_norm": 13.195068359375, |
|
"learning_rate": 6.166666666666667e-05, |
|
"loss": 2.2575, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.1636363636363636, |
|
"grad_norm": 25.86856460571289, |
|
"learning_rate": 6.15909090909091e-05, |
|
"loss": 0.9713, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.165909090909091, |
|
"grad_norm": 13.29697322845459, |
|
"learning_rate": 6.151515151515151e-05, |
|
"loss": 1.9724, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.1681818181818182, |
|
"grad_norm": 11.164151191711426, |
|
"learning_rate": 6.143939393939394e-05, |
|
"loss": 1.7574, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.1704545454545454, |
|
"grad_norm": 11.621664047241211, |
|
"learning_rate": 6.136363636363636e-05, |
|
"loss": 2.0349, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.1727272727272728, |
|
"grad_norm": 13.135611534118652, |
|
"learning_rate": 6.12878787878788e-05, |
|
"loss": 2.1065, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.175, |
|
"grad_norm": 13.730208396911621, |
|
"learning_rate": 6.121212121212121e-05, |
|
"loss": 2.2205, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.1772727272727272, |
|
"grad_norm": 11.453598022460938, |
|
"learning_rate": 6.113636363636363e-05, |
|
"loss": 2.2924, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.1795454545454545, |
|
"grad_norm": 10.924808502197266, |
|
"learning_rate": 6.106060606060607e-05, |
|
"loss": 1.2283, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.1818181818181819, |
|
"grad_norm": 16.08315658569336, |
|
"learning_rate": 6.098484848484849e-05, |
|
"loss": 2.5927, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.184090909090909, |
|
"grad_norm": 8.260347366333008, |
|
"learning_rate": 6.090909090909091e-05, |
|
"loss": 1.3534, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.1863636363636363, |
|
"grad_norm": 12.075833320617676, |
|
"learning_rate": 6.083333333333333e-05, |
|
"loss": 2.0813, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.1886363636363637, |
|
"grad_norm": 10.575677871704102, |
|
"learning_rate": 6.075757575757576e-05, |
|
"loss": 1.4781, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.190909090909091, |
|
"grad_norm": 12.236503601074219, |
|
"learning_rate": 6.0681818181818185e-05, |
|
"loss": 2.003, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.1931818181818181, |
|
"grad_norm": 12.172025680541992, |
|
"learning_rate": 6.060606060606061e-05, |
|
"loss": 1.4951, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.1954545454545455, |
|
"grad_norm": 12.456896781921387, |
|
"learning_rate": 6.053030303030304e-05, |
|
"loss": 1.8737, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.1977272727272728, |
|
"grad_norm": 13.824838638305664, |
|
"learning_rate": 6.045454545454545e-05, |
|
"loss": 1.7923, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 10.863786697387695, |
|
"learning_rate": 6.037878787878788e-05, |
|
"loss": 2.144, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.2022727272727272, |
|
"grad_norm": 17.319700241088867, |
|
"learning_rate": 6.03030303030303e-05, |
|
"loss": 1.9297, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.2045454545454546, |
|
"grad_norm": 8.89411449432373, |
|
"learning_rate": 6.022727272727273e-05, |
|
"loss": 1.3583, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.2068181818181818, |
|
"grad_norm": 16.971437454223633, |
|
"learning_rate": 6.0151515151515156e-05, |
|
"loss": 2.5184, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.209090909090909, |
|
"grad_norm": 11.486995697021484, |
|
"learning_rate": 6.0075757575757575e-05, |
|
"loss": 1.5296, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.2113636363636364, |
|
"grad_norm": 17.541278839111328, |
|
"learning_rate": 6e-05, |
|
"loss": 2.108, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.2136363636363636, |
|
"grad_norm": 13.599751472473145, |
|
"learning_rate": 5.992424242424243e-05, |
|
"loss": 2.0622, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.2159090909090908, |
|
"grad_norm": 10.884852409362793, |
|
"learning_rate": 5.9848484848484854e-05, |
|
"loss": 1.5018, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.2181818181818183, |
|
"grad_norm": 10.407668113708496, |
|
"learning_rate": 5.977272727272728e-05, |
|
"loss": 1.5013, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.2204545454545455, |
|
"grad_norm": 9.911277770996094, |
|
"learning_rate": 5.969696969696969e-05, |
|
"loss": 1.9855, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.2227272727272727, |
|
"grad_norm": 11.939435958862305, |
|
"learning_rate": 5.962121212121212e-05, |
|
"loss": 2.359, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.225, |
|
"grad_norm": 11.17503547668457, |
|
"learning_rate": 5.9545454545454546e-05, |
|
"loss": 1.4952, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.2272727272727273, |
|
"grad_norm": 15.073485374450684, |
|
"learning_rate": 5.946969696969697e-05, |
|
"loss": 2.1802, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.2295454545454545, |
|
"grad_norm": 12.413151741027832, |
|
"learning_rate": 5.93939393939394e-05, |
|
"loss": 1.9444, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.231818181818182, |
|
"grad_norm": 12.741022109985352, |
|
"learning_rate": 5.931818181818182e-05, |
|
"loss": 1.894, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.2340909090909091, |
|
"grad_norm": 11.041027069091797, |
|
"learning_rate": 5.9242424242424244e-05, |
|
"loss": 1.748, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.2363636363636363, |
|
"grad_norm": 10.045198440551758, |
|
"learning_rate": 5.916666666666667e-05, |
|
"loss": 1.7848, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.2386363636363638, |
|
"grad_norm": 10.759014129638672, |
|
"learning_rate": 5.90909090909091e-05, |
|
"loss": 1.7836, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.240909090909091, |
|
"grad_norm": 10.296431541442871, |
|
"learning_rate": 5.901515151515152e-05, |
|
"loss": 1.088, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.2431818181818182, |
|
"grad_norm": 11.159008026123047, |
|
"learning_rate": 5.8939393939393936e-05, |
|
"loss": 1.3126, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.2454545454545454, |
|
"grad_norm": 7.6021270751953125, |
|
"learning_rate": 5.886363636363636e-05, |
|
"loss": 1.137, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.2477272727272728, |
|
"grad_norm": 11.449591636657715, |
|
"learning_rate": 5.878787878787879e-05, |
|
"loss": 1.7471, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 14.451662063598633, |
|
"learning_rate": 5.871212121212122e-05, |
|
"loss": 2.014, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.2522727272727272, |
|
"grad_norm": 11.24593448638916, |
|
"learning_rate": 5.8636363636363634e-05, |
|
"loss": 1.5885, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.2545454545454544, |
|
"grad_norm": 10.326696395874023, |
|
"learning_rate": 5.856060606060606e-05, |
|
"loss": 1.5146, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.2568181818181818, |
|
"grad_norm": 11.736088752746582, |
|
"learning_rate": 5.848484848484849e-05, |
|
"loss": 2.1627, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.259090909090909, |
|
"grad_norm": 14.25733757019043, |
|
"learning_rate": 5.840909090909091e-05, |
|
"loss": 1.8419, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.2613636363636362, |
|
"grad_norm": 10.154618263244629, |
|
"learning_rate": 5.833333333333334e-05, |
|
"loss": 1.8319, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.2636363636363637, |
|
"grad_norm": 14.464015007019043, |
|
"learning_rate": 5.825757575757575e-05, |
|
"loss": 1.7117, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.2659090909090909, |
|
"grad_norm": 9.713830947875977, |
|
"learning_rate": 5.818181818181818e-05, |
|
"loss": 1.4495, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.268181818181818, |
|
"grad_norm": 21.958648681640625, |
|
"learning_rate": 5.810606060606061e-05, |
|
"loss": 3.0762, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.2704545454545455, |
|
"grad_norm": 11.349808692932129, |
|
"learning_rate": 5.803030303030304e-05, |
|
"loss": 1.9419, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.2727272727272727, |
|
"grad_norm": 12.586771965026855, |
|
"learning_rate": 5.7954545454545464e-05, |
|
"loss": 2.1826, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.275, |
|
"grad_norm": 10.261626243591309, |
|
"learning_rate": 5.787878787878788e-05, |
|
"loss": 2.0422, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.2772727272727273, |
|
"grad_norm": 11.65180492401123, |
|
"learning_rate": 5.78030303030303e-05, |
|
"loss": 1.5295, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.2795454545454545, |
|
"grad_norm": 12.369877815246582, |
|
"learning_rate": 5.772727272727273e-05, |
|
"loss": 1.8935, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.2818181818181817, |
|
"grad_norm": 10.670714378356934, |
|
"learning_rate": 5.7651515151515156e-05, |
|
"loss": 2.0215, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.2840909090909092, |
|
"grad_norm": 13.76659870147705, |
|
"learning_rate": 5.757575757575758e-05, |
|
"loss": 2.2982, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.2863636363636364, |
|
"grad_norm": 9.004195213317871, |
|
"learning_rate": 5.7499999999999995e-05, |
|
"loss": 1.6066, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.2886363636363636, |
|
"grad_norm": 10.873322486877441, |
|
"learning_rate": 5.742424242424243e-05, |
|
"loss": 1.5999, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.290909090909091, |
|
"grad_norm": 11.641073226928711, |
|
"learning_rate": 5.7348484848484854e-05, |
|
"loss": 1.3272, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.2931818181818182, |
|
"grad_norm": 9.68420124053955, |
|
"learning_rate": 5.727272727272728e-05, |
|
"loss": 1.413, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.2954545454545454, |
|
"grad_norm": 13.477838516235352, |
|
"learning_rate": 5.719696969696971e-05, |
|
"loss": 2.5129, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.2977272727272728, |
|
"grad_norm": 11.720010757446289, |
|
"learning_rate": 5.712121212121212e-05, |
|
"loss": 2.1576, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 13.136527061462402, |
|
"learning_rate": 5.7045454545454546e-05, |
|
"loss": 1.9311, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.3022727272727272, |
|
"grad_norm": 8.095415115356445, |
|
"learning_rate": 5.696969696969697e-05, |
|
"loss": 0.8927, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.3045454545454547, |
|
"grad_norm": 11.233893394470215, |
|
"learning_rate": 5.68939393939394e-05, |
|
"loss": 2.0108, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.3068181818181819, |
|
"grad_norm": 11.203099250793457, |
|
"learning_rate": 5.6818181818181825e-05, |
|
"loss": 1.9241, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.309090909090909, |
|
"grad_norm": 9.640209197998047, |
|
"learning_rate": 5.6742424242424244e-05, |
|
"loss": 1.3841, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.3113636363636363, |
|
"grad_norm": 10.882938385009766, |
|
"learning_rate": 5.666666666666667e-05, |
|
"loss": 1.4184, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.3136363636363637, |
|
"grad_norm": 10.470818519592285, |
|
"learning_rate": 5.65909090909091e-05, |
|
"loss": 2.0096, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.315909090909091, |
|
"grad_norm": 12.759695053100586, |
|
"learning_rate": 5.651515151515152e-05, |
|
"loss": 2.138, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.3181818181818181, |
|
"grad_norm": 26.707128524780273, |
|
"learning_rate": 5.643939393939395e-05, |
|
"loss": 2.8215, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.3204545454545453, |
|
"grad_norm": 11.116402626037598, |
|
"learning_rate": 5.636363636363636e-05, |
|
"loss": 2.4158, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.3227272727272728, |
|
"grad_norm": 14.136595726013184, |
|
"learning_rate": 5.628787878787879e-05, |
|
"loss": 1.6545, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.325, |
|
"grad_norm": 11.88375473022461, |
|
"learning_rate": 5.6212121212121215e-05, |
|
"loss": 2.1069, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.3272727272727272, |
|
"grad_norm": 11.863356590270996, |
|
"learning_rate": 5.613636363636364e-05, |
|
"loss": 1.535, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.3295454545454546, |
|
"grad_norm": 11.284381866455078, |
|
"learning_rate": 5.606060606060606e-05, |
|
"loss": 2.3407, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.3318181818181818, |
|
"grad_norm": 11.79831600189209, |
|
"learning_rate": 5.598484848484849e-05, |
|
"loss": 1.6409, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.334090909090909, |
|
"grad_norm": 11.130000114440918, |
|
"learning_rate": 5.5909090909090913e-05, |
|
"loss": 1.6426, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.3363636363636364, |
|
"grad_norm": 9.1551513671875, |
|
"learning_rate": 5.583333333333334e-05, |
|
"loss": 1.8466, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.3386363636363636, |
|
"grad_norm": 14.405865669250488, |
|
"learning_rate": 5.5757575757575766e-05, |
|
"loss": 2.066, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.3409090909090908, |
|
"grad_norm": 53.46037673950195, |
|
"learning_rate": 5.568181818181818e-05, |
|
"loss": 2.4224, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.3431818181818183, |
|
"grad_norm": 11.6724271774292, |
|
"learning_rate": 5.5606060606060605e-05, |
|
"loss": 1.7148, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.3454545454545455, |
|
"grad_norm": 15.849516868591309, |
|
"learning_rate": 5.553030303030303e-05, |
|
"loss": 2.0981, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.3477272727272727, |
|
"grad_norm": 13.421188354492188, |
|
"learning_rate": 5.545454545454546e-05, |
|
"loss": 1.722, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 14.319283485412598, |
|
"learning_rate": 5.5378787878787884e-05, |
|
"loss": 1.7284, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.3522727272727273, |
|
"grad_norm": 12.210022926330566, |
|
"learning_rate": 5.5303030303030304e-05, |
|
"loss": 1.4507, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.3545454545454545, |
|
"grad_norm": 11.60317325592041, |
|
"learning_rate": 5.522727272727273e-05, |
|
"loss": 1.7749, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.356818181818182, |
|
"grad_norm": 12.895737648010254, |
|
"learning_rate": 5.5151515151515156e-05, |
|
"loss": 1.5555, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.3590909090909091, |
|
"grad_norm": 11.198805809020996, |
|
"learning_rate": 5.507575757575758e-05, |
|
"loss": 1.7624, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.3613636363636363, |
|
"grad_norm": 13.309189796447754, |
|
"learning_rate": 5.500000000000001e-05, |
|
"loss": 1.8765, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.3636363636363638, |
|
"grad_norm": 10.177202224731445, |
|
"learning_rate": 5.492424242424242e-05, |
|
"loss": 1.1895, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.365909090909091, |
|
"grad_norm": 11.205484390258789, |
|
"learning_rate": 5.484848484848485e-05, |
|
"loss": 1.1661, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.3681818181818182, |
|
"grad_norm": 12.091497421264648, |
|
"learning_rate": 5.4772727272727274e-05, |
|
"loss": 1.9972, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.3704545454545456, |
|
"grad_norm": 11.2894926071167, |
|
"learning_rate": 5.46969696969697e-05, |
|
"loss": 1.7121, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.3727272727272728, |
|
"grad_norm": 15.034446716308594, |
|
"learning_rate": 5.462121212121213e-05, |
|
"loss": 2.8078, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.375, |
|
"grad_norm": 8.075346946716309, |
|
"learning_rate": 5.4545454545454546e-05, |
|
"loss": 1.0453, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.3772727272727272, |
|
"grad_norm": 10.377656936645508, |
|
"learning_rate": 5.446969696969697e-05, |
|
"loss": 1.7973, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.3795454545454544, |
|
"grad_norm": 10.147284507751465, |
|
"learning_rate": 5.43939393939394e-05, |
|
"loss": 2.1848, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.3818181818181818, |
|
"grad_norm": 11.856623649597168, |
|
"learning_rate": 5.4318181818181825e-05, |
|
"loss": 1.9857, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.384090909090909, |
|
"grad_norm": 10.355262756347656, |
|
"learning_rate": 5.424242424242425e-05, |
|
"loss": 1.4383, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.3863636363636362, |
|
"grad_norm": 9.085455894470215, |
|
"learning_rate": 5.4166666666666664e-05, |
|
"loss": 1.382, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.3886363636363637, |
|
"grad_norm": 13.221922874450684, |
|
"learning_rate": 5.409090909090909e-05, |
|
"loss": 2.3278, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.3909090909090909, |
|
"grad_norm": 14.725556373596191, |
|
"learning_rate": 5.401515151515152e-05, |
|
"loss": 2.0181, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.393181818181818, |
|
"grad_norm": 11.90503978729248, |
|
"learning_rate": 5.393939393939394e-05, |
|
"loss": 2.5601, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.3954545454545455, |
|
"grad_norm": 10.583837509155273, |
|
"learning_rate": 5.386363636363637e-05, |
|
"loss": 1.4886, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.3977272727272727, |
|
"grad_norm": 12.369796752929688, |
|
"learning_rate": 5.378787878787879e-05, |
|
"loss": 1.2716, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 12.412566184997559, |
|
"learning_rate": 5.3712121212121215e-05, |
|
"loss": 2.0391, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.4022727272727273, |
|
"grad_norm": 12.033483505249023, |
|
"learning_rate": 5.363636363636364e-05, |
|
"loss": 1.2044, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.4045454545454545, |
|
"grad_norm": 11.291866302490234, |
|
"learning_rate": 5.356060606060607e-05, |
|
"loss": 2.3266, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.4068181818181817, |
|
"grad_norm": 17.745227813720703, |
|
"learning_rate": 5.348484848484848e-05, |
|
"loss": 1.7097, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.4090909090909092, |
|
"grad_norm": 11.858403205871582, |
|
"learning_rate": 5.340909090909091e-05, |
|
"loss": 1.9088, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.4113636363636364, |
|
"grad_norm": 14.968146324157715, |
|
"learning_rate": 5.333333333333333e-05, |
|
"loss": 2.009, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.4136363636363636, |
|
"grad_norm": 13.16178035736084, |
|
"learning_rate": 5.325757575757576e-05, |
|
"loss": 1.6262, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.415909090909091, |
|
"grad_norm": 11.63772201538086, |
|
"learning_rate": 5.3181818181818186e-05, |
|
"loss": 1.481, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.4181818181818182, |
|
"grad_norm": 13.266715049743652, |
|
"learning_rate": 5.3106060606060605e-05, |
|
"loss": 2.3015, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.4204545454545454, |
|
"grad_norm": 11.690614700317383, |
|
"learning_rate": 5.303030303030303e-05, |
|
"loss": 1.7226, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.4227272727272728, |
|
"grad_norm": 10.599973678588867, |
|
"learning_rate": 5.295454545454546e-05, |
|
"loss": 1.0261, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.425, |
|
"grad_norm": 17.117259979248047, |
|
"learning_rate": 5.2878787878787884e-05, |
|
"loss": 1.7164, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.4272727272727272, |
|
"grad_norm": 11.62483024597168, |
|
"learning_rate": 5.280303030303031e-05, |
|
"loss": 1.3686, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.4295454545454547, |
|
"grad_norm": 10.503996849060059, |
|
"learning_rate": 5.272727272727272e-05, |
|
"loss": 1.6085, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.4318181818181819, |
|
"grad_norm": 14.493663787841797, |
|
"learning_rate": 5.265151515151515e-05, |
|
"loss": 2.0943, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.434090909090909, |
|
"grad_norm": 11.125360488891602, |
|
"learning_rate": 5.2575757575757576e-05, |
|
"loss": 1.8284, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.4363636363636363, |
|
"grad_norm": 10.438358306884766, |
|
"learning_rate": 5.25e-05, |
|
"loss": 2.1436, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.4386363636363637, |
|
"grad_norm": 13.013614654541016, |
|
"learning_rate": 5.242424242424243e-05, |
|
"loss": 1.6999, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.440909090909091, |
|
"grad_norm": 14.21478271484375, |
|
"learning_rate": 5.234848484848485e-05, |
|
"loss": 3.268, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.4431818181818181, |
|
"grad_norm": 10.756131172180176, |
|
"learning_rate": 5.2272727272727274e-05, |
|
"loss": 1.1294, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.4454545454545453, |
|
"grad_norm": 14.409692764282227, |
|
"learning_rate": 5.21969696969697e-05, |
|
"loss": 1.391, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.4477272727272728, |
|
"grad_norm": 9.839500427246094, |
|
"learning_rate": 5.212121212121213e-05, |
|
"loss": 1.4028, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 13.601579666137695, |
|
"learning_rate": 5.204545454545455e-05, |
|
"loss": 1.6384, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.4522727272727272, |
|
"grad_norm": 12.721500396728516, |
|
"learning_rate": 5.1969696969696966e-05, |
|
"loss": 1.9382, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.4545454545454546, |
|
"grad_norm": 11.373588562011719, |
|
"learning_rate": 5.189393939393939e-05, |
|
"loss": 2.7324, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.4568181818181818, |
|
"grad_norm": 11.873559951782227, |
|
"learning_rate": 5.181818181818182e-05, |
|
"loss": 1.6583, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.459090909090909, |
|
"grad_norm": 10.649148941040039, |
|
"learning_rate": 5.1742424242424245e-05, |
|
"loss": 1.7733, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.4613636363636364, |
|
"grad_norm": 12.14698314666748, |
|
"learning_rate": 5.166666666666667e-05, |
|
"loss": 1.6434, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.4636363636363636, |
|
"grad_norm": 9.80806827545166, |
|
"learning_rate": 5.159090909090909e-05, |
|
"loss": 1.9463, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.4659090909090908, |
|
"grad_norm": 7.273732662200928, |
|
"learning_rate": 5.151515151515152e-05, |
|
"loss": 0.8156, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.4681818181818183, |
|
"grad_norm": 12.560272216796875, |
|
"learning_rate": 5.143939393939394e-05, |
|
"loss": 2.2347, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.4704545454545455, |
|
"grad_norm": 10.116893768310547, |
|
"learning_rate": 5.136363636363637e-05, |
|
"loss": 1.2157, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.4727272727272727, |
|
"grad_norm": 11.09861946105957, |
|
"learning_rate": 5.1287878787878796e-05, |
|
"loss": 1.2521, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.475, |
|
"grad_norm": 11.454336166381836, |
|
"learning_rate": 5.121212121212121e-05, |
|
"loss": 1.6148, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.4772727272727273, |
|
"grad_norm": 11.669930458068848, |
|
"learning_rate": 5.1136363636363635e-05, |
|
"loss": 2.4559, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.4795454545454545, |
|
"grad_norm": 10.853449821472168, |
|
"learning_rate": 5.106060606060606e-05, |
|
"loss": 1.6519, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.481818181818182, |
|
"grad_norm": 23.87467384338379, |
|
"learning_rate": 5.098484848484849e-05, |
|
"loss": 3.9198, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.4840909090909091, |
|
"grad_norm": 15.731586456298828, |
|
"learning_rate": 5.090909090909091e-05, |
|
"loss": 2.4425, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.4863636363636363, |
|
"grad_norm": 10.91791820526123, |
|
"learning_rate": 5.0833333333333333e-05, |
|
"loss": 1.4977, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.4886363636363638, |
|
"grad_norm": 11.515501022338867, |
|
"learning_rate": 5.075757575757576e-05, |
|
"loss": 1.4377, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.490909090909091, |
|
"grad_norm": 9.79021167755127, |
|
"learning_rate": 5.0681818181818186e-05, |
|
"loss": 1.208, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.4931818181818182, |
|
"grad_norm": 7.424502849578857, |
|
"learning_rate": 5.060606060606061e-05, |
|
"loss": 1.368, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.4954545454545456, |
|
"grad_norm": 9.132887840270996, |
|
"learning_rate": 5.0530303030303025e-05, |
|
"loss": 1.0296, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.4977272727272728, |
|
"grad_norm": 14.063539505004883, |
|
"learning_rate": 5.045454545454545e-05, |
|
"loss": 1.9923, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 10.994144439697266, |
|
"learning_rate": 5.037878787878788e-05, |
|
"loss": 1.5963, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.5022727272727274, |
|
"grad_norm": 11.193540573120117, |
|
"learning_rate": 5.030303030303031e-05, |
|
"loss": 2.6418, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.5045454545454544, |
|
"grad_norm": 11.344916343688965, |
|
"learning_rate": 5.022727272727274e-05, |
|
"loss": 0.9847, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.5068181818181818, |
|
"grad_norm": 16.028928756713867, |
|
"learning_rate": 5.015151515151515e-05, |
|
"loss": 2.7095, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.509090909090909, |
|
"grad_norm": 10.2492036819458, |
|
"learning_rate": 5.0075757575757576e-05, |
|
"loss": 1.4351, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.5113636363636362, |
|
"grad_norm": 12.819211959838867, |
|
"learning_rate": 5e-05, |
|
"loss": 2.2236, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.5136363636363637, |
|
"grad_norm": 9.43850326538086, |
|
"learning_rate": 4.992424242424243e-05, |
|
"loss": 0.988, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.5159090909090909, |
|
"grad_norm": 12.35922622680664, |
|
"learning_rate": 4.984848484848485e-05, |
|
"loss": 1.9395, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.518181818181818, |
|
"grad_norm": 12.175325393676758, |
|
"learning_rate": 4.9772727272727275e-05, |
|
"loss": 2.0219, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.5204545454545455, |
|
"grad_norm": 16.44111442565918, |
|
"learning_rate": 4.9696969696969694e-05, |
|
"loss": 1.7191, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.5227272727272727, |
|
"grad_norm": 12.413610458374023, |
|
"learning_rate": 4.962121212121213e-05, |
|
"loss": 2.2003, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.525, |
|
"grad_norm": 7.922098159790039, |
|
"learning_rate": 4.9545454545454553e-05, |
|
"loss": 1.1514, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.5272727272727273, |
|
"grad_norm": 11.402259826660156, |
|
"learning_rate": 4.946969696969697e-05, |
|
"loss": 1.6611, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.5295454545454545, |
|
"grad_norm": 10.548962593078613, |
|
"learning_rate": 4.93939393939394e-05, |
|
"loss": 1.6242, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.5318181818181817, |
|
"grad_norm": 14.536432266235352, |
|
"learning_rate": 4.931818181818182e-05, |
|
"loss": 2.2415, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.5340909090909092, |
|
"grad_norm": 12.954751014709473, |
|
"learning_rate": 4.9242424242424245e-05, |
|
"loss": 1.8463, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.5363636363636364, |
|
"grad_norm": 12.143820762634277, |
|
"learning_rate": 4.9166666666666665e-05, |
|
"loss": 1.97, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.5386363636363636, |
|
"grad_norm": 10.134570121765137, |
|
"learning_rate": 4.909090909090909e-05, |
|
"loss": 0.9264, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.540909090909091, |
|
"grad_norm": 12.558758735656738, |
|
"learning_rate": 4.901515151515152e-05, |
|
"loss": 1.4608, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.5431818181818182, |
|
"grad_norm": 10.165045738220215, |
|
"learning_rate": 4.8939393939393944e-05, |
|
"loss": 1.3453, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.5454545454545454, |
|
"grad_norm": 11.995816230773926, |
|
"learning_rate": 4.886363636363637e-05, |
|
"loss": 2.1228, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.5477272727272728, |
|
"grad_norm": 10.822747230529785, |
|
"learning_rate": 4.878787878787879e-05, |
|
"loss": 2.0378, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 16.348892211914062, |
|
"learning_rate": 4.8712121212121216e-05, |
|
"loss": 1.7209, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.5522727272727272, |
|
"grad_norm": 9.395282745361328, |
|
"learning_rate": 4.863636363636364e-05, |
|
"loss": 1.4529, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.5545454545454547, |
|
"grad_norm": 16.89964485168457, |
|
"learning_rate": 4.856060606060606e-05, |
|
"loss": 2.8833, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.5568181818181817, |
|
"grad_norm": 10.703327178955078, |
|
"learning_rate": 4.848484848484849e-05, |
|
"loss": 1.7938, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.559090909090909, |
|
"grad_norm": 19.770193099975586, |
|
"learning_rate": 4.840909090909091e-05, |
|
"loss": 1.6041, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.5613636363636365, |
|
"grad_norm": 11.777501106262207, |
|
"learning_rate": 4.8333333333333334e-05, |
|
"loss": 2.0716, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.5636363636363635, |
|
"grad_norm": 10.248165130615234, |
|
"learning_rate": 4.825757575757576e-05, |
|
"loss": 1.5853, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.565909090909091, |
|
"grad_norm": 10.732747077941895, |
|
"learning_rate": 4.8181818181818186e-05, |
|
"loss": 1.2683, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.5681818181818183, |
|
"grad_norm": 11.304749488830566, |
|
"learning_rate": 4.810606060606061e-05, |
|
"loss": 2.2432, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.5704545454545453, |
|
"grad_norm": 13.820841789245605, |
|
"learning_rate": 4.803030303030303e-05, |
|
"loss": 1.8117, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.5727272727272728, |
|
"grad_norm": 9.33556079864502, |
|
"learning_rate": 4.795454545454546e-05, |
|
"loss": 1.0837, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.575, |
|
"grad_norm": 13.970429420471191, |
|
"learning_rate": 4.787878787878788e-05, |
|
"loss": 2.5927, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.5772727272727272, |
|
"grad_norm": 10.840149879455566, |
|
"learning_rate": 4.7803030303030304e-05, |
|
"loss": 1.8707, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.5795454545454546, |
|
"grad_norm": 11.14415168762207, |
|
"learning_rate": 4.772727272727273e-05, |
|
"loss": 1.6668, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.5818181818181818, |
|
"grad_norm": 14.185403823852539, |
|
"learning_rate": 4.765151515151515e-05, |
|
"loss": 1.6091, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.584090909090909, |
|
"grad_norm": 13.565306663513184, |
|
"learning_rate": 4.7575757575757576e-05, |
|
"loss": 1.8229, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.5863636363636364, |
|
"grad_norm": 14.329642295837402, |
|
"learning_rate": 4.75e-05, |
|
"loss": 1.9366, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.5886363636363636, |
|
"grad_norm": 12.332931518554688, |
|
"learning_rate": 4.742424242424243e-05, |
|
"loss": 1.683, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.5909090909090908, |
|
"grad_norm": 10.493454933166504, |
|
"learning_rate": 4.7348484848484855e-05, |
|
"loss": 1.8994, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.5931818181818183, |
|
"grad_norm": 11.809647560119629, |
|
"learning_rate": 4.7272727272727275e-05, |
|
"loss": 1.509, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.5954545454545455, |
|
"grad_norm": 12.72128963470459, |
|
"learning_rate": 4.71969696969697e-05, |
|
"loss": 2.1266, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.5977272727272727, |
|
"grad_norm": 13.074295043945312, |
|
"learning_rate": 4.712121212121212e-05, |
|
"loss": 1.6113, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 10.254904747009277, |
|
"learning_rate": 4.704545454545455e-05, |
|
"loss": 2.2737, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.6022727272727273, |
|
"grad_norm": 24.574390411376953, |
|
"learning_rate": 4.696969696969697e-05, |
|
"loss": 2.2779, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.6045454545454545, |
|
"grad_norm": 10.441598892211914, |
|
"learning_rate": 4.689393939393939e-05, |
|
"loss": 1.8209, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.606818181818182, |
|
"grad_norm": 12.4207763671875, |
|
"learning_rate": 4.681818181818182e-05, |
|
"loss": 1.5389, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.6090909090909091, |
|
"grad_norm": 15.072708129882812, |
|
"learning_rate": 4.6742424242424245e-05, |
|
"loss": 1.3703, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.6113636363636363, |
|
"grad_norm": 11.555070877075195, |
|
"learning_rate": 4.666666666666667e-05, |
|
"loss": 1.9363, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.6136363636363638, |
|
"grad_norm": 13.27509593963623, |
|
"learning_rate": 4.659090909090909e-05, |
|
"loss": 1.4334, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.615909090909091, |
|
"grad_norm": 12.357429504394531, |
|
"learning_rate": 4.651515151515152e-05, |
|
"loss": 2.3112, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.6181818181818182, |
|
"grad_norm": 19.84957504272461, |
|
"learning_rate": 4.6439393939393944e-05, |
|
"loss": 1.1851, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.6204545454545456, |
|
"grad_norm": 10.689920425415039, |
|
"learning_rate": 4.636363636363636e-05, |
|
"loss": 1.921, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.6227272727272726, |
|
"grad_norm": 10.688066482543945, |
|
"learning_rate": 4.628787878787879e-05, |
|
"loss": 1.2294, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.625, |
|
"grad_norm": 11.80333423614502, |
|
"learning_rate": 4.621212121212121e-05, |
|
"loss": 2.5255, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.6272727272727274, |
|
"grad_norm": 11.181013107299805, |
|
"learning_rate": 4.6136363636363635e-05, |
|
"loss": 1.2692, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.6295454545454544, |
|
"grad_norm": 11.557047843933105, |
|
"learning_rate": 4.606060606060607e-05, |
|
"loss": 1.4575, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.6318181818181818, |
|
"grad_norm": 13.798693656921387, |
|
"learning_rate": 4.598484848484849e-05, |
|
"loss": 2.3197, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.634090909090909, |
|
"grad_norm": 8.890710830688477, |
|
"learning_rate": 4.5909090909090914e-05, |
|
"loss": 1.5266, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.6363636363636362, |
|
"grad_norm": 10.293892860412598, |
|
"learning_rate": 4.5833333333333334e-05, |
|
"loss": 1.9222, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.6386363636363637, |
|
"grad_norm": 12.959512710571289, |
|
"learning_rate": 4.575757575757576e-05, |
|
"loss": 1.5771, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.6409090909090909, |
|
"grad_norm": 11.565927505493164, |
|
"learning_rate": 4.5681818181818186e-05, |
|
"loss": 1.5313, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.643181818181818, |
|
"grad_norm": 9.419241905212402, |
|
"learning_rate": 4.5606060606060606e-05, |
|
"loss": 1.4229, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.6454545454545455, |
|
"grad_norm": 15.411003112792969, |
|
"learning_rate": 4.553030303030303e-05, |
|
"loss": 1.8707, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.6477272727272727, |
|
"grad_norm": 7.6546711921691895, |
|
"learning_rate": 4.545454545454546e-05, |
|
"loss": 0.742, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"grad_norm": 13.029730796813965, |
|
"learning_rate": 4.5378787878787885e-05, |
|
"loss": 1.5179, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.6522727272727273, |
|
"grad_norm": 12.853962898254395, |
|
"learning_rate": 4.5303030303030304e-05, |
|
"loss": 1.8908, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.6545454545454545, |
|
"grad_norm": 12.864992141723633, |
|
"learning_rate": 4.522727272727273e-05, |
|
"loss": 1.7175, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.6568181818181817, |
|
"grad_norm": 13.25144100189209, |
|
"learning_rate": 4.515151515151516e-05, |
|
"loss": 1.7681, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.6590909090909092, |
|
"grad_norm": 9.894201278686523, |
|
"learning_rate": 4.5075757575757577e-05, |
|
"loss": 1.5505, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.6613636363636364, |
|
"grad_norm": 16.501630783081055, |
|
"learning_rate": 4.5e-05, |
|
"loss": 1.4968, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.6636363636363636, |
|
"grad_norm": 10.3342924118042, |
|
"learning_rate": 4.492424242424242e-05, |
|
"loss": 1.4734, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.665909090909091, |
|
"grad_norm": 11.081184387207031, |
|
"learning_rate": 4.484848484848485e-05, |
|
"loss": 2.6513, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.6681818181818182, |
|
"grad_norm": 17.005704879760742, |
|
"learning_rate": 4.4772727272727275e-05, |
|
"loss": 2.4109, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.6704545454545454, |
|
"grad_norm": 11.718207359313965, |
|
"learning_rate": 4.46969696969697e-05, |
|
"loss": 1.6445, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.6727272727272728, |
|
"grad_norm": 12.14245319366455, |
|
"learning_rate": 4.462121212121213e-05, |
|
"loss": 2.335, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.675, |
|
"grad_norm": 10.971789360046387, |
|
"learning_rate": 4.454545454545455e-05, |
|
"loss": 1.6266, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.6772727272727272, |
|
"grad_norm": 17.435321807861328, |
|
"learning_rate": 4.4469696969696973e-05, |
|
"loss": 2.1164, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.6795454545454547, |
|
"grad_norm": 10.45814323425293, |
|
"learning_rate": 4.43939393939394e-05, |
|
"loss": 1.3992, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.6818181818181817, |
|
"grad_norm": 12.788302421569824, |
|
"learning_rate": 4.431818181818182e-05, |
|
"loss": 2.4001, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.684090909090909, |
|
"grad_norm": 14.425982475280762, |
|
"learning_rate": 4.4242424242424246e-05, |
|
"loss": 2.163, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.6863636363636365, |
|
"grad_norm": 9.09310531616211, |
|
"learning_rate": 4.4166666666666665e-05, |
|
"loss": 1.4595, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.6886363636363635, |
|
"grad_norm": 11.336987495422363, |
|
"learning_rate": 4.409090909090909e-05, |
|
"loss": 2.6262, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.690909090909091, |
|
"grad_norm": 11.697134017944336, |
|
"learning_rate": 4.401515151515152e-05, |
|
"loss": 1.3628, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.6931818181818183, |
|
"grad_norm": 8.620695114135742, |
|
"learning_rate": 4.3939393939393944e-05, |
|
"loss": 1.2893, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.6954545454545453, |
|
"grad_norm": 9.322046279907227, |
|
"learning_rate": 4.386363636363637e-05, |
|
"loss": 1.9579, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.6977272727272728, |
|
"grad_norm": 11.273119926452637, |
|
"learning_rate": 4.378787878787879e-05, |
|
"loss": 2.2207, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"grad_norm": 11.111379623413086, |
|
"learning_rate": 4.3712121212121216e-05, |
|
"loss": 1.4021, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.7022727272727272, |
|
"grad_norm": 11.808859825134277, |
|
"learning_rate": 4.3636363636363636e-05, |
|
"loss": 1.4873, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.7045454545454546, |
|
"grad_norm": 14.41899585723877, |
|
"learning_rate": 4.356060606060606e-05, |
|
"loss": 1.9247, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.7068181818181818, |
|
"grad_norm": 9.383740425109863, |
|
"learning_rate": 4.348484848484849e-05, |
|
"loss": 1.6231, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.709090909090909, |
|
"grad_norm": 9.926271438598633, |
|
"learning_rate": 4.340909090909091e-05, |
|
"loss": 2.2661, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.7113636363636364, |
|
"grad_norm": 12.015188217163086, |
|
"learning_rate": 4.3333333333333334e-05, |
|
"loss": 1.4877, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.7136363636363636, |
|
"grad_norm": 12.057700157165527, |
|
"learning_rate": 4.325757575757576e-05, |
|
"loss": 1.6091, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.7159090909090908, |
|
"grad_norm": 8.392674446105957, |
|
"learning_rate": 4.318181818181819e-05, |
|
"loss": 1.4652, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.7181818181818183, |
|
"grad_norm": 7.7269287109375, |
|
"learning_rate": 4.3106060606060606e-05, |
|
"loss": 1.1991, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.7204545454545455, |
|
"grad_norm": 13.280454635620117, |
|
"learning_rate": 4.303030303030303e-05, |
|
"loss": 1.9597, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.7227272727272727, |
|
"grad_norm": 11.144329071044922, |
|
"learning_rate": 4.295454545454546e-05, |
|
"loss": 1.6052, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.725, |
|
"grad_norm": 12.23388385772705, |
|
"learning_rate": 4.287878787878788e-05, |
|
"loss": 1.5491, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.7272727272727273, |
|
"grad_norm": 11.918728828430176, |
|
"learning_rate": 4.2803030303030305e-05, |
|
"loss": 2.0586, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.7295454545454545, |
|
"grad_norm": 7.68416166305542, |
|
"learning_rate": 4.2727272727272724e-05, |
|
"loss": 1.0501, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.731818181818182, |
|
"grad_norm": 16.64651870727539, |
|
"learning_rate": 4.265151515151515e-05, |
|
"loss": 1.9819, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.7340909090909091, |
|
"grad_norm": 14.889754295349121, |
|
"learning_rate": 4.257575757575758e-05, |
|
"loss": 2.5418, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.7363636363636363, |
|
"grad_norm": 13.508451461791992, |
|
"learning_rate": 4.25e-05, |
|
"loss": 1.5028, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.7386363636363638, |
|
"grad_norm": 9.541330337524414, |
|
"learning_rate": 4.242424242424243e-05, |
|
"loss": 1.0183, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.740909090909091, |
|
"grad_norm": 13.14413833618164, |
|
"learning_rate": 4.234848484848485e-05, |
|
"loss": 2.0542, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.7431818181818182, |
|
"grad_norm": 12.490581512451172, |
|
"learning_rate": 4.2272727272727275e-05, |
|
"loss": 1.5971, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.7454545454545456, |
|
"grad_norm": 14.117782592773438, |
|
"learning_rate": 4.21969696969697e-05, |
|
"loss": 3.0207, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.7477272727272726, |
|
"grad_norm": 12.968109130859375, |
|
"learning_rate": 4.212121212121212e-05, |
|
"loss": 1.9058, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 10.889745712280273, |
|
"learning_rate": 4.204545454545455e-05, |
|
"loss": 1.535, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.7522727272727274, |
|
"grad_norm": 11.901477813720703, |
|
"learning_rate": 4.196969696969697e-05, |
|
"loss": 1.3743, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.7545454545454544, |
|
"grad_norm": 11.466394424438477, |
|
"learning_rate": 4.189393939393939e-05, |
|
"loss": 2.1364, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.7568181818181818, |
|
"grad_norm": 9.973612785339355, |
|
"learning_rate": 4.181818181818182e-05, |
|
"loss": 1.7472, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.759090909090909, |
|
"grad_norm": 11.81697940826416, |
|
"learning_rate": 4.1742424242424246e-05, |
|
"loss": 1.6475, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.7613636363636362, |
|
"grad_norm": 10.81869125366211, |
|
"learning_rate": 4.166666666666667e-05, |
|
"loss": 2.433, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.7636363636363637, |
|
"grad_norm": 15.867783546447754, |
|
"learning_rate": 4.159090909090909e-05, |
|
"loss": 3.0407, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.7659090909090909, |
|
"grad_norm": 12.047411918640137, |
|
"learning_rate": 4.151515151515152e-05, |
|
"loss": 1.7651, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.768181818181818, |
|
"grad_norm": 11.829177856445312, |
|
"learning_rate": 4.143939393939394e-05, |
|
"loss": 1.5285, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.7704545454545455, |
|
"grad_norm": 13.831562995910645, |
|
"learning_rate": 4.1363636363636364e-05, |
|
"loss": 2.6372, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.7727272727272727, |
|
"grad_norm": 10.6288480758667, |
|
"learning_rate": 4.128787878787879e-05, |
|
"loss": 1.8006, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.775, |
|
"grad_norm": 12.919150352478027, |
|
"learning_rate": 4.1212121212121216e-05, |
|
"loss": 1.8753, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.7772727272727273, |
|
"grad_norm": 14.138745307922363, |
|
"learning_rate": 4.113636363636364e-05, |
|
"loss": 2.1089, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.7795454545454545, |
|
"grad_norm": 8.130454063415527, |
|
"learning_rate": 4.106060606060606e-05, |
|
"loss": 0.9243, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.7818181818181817, |
|
"grad_norm": 13.32907485961914, |
|
"learning_rate": 4.098484848484849e-05, |
|
"loss": 2.599, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.7840909090909092, |
|
"grad_norm": 9.957046508789062, |
|
"learning_rate": 4.0909090909090915e-05, |
|
"loss": 1.1874, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.7863636363636364, |
|
"grad_norm": 10.413941383361816, |
|
"learning_rate": 4.0833333333333334e-05, |
|
"loss": 1.2206, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.7886363636363636, |
|
"grad_norm": 12.38062858581543, |
|
"learning_rate": 4.075757575757576e-05, |
|
"loss": 1.5484, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.790909090909091, |
|
"grad_norm": 10.63827896118164, |
|
"learning_rate": 4.068181818181818e-05, |
|
"loss": 1.4851, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.7931818181818182, |
|
"grad_norm": 10.755563735961914, |
|
"learning_rate": 4.0606060606060606e-05, |
|
"loss": 2.0725, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.7954545454545454, |
|
"grad_norm": 10.352532386779785, |
|
"learning_rate": 4.053030303030303e-05, |
|
"loss": 1.6825, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.7977272727272728, |
|
"grad_norm": 10.303858757019043, |
|
"learning_rate": 4.045454545454546e-05, |
|
"loss": 1.6771, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 12.914578437805176, |
|
"learning_rate": 4.0378787878787885e-05, |
|
"loss": 2.0149, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.8022727272727272, |
|
"grad_norm": 9.389689445495605, |
|
"learning_rate": 4.0303030303030305e-05, |
|
"loss": 1.9987, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.8045454545454547, |
|
"grad_norm": 13.615360260009766, |
|
"learning_rate": 4.022727272727273e-05, |
|
"loss": 1.7871, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.8068181818181817, |
|
"grad_norm": 12.188302040100098, |
|
"learning_rate": 4.015151515151515e-05, |
|
"loss": 2.1458, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.809090909090909, |
|
"grad_norm": 23.321977615356445, |
|
"learning_rate": 4.007575757575758e-05, |
|
"loss": 1.5815, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.8113636363636365, |
|
"grad_norm": 13.12856674194336, |
|
"learning_rate": 4e-05, |
|
"loss": 1.9065, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.8136363636363635, |
|
"grad_norm": 8.955425262451172, |
|
"learning_rate": 3.992424242424242e-05, |
|
"loss": 1.4415, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.815909090909091, |
|
"grad_norm": 14.052294731140137, |
|
"learning_rate": 3.984848484848485e-05, |
|
"loss": 2.6913, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"grad_norm": 8.688261032104492, |
|
"learning_rate": 3.9772727272727275e-05, |
|
"loss": 1.6981, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.8204545454545453, |
|
"grad_norm": 13.951496124267578, |
|
"learning_rate": 3.96969696969697e-05, |
|
"loss": 1.5787, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.8227272727272728, |
|
"grad_norm": 10.023541450500488, |
|
"learning_rate": 3.962121212121213e-05, |
|
"loss": 1.9886, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.825, |
|
"grad_norm": 8.397741317749023, |
|
"learning_rate": 3.954545454545455e-05, |
|
"loss": 1.7193, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.8272727272727272, |
|
"grad_norm": 10.017319679260254, |
|
"learning_rate": 3.9469696969696974e-05, |
|
"loss": 1.7097, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.8295454545454546, |
|
"grad_norm": 13.632206916809082, |
|
"learning_rate": 3.939393939393939e-05, |
|
"loss": 2.1469, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.8318181818181818, |
|
"grad_norm": 19.315832138061523, |
|
"learning_rate": 3.931818181818182e-05, |
|
"loss": 2.2873, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.834090909090909, |
|
"grad_norm": 11.273087501525879, |
|
"learning_rate": 3.924242424242424e-05, |
|
"loss": 1.352, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 1.8363636363636364, |
|
"grad_norm": 12.127049446105957, |
|
"learning_rate": 3.9166666666666665e-05, |
|
"loss": 1.8422, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 1.8386363636363636, |
|
"grad_norm": 9.968843460083008, |
|
"learning_rate": 3.909090909090909e-05, |
|
"loss": 1.2724, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 1.8409090909090908, |
|
"grad_norm": 13.883306503295898, |
|
"learning_rate": 3.901515151515152e-05, |
|
"loss": 2.6822, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.8431818181818183, |
|
"grad_norm": 10.443497657775879, |
|
"learning_rate": 3.8939393939393944e-05, |
|
"loss": 1.2037, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 1.8454545454545455, |
|
"grad_norm": 10.290310859680176, |
|
"learning_rate": 3.8863636363636364e-05, |
|
"loss": 1.5355, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 1.8477272727272727, |
|
"grad_norm": 9.970185279846191, |
|
"learning_rate": 3.878787878787879e-05, |
|
"loss": 1.957, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"grad_norm": 10.905329704284668, |
|
"learning_rate": 3.8712121212121217e-05, |
|
"loss": 1.8562, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 1.8522727272727273, |
|
"grad_norm": 9.466534614562988, |
|
"learning_rate": 3.8636363636363636e-05, |
|
"loss": 1.4522, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.8545454545454545, |
|
"grad_norm": 13.48620891571045, |
|
"learning_rate": 3.856060606060606e-05, |
|
"loss": 2.1203, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 1.856818181818182, |
|
"grad_norm": 12.107563018798828, |
|
"learning_rate": 3.848484848484848e-05, |
|
"loss": 1.7011, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 1.8590909090909091, |
|
"grad_norm": 10.786709785461426, |
|
"learning_rate": 3.840909090909091e-05, |
|
"loss": 1.7418, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 1.8613636363636363, |
|
"grad_norm": 10.853336334228516, |
|
"learning_rate": 3.8333333333333334e-05, |
|
"loss": 1.4229, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 1.8636363636363638, |
|
"grad_norm": 11.42320442199707, |
|
"learning_rate": 3.825757575757576e-05, |
|
"loss": 1.6411, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.865909090909091, |
|
"grad_norm": 9.623292922973633, |
|
"learning_rate": 3.818181818181819e-05, |
|
"loss": 2.2372, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 1.8681818181818182, |
|
"grad_norm": 19.681766510009766, |
|
"learning_rate": 3.810606060606061e-05, |
|
"loss": 1.7814, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 1.8704545454545456, |
|
"grad_norm": 11.759204864501953, |
|
"learning_rate": 3.803030303030303e-05, |
|
"loss": 1.4783, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 1.8727272727272726, |
|
"grad_norm": 11.130982398986816, |
|
"learning_rate": 3.795454545454545e-05, |
|
"loss": 1.3937, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 10.193344116210938, |
|
"learning_rate": 3.787878787878788e-05, |
|
"loss": 1.3912, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.8772727272727274, |
|
"grad_norm": 8.412622451782227, |
|
"learning_rate": 3.7803030303030305e-05, |
|
"loss": 1.3978, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 1.8795454545454544, |
|
"grad_norm": 12.766166687011719, |
|
"learning_rate": 3.7727272727272725e-05, |
|
"loss": 1.9356, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 1.8818181818181818, |
|
"grad_norm": 11.161136627197266, |
|
"learning_rate": 3.765151515151516e-05, |
|
"loss": 1.8318, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 1.884090909090909, |
|
"grad_norm": 11.214709281921387, |
|
"learning_rate": 3.757575757575758e-05, |
|
"loss": 1.4253, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 1.8863636363636362, |
|
"grad_norm": 12.173728942871094, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 1.3093, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.8886363636363637, |
|
"grad_norm": 12.564881324768066, |
|
"learning_rate": 3.742424242424243e-05, |
|
"loss": 2.0086, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 1.8909090909090909, |
|
"grad_norm": 10.378774642944336, |
|
"learning_rate": 3.734848484848485e-05, |
|
"loss": 2.2117, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 1.893181818181818, |
|
"grad_norm": 13.659943580627441, |
|
"learning_rate": 3.7272727272727276e-05, |
|
"loss": 1.8717, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 1.8954545454545455, |
|
"grad_norm": 10.889350891113281, |
|
"learning_rate": 3.7196969696969695e-05, |
|
"loss": 2.524, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 1.8977272727272727, |
|
"grad_norm": 20.47830581665039, |
|
"learning_rate": 3.712121212121212e-05, |
|
"loss": 1.5575, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 8.377565383911133, |
|
"learning_rate": 3.704545454545455e-05, |
|
"loss": 1.4985, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 1.9022727272727273, |
|
"grad_norm": 14.420267105102539, |
|
"learning_rate": 3.6969696969696974e-05, |
|
"loss": 2.0562, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 1.9045454545454545, |
|
"grad_norm": 11.469067573547363, |
|
"learning_rate": 3.68939393939394e-05, |
|
"loss": 1.9261, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 1.9068181818181817, |
|
"grad_norm": 14.95913314819336, |
|
"learning_rate": 3.681818181818182e-05, |
|
"loss": 1.4905, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 1.9090909090909092, |
|
"grad_norm": 12.481145858764648, |
|
"learning_rate": 3.6742424242424246e-05, |
|
"loss": 1.3664, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.9113636363636364, |
|
"grad_norm": 11.715337753295898, |
|
"learning_rate": 3.6666666666666666e-05, |
|
"loss": 2.0561, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 1.9136363636363636, |
|
"grad_norm": 12.499181747436523, |
|
"learning_rate": 3.659090909090909e-05, |
|
"loss": 1.62, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 1.915909090909091, |
|
"grad_norm": 7.448797225952148, |
|
"learning_rate": 3.651515151515152e-05, |
|
"loss": 0.979, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 1.9181818181818182, |
|
"grad_norm": 11.219677925109863, |
|
"learning_rate": 3.643939393939394e-05, |
|
"loss": 1.8378, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 1.9204545454545454, |
|
"grad_norm": 11.738428115844727, |
|
"learning_rate": 3.6363636363636364e-05, |
|
"loss": 2.1477, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.9227272727272728, |
|
"grad_norm": 13.800374031066895, |
|
"learning_rate": 3.628787878787879e-05, |
|
"loss": 2.3644, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 1.925, |
|
"grad_norm": 11.240313529968262, |
|
"learning_rate": 3.621212121212122e-05, |
|
"loss": 1.6775, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 1.9272727272727272, |
|
"grad_norm": 13.477606773376465, |
|
"learning_rate": 3.613636363636364e-05, |
|
"loss": 1.3438, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 1.9295454545454547, |
|
"grad_norm": 12.788423538208008, |
|
"learning_rate": 3.606060606060606e-05, |
|
"loss": 1.7158, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 1.9318181818181817, |
|
"grad_norm": 8.893767356872559, |
|
"learning_rate": 3.598484848484849e-05, |
|
"loss": 1.4747, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.934090909090909, |
|
"grad_norm": 12.053075790405273, |
|
"learning_rate": 3.590909090909091e-05, |
|
"loss": 1.0121, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 1.9363636363636365, |
|
"grad_norm": 12.093589782714844, |
|
"learning_rate": 3.5833333333333335e-05, |
|
"loss": 2.1991, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 1.9386363636363635, |
|
"grad_norm": 9.356278419494629, |
|
"learning_rate": 3.575757575757576e-05, |
|
"loss": 1.4497, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 1.940909090909091, |
|
"grad_norm": 12.686812400817871, |
|
"learning_rate": 3.568181818181818e-05, |
|
"loss": 1.5038, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 1.9431818181818183, |
|
"grad_norm": 13.139368057250977, |
|
"learning_rate": 3.560606060606061e-05, |
|
"loss": 2.9399, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.9454545454545453, |
|
"grad_norm": 11.385064125061035, |
|
"learning_rate": 3.553030303030303e-05, |
|
"loss": 1.4202, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 1.9477272727272728, |
|
"grad_norm": 9.905313491821289, |
|
"learning_rate": 3.545454545454546e-05, |
|
"loss": 2.5033, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 9.99422836303711, |
|
"learning_rate": 3.537878787878788e-05, |
|
"loss": 1.631, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 1.9522727272727272, |
|
"grad_norm": 12.235610961914062, |
|
"learning_rate": 3.5303030303030305e-05, |
|
"loss": 1.7517, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 1.9545454545454546, |
|
"grad_norm": 13.225701332092285, |
|
"learning_rate": 3.522727272727273e-05, |
|
"loss": 1.545, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.9568181818181818, |
|
"grad_norm": 13.755146980285645, |
|
"learning_rate": 3.515151515151515e-05, |
|
"loss": 1.6548, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 1.959090909090909, |
|
"grad_norm": 14.235300064086914, |
|
"learning_rate": 3.507575757575758e-05, |
|
"loss": 2.2791, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 1.9613636363636364, |
|
"grad_norm": 12.734109878540039, |
|
"learning_rate": 3.5e-05, |
|
"loss": 1.4257, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 1.9636363636363636, |
|
"grad_norm": 12.51075267791748, |
|
"learning_rate": 3.492424242424242e-05, |
|
"loss": 2.1328, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 1.9659090909090908, |
|
"grad_norm": 12.090396881103516, |
|
"learning_rate": 3.484848484848485e-05, |
|
"loss": 2.4949, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.9681818181818183, |
|
"grad_norm": 9.898470878601074, |
|
"learning_rate": 3.4772727272727276e-05, |
|
"loss": 1.0122, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 1.9704545454545455, |
|
"grad_norm": 12.299036979675293, |
|
"learning_rate": 3.46969696969697e-05, |
|
"loss": 1.1734, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 1.9727272727272727, |
|
"grad_norm": 10.930243492126465, |
|
"learning_rate": 3.462121212121212e-05, |
|
"loss": 1.8219, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 1.975, |
|
"grad_norm": 11.0517578125, |
|
"learning_rate": 3.454545454545455e-05, |
|
"loss": 1.5023, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 1.9772727272727273, |
|
"grad_norm": 11.98909854888916, |
|
"learning_rate": 3.4469696969696974e-05, |
|
"loss": 1.298, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.9795454545454545, |
|
"grad_norm": 12.753129959106445, |
|
"learning_rate": 3.4393939393939394e-05, |
|
"loss": 1.7147, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 1.981818181818182, |
|
"grad_norm": 71.2451171875, |
|
"learning_rate": 3.431818181818182e-05, |
|
"loss": 1.3867, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 1.9840909090909091, |
|
"grad_norm": 9.198206901550293, |
|
"learning_rate": 3.424242424242424e-05, |
|
"loss": 1.2175, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 1.9863636363636363, |
|
"grad_norm": 10.864444732666016, |
|
"learning_rate": 3.4166666666666666e-05, |
|
"loss": 2.4479, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 1.9886363636363638, |
|
"grad_norm": 12.929604530334473, |
|
"learning_rate": 3.409090909090909e-05, |
|
"loss": 2.3538, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.990909090909091, |
|
"grad_norm": 15.190954208374023, |
|
"learning_rate": 3.401515151515152e-05, |
|
"loss": 2.7314, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 1.9931818181818182, |
|
"grad_norm": 12.220293045043945, |
|
"learning_rate": 3.3939393939393945e-05, |
|
"loss": 1.8087, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 1.9954545454545456, |
|
"grad_norm": 13.717775344848633, |
|
"learning_rate": 3.3863636363636364e-05, |
|
"loss": 2.2791, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 1.9977272727272726, |
|
"grad_norm": 13.53941822052002, |
|
"learning_rate": 3.378787878787879e-05, |
|
"loss": 1.9205, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 10.206825256347656, |
|
"learning_rate": 3.371212121212121e-05, |
|
"loss": 1.2968, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_f1": 0.8929, |
|
"eval_gen_len": 41.9091, |
|
"eval_loss": 1.7823115587234497, |
|
"eval_precision": 0.8925, |
|
"eval_recall": 0.8935, |
|
"eval_rouge1": 0.447, |
|
"eval_rouge2": 0.2102, |
|
"eval_rougeL": 0.3795, |
|
"eval_rougeLsum": 0.4136, |
|
"eval_runtime": 29.0339, |
|
"eval_samples_per_second": 3.789, |
|
"eval_steps_per_second": 0.482, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.0022727272727274, |
|
"grad_norm": 9.781706809997559, |
|
"learning_rate": 3.3636363636363636e-05, |
|
"loss": 1.0468, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 2.0045454545454544, |
|
"grad_norm": 8.61344051361084, |
|
"learning_rate": 3.356060606060606e-05, |
|
"loss": 1.7286, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 2.006818181818182, |
|
"grad_norm": 11.291481971740723, |
|
"learning_rate": 3.348484848484848e-05, |
|
"loss": 1.1274, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 2.0090909090909093, |
|
"grad_norm": 11.33132553100586, |
|
"learning_rate": 3.3409090909090915e-05, |
|
"loss": 1.4992, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 2.0113636363636362, |
|
"grad_norm": 10.342754364013672, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 1.7733, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 2.0136363636363637, |
|
"grad_norm": 9.18486499786377, |
|
"learning_rate": 3.325757575757576e-05, |
|
"loss": 1.7391, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 2.015909090909091, |
|
"grad_norm": 35.923648834228516, |
|
"learning_rate": 3.318181818181819e-05, |
|
"loss": 1.8191, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 2.018181818181818, |
|
"grad_norm": 10.737150192260742, |
|
"learning_rate": 3.310606060606061e-05, |
|
"loss": 1.1656, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 2.0204545454545455, |
|
"grad_norm": 7.691224098205566, |
|
"learning_rate": 3.303030303030303e-05, |
|
"loss": 1.1787, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 2.022727272727273, |
|
"grad_norm": 14.402198791503906, |
|
"learning_rate": 3.295454545454545e-05, |
|
"loss": 2.1618, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.025, |
|
"grad_norm": 9.567869186401367, |
|
"learning_rate": 3.287878787878788e-05, |
|
"loss": 1.4921, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 2.0272727272727273, |
|
"grad_norm": 12.46391487121582, |
|
"learning_rate": 3.2803030303030305e-05, |
|
"loss": 2.0986, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 2.0295454545454548, |
|
"grad_norm": 12.333531379699707, |
|
"learning_rate": 3.272727272727273e-05, |
|
"loss": 1.5944, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 2.0318181818181817, |
|
"grad_norm": 12.140853881835938, |
|
"learning_rate": 3.265151515151516e-05, |
|
"loss": 1.7773, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 2.034090909090909, |
|
"grad_norm": 9.412683486938477, |
|
"learning_rate": 3.257575757575758e-05, |
|
"loss": 1.2663, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 2.036363636363636, |
|
"grad_norm": 10.711098670959473, |
|
"learning_rate": 3.2500000000000004e-05, |
|
"loss": 1.6462, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 2.0386363636363636, |
|
"grad_norm": 11.64570426940918, |
|
"learning_rate": 3.2424242424242423e-05, |
|
"loss": 1.8232, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 2.040909090909091, |
|
"grad_norm": 12.753011703491211, |
|
"learning_rate": 3.234848484848485e-05, |
|
"loss": 1.9761, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 2.043181818181818, |
|
"grad_norm": 15.42159366607666, |
|
"learning_rate": 3.2272727272727276e-05, |
|
"loss": 1.5225, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 2.0454545454545454, |
|
"grad_norm": 13.561200141906738, |
|
"learning_rate": 3.2196969696969696e-05, |
|
"loss": 2.2342, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.047727272727273, |
|
"grad_norm": 11.59468936920166, |
|
"learning_rate": 3.212121212121212e-05, |
|
"loss": 1.3996, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 12.330318450927734, |
|
"learning_rate": 3.204545454545455e-05, |
|
"loss": 2.3926, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 2.0522727272727272, |
|
"grad_norm": 15.305580139160156, |
|
"learning_rate": 3.1969696969696974e-05, |
|
"loss": 2.5056, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 2.0545454545454547, |
|
"grad_norm": 12.250936508178711, |
|
"learning_rate": 3.18939393939394e-05, |
|
"loss": 2.2595, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 2.0568181818181817, |
|
"grad_norm": 9.258564949035645, |
|
"learning_rate": 3.181818181818182e-05, |
|
"loss": 1.0952, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 2.059090909090909, |
|
"grad_norm": 10.1191987991333, |
|
"learning_rate": 3.174242424242425e-05, |
|
"loss": 2.2179, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 2.0613636363636365, |
|
"grad_norm": 12.793285369873047, |
|
"learning_rate": 3.1666666666666666e-05, |
|
"loss": 1.7858, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 2.0636363636363635, |
|
"grad_norm": 10.188157081604004, |
|
"learning_rate": 3.159090909090909e-05, |
|
"loss": 1.3631, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 2.065909090909091, |
|
"grad_norm": 13.256832122802734, |
|
"learning_rate": 3.151515151515151e-05, |
|
"loss": 2.2464, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 2.0681818181818183, |
|
"grad_norm": 10.160938262939453, |
|
"learning_rate": 3.143939393939394e-05, |
|
"loss": 1.5204, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.0704545454545453, |
|
"grad_norm": 10.945446014404297, |
|
"learning_rate": 3.1363636363636365e-05, |
|
"loss": 1.6125, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 2.0727272727272728, |
|
"grad_norm": 10.19439697265625, |
|
"learning_rate": 3.128787878787879e-05, |
|
"loss": 1.5317, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 2.075, |
|
"grad_norm": 9.242986679077148, |
|
"learning_rate": 3.121212121212122e-05, |
|
"loss": 1.7993, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 2.077272727272727, |
|
"grad_norm": 9.43307113647461, |
|
"learning_rate": 3.113636363636364e-05, |
|
"loss": 1.4297, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 2.0795454545454546, |
|
"grad_norm": 9.292837142944336, |
|
"learning_rate": 3.106060606060606e-05, |
|
"loss": 1.1428, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 2.081818181818182, |
|
"grad_norm": 10.290895462036133, |
|
"learning_rate": 3.098484848484849e-05, |
|
"loss": 1.3587, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 2.084090909090909, |
|
"grad_norm": 12.890341758728027, |
|
"learning_rate": 3.090909090909091e-05, |
|
"loss": 1.5721, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 2.0863636363636364, |
|
"grad_norm": 9.548102378845215, |
|
"learning_rate": 3.0833333333333335e-05, |
|
"loss": 1.5717, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 2.088636363636364, |
|
"grad_norm": 11.2235689163208, |
|
"learning_rate": 3.0757575757575755e-05, |
|
"loss": 1.818, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 2.090909090909091, |
|
"grad_norm": 14.528667449951172, |
|
"learning_rate": 3.068181818181818e-05, |
|
"loss": 1.6878, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.0931818181818183, |
|
"grad_norm": 13.295345306396484, |
|
"learning_rate": 3.060606060606061e-05, |
|
"loss": 1.8521, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 2.0954545454545457, |
|
"grad_norm": 13.902974128723145, |
|
"learning_rate": 3.0530303030303034e-05, |
|
"loss": 1.7186, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 2.0977272727272727, |
|
"grad_norm": 8.313849449157715, |
|
"learning_rate": 3.0454545454545456e-05, |
|
"loss": 0.8988, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 11.491289138793945, |
|
"learning_rate": 3.037878787878788e-05, |
|
"loss": 1.1394, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 2.102272727272727, |
|
"grad_norm": 13.124963760375977, |
|
"learning_rate": 3.0303030303030306e-05, |
|
"loss": 1.7424, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.1045454545454545, |
|
"grad_norm": 8.5538911819458, |
|
"learning_rate": 3.0227272727272725e-05, |
|
"loss": 1.3577, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 2.106818181818182, |
|
"grad_norm": 12.04502010345459, |
|
"learning_rate": 3.015151515151515e-05, |
|
"loss": 1.2389, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 2.109090909090909, |
|
"grad_norm": 8.608831405639648, |
|
"learning_rate": 3.0075757575757578e-05, |
|
"loss": 1.1577, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 2.1113636363636363, |
|
"grad_norm": 14.802834510803223, |
|
"learning_rate": 3e-05, |
|
"loss": 1.8636, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 2.1136363636363638, |
|
"grad_norm": 9.014802932739258, |
|
"learning_rate": 2.9924242424242427e-05, |
|
"loss": 0.7823, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.1159090909090907, |
|
"grad_norm": 10.007800102233887, |
|
"learning_rate": 2.9848484848484847e-05, |
|
"loss": 1.7205, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 2.118181818181818, |
|
"grad_norm": 16.067474365234375, |
|
"learning_rate": 2.9772727272727273e-05, |
|
"loss": 2.443, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 2.1204545454545456, |
|
"grad_norm": 12.624736785888672, |
|
"learning_rate": 2.96969696969697e-05, |
|
"loss": 1.5536, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 2.1227272727272726, |
|
"grad_norm": 10.400491714477539, |
|
"learning_rate": 2.9621212121212122e-05, |
|
"loss": 1.2871, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 2.125, |
|
"grad_norm": 11.056097984313965, |
|
"learning_rate": 2.954545454545455e-05, |
|
"loss": 1.4614, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 2.1272727272727274, |
|
"grad_norm": 9.163816452026367, |
|
"learning_rate": 2.9469696969696968e-05, |
|
"loss": 1.2918, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 2.1295454545454544, |
|
"grad_norm": 8.908564567565918, |
|
"learning_rate": 2.9393939393939394e-05, |
|
"loss": 1.2489, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 2.131818181818182, |
|
"grad_norm": 8.402863502502441, |
|
"learning_rate": 2.9318181818181817e-05, |
|
"loss": 1.4269, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 2.1340909090909093, |
|
"grad_norm": 10.939780235290527, |
|
"learning_rate": 2.9242424242424243e-05, |
|
"loss": 1.4199, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 2.1363636363636362, |
|
"grad_norm": 11.758381843566895, |
|
"learning_rate": 2.916666666666667e-05, |
|
"loss": 1.4597, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.1386363636363637, |
|
"grad_norm": 11.411653518676758, |
|
"learning_rate": 2.909090909090909e-05, |
|
"loss": 2.1611, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 2.140909090909091, |
|
"grad_norm": 11.838427543640137, |
|
"learning_rate": 2.901515151515152e-05, |
|
"loss": 1.2373, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 2.143181818181818, |
|
"grad_norm": 14.833626747131348, |
|
"learning_rate": 2.893939393939394e-05, |
|
"loss": 1.9202, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 2.1454545454545455, |
|
"grad_norm": 10.815326690673828, |
|
"learning_rate": 2.8863636363636365e-05, |
|
"loss": 1.5089, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 2.147727272727273, |
|
"grad_norm": 12.253664016723633, |
|
"learning_rate": 2.878787878787879e-05, |
|
"loss": 1.3787, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"grad_norm": 13.154531478881836, |
|
"learning_rate": 2.8712121212121214e-05, |
|
"loss": 1.8925, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 2.1522727272727273, |
|
"grad_norm": 12.020703315734863, |
|
"learning_rate": 2.863636363636364e-05, |
|
"loss": 1.379, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 2.1545454545454543, |
|
"grad_norm": 10.430608749389648, |
|
"learning_rate": 2.856060606060606e-05, |
|
"loss": 1.4203, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 2.1568181818181817, |
|
"grad_norm": 8.769074440002441, |
|
"learning_rate": 2.8484848484848486e-05, |
|
"loss": 1.227, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 2.159090909090909, |
|
"grad_norm": 11.399450302124023, |
|
"learning_rate": 2.8409090909090912e-05, |
|
"loss": 1.3783, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.161363636363636, |
|
"grad_norm": 9.87228012084961, |
|
"learning_rate": 2.8333333333333335e-05, |
|
"loss": 1.6523, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 2.1636363636363636, |
|
"grad_norm": 15.94421100616455, |
|
"learning_rate": 2.825757575757576e-05, |
|
"loss": 2.4161, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 2.165909090909091, |
|
"grad_norm": 9.126893043518066, |
|
"learning_rate": 2.818181818181818e-05, |
|
"loss": 1.2675, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 2.168181818181818, |
|
"grad_norm": 15.760127067565918, |
|
"learning_rate": 2.8106060606060607e-05, |
|
"loss": 2.9231, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 2.1704545454545454, |
|
"grad_norm": 8.999767303466797, |
|
"learning_rate": 2.803030303030303e-05, |
|
"loss": 1.5147, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 2.172727272727273, |
|
"grad_norm": 12.179048538208008, |
|
"learning_rate": 2.7954545454545457e-05, |
|
"loss": 1.4017, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 2.175, |
|
"grad_norm": 11.52514934539795, |
|
"learning_rate": 2.7878787878787883e-05, |
|
"loss": 2.2158, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 2.1772727272727272, |
|
"grad_norm": 14.60074520111084, |
|
"learning_rate": 2.7803030303030303e-05, |
|
"loss": 1.6378, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 2.1795454545454547, |
|
"grad_norm": 11.505465507507324, |
|
"learning_rate": 2.772727272727273e-05, |
|
"loss": 1.6039, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 2.1818181818181817, |
|
"grad_norm": 12.141363143920898, |
|
"learning_rate": 2.7651515151515152e-05, |
|
"loss": 2.6782, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.184090909090909, |
|
"grad_norm": 10.89749813079834, |
|
"learning_rate": 2.7575757575757578e-05, |
|
"loss": 1.4787, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 2.1863636363636365, |
|
"grad_norm": 11.249963760375977, |
|
"learning_rate": 2.7500000000000004e-05, |
|
"loss": 1.9647, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 2.1886363636363635, |
|
"grad_norm": 9.608443260192871, |
|
"learning_rate": 2.7424242424242424e-05, |
|
"loss": 0.8747, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 2.190909090909091, |
|
"grad_norm": 9.517485618591309, |
|
"learning_rate": 2.734848484848485e-05, |
|
"loss": 1.2376, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 2.1931818181818183, |
|
"grad_norm": 9.044648170471191, |
|
"learning_rate": 2.7272727272727273e-05, |
|
"loss": 0.8014, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 2.1954545454545453, |
|
"grad_norm": 9.988462448120117, |
|
"learning_rate": 2.71969696969697e-05, |
|
"loss": 1.652, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 2.1977272727272728, |
|
"grad_norm": 8.96922492980957, |
|
"learning_rate": 2.7121212121212126e-05, |
|
"loss": 0.9484, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 10.36929702758789, |
|
"learning_rate": 2.7045454545454545e-05, |
|
"loss": 1.2604, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 2.202272727272727, |
|
"grad_norm": 14.008241653442383, |
|
"learning_rate": 2.696969696969697e-05, |
|
"loss": 2.4898, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 2.2045454545454546, |
|
"grad_norm": 14.017687797546387, |
|
"learning_rate": 2.6893939393939394e-05, |
|
"loss": 1.8664, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.206818181818182, |
|
"grad_norm": 11.672577857971191, |
|
"learning_rate": 2.681818181818182e-05, |
|
"loss": 1.8917, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 2.209090909090909, |
|
"grad_norm": 11.760181427001953, |
|
"learning_rate": 2.674242424242424e-05, |
|
"loss": 2.0559, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 2.2113636363636364, |
|
"grad_norm": 13.333674430847168, |
|
"learning_rate": 2.6666666666666667e-05, |
|
"loss": 1.8072, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 2.213636363636364, |
|
"grad_norm": 9.448116302490234, |
|
"learning_rate": 2.6590909090909093e-05, |
|
"loss": 1.2764, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 2.215909090909091, |
|
"grad_norm": 11.52153491973877, |
|
"learning_rate": 2.6515151515151516e-05, |
|
"loss": 1.7083, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.2181818181818183, |
|
"grad_norm": 20.444080352783203, |
|
"learning_rate": 2.6439393939393942e-05, |
|
"loss": 2.2781, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 2.2204545454545457, |
|
"grad_norm": 15.952470779418945, |
|
"learning_rate": 2.636363636363636e-05, |
|
"loss": 2.0901, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 2.2227272727272727, |
|
"grad_norm": 10.751893997192383, |
|
"learning_rate": 2.6287878787878788e-05, |
|
"loss": 0.9779, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 2.225, |
|
"grad_norm": 11.89562702178955, |
|
"learning_rate": 2.6212121212121214e-05, |
|
"loss": 1.7043, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 2.227272727272727, |
|
"grad_norm": 12.013797760009766, |
|
"learning_rate": 2.6136363636363637e-05, |
|
"loss": 1.4427, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.2295454545454545, |
|
"grad_norm": 13.685124397277832, |
|
"learning_rate": 2.6060606060606063e-05, |
|
"loss": 1.9327, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 2.231818181818182, |
|
"grad_norm": 14.36984920501709, |
|
"learning_rate": 2.5984848484848483e-05, |
|
"loss": 2.4401, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 2.234090909090909, |
|
"grad_norm": 11.657794952392578, |
|
"learning_rate": 2.590909090909091e-05, |
|
"loss": 1.5776, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 2.2363636363636363, |
|
"grad_norm": 9.138626098632812, |
|
"learning_rate": 2.5833333333333336e-05, |
|
"loss": 1.5954, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 2.2386363636363638, |
|
"grad_norm": 11.275242805480957, |
|
"learning_rate": 2.575757575757576e-05, |
|
"loss": 1.5874, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 2.2409090909090907, |
|
"grad_norm": 11.694557189941406, |
|
"learning_rate": 2.5681818181818185e-05, |
|
"loss": 1.2839, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 2.243181818181818, |
|
"grad_norm": 14.328207015991211, |
|
"learning_rate": 2.5606060606060604e-05, |
|
"loss": 2.3689, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 2.2454545454545456, |
|
"grad_norm": 14.487227439880371, |
|
"learning_rate": 2.553030303030303e-05, |
|
"loss": 1.5858, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 2.2477272727272726, |
|
"grad_norm": 14.691239356994629, |
|
"learning_rate": 2.5454545454545454e-05, |
|
"loss": 1.8329, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 10.622157096862793, |
|
"learning_rate": 2.537878787878788e-05, |
|
"loss": 1.8422, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.2522727272727274, |
|
"grad_norm": 13.788392066955566, |
|
"learning_rate": 2.5303030303030306e-05, |
|
"loss": 2.0421, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 2.2545454545454544, |
|
"grad_norm": 8.527210235595703, |
|
"learning_rate": 2.5227272727272726e-05, |
|
"loss": 1.4462, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 2.256818181818182, |
|
"grad_norm": 11.221017837524414, |
|
"learning_rate": 2.5151515151515155e-05, |
|
"loss": 1.7809, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 2.2590909090909093, |
|
"grad_norm": 15.243719100952148, |
|
"learning_rate": 2.5075757575757575e-05, |
|
"loss": 1.7409, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 2.2613636363636362, |
|
"grad_norm": 16.965797424316406, |
|
"learning_rate": 2.5e-05, |
|
"loss": 3.2836, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 2.2636363636363637, |
|
"grad_norm": 10.187609672546387, |
|
"learning_rate": 2.4924242424242424e-05, |
|
"loss": 1.5489, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 2.265909090909091, |
|
"grad_norm": 9.865535736083984, |
|
"learning_rate": 2.4848484848484847e-05, |
|
"loss": 2.0742, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 2.268181818181818, |
|
"grad_norm": 11.739052772521973, |
|
"learning_rate": 2.4772727272727277e-05, |
|
"loss": 1.4237, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 2.2704545454545455, |
|
"grad_norm": 13.875876426696777, |
|
"learning_rate": 2.46969696969697e-05, |
|
"loss": 2.8714, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 2.2727272727272725, |
|
"grad_norm": 11.909977912902832, |
|
"learning_rate": 2.4621212121212123e-05, |
|
"loss": 1.9434, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.275, |
|
"grad_norm": 13.642827033996582, |
|
"learning_rate": 2.4545454545454545e-05, |
|
"loss": 1.4233, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 2.2772727272727273, |
|
"grad_norm": 10.349024772644043, |
|
"learning_rate": 2.4469696969696972e-05, |
|
"loss": 1.5193, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 2.2795454545454543, |
|
"grad_norm": 8.302240371704102, |
|
"learning_rate": 2.4393939393939395e-05, |
|
"loss": 1.0769, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 2.2818181818181817, |
|
"grad_norm": 9.903936386108398, |
|
"learning_rate": 2.431818181818182e-05, |
|
"loss": 1.4596, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 2.284090909090909, |
|
"grad_norm": 7.976583957672119, |
|
"learning_rate": 2.4242424242424244e-05, |
|
"loss": 1.3187, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 2.286363636363636, |
|
"grad_norm": 8.382739067077637, |
|
"learning_rate": 2.4166666666666667e-05, |
|
"loss": 1.1004, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 2.2886363636363636, |
|
"grad_norm": 9.898600578308105, |
|
"learning_rate": 2.4090909090909093e-05, |
|
"loss": 1.3482, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 2.290909090909091, |
|
"grad_norm": 9.736372947692871, |
|
"learning_rate": 2.4015151515151516e-05, |
|
"loss": 1.0737, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 2.293181818181818, |
|
"grad_norm": 14.735883712768555, |
|
"learning_rate": 2.393939393939394e-05, |
|
"loss": 1.9045, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 2.2954545454545454, |
|
"grad_norm": 16.780405044555664, |
|
"learning_rate": 2.3863636363636365e-05, |
|
"loss": 1.9355, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.297727272727273, |
|
"grad_norm": 9.181320190429688, |
|
"learning_rate": 2.3787878787878788e-05, |
|
"loss": 1.4465, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"grad_norm": 11.207884788513184, |
|
"learning_rate": 2.3712121212121214e-05, |
|
"loss": 1.6341, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 2.3022727272727272, |
|
"grad_norm": 12.287393569946289, |
|
"learning_rate": 2.3636363636363637e-05, |
|
"loss": 1.806, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 2.3045454545454547, |
|
"grad_norm": 12.173286437988281, |
|
"learning_rate": 2.356060606060606e-05, |
|
"loss": 2.2166, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 2.3068181818181817, |
|
"grad_norm": 13.528629302978516, |
|
"learning_rate": 2.3484848484848487e-05, |
|
"loss": 1.5679, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 2.309090909090909, |
|
"grad_norm": 9.217406272888184, |
|
"learning_rate": 2.340909090909091e-05, |
|
"loss": 1.7179, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 2.3113636363636365, |
|
"grad_norm": 13.768959999084473, |
|
"learning_rate": 2.3333333333333336e-05, |
|
"loss": 2.1235, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 2.3136363636363635, |
|
"grad_norm": 9.60761833190918, |
|
"learning_rate": 2.325757575757576e-05, |
|
"loss": 1.3526, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 2.315909090909091, |
|
"grad_norm": 10.336706161499023, |
|
"learning_rate": 2.318181818181818e-05, |
|
"loss": 1.3543, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 2.3181818181818183, |
|
"grad_norm": 11.636757850646973, |
|
"learning_rate": 2.3106060606060605e-05, |
|
"loss": 1.8026, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.3204545454545453, |
|
"grad_norm": 10.546634674072266, |
|
"learning_rate": 2.3030303030303034e-05, |
|
"loss": 1.9753, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 2.3227272727272728, |
|
"grad_norm": 13.629782676696777, |
|
"learning_rate": 2.2954545454545457e-05, |
|
"loss": 1.6927, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 2.325, |
|
"grad_norm": 13.1149263381958, |
|
"learning_rate": 2.287878787878788e-05, |
|
"loss": 1.4331, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 2.327272727272727, |
|
"grad_norm": 10.624835968017578, |
|
"learning_rate": 2.2803030303030303e-05, |
|
"loss": 1.4769, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 2.3295454545454546, |
|
"grad_norm": 13.692902565002441, |
|
"learning_rate": 2.272727272727273e-05, |
|
"loss": 2.7543, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.331818181818182, |
|
"grad_norm": 10.054675102233887, |
|
"learning_rate": 2.2651515151515152e-05, |
|
"loss": 1.2323, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 2.334090909090909, |
|
"grad_norm": 14.394067764282227, |
|
"learning_rate": 2.257575757575758e-05, |
|
"loss": 2.094, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 2.3363636363636364, |
|
"grad_norm": 10.581347465515137, |
|
"learning_rate": 2.25e-05, |
|
"loss": 2.2432, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 2.338636363636364, |
|
"grad_norm": 9.492446899414062, |
|
"learning_rate": 2.2424242424242424e-05, |
|
"loss": 1.3964, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 2.340909090909091, |
|
"grad_norm": 10.887022972106934, |
|
"learning_rate": 2.234848484848485e-05, |
|
"loss": 2.0411, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.3431818181818183, |
|
"grad_norm": 13.539667129516602, |
|
"learning_rate": 2.2272727272727274e-05, |
|
"loss": 1.3067, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 2.3454545454545457, |
|
"grad_norm": 9.191630363464355, |
|
"learning_rate": 2.21969696969697e-05, |
|
"loss": 1.266, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 2.3477272727272727, |
|
"grad_norm": 8.683979034423828, |
|
"learning_rate": 2.2121212121212123e-05, |
|
"loss": 0.8044, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"grad_norm": 13.170730590820312, |
|
"learning_rate": 2.2045454545454546e-05, |
|
"loss": 2.2811, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 2.3522727272727275, |
|
"grad_norm": 11.17111873626709, |
|
"learning_rate": 2.1969696969696972e-05, |
|
"loss": 1.3998, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 2.3545454545454545, |
|
"grad_norm": 11.230095863342285, |
|
"learning_rate": 2.1893939393939395e-05, |
|
"loss": 2.0224, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 2.356818181818182, |
|
"grad_norm": 11.912615776062012, |
|
"learning_rate": 2.1818181818181818e-05, |
|
"loss": 1.5619, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 2.359090909090909, |
|
"grad_norm": 10.748661994934082, |
|
"learning_rate": 2.1742424242424244e-05, |
|
"loss": 1.924, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 2.3613636363636363, |
|
"grad_norm": 9.370635032653809, |
|
"learning_rate": 2.1666666666666667e-05, |
|
"loss": 1.1797, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 2.3636363636363638, |
|
"grad_norm": 10.01646900177002, |
|
"learning_rate": 2.1590909090909093e-05, |
|
"loss": 2.1678, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.3659090909090907, |
|
"grad_norm": 9.345016479492188, |
|
"learning_rate": 2.1515151515151516e-05, |
|
"loss": 1.4512, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 2.368181818181818, |
|
"grad_norm": 11.185441970825195, |
|
"learning_rate": 2.143939393939394e-05, |
|
"loss": 1.5958, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 2.3704545454545456, |
|
"grad_norm": 10.186037063598633, |
|
"learning_rate": 2.1363636363636362e-05, |
|
"loss": 0.8744, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 2.3727272727272726, |
|
"grad_norm": 16.676177978515625, |
|
"learning_rate": 2.128787878787879e-05, |
|
"loss": 2.0851, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 2.375, |
|
"grad_norm": 12.497913360595703, |
|
"learning_rate": 2.1212121212121215e-05, |
|
"loss": 1.4765, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 2.3772727272727274, |
|
"grad_norm": 7.271422386169434, |
|
"learning_rate": 2.1136363636363638e-05, |
|
"loss": 1.0424, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 2.3795454545454544, |
|
"grad_norm": 14.968780517578125, |
|
"learning_rate": 2.106060606060606e-05, |
|
"loss": 2.1247, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 2.381818181818182, |
|
"grad_norm": 11.1759672164917, |
|
"learning_rate": 2.0984848484848483e-05, |
|
"loss": 1.5037, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 2.3840909090909093, |
|
"grad_norm": 9.880687713623047, |
|
"learning_rate": 2.090909090909091e-05, |
|
"loss": 0.8131, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 2.3863636363636362, |
|
"grad_norm": 7.559080123901367, |
|
"learning_rate": 2.0833333333333336e-05, |
|
"loss": 0.5826, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.3886363636363637, |
|
"grad_norm": 14.357791900634766, |
|
"learning_rate": 2.075757575757576e-05, |
|
"loss": 2.0945, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 2.390909090909091, |
|
"grad_norm": 11.396363258361816, |
|
"learning_rate": 2.0681818181818182e-05, |
|
"loss": 1.1564, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 2.393181818181818, |
|
"grad_norm": 11.255867958068848, |
|
"learning_rate": 2.0606060606060608e-05, |
|
"loss": 2.2688, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 2.3954545454545455, |
|
"grad_norm": 12.590128898620605, |
|
"learning_rate": 2.053030303030303e-05, |
|
"loss": 2.0123, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 2.3977272727272725, |
|
"grad_norm": 8.069854736328125, |
|
"learning_rate": 2.0454545454545457e-05, |
|
"loss": 1.3967, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 12.596185684204102, |
|
"learning_rate": 2.037878787878788e-05, |
|
"loss": 1.6038, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.4022727272727273, |
|
"grad_norm": 10.432991981506348, |
|
"learning_rate": 2.0303030303030303e-05, |
|
"loss": 1.645, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 2.4045454545454543, |
|
"grad_norm": 10.639815330505371, |
|
"learning_rate": 2.022727272727273e-05, |
|
"loss": 1.5334, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 2.4068181818181817, |
|
"grad_norm": 8.867145538330078, |
|
"learning_rate": 2.0151515151515152e-05, |
|
"loss": 1.2041, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 2.409090909090909, |
|
"grad_norm": 9.741902351379395, |
|
"learning_rate": 2.0075757575757575e-05, |
|
"loss": 1.4987, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.411363636363636, |
|
"grad_norm": 9.907489776611328, |
|
"learning_rate": 2e-05, |
|
"loss": 1.299, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 2.4136363636363636, |
|
"grad_norm": 8.68997859954834, |
|
"learning_rate": 1.9924242424242425e-05, |
|
"loss": 1.2559, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 2.415909090909091, |
|
"grad_norm": 9.990528106689453, |
|
"learning_rate": 1.984848484848485e-05, |
|
"loss": 2.3812, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 2.418181818181818, |
|
"grad_norm": 6.777112007141113, |
|
"learning_rate": 1.9772727272727274e-05, |
|
"loss": 1.0051, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 2.4204545454545454, |
|
"grad_norm": 13.396077156066895, |
|
"learning_rate": 1.9696969696969697e-05, |
|
"loss": 2.4201, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 2.422727272727273, |
|
"grad_norm": 13.596755981445312, |
|
"learning_rate": 1.962121212121212e-05, |
|
"loss": 2.0457, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 2.425, |
|
"grad_norm": 10.351893424987793, |
|
"learning_rate": 1.9545454545454546e-05, |
|
"loss": 1.9791, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 2.4272727272727272, |
|
"grad_norm": 7.505919933319092, |
|
"learning_rate": 1.9469696969696972e-05, |
|
"loss": 1.2944, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 2.4295454545454547, |
|
"grad_norm": 10.136748313903809, |
|
"learning_rate": 1.9393939393939395e-05, |
|
"loss": 1.2477, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 2.4318181818181817, |
|
"grad_norm": 8.979276657104492, |
|
"learning_rate": 1.9318181818181818e-05, |
|
"loss": 0.9829, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.434090909090909, |
|
"grad_norm": 11.097721099853516, |
|
"learning_rate": 1.924242424242424e-05, |
|
"loss": 1.5509, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 2.4363636363636365, |
|
"grad_norm": 10.789654731750488, |
|
"learning_rate": 1.9166666666666667e-05, |
|
"loss": 1.7344, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 2.4386363636363635, |
|
"grad_norm": 12.25899887084961, |
|
"learning_rate": 1.9090909090909094e-05, |
|
"loss": 2.0121, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 2.440909090909091, |
|
"grad_norm": 11.828030586242676, |
|
"learning_rate": 1.9015151515151516e-05, |
|
"loss": 1.7356, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 2.4431818181818183, |
|
"grad_norm": 10.524036407470703, |
|
"learning_rate": 1.893939393939394e-05, |
|
"loss": 1.402, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.4454545454545453, |
|
"grad_norm": 10.572868347167969, |
|
"learning_rate": 1.8863636363636362e-05, |
|
"loss": 1.6468, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 2.4477272727272728, |
|
"grad_norm": 9.194175720214844, |
|
"learning_rate": 1.878787878787879e-05, |
|
"loss": 1.1557, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 11.355244636535645, |
|
"learning_rate": 1.8712121212121215e-05, |
|
"loss": 1.7729, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 2.452272727272727, |
|
"grad_norm": 10.380278587341309, |
|
"learning_rate": 1.8636363636363638e-05, |
|
"loss": 2.3491, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 2.4545454545454546, |
|
"grad_norm": 9.57583236694336, |
|
"learning_rate": 1.856060606060606e-05, |
|
"loss": 1.6112, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.456818181818182, |
|
"grad_norm": 12.973028182983398, |
|
"learning_rate": 1.8484848484848487e-05, |
|
"loss": 1.5272, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 2.459090909090909, |
|
"grad_norm": 9.473404884338379, |
|
"learning_rate": 1.840909090909091e-05, |
|
"loss": 1.2366, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 2.4613636363636364, |
|
"grad_norm": 9.843785285949707, |
|
"learning_rate": 1.8333333333333333e-05, |
|
"loss": 1.6283, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 2.463636363636364, |
|
"grad_norm": 13.467684745788574, |
|
"learning_rate": 1.825757575757576e-05, |
|
"loss": 1.5219, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 2.465909090909091, |
|
"grad_norm": 8.460468292236328, |
|
"learning_rate": 1.8181818181818182e-05, |
|
"loss": 0.8931, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 2.4681818181818183, |
|
"grad_norm": 8.956411361694336, |
|
"learning_rate": 1.810606060606061e-05, |
|
"loss": 1.1577, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 2.4704545454545457, |
|
"grad_norm": 10.919206619262695, |
|
"learning_rate": 1.803030303030303e-05, |
|
"loss": 1.719, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 2.4727272727272727, |
|
"grad_norm": 10.65345287322998, |
|
"learning_rate": 1.7954545454545454e-05, |
|
"loss": 1.5257, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 2.475, |
|
"grad_norm": 9.616610527038574, |
|
"learning_rate": 1.787878787878788e-05, |
|
"loss": 1.4704, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 2.4772727272727275, |
|
"grad_norm": 14.458331108093262, |
|
"learning_rate": 1.7803030303030303e-05, |
|
"loss": 1.4181, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.4795454545454545, |
|
"grad_norm": 8.37006664276123, |
|
"learning_rate": 1.772727272727273e-05, |
|
"loss": 1.191, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 2.481818181818182, |
|
"grad_norm": 13.129170417785645, |
|
"learning_rate": 1.7651515151515153e-05, |
|
"loss": 1.9966, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 2.484090909090909, |
|
"grad_norm": 12.65162181854248, |
|
"learning_rate": 1.7575757575757576e-05, |
|
"loss": 1.7372, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 2.4863636363636363, |
|
"grad_norm": 12.132272720336914, |
|
"learning_rate": 1.75e-05, |
|
"loss": 1.9386, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 2.4886363636363638, |
|
"grad_norm": 11.549707412719727, |
|
"learning_rate": 1.7424242424242425e-05, |
|
"loss": 1.2838, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 2.4909090909090907, |
|
"grad_norm": 10.115202903747559, |
|
"learning_rate": 1.734848484848485e-05, |
|
"loss": 1.7778, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 2.493181818181818, |
|
"grad_norm": 14.97376823425293, |
|
"learning_rate": 1.7272727272727274e-05, |
|
"loss": 2.5436, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 2.4954545454545456, |
|
"grad_norm": 10.270051956176758, |
|
"learning_rate": 1.7196969696969697e-05, |
|
"loss": 1.3943, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 2.4977272727272726, |
|
"grad_norm": 11.584896087646484, |
|
"learning_rate": 1.712121212121212e-05, |
|
"loss": 1.8023, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 11.003795623779297, |
|
"learning_rate": 1.7045454545454546e-05, |
|
"loss": 1.2057, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.5022727272727274, |
|
"grad_norm": 10.495930671691895, |
|
"learning_rate": 1.6969696969696972e-05, |
|
"loss": 1.7265, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 2.5045454545454544, |
|
"grad_norm": 10.6824951171875, |
|
"learning_rate": 1.6893939393939395e-05, |
|
"loss": 1.4241, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 2.506818181818182, |
|
"grad_norm": 10.532041549682617, |
|
"learning_rate": 1.6818181818181818e-05, |
|
"loss": 1.4532, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 2.509090909090909, |
|
"grad_norm": 8.671700477600098, |
|
"learning_rate": 1.674242424242424e-05, |
|
"loss": 1.2539, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 2.5113636363636362, |
|
"grad_norm": 14.828866004943848, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 1.4732, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 2.5136363636363637, |
|
"grad_norm": 11.871790885925293, |
|
"learning_rate": 1.6590909090909094e-05, |
|
"loss": 1.7559, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 2.5159090909090907, |
|
"grad_norm": 9.144551277160645, |
|
"learning_rate": 1.6515151515151517e-05, |
|
"loss": 1.3562, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 2.518181818181818, |
|
"grad_norm": 9.856282234191895, |
|
"learning_rate": 1.643939393939394e-05, |
|
"loss": 1.4721, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 2.5204545454545455, |
|
"grad_norm": 8.48530101776123, |
|
"learning_rate": 1.6363636363636366e-05, |
|
"loss": 1.0045, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 2.5227272727272725, |
|
"grad_norm": 16.73642349243164, |
|
"learning_rate": 1.628787878787879e-05, |
|
"loss": 2.4458, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.525, |
|
"grad_norm": 10.180378913879395, |
|
"learning_rate": 1.6212121212121212e-05, |
|
"loss": 1.3323, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 2.5272727272727273, |
|
"grad_norm": 11.56425666809082, |
|
"learning_rate": 1.6136363636363638e-05, |
|
"loss": 2.0303, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 2.5295454545454543, |
|
"grad_norm": 14.644630432128906, |
|
"learning_rate": 1.606060606060606e-05, |
|
"loss": 1.9247, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 2.5318181818181817, |
|
"grad_norm": 11.767682075500488, |
|
"learning_rate": 1.5984848484848487e-05, |
|
"loss": 1.7903, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 2.534090909090909, |
|
"grad_norm": 11.074971199035645, |
|
"learning_rate": 1.590909090909091e-05, |
|
"loss": 2.0781, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 2.536363636363636, |
|
"grad_norm": 13.846643447875977, |
|
"learning_rate": 1.5833333333333333e-05, |
|
"loss": 1.2449, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 2.5386363636363636, |
|
"grad_norm": 12.496777534484863, |
|
"learning_rate": 1.5757575757575756e-05, |
|
"loss": 1.287, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 2.540909090909091, |
|
"grad_norm": 8.406025886535645, |
|
"learning_rate": 1.5681818181818182e-05, |
|
"loss": 1.4133, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 2.543181818181818, |
|
"grad_norm": 9.715517044067383, |
|
"learning_rate": 1.560606060606061e-05, |
|
"loss": 1.6738, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 2.5454545454545454, |
|
"grad_norm": 14.14928913116455, |
|
"learning_rate": 1.553030303030303e-05, |
|
"loss": 1.9505, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.547727272727273, |
|
"grad_norm": 10.110836029052734, |
|
"learning_rate": 1.5454545454545454e-05, |
|
"loss": 1.4759, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 15.94524097442627, |
|
"learning_rate": 1.5378787878787877e-05, |
|
"loss": 1.7516, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 2.5522727272727272, |
|
"grad_norm": 16.20330047607422, |
|
"learning_rate": 1.5303030303030304e-05, |
|
"loss": 2.1093, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 2.5545454545454547, |
|
"grad_norm": 8.647255897521973, |
|
"learning_rate": 1.5227272727272728e-05, |
|
"loss": 1.0308, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 2.5568181818181817, |
|
"grad_norm": 8.955947875976562, |
|
"learning_rate": 1.5151515151515153e-05, |
|
"loss": 1.0129, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 2.559090909090909, |
|
"grad_norm": 12.877582550048828, |
|
"learning_rate": 1.5075757575757576e-05, |
|
"loss": 1.4853, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 2.5613636363636365, |
|
"grad_norm": 14.299208641052246, |
|
"learning_rate": 1.5e-05, |
|
"loss": 2.0464, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 2.5636363636363635, |
|
"grad_norm": 14.365765571594238, |
|
"learning_rate": 1.4924242424242423e-05, |
|
"loss": 1.9381, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 2.565909090909091, |
|
"grad_norm": 10.231593132019043, |
|
"learning_rate": 1.484848484848485e-05, |
|
"loss": 1.6777, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 2.5681818181818183, |
|
"grad_norm": 14.259530067443848, |
|
"learning_rate": 1.4772727272727274e-05, |
|
"loss": 1.6438, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.5704545454545453, |
|
"grad_norm": 13.114981651306152, |
|
"learning_rate": 1.4696969696969697e-05, |
|
"loss": 1.3336, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 2.5727272727272728, |
|
"grad_norm": 9.463297843933105, |
|
"learning_rate": 1.4621212121212122e-05, |
|
"loss": 1.203, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 2.575, |
|
"grad_norm": 9.805520057678223, |
|
"learning_rate": 1.4545454545454545e-05, |
|
"loss": 1.2487, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 2.577272727272727, |
|
"grad_norm": 14.853455543518066, |
|
"learning_rate": 1.446969696969697e-05, |
|
"loss": 1.5734, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 2.5795454545454546, |
|
"grad_norm": 11.86341381072998, |
|
"learning_rate": 1.4393939393939396e-05, |
|
"loss": 1.4835, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 2.581818181818182, |
|
"grad_norm": 11.581096649169922, |
|
"learning_rate": 1.431818181818182e-05, |
|
"loss": 2.0558, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 2.584090909090909, |
|
"grad_norm": 12.040521621704102, |
|
"learning_rate": 1.4242424242424243e-05, |
|
"loss": 1.4117, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 2.5863636363636364, |
|
"grad_norm": 13.00901985168457, |
|
"learning_rate": 1.4166666666666668e-05, |
|
"loss": 2.9511, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 2.588636363636364, |
|
"grad_norm": 9.332910537719727, |
|
"learning_rate": 1.409090909090909e-05, |
|
"loss": 1.1121, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 2.590909090909091, |
|
"grad_norm": 10.607443809509277, |
|
"learning_rate": 1.4015151515151515e-05, |
|
"loss": 1.4706, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.5931818181818183, |
|
"grad_norm": 9.47099494934082, |
|
"learning_rate": 1.3939393939393942e-05, |
|
"loss": 1.6907, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 2.5954545454545457, |
|
"grad_norm": 12.868734359741211, |
|
"learning_rate": 1.3863636363636364e-05, |
|
"loss": 1.334, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 2.5977272727272727, |
|
"grad_norm": 7.338480472564697, |
|
"learning_rate": 1.3787878787878789e-05, |
|
"loss": 0.6364, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 10.434823989868164, |
|
"learning_rate": 1.3712121212121212e-05, |
|
"loss": 1.7292, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 2.6022727272727275, |
|
"grad_norm": 10.510713577270508, |
|
"learning_rate": 1.3636363636363637e-05, |
|
"loss": 1.555, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 2.6045454545454545, |
|
"grad_norm": 11.927501678466797, |
|
"learning_rate": 1.3560606060606063e-05, |
|
"loss": 1.7373, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 2.606818181818182, |
|
"grad_norm": 8.673569679260254, |
|
"learning_rate": 1.3484848484848486e-05, |
|
"loss": 1.3046, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 2.6090909090909093, |
|
"grad_norm": 9.680171012878418, |
|
"learning_rate": 1.340909090909091e-05, |
|
"loss": 1.2691, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 2.6113636363636363, |
|
"grad_norm": 20.66661834716797, |
|
"learning_rate": 1.3333333333333333e-05, |
|
"loss": 3.1138, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 2.6136363636363638, |
|
"grad_norm": 59.59333801269531, |
|
"learning_rate": 1.3257575757575758e-05, |
|
"loss": 1.8486, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.615909090909091, |
|
"grad_norm": 9.416550636291504, |
|
"learning_rate": 1.318181818181818e-05, |
|
"loss": 1.198, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 2.618181818181818, |
|
"grad_norm": 11.847350120544434, |
|
"learning_rate": 1.3106060606060607e-05, |
|
"loss": 1.494, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 2.6204545454545456, |
|
"grad_norm": 8.2369966506958, |
|
"learning_rate": 1.3030303030303032e-05, |
|
"loss": 0.8885, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 2.6227272727272726, |
|
"grad_norm": 13.204099655151367, |
|
"learning_rate": 1.2954545454545455e-05, |
|
"loss": 1.9838, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 2.625, |
|
"grad_norm": 11.384471893310547, |
|
"learning_rate": 1.287878787878788e-05, |
|
"loss": 1.5648, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 2.6272727272727274, |
|
"grad_norm": 43.95447540283203, |
|
"learning_rate": 1.2803030303030302e-05, |
|
"loss": 1.6246, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 2.6295454545454544, |
|
"grad_norm": 12.041752815246582, |
|
"learning_rate": 1.2727272727272727e-05, |
|
"loss": 1.6404, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 2.631818181818182, |
|
"grad_norm": 13.470951080322266, |
|
"learning_rate": 1.2651515151515153e-05, |
|
"loss": 2.1278, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 2.634090909090909, |
|
"grad_norm": 12.769510269165039, |
|
"learning_rate": 1.2575757575757578e-05, |
|
"loss": 1.6486, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 2.6363636363636362, |
|
"grad_norm": 9.455702781677246, |
|
"learning_rate": 1.25e-05, |
|
"loss": 1.5211, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.6386363636363637, |
|
"grad_norm": 13.590509414672852, |
|
"learning_rate": 1.2424242424242424e-05, |
|
"loss": 2.081, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 2.6409090909090907, |
|
"grad_norm": 12.029936790466309, |
|
"learning_rate": 1.234848484848485e-05, |
|
"loss": 1.6036, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 2.643181818181818, |
|
"grad_norm": 65.75121307373047, |
|
"learning_rate": 1.2272727272727273e-05, |
|
"loss": 1.5853, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 2.6454545454545455, |
|
"grad_norm": 13.093693733215332, |
|
"learning_rate": 1.2196969696969697e-05, |
|
"loss": 1.4623, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 2.6477272727272725, |
|
"grad_norm": 14.704643249511719, |
|
"learning_rate": 1.2121212121212122e-05, |
|
"loss": 1.7431, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 10.710149765014648, |
|
"learning_rate": 1.2045454545454547e-05, |
|
"loss": 1.6442, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 2.6522727272727273, |
|
"grad_norm": 12.05364990234375, |
|
"learning_rate": 1.196969696969697e-05, |
|
"loss": 2.0733, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 2.6545454545454543, |
|
"grad_norm": 12.834985733032227, |
|
"learning_rate": 1.1893939393939394e-05, |
|
"loss": 2.8648, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 2.6568181818181817, |
|
"grad_norm": 9.302035331726074, |
|
"learning_rate": 1.1818181818181819e-05, |
|
"loss": 1.1539, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 2.659090909090909, |
|
"grad_norm": 9.240340232849121, |
|
"learning_rate": 1.1742424242424243e-05, |
|
"loss": 1.5434, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.661363636363636, |
|
"grad_norm": 14.066667556762695, |
|
"learning_rate": 1.1666666666666668e-05, |
|
"loss": 1.7866, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 2.6636363636363636, |
|
"grad_norm": 10.935914039611816, |
|
"learning_rate": 1.159090909090909e-05, |
|
"loss": 1.4766, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 2.665909090909091, |
|
"grad_norm": 8.409308433532715, |
|
"learning_rate": 1.1515151515151517e-05, |
|
"loss": 1.3846, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 2.668181818181818, |
|
"grad_norm": 10.203055381774902, |
|
"learning_rate": 1.143939393939394e-05, |
|
"loss": 1.1693, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 2.6704545454545454, |
|
"grad_norm": 11.417679786682129, |
|
"learning_rate": 1.1363636363636365e-05, |
|
"loss": 1.9941, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 2.672727272727273, |
|
"grad_norm": 13.196696281433105, |
|
"learning_rate": 1.128787878787879e-05, |
|
"loss": 1.8474, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 2.675, |
|
"grad_norm": 11.088204383850098, |
|
"learning_rate": 1.1212121212121212e-05, |
|
"loss": 1.7153, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 2.6772727272727272, |
|
"grad_norm": 12.048771858215332, |
|
"learning_rate": 1.1136363636363637e-05, |
|
"loss": 2.5212, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 2.6795454545454547, |
|
"grad_norm": 13.929719924926758, |
|
"learning_rate": 1.1060606060606061e-05, |
|
"loss": 2.3728, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 2.6818181818181817, |
|
"grad_norm": 10.445011138916016, |
|
"learning_rate": 1.0984848484848486e-05, |
|
"loss": 0.9737, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.684090909090909, |
|
"grad_norm": 14.0521821975708, |
|
"learning_rate": 1.0909090909090909e-05, |
|
"loss": 1.6476, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 2.6863636363636365, |
|
"grad_norm": 10.526323318481445, |
|
"learning_rate": 1.0833333333333334e-05, |
|
"loss": 1.4206, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 2.6886363636363635, |
|
"grad_norm": 11.84065055847168, |
|
"learning_rate": 1.0757575757575758e-05, |
|
"loss": 2.5504, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 2.690909090909091, |
|
"grad_norm": 13.432804107666016, |
|
"learning_rate": 1.0681818181818181e-05, |
|
"loss": 1.1723, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 2.6931818181818183, |
|
"grad_norm": 10.570472717285156, |
|
"learning_rate": 1.0606060606060607e-05, |
|
"loss": 1.3094, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 2.6954545454545453, |
|
"grad_norm": 9.313067436218262, |
|
"learning_rate": 1.053030303030303e-05, |
|
"loss": 1.3848, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 2.6977272727272728, |
|
"grad_norm": 12.77459716796875, |
|
"learning_rate": 1.0454545454545455e-05, |
|
"loss": 1.9546, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 12.23890495300293, |
|
"learning_rate": 1.037878787878788e-05, |
|
"loss": 1.858, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 2.702272727272727, |
|
"grad_norm": 10.90783977508545, |
|
"learning_rate": 1.0303030303030304e-05, |
|
"loss": 1.7215, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 2.7045454545454546, |
|
"grad_norm": 11.610969543457031, |
|
"learning_rate": 1.0227272727272729e-05, |
|
"loss": 1.3744, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.706818181818182, |
|
"grad_norm": 13.296714782714844, |
|
"learning_rate": 1.0151515151515152e-05, |
|
"loss": 1.3959, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 2.709090909090909, |
|
"grad_norm": 11.602737426757812, |
|
"learning_rate": 1.0075757575757576e-05, |
|
"loss": 0.9706, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 2.7113636363636364, |
|
"grad_norm": 8.904767036437988, |
|
"learning_rate": 1e-05, |
|
"loss": 1.1206, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 2.713636363636364, |
|
"grad_norm": 9.719966888427734, |
|
"learning_rate": 9.924242424242425e-06, |
|
"loss": 1.326, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 2.715909090909091, |
|
"grad_norm": 11.37736701965332, |
|
"learning_rate": 9.848484848484848e-06, |
|
"loss": 1.2423, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 2.7181818181818183, |
|
"grad_norm": 8.89704418182373, |
|
"learning_rate": 9.772727272727273e-06, |
|
"loss": 1.5434, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 2.7204545454545457, |
|
"grad_norm": 11.980868339538574, |
|
"learning_rate": 9.696969696969698e-06, |
|
"loss": 1.9285, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 2.7227272727272727, |
|
"grad_norm": 20.147335052490234, |
|
"learning_rate": 9.62121212121212e-06, |
|
"loss": 1.9032, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 2.725, |
|
"grad_norm": 12.508543014526367, |
|
"learning_rate": 9.545454545454547e-06, |
|
"loss": 2.549, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 2.7272727272727275, |
|
"grad_norm": 15.286222457885742, |
|
"learning_rate": 9.46969696969697e-06, |
|
"loss": 1.7541, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.7295454545454545, |
|
"grad_norm": 9.950079917907715, |
|
"learning_rate": 9.393939393939394e-06, |
|
"loss": 1.0859, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 2.731818181818182, |
|
"grad_norm": 9.034377098083496, |
|
"learning_rate": 9.318181818181819e-06, |
|
"loss": 1.6942, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 2.7340909090909093, |
|
"grad_norm": 10.347823143005371, |
|
"learning_rate": 9.242424242424244e-06, |
|
"loss": 0.7853, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 2.7363636363636363, |
|
"grad_norm": 13.554040908813477, |
|
"learning_rate": 9.166666666666666e-06, |
|
"loss": 1.6867, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 2.7386363636363638, |
|
"grad_norm": 12.764242172241211, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 1.7983, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 2.740909090909091, |
|
"grad_norm": 13.305977821350098, |
|
"learning_rate": 9.015151515151516e-06, |
|
"loss": 1.5904, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 2.743181818181818, |
|
"grad_norm": 16.118629455566406, |
|
"learning_rate": 8.93939393939394e-06, |
|
"loss": 1.593, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 2.7454545454545456, |
|
"grad_norm": 9.158020973205566, |
|
"learning_rate": 8.863636363636365e-06, |
|
"loss": 1.2809, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 2.7477272727272726, |
|
"grad_norm": 12.490316390991211, |
|
"learning_rate": 8.787878787878788e-06, |
|
"loss": 1.5405, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 12.778218269348145, |
|
"learning_rate": 8.712121212121212e-06, |
|
"loss": 1.4892, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.7522727272727274, |
|
"grad_norm": 11.4492826461792, |
|
"learning_rate": 8.636363636363637e-06, |
|
"loss": 1.2019, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 2.7545454545454544, |
|
"grad_norm": 13.168742179870605, |
|
"learning_rate": 8.56060606060606e-06, |
|
"loss": 1.6647, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 2.756818181818182, |
|
"grad_norm": 10.593256950378418, |
|
"learning_rate": 8.484848484848486e-06, |
|
"loss": 1.3455, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 2.759090909090909, |
|
"grad_norm": 12.997807502746582, |
|
"learning_rate": 8.409090909090909e-06, |
|
"loss": 1.6967, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 2.7613636363636362, |
|
"grad_norm": 16.37111473083496, |
|
"learning_rate": 8.333333333333334e-06, |
|
"loss": 1.7001, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 2.7636363636363637, |
|
"grad_norm": 11.749297142028809, |
|
"learning_rate": 8.257575757575758e-06, |
|
"loss": 0.9918, |
|
"step": 1216 |
|
}, |
|
{ |
|
"epoch": 2.7659090909090907, |
|
"grad_norm": 9.196391105651855, |
|
"learning_rate": 8.181818181818183e-06, |
|
"loss": 1.3952, |
|
"step": 1217 |
|
}, |
|
{ |
|
"epoch": 2.768181818181818, |
|
"grad_norm": 7.304767608642578, |
|
"learning_rate": 8.106060606060606e-06, |
|
"loss": 0.9309, |
|
"step": 1218 |
|
}, |
|
{ |
|
"epoch": 2.7704545454545455, |
|
"grad_norm": 11.371389389038086, |
|
"learning_rate": 8.03030303030303e-06, |
|
"loss": 2.2034, |
|
"step": 1219 |
|
}, |
|
{ |
|
"epoch": 2.7727272727272725, |
|
"grad_norm": 10.503549575805664, |
|
"learning_rate": 7.954545454545455e-06, |
|
"loss": 1.0822, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 2.775, |
|
"grad_norm": 11.071968078613281, |
|
"learning_rate": 7.878787878787878e-06, |
|
"loss": 1.7071, |
|
"step": 1221 |
|
}, |
|
{ |
|
"epoch": 2.7772727272727273, |
|
"grad_norm": 11.416297912597656, |
|
"learning_rate": 7.803030303030304e-06, |
|
"loss": 2.0261, |
|
"step": 1222 |
|
}, |
|
{ |
|
"epoch": 2.7795454545454543, |
|
"grad_norm": 15.829241752624512, |
|
"learning_rate": 7.727272727272727e-06, |
|
"loss": 2.0085, |
|
"step": 1223 |
|
}, |
|
{ |
|
"epoch": 2.7818181818181817, |
|
"grad_norm": 8.403531074523926, |
|
"learning_rate": 7.651515151515152e-06, |
|
"loss": 1.2764, |
|
"step": 1224 |
|
}, |
|
{ |
|
"epoch": 2.784090909090909, |
|
"grad_norm": 11.730886459350586, |
|
"learning_rate": 7.5757575757575764e-06, |
|
"loss": 1.6733, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 2.786363636363636, |
|
"grad_norm": 13.102418899536133, |
|
"learning_rate": 7.5e-06, |
|
"loss": 2.139, |
|
"step": 1226 |
|
}, |
|
{ |
|
"epoch": 2.7886363636363636, |
|
"grad_norm": 14.804220199584961, |
|
"learning_rate": 7.424242424242425e-06, |
|
"loss": 2.1015, |
|
"step": 1227 |
|
}, |
|
{ |
|
"epoch": 2.790909090909091, |
|
"grad_norm": 11.839103698730469, |
|
"learning_rate": 7.3484848484848486e-06, |
|
"loss": 1.6026, |
|
"step": 1228 |
|
}, |
|
{ |
|
"epoch": 2.793181818181818, |
|
"grad_norm": 17.421327590942383, |
|
"learning_rate": 7.272727272727272e-06, |
|
"loss": 2.7038, |
|
"step": 1229 |
|
}, |
|
{ |
|
"epoch": 2.7954545454545454, |
|
"grad_norm": 14.81433391571045, |
|
"learning_rate": 7.196969696969698e-06, |
|
"loss": 1.702, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 2.797727272727273, |
|
"grad_norm": 7.195108413696289, |
|
"learning_rate": 7.1212121212121215e-06, |
|
"loss": 0.9022, |
|
"step": 1231 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 9.045830726623535, |
|
"learning_rate": 7.045454545454545e-06, |
|
"loss": 1.0748, |
|
"step": 1232 |
|
}, |
|
{ |
|
"epoch": 2.8022727272727272, |
|
"grad_norm": 11.995684623718262, |
|
"learning_rate": 6.969696969696971e-06, |
|
"loss": 2.5776, |
|
"step": 1233 |
|
}, |
|
{ |
|
"epoch": 2.8045454545454547, |
|
"grad_norm": 10.528661727905273, |
|
"learning_rate": 6.8939393939393945e-06, |
|
"loss": 1.8155, |
|
"step": 1234 |
|
}, |
|
{ |
|
"epoch": 2.8068181818181817, |
|
"grad_norm": 34.72589111328125, |
|
"learning_rate": 6.818181818181818e-06, |
|
"loss": 2.5481, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 2.809090909090909, |
|
"grad_norm": 8.032730102539062, |
|
"learning_rate": 6.742424242424243e-06, |
|
"loss": 0.736, |
|
"step": 1236 |
|
}, |
|
{ |
|
"epoch": 2.8113636363636365, |
|
"grad_norm": 9.088884353637695, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 1.6364, |
|
"step": 1237 |
|
}, |
|
{ |
|
"epoch": 2.8136363636363635, |
|
"grad_norm": 9.277338027954102, |
|
"learning_rate": 6.59090909090909e-06, |
|
"loss": 1.4521, |
|
"step": 1238 |
|
}, |
|
{ |
|
"epoch": 2.815909090909091, |
|
"grad_norm": 12.458305358886719, |
|
"learning_rate": 6.515151515151516e-06, |
|
"loss": 1.2296, |
|
"step": 1239 |
|
}, |
|
{ |
|
"epoch": 2.8181818181818183, |
|
"grad_norm": 10.594490051269531, |
|
"learning_rate": 6.43939393939394e-06, |
|
"loss": 1.414, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 2.8204545454545453, |
|
"grad_norm": 10.604024887084961, |
|
"learning_rate": 6.363636363636363e-06, |
|
"loss": 1.7017, |
|
"step": 1241 |
|
}, |
|
{ |
|
"epoch": 2.8227272727272728, |
|
"grad_norm": 10.347737312316895, |
|
"learning_rate": 6.287878787878789e-06, |
|
"loss": 1.2462, |
|
"step": 1242 |
|
}, |
|
{ |
|
"epoch": 2.825, |
|
"grad_norm": 11.151006698608398, |
|
"learning_rate": 6.212121212121212e-06, |
|
"loss": 1.7713, |
|
"step": 1243 |
|
}, |
|
{ |
|
"epoch": 2.827272727272727, |
|
"grad_norm": 12.432381629943848, |
|
"learning_rate": 6.136363636363636e-06, |
|
"loss": 2.7927, |
|
"step": 1244 |
|
}, |
|
{ |
|
"epoch": 2.8295454545454546, |
|
"grad_norm": 12.030777931213379, |
|
"learning_rate": 6.060606060606061e-06, |
|
"loss": 2.1842, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 2.831818181818182, |
|
"grad_norm": 14.940272331237793, |
|
"learning_rate": 5.984848484848485e-06, |
|
"loss": 1.6475, |
|
"step": 1246 |
|
}, |
|
{ |
|
"epoch": 2.834090909090909, |
|
"grad_norm": 8.027610778808594, |
|
"learning_rate": 5.909090909090909e-06, |
|
"loss": 0.948, |
|
"step": 1247 |
|
}, |
|
{ |
|
"epoch": 2.8363636363636364, |
|
"grad_norm": 12.356363296508789, |
|
"learning_rate": 5.833333333333334e-06, |
|
"loss": 1.6191, |
|
"step": 1248 |
|
}, |
|
{ |
|
"epoch": 2.838636363636364, |
|
"grad_norm": 12.225868225097656, |
|
"learning_rate": 5.7575757575757586e-06, |
|
"loss": 1.2056, |
|
"step": 1249 |
|
}, |
|
{ |
|
"epoch": 2.840909090909091, |
|
"grad_norm": 11.615985870361328, |
|
"learning_rate": 5.681818181818182e-06, |
|
"loss": 1.5477, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 2.8431818181818183, |
|
"grad_norm": 13.92235279083252, |
|
"learning_rate": 5.606060606060606e-06, |
|
"loss": 2.401, |
|
"step": 1251 |
|
}, |
|
{ |
|
"epoch": 2.8454545454545457, |
|
"grad_norm": 19.311002731323242, |
|
"learning_rate": 5.530303030303031e-06, |
|
"loss": 2.2211, |
|
"step": 1252 |
|
}, |
|
{ |
|
"epoch": 2.8477272727272727, |
|
"grad_norm": 9.447689056396484, |
|
"learning_rate": 5.4545454545454545e-06, |
|
"loss": 1.2734, |
|
"step": 1253 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 10.197713851928711, |
|
"learning_rate": 5.378787878787879e-06, |
|
"loss": 0.878, |
|
"step": 1254 |
|
}, |
|
{ |
|
"epoch": 2.8522727272727275, |
|
"grad_norm": 14.826508522033691, |
|
"learning_rate": 5.303030303030304e-06, |
|
"loss": 1.6759, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 2.8545454545454545, |
|
"grad_norm": 10.666242599487305, |
|
"learning_rate": 5.2272727272727274e-06, |
|
"loss": 2.1974, |
|
"step": 1256 |
|
}, |
|
{ |
|
"epoch": 2.856818181818182, |
|
"grad_norm": 13.020369529724121, |
|
"learning_rate": 5.151515151515152e-06, |
|
"loss": 1.4073, |
|
"step": 1257 |
|
}, |
|
{ |
|
"epoch": 2.8590909090909093, |
|
"grad_norm": 14.27531623840332, |
|
"learning_rate": 5.075757575757576e-06, |
|
"loss": 2.1165, |
|
"step": 1258 |
|
}, |
|
{ |
|
"epoch": 2.8613636363636363, |
|
"grad_norm": 11.82662296295166, |
|
"learning_rate": 5e-06, |
|
"loss": 0.7765, |
|
"step": 1259 |
|
}, |
|
{ |
|
"epoch": 2.8636363636363638, |
|
"grad_norm": 12.107914924621582, |
|
"learning_rate": 4.924242424242424e-06, |
|
"loss": 1.2762, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 2.865909090909091, |
|
"grad_norm": 10.041885375976562, |
|
"learning_rate": 4.848484848484849e-06, |
|
"loss": 2.1775, |
|
"step": 1261 |
|
}, |
|
{ |
|
"epoch": 2.868181818181818, |
|
"grad_norm": 11.078441619873047, |
|
"learning_rate": 4.772727272727273e-06, |
|
"loss": 1.6073, |
|
"step": 1262 |
|
}, |
|
{ |
|
"epoch": 2.8704545454545456, |
|
"grad_norm": 9.000492095947266, |
|
"learning_rate": 4.696969696969697e-06, |
|
"loss": 1.4636, |
|
"step": 1263 |
|
}, |
|
{ |
|
"epoch": 2.8727272727272726, |
|
"grad_norm": 11.069653511047363, |
|
"learning_rate": 4.621212121212122e-06, |
|
"loss": 1.4654, |
|
"step": 1264 |
|
}, |
|
{ |
|
"epoch": 2.875, |
|
"grad_norm": 9.110404968261719, |
|
"learning_rate": 4.5454545454545455e-06, |
|
"loss": 1.8338, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 2.8772727272727274, |
|
"grad_norm": 16.761194229125977, |
|
"learning_rate": 4.46969696969697e-06, |
|
"loss": 1.0709, |
|
"step": 1266 |
|
}, |
|
{ |
|
"epoch": 2.8795454545454544, |
|
"grad_norm": 13.67717170715332, |
|
"learning_rate": 4.393939393939394e-06, |
|
"loss": 2.0994, |
|
"step": 1267 |
|
}, |
|
{ |
|
"epoch": 2.881818181818182, |
|
"grad_norm": 8.258940696716309, |
|
"learning_rate": 4.3181818181818185e-06, |
|
"loss": 1.2818, |
|
"step": 1268 |
|
}, |
|
{ |
|
"epoch": 2.884090909090909, |
|
"grad_norm": 12.960264205932617, |
|
"learning_rate": 4.242424242424243e-06, |
|
"loss": 1.9218, |
|
"step": 1269 |
|
}, |
|
{ |
|
"epoch": 2.8863636363636362, |
|
"grad_norm": 10.886972427368164, |
|
"learning_rate": 4.166666666666667e-06, |
|
"loss": 1.4611, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 2.8886363636363637, |
|
"grad_norm": 10.516489028930664, |
|
"learning_rate": 4.0909090909090915e-06, |
|
"loss": 2.3418, |
|
"step": 1271 |
|
}, |
|
{ |
|
"epoch": 2.8909090909090907, |
|
"grad_norm": 12.977254867553711, |
|
"learning_rate": 4.015151515151515e-06, |
|
"loss": 1.5361, |
|
"step": 1272 |
|
}, |
|
{ |
|
"epoch": 2.893181818181818, |
|
"grad_norm": 14.605803489685059, |
|
"learning_rate": 3.939393939393939e-06, |
|
"loss": 1.6679, |
|
"step": 1273 |
|
}, |
|
{ |
|
"epoch": 2.8954545454545455, |
|
"grad_norm": 17.729450225830078, |
|
"learning_rate": 3.863636363636364e-06, |
|
"loss": 1.468, |
|
"step": 1274 |
|
}, |
|
{ |
|
"epoch": 2.8977272727272725, |
|
"grad_norm": 10.65392780303955, |
|
"learning_rate": 3.7878787878787882e-06, |
|
"loss": 1.8606, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 18.738691329956055, |
|
"learning_rate": 3.7121212121212124e-06, |
|
"loss": 2.7391, |
|
"step": 1276 |
|
}, |
|
{ |
|
"epoch": 2.9022727272727273, |
|
"grad_norm": 11.129204750061035, |
|
"learning_rate": 3.636363636363636e-06, |
|
"loss": 1.4911, |
|
"step": 1277 |
|
}, |
|
{ |
|
"epoch": 2.9045454545454543, |
|
"grad_norm": 10.117977142333984, |
|
"learning_rate": 3.5606060606060608e-06, |
|
"loss": 1.0915, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 2.9068181818181817, |
|
"grad_norm": 9.391002655029297, |
|
"learning_rate": 3.4848484848484854e-06, |
|
"loss": 1.1659, |
|
"step": 1279 |
|
}, |
|
{ |
|
"epoch": 2.909090909090909, |
|
"grad_norm": 10.86440372467041, |
|
"learning_rate": 3.409090909090909e-06, |
|
"loss": 1.4967, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 2.911363636363636, |
|
"grad_norm": 11.438384056091309, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 1.6597, |
|
"step": 1281 |
|
}, |
|
{ |
|
"epoch": 2.9136363636363636, |
|
"grad_norm": 13.486211776733398, |
|
"learning_rate": 3.257575757575758e-06, |
|
"loss": 1.947, |
|
"step": 1282 |
|
}, |
|
{ |
|
"epoch": 2.915909090909091, |
|
"grad_norm": 13.491000175476074, |
|
"learning_rate": 3.1818181818181817e-06, |
|
"loss": 2.4163, |
|
"step": 1283 |
|
}, |
|
{ |
|
"epoch": 2.918181818181818, |
|
"grad_norm": 10.710677146911621, |
|
"learning_rate": 3.106060606060606e-06, |
|
"loss": 1.8073, |
|
"step": 1284 |
|
}, |
|
{ |
|
"epoch": 2.9204545454545454, |
|
"grad_norm": 12.062322616577148, |
|
"learning_rate": 3.0303030303030305e-06, |
|
"loss": 1.9969, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 2.922727272727273, |
|
"grad_norm": 70.31402587890625, |
|
"learning_rate": 2.9545454545454547e-06, |
|
"loss": 1.3767, |
|
"step": 1286 |
|
}, |
|
{ |
|
"epoch": 2.925, |
|
"grad_norm": 9.519462585449219, |
|
"learning_rate": 2.8787878787878793e-06, |
|
"loss": 1.4795, |
|
"step": 1287 |
|
}, |
|
{ |
|
"epoch": 2.9272727272727272, |
|
"grad_norm": 13.316557884216309, |
|
"learning_rate": 2.803030303030303e-06, |
|
"loss": 0.858, |
|
"step": 1288 |
|
}, |
|
{ |
|
"epoch": 2.9295454545454547, |
|
"grad_norm": 11.898123741149902, |
|
"learning_rate": 2.7272727272727272e-06, |
|
"loss": 1.7807, |
|
"step": 1289 |
|
}, |
|
{ |
|
"epoch": 2.9318181818181817, |
|
"grad_norm": 13.429510116577148, |
|
"learning_rate": 2.651515151515152e-06, |
|
"loss": 1.7467, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 2.934090909090909, |
|
"grad_norm": 34.4333381652832, |
|
"learning_rate": 2.575757575757576e-06, |
|
"loss": 1.6774, |
|
"step": 1291 |
|
}, |
|
{ |
|
"epoch": 2.9363636363636365, |
|
"grad_norm": 8.44999885559082, |
|
"learning_rate": 2.5e-06, |
|
"loss": 0.8595, |
|
"step": 1292 |
|
}, |
|
{ |
|
"epoch": 2.9386363636363635, |
|
"grad_norm": 9.824548721313477, |
|
"learning_rate": 2.4242424242424244e-06, |
|
"loss": 1.551, |
|
"step": 1293 |
|
}, |
|
{ |
|
"epoch": 2.940909090909091, |
|
"grad_norm": 10.713866233825684, |
|
"learning_rate": 2.3484848484848486e-06, |
|
"loss": 1.4604, |
|
"step": 1294 |
|
}, |
|
{ |
|
"epoch": 2.9431818181818183, |
|
"grad_norm": 18.695775985717773, |
|
"learning_rate": 2.2727272727272728e-06, |
|
"loss": 2.8512, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 2.9454545454545453, |
|
"grad_norm": 9.289727210998535, |
|
"learning_rate": 2.196969696969697e-06, |
|
"loss": 1.3539, |
|
"step": 1296 |
|
}, |
|
{ |
|
"epoch": 2.9477272727272728, |
|
"grad_norm": 7.917882442474365, |
|
"learning_rate": 2.1212121212121216e-06, |
|
"loss": 1.2179, |
|
"step": 1297 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"grad_norm": 16.269927978515625, |
|
"learning_rate": 2.0454545454545457e-06, |
|
"loss": 1.8904, |
|
"step": 1298 |
|
}, |
|
{ |
|
"epoch": 2.952272727272727, |
|
"grad_norm": 11.293408393859863, |
|
"learning_rate": 1.9696969696969695e-06, |
|
"loss": 1.4438, |
|
"step": 1299 |
|
}, |
|
{ |
|
"epoch": 2.9545454545454546, |
|
"grad_norm": 14.2405424118042, |
|
"learning_rate": 1.8939393939393941e-06, |
|
"loss": 2.2578, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 2.956818181818182, |
|
"grad_norm": 9.712430953979492, |
|
"learning_rate": 1.818181818181818e-06, |
|
"loss": 1.1685, |
|
"step": 1301 |
|
}, |
|
{ |
|
"epoch": 2.959090909090909, |
|
"grad_norm": 14.34041690826416, |
|
"learning_rate": 1.7424242424242427e-06, |
|
"loss": 1.9741, |
|
"step": 1302 |
|
}, |
|
{ |
|
"epoch": 2.9613636363636364, |
|
"grad_norm": 12.20971965789795, |
|
"learning_rate": 1.6666666666666667e-06, |
|
"loss": 2.283, |
|
"step": 1303 |
|
}, |
|
{ |
|
"epoch": 2.963636363636364, |
|
"grad_norm": 13.051138877868652, |
|
"learning_rate": 1.5909090909090908e-06, |
|
"loss": 2.3128, |
|
"step": 1304 |
|
}, |
|
{ |
|
"epoch": 2.965909090909091, |
|
"grad_norm": 11.069129943847656, |
|
"learning_rate": 1.5151515151515152e-06, |
|
"loss": 1.4379, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 2.9681818181818183, |
|
"grad_norm": 10.655563354492188, |
|
"learning_rate": 1.4393939393939396e-06, |
|
"loss": 1.4726, |
|
"step": 1306 |
|
}, |
|
{ |
|
"epoch": 2.9704545454545457, |
|
"grad_norm": 9.674460411071777, |
|
"learning_rate": 1.3636363636363636e-06, |
|
"loss": 1.2689, |
|
"step": 1307 |
|
}, |
|
{ |
|
"epoch": 2.9727272727272727, |
|
"grad_norm": 10.24626636505127, |
|
"learning_rate": 1.287878787878788e-06, |
|
"loss": 1.2585, |
|
"step": 1308 |
|
}, |
|
{ |
|
"epoch": 2.975, |
|
"grad_norm": 13.117413520812988, |
|
"learning_rate": 1.2121212121212122e-06, |
|
"loss": 1.8019, |
|
"step": 1309 |
|
}, |
|
{ |
|
"epoch": 2.9772727272727275, |
|
"grad_norm": 11.649164199829102, |
|
"learning_rate": 1.1363636363636364e-06, |
|
"loss": 1.375, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 2.9795454545454545, |
|
"grad_norm": 11.054950714111328, |
|
"learning_rate": 1.0606060606060608e-06, |
|
"loss": 1.7139, |
|
"step": 1311 |
|
}, |
|
{ |
|
"epoch": 2.981818181818182, |
|
"grad_norm": 9.476350784301758, |
|
"learning_rate": 9.848484848484847e-07, |
|
"loss": 1.1851, |
|
"step": 1312 |
|
}, |
|
{ |
|
"epoch": 2.9840909090909093, |
|
"grad_norm": 9.467584609985352, |
|
"learning_rate": 9.09090909090909e-07, |
|
"loss": 1.0272, |
|
"step": 1313 |
|
}, |
|
{ |
|
"epoch": 2.9863636363636363, |
|
"grad_norm": 11.783283233642578, |
|
"learning_rate": 8.333333333333333e-07, |
|
"loss": 1.886, |
|
"step": 1314 |
|
}, |
|
{ |
|
"epoch": 2.9886363636363638, |
|
"grad_norm": 11.245438575744629, |
|
"learning_rate": 7.575757575757576e-07, |
|
"loss": 1.2872, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 2.990909090909091, |
|
"grad_norm": 12.71106243133545, |
|
"learning_rate": 6.818181818181818e-07, |
|
"loss": 1.3681, |
|
"step": 1316 |
|
}, |
|
{ |
|
"epoch": 2.993181818181818, |
|
"grad_norm": 11.738058090209961, |
|
"learning_rate": 6.060606060606061e-07, |
|
"loss": 1.9274, |
|
"step": 1317 |
|
}, |
|
{ |
|
"epoch": 2.9954545454545456, |
|
"grad_norm": 12.179485321044922, |
|
"learning_rate": 5.303030303030304e-07, |
|
"loss": 1.6056, |
|
"step": 1318 |
|
}, |
|
{ |
|
"epoch": 2.9977272727272726, |
|
"grad_norm": 9.123523712158203, |
|
"learning_rate": 4.545454545454545e-07, |
|
"loss": 1.2402, |
|
"step": 1319 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"grad_norm": 17.10702133178711, |
|
"learning_rate": 3.787878787878788e-07, |
|
"loss": 1.7438, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"eval_f1": 0.8924, |
|
"eval_gen_len": 41.8818, |
|
"eval_loss": 1.7954092025756836, |
|
"eval_precision": 0.8906, |
|
"eval_recall": 0.8943, |
|
"eval_rouge1": 0.4651, |
|
"eval_rouge2": 0.218, |
|
"eval_rougeL": 0.3904, |
|
"eval_rougeLsum": 0.4291, |
|
"eval_runtime": 28.6293, |
|
"eval_samples_per_second": 3.842, |
|
"eval_steps_per_second": 0.489, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1320, |
|
"total_flos": 2659801069854720.0, |
|
"train_loss": 1.8849294849868976, |
|
"train_runtime": 574.0732, |
|
"train_samples_per_second": 4.593, |
|
"train_steps_per_second": 2.299 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1320, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2659801069854720.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|