|
{ |
|
"best_metric": 4.248934268951416, |
|
"best_model_checkpoint": "autotrain-l6hey-orl0t/checkpoint-8938", |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 8938, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.005594092638174088, |
|
"grad_norm": 34.332366943359375, |
|
"learning_rate": 7.829977628635347e-07, |
|
"loss": 6.4897, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.011188185276348177, |
|
"grad_norm": 23.168872833251953, |
|
"learning_rate": 1.7151379567486951e-06, |
|
"loss": 6.3738, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.016782277914522265, |
|
"grad_norm": 20.714399337768555, |
|
"learning_rate": 2.6472781506338553e-06, |
|
"loss": 5.5122, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.022376370552696354, |
|
"grad_norm": 17.31004524230957, |
|
"learning_rate": 3.542132736763609e-06, |
|
"loss": 5.5352, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.027970463190870442, |
|
"grad_norm": 20.792905807495117, |
|
"learning_rate": 4.47427293064877e-06, |
|
"loss": 5.3902, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.03356455582904453, |
|
"grad_norm": 29.867664337158203, |
|
"learning_rate": 5.40641312453393e-06, |
|
"loss": 5.0552, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.039158648467218615, |
|
"grad_norm": 17.143095016479492, |
|
"learning_rate": 6.338553318419091e-06, |
|
"loss": 5.0792, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.04475274110539271, |
|
"grad_norm": 16.778640747070312, |
|
"learning_rate": 7.270693512304251e-06, |
|
"loss": 5.0412, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05034683374356679, |
|
"grad_norm": 21.074262619018555, |
|
"learning_rate": 8.20283370618941e-06, |
|
"loss": 4.8163, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.055940926381740884, |
|
"grad_norm": 21.221630096435547, |
|
"learning_rate": 9.134973900074571e-06, |
|
"loss": 4.9409, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.06153501901991497, |
|
"grad_norm": 21.189231872558594, |
|
"learning_rate": 1.006711409395973e-05, |
|
"loss": 5.0649, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.06712911165808906, |
|
"grad_norm": 14.992874145507812, |
|
"learning_rate": 1.0999254287844893e-05, |
|
"loss": 4.7542, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.07272320429626315, |
|
"grad_norm": 15.733444213867188, |
|
"learning_rate": 1.1931394481730052e-05, |
|
"loss": 4.5938, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.07831729693443723, |
|
"grad_norm": 23.274600982666016, |
|
"learning_rate": 1.2863534675615213e-05, |
|
"loss": 4.6138, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.08391138957261132, |
|
"grad_norm": 17.92934799194336, |
|
"learning_rate": 1.3795674869500374e-05, |
|
"loss": 4.8809, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.08950548221078541, |
|
"grad_norm": 18.286834716796875, |
|
"learning_rate": 1.4727815063385533e-05, |
|
"loss": 4.6919, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.09509957484895949, |
|
"grad_norm": 20.009130477905273, |
|
"learning_rate": 1.5659955257270695e-05, |
|
"loss": 4.7557, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.10069366748713358, |
|
"grad_norm": 20.796175003051758, |
|
"learning_rate": 1.6592095451155853e-05, |
|
"loss": 4.8059, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.10628776012530768, |
|
"grad_norm": 18.083595275878906, |
|
"learning_rate": 1.7524235645041014e-05, |
|
"loss": 4.86, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.11188185276348177, |
|
"grad_norm": 17.57659339904785, |
|
"learning_rate": 1.8456375838926178e-05, |
|
"loss": 4.568, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.11747594540165585, |
|
"grad_norm": 20.379024505615234, |
|
"learning_rate": 1.9388516032811335e-05, |
|
"loss": 4.9103, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.12307003803982994, |
|
"grad_norm": 16.093093872070312, |
|
"learning_rate": 2.0320656226696496e-05, |
|
"loss": 4.5315, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.12866413067800403, |
|
"grad_norm": 31.44219970703125, |
|
"learning_rate": 2.1252796420581657e-05, |
|
"loss": 4.6423, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.13425822331617812, |
|
"grad_norm": 16.757986068725586, |
|
"learning_rate": 2.2184936614466818e-05, |
|
"loss": 4.5965, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1398523159543522, |
|
"grad_norm": 16.31531524658203, |
|
"learning_rate": 2.311707680835198e-05, |
|
"loss": 4.6928, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.1454464085925263, |
|
"grad_norm": 13.83728313446045, |
|
"learning_rate": 2.4049217002237136e-05, |
|
"loss": 4.5197, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.15104050123070037, |
|
"grad_norm": 17.00248146057129, |
|
"learning_rate": 2.49813571961223e-05, |
|
"loss": 4.4602, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.15663459386887446, |
|
"grad_norm": 22.1146183013916, |
|
"learning_rate": 2.5913497390007457e-05, |
|
"loss": 4.4573, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.16222868650704855, |
|
"grad_norm": 16.24863624572754, |
|
"learning_rate": 2.6845637583892618e-05, |
|
"loss": 4.6094, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.16782277914522264, |
|
"grad_norm": 15.607491493225098, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 4.4851, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.17341687178339674, |
|
"grad_norm": 17.399606704711914, |
|
"learning_rate": 2.8709917971662943e-05, |
|
"loss": 4.7641, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.17901096442157083, |
|
"grad_norm": 12.437596321105957, |
|
"learning_rate": 2.9642058165548097e-05, |
|
"loss": 4.6065, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.18460505705974492, |
|
"grad_norm": 16.83686637878418, |
|
"learning_rate": 3.057419835943326e-05, |
|
"loss": 4.471, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.19019914969791898, |
|
"grad_norm": 17.172122955322266, |
|
"learning_rate": 3.150633855331842e-05, |
|
"loss": 4.7266, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.19579324233609308, |
|
"grad_norm": 18.03239631652832, |
|
"learning_rate": 3.243847874720358e-05, |
|
"loss": 4.46, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.20138733497426717, |
|
"grad_norm": 11.44616985321045, |
|
"learning_rate": 3.3370618941088744e-05, |
|
"loss": 4.2646, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.20698142761244126, |
|
"grad_norm": 18.69893455505371, |
|
"learning_rate": 3.43027591349739e-05, |
|
"loss": 4.5796, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.21257552025061535, |
|
"grad_norm": 26.265470504760742, |
|
"learning_rate": 3.523489932885906e-05, |
|
"loss": 4.476, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.21816961288878944, |
|
"grad_norm": 28.28611946105957, |
|
"learning_rate": 3.616703952274422e-05, |
|
"loss": 4.9502, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.22376370552696354, |
|
"grad_norm": 20.53813362121582, |
|
"learning_rate": 3.709917971662939e-05, |
|
"loss": 4.5447, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.22935779816513763, |
|
"grad_norm": 14.012741088867188, |
|
"learning_rate": 3.8031319910514545e-05, |
|
"loss": 4.7348, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.2349518908033117, |
|
"grad_norm": 15.572279930114746, |
|
"learning_rate": 3.89634601043997e-05, |
|
"loss": 4.7246, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.24054598344148578, |
|
"grad_norm": 13.881638526916504, |
|
"learning_rate": 3.9895600298284866e-05, |
|
"loss": 4.6181, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.24614007607965988, |
|
"grad_norm": 12.749186515808105, |
|
"learning_rate": 4.0827740492170024e-05, |
|
"loss": 4.3893, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.251734168717834, |
|
"grad_norm": 17.131837844848633, |
|
"learning_rate": 4.175988068605519e-05, |
|
"loss": 4.3859, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.25732826135600806, |
|
"grad_norm": 13.731075286865234, |
|
"learning_rate": 4.2692020879940345e-05, |
|
"loss": 4.4761, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2629223539941821, |
|
"grad_norm": 15.064730644226074, |
|
"learning_rate": 4.36241610738255e-05, |
|
"loss": 4.4364, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.26851644663235624, |
|
"grad_norm": 13.714836120605469, |
|
"learning_rate": 4.455630126771067e-05, |
|
"loss": 4.5969, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.2741105392705303, |
|
"grad_norm": 14.1889009475708, |
|
"learning_rate": 4.5488441461595824e-05, |
|
"loss": 4.9771, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.2797046319087044, |
|
"grad_norm": 14.543293952941895, |
|
"learning_rate": 4.642058165548099e-05, |
|
"loss": 4.6537, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.2852987245468785, |
|
"grad_norm": 18.1214542388916, |
|
"learning_rate": 4.735272184936615e-05, |
|
"loss": 4.6208, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.2908928171850526, |
|
"grad_norm": 13.347675323486328, |
|
"learning_rate": 4.82848620432513e-05, |
|
"loss": 4.6199, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.2964869098232267, |
|
"grad_norm": 17.083444595336914, |
|
"learning_rate": 4.921700223713647e-05, |
|
"loss": 4.8805, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.30208100246140074, |
|
"grad_norm": 24.343929290771484, |
|
"learning_rate": 4.998342449859108e-05, |
|
"loss": 4.6357, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.30767509509957486, |
|
"grad_norm": 16.355289459228516, |
|
"learning_rate": 4.987982761478535e-05, |
|
"loss": 4.674, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.3132691877377489, |
|
"grad_norm": 20.73784828186035, |
|
"learning_rate": 4.978037460633184e-05, |
|
"loss": 4.6211, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.31886328037592304, |
|
"grad_norm": 13.50486946105957, |
|
"learning_rate": 4.9676777722526106e-05, |
|
"loss": 4.7257, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.3244573730140971, |
|
"grad_norm": 14.019612312316895, |
|
"learning_rate": 4.9573180838720376e-05, |
|
"loss": 4.6509, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.3300514656522712, |
|
"grad_norm": 18.581026077270508, |
|
"learning_rate": 4.946958395491464e-05, |
|
"loss": 4.3779, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.3356455582904453, |
|
"grad_norm": 12.192296981811523, |
|
"learning_rate": 4.93659870711089e-05, |
|
"loss": 4.2878, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.34123965092861935, |
|
"grad_norm": 12.066230773925781, |
|
"learning_rate": 4.926239018730317e-05, |
|
"loss": 4.5682, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.3468337435667935, |
|
"grad_norm": 10.833414077758789, |
|
"learning_rate": 4.9158793303497436e-05, |
|
"loss": 4.1772, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.35242783620496754, |
|
"grad_norm": 10.834297180175781, |
|
"learning_rate": 4.905519641969169e-05, |
|
"loss": 4.6879, |
|
"step": 1575 |
|
}, |
|
{ |
|
"epoch": 0.35802192884314166, |
|
"grad_norm": 12.870365142822266, |
|
"learning_rate": 4.895159953588596e-05, |
|
"loss": 4.7267, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3636160214813157, |
|
"grad_norm": 10.991371154785156, |
|
"learning_rate": 4.8848002652080226e-05, |
|
"loss": 4.6822, |
|
"step": 1625 |
|
}, |
|
{ |
|
"epoch": 0.36921011411948984, |
|
"grad_norm": 14.67126178741455, |
|
"learning_rate": 4.874440576827449e-05, |
|
"loss": 4.3277, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3748042067576639, |
|
"grad_norm": 13.142556190490723, |
|
"learning_rate": 4.864080888446876e-05, |
|
"loss": 4.513, |
|
"step": 1675 |
|
}, |
|
{ |
|
"epoch": 0.38039829939583797, |
|
"grad_norm": 12.647134780883789, |
|
"learning_rate": 4.853721200066302e-05, |
|
"loss": 4.6069, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.3859923920340121, |
|
"grad_norm": 12.48798942565918, |
|
"learning_rate": 4.8433615116857286e-05, |
|
"loss": 4.846, |
|
"step": 1725 |
|
}, |
|
{ |
|
"epoch": 0.39158648467218615, |
|
"grad_norm": 10.90004825592041, |
|
"learning_rate": 4.833001823305155e-05, |
|
"loss": 4.4091, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.39718057731036027, |
|
"grad_norm": 14.13915729522705, |
|
"learning_rate": 4.822642134924581e-05, |
|
"loss": 4.4192, |
|
"step": 1775 |
|
}, |
|
{ |
|
"epoch": 0.40277466994853434, |
|
"grad_norm": 13.102397918701172, |
|
"learning_rate": 4.812282446544008e-05, |
|
"loss": 4.5444, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.40836876258670846, |
|
"grad_norm": 12.227831840515137, |
|
"learning_rate": 4.8019227581634346e-05, |
|
"loss": 4.712, |
|
"step": 1825 |
|
}, |
|
{ |
|
"epoch": 0.4139628552248825, |
|
"grad_norm": 15.215840339660645, |
|
"learning_rate": 4.791563069782861e-05, |
|
"loss": 4.6632, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.41955694786305664, |
|
"grad_norm": 18.023183822631836, |
|
"learning_rate": 4.781203381402288e-05, |
|
"loss": 4.1778, |
|
"step": 1875 |
|
}, |
|
{ |
|
"epoch": 0.4251510405012307, |
|
"grad_norm": 13.230670928955078, |
|
"learning_rate": 4.770843693021714e-05, |
|
"loss": 4.4197, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.43074513313940477, |
|
"grad_norm": 14.070335388183594, |
|
"learning_rate": 4.7604840046411407e-05, |
|
"loss": 4.6564, |
|
"step": 1925 |
|
}, |
|
{ |
|
"epoch": 0.4363392257775789, |
|
"grad_norm": 20.313472747802734, |
|
"learning_rate": 4.750124316260567e-05, |
|
"loss": 4.5667, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.44193331841575295, |
|
"grad_norm": 15.953713417053223, |
|
"learning_rate": 4.739764627879993e-05, |
|
"loss": 4.5235, |
|
"step": 1975 |
|
}, |
|
{ |
|
"epoch": 0.44752741105392707, |
|
"grad_norm": 10.95453929901123, |
|
"learning_rate": 4.7294049394994197e-05, |
|
"loss": 4.5047, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.45312150369210114, |
|
"grad_norm": 15.660309791564941, |
|
"learning_rate": 4.719045251118847e-05, |
|
"loss": 4.1537, |
|
"step": 2025 |
|
}, |
|
{ |
|
"epoch": 0.45871559633027525, |
|
"grad_norm": 11.03080940246582, |
|
"learning_rate": 4.708685562738273e-05, |
|
"loss": 4.3448, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4643096889684493, |
|
"grad_norm": 10.359949111938477, |
|
"learning_rate": 4.698325874357699e-05, |
|
"loss": 4.3634, |
|
"step": 2075 |
|
}, |
|
{ |
|
"epoch": 0.4699037816066234, |
|
"grad_norm": 25.401718139648438, |
|
"learning_rate": 4.6879661859771263e-05, |
|
"loss": 4.2735, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.4754978742447975, |
|
"grad_norm": 14.679646492004395, |
|
"learning_rate": 4.677606497596553e-05, |
|
"loss": 4.4465, |
|
"step": 2125 |
|
}, |
|
{ |
|
"epoch": 0.48109196688297157, |
|
"grad_norm": 11.817214965820312, |
|
"learning_rate": 4.667246809215979e-05, |
|
"loss": 4.629, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.4866860595211457, |
|
"grad_norm": 10.622258186340332, |
|
"learning_rate": 4.656887120835405e-05, |
|
"loss": 4.4189, |
|
"step": 2175 |
|
}, |
|
{ |
|
"epoch": 0.49228015215931975, |
|
"grad_norm": 15.188981056213379, |
|
"learning_rate": 4.646527432454832e-05, |
|
"loss": 4.2514, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.49787424479749387, |
|
"grad_norm": 14.326010704040527, |
|
"learning_rate": 4.636167744074259e-05, |
|
"loss": 4.3532, |
|
"step": 2225 |
|
}, |
|
{ |
|
"epoch": 0.503468337435668, |
|
"grad_norm": 16.31020164489746, |
|
"learning_rate": 4.625808055693685e-05, |
|
"loss": 4.2333, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.509062430073842, |
|
"grad_norm": 18.346088409423828, |
|
"learning_rate": 4.6154483673131113e-05, |
|
"loss": 4.4483, |
|
"step": 2275 |
|
}, |
|
{ |
|
"epoch": 0.5146565227120161, |
|
"grad_norm": 15.169132232666016, |
|
"learning_rate": 4.6050886789325384e-05, |
|
"loss": 4.3814, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.5202506153501902, |
|
"grad_norm": 10.011805534362793, |
|
"learning_rate": 4.594728990551964e-05, |
|
"loss": 4.6235, |
|
"step": 2325 |
|
}, |
|
{ |
|
"epoch": 0.5258447079883642, |
|
"grad_norm": 11.23599624633789, |
|
"learning_rate": 4.584369302171391e-05, |
|
"loss": 4.5861, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.5314388006265384, |
|
"grad_norm": 10.484898567199707, |
|
"learning_rate": 4.5740096137908174e-05, |
|
"loss": 4.3894, |
|
"step": 2375 |
|
}, |
|
{ |
|
"epoch": 0.5370328932647125, |
|
"grad_norm": 12.192333221435547, |
|
"learning_rate": 4.563649925410244e-05, |
|
"loss": 4.448, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.5426269859028866, |
|
"grad_norm": 15.25631332397461, |
|
"learning_rate": 4.553290237029671e-05, |
|
"loss": 4.3787, |
|
"step": 2425 |
|
}, |
|
{ |
|
"epoch": 0.5482210785410606, |
|
"grad_norm": 10.27658748626709, |
|
"learning_rate": 4.542930548649097e-05, |
|
"loss": 4.4333, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.5538151711792347, |
|
"grad_norm": 16.127513885498047, |
|
"learning_rate": 4.5325708602685234e-05, |
|
"loss": 4.1792, |
|
"step": 2475 |
|
}, |
|
{ |
|
"epoch": 0.5594092638174089, |
|
"grad_norm": 10.214879035949707, |
|
"learning_rate": 4.5222111718879504e-05, |
|
"loss": 4.6276, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5650033564555829, |
|
"grad_norm": 10.60392951965332, |
|
"learning_rate": 4.511851483507376e-05, |
|
"loss": 4.5925, |
|
"step": 2525 |
|
}, |
|
{ |
|
"epoch": 0.570597449093757, |
|
"grad_norm": 11.073285102844238, |
|
"learning_rate": 4.5014917951268024e-05, |
|
"loss": 4.4202, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.5761915417319311, |
|
"grad_norm": 14.709641456604004, |
|
"learning_rate": 4.4911321067462294e-05, |
|
"loss": 4.2758, |
|
"step": 2575 |
|
}, |
|
{ |
|
"epoch": 0.5817856343701052, |
|
"grad_norm": 12.901021957397461, |
|
"learning_rate": 4.480772418365656e-05, |
|
"loss": 4.3067, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.5873797270082792, |
|
"grad_norm": 11.27915096282959, |
|
"learning_rate": 4.470412729985082e-05, |
|
"loss": 4.0111, |
|
"step": 2625 |
|
}, |
|
{ |
|
"epoch": 0.5929738196464533, |
|
"grad_norm": 15.488706588745117, |
|
"learning_rate": 4.460053041604509e-05, |
|
"loss": 4.2671, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.5985679122846275, |
|
"grad_norm": 13.603848457336426, |
|
"learning_rate": 4.4496933532239354e-05, |
|
"loss": 4.2589, |
|
"step": 2675 |
|
}, |
|
{ |
|
"epoch": 0.6041620049228015, |
|
"grad_norm": 12.827610969543457, |
|
"learning_rate": 4.439333664843362e-05, |
|
"loss": 4.6476, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.6097560975609756, |
|
"grad_norm": 16.806106567382812, |
|
"learning_rate": 4.428973976462788e-05, |
|
"loss": 4.6188, |
|
"step": 2725 |
|
}, |
|
{ |
|
"epoch": 0.6153501901991497, |
|
"grad_norm": 14.598794937133789, |
|
"learning_rate": 4.4186142880822144e-05, |
|
"loss": 4.4195, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.6209442828373238, |
|
"grad_norm": 10.380790710449219, |
|
"learning_rate": 4.4082545997016414e-05, |
|
"loss": 4.4634, |
|
"step": 2775 |
|
}, |
|
{ |
|
"epoch": 0.6265383754754978, |
|
"grad_norm": 12.62149715423584, |
|
"learning_rate": 4.397894911321068e-05, |
|
"loss": 4.4751, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.632132468113672, |
|
"grad_norm": 10.467231750488281, |
|
"learning_rate": 4.387535222940494e-05, |
|
"loss": 4.2898, |
|
"step": 2825 |
|
}, |
|
{ |
|
"epoch": 0.6377265607518461, |
|
"grad_norm": 12.244780540466309, |
|
"learning_rate": 4.377175534559921e-05, |
|
"loss": 4.3242, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.6433206533900201, |
|
"grad_norm": 12.39667797088623, |
|
"learning_rate": 4.3668158461793474e-05, |
|
"loss": 4.2754, |
|
"step": 2875 |
|
}, |
|
{ |
|
"epoch": 0.6489147460281942, |
|
"grad_norm": 12.747861862182617, |
|
"learning_rate": 4.356456157798773e-05, |
|
"loss": 4.286, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.6545088386663683, |
|
"grad_norm": 22.809650421142578, |
|
"learning_rate": 4.3460964694182e-05, |
|
"loss": 4.3482, |
|
"step": 2925 |
|
}, |
|
{ |
|
"epoch": 0.6601029313045425, |
|
"grad_norm": 10.659783363342285, |
|
"learning_rate": 4.3357367810376264e-05, |
|
"loss": 4.2066, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.6656970239427165, |
|
"grad_norm": 14.72028636932373, |
|
"learning_rate": 4.325377092657053e-05, |
|
"loss": 4.4523, |
|
"step": 2975 |
|
}, |
|
{ |
|
"epoch": 0.6712911165808906, |
|
"grad_norm": 10.166138648986816, |
|
"learning_rate": 4.31501740427648e-05, |
|
"loss": 4.3615, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6768852092190647, |
|
"grad_norm": 12.992719650268555, |
|
"learning_rate": 4.304657715895906e-05, |
|
"loss": 4.5061, |
|
"step": 3025 |
|
}, |
|
{ |
|
"epoch": 0.6824793018572387, |
|
"grad_norm": 11.16627311706543, |
|
"learning_rate": 4.2942980275153324e-05, |
|
"loss": 4.5588, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.6880733944954128, |
|
"grad_norm": 10.317902565002441, |
|
"learning_rate": 4.2839383391347594e-05, |
|
"loss": 4.3462, |
|
"step": 3075 |
|
}, |
|
{ |
|
"epoch": 0.693667487133587, |
|
"grad_norm": 12.662385940551758, |
|
"learning_rate": 4.273578650754185e-05, |
|
"loss": 4.5361, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.6992615797717611, |
|
"grad_norm": 16.921144485473633, |
|
"learning_rate": 4.263218962373612e-05, |
|
"loss": 4.2344, |
|
"step": 3125 |
|
}, |
|
{ |
|
"epoch": 0.7048556724099351, |
|
"grad_norm": 17.665006637573242, |
|
"learning_rate": 4.2528592739930384e-05, |
|
"loss": 4.2197, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.7104497650481092, |
|
"grad_norm": 9.232120513916016, |
|
"learning_rate": 4.242499585612465e-05, |
|
"loss": 4.511, |
|
"step": 3175 |
|
}, |
|
{ |
|
"epoch": 0.7160438576862833, |
|
"grad_norm": 12.514689445495605, |
|
"learning_rate": 4.232139897231892e-05, |
|
"loss": 4.5251, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.7216379503244573, |
|
"grad_norm": 11.398234367370605, |
|
"learning_rate": 4.221780208851318e-05, |
|
"loss": 4.1084, |
|
"step": 3225 |
|
}, |
|
{ |
|
"epoch": 0.7272320429626314, |
|
"grad_norm": 8.08095932006836, |
|
"learning_rate": 4.2114205204707444e-05, |
|
"loss": 4.1601, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.7328261356008056, |
|
"grad_norm": 12.109641075134277, |
|
"learning_rate": 4.2010608320901714e-05, |
|
"loss": 4.274, |
|
"step": 3275 |
|
}, |
|
{ |
|
"epoch": 0.7384202282389797, |
|
"grad_norm": 10.819121360778809, |
|
"learning_rate": 4.190701143709597e-05, |
|
"loss": 4.5686, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.7440143208771537, |
|
"grad_norm": 11.090829849243164, |
|
"learning_rate": 4.180341455329024e-05, |
|
"loss": 4.5401, |
|
"step": 3325 |
|
}, |
|
{ |
|
"epoch": 0.7496084135153278, |
|
"grad_norm": 11.24759578704834, |
|
"learning_rate": 4.1699817669484504e-05, |
|
"loss": 4.4797, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.7552025061535019, |
|
"grad_norm": 10.916013717651367, |
|
"learning_rate": 4.159622078567877e-05, |
|
"loss": 4.1258, |
|
"step": 3375 |
|
}, |
|
{ |
|
"epoch": 0.7607965987916759, |
|
"grad_norm": 11.953822135925293, |
|
"learning_rate": 4.149262390187304e-05, |
|
"loss": 4.3313, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.7663906914298501, |
|
"grad_norm": 16.665861129760742, |
|
"learning_rate": 4.13890270180673e-05, |
|
"loss": 4.2789, |
|
"step": 3425 |
|
}, |
|
{ |
|
"epoch": 0.7719847840680242, |
|
"grad_norm": 11.539497375488281, |
|
"learning_rate": 4.1285430134261564e-05, |
|
"loss": 4.5436, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.7775788767061983, |
|
"grad_norm": 11.955995559692383, |
|
"learning_rate": 4.118183325045583e-05, |
|
"loss": 4.1149, |
|
"step": 3475 |
|
}, |
|
{ |
|
"epoch": 0.7831729693443723, |
|
"grad_norm": 15.087596893310547, |
|
"learning_rate": 4.107823636665009e-05, |
|
"loss": 4.3523, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.7887670619825464, |
|
"grad_norm": 14.733497619628906, |
|
"learning_rate": 4.0974639482844354e-05, |
|
"loss": 4.2913, |
|
"step": 3525 |
|
}, |
|
{ |
|
"epoch": 0.7943611546207205, |
|
"grad_norm": 10.125676155090332, |
|
"learning_rate": 4.0871042599038624e-05, |
|
"loss": 4.3078, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.7999552472588946, |
|
"grad_norm": 11.222993850708008, |
|
"learning_rate": 4.076744571523289e-05, |
|
"loss": 4.4803, |
|
"step": 3575 |
|
}, |
|
{ |
|
"epoch": 0.8055493398970687, |
|
"grad_norm": 10.871453285217285, |
|
"learning_rate": 4.066384883142715e-05, |
|
"loss": 4.5347, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.8111434325352428, |
|
"grad_norm": 10.571527481079102, |
|
"learning_rate": 4.056025194762142e-05, |
|
"loss": 4.1658, |
|
"step": 3625 |
|
}, |
|
{ |
|
"epoch": 0.8167375251734169, |
|
"grad_norm": 12.036689758300781, |
|
"learning_rate": 4.0456655063815685e-05, |
|
"loss": 4.3284, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.8223316178115909, |
|
"grad_norm": 13.614919662475586, |
|
"learning_rate": 4.035305818000995e-05, |
|
"loss": 4.1228, |
|
"step": 3675 |
|
}, |
|
{ |
|
"epoch": 0.827925710449765, |
|
"grad_norm": 12.302602767944336, |
|
"learning_rate": 4.024946129620421e-05, |
|
"loss": 4.0459, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.8335198030879392, |
|
"grad_norm": 13.102405548095703, |
|
"learning_rate": 4.0145864412398474e-05, |
|
"loss": 4.4641, |
|
"step": 3725 |
|
}, |
|
{ |
|
"epoch": 0.8391138957261133, |
|
"grad_norm": 12.71800422668457, |
|
"learning_rate": 4.0042267528592745e-05, |
|
"loss": 4.206, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.8447079883642873, |
|
"grad_norm": 13.687782287597656, |
|
"learning_rate": 3.993867064478701e-05, |
|
"loss": 4.388, |
|
"step": 3775 |
|
}, |
|
{ |
|
"epoch": 0.8503020810024614, |
|
"grad_norm": 16.609664916992188, |
|
"learning_rate": 3.983507376098127e-05, |
|
"loss": 4.1664, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.8558961736406355, |
|
"grad_norm": 16.731786727905273, |
|
"learning_rate": 3.973147687717554e-05, |
|
"loss": 4.3956, |
|
"step": 3825 |
|
}, |
|
{ |
|
"epoch": 0.8614902662788095, |
|
"grad_norm": 17.152843475341797, |
|
"learning_rate": 3.96278799933698e-05, |
|
"loss": 4.3338, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.8670843589169837, |
|
"grad_norm": 14.199774742126465, |
|
"learning_rate": 3.952428310956406e-05, |
|
"loss": 4.4609, |
|
"step": 3875 |
|
}, |
|
{ |
|
"epoch": 0.8726784515551578, |
|
"grad_norm": 11.605820655822754, |
|
"learning_rate": 3.942068622575833e-05, |
|
"loss": 4.1688, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.8782725441933319, |
|
"grad_norm": 11.608319282531738, |
|
"learning_rate": 3.9317089341952595e-05, |
|
"loss": 4.2122, |
|
"step": 3925 |
|
}, |
|
{ |
|
"epoch": 0.8838666368315059, |
|
"grad_norm": 12.212512016296387, |
|
"learning_rate": 3.921349245814686e-05, |
|
"loss": 4.2482, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.88946072946968, |
|
"grad_norm": 12.425273895263672, |
|
"learning_rate": 3.910989557434113e-05, |
|
"loss": 4.5128, |
|
"step": 3975 |
|
}, |
|
{ |
|
"epoch": 0.8950548221078541, |
|
"grad_norm": 14.292542457580566, |
|
"learning_rate": 3.900629869053539e-05, |
|
"loss": 4.5421, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.9006489147460282, |
|
"grad_norm": 9.911199569702148, |
|
"learning_rate": 3.8902701806729655e-05, |
|
"loss": 4.3237, |
|
"step": 4025 |
|
}, |
|
{ |
|
"epoch": 0.9062430073842023, |
|
"grad_norm": 6.303875923156738, |
|
"learning_rate": 3.879910492292392e-05, |
|
"loss": 4.195, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.9118371000223764, |
|
"grad_norm": 8.433326721191406, |
|
"learning_rate": 3.869550803911818e-05, |
|
"loss": 4.1758, |
|
"step": 4075 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"grad_norm": 12.792257308959961, |
|
"learning_rate": 3.859191115531245e-05, |
|
"loss": 4.1538, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.9230252852987245, |
|
"grad_norm": 10.63804817199707, |
|
"learning_rate": 3.8488314271506715e-05, |
|
"loss": 4.2415, |
|
"step": 4125 |
|
}, |
|
{ |
|
"epoch": 0.9286193779368986, |
|
"grad_norm": 10.244176864624023, |
|
"learning_rate": 3.838471738770098e-05, |
|
"loss": 4.624, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.9342134705750728, |
|
"grad_norm": 14.590502738952637, |
|
"learning_rate": 3.828112050389525e-05, |
|
"loss": 4.261, |
|
"step": 4175 |
|
}, |
|
{ |
|
"epoch": 0.9398075632132468, |
|
"grad_norm": 17.149826049804688, |
|
"learning_rate": 3.817752362008951e-05, |
|
"loss": 4.0573, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.9454016558514209, |
|
"grad_norm": 10.837606430053711, |
|
"learning_rate": 3.8073926736283775e-05, |
|
"loss": 4.2502, |
|
"step": 4225 |
|
}, |
|
{ |
|
"epoch": 0.950995748489595, |
|
"grad_norm": 13.960970878601074, |
|
"learning_rate": 3.797032985247804e-05, |
|
"loss": 4.1431, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.9565898411277691, |
|
"grad_norm": 10.603372573852539, |
|
"learning_rate": 3.78667329686723e-05, |
|
"loss": 4.2805, |
|
"step": 4275 |
|
}, |
|
{ |
|
"epoch": 0.9621839337659431, |
|
"grad_norm": 14.068360328674316, |
|
"learning_rate": 3.776313608486657e-05, |
|
"loss": 4.2771, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.9677780264041173, |
|
"grad_norm": 12.487285614013672, |
|
"learning_rate": 3.7659539201060835e-05, |
|
"loss": 4.4749, |
|
"step": 4325 |
|
}, |
|
{ |
|
"epoch": 0.9733721190422914, |
|
"grad_norm": 11.214025497436523, |
|
"learning_rate": 3.75559423172551e-05, |
|
"loss": 4.1715, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.9789662116804654, |
|
"grad_norm": 12.117270469665527, |
|
"learning_rate": 3.745234543344937e-05, |
|
"loss": 4.1669, |
|
"step": 4375 |
|
}, |
|
{ |
|
"epoch": 0.9845603043186395, |
|
"grad_norm": 7.657718181610107, |
|
"learning_rate": 3.734874854964363e-05, |
|
"loss": 4.1935, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.9901543969568136, |
|
"grad_norm": 12.381430625915527, |
|
"learning_rate": 3.724929554119012e-05, |
|
"loss": 4.3682, |
|
"step": 4425 |
|
}, |
|
{ |
|
"epoch": 0.9957484895949877, |
|
"grad_norm": 12.749862670898438, |
|
"learning_rate": 3.7145698657384385e-05, |
|
"loss": 4.2554, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 67.1128, |
|
"eval_loss": 4.290805816650391, |
|
"eval_rouge1": 26.1327, |
|
"eval_rouge2": 10.0836, |
|
"eval_rougeL": 24.9862, |
|
"eval_rougeLsum": 25.321, |
|
"eval_runtime": 1004.8822, |
|
"eval_samples_per_second": 1.112, |
|
"eval_steps_per_second": 0.279, |
|
"step": 4469 |
|
}, |
|
{ |
|
"epoch": 1.0013425822331619, |
|
"grad_norm": 15.127477645874023, |
|
"learning_rate": 3.7042101773578655e-05, |
|
"loss": 3.9898, |
|
"step": 4475 |
|
}, |
|
{ |
|
"epoch": 1.006936674871336, |
|
"grad_norm": 10.354872703552246, |
|
"learning_rate": 3.693850488977292e-05, |
|
"loss": 3.623, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 1.0125307675095099, |
|
"grad_norm": 13.131593704223633, |
|
"learning_rate": 3.683490800596718e-05, |
|
"loss": 3.7952, |
|
"step": 4525 |
|
}, |
|
{ |
|
"epoch": 1.018124860147684, |
|
"grad_norm": 10.636340141296387, |
|
"learning_rate": 3.673131112216145e-05, |
|
"loss": 3.4462, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 1.0237189527858581, |
|
"grad_norm": 14.03700065612793, |
|
"learning_rate": 3.662771423835571e-05, |
|
"loss": 3.4108, |
|
"step": 4575 |
|
}, |
|
{ |
|
"epoch": 1.0293130454240322, |
|
"grad_norm": 21.94048309326172, |
|
"learning_rate": 3.652411735454997e-05, |
|
"loss": 3.7664, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 1.0349071380622064, |
|
"grad_norm": 11.382323265075684, |
|
"learning_rate": 3.642052047074424e-05, |
|
"loss": 3.7189, |
|
"step": 4625 |
|
}, |
|
{ |
|
"epoch": 1.0405012307003805, |
|
"grad_norm": 11.167036056518555, |
|
"learning_rate": 3.6316923586938505e-05, |
|
"loss": 3.7947, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 1.0460953233385544, |
|
"grad_norm": 13.024956703186035, |
|
"learning_rate": 3.621332670313277e-05, |
|
"loss": 3.6083, |
|
"step": 4675 |
|
}, |
|
{ |
|
"epoch": 1.0516894159767285, |
|
"grad_norm": 11.757680892944336, |
|
"learning_rate": 3.610972981932704e-05, |
|
"loss": 3.5998, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 1.0572835086149026, |
|
"grad_norm": 14.893111228942871, |
|
"learning_rate": 3.60061329355213e-05, |
|
"loss": 3.6173, |
|
"step": 4725 |
|
}, |
|
{ |
|
"epoch": 1.0628776012530767, |
|
"grad_norm": 9.222747802734375, |
|
"learning_rate": 3.5902536051715565e-05, |
|
"loss": 3.6453, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 1.0684716938912509, |
|
"grad_norm": 25.488880157470703, |
|
"learning_rate": 3.579893916790983e-05, |
|
"loss": 3.3486, |
|
"step": 4775 |
|
}, |
|
{ |
|
"epoch": 1.074065786529425, |
|
"grad_norm": 10.05694580078125, |
|
"learning_rate": 3.569534228410409e-05, |
|
"loss": 3.7705, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 1.079659879167599, |
|
"grad_norm": 12.402889251708984, |
|
"learning_rate": 3.559174540029836e-05, |
|
"loss": 3.3739, |
|
"step": 4825 |
|
}, |
|
{ |
|
"epoch": 1.085253971805773, |
|
"grad_norm": 10.890093803405762, |
|
"learning_rate": 3.5488148516492625e-05, |
|
"loss": 3.7975, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 1.090848064443947, |
|
"grad_norm": 12.410653114318848, |
|
"learning_rate": 3.538455163268689e-05, |
|
"loss": 3.5483, |
|
"step": 4875 |
|
}, |
|
{ |
|
"epoch": 1.0964421570821212, |
|
"grad_norm": 11.636606216430664, |
|
"learning_rate": 3.528095474888116e-05, |
|
"loss": 3.5182, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 1.1020362497202953, |
|
"grad_norm": 14.367986679077148, |
|
"learning_rate": 3.517735786507542e-05, |
|
"loss": 3.9954, |
|
"step": 4925 |
|
}, |
|
{ |
|
"epoch": 1.1076303423584695, |
|
"grad_norm": 10.753607749938965, |
|
"learning_rate": 3.5073760981269685e-05, |
|
"loss": 3.8582, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 1.1132244349966436, |
|
"grad_norm": 9.407801628112793, |
|
"learning_rate": 3.497016409746395e-05, |
|
"loss": 3.6181, |
|
"step": 4975 |
|
}, |
|
{ |
|
"epoch": 1.1188185276348177, |
|
"grad_norm": 9.98642349243164, |
|
"learning_rate": 3.486656721365821e-05, |
|
"loss": 3.5989, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 1.1244126202729916, |
|
"grad_norm": 9.880094528198242, |
|
"learning_rate": 3.476297032985248e-05, |
|
"loss": 3.658, |
|
"step": 5025 |
|
}, |
|
{ |
|
"epoch": 1.1300067129111657, |
|
"grad_norm": 14.001792907714844, |
|
"learning_rate": 3.4659373446046745e-05, |
|
"loss": 3.7073, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 1.1356008055493398, |
|
"grad_norm": 18.54832649230957, |
|
"learning_rate": 3.455577656224101e-05, |
|
"loss": 3.7806, |
|
"step": 5075 |
|
}, |
|
{ |
|
"epoch": 1.141194898187514, |
|
"grad_norm": 9.804744720458984, |
|
"learning_rate": 3.445217967843528e-05, |
|
"loss": 3.6671, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 1.146788990825688, |
|
"grad_norm": 8.401939392089844, |
|
"learning_rate": 3.434858279462954e-05, |
|
"loss": 3.8566, |
|
"step": 5125 |
|
}, |
|
{ |
|
"epoch": 1.1523830834638622, |
|
"grad_norm": 10.120752334594727, |
|
"learning_rate": 3.42449859108238e-05, |
|
"loss": 3.8143, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 1.1579771761020363, |
|
"grad_norm": 16.10240364074707, |
|
"learning_rate": 3.414138902701807e-05, |
|
"loss": 3.7451, |
|
"step": 5175 |
|
}, |
|
{ |
|
"epoch": 1.1635712687402102, |
|
"grad_norm": 10.377949714660645, |
|
"learning_rate": 3.403779214321233e-05, |
|
"loss": 3.6082, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 1.1691653613783843, |
|
"grad_norm": 10.826866149902344, |
|
"learning_rate": 3.3934195259406595e-05, |
|
"loss": 3.8001, |
|
"step": 5225 |
|
}, |
|
{ |
|
"epoch": 1.1747594540165585, |
|
"grad_norm": 10.02441120147705, |
|
"learning_rate": 3.3830598375600866e-05, |
|
"loss": 3.6624, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 1.1803535466547326, |
|
"grad_norm": 15.683877944946289, |
|
"learning_rate": 3.372700149179513e-05, |
|
"loss": 3.5834, |
|
"step": 5275 |
|
}, |
|
{ |
|
"epoch": 1.1859476392929067, |
|
"grad_norm": 11.696283340454102, |
|
"learning_rate": 3.362340460798939e-05, |
|
"loss": 3.6069, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 1.1915417319310808, |
|
"grad_norm": 13.27725601196289, |
|
"learning_rate": 3.351980772418366e-05, |
|
"loss": 3.6614, |
|
"step": 5325 |
|
}, |
|
{ |
|
"epoch": 1.197135824569255, |
|
"grad_norm": 11.811793327331543, |
|
"learning_rate": 3.341621084037792e-05, |
|
"loss": 3.6186, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 1.2027299172074288, |
|
"grad_norm": 30.400972366333008, |
|
"learning_rate": 3.331261395657219e-05, |
|
"loss": 3.6704, |
|
"step": 5375 |
|
}, |
|
{ |
|
"epoch": 1.208324009845603, |
|
"grad_norm": 11.845870018005371, |
|
"learning_rate": 3.320901707276645e-05, |
|
"loss": 3.6688, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 1.213918102483777, |
|
"grad_norm": 14.447372436523438, |
|
"learning_rate": 3.3105420188960716e-05, |
|
"loss": 3.8172, |
|
"step": 5425 |
|
}, |
|
{ |
|
"epoch": 1.2195121951219512, |
|
"grad_norm": 9.492889404296875, |
|
"learning_rate": 3.3001823305154986e-05, |
|
"loss": 3.5741, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 1.2251062877601253, |
|
"grad_norm": 12.105642318725586, |
|
"learning_rate": 3.289822642134925e-05, |
|
"loss": 3.5402, |
|
"step": 5475 |
|
}, |
|
{ |
|
"epoch": 1.2307003803982994, |
|
"grad_norm": 7.518635272979736, |
|
"learning_rate": 3.279462953754351e-05, |
|
"loss": 3.7333, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 1.2362944730364736, |
|
"grad_norm": 11.485749244689941, |
|
"learning_rate": 3.269103265373778e-05, |
|
"loss": 3.8156, |
|
"step": 5525 |
|
}, |
|
{ |
|
"epoch": 1.2418885656746474, |
|
"grad_norm": 11.726677894592285, |
|
"learning_rate": 3.258743576993204e-05, |
|
"loss": 3.937, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 1.2474826583128216, |
|
"grad_norm": 13.454861640930176, |
|
"learning_rate": 3.24838388861263e-05, |
|
"loss": 3.5792, |
|
"step": 5575 |
|
}, |
|
{ |
|
"epoch": 1.2530767509509957, |
|
"grad_norm": 17.696428298950195, |
|
"learning_rate": 3.238024200232057e-05, |
|
"loss": 3.5828, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 1.2586708435891698, |
|
"grad_norm": 12.128670692443848, |
|
"learning_rate": 3.2276645118514836e-05, |
|
"loss": 3.6379, |
|
"step": 5625 |
|
}, |
|
{ |
|
"epoch": 1.264264936227344, |
|
"grad_norm": 11.507698059082031, |
|
"learning_rate": 3.21730482347091e-05, |
|
"loss": 3.5192, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 1.269859028865518, |
|
"grad_norm": 11.207321166992188, |
|
"learning_rate": 3.206945135090337e-05, |
|
"loss": 3.885, |
|
"step": 5675 |
|
}, |
|
{ |
|
"epoch": 1.2754531215036922, |
|
"grad_norm": 9.954567909240723, |
|
"learning_rate": 3.196585446709763e-05, |
|
"loss": 3.6138, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 1.281047214141866, |
|
"grad_norm": 9.7274751663208, |
|
"learning_rate": 3.1862257583291896e-05, |
|
"loss": 3.7395, |
|
"step": 5725 |
|
}, |
|
{ |
|
"epoch": 1.2866413067800404, |
|
"grad_norm": 10.094833374023438, |
|
"learning_rate": 3.175866069948616e-05, |
|
"loss": 3.6801, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 1.2922353994182143, |
|
"grad_norm": 12.403266906738281, |
|
"learning_rate": 3.165506381568042e-05, |
|
"loss": 3.6891, |
|
"step": 5775 |
|
}, |
|
{ |
|
"epoch": 1.2978294920563884, |
|
"grad_norm": 13.569632530212402, |
|
"learning_rate": 3.155146693187469e-05, |
|
"loss": 3.392, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 1.3034235846945625, |
|
"grad_norm": 10.21789836883545, |
|
"learning_rate": 3.1447870048068956e-05, |
|
"loss": 3.4392, |
|
"step": 5825 |
|
}, |
|
{ |
|
"epoch": 1.3090176773327367, |
|
"grad_norm": 9.875311851501465, |
|
"learning_rate": 3.134427316426322e-05, |
|
"loss": 3.6576, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 1.3146117699709108, |
|
"grad_norm": 13.931588172912598, |
|
"learning_rate": 3.124067628045749e-05, |
|
"loss": 3.869, |
|
"step": 5875 |
|
}, |
|
{ |
|
"epoch": 1.3202058626090847, |
|
"grad_norm": 9.532690048217773, |
|
"learning_rate": 3.113707939665175e-05, |
|
"loss": 3.7711, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 1.325799955247259, |
|
"grad_norm": 9.777695655822754, |
|
"learning_rate": 3.1033482512846016e-05, |
|
"loss": 3.4998, |
|
"step": 5925 |
|
}, |
|
{ |
|
"epoch": 1.331394047885433, |
|
"grad_norm": 10.633079528808594, |
|
"learning_rate": 3.092988562904028e-05, |
|
"loss": 3.6, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 1.336988140523607, |
|
"grad_norm": 11.918797492980957, |
|
"learning_rate": 3.082628874523454e-05, |
|
"loss": 3.6084, |
|
"step": 5975 |
|
}, |
|
{ |
|
"epoch": 1.3425822331617812, |
|
"grad_norm": 11.27762222290039, |
|
"learning_rate": 3.072269186142881e-05, |
|
"loss": 3.6122, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 1.3481763257999553, |
|
"grad_norm": 18.506898880004883, |
|
"learning_rate": 3.0619094977623076e-05, |
|
"loss": 3.2751, |
|
"step": 6025 |
|
}, |
|
{ |
|
"epoch": 1.3537704184381294, |
|
"grad_norm": 21.525028228759766, |
|
"learning_rate": 3.051549809381734e-05, |
|
"loss": 3.4856, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 1.3593645110763033, |
|
"grad_norm": 13.029548645019531, |
|
"learning_rate": 3.0411901210011606e-05, |
|
"loss": 3.5935, |
|
"step": 6075 |
|
}, |
|
{ |
|
"epoch": 1.3649586037144776, |
|
"grad_norm": 9.536824226379395, |
|
"learning_rate": 3.0308304326205873e-05, |
|
"loss": 3.569, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 1.3705526963526515, |
|
"grad_norm": 11.298256874084473, |
|
"learning_rate": 3.0204707442400133e-05, |
|
"loss": 3.8319, |
|
"step": 6125 |
|
}, |
|
{ |
|
"epoch": 1.3761467889908257, |
|
"grad_norm": 8.461627006530762, |
|
"learning_rate": 3.0101110558594396e-05, |
|
"loss": 3.7151, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 1.3817408816289998, |
|
"grad_norm": 8.925524711608887, |
|
"learning_rate": 2.9997513674788663e-05, |
|
"loss": 3.5296, |
|
"step": 6175 |
|
}, |
|
{ |
|
"epoch": 1.387334974267174, |
|
"grad_norm": 12.109110832214355, |
|
"learning_rate": 2.989391679098293e-05, |
|
"loss": 3.4691, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 1.392929066905348, |
|
"grad_norm": 9.915245056152344, |
|
"learning_rate": 2.9790319907177193e-05, |
|
"loss": 3.5228, |
|
"step": 6225 |
|
}, |
|
{ |
|
"epoch": 1.398523159543522, |
|
"grad_norm": 12.354578018188477, |
|
"learning_rate": 2.968672302337146e-05, |
|
"loss": 3.5543, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 1.4041172521816963, |
|
"grad_norm": 13.05074405670166, |
|
"learning_rate": 2.9583126139565726e-05, |
|
"loss": 3.5762, |
|
"step": 6275 |
|
}, |
|
{ |
|
"epoch": 1.4097113448198701, |
|
"grad_norm": 9.809946060180664, |
|
"learning_rate": 2.9479529255759986e-05, |
|
"loss": 3.4119, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 1.4153054374580443, |
|
"grad_norm": 10.61130142211914, |
|
"learning_rate": 2.937593237195425e-05, |
|
"loss": 3.5493, |
|
"step": 6325 |
|
}, |
|
{ |
|
"epoch": 1.4208995300962184, |
|
"grad_norm": 8.541769027709961, |
|
"learning_rate": 2.9272335488148516e-05, |
|
"loss": 3.607, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 1.4264936227343925, |
|
"grad_norm": 14.672987937927246, |
|
"learning_rate": 2.9168738604342783e-05, |
|
"loss": 3.3681, |
|
"step": 6375 |
|
}, |
|
{ |
|
"epoch": 1.4320877153725666, |
|
"grad_norm": 16.417062759399414, |
|
"learning_rate": 2.9065141720537046e-05, |
|
"loss": 3.581, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 1.4376818080107405, |
|
"grad_norm": 11.234773635864258, |
|
"learning_rate": 2.8961544836731313e-05, |
|
"loss": 3.8083, |
|
"step": 6425 |
|
}, |
|
{ |
|
"epoch": 1.4432759006489149, |
|
"grad_norm": 8.443843841552734, |
|
"learning_rate": 2.885794795292558e-05, |
|
"loss": 3.4413, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 1.4488699932870888, |
|
"grad_norm": 11.358126640319824, |
|
"learning_rate": 2.8754351069119843e-05, |
|
"loss": 3.5337, |
|
"step": 6475 |
|
}, |
|
{ |
|
"epoch": 1.4544640859252629, |
|
"grad_norm": 15.18321418762207, |
|
"learning_rate": 2.8650754185314106e-05, |
|
"loss": 3.7784, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 1.460058178563437, |
|
"grad_norm": 10.894759178161621, |
|
"learning_rate": 2.854715730150837e-05, |
|
"loss": 3.736, |
|
"step": 6525 |
|
}, |
|
{ |
|
"epoch": 1.4656522712016111, |
|
"grad_norm": 8.780854225158691, |
|
"learning_rate": 2.8443560417702636e-05, |
|
"loss": 3.5959, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 1.4712463638397852, |
|
"grad_norm": 16.36128044128418, |
|
"learning_rate": 2.8339963533896903e-05, |
|
"loss": 3.6979, |
|
"step": 6575 |
|
}, |
|
{ |
|
"epoch": 1.4768404564779591, |
|
"grad_norm": 10.243796348571777, |
|
"learning_rate": 2.8236366650091166e-05, |
|
"loss": 3.6813, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 1.4824345491161335, |
|
"grad_norm": 13.104743003845215, |
|
"learning_rate": 2.8132769766285433e-05, |
|
"loss": 3.6853, |
|
"step": 6625 |
|
}, |
|
{ |
|
"epoch": 1.4880286417543074, |
|
"grad_norm": 11.391397476196289, |
|
"learning_rate": 2.80291728824797e-05, |
|
"loss": 3.5911, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 1.4936227343924815, |
|
"grad_norm": 11.6659574508667, |
|
"learning_rate": 2.792557599867396e-05, |
|
"loss": 3.5579, |
|
"step": 6675 |
|
}, |
|
{ |
|
"epoch": 1.4992168270306556, |
|
"grad_norm": 11.695647239685059, |
|
"learning_rate": 2.7821979114868223e-05, |
|
"loss": 3.5243, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 1.5048109196688297, |
|
"grad_norm": 9.701094627380371, |
|
"learning_rate": 2.771838223106249e-05, |
|
"loss": 3.6699, |
|
"step": 6725 |
|
}, |
|
{ |
|
"epoch": 1.5104050123070039, |
|
"grad_norm": 15.949247360229492, |
|
"learning_rate": 2.7614785347256757e-05, |
|
"loss": 3.7613, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 1.5159991049451778, |
|
"grad_norm": 13.379142761230469, |
|
"learning_rate": 2.751118846345102e-05, |
|
"loss": 3.7097, |
|
"step": 6775 |
|
}, |
|
{ |
|
"epoch": 1.521593197583352, |
|
"grad_norm": 10.693124771118164, |
|
"learning_rate": 2.7407591579645287e-05, |
|
"loss": 3.7779, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 1.527187290221526, |
|
"grad_norm": 7.9651312828063965, |
|
"learning_rate": 2.7303994695839553e-05, |
|
"loss": 3.6414, |
|
"step": 6825 |
|
}, |
|
{ |
|
"epoch": 1.5327813828597001, |
|
"grad_norm": 11.157812118530273, |
|
"learning_rate": 2.7200397812033817e-05, |
|
"loss": 3.6576, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 1.5383754754978742, |
|
"grad_norm": 12.323993682861328, |
|
"learning_rate": 2.7096800928228077e-05, |
|
"loss": 3.4723, |
|
"step": 6875 |
|
}, |
|
{ |
|
"epoch": 1.5439695681360484, |
|
"grad_norm": 10.302526473999023, |
|
"learning_rate": 2.6993204044422343e-05, |
|
"loss": 3.7213, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 1.5495636607742225, |
|
"grad_norm": 10.622782707214355, |
|
"learning_rate": 2.688960716061661e-05, |
|
"loss": 3.582, |
|
"step": 6925 |
|
}, |
|
{ |
|
"epoch": 1.5551577534123964, |
|
"grad_norm": 12.592206001281738, |
|
"learning_rate": 2.6786010276810873e-05, |
|
"loss": 3.524, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 1.5607518460505707, |
|
"grad_norm": 10.893891334533691, |
|
"learning_rate": 2.668241339300514e-05, |
|
"loss": 3.4272, |
|
"step": 6975 |
|
}, |
|
{ |
|
"epoch": 1.5663459386887446, |
|
"grad_norm": 11.728677749633789, |
|
"learning_rate": 2.6578816509199407e-05, |
|
"loss": 3.5618, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 1.5719400313269187, |
|
"grad_norm": 14.411259651184082, |
|
"learning_rate": 2.647521962539367e-05, |
|
"loss": 3.6041, |
|
"step": 7025 |
|
}, |
|
{ |
|
"epoch": 1.5775341239650928, |
|
"grad_norm": 12.213258743286133, |
|
"learning_rate": 2.6371622741587937e-05, |
|
"loss": 3.5627, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 1.583128216603267, |
|
"grad_norm": 12.341785430908203, |
|
"learning_rate": 2.6268025857782197e-05, |
|
"loss": 3.631, |
|
"step": 7075 |
|
}, |
|
{ |
|
"epoch": 1.588722309241441, |
|
"grad_norm": 22.097862243652344, |
|
"learning_rate": 2.6164428973976464e-05, |
|
"loss": 3.5977, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 1.594316401879615, |
|
"grad_norm": 11.355073928833008, |
|
"learning_rate": 2.6060832090170727e-05, |
|
"loss": 3.6045, |
|
"step": 7125 |
|
}, |
|
{ |
|
"epoch": 1.5999104945177893, |
|
"grad_norm": 12.087318420410156, |
|
"learning_rate": 2.5957235206364994e-05, |
|
"loss": 3.5971, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 1.6055045871559632, |
|
"grad_norm": 12.888108253479004, |
|
"learning_rate": 2.585363832255926e-05, |
|
"loss": 3.4293, |
|
"step": 7175 |
|
}, |
|
{ |
|
"epoch": 1.6110986797941373, |
|
"grad_norm": 11.495614051818848, |
|
"learning_rate": 2.5750041438753524e-05, |
|
"loss": 3.9471, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 1.6166927724323115, |
|
"grad_norm": 12.585895538330078, |
|
"learning_rate": 2.564644455494779e-05, |
|
"loss": 3.5671, |
|
"step": 7225 |
|
}, |
|
{ |
|
"epoch": 1.6222868650704856, |
|
"grad_norm": 8.79129409790039, |
|
"learning_rate": 2.554284767114205e-05, |
|
"loss": 3.6453, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 1.6278809577086597, |
|
"grad_norm": 10.552870750427246, |
|
"learning_rate": 2.5439250787336317e-05, |
|
"loss": 3.5649, |
|
"step": 7275 |
|
}, |
|
{ |
|
"epoch": 1.6334750503468336, |
|
"grad_norm": 15.649266242980957, |
|
"learning_rate": 2.533565390353058e-05, |
|
"loss": 3.5291, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 1.639069142985008, |
|
"grad_norm": 13.944864273071289, |
|
"learning_rate": 2.5232057019724847e-05, |
|
"loss": 3.3558, |
|
"step": 7325 |
|
}, |
|
{ |
|
"epoch": 1.6446632356231818, |
|
"grad_norm": 14.092317581176758, |
|
"learning_rate": 2.5128460135919114e-05, |
|
"loss": 3.6211, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 1.650257328261356, |
|
"grad_norm": 12.18454647064209, |
|
"learning_rate": 2.5024863252113377e-05, |
|
"loss": 3.7732, |
|
"step": 7375 |
|
}, |
|
{ |
|
"epoch": 1.65585142089953, |
|
"grad_norm": 10.779006958007812, |
|
"learning_rate": 2.492126636830764e-05, |
|
"loss": 3.6804, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 1.6614455135377042, |
|
"grad_norm": 10.292470932006836, |
|
"learning_rate": 2.4817669484501907e-05, |
|
"loss": 3.5316, |
|
"step": 7425 |
|
}, |
|
{ |
|
"epoch": 1.6670396061758783, |
|
"grad_norm": 13.621623039245605, |
|
"learning_rate": 2.4714072600696174e-05, |
|
"loss": 3.4568, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 1.6726336988140522, |
|
"grad_norm": 11.503890991210938, |
|
"learning_rate": 2.4610475716890437e-05, |
|
"loss": 3.3641, |
|
"step": 7475 |
|
}, |
|
{ |
|
"epoch": 1.6782277914522266, |
|
"grad_norm": 13.564423561096191, |
|
"learning_rate": 2.45068788330847e-05, |
|
"loss": 3.5669, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 1.6838218840904005, |
|
"grad_norm": 9.360475540161133, |
|
"learning_rate": 2.4403281949278967e-05, |
|
"loss": 3.6688, |
|
"step": 7525 |
|
}, |
|
{ |
|
"epoch": 1.6894159767285746, |
|
"grad_norm": 13.366150856018066, |
|
"learning_rate": 2.4299685065473234e-05, |
|
"loss": 3.3199, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 1.6950100693667487, |
|
"grad_norm": 10.094856262207031, |
|
"learning_rate": 2.4196088181667497e-05, |
|
"loss": 3.4633, |
|
"step": 7575 |
|
}, |
|
{ |
|
"epoch": 1.7006041620049228, |
|
"grad_norm": 9.134540557861328, |
|
"learning_rate": 2.409249129786176e-05, |
|
"loss": 3.6078, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 1.706198254643097, |
|
"grad_norm": 11.252095222473145, |
|
"learning_rate": 2.3988894414056027e-05, |
|
"loss": 3.7198, |
|
"step": 7625 |
|
}, |
|
{ |
|
"epoch": 1.7117923472812708, |
|
"grad_norm": 6.201746940612793, |
|
"learning_rate": 2.388529753025029e-05, |
|
"loss": 3.5264, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 1.7173864399194452, |
|
"grad_norm": 12.331997871398926, |
|
"learning_rate": 2.3781700646444557e-05, |
|
"loss": 3.4556, |
|
"step": 7675 |
|
}, |
|
{ |
|
"epoch": 1.722980532557619, |
|
"grad_norm": 12.859444618225098, |
|
"learning_rate": 2.367810376263882e-05, |
|
"loss": 3.4533, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 1.7285746251957932, |
|
"grad_norm": 13.243956565856934, |
|
"learning_rate": 2.3574506878833087e-05, |
|
"loss": 3.7355, |
|
"step": 7725 |
|
}, |
|
{ |
|
"epoch": 1.7341687178339673, |
|
"grad_norm": 11.75436019897461, |
|
"learning_rate": 2.347090999502735e-05, |
|
"loss": 3.6108, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 1.7397628104721414, |
|
"grad_norm": 13.429585456848145, |
|
"learning_rate": 2.3367313111221614e-05, |
|
"loss": 3.7925, |
|
"step": 7775 |
|
}, |
|
{ |
|
"epoch": 1.7453569031103155, |
|
"grad_norm": 11.077943801879883, |
|
"learning_rate": 2.326371622741588e-05, |
|
"loss": 3.6788, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 1.7509509957484894, |
|
"grad_norm": 7.700258731842041, |
|
"learning_rate": 2.3160119343610144e-05, |
|
"loss": 3.4562, |
|
"step": 7825 |
|
}, |
|
{ |
|
"epoch": 1.7565450883866638, |
|
"grad_norm": 11.34974479675293, |
|
"learning_rate": 2.305652245980441e-05, |
|
"loss": 3.3855, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 1.7621391810248377, |
|
"grad_norm": 11.598840713500977, |
|
"learning_rate": 2.2952925575998674e-05, |
|
"loss": 3.6097, |
|
"step": 7875 |
|
}, |
|
{ |
|
"epoch": 1.7677332736630118, |
|
"grad_norm": 11.69258975982666, |
|
"learning_rate": 2.284932869219294e-05, |
|
"loss": 3.6146, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 1.773327366301186, |
|
"grad_norm": 10.501328468322754, |
|
"learning_rate": 2.2745731808387204e-05, |
|
"loss": 3.6597, |
|
"step": 7925 |
|
}, |
|
{ |
|
"epoch": 1.77892145893936, |
|
"grad_norm": 12.03715705871582, |
|
"learning_rate": 2.264213492458147e-05, |
|
"loss": 3.6975, |
|
"step": 7950 |
|
}, |
|
{ |
|
"epoch": 1.7845155515775342, |
|
"grad_norm": 13.386404991149902, |
|
"learning_rate": 2.2538538040775734e-05, |
|
"loss": 3.6164, |
|
"step": 7975 |
|
}, |
|
{ |
|
"epoch": 1.790109644215708, |
|
"grad_norm": 9.877335548400879, |
|
"learning_rate": 2.2434941156969997e-05, |
|
"loss": 3.614, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 1.7957037368538824, |
|
"grad_norm": 12.025654792785645, |
|
"learning_rate": 2.2335488148516494e-05, |
|
"loss": 3.743, |
|
"step": 8025 |
|
}, |
|
{ |
|
"epoch": 1.8012978294920563, |
|
"grad_norm": 12.508997917175293, |
|
"learning_rate": 2.2231891264710757e-05, |
|
"loss": 3.7309, |
|
"step": 8050 |
|
}, |
|
{ |
|
"epoch": 1.8068919221302304, |
|
"grad_norm": 12.994415283203125, |
|
"learning_rate": 2.2128294380905024e-05, |
|
"loss": 3.5284, |
|
"step": 8075 |
|
}, |
|
{ |
|
"epoch": 1.8124860147684045, |
|
"grad_norm": 12.859843254089355, |
|
"learning_rate": 2.202469749709929e-05, |
|
"loss": 3.686, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 1.8180801074065787, |
|
"grad_norm": 15.91470718383789, |
|
"learning_rate": 2.192110061329355e-05, |
|
"loss": 3.3395, |
|
"step": 8125 |
|
}, |
|
{ |
|
"epoch": 1.8236742000447528, |
|
"grad_norm": 10.755178451538086, |
|
"learning_rate": 2.1817503729487817e-05, |
|
"loss": 3.6407, |
|
"step": 8150 |
|
}, |
|
{ |
|
"epoch": 1.8292682926829267, |
|
"grad_norm": 10.679194450378418, |
|
"learning_rate": 2.1713906845682084e-05, |
|
"loss": 3.5509, |
|
"step": 8175 |
|
}, |
|
{ |
|
"epoch": 1.834862385321101, |
|
"grad_norm": 18.6633243560791, |
|
"learning_rate": 2.1610309961876348e-05, |
|
"loss": 3.5166, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.840456477959275, |
|
"grad_norm": 7.9321112632751465, |
|
"learning_rate": 2.150671307807061e-05, |
|
"loss": 3.36, |
|
"step": 8225 |
|
}, |
|
{ |
|
"epoch": 1.8460505705974493, |
|
"grad_norm": 10.757131576538086, |
|
"learning_rate": 2.1403116194264878e-05, |
|
"loss": 3.4811, |
|
"step": 8250 |
|
}, |
|
{ |
|
"epoch": 1.8516446632356232, |
|
"grad_norm": 15.632428169250488, |
|
"learning_rate": 2.1299519310459144e-05, |
|
"loss": 3.5502, |
|
"step": 8275 |
|
}, |
|
{ |
|
"epoch": 1.8572387558737973, |
|
"grad_norm": 19.17276954650879, |
|
"learning_rate": 2.1195922426653408e-05, |
|
"loss": 3.5601, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.8628328485119714, |
|
"grad_norm": 11.047025680541992, |
|
"learning_rate": 2.109232554284767e-05, |
|
"loss": 3.6373, |
|
"step": 8325 |
|
}, |
|
{ |
|
"epoch": 1.8684269411501453, |
|
"grad_norm": 15.699575424194336, |
|
"learning_rate": 2.0988728659041938e-05, |
|
"loss": 3.7378, |
|
"step": 8350 |
|
}, |
|
{ |
|
"epoch": 1.8740210337883196, |
|
"grad_norm": 13.09723949432373, |
|
"learning_rate": 2.08851317752362e-05, |
|
"loss": 3.8717, |
|
"step": 8375 |
|
}, |
|
{ |
|
"epoch": 1.8796151264264935, |
|
"grad_norm": 8.441289901733398, |
|
"learning_rate": 2.0781534891430468e-05, |
|
"loss": 3.2558, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.8852092190646679, |
|
"grad_norm": 12.046778678894043, |
|
"learning_rate": 2.067793800762473e-05, |
|
"loss": 3.3936, |
|
"step": 8425 |
|
}, |
|
{ |
|
"epoch": 1.8908033117028418, |
|
"grad_norm": 10.983031272888184, |
|
"learning_rate": 2.0574341123818998e-05, |
|
"loss": 3.4321, |
|
"step": 8450 |
|
}, |
|
{ |
|
"epoch": 1.896397404341016, |
|
"grad_norm": 12.274590492248535, |
|
"learning_rate": 2.047074424001326e-05, |
|
"loss": 3.4638, |
|
"step": 8475 |
|
}, |
|
{ |
|
"epoch": 1.90199149697919, |
|
"grad_norm": 15.135939598083496, |
|
"learning_rate": 2.0367147356207524e-05, |
|
"loss": 3.5686, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.907585589617364, |
|
"grad_norm": 11.194721221923828, |
|
"learning_rate": 2.026355047240179e-05, |
|
"loss": 3.7067, |
|
"step": 8525 |
|
}, |
|
{ |
|
"epoch": 1.9131796822555382, |
|
"grad_norm": 15.062312126159668, |
|
"learning_rate": 2.0159953588596054e-05, |
|
"loss": 3.5299, |
|
"step": 8550 |
|
}, |
|
{ |
|
"epoch": 1.9187737748937121, |
|
"grad_norm": 12.282342910766602, |
|
"learning_rate": 2.005635670479032e-05, |
|
"loss": 3.5803, |
|
"step": 8575 |
|
}, |
|
{ |
|
"epoch": 1.9243678675318865, |
|
"grad_norm": 14.33022689819336, |
|
"learning_rate": 1.9952759820984584e-05, |
|
"loss": 3.5835, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.9299619601700604, |
|
"grad_norm": 8.249588966369629, |
|
"learning_rate": 1.984916293717885e-05, |
|
"loss": 3.2694, |
|
"step": 8625 |
|
}, |
|
{ |
|
"epoch": 1.9355560528082345, |
|
"grad_norm": 9.1649169921875, |
|
"learning_rate": 1.9745566053373115e-05, |
|
"loss": 3.6105, |
|
"step": 8650 |
|
}, |
|
{ |
|
"epoch": 1.9411501454464086, |
|
"grad_norm": 8.755537986755371, |
|
"learning_rate": 1.964196916956738e-05, |
|
"loss": 3.5554, |
|
"step": 8675 |
|
}, |
|
{ |
|
"epoch": 1.9467442380845825, |
|
"grad_norm": 8.148399353027344, |
|
"learning_rate": 1.9538372285761645e-05, |
|
"loss": 3.5073, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.9523383307227569, |
|
"grad_norm": 10.864067077636719, |
|
"learning_rate": 1.943477540195591e-05, |
|
"loss": 3.6951, |
|
"step": 8725 |
|
}, |
|
{ |
|
"epoch": 1.9579324233609308, |
|
"grad_norm": 13.049738883972168, |
|
"learning_rate": 1.9331178518150175e-05, |
|
"loss": 3.5186, |
|
"step": 8750 |
|
}, |
|
{ |
|
"epoch": 1.963526515999105, |
|
"grad_norm": 5.955536842346191, |
|
"learning_rate": 1.922758163434444e-05, |
|
"loss": 3.479, |
|
"step": 8775 |
|
}, |
|
{ |
|
"epoch": 1.969120608637279, |
|
"grad_norm": 12.35295581817627, |
|
"learning_rate": 1.9123984750538705e-05, |
|
"loss": 3.5103, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.9747147012754531, |
|
"grad_norm": 9.945602416992188, |
|
"learning_rate": 1.9020387866732968e-05, |
|
"loss": 3.6919, |
|
"step": 8825 |
|
}, |
|
{ |
|
"epoch": 1.9803087939136272, |
|
"grad_norm": 9.716672897338867, |
|
"learning_rate": 1.8916790982927235e-05, |
|
"loss": 3.7097, |
|
"step": 8850 |
|
}, |
|
{ |
|
"epoch": 1.9859028865518011, |
|
"grad_norm": 7.3651041984558105, |
|
"learning_rate": 1.88131940991215e-05, |
|
"loss": 3.4111, |
|
"step": 8875 |
|
}, |
|
{ |
|
"epoch": 1.9914969791899755, |
|
"grad_norm": 11.258004188537598, |
|
"learning_rate": 1.8709597215315765e-05, |
|
"loss": 3.4201, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.9970910718281494, |
|
"grad_norm": 14.962812423706055, |
|
"learning_rate": 1.8606000331510028e-05, |
|
"loss": 3.4142, |
|
"step": 8925 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_gen_len": 61.8854, |
|
"eval_loss": 4.248934268951416, |
|
"eval_rouge1": 25.7685, |
|
"eval_rouge2": 9.8226, |
|
"eval_rougeL": 24.6426, |
|
"eval_rougeLsum": 24.9756, |
|
"eval_runtime": 700.259, |
|
"eval_samples_per_second": 1.595, |
|
"eval_steps_per_second": 0.4, |
|
"step": 8938 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 13407, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1151792816578560.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|