|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9999537845806875, |
|
"eval_steps": 500, |
|
"global_step": 13523, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0014788934179999814, |
|
"grad_norm": 1.9570056200027466, |
|
"learning_rate": 1.4781966001478198e-06, |
|
"loss": 12.123, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.002957786835999963, |
|
"grad_norm": 3.435842990875244, |
|
"learning_rate": 2.9563932002956396e-06, |
|
"loss": 11.8952, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0044366802539999445, |
|
"grad_norm": 1.3055179119110107, |
|
"learning_rate": 4.434589800443459e-06, |
|
"loss": 11.1244, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.005915573671999926, |
|
"grad_norm": 1.1435202360153198, |
|
"learning_rate": 5.912786400591279e-06, |
|
"loss": 10.6584, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.007394467089999908, |
|
"grad_norm": 1.1122593879699707, |
|
"learning_rate": 7.390983000739099e-06, |
|
"loss": 10.3924, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.008873360507999889, |
|
"grad_norm": 1.0903944969177246, |
|
"learning_rate": 8.869179600886918e-06, |
|
"loss": 10.1278, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.010352253925999871, |
|
"grad_norm": 1.0405408143997192, |
|
"learning_rate": 1.0347376201034738e-05, |
|
"loss": 9.829, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.011831147343999851, |
|
"grad_norm": 1.032538652420044, |
|
"learning_rate": 1.1825572801182558e-05, |
|
"loss": 9.4957, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.013310040761999833, |
|
"grad_norm": 1.4152177572250366, |
|
"learning_rate": 1.3303769401330377e-05, |
|
"loss": 9.1722, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.014788934179999816, |
|
"grad_norm": 0.8978266716003418, |
|
"learning_rate": 1.4781966001478198e-05, |
|
"loss": 8.8736, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.016267827597999798, |
|
"grad_norm": 1.0230133533477783, |
|
"learning_rate": 1.6260162601626018e-05, |
|
"loss": 8.6163, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.017746721015999778, |
|
"grad_norm": 1.3886386156082153, |
|
"learning_rate": 1.7738359201773837e-05, |
|
"loss": 8.3772, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.019225614433999758, |
|
"grad_norm": 0.8950226306915283, |
|
"learning_rate": 1.9216555801921658e-05, |
|
"loss": 8.1872, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.020704507851999742, |
|
"grad_norm": 1.3098183870315552, |
|
"learning_rate": 2.0694752402069477e-05, |
|
"loss": 8.0067, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.022183401269999722, |
|
"grad_norm": 1.3033353090286255, |
|
"learning_rate": 2.2172949002217298e-05, |
|
"loss": 7.8361, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.023662294687999703, |
|
"grad_norm": 1.6088228225708008, |
|
"learning_rate": 2.3651145602365117e-05, |
|
"loss": 7.69, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.025141188105999687, |
|
"grad_norm": 1.0888606309890747, |
|
"learning_rate": 2.5129342202512935e-05, |
|
"loss": 7.5744, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.026620081523999667, |
|
"grad_norm": 1.0944548845291138, |
|
"learning_rate": 2.6607538802660753e-05, |
|
"loss": 7.4501, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.028098974941999647, |
|
"grad_norm": 1.5041922330856323, |
|
"learning_rate": 2.8085735402808578e-05, |
|
"loss": 7.3575, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.02957786835999963, |
|
"grad_norm": 1.4672595262527466, |
|
"learning_rate": 2.9563932002956397e-05, |
|
"loss": 7.2633, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.03105676177799961, |
|
"grad_norm": 1.3001948595046997, |
|
"learning_rate": 3.104212860310421e-05, |
|
"loss": 7.1749, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.032535655195999595, |
|
"grad_norm": 1.4149699211120605, |
|
"learning_rate": 3.2520325203252037e-05, |
|
"loss": 7.098, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.03401454861399957, |
|
"grad_norm": 1.6322951316833496, |
|
"learning_rate": 3.3998521803399855e-05, |
|
"loss": 7.015, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.035493442031999556, |
|
"grad_norm": 1.659485101699829, |
|
"learning_rate": 3.547671840354767e-05, |
|
"loss": 6.9398, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.03697233544999954, |
|
"grad_norm": 1.7957265377044678, |
|
"learning_rate": 3.69549150036955e-05, |
|
"loss": 6.8648, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.038451228867999517, |
|
"grad_norm": 1.4912447929382324, |
|
"learning_rate": 3.8433111603843317e-05, |
|
"loss": 6.7973, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.0399301222859995, |
|
"grad_norm": 1.7237913608551025, |
|
"learning_rate": 3.9911308203991135e-05, |
|
"loss": 6.7331, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.041409015703999484, |
|
"grad_norm": 1.8182610273361206, |
|
"learning_rate": 4.138950480413895e-05, |
|
"loss": 6.668, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.04288790912199946, |
|
"grad_norm": 1.6812163591384888, |
|
"learning_rate": 4.286770140428677e-05, |
|
"loss": 6.5894, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.044366802539999445, |
|
"grad_norm": 1.818665623664856, |
|
"learning_rate": 4.4345898004434597e-05, |
|
"loss": 6.5361, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.04584569595799943, |
|
"grad_norm": 1.3113698959350586, |
|
"learning_rate": 4.5824094604582415e-05, |
|
"loss": 6.4732, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.047324589375999405, |
|
"grad_norm": 1.9587410688400269, |
|
"learning_rate": 4.730229120473023e-05, |
|
"loss": 6.4143, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.04880348279399939, |
|
"grad_norm": 1.4764151573181152, |
|
"learning_rate": 4.878048780487805e-05, |
|
"loss": 6.358, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.05028237621199937, |
|
"grad_norm": 1.5685200691223145, |
|
"learning_rate": 5.025868440502587e-05, |
|
"loss": 6.3084, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.05176126962999935, |
|
"grad_norm": 2.1411592960357666, |
|
"learning_rate": 5.173688100517369e-05, |
|
"loss": 6.2515, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.053240163047999334, |
|
"grad_norm": 2.6792619228363037, |
|
"learning_rate": 5.3215077605321506e-05, |
|
"loss": 6.2091, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.05471905646599932, |
|
"grad_norm": 1.5457326173782349, |
|
"learning_rate": 5.4693274205469325e-05, |
|
"loss": 6.1512, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.056197949883999294, |
|
"grad_norm": 1.931794285774231, |
|
"learning_rate": 5.6171470805617157e-05, |
|
"loss": 6.0981, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.05767684330199928, |
|
"grad_norm": 2.3924379348754883, |
|
"learning_rate": 5.7649667405764975e-05, |
|
"loss": 6.0439, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.05915573671999926, |
|
"grad_norm": 2.1078522205352783, |
|
"learning_rate": 5.912786400591279e-05, |
|
"loss": 6.0081, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.06063463013799924, |
|
"grad_norm": 1.8126791715621948, |
|
"learning_rate": 6.060606060606061e-05, |
|
"loss": 5.9435, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.06211352355599922, |
|
"grad_norm": 1.6939939260482788, |
|
"learning_rate": 6.208425720620842e-05, |
|
"loss": 5.9, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.0635924169739992, |
|
"grad_norm": 1.7903132438659668, |
|
"learning_rate": 6.356245380635625e-05, |
|
"loss": 5.8536, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.06507131039199919, |
|
"grad_norm": 2.1418817043304443, |
|
"learning_rate": 6.504065040650407e-05, |
|
"loss": 5.8192, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.06655020380999917, |
|
"grad_norm": 1.6386531591415405, |
|
"learning_rate": 6.651884700665188e-05, |
|
"loss": 5.768, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.06802909722799914, |
|
"grad_norm": 1.82034432888031, |
|
"learning_rate": 6.799704360679971e-05, |
|
"loss": 5.7162, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.06950799064599913, |
|
"grad_norm": 1.9206963777542114, |
|
"learning_rate": 6.947524020694752e-05, |
|
"loss": 5.6755, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.07098688406399911, |
|
"grad_norm": 1.4253259897232056, |
|
"learning_rate": 7.095343680709535e-05, |
|
"loss": 5.6321, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.07246577748199909, |
|
"grad_norm": 2.0578746795654297, |
|
"learning_rate": 7.243163340724317e-05, |
|
"loss": 5.5907, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.07394467089999908, |
|
"grad_norm": 1.4132108688354492, |
|
"learning_rate": 7.3909830007391e-05, |
|
"loss": 5.5483, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.07542356431799906, |
|
"grad_norm": 1.6758071184158325, |
|
"learning_rate": 7.538802660753881e-05, |
|
"loss": 5.5136, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.07690245773599903, |
|
"grad_norm": 1.5184019804000854, |
|
"learning_rate": 7.686622320768663e-05, |
|
"loss": 5.4715, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.07838135115399902, |
|
"grad_norm": 1.731789231300354, |
|
"learning_rate": 7.834441980783444e-05, |
|
"loss": 5.4289, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.079860244571999, |
|
"grad_norm": 1.4423941373825073, |
|
"learning_rate": 7.982261640798227e-05, |
|
"loss": 5.3799, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.08133913798999898, |
|
"grad_norm": 1.200088620185852, |
|
"learning_rate": 8.130081300813008e-05, |
|
"loss": 5.3446, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.08281803140799897, |
|
"grad_norm": 1.5034804344177246, |
|
"learning_rate": 8.27790096082779e-05, |
|
"loss": 5.3011, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.08429692482599895, |
|
"grad_norm": 1.6272141933441162, |
|
"learning_rate": 8.425720620842572e-05, |
|
"loss": 5.2573, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.08577581824399892, |
|
"grad_norm": 1.6940892934799194, |
|
"learning_rate": 8.573540280857354e-05, |
|
"loss": 5.2206, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.08725471166199891, |
|
"grad_norm": 1.531122088432312, |
|
"learning_rate": 8.721359940872137e-05, |
|
"loss": 5.1842, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.08873360507999889, |
|
"grad_norm": 1.3891607522964478, |
|
"learning_rate": 8.869179600886919e-05, |
|
"loss": 5.1574, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.09021249849799887, |
|
"grad_norm": 1.5175141096115112, |
|
"learning_rate": 9.0169992609017e-05, |
|
"loss": 5.0965, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.09169139191599886, |
|
"grad_norm": 1.2954392433166504, |
|
"learning_rate": 9.164818920916483e-05, |
|
"loss": 5.0615, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.09317028533399883, |
|
"grad_norm": 1.1776789426803589, |
|
"learning_rate": 9.312638580931264e-05, |
|
"loss": 5.0263, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.09464917875199881, |
|
"grad_norm": 1.342835545539856, |
|
"learning_rate": 9.460458240946047e-05, |
|
"loss": 4.9938, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.0961280721699988, |
|
"grad_norm": 1.5098336935043335, |
|
"learning_rate": 9.608277900960828e-05, |
|
"loss": 4.9579, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.09760696558799878, |
|
"grad_norm": 1.3883858919143677, |
|
"learning_rate": 9.75609756097561e-05, |
|
"loss": 4.9159, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.09908585900599876, |
|
"grad_norm": 1.6131935119628906, |
|
"learning_rate": 9.903917220990391e-05, |
|
"loss": 4.8716, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.10056475242399875, |
|
"grad_norm": 1.3793425559997559, |
|
"learning_rate": 9.999991836910476e-05, |
|
"loss": 4.8389, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.10204364584199872, |
|
"grad_norm": 1.2413076162338257, |
|
"learning_rate": 9.999878553677705e-05, |
|
"loss": 4.8044, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.1035225392599987, |
|
"grad_norm": 1.4875175952911377, |
|
"learning_rate": 9.99963199901083e-05, |
|
"loss": 4.759, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.10500143267799869, |
|
"grad_norm": 1.281230092048645, |
|
"learning_rate": 9.999252179481748e-05, |
|
"loss": 4.733, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.10648032609599867, |
|
"grad_norm": 1.179935336112976, |
|
"learning_rate": 9.998739105214525e-05, |
|
"loss": 4.6965, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.10795921951399864, |
|
"grad_norm": 1.2033872604370117, |
|
"learning_rate": 9.998092789885118e-05, |
|
"loss": 4.649, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.10943811293199864, |
|
"grad_norm": 1.310261607170105, |
|
"learning_rate": 9.997313250721026e-05, |
|
"loss": 4.6158, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.11091700634999861, |
|
"grad_norm": 1.1370333433151245, |
|
"learning_rate": 9.996400508500809e-05, |
|
"loss": 4.5917, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.11239589976799859, |
|
"grad_norm": 0.9518343210220337, |
|
"learning_rate": 9.995354587553553e-05, |
|
"loss": 4.5477, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.11387479318599858, |
|
"grad_norm": 1.1209640502929688, |
|
"learning_rate": 9.994175515758211e-05, |
|
"loss": 4.5169, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.11535368660399856, |
|
"grad_norm": 1.1134682893753052, |
|
"learning_rate": 9.992863324542865e-05, |
|
"loss": 4.4921, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.11683258002199853, |
|
"grad_norm": 1.1962740421295166, |
|
"learning_rate": 9.991418048883885e-05, |
|
"loss": 4.4678, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.11831147343999852, |
|
"grad_norm": 1.0190341472625732, |
|
"learning_rate": 9.989839727305e-05, |
|
"loss": 4.4265, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.1197903668579985, |
|
"grad_norm": 1.1323659420013428, |
|
"learning_rate": 9.988128401876267e-05, |
|
"loss": 4.3951, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.12126926027599848, |
|
"grad_norm": 1.2068976163864136, |
|
"learning_rate": 9.986284118212951e-05, |
|
"loss": 4.3762, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.12274815369399847, |
|
"grad_norm": 1.1199101209640503, |
|
"learning_rate": 9.984306925474313e-05, |
|
"loss": 4.3519, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.12422704711199845, |
|
"grad_norm": 0.8594743013381958, |
|
"learning_rate": 9.982196876362298e-05, |
|
"loss": 4.3268, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.12570594052999842, |
|
"grad_norm": 1.0981128215789795, |
|
"learning_rate": 9.979954027120124e-05, |
|
"loss": 4.3018, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.1271848339479984, |
|
"grad_norm": 0.9453332424163818, |
|
"learning_rate": 9.97757843753079e-05, |
|
"loss": 4.2747, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.1286637273659984, |
|
"grad_norm": 0.9754221439361572, |
|
"learning_rate": 9.975070170915481e-05, |
|
"loss": 4.2539, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.13014262078399838, |
|
"grad_norm": 0.7794106602668762, |
|
"learning_rate": 9.972429294131878e-05, |
|
"loss": 4.2331, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.13162151420199836, |
|
"grad_norm": 0.8084755539894104, |
|
"learning_rate": 9.969655877572379e-05, |
|
"loss": 4.2076, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.13310040761999833, |
|
"grad_norm": 0.9451693296432495, |
|
"learning_rate": 9.96674999516222e-05, |
|
"loss": 4.2023, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.1345793010379983, |
|
"grad_norm": 0.9662824869155884, |
|
"learning_rate": 9.963711724357503e-05, |
|
"loss": 4.1661, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.1360581944559983, |
|
"grad_norm": 0.8646146655082703, |
|
"learning_rate": 9.960541146143138e-05, |
|
"loss": 4.1529, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.1375370878739983, |
|
"grad_norm": 0.819580078125, |
|
"learning_rate": 9.957238345030681e-05, |
|
"loss": 4.1353, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.13901598129199827, |
|
"grad_norm": 0.793268620967865, |
|
"learning_rate": 9.953803409056077e-05, |
|
"loss": 4.1205, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.14049487470999825, |
|
"grad_norm": 0.8794734477996826, |
|
"learning_rate": 9.950236429777319e-05, |
|
"loss": 4.1034, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.14197376812799822, |
|
"grad_norm": 0.8757349252700806, |
|
"learning_rate": 9.946537502272004e-05, |
|
"loss": 4.0896, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.1434526615459982, |
|
"grad_norm": 0.806181788444519, |
|
"learning_rate": 9.942706725134801e-05, |
|
"loss": 4.0792, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.14493155496399818, |
|
"grad_norm": 0.568131148815155, |
|
"learning_rate": 9.938744200474825e-05, |
|
"loss": 4.0483, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.14641044838199818, |
|
"grad_norm": 0.9386783242225647, |
|
"learning_rate": 9.934650033912909e-05, |
|
"loss": 4.0349, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.14788934179999816, |
|
"grad_norm": 0.8668307065963745, |
|
"learning_rate": 9.930424334578793e-05, |
|
"loss": 4.0249, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.14936823521799814, |
|
"grad_norm": 0.7728129625320435, |
|
"learning_rate": 9.926067215108216e-05, |
|
"loss": 4.001, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.1508471286359981, |
|
"grad_norm": 0.8983877301216125, |
|
"learning_rate": 9.92157879163991e-05, |
|
"loss": 4.0099, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.1523260220539981, |
|
"grad_norm": 0.7290263772010803, |
|
"learning_rate": 9.916959183812508e-05, |
|
"loss": 3.9816, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.15380491547199807, |
|
"grad_norm": 1.0002912282943726, |
|
"learning_rate": 9.912208514761353e-05, |
|
"loss": 3.964, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.15528380888999807, |
|
"grad_norm": 0.8696877956390381, |
|
"learning_rate": 9.907326911115215e-05, |
|
"loss": 3.9532, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.15676270230799805, |
|
"grad_norm": 0.9264429211616516, |
|
"learning_rate": 9.90231450299292e-05, |
|
"loss": 3.9405, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.15824159572599802, |
|
"grad_norm": 0.6036892533302307, |
|
"learning_rate": 9.897171423999877e-05, |
|
"loss": 3.9308, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.159720489143998, |
|
"grad_norm": 0.6206973791122437, |
|
"learning_rate": 9.891897811224516e-05, |
|
"loss": 3.9089, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.16119938256199798, |
|
"grad_norm": 0.9498934149742126, |
|
"learning_rate": 9.886493805234642e-05, |
|
"loss": 3.9101, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.16267827597999795, |
|
"grad_norm": 0.8084043264389038, |
|
"learning_rate": 9.880959550073676e-05, |
|
"loss": 3.9108, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.16415716939799796, |
|
"grad_norm": 0.7810977697372437, |
|
"learning_rate": 9.875295193256829e-05, |
|
"loss": 3.8923, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.16563606281599794, |
|
"grad_norm": 0.5951938033103943, |
|
"learning_rate": 9.869500885767156e-05, |
|
"loss": 3.8676, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.1671149562339979, |
|
"grad_norm": 0.7140426635742188, |
|
"learning_rate": 9.863576782051544e-05, |
|
"loss": 3.8717, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.1685938496519979, |
|
"grad_norm": 0.7328889966011047, |
|
"learning_rate": 9.857523040016588e-05, |
|
"loss": 3.8585, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.17007274306999787, |
|
"grad_norm": 0.9172821044921875, |
|
"learning_rate": 9.851339821024383e-05, |
|
"loss": 3.8515, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.17155163648799784, |
|
"grad_norm": 0.70406574010849, |
|
"learning_rate": 9.845027289888226e-05, |
|
"loss": 3.8322, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.17303052990599785, |
|
"grad_norm": 0.6545581221580505, |
|
"learning_rate": 9.838585614868221e-05, |
|
"loss": 3.8342, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.17450942332399783, |
|
"grad_norm": 0.8262337446212769, |
|
"learning_rate": 9.832014967666788e-05, |
|
"loss": 3.8178, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.1759883167419978, |
|
"grad_norm": 0.748437225818634, |
|
"learning_rate": 9.825315523424097e-05, |
|
"loss": 3.8054, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.17746721015999778, |
|
"grad_norm": 0.7961335778236389, |
|
"learning_rate": 9.818487460713397e-05, |
|
"loss": 3.803, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.17894610357799776, |
|
"grad_norm": 0.5949457287788391, |
|
"learning_rate": 9.811530961536246e-05, |
|
"loss": 3.7988, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.18042499699599773, |
|
"grad_norm": 0.6500332355499268, |
|
"learning_rate": 9.804446211317677e-05, |
|
"loss": 3.7902, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.18190389041399774, |
|
"grad_norm": 0.5734246969223022, |
|
"learning_rate": 9.797233398901238e-05, |
|
"loss": 3.7788, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.18338278383199771, |
|
"grad_norm": 0.6358067393302917, |
|
"learning_rate": 9.78989271654397e-05, |
|
"loss": 3.7581, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.1848616772499977, |
|
"grad_norm": 0.7676229476928711, |
|
"learning_rate": 9.78242435991128e-05, |
|
"loss": 3.7566, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.18634057066799767, |
|
"grad_norm": 0.5594522356987, |
|
"learning_rate": 9.774828528071722e-05, |
|
"loss": 3.7552, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.18781946408599764, |
|
"grad_norm": 0.7414741516113281, |
|
"learning_rate": 9.767105423491694e-05, |
|
"loss": 3.7404, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.18929835750399762, |
|
"grad_norm": 0.6007790565490723, |
|
"learning_rate": 9.759255252030042e-05, |
|
"loss": 3.7308, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.19077725092199763, |
|
"grad_norm": 0.6344082355499268, |
|
"learning_rate": 9.751278222932569e-05, |
|
"loss": 3.7179, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.1922561443399976, |
|
"grad_norm": 0.6184104681015015, |
|
"learning_rate": 9.743174548826461e-05, |
|
"loss": 3.7177, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.19373503775799758, |
|
"grad_norm": 0.785652756690979, |
|
"learning_rate": 9.734944445714618e-05, |
|
"loss": 3.7022, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.19521393117599756, |
|
"grad_norm": 0.664434015750885, |
|
"learning_rate": 9.726588132969901e-05, |
|
"loss": 3.6885, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.19669282459399753, |
|
"grad_norm": 0.6987696290016174, |
|
"learning_rate": 9.718105833329272e-05, |
|
"loss": 3.682, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.1981717180119975, |
|
"grad_norm": 0.5085122585296631, |
|
"learning_rate": 9.709497772887874e-05, |
|
"loss": 3.6707, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.19965061142999752, |
|
"grad_norm": 0.8911309838294983, |
|
"learning_rate": 9.700764181092988e-05, |
|
"loss": 3.6517, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.2011295048479975, |
|
"grad_norm": 0.7100036144256592, |
|
"learning_rate": 9.691905290737932e-05, |
|
"loss": 3.6738, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.20260839826599747, |
|
"grad_norm": 0.5330691933631897, |
|
"learning_rate": 9.682921337955847e-05, |
|
"loss": 3.664, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.20408729168399745, |
|
"grad_norm": 0.5505249500274658, |
|
"learning_rate": 9.673812562213401e-05, |
|
"loss": 3.6491, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.20556618510199742, |
|
"grad_norm": 0.7107018232345581, |
|
"learning_rate": 9.664579206304413e-05, |
|
"loss": 3.6406, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.2070450785199974, |
|
"grad_norm": 0.5617266893386841, |
|
"learning_rate": 9.65522151634338e-05, |
|
"loss": 3.653, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.2085239719379974, |
|
"grad_norm": 0.5702326893806458, |
|
"learning_rate": 9.64573974175891e-05, |
|
"loss": 3.6311, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.21000286535599738, |
|
"grad_norm": 0.5759734511375427, |
|
"learning_rate": 9.636134135287081e-05, |
|
"loss": 3.6256, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.21148175877399736, |
|
"grad_norm": 0.6595752835273743, |
|
"learning_rate": 9.626404952964704e-05, |
|
"loss": 3.6184, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.21296065219199733, |
|
"grad_norm": 0.7071236371994019, |
|
"learning_rate": 9.616552454122492e-05, |
|
"loss": 3.6138, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.2144395456099973, |
|
"grad_norm": 0.7660998702049255, |
|
"learning_rate": 9.606576901378156e-05, |
|
"loss": 3.6059, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.2159184390279973, |
|
"grad_norm": 0.9190542101860046, |
|
"learning_rate": 9.596478560629397e-05, |
|
"loss": 3.5887, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.2173973324459973, |
|
"grad_norm": 0.5795056223869324, |
|
"learning_rate": 9.586257701046824e-05, |
|
"loss": 3.5981, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.21887622586399727, |
|
"grad_norm": 0.607071578502655, |
|
"learning_rate": 9.575914595066777e-05, |
|
"loss": 3.592, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.22035511928199725, |
|
"grad_norm": 0.7824068069458008, |
|
"learning_rate": 9.565449518384066e-05, |
|
"loss": 3.5919, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.22183401269999722, |
|
"grad_norm": 0.5169054269790649, |
|
"learning_rate": 9.554862749944622e-05, |
|
"loss": 3.5899, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.2233129061179972, |
|
"grad_norm": 0.8486248850822449, |
|
"learning_rate": 9.544154571938062e-05, |
|
"loss": 3.5707, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.22479179953599718, |
|
"grad_norm": 0.47671154141426086, |
|
"learning_rate": 9.533325269790167e-05, |
|
"loss": 3.559, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.22627069295399718, |
|
"grad_norm": 0.5938573479652405, |
|
"learning_rate": 9.522375132155272e-05, |
|
"loss": 3.5422, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.22774958637199716, |
|
"grad_norm": 0.6117560267448425, |
|
"learning_rate": 9.511304450908576e-05, |
|
"loss": 3.5671, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.22922847978999714, |
|
"grad_norm": 0.6173937916755676, |
|
"learning_rate": 9.500113521138361e-05, |
|
"loss": 3.5669, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.2307073732079971, |
|
"grad_norm": 0.726667046546936, |
|
"learning_rate": 9.488802641138125e-05, |
|
"loss": 3.5366, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.2321862666259971, |
|
"grad_norm": 0.5627657771110535, |
|
"learning_rate": 9.477372112398629e-05, |
|
"loss": 3.53, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.23366516004399707, |
|
"grad_norm": 0.49706488847732544, |
|
"learning_rate": 9.465822239599864e-05, |
|
"loss": 3.5406, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.23514405346199707, |
|
"grad_norm": 0.9899396896362305, |
|
"learning_rate": 9.454153330602932e-05, |
|
"loss": 3.5231, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.23662294687999705, |
|
"grad_norm": 0.4798751771450043, |
|
"learning_rate": 9.442365696441835e-05, |
|
"loss": 3.5116, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.23810184029799702, |
|
"grad_norm": 0.6276853084564209, |
|
"learning_rate": 9.430459651315185e-05, |
|
"loss": 3.5184, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.239580733715997, |
|
"grad_norm": 0.4986541271209717, |
|
"learning_rate": 9.418435512577833e-05, |
|
"loss": 3.5119, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.24105962713399698, |
|
"grad_norm": 0.535453736782074, |
|
"learning_rate": 9.406293600732408e-05, |
|
"loss": 3.5147, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.24253852055199696, |
|
"grad_norm": 0.5945438146591187, |
|
"learning_rate": 9.39403423942077e-05, |
|
"loss": 3.5023, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.24401741396999696, |
|
"grad_norm": 0.6451681852340698, |
|
"learning_rate": 9.381657755415387e-05, |
|
"loss": 3.4846, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.24549630738799694, |
|
"grad_norm": 0.6193166375160217, |
|
"learning_rate": 9.369164478610631e-05, |
|
"loss": 3.488, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.24697520080599691, |
|
"grad_norm": 0.7059178352355957, |
|
"learning_rate": 9.35655474201397e-05, |
|
"loss": 3.4883, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.2484540942239969, |
|
"grad_norm": 0.6481304168701172, |
|
"learning_rate": 9.343828881737107e-05, |
|
"loss": 3.4762, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.24993298764199687, |
|
"grad_norm": 0.5440752506256104, |
|
"learning_rate": 9.330987236987008e-05, |
|
"loss": 3.481, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.25141188105999684, |
|
"grad_norm": 0.5582643747329712, |
|
"learning_rate": 9.318030150056869e-05, |
|
"loss": 3.4755, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.25289077447799685, |
|
"grad_norm": 0.6249572038650513, |
|
"learning_rate": 9.304957966316995e-05, |
|
"loss": 3.4775, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.2543696678959968, |
|
"grad_norm": 0.6695943474769592, |
|
"learning_rate": 9.291771034205578e-05, |
|
"loss": 3.463, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.2558485613139968, |
|
"grad_norm": 0.4462078809738159, |
|
"learning_rate": 9.27846970521943e-05, |
|
"loss": 3.4561, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.2573274547319968, |
|
"grad_norm": 0.49235352873802185, |
|
"learning_rate": 9.265054333904601e-05, |
|
"loss": 3.4515, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.25880634814999676, |
|
"grad_norm": 0.6507192254066467, |
|
"learning_rate": 9.251525277846929e-05, |
|
"loss": 3.4514, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.26028524156799676, |
|
"grad_norm": 0.4588228166103363, |
|
"learning_rate": 9.237882897662515e-05, |
|
"loss": 3.4286, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.2617641349859967, |
|
"grad_norm": 0.575430691242218, |
|
"learning_rate": 9.224127556988107e-05, |
|
"loss": 3.4458, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.2632430284039967, |
|
"grad_norm": 0.7287342548370361, |
|
"learning_rate": 9.210259622471403e-05, |
|
"loss": 3.4318, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.26472192182199666, |
|
"grad_norm": 0.6866022348403931, |
|
"learning_rate": 9.19627946376129e-05, |
|
"loss": 3.4361, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.26620081523999667, |
|
"grad_norm": 0.5268846750259399, |
|
"learning_rate": 9.182187453497974e-05, |
|
"loss": 3.4364, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.2676797086579967, |
|
"grad_norm": 0.6380168795585632, |
|
"learning_rate": 9.167983967303066e-05, |
|
"loss": 3.4389, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.2691586020759966, |
|
"grad_norm": 0.6250066757202148, |
|
"learning_rate": 9.153669383769556e-05, |
|
"loss": 3.4322, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.2706374954939966, |
|
"grad_norm": 0.6497014164924622, |
|
"learning_rate": 9.139244084451729e-05, |
|
"loss": 3.4068, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.2721163889119966, |
|
"grad_norm": 0.8837792277336121, |
|
"learning_rate": 9.124708453854983e-05, |
|
"loss": 3.4132, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.2735952823299966, |
|
"grad_norm": 0.5183786153793335, |
|
"learning_rate": 9.110062879425602e-05, |
|
"loss": 3.4081, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.2750741757479966, |
|
"grad_norm": 0.7497463226318359, |
|
"learning_rate": 9.095307751540407e-05, |
|
"loss": 3.3986, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.27655306916599653, |
|
"grad_norm": 0.5026047825813293, |
|
"learning_rate": 9.080443463496363e-05, |
|
"loss": 3.4111, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.27803196258399654, |
|
"grad_norm": 0.4640219211578369, |
|
"learning_rate": 9.06547041150009e-05, |
|
"loss": 3.3865, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.2795108560019965, |
|
"grad_norm": 0.5095507502555847, |
|
"learning_rate": 9.050388994657303e-05, |
|
"loss": 3.3915, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.2809897494199965, |
|
"grad_norm": 0.5542161464691162, |
|
"learning_rate": 9.035199614962178e-05, |
|
"loss": 3.3924, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.28246864283799644, |
|
"grad_norm": 0.44914740324020386, |
|
"learning_rate": 9.019902677286631e-05, |
|
"loss": 3.3968, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.28394753625599645, |
|
"grad_norm": 0.4764072000980377, |
|
"learning_rate": 9.004498589369532e-05, |
|
"loss": 3.3937, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.28542642967399645, |
|
"grad_norm": 1.0480468273162842, |
|
"learning_rate": 8.98898776180583e-05, |
|
"loss": 3.3926, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.2869053230919964, |
|
"grad_norm": 0.5355066061019897, |
|
"learning_rate": 8.973370608035612e-05, |
|
"loss": 3.3895, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.2883842165099964, |
|
"grad_norm": 0.4495852589607239, |
|
"learning_rate": 8.957647544333088e-05, |
|
"loss": 3.3717, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.28986310992799635, |
|
"grad_norm": 0.5025330781936646, |
|
"learning_rate": 8.941818989795487e-05, |
|
"loss": 3.3653, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.29134200334599636, |
|
"grad_norm": 0.7565049529075623, |
|
"learning_rate": 8.925885366331887e-05, |
|
"loss": 3.3668, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.29282089676399636, |
|
"grad_norm": 0.8078230619430542, |
|
"learning_rate": 8.909847098651978e-05, |
|
"loss": 3.3678, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.2942997901819963, |
|
"grad_norm": 0.532131552696228, |
|
"learning_rate": 8.893704614254725e-05, |
|
"loss": 3.3616, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.2957786835999963, |
|
"grad_norm": 0.6017030477523804, |
|
"learning_rate": 8.877458343416993e-05, |
|
"loss": 3.349, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.29725757701799627, |
|
"grad_norm": 0.5634870529174805, |
|
"learning_rate": 8.861108719182061e-05, |
|
"loss": 3.3385, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.29873647043599627, |
|
"grad_norm": 0.5135075449943542, |
|
"learning_rate": 8.844656177348087e-05, |
|
"loss": 3.353, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.3002153638539962, |
|
"grad_norm": 0.49317190051078796, |
|
"learning_rate": 8.828101156456493e-05, |
|
"loss": 3.3455, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.3016942572719962, |
|
"grad_norm": 0.5618060827255249, |
|
"learning_rate": 8.811444097780273e-05, |
|
"loss": 3.3444, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.30317315068999623, |
|
"grad_norm": 0.5211082100868225, |
|
"learning_rate": 8.79468544531223e-05, |
|
"loss": 3.3491, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.3046520441079962, |
|
"grad_norm": 0.5708051919937134, |
|
"learning_rate": 8.777825645753144e-05, |
|
"loss": 3.3345, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.3061309375259962, |
|
"grad_norm": 0.5056930184364319, |
|
"learning_rate": 8.760865148499862e-05, |
|
"loss": 3.3333, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.30760983094399613, |
|
"grad_norm": 0.5034912824630737, |
|
"learning_rate": 8.743804405633327e-05, |
|
"loss": 3.3313, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.30908872436199614, |
|
"grad_norm": 0.6101865768432617, |
|
"learning_rate": 8.726643871906512e-05, |
|
"loss": 3.3211, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.31056761777999614, |
|
"grad_norm": 0.49354320764541626, |
|
"learning_rate": 8.709384004732322e-05, |
|
"loss": 3.328, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.3120465111979961, |
|
"grad_norm": 1.0049197673797607, |
|
"learning_rate": 8.69202526417138e-05, |
|
"loss": 3.3256, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.3135254046159961, |
|
"grad_norm": 0.4796050786972046, |
|
"learning_rate": 8.67456811291977e-05, |
|
"loss": 3.3264, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.31500429803399604, |
|
"grad_norm": 0.6114419102668762, |
|
"learning_rate": 8.657013016296716e-05, |
|
"loss": 3.3041, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.31648319145199605, |
|
"grad_norm": 0.6853553652763367, |
|
"learning_rate": 8.639360442232163e-05, |
|
"loss": 3.3123, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.317962084869996, |
|
"grad_norm": 0.4117718040943146, |
|
"learning_rate": 8.621610861254307e-05, |
|
"loss": 3.3036, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.319440978287996, |
|
"grad_norm": 0.4868248701095581, |
|
"learning_rate": 8.60376474647707e-05, |
|
"loss": 3.3112, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.320919871705996, |
|
"grad_norm": 0.4655211865901947, |
|
"learning_rate": 8.585822573587463e-05, |
|
"loss": 3.2959, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.32239876512399596, |
|
"grad_norm": 0.4244300127029419, |
|
"learning_rate": 8.567784820832926e-05, |
|
"loss": 3.3006, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.32387765854199596, |
|
"grad_norm": 0.5585177540779114, |
|
"learning_rate": 8.549651969008572e-05, |
|
"loss": 3.304, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.3253565519599959, |
|
"grad_norm": 0.4044816493988037, |
|
"learning_rate": 8.531424501444376e-05, |
|
"loss": 3.2943, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.3268354453779959, |
|
"grad_norm": 0.5332701802253723, |
|
"learning_rate": 8.513102903992285e-05, |
|
"loss": 3.2691, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.3283143387959959, |
|
"grad_norm": 0.6828725934028625, |
|
"learning_rate": 8.494687665013274e-05, |
|
"loss": 3.2757, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.32979323221399587, |
|
"grad_norm": 0.4340764284133911, |
|
"learning_rate": 8.476179275364331e-05, |
|
"loss": 3.2798, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.3312721256319959, |
|
"grad_norm": 0.5927674770355225, |
|
"learning_rate": 8.457578228385362e-05, |
|
"loss": 3.277, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.3327510190499958, |
|
"grad_norm": 0.5142761468887329, |
|
"learning_rate": 8.438885019886051e-05, |
|
"loss": 3.2745, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.3342299124679958, |
|
"grad_norm": 0.5035094618797302, |
|
"learning_rate": 8.420100148132643e-05, |
|
"loss": 3.282, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.33570880588599583, |
|
"grad_norm": 0.4529162049293518, |
|
"learning_rate": 8.40122411383466e-05, |
|
"loss": 3.2741, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.3371876993039958, |
|
"grad_norm": 0.47236135601997375, |
|
"learning_rate": 8.382257420131554e-05, |
|
"loss": 3.2566, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.3386665927219958, |
|
"grad_norm": 0.5067903995513916, |
|
"learning_rate": 8.363200572579297e-05, |
|
"loss": 3.2729, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.34014548613999573, |
|
"grad_norm": 0.5891897678375244, |
|
"learning_rate": 8.344054079136911e-05, |
|
"loss": 3.254, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.34162437955799574, |
|
"grad_norm": 0.4857490062713623, |
|
"learning_rate": 8.324818450152917e-05, |
|
"loss": 3.2704, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.3431032729759957, |
|
"grad_norm": 0.5922226309776306, |
|
"learning_rate": 8.305494198351741e-05, |
|
"loss": 3.2511, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.3445821663939957, |
|
"grad_norm": 0.5176606178283691, |
|
"learning_rate": 8.286081838820047e-05, |
|
"loss": 3.2577, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.3460610598119957, |
|
"grad_norm": 0.4542312026023865, |
|
"learning_rate": 8.266581888993e-05, |
|
"loss": 3.269, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.34753995322999565, |
|
"grad_norm": 0.4864133596420288, |
|
"learning_rate": 8.246994868640478e-05, |
|
"loss": 3.2468, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.34901884664799565, |
|
"grad_norm": 0.5213157534599304, |
|
"learning_rate": 8.227321299853225e-05, |
|
"loss": 3.2431, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.3504977400659956, |
|
"grad_norm": 0.495194673538208, |
|
"learning_rate": 8.207561707028921e-05, |
|
"loss": 3.26, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.3519766334839956, |
|
"grad_norm": 0.47876933217048645, |
|
"learning_rate": 8.187716616858217e-05, |
|
"loss": 3.2397, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.3534555269019956, |
|
"grad_norm": 0.558392345905304, |
|
"learning_rate": 8.167786558310679e-05, |
|
"loss": 3.2357, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.35493442031999556, |
|
"grad_norm": 0.5333178043365479, |
|
"learning_rate": 8.147772062620715e-05, |
|
"loss": 3.2374, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.35641331373799556, |
|
"grad_norm": 0.41947266459465027, |
|
"learning_rate": 8.127673663273388e-05, |
|
"loss": 3.238, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.3578922071559955, |
|
"grad_norm": 0.6376889944076538, |
|
"learning_rate": 8.107491895990213e-05, |
|
"loss": 3.2295, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.3593711005739955, |
|
"grad_norm": 0.46790727972984314, |
|
"learning_rate": 8.087227298714865e-05, |
|
"loss": 3.2203, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.36084999399199547, |
|
"grad_norm": 0.4850638508796692, |
|
"learning_rate": 8.06688041159886e-05, |
|
"loss": 3.2282, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.36232888740999547, |
|
"grad_norm": 0.48408469557762146, |
|
"learning_rate": 8.04645177698713e-05, |
|
"loss": 3.2156, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.3638077808279955, |
|
"grad_norm": 0.4044775068759918, |
|
"learning_rate": 8.025941939403589e-05, |
|
"loss": 3.2054, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.3652866742459954, |
|
"grad_norm": 0.5881346464157104, |
|
"learning_rate": 8.005351445536611e-05, |
|
"loss": 3.2179, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.36676556766399543, |
|
"grad_norm": 0.49967604875564575, |
|
"learning_rate": 7.984680844224455e-05, |
|
"loss": 3.2243, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 0.3682444610819954, |
|
"grad_norm": 0.3812451958656311, |
|
"learning_rate": 7.963930686440638e-05, |
|
"loss": 3.2071, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 0.3697233544999954, |
|
"grad_norm": 0.5718510150909424, |
|
"learning_rate": 7.943101525279254e-05, |
|
"loss": 3.2097, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.3712022479179954, |
|
"grad_norm": 0.4486338198184967, |
|
"learning_rate": 7.922193915940223e-05, |
|
"loss": 3.2108, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 0.37268114133599534, |
|
"grad_norm": 0.3966203033924103, |
|
"learning_rate": 7.901208415714498e-05, |
|
"loss": 3.2079, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 0.37416003475399534, |
|
"grad_norm": 0.5968387722969055, |
|
"learning_rate": 7.880145583969208e-05, |
|
"loss": 3.2194, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 0.3756389281719953, |
|
"grad_norm": 0.4266614019870758, |
|
"learning_rate": 7.859005982132746e-05, |
|
"loss": 3.2041, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 0.3771178215899953, |
|
"grad_norm": 0.39778637886047363, |
|
"learning_rate": 7.83779017367981e-05, |
|
"loss": 3.1994, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.37859671500799524, |
|
"grad_norm": 0.5236369967460632, |
|
"learning_rate": 7.816498724116384e-05, |
|
"loss": 3.1862, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 0.38007560842599525, |
|
"grad_norm": 0.7279762625694275, |
|
"learning_rate": 7.79513220096465e-05, |
|
"loss": 3.1994, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 0.38155450184399525, |
|
"grad_norm": 0.4763568639755249, |
|
"learning_rate": 7.773691173747878e-05, |
|
"loss": 3.1906, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 0.3830333952619952, |
|
"grad_norm": 0.44299814105033875, |
|
"learning_rate": 7.752176213975242e-05, |
|
"loss": 3.1834, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 0.3845122886799952, |
|
"grad_norm": 0.5032374262809753, |
|
"learning_rate": 7.73058789512658e-05, |
|
"loss": 3.195, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.38599118209799516, |
|
"grad_norm": 0.4971736669540405, |
|
"learning_rate": 7.708926792637109e-05, |
|
"loss": 3.1912, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 0.38747007551599516, |
|
"grad_norm": 0.3745681941509247, |
|
"learning_rate": 7.687193483882094e-05, |
|
"loss": 3.1822, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 0.38894896893399517, |
|
"grad_norm": 0.45209985971450806, |
|
"learning_rate": 7.665388548161449e-05, |
|
"loss": 3.1747, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 0.3904278623519951, |
|
"grad_norm": 0.45653989911079407, |
|
"learning_rate": 7.643512566684302e-05, |
|
"loss": 3.1586, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 0.3919067557699951, |
|
"grad_norm": 0.5007410049438477, |
|
"learning_rate": 7.621566122553503e-05, |
|
"loss": 3.1777, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.39338564918799507, |
|
"grad_norm": 0.39367878437042236, |
|
"learning_rate": 7.599549800750075e-05, |
|
"loss": 3.1713, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 0.3948645426059951, |
|
"grad_norm": 0.41411903500556946, |
|
"learning_rate": 7.577464188117629e-05, |
|
"loss": 3.1743, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 0.396343436023995, |
|
"grad_norm": 0.45292773842811584, |
|
"learning_rate": 7.555309873346719e-05, |
|
"loss": 3.1615, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 0.397822329441995, |
|
"grad_norm": 0.8281717300415039, |
|
"learning_rate": 7.533087446959146e-05, |
|
"loss": 3.167, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 0.39930122285999503, |
|
"grad_norm": 0.4002739489078522, |
|
"learning_rate": 7.510797501292224e-05, |
|
"loss": 3.1778, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.400780116277995, |
|
"grad_norm": 0.4849472641944885, |
|
"learning_rate": 7.488440630482993e-05, |
|
"loss": 3.156, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 0.402259009695995, |
|
"grad_norm": 0.5112612247467041, |
|
"learning_rate": 7.466017430452372e-05, |
|
"loss": 3.1722, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 0.40373790311399493, |
|
"grad_norm": 0.7139009833335876, |
|
"learning_rate": 7.443528498889282e-05, |
|
"loss": 3.1638, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 0.40521679653199494, |
|
"grad_norm": 0.508050262928009, |
|
"learning_rate": 7.420974435234718e-05, |
|
"loss": 3.178, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 0.40669568994999494, |
|
"grad_norm": 0.42061784863471985, |
|
"learning_rate": 7.398355840665762e-05, |
|
"loss": 3.1644, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.4081745833679949, |
|
"grad_norm": 0.4205974340438843, |
|
"learning_rate": 7.375673318079566e-05, |
|
"loss": 3.1405, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 0.4096534767859949, |
|
"grad_norm": 0.37122201919555664, |
|
"learning_rate": 7.352927472077278e-05, |
|
"loss": 3.1446, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 0.41113237020399485, |
|
"grad_norm": 0.42649346590042114, |
|
"learning_rate": 7.330118908947927e-05, |
|
"loss": 3.1553, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 0.41261126362199485, |
|
"grad_norm": 0.4024769365787506, |
|
"learning_rate": 7.307248236652264e-05, |
|
"loss": 3.1468, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 0.4140901570399948, |
|
"grad_norm": 0.44164013862609863, |
|
"learning_rate": 7.284316064806555e-05, |
|
"loss": 3.1431, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.4155690504579948, |
|
"grad_norm": 0.43745094537734985, |
|
"learning_rate": 7.261323004666332e-05, |
|
"loss": 3.1566, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 0.4170479438759948, |
|
"grad_norm": 0.5233656764030457, |
|
"learning_rate": 7.238269669110104e-05, |
|
"loss": 3.1387, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 0.41852683729399476, |
|
"grad_norm": 0.5196412801742554, |
|
"learning_rate": 7.215156672623011e-05, |
|
"loss": 3.1359, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 0.42000573071199476, |
|
"grad_norm": 0.46823379397392273, |
|
"learning_rate": 7.191984631280457e-05, |
|
"loss": 3.1274, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 0.4214846241299947, |
|
"grad_norm": 0.4213380217552185, |
|
"learning_rate": 7.168754162731682e-05, |
|
"loss": 3.1261, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.4229635175479947, |
|
"grad_norm": 0.48972517251968384, |
|
"learning_rate": 7.145465886183291e-05, |
|
"loss": 3.1367, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 0.4244424109659947, |
|
"grad_norm": 0.4298087954521179, |
|
"learning_rate": 7.122120422382771e-05, |
|
"loss": 3.1342, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 0.42592130438399467, |
|
"grad_norm": 0.6111768484115601, |
|
"learning_rate": 7.098718393601922e-05, |
|
"loss": 3.1323, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 0.4274001978019947, |
|
"grad_norm": 0.4182634949684143, |
|
"learning_rate": 7.075260423620284e-05, |
|
"loss": 3.1206, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 0.4288790912199946, |
|
"grad_norm": 0.4418911337852478, |
|
"learning_rate": 7.051747137708503e-05, |
|
"loss": 3.1252, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.43035798463799463, |
|
"grad_norm": 0.4269157350063324, |
|
"learning_rate": 7.028179162611668e-05, |
|
"loss": 3.1291, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 0.4318368780559946, |
|
"grad_norm": 0.38284796476364136, |
|
"learning_rate": 7.004557126532608e-05, |
|
"loss": 3.1272, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 0.4333157714739946, |
|
"grad_norm": 0.42110738158226013, |
|
"learning_rate": 6.98088165911514e-05, |
|
"loss": 3.1277, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 0.4347946648919946, |
|
"grad_norm": 0.45251357555389404, |
|
"learning_rate": 6.957153391427293e-05, |
|
"loss": 3.1258, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 0.43627355830999454, |
|
"grad_norm": 0.5021226406097412, |
|
"learning_rate": 6.933372955944478e-05, |
|
"loss": 3.1132, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.43775245172799454, |
|
"grad_norm": 0.5621367692947388, |
|
"learning_rate": 6.909540986532644e-05, |
|
"loss": 3.1223, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 0.4392313451459945, |
|
"grad_norm": 0.48778969049453735, |
|
"learning_rate": 6.885658118431367e-05, |
|
"loss": 3.1239, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 0.4407102385639945, |
|
"grad_norm": 0.4777956008911133, |
|
"learning_rate": 6.861724988236926e-05, |
|
"loss": 3.1096, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 0.4421891319819945, |
|
"grad_norm": 0.5108891725540161, |
|
"learning_rate": 6.83774223388533e-05, |
|
"loss": 3.1172, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 0.44366802539999445, |
|
"grad_norm": 0.42329996824264526, |
|
"learning_rate": 6.813710494635325e-05, |
|
"loss": 3.0999, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.44514691881799445, |
|
"grad_norm": 0.538500964641571, |
|
"learning_rate": 6.789630411051336e-05, |
|
"loss": 3.1098, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 0.4466258122359944, |
|
"grad_norm": 0.51045823097229, |
|
"learning_rate": 6.765502624986409e-05, |
|
"loss": 3.1021, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 0.4481047056539944, |
|
"grad_norm": 0.46791911125183105, |
|
"learning_rate": 6.741327779565096e-05, |
|
"loss": 3.1031, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 0.44958359907199436, |
|
"grad_norm": 0.4351001977920532, |
|
"learning_rate": 6.71710651916631e-05, |
|
"loss": 3.0976, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 0.45106249248999436, |
|
"grad_norm": 0.3884891867637634, |
|
"learning_rate": 6.692839489406155e-05, |
|
"loss": 3.0977, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.45254138590799436, |
|
"grad_norm": 0.44683268666267395, |
|
"learning_rate": 6.668527337120717e-05, |
|
"loss": 3.0915, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 0.4540202793259943, |
|
"grad_norm": 0.36208999156951904, |
|
"learning_rate": 6.644170710348813e-05, |
|
"loss": 3.1036, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 0.4554991727439943, |
|
"grad_norm": 0.6256937384605408, |
|
"learning_rate": 6.619770258314729e-05, |
|
"loss": 3.0841, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 0.45697806616199427, |
|
"grad_norm": 0.44526803493499756, |
|
"learning_rate": 6.595326631410911e-05, |
|
"loss": 3.0801, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 0.45845695957999427, |
|
"grad_norm": 0.37642255425453186, |
|
"learning_rate": 6.570840481180624e-05, |
|
"loss": 3.0923, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.4599358529979943, |
|
"grad_norm": 0.4022856056690216, |
|
"learning_rate": 6.546312460300595e-05, |
|
"loss": 3.0865, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 0.4614147464159942, |
|
"grad_norm": 0.41262638568878174, |
|
"learning_rate": 6.521743222563608e-05, |
|
"loss": 3.0895, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 0.46289363983399423, |
|
"grad_norm": 0.6894219517707825, |
|
"learning_rate": 6.49713342286108e-05, |
|
"loss": 3.0882, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 0.4643725332519942, |
|
"grad_norm": 0.4044055938720703, |
|
"learning_rate": 6.4724837171656e-05, |
|
"loss": 3.0811, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 0.4658514266699942, |
|
"grad_norm": 0.5523516535758972, |
|
"learning_rate": 6.447794762513456e-05, |
|
"loss": 3.0687, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.46733032008799413, |
|
"grad_norm": 0.6067591309547424, |
|
"learning_rate": 6.42306721698711e-05, |
|
"loss": 3.0651, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 0.46880921350599414, |
|
"grad_norm": 0.48093098402023315, |
|
"learning_rate": 6.398301739697661e-05, |
|
"loss": 3.0862, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 0.47028810692399414, |
|
"grad_norm": 0.516197144985199, |
|
"learning_rate": 6.373498990767281e-05, |
|
"loss": 3.0879, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 0.4717670003419941, |
|
"grad_norm": 0.4190840721130371, |
|
"learning_rate": 6.348659631311608e-05, |
|
"loss": 3.0786, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 0.4732458937599941, |
|
"grad_norm": 0.42481333017349243, |
|
"learning_rate": 6.32378432342214e-05, |
|
"loss": 3.0701, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.47472478717799405, |
|
"grad_norm": 0.5522997379302979, |
|
"learning_rate": 6.29887373014857e-05, |
|
"loss": 3.0722, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 0.47620368059599405, |
|
"grad_norm": 0.3823126554489136, |
|
"learning_rate": 6.27392851548112e-05, |
|
"loss": 3.0722, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 0.47768257401399405, |
|
"grad_norm": 0.38790881633758545, |
|
"learning_rate": 6.248949344332853e-05, |
|
"loss": 3.0726, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 0.479161467431994, |
|
"grad_norm": 0.503336489200592, |
|
"learning_rate": 6.223936882521935e-05, |
|
"loss": 3.0652, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 0.480640360849994, |
|
"grad_norm": 0.5279501080513, |
|
"learning_rate": 6.198891796753885e-05, |
|
"loss": 3.0771, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.48211925426799396, |
|
"grad_norm": 0.4080502986907959, |
|
"learning_rate": 6.17381475460382e-05, |
|
"loss": 3.064, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 0.48359814768599396, |
|
"grad_norm": 0.45085135102272034, |
|
"learning_rate": 6.148706424498649e-05, |
|
"loss": 3.0594, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 0.4850770411039939, |
|
"grad_norm": 0.42239508032798767, |
|
"learning_rate": 6.123567475699261e-05, |
|
"loss": 3.064, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 0.4865559345219939, |
|
"grad_norm": 0.43709495663642883, |
|
"learning_rate": 6.098398578282682e-05, |
|
"loss": 3.0563, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 0.4880348279399939, |
|
"grad_norm": 0.6891195178031921, |
|
"learning_rate": 6.073200403124222e-05, |
|
"loss": 3.0594, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.48951372135799387, |
|
"grad_norm": 0.37419646978378296, |
|
"learning_rate": 6.047973621879577e-05, |
|
"loss": 3.0448, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 0.4909926147759939, |
|
"grad_norm": 0.3710575997829437, |
|
"learning_rate": 6.0227189069669464e-05, |
|
"loss": 3.0518, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 0.4924715081939938, |
|
"grad_norm": 0.7165172696113586, |
|
"learning_rate": 5.997436931549096e-05, |
|
"loss": 3.0589, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 0.49395040161199383, |
|
"grad_norm": 0.48645517230033875, |
|
"learning_rate": 5.972128369515415e-05, |
|
"loss": 3.0507, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 0.49542929502999383, |
|
"grad_norm": 0.3613664507865906, |
|
"learning_rate": 5.9467938954639624e-05, |
|
"loss": 3.05, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.4969081884479938, |
|
"grad_norm": 0.44066616892814636, |
|
"learning_rate": 5.921434184683479e-05, |
|
"loss": 3.0452, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 0.4983870818659938, |
|
"grad_norm": 0.4224984049797058, |
|
"learning_rate": 5.896049913135386e-05, |
|
"loss": 3.0474, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 0.49986597528399374, |
|
"grad_norm": 0.4076259434223175, |
|
"learning_rate": 5.870641757435775e-05, |
|
"loss": 3.0424, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 0.5013448687019937, |
|
"grad_norm": 0.6098340153694153, |
|
"learning_rate": 5.845210394837366e-05, |
|
"loss": 3.0581, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 0.5028237621199937, |
|
"grad_norm": 1.0002901554107666, |
|
"learning_rate": 5.8197565032114533e-05, |
|
"loss": 3.0335, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.5043026555379937, |
|
"grad_norm": 0.4866860508918762, |
|
"learning_rate": 5.7942807610298456e-05, |
|
"loss": 3.0329, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 0.5057815489559937, |
|
"grad_norm": 0.4324921667575836, |
|
"learning_rate": 5.768783847346779e-05, |
|
"loss": 3.0366, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 0.5072604423739937, |
|
"grad_norm": 0.40503060817718506, |
|
"learning_rate": 5.743266441780808e-05, |
|
"loss": 3.0461, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 0.5087393357919936, |
|
"grad_norm": 0.38576483726501465, |
|
"learning_rate": 5.717729224496703e-05, |
|
"loss": 3.0238, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 0.5102182292099936, |
|
"grad_norm": 0.4007696211338043, |
|
"learning_rate": 5.6921728761873086e-05, |
|
"loss": 3.0221, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.5116971226279936, |
|
"grad_norm": 0.4254515469074249, |
|
"learning_rate": 5.6665980780554096e-05, |
|
"loss": 3.0421, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 0.5131760160459936, |
|
"grad_norm": 0.42919921875, |
|
"learning_rate": 5.6410055117955695e-05, |
|
"loss": 3.0435, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 0.5146549094639936, |
|
"grad_norm": 0.45048367977142334, |
|
"learning_rate": 5.615395859575958e-05, |
|
"loss": 3.0331, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 0.5161338028819935, |
|
"grad_norm": 0.3860481381416321, |
|
"learning_rate": 5.589769804020173e-05, |
|
"loss": 3.0255, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 0.5176126962999935, |
|
"grad_norm": 0.3789386749267578, |
|
"learning_rate": 5.5641280281890394e-05, |
|
"loss": 3.0364, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.5190915897179935, |
|
"grad_norm": 0.3918616473674774, |
|
"learning_rate": 5.538471215562406e-05, |
|
"loss": 3.0288, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 0.5205704831359935, |
|
"grad_norm": 0.5674075484275818, |
|
"learning_rate": 5.5128000500209254e-05, |
|
"loss": 3.034, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 0.5220493765539935, |
|
"grad_norm": 0.38289138674736023, |
|
"learning_rate": 5.48711521582783e-05, |
|
"loss": 3.0228, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 0.5235282699719934, |
|
"grad_norm": 0.5652275681495667, |
|
"learning_rate": 5.461417397610682e-05, |
|
"loss": 3.0148, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 0.5250071633899934, |
|
"grad_norm": 0.39682313799858093, |
|
"learning_rate": 5.4357072803431396e-05, |
|
"loss": 3.0168, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.5264860568079934, |
|
"grad_norm": 0.5409131646156311, |
|
"learning_rate": 5.4099855493266896e-05, |
|
"loss": 3.0071, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 0.5279649502259934, |
|
"grad_norm": 0.465202659368515, |
|
"learning_rate": 5.3842528901723786e-05, |
|
"loss": 3.0236, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 0.5294438436439933, |
|
"grad_norm": 0.4230177104473114, |
|
"learning_rate": 5.358509988782543e-05, |
|
"loss": 3.0209, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 0.5309227370619933, |
|
"grad_norm": 0.3867465555667877, |
|
"learning_rate": 5.332757531332529e-05, |
|
"loss": 3.0212, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 0.5324016304799933, |
|
"grad_norm": 0.57347172498703, |
|
"learning_rate": 5.306996204252397e-05, |
|
"loss": 3.0197, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.5338805238979933, |
|
"grad_norm": 0.45516273379325867, |
|
"learning_rate": 5.2812266942086256e-05, |
|
"loss": 3.0118, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 0.5353594173159933, |
|
"grad_norm": 0.45842480659484863, |
|
"learning_rate": 5.2554496880858106e-05, |
|
"loss": 3.0229, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 0.5368383107339932, |
|
"grad_norm": 0.4081624448299408, |
|
"learning_rate": 5.2296658729683555e-05, |
|
"loss": 3.0109, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 0.5383172041519932, |
|
"grad_norm": 0.36024734377861023, |
|
"learning_rate": 5.203875936122158e-05, |
|
"loss": 3.007, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 0.5397960975699932, |
|
"grad_norm": 0.5755016803741455, |
|
"learning_rate": 5.178080564976287e-05, |
|
"loss": 3.0073, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.5412749909879933, |
|
"grad_norm": 0.4267408847808838, |
|
"learning_rate": 5.152280447104665e-05, |
|
"loss": 3.0077, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 0.5427538844059933, |
|
"grad_norm": 0.4339446723461151, |
|
"learning_rate": 5.126476270207739e-05, |
|
"loss": 2.9991, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 0.5442327778239932, |
|
"grad_norm": 0.3711448907852173, |
|
"learning_rate": 5.1006687220941455e-05, |
|
"loss": 3.0091, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 0.5457116712419932, |
|
"grad_norm": 0.4235258996486664, |
|
"learning_rate": 5.074858490662384e-05, |
|
"loss": 3.0015, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 0.5471905646599932, |
|
"grad_norm": 0.3901888430118561, |
|
"learning_rate": 5.0490462638824764e-05, |
|
"loss": 2.9862, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.5486694580779932, |
|
"grad_norm": 0.40519407391548157, |
|
"learning_rate": 5.023232729777628e-05, |
|
"loss": 3.0052, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 0.5501483514959932, |
|
"grad_norm": 0.5243799686431885, |
|
"learning_rate": 4.997418576405896e-05, |
|
"loss": 3.0002, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 0.5516272449139931, |
|
"grad_norm": 0.444050133228302, |
|
"learning_rate": 4.9716044918418414e-05, |
|
"loss": 3.0037, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 0.5531061383319931, |
|
"grad_norm": 0.3496316075325012, |
|
"learning_rate": 4.945791164158188e-05, |
|
"loss": 3.0084, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 0.5545850317499931, |
|
"grad_norm": 0.5127915740013123, |
|
"learning_rate": 4.9199792814074896e-05, |
|
"loss": 2.9986, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.5560639251679931, |
|
"grad_norm": 0.4601123332977295, |
|
"learning_rate": 4.8941695316037865e-05, |
|
"loss": 3.0057, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 0.5575428185859931, |
|
"grad_norm": 0.48755237460136414, |
|
"learning_rate": 4.868362602704258e-05, |
|
"loss": 2.9809, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 0.559021712003993, |
|
"grad_norm": 0.3724111318588257, |
|
"learning_rate": 4.842559182590899e-05, |
|
"loss": 2.9975, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 0.560500605421993, |
|
"grad_norm": 0.46181684732437134, |
|
"learning_rate": 4.816759959052177e-05, |
|
"loss": 2.9781, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 0.561979498839993, |
|
"grad_norm": 0.39748480916023254, |
|
"learning_rate": 4.790965619764698e-05, |
|
"loss": 2.9965, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.563458392257993, |
|
"grad_norm": 0.5718439221382141, |
|
"learning_rate": 4.76517685227488e-05, |
|
"loss": 2.9806, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 0.5649372856759929, |
|
"grad_norm": 0.5939317941665649, |
|
"learning_rate": 4.7393943439806264e-05, |
|
"loss": 2.9801, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 0.5664161790939929, |
|
"grad_norm": 0.4281553626060486, |
|
"learning_rate": 4.713618782112997e-05, |
|
"loss": 2.9829, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 0.5678950725119929, |
|
"grad_norm": 0.37646615505218506, |
|
"learning_rate": 4.6878508537179015e-05, |
|
"loss": 2.9829, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 0.5693739659299929, |
|
"grad_norm": 0.4106582701206207, |
|
"learning_rate": 4.662091245637777e-05, |
|
"loss": 2.9694, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.5708528593479929, |
|
"grad_norm": 0.3310515582561493, |
|
"learning_rate": 4.6363406444932814e-05, |
|
"loss": 2.9799, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 0.5723317527659928, |
|
"grad_norm": 0.36721667647361755, |
|
"learning_rate": 4.610599736664996e-05, |
|
"loss": 2.9794, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 0.5738106461839928, |
|
"grad_norm": 0.45474308729171753, |
|
"learning_rate": 4.5848692082751296e-05, |
|
"loss": 2.9848, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 0.5752895396019928, |
|
"grad_norm": 0.6072131991386414, |
|
"learning_rate": 4.559149745169218e-05, |
|
"loss": 2.972, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 0.5767684330199928, |
|
"grad_norm": 0.486600786447525, |
|
"learning_rate": 4.533442032897864e-05, |
|
"loss": 2.9602, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.5782473264379928, |
|
"grad_norm": 0.4024549126625061, |
|
"learning_rate": 4.5077467566984474e-05, |
|
"loss": 2.9852, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 0.5797262198559927, |
|
"grad_norm": 0.3547488749027252, |
|
"learning_rate": 4.4820646014768644e-05, |
|
"loss": 2.9794, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 0.5812051132739927, |
|
"grad_norm": 0.38729000091552734, |
|
"learning_rate": 4.456396251789274e-05, |
|
"loss": 2.9822, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 0.5826840066919927, |
|
"grad_norm": 0.35460221767425537, |
|
"learning_rate": 4.430742391823853e-05, |
|
"loss": 2.9768, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 0.5841629001099927, |
|
"grad_norm": 0.3545529544353485, |
|
"learning_rate": 4.405103705382547e-05, |
|
"loss": 2.9681, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.5856417935279927, |
|
"grad_norm": 0.3542696237564087, |
|
"learning_rate": 4.379480875862859e-05, |
|
"loss": 2.9748, |
|
"step": 7920 |
|
}, |
|
{ |
|
"epoch": 0.5871206869459926, |
|
"grad_norm": 0.34213724732398987, |
|
"learning_rate": 4.3538745862396275e-05, |
|
"loss": 2.969, |
|
"step": 7940 |
|
}, |
|
{ |
|
"epoch": 0.5885995803639926, |
|
"grad_norm": 0.35730448365211487, |
|
"learning_rate": 4.328285519046815e-05, |
|
"loss": 2.9627, |
|
"step": 7960 |
|
}, |
|
{ |
|
"epoch": 0.5900784737819926, |
|
"grad_norm": 0.4420771598815918, |
|
"learning_rate": 4.302714356359327e-05, |
|
"loss": 2.9781, |
|
"step": 7980 |
|
}, |
|
{ |
|
"epoch": 0.5915573671999926, |
|
"grad_norm": 0.47289857268333435, |
|
"learning_rate": 4.2771617797748256e-05, |
|
"loss": 2.9637, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.5930362606179926, |
|
"grad_norm": 0.4006676971912384, |
|
"learning_rate": 4.251628470395556e-05, |
|
"loss": 2.9721, |
|
"step": 8020 |
|
}, |
|
{ |
|
"epoch": 0.5945151540359925, |
|
"grad_norm": 0.39483192563056946, |
|
"learning_rate": 4.226115108810201e-05, |
|
"loss": 2.9607, |
|
"step": 8040 |
|
}, |
|
{ |
|
"epoch": 0.5959940474539925, |
|
"grad_norm": 0.49096304178237915, |
|
"learning_rate": 4.20062237507574e-05, |
|
"loss": 2.9567, |
|
"step": 8060 |
|
}, |
|
{ |
|
"epoch": 0.5974729408719925, |
|
"grad_norm": 0.373417466878891, |
|
"learning_rate": 4.175150948699311e-05, |
|
"loss": 2.965, |
|
"step": 8080 |
|
}, |
|
{ |
|
"epoch": 0.5989518342899925, |
|
"grad_norm": 0.33696213364601135, |
|
"learning_rate": 4.149701508620109e-05, |
|
"loss": 2.9636, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.6004307277079924, |
|
"grad_norm": 0.5063782930374146, |
|
"learning_rate": 4.124274733191291e-05, |
|
"loss": 2.9737, |
|
"step": 8120 |
|
}, |
|
{ |
|
"epoch": 0.6019096211259924, |
|
"grad_norm": 0.39363813400268555, |
|
"learning_rate": 4.098871300161878e-05, |
|
"loss": 2.9516, |
|
"step": 8140 |
|
}, |
|
{ |
|
"epoch": 0.6033885145439924, |
|
"grad_norm": 0.3740212023258209, |
|
"learning_rate": 4.07349188665871e-05, |
|
"loss": 2.9472, |
|
"step": 8160 |
|
}, |
|
{ |
|
"epoch": 0.6048674079619925, |
|
"grad_norm": 0.42378878593444824, |
|
"learning_rate": 4.048137169168385e-05, |
|
"loss": 2.9684, |
|
"step": 8180 |
|
}, |
|
{ |
|
"epoch": 0.6063463013799925, |
|
"grad_norm": 0.4358353614807129, |
|
"learning_rate": 4.02280782351923e-05, |
|
"loss": 2.9643, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 0.6078251947979924, |
|
"grad_norm": 0.35567548871040344, |
|
"learning_rate": 3.997504524863291e-05, |
|
"loss": 2.9435, |
|
"step": 8220 |
|
}, |
|
{ |
|
"epoch": 0.6093040882159924, |
|
"grad_norm": 0.3486579358577728, |
|
"learning_rate": 3.972227947658325e-05, |
|
"loss": 2.9605, |
|
"step": 8240 |
|
}, |
|
{ |
|
"epoch": 0.6107829816339924, |
|
"grad_norm": 0.42745381593704224, |
|
"learning_rate": 3.946978765649838e-05, |
|
"loss": 2.9481, |
|
"step": 8260 |
|
}, |
|
{ |
|
"epoch": 0.6122618750519924, |
|
"grad_norm": 0.4889651834964752, |
|
"learning_rate": 3.921757651853117e-05, |
|
"loss": 2.9492, |
|
"step": 8280 |
|
}, |
|
{ |
|
"epoch": 0.6137407684699924, |
|
"grad_norm": 0.44278714060783386, |
|
"learning_rate": 3.896565278535291e-05, |
|
"loss": 2.9578, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 0.6152196618879923, |
|
"grad_norm": 0.42498791217803955, |
|
"learning_rate": 3.8714023171974135e-05, |
|
"loss": 2.9439, |
|
"step": 8320 |
|
}, |
|
{ |
|
"epoch": 0.6166985553059923, |
|
"grad_norm": 0.36626169085502625, |
|
"learning_rate": 3.846269438556568e-05, |
|
"loss": 2.9549, |
|
"step": 8340 |
|
}, |
|
{ |
|
"epoch": 0.6181774487239923, |
|
"grad_norm": 0.369567334651947, |
|
"learning_rate": 3.8211673125279776e-05, |
|
"loss": 2.947, |
|
"step": 8360 |
|
}, |
|
{ |
|
"epoch": 0.6196563421419923, |
|
"grad_norm": 0.43409767746925354, |
|
"learning_rate": 3.7960966082071636e-05, |
|
"loss": 2.9363, |
|
"step": 8380 |
|
}, |
|
{ |
|
"epoch": 0.6211352355599923, |
|
"grad_norm": 0.4202839434146881, |
|
"learning_rate": 3.771057993852101e-05, |
|
"loss": 2.9501, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 0.6226141289779922, |
|
"grad_norm": 0.3709544241428375, |
|
"learning_rate": 3.746052136865409e-05, |
|
"loss": 2.9452, |
|
"step": 8420 |
|
}, |
|
{ |
|
"epoch": 0.6240930223959922, |
|
"grad_norm": 0.3776955008506775, |
|
"learning_rate": 3.721079703776561e-05, |
|
"loss": 2.9249, |
|
"step": 8440 |
|
}, |
|
{ |
|
"epoch": 0.6255719158139922, |
|
"grad_norm": 0.41565999388694763, |
|
"learning_rate": 3.6961413602241215e-05, |
|
"loss": 2.9304, |
|
"step": 8460 |
|
}, |
|
{ |
|
"epoch": 0.6270508092319922, |
|
"grad_norm": 0.3948330581188202, |
|
"learning_rate": 3.6712377709379944e-05, |
|
"loss": 2.9371, |
|
"step": 8480 |
|
}, |
|
{ |
|
"epoch": 0.6285297026499922, |
|
"grad_norm": 0.3861006498336792, |
|
"learning_rate": 3.646369599721716e-05, |
|
"loss": 2.9399, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 0.6300085960679921, |
|
"grad_norm": 0.3641924560070038, |
|
"learning_rate": 3.621537509434757e-05, |
|
"loss": 2.9283, |
|
"step": 8520 |
|
}, |
|
{ |
|
"epoch": 0.6314874894859921, |
|
"grad_norm": 0.4140797555446625, |
|
"learning_rate": 3.596742161974848e-05, |
|
"loss": 2.9321, |
|
"step": 8540 |
|
}, |
|
{ |
|
"epoch": 0.6329663829039921, |
|
"grad_norm": 0.40179234743118286, |
|
"learning_rate": 3.571984218260348e-05, |
|
"loss": 2.9439, |
|
"step": 8560 |
|
}, |
|
{ |
|
"epoch": 0.6344452763219921, |
|
"grad_norm": 0.4169887602329254, |
|
"learning_rate": 3.547264338212619e-05, |
|
"loss": 2.9299, |
|
"step": 8580 |
|
}, |
|
{ |
|
"epoch": 0.635924169739992, |
|
"grad_norm": 0.4229363203048706, |
|
"learning_rate": 3.522583180738436e-05, |
|
"loss": 2.927, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 0.637403063157992, |
|
"grad_norm": 0.33680644631385803, |
|
"learning_rate": 3.497941403712429e-05, |
|
"loss": 2.9373, |
|
"step": 8620 |
|
}, |
|
{ |
|
"epoch": 0.638881956575992, |
|
"grad_norm": 0.39601895213127136, |
|
"learning_rate": 3.473339663959547e-05, |
|
"loss": 2.9363, |
|
"step": 8640 |
|
}, |
|
{ |
|
"epoch": 0.640360849993992, |
|
"grad_norm": 0.356684148311615, |
|
"learning_rate": 3.448778617237543e-05, |
|
"loss": 2.9275, |
|
"step": 8660 |
|
}, |
|
{ |
|
"epoch": 0.641839743411992, |
|
"grad_norm": 0.37500935792922974, |
|
"learning_rate": 3.424258918219503e-05, |
|
"loss": 2.9224, |
|
"step": 8680 |
|
}, |
|
{ |
|
"epoch": 0.6433186368299919, |
|
"grad_norm": 0.3620283901691437, |
|
"learning_rate": 3.399781220476394e-05, |
|
"loss": 2.9294, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 0.6447975302479919, |
|
"grad_norm": 0.3849022090435028, |
|
"learning_rate": 3.3753461764596375e-05, |
|
"loss": 2.9332, |
|
"step": 8720 |
|
}, |
|
{ |
|
"epoch": 0.6462764236659919, |
|
"grad_norm": 0.598598837852478, |
|
"learning_rate": 3.350954437483725e-05, |
|
"loss": 2.9268, |
|
"step": 8740 |
|
}, |
|
{ |
|
"epoch": 0.6477553170839919, |
|
"grad_norm": 0.42141565680503845, |
|
"learning_rate": 3.326606653708857e-05, |
|
"loss": 2.926, |
|
"step": 8760 |
|
}, |
|
{ |
|
"epoch": 0.6492342105019919, |
|
"grad_norm": 0.39355704188346863, |
|
"learning_rate": 3.302303474123608e-05, |
|
"loss": 2.9302, |
|
"step": 8780 |
|
}, |
|
{ |
|
"epoch": 0.6507131039199918, |
|
"grad_norm": 0.3644985258579254, |
|
"learning_rate": 3.278045546527633e-05, |
|
"loss": 2.9178, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 0.6521919973379918, |
|
"grad_norm": 0.3427523672580719, |
|
"learning_rate": 3.253833517514397e-05, |
|
"loss": 2.9291, |
|
"step": 8820 |
|
}, |
|
{ |
|
"epoch": 0.6536708907559918, |
|
"grad_norm": 0.433736652135849, |
|
"learning_rate": 3.22966803245394e-05, |
|
"loss": 2.914, |
|
"step": 8840 |
|
}, |
|
{ |
|
"epoch": 0.6551497841739918, |
|
"grad_norm": 0.38325321674346924, |
|
"learning_rate": 3.205549735475677e-05, |
|
"loss": 2.9242, |
|
"step": 8860 |
|
}, |
|
{ |
|
"epoch": 0.6566286775919918, |
|
"grad_norm": 0.4170295000076294, |
|
"learning_rate": 3.181479269451231e-05, |
|
"loss": 2.9175, |
|
"step": 8880 |
|
}, |
|
{ |
|
"epoch": 0.6581075710099917, |
|
"grad_norm": 0.4253075420856476, |
|
"learning_rate": 3.1574572759772885e-05, |
|
"loss": 2.9211, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 0.6595864644279917, |
|
"grad_norm": 0.38273829221725464, |
|
"learning_rate": 3.133484395358507e-05, |
|
"loss": 2.914, |
|
"step": 8920 |
|
}, |
|
{ |
|
"epoch": 0.6610653578459917, |
|
"grad_norm": 0.3915143609046936, |
|
"learning_rate": 3.109561266590445e-05, |
|
"loss": 2.9207, |
|
"step": 8940 |
|
}, |
|
{ |
|
"epoch": 0.6625442512639917, |
|
"grad_norm": 0.37426161766052246, |
|
"learning_rate": 3.085688527342524e-05, |
|
"loss": 2.927, |
|
"step": 8960 |
|
}, |
|
{ |
|
"epoch": 0.6640231446819918, |
|
"grad_norm": 0.34895965456962585, |
|
"learning_rate": 3.06186681394104e-05, |
|
"loss": 2.9157, |
|
"step": 8980 |
|
}, |
|
{ |
|
"epoch": 0.6655020380999916, |
|
"grad_norm": 0.3564130663871765, |
|
"learning_rate": 3.038096761352199e-05, |
|
"loss": 2.9178, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 0.6669809315179916, |
|
"grad_norm": 0.3817369043827057, |
|
"learning_rate": 3.0143790031651863e-05, |
|
"loss": 2.9252, |
|
"step": 9020 |
|
}, |
|
{ |
|
"epoch": 0.6684598249359917, |
|
"grad_norm": 0.37359967827796936, |
|
"learning_rate": 2.9907141715752906e-05, |
|
"loss": 2.9134, |
|
"step": 9040 |
|
}, |
|
{ |
|
"epoch": 0.6699387183539917, |
|
"grad_norm": 0.3740251660346985, |
|
"learning_rate": 2.9671028973670418e-05, |
|
"loss": 2.9175, |
|
"step": 9060 |
|
}, |
|
{ |
|
"epoch": 0.6714176117719917, |
|
"grad_norm": 0.3896474242210388, |
|
"learning_rate": 2.943545809897398e-05, |
|
"loss": 2.9153, |
|
"step": 9080 |
|
}, |
|
{ |
|
"epoch": 0.6728965051899916, |
|
"grad_norm": 0.4986639618873596, |
|
"learning_rate": 2.9200435370789792e-05, |
|
"loss": 2.9215, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 0.6743753986079916, |
|
"grad_norm": 0.3836432099342346, |
|
"learning_rate": 2.8965967053633225e-05, |
|
"loss": 2.9123, |
|
"step": 9120 |
|
}, |
|
{ |
|
"epoch": 0.6758542920259916, |
|
"grad_norm": 0.3539137840270996, |
|
"learning_rate": 2.873205939724185e-05, |
|
"loss": 2.9172, |
|
"step": 9140 |
|
}, |
|
{ |
|
"epoch": 0.6773331854439916, |
|
"grad_norm": 0.4474085569381714, |
|
"learning_rate": 2.8498718636408862e-05, |
|
"loss": 2.9126, |
|
"step": 9160 |
|
}, |
|
{ |
|
"epoch": 0.6788120788619915, |
|
"grad_norm": 0.3727508783340454, |
|
"learning_rate": 2.8265950990816926e-05, |
|
"loss": 2.9136, |
|
"step": 9180 |
|
}, |
|
{ |
|
"epoch": 0.6802909722799915, |
|
"grad_norm": 0.3365872800350189, |
|
"learning_rate": 2.8033762664872293e-05, |
|
"loss": 2.9074, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 0.6817698656979915, |
|
"grad_norm": 0.3774373233318329, |
|
"learning_rate": 2.7802159847539545e-05, |
|
"loss": 2.9078, |
|
"step": 9220 |
|
}, |
|
{ |
|
"epoch": 0.6832487591159915, |
|
"grad_norm": 0.34899139404296875, |
|
"learning_rate": 2.757114871217656e-05, |
|
"loss": 2.9117, |
|
"step": 9240 |
|
}, |
|
{ |
|
"epoch": 0.6847276525339915, |
|
"grad_norm": 0.3489275276660919, |
|
"learning_rate": 2.7340735416369934e-05, |
|
"loss": 2.9, |
|
"step": 9260 |
|
}, |
|
{ |
|
"epoch": 0.6862065459519914, |
|
"grad_norm": 0.3772989511489868, |
|
"learning_rate": 2.7110926101770927e-05, |
|
"loss": 2.8968, |
|
"step": 9280 |
|
}, |
|
{ |
|
"epoch": 0.6876854393699914, |
|
"grad_norm": 0.3743598461151123, |
|
"learning_rate": 2.688172689393172e-05, |
|
"loss": 2.8978, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 0.6891643327879914, |
|
"grad_norm": 0.3543947637081146, |
|
"learning_rate": 2.665314390214212e-05, |
|
"loss": 2.9029, |
|
"step": 9320 |
|
}, |
|
{ |
|
"epoch": 0.6906432262059914, |
|
"grad_norm": 0.3778015673160553, |
|
"learning_rate": 2.6425183219266746e-05, |
|
"loss": 2.8875, |
|
"step": 9340 |
|
}, |
|
{ |
|
"epoch": 0.6921221196239914, |
|
"grad_norm": 0.3994954824447632, |
|
"learning_rate": 2.6197850921582633e-05, |
|
"loss": 2.8988, |
|
"step": 9360 |
|
}, |
|
{ |
|
"epoch": 0.6936010130419913, |
|
"grad_norm": 0.4375861883163452, |
|
"learning_rate": 2.5971153068617195e-05, |
|
"loss": 2.8888, |
|
"step": 9380 |
|
}, |
|
{ |
|
"epoch": 0.6950799064599913, |
|
"grad_norm": 0.3965347111225128, |
|
"learning_rate": 2.57450957029868e-05, |
|
"loss": 2.896, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 0.6965587998779913, |
|
"grad_norm": 0.3397294580936432, |
|
"learning_rate": 2.5519684850235703e-05, |
|
"loss": 2.8979, |
|
"step": 9420 |
|
}, |
|
{ |
|
"epoch": 0.6980376932959913, |
|
"grad_norm": 0.38435131311416626, |
|
"learning_rate": 2.529492651867531e-05, |
|
"loss": 2.8914, |
|
"step": 9440 |
|
}, |
|
{ |
|
"epoch": 0.6995165867139913, |
|
"grad_norm": 0.4583021402359009, |
|
"learning_rate": 2.5070826699224202e-05, |
|
"loss": 2.8994, |
|
"step": 9460 |
|
}, |
|
{ |
|
"epoch": 0.7009954801319912, |
|
"grad_norm": 0.35780495405197144, |
|
"learning_rate": 2.4847391365248346e-05, |
|
"loss": 2.904, |
|
"step": 9480 |
|
}, |
|
{ |
|
"epoch": 0.7024743735499912, |
|
"grad_norm": 0.48425179719924927, |
|
"learning_rate": 2.4624626472401834e-05, |
|
"loss": 2.8902, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 0.7039532669679912, |
|
"grad_norm": 0.34029942750930786, |
|
"learning_rate": 2.440253795846827e-05, |
|
"loss": 2.8964, |
|
"step": 9520 |
|
}, |
|
{ |
|
"epoch": 0.7054321603859912, |
|
"grad_norm": 0.33855918049812317, |
|
"learning_rate": 2.4181131743202377e-05, |
|
"loss": 2.8917, |
|
"step": 9540 |
|
}, |
|
{ |
|
"epoch": 0.7069110538039912, |
|
"grad_norm": 0.3716065287590027, |
|
"learning_rate": 2.3960413728172277e-05, |
|
"loss": 2.9, |
|
"step": 9560 |
|
}, |
|
{ |
|
"epoch": 0.7083899472219911, |
|
"grad_norm": 0.3275023102760315, |
|
"learning_rate": 2.374038979660214e-05, |
|
"loss": 2.9032, |
|
"step": 9580 |
|
}, |
|
{ |
|
"epoch": 0.7098688406399911, |
|
"grad_norm": 0.3434765040874481, |
|
"learning_rate": 2.352106581321542e-05, |
|
"loss": 2.8992, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 0.7113477340579911, |
|
"grad_norm": 0.3282793462276459, |
|
"learning_rate": 2.3302447624078427e-05, |
|
"loss": 2.8918, |
|
"step": 9620 |
|
}, |
|
{ |
|
"epoch": 0.7128266274759911, |
|
"grad_norm": 0.4167431890964508, |
|
"learning_rate": 2.3084541056444654e-05, |
|
"loss": 2.8844, |
|
"step": 9640 |
|
}, |
|
{ |
|
"epoch": 0.714305520893991, |
|
"grad_norm": 0.3788709342479706, |
|
"learning_rate": 2.2867351918599333e-05, |
|
"loss": 2.8737, |
|
"step": 9660 |
|
}, |
|
{ |
|
"epoch": 0.715784414311991, |
|
"grad_norm": 0.32435911893844604, |
|
"learning_rate": 2.2650885999704628e-05, |
|
"loss": 2.8946, |
|
"step": 9680 |
|
}, |
|
{ |
|
"epoch": 0.717263307729991, |
|
"grad_norm": 0.37471237778663635, |
|
"learning_rate": 2.243514906964539e-05, |
|
"loss": 2.8935, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 0.718742201147991, |
|
"grad_norm": 0.3652307093143463, |
|
"learning_rate": 2.222014687887532e-05, |
|
"loss": 2.8767, |
|
"step": 9720 |
|
}, |
|
{ |
|
"epoch": 0.720221094565991, |
|
"grad_norm": 0.37537747621536255, |
|
"learning_rate": 2.2005885158263645e-05, |
|
"loss": 2.8802, |
|
"step": 9740 |
|
}, |
|
{ |
|
"epoch": 0.7216999879839909, |
|
"grad_norm": 0.40164393186569214, |
|
"learning_rate": 2.1792369618942455e-05, |
|
"loss": 2.881, |
|
"step": 9760 |
|
}, |
|
{ |
|
"epoch": 0.7231788814019909, |
|
"grad_norm": 0.35087114572525024, |
|
"learning_rate": 2.1579605952154435e-05, |
|
"loss": 2.8904, |
|
"step": 9780 |
|
}, |
|
{ |
|
"epoch": 0.7246577748199909, |
|
"grad_norm": 0.4332689046859741, |
|
"learning_rate": 2.136759982910107e-05, |
|
"loss": 2.8778, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 0.726136668237991, |
|
"grad_norm": 0.34787076711654663, |
|
"learning_rate": 2.1156356900791695e-05, |
|
"loss": 2.8845, |
|
"step": 9820 |
|
}, |
|
{ |
|
"epoch": 0.727615561655991, |
|
"grad_norm": 0.37883126735687256, |
|
"learning_rate": 2.0945882797892673e-05, |
|
"loss": 2.8876, |
|
"step": 9840 |
|
}, |
|
{ |
|
"epoch": 0.7290944550739908, |
|
"grad_norm": 0.3691736161708832, |
|
"learning_rate": 2.0736183130577335e-05, |
|
"loss": 2.8887, |
|
"step": 9860 |
|
}, |
|
{ |
|
"epoch": 0.7305733484919908, |
|
"grad_norm": 0.31982922554016113, |
|
"learning_rate": 2.0527263488376552e-05, |
|
"loss": 2.8815, |
|
"step": 9880 |
|
}, |
|
{ |
|
"epoch": 0.7320522419099909, |
|
"grad_norm": 0.3566115200519562, |
|
"learning_rate": 2.031912944002966e-05, |
|
"loss": 2.8884, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 0.7335311353279909, |
|
"grad_norm": 0.33468520641326904, |
|
"learning_rate": 2.0111786533336e-05, |
|
"loss": 2.8818, |
|
"step": 9920 |
|
}, |
|
{ |
|
"epoch": 0.7350100287459909, |
|
"grad_norm": 0.3208761513233185, |
|
"learning_rate": 1.9905240295007145e-05, |
|
"loss": 2.8803, |
|
"step": 9940 |
|
}, |
|
{ |
|
"epoch": 0.7364889221639908, |
|
"grad_norm": 0.34477704763412476, |
|
"learning_rate": 1.9699496230519497e-05, |
|
"loss": 2.8917, |
|
"step": 9960 |
|
}, |
|
{ |
|
"epoch": 0.7379678155819908, |
|
"grad_norm": 0.37035301327705383, |
|
"learning_rate": 1.949455982396755e-05, |
|
"loss": 2.8786, |
|
"step": 9980 |
|
}, |
|
{ |
|
"epoch": 0.7394467089999908, |
|
"grad_norm": 0.3365253210067749, |
|
"learning_rate": 1.929043653791775e-05, |
|
"loss": 2.8675, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 0.7409256024179908, |
|
"grad_norm": 0.3333218991756439, |
|
"learning_rate": 1.9087131813262886e-05, |
|
"loss": 2.8687, |
|
"step": 10020 |
|
}, |
|
{ |
|
"epoch": 0.7424044958359908, |
|
"grad_norm": 0.3710993230342865, |
|
"learning_rate": 1.8884651069076992e-05, |
|
"loss": 2.8718, |
|
"step": 10040 |
|
}, |
|
{ |
|
"epoch": 0.7438833892539907, |
|
"grad_norm": 0.36842554807662964, |
|
"learning_rate": 1.8682999702471014e-05, |
|
"loss": 2.8631, |
|
"step": 10060 |
|
}, |
|
{ |
|
"epoch": 0.7453622826719907, |
|
"grad_norm": 0.35305920243263245, |
|
"learning_rate": 1.8482183088448862e-05, |
|
"loss": 2.8708, |
|
"step": 10080 |
|
}, |
|
{ |
|
"epoch": 0.7468411760899907, |
|
"grad_norm": 0.3375717103481293, |
|
"learning_rate": 1.828220657976419e-05, |
|
"loss": 2.8817, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 0.7483200695079907, |
|
"grad_norm": 0.37821289896965027, |
|
"learning_rate": 1.8083075506777676e-05, |
|
"loss": 2.8787, |
|
"step": 10120 |
|
}, |
|
{ |
|
"epoch": 0.7497989629259906, |
|
"grad_norm": 0.3393423557281494, |
|
"learning_rate": 1.7884795177314995e-05, |
|
"loss": 2.8681, |
|
"step": 10140 |
|
}, |
|
{ |
|
"epoch": 0.7512778563439906, |
|
"grad_norm": 0.35140156745910645, |
|
"learning_rate": 1.7687370876525273e-05, |
|
"loss": 2.8742, |
|
"step": 10160 |
|
}, |
|
{ |
|
"epoch": 0.7527567497619906, |
|
"grad_norm": 0.3378312587738037, |
|
"learning_rate": 1.7490807866740268e-05, |
|
"loss": 2.8736, |
|
"step": 10180 |
|
}, |
|
{ |
|
"epoch": 0.7542356431799906, |
|
"grad_norm": 0.37517204880714417, |
|
"learning_rate": 1.7295111387334103e-05, |
|
"loss": 2.8623, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 0.7557145365979906, |
|
"grad_norm": 0.3355712890625, |
|
"learning_rate": 1.7100286654583543e-05, |
|
"loss": 2.8721, |
|
"step": 10220 |
|
}, |
|
{ |
|
"epoch": 0.7571934300159905, |
|
"grad_norm": 0.3331904411315918, |
|
"learning_rate": 1.690633886152903e-05, |
|
"loss": 2.8701, |
|
"step": 10240 |
|
}, |
|
{ |
|
"epoch": 0.7586723234339905, |
|
"grad_norm": 0.34373047947883606, |
|
"learning_rate": 1.6713273177836276e-05, |
|
"loss": 2.8718, |
|
"step": 10260 |
|
}, |
|
{ |
|
"epoch": 0.7601512168519905, |
|
"grad_norm": 0.3202342987060547, |
|
"learning_rate": 1.6521094749658328e-05, |
|
"loss": 2.8658, |
|
"step": 10280 |
|
}, |
|
{ |
|
"epoch": 0.7616301102699905, |
|
"grad_norm": 0.33778509497642517, |
|
"learning_rate": 1.6329808699498588e-05, |
|
"loss": 2.8786, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 0.7631090036879905, |
|
"grad_norm": 0.33873429894447327, |
|
"learning_rate": 1.613942012607414e-05, |
|
"loss": 2.8731, |
|
"step": 10320 |
|
}, |
|
{ |
|
"epoch": 0.7645878971059904, |
|
"grad_norm": 0.3424777090549469, |
|
"learning_rate": 1.5949934104179887e-05, |
|
"loss": 2.8715, |
|
"step": 10340 |
|
}, |
|
{ |
|
"epoch": 0.7660667905239904, |
|
"grad_norm": 0.33158713579177856, |
|
"learning_rate": 1.5761355684553286e-05, |
|
"loss": 2.8545, |
|
"step": 10360 |
|
}, |
|
{ |
|
"epoch": 0.7675456839419904, |
|
"grad_norm": 0.3395291566848755, |
|
"learning_rate": 1.557368989373973e-05, |
|
"loss": 2.8533, |
|
"step": 10380 |
|
}, |
|
{ |
|
"epoch": 0.7690245773599904, |
|
"grad_norm": 0.31933024525642395, |
|
"learning_rate": 1.5386941733958503e-05, |
|
"loss": 2.8651, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 0.7705034707779904, |
|
"grad_norm": 0.3164694309234619, |
|
"learning_rate": 1.5201116182969538e-05, |
|
"loss": 2.8773, |
|
"step": 10420 |
|
}, |
|
{ |
|
"epoch": 0.7719823641959903, |
|
"grad_norm": 0.35544392466545105, |
|
"learning_rate": 1.50162181939407e-05, |
|
"loss": 2.859, |
|
"step": 10440 |
|
}, |
|
{ |
|
"epoch": 0.7734612576139903, |
|
"grad_norm": 0.3556651175022125, |
|
"learning_rate": 1.4832252695315691e-05, |
|
"loss": 2.8463, |
|
"step": 10460 |
|
}, |
|
{ |
|
"epoch": 0.7749401510319903, |
|
"grad_norm": 0.335028737783432, |
|
"learning_rate": 1.4649224590682802e-05, |
|
"loss": 2.8635, |
|
"step": 10480 |
|
}, |
|
{ |
|
"epoch": 0.7764190444499903, |
|
"grad_norm": 0.4239474833011627, |
|
"learning_rate": 1.4467138758644139e-05, |
|
"loss": 2.8493, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 0.7778979378679903, |
|
"grad_norm": 0.3199774920940399, |
|
"learning_rate": 1.4286000052685556e-05, |
|
"loss": 2.8687, |
|
"step": 10520 |
|
}, |
|
{ |
|
"epoch": 0.7793768312859902, |
|
"grad_norm": 0.3779512643814087, |
|
"learning_rate": 1.4105813301047366e-05, |
|
"loss": 2.8518, |
|
"step": 10540 |
|
}, |
|
{ |
|
"epoch": 0.7808557247039902, |
|
"grad_norm": 0.3382132649421692, |
|
"learning_rate": 1.3926583306595581e-05, |
|
"loss": 2.8572, |
|
"step": 10560 |
|
}, |
|
{ |
|
"epoch": 0.7823346181219902, |
|
"grad_norm": 0.3185078203678131, |
|
"learning_rate": 1.374831484669392e-05, |
|
"loss": 2.8607, |
|
"step": 10580 |
|
}, |
|
{ |
|
"epoch": 0.7838135115399902, |
|
"grad_norm": 0.35780152678489685, |
|
"learning_rate": 1.3571012673076472e-05, |
|
"loss": 2.8564, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 0.7852924049579901, |
|
"grad_norm": 0.3039771616458893, |
|
"learning_rate": 1.3394681511721013e-05, |
|
"loss": 2.8587, |
|
"step": 10620 |
|
}, |
|
{ |
|
"epoch": 0.7867712983759901, |
|
"grad_norm": 0.3119048774242401, |
|
"learning_rate": 1.3219326062723042e-05, |
|
"loss": 2.864, |
|
"step": 10640 |
|
}, |
|
{ |
|
"epoch": 0.7882501917939901, |
|
"grad_norm": 0.3685562312602997, |
|
"learning_rate": 1.304495100017053e-05, |
|
"loss": 2.8551, |
|
"step": 10660 |
|
}, |
|
{ |
|
"epoch": 0.7897290852119901, |
|
"grad_norm": 0.32328301668167114, |
|
"learning_rate": 1.2871560972019314e-05, |
|
"loss": 2.8537, |
|
"step": 10680 |
|
}, |
|
{ |
|
"epoch": 0.7912079786299901, |
|
"grad_norm": 0.32044264674186707, |
|
"learning_rate": 1.2699160599969174e-05, |
|
"loss": 2.8647, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 0.79268687204799, |
|
"grad_norm": 0.39615657925605774, |
|
"learning_rate": 1.2527754479340703e-05, |
|
"loss": 2.8558, |
|
"step": 10720 |
|
}, |
|
{ |
|
"epoch": 0.79416576546599, |
|
"grad_norm": 0.31399622559547424, |
|
"learning_rate": 1.2357347178952788e-05, |
|
"loss": 2.8582, |
|
"step": 10740 |
|
}, |
|
{ |
|
"epoch": 0.79564465888399, |
|
"grad_norm": 0.33324578404426575, |
|
"learning_rate": 1.2187943241000794e-05, |
|
"loss": 2.8447, |
|
"step": 10760 |
|
}, |
|
{ |
|
"epoch": 0.7971235523019901, |
|
"grad_norm": 0.32412442564964294, |
|
"learning_rate": 1.2019547180935552e-05, |
|
"loss": 2.842, |
|
"step": 10780 |
|
}, |
|
{ |
|
"epoch": 0.7986024457199901, |
|
"grad_norm": 0.3198014795780182, |
|
"learning_rate": 1.1852163487342981e-05, |
|
"loss": 2.8594, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 0.80008133913799, |
|
"grad_norm": 0.3332209289073944, |
|
"learning_rate": 1.1685796621824423e-05, |
|
"loss": 2.8542, |
|
"step": 10820 |
|
}, |
|
{ |
|
"epoch": 0.80156023255599, |
|
"grad_norm": 0.3251478374004364, |
|
"learning_rate": 1.1520451018877742e-05, |
|
"loss": 2.8623, |
|
"step": 10840 |
|
}, |
|
{ |
|
"epoch": 0.80303912597399, |
|
"grad_norm": 0.3332981765270233, |
|
"learning_rate": 1.1356131085779131e-05, |
|
"loss": 2.8566, |
|
"step": 10860 |
|
}, |
|
{ |
|
"epoch": 0.80451801939199, |
|
"grad_norm": 0.30493640899658203, |
|
"learning_rate": 1.1192841202465565e-05, |
|
"loss": 2.8596, |
|
"step": 10880 |
|
}, |
|
{ |
|
"epoch": 0.80599691280999, |
|
"grad_norm": 0.3335663974285126, |
|
"learning_rate": 1.1030585721418174e-05, |
|
"loss": 2.854, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 0.8074758062279899, |
|
"grad_norm": 0.3442290127277374, |
|
"learning_rate": 1.0869368967546134e-05, |
|
"loss": 2.8471, |
|
"step": 10920 |
|
}, |
|
{ |
|
"epoch": 0.8089546996459899, |
|
"grad_norm": 0.3200606107711792, |
|
"learning_rate": 1.0709195238071407e-05, |
|
"loss": 2.8553, |
|
"step": 10940 |
|
}, |
|
{ |
|
"epoch": 0.8104335930639899, |
|
"grad_norm": 0.30462324619293213, |
|
"learning_rate": 1.0550068802414231e-05, |
|
"loss": 2.8487, |
|
"step": 10960 |
|
}, |
|
{ |
|
"epoch": 0.8119124864819899, |
|
"grad_norm": 0.3395856022834778, |
|
"learning_rate": 1.0391993902079295e-05, |
|
"loss": 2.8472, |
|
"step": 10980 |
|
}, |
|
{ |
|
"epoch": 0.8133913798999899, |
|
"grad_norm": 0.3614775836467743, |
|
"learning_rate": 1.0234974750542647e-05, |
|
"loss": 2.8427, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 0.8148702733179898, |
|
"grad_norm": 0.3020230829715729, |
|
"learning_rate": 1.0079015533139463e-05, |
|
"loss": 2.8606, |
|
"step": 11020 |
|
}, |
|
{ |
|
"epoch": 0.8163491667359898, |
|
"grad_norm": 0.32456544041633606, |
|
"learning_rate": 9.924120406952431e-06, |
|
"loss": 2.8508, |
|
"step": 11040 |
|
}, |
|
{ |
|
"epoch": 0.8178280601539898, |
|
"grad_norm": 0.3214119076728821, |
|
"learning_rate": 9.77029350070095e-06, |
|
"loss": 2.8391, |
|
"step": 11060 |
|
}, |
|
{ |
|
"epoch": 0.8193069535719898, |
|
"grad_norm": 0.3201681077480316, |
|
"learning_rate": 9.61753891463109e-06, |
|
"loss": 2.8532, |
|
"step": 11080 |
|
}, |
|
{ |
|
"epoch": 0.8207858469899897, |
|
"grad_norm": 0.323337584733963, |
|
"learning_rate": 9.465860720406327e-06, |
|
"loss": 2.8499, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 0.8222647404079897, |
|
"grad_norm": 0.31912675499916077, |
|
"learning_rate": 9.315262960998911e-06, |
|
"loss": 2.852, |
|
"step": 11120 |
|
}, |
|
{ |
|
"epoch": 0.8237436338259897, |
|
"grad_norm": 0.31801870465278625, |
|
"learning_rate": 9.165749650582239e-06, |
|
"loss": 2.8373, |
|
"step": 11140 |
|
}, |
|
{ |
|
"epoch": 0.8252225272439897, |
|
"grad_norm": 0.3083365559577942, |
|
"learning_rate": 9.017324774423785e-06, |
|
"loss": 2.8565, |
|
"step": 11160 |
|
}, |
|
{ |
|
"epoch": 0.8267014206619897, |
|
"grad_norm": 0.34097760915756226, |
|
"learning_rate": 8.869992288778834e-06, |
|
"loss": 2.8389, |
|
"step": 11180 |
|
}, |
|
{ |
|
"epoch": 0.8281803140799896, |
|
"grad_norm": 0.32595744729042053, |
|
"learning_rate": 8.72375612078511e-06, |
|
"loss": 2.8588, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 0.8296592074979896, |
|
"grad_norm": 0.3241618275642395, |
|
"learning_rate": 8.578620168358082e-06, |
|
"loss": 2.8527, |
|
"step": 11220 |
|
}, |
|
{ |
|
"epoch": 0.8311381009159896, |
|
"grad_norm": 0.31303274631500244, |
|
"learning_rate": 8.434588300086988e-06, |
|
"loss": 2.8326, |
|
"step": 11240 |
|
}, |
|
{ |
|
"epoch": 0.8326169943339896, |
|
"grad_norm": 0.3417539596557617, |
|
"learning_rate": 8.291664355131818e-06, |
|
"loss": 2.8477, |
|
"step": 11260 |
|
}, |
|
{ |
|
"epoch": 0.8340958877519896, |
|
"grad_norm": 0.3075898289680481, |
|
"learning_rate": 8.149852143120923e-06, |
|
"loss": 2.8353, |
|
"step": 11280 |
|
}, |
|
{ |
|
"epoch": 0.8355747811699895, |
|
"grad_norm": 0.32699164748191833, |
|
"learning_rate": 8.009155444049499e-06, |
|
"loss": 2.8432, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 0.8370536745879895, |
|
"grad_norm": 0.29232412576675415, |
|
"learning_rate": 7.869578008178808e-06, |
|
"loss": 2.8538, |
|
"step": 11320 |
|
}, |
|
{ |
|
"epoch": 0.8385325680059895, |
|
"grad_norm": 0.2949979901313782, |
|
"learning_rate": 7.731123555936232e-06, |
|
"loss": 2.8494, |
|
"step": 11340 |
|
}, |
|
{ |
|
"epoch": 0.8400114614239895, |
|
"grad_norm": 0.2993783950805664, |
|
"learning_rate": 7.593795777816071e-06, |
|
"loss": 2.8439, |
|
"step": 11360 |
|
}, |
|
{ |
|
"epoch": 0.8414903548419895, |
|
"grad_norm": 0.31987783312797546, |
|
"learning_rate": 7.457598334281235e-06, |
|
"loss": 2.8364, |
|
"step": 11380 |
|
}, |
|
{ |
|
"epoch": 0.8429692482599894, |
|
"grad_norm": 0.3066832721233368, |
|
"learning_rate": 7.322534855665636e-06, |
|
"loss": 2.8414, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 0.8444481416779894, |
|
"grad_norm": 0.3674749433994293, |
|
"learning_rate": 7.1886089420773965e-06, |
|
"loss": 2.8346, |
|
"step": 11420 |
|
}, |
|
{ |
|
"epoch": 0.8459270350959894, |
|
"grad_norm": 0.3142234981060028, |
|
"learning_rate": 7.055824163302943e-06, |
|
"loss": 2.8478, |
|
"step": 11440 |
|
}, |
|
{ |
|
"epoch": 0.8474059285139894, |
|
"grad_norm": 0.30251550674438477, |
|
"learning_rate": 6.924184058711836e-06, |
|
"loss": 2.8447, |
|
"step": 11460 |
|
}, |
|
{ |
|
"epoch": 0.8488848219319894, |
|
"grad_norm": 0.35557475686073303, |
|
"learning_rate": 6.7936921371623885e-06, |
|
"loss": 2.8387, |
|
"step": 11480 |
|
}, |
|
{ |
|
"epoch": 0.8503637153499893, |
|
"grad_norm": 0.2999821901321411, |
|
"learning_rate": 6.6643518769082036e-06, |
|
"loss": 2.8484, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 0.8518426087679893, |
|
"grad_norm": 0.29102715849876404, |
|
"learning_rate": 6.536166725505405e-06, |
|
"loss": 2.8418, |
|
"step": 11520 |
|
}, |
|
{ |
|
"epoch": 0.8533215021859893, |
|
"grad_norm": 0.3709971606731415, |
|
"learning_rate": 6.4091400997207785e-06, |
|
"loss": 2.8393, |
|
"step": 11540 |
|
}, |
|
{ |
|
"epoch": 0.8548003956039893, |
|
"grad_norm": 0.3058640658855438, |
|
"learning_rate": 6.2832753854406846e-06, |
|
"loss": 2.8428, |
|
"step": 11560 |
|
}, |
|
{ |
|
"epoch": 0.8562792890219892, |
|
"grad_norm": 0.2915048599243164, |
|
"learning_rate": 6.158575937580818e-06, |
|
"loss": 2.8446, |
|
"step": 11580 |
|
}, |
|
{ |
|
"epoch": 0.8577581824399892, |
|
"grad_norm": 0.31149548292160034, |
|
"learning_rate": 6.035045079996743e-06, |
|
"loss": 2.8438, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 0.8592370758579893, |
|
"grad_norm": 0.2985529601573944, |
|
"learning_rate": 5.9126861053953595e-06, |
|
"loss": 2.8246, |
|
"step": 11620 |
|
}, |
|
{ |
|
"epoch": 0.8607159692759893, |
|
"grad_norm": 0.33099082112312317, |
|
"learning_rate": 5.791502275247079e-06, |
|
"loss": 2.8412, |
|
"step": 11640 |
|
}, |
|
{ |
|
"epoch": 0.8621948626939893, |
|
"grad_norm": 0.28865981101989746, |
|
"learning_rate": 5.6714968196989295e-06, |
|
"loss": 2.8299, |
|
"step": 11660 |
|
}, |
|
{ |
|
"epoch": 0.8636737561119892, |
|
"grad_norm": 0.34115445613861084, |
|
"learning_rate": 5.5526729374884456e-06, |
|
"loss": 2.8368, |
|
"step": 11680 |
|
}, |
|
{ |
|
"epoch": 0.8651526495299892, |
|
"grad_norm": 0.3019537925720215, |
|
"learning_rate": 5.435033795858385e-06, |
|
"loss": 2.8424, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 0.8666315429479892, |
|
"grad_norm": 0.2919292449951172, |
|
"learning_rate": 5.318582530472338e-06, |
|
"loss": 2.8449, |
|
"step": 11720 |
|
}, |
|
{ |
|
"epoch": 0.8681104363659892, |
|
"grad_norm": 0.2975643575191498, |
|
"learning_rate": 5.203322245331127e-06, |
|
"loss": 2.8484, |
|
"step": 11740 |
|
}, |
|
{ |
|
"epoch": 0.8695893297839892, |
|
"grad_norm": 0.30803442001342773, |
|
"learning_rate": 5.089256012690069e-06, |
|
"loss": 2.839, |
|
"step": 11760 |
|
}, |
|
{ |
|
"epoch": 0.8710682232019891, |
|
"grad_norm": 0.3415025770664215, |
|
"learning_rate": 4.976386872977107e-06, |
|
"loss": 2.8406, |
|
"step": 11780 |
|
}, |
|
{ |
|
"epoch": 0.8725471166199891, |
|
"grad_norm": 0.3077727258205414, |
|
"learning_rate": 4.864717834711735e-06, |
|
"loss": 2.8262, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 0.8740260100379891, |
|
"grad_norm": 0.3027855455875397, |
|
"learning_rate": 4.75425187442482e-06, |
|
"loss": 2.8394, |
|
"step": 11820 |
|
}, |
|
{ |
|
"epoch": 0.8755049034559891, |
|
"grad_norm": 0.3020201027393341, |
|
"learning_rate": 4.644991936579268e-06, |
|
"loss": 2.8397, |
|
"step": 11840 |
|
}, |
|
{ |
|
"epoch": 0.8769837968739891, |
|
"grad_norm": 0.2942678928375244, |
|
"learning_rate": 4.536940933491552e-06, |
|
"loss": 2.8506, |
|
"step": 11860 |
|
}, |
|
{ |
|
"epoch": 0.878462690291989, |
|
"grad_norm": 0.30446386337280273, |
|
"learning_rate": 4.43010174525404e-06, |
|
"loss": 2.8323, |
|
"step": 11880 |
|
}, |
|
{ |
|
"epoch": 0.879941583709989, |
|
"grad_norm": 0.2892758250236511, |
|
"learning_rate": 4.324477219658274e-06, |
|
"loss": 2.8268, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 0.881420477127989, |
|
"grad_norm": 0.29356256127357483, |
|
"learning_rate": 4.220070172119045e-06, |
|
"loss": 2.8561, |
|
"step": 11920 |
|
}, |
|
{ |
|
"epoch": 0.882899370545989, |
|
"grad_norm": 0.2972046136856079, |
|
"learning_rate": 4.116883385599335e-06, |
|
"loss": 2.8459, |
|
"step": 11940 |
|
}, |
|
{ |
|
"epoch": 0.884378263963989, |
|
"grad_norm": 0.30883651971817017, |
|
"learning_rate": 4.01491961053615e-06, |
|
"loss": 2.8526, |
|
"step": 11960 |
|
}, |
|
{ |
|
"epoch": 0.8858571573819889, |
|
"grad_norm": 0.30948570370674133, |
|
"learning_rate": 3.914181564767216e-06, |
|
"loss": 2.8335, |
|
"step": 11980 |
|
}, |
|
{ |
|
"epoch": 0.8873360507999889, |
|
"grad_norm": 0.2896897494792938, |
|
"learning_rate": 3.8146719334585246e-06, |
|
"loss": 2.8353, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 0.8888149442179889, |
|
"grad_norm": 0.29304638504981995, |
|
"learning_rate": 3.7163933690327447e-06, |
|
"loss": 2.8352, |
|
"step": 12020 |
|
}, |
|
{ |
|
"epoch": 0.8902938376359889, |
|
"grad_norm": 0.29079097509384155, |
|
"learning_rate": 3.619348491098562e-06, |
|
"loss": 2.8256, |
|
"step": 12040 |
|
}, |
|
{ |
|
"epoch": 0.8917727310539888, |
|
"grad_norm": 0.3122529089450836, |
|
"learning_rate": 3.5235398863808055e-06, |
|
"loss": 2.8211, |
|
"step": 12060 |
|
}, |
|
{ |
|
"epoch": 0.8932516244719888, |
|
"grad_norm": 0.2927321493625641, |
|
"learning_rate": 3.4289701086515357e-06, |
|
"loss": 2.8338, |
|
"step": 12080 |
|
}, |
|
{ |
|
"epoch": 0.8947305178899888, |
|
"grad_norm": 0.2869907319545746, |
|
"learning_rate": 3.3356416786619716e-06, |
|
"loss": 2.8313, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 0.8962094113079888, |
|
"grad_norm": 0.27835631370544434, |
|
"learning_rate": 3.2435570840752605e-06, |
|
"loss": 2.8346, |
|
"step": 12120 |
|
}, |
|
{ |
|
"epoch": 0.8976883047259888, |
|
"grad_norm": 0.2780158817768097, |
|
"learning_rate": 3.152718779400221e-06, |
|
"loss": 2.8315, |
|
"step": 12140 |
|
}, |
|
{ |
|
"epoch": 0.8991671981439887, |
|
"grad_norm": 0.2955233156681061, |
|
"learning_rate": 3.0631291859259114e-06, |
|
"loss": 2.8241, |
|
"step": 12160 |
|
}, |
|
{ |
|
"epoch": 0.9006460915619887, |
|
"grad_norm": 0.29205450415611267, |
|
"learning_rate": 2.9747906916570258e-06, |
|
"loss": 2.8308, |
|
"step": 12180 |
|
}, |
|
{ |
|
"epoch": 0.9021249849799887, |
|
"grad_norm": 0.289033979177475, |
|
"learning_rate": 2.8877056512503386e-06, |
|
"loss": 2.8469, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 0.9036038783979887, |
|
"grad_norm": 0.29402533173561096, |
|
"learning_rate": 2.8018763859518736e-06, |
|
"loss": 2.82, |
|
"step": 12220 |
|
}, |
|
{ |
|
"epoch": 0.9050827718159887, |
|
"grad_norm": 0.30112123489379883, |
|
"learning_rate": 2.7173051835350517e-06, |
|
"loss": 2.8269, |
|
"step": 12240 |
|
}, |
|
{ |
|
"epoch": 0.9065616652339886, |
|
"grad_norm": 0.2986692488193512, |
|
"learning_rate": 2.6339942982397116e-06, |
|
"loss": 2.8269, |
|
"step": 12260 |
|
}, |
|
{ |
|
"epoch": 0.9080405586519886, |
|
"grad_norm": 0.3106101453304291, |
|
"learning_rate": 2.5519459507120313e-06, |
|
"loss": 2.8415, |
|
"step": 12280 |
|
}, |
|
{ |
|
"epoch": 0.9095194520699886, |
|
"grad_norm": 0.2930283844470978, |
|
"learning_rate": 2.471162327945303e-06, |
|
"loss": 2.8353, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 0.9109983454879886, |
|
"grad_norm": 0.28059104084968567, |
|
"learning_rate": 2.3916455832216964e-06, |
|
"loss": 2.8318, |
|
"step": 12320 |
|
}, |
|
{ |
|
"epoch": 0.9124772389059886, |
|
"grad_norm": 0.2927623987197876, |
|
"learning_rate": 2.313397836054815e-06, |
|
"loss": 2.841, |
|
"step": 12340 |
|
}, |
|
{ |
|
"epoch": 0.9139561323239885, |
|
"grad_norm": 0.28432729840278625, |
|
"learning_rate": 2.2364211721331964e-06, |
|
"loss": 2.8294, |
|
"step": 12360 |
|
}, |
|
{ |
|
"epoch": 0.9154350257419885, |
|
"grad_norm": 0.2854309678077698, |
|
"learning_rate": 2.1607176432647703e-06, |
|
"loss": 2.8389, |
|
"step": 12380 |
|
}, |
|
{ |
|
"epoch": 0.9169139191599885, |
|
"grad_norm": 0.2870195209980011, |
|
"learning_rate": 2.0862892673221224e-06, |
|
"loss": 2.8355, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 0.9183928125779885, |
|
"grad_norm": 0.27523091435432434, |
|
"learning_rate": 2.01313802818871e-06, |
|
"loss": 2.8379, |
|
"step": 12420 |
|
}, |
|
{ |
|
"epoch": 0.9198717059959886, |
|
"grad_norm": 0.2815629839897156, |
|
"learning_rate": 1.9412658757060053e-06, |
|
"loss": 2.8279, |
|
"step": 12440 |
|
}, |
|
{ |
|
"epoch": 0.9213505994139884, |
|
"grad_norm": 0.28886112570762634, |
|
"learning_rate": 1.870674725621513e-06, |
|
"loss": 2.8242, |
|
"step": 12460 |
|
}, |
|
{ |
|
"epoch": 0.9228294928319885, |
|
"grad_norm": 0.2753719985485077, |
|
"learning_rate": 1.80136645953769e-06, |
|
"loss": 2.8234, |
|
"step": 12480 |
|
}, |
|
{ |
|
"epoch": 0.9243083862499885, |
|
"grad_norm": 0.2705097496509552, |
|
"learning_rate": 1.7333429248618194e-06, |
|
"loss": 2.8209, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 0.9257872796679885, |
|
"grad_norm": 0.284212589263916, |
|
"learning_rate": 1.6666059347567485e-06, |
|
"loss": 2.838, |
|
"step": 12520 |
|
}, |
|
{ |
|
"epoch": 0.9272661730859884, |
|
"grad_norm": 0.28033483028411865, |
|
"learning_rate": 1.6011572680925458e-06, |
|
"loss": 2.827, |
|
"step": 12540 |
|
}, |
|
{ |
|
"epoch": 0.9287450665039884, |
|
"grad_norm": 0.27618134021759033, |
|
"learning_rate": 1.5369986693991255e-06, |
|
"loss": 2.8415, |
|
"step": 12560 |
|
}, |
|
{ |
|
"epoch": 0.9302239599219884, |
|
"grad_norm": 0.28289562463760376, |
|
"learning_rate": 1.474131848819721e-06, |
|
"loss": 2.834, |
|
"step": 12580 |
|
}, |
|
{ |
|
"epoch": 0.9317028533399884, |
|
"grad_norm": 0.2737962305545807, |
|
"learning_rate": 1.4125584820652959e-06, |
|
"loss": 2.8228, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 0.9331817467579884, |
|
"grad_norm": 0.27976194024086, |
|
"learning_rate": 1.352280210369894e-06, |
|
"loss": 2.8387, |
|
"step": 12620 |
|
}, |
|
{ |
|
"epoch": 0.9346606401759883, |
|
"grad_norm": 0.27253544330596924, |
|
"learning_rate": 1.2932986404468883e-06, |
|
"loss": 2.8417, |
|
"step": 12640 |
|
}, |
|
{ |
|
"epoch": 0.9361395335939883, |
|
"grad_norm": 0.2787373661994934, |
|
"learning_rate": 1.2356153444461393e-06, |
|
"loss": 2.8295, |
|
"step": 12660 |
|
}, |
|
{ |
|
"epoch": 0.9376184270119883, |
|
"grad_norm": 0.27786681056022644, |
|
"learning_rate": 1.1792318599121165e-06, |
|
"loss": 2.8238, |
|
"step": 12680 |
|
}, |
|
{ |
|
"epoch": 0.9390973204299883, |
|
"grad_norm": 0.2707980275154114, |
|
"learning_rate": 1.1241496897428872e-06, |
|
"loss": 2.8216, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 0.9405762138479883, |
|
"grad_norm": 0.2854357063770294, |
|
"learning_rate": 1.0703703021500811e-06, |
|
"loss": 2.8108, |
|
"step": 12720 |
|
}, |
|
{ |
|
"epoch": 0.9420551072659882, |
|
"grad_norm": 0.2822173833847046, |
|
"learning_rate": 1.0178951306197337e-06, |
|
"loss": 2.8093, |
|
"step": 12740 |
|
}, |
|
{ |
|
"epoch": 0.9435340006839882, |
|
"grad_norm": 0.29024040699005127, |
|
"learning_rate": 9.667255738740943e-07, |
|
"loss": 2.8258, |
|
"step": 12760 |
|
}, |
|
{ |
|
"epoch": 0.9450128941019882, |
|
"grad_norm": 0.2967122793197632, |
|
"learning_rate": 9.168629958343334e-07, |
|
"loss": 2.842, |
|
"step": 12780 |
|
}, |
|
{ |
|
"epoch": 0.9464917875199882, |
|
"grad_norm": 0.2722231149673462, |
|
"learning_rate": 8.683087255841881e-07, |
|
"loss": 2.8341, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 0.9479706809379882, |
|
"grad_norm": 0.2952738106250763, |
|
"learning_rate": 8.210640573345474e-07, |
|
"loss": 2.8212, |
|
"step": 12820 |
|
}, |
|
{ |
|
"epoch": 0.9494495743559881, |
|
"grad_norm": 0.27017560601234436, |
|
"learning_rate": 7.751302503889224e-07, |
|
"loss": 2.8123, |
|
"step": 12840 |
|
}, |
|
{ |
|
"epoch": 0.9509284677739881, |
|
"grad_norm": 0.2811236083507538, |
|
"learning_rate": 7.305085291099301e-07, |
|
"loss": 2.8426, |
|
"step": 12860 |
|
}, |
|
{ |
|
"epoch": 0.9524073611919881, |
|
"grad_norm": 0.282913476228714, |
|
"learning_rate": 6.872000828866131e-07, |
|
"loss": 2.8348, |
|
"step": 12880 |
|
}, |
|
{ |
|
"epoch": 0.9538862546099881, |
|
"grad_norm": 0.2759126126766205, |
|
"learning_rate": 6.452060661027548e-07, |
|
"loss": 2.8301, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 0.9553651480279881, |
|
"grad_norm": 0.2853533923625946, |
|
"learning_rate": 6.045275981061138e-07, |
|
"loss": 2.8415, |
|
"step": 12920 |
|
}, |
|
{ |
|
"epoch": 0.956844041445988, |
|
"grad_norm": 0.2731573283672333, |
|
"learning_rate": 5.651657631785878e-07, |
|
"loss": 2.826, |
|
"step": 12940 |
|
}, |
|
{ |
|
"epoch": 0.958322934863988, |
|
"grad_norm": 0.2759709060192108, |
|
"learning_rate": 5.271216105072863e-07, |
|
"loss": 2.8261, |
|
"step": 12960 |
|
}, |
|
{ |
|
"epoch": 0.959801828281988, |
|
"grad_norm": 0.2832717001438141, |
|
"learning_rate": 4.903961541565971e-07, |
|
"loss": 2.8332, |
|
"step": 12980 |
|
}, |
|
{ |
|
"epoch": 0.961280721699988, |
|
"grad_norm": 0.269037127494812, |
|
"learning_rate": 4.5499037304115866e-07, |
|
"loss": 2.8229, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 0.9627596151179879, |
|
"grad_norm": 0.271410197019577, |
|
"learning_rate": 4.2090521089972466e-07, |
|
"loss": 2.8401, |
|
"step": 13020 |
|
}, |
|
{ |
|
"epoch": 0.9642385085359879, |
|
"grad_norm": 0.26483696699142456, |
|
"learning_rate": 3.8814157627005685e-07, |
|
"loss": 2.8376, |
|
"step": 13040 |
|
}, |
|
{ |
|
"epoch": 0.9657174019539879, |
|
"grad_norm": 0.2761934697628021, |
|
"learning_rate": 3.567003424646831e-07, |
|
"loss": 2.8374, |
|
"step": 13060 |
|
}, |
|
{ |
|
"epoch": 0.9671962953719879, |
|
"grad_norm": 0.27471932768821716, |
|
"learning_rate": 3.265823475476215e-07, |
|
"loss": 2.8358, |
|
"step": 13080 |
|
}, |
|
{ |
|
"epoch": 0.9686751887899879, |
|
"grad_norm": 0.27371978759765625, |
|
"learning_rate": 2.97788394312043e-07, |
|
"loss": 2.8289, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 0.9701540822079878, |
|
"grad_norm": 0.2889103889465332, |
|
"learning_rate": 2.7031925025888247e-07, |
|
"loss": 2.8145, |
|
"step": 13120 |
|
}, |
|
{ |
|
"epoch": 0.9716329756259878, |
|
"grad_norm": 0.2687681317329407, |
|
"learning_rate": 2.441756475763668e-07, |
|
"loss": 2.818, |
|
"step": 13140 |
|
}, |
|
{ |
|
"epoch": 0.9731118690439878, |
|
"grad_norm": 0.2686457931995392, |
|
"learning_rate": 2.1935828312050766e-07, |
|
"loss": 2.8344, |
|
"step": 13160 |
|
}, |
|
{ |
|
"epoch": 0.9745907624619878, |
|
"grad_norm": 0.26769590377807617, |
|
"learning_rate": 1.9586781839652235e-07, |
|
"loss": 2.8236, |
|
"step": 13180 |
|
}, |
|
{ |
|
"epoch": 0.9760696558799878, |
|
"grad_norm": 0.27022501826286316, |
|
"learning_rate": 1.737048795412033e-07, |
|
"loss": 2.8307, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 0.9775485492979877, |
|
"grad_norm": 0.2741018533706665, |
|
"learning_rate": 1.5287005730623138e-07, |
|
"loss": 2.8312, |
|
"step": 13220 |
|
}, |
|
{ |
|
"epoch": 0.9790274427159877, |
|
"grad_norm": 0.27768802642822266, |
|
"learning_rate": 1.333639070424164e-07, |
|
"loss": 2.8281, |
|
"step": 13240 |
|
}, |
|
{ |
|
"epoch": 0.9805063361339877, |
|
"grad_norm": 0.26736685633659363, |
|
"learning_rate": 1.1518694868491442e-07, |
|
"loss": 2.8342, |
|
"step": 13260 |
|
}, |
|
{ |
|
"epoch": 0.9819852295519877, |
|
"grad_norm": 0.26495057344436646, |
|
"learning_rate": 9.833966673935546e-08, |
|
"loss": 2.8236, |
|
"step": 13280 |
|
}, |
|
{ |
|
"epoch": 0.9834641229699878, |
|
"grad_norm": 0.27052661776542664, |
|
"learning_rate": 8.282251026893728e-08, |
|
"loss": 2.8214, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 0.9849430163879876, |
|
"grad_norm": 0.2683194875717163, |
|
"learning_rate": 6.863589288244043e-08, |
|
"loss": 2.8468, |
|
"step": 13320 |
|
}, |
|
{ |
|
"epoch": 0.9864219098059877, |
|
"grad_norm": 0.27812352776527405, |
|
"learning_rate": 5.5780192723214884e-08, |
|
"loss": 2.8254, |
|
"step": 13340 |
|
}, |
|
{ |
|
"epoch": 0.9879008032239877, |
|
"grad_norm": 0.2842520773410797, |
|
"learning_rate": 4.425575245911029e-08, |
|
"loss": 2.8273, |
|
"step": 13360 |
|
}, |
|
{ |
|
"epoch": 0.9893796966419877, |
|
"grad_norm": 0.2864263355731964, |
|
"learning_rate": 3.406287927332219e-08, |
|
"loss": 2.8311, |
|
"step": 13380 |
|
}, |
|
{ |
|
"epoch": 0.9908585900599877, |
|
"grad_norm": 0.26490774750709534, |
|
"learning_rate": 2.520184485620969e-08, |
|
"loss": 2.8298, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 0.9923374834779876, |
|
"grad_norm": 0.2666003406047821, |
|
"learning_rate": 1.7672885398067883e-08, |
|
"loss": 2.8303, |
|
"step": 13420 |
|
}, |
|
{ |
|
"epoch": 0.9938163768959876, |
|
"grad_norm": 0.27174392342567444, |
|
"learning_rate": 1.147620158281626e-08, |
|
"loss": 2.8177, |
|
"step": 13440 |
|
}, |
|
{ |
|
"epoch": 0.9952952703139876, |
|
"grad_norm": 0.2677934467792511, |
|
"learning_rate": 6.6119585826529554e-09, |
|
"loss": 2.8123, |
|
"step": 13460 |
|
}, |
|
{ |
|
"epoch": 0.9967741637319876, |
|
"grad_norm": 0.2655700445175171, |
|
"learning_rate": 3.0802860536582876e-09, |
|
"loss": 2.8268, |
|
"step": 13480 |
|
}, |
|
{ |
|
"epoch": 0.9982530571499876, |
|
"grad_norm": 0.2759760022163391, |
|
"learning_rate": 8.812781323253027e-10, |
|
"loss": 2.8247, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 0.9997319505679875, |
|
"grad_norm": 0.2634597718715668, |
|
"learning_rate": 1.4993433072874042e-11, |
|
"loss": 2.831, |
|
"step": 13520 |
|
} |
|
], |
|
"logging_steps": 20, |
|
"max_steps": 13523, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.070897645108016e+19, |
|
"train_batch_size": 12, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|