{ "best_metric": 0.160739004611969, "best_model_checkpoint": "./results_t5_mixed_wiki_cv_arhiv_large/checkpoint-19699", "epoch": 3.0, "eval_steps": 500, "global_step": 59097, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00025381999086248035, "grad_norm": 28009.150390625, "learning_rate": 1.9998307866727585e-05, "loss": 16.9957, "step": 5 }, { "epoch": 0.0005076399817249607, "grad_norm": 8431.59375, "learning_rate": 1.999661573345517e-05, "loss": 15.242, "step": 10 }, { "epoch": 0.000761459972587441, "grad_norm": 13960.4951171875, "learning_rate": 1.9994923600182752e-05, "loss": 14.3643, "step": 15 }, { "epoch": 0.0010152799634499214, "grad_norm": 9187.435546875, "learning_rate": 1.9993231466910336e-05, "loss": 13.9964, "step": 20 }, { "epoch": 0.0012690999543124015, "grad_norm": 11192.2958984375, "learning_rate": 1.999153933363792e-05, "loss": 13.0181, "step": 25 }, { "epoch": 0.001522919945174882, "grad_norm": 10066.05859375, "learning_rate": 1.9989847200365503e-05, "loss": 12.9025, "step": 30 }, { "epoch": 0.0017767399360373623, "grad_norm": 7277.57373046875, "learning_rate": 1.9988155067093086e-05, "loss": 12.5301, "step": 35 }, { "epoch": 0.002030559926899843, "grad_norm": 157497.796875, "learning_rate": 1.998646293382067e-05, "loss": 11.712, "step": 40 }, { "epoch": 0.0022843799177623227, "grad_norm": 2509.368408203125, "learning_rate": 1.9984770800548254e-05, "loss": 11.4352, "step": 45 }, { "epoch": 0.002538199908624803, "grad_norm": 9022.3505859375, "learning_rate": 1.9983078667275837e-05, "loss": 10.8293, "step": 50 }, { "epoch": 0.0027920198994872835, "grad_norm": 2620.181640625, "learning_rate": 1.998138653400342e-05, "loss": 10.5957, "step": 55 }, { "epoch": 0.003045839890349764, "grad_norm": 4354.4189453125, "learning_rate": 1.9979694400731e-05, "loss": 10.201, "step": 60 }, { "epoch": 0.003299659881212244, "grad_norm": 2636.816162109375, "learning_rate": 1.9978002267458588e-05, "loss": 9.8294, "step": 65 }, { "epoch": 0.0035534798720747245, "grad_norm": 2717.7568359375, "learning_rate": 1.997631013418617e-05, "loss": 9.7655, "step": 70 }, { "epoch": 0.003807299862937205, "grad_norm": 1577.282958984375, "learning_rate": 1.9974618000913755e-05, "loss": 9.2661, "step": 75 }, { "epoch": 0.004061119853799686, "grad_norm": 1305.2647705078125, "learning_rate": 1.997292586764134e-05, "loss": 8.5601, "step": 80 }, { "epoch": 0.004314939844662166, "grad_norm": 1029.8795166015625, "learning_rate": 1.997123373436892e-05, "loss": 8.3479, "step": 85 }, { "epoch": 0.0045687598355246455, "grad_norm": 488.6922302246094, "learning_rate": 1.9969541601096505e-05, "loss": 7.9367, "step": 90 }, { "epoch": 0.004822579826387126, "grad_norm": 1308.7169189453125, "learning_rate": 1.996784946782409e-05, "loss": 7.6314, "step": 95 }, { "epoch": 0.005076399817249606, "grad_norm": 1560.9044189453125, "learning_rate": 1.996615733455167e-05, "loss": 7.2683, "step": 100 }, { "epoch": 0.0053302198081120866, "grad_norm": 1124.25390625, "learning_rate": 1.9964465201279256e-05, "loss": 6.7153, "step": 105 }, { "epoch": 0.005584039798974567, "grad_norm": 1180.7884521484375, "learning_rate": 1.9962773068006836e-05, "loss": 6.4866, "step": 110 }, { "epoch": 0.005837859789837047, "grad_norm": 156.94796752929688, "learning_rate": 1.996108093473442e-05, "loss": 5.8496, "step": 115 }, { "epoch": 0.006091679780699528, "grad_norm": 256.8822326660156, "learning_rate": 1.9959388801462007e-05, "loss": 5.4725, "step": 120 }, { "epoch": 0.006345499771562008, "grad_norm": 242.91868591308594, "learning_rate": 1.9957696668189587e-05, "loss": 5.0473, "step": 125 }, { "epoch": 0.006599319762424488, "grad_norm": 140.21670532226562, "learning_rate": 1.9956004534917174e-05, "loss": 4.739, "step": 130 }, { "epoch": 0.006853139753286969, "grad_norm": 2685.30712890625, "learning_rate": 1.9954312401644754e-05, "loss": 4.309, "step": 135 }, { "epoch": 0.007106959744149449, "grad_norm": 51.844295501708984, "learning_rate": 1.9952620268372337e-05, "loss": 4.074, "step": 140 }, { "epoch": 0.007360779735011929, "grad_norm": 15.273852348327637, "learning_rate": 1.9950928135099924e-05, "loss": 3.4298, "step": 145 }, { "epoch": 0.00761459972587441, "grad_norm": 17.046186447143555, "learning_rate": 1.9949236001827505e-05, "loss": 3.1516, "step": 150 }, { "epoch": 0.00786841971673689, "grad_norm": 13.263907432556152, "learning_rate": 1.9947543868555088e-05, "loss": 2.6151, "step": 155 }, { "epoch": 0.008122239707599371, "grad_norm": 17.467811584472656, "learning_rate": 1.994585173528267e-05, "loss": 2.4349, "step": 160 }, { "epoch": 0.00837605969846185, "grad_norm": 26.78594398498535, "learning_rate": 1.9944159602010255e-05, "loss": 2.1512, "step": 165 }, { "epoch": 0.008629879689324332, "grad_norm": 7.074516296386719, "learning_rate": 1.994246746873784e-05, "loss": 1.9379, "step": 170 }, { "epoch": 0.008883699680186811, "grad_norm": 6.19895601272583, "learning_rate": 1.9940775335465422e-05, "loss": 1.7137, "step": 175 }, { "epoch": 0.009137519671049291, "grad_norm": 7.024235725402832, "learning_rate": 1.9939083202193006e-05, "loss": 1.6583, "step": 180 }, { "epoch": 0.009391339661911772, "grad_norm": 5.285205841064453, "learning_rate": 1.993739106892059e-05, "loss": 1.5457, "step": 185 }, { "epoch": 0.009645159652774252, "grad_norm": 4.7672600746154785, "learning_rate": 1.9935698935648173e-05, "loss": 1.423, "step": 190 }, { "epoch": 0.009898979643636733, "grad_norm": 4.843871116638184, "learning_rate": 1.9934006802375756e-05, "loss": 1.4004, "step": 195 }, { "epoch": 0.010152799634499212, "grad_norm": 3.7040324211120605, "learning_rate": 1.993231466910334e-05, "loss": 1.3773, "step": 200 }, { "epoch": 0.010406619625361694, "grad_norm": 3.55208420753479, "learning_rate": 1.9930622535830924e-05, "loss": 1.287, "step": 205 }, { "epoch": 0.010660439616224173, "grad_norm": 4.512004852294922, "learning_rate": 1.9928930402558507e-05, "loss": 1.215, "step": 210 }, { "epoch": 0.010914259607086654, "grad_norm": 3.2358570098876953, "learning_rate": 1.992723826928609e-05, "loss": 1.1807, "step": 215 }, { "epoch": 0.011168079597949134, "grad_norm": 2.9898245334625244, "learning_rate": 1.9925546136013674e-05, "loss": 1.1454, "step": 220 }, { "epoch": 0.011421899588811615, "grad_norm": 3.377270221710205, "learning_rate": 1.9923854002741258e-05, "loss": 1.1211, "step": 225 }, { "epoch": 0.011675719579674095, "grad_norm": 2.5435738563537598, "learning_rate": 1.992216186946884e-05, "loss": 1.0875, "step": 230 }, { "epoch": 0.011929539570536576, "grad_norm": 2.505054473876953, "learning_rate": 1.9920469736196425e-05, "loss": 1.0262, "step": 235 }, { "epoch": 0.012183359561399055, "grad_norm": 2.3626339435577393, "learning_rate": 1.991877760292401e-05, "loss": 0.971, "step": 240 }, { "epoch": 0.012437179552261536, "grad_norm": 2.3419456481933594, "learning_rate": 1.9917085469651592e-05, "loss": 0.9605, "step": 245 }, { "epoch": 0.012690999543124016, "grad_norm": 2.3525550365448, "learning_rate": 1.9915393336379175e-05, "loss": 0.9515, "step": 250 }, { "epoch": 0.012944819533986497, "grad_norm": 2.0995123386383057, "learning_rate": 1.991370120310676e-05, "loss": 0.9169, "step": 255 }, { "epoch": 0.013198639524848977, "grad_norm": 2.3146281242370605, "learning_rate": 1.9912009069834343e-05, "loss": 0.9073, "step": 260 }, { "epoch": 0.013452459515711458, "grad_norm": 2.5654919147491455, "learning_rate": 1.9910316936561923e-05, "loss": 0.8649, "step": 265 }, { "epoch": 0.013706279506573937, "grad_norm": 2.566847562789917, "learning_rate": 1.990862480328951e-05, "loss": 0.8795, "step": 270 }, { "epoch": 0.013960099497436419, "grad_norm": 2.0294430255889893, "learning_rate": 1.9906932670017093e-05, "loss": 0.8524, "step": 275 }, { "epoch": 0.014213919488298898, "grad_norm": 1.80044424533844, "learning_rate": 1.9905240536744677e-05, "loss": 0.8287, "step": 280 }, { "epoch": 0.01446773947916138, "grad_norm": 3.0150132179260254, "learning_rate": 1.990354840347226e-05, "loss": 0.7852, "step": 285 }, { "epoch": 0.014721559470023859, "grad_norm": 1.9330862760543823, "learning_rate": 1.990185627019984e-05, "loss": 0.7896, "step": 290 }, { "epoch": 0.01497537946088634, "grad_norm": 1.4856704473495483, "learning_rate": 1.9900164136927427e-05, "loss": 0.7634, "step": 295 }, { "epoch": 0.01522919945174882, "grad_norm": 2.0876917839050293, "learning_rate": 1.989847200365501e-05, "loss": 0.8272, "step": 300 }, { "epoch": 0.0154830194426113, "grad_norm": 1.916139006614685, "learning_rate": 1.989677987038259e-05, "loss": 0.7789, "step": 305 }, { "epoch": 0.01573683943347378, "grad_norm": 1.5130165815353394, "learning_rate": 1.9895087737110178e-05, "loss": 0.7559, "step": 310 }, { "epoch": 0.01599065942433626, "grad_norm": 1.9628571271896362, "learning_rate": 1.9893395603837758e-05, "loss": 0.7496, "step": 315 }, { "epoch": 0.016244479415198743, "grad_norm": 2.4967174530029297, "learning_rate": 1.9891703470565345e-05, "loss": 0.7925, "step": 320 }, { "epoch": 0.01649829940606122, "grad_norm": 1.711539626121521, "learning_rate": 1.989001133729293e-05, "loss": 0.7409, "step": 325 }, { "epoch": 0.0167521193969237, "grad_norm": 1.7477493286132812, "learning_rate": 1.988831920402051e-05, "loss": 0.7136, "step": 330 }, { "epoch": 0.017005939387786183, "grad_norm": 4.01808500289917, "learning_rate": 1.9886627070748096e-05, "loss": 0.7446, "step": 335 }, { "epoch": 0.017259759378648664, "grad_norm": 2.3399136066436768, "learning_rate": 1.9884934937475676e-05, "loss": 0.8573, "step": 340 }, { "epoch": 0.017513579369511142, "grad_norm": 1.5367169380187988, "learning_rate": 1.988324280420326e-05, "loss": 0.6873, "step": 345 }, { "epoch": 0.017767399360373623, "grad_norm": 3.4695355892181396, "learning_rate": 1.9881550670930846e-05, "loss": 0.7075, "step": 350 }, { "epoch": 0.018021219351236104, "grad_norm": 1.5102581977844238, "learning_rate": 1.9879858537658427e-05, "loss": 0.6935, "step": 355 }, { "epoch": 0.018275039342098582, "grad_norm": 1.7649434804916382, "learning_rate": 1.987816640438601e-05, "loss": 0.6854, "step": 360 }, { "epoch": 0.018528859332961063, "grad_norm": 1.312583088874817, "learning_rate": 1.9876474271113594e-05, "loss": 0.796, "step": 365 }, { "epoch": 0.018782679323823544, "grad_norm": 1.6248362064361572, "learning_rate": 1.9874782137841177e-05, "loss": 0.6442, "step": 370 }, { "epoch": 0.019036499314686026, "grad_norm": 1.6744344234466553, "learning_rate": 1.9873090004568764e-05, "loss": 0.6433, "step": 375 }, { "epoch": 0.019290319305548503, "grad_norm": 1.5160279273986816, "learning_rate": 1.9871397871296344e-05, "loss": 0.6437, "step": 380 }, { "epoch": 0.019544139296410985, "grad_norm": 1.3933593034744263, "learning_rate": 1.9869705738023928e-05, "loss": 0.6469, "step": 385 }, { "epoch": 0.019797959287273466, "grad_norm": 1.2953556776046753, "learning_rate": 1.986801360475151e-05, "loss": 0.6602, "step": 390 }, { "epoch": 0.020051779278135947, "grad_norm": 1.387939214706421, "learning_rate": 1.9866321471479095e-05, "loss": 0.6201, "step": 395 }, { "epoch": 0.020305599268998425, "grad_norm": 1.3470954895019531, "learning_rate": 1.986462933820668e-05, "loss": 0.6238, "step": 400 }, { "epoch": 0.020559419259860906, "grad_norm": 1.760582447052002, "learning_rate": 1.9862937204934262e-05, "loss": 0.6314, "step": 405 }, { "epoch": 0.020813239250723387, "grad_norm": 1.6875195503234863, "learning_rate": 1.9861245071661846e-05, "loss": 0.6141, "step": 410 }, { "epoch": 0.02106705924158587, "grad_norm": 1.4659909009933472, "learning_rate": 1.985955293838943e-05, "loss": 0.6416, "step": 415 }, { "epoch": 0.021320879232448346, "grad_norm": 1.3127461671829224, "learning_rate": 1.9857860805117013e-05, "loss": 0.5959, "step": 420 }, { "epoch": 0.021574699223310827, "grad_norm": 1.2059125900268555, "learning_rate": 1.9856168671844596e-05, "loss": 0.6423, "step": 425 }, { "epoch": 0.02182851921417331, "grad_norm": 1.4507384300231934, "learning_rate": 1.985447653857218e-05, "loss": 0.619, "step": 430 }, { "epoch": 0.02208233920503579, "grad_norm": 1.2196464538574219, "learning_rate": 1.9852784405299763e-05, "loss": 0.6224, "step": 435 }, { "epoch": 0.022336159195898268, "grad_norm": 1.8387898206710815, "learning_rate": 1.9851092272027347e-05, "loss": 0.5911, "step": 440 }, { "epoch": 0.02258997918676075, "grad_norm": 0.9832326769828796, "learning_rate": 1.984940013875493e-05, "loss": 0.5794, "step": 445 }, { "epoch": 0.02284379917762323, "grad_norm": 1.2416331768035889, "learning_rate": 1.9847708005482514e-05, "loss": 0.5693, "step": 450 }, { "epoch": 0.02309761916848571, "grad_norm": 1.0847058296203613, "learning_rate": 1.9846015872210097e-05, "loss": 0.5366, "step": 455 }, { "epoch": 0.02335143915934819, "grad_norm": 1.1567232608795166, "learning_rate": 1.984432373893768e-05, "loss": 0.5673, "step": 460 }, { "epoch": 0.02360525915021067, "grad_norm": 1.3223860263824463, "learning_rate": 1.9842631605665265e-05, "loss": 0.5835, "step": 465 }, { "epoch": 0.02385907914107315, "grad_norm": 1.5882163047790527, "learning_rate": 1.9840939472392848e-05, "loss": 0.5227, "step": 470 }, { "epoch": 0.024112899131935633, "grad_norm": 2.358612537384033, "learning_rate": 1.983924733912043e-05, "loss": 0.5583, "step": 475 }, { "epoch": 0.02436671912279811, "grad_norm": 1.134081244468689, "learning_rate": 1.9837555205848015e-05, "loss": 0.5728, "step": 480 }, { "epoch": 0.02462053911366059, "grad_norm": 1.3474904298782349, "learning_rate": 1.98358630725756e-05, "loss": 0.5207, "step": 485 }, { "epoch": 0.024874359104523073, "grad_norm": 0.914055585861206, "learning_rate": 1.9834170939303182e-05, "loss": 0.5529, "step": 490 }, { "epoch": 0.025128179095385554, "grad_norm": 1.1716861724853516, "learning_rate": 1.9832478806030762e-05, "loss": 0.5516, "step": 495 }, { "epoch": 0.025381999086248032, "grad_norm": 1.6925829648971558, "learning_rate": 1.983078667275835e-05, "loss": 0.537, "step": 500 }, { "epoch": 0.025635819077110513, "grad_norm": 4.091062545776367, "learning_rate": 1.9829094539485933e-05, "loss": 0.6366, "step": 505 }, { "epoch": 0.025889639067972994, "grad_norm": 1.6175552606582642, "learning_rate": 1.9827402406213513e-05, "loss": 0.503, "step": 510 }, { "epoch": 0.026143459058835476, "grad_norm": 1.2992746829986572, "learning_rate": 1.98257102729411e-05, "loss": 0.5253, "step": 515 }, { "epoch": 0.026397279049697953, "grad_norm": 0.9036175012588501, "learning_rate": 1.982401813966868e-05, "loss": 0.532, "step": 520 }, { "epoch": 0.026651099040560434, "grad_norm": 1.241174340248108, "learning_rate": 1.9822326006396267e-05, "loss": 0.5049, "step": 525 }, { "epoch": 0.026904919031422916, "grad_norm": 1.9662011861801147, "learning_rate": 1.982063387312385e-05, "loss": 0.4869, "step": 530 }, { "epoch": 0.027158739022285393, "grad_norm": 1.3847944736480713, "learning_rate": 1.981894173985143e-05, "loss": 0.5257, "step": 535 }, { "epoch": 0.027412559013147875, "grad_norm": 1.0714912414550781, "learning_rate": 1.9817249606579018e-05, "loss": 0.4935, "step": 540 }, { "epoch": 0.027666379004010356, "grad_norm": 1.3018203973770142, "learning_rate": 1.9815557473306598e-05, "loss": 0.4876, "step": 545 }, { "epoch": 0.027920198994872837, "grad_norm": 1.309104084968567, "learning_rate": 1.981386534003418e-05, "loss": 0.5586, "step": 550 }, { "epoch": 0.028174018985735315, "grad_norm": 1.4573897123336792, "learning_rate": 1.9812173206761768e-05, "loss": 0.4816, "step": 555 }, { "epoch": 0.028427838976597796, "grad_norm": 1.175330638885498, "learning_rate": 1.981048107348935e-05, "loss": 0.5306, "step": 560 }, { "epoch": 0.028681658967460277, "grad_norm": 3.1696276664733887, "learning_rate": 1.9808788940216935e-05, "loss": 0.6168, "step": 565 }, { "epoch": 0.02893547895832276, "grad_norm": 1.5825937986373901, "learning_rate": 1.9807096806944516e-05, "loss": 0.5071, "step": 570 }, { "epoch": 0.029189298949185236, "grad_norm": 2.6860151290893555, "learning_rate": 1.98054046736721e-05, "loss": 0.5171, "step": 575 }, { "epoch": 0.029443118940047718, "grad_norm": 1.351448655128479, "learning_rate": 1.9803712540399686e-05, "loss": 0.4982, "step": 580 }, { "epoch": 0.0296969389309102, "grad_norm": 21.1049747467041, "learning_rate": 1.9802020407127266e-05, "loss": 0.5326, "step": 585 }, { "epoch": 0.02995075892177268, "grad_norm": 93.52015686035156, "learning_rate": 1.980032827385485e-05, "loss": 0.4622, "step": 590 }, { "epoch": 0.030204578912635158, "grad_norm": 1.095617651939392, "learning_rate": 1.9798636140582433e-05, "loss": 0.457, "step": 595 }, { "epoch": 0.03045839890349764, "grad_norm": 1.1381208896636963, "learning_rate": 1.9796944007310017e-05, "loss": 0.4665, "step": 600 }, { "epoch": 0.03071221889436012, "grad_norm": 146.4381866455078, "learning_rate": 1.97952518740376e-05, "loss": 0.4723, "step": 605 }, { "epoch": 0.0309660388852226, "grad_norm": 1.255440592765808, "learning_rate": 1.9793559740765184e-05, "loss": 0.4205, "step": 610 }, { "epoch": 0.03121985887608508, "grad_norm": 1.259681224822998, "learning_rate": 1.9791867607492767e-05, "loss": 0.4506, "step": 615 }, { "epoch": 0.03147367886694756, "grad_norm": 1.818813681602478, "learning_rate": 1.979017547422035e-05, "loss": 0.4028, "step": 620 }, { "epoch": 0.03172749885781004, "grad_norm": 1.2193713188171387, "learning_rate": 1.9788483340947935e-05, "loss": 0.4232, "step": 625 }, { "epoch": 0.03198131884867252, "grad_norm": 1.1357531547546387, "learning_rate": 1.9786791207675518e-05, "loss": 0.4146, "step": 630 }, { "epoch": 0.032235138839535, "grad_norm": 37.55704879760742, "learning_rate": 1.97850990744031e-05, "loss": 0.4468, "step": 635 }, { "epoch": 0.032488958830397485, "grad_norm": 1.28550386428833, "learning_rate": 1.9783406941130685e-05, "loss": 0.4205, "step": 640 }, { "epoch": 0.03274277882125996, "grad_norm": 1.4312454462051392, "learning_rate": 1.978171480785827e-05, "loss": 0.4807, "step": 645 }, { "epoch": 0.03299659881212244, "grad_norm": 397.8225402832031, "learning_rate": 1.9780022674585852e-05, "loss": 0.423, "step": 650 }, { "epoch": 0.033250418802984925, "grad_norm": 1.1653971672058105, "learning_rate": 1.9778330541313436e-05, "loss": 0.4284, "step": 655 }, { "epoch": 0.0335042387938474, "grad_norm": 1.3684107065200806, "learning_rate": 1.977663840804102e-05, "loss": 0.4189, "step": 660 }, { "epoch": 0.03375805878470988, "grad_norm": 1.4786688089370728, "learning_rate": 1.9774946274768603e-05, "loss": 0.3919, "step": 665 }, { "epoch": 0.034011878775572366, "grad_norm": 1.0354373455047607, "learning_rate": 1.9773254141496186e-05, "loss": 0.4386, "step": 670 }, { "epoch": 0.03426569876643484, "grad_norm": 2.865295171737671, "learning_rate": 1.977156200822377e-05, "loss": 0.4476, "step": 675 }, { "epoch": 0.03451951875729733, "grad_norm": 0.9796468615531921, "learning_rate": 1.9769869874951354e-05, "loss": 0.3822, "step": 680 }, { "epoch": 0.034773338748159806, "grad_norm": 1.1535001993179321, "learning_rate": 1.9768177741678937e-05, "loss": 0.3935, "step": 685 }, { "epoch": 0.035027158739022284, "grad_norm": 1.33408522605896, "learning_rate": 1.976648560840652e-05, "loss": 0.4267, "step": 690 }, { "epoch": 0.03528097872988477, "grad_norm": 1.2834749221801758, "learning_rate": 1.9764793475134104e-05, "loss": 0.3954, "step": 695 }, { "epoch": 0.035534798720747246, "grad_norm": 0.8446087837219238, "learning_rate": 1.9763101341861684e-05, "loss": 0.3865, "step": 700 }, { "epoch": 0.035788618711609724, "grad_norm": 1.3027830123901367, "learning_rate": 1.976140920858927e-05, "loss": 0.4294, "step": 705 }, { "epoch": 0.03604243870247221, "grad_norm": 0.8204338550567627, "learning_rate": 1.9759717075316855e-05, "loss": 0.4074, "step": 710 }, { "epoch": 0.036296258693334686, "grad_norm": 1.232735276222229, "learning_rate": 1.975802494204444e-05, "loss": 0.413, "step": 715 }, { "epoch": 0.036550078684197164, "grad_norm": 1.436716079711914, "learning_rate": 1.9756332808772022e-05, "loss": 0.4133, "step": 720 }, { "epoch": 0.03680389867505965, "grad_norm": 0.9789804816246033, "learning_rate": 1.9754640675499602e-05, "loss": 0.4078, "step": 725 }, { "epoch": 0.037057718665922126, "grad_norm": 1.6822584867477417, "learning_rate": 1.975294854222719e-05, "loss": 0.4372, "step": 730 }, { "epoch": 0.03731153865678461, "grad_norm": 1.0740268230438232, "learning_rate": 1.9751256408954773e-05, "loss": 0.4162, "step": 735 }, { "epoch": 0.03756535864764709, "grad_norm": 1.06478750705719, "learning_rate": 1.9749564275682353e-05, "loss": 0.4091, "step": 740 }, { "epoch": 0.03781917863850957, "grad_norm": 0.8764176368713379, "learning_rate": 1.974787214240994e-05, "loss": 0.4438, "step": 745 }, { "epoch": 0.03807299862937205, "grad_norm": 0.9054062366485596, "learning_rate": 1.974618000913752e-05, "loss": 0.4423, "step": 750 }, { "epoch": 0.03832681862023453, "grad_norm": 1.0033007860183716, "learning_rate": 1.9744487875865103e-05, "loss": 0.4017, "step": 755 }, { "epoch": 0.03858063861109701, "grad_norm": 1.1299196481704712, "learning_rate": 1.974279574259269e-05, "loss": 0.3955, "step": 760 }, { "epoch": 0.03883445860195949, "grad_norm": 1.2052655220031738, "learning_rate": 1.974110360932027e-05, "loss": 0.3745, "step": 765 }, { "epoch": 0.03908827859282197, "grad_norm": 3.022794246673584, "learning_rate": 1.9739411476047857e-05, "loss": 0.3888, "step": 770 }, { "epoch": 0.039342098583684454, "grad_norm": 5.622093200683594, "learning_rate": 1.9737719342775437e-05, "loss": 0.379, "step": 775 }, { "epoch": 0.03959591857454693, "grad_norm": 1.1566879749298096, "learning_rate": 1.973602720950302e-05, "loss": 0.3739, "step": 780 }, { "epoch": 0.03984973856540941, "grad_norm": 2.6651649475097656, "learning_rate": 1.9734335076230608e-05, "loss": 0.3838, "step": 785 }, { "epoch": 0.040103558556271894, "grad_norm": 1.4303189516067505, "learning_rate": 1.9732642942958188e-05, "loss": 0.4171, "step": 790 }, { "epoch": 0.04035737854713437, "grad_norm": 0.97078937292099, "learning_rate": 1.973095080968577e-05, "loss": 0.3513, "step": 795 }, { "epoch": 0.04061119853799685, "grad_norm": 0.9570413827896118, "learning_rate": 1.9729258676413355e-05, "loss": 0.3702, "step": 800 }, { "epoch": 0.040865018528859334, "grad_norm": 1.7016774415969849, "learning_rate": 1.972756654314094e-05, "loss": 0.3608, "step": 805 }, { "epoch": 0.04111883851972181, "grad_norm": 1.1286097764968872, "learning_rate": 1.9725874409868522e-05, "loss": 0.3915, "step": 810 }, { "epoch": 0.0413726585105843, "grad_norm": 1.0915040969848633, "learning_rate": 1.9724182276596106e-05, "loss": 0.3501, "step": 815 }, { "epoch": 0.041626478501446774, "grad_norm": 3.019122362136841, "learning_rate": 1.972249014332369e-05, "loss": 0.3413, "step": 820 }, { "epoch": 0.04188029849230925, "grad_norm": 1.6340800523757935, "learning_rate": 1.9720798010051273e-05, "loss": 0.3902, "step": 825 }, { "epoch": 0.04213411848317174, "grad_norm": 4.086325645446777, "learning_rate": 1.9719105876778856e-05, "loss": 0.4549, "step": 830 }, { "epoch": 0.042387938474034215, "grad_norm": 1.0035637617111206, "learning_rate": 1.971741374350644e-05, "loss": 0.3406, "step": 835 }, { "epoch": 0.04264175846489669, "grad_norm": 0.8659387230873108, "learning_rate": 1.9715721610234024e-05, "loss": 0.3907, "step": 840 }, { "epoch": 0.04289557845575918, "grad_norm": 1.208949327468872, "learning_rate": 1.9714029476961607e-05, "loss": 0.3448, "step": 845 }, { "epoch": 0.043149398446621655, "grad_norm": 1.0771589279174805, "learning_rate": 1.971233734368919e-05, "loss": 0.3617, "step": 850 }, { "epoch": 0.04340321843748414, "grad_norm": 1.216821312904358, "learning_rate": 1.9710645210416774e-05, "loss": 0.3516, "step": 855 }, { "epoch": 0.04365703842834662, "grad_norm": 0.8121851086616516, "learning_rate": 1.9708953077144358e-05, "loss": 0.3514, "step": 860 }, { "epoch": 0.043910858419209095, "grad_norm": 0.9845564961433411, "learning_rate": 1.970726094387194e-05, "loss": 0.3626, "step": 865 }, { "epoch": 0.04416467841007158, "grad_norm": 276.0869140625, "learning_rate": 1.9705568810599525e-05, "loss": 0.3701, "step": 870 }, { "epoch": 0.04441849840093406, "grad_norm": 0.9632354974746704, "learning_rate": 1.970387667732711e-05, "loss": 0.3553, "step": 875 }, { "epoch": 0.044672318391796535, "grad_norm": 1.2075852155685425, "learning_rate": 1.9702184544054692e-05, "loss": 0.3355, "step": 880 }, { "epoch": 0.04492613838265902, "grad_norm": 1.1684023141860962, "learning_rate": 1.9700492410782275e-05, "loss": 0.3593, "step": 885 }, { "epoch": 0.0451799583735215, "grad_norm": 1.093794345855713, "learning_rate": 1.969880027750986e-05, "loss": 0.3217, "step": 890 }, { "epoch": 0.045433778364383975, "grad_norm": 0.8607542514801025, "learning_rate": 1.9697108144237443e-05, "loss": 0.3595, "step": 895 }, { "epoch": 0.04568759835524646, "grad_norm": 1.0586198568344116, "learning_rate": 1.9695416010965026e-05, "loss": 0.3402, "step": 900 }, { "epoch": 0.04594141834610894, "grad_norm": 1.3861478567123413, "learning_rate": 1.9693723877692606e-05, "loss": 0.3389, "step": 905 }, { "epoch": 0.04619523833697142, "grad_norm": 1.1892589330673218, "learning_rate": 1.9692031744420193e-05, "loss": 0.3371, "step": 910 }, { "epoch": 0.0464490583278339, "grad_norm": 1.0808658599853516, "learning_rate": 1.9690339611147777e-05, "loss": 0.3632, "step": 915 }, { "epoch": 0.04670287831869638, "grad_norm": 6.600335121154785, "learning_rate": 1.968864747787536e-05, "loss": 0.3464, "step": 920 }, { "epoch": 0.04695669830955886, "grad_norm": 0.9508718252182007, "learning_rate": 1.9686955344602944e-05, "loss": 0.3804, "step": 925 }, { "epoch": 0.04721051830042134, "grad_norm": 0.8976243734359741, "learning_rate": 1.9685263211330524e-05, "loss": 0.3379, "step": 930 }, { "epoch": 0.04746433829128382, "grad_norm": 0.816540539264679, "learning_rate": 1.968357107805811e-05, "loss": 0.3479, "step": 935 }, { "epoch": 0.0477181582821463, "grad_norm": 1.652158498764038, "learning_rate": 1.9681878944785694e-05, "loss": 0.3458, "step": 940 }, { "epoch": 0.04797197827300878, "grad_norm": 0.913906991481781, "learning_rate": 1.9680186811513275e-05, "loss": 0.3456, "step": 945 }, { "epoch": 0.048225798263871265, "grad_norm": 1.0308032035827637, "learning_rate": 1.967849467824086e-05, "loss": 0.3384, "step": 950 }, { "epoch": 0.04847961825473374, "grad_norm": 1.0393309593200684, "learning_rate": 1.9676802544968442e-05, "loss": 0.3399, "step": 955 }, { "epoch": 0.04873343824559622, "grad_norm": 0.9788406491279602, "learning_rate": 1.967511041169603e-05, "loss": 0.3387, "step": 960 }, { "epoch": 0.048987258236458706, "grad_norm": 1.0427477359771729, "learning_rate": 1.9673418278423612e-05, "loss": 0.3398, "step": 965 }, { "epoch": 0.04924107822732118, "grad_norm": 0.898420512676239, "learning_rate": 1.9671726145151192e-05, "loss": 0.3273, "step": 970 }, { "epoch": 0.04949489821818366, "grad_norm": 1.0824823379516602, "learning_rate": 1.967003401187878e-05, "loss": 0.3541, "step": 975 }, { "epoch": 0.049748718209046146, "grad_norm": 0.8971902132034302, "learning_rate": 1.966834187860636e-05, "loss": 0.304, "step": 980 }, { "epoch": 0.050002538199908624, "grad_norm": 2.5218119621276855, "learning_rate": 1.9666649745333943e-05, "loss": 0.3209, "step": 985 }, { "epoch": 0.05025635819077111, "grad_norm": 0.8553778529167175, "learning_rate": 1.9664957612061527e-05, "loss": 0.3208, "step": 990 }, { "epoch": 0.050510178181633586, "grad_norm": 1.212137222290039, "learning_rate": 1.966326547878911e-05, "loss": 0.3412, "step": 995 }, { "epoch": 0.050763998172496064, "grad_norm": 0.7518376708030701, "learning_rate": 1.9661573345516694e-05, "loss": 0.4053, "step": 1000 }, { "epoch": 0.05101781816335855, "grad_norm": 0.9260375499725342, "learning_rate": 1.9659881212244277e-05, "loss": 0.3254, "step": 1005 }, { "epoch": 0.051271638154221026, "grad_norm": 1.103661060333252, "learning_rate": 1.965818907897186e-05, "loss": 0.3211, "step": 1010 }, { "epoch": 0.051525458145083504, "grad_norm": 1.1954058408737183, "learning_rate": 1.9656496945699444e-05, "loss": 0.3486, "step": 1015 }, { "epoch": 0.05177927813594599, "grad_norm": 0.992374837398529, "learning_rate": 1.9654804812427028e-05, "loss": 0.3211, "step": 1020 }, { "epoch": 0.052033098126808466, "grad_norm": 4.1818413734436035, "learning_rate": 1.965311267915461e-05, "loss": 0.3095, "step": 1025 }, { "epoch": 0.05228691811767095, "grad_norm": 1.0134354829788208, "learning_rate": 1.9651420545882195e-05, "loss": 0.3281, "step": 1030 }, { "epoch": 0.05254073810853343, "grad_norm": 0.9592474102973938, "learning_rate": 1.964972841260978e-05, "loss": 0.3289, "step": 1035 }, { "epoch": 0.05279455809939591, "grad_norm": 0.8658552765846252, "learning_rate": 1.9648036279337362e-05, "loss": 0.3318, "step": 1040 }, { "epoch": 0.05304837809025839, "grad_norm": 0.9752548933029175, "learning_rate": 1.9646344146064946e-05, "loss": 0.3342, "step": 1045 }, { "epoch": 0.05330219808112087, "grad_norm": 1.1210874319076538, "learning_rate": 1.964465201279253e-05, "loss": 0.2928, "step": 1050 }, { "epoch": 0.05355601807198335, "grad_norm": 0.7496079206466675, "learning_rate": 1.9642959879520113e-05, "loss": 0.3433, "step": 1055 }, { "epoch": 0.05380983806284583, "grad_norm": 1.119456171989441, "learning_rate": 1.9641267746247696e-05, "loss": 0.3038, "step": 1060 }, { "epoch": 0.05406365805370831, "grad_norm": 0.9271594882011414, "learning_rate": 1.963957561297528e-05, "loss": 0.3138, "step": 1065 }, { "epoch": 0.05431747804457079, "grad_norm": 1.0885213613510132, "learning_rate": 1.9637883479702863e-05, "loss": 0.3176, "step": 1070 }, { "epoch": 0.05457129803543327, "grad_norm": 1.075253963470459, "learning_rate": 1.9636191346430447e-05, "loss": 0.2996, "step": 1075 }, { "epoch": 0.05482511802629575, "grad_norm": 0.9437046647071838, "learning_rate": 1.963449921315803e-05, "loss": 0.3203, "step": 1080 }, { "epoch": 0.055078938017158234, "grad_norm": 0.8787136077880859, "learning_rate": 1.9632807079885614e-05, "loss": 0.3017, "step": 1085 }, { "epoch": 0.05533275800802071, "grad_norm": 0.9308958649635315, "learning_rate": 1.9631114946613197e-05, "loss": 0.3023, "step": 1090 }, { "epoch": 0.05558657799888319, "grad_norm": 0.8047986626625061, "learning_rate": 1.962942281334078e-05, "loss": 0.3042, "step": 1095 }, { "epoch": 0.055840397989745674, "grad_norm": 0.6756587028503418, "learning_rate": 1.9627730680068364e-05, "loss": 0.2961, "step": 1100 }, { "epoch": 0.05609421798060815, "grad_norm": 1.1307016611099243, "learning_rate": 1.9626038546795948e-05, "loss": 0.3447, "step": 1105 }, { "epoch": 0.05634803797147063, "grad_norm": 0.9675906896591187, "learning_rate": 1.962434641352353e-05, "loss": 0.3325, "step": 1110 }, { "epoch": 0.056601857962333114, "grad_norm": 0.9537900686264038, "learning_rate": 1.9622654280251115e-05, "loss": 0.3241, "step": 1115 }, { "epoch": 0.05685567795319559, "grad_norm": 0.9516997337341309, "learning_rate": 1.96209621469787e-05, "loss": 0.3301, "step": 1120 }, { "epoch": 0.05710949794405808, "grad_norm": 1.157469391822815, "learning_rate": 1.9619270013706282e-05, "loss": 0.306, "step": 1125 }, { "epoch": 0.057363317934920555, "grad_norm": 0.7865618467330933, "learning_rate": 1.9617577880433866e-05, "loss": 0.3097, "step": 1130 }, { "epoch": 0.05761713792578303, "grad_norm": 0.8256521821022034, "learning_rate": 1.9615885747161446e-05, "loss": 0.3012, "step": 1135 }, { "epoch": 0.05787095791664552, "grad_norm": 0.7554883360862732, "learning_rate": 1.9614193613889033e-05, "loss": 0.3226, "step": 1140 }, { "epoch": 0.058124777907507995, "grad_norm": 0.8436192870140076, "learning_rate": 1.9612501480616616e-05, "loss": 0.3068, "step": 1145 }, { "epoch": 0.05837859789837047, "grad_norm": 1.5259509086608887, "learning_rate": 1.9610809347344197e-05, "loss": 0.3158, "step": 1150 }, { "epoch": 0.05863241788923296, "grad_norm": 1.0376449823379517, "learning_rate": 1.9609117214071783e-05, "loss": 0.2944, "step": 1155 }, { "epoch": 0.058886237880095435, "grad_norm": 0.9757311344146729, "learning_rate": 1.9607425080799364e-05, "loss": 0.3029, "step": 1160 }, { "epoch": 0.05914005787095792, "grad_norm": 1.0501054525375366, "learning_rate": 1.960573294752695e-05, "loss": 0.2927, "step": 1165 }, { "epoch": 0.0593938778618204, "grad_norm": 0.7940589189529419, "learning_rate": 1.9604040814254534e-05, "loss": 0.3092, "step": 1170 }, { "epoch": 0.059647697852682875, "grad_norm": 1.0338493585586548, "learning_rate": 1.9602348680982114e-05, "loss": 0.3086, "step": 1175 }, { "epoch": 0.05990151784354536, "grad_norm": 0.8323164582252502, "learning_rate": 1.96006565477097e-05, "loss": 0.2797, "step": 1180 }, { "epoch": 0.06015533783440784, "grad_norm": 1.0480012893676758, "learning_rate": 1.959896441443728e-05, "loss": 0.2908, "step": 1185 }, { "epoch": 0.060409157825270315, "grad_norm": 0.8771175742149353, "learning_rate": 1.9597272281164865e-05, "loss": 0.2945, "step": 1190 }, { "epoch": 0.0606629778161328, "grad_norm": 0.968286395072937, "learning_rate": 1.959558014789245e-05, "loss": 0.2989, "step": 1195 }, { "epoch": 0.06091679780699528, "grad_norm": 1.787429690361023, "learning_rate": 1.9593888014620032e-05, "loss": 0.3255, "step": 1200 }, { "epoch": 0.06117061779785776, "grad_norm": 1.2332099676132202, "learning_rate": 1.959219588134762e-05, "loss": 0.2937, "step": 1205 }, { "epoch": 0.06142443778872024, "grad_norm": 0.9430225491523743, "learning_rate": 1.95905037480752e-05, "loss": 0.309, "step": 1210 }, { "epoch": 0.06167825777958272, "grad_norm": 0.9462845921516418, "learning_rate": 1.9588811614802783e-05, "loss": 0.3156, "step": 1215 }, { "epoch": 0.0619320777704452, "grad_norm": 0.9460931420326233, "learning_rate": 1.9587119481530366e-05, "loss": 0.2851, "step": 1220 }, { "epoch": 0.06218589776130768, "grad_norm": 0.8023846745491028, "learning_rate": 1.958542734825795e-05, "loss": 0.304, "step": 1225 }, { "epoch": 0.06243971775217016, "grad_norm": 1.055830955505371, "learning_rate": 1.9583735214985533e-05, "loss": 0.3123, "step": 1230 }, { "epoch": 0.06269353774303264, "grad_norm": 0.7749910354614258, "learning_rate": 1.9582043081713117e-05, "loss": 0.3202, "step": 1235 }, { "epoch": 0.06294735773389512, "grad_norm": 1.0144844055175781, "learning_rate": 1.95803509484407e-05, "loss": 0.2781, "step": 1240 }, { "epoch": 0.0632011777247576, "grad_norm": 0.9046682715415955, "learning_rate": 1.9578658815168284e-05, "loss": 0.3233, "step": 1245 }, { "epoch": 0.06345499771562008, "grad_norm": 71.55082702636719, "learning_rate": 1.9576966681895867e-05, "loss": 0.2724, "step": 1250 }, { "epoch": 0.06370881770648257, "grad_norm": 0.8242619037628174, "learning_rate": 1.957527454862345e-05, "loss": 0.2722, "step": 1255 }, { "epoch": 0.06396263769734505, "grad_norm": 1.0677359104156494, "learning_rate": 1.9573582415351035e-05, "loss": 0.3172, "step": 1260 }, { "epoch": 0.06421645768820752, "grad_norm": 0.8488295078277588, "learning_rate": 1.9571890282078618e-05, "loss": 0.2966, "step": 1265 }, { "epoch": 0.06447027767907, "grad_norm": 0.703192949295044, "learning_rate": 1.95701981488062e-05, "loss": 0.3108, "step": 1270 }, { "epoch": 0.06472409766993248, "grad_norm": 0.8458622694015503, "learning_rate": 1.9568506015533785e-05, "loss": 0.2884, "step": 1275 }, { "epoch": 0.06497791766079497, "grad_norm": 0.9391268491744995, "learning_rate": 1.956681388226137e-05, "loss": 0.293, "step": 1280 }, { "epoch": 0.06523173765165745, "grad_norm": 1.0053985118865967, "learning_rate": 1.9565121748988952e-05, "loss": 0.3244, "step": 1285 }, { "epoch": 0.06548555764251993, "grad_norm": 0.8404228687286377, "learning_rate": 1.9563429615716536e-05, "loss": 0.2932, "step": 1290 }, { "epoch": 0.0657393776333824, "grad_norm": 1.3393089771270752, "learning_rate": 1.956173748244412e-05, "loss": 0.3064, "step": 1295 }, { "epoch": 0.06599319762424488, "grad_norm": 0.6775000691413879, "learning_rate": 1.9560045349171703e-05, "loss": 0.2931, "step": 1300 }, { "epoch": 0.06624701761510736, "grad_norm": 0.9884696006774902, "learning_rate": 1.9558353215899286e-05, "loss": 0.285, "step": 1305 }, { "epoch": 0.06650083760596985, "grad_norm": 1.0637091398239136, "learning_rate": 1.955666108262687e-05, "loss": 0.2468, "step": 1310 }, { "epoch": 0.06675465759683233, "grad_norm": 0.8244103193283081, "learning_rate": 1.9554968949354454e-05, "loss": 0.2582, "step": 1315 }, { "epoch": 0.0670084775876948, "grad_norm": 0.6568355560302734, "learning_rate": 1.9553276816082037e-05, "loss": 0.2774, "step": 1320 }, { "epoch": 0.06726229757855728, "grad_norm": 1.0442551374435425, "learning_rate": 1.955158468280962e-05, "loss": 0.277, "step": 1325 }, { "epoch": 0.06751611756941976, "grad_norm": 0.797654390335083, "learning_rate": 1.9549892549537204e-05, "loss": 0.2658, "step": 1330 }, { "epoch": 0.06776993756028225, "grad_norm": 0.8469611406326294, "learning_rate": 1.9548200416264788e-05, "loss": 0.319, "step": 1335 }, { "epoch": 0.06802375755114473, "grad_norm": 38.28089141845703, "learning_rate": 1.9546508282992368e-05, "loss": 0.268, "step": 1340 }, { "epoch": 0.06827757754200721, "grad_norm": 0.961740255355835, "learning_rate": 1.9544816149719955e-05, "loss": 0.3031, "step": 1345 }, { "epoch": 0.06853139753286969, "grad_norm": 0.9473285675048828, "learning_rate": 1.954312401644754e-05, "loss": 0.2707, "step": 1350 }, { "epoch": 0.06878521752373216, "grad_norm": 1.0564128160476685, "learning_rate": 1.9541431883175122e-05, "loss": 0.2747, "step": 1355 }, { "epoch": 0.06903903751459466, "grad_norm": 1.3247959613800049, "learning_rate": 1.9539739749902705e-05, "loss": 0.2801, "step": 1360 }, { "epoch": 0.06929285750545713, "grad_norm": 1.2612837553024292, "learning_rate": 1.9538047616630286e-05, "loss": 0.314, "step": 1365 }, { "epoch": 0.06954667749631961, "grad_norm": 0.8873483538627625, "learning_rate": 1.9536355483357873e-05, "loss": 0.2818, "step": 1370 }, { "epoch": 0.06980049748718209, "grad_norm": 1.1142306327819824, "learning_rate": 1.9534663350085453e-05, "loss": 0.2964, "step": 1375 }, { "epoch": 0.07005431747804457, "grad_norm": 0.7834559679031372, "learning_rate": 1.9532971216813036e-05, "loss": 0.3129, "step": 1380 }, { "epoch": 0.07030813746890704, "grad_norm": 0.6153595447540283, "learning_rate": 1.9531279083540623e-05, "loss": 0.2775, "step": 1385 }, { "epoch": 0.07056195745976954, "grad_norm": 0.9495353102684021, "learning_rate": 1.9529586950268203e-05, "loss": 0.2902, "step": 1390 }, { "epoch": 0.07081577745063201, "grad_norm": 1.0067836046218872, "learning_rate": 1.9527894816995787e-05, "loss": 0.2918, "step": 1395 }, { "epoch": 0.07106959744149449, "grad_norm": 1.068027377128601, "learning_rate": 1.952620268372337e-05, "loss": 0.2845, "step": 1400 }, { "epoch": 0.07132341743235697, "grad_norm": 1.1914137601852417, "learning_rate": 1.9524510550450954e-05, "loss": 0.2867, "step": 1405 }, { "epoch": 0.07157723742321945, "grad_norm": 0.9969173669815063, "learning_rate": 1.952281841717854e-05, "loss": 0.2888, "step": 1410 }, { "epoch": 0.07183105741408194, "grad_norm": 0.8332482576370239, "learning_rate": 1.952112628390612e-05, "loss": 0.2833, "step": 1415 }, { "epoch": 0.07208487740494442, "grad_norm": 1.3746370077133179, "learning_rate": 1.9519434150633705e-05, "loss": 0.2867, "step": 1420 }, { "epoch": 0.0723386973958069, "grad_norm": 0.6186951398849487, "learning_rate": 1.9517742017361288e-05, "loss": 0.2754, "step": 1425 }, { "epoch": 0.07259251738666937, "grad_norm": 0.668218195438385, "learning_rate": 1.951604988408887e-05, "loss": 0.2538, "step": 1430 }, { "epoch": 0.07284633737753185, "grad_norm": 0.900421679019928, "learning_rate": 1.9514357750816455e-05, "loss": 0.2657, "step": 1435 }, { "epoch": 0.07310015736839433, "grad_norm": 0.7691309452056885, "learning_rate": 1.951266561754404e-05, "loss": 0.3226, "step": 1440 }, { "epoch": 0.07335397735925682, "grad_norm": 0.9059398770332336, "learning_rate": 1.9510973484271622e-05, "loss": 0.3005, "step": 1445 }, { "epoch": 0.0736077973501193, "grad_norm": 1.2089287042617798, "learning_rate": 1.9509281350999206e-05, "loss": 0.2816, "step": 1450 }, { "epoch": 0.07386161734098178, "grad_norm": 0.8258754014968872, "learning_rate": 1.950758921772679e-05, "loss": 0.2874, "step": 1455 }, { "epoch": 0.07411543733184425, "grad_norm": 0.7340827584266663, "learning_rate": 1.9505897084454373e-05, "loss": 0.2884, "step": 1460 }, { "epoch": 0.07436925732270673, "grad_norm": 0.741913914680481, "learning_rate": 1.9504204951181956e-05, "loss": 0.2929, "step": 1465 }, { "epoch": 0.07462307731356922, "grad_norm": 0.7973793745040894, "learning_rate": 1.950251281790954e-05, "loss": 0.2963, "step": 1470 }, { "epoch": 0.0748768973044317, "grad_norm": 0.9016576409339905, "learning_rate": 1.9500820684637124e-05, "loss": 0.265, "step": 1475 }, { "epoch": 0.07513071729529418, "grad_norm": 1.0395702123641968, "learning_rate": 1.9499128551364707e-05, "loss": 0.3539, "step": 1480 }, { "epoch": 0.07538453728615666, "grad_norm": 0.7906758189201355, "learning_rate": 1.949743641809229e-05, "loss": 0.2769, "step": 1485 }, { "epoch": 0.07563835727701913, "grad_norm": 0.7581782341003418, "learning_rate": 1.9495744284819874e-05, "loss": 0.2911, "step": 1490 }, { "epoch": 0.07589217726788162, "grad_norm": 0.9939795732498169, "learning_rate": 1.9494052151547458e-05, "loss": 0.2759, "step": 1495 }, { "epoch": 0.0761459972587441, "grad_norm": 0.7973515391349792, "learning_rate": 1.949236001827504e-05, "loss": 0.2673, "step": 1500 }, { "epoch": 0.07639981724960658, "grad_norm": 0.793901264667511, "learning_rate": 1.9490667885002625e-05, "loss": 0.2643, "step": 1505 }, { "epoch": 0.07665363724046906, "grad_norm": 0.9730905294418335, "learning_rate": 1.948897575173021e-05, "loss": 0.2505, "step": 1510 }, { "epoch": 0.07690745723133154, "grad_norm": 0.7291544079780579, "learning_rate": 1.9487283618457792e-05, "loss": 0.2337, "step": 1515 }, { "epoch": 0.07716127722219401, "grad_norm": 0.836552083492279, "learning_rate": 1.9485591485185375e-05, "loss": 0.274, "step": 1520 }, { "epoch": 0.0774150972130565, "grad_norm": 0.7705031037330627, "learning_rate": 1.948389935191296e-05, "loss": 0.2706, "step": 1525 }, { "epoch": 0.07766891720391898, "grad_norm": 0.6896925568580627, "learning_rate": 1.9482207218640543e-05, "loss": 0.2807, "step": 1530 }, { "epoch": 0.07792273719478146, "grad_norm": 1.161415696144104, "learning_rate": 1.9480515085368126e-05, "loss": 0.2574, "step": 1535 }, { "epoch": 0.07817655718564394, "grad_norm": 1.0795843601226807, "learning_rate": 1.947882295209571e-05, "loss": 0.2574, "step": 1540 }, { "epoch": 0.07843037717650642, "grad_norm": 0.6460023522377014, "learning_rate": 1.947713081882329e-05, "loss": 0.2794, "step": 1545 }, { "epoch": 0.07868419716736891, "grad_norm": 0.9282433390617371, "learning_rate": 1.9475438685550877e-05, "loss": 0.2564, "step": 1550 }, { "epoch": 0.07893801715823139, "grad_norm": 0.9009420871734619, "learning_rate": 1.9473746552278457e-05, "loss": 0.2607, "step": 1555 }, { "epoch": 0.07919183714909386, "grad_norm": 0.8371971845626831, "learning_rate": 1.9472054419006044e-05, "loss": 0.2732, "step": 1560 }, { "epoch": 0.07944565713995634, "grad_norm": 0.9188066124916077, "learning_rate": 1.9470362285733627e-05, "loss": 0.2457, "step": 1565 }, { "epoch": 0.07969947713081882, "grad_norm": 0.7665749788284302, "learning_rate": 1.9468670152461208e-05, "loss": 0.2373, "step": 1570 }, { "epoch": 0.07995329712168131, "grad_norm": 1.9648408889770508, "learning_rate": 1.9466978019188794e-05, "loss": 0.2852, "step": 1575 }, { "epoch": 0.08020711711254379, "grad_norm": 0.8872603178024292, "learning_rate": 1.9465285885916375e-05, "loss": 0.3144, "step": 1580 }, { "epoch": 0.08046093710340627, "grad_norm": 1.1950451135635376, "learning_rate": 1.9463593752643958e-05, "loss": 0.2723, "step": 1585 }, { "epoch": 0.08071475709426874, "grad_norm": 0.8541845083236694, "learning_rate": 1.9461901619371545e-05, "loss": 0.2556, "step": 1590 }, { "epoch": 0.08096857708513122, "grad_norm": 0.8095240592956543, "learning_rate": 1.9460209486099125e-05, "loss": 0.2707, "step": 1595 }, { "epoch": 0.0812223970759937, "grad_norm": 0.8487701416015625, "learning_rate": 1.9458517352826712e-05, "loss": 0.2712, "step": 1600 }, { "epoch": 0.08147621706685619, "grad_norm": 1.3312042951583862, "learning_rate": 1.9456825219554292e-05, "loss": 0.2592, "step": 1605 }, { "epoch": 0.08173003705771867, "grad_norm": 0.7110800743103027, "learning_rate": 1.9455133086281876e-05, "loss": 0.2652, "step": 1610 }, { "epoch": 0.08198385704858115, "grad_norm": 0.933272659778595, "learning_rate": 1.9453440953009463e-05, "loss": 0.3137, "step": 1615 }, { "epoch": 0.08223767703944362, "grad_norm": 1.1123526096343994, "learning_rate": 1.9451748819737043e-05, "loss": 0.2681, "step": 1620 }, { "epoch": 0.0824914970303061, "grad_norm": 1.181645393371582, "learning_rate": 1.9450056686464627e-05, "loss": 0.2715, "step": 1625 }, { "epoch": 0.0827453170211686, "grad_norm": 1.169384241104126, "learning_rate": 1.944836455319221e-05, "loss": 0.2534, "step": 1630 }, { "epoch": 0.08299913701203107, "grad_norm": 0.8438488245010376, "learning_rate": 1.9446672419919794e-05, "loss": 0.2797, "step": 1635 }, { "epoch": 0.08325295700289355, "grad_norm": 0.6939857602119446, "learning_rate": 1.9444980286647377e-05, "loss": 0.2649, "step": 1640 }, { "epoch": 0.08350677699375603, "grad_norm": 0.803497314453125, "learning_rate": 1.944328815337496e-05, "loss": 0.2699, "step": 1645 }, { "epoch": 0.0837605969846185, "grad_norm": 0.6957250237464905, "learning_rate": 1.9441596020102544e-05, "loss": 0.2411, "step": 1650 }, { "epoch": 0.08401441697548098, "grad_norm": 0.8274784684181213, "learning_rate": 1.9439903886830128e-05, "loss": 0.2345, "step": 1655 }, { "epoch": 0.08426823696634347, "grad_norm": 0.9105510711669922, "learning_rate": 1.943821175355771e-05, "loss": 0.2779, "step": 1660 }, { "epoch": 0.08452205695720595, "grad_norm": 0.8826274275779724, "learning_rate": 1.9436519620285295e-05, "loss": 0.2501, "step": 1665 }, { "epoch": 0.08477587694806843, "grad_norm": 1.227468729019165, "learning_rate": 1.943482748701288e-05, "loss": 0.2628, "step": 1670 }, { "epoch": 0.08502969693893091, "grad_norm": 0.8807783722877502, "learning_rate": 1.9433135353740462e-05, "loss": 0.319, "step": 1675 }, { "epoch": 0.08528351692979338, "grad_norm": 0.829677164554596, "learning_rate": 1.9431443220468045e-05, "loss": 0.2426, "step": 1680 }, { "epoch": 0.08553733692065588, "grad_norm": 0.7359742522239685, "learning_rate": 1.942975108719563e-05, "loss": 0.2444, "step": 1685 }, { "epoch": 0.08579115691151835, "grad_norm": 0.8583529591560364, "learning_rate": 1.9428058953923213e-05, "loss": 0.2667, "step": 1690 }, { "epoch": 0.08604497690238083, "grad_norm": 0.8555963039398193, "learning_rate": 1.9426366820650796e-05, "loss": 0.2694, "step": 1695 }, { "epoch": 0.08629879689324331, "grad_norm": 0.7486214637756348, "learning_rate": 1.942467468737838e-05, "loss": 0.2464, "step": 1700 }, { "epoch": 0.08655261688410579, "grad_norm": 0.9317395687103271, "learning_rate": 1.9422982554105963e-05, "loss": 0.2421, "step": 1705 }, { "epoch": 0.08680643687496828, "grad_norm": 1.1423553228378296, "learning_rate": 1.9421290420833547e-05, "loss": 0.2773, "step": 1710 }, { "epoch": 0.08706025686583076, "grad_norm": 0.7504492402076721, "learning_rate": 1.941959828756113e-05, "loss": 0.2531, "step": 1715 }, { "epoch": 0.08731407685669323, "grad_norm": 0.7574618458747864, "learning_rate": 1.9417906154288714e-05, "loss": 0.2537, "step": 1720 }, { "epoch": 0.08756789684755571, "grad_norm": 0.7793534994125366, "learning_rate": 1.9416214021016297e-05, "loss": 0.2856, "step": 1725 }, { "epoch": 0.08782171683841819, "grad_norm": 0.7327775955200195, "learning_rate": 1.941452188774388e-05, "loss": 0.2499, "step": 1730 }, { "epoch": 0.08807553682928067, "grad_norm": 0.7954163551330566, "learning_rate": 1.941282975447146e-05, "loss": 0.2599, "step": 1735 }, { "epoch": 0.08832935682014316, "grad_norm": 0.7551532983779907, "learning_rate": 1.9411137621199048e-05, "loss": 0.2548, "step": 1740 }, { "epoch": 0.08858317681100564, "grad_norm": 0.8406553864479065, "learning_rate": 1.940944548792663e-05, "loss": 0.2291, "step": 1745 }, { "epoch": 0.08883699680186811, "grad_norm": 0.6450394988059998, "learning_rate": 1.9407753354654215e-05, "loss": 0.2654, "step": 1750 }, { "epoch": 0.08909081679273059, "grad_norm": 0.6348497271537781, "learning_rate": 1.94060612213818e-05, "loss": 0.2302, "step": 1755 }, { "epoch": 0.08934463678359307, "grad_norm": 0.589336097240448, "learning_rate": 1.940436908810938e-05, "loss": 0.242, "step": 1760 }, { "epoch": 0.08959845677445556, "grad_norm": 1.3192259073257446, "learning_rate": 1.9402676954836966e-05, "loss": 0.2606, "step": 1765 }, { "epoch": 0.08985227676531804, "grad_norm": 1.3367799520492554, "learning_rate": 1.940098482156455e-05, "loss": 0.2755, "step": 1770 }, { "epoch": 0.09010609675618052, "grad_norm": 0.8470580577850342, "learning_rate": 1.939929268829213e-05, "loss": 0.2596, "step": 1775 }, { "epoch": 0.090359916747043, "grad_norm": 0.7992636561393738, "learning_rate": 1.9397600555019716e-05, "loss": 0.259, "step": 1780 }, { "epoch": 0.09061373673790547, "grad_norm": 0.6407599449157715, "learning_rate": 1.9395908421747297e-05, "loss": 0.2247, "step": 1785 }, { "epoch": 0.09086755672876795, "grad_norm": 0.9677866101264954, "learning_rate": 1.939421628847488e-05, "loss": 0.2329, "step": 1790 }, { "epoch": 0.09112137671963044, "grad_norm": 0.7822664976119995, "learning_rate": 1.9392524155202467e-05, "loss": 0.2402, "step": 1795 }, { "epoch": 0.09137519671049292, "grad_norm": 0.7724019289016724, "learning_rate": 1.9390832021930047e-05, "loss": 0.2504, "step": 1800 }, { "epoch": 0.0916290167013554, "grad_norm": 0.7643800973892212, "learning_rate": 1.9389139888657634e-05, "loss": 0.2409, "step": 1805 }, { "epoch": 0.09188283669221788, "grad_norm": 0.84905606508255, "learning_rate": 1.9387447755385214e-05, "loss": 0.2391, "step": 1810 }, { "epoch": 0.09213665668308035, "grad_norm": 3.136202812194824, "learning_rate": 1.9385755622112798e-05, "loss": 0.3056, "step": 1815 }, { "epoch": 0.09239047667394285, "grad_norm": 1.0175585746765137, "learning_rate": 1.9384063488840385e-05, "loss": 0.2591, "step": 1820 }, { "epoch": 0.09264429666480532, "grad_norm": 1.1569483280181885, "learning_rate": 1.9382371355567965e-05, "loss": 0.2738, "step": 1825 }, { "epoch": 0.0928981166556678, "grad_norm": 0.648151159286499, "learning_rate": 1.938067922229555e-05, "loss": 0.2288, "step": 1830 }, { "epoch": 0.09315193664653028, "grad_norm": 0.5930745005607605, "learning_rate": 1.9378987089023132e-05, "loss": 0.2324, "step": 1835 }, { "epoch": 0.09340575663739276, "grad_norm": 0.7623791098594666, "learning_rate": 1.9377294955750716e-05, "loss": 0.2287, "step": 1840 }, { "epoch": 0.09365957662825525, "grad_norm": 1.552193284034729, "learning_rate": 1.9375602822478302e-05, "loss": 0.2388, "step": 1845 }, { "epoch": 0.09391339661911773, "grad_norm": 0.6569783687591553, "learning_rate": 1.9373910689205883e-05, "loss": 0.2415, "step": 1850 }, { "epoch": 0.0941672166099802, "grad_norm": 0.8682032823562622, "learning_rate": 1.9372218555933466e-05, "loss": 0.2376, "step": 1855 }, { "epoch": 0.09442103660084268, "grad_norm": 0.7503842115402222, "learning_rate": 1.937052642266105e-05, "loss": 0.2479, "step": 1860 }, { "epoch": 0.09467485659170516, "grad_norm": 1.0479599237442017, "learning_rate": 1.9368834289388633e-05, "loss": 0.2598, "step": 1865 }, { "epoch": 0.09492867658256764, "grad_norm": 3.8259048461914062, "learning_rate": 1.9367142156116217e-05, "loss": 0.2883, "step": 1870 }, { "epoch": 0.09518249657343013, "grad_norm": 0.7968519330024719, "learning_rate": 1.93654500228438e-05, "loss": 0.2404, "step": 1875 }, { "epoch": 0.0954363165642926, "grad_norm": 0.9822812080383301, "learning_rate": 1.9363757889571384e-05, "loss": 0.2585, "step": 1880 }, { "epoch": 0.09569013655515508, "grad_norm": 1.0278716087341309, "learning_rate": 1.9362065756298967e-05, "loss": 0.2263, "step": 1885 }, { "epoch": 0.09594395654601756, "grad_norm": 0.7152266502380371, "learning_rate": 1.936037362302655e-05, "loss": 0.2329, "step": 1890 }, { "epoch": 0.09619777653688004, "grad_norm": 0.9978631138801575, "learning_rate": 1.9358681489754135e-05, "loss": 0.2461, "step": 1895 }, { "epoch": 0.09645159652774253, "grad_norm": 0.8091859221458435, "learning_rate": 1.9356989356481718e-05, "loss": 0.248, "step": 1900 }, { "epoch": 0.09670541651860501, "grad_norm": 0.714453935623169, "learning_rate": 1.93552972232093e-05, "loss": 0.2521, "step": 1905 }, { "epoch": 0.09695923650946749, "grad_norm": 0.9081348180770874, "learning_rate": 1.9353605089936885e-05, "loss": 0.2538, "step": 1910 }, { "epoch": 0.09721305650032996, "grad_norm": 1.0938876867294312, "learning_rate": 1.935191295666447e-05, "loss": 0.2338, "step": 1915 }, { "epoch": 0.09746687649119244, "grad_norm": 0.6565501093864441, "learning_rate": 1.9350220823392052e-05, "loss": 0.2505, "step": 1920 }, { "epoch": 0.09772069648205493, "grad_norm": 0.615381121635437, "learning_rate": 1.9348528690119636e-05, "loss": 0.228, "step": 1925 }, { "epoch": 0.09797451647291741, "grad_norm": 1.128936529159546, "learning_rate": 1.934683655684722e-05, "loss": 0.2206, "step": 1930 }, { "epoch": 0.09822833646377989, "grad_norm": 0.9524216651916504, "learning_rate": 1.9345144423574803e-05, "loss": 0.2608, "step": 1935 }, { "epoch": 0.09848215645464237, "grad_norm": 0.7258911728858948, "learning_rate": 1.9343452290302383e-05, "loss": 0.247, "step": 1940 }, { "epoch": 0.09873597644550484, "grad_norm": 0.7854008674621582, "learning_rate": 1.934176015702997e-05, "loss": 0.2611, "step": 1945 }, { "epoch": 0.09898979643636732, "grad_norm": 0.9118169546127319, "learning_rate": 1.9340068023757554e-05, "loss": 0.2428, "step": 1950 }, { "epoch": 0.09924361642722981, "grad_norm": 0.9793441891670227, "learning_rate": 1.9338375890485137e-05, "loss": 0.2296, "step": 1955 }, { "epoch": 0.09949743641809229, "grad_norm": 0.601921558380127, "learning_rate": 1.933668375721272e-05, "loss": 0.2492, "step": 1960 }, { "epoch": 0.09975125640895477, "grad_norm": 0.6406486630439758, "learning_rate": 1.93349916239403e-05, "loss": 0.2456, "step": 1965 }, { "epoch": 0.10000507639981725, "grad_norm": 0.935122013092041, "learning_rate": 1.9333299490667888e-05, "loss": 0.2373, "step": 1970 }, { "epoch": 0.10025889639067972, "grad_norm": 0.6741816997528076, "learning_rate": 1.933160735739547e-05, "loss": 0.2215, "step": 1975 }, { "epoch": 0.10051271638154222, "grad_norm": 1.050551414489746, "learning_rate": 1.932991522412305e-05, "loss": 0.2436, "step": 1980 }, { "epoch": 0.1007665363724047, "grad_norm": 0.9753983020782471, "learning_rate": 1.932822309085064e-05, "loss": 0.2506, "step": 1985 }, { "epoch": 0.10102035636326717, "grad_norm": 0.5514481067657471, "learning_rate": 1.932653095757822e-05, "loss": 0.2287, "step": 1990 }, { "epoch": 0.10127417635412965, "grad_norm": 0.6138185262680054, "learning_rate": 1.9324838824305805e-05, "loss": 0.2546, "step": 1995 }, { "epoch": 0.10152799634499213, "grad_norm": 0.7834172248840332, "learning_rate": 1.932314669103339e-05, "loss": 0.2398, "step": 2000 }, { "epoch": 0.1017818163358546, "grad_norm": 0.7345184683799744, "learning_rate": 1.932145455776097e-05, "loss": 0.2264, "step": 2005 }, { "epoch": 0.1020356363267171, "grad_norm": 0.7603473663330078, "learning_rate": 1.9319762424488556e-05, "loss": 0.2579, "step": 2010 }, { "epoch": 0.10228945631757957, "grad_norm": 0.5276412963867188, "learning_rate": 1.9318070291216136e-05, "loss": 0.2575, "step": 2015 }, { "epoch": 0.10254327630844205, "grad_norm": 0.5985664129257202, "learning_rate": 1.931637815794372e-05, "loss": 0.2357, "step": 2020 }, { "epoch": 0.10279709629930453, "grad_norm": 1.0004132986068726, "learning_rate": 1.9314686024671307e-05, "loss": 0.2415, "step": 2025 }, { "epoch": 0.10305091629016701, "grad_norm": 0.9058099389076233, "learning_rate": 1.9312993891398887e-05, "loss": 0.2489, "step": 2030 }, { "epoch": 0.1033047362810295, "grad_norm": 0.6580535769462585, "learning_rate": 1.931130175812647e-05, "loss": 0.2282, "step": 2035 }, { "epoch": 0.10355855627189198, "grad_norm": 0.8212989568710327, "learning_rate": 1.9309609624854054e-05, "loss": 0.2486, "step": 2040 }, { "epoch": 0.10381237626275445, "grad_norm": 0.6100918650627136, "learning_rate": 1.9307917491581637e-05, "loss": 0.2817, "step": 2045 }, { "epoch": 0.10406619625361693, "grad_norm": 0.9934706687927246, "learning_rate": 1.9306225358309224e-05, "loss": 0.2319, "step": 2050 }, { "epoch": 0.10432001624447941, "grad_norm": 0.7338758111000061, "learning_rate": 1.9304533225036805e-05, "loss": 0.2284, "step": 2055 }, { "epoch": 0.1045738362353419, "grad_norm": 0.7655614614486694, "learning_rate": 1.9302841091764388e-05, "loss": 0.2106, "step": 2060 }, { "epoch": 0.10482765622620438, "grad_norm": 0.8056983351707458, "learning_rate": 1.930114895849197e-05, "loss": 0.2301, "step": 2065 }, { "epoch": 0.10508147621706686, "grad_norm": 0.6935755610466003, "learning_rate": 1.9299456825219555e-05, "loss": 0.1977, "step": 2070 }, { "epoch": 0.10533529620792934, "grad_norm": 0.7659327387809753, "learning_rate": 1.929776469194714e-05, "loss": 0.2319, "step": 2075 }, { "epoch": 0.10558911619879181, "grad_norm": 0.8755415081977844, "learning_rate": 1.9296072558674722e-05, "loss": 0.2531, "step": 2080 }, { "epoch": 0.10584293618965429, "grad_norm": 0.7641196846961975, "learning_rate": 1.9294380425402306e-05, "loss": 0.2262, "step": 2085 }, { "epoch": 0.10609675618051678, "grad_norm": 0.5032942295074463, "learning_rate": 1.929268829212989e-05, "loss": 0.2371, "step": 2090 }, { "epoch": 0.10635057617137926, "grad_norm": 0.8890644311904907, "learning_rate": 1.9290996158857473e-05, "loss": 0.2262, "step": 2095 }, { "epoch": 0.10660439616224174, "grad_norm": 0.6854649186134338, "learning_rate": 1.9289304025585056e-05, "loss": 0.2407, "step": 2100 }, { "epoch": 0.10685821615310422, "grad_norm": 0.7222452163696289, "learning_rate": 1.928761189231264e-05, "loss": 0.2246, "step": 2105 }, { "epoch": 0.1071120361439667, "grad_norm": 0.5661618709564209, "learning_rate": 1.9285919759040224e-05, "loss": 0.236, "step": 2110 }, { "epoch": 0.10736585613482919, "grad_norm": 0.6868450045585632, "learning_rate": 1.9284227625767807e-05, "loss": 0.2206, "step": 2115 }, { "epoch": 0.10761967612569166, "grad_norm": 0.6375486254692078, "learning_rate": 1.928253549249539e-05, "loss": 0.2075, "step": 2120 }, { "epoch": 0.10787349611655414, "grad_norm": 0.7467653751373291, "learning_rate": 1.9280843359222974e-05, "loss": 0.2168, "step": 2125 }, { "epoch": 0.10812731610741662, "grad_norm": 1.1524518728256226, "learning_rate": 1.9279151225950558e-05, "loss": 0.2622, "step": 2130 }, { "epoch": 0.1083811360982791, "grad_norm": 0.8847719430923462, "learning_rate": 1.927745909267814e-05, "loss": 0.2297, "step": 2135 }, { "epoch": 0.10863495608914157, "grad_norm": 1.002732515335083, "learning_rate": 1.9275766959405725e-05, "loss": 0.2322, "step": 2140 }, { "epoch": 0.10888877608000407, "grad_norm": 1.2165062427520752, "learning_rate": 1.927407482613331e-05, "loss": 0.2213, "step": 2145 }, { "epoch": 0.10914259607086654, "grad_norm": 0.7660351395606995, "learning_rate": 1.9272382692860892e-05, "loss": 0.2369, "step": 2150 }, { "epoch": 0.10939641606172902, "grad_norm": 0.6399320960044861, "learning_rate": 1.9270690559588475e-05, "loss": 0.2226, "step": 2155 }, { "epoch": 0.1096502360525915, "grad_norm": 0.929460883140564, "learning_rate": 1.926899842631606e-05, "loss": 0.2345, "step": 2160 }, { "epoch": 0.10990405604345398, "grad_norm": 0.7910303473472595, "learning_rate": 1.9267306293043643e-05, "loss": 0.1851, "step": 2165 }, { "epoch": 0.11015787603431647, "grad_norm": 0.6536149978637695, "learning_rate": 1.9265614159771223e-05, "loss": 0.2259, "step": 2170 }, { "epoch": 0.11041169602517895, "grad_norm": 0.6428697109222412, "learning_rate": 1.926392202649881e-05, "loss": 0.2169, "step": 2175 }, { "epoch": 0.11066551601604142, "grad_norm": 0.6982537508010864, "learning_rate": 1.9262229893226393e-05, "loss": 0.2084, "step": 2180 }, { "epoch": 0.1109193360069039, "grad_norm": 0.7507615089416504, "learning_rate": 1.9260537759953973e-05, "loss": 0.2433, "step": 2185 }, { "epoch": 0.11117315599776638, "grad_norm": 0.8022400140762329, "learning_rate": 1.925884562668156e-05, "loss": 0.2215, "step": 2190 }, { "epoch": 0.11142697598862887, "grad_norm": 0.5533608794212341, "learning_rate": 1.925715349340914e-05, "loss": 0.2013, "step": 2195 }, { "epoch": 0.11168079597949135, "grad_norm": 0.6407077312469482, "learning_rate": 1.9255461360136727e-05, "loss": 0.2131, "step": 2200 }, { "epoch": 0.11193461597035383, "grad_norm": 1.1785430908203125, "learning_rate": 1.925376922686431e-05, "loss": 0.2623, "step": 2205 }, { "epoch": 0.1121884359612163, "grad_norm": 0.6303373575210571, "learning_rate": 1.925207709359189e-05, "loss": 0.2143, "step": 2210 }, { "epoch": 0.11244225595207878, "grad_norm": 0.8575406074523926, "learning_rate": 1.9250384960319478e-05, "loss": 0.2237, "step": 2215 }, { "epoch": 0.11269607594294126, "grad_norm": 0.9760498404502869, "learning_rate": 1.9248692827047058e-05, "loss": 0.214, "step": 2220 }, { "epoch": 0.11294989593380375, "grad_norm": 1.455056071281433, "learning_rate": 1.924700069377464e-05, "loss": 0.2518, "step": 2225 }, { "epoch": 0.11320371592466623, "grad_norm": 0.8172942996025085, "learning_rate": 1.924530856050223e-05, "loss": 0.2165, "step": 2230 }, { "epoch": 0.1134575359155287, "grad_norm": 0.9945189952850342, "learning_rate": 1.924361642722981e-05, "loss": 0.2459, "step": 2235 }, { "epoch": 0.11371135590639118, "grad_norm": 1.0252959728240967, "learning_rate": 1.9241924293957396e-05, "loss": 0.2323, "step": 2240 }, { "epoch": 0.11396517589725366, "grad_norm": 0.5688400268554688, "learning_rate": 1.9240232160684976e-05, "loss": 0.1913, "step": 2245 }, { "epoch": 0.11421899588811615, "grad_norm": 0.6286960244178772, "learning_rate": 1.923854002741256e-05, "loss": 0.2405, "step": 2250 }, { "epoch": 0.11447281587897863, "grad_norm": 1.2339733839035034, "learning_rate": 1.9236847894140146e-05, "loss": 0.2043, "step": 2255 }, { "epoch": 0.11472663586984111, "grad_norm": 0.5691444873809814, "learning_rate": 1.9235155760867726e-05, "loss": 0.221, "step": 2260 }, { "epoch": 0.11498045586070359, "grad_norm": 1.1626230478286743, "learning_rate": 1.923346362759531e-05, "loss": 0.225, "step": 2265 }, { "epoch": 0.11523427585156606, "grad_norm": 0.7354618906974792, "learning_rate": 1.9231771494322894e-05, "loss": 0.2281, "step": 2270 }, { "epoch": 0.11548809584242856, "grad_norm": 0.5635653734207153, "learning_rate": 1.9230079361050477e-05, "loss": 0.2105, "step": 2275 }, { "epoch": 0.11574191583329103, "grad_norm": 1.019686222076416, "learning_rate": 1.922838722777806e-05, "loss": 0.2232, "step": 2280 }, { "epoch": 0.11599573582415351, "grad_norm": 0.5987036228179932, "learning_rate": 1.9226695094505644e-05, "loss": 0.2232, "step": 2285 }, { "epoch": 0.11624955581501599, "grad_norm": 0.8209441304206848, "learning_rate": 1.9225002961233228e-05, "loss": 0.2016, "step": 2290 }, { "epoch": 0.11650337580587847, "grad_norm": 0.576745331287384, "learning_rate": 1.922331082796081e-05, "loss": 0.239, "step": 2295 }, { "epoch": 0.11675719579674095, "grad_norm": 1.0648269653320312, "learning_rate": 1.9221618694688395e-05, "loss": 0.1987, "step": 2300 }, { "epoch": 0.11701101578760344, "grad_norm": 0.5774346590042114, "learning_rate": 1.921992656141598e-05, "loss": 0.225, "step": 2305 }, { "epoch": 0.11726483577846591, "grad_norm": 0.6183851957321167, "learning_rate": 1.9218234428143562e-05, "loss": 0.2028, "step": 2310 }, { "epoch": 0.11751865576932839, "grad_norm": 0.8308761715888977, "learning_rate": 1.9216542294871145e-05, "loss": 0.2405, "step": 2315 }, { "epoch": 0.11777247576019087, "grad_norm": 0.8884351253509521, "learning_rate": 1.921485016159873e-05, "loss": 0.2338, "step": 2320 }, { "epoch": 0.11802629575105335, "grad_norm": 0.8659582138061523, "learning_rate": 1.9213158028326313e-05, "loss": 0.2082, "step": 2325 }, { "epoch": 0.11828011574191584, "grad_norm": 0.7193389534950256, "learning_rate": 1.9211465895053896e-05, "loss": 0.211, "step": 2330 }, { "epoch": 0.11853393573277832, "grad_norm": 0.6831737756729126, "learning_rate": 1.920977376178148e-05, "loss": 0.2214, "step": 2335 }, { "epoch": 0.1187877557236408, "grad_norm": 0.7798734903335571, "learning_rate": 1.9208081628509063e-05, "loss": 0.2062, "step": 2340 }, { "epoch": 0.11904157571450327, "grad_norm": 0.7005655169487, "learning_rate": 1.9206389495236647e-05, "loss": 0.2081, "step": 2345 }, { "epoch": 0.11929539570536575, "grad_norm": 0.694050669670105, "learning_rate": 1.920469736196423e-05, "loss": 0.2348, "step": 2350 }, { "epoch": 0.11954921569622823, "grad_norm": 0.5242777466773987, "learning_rate": 1.9203005228691814e-05, "loss": 0.2103, "step": 2355 }, { "epoch": 0.11980303568709072, "grad_norm": 0.9790678024291992, "learning_rate": 1.9201313095419397e-05, "loss": 0.2186, "step": 2360 }, { "epoch": 0.1200568556779532, "grad_norm": 0.6226420998573303, "learning_rate": 1.919962096214698e-05, "loss": 0.2132, "step": 2365 }, { "epoch": 0.12031067566881568, "grad_norm": 0.6048428416252136, "learning_rate": 1.9197928828874564e-05, "loss": 0.2111, "step": 2370 }, { "epoch": 0.12056449565967815, "grad_norm": 0.4867452383041382, "learning_rate": 1.9196236695602145e-05, "loss": 0.1954, "step": 2375 }, { "epoch": 0.12081831565054063, "grad_norm": 0.8069561123847961, "learning_rate": 1.919454456232973e-05, "loss": 0.2194, "step": 2380 }, { "epoch": 0.12107213564140312, "grad_norm": 0.9302831888198853, "learning_rate": 1.9192852429057315e-05, "loss": 0.2132, "step": 2385 }, { "epoch": 0.1213259556322656, "grad_norm": 0.6558440327644348, "learning_rate": 1.91911602957849e-05, "loss": 0.2138, "step": 2390 }, { "epoch": 0.12157977562312808, "grad_norm": 0.9373264908790588, "learning_rate": 1.9189468162512482e-05, "loss": 0.2006, "step": 2395 }, { "epoch": 0.12183359561399056, "grad_norm": 1.0025991201400757, "learning_rate": 1.9187776029240062e-05, "loss": 0.2473, "step": 2400 }, { "epoch": 0.12208741560485303, "grad_norm": 0.6057084202766418, "learning_rate": 1.918608389596765e-05, "loss": 0.2271, "step": 2405 }, { "epoch": 0.12234123559571553, "grad_norm": 0.7220245599746704, "learning_rate": 1.9184391762695233e-05, "loss": 0.2219, "step": 2410 }, { "epoch": 0.122595055586578, "grad_norm": 0.7604368329048157, "learning_rate": 1.9182699629422813e-05, "loss": 0.2019, "step": 2415 }, { "epoch": 0.12284887557744048, "grad_norm": 0.5124613046646118, "learning_rate": 1.91810074961504e-05, "loss": 0.2054, "step": 2420 }, { "epoch": 0.12310269556830296, "grad_norm": 0.6220692992210388, "learning_rate": 1.917931536287798e-05, "loss": 0.2201, "step": 2425 }, { "epoch": 0.12335651555916544, "grad_norm": 1.0920076370239258, "learning_rate": 1.9177623229605564e-05, "loss": 0.2245, "step": 2430 }, { "epoch": 0.12361033555002791, "grad_norm": 0.9251731038093567, "learning_rate": 1.917593109633315e-05, "loss": 0.2204, "step": 2435 }, { "epoch": 0.1238641555408904, "grad_norm": 0.9124245047569275, "learning_rate": 1.917423896306073e-05, "loss": 0.215, "step": 2440 }, { "epoch": 0.12411797553175288, "grad_norm": 2.2037713527679443, "learning_rate": 1.9172546829788318e-05, "loss": 0.2279, "step": 2445 }, { "epoch": 0.12437179552261536, "grad_norm": 0.696209728717804, "learning_rate": 1.9170854696515898e-05, "loss": 0.2353, "step": 2450 }, { "epoch": 0.12462561551347784, "grad_norm": 0.8244996070861816, "learning_rate": 1.916916256324348e-05, "loss": 0.2185, "step": 2455 }, { "epoch": 0.12487943550434032, "grad_norm": 1.1219316720962524, "learning_rate": 1.9167470429971068e-05, "loss": 0.2361, "step": 2460 }, { "epoch": 0.1251332554952028, "grad_norm": 0.6167690753936768, "learning_rate": 1.916577829669865e-05, "loss": 0.2075, "step": 2465 }, { "epoch": 0.12538707548606529, "grad_norm": 0.663831889629364, "learning_rate": 1.9164086163426232e-05, "loss": 0.2179, "step": 2470 }, { "epoch": 0.12564089547692775, "grad_norm": 0.7804519534111023, "learning_rate": 1.9162394030153816e-05, "loss": 0.2319, "step": 2475 }, { "epoch": 0.12589471546779024, "grad_norm": 0.5112632513046265, "learning_rate": 1.91607018968814e-05, "loss": 0.2121, "step": 2480 }, { "epoch": 0.12614853545865273, "grad_norm": 1.0492627620697021, "learning_rate": 1.9159009763608986e-05, "loss": 0.1928, "step": 2485 }, { "epoch": 0.1264023554495152, "grad_norm": 0.8314067721366882, "learning_rate": 1.9157317630336566e-05, "loss": 0.2102, "step": 2490 }, { "epoch": 0.1266561754403777, "grad_norm": 0.6615179181098938, "learning_rate": 1.915562549706415e-05, "loss": 0.2098, "step": 2495 }, { "epoch": 0.12690999543124015, "grad_norm": 1.4945785999298096, "learning_rate": 1.9153933363791733e-05, "loss": 0.2256, "step": 2500 }, { "epoch": 0.12716381542210264, "grad_norm": 0.6474151015281677, "learning_rate": 1.9152241230519317e-05, "loss": 0.2119, "step": 2505 }, { "epoch": 0.12741763541296514, "grad_norm": 0.7189993858337402, "learning_rate": 1.91505490972469e-05, "loss": 0.1996, "step": 2510 }, { "epoch": 0.1276714554038276, "grad_norm": 0.6964658498764038, "learning_rate": 1.9148856963974484e-05, "loss": 0.247, "step": 2515 }, { "epoch": 0.1279252753946901, "grad_norm": 0.5863429307937622, "learning_rate": 1.9147164830702067e-05, "loss": 0.2197, "step": 2520 }, { "epoch": 0.12817909538555255, "grad_norm": 0.7309147715568542, "learning_rate": 1.914547269742965e-05, "loss": 0.2016, "step": 2525 }, { "epoch": 0.12843291537641505, "grad_norm": 0.6691656112670898, "learning_rate": 1.9143780564157235e-05, "loss": 0.2326, "step": 2530 }, { "epoch": 0.12868673536727754, "grad_norm": 1.2683653831481934, "learning_rate": 1.9142088430884818e-05, "loss": 0.214, "step": 2535 }, { "epoch": 0.12894055535814, "grad_norm": 0.9203475713729858, "learning_rate": 1.91403962976124e-05, "loss": 0.2125, "step": 2540 }, { "epoch": 0.1291943753490025, "grad_norm": 0.636577308177948, "learning_rate": 1.9138704164339985e-05, "loss": 0.2013, "step": 2545 }, { "epoch": 0.12944819533986496, "grad_norm": 0.5804703235626221, "learning_rate": 1.913701203106757e-05, "loss": 0.2053, "step": 2550 }, { "epoch": 0.12970201533072745, "grad_norm": 0.5855862498283386, "learning_rate": 1.9135319897795152e-05, "loss": 0.1958, "step": 2555 }, { "epoch": 0.12995583532158994, "grad_norm": 0.7482487559318542, "learning_rate": 1.9133627764522736e-05, "loss": 0.2257, "step": 2560 }, { "epoch": 0.1302096553124524, "grad_norm": 0.7025090456008911, "learning_rate": 1.913193563125032e-05, "loss": 0.226, "step": 2565 }, { "epoch": 0.1304634753033149, "grad_norm": 0.8828746676445007, "learning_rate": 1.9130243497977903e-05, "loss": 0.2187, "step": 2570 }, { "epoch": 0.13071729529417736, "grad_norm": 1.0203245878219604, "learning_rate": 1.9128551364705486e-05, "loss": 0.2288, "step": 2575 }, { "epoch": 0.13097111528503985, "grad_norm": 0.6295384168624878, "learning_rate": 1.912685923143307e-05, "loss": 0.2034, "step": 2580 }, { "epoch": 0.13122493527590234, "grad_norm": 1.2643804550170898, "learning_rate": 1.9125167098160654e-05, "loss": 0.2278, "step": 2585 }, { "epoch": 0.1314787552667648, "grad_norm": 0.6384063959121704, "learning_rate": 1.9123474964888237e-05, "loss": 0.1976, "step": 2590 }, { "epoch": 0.1317325752576273, "grad_norm": 0.6057654619216919, "learning_rate": 1.912178283161582e-05, "loss": 0.2108, "step": 2595 }, { "epoch": 0.13198639524848976, "grad_norm": 0.6316978335380554, "learning_rate": 1.9120090698343404e-05, "loss": 0.2051, "step": 2600 }, { "epoch": 0.13224021523935225, "grad_norm": 0.6967063546180725, "learning_rate": 1.9118398565070984e-05, "loss": 0.2016, "step": 2605 }, { "epoch": 0.13249403523021472, "grad_norm": 0.8182034492492676, "learning_rate": 1.911670643179857e-05, "loss": 0.1894, "step": 2610 }, { "epoch": 0.1327478552210772, "grad_norm": 0.5409026741981506, "learning_rate": 1.9115014298526155e-05, "loss": 0.2166, "step": 2615 }, { "epoch": 0.1330016752119397, "grad_norm": 0.7620669007301331, "learning_rate": 1.9113322165253735e-05, "loss": 0.2018, "step": 2620 }, { "epoch": 0.13325549520280217, "grad_norm": 0.8058112859725952, "learning_rate": 1.9111630031981322e-05, "loss": 0.1956, "step": 2625 }, { "epoch": 0.13350931519366466, "grad_norm": 0.9924262166023254, "learning_rate": 1.9109937898708902e-05, "loss": 0.2005, "step": 2630 }, { "epoch": 0.13376313518452712, "grad_norm": 0.7677115797996521, "learning_rate": 1.910824576543649e-05, "loss": 0.2136, "step": 2635 }, { "epoch": 0.1340169551753896, "grad_norm": 0.8996549844741821, "learning_rate": 1.9106553632164072e-05, "loss": 0.2144, "step": 2640 }, { "epoch": 0.1342707751662521, "grad_norm": 1.055097222328186, "learning_rate": 1.9104861498891653e-05, "loss": 0.199, "step": 2645 }, { "epoch": 0.13452459515711457, "grad_norm": 0.6172511577606201, "learning_rate": 1.910316936561924e-05, "loss": 0.2001, "step": 2650 }, { "epoch": 0.13477841514797706, "grad_norm": 0.7040294408798218, "learning_rate": 1.910147723234682e-05, "loss": 0.2091, "step": 2655 }, { "epoch": 0.13503223513883952, "grad_norm": 0.6608620285987854, "learning_rate": 1.9099785099074403e-05, "loss": 0.1907, "step": 2660 }, { "epoch": 0.13528605512970202, "grad_norm": 0.7029122114181519, "learning_rate": 1.909809296580199e-05, "loss": 0.207, "step": 2665 }, { "epoch": 0.1355398751205645, "grad_norm": 0.7303573489189148, "learning_rate": 1.909640083252957e-05, "loss": 0.2103, "step": 2670 }, { "epoch": 0.13579369511142697, "grad_norm": 0.7197701334953308, "learning_rate": 1.9094708699257154e-05, "loss": 0.198, "step": 2675 }, { "epoch": 0.13604751510228946, "grad_norm": 2.0488266944885254, "learning_rate": 1.9093016565984737e-05, "loss": 0.2288, "step": 2680 }, { "epoch": 0.13630133509315193, "grad_norm": 0.7889509201049805, "learning_rate": 1.909132443271232e-05, "loss": 0.2184, "step": 2685 }, { "epoch": 0.13655515508401442, "grad_norm": 0.8902899622917175, "learning_rate": 1.9089632299439908e-05, "loss": 0.2161, "step": 2690 }, { "epoch": 0.1368089750748769, "grad_norm": 0.6209053993225098, "learning_rate": 1.9087940166167488e-05, "loss": 0.1956, "step": 2695 }, { "epoch": 0.13706279506573937, "grad_norm": 0.5760391354560852, "learning_rate": 1.908624803289507e-05, "loss": 0.2005, "step": 2700 }, { "epoch": 0.13731661505660187, "grad_norm": 0.9128169417381287, "learning_rate": 1.9084555899622655e-05, "loss": 0.2036, "step": 2705 }, { "epoch": 0.13757043504746433, "grad_norm": 0.8320951461791992, "learning_rate": 1.908286376635024e-05, "loss": 0.2108, "step": 2710 }, { "epoch": 0.13782425503832682, "grad_norm": 0.7470078468322754, "learning_rate": 1.9081171633077822e-05, "loss": 0.2058, "step": 2715 }, { "epoch": 0.1380780750291893, "grad_norm": 0.7988982796669006, "learning_rate": 1.9079479499805406e-05, "loss": 0.242, "step": 2720 }, { "epoch": 0.13833189502005178, "grad_norm": 0.8993115425109863, "learning_rate": 1.907778736653299e-05, "loss": 0.212, "step": 2725 }, { "epoch": 0.13858571501091427, "grad_norm": 0.6929076313972473, "learning_rate": 1.9076095233260573e-05, "loss": 0.2509, "step": 2730 }, { "epoch": 0.13883953500177673, "grad_norm": 0.6944275498390198, "learning_rate": 1.9074403099988156e-05, "loss": 0.2116, "step": 2735 }, { "epoch": 0.13909335499263922, "grad_norm": 0.6491143107414246, "learning_rate": 1.907271096671574e-05, "loss": 0.1943, "step": 2740 }, { "epoch": 0.1393471749835017, "grad_norm": 0.6222745776176453, "learning_rate": 1.9071018833443324e-05, "loss": 0.2144, "step": 2745 }, { "epoch": 0.13960099497436418, "grad_norm": 0.863029420375824, "learning_rate": 1.9069326700170907e-05, "loss": 0.1972, "step": 2750 }, { "epoch": 0.13985481496522667, "grad_norm": 0.5419744253158569, "learning_rate": 1.906763456689849e-05, "loss": 0.1913, "step": 2755 }, { "epoch": 0.14010863495608913, "grad_norm": 0.5654199719429016, "learning_rate": 1.9065942433626074e-05, "loss": 0.1994, "step": 2760 }, { "epoch": 0.14036245494695163, "grad_norm": 0.7003618478775024, "learning_rate": 1.9064250300353658e-05, "loss": 0.1955, "step": 2765 }, { "epoch": 0.1406162749378141, "grad_norm": 1.1119288206100464, "learning_rate": 1.906255816708124e-05, "loss": 0.1984, "step": 2770 }, { "epoch": 0.14087009492867658, "grad_norm": 0.496934711933136, "learning_rate": 1.9060866033808825e-05, "loss": 0.1855, "step": 2775 }, { "epoch": 0.14112391491953907, "grad_norm": 0.9058437943458557, "learning_rate": 1.905917390053641e-05, "loss": 0.188, "step": 2780 }, { "epoch": 0.14137773491040154, "grad_norm": 0.6056883335113525, "learning_rate": 1.9057481767263992e-05, "loss": 0.2088, "step": 2785 }, { "epoch": 0.14163155490126403, "grad_norm": 0.6516966223716736, "learning_rate": 1.9055789633991575e-05, "loss": 0.1905, "step": 2790 }, { "epoch": 0.1418853748921265, "grad_norm": 0.6040582060813904, "learning_rate": 1.905409750071916e-05, "loss": 0.2125, "step": 2795 }, { "epoch": 0.14213919488298898, "grad_norm": 0.5323441624641418, "learning_rate": 1.9052405367446743e-05, "loss": 0.1771, "step": 2800 }, { "epoch": 0.14239301487385148, "grad_norm": 0.4898989796638489, "learning_rate": 1.9050713234174326e-05, "loss": 0.1863, "step": 2805 }, { "epoch": 0.14264683486471394, "grad_norm": 0.5168548226356506, "learning_rate": 1.9049021100901906e-05, "loss": 0.1941, "step": 2810 }, { "epoch": 0.14290065485557643, "grad_norm": 0.5550641417503357, "learning_rate": 1.9047328967629493e-05, "loss": 0.2024, "step": 2815 }, { "epoch": 0.1431544748464389, "grad_norm": 0.8888419270515442, "learning_rate": 1.9045636834357077e-05, "loss": 0.2091, "step": 2820 }, { "epoch": 0.1434082948373014, "grad_norm": 0.6956480741500854, "learning_rate": 1.904394470108466e-05, "loss": 0.2006, "step": 2825 }, { "epoch": 0.14366211482816388, "grad_norm": 0.6455702781677246, "learning_rate": 1.9042252567812244e-05, "loss": 0.1969, "step": 2830 }, { "epoch": 0.14391593481902634, "grad_norm": 0.7298540472984314, "learning_rate": 1.9040560434539824e-05, "loss": 0.2041, "step": 2835 }, { "epoch": 0.14416975480988883, "grad_norm": 0.9407358169555664, "learning_rate": 1.903886830126741e-05, "loss": 0.2226, "step": 2840 }, { "epoch": 0.1444235748007513, "grad_norm": 0.6761470437049866, "learning_rate": 1.9037176167994994e-05, "loss": 0.2073, "step": 2845 }, { "epoch": 0.1446773947916138, "grad_norm": 0.9858958125114441, "learning_rate": 1.9035484034722575e-05, "loss": 0.2191, "step": 2850 }, { "epoch": 0.14493121478247628, "grad_norm": 0.7643491625785828, "learning_rate": 1.903379190145016e-05, "loss": 0.1855, "step": 2855 }, { "epoch": 0.14518503477333874, "grad_norm": 0.5545926690101624, "learning_rate": 1.903209976817774e-05, "loss": 0.1854, "step": 2860 }, { "epoch": 0.14543885476420124, "grad_norm": 0.7087584137916565, "learning_rate": 1.9030407634905325e-05, "loss": 0.1925, "step": 2865 }, { "epoch": 0.1456926747550637, "grad_norm": 0.9376761317253113, "learning_rate": 1.9028715501632912e-05, "loss": 0.2171, "step": 2870 }, { "epoch": 0.1459464947459262, "grad_norm": 0.5529223680496216, "learning_rate": 1.9027023368360492e-05, "loss": 0.1849, "step": 2875 }, { "epoch": 0.14620031473678866, "grad_norm": 0.6258545517921448, "learning_rate": 1.902533123508808e-05, "loss": 0.195, "step": 2880 }, { "epoch": 0.14645413472765115, "grad_norm": 0.5800721645355225, "learning_rate": 1.902363910181566e-05, "loss": 0.1879, "step": 2885 }, { "epoch": 0.14670795471851364, "grad_norm": 0.9016756415367126, "learning_rate": 1.9021946968543243e-05, "loss": 0.1978, "step": 2890 }, { "epoch": 0.1469617747093761, "grad_norm": 0.6598945260047913, "learning_rate": 1.902025483527083e-05, "loss": 0.2201, "step": 2895 }, { "epoch": 0.1472155947002386, "grad_norm": 0.5882731080055237, "learning_rate": 1.901856270199841e-05, "loss": 0.1942, "step": 2900 }, { "epoch": 0.14746941469110106, "grad_norm": 0.898997962474823, "learning_rate": 1.9016870568725994e-05, "loss": 0.2089, "step": 2905 }, { "epoch": 0.14772323468196355, "grad_norm": 0.547217071056366, "learning_rate": 1.9015178435453577e-05, "loss": 0.1957, "step": 2910 }, { "epoch": 0.14797705467282604, "grad_norm": 0.5666927099227905, "learning_rate": 1.901348630218116e-05, "loss": 0.2036, "step": 2915 }, { "epoch": 0.1482308746636885, "grad_norm": 0.7027495503425598, "learning_rate": 1.9011794168908744e-05, "loss": 0.2027, "step": 2920 }, { "epoch": 0.148484694654551, "grad_norm": 0.9105992317199707, "learning_rate": 1.9010102035636328e-05, "loss": 0.2185, "step": 2925 }, { "epoch": 0.14873851464541346, "grad_norm": 0.9565317630767822, "learning_rate": 1.900840990236391e-05, "loss": 0.1891, "step": 2930 }, { "epoch": 0.14899233463627595, "grad_norm": 0.6969782710075378, "learning_rate": 1.9006717769091495e-05, "loss": 0.215, "step": 2935 }, { "epoch": 0.14924615462713844, "grad_norm": 0.5370941758155823, "learning_rate": 1.900502563581908e-05, "loss": 0.1872, "step": 2940 }, { "epoch": 0.1494999746180009, "grad_norm": 0.932841420173645, "learning_rate": 1.9003333502546662e-05, "loss": 0.2144, "step": 2945 }, { "epoch": 0.1497537946088634, "grad_norm": 0.6696744561195374, "learning_rate": 1.9001641369274245e-05, "loss": 0.2132, "step": 2950 }, { "epoch": 0.15000761459972586, "grad_norm": 0.8368297219276428, "learning_rate": 1.899994923600183e-05, "loss": 0.1922, "step": 2955 }, { "epoch": 0.15026143459058836, "grad_norm": 0.7820245623588562, "learning_rate": 1.8998257102729413e-05, "loss": 0.1922, "step": 2960 }, { "epoch": 0.15051525458145085, "grad_norm": 0.6239885091781616, "learning_rate": 1.8996564969456996e-05, "loss": 0.2171, "step": 2965 }, { "epoch": 0.1507690745723133, "grad_norm": 0.6773269176483154, "learning_rate": 1.899487283618458e-05, "loss": 0.2039, "step": 2970 }, { "epoch": 0.1510228945631758, "grad_norm": 0.6102594137191772, "learning_rate": 1.8993180702912163e-05, "loss": 0.1843, "step": 2975 }, { "epoch": 0.15127671455403827, "grad_norm": 0.7514229416847229, "learning_rate": 1.8991488569639747e-05, "loss": 0.1819, "step": 2980 }, { "epoch": 0.15153053454490076, "grad_norm": 3.802446126937866, "learning_rate": 1.898979643636733e-05, "loss": 0.1858, "step": 2985 }, { "epoch": 0.15178435453576325, "grad_norm": 0.984550416469574, "learning_rate": 1.8988104303094914e-05, "loss": 0.1861, "step": 2990 }, { "epoch": 0.1520381745266257, "grad_norm": 0.672860324382782, "learning_rate": 1.8986412169822497e-05, "loss": 0.1966, "step": 2995 }, { "epoch": 0.1522919945174882, "grad_norm": 0.5952281355857849, "learning_rate": 1.898472003655008e-05, "loss": 0.1876, "step": 3000 }, { "epoch": 0.15254581450835067, "grad_norm": 0.8867749571800232, "learning_rate": 1.8983027903277664e-05, "loss": 0.2116, "step": 3005 }, { "epoch": 0.15279963449921316, "grad_norm": 0.7406168580055237, "learning_rate": 1.8981335770005248e-05, "loss": 0.1867, "step": 3010 }, { "epoch": 0.15305345449007565, "grad_norm": 0.8236103653907776, "learning_rate": 1.8979643636732828e-05, "loss": 0.2004, "step": 3015 }, { "epoch": 0.15330727448093812, "grad_norm": 0.5611258149147034, "learning_rate": 1.8977951503460415e-05, "loss": 0.1773, "step": 3020 }, { "epoch": 0.1535610944718006, "grad_norm": 0.8456403017044067, "learning_rate": 1.8976259370188e-05, "loss": 0.2209, "step": 3025 }, { "epoch": 0.15381491446266307, "grad_norm": 1.378568172454834, "learning_rate": 1.8974567236915582e-05, "loss": 0.2122, "step": 3030 }, { "epoch": 0.15406873445352556, "grad_norm": 0.599615216255188, "learning_rate": 1.8972875103643166e-05, "loss": 0.1886, "step": 3035 }, { "epoch": 0.15432255444438803, "grad_norm": 0.5790411233901978, "learning_rate": 1.8971182970370746e-05, "loss": 0.1923, "step": 3040 }, { "epoch": 0.15457637443525052, "grad_norm": 5.2787299156188965, "learning_rate": 1.8969490837098333e-05, "loss": 0.1875, "step": 3045 }, { "epoch": 0.154830194426113, "grad_norm": 0.5498223304748535, "learning_rate": 1.8967798703825916e-05, "loss": 0.1898, "step": 3050 }, { "epoch": 0.15508401441697547, "grad_norm": 0.5836355686187744, "learning_rate": 1.8966106570553497e-05, "loss": 0.1873, "step": 3055 }, { "epoch": 0.15533783440783797, "grad_norm": 0.6591739654541016, "learning_rate": 1.8964414437281083e-05, "loss": 0.2141, "step": 3060 }, { "epoch": 0.15559165439870043, "grad_norm": 0.5879199504852295, "learning_rate": 1.8962722304008664e-05, "loss": 0.199, "step": 3065 }, { "epoch": 0.15584547438956292, "grad_norm": 0.6252302527427673, "learning_rate": 1.8961030170736247e-05, "loss": 0.183, "step": 3070 }, { "epoch": 0.1560992943804254, "grad_norm": 0.6322395205497742, "learning_rate": 1.8959338037463834e-05, "loss": 0.2204, "step": 3075 }, { "epoch": 0.15635311437128788, "grad_norm": 0.8022140860557556, "learning_rate": 1.8957645904191414e-05, "loss": 0.1974, "step": 3080 }, { "epoch": 0.15660693436215037, "grad_norm": 0.7623772025108337, "learning_rate": 1.8955953770919e-05, "loss": 0.1824, "step": 3085 }, { "epoch": 0.15686075435301283, "grad_norm": 0.6760655641555786, "learning_rate": 1.895426163764658e-05, "loss": 0.2041, "step": 3090 }, { "epoch": 0.15711457434387532, "grad_norm": 0.6074882745742798, "learning_rate": 1.8952569504374165e-05, "loss": 0.19, "step": 3095 }, { "epoch": 0.15736839433473782, "grad_norm": 0.5363246202468872, "learning_rate": 1.8950877371101752e-05, "loss": 0.1732, "step": 3100 }, { "epoch": 0.15762221432560028, "grad_norm": 0.9593762159347534, "learning_rate": 1.8949185237829332e-05, "loss": 0.1973, "step": 3105 }, { "epoch": 0.15787603431646277, "grad_norm": 3.48103666305542, "learning_rate": 1.8947493104556916e-05, "loss": 0.2306, "step": 3110 }, { "epoch": 0.15812985430732523, "grad_norm": 0.593743085861206, "learning_rate": 1.89458009712845e-05, "loss": 0.2023, "step": 3115 }, { "epoch": 0.15838367429818773, "grad_norm": 0.8416429758071899, "learning_rate": 1.8944108838012083e-05, "loss": 0.1661, "step": 3120 }, { "epoch": 0.15863749428905022, "grad_norm": 0.5875362157821655, "learning_rate": 1.894241670473967e-05, "loss": 0.1693, "step": 3125 }, { "epoch": 0.15889131427991268, "grad_norm": 0.5337428450584412, "learning_rate": 1.894072457146725e-05, "loss": 0.2014, "step": 3130 }, { "epoch": 0.15914513427077517, "grad_norm": 0.705847978591919, "learning_rate": 1.8939032438194833e-05, "loss": 0.1769, "step": 3135 }, { "epoch": 0.15939895426163764, "grad_norm": 1.11298406124115, "learning_rate": 1.8937340304922417e-05, "loss": 0.1957, "step": 3140 }, { "epoch": 0.15965277425250013, "grad_norm": 0.7969094514846802, "learning_rate": 1.893564817165e-05, "loss": 0.2062, "step": 3145 }, { "epoch": 0.15990659424336262, "grad_norm": 0.6891659498214722, "learning_rate": 1.8933956038377584e-05, "loss": 0.1837, "step": 3150 }, { "epoch": 0.16016041423422508, "grad_norm": 0.7349233627319336, "learning_rate": 1.8932263905105167e-05, "loss": 0.1962, "step": 3155 }, { "epoch": 0.16041423422508758, "grad_norm": 0.6409569382667542, "learning_rate": 1.893057177183275e-05, "loss": 0.1762, "step": 3160 }, { "epoch": 0.16066805421595004, "grad_norm": 0.6636890769004822, "learning_rate": 1.8928879638560335e-05, "loss": 0.1863, "step": 3165 }, { "epoch": 0.16092187420681253, "grad_norm": 0.6937609314918518, "learning_rate": 1.8927187505287918e-05, "loss": 0.1784, "step": 3170 }, { "epoch": 0.161175694197675, "grad_norm": 0.5253648161888123, "learning_rate": 1.89254953720155e-05, "loss": 0.1917, "step": 3175 }, { "epoch": 0.1614295141885375, "grad_norm": 0.6246563196182251, "learning_rate": 1.8923803238743085e-05, "loss": 0.1687, "step": 3180 }, { "epoch": 0.16168333417939998, "grad_norm": 0.664107084274292, "learning_rate": 1.892211110547067e-05, "loss": 0.1761, "step": 3185 }, { "epoch": 0.16193715417026244, "grad_norm": 0.7611233592033386, "learning_rate": 1.8920418972198252e-05, "loss": 0.1977, "step": 3190 }, { "epoch": 0.16219097416112493, "grad_norm": 0.6064574718475342, "learning_rate": 1.8918726838925836e-05, "loss": 0.1824, "step": 3195 }, { "epoch": 0.1624447941519874, "grad_norm": 0.8105899691581726, "learning_rate": 1.891703470565342e-05, "loss": 0.1892, "step": 3200 }, { "epoch": 0.1626986141428499, "grad_norm": 0.6670052409172058, "learning_rate": 1.8915342572381003e-05, "loss": 0.1657, "step": 3205 }, { "epoch": 0.16295243413371238, "grad_norm": 0.8309715986251831, "learning_rate": 1.8913650439108586e-05, "loss": 0.1826, "step": 3210 }, { "epoch": 0.16320625412457485, "grad_norm": 0.6957319378852844, "learning_rate": 1.891195830583617e-05, "loss": 0.1981, "step": 3215 }, { "epoch": 0.16346007411543734, "grad_norm": 0.5244278311729431, "learning_rate": 1.8910266172563753e-05, "loss": 0.1835, "step": 3220 }, { "epoch": 0.1637138941062998, "grad_norm": 0.6687745451927185, "learning_rate": 1.8908574039291337e-05, "loss": 0.1818, "step": 3225 }, { "epoch": 0.1639677140971623, "grad_norm": 0.5225896835327148, "learning_rate": 1.890688190601892e-05, "loss": 0.1972, "step": 3230 }, { "epoch": 0.16422153408802478, "grad_norm": 0.71306973695755, "learning_rate": 1.8905189772746504e-05, "loss": 0.1793, "step": 3235 }, { "epoch": 0.16447535407888725, "grad_norm": 0.6916504502296448, "learning_rate": 1.8903497639474088e-05, "loss": 0.1796, "step": 3240 }, { "epoch": 0.16472917406974974, "grad_norm": 0.7372540831565857, "learning_rate": 1.8901805506201668e-05, "loss": 0.1891, "step": 3245 }, { "epoch": 0.1649829940606122, "grad_norm": 0.5736752152442932, "learning_rate": 1.8900113372929255e-05, "loss": 0.2053, "step": 3250 }, { "epoch": 0.1652368140514747, "grad_norm": 0.5286284685134888, "learning_rate": 1.889842123965684e-05, "loss": 0.1989, "step": 3255 }, { "epoch": 0.1654906340423372, "grad_norm": 0.9421578049659729, "learning_rate": 1.889672910638442e-05, "loss": 0.2002, "step": 3260 }, { "epoch": 0.16574445403319965, "grad_norm": 0.721328616142273, "learning_rate": 1.8895036973112005e-05, "loss": 0.1938, "step": 3265 }, { "epoch": 0.16599827402406214, "grad_norm": 0.6766708493232727, "learning_rate": 1.8893344839839586e-05, "loss": 0.1981, "step": 3270 }, { "epoch": 0.1662520940149246, "grad_norm": 0.7163949608802795, "learning_rate": 1.8891652706567172e-05, "loss": 0.1856, "step": 3275 }, { "epoch": 0.1665059140057871, "grad_norm": 0.673416793346405, "learning_rate": 1.8889960573294756e-05, "loss": 0.1928, "step": 3280 }, { "epoch": 0.1667597339966496, "grad_norm": 0.6823815703392029, "learning_rate": 1.8888268440022336e-05, "loss": 0.1953, "step": 3285 }, { "epoch": 0.16701355398751205, "grad_norm": 0.6587371826171875, "learning_rate": 1.8886576306749923e-05, "loss": 0.1985, "step": 3290 }, { "epoch": 0.16726737397837455, "grad_norm": 0.7395918965339661, "learning_rate": 1.8884884173477503e-05, "loss": 0.1859, "step": 3295 }, { "epoch": 0.167521193969237, "grad_norm": 0.6410045623779297, "learning_rate": 1.8883192040205087e-05, "loss": 0.1916, "step": 3300 }, { "epoch": 0.1677750139600995, "grad_norm": 0.5092994570732117, "learning_rate": 1.8881499906932674e-05, "loss": 0.1782, "step": 3305 }, { "epoch": 0.16802883395096196, "grad_norm": 0.6259738206863403, "learning_rate": 1.8879807773660254e-05, "loss": 0.2074, "step": 3310 }, { "epoch": 0.16828265394182446, "grad_norm": 0.8457812070846558, "learning_rate": 1.8878115640387837e-05, "loss": 0.1983, "step": 3315 }, { "epoch": 0.16853647393268695, "grad_norm": 0.4995182752609253, "learning_rate": 1.887642350711542e-05, "loss": 0.1781, "step": 3320 }, { "epoch": 0.1687902939235494, "grad_norm": 0.6008071899414062, "learning_rate": 1.8874731373843005e-05, "loss": 0.1904, "step": 3325 }, { "epoch": 0.1690441139144119, "grad_norm": 0.7149403691291809, "learning_rate": 1.8873039240570588e-05, "loss": 0.1846, "step": 3330 }, { "epoch": 0.16929793390527437, "grad_norm": 1.0425550937652588, "learning_rate": 1.887134710729817e-05, "loss": 0.1979, "step": 3335 }, { "epoch": 0.16955175389613686, "grad_norm": 0.6860085725784302, "learning_rate": 1.8869654974025755e-05, "loss": 0.1808, "step": 3340 }, { "epoch": 0.16980557388699935, "grad_norm": 0.8324023485183716, "learning_rate": 1.886796284075334e-05, "loss": 0.2056, "step": 3345 }, { "epoch": 0.17005939387786181, "grad_norm": 0.6411967873573303, "learning_rate": 1.8866270707480922e-05, "loss": 0.1771, "step": 3350 }, { "epoch": 0.1703132138687243, "grad_norm": 0.5328414440155029, "learning_rate": 1.8864578574208506e-05, "loss": 0.1957, "step": 3355 }, { "epoch": 0.17056703385958677, "grad_norm": 0.7603042125701904, "learning_rate": 1.886288644093609e-05, "loss": 0.2017, "step": 3360 }, { "epoch": 0.17082085385044926, "grad_norm": 0.8825428485870361, "learning_rate": 1.8861194307663673e-05, "loss": 0.1989, "step": 3365 }, { "epoch": 0.17107467384131175, "grad_norm": 0.657129168510437, "learning_rate": 1.8859502174391256e-05, "loss": 0.1801, "step": 3370 }, { "epoch": 0.17132849383217422, "grad_norm": 0.7885820269584656, "learning_rate": 1.885781004111884e-05, "loss": 0.2056, "step": 3375 }, { "epoch": 0.1715823138230367, "grad_norm": 0.5244271159172058, "learning_rate": 1.8856117907846424e-05, "loss": 0.1646, "step": 3380 }, { "epoch": 0.17183613381389917, "grad_norm": 0.6291254758834839, "learning_rate": 1.8854425774574007e-05, "loss": 0.1832, "step": 3385 }, { "epoch": 0.17208995380476166, "grad_norm": 0.6272834539413452, "learning_rate": 1.885273364130159e-05, "loss": 0.1888, "step": 3390 }, { "epoch": 0.17234377379562416, "grad_norm": 0.727254331111908, "learning_rate": 1.8851041508029174e-05, "loss": 0.1812, "step": 3395 }, { "epoch": 0.17259759378648662, "grad_norm": 0.8115093111991882, "learning_rate": 1.8849349374756758e-05, "loss": 0.1872, "step": 3400 }, { "epoch": 0.1728514137773491, "grad_norm": 0.6561703085899353, "learning_rate": 1.884765724148434e-05, "loss": 0.1813, "step": 3405 }, { "epoch": 0.17310523376821157, "grad_norm": 0.5098863840103149, "learning_rate": 1.8845965108211925e-05, "loss": 0.1873, "step": 3410 }, { "epoch": 0.17335905375907407, "grad_norm": 0.6273576021194458, "learning_rate": 1.884427297493951e-05, "loss": 0.1893, "step": 3415 }, { "epoch": 0.17361287374993656, "grad_norm": 0.622138261795044, "learning_rate": 1.8842580841667092e-05, "loss": 0.1596, "step": 3420 }, { "epoch": 0.17386669374079902, "grad_norm": 0.8276758790016174, "learning_rate": 1.8840888708394675e-05, "loss": 0.1774, "step": 3425 }, { "epoch": 0.1741205137316615, "grad_norm": 0.6109856963157654, "learning_rate": 1.883919657512226e-05, "loss": 0.1927, "step": 3430 }, { "epoch": 0.17437433372252398, "grad_norm": 0.6221029758453369, "learning_rate": 1.8837504441849843e-05, "loss": 0.1951, "step": 3435 }, { "epoch": 0.17462815371338647, "grad_norm": 0.567482590675354, "learning_rate": 1.8835812308577426e-05, "loss": 0.1848, "step": 3440 }, { "epoch": 0.17488197370424893, "grad_norm": 0.7713445425033569, "learning_rate": 1.883412017530501e-05, "loss": 0.1676, "step": 3445 }, { "epoch": 0.17513579369511142, "grad_norm": 0.6191247701644897, "learning_rate": 1.883242804203259e-05, "loss": 0.1867, "step": 3450 }, { "epoch": 0.17538961368597392, "grad_norm": 0.40963295102119446, "learning_rate": 1.8830735908760177e-05, "loss": 0.1683, "step": 3455 }, { "epoch": 0.17564343367683638, "grad_norm": 0.5506500601768494, "learning_rate": 1.882904377548776e-05, "loss": 0.1755, "step": 3460 }, { "epoch": 0.17589725366769887, "grad_norm": 0.5656499266624451, "learning_rate": 1.8827351642215344e-05, "loss": 0.1856, "step": 3465 }, { "epoch": 0.17615107365856134, "grad_norm": 0.9259144067764282, "learning_rate": 1.8825659508942927e-05, "loss": 0.1683, "step": 3470 }, { "epoch": 0.17640489364942383, "grad_norm": 0.6214718818664551, "learning_rate": 1.8823967375670507e-05, "loss": 0.1877, "step": 3475 }, { "epoch": 0.17665871364028632, "grad_norm": 0.6122065186500549, "learning_rate": 1.8822275242398094e-05, "loss": 0.1823, "step": 3480 }, { "epoch": 0.17691253363114878, "grad_norm": 0.5101046562194824, "learning_rate": 1.8820583109125678e-05, "loss": 0.1558, "step": 3485 }, { "epoch": 0.17716635362201127, "grad_norm": 0.6742383241653442, "learning_rate": 1.8818890975853258e-05, "loss": 0.1767, "step": 3490 }, { "epoch": 0.17742017361287374, "grad_norm": 0.7784730792045593, "learning_rate": 1.8817198842580845e-05, "loss": 0.1944, "step": 3495 }, { "epoch": 0.17767399360373623, "grad_norm": 0.5944966673851013, "learning_rate": 1.8815506709308425e-05, "loss": 0.1625, "step": 3500 }, { "epoch": 0.17792781359459872, "grad_norm": 0.5932590961456299, "learning_rate": 1.881381457603601e-05, "loss": 0.1994, "step": 3505 }, { "epoch": 0.17818163358546119, "grad_norm": 0.7129168510437012, "learning_rate": 1.8812122442763596e-05, "loss": 0.2029, "step": 3510 }, { "epoch": 0.17843545357632368, "grad_norm": 1.4822226762771606, "learning_rate": 1.8810430309491176e-05, "loss": 0.1733, "step": 3515 }, { "epoch": 0.17868927356718614, "grad_norm": 0.5432773232460022, "learning_rate": 1.8808738176218763e-05, "loss": 0.1995, "step": 3520 }, { "epoch": 0.17894309355804863, "grad_norm": 0.6018402576446533, "learning_rate": 1.8807046042946343e-05, "loss": 0.1706, "step": 3525 }, { "epoch": 0.17919691354891112, "grad_norm": 0.7911956906318665, "learning_rate": 1.8805353909673926e-05, "loss": 0.2016, "step": 3530 }, { "epoch": 0.1794507335397736, "grad_norm": 0.9234727621078491, "learning_rate": 1.880366177640151e-05, "loss": 0.1936, "step": 3535 }, { "epoch": 0.17970455353063608, "grad_norm": 0.5961397886276245, "learning_rate": 1.8801969643129094e-05, "loss": 0.1628, "step": 3540 }, { "epoch": 0.17995837352149854, "grad_norm": 0.689795196056366, "learning_rate": 1.8800277509856677e-05, "loss": 0.1835, "step": 3545 }, { "epoch": 0.18021219351236104, "grad_norm": 0.538791835308075, "learning_rate": 1.879858537658426e-05, "loss": 0.185, "step": 3550 }, { "epoch": 0.18046601350322353, "grad_norm": 0.6588658094406128, "learning_rate": 1.8796893243311844e-05, "loss": 0.1787, "step": 3555 }, { "epoch": 0.180719833494086, "grad_norm": 0.5752840042114258, "learning_rate": 1.8795201110039428e-05, "loss": 0.1643, "step": 3560 }, { "epoch": 0.18097365348494848, "grad_norm": 1.0449694395065308, "learning_rate": 1.879350897676701e-05, "loss": 0.167, "step": 3565 }, { "epoch": 0.18122747347581095, "grad_norm": 0.5879854559898376, "learning_rate": 1.8791816843494595e-05, "loss": 0.1879, "step": 3570 }, { "epoch": 0.18148129346667344, "grad_norm": 0.782319962978363, "learning_rate": 1.879012471022218e-05, "loss": 0.1702, "step": 3575 }, { "epoch": 0.1817351134575359, "grad_norm": 0.6967921853065491, "learning_rate": 1.8788432576949762e-05, "loss": 0.1811, "step": 3580 }, { "epoch": 0.1819889334483984, "grad_norm": 0.6189330220222473, "learning_rate": 1.8786740443677345e-05, "loss": 0.1682, "step": 3585 }, { "epoch": 0.18224275343926088, "grad_norm": 1.0096155405044556, "learning_rate": 1.878504831040493e-05, "loss": 0.1538, "step": 3590 }, { "epoch": 0.18249657343012335, "grad_norm": 0.9320985078811646, "learning_rate": 1.8783356177132513e-05, "loss": 0.1963, "step": 3595 }, { "epoch": 0.18275039342098584, "grad_norm": 0.6774333119392395, "learning_rate": 1.8781664043860096e-05, "loss": 0.1907, "step": 3600 }, { "epoch": 0.1830042134118483, "grad_norm": 0.7954360246658325, "learning_rate": 1.877997191058768e-05, "loss": 0.2024, "step": 3605 }, { "epoch": 0.1832580334027108, "grad_norm": 0.5827684998512268, "learning_rate": 1.8778279777315263e-05, "loss": 0.1882, "step": 3610 }, { "epoch": 0.1835118533935733, "grad_norm": 0.9476773738861084, "learning_rate": 1.8776587644042847e-05, "loss": 0.2031, "step": 3615 }, { "epoch": 0.18376567338443575, "grad_norm": 0.5946481227874756, "learning_rate": 1.877489551077043e-05, "loss": 0.1796, "step": 3620 }, { "epoch": 0.18401949337529824, "grad_norm": 0.9748368859291077, "learning_rate": 1.8773203377498014e-05, "loss": 0.1859, "step": 3625 }, { "epoch": 0.1842733133661607, "grad_norm": 0.680027425289154, "learning_rate": 1.8771511244225597e-05, "loss": 0.1949, "step": 3630 }, { "epoch": 0.1845271333570232, "grad_norm": 0.7523426413536072, "learning_rate": 1.876981911095318e-05, "loss": 0.1911, "step": 3635 }, { "epoch": 0.1847809533478857, "grad_norm": 0.6500402092933655, "learning_rate": 1.8768126977680764e-05, "loss": 0.163, "step": 3640 }, { "epoch": 0.18503477333874815, "grad_norm": 0.8817210793495178, "learning_rate": 1.8766434844408348e-05, "loss": 0.1681, "step": 3645 }, { "epoch": 0.18528859332961065, "grad_norm": 0.6788656115531921, "learning_rate": 1.876474271113593e-05, "loss": 0.1521, "step": 3650 }, { "epoch": 0.1855424133204731, "grad_norm": 0.5651068687438965, "learning_rate": 1.8763050577863512e-05, "loss": 0.167, "step": 3655 }, { "epoch": 0.1857962333113356, "grad_norm": 0.6204758286476135, "learning_rate": 1.87613584445911e-05, "loss": 0.1718, "step": 3660 }, { "epoch": 0.1860500533021981, "grad_norm": 0.6937487721443176, "learning_rate": 1.8759666311318682e-05, "loss": 0.2, "step": 3665 }, { "epoch": 0.18630387329306056, "grad_norm": 0.791344165802002, "learning_rate": 1.8757974178046266e-05, "loss": 0.1746, "step": 3670 }, { "epoch": 0.18655769328392305, "grad_norm": 0.7099347114562988, "learning_rate": 1.875628204477385e-05, "loss": 0.1779, "step": 3675 }, { "epoch": 0.1868115132747855, "grad_norm": 0.6113842129707336, "learning_rate": 1.875458991150143e-05, "loss": 0.1821, "step": 3680 }, { "epoch": 0.187065333265648, "grad_norm": 0.6101746559143066, "learning_rate": 1.8752897778229016e-05, "loss": 0.1616, "step": 3685 }, { "epoch": 0.1873191532565105, "grad_norm": 0.7758021354675293, "learning_rate": 1.87512056449566e-05, "loss": 0.1774, "step": 3690 }, { "epoch": 0.18757297324737296, "grad_norm": 0.632546603679657, "learning_rate": 1.874951351168418e-05, "loss": 0.1653, "step": 3695 }, { "epoch": 0.18782679323823545, "grad_norm": 1.211270809173584, "learning_rate": 1.8747821378411767e-05, "loss": 0.1907, "step": 3700 }, { "epoch": 0.18808061322909791, "grad_norm": 0.8178271055221558, "learning_rate": 1.8746129245139347e-05, "loss": 0.1833, "step": 3705 }, { "epoch": 0.1883344332199604, "grad_norm": 0.720481276512146, "learning_rate": 1.8744437111866934e-05, "loss": 0.1865, "step": 3710 }, { "epoch": 0.1885882532108229, "grad_norm": 0.6861073970794678, "learning_rate": 1.8742744978594514e-05, "loss": 0.159, "step": 3715 }, { "epoch": 0.18884207320168536, "grad_norm": 0.8942195773124695, "learning_rate": 1.8741052845322098e-05, "loss": 0.17, "step": 3720 }, { "epoch": 0.18909589319254785, "grad_norm": 0.6733516454696655, "learning_rate": 1.8739360712049685e-05, "loss": 0.2026, "step": 3725 }, { "epoch": 0.18934971318341032, "grad_norm": 0.7968901991844177, "learning_rate": 1.8737668578777265e-05, "loss": 0.1768, "step": 3730 }, { "epoch": 0.1896035331742728, "grad_norm": 0.7362722158432007, "learning_rate": 1.873597644550485e-05, "loss": 0.1793, "step": 3735 }, { "epoch": 0.18985735316513527, "grad_norm": 0.5821983814239502, "learning_rate": 1.8734284312232432e-05, "loss": 0.1935, "step": 3740 }, { "epoch": 0.19011117315599776, "grad_norm": 0.6550444960594177, "learning_rate": 1.8732592178960016e-05, "loss": 0.1815, "step": 3745 }, { "epoch": 0.19036499314686026, "grad_norm": 1.057003140449524, "learning_rate": 1.87309000456876e-05, "loss": 0.1735, "step": 3750 }, { "epoch": 0.19061881313772272, "grad_norm": 0.6318512558937073, "learning_rate": 1.8729207912415183e-05, "loss": 0.1894, "step": 3755 }, { "epoch": 0.1908726331285852, "grad_norm": 0.514367401599884, "learning_rate": 1.8727515779142766e-05, "loss": 0.162, "step": 3760 }, { "epoch": 0.19112645311944768, "grad_norm": 0.5353686809539795, "learning_rate": 1.872582364587035e-05, "loss": 0.1942, "step": 3765 }, { "epoch": 0.19138027311031017, "grad_norm": 0.7195748090744019, "learning_rate": 1.8724131512597933e-05, "loss": 0.1638, "step": 3770 }, { "epoch": 0.19163409310117266, "grad_norm": 0.5870863199234009, "learning_rate": 1.8722439379325517e-05, "loss": 0.1616, "step": 3775 }, { "epoch": 0.19188791309203512, "grad_norm": 0.5601217150688171, "learning_rate": 1.87207472460531e-05, "loss": 0.175, "step": 3780 }, { "epoch": 0.19214173308289761, "grad_norm": 0.6515225768089294, "learning_rate": 1.8719055112780684e-05, "loss": 0.196, "step": 3785 }, { "epoch": 0.19239555307376008, "grad_norm": 0.5685335397720337, "learning_rate": 1.8717362979508267e-05, "loss": 0.173, "step": 3790 }, { "epoch": 0.19264937306462257, "grad_norm": 0.5485351085662842, "learning_rate": 1.871567084623585e-05, "loss": 0.1613, "step": 3795 }, { "epoch": 0.19290319305548506, "grad_norm": 0.5211474299430847, "learning_rate": 1.8713978712963435e-05, "loss": 0.1811, "step": 3800 }, { "epoch": 0.19315701304634753, "grad_norm": 0.5695779323577881, "learning_rate": 1.8712286579691018e-05, "loss": 0.1737, "step": 3805 }, { "epoch": 0.19341083303721002, "grad_norm": 0.781248152256012, "learning_rate": 1.87105944464186e-05, "loss": 0.1474, "step": 3810 }, { "epoch": 0.19366465302807248, "grad_norm": 0.6343421936035156, "learning_rate": 1.8708902313146185e-05, "loss": 0.1597, "step": 3815 }, { "epoch": 0.19391847301893497, "grad_norm": 0.46050354838371277, "learning_rate": 1.870721017987377e-05, "loss": 0.1726, "step": 3820 }, { "epoch": 0.19417229300979746, "grad_norm": 0.7671462297439575, "learning_rate": 1.8705518046601352e-05, "loss": 0.16, "step": 3825 }, { "epoch": 0.19442611300065993, "grad_norm": 0.6018497347831726, "learning_rate": 1.8703825913328936e-05, "loss": 0.17, "step": 3830 }, { "epoch": 0.19467993299152242, "grad_norm": 0.5179815292358398, "learning_rate": 1.870213378005652e-05, "loss": 0.1667, "step": 3835 }, { "epoch": 0.19493375298238488, "grad_norm": 0.6752147674560547, "learning_rate": 1.8700441646784103e-05, "loss": 0.1778, "step": 3840 }, { "epoch": 0.19518757297324738, "grad_norm": 0.5998455286026001, "learning_rate": 1.8698749513511686e-05, "loss": 0.1804, "step": 3845 }, { "epoch": 0.19544139296410987, "grad_norm": 0.7329761981964111, "learning_rate": 1.869705738023927e-05, "loss": 0.1705, "step": 3850 }, { "epoch": 0.19569521295497233, "grad_norm": 0.5798784494400024, "learning_rate": 1.8695365246966853e-05, "loss": 0.1722, "step": 3855 }, { "epoch": 0.19594903294583482, "grad_norm": 0.55069899559021, "learning_rate": 1.8693673113694437e-05, "loss": 0.1677, "step": 3860 }, { "epoch": 0.1962028529366973, "grad_norm": 0.7288419008255005, "learning_rate": 1.869198098042202e-05, "loss": 0.1609, "step": 3865 }, { "epoch": 0.19645667292755978, "grad_norm": 0.5638713836669922, "learning_rate": 1.8690288847149604e-05, "loss": 0.1744, "step": 3870 }, { "epoch": 0.19671049291842224, "grad_norm": 1.066811203956604, "learning_rate": 1.8688596713877188e-05, "loss": 0.1809, "step": 3875 }, { "epoch": 0.19696431290928473, "grad_norm": 0.5710257291793823, "learning_rate": 1.868690458060477e-05, "loss": 0.177, "step": 3880 }, { "epoch": 0.19721813290014722, "grad_norm": 1.2099708318710327, "learning_rate": 1.868521244733235e-05, "loss": 0.1727, "step": 3885 }, { "epoch": 0.1974719528910097, "grad_norm": 0.5482766032218933, "learning_rate": 1.8683520314059938e-05, "loss": 0.1774, "step": 3890 }, { "epoch": 0.19772577288187218, "grad_norm": 0.5520328283309937, "learning_rate": 1.868182818078752e-05, "loss": 0.2038, "step": 3895 }, { "epoch": 0.19797959287273464, "grad_norm": 0.7456613183021545, "learning_rate": 1.8680136047515102e-05, "loss": 0.2103, "step": 3900 }, { "epoch": 0.19823341286359714, "grad_norm": 0.6516144275665283, "learning_rate": 1.867844391424269e-05, "loss": 0.1716, "step": 3905 }, { "epoch": 0.19848723285445963, "grad_norm": 0.6367723345756531, "learning_rate": 1.867675178097027e-05, "loss": 0.1618, "step": 3910 }, { "epoch": 0.1987410528453221, "grad_norm": 0.4886556565761566, "learning_rate": 1.8675059647697856e-05, "loss": 0.1596, "step": 3915 }, { "epoch": 0.19899487283618458, "grad_norm": 0.443977952003479, "learning_rate": 1.8673367514425436e-05, "loss": 0.1847, "step": 3920 }, { "epoch": 0.19924869282704705, "grad_norm": 0.5834622383117676, "learning_rate": 1.867167538115302e-05, "loss": 0.1697, "step": 3925 }, { "epoch": 0.19950251281790954, "grad_norm": 0.6995598673820496, "learning_rate": 1.8669983247880607e-05, "loss": 0.1785, "step": 3930 }, { "epoch": 0.19975633280877203, "grad_norm": 0.5793687105178833, "learning_rate": 1.8668291114608187e-05, "loss": 0.1835, "step": 3935 }, { "epoch": 0.2000101527996345, "grad_norm": 0.6711710691452026, "learning_rate": 1.866659898133577e-05, "loss": 0.1808, "step": 3940 }, { "epoch": 0.20026397279049699, "grad_norm": 0.7024595737457275, "learning_rate": 1.8664906848063354e-05, "loss": 0.1723, "step": 3945 }, { "epoch": 0.20051779278135945, "grad_norm": 0.5017092823982239, "learning_rate": 1.8663214714790937e-05, "loss": 0.1691, "step": 3950 }, { "epoch": 0.20077161277222194, "grad_norm": 0.769305944442749, "learning_rate": 1.8661522581518524e-05, "loss": 0.1612, "step": 3955 }, { "epoch": 0.20102543276308443, "grad_norm": 0.6473913192749023, "learning_rate": 1.8659830448246105e-05, "loss": 0.1788, "step": 3960 }, { "epoch": 0.2012792527539469, "grad_norm": 0.5249276757240295, "learning_rate": 1.8658138314973688e-05, "loss": 0.1654, "step": 3965 }, { "epoch": 0.2015330727448094, "grad_norm": 0.9578689932823181, "learning_rate": 1.865644618170127e-05, "loss": 0.1652, "step": 3970 }, { "epoch": 0.20178689273567185, "grad_norm": 0.580237627029419, "learning_rate": 1.8654754048428855e-05, "loss": 0.1705, "step": 3975 }, { "epoch": 0.20204071272653434, "grad_norm": 0.7080004811286926, "learning_rate": 1.865306191515644e-05, "loss": 0.1527, "step": 3980 }, { "epoch": 0.20229453271739684, "grad_norm": 0.4707263112068176, "learning_rate": 1.8651369781884022e-05, "loss": 0.1525, "step": 3985 }, { "epoch": 0.2025483527082593, "grad_norm": 1.2706327438354492, "learning_rate": 1.8649677648611606e-05, "loss": 0.1963, "step": 3990 }, { "epoch": 0.2028021726991218, "grad_norm": 0.7353994250297546, "learning_rate": 1.864798551533919e-05, "loss": 0.19, "step": 3995 }, { "epoch": 0.20305599268998425, "grad_norm": 0.7245195508003235, "learning_rate": 1.8646293382066773e-05, "loss": 0.1833, "step": 4000 }, { "epoch": 0.20330981268084675, "grad_norm": 0.7345285415649414, "learning_rate": 1.8644601248794356e-05, "loss": 0.175, "step": 4005 }, { "epoch": 0.2035636326717092, "grad_norm": 0.8779308795928955, "learning_rate": 1.864290911552194e-05, "loss": 0.1683, "step": 4010 }, { "epoch": 0.2038174526625717, "grad_norm": 0.49476489424705505, "learning_rate": 1.8641216982249524e-05, "loss": 0.1581, "step": 4015 }, { "epoch": 0.2040712726534342, "grad_norm": 0.5535828471183777, "learning_rate": 1.8639524848977107e-05, "loss": 0.1901, "step": 4020 }, { "epoch": 0.20432509264429666, "grad_norm": 0.5144571661949158, "learning_rate": 1.863783271570469e-05, "loss": 0.1598, "step": 4025 }, { "epoch": 0.20457891263515915, "grad_norm": 0.666415274143219, "learning_rate": 1.8636140582432274e-05, "loss": 0.1713, "step": 4030 }, { "epoch": 0.2048327326260216, "grad_norm": 0.5483223795890808, "learning_rate": 1.8634448449159858e-05, "loss": 0.1685, "step": 4035 }, { "epoch": 0.2050865526168841, "grad_norm": 1.4303494691848755, "learning_rate": 1.863275631588744e-05, "loss": 0.1635, "step": 4040 }, { "epoch": 0.2053403726077466, "grad_norm": 0.8545394539833069, "learning_rate": 1.8631064182615025e-05, "loss": 0.1647, "step": 4045 }, { "epoch": 0.20559419259860906, "grad_norm": 1.2137486934661865, "learning_rate": 1.862937204934261e-05, "loss": 0.1817, "step": 4050 }, { "epoch": 0.20584801258947155, "grad_norm": 0.6643612384796143, "learning_rate": 1.8627679916070192e-05, "loss": 0.1677, "step": 4055 }, { "epoch": 0.20610183258033402, "grad_norm": 0.6444915533065796, "learning_rate": 1.8625987782797775e-05, "loss": 0.1735, "step": 4060 }, { "epoch": 0.2063556525711965, "grad_norm": 0.591526985168457, "learning_rate": 1.862429564952536e-05, "loss": 0.1719, "step": 4065 }, { "epoch": 0.206609472562059, "grad_norm": 0.5368006825447083, "learning_rate": 1.8622603516252943e-05, "loss": 0.1468, "step": 4070 }, { "epoch": 0.20686329255292146, "grad_norm": 0.589856743812561, "learning_rate": 1.8620911382980523e-05, "loss": 0.1637, "step": 4075 }, { "epoch": 0.20711711254378395, "grad_norm": 0.707324206829071, "learning_rate": 1.861921924970811e-05, "loss": 0.1785, "step": 4080 }, { "epoch": 0.20737093253464642, "grad_norm": 0.6533371210098267, "learning_rate": 1.8617527116435693e-05, "loss": 0.1815, "step": 4085 }, { "epoch": 0.2076247525255089, "grad_norm": 1.014189600944519, "learning_rate": 1.8615834983163273e-05, "loss": 0.1684, "step": 4090 }, { "epoch": 0.2078785725163714, "grad_norm": 0.4793491065502167, "learning_rate": 1.861414284989086e-05, "loss": 0.1621, "step": 4095 }, { "epoch": 0.20813239250723387, "grad_norm": 0.7456510663032532, "learning_rate": 1.861245071661844e-05, "loss": 0.1836, "step": 4100 }, { "epoch": 0.20838621249809636, "grad_norm": 0.706316351890564, "learning_rate": 1.8610758583346027e-05, "loss": 0.176, "step": 4105 }, { "epoch": 0.20864003248895882, "grad_norm": 1.0256385803222656, "learning_rate": 1.860906645007361e-05, "loss": 0.1866, "step": 4110 }, { "epoch": 0.2088938524798213, "grad_norm": 0.5895639061927795, "learning_rate": 1.860737431680119e-05, "loss": 0.1542, "step": 4115 }, { "epoch": 0.2091476724706838, "grad_norm": 0.6485773324966431, "learning_rate": 1.8605682183528778e-05, "loss": 0.1585, "step": 4120 }, { "epoch": 0.20940149246154627, "grad_norm": 0.5658993124961853, "learning_rate": 1.8603990050256358e-05, "loss": 0.189, "step": 4125 }, { "epoch": 0.20965531245240876, "grad_norm": 0.5852290987968445, "learning_rate": 1.860229791698394e-05, "loss": 0.164, "step": 4130 }, { "epoch": 0.20990913244327122, "grad_norm": 0.5155125856399536, "learning_rate": 1.860060578371153e-05, "loss": 0.1706, "step": 4135 }, { "epoch": 0.21016295243413372, "grad_norm": 0.5006566047668457, "learning_rate": 1.859891365043911e-05, "loss": 0.158, "step": 4140 }, { "epoch": 0.21041677242499618, "grad_norm": 0.8291502594947815, "learning_rate": 1.8597221517166692e-05, "loss": 0.1732, "step": 4145 }, { "epoch": 0.21067059241585867, "grad_norm": 0.6317897439002991, "learning_rate": 1.8595529383894276e-05, "loss": 0.1629, "step": 4150 }, { "epoch": 0.21092441240672116, "grad_norm": 0.8886175155639648, "learning_rate": 1.859383725062186e-05, "loss": 0.162, "step": 4155 }, { "epoch": 0.21117823239758363, "grad_norm": 1.5014044046401978, "learning_rate": 1.8592145117349446e-05, "loss": 0.1696, "step": 4160 }, { "epoch": 0.21143205238844612, "grad_norm": 0.6006774306297302, "learning_rate": 1.8590452984077026e-05, "loss": 0.1706, "step": 4165 }, { "epoch": 0.21168587237930858, "grad_norm": 0.5629169940948486, "learning_rate": 1.858876085080461e-05, "loss": 0.1868, "step": 4170 }, { "epoch": 0.21193969237017107, "grad_norm": 0.5821768641471863, "learning_rate": 1.8587068717532194e-05, "loss": 0.1612, "step": 4175 }, { "epoch": 0.21219351236103356, "grad_norm": 0.4641706645488739, "learning_rate": 1.8585376584259777e-05, "loss": 0.1573, "step": 4180 }, { "epoch": 0.21244733235189603, "grad_norm": 0.6541835069656372, "learning_rate": 1.858368445098736e-05, "loss": 0.1757, "step": 4185 }, { "epoch": 0.21270115234275852, "grad_norm": 0.6074985861778259, "learning_rate": 1.8581992317714944e-05, "loss": 0.1877, "step": 4190 }, { "epoch": 0.21295497233362098, "grad_norm": 0.63148033618927, "learning_rate": 1.8580300184442528e-05, "loss": 0.1815, "step": 4195 }, { "epoch": 0.21320879232448348, "grad_norm": 0.7583580017089844, "learning_rate": 1.857860805117011e-05, "loss": 0.1703, "step": 4200 }, { "epoch": 0.21346261231534597, "grad_norm": 0.737652599811554, "learning_rate": 1.8576915917897695e-05, "loss": 0.1595, "step": 4205 }, { "epoch": 0.21371643230620843, "grad_norm": 0.6646580696105957, "learning_rate": 1.857522378462528e-05, "loss": 0.1653, "step": 4210 }, { "epoch": 0.21397025229707092, "grad_norm": 1.0683635473251343, "learning_rate": 1.8573531651352862e-05, "loss": 0.1656, "step": 4215 }, { "epoch": 0.2142240722879334, "grad_norm": 0.6982542276382446, "learning_rate": 1.8571839518080445e-05, "loss": 0.1861, "step": 4220 }, { "epoch": 0.21447789227879588, "grad_norm": 0.6811363697052002, "learning_rate": 1.857014738480803e-05, "loss": 0.1652, "step": 4225 }, { "epoch": 0.21473171226965837, "grad_norm": 0.43406397104263306, "learning_rate": 1.8568455251535613e-05, "loss": 0.1683, "step": 4230 }, { "epoch": 0.21498553226052083, "grad_norm": 0.4632825255393982, "learning_rate": 1.8566763118263196e-05, "loss": 0.1428, "step": 4235 }, { "epoch": 0.21523935225138333, "grad_norm": 0.6871059536933899, "learning_rate": 1.856507098499078e-05, "loss": 0.1557, "step": 4240 }, { "epoch": 0.2154931722422458, "grad_norm": 0.4901556968688965, "learning_rate": 1.8563378851718363e-05, "loss": 0.1651, "step": 4245 }, { "epoch": 0.21574699223310828, "grad_norm": 0.5677388310432434, "learning_rate": 1.8561686718445947e-05, "loss": 0.1552, "step": 4250 }, { "epoch": 0.21600081222397077, "grad_norm": 0.7926852107048035, "learning_rate": 1.855999458517353e-05, "loss": 0.1605, "step": 4255 }, { "epoch": 0.21625463221483324, "grad_norm": 0.69388347864151, "learning_rate": 1.8558302451901114e-05, "loss": 0.1773, "step": 4260 }, { "epoch": 0.21650845220569573, "grad_norm": 0.6975191235542297, "learning_rate": 1.8556610318628697e-05, "loss": 0.1646, "step": 4265 }, { "epoch": 0.2167622721965582, "grad_norm": 0.5928018689155579, "learning_rate": 1.855491818535628e-05, "loss": 0.1664, "step": 4270 }, { "epoch": 0.21701609218742068, "grad_norm": 0.7675602436065674, "learning_rate": 1.8553226052083864e-05, "loss": 0.1727, "step": 4275 }, { "epoch": 0.21726991217828315, "grad_norm": 1.1031657457351685, "learning_rate": 1.8551533918811445e-05, "loss": 0.1664, "step": 4280 }, { "epoch": 0.21752373216914564, "grad_norm": 0.46347787976264954, "learning_rate": 1.854984178553903e-05, "loss": 0.1781, "step": 4285 }, { "epoch": 0.21777755216000813, "grad_norm": 0.8614928126335144, "learning_rate": 1.8548149652266615e-05, "loss": 0.1721, "step": 4290 }, { "epoch": 0.2180313721508706, "grad_norm": 0.49578431248664856, "learning_rate": 1.8546457518994195e-05, "loss": 0.1724, "step": 4295 }, { "epoch": 0.2182851921417331, "grad_norm": 0.6700772643089294, "learning_rate": 1.8544765385721782e-05, "loss": 0.1681, "step": 4300 }, { "epoch": 0.21853901213259555, "grad_norm": 0.6420726180076599, "learning_rate": 1.8543073252449362e-05, "loss": 0.185, "step": 4305 }, { "epoch": 0.21879283212345804, "grad_norm": 0.6904594302177429, "learning_rate": 1.854138111917695e-05, "loss": 0.1689, "step": 4310 }, { "epoch": 0.21904665211432053, "grad_norm": 0.5921376347541809, "learning_rate": 1.8539688985904533e-05, "loss": 0.1651, "step": 4315 }, { "epoch": 0.219300472105183, "grad_norm": 0.7793440818786621, "learning_rate": 1.8537996852632113e-05, "loss": 0.1718, "step": 4320 }, { "epoch": 0.2195542920960455, "grad_norm": 0.42908427119255066, "learning_rate": 1.85363047193597e-05, "loss": 0.1639, "step": 4325 }, { "epoch": 0.21980811208690795, "grad_norm": 0.6529392600059509, "learning_rate": 1.853461258608728e-05, "loss": 0.1845, "step": 4330 }, { "epoch": 0.22006193207777044, "grad_norm": 0.4588109254837036, "learning_rate": 1.8532920452814864e-05, "loss": 0.1653, "step": 4335 }, { "epoch": 0.22031575206863294, "grad_norm": 0.5849223136901855, "learning_rate": 1.853122831954245e-05, "loss": 0.164, "step": 4340 }, { "epoch": 0.2205695720594954, "grad_norm": 0.7719104290008545, "learning_rate": 1.852953618627003e-05, "loss": 0.1663, "step": 4345 }, { "epoch": 0.2208233920503579, "grad_norm": 0.5275380611419678, "learning_rate": 1.8527844052997618e-05, "loss": 0.1622, "step": 4350 }, { "epoch": 0.22107721204122036, "grad_norm": 1.1291331052780151, "learning_rate": 1.8526151919725198e-05, "loss": 0.1736, "step": 4355 }, { "epoch": 0.22133103203208285, "grad_norm": 0.7423467636108398, "learning_rate": 1.852445978645278e-05, "loss": 0.1594, "step": 4360 }, { "epoch": 0.22158485202294534, "grad_norm": 0.6104901432991028, "learning_rate": 1.8522767653180368e-05, "loss": 0.1613, "step": 4365 }, { "epoch": 0.2218386720138078, "grad_norm": 0.6495379209518433, "learning_rate": 1.852107551990795e-05, "loss": 0.1595, "step": 4370 }, { "epoch": 0.2220924920046703, "grad_norm": 0.5626767873764038, "learning_rate": 1.8519383386635532e-05, "loss": 0.1716, "step": 4375 }, { "epoch": 0.22234631199553276, "grad_norm": 0.5380212664604187, "learning_rate": 1.8517691253363116e-05, "loss": 0.1745, "step": 4380 }, { "epoch": 0.22260013198639525, "grad_norm": 0.5656577348709106, "learning_rate": 1.85159991200907e-05, "loss": 0.1812, "step": 4385 }, { "epoch": 0.22285395197725774, "grad_norm": 0.5490018129348755, "learning_rate": 1.8514306986818283e-05, "loss": 0.1724, "step": 4390 }, { "epoch": 0.2231077719681202, "grad_norm": 0.563014805316925, "learning_rate": 1.8512614853545866e-05, "loss": 0.1547, "step": 4395 }, { "epoch": 0.2233615919589827, "grad_norm": 0.6768389344215393, "learning_rate": 1.851092272027345e-05, "loss": 0.1643, "step": 4400 }, { "epoch": 0.22361541194984516, "grad_norm": 0.9782013893127441, "learning_rate": 1.8509230587001033e-05, "loss": 0.1556, "step": 4405 }, { "epoch": 0.22386923194070765, "grad_norm": 0.6855179071426392, "learning_rate": 1.8507538453728617e-05, "loss": 0.167, "step": 4410 }, { "epoch": 0.22412305193157014, "grad_norm": 0.9513364434242249, "learning_rate": 1.85058463204562e-05, "loss": 0.1615, "step": 4415 }, { "epoch": 0.2243768719224326, "grad_norm": 0.5813558101654053, "learning_rate": 1.8504154187183784e-05, "loss": 0.1509, "step": 4420 }, { "epoch": 0.2246306919132951, "grad_norm": 0.8219256401062012, "learning_rate": 1.8502462053911367e-05, "loss": 0.1616, "step": 4425 }, { "epoch": 0.22488451190415756, "grad_norm": 0.4321226477622986, "learning_rate": 1.850076992063895e-05, "loss": 0.155, "step": 4430 }, { "epoch": 0.22513833189502006, "grad_norm": 0.6410679817199707, "learning_rate": 1.8499077787366534e-05, "loss": 0.1658, "step": 4435 }, { "epoch": 0.22539215188588252, "grad_norm": 0.6968072056770325, "learning_rate": 1.8497385654094118e-05, "loss": 0.1574, "step": 4440 }, { "epoch": 0.225645971876745, "grad_norm": 0.8063611388206482, "learning_rate": 1.84956935208217e-05, "loss": 0.149, "step": 4445 }, { "epoch": 0.2258997918676075, "grad_norm": 0.5573265552520752, "learning_rate": 1.8494001387549285e-05, "loss": 0.1624, "step": 4450 }, { "epoch": 0.22615361185846997, "grad_norm": 0.6173406839370728, "learning_rate": 1.849230925427687e-05, "loss": 0.1539, "step": 4455 }, { "epoch": 0.22640743184933246, "grad_norm": 0.46205034852027893, "learning_rate": 1.8490617121004452e-05, "loss": 0.1546, "step": 4460 }, { "epoch": 0.22666125184019492, "grad_norm": 0.5302807688713074, "learning_rate": 1.8488924987732036e-05, "loss": 0.1712, "step": 4465 }, { "epoch": 0.2269150718310574, "grad_norm": 0.6777194738388062, "learning_rate": 1.848723285445962e-05, "loss": 0.1613, "step": 4470 }, { "epoch": 0.2271688918219199, "grad_norm": 0.4836040735244751, "learning_rate": 1.8485540721187203e-05, "loss": 0.166, "step": 4475 }, { "epoch": 0.22742271181278237, "grad_norm": 0.7450656890869141, "learning_rate": 1.8483848587914786e-05, "loss": 0.1776, "step": 4480 }, { "epoch": 0.22767653180364486, "grad_norm": 1.0024539232254028, "learning_rate": 1.8482156454642367e-05, "loss": 0.1649, "step": 4485 }, { "epoch": 0.22793035179450732, "grad_norm": 0.8160313963890076, "learning_rate": 1.8480464321369953e-05, "loss": 0.1389, "step": 4490 }, { "epoch": 0.22818417178536982, "grad_norm": 0.495733380317688, "learning_rate": 1.8478772188097537e-05, "loss": 0.1434, "step": 4495 }, { "epoch": 0.2284379917762323, "grad_norm": 0.6479185819625854, "learning_rate": 1.847708005482512e-05, "loss": 0.1492, "step": 4500 }, { "epoch": 0.22869181176709477, "grad_norm": 0.6237260103225708, "learning_rate": 1.8475387921552704e-05, "loss": 0.1692, "step": 4505 }, { "epoch": 0.22894563175795726, "grad_norm": 0.5665149688720703, "learning_rate": 1.8473695788280284e-05, "loss": 0.1783, "step": 4510 }, { "epoch": 0.22919945174881973, "grad_norm": 0.7720448970794678, "learning_rate": 1.847200365500787e-05, "loss": 0.1766, "step": 4515 }, { "epoch": 0.22945327173968222, "grad_norm": 0.6933048963546753, "learning_rate": 1.8470311521735455e-05, "loss": 0.1655, "step": 4520 }, { "epoch": 0.2297070917305447, "grad_norm": 0.7732922434806824, "learning_rate": 1.8468619388463035e-05, "loss": 0.1609, "step": 4525 }, { "epoch": 0.22996091172140717, "grad_norm": 0.9285184741020203, "learning_rate": 1.8466927255190622e-05, "loss": 0.1476, "step": 4530 }, { "epoch": 0.23021473171226967, "grad_norm": 0.5524913668632507, "learning_rate": 1.8465235121918202e-05, "loss": 0.1541, "step": 4535 }, { "epoch": 0.23046855170313213, "grad_norm": 0.5339060425758362, "learning_rate": 1.8463542988645786e-05, "loss": 0.1802, "step": 4540 }, { "epoch": 0.23072237169399462, "grad_norm": 0.5357369184494019, "learning_rate": 1.8461850855373372e-05, "loss": 0.162, "step": 4545 }, { "epoch": 0.2309761916848571, "grad_norm": 0.6212167739868164, "learning_rate": 1.8460158722100953e-05, "loss": 0.1612, "step": 4550 }, { "epoch": 0.23123001167571958, "grad_norm": 0.5287737250328064, "learning_rate": 1.845846658882854e-05, "loss": 0.1445, "step": 4555 }, { "epoch": 0.23148383166658207, "grad_norm": 0.5808826088905334, "learning_rate": 1.845677445555612e-05, "loss": 0.1443, "step": 4560 }, { "epoch": 0.23173765165744453, "grad_norm": 0.47897592186927795, "learning_rate": 1.8455082322283703e-05, "loss": 0.1503, "step": 4565 }, { "epoch": 0.23199147164830702, "grad_norm": 0.7037847638130188, "learning_rate": 1.845339018901129e-05, "loss": 0.1676, "step": 4570 }, { "epoch": 0.2322452916391695, "grad_norm": 0.4233403503894806, "learning_rate": 1.845169805573887e-05, "loss": 0.1459, "step": 4575 }, { "epoch": 0.23249911163003198, "grad_norm": 1.0100395679473877, "learning_rate": 1.8450005922466454e-05, "loss": 0.1742, "step": 4580 }, { "epoch": 0.23275293162089447, "grad_norm": 0.6140120029449463, "learning_rate": 1.8448313789194037e-05, "loss": 0.1439, "step": 4585 }, { "epoch": 0.23300675161175693, "grad_norm": 0.5431662201881409, "learning_rate": 1.844662165592162e-05, "loss": 0.1444, "step": 4590 }, { "epoch": 0.23326057160261943, "grad_norm": 0.7064282894134521, "learning_rate": 1.8444929522649208e-05, "loss": 0.1598, "step": 4595 }, { "epoch": 0.2335143915934819, "grad_norm": 0.5530514717102051, "learning_rate": 1.8443237389376788e-05, "loss": 0.1514, "step": 4600 }, { "epoch": 0.23376821158434438, "grad_norm": 0.5173631906509399, "learning_rate": 1.844154525610437e-05, "loss": 0.1581, "step": 4605 }, { "epoch": 0.23402203157520687, "grad_norm": 0.5507360100746155, "learning_rate": 1.8439853122831955e-05, "loss": 0.1555, "step": 4610 }, { "epoch": 0.23427585156606934, "grad_norm": 0.6322073340415955, "learning_rate": 1.843816098955954e-05, "loss": 0.1734, "step": 4615 }, { "epoch": 0.23452967155693183, "grad_norm": 0.6972060799598694, "learning_rate": 1.8436468856287122e-05, "loss": 0.1554, "step": 4620 }, { "epoch": 0.2347834915477943, "grad_norm": 0.4970358610153198, "learning_rate": 1.8434776723014706e-05, "loss": 0.1679, "step": 4625 }, { "epoch": 0.23503731153865678, "grad_norm": 0.5872799158096313, "learning_rate": 1.843308458974229e-05, "loss": 0.1489, "step": 4630 }, { "epoch": 0.23529113152951928, "grad_norm": 1.1778312921524048, "learning_rate": 1.8431392456469873e-05, "loss": 0.1502, "step": 4635 }, { "epoch": 0.23554495152038174, "grad_norm": 0.5631628036499023, "learning_rate": 1.8429700323197456e-05, "loss": 0.1709, "step": 4640 }, { "epoch": 0.23579877151124423, "grad_norm": 0.7208503484725952, "learning_rate": 1.842800818992504e-05, "loss": 0.1591, "step": 4645 }, { "epoch": 0.2360525915021067, "grad_norm": 0.5835586786270142, "learning_rate": 1.8426316056652624e-05, "loss": 0.16, "step": 4650 }, { "epoch": 0.2363064114929692, "grad_norm": 0.5638494491577148, "learning_rate": 1.8424623923380207e-05, "loss": 0.1534, "step": 4655 }, { "epoch": 0.23656023148383168, "grad_norm": 0.5742624998092651, "learning_rate": 1.842293179010779e-05, "loss": 0.1587, "step": 4660 }, { "epoch": 0.23681405147469414, "grad_norm": 1.1678133010864258, "learning_rate": 1.8421239656835374e-05, "loss": 0.1577, "step": 4665 }, { "epoch": 0.23706787146555663, "grad_norm": 0.5317492485046387, "learning_rate": 1.8419547523562958e-05, "loss": 0.1635, "step": 4670 }, { "epoch": 0.2373216914564191, "grad_norm": 0.5751121640205383, "learning_rate": 1.841785539029054e-05, "loss": 0.1619, "step": 4675 }, { "epoch": 0.2375755114472816, "grad_norm": 0.5913323163986206, "learning_rate": 1.8416163257018125e-05, "loss": 0.1438, "step": 4680 }, { "epoch": 0.23782933143814408, "grad_norm": 0.43694448471069336, "learning_rate": 1.841447112374571e-05, "loss": 0.1558, "step": 4685 }, { "epoch": 0.23808315142900655, "grad_norm": 0.580193042755127, "learning_rate": 1.841277899047329e-05, "loss": 0.1544, "step": 4690 }, { "epoch": 0.23833697141986904, "grad_norm": 0.5924519896507263, "learning_rate": 1.8411086857200875e-05, "loss": 0.1766, "step": 4695 }, { "epoch": 0.2385907914107315, "grad_norm": 0.5101874470710754, "learning_rate": 1.840939472392846e-05, "loss": 0.149, "step": 4700 }, { "epoch": 0.238844611401594, "grad_norm": 0.5736780762672424, "learning_rate": 1.8407702590656043e-05, "loss": 0.1663, "step": 4705 }, { "epoch": 0.23909843139245646, "grad_norm": 0.4370191693305969, "learning_rate": 1.8406010457383626e-05, "loss": 0.1512, "step": 4710 }, { "epoch": 0.23935225138331895, "grad_norm": 0.545307993888855, "learning_rate": 1.8404318324111206e-05, "loss": 0.1537, "step": 4715 }, { "epoch": 0.23960607137418144, "grad_norm": 0.680793285369873, "learning_rate": 1.8402626190838793e-05, "loss": 0.1798, "step": 4720 }, { "epoch": 0.2398598913650439, "grad_norm": 0.7056201100349426, "learning_rate": 1.8400934057566377e-05, "loss": 0.1549, "step": 4725 }, { "epoch": 0.2401137113559064, "grad_norm": 0.4889606535434723, "learning_rate": 1.8399241924293957e-05, "loss": 0.1701, "step": 4730 }, { "epoch": 0.24036753134676886, "grad_norm": 0.5846245884895325, "learning_rate": 1.8397549791021544e-05, "loss": 0.1456, "step": 4735 }, { "epoch": 0.24062135133763135, "grad_norm": 0.6689904928207397, "learning_rate": 1.8395857657749124e-05, "loss": 0.1648, "step": 4740 }, { "epoch": 0.24087517132849384, "grad_norm": 0.5623743534088135, "learning_rate": 1.839416552447671e-05, "loss": 0.1538, "step": 4745 }, { "epoch": 0.2411289913193563, "grad_norm": 0.8147268295288086, "learning_rate": 1.8392473391204294e-05, "loss": 0.1594, "step": 4750 }, { "epoch": 0.2413828113102188, "grad_norm": 0.49249938130378723, "learning_rate": 1.8390781257931875e-05, "loss": 0.1728, "step": 4755 }, { "epoch": 0.24163663130108126, "grad_norm": 0.4992578327655792, "learning_rate": 1.838908912465946e-05, "loss": 0.1593, "step": 4760 }, { "epoch": 0.24189045129194375, "grad_norm": 0.5691167712211609, "learning_rate": 1.838739699138704e-05, "loss": 0.1537, "step": 4765 }, { "epoch": 0.24214427128280624, "grad_norm": 0.5181019902229309, "learning_rate": 1.8385704858114625e-05, "loss": 0.148, "step": 4770 }, { "epoch": 0.2423980912736687, "grad_norm": 0.4930359423160553, "learning_rate": 1.8384012724842212e-05, "loss": 0.1518, "step": 4775 }, { "epoch": 0.2426519112645312, "grad_norm": 0.6070294380187988, "learning_rate": 1.8382320591569792e-05, "loss": 0.1812, "step": 4780 }, { "epoch": 0.24290573125539366, "grad_norm": 0.6835089921951294, "learning_rate": 1.8380628458297376e-05, "loss": 0.1767, "step": 4785 }, { "epoch": 0.24315955124625616, "grad_norm": 0.7003611326217651, "learning_rate": 1.837893632502496e-05, "loss": 0.1476, "step": 4790 }, { "epoch": 0.24341337123711865, "grad_norm": 0.4410546123981476, "learning_rate": 1.8377244191752543e-05, "loss": 0.1493, "step": 4795 }, { "epoch": 0.2436671912279811, "grad_norm": 0.6904603242874146, "learning_rate": 1.837555205848013e-05, "loss": 0.1473, "step": 4800 }, { "epoch": 0.2439210112188436, "grad_norm": 0.5898627638816833, "learning_rate": 1.837385992520771e-05, "loss": 0.1576, "step": 4805 }, { "epoch": 0.24417483120970607, "grad_norm": 0.5015605092048645, "learning_rate": 1.8372167791935294e-05, "loss": 0.1438, "step": 4810 }, { "epoch": 0.24442865120056856, "grad_norm": 0.7323523163795471, "learning_rate": 1.8370475658662877e-05, "loss": 0.1609, "step": 4815 }, { "epoch": 0.24468247119143105, "grad_norm": 0.5204626321792603, "learning_rate": 1.836878352539046e-05, "loss": 0.169, "step": 4820 }, { "epoch": 0.24493629118229351, "grad_norm": 0.5830612182617188, "learning_rate": 1.8367091392118044e-05, "loss": 0.1387, "step": 4825 }, { "epoch": 0.245190111173156, "grad_norm": 0.45259636640548706, "learning_rate": 1.8365399258845628e-05, "loss": 0.1599, "step": 4830 }, { "epoch": 0.24544393116401847, "grad_norm": 0.6329886317253113, "learning_rate": 1.836370712557321e-05, "loss": 0.1701, "step": 4835 }, { "epoch": 0.24569775115488096, "grad_norm": 0.4662386476993561, "learning_rate": 1.8362014992300795e-05, "loss": 0.1307, "step": 4840 }, { "epoch": 0.24595157114574343, "grad_norm": 0.5723447799682617, "learning_rate": 1.836032285902838e-05, "loss": 0.1635, "step": 4845 }, { "epoch": 0.24620539113660592, "grad_norm": 0.9412028789520264, "learning_rate": 1.8358630725755962e-05, "loss": 0.1704, "step": 4850 }, { "epoch": 0.2464592111274684, "grad_norm": 0.6017778515815735, "learning_rate": 1.8356938592483545e-05, "loss": 0.1658, "step": 4855 }, { "epoch": 0.24671303111833087, "grad_norm": 0.45639511942863464, "learning_rate": 1.835524645921113e-05, "loss": 0.1519, "step": 4860 }, { "epoch": 0.24696685110919336, "grad_norm": 0.5620295405387878, "learning_rate": 1.8353554325938713e-05, "loss": 0.1727, "step": 4865 }, { "epoch": 0.24722067110005583, "grad_norm": 0.5075967907905579, "learning_rate": 1.8351862192666296e-05, "loss": 0.1656, "step": 4870 }, { "epoch": 0.24747449109091832, "grad_norm": 0.7711220383644104, "learning_rate": 1.835017005939388e-05, "loss": 0.1705, "step": 4875 }, { "epoch": 0.2477283110817808, "grad_norm": 0.5555239319801331, "learning_rate": 1.8348477926121463e-05, "loss": 0.1239, "step": 4880 }, { "epoch": 0.24798213107264327, "grad_norm": 0.4874543845653534, "learning_rate": 1.8346785792849047e-05, "loss": 0.1596, "step": 4885 }, { "epoch": 0.24823595106350577, "grad_norm": 0.49628138542175293, "learning_rate": 1.834509365957663e-05, "loss": 0.142, "step": 4890 }, { "epoch": 0.24848977105436823, "grad_norm": 0.6575145125389099, "learning_rate": 1.8343401526304214e-05, "loss": 0.1602, "step": 4895 }, { "epoch": 0.24874359104523072, "grad_norm": 0.4418342709541321, "learning_rate": 1.8341709393031797e-05, "loss": 0.1574, "step": 4900 }, { "epoch": 0.2489974110360932, "grad_norm": 0.7797775864601135, "learning_rate": 1.834001725975938e-05, "loss": 0.1718, "step": 4905 }, { "epoch": 0.24925123102695568, "grad_norm": 0.87046879529953, "learning_rate": 1.8338325126486964e-05, "loss": 0.1667, "step": 4910 }, { "epoch": 0.24950505101781817, "grad_norm": 0.47170791029930115, "learning_rate": 1.8336632993214548e-05, "loss": 0.1376, "step": 4915 }, { "epoch": 0.24975887100868063, "grad_norm": 0.6200122833251953, "learning_rate": 1.8334940859942128e-05, "loss": 0.1674, "step": 4920 }, { "epoch": 0.2500126909995431, "grad_norm": 0.7150362730026245, "learning_rate": 1.8333248726669715e-05, "loss": 0.1697, "step": 4925 }, { "epoch": 0.2502665109904056, "grad_norm": 0.9218109846115112, "learning_rate": 1.83315565933973e-05, "loss": 0.1693, "step": 4930 }, { "epoch": 0.2505203309812681, "grad_norm": 0.5581763386726379, "learning_rate": 1.832986446012488e-05, "loss": 0.1584, "step": 4935 }, { "epoch": 0.25077415097213057, "grad_norm": 0.610871434211731, "learning_rate": 1.8328172326852466e-05, "loss": 0.1564, "step": 4940 }, { "epoch": 0.25102797096299306, "grad_norm": 0.5767541527748108, "learning_rate": 1.8326480193580046e-05, "loss": 0.1501, "step": 4945 }, { "epoch": 0.2512817909538555, "grad_norm": 0.5301868319511414, "learning_rate": 1.8324788060307633e-05, "loss": 0.1444, "step": 4950 }, { "epoch": 0.251535610944718, "grad_norm": 0.4195795953273773, "learning_rate": 1.8323095927035216e-05, "loss": 0.1529, "step": 4955 }, { "epoch": 0.2517894309355805, "grad_norm": 0.6302689909934998, "learning_rate": 1.8321403793762797e-05, "loss": 0.1554, "step": 4960 }, { "epoch": 0.252043250926443, "grad_norm": 0.8117844462394714, "learning_rate": 1.8319711660490383e-05, "loss": 0.1598, "step": 4965 }, { "epoch": 0.25229707091730547, "grad_norm": 0.6213078498840332, "learning_rate": 1.8318019527217964e-05, "loss": 0.1598, "step": 4970 }, { "epoch": 0.2525508909081679, "grad_norm": 0.5293858647346497, "learning_rate": 1.8316327393945547e-05, "loss": 0.144, "step": 4975 }, { "epoch": 0.2528047108990304, "grad_norm": 0.5613870620727539, "learning_rate": 1.8314635260673134e-05, "loss": 0.1496, "step": 4980 }, { "epoch": 0.2530585308898929, "grad_norm": 0.9666538834571838, "learning_rate": 1.8312943127400714e-05, "loss": 0.1718, "step": 4985 }, { "epoch": 0.2533123508807554, "grad_norm": 0.6101076006889343, "learning_rate": 1.83112509941283e-05, "loss": 0.1444, "step": 4990 }, { "epoch": 0.25356617087161787, "grad_norm": 0.5413176417350769, "learning_rate": 1.830955886085588e-05, "loss": 0.1378, "step": 4995 }, { "epoch": 0.2538199908624803, "grad_norm": 0.731587827205658, "learning_rate": 1.8307866727583465e-05, "loss": 0.1523, "step": 5000 }, { "epoch": 0.2540738108533428, "grad_norm": 0.6076200008392334, "learning_rate": 1.8306174594311052e-05, "loss": 0.1298, "step": 5005 }, { "epoch": 0.2543276308442053, "grad_norm": 0.4898158013820648, "learning_rate": 1.8304482461038632e-05, "loss": 0.1509, "step": 5010 }, { "epoch": 0.2545814508350678, "grad_norm": 0.6534063816070557, "learning_rate": 1.8302790327766215e-05, "loss": 0.1562, "step": 5015 }, { "epoch": 0.25483527082593027, "grad_norm": 0.8010299801826477, "learning_rate": 1.83010981944938e-05, "loss": 0.1422, "step": 5020 }, { "epoch": 0.2550890908167927, "grad_norm": 0.6017109751701355, "learning_rate": 1.8299406061221383e-05, "loss": 0.1592, "step": 5025 }, { "epoch": 0.2553429108076552, "grad_norm": 0.579904317855835, "learning_rate": 1.8297713927948966e-05, "loss": 0.1567, "step": 5030 }, { "epoch": 0.2555967307985177, "grad_norm": 0.94927978515625, "learning_rate": 1.829602179467655e-05, "loss": 0.1479, "step": 5035 }, { "epoch": 0.2558505507893802, "grad_norm": 0.5884600877761841, "learning_rate": 1.8294329661404133e-05, "loss": 0.1592, "step": 5040 }, { "epoch": 0.2561043707802427, "grad_norm": 0.7687221169471741, "learning_rate": 1.8292637528131717e-05, "loss": 0.1514, "step": 5045 }, { "epoch": 0.2563581907711051, "grad_norm": 0.7298230528831482, "learning_rate": 1.82909453948593e-05, "loss": 0.1746, "step": 5050 }, { "epoch": 0.2566120107619676, "grad_norm": 0.8364652395248413, "learning_rate": 1.8289253261586884e-05, "loss": 0.145, "step": 5055 }, { "epoch": 0.2568658307528301, "grad_norm": 0.799123227596283, "learning_rate": 1.8287561128314467e-05, "loss": 0.1438, "step": 5060 }, { "epoch": 0.2571196507436926, "grad_norm": 0.7807921767234802, "learning_rate": 1.828586899504205e-05, "loss": 0.1375, "step": 5065 }, { "epoch": 0.2573734707345551, "grad_norm": 0.4554001986980438, "learning_rate": 1.8284176861769634e-05, "loss": 0.1586, "step": 5070 }, { "epoch": 0.2576272907254175, "grad_norm": 0.5187448263168335, "learning_rate": 1.8282484728497218e-05, "loss": 0.1526, "step": 5075 }, { "epoch": 0.25788111071628, "grad_norm": 0.5898265242576599, "learning_rate": 1.82807925952248e-05, "loss": 0.1523, "step": 5080 }, { "epoch": 0.2581349307071425, "grad_norm": 0.4496493637561798, "learning_rate": 1.8279100461952385e-05, "loss": 0.1583, "step": 5085 }, { "epoch": 0.258388750698005, "grad_norm": 0.5907924771308899, "learning_rate": 1.827740832867997e-05, "loss": 0.1697, "step": 5090 }, { "epoch": 0.2586425706888675, "grad_norm": 0.5844322443008423, "learning_rate": 1.8275716195407552e-05, "loss": 0.1562, "step": 5095 }, { "epoch": 0.2588963906797299, "grad_norm": 0.5347046256065369, "learning_rate": 1.8274024062135136e-05, "loss": 0.1396, "step": 5100 }, { "epoch": 0.2591502106705924, "grad_norm": 0.6127249598503113, "learning_rate": 1.827233192886272e-05, "loss": 0.1442, "step": 5105 }, { "epoch": 0.2594040306614549, "grad_norm": 0.5309034585952759, "learning_rate": 1.8270639795590303e-05, "loss": 0.148, "step": 5110 }, { "epoch": 0.2596578506523174, "grad_norm": 0.6933298707008362, "learning_rate": 1.8268947662317886e-05, "loss": 0.1723, "step": 5115 }, { "epoch": 0.2599116706431799, "grad_norm": 0.6623631119728088, "learning_rate": 1.826725552904547e-05, "loss": 0.164, "step": 5120 }, { "epoch": 0.2601654906340423, "grad_norm": 0.7233380079269409, "learning_rate": 1.826556339577305e-05, "loss": 0.1501, "step": 5125 }, { "epoch": 0.2604193106249048, "grad_norm": 0.6823766231536865, "learning_rate": 1.8263871262500637e-05, "loss": 0.1508, "step": 5130 }, { "epoch": 0.2606731306157673, "grad_norm": 0.5988183617591858, "learning_rate": 1.826217912922822e-05, "loss": 0.1495, "step": 5135 }, { "epoch": 0.2609269506066298, "grad_norm": 0.48616233468055725, "learning_rate": 1.8260486995955804e-05, "loss": 0.1423, "step": 5140 }, { "epoch": 0.2611807705974923, "grad_norm": 0.4381769001483917, "learning_rate": 1.8258794862683388e-05, "loss": 0.1396, "step": 5145 }, { "epoch": 0.2614345905883547, "grad_norm": 0.4858790934085846, "learning_rate": 1.8257102729410968e-05, "loss": 0.1503, "step": 5150 }, { "epoch": 0.2616884105792172, "grad_norm": 0.5144358277320862, "learning_rate": 1.8255410596138555e-05, "loss": 0.1586, "step": 5155 }, { "epoch": 0.2619422305700797, "grad_norm": 0.46459242701530457, "learning_rate": 1.8253718462866138e-05, "loss": 0.1571, "step": 5160 }, { "epoch": 0.2621960505609422, "grad_norm": 0.8006240725517273, "learning_rate": 1.825202632959372e-05, "loss": 0.1511, "step": 5165 }, { "epoch": 0.2624498705518047, "grad_norm": 0.5427458882331848, "learning_rate": 1.8250334196321305e-05, "loss": 0.1455, "step": 5170 }, { "epoch": 0.2627036905426671, "grad_norm": 0.6491566300392151, "learning_rate": 1.8248642063048886e-05, "loss": 0.1539, "step": 5175 }, { "epoch": 0.2629575105335296, "grad_norm": 0.9627673029899597, "learning_rate": 1.824694992977647e-05, "loss": 0.1524, "step": 5180 }, { "epoch": 0.2632113305243921, "grad_norm": 0.5841239094734192, "learning_rate": 1.8245257796504056e-05, "loss": 0.1476, "step": 5185 }, { "epoch": 0.2634651505152546, "grad_norm": 0.501349925994873, "learning_rate": 1.8243565663231636e-05, "loss": 0.1262, "step": 5190 }, { "epoch": 0.26371897050611703, "grad_norm": 0.899695873260498, "learning_rate": 1.8241873529959223e-05, "loss": 0.1508, "step": 5195 }, { "epoch": 0.2639727904969795, "grad_norm": 0.4951866865158081, "learning_rate": 1.8240181396686803e-05, "loss": 0.1479, "step": 5200 }, { "epoch": 0.264226610487842, "grad_norm": 0.5623950362205505, "learning_rate": 1.8238489263414387e-05, "loss": 0.1641, "step": 5205 }, { "epoch": 0.2644804304787045, "grad_norm": 0.4208991527557373, "learning_rate": 1.8236797130141974e-05, "loss": 0.1436, "step": 5210 }, { "epoch": 0.264734250469567, "grad_norm": 0.41514652967453003, "learning_rate": 1.8235104996869554e-05, "loss": 0.1507, "step": 5215 }, { "epoch": 0.26498807046042944, "grad_norm": 0.4979642629623413, "learning_rate": 1.8233412863597137e-05, "loss": 0.1338, "step": 5220 }, { "epoch": 0.26524189045129193, "grad_norm": 0.8609977960586548, "learning_rate": 1.823172073032472e-05, "loss": 0.1624, "step": 5225 }, { "epoch": 0.2654957104421544, "grad_norm": 0.4810056984424591, "learning_rate": 1.8230028597052305e-05, "loss": 0.1511, "step": 5230 }, { "epoch": 0.2657495304330169, "grad_norm": 0.5637201070785522, "learning_rate": 1.822833646377989e-05, "loss": 0.1451, "step": 5235 }, { "epoch": 0.2660033504238794, "grad_norm": 0.77173912525177, "learning_rate": 1.822664433050747e-05, "loss": 0.1694, "step": 5240 }, { "epoch": 0.26625717041474184, "grad_norm": 0.7287624478340149, "learning_rate": 1.8224952197235055e-05, "loss": 0.1474, "step": 5245 }, { "epoch": 0.26651099040560433, "grad_norm": 0.6791149973869324, "learning_rate": 1.822326006396264e-05, "loss": 0.1321, "step": 5250 }, { "epoch": 0.2667648103964668, "grad_norm": 0.486334890127182, "learning_rate": 1.8221567930690222e-05, "loss": 0.145, "step": 5255 }, { "epoch": 0.2670186303873293, "grad_norm": 0.4670858383178711, "learning_rate": 1.8219875797417806e-05, "loss": 0.1335, "step": 5260 }, { "epoch": 0.2672724503781918, "grad_norm": 0.4310056269168854, "learning_rate": 1.821818366414539e-05, "loss": 0.1339, "step": 5265 }, { "epoch": 0.26752627036905424, "grad_norm": 0.5363441109657288, "learning_rate": 1.8216491530872973e-05, "loss": 0.1421, "step": 5270 }, { "epoch": 0.26778009035991673, "grad_norm": 0.6194841861724854, "learning_rate": 1.8214799397600556e-05, "loss": 0.1682, "step": 5275 }, { "epoch": 0.2680339103507792, "grad_norm": 0.6221051216125488, "learning_rate": 1.821310726432814e-05, "loss": 0.139, "step": 5280 }, { "epoch": 0.2682877303416417, "grad_norm": 0.5695869326591492, "learning_rate": 1.8211415131055724e-05, "loss": 0.1409, "step": 5285 }, { "epoch": 0.2685415503325042, "grad_norm": 0.4726913273334503, "learning_rate": 1.8209722997783307e-05, "loss": 0.1443, "step": 5290 }, { "epoch": 0.26879537032336664, "grad_norm": 0.5159754157066345, "learning_rate": 1.820803086451089e-05, "loss": 0.158, "step": 5295 }, { "epoch": 0.26904919031422914, "grad_norm": 1.0508424043655396, "learning_rate": 1.8206338731238474e-05, "loss": 0.1508, "step": 5300 }, { "epoch": 0.26930301030509163, "grad_norm": 0.49234750866889954, "learning_rate": 1.8204646597966058e-05, "loss": 0.1555, "step": 5305 }, { "epoch": 0.2695568302959541, "grad_norm": 0.5780052542686462, "learning_rate": 1.820295446469364e-05, "loss": 0.1449, "step": 5310 }, { "epoch": 0.2698106502868166, "grad_norm": 0.6060476303100586, "learning_rate": 1.8201262331421225e-05, "loss": 0.1535, "step": 5315 }, { "epoch": 0.27006447027767905, "grad_norm": 0.5436288118362427, "learning_rate": 1.819957019814881e-05, "loss": 0.1513, "step": 5320 }, { "epoch": 0.27031829026854154, "grad_norm": 0.6781036257743835, "learning_rate": 1.8197878064876392e-05, "loss": 0.1599, "step": 5325 }, { "epoch": 0.27057211025940403, "grad_norm": 0.6104758381843567, "learning_rate": 1.8196185931603972e-05, "loss": 0.1395, "step": 5330 }, { "epoch": 0.2708259302502665, "grad_norm": 0.5809837579727173, "learning_rate": 1.819449379833156e-05, "loss": 0.1341, "step": 5335 }, { "epoch": 0.271079750241129, "grad_norm": 0.9313431978225708, "learning_rate": 1.8192801665059143e-05, "loss": 0.1543, "step": 5340 }, { "epoch": 0.27133357023199145, "grad_norm": 0.7047086954116821, "learning_rate": 1.8191109531786726e-05, "loss": 0.1528, "step": 5345 }, { "epoch": 0.27158739022285394, "grad_norm": 0.4853482246398926, "learning_rate": 1.818941739851431e-05, "loss": 0.1488, "step": 5350 }, { "epoch": 0.27184121021371643, "grad_norm": 1.5312269926071167, "learning_rate": 1.818772526524189e-05, "loss": 0.1581, "step": 5355 }, { "epoch": 0.2720950302045789, "grad_norm": 0.6196808815002441, "learning_rate": 1.8186033131969477e-05, "loss": 0.1306, "step": 5360 }, { "epoch": 0.2723488501954414, "grad_norm": 0.7211527228355408, "learning_rate": 1.818434099869706e-05, "loss": 0.1374, "step": 5365 }, { "epoch": 0.27260267018630385, "grad_norm": 0.4848230481147766, "learning_rate": 1.818264886542464e-05, "loss": 0.1495, "step": 5370 }, { "epoch": 0.27285649017716634, "grad_norm": 0.5052759051322937, "learning_rate": 1.8180956732152227e-05, "loss": 0.1601, "step": 5375 }, { "epoch": 0.27311031016802884, "grad_norm": 0.6089451909065247, "learning_rate": 1.8179264598879807e-05, "loss": 0.1567, "step": 5380 }, { "epoch": 0.2733641301588913, "grad_norm": 0.5925761461257935, "learning_rate": 1.8177572465607394e-05, "loss": 0.1406, "step": 5385 }, { "epoch": 0.2736179501497538, "grad_norm": 0.6521849632263184, "learning_rate": 1.8175880332334978e-05, "loss": 0.1402, "step": 5390 }, { "epoch": 0.27387177014061626, "grad_norm": 0.4725498557090759, "learning_rate": 1.8174188199062558e-05, "loss": 0.1494, "step": 5395 }, { "epoch": 0.27412559013147875, "grad_norm": 0.5570566058158875, "learning_rate": 1.8172496065790145e-05, "loss": 0.1463, "step": 5400 }, { "epoch": 0.27437941012234124, "grad_norm": 0.6081807613372803, "learning_rate": 1.8170803932517725e-05, "loss": 0.1356, "step": 5405 }, { "epoch": 0.27463323011320373, "grad_norm": 0.5569767355918884, "learning_rate": 1.816911179924531e-05, "loss": 0.131, "step": 5410 }, { "epoch": 0.2748870501040662, "grad_norm": 0.6330967545509338, "learning_rate": 1.8167419665972896e-05, "loss": 0.1529, "step": 5415 }, { "epoch": 0.27514087009492866, "grad_norm": 0.5000077486038208, "learning_rate": 1.8165727532700476e-05, "loss": 0.1352, "step": 5420 }, { "epoch": 0.27539469008579115, "grad_norm": 0.6218600273132324, "learning_rate": 1.816403539942806e-05, "loss": 0.1528, "step": 5425 }, { "epoch": 0.27564851007665364, "grad_norm": 0.5702294707298279, "learning_rate": 1.8162343266155643e-05, "loss": 0.1399, "step": 5430 }, { "epoch": 0.27590233006751613, "grad_norm": 0.9165341258049011, "learning_rate": 1.8160651132883226e-05, "loss": 0.1471, "step": 5435 }, { "epoch": 0.2761561500583786, "grad_norm": 0.5732564926147461, "learning_rate": 1.8158958999610813e-05, "loss": 0.1554, "step": 5440 }, { "epoch": 0.27640997004924106, "grad_norm": 0.5652003288269043, "learning_rate": 1.8157266866338394e-05, "loss": 0.1491, "step": 5445 }, { "epoch": 0.27666379004010355, "grad_norm": 0.5051900148391724, "learning_rate": 1.8155574733065977e-05, "loss": 0.1511, "step": 5450 }, { "epoch": 0.27691761003096604, "grad_norm": 0.4586610794067383, "learning_rate": 1.815388259979356e-05, "loss": 0.1433, "step": 5455 }, { "epoch": 0.27717143002182854, "grad_norm": 0.6589603424072266, "learning_rate": 1.8152190466521144e-05, "loss": 0.1451, "step": 5460 }, { "epoch": 0.277425250012691, "grad_norm": 0.6678264737129211, "learning_rate": 1.8150498333248728e-05, "loss": 0.1481, "step": 5465 }, { "epoch": 0.27767907000355346, "grad_norm": 0.5780376195907593, "learning_rate": 1.814880619997631e-05, "loss": 0.159, "step": 5470 }, { "epoch": 0.27793288999441595, "grad_norm": 0.72762531042099, "learning_rate": 1.8147114066703895e-05, "loss": 0.1429, "step": 5475 }, { "epoch": 0.27818670998527845, "grad_norm": 0.5006431341171265, "learning_rate": 1.814542193343148e-05, "loss": 0.1635, "step": 5480 }, { "epoch": 0.27844052997614094, "grad_norm": 0.5482341647148132, "learning_rate": 1.8143729800159062e-05, "loss": 0.1551, "step": 5485 }, { "epoch": 0.2786943499670034, "grad_norm": 0.4888313114643097, "learning_rate": 1.8142037666886645e-05, "loss": 0.1414, "step": 5490 }, { "epoch": 0.27894816995786587, "grad_norm": 0.49801504611968994, "learning_rate": 1.814034553361423e-05, "loss": 0.1561, "step": 5495 }, { "epoch": 0.27920198994872836, "grad_norm": 0.42549580335617065, "learning_rate": 1.8138653400341813e-05, "loss": 0.1265, "step": 5500 }, { "epoch": 0.27945580993959085, "grad_norm": 0.582340657711029, "learning_rate": 1.8136961267069396e-05, "loss": 0.1468, "step": 5505 }, { "epoch": 0.27970962993045334, "grad_norm": 0.46748143434524536, "learning_rate": 1.813526913379698e-05, "loss": 0.143, "step": 5510 }, { "epoch": 0.2799634499213158, "grad_norm": 0.49724528193473816, "learning_rate": 1.8133577000524563e-05, "loss": 0.1523, "step": 5515 }, { "epoch": 0.28021726991217827, "grad_norm": 0.49215927720069885, "learning_rate": 1.8131884867252147e-05, "loss": 0.1504, "step": 5520 }, { "epoch": 0.28047108990304076, "grad_norm": 0.5257487297058105, "learning_rate": 1.813019273397973e-05, "loss": 0.1279, "step": 5525 }, { "epoch": 0.28072490989390325, "grad_norm": 0.6766299605369568, "learning_rate": 1.8128500600707314e-05, "loss": 0.1515, "step": 5530 }, { "epoch": 0.28097872988476574, "grad_norm": 0.531494140625, "learning_rate": 1.8126808467434897e-05, "loss": 0.1464, "step": 5535 }, { "epoch": 0.2812325498756282, "grad_norm": 0.4178050458431244, "learning_rate": 1.812511633416248e-05, "loss": 0.1271, "step": 5540 }, { "epoch": 0.28148636986649067, "grad_norm": 0.576151967048645, "learning_rate": 1.8123424200890064e-05, "loss": 0.1328, "step": 5545 }, { "epoch": 0.28174018985735316, "grad_norm": 0.6306776404380798, "learning_rate": 1.8121732067617648e-05, "loss": 0.1511, "step": 5550 }, { "epoch": 0.28199400984821565, "grad_norm": 0.634989321231842, "learning_rate": 1.812003993434523e-05, "loss": 0.1549, "step": 5555 }, { "epoch": 0.28224782983907815, "grad_norm": 0.7044651508331299, "learning_rate": 1.811834780107281e-05, "loss": 0.1609, "step": 5560 }, { "epoch": 0.2825016498299406, "grad_norm": 0.5213934183120728, "learning_rate": 1.81166556678004e-05, "loss": 0.1359, "step": 5565 }, { "epoch": 0.2827554698208031, "grad_norm": 0.5297014117240906, "learning_rate": 1.8114963534527982e-05, "loss": 0.1493, "step": 5570 }, { "epoch": 0.28300928981166557, "grad_norm": 0.4303801953792572, "learning_rate": 1.8113271401255562e-05, "loss": 0.1408, "step": 5575 }, { "epoch": 0.28326310980252806, "grad_norm": 0.7388393878936768, "learning_rate": 1.811157926798315e-05, "loss": 0.1492, "step": 5580 }, { "epoch": 0.28351692979339055, "grad_norm": 0.6850863695144653, "learning_rate": 1.810988713471073e-05, "loss": 0.1631, "step": 5585 }, { "epoch": 0.283770749784253, "grad_norm": 0.7044445872306824, "learning_rate": 1.8108195001438316e-05, "loss": 0.1582, "step": 5590 }, { "epoch": 0.2840245697751155, "grad_norm": 0.4703899621963501, "learning_rate": 1.81065028681659e-05, "loss": 0.1369, "step": 5595 }, { "epoch": 0.28427838976597797, "grad_norm": 0.5529588460922241, "learning_rate": 1.810481073489348e-05, "loss": 0.1618, "step": 5600 }, { "epoch": 0.28453220975684046, "grad_norm": 0.6327471733093262, "learning_rate": 1.8103118601621067e-05, "loss": 0.1339, "step": 5605 }, { "epoch": 0.28478602974770295, "grad_norm": 0.5310539603233337, "learning_rate": 1.8101426468348647e-05, "loss": 0.1511, "step": 5610 }, { "epoch": 0.2850398497385654, "grad_norm": 0.5089000463485718, "learning_rate": 1.809973433507623e-05, "loss": 0.1491, "step": 5615 }, { "epoch": 0.2852936697294279, "grad_norm": 0.4953418970108032, "learning_rate": 1.8098042201803818e-05, "loss": 0.1468, "step": 5620 }, { "epoch": 0.28554748972029037, "grad_norm": 0.6269605159759521, "learning_rate": 1.8096350068531398e-05, "loss": 0.1411, "step": 5625 }, { "epoch": 0.28580130971115286, "grad_norm": 0.5403065085411072, "learning_rate": 1.8094657935258985e-05, "loss": 0.139, "step": 5630 }, { "epoch": 0.28605512970201535, "grad_norm": 0.9192875623703003, "learning_rate": 1.8092965801986565e-05, "loss": 0.133, "step": 5635 }, { "epoch": 0.2863089496928778, "grad_norm": 0.5615043044090271, "learning_rate": 1.809127366871415e-05, "loss": 0.1354, "step": 5640 }, { "epoch": 0.2865627696837403, "grad_norm": 2.930060863494873, "learning_rate": 1.8089581535441735e-05, "loss": 0.1422, "step": 5645 }, { "epoch": 0.2868165896746028, "grad_norm": 0.42745351791381836, "learning_rate": 1.8087889402169315e-05, "loss": 0.1428, "step": 5650 }, { "epoch": 0.28707040966546526, "grad_norm": 0.5274901390075684, "learning_rate": 1.80861972688969e-05, "loss": 0.164, "step": 5655 }, { "epoch": 0.28732422965632776, "grad_norm": 1.1042112112045288, "learning_rate": 1.8084505135624483e-05, "loss": 0.142, "step": 5660 }, { "epoch": 0.2875780496471902, "grad_norm": 0.6478941440582275, "learning_rate": 1.8082813002352066e-05, "loss": 0.1353, "step": 5665 }, { "epoch": 0.2878318696380527, "grad_norm": 0.6474268436431885, "learning_rate": 1.808112086907965e-05, "loss": 0.1537, "step": 5670 }, { "epoch": 0.2880856896289152, "grad_norm": 0.46995773911476135, "learning_rate": 1.8079428735807233e-05, "loss": 0.1338, "step": 5675 }, { "epoch": 0.28833950961977767, "grad_norm": 0.652370274066925, "learning_rate": 1.8077736602534817e-05, "loss": 0.1532, "step": 5680 }, { "epoch": 0.28859332961064016, "grad_norm": 0.5019606351852417, "learning_rate": 1.80760444692624e-05, "loss": 0.1606, "step": 5685 }, { "epoch": 0.2888471496015026, "grad_norm": 0.44102251529693604, "learning_rate": 1.8074352335989984e-05, "loss": 0.1462, "step": 5690 }, { "epoch": 0.2891009695923651, "grad_norm": 0.4564070701599121, "learning_rate": 1.8072660202717567e-05, "loss": 0.1537, "step": 5695 }, { "epoch": 0.2893547895832276, "grad_norm": 0.42364734411239624, "learning_rate": 1.807096806944515e-05, "loss": 0.1347, "step": 5700 }, { "epoch": 0.28960860957409007, "grad_norm": 0.5539987683296204, "learning_rate": 1.8069275936172734e-05, "loss": 0.1375, "step": 5705 }, { "epoch": 0.28986242956495256, "grad_norm": 0.4797275960445404, "learning_rate": 1.8067583802900318e-05, "loss": 0.1473, "step": 5710 }, { "epoch": 0.290116249555815, "grad_norm": 0.6089901328086853, "learning_rate": 1.80658916696279e-05, "loss": 0.1346, "step": 5715 }, { "epoch": 0.2903700695466775, "grad_norm": 0.49910488724708557, "learning_rate": 1.8064199536355485e-05, "loss": 0.1322, "step": 5720 }, { "epoch": 0.29062388953754, "grad_norm": 0.45843714475631714, "learning_rate": 1.806250740308307e-05, "loss": 0.1367, "step": 5725 }, { "epoch": 0.2908777095284025, "grad_norm": 1.3286268711090088, "learning_rate": 1.8060815269810652e-05, "loss": 0.1337, "step": 5730 }, { "epoch": 0.29113152951926496, "grad_norm": 0.5683203339576721, "learning_rate": 1.8059123136538236e-05, "loss": 0.1415, "step": 5735 }, { "epoch": 0.2913853495101274, "grad_norm": 0.6591338515281677, "learning_rate": 1.805743100326582e-05, "loss": 0.1362, "step": 5740 }, { "epoch": 0.2916391695009899, "grad_norm": 0.6420497298240662, "learning_rate": 1.8055738869993403e-05, "loss": 0.1575, "step": 5745 }, { "epoch": 0.2918929894918524, "grad_norm": 0.4309554100036621, "learning_rate": 1.8054046736720986e-05, "loss": 0.1502, "step": 5750 }, { "epoch": 0.2921468094827149, "grad_norm": 1.4654325246810913, "learning_rate": 1.805235460344857e-05, "loss": 0.1285, "step": 5755 }, { "epoch": 0.2924006294735773, "grad_norm": 0.667293906211853, "learning_rate": 1.8050662470176153e-05, "loss": 0.1473, "step": 5760 }, { "epoch": 0.2926544494644398, "grad_norm": 0.4698887765407562, "learning_rate": 1.8048970336903734e-05, "loss": 0.1396, "step": 5765 }, { "epoch": 0.2929082694553023, "grad_norm": 0.5942188501358032, "learning_rate": 1.804727820363132e-05, "loss": 0.1436, "step": 5770 }, { "epoch": 0.2931620894461648, "grad_norm": 0.34591802954673767, "learning_rate": 1.8045586070358904e-05, "loss": 0.1373, "step": 5775 }, { "epoch": 0.2934159094370273, "grad_norm": 0.4438014030456543, "learning_rate": 1.8043893937086488e-05, "loss": 0.1468, "step": 5780 }, { "epoch": 0.2936697294278897, "grad_norm": 0.5076015591621399, "learning_rate": 1.804220180381407e-05, "loss": 0.1318, "step": 5785 }, { "epoch": 0.2939235494187522, "grad_norm": 0.5563536882400513, "learning_rate": 1.804050967054165e-05, "loss": 0.1402, "step": 5790 }, { "epoch": 0.2941773694096147, "grad_norm": 14.195382118225098, "learning_rate": 1.8038817537269238e-05, "loss": 0.1376, "step": 5795 }, { "epoch": 0.2944311894004772, "grad_norm": 0.40444865822792053, "learning_rate": 1.8037125403996822e-05, "loss": 0.1234, "step": 5800 }, { "epoch": 0.2946850093913397, "grad_norm": 0.5746991038322449, "learning_rate": 1.8035433270724402e-05, "loss": 0.1297, "step": 5805 }, { "epoch": 0.2949388293822021, "grad_norm": 0.31169363856315613, "learning_rate": 1.803374113745199e-05, "loss": 0.136, "step": 5810 }, { "epoch": 0.2951926493730646, "grad_norm": 0.4355963170528412, "learning_rate": 1.803204900417957e-05, "loss": 0.1352, "step": 5815 }, { "epoch": 0.2954464693639271, "grad_norm": 0.5133848786354065, "learning_rate": 1.8030356870907153e-05, "loss": 0.149, "step": 5820 }, { "epoch": 0.2957002893547896, "grad_norm": 0.5429642200469971, "learning_rate": 1.802866473763474e-05, "loss": 0.148, "step": 5825 }, { "epoch": 0.2959541093456521, "grad_norm": 0.777310848236084, "learning_rate": 1.802697260436232e-05, "loss": 0.1203, "step": 5830 }, { "epoch": 0.2962079293365145, "grad_norm": 0.5586498975753784, "learning_rate": 1.8025280471089907e-05, "loss": 0.1489, "step": 5835 }, { "epoch": 0.296461749327377, "grad_norm": 0.4667200446128845, "learning_rate": 1.8023588337817487e-05, "loss": 0.1203, "step": 5840 }, { "epoch": 0.2967155693182395, "grad_norm": 0.7403630614280701, "learning_rate": 1.802189620454507e-05, "loss": 0.1111, "step": 5845 }, { "epoch": 0.296969389309102, "grad_norm": 1.7740875482559204, "learning_rate": 1.8020204071272657e-05, "loss": 0.1303, "step": 5850 }, { "epoch": 0.2972232092999645, "grad_norm": 0.44264400005340576, "learning_rate": 1.8018511938000237e-05, "loss": 0.141, "step": 5855 }, { "epoch": 0.2974770292908269, "grad_norm": 0.6758630275726318, "learning_rate": 1.801681980472782e-05, "loss": 0.1291, "step": 5860 }, { "epoch": 0.2977308492816894, "grad_norm": 0.6458776593208313, "learning_rate": 1.8015127671455405e-05, "loss": 0.1398, "step": 5865 }, { "epoch": 0.2979846692725519, "grad_norm": 0.6976125836372375, "learning_rate": 1.8013435538182988e-05, "loss": 0.1499, "step": 5870 }, { "epoch": 0.2982384892634144, "grad_norm": 0.7639079689979553, "learning_rate": 1.801174340491057e-05, "loss": 0.1473, "step": 5875 }, { "epoch": 0.2984923092542769, "grad_norm": 0.5145253539085388, "learning_rate": 1.8010051271638155e-05, "loss": 0.127, "step": 5880 }, { "epoch": 0.2987461292451393, "grad_norm": 0.5991470217704773, "learning_rate": 1.800835913836574e-05, "loss": 0.1434, "step": 5885 }, { "epoch": 0.2989999492360018, "grad_norm": 0.5576327443122864, "learning_rate": 1.8006667005093322e-05, "loss": 0.1258, "step": 5890 }, { "epoch": 0.2992537692268643, "grad_norm": 0.8021837472915649, "learning_rate": 1.8004974871820906e-05, "loss": 0.1543, "step": 5895 }, { "epoch": 0.2995075892177268, "grad_norm": 0.6771152019500732, "learning_rate": 1.800328273854849e-05, "loss": 0.1559, "step": 5900 }, { "epoch": 0.2997614092085893, "grad_norm": 0.6033034324645996, "learning_rate": 1.8001590605276073e-05, "loss": 0.1469, "step": 5905 }, { "epoch": 0.3000152291994517, "grad_norm": 0.4367592930793762, "learning_rate": 1.7999898472003656e-05, "loss": 0.1241, "step": 5910 }, { "epoch": 0.3002690491903142, "grad_norm": 0.8468542098999023, "learning_rate": 1.799820633873124e-05, "loss": 0.1336, "step": 5915 }, { "epoch": 0.3005228691811767, "grad_norm": 0.5358647108078003, "learning_rate": 1.7996514205458824e-05, "loss": 0.1435, "step": 5920 }, { "epoch": 0.3007766891720392, "grad_norm": 0.35355257987976074, "learning_rate": 1.7994822072186407e-05, "loss": 0.1468, "step": 5925 }, { "epoch": 0.3010305091629017, "grad_norm": 0.49814465641975403, "learning_rate": 1.799312993891399e-05, "loss": 0.133, "step": 5930 }, { "epoch": 0.30128432915376413, "grad_norm": 0.505181074142456, "learning_rate": 1.7991437805641574e-05, "loss": 0.1573, "step": 5935 }, { "epoch": 0.3015381491446266, "grad_norm": 0.5839815139770508, "learning_rate": 1.7989745672369158e-05, "loss": 0.1351, "step": 5940 }, { "epoch": 0.3017919691354891, "grad_norm": 0.7596808075904846, "learning_rate": 1.798805353909674e-05, "loss": 0.1488, "step": 5945 }, { "epoch": 0.3020457891263516, "grad_norm": 1.0872101783752441, "learning_rate": 1.7986361405824325e-05, "loss": 0.1295, "step": 5950 }, { "epoch": 0.3022996091172141, "grad_norm": 0.4910557270050049, "learning_rate": 1.798466927255191e-05, "loss": 0.1453, "step": 5955 }, { "epoch": 0.30255342910807653, "grad_norm": 0.46220898628234863, "learning_rate": 1.7982977139279492e-05, "loss": 0.1402, "step": 5960 }, { "epoch": 0.302807249098939, "grad_norm": 0.6212031245231628, "learning_rate": 1.7981285006007075e-05, "loss": 0.1473, "step": 5965 }, { "epoch": 0.3030610690898015, "grad_norm": 1.0893614292144775, "learning_rate": 1.797959287273466e-05, "loss": 0.1528, "step": 5970 }, { "epoch": 0.303314889080664, "grad_norm": 0.6324992775917053, "learning_rate": 1.7977900739462242e-05, "loss": 0.1384, "step": 5975 }, { "epoch": 0.3035687090715265, "grad_norm": 0.4653703272342682, "learning_rate": 1.7976208606189826e-05, "loss": 0.1314, "step": 5980 }, { "epoch": 0.30382252906238894, "grad_norm": 0.901879608631134, "learning_rate": 1.797451647291741e-05, "loss": 0.1424, "step": 5985 }, { "epoch": 0.3040763490532514, "grad_norm": 0.7357150316238403, "learning_rate": 1.7972824339644993e-05, "loss": 0.1383, "step": 5990 }, { "epoch": 0.3043301690441139, "grad_norm": 0.4859638810157776, "learning_rate": 1.7971132206372573e-05, "loss": 0.1473, "step": 5995 }, { "epoch": 0.3045839890349764, "grad_norm": 0.5311628580093384, "learning_rate": 1.796944007310016e-05, "loss": 0.1479, "step": 6000 }, { "epoch": 0.3048378090258389, "grad_norm": 0.48468127846717834, "learning_rate": 1.7967747939827744e-05, "loss": 0.1397, "step": 6005 }, { "epoch": 0.30509162901670134, "grad_norm": 0.5624568462371826, "learning_rate": 1.7966055806555324e-05, "loss": 0.1426, "step": 6010 }, { "epoch": 0.30534544900756383, "grad_norm": 0.6200907826423645, "learning_rate": 1.796436367328291e-05, "loss": 0.1372, "step": 6015 }, { "epoch": 0.3055992689984263, "grad_norm": 0.8948644995689392, "learning_rate": 1.796267154001049e-05, "loss": 0.1448, "step": 6020 }, { "epoch": 0.3058530889892888, "grad_norm": 0.6271385550498962, "learning_rate": 1.7960979406738078e-05, "loss": 0.1432, "step": 6025 }, { "epoch": 0.3061069089801513, "grad_norm": 0.6979460120201111, "learning_rate": 1.795928727346566e-05, "loss": 0.1276, "step": 6030 }, { "epoch": 0.30636072897101374, "grad_norm": 1.2904983758926392, "learning_rate": 1.795759514019324e-05, "loss": 0.1406, "step": 6035 }, { "epoch": 0.30661454896187623, "grad_norm": 0.6335322856903076, "learning_rate": 1.795590300692083e-05, "loss": 0.1433, "step": 6040 }, { "epoch": 0.3068683689527387, "grad_norm": 0.4658793807029724, "learning_rate": 1.795421087364841e-05, "loss": 0.1318, "step": 6045 }, { "epoch": 0.3071221889436012, "grad_norm": 0.5421667098999023, "learning_rate": 1.7952518740375992e-05, "loss": 0.142, "step": 6050 }, { "epoch": 0.30737600893446365, "grad_norm": 0.6543579697608948, "learning_rate": 1.7950826607103576e-05, "loss": 0.1387, "step": 6055 }, { "epoch": 0.30762982892532614, "grad_norm": 0.4661172330379486, "learning_rate": 1.794913447383116e-05, "loss": 0.142, "step": 6060 }, { "epoch": 0.30788364891618863, "grad_norm": 0.4072614014148712, "learning_rate": 1.7947442340558743e-05, "loss": 0.133, "step": 6065 }, { "epoch": 0.3081374689070511, "grad_norm": 0.46051713824272156, "learning_rate": 1.7945750207286326e-05, "loss": 0.1249, "step": 6070 }, { "epoch": 0.3083912888979136, "grad_norm": 0.5725454092025757, "learning_rate": 1.794405807401391e-05, "loss": 0.1221, "step": 6075 }, { "epoch": 0.30864510888877605, "grad_norm": 0.5743607878684998, "learning_rate": 1.7942365940741494e-05, "loss": 0.1223, "step": 6080 }, { "epoch": 0.30889892887963855, "grad_norm": 0.7320393919944763, "learning_rate": 1.7940673807469077e-05, "loss": 0.1453, "step": 6085 }, { "epoch": 0.30915274887050104, "grad_norm": 0.4180808365345001, "learning_rate": 1.793898167419666e-05, "loss": 0.1409, "step": 6090 }, { "epoch": 0.30940656886136353, "grad_norm": 0.5129685997962952, "learning_rate": 1.7937289540924244e-05, "loss": 0.1372, "step": 6095 }, { "epoch": 0.309660388852226, "grad_norm": 0.630707859992981, "learning_rate": 1.7935597407651828e-05, "loss": 0.1319, "step": 6100 }, { "epoch": 0.30991420884308846, "grad_norm": 0.5199107527732849, "learning_rate": 1.793390527437941e-05, "loss": 0.1371, "step": 6105 }, { "epoch": 0.31016802883395095, "grad_norm": 0.5185748338699341, "learning_rate": 1.7932213141106995e-05, "loss": 0.1467, "step": 6110 }, { "epoch": 0.31042184882481344, "grad_norm": 0.7415216565132141, "learning_rate": 1.793052100783458e-05, "loss": 0.1482, "step": 6115 }, { "epoch": 0.31067566881567593, "grad_norm": 0.5409974455833435, "learning_rate": 1.7928828874562162e-05, "loss": 0.1218, "step": 6120 }, { "epoch": 0.3109294888065384, "grad_norm": 0.47021251916885376, "learning_rate": 1.7927136741289745e-05, "loss": 0.137, "step": 6125 }, { "epoch": 0.31118330879740086, "grad_norm": 0.36402925848960876, "learning_rate": 1.792544460801733e-05, "loss": 0.1197, "step": 6130 }, { "epoch": 0.31143712878826335, "grad_norm": 0.485312819480896, "learning_rate": 1.7923752474744913e-05, "loss": 0.1321, "step": 6135 }, { "epoch": 0.31169094877912584, "grad_norm": 0.7221769690513611, "learning_rate": 1.7922060341472496e-05, "loss": 0.1479, "step": 6140 }, { "epoch": 0.31194476876998833, "grad_norm": 0.722754180431366, "learning_rate": 1.792036820820008e-05, "loss": 0.136, "step": 6145 }, { "epoch": 0.3121985887608508, "grad_norm": 0.43338173627853394, "learning_rate": 1.7918676074927663e-05, "loss": 0.135, "step": 6150 }, { "epoch": 0.31245240875171326, "grad_norm": 0.7877047061920166, "learning_rate": 1.7916983941655247e-05, "loss": 0.1345, "step": 6155 }, { "epoch": 0.31270622874257575, "grad_norm": 0.8918716907501221, "learning_rate": 1.791529180838283e-05, "loss": 0.1506, "step": 6160 }, { "epoch": 0.31296004873343825, "grad_norm": 0.4334893524646759, "learning_rate": 1.7913599675110414e-05, "loss": 0.1468, "step": 6165 }, { "epoch": 0.31321386872430074, "grad_norm": 0.6378242373466492, "learning_rate": 1.7911907541837997e-05, "loss": 0.1381, "step": 6170 }, { "epoch": 0.31346768871516323, "grad_norm": 0.6442030072212219, "learning_rate": 1.791021540856558e-05, "loss": 0.1546, "step": 6175 }, { "epoch": 0.31372150870602566, "grad_norm": 0.5167520046234131, "learning_rate": 1.7908523275293164e-05, "loss": 0.1357, "step": 6180 }, { "epoch": 0.31397532869688816, "grad_norm": 0.5824690461158752, "learning_rate": 1.7906831142020748e-05, "loss": 0.1268, "step": 6185 }, { "epoch": 0.31422914868775065, "grad_norm": 0.7088459134101868, "learning_rate": 1.790513900874833e-05, "loss": 0.1586, "step": 6190 }, { "epoch": 0.31448296867861314, "grad_norm": 0.6302781701087952, "learning_rate": 1.7903446875475915e-05, "loss": 0.1298, "step": 6195 }, { "epoch": 0.31473678866947563, "grad_norm": 0.5944651365280151, "learning_rate": 1.7901754742203495e-05, "loss": 0.1448, "step": 6200 }, { "epoch": 0.31499060866033807, "grad_norm": 0.679571270942688, "learning_rate": 1.7900062608931082e-05, "loss": 0.1469, "step": 6205 }, { "epoch": 0.31524442865120056, "grad_norm": 0.522102952003479, "learning_rate": 1.7898370475658666e-05, "loss": 0.1414, "step": 6210 }, { "epoch": 0.31549824864206305, "grad_norm": 0.5716415047645569, "learning_rate": 1.789667834238625e-05, "loss": 0.1409, "step": 6215 }, { "epoch": 0.31575206863292554, "grad_norm": 0.901875913143158, "learning_rate": 1.7894986209113833e-05, "loss": 0.1292, "step": 6220 }, { "epoch": 0.31600588862378803, "grad_norm": 0.7464191317558289, "learning_rate": 1.7893294075841413e-05, "loss": 0.1411, "step": 6225 }, { "epoch": 0.31625970861465047, "grad_norm": 0.451816201210022, "learning_rate": 1.7891601942569e-05, "loss": 0.1368, "step": 6230 }, { "epoch": 0.31651352860551296, "grad_norm": 0.6408305168151855, "learning_rate": 1.788990980929658e-05, "loss": 0.1352, "step": 6235 }, { "epoch": 0.31676734859637545, "grad_norm": 0.4555467367172241, "learning_rate": 1.7888217676024164e-05, "loss": 0.1309, "step": 6240 }, { "epoch": 0.31702116858723794, "grad_norm": 0.46165990829467773, "learning_rate": 1.788652554275175e-05, "loss": 0.1435, "step": 6245 }, { "epoch": 0.31727498857810044, "grad_norm": 0.40598687529563904, "learning_rate": 1.788483340947933e-05, "loss": 0.1415, "step": 6250 }, { "epoch": 0.3175288085689629, "grad_norm": 0.6253679990768433, "learning_rate": 1.7883141276206914e-05, "loss": 0.125, "step": 6255 }, { "epoch": 0.31778262855982536, "grad_norm": 0.46861565113067627, "learning_rate": 1.7881449142934498e-05, "loss": 0.1302, "step": 6260 }, { "epoch": 0.31803644855068786, "grad_norm": 0.6676781177520752, "learning_rate": 1.787975700966208e-05, "loss": 0.1358, "step": 6265 }, { "epoch": 0.31829026854155035, "grad_norm": 0.6015679240226746, "learning_rate": 1.7878064876389668e-05, "loss": 0.1259, "step": 6270 }, { "epoch": 0.31854408853241284, "grad_norm": 0.5872548818588257, "learning_rate": 1.787637274311725e-05, "loss": 0.138, "step": 6275 }, { "epoch": 0.3187979085232753, "grad_norm": 0.4974575936794281, "learning_rate": 1.7874680609844832e-05, "loss": 0.1137, "step": 6280 }, { "epoch": 0.31905172851413777, "grad_norm": 0.5124915838241577, "learning_rate": 1.7872988476572415e-05, "loss": 0.1423, "step": 6285 }, { "epoch": 0.31930554850500026, "grad_norm": 0.5141370892524719, "learning_rate": 1.78712963433e-05, "loss": 0.1341, "step": 6290 }, { "epoch": 0.31955936849586275, "grad_norm": 0.621908962726593, "learning_rate": 1.7869604210027583e-05, "loss": 0.1241, "step": 6295 }, { "epoch": 0.31981318848672524, "grad_norm": 0.8363323211669922, "learning_rate": 1.7867912076755166e-05, "loss": 0.1406, "step": 6300 }, { "epoch": 0.3200670084775877, "grad_norm": 0.48739150166511536, "learning_rate": 1.786621994348275e-05, "loss": 0.1296, "step": 6305 }, { "epoch": 0.32032082846845017, "grad_norm": 0.629400908946991, "learning_rate": 1.7864527810210333e-05, "loss": 0.1317, "step": 6310 }, { "epoch": 0.32057464845931266, "grad_norm": 0.4386022090911865, "learning_rate": 1.7862835676937917e-05, "loss": 0.1283, "step": 6315 }, { "epoch": 0.32082846845017515, "grad_norm": 0.44304966926574707, "learning_rate": 1.78611435436655e-05, "loss": 0.129, "step": 6320 }, { "epoch": 0.3210822884410376, "grad_norm": 0.8448670506477356, "learning_rate": 1.7859451410393084e-05, "loss": 0.1452, "step": 6325 }, { "epoch": 0.3213361084319001, "grad_norm": 0.4689197838306427, "learning_rate": 1.7857759277120667e-05, "loss": 0.1127, "step": 6330 }, { "epoch": 0.32158992842276257, "grad_norm": 0.6370121240615845, "learning_rate": 1.785606714384825e-05, "loss": 0.1357, "step": 6335 }, { "epoch": 0.32184374841362506, "grad_norm": 0.5023421049118042, "learning_rate": 1.7854375010575834e-05, "loss": 0.1465, "step": 6340 }, { "epoch": 0.32209756840448756, "grad_norm": 0.6802017688751221, "learning_rate": 1.7852682877303418e-05, "loss": 0.1416, "step": 6345 }, { "epoch": 0.32235138839535, "grad_norm": 0.48663991689682007, "learning_rate": 1.7850990744031e-05, "loss": 0.1262, "step": 6350 }, { "epoch": 0.3226052083862125, "grad_norm": 0.43129396438598633, "learning_rate": 1.7849298610758585e-05, "loss": 0.1402, "step": 6355 }, { "epoch": 0.322859028377075, "grad_norm": 0.7644272446632385, "learning_rate": 1.784760647748617e-05, "loss": 0.1396, "step": 6360 }, { "epoch": 0.32311284836793747, "grad_norm": 0.6173144578933716, "learning_rate": 1.7845914344213752e-05, "loss": 0.1371, "step": 6365 }, { "epoch": 0.32336666835879996, "grad_norm": 0.6148359179496765, "learning_rate": 1.7844222210941336e-05, "loss": 0.1211, "step": 6370 }, { "epoch": 0.3236204883496624, "grad_norm": 0.5884629487991333, "learning_rate": 1.784253007766892e-05, "loss": 0.126, "step": 6375 }, { "epoch": 0.3238743083405249, "grad_norm": 0.6891257166862488, "learning_rate": 1.7840837944396503e-05, "loss": 0.1325, "step": 6380 }, { "epoch": 0.3241281283313874, "grad_norm": 0.5167293548583984, "learning_rate": 1.7839145811124086e-05, "loss": 0.1283, "step": 6385 }, { "epoch": 0.32438194832224987, "grad_norm": 0.42851531505584717, "learning_rate": 1.783745367785167e-05, "loss": 0.1353, "step": 6390 }, { "epoch": 0.32463576831311236, "grad_norm": 0.4503128230571747, "learning_rate": 1.7835761544579253e-05, "loss": 0.1259, "step": 6395 }, { "epoch": 0.3248895883039748, "grad_norm": 0.8484407067298889, "learning_rate": 1.7834069411306837e-05, "loss": 0.1467, "step": 6400 }, { "epoch": 0.3251434082948373, "grad_norm": 0.5505496263504028, "learning_rate": 1.7832377278034417e-05, "loss": 0.1503, "step": 6405 }, { "epoch": 0.3253972282856998, "grad_norm": 0.5813527703285217, "learning_rate": 1.7830685144762004e-05, "loss": 0.1375, "step": 6410 }, { "epoch": 0.32565104827656227, "grad_norm": 0.4346156120300293, "learning_rate": 1.7828993011489584e-05, "loss": 0.1548, "step": 6415 }, { "epoch": 0.32590486826742476, "grad_norm": 0.8428875207901001, "learning_rate": 1.782730087821717e-05, "loss": 0.1355, "step": 6420 }, { "epoch": 0.3261586882582872, "grad_norm": 0.5086374878883362, "learning_rate": 1.7825608744944755e-05, "loss": 0.1477, "step": 6425 }, { "epoch": 0.3264125082491497, "grad_norm": 0.49893704056739807, "learning_rate": 1.7823916611672335e-05, "loss": 0.1396, "step": 6430 }, { "epoch": 0.3266663282400122, "grad_norm": 0.4795598089694977, "learning_rate": 1.7822224478399922e-05, "loss": 0.1382, "step": 6435 }, { "epoch": 0.3269201482308747, "grad_norm": 0.5606343150138855, "learning_rate": 1.7820532345127502e-05, "loss": 0.1386, "step": 6440 }, { "epoch": 0.32717396822173717, "grad_norm": 0.48243793845176697, "learning_rate": 1.7818840211855086e-05, "loss": 0.1241, "step": 6445 }, { "epoch": 0.3274277882125996, "grad_norm": 0.5099393725395203, "learning_rate": 1.7817148078582672e-05, "loss": 0.1387, "step": 6450 }, { "epoch": 0.3276816082034621, "grad_norm": 0.3960450291633606, "learning_rate": 1.7815455945310253e-05, "loss": 0.1212, "step": 6455 }, { "epoch": 0.3279354281943246, "grad_norm": 0.7666064500808716, "learning_rate": 1.781376381203784e-05, "loss": 0.1406, "step": 6460 }, { "epoch": 0.3281892481851871, "grad_norm": 0.6125515699386597, "learning_rate": 1.781207167876542e-05, "loss": 0.1341, "step": 6465 }, { "epoch": 0.32844306817604957, "grad_norm": 0.5433800220489502, "learning_rate": 1.7810379545493003e-05, "loss": 0.1335, "step": 6470 }, { "epoch": 0.328696888166912, "grad_norm": 0.5496737957000732, "learning_rate": 1.780868741222059e-05, "loss": 0.1399, "step": 6475 }, { "epoch": 0.3289507081577745, "grad_norm": 0.549060583114624, "learning_rate": 1.780699527894817e-05, "loss": 0.1251, "step": 6480 }, { "epoch": 0.329204528148637, "grad_norm": 0.4195408821105957, "learning_rate": 1.7805303145675754e-05, "loss": 0.1245, "step": 6485 }, { "epoch": 0.3294583481394995, "grad_norm": 0.48527300357818604, "learning_rate": 1.7803611012403337e-05, "loss": 0.1325, "step": 6490 }, { "epoch": 0.32971216813036197, "grad_norm": 0.4478241801261902, "learning_rate": 1.780191887913092e-05, "loss": 0.1227, "step": 6495 }, { "epoch": 0.3299659881212244, "grad_norm": 0.3931959569454193, "learning_rate": 1.7800226745858505e-05, "loss": 0.1245, "step": 6500 }, { "epoch": 0.3302198081120869, "grad_norm": 0.33147504925727844, "learning_rate": 1.7798534612586088e-05, "loss": 0.1131, "step": 6505 }, { "epoch": 0.3304736281029494, "grad_norm": 0.6583611965179443, "learning_rate": 1.779684247931367e-05, "loss": 0.1493, "step": 6510 }, { "epoch": 0.3307274480938119, "grad_norm": 0.5396264791488647, "learning_rate": 1.7795150346041255e-05, "loss": 0.1239, "step": 6515 }, { "epoch": 0.3309812680846744, "grad_norm": 0.5870803594589233, "learning_rate": 1.779345821276884e-05, "loss": 0.1464, "step": 6520 }, { "epoch": 0.3312350880755368, "grad_norm": 0.45324239134788513, "learning_rate": 1.7791766079496422e-05, "loss": 0.1359, "step": 6525 }, { "epoch": 0.3314889080663993, "grad_norm": 0.5347620248794556, "learning_rate": 1.7790073946224006e-05, "loss": 0.1249, "step": 6530 }, { "epoch": 0.3317427280572618, "grad_norm": 0.5685572624206543, "learning_rate": 1.778838181295159e-05, "loss": 0.1421, "step": 6535 }, { "epoch": 0.3319965480481243, "grad_norm": 0.5411074161529541, "learning_rate": 1.7786689679679173e-05, "loss": 0.1233, "step": 6540 }, { "epoch": 0.3322503680389868, "grad_norm": 0.42632856965065, "learning_rate": 1.7784997546406756e-05, "loss": 0.1339, "step": 6545 }, { "epoch": 0.3325041880298492, "grad_norm": 0.45992130041122437, "learning_rate": 1.778330541313434e-05, "loss": 0.1395, "step": 6550 }, { "epoch": 0.3327580080207117, "grad_norm": 0.7072666883468628, "learning_rate": 1.7781613279861923e-05, "loss": 0.1312, "step": 6555 }, { "epoch": 0.3330118280115742, "grad_norm": 0.5420939922332764, "learning_rate": 1.7779921146589507e-05, "loss": 0.1209, "step": 6560 }, { "epoch": 0.3332656480024367, "grad_norm": 0.5253438949584961, "learning_rate": 1.777822901331709e-05, "loss": 0.1315, "step": 6565 }, { "epoch": 0.3335194679932992, "grad_norm": 0.51002436876297, "learning_rate": 1.7776536880044674e-05, "loss": 0.1467, "step": 6570 }, { "epoch": 0.3337732879841616, "grad_norm": 0.7875125408172607, "learning_rate": 1.7774844746772258e-05, "loss": 0.1201, "step": 6575 }, { "epoch": 0.3340271079750241, "grad_norm": 0.4544963240623474, "learning_rate": 1.777315261349984e-05, "loss": 0.1239, "step": 6580 }, { "epoch": 0.3342809279658866, "grad_norm": 0.579490602016449, "learning_rate": 1.7771460480227425e-05, "loss": 0.1551, "step": 6585 }, { "epoch": 0.3345347479567491, "grad_norm": 0.5468235015869141, "learning_rate": 1.776976834695501e-05, "loss": 0.1406, "step": 6590 }, { "epoch": 0.3347885679476115, "grad_norm": 0.5087330341339111, "learning_rate": 1.7768076213682592e-05, "loss": 0.14, "step": 6595 }, { "epoch": 0.335042387938474, "grad_norm": 0.48226407170295715, "learning_rate": 1.7766384080410175e-05, "loss": 0.1214, "step": 6600 }, { "epoch": 0.3352962079293365, "grad_norm": 0.4688023626804352, "learning_rate": 1.776469194713776e-05, "loss": 0.1089, "step": 6605 }, { "epoch": 0.335550027920199, "grad_norm": 0.7546516060829163, "learning_rate": 1.7762999813865342e-05, "loss": 0.1428, "step": 6610 }, { "epoch": 0.3358038479110615, "grad_norm": 0.46591129899024963, "learning_rate": 1.7761307680592926e-05, "loss": 0.126, "step": 6615 }, { "epoch": 0.33605766790192393, "grad_norm": 0.5849207043647766, "learning_rate": 1.7759615547320506e-05, "loss": 0.1322, "step": 6620 }, { "epoch": 0.3363114878927864, "grad_norm": 0.461297869682312, "learning_rate": 1.7757923414048093e-05, "loss": 0.1355, "step": 6625 }, { "epoch": 0.3365653078836489, "grad_norm": 0.4782043397426605, "learning_rate": 1.7756231280775677e-05, "loss": 0.1337, "step": 6630 }, { "epoch": 0.3368191278745114, "grad_norm": 1.3908518552780151, "learning_rate": 1.7754539147503257e-05, "loss": 0.1418, "step": 6635 }, { "epoch": 0.3370729478653739, "grad_norm": 0.5779076218605042, "learning_rate": 1.7752847014230844e-05, "loss": 0.1416, "step": 6640 }, { "epoch": 0.33732676785623633, "grad_norm": 0.4418136477470398, "learning_rate": 1.7751154880958424e-05, "loss": 0.1194, "step": 6645 }, { "epoch": 0.3375805878470988, "grad_norm": 0.47528406977653503, "learning_rate": 1.7749462747686007e-05, "loss": 0.1362, "step": 6650 }, { "epoch": 0.3378344078379613, "grad_norm": 0.6058582663536072, "learning_rate": 1.7747770614413594e-05, "loss": 0.156, "step": 6655 }, { "epoch": 0.3380882278288238, "grad_norm": 0.4911196827888489, "learning_rate": 1.7746078481141175e-05, "loss": 0.1294, "step": 6660 }, { "epoch": 0.3383420478196863, "grad_norm": 0.5730322003364563, "learning_rate": 1.774438634786876e-05, "loss": 0.1217, "step": 6665 }, { "epoch": 0.33859586781054873, "grad_norm": 0.3868119418621063, "learning_rate": 1.774269421459634e-05, "loss": 0.1237, "step": 6670 }, { "epoch": 0.3388496878014112, "grad_norm": 0.70493483543396, "learning_rate": 1.7741002081323925e-05, "loss": 0.1471, "step": 6675 }, { "epoch": 0.3391035077922737, "grad_norm": 0.6178105473518372, "learning_rate": 1.7739309948051512e-05, "loss": 0.1282, "step": 6680 }, { "epoch": 0.3393573277831362, "grad_norm": 0.5365161895751953, "learning_rate": 1.7737617814779092e-05, "loss": 0.1191, "step": 6685 }, { "epoch": 0.3396111477739987, "grad_norm": 0.4318663477897644, "learning_rate": 1.7735925681506676e-05, "loss": 0.126, "step": 6690 }, { "epoch": 0.33986496776486114, "grad_norm": 0.46456629037857056, "learning_rate": 1.773423354823426e-05, "loss": 0.1327, "step": 6695 }, { "epoch": 0.34011878775572363, "grad_norm": 0.7820452451705933, "learning_rate": 1.7732541414961843e-05, "loss": 0.131, "step": 6700 }, { "epoch": 0.3403726077465861, "grad_norm": 0.6297656297683716, "learning_rate": 1.7730849281689426e-05, "loss": 0.151, "step": 6705 }, { "epoch": 0.3406264277374486, "grad_norm": 0.4756261706352234, "learning_rate": 1.772915714841701e-05, "loss": 0.1388, "step": 6710 }, { "epoch": 0.3408802477283111, "grad_norm": 0.5708016157150269, "learning_rate": 1.7727465015144594e-05, "loss": 0.1191, "step": 6715 }, { "epoch": 0.34113406771917354, "grad_norm": 0.5044305324554443, "learning_rate": 1.7725772881872177e-05, "loss": 0.1238, "step": 6720 }, { "epoch": 0.34138788771003603, "grad_norm": 0.39757969975471497, "learning_rate": 1.772408074859976e-05, "loss": 0.1364, "step": 6725 }, { "epoch": 0.3416417077008985, "grad_norm": 0.425277441740036, "learning_rate": 1.7722388615327344e-05, "loss": 0.1233, "step": 6730 }, { "epoch": 0.341895527691761, "grad_norm": 0.5457815527915955, "learning_rate": 1.7720696482054928e-05, "loss": 0.1335, "step": 6735 }, { "epoch": 0.3421493476826235, "grad_norm": 0.9425898194313049, "learning_rate": 1.771900434878251e-05, "loss": 0.125, "step": 6740 }, { "epoch": 0.34240316767348594, "grad_norm": 0.6296150088310242, "learning_rate": 1.7717312215510095e-05, "loss": 0.1156, "step": 6745 }, { "epoch": 0.34265698766434843, "grad_norm": 0.4702848196029663, "learning_rate": 1.771562008223768e-05, "loss": 0.1213, "step": 6750 }, { "epoch": 0.3429108076552109, "grad_norm": 0.48555007576942444, "learning_rate": 1.7713927948965262e-05, "loss": 0.1297, "step": 6755 }, { "epoch": 0.3431646276460734, "grad_norm": 0.46021294593811035, "learning_rate": 1.7712235815692845e-05, "loss": 0.1325, "step": 6760 }, { "epoch": 0.3434184476369359, "grad_norm": 0.7857270836830139, "learning_rate": 1.771054368242043e-05, "loss": 0.1354, "step": 6765 }, { "epoch": 0.34367226762779834, "grad_norm": 0.538543701171875, "learning_rate": 1.7708851549148013e-05, "loss": 0.1471, "step": 6770 }, { "epoch": 0.34392608761866084, "grad_norm": 0.5883283615112305, "learning_rate": 1.7707159415875596e-05, "loss": 0.1453, "step": 6775 }, { "epoch": 0.34417990760952333, "grad_norm": 0.5997344851493835, "learning_rate": 1.770546728260318e-05, "loss": 0.1381, "step": 6780 }, { "epoch": 0.3444337276003858, "grad_norm": 0.46415236592292786, "learning_rate": 1.7703775149330763e-05, "loss": 0.1331, "step": 6785 }, { "epoch": 0.3446875475912483, "grad_norm": 0.46198391914367676, "learning_rate": 1.7702083016058347e-05, "loss": 0.129, "step": 6790 }, { "epoch": 0.34494136758211075, "grad_norm": 0.6081626415252686, "learning_rate": 1.770039088278593e-05, "loss": 0.1367, "step": 6795 }, { "epoch": 0.34519518757297324, "grad_norm": 0.5060707926750183, "learning_rate": 1.769869874951351e-05, "loss": 0.1317, "step": 6800 }, { "epoch": 0.34544900756383573, "grad_norm": 0.48941001296043396, "learning_rate": 1.7697006616241097e-05, "loss": 0.1218, "step": 6805 }, { "epoch": 0.3457028275546982, "grad_norm": 0.8074930906295776, "learning_rate": 1.769531448296868e-05, "loss": 0.1178, "step": 6810 }, { "epoch": 0.3459566475455607, "grad_norm": 0.46753403544425964, "learning_rate": 1.7693622349696264e-05, "loss": 0.1152, "step": 6815 }, { "epoch": 0.34621046753642315, "grad_norm": 0.5410972237586975, "learning_rate": 1.7691930216423848e-05, "loss": 0.1364, "step": 6820 }, { "epoch": 0.34646428752728564, "grad_norm": 0.5206438302993774, "learning_rate": 1.7690238083151428e-05, "loss": 0.1292, "step": 6825 }, { "epoch": 0.34671810751814813, "grad_norm": 0.4511585831642151, "learning_rate": 1.7688545949879015e-05, "loss": 0.1236, "step": 6830 }, { "epoch": 0.3469719275090106, "grad_norm": 0.6129817962646484, "learning_rate": 1.76868538166066e-05, "loss": 0.1209, "step": 6835 }, { "epoch": 0.3472257474998731, "grad_norm": 1.1212575435638428, "learning_rate": 1.768516168333418e-05, "loss": 0.1293, "step": 6840 }, { "epoch": 0.34747956749073555, "grad_norm": 0.760433554649353, "learning_rate": 1.7683469550061766e-05, "loss": 0.1382, "step": 6845 }, { "epoch": 0.34773338748159804, "grad_norm": 0.6482082605361938, "learning_rate": 1.7681777416789346e-05, "loss": 0.1354, "step": 6850 }, { "epoch": 0.34798720747246054, "grad_norm": 0.606609046459198, "learning_rate": 1.7680085283516933e-05, "loss": 0.1174, "step": 6855 }, { "epoch": 0.348241027463323, "grad_norm": 0.49537140130996704, "learning_rate": 1.7678393150244516e-05, "loss": 0.1272, "step": 6860 }, { "epoch": 0.3484948474541855, "grad_norm": 0.5630388259887695, "learning_rate": 1.7676701016972096e-05, "loss": 0.1455, "step": 6865 }, { "epoch": 0.34874866744504796, "grad_norm": 0.5360369086265564, "learning_rate": 1.7675008883699683e-05, "loss": 0.1206, "step": 6870 }, { "epoch": 0.34900248743591045, "grad_norm": 0.5766199231147766, "learning_rate": 1.7673316750427264e-05, "loss": 0.1311, "step": 6875 }, { "epoch": 0.34925630742677294, "grad_norm": 0.4689859449863434, "learning_rate": 1.7671624617154847e-05, "loss": 0.1259, "step": 6880 }, { "epoch": 0.34951012741763543, "grad_norm": 0.8159186840057373, "learning_rate": 1.7669932483882434e-05, "loss": 0.1237, "step": 6885 }, { "epoch": 0.34976394740849787, "grad_norm": 0.9604336619377136, "learning_rate": 1.7668240350610014e-05, "loss": 0.1288, "step": 6890 }, { "epoch": 0.35001776739936036, "grad_norm": 1.4752824306488037, "learning_rate": 1.7666548217337598e-05, "loss": 0.1251, "step": 6895 }, { "epoch": 0.35027158739022285, "grad_norm": 0.449650377035141, "learning_rate": 1.766485608406518e-05, "loss": 0.1479, "step": 6900 }, { "epoch": 0.35052540738108534, "grad_norm": 0.5203964114189148, "learning_rate": 1.7663163950792765e-05, "loss": 0.1424, "step": 6905 }, { "epoch": 0.35077922737194783, "grad_norm": 0.5282999277114868, "learning_rate": 1.7661471817520352e-05, "loss": 0.1269, "step": 6910 }, { "epoch": 0.35103304736281027, "grad_norm": 0.41369152069091797, "learning_rate": 1.7659779684247932e-05, "loss": 0.1144, "step": 6915 }, { "epoch": 0.35128686735367276, "grad_norm": 2.030303716659546, "learning_rate": 1.7658087550975515e-05, "loss": 0.1372, "step": 6920 }, { "epoch": 0.35154068734453525, "grad_norm": 0.463283896446228, "learning_rate": 1.76563954177031e-05, "loss": 0.1329, "step": 6925 }, { "epoch": 0.35179450733539774, "grad_norm": 0.4552387595176697, "learning_rate": 1.7654703284430683e-05, "loss": 0.1317, "step": 6930 }, { "epoch": 0.35204832732626024, "grad_norm": 0.9171412587165833, "learning_rate": 1.7653011151158266e-05, "loss": 0.1284, "step": 6935 }, { "epoch": 0.35230214731712267, "grad_norm": 0.43250319361686707, "learning_rate": 1.765131901788585e-05, "loss": 0.1199, "step": 6940 }, { "epoch": 0.35255596730798516, "grad_norm": 0.8429256081581116, "learning_rate": 1.7649626884613433e-05, "loss": 0.1375, "step": 6945 }, { "epoch": 0.35280978729884765, "grad_norm": 0.651530921459198, "learning_rate": 1.7647934751341017e-05, "loss": 0.1313, "step": 6950 }, { "epoch": 0.35306360728971015, "grad_norm": 0.632590115070343, "learning_rate": 1.76462426180686e-05, "loss": 0.1437, "step": 6955 }, { "epoch": 0.35331742728057264, "grad_norm": 0.5507182478904724, "learning_rate": 1.7644550484796184e-05, "loss": 0.1353, "step": 6960 }, { "epoch": 0.3535712472714351, "grad_norm": 0.5852574110031128, "learning_rate": 1.7642858351523767e-05, "loss": 0.1166, "step": 6965 }, { "epoch": 0.35382506726229757, "grad_norm": 0.4446243643760681, "learning_rate": 1.764116621825135e-05, "loss": 0.1333, "step": 6970 }, { "epoch": 0.35407888725316006, "grad_norm": 0.6388731598854065, "learning_rate": 1.7639474084978934e-05, "loss": 0.1312, "step": 6975 }, { "epoch": 0.35433270724402255, "grad_norm": 0.4913591742515564, "learning_rate": 1.7637781951706518e-05, "loss": 0.1276, "step": 6980 }, { "epoch": 0.35458652723488504, "grad_norm": 0.5663020014762878, "learning_rate": 1.76360898184341e-05, "loss": 0.1228, "step": 6985 }, { "epoch": 0.3548403472257475, "grad_norm": 0.3867751657962799, "learning_rate": 1.7634397685161685e-05, "loss": 0.1331, "step": 6990 }, { "epoch": 0.35509416721660997, "grad_norm": 1.012452483177185, "learning_rate": 1.763270555188927e-05, "loss": 0.1431, "step": 6995 }, { "epoch": 0.35534798720747246, "grad_norm": 0.5785044431686401, "learning_rate": 1.7631013418616852e-05, "loss": 0.146, "step": 7000 }, { "epoch": 0.35560180719833495, "grad_norm": 0.5155715346336365, "learning_rate": 1.7629321285344436e-05, "loss": 0.1153, "step": 7005 }, { "epoch": 0.35585562718919744, "grad_norm": 0.5367943644523621, "learning_rate": 1.762762915207202e-05, "loss": 0.125, "step": 7010 }, { "epoch": 0.3561094471800599, "grad_norm": 0.3979216516017914, "learning_rate": 1.7625937018799603e-05, "loss": 0.1232, "step": 7015 }, { "epoch": 0.35636326717092237, "grad_norm": 0.5183656215667725, "learning_rate": 1.7624244885527186e-05, "loss": 0.1181, "step": 7020 }, { "epoch": 0.35661708716178486, "grad_norm": 0.45130103826522827, "learning_rate": 1.762255275225477e-05, "loss": 0.1325, "step": 7025 }, { "epoch": 0.35687090715264735, "grad_norm": 0.7615092992782593, "learning_rate": 1.762086061898235e-05, "loss": 0.1321, "step": 7030 }, { "epoch": 0.35712472714350985, "grad_norm": 0.5867871046066284, "learning_rate": 1.7619168485709937e-05, "loss": 0.1184, "step": 7035 }, { "epoch": 0.3573785471343723, "grad_norm": 1.0698941946029663, "learning_rate": 1.761747635243752e-05, "loss": 0.1272, "step": 7040 }, { "epoch": 0.3576323671252348, "grad_norm": 0.5040280222892761, "learning_rate": 1.76157842191651e-05, "loss": 0.1323, "step": 7045 }, { "epoch": 0.35788618711609727, "grad_norm": 0.49704474210739136, "learning_rate": 1.7614092085892688e-05, "loss": 0.133, "step": 7050 }, { "epoch": 0.35814000710695976, "grad_norm": 0.7625434398651123, "learning_rate": 1.7612399952620268e-05, "loss": 0.1295, "step": 7055 }, { "epoch": 0.35839382709782225, "grad_norm": 0.5237678289413452, "learning_rate": 1.7610707819347855e-05, "loss": 0.1282, "step": 7060 }, { "epoch": 0.3586476470886847, "grad_norm": 0.640903115272522, "learning_rate": 1.7609015686075438e-05, "loss": 0.1323, "step": 7065 }, { "epoch": 0.3589014670795472, "grad_norm": 0.5553655028343201, "learning_rate": 1.760732355280302e-05, "loss": 0.1261, "step": 7070 }, { "epoch": 0.35915528707040967, "grad_norm": 0.5053536891937256, "learning_rate": 1.7605631419530605e-05, "loss": 0.1298, "step": 7075 }, { "epoch": 0.35940910706127216, "grad_norm": 0.5184898972511292, "learning_rate": 1.7603939286258186e-05, "loss": 0.1348, "step": 7080 }, { "epoch": 0.35966292705213465, "grad_norm": 0.777260959148407, "learning_rate": 1.760224715298577e-05, "loss": 0.1373, "step": 7085 }, { "epoch": 0.3599167470429971, "grad_norm": 0.4676660895347595, "learning_rate": 1.7600555019713356e-05, "loss": 0.1254, "step": 7090 }, { "epoch": 0.3601705670338596, "grad_norm": 0.6567785739898682, "learning_rate": 1.7598862886440936e-05, "loss": 0.1165, "step": 7095 }, { "epoch": 0.36042438702472207, "grad_norm": 0.7958646416664124, "learning_rate": 1.7597170753168523e-05, "loss": 0.1334, "step": 7100 }, { "epoch": 0.36067820701558456, "grad_norm": 0.43118539452552795, "learning_rate": 1.7595478619896103e-05, "loss": 0.1203, "step": 7105 }, { "epoch": 0.36093202700644705, "grad_norm": 0.44612541794776917, "learning_rate": 1.7593786486623687e-05, "loss": 0.1049, "step": 7110 }, { "epoch": 0.3611858469973095, "grad_norm": 0.43221428990364075, "learning_rate": 1.7592094353351274e-05, "loss": 0.1152, "step": 7115 }, { "epoch": 0.361439666988172, "grad_norm": 0.486248642206192, "learning_rate": 1.7590402220078854e-05, "loss": 0.1281, "step": 7120 }, { "epoch": 0.3616934869790345, "grad_norm": 0.4399028420448303, "learning_rate": 1.7588710086806437e-05, "loss": 0.1321, "step": 7125 }, { "epoch": 0.36194730696989696, "grad_norm": 0.6637834906578064, "learning_rate": 1.758701795353402e-05, "loss": 0.1285, "step": 7130 }, { "epoch": 0.36220112696075946, "grad_norm": 0.4433422386646271, "learning_rate": 1.7585325820261605e-05, "loss": 0.133, "step": 7135 }, { "epoch": 0.3624549469516219, "grad_norm": 0.7549034953117371, "learning_rate": 1.7583633686989188e-05, "loss": 0.1279, "step": 7140 }, { "epoch": 0.3627087669424844, "grad_norm": 0.48059457540512085, "learning_rate": 1.758194155371677e-05, "loss": 0.1149, "step": 7145 }, { "epoch": 0.3629625869333469, "grad_norm": 0.582513689994812, "learning_rate": 1.7580249420444355e-05, "loss": 0.1423, "step": 7150 }, { "epoch": 0.36321640692420937, "grad_norm": 0.6541072130203247, "learning_rate": 1.757855728717194e-05, "loss": 0.1229, "step": 7155 }, { "epoch": 0.3634702269150718, "grad_norm": 0.5201939344406128, "learning_rate": 1.7576865153899522e-05, "loss": 0.1194, "step": 7160 }, { "epoch": 0.3637240469059343, "grad_norm": 1.192158579826355, "learning_rate": 1.7575173020627106e-05, "loss": 0.1261, "step": 7165 }, { "epoch": 0.3639778668967968, "grad_norm": 0.5061715245246887, "learning_rate": 1.757348088735469e-05, "loss": 0.1282, "step": 7170 }, { "epoch": 0.3642316868876593, "grad_norm": 0.45553848147392273, "learning_rate": 1.7571788754082273e-05, "loss": 0.1186, "step": 7175 }, { "epoch": 0.36448550687852177, "grad_norm": 0.4875742793083191, "learning_rate": 1.7570096620809856e-05, "loss": 0.124, "step": 7180 }, { "epoch": 0.3647393268693842, "grad_norm": 0.39498981833457947, "learning_rate": 1.756840448753744e-05, "loss": 0.1223, "step": 7185 }, { "epoch": 0.3649931468602467, "grad_norm": 0.6347449421882629, "learning_rate": 1.7566712354265023e-05, "loss": 0.1164, "step": 7190 }, { "epoch": 0.3652469668511092, "grad_norm": 0.5973386764526367, "learning_rate": 1.7565020220992607e-05, "loss": 0.1207, "step": 7195 }, { "epoch": 0.3655007868419717, "grad_norm": 0.44844862818717957, "learning_rate": 1.756332808772019e-05, "loss": 0.1268, "step": 7200 }, { "epoch": 0.3657546068328342, "grad_norm": 0.44825243949890137, "learning_rate": 1.7561635954447774e-05, "loss": 0.1333, "step": 7205 }, { "epoch": 0.3660084268236966, "grad_norm": 0.664547860622406, "learning_rate": 1.7559943821175358e-05, "loss": 0.1131, "step": 7210 }, { "epoch": 0.3662622468145591, "grad_norm": 0.5262232422828674, "learning_rate": 1.755825168790294e-05, "loss": 0.1314, "step": 7215 }, { "epoch": 0.3665160668054216, "grad_norm": 0.5070418119430542, "learning_rate": 1.7556559554630525e-05, "loss": 0.1326, "step": 7220 }, { "epoch": 0.3667698867962841, "grad_norm": 0.7615456581115723, "learning_rate": 1.7554867421358108e-05, "loss": 0.1262, "step": 7225 }, { "epoch": 0.3670237067871466, "grad_norm": 0.44965729117393494, "learning_rate": 1.7553175288085692e-05, "loss": 0.1211, "step": 7230 }, { "epoch": 0.367277526778009, "grad_norm": 0.6415624022483826, "learning_rate": 1.7551483154813272e-05, "loss": 0.1188, "step": 7235 }, { "epoch": 0.3675313467688715, "grad_norm": 0.4776475131511688, "learning_rate": 1.754979102154086e-05, "loss": 0.1141, "step": 7240 }, { "epoch": 0.367785166759734, "grad_norm": 0.4545101225376129, "learning_rate": 1.7548098888268442e-05, "loss": 0.1252, "step": 7245 }, { "epoch": 0.3680389867505965, "grad_norm": 0.8184462189674377, "learning_rate": 1.7546406754996026e-05, "loss": 0.122, "step": 7250 }, { "epoch": 0.368292806741459, "grad_norm": 0.46976208686828613, "learning_rate": 1.754471462172361e-05, "loss": 0.1219, "step": 7255 }, { "epoch": 0.3685466267323214, "grad_norm": 0.41548067331314087, "learning_rate": 1.754302248845119e-05, "loss": 0.1132, "step": 7260 }, { "epoch": 0.3688004467231839, "grad_norm": 0.781281590461731, "learning_rate": 1.7541330355178777e-05, "loss": 0.1401, "step": 7265 }, { "epoch": 0.3690542667140464, "grad_norm": 0.3721882700920105, "learning_rate": 1.753963822190636e-05, "loss": 0.1154, "step": 7270 }, { "epoch": 0.3693080867049089, "grad_norm": 0.5015051960945129, "learning_rate": 1.753794608863394e-05, "loss": 0.1186, "step": 7275 }, { "epoch": 0.3695619066957714, "grad_norm": 0.49192380905151367, "learning_rate": 1.7536253955361527e-05, "loss": 0.1115, "step": 7280 }, { "epoch": 0.3698157266866338, "grad_norm": 0.4846615195274353, "learning_rate": 1.7534561822089107e-05, "loss": 0.1333, "step": 7285 }, { "epoch": 0.3700695466774963, "grad_norm": 0.40966522693634033, "learning_rate": 1.753286968881669e-05, "loss": 0.1185, "step": 7290 }, { "epoch": 0.3703233666683588, "grad_norm": 0.533069372177124, "learning_rate": 1.7531177555544278e-05, "loss": 0.1181, "step": 7295 }, { "epoch": 0.3705771866592213, "grad_norm": 0.5515578985214233, "learning_rate": 1.7529485422271858e-05, "loss": 0.1148, "step": 7300 }, { "epoch": 0.3708310066500838, "grad_norm": 0.5015854239463806, "learning_rate": 1.7527793288999445e-05, "loss": 0.1193, "step": 7305 }, { "epoch": 0.3710848266409462, "grad_norm": 0.4574815630912781, "learning_rate": 1.7526101155727025e-05, "loss": 0.1355, "step": 7310 }, { "epoch": 0.3713386466318087, "grad_norm": 0.5332877039909363, "learning_rate": 1.752440902245461e-05, "loss": 0.1175, "step": 7315 }, { "epoch": 0.3715924666226712, "grad_norm": 0.6135624647140503, "learning_rate": 1.7522716889182196e-05, "loss": 0.1268, "step": 7320 }, { "epoch": 0.3718462866135337, "grad_norm": 0.5522362589836121, "learning_rate": 1.7521024755909776e-05, "loss": 0.1205, "step": 7325 }, { "epoch": 0.3721001066043962, "grad_norm": 0.41892364621162415, "learning_rate": 1.751933262263736e-05, "loss": 0.1272, "step": 7330 }, { "epoch": 0.3723539265952586, "grad_norm": 0.5606332421302795, "learning_rate": 1.7517640489364943e-05, "loss": 0.1238, "step": 7335 }, { "epoch": 0.3726077465861211, "grad_norm": 0.4218759834766388, "learning_rate": 1.7515948356092526e-05, "loss": 0.12, "step": 7340 }, { "epoch": 0.3728615665769836, "grad_norm": 0.45138901472091675, "learning_rate": 1.7514256222820113e-05, "loss": 0.1245, "step": 7345 }, { "epoch": 0.3731153865678461, "grad_norm": 0.8056939244270325, "learning_rate": 1.7512564089547694e-05, "loss": 0.1328, "step": 7350 }, { "epoch": 0.3733692065587086, "grad_norm": 0.5511149168014526, "learning_rate": 1.7510871956275277e-05, "loss": 0.1121, "step": 7355 }, { "epoch": 0.373623026549571, "grad_norm": 0.5154682993888855, "learning_rate": 1.750917982300286e-05, "loss": 0.132, "step": 7360 }, { "epoch": 0.3738768465404335, "grad_norm": 1.9294079542160034, "learning_rate": 1.7507487689730444e-05, "loss": 0.1322, "step": 7365 }, { "epoch": 0.374130666531296, "grad_norm": 0.43836021423339844, "learning_rate": 1.7505795556458028e-05, "loss": 0.116, "step": 7370 }, { "epoch": 0.3743844865221585, "grad_norm": 0.43150243163108826, "learning_rate": 1.750410342318561e-05, "loss": 0.1197, "step": 7375 }, { "epoch": 0.374638306513021, "grad_norm": 0.606220543384552, "learning_rate": 1.7502411289913195e-05, "loss": 0.1232, "step": 7380 }, { "epoch": 0.3748921265038834, "grad_norm": 0.4498575031757355, "learning_rate": 1.750071915664078e-05, "loss": 0.1182, "step": 7385 }, { "epoch": 0.3751459464947459, "grad_norm": 0.5238727331161499, "learning_rate": 1.7499027023368362e-05, "loss": 0.1433, "step": 7390 }, { "epoch": 0.3753997664856084, "grad_norm": 0.5070326328277588, "learning_rate": 1.7497334890095945e-05, "loss": 0.1309, "step": 7395 }, { "epoch": 0.3756535864764709, "grad_norm": 0.4144206643104553, "learning_rate": 1.749564275682353e-05, "loss": 0.1259, "step": 7400 }, { "epoch": 0.3759074064673334, "grad_norm": 0.476362943649292, "learning_rate": 1.7493950623551113e-05, "loss": 0.1213, "step": 7405 }, { "epoch": 0.37616122645819583, "grad_norm": 0.41985994577407837, "learning_rate": 1.7492258490278696e-05, "loss": 0.1143, "step": 7410 }, { "epoch": 0.3764150464490583, "grad_norm": 0.6084445714950562, "learning_rate": 1.749056635700628e-05, "loss": 0.1269, "step": 7415 }, { "epoch": 0.3766688664399208, "grad_norm": 0.39930278062820435, "learning_rate": 1.7488874223733863e-05, "loss": 0.1232, "step": 7420 }, { "epoch": 0.3769226864307833, "grad_norm": 0.4924406111240387, "learning_rate": 1.7487182090461447e-05, "loss": 0.124, "step": 7425 }, { "epoch": 0.3771765064216458, "grad_norm": 0.5184084177017212, "learning_rate": 1.748548995718903e-05, "loss": 0.1227, "step": 7430 }, { "epoch": 0.37743032641250823, "grad_norm": 0.5747602581977844, "learning_rate": 1.7483797823916614e-05, "loss": 0.1309, "step": 7435 }, { "epoch": 0.3776841464033707, "grad_norm": 0.6429848670959473, "learning_rate": 1.7482105690644194e-05, "loss": 0.1202, "step": 7440 }, { "epoch": 0.3779379663942332, "grad_norm": 0.6303040981292725, "learning_rate": 1.748041355737178e-05, "loss": 0.119, "step": 7445 }, { "epoch": 0.3781917863850957, "grad_norm": 0.8696273565292358, "learning_rate": 1.7478721424099364e-05, "loss": 0.1191, "step": 7450 }, { "epoch": 0.37844560637595814, "grad_norm": 0.7249330878257751, "learning_rate": 1.7477029290826948e-05, "loss": 0.1285, "step": 7455 }, { "epoch": 0.37869942636682064, "grad_norm": 0.37654367089271545, "learning_rate": 1.747533715755453e-05, "loss": 0.1287, "step": 7460 }, { "epoch": 0.3789532463576831, "grad_norm": 0.3263428807258606, "learning_rate": 1.747364502428211e-05, "loss": 0.109, "step": 7465 }, { "epoch": 0.3792070663485456, "grad_norm": 0.5799312591552734, "learning_rate": 1.74719528910097e-05, "loss": 0.1226, "step": 7470 }, { "epoch": 0.3794608863394081, "grad_norm": 0.47346293926239014, "learning_rate": 1.7470260757737282e-05, "loss": 0.1142, "step": 7475 }, { "epoch": 0.37971470633027055, "grad_norm": 0.44059112668037415, "learning_rate": 1.7468568624464862e-05, "loss": 0.1104, "step": 7480 }, { "epoch": 0.37996852632113304, "grad_norm": 0.5601668357849121, "learning_rate": 1.746687649119245e-05, "loss": 0.1318, "step": 7485 }, { "epoch": 0.38022234631199553, "grad_norm": 0.6741954684257507, "learning_rate": 1.746518435792003e-05, "loss": 0.123, "step": 7490 }, { "epoch": 0.380476166302858, "grad_norm": 0.7247961759567261, "learning_rate": 1.7463492224647616e-05, "loss": 0.1275, "step": 7495 }, { "epoch": 0.3807299862937205, "grad_norm": 0.4358140826225281, "learning_rate": 1.74618000913752e-05, "loss": 0.1214, "step": 7500 }, { "epoch": 0.38098380628458295, "grad_norm": 0.35858985781669617, "learning_rate": 1.746010795810278e-05, "loss": 0.1377, "step": 7505 }, { "epoch": 0.38123762627544544, "grad_norm": 0.5448585152626038, "learning_rate": 1.7458415824830367e-05, "loss": 0.1244, "step": 7510 }, { "epoch": 0.38149144626630793, "grad_norm": 0.6085044145584106, "learning_rate": 1.7456723691557947e-05, "loss": 0.1137, "step": 7515 }, { "epoch": 0.3817452662571704, "grad_norm": 0.43685221672058105, "learning_rate": 1.745503155828553e-05, "loss": 0.1193, "step": 7520 }, { "epoch": 0.3819990862480329, "grad_norm": 0.4632774293422699, "learning_rate": 1.7453339425013118e-05, "loss": 0.1253, "step": 7525 }, { "epoch": 0.38225290623889535, "grad_norm": 0.67642742395401, "learning_rate": 1.7451647291740698e-05, "loss": 0.1173, "step": 7530 }, { "epoch": 0.38250672622975784, "grad_norm": 0.5087795853614807, "learning_rate": 1.744995515846828e-05, "loss": 0.1271, "step": 7535 }, { "epoch": 0.38276054622062033, "grad_norm": 0.5182584524154663, "learning_rate": 1.7448263025195865e-05, "loss": 0.1189, "step": 7540 }, { "epoch": 0.3830143662114828, "grad_norm": 0.6090805530548096, "learning_rate": 1.744657089192345e-05, "loss": 0.1135, "step": 7545 }, { "epoch": 0.3832681862023453, "grad_norm": 0.558914065361023, "learning_rate": 1.7444878758651035e-05, "loss": 0.1275, "step": 7550 }, { "epoch": 0.38352200619320775, "grad_norm": 0.4196544885635376, "learning_rate": 1.7443186625378615e-05, "loss": 0.1208, "step": 7555 }, { "epoch": 0.38377582618407025, "grad_norm": 0.6540439128875732, "learning_rate": 1.74414944921062e-05, "loss": 0.1332, "step": 7560 }, { "epoch": 0.38402964617493274, "grad_norm": 0.9480463266372681, "learning_rate": 1.7439802358833783e-05, "loss": 0.1152, "step": 7565 }, { "epoch": 0.38428346616579523, "grad_norm": 0.68366539478302, "learning_rate": 1.7438110225561366e-05, "loss": 0.1121, "step": 7570 }, { "epoch": 0.3845372861566577, "grad_norm": 0.7604976296424866, "learning_rate": 1.743641809228895e-05, "loss": 0.1317, "step": 7575 }, { "epoch": 0.38479110614752016, "grad_norm": 0.704014003276825, "learning_rate": 1.7434725959016533e-05, "loss": 0.1103, "step": 7580 }, { "epoch": 0.38504492613838265, "grad_norm": 0.5294590592384338, "learning_rate": 1.7433033825744117e-05, "loss": 0.1276, "step": 7585 }, { "epoch": 0.38529874612924514, "grad_norm": 0.3316403925418854, "learning_rate": 1.74313416924717e-05, "loss": 0.1062, "step": 7590 }, { "epoch": 0.38555256612010763, "grad_norm": 0.48089930415153503, "learning_rate": 1.7429649559199284e-05, "loss": 0.114, "step": 7595 }, { "epoch": 0.3858063861109701, "grad_norm": 0.4831618368625641, "learning_rate": 1.7427957425926867e-05, "loss": 0.1184, "step": 7600 }, { "epoch": 0.38606020610183256, "grad_norm": 0.49979323148727417, "learning_rate": 1.742626529265445e-05, "loss": 0.1335, "step": 7605 }, { "epoch": 0.38631402609269505, "grad_norm": 1.3292902708053589, "learning_rate": 1.7424573159382034e-05, "loss": 0.1143, "step": 7610 }, { "epoch": 0.38656784608355754, "grad_norm": 0.4008750915527344, "learning_rate": 1.7422881026109618e-05, "loss": 0.1229, "step": 7615 }, { "epoch": 0.38682166607442003, "grad_norm": 0.5050025582313538, "learning_rate": 1.74211888928372e-05, "loss": 0.1137, "step": 7620 }, { "epoch": 0.3870754860652825, "grad_norm": 0.7327058911323547, "learning_rate": 1.7419496759564785e-05, "loss": 0.1408, "step": 7625 }, { "epoch": 0.38732930605614496, "grad_norm": 0.5808429718017578, "learning_rate": 1.741780462629237e-05, "loss": 0.1055, "step": 7630 }, { "epoch": 0.38758312604700745, "grad_norm": 1.0923219919204712, "learning_rate": 1.7416112493019952e-05, "loss": 0.128, "step": 7635 }, { "epoch": 0.38783694603786995, "grad_norm": 0.6931188106536865, "learning_rate": 1.7414420359747536e-05, "loss": 0.119, "step": 7640 }, { "epoch": 0.38809076602873244, "grad_norm": 0.7696263790130615, "learning_rate": 1.741272822647512e-05, "loss": 0.1138, "step": 7645 }, { "epoch": 0.38834458601959493, "grad_norm": 0.5164752006530762, "learning_rate": 1.7411036093202703e-05, "loss": 0.1151, "step": 7650 }, { "epoch": 0.38859840601045736, "grad_norm": 0.4094414710998535, "learning_rate": 1.7409343959930286e-05, "loss": 0.1241, "step": 7655 }, { "epoch": 0.38885222600131986, "grad_norm": 0.43349120020866394, "learning_rate": 1.740765182665787e-05, "loss": 0.1269, "step": 7660 }, { "epoch": 0.38910604599218235, "grad_norm": 0.5583718419075012, "learning_rate": 1.7405959693385453e-05, "loss": 0.1239, "step": 7665 }, { "epoch": 0.38935986598304484, "grad_norm": 0.45432811975479126, "learning_rate": 1.7404267560113034e-05, "loss": 0.1353, "step": 7670 }, { "epoch": 0.38961368597390733, "grad_norm": 0.4887847900390625, "learning_rate": 1.740257542684062e-05, "loss": 0.1189, "step": 7675 }, { "epoch": 0.38986750596476977, "grad_norm": 0.6936730146408081, "learning_rate": 1.7400883293568204e-05, "loss": 0.1206, "step": 7680 }, { "epoch": 0.39012132595563226, "grad_norm": 0.49213695526123047, "learning_rate": 1.7399191160295784e-05, "loss": 0.1092, "step": 7685 }, { "epoch": 0.39037514594649475, "grad_norm": 0.38413718342781067, "learning_rate": 1.739749902702337e-05, "loss": 0.1195, "step": 7690 }, { "epoch": 0.39062896593735724, "grad_norm": 0.5132599472999573, "learning_rate": 1.739580689375095e-05, "loss": 0.1336, "step": 7695 }, { "epoch": 0.39088278592821973, "grad_norm": 0.5477653741836548, "learning_rate": 1.7394114760478538e-05, "loss": 0.1258, "step": 7700 }, { "epoch": 0.39113660591908217, "grad_norm": 0.3821299076080322, "learning_rate": 1.7392422627206122e-05, "loss": 0.1269, "step": 7705 }, { "epoch": 0.39139042590994466, "grad_norm": 0.4275185167789459, "learning_rate": 1.7390730493933702e-05, "loss": 0.1288, "step": 7710 }, { "epoch": 0.39164424590080715, "grad_norm": 0.6078634858131409, "learning_rate": 1.738903836066129e-05, "loss": 0.1171, "step": 7715 }, { "epoch": 0.39189806589166964, "grad_norm": 0.5919456481933594, "learning_rate": 1.738734622738887e-05, "loss": 0.1187, "step": 7720 }, { "epoch": 0.3921518858825321, "grad_norm": 0.3606504797935486, "learning_rate": 1.7385654094116453e-05, "loss": 0.1196, "step": 7725 }, { "epoch": 0.3924057058733946, "grad_norm": 0.5512848496437073, "learning_rate": 1.738396196084404e-05, "loss": 0.1143, "step": 7730 }, { "epoch": 0.39265952586425706, "grad_norm": 0.5730594396591187, "learning_rate": 1.738226982757162e-05, "loss": 0.1215, "step": 7735 }, { "epoch": 0.39291334585511956, "grad_norm": 0.4641408622264862, "learning_rate": 1.7380577694299207e-05, "loss": 0.117, "step": 7740 }, { "epoch": 0.39316716584598205, "grad_norm": 0.417926162481308, "learning_rate": 1.7378885561026787e-05, "loss": 0.1095, "step": 7745 }, { "epoch": 0.3934209858368445, "grad_norm": 0.6021568775177002, "learning_rate": 1.737719342775437e-05, "loss": 0.1163, "step": 7750 }, { "epoch": 0.393674805827707, "grad_norm": 0.5516533851623535, "learning_rate": 1.7375501294481957e-05, "loss": 0.1147, "step": 7755 }, { "epoch": 0.39392862581856947, "grad_norm": 0.49156588315963745, "learning_rate": 1.7373809161209537e-05, "loss": 0.119, "step": 7760 }, { "epoch": 0.39418244580943196, "grad_norm": 0.546325147151947, "learning_rate": 1.737211702793712e-05, "loss": 0.1385, "step": 7765 }, { "epoch": 0.39443626580029445, "grad_norm": 0.4427269399166107, "learning_rate": 1.7370424894664704e-05, "loss": 0.1237, "step": 7770 }, { "epoch": 0.3946900857911569, "grad_norm": 0.42994052171707153, "learning_rate": 1.7368732761392288e-05, "loss": 0.107, "step": 7775 }, { "epoch": 0.3949439057820194, "grad_norm": 0.4181389808654785, "learning_rate": 1.736704062811987e-05, "loss": 0.0955, "step": 7780 }, { "epoch": 0.39519772577288187, "grad_norm": 0.599090576171875, "learning_rate": 1.7365348494847455e-05, "loss": 0.1237, "step": 7785 }, { "epoch": 0.39545154576374436, "grad_norm": 0.5317444801330566, "learning_rate": 1.736365636157504e-05, "loss": 0.1143, "step": 7790 }, { "epoch": 0.39570536575460685, "grad_norm": 0.5095632672309875, "learning_rate": 1.7361964228302622e-05, "loss": 0.1297, "step": 7795 }, { "epoch": 0.3959591857454693, "grad_norm": 0.7343167662620544, "learning_rate": 1.7360272095030206e-05, "loss": 0.1296, "step": 7800 }, { "epoch": 0.3962130057363318, "grad_norm": 0.7114145755767822, "learning_rate": 1.735857996175779e-05, "loss": 0.1186, "step": 7805 }, { "epoch": 0.39646682572719427, "grad_norm": 0.52663254737854, "learning_rate": 1.7356887828485373e-05, "loss": 0.1318, "step": 7810 }, { "epoch": 0.39672064571805676, "grad_norm": 0.33781489729881287, "learning_rate": 1.7355195695212956e-05, "loss": 0.1075, "step": 7815 }, { "epoch": 0.39697446570891926, "grad_norm": 0.8520582914352417, "learning_rate": 1.735350356194054e-05, "loss": 0.1175, "step": 7820 }, { "epoch": 0.3972282856997817, "grad_norm": 0.448352187871933, "learning_rate": 1.7351811428668123e-05, "loss": 0.1189, "step": 7825 }, { "epoch": 0.3974821056906442, "grad_norm": 1.0264869928359985, "learning_rate": 1.7350119295395707e-05, "loss": 0.1208, "step": 7830 }, { "epoch": 0.3977359256815067, "grad_norm": 0.4782174527645111, "learning_rate": 1.734842716212329e-05, "loss": 0.1211, "step": 7835 }, { "epoch": 0.39798974567236917, "grad_norm": 0.41531625390052795, "learning_rate": 1.7346735028850874e-05, "loss": 0.1131, "step": 7840 }, { "epoch": 0.39824356566323166, "grad_norm": 0.5510904788970947, "learning_rate": 1.7345042895578458e-05, "loss": 0.1068, "step": 7845 }, { "epoch": 0.3984973856540941, "grad_norm": 0.6751682758331299, "learning_rate": 1.734335076230604e-05, "loss": 0.1381, "step": 7850 }, { "epoch": 0.3987512056449566, "grad_norm": 0.47615987062454224, "learning_rate": 1.7341658629033625e-05, "loss": 0.1038, "step": 7855 }, { "epoch": 0.3990050256358191, "grad_norm": 0.5570587515830994, "learning_rate": 1.7339966495761208e-05, "loss": 0.111, "step": 7860 }, { "epoch": 0.39925884562668157, "grad_norm": 0.5412005186080933, "learning_rate": 1.7338274362488792e-05, "loss": 0.1264, "step": 7865 }, { "epoch": 0.39951266561754406, "grad_norm": 1.4431235790252686, "learning_rate": 1.7336582229216375e-05, "loss": 0.1239, "step": 7870 }, { "epoch": 0.3997664856084065, "grad_norm": 0.3393210768699646, "learning_rate": 1.7334890095943956e-05, "loss": 0.1383, "step": 7875 }, { "epoch": 0.400020305599269, "grad_norm": 0.5208285450935364, "learning_rate": 1.7333197962671542e-05, "loss": 0.1172, "step": 7880 }, { "epoch": 0.4002741255901315, "grad_norm": 0.5236523747444153, "learning_rate": 1.7331505829399126e-05, "loss": 0.1098, "step": 7885 }, { "epoch": 0.40052794558099397, "grad_norm": 0.6051152944564819, "learning_rate": 1.732981369612671e-05, "loss": 0.1165, "step": 7890 }, { "epoch": 0.40078176557185646, "grad_norm": 0.8865431547164917, "learning_rate": 1.7328121562854293e-05, "loss": 0.1145, "step": 7895 }, { "epoch": 0.4010355855627189, "grad_norm": 0.5429019927978516, "learning_rate": 1.7326429429581873e-05, "loss": 0.1297, "step": 7900 }, { "epoch": 0.4012894055535814, "grad_norm": 0.7259514331817627, "learning_rate": 1.732473729630946e-05, "loss": 0.1103, "step": 7905 }, { "epoch": 0.4015432255444439, "grad_norm": 0.6104267239570618, "learning_rate": 1.7323045163037044e-05, "loss": 0.1066, "step": 7910 }, { "epoch": 0.4017970455353064, "grad_norm": 0.4790496230125427, "learning_rate": 1.7321353029764624e-05, "loss": 0.1265, "step": 7915 }, { "epoch": 0.40205086552616887, "grad_norm": 0.39838600158691406, "learning_rate": 1.731966089649221e-05, "loss": 0.105, "step": 7920 }, { "epoch": 0.4023046855170313, "grad_norm": 0.44898778200149536, "learning_rate": 1.731796876321979e-05, "loss": 0.1256, "step": 7925 }, { "epoch": 0.4025585055078938, "grad_norm": 1.1361500024795532, "learning_rate": 1.7316276629947375e-05, "loss": 0.1154, "step": 7930 }, { "epoch": 0.4028123254987563, "grad_norm": 0.44268670678138733, "learning_rate": 1.731458449667496e-05, "loss": 0.122, "step": 7935 }, { "epoch": 0.4030661454896188, "grad_norm": 0.5239094495773315, "learning_rate": 1.731289236340254e-05, "loss": 0.1109, "step": 7940 }, { "epoch": 0.40331996548048127, "grad_norm": 0.46446603536605835, "learning_rate": 1.731120023013013e-05, "loss": 0.1077, "step": 7945 }, { "epoch": 0.4035737854713437, "grad_norm": 0.36538025736808777, "learning_rate": 1.730950809685771e-05, "loss": 0.1189, "step": 7950 }, { "epoch": 0.4038276054622062, "grad_norm": 0.3762917220592499, "learning_rate": 1.7307815963585292e-05, "loss": 0.1186, "step": 7955 }, { "epoch": 0.4040814254530687, "grad_norm": 0.3559653162956238, "learning_rate": 1.730612383031288e-05, "loss": 0.1242, "step": 7960 }, { "epoch": 0.4043352454439312, "grad_norm": 0.5034398436546326, "learning_rate": 1.730443169704046e-05, "loss": 0.1222, "step": 7965 }, { "epoch": 0.40458906543479367, "grad_norm": 0.8461323976516724, "learning_rate": 1.7302739563768043e-05, "loss": 0.1281, "step": 7970 }, { "epoch": 0.4048428854256561, "grad_norm": 0.4809536337852478, "learning_rate": 1.7301047430495626e-05, "loss": 0.1085, "step": 7975 }, { "epoch": 0.4050967054165186, "grad_norm": 0.5192294716835022, "learning_rate": 1.729935529722321e-05, "loss": 0.1183, "step": 7980 }, { "epoch": 0.4053505254073811, "grad_norm": 0.45198673009872437, "learning_rate": 1.7297663163950797e-05, "loss": 0.1144, "step": 7985 }, { "epoch": 0.4056043453982436, "grad_norm": 0.4005521535873413, "learning_rate": 1.7295971030678377e-05, "loss": 0.1195, "step": 7990 }, { "epoch": 0.4058581653891061, "grad_norm": 0.5918262004852295, "learning_rate": 1.729427889740596e-05, "loss": 0.1212, "step": 7995 }, { "epoch": 0.4061119853799685, "grad_norm": 0.46647128462791443, "learning_rate": 1.7292586764133544e-05, "loss": 0.1101, "step": 8000 }, { "epoch": 0.406365805370831, "grad_norm": 0.43062543869018555, "learning_rate": 1.7290894630861128e-05, "loss": 0.1174, "step": 8005 }, { "epoch": 0.4066196253616935, "grad_norm": 0.4810847043991089, "learning_rate": 1.728920249758871e-05, "loss": 0.1212, "step": 8010 }, { "epoch": 0.406873445352556, "grad_norm": 0.5878148078918457, "learning_rate": 1.7287510364316295e-05, "loss": 0.1138, "step": 8015 }, { "epoch": 0.4071272653434184, "grad_norm": 0.5232487916946411, "learning_rate": 1.728581823104388e-05, "loss": 0.1264, "step": 8020 }, { "epoch": 0.4073810853342809, "grad_norm": 0.3923046588897705, "learning_rate": 1.7284126097771462e-05, "loss": 0.1137, "step": 8025 }, { "epoch": 0.4076349053251434, "grad_norm": 0.5523611903190613, "learning_rate": 1.7282433964499045e-05, "loss": 0.1269, "step": 8030 }, { "epoch": 0.4078887253160059, "grad_norm": 0.587213933467865, "learning_rate": 1.728074183122663e-05, "loss": 0.1182, "step": 8035 }, { "epoch": 0.4081425453068684, "grad_norm": 0.6476024985313416, "learning_rate": 1.7279049697954213e-05, "loss": 0.1283, "step": 8040 }, { "epoch": 0.4083963652977308, "grad_norm": 0.5277358889579773, "learning_rate": 1.7277357564681796e-05, "loss": 0.1178, "step": 8045 }, { "epoch": 0.4086501852885933, "grad_norm": 0.4592641294002533, "learning_rate": 1.727566543140938e-05, "loss": 0.1155, "step": 8050 }, { "epoch": 0.4089040052794558, "grad_norm": 0.5247008800506592, "learning_rate": 1.7273973298136963e-05, "loss": 0.1086, "step": 8055 }, { "epoch": 0.4091578252703183, "grad_norm": 0.5143386125564575, "learning_rate": 1.7272281164864547e-05, "loss": 0.1229, "step": 8060 }, { "epoch": 0.4094116452611808, "grad_norm": 0.40900567173957825, "learning_rate": 1.727058903159213e-05, "loss": 0.1165, "step": 8065 }, { "epoch": 0.4096654652520432, "grad_norm": 0.44880881905555725, "learning_rate": 1.7268896898319714e-05, "loss": 0.1245, "step": 8070 }, { "epoch": 0.4099192852429057, "grad_norm": 0.4040411710739136, "learning_rate": 1.7267204765047297e-05, "loss": 0.1625, "step": 8075 }, { "epoch": 0.4101731052337682, "grad_norm": 0.5253224968910217, "learning_rate": 1.7265512631774877e-05, "loss": 0.1105, "step": 8080 }, { "epoch": 0.4104269252246307, "grad_norm": 0.5281549692153931, "learning_rate": 1.7263820498502464e-05, "loss": 0.1205, "step": 8085 }, { "epoch": 0.4106807452154932, "grad_norm": 0.4668600857257843, "learning_rate": 1.7262128365230048e-05, "loss": 0.1068, "step": 8090 }, { "epoch": 0.41093456520635563, "grad_norm": 0.47776713967323303, "learning_rate": 1.726043623195763e-05, "loss": 0.1147, "step": 8095 }, { "epoch": 0.4111883851972181, "grad_norm": 0.8644548058509827, "learning_rate": 1.7258744098685215e-05, "loss": 0.1148, "step": 8100 }, { "epoch": 0.4114422051880806, "grad_norm": 0.44676366448402405, "learning_rate": 1.7257051965412795e-05, "loss": 0.1025, "step": 8105 }, { "epoch": 0.4116960251789431, "grad_norm": 0.6538465619087219, "learning_rate": 1.7255359832140382e-05, "loss": 0.1112, "step": 8110 }, { "epoch": 0.4119498451698056, "grad_norm": 0.5455487370491028, "learning_rate": 1.7253667698867966e-05, "loss": 0.1225, "step": 8115 }, { "epoch": 0.41220366516066803, "grad_norm": 0.5371622443199158, "learning_rate": 1.7251975565595546e-05, "loss": 0.1113, "step": 8120 }, { "epoch": 0.4124574851515305, "grad_norm": 0.5552743077278137, "learning_rate": 1.7250283432323133e-05, "loss": 0.1145, "step": 8125 }, { "epoch": 0.412711305142393, "grad_norm": 0.40437573194503784, "learning_rate": 1.7248591299050713e-05, "loss": 0.115, "step": 8130 }, { "epoch": 0.4129651251332555, "grad_norm": 0.8257235884666443, "learning_rate": 1.72468991657783e-05, "loss": 0.1263, "step": 8135 }, { "epoch": 0.413218945124118, "grad_norm": 0.5629894733428955, "learning_rate": 1.7245207032505883e-05, "loss": 0.1114, "step": 8140 }, { "epoch": 0.41347276511498043, "grad_norm": 0.5272229909896851, "learning_rate": 1.7243514899233464e-05, "loss": 0.1197, "step": 8145 }, { "epoch": 0.4137265851058429, "grad_norm": 0.5540295243263245, "learning_rate": 1.724182276596105e-05, "loss": 0.1073, "step": 8150 }, { "epoch": 0.4139804050967054, "grad_norm": 0.5846796631813049, "learning_rate": 1.724013063268863e-05, "loss": 0.1228, "step": 8155 }, { "epoch": 0.4142342250875679, "grad_norm": 0.5993126034736633, "learning_rate": 1.7238438499416214e-05, "loss": 0.1118, "step": 8160 }, { "epoch": 0.4144880450784304, "grad_norm": 0.47880518436431885, "learning_rate": 1.72367463661438e-05, "loss": 0.1389, "step": 8165 }, { "epoch": 0.41474186506929284, "grad_norm": 0.7932132482528687, "learning_rate": 1.723505423287138e-05, "loss": 0.1148, "step": 8170 }, { "epoch": 0.41499568506015533, "grad_norm": 0.5608577728271484, "learning_rate": 1.7233362099598965e-05, "loss": 0.1065, "step": 8175 }, { "epoch": 0.4152495050510178, "grad_norm": 0.5518209934234619, "learning_rate": 1.723166996632655e-05, "loss": 0.1186, "step": 8180 }, { "epoch": 0.4155033250418803, "grad_norm": 0.38393184542655945, "learning_rate": 1.7229977833054132e-05, "loss": 0.112, "step": 8185 }, { "epoch": 0.4157571450327428, "grad_norm": 0.5350388884544373, "learning_rate": 1.722828569978172e-05, "loss": 0.1215, "step": 8190 }, { "epoch": 0.41601096502360524, "grad_norm": 0.487732857465744, "learning_rate": 1.72265935665093e-05, "loss": 0.1232, "step": 8195 }, { "epoch": 0.41626478501446773, "grad_norm": 0.7282403707504272, "learning_rate": 1.7224901433236883e-05, "loss": 0.1112, "step": 8200 }, { "epoch": 0.4165186050053302, "grad_norm": 0.49439603090286255, "learning_rate": 1.7223209299964466e-05, "loss": 0.1123, "step": 8205 }, { "epoch": 0.4167724249961927, "grad_norm": 0.3408522307872772, "learning_rate": 1.722151716669205e-05, "loss": 0.1047, "step": 8210 }, { "epoch": 0.4170262449870552, "grad_norm": 0.5345622897148132, "learning_rate": 1.7219825033419633e-05, "loss": 0.1233, "step": 8215 }, { "epoch": 0.41728006497791764, "grad_norm": 0.4255228638648987, "learning_rate": 1.7218132900147217e-05, "loss": 0.1268, "step": 8220 }, { "epoch": 0.41753388496878013, "grad_norm": 0.6682707667350769, "learning_rate": 1.72164407668748e-05, "loss": 0.1129, "step": 8225 }, { "epoch": 0.4177877049596426, "grad_norm": 0.49467936158180237, "learning_rate": 1.7214748633602384e-05, "loss": 0.1171, "step": 8230 }, { "epoch": 0.4180415249505051, "grad_norm": 0.4055912494659424, "learning_rate": 1.7213056500329967e-05, "loss": 0.117, "step": 8235 }, { "epoch": 0.4182953449413676, "grad_norm": 0.47649750113487244, "learning_rate": 1.721136436705755e-05, "loss": 0.1015, "step": 8240 }, { "epoch": 0.41854916493223004, "grad_norm": 0.3205729126930237, "learning_rate": 1.7209672233785134e-05, "loss": 0.1138, "step": 8245 }, { "epoch": 0.41880298492309254, "grad_norm": 0.5603702068328857, "learning_rate": 1.7207980100512718e-05, "loss": 0.106, "step": 8250 }, { "epoch": 0.419056804913955, "grad_norm": 0.6263189911842346, "learning_rate": 1.72062879672403e-05, "loss": 0.1135, "step": 8255 }, { "epoch": 0.4193106249048175, "grad_norm": 0.533976674079895, "learning_rate": 1.7204595833967885e-05, "loss": 0.1269, "step": 8260 }, { "epoch": 0.41956444489568, "grad_norm": 0.38608598709106445, "learning_rate": 1.720290370069547e-05, "loss": 0.1081, "step": 8265 }, { "epoch": 0.41981826488654245, "grad_norm": 0.329917311668396, "learning_rate": 1.7201211567423052e-05, "loss": 0.1102, "step": 8270 }, { "epoch": 0.42007208487740494, "grad_norm": 0.4950959086418152, "learning_rate": 1.7199519434150636e-05, "loss": 0.1032, "step": 8275 }, { "epoch": 0.42032590486826743, "grad_norm": 0.7091923356056213, "learning_rate": 1.719782730087822e-05, "loss": 0.108, "step": 8280 }, { "epoch": 0.4205797248591299, "grad_norm": 0.41781190037727356, "learning_rate": 1.7196135167605803e-05, "loss": 0.1106, "step": 8285 }, { "epoch": 0.42083354484999236, "grad_norm": 0.745145857334137, "learning_rate": 1.7194443034333386e-05, "loss": 0.1133, "step": 8290 }, { "epoch": 0.42108736484085485, "grad_norm": 0.4343501925468445, "learning_rate": 1.719275090106097e-05, "loss": 0.11, "step": 8295 }, { "epoch": 0.42134118483171734, "grad_norm": 0.4626290202140808, "learning_rate": 1.7191058767788553e-05, "loss": 0.1079, "step": 8300 }, { "epoch": 0.42159500482257983, "grad_norm": 0.4289278984069824, "learning_rate": 1.7189366634516137e-05, "loss": 0.1084, "step": 8305 }, { "epoch": 0.4218488248134423, "grad_norm": 0.3560695946216583, "learning_rate": 1.7187674501243717e-05, "loss": 0.109, "step": 8310 }, { "epoch": 0.42210264480430476, "grad_norm": 0.9896416664123535, "learning_rate": 1.7185982367971304e-05, "loss": 0.1067, "step": 8315 }, { "epoch": 0.42235646479516725, "grad_norm": 0.45371013879776, "learning_rate": 1.7184290234698888e-05, "loss": 0.1093, "step": 8320 }, { "epoch": 0.42261028478602974, "grad_norm": 0.38966628909111023, "learning_rate": 1.7182598101426468e-05, "loss": 0.1037, "step": 8325 }, { "epoch": 0.42286410477689224, "grad_norm": 0.6311661005020142, "learning_rate": 1.7180905968154055e-05, "loss": 0.1128, "step": 8330 }, { "epoch": 0.4231179247677547, "grad_norm": 0.36234763264656067, "learning_rate": 1.7179213834881635e-05, "loss": 0.1075, "step": 8335 }, { "epoch": 0.42337174475861716, "grad_norm": 0.4338438808917999, "learning_rate": 1.7177521701609222e-05, "loss": 0.1137, "step": 8340 }, { "epoch": 0.42362556474947966, "grad_norm": 0.603247344493866, "learning_rate": 1.7175829568336805e-05, "loss": 0.1067, "step": 8345 }, { "epoch": 0.42387938474034215, "grad_norm": 0.7704381346702576, "learning_rate": 1.7174137435064385e-05, "loss": 0.1071, "step": 8350 }, { "epoch": 0.42413320473120464, "grad_norm": 0.41250476241111755, "learning_rate": 1.7172445301791972e-05, "loss": 0.115, "step": 8355 }, { "epoch": 0.42438702472206713, "grad_norm": 0.4616197943687439, "learning_rate": 1.7170753168519553e-05, "loss": 0.112, "step": 8360 }, { "epoch": 0.42464084471292957, "grad_norm": 0.45028916001319885, "learning_rate": 1.7169061035247136e-05, "loss": 0.1158, "step": 8365 }, { "epoch": 0.42489466470379206, "grad_norm": 0.6397187113761902, "learning_rate": 1.7167368901974723e-05, "loss": 0.1111, "step": 8370 }, { "epoch": 0.42514848469465455, "grad_norm": 0.4066790044307709, "learning_rate": 1.7165676768702303e-05, "loss": 0.1185, "step": 8375 }, { "epoch": 0.42540230468551704, "grad_norm": 0.7176026701927185, "learning_rate": 1.716398463542989e-05, "loss": 0.1129, "step": 8380 }, { "epoch": 0.42565612467637953, "grad_norm": 0.6438480615615845, "learning_rate": 1.716229250215747e-05, "loss": 0.1186, "step": 8385 }, { "epoch": 0.42590994466724197, "grad_norm": 0.48937782645225525, "learning_rate": 1.7160600368885054e-05, "loss": 0.1104, "step": 8390 }, { "epoch": 0.42616376465810446, "grad_norm": 0.46808022260665894, "learning_rate": 1.7158908235612637e-05, "loss": 0.11, "step": 8395 }, { "epoch": 0.42641758464896695, "grad_norm": 0.5450372099876404, "learning_rate": 1.715721610234022e-05, "loss": 0.1175, "step": 8400 }, { "epoch": 0.42667140463982944, "grad_norm": 0.5416411757469177, "learning_rate": 1.7155523969067804e-05, "loss": 0.1329, "step": 8405 }, { "epoch": 0.42692522463069194, "grad_norm": 0.5366417169570923, "learning_rate": 1.7153831835795388e-05, "loss": 0.1159, "step": 8410 }, { "epoch": 0.42717904462155437, "grad_norm": 0.5638911128044128, "learning_rate": 1.715213970252297e-05, "loss": 0.1047, "step": 8415 }, { "epoch": 0.42743286461241686, "grad_norm": 0.4426100552082062, "learning_rate": 1.7150447569250555e-05, "loss": 0.1016, "step": 8420 }, { "epoch": 0.42768668460327935, "grad_norm": 0.4157399833202362, "learning_rate": 1.714875543597814e-05, "loss": 0.129, "step": 8425 }, { "epoch": 0.42794050459414185, "grad_norm": 0.4836711287498474, "learning_rate": 1.7147063302705722e-05, "loss": 0.1071, "step": 8430 }, { "epoch": 0.42819432458500434, "grad_norm": 0.5332268476486206, "learning_rate": 1.7145371169433306e-05, "loss": 0.1251, "step": 8435 }, { "epoch": 0.4284481445758668, "grad_norm": 0.5566955208778381, "learning_rate": 1.714367903616089e-05, "loss": 0.1124, "step": 8440 }, { "epoch": 0.42870196456672927, "grad_norm": 0.6206413507461548, "learning_rate": 1.7141986902888473e-05, "loss": 0.1224, "step": 8445 }, { "epoch": 0.42895578455759176, "grad_norm": 0.44592681527137756, "learning_rate": 1.7140294769616056e-05, "loss": 0.1146, "step": 8450 }, { "epoch": 0.42920960454845425, "grad_norm": 0.5079742074012756, "learning_rate": 1.713860263634364e-05, "loss": 0.1242, "step": 8455 }, { "epoch": 0.42946342453931674, "grad_norm": 0.41701123118400574, "learning_rate": 1.7136910503071223e-05, "loss": 0.1157, "step": 8460 }, { "epoch": 0.4297172445301792, "grad_norm": 0.4740390479564667, "learning_rate": 1.7135218369798807e-05, "loss": 0.1171, "step": 8465 }, { "epoch": 0.42997106452104167, "grad_norm": 0.378922700881958, "learning_rate": 1.713352623652639e-05, "loss": 0.1159, "step": 8470 }, { "epoch": 0.43022488451190416, "grad_norm": 0.45636868476867676, "learning_rate": 1.7131834103253974e-05, "loss": 0.1064, "step": 8475 }, { "epoch": 0.43047870450276665, "grad_norm": 0.4845265746116638, "learning_rate": 1.7130141969981558e-05, "loss": 0.1087, "step": 8480 }, { "epoch": 0.43073252449362914, "grad_norm": 0.41355660557746887, "learning_rate": 1.712844983670914e-05, "loss": 0.1154, "step": 8485 }, { "epoch": 0.4309863444844916, "grad_norm": 0.42567557096481323, "learning_rate": 1.7126757703436725e-05, "loss": 0.1131, "step": 8490 }, { "epoch": 0.43124016447535407, "grad_norm": 0.46482059359550476, "learning_rate": 1.7125065570164308e-05, "loss": 0.1068, "step": 8495 }, { "epoch": 0.43149398446621656, "grad_norm": 0.7739242911338806, "learning_rate": 1.7123373436891892e-05, "loss": 0.1175, "step": 8500 }, { "epoch": 0.43174780445707905, "grad_norm": 0.4292283058166504, "learning_rate": 1.7121681303619475e-05, "loss": 0.1197, "step": 8505 }, { "epoch": 0.43200162444794155, "grad_norm": 0.4146791696548462, "learning_rate": 1.711998917034706e-05, "loss": 0.1333, "step": 8510 }, { "epoch": 0.432255444438804, "grad_norm": 0.7794067859649658, "learning_rate": 1.711829703707464e-05, "loss": 0.1176, "step": 8515 }, { "epoch": 0.4325092644296665, "grad_norm": 0.4238576889038086, "learning_rate": 1.7116604903802226e-05, "loss": 0.0959, "step": 8520 }, { "epoch": 0.43276308442052897, "grad_norm": 0.5824153423309326, "learning_rate": 1.711491277052981e-05, "loss": 0.1198, "step": 8525 }, { "epoch": 0.43301690441139146, "grad_norm": 0.6489765644073486, "learning_rate": 1.7113220637257393e-05, "loss": 0.1005, "step": 8530 }, { "epoch": 0.43327072440225395, "grad_norm": 0.5258995890617371, "learning_rate": 1.7111528503984977e-05, "loss": 0.1167, "step": 8535 }, { "epoch": 0.4335245443931164, "grad_norm": 0.5935726761817932, "learning_rate": 1.7109836370712557e-05, "loss": 0.1123, "step": 8540 }, { "epoch": 0.4337783643839789, "grad_norm": 0.47369036078453064, "learning_rate": 1.7108144237440144e-05, "loss": 0.1208, "step": 8545 }, { "epoch": 0.43403218437484137, "grad_norm": 0.5065063834190369, "learning_rate": 1.7106452104167727e-05, "loss": 0.1088, "step": 8550 }, { "epoch": 0.43428600436570386, "grad_norm": 0.4577081799507141, "learning_rate": 1.7104759970895307e-05, "loss": 0.1283, "step": 8555 }, { "epoch": 0.4345398243565663, "grad_norm": 0.5575316548347473, "learning_rate": 1.7103067837622894e-05, "loss": 0.1152, "step": 8560 }, { "epoch": 0.4347936443474288, "grad_norm": 0.43118050694465637, "learning_rate": 1.7101375704350475e-05, "loss": 0.0927, "step": 8565 }, { "epoch": 0.4350474643382913, "grad_norm": 0.6905524134635925, "learning_rate": 1.7099683571078058e-05, "loss": 0.1209, "step": 8570 }, { "epoch": 0.43530128432915377, "grad_norm": 0.46179112792015076, "learning_rate": 1.709799143780564e-05, "loss": 0.1098, "step": 8575 }, { "epoch": 0.43555510432001626, "grad_norm": 0.5374900698661804, "learning_rate": 1.7096299304533225e-05, "loss": 0.0966, "step": 8580 }, { "epoch": 0.4358089243108787, "grad_norm": 0.47844305634498596, "learning_rate": 1.7094607171260812e-05, "loss": 0.1098, "step": 8585 }, { "epoch": 0.4360627443017412, "grad_norm": 0.5935022234916687, "learning_rate": 1.7092915037988392e-05, "loss": 0.099, "step": 8590 }, { "epoch": 0.4363165642926037, "grad_norm": 0.39847415685653687, "learning_rate": 1.7091222904715976e-05, "loss": 0.1041, "step": 8595 }, { "epoch": 0.4365703842834662, "grad_norm": 0.6426656246185303, "learning_rate": 1.708953077144356e-05, "loss": 0.1143, "step": 8600 }, { "epoch": 0.43682420427432866, "grad_norm": 0.40940332412719727, "learning_rate": 1.7087838638171143e-05, "loss": 0.1117, "step": 8605 }, { "epoch": 0.4370780242651911, "grad_norm": 0.5274299383163452, "learning_rate": 1.7086146504898726e-05, "loss": 0.1132, "step": 8610 }, { "epoch": 0.4373318442560536, "grad_norm": 0.48877760767936707, "learning_rate": 1.708445437162631e-05, "loss": 0.114, "step": 8615 }, { "epoch": 0.4375856642469161, "grad_norm": 0.4619860053062439, "learning_rate": 1.7082762238353894e-05, "loss": 0.113, "step": 8620 }, { "epoch": 0.4378394842377786, "grad_norm": 0.47805002331733704, "learning_rate": 1.7081070105081477e-05, "loss": 0.1201, "step": 8625 }, { "epoch": 0.43809330422864107, "grad_norm": 0.8779380917549133, "learning_rate": 1.707937797180906e-05, "loss": 0.1059, "step": 8630 }, { "epoch": 0.4383471242195035, "grad_norm": 0.7380253076553345, "learning_rate": 1.7077685838536644e-05, "loss": 0.1225, "step": 8635 }, { "epoch": 0.438600944210366, "grad_norm": 0.4746687710285187, "learning_rate": 1.7075993705264228e-05, "loss": 0.1119, "step": 8640 }, { "epoch": 0.4388547642012285, "grad_norm": 1.3679776191711426, "learning_rate": 1.707430157199181e-05, "loss": 0.1128, "step": 8645 }, { "epoch": 0.439108584192091, "grad_norm": 0.5259309411048889, "learning_rate": 1.7072609438719395e-05, "loss": 0.128, "step": 8650 }, { "epoch": 0.43936240418295347, "grad_norm": 0.4603613018989563, "learning_rate": 1.707091730544698e-05, "loss": 0.0927, "step": 8655 }, { "epoch": 0.4396162241738159, "grad_norm": 0.5576603412628174, "learning_rate": 1.7069225172174562e-05, "loss": 0.1329, "step": 8660 }, { "epoch": 0.4398700441646784, "grad_norm": 0.5329744219779968, "learning_rate": 1.7067533038902145e-05, "loss": 0.1017, "step": 8665 }, { "epoch": 0.4401238641555409, "grad_norm": 0.4557638168334961, "learning_rate": 1.706584090562973e-05, "loss": 0.1108, "step": 8670 }, { "epoch": 0.4403776841464034, "grad_norm": 0.6890199780464172, "learning_rate": 1.7064148772357313e-05, "loss": 0.1186, "step": 8675 }, { "epoch": 0.4406315041372659, "grad_norm": 0.526990532875061, "learning_rate": 1.7062456639084896e-05, "loss": 0.1048, "step": 8680 }, { "epoch": 0.4408853241281283, "grad_norm": 1.4475257396697998, "learning_rate": 1.706076450581248e-05, "loss": 0.1213, "step": 8685 }, { "epoch": 0.4411391441189908, "grad_norm": 0.4177703261375427, "learning_rate": 1.7059072372540063e-05, "loss": 0.1076, "step": 8690 }, { "epoch": 0.4413929641098533, "grad_norm": 1.0303242206573486, "learning_rate": 1.7057380239267647e-05, "loss": 0.1099, "step": 8695 }, { "epoch": 0.4416467841007158, "grad_norm": 0.3975723683834076, "learning_rate": 1.705568810599523e-05, "loss": 0.1072, "step": 8700 }, { "epoch": 0.4419006040915783, "grad_norm": 1.3383222818374634, "learning_rate": 1.7053995972722814e-05, "loss": 0.1233, "step": 8705 }, { "epoch": 0.4421544240824407, "grad_norm": 1.025421142578125, "learning_rate": 1.7052303839450397e-05, "loss": 0.1101, "step": 8710 }, { "epoch": 0.4424082440733032, "grad_norm": 0.33774542808532715, "learning_rate": 1.705061170617798e-05, "loss": 0.1008, "step": 8715 }, { "epoch": 0.4426620640641657, "grad_norm": 0.5173442959785461, "learning_rate": 1.7048919572905564e-05, "loss": 0.1063, "step": 8720 }, { "epoch": 0.4429158840550282, "grad_norm": 0.46076419949531555, "learning_rate": 1.7047227439633148e-05, "loss": 0.1121, "step": 8725 }, { "epoch": 0.4431697040458907, "grad_norm": 0.5615339875221252, "learning_rate": 1.704553530636073e-05, "loss": 0.1144, "step": 8730 }, { "epoch": 0.4434235240367531, "grad_norm": 0.513152539730072, "learning_rate": 1.7043843173088315e-05, "loss": 0.1111, "step": 8735 }, { "epoch": 0.4436773440276156, "grad_norm": 0.6728489995002747, "learning_rate": 1.70421510398159e-05, "loss": 0.1082, "step": 8740 }, { "epoch": 0.4439311640184781, "grad_norm": 0.4770069122314453, "learning_rate": 1.704045890654348e-05, "loss": 0.1126, "step": 8745 }, { "epoch": 0.4441849840093406, "grad_norm": 0.3625965416431427, "learning_rate": 1.7038766773271066e-05, "loss": 0.1202, "step": 8750 }, { "epoch": 0.4444388040002031, "grad_norm": 0.579849123954773, "learning_rate": 1.703707463999865e-05, "loss": 0.116, "step": 8755 }, { "epoch": 0.4446926239910655, "grad_norm": 0.5784683227539062, "learning_rate": 1.703538250672623e-05, "loss": 0.1351, "step": 8760 }, { "epoch": 0.444946443981928, "grad_norm": 0.5647311210632324, "learning_rate": 1.7033690373453816e-05, "loss": 0.101, "step": 8765 }, { "epoch": 0.4452002639727905, "grad_norm": 0.49872612953186035, "learning_rate": 1.7031998240181396e-05, "loss": 0.1206, "step": 8770 }, { "epoch": 0.445454083963653, "grad_norm": 0.6362616419792175, "learning_rate": 1.7030306106908983e-05, "loss": 0.1242, "step": 8775 }, { "epoch": 0.4457079039545155, "grad_norm": 0.34307578206062317, "learning_rate": 1.7028613973636564e-05, "loss": 0.1058, "step": 8780 }, { "epoch": 0.4459617239453779, "grad_norm": 0.5416290163993835, "learning_rate": 1.7026921840364147e-05, "loss": 0.1151, "step": 8785 }, { "epoch": 0.4462155439362404, "grad_norm": 0.46642163395881653, "learning_rate": 1.7025229707091734e-05, "loss": 0.1197, "step": 8790 }, { "epoch": 0.4464693639271029, "grad_norm": 0.3453334867954254, "learning_rate": 1.7023537573819314e-05, "loss": 0.107, "step": 8795 }, { "epoch": 0.4467231839179654, "grad_norm": 0.47410887479782104, "learning_rate": 1.7021845440546898e-05, "loss": 0.113, "step": 8800 }, { "epoch": 0.4469770039088279, "grad_norm": 1.0129278898239136, "learning_rate": 1.702015330727448e-05, "loss": 0.1171, "step": 8805 }, { "epoch": 0.4472308238996903, "grad_norm": 0.6906533241271973, "learning_rate": 1.7018461174002065e-05, "loss": 0.1278, "step": 8810 }, { "epoch": 0.4474846438905528, "grad_norm": 0.4985661804676056, "learning_rate": 1.701676904072965e-05, "loss": 0.1209, "step": 8815 }, { "epoch": 0.4477384638814153, "grad_norm": 0.4819851219654083, "learning_rate": 1.7015076907457232e-05, "loss": 0.1233, "step": 8820 }, { "epoch": 0.4479922838722778, "grad_norm": 0.49135705828666687, "learning_rate": 1.7013384774184815e-05, "loss": 0.106, "step": 8825 }, { "epoch": 0.4482461038631403, "grad_norm": 0.5526263117790222, "learning_rate": 1.70116926409124e-05, "loss": 0.1196, "step": 8830 }, { "epoch": 0.4484999238540027, "grad_norm": 0.6574402451515198, "learning_rate": 1.7010000507639983e-05, "loss": 0.1052, "step": 8835 }, { "epoch": 0.4487537438448652, "grad_norm": 0.3850298225879669, "learning_rate": 1.7008308374367566e-05, "loss": 0.099, "step": 8840 }, { "epoch": 0.4490075638357277, "grad_norm": 0.5754469037055969, "learning_rate": 1.700661624109515e-05, "loss": 0.1095, "step": 8845 }, { "epoch": 0.4492613838265902, "grad_norm": 0.6519771814346313, "learning_rate": 1.7004924107822733e-05, "loss": 0.1198, "step": 8850 }, { "epoch": 0.44951520381745264, "grad_norm": 0.5165800452232361, "learning_rate": 1.7003231974550317e-05, "loss": 0.119, "step": 8855 }, { "epoch": 0.4497690238083151, "grad_norm": 0.39308440685272217, "learning_rate": 1.70015398412779e-05, "loss": 0.1108, "step": 8860 }, { "epoch": 0.4500228437991776, "grad_norm": 0.3973790407180786, "learning_rate": 1.6999847708005484e-05, "loss": 0.108, "step": 8865 }, { "epoch": 0.4502766637900401, "grad_norm": 0.571454644203186, "learning_rate": 1.6998155574733067e-05, "loss": 0.1107, "step": 8870 }, { "epoch": 0.4505304837809026, "grad_norm": 0.48195260763168335, "learning_rate": 1.699646344146065e-05, "loss": 0.118, "step": 8875 }, { "epoch": 0.45078430377176504, "grad_norm": 0.384822815656662, "learning_rate": 1.6994771308188234e-05, "loss": 0.1239, "step": 8880 }, { "epoch": 0.45103812376262753, "grad_norm": 0.6219851970672607, "learning_rate": 1.6993079174915818e-05, "loss": 0.1087, "step": 8885 }, { "epoch": 0.45129194375349, "grad_norm": 0.5098511576652527, "learning_rate": 1.69913870416434e-05, "loss": 0.1153, "step": 8890 }, { "epoch": 0.4515457637443525, "grad_norm": 0.5047430992126465, "learning_rate": 1.6989694908370985e-05, "loss": 0.0957, "step": 8895 }, { "epoch": 0.451799583735215, "grad_norm": 0.5024707317352295, "learning_rate": 1.698800277509857e-05, "loss": 0.1006, "step": 8900 }, { "epoch": 0.45205340372607744, "grad_norm": 0.40146404504776, "learning_rate": 1.6986310641826152e-05, "loss": 0.1082, "step": 8905 }, { "epoch": 0.45230722371693993, "grad_norm": 1.9382678270339966, "learning_rate": 1.6984618508553736e-05, "loss": 0.098, "step": 8910 }, { "epoch": 0.4525610437078024, "grad_norm": 0.7064502835273743, "learning_rate": 1.698292637528132e-05, "loss": 0.1107, "step": 8915 }, { "epoch": 0.4528148636986649, "grad_norm": 0.4406428635120392, "learning_rate": 1.6981234242008903e-05, "loss": 0.0956, "step": 8920 }, { "epoch": 0.4530686836895274, "grad_norm": 0.39747583866119385, "learning_rate": 1.6979542108736486e-05, "loss": 0.105, "step": 8925 }, { "epoch": 0.45332250368038984, "grad_norm": 0.5317738056182861, "learning_rate": 1.697784997546407e-05, "loss": 0.1111, "step": 8930 }, { "epoch": 0.45357632367125233, "grad_norm": 0.4411675035953522, "learning_rate": 1.6976157842191653e-05, "loss": 0.1174, "step": 8935 }, { "epoch": 0.4538301436621148, "grad_norm": 0.7328295111656189, "learning_rate": 1.6974465708919237e-05, "loss": 0.1025, "step": 8940 }, { "epoch": 0.4540839636529773, "grad_norm": 0.40330472588539124, "learning_rate": 1.697277357564682e-05, "loss": 0.0993, "step": 8945 }, { "epoch": 0.4543377836438398, "grad_norm": 1.0070425271987915, "learning_rate": 1.69710814423744e-05, "loss": 0.1033, "step": 8950 }, { "epoch": 0.45459160363470225, "grad_norm": 0.5507986545562744, "learning_rate": 1.6969389309101988e-05, "loss": 0.1129, "step": 8955 }, { "epoch": 0.45484542362556474, "grad_norm": 0.48115500807762146, "learning_rate": 1.6967697175829568e-05, "loss": 0.1042, "step": 8960 }, { "epoch": 0.45509924361642723, "grad_norm": 0.4493533670902252, "learning_rate": 1.696600504255715e-05, "loss": 0.1132, "step": 8965 }, { "epoch": 0.4553530636072897, "grad_norm": 0.9694324135780334, "learning_rate": 1.6964312909284738e-05, "loss": 0.1155, "step": 8970 }, { "epoch": 0.4556068835981522, "grad_norm": 0.3891000747680664, "learning_rate": 1.696262077601232e-05, "loss": 0.0998, "step": 8975 }, { "epoch": 0.45586070358901465, "grad_norm": 0.4627774953842163, "learning_rate": 1.6960928642739905e-05, "loss": 0.1009, "step": 8980 }, { "epoch": 0.45611452357987714, "grad_norm": 0.5629536509513855, "learning_rate": 1.6959236509467485e-05, "loss": 0.108, "step": 8985 }, { "epoch": 0.45636834357073963, "grad_norm": 0.4632164239883423, "learning_rate": 1.695754437619507e-05, "loss": 0.113, "step": 8990 }, { "epoch": 0.4566221635616021, "grad_norm": 0.5252170562744141, "learning_rate": 1.6955852242922656e-05, "loss": 0.1114, "step": 8995 }, { "epoch": 0.4568759835524646, "grad_norm": 0.7564627528190613, "learning_rate": 1.6954160109650236e-05, "loss": 0.1086, "step": 9000 }, { "epoch": 0.45712980354332705, "grad_norm": 0.44219401478767395, "learning_rate": 1.695246797637782e-05, "loss": 0.1107, "step": 9005 }, { "epoch": 0.45738362353418954, "grad_norm": 0.5671635866165161, "learning_rate": 1.6950775843105403e-05, "loss": 0.1236, "step": 9010 }, { "epoch": 0.45763744352505203, "grad_norm": 0.42195212841033936, "learning_rate": 1.6949083709832987e-05, "loss": 0.1077, "step": 9015 }, { "epoch": 0.4578912635159145, "grad_norm": 0.36243775486946106, "learning_rate": 1.6947391576560574e-05, "loss": 0.0929, "step": 9020 }, { "epoch": 0.458145083506777, "grad_norm": 0.4893646240234375, "learning_rate": 1.6945699443288154e-05, "loss": 0.1182, "step": 9025 }, { "epoch": 0.45839890349763945, "grad_norm": 0.43038901686668396, "learning_rate": 1.6944007310015737e-05, "loss": 0.1121, "step": 9030 }, { "epoch": 0.45865272348850195, "grad_norm": 0.4785013198852539, "learning_rate": 1.694231517674332e-05, "loss": 0.1063, "step": 9035 }, { "epoch": 0.45890654347936444, "grad_norm": 0.4438520073890686, "learning_rate": 1.6940623043470904e-05, "loss": 0.107, "step": 9040 }, { "epoch": 0.45916036347022693, "grad_norm": 0.5882023572921753, "learning_rate": 1.6938930910198488e-05, "loss": 0.1147, "step": 9045 }, { "epoch": 0.4594141834610894, "grad_norm": 0.5411685705184937, "learning_rate": 1.693723877692607e-05, "loss": 0.1099, "step": 9050 }, { "epoch": 0.45966800345195186, "grad_norm": 0.9396799206733704, "learning_rate": 1.6935546643653655e-05, "loss": 0.1188, "step": 9055 }, { "epoch": 0.45992182344281435, "grad_norm": 0.5368686318397522, "learning_rate": 1.693385451038124e-05, "loss": 0.1063, "step": 9060 }, { "epoch": 0.46017564343367684, "grad_norm": 0.5517676472663879, "learning_rate": 1.6932162377108822e-05, "loss": 0.1013, "step": 9065 }, { "epoch": 0.46042946342453933, "grad_norm": 0.45235463976860046, "learning_rate": 1.6930470243836406e-05, "loss": 0.1123, "step": 9070 }, { "epoch": 0.4606832834154018, "grad_norm": 0.5119823217391968, "learning_rate": 1.692877811056399e-05, "loss": 0.1046, "step": 9075 }, { "epoch": 0.46093710340626426, "grad_norm": 0.48481494188308716, "learning_rate": 1.6927085977291573e-05, "loss": 0.1181, "step": 9080 }, { "epoch": 0.46119092339712675, "grad_norm": 0.8900187015533447, "learning_rate": 1.6925393844019156e-05, "loss": 0.0909, "step": 9085 }, { "epoch": 0.46144474338798924, "grad_norm": 0.5890236496925354, "learning_rate": 1.692370171074674e-05, "loss": 0.1115, "step": 9090 }, { "epoch": 0.46169856337885173, "grad_norm": 0.5484874248504639, "learning_rate": 1.6922009577474323e-05, "loss": 0.1222, "step": 9095 }, { "epoch": 0.4619523833697142, "grad_norm": 0.32354220747947693, "learning_rate": 1.6920317444201907e-05, "loss": 0.0988, "step": 9100 }, { "epoch": 0.46220620336057666, "grad_norm": 0.4846709966659546, "learning_rate": 1.691862531092949e-05, "loss": 0.1184, "step": 9105 }, { "epoch": 0.46246002335143915, "grad_norm": 0.43833622336387634, "learning_rate": 1.6916933177657074e-05, "loss": 0.1041, "step": 9110 }, { "epoch": 0.46271384334230165, "grad_norm": 0.5856930613517761, "learning_rate": 1.6915241044384658e-05, "loss": 0.1109, "step": 9115 }, { "epoch": 0.46296766333316414, "grad_norm": 0.6962338089942932, "learning_rate": 1.691354891111224e-05, "loss": 0.1146, "step": 9120 }, { "epoch": 0.4632214833240266, "grad_norm": 0.3463370203971863, "learning_rate": 1.6911856777839825e-05, "loss": 0.1099, "step": 9125 }, { "epoch": 0.46347530331488906, "grad_norm": 1.7089934349060059, "learning_rate": 1.6910164644567408e-05, "loss": 0.117, "step": 9130 }, { "epoch": 0.46372912330575156, "grad_norm": 0.47701790928840637, "learning_rate": 1.6908472511294992e-05, "loss": 0.1109, "step": 9135 }, { "epoch": 0.46398294329661405, "grad_norm": 0.7937628626823425, "learning_rate": 1.6906780378022572e-05, "loss": 0.1002, "step": 9140 }, { "epoch": 0.46423676328747654, "grad_norm": 0.608931303024292, "learning_rate": 1.690508824475016e-05, "loss": 0.1032, "step": 9145 }, { "epoch": 0.464490583278339, "grad_norm": 0.5773627161979675, "learning_rate": 1.6903396111477742e-05, "loss": 0.1204, "step": 9150 }, { "epoch": 0.46474440326920147, "grad_norm": 0.48551470041275024, "learning_rate": 1.6901703978205323e-05, "loss": 0.1023, "step": 9155 }, { "epoch": 0.46499822326006396, "grad_norm": 0.428069144487381, "learning_rate": 1.690001184493291e-05, "loss": 0.1174, "step": 9160 }, { "epoch": 0.46525204325092645, "grad_norm": 0.737295925617218, "learning_rate": 1.689831971166049e-05, "loss": 0.1024, "step": 9165 }, { "epoch": 0.46550586324178894, "grad_norm": 0.9198458194732666, "learning_rate": 1.6896627578388077e-05, "loss": 0.1045, "step": 9170 }, { "epoch": 0.4657596832326514, "grad_norm": 0.46820440888404846, "learning_rate": 1.689493544511566e-05, "loss": 0.105, "step": 9175 }, { "epoch": 0.46601350322351387, "grad_norm": 0.5066524147987366, "learning_rate": 1.689324331184324e-05, "loss": 0.0928, "step": 9180 }, { "epoch": 0.46626732321437636, "grad_norm": 0.45925962924957275, "learning_rate": 1.6891551178570827e-05, "loss": 0.1143, "step": 9185 }, { "epoch": 0.46652114320523885, "grad_norm": 0.5800960063934326, "learning_rate": 1.6889859045298407e-05, "loss": 0.1292, "step": 9190 }, { "epoch": 0.46677496319610134, "grad_norm": 0.47480955719947815, "learning_rate": 1.688816691202599e-05, "loss": 0.1098, "step": 9195 }, { "epoch": 0.4670287831869638, "grad_norm": 0.5127950310707092, "learning_rate": 1.6886474778753578e-05, "loss": 0.1229, "step": 9200 }, { "epoch": 0.4672826031778263, "grad_norm": 0.6397584676742554, "learning_rate": 1.6884782645481158e-05, "loss": 0.1075, "step": 9205 }, { "epoch": 0.46753642316868876, "grad_norm": 0.419527530670166, "learning_rate": 1.688309051220874e-05, "loss": 0.1033, "step": 9210 }, { "epoch": 0.46779024315955126, "grad_norm": 0.3961291015148163, "learning_rate": 1.6881398378936325e-05, "loss": 0.0988, "step": 9215 }, { "epoch": 0.46804406315041375, "grad_norm": 0.5905468463897705, "learning_rate": 1.687970624566391e-05, "loss": 0.1044, "step": 9220 }, { "epoch": 0.4682978831412762, "grad_norm": 0.390688419342041, "learning_rate": 1.6878014112391496e-05, "loss": 0.1111, "step": 9225 }, { "epoch": 0.4685517031321387, "grad_norm": 0.36093178391456604, "learning_rate": 1.6876321979119076e-05, "loss": 0.1002, "step": 9230 }, { "epoch": 0.46880552312300117, "grad_norm": 0.5973555445671082, "learning_rate": 1.687462984584666e-05, "loss": 0.1121, "step": 9235 }, { "epoch": 0.46905934311386366, "grad_norm": 0.5031787157058716, "learning_rate": 1.6872937712574243e-05, "loss": 0.1064, "step": 9240 }, { "epoch": 0.46931316310472615, "grad_norm": 0.844380259513855, "learning_rate": 1.6871245579301826e-05, "loss": 0.1042, "step": 9245 }, { "epoch": 0.4695669830955886, "grad_norm": 0.5998194217681885, "learning_rate": 1.686955344602941e-05, "loss": 0.1112, "step": 9250 }, { "epoch": 0.4698208030864511, "grad_norm": 0.5319337248802185, "learning_rate": 1.6867861312756994e-05, "loss": 0.1065, "step": 9255 }, { "epoch": 0.47007462307731357, "grad_norm": 0.43366721272468567, "learning_rate": 1.6866169179484577e-05, "loss": 0.1047, "step": 9260 }, { "epoch": 0.47032844306817606, "grad_norm": 0.4964733421802521, "learning_rate": 1.686447704621216e-05, "loss": 0.1027, "step": 9265 }, { "epoch": 0.47058226305903855, "grad_norm": 0.44293656945228577, "learning_rate": 1.6862784912939744e-05, "loss": 0.112, "step": 9270 }, { "epoch": 0.470836083049901, "grad_norm": 0.2671841084957123, "learning_rate": 1.6861092779667328e-05, "loss": 0.1019, "step": 9275 }, { "epoch": 0.4710899030407635, "grad_norm": 0.5117993354797363, "learning_rate": 1.685940064639491e-05, "loss": 0.1069, "step": 9280 }, { "epoch": 0.47134372303162597, "grad_norm": 0.49542441964149475, "learning_rate": 1.6857708513122495e-05, "loss": 0.1059, "step": 9285 }, { "epoch": 0.47159754302248846, "grad_norm": 0.4893092215061188, "learning_rate": 1.685601637985008e-05, "loss": 0.1054, "step": 9290 }, { "epoch": 0.47185136301335096, "grad_norm": 1.8522083759307861, "learning_rate": 1.6854324246577662e-05, "loss": 0.1002, "step": 9295 }, { "epoch": 0.4721051830042134, "grad_norm": 1.9006584882736206, "learning_rate": 1.6852632113305245e-05, "loss": 0.113, "step": 9300 }, { "epoch": 0.4723590029950759, "grad_norm": 0.43772315979003906, "learning_rate": 1.685093998003283e-05, "loss": 0.1172, "step": 9305 }, { "epoch": 0.4726128229859384, "grad_norm": 0.5562046766281128, "learning_rate": 1.6849247846760412e-05, "loss": 0.1037, "step": 9310 }, { "epoch": 0.47286664297680087, "grad_norm": 0.4431091248989105, "learning_rate": 1.6847555713487996e-05, "loss": 0.102, "step": 9315 }, { "epoch": 0.47312046296766336, "grad_norm": 0.4664026200771332, "learning_rate": 1.684586358021558e-05, "loss": 0.1014, "step": 9320 }, { "epoch": 0.4733742829585258, "grad_norm": 0.4194331765174866, "learning_rate": 1.6844171446943163e-05, "loss": 0.1094, "step": 9325 }, { "epoch": 0.4736281029493883, "grad_norm": 0.4214523732662201, "learning_rate": 1.6842479313670747e-05, "loss": 0.0985, "step": 9330 }, { "epoch": 0.4738819229402508, "grad_norm": 0.4564267098903656, "learning_rate": 1.684078718039833e-05, "loss": 0.0984, "step": 9335 }, { "epoch": 0.47413574293111327, "grad_norm": 0.6111764907836914, "learning_rate": 1.6839095047125914e-05, "loss": 0.1138, "step": 9340 }, { "epoch": 0.47438956292197576, "grad_norm": 0.8785223364830017, "learning_rate": 1.6837402913853494e-05, "loss": 0.1066, "step": 9345 }, { "epoch": 0.4746433829128382, "grad_norm": 0.5163254737854004, "learning_rate": 1.683571078058108e-05, "loss": 0.1086, "step": 9350 }, { "epoch": 0.4748972029037007, "grad_norm": 0.45714399218559265, "learning_rate": 1.6834018647308664e-05, "loss": 0.1137, "step": 9355 }, { "epoch": 0.4751510228945632, "grad_norm": 0.5146034359931946, "learning_rate": 1.6832326514036248e-05, "loss": 0.0973, "step": 9360 }, { "epoch": 0.47540484288542567, "grad_norm": 0.5823367834091187, "learning_rate": 1.683063438076383e-05, "loss": 0.1099, "step": 9365 }, { "epoch": 0.47565866287628816, "grad_norm": 0.472186416387558, "learning_rate": 1.682894224749141e-05, "loss": 0.1074, "step": 9370 }, { "epoch": 0.4759124828671506, "grad_norm": 0.817765474319458, "learning_rate": 1.6827250114219e-05, "loss": 0.1037, "step": 9375 }, { "epoch": 0.4761663028580131, "grad_norm": 0.43095797300338745, "learning_rate": 1.6825557980946582e-05, "loss": 0.1081, "step": 9380 }, { "epoch": 0.4764201228488756, "grad_norm": 0.40991702675819397, "learning_rate": 1.6823865847674162e-05, "loss": 0.1172, "step": 9385 }, { "epoch": 0.4766739428397381, "grad_norm": 0.39121660590171814, "learning_rate": 1.682217371440175e-05, "loss": 0.1066, "step": 9390 }, { "epoch": 0.47692776283060057, "grad_norm": 0.5421929359436035, "learning_rate": 1.682048158112933e-05, "loss": 0.0957, "step": 9395 }, { "epoch": 0.477181582821463, "grad_norm": 0.44107937812805176, "learning_rate": 1.6818789447856913e-05, "loss": 0.1129, "step": 9400 }, { "epoch": 0.4774354028123255, "grad_norm": 0.6062149405479431, "learning_rate": 1.68170973145845e-05, "loss": 0.1152, "step": 9405 }, { "epoch": 0.477689222803188, "grad_norm": 0.4286916255950928, "learning_rate": 1.681540518131208e-05, "loss": 0.1114, "step": 9410 }, { "epoch": 0.4779430427940505, "grad_norm": 0.42873385548591614, "learning_rate": 1.6813713048039667e-05, "loss": 0.1024, "step": 9415 }, { "epoch": 0.4781968627849129, "grad_norm": 0.5670800805091858, "learning_rate": 1.6812020914767247e-05, "loss": 0.1174, "step": 9420 }, { "epoch": 0.4784506827757754, "grad_norm": 0.4170010983943939, "learning_rate": 1.681032878149483e-05, "loss": 0.0972, "step": 9425 }, { "epoch": 0.4787045027666379, "grad_norm": 0.5890213251113892, "learning_rate": 1.6808636648222418e-05, "loss": 0.1212, "step": 9430 }, { "epoch": 0.4789583227575004, "grad_norm": 0.8447275757789612, "learning_rate": 1.6806944514949998e-05, "loss": 0.107, "step": 9435 }, { "epoch": 0.4792121427483629, "grad_norm": 0.41049903631210327, "learning_rate": 1.680525238167758e-05, "loss": 0.1117, "step": 9440 }, { "epoch": 0.4794659627392253, "grad_norm": 0.5393239855766296, "learning_rate": 1.6803560248405165e-05, "loss": 0.0978, "step": 9445 }, { "epoch": 0.4797197827300878, "grad_norm": 0.5582802295684814, "learning_rate": 1.680186811513275e-05, "loss": 0.1057, "step": 9450 }, { "epoch": 0.4799736027209503, "grad_norm": 0.6471379995346069, "learning_rate": 1.6800175981860332e-05, "loss": 0.1101, "step": 9455 }, { "epoch": 0.4802274227118128, "grad_norm": 0.5534394979476929, "learning_rate": 1.6798483848587915e-05, "loss": 0.1015, "step": 9460 }, { "epoch": 0.4804812427026753, "grad_norm": 0.43192145228385925, "learning_rate": 1.67967917153155e-05, "loss": 0.102, "step": 9465 }, { "epoch": 0.4807350626935377, "grad_norm": 0.38189658522605896, "learning_rate": 1.6795099582043083e-05, "loss": 0.1017, "step": 9470 }, { "epoch": 0.4809888826844002, "grad_norm": 0.4700341820716858, "learning_rate": 1.6793407448770666e-05, "loss": 0.1005, "step": 9475 }, { "epoch": 0.4812427026752627, "grad_norm": 0.6079494953155518, "learning_rate": 1.679171531549825e-05, "loss": 0.0978, "step": 9480 }, { "epoch": 0.4814965226661252, "grad_norm": 0.8428679704666138, "learning_rate": 1.6790023182225833e-05, "loss": 0.1251, "step": 9485 }, { "epoch": 0.4817503426569877, "grad_norm": 0.48023349046707153, "learning_rate": 1.6788331048953417e-05, "loss": 0.1109, "step": 9490 }, { "epoch": 0.4820041626478501, "grad_norm": 0.40779995918273926, "learning_rate": 1.6786638915681e-05, "loss": 0.1191, "step": 9495 }, { "epoch": 0.4822579826387126, "grad_norm": 0.4184429943561554, "learning_rate": 1.6784946782408584e-05, "loss": 0.0975, "step": 9500 }, { "epoch": 0.4825118026295751, "grad_norm": 0.39694687724113464, "learning_rate": 1.6783254649136167e-05, "loss": 0.0985, "step": 9505 }, { "epoch": 0.4827656226204376, "grad_norm": 0.503432035446167, "learning_rate": 1.678156251586375e-05, "loss": 0.1057, "step": 9510 }, { "epoch": 0.4830194426113001, "grad_norm": 0.43741822242736816, "learning_rate": 1.6779870382591334e-05, "loss": 0.1002, "step": 9515 }, { "epoch": 0.4832732626021625, "grad_norm": 0.49856293201446533, "learning_rate": 1.6778178249318918e-05, "loss": 0.1194, "step": 9520 }, { "epoch": 0.483527082593025, "grad_norm": 0.46641266345977783, "learning_rate": 1.67764861160465e-05, "loss": 0.0864, "step": 9525 }, { "epoch": 0.4837809025838875, "grad_norm": 0.4092983603477478, "learning_rate": 1.6774793982774085e-05, "loss": 0.1078, "step": 9530 }, { "epoch": 0.48403472257475, "grad_norm": 0.4319418668746948, "learning_rate": 1.677310184950167e-05, "loss": 0.1056, "step": 9535 }, { "epoch": 0.4842885425656125, "grad_norm": 0.3443421423435211, "learning_rate": 1.6771409716229252e-05, "loss": 0.098, "step": 9540 }, { "epoch": 0.4845423625564749, "grad_norm": 0.5126322507858276, "learning_rate": 1.6769717582956836e-05, "loss": 0.1052, "step": 9545 }, { "epoch": 0.4847961825473374, "grad_norm": 0.4677981436252594, "learning_rate": 1.6768025449684416e-05, "loss": 0.1123, "step": 9550 }, { "epoch": 0.4850500025381999, "grad_norm": 0.5052584409713745, "learning_rate": 1.6766333316412003e-05, "loss": 0.1084, "step": 9555 }, { "epoch": 0.4853038225290624, "grad_norm": 0.5080714821815491, "learning_rate": 1.6764641183139586e-05, "loss": 0.1052, "step": 9560 }, { "epoch": 0.4855576425199249, "grad_norm": 0.4302172064781189, "learning_rate": 1.676294904986717e-05, "loss": 0.1023, "step": 9565 }, { "epoch": 0.48581146251078733, "grad_norm": 0.4493500292301178, "learning_rate": 1.6761256916594753e-05, "loss": 0.1187, "step": 9570 }, { "epoch": 0.4860652825016498, "grad_norm": 0.3389817178249359, "learning_rate": 1.6759564783322334e-05, "loss": 0.1063, "step": 9575 }, { "epoch": 0.4863191024925123, "grad_norm": 0.4281669855117798, "learning_rate": 1.675787265004992e-05, "loss": 0.1004, "step": 9580 }, { "epoch": 0.4865729224833748, "grad_norm": 0.5047438740730286, "learning_rate": 1.6756180516777504e-05, "loss": 0.1168, "step": 9585 }, { "epoch": 0.4868267424742373, "grad_norm": 0.6686415076255798, "learning_rate": 1.6754488383505084e-05, "loss": 0.1013, "step": 9590 }, { "epoch": 0.48708056246509973, "grad_norm": 0.5081450939178467, "learning_rate": 1.675279625023267e-05, "loss": 0.1007, "step": 9595 }, { "epoch": 0.4873343824559622, "grad_norm": 0.5452947616577148, "learning_rate": 1.675110411696025e-05, "loss": 0.1072, "step": 9600 }, { "epoch": 0.4875882024468247, "grad_norm": 0.3936508595943451, "learning_rate": 1.6749411983687838e-05, "loss": 0.0975, "step": 9605 }, { "epoch": 0.4878420224376872, "grad_norm": 0.4514859914779663, "learning_rate": 1.6747719850415422e-05, "loss": 0.0993, "step": 9610 }, { "epoch": 0.4880958424285497, "grad_norm": 0.7531886696815491, "learning_rate": 1.6746027717143002e-05, "loss": 0.099, "step": 9615 }, { "epoch": 0.48834966241941213, "grad_norm": 0.5042106509208679, "learning_rate": 1.674433558387059e-05, "loss": 0.1125, "step": 9620 }, { "epoch": 0.4886034824102746, "grad_norm": 0.6559919714927673, "learning_rate": 1.674264345059817e-05, "loss": 0.1166, "step": 9625 }, { "epoch": 0.4888573024011371, "grad_norm": 0.4015331566333771, "learning_rate": 1.6740951317325753e-05, "loss": 0.1121, "step": 9630 }, { "epoch": 0.4891111223919996, "grad_norm": 0.38791272044181824, "learning_rate": 1.673925918405334e-05, "loss": 0.1058, "step": 9635 }, { "epoch": 0.4893649423828621, "grad_norm": 0.6192878484725952, "learning_rate": 1.673756705078092e-05, "loss": 0.1029, "step": 9640 }, { "epoch": 0.48961876237372454, "grad_norm": 0.3586585819721222, "learning_rate": 1.6735874917508503e-05, "loss": 0.1003, "step": 9645 }, { "epoch": 0.48987258236458703, "grad_norm": 0.5473580956459045, "learning_rate": 1.6734182784236087e-05, "loss": 0.102, "step": 9650 }, { "epoch": 0.4901264023554495, "grad_norm": 0.4406992495059967, "learning_rate": 1.673249065096367e-05, "loss": 0.1019, "step": 9655 }, { "epoch": 0.490380222346312, "grad_norm": 0.6175484657287598, "learning_rate": 1.6730798517691257e-05, "loss": 0.0987, "step": 9660 }, { "epoch": 0.4906340423371745, "grad_norm": 0.5513244271278381, "learning_rate": 1.6729106384418837e-05, "loss": 0.094, "step": 9665 }, { "epoch": 0.49088786232803694, "grad_norm": 0.5684154033660889, "learning_rate": 1.672741425114642e-05, "loss": 0.0943, "step": 9670 }, { "epoch": 0.49114168231889943, "grad_norm": 0.5040678381919861, "learning_rate": 1.6725722117874004e-05, "loss": 0.1, "step": 9675 }, { "epoch": 0.4913955023097619, "grad_norm": 0.5477058291435242, "learning_rate": 1.6724029984601588e-05, "loss": 0.098, "step": 9680 }, { "epoch": 0.4916493223006244, "grad_norm": 0.5544368028640747, "learning_rate": 1.672233785132917e-05, "loss": 0.1033, "step": 9685 }, { "epoch": 0.49190314229148685, "grad_norm": 0.3991355001926422, "learning_rate": 1.6720645718056755e-05, "loss": 0.1013, "step": 9690 }, { "epoch": 0.49215696228234934, "grad_norm": 0.39113104343414307, "learning_rate": 1.671895358478434e-05, "loss": 0.0954, "step": 9695 }, { "epoch": 0.49241078227321183, "grad_norm": 0.5807805061340332, "learning_rate": 1.6717261451511922e-05, "loss": 0.0946, "step": 9700 }, { "epoch": 0.4926646022640743, "grad_norm": 0.6040493249893188, "learning_rate": 1.6715569318239506e-05, "loss": 0.1099, "step": 9705 }, { "epoch": 0.4929184222549368, "grad_norm": 0.8760645985603333, "learning_rate": 1.671387718496709e-05, "loss": 0.1111, "step": 9710 }, { "epoch": 0.49317224224579925, "grad_norm": 0.7517929077148438, "learning_rate": 1.6712185051694673e-05, "loss": 0.1055, "step": 9715 }, { "epoch": 0.49342606223666174, "grad_norm": 0.5098727345466614, "learning_rate": 1.6710492918422256e-05, "loss": 0.0984, "step": 9720 }, { "epoch": 0.49367988222752424, "grad_norm": 0.7740623354911804, "learning_rate": 1.670880078514984e-05, "loss": 0.1011, "step": 9725 }, { "epoch": 0.4939337022183867, "grad_norm": 0.621465802192688, "learning_rate": 1.6707108651877423e-05, "loss": 0.1139, "step": 9730 }, { "epoch": 0.4941875222092492, "grad_norm": 0.5187132358551025, "learning_rate": 1.6705416518605007e-05, "loss": 0.1043, "step": 9735 }, { "epoch": 0.49444134220011166, "grad_norm": 0.42478689551353455, "learning_rate": 1.670372438533259e-05, "loss": 0.1017, "step": 9740 }, { "epoch": 0.49469516219097415, "grad_norm": 0.45251190662384033, "learning_rate": 1.6702032252060174e-05, "loss": 0.114, "step": 9745 }, { "epoch": 0.49494898218183664, "grad_norm": 0.48776471614837646, "learning_rate": 1.6700340118787758e-05, "loss": 0.107, "step": 9750 }, { "epoch": 0.49520280217269913, "grad_norm": 1.0470062494277954, "learning_rate": 1.669864798551534e-05, "loss": 0.0999, "step": 9755 }, { "epoch": 0.4954566221635616, "grad_norm": 0.5062408447265625, "learning_rate": 1.6696955852242925e-05, "loss": 0.1076, "step": 9760 }, { "epoch": 0.49571044215442406, "grad_norm": 0.35752901434898376, "learning_rate": 1.6695263718970508e-05, "loss": 0.1169, "step": 9765 }, { "epoch": 0.49596426214528655, "grad_norm": 0.4572597146034241, "learning_rate": 1.6693571585698092e-05, "loss": 0.0932, "step": 9770 }, { "epoch": 0.49621808213614904, "grad_norm": 1.1978650093078613, "learning_rate": 1.6691879452425675e-05, "loss": 0.1062, "step": 9775 }, { "epoch": 0.49647190212701153, "grad_norm": 0.5021398663520813, "learning_rate": 1.6690187319153256e-05, "loss": 0.1198, "step": 9780 }, { "epoch": 0.496725722117874, "grad_norm": 0.8845065236091614, "learning_rate": 1.6688495185880842e-05, "loss": 0.1021, "step": 9785 }, { "epoch": 0.49697954210873646, "grad_norm": 0.5153151750564575, "learning_rate": 1.6686803052608426e-05, "loss": 0.1053, "step": 9790 }, { "epoch": 0.49723336209959895, "grad_norm": 0.5133734941482544, "learning_rate": 1.6685110919336006e-05, "loss": 0.0964, "step": 9795 }, { "epoch": 0.49748718209046144, "grad_norm": 0.4482670724391937, "learning_rate": 1.6683418786063593e-05, "loss": 0.1081, "step": 9800 }, { "epoch": 0.49774100208132394, "grad_norm": 0.4168063998222351, "learning_rate": 1.6681726652791173e-05, "loss": 0.1145, "step": 9805 }, { "epoch": 0.4979948220721864, "grad_norm": 0.6683678030967712, "learning_rate": 1.668003451951876e-05, "loss": 0.0899, "step": 9810 }, { "epoch": 0.49824864206304886, "grad_norm": 0.46197474002838135, "learning_rate": 1.6678342386246344e-05, "loss": 0.1042, "step": 9815 }, { "epoch": 0.49850246205391135, "grad_norm": 0.3457307517528534, "learning_rate": 1.6676650252973924e-05, "loss": 0.1019, "step": 9820 }, { "epoch": 0.49875628204477385, "grad_norm": 0.41356801986694336, "learning_rate": 1.667495811970151e-05, "loss": 0.1043, "step": 9825 }, { "epoch": 0.49901010203563634, "grad_norm": 0.43680256605148315, "learning_rate": 1.667326598642909e-05, "loss": 0.1083, "step": 9830 }, { "epoch": 0.49926392202649883, "grad_norm": 0.48716604709625244, "learning_rate": 1.6671573853156675e-05, "loss": 0.1028, "step": 9835 }, { "epoch": 0.49951774201736127, "grad_norm": 0.5266230702400208, "learning_rate": 1.666988171988426e-05, "loss": 0.1009, "step": 9840 }, { "epoch": 0.49977156200822376, "grad_norm": 0.4376629889011383, "learning_rate": 1.666818958661184e-05, "loss": 0.1078, "step": 9845 }, { "epoch": 0.5000253819990862, "grad_norm": 0.48102936148643494, "learning_rate": 1.666649745333943e-05, "loss": 0.092, "step": 9850 }, { "epoch": 0.5002792019899487, "grad_norm": 0.4479629099369049, "learning_rate": 1.666480532006701e-05, "loss": 0.0984, "step": 9855 }, { "epoch": 0.5005330219808112, "grad_norm": 0.429806649684906, "learning_rate": 1.6663113186794592e-05, "loss": 0.09, "step": 9860 }, { "epoch": 0.5007868419716737, "grad_norm": 0.4415090084075928, "learning_rate": 1.666142105352218e-05, "loss": 0.0989, "step": 9865 }, { "epoch": 0.5010406619625362, "grad_norm": 0.4832078516483307, "learning_rate": 1.665972892024976e-05, "loss": 0.1061, "step": 9870 }, { "epoch": 0.5012944819533987, "grad_norm": 0.4944320619106293, "learning_rate": 1.6658036786977343e-05, "loss": 0.1129, "step": 9875 }, { "epoch": 0.5015483019442611, "grad_norm": 0.4951033592224121, "learning_rate": 1.6656344653704926e-05, "loss": 0.1057, "step": 9880 }, { "epoch": 0.5018021219351236, "grad_norm": 0.46183741092681885, "learning_rate": 1.665465252043251e-05, "loss": 0.103, "step": 9885 }, { "epoch": 0.5020559419259861, "grad_norm": 0.4078376293182373, "learning_rate": 1.6652960387160093e-05, "loss": 0.1023, "step": 9890 }, { "epoch": 0.5023097619168486, "grad_norm": 0.4910680949687958, "learning_rate": 1.6651268253887677e-05, "loss": 0.1045, "step": 9895 }, { "epoch": 0.502563581907711, "grad_norm": 0.5417612791061401, "learning_rate": 1.664957612061526e-05, "loss": 0.098, "step": 9900 }, { "epoch": 0.5028174018985735, "grad_norm": 0.44090452790260315, "learning_rate": 1.6647883987342844e-05, "loss": 0.1098, "step": 9905 }, { "epoch": 0.503071221889436, "grad_norm": 0.707319438457489, "learning_rate": 1.6646191854070428e-05, "loss": 0.0929, "step": 9910 }, { "epoch": 0.5033250418802985, "grad_norm": 0.4322894513607025, "learning_rate": 1.664449972079801e-05, "loss": 0.0897, "step": 9915 }, { "epoch": 0.503578861871161, "grad_norm": 0.44595420360565186, "learning_rate": 1.6642807587525595e-05, "loss": 0.1098, "step": 9920 }, { "epoch": 0.5038326818620235, "grad_norm": 0.47478431463241577, "learning_rate": 1.664111545425318e-05, "loss": 0.1152, "step": 9925 }, { "epoch": 0.504086501852886, "grad_norm": 0.4155206084251404, "learning_rate": 1.6639423320980762e-05, "loss": 0.101, "step": 9930 }, { "epoch": 0.5043403218437484, "grad_norm": 0.6963584423065186, "learning_rate": 1.6637731187708345e-05, "loss": 0.0959, "step": 9935 }, { "epoch": 0.5045941418346109, "grad_norm": 0.4272857904434204, "learning_rate": 1.663603905443593e-05, "loss": 0.0983, "step": 9940 }, { "epoch": 0.5048479618254734, "grad_norm": 0.4493730962276459, "learning_rate": 1.6634346921163512e-05, "loss": 0.1028, "step": 9945 }, { "epoch": 0.5051017818163358, "grad_norm": 0.5067510008811951, "learning_rate": 1.6632654787891096e-05, "loss": 0.0942, "step": 9950 }, { "epoch": 0.5053556018071983, "grad_norm": 0.5519894361495972, "learning_rate": 1.663096265461868e-05, "loss": 0.0949, "step": 9955 }, { "epoch": 0.5056094217980608, "grad_norm": 0.4868788719177246, "learning_rate": 1.6629270521346263e-05, "loss": 0.1023, "step": 9960 }, { "epoch": 0.5058632417889233, "grad_norm": 0.40864047408103943, "learning_rate": 1.6627578388073847e-05, "loss": 0.1036, "step": 9965 }, { "epoch": 0.5061170617797858, "grad_norm": 0.33604058623313904, "learning_rate": 1.662588625480143e-05, "loss": 0.0921, "step": 9970 }, { "epoch": 0.5063708817706483, "grad_norm": 0.39679473638534546, "learning_rate": 1.6624194121529014e-05, "loss": 0.1007, "step": 9975 }, { "epoch": 0.5066247017615108, "grad_norm": 0.4948192238807678, "learning_rate": 1.6622501988256597e-05, "loss": 0.104, "step": 9980 }, { "epoch": 0.5068785217523732, "grad_norm": 0.4956167936325073, "learning_rate": 1.6620809854984177e-05, "loss": 0.1078, "step": 9985 }, { "epoch": 0.5071323417432357, "grad_norm": 1.0531851053237915, "learning_rate": 1.6619117721711764e-05, "loss": 0.102, "step": 9990 }, { "epoch": 0.5073861617340982, "grad_norm": 0.3761901557445526, "learning_rate": 1.6617425588439348e-05, "loss": 0.108, "step": 9995 }, { "epoch": 0.5076399817249606, "grad_norm": 0.42027950286865234, "learning_rate": 1.661573345516693e-05, "loss": 0.0946, "step": 10000 }, { "epoch": 0.5078938017158231, "grad_norm": 0.44332537055015564, "learning_rate": 1.6614041321894515e-05, "loss": 0.1023, "step": 10005 }, { "epoch": 0.5081476217066856, "grad_norm": 0.35272926092147827, "learning_rate": 1.6612349188622095e-05, "loss": 0.0892, "step": 10010 }, { "epoch": 0.5084014416975481, "grad_norm": 0.4161962866783142, "learning_rate": 1.6610657055349682e-05, "loss": 0.0938, "step": 10015 }, { "epoch": 0.5086552616884106, "grad_norm": 1.0567114353179932, "learning_rate": 1.6608964922077266e-05, "loss": 0.1041, "step": 10020 }, { "epoch": 0.5089090816792731, "grad_norm": 0.40777215361595154, "learning_rate": 1.6607272788804846e-05, "loss": 0.103, "step": 10025 }, { "epoch": 0.5091629016701356, "grad_norm": 0.6142024993896484, "learning_rate": 1.6605580655532433e-05, "loss": 0.1047, "step": 10030 }, { "epoch": 0.509416721660998, "grad_norm": 0.6380098462104797, "learning_rate": 1.6603888522260013e-05, "loss": 0.1104, "step": 10035 }, { "epoch": 0.5096705416518605, "grad_norm": 0.5984404683113098, "learning_rate": 1.6602196388987596e-05, "loss": 0.097, "step": 10040 }, { "epoch": 0.509924361642723, "grad_norm": 0.4230678081512451, "learning_rate": 1.6600504255715183e-05, "loss": 0.1015, "step": 10045 }, { "epoch": 0.5101781816335854, "grad_norm": 0.38381847739219666, "learning_rate": 1.6598812122442764e-05, "loss": 0.0914, "step": 10050 }, { "epoch": 0.5104320016244479, "grad_norm": 0.44329530000686646, "learning_rate": 1.659711998917035e-05, "loss": 0.1188, "step": 10055 }, { "epoch": 0.5106858216153104, "grad_norm": 0.351794570684433, "learning_rate": 1.659542785589793e-05, "loss": 0.1054, "step": 10060 }, { "epoch": 0.5109396416061729, "grad_norm": 0.596990704536438, "learning_rate": 1.6593735722625514e-05, "loss": 0.1058, "step": 10065 }, { "epoch": 0.5111934615970354, "grad_norm": 0.53257155418396, "learning_rate": 1.65920435893531e-05, "loss": 0.0999, "step": 10070 }, { "epoch": 0.5114472815878979, "grad_norm": 0.768983781337738, "learning_rate": 1.659035145608068e-05, "loss": 0.0938, "step": 10075 }, { "epoch": 0.5117011015787604, "grad_norm": 0.6798129081726074, "learning_rate": 1.6588659322808265e-05, "loss": 0.0976, "step": 10080 }, { "epoch": 0.5119549215696229, "grad_norm": 0.4623749256134033, "learning_rate": 1.658696718953585e-05, "loss": 0.1173, "step": 10085 }, { "epoch": 0.5122087415604853, "grad_norm": 0.3781088590621948, "learning_rate": 1.6585275056263432e-05, "loss": 0.0992, "step": 10090 }, { "epoch": 0.5124625615513478, "grad_norm": 0.5837762951850891, "learning_rate": 1.6583582922991015e-05, "loss": 0.0996, "step": 10095 }, { "epoch": 0.5127163815422102, "grad_norm": 0.5101218819618225, "learning_rate": 1.65818907897186e-05, "loss": 0.0951, "step": 10100 }, { "epoch": 0.5129702015330727, "grad_norm": 0.5419501066207886, "learning_rate": 1.6580198656446183e-05, "loss": 0.1077, "step": 10105 }, { "epoch": 0.5132240215239352, "grad_norm": 0.28744372725486755, "learning_rate": 1.6578506523173766e-05, "loss": 0.0928, "step": 10110 }, { "epoch": 0.5134778415147977, "grad_norm": 0.43558162450790405, "learning_rate": 1.657681438990135e-05, "loss": 0.0963, "step": 10115 }, { "epoch": 0.5137316615056602, "grad_norm": 0.35775113105773926, "learning_rate": 1.6575122256628933e-05, "loss": 0.0956, "step": 10120 }, { "epoch": 0.5139854814965227, "grad_norm": 0.7168741822242737, "learning_rate": 1.6573430123356517e-05, "loss": 0.0902, "step": 10125 }, { "epoch": 0.5142393014873852, "grad_norm": 0.3916659355163574, "learning_rate": 1.65717379900841e-05, "loss": 0.105, "step": 10130 }, { "epoch": 0.5144931214782477, "grad_norm": 0.418158620595932, "learning_rate": 1.6570045856811684e-05, "loss": 0.0916, "step": 10135 }, { "epoch": 0.5147469414691102, "grad_norm": 0.678088903427124, "learning_rate": 1.6568353723539267e-05, "loss": 0.1082, "step": 10140 }, { "epoch": 0.5150007614599725, "grad_norm": 0.42723122239112854, "learning_rate": 1.656666159026685e-05, "loss": 0.1036, "step": 10145 }, { "epoch": 0.515254581450835, "grad_norm": 0.6436286568641663, "learning_rate": 1.6564969456994434e-05, "loss": 0.109, "step": 10150 }, { "epoch": 0.5155084014416975, "grad_norm": 0.7051729559898376, "learning_rate": 1.6563277323722018e-05, "loss": 0.1164, "step": 10155 }, { "epoch": 0.51576222143256, "grad_norm": 0.5349159836769104, "learning_rate": 1.65615851904496e-05, "loss": 0.1171, "step": 10160 }, { "epoch": 0.5160160414234225, "grad_norm": 0.6342788338661194, "learning_rate": 1.6559893057177185e-05, "loss": 0.114, "step": 10165 }, { "epoch": 0.516269861414285, "grad_norm": 0.4344607889652252, "learning_rate": 1.655820092390477e-05, "loss": 0.0888, "step": 10170 }, { "epoch": 0.5165236814051475, "grad_norm": 0.6470787525177002, "learning_rate": 1.6556508790632352e-05, "loss": 0.0952, "step": 10175 }, { "epoch": 0.51677750139601, "grad_norm": 0.6785343289375305, "learning_rate": 1.6554816657359936e-05, "loss": 0.111, "step": 10180 }, { "epoch": 0.5170313213868725, "grad_norm": 0.41532400250434875, "learning_rate": 1.655312452408752e-05, "loss": 0.0911, "step": 10185 }, { "epoch": 0.517285141377735, "grad_norm": 0.4153634309768677, "learning_rate": 1.65514323908151e-05, "loss": 0.0986, "step": 10190 }, { "epoch": 0.5175389613685973, "grad_norm": 0.49320197105407715, "learning_rate": 1.6549740257542686e-05, "loss": 0.1052, "step": 10195 }, { "epoch": 0.5177927813594598, "grad_norm": 0.44718942046165466, "learning_rate": 1.654804812427027e-05, "loss": 0.1064, "step": 10200 }, { "epoch": 0.5180466013503223, "grad_norm": 0.49730628728866577, "learning_rate": 1.6546355990997853e-05, "loss": 0.0981, "step": 10205 }, { "epoch": 0.5183004213411848, "grad_norm": 0.7655092477798462, "learning_rate": 1.6544663857725437e-05, "loss": 0.1057, "step": 10210 }, { "epoch": 0.5185542413320473, "grad_norm": 0.3895924389362335, "learning_rate": 1.6542971724453017e-05, "loss": 0.0978, "step": 10215 }, { "epoch": 0.5188080613229098, "grad_norm": 0.5672686696052551, "learning_rate": 1.6541279591180604e-05, "loss": 0.1043, "step": 10220 }, { "epoch": 0.5190618813137723, "grad_norm": 0.5215070843696594, "learning_rate": 1.6539587457908188e-05, "loss": 0.1016, "step": 10225 }, { "epoch": 0.5193157013046348, "grad_norm": 0.5295329689979553, "learning_rate": 1.6537895324635768e-05, "loss": 0.0954, "step": 10230 }, { "epoch": 0.5195695212954973, "grad_norm": 0.4239153563976288, "learning_rate": 1.6536203191363355e-05, "loss": 0.1025, "step": 10235 }, { "epoch": 0.5198233412863598, "grad_norm": 0.597144365310669, "learning_rate": 1.6534511058090935e-05, "loss": 0.0936, "step": 10240 }, { "epoch": 0.5200771612772221, "grad_norm": 0.4462467133998871, "learning_rate": 1.6532818924818522e-05, "loss": 0.1048, "step": 10245 }, { "epoch": 0.5203309812680846, "grad_norm": 0.7833221554756165, "learning_rate": 1.6531126791546105e-05, "loss": 0.1123, "step": 10250 }, { "epoch": 0.5205848012589471, "grad_norm": 0.7217886447906494, "learning_rate": 1.6529434658273685e-05, "loss": 0.1007, "step": 10255 }, { "epoch": 0.5208386212498096, "grad_norm": 0.6208773255348206, "learning_rate": 1.6527742525001272e-05, "loss": 0.088, "step": 10260 }, { "epoch": 0.5210924412406721, "grad_norm": 0.5365100502967834, "learning_rate": 1.6526050391728853e-05, "loss": 0.1052, "step": 10265 }, { "epoch": 0.5213462612315346, "grad_norm": 0.4037196636199951, "learning_rate": 1.6524358258456436e-05, "loss": 0.0958, "step": 10270 }, { "epoch": 0.5216000812223971, "grad_norm": 0.367753267288208, "learning_rate": 1.6522666125184023e-05, "loss": 0.0873, "step": 10275 }, { "epoch": 0.5218539012132596, "grad_norm": 0.5729825496673584, "learning_rate": 1.6520973991911603e-05, "loss": 0.1014, "step": 10280 }, { "epoch": 0.5221077212041221, "grad_norm": 0.3999881446361542, "learning_rate": 1.6519281858639187e-05, "loss": 0.0905, "step": 10285 }, { "epoch": 0.5223615411949846, "grad_norm": 0.5186746716499329, "learning_rate": 1.651758972536677e-05, "loss": 0.0934, "step": 10290 }, { "epoch": 0.522615361185847, "grad_norm": 0.4636486768722534, "learning_rate": 1.6515897592094354e-05, "loss": 0.0943, "step": 10295 }, { "epoch": 0.5228691811767094, "grad_norm": 0.5422764420509338, "learning_rate": 1.651420545882194e-05, "loss": 0.0939, "step": 10300 }, { "epoch": 0.5231230011675719, "grad_norm": 0.4960278570652008, "learning_rate": 1.651251332554952e-05, "loss": 0.089, "step": 10305 }, { "epoch": 0.5233768211584344, "grad_norm": 0.47158360481262207, "learning_rate": 1.6510821192277104e-05, "loss": 0.1025, "step": 10310 }, { "epoch": 0.5236306411492969, "grad_norm": 0.5370834469795227, "learning_rate": 1.6509129059004688e-05, "loss": 0.0955, "step": 10315 }, { "epoch": 0.5238844611401594, "grad_norm": 1.4522969722747803, "learning_rate": 1.650743692573227e-05, "loss": 0.1046, "step": 10320 }, { "epoch": 0.5241382811310219, "grad_norm": 0.5088288187980652, "learning_rate": 1.6505744792459855e-05, "loss": 0.1002, "step": 10325 }, { "epoch": 0.5243921011218844, "grad_norm": 0.6046572327613831, "learning_rate": 1.650405265918744e-05, "loss": 0.0896, "step": 10330 }, { "epoch": 0.5246459211127469, "grad_norm": 0.4464103877544403, "learning_rate": 1.6502360525915022e-05, "loss": 0.1115, "step": 10335 }, { "epoch": 0.5248997411036094, "grad_norm": 0.42552125453948975, "learning_rate": 1.6500668392642606e-05, "loss": 0.0953, "step": 10340 }, { "epoch": 0.5251535610944718, "grad_norm": 0.3787493407726288, "learning_rate": 1.649897625937019e-05, "loss": 0.0923, "step": 10345 }, { "epoch": 0.5254073810853342, "grad_norm": 0.7609786987304688, "learning_rate": 1.6497284126097773e-05, "loss": 0.0985, "step": 10350 }, { "epoch": 0.5256612010761967, "grad_norm": 0.41064974665641785, "learning_rate": 1.6495591992825356e-05, "loss": 0.1105, "step": 10355 }, { "epoch": 0.5259150210670592, "grad_norm": 0.4209999740123749, "learning_rate": 1.649389985955294e-05, "loss": 0.0995, "step": 10360 }, { "epoch": 0.5261688410579217, "grad_norm": 0.405991792678833, "learning_rate": 1.6492207726280523e-05, "loss": 0.0927, "step": 10365 }, { "epoch": 0.5264226610487842, "grad_norm": 0.47690391540527344, "learning_rate": 1.6490515593008107e-05, "loss": 0.1119, "step": 10370 }, { "epoch": 0.5266764810396467, "grad_norm": 0.3825083076953888, "learning_rate": 1.648882345973569e-05, "loss": 0.1069, "step": 10375 }, { "epoch": 0.5269303010305092, "grad_norm": 0.6007245779037476, "learning_rate": 1.6487131326463274e-05, "loss": 0.0949, "step": 10380 }, { "epoch": 0.5271841210213717, "grad_norm": 0.41400671005249023, "learning_rate": 1.6485439193190858e-05, "loss": 0.0959, "step": 10385 }, { "epoch": 0.5274379410122341, "grad_norm": 0.8201514482498169, "learning_rate": 1.648374705991844e-05, "loss": 0.0948, "step": 10390 }, { "epoch": 0.5276917610030966, "grad_norm": 0.4177849292755127, "learning_rate": 1.6482054926646025e-05, "loss": 0.1071, "step": 10395 }, { "epoch": 0.527945580993959, "grad_norm": 0.45180994272232056, "learning_rate": 1.6480362793373608e-05, "loss": 0.1038, "step": 10400 }, { "epoch": 0.5281994009848215, "grad_norm": 0.5408563613891602, "learning_rate": 1.6478670660101192e-05, "loss": 0.1203, "step": 10405 }, { "epoch": 0.528453220975684, "grad_norm": 0.4275490939617157, "learning_rate": 1.6476978526828775e-05, "loss": 0.1016, "step": 10410 }, { "epoch": 0.5287070409665465, "grad_norm": 0.4530520439147949, "learning_rate": 1.647528639355636e-05, "loss": 0.1036, "step": 10415 }, { "epoch": 0.528960860957409, "grad_norm": 0.49070650339126587, "learning_rate": 1.647359426028394e-05, "loss": 0.0977, "step": 10420 }, { "epoch": 0.5292146809482715, "grad_norm": 0.4170216917991638, "learning_rate": 1.6471902127011526e-05, "loss": 0.1227, "step": 10425 }, { "epoch": 0.529468500939134, "grad_norm": 0.3593534827232361, "learning_rate": 1.647020999373911e-05, "loss": 0.1125, "step": 10430 }, { "epoch": 0.5297223209299965, "grad_norm": 0.49653005599975586, "learning_rate": 1.646851786046669e-05, "loss": 0.0916, "step": 10435 }, { "epoch": 0.5299761409208589, "grad_norm": 0.536085844039917, "learning_rate": 1.6466825727194277e-05, "loss": 0.0895, "step": 10440 }, { "epoch": 0.5302299609117214, "grad_norm": 0.4254559278488159, "learning_rate": 1.6465133593921857e-05, "loss": 0.0948, "step": 10445 }, { "epoch": 0.5304837809025839, "grad_norm": 0.3751010000705719, "learning_rate": 1.6463441460649444e-05, "loss": 0.0907, "step": 10450 }, { "epoch": 0.5307376008934463, "grad_norm": 0.4354502558708191, "learning_rate": 1.6461749327377027e-05, "loss": 0.1001, "step": 10455 }, { "epoch": 0.5309914208843088, "grad_norm": 0.3564659357070923, "learning_rate": 1.6460057194104607e-05, "loss": 0.0916, "step": 10460 }, { "epoch": 0.5312452408751713, "grad_norm": 0.47137919068336487, "learning_rate": 1.6458365060832194e-05, "loss": 0.1, "step": 10465 }, { "epoch": 0.5314990608660338, "grad_norm": 0.32844093441963196, "learning_rate": 1.6456672927559775e-05, "loss": 0.0874, "step": 10470 }, { "epoch": 0.5317528808568963, "grad_norm": 0.521373450756073, "learning_rate": 1.6454980794287358e-05, "loss": 0.1061, "step": 10475 }, { "epoch": 0.5320067008477588, "grad_norm": 0.36201703548431396, "learning_rate": 1.6453288661014945e-05, "loss": 0.1006, "step": 10480 }, { "epoch": 0.5322605208386213, "grad_norm": 0.5359712839126587, "learning_rate": 1.6451596527742525e-05, "loss": 0.092, "step": 10485 }, { "epoch": 0.5325143408294837, "grad_norm": 0.38301995396614075, "learning_rate": 1.6449904394470112e-05, "loss": 0.0994, "step": 10490 }, { "epoch": 0.5327681608203462, "grad_norm": 0.3559681475162506, "learning_rate": 1.6448212261197692e-05, "loss": 0.0872, "step": 10495 }, { "epoch": 0.5330219808112087, "grad_norm": 0.4657198488712311, "learning_rate": 1.6446520127925276e-05, "loss": 0.0962, "step": 10500 }, { "epoch": 0.5332758008020712, "grad_norm": 0.8297700881958008, "learning_rate": 1.6444827994652863e-05, "loss": 0.1149, "step": 10505 }, { "epoch": 0.5335296207929336, "grad_norm": 0.4492545425891876, "learning_rate": 1.6443135861380443e-05, "loss": 0.0958, "step": 10510 }, { "epoch": 0.5337834407837961, "grad_norm": 0.41303345561027527, "learning_rate": 1.6441443728108026e-05, "loss": 0.0971, "step": 10515 }, { "epoch": 0.5340372607746586, "grad_norm": 0.4111636281013489, "learning_rate": 1.643975159483561e-05, "loss": 0.0831, "step": 10520 }, { "epoch": 0.5342910807655211, "grad_norm": 0.5072298049926758, "learning_rate": 1.6438059461563193e-05, "loss": 0.0935, "step": 10525 }, { "epoch": 0.5345449007563836, "grad_norm": 0.43646836280822754, "learning_rate": 1.6436367328290777e-05, "loss": 0.1003, "step": 10530 }, { "epoch": 0.5347987207472461, "grad_norm": 0.5552557110786438, "learning_rate": 1.643467519501836e-05, "loss": 0.0961, "step": 10535 }, { "epoch": 0.5350525407381085, "grad_norm": 0.398117333650589, "learning_rate": 1.6432983061745944e-05, "loss": 0.1038, "step": 10540 }, { "epoch": 0.535306360728971, "grad_norm": 0.301830530166626, "learning_rate": 1.6431290928473528e-05, "loss": 0.0828, "step": 10545 }, { "epoch": 0.5355601807198335, "grad_norm": 0.43275371193885803, "learning_rate": 1.642959879520111e-05, "loss": 0.1077, "step": 10550 }, { "epoch": 0.535814000710696, "grad_norm": 0.3594335913658142, "learning_rate": 1.6427906661928695e-05, "loss": 0.0974, "step": 10555 }, { "epoch": 0.5360678207015585, "grad_norm": 0.3689029812812805, "learning_rate": 1.6426214528656278e-05, "loss": 0.0762, "step": 10560 }, { "epoch": 0.5363216406924209, "grad_norm": 0.7268462777137756, "learning_rate": 1.6424522395383862e-05, "loss": 0.0943, "step": 10565 }, { "epoch": 0.5365754606832834, "grad_norm": 0.5907272696495056, "learning_rate": 1.6422830262111445e-05, "loss": 0.1044, "step": 10570 }, { "epoch": 0.5368292806741459, "grad_norm": 0.640363335609436, "learning_rate": 1.642113812883903e-05, "loss": 0.0887, "step": 10575 }, { "epoch": 0.5370831006650084, "grad_norm": 0.9156871438026428, "learning_rate": 1.6419445995566612e-05, "loss": 0.1107, "step": 10580 }, { "epoch": 0.5373369206558709, "grad_norm": 0.449605792760849, "learning_rate": 1.6417753862294196e-05, "loss": 0.0975, "step": 10585 }, { "epoch": 0.5375907406467333, "grad_norm": 0.4629546105861664, "learning_rate": 1.641606172902178e-05, "loss": 0.0946, "step": 10590 }, { "epoch": 0.5378445606375958, "grad_norm": 0.7200871109962463, "learning_rate": 1.6414369595749363e-05, "loss": 0.1041, "step": 10595 }, { "epoch": 0.5380983806284583, "grad_norm": 0.4900858402252197, "learning_rate": 1.6412677462476947e-05, "loss": 0.1005, "step": 10600 }, { "epoch": 0.5383522006193208, "grad_norm": 0.3713901937007904, "learning_rate": 1.641098532920453e-05, "loss": 0.0933, "step": 10605 }, { "epoch": 0.5386060206101833, "grad_norm": 0.4475940763950348, "learning_rate": 1.6409293195932114e-05, "loss": 0.0977, "step": 10610 }, { "epoch": 0.5388598406010457, "grad_norm": 0.5317751169204712, "learning_rate": 1.6407601062659697e-05, "loss": 0.109, "step": 10615 }, { "epoch": 0.5391136605919082, "grad_norm": 0.4223870635032654, "learning_rate": 1.640590892938728e-05, "loss": 0.0951, "step": 10620 }, { "epoch": 0.5393674805827707, "grad_norm": 0.4437798261642456, "learning_rate": 1.640421679611486e-05, "loss": 0.0971, "step": 10625 }, { "epoch": 0.5396213005736332, "grad_norm": 0.5337457060813904, "learning_rate": 1.6402524662842448e-05, "loss": 0.0893, "step": 10630 }, { "epoch": 0.5398751205644957, "grad_norm": 0.5861578583717346, "learning_rate": 1.640083252957003e-05, "loss": 0.1028, "step": 10635 }, { "epoch": 0.5401289405553581, "grad_norm": 0.46542319655418396, "learning_rate": 1.6399140396297615e-05, "loss": 0.0978, "step": 10640 }, { "epoch": 0.5403827605462206, "grad_norm": 0.5477737188339233, "learning_rate": 1.63974482630252e-05, "loss": 0.0928, "step": 10645 }, { "epoch": 0.5406365805370831, "grad_norm": 0.38435491919517517, "learning_rate": 1.639575612975278e-05, "loss": 0.1031, "step": 10650 }, { "epoch": 0.5408904005279456, "grad_norm": 0.632420003414154, "learning_rate": 1.6394063996480366e-05, "loss": 0.1089, "step": 10655 }, { "epoch": 0.5411442205188081, "grad_norm": 0.2502029240131378, "learning_rate": 1.639237186320795e-05, "loss": 0.1011, "step": 10660 }, { "epoch": 0.5413980405096706, "grad_norm": 0.7792786955833435, "learning_rate": 1.639067972993553e-05, "loss": 0.1021, "step": 10665 }, { "epoch": 0.541651860500533, "grad_norm": 0.36001554131507874, "learning_rate": 1.6388987596663116e-05, "loss": 0.1007, "step": 10670 }, { "epoch": 0.5419056804913955, "grad_norm": 0.6201646327972412, "learning_rate": 1.6387295463390696e-05, "loss": 0.1045, "step": 10675 }, { "epoch": 0.542159500482258, "grad_norm": 0.6190532445907593, "learning_rate": 1.638560333011828e-05, "loss": 0.0953, "step": 10680 }, { "epoch": 0.5424133204731204, "grad_norm": 0.7013592720031738, "learning_rate": 1.6383911196845867e-05, "loss": 0.0991, "step": 10685 }, { "epoch": 0.5426671404639829, "grad_norm": 0.3520493507385254, "learning_rate": 1.6382219063573447e-05, "loss": 0.0828, "step": 10690 }, { "epoch": 0.5429209604548454, "grad_norm": 0.788502037525177, "learning_rate": 1.6380526930301034e-05, "loss": 0.093, "step": 10695 }, { "epoch": 0.5431747804457079, "grad_norm": 0.38444724678993225, "learning_rate": 1.6378834797028614e-05, "loss": 0.0963, "step": 10700 }, { "epoch": 0.5434286004365704, "grad_norm": 0.6140927076339722, "learning_rate": 1.6377142663756198e-05, "loss": 0.103, "step": 10705 }, { "epoch": 0.5436824204274329, "grad_norm": 0.5046095848083496, "learning_rate": 1.6375450530483785e-05, "loss": 0.1056, "step": 10710 }, { "epoch": 0.5439362404182954, "grad_norm": 0.5487456917762756, "learning_rate": 1.6373758397211365e-05, "loss": 0.1221, "step": 10715 }, { "epoch": 0.5441900604091578, "grad_norm": 0.49649783968925476, "learning_rate": 1.637206626393895e-05, "loss": 0.1, "step": 10720 }, { "epoch": 0.5444438804000203, "grad_norm": 0.3561123013496399, "learning_rate": 1.6370374130666532e-05, "loss": 0.0825, "step": 10725 }, { "epoch": 0.5446977003908828, "grad_norm": 0.4641798138618469, "learning_rate": 1.6368681997394115e-05, "loss": 0.0995, "step": 10730 }, { "epoch": 0.5449515203817452, "grad_norm": 0.43982285261154175, "learning_rate": 1.63669898641217e-05, "loss": 0.1151, "step": 10735 }, { "epoch": 0.5452053403726077, "grad_norm": 0.3846445679664612, "learning_rate": 1.6365297730849283e-05, "loss": 0.1044, "step": 10740 }, { "epoch": 0.5454591603634702, "grad_norm": 0.46901729702949524, "learning_rate": 1.6363605597576866e-05, "loss": 0.1155, "step": 10745 }, { "epoch": 0.5457129803543327, "grad_norm": 0.3808036744594574, "learning_rate": 1.636191346430445e-05, "loss": 0.1055, "step": 10750 }, { "epoch": 0.5459668003451952, "grad_norm": 1.3655842542648315, "learning_rate": 1.6360221331032033e-05, "loss": 0.1032, "step": 10755 }, { "epoch": 0.5462206203360577, "grad_norm": 0.4646155834197998, "learning_rate": 1.6358529197759617e-05, "loss": 0.0991, "step": 10760 }, { "epoch": 0.5464744403269202, "grad_norm": 0.6558862924575806, "learning_rate": 1.63568370644872e-05, "loss": 0.1017, "step": 10765 }, { "epoch": 0.5467282603177827, "grad_norm": 0.4608815610408783, "learning_rate": 1.6355144931214784e-05, "loss": 0.1032, "step": 10770 }, { "epoch": 0.5469820803086451, "grad_norm": 0.3188372254371643, "learning_rate": 1.6353452797942367e-05, "loss": 0.0907, "step": 10775 }, { "epoch": 0.5472359002995076, "grad_norm": 0.31106188893318176, "learning_rate": 1.635176066466995e-05, "loss": 0.1002, "step": 10780 }, { "epoch": 0.54748972029037, "grad_norm": 0.37392106652259827, "learning_rate": 1.6350068531397534e-05, "loss": 0.0935, "step": 10785 }, { "epoch": 0.5477435402812325, "grad_norm": 0.3712967038154602, "learning_rate": 1.6348376398125118e-05, "loss": 0.1068, "step": 10790 }, { "epoch": 0.547997360272095, "grad_norm": 1.323493242263794, "learning_rate": 1.63466842648527e-05, "loss": 0.1024, "step": 10795 }, { "epoch": 0.5482511802629575, "grad_norm": 0.3808216452598572, "learning_rate": 1.6344992131580285e-05, "loss": 0.0899, "step": 10800 }, { "epoch": 0.54850500025382, "grad_norm": 0.7817515134811401, "learning_rate": 1.634329999830787e-05, "loss": 0.1175, "step": 10805 }, { "epoch": 0.5487588202446825, "grad_norm": 0.9595544934272766, "learning_rate": 1.6341607865035452e-05, "loss": 0.0989, "step": 10810 }, { "epoch": 0.549012640235545, "grad_norm": 0.5686103701591492, "learning_rate": 1.6339915731763036e-05, "loss": 0.1023, "step": 10815 }, { "epoch": 0.5492664602264075, "grad_norm": 0.457376092672348, "learning_rate": 1.633822359849062e-05, "loss": 0.1002, "step": 10820 }, { "epoch": 0.54952028021727, "grad_norm": 0.954951286315918, "learning_rate": 1.6336531465218203e-05, "loss": 0.1003, "step": 10825 }, { "epoch": 0.5497741002081324, "grad_norm": 0.40505141019821167, "learning_rate": 1.6334839331945783e-05, "loss": 0.0966, "step": 10830 }, { "epoch": 0.5500279201989948, "grad_norm": 0.41799911856651306, "learning_rate": 1.633314719867337e-05, "loss": 0.1108, "step": 10835 }, { "epoch": 0.5502817401898573, "grad_norm": 0.5992372632026672, "learning_rate": 1.6331455065400953e-05, "loss": 0.1106, "step": 10840 }, { "epoch": 0.5505355601807198, "grad_norm": 0.8904902935028076, "learning_rate": 1.6329762932128537e-05, "loss": 0.1022, "step": 10845 }, { "epoch": 0.5507893801715823, "grad_norm": 0.5283609628677368, "learning_rate": 1.632807079885612e-05, "loss": 0.0975, "step": 10850 }, { "epoch": 0.5510432001624448, "grad_norm": 0.49641087651252747, "learning_rate": 1.63263786655837e-05, "loss": 0.099, "step": 10855 }, { "epoch": 0.5512970201533073, "grad_norm": 0.5636726021766663, "learning_rate": 1.6324686532311288e-05, "loss": 0.0957, "step": 10860 }, { "epoch": 0.5515508401441698, "grad_norm": 0.6105141043663025, "learning_rate": 1.632299439903887e-05, "loss": 0.1119, "step": 10865 }, { "epoch": 0.5518046601350323, "grad_norm": 0.43738439679145813, "learning_rate": 1.632130226576645e-05, "loss": 0.0902, "step": 10870 }, { "epoch": 0.5520584801258948, "grad_norm": 0.6451889872550964, "learning_rate": 1.6319610132494038e-05, "loss": 0.0906, "step": 10875 }, { "epoch": 0.5523123001167572, "grad_norm": 0.4356880187988281, "learning_rate": 1.631791799922162e-05, "loss": 0.1015, "step": 10880 }, { "epoch": 0.5525661201076196, "grad_norm": 0.487354040145874, "learning_rate": 1.6316225865949205e-05, "loss": 0.1072, "step": 10885 }, { "epoch": 0.5528199400984821, "grad_norm": 0.41686418652534485, "learning_rate": 1.631453373267679e-05, "loss": 0.0994, "step": 10890 }, { "epoch": 0.5530737600893446, "grad_norm": 0.5132306218147278, "learning_rate": 1.631284159940437e-05, "loss": 0.0994, "step": 10895 }, { "epoch": 0.5533275800802071, "grad_norm": 0.4513254463672638, "learning_rate": 1.6311149466131956e-05, "loss": 0.0963, "step": 10900 }, { "epoch": 0.5535814000710696, "grad_norm": 0.38187047839164734, "learning_rate": 1.6309457332859536e-05, "loss": 0.0969, "step": 10905 }, { "epoch": 0.5538352200619321, "grad_norm": 0.5190130472183228, "learning_rate": 1.630776519958712e-05, "loss": 0.1029, "step": 10910 }, { "epoch": 0.5540890400527946, "grad_norm": 0.3652910888195038, "learning_rate": 1.6306073066314703e-05, "loss": 0.0953, "step": 10915 }, { "epoch": 0.5543428600436571, "grad_norm": 0.4218839406967163, "learning_rate": 1.6304380933042287e-05, "loss": 0.0997, "step": 10920 }, { "epoch": 0.5545966800345196, "grad_norm": 0.4914858043193817, "learning_rate": 1.630268879976987e-05, "loss": 0.1099, "step": 10925 }, { "epoch": 0.554850500025382, "grad_norm": 0.3577395975589752, "learning_rate": 1.6300996666497454e-05, "loss": 0.1083, "step": 10930 }, { "epoch": 0.5551043200162444, "grad_norm": 0.32556432485580444, "learning_rate": 1.6299304533225037e-05, "loss": 0.0865, "step": 10935 }, { "epoch": 0.5553581400071069, "grad_norm": 0.5093722939491272, "learning_rate": 1.629761239995262e-05, "loss": 0.0913, "step": 10940 }, { "epoch": 0.5556119599979694, "grad_norm": 0.5503299832344055, "learning_rate": 1.6295920266680204e-05, "loss": 0.1075, "step": 10945 }, { "epoch": 0.5558657799888319, "grad_norm": 0.6354576945304871, "learning_rate": 1.6294228133407788e-05, "loss": 0.103, "step": 10950 }, { "epoch": 0.5561195999796944, "grad_norm": 0.43695926666259766, "learning_rate": 1.629253600013537e-05, "loss": 0.0993, "step": 10955 }, { "epoch": 0.5563734199705569, "grad_norm": 0.36351871490478516, "learning_rate": 1.6290843866862955e-05, "loss": 0.0972, "step": 10960 }, { "epoch": 0.5566272399614194, "grad_norm": 0.6152383685112, "learning_rate": 1.628915173359054e-05, "loss": 0.1052, "step": 10965 }, { "epoch": 0.5568810599522819, "grad_norm": 0.7461835145950317, "learning_rate": 1.6287459600318122e-05, "loss": 0.0893, "step": 10970 }, { "epoch": 0.5571348799431444, "grad_norm": 0.45629680156707764, "learning_rate": 1.6285767467045706e-05, "loss": 0.0984, "step": 10975 }, { "epoch": 0.5573886999340067, "grad_norm": 0.4399794340133667, "learning_rate": 1.628407533377329e-05, "loss": 0.1036, "step": 10980 }, { "epoch": 0.5576425199248692, "grad_norm": 0.4887857437133789, "learning_rate": 1.6282383200500873e-05, "loss": 0.1022, "step": 10985 }, { "epoch": 0.5578963399157317, "grad_norm": 0.5037972927093506, "learning_rate": 1.6280691067228456e-05, "loss": 0.0961, "step": 10990 }, { "epoch": 0.5581501599065942, "grad_norm": 0.45002323389053345, "learning_rate": 1.627899893395604e-05, "loss": 0.0973, "step": 10995 }, { "epoch": 0.5584039798974567, "grad_norm": 0.6315869688987732, "learning_rate": 1.6277306800683623e-05, "loss": 0.0963, "step": 11000 }, { "epoch": 0.5586577998883192, "grad_norm": 0.3851071000099182, "learning_rate": 1.6275614667411207e-05, "loss": 0.0852, "step": 11005 }, { "epoch": 0.5589116198791817, "grad_norm": 0.6421680450439453, "learning_rate": 1.627392253413879e-05, "loss": 0.1016, "step": 11010 }, { "epoch": 0.5591654398700442, "grad_norm": 0.7815170884132385, "learning_rate": 1.6272230400866374e-05, "loss": 0.0833, "step": 11015 }, { "epoch": 0.5594192598609067, "grad_norm": 0.6230599880218506, "learning_rate": 1.6270538267593958e-05, "loss": 0.0952, "step": 11020 }, { "epoch": 0.5596730798517692, "grad_norm": 0.6255953311920166, "learning_rate": 1.626884613432154e-05, "loss": 0.0935, "step": 11025 }, { "epoch": 0.5599268998426316, "grad_norm": 0.3688150644302368, "learning_rate": 1.6267154001049125e-05, "loss": 0.1016, "step": 11030 }, { "epoch": 0.560180719833494, "grad_norm": 0.41703271865844727, "learning_rate": 1.6265461867776708e-05, "loss": 0.0937, "step": 11035 }, { "epoch": 0.5604345398243565, "grad_norm": 0.6567605137825012, "learning_rate": 1.6263769734504292e-05, "loss": 0.0937, "step": 11040 }, { "epoch": 0.560688359815219, "grad_norm": 0.5505645275115967, "learning_rate": 1.6262077601231875e-05, "loss": 0.1043, "step": 11045 }, { "epoch": 0.5609421798060815, "grad_norm": 0.42129865288734436, "learning_rate": 1.626038546795946e-05, "loss": 0.1049, "step": 11050 }, { "epoch": 0.561195999796944, "grad_norm": 0.5019480586051941, "learning_rate": 1.6258693334687042e-05, "loss": 0.0966, "step": 11055 }, { "epoch": 0.5614498197878065, "grad_norm": 0.44651737809181213, "learning_rate": 1.6257001201414623e-05, "loss": 0.1014, "step": 11060 }, { "epoch": 0.561703639778669, "grad_norm": 0.4271222651004791, "learning_rate": 1.625530906814221e-05, "loss": 0.105, "step": 11065 }, { "epoch": 0.5619574597695315, "grad_norm": 0.354856938123703, "learning_rate": 1.6253616934869793e-05, "loss": 0.0862, "step": 11070 }, { "epoch": 0.562211279760394, "grad_norm": 0.4536794424057007, "learning_rate": 1.6251924801597373e-05, "loss": 0.0963, "step": 11075 }, { "epoch": 0.5624650997512564, "grad_norm": 0.4090806245803833, "learning_rate": 1.625023266832496e-05, "loss": 0.0874, "step": 11080 }, { "epoch": 0.5627189197421189, "grad_norm": 0.7183617353439331, "learning_rate": 1.624854053505254e-05, "loss": 0.1116, "step": 11085 }, { "epoch": 0.5629727397329813, "grad_norm": 0.8949313759803772, "learning_rate": 1.6246848401780127e-05, "loss": 0.1, "step": 11090 }, { "epoch": 0.5632265597238438, "grad_norm": 0.47640174627304077, "learning_rate": 1.624515626850771e-05, "loss": 0.0952, "step": 11095 }, { "epoch": 0.5634803797147063, "grad_norm": 0.4236536920070648, "learning_rate": 1.624346413523529e-05, "loss": 0.0888, "step": 11100 }, { "epoch": 0.5637341997055688, "grad_norm": 0.4696132242679596, "learning_rate": 1.6241772001962878e-05, "loss": 0.0986, "step": 11105 }, { "epoch": 0.5639880196964313, "grad_norm": 0.48761463165283203, "learning_rate": 1.6240079868690458e-05, "loss": 0.086, "step": 11110 }, { "epoch": 0.5642418396872938, "grad_norm": 0.42676812410354614, "learning_rate": 1.623838773541804e-05, "loss": 0.0887, "step": 11115 }, { "epoch": 0.5644956596781563, "grad_norm": 0.5835752487182617, "learning_rate": 1.6236695602145625e-05, "loss": 0.0797, "step": 11120 }, { "epoch": 0.5647494796690188, "grad_norm": 0.39226260781288147, "learning_rate": 1.623500346887321e-05, "loss": 0.1086, "step": 11125 }, { "epoch": 0.5650032996598812, "grad_norm": 0.4616701006889343, "learning_rate": 1.6233311335600796e-05, "loss": 0.0845, "step": 11130 }, { "epoch": 0.5652571196507437, "grad_norm": 0.6852196455001831, "learning_rate": 1.6231619202328376e-05, "loss": 0.0953, "step": 11135 }, { "epoch": 0.5655109396416061, "grad_norm": 0.5007418990135193, "learning_rate": 1.622992706905596e-05, "loss": 0.1014, "step": 11140 }, { "epoch": 0.5657647596324686, "grad_norm": 0.43455320596694946, "learning_rate": 1.6228234935783543e-05, "loss": 0.0925, "step": 11145 }, { "epoch": 0.5660185796233311, "grad_norm": 0.41454216837882996, "learning_rate": 1.6226542802511126e-05, "loss": 0.0963, "step": 11150 }, { "epoch": 0.5662723996141936, "grad_norm": 0.5282880663871765, "learning_rate": 1.622485066923871e-05, "loss": 0.0926, "step": 11155 }, { "epoch": 0.5665262196050561, "grad_norm": 0.33719152212142944, "learning_rate": 1.6223158535966293e-05, "loss": 0.0983, "step": 11160 }, { "epoch": 0.5667800395959186, "grad_norm": 0.4163464605808258, "learning_rate": 1.6221466402693877e-05, "loss": 0.0993, "step": 11165 }, { "epoch": 0.5670338595867811, "grad_norm": 0.40644052624702454, "learning_rate": 1.621977426942146e-05, "loss": 0.0798, "step": 11170 }, { "epoch": 0.5672876795776436, "grad_norm": 0.42944255471229553, "learning_rate": 1.6218082136149044e-05, "loss": 0.1043, "step": 11175 }, { "epoch": 0.567541499568506, "grad_norm": 0.3915778398513794, "learning_rate": 1.6216390002876628e-05, "loss": 0.0828, "step": 11180 }, { "epoch": 0.5677953195593685, "grad_norm": 0.5200676321983337, "learning_rate": 1.621469786960421e-05, "loss": 0.0916, "step": 11185 }, { "epoch": 0.568049139550231, "grad_norm": 0.5898273587226868, "learning_rate": 1.6213005736331795e-05, "loss": 0.1157, "step": 11190 }, { "epoch": 0.5683029595410934, "grad_norm": 0.4564743638038635, "learning_rate": 1.6211313603059378e-05, "loss": 0.1059, "step": 11195 }, { "epoch": 0.5685567795319559, "grad_norm": 0.42315754294395447, "learning_rate": 1.6209621469786962e-05, "loss": 0.1021, "step": 11200 }, { "epoch": 0.5688105995228184, "grad_norm": 0.4259279668331146, "learning_rate": 1.6207929336514545e-05, "loss": 0.0893, "step": 11205 }, { "epoch": 0.5690644195136809, "grad_norm": 0.4402656555175781, "learning_rate": 1.620623720324213e-05, "loss": 0.085, "step": 11210 }, { "epoch": 0.5693182395045434, "grad_norm": 0.3866107165813446, "learning_rate": 1.6204545069969712e-05, "loss": 0.09, "step": 11215 }, { "epoch": 0.5695720594954059, "grad_norm": 0.4657836854457855, "learning_rate": 1.6202852936697296e-05, "loss": 0.0994, "step": 11220 }, { "epoch": 0.5698258794862683, "grad_norm": 0.40453943610191345, "learning_rate": 1.620116080342488e-05, "loss": 0.0987, "step": 11225 }, { "epoch": 0.5700796994771308, "grad_norm": 0.401993989944458, "learning_rate": 1.6199468670152463e-05, "loss": 0.0847, "step": 11230 }, { "epoch": 0.5703335194679933, "grad_norm": 0.5438871383666992, "learning_rate": 1.6197776536880047e-05, "loss": 0.0964, "step": 11235 }, { "epoch": 0.5705873394588558, "grad_norm": 0.5712496638298035, "learning_rate": 1.619608440360763e-05, "loss": 0.0966, "step": 11240 }, { "epoch": 0.5708411594497182, "grad_norm": 0.3064565658569336, "learning_rate": 1.6194392270335214e-05, "loss": 0.0909, "step": 11245 }, { "epoch": 0.5710949794405807, "grad_norm": 1.4858416318893433, "learning_rate": 1.6192700137062797e-05, "loss": 0.1, "step": 11250 }, { "epoch": 0.5713487994314432, "grad_norm": 0.36369451880455017, "learning_rate": 1.619100800379038e-05, "loss": 0.0975, "step": 11255 }, { "epoch": 0.5716026194223057, "grad_norm": 0.48280322551727295, "learning_rate": 1.6189315870517964e-05, "loss": 0.0852, "step": 11260 }, { "epoch": 0.5718564394131682, "grad_norm": 0.708537220954895, "learning_rate": 1.6187623737245545e-05, "loss": 0.1065, "step": 11265 }, { "epoch": 0.5721102594040307, "grad_norm": 0.5387185215950012, "learning_rate": 1.618593160397313e-05, "loss": 0.0971, "step": 11270 }, { "epoch": 0.5723640793948931, "grad_norm": 0.5026307106018066, "learning_rate": 1.6184239470700715e-05, "loss": 0.0887, "step": 11275 }, { "epoch": 0.5726178993857556, "grad_norm": 0.40268126130104065, "learning_rate": 1.61825473374283e-05, "loss": 0.0892, "step": 11280 }, { "epoch": 0.5728717193766181, "grad_norm": 0.5645183324813843, "learning_rate": 1.6180855204155882e-05, "loss": 0.0984, "step": 11285 }, { "epoch": 0.5731255393674806, "grad_norm": 0.35205450654029846, "learning_rate": 1.6179163070883462e-05, "loss": 0.1013, "step": 11290 }, { "epoch": 0.573379359358343, "grad_norm": 0.42374926805496216, "learning_rate": 1.617747093761105e-05, "loss": 0.0954, "step": 11295 }, { "epoch": 0.5736331793492055, "grad_norm": 0.438722163438797, "learning_rate": 1.617577880433863e-05, "loss": 0.0961, "step": 11300 }, { "epoch": 0.573886999340068, "grad_norm": 0.6137442588806152, "learning_rate": 1.6174086671066213e-05, "loss": 0.0918, "step": 11305 }, { "epoch": 0.5741408193309305, "grad_norm": 0.6508979201316833, "learning_rate": 1.61723945377938e-05, "loss": 0.0934, "step": 11310 }, { "epoch": 0.574394639321793, "grad_norm": 0.6937998533248901, "learning_rate": 1.617070240452138e-05, "loss": 0.104, "step": 11315 }, { "epoch": 0.5746484593126555, "grad_norm": 0.44844573736190796, "learning_rate": 1.6169010271248964e-05, "loss": 0.1024, "step": 11320 }, { "epoch": 0.5749022793035179, "grad_norm": 0.5491892695426941, "learning_rate": 1.6167318137976547e-05, "loss": 0.1098, "step": 11325 }, { "epoch": 0.5751560992943804, "grad_norm": 0.43779659271240234, "learning_rate": 1.616562600470413e-05, "loss": 0.0993, "step": 11330 }, { "epoch": 0.5754099192852429, "grad_norm": 0.3295450508594513, "learning_rate": 1.6163933871431718e-05, "loss": 0.0859, "step": 11335 }, { "epoch": 0.5756637392761054, "grad_norm": 0.43296727538108826, "learning_rate": 1.6162241738159298e-05, "loss": 0.0926, "step": 11340 }, { "epoch": 0.5759175592669679, "grad_norm": 0.5013937950134277, "learning_rate": 1.616054960488688e-05, "loss": 0.0917, "step": 11345 }, { "epoch": 0.5761713792578304, "grad_norm": 0.4258970022201538, "learning_rate": 1.6158857471614465e-05, "loss": 0.1014, "step": 11350 }, { "epoch": 0.5764251992486928, "grad_norm": 0.4371802806854248, "learning_rate": 1.615716533834205e-05, "loss": 0.0894, "step": 11355 }, { "epoch": 0.5766790192395553, "grad_norm": 0.34599819779396057, "learning_rate": 1.6155473205069632e-05, "loss": 0.0877, "step": 11360 }, { "epoch": 0.5769328392304178, "grad_norm": 0.3687082827091217, "learning_rate": 1.6153781071797215e-05, "loss": 0.0881, "step": 11365 }, { "epoch": 0.5771866592212803, "grad_norm": 0.45477935671806335, "learning_rate": 1.61520889385248e-05, "loss": 0.0896, "step": 11370 }, { "epoch": 0.5774404792121427, "grad_norm": 0.4007332921028137, "learning_rate": 1.6150396805252383e-05, "loss": 0.0888, "step": 11375 }, { "epoch": 0.5776942992030052, "grad_norm": 0.44187676906585693, "learning_rate": 1.6148704671979966e-05, "loss": 0.0953, "step": 11380 }, { "epoch": 0.5779481191938677, "grad_norm": 0.3938285708427429, "learning_rate": 1.614701253870755e-05, "loss": 0.0935, "step": 11385 }, { "epoch": 0.5782019391847302, "grad_norm": 0.4192904531955719, "learning_rate": 1.6145320405435133e-05, "loss": 0.0982, "step": 11390 }, { "epoch": 0.5784557591755927, "grad_norm": 0.3519038259983063, "learning_rate": 1.6143628272162717e-05, "loss": 0.095, "step": 11395 }, { "epoch": 0.5787095791664552, "grad_norm": 0.5053529143333435, "learning_rate": 1.61419361388903e-05, "loss": 0.1037, "step": 11400 }, { "epoch": 0.5789633991573176, "grad_norm": 0.5747759938240051, "learning_rate": 1.6140244005617884e-05, "loss": 0.0861, "step": 11405 }, { "epoch": 0.5792172191481801, "grad_norm": 0.5155202150344849, "learning_rate": 1.6138551872345467e-05, "loss": 0.0859, "step": 11410 }, { "epoch": 0.5794710391390426, "grad_norm": 0.799940288066864, "learning_rate": 1.613685973907305e-05, "loss": 0.0922, "step": 11415 }, { "epoch": 0.5797248591299051, "grad_norm": 0.29105183482170105, "learning_rate": 1.6135167605800634e-05, "loss": 0.0794, "step": 11420 }, { "epoch": 0.5799786791207675, "grad_norm": 0.43824175000190735, "learning_rate": 1.6133475472528218e-05, "loss": 0.1006, "step": 11425 }, { "epoch": 0.58023249911163, "grad_norm": 0.8796586990356445, "learning_rate": 1.61317833392558e-05, "loss": 0.0999, "step": 11430 }, { "epoch": 0.5804863191024925, "grad_norm": 0.4385398328304291, "learning_rate": 1.6130091205983385e-05, "loss": 0.0962, "step": 11435 }, { "epoch": 0.580740139093355, "grad_norm": 0.4407016336917877, "learning_rate": 1.612839907271097e-05, "loss": 0.0962, "step": 11440 }, { "epoch": 0.5809939590842175, "grad_norm": 0.37328964471817017, "learning_rate": 1.6126706939438552e-05, "loss": 0.0866, "step": 11445 }, { "epoch": 0.58124777907508, "grad_norm": 0.42281779646873474, "learning_rate": 1.6125014806166136e-05, "loss": 0.1012, "step": 11450 }, { "epoch": 0.5815015990659425, "grad_norm": 0.459942102432251, "learning_rate": 1.612332267289372e-05, "loss": 0.1061, "step": 11455 }, { "epoch": 0.581755419056805, "grad_norm": 0.6190848350524902, "learning_rate": 1.6121630539621303e-05, "loss": 0.1014, "step": 11460 }, { "epoch": 0.5820092390476674, "grad_norm": 0.6544129252433777, "learning_rate": 1.6119938406348886e-05, "loss": 0.0861, "step": 11465 }, { "epoch": 0.5822630590385299, "grad_norm": 0.32813870906829834, "learning_rate": 1.6118246273076466e-05, "loss": 0.0926, "step": 11470 }, { "epoch": 0.5825168790293923, "grad_norm": 0.4258780777454376, "learning_rate": 1.6116554139804053e-05, "loss": 0.0933, "step": 11475 }, { "epoch": 0.5827706990202548, "grad_norm": 0.6019533276557922, "learning_rate": 1.6114862006531634e-05, "loss": 0.0953, "step": 11480 }, { "epoch": 0.5830245190111173, "grad_norm": 0.4072204828262329, "learning_rate": 1.611316987325922e-05, "loss": 0.0951, "step": 11485 }, { "epoch": 0.5832783390019798, "grad_norm": 0.3429082930088043, "learning_rate": 1.6111477739986804e-05, "loss": 0.0879, "step": 11490 }, { "epoch": 0.5835321589928423, "grad_norm": 0.4086165428161621, "learning_rate": 1.6109785606714384e-05, "loss": 0.0996, "step": 11495 }, { "epoch": 0.5837859789837048, "grad_norm": 0.3771967589855194, "learning_rate": 1.610809347344197e-05, "loss": 0.0959, "step": 11500 }, { "epoch": 0.5840397989745673, "grad_norm": 0.4078262746334076, "learning_rate": 1.610640134016955e-05, "loss": 0.0953, "step": 11505 }, { "epoch": 0.5842936189654298, "grad_norm": 1.961870789527893, "learning_rate": 1.6104709206897135e-05, "loss": 0.0964, "step": 11510 }, { "epoch": 0.5845474389562922, "grad_norm": 0.4825584888458252, "learning_rate": 1.6103017073624722e-05, "loss": 0.0995, "step": 11515 }, { "epoch": 0.5848012589471546, "grad_norm": 0.46092280745506287, "learning_rate": 1.6101324940352302e-05, "loss": 0.085, "step": 11520 }, { "epoch": 0.5850550789380171, "grad_norm": 0.5379409193992615, "learning_rate": 1.609963280707989e-05, "loss": 0.1011, "step": 11525 }, { "epoch": 0.5853088989288796, "grad_norm": 0.9948629140853882, "learning_rate": 1.609794067380747e-05, "loss": 0.0914, "step": 11530 }, { "epoch": 0.5855627189197421, "grad_norm": 0.39759644865989685, "learning_rate": 1.6096248540535053e-05, "loss": 0.0939, "step": 11535 }, { "epoch": 0.5858165389106046, "grad_norm": 0.4798009693622589, "learning_rate": 1.609455640726264e-05, "loss": 0.0927, "step": 11540 }, { "epoch": 0.5860703589014671, "grad_norm": 0.3862641453742981, "learning_rate": 1.609286427399022e-05, "loss": 0.0899, "step": 11545 }, { "epoch": 0.5863241788923296, "grad_norm": 0.38452666997909546, "learning_rate": 1.6091172140717803e-05, "loss": 0.0959, "step": 11550 }, { "epoch": 0.5865779988831921, "grad_norm": 0.3630549907684326, "learning_rate": 1.6089480007445387e-05, "loss": 0.0995, "step": 11555 }, { "epoch": 0.5868318188740546, "grad_norm": 0.8909419178962708, "learning_rate": 1.608778787417297e-05, "loss": 0.1095, "step": 11560 }, { "epoch": 0.587085638864917, "grad_norm": 0.3947525918483734, "learning_rate": 1.6086095740900554e-05, "loss": 0.099, "step": 11565 }, { "epoch": 0.5873394588557794, "grad_norm": 0.3510633409023285, "learning_rate": 1.6084403607628137e-05, "loss": 0.0885, "step": 11570 }, { "epoch": 0.5875932788466419, "grad_norm": 0.49123838543891907, "learning_rate": 1.608271147435572e-05, "loss": 0.0867, "step": 11575 }, { "epoch": 0.5878470988375044, "grad_norm": 0.3366958200931549, "learning_rate": 1.6081019341083304e-05, "loss": 0.0974, "step": 11580 }, { "epoch": 0.5881009188283669, "grad_norm": 0.7465676665306091, "learning_rate": 1.6079327207810888e-05, "loss": 0.1032, "step": 11585 }, { "epoch": 0.5883547388192294, "grad_norm": 0.38815176486968994, "learning_rate": 1.607763507453847e-05, "loss": 0.1028, "step": 11590 }, { "epoch": 0.5886085588100919, "grad_norm": 0.42240670323371887, "learning_rate": 1.6075942941266055e-05, "loss": 0.086, "step": 11595 }, { "epoch": 0.5888623788009544, "grad_norm": 0.35436907410621643, "learning_rate": 1.607425080799364e-05, "loss": 0.095, "step": 11600 }, { "epoch": 0.5891161987918169, "grad_norm": 0.3987736999988556, "learning_rate": 1.6072558674721222e-05, "loss": 0.0994, "step": 11605 }, { "epoch": 0.5893700187826794, "grad_norm": 0.5091561079025269, "learning_rate": 1.6070866541448806e-05, "loss": 0.0942, "step": 11610 }, { "epoch": 0.5896238387735419, "grad_norm": 0.3973734974861145, "learning_rate": 1.606917440817639e-05, "loss": 0.0852, "step": 11615 }, { "epoch": 0.5898776587644042, "grad_norm": 0.38369306921958923, "learning_rate": 1.6067482274903973e-05, "loss": 0.0904, "step": 11620 }, { "epoch": 0.5901314787552667, "grad_norm": 0.41540855169296265, "learning_rate": 1.6065790141631556e-05, "loss": 0.083, "step": 11625 }, { "epoch": 0.5903852987461292, "grad_norm": 0.468839168548584, "learning_rate": 1.606409800835914e-05, "loss": 0.1061, "step": 11630 }, { "epoch": 0.5906391187369917, "grad_norm": 0.5892307162284851, "learning_rate": 1.6062405875086723e-05, "loss": 0.0913, "step": 11635 }, { "epoch": 0.5908929387278542, "grad_norm": 0.3928740322589874, "learning_rate": 1.6060713741814307e-05, "loss": 0.0921, "step": 11640 }, { "epoch": 0.5911467587187167, "grad_norm": 0.36110663414001465, "learning_rate": 1.605902160854189e-05, "loss": 0.0927, "step": 11645 }, { "epoch": 0.5914005787095792, "grad_norm": 0.699163019657135, "learning_rate": 1.6057329475269474e-05, "loss": 0.0997, "step": 11650 }, { "epoch": 0.5916543987004417, "grad_norm": 0.43201932311058044, "learning_rate": 1.6055637341997058e-05, "loss": 0.0863, "step": 11655 }, { "epoch": 0.5919082186913042, "grad_norm": 1.3616560697555542, "learning_rate": 1.6053945208724638e-05, "loss": 0.0943, "step": 11660 }, { "epoch": 0.5921620386821667, "grad_norm": 0.44065532088279724, "learning_rate": 1.6052253075452225e-05, "loss": 0.1008, "step": 11665 }, { "epoch": 0.592415858673029, "grad_norm": 0.48787134885787964, "learning_rate": 1.6050560942179808e-05, "loss": 0.0988, "step": 11670 }, { "epoch": 0.5926696786638915, "grad_norm": 0.502856969833374, "learning_rate": 1.6048868808907392e-05, "loss": 0.093, "step": 11675 }, { "epoch": 0.592923498654754, "grad_norm": 0.46943750977516174, "learning_rate": 1.6047176675634975e-05, "loss": 0.0912, "step": 11680 }, { "epoch": 0.5931773186456165, "grad_norm": 0.37175118923187256, "learning_rate": 1.6045484542362555e-05, "loss": 0.0861, "step": 11685 }, { "epoch": 0.593431138636479, "grad_norm": 0.5893447995185852, "learning_rate": 1.6043792409090142e-05, "loss": 0.0894, "step": 11690 }, { "epoch": 0.5936849586273415, "grad_norm": 0.4342266619205475, "learning_rate": 1.6042100275817726e-05, "loss": 0.0919, "step": 11695 }, { "epoch": 0.593938778618204, "grad_norm": 0.3848216235637665, "learning_rate": 1.6040408142545306e-05, "loss": 0.081, "step": 11700 }, { "epoch": 0.5941925986090665, "grad_norm": 0.4635235667228699, "learning_rate": 1.6038716009272893e-05, "loss": 0.0899, "step": 11705 }, { "epoch": 0.594446418599929, "grad_norm": 0.5500293374061584, "learning_rate": 1.6037023876000473e-05, "loss": 0.0902, "step": 11710 }, { "epoch": 0.5947002385907915, "grad_norm": 0.3731316924095154, "learning_rate": 1.6035331742728057e-05, "loss": 0.0846, "step": 11715 }, { "epoch": 0.5949540585816538, "grad_norm": 0.3542387783527374, "learning_rate": 1.6033639609455644e-05, "loss": 0.0959, "step": 11720 }, { "epoch": 0.5952078785725163, "grad_norm": 0.4823991656303406, "learning_rate": 1.6031947476183224e-05, "loss": 0.0959, "step": 11725 }, { "epoch": 0.5954616985633788, "grad_norm": 0.4015558063983917, "learning_rate": 1.603025534291081e-05, "loss": 0.0983, "step": 11730 }, { "epoch": 0.5957155185542413, "grad_norm": 0.4608984589576721, "learning_rate": 1.602856320963839e-05, "loss": 0.0885, "step": 11735 }, { "epoch": 0.5959693385451038, "grad_norm": 0.4496300220489502, "learning_rate": 1.6026871076365974e-05, "loss": 0.0854, "step": 11740 }, { "epoch": 0.5962231585359663, "grad_norm": 0.41843149065971375, "learning_rate": 1.602517894309356e-05, "loss": 0.0882, "step": 11745 }, { "epoch": 0.5964769785268288, "grad_norm": 0.3742343783378601, "learning_rate": 1.602348680982114e-05, "loss": 0.0829, "step": 11750 }, { "epoch": 0.5967307985176913, "grad_norm": 0.5669666528701782, "learning_rate": 1.6021794676548725e-05, "loss": 0.0933, "step": 11755 }, { "epoch": 0.5969846185085538, "grad_norm": 0.37627196311950684, "learning_rate": 1.602010254327631e-05, "loss": 0.0797, "step": 11760 }, { "epoch": 0.5972384384994163, "grad_norm": 0.4670741856098175, "learning_rate": 1.6018410410003892e-05, "loss": 0.0964, "step": 11765 }, { "epoch": 0.5974922584902786, "grad_norm": 0.5946935415267944, "learning_rate": 1.601671827673148e-05, "loss": 0.0848, "step": 11770 }, { "epoch": 0.5977460784811411, "grad_norm": 0.3806981146335602, "learning_rate": 1.601502614345906e-05, "loss": 0.0901, "step": 11775 }, { "epoch": 0.5979998984720036, "grad_norm": 0.4813790023326874, "learning_rate": 1.6013334010186643e-05, "loss": 0.0897, "step": 11780 }, { "epoch": 0.5982537184628661, "grad_norm": 0.33864712715148926, "learning_rate": 1.6011641876914226e-05, "loss": 0.0869, "step": 11785 }, { "epoch": 0.5985075384537286, "grad_norm": 0.4564129114151001, "learning_rate": 1.600994974364181e-05, "loss": 0.089, "step": 11790 }, { "epoch": 0.5987613584445911, "grad_norm": 0.3900724947452545, "learning_rate": 1.6008257610369393e-05, "loss": 0.0748, "step": 11795 }, { "epoch": 0.5990151784354536, "grad_norm": 0.34131884574890137, "learning_rate": 1.6006565477096977e-05, "loss": 0.0985, "step": 11800 }, { "epoch": 0.5992689984263161, "grad_norm": 0.6740922331809998, "learning_rate": 1.600487334382456e-05, "loss": 0.0969, "step": 11805 }, { "epoch": 0.5995228184171786, "grad_norm": 0.41970062255859375, "learning_rate": 1.6003181210552144e-05, "loss": 0.0924, "step": 11810 }, { "epoch": 0.599776638408041, "grad_norm": 0.40490224957466125, "learning_rate": 1.6001489077279728e-05, "loss": 0.0926, "step": 11815 }, { "epoch": 0.6000304583989035, "grad_norm": 0.9713094234466553, "learning_rate": 1.599979694400731e-05, "loss": 0.1022, "step": 11820 }, { "epoch": 0.600284278389766, "grad_norm": 0.3870560824871063, "learning_rate": 1.5998104810734895e-05, "loss": 0.0827, "step": 11825 }, { "epoch": 0.6005380983806284, "grad_norm": 0.44513827562332153, "learning_rate": 1.5996412677462478e-05, "loss": 0.0932, "step": 11830 }, { "epoch": 0.6007919183714909, "grad_norm": 0.42367058992385864, "learning_rate": 1.5994720544190062e-05, "loss": 0.0917, "step": 11835 }, { "epoch": 0.6010457383623534, "grad_norm": 0.5602151155471802, "learning_rate": 1.5993028410917645e-05, "loss": 0.099, "step": 11840 }, { "epoch": 0.6012995583532159, "grad_norm": 0.43050816655158997, "learning_rate": 1.599133627764523e-05, "loss": 0.0836, "step": 11845 }, { "epoch": 0.6015533783440784, "grad_norm": 0.34673821926116943, "learning_rate": 1.5989644144372812e-05, "loss": 0.0992, "step": 11850 }, { "epoch": 0.6018071983349409, "grad_norm": 0.3608914911746979, "learning_rate": 1.5987952011100396e-05, "loss": 0.0941, "step": 11855 }, { "epoch": 0.6020610183258034, "grad_norm": 0.4284670352935791, "learning_rate": 1.598625987782798e-05, "loss": 0.0925, "step": 11860 }, { "epoch": 0.6023148383166658, "grad_norm": 0.5360268950462341, "learning_rate": 1.5984567744555563e-05, "loss": 0.0936, "step": 11865 }, { "epoch": 0.6025686583075283, "grad_norm": 0.34779566526412964, "learning_rate": 1.5982875611283147e-05, "loss": 0.0871, "step": 11870 }, { "epoch": 0.6028224782983908, "grad_norm": 0.43765851855278015, "learning_rate": 1.598118347801073e-05, "loss": 0.0914, "step": 11875 }, { "epoch": 0.6030762982892532, "grad_norm": 0.5663272142410278, "learning_rate": 1.5979491344738314e-05, "loss": 0.084, "step": 11880 }, { "epoch": 0.6033301182801157, "grad_norm": 0.4468323290348053, "learning_rate": 1.5977799211465897e-05, "loss": 0.0927, "step": 11885 }, { "epoch": 0.6035839382709782, "grad_norm": 0.6487022042274475, "learning_rate": 1.5976107078193477e-05, "loss": 0.0985, "step": 11890 }, { "epoch": 0.6038377582618407, "grad_norm": 0.5686355233192444, "learning_rate": 1.5974414944921064e-05, "loss": 0.1071, "step": 11895 }, { "epoch": 0.6040915782527032, "grad_norm": 0.4146348237991333, "learning_rate": 1.5972722811648648e-05, "loss": 0.0844, "step": 11900 }, { "epoch": 0.6043453982435657, "grad_norm": 0.37118101119995117, "learning_rate": 1.5971030678376228e-05, "loss": 0.0891, "step": 11905 }, { "epoch": 0.6045992182344282, "grad_norm": 0.4417383670806885, "learning_rate": 1.5969338545103815e-05, "loss": 0.0932, "step": 11910 }, { "epoch": 0.6048530382252906, "grad_norm": 0.4875818192958832, "learning_rate": 1.5967646411831395e-05, "loss": 0.0851, "step": 11915 }, { "epoch": 0.6051068582161531, "grad_norm": 0.4297184944152832, "learning_rate": 1.5965954278558982e-05, "loss": 0.0887, "step": 11920 }, { "epoch": 0.6053606782070156, "grad_norm": 0.5882421135902405, "learning_rate": 1.5964262145286566e-05, "loss": 0.0828, "step": 11925 }, { "epoch": 0.605614498197878, "grad_norm": 0.4121566712856293, "learning_rate": 1.5962570012014146e-05, "loss": 0.092, "step": 11930 }, { "epoch": 0.6058683181887405, "grad_norm": 0.4164383113384247, "learning_rate": 1.5960877878741733e-05, "loss": 0.0916, "step": 11935 }, { "epoch": 0.606122138179603, "grad_norm": 0.7164746522903442, "learning_rate": 1.5959185745469313e-05, "loss": 0.0945, "step": 11940 }, { "epoch": 0.6063759581704655, "grad_norm": 0.3977768123149872, "learning_rate": 1.5957493612196896e-05, "loss": 0.0871, "step": 11945 }, { "epoch": 0.606629778161328, "grad_norm": 0.5656052827835083, "learning_rate": 1.5955801478924483e-05, "loss": 0.0728, "step": 11950 }, { "epoch": 0.6068835981521905, "grad_norm": 0.6886550188064575, "learning_rate": 1.5954109345652064e-05, "loss": 0.0971, "step": 11955 }, { "epoch": 0.607137418143053, "grad_norm": 0.30135658383369446, "learning_rate": 1.5952417212379647e-05, "loss": 0.097, "step": 11960 }, { "epoch": 0.6073912381339154, "grad_norm": 0.6407579779624939, "learning_rate": 1.595072507910723e-05, "loss": 0.0938, "step": 11965 }, { "epoch": 0.6076450581247779, "grad_norm": 0.3917185068130493, "learning_rate": 1.5949032945834814e-05, "loss": 0.0985, "step": 11970 }, { "epoch": 0.6078988781156404, "grad_norm": 0.38503170013427734, "learning_rate": 1.59473408125624e-05, "loss": 0.0808, "step": 11975 }, { "epoch": 0.6081526981065029, "grad_norm": 0.856484591960907, "learning_rate": 1.594564867928998e-05, "loss": 0.092, "step": 11980 }, { "epoch": 0.6084065180973653, "grad_norm": 0.538896381855011, "learning_rate": 1.5943956546017565e-05, "loss": 0.0954, "step": 11985 }, { "epoch": 0.6086603380882278, "grad_norm": 0.5355377793312073, "learning_rate": 1.594226441274515e-05, "loss": 0.0904, "step": 11990 }, { "epoch": 0.6089141580790903, "grad_norm": 0.5098680853843689, "learning_rate": 1.5940572279472732e-05, "loss": 0.0823, "step": 11995 }, { "epoch": 0.6091679780699528, "grad_norm": 0.4084436595439911, "learning_rate": 1.5938880146200315e-05, "loss": 0.082, "step": 12000 }, { "epoch": 0.6094217980608153, "grad_norm": 0.3876837193965912, "learning_rate": 1.59371880129279e-05, "loss": 0.1021, "step": 12005 }, { "epoch": 0.6096756180516778, "grad_norm": 0.4292503297328949, "learning_rate": 1.5935495879655483e-05, "loss": 0.079, "step": 12010 }, { "epoch": 0.6099294380425402, "grad_norm": 0.533645510673523, "learning_rate": 1.5933803746383066e-05, "loss": 0.0962, "step": 12015 }, { "epoch": 0.6101832580334027, "grad_norm": 0.36521783471107483, "learning_rate": 1.593211161311065e-05, "loss": 0.0956, "step": 12020 }, { "epoch": 0.6104370780242652, "grad_norm": 0.3349459767341614, "learning_rate": 1.5930419479838233e-05, "loss": 0.0904, "step": 12025 }, { "epoch": 0.6106908980151277, "grad_norm": 0.43317875266075134, "learning_rate": 1.5928727346565817e-05, "loss": 0.0913, "step": 12030 }, { "epoch": 0.6109447180059902, "grad_norm": 0.4873206317424774, "learning_rate": 1.59270352132934e-05, "loss": 0.0916, "step": 12035 }, { "epoch": 0.6111985379968526, "grad_norm": 0.38802623748779297, "learning_rate": 1.5925343080020984e-05, "loss": 0.0906, "step": 12040 }, { "epoch": 0.6114523579877151, "grad_norm": 0.5826549530029297, "learning_rate": 1.5923650946748567e-05, "loss": 0.0934, "step": 12045 }, { "epoch": 0.6117061779785776, "grad_norm": 0.5934155583381653, "learning_rate": 1.592195881347615e-05, "loss": 0.0948, "step": 12050 }, { "epoch": 0.6119599979694401, "grad_norm": 0.5034151077270508, "learning_rate": 1.5920266680203734e-05, "loss": 0.0793, "step": 12055 }, { "epoch": 0.6122138179603026, "grad_norm": 0.5423274040222168, "learning_rate": 1.5918574546931318e-05, "loss": 0.0899, "step": 12060 }, { "epoch": 0.612467637951165, "grad_norm": 0.6164039373397827, "learning_rate": 1.59168824136589e-05, "loss": 0.0932, "step": 12065 }, { "epoch": 0.6127214579420275, "grad_norm": 0.5863872766494751, "learning_rate": 1.5915190280386485e-05, "loss": 0.0858, "step": 12070 }, { "epoch": 0.61297527793289, "grad_norm": 0.3876388967037201, "learning_rate": 1.591349814711407e-05, "loss": 0.0785, "step": 12075 }, { "epoch": 0.6132290979237525, "grad_norm": 0.7438451051712036, "learning_rate": 1.5911806013841652e-05, "loss": 0.098, "step": 12080 }, { "epoch": 0.613482917914615, "grad_norm": 0.48748913407325745, "learning_rate": 1.5910113880569236e-05, "loss": 0.0984, "step": 12085 }, { "epoch": 0.6137367379054774, "grad_norm": 1.2986968755722046, "learning_rate": 1.590842174729682e-05, "loss": 0.0888, "step": 12090 }, { "epoch": 0.6139905578963399, "grad_norm": 0.525091826915741, "learning_rate": 1.59067296140244e-05, "loss": 0.0947, "step": 12095 }, { "epoch": 0.6142443778872024, "grad_norm": 0.42398321628570557, "learning_rate": 1.5905037480751986e-05, "loss": 0.0882, "step": 12100 }, { "epoch": 0.6144981978780649, "grad_norm": 0.5521324276924133, "learning_rate": 1.590334534747957e-05, "loss": 0.0885, "step": 12105 }, { "epoch": 0.6147520178689273, "grad_norm": 0.5781256556510925, "learning_rate": 1.5901653214207153e-05, "loss": 0.1041, "step": 12110 }, { "epoch": 0.6150058378597898, "grad_norm": 0.8404202461242676, "learning_rate": 1.5899961080934737e-05, "loss": 0.0966, "step": 12115 }, { "epoch": 0.6152596578506523, "grad_norm": 0.5177925229072571, "learning_rate": 1.5898268947662317e-05, "loss": 0.0846, "step": 12120 }, { "epoch": 0.6155134778415148, "grad_norm": 0.4168604910373688, "learning_rate": 1.5896576814389904e-05, "loss": 0.0937, "step": 12125 }, { "epoch": 0.6157672978323773, "grad_norm": 0.3698541820049286, "learning_rate": 1.5894884681117488e-05, "loss": 0.0905, "step": 12130 }, { "epoch": 0.6160211178232398, "grad_norm": 0.43006083369255066, "learning_rate": 1.5893192547845068e-05, "loss": 0.0896, "step": 12135 }, { "epoch": 0.6162749378141023, "grad_norm": 0.7461796402931213, "learning_rate": 1.5891500414572655e-05, "loss": 0.0853, "step": 12140 }, { "epoch": 0.6165287578049647, "grad_norm": 0.3485049307346344, "learning_rate": 1.5889808281300235e-05, "loss": 0.0941, "step": 12145 }, { "epoch": 0.6167825777958272, "grad_norm": 0.3577573299407959, "learning_rate": 1.588811614802782e-05, "loss": 0.0934, "step": 12150 }, { "epoch": 0.6170363977866897, "grad_norm": 0.43190285563468933, "learning_rate": 1.5886424014755405e-05, "loss": 0.0791, "step": 12155 }, { "epoch": 0.6172902177775521, "grad_norm": 0.3519067168235779, "learning_rate": 1.5884731881482985e-05, "loss": 0.0942, "step": 12160 }, { "epoch": 0.6175440377684146, "grad_norm": 0.35261034965515137, "learning_rate": 1.5883039748210572e-05, "loss": 0.0882, "step": 12165 }, { "epoch": 0.6177978577592771, "grad_norm": 0.39754313230514526, "learning_rate": 1.5881347614938153e-05, "loss": 0.0867, "step": 12170 }, { "epoch": 0.6180516777501396, "grad_norm": 0.5548686981201172, "learning_rate": 1.5879655481665736e-05, "loss": 0.099, "step": 12175 }, { "epoch": 0.6183054977410021, "grad_norm": 0.526914656162262, "learning_rate": 1.5877963348393323e-05, "loss": 0.087, "step": 12180 }, { "epoch": 0.6185593177318646, "grad_norm": 0.6457729339599609, "learning_rate": 1.5876271215120903e-05, "loss": 0.0894, "step": 12185 }, { "epoch": 0.6188131377227271, "grad_norm": 0.8887588381767273, "learning_rate": 1.5874579081848487e-05, "loss": 0.0885, "step": 12190 }, { "epoch": 0.6190669577135895, "grad_norm": 0.6914089918136597, "learning_rate": 1.587288694857607e-05, "loss": 0.0858, "step": 12195 }, { "epoch": 0.619320777704452, "grad_norm": 0.489515095949173, "learning_rate": 1.5871194815303654e-05, "loss": 0.0879, "step": 12200 }, { "epoch": 0.6195745976953145, "grad_norm": 0.5289825797080994, "learning_rate": 1.5869502682031237e-05, "loss": 0.092, "step": 12205 }, { "epoch": 0.6198284176861769, "grad_norm": 0.48612451553344727, "learning_rate": 1.586781054875882e-05, "loss": 0.0885, "step": 12210 }, { "epoch": 0.6200822376770394, "grad_norm": 0.4033755362033844, "learning_rate": 1.5866118415486404e-05, "loss": 0.0861, "step": 12215 }, { "epoch": 0.6203360576679019, "grad_norm": 0.5344930291175842, "learning_rate": 1.5864426282213988e-05, "loss": 0.1081, "step": 12220 }, { "epoch": 0.6205898776587644, "grad_norm": 0.3625963032245636, "learning_rate": 1.586273414894157e-05, "loss": 0.0916, "step": 12225 }, { "epoch": 0.6208436976496269, "grad_norm": 0.44668540358543396, "learning_rate": 1.5861042015669155e-05, "loss": 0.0906, "step": 12230 }, { "epoch": 0.6210975176404894, "grad_norm": 0.47905129194259644, "learning_rate": 1.585934988239674e-05, "loss": 0.0832, "step": 12235 }, { "epoch": 0.6213513376313519, "grad_norm": 0.5475975275039673, "learning_rate": 1.5857657749124322e-05, "loss": 0.0921, "step": 12240 }, { "epoch": 0.6216051576222144, "grad_norm": 0.4593254625797272, "learning_rate": 1.5855965615851906e-05, "loss": 0.0856, "step": 12245 }, { "epoch": 0.6218589776130768, "grad_norm": 0.7837191224098206, "learning_rate": 1.585427348257949e-05, "loss": 0.0886, "step": 12250 }, { "epoch": 0.6221127976039393, "grad_norm": 0.3772871196269989, "learning_rate": 1.5852581349307073e-05, "loss": 0.0924, "step": 12255 }, { "epoch": 0.6223666175948017, "grad_norm": 0.42365825176239014, "learning_rate": 1.5850889216034656e-05, "loss": 0.0944, "step": 12260 }, { "epoch": 0.6226204375856642, "grad_norm": 0.36323311924934387, "learning_rate": 1.584919708276224e-05, "loss": 0.0899, "step": 12265 }, { "epoch": 0.6228742575765267, "grad_norm": 0.39171355962753296, "learning_rate": 1.5847504949489823e-05, "loss": 0.0811, "step": 12270 }, { "epoch": 0.6231280775673892, "grad_norm": 0.914495587348938, "learning_rate": 1.5845812816217407e-05, "loss": 0.0969, "step": 12275 }, { "epoch": 0.6233818975582517, "grad_norm": 0.46378669142723083, "learning_rate": 1.584412068294499e-05, "loss": 0.0989, "step": 12280 }, { "epoch": 0.6236357175491142, "grad_norm": 0.33671021461486816, "learning_rate": 1.5842428549672574e-05, "loss": 0.0854, "step": 12285 }, { "epoch": 0.6238895375399767, "grad_norm": 0.6674852967262268, "learning_rate": 1.5840736416400158e-05, "loss": 0.0882, "step": 12290 }, { "epoch": 0.6241433575308392, "grad_norm": 0.3157777190208435, "learning_rate": 1.583904428312774e-05, "loss": 0.0932, "step": 12295 }, { "epoch": 0.6243971775217017, "grad_norm": 0.6090107560157776, "learning_rate": 1.583735214985532e-05, "loss": 0.0912, "step": 12300 }, { "epoch": 0.6246509975125641, "grad_norm": 0.4318656325340271, "learning_rate": 1.5835660016582908e-05, "loss": 0.0967, "step": 12305 }, { "epoch": 0.6249048175034265, "grad_norm": 0.42950794100761414, "learning_rate": 1.5833967883310492e-05, "loss": 0.0841, "step": 12310 }, { "epoch": 0.625158637494289, "grad_norm": 0.37206709384918213, "learning_rate": 1.5832275750038075e-05, "loss": 0.093, "step": 12315 }, { "epoch": 0.6254124574851515, "grad_norm": 0.5274568200111389, "learning_rate": 1.583058361676566e-05, "loss": 0.1056, "step": 12320 }, { "epoch": 0.625666277476014, "grad_norm": 0.39392906427383423, "learning_rate": 1.582889148349324e-05, "loss": 0.0845, "step": 12325 }, { "epoch": 0.6259200974668765, "grad_norm": 0.6189650297164917, "learning_rate": 1.5827199350220826e-05, "loss": 0.0961, "step": 12330 }, { "epoch": 0.626173917457739, "grad_norm": 0.5449414253234863, "learning_rate": 1.582550721694841e-05, "loss": 0.0868, "step": 12335 }, { "epoch": 0.6264277374486015, "grad_norm": 0.3943621516227722, "learning_rate": 1.582381508367599e-05, "loss": 0.0779, "step": 12340 }, { "epoch": 0.626681557439464, "grad_norm": 0.5067428946495056, "learning_rate": 1.5822122950403577e-05, "loss": 0.0941, "step": 12345 }, { "epoch": 0.6269353774303265, "grad_norm": 0.4015895426273346, "learning_rate": 1.5820430817131157e-05, "loss": 0.0895, "step": 12350 }, { "epoch": 0.6271891974211888, "grad_norm": 0.3975540101528168, "learning_rate": 1.5818738683858744e-05, "loss": 0.0934, "step": 12355 }, { "epoch": 0.6274430174120513, "grad_norm": 0.430369108915329, "learning_rate": 1.5817046550586327e-05, "loss": 0.1002, "step": 12360 }, { "epoch": 0.6276968374029138, "grad_norm": 0.6089135408401489, "learning_rate": 1.5815354417313907e-05, "loss": 0.0862, "step": 12365 }, { "epoch": 0.6279506573937763, "grad_norm": 0.43870821595191956, "learning_rate": 1.5813662284041494e-05, "loss": 0.0919, "step": 12370 }, { "epoch": 0.6282044773846388, "grad_norm": 0.49393364787101746, "learning_rate": 1.5811970150769074e-05, "loss": 0.0744, "step": 12375 }, { "epoch": 0.6284582973755013, "grad_norm": 0.3724878132343292, "learning_rate": 1.5810278017496658e-05, "loss": 0.0882, "step": 12380 }, { "epoch": 0.6287121173663638, "grad_norm": 0.43538209795951843, "learning_rate": 1.5808585884224245e-05, "loss": 0.0975, "step": 12385 }, { "epoch": 0.6289659373572263, "grad_norm": 0.4566599130630493, "learning_rate": 1.5806893750951825e-05, "loss": 0.0894, "step": 12390 }, { "epoch": 0.6292197573480888, "grad_norm": 0.5635015368461609, "learning_rate": 1.580520161767941e-05, "loss": 0.0965, "step": 12395 }, { "epoch": 0.6294735773389513, "grad_norm": 0.3993977904319763, "learning_rate": 1.5803509484406992e-05, "loss": 0.0837, "step": 12400 }, { "epoch": 0.6297273973298136, "grad_norm": 1.7102850675582886, "learning_rate": 1.5801817351134576e-05, "loss": 0.0964, "step": 12405 }, { "epoch": 0.6299812173206761, "grad_norm": 0.6190375089645386, "learning_rate": 1.5800125217862163e-05, "loss": 0.0833, "step": 12410 }, { "epoch": 0.6302350373115386, "grad_norm": 0.6037442684173584, "learning_rate": 1.5798433084589743e-05, "loss": 0.0892, "step": 12415 }, { "epoch": 0.6304888573024011, "grad_norm": 0.36666348576545715, "learning_rate": 1.5796740951317326e-05, "loss": 0.0823, "step": 12420 }, { "epoch": 0.6307426772932636, "grad_norm": 0.8224522471427917, "learning_rate": 1.579504881804491e-05, "loss": 0.0833, "step": 12425 }, { "epoch": 0.6309964972841261, "grad_norm": 0.37060612440109253, "learning_rate": 1.5793356684772493e-05, "loss": 0.09, "step": 12430 }, { "epoch": 0.6312503172749886, "grad_norm": 0.362363338470459, "learning_rate": 1.5791664551500077e-05, "loss": 0.0871, "step": 12435 }, { "epoch": 0.6315041372658511, "grad_norm": 0.4423457980155945, "learning_rate": 1.578997241822766e-05, "loss": 0.0924, "step": 12440 }, { "epoch": 0.6317579572567136, "grad_norm": 0.3518649935722351, "learning_rate": 1.5788280284955244e-05, "loss": 0.0833, "step": 12445 }, { "epoch": 0.6320117772475761, "grad_norm": 0.46395477652549744, "learning_rate": 1.5786588151682828e-05, "loss": 0.0922, "step": 12450 }, { "epoch": 0.6322655972384384, "grad_norm": 0.34381961822509766, "learning_rate": 1.578489601841041e-05, "loss": 0.0995, "step": 12455 }, { "epoch": 0.6325194172293009, "grad_norm": 0.4519914388656616, "learning_rate": 1.5783203885137995e-05, "loss": 0.0853, "step": 12460 }, { "epoch": 0.6327732372201634, "grad_norm": 0.30855950713157654, "learning_rate": 1.5781511751865578e-05, "loss": 0.0788, "step": 12465 }, { "epoch": 0.6330270572110259, "grad_norm": 0.4378630518913269, "learning_rate": 1.5779819618593162e-05, "loss": 0.0803, "step": 12470 }, { "epoch": 0.6332808772018884, "grad_norm": 0.7794264554977417, "learning_rate": 1.5778127485320745e-05, "loss": 0.0947, "step": 12475 }, { "epoch": 0.6335346971927509, "grad_norm": 0.44606897234916687, "learning_rate": 1.577643535204833e-05, "loss": 0.0966, "step": 12480 }, { "epoch": 0.6337885171836134, "grad_norm": 0.8295760750770569, "learning_rate": 1.5774743218775912e-05, "loss": 0.0973, "step": 12485 }, { "epoch": 0.6340423371744759, "grad_norm": 0.6100575923919678, "learning_rate": 1.5773051085503496e-05, "loss": 0.0763, "step": 12490 }, { "epoch": 0.6342961571653384, "grad_norm": 0.43771716952323914, "learning_rate": 1.577135895223108e-05, "loss": 0.092, "step": 12495 }, { "epoch": 0.6345499771562009, "grad_norm": 0.43595728278160095, "learning_rate": 1.5769666818958663e-05, "loss": 0.083, "step": 12500 }, { "epoch": 0.6348037971470633, "grad_norm": 0.5066717267036438, "learning_rate": 1.5767974685686247e-05, "loss": 0.0909, "step": 12505 }, { "epoch": 0.6350576171379257, "grad_norm": 0.42285871505737305, "learning_rate": 1.576628255241383e-05, "loss": 0.0894, "step": 12510 }, { "epoch": 0.6353114371287882, "grad_norm": 0.35143038630485535, "learning_rate": 1.5764590419141414e-05, "loss": 0.0883, "step": 12515 }, { "epoch": 0.6355652571196507, "grad_norm": 0.36473914980888367, "learning_rate": 1.5762898285868997e-05, "loss": 0.091, "step": 12520 }, { "epoch": 0.6358190771105132, "grad_norm": 0.4791051745414734, "learning_rate": 1.576120615259658e-05, "loss": 0.1023, "step": 12525 }, { "epoch": 0.6360728971013757, "grad_norm": 0.3315756916999817, "learning_rate": 1.575951401932416e-05, "loss": 0.0806, "step": 12530 }, { "epoch": 0.6363267170922382, "grad_norm": 0.3584679365158081, "learning_rate": 1.5757821886051748e-05, "loss": 0.0875, "step": 12535 }, { "epoch": 0.6365805370831007, "grad_norm": 0.40165871381759644, "learning_rate": 1.575612975277933e-05, "loss": 0.0916, "step": 12540 }, { "epoch": 0.6368343570739632, "grad_norm": 0.8896801471710205, "learning_rate": 1.575443761950691e-05, "loss": 0.0886, "step": 12545 }, { "epoch": 0.6370881770648257, "grad_norm": 0.3733901083469391, "learning_rate": 1.57527454862345e-05, "loss": 0.0791, "step": 12550 }, { "epoch": 0.6373419970556881, "grad_norm": 0.43153393268585205, "learning_rate": 1.575105335296208e-05, "loss": 0.0894, "step": 12555 }, { "epoch": 0.6375958170465506, "grad_norm": 0.6183637976646423, "learning_rate": 1.5749361219689666e-05, "loss": 0.0858, "step": 12560 }, { "epoch": 0.637849637037413, "grad_norm": 0.4100203216075897, "learning_rate": 1.574766908641725e-05, "loss": 0.1033, "step": 12565 }, { "epoch": 0.6381034570282755, "grad_norm": 0.5121060013771057, "learning_rate": 1.574597695314483e-05, "loss": 0.0868, "step": 12570 }, { "epoch": 0.638357277019138, "grad_norm": 1.0560269355773926, "learning_rate": 1.5744284819872416e-05, "loss": 0.09, "step": 12575 }, { "epoch": 0.6386110970100005, "grad_norm": 0.365310937166214, "learning_rate": 1.5742592686599996e-05, "loss": 0.0885, "step": 12580 }, { "epoch": 0.638864917000863, "grad_norm": 0.41774865984916687, "learning_rate": 1.574090055332758e-05, "loss": 0.0791, "step": 12585 }, { "epoch": 0.6391187369917255, "grad_norm": 0.4893551468849182, "learning_rate": 1.5739208420055167e-05, "loss": 0.0845, "step": 12590 }, { "epoch": 0.639372556982588, "grad_norm": 0.5731279850006104, "learning_rate": 1.5737516286782747e-05, "loss": 0.1017, "step": 12595 }, { "epoch": 0.6396263769734505, "grad_norm": 0.6109359860420227, "learning_rate": 1.573582415351033e-05, "loss": 0.0796, "step": 12600 }, { "epoch": 0.6398801969643129, "grad_norm": 0.3426520526409149, "learning_rate": 1.5734132020237914e-05, "loss": 0.0999, "step": 12605 }, { "epoch": 0.6401340169551754, "grad_norm": 0.3924323618412018, "learning_rate": 1.5732439886965498e-05, "loss": 0.0845, "step": 12610 }, { "epoch": 0.6403878369460378, "grad_norm": 1.8754016160964966, "learning_rate": 1.5730747753693085e-05, "loss": 0.1001, "step": 12615 }, { "epoch": 0.6406416569369003, "grad_norm": 0.3848492205142975, "learning_rate": 1.5729055620420665e-05, "loss": 0.0788, "step": 12620 }, { "epoch": 0.6408954769277628, "grad_norm": 0.671140730381012, "learning_rate": 1.572736348714825e-05, "loss": 0.0758, "step": 12625 }, { "epoch": 0.6411492969186253, "grad_norm": 0.38087940216064453, "learning_rate": 1.5725671353875832e-05, "loss": 0.0932, "step": 12630 }, { "epoch": 0.6414031169094878, "grad_norm": 0.5257353782653809, "learning_rate": 1.5723979220603415e-05, "loss": 0.0882, "step": 12635 }, { "epoch": 0.6416569369003503, "grad_norm": 0.35013777017593384, "learning_rate": 1.5722287087331e-05, "loss": 0.0856, "step": 12640 }, { "epoch": 0.6419107568912128, "grad_norm": 0.4160863161087036, "learning_rate": 1.5720594954058582e-05, "loss": 0.0833, "step": 12645 }, { "epoch": 0.6421645768820752, "grad_norm": 0.3413524031639099, "learning_rate": 1.5718902820786166e-05, "loss": 0.0802, "step": 12650 }, { "epoch": 0.6424183968729377, "grad_norm": 0.5095176100730896, "learning_rate": 1.571721068751375e-05, "loss": 0.0931, "step": 12655 }, { "epoch": 0.6426722168638002, "grad_norm": 0.45131492614746094, "learning_rate": 1.5715518554241333e-05, "loss": 0.0814, "step": 12660 }, { "epoch": 0.6429260368546627, "grad_norm": 0.4562227427959442, "learning_rate": 1.5713826420968917e-05, "loss": 0.0849, "step": 12665 }, { "epoch": 0.6431798568455251, "grad_norm": 0.38386526703834534, "learning_rate": 1.57121342876965e-05, "loss": 0.0848, "step": 12670 }, { "epoch": 0.6434336768363876, "grad_norm": 0.5002121329307556, "learning_rate": 1.5710442154424084e-05, "loss": 0.1066, "step": 12675 }, { "epoch": 0.6436874968272501, "grad_norm": 0.42015817761421204, "learning_rate": 1.5708750021151667e-05, "loss": 0.0995, "step": 12680 }, { "epoch": 0.6439413168181126, "grad_norm": 0.3642427921295166, "learning_rate": 1.570705788787925e-05, "loss": 0.0811, "step": 12685 }, { "epoch": 0.6441951368089751, "grad_norm": 0.40864598751068115, "learning_rate": 1.5705365754606834e-05, "loss": 0.0896, "step": 12690 }, { "epoch": 0.6444489567998376, "grad_norm": 0.3729974925518036, "learning_rate": 1.5703673621334418e-05, "loss": 0.0944, "step": 12695 }, { "epoch": 0.6447027767907, "grad_norm": 0.8188928365707397, "learning_rate": 1.5701981488062e-05, "loss": 0.0831, "step": 12700 }, { "epoch": 0.6449565967815625, "grad_norm": 0.49652352929115295, "learning_rate": 1.5700289354789585e-05, "loss": 0.0879, "step": 12705 }, { "epoch": 0.645210416772425, "grad_norm": 0.5149166584014893, "learning_rate": 1.569859722151717e-05, "loss": 0.0998, "step": 12710 }, { "epoch": 0.6454642367632875, "grad_norm": 0.3833802044391632, "learning_rate": 1.5696905088244752e-05, "loss": 0.0943, "step": 12715 }, { "epoch": 0.64571805675415, "grad_norm": 0.5231676697731018, "learning_rate": 1.5695212954972336e-05, "loss": 0.0819, "step": 12720 }, { "epoch": 0.6459718767450124, "grad_norm": 0.977109432220459, "learning_rate": 1.569352082169992e-05, "loss": 0.0956, "step": 12725 }, { "epoch": 0.6462256967358749, "grad_norm": 0.4414095878601074, "learning_rate": 1.5691828688427503e-05, "loss": 0.0873, "step": 12730 }, { "epoch": 0.6464795167267374, "grad_norm": 0.46533313393592834, "learning_rate": 1.5690136555155083e-05, "loss": 0.0878, "step": 12735 }, { "epoch": 0.6467333367175999, "grad_norm": 0.4510734975337982, "learning_rate": 1.568844442188267e-05, "loss": 0.0954, "step": 12740 }, { "epoch": 0.6469871567084624, "grad_norm": 0.43129807710647583, "learning_rate": 1.5686752288610253e-05, "loss": 0.0892, "step": 12745 }, { "epoch": 0.6472409766993248, "grad_norm": 0.5520406365394592, "learning_rate": 1.5685060155337837e-05, "loss": 0.0969, "step": 12750 }, { "epoch": 0.6474947966901873, "grad_norm": 0.40193498134613037, "learning_rate": 1.568336802206542e-05, "loss": 0.0808, "step": 12755 }, { "epoch": 0.6477486166810498, "grad_norm": 0.3910532295703888, "learning_rate": 1.5681675888793e-05, "loss": 0.0952, "step": 12760 }, { "epoch": 0.6480024366719123, "grad_norm": 0.3040367066860199, "learning_rate": 1.5679983755520588e-05, "loss": 0.0744, "step": 12765 }, { "epoch": 0.6482562566627748, "grad_norm": 0.34013864398002625, "learning_rate": 1.567829162224817e-05, "loss": 0.0849, "step": 12770 }, { "epoch": 0.6485100766536372, "grad_norm": 0.426899790763855, "learning_rate": 1.567659948897575e-05, "loss": 0.0898, "step": 12775 }, { "epoch": 0.6487638966444997, "grad_norm": 0.32523438334465027, "learning_rate": 1.5674907355703338e-05, "loss": 0.0828, "step": 12780 }, { "epoch": 0.6490177166353622, "grad_norm": 0.35754647850990295, "learning_rate": 1.567321522243092e-05, "loss": 0.0731, "step": 12785 }, { "epoch": 0.6492715366262247, "grad_norm": 0.45699095726013184, "learning_rate": 1.5671523089158502e-05, "loss": 0.0838, "step": 12790 }, { "epoch": 0.6495253566170872, "grad_norm": 0.41177016496658325, "learning_rate": 1.566983095588609e-05, "loss": 0.0801, "step": 12795 }, { "epoch": 0.6497791766079496, "grad_norm": 0.3860916793346405, "learning_rate": 1.566813882261367e-05, "loss": 0.0871, "step": 12800 }, { "epoch": 0.6500329965988121, "grad_norm": 0.6945805549621582, "learning_rate": 1.5666446689341256e-05, "loss": 0.0896, "step": 12805 }, { "epoch": 0.6502868165896746, "grad_norm": 0.3880050778388977, "learning_rate": 1.5664754556068836e-05, "loss": 0.0779, "step": 12810 }, { "epoch": 0.6505406365805371, "grad_norm": 0.689802348613739, "learning_rate": 1.566306242279642e-05, "loss": 0.0824, "step": 12815 }, { "epoch": 0.6507944565713996, "grad_norm": 0.5213664174079895, "learning_rate": 1.5661370289524007e-05, "loss": 0.0873, "step": 12820 }, { "epoch": 0.651048276562262, "grad_norm": 0.4796338975429535, "learning_rate": 1.5659678156251587e-05, "loss": 0.0889, "step": 12825 }, { "epoch": 0.6513020965531245, "grad_norm": 0.4608859419822693, "learning_rate": 1.565798602297917e-05, "loss": 0.0846, "step": 12830 }, { "epoch": 0.651555916543987, "grad_norm": 0.3637705147266388, "learning_rate": 1.5656293889706754e-05, "loss": 0.0808, "step": 12835 }, { "epoch": 0.6518097365348495, "grad_norm": 0.45812270045280457, "learning_rate": 1.5654601756434337e-05, "loss": 0.0876, "step": 12840 }, { "epoch": 0.652063556525712, "grad_norm": 0.35677555203437805, "learning_rate": 1.565290962316192e-05, "loss": 0.0921, "step": 12845 }, { "epoch": 0.6523173765165744, "grad_norm": 0.6063011884689331, "learning_rate": 1.5651217489889504e-05, "loss": 0.0882, "step": 12850 }, { "epoch": 0.6525711965074369, "grad_norm": 0.3067625164985657, "learning_rate": 1.5649525356617088e-05, "loss": 0.0763, "step": 12855 }, { "epoch": 0.6528250164982994, "grad_norm": 0.3955833315849304, "learning_rate": 1.564783322334467e-05, "loss": 0.0813, "step": 12860 }, { "epoch": 0.6530788364891619, "grad_norm": 0.4854811728000641, "learning_rate": 1.5646141090072255e-05, "loss": 0.0762, "step": 12865 }, { "epoch": 0.6533326564800244, "grad_norm": 0.3633703589439392, "learning_rate": 1.564444895679984e-05, "loss": 0.0891, "step": 12870 }, { "epoch": 0.6535864764708869, "grad_norm": 0.41939306259155273, "learning_rate": 1.5642756823527422e-05, "loss": 0.0981, "step": 12875 }, { "epoch": 0.6538402964617493, "grad_norm": 0.8128605484962463, "learning_rate": 1.5641064690255006e-05, "loss": 0.0942, "step": 12880 }, { "epoch": 0.6540941164526118, "grad_norm": 1.6264680624008179, "learning_rate": 1.563937255698259e-05, "loss": 0.0831, "step": 12885 }, { "epoch": 0.6543479364434743, "grad_norm": 0.35295000672340393, "learning_rate": 1.5637680423710173e-05, "loss": 0.0805, "step": 12890 }, { "epoch": 0.6546017564343368, "grad_norm": 0.327200323343277, "learning_rate": 1.5635988290437756e-05, "loss": 0.085, "step": 12895 }, { "epoch": 0.6548555764251992, "grad_norm": 0.37761542201042175, "learning_rate": 1.563429615716534e-05, "loss": 0.0808, "step": 12900 }, { "epoch": 0.6551093964160617, "grad_norm": 0.39181774854660034, "learning_rate": 1.5632604023892923e-05, "loss": 0.0852, "step": 12905 }, { "epoch": 0.6553632164069242, "grad_norm": 0.4610046148300171, "learning_rate": 1.5630911890620507e-05, "loss": 0.0861, "step": 12910 }, { "epoch": 0.6556170363977867, "grad_norm": 0.42793646454811096, "learning_rate": 1.562921975734809e-05, "loss": 0.0876, "step": 12915 }, { "epoch": 0.6558708563886492, "grad_norm": 0.6725565195083618, "learning_rate": 1.5627527624075674e-05, "loss": 0.1022, "step": 12920 }, { "epoch": 0.6561246763795117, "grad_norm": 0.6574685573577881, "learning_rate": 1.5625835490803258e-05, "loss": 0.1023, "step": 12925 }, { "epoch": 0.6563784963703742, "grad_norm": 0.45522236824035645, "learning_rate": 1.562414335753084e-05, "loss": 0.0794, "step": 12930 }, { "epoch": 0.6566323163612366, "grad_norm": 0.4538464844226837, "learning_rate": 1.5622451224258425e-05, "loss": 0.087, "step": 12935 }, { "epoch": 0.6568861363520991, "grad_norm": 0.6361369490623474, "learning_rate": 1.5620759090986005e-05, "loss": 0.0863, "step": 12940 }, { "epoch": 0.6571399563429615, "grad_norm": 0.47415581345558167, "learning_rate": 1.5619066957713592e-05, "loss": 0.0845, "step": 12945 }, { "epoch": 0.657393776333824, "grad_norm": 0.35891467332839966, "learning_rate": 1.5617374824441175e-05, "loss": 0.0912, "step": 12950 }, { "epoch": 0.6576475963246865, "grad_norm": 0.5842028260231018, "learning_rate": 1.561568269116876e-05, "loss": 0.0764, "step": 12955 }, { "epoch": 0.657901416315549, "grad_norm": 0.7354117035865784, "learning_rate": 1.5613990557896342e-05, "loss": 0.0835, "step": 12960 }, { "epoch": 0.6581552363064115, "grad_norm": 0.43807005882263184, "learning_rate": 1.5612298424623923e-05, "loss": 0.1034, "step": 12965 }, { "epoch": 0.658409056297274, "grad_norm": 0.5703946948051453, "learning_rate": 1.561060629135151e-05, "loss": 0.0799, "step": 12970 }, { "epoch": 0.6586628762881365, "grad_norm": 0.29095369577407837, "learning_rate": 1.5608914158079093e-05, "loss": 0.0762, "step": 12975 }, { "epoch": 0.658916696278999, "grad_norm": 0.4557504653930664, "learning_rate": 1.5607222024806673e-05, "loss": 0.0991, "step": 12980 }, { "epoch": 0.6591705162698615, "grad_norm": 0.5406267642974854, "learning_rate": 1.560552989153426e-05, "loss": 0.0837, "step": 12985 }, { "epoch": 0.6594243362607239, "grad_norm": 0.5688093304634094, "learning_rate": 1.560383775826184e-05, "loss": 0.0819, "step": 12990 }, { "epoch": 0.6596781562515863, "grad_norm": 0.4395672082901001, "learning_rate": 1.5602145624989427e-05, "loss": 0.0845, "step": 12995 }, { "epoch": 0.6599319762424488, "grad_norm": 0.3437829315662384, "learning_rate": 1.560045349171701e-05, "loss": 0.0785, "step": 13000 }, { "epoch": 0.6601857962333113, "grad_norm": 0.42482784390449524, "learning_rate": 1.559876135844459e-05, "loss": 0.0914, "step": 13005 }, { "epoch": 0.6604396162241738, "grad_norm": 0.4697912931442261, "learning_rate": 1.5597069225172178e-05, "loss": 0.0852, "step": 13010 }, { "epoch": 0.6606934362150363, "grad_norm": 0.36666378378868103, "learning_rate": 1.5595377091899758e-05, "loss": 0.0798, "step": 13015 }, { "epoch": 0.6609472562058988, "grad_norm": 0.37574443221092224, "learning_rate": 1.559368495862734e-05, "loss": 0.0932, "step": 13020 }, { "epoch": 0.6612010761967613, "grad_norm": 0.5501673221588135, "learning_rate": 1.559199282535493e-05, "loss": 0.0967, "step": 13025 }, { "epoch": 0.6614548961876238, "grad_norm": 0.48891526460647583, "learning_rate": 1.559030069208251e-05, "loss": 0.0814, "step": 13030 }, { "epoch": 0.6617087161784863, "grad_norm": 0.40868237614631653, "learning_rate": 1.5588608558810092e-05, "loss": 0.0857, "step": 13035 }, { "epoch": 0.6619625361693487, "grad_norm": 0.5038067102432251, "learning_rate": 1.5586916425537676e-05, "loss": 0.0791, "step": 13040 }, { "epoch": 0.6622163561602111, "grad_norm": 0.3892373740673065, "learning_rate": 1.558522429226526e-05, "loss": 0.0853, "step": 13045 }, { "epoch": 0.6624701761510736, "grad_norm": 0.6677381992340088, "learning_rate": 1.5583532158992846e-05, "loss": 0.083, "step": 13050 }, { "epoch": 0.6627239961419361, "grad_norm": 0.34850823879241943, "learning_rate": 1.5581840025720426e-05, "loss": 0.0779, "step": 13055 }, { "epoch": 0.6629778161327986, "grad_norm": 0.5405170321464539, "learning_rate": 1.558014789244801e-05, "loss": 0.076, "step": 13060 }, { "epoch": 0.6632316361236611, "grad_norm": 0.4197944402694702, "learning_rate": 1.5578455759175593e-05, "loss": 0.0795, "step": 13065 }, { "epoch": 0.6634854561145236, "grad_norm": 0.48564544320106506, "learning_rate": 1.5576763625903177e-05, "loss": 0.0815, "step": 13070 }, { "epoch": 0.6637392761053861, "grad_norm": 0.37770646810531616, "learning_rate": 1.557507149263076e-05, "loss": 0.0891, "step": 13075 }, { "epoch": 0.6639930960962486, "grad_norm": 0.39739593863487244, "learning_rate": 1.5573379359358344e-05, "loss": 0.0833, "step": 13080 }, { "epoch": 0.6642469160871111, "grad_norm": 0.3831949234008789, "learning_rate": 1.5571687226085928e-05, "loss": 0.0854, "step": 13085 }, { "epoch": 0.6645007360779736, "grad_norm": 0.3688269257545471, "learning_rate": 1.556999509281351e-05, "loss": 0.0814, "step": 13090 }, { "epoch": 0.6647545560688359, "grad_norm": 0.3308967649936676, "learning_rate": 1.5568302959541095e-05, "loss": 0.0668, "step": 13095 }, { "epoch": 0.6650083760596984, "grad_norm": 0.665611743927002, "learning_rate": 1.5566610826268678e-05, "loss": 0.0919, "step": 13100 }, { "epoch": 0.6652621960505609, "grad_norm": 0.3787246346473694, "learning_rate": 1.5564918692996262e-05, "loss": 0.0869, "step": 13105 }, { "epoch": 0.6655160160414234, "grad_norm": 0.5023996233940125, "learning_rate": 1.5563226559723845e-05, "loss": 0.0873, "step": 13110 }, { "epoch": 0.6657698360322859, "grad_norm": 0.36124274134635925, "learning_rate": 1.556153442645143e-05, "loss": 0.0768, "step": 13115 }, { "epoch": 0.6660236560231484, "grad_norm": 0.7764711380004883, "learning_rate": 1.5559842293179012e-05, "loss": 0.083, "step": 13120 }, { "epoch": 0.6662774760140109, "grad_norm": 0.36708641052246094, "learning_rate": 1.5558150159906596e-05, "loss": 0.0899, "step": 13125 }, { "epoch": 0.6665312960048734, "grad_norm": 0.45462819933891296, "learning_rate": 1.555645802663418e-05, "loss": 0.0968, "step": 13130 }, { "epoch": 0.6667851159957359, "grad_norm": 0.4756264388561249, "learning_rate": 1.5554765893361763e-05, "loss": 0.0873, "step": 13135 }, { "epoch": 0.6670389359865984, "grad_norm": 0.3898143470287323, "learning_rate": 1.5553073760089347e-05, "loss": 0.0862, "step": 13140 }, { "epoch": 0.6672927559774607, "grad_norm": 0.32764703035354614, "learning_rate": 1.555138162681693e-05, "loss": 0.0877, "step": 13145 }, { "epoch": 0.6675465759683232, "grad_norm": 0.5033909678459167, "learning_rate": 1.5549689493544514e-05, "loss": 0.0812, "step": 13150 }, { "epoch": 0.6678003959591857, "grad_norm": 0.42792779207229614, "learning_rate": 1.5547997360272097e-05, "loss": 0.0917, "step": 13155 }, { "epoch": 0.6680542159500482, "grad_norm": 0.37381792068481445, "learning_rate": 1.554630522699968e-05, "loss": 0.0796, "step": 13160 }, { "epoch": 0.6683080359409107, "grad_norm": 0.4677964448928833, "learning_rate": 1.5544613093727264e-05, "loss": 0.0892, "step": 13165 }, { "epoch": 0.6685618559317732, "grad_norm": 0.36071425676345825, "learning_rate": 1.5542920960454845e-05, "loss": 0.0961, "step": 13170 }, { "epoch": 0.6688156759226357, "grad_norm": 0.41257521510124207, "learning_rate": 1.554122882718243e-05, "loss": 0.0826, "step": 13175 }, { "epoch": 0.6690694959134982, "grad_norm": 0.48096606135368347, "learning_rate": 1.5539536693910015e-05, "loss": 0.0819, "step": 13180 }, { "epoch": 0.6693233159043607, "grad_norm": 0.3370281755924225, "learning_rate": 1.5537844560637595e-05, "loss": 0.0878, "step": 13185 }, { "epoch": 0.669577135895223, "grad_norm": 0.4116274416446686, "learning_rate": 1.5536152427365182e-05, "loss": 0.0878, "step": 13190 }, { "epoch": 0.6698309558860855, "grad_norm": 0.5579774975776672, "learning_rate": 1.5534460294092762e-05, "loss": 0.0886, "step": 13195 }, { "epoch": 0.670084775876948, "grad_norm": 0.5344901084899902, "learning_rate": 1.553276816082035e-05, "loss": 0.0756, "step": 13200 }, { "epoch": 0.6703385958678105, "grad_norm": 0.5055976510047913, "learning_rate": 1.5531076027547933e-05, "loss": 0.097, "step": 13205 }, { "epoch": 0.670592415858673, "grad_norm": 0.3117898106575012, "learning_rate": 1.5529383894275513e-05, "loss": 0.0839, "step": 13210 }, { "epoch": 0.6708462358495355, "grad_norm": 0.4203091561794281, "learning_rate": 1.55276917610031e-05, "loss": 0.0738, "step": 13215 }, { "epoch": 0.671100055840398, "grad_norm": 0.975365936756134, "learning_rate": 1.552599962773068e-05, "loss": 0.0877, "step": 13220 }, { "epoch": 0.6713538758312605, "grad_norm": 0.400333970785141, "learning_rate": 1.5524307494458263e-05, "loss": 0.0679, "step": 13225 }, { "epoch": 0.671607695822123, "grad_norm": 0.42872706055641174, "learning_rate": 1.552261536118585e-05, "loss": 0.0807, "step": 13230 }, { "epoch": 0.6718615158129855, "grad_norm": 0.6446133852005005, "learning_rate": 1.552092322791343e-05, "loss": 0.0725, "step": 13235 }, { "epoch": 0.6721153358038479, "grad_norm": 0.5523983836174011, "learning_rate": 1.5519231094641018e-05, "loss": 0.0851, "step": 13240 }, { "epoch": 0.6723691557947103, "grad_norm": 0.329487681388855, "learning_rate": 1.5517538961368598e-05, "loss": 0.0744, "step": 13245 }, { "epoch": 0.6726229757855728, "grad_norm": 0.38870227336883545, "learning_rate": 1.551584682809618e-05, "loss": 0.0739, "step": 13250 }, { "epoch": 0.6728767957764353, "grad_norm": 0.47514641284942627, "learning_rate": 1.5514154694823768e-05, "loss": 0.0765, "step": 13255 }, { "epoch": 0.6731306157672978, "grad_norm": 0.40743643045425415, "learning_rate": 1.551246256155135e-05, "loss": 0.0772, "step": 13260 }, { "epoch": 0.6733844357581603, "grad_norm": 0.4392092525959015, "learning_rate": 1.5510770428278932e-05, "loss": 0.094, "step": 13265 }, { "epoch": 0.6736382557490228, "grad_norm": 0.46465203166007996, "learning_rate": 1.5509078295006515e-05, "loss": 0.0869, "step": 13270 }, { "epoch": 0.6738920757398853, "grad_norm": 0.5030555725097656, "learning_rate": 1.55073861617341e-05, "loss": 0.0846, "step": 13275 }, { "epoch": 0.6741458957307478, "grad_norm": 0.36062154173851013, "learning_rate": 1.5505694028461682e-05, "loss": 0.0826, "step": 13280 }, { "epoch": 0.6743997157216103, "grad_norm": 0.29261308908462524, "learning_rate": 1.5504001895189266e-05, "loss": 0.0802, "step": 13285 }, { "epoch": 0.6746535357124727, "grad_norm": 0.40803733468055725, "learning_rate": 1.550230976191685e-05, "loss": 0.0844, "step": 13290 }, { "epoch": 0.6749073557033352, "grad_norm": 0.4190680980682373, "learning_rate": 1.5500617628644433e-05, "loss": 0.0813, "step": 13295 }, { "epoch": 0.6751611756941976, "grad_norm": 0.4443584084510803, "learning_rate": 1.5498925495372017e-05, "loss": 0.0856, "step": 13300 }, { "epoch": 0.6754149956850601, "grad_norm": 0.3262367248535156, "learning_rate": 1.54972333620996e-05, "loss": 0.087, "step": 13305 }, { "epoch": 0.6756688156759226, "grad_norm": 1.0407583713531494, "learning_rate": 1.5495541228827184e-05, "loss": 0.0806, "step": 13310 }, { "epoch": 0.6759226356667851, "grad_norm": 0.5855976939201355, "learning_rate": 1.5493849095554767e-05, "loss": 0.0895, "step": 13315 }, { "epoch": 0.6761764556576476, "grad_norm": 0.6860768795013428, "learning_rate": 1.549215696228235e-05, "loss": 0.0732, "step": 13320 }, { "epoch": 0.6764302756485101, "grad_norm": 0.5066936612129211, "learning_rate": 1.5490464829009934e-05, "loss": 0.086, "step": 13325 }, { "epoch": 0.6766840956393726, "grad_norm": 0.49966979026794434, "learning_rate": 1.5488772695737518e-05, "loss": 0.0848, "step": 13330 }, { "epoch": 0.6769379156302351, "grad_norm": 0.41109582781791687, "learning_rate": 1.54870805624651e-05, "loss": 0.0834, "step": 13335 }, { "epoch": 0.6771917356210975, "grad_norm": 0.6102433204650879, "learning_rate": 1.5485388429192685e-05, "loss": 0.1299, "step": 13340 }, { "epoch": 0.67744555561196, "grad_norm": 0.39649656414985657, "learning_rate": 1.548369629592027e-05, "loss": 0.0946, "step": 13345 }, { "epoch": 0.6776993756028225, "grad_norm": 0.375001460313797, "learning_rate": 1.5482004162647852e-05, "loss": 0.0756, "step": 13350 }, { "epoch": 0.6779531955936849, "grad_norm": 0.4396669268608093, "learning_rate": 1.5480312029375436e-05, "loss": 0.0791, "step": 13355 }, { "epoch": 0.6782070155845474, "grad_norm": 0.6256466507911682, "learning_rate": 1.547861989610302e-05, "loss": 0.0827, "step": 13360 }, { "epoch": 0.6784608355754099, "grad_norm": 0.39854422211647034, "learning_rate": 1.5476927762830603e-05, "loss": 0.0791, "step": 13365 }, { "epoch": 0.6787146555662724, "grad_norm": 0.6447271108627319, "learning_rate": 1.5475235629558186e-05, "loss": 0.0898, "step": 13370 }, { "epoch": 0.6789684755571349, "grad_norm": 0.3512633740901947, "learning_rate": 1.5473543496285766e-05, "loss": 0.0914, "step": 13375 }, { "epoch": 0.6792222955479974, "grad_norm": 0.37331703305244446, "learning_rate": 1.5471851363013353e-05, "loss": 0.0822, "step": 13380 }, { "epoch": 0.6794761155388599, "grad_norm": 0.5161044001579285, "learning_rate": 1.5470159229740937e-05, "loss": 0.0886, "step": 13385 }, { "epoch": 0.6797299355297223, "grad_norm": 0.3599933981895447, "learning_rate": 1.546846709646852e-05, "loss": 0.081, "step": 13390 }, { "epoch": 0.6799837555205848, "grad_norm": 0.4737204313278198, "learning_rate": 1.5466774963196104e-05, "loss": 0.0925, "step": 13395 }, { "epoch": 0.6802375755114473, "grad_norm": 0.46349260210990906, "learning_rate": 1.5465082829923684e-05, "loss": 0.0966, "step": 13400 }, { "epoch": 0.6804913955023097, "grad_norm": 0.4162529706954956, "learning_rate": 1.546339069665127e-05, "loss": 0.0854, "step": 13405 }, { "epoch": 0.6807452154931722, "grad_norm": 0.3948574662208557, "learning_rate": 1.5461698563378855e-05, "loss": 0.0883, "step": 13410 }, { "epoch": 0.6809990354840347, "grad_norm": 0.4505586624145508, "learning_rate": 1.5460006430106435e-05, "loss": 0.0968, "step": 13415 }, { "epoch": 0.6812528554748972, "grad_norm": 0.42507103085517883, "learning_rate": 1.5458314296834022e-05, "loss": 0.0841, "step": 13420 }, { "epoch": 0.6815066754657597, "grad_norm": 0.5045070648193359, "learning_rate": 1.5456622163561602e-05, "loss": 0.0894, "step": 13425 }, { "epoch": 0.6817604954566222, "grad_norm": 0.4296087622642517, "learning_rate": 1.5454930030289185e-05, "loss": 0.0866, "step": 13430 }, { "epoch": 0.6820143154474847, "grad_norm": 0.40591058135032654, "learning_rate": 1.5453237897016772e-05, "loss": 0.0887, "step": 13435 }, { "epoch": 0.6822681354383471, "grad_norm": 0.2938343286514282, "learning_rate": 1.5451545763744353e-05, "loss": 0.0783, "step": 13440 }, { "epoch": 0.6825219554292096, "grad_norm": 0.41390931606292725, "learning_rate": 1.544985363047194e-05, "loss": 0.0865, "step": 13445 }, { "epoch": 0.6827757754200721, "grad_norm": 1.899959683418274, "learning_rate": 1.544816149719952e-05, "loss": 0.0849, "step": 13450 }, { "epoch": 0.6830295954109346, "grad_norm": 0.6087545156478882, "learning_rate": 1.5446469363927103e-05, "loss": 0.0885, "step": 13455 }, { "epoch": 0.683283415401797, "grad_norm": 0.42178311944007874, "learning_rate": 1.5444777230654687e-05, "loss": 0.0784, "step": 13460 }, { "epoch": 0.6835372353926595, "grad_norm": 0.3378629684448242, "learning_rate": 1.544308509738227e-05, "loss": 0.0891, "step": 13465 }, { "epoch": 0.683791055383522, "grad_norm": 0.45582592487335205, "learning_rate": 1.5441392964109854e-05, "loss": 0.0785, "step": 13470 }, { "epoch": 0.6840448753743845, "grad_norm": 0.4021526277065277, "learning_rate": 1.5439700830837437e-05, "loss": 0.0861, "step": 13475 }, { "epoch": 0.684298695365247, "grad_norm": 0.42688125371932983, "learning_rate": 1.543800869756502e-05, "loss": 0.0804, "step": 13480 }, { "epoch": 0.6845525153561094, "grad_norm": 0.37077417969703674, "learning_rate": 1.5436316564292604e-05, "loss": 0.0765, "step": 13485 }, { "epoch": 0.6848063353469719, "grad_norm": 0.3427707850933075, "learning_rate": 1.5434624431020188e-05, "loss": 0.0857, "step": 13490 }, { "epoch": 0.6850601553378344, "grad_norm": 0.4085235297679901, "learning_rate": 1.543293229774777e-05, "loss": 0.088, "step": 13495 }, { "epoch": 0.6853139753286969, "grad_norm": 0.38972538709640503, "learning_rate": 1.5431240164475355e-05, "loss": 0.0877, "step": 13500 }, { "epoch": 0.6855677953195594, "grad_norm": 0.35393092036247253, "learning_rate": 1.542954803120294e-05, "loss": 0.0827, "step": 13505 }, { "epoch": 0.6858216153104219, "grad_norm": 0.4118354916572571, "learning_rate": 1.5427855897930522e-05, "loss": 0.0786, "step": 13510 }, { "epoch": 0.6860754353012843, "grad_norm": 0.5753445625305176, "learning_rate": 1.5426163764658106e-05, "loss": 0.0815, "step": 13515 }, { "epoch": 0.6863292552921468, "grad_norm": 0.48167121410369873, "learning_rate": 1.542447163138569e-05, "loss": 0.0914, "step": 13520 }, { "epoch": 0.6865830752830093, "grad_norm": 0.48669248819351196, "learning_rate": 1.5422779498113273e-05, "loss": 0.0841, "step": 13525 }, { "epoch": 0.6868368952738718, "grad_norm": 0.40434154868125916, "learning_rate": 1.5421087364840856e-05, "loss": 0.0851, "step": 13530 }, { "epoch": 0.6870907152647342, "grad_norm": 0.5444993376731873, "learning_rate": 1.541939523156844e-05, "loss": 0.0977, "step": 13535 }, { "epoch": 0.6873445352555967, "grad_norm": 0.5064772367477417, "learning_rate": 1.5417703098296023e-05, "loss": 0.0787, "step": 13540 }, { "epoch": 0.6875983552464592, "grad_norm": 0.28774887323379517, "learning_rate": 1.5416010965023607e-05, "loss": 0.0955, "step": 13545 }, { "epoch": 0.6878521752373217, "grad_norm": 0.41530025005340576, "learning_rate": 1.541431883175119e-05, "loss": 0.069, "step": 13550 }, { "epoch": 0.6881059952281842, "grad_norm": 0.769633948802948, "learning_rate": 1.5412626698478774e-05, "loss": 0.0872, "step": 13555 }, { "epoch": 0.6883598152190467, "grad_norm": 0.47131699323654175, "learning_rate": 1.5410934565206358e-05, "loss": 0.0871, "step": 13560 }, { "epoch": 0.6886136352099091, "grad_norm": 0.3732922077178955, "learning_rate": 1.540924243193394e-05, "loss": 0.0847, "step": 13565 }, { "epoch": 0.6888674552007716, "grad_norm": 0.5022915005683899, "learning_rate": 1.5407550298661525e-05, "loss": 0.0887, "step": 13570 }, { "epoch": 0.6891212751916341, "grad_norm": 0.3835129141807556, "learning_rate": 1.5405858165389108e-05, "loss": 0.0825, "step": 13575 }, { "epoch": 0.6893750951824966, "grad_norm": 0.5404291749000549, "learning_rate": 1.540416603211669e-05, "loss": 0.082, "step": 13580 }, { "epoch": 0.689628915173359, "grad_norm": 0.3274235129356384, "learning_rate": 1.5402473898844275e-05, "loss": 0.0833, "step": 13585 }, { "epoch": 0.6898827351642215, "grad_norm": 0.3606513440608978, "learning_rate": 1.540078176557186e-05, "loss": 0.079, "step": 13590 }, { "epoch": 0.690136555155084, "grad_norm": 0.6473351716995239, "learning_rate": 1.5399089632299442e-05, "loss": 0.0856, "step": 13595 }, { "epoch": 0.6903903751459465, "grad_norm": 1.008514165878296, "learning_rate": 1.5397397499027026e-05, "loss": 0.0856, "step": 13600 }, { "epoch": 0.690644195136809, "grad_norm": 0.5139790177345276, "learning_rate": 1.5395705365754606e-05, "loss": 0.087, "step": 13605 }, { "epoch": 0.6908980151276715, "grad_norm": 0.3781962990760803, "learning_rate": 1.5394013232482193e-05, "loss": 0.0808, "step": 13610 }, { "epoch": 0.691151835118534, "grad_norm": 0.3120231628417969, "learning_rate": 1.5392321099209777e-05, "loss": 0.072, "step": 13615 }, { "epoch": 0.6914056551093964, "grad_norm": 0.3772776424884796, "learning_rate": 1.5390628965937357e-05, "loss": 0.0701, "step": 13620 }, { "epoch": 0.6916594751002589, "grad_norm": 0.4236140549182892, "learning_rate": 1.5388936832664944e-05, "loss": 0.0822, "step": 13625 }, { "epoch": 0.6919132950911214, "grad_norm": 0.4006426930427551, "learning_rate": 1.5387244699392524e-05, "loss": 0.0839, "step": 13630 }, { "epoch": 0.6921671150819838, "grad_norm": 0.45694807171821594, "learning_rate": 1.538555256612011e-05, "loss": 0.0738, "step": 13635 }, { "epoch": 0.6924209350728463, "grad_norm": 0.6357932090759277, "learning_rate": 1.538386043284769e-05, "loss": 0.0842, "step": 13640 }, { "epoch": 0.6926747550637088, "grad_norm": 0.5149153470993042, "learning_rate": 1.5382168299575274e-05, "loss": 0.0776, "step": 13645 }, { "epoch": 0.6929285750545713, "grad_norm": 0.8405686616897583, "learning_rate": 1.538047616630286e-05, "loss": 0.0821, "step": 13650 }, { "epoch": 0.6931823950454338, "grad_norm": 0.35849130153656006, "learning_rate": 1.537878403303044e-05, "loss": 0.0803, "step": 13655 }, { "epoch": 0.6934362150362963, "grad_norm": 0.41094881296157837, "learning_rate": 1.5377091899758025e-05, "loss": 0.0863, "step": 13660 }, { "epoch": 0.6936900350271588, "grad_norm": 0.4219074249267578, "learning_rate": 1.537539976648561e-05, "loss": 0.0951, "step": 13665 }, { "epoch": 0.6939438550180212, "grad_norm": 0.4253228008747101, "learning_rate": 1.5373707633213192e-05, "loss": 0.0918, "step": 13670 }, { "epoch": 0.6941976750088837, "grad_norm": 0.5601229071617126, "learning_rate": 1.5372015499940776e-05, "loss": 0.0704, "step": 13675 }, { "epoch": 0.6944514949997462, "grad_norm": 0.40687859058380127, "learning_rate": 1.537032336666836e-05, "loss": 0.0689, "step": 13680 }, { "epoch": 0.6947053149906086, "grad_norm": 0.4325862526893616, "learning_rate": 1.5368631233395943e-05, "loss": 0.0917, "step": 13685 }, { "epoch": 0.6949591349814711, "grad_norm": 0.44314250349998474, "learning_rate": 1.5366939100123526e-05, "loss": 0.0812, "step": 13690 }, { "epoch": 0.6952129549723336, "grad_norm": 0.35597068071365356, "learning_rate": 1.536524696685111e-05, "loss": 0.0767, "step": 13695 }, { "epoch": 0.6954667749631961, "grad_norm": 0.7931150794029236, "learning_rate": 1.5363554833578693e-05, "loss": 0.0794, "step": 13700 }, { "epoch": 0.6957205949540586, "grad_norm": 0.511711835861206, "learning_rate": 1.5361862700306277e-05, "loss": 0.074, "step": 13705 }, { "epoch": 0.6959744149449211, "grad_norm": 0.8357803225517273, "learning_rate": 1.536017056703386e-05, "loss": 0.0688, "step": 13710 }, { "epoch": 0.6962282349357836, "grad_norm": 0.3736538887023926, "learning_rate": 1.5358478433761444e-05, "loss": 0.0676, "step": 13715 }, { "epoch": 0.696482054926646, "grad_norm": 0.38975706696510315, "learning_rate": 1.5356786300489028e-05, "loss": 0.0779, "step": 13720 }, { "epoch": 0.6967358749175085, "grad_norm": 0.524750828742981, "learning_rate": 1.535509416721661e-05, "loss": 0.0827, "step": 13725 }, { "epoch": 0.696989694908371, "grad_norm": 0.44146502017974854, "learning_rate": 1.5353402033944195e-05, "loss": 0.08, "step": 13730 }, { "epoch": 0.6972435148992334, "grad_norm": 0.49009469151496887, "learning_rate": 1.5351709900671778e-05, "loss": 0.0832, "step": 13735 }, { "epoch": 0.6974973348900959, "grad_norm": 0.5824193358421326, "learning_rate": 1.5350017767399362e-05, "loss": 0.0844, "step": 13740 }, { "epoch": 0.6977511548809584, "grad_norm": 0.3237861394882202, "learning_rate": 1.5348325634126945e-05, "loss": 0.0803, "step": 13745 }, { "epoch": 0.6980049748718209, "grad_norm": 0.42894473671913147, "learning_rate": 1.534663350085453e-05, "loss": 0.0822, "step": 13750 }, { "epoch": 0.6982587948626834, "grad_norm": 0.43603384494781494, "learning_rate": 1.5344941367582112e-05, "loss": 0.0749, "step": 13755 }, { "epoch": 0.6985126148535459, "grad_norm": 0.3695114850997925, "learning_rate": 1.5343249234309696e-05, "loss": 0.0844, "step": 13760 }, { "epoch": 0.6987664348444084, "grad_norm": 0.43473580479621887, "learning_rate": 1.534155710103728e-05, "loss": 0.0835, "step": 13765 }, { "epoch": 0.6990202548352709, "grad_norm": 1.5757832527160645, "learning_rate": 1.5339864967764863e-05, "loss": 0.0821, "step": 13770 }, { "epoch": 0.6992740748261334, "grad_norm": 0.6827208399772644, "learning_rate": 1.5338172834492447e-05, "loss": 0.0784, "step": 13775 }, { "epoch": 0.6995278948169957, "grad_norm": 0.30773502588272095, "learning_rate": 1.533648070122003e-05, "loss": 0.1152, "step": 13780 }, { "epoch": 0.6997817148078582, "grad_norm": 0.4392739534378052, "learning_rate": 1.5334788567947614e-05, "loss": 0.0784, "step": 13785 }, { "epoch": 0.7000355347987207, "grad_norm": 0.7191630601882935, "learning_rate": 1.5333096434675197e-05, "loss": 0.0789, "step": 13790 }, { "epoch": 0.7002893547895832, "grad_norm": 0.2967609763145447, "learning_rate": 1.533140430140278e-05, "loss": 0.0778, "step": 13795 }, { "epoch": 0.7005431747804457, "grad_norm": 0.9909408688545227, "learning_rate": 1.5329712168130364e-05, "loss": 0.0884, "step": 13800 }, { "epoch": 0.7007969947713082, "grad_norm": 0.35766205191612244, "learning_rate": 1.5328020034857948e-05, "loss": 0.0733, "step": 13805 }, { "epoch": 0.7010508147621707, "grad_norm": 0.44659724831581116, "learning_rate": 1.5326327901585528e-05, "loss": 0.0882, "step": 13810 }, { "epoch": 0.7013046347530332, "grad_norm": 0.34097808599472046, "learning_rate": 1.5324635768313115e-05, "loss": 0.0785, "step": 13815 }, { "epoch": 0.7015584547438957, "grad_norm": 0.34435734152793884, "learning_rate": 1.5322943635040695e-05, "loss": 0.0818, "step": 13820 }, { "epoch": 0.7018122747347582, "grad_norm": 0.4698044955730438, "learning_rate": 1.532125150176828e-05, "loss": 0.0848, "step": 13825 }, { "epoch": 0.7020660947256205, "grad_norm": 0.4470899999141693, "learning_rate": 1.5319559368495866e-05, "loss": 0.0815, "step": 13830 }, { "epoch": 0.702319914716483, "grad_norm": 0.3169173300266266, "learning_rate": 1.5317867235223446e-05, "loss": 0.0738, "step": 13835 }, { "epoch": 0.7025737347073455, "grad_norm": 0.8150441646575928, "learning_rate": 1.5316175101951033e-05, "loss": 0.0763, "step": 13840 }, { "epoch": 0.702827554698208, "grad_norm": 0.35342103242874146, "learning_rate": 1.5314482968678613e-05, "loss": 0.0833, "step": 13845 }, { "epoch": 0.7030813746890705, "grad_norm": 0.771515429019928, "learning_rate": 1.5312790835406196e-05, "loss": 0.0825, "step": 13850 }, { "epoch": 0.703335194679933, "grad_norm": 0.3441005051136017, "learning_rate": 1.5311098702133783e-05, "loss": 0.0842, "step": 13855 }, { "epoch": 0.7035890146707955, "grad_norm": 0.45794785022735596, "learning_rate": 1.5309406568861363e-05, "loss": 0.0833, "step": 13860 }, { "epoch": 0.703842834661658, "grad_norm": 0.41484132409095764, "learning_rate": 1.5307714435588947e-05, "loss": 0.0749, "step": 13865 }, { "epoch": 0.7040966546525205, "grad_norm": 0.38982802629470825, "learning_rate": 1.530602230231653e-05, "loss": 0.0762, "step": 13870 }, { "epoch": 0.704350474643383, "grad_norm": 0.3871098458766937, "learning_rate": 1.5304330169044114e-05, "loss": 0.0876, "step": 13875 }, { "epoch": 0.7046042946342453, "grad_norm": 0.3723105192184448, "learning_rate": 1.53026380357717e-05, "loss": 0.0687, "step": 13880 }, { "epoch": 0.7048581146251078, "grad_norm": 0.4953676462173462, "learning_rate": 1.530094590249928e-05, "loss": 0.0743, "step": 13885 }, { "epoch": 0.7051119346159703, "grad_norm": 0.43342849612236023, "learning_rate": 1.5299253769226865e-05, "loss": 0.0738, "step": 13890 }, { "epoch": 0.7053657546068328, "grad_norm": 0.6391082406044006, "learning_rate": 1.5297561635954448e-05, "loss": 0.0903, "step": 13895 }, { "epoch": 0.7056195745976953, "grad_norm": 0.46714121103286743, "learning_rate": 1.5295869502682032e-05, "loss": 0.0842, "step": 13900 }, { "epoch": 0.7058733945885578, "grad_norm": 0.5677165985107422, "learning_rate": 1.5294177369409615e-05, "loss": 0.0879, "step": 13905 }, { "epoch": 0.7061272145794203, "grad_norm": 0.4069088101387024, "learning_rate": 1.52924852361372e-05, "loss": 0.0744, "step": 13910 }, { "epoch": 0.7063810345702828, "grad_norm": 0.44778573513031006, "learning_rate": 1.5290793102864782e-05, "loss": 0.073, "step": 13915 }, { "epoch": 0.7066348545611453, "grad_norm": 0.40604692697525024, "learning_rate": 1.5289100969592366e-05, "loss": 0.089, "step": 13920 }, { "epoch": 0.7068886745520078, "grad_norm": 0.3721408545970917, "learning_rate": 1.528740883631995e-05, "loss": 0.0789, "step": 13925 }, { "epoch": 0.7071424945428701, "grad_norm": 0.31769973039627075, "learning_rate": 1.5285716703047533e-05, "loss": 0.0816, "step": 13930 }, { "epoch": 0.7073963145337326, "grad_norm": 0.4818114638328552, "learning_rate": 1.5284024569775117e-05, "loss": 0.0868, "step": 13935 }, { "epoch": 0.7076501345245951, "grad_norm": 0.3946104943752289, "learning_rate": 1.52823324365027e-05, "loss": 0.0707, "step": 13940 }, { "epoch": 0.7079039545154576, "grad_norm": 0.6479266285896301, "learning_rate": 1.5280640303230284e-05, "loss": 0.0821, "step": 13945 }, { "epoch": 0.7081577745063201, "grad_norm": 0.32928308844566345, "learning_rate": 1.5278948169957867e-05, "loss": 0.0737, "step": 13950 }, { "epoch": 0.7084115944971826, "grad_norm": 0.39813652634620667, "learning_rate": 1.527725603668545e-05, "loss": 0.0796, "step": 13955 }, { "epoch": 0.7086654144880451, "grad_norm": 0.4560108780860901, "learning_rate": 1.5275563903413034e-05, "loss": 0.0915, "step": 13960 }, { "epoch": 0.7089192344789076, "grad_norm": 1.1247360706329346, "learning_rate": 1.5273871770140618e-05, "loss": 0.0962, "step": 13965 }, { "epoch": 0.7091730544697701, "grad_norm": 0.43578168749809265, "learning_rate": 1.52721796368682e-05, "loss": 0.0834, "step": 13970 }, { "epoch": 0.7094268744606326, "grad_norm": 0.45874667167663574, "learning_rate": 1.5270487503595785e-05, "loss": 0.072, "step": 13975 }, { "epoch": 0.709680694451495, "grad_norm": 0.5207342505455017, "learning_rate": 1.526879537032337e-05, "loss": 0.0886, "step": 13980 }, { "epoch": 0.7099345144423574, "grad_norm": 0.39302659034729004, "learning_rate": 1.5267103237050952e-05, "loss": 0.0836, "step": 13985 }, { "epoch": 0.7101883344332199, "grad_norm": 0.44471731781959534, "learning_rate": 1.5265411103778536e-05, "loss": 0.1, "step": 13990 }, { "epoch": 0.7104421544240824, "grad_norm": 0.386536180973053, "learning_rate": 1.526371897050612e-05, "loss": 0.0816, "step": 13995 }, { "epoch": 0.7106959744149449, "grad_norm": 0.4404861330986023, "learning_rate": 1.52620268372337e-05, "loss": 0.0936, "step": 14000 }, { "epoch": 0.7109497944058074, "grad_norm": 0.7851703763008118, "learning_rate": 1.5260334703961286e-05, "loss": 0.0798, "step": 14005 }, { "epoch": 0.7112036143966699, "grad_norm": 0.6960633993148804, "learning_rate": 1.5258642570688868e-05, "loss": 0.0838, "step": 14010 }, { "epoch": 0.7114574343875324, "grad_norm": 0.47619956731796265, "learning_rate": 1.5256950437416452e-05, "loss": 0.0904, "step": 14015 }, { "epoch": 0.7117112543783949, "grad_norm": 0.6438000202178955, "learning_rate": 1.5255258304144037e-05, "loss": 0.0736, "step": 14020 }, { "epoch": 0.7119650743692574, "grad_norm": 0.4882700741291046, "learning_rate": 1.5253566170871619e-05, "loss": 0.0878, "step": 14025 }, { "epoch": 0.7122188943601198, "grad_norm": 0.36032137274742126, "learning_rate": 1.5251874037599204e-05, "loss": 0.0772, "step": 14030 }, { "epoch": 0.7124727143509823, "grad_norm": 0.5064921379089355, "learning_rate": 1.5250181904326786e-05, "loss": 0.0799, "step": 14035 }, { "epoch": 0.7127265343418447, "grad_norm": 0.586258590221405, "learning_rate": 1.524848977105437e-05, "loss": 0.0856, "step": 14040 }, { "epoch": 0.7129803543327072, "grad_norm": 0.4364579916000366, "learning_rate": 1.5246797637781953e-05, "loss": 0.0797, "step": 14045 }, { "epoch": 0.7132341743235697, "grad_norm": 0.30518513917922974, "learning_rate": 1.5245105504509536e-05, "loss": 0.0821, "step": 14050 }, { "epoch": 0.7134879943144322, "grad_norm": 0.3665226399898529, "learning_rate": 1.5243413371237118e-05, "loss": 0.0899, "step": 14055 }, { "epoch": 0.7137418143052947, "grad_norm": 0.5251713395118713, "learning_rate": 1.5241721237964704e-05, "loss": 0.0837, "step": 14060 }, { "epoch": 0.7139956342961572, "grad_norm": 0.37838253378868103, "learning_rate": 1.5240029104692287e-05, "loss": 0.0744, "step": 14065 }, { "epoch": 0.7142494542870197, "grad_norm": 0.4053058922290802, "learning_rate": 1.5238336971419869e-05, "loss": 0.0696, "step": 14070 }, { "epoch": 0.7145032742778821, "grad_norm": 0.4535035192966461, "learning_rate": 1.5236644838147454e-05, "loss": 0.0718, "step": 14075 }, { "epoch": 0.7147570942687446, "grad_norm": 0.2996232807636261, "learning_rate": 1.5234952704875036e-05, "loss": 0.0823, "step": 14080 }, { "epoch": 0.715010914259607, "grad_norm": 0.40874767303466797, "learning_rate": 1.5233260571602621e-05, "loss": 0.0793, "step": 14085 }, { "epoch": 0.7152647342504695, "grad_norm": 0.34170955419540405, "learning_rate": 1.5231568438330205e-05, "loss": 0.0722, "step": 14090 }, { "epoch": 0.715518554241332, "grad_norm": 0.42445680499076843, "learning_rate": 1.5229876305057787e-05, "loss": 0.0707, "step": 14095 }, { "epoch": 0.7157723742321945, "grad_norm": 0.5553193688392639, "learning_rate": 1.5228184171785372e-05, "loss": 0.0758, "step": 14100 }, { "epoch": 0.716026194223057, "grad_norm": 0.3143168091773987, "learning_rate": 1.5226492038512954e-05, "loss": 0.0619, "step": 14105 }, { "epoch": 0.7162800142139195, "grad_norm": 0.633912980556488, "learning_rate": 1.5224799905240537e-05, "loss": 0.0746, "step": 14110 }, { "epoch": 0.716533834204782, "grad_norm": 0.3377459943294525, "learning_rate": 1.5223107771968123e-05, "loss": 0.0761, "step": 14115 }, { "epoch": 0.7167876541956445, "grad_norm": 0.3944687247276306, "learning_rate": 1.5221415638695704e-05, "loss": 0.0766, "step": 14120 }, { "epoch": 0.7170414741865069, "grad_norm": 0.4088902771472931, "learning_rate": 1.521972350542329e-05, "loss": 0.077, "step": 14125 }, { "epoch": 0.7172952941773694, "grad_norm": 0.4192088842391968, "learning_rate": 1.5218031372150872e-05, "loss": 0.0778, "step": 14130 }, { "epoch": 0.7175491141682319, "grad_norm": 0.36348608136177063, "learning_rate": 1.5216339238878455e-05, "loss": 0.0854, "step": 14135 }, { "epoch": 0.7178029341590944, "grad_norm": 0.3958783447742462, "learning_rate": 1.521464710560604e-05, "loss": 0.0742, "step": 14140 }, { "epoch": 0.7180567541499568, "grad_norm": 0.44732987880706787, "learning_rate": 1.5212954972333622e-05, "loss": 0.076, "step": 14145 }, { "epoch": 0.7183105741408193, "grad_norm": 1.2303112745285034, "learning_rate": 1.5211262839061206e-05, "loss": 0.0784, "step": 14150 }, { "epoch": 0.7185643941316818, "grad_norm": 0.4170047342777252, "learning_rate": 1.520957070578879e-05, "loss": 0.0863, "step": 14155 }, { "epoch": 0.7188182141225443, "grad_norm": 0.3879324793815613, "learning_rate": 1.5207878572516373e-05, "loss": 0.0756, "step": 14160 }, { "epoch": 0.7190720341134068, "grad_norm": 0.516608476638794, "learning_rate": 1.5206186439243955e-05, "loss": 0.0821, "step": 14165 }, { "epoch": 0.7193258541042693, "grad_norm": 0.4164332151412964, "learning_rate": 1.520449430597154e-05, "loss": 0.0678, "step": 14170 }, { "epoch": 0.7195796740951317, "grad_norm": 0.4437199532985687, "learning_rate": 1.5202802172699123e-05, "loss": 0.0825, "step": 14175 }, { "epoch": 0.7198334940859942, "grad_norm": 0.49337777495384216, "learning_rate": 1.5201110039426707e-05, "loss": 0.106, "step": 14180 }, { "epoch": 0.7200873140768567, "grad_norm": 0.43861648440361023, "learning_rate": 1.519941790615429e-05, "loss": 0.0871, "step": 14185 }, { "epoch": 0.7203411340677192, "grad_norm": 194.89373779296875, "learning_rate": 1.5197725772881872e-05, "loss": 0.0967, "step": 14190 }, { "epoch": 0.7205949540585816, "grad_norm": 0.3648269474506378, "learning_rate": 1.5196033639609458e-05, "loss": 0.0855, "step": 14195 }, { "epoch": 0.7208487740494441, "grad_norm": 0.4006633758544922, "learning_rate": 1.5194341506337041e-05, "loss": 0.0798, "step": 14200 }, { "epoch": 0.7211025940403066, "grad_norm": 0.4306533932685852, "learning_rate": 1.5192649373064623e-05, "loss": 0.0915, "step": 14205 }, { "epoch": 0.7213564140311691, "grad_norm": 0.8204346895217896, "learning_rate": 1.5190957239792208e-05, "loss": 0.0843, "step": 14210 }, { "epoch": 0.7216102340220316, "grad_norm": 0.40292853116989136, "learning_rate": 1.518926510651979e-05, "loss": 0.0846, "step": 14215 }, { "epoch": 0.7218640540128941, "grad_norm": 0.4678119122982025, "learning_rate": 1.5187572973247374e-05, "loss": 0.0818, "step": 14220 }, { "epoch": 0.7221178740037565, "grad_norm": 0.5010687708854675, "learning_rate": 1.5185880839974957e-05, "loss": 0.081, "step": 14225 }, { "epoch": 0.722371693994619, "grad_norm": 1.1079010963439941, "learning_rate": 1.518418870670254e-05, "loss": 0.0954, "step": 14230 }, { "epoch": 0.7226255139854815, "grad_norm": 0.32439419627189636, "learning_rate": 1.5182496573430126e-05, "loss": 0.0801, "step": 14235 }, { "epoch": 0.722879333976344, "grad_norm": 0.4611896276473999, "learning_rate": 1.5180804440157708e-05, "loss": 0.07, "step": 14240 }, { "epoch": 0.7231331539672065, "grad_norm": 0.45286303758621216, "learning_rate": 1.5179112306885291e-05, "loss": 0.0791, "step": 14245 }, { "epoch": 0.723386973958069, "grad_norm": 0.6157929301261902, "learning_rate": 1.5177420173612875e-05, "loss": 0.0787, "step": 14250 }, { "epoch": 0.7236407939489314, "grad_norm": 0.4677484333515167, "learning_rate": 1.5175728040340458e-05, "loss": 0.078, "step": 14255 }, { "epoch": 0.7238946139397939, "grad_norm": 0.4395294487476349, "learning_rate": 1.517403590706804e-05, "loss": 0.0879, "step": 14260 }, { "epoch": 0.7241484339306564, "grad_norm": 0.48061785101890564, "learning_rate": 1.5172343773795626e-05, "loss": 0.0812, "step": 14265 }, { "epoch": 0.7244022539215189, "grad_norm": 0.378859281539917, "learning_rate": 1.5170651640523209e-05, "loss": 0.0837, "step": 14270 }, { "epoch": 0.7246560739123813, "grad_norm": 0.40391427278518677, "learning_rate": 1.5168959507250793e-05, "loss": 0.0909, "step": 14275 }, { "epoch": 0.7249098939032438, "grad_norm": 0.3788898289203644, "learning_rate": 1.5167267373978376e-05, "loss": 0.0832, "step": 14280 }, { "epoch": 0.7251637138941063, "grad_norm": 0.2744353413581848, "learning_rate": 1.5165575240705958e-05, "loss": 0.0712, "step": 14285 }, { "epoch": 0.7254175338849688, "grad_norm": 0.49795615673065186, "learning_rate": 1.5163883107433543e-05, "loss": 0.0686, "step": 14290 }, { "epoch": 0.7256713538758313, "grad_norm": 1.3748142719268799, "learning_rate": 1.5162190974161127e-05, "loss": 0.0906, "step": 14295 }, { "epoch": 0.7259251738666938, "grad_norm": 0.409864604473114, "learning_rate": 1.5160498840888709e-05, "loss": 0.082, "step": 14300 }, { "epoch": 0.7261789938575562, "grad_norm": 0.4577494263648987, "learning_rate": 1.5158806707616294e-05, "loss": 0.0847, "step": 14305 }, { "epoch": 0.7264328138484187, "grad_norm": 0.3561350703239441, "learning_rate": 1.5157114574343876e-05, "loss": 0.0878, "step": 14310 }, { "epoch": 0.7266866338392812, "grad_norm": 0.3978186547756195, "learning_rate": 1.515542244107146e-05, "loss": 0.0881, "step": 14315 }, { "epoch": 0.7269404538301436, "grad_norm": 0.2696934938430786, "learning_rate": 1.5153730307799045e-05, "loss": 0.0994, "step": 14320 }, { "epoch": 0.7271942738210061, "grad_norm": 0.47918376326560974, "learning_rate": 1.5152038174526626e-05, "loss": 0.0808, "step": 14325 }, { "epoch": 0.7274480938118686, "grad_norm": 0.39144304394721985, "learning_rate": 1.5150346041254212e-05, "loss": 0.0863, "step": 14330 }, { "epoch": 0.7277019138027311, "grad_norm": 0.39072200655937195, "learning_rate": 1.5148653907981793e-05, "loss": 0.0732, "step": 14335 }, { "epoch": 0.7279557337935936, "grad_norm": 0.3815653324127197, "learning_rate": 1.5146961774709377e-05, "loss": 0.0839, "step": 14340 }, { "epoch": 0.7282095537844561, "grad_norm": 0.4559372067451477, "learning_rate": 1.5145269641436962e-05, "loss": 0.0841, "step": 14345 }, { "epoch": 0.7284633737753186, "grad_norm": 0.42653796076774597, "learning_rate": 1.5143577508164544e-05, "loss": 0.0842, "step": 14350 }, { "epoch": 0.728717193766181, "grad_norm": 0.4985381066799164, "learning_rate": 1.5141885374892128e-05, "loss": 0.0883, "step": 14355 }, { "epoch": 0.7289710137570435, "grad_norm": 0.5497139692306519, "learning_rate": 1.5140193241619711e-05, "loss": 0.0819, "step": 14360 }, { "epoch": 0.729224833747906, "grad_norm": 0.595309853553772, "learning_rate": 1.5138501108347295e-05, "loss": 0.0848, "step": 14365 }, { "epoch": 0.7294786537387684, "grad_norm": 0.47207218408584595, "learning_rate": 1.513680897507488e-05, "loss": 0.0805, "step": 14370 }, { "epoch": 0.7297324737296309, "grad_norm": 0.38911616802215576, "learning_rate": 1.5135116841802462e-05, "loss": 0.0855, "step": 14375 }, { "epoch": 0.7299862937204934, "grad_norm": 0.6578527092933655, "learning_rate": 1.5133424708530045e-05, "loss": 0.0821, "step": 14380 }, { "epoch": 0.7302401137113559, "grad_norm": 0.3873518705368042, "learning_rate": 1.5131732575257629e-05, "loss": 0.0787, "step": 14385 }, { "epoch": 0.7304939337022184, "grad_norm": 0.3727337718009949, "learning_rate": 1.5130040441985212e-05, "loss": 0.0711, "step": 14390 }, { "epoch": 0.7307477536930809, "grad_norm": 0.3558390140533447, "learning_rate": 1.5128348308712794e-05, "loss": 0.0633, "step": 14395 }, { "epoch": 0.7310015736839434, "grad_norm": 0.44894295930862427, "learning_rate": 1.512665617544038e-05, "loss": 0.0775, "step": 14400 }, { "epoch": 0.7312553936748059, "grad_norm": 0.44867026805877686, "learning_rate": 1.5124964042167961e-05, "loss": 0.0862, "step": 14405 }, { "epoch": 0.7315092136656683, "grad_norm": 0.3655999004840851, "learning_rate": 1.5123271908895545e-05, "loss": 0.0851, "step": 14410 }, { "epoch": 0.7317630336565308, "grad_norm": 0.4013903737068176, "learning_rate": 1.512157977562313e-05, "loss": 0.0861, "step": 14415 }, { "epoch": 0.7320168536473932, "grad_norm": 0.3827629089355469, "learning_rate": 1.5119887642350712e-05, "loss": 0.0884, "step": 14420 }, { "epoch": 0.7322706736382557, "grad_norm": 0.48554348945617676, "learning_rate": 1.5118195509078297e-05, "loss": 0.0761, "step": 14425 }, { "epoch": 0.7325244936291182, "grad_norm": 0.3680451214313507, "learning_rate": 1.5116503375805879e-05, "loss": 0.0731, "step": 14430 }, { "epoch": 0.7327783136199807, "grad_norm": 0.47662273049354553, "learning_rate": 1.5114811242533463e-05, "loss": 0.0814, "step": 14435 }, { "epoch": 0.7330321336108432, "grad_norm": 0.4592245817184448, "learning_rate": 1.5113119109261048e-05, "loss": 0.0804, "step": 14440 }, { "epoch": 0.7332859536017057, "grad_norm": 0.4146338701248169, "learning_rate": 1.511142697598863e-05, "loss": 0.0902, "step": 14445 }, { "epoch": 0.7335397735925682, "grad_norm": 0.4722953736782074, "learning_rate": 1.5109734842716213e-05, "loss": 0.0863, "step": 14450 }, { "epoch": 0.7337935935834307, "grad_norm": 0.7869719862937927, "learning_rate": 1.5108042709443797e-05, "loss": 0.0794, "step": 14455 }, { "epoch": 0.7340474135742932, "grad_norm": 0.4236357808113098, "learning_rate": 1.510635057617138e-05, "loss": 0.0723, "step": 14460 }, { "epoch": 0.7343012335651556, "grad_norm": 0.38017693161964417, "learning_rate": 1.5104658442898962e-05, "loss": 0.0717, "step": 14465 }, { "epoch": 0.734555053556018, "grad_norm": 0.3802855908870697, "learning_rate": 1.5102966309626547e-05, "loss": 0.0795, "step": 14470 }, { "epoch": 0.7348088735468805, "grad_norm": 0.506759762763977, "learning_rate": 1.5101274176354131e-05, "loss": 0.0783, "step": 14475 }, { "epoch": 0.735062693537743, "grad_norm": 0.6665279269218445, "learning_rate": 1.5099582043081715e-05, "loss": 0.0778, "step": 14480 }, { "epoch": 0.7353165135286055, "grad_norm": 0.5124176144599915, "learning_rate": 1.5097889909809298e-05, "loss": 0.0895, "step": 14485 }, { "epoch": 0.735570333519468, "grad_norm": 0.3736747205257416, "learning_rate": 1.509619777653688e-05, "loss": 0.0754, "step": 14490 }, { "epoch": 0.7358241535103305, "grad_norm": 0.5663830637931824, "learning_rate": 1.5094505643264465e-05, "loss": 0.0744, "step": 14495 }, { "epoch": 0.736077973501193, "grad_norm": 0.45994728803634644, "learning_rate": 1.5092813509992049e-05, "loss": 0.0805, "step": 14500 }, { "epoch": 0.7363317934920555, "grad_norm": 0.4209723472595215, "learning_rate": 1.509112137671963e-05, "loss": 0.07, "step": 14505 }, { "epoch": 0.736585613482918, "grad_norm": 0.3272269666194916, "learning_rate": 1.5089429243447216e-05, "loss": 0.0657, "step": 14510 }, { "epoch": 0.7368394334737804, "grad_norm": 0.4161807894706726, "learning_rate": 1.5087737110174798e-05, "loss": 0.0726, "step": 14515 }, { "epoch": 0.7370932534646428, "grad_norm": 0.43482705950737, "learning_rate": 1.5086044976902383e-05, "loss": 0.0788, "step": 14520 }, { "epoch": 0.7373470734555053, "grad_norm": 0.5974509716033936, "learning_rate": 1.5084352843629966e-05, "loss": 0.0749, "step": 14525 }, { "epoch": 0.7376008934463678, "grad_norm": 0.5275264978408813, "learning_rate": 1.5082660710357548e-05, "loss": 0.0762, "step": 14530 }, { "epoch": 0.7378547134372303, "grad_norm": 0.4393943250179291, "learning_rate": 1.5080968577085134e-05, "loss": 0.0831, "step": 14535 }, { "epoch": 0.7381085334280928, "grad_norm": 0.3507572114467621, "learning_rate": 1.5079276443812715e-05, "loss": 0.0859, "step": 14540 }, { "epoch": 0.7383623534189553, "grad_norm": 0.3876013457775116, "learning_rate": 1.5077584310540299e-05, "loss": 0.0779, "step": 14545 }, { "epoch": 0.7386161734098178, "grad_norm": 0.37205737829208374, "learning_rate": 1.5075892177267884e-05, "loss": 0.0737, "step": 14550 }, { "epoch": 0.7388699934006803, "grad_norm": 0.4882403314113617, "learning_rate": 1.5074200043995466e-05, "loss": 0.0878, "step": 14555 }, { "epoch": 0.7391238133915428, "grad_norm": 0.5520374774932861, "learning_rate": 1.507250791072305e-05, "loss": 0.0966, "step": 14560 }, { "epoch": 0.7393776333824053, "grad_norm": 0.367116779088974, "learning_rate": 1.5070815777450633e-05, "loss": 0.0819, "step": 14565 }, { "epoch": 0.7396314533732676, "grad_norm": 0.4586057960987091, "learning_rate": 1.5069123644178217e-05, "loss": 0.0776, "step": 14570 }, { "epoch": 0.7398852733641301, "grad_norm": 0.4211220145225525, "learning_rate": 1.5067431510905802e-05, "loss": 0.0726, "step": 14575 }, { "epoch": 0.7401390933549926, "grad_norm": 0.4111827611923218, "learning_rate": 1.5065739377633384e-05, "loss": 0.0803, "step": 14580 }, { "epoch": 0.7403929133458551, "grad_norm": 0.49343472719192505, "learning_rate": 1.5064047244360966e-05, "loss": 0.0835, "step": 14585 }, { "epoch": 0.7406467333367176, "grad_norm": 0.3709668815135956, "learning_rate": 1.506235511108855e-05, "loss": 0.0845, "step": 14590 }, { "epoch": 0.7409005533275801, "grad_norm": 0.45862144231796265, "learning_rate": 1.5060662977816134e-05, "loss": 0.0886, "step": 14595 }, { "epoch": 0.7411543733184426, "grad_norm": 0.4751611351966858, "learning_rate": 1.5058970844543716e-05, "loss": 0.0785, "step": 14600 }, { "epoch": 0.7414081933093051, "grad_norm": 0.302118718624115, "learning_rate": 1.5057278711271301e-05, "loss": 0.0766, "step": 14605 }, { "epoch": 0.7416620133001676, "grad_norm": 0.5119585990905762, "learning_rate": 1.5055586577998883e-05, "loss": 0.0839, "step": 14610 }, { "epoch": 0.74191583329103, "grad_norm": 0.3752945363521576, "learning_rate": 1.5053894444726469e-05, "loss": 0.076, "step": 14615 }, { "epoch": 0.7421696532818924, "grad_norm": 0.5176377296447754, "learning_rate": 1.5052202311454052e-05, "loss": 0.0817, "step": 14620 }, { "epoch": 0.7424234732727549, "grad_norm": 0.44988998770713806, "learning_rate": 1.5050510178181634e-05, "loss": 0.0824, "step": 14625 }, { "epoch": 0.7426772932636174, "grad_norm": 0.3214319348335266, "learning_rate": 1.504881804490922e-05, "loss": 0.0744, "step": 14630 }, { "epoch": 0.7429311132544799, "grad_norm": 0.5503107905387878, "learning_rate": 1.5047125911636801e-05, "loss": 0.0731, "step": 14635 }, { "epoch": 0.7431849332453424, "grad_norm": 1.3438857793807983, "learning_rate": 1.5045433778364385e-05, "loss": 0.0832, "step": 14640 }, { "epoch": 0.7434387532362049, "grad_norm": 0.447172075510025, "learning_rate": 1.504374164509197e-05, "loss": 0.072, "step": 14645 }, { "epoch": 0.7436925732270674, "grad_norm": 0.39440739154815674, "learning_rate": 1.5042049511819552e-05, "loss": 0.0921, "step": 14650 }, { "epoch": 0.7439463932179299, "grad_norm": 0.39479300379753113, "learning_rate": 1.5040357378547135e-05, "loss": 0.0791, "step": 14655 }, { "epoch": 0.7442002132087924, "grad_norm": 0.5535449385643005, "learning_rate": 1.5038665245274719e-05, "loss": 0.0776, "step": 14660 }, { "epoch": 0.7444540331996548, "grad_norm": 0.3662354648113251, "learning_rate": 1.5036973112002302e-05, "loss": 0.0746, "step": 14665 }, { "epoch": 0.7447078531905172, "grad_norm": 0.34598585963249207, "learning_rate": 1.5035280978729888e-05, "loss": 0.0781, "step": 14670 }, { "epoch": 0.7449616731813797, "grad_norm": 0.43272921442985535, "learning_rate": 1.503358884545747e-05, "loss": 0.0801, "step": 14675 }, { "epoch": 0.7452154931722422, "grad_norm": 0.3881741464138031, "learning_rate": 1.5031896712185053e-05, "loss": 0.0823, "step": 14680 }, { "epoch": 0.7454693131631047, "grad_norm": 0.40420830249786377, "learning_rate": 1.5030204578912636e-05, "loss": 0.0881, "step": 14685 }, { "epoch": 0.7457231331539672, "grad_norm": 0.6293859481811523, "learning_rate": 1.502851244564022e-05, "loss": 0.0792, "step": 14690 }, { "epoch": 0.7459769531448297, "grad_norm": 0.6415058970451355, "learning_rate": 1.5026820312367802e-05, "loss": 0.0787, "step": 14695 }, { "epoch": 0.7462307731356922, "grad_norm": 0.9900127053260803, "learning_rate": 1.5025128179095387e-05, "loss": 0.08, "step": 14700 }, { "epoch": 0.7464845931265547, "grad_norm": 0.39871150255203247, "learning_rate": 1.502343604582297e-05, "loss": 0.0727, "step": 14705 }, { "epoch": 0.7467384131174172, "grad_norm": 0.46820712089538574, "learning_rate": 1.5021743912550553e-05, "loss": 0.0884, "step": 14710 }, { "epoch": 0.7469922331082796, "grad_norm": 0.8658115267753601, "learning_rate": 1.5020051779278138e-05, "loss": 0.0752, "step": 14715 }, { "epoch": 0.747246053099142, "grad_norm": 0.5432624220848083, "learning_rate": 1.501835964600572e-05, "loss": 0.072, "step": 14720 }, { "epoch": 0.7474998730900045, "grad_norm": 0.3773350119590759, "learning_rate": 1.5016667512733305e-05, "loss": 0.07, "step": 14725 }, { "epoch": 0.747753693080867, "grad_norm": 0.4341411292552948, "learning_rate": 1.5014975379460888e-05, "loss": 0.0742, "step": 14730 }, { "epoch": 0.7480075130717295, "grad_norm": 0.5234106183052063, "learning_rate": 1.501328324618847e-05, "loss": 0.0716, "step": 14735 }, { "epoch": 0.748261333062592, "grad_norm": 0.7145912051200867, "learning_rate": 1.5011591112916055e-05, "loss": 0.0841, "step": 14740 }, { "epoch": 0.7485151530534545, "grad_norm": 0.42830368876457214, "learning_rate": 1.5009898979643637e-05, "loss": 0.075, "step": 14745 }, { "epoch": 0.748768973044317, "grad_norm": 0.4660949409008026, "learning_rate": 1.5008206846371221e-05, "loss": 0.0733, "step": 14750 }, { "epoch": 0.7490227930351795, "grad_norm": 0.34171634912490845, "learning_rate": 1.5006514713098806e-05, "loss": 0.0887, "step": 14755 }, { "epoch": 0.749276613026042, "grad_norm": 0.8460459113121033, "learning_rate": 1.5004822579826388e-05, "loss": 0.0861, "step": 14760 }, { "epoch": 0.7495304330169044, "grad_norm": 0.5516984462738037, "learning_rate": 1.5003130446553973e-05, "loss": 0.081, "step": 14765 }, { "epoch": 0.7497842530077669, "grad_norm": 0.4025214910507202, "learning_rate": 1.5001438313281555e-05, "loss": 0.0787, "step": 14770 }, { "epoch": 0.7500380729986293, "grad_norm": 0.44463062286376953, "learning_rate": 1.4999746180009139e-05, "loss": 0.0833, "step": 14775 }, { "epoch": 0.7502918929894918, "grad_norm": 0.5166744589805603, "learning_rate": 1.4998054046736724e-05, "loss": 0.084, "step": 14780 }, { "epoch": 0.7505457129803543, "grad_norm": 0.5265631079673767, "learning_rate": 1.4996361913464306e-05, "loss": 0.0787, "step": 14785 }, { "epoch": 0.7507995329712168, "grad_norm": 0.45194345712661743, "learning_rate": 1.4994669780191888e-05, "loss": 0.0765, "step": 14790 }, { "epoch": 0.7510533529620793, "grad_norm": 0.4983544945716858, "learning_rate": 1.4992977646919473e-05, "loss": 0.0803, "step": 14795 }, { "epoch": 0.7513071729529418, "grad_norm": 0.4568130075931549, "learning_rate": 1.4991285513647056e-05, "loss": 0.0702, "step": 14800 }, { "epoch": 0.7515609929438043, "grad_norm": 0.2652099132537842, "learning_rate": 1.4989593380374638e-05, "loss": 0.0802, "step": 14805 }, { "epoch": 0.7518148129346668, "grad_norm": 0.40893828868865967, "learning_rate": 1.4987901247102223e-05, "loss": 0.0747, "step": 14810 }, { "epoch": 0.7520686329255292, "grad_norm": 0.8311263918876648, "learning_rate": 1.4986209113829805e-05, "loss": 0.0787, "step": 14815 }, { "epoch": 0.7523224529163917, "grad_norm": 0.3804740905761719, "learning_rate": 1.498451698055739e-05, "loss": 0.0737, "step": 14820 }, { "epoch": 0.7525762729072542, "grad_norm": 0.3137752115726471, "learning_rate": 1.4982824847284974e-05, "loss": 0.0749, "step": 14825 }, { "epoch": 0.7528300928981166, "grad_norm": 0.3906417191028595, "learning_rate": 1.4981132714012556e-05, "loss": 0.0833, "step": 14830 }, { "epoch": 0.7530839128889791, "grad_norm": 0.6462694406509399, "learning_rate": 1.4979440580740141e-05, "loss": 0.0784, "step": 14835 }, { "epoch": 0.7533377328798416, "grad_norm": 0.5136402249336243, "learning_rate": 1.4977748447467723e-05, "loss": 0.0754, "step": 14840 }, { "epoch": 0.7535915528707041, "grad_norm": 0.5540376305580139, "learning_rate": 1.4976056314195307e-05, "loss": 0.0796, "step": 14845 }, { "epoch": 0.7538453728615666, "grad_norm": 0.4849870800971985, "learning_rate": 1.4974364180922892e-05, "loss": 0.0757, "step": 14850 }, { "epoch": 0.7540991928524291, "grad_norm": 0.739065408706665, "learning_rate": 1.4972672047650474e-05, "loss": 0.0847, "step": 14855 }, { "epoch": 0.7543530128432916, "grad_norm": 0.5149842500686646, "learning_rate": 1.4970979914378057e-05, "loss": 0.0752, "step": 14860 }, { "epoch": 0.754606832834154, "grad_norm": 0.42165669798851013, "learning_rate": 1.496928778110564e-05, "loss": 0.0831, "step": 14865 }, { "epoch": 0.7548606528250165, "grad_norm": 0.49979931116104126, "learning_rate": 1.4967595647833224e-05, "loss": 0.0733, "step": 14870 }, { "epoch": 0.755114472815879, "grad_norm": 0.49712690711021423, "learning_rate": 1.496590351456081e-05, "loss": 0.0803, "step": 14875 }, { "epoch": 0.7553682928067414, "grad_norm": 0.4191552400588989, "learning_rate": 1.4964211381288391e-05, "loss": 0.0834, "step": 14880 }, { "epoch": 0.7556221127976039, "grad_norm": 0.3019554316997528, "learning_rate": 1.4962519248015975e-05, "loss": 0.0778, "step": 14885 }, { "epoch": 0.7558759327884664, "grad_norm": 0.5018486380577087, "learning_rate": 1.4960827114743558e-05, "loss": 0.0766, "step": 14890 }, { "epoch": 0.7561297527793289, "grad_norm": 0.33077868819236755, "learning_rate": 1.4959134981471142e-05, "loss": 0.0752, "step": 14895 }, { "epoch": 0.7563835727701914, "grad_norm": 0.54078209400177, "learning_rate": 1.4957442848198724e-05, "loss": 0.0796, "step": 14900 }, { "epoch": 0.7566373927610539, "grad_norm": 0.4296625554561615, "learning_rate": 1.4955750714926309e-05, "loss": 0.0742, "step": 14905 }, { "epoch": 0.7568912127519163, "grad_norm": 0.33885428309440613, "learning_rate": 1.4954058581653893e-05, "loss": 0.0722, "step": 14910 }, { "epoch": 0.7571450327427788, "grad_norm": 0.4666841924190521, "learning_rate": 1.4952366448381476e-05, "loss": 0.0831, "step": 14915 }, { "epoch": 0.7573988527336413, "grad_norm": 0.45907166600227356, "learning_rate": 1.495067431510906e-05, "loss": 0.0714, "step": 14920 }, { "epoch": 0.7576526727245038, "grad_norm": 0.32929760217666626, "learning_rate": 1.4948982181836642e-05, "loss": 0.0708, "step": 14925 }, { "epoch": 0.7579064927153663, "grad_norm": 0.4375610649585724, "learning_rate": 1.4947290048564227e-05, "loss": 0.0753, "step": 14930 }, { "epoch": 0.7581603127062287, "grad_norm": 0.32175201177597046, "learning_rate": 1.494559791529181e-05, "loss": 0.0816, "step": 14935 }, { "epoch": 0.7584141326970912, "grad_norm": 0.31863266229629517, "learning_rate": 1.4943905782019392e-05, "loss": 0.0768, "step": 14940 }, { "epoch": 0.7586679526879537, "grad_norm": 0.5156002640724182, "learning_rate": 1.4942213648746977e-05, "loss": 0.0784, "step": 14945 }, { "epoch": 0.7589217726788162, "grad_norm": 0.3342384696006775, "learning_rate": 1.494052151547456e-05, "loss": 0.0766, "step": 14950 }, { "epoch": 0.7591755926696787, "grad_norm": 0.3848695456981659, "learning_rate": 1.4938829382202143e-05, "loss": 0.0882, "step": 14955 }, { "epoch": 0.7594294126605411, "grad_norm": 0.32741138339042664, "learning_rate": 1.4937137248929728e-05, "loss": 0.0804, "step": 14960 }, { "epoch": 0.7596832326514036, "grad_norm": 0.4225481152534485, "learning_rate": 1.493544511565731e-05, "loss": 0.0775, "step": 14965 }, { "epoch": 0.7599370526422661, "grad_norm": 0.5659987926483154, "learning_rate": 1.4933752982384895e-05, "loss": 0.09, "step": 14970 }, { "epoch": 0.7601908726331286, "grad_norm": 0.46714866161346436, "learning_rate": 1.4932060849112477e-05, "loss": 0.076, "step": 14975 }, { "epoch": 0.7604446926239911, "grad_norm": 0.3290693759918213, "learning_rate": 1.493036871584006e-05, "loss": 0.0818, "step": 14980 }, { "epoch": 0.7606985126148536, "grad_norm": 0.43824639916419983, "learning_rate": 1.4928676582567646e-05, "loss": 0.0692, "step": 14985 }, { "epoch": 0.760952332605716, "grad_norm": 0.34384244680404663, "learning_rate": 1.4926984449295228e-05, "loss": 0.0865, "step": 14990 }, { "epoch": 0.7612061525965785, "grad_norm": 0.45859983563423157, "learning_rate": 1.492529231602281e-05, "loss": 0.0699, "step": 14995 }, { "epoch": 0.761459972587441, "grad_norm": 0.4020313024520874, "learning_rate": 1.4923600182750395e-05, "loss": 0.079, "step": 15000 }, { "epoch": 0.7617137925783035, "grad_norm": 0.41197675466537476, "learning_rate": 1.4921908049477978e-05, "loss": 0.0748, "step": 15005 }, { "epoch": 0.7619676125691659, "grad_norm": 0.5557354092597961, "learning_rate": 1.4920215916205563e-05, "loss": 0.084, "step": 15010 }, { "epoch": 0.7622214325600284, "grad_norm": 0.39988163113594055, "learning_rate": 1.4918523782933145e-05, "loss": 0.0835, "step": 15015 }, { "epoch": 0.7624752525508909, "grad_norm": 0.3414739966392517, "learning_rate": 1.4916831649660727e-05, "loss": 0.0888, "step": 15020 }, { "epoch": 0.7627290725417534, "grad_norm": 0.37848934531211853, "learning_rate": 1.4915139516388312e-05, "loss": 0.0784, "step": 15025 }, { "epoch": 0.7629828925326159, "grad_norm": 0.4667239189147949, "learning_rate": 1.4913447383115896e-05, "loss": 0.08, "step": 15030 }, { "epoch": 0.7632367125234784, "grad_norm": 0.42531412839889526, "learning_rate": 1.4911755249843478e-05, "loss": 0.0815, "step": 15035 }, { "epoch": 0.7634905325143408, "grad_norm": 0.4677846133708954, "learning_rate": 1.4910063116571063e-05, "loss": 0.0779, "step": 15040 }, { "epoch": 0.7637443525052033, "grad_norm": 0.3771125078201294, "learning_rate": 1.4908370983298645e-05, "loss": 0.0764, "step": 15045 }, { "epoch": 0.7639981724960658, "grad_norm": 0.4332607388496399, "learning_rate": 1.4906678850026228e-05, "loss": 0.0751, "step": 15050 }, { "epoch": 0.7642519924869283, "grad_norm": 0.47496718168258667, "learning_rate": 1.4904986716753814e-05, "loss": 0.0791, "step": 15055 }, { "epoch": 0.7645058124777907, "grad_norm": 0.3843788504600525, "learning_rate": 1.4903294583481396e-05, "loss": 0.0725, "step": 15060 }, { "epoch": 0.7647596324686532, "grad_norm": 0.44778764247894287, "learning_rate": 1.490160245020898e-05, "loss": 0.0842, "step": 15065 }, { "epoch": 0.7650134524595157, "grad_norm": 0.4595566391944885, "learning_rate": 1.4899910316936563e-05, "loss": 0.086, "step": 15070 }, { "epoch": 0.7652672724503782, "grad_norm": 0.4201864004135132, "learning_rate": 1.4898218183664146e-05, "loss": 0.0818, "step": 15075 }, { "epoch": 0.7655210924412407, "grad_norm": 0.35480204224586487, "learning_rate": 1.4896526050391731e-05, "loss": 0.0768, "step": 15080 }, { "epoch": 0.7657749124321032, "grad_norm": 0.34363895654678345, "learning_rate": 1.4894833917119313e-05, "loss": 0.0735, "step": 15085 }, { "epoch": 0.7660287324229657, "grad_norm": 0.39018622040748596, "learning_rate": 1.4893141783846897e-05, "loss": 0.0808, "step": 15090 }, { "epoch": 0.7662825524138281, "grad_norm": 0.40973344445228577, "learning_rate": 1.489144965057448e-05, "loss": 0.0845, "step": 15095 }, { "epoch": 0.7665363724046906, "grad_norm": 1.6443856954574585, "learning_rate": 1.4889757517302064e-05, "loss": 0.0759, "step": 15100 }, { "epoch": 0.7667901923955531, "grad_norm": 0.5674831867218018, "learning_rate": 1.4888065384029646e-05, "loss": 0.0916, "step": 15105 }, { "epoch": 0.7670440123864155, "grad_norm": 0.6491461992263794, "learning_rate": 1.4886373250757231e-05, "loss": 0.0727, "step": 15110 }, { "epoch": 0.767297832377278, "grad_norm": 0.43828409910202026, "learning_rate": 1.4884681117484815e-05, "loss": 0.069, "step": 15115 }, { "epoch": 0.7675516523681405, "grad_norm": 0.3990132212638855, "learning_rate": 1.4882988984212398e-05, "loss": 0.0752, "step": 15120 }, { "epoch": 0.767805472359003, "grad_norm": 0.6415324211120605, "learning_rate": 1.4881296850939982e-05, "loss": 0.0887, "step": 15125 }, { "epoch": 0.7680592923498655, "grad_norm": 0.415190190076828, "learning_rate": 1.4879604717667563e-05, "loss": 0.0854, "step": 15130 }, { "epoch": 0.768313112340728, "grad_norm": 0.43115556240081787, "learning_rate": 1.4877912584395149e-05, "loss": 0.0841, "step": 15135 }, { "epoch": 0.7685669323315905, "grad_norm": 0.4042651653289795, "learning_rate": 1.4876220451122732e-05, "loss": 0.0774, "step": 15140 }, { "epoch": 0.768820752322453, "grad_norm": 0.4800015985965729, "learning_rate": 1.4874528317850314e-05, "loss": 0.0781, "step": 15145 }, { "epoch": 0.7690745723133154, "grad_norm": 0.40148481726646423, "learning_rate": 1.48728361845779e-05, "loss": 0.0808, "step": 15150 }, { "epoch": 0.7693283923041778, "grad_norm": 0.44380658864974976, "learning_rate": 1.4871144051305481e-05, "loss": 0.0862, "step": 15155 }, { "epoch": 0.7695822122950403, "grad_norm": 0.41752004623413086, "learning_rate": 1.4869451918033066e-05, "loss": 0.0728, "step": 15160 }, { "epoch": 0.7698360322859028, "grad_norm": 0.3876765966415405, "learning_rate": 1.486775978476065e-05, "loss": 0.0759, "step": 15165 }, { "epoch": 0.7700898522767653, "grad_norm": 0.3961372971534729, "learning_rate": 1.4866067651488232e-05, "loss": 0.0676, "step": 15170 }, { "epoch": 0.7703436722676278, "grad_norm": 0.5213141441345215, "learning_rate": 1.4864375518215817e-05, "loss": 0.085, "step": 15175 }, { "epoch": 0.7705974922584903, "grad_norm": 0.4002414643764496, "learning_rate": 1.4862683384943399e-05, "loss": 0.0855, "step": 15180 }, { "epoch": 0.7708513122493528, "grad_norm": 0.26624107360839844, "learning_rate": 1.4860991251670982e-05, "loss": 0.0856, "step": 15185 }, { "epoch": 0.7711051322402153, "grad_norm": 0.4855886697769165, "learning_rate": 1.4859299118398568e-05, "loss": 0.0772, "step": 15190 }, { "epoch": 0.7713589522310778, "grad_norm": 0.5074960589408875, "learning_rate": 1.485760698512615e-05, "loss": 0.0737, "step": 15195 }, { "epoch": 0.7716127722219402, "grad_norm": 0.5205410122871399, "learning_rate": 1.4855914851853731e-05, "loss": 0.077, "step": 15200 }, { "epoch": 0.7718665922128026, "grad_norm": 0.4280433654785156, "learning_rate": 1.4854222718581317e-05, "loss": 0.0697, "step": 15205 }, { "epoch": 0.7721204122036651, "grad_norm": 0.3168952465057373, "learning_rate": 1.48525305853089e-05, "loss": 0.0795, "step": 15210 }, { "epoch": 0.7723742321945276, "grad_norm": 0.39712512493133545, "learning_rate": 1.4850838452036484e-05, "loss": 0.0829, "step": 15215 }, { "epoch": 0.7726280521853901, "grad_norm": 0.5094771385192871, "learning_rate": 1.4849146318764067e-05, "loss": 0.0805, "step": 15220 }, { "epoch": 0.7728818721762526, "grad_norm": 0.2554795444011688, "learning_rate": 1.4847454185491649e-05, "loss": 0.0655, "step": 15225 }, { "epoch": 0.7731356921671151, "grad_norm": 0.46590444445610046, "learning_rate": 1.4845762052219234e-05, "loss": 0.0816, "step": 15230 }, { "epoch": 0.7733895121579776, "grad_norm": 0.5981921553611755, "learning_rate": 1.4844069918946818e-05, "loss": 0.0721, "step": 15235 }, { "epoch": 0.7736433321488401, "grad_norm": 0.35320785641670227, "learning_rate": 1.48423777856744e-05, "loss": 0.0768, "step": 15240 }, { "epoch": 0.7738971521397026, "grad_norm": 0.38590070605278015, "learning_rate": 1.4840685652401985e-05, "loss": 0.0823, "step": 15245 }, { "epoch": 0.774150972130565, "grad_norm": 0.4039193093776703, "learning_rate": 1.4838993519129567e-05, "loss": 0.0674, "step": 15250 }, { "epoch": 0.7744047921214274, "grad_norm": 0.423473060131073, "learning_rate": 1.4837301385857152e-05, "loss": 0.0734, "step": 15255 }, { "epoch": 0.7746586121122899, "grad_norm": 0.4255438446998596, "learning_rate": 1.4835609252584736e-05, "loss": 0.0739, "step": 15260 }, { "epoch": 0.7749124321031524, "grad_norm": 0.3805790841579437, "learning_rate": 1.4833917119312317e-05, "loss": 0.0755, "step": 15265 }, { "epoch": 0.7751662520940149, "grad_norm": 0.4251479506492615, "learning_rate": 1.4832224986039903e-05, "loss": 0.0817, "step": 15270 }, { "epoch": 0.7754200720848774, "grad_norm": 0.6027906537055969, "learning_rate": 1.4830532852767485e-05, "loss": 0.0794, "step": 15275 }, { "epoch": 0.7756738920757399, "grad_norm": 0.5641241669654846, "learning_rate": 1.4828840719495068e-05, "loss": 0.0707, "step": 15280 }, { "epoch": 0.7759277120666024, "grad_norm": 0.3357883393764496, "learning_rate": 1.4827148586222653e-05, "loss": 0.0798, "step": 15285 }, { "epoch": 0.7761815320574649, "grad_norm": 0.6147197484970093, "learning_rate": 1.4825456452950235e-05, "loss": 0.0761, "step": 15290 }, { "epoch": 0.7764353520483274, "grad_norm": 0.30087563395500183, "learning_rate": 1.4823764319677819e-05, "loss": 0.0663, "step": 15295 }, { "epoch": 0.7766891720391899, "grad_norm": 0.3757729232311249, "learning_rate": 1.4822072186405402e-05, "loss": 0.0746, "step": 15300 }, { "epoch": 0.7769429920300522, "grad_norm": 0.38213640451431274, "learning_rate": 1.4820380053132986e-05, "loss": 0.0839, "step": 15305 }, { "epoch": 0.7771968120209147, "grad_norm": 0.6010876297950745, "learning_rate": 1.4818687919860571e-05, "loss": 0.0756, "step": 15310 }, { "epoch": 0.7774506320117772, "grad_norm": 0.39273327589035034, "learning_rate": 1.4816995786588153e-05, "loss": 0.0731, "step": 15315 }, { "epoch": 0.7777044520026397, "grad_norm": 0.5562280416488647, "learning_rate": 1.4815303653315736e-05, "loss": 0.0676, "step": 15320 }, { "epoch": 0.7779582719935022, "grad_norm": 0.37354519963264465, "learning_rate": 1.481361152004332e-05, "loss": 0.0792, "step": 15325 }, { "epoch": 0.7782120919843647, "grad_norm": 0.4271200895309448, "learning_rate": 1.4811919386770904e-05, "loss": 0.0678, "step": 15330 }, { "epoch": 0.7784659119752272, "grad_norm": 0.4866403043270111, "learning_rate": 1.4810227253498485e-05, "loss": 0.0778, "step": 15335 }, { "epoch": 0.7787197319660897, "grad_norm": 0.561621367931366, "learning_rate": 1.480853512022607e-05, "loss": 0.0753, "step": 15340 }, { "epoch": 0.7789735519569522, "grad_norm": 0.38731345534324646, "learning_rate": 1.4806842986953654e-05, "loss": 0.0828, "step": 15345 }, { "epoch": 0.7792273719478147, "grad_norm": 0.3867878019809723, "learning_rate": 1.4805150853681236e-05, "loss": 0.0858, "step": 15350 }, { "epoch": 0.779481191938677, "grad_norm": 0.43254461884498596, "learning_rate": 1.4803458720408821e-05, "loss": 0.0646, "step": 15355 }, { "epoch": 0.7797350119295395, "grad_norm": 0.35966265201568604, "learning_rate": 1.4801766587136403e-05, "loss": 0.0726, "step": 15360 }, { "epoch": 0.779988831920402, "grad_norm": 0.36605480313301086, "learning_rate": 1.4800074453863988e-05, "loss": 0.0721, "step": 15365 }, { "epoch": 0.7802426519112645, "grad_norm": 0.37214845418930054, "learning_rate": 1.4798382320591572e-05, "loss": 0.0836, "step": 15370 }, { "epoch": 0.780496471902127, "grad_norm": 0.43033382296562195, "learning_rate": 1.4796690187319154e-05, "loss": 0.0707, "step": 15375 }, { "epoch": 0.7807502918929895, "grad_norm": 0.41933897137641907, "learning_rate": 1.4794998054046739e-05, "loss": 0.074, "step": 15380 }, { "epoch": 0.781004111883852, "grad_norm": 0.4193064570426941, "learning_rate": 1.4793305920774321e-05, "loss": 0.0732, "step": 15385 }, { "epoch": 0.7812579318747145, "grad_norm": 0.35709092020988464, "learning_rate": 1.4791613787501904e-05, "loss": 0.0831, "step": 15390 }, { "epoch": 0.781511751865577, "grad_norm": 0.38249507546424866, "learning_rate": 1.4789921654229488e-05, "loss": 0.0753, "step": 15395 }, { "epoch": 0.7817655718564395, "grad_norm": 0.6960234045982361, "learning_rate": 1.4788229520957071e-05, "loss": 0.0752, "step": 15400 }, { "epoch": 0.7820193918473018, "grad_norm": 0.4672459065914154, "learning_rate": 1.4786537387684657e-05, "loss": 0.0851, "step": 15405 }, { "epoch": 0.7822732118381643, "grad_norm": 0.3816010057926178, "learning_rate": 1.4784845254412239e-05, "loss": 0.0764, "step": 15410 }, { "epoch": 0.7825270318290268, "grad_norm": 0.7014495730400085, "learning_rate": 1.4783153121139822e-05, "loss": 0.0694, "step": 15415 }, { "epoch": 0.7827808518198893, "grad_norm": 0.327251136302948, "learning_rate": 1.4781460987867406e-05, "loss": 0.0716, "step": 15420 }, { "epoch": 0.7830346718107518, "grad_norm": 0.5149769186973572, "learning_rate": 1.477976885459499e-05, "loss": 0.0734, "step": 15425 }, { "epoch": 0.7832884918016143, "grad_norm": 0.40763530135154724, "learning_rate": 1.4778076721322571e-05, "loss": 0.0812, "step": 15430 }, { "epoch": 0.7835423117924768, "grad_norm": 0.32969698309898376, "learning_rate": 1.4776384588050156e-05, "loss": 0.0799, "step": 15435 }, { "epoch": 0.7837961317833393, "grad_norm": 0.3335864245891571, "learning_rate": 1.477469245477774e-05, "loss": 0.0785, "step": 15440 }, { "epoch": 0.7840499517742018, "grad_norm": 0.48730531334877014, "learning_rate": 1.4773000321505322e-05, "loss": 0.0849, "step": 15445 }, { "epoch": 0.7843037717650642, "grad_norm": 0.7397474646568298, "learning_rate": 1.4771308188232907e-05, "loss": 0.0778, "step": 15450 }, { "epoch": 0.7845575917559267, "grad_norm": 0.39907628297805786, "learning_rate": 1.4769616054960489e-05, "loss": 0.0729, "step": 15455 }, { "epoch": 0.7848114117467891, "grad_norm": 0.47134730219841003, "learning_rate": 1.4767923921688074e-05, "loss": 0.0771, "step": 15460 }, { "epoch": 0.7850652317376516, "grad_norm": 0.41393592953681946, "learning_rate": 1.4766231788415658e-05, "loss": 0.0772, "step": 15465 }, { "epoch": 0.7853190517285141, "grad_norm": 0.3614552319049835, "learning_rate": 1.476453965514324e-05, "loss": 0.0718, "step": 15470 }, { "epoch": 0.7855728717193766, "grad_norm": 0.4395851194858551, "learning_rate": 1.4762847521870825e-05, "loss": 0.0825, "step": 15475 }, { "epoch": 0.7858266917102391, "grad_norm": 0.3465043902397156, "learning_rate": 1.4761155388598407e-05, "loss": 0.0746, "step": 15480 }, { "epoch": 0.7860805117011016, "grad_norm": 0.3366214632987976, "learning_rate": 1.475946325532599e-05, "loss": 0.071, "step": 15485 }, { "epoch": 0.7863343316919641, "grad_norm": 0.6985540986061096, "learning_rate": 1.4757771122053575e-05, "loss": 0.0701, "step": 15490 }, { "epoch": 0.7865881516828266, "grad_norm": 0.6810505390167236, "learning_rate": 1.4756078988781157e-05, "loss": 0.0881, "step": 15495 }, { "epoch": 0.786841971673689, "grad_norm": 0.8365420699119568, "learning_rate": 1.4754386855508742e-05, "loss": 0.0842, "step": 15500 }, { "epoch": 0.7870957916645515, "grad_norm": 0.4822964370250702, "learning_rate": 1.4752694722236324e-05, "loss": 0.07, "step": 15505 }, { "epoch": 0.787349611655414, "grad_norm": 0.3959048390388489, "learning_rate": 1.4751002588963908e-05, "loss": 0.078, "step": 15510 }, { "epoch": 0.7876034316462764, "grad_norm": 0.32304689288139343, "learning_rate": 1.4749310455691493e-05, "loss": 0.0814, "step": 15515 }, { "epoch": 0.7878572516371389, "grad_norm": 0.39487403631210327, "learning_rate": 1.4747618322419075e-05, "loss": 0.0779, "step": 15520 }, { "epoch": 0.7881110716280014, "grad_norm": 0.48532378673553467, "learning_rate": 1.4745926189146658e-05, "loss": 0.077, "step": 15525 }, { "epoch": 0.7883648916188639, "grad_norm": 0.38463422656059265, "learning_rate": 1.4744234055874242e-05, "loss": 0.0725, "step": 15530 }, { "epoch": 0.7886187116097264, "grad_norm": 0.5803540945053101, "learning_rate": 1.4742541922601826e-05, "loss": 0.0746, "step": 15535 }, { "epoch": 0.7888725316005889, "grad_norm": 0.37139931321144104, "learning_rate": 1.4740849789329407e-05, "loss": 0.0752, "step": 15540 }, { "epoch": 0.7891263515914514, "grad_norm": 0.39508089423179626, "learning_rate": 1.4739157656056993e-05, "loss": 0.0804, "step": 15545 }, { "epoch": 0.7893801715823138, "grad_norm": 0.4758867919445038, "learning_rate": 1.4737465522784576e-05, "loss": 0.0706, "step": 15550 }, { "epoch": 0.7896339915731763, "grad_norm": 0.3260742425918579, "learning_rate": 1.473577338951216e-05, "loss": 0.0762, "step": 15555 }, { "epoch": 0.7898878115640388, "grad_norm": 0.37266799807548523, "learning_rate": 1.4734081256239743e-05, "loss": 0.0875, "step": 15560 }, { "epoch": 0.7901416315549012, "grad_norm": 0.28276917338371277, "learning_rate": 1.4732389122967325e-05, "loss": 0.0633, "step": 15565 }, { "epoch": 0.7903954515457637, "grad_norm": 0.4956200122833252, "learning_rate": 1.473069698969491e-05, "loss": 0.0704, "step": 15570 }, { "epoch": 0.7906492715366262, "grad_norm": 0.5198813676834106, "learning_rate": 1.4729004856422492e-05, "loss": 0.0722, "step": 15575 }, { "epoch": 0.7909030915274887, "grad_norm": 0.40935298800468445, "learning_rate": 1.4727312723150076e-05, "loss": 0.0756, "step": 15580 }, { "epoch": 0.7911569115183512, "grad_norm": 0.3992593586444855, "learning_rate": 1.4725620589877661e-05, "loss": 0.0722, "step": 15585 }, { "epoch": 0.7914107315092137, "grad_norm": 0.3877473473548889, "learning_rate": 1.4723928456605243e-05, "loss": 0.0882, "step": 15590 }, { "epoch": 0.7916645515000762, "grad_norm": 0.3663567900657654, "learning_rate": 1.4722236323332826e-05, "loss": 0.0718, "step": 15595 }, { "epoch": 0.7919183714909386, "grad_norm": 0.37373846769332886, "learning_rate": 1.472054419006041e-05, "loss": 0.0821, "step": 15600 }, { "epoch": 0.7921721914818011, "grad_norm": 0.45595207810401917, "learning_rate": 1.4718852056787993e-05, "loss": 0.0716, "step": 15605 }, { "epoch": 0.7924260114726636, "grad_norm": 0.5352175831794739, "learning_rate": 1.4717159923515579e-05, "loss": 0.0715, "step": 15610 }, { "epoch": 0.792679831463526, "grad_norm": 0.3262074589729309, "learning_rate": 1.471546779024316e-05, "loss": 0.0737, "step": 15615 }, { "epoch": 0.7929336514543885, "grad_norm": 0.36793214082717896, "learning_rate": 1.4713775656970744e-05, "loss": 0.0679, "step": 15620 }, { "epoch": 0.793187471445251, "grad_norm": 0.41612109541893005, "learning_rate": 1.4712083523698328e-05, "loss": 0.0823, "step": 15625 }, { "epoch": 0.7934412914361135, "grad_norm": 0.40798622369766235, "learning_rate": 1.4710391390425911e-05, "loss": 0.0725, "step": 15630 }, { "epoch": 0.793695111426976, "grad_norm": 0.5767354369163513, "learning_rate": 1.4708699257153493e-05, "loss": 0.0813, "step": 15635 }, { "epoch": 0.7939489314178385, "grad_norm": 0.615403413772583, "learning_rate": 1.4707007123881078e-05, "loss": 0.0857, "step": 15640 }, { "epoch": 0.794202751408701, "grad_norm": 0.34347468614578247, "learning_rate": 1.4705314990608662e-05, "loss": 0.0702, "step": 15645 }, { "epoch": 0.7944565713995634, "grad_norm": 0.3589203655719757, "learning_rate": 1.4703622857336245e-05, "loss": 0.0724, "step": 15650 }, { "epoch": 0.7947103913904259, "grad_norm": 0.7738829851150513, "learning_rate": 1.4701930724063829e-05, "loss": 0.0768, "step": 15655 }, { "epoch": 0.7949642113812884, "grad_norm": 0.3893224000930786, "learning_rate": 1.470023859079141e-05, "loss": 0.0675, "step": 15660 }, { "epoch": 0.7952180313721509, "grad_norm": 0.43465477228164673, "learning_rate": 1.4698546457518996e-05, "loss": 0.0744, "step": 15665 }, { "epoch": 0.7954718513630133, "grad_norm": 0.3958378732204437, "learning_rate": 1.469685432424658e-05, "loss": 0.0753, "step": 15670 }, { "epoch": 0.7957256713538758, "grad_norm": 0.5245358347892761, "learning_rate": 1.4695162190974161e-05, "loss": 0.0748, "step": 15675 }, { "epoch": 0.7959794913447383, "grad_norm": 0.3974064290523529, "learning_rate": 1.4693470057701747e-05, "loss": 0.0786, "step": 15680 }, { "epoch": 0.7962333113356008, "grad_norm": 0.4498586356639862, "learning_rate": 1.4691777924429328e-05, "loss": 0.0844, "step": 15685 }, { "epoch": 0.7964871313264633, "grad_norm": 0.4059452712535858, "learning_rate": 1.4690085791156912e-05, "loss": 0.0637, "step": 15690 }, { "epoch": 0.7967409513173258, "grad_norm": 0.5134612321853638, "learning_rate": 1.4688393657884497e-05, "loss": 0.0767, "step": 15695 }, { "epoch": 0.7969947713081882, "grad_norm": 0.4138130843639374, "learning_rate": 1.4686701524612079e-05, "loss": 0.0845, "step": 15700 }, { "epoch": 0.7972485912990507, "grad_norm": 0.37701699137687683, "learning_rate": 1.4685009391339664e-05, "loss": 0.0743, "step": 15705 }, { "epoch": 0.7975024112899132, "grad_norm": 0.3598290681838989, "learning_rate": 1.4683317258067246e-05, "loss": 0.072, "step": 15710 }, { "epoch": 0.7977562312807757, "grad_norm": 0.548689067363739, "learning_rate": 1.468162512479483e-05, "loss": 0.0662, "step": 15715 }, { "epoch": 0.7980100512716382, "grad_norm": 0.3460603654384613, "learning_rate": 1.4679932991522415e-05, "loss": 0.0595, "step": 15720 }, { "epoch": 0.7982638712625006, "grad_norm": 0.5318562388420105, "learning_rate": 1.4678240858249997e-05, "loss": 0.0726, "step": 15725 }, { "epoch": 0.7985176912533631, "grad_norm": 1.7499229907989502, "learning_rate": 1.467654872497758e-05, "loss": 0.0727, "step": 15730 }, { "epoch": 0.7987715112442256, "grad_norm": 0.6805680990219116, "learning_rate": 1.4674856591705164e-05, "loss": 0.0822, "step": 15735 }, { "epoch": 0.7990253312350881, "grad_norm": 0.6451191306114197, "learning_rate": 1.4673164458432747e-05, "loss": 0.0772, "step": 15740 }, { "epoch": 0.7992791512259505, "grad_norm": 0.5430915951728821, "learning_rate": 1.4671472325160333e-05, "loss": 0.0869, "step": 15745 }, { "epoch": 0.799532971216813, "grad_norm": 0.4242735803127289, "learning_rate": 1.4669780191887915e-05, "loss": 0.0781, "step": 15750 }, { "epoch": 0.7997867912076755, "grad_norm": 0.4273703098297119, "learning_rate": 1.4668088058615498e-05, "loss": 0.0732, "step": 15755 }, { "epoch": 0.800040611198538, "grad_norm": 0.4131914973258972, "learning_rate": 1.4666395925343082e-05, "loss": 0.0792, "step": 15760 }, { "epoch": 0.8002944311894005, "grad_norm": 0.7269368171691895, "learning_rate": 1.4664703792070665e-05, "loss": 0.0706, "step": 15765 }, { "epoch": 0.800548251180263, "grad_norm": 0.730088472366333, "learning_rate": 1.4663011658798247e-05, "loss": 0.0803, "step": 15770 }, { "epoch": 0.8008020711711255, "grad_norm": 0.4482845962047577, "learning_rate": 1.4661319525525832e-05, "loss": 0.0718, "step": 15775 }, { "epoch": 0.8010558911619879, "grad_norm": 0.4157123863697052, "learning_rate": 1.4659627392253414e-05, "loss": 0.0782, "step": 15780 }, { "epoch": 0.8013097111528504, "grad_norm": 0.3921131491661072, "learning_rate": 1.4657935258980998e-05, "loss": 0.066, "step": 15785 }, { "epoch": 0.8015635311437129, "grad_norm": 0.33425718545913696, "learning_rate": 1.4656243125708583e-05, "loss": 0.0675, "step": 15790 }, { "epoch": 0.8018173511345753, "grad_norm": 0.5760083794593811, "learning_rate": 1.4654550992436165e-05, "loss": 0.0846, "step": 15795 }, { "epoch": 0.8020711711254378, "grad_norm": 0.5779892206192017, "learning_rate": 1.465285885916375e-05, "loss": 0.0821, "step": 15800 }, { "epoch": 0.8023249911163003, "grad_norm": 0.3755846917629242, "learning_rate": 1.4651166725891332e-05, "loss": 0.0715, "step": 15805 }, { "epoch": 0.8025788111071628, "grad_norm": 0.3848758637905121, "learning_rate": 1.4649474592618915e-05, "loss": 0.0857, "step": 15810 }, { "epoch": 0.8028326310980253, "grad_norm": 0.3276999890804291, "learning_rate": 1.46477824593465e-05, "loss": 0.0747, "step": 15815 }, { "epoch": 0.8030864510888878, "grad_norm": 0.33379969000816345, "learning_rate": 1.4646090326074082e-05, "loss": 0.0722, "step": 15820 }, { "epoch": 0.8033402710797503, "grad_norm": 0.37067776918411255, "learning_rate": 1.4644398192801666e-05, "loss": 0.0737, "step": 15825 }, { "epoch": 0.8035940910706127, "grad_norm": 0.29267624020576477, "learning_rate": 1.464270605952925e-05, "loss": 0.063, "step": 15830 }, { "epoch": 0.8038479110614752, "grad_norm": 0.3490564227104187, "learning_rate": 1.4641013926256833e-05, "loss": 0.0909, "step": 15835 }, { "epoch": 0.8041017310523377, "grad_norm": 0.4481734335422516, "learning_rate": 1.4639321792984415e-05, "loss": 0.0741, "step": 15840 }, { "epoch": 0.8043555510432001, "grad_norm": 0.3517626225948334, "learning_rate": 1.4637629659712e-05, "loss": 0.0731, "step": 15845 }, { "epoch": 0.8046093710340626, "grad_norm": 0.4463517963886261, "learning_rate": 1.4635937526439584e-05, "loss": 0.0801, "step": 15850 }, { "epoch": 0.8048631910249251, "grad_norm": 0.3373356759548187, "learning_rate": 1.4634245393167167e-05, "loss": 0.0672, "step": 15855 }, { "epoch": 0.8051170110157876, "grad_norm": 0.4135652482509613, "learning_rate": 1.463255325989475e-05, "loss": 0.0605, "step": 15860 }, { "epoch": 0.8053708310066501, "grad_norm": 0.4091756343841553, "learning_rate": 1.4630861126622333e-05, "loss": 0.0711, "step": 15865 }, { "epoch": 0.8056246509975126, "grad_norm": 0.4166070520877838, "learning_rate": 1.4629168993349918e-05, "loss": 0.0641, "step": 15870 }, { "epoch": 0.8058784709883751, "grad_norm": 0.37992623448371887, "learning_rate": 1.4627476860077501e-05, "loss": 0.0744, "step": 15875 }, { "epoch": 0.8061322909792376, "grad_norm": 0.39718085527420044, "learning_rate": 1.4625784726805083e-05, "loss": 0.0715, "step": 15880 }, { "epoch": 0.8063861109701, "grad_norm": 0.3552018404006958, "learning_rate": 1.4624092593532669e-05, "loss": 0.0718, "step": 15885 }, { "epoch": 0.8066399309609625, "grad_norm": 0.39325806498527527, "learning_rate": 1.462240046026025e-05, "loss": 0.0743, "step": 15890 }, { "epoch": 0.8068937509518249, "grad_norm": 0.3813045620918274, "learning_rate": 1.4620708326987836e-05, "loss": 0.0717, "step": 15895 }, { "epoch": 0.8071475709426874, "grad_norm": 0.5478102564811707, "learning_rate": 1.461901619371542e-05, "loss": 0.081, "step": 15900 }, { "epoch": 0.8074013909335499, "grad_norm": 0.7054280638694763, "learning_rate": 1.4617324060443001e-05, "loss": 0.0769, "step": 15905 }, { "epoch": 0.8076552109244124, "grad_norm": 0.5671230554580688, "learning_rate": 1.4615631927170586e-05, "loss": 0.068, "step": 15910 }, { "epoch": 0.8079090309152749, "grad_norm": 0.6359015703201294, "learning_rate": 1.4613939793898168e-05, "loss": 0.06, "step": 15915 }, { "epoch": 0.8081628509061374, "grad_norm": 0.4305456876754761, "learning_rate": 1.4612247660625752e-05, "loss": 0.065, "step": 15920 }, { "epoch": 0.8084166708969999, "grad_norm": 0.5645986199378967, "learning_rate": 1.4610555527353337e-05, "loss": 0.0746, "step": 15925 }, { "epoch": 0.8086704908878624, "grad_norm": 0.4272991716861725, "learning_rate": 1.4608863394080919e-05, "loss": 0.082, "step": 15930 }, { "epoch": 0.8089243108787249, "grad_norm": 0.4712943732738495, "learning_rate": 1.4607171260808502e-05, "loss": 0.0835, "step": 15935 }, { "epoch": 0.8091781308695873, "grad_norm": 0.36924442648887634, "learning_rate": 1.4605479127536086e-05, "loss": 0.0757, "step": 15940 }, { "epoch": 0.8094319508604497, "grad_norm": 0.9191230535507202, "learning_rate": 1.460378699426367e-05, "loss": 0.0721, "step": 15945 }, { "epoch": 0.8096857708513122, "grad_norm": 0.466630220413208, "learning_rate": 1.4602094860991255e-05, "loss": 0.0777, "step": 15950 }, { "epoch": 0.8099395908421747, "grad_norm": 0.6259939670562744, "learning_rate": 1.4600402727718836e-05, "loss": 0.0699, "step": 15955 }, { "epoch": 0.8101934108330372, "grad_norm": 0.31263235211372375, "learning_rate": 1.4598710594446418e-05, "loss": 0.0688, "step": 15960 }, { "epoch": 0.8104472308238997, "grad_norm": 0.32614171504974365, "learning_rate": 1.4597018461174004e-05, "loss": 0.0667, "step": 15965 }, { "epoch": 0.8107010508147622, "grad_norm": 0.551091730594635, "learning_rate": 1.4595326327901587e-05, "loss": 0.0614, "step": 15970 }, { "epoch": 0.8109548708056247, "grad_norm": 0.44654572010040283, "learning_rate": 1.4593634194629169e-05, "loss": 0.0857, "step": 15975 }, { "epoch": 0.8112086907964872, "grad_norm": 0.8355388641357422, "learning_rate": 1.4591942061356754e-05, "loss": 0.0589, "step": 15980 }, { "epoch": 0.8114625107873497, "grad_norm": 0.3554515242576599, "learning_rate": 1.4590249928084336e-05, "loss": 0.0672, "step": 15985 }, { "epoch": 0.8117163307782121, "grad_norm": 0.8215906023979187, "learning_rate": 1.458855779481192e-05, "loss": 0.0651, "step": 15990 }, { "epoch": 0.8119701507690745, "grad_norm": 0.5176529884338379, "learning_rate": 1.4586865661539505e-05, "loss": 0.0811, "step": 15995 }, { "epoch": 0.812223970759937, "grad_norm": 0.37184882164001465, "learning_rate": 1.4585173528267087e-05, "loss": 0.0811, "step": 16000 }, { "epoch": 0.8124777907507995, "grad_norm": 0.36488497257232666, "learning_rate": 1.4583481394994672e-05, "loss": 0.0694, "step": 16005 }, { "epoch": 0.812731610741662, "grad_norm": 0.29020920395851135, "learning_rate": 1.4581789261722254e-05, "loss": 0.0725, "step": 16010 }, { "epoch": 0.8129854307325245, "grad_norm": 0.41967329382896423, "learning_rate": 1.4580097128449837e-05, "loss": 0.0749, "step": 16015 }, { "epoch": 0.813239250723387, "grad_norm": 0.35643911361694336, "learning_rate": 1.4578404995177423e-05, "loss": 0.0728, "step": 16020 }, { "epoch": 0.8134930707142495, "grad_norm": 0.5488142967224121, "learning_rate": 1.4576712861905004e-05, "loss": 0.076, "step": 16025 }, { "epoch": 0.813746890705112, "grad_norm": 0.6067050695419312, "learning_rate": 1.4575020728632588e-05, "loss": 0.0723, "step": 16030 }, { "epoch": 0.8140007106959745, "grad_norm": 0.3196711838245392, "learning_rate": 1.4573328595360171e-05, "loss": 0.0752, "step": 16035 }, { "epoch": 0.8142545306868368, "grad_norm": 0.3165784478187561, "learning_rate": 1.4571636462087755e-05, "loss": 0.0608, "step": 16040 }, { "epoch": 0.8145083506776993, "grad_norm": 0.37109699845314026, "learning_rate": 1.456994432881534e-05, "loss": 0.077, "step": 16045 }, { "epoch": 0.8147621706685618, "grad_norm": 0.5756739377975464, "learning_rate": 1.4568252195542922e-05, "loss": 0.0746, "step": 16050 }, { "epoch": 0.8150159906594243, "grad_norm": 0.5761229395866394, "learning_rate": 1.4566560062270506e-05, "loss": 0.0769, "step": 16055 }, { "epoch": 0.8152698106502868, "grad_norm": 0.6240622401237488, "learning_rate": 1.456486792899809e-05, "loss": 0.0838, "step": 16060 }, { "epoch": 0.8155236306411493, "grad_norm": 0.40298014879226685, "learning_rate": 1.4563175795725673e-05, "loss": 0.0701, "step": 16065 }, { "epoch": 0.8157774506320118, "grad_norm": 0.4362527132034302, "learning_rate": 1.4561483662453255e-05, "loss": 0.0739, "step": 16070 }, { "epoch": 0.8160312706228743, "grad_norm": 0.37834423780441284, "learning_rate": 1.455979152918084e-05, "loss": 0.079, "step": 16075 }, { "epoch": 0.8162850906137368, "grad_norm": 0.3305414319038391, "learning_rate": 1.4558099395908423e-05, "loss": 0.0662, "step": 16080 }, { "epoch": 0.8165389106045993, "grad_norm": 0.45846548676490784, "learning_rate": 1.4556407262636005e-05, "loss": 0.0704, "step": 16085 }, { "epoch": 0.8167927305954616, "grad_norm": 0.42119476199150085, "learning_rate": 1.455471512936359e-05, "loss": 0.0734, "step": 16090 }, { "epoch": 0.8170465505863241, "grad_norm": 0.4292179346084595, "learning_rate": 1.4553022996091172e-05, "loss": 0.0665, "step": 16095 }, { "epoch": 0.8173003705771866, "grad_norm": 0.3152535855770111, "learning_rate": 1.4551330862818758e-05, "loss": 0.0719, "step": 16100 }, { "epoch": 0.8175541905680491, "grad_norm": 0.3996585011482239, "learning_rate": 1.4549638729546341e-05, "loss": 0.0616, "step": 16105 }, { "epoch": 0.8178080105589116, "grad_norm": 0.3468000292778015, "learning_rate": 1.4547946596273923e-05, "loss": 0.074, "step": 16110 }, { "epoch": 0.8180618305497741, "grad_norm": 0.4962630569934845, "learning_rate": 1.4546254463001508e-05, "loss": 0.0705, "step": 16115 }, { "epoch": 0.8183156505406366, "grad_norm": 0.48265331983566284, "learning_rate": 1.454456232972909e-05, "loss": 0.0718, "step": 16120 }, { "epoch": 0.8185694705314991, "grad_norm": 0.4469800293445587, "learning_rate": 1.4542870196456674e-05, "loss": 0.0932, "step": 16125 }, { "epoch": 0.8188232905223616, "grad_norm": 0.6275296211242676, "learning_rate": 1.4541178063184259e-05, "loss": 0.0755, "step": 16130 }, { "epoch": 0.8190771105132241, "grad_norm": 0.5437877178192139, "learning_rate": 1.453948592991184e-05, "loss": 0.0762, "step": 16135 }, { "epoch": 0.8193309305040865, "grad_norm": 0.3738441467285156, "learning_rate": 1.4537793796639426e-05, "loss": 0.0734, "step": 16140 }, { "epoch": 0.8195847504949489, "grad_norm": 0.41383683681488037, "learning_rate": 1.4536101663367008e-05, "loss": 0.0834, "step": 16145 }, { "epoch": 0.8198385704858114, "grad_norm": 0.49464190006256104, "learning_rate": 1.4534409530094591e-05, "loss": 0.0773, "step": 16150 }, { "epoch": 0.8200923904766739, "grad_norm": 0.5497142672538757, "learning_rate": 1.4532717396822177e-05, "loss": 0.0715, "step": 16155 }, { "epoch": 0.8203462104675364, "grad_norm": 1.2365076541900635, "learning_rate": 1.4531025263549758e-05, "loss": 0.0737, "step": 16160 }, { "epoch": 0.8206000304583989, "grad_norm": 0.47389277815818787, "learning_rate": 1.452933313027734e-05, "loss": 0.0762, "step": 16165 }, { "epoch": 0.8208538504492614, "grad_norm": 0.33445248007774353, "learning_rate": 1.4527640997004925e-05, "loss": 0.0774, "step": 16170 }, { "epoch": 0.8211076704401239, "grad_norm": 0.4635807275772095, "learning_rate": 1.4525948863732509e-05, "loss": 0.0739, "step": 16175 }, { "epoch": 0.8213614904309864, "grad_norm": 0.7265046834945679, "learning_rate": 1.4524256730460091e-05, "loss": 0.0809, "step": 16180 }, { "epoch": 0.8216153104218489, "grad_norm": 0.3342432379722595, "learning_rate": 1.4522564597187676e-05, "loss": 0.0651, "step": 16185 }, { "epoch": 0.8218691304127113, "grad_norm": 0.38529881834983826, "learning_rate": 1.4520872463915258e-05, "loss": 0.082, "step": 16190 }, { "epoch": 0.8221229504035737, "grad_norm": 0.45302921533584595, "learning_rate": 1.4519180330642843e-05, "loss": 0.0688, "step": 16195 }, { "epoch": 0.8223767703944362, "grad_norm": 0.33290159702301025, "learning_rate": 1.4517488197370427e-05, "loss": 0.075, "step": 16200 }, { "epoch": 0.8226305903852987, "grad_norm": 0.34083133935928345, "learning_rate": 1.4515796064098009e-05, "loss": 0.0755, "step": 16205 }, { "epoch": 0.8228844103761612, "grad_norm": 0.3705480992794037, "learning_rate": 1.4514103930825594e-05, "loss": 0.0721, "step": 16210 }, { "epoch": 0.8231382303670237, "grad_norm": 0.39600300788879395, "learning_rate": 1.4512411797553176e-05, "loss": 0.0708, "step": 16215 }, { "epoch": 0.8233920503578862, "grad_norm": 0.32571184635162354, "learning_rate": 1.451071966428076e-05, "loss": 0.0758, "step": 16220 }, { "epoch": 0.8236458703487487, "grad_norm": 0.791690468788147, "learning_rate": 1.4509027531008344e-05, "loss": 0.084, "step": 16225 }, { "epoch": 0.8238996903396112, "grad_norm": 0.3258461058139801, "learning_rate": 1.4507335397735926e-05, "loss": 0.0741, "step": 16230 }, { "epoch": 0.8241535103304737, "grad_norm": 0.3211754262447357, "learning_rate": 1.450564326446351e-05, "loss": 0.079, "step": 16235 }, { "epoch": 0.8244073303213361, "grad_norm": 0.3197133541107178, "learning_rate": 1.4503951131191093e-05, "loss": 0.0674, "step": 16240 }, { "epoch": 0.8246611503121986, "grad_norm": 0.39093726873397827, "learning_rate": 1.4502258997918677e-05, "loss": 0.0686, "step": 16245 }, { "epoch": 0.824914970303061, "grad_norm": 0.4963763356208801, "learning_rate": 1.4500566864646262e-05, "loss": 0.0775, "step": 16250 }, { "epoch": 0.8251687902939235, "grad_norm": 0.40643295645713806, "learning_rate": 1.4498874731373844e-05, "loss": 0.0698, "step": 16255 }, { "epoch": 0.825422610284786, "grad_norm": 0.39619845151901245, "learning_rate": 1.4497182598101428e-05, "loss": 0.0713, "step": 16260 }, { "epoch": 0.8256764302756485, "grad_norm": 0.4349762201309204, "learning_rate": 1.4495490464829011e-05, "loss": 0.0682, "step": 16265 }, { "epoch": 0.825930250266511, "grad_norm": 0.5058432817459106, "learning_rate": 1.4493798331556595e-05, "loss": 0.0729, "step": 16270 }, { "epoch": 0.8261840702573735, "grad_norm": 0.3339829444885254, "learning_rate": 1.4492106198284177e-05, "loss": 0.0715, "step": 16275 }, { "epoch": 0.826437890248236, "grad_norm": 0.33098337054252625, "learning_rate": 1.4490414065011762e-05, "loss": 0.0671, "step": 16280 }, { "epoch": 0.8266917102390984, "grad_norm": 0.42175155878067017, "learning_rate": 1.4488721931739345e-05, "loss": 0.071, "step": 16285 }, { "epoch": 0.8269455302299609, "grad_norm": 0.8576765656471252, "learning_rate": 1.4487029798466929e-05, "loss": 0.0698, "step": 16290 }, { "epoch": 0.8271993502208234, "grad_norm": 1.0109632015228271, "learning_rate": 1.4485337665194512e-05, "loss": 0.0794, "step": 16295 }, { "epoch": 0.8274531702116859, "grad_norm": 0.4387573301792145, "learning_rate": 1.4483645531922094e-05, "loss": 0.0758, "step": 16300 }, { "epoch": 0.8277069902025483, "grad_norm": 0.3630094826221466, "learning_rate": 1.448195339864968e-05, "loss": 0.0605, "step": 16305 }, { "epoch": 0.8279608101934108, "grad_norm": 0.4075300991535187, "learning_rate": 1.4480261265377263e-05, "loss": 0.0648, "step": 16310 }, { "epoch": 0.8282146301842733, "grad_norm": 0.37269020080566406, "learning_rate": 1.4478569132104845e-05, "loss": 0.0723, "step": 16315 }, { "epoch": 0.8284684501751358, "grad_norm": 0.6695398092269897, "learning_rate": 1.447687699883243e-05, "loss": 0.0678, "step": 16320 }, { "epoch": 0.8287222701659983, "grad_norm": 0.36002203822135925, "learning_rate": 1.4475184865560012e-05, "loss": 0.0634, "step": 16325 }, { "epoch": 0.8289760901568608, "grad_norm": 0.6074070334434509, "learning_rate": 1.4473492732287596e-05, "loss": 0.0842, "step": 16330 }, { "epoch": 0.8292299101477232, "grad_norm": 0.37209317088127136, "learning_rate": 1.447180059901518e-05, "loss": 0.0701, "step": 16335 }, { "epoch": 0.8294837301385857, "grad_norm": 0.3380017578601837, "learning_rate": 1.4470108465742763e-05, "loss": 0.0701, "step": 16340 }, { "epoch": 0.8297375501294482, "grad_norm": 0.6713643074035645, "learning_rate": 1.4468416332470348e-05, "loss": 0.0703, "step": 16345 }, { "epoch": 0.8299913701203107, "grad_norm": 0.9763821959495544, "learning_rate": 1.446672419919793e-05, "loss": 0.0714, "step": 16350 }, { "epoch": 0.8302451901111731, "grad_norm": 0.3959788680076599, "learning_rate": 1.4465032065925513e-05, "loss": 0.0737, "step": 16355 }, { "epoch": 0.8304990101020356, "grad_norm": 1.2258071899414062, "learning_rate": 1.4463339932653098e-05, "loss": 0.0774, "step": 16360 }, { "epoch": 0.8307528300928981, "grad_norm": 0.34479087591171265, "learning_rate": 1.446164779938068e-05, "loss": 0.0748, "step": 16365 }, { "epoch": 0.8310066500837606, "grad_norm": 0.39507466554641724, "learning_rate": 1.4459955666108262e-05, "loss": 0.0747, "step": 16370 }, { "epoch": 0.8312604700746231, "grad_norm": 0.5069799423217773, "learning_rate": 1.4458263532835847e-05, "loss": 0.0771, "step": 16375 }, { "epoch": 0.8315142900654856, "grad_norm": 0.5372797846794128, "learning_rate": 1.4456571399563431e-05, "loss": 0.0886, "step": 16380 }, { "epoch": 0.831768110056348, "grad_norm": 0.5318045616149902, "learning_rate": 1.4454879266291015e-05, "loss": 0.0835, "step": 16385 }, { "epoch": 0.8320219300472105, "grad_norm": 0.3830041289329529, "learning_rate": 1.4453187133018598e-05, "loss": 0.0646, "step": 16390 }, { "epoch": 0.832275750038073, "grad_norm": 0.40192267298698425, "learning_rate": 1.445149499974618e-05, "loss": 0.074, "step": 16395 }, { "epoch": 0.8325295700289355, "grad_norm": 0.6122369170188904, "learning_rate": 1.4449802866473765e-05, "loss": 0.0659, "step": 16400 }, { "epoch": 0.832783390019798, "grad_norm": 0.455731064081192, "learning_rate": 1.4448110733201349e-05, "loss": 0.08, "step": 16405 }, { "epoch": 0.8330372100106604, "grad_norm": 0.3991694450378418, "learning_rate": 1.444641859992893e-05, "loss": 0.0677, "step": 16410 }, { "epoch": 0.8332910300015229, "grad_norm": 0.49834415316581726, "learning_rate": 1.4444726466656516e-05, "loss": 0.0779, "step": 16415 }, { "epoch": 0.8335448499923854, "grad_norm": 0.3147116005420685, "learning_rate": 1.4443034333384098e-05, "loss": 0.0675, "step": 16420 }, { "epoch": 0.8337986699832479, "grad_norm": 0.4364480674266815, "learning_rate": 1.4441342200111681e-05, "loss": 0.0724, "step": 16425 }, { "epoch": 0.8340524899741104, "grad_norm": 0.3125317096710205, "learning_rate": 1.4439650066839266e-05, "loss": 0.0704, "step": 16430 }, { "epoch": 0.8343063099649728, "grad_norm": 0.3873157799243927, "learning_rate": 1.4437957933566848e-05, "loss": 0.0661, "step": 16435 }, { "epoch": 0.8345601299558353, "grad_norm": 0.24474631249904633, "learning_rate": 1.4436265800294434e-05, "loss": 0.0687, "step": 16440 }, { "epoch": 0.8348139499466978, "grad_norm": 0.37103623151779175, "learning_rate": 1.4434573667022015e-05, "loss": 0.0598, "step": 16445 }, { "epoch": 0.8350677699375603, "grad_norm": 0.2716129422187805, "learning_rate": 1.4432881533749599e-05, "loss": 0.0674, "step": 16450 }, { "epoch": 0.8353215899284228, "grad_norm": 0.39446020126342773, "learning_rate": 1.4431189400477184e-05, "loss": 0.0708, "step": 16455 }, { "epoch": 0.8355754099192853, "grad_norm": 0.39217862486839294, "learning_rate": 1.4429497267204766e-05, "loss": 0.0718, "step": 16460 }, { "epoch": 0.8358292299101477, "grad_norm": 0.5310700535774231, "learning_rate": 1.442780513393235e-05, "loss": 0.078, "step": 16465 }, { "epoch": 0.8360830499010102, "grad_norm": 0.337480753660202, "learning_rate": 1.4426113000659933e-05, "loss": 0.0693, "step": 16470 }, { "epoch": 0.8363368698918727, "grad_norm": 0.45497629046440125, "learning_rate": 1.4424420867387517e-05, "loss": 0.0831, "step": 16475 }, { "epoch": 0.8365906898827352, "grad_norm": 0.3556462526321411, "learning_rate": 1.4422728734115098e-05, "loss": 0.0653, "step": 16480 }, { "epoch": 0.8368445098735976, "grad_norm": 0.43825340270996094, "learning_rate": 1.4421036600842684e-05, "loss": 0.0688, "step": 16485 }, { "epoch": 0.8370983298644601, "grad_norm": 0.4429861605167389, "learning_rate": 1.4419344467570267e-05, "loss": 0.0736, "step": 16490 }, { "epoch": 0.8373521498553226, "grad_norm": 0.35255303978919983, "learning_rate": 1.441765233429785e-05, "loss": 0.066, "step": 16495 }, { "epoch": 0.8376059698461851, "grad_norm": 0.532727062702179, "learning_rate": 1.4415960201025434e-05, "loss": 0.0857, "step": 16500 }, { "epoch": 0.8378597898370476, "grad_norm": 0.38991039991378784, "learning_rate": 1.4414268067753016e-05, "loss": 0.0721, "step": 16505 }, { "epoch": 0.83811360982791, "grad_norm": 0.38206273317337036, "learning_rate": 1.4412575934480601e-05, "loss": 0.0816, "step": 16510 }, { "epoch": 0.8383674298187725, "grad_norm": 0.271957665681839, "learning_rate": 1.4410883801208185e-05, "loss": 0.0779, "step": 16515 }, { "epoch": 0.838621249809635, "grad_norm": 0.3314172327518463, "learning_rate": 1.4409191667935767e-05, "loss": 0.0634, "step": 16520 }, { "epoch": 0.8388750698004975, "grad_norm": 0.5224104523658752, "learning_rate": 1.4407499534663352e-05, "loss": 0.077, "step": 16525 }, { "epoch": 0.83912888979136, "grad_norm": 0.7262583374977112, "learning_rate": 1.4405807401390934e-05, "loss": 0.0909, "step": 16530 }, { "epoch": 0.8393827097822224, "grad_norm": 0.45320960879325867, "learning_rate": 1.440411526811852e-05, "loss": 0.0732, "step": 16535 }, { "epoch": 0.8396365297730849, "grad_norm": 0.3000989854335785, "learning_rate": 1.4402423134846103e-05, "loss": 0.0695, "step": 16540 }, { "epoch": 0.8398903497639474, "grad_norm": 0.6859510540962219, "learning_rate": 1.4400731001573685e-05, "loss": 0.0705, "step": 16545 }, { "epoch": 0.8401441697548099, "grad_norm": 0.4406552314758301, "learning_rate": 1.439903886830127e-05, "loss": 0.0774, "step": 16550 }, { "epoch": 0.8403979897456724, "grad_norm": 0.3408234715461731, "learning_rate": 1.4397346735028852e-05, "loss": 0.0699, "step": 16555 }, { "epoch": 0.8406518097365349, "grad_norm": 1.4002503156661987, "learning_rate": 1.4395654601756435e-05, "loss": 0.0739, "step": 16560 }, { "epoch": 0.8409056297273974, "grad_norm": 0.5265543460845947, "learning_rate": 1.4393962468484019e-05, "loss": 0.0817, "step": 16565 }, { "epoch": 0.8411594497182598, "grad_norm": 0.45000794529914856, "learning_rate": 1.4392270335211602e-05, "loss": 0.0729, "step": 16570 }, { "epoch": 0.8414132697091223, "grad_norm": 0.2590148150920868, "learning_rate": 1.4390578201939184e-05, "loss": 0.0773, "step": 16575 }, { "epoch": 0.8416670896999847, "grad_norm": 0.28776997327804565, "learning_rate": 1.438888606866677e-05, "loss": 0.0686, "step": 16580 }, { "epoch": 0.8419209096908472, "grad_norm": 0.6979045271873474, "learning_rate": 1.4387193935394353e-05, "loss": 0.0793, "step": 16585 }, { "epoch": 0.8421747296817097, "grad_norm": 0.33594322204589844, "learning_rate": 1.4385501802121936e-05, "loss": 0.0708, "step": 16590 }, { "epoch": 0.8424285496725722, "grad_norm": 0.4978824555873871, "learning_rate": 1.438380966884952e-05, "loss": 0.0698, "step": 16595 }, { "epoch": 0.8426823696634347, "grad_norm": 0.41426607966423035, "learning_rate": 1.4382117535577102e-05, "loss": 0.0734, "step": 16600 }, { "epoch": 0.8429361896542972, "grad_norm": 0.44715553522109985, "learning_rate": 1.4380425402304687e-05, "loss": 0.0693, "step": 16605 }, { "epoch": 0.8431900096451597, "grad_norm": 0.33712249994277954, "learning_rate": 1.437873326903227e-05, "loss": 0.0695, "step": 16610 }, { "epoch": 0.8434438296360222, "grad_norm": 0.4558604061603546, "learning_rate": 1.4377041135759852e-05, "loss": 0.0802, "step": 16615 }, { "epoch": 0.8436976496268846, "grad_norm": 0.4930158853530884, "learning_rate": 1.4375349002487438e-05, "loss": 0.066, "step": 16620 }, { "epoch": 0.8439514696177471, "grad_norm": 0.42849859595298767, "learning_rate": 1.437365686921502e-05, "loss": 0.0672, "step": 16625 }, { "epoch": 0.8442052896086095, "grad_norm": 0.4327075183391571, "learning_rate": 1.4371964735942605e-05, "loss": 0.0713, "step": 16630 }, { "epoch": 0.844459109599472, "grad_norm": 0.4134119749069214, "learning_rate": 1.4370272602670188e-05, "loss": 0.0785, "step": 16635 }, { "epoch": 0.8447129295903345, "grad_norm": 0.4089963436126709, "learning_rate": 1.436858046939777e-05, "loss": 0.0609, "step": 16640 }, { "epoch": 0.844966749581197, "grad_norm": 0.3104897737503052, "learning_rate": 1.4366888336125355e-05, "loss": 0.0628, "step": 16645 }, { "epoch": 0.8452205695720595, "grad_norm": 0.40896421670913696, "learning_rate": 1.4365196202852937e-05, "loss": 0.0745, "step": 16650 }, { "epoch": 0.845474389562922, "grad_norm": 0.5574126839637756, "learning_rate": 1.436350406958052e-05, "loss": 0.0812, "step": 16655 }, { "epoch": 0.8457282095537845, "grad_norm": 0.3526981770992279, "learning_rate": 1.4361811936308106e-05, "loss": 0.074, "step": 16660 }, { "epoch": 0.845982029544647, "grad_norm": 0.37888669967651367, "learning_rate": 1.4360119803035688e-05, "loss": 0.0769, "step": 16665 }, { "epoch": 0.8462358495355095, "grad_norm": 0.41363152861595154, "learning_rate": 1.4358427669763271e-05, "loss": 0.0788, "step": 16670 }, { "epoch": 0.846489669526372, "grad_norm": 1.3909887075424194, "learning_rate": 1.4356735536490855e-05, "loss": 0.0736, "step": 16675 }, { "epoch": 0.8467434895172343, "grad_norm": 0.40531304478645325, "learning_rate": 1.4355043403218439e-05, "loss": 0.0637, "step": 16680 }, { "epoch": 0.8469973095080968, "grad_norm": 0.291532963514328, "learning_rate": 1.4353351269946024e-05, "loss": 0.0682, "step": 16685 }, { "epoch": 0.8472511294989593, "grad_norm": 0.3950155973434448, "learning_rate": 1.4351659136673606e-05, "loss": 0.0655, "step": 16690 }, { "epoch": 0.8475049494898218, "grad_norm": 0.44813594222068787, "learning_rate": 1.434996700340119e-05, "loss": 0.0853, "step": 16695 }, { "epoch": 0.8477587694806843, "grad_norm": 0.38060590624809265, "learning_rate": 1.4348274870128773e-05, "loss": 0.0691, "step": 16700 }, { "epoch": 0.8480125894715468, "grad_norm": 0.546687126159668, "learning_rate": 1.4346582736856356e-05, "loss": 0.0684, "step": 16705 }, { "epoch": 0.8482664094624093, "grad_norm": 0.22715900838375092, "learning_rate": 1.4344890603583938e-05, "loss": 0.059, "step": 16710 }, { "epoch": 0.8485202294532718, "grad_norm": 0.290096253156662, "learning_rate": 1.4343198470311523e-05, "loss": 0.0594, "step": 16715 }, { "epoch": 0.8487740494441343, "grad_norm": 0.384205162525177, "learning_rate": 1.4341506337039107e-05, "loss": 0.0711, "step": 16720 }, { "epoch": 0.8490278694349968, "grad_norm": 0.2885967195034027, "learning_rate": 1.4339814203766689e-05, "loss": 0.0674, "step": 16725 }, { "epoch": 0.8492816894258591, "grad_norm": 0.5165429711341858, "learning_rate": 1.4338122070494274e-05, "loss": 0.0674, "step": 16730 }, { "epoch": 0.8495355094167216, "grad_norm": 0.36283811926841736, "learning_rate": 1.4336429937221856e-05, "loss": 0.0731, "step": 16735 }, { "epoch": 0.8497893294075841, "grad_norm": 0.4545622766017914, "learning_rate": 1.4334737803949441e-05, "loss": 0.0768, "step": 16740 }, { "epoch": 0.8500431493984466, "grad_norm": 0.30138736963272095, "learning_rate": 1.4333045670677023e-05, "loss": 0.0595, "step": 16745 }, { "epoch": 0.8502969693893091, "grad_norm": 0.38298335671424866, "learning_rate": 1.4331353537404606e-05, "loss": 0.0751, "step": 16750 }, { "epoch": 0.8505507893801716, "grad_norm": 0.6622296571731567, "learning_rate": 1.4329661404132192e-05, "loss": 0.0842, "step": 16755 }, { "epoch": 0.8508046093710341, "grad_norm": 0.5098762512207031, "learning_rate": 1.4327969270859774e-05, "loss": 0.0754, "step": 16760 }, { "epoch": 0.8510584293618966, "grad_norm": 0.4182951748371124, "learning_rate": 1.4326277137587357e-05, "loss": 0.055, "step": 16765 }, { "epoch": 0.8513122493527591, "grad_norm": 0.34056827425956726, "learning_rate": 1.432458500431494e-05, "loss": 0.0762, "step": 16770 }, { "epoch": 0.8515660693436216, "grad_norm": 0.4602746367454529, "learning_rate": 1.4322892871042524e-05, "loss": 0.0731, "step": 16775 }, { "epoch": 0.8518198893344839, "grad_norm": 0.46367478370666504, "learning_rate": 1.432120073777011e-05, "loss": 0.0662, "step": 16780 }, { "epoch": 0.8520737093253464, "grad_norm": 0.3692100942134857, "learning_rate": 1.4319508604497691e-05, "loss": 0.0732, "step": 16785 }, { "epoch": 0.8523275293162089, "grad_norm": 0.32194337248802185, "learning_rate": 1.4317816471225275e-05, "loss": 0.0694, "step": 16790 }, { "epoch": 0.8525813493070714, "grad_norm": 0.4015413820743561, "learning_rate": 1.4316124337952858e-05, "loss": 0.066, "step": 16795 }, { "epoch": 0.8528351692979339, "grad_norm": 0.4497564435005188, "learning_rate": 1.4314432204680442e-05, "loss": 0.0801, "step": 16800 }, { "epoch": 0.8530889892887964, "grad_norm": 0.5693073272705078, "learning_rate": 1.4312740071408024e-05, "loss": 0.0772, "step": 16805 }, { "epoch": 0.8533428092796589, "grad_norm": 0.7299516201019287, "learning_rate": 1.4311047938135609e-05, "loss": 0.0772, "step": 16810 }, { "epoch": 0.8535966292705214, "grad_norm": 0.370604544878006, "learning_rate": 1.4309355804863193e-05, "loss": 0.0696, "step": 16815 }, { "epoch": 0.8538504492613839, "grad_norm": 0.33534955978393555, "learning_rate": 1.4307663671590774e-05, "loss": 0.0736, "step": 16820 }, { "epoch": 0.8541042692522464, "grad_norm": 0.5034054517745972, "learning_rate": 1.430597153831836e-05, "loss": 0.0769, "step": 16825 }, { "epoch": 0.8543580892431087, "grad_norm": 0.2479061633348465, "learning_rate": 1.4304279405045942e-05, "loss": 0.0654, "step": 16830 }, { "epoch": 0.8546119092339712, "grad_norm": 0.5529633164405823, "learning_rate": 1.4302587271773527e-05, "loss": 0.069, "step": 16835 }, { "epoch": 0.8548657292248337, "grad_norm": 0.4025406837463379, "learning_rate": 1.430089513850111e-05, "loss": 0.0684, "step": 16840 }, { "epoch": 0.8551195492156962, "grad_norm": 0.3485677242279053, "learning_rate": 1.4299203005228692e-05, "loss": 0.0658, "step": 16845 }, { "epoch": 0.8553733692065587, "grad_norm": 0.46029365062713623, "learning_rate": 1.4297510871956277e-05, "loss": 0.0692, "step": 16850 }, { "epoch": 0.8556271891974212, "grad_norm": 0.47386330366134644, "learning_rate": 1.429581873868386e-05, "loss": 0.0738, "step": 16855 }, { "epoch": 0.8558810091882837, "grad_norm": 0.3624202311038971, "learning_rate": 1.4294126605411443e-05, "loss": 0.0723, "step": 16860 }, { "epoch": 0.8561348291791462, "grad_norm": 1.0836365222930908, "learning_rate": 1.4292434472139028e-05, "loss": 0.0803, "step": 16865 }, { "epoch": 0.8563886491700087, "grad_norm": 0.45594584941864014, "learning_rate": 1.429074233886661e-05, "loss": 0.0678, "step": 16870 }, { "epoch": 0.8566424691608711, "grad_norm": 0.34718087315559387, "learning_rate": 1.4289050205594195e-05, "loss": 0.0613, "step": 16875 }, { "epoch": 0.8568962891517335, "grad_norm": 0.30333346128463745, "learning_rate": 1.4287358072321777e-05, "loss": 0.0635, "step": 16880 }, { "epoch": 0.857150109142596, "grad_norm": 0.44795432686805725, "learning_rate": 1.428566593904936e-05, "loss": 0.0678, "step": 16885 }, { "epoch": 0.8574039291334585, "grad_norm": 0.33551299571990967, "learning_rate": 1.4283973805776946e-05, "loss": 0.0641, "step": 16890 }, { "epoch": 0.857657749124321, "grad_norm": 0.42668506503105164, "learning_rate": 1.4282281672504528e-05, "loss": 0.0738, "step": 16895 }, { "epoch": 0.8579115691151835, "grad_norm": 0.5033948421478271, "learning_rate": 1.4280589539232111e-05, "loss": 0.0791, "step": 16900 }, { "epoch": 0.858165389106046, "grad_norm": 0.5407020449638367, "learning_rate": 1.4278897405959695e-05, "loss": 0.0817, "step": 16905 }, { "epoch": 0.8584192090969085, "grad_norm": 1.9471144676208496, "learning_rate": 1.4277205272687278e-05, "loss": 0.0785, "step": 16910 }, { "epoch": 0.858673029087771, "grad_norm": 0.38863787055015564, "learning_rate": 1.427551313941486e-05, "loss": 0.067, "step": 16915 }, { "epoch": 0.8589268490786335, "grad_norm": 0.5478694438934326, "learning_rate": 1.4273821006142445e-05, "loss": 0.0718, "step": 16920 }, { "epoch": 0.8591806690694959, "grad_norm": 0.5812584757804871, "learning_rate": 1.4272128872870029e-05, "loss": 0.0845, "step": 16925 }, { "epoch": 0.8594344890603584, "grad_norm": 0.4321596324443817, "learning_rate": 1.4270436739597612e-05, "loss": 0.0734, "step": 16930 }, { "epoch": 0.8596883090512208, "grad_norm": 0.512316107749939, "learning_rate": 1.4268744606325196e-05, "loss": 0.065, "step": 16935 }, { "epoch": 0.8599421290420833, "grad_norm": 0.4328327775001526, "learning_rate": 1.4267052473052778e-05, "loss": 0.0685, "step": 16940 }, { "epoch": 0.8601959490329458, "grad_norm": 0.5791327357292175, "learning_rate": 1.4265360339780363e-05, "loss": 0.0775, "step": 16945 }, { "epoch": 0.8604497690238083, "grad_norm": 0.387787401676178, "learning_rate": 1.4263668206507945e-05, "loss": 0.069, "step": 16950 }, { "epoch": 0.8607035890146708, "grad_norm": 0.38102301955223083, "learning_rate": 1.4261976073235528e-05, "loss": 0.067, "step": 16955 }, { "epoch": 0.8609574090055333, "grad_norm": 0.3059747815132141, "learning_rate": 1.4260283939963114e-05, "loss": 0.0582, "step": 16960 }, { "epoch": 0.8612112289963958, "grad_norm": 0.38691335916519165, "learning_rate": 1.4258591806690696e-05, "loss": 0.0761, "step": 16965 }, { "epoch": 0.8614650489872583, "grad_norm": 0.3976000249385834, "learning_rate": 1.4256899673418279e-05, "loss": 0.0695, "step": 16970 }, { "epoch": 0.8617188689781207, "grad_norm": 0.4070408344268799, "learning_rate": 1.4255207540145863e-05, "loss": 0.0702, "step": 16975 }, { "epoch": 0.8619726889689832, "grad_norm": 0.6455148458480835, "learning_rate": 1.4253515406873446e-05, "loss": 0.0797, "step": 16980 }, { "epoch": 0.8622265089598457, "grad_norm": 0.36329150199890137, "learning_rate": 1.4251823273601031e-05, "loss": 0.0669, "step": 16985 }, { "epoch": 0.8624803289507081, "grad_norm": 0.36581847071647644, "learning_rate": 1.4250131140328613e-05, "loss": 0.0639, "step": 16990 }, { "epoch": 0.8627341489415706, "grad_norm": 0.2543756067752838, "learning_rate": 1.4248439007056197e-05, "loss": 0.064, "step": 16995 }, { "epoch": 0.8629879689324331, "grad_norm": 0.7665139436721802, "learning_rate": 1.424674687378378e-05, "loss": 0.072, "step": 17000 }, { "epoch": 0.8632417889232956, "grad_norm": 0.2957867383956909, "learning_rate": 1.4245054740511364e-05, "loss": 0.0718, "step": 17005 }, { "epoch": 0.8634956089141581, "grad_norm": 0.5713722705841064, "learning_rate": 1.4243362607238946e-05, "loss": 0.0748, "step": 17010 }, { "epoch": 0.8637494289050206, "grad_norm": 0.43129876255989075, "learning_rate": 1.4241670473966531e-05, "loss": 0.085, "step": 17015 }, { "epoch": 0.8640032488958831, "grad_norm": 0.6972838640213013, "learning_rate": 1.4239978340694115e-05, "loss": 0.0695, "step": 17020 }, { "epoch": 0.8642570688867455, "grad_norm": 0.8373203873634338, "learning_rate": 1.4238286207421698e-05, "loss": 0.0758, "step": 17025 }, { "epoch": 0.864510888877608, "grad_norm": 0.2909460961818695, "learning_rate": 1.4236594074149282e-05, "loss": 0.0682, "step": 17030 }, { "epoch": 0.8647647088684705, "grad_norm": 0.30259981751441956, "learning_rate": 1.4234901940876863e-05, "loss": 0.0677, "step": 17035 }, { "epoch": 0.865018528859333, "grad_norm": 0.36737337708473206, "learning_rate": 1.4233209807604449e-05, "loss": 0.0763, "step": 17040 }, { "epoch": 0.8652723488501954, "grad_norm": 0.32847535610198975, "learning_rate": 1.4231517674332032e-05, "loss": 0.0706, "step": 17045 }, { "epoch": 0.8655261688410579, "grad_norm": 0.4564855992794037, "learning_rate": 1.4229825541059614e-05, "loss": 0.0808, "step": 17050 }, { "epoch": 0.8657799888319204, "grad_norm": 0.4900711178779602, "learning_rate": 1.42281334077872e-05, "loss": 0.0595, "step": 17055 }, { "epoch": 0.8660338088227829, "grad_norm": 0.6517011523246765, "learning_rate": 1.4226441274514781e-05, "loss": 0.0665, "step": 17060 }, { "epoch": 0.8662876288136454, "grad_norm": 0.3914799094200134, "learning_rate": 1.4224749141242365e-05, "loss": 0.0696, "step": 17065 }, { "epoch": 0.8665414488045079, "grad_norm": 0.42431968450546265, "learning_rate": 1.422305700796995e-05, "loss": 0.0654, "step": 17070 }, { "epoch": 0.8667952687953703, "grad_norm": 0.34742167592048645, "learning_rate": 1.4221364874697532e-05, "loss": 0.0769, "step": 17075 }, { "epoch": 0.8670490887862328, "grad_norm": 0.8822436928749084, "learning_rate": 1.4219672741425117e-05, "loss": 0.0787, "step": 17080 }, { "epoch": 0.8673029087770953, "grad_norm": 0.6568184494972229, "learning_rate": 1.4217980608152699e-05, "loss": 0.0593, "step": 17085 }, { "epoch": 0.8675567287679578, "grad_norm": 0.53938227891922, "learning_rate": 1.4216288474880282e-05, "loss": 0.083, "step": 17090 }, { "epoch": 0.8678105487588202, "grad_norm": 0.4471435248851776, "learning_rate": 1.4214596341607868e-05, "loss": 0.0744, "step": 17095 }, { "epoch": 0.8680643687496827, "grad_norm": 0.3798215389251709, "learning_rate": 1.421290420833545e-05, "loss": 0.0754, "step": 17100 }, { "epoch": 0.8683181887405452, "grad_norm": 0.5007672309875488, "learning_rate": 1.4211212075063033e-05, "loss": 0.071, "step": 17105 }, { "epoch": 0.8685720087314077, "grad_norm": 0.4671100676059723, "learning_rate": 1.4209519941790617e-05, "loss": 0.0858, "step": 17110 }, { "epoch": 0.8688258287222702, "grad_norm": 0.4374212920665741, "learning_rate": 1.42078278085182e-05, "loss": 0.0684, "step": 17115 }, { "epoch": 0.8690796487131326, "grad_norm": 0.46207401156425476, "learning_rate": 1.4206135675245785e-05, "loss": 0.0733, "step": 17120 }, { "epoch": 0.8693334687039951, "grad_norm": 0.4559246301651001, "learning_rate": 1.4204443541973367e-05, "loss": 0.0798, "step": 17125 }, { "epoch": 0.8695872886948576, "grad_norm": 0.47378304600715637, "learning_rate": 1.4202751408700949e-05, "loss": 0.0763, "step": 17130 }, { "epoch": 0.8698411086857201, "grad_norm": 0.36055704951286316, "learning_rate": 1.4201059275428534e-05, "loss": 0.0663, "step": 17135 }, { "epoch": 0.8700949286765826, "grad_norm": 0.4687144160270691, "learning_rate": 1.4199367142156118e-05, "loss": 0.072, "step": 17140 }, { "epoch": 0.870348748667445, "grad_norm": 0.565639078617096, "learning_rate": 1.41976750088837e-05, "loss": 0.0769, "step": 17145 }, { "epoch": 0.8706025686583075, "grad_norm": 0.3410448431968689, "learning_rate": 1.4195982875611285e-05, "loss": 0.0585, "step": 17150 }, { "epoch": 0.87085638864917, "grad_norm": 0.42370325326919556, "learning_rate": 1.4194290742338867e-05, "loss": 0.0721, "step": 17155 }, { "epoch": 0.8711102086400325, "grad_norm": 0.6147048473358154, "learning_rate": 1.419259860906645e-05, "loss": 0.0712, "step": 17160 }, { "epoch": 0.871364028630895, "grad_norm": 0.3107832074165344, "learning_rate": 1.4190906475794036e-05, "loss": 0.0717, "step": 17165 }, { "epoch": 0.8716178486217574, "grad_norm": 0.4038301706314087, "learning_rate": 1.4189214342521617e-05, "loss": 0.0792, "step": 17170 }, { "epoch": 0.8718716686126199, "grad_norm": 0.3050367832183838, "learning_rate": 1.4187522209249203e-05, "loss": 0.0693, "step": 17175 }, { "epoch": 0.8721254886034824, "grad_norm": 0.3587893843650818, "learning_rate": 1.4185830075976785e-05, "loss": 0.0657, "step": 17180 }, { "epoch": 0.8723793085943449, "grad_norm": 0.3330988883972168, "learning_rate": 1.4184137942704368e-05, "loss": 0.0652, "step": 17185 }, { "epoch": 0.8726331285852074, "grad_norm": 0.3522055745124817, "learning_rate": 1.4182445809431953e-05, "loss": 0.0708, "step": 17190 }, { "epoch": 0.8728869485760699, "grad_norm": 0.35526609420776367, "learning_rate": 1.4180753676159535e-05, "loss": 0.0776, "step": 17195 }, { "epoch": 0.8731407685669323, "grad_norm": 0.41325056552886963, "learning_rate": 1.4179061542887119e-05, "loss": 0.0582, "step": 17200 }, { "epoch": 0.8733945885577948, "grad_norm": 0.3183586001396179, "learning_rate": 1.4177369409614702e-05, "loss": 0.0649, "step": 17205 }, { "epoch": 0.8736484085486573, "grad_norm": 0.3800111711025238, "learning_rate": 1.4175677276342286e-05, "loss": 0.0656, "step": 17210 }, { "epoch": 0.8739022285395198, "grad_norm": 0.31832781434059143, "learning_rate": 1.4173985143069868e-05, "loss": 0.075, "step": 17215 }, { "epoch": 0.8741560485303822, "grad_norm": 0.3157210350036621, "learning_rate": 1.4172293009797453e-05, "loss": 0.075, "step": 17220 }, { "epoch": 0.8744098685212447, "grad_norm": 0.36594507098197937, "learning_rate": 1.4170600876525036e-05, "loss": 0.0729, "step": 17225 }, { "epoch": 0.8746636885121072, "grad_norm": 0.4369713366031647, "learning_rate": 1.416890874325262e-05, "loss": 0.0576, "step": 17230 }, { "epoch": 0.8749175085029697, "grad_norm": 0.3689056932926178, "learning_rate": 1.4167216609980204e-05, "loss": 0.072, "step": 17235 }, { "epoch": 0.8751713284938322, "grad_norm": 0.2857867181301117, "learning_rate": 1.4165524476707785e-05, "loss": 0.0661, "step": 17240 }, { "epoch": 0.8754251484846947, "grad_norm": 0.4573242962360382, "learning_rate": 1.416383234343537e-05, "loss": 0.0656, "step": 17245 }, { "epoch": 0.8756789684755572, "grad_norm": 0.48289331793785095, "learning_rate": 1.4162140210162954e-05, "loss": 0.0797, "step": 17250 }, { "epoch": 0.8759327884664196, "grad_norm": 0.4658796191215515, "learning_rate": 1.4160448076890536e-05, "loss": 0.0687, "step": 17255 }, { "epoch": 0.8761866084572821, "grad_norm": 0.8445097804069519, "learning_rate": 1.4158755943618121e-05, "loss": 0.0683, "step": 17260 }, { "epoch": 0.8764404284481446, "grad_norm": 0.49470093846321106, "learning_rate": 1.4157063810345703e-05, "loss": 0.0727, "step": 17265 }, { "epoch": 0.876694248439007, "grad_norm": 0.5909218788146973, "learning_rate": 1.4155371677073288e-05, "loss": 0.0631, "step": 17270 }, { "epoch": 0.8769480684298695, "grad_norm": 0.33578768372535706, "learning_rate": 1.4153679543800872e-05, "loss": 0.0719, "step": 17275 }, { "epoch": 0.877201888420732, "grad_norm": 0.3844059109687805, "learning_rate": 1.4151987410528454e-05, "loss": 0.0691, "step": 17280 }, { "epoch": 0.8774557084115945, "grad_norm": 0.7239956259727478, "learning_rate": 1.4150295277256039e-05, "loss": 0.0643, "step": 17285 }, { "epoch": 0.877709528402457, "grad_norm": 0.4169676601886749, "learning_rate": 1.414860314398362e-05, "loss": 0.0677, "step": 17290 }, { "epoch": 0.8779633483933195, "grad_norm": 0.5026949644088745, "learning_rate": 1.4146911010711204e-05, "loss": 0.0826, "step": 17295 }, { "epoch": 0.878217168384182, "grad_norm": 0.353205144405365, "learning_rate": 1.414521887743879e-05, "loss": 0.0703, "step": 17300 }, { "epoch": 0.8784709883750444, "grad_norm": 0.5128482580184937, "learning_rate": 1.4143526744166371e-05, "loss": 0.0733, "step": 17305 }, { "epoch": 0.8787248083659069, "grad_norm": 0.43205496668815613, "learning_rate": 1.4141834610893953e-05, "loss": 0.0741, "step": 17310 }, { "epoch": 0.8789786283567694, "grad_norm": 0.36526092886924744, "learning_rate": 1.4140142477621539e-05, "loss": 0.0685, "step": 17315 }, { "epoch": 0.8792324483476318, "grad_norm": 1.4037833213806152, "learning_rate": 1.4138450344349122e-05, "loss": 0.0829, "step": 17320 }, { "epoch": 0.8794862683384943, "grad_norm": 0.5729219317436218, "learning_rate": 1.4136758211076707e-05, "loss": 0.0733, "step": 17325 }, { "epoch": 0.8797400883293568, "grad_norm": 0.421556681394577, "learning_rate": 1.413506607780429e-05, "loss": 0.0655, "step": 17330 }, { "epoch": 0.8799939083202193, "grad_norm": 0.46088892221450806, "learning_rate": 1.4133373944531871e-05, "loss": 0.069, "step": 17335 }, { "epoch": 0.8802477283110818, "grad_norm": 0.29208675026893616, "learning_rate": 1.4131681811259456e-05, "loss": 0.0572, "step": 17340 }, { "epoch": 0.8805015483019443, "grad_norm": 0.5103023648262024, "learning_rate": 1.412998967798704e-05, "loss": 0.0623, "step": 17345 }, { "epoch": 0.8807553682928068, "grad_norm": 0.4015849232673645, "learning_rate": 1.4128297544714622e-05, "loss": 0.0709, "step": 17350 }, { "epoch": 0.8810091882836693, "grad_norm": 0.2928534150123596, "learning_rate": 1.4126605411442207e-05, "loss": 0.0688, "step": 17355 }, { "epoch": 0.8812630082745317, "grad_norm": 0.2520070970058441, "learning_rate": 1.4124913278169789e-05, "loss": 0.0692, "step": 17360 }, { "epoch": 0.8815168282653942, "grad_norm": 0.736379086971283, "learning_rate": 1.4123221144897372e-05, "loss": 0.077, "step": 17365 }, { "epoch": 0.8817706482562566, "grad_norm": 0.3595697283744812, "learning_rate": 1.4121529011624958e-05, "loss": 0.0589, "step": 17370 }, { "epoch": 0.8820244682471191, "grad_norm": 0.3573293685913086, "learning_rate": 1.411983687835254e-05, "loss": 0.071, "step": 17375 }, { "epoch": 0.8822782882379816, "grad_norm": 0.5311951637268066, "learning_rate": 1.4118144745080125e-05, "loss": 0.0707, "step": 17380 }, { "epoch": 0.8825321082288441, "grad_norm": 0.4468015432357788, "learning_rate": 1.4116452611807706e-05, "loss": 0.0693, "step": 17385 }, { "epoch": 0.8827859282197066, "grad_norm": 0.5135281085968018, "learning_rate": 1.411476047853529e-05, "loss": 0.0648, "step": 17390 }, { "epoch": 0.8830397482105691, "grad_norm": 0.36561042070388794, "learning_rate": 1.4113068345262875e-05, "loss": 0.0621, "step": 17395 }, { "epoch": 0.8832935682014316, "grad_norm": 0.6639304757118225, "learning_rate": 1.4111376211990457e-05, "loss": 0.065, "step": 17400 }, { "epoch": 0.8835473881922941, "grad_norm": 0.4147944450378418, "learning_rate": 1.410968407871804e-05, "loss": 0.0744, "step": 17405 }, { "epoch": 0.8838012081831566, "grad_norm": 0.4257824420928955, "learning_rate": 1.4107991945445624e-05, "loss": 0.0732, "step": 17410 }, { "epoch": 0.8840550281740189, "grad_norm": 0.30751651525497437, "learning_rate": 1.4106299812173208e-05, "loss": 0.0694, "step": 17415 }, { "epoch": 0.8843088481648814, "grad_norm": 0.3239120543003082, "learning_rate": 1.4104607678900793e-05, "loss": 0.0672, "step": 17420 }, { "epoch": 0.8845626681557439, "grad_norm": 0.4700276553630829, "learning_rate": 1.4102915545628375e-05, "loss": 0.0714, "step": 17425 }, { "epoch": 0.8848164881466064, "grad_norm": 0.3352143466472626, "learning_rate": 1.4101223412355958e-05, "loss": 0.0731, "step": 17430 }, { "epoch": 0.8850703081374689, "grad_norm": 0.34849241375923157, "learning_rate": 1.4099531279083542e-05, "loss": 0.067, "step": 17435 }, { "epoch": 0.8853241281283314, "grad_norm": 0.48389196395874023, "learning_rate": 1.4097839145811125e-05, "loss": 0.0659, "step": 17440 }, { "epoch": 0.8855779481191939, "grad_norm": 0.4253050684928894, "learning_rate": 1.4096147012538707e-05, "loss": 0.071, "step": 17445 }, { "epoch": 0.8858317681100564, "grad_norm": 0.34934577345848083, "learning_rate": 1.4094454879266293e-05, "loss": 0.068, "step": 17450 }, { "epoch": 0.8860855881009189, "grad_norm": 0.3193303644657135, "learning_rate": 1.4092762745993876e-05, "loss": 0.065, "step": 17455 }, { "epoch": 0.8863394080917814, "grad_norm": 0.5447885990142822, "learning_rate": 1.4091070612721458e-05, "loss": 0.0703, "step": 17460 }, { "epoch": 0.8865932280826437, "grad_norm": 0.4106573462486267, "learning_rate": 1.4089378479449043e-05, "loss": 0.0764, "step": 17465 }, { "epoch": 0.8868470480735062, "grad_norm": 0.35047876834869385, "learning_rate": 1.4087686346176625e-05, "loss": 0.0725, "step": 17470 }, { "epoch": 0.8871008680643687, "grad_norm": 0.5358198881149292, "learning_rate": 1.408599421290421e-05, "loss": 0.0748, "step": 17475 }, { "epoch": 0.8873546880552312, "grad_norm": 0.5286594033241272, "learning_rate": 1.4084302079631794e-05, "loss": 0.0695, "step": 17480 }, { "epoch": 0.8876085080460937, "grad_norm": 0.7509157657623291, "learning_rate": 1.4082609946359376e-05, "loss": 0.0677, "step": 17485 }, { "epoch": 0.8878623280369562, "grad_norm": 0.44565531611442566, "learning_rate": 1.4080917813086961e-05, "loss": 0.0675, "step": 17490 }, { "epoch": 0.8881161480278187, "grad_norm": 0.403942734003067, "learning_rate": 1.4079225679814543e-05, "loss": 0.0695, "step": 17495 }, { "epoch": 0.8883699680186812, "grad_norm": 0.653939425945282, "learning_rate": 1.4077533546542126e-05, "loss": 0.0751, "step": 17500 }, { "epoch": 0.8886237880095437, "grad_norm": 0.3768764138221741, "learning_rate": 1.4075841413269712e-05, "loss": 0.0654, "step": 17505 }, { "epoch": 0.8888776080004062, "grad_norm": 0.33701881766319275, "learning_rate": 1.4074149279997293e-05, "loss": 0.0663, "step": 17510 }, { "epoch": 0.8891314279912685, "grad_norm": 0.3610353171825409, "learning_rate": 1.4072457146724879e-05, "loss": 0.0632, "step": 17515 }, { "epoch": 0.889385247982131, "grad_norm": 0.5949681997299194, "learning_rate": 1.407076501345246e-05, "loss": 0.0694, "step": 17520 }, { "epoch": 0.8896390679729935, "grad_norm": 0.34294983744621277, "learning_rate": 1.4069072880180044e-05, "loss": 0.062, "step": 17525 }, { "epoch": 0.889892887963856, "grad_norm": 0.3811400830745697, "learning_rate": 1.406738074690763e-05, "loss": 0.061, "step": 17530 }, { "epoch": 0.8901467079547185, "grad_norm": 0.3286953270435333, "learning_rate": 1.4065688613635211e-05, "loss": 0.0635, "step": 17535 }, { "epoch": 0.890400527945581, "grad_norm": 0.4377914071083069, "learning_rate": 1.4063996480362793e-05, "loss": 0.0699, "step": 17540 }, { "epoch": 0.8906543479364435, "grad_norm": 0.4538041949272156, "learning_rate": 1.4062304347090378e-05, "loss": 0.0776, "step": 17545 }, { "epoch": 0.890908167927306, "grad_norm": 0.3821781277656555, "learning_rate": 1.4060612213817962e-05, "loss": 0.073, "step": 17550 }, { "epoch": 0.8911619879181685, "grad_norm": 0.5155601501464844, "learning_rate": 1.4058920080545544e-05, "loss": 0.0629, "step": 17555 }, { "epoch": 0.891415807909031, "grad_norm": 0.452945739030838, "learning_rate": 1.4057227947273129e-05, "loss": 0.0632, "step": 17560 }, { "epoch": 0.8916696278998933, "grad_norm": 0.48677828907966614, "learning_rate": 1.405553581400071e-05, "loss": 0.0751, "step": 17565 }, { "epoch": 0.8919234478907558, "grad_norm": 0.5773299932479858, "learning_rate": 1.4053843680728296e-05, "loss": 0.0657, "step": 17570 }, { "epoch": 0.8921772678816183, "grad_norm": 0.47708895802497864, "learning_rate": 1.405215154745588e-05, "loss": 0.0694, "step": 17575 }, { "epoch": 0.8924310878724808, "grad_norm": 0.3603571951389313, "learning_rate": 1.4050459414183461e-05, "loss": 0.07, "step": 17580 }, { "epoch": 0.8926849078633433, "grad_norm": 0.40807899832725525, "learning_rate": 1.4048767280911047e-05, "loss": 0.0733, "step": 17585 }, { "epoch": 0.8929387278542058, "grad_norm": 0.3264356255531311, "learning_rate": 1.4047075147638628e-05, "loss": 0.0724, "step": 17590 }, { "epoch": 0.8931925478450683, "grad_norm": 0.4679695665836334, "learning_rate": 1.4045383014366212e-05, "loss": 0.0772, "step": 17595 }, { "epoch": 0.8934463678359308, "grad_norm": 0.3341139853000641, "learning_rate": 1.4043690881093797e-05, "loss": 0.0745, "step": 17600 }, { "epoch": 0.8937001878267933, "grad_norm": 0.3257398307323456, "learning_rate": 1.4041998747821379e-05, "loss": 0.0736, "step": 17605 }, { "epoch": 0.8939540078176558, "grad_norm": 0.34771645069122314, "learning_rate": 1.4040306614548963e-05, "loss": 0.0663, "step": 17610 }, { "epoch": 0.8942078278085182, "grad_norm": 0.2789120674133301, "learning_rate": 1.4038614481276546e-05, "loss": 0.0727, "step": 17615 }, { "epoch": 0.8944616477993806, "grad_norm": 0.3300907611846924, "learning_rate": 1.403692234800413e-05, "loss": 0.0609, "step": 17620 }, { "epoch": 0.8947154677902431, "grad_norm": 0.4132726490497589, "learning_rate": 1.4035230214731715e-05, "loss": 0.0677, "step": 17625 }, { "epoch": 0.8949692877811056, "grad_norm": 0.47992974519729614, "learning_rate": 1.4033538081459297e-05, "loss": 0.0826, "step": 17630 }, { "epoch": 0.8952231077719681, "grad_norm": 0.3706139028072357, "learning_rate": 1.403184594818688e-05, "loss": 0.0766, "step": 17635 }, { "epoch": 0.8954769277628306, "grad_norm": 0.4228927791118622, "learning_rate": 1.4030153814914464e-05, "loss": 0.0827, "step": 17640 }, { "epoch": 0.8957307477536931, "grad_norm": 0.5431283116340637, "learning_rate": 1.4028461681642047e-05, "loss": 0.0683, "step": 17645 }, { "epoch": 0.8959845677445556, "grad_norm": 0.33838915824890137, "learning_rate": 1.402676954836963e-05, "loss": 0.058, "step": 17650 }, { "epoch": 0.8962383877354181, "grad_norm": 0.3902619481086731, "learning_rate": 1.4025077415097215e-05, "loss": 0.078, "step": 17655 }, { "epoch": 0.8964922077262806, "grad_norm": 0.38549157977104187, "learning_rate": 1.4023385281824798e-05, "loss": 0.0741, "step": 17660 }, { "epoch": 0.896746027717143, "grad_norm": 0.4042127728462219, "learning_rate": 1.4021693148552382e-05, "loss": 0.0687, "step": 17665 }, { "epoch": 0.8969998477080054, "grad_norm": 0.452392041683197, "learning_rate": 1.4020001015279965e-05, "loss": 0.0739, "step": 17670 }, { "epoch": 0.8972536676988679, "grad_norm": 0.45912966132164, "learning_rate": 1.4018308882007547e-05, "loss": 0.0725, "step": 17675 }, { "epoch": 0.8975074876897304, "grad_norm": 0.4095827043056488, "learning_rate": 1.4016616748735132e-05, "loss": 0.075, "step": 17680 }, { "epoch": 0.8977613076805929, "grad_norm": 0.3190585672855377, "learning_rate": 1.4014924615462716e-05, "loss": 0.0717, "step": 17685 }, { "epoch": 0.8980151276714554, "grad_norm": 0.6912304759025574, "learning_rate": 1.4013232482190298e-05, "loss": 0.0708, "step": 17690 }, { "epoch": 0.8982689476623179, "grad_norm": 0.7174093723297119, "learning_rate": 1.4011540348917883e-05, "loss": 0.069, "step": 17695 }, { "epoch": 0.8985227676531804, "grad_norm": 0.30754783749580383, "learning_rate": 1.4009848215645465e-05, "loss": 0.0618, "step": 17700 }, { "epoch": 0.8987765876440429, "grad_norm": 0.7452945113182068, "learning_rate": 1.4008156082373048e-05, "loss": 0.0766, "step": 17705 }, { "epoch": 0.8990304076349053, "grad_norm": 0.3052619695663452, "learning_rate": 1.4006463949100633e-05, "loss": 0.0701, "step": 17710 }, { "epoch": 0.8992842276257678, "grad_norm": 0.6337426900863647, "learning_rate": 1.4004771815828215e-05, "loss": 0.0775, "step": 17715 }, { "epoch": 0.8995380476166303, "grad_norm": 0.35096266865730286, "learning_rate": 1.40030796825558e-05, "loss": 0.0629, "step": 17720 }, { "epoch": 0.8997918676074927, "grad_norm": 0.40677130222320557, "learning_rate": 1.4001387549283382e-05, "loss": 0.0777, "step": 17725 }, { "epoch": 0.9000456875983552, "grad_norm": 0.4063386023044586, "learning_rate": 1.3999695416010966e-05, "loss": 0.0565, "step": 17730 }, { "epoch": 0.9002995075892177, "grad_norm": 0.30389711260795593, "learning_rate": 1.399800328273855e-05, "loss": 0.0671, "step": 17735 }, { "epoch": 0.9005533275800802, "grad_norm": 0.26976916193962097, "learning_rate": 1.3996311149466133e-05, "loss": 0.0717, "step": 17740 }, { "epoch": 0.9008071475709427, "grad_norm": 0.4910055994987488, "learning_rate": 1.3994619016193715e-05, "loss": 0.065, "step": 17745 }, { "epoch": 0.9010609675618052, "grad_norm": 0.3902299702167511, "learning_rate": 1.39929268829213e-05, "loss": 0.0636, "step": 17750 }, { "epoch": 0.9013147875526677, "grad_norm": 0.9779106378555298, "learning_rate": 1.3991234749648884e-05, "loss": 0.0642, "step": 17755 }, { "epoch": 0.9015686075435301, "grad_norm": 0.48715925216674805, "learning_rate": 1.3989542616376467e-05, "loss": 0.0749, "step": 17760 }, { "epoch": 0.9018224275343926, "grad_norm": 0.9629639387130737, "learning_rate": 1.398785048310405e-05, "loss": 0.0692, "step": 17765 }, { "epoch": 0.9020762475252551, "grad_norm": 0.39406028389930725, "learning_rate": 1.3986158349831633e-05, "loss": 0.0673, "step": 17770 }, { "epoch": 0.9023300675161176, "grad_norm": 0.3680139482021332, "learning_rate": 1.3984466216559218e-05, "loss": 0.0706, "step": 17775 }, { "epoch": 0.90258388750698, "grad_norm": 0.3493126928806305, "learning_rate": 1.3982774083286801e-05, "loss": 0.0711, "step": 17780 }, { "epoch": 0.9028377074978425, "grad_norm": 0.35266679525375366, "learning_rate": 1.3981081950014383e-05, "loss": 0.058, "step": 17785 }, { "epoch": 0.903091527488705, "grad_norm": 0.5064133405685425, "learning_rate": 1.3979389816741969e-05, "loss": 0.09, "step": 17790 }, { "epoch": 0.9033453474795675, "grad_norm": 0.6205065250396729, "learning_rate": 1.397769768346955e-05, "loss": 0.0656, "step": 17795 }, { "epoch": 0.90359916747043, "grad_norm": 0.36125099658966064, "learning_rate": 1.3976005550197134e-05, "loss": 0.0662, "step": 17800 }, { "epoch": 0.9038529874612925, "grad_norm": 0.369780570268631, "learning_rate": 1.3974313416924719e-05, "loss": 0.0625, "step": 17805 }, { "epoch": 0.9041068074521549, "grad_norm": 0.3109322786331177, "learning_rate": 1.3972621283652301e-05, "loss": 0.0709, "step": 17810 }, { "epoch": 0.9043606274430174, "grad_norm": 0.44117799401283264, "learning_rate": 1.3970929150379886e-05, "loss": 0.0566, "step": 17815 }, { "epoch": 0.9046144474338799, "grad_norm": 0.41141143441200256, "learning_rate": 1.3969237017107468e-05, "loss": 0.066, "step": 17820 }, { "epoch": 0.9048682674247424, "grad_norm": 0.38892972469329834, "learning_rate": 1.3967544883835052e-05, "loss": 0.0785, "step": 17825 }, { "epoch": 0.9051220874156048, "grad_norm": 0.42113691568374634, "learning_rate": 1.3965852750562637e-05, "loss": 0.0747, "step": 17830 }, { "epoch": 0.9053759074064673, "grad_norm": 0.33473747968673706, "learning_rate": 1.3964160617290219e-05, "loss": 0.0732, "step": 17835 }, { "epoch": 0.9056297273973298, "grad_norm": 0.4112972915172577, "learning_rate": 1.3962468484017802e-05, "loss": 0.0689, "step": 17840 }, { "epoch": 0.9058835473881923, "grad_norm": 0.31149786710739136, "learning_rate": 1.3960776350745386e-05, "loss": 0.0665, "step": 17845 }, { "epoch": 0.9061373673790548, "grad_norm": 0.4009893238544464, "learning_rate": 1.395908421747297e-05, "loss": 0.0708, "step": 17850 }, { "epoch": 0.9063911873699173, "grad_norm": 0.3744799792766571, "learning_rate": 1.3957392084200551e-05, "loss": 0.0738, "step": 17855 }, { "epoch": 0.9066450073607797, "grad_norm": 0.3893011212348938, "learning_rate": 1.3955699950928136e-05, "loss": 0.0616, "step": 17860 }, { "epoch": 0.9068988273516422, "grad_norm": 0.35065850615501404, "learning_rate": 1.395400781765572e-05, "loss": 0.0651, "step": 17865 }, { "epoch": 0.9071526473425047, "grad_norm": 0.5612497925758362, "learning_rate": 1.3952315684383304e-05, "loss": 0.0835, "step": 17870 }, { "epoch": 0.9074064673333672, "grad_norm": 0.39559072256088257, "learning_rate": 1.3950623551110887e-05, "loss": 0.0682, "step": 17875 }, { "epoch": 0.9076602873242297, "grad_norm": 0.35949650406837463, "learning_rate": 1.3948931417838469e-05, "loss": 0.0785, "step": 17880 }, { "epoch": 0.9079141073150921, "grad_norm": 0.5224626064300537, "learning_rate": 1.3947239284566054e-05, "loss": 0.085, "step": 17885 }, { "epoch": 0.9081679273059546, "grad_norm": 0.3599477708339691, "learning_rate": 1.3945547151293638e-05, "loss": 0.0684, "step": 17890 }, { "epoch": 0.9084217472968171, "grad_norm": 0.6261371970176697, "learning_rate": 1.394385501802122e-05, "loss": 0.068, "step": 17895 }, { "epoch": 0.9086755672876796, "grad_norm": 0.3289092183113098, "learning_rate": 1.3942162884748805e-05, "loss": 0.0606, "step": 17900 }, { "epoch": 0.9089293872785421, "grad_norm": 0.39295879006385803, "learning_rate": 1.3940470751476387e-05, "loss": 0.0604, "step": 17905 }, { "epoch": 0.9091832072694045, "grad_norm": 0.40853872895240784, "learning_rate": 1.3938778618203972e-05, "loss": 0.0763, "step": 17910 }, { "epoch": 0.909437027260267, "grad_norm": 0.587341845035553, "learning_rate": 1.3937086484931554e-05, "loss": 0.0724, "step": 17915 }, { "epoch": 0.9096908472511295, "grad_norm": 0.5507720708847046, "learning_rate": 1.3935394351659137e-05, "loss": 0.0695, "step": 17920 }, { "epoch": 0.909944667241992, "grad_norm": 0.31212714314460754, "learning_rate": 1.3933702218386723e-05, "loss": 0.0757, "step": 17925 }, { "epoch": 0.9101984872328545, "grad_norm": 0.31327691674232483, "learning_rate": 1.3932010085114304e-05, "loss": 0.0721, "step": 17930 }, { "epoch": 0.910452307223717, "grad_norm": 0.3152126669883728, "learning_rate": 1.3930317951841888e-05, "loss": 0.0568, "step": 17935 }, { "epoch": 0.9107061272145794, "grad_norm": 0.4510830342769623, "learning_rate": 1.3928625818569471e-05, "loss": 0.0672, "step": 17940 }, { "epoch": 0.9109599472054419, "grad_norm": 0.3216569721698761, "learning_rate": 1.3926933685297055e-05, "loss": 0.0676, "step": 17945 }, { "epoch": 0.9112137671963044, "grad_norm": 0.6665675044059753, "learning_rate": 1.3925241552024637e-05, "loss": 0.0646, "step": 17950 }, { "epoch": 0.9114675871871669, "grad_norm": 0.37302878499031067, "learning_rate": 1.3923549418752222e-05, "loss": 0.0681, "step": 17955 }, { "epoch": 0.9117214071780293, "grad_norm": 0.30719074606895447, "learning_rate": 1.3921857285479806e-05, "loss": 0.0694, "step": 17960 }, { "epoch": 0.9119752271688918, "grad_norm": 0.38624605536460876, "learning_rate": 1.392016515220739e-05, "loss": 0.0725, "step": 17965 }, { "epoch": 0.9122290471597543, "grad_norm": 0.299686461687088, "learning_rate": 1.3918473018934973e-05, "loss": 0.0604, "step": 17970 }, { "epoch": 0.9124828671506168, "grad_norm": 0.38363954424858093, "learning_rate": 1.3916780885662555e-05, "loss": 0.0683, "step": 17975 }, { "epoch": 0.9127366871414793, "grad_norm": 0.3559637665748596, "learning_rate": 1.391508875239014e-05, "loss": 0.0681, "step": 17980 }, { "epoch": 0.9129905071323418, "grad_norm": 0.41254445910453796, "learning_rate": 1.3913396619117723e-05, "loss": 0.0647, "step": 17985 }, { "epoch": 0.9132443271232042, "grad_norm": 0.3159063756465912, "learning_rate": 1.3911704485845305e-05, "loss": 0.0687, "step": 17990 }, { "epoch": 0.9134981471140667, "grad_norm": 0.5465823411941528, "learning_rate": 1.391001235257289e-05, "loss": 0.0702, "step": 17995 }, { "epoch": 0.9137519671049292, "grad_norm": 0.37820661067962646, "learning_rate": 1.3908320219300472e-05, "loss": 0.0766, "step": 18000 }, { "epoch": 0.9140057870957916, "grad_norm": 0.35360532999038696, "learning_rate": 1.3906628086028058e-05, "loss": 0.0726, "step": 18005 }, { "epoch": 0.9142596070866541, "grad_norm": 0.3606135845184326, "learning_rate": 1.3904935952755641e-05, "loss": 0.0647, "step": 18010 }, { "epoch": 0.9145134270775166, "grad_norm": 0.3642440438270569, "learning_rate": 1.3903243819483223e-05, "loss": 0.0769, "step": 18015 }, { "epoch": 0.9147672470683791, "grad_norm": 0.49041110277175903, "learning_rate": 1.3901551686210808e-05, "loss": 0.0738, "step": 18020 }, { "epoch": 0.9150210670592416, "grad_norm": 0.38325151801109314, "learning_rate": 1.389985955293839e-05, "loss": 0.062, "step": 18025 }, { "epoch": 0.9152748870501041, "grad_norm": 0.4725360870361328, "learning_rate": 1.3898167419665974e-05, "loss": 0.0713, "step": 18030 }, { "epoch": 0.9155287070409666, "grad_norm": 0.7474817037582397, "learning_rate": 1.3896475286393559e-05, "loss": 0.0604, "step": 18035 }, { "epoch": 0.915782527031829, "grad_norm": 0.3981219530105591, "learning_rate": 1.389478315312114e-05, "loss": 0.0713, "step": 18040 }, { "epoch": 0.9160363470226915, "grad_norm": 0.32454854249954224, "learning_rate": 1.3893091019848724e-05, "loss": 0.0672, "step": 18045 }, { "epoch": 0.916290167013554, "grad_norm": 0.3997170329093933, "learning_rate": 1.3891398886576308e-05, "loss": 0.0828, "step": 18050 }, { "epoch": 0.9165439870044164, "grad_norm": 0.28583142161369324, "learning_rate": 1.3889706753303891e-05, "loss": 0.0685, "step": 18055 }, { "epoch": 0.9167978069952789, "grad_norm": 0.48778054118156433, "learning_rate": 1.3888014620031477e-05, "loss": 0.0628, "step": 18060 }, { "epoch": 0.9170516269861414, "grad_norm": 0.38761603832244873, "learning_rate": 1.3886322486759058e-05, "loss": 0.0718, "step": 18065 }, { "epoch": 0.9173054469770039, "grad_norm": 0.5895673632621765, "learning_rate": 1.3884630353486642e-05, "loss": 0.0774, "step": 18070 }, { "epoch": 0.9175592669678664, "grad_norm": 0.30503496527671814, "learning_rate": 1.3882938220214225e-05, "loss": 0.0607, "step": 18075 }, { "epoch": 0.9178130869587289, "grad_norm": 0.4478660225868225, "learning_rate": 1.3881246086941809e-05, "loss": 0.0708, "step": 18080 }, { "epoch": 0.9180669069495914, "grad_norm": 0.509867787361145, "learning_rate": 1.3879553953669391e-05, "loss": 0.0738, "step": 18085 }, { "epoch": 0.9183207269404539, "grad_norm": 1.060240387916565, "learning_rate": 1.3877861820396976e-05, "loss": 0.0762, "step": 18090 }, { "epoch": 0.9185745469313163, "grad_norm": 0.31515824794769287, "learning_rate": 1.387616968712456e-05, "loss": 0.0693, "step": 18095 }, { "epoch": 0.9188283669221788, "grad_norm": 0.32389432191848755, "learning_rate": 1.3874477553852141e-05, "loss": 0.0649, "step": 18100 }, { "epoch": 0.9190821869130412, "grad_norm": 0.4039955139160156, "learning_rate": 1.3872785420579727e-05, "loss": 0.0619, "step": 18105 }, { "epoch": 0.9193360069039037, "grad_norm": 0.29883208870887756, "learning_rate": 1.3871093287307309e-05, "loss": 0.0692, "step": 18110 }, { "epoch": 0.9195898268947662, "grad_norm": 0.49272438883781433, "learning_rate": 1.3869401154034894e-05, "loss": 0.0791, "step": 18115 }, { "epoch": 0.9198436468856287, "grad_norm": 0.6225094795227051, "learning_rate": 1.3867709020762476e-05, "loss": 0.0692, "step": 18120 }, { "epoch": 0.9200974668764912, "grad_norm": 0.3257209062576294, "learning_rate": 1.386601688749006e-05, "loss": 0.0773, "step": 18125 }, { "epoch": 0.9203512868673537, "grad_norm": 0.5255199074745178, "learning_rate": 1.3864324754217644e-05, "loss": 0.0753, "step": 18130 }, { "epoch": 0.9206051068582162, "grad_norm": 0.42838171124458313, "learning_rate": 1.3862632620945226e-05, "loss": 0.0654, "step": 18135 }, { "epoch": 0.9208589268490787, "grad_norm": 0.37420716881752014, "learning_rate": 1.386094048767281e-05, "loss": 0.0669, "step": 18140 }, { "epoch": 0.9211127468399412, "grad_norm": 0.4622999429702759, "learning_rate": 1.3859248354400393e-05, "loss": 0.0679, "step": 18145 }, { "epoch": 0.9213665668308036, "grad_norm": 0.4860808849334717, "learning_rate": 1.3857556221127977e-05, "loss": 0.0785, "step": 18150 }, { "epoch": 0.921620386821666, "grad_norm": 0.32169151306152344, "learning_rate": 1.3855864087855562e-05, "loss": 0.0637, "step": 18155 }, { "epoch": 0.9218742068125285, "grad_norm": 0.37703365087509155, "learning_rate": 1.3854171954583144e-05, "loss": 0.065, "step": 18160 }, { "epoch": 0.922128026803391, "grad_norm": 0.3887845277786255, "learning_rate": 1.3852479821310728e-05, "loss": 0.0599, "step": 18165 }, { "epoch": 0.9223818467942535, "grad_norm": 0.35805755853652954, "learning_rate": 1.3850787688038311e-05, "loss": 0.0732, "step": 18170 }, { "epoch": 0.922635666785116, "grad_norm": 0.33947518467903137, "learning_rate": 1.3849095554765895e-05, "loss": 0.0667, "step": 18175 }, { "epoch": 0.9228894867759785, "grad_norm": 0.4389743208885193, "learning_rate": 1.3847403421493477e-05, "loss": 0.0693, "step": 18180 }, { "epoch": 0.923143306766841, "grad_norm": 0.44879239797592163, "learning_rate": 1.3845711288221062e-05, "loss": 0.0618, "step": 18185 }, { "epoch": 0.9233971267577035, "grad_norm": 0.3365269601345062, "learning_rate": 1.3844019154948645e-05, "loss": 0.0759, "step": 18190 }, { "epoch": 0.923650946748566, "grad_norm": 0.24301739037036896, "learning_rate": 1.3842327021676227e-05, "loss": 0.0658, "step": 18195 }, { "epoch": 0.9239047667394285, "grad_norm": 0.324535995721817, "learning_rate": 1.3840634888403812e-05, "loss": 0.0572, "step": 18200 }, { "epoch": 0.9241585867302908, "grad_norm": 0.3125174641609192, "learning_rate": 1.3838942755131394e-05, "loss": 0.0705, "step": 18205 }, { "epoch": 0.9244124067211533, "grad_norm": 0.41187185049057007, "learning_rate": 1.383725062185898e-05, "loss": 0.0654, "step": 18210 }, { "epoch": 0.9246662267120158, "grad_norm": 0.4254433512687683, "learning_rate": 1.3835558488586563e-05, "loss": 0.0832, "step": 18215 }, { "epoch": 0.9249200467028783, "grad_norm": 0.37248459458351135, "learning_rate": 1.3833866355314145e-05, "loss": 0.0635, "step": 18220 }, { "epoch": 0.9251738666937408, "grad_norm": 0.34162911772727966, "learning_rate": 1.383217422204173e-05, "loss": 0.0626, "step": 18225 }, { "epoch": 0.9254276866846033, "grad_norm": 0.4066483676433563, "learning_rate": 1.3830482088769312e-05, "loss": 0.0745, "step": 18230 }, { "epoch": 0.9256815066754658, "grad_norm": 0.600093424320221, "learning_rate": 1.3828789955496896e-05, "loss": 0.0673, "step": 18235 }, { "epoch": 0.9259353266663283, "grad_norm": 0.47302451729774475, "learning_rate": 1.382709782222448e-05, "loss": 0.0653, "step": 18240 }, { "epoch": 0.9261891466571908, "grad_norm": 0.41447651386260986, "learning_rate": 1.3825405688952063e-05, "loss": 0.0709, "step": 18245 }, { "epoch": 0.9264429666480531, "grad_norm": 0.31857216358184814, "learning_rate": 1.3823713555679648e-05, "loss": 0.0702, "step": 18250 }, { "epoch": 0.9266967866389156, "grad_norm": 0.39774346351623535, "learning_rate": 1.382202142240723e-05, "loss": 0.0757, "step": 18255 }, { "epoch": 0.9269506066297781, "grad_norm": 0.3512285053730011, "learning_rate": 1.3820329289134813e-05, "loss": 0.0664, "step": 18260 }, { "epoch": 0.9272044266206406, "grad_norm": 0.42229223251342773, "learning_rate": 1.3818637155862398e-05, "loss": 0.0662, "step": 18265 }, { "epoch": 0.9274582466115031, "grad_norm": 0.4002987742424011, "learning_rate": 1.381694502258998e-05, "loss": 0.0679, "step": 18270 }, { "epoch": 0.9277120666023656, "grad_norm": 0.4806728661060333, "learning_rate": 1.3815252889317564e-05, "loss": 0.0645, "step": 18275 }, { "epoch": 0.9279658865932281, "grad_norm": 0.29817402362823486, "learning_rate": 1.3813560756045147e-05, "loss": 0.0686, "step": 18280 }, { "epoch": 0.9282197065840906, "grad_norm": 0.4930202066898346, "learning_rate": 1.3811868622772731e-05, "loss": 0.0744, "step": 18285 }, { "epoch": 0.9284735265749531, "grad_norm": 0.4029638171195984, "learning_rate": 1.3810176489500313e-05, "loss": 0.0637, "step": 18290 }, { "epoch": 0.9287273465658156, "grad_norm": 0.45114755630493164, "learning_rate": 1.3808484356227898e-05, "loss": 0.0668, "step": 18295 }, { "epoch": 0.928981166556678, "grad_norm": 0.3982153832912445, "learning_rate": 1.380679222295548e-05, "loss": 0.0613, "step": 18300 }, { "epoch": 0.9292349865475404, "grad_norm": 0.3455192446708679, "learning_rate": 1.3805100089683065e-05, "loss": 0.0707, "step": 18305 }, { "epoch": 0.9294888065384029, "grad_norm": 0.3606429398059845, "learning_rate": 1.3803407956410649e-05, "loss": 0.0708, "step": 18310 }, { "epoch": 0.9297426265292654, "grad_norm": 0.45667099952697754, "learning_rate": 1.380171582313823e-05, "loss": 0.0642, "step": 18315 }, { "epoch": 0.9299964465201279, "grad_norm": 0.38719412684440613, "learning_rate": 1.3800023689865816e-05, "loss": 0.0648, "step": 18320 }, { "epoch": 0.9302502665109904, "grad_norm": 0.5505213737487793, "learning_rate": 1.3798331556593398e-05, "loss": 0.0653, "step": 18325 }, { "epoch": 0.9305040865018529, "grad_norm": 0.5225023627281189, "learning_rate": 1.3796639423320981e-05, "loss": 0.0715, "step": 18330 }, { "epoch": 0.9307579064927154, "grad_norm": 0.2744523286819458, "learning_rate": 1.3794947290048566e-05, "loss": 0.0596, "step": 18335 }, { "epoch": 0.9310117264835779, "grad_norm": 0.4220918118953705, "learning_rate": 1.3793255156776148e-05, "loss": 0.0703, "step": 18340 }, { "epoch": 0.9312655464744404, "grad_norm": 0.4151606261730194, "learning_rate": 1.3791563023503732e-05, "loss": 0.0629, "step": 18345 }, { "epoch": 0.9315193664653028, "grad_norm": 0.43558046221733093, "learning_rate": 1.3789870890231315e-05, "loss": 0.0554, "step": 18350 }, { "epoch": 0.9317731864561652, "grad_norm": 0.3388653099536896, "learning_rate": 1.3788178756958899e-05, "loss": 0.0702, "step": 18355 }, { "epoch": 0.9320270064470277, "grad_norm": 0.5925946235656738, "learning_rate": 1.3786486623686484e-05, "loss": 0.0682, "step": 18360 }, { "epoch": 0.9322808264378902, "grad_norm": 1.0079941749572754, "learning_rate": 1.3784794490414066e-05, "loss": 0.0663, "step": 18365 }, { "epoch": 0.9325346464287527, "grad_norm": 0.38128307461738586, "learning_rate": 1.378310235714165e-05, "loss": 0.0671, "step": 18370 }, { "epoch": 0.9327884664196152, "grad_norm": 0.3320787847042084, "learning_rate": 1.3781410223869233e-05, "loss": 0.0654, "step": 18375 }, { "epoch": 0.9330422864104777, "grad_norm": 0.2928427755832672, "learning_rate": 1.3779718090596817e-05, "loss": 0.0761, "step": 18380 }, { "epoch": 0.9332961064013402, "grad_norm": 0.4131205081939697, "learning_rate": 1.3778025957324398e-05, "loss": 0.0728, "step": 18385 }, { "epoch": 0.9335499263922027, "grad_norm": 0.38338300585746765, "learning_rate": 1.3776333824051984e-05, "loss": 0.0645, "step": 18390 }, { "epoch": 0.9338037463830652, "grad_norm": 0.387997031211853, "learning_rate": 1.3774641690779567e-05, "loss": 0.0662, "step": 18395 }, { "epoch": 0.9340575663739276, "grad_norm": 0.384552925825119, "learning_rate": 1.377294955750715e-05, "loss": 0.0722, "step": 18400 }, { "epoch": 0.93431138636479, "grad_norm": 0.36367639899253845, "learning_rate": 1.3771257424234734e-05, "loss": 0.0649, "step": 18405 }, { "epoch": 0.9345652063556525, "grad_norm": 0.7512829303741455, "learning_rate": 1.3769565290962316e-05, "loss": 0.0625, "step": 18410 }, { "epoch": 0.934819026346515, "grad_norm": 0.38222187757492065, "learning_rate": 1.3767873157689901e-05, "loss": 0.072, "step": 18415 }, { "epoch": 0.9350728463373775, "grad_norm": 0.6945295929908752, "learning_rate": 1.3766181024417485e-05, "loss": 0.0635, "step": 18420 }, { "epoch": 0.93532666632824, "grad_norm": 0.4692384600639343, "learning_rate": 1.3764488891145067e-05, "loss": 0.0697, "step": 18425 }, { "epoch": 0.9355804863191025, "grad_norm": 0.3983794152736664, "learning_rate": 1.3762796757872652e-05, "loss": 0.0651, "step": 18430 }, { "epoch": 0.935834306309965, "grad_norm": 0.450428307056427, "learning_rate": 1.3761104624600234e-05, "loss": 0.0644, "step": 18435 }, { "epoch": 0.9360881263008275, "grad_norm": 0.26266250014305115, "learning_rate": 1.3759412491327817e-05, "loss": 0.0562, "step": 18440 }, { "epoch": 0.93634194629169, "grad_norm": 0.4074264168739319, "learning_rate": 1.3757720358055403e-05, "loss": 0.0627, "step": 18445 }, { "epoch": 0.9365957662825524, "grad_norm": 0.41876572370529175, "learning_rate": 1.3756028224782985e-05, "loss": 0.0605, "step": 18450 }, { "epoch": 0.9368495862734149, "grad_norm": 0.38248318433761597, "learning_rate": 1.375433609151057e-05, "loss": 0.0615, "step": 18455 }, { "epoch": 0.9371034062642773, "grad_norm": 0.38324958086013794, "learning_rate": 1.3752643958238152e-05, "loss": 0.0664, "step": 18460 }, { "epoch": 0.9373572262551398, "grad_norm": 0.5330802202224731, "learning_rate": 1.3750951824965735e-05, "loss": 0.0593, "step": 18465 }, { "epoch": 0.9376110462460023, "grad_norm": 1.0437511205673218, "learning_rate": 1.374925969169332e-05, "loss": 0.064, "step": 18470 }, { "epoch": 0.9378648662368648, "grad_norm": 0.9003655910491943, "learning_rate": 1.3747567558420902e-05, "loss": 0.0683, "step": 18475 }, { "epoch": 0.9381186862277273, "grad_norm": 0.32290807366371155, "learning_rate": 1.3745875425148484e-05, "loss": 0.068, "step": 18480 }, { "epoch": 0.9383725062185898, "grad_norm": 0.6567671895027161, "learning_rate": 1.374418329187607e-05, "loss": 0.0661, "step": 18485 }, { "epoch": 0.9386263262094523, "grad_norm": 0.35361889004707336, "learning_rate": 1.3742491158603653e-05, "loss": 0.0689, "step": 18490 }, { "epoch": 0.9388801462003148, "grad_norm": 0.3336738049983978, "learning_rate": 1.3740799025331235e-05, "loss": 0.0594, "step": 18495 }, { "epoch": 0.9391339661911772, "grad_norm": 0.3773046135902405, "learning_rate": 1.373910689205882e-05, "loss": 0.0704, "step": 18500 }, { "epoch": 0.9393877861820397, "grad_norm": 0.3520534038543701, "learning_rate": 1.3737414758786402e-05, "loss": 0.0704, "step": 18505 }, { "epoch": 0.9396416061729022, "grad_norm": 0.3274901807308197, "learning_rate": 1.3735722625513987e-05, "loss": 0.071, "step": 18510 }, { "epoch": 0.9398954261637646, "grad_norm": 0.25946927070617676, "learning_rate": 1.373403049224157e-05, "loss": 0.0698, "step": 18515 }, { "epoch": 0.9401492461546271, "grad_norm": 0.5171555876731873, "learning_rate": 1.3732338358969152e-05, "loss": 0.0553, "step": 18520 }, { "epoch": 0.9404030661454896, "grad_norm": 0.32266131043434143, "learning_rate": 1.3730646225696738e-05, "loss": 0.0538, "step": 18525 }, { "epoch": 0.9406568861363521, "grad_norm": 0.43915310502052307, "learning_rate": 1.372895409242432e-05, "loss": 0.0583, "step": 18530 }, { "epoch": 0.9409107061272146, "grad_norm": 0.5458141565322876, "learning_rate": 1.3727261959151903e-05, "loss": 0.0679, "step": 18535 }, { "epoch": 0.9411645261180771, "grad_norm": 0.33626362681388855, "learning_rate": 1.3725569825879488e-05, "loss": 0.0617, "step": 18540 }, { "epoch": 0.9414183461089395, "grad_norm": 0.3278324007987976, "learning_rate": 1.372387769260707e-05, "loss": 0.0599, "step": 18545 }, { "epoch": 0.941672166099802, "grad_norm": 0.3287067115306854, "learning_rate": 1.3722185559334655e-05, "loss": 0.0635, "step": 18550 }, { "epoch": 0.9419259860906645, "grad_norm": 0.3660200834274292, "learning_rate": 1.3720493426062237e-05, "loss": 0.055, "step": 18555 }, { "epoch": 0.942179806081527, "grad_norm": 0.41835132241249084, "learning_rate": 1.371880129278982e-05, "loss": 0.0736, "step": 18560 }, { "epoch": 0.9424336260723895, "grad_norm": 0.44712701439857483, "learning_rate": 1.3717109159517406e-05, "loss": 0.0607, "step": 18565 }, { "epoch": 0.9426874460632519, "grad_norm": 0.33872178196907043, "learning_rate": 1.3715417026244988e-05, "loss": 0.0619, "step": 18570 }, { "epoch": 0.9429412660541144, "grad_norm": 0.6011540293693542, "learning_rate": 1.3713724892972571e-05, "loss": 0.0646, "step": 18575 }, { "epoch": 0.9431950860449769, "grad_norm": 0.5028777122497559, "learning_rate": 1.3712032759700155e-05, "loss": 0.0685, "step": 18580 }, { "epoch": 0.9434489060358394, "grad_norm": 0.595824122428894, "learning_rate": 1.3710340626427739e-05, "loss": 0.0637, "step": 18585 }, { "epoch": 0.9437027260267019, "grad_norm": 0.33471375703811646, "learning_rate": 1.370864849315532e-05, "loss": 0.0629, "step": 18590 }, { "epoch": 0.9439565460175643, "grad_norm": 0.3110968768596649, "learning_rate": 1.3706956359882906e-05, "loss": 0.0587, "step": 18595 }, { "epoch": 0.9442103660084268, "grad_norm": 0.303251713514328, "learning_rate": 1.370526422661049e-05, "loss": 0.0684, "step": 18600 }, { "epoch": 0.9444641859992893, "grad_norm": 0.5813922882080078, "learning_rate": 1.3703572093338073e-05, "loss": 0.0623, "step": 18605 }, { "epoch": 0.9447180059901518, "grad_norm": 0.4020741283893585, "learning_rate": 1.3701879960065656e-05, "loss": 0.065, "step": 18610 }, { "epoch": 0.9449718259810143, "grad_norm": 0.504642903804779, "learning_rate": 1.3700187826793238e-05, "loss": 0.0552, "step": 18615 }, { "epoch": 0.9452256459718767, "grad_norm": 0.4510534703731537, "learning_rate": 1.3698495693520823e-05, "loss": 0.0653, "step": 18620 }, { "epoch": 0.9454794659627392, "grad_norm": 0.40231838822364807, "learning_rate": 1.3696803560248407e-05, "loss": 0.0604, "step": 18625 }, { "epoch": 0.9457332859536017, "grad_norm": 0.303325891494751, "learning_rate": 1.3695111426975989e-05, "loss": 0.0552, "step": 18630 }, { "epoch": 0.9459871059444642, "grad_norm": 0.2996688187122345, "learning_rate": 1.3693419293703574e-05, "loss": 0.0658, "step": 18635 }, { "epoch": 0.9462409259353267, "grad_norm": 0.57343989610672, "learning_rate": 1.3691727160431156e-05, "loss": 0.0678, "step": 18640 }, { "epoch": 0.9464947459261891, "grad_norm": 0.36376577615737915, "learning_rate": 1.3690035027158741e-05, "loss": 0.072, "step": 18645 }, { "epoch": 0.9467485659170516, "grad_norm": 0.4058065414428711, "learning_rate": 1.3688342893886325e-05, "loss": 0.0642, "step": 18650 }, { "epoch": 0.9470023859079141, "grad_norm": 0.5225498676300049, "learning_rate": 1.3686650760613906e-05, "loss": 0.0687, "step": 18655 }, { "epoch": 0.9472562058987766, "grad_norm": 0.3555619716644287, "learning_rate": 1.3684958627341492e-05, "loss": 0.0575, "step": 18660 }, { "epoch": 0.9475100258896391, "grad_norm": 0.32786718010902405, "learning_rate": 1.3683266494069074e-05, "loss": 0.0603, "step": 18665 }, { "epoch": 0.9477638458805016, "grad_norm": 0.497963547706604, "learning_rate": 1.3681574360796657e-05, "loss": 0.0707, "step": 18670 }, { "epoch": 0.948017665871364, "grad_norm": 0.3656896650791168, "learning_rate": 1.3679882227524242e-05, "loss": 0.0636, "step": 18675 }, { "epoch": 0.9482714858622265, "grad_norm": 0.38868141174316406, "learning_rate": 1.3678190094251824e-05, "loss": 0.0619, "step": 18680 }, { "epoch": 0.948525305853089, "grad_norm": 0.4414658844470978, "learning_rate": 1.3676497960979406e-05, "loss": 0.0726, "step": 18685 }, { "epoch": 0.9487791258439515, "grad_norm": 0.38483738899230957, "learning_rate": 1.3674805827706991e-05, "loss": 0.068, "step": 18690 }, { "epoch": 0.9490329458348139, "grad_norm": 0.33117255568504333, "learning_rate": 1.3673113694434575e-05, "loss": 0.0637, "step": 18695 }, { "epoch": 0.9492867658256764, "grad_norm": 0.633201539516449, "learning_rate": 1.367142156116216e-05, "loss": 0.0634, "step": 18700 }, { "epoch": 0.9495405858165389, "grad_norm": 0.4020320177078247, "learning_rate": 1.3669729427889742e-05, "loss": 0.0544, "step": 18705 }, { "epoch": 0.9497944058074014, "grad_norm": 0.42026662826538086, "learning_rate": 1.3668037294617324e-05, "loss": 0.0612, "step": 18710 }, { "epoch": 0.9500482257982639, "grad_norm": 2.030712842941284, "learning_rate": 1.3666345161344909e-05, "loss": 0.0656, "step": 18715 }, { "epoch": 0.9503020457891264, "grad_norm": 0.44346386194229126, "learning_rate": 1.3664653028072493e-05, "loss": 0.0668, "step": 18720 }, { "epoch": 0.9505558657799889, "grad_norm": 0.41186749935150146, "learning_rate": 1.3662960894800074e-05, "loss": 0.0753, "step": 18725 }, { "epoch": 0.9508096857708513, "grad_norm": 0.3317037522792816, "learning_rate": 1.366126876152766e-05, "loss": 0.0677, "step": 18730 }, { "epoch": 0.9510635057617138, "grad_norm": 0.3373609781265259, "learning_rate": 1.3659576628255241e-05, "loss": 0.0704, "step": 18735 }, { "epoch": 0.9513173257525763, "grad_norm": 0.3054006099700928, "learning_rate": 1.3657884494982825e-05, "loss": 0.0564, "step": 18740 }, { "epoch": 0.9515711457434387, "grad_norm": 0.3507018983364105, "learning_rate": 1.365619236171041e-05, "loss": 0.0682, "step": 18745 }, { "epoch": 0.9518249657343012, "grad_norm": 0.501518189907074, "learning_rate": 1.3654500228437992e-05, "loss": 0.0721, "step": 18750 }, { "epoch": 0.9520787857251637, "grad_norm": 0.3085997998714447, "learning_rate": 1.3652808095165577e-05, "loss": 0.06, "step": 18755 }, { "epoch": 0.9523326057160262, "grad_norm": 0.700425386428833, "learning_rate": 1.365111596189316e-05, "loss": 0.0705, "step": 18760 }, { "epoch": 0.9525864257068887, "grad_norm": 0.3953579068183899, "learning_rate": 1.3649423828620743e-05, "loss": 0.0609, "step": 18765 }, { "epoch": 0.9528402456977512, "grad_norm": 0.48132267594337463, "learning_rate": 1.3647731695348328e-05, "loss": 0.0641, "step": 18770 }, { "epoch": 0.9530940656886137, "grad_norm": 0.38885220885276794, "learning_rate": 1.364603956207591e-05, "loss": 0.0662, "step": 18775 }, { "epoch": 0.9533478856794761, "grad_norm": 0.30875876545906067, "learning_rate": 1.3644347428803493e-05, "loss": 0.0597, "step": 18780 }, { "epoch": 0.9536017056703386, "grad_norm": 0.324533611536026, "learning_rate": 1.3642655295531077e-05, "loss": 0.0548, "step": 18785 }, { "epoch": 0.9538555256612011, "grad_norm": 0.33613696694374084, "learning_rate": 1.364096316225866e-05, "loss": 0.0601, "step": 18790 }, { "epoch": 0.9541093456520635, "grad_norm": 0.5050367712974548, "learning_rate": 1.3639271028986246e-05, "loss": 0.0637, "step": 18795 }, { "epoch": 0.954363165642926, "grad_norm": 0.2775934338569641, "learning_rate": 1.3637578895713828e-05, "loss": 0.072, "step": 18800 }, { "epoch": 0.9546169856337885, "grad_norm": 0.33112195134162903, "learning_rate": 1.3635886762441411e-05, "loss": 0.0624, "step": 18805 }, { "epoch": 0.954870805624651, "grad_norm": 0.4370911121368408, "learning_rate": 1.3634194629168995e-05, "loss": 0.0684, "step": 18810 }, { "epoch": 0.9551246256155135, "grad_norm": 0.2732505798339844, "learning_rate": 1.3632502495896578e-05, "loss": 0.0636, "step": 18815 }, { "epoch": 0.955378445606376, "grad_norm": 0.45320791006088257, "learning_rate": 1.363081036262416e-05, "loss": 0.0745, "step": 18820 }, { "epoch": 0.9556322655972385, "grad_norm": 0.44382214546203613, "learning_rate": 1.3629118229351745e-05, "loss": 0.0564, "step": 18825 }, { "epoch": 0.955886085588101, "grad_norm": 0.33199283480644226, "learning_rate": 1.3627426096079329e-05, "loss": 0.069, "step": 18830 }, { "epoch": 0.9561399055789634, "grad_norm": 0.48582470417022705, "learning_rate": 1.362573396280691e-05, "loss": 0.0689, "step": 18835 }, { "epoch": 0.9563937255698258, "grad_norm": 0.4178053140640259, "learning_rate": 1.3624041829534496e-05, "loss": 0.0692, "step": 18840 }, { "epoch": 0.9566475455606883, "grad_norm": 0.39388084411621094, "learning_rate": 1.3622349696262078e-05, "loss": 0.0658, "step": 18845 }, { "epoch": 0.9569013655515508, "grad_norm": 0.3513961136341095, "learning_rate": 1.3620657562989663e-05, "loss": 0.0729, "step": 18850 }, { "epoch": 0.9571551855424133, "grad_norm": 0.31259411573410034, "learning_rate": 1.3618965429717247e-05, "loss": 0.0679, "step": 18855 }, { "epoch": 0.9574090055332758, "grad_norm": 0.5756545066833496, "learning_rate": 1.3617273296444828e-05, "loss": 0.0665, "step": 18860 }, { "epoch": 0.9576628255241383, "grad_norm": 0.5503848791122437, "learning_rate": 1.3615581163172414e-05, "loss": 0.0535, "step": 18865 }, { "epoch": 0.9579166455150008, "grad_norm": 0.3748461604118347, "learning_rate": 1.3613889029899996e-05, "loss": 0.07, "step": 18870 }, { "epoch": 0.9581704655058633, "grad_norm": 0.25519129633903503, "learning_rate": 1.3612196896627579e-05, "loss": 0.0668, "step": 18875 }, { "epoch": 0.9584242854967258, "grad_norm": 0.46383601427078247, "learning_rate": 1.3610504763355164e-05, "loss": 0.0691, "step": 18880 }, { "epoch": 0.9586781054875883, "grad_norm": 0.4589533805847168, "learning_rate": 1.3608812630082746e-05, "loss": 0.0734, "step": 18885 }, { "epoch": 0.9589319254784506, "grad_norm": 0.9147213697433472, "learning_rate": 1.3607120496810331e-05, "loss": 0.0678, "step": 18890 }, { "epoch": 0.9591857454693131, "grad_norm": 0.37536412477493286, "learning_rate": 1.3605428363537913e-05, "loss": 0.0653, "step": 18895 }, { "epoch": 0.9594395654601756, "grad_norm": 0.7372077703475952, "learning_rate": 1.3603736230265497e-05, "loss": 0.0673, "step": 18900 }, { "epoch": 0.9596933854510381, "grad_norm": 0.4121319651603699, "learning_rate": 1.360204409699308e-05, "loss": 0.0643, "step": 18905 }, { "epoch": 0.9599472054419006, "grad_norm": 0.2821780741214752, "learning_rate": 1.3600351963720664e-05, "loss": 0.0634, "step": 18910 }, { "epoch": 0.9602010254327631, "grad_norm": 0.2763786017894745, "learning_rate": 1.3598659830448246e-05, "loss": 0.0655, "step": 18915 }, { "epoch": 0.9604548454236256, "grad_norm": 0.3872879445552826, "learning_rate": 1.3596967697175831e-05, "loss": 0.0695, "step": 18920 }, { "epoch": 0.9607086654144881, "grad_norm": 0.3432711660861969, "learning_rate": 1.3595275563903414e-05, "loss": 0.0618, "step": 18925 }, { "epoch": 0.9609624854053506, "grad_norm": 0.40918609499931335, "learning_rate": 1.3593583430630996e-05, "loss": 0.0663, "step": 18930 }, { "epoch": 0.961216305396213, "grad_norm": 0.2839658558368683, "learning_rate": 1.3591891297358582e-05, "loss": 0.0609, "step": 18935 }, { "epoch": 0.9614701253870754, "grad_norm": 0.45697519183158875, "learning_rate": 1.3590199164086163e-05, "loss": 0.0682, "step": 18940 }, { "epoch": 0.9617239453779379, "grad_norm": 0.4115654528141022, "learning_rate": 1.3588507030813749e-05, "loss": 0.0755, "step": 18945 }, { "epoch": 0.9619777653688004, "grad_norm": 0.3328510820865631, "learning_rate": 1.3586814897541332e-05, "loss": 0.0726, "step": 18950 }, { "epoch": 0.9622315853596629, "grad_norm": 0.392888605594635, "learning_rate": 1.3585122764268914e-05, "loss": 0.0692, "step": 18955 }, { "epoch": 0.9624854053505254, "grad_norm": 0.7154536843299866, "learning_rate": 1.35834306309965e-05, "loss": 0.0709, "step": 18960 }, { "epoch": 0.9627392253413879, "grad_norm": 0.38823920488357544, "learning_rate": 1.3581738497724081e-05, "loss": 0.0581, "step": 18965 }, { "epoch": 0.9629930453322504, "grad_norm": 1.6257457733154297, "learning_rate": 1.3580046364451665e-05, "loss": 0.0627, "step": 18970 }, { "epoch": 0.9632468653231129, "grad_norm": 0.3897399604320526, "learning_rate": 1.357835423117925e-05, "loss": 0.0612, "step": 18975 }, { "epoch": 0.9635006853139754, "grad_norm": 0.33613255620002747, "learning_rate": 1.3576662097906832e-05, "loss": 0.0535, "step": 18980 }, { "epoch": 0.9637545053048379, "grad_norm": 0.3078048527240753, "learning_rate": 1.3574969964634415e-05, "loss": 0.0664, "step": 18985 }, { "epoch": 0.9640083252957002, "grad_norm": 0.8129367828369141, "learning_rate": 1.3573277831361999e-05, "loss": 0.0686, "step": 18990 }, { "epoch": 0.9642621452865627, "grad_norm": 0.28624656796455383, "learning_rate": 1.3571585698089582e-05, "loss": 0.0629, "step": 18995 }, { "epoch": 0.9645159652774252, "grad_norm": 0.40177884697914124, "learning_rate": 1.3569893564817168e-05, "loss": 0.0631, "step": 19000 }, { "epoch": 0.9647697852682877, "grad_norm": 0.8333194255828857, "learning_rate": 1.356820143154475e-05, "loss": 0.0636, "step": 19005 }, { "epoch": 0.9650236052591502, "grad_norm": 0.6080228090286255, "learning_rate": 1.3566509298272333e-05, "loss": 0.0579, "step": 19010 }, { "epoch": 0.9652774252500127, "grad_norm": 0.3721805512905121, "learning_rate": 1.3564817164999917e-05, "loss": 0.0657, "step": 19015 }, { "epoch": 0.9655312452408752, "grad_norm": 0.36625728011131287, "learning_rate": 1.35631250317275e-05, "loss": 0.065, "step": 19020 }, { "epoch": 0.9657850652317377, "grad_norm": 0.4012068510055542, "learning_rate": 1.3561432898455082e-05, "loss": 0.0647, "step": 19025 }, { "epoch": 0.9660388852226002, "grad_norm": 0.32912421226501465, "learning_rate": 1.3559740765182667e-05, "loss": 0.0632, "step": 19030 }, { "epoch": 0.9662927052134627, "grad_norm": 0.6822962164878845, "learning_rate": 1.355804863191025e-05, "loss": 0.0625, "step": 19035 }, { "epoch": 0.966546525204325, "grad_norm": 0.3175319731235504, "learning_rate": 1.3556356498637834e-05, "loss": 0.068, "step": 19040 }, { "epoch": 0.9668003451951875, "grad_norm": 0.4144521653652191, "learning_rate": 1.3554664365365418e-05, "loss": 0.0693, "step": 19045 }, { "epoch": 0.96705416518605, "grad_norm": 0.42917636036872864, "learning_rate": 1.3552972232093e-05, "loss": 0.0584, "step": 19050 }, { "epoch": 0.9673079851769125, "grad_norm": 0.3734082877635956, "learning_rate": 1.3551280098820585e-05, "loss": 0.0753, "step": 19055 }, { "epoch": 0.967561805167775, "grad_norm": 0.6813828945159912, "learning_rate": 1.3549587965548168e-05, "loss": 0.0681, "step": 19060 }, { "epoch": 0.9678156251586375, "grad_norm": 0.5187677145004272, "learning_rate": 1.354789583227575e-05, "loss": 0.063, "step": 19065 }, { "epoch": 0.9680694451495, "grad_norm": 0.37740230560302734, "learning_rate": 1.3546203699003336e-05, "loss": 0.0598, "step": 19070 }, { "epoch": 0.9683232651403625, "grad_norm": 0.35848960280418396, "learning_rate": 1.3544511565730917e-05, "loss": 0.0598, "step": 19075 }, { "epoch": 0.968577085131225, "grad_norm": 0.36278432607650757, "learning_rate": 1.3542819432458501e-05, "loss": 0.0645, "step": 19080 }, { "epoch": 0.9688309051220874, "grad_norm": 0.37709158658981323, "learning_rate": 1.3541127299186085e-05, "loss": 0.0654, "step": 19085 }, { "epoch": 0.9690847251129499, "grad_norm": 0.41565874218940735, "learning_rate": 1.3539435165913668e-05, "loss": 0.067, "step": 19090 }, { "epoch": 0.9693385451038123, "grad_norm": 0.405154287815094, "learning_rate": 1.3537743032641253e-05, "loss": 0.06, "step": 19095 }, { "epoch": 0.9695923650946748, "grad_norm": 0.4619700610637665, "learning_rate": 1.3536050899368835e-05, "loss": 0.0692, "step": 19100 }, { "epoch": 0.9698461850855373, "grad_norm": 0.43478238582611084, "learning_rate": 1.3534358766096419e-05, "loss": 0.0603, "step": 19105 }, { "epoch": 0.9701000050763998, "grad_norm": 0.36731022596359253, "learning_rate": 1.3532666632824002e-05, "loss": 0.06, "step": 19110 }, { "epoch": 0.9703538250672623, "grad_norm": 1.0390390157699585, "learning_rate": 1.3530974499551586e-05, "loss": 0.0686, "step": 19115 }, { "epoch": 0.9706076450581248, "grad_norm": 0.3847677707672119, "learning_rate": 1.3529282366279168e-05, "loss": 0.0622, "step": 19120 }, { "epoch": 0.9708614650489873, "grad_norm": 0.302962064743042, "learning_rate": 1.3527590233006753e-05, "loss": 0.0576, "step": 19125 }, { "epoch": 0.9711152850398498, "grad_norm": 0.48355162143707275, "learning_rate": 1.3525898099734336e-05, "loss": 0.0691, "step": 19130 }, { "epoch": 0.9713691050307122, "grad_norm": 0.40184906125068665, "learning_rate": 1.352420596646192e-05, "loss": 0.064, "step": 19135 }, { "epoch": 0.9716229250215747, "grad_norm": 0.3403814136981964, "learning_rate": 1.3522513833189504e-05, "loss": 0.0654, "step": 19140 }, { "epoch": 0.9718767450124371, "grad_norm": 0.37238359451293945, "learning_rate": 1.3520821699917085e-05, "loss": 0.068, "step": 19145 }, { "epoch": 0.9721305650032996, "grad_norm": 0.30097347497940063, "learning_rate": 1.351912956664467e-05, "loss": 0.0694, "step": 19150 }, { "epoch": 0.9723843849941621, "grad_norm": 0.25011131167411804, "learning_rate": 1.3517437433372254e-05, "loss": 0.0593, "step": 19155 }, { "epoch": 0.9726382049850246, "grad_norm": 0.3264051377773285, "learning_rate": 1.3515745300099836e-05, "loss": 0.0596, "step": 19160 }, { "epoch": 0.9728920249758871, "grad_norm": 0.4142516553401947, "learning_rate": 1.3514053166827421e-05, "loss": 0.0748, "step": 19165 }, { "epoch": 0.9731458449667496, "grad_norm": 0.4116646349430084, "learning_rate": 1.3512361033555003e-05, "loss": 0.0582, "step": 19170 }, { "epoch": 0.9733996649576121, "grad_norm": 0.33693939447402954, "learning_rate": 1.3510668900282587e-05, "loss": 0.0742, "step": 19175 }, { "epoch": 0.9736534849484746, "grad_norm": 0.3353005051612854, "learning_rate": 1.3508976767010172e-05, "loss": 0.0641, "step": 19180 }, { "epoch": 0.973907304939337, "grad_norm": 0.4305810332298279, "learning_rate": 1.3507284633737754e-05, "loss": 0.0645, "step": 19185 }, { "epoch": 0.9741611249301995, "grad_norm": 0.3571843206882477, "learning_rate": 1.3505592500465339e-05, "loss": 0.066, "step": 19190 }, { "epoch": 0.974414944921062, "grad_norm": 0.34298276901245117, "learning_rate": 1.350390036719292e-05, "loss": 0.0736, "step": 19195 }, { "epoch": 0.9746687649119244, "grad_norm": 0.4371688961982727, "learning_rate": 1.3502208233920504e-05, "loss": 0.0638, "step": 19200 }, { "epoch": 0.9749225849027869, "grad_norm": 0.43059366941452026, "learning_rate": 1.350051610064809e-05, "loss": 0.0683, "step": 19205 }, { "epoch": 0.9751764048936494, "grad_norm": 0.33127647638320923, "learning_rate": 1.3498823967375671e-05, "loss": 0.066, "step": 19210 }, { "epoch": 0.9754302248845119, "grad_norm": 0.4141124188899994, "learning_rate": 1.3497131834103255e-05, "loss": 0.0569, "step": 19215 }, { "epoch": 0.9756840448753744, "grad_norm": 0.45645686984062195, "learning_rate": 1.3495439700830839e-05, "loss": 0.0676, "step": 19220 }, { "epoch": 0.9759378648662369, "grad_norm": 0.44985485076904297, "learning_rate": 1.3493747567558422e-05, "loss": 0.0654, "step": 19225 }, { "epoch": 0.9761916848570994, "grad_norm": 0.3672463893890381, "learning_rate": 1.3492055434286004e-05, "loss": 0.0711, "step": 19230 }, { "epoch": 0.9764455048479618, "grad_norm": 1.25873601436615, "learning_rate": 1.349036330101359e-05, "loss": 0.0723, "step": 19235 }, { "epoch": 0.9766993248388243, "grad_norm": 0.5634270310401917, "learning_rate": 1.3488671167741173e-05, "loss": 0.0616, "step": 19240 }, { "epoch": 0.9769531448296868, "grad_norm": 0.2947288155555725, "learning_rate": 1.3486979034468756e-05, "loss": 0.0618, "step": 19245 }, { "epoch": 0.9772069648205493, "grad_norm": 0.5857254266738892, "learning_rate": 1.348528690119634e-05, "loss": 0.0682, "step": 19250 }, { "epoch": 0.9774607848114117, "grad_norm": 0.300155907869339, "learning_rate": 1.3483594767923922e-05, "loss": 0.0563, "step": 19255 }, { "epoch": 0.9777146048022742, "grad_norm": 0.521580159664154, "learning_rate": 1.3481902634651507e-05, "loss": 0.0677, "step": 19260 }, { "epoch": 0.9779684247931367, "grad_norm": 0.3018982708454132, "learning_rate": 1.348021050137909e-05, "loss": 0.0695, "step": 19265 }, { "epoch": 0.9782222447839992, "grad_norm": 0.4247705638408661, "learning_rate": 1.3478518368106672e-05, "loss": 0.0648, "step": 19270 }, { "epoch": 0.9784760647748617, "grad_norm": 0.3755514621734619, "learning_rate": 1.3476826234834258e-05, "loss": 0.0713, "step": 19275 }, { "epoch": 0.9787298847657242, "grad_norm": 0.650913417339325, "learning_rate": 1.347513410156184e-05, "loss": 0.0591, "step": 19280 }, { "epoch": 0.9789837047565866, "grad_norm": 0.32536032795906067, "learning_rate": 1.3473441968289425e-05, "loss": 0.0794, "step": 19285 }, { "epoch": 0.9792375247474491, "grad_norm": 0.5901187658309937, "learning_rate": 1.3471749835017006e-05, "loss": 0.0784, "step": 19290 }, { "epoch": 0.9794913447383116, "grad_norm": 0.3043603003025055, "learning_rate": 1.347005770174459e-05, "loss": 0.0748, "step": 19295 }, { "epoch": 0.9797451647291741, "grad_norm": 0.36308273673057556, "learning_rate": 1.3468365568472175e-05, "loss": 0.0697, "step": 19300 }, { "epoch": 0.9799989847200365, "grad_norm": 0.34357786178588867, "learning_rate": 1.3466673435199757e-05, "loss": 0.0694, "step": 19305 }, { "epoch": 0.980252804710899, "grad_norm": 0.9105756878852844, "learning_rate": 1.346498130192734e-05, "loss": 0.0545, "step": 19310 }, { "epoch": 0.9805066247017615, "grad_norm": 0.3355824947357178, "learning_rate": 1.3463289168654924e-05, "loss": 0.0552, "step": 19315 }, { "epoch": 0.980760444692624, "grad_norm": 0.3532446026802063, "learning_rate": 1.3461597035382508e-05, "loss": 0.0681, "step": 19320 }, { "epoch": 0.9810142646834865, "grad_norm": 0.3839724659919739, "learning_rate": 1.345990490211009e-05, "loss": 0.0596, "step": 19325 }, { "epoch": 0.981268084674349, "grad_norm": 0.4662843346595764, "learning_rate": 1.3458212768837675e-05, "loss": 0.0633, "step": 19330 }, { "epoch": 0.9815219046652114, "grad_norm": 0.4850928485393524, "learning_rate": 1.3456520635565258e-05, "loss": 0.06, "step": 19335 }, { "epoch": 0.9817757246560739, "grad_norm": 0.5426207184791565, "learning_rate": 1.3454828502292842e-05, "loss": 0.0711, "step": 19340 }, { "epoch": 0.9820295446469364, "grad_norm": 0.24476711452007294, "learning_rate": 1.3453136369020425e-05, "loss": 0.0687, "step": 19345 }, { "epoch": 0.9822833646377989, "grad_norm": 0.39669865369796753, "learning_rate": 1.3451444235748007e-05, "loss": 0.0648, "step": 19350 }, { "epoch": 0.9825371846286614, "grad_norm": 0.329333633184433, "learning_rate": 1.3449752102475593e-05, "loss": 0.0623, "step": 19355 }, { "epoch": 0.9827910046195238, "grad_norm": 0.5927011966705322, "learning_rate": 1.3448059969203176e-05, "loss": 0.0579, "step": 19360 }, { "epoch": 0.9830448246103863, "grad_norm": 0.4235691726207733, "learning_rate": 1.3446367835930758e-05, "loss": 0.0576, "step": 19365 }, { "epoch": 0.9832986446012488, "grad_norm": 0.3612593412399292, "learning_rate": 1.3444675702658343e-05, "loss": 0.0659, "step": 19370 }, { "epoch": 0.9835524645921113, "grad_norm": 0.4044787287712097, "learning_rate": 1.3442983569385925e-05, "loss": 0.0665, "step": 19375 }, { "epoch": 0.9838062845829737, "grad_norm": 0.32001495361328125, "learning_rate": 1.344129143611351e-05, "loss": 0.058, "step": 19380 }, { "epoch": 0.9840601045738362, "grad_norm": 0.2624494731426239, "learning_rate": 1.3439599302841094e-05, "loss": 0.0622, "step": 19385 }, { "epoch": 0.9843139245646987, "grad_norm": 0.3039698004722595, "learning_rate": 1.3437907169568676e-05, "loss": 0.0539, "step": 19390 }, { "epoch": 0.9845677445555612, "grad_norm": 1.1381529569625854, "learning_rate": 1.3436215036296261e-05, "loss": 0.0671, "step": 19395 }, { "epoch": 0.9848215645464237, "grad_norm": 0.3349166810512543, "learning_rate": 1.3434522903023843e-05, "loss": 0.0663, "step": 19400 }, { "epoch": 0.9850753845372862, "grad_norm": 0.31104400753974915, "learning_rate": 1.3432830769751426e-05, "loss": 0.053, "step": 19405 }, { "epoch": 0.9853292045281486, "grad_norm": 0.3512427806854248, "learning_rate": 1.3431138636479012e-05, "loss": 0.0741, "step": 19410 }, { "epoch": 0.9855830245190111, "grad_norm": 0.5050990581512451, "learning_rate": 1.3429446503206593e-05, "loss": 0.0678, "step": 19415 }, { "epoch": 0.9858368445098736, "grad_norm": 0.4575447142124176, "learning_rate": 1.3427754369934177e-05, "loss": 0.0707, "step": 19420 }, { "epoch": 0.9860906645007361, "grad_norm": 0.464824378490448, "learning_rate": 1.342606223666176e-05, "loss": 0.0573, "step": 19425 }, { "epoch": 0.9863444844915985, "grad_norm": 0.2967434823513031, "learning_rate": 1.3424370103389344e-05, "loss": 0.0627, "step": 19430 }, { "epoch": 0.986598304482461, "grad_norm": 0.425357848405838, "learning_rate": 1.342267797011693e-05, "loss": 0.0534, "step": 19435 }, { "epoch": 0.9868521244733235, "grad_norm": 0.437012255191803, "learning_rate": 1.3420985836844511e-05, "loss": 0.0685, "step": 19440 }, { "epoch": 0.987105944464186, "grad_norm": 0.37560611963272095, "learning_rate": 1.3419293703572095e-05, "loss": 0.0605, "step": 19445 }, { "epoch": 0.9873597644550485, "grad_norm": 0.35586923360824585, "learning_rate": 1.3417601570299678e-05, "loss": 0.0641, "step": 19450 }, { "epoch": 0.987613584445911, "grad_norm": 0.3776257038116455, "learning_rate": 1.3415909437027262e-05, "loss": 0.0571, "step": 19455 }, { "epoch": 0.9878674044367735, "grad_norm": 1.247651219367981, "learning_rate": 1.3414217303754844e-05, "loss": 0.0635, "step": 19460 }, { "epoch": 0.988121224427636, "grad_norm": 0.5398990511894226, "learning_rate": 1.3412525170482429e-05, "loss": 0.0712, "step": 19465 }, { "epoch": 0.9883750444184984, "grad_norm": 0.49520477652549744, "learning_rate": 1.341083303721001e-05, "loss": 0.0661, "step": 19470 }, { "epoch": 0.9886288644093609, "grad_norm": 0.38991716504096985, "learning_rate": 1.3409140903937594e-05, "loss": 0.0766, "step": 19475 }, { "epoch": 0.9888826844002233, "grad_norm": 0.2874346971511841, "learning_rate": 1.340744877066518e-05, "loss": 0.0654, "step": 19480 }, { "epoch": 0.9891365043910858, "grad_norm": 0.5209989547729492, "learning_rate": 1.3405756637392761e-05, "loss": 0.0577, "step": 19485 }, { "epoch": 0.9893903243819483, "grad_norm": 0.7382110953330994, "learning_rate": 1.3404064504120347e-05, "loss": 0.0635, "step": 19490 }, { "epoch": 0.9896441443728108, "grad_norm": 0.41847217082977295, "learning_rate": 1.3402372370847928e-05, "loss": 0.0525, "step": 19495 }, { "epoch": 0.9898979643636733, "grad_norm": 0.3458189070224762, "learning_rate": 1.3400680237575512e-05, "loss": 0.1819, "step": 19500 }, { "epoch": 0.9901517843545358, "grad_norm": 0.3702060580253601, "learning_rate": 1.3398988104303097e-05, "loss": 0.0635, "step": 19505 }, { "epoch": 0.9904056043453983, "grad_norm": 0.3774377405643463, "learning_rate": 1.3397295971030679e-05, "loss": 0.0685, "step": 19510 }, { "epoch": 0.9906594243362608, "grad_norm": 0.296116441488266, "learning_rate": 1.3395603837758263e-05, "loss": 0.0704, "step": 19515 }, { "epoch": 0.9909132443271232, "grad_norm": 0.5571689605712891, "learning_rate": 1.3393911704485846e-05, "loss": 0.0602, "step": 19520 }, { "epoch": 0.9911670643179857, "grad_norm": 0.36025819182395935, "learning_rate": 1.339221957121343e-05, "loss": 0.0623, "step": 19525 }, { "epoch": 0.9914208843088481, "grad_norm": 0.39932265877723694, "learning_rate": 1.3390527437941015e-05, "loss": 0.0564, "step": 19530 }, { "epoch": 0.9916747042997106, "grad_norm": 0.35256311297416687, "learning_rate": 1.3388835304668597e-05, "loss": 0.0751, "step": 19535 }, { "epoch": 0.9919285242905731, "grad_norm": 0.4102122187614441, "learning_rate": 1.338714317139618e-05, "loss": 0.0593, "step": 19540 }, { "epoch": 0.9921823442814356, "grad_norm": 0.3455532193183899, "learning_rate": 1.3385451038123764e-05, "loss": 0.0598, "step": 19545 }, { "epoch": 0.9924361642722981, "grad_norm": 0.3834126889705658, "learning_rate": 1.3383758904851347e-05, "loss": 0.0608, "step": 19550 }, { "epoch": 0.9926899842631606, "grad_norm": 0.4701562821865082, "learning_rate": 1.338206677157893e-05, "loss": 0.057, "step": 19555 }, { "epoch": 0.9929438042540231, "grad_norm": 0.5309749245643616, "learning_rate": 1.3380374638306514e-05, "loss": 0.0592, "step": 19560 }, { "epoch": 0.9931976242448856, "grad_norm": 0.4886894226074219, "learning_rate": 1.3378682505034098e-05, "loss": 0.0625, "step": 19565 }, { "epoch": 0.993451444235748, "grad_norm": 0.5212421417236328, "learning_rate": 1.337699037176168e-05, "loss": 0.0668, "step": 19570 }, { "epoch": 0.9937052642266105, "grad_norm": 0.3621155917644501, "learning_rate": 1.3375298238489265e-05, "loss": 0.0623, "step": 19575 }, { "epoch": 0.9939590842174729, "grad_norm": 0.47136250138282776, "learning_rate": 1.3373606105216847e-05, "loss": 0.0728, "step": 19580 }, { "epoch": 0.9942129042083354, "grad_norm": 0.31008508801460266, "learning_rate": 1.3371913971944432e-05, "loss": 0.0583, "step": 19585 }, { "epoch": 0.9944667241991979, "grad_norm": 0.47124168276786804, "learning_rate": 1.3370221838672016e-05, "loss": 0.0659, "step": 19590 }, { "epoch": 0.9947205441900604, "grad_norm": 0.40367239713668823, "learning_rate": 1.3368529705399598e-05, "loss": 0.059, "step": 19595 }, { "epoch": 0.9949743641809229, "grad_norm": 0.37501493096351624, "learning_rate": 1.3366837572127183e-05, "loss": 0.0666, "step": 19600 }, { "epoch": 0.9952281841717854, "grad_norm": 0.6273606419563293, "learning_rate": 1.3365145438854765e-05, "loss": 0.0662, "step": 19605 }, { "epoch": 0.9954820041626479, "grad_norm": 0.3741655945777893, "learning_rate": 1.3363453305582348e-05, "loss": 0.0577, "step": 19610 }, { "epoch": 0.9957358241535104, "grad_norm": 0.33114105463027954, "learning_rate": 1.3361761172309933e-05, "loss": 0.0658, "step": 19615 }, { "epoch": 0.9959896441443729, "grad_norm": 0.35361751914024353, "learning_rate": 1.3360069039037515e-05, "loss": 0.063, "step": 19620 }, { "epoch": 0.9962434641352353, "grad_norm": 0.41206908226013184, "learning_rate": 1.3358376905765099e-05, "loss": 0.0629, "step": 19625 }, { "epoch": 0.9964972841260977, "grad_norm": 0.4646819233894348, "learning_rate": 1.3356684772492682e-05, "loss": 0.0647, "step": 19630 }, { "epoch": 0.9967511041169602, "grad_norm": 0.3486287295818329, "learning_rate": 1.3354992639220266e-05, "loss": 0.0571, "step": 19635 }, { "epoch": 0.9970049241078227, "grad_norm": 0.3921375274658203, "learning_rate": 1.3353300505947851e-05, "loss": 0.057, "step": 19640 }, { "epoch": 0.9972587440986852, "grad_norm": 0.3869142532348633, "learning_rate": 1.3351608372675433e-05, "loss": 0.0695, "step": 19645 }, { "epoch": 0.9975125640895477, "grad_norm": 0.652873694896698, "learning_rate": 1.3349916239403015e-05, "loss": 0.0608, "step": 19650 }, { "epoch": 0.9977663840804102, "grad_norm": 0.35025376081466675, "learning_rate": 1.33482241061306e-05, "loss": 0.0675, "step": 19655 }, { "epoch": 0.9980202040712727, "grad_norm": 0.45848169922828674, "learning_rate": 1.3346531972858184e-05, "loss": 0.0656, "step": 19660 }, { "epoch": 0.9982740240621352, "grad_norm": 0.3314003050327301, "learning_rate": 1.3344839839585766e-05, "loss": 0.0591, "step": 19665 }, { "epoch": 0.9985278440529977, "grad_norm": 0.3548816740512848, "learning_rate": 1.334314770631335e-05, "loss": 0.0668, "step": 19670 }, { "epoch": 0.99878166404386, "grad_norm": 0.46631526947021484, "learning_rate": 1.3341455573040933e-05, "loss": 0.0563, "step": 19675 }, { "epoch": 0.9990354840347225, "grad_norm": 0.5142710208892822, "learning_rate": 1.3339763439768518e-05, "loss": 0.0609, "step": 19680 }, { "epoch": 0.999289304025585, "grad_norm": 0.965015172958374, "learning_rate": 1.3338071306496101e-05, "loss": 0.0591, "step": 19685 }, { "epoch": 0.9995431240164475, "grad_norm": 0.568295955657959, "learning_rate": 1.3336379173223683e-05, "loss": 0.0636, "step": 19690 }, { "epoch": 0.99979694400731, "grad_norm": 0.3382091820240021, "learning_rate": 1.3334687039951268e-05, "loss": 0.0609, "step": 19695 }, { "epoch": 1.0, "eval_loss": 0.160739004611969, "eval_runtime": 1784.4225, "eval_samples_per_second": 70.051, "eval_steps_per_second": 2.19, "step": 19699 }, { "epoch": 1.0000507639981724, "grad_norm": 0.3930205702781677, "learning_rate": 1.333299490667885e-05, "loss": 0.068, "step": 19700 }, { "epoch": 1.000304583989035, "grad_norm": 0.3845120370388031, "learning_rate": 1.3331302773406434e-05, "loss": 0.0642, "step": 19705 }, { "epoch": 1.0005584039798974, "grad_norm": 0.4073793292045593, "learning_rate": 1.3329610640134019e-05, "loss": 0.0605, "step": 19710 }, { "epoch": 1.00081222397076, "grad_norm": 0.34505850076675415, "learning_rate": 1.3327918506861601e-05, "loss": 0.0636, "step": 19715 }, { "epoch": 1.0010660439616224, "grad_norm": 0.6583669781684875, "learning_rate": 1.3326226373589185e-05, "loss": 0.0639, "step": 19720 }, { "epoch": 1.001319863952485, "grad_norm": 0.5834250450134277, "learning_rate": 1.3324534240316768e-05, "loss": 0.0641, "step": 19725 }, { "epoch": 1.0015736839433473, "grad_norm": 0.3789755403995514, "learning_rate": 1.3322842107044352e-05, "loss": 0.0593, "step": 19730 }, { "epoch": 1.00182750393421, "grad_norm": 0.3655293881893158, "learning_rate": 1.3321149973771937e-05, "loss": 0.0675, "step": 19735 }, { "epoch": 1.0020813239250723, "grad_norm": 0.4233531355857849, "learning_rate": 1.3319457840499519e-05, "loss": 0.0625, "step": 19740 }, { "epoch": 1.002335143915935, "grad_norm": 0.3656198978424072, "learning_rate": 1.3317765707227102e-05, "loss": 0.0637, "step": 19745 }, { "epoch": 1.0025889639067973, "grad_norm": 0.3142138123512268, "learning_rate": 1.3316073573954686e-05, "loss": 0.0648, "step": 19750 }, { "epoch": 1.0028427838976597, "grad_norm": 0.37569764256477356, "learning_rate": 1.331438144068227e-05, "loss": 0.0681, "step": 19755 }, { "epoch": 1.0030966038885223, "grad_norm": 0.4830641746520996, "learning_rate": 1.3312689307409851e-05, "loss": 0.0621, "step": 19760 }, { "epoch": 1.0033504238793847, "grad_norm": 0.3205713629722595, "learning_rate": 1.3310997174137436e-05, "loss": 0.0579, "step": 19765 }, { "epoch": 1.0036042438702473, "grad_norm": 0.4066654145717621, "learning_rate": 1.330930504086502e-05, "loss": 0.0529, "step": 19770 }, { "epoch": 1.0038580638611097, "grad_norm": 0.35887086391448975, "learning_rate": 1.3307612907592604e-05, "loss": 0.0603, "step": 19775 }, { "epoch": 1.0041118838519723, "grad_norm": 0.4492895007133484, "learning_rate": 1.3305920774320187e-05, "loss": 0.0569, "step": 19780 }, { "epoch": 1.0043657038428346, "grad_norm": 0.38979437947273254, "learning_rate": 1.3304228641047769e-05, "loss": 0.071, "step": 19785 }, { "epoch": 1.0046195238336972, "grad_norm": 0.36487793922424316, "learning_rate": 1.3302536507775354e-05, "loss": 0.0599, "step": 19790 }, { "epoch": 1.0048733438245596, "grad_norm": 0.4509921073913574, "learning_rate": 1.3300844374502938e-05, "loss": 0.0717, "step": 19795 }, { "epoch": 1.005127163815422, "grad_norm": 0.39418506622314453, "learning_rate": 1.329915224123052e-05, "loss": 0.0608, "step": 19800 }, { "epoch": 1.0053809838062846, "grad_norm": 0.44550445675849915, "learning_rate": 1.3297460107958105e-05, "loss": 0.0711, "step": 19805 }, { "epoch": 1.005634803797147, "grad_norm": 0.40730229020118713, "learning_rate": 1.3295767974685687e-05, "loss": 0.0635, "step": 19810 }, { "epoch": 1.0058886237880096, "grad_norm": 0.41011321544647217, "learning_rate": 1.329407584141327e-05, "loss": 0.0596, "step": 19815 }, { "epoch": 1.006142443778872, "grad_norm": 0.36048024892807007, "learning_rate": 1.3292383708140855e-05, "loss": 0.0565, "step": 19820 }, { "epoch": 1.0063962637697346, "grad_norm": 0.49804311990737915, "learning_rate": 1.3290691574868437e-05, "loss": 0.0658, "step": 19825 }, { "epoch": 1.006650083760597, "grad_norm": 0.3198188841342926, "learning_rate": 1.3288999441596023e-05, "loss": 0.0631, "step": 19830 }, { "epoch": 1.0069039037514596, "grad_norm": 0.4254019856452942, "learning_rate": 1.3287307308323604e-05, "loss": 0.0609, "step": 19835 }, { "epoch": 1.007157723742322, "grad_norm": 0.31280845403671265, "learning_rate": 1.3285615175051188e-05, "loss": 0.065, "step": 19840 }, { "epoch": 1.0074115437331845, "grad_norm": 0.4120943546295166, "learning_rate": 1.3283923041778773e-05, "loss": 0.0609, "step": 19845 }, { "epoch": 1.007665363724047, "grad_norm": 0.39241835474967957, "learning_rate": 1.3282230908506355e-05, "loss": 0.0664, "step": 19850 }, { "epoch": 1.0079191837149093, "grad_norm": 0.5152608752250671, "learning_rate": 1.3280538775233937e-05, "loss": 0.0623, "step": 19855 }, { "epoch": 1.008173003705772, "grad_norm": 0.7134572267532349, "learning_rate": 1.3278846641961522e-05, "loss": 0.068, "step": 19860 }, { "epoch": 1.0084268236966343, "grad_norm": 0.3215194344520569, "learning_rate": 1.3277154508689106e-05, "loss": 0.0608, "step": 19865 }, { "epoch": 1.0086806436874969, "grad_norm": 0.5526114702224731, "learning_rate": 1.3275462375416687e-05, "loss": 0.0632, "step": 19870 }, { "epoch": 1.0089344636783593, "grad_norm": 0.4783251881599426, "learning_rate": 1.3273770242144273e-05, "loss": 0.0695, "step": 19875 }, { "epoch": 1.0091882836692219, "grad_norm": 0.48075810074806213, "learning_rate": 1.3272078108871855e-05, "loss": 0.0576, "step": 19880 }, { "epoch": 1.0094421036600842, "grad_norm": 0.3602225184440613, "learning_rate": 1.327038597559944e-05, "loss": 0.0565, "step": 19885 }, { "epoch": 1.0096959236509468, "grad_norm": 0.3769897222518921, "learning_rate": 1.3268693842327023e-05, "loss": 0.0665, "step": 19890 }, { "epoch": 1.0099497436418092, "grad_norm": 0.3689655065536499, "learning_rate": 1.3267001709054605e-05, "loss": 0.0638, "step": 19895 }, { "epoch": 1.0102035636326716, "grad_norm": 0.26339930295944214, "learning_rate": 1.326530957578219e-05, "loss": 0.0568, "step": 19900 }, { "epoch": 1.0104573836235342, "grad_norm": 0.5178040862083435, "learning_rate": 1.3263617442509772e-05, "loss": 0.061, "step": 19905 }, { "epoch": 1.0107112036143966, "grad_norm": 0.4020889103412628, "learning_rate": 1.3261925309237356e-05, "loss": 0.0614, "step": 19910 }, { "epoch": 1.0109650236052592, "grad_norm": 0.2972166836261749, "learning_rate": 1.3260233175964941e-05, "loss": 0.0644, "step": 19915 }, { "epoch": 1.0112188435961216, "grad_norm": 0.36977848410606384, "learning_rate": 1.3258541042692523e-05, "loss": 0.0621, "step": 19920 }, { "epoch": 1.0114726635869842, "grad_norm": 0.46586596965789795, "learning_rate": 1.3256848909420108e-05, "loss": 0.0593, "step": 19925 }, { "epoch": 1.0117264835778466, "grad_norm": 0.34465721249580383, "learning_rate": 1.325515677614769e-05, "loss": 0.0635, "step": 19930 }, { "epoch": 1.0119803035687092, "grad_norm": 0.27624374628067017, "learning_rate": 1.3253464642875274e-05, "loss": 0.0585, "step": 19935 }, { "epoch": 1.0122341235595715, "grad_norm": 0.41285547614097595, "learning_rate": 1.3251772509602859e-05, "loss": 0.0718, "step": 19940 }, { "epoch": 1.012487943550434, "grad_norm": 0.3415983319282532, "learning_rate": 1.325008037633044e-05, "loss": 0.0716, "step": 19945 }, { "epoch": 1.0127417635412965, "grad_norm": 0.35713669657707214, "learning_rate": 1.3248388243058024e-05, "loss": 0.0605, "step": 19950 }, { "epoch": 1.012995583532159, "grad_norm": 0.31129032373428345, "learning_rate": 1.3246696109785608e-05, "loss": 0.0633, "step": 19955 }, { "epoch": 1.0132494035230215, "grad_norm": 0.9359402656555176, "learning_rate": 1.3245003976513191e-05, "loss": 0.0641, "step": 19960 }, { "epoch": 1.0135032235138839, "grad_norm": 0.3182302713394165, "learning_rate": 1.3243311843240773e-05, "loss": 0.0627, "step": 19965 }, { "epoch": 1.0137570435047465, "grad_norm": 0.3964100778102875, "learning_rate": 1.3241619709968358e-05, "loss": 0.0568, "step": 19970 }, { "epoch": 1.0140108634956089, "grad_norm": 0.35237252712249756, "learning_rate": 1.3239927576695942e-05, "loss": 0.0679, "step": 19975 }, { "epoch": 1.0142646834864715, "grad_norm": 0.37263184785842896, "learning_rate": 1.3238235443423525e-05, "loss": 0.0585, "step": 19980 }, { "epoch": 1.0145185034773339, "grad_norm": 0.44940632581710815, "learning_rate": 1.3236543310151109e-05, "loss": 0.0632, "step": 19985 }, { "epoch": 1.0147723234681965, "grad_norm": 0.7039437890052795, "learning_rate": 1.323485117687869e-05, "loss": 0.0697, "step": 19990 }, { "epoch": 1.0150261434590588, "grad_norm": 0.7221899628639221, "learning_rate": 1.3233159043606276e-05, "loss": 0.0585, "step": 19995 }, { "epoch": 1.0152799634499212, "grad_norm": 0.25587931275367737, "learning_rate": 1.323146691033386e-05, "loss": 0.0663, "step": 20000 }, { "epoch": 1.0155337834407838, "grad_norm": 0.34087198972702026, "learning_rate": 1.3229774777061441e-05, "loss": 0.0739, "step": 20005 }, { "epoch": 1.0157876034316462, "grad_norm": 0.49121344089508057, "learning_rate": 1.3228082643789027e-05, "loss": 0.0589, "step": 20010 }, { "epoch": 1.0160414234225088, "grad_norm": 0.39154985547065735, "learning_rate": 1.3226390510516609e-05, "loss": 0.0561, "step": 20015 }, { "epoch": 1.0162952434133712, "grad_norm": 0.3109443187713623, "learning_rate": 1.3224698377244194e-05, "loss": 0.0565, "step": 20020 }, { "epoch": 1.0165490634042338, "grad_norm": 0.349619597196579, "learning_rate": 1.3223006243971777e-05, "loss": 0.0612, "step": 20025 }, { "epoch": 1.0168028833950962, "grad_norm": 0.6276240348815918, "learning_rate": 1.322131411069936e-05, "loss": 0.0633, "step": 20030 }, { "epoch": 1.0170567033859588, "grad_norm": 0.4947071373462677, "learning_rate": 1.3219621977426944e-05, "loss": 0.0532, "step": 20035 }, { "epoch": 1.0173105233768212, "grad_norm": 0.987997829914093, "learning_rate": 1.3217929844154526e-05, "loss": 0.0661, "step": 20040 }, { "epoch": 1.0175643433676835, "grad_norm": 0.5403960347175598, "learning_rate": 1.321623771088211e-05, "loss": 0.0588, "step": 20045 }, { "epoch": 1.0178181633585461, "grad_norm": 0.41313982009887695, "learning_rate": 1.3214545577609695e-05, "loss": 0.0599, "step": 20050 }, { "epoch": 1.0180719833494085, "grad_norm": 0.3229277729988098, "learning_rate": 1.3212853444337277e-05, "loss": 0.0602, "step": 20055 }, { "epoch": 1.0183258033402711, "grad_norm": 0.5154731273651123, "learning_rate": 1.3211161311064859e-05, "loss": 0.0557, "step": 20060 }, { "epoch": 1.0185796233311335, "grad_norm": 0.3665696978569031, "learning_rate": 1.3209469177792444e-05, "loss": 0.0642, "step": 20065 }, { "epoch": 1.018833443321996, "grad_norm": 0.35480642318725586, "learning_rate": 1.3207777044520028e-05, "loss": 0.0621, "step": 20070 }, { "epoch": 1.0190872633128585, "grad_norm": 0.3062871992588043, "learning_rate": 1.3206084911247611e-05, "loss": 0.0646, "step": 20075 }, { "epoch": 1.019341083303721, "grad_norm": 0.37780115008354187, "learning_rate": 1.3204392777975195e-05, "loss": 0.0559, "step": 20080 }, { "epoch": 1.0195949032945835, "grad_norm": 0.5313156843185425, "learning_rate": 1.3202700644702776e-05, "loss": 0.0635, "step": 20085 }, { "epoch": 1.019848723285446, "grad_norm": 0.4488339424133301, "learning_rate": 1.3201008511430362e-05, "loss": 0.0559, "step": 20090 }, { "epoch": 1.0201025432763084, "grad_norm": 0.3646913766860962, "learning_rate": 1.3199316378157945e-05, "loss": 0.0573, "step": 20095 }, { "epoch": 1.0203563632671708, "grad_norm": 0.2983665466308594, "learning_rate": 1.3197624244885527e-05, "loss": 0.0694, "step": 20100 }, { "epoch": 1.0206101832580334, "grad_norm": 0.4676768481731415, "learning_rate": 1.3195932111613112e-05, "loss": 0.0594, "step": 20105 }, { "epoch": 1.0208640032488958, "grad_norm": 0.37798216938972473, "learning_rate": 1.3194239978340694e-05, "loss": 0.0671, "step": 20110 }, { "epoch": 1.0211178232397584, "grad_norm": 0.28777554631233215, "learning_rate": 1.3192547845068278e-05, "loss": 0.0513, "step": 20115 }, { "epoch": 1.0213716432306208, "grad_norm": 0.5031976699829102, "learning_rate": 1.3190855711795863e-05, "loss": 0.0551, "step": 20120 }, { "epoch": 1.0216254632214834, "grad_norm": 0.8326206803321838, "learning_rate": 1.3189163578523445e-05, "loss": 0.0711, "step": 20125 }, { "epoch": 1.0218792832123458, "grad_norm": 0.35230034589767456, "learning_rate": 1.318747144525103e-05, "loss": 0.0674, "step": 20130 }, { "epoch": 1.0221331032032084, "grad_norm": 0.8358342051506042, "learning_rate": 1.3185779311978612e-05, "loss": 0.0586, "step": 20135 }, { "epoch": 1.0223869231940708, "grad_norm": 1.732825517654419, "learning_rate": 1.3184087178706195e-05, "loss": 0.0644, "step": 20140 }, { "epoch": 1.0226407431849331, "grad_norm": 0.36032116413116455, "learning_rate": 1.318239504543378e-05, "loss": 0.0567, "step": 20145 }, { "epoch": 1.0228945631757957, "grad_norm": 0.36123108863830566, "learning_rate": 1.3180702912161363e-05, "loss": 0.0582, "step": 20150 }, { "epoch": 1.0231483831666581, "grad_norm": 0.32960960268974304, "learning_rate": 1.3179010778888946e-05, "loss": 0.0617, "step": 20155 }, { "epoch": 1.0234022031575207, "grad_norm": 0.4045203626155853, "learning_rate": 1.317731864561653e-05, "loss": 0.059, "step": 20160 }, { "epoch": 1.023656023148383, "grad_norm": 0.38749194145202637, "learning_rate": 1.3175626512344113e-05, "loss": 0.0567, "step": 20165 }, { "epoch": 1.0239098431392457, "grad_norm": 0.37718096375465393, "learning_rate": 1.3173934379071698e-05, "loss": 0.0693, "step": 20170 }, { "epoch": 1.024163663130108, "grad_norm": 0.9238191843032837, "learning_rate": 1.317224224579928e-05, "loss": 0.0565, "step": 20175 }, { "epoch": 1.0244174831209707, "grad_norm": 0.41635268926620483, "learning_rate": 1.3170550112526864e-05, "loss": 0.0659, "step": 20180 }, { "epoch": 1.024671303111833, "grad_norm": 0.41010648012161255, "learning_rate": 1.3168857979254447e-05, "loss": 0.0517, "step": 20185 }, { "epoch": 1.0249251231026957, "grad_norm": 0.40220311284065247, "learning_rate": 1.3167165845982031e-05, "loss": 0.0689, "step": 20190 }, { "epoch": 1.025178943093558, "grad_norm": 0.40923091769218445, "learning_rate": 1.3165473712709613e-05, "loss": 0.0595, "step": 20195 }, { "epoch": 1.0254327630844204, "grad_norm": 0.3411441445350647, "learning_rate": 1.3163781579437198e-05, "loss": 0.0644, "step": 20200 }, { "epoch": 1.025686583075283, "grad_norm": 0.3687761425971985, "learning_rate": 1.3162089446164782e-05, "loss": 0.0579, "step": 20205 }, { "epoch": 1.0259404030661454, "grad_norm": 0.3332912027835846, "learning_rate": 1.3160397312892363e-05, "loss": 0.0651, "step": 20210 }, { "epoch": 1.026194223057008, "grad_norm": 0.851193904876709, "learning_rate": 1.3158705179619949e-05, "loss": 0.0657, "step": 20215 }, { "epoch": 1.0264480430478704, "grad_norm": 0.4673822224140167, "learning_rate": 1.315701304634753e-05, "loss": 0.0572, "step": 20220 }, { "epoch": 1.026701863038733, "grad_norm": 0.33037447929382324, "learning_rate": 1.3155320913075116e-05, "loss": 0.0683, "step": 20225 }, { "epoch": 1.0269556830295954, "grad_norm": 0.6725794672966003, "learning_rate": 1.31536287798027e-05, "loss": 0.0666, "step": 20230 }, { "epoch": 1.027209503020458, "grad_norm": 0.6721415519714355, "learning_rate": 1.3151936646530281e-05, "loss": 0.0791, "step": 20235 }, { "epoch": 1.0274633230113204, "grad_norm": 0.39361828565597534, "learning_rate": 1.3150244513257866e-05, "loss": 0.0651, "step": 20240 }, { "epoch": 1.0277171430021828, "grad_norm": 0.30990535020828247, "learning_rate": 1.3148552379985448e-05, "loss": 0.0637, "step": 20245 }, { "epoch": 1.0279709629930454, "grad_norm": 0.44756659865379333, "learning_rate": 1.3146860246713032e-05, "loss": 0.0678, "step": 20250 }, { "epoch": 1.0282247829839077, "grad_norm": 0.3761730194091797, "learning_rate": 1.3145168113440615e-05, "loss": 0.0621, "step": 20255 }, { "epoch": 1.0284786029747703, "grad_norm": 0.29764240980148315, "learning_rate": 1.3143475980168199e-05, "loss": 0.0572, "step": 20260 }, { "epoch": 1.0287324229656327, "grad_norm": 0.31058117747306824, "learning_rate": 1.3141783846895784e-05, "loss": 0.0497, "step": 20265 }, { "epoch": 1.0289862429564953, "grad_norm": 0.677664577960968, "learning_rate": 1.3140091713623366e-05, "loss": 0.0689, "step": 20270 }, { "epoch": 1.0292400629473577, "grad_norm": 0.3639600872993469, "learning_rate": 1.313839958035095e-05, "loss": 0.0561, "step": 20275 }, { "epoch": 1.0294938829382203, "grad_norm": 0.29225844144821167, "learning_rate": 1.3136707447078533e-05, "loss": 0.0551, "step": 20280 }, { "epoch": 1.0297477029290827, "grad_norm": 0.43538472056388855, "learning_rate": 1.3135015313806117e-05, "loss": 0.0612, "step": 20285 }, { "epoch": 1.030001522919945, "grad_norm": 0.3201233744621277, "learning_rate": 1.3133323180533698e-05, "loss": 0.0577, "step": 20290 }, { "epoch": 1.0302553429108077, "grad_norm": 0.3105303645133972, "learning_rate": 1.3131631047261284e-05, "loss": 0.0593, "step": 20295 }, { "epoch": 1.03050916290167, "grad_norm": 0.4399048686027527, "learning_rate": 1.3129938913988867e-05, "loss": 0.0688, "step": 20300 }, { "epoch": 1.0307629828925327, "grad_norm": 0.45303788781166077, "learning_rate": 1.3128246780716449e-05, "loss": 0.0586, "step": 20305 }, { "epoch": 1.031016802883395, "grad_norm": 0.5587612986564636, "learning_rate": 1.3126554647444034e-05, "loss": 0.0668, "step": 20310 }, { "epoch": 1.0312706228742576, "grad_norm": 0.39321210980415344, "learning_rate": 1.3124862514171616e-05, "loss": 0.0588, "step": 20315 }, { "epoch": 1.03152444286512, "grad_norm": 0.2924768328666687, "learning_rate": 1.3123170380899201e-05, "loss": 0.0569, "step": 20320 }, { "epoch": 1.0317782628559826, "grad_norm": 0.32587504386901855, "learning_rate": 1.3121478247626785e-05, "loss": 0.0588, "step": 20325 }, { "epoch": 1.032032082846845, "grad_norm": 0.34906765818595886, "learning_rate": 1.3119786114354367e-05, "loss": 0.0669, "step": 20330 }, { "epoch": 1.0322859028377076, "grad_norm": 0.45067837834358215, "learning_rate": 1.3118093981081952e-05, "loss": 0.0627, "step": 20335 }, { "epoch": 1.03253972282857, "grad_norm": 0.43164461851119995, "learning_rate": 1.3116401847809534e-05, "loss": 0.0544, "step": 20340 }, { "epoch": 1.0327935428194324, "grad_norm": 0.343434602022171, "learning_rate": 1.3114709714537117e-05, "loss": 0.0596, "step": 20345 }, { "epoch": 1.033047362810295, "grad_norm": 0.43846994638442993, "learning_rate": 1.3113017581264703e-05, "loss": 0.0646, "step": 20350 }, { "epoch": 1.0333011828011573, "grad_norm": 0.3330884277820587, "learning_rate": 1.3111325447992285e-05, "loss": 0.0568, "step": 20355 }, { "epoch": 1.03355500279202, "grad_norm": 0.5818292498588562, "learning_rate": 1.3109633314719868e-05, "loss": 0.0613, "step": 20360 }, { "epoch": 1.0338088227828823, "grad_norm": 0.37332767248153687, "learning_rate": 1.3107941181447452e-05, "loss": 0.06, "step": 20365 }, { "epoch": 1.034062642773745, "grad_norm": 0.3650437891483307, "learning_rate": 1.3106249048175035e-05, "loss": 0.059, "step": 20370 }, { "epoch": 1.0343164627646073, "grad_norm": 0.800799548625946, "learning_rate": 1.310455691490262e-05, "loss": 0.0664, "step": 20375 }, { "epoch": 1.03457028275547, "grad_norm": 0.4584999680519104, "learning_rate": 1.3102864781630202e-05, "loss": 0.06, "step": 20380 }, { "epoch": 1.0348241027463323, "grad_norm": 0.4071432948112488, "learning_rate": 1.3101172648357786e-05, "loss": 0.0786, "step": 20385 }, { "epoch": 1.0350779227371947, "grad_norm": 0.3009105622768402, "learning_rate": 1.309948051508537e-05, "loss": 0.0629, "step": 20390 }, { "epoch": 1.0353317427280573, "grad_norm": 0.573149561882019, "learning_rate": 1.3097788381812953e-05, "loss": 0.069, "step": 20395 }, { "epoch": 1.0355855627189197, "grad_norm": 0.8304667472839355, "learning_rate": 1.3096096248540535e-05, "loss": 0.0595, "step": 20400 }, { "epoch": 1.0358393827097823, "grad_norm": 0.4500064551830292, "learning_rate": 1.309440411526812e-05, "loss": 0.0642, "step": 20405 }, { "epoch": 1.0360932027006446, "grad_norm": 0.6188081502914429, "learning_rate": 1.3092711981995704e-05, "loss": 0.0581, "step": 20410 }, { "epoch": 1.0363470226915072, "grad_norm": 0.4538813829421997, "learning_rate": 1.3091019848723287e-05, "loss": 0.0652, "step": 20415 }, { "epoch": 1.0366008426823696, "grad_norm": 0.269491583108902, "learning_rate": 1.308932771545087e-05, "loss": 0.0541, "step": 20420 }, { "epoch": 1.0368546626732322, "grad_norm": 0.36252927780151367, "learning_rate": 1.3087635582178452e-05, "loss": 0.0587, "step": 20425 }, { "epoch": 1.0371084826640946, "grad_norm": 0.4088985025882721, "learning_rate": 1.3085943448906038e-05, "loss": 0.0726, "step": 20430 }, { "epoch": 1.037362302654957, "grad_norm": 0.4039810001850128, "learning_rate": 1.3084251315633621e-05, "loss": 0.0561, "step": 20435 }, { "epoch": 1.0376161226458196, "grad_norm": 0.36475130915641785, "learning_rate": 1.3082559182361203e-05, "loss": 0.0712, "step": 20440 }, { "epoch": 1.037869942636682, "grad_norm": 0.4266735017299652, "learning_rate": 1.3080867049088788e-05, "loss": 0.0593, "step": 20445 }, { "epoch": 1.0381237626275446, "grad_norm": 0.44501543045043945, "learning_rate": 1.307917491581637e-05, "loss": 0.0605, "step": 20450 }, { "epoch": 1.038377582618407, "grad_norm": 0.6565629839897156, "learning_rate": 1.3077482782543954e-05, "loss": 0.057, "step": 20455 }, { "epoch": 1.0386314026092696, "grad_norm": 0.33326682448387146, "learning_rate": 1.3075790649271537e-05, "loss": 0.081, "step": 20460 }, { "epoch": 1.038885222600132, "grad_norm": 0.3565109074115753, "learning_rate": 1.307409851599912e-05, "loss": 0.0618, "step": 20465 }, { "epoch": 1.0391390425909945, "grad_norm": 0.30127477645874023, "learning_rate": 1.3072406382726706e-05, "loss": 0.0595, "step": 20470 }, { "epoch": 1.039392862581857, "grad_norm": 0.39654025435447693, "learning_rate": 1.3070714249454288e-05, "loss": 0.0554, "step": 20475 }, { "epoch": 1.0396466825727195, "grad_norm": 0.4261183440685272, "learning_rate": 1.3069022116181871e-05, "loss": 0.0744, "step": 20480 }, { "epoch": 1.039900502563582, "grad_norm": 0.4532530605792999, "learning_rate": 1.3067329982909455e-05, "loss": 0.0598, "step": 20485 }, { "epoch": 1.0401543225544443, "grad_norm": 0.49771350622177124, "learning_rate": 1.3065637849637039e-05, "loss": 0.0674, "step": 20490 }, { "epoch": 1.040408142545307, "grad_norm": 0.49165821075439453, "learning_rate": 1.306394571636462e-05, "loss": 0.0655, "step": 20495 }, { "epoch": 1.0406619625361693, "grad_norm": 0.3307662010192871, "learning_rate": 1.3062253583092206e-05, "loss": 0.0587, "step": 20500 }, { "epoch": 1.0409157825270319, "grad_norm": 0.3834070861339569, "learning_rate": 1.306056144981979e-05, "loss": 0.0599, "step": 20505 }, { "epoch": 1.0411696025178943, "grad_norm": 0.4196530878543854, "learning_rate": 1.3058869316547373e-05, "loss": 0.058, "step": 20510 }, { "epoch": 1.0414234225087569, "grad_norm": 0.376595675945282, "learning_rate": 1.3057177183274956e-05, "loss": 0.0598, "step": 20515 }, { "epoch": 1.0416772424996192, "grad_norm": 0.4179707467556, "learning_rate": 1.3055485050002538e-05, "loss": 0.0647, "step": 20520 }, { "epoch": 1.0419310624904818, "grad_norm": 0.35341501235961914, "learning_rate": 1.3053792916730123e-05, "loss": 0.0506, "step": 20525 }, { "epoch": 1.0421848824813442, "grad_norm": 0.3597331941127777, "learning_rate": 1.3052100783457707e-05, "loss": 0.0671, "step": 20530 }, { "epoch": 1.0424387024722068, "grad_norm": 0.35214442014694214, "learning_rate": 1.3050408650185289e-05, "loss": 0.0501, "step": 20535 }, { "epoch": 1.0426925224630692, "grad_norm": 1.1911040544509888, "learning_rate": 1.3048716516912874e-05, "loss": 0.0683, "step": 20540 }, { "epoch": 1.0429463424539316, "grad_norm": 0.40806475281715393, "learning_rate": 1.3047024383640456e-05, "loss": 0.0652, "step": 20545 }, { "epoch": 1.0432001624447942, "grad_norm": 0.3692667782306671, "learning_rate": 1.304533225036804e-05, "loss": 0.0665, "step": 20550 }, { "epoch": 1.0434539824356566, "grad_norm": 0.580768883228302, "learning_rate": 1.3043640117095625e-05, "loss": 0.0652, "step": 20555 }, { "epoch": 1.0437078024265192, "grad_norm": 0.32445693016052246, "learning_rate": 1.3041947983823206e-05, "loss": 0.0599, "step": 20560 }, { "epoch": 1.0439616224173816, "grad_norm": 0.5763265490531921, "learning_rate": 1.3040255850550792e-05, "loss": 0.0694, "step": 20565 }, { "epoch": 1.0442154424082442, "grad_norm": 0.33058294653892517, "learning_rate": 1.3038563717278374e-05, "loss": 0.0563, "step": 20570 }, { "epoch": 1.0444692623991065, "grad_norm": 0.26986175775527954, "learning_rate": 1.3036871584005957e-05, "loss": 0.0542, "step": 20575 }, { "epoch": 1.0447230823899691, "grad_norm": 0.3680388331413269, "learning_rate": 1.3035179450733542e-05, "loss": 0.0613, "step": 20580 }, { "epoch": 1.0449769023808315, "grad_norm": 0.3555222153663635, "learning_rate": 1.3033487317461124e-05, "loss": 0.0608, "step": 20585 }, { "epoch": 1.045230722371694, "grad_norm": 0.29103875160217285, "learning_rate": 1.3031795184188708e-05, "loss": 0.0479, "step": 20590 }, { "epoch": 1.0454845423625565, "grad_norm": 0.36284422874450684, "learning_rate": 1.3030103050916291e-05, "loss": 0.0546, "step": 20595 }, { "epoch": 1.0457383623534189, "grad_norm": 0.2850485146045685, "learning_rate": 1.3028410917643875e-05, "loss": 0.0633, "step": 20600 }, { "epoch": 1.0459921823442815, "grad_norm": 0.37116739153862, "learning_rate": 1.3026718784371457e-05, "loss": 0.0618, "step": 20605 }, { "epoch": 1.0462460023351439, "grad_norm": 0.41948822140693665, "learning_rate": 1.3025026651099042e-05, "loss": 0.0711, "step": 20610 }, { "epoch": 1.0464998223260065, "grad_norm": 0.3508945107460022, "learning_rate": 1.3023334517826625e-05, "loss": 0.0549, "step": 20615 }, { "epoch": 1.0467536423168688, "grad_norm": 0.46695828437805176, "learning_rate": 1.3021642384554209e-05, "loss": 0.0566, "step": 20620 }, { "epoch": 1.0470074623077315, "grad_norm": 0.4364718496799469, "learning_rate": 1.3019950251281793e-05, "loss": 0.0669, "step": 20625 }, { "epoch": 1.0472612822985938, "grad_norm": 0.26235929131507874, "learning_rate": 1.3018258118009374e-05, "loss": 0.06, "step": 20630 }, { "epoch": 1.0475151022894562, "grad_norm": 0.36236390471458435, "learning_rate": 1.301656598473696e-05, "loss": 0.0602, "step": 20635 }, { "epoch": 1.0477689222803188, "grad_norm": 0.5523089170455933, "learning_rate": 1.3014873851464541e-05, "loss": 0.0604, "step": 20640 }, { "epoch": 1.0480227422711812, "grad_norm": 2.3140509128570557, "learning_rate": 1.3013181718192125e-05, "loss": 0.0675, "step": 20645 }, { "epoch": 1.0482765622620438, "grad_norm": 0.2716338634490967, "learning_rate": 1.301148958491971e-05, "loss": 0.0613, "step": 20650 }, { "epoch": 1.0485303822529062, "grad_norm": 0.38538867235183716, "learning_rate": 1.3009797451647292e-05, "loss": 0.0613, "step": 20655 }, { "epoch": 1.0487842022437688, "grad_norm": 0.8486253619194031, "learning_rate": 1.3008105318374877e-05, "loss": 0.0683, "step": 20660 }, { "epoch": 1.0490380222346312, "grad_norm": 0.8067624568939209, "learning_rate": 1.300641318510246e-05, "loss": 0.0569, "step": 20665 }, { "epoch": 1.0492918422254938, "grad_norm": 0.36104530096054077, "learning_rate": 1.3004721051830043e-05, "loss": 0.0571, "step": 20670 }, { "epoch": 1.0495456622163561, "grad_norm": 0.3068177402019501, "learning_rate": 1.3003028918557628e-05, "loss": 0.0574, "step": 20675 }, { "epoch": 1.0497994822072187, "grad_norm": 0.41249674558639526, "learning_rate": 1.300133678528521e-05, "loss": 0.0616, "step": 20680 }, { "epoch": 1.0500533021980811, "grad_norm": 0.3544803857803345, "learning_rate": 1.2999644652012793e-05, "loss": 0.0664, "step": 20685 }, { "epoch": 1.0503071221889435, "grad_norm": 0.2459775060415268, "learning_rate": 1.2997952518740377e-05, "loss": 0.0602, "step": 20690 }, { "epoch": 1.0505609421798061, "grad_norm": 0.5989146828651428, "learning_rate": 1.299626038546796e-05, "loss": 0.0552, "step": 20695 }, { "epoch": 1.0508147621706685, "grad_norm": 0.44064995646476746, "learning_rate": 1.2994568252195542e-05, "loss": 0.0494, "step": 20700 }, { "epoch": 1.051068582161531, "grad_norm": 0.34163224697113037, "learning_rate": 1.2992876118923128e-05, "loss": 0.053, "step": 20705 }, { "epoch": 1.0513224021523935, "grad_norm": 0.3708654046058655, "learning_rate": 1.2991183985650711e-05, "loss": 0.054, "step": 20710 }, { "epoch": 1.051576222143256, "grad_norm": 0.2843742072582245, "learning_rate": 1.2989491852378295e-05, "loss": 0.058, "step": 20715 }, { "epoch": 1.0518300421341185, "grad_norm": 0.4104609489440918, "learning_rate": 1.2987799719105878e-05, "loss": 0.0633, "step": 20720 }, { "epoch": 1.052083862124981, "grad_norm": 0.32387226819992065, "learning_rate": 1.298610758583346e-05, "loss": 0.0572, "step": 20725 }, { "epoch": 1.0523376821158434, "grad_norm": 0.3362037241458893, "learning_rate": 1.2984415452561045e-05, "loss": 0.0565, "step": 20730 }, { "epoch": 1.0525915021067058, "grad_norm": 0.32576224207878113, "learning_rate": 1.2982723319288629e-05, "loss": 0.0654, "step": 20735 }, { "epoch": 1.0528453220975684, "grad_norm": 0.4229300618171692, "learning_rate": 1.298103118601621e-05, "loss": 0.0646, "step": 20740 }, { "epoch": 1.0530991420884308, "grad_norm": 0.4319082200527191, "learning_rate": 1.2979339052743796e-05, "loss": 0.0541, "step": 20745 }, { "epoch": 1.0533529620792934, "grad_norm": 0.421505868434906, "learning_rate": 1.2977646919471378e-05, "loss": 0.0597, "step": 20750 }, { "epoch": 1.0536067820701558, "grad_norm": 0.3763205409049988, "learning_rate": 1.2975954786198961e-05, "loss": 0.0492, "step": 20755 }, { "epoch": 1.0538606020610184, "grad_norm": 0.3560442626476288, "learning_rate": 1.2974262652926547e-05, "loss": 0.0588, "step": 20760 }, { "epoch": 1.0541144220518808, "grad_norm": 0.4291508197784424, "learning_rate": 1.2972570519654128e-05, "loss": 0.072, "step": 20765 }, { "epoch": 1.0543682420427434, "grad_norm": 0.3134581446647644, "learning_rate": 1.2970878386381714e-05, "loss": 0.0573, "step": 20770 }, { "epoch": 1.0546220620336058, "grad_norm": 0.3855588138103485, "learning_rate": 1.2969186253109295e-05, "loss": 0.0657, "step": 20775 }, { "epoch": 1.0548758820244681, "grad_norm": 0.2989584803581238, "learning_rate": 1.2967494119836879e-05, "loss": 0.0587, "step": 20780 }, { "epoch": 1.0551297020153307, "grad_norm": 0.4255363345146179, "learning_rate": 1.2965801986564464e-05, "loss": 0.0619, "step": 20785 }, { "epoch": 1.0553835220061931, "grad_norm": 0.4935469329357147, "learning_rate": 1.2964109853292046e-05, "loss": 0.0663, "step": 20790 }, { "epoch": 1.0556373419970557, "grad_norm": 0.2790074348449707, "learning_rate": 1.296241772001963e-05, "loss": 0.0493, "step": 20795 }, { "epoch": 1.055891161987918, "grad_norm": 0.5771297216415405, "learning_rate": 1.2960725586747213e-05, "loss": 0.0635, "step": 20800 }, { "epoch": 1.0561449819787807, "grad_norm": 0.49115124344825745, "learning_rate": 1.2959033453474797e-05, "loss": 0.0605, "step": 20805 }, { "epoch": 1.056398801969643, "grad_norm": 0.3402343690395355, "learning_rate": 1.2957341320202382e-05, "loss": 0.058, "step": 20810 }, { "epoch": 1.0566526219605057, "grad_norm": 0.39330241084098816, "learning_rate": 1.2955649186929964e-05, "loss": 0.0623, "step": 20815 }, { "epoch": 1.056906441951368, "grad_norm": 0.29627296328544617, "learning_rate": 1.2953957053657546e-05, "loss": 0.0552, "step": 20820 }, { "epoch": 1.0571602619422307, "grad_norm": 0.35211825370788574, "learning_rate": 1.2952264920385131e-05, "loss": 0.0632, "step": 20825 }, { "epoch": 1.057414081933093, "grad_norm": 0.4592018723487854, "learning_rate": 1.2950572787112714e-05, "loss": 0.0626, "step": 20830 }, { "epoch": 1.0576679019239554, "grad_norm": 0.5759520530700684, "learning_rate": 1.2948880653840296e-05, "loss": 0.0812, "step": 20835 }, { "epoch": 1.057921721914818, "grad_norm": 0.431813508272171, "learning_rate": 1.2947188520567882e-05, "loss": 0.057, "step": 20840 }, { "epoch": 1.0581755419056804, "grad_norm": 0.810368001461029, "learning_rate": 1.2945496387295463e-05, "loss": 0.0601, "step": 20845 }, { "epoch": 1.058429361896543, "grad_norm": 0.3972325623035431, "learning_rate": 1.2943804254023047e-05, "loss": 0.0614, "step": 20850 }, { "epoch": 1.0586831818874054, "grad_norm": 0.34748002886772156, "learning_rate": 1.2942112120750632e-05, "loss": 0.0631, "step": 20855 }, { "epoch": 1.058937001878268, "grad_norm": 0.5244632363319397, "learning_rate": 1.2940419987478214e-05, "loss": 0.0664, "step": 20860 }, { "epoch": 1.0591908218691304, "grad_norm": 0.4019574522972107, "learning_rate": 1.29387278542058e-05, "loss": 0.0622, "step": 20865 }, { "epoch": 1.059444641859993, "grad_norm": 0.43360385298728943, "learning_rate": 1.2937035720933381e-05, "loss": 0.0605, "step": 20870 }, { "epoch": 1.0596984618508554, "grad_norm": 0.37314683198928833, "learning_rate": 1.2935343587660965e-05, "loss": 0.0598, "step": 20875 }, { "epoch": 1.0599522818417177, "grad_norm": 0.3321194350719452, "learning_rate": 1.293365145438855e-05, "loss": 0.0598, "step": 20880 }, { "epoch": 1.0602061018325803, "grad_norm": 0.5326045155525208, "learning_rate": 1.2931959321116132e-05, "loss": 0.0623, "step": 20885 }, { "epoch": 1.0604599218234427, "grad_norm": 0.31733155250549316, "learning_rate": 1.2930267187843715e-05, "loss": 0.0526, "step": 20890 }, { "epoch": 1.0607137418143053, "grad_norm": 0.34186238050460815, "learning_rate": 1.2928575054571299e-05, "loss": 0.0633, "step": 20895 }, { "epoch": 1.0609675618051677, "grad_norm": 0.4136981964111328, "learning_rate": 1.2926882921298882e-05, "loss": 0.0663, "step": 20900 }, { "epoch": 1.0612213817960303, "grad_norm": 0.41344696283340454, "learning_rate": 1.2925190788026468e-05, "loss": 0.0592, "step": 20905 }, { "epoch": 1.0614752017868927, "grad_norm": 0.5743115544319153, "learning_rate": 1.292349865475405e-05, "loss": 0.067, "step": 20910 }, { "epoch": 1.0617290217777553, "grad_norm": 0.4706607460975647, "learning_rate": 1.2921806521481633e-05, "loss": 0.0581, "step": 20915 }, { "epoch": 1.0619828417686177, "grad_norm": 0.39083805680274963, "learning_rate": 1.2920114388209217e-05, "loss": 0.0726, "step": 20920 }, { "epoch": 1.0622366617594803, "grad_norm": 0.3401205241680145, "learning_rate": 1.29184222549368e-05, "loss": 0.061, "step": 20925 }, { "epoch": 1.0624904817503427, "grad_norm": 0.33678048849105835, "learning_rate": 1.2916730121664382e-05, "loss": 0.0606, "step": 20930 }, { "epoch": 1.062744301741205, "grad_norm": 0.5099437832832336, "learning_rate": 1.2915037988391967e-05, "loss": 0.071, "step": 20935 }, { "epoch": 1.0629981217320676, "grad_norm": 0.32160258293151855, "learning_rate": 1.291334585511955e-05, "loss": 0.0643, "step": 20940 }, { "epoch": 1.06325194172293, "grad_norm": 0.4883134067058563, "learning_rate": 1.2911653721847133e-05, "loss": 0.0616, "step": 20945 }, { "epoch": 1.0635057617137926, "grad_norm": 0.3711860179901123, "learning_rate": 1.2909961588574718e-05, "loss": 0.0621, "step": 20950 }, { "epoch": 1.063759581704655, "grad_norm": 0.49837297201156616, "learning_rate": 1.29082694553023e-05, "loss": 0.0691, "step": 20955 }, { "epoch": 1.0640134016955176, "grad_norm": 0.6001039147377014, "learning_rate": 1.2906577322029885e-05, "loss": 0.0606, "step": 20960 }, { "epoch": 1.06426722168638, "grad_norm": 0.32044780254364014, "learning_rate": 1.2904885188757468e-05, "loss": 0.0534, "step": 20965 }, { "epoch": 1.0645210416772426, "grad_norm": 0.30035600066185, "learning_rate": 1.290319305548505e-05, "loss": 0.054, "step": 20970 }, { "epoch": 1.064774861668105, "grad_norm": 0.343683660030365, "learning_rate": 1.2901500922212636e-05, "loss": 0.0637, "step": 20975 }, { "epoch": 1.0650286816589674, "grad_norm": 0.3978451192378998, "learning_rate": 1.2899808788940217e-05, "loss": 0.06, "step": 20980 }, { "epoch": 1.06528250164983, "grad_norm": 0.40602362155914307, "learning_rate": 1.2898116655667801e-05, "loss": 0.0693, "step": 20985 }, { "epoch": 1.0655363216406923, "grad_norm": 0.4250233471393585, "learning_rate": 1.2896424522395386e-05, "loss": 0.0615, "step": 20990 }, { "epoch": 1.065790141631555, "grad_norm": 0.3212973177433014, "learning_rate": 1.2894732389122968e-05, "loss": 0.0448, "step": 20995 }, { "epoch": 1.0660439616224173, "grad_norm": 0.4593718945980072, "learning_rate": 1.289304025585055e-05, "loss": 0.0589, "step": 21000 }, { "epoch": 1.06629778161328, "grad_norm": 0.41860365867614746, "learning_rate": 1.2891348122578135e-05, "loss": 0.0582, "step": 21005 }, { "epoch": 1.0665516016041423, "grad_norm": 0.32869061827659607, "learning_rate": 1.2889655989305719e-05, "loss": 0.0559, "step": 21010 }, { "epoch": 1.066805421595005, "grad_norm": 0.4083346724510193, "learning_rate": 1.2887963856033304e-05, "loss": 0.0536, "step": 21015 }, { "epoch": 1.0670592415858673, "grad_norm": 0.4695943295955658, "learning_rate": 1.2886271722760886e-05, "loss": 0.0629, "step": 21020 }, { "epoch": 1.06731306157673, "grad_norm": 0.42070719599723816, "learning_rate": 1.2884579589488468e-05, "loss": 0.0628, "step": 21025 }, { "epoch": 1.0675668815675923, "grad_norm": 0.32895520329475403, "learning_rate": 1.2882887456216053e-05, "loss": 0.0565, "step": 21030 }, { "epoch": 1.0678207015584547, "grad_norm": 0.44509220123291016, "learning_rate": 1.2881195322943636e-05, "loss": 0.0595, "step": 21035 }, { "epoch": 1.0680745215493173, "grad_norm": 0.3205646276473999, "learning_rate": 1.2879503189671218e-05, "loss": 0.0543, "step": 21040 }, { "epoch": 1.0683283415401796, "grad_norm": 0.6537548899650574, "learning_rate": 1.2877811056398803e-05, "loss": 0.0676, "step": 21045 }, { "epoch": 1.0685821615310422, "grad_norm": 0.40213271975517273, "learning_rate": 1.2876118923126385e-05, "loss": 0.0665, "step": 21050 }, { "epoch": 1.0688359815219046, "grad_norm": 0.39872047305107117, "learning_rate": 1.287442678985397e-05, "loss": 0.0633, "step": 21055 }, { "epoch": 1.0690898015127672, "grad_norm": 0.8748286366462708, "learning_rate": 1.2872734656581554e-05, "loss": 0.0584, "step": 21060 }, { "epoch": 1.0693436215036296, "grad_norm": 0.28194040060043335, "learning_rate": 1.2871042523309136e-05, "loss": 0.0569, "step": 21065 }, { "epoch": 1.0695974414944922, "grad_norm": 0.30738887190818787, "learning_rate": 1.2869350390036721e-05, "loss": 0.0523, "step": 21070 }, { "epoch": 1.0698512614853546, "grad_norm": 0.4939640462398529, "learning_rate": 1.2867658256764303e-05, "loss": 0.059, "step": 21075 }, { "epoch": 1.070105081476217, "grad_norm": 0.3448868691921234, "learning_rate": 1.2865966123491887e-05, "loss": 0.0564, "step": 21080 }, { "epoch": 1.0703589014670796, "grad_norm": 0.3513730764389038, "learning_rate": 1.2864273990219472e-05, "loss": 0.0567, "step": 21085 }, { "epoch": 1.070612721457942, "grad_norm": 0.4272534251213074, "learning_rate": 1.2862581856947054e-05, "loss": 0.054, "step": 21090 }, { "epoch": 1.0708665414488046, "grad_norm": 0.2933310568332672, "learning_rate": 1.2860889723674637e-05, "loss": 0.0505, "step": 21095 }, { "epoch": 1.071120361439667, "grad_norm": 0.4470325708389282, "learning_rate": 1.285919759040222e-05, "loss": 0.0735, "step": 21100 }, { "epoch": 1.0713741814305295, "grad_norm": 0.3551653325557709, "learning_rate": 1.2857505457129804e-05, "loss": 0.0541, "step": 21105 }, { "epoch": 1.071628001421392, "grad_norm": 0.6195331811904907, "learning_rate": 1.285581332385739e-05, "loss": 0.0681, "step": 21110 }, { "epoch": 1.0718818214122545, "grad_norm": 0.38556531071662903, "learning_rate": 1.2854121190584971e-05, "loss": 0.0612, "step": 21115 }, { "epoch": 1.072135641403117, "grad_norm": 0.6047544479370117, "learning_rate": 1.2852429057312555e-05, "loss": 0.0597, "step": 21120 }, { "epoch": 1.0723894613939793, "grad_norm": 0.48388174176216125, "learning_rate": 1.2850736924040139e-05, "loss": 0.0641, "step": 21125 }, { "epoch": 1.0726432813848419, "grad_norm": 0.8064355850219727, "learning_rate": 1.2849044790767722e-05, "loss": 0.0678, "step": 21130 }, { "epoch": 1.0728971013757043, "grad_norm": 0.3345118463039398, "learning_rate": 1.2847352657495304e-05, "loss": 0.0635, "step": 21135 }, { "epoch": 1.0731509213665669, "grad_norm": 0.3172535300254822, "learning_rate": 1.2845660524222889e-05, "loss": 0.065, "step": 21140 }, { "epoch": 1.0734047413574292, "grad_norm": 0.8722877502441406, "learning_rate": 1.2843968390950473e-05, "loss": 0.0571, "step": 21145 }, { "epoch": 1.0736585613482919, "grad_norm": 0.42107436060905457, "learning_rate": 1.2842276257678056e-05, "loss": 0.0537, "step": 21150 }, { "epoch": 1.0739123813391542, "grad_norm": 0.41451701521873474, "learning_rate": 1.284058412440564e-05, "loss": 0.0595, "step": 21155 }, { "epoch": 1.0741662013300168, "grad_norm": 0.4234960377216339, "learning_rate": 1.2838891991133222e-05, "loss": 0.0581, "step": 21160 }, { "epoch": 1.0744200213208792, "grad_norm": 0.40103062987327576, "learning_rate": 1.2837199857860807e-05, "loss": 0.0581, "step": 21165 }, { "epoch": 1.0746738413117418, "grad_norm": 0.35604149103164673, "learning_rate": 1.283550772458839e-05, "loss": 0.057, "step": 21170 }, { "epoch": 1.0749276613026042, "grad_norm": 0.4039483666419983, "learning_rate": 1.2833815591315972e-05, "loss": 0.061, "step": 21175 }, { "epoch": 1.0751814812934666, "grad_norm": 0.36171674728393555, "learning_rate": 1.2832123458043558e-05, "loss": 0.0592, "step": 21180 }, { "epoch": 1.0754353012843292, "grad_norm": 0.525554358959198, "learning_rate": 1.283043132477114e-05, "loss": 0.0607, "step": 21185 }, { "epoch": 1.0756891212751916, "grad_norm": 0.3412221670150757, "learning_rate": 1.2828739191498723e-05, "loss": 0.0593, "step": 21190 }, { "epoch": 1.0759429412660542, "grad_norm": 0.35413116216659546, "learning_rate": 1.2827047058226308e-05, "loss": 0.0608, "step": 21195 }, { "epoch": 1.0761967612569165, "grad_norm": 0.7104817032814026, "learning_rate": 1.282535492495389e-05, "loss": 0.0676, "step": 21200 }, { "epoch": 1.0764505812477791, "grad_norm": 0.4611401855945587, "learning_rate": 1.2823662791681475e-05, "loss": 0.065, "step": 21205 }, { "epoch": 1.0767044012386415, "grad_norm": 0.32466113567352295, "learning_rate": 1.2821970658409057e-05, "loss": 0.0638, "step": 21210 }, { "epoch": 1.0769582212295041, "grad_norm": 0.37902674078941345, "learning_rate": 1.282027852513664e-05, "loss": 0.0586, "step": 21215 }, { "epoch": 1.0772120412203665, "grad_norm": 0.29567191004753113, "learning_rate": 1.2818586391864226e-05, "loss": 0.0583, "step": 21220 }, { "epoch": 1.077465861211229, "grad_norm": 0.24273955821990967, "learning_rate": 1.2816894258591808e-05, "loss": 0.0586, "step": 21225 }, { "epoch": 1.0777196812020915, "grad_norm": 0.4052215814590454, "learning_rate": 1.281520212531939e-05, "loss": 0.0519, "step": 21230 }, { "epoch": 1.0779735011929539, "grad_norm": 0.43652230501174927, "learning_rate": 1.2813509992046975e-05, "loss": 0.0588, "step": 21235 }, { "epoch": 1.0782273211838165, "grad_norm": 0.5343736410140991, "learning_rate": 1.2811817858774558e-05, "loss": 0.0683, "step": 21240 }, { "epoch": 1.0784811411746789, "grad_norm": 0.43747490644454956, "learning_rate": 1.281012572550214e-05, "loss": 0.0556, "step": 21245 }, { "epoch": 1.0787349611655415, "grad_norm": 0.28066039085388184, "learning_rate": 1.2808433592229725e-05, "loss": 0.058, "step": 21250 }, { "epoch": 1.0789887811564038, "grad_norm": 0.5007389783859253, "learning_rate": 1.2806741458957307e-05, "loss": 0.0506, "step": 21255 }, { "epoch": 1.0792426011472664, "grad_norm": 0.3657475411891937, "learning_rate": 1.2805049325684893e-05, "loss": 0.066, "step": 21260 }, { "epoch": 1.0794964211381288, "grad_norm": 0.3738833963871002, "learning_rate": 1.2803357192412476e-05, "loss": 0.0601, "step": 21265 }, { "epoch": 1.0797502411289912, "grad_norm": 0.35183340311050415, "learning_rate": 1.2801665059140058e-05, "loss": 0.0672, "step": 21270 }, { "epoch": 1.0800040611198538, "grad_norm": 0.47940659523010254, "learning_rate": 1.2799972925867643e-05, "loss": 0.0623, "step": 21275 }, { "epoch": 1.0802578811107162, "grad_norm": 0.5233344435691833, "learning_rate": 1.2798280792595225e-05, "loss": 0.0598, "step": 21280 }, { "epoch": 1.0805117011015788, "grad_norm": 0.30529236793518066, "learning_rate": 1.2796588659322809e-05, "loss": 0.0479, "step": 21285 }, { "epoch": 1.0807655210924412, "grad_norm": 0.4038148522377014, "learning_rate": 1.2794896526050394e-05, "loss": 0.0736, "step": 21290 }, { "epoch": 1.0810193410833038, "grad_norm": 0.3613746464252472, "learning_rate": 1.2793204392777976e-05, "loss": 0.0547, "step": 21295 }, { "epoch": 1.0812731610741662, "grad_norm": 0.3199770450592041, "learning_rate": 1.2791512259505561e-05, "loss": 0.0582, "step": 21300 }, { "epoch": 1.0815269810650288, "grad_norm": 0.6598812937736511, "learning_rate": 1.2789820126233143e-05, "loss": 0.0638, "step": 21305 }, { "epoch": 1.0817808010558911, "grad_norm": 0.3184927999973297, "learning_rate": 1.2788127992960726e-05, "loss": 0.0535, "step": 21310 }, { "epoch": 1.0820346210467537, "grad_norm": 0.3598698675632477, "learning_rate": 1.2786435859688312e-05, "loss": 0.0534, "step": 21315 }, { "epoch": 1.0822884410376161, "grad_norm": 0.4094015657901764, "learning_rate": 1.2784743726415893e-05, "loss": 0.0479, "step": 21320 }, { "epoch": 1.0825422610284785, "grad_norm": 0.9048264026641846, "learning_rate": 1.2783051593143477e-05, "loss": 0.0632, "step": 21325 }, { "epoch": 1.082796081019341, "grad_norm": 0.4209839701652527, "learning_rate": 1.278135945987106e-05, "loss": 0.0714, "step": 21330 }, { "epoch": 1.0830499010102035, "grad_norm": 0.4597996771335602, "learning_rate": 1.2779667326598644e-05, "loss": 0.0731, "step": 21335 }, { "epoch": 1.083303721001066, "grad_norm": 0.29909446835517883, "learning_rate": 1.2777975193326226e-05, "loss": 0.0644, "step": 21340 }, { "epoch": 1.0835575409919285, "grad_norm": 0.4894222617149353, "learning_rate": 1.2776283060053811e-05, "loss": 0.0624, "step": 21345 }, { "epoch": 1.083811360982791, "grad_norm": 0.38440021872520447, "learning_rate": 1.2774590926781395e-05, "loss": 0.0657, "step": 21350 }, { "epoch": 1.0840651809736535, "grad_norm": 0.4377933144569397, "learning_rate": 1.2772898793508978e-05, "loss": 0.0567, "step": 21355 }, { "epoch": 1.084319000964516, "grad_norm": 0.3647824227809906, "learning_rate": 1.2771206660236562e-05, "loss": 0.0674, "step": 21360 }, { "epoch": 1.0845728209553784, "grad_norm": 0.3673681616783142, "learning_rate": 1.2769514526964144e-05, "loss": 0.0582, "step": 21365 }, { "epoch": 1.084826640946241, "grad_norm": 0.7054483890533447, "learning_rate": 1.2767822393691729e-05, "loss": 0.0566, "step": 21370 }, { "epoch": 1.0850804609371034, "grad_norm": 0.4479354918003082, "learning_rate": 1.2766130260419312e-05, "loss": 0.0625, "step": 21375 }, { "epoch": 1.0853342809279658, "grad_norm": 0.34682953357696533, "learning_rate": 1.2764438127146894e-05, "loss": 0.0578, "step": 21380 }, { "epoch": 1.0855881009188284, "grad_norm": 0.4050367772579193, "learning_rate": 1.276274599387448e-05, "loss": 0.0532, "step": 21385 }, { "epoch": 1.0858419209096908, "grad_norm": 0.3523172438144684, "learning_rate": 1.2761053860602061e-05, "loss": 0.0578, "step": 21390 }, { "epoch": 1.0860957409005534, "grad_norm": 0.34507274627685547, "learning_rate": 1.2759361727329647e-05, "loss": 0.0622, "step": 21395 }, { "epoch": 1.0863495608914158, "grad_norm": 0.29645681381225586, "learning_rate": 1.275766959405723e-05, "loss": 0.0599, "step": 21400 }, { "epoch": 1.0866033808822784, "grad_norm": 0.36118993163108826, "learning_rate": 1.2755977460784812e-05, "loss": 0.0571, "step": 21405 }, { "epoch": 1.0868572008731407, "grad_norm": 0.5020495653152466, "learning_rate": 1.2754285327512397e-05, "loss": 0.0638, "step": 21410 }, { "epoch": 1.0871110208640034, "grad_norm": 0.32721999287605286, "learning_rate": 1.2752593194239979e-05, "loss": 0.0616, "step": 21415 }, { "epoch": 1.0873648408548657, "grad_norm": 0.38569194078445435, "learning_rate": 1.2750901060967563e-05, "loss": 0.0527, "step": 21420 }, { "epoch": 1.0876186608457281, "grad_norm": 0.6979565620422363, "learning_rate": 1.2749208927695148e-05, "loss": 0.0524, "step": 21425 }, { "epoch": 1.0878724808365907, "grad_norm": 0.4546845555305481, "learning_rate": 1.274751679442273e-05, "loss": 0.0601, "step": 21430 }, { "epoch": 1.088126300827453, "grad_norm": 0.46561935544013977, "learning_rate": 1.2745824661150311e-05, "loss": 0.0587, "step": 21435 }, { "epoch": 1.0883801208183157, "grad_norm": 0.32504963874816895, "learning_rate": 1.2744132527877897e-05, "loss": 0.0474, "step": 21440 }, { "epoch": 1.088633940809178, "grad_norm": 0.4292490482330322, "learning_rate": 1.274244039460548e-05, "loss": 0.054, "step": 21445 }, { "epoch": 1.0888877608000407, "grad_norm": 0.3140452802181244, "learning_rate": 1.2740748261333064e-05, "loss": 0.0709, "step": 21450 }, { "epoch": 1.089141580790903, "grad_norm": 0.4569678008556366, "learning_rate": 1.2739056128060647e-05, "loss": 0.064, "step": 21455 }, { "epoch": 1.0893954007817657, "grad_norm": 0.34585288166999817, "learning_rate": 1.273736399478823e-05, "loss": 0.056, "step": 21460 }, { "epoch": 1.089649220772628, "grad_norm": 0.35902848839759827, "learning_rate": 1.2735671861515814e-05, "loss": 0.0487, "step": 21465 }, { "epoch": 1.0899030407634904, "grad_norm": 0.40261754393577576, "learning_rate": 1.2733979728243398e-05, "loss": 0.0532, "step": 21470 }, { "epoch": 1.090156860754353, "grad_norm": 0.3371613323688507, "learning_rate": 1.273228759497098e-05, "loss": 0.0689, "step": 21475 }, { "epoch": 1.0904106807452154, "grad_norm": 0.3712048828601837, "learning_rate": 1.2730595461698565e-05, "loss": 0.064, "step": 21480 }, { "epoch": 1.090664500736078, "grad_norm": 0.39349299669265747, "learning_rate": 1.2728903328426147e-05, "loss": 0.0587, "step": 21485 }, { "epoch": 1.0909183207269404, "grad_norm": 0.6331197023391724, "learning_rate": 1.272721119515373e-05, "loss": 0.0638, "step": 21490 }, { "epoch": 1.091172140717803, "grad_norm": 0.28697672486305237, "learning_rate": 1.2725519061881316e-05, "loss": 0.0682, "step": 21495 }, { "epoch": 1.0914259607086654, "grad_norm": 0.48400163650512695, "learning_rate": 1.2723826928608898e-05, "loss": 0.0563, "step": 21500 }, { "epoch": 1.091679780699528, "grad_norm": 0.38245150446891785, "learning_rate": 1.2722134795336483e-05, "loss": 0.0541, "step": 21505 }, { "epoch": 1.0919336006903904, "grad_norm": 0.399664044380188, "learning_rate": 1.2720442662064065e-05, "loss": 0.0572, "step": 21510 }, { "epoch": 1.092187420681253, "grad_norm": 0.385906457901001, "learning_rate": 1.2718750528791648e-05, "loss": 0.0661, "step": 21515 }, { "epoch": 1.0924412406721153, "grad_norm": 0.5282519459724426, "learning_rate": 1.2717058395519233e-05, "loss": 0.05, "step": 21520 }, { "epoch": 1.0926950606629777, "grad_norm": 0.35903728008270264, "learning_rate": 1.2715366262246815e-05, "loss": 0.0623, "step": 21525 }, { "epoch": 1.0929488806538403, "grad_norm": 0.33200833201408386, "learning_rate": 1.2713674128974399e-05, "loss": 0.0585, "step": 21530 }, { "epoch": 1.0932027006447027, "grad_norm": 0.5667392611503601, "learning_rate": 1.2711981995701982e-05, "loss": 0.0561, "step": 21535 }, { "epoch": 1.0934565206355653, "grad_norm": 0.49172818660736084, "learning_rate": 1.2710289862429566e-05, "loss": 0.0512, "step": 21540 }, { "epoch": 1.0937103406264277, "grad_norm": 0.31876128911972046, "learning_rate": 1.2708597729157151e-05, "loss": 0.0592, "step": 21545 }, { "epoch": 1.0939641606172903, "grad_norm": 0.348209023475647, "learning_rate": 1.2706905595884733e-05, "loss": 0.054, "step": 21550 }, { "epoch": 1.0942179806081527, "grad_norm": 1.1828866004943848, "learning_rate": 1.2705213462612317e-05, "loss": 0.0622, "step": 21555 }, { "epoch": 1.0944718005990153, "grad_norm": 0.376403272151947, "learning_rate": 1.27035213293399e-05, "loss": 0.0693, "step": 21560 }, { "epoch": 1.0947256205898777, "grad_norm": 0.35962969064712524, "learning_rate": 1.2701829196067484e-05, "loss": 0.0499, "step": 21565 }, { "epoch": 1.09497944058074, "grad_norm": 0.3168092668056488, "learning_rate": 1.2700137062795066e-05, "loss": 0.0544, "step": 21570 }, { "epoch": 1.0952332605716026, "grad_norm": 0.7175357341766357, "learning_rate": 1.269844492952265e-05, "loss": 0.059, "step": 21575 }, { "epoch": 1.095487080562465, "grad_norm": 0.3884638845920563, "learning_rate": 1.2696752796250234e-05, "loss": 0.0632, "step": 21580 }, { "epoch": 1.0957409005533276, "grad_norm": 0.40249037742614746, "learning_rate": 1.2695060662977816e-05, "loss": 0.0603, "step": 21585 }, { "epoch": 1.09599472054419, "grad_norm": 0.5622670650482178, "learning_rate": 1.2693368529705401e-05, "loss": 0.0582, "step": 21590 }, { "epoch": 1.0962485405350526, "grad_norm": 0.42979860305786133, "learning_rate": 1.2691676396432983e-05, "loss": 0.058, "step": 21595 }, { "epoch": 1.096502360525915, "grad_norm": 0.36912596225738525, "learning_rate": 1.2689984263160568e-05, "loss": 0.0677, "step": 21600 }, { "epoch": 1.0967561805167776, "grad_norm": 0.5511152744293213, "learning_rate": 1.2688292129888152e-05, "loss": 0.0566, "step": 21605 }, { "epoch": 1.09701000050764, "grad_norm": 0.4276013672351837, "learning_rate": 1.2686599996615734e-05, "loss": 0.0757, "step": 21610 }, { "epoch": 1.0972638204985024, "grad_norm": 0.34152284264564514, "learning_rate": 1.2684907863343319e-05, "loss": 0.0543, "step": 21615 }, { "epoch": 1.097517640489365, "grad_norm": 0.3993591070175171, "learning_rate": 1.2683215730070901e-05, "loss": 0.0513, "step": 21620 }, { "epoch": 1.0977714604802273, "grad_norm": 0.2898729741573334, "learning_rate": 1.2681523596798484e-05, "loss": 0.0559, "step": 21625 }, { "epoch": 1.09802528047109, "grad_norm": 0.3158964514732361, "learning_rate": 1.2679831463526068e-05, "loss": 0.0575, "step": 21630 }, { "epoch": 1.0982791004619523, "grad_norm": 0.6591548323631287, "learning_rate": 1.2678139330253652e-05, "loss": 0.0614, "step": 21635 }, { "epoch": 1.098532920452815, "grad_norm": 0.39295467734336853, "learning_rate": 1.2676447196981237e-05, "loss": 0.0593, "step": 21640 }, { "epoch": 1.0987867404436773, "grad_norm": 0.3043166995048523, "learning_rate": 1.2674755063708819e-05, "loss": 0.0524, "step": 21645 }, { "epoch": 1.09904056043454, "grad_norm": 0.5459681153297424, "learning_rate": 1.2673062930436402e-05, "loss": 0.0599, "step": 21650 }, { "epoch": 1.0992943804254023, "grad_norm": 0.3792338967323303, "learning_rate": 1.2671370797163986e-05, "loss": 0.0498, "step": 21655 }, { "epoch": 1.0995482004162649, "grad_norm": 0.35836130380630493, "learning_rate": 1.266967866389157e-05, "loss": 0.0672, "step": 21660 }, { "epoch": 1.0998020204071273, "grad_norm": 0.3934837579727173, "learning_rate": 1.2667986530619151e-05, "loss": 0.069, "step": 21665 }, { "epoch": 1.1000558403979896, "grad_norm": 0.374671071767807, "learning_rate": 1.2666294397346736e-05, "loss": 0.0657, "step": 21670 }, { "epoch": 1.1003096603888523, "grad_norm": 0.3873213231563568, "learning_rate": 1.266460226407432e-05, "loss": 0.0505, "step": 21675 }, { "epoch": 1.1005634803797146, "grad_norm": 0.29623717069625854, "learning_rate": 1.2662910130801902e-05, "loss": 0.0536, "step": 21680 }, { "epoch": 1.1008173003705772, "grad_norm": 0.320719450712204, "learning_rate": 1.2661217997529487e-05, "loss": 0.0545, "step": 21685 }, { "epoch": 1.1010711203614396, "grad_norm": 0.2868250906467438, "learning_rate": 1.2659525864257069e-05, "loss": 0.0587, "step": 21690 }, { "epoch": 1.1013249403523022, "grad_norm": 0.42279836535453796, "learning_rate": 1.2657833730984654e-05, "loss": 0.0527, "step": 21695 }, { "epoch": 1.1015787603431646, "grad_norm": 0.3950606882572174, "learning_rate": 1.2656141597712238e-05, "loss": 0.0573, "step": 21700 }, { "epoch": 1.1018325803340272, "grad_norm": 0.5181793570518494, "learning_rate": 1.265444946443982e-05, "loss": 0.0526, "step": 21705 }, { "epoch": 1.1020864003248896, "grad_norm": 0.5141026973724365, "learning_rate": 1.2652757331167405e-05, "loss": 0.0693, "step": 21710 }, { "epoch": 1.1023402203157522, "grad_norm": 0.4179568290710449, "learning_rate": 1.2651065197894987e-05, "loss": 0.0558, "step": 21715 }, { "epoch": 1.1025940403066146, "grad_norm": 0.37919455766677856, "learning_rate": 1.264937306462257e-05, "loss": 0.0566, "step": 21720 }, { "epoch": 1.102847860297477, "grad_norm": 0.3605843782424927, "learning_rate": 1.2647680931350155e-05, "loss": 0.0587, "step": 21725 }, { "epoch": 1.1031016802883395, "grad_norm": 0.28280141949653625, "learning_rate": 1.2645988798077737e-05, "loss": 0.0612, "step": 21730 }, { "epoch": 1.103355500279202, "grad_norm": 0.35269805788993835, "learning_rate": 1.264429666480532e-05, "loss": 0.0536, "step": 21735 }, { "epoch": 1.1036093202700645, "grad_norm": 0.35521480441093445, "learning_rate": 1.2642604531532904e-05, "loss": 0.0533, "step": 21740 }, { "epoch": 1.103863140260927, "grad_norm": 0.4273066222667694, "learning_rate": 1.2640912398260488e-05, "loss": 0.0609, "step": 21745 }, { "epoch": 1.1041169602517895, "grad_norm": 0.6088941097259521, "learning_rate": 1.2639220264988073e-05, "loss": 0.0587, "step": 21750 }, { "epoch": 1.104370780242652, "grad_norm": 0.28236907720565796, "learning_rate": 1.2637528131715655e-05, "loss": 0.063, "step": 21755 }, { "epoch": 1.1046246002335143, "grad_norm": 0.723622739315033, "learning_rate": 1.2635835998443239e-05, "loss": 0.0613, "step": 21760 }, { "epoch": 1.1048784202243769, "grad_norm": 0.43080398440361023, "learning_rate": 1.2634143865170822e-05, "loss": 0.0547, "step": 21765 }, { "epoch": 1.1051322402152393, "grad_norm": 0.41854366660118103, "learning_rate": 1.2632451731898406e-05, "loss": 0.0544, "step": 21770 }, { "epoch": 1.1053860602061019, "grad_norm": 0.26487666368484497, "learning_rate": 1.2630759598625987e-05, "loss": 0.0583, "step": 21775 }, { "epoch": 1.1056398801969642, "grad_norm": 0.5939835906028748, "learning_rate": 1.2629067465353573e-05, "loss": 0.0576, "step": 21780 }, { "epoch": 1.1058937001878268, "grad_norm": 0.3684999942779541, "learning_rate": 1.2627375332081156e-05, "loss": 0.0511, "step": 21785 }, { "epoch": 1.1061475201786892, "grad_norm": 0.3227483630180359, "learning_rate": 1.262568319880874e-05, "loss": 0.0518, "step": 21790 }, { "epoch": 1.1064013401695518, "grad_norm": 0.3155810534954071, "learning_rate": 1.2623991065536323e-05, "loss": 0.0575, "step": 21795 }, { "epoch": 1.1066551601604142, "grad_norm": 0.35092318058013916, "learning_rate": 1.2622298932263905e-05, "loss": 0.0554, "step": 21800 }, { "epoch": 1.1069089801512768, "grad_norm": 0.5898188352584839, "learning_rate": 1.262060679899149e-05, "loss": 0.06, "step": 21805 }, { "epoch": 1.1071628001421392, "grad_norm": 0.430916428565979, "learning_rate": 1.2618914665719072e-05, "loss": 0.0549, "step": 21810 }, { "epoch": 1.1074166201330016, "grad_norm": 0.5769869685173035, "learning_rate": 1.2617222532446656e-05, "loss": 0.0618, "step": 21815 }, { "epoch": 1.1076704401238642, "grad_norm": 0.3023453950881958, "learning_rate": 1.2615530399174241e-05, "loss": 0.0608, "step": 21820 }, { "epoch": 1.1079242601147266, "grad_norm": 0.7524684071540833, "learning_rate": 1.2613838265901823e-05, "loss": 0.0546, "step": 21825 }, { "epoch": 1.1081780801055892, "grad_norm": 0.3695290684700012, "learning_rate": 1.2612146132629406e-05, "loss": 0.0685, "step": 21830 }, { "epoch": 1.1084319000964515, "grad_norm": 0.29292744398117065, "learning_rate": 1.261045399935699e-05, "loss": 0.0589, "step": 21835 }, { "epoch": 1.1086857200873141, "grad_norm": 0.43530234694480896, "learning_rate": 1.2608761866084574e-05, "loss": 0.0537, "step": 21840 }, { "epoch": 1.1089395400781765, "grad_norm": 0.34705910086631775, "learning_rate": 1.2607069732812159e-05, "loss": 0.0596, "step": 21845 }, { "epoch": 1.1091933600690391, "grad_norm": 0.655457079410553, "learning_rate": 1.260537759953974e-05, "loss": 0.0512, "step": 21850 }, { "epoch": 1.1094471800599015, "grad_norm": 0.3433069586753845, "learning_rate": 1.2603685466267324e-05, "loss": 0.0456, "step": 21855 }, { "epoch": 1.109701000050764, "grad_norm": 0.5595205426216125, "learning_rate": 1.2601993332994908e-05, "loss": 0.0587, "step": 21860 }, { "epoch": 1.1099548200416265, "grad_norm": 0.4585667550563812, "learning_rate": 1.2600301199722491e-05, "loss": 0.0613, "step": 21865 }, { "epoch": 1.1102086400324889, "grad_norm": 0.41729608178138733, "learning_rate": 1.2598609066450073e-05, "loss": 0.0583, "step": 21870 }, { "epoch": 1.1104624600233515, "grad_norm": 1.0790683031082153, "learning_rate": 1.2596916933177658e-05, "loss": 0.0584, "step": 21875 }, { "epoch": 1.1107162800142139, "grad_norm": 0.43448856472969055, "learning_rate": 1.2595224799905242e-05, "loss": 0.0539, "step": 21880 }, { "epoch": 1.1109701000050765, "grad_norm": 0.3212340474128723, "learning_rate": 1.2593532666632824e-05, "loss": 0.0467, "step": 21885 }, { "epoch": 1.1112239199959388, "grad_norm": 0.32932937145233154, "learning_rate": 1.2591840533360409e-05, "loss": 0.0648, "step": 21890 }, { "epoch": 1.1114777399868014, "grad_norm": 0.4850245416164398, "learning_rate": 1.259014840008799e-05, "loss": 0.0658, "step": 21895 }, { "epoch": 1.1117315599776638, "grad_norm": 0.3544910252094269, "learning_rate": 1.2588456266815576e-05, "loss": 0.0594, "step": 21900 }, { "epoch": 1.1119853799685264, "grad_norm": 0.40757372975349426, "learning_rate": 1.258676413354316e-05, "loss": 0.0556, "step": 21905 }, { "epoch": 1.1122391999593888, "grad_norm": 0.4513925313949585, "learning_rate": 1.2585072000270741e-05, "loss": 0.0583, "step": 21910 }, { "epoch": 1.1124930199502512, "grad_norm": 0.4681965410709381, "learning_rate": 1.2583379866998327e-05, "loss": 0.0628, "step": 21915 }, { "epoch": 1.1127468399411138, "grad_norm": 0.3026393949985504, "learning_rate": 1.2581687733725909e-05, "loss": 0.0507, "step": 21920 }, { "epoch": 1.1130006599319762, "grad_norm": 0.6231611371040344, "learning_rate": 1.2579995600453492e-05, "loss": 0.0661, "step": 21925 }, { "epoch": 1.1132544799228388, "grad_norm": 0.4058896005153656, "learning_rate": 1.2578303467181077e-05, "loss": 0.0585, "step": 21930 }, { "epoch": 1.1135082999137011, "grad_norm": 0.4088999330997467, "learning_rate": 1.257661133390866e-05, "loss": 0.0544, "step": 21935 }, { "epoch": 1.1137621199045638, "grad_norm": 0.34594523906707764, "learning_rate": 1.2574919200636244e-05, "loss": 0.0553, "step": 21940 }, { "epoch": 1.1140159398954261, "grad_norm": 0.38814792037010193, "learning_rate": 1.2573227067363826e-05, "loss": 0.0665, "step": 21945 }, { "epoch": 1.1142697598862887, "grad_norm": 0.2994110584259033, "learning_rate": 1.257153493409141e-05, "loss": 0.0508, "step": 21950 }, { "epoch": 1.1145235798771511, "grad_norm": 0.4700678288936615, "learning_rate": 1.2569842800818995e-05, "loss": 0.0538, "step": 21955 }, { "epoch": 1.1147773998680135, "grad_norm": 0.3245387375354767, "learning_rate": 1.2568150667546577e-05, "loss": 0.0723, "step": 21960 }, { "epoch": 1.115031219858876, "grad_norm": 0.32570117712020874, "learning_rate": 1.256645853427416e-05, "loss": 0.0629, "step": 21965 }, { "epoch": 1.1152850398497385, "grad_norm": 0.39165782928466797, "learning_rate": 1.2564766401001744e-05, "loss": 0.0596, "step": 21970 }, { "epoch": 1.115538859840601, "grad_norm": 0.3810831904411316, "learning_rate": 1.2563074267729328e-05, "loss": 0.0539, "step": 21975 }, { "epoch": 1.1157926798314635, "grad_norm": 0.2843420207500458, "learning_rate": 1.256138213445691e-05, "loss": 0.0539, "step": 21980 }, { "epoch": 1.116046499822326, "grad_norm": 0.28111520409584045, "learning_rate": 1.2559690001184495e-05, "loss": 0.058, "step": 21985 }, { "epoch": 1.1163003198131884, "grad_norm": 0.37228140234947205, "learning_rate": 1.2557997867912076e-05, "loss": 0.0589, "step": 21990 }, { "epoch": 1.116554139804051, "grad_norm": 0.3925822079181671, "learning_rate": 1.2556305734639662e-05, "loss": 0.0695, "step": 21995 }, { "epoch": 1.1168079597949134, "grad_norm": 0.7498750686645508, "learning_rate": 1.2554613601367245e-05, "loss": 0.0677, "step": 22000 }, { "epoch": 1.117061779785776, "grad_norm": 0.3801857531070709, "learning_rate": 1.2552921468094827e-05, "loss": 0.0603, "step": 22005 }, { "epoch": 1.1173155997766384, "grad_norm": 0.35116496682167053, "learning_rate": 1.2551229334822412e-05, "loss": 0.0691, "step": 22010 }, { "epoch": 1.1175694197675008, "grad_norm": 0.427156001329422, "learning_rate": 1.2549537201549994e-05, "loss": 0.0603, "step": 22015 }, { "epoch": 1.1178232397583634, "grad_norm": 0.43225473165512085, "learning_rate": 1.2547845068277578e-05, "loss": 0.055, "step": 22020 }, { "epoch": 1.1180770597492258, "grad_norm": 0.38967064023017883, "learning_rate": 1.2546152935005163e-05, "loss": 0.0565, "step": 22025 }, { "epoch": 1.1183308797400884, "grad_norm": 1.0033137798309326, "learning_rate": 1.2544460801732745e-05, "loss": 0.055, "step": 22030 }, { "epoch": 1.1185846997309508, "grad_norm": 0.2806367874145508, "learning_rate": 1.254276866846033e-05, "loss": 0.0583, "step": 22035 }, { "epoch": 1.1188385197218134, "grad_norm": 0.33049389719963074, "learning_rate": 1.2541076535187912e-05, "loss": 0.0624, "step": 22040 }, { "epoch": 1.1190923397126757, "grad_norm": 0.2302655428647995, "learning_rate": 1.2539384401915495e-05, "loss": 0.0617, "step": 22045 }, { "epoch": 1.1193461597035383, "grad_norm": 0.35394585132598877, "learning_rate": 1.253769226864308e-05, "loss": 0.0618, "step": 22050 }, { "epoch": 1.1195999796944007, "grad_norm": 0.49913811683654785, "learning_rate": 1.2536000135370663e-05, "loss": 0.0574, "step": 22055 }, { "epoch": 1.1198537996852633, "grad_norm": 0.4379311501979828, "learning_rate": 1.2534308002098246e-05, "loss": 0.0618, "step": 22060 }, { "epoch": 1.1201076196761257, "grad_norm": 0.36873766779899597, "learning_rate": 1.253261586882583e-05, "loss": 0.0617, "step": 22065 }, { "epoch": 1.120361439666988, "grad_norm": 0.3143756091594696, "learning_rate": 1.2530923735553413e-05, "loss": 0.0616, "step": 22070 }, { "epoch": 1.1206152596578507, "grad_norm": 0.4979376494884491, "learning_rate": 1.2529231602280995e-05, "loss": 0.062, "step": 22075 }, { "epoch": 1.120869079648713, "grad_norm": 0.3125242292881012, "learning_rate": 1.252753946900858e-05, "loss": 0.0584, "step": 22080 }, { "epoch": 1.1211228996395757, "grad_norm": 0.3742777705192566, "learning_rate": 1.2525847335736164e-05, "loss": 0.0524, "step": 22085 }, { "epoch": 1.121376719630438, "grad_norm": 0.4746595621109009, "learning_rate": 1.2524155202463747e-05, "loss": 0.0571, "step": 22090 }, { "epoch": 1.1216305396213007, "grad_norm": 0.3313211500644684, "learning_rate": 1.2522463069191331e-05, "loss": 0.0469, "step": 22095 }, { "epoch": 1.121884359612163, "grad_norm": 0.31520578265190125, "learning_rate": 1.2520770935918913e-05, "loss": 0.0516, "step": 22100 }, { "epoch": 1.1221381796030254, "grad_norm": 0.31391963362693787, "learning_rate": 1.2519078802646498e-05, "loss": 0.0598, "step": 22105 }, { "epoch": 1.122391999593888, "grad_norm": 0.318359375, "learning_rate": 1.2517386669374082e-05, "loss": 0.0628, "step": 22110 }, { "epoch": 1.1226458195847504, "grad_norm": 0.40016159415245056, "learning_rate": 1.2515694536101663e-05, "loss": 0.0639, "step": 22115 }, { "epoch": 1.122899639575613, "grad_norm": 0.3543905019760132, "learning_rate": 1.2514002402829249e-05, "loss": 0.0572, "step": 22120 }, { "epoch": 1.1231534595664754, "grad_norm": 0.3604658544063568, "learning_rate": 1.251231026955683e-05, "loss": 0.052, "step": 22125 }, { "epoch": 1.123407279557338, "grad_norm": 0.3308972716331482, "learning_rate": 1.2510618136284414e-05, "loss": 0.0541, "step": 22130 }, { "epoch": 1.1236610995482004, "grad_norm": 0.3981793224811554, "learning_rate": 1.2508926003012e-05, "loss": 0.0618, "step": 22135 }, { "epoch": 1.123914919539063, "grad_norm": 0.2392720729112625, "learning_rate": 1.2507233869739581e-05, "loss": 0.0625, "step": 22140 }, { "epoch": 1.1241687395299254, "grad_norm": 0.35200855135917664, "learning_rate": 1.2505541736467166e-05, "loss": 0.054, "step": 22145 }, { "epoch": 1.124422559520788, "grad_norm": 0.3301455080509186, "learning_rate": 1.2503849603194748e-05, "loss": 0.0496, "step": 22150 }, { "epoch": 1.1246763795116503, "grad_norm": 0.32119038701057434, "learning_rate": 1.2502157469922332e-05, "loss": 0.062, "step": 22155 }, { "epoch": 1.1249301995025127, "grad_norm": 0.373477578163147, "learning_rate": 1.2500465336649917e-05, "loss": 0.0542, "step": 22160 }, { "epoch": 1.1251840194933753, "grad_norm": 0.2541143298149109, "learning_rate": 1.2498773203377499e-05, "loss": 0.0504, "step": 22165 }, { "epoch": 1.1254378394842377, "grad_norm": 0.3145160377025604, "learning_rate": 1.2497081070105082e-05, "loss": 0.0538, "step": 22170 }, { "epoch": 1.1256916594751003, "grad_norm": 1.1840285062789917, "learning_rate": 1.2495388936832666e-05, "loss": 0.0515, "step": 22175 }, { "epoch": 1.1259454794659627, "grad_norm": 0.36791080236434937, "learning_rate": 1.249369680356025e-05, "loss": 0.0567, "step": 22180 }, { "epoch": 1.1261992994568253, "grad_norm": 0.39223095774650574, "learning_rate": 1.2492004670287835e-05, "loss": 0.0577, "step": 22185 }, { "epoch": 1.1264531194476877, "grad_norm": 0.3730306625366211, "learning_rate": 1.2490312537015417e-05, "loss": 0.0533, "step": 22190 }, { "epoch": 1.1267069394385503, "grad_norm": 0.32315680384635925, "learning_rate": 1.2488620403742998e-05, "loss": 0.0573, "step": 22195 }, { "epoch": 1.1269607594294127, "grad_norm": 0.27425339818000793, "learning_rate": 1.2486928270470584e-05, "loss": 0.0487, "step": 22200 }, { "epoch": 1.1272145794202753, "grad_norm": 0.4643983244895935, "learning_rate": 1.2485236137198167e-05, "loss": 0.055, "step": 22205 }, { "epoch": 1.1274683994111376, "grad_norm": 0.4175454080104828, "learning_rate": 1.2483544003925749e-05, "loss": 0.0509, "step": 22210 }, { "epoch": 1.127722219402, "grad_norm": 0.5054285526275635, "learning_rate": 1.2481851870653334e-05, "loss": 0.0555, "step": 22215 }, { "epoch": 1.1279760393928626, "grad_norm": 0.44866254925727844, "learning_rate": 1.2480159737380916e-05, "loss": 0.0541, "step": 22220 }, { "epoch": 1.128229859383725, "grad_norm": 0.31655773520469666, "learning_rate": 1.24784676041085e-05, "loss": 0.0527, "step": 22225 }, { "epoch": 1.1284836793745876, "grad_norm": 0.3848085403442383, "learning_rate": 1.2476775470836085e-05, "loss": 0.0544, "step": 22230 }, { "epoch": 1.12873749936545, "grad_norm": 0.3327775001525879, "learning_rate": 1.2475083337563667e-05, "loss": 0.0494, "step": 22235 }, { "epoch": 1.1289913193563126, "grad_norm": 0.2371433675289154, "learning_rate": 1.2473391204291252e-05, "loss": 0.0548, "step": 22240 }, { "epoch": 1.129245139347175, "grad_norm": 0.3679490387439728, "learning_rate": 1.2471699071018834e-05, "loss": 0.0564, "step": 22245 }, { "epoch": 1.1294989593380373, "grad_norm": 0.31148838996887207, "learning_rate": 1.2470006937746417e-05, "loss": 0.0543, "step": 22250 }, { "epoch": 1.1297527793289, "grad_norm": 0.3589724004268646, "learning_rate": 1.2468314804474003e-05, "loss": 0.0596, "step": 22255 }, { "epoch": 1.1300065993197623, "grad_norm": 0.3857835531234741, "learning_rate": 1.2466622671201584e-05, "loss": 0.0639, "step": 22260 }, { "epoch": 1.130260419310625, "grad_norm": 0.4105840027332306, "learning_rate": 1.2464930537929168e-05, "loss": 0.0549, "step": 22265 }, { "epoch": 1.1305142393014873, "grad_norm": 0.3158581852912903, "learning_rate": 1.2463238404656752e-05, "loss": 0.0561, "step": 22270 }, { "epoch": 1.13076805929235, "grad_norm": 0.34058722853660583, "learning_rate": 1.2461546271384335e-05, "loss": 0.06, "step": 22275 }, { "epoch": 1.1310218792832123, "grad_norm": 0.6887267827987671, "learning_rate": 1.245985413811192e-05, "loss": 0.0546, "step": 22280 }, { "epoch": 1.131275699274075, "grad_norm": 0.35839903354644775, "learning_rate": 1.2458162004839502e-05, "loss": 0.0513, "step": 22285 }, { "epoch": 1.1315295192649373, "grad_norm": 0.37777021527290344, "learning_rate": 1.2456469871567086e-05, "loss": 0.0562, "step": 22290 }, { "epoch": 1.1317833392557999, "grad_norm": 0.3687744736671448, "learning_rate": 1.245477773829467e-05, "loss": 0.0577, "step": 22295 }, { "epoch": 1.1320371592466623, "grad_norm": 0.5017027854919434, "learning_rate": 1.2453085605022253e-05, "loss": 0.0603, "step": 22300 }, { "epoch": 1.1322909792375246, "grad_norm": 0.43287739157676697, "learning_rate": 1.2451393471749835e-05, "loss": 0.0681, "step": 22305 }, { "epoch": 1.1325447992283872, "grad_norm": 0.6787398457527161, "learning_rate": 1.244970133847742e-05, "loss": 0.049, "step": 22310 }, { "epoch": 1.1327986192192496, "grad_norm": 0.5929518938064575, "learning_rate": 1.2448009205205003e-05, "loss": 0.0557, "step": 22315 }, { "epoch": 1.1330524392101122, "grad_norm": 0.24244466423988342, "learning_rate": 1.2446317071932585e-05, "loss": 0.0502, "step": 22320 }, { "epoch": 1.1333062592009746, "grad_norm": 0.28957462310791016, "learning_rate": 1.244462493866017e-05, "loss": 0.0609, "step": 22325 }, { "epoch": 1.1335600791918372, "grad_norm": 0.3269180953502655, "learning_rate": 1.2442932805387752e-05, "loss": 0.0556, "step": 22330 }, { "epoch": 1.1338138991826996, "grad_norm": 0.41526785492897034, "learning_rate": 1.2441240672115338e-05, "loss": 0.0556, "step": 22335 }, { "epoch": 1.1340677191735622, "grad_norm": 0.3812713027000427, "learning_rate": 1.2439548538842921e-05, "loss": 0.0576, "step": 22340 }, { "epoch": 1.1343215391644246, "grad_norm": 0.3995012044906616, "learning_rate": 1.2437856405570503e-05, "loss": 0.0553, "step": 22345 }, { "epoch": 1.1345753591552872, "grad_norm": 0.6085227727890015, "learning_rate": 1.2436164272298088e-05, "loss": 0.0526, "step": 22350 }, { "epoch": 1.1348291791461496, "grad_norm": 0.3577788174152374, "learning_rate": 1.243447213902567e-05, "loss": 0.0568, "step": 22355 }, { "epoch": 1.135082999137012, "grad_norm": 0.3074178397655487, "learning_rate": 1.2432780005753254e-05, "loss": 0.0503, "step": 22360 }, { "epoch": 1.1353368191278745, "grad_norm": 0.30455219745635986, "learning_rate": 1.2431087872480839e-05, "loss": 0.0593, "step": 22365 }, { "epoch": 1.135590639118737, "grad_norm": 0.4841032028198242, "learning_rate": 1.242939573920842e-05, "loss": 0.0628, "step": 22370 }, { "epoch": 1.1358444591095995, "grad_norm": 0.24335934221744537, "learning_rate": 1.2427703605936003e-05, "loss": 0.0595, "step": 22375 }, { "epoch": 1.136098279100462, "grad_norm": 0.38525867462158203, "learning_rate": 1.2426011472663588e-05, "loss": 0.0683, "step": 22380 }, { "epoch": 1.1363520990913245, "grad_norm": 0.4575713276863098, "learning_rate": 1.2424319339391171e-05, "loss": 0.0688, "step": 22385 }, { "epoch": 1.1366059190821869, "grad_norm": 0.33208250999450684, "learning_rate": 1.2422627206118757e-05, "loss": 0.0542, "step": 22390 }, { "epoch": 1.1368597390730493, "grad_norm": 0.4368082880973816, "learning_rate": 1.2420935072846338e-05, "loss": 0.0597, "step": 22395 }, { "epoch": 1.1371135590639119, "grad_norm": 0.3381536900997162, "learning_rate": 1.241924293957392e-05, "loss": 0.0595, "step": 22400 }, { "epoch": 1.1373673790547745, "grad_norm": 0.40493860840797424, "learning_rate": 1.2417550806301506e-05, "loss": 0.0485, "step": 22405 }, { "epoch": 1.1376211990456369, "grad_norm": 0.3095938265323639, "learning_rate": 1.2415858673029089e-05, "loss": 0.0667, "step": 22410 }, { "epoch": 1.1378750190364992, "grad_norm": 0.35543668270111084, "learning_rate": 1.2414166539756671e-05, "loss": 0.0564, "step": 22415 }, { "epoch": 1.1381288390273618, "grad_norm": 0.35956689715385437, "learning_rate": 1.2412474406484256e-05, "loss": 0.0598, "step": 22420 }, { "epoch": 1.1383826590182242, "grad_norm": 0.33214613795280457, "learning_rate": 1.2410782273211838e-05, "loss": 0.0519, "step": 22425 }, { "epoch": 1.1386364790090868, "grad_norm": 0.43719860911369324, "learning_rate": 1.2409090139939423e-05, "loss": 0.0545, "step": 22430 }, { "epoch": 1.1388902989999492, "grad_norm": 0.37340477108955383, "learning_rate": 1.2407398006667007e-05, "loss": 0.0507, "step": 22435 }, { "epoch": 1.1391441189908118, "grad_norm": 0.2947007715702057, "learning_rate": 1.2405705873394589e-05, "loss": 0.0492, "step": 22440 }, { "epoch": 1.1393979389816742, "grad_norm": 0.3368941843509674, "learning_rate": 1.2404013740122174e-05, "loss": 0.0509, "step": 22445 }, { "epoch": 1.1396517589725366, "grad_norm": 0.36827924847602844, "learning_rate": 1.2402321606849756e-05, "loss": 0.066, "step": 22450 }, { "epoch": 1.1399055789633992, "grad_norm": 0.4480016827583313, "learning_rate": 1.240062947357734e-05, "loss": 0.0541, "step": 22455 }, { "epoch": 1.1401593989542615, "grad_norm": 0.39560964703559875, "learning_rate": 1.2398937340304925e-05, "loss": 0.0692, "step": 22460 }, { "epoch": 1.1404132189451242, "grad_norm": 0.59153813123703, "learning_rate": 1.2397245207032506e-05, "loss": 0.0477, "step": 22465 }, { "epoch": 1.1406670389359865, "grad_norm": 0.3255225419998169, "learning_rate": 1.239555307376009e-05, "loss": 0.0587, "step": 22470 }, { "epoch": 1.1409208589268491, "grad_norm": 0.33760660886764526, "learning_rate": 1.2393860940487674e-05, "loss": 0.0539, "step": 22475 }, { "epoch": 1.1411746789177115, "grad_norm": 1.3096075057983398, "learning_rate": 1.2392168807215257e-05, "loss": 0.0592, "step": 22480 }, { "epoch": 1.1414284989085741, "grad_norm": 0.37213486433029175, "learning_rate": 1.2390476673942842e-05, "loss": 0.0519, "step": 22485 }, { "epoch": 1.1416823188994365, "grad_norm": 0.3870268166065216, "learning_rate": 1.2388784540670424e-05, "loss": 0.0505, "step": 22490 }, { "epoch": 1.141936138890299, "grad_norm": 0.2687552869319916, "learning_rate": 1.2387092407398008e-05, "loss": 0.0592, "step": 22495 }, { "epoch": 1.1421899588811615, "grad_norm": 0.4848724603652954, "learning_rate": 1.2385400274125591e-05, "loss": 0.0663, "step": 22500 }, { "epoch": 1.1424437788720239, "grad_norm": 0.3139934539794922, "learning_rate": 1.2383708140853175e-05, "loss": 0.0502, "step": 22505 }, { "epoch": 1.1426975988628865, "grad_norm": 0.40067628026008606, "learning_rate": 1.2382016007580757e-05, "loss": 0.0549, "step": 22510 }, { "epoch": 1.1429514188537488, "grad_norm": 0.3154310882091522, "learning_rate": 1.2380323874308342e-05, "loss": 0.0561, "step": 22515 }, { "epoch": 1.1432052388446114, "grad_norm": 0.3540889322757721, "learning_rate": 1.2378631741035925e-05, "loss": 0.058, "step": 22520 }, { "epoch": 1.1434590588354738, "grad_norm": 0.36180123686790466, "learning_rate": 1.2376939607763509e-05, "loss": 0.0561, "step": 22525 }, { "epoch": 1.1437128788263364, "grad_norm": 0.3295062780380249, "learning_rate": 1.2375247474491093e-05, "loss": 0.0632, "step": 22530 }, { "epoch": 1.1439666988171988, "grad_norm": 1.8421072959899902, "learning_rate": 1.2373555341218674e-05, "loss": 0.0594, "step": 22535 }, { "epoch": 1.1442205188080614, "grad_norm": 0.5811465978622437, "learning_rate": 1.237186320794626e-05, "loss": 0.0654, "step": 22540 }, { "epoch": 1.1444743387989238, "grad_norm": 0.390346497297287, "learning_rate": 1.2370171074673843e-05, "loss": 0.0588, "step": 22545 }, { "epoch": 1.1447281587897864, "grad_norm": 0.43421465158462524, "learning_rate": 1.2368478941401425e-05, "loss": 0.05, "step": 22550 }, { "epoch": 1.1449819787806488, "grad_norm": 0.29329365491867065, "learning_rate": 1.236678680812901e-05, "loss": 0.0535, "step": 22555 }, { "epoch": 1.1452357987715112, "grad_norm": 0.3655465841293335, "learning_rate": 1.2365094674856592e-05, "loss": 0.0572, "step": 22560 }, { "epoch": 1.1454896187623738, "grad_norm": 0.44629284739494324, "learning_rate": 1.2363402541584176e-05, "loss": 0.0581, "step": 22565 }, { "epoch": 1.1457434387532361, "grad_norm": 0.5228719711303711, "learning_rate": 1.2361710408311761e-05, "loss": 0.0577, "step": 22570 }, { "epoch": 1.1459972587440987, "grad_norm": 0.4526336193084717, "learning_rate": 1.2360018275039343e-05, "loss": 0.0616, "step": 22575 }, { "epoch": 1.1462510787349611, "grad_norm": 1.1659830808639526, "learning_rate": 1.2358326141766928e-05, "loss": 0.0685, "step": 22580 }, { "epoch": 1.1465048987258237, "grad_norm": 0.3989523649215698, "learning_rate": 1.235663400849451e-05, "loss": 0.0617, "step": 22585 }, { "epoch": 1.146758718716686, "grad_norm": 0.5509122610092163, "learning_rate": 1.2354941875222093e-05, "loss": 0.05, "step": 22590 }, { "epoch": 1.1470125387075485, "grad_norm": 0.2802308201789856, "learning_rate": 1.2353249741949679e-05, "loss": 0.0541, "step": 22595 }, { "epoch": 1.147266358698411, "grad_norm": 0.40862369537353516, "learning_rate": 1.235155760867726e-05, "loss": 0.0579, "step": 22600 }, { "epoch": 1.1475201786892735, "grad_norm": 0.44067612290382385, "learning_rate": 1.2349865475404842e-05, "loss": 0.0558, "step": 22605 }, { "epoch": 1.147773998680136, "grad_norm": 0.3719422519207001, "learning_rate": 1.2348173342132428e-05, "loss": 0.0517, "step": 22610 }, { "epoch": 1.1480278186709985, "grad_norm": 0.33248206973075867, "learning_rate": 1.2346481208860011e-05, "loss": 0.0583, "step": 22615 }, { "epoch": 1.148281638661861, "grad_norm": 0.3742452561855316, "learning_rate": 1.2344789075587593e-05, "loss": 0.0538, "step": 22620 }, { "epoch": 1.1485354586527234, "grad_norm": 0.31650519371032715, "learning_rate": 1.2343096942315178e-05, "loss": 0.051, "step": 22625 }, { "epoch": 1.148789278643586, "grad_norm": 0.4304707944393158, "learning_rate": 1.234140480904276e-05, "loss": 0.0506, "step": 22630 }, { "epoch": 1.1490430986344484, "grad_norm": 0.36866042017936707, "learning_rate": 1.2339712675770345e-05, "loss": 0.0643, "step": 22635 }, { "epoch": 1.149296918625311, "grad_norm": 0.3071114718914032, "learning_rate": 1.2338020542497929e-05, "loss": 0.0504, "step": 22640 }, { "epoch": 1.1495507386161734, "grad_norm": 0.39008423686027527, "learning_rate": 1.233632840922551e-05, "loss": 0.0561, "step": 22645 }, { "epoch": 1.1498045586070358, "grad_norm": 0.3482741713523865, "learning_rate": 1.2334636275953096e-05, "loss": 0.0525, "step": 22650 }, { "epoch": 1.1500583785978984, "grad_norm": 0.3156396150588989, "learning_rate": 1.2332944142680678e-05, "loss": 0.0567, "step": 22655 }, { "epoch": 1.1503121985887608, "grad_norm": 0.3933767080307007, "learning_rate": 1.2331252009408261e-05, "loss": 0.0626, "step": 22660 }, { "epoch": 1.1505660185796234, "grad_norm": 0.38870787620544434, "learning_rate": 1.2329559876135847e-05, "loss": 0.061, "step": 22665 }, { "epoch": 1.1508198385704858, "grad_norm": 0.3350406289100647, "learning_rate": 1.2327867742863428e-05, "loss": 0.0553, "step": 22670 }, { "epoch": 1.1510736585613484, "grad_norm": 0.6869404315948486, "learning_rate": 1.2326175609591014e-05, "loss": 0.0477, "step": 22675 }, { "epoch": 1.1513274785522107, "grad_norm": 0.5427852869033813, "learning_rate": 1.2324483476318595e-05, "loss": 0.0537, "step": 22680 }, { "epoch": 1.1515812985430733, "grad_norm": 0.42787012457847595, "learning_rate": 1.2322791343046179e-05, "loss": 0.0551, "step": 22685 }, { "epoch": 1.1518351185339357, "grad_norm": 0.3151043951511383, "learning_rate": 1.2321099209773764e-05, "loss": 0.0548, "step": 22690 }, { "epoch": 1.1520889385247983, "grad_norm": 0.3836911618709564, "learning_rate": 1.2319407076501346e-05, "loss": 0.0511, "step": 22695 }, { "epoch": 1.1523427585156607, "grad_norm": 0.30847352743148804, "learning_rate": 1.231771494322893e-05, "loss": 0.0569, "step": 22700 }, { "epoch": 1.152596578506523, "grad_norm": 0.3887520432472229, "learning_rate": 1.2316022809956513e-05, "loss": 0.0585, "step": 22705 }, { "epoch": 1.1528503984973857, "grad_norm": 0.3231756091117859, "learning_rate": 1.2314330676684097e-05, "loss": 0.0547, "step": 22710 }, { "epoch": 1.153104218488248, "grad_norm": 0.4676765203475952, "learning_rate": 1.2312638543411679e-05, "loss": 0.0527, "step": 22715 }, { "epoch": 1.1533580384791107, "grad_norm": 0.3112950026988983, "learning_rate": 1.2310946410139264e-05, "loss": 0.0489, "step": 22720 }, { "epoch": 1.153611858469973, "grad_norm": 0.25811097025871277, "learning_rate": 1.2309254276866847e-05, "loss": 0.0496, "step": 22725 }, { "epoch": 1.1538656784608357, "grad_norm": 0.3294421136379242, "learning_rate": 1.2307562143594431e-05, "loss": 0.0508, "step": 22730 }, { "epoch": 1.154119498451698, "grad_norm": 0.4806308448314667, "learning_rate": 1.2305870010322014e-05, "loss": 0.0732, "step": 22735 }, { "epoch": 1.1543733184425604, "grad_norm": 0.3521295189857483, "learning_rate": 1.2304177877049596e-05, "loss": 0.0516, "step": 22740 }, { "epoch": 1.154627138433423, "grad_norm": 0.3540806472301483, "learning_rate": 1.2302485743777182e-05, "loss": 0.0489, "step": 22745 }, { "epoch": 1.1548809584242856, "grad_norm": 0.38654571771621704, "learning_rate": 1.2300793610504765e-05, "loss": 0.0492, "step": 22750 }, { "epoch": 1.155134778415148, "grad_norm": 0.36057183146476746, "learning_rate": 1.2299101477232347e-05, "loss": 0.0521, "step": 22755 }, { "epoch": 1.1553885984060104, "grad_norm": 0.3039451241493225, "learning_rate": 1.2297409343959932e-05, "loss": 0.0605, "step": 22760 }, { "epoch": 1.155642418396873, "grad_norm": 0.2759784758090973, "learning_rate": 1.2295717210687514e-05, "loss": 0.0529, "step": 22765 }, { "epoch": 1.1558962383877354, "grad_norm": 0.579740583896637, "learning_rate": 1.22940250774151e-05, "loss": 0.0673, "step": 22770 }, { "epoch": 1.156150058378598, "grad_norm": 0.40537041425704956, "learning_rate": 1.2292332944142683e-05, "loss": 0.0514, "step": 22775 }, { "epoch": 1.1564038783694603, "grad_norm": 0.3935674726963043, "learning_rate": 1.2290640810870265e-05, "loss": 0.0556, "step": 22780 }, { "epoch": 1.156657698360323, "grad_norm": 0.7717337608337402, "learning_rate": 1.228894867759785e-05, "loss": 0.0569, "step": 22785 }, { "epoch": 1.1569115183511853, "grad_norm": 0.28389760851860046, "learning_rate": 1.2287256544325432e-05, "loss": 0.059, "step": 22790 }, { "epoch": 1.1571653383420477, "grad_norm": 0.7284798622131348, "learning_rate": 1.2285564411053015e-05, "loss": 0.0533, "step": 22795 }, { "epoch": 1.1574191583329103, "grad_norm": 0.42667490243911743, "learning_rate": 1.2283872277780599e-05, "loss": 0.0586, "step": 22800 }, { "epoch": 1.1576729783237727, "grad_norm": 0.3872958719730377, "learning_rate": 1.2282180144508182e-05, "loss": 0.0446, "step": 22805 }, { "epoch": 1.1579267983146353, "grad_norm": 0.396384060382843, "learning_rate": 1.2280488011235764e-05, "loss": 0.0546, "step": 22810 }, { "epoch": 1.1581806183054977, "grad_norm": 0.4082648456096649, "learning_rate": 1.227879587796335e-05, "loss": 0.0594, "step": 22815 }, { "epoch": 1.1584344382963603, "grad_norm": 0.6174493432044983, "learning_rate": 1.2277103744690933e-05, "loss": 0.0501, "step": 22820 }, { "epoch": 1.1586882582872227, "grad_norm": 0.4318661391735077, "learning_rate": 1.2275411611418517e-05, "loss": 0.0598, "step": 22825 }, { "epoch": 1.1589420782780853, "grad_norm": 0.30461791157722473, "learning_rate": 1.22737194781461e-05, "loss": 0.0557, "step": 22830 }, { "epoch": 1.1591958982689476, "grad_norm": 0.4471897780895233, "learning_rate": 1.2272027344873682e-05, "loss": 0.0568, "step": 22835 }, { "epoch": 1.1594497182598102, "grad_norm": 0.6687557697296143, "learning_rate": 1.2270335211601267e-05, "loss": 0.0613, "step": 22840 }, { "epoch": 1.1597035382506726, "grad_norm": 0.457183301448822, "learning_rate": 1.226864307832885e-05, "loss": 0.0554, "step": 22845 }, { "epoch": 1.159957358241535, "grad_norm": 0.3360708951950073, "learning_rate": 1.2266950945056433e-05, "loss": 0.0591, "step": 22850 }, { "epoch": 1.1602111782323976, "grad_norm": 0.5214487314224243, "learning_rate": 1.2265258811784018e-05, "loss": 0.0533, "step": 22855 }, { "epoch": 1.16046499822326, "grad_norm": 0.3773740828037262, "learning_rate": 1.22635666785116e-05, "loss": 0.0551, "step": 22860 }, { "epoch": 1.1607188182141226, "grad_norm": 0.40249964594841003, "learning_rate": 1.2261874545239183e-05, "loss": 0.056, "step": 22865 }, { "epoch": 1.160972638204985, "grad_norm": 0.30552637577056885, "learning_rate": 1.2260182411966768e-05, "loss": 0.0502, "step": 22870 }, { "epoch": 1.1612264581958476, "grad_norm": 0.832575798034668, "learning_rate": 1.225849027869435e-05, "loss": 0.0637, "step": 22875 }, { "epoch": 1.16148027818671, "grad_norm": 0.8184303045272827, "learning_rate": 1.2256798145421936e-05, "loss": 0.0623, "step": 22880 }, { "epoch": 1.1617340981775726, "grad_norm": 0.45047980546951294, "learning_rate": 1.2255106012149517e-05, "loss": 0.0566, "step": 22885 }, { "epoch": 1.161987918168435, "grad_norm": 0.42531150579452515, "learning_rate": 1.2253413878877101e-05, "loss": 0.0478, "step": 22890 }, { "epoch": 1.1622417381592975, "grad_norm": 0.8212893605232239, "learning_rate": 1.2251721745604686e-05, "loss": 0.0766, "step": 22895 }, { "epoch": 1.16249555815016, "grad_norm": 0.7722528576850891, "learning_rate": 1.2250029612332268e-05, "loss": 0.0638, "step": 22900 }, { "epoch": 1.1627493781410223, "grad_norm": 0.7015485167503357, "learning_rate": 1.2248337479059852e-05, "loss": 0.0524, "step": 22905 }, { "epoch": 1.163003198131885, "grad_norm": 0.6041594743728638, "learning_rate": 1.2246645345787435e-05, "loss": 0.0542, "step": 22910 }, { "epoch": 1.1632570181227473, "grad_norm": 0.5705603957176208, "learning_rate": 1.2244953212515019e-05, "loss": 0.051, "step": 22915 }, { "epoch": 1.16351083811361, "grad_norm": 0.3686521351337433, "learning_rate": 1.2243261079242604e-05, "loss": 0.0517, "step": 22920 }, { "epoch": 1.1637646581044723, "grad_norm": 0.9701533317565918, "learning_rate": 1.2241568945970186e-05, "loss": 0.0582, "step": 22925 }, { "epoch": 1.1640184780953349, "grad_norm": 0.47844773530960083, "learning_rate": 1.223987681269777e-05, "loss": 0.0503, "step": 22930 }, { "epoch": 1.1642722980861973, "grad_norm": 0.5356249809265137, "learning_rate": 1.2238184679425353e-05, "loss": 0.0538, "step": 22935 }, { "epoch": 1.1645261180770596, "grad_norm": 0.4381619393825531, "learning_rate": 1.2236492546152936e-05, "loss": 0.0558, "step": 22940 }, { "epoch": 1.1647799380679222, "grad_norm": 0.4224344789981842, "learning_rate": 1.2234800412880518e-05, "loss": 0.0653, "step": 22945 }, { "epoch": 1.1650337580587846, "grad_norm": 0.48513519763946533, "learning_rate": 1.2233108279608103e-05, "loss": 0.0508, "step": 22950 }, { "epoch": 1.1652875780496472, "grad_norm": 0.31747058033943176, "learning_rate": 1.2231416146335687e-05, "loss": 0.0609, "step": 22955 }, { "epoch": 1.1655413980405096, "grad_norm": 0.9931601881980896, "learning_rate": 1.2229724013063269e-05, "loss": 0.0659, "step": 22960 }, { "epoch": 1.1657952180313722, "grad_norm": 0.30699819326400757, "learning_rate": 1.2228031879790854e-05, "loss": 0.0567, "step": 22965 }, { "epoch": 1.1660490380222346, "grad_norm": 0.5038007497787476, "learning_rate": 1.2226339746518436e-05, "loss": 0.0563, "step": 22970 }, { "epoch": 1.1663028580130972, "grad_norm": 0.8224351406097412, "learning_rate": 1.2224647613246021e-05, "loss": 0.0502, "step": 22975 }, { "epoch": 1.1665566780039596, "grad_norm": 0.3340928852558136, "learning_rate": 1.2222955479973603e-05, "loss": 0.0534, "step": 22980 }, { "epoch": 1.1668104979948222, "grad_norm": 0.2981889843940735, "learning_rate": 1.2221263346701187e-05, "loss": 0.0506, "step": 22985 }, { "epoch": 1.1670643179856846, "grad_norm": 0.46697402000427246, "learning_rate": 1.2219571213428772e-05, "loss": 0.055, "step": 22990 }, { "epoch": 1.167318137976547, "grad_norm": 0.3900391161441803, "learning_rate": 1.2217879080156354e-05, "loss": 0.0646, "step": 22995 }, { "epoch": 1.1675719579674095, "grad_norm": 0.25162240862846375, "learning_rate": 1.2216186946883937e-05, "loss": 0.0522, "step": 23000 }, { "epoch": 1.167825777958272, "grad_norm": 0.349566251039505, "learning_rate": 1.221449481361152e-05, "loss": 0.0526, "step": 23005 }, { "epoch": 1.1680795979491345, "grad_norm": 0.5660791993141174, "learning_rate": 1.2212802680339104e-05, "loss": 0.0635, "step": 23010 }, { "epoch": 1.168333417939997, "grad_norm": 0.42194125056266785, "learning_rate": 1.221111054706669e-05, "loss": 0.0556, "step": 23015 }, { "epoch": 1.1685872379308595, "grad_norm": 0.38500216603279114, "learning_rate": 1.2209418413794271e-05, "loss": 0.0589, "step": 23020 }, { "epoch": 1.1688410579217219, "grad_norm": 0.6102268695831299, "learning_rate": 1.2207726280521855e-05, "loss": 0.0497, "step": 23025 }, { "epoch": 1.1690948779125845, "grad_norm": 0.30943453311920166, "learning_rate": 1.2206034147249438e-05, "loss": 0.0577, "step": 23030 }, { "epoch": 1.1693486979034469, "grad_norm": 0.413257896900177, "learning_rate": 1.2204342013977022e-05, "loss": 0.0482, "step": 23035 }, { "epoch": 1.1696025178943095, "grad_norm": 0.5840480923652649, "learning_rate": 1.2202649880704604e-05, "loss": 0.0581, "step": 23040 }, { "epoch": 1.1698563378851718, "grad_norm": 0.3983316719532013, "learning_rate": 1.2200957747432189e-05, "loss": 0.0566, "step": 23045 }, { "epoch": 1.1701101578760342, "grad_norm": 0.6954087615013123, "learning_rate": 1.2199265614159773e-05, "loss": 0.0557, "step": 23050 }, { "epoch": 1.1703639778668968, "grad_norm": 0.36523228883743286, "learning_rate": 1.2197573480887355e-05, "loss": 0.0611, "step": 23055 }, { "epoch": 1.1706177978577592, "grad_norm": 0.4090610146522522, "learning_rate": 1.219588134761494e-05, "loss": 0.0591, "step": 23060 }, { "epoch": 1.1708716178486218, "grad_norm": 0.3347264528274536, "learning_rate": 1.2194189214342522e-05, "loss": 0.0537, "step": 23065 }, { "epoch": 1.1711254378394842, "grad_norm": 0.8110659718513489, "learning_rate": 1.2192497081070107e-05, "loss": 0.0716, "step": 23070 }, { "epoch": 1.1713792578303468, "grad_norm": 0.35596945881843567, "learning_rate": 1.219080494779769e-05, "loss": 0.0577, "step": 23075 }, { "epoch": 1.1716330778212092, "grad_norm": 0.39061930775642395, "learning_rate": 1.2189112814525272e-05, "loss": 0.0533, "step": 23080 }, { "epoch": 1.1718868978120716, "grad_norm": 0.4372428357601166, "learning_rate": 1.2187420681252857e-05, "loss": 0.0631, "step": 23085 }, { "epoch": 1.1721407178029342, "grad_norm": 0.35665372014045715, "learning_rate": 1.218572854798044e-05, "loss": 0.0549, "step": 23090 }, { "epoch": 1.1723945377937965, "grad_norm": 0.3845728933811188, "learning_rate": 1.2184036414708023e-05, "loss": 0.0632, "step": 23095 }, { "epoch": 1.1726483577846591, "grad_norm": 0.3194248080253601, "learning_rate": 1.2182344281435608e-05, "loss": 0.0512, "step": 23100 }, { "epoch": 1.1729021777755215, "grad_norm": 0.3907899260520935, "learning_rate": 1.218065214816319e-05, "loss": 0.0601, "step": 23105 }, { "epoch": 1.1731559977663841, "grad_norm": 0.37811875343322754, "learning_rate": 1.2178960014890774e-05, "loss": 0.0513, "step": 23110 }, { "epoch": 1.1734098177572465, "grad_norm": 0.2940334677696228, "learning_rate": 1.2177267881618357e-05, "loss": 0.0485, "step": 23115 }, { "epoch": 1.1736636377481091, "grad_norm": 0.3130854666233063, "learning_rate": 1.217557574834594e-05, "loss": 0.055, "step": 23120 }, { "epoch": 1.1739174577389715, "grad_norm": 0.5939212441444397, "learning_rate": 1.2173883615073526e-05, "loss": 0.0619, "step": 23125 }, { "epoch": 1.174171277729834, "grad_norm": 0.3176313042640686, "learning_rate": 1.2172191481801108e-05, "loss": 0.0541, "step": 23130 }, { "epoch": 1.1744250977206965, "grad_norm": 0.2850649654865265, "learning_rate": 1.2170499348528691e-05, "loss": 0.0575, "step": 23135 }, { "epoch": 1.1746789177115589, "grad_norm": 0.4402577877044678, "learning_rate": 1.2168807215256275e-05, "loss": 0.0668, "step": 23140 }, { "epoch": 1.1749327377024215, "grad_norm": 0.3713493347167969, "learning_rate": 1.2167115081983858e-05, "loss": 0.0547, "step": 23145 }, { "epoch": 1.1751865576932838, "grad_norm": 0.38666704297065735, "learning_rate": 1.216542294871144e-05, "loss": 0.0642, "step": 23150 }, { "epoch": 1.1754403776841464, "grad_norm": 0.4440547823905945, "learning_rate": 1.2163730815439025e-05, "loss": 0.0482, "step": 23155 }, { "epoch": 1.1756941976750088, "grad_norm": 0.22269631922245026, "learning_rate": 1.2162038682166607e-05, "loss": 0.048, "step": 23160 }, { "epoch": 1.1759480176658714, "grad_norm": 0.4494595527648926, "learning_rate": 1.2160346548894193e-05, "loss": 0.0526, "step": 23165 }, { "epoch": 1.1762018376567338, "grad_norm": 0.36091774702072144, "learning_rate": 1.2158654415621776e-05, "loss": 0.0502, "step": 23170 }, { "epoch": 1.1764556576475964, "grad_norm": 0.3348097503185272, "learning_rate": 1.2156962282349358e-05, "loss": 0.0491, "step": 23175 }, { "epoch": 1.1767094776384588, "grad_norm": 0.345774382352829, "learning_rate": 1.2155270149076943e-05, "loss": 0.0471, "step": 23180 }, { "epoch": 1.1769632976293214, "grad_norm": 0.3489903211593628, "learning_rate": 1.2153578015804525e-05, "loss": 0.0516, "step": 23185 }, { "epoch": 1.1772171176201838, "grad_norm": 0.2823465168476105, "learning_rate": 1.2151885882532109e-05, "loss": 0.0553, "step": 23190 }, { "epoch": 1.1774709376110462, "grad_norm": 0.553920328617096, "learning_rate": 1.2150193749259694e-05, "loss": 0.0514, "step": 23195 }, { "epoch": 1.1777247576019088, "grad_norm": 0.3592303991317749, "learning_rate": 1.2148501615987276e-05, "loss": 0.0626, "step": 23200 }, { "epoch": 1.1779785775927711, "grad_norm": 0.38532131910324097, "learning_rate": 1.214680948271486e-05, "loss": 0.0564, "step": 23205 }, { "epoch": 1.1782323975836337, "grad_norm": 0.27536770701408386, "learning_rate": 1.2145117349442443e-05, "loss": 0.0545, "step": 23210 }, { "epoch": 1.1784862175744961, "grad_norm": 0.44083353877067566, "learning_rate": 1.2143425216170026e-05, "loss": 0.058, "step": 23215 }, { "epoch": 1.1787400375653587, "grad_norm": 0.30215704441070557, "learning_rate": 1.2141733082897611e-05, "loss": 0.0515, "step": 23220 }, { "epoch": 1.178993857556221, "grad_norm": 0.2528264820575714, "learning_rate": 1.2140040949625193e-05, "loss": 0.0469, "step": 23225 }, { "epoch": 1.1792476775470835, "grad_norm": 0.3954659700393677, "learning_rate": 1.2138348816352777e-05, "loss": 0.0583, "step": 23230 }, { "epoch": 1.179501497537946, "grad_norm": 0.30110490322113037, "learning_rate": 1.213665668308036e-05, "loss": 0.0541, "step": 23235 }, { "epoch": 1.1797553175288087, "grad_norm": 0.47874072194099426, "learning_rate": 1.2134964549807944e-05, "loss": 0.0599, "step": 23240 }, { "epoch": 1.180009137519671, "grad_norm": 0.34137025475502014, "learning_rate": 1.2133272416535526e-05, "loss": 0.0576, "step": 23245 }, { "epoch": 1.1802629575105335, "grad_norm": 0.43007606267929077, "learning_rate": 1.2131580283263111e-05, "loss": 0.0609, "step": 23250 }, { "epoch": 1.180516777501396, "grad_norm": 0.35884323716163635, "learning_rate": 1.2129888149990695e-05, "loss": 0.0535, "step": 23255 }, { "epoch": 1.1807705974922584, "grad_norm": 0.3358921408653259, "learning_rate": 1.2128196016718276e-05, "loss": 0.0587, "step": 23260 }, { "epoch": 1.181024417483121, "grad_norm": 0.3821486830711365, "learning_rate": 1.2126503883445862e-05, "loss": 0.0604, "step": 23265 }, { "epoch": 1.1812782374739834, "grad_norm": 0.5798441767692566, "learning_rate": 1.2124811750173444e-05, "loss": 0.0598, "step": 23270 }, { "epoch": 1.181532057464846, "grad_norm": 0.344942569732666, "learning_rate": 1.2123119616901029e-05, "loss": 0.0614, "step": 23275 }, { "epoch": 1.1817858774557084, "grad_norm": 0.5960049629211426, "learning_rate": 1.2121427483628612e-05, "loss": 0.0595, "step": 23280 }, { "epoch": 1.1820396974465708, "grad_norm": 0.37386181950569153, "learning_rate": 1.2119735350356194e-05, "loss": 0.0579, "step": 23285 }, { "epoch": 1.1822935174374334, "grad_norm": 0.36428967118263245, "learning_rate": 1.211804321708378e-05, "loss": 0.0623, "step": 23290 }, { "epoch": 1.1825473374282958, "grad_norm": 0.39738011360168457, "learning_rate": 1.2116351083811361e-05, "loss": 0.0622, "step": 23295 }, { "epoch": 1.1828011574191584, "grad_norm": 0.5076942443847656, "learning_rate": 1.2114658950538945e-05, "loss": 0.0531, "step": 23300 }, { "epoch": 1.1830549774100207, "grad_norm": 0.3927176296710968, "learning_rate": 1.211296681726653e-05, "loss": 0.0598, "step": 23305 }, { "epoch": 1.1833087974008833, "grad_norm": 0.5115927457809448, "learning_rate": 1.2111274683994112e-05, "loss": 0.0533, "step": 23310 }, { "epoch": 1.1835626173917457, "grad_norm": 0.35300958156585693, "learning_rate": 1.2109582550721697e-05, "loss": 0.0498, "step": 23315 }, { "epoch": 1.1838164373826083, "grad_norm": 0.29630640149116516, "learning_rate": 1.2107890417449279e-05, "loss": 0.0604, "step": 23320 }, { "epoch": 1.1840702573734707, "grad_norm": 0.3651716411113739, "learning_rate": 1.2106198284176863e-05, "loss": 0.048, "step": 23325 }, { "epoch": 1.1843240773643333, "grad_norm": 0.2765333652496338, "learning_rate": 1.2104506150904448e-05, "loss": 0.0582, "step": 23330 }, { "epoch": 1.1845778973551957, "grad_norm": 0.33312156796455383, "learning_rate": 1.210281401763203e-05, "loss": 0.0541, "step": 23335 }, { "epoch": 1.184831717346058, "grad_norm": 0.42472630739212036, "learning_rate": 1.2101121884359613e-05, "loss": 0.06, "step": 23340 }, { "epoch": 1.1850855373369207, "grad_norm": 0.39112213253974915, "learning_rate": 1.2099429751087197e-05, "loss": 0.0551, "step": 23345 }, { "epoch": 1.185339357327783, "grad_norm": 0.4763796627521515, "learning_rate": 1.209773761781478e-05, "loss": 0.0501, "step": 23350 }, { "epoch": 1.1855931773186457, "grad_norm": 0.3270481824874878, "learning_rate": 1.2096045484542362e-05, "loss": 0.0492, "step": 23355 }, { "epoch": 1.185846997309508, "grad_norm": 0.32376089692115784, "learning_rate": 1.2094353351269947e-05, "loss": 0.0592, "step": 23360 }, { "epoch": 1.1861008173003706, "grad_norm": 0.5592198371887207, "learning_rate": 1.209266121799753e-05, "loss": 0.0584, "step": 23365 }, { "epoch": 1.186354637291233, "grad_norm": 0.34053146839141846, "learning_rate": 1.2090969084725114e-05, "loss": 0.0616, "step": 23370 }, { "epoch": 1.1866084572820956, "grad_norm": 0.4493182301521301, "learning_rate": 1.2089276951452698e-05, "loss": 0.0596, "step": 23375 }, { "epoch": 1.186862277272958, "grad_norm": 0.4678153693675995, "learning_rate": 1.208758481818028e-05, "loss": 0.0567, "step": 23380 }, { "epoch": 1.1871160972638206, "grad_norm": 0.4072043001651764, "learning_rate": 1.2085892684907865e-05, "loss": 0.0569, "step": 23385 }, { "epoch": 1.187369917254683, "grad_norm": 0.6094187498092651, "learning_rate": 1.2084200551635447e-05, "loss": 0.0656, "step": 23390 }, { "epoch": 1.1876237372455454, "grad_norm": 0.420998752117157, "learning_rate": 1.208250841836303e-05, "loss": 0.0612, "step": 23395 }, { "epoch": 1.187877557236408, "grad_norm": 0.5017419457435608, "learning_rate": 1.2080816285090616e-05, "loss": 0.0602, "step": 23400 }, { "epoch": 1.1881313772272704, "grad_norm": 0.2493000626564026, "learning_rate": 1.2079124151818198e-05, "loss": 0.0518, "step": 23405 }, { "epoch": 1.188385197218133, "grad_norm": 0.5939289331436157, "learning_rate": 1.2077432018545783e-05, "loss": 0.057, "step": 23410 }, { "epoch": 1.1886390172089953, "grad_norm": 0.34695038199424744, "learning_rate": 1.2075739885273365e-05, "loss": 0.0555, "step": 23415 }, { "epoch": 1.188892837199858, "grad_norm": 0.6310956478118896, "learning_rate": 1.2074047752000948e-05, "loss": 0.0516, "step": 23420 }, { "epoch": 1.1891466571907203, "grad_norm": 0.5171706080436707, "learning_rate": 1.2072355618728533e-05, "loss": 0.0543, "step": 23425 }, { "epoch": 1.1894004771815827, "grad_norm": 0.3515871465206146, "learning_rate": 1.2070663485456115e-05, "loss": 0.0583, "step": 23430 }, { "epoch": 1.1896542971724453, "grad_norm": 0.2978825271129608, "learning_rate": 1.2068971352183699e-05, "loss": 0.0456, "step": 23435 }, { "epoch": 1.1899081171633077, "grad_norm": 0.41832324862480164, "learning_rate": 1.2067279218911282e-05, "loss": 0.0531, "step": 23440 }, { "epoch": 1.1901619371541703, "grad_norm": 0.4018298387527466, "learning_rate": 1.2065587085638866e-05, "loss": 0.0523, "step": 23445 }, { "epoch": 1.1904157571450327, "grad_norm": 0.24528133869171143, "learning_rate": 1.2063894952366448e-05, "loss": 0.0498, "step": 23450 }, { "epoch": 1.1906695771358953, "grad_norm": 0.36855852603912354, "learning_rate": 1.2062202819094033e-05, "loss": 0.0533, "step": 23455 }, { "epoch": 1.1909233971267577, "grad_norm": 0.4531480073928833, "learning_rate": 1.2060510685821617e-05, "loss": 0.0552, "step": 23460 }, { "epoch": 1.1911772171176203, "grad_norm": 0.5693694949150085, "learning_rate": 1.20588185525492e-05, "loss": 0.0642, "step": 23465 }, { "epoch": 1.1914310371084826, "grad_norm": 0.3737713694572449, "learning_rate": 1.2057126419276784e-05, "loss": 0.0535, "step": 23470 }, { "epoch": 1.1916848570993452, "grad_norm": 0.40837082266807556, "learning_rate": 1.2055434286004365e-05, "loss": 0.0554, "step": 23475 }, { "epoch": 1.1919386770902076, "grad_norm": 0.2771040201187134, "learning_rate": 1.205374215273195e-05, "loss": 0.0503, "step": 23480 }, { "epoch": 1.19219249708107, "grad_norm": 0.2847016453742981, "learning_rate": 1.2052050019459534e-05, "loss": 0.0525, "step": 23485 }, { "epoch": 1.1924463170719326, "grad_norm": 0.2703809440135956, "learning_rate": 1.2050357886187116e-05, "loss": 0.0447, "step": 23490 }, { "epoch": 1.192700137062795, "grad_norm": 0.6293087005615234, "learning_rate": 1.2048665752914701e-05, "loss": 0.0614, "step": 23495 }, { "epoch": 1.1929539570536576, "grad_norm": 0.27921855449676514, "learning_rate": 1.2046973619642283e-05, "loss": 0.0458, "step": 23500 }, { "epoch": 1.19320777704452, "grad_norm": 0.45750892162323, "learning_rate": 1.2045281486369867e-05, "loss": 0.0526, "step": 23505 }, { "epoch": 1.1934615970353826, "grad_norm": 0.3219360113143921, "learning_rate": 1.2043589353097452e-05, "loss": 0.0477, "step": 23510 }, { "epoch": 1.193715417026245, "grad_norm": 0.5314846634864807, "learning_rate": 1.2041897219825034e-05, "loss": 0.0651, "step": 23515 }, { "epoch": 1.1939692370171076, "grad_norm": 0.410866916179657, "learning_rate": 1.2040205086552619e-05, "loss": 0.0584, "step": 23520 }, { "epoch": 1.19422305700797, "grad_norm": 0.2978534996509552, "learning_rate": 1.2038512953280201e-05, "loss": 0.0476, "step": 23525 }, { "epoch": 1.1944768769988325, "grad_norm": 0.2534315586090088, "learning_rate": 1.2036820820007784e-05, "loss": 0.0458, "step": 23530 }, { "epoch": 1.194730696989695, "grad_norm": 0.40118440985679626, "learning_rate": 1.203512868673537e-05, "loss": 0.0551, "step": 23535 }, { "epoch": 1.1949845169805573, "grad_norm": 0.44053852558135986, "learning_rate": 1.2033436553462952e-05, "loss": 0.0588, "step": 23540 }, { "epoch": 1.19523833697142, "grad_norm": 0.41670575737953186, "learning_rate": 1.2031744420190533e-05, "loss": 0.0581, "step": 23545 }, { "epoch": 1.1954921569622823, "grad_norm": 0.30073875188827515, "learning_rate": 1.2030052286918119e-05, "loss": 0.0507, "step": 23550 }, { "epoch": 1.1957459769531449, "grad_norm": 0.43326103687286377, "learning_rate": 1.2028360153645702e-05, "loss": 0.0632, "step": 23555 }, { "epoch": 1.1959997969440073, "grad_norm": 0.30326777696609497, "learning_rate": 1.2026668020373287e-05, "loss": 0.0525, "step": 23560 }, { "epoch": 1.1962536169348699, "grad_norm": 0.36257094144821167, "learning_rate": 1.202497588710087e-05, "loss": 0.0551, "step": 23565 }, { "epoch": 1.1965074369257322, "grad_norm": 0.32513904571533203, "learning_rate": 1.2023283753828451e-05, "loss": 0.0573, "step": 23570 }, { "epoch": 1.1967612569165946, "grad_norm": 0.3686450719833374, "learning_rate": 1.2021591620556036e-05, "loss": 0.0547, "step": 23575 }, { "epoch": 1.1970150769074572, "grad_norm": 0.3500473201274872, "learning_rate": 1.201989948728362e-05, "loss": 0.0478, "step": 23580 }, { "epoch": 1.1972688968983198, "grad_norm": 0.43649592995643616, "learning_rate": 1.2018207354011202e-05, "loss": 0.0501, "step": 23585 }, { "epoch": 1.1975227168891822, "grad_norm": 0.22626397013664246, "learning_rate": 1.2016515220738787e-05, "loss": 0.0556, "step": 23590 }, { "epoch": 1.1977765368800446, "grad_norm": 0.3905506730079651, "learning_rate": 1.2014823087466369e-05, "loss": 0.0703, "step": 23595 }, { "epoch": 1.1980303568709072, "grad_norm": 0.34414008259773254, "learning_rate": 1.2013130954193952e-05, "loss": 0.0523, "step": 23600 }, { "epoch": 1.1982841768617696, "grad_norm": 0.32847830653190613, "learning_rate": 1.2011438820921538e-05, "loss": 0.0649, "step": 23605 }, { "epoch": 1.1985379968526322, "grad_norm": 0.35023289918899536, "learning_rate": 1.200974668764912e-05, "loss": 0.0598, "step": 23610 }, { "epoch": 1.1987918168434946, "grad_norm": 0.29963234066963196, "learning_rate": 1.2008054554376705e-05, "loss": 0.0586, "step": 23615 }, { "epoch": 1.1990456368343572, "grad_norm": 0.3658560812473297, "learning_rate": 1.2006362421104287e-05, "loss": 0.0548, "step": 23620 }, { "epoch": 1.1992994568252195, "grad_norm": 0.6136611700057983, "learning_rate": 1.200467028783187e-05, "loss": 0.0572, "step": 23625 }, { "epoch": 1.199553276816082, "grad_norm": 0.39471113681793213, "learning_rate": 1.2002978154559455e-05, "loss": 0.0498, "step": 23630 }, { "epoch": 1.1998070968069445, "grad_norm": 0.592869758605957, "learning_rate": 1.2001286021287037e-05, "loss": 0.0608, "step": 23635 }, { "epoch": 1.200060916797807, "grad_norm": 0.41724860668182373, "learning_rate": 1.199959388801462e-05, "loss": 0.0513, "step": 23640 }, { "epoch": 1.2003147367886695, "grad_norm": 0.31232571601867676, "learning_rate": 1.1997901754742204e-05, "loss": 0.0612, "step": 23645 }, { "epoch": 1.200568556779532, "grad_norm": 0.33139002323150635, "learning_rate": 1.1996209621469788e-05, "loss": 0.052, "step": 23650 }, { "epoch": 1.2008223767703945, "grad_norm": 0.3042997717857361, "learning_rate": 1.1994517488197373e-05, "loss": 0.0504, "step": 23655 }, { "epoch": 1.2010761967612569, "grad_norm": 0.40804755687713623, "learning_rate": 1.1992825354924955e-05, "loss": 0.0535, "step": 23660 }, { "epoch": 1.2013300167521195, "grad_norm": 0.41323235630989075, "learning_rate": 1.1991133221652538e-05, "loss": 0.0618, "step": 23665 }, { "epoch": 1.2015838367429819, "grad_norm": 0.375251442193985, "learning_rate": 1.1989441088380122e-05, "loss": 0.0585, "step": 23670 }, { "epoch": 1.2018376567338445, "grad_norm": 0.4380832016468048, "learning_rate": 1.1987748955107706e-05, "loss": 0.0574, "step": 23675 }, { "epoch": 1.2020914767247068, "grad_norm": 0.5451785326004028, "learning_rate": 1.1986056821835287e-05, "loss": 0.0532, "step": 23680 }, { "epoch": 1.2023452967155692, "grad_norm": 0.3498691916465759, "learning_rate": 1.1984364688562873e-05, "loss": 0.0555, "step": 23685 }, { "epoch": 1.2025991167064318, "grad_norm": 0.31484562158584595, "learning_rate": 1.1982672555290456e-05, "loss": 0.054, "step": 23690 }, { "epoch": 1.2028529366972942, "grad_norm": 0.3103238642215729, "learning_rate": 1.1980980422018038e-05, "loss": 0.0557, "step": 23695 }, { "epoch": 1.2031067566881568, "grad_norm": 0.3442491590976715, "learning_rate": 1.1979288288745623e-05, "loss": 0.0488, "step": 23700 }, { "epoch": 1.2033605766790192, "grad_norm": 0.5479670166969299, "learning_rate": 1.1977596155473205e-05, "loss": 0.0545, "step": 23705 }, { "epoch": 1.2036143966698818, "grad_norm": 0.43110498785972595, "learning_rate": 1.197590402220079e-05, "loss": 0.0564, "step": 23710 }, { "epoch": 1.2038682166607442, "grad_norm": 0.3031986951828003, "learning_rate": 1.1974211888928374e-05, "loss": 0.0654, "step": 23715 }, { "epoch": 1.2041220366516068, "grad_norm": 0.4608815312385559, "learning_rate": 1.1972519755655956e-05, "loss": 0.0486, "step": 23720 }, { "epoch": 1.2043758566424692, "grad_norm": 0.31896111369132996, "learning_rate": 1.1970827622383541e-05, "loss": 0.0569, "step": 23725 }, { "epoch": 1.2046296766333318, "grad_norm": 0.3649381697177887, "learning_rate": 1.1969135489111123e-05, "loss": 0.0501, "step": 23730 }, { "epoch": 1.2048834966241941, "grad_norm": 0.45164427161216736, "learning_rate": 1.1967443355838706e-05, "loss": 0.0505, "step": 23735 }, { "epoch": 1.2051373166150565, "grad_norm": 0.3351602554321289, "learning_rate": 1.1965751222566292e-05, "loss": 0.0471, "step": 23740 }, { "epoch": 1.2053911366059191, "grad_norm": 0.4169917702674866, "learning_rate": 1.1964059089293874e-05, "loss": 0.0526, "step": 23745 }, { "epoch": 1.2056449565967815, "grad_norm": 0.5554206967353821, "learning_rate": 1.1962366956021455e-05, "loss": 0.0529, "step": 23750 }, { "epoch": 1.205898776587644, "grad_norm": 0.4624297320842743, "learning_rate": 1.196067482274904e-05, "loss": 0.0504, "step": 23755 }, { "epoch": 1.2061525965785065, "grad_norm": 0.2930656671524048, "learning_rate": 1.1958982689476624e-05, "loss": 0.0548, "step": 23760 }, { "epoch": 1.206406416569369, "grad_norm": 0.2735726833343506, "learning_rate": 1.195729055620421e-05, "loss": 0.0532, "step": 23765 }, { "epoch": 1.2066602365602315, "grad_norm": 0.3359985053539276, "learning_rate": 1.1955598422931791e-05, "loss": 0.0601, "step": 23770 }, { "epoch": 1.2069140565510939, "grad_norm": 0.31836122274398804, "learning_rate": 1.1953906289659373e-05, "loss": 0.0541, "step": 23775 }, { "epoch": 1.2071678765419565, "grad_norm": 0.5076533555984497, "learning_rate": 1.1952214156386958e-05, "loss": 0.0467, "step": 23780 }, { "epoch": 1.2074216965328188, "grad_norm": 0.4004075229167938, "learning_rate": 1.1950522023114542e-05, "loss": 0.06, "step": 23785 }, { "epoch": 1.2076755165236814, "grad_norm": 0.5112072229385376, "learning_rate": 1.1948829889842124e-05, "loss": 0.0576, "step": 23790 }, { "epoch": 1.2079293365145438, "grad_norm": 0.3895137310028076, "learning_rate": 1.1947137756569709e-05, "loss": 0.0458, "step": 23795 }, { "epoch": 1.2081831565054064, "grad_norm": 0.7249547839164734, "learning_rate": 1.194544562329729e-05, "loss": 0.0531, "step": 23800 }, { "epoch": 1.2084369764962688, "grad_norm": 0.7560728788375854, "learning_rate": 1.1943753490024876e-05, "loss": 0.0492, "step": 23805 }, { "epoch": 1.2086907964871314, "grad_norm": 0.3840545117855072, "learning_rate": 1.194206135675246e-05, "loss": 0.0474, "step": 23810 }, { "epoch": 1.2089446164779938, "grad_norm": 0.42460840940475464, "learning_rate": 1.1940369223480041e-05, "loss": 0.0537, "step": 23815 }, { "epoch": 1.2091984364688564, "grad_norm": 0.2921667695045471, "learning_rate": 1.1938677090207627e-05, "loss": 0.0526, "step": 23820 }, { "epoch": 1.2094522564597188, "grad_norm": 0.3440396189689636, "learning_rate": 1.1936984956935209e-05, "loss": 0.0454, "step": 23825 }, { "epoch": 1.2097060764505811, "grad_norm": 0.33783891797065735, "learning_rate": 1.1935292823662792e-05, "loss": 0.0466, "step": 23830 }, { "epoch": 1.2099598964414437, "grad_norm": 0.3296278417110443, "learning_rate": 1.1933600690390377e-05, "loss": 0.0621, "step": 23835 }, { "epoch": 1.2102137164323061, "grad_norm": 0.5119358897209167, "learning_rate": 1.193190855711796e-05, "loss": 0.0585, "step": 23840 }, { "epoch": 1.2104675364231687, "grad_norm": 0.738791823387146, "learning_rate": 1.1930216423845543e-05, "loss": 0.0524, "step": 23845 }, { "epoch": 1.2107213564140311, "grad_norm": 0.3069751262664795, "learning_rate": 1.1928524290573126e-05, "loss": 0.0441, "step": 23850 }, { "epoch": 1.2109751764048937, "grad_norm": 0.30451783537864685, "learning_rate": 1.192683215730071e-05, "loss": 0.0468, "step": 23855 }, { "epoch": 1.211228996395756, "grad_norm": 0.32865050435066223, "learning_rate": 1.1925140024028295e-05, "loss": 0.0534, "step": 23860 }, { "epoch": 1.2114828163866187, "grad_norm": 0.41793230175971985, "learning_rate": 1.1923447890755877e-05, "loss": 0.0504, "step": 23865 }, { "epoch": 1.211736636377481, "grad_norm": 0.6536849737167358, "learning_rate": 1.192175575748346e-05, "loss": 0.0527, "step": 23870 }, { "epoch": 1.2119904563683437, "grad_norm": 0.28495994210243225, "learning_rate": 1.1920063624211044e-05, "loss": 0.0505, "step": 23875 }, { "epoch": 1.212244276359206, "grad_norm": 1.1305482387542725, "learning_rate": 1.1918371490938628e-05, "loss": 0.0657, "step": 23880 }, { "epoch": 1.2124980963500684, "grad_norm": 0.4528692662715912, "learning_rate": 1.191667935766621e-05, "loss": 0.0519, "step": 23885 }, { "epoch": 1.212751916340931, "grad_norm": 0.49388232827186584, "learning_rate": 1.1914987224393795e-05, "loss": 0.0554, "step": 23890 }, { "epoch": 1.2130057363317934, "grad_norm": 0.3623510003089905, "learning_rate": 1.1913295091121378e-05, "loss": 0.0572, "step": 23895 }, { "epoch": 1.213259556322656, "grad_norm": 0.3501339852809906, "learning_rate": 1.1911602957848962e-05, "loss": 0.0584, "step": 23900 }, { "epoch": 1.2135133763135184, "grad_norm": 0.588069498538971, "learning_rate": 1.1909910824576545e-05, "loss": 0.053, "step": 23905 }, { "epoch": 1.213767196304381, "grad_norm": 0.5254976153373718, "learning_rate": 1.1908218691304127e-05, "loss": 0.053, "step": 23910 }, { "epoch": 1.2140210162952434, "grad_norm": 0.6018771529197693, "learning_rate": 1.1906526558031712e-05, "loss": 0.057, "step": 23915 }, { "epoch": 1.2142748362861058, "grad_norm": 0.3812793791294098, "learning_rate": 1.1904834424759296e-05, "loss": 0.0658, "step": 23920 }, { "epoch": 1.2145286562769684, "grad_norm": 0.36745819449424744, "learning_rate": 1.1903142291486878e-05, "loss": 0.0546, "step": 23925 }, { "epoch": 1.2147824762678308, "grad_norm": 0.28504976630210876, "learning_rate": 1.1901450158214463e-05, "loss": 0.0489, "step": 23930 }, { "epoch": 1.2150362962586934, "grad_norm": 0.2376314401626587, "learning_rate": 1.1899758024942045e-05, "loss": 0.0565, "step": 23935 }, { "epoch": 1.2152901162495557, "grad_norm": 0.36779358983039856, "learning_rate": 1.1898065891669628e-05, "loss": 0.0521, "step": 23940 }, { "epoch": 1.2155439362404183, "grad_norm": 0.2886993885040283, "learning_rate": 1.1896373758397214e-05, "loss": 0.0444, "step": 23945 }, { "epoch": 1.2157977562312807, "grad_norm": 0.42894473671913147, "learning_rate": 1.1894681625124795e-05, "loss": 0.0478, "step": 23950 }, { "epoch": 1.2160515762221433, "grad_norm": 0.4883272051811218, "learning_rate": 1.189298949185238e-05, "loss": 0.0555, "step": 23955 }, { "epoch": 1.2163053962130057, "grad_norm": 0.4135822057723999, "learning_rate": 1.1891297358579963e-05, "loss": 0.0607, "step": 23960 }, { "epoch": 1.2165592162038683, "grad_norm": 0.8400814533233643, "learning_rate": 1.1889605225307546e-05, "loss": 0.0594, "step": 23965 }, { "epoch": 1.2168130361947307, "grad_norm": 0.2872380316257477, "learning_rate": 1.188791309203513e-05, "loss": 0.0555, "step": 23970 }, { "epoch": 1.217066856185593, "grad_norm": 0.3974021375179291, "learning_rate": 1.1886220958762713e-05, "loss": 0.0583, "step": 23975 }, { "epoch": 1.2173206761764557, "grad_norm": 0.5222055912017822, "learning_rate": 1.1884528825490295e-05, "loss": 0.0612, "step": 23980 }, { "epoch": 1.217574496167318, "grad_norm": 0.37451913952827454, "learning_rate": 1.188283669221788e-05, "loss": 0.058, "step": 23985 }, { "epoch": 1.2178283161581807, "grad_norm": 0.38074713945388794, "learning_rate": 1.1881144558945464e-05, "loss": 0.0627, "step": 23990 }, { "epoch": 1.218082136149043, "grad_norm": 0.47878900170326233, "learning_rate": 1.1879452425673046e-05, "loss": 0.062, "step": 23995 }, { "epoch": 1.2183359561399056, "grad_norm": 0.5156375169754028, "learning_rate": 1.1877760292400631e-05, "loss": 0.0486, "step": 24000 }, { "epoch": 1.218589776130768, "grad_norm": 0.24383500218391418, "learning_rate": 1.1876068159128213e-05, "loss": 0.0474, "step": 24005 }, { "epoch": 1.2188435961216306, "grad_norm": 0.34059467911720276, "learning_rate": 1.1874376025855798e-05, "loss": 0.0466, "step": 24010 }, { "epoch": 1.219097416112493, "grad_norm": 0.31764811277389526, "learning_rate": 1.1872683892583382e-05, "loss": 0.0513, "step": 24015 }, { "epoch": 1.2193512361033556, "grad_norm": 0.43181148171424866, "learning_rate": 1.1870991759310963e-05, "loss": 0.0458, "step": 24020 }, { "epoch": 1.219605056094218, "grad_norm": 0.6314930319786072, "learning_rate": 1.1869299626038549e-05, "loss": 0.0542, "step": 24025 }, { "epoch": 1.2198588760850804, "grad_norm": 0.37427467107772827, "learning_rate": 1.186760749276613e-05, "loss": 0.0531, "step": 24030 }, { "epoch": 1.220112696075943, "grad_norm": 0.2935940623283386, "learning_rate": 1.1865915359493714e-05, "loss": 0.052, "step": 24035 }, { "epoch": 1.2203665160668054, "grad_norm": 0.48649024963378906, "learning_rate": 1.18642232262213e-05, "loss": 0.0579, "step": 24040 }, { "epoch": 1.220620336057668, "grad_norm": 0.5453752279281616, "learning_rate": 1.1862531092948881e-05, "loss": 0.0585, "step": 24045 }, { "epoch": 1.2208741560485303, "grad_norm": 0.24057643115520477, "learning_rate": 1.1860838959676466e-05, "loss": 0.0546, "step": 24050 }, { "epoch": 1.221127976039393, "grad_norm": 0.3876838684082031, "learning_rate": 1.1859146826404048e-05, "loss": 0.0548, "step": 24055 }, { "epoch": 1.2213817960302553, "grad_norm": 0.7951164245605469, "learning_rate": 1.1857454693131632e-05, "loss": 0.0601, "step": 24060 }, { "epoch": 1.2216356160211177, "grad_norm": 0.26889508962631226, "learning_rate": 1.1855762559859217e-05, "loss": 0.055, "step": 24065 }, { "epoch": 1.2218894360119803, "grad_norm": 0.48906293511390686, "learning_rate": 1.1854070426586799e-05, "loss": 0.0554, "step": 24070 }, { "epoch": 1.222143256002843, "grad_norm": 0.3633570373058319, "learning_rate": 1.1852378293314382e-05, "loss": 0.0529, "step": 24075 }, { "epoch": 1.2223970759937053, "grad_norm": 0.273607462644577, "learning_rate": 1.1850686160041966e-05, "loss": 0.0513, "step": 24080 }, { "epoch": 1.2226508959845677, "grad_norm": 0.6867349743843079, "learning_rate": 1.184899402676955e-05, "loss": 0.0531, "step": 24085 }, { "epoch": 1.2229047159754303, "grad_norm": 0.36821645498275757, "learning_rate": 1.1847301893497131e-05, "loss": 0.0494, "step": 24090 }, { "epoch": 1.2231585359662926, "grad_norm": 0.737259030342102, "learning_rate": 1.1845609760224717e-05, "loss": 0.061, "step": 24095 }, { "epoch": 1.2234123559571553, "grad_norm": 0.39399975538253784, "learning_rate": 1.18439176269523e-05, "loss": 0.0432, "step": 24100 }, { "epoch": 1.2236661759480176, "grad_norm": 0.34652629494667053, "learning_rate": 1.1842225493679884e-05, "loss": 0.06, "step": 24105 }, { "epoch": 1.2239199959388802, "grad_norm": 0.4035334289073944, "learning_rate": 1.1840533360407467e-05, "loss": 0.0552, "step": 24110 }, { "epoch": 1.2241738159297426, "grad_norm": 0.45802584290504456, "learning_rate": 1.1838841227135049e-05, "loss": 0.0525, "step": 24115 }, { "epoch": 1.224427635920605, "grad_norm": 0.26952633261680603, "learning_rate": 1.1837149093862634e-05, "loss": 0.0511, "step": 24120 }, { "epoch": 1.2246814559114676, "grad_norm": 0.5091894268989563, "learning_rate": 1.1835456960590218e-05, "loss": 0.0599, "step": 24125 }, { "epoch": 1.22493527590233, "grad_norm": 0.5348808169364929, "learning_rate": 1.18337648273178e-05, "loss": 0.0505, "step": 24130 }, { "epoch": 1.2251890958931926, "grad_norm": 0.41867509484291077, "learning_rate": 1.1832072694045385e-05, "loss": 0.0572, "step": 24135 }, { "epoch": 1.225442915884055, "grad_norm": 0.4618304967880249, "learning_rate": 1.1830380560772967e-05, "loss": 0.0479, "step": 24140 }, { "epoch": 1.2256967358749176, "grad_norm": 0.3024294078350067, "learning_rate": 1.1828688427500552e-05, "loss": 0.0496, "step": 24145 }, { "epoch": 1.22595055586578, "grad_norm": 0.3937217891216278, "learning_rate": 1.1826996294228134e-05, "loss": 0.0502, "step": 24150 }, { "epoch": 1.2262043758566425, "grad_norm": 0.44144943356513977, "learning_rate": 1.1825304160955717e-05, "loss": 0.0502, "step": 24155 }, { "epoch": 1.226458195847505, "grad_norm": 0.4977549910545349, "learning_rate": 1.1823612027683303e-05, "loss": 0.0539, "step": 24160 }, { "epoch": 1.2267120158383675, "grad_norm": 0.37051019072532654, "learning_rate": 1.1821919894410884e-05, "loss": 0.0527, "step": 24165 }, { "epoch": 1.22696583582923, "grad_norm": 0.5204314589500427, "learning_rate": 1.1820227761138468e-05, "loss": 0.0465, "step": 24170 }, { "epoch": 1.2272196558200923, "grad_norm": 1.4250450134277344, "learning_rate": 1.1818535627866052e-05, "loss": 0.0438, "step": 24175 }, { "epoch": 1.227473475810955, "grad_norm": 0.7377504110336304, "learning_rate": 1.1816843494593635e-05, "loss": 0.0584, "step": 24180 }, { "epoch": 1.2277272958018173, "grad_norm": 0.3235386908054352, "learning_rate": 1.1815151361321217e-05, "loss": 0.0511, "step": 24185 }, { "epoch": 1.2279811157926799, "grad_norm": 0.4034362733364105, "learning_rate": 1.1813459228048802e-05, "loss": 0.0467, "step": 24190 }, { "epoch": 1.2282349357835423, "grad_norm": 0.30886006355285645, "learning_rate": 1.1811767094776386e-05, "loss": 0.043, "step": 24195 }, { "epoch": 1.2284887557744049, "grad_norm": 0.38146716356277466, "learning_rate": 1.181007496150397e-05, "loss": 0.0594, "step": 24200 }, { "epoch": 1.2287425757652672, "grad_norm": 0.34178265929222107, "learning_rate": 1.1808382828231553e-05, "loss": 0.0486, "step": 24205 }, { "epoch": 1.2289963957561298, "grad_norm": 0.5366050601005554, "learning_rate": 1.1806690694959135e-05, "loss": 0.0621, "step": 24210 }, { "epoch": 1.2292502157469922, "grad_norm": 0.36923089623451233, "learning_rate": 1.180499856168672e-05, "loss": 0.0527, "step": 24215 }, { "epoch": 1.2295040357378548, "grad_norm": 0.39558228850364685, "learning_rate": 1.1803306428414303e-05, "loss": 0.0618, "step": 24220 }, { "epoch": 1.2297578557287172, "grad_norm": 0.3447304368019104, "learning_rate": 1.1801614295141885e-05, "loss": 0.0526, "step": 24225 }, { "epoch": 1.2300116757195796, "grad_norm": 0.5974411964416504, "learning_rate": 1.179992216186947e-05, "loss": 0.0522, "step": 24230 }, { "epoch": 1.2302654957104422, "grad_norm": 0.31374403834342957, "learning_rate": 1.1798230028597052e-05, "loss": 0.0497, "step": 24235 }, { "epoch": 1.2305193157013046, "grad_norm": 0.3632044792175293, "learning_rate": 1.1796537895324636e-05, "loss": 0.0487, "step": 24240 }, { "epoch": 1.2307731356921672, "grad_norm": 0.36959999799728394, "learning_rate": 1.1794845762052221e-05, "loss": 0.055, "step": 24245 }, { "epoch": 1.2310269556830296, "grad_norm": 0.43235883116722107, "learning_rate": 1.1793153628779803e-05, "loss": 0.0576, "step": 24250 }, { "epoch": 1.2312807756738922, "grad_norm": 0.3197312653064728, "learning_rate": 1.1791461495507388e-05, "loss": 0.0542, "step": 24255 }, { "epoch": 1.2315345956647545, "grad_norm": 0.4243973195552826, "learning_rate": 1.178976936223497e-05, "loss": 0.0492, "step": 24260 }, { "epoch": 1.231788415655617, "grad_norm": 0.3641511797904968, "learning_rate": 1.1788077228962554e-05, "loss": 0.0613, "step": 24265 }, { "epoch": 1.2320422356464795, "grad_norm": 0.343627393245697, "learning_rate": 1.1786385095690139e-05, "loss": 0.0487, "step": 24270 }, { "epoch": 1.232296055637342, "grad_norm": 0.633890688419342, "learning_rate": 1.178469296241772e-05, "loss": 0.0489, "step": 24275 }, { "epoch": 1.2325498756282045, "grad_norm": 0.4130115509033203, "learning_rate": 1.1783000829145304e-05, "loss": 0.0587, "step": 24280 }, { "epoch": 1.2328036956190669, "grad_norm": 0.37340712547302246, "learning_rate": 1.1781308695872888e-05, "loss": 0.0466, "step": 24285 }, { "epoch": 1.2330575156099295, "grad_norm": 1.31352961063385, "learning_rate": 1.1779616562600471e-05, "loss": 0.053, "step": 24290 }, { "epoch": 1.2333113356007919, "grad_norm": 0.25371623039245605, "learning_rate": 1.1777924429328057e-05, "loss": 0.0506, "step": 24295 }, { "epoch": 1.2335651555916545, "grad_norm": 0.5354921817779541, "learning_rate": 1.1776232296055638e-05, "loss": 0.0569, "step": 24300 }, { "epoch": 1.2338189755825169, "grad_norm": 0.3371390104293823, "learning_rate": 1.1774540162783222e-05, "loss": 0.051, "step": 24305 }, { "epoch": 1.2340727955733795, "grad_norm": 0.5659612417221069, "learning_rate": 1.1772848029510806e-05, "loss": 0.0454, "step": 24310 }, { "epoch": 1.2343266155642418, "grad_norm": 0.5271756052970886, "learning_rate": 1.1771155896238389e-05, "loss": 0.0574, "step": 24315 }, { "epoch": 1.2345804355551042, "grad_norm": 0.575239896774292, "learning_rate": 1.1769463762965971e-05, "loss": 0.0541, "step": 24320 }, { "epoch": 1.2348342555459668, "grad_norm": 0.3457287847995758, "learning_rate": 1.1767771629693556e-05, "loss": 0.0495, "step": 24325 }, { "epoch": 1.2350880755368292, "grad_norm": 0.3027961552143097, "learning_rate": 1.1766079496421138e-05, "loss": 0.0568, "step": 24330 }, { "epoch": 1.2353418955276918, "grad_norm": 0.48337095975875854, "learning_rate": 1.1764387363148722e-05, "loss": 0.0559, "step": 24335 }, { "epoch": 1.2355957155185542, "grad_norm": 0.31636735796928406, "learning_rate": 1.1762695229876307e-05, "loss": 0.0578, "step": 24340 }, { "epoch": 1.2358495355094168, "grad_norm": 0.3348773419857025, "learning_rate": 1.1761003096603889e-05, "loss": 0.0458, "step": 24345 }, { "epoch": 1.2361033555002792, "grad_norm": 0.3149411380290985, "learning_rate": 1.1759310963331474e-05, "loss": 0.0616, "step": 24350 }, { "epoch": 1.2363571754911418, "grad_norm": 0.2966812551021576, "learning_rate": 1.1757618830059056e-05, "loss": 0.0449, "step": 24355 }, { "epoch": 1.2366109954820041, "grad_norm": 0.8318333029747009, "learning_rate": 1.175592669678664e-05, "loss": 0.0491, "step": 24360 }, { "epoch": 1.2368648154728668, "grad_norm": 0.8147919178009033, "learning_rate": 1.1754234563514225e-05, "loss": 0.0512, "step": 24365 }, { "epoch": 1.2371186354637291, "grad_norm": 0.39781033992767334, "learning_rate": 1.1752542430241806e-05, "loss": 0.0461, "step": 24370 }, { "epoch": 1.2373724554545915, "grad_norm": 0.33265021443367004, "learning_rate": 1.175085029696939e-05, "loss": 0.0476, "step": 24375 }, { "epoch": 1.2376262754454541, "grad_norm": 0.3713069558143616, "learning_rate": 1.1749158163696973e-05, "loss": 0.0497, "step": 24380 }, { "epoch": 1.2378800954363165, "grad_norm": 0.3097081482410431, "learning_rate": 1.1747466030424557e-05, "loss": 0.0535, "step": 24385 }, { "epoch": 1.238133915427179, "grad_norm": 0.509508490562439, "learning_rate": 1.1745773897152139e-05, "loss": 0.0474, "step": 24390 }, { "epoch": 1.2383877354180415, "grad_norm": 0.2997609078884125, "learning_rate": 1.1744081763879724e-05, "loss": 0.0446, "step": 24395 }, { "epoch": 1.238641555408904, "grad_norm": 0.31849291920661926, "learning_rate": 1.1742389630607308e-05, "loss": 0.0623, "step": 24400 }, { "epoch": 1.2388953753997665, "grad_norm": 0.2741042673587799, "learning_rate": 1.1740697497334891e-05, "loss": 0.0506, "step": 24405 }, { "epoch": 1.2391491953906288, "grad_norm": 0.3714274764060974, "learning_rate": 1.1739005364062475e-05, "loss": 0.0531, "step": 24410 }, { "epoch": 1.2394030153814914, "grad_norm": 0.3533041179180145, "learning_rate": 1.1737313230790057e-05, "loss": 0.0527, "step": 24415 }, { "epoch": 1.239656835372354, "grad_norm": 0.35267361998558044, "learning_rate": 1.1735621097517642e-05, "loss": 0.0474, "step": 24420 }, { "epoch": 1.2399106553632164, "grad_norm": 0.2871090769767761, "learning_rate": 1.1733928964245225e-05, "loss": 0.0491, "step": 24425 }, { "epoch": 1.2401644753540788, "grad_norm": 0.34999963641166687, "learning_rate": 1.1732236830972807e-05, "loss": 0.049, "step": 24430 }, { "epoch": 1.2404182953449414, "grad_norm": 0.7294630408287048, "learning_rate": 1.1730544697700392e-05, "loss": 0.0574, "step": 24435 }, { "epoch": 1.2406721153358038, "grad_norm": 0.31107303500175476, "learning_rate": 1.1728852564427974e-05, "loss": 0.0428, "step": 24440 }, { "epoch": 1.2409259353266664, "grad_norm": 0.4596913158893585, "learning_rate": 1.172716043115556e-05, "loss": 0.0562, "step": 24445 }, { "epoch": 1.2411797553175288, "grad_norm": 0.28400036692619324, "learning_rate": 1.1725468297883143e-05, "loss": 0.0547, "step": 24450 }, { "epoch": 1.2414335753083914, "grad_norm": 0.3861754536628723, "learning_rate": 1.1723776164610725e-05, "loss": 0.0541, "step": 24455 }, { "epoch": 1.2416873952992538, "grad_norm": 0.46610140800476074, "learning_rate": 1.172208403133831e-05, "loss": 0.0574, "step": 24460 }, { "epoch": 1.2419412152901161, "grad_norm": 0.5145004987716675, "learning_rate": 1.1720391898065892e-05, "loss": 0.0496, "step": 24465 }, { "epoch": 1.2421950352809787, "grad_norm": 0.3357173502445221, "learning_rate": 1.1718699764793476e-05, "loss": 0.0434, "step": 24470 }, { "epoch": 1.2424488552718411, "grad_norm": 0.5073157548904419, "learning_rate": 1.171700763152106e-05, "loss": 0.0535, "step": 24475 }, { "epoch": 1.2427026752627037, "grad_norm": 0.5658380389213562, "learning_rate": 1.1715315498248643e-05, "loss": 0.0507, "step": 24480 }, { "epoch": 1.242956495253566, "grad_norm": 0.3627479374408722, "learning_rate": 1.1713623364976226e-05, "loss": 0.0525, "step": 24485 }, { "epoch": 1.2432103152444287, "grad_norm": 0.4254165589809418, "learning_rate": 1.171193123170381e-05, "loss": 0.0566, "step": 24490 }, { "epoch": 1.243464135235291, "grad_norm": 0.28777673840522766, "learning_rate": 1.1710239098431393e-05, "loss": 0.0485, "step": 24495 }, { "epoch": 1.2437179552261537, "grad_norm": 0.5588412284851074, "learning_rate": 1.1708546965158979e-05, "loss": 0.0498, "step": 24500 }, { "epoch": 1.243971775217016, "grad_norm": 0.38440296053886414, "learning_rate": 1.170685483188656e-05, "loss": 0.054, "step": 24505 }, { "epoch": 1.2442255952078787, "grad_norm": 0.434359610080719, "learning_rate": 1.1705162698614144e-05, "loss": 0.048, "step": 24510 }, { "epoch": 1.244479415198741, "grad_norm": 0.4097727835178375, "learning_rate": 1.1703470565341728e-05, "loss": 0.0579, "step": 24515 }, { "epoch": 1.2447332351896034, "grad_norm": 0.49199771881103516, "learning_rate": 1.1701778432069311e-05, "loss": 0.045, "step": 24520 }, { "epoch": 1.244987055180466, "grad_norm": 0.3446272015571594, "learning_rate": 1.1700086298796893e-05, "loss": 0.0554, "step": 24525 }, { "epoch": 1.2452408751713284, "grad_norm": 0.4560483694076538, "learning_rate": 1.1698394165524478e-05, "loss": 0.0521, "step": 24530 }, { "epoch": 1.245494695162191, "grad_norm": 0.30533355474472046, "learning_rate": 1.169670203225206e-05, "loss": 0.0606, "step": 24535 }, { "epoch": 1.2457485151530534, "grad_norm": 0.34249964356422424, "learning_rate": 1.1695009898979645e-05, "loss": 0.0526, "step": 24540 }, { "epoch": 1.246002335143916, "grad_norm": 0.6279357671737671, "learning_rate": 1.1693317765707229e-05, "loss": 0.0587, "step": 24545 }, { "epoch": 1.2462561551347784, "grad_norm": 0.24504680931568146, "learning_rate": 1.169162563243481e-05, "loss": 0.0506, "step": 24550 }, { "epoch": 1.246509975125641, "grad_norm": 0.622235894203186, "learning_rate": 1.1689933499162396e-05, "loss": 0.0611, "step": 24555 }, { "epoch": 1.2467637951165034, "grad_norm": 0.3807246685028076, "learning_rate": 1.1688241365889978e-05, "loss": 0.0588, "step": 24560 }, { "epoch": 1.247017615107366, "grad_norm": 0.35363635420799255, "learning_rate": 1.1686549232617561e-05, "loss": 0.0525, "step": 24565 }, { "epoch": 1.2472714350982284, "grad_norm": 0.415667861700058, "learning_rate": 1.1684857099345146e-05, "loss": 0.059, "step": 24570 }, { "epoch": 1.2475252550890907, "grad_norm": 0.40636613965034485, "learning_rate": 1.1683164966072728e-05, "loss": 0.0585, "step": 24575 }, { "epoch": 1.2477790750799533, "grad_norm": 0.33179885149002075, "learning_rate": 1.1681472832800312e-05, "loss": 0.0504, "step": 24580 }, { "epoch": 1.2480328950708157, "grad_norm": 0.41674908995628357, "learning_rate": 1.1679780699527895e-05, "loss": 0.054, "step": 24585 }, { "epoch": 1.2482867150616783, "grad_norm": 0.4602307081222534, "learning_rate": 1.1678088566255479e-05, "loss": 0.0541, "step": 24590 }, { "epoch": 1.2485405350525407, "grad_norm": 0.3662072718143463, "learning_rate": 1.1676396432983064e-05, "loss": 0.0487, "step": 24595 }, { "epoch": 1.2487943550434033, "grad_norm": 1.340538501739502, "learning_rate": 1.1674704299710646e-05, "loss": 0.0649, "step": 24600 }, { "epoch": 1.2490481750342657, "grad_norm": 0.242966428399086, "learning_rate": 1.167301216643823e-05, "loss": 0.0593, "step": 24605 }, { "epoch": 1.249301995025128, "grad_norm": 0.32781925797462463, "learning_rate": 1.1671320033165813e-05, "loss": 0.0484, "step": 24610 }, { "epoch": 1.2495558150159907, "grad_norm": 0.29135435819625854, "learning_rate": 1.1669627899893397e-05, "loss": 0.0393, "step": 24615 }, { "epoch": 1.249809635006853, "grad_norm": 0.44302451610565186, "learning_rate": 1.1667935766620979e-05, "loss": 0.0551, "step": 24620 }, { "epoch": 1.2500634549977157, "grad_norm": 0.29195067286491394, "learning_rate": 1.1666243633348564e-05, "loss": 0.0443, "step": 24625 }, { "epoch": 1.250317274988578, "grad_norm": 0.5356406569480896, "learning_rate": 1.1664551500076147e-05, "loss": 0.0483, "step": 24630 }, { "epoch": 1.2505710949794406, "grad_norm": 0.3515762984752655, "learning_rate": 1.166285936680373e-05, "loss": 0.0472, "step": 24635 }, { "epoch": 1.250824914970303, "grad_norm": 0.40354248881340027, "learning_rate": 1.1661167233531314e-05, "loss": 0.0555, "step": 24640 }, { "epoch": 1.2510787349611656, "grad_norm": 0.4865587651729584, "learning_rate": 1.1659475100258896e-05, "loss": 0.0484, "step": 24645 }, { "epoch": 1.251332554952028, "grad_norm": 0.43588995933532715, "learning_rate": 1.1657782966986482e-05, "loss": 0.0626, "step": 24650 }, { "epoch": 1.2515863749428906, "grad_norm": 0.5347614884376526, "learning_rate": 1.1656090833714065e-05, "loss": 0.0614, "step": 24655 }, { "epoch": 1.251840194933753, "grad_norm": 0.40371572971343994, "learning_rate": 1.1654398700441647e-05, "loss": 0.0654, "step": 24660 }, { "epoch": 1.2520940149246154, "grad_norm": 0.3678436875343323, "learning_rate": 1.1652706567169232e-05, "loss": 0.05, "step": 24665 }, { "epoch": 1.252347834915478, "grad_norm": 0.4183593988418579, "learning_rate": 1.1651014433896814e-05, "loss": 0.06, "step": 24670 }, { "epoch": 1.2526016549063403, "grad_norm": 0.29606175422668457, "learning_rate": 1.1649322300624398e-05, "loss": 0.0531, "step": 24675 }, { "epoch": 1.252855474897203, "grad_norm": 0.3883073031902313, "learning_rate": 1.1647630167351983e-05, "loss": 0.0563, "step": 24680 }, { "epoch": 1.2531092948880653, "grad_norm": 0.326962947845459, "learning_rate": 1.1645938034079565e-05, "loss": 0.0499, "step": 24685 }, { "epoch": 1.253363114878928, "grad_norm": 0.3533216416835785, "learning_rate": 1.164424590080715e-05, "loss": 0.0468, "step": 24690 }, { "epoch": 1.2536169348697903, "grad_norm": 0.43244466185569763, "learning_rate": 1.1642553767534732e-05, "loss": 0.0548, "step": 24695 }, { "epoch": 1.2538707548606527, "grad_norm": 0.4531016945838928, "learning_rate": 1.1640861634262315e-05, "loss": 0.0518, "step": 24700 }, { "epoch": 1.2541245748515153, "grad_norm": 0.27352747321128845, "learning_rate": 1.16391695009899e-05, "loss": 0.056, "step": 24705 }, { "epoch": 1.254378394842378, "grad_norm": 0.29257744550704956, "learning_rate": 1.1637477367717482e-05, "loss": 0.0498, "step": 24710 }, { "epoch": 1.2546322148332403, "grad_norm": 0.4877859950065613, "learning_rate": 1.1635785234445064e-05, "loss": 0.0525, "step": 24715 }, { "epoch": 1.2548860348241027, "grad_norm": 0.3827455937862396, "learning_rate": 1.163409310117265e-05, "loss": 0.0478, "step": 24720 }, { "epoch": 1.2551398548149653, "grad_norm": 0.6896519660949707, "learning_rate": 1.1632400967900233e-05, "loss": 0.0474, "step": 24725 }, { "epoch": 1.2553936748058276, "grad_norm": 0.4330749809741974, "learning_rate": 1.1630708834627815e-05, "loss": 0.051, "step": 24730 }, { "epoch": 1.2556474947966902, "grad_norm": 0.41606810688972473, "learning_rate": 1.16290167013554e-05, "loss": 0.0599, "step": 24735 }, { "epoch": 1.2559013147875526, "grad_norm": 0.37970244884490967, "learning_rate": 1.1627324568082982e-05, "loss": 0.0424, "step": 24740 }, { "epoch": 1.2561551347784152, "grad_norm": 0.29445701837539673, "learning_rate": 1.1625632434810567e-05, "loss": 0.0461, "step": 24745 }, { "epoch": 1.2564089547692776, "grad_norm": 0.24456225335597992, "learning_rate": 1.162394030153815e-05, "loss": 0.0462, "step": 24750 }, { "epoch": 1.25666277476014, "grad_norm": 0.2942696213722229, "learning_rate": 1.1622248168265733e-05, "loss": 0.0548, "step": 24755 }, { "epoch": 1.2569165947510026, "grad_norm": 0.28085386753082275, "learning_rate": 1.1620556034993318e-05, "loss": 0.0497, "step": 24760 }, { "epoch": 1.2571704147418652, "grad_norm": 0.37461450695991516, "learning_rate": 1.16188639017209e-05, "loss": 0.0485, "step": 24765 }, { "epoch": 1.2574242347327276, "grad_norm": 0.42366278171539307, "learning_rate": 1.1617171768448483e-05, "loss": 0.0472, "step": 24770 }, { "epoch": 1.25767805472359, "grad_norm": 0.3100433945655823, "learning_rate": 1.1615479635176068e-05, "loss": 0.0529, "step": 24775 }, { "epoch": 1.2579318747144526, "grad_norm": 0.32511138916015625, "learning_rate": 1.161378750190365e-05, "loss": 0.0549, "step": 24780 }, { "epoch": 1.258185694705315, "grad_norm": 0.4048891067504883, "learning_rate": 1.1612095368631236e-05, "loss": 0.0555, "step": 24785 }, { "epoch": 1.2584395146961775, "grad_norm": 0.49252721667289734, "learning_rate": 1.1610403235358817e-05, "loss": 0.0568, "step": 24790 }, { "epoch": 1.25869333468704, "grad_norm": 0.3089674413204193, "learning_rate": 1.1608711102086401e-05, "loss": 0.0545, "step": 24795 }, { "epoch": 1.2589471546779025, "grad_norm": 0.4839104115962982, "learning_rate": 1.1607018968813986e-05, "loss": 0.0586, "step": 24800 }, { "epoch": 1.259200974668765, "grad_norm": 0.4469437003135681, "learning_rate": 1.1605326835541568e-05, "loss": 0.0451, "step": 24805 }, { "epoch": 1.2594547946596273, "grad_norm": 0.30239444971084595, "learning_rate": 1.1603634702269152e-05, "loss": 0.0422, "step": 24810 }, { "epoch": 1.2597086146504899, "grad_norm": 0.8179628252983093, "learning_rate": 1.1601942568996735e-05, "loss": 0.0497, "step": 24815 }, { "epoch": 1.2599624346413525, "grad_norm": 0.5173925161361694, "learning_rate": 1.1600250435724319e-05, "loss": 0.0557, "step": 24820 }, { "epoch": 1.2602162546322149, "grad_norm": 0.35866352915763855, "learning_rate": 1.15985583024519e-05, "loss": 0.051, "step": 24825 }, { "epoch": 1.2604700746230773, "grad_norm": 0.4166609048843384, "learning_rate": 1.1596866169179486e-05, "loss": 0.0497, "step": 24830 }, { "epoch": 1.2607238946139399, "grad_norm": 0.31274521350860596, "learning_rate": 1.159517403590707e-05, "loss": 0.0548, "step": 24835 }, { "epoch": 1.2609777146048022, "grad_norm": 0.39399391412734985, "learning_rate": 1.1593481902634653e-05, "loss": 0.0495, "step": 24840 }, { "epoch": 1.2612315345956646, "grad_norm": 0.2745342552661896, "learning_rate": 1.1591789769362236e-05, "loss": 0.0474, "step": 24845 }, { "epoch": 1.2614853545865272, "grad_norm": 0.4054584801197052, "learning_rate": 1.1590097636089818e-05, "loss": 0.0537, "step": 24850 }, { "epoch": 1.2617391745773898, "grad_norm": 0.332953542470932, "learning_rate": 1.1588405502817403e-05, "loss": 0.0501, "step": 24855 }, { "epoch": 1.2619929945682522, "grad_norm": 0.4233452379703522, "learning_rate": 1.1586713369544987e-05, "loss": 0.0566, "step": 24860 }, { "epoch": 1.2622468145591146, "grad_norm": 0.2981891334056854, "learning_rate": 1.1585021236272569e-05, "loss": 0.0534, "step": 24865 }, { "epoch": 1.2625006345499772, "grad_norm": 0.2727615535259247, "learning_rate": 1.1583329103000154e-05, "loss": 0.0458, "step": 24870 }, { "epoch": 1.2627544545408396, "grad_norm": 1.0050932168960571, "learning_rate": 1.1581636969727736e-05, "loss": 0.0501, "step": 24875 }, { "epoch": 1.2630082745317022, "grad_norm": 0.5648157000541687, "learning_rate": 1.157994483645532e-05, "loss": 0.051, "step": 24880 }, { "epoch": 1.2632620945225645, "grad_norm": 0.35903069376945496, "learning_rate": 1.1578252703182905e-05, "loss": 0.0504, "step": 24885 }, { "epoch": 1.2635159145134272, "grad_norm": 0.42427051067352295, "learning_rate": 1.1576560569910487e-05, "loss": 0.0519, "step": 24890 }, { "epoch": 1.2637697345042895, "grad_norm": 0.34407487511634827, "learning_rate": 1.1574868436638072e-05, "loss": 0.0614, "step": 24895 }, { "epoch": 1.264023554495152, "grad_norm": 0.34046033024787903, "learning_rate": 1.1573176303365654e-05, "loss": 0.0437, "step": 24900 }, { "epoch": 1.2642773744860145, "grad_norm": 0.3334759771823883, "learning_rate": 1.1571484170093237e-05, "loss": 0.0474, "step": 24905 }, { "epoch": 1.2645311944768771, "grad_norm": 0.2973962128162384, "learning_rate": 1.1569792036820822e-05, "loss": 0.0466, "step": 24910 }, { "epoch": 1.2647850144677395, "grad_norm": 0.4161064624786377, "learning_rate": 1.1568099903548404e-05, "loss": 0.0495, "step": 24915 }, { "epoch": 1.2650388344586019, "grad_norm": 0.4790429174900055, "learning_rate": 1.1566407770275986e-05, "loss": 0.0569, "step": 24920 }, { "epoch": 1.2652926544494645, "grad_norm": 0.340096116065979, "learning_rate": 1.1564715637003571e-05, "loss": 0.056, "step": 24925 }, { "epoch": 1.2655464744403269, "grad_norm": 0.3699130713939667, "learning_rate": 1.1563023503731155e-05, "loss": 0.0506, "step": 24930 }, { "epoch": 1.2658002944311895, "grad_norm": 0.33714622259140015, "learning_rate": 1.156133137045874e-05, "loss": 0.05, "step": 24935 }, { "epoch": 1.2660541144220518, "grad_norm": 0.35506245493888855, "learning_rate": 1.1559639237186322e-05, "loss": 0.0555, "step": 24940 }, { "epoch": 1.2663079344129144, "grad_norm": 0.2794026732444763, "learning_rate": 1.1557947103913904e-05, "loss": 0.0554, "step": 24945 }, { "epoch": 1.2665617544037768, "grad_norm": 0.28179222345352173, "learning_rate": 1.1556254970641489e-05, "loss": 0.0469, "step": 24950 }, { "epoch": 1.2668155743946392, "grad_norm": 0.31685301661491394, "learning_rate": 1.1554562837369073e-05, "loss": 0.0568, "step": 24955 }, { "epoch": 1.2670693943855018, "grad_norm": 0.38221171498298645, "learning_rate": 1.1552870704096654e-05, "loss": 0.0509, "step": 24960 }, { "epoch": 1.2673232143763644, "grad_norm": 0.38538140058517456, "learning_rate": 1.155117857082424e-05, "loss": 0.059, "step": 24965 }, { "epoch": 1.2675770343672268, "grad_norm": 0.5618222951889038, "learning_rate": 1.1549486437551822e-05, "loss": 0.0545, "step": 24970 }, { "epoch": 1.2678308543580892, "grad_norm": 0.3329094648361206, "learning_rate": 1.1547794304279405e-05, "loss": 0.0567, "step": 24975 }, { "epoch": 1.2680846743489518, "grad_norm": 0.3512543737888336, "learning_rate": 1.154610217100699e-05, "loss": 0.0565, "step": 24980 }, { "epoch": 1.2683384943398142, "grad_norm": 0.38060635328292847, "learning_rate": 1.1544410037734572e-05, "loss": 0.0436, "step": 24985 }, { "epoch": 1.2685923143306768, "grad_norm": 0.3356670141220093, "learning_rate": 1.1542717904462157e-05, "loss": 0.0486, "step": 24990 }, { "epoch": 1.2688461343215391, "grad_norm": 0.3373483717441559, "learning_rate": 1.154102577118974e-05, "loss": 0.0518, "step": 24995 }, { "epoch": 1.2690999543124017, "grad_norm": 0.351968377828598, "learning_rate": 1.1539333637917323e-05, "loss": 0.0479, "step": 25000 }, { "epoch": 1.2693537743032641, "grad_norm": 0.4267289936542511, "learning_rate": 1.1537641504644908e-05, "loss": 0.0542, "step": 25005 }, { "epoch": 1.2696075942941265, "grad_norm": 1.2102017402648926, "learning_rate": 1.153594937137249e-05, "loss": 0.0677, "step": 25010 }, { "epoch": 1.269861414284989, "grad_norm": 0.6700267195701599, "learning_rate": 1.1534257238100073e-05, "loss": 0.055, "step": 25015 }, { "epoch": 1.2701152342758515, "grad_norm": 0.2946222126483917, "learning_rate": 1.1532565104827657e-05, "loss": 0.0531, "step": 25020 }, { "epoch": 1.270369054266714, "grad_norm": 0.25673529505729675, "learning_rate": 1.153087297155524e-05, "loss": 0.0568, "step": 25025 }, { "epoch": 1.2706228742575765, "grad_norm": 0.4724583923816681, "learning_rate": 1.1529180838282826e-05, "loss": 0.0508, "step": 25030 }, { "epoch": 1.270876694248439, "grad_norm": 0.36126697063446045, "learning_rate": 1.1527488705010408e-05, "loss": 0.0493, "step": 25035 }, { "epoch": 1.2711305142393015, "grad_norm": 0.43014219403266907, "learning_rate": 1.1525796571737991e-05, "loss": 0.0444, "step": 25040 }, { "epoch": 1.2713843342301638, "grad_norm": 0.4349735677242279, "learning_rate": 1.1524104438465575e-05, "loss": 0.053, "step": 25045 }, { "epoch": 1.2716381542210264, "grad_norm": 0.42500919103622437, "learning_rate": 1.1522412305193158e-05, "loss": 0.0484, "step": 25050 }, { "epoch": 1.271891974211889, "grad_norm": 0.38369154930114746, "learning_rate": 1.152072017192074e-05, "loss": 0.0522, "step": 25055 }, { "epoch": 1.2721457942027514, "grad_norm": 0.4491569697856903, "learning_rate": 1.1519028038648325e-05, "loss": 0.0453, "step": 25060 }, { "epoch": 1.2723996141936138, "grad_norm": 0.3386807441711426, "learning_rate": 1.1517335905375909e-05, "loss": 0.0487, "step": 25065 }, { "epoch": 1.2726534341844764, "grad_norm": 0.26542624831199646, "learning_rate": 1.151564377210349e-05, "loss": 0.0479, "step": 25070 }, { "epoch": 1.2729072541753388, "grad_norm": 0.33871063590049744, "learning_rate": 1.1513951638831076e-05, "loss": 0.0567, "step": 25075 }, { "epoch": 1.2731610741662014, "grad_norm": 0.46228867769241333, "learning_rate": 1.1512259505558658e-05, "loss": 0.0491, "step": 25080 }, { "epoch": 1.2734148941570638, "grad_norm": 0.6303410530090332, "learning_rate": 1.1510567372286243e-05, "loss": 0.05, "step": 25085 }, { "epoch": 1.2736687141479264, "grad_norm": 0.31501638889312744, "learning_rate": 1.1508875239013827e-05, "loss": 0.0489, "step": 25090 }, { "epoch": 1.2739225341387888, "grad_norm": 0.36004316806793213, "learning_rate": 1.1507183105741409e-05, "loss": 0.0551, "step": 25095 }, { "epoch": 1.2741763541296511, "grad_norm": 0.8616158366203308, "learning_rate": 1.1505490972468994e-05, "loss": 0.0569, "step": 25100 }, { "epoch": 1.2744301741205137, "grad_norm": 0.33611050248146057, "learning_rate": 1.1503798839196576e-05, "loss": 0.0469, "step": 25105 }, { "epoch": 1.2746839941113763, "grad_norm": 0.41049808263778687, "learning_rate": 1.1502106705924159e-05, "loss": 0.0562, "step": 25110 }, { "epoch": 1.2749378141022387, "grad_norm": 0.2955979108810425, "learning_rate": 1.1500414572651744e-05, "loss": 0.0497, "step": 25115 }, { "epoch": 1.275191634093101, "grad_norm": 0.44358739256858826, "learning_rate": 1.1498722439379326e-05, "loss": 0.0515, "step": 25120 }, { "epoch": 1.2754454540839637, "grad_norm": 0.374898761510849, "learning_rate": 1.1497030306106908e-05, "loss": 0.0522, "step": 25125 }, { "epoch": 1.275699274074826, "grad_norm": 0.30691927671432495, "learning_rate": 1.1495338172834493e-05, "loss": 0.0479, "step": 25130 }, { "epoch": 1.2759530940656887, "grad_norm": 0.34827449917793274, "learning_rate": 1.1493646039562077e-05, "loss": 0.0551, "step": 25135 }, { "epoch": 1.276206914056551, "grad_norm": 0.37255775928497314, "learning_rate": 1.149195390628966e-05, "loss": 0.0509, "step": 25140 }, { "epoch": 1.2764607340474137, "grad_norm": 0.5028783082962036, "learning_rate": 1.1490261773017244e-05, "loss": 0.0527, "step": 25145 }, { "epoch": 1.276714554038276, "grad_norm": 0.44755929708480835, "learning_rate": 1.1488569639744826e-05, "loss": 0.0497, "step": 25150 }, { "epoch": 1.2769683740291384, "grad_norm": 0.3543397784233093, "learning_rate": 1.1486877506472411e-05, "loss": 0.0502, "step": 25155 }, { "epoch": 1.277222194020001, "grad_norm": 0.4144035577774048, "learning_rate": 1.1485185373199995e-05, "loss": 0.0516, "step": 25160 }, { "epoch": 1.2774760140108634, "grad_norm": 0.3628363311290741, "learning_rate": 1.1483493239927576e-05, "loss": 0.0466, "step": 25165 }, { "epoch": 1.277729834001726, "grad_norm": 0.3269154727458954, "learning_rate": 1.1481801106655162e-05, "loss": 0.0546, "step": 25170 }, { "epoch": 1.2779836539925884, "grad_norm": 0.4602469205856323, "learning_rate": 1.1480108973382744e-05, "loss": 0.0483, "step": 25175 }, { "epoch": 1.278237473983451, "grad_norm": 0.8606670498847961, "learning_rate": 1.1478416840110329e-05, "loss": 0.0661, "step": 25180 }, { "epoch": 1.2784912939743134, "grad_norm": 0.6975868940353394, "learning_rate": 1.1476724706837912e-05, "loss": 0.044, "step": 25185 }, { "epoch": 1.2787451139651758, "grad_norm": 0.2676074802875519, "learning_rate": 1.1475032573565494e-05, "loss": 0.0466, "step": 25190 }, { "epoch": 1.2789989339560384, "grad_norm": 0.22543856501579285, "learning_rate": 1.147334044029308e-05, "loss": 0.0542, "step": 25195 }, { "epoch": 1.279252753946901, "grad_norm": 0.33862537145614624, "learning_rate": 1.1471648307020661e-05, "loss": 0.0604, "step": 25200 }, { "epoch": 1.2795065739377633, "grad_norm": 0.3779035210609436, "learning_rate": 1.1469956173748245e-05, "loss": 0.0522, "step": 25205 }, { "epoch": 1.2797603939286257, "grad_norm": 0.3730185329914093, "learning_rate": 1.146826404047583e-05, "loss": 0.0507, "step": 25210 }, { "epoch": 1.2800142139194883, "grad_norm": 0.404440313577652, "learning_rate": 1.1466571907203412e-05, "loss": 0.0565, "step": 25215 }, { "epoch": 1.2802680339103507, "grad_norm": 0.39377304911613464, "learning_rate": 1.1464879773930995e-05, "loss": 0.0566, "step": 25220 }, { "epoch": 1.2805218539012133, "grad_norm": 0.3067817986011505, "learning_rate": 1.1463187640658579e-05, "loss": 0.051, "step": 25225 }, { "epoch": 1.2807756738920757, "grad_norm": 0.42959004640579224, "learning_rate": 1.1461495507386163e-05, "loss": 0.0592, "step": 25230 }, { "epoch": 1.2810294938829383, "grad_norm": 0.3348968029022217, "learning_rate": 1.1459803374113748e-05, "loss": 0.0459, "step": 25235 }, { "epoch": 1.2812833138738007, "grad_norm": 0.5492569804191589, "learning_rate": 1.145811124084133e-05, "loss": 0.051, "step": 25240 }, { "epoch": 1.281537133864663, "grad_norm": 0.5650330781936646, "learning_rate": 1.1456419107568913e-05, "loss": 0.053, "step": 25245 }, { "epoch": 1.2817909538555257, "grad_norm": 0.33115166425704956, "learning_rate": 1.1454726974296497e-05, "loss": 0.0472, "step": 25250 }, { "epoch": 1.2820447738463883, "grad_norm": 0.6106663346290588, "learning_rate": 1.145303484102408e-05, "loss": 0.0512, "step": 25255 }, { "epoch": 1.2822985938372506, "grad_norm": 0.6498463749885559, "learning_rate": 1.1451342707751662e-05, "loss": 0.0539, "step": 25260 }, { "epoch": 1.282552413828113, "grad_norm": 0.25299450755119324, "learning_rate": 1.1449650574479247e-05, "loss": 0.0481, "step": 25265 }, { "epoch": 1.2828062338189756, "grad_norm": 0.26644986867904663, "learning_rate": 1.1447958441206831e-05, "loss": 0.0586, "step": 25270 }, { "epoch": 1.283060053809838, "grad_norm": 0.3053162097930908, "learning_rate": 1.1446266307934414e-05, "loss": 0.0538, "step": 25275 }, { "epoch": 1.2833138738007006, "grad_norm": 0.28157272934913635, "learning_rate": 1.1444574174661998e-05, "loss": 0.0511, "step": 25280 }, { "epoch": 1.283567693791563, "grad_norm": 0.4689750671386719, "learning_rate": 1.144288204138958e-05, "loss": 0.0464, "step": 25285 }, { "epoch": 1.2838215137824256, "grad_norm": 0.8079620599746704, "learning_rate": 1.1441189908117165e-05, "loss": 0.052, "step": 25290 }, { "epoch": 1.284075333773288, "grad_norm": 0.263162761926651, "learning_rate": 1.1439497774844749e-05, "loss": 0.0522, "step": 25295 }, { "epoch": 1.2843291537641504, "grad_norm": 0.43071693181991577, "learning_rate": 1.143780564157233e-05, "loss": 0.0523, "step": 25300 }, { "epoch": 1.284582973755013, "grad_norm": 0.31495729088783264, "learning_rate": 1.1436113508299916e-05, "loss": 0.0613, "step": 25305 }, { "epoch": 1.2848367937458756, "grad_norm": 0.38278722763061523, "learning_rate": 1.1434421375027498e-05, "loss": 0.0524, "step": 25310 }, { "epoch": 1.285090613736738, "grad_norm": 0.4674856960773468, "learning_rate": 1.1432729241755081e-05, "loss": 0.0469, "step": 25315 }, { "epoch": 1.2853444337276003, "grad_norm": 0.3894411623477936, "learning_rate": 1.1431037108482665e-05, "loss": 0.0493, "step": 25320 }, { "epoch": 1.285598253718463, "grad_norm": 0.3096882104873657, "learning_rate": 1.1429344975210248e-05, "loss": 0.0484, "step": 25325 }, { "epoch": 1.2858520737093253, "grad_norm": 0.40916988253593445, "learning_rate": 1.1427652841937833e-05, "loss": 0.0501, "step": 25330 }, { "epoch": 1.286105893700188, "grad_norm": 0.4035155177116394, "learning_rate": 1.1425960708665415e-05, "loss": 0.0509, "step": 25335 }, { "epoch": 1.2863597136910503, "grad_norm": 0.2631663382053375, "learning_rate": 1.1424268575392999e-05, "loss": 0.0508, "step": 25340 }, { "epoch": 1.286613533681913, "grad_norm": 1.0878431797027588, "learning_rate": 1.1422576442120582e-05, "loss": 0.0365, "step": 25345 }, { "epoch": 1.2868673536727753, "grad_norm": 0.4171302318572998, "learning_rate": 1.1420884308848166e-05, "loss": 0.0441, "step": 25350 }, { "epoch": 1.2871211736636377, "grad_norm": 0.3274853229522705, "learning_rate": 1.1419192175575748e-05, "loss": 0.0445, "step": 25355 }, { "epoch": 1.2873749936545003, "grad_norm": 0.5124243497848511, "learning_rate": 1.1417500042303333e-05, "loss": 0.0465, "step": 25360 }, { "epoch": 1.2876288136453626, "grad_norm": 0.6019737720489502, "learning_rate": 1.1415807909030917e-05, "loss": 0.0537, "step": 25365 }, { "epoch": 1.2878826336362252, "grad_norm": 0.3629150092601776, "learning_rate": 1.1414115775758498e-05, "loss": 0.0481, "step": 25370 }, { "epoch": 1.2881364536270876, "grad_norm": 0.40465471148490906, "learning_rate": 1.1412423642486084e-05, "loss": 0.0542, "step": 25375 }, { "epoch": 1.2883902736179502, "grad_norm": 0.5901461839675903, "learning_rate": 1.1410731509213665e-05, "loss": 0.0501, "step": 25380 }, { "epoch": 1.2886440936088126, "grad_norm": 0.3407045006752014, "learning_rate": 1.140903937594125e-05, "loss": 0.0559, "step": 25385 }, { "epoch": 1.288897913599675, "grad_norm": 0.41215410828590393, "learning_rate": 1.1407347242668834e-05, "loss": 0.0538, "step": 25390 }, { "epoch": 1.2891517335905376, "grad_norm": 0.3115251064300537, "learning_rate": 1.1405655109396416e-05, "loss": 0.0394, "step": 25395 }, { "epoch": 1.2894055535814002, "grad_norm": 0.2772251069545746, "learning_rate": 1.1403962976124001e-05, "loss": 0.0421, "step": 25400 }, { "epoch": 1.2896593735722626, "grad_norm": 0.3756926953792572, "learning_rate": 1.1402270842851583e-05, "loss": 0.053, "step": 25405 }, { "epoch": 1.289913193563125, "grad_norm": 0.5077455639839172, "learning_rate": 1.1400578709579167e-05, "loss": 0.0505, "step": 25410 }, { "epoch": 1.2901670135539876, "grad_norm": 0.2544682025909424, "learning_rate": 1.1398886576306752e-05, "loss": 0.0451, "step": 25415 }, { "epoch": 1.29042083354485, "grad_norm": 0.39733994007110596, "learning_rate": 1.1397194443034334e-05, "loss": 0.0488, "step": 25420 }, { "epoch": 1.2906746535357125, "grad_norm": 0.3462377190589905, "learning_rate": 1.1395502309761919e-05, "loss": 0.0463, "step": 25425 }, { "epoch": 1.290928473526575, "grad_norm": 0.3922407031059265, "learning_rate": 1.1393810176489501e-05, "loss": 0.0542, "step": 25430 }, { "epoch": 1.2911822935174375, "grad_norm": 0.3106277883052826, "learning_rate": 1.1392118043217084e-05, "loss": 0.0574, "step": 25435 }, { "epoch": 1.2914361135083, "grad_norm": 0.40110254287719727, "learning_rate": 1.139042590994467e-05, "loss": 0.045, "step": 25440 }, { "epoch": 1.2916899334991623, "grad_norm": 0.44177547097206116, "learning_rate": 1.1388733776672252e-05, "loss": 0.061, "step": 25445 }, { "epoch": 1.2919437534900249, "grad_norm": 0.44935235381126404, "learning_rate": 1.1387041643399835e-05, "loss": 0.0483, "step": 25450 }, { "epoch": 1.2921975734808875, "grad_norm": 0.32007691264152527, "learning_rate": 1.1385349510127419e-05, "loss": 0.0477, "step": 25455 }, { "epoch": 1.2924513934717499, "grad_norm": 0.4596714675426483, "learning_rate": 1.1383657376855002e-05, "loss": 0.0447, "step": 25460 }, { "epoch": 1.2927052134626122, "grad_norm": 0.3188910484313965, "learning_rate": 1.1381965243582584e-05, "loss": 0.0502, "step": 25465 }, { "epoch": 1.2929590334534748, "grad_norm": 0.43237102031707764, "learning_rate": 1.138027311031017e-05, "loss": 0.0472, "step": 25470 }, { "epoch": 1.2932128534443372, "grad_norm": 0.2928224205970764, "learning_rate": 1.1378580977037753e-05, "loss": 0.0484, "step": 25475 }, { "epoch": 1.2934666734351998, "grad_norm": 0.2517949044704437, "learning_rate": 1.1376888843765336e-05, "loss": 0.0438, "step": 25480 }, { "epoch": 1.2937204934260622, "grad_norm": 0.3528852164745331, "learning_rate": 1.137519671049292e-05, "loss": 0.0468, "step": 25485 }, { "epoch": 1.2939743134169248, "grad_norm": 0.2391764372587204, "learning_rate": 1.1373504577220502e-05, "loss": 0.0477, "step": 25490 }, { "epoch": 1.2942281334077872, "grad_norm": 0.2708483040332794, "learning_rate": 1.1371812443948087e-05, "loss": 0.0438, "step": 25495 }, { "epoch": 1.2944819533986496, "grad_norm": 0.38952237367630005, "learning_rate": 1.1370120310675669e-05, "loss": 0.0619, "step": 25500 }, { "epoch": 1.2947357733895122, "grad_norm": 0.682826817035675, "learning_rate": 1.1368428177403252e-05, "loss": 0.0586, "step": 25505 }, { "epoch": 1.2949895933803746, "grad_norm": 0.2654576897621155, "learning_rate": 1.1366736044130838e-05, "loss": 0.0527, "step": 25510 }, { "epoch": 1.2952434133712372, "grad_norm": 0.5474327802658081, "learning_rate": 1.136504391085842e-05, "loss": 0.0615, "step": 25515 }, { "epoch": 1.2954972333620995, "grad_norm": 0.263549268245697, "learning_rate": 1.1363351777586003e-05, "loss": 0.0462, "step": 25520 }, { "epoch": 1.2957510533529621, "grad_norm": 0.6336255073547363, "learning_rate": 1.1361659644313587e-05, "loss": 0.053, "step": 25525 }, { "epoch": 1.2960048733438245, "grad_norm": 0.26589730381965637, "learning_rate": 1.135996751104117e-05, "loss": 0.0509, "step": 25530 }, { "epoch": 1.296258693334687, "grad_norm": 0.3139997124671936, "learning_rate": 1.1358275377768755e-05, "loss": 0.057, "step": 25535 }, { "epoch": 1.2965125133255495, "grad_norm": 0.40500542521476746, "learning_rate": 1.1356583244496337e-05, "loss": 0.0496, "step": 25540 }, { "epoch": 1.2967663333164121, "grad_norm": 0.3901219367980957, "learning_rate": 1.135489111122392e-05, "loss": 0.0513, "step": 25545 }, { "epoch": 1.2970201533072745, "grad_norm": 0.22926051914691925, "learning_rate": 1.1353198977951504e-05, "loss": 0.0445, "step": 25550 }, { "epoch": 1.2972739732981369, "grad_norm": 0.3791753351688385, "learning_rate": 1.1351506844679088e-05, "loss": 0.0644, "step": 25555 }, { "epoch": 1.2975277932889995, "grad_norm": 0.7541919350624084, "learning_rate": 1.134981471140667e-05, "loss": 0.0392, "step": 25560 }, { "epoch": 1.2977816132798619, "grad_norm": 0.4487682580947876, "learning_rate": 1.1348122578134255e-05, "loss": 0.0476, "step": 25565 }, { "epoch": 1.2980354332707245, "grad_norm": 0.4413999021053314, "learning_rate": 1.1346430444861838e-05, "loss": 0.0533, "step": 25570 }, { "epoch": 1.2982892532615868, "grad_norm": 0.29243093729019165, "learning_rate": 1.1344738311589422e-05, "loss": 0.052, "step": 25575 }, { "epoch": 1.2985430732524494, "grad_norm": 0.36786124110221863, "learning_rate": 1.1343046178317006e-05, "loss": 0.0542, "step": 25580 }, { "epoch": 1.2987968932433118, "grad_norm": 0.6370408535003662, "learning_rate": 1.1341354045044587e-05, "loss": 0.0532, "step": 25585 }, { "epoch": 1.2990507132341742, "grad_norm": 0.6985453367233276, "learning_rate": 1.1339661911772173e-05, "loss": 0.0422, "step": 25590 }, { "epoch": 1.2993045332250368, "grad_norm": 0.9178361296653748, "learning_rate": 1.1337969778499756e-05, "loss": 0.0451, "step": 25595 }, { "epoch": 1.2995583532158994, "grad_norm": 0.5521905422210693, "learning_rate": 1.1336277645227338e-05, "loss": 0.0528, "step": 25600 }, { "epoch": 1.2998121732067618, "grad_norm": 0.33282098174095154, "learning_rate": 1.1334585511954923e-05, "loss": 0.0559, "step": 25605 }, { "epoch": 1.3000659931976242, "grad_norm": 0.4263562262058258, "learning_rate": 1.1332893378682505e-05, "loss": 0.0561, "step": 25610 }, { "epoch": 1.3003198131884868, "grad_norm": 0.5796880722045898, "learning_rate": 1.1331201245410089e-05, "loss": 0.049, "step": 25615 }, { "epoch": 1.3005736331793492, "grad_norm": 0.2884933650493622, "learning_rate": 1.1329509112137674e-05, "loss": 0.0465, "step": 25620 }, { "epoch": 1.3008274531702118, "grad_norm": 0.3866657018661499, "learning_rate": 1.1327816978865256e-05, "loss": 0.0595, "step": 25625 }, { "epoch": 1.3010812731610741, "grad_norm": 0.5192033052444458, "learning_rate": 1.1326124845592841e-05, "loss": 0.0481, "step": 25630 }, { "epoch": 1.3013350931519367, "grad_norm": 0.7006744146347046, "learning_rate": 1.1324432712320423e-05, "loss": 0.0502, "step": 25635 }, { "epoch": 1.3015889131427991, "grad_norm": 0.3354194760322571, "learning_rate": 1.1322740579048006e-05, "loss": 0.0528, "step": 25640 }, { "epoch": 1.3018427331336615, "grad_norm": 0.3940652310848236, "learning_rate": 1.1321048445775592e-05, "loss": 0.0429, "step": 25645 }, { "epoch": 1.302096553124524, "grad_norm": 0.3817504942417145, "learning_rate": 1.1319356312503173e-05, "loss": 0.0487, "step": 25650 }, { "epoch": 1.3023503731153867, "grad_norm": 0.35483697056770325, "learning_rate": 1.1317664179230757e-05, "loss": 0.0546, "step": 25655 }, { "epoch": 1.302604193106249, "grad_norm": 0.48677176237106323, "learning_rate": 1.131597204595834e-05, "loss": 0.0511, "step": 25660 }, { "epoch": 1.3028580130971115, "grad_norm": 0.2799428701400757, "learning_rate": 1.1314279912685924e-05, "loss": 0.0508, "step": 25665 }, { "epoch": 1.303111833087974, "grad_norm": 0.3958439528942108, "learning_rate": 1.131258777941351e-05, "loss": 0.0571, "step": 25670 }, { "epoch": 1.3033656530788365, "grad_norm": 0.3991915285587311, "learning_rate": 1.1310895646141091e-05, "loss": 0.0592, "step": 25675 }, { "epoch": 1.3036194730696988, "grad_norm": 0.541405200958252, "learning_rate": 1.1309203512868675e-05, "loss": 0.0509, "step": 25680 }, { "epoch": 1.3038732930605614, "grad_norm": 0.4154348075389862, "learning_rate": 1.1307511379596258e-05, "loss": 0.0466, "step": 25685 }, { "epoch": 1.304127113051424, "grad_norm": 0.37647801637649536, "learning_rate": 1.1305819246323842e-05, "loss": 0.0512, "step": 25690 }, { "epoch": 1.3043809330422864, "grad_norm": 0.3786545991897583, "learning_rate": 1.1304127113051424e-05, "loss": 0.045, "step": 25695 }, { "epoch": 1.3046347530331488, "grad_norm": 0.3503737151622772, "learning_rate": 1.1302434979779009e-05, "loss": 0.0596, "step": 25700 }, { "epoch": 1.3048885730240114, "grad_norm": 0.3044377565383911, "learning_rate": 1.130074284650659e-05, "loss": 0.0542, "step": 25705 }, { "epoch": 1.3051423930148738, "grad_norm": 0.37505218386650085, "learning_rate": 1.1299050713234174e-05, "loss": 0.0511, "step": 25710 }, { "epoch": 1.3053962130057364, "grad_norm": 0.5570057034492493, "learning_rate": 1.129735857996176e-05, "loss": 0.0441, "step": 25715 }, { "epoch": 1.3056500329965988, "grad_norm": 0.3466768264770508, "learning_rate": 1.1295666446689341e-05, "loss": 0.0513, "step": 25720 }, { "epoch": 1.3059038529874614, "grad_norm": 0.44723430275917053, "learning_rate": 1.1293974313416927e-05, "loss": 0.0512, "step": 25725 }, { "epoch": 1.3061576729783237, "grad_norm": 0.34250175952911377, "learning_rate": 1.1292282180144508e-05, "loss": 0.0449, "step": 25730 }, { "epoch": 1.3064114929691861, "grad_norm": 0.30145788192749023, "learning_rate": 1.1290590046872092e-05, "loss": 0.0477, "step": 25735 }, { "epoch": 1.3066653129600487, "grad_norm": 0.2720540463924408, "learning_rate": 1.1288897913599677e-05, "loss": 0.0452, "step": 25740 }, { "epoch": 1.3069191329509113, "grad_norm": 0.4038149416446686, "learning_rate": 1.1287205780327259e-05, "loss": 0.0462, "step": 25745 }, { "epoch": 1.3071729529417737, "grad_norm": 0.3033551573753357, "learning_rate": 1.1285513647054843e-05, "loss": 0.046, "step": 25750 }, { "epoch": 1.307426772932636, "grad_norm": 0.48980191349983215, "learning_rate": 1.1283821513782426e-05, "loss": 0.0501, "step": 25755 }, { "epoch": 1.3076805929234987, "grad_norm": 0.382783442735672, "learning_rate": 1.128212938051001e-05, "loss": 0.0501, "step": 25760 }, { "epoch": 1.307934412914361, "grad_norm": 0.3193140923976898, "learning_rate": 1.1280437247237592e-05, "loss": 0.0469, "step": 25765 }, { "epoch": 1.3081882329052237, "grad_norm": 0.4315343499183655, "learning_rate": 1.1278745113965177e-05, "loss": 0.0522, "step": 25770 }, { "epoch": 1.308442052896086, "grad_norm": 0.402089387178421, "learning_rate": 1.127705298069276e-05, "loss": 0.0533, "step": 25775 }, { "epoch": 1.3086958728869487, "grad_norm": 0.3406699299812317, "learning_rate": 1.1275360847420344e-05, "loss": 0.0467, "step": 25780 }, { "epoch": 1.308949692877811, "grad_norm": 0.48056527972221375, "learning_rate": 1.1273668714147927e-05, "loss": 0.059, "step": 25785 }, { "epoch": 1.3092035128686734, "grad_norm": 0.390354186296463, "learning_rate": 1.127197658087551e-05, "loss": 0.0484, "step": 25790 }, { "epoch": 1.309457332859536, "grad_norm": 0.4609234631061554, "learning_rate": 1.1270284447603095e-05, "loss": 0.0498, "step": 25795 }, { "epoch": 1.3097111528503986, "grad_norm": 0.19950836896896362, "learning_rate": 1.1268592314330678e-05, "loss": 0.0433, "step": 25800 }, { "epoch": 1.309964972841261, "grad_norm": 0.23946751654148102, "learning_rate": 1.126690018105826e-05, "loss": 0.0502, "step": 25805 }, { "epoch": 1.3102187928321234, "grad_norm": 0.37170344591140747, "learning_rate": 1.1265208047785845e-05, "loss": 0.0499, "step": 25810 }, { "epoch": 1.310472612822986, "grad_norm": 0.35320132970809937, "learning_rate": 1.1263515914513427e-05, "loss": 0.0483, "step": 25815 }, { "epoch": 1.3107264328138484, "grad_norm": 0.2944350838661194, "learning_rate": 1.1261823781241012e-05, "loss": 0.05, "step": 25820 }, { "epoch": 1.310980252804711, "grad_norm": 0.6497879028320312, "learning_rate": 1.1260131647968596e-05, "loss": 0.0475, "step": 25825 }, { "epoch": 1.3112340727955734, "grad_norm": 0.42111438512802124, "learning_rate": 1.1258439514696178e-05, "loss": 0.047, "step": 25830 }, { "epoch": 1.311487892786436, "grad_norm": 0.2944899797439575, "learning_rate": 1.1256747381423763e-05, "loss": 0.052, "step": 25835 }, { "epoch": 1.3117417127772983, "grad_norm": 0.3662308156490326, "learning_rate": 1.1255055248151345e-05, "loss": 0.0492, "step": 25840 }, { "epoch": 1.3119955327681607, "grad_norm": 0.40842756628990173, "learning_rate": 1.1253363114878928e-05, "loss": 0.053, "step": 25845 }, { "epoch": 1.3122493527590233, "grad_norm": 0.4699631929397583, "learning_rate": 1.1251670981606514e-05, "loss": 0.0571, "step": 25850 }, { "epoch": 1.3125031727498857, "grad_norm": 0.3852125108242035, "learning_rate": 1.1249978848334095e-05, "loss": 0.0473, "step": 25855 }, { "epoch": 1.3127569927407483, "grad_norm": 0.3017500042915344, "learning_rate": 1.1248286715061679e-05, "loss": 0.0454, "step": 25860 }, { "epoch": 1.3130108127316107, "grad_norm": 0.2818055748939514, "learning_rate": 1.1246594581789263e-05, "loss": 0.0413, "step": 25865 }, { "epoch": 1.3132646327224733, "grad_norm": 0.34530168771743774, "learning_rate": 1.1244902448516846e-05, "loss": 0.0443, "step": 25870 }, { "epoch": 1.3135184527133357, "grad_norm": 0.41481852531433105, "learning_rate": 1.1243210315244431e-05, "loss": 0.0523, "step": 25875 }, { "epoch": 1.313772272704198, "grad_norm": 0.293000727891922, "learning_rate": 1.1241518181972013e-05, "loss": 0.052, "step": 25880 }, { "epoch": 1.3140260926950607, "grad_norm": 0.4565964341163635, "learning_rate": 1.1239826048699595e-05, "loss": 0.0477, "step": 25885 }, { "epoch": 1.3142799126859233, "grad_norm": 0.5154722332954407, "learning_rate": 1.123813391542718e-05, "loss": 0.0455, "step": 25890 }, { "epoch": 1.3145337326767856, "grad_norm": 0.33028629422187805, "learning_rate": 1.1236441782154764e-05, "loss": 0.0489, "step": 25895 }, { "epoch": 1.314787552667648, "grad_norm": 0.3494911789894104, "learning_rate": 1.1234749648882346e-05, "loss": 0.0531, "step": 25900 }, { "epoch": 1.3150413726585106, "grad_norm": 0.46097883582115173, "learning_rate": 1.1233057515609931e-05, "loss": 0.0521, "step": 25905 }, { "epoch": 1.315295192649373, "grad_norm": 0.3439639210700989, "learning_rate": 1.1231365382337513e-05, "loss": 0.04, "step": 25910 }, { "epoch": 1.3155490126402356, "grad_norm": 0.28974398970603943, "learning_rate": 1.1229673249065098e-05, "loss": 0.0522, "step": 25915 }, { "epoch": 1.315802832631098, "grad_norm": 0.35030993819236755, "learning_rate": 1.1227981115792681e-05, "loss": 0.0477, "step": 25920 }, { "epoch": 1.3160566526219606, "grad_norm": 0.3437199294567108, "learning_rate": 1.1226288982520263e-05, "loss": 0.0418, "step": 25925 }, { "epoch": 1.316310472612823, "grad_norm": 0.32538580894470215, "learning_rate": 1.1224596849247849e-05, "loss": 0.0499, "step": 25930 }, { "epoch": 1.3165642926036853, "grad_norm": 1.0620239973068237, "learning_rate": 1.122290471597543e-05, "loss": 0.0524, "step": 25935 }, { "epoch": 1.316818112594548, "grad_norm": 0.3068779706954956, "learning_rate": 1.1221212582703014e-05, "loss": 0.0544, "step": 25940 }, { "epoch": 1.3170719325854106, "grad_norm": 0.39146485924720764, "learning_rate": 1.12195204494306e-05, "loss": 0.0482, "step": 25945 }, { "epoch": 1.317325752576273, "grad_norm": 0.7082789540290833, "learning_rate": 1.1217828316158181e-05, "loss": 0.049, "step": 25950 }, { "epoch": 1.3175795725671353, "grad_norm": 0.27648693323135376, "learning_rate": 1.1216136182885765e-05, "loss": 0.0517, "step": 25955 }, { "epoch": 1.317833392557998, "grad_norm": 0.3340929448604584, "learning_rate": 1.1214444049613348e-05, "loss": 0.0419, "step": 25960 }, { "epoch": 1.3180872125488603, "grad_norm": 0.28633975982666016, "learning_rate": 1.1212751916340932e-05, "loss": 0.0463, "step": 25965 }, { "epoch": 1.318341032539723, "grad_norm": 0.3893069624900818, "learning_rate": 1.1211059783068517e-05, "loss": 0.0547, "step": 25970 }, { "epoch": 1.3185948525305853, "grad_norm": 0.39877814054489136, "learning_rate": 1.1209367649796099e-05, "loss": 0.0544, "step": 25975 }, { "epoch": 1.3188486725214479, "grad_norm": 0.46034538745880127, "learning_rate": 1.1207675516523682e-05, "loss": 0.0521, "step": 25980 }, { "epoch": 1.3191024925123103, "grad_norm": 0.4608680009841919, "learning_rate": 1.1205983383251266e-05, "loss": 0.0545, "step": 25985 }, { "epoch": 1.3193563125031726, "grad_norm": 0.4240970313549042, "learning_rate": 1.120429124997885e-05, "loss": 0.0551, "step": 25990 }, { "epoch": 1.3196101324940352, "grad_norm": 0.2486318200826645, "learning_rate": 1.1202599116706431e-05, "loss": 0.0487, "step": 25995 }, { "epoch": 1.3198639524848976, "grad_norm": 0.3128660321235657, "learning_rate": 1.1200906983434017e-05, "loss": 0.0485, "step": 26000 }, { "epoch": 1.3201177724757602, "grad_norm": 0.36881980299949646, "learning_rate": 1.11992148501616e-05, "loss": 0.0485, "step": 26005 }, { "epoch": 1.3203715924666226, "grad_norm": 0.49103283882141113, "learning_rate": 1.1197522716889182e-05, "loss": 0.0563, "step": 26010 }, { "epoch": 1.3206254124574852, "grad_norm": 0.2944199740886688, "learning_rate": 1.1195830583616767e-05, "loss": 0.0502, "step": 26015 }, { "epoch": 1.3208792324483476, "grad_norm": 0.29078707098960876, "learning_rate": 1.1194138450344349e-05, "loss": 0.0485, "step": 26020 }, { "epoch": 1.32113305243921, "grad_norm": 0.5979177355766296, "learning_rate": 1.1192446317071934e-05, "loss": 0.0465, "step": 26025 }, { "epoch": 1.3213868724300726, "grad_norm": 0.6528639197349548, "learning_rate": 1.1190754183799518e-05, "loss": 0.0519, "step": 26030 }, { "epoch": 1.3216406924209352, "grad_norm": 0.44926077127456665, "learning_rate": 1.11890620505271e-05, "loss": 0.0522, "step": 26035 }, { "epoch": 1.3218945124117976, "grad_norm": 0.4386170506477356, "learning_rate": 1.1187369917254685e-05, "loss": 0.0482, "step": 26040 }, { "epoch": 1.32214833240266, "grad_norm": 0.3527389168739319, "learning_rate": 1.1185677783982267e-05, "loss": 0.0573, "step": 26045 }, { "epoch": 1.3224021523935225, "grad_norm": 0.24854600429534912, "learning_rate": 1.118398565070985e-05, "loss": 0.0373, "step": 26050 }, { "epoch": 1.322655972384385, "grad_norm": 0.45639580488204956, "learning_rate": 1.1182293517437436e-05, "loss": 0.0534, "step": 26055 }, { "epoch": 1.3229097923752475, "grad_norm": 1.2171883583068848, "learning_rate": 1.1180601384165017e-05, "loss": 0.0497, "step": 26060 }, { "epoch": 1.32316361236611, "grad_norm": 0.4451811909675598, "learning_rate": 1.1178909250892603e-05, "loss": 0.0467, "step": 26065 }, { "epoch": 1.3234174323569725, "grad_norm": 0.2924448251724243, "learning_rate": 1.1177217117620184e-05, "loss": 0.062, "step": 26070 }, { "epoch": 1.323671252347835, "grad_norm": 0.2893722355365753, "learning_rate": 1.1175524984347768e-05, "loss": 0.046, "step": 26075 }, { "epoch": 1.3239250723386973, "grad_norm": 0.21968765556812286, "learning_rate": 1.1173832851075353e-05, "loss": 0.0409, "step": 26080 }, { "epoch": 1.3241788923295599, "grad_norm": 0.4578390419483185, "learning_rate": 1.1172140717802935e-05, "loss": 0.049, "step": 26085 }, { "epoch": 1.3244327123204225, "grad_norm": 0.41233834624290466, "learning_rate": 1.1170448584530517e-05, "loss": 0.0499, "step": 26090 }, { "epoch": 1.3246865323112849, "grad_norm": 0.5314094424247742, "learning_rate": 1.1168756451258102e-05, "loss": 0.0514, "step": 26095 }, { "epoch": 1.3249403523021472, "grad_norm": 0.3177417814731598, "learning_rate": 1.1167064317985686e-05, "loss": 0.0518, "step": 26100 }, { "epoch": 1.3251941722930098, "grad_norm": 0.321321040391922, "learning_rate": 1.1165372184713268e-05, "loss": 0.0467, "step": 26105 }, { "epoch": 1.3254479922838722, "grad_norm": 0.5315613150596619, "learning_rate": 1.1163680051440853e-05, "loss": 0.0553, "step": 26110 }, { "epoch": 1.3257018122747348, "grad_norm": 0.4163055717945099, "learning_rate": 1.1161987918168435e-05, "loss": 0.0518, "step": 26115 }, { "epoch": 1.3259556322655972, "grad_norm": 0.5081636905670166, "learning_rate": 1.116029578489602e-05, "loss": 0.0479, "step": 26120 }, { "epoch": 1.3262094522564598, "grad_norm": 0.37250816822052, "learning_rate": 1.1158603651623603e-05, "loss": 0.052, "step": 26125 }, { "epoch": 1.3264632722473222, "grad_norm": 0.4670708477497101, "learning_rate": 1.1156911518351185e-05, "loss": 0.059, "step": 26130 }, { "epoch": 1.3267170922381846, "grad_norm": 0.28547587990760803, "learning_rate": 1.115521938507877e-05, "loss": 0.0557, "step": 26135 }, { "epoch": 1.3269709122290472, "grad_norm": 0.3063032627105713, "learning_rate": 1.1153527251806352e-05, "loss": 0.0508, "step": 26140 }, { "epoch": 1.3272247322199098, "grad_norm": 0.2864580452442169, "learning_rate": 1.1151835118533936e-05, "loss": 0.0552, "step": 26145 }, { "epoch": 1.3274785522107722, "grad_norm": 0.32648247480392456, "learning_rate": 1.1150142985261521e-05, "loss": 0.0478, "step": 26150 }, { "epoch": 1.3277323722016345, "grad_norm": 0.482799768447876, "learning_rate": 1.1148450851989103e-05, "loss": 0.0559, "step": 26155 }, { "epoch": 1.3279861921924971, "grad_norm": 0.3012193739414215, "learning_rate": 1.1146758718716688e-05, "loss": 0.0579, "step": 26160 }, { "epoch": 1.3282400121833595, "grad_norm": 0.332803338766098, "learning_rate": 1.114506658544427e-05, "loss": 0.0492, "step": 26165 }, { "epoch": 1.3284938321742221, "grad_norm": 0.46283581852912903, "learning_rate": 1.1143374452171854e-05, "loss": 0.0457, "step": 26170 }, { "epoch": 1.3287476521650845, "grad_norm": 0.39792996644973755, "learning_rate": 1.1141682318899439e-05, "loss": 0.0524, "step": 26175 }, { "epoch": 1.329001472155947, "grad_norm": 0.4443744122982025, "learning_rate": 1.113999018562702e-05, "loss": 0.0535, "step": 26180 }, { "epoch": 1.3292552921468095, "grad_norm": 0.3356640934944153, "learning_rate": 1.1138298052354604e-05, "loss": 0.0464, "step": 26185 }, { "epoch": 1.3295091121376719, "grad_norm": 0.4023672640323639, "learning_rate": 1.1136605919082188e-05, "loss": 0.0567, "step": 26190 }, { "epoch": 1.3297629321285345, "grad_norm": 0.43528980016708374, "learning_rate": 1.1134913785809771e-05, "loss": 0.0532, "step": 26195 }, { "epoch": 1.3300167521193969, "grad_norm": 0.7247572541236877, "learning_rate": 1.1133221652537353e-05, "loss": 0.052, "step": 26200 }, { "epoch": 1.3302705721102595, "grad_norm": 0.48162537813186646, "learning_rate": 1.1131529519264938e-05, "loss": 0.0607, "step": 26205 }, { "epoch": 1.3305243921011218, "grad_norm": 0.42504459619522095, "learning_rate": 1.1129837385992522e-05, "loss": 0.0472, "step": 26210 }, { "epoch": 1.3307782120919844, "grad_norm": 0.3819892108440399, "learning_rate": 1.1128145252720106e-05, "loss": 0.0466, "step": 26215 }, { "epoch": 1.3310320320828468, "grad_norm": 0.505711019039154, "learning_rate": 1.1126453119447689e-05, "loss": 0.0595, "step": 26220 }, { "epoch": 1.3312858520737092, "grad_norm": 0.44940781593322754, "learning_rate": 1.1124760986175271e-05, "loss": 0.0543, "step": 26225 }, { "epoch": 1.3315396720645718, "grad_norm": 0.8618845343589783, "learning_rate": 1.1123068852902856e-05, "loss": 0.0493, "step": 26230 }, { "epoch": 1.3317934920554344, "grad_norm": 0.5297338962554932, "learning_rate": 1.112137671963044e-05, "loss": 0.0579, "step": 26235 }, { "epoch": 1.3320473120462968, "grad_norm": 0.362981379032135, "learning_rate": 1.1119684586358022e-05, "loss": 0.0471, "step": 26240 }, { "epoch": 1.3323011320371592, "grad_norm": 0.3708239495754242, "learning_rate": 1.1117992453085607e-05, "loss": 0.051, "step": 26245 }, { "epoch": 1.3325549520280218, "grad_norm": 0.6170402765274048, "learning_rate": 1.1116300319813189e-05, "loss": 0.0552, "step": 26250 }, { "epoch": 1.3328087720188841, "grad_norm": 0.3187443017959595, "learning_rate": 1.1114608186540772e-05, "loss": 0.0465, "step": 26255 }, { "epoch": 1.3330625920097467, "grad_norm": 0.42820754647254944, "learning_rate": 1.1112916053268357e-05, "loss": 0.0607, "step": 26260 }, { "epoch": 1.3333164120006091, "grad_norm": 0.3025449812412262, "learning_rate": 1.111122391999594e-05, "loss": 0.0456, "step": 26265 }, { "epoch": 1.3335702319914717, "grad_norm": 0.40699583292007446, "learning_rate": 1.1109531786723525e-05, "loss": 0.0461, "step": 26270 }, { "epoch": 1.3338240519823341, "grad_norm": 0.3597828447818756, "learning_rate": 1.1107839653451106e-05, "loss": 0.0546, "step": 26275 }, { "epoch": 1.3340778719731965, "grad_norm": 0.2918253540992737, "learning_rate": 1.110614752017869e-05, "loss": 0.0478, "step": 26280 }, { "epoch": 1.334331691964059, "grad_norm": 0.36164140701293945, "learning_rate": 1.1104455386906275e-05, "loss": 0.0475, "step": 26285 }, { "epoch": 1.3345855119549217, "grad_norm": 0.3688763976097107, "learning_rate": 1.1102763253633857e-05, "loss": 0.0578, "step": 26290 }, { "epoch": 1.334839331945784, "grad_norm": 0.37201929092407227, "learning_rate": 1.1101071120361439e-05, "loss": 0.0551, "step": 26295 }, { "epoch": 1.3350931519366465, "grad_norm": 0.2655457854270935, "learning_rate": 1.1099378987089024e-05, "loss": 0.0497, "step": 26300 }, { "epoch": 1.335346971927509, "grad_norm": 0.37188592553138733, "learning_rate": 1.1097686853816608e-05, "loss": 0.0532, "step": 26305 }, { "epoch": 1.3356007919183714, "grad_norm": 0.3306806683540344, "learning_rate": 1.1095994720544191e-05, "loss": 0.0599, "step": 26310 }, { "epoch": 1.335854611909234, "grad_norm": 0.3516598045825958, "learning_rate": 1.1094302587271775e-05, "loss": 0.0461, "step": 26315 }, { "epoch": 1.3361084319000964, "grad_norm": 0.2789323031902313, "learning_rate": 1.1092610453999357e-05, "loss": 0.0518, "step": 26320 }, { "epoch": 1.336362251890959, "grad_norm": 0.3232646584510803, "learning_rate": 1.1090918320726942e-05, "loss": 0.0489, "step": 26325 }, { "epoch": 1.3366160718818214, "grad_norm": 1.0127755403518677, "learning_rate": 1.1089226187454525e-05, "loss": 0.0518, "step": 26330 }, { "epoch": 1.3368698918726838, "grad_norm": 0.2616908550262451, "learning_rate": 1.1087534054182107e-05, "loss": 0.0481, "step": 26335 }, { "epoch": 1.3371237118635464, "grad_norm": 0.46473437547683716, "learning_rate": 1.1085841920909692e-05, "loss": 0.053, "step": 26340 }, { "epoch": 1.3373775318544088, "grad_norm": 0.34728309512138367, "learning_rate": 1.1084149787637274e-05, "loss": 0.0489, "step": 26345 }, { "epoch": 1.3376313518452714, "grad_norm": 0.3350659906864166, "learning_rate": 1.1082457654364858e-05, "loss": 0.0474, "step": 26350 }, { "epoch": 1.3378851718361338, "grad_norm": 0.43599680066108704, "learning_rate": 1.1080765521092443e-05, "loss": 0.0467, "step": 26355 }, { "epoch": 1.3381389918269964, "grad_norm": 0.42427605390548706, "learning_rate": 1.1079073387820025e-05, "loss": 0.0498, "step": 26360 }, { "epoch": 1.3383928118178587, "grad_norm": 0.4834648668766022, "learning_rate": 1.107738125454761e-05, "loss": 0.0596, "step": 26365 }, { "epoch": 1.3386466318087211, "grad_norm": 0.2929410934448242, "learning_rate": 1.1075689121275192e-05, "loss": 0.0538, "step": 26370 }, { "epoch": 1.3389004517995837, "grad_norm": 0.578696072101593, "learning_rate": 1.1073996988002776e-05, "loss": 0.0525, "step": 26375 }, { "epoch": 1.3391542717904463, "grad_norm": 0.4364106059074402, "learning_rate": 1.107230485473036e-05, "loss": 0.0528, "step": 26380 }, { "epoch": 1.3394080917813087, "grad_norm": 0.406345397233963, "learning_rate": 1.1070612721457943e-05, "loss": 0.0466, "step": 26385 }, { "epoch": 1.339661911772171, "grad_norm": 0.4139547049999237, "learning_rate": 1.1068920588185526e-05, "loss": 0.0534, "step": 26390 }, { "epoch": 1.3399157317630337, "grad_norm": 0.28089866042137146, "learning_rate": 1.106722845491311e-05, "loss": 0.0519, "step": 26395 }, { "epoch": 1.340169551753896, "grad_norm": 0.5075027346611023, "learning_rate": 1.1065536321640693e-05, "loss": 0.0598, "step": 26400 }, { "epoch": 1.3404233717447587, "grad_norm": 0.3053726255893707, "learning_rate": 1.1063844188368279e-05, "loss": 0.0403, "step": 26405 }, { "epoch": 1.340677191735621, "grad_norm": 0.48526138067245483, "learning_rate": 1.106215205509586e-05, "loss": 0.0561, "step": 26410 }, { "epoch": 1.3409310117264837, "grad_norm": 0.4186187982559204, "learning_rate": 1.1060459921823444e-05, "loss": 0.0574, "step": 26415 }, { "epoch": 1.341184831717346, "grad_norm": 0.6404673457145691, "learning_rate": 1.1058767788551027e-05, "loss": 0.0542, "step": 26420 }, { "epoch": 1.3414386517082084, "grad_norm": 0.28784051537513733, "learning_rate": 1.1057075655278611e-05, "loss": 0.0543, "step": 26425 }, { "epoch": 1.341692471699071, "grad_norm": 0.48208898305892944, "learning_rate": 1.1055383522006193e-05, "loss": 0.0492, "step": 26430 }, { "epoch": 1.3419462916899336, "grad_norm": 0.405626505613327, "learning_rate": 1.1053691388733778e-05, "loss": 0.0535, "step": 26435 }, { "epoch": 1.342200111680796, "grad_norm": 0.39484503865242004, "learning_rate": 1.1051999255461362e-05, "loss": 0.0552, "step": 26440 }, { "epoch": 1.3424539316716584, "grad_norm": 0.28524714708328247, "learning_rate": 1.1050307122188944e-05, "loss": 0.0544, "step": 26445 }, { "epoch": 1.342707751662521, "grad_norm": 0.27638357877731323, "learning_rate": 1.1048614988916529e-05, "loss": 0.0517, "step": 26450 }, { "epoch": 1.3429615716533834, "grad_norm": 0.6675527095794678, "learning_rate": 1.104692285564411e-05, "loss": 0.0635, "step": 26455 }, { "epoch": 1.343215391644246, "grad_norm": 0.24780261516571045, "learning_rate": 1.1045230722371696e-05, "loss": 0.0421, "step": 26460 }, { "epoch": 1.3434692116351084, "grad_norm": 0.3185769021511078, "learning_rate": 1.104353858909928e-05, "loss": 0.0428, "step": 26465 }, { "epoch": 1.343723031625971, "grad_norm": 0.38865941762924194, "learning_rate": 1.1041846455826861e-05, "loss": 0.0616, "step": 26470 }, { "epoch": 1.3439768516168333, "grad_norm": 0.44646382331848145, "learning_rate": 1.1040154322554446e-05, "loss": 0.0631, "step": 26475 }, { "epoch": 1.3442306716076957, "grad_norm": 0.25871074199676514, "learning_rate": 1.1038462189282028e-05, "loss": 0.0424, "step": 26480 }, { "epoch": 1.3444844915985583, "grad_norm": 0.35550186038017273, "learning_rate": 1.1036770056009612e-05, "loss": 0.0449, "step": 26485 }, { "epoch": 1.344738311589421, "grad_norm": 0.3502696752548218, "learning_rate": 1.1035077922737195e-05, "loss": 0.0565, "step": 26490 }, { "epoch": 1.3449921315802833, "grad_norm": 0.49729862809181213, "learning_rate": 1.1033385789464779e-05, "loss": 0.0475, "step": 26495 }, { "epoch": 1.3452459515711457, "grad_norm": 0.35838475823402405, "learning_rate": 1.103169365619236e-05, "loss": 0.0508, "step": 26500 }, { "epoch": 1.3454997715620083, "grad_norm": 0.34287241101264954, "learning_rate": 1.1030001522919946e-05, "loss": 0.0574, "step": 26505 }, { "epoch": 1.3457535915528707, "grad_norm": 0.26396411657333374, "learning_rate": 1.102830938964753e-05, "loss": 0.0536, "step": 26510 }, { "epoch": 1.3460074115437333, "grad_norm": 0.31700873374938965, "learning_rate": 1.1026617256375113e-05, "loss": 0.0564, "step": 26515 }, { "epoch": 1.3462612315345956, "grad_norm": 0.3169967234134674, "learning_rate": 1.1024925123102697e-05, "loss": 0.0393, "step": 26520 }, { "epoch": 1.3465150515254583, "grad_norm": 0.39078614115715027, "learning_rate": 1.1023232989830279e-05, "loss": 0.0432, "step": 26525 }, { "epoch": 1.3467688715163206, "grad_norm": 0.3944978713989258, "learning_rate": 1.1021540856557864e-05, "loss": 0.0499, "step": 26530 }, { "epoch": 1.347022691507183, "grad_norm": 0.5801070332527161, "learning_rate": 1.1019848723285447e-05, "loss": 0.051, "step": 26535 }, { "epoch": 1.3472765114980456, "grad_norm": 0.5324280261993408, "learning_rate": 1.101815659001303e-05, "loss": 0.0494, "step": 26540 }, { "epoch": 1.347530331488908, "grad_norm": 0.5855595469474792, "learning_rate": 1.1016464456740614e-05, "loss": 0.0562, "step": 26545 }, { "epoch": 1.3477841514797706, "grad_norm": 0.3513774275779724, "learning_rate": 1.1014772323468196e-05, "loss": 0.0463, "step": 26550 }, { "epoch": 1.348037971470633, "grad_norm": 0.4091087877750397, "learning_rate": 1.1013080190195781e-05, "loss": 0.0414, "step": 26555 }, { "epoch": 1.3482917914614956, "grad_norm": 0.27667826414108276, "learning_rate": 1.1011388056923365e-05, "loss": 0.0423, "step": 26560 }, { "epoch": 1.348545611452358, "grad_norm": 0.2812269628047943, "learning_rate": 1.1009695923650947e-05, "loss": 0.0389, "step": 26565 }, { "epoch": 1.3487994314432203, "grad_norm": 0.31701719760894775, "learning_rate": 1.1008003790378532e-05, "loss": 0.052, "step": 26570 }, { "epoch": 1.349053251434083, "grad_norm": 0.24721567332744598, "learning_rate": 1.1006311657106114e-05, "loss": 0.042, "step": 26575 }, { "epoch": 1.3493070714249455, "grad_norm": 0.32528063654899597, "learning_rate": 1.1004619523833698e-05, "loss": 0.0449, "step": 26580 }, { "epoch": 1.349560891415808, "grad_norm": 0.35122808814048767, "learning_rate": 1.1002927390561283e-05, "loss": 0.0546, "step": 26585 }, { "epoch": 1.3498147114066703, "grad_norm": 0.39924734830856323, "learning_rate": 1.1001235257288865e-05, "loss": 0.052, "step": 26590 }, { "epoch": 1.350068531397533, "grad_norm": 0.38985615968704224, "learning_rate": 1.0999543124016448e-05, "loss": 0.0497, "step": 26595 }, { "epoch": 1.3503223513883953, "grad_norm": 0.4624733328819275, "learning_rate": 1.0997850990744032e-05, "loss": 0.041, "step": 26600 }, { "epoch": 1.350576171379258, "grad_norm": 0.2167605310678482, "learning_rate": 1.0996158857471615e-05, "loss": 0.0487, "step": 26605 }, { "epoch": 1.3508299913701203, "grad_norm": 0.47157183289527893, "learning_rate": 1.09944667241992e-05, "loss": 0.0501, "step": 26610 }, { "epoch": 1.3510838113609829, "grad_norm": 0.2828313410282135, "learning_rate": 1.0992774590926782e-05, "loss": 0.0461, "step": 26615 }, { "epoch": 1.3513376313518453, "grad_norm": 0.3092103898525238, "learning_rate": 1.0991082457654366e-05, "loss": 0.0462, "step": 26620 }, { "epoch": 1.3515914513427076, "grad_norm": 1.7850135564804077, "learning_rate": 1.098939032438195e-05, "loss": 0.0522, "step": 26625 }, { "epoch": 1.3518452713335702, "grad_norm": 0.596710205078125, "learning_rate": 1.0987698191109533e-05, "loss": 0.0459, "step": 26630 }, { "epoch": 1.3520990913244328, "grad_norm": 0.5141996145248413, "learning_rate": 1.0986006057837115e-05, "loss": 0.0423, "step": 26635 }, { "epoch": 1.3523529113152952, "grad_norm": 0.24726712703704834, "learning_rate": 1.09843139245647e-05, "loss": 0.0492, "step": 26640 }, { "epoch": 1.3526067313061576, "grad_norm": 0.4391964077949524, "learning_rate": 1.0982621791292284e-05, "loss": 0.0477, "step": 26645 }, { "epoch": 1.3528605512970202, "grad_norm": 0.7240193486213684, "learning_rate": 1.0980929658019865e-05, "loss": 0.0449, "step": 26650 }, { "epoch": 1.3531143712878826, "grad_norm": 0.3740382492542267, "learning_rate": 1.097923752474745e-05, "loss": 0.0519, "step": 26655 }, { "epoch": 1.3533681912787452, "grad_norm": 0.3421389162540436, "learning_rate": 1.0977545391475033e-05, "loss": 0.0401, "step": 26660 }, { "epoch": 1.3536220112696076, "grad_norm": 0.34667646884918213, "learning_rate": 1.0975853258202618e-05, "loss": 0.0411, "step": 26665 }, { "epoch": 1.3538758312604702, "grad_norm": 0.28433099389076233, "learning_rate": 1.09741611249302e-05, "loss": 0.0476, "step": 26670 }, { "epoch": 1.3541296512513326, "grad_norm": 0.41038647294044495, "learning_rate": 1.0972468991657783e-05, "loss": 0.0419, "step": 26675 }, { "epoch": 1.354383471242195, "grad_norm": 0.3607024550437927, "learning_rate": 1.0970776858385368e-05, "loss": 0.0516, "step": 26680 }, { "epoch": 1.3546372912330575, "grad_norm": 0.2682955265045166, "learning_rate": 1.096908472511295e-05, "loss": 0.05, "step": 26685 }, { "epoch": 1.35489111122392, "grad_norm": 0.3467102646827698, "learning_rate": 1.0967392591840534e-05, "loss": 0.0475, "step": 26690 }, { "epoch": 1.3551449312147825, "grad_norm": 0.3220283091068268, "learning_rate": 1.0965700458568117e-05, "loss": 0.0461, "step": 26695 }, { "epoch": 1.355398751205645, "grad_norm": 0.35406333208084106, "learning_rate": 1.0964008325295701e-05, "loss": 0.0526, "step": 26700 }, { "epoch": 1.3556525711965075, "grad_norm": 0.3669520616531372, "learning_rate": 1.0962316192023286e-05, "loss": 0.0453, "step": 26705 }, { "epoch": 1.3559063911873699, "grad_norm": 0.5051359534263611, "learning_rate": 1.0960624058750868e-05, "loss": 0.0543, "step": 26710 }, { "epoch": 1.3561602111782323, "grad_norm": 0.33588844537734985, "learning_rate": 1.0958931925478452e-05, "loss": 0.0479, "step": 26715 }, { "epoch": 1.3564140311690949, "grad_norm": 0.29593461751937866, "learning_rate": 1.0957239792206035e-05, "loss": 0.0385, "step": 26720 }, { "epoch": 1.3566678511599575, "grad_norm": 0.3803567886352539, "learning_rate": 1.0955547658933619e-05, "loss": 0.0549, "step": 26725 }, { "epoch": 1.3569216711508199, "grad_norm": 0.6764376163482666, "learning_rate": 1.09538555256612e-05, "loss": 0.0641, "step": 26730 }, { "epoch": 1.3571754911416822, "grad_norm": 0.3458918631076813, "learning_rate": 1.0952163392388786e-05, "loss": 0.045, "step": 26735 }, { "epoch": 1.3574293111325448, "grad_norm": 0.45418867468833923, "learning_rate": 1.095047125911637e-05, "loss": 0.0469, "step": 26740 }, { "epoch": 1.3576831311234072, "grad_norm": 0.3401700258255005, "learning_rate": 1.0948779125843951e-05, "loss": 0.0508, "step": 26745 }, { "epoch": 1.3579369511142698, "grad_norm": 0.39936044812202454, "learning_rate": 1.0947086992571536e-05, "loss": 0.0469, "step": 26750 }, { "epoch": 1.3581907711051322, "grad_norm": 0.45466601848602295, "learning_rate": 1.0945394859299118e-05, "loss": 0.0469, "step": 26755 }, { "epoch": 1.3584445910959948, "grad_norm": 0.3149279057979584, "learning_rate": 1.0943702726026703e-05, "loss": 0.0537, "step": 26760 }, { "epoch": 1.3586984110868572, "grad_norm": 0.3702044188976288, "learning_rate": 1.0942010592754287e-05, "loss": 0.0425, "step": 26765 }, { "epoch": 1.3589522310777196, "grad_norm": 0.33377188444137573, "learning_rate": 1.0940318459481869e-05, "loss": 0.0511, "step": 26770 }, { "epoch": 1.3592060510685822, "grad_norm": 0.2742324769496918, "learning_rate": 1.0938626326209454e-05, "loss": 0.042, "step": 26775 }, { "epoch": 1.3594598710594448, "grad_norm": 0.4314764142036438, "learning_rate": 1.0936934192937036e-05, "loss": 0.046, "step": 26780 }, { "epoch": 1.3597136910503071, "grad_norm": 0.44701406359672546, "learning_rate": 1.093524205966462e-05, "loss": 0.0511, "step": 26785 }, { "epoch": 1.3599675110411695, "grad_norm": 0.34280073642730713, "learning_rate": 1.0933549926392205e-05, "loss": 0.0533, "step": 26790 }, { "epoch": 1.3602213310320321, "grad_norm": 0.4505353271961212, "learning_rate": 1.0931857793119787e-05, "loss": 0.0584, "step": 26795 }, { "epoch": 1.3604751510228945, "grad_norm": 0.3516581356525421, "learning_rate": 1.0930165659847372e-05, "loss": 0.0501, "step": 26800 }, { "epoch": 1.3607289710137571, "grad_norm": 0.38294366002082825, "learning_rate": 1.0928473526574954e-05, "loss": 0.0412, "step": 26805 }, { "epoch": 1.3609827910046195, "grad_norm": 0.2905791401863098, "learning_rate": 1.0926781393302537e-05, "loss": 0.0421, "step": 26810 }, { "epoch": 1.361236610995482, "grad_norm": 0.45999136567115784, "learning_rate": 1.0925089260030122e-05, "loss": 0.0564, "step": 26815 }, { "epoch": 1.3614904309863445, "grad_norm": 0.29272863268852234, "learning_rate": 1.0923397126757704e-05, "loss": 0.042, "step": 26820 }, { "epoch": 1.3617442509772069, "grad_norm": 0.42476195096969604, "learning_rate": 1.0921704993485288e-05, "loss": 0.0474, "step": 26825 }, { "epoch": 1.3619980709680695, "grad_norm": 0.3225775361061096, "learning_rate": 1.0920012860212871e-05, "loss": 0.0546, "step": 26830 }, { "epoch": 1.3622518909589318, "grad_norm": 0.6205003261566162, "learning_rate": 1.0918320726940455e-05, "loss": 0.0485, "step": 26835 }, { "epoch": 1.3625057109497944, "grad_norm": 0.2589777112007141, "learning_rate": 1.0916628593668037e-05, "loss": 0.0493, "step": 26840 }, { "epoch": 1.3627595309406568, "grad_norm": 0.44953614473342896, "learning_rate": 1.0914936460395622e-05, "loss": 0.0607, "step": 26845 }, { "epoch": 1.3630133509315194, "grad_norm": 0.3794015944004059, "learning_rate": 1.0913244327123206e-05, "loss": 0.0462, "step": 26850 }, { "epoch": 1.3632671709223818, "grad_norm": 0.35074856877326965, "learning_rate": 1.0911552193850789e-05, "loss": 0.0481, "step": 26855 }, { "epoch": 1.3635209909132442, "grad_norm": 0.9279066920280457, "learning_rate": 1.0909860060578373e-05, "loss": 0.0451, "step": 26860 }, { "epoch": 1.3637748109041068, "grad_norm": 0.5168582797050476, "learning_rate": 1.0908167927305954e-05, "loss": 0.049, "step": 26865 }, { "epoch": 1.3640286308949694, "grad_norm": 0.3608698844909668, "learning_rate": 1.090647579403354e-05, "loss": 0.0502, "step": 26870 }, { "epoch": 1.3642824508858318, "grad_norm": 0.5553390383720398, "learning_rate": 1.0904783660761122e-05, "loss": 0.0471, "step": 26875 }, { "epoch": 1.3645362708766942, "grad_norm": 0.28950929641723633, "learning_rate": 1.0903091527488705e-05, "loss": 0.0463, "step": 26880 }, { "epoch": 1.3647900908675568, "grad_norm": 0.4183814525604248, "learning_rate": 1.090139939421629e-05, "loss": 0.0508, "step": 26885 }, { "epoch": 1.3650439108584191, "grad_norm": 0.3526676893234253, "learning_rate": 1.0899707260943872e-05, "loss": 0.0442, "step": 26890 }, { "epoch": 1.3652977308492817, "grad_norm": 0.2606235146522522, "learning_rate": 1.0898015127671456e-05, "loss": 0.0496, "step": 26895 }, { "epoch": 1.3655515508401441, "grad_norm": 0.3592001795768738, "learning_rate": 1.089632299439904e-05, "loss": 0.0474, "step": 26900 }, { "epoch": 1.3658053708310067, "grad_norm": 0.36163032054901123, "learning_rate": 1.0894630861126623e-05, "loss": 0.0582, "step": 26905 }, { "epoch": 1.366059190821869, "grad_norm": 0.3937353789806366, "learning_rate": 1.0892938727854208e-05, "loss": 0.0497, "step": 26910 }, { "epoch": 1.3663130108127315, "grad_norm": 0.37750929594039917, "learning_rate": 1.089124659458179e-05, "loss": 0.0506, "step": 26915 }, { "epoch": 1.366566830803594, "grad_norm": 0.33422794938087463, "learning_rate": 1.0889554461309373e-05, "loss": 0.0427, "step": 26920 }, { "epoch": 1.3668206507944567, "grad_norm": 0.36222967505455017, "learning_rate": 1.0887862328036957e-05, "loss": 0.0571, "step": 26925 }, { "epoch": 1.367074470785319, "grad_norm": 0.33056381344795227, "learning_rate": 1.088617019476454e-05, "loss": 0.0397, "step": 26930 }, { "epoch": 1.3673282907761815, "grad_norm": 0.3641473054885864, "learning_rate": 1.0884478061492122e-05, "loss": 0.0448, "step": 26935 }, { "epoch": 1.367582110767044, "grad_norm": 0.4339211583137512, "learning_rate": 1.0882785928219708e-05, "loss": 0.0483, "step": 26940 }, { "epoch": 1.3678359307579064, "grad_norm": 0.31715840101242065, "learning_rate": 1.0881093794947291e-05, "loss": 0.0501, "step": 26945 }, { "epoch": 1.368089750748769, "grad_norm": 0.32084155082702637, "learning_rate": 1.0879401661674875e-05, "loss": 0.0495, "step": 26950 }, { "epoch": 1.3683435707396314, "grad_norm": 0.40496891736984253, "learning_rate": 1.0877709528402458e-05, "loss": 0.0462, "step": 26955 }, { "epoch": 1.368597390730494, "grad_norm": 0.289095014333725, "learning_rate": 1.087601739513004e-05, "loss": 0.0506, "step": 26960 }, { "epoch": 1.3688512107213564, "grad_norm": 0.5172923803329468, "learning_rate": 1.0874325261857625e-05, "loss": 0.0424, "step": 26965 }, { "epoch": 1.3691050307122188, "grad_norm": 0.30263659358024597, "learning_rate": 1.0872633128585209e-05, "loss": 0.0437, "step": 26970 }, { "epoch": 1.3693588507030814, "grad_norm": 0.3625158369541168, "learning_rate": 1.087094099531279e-05, "loss": 0.0478, "step": 26975 }, { "epoch": 1.369612670693944, "grad_norm": 0.5003167390823364, "learning_rate": 1.0869248862040376e-05, "loss": 0.0494, "step": 26980 }, { "epoch": 1.3698664906848064, "grad_norm": 0.42639175057411194, "learning_rate": 1.0867556728767958e-05, "loss": 0.0513, "step": 26985 }, { "epoch": 1.3701203106756688, "grad_norm": 0.623604416847229, "learning_rate": 1.0865864595495541e-05, "loss": 0.0526, "step": 26990 }, { "epoch": 1.3703741306665314, "grad_norm": 0.4742110073566437, "learning_rate": 1.0864172462223127e-05, "loss": 0.051, "step": 26995 }, { "epoch": 1.3706279506573937, "grad_norm": 0.623345673084259, "learning_rate": 1.0862480328950708e-05, "loss": 0.0437, "step": 27000 }, { "epoch": 1.3708817706482563, "grad_norm": 0.3437178134918213, "learning_rate": 1.0860788195678294e-05, "loss": 0.0458, "step": 27005 }, { "epoch": 1.3711355906391187, "grad_norm": 0.288751482963562, "learning_rate": 1.0859096062405876e-05, "loss": 0.0515, "step": 27010 }, { "epoch": 1.3713894106299813, "grad_norm": 0.2961622476577759, "learning_rate": 1.0857403929133459e-05, "loss": 0.0535, "step": 27015 }, { "epoch": 1.3716432306208437, "grad_norm": 0.2858904302120209, "learning_rate": 1.0855711795861044e-05, "loss": 0.0408, "step": 27020 }, { "epoch": 1.371897050611706, "grad_norm": 0.25875383615493774, "learning_rate": 1.0854019662588626e-05, "loss": 0.0423, "step": 27025 }, { "epoch": 1.3721508706025687, "grad_norm": 0.9569272398948669, "learning_rate": 1.085232752931621e-05, "loss": 0.044, "step": 27030 }, { "epoch": 1.372404690593431, "grad_norm": 0.4269765615463257, "learning_rate": 1.0850635396043793e-05, "loss": 0.0434, "step": 27035 }, { "epoch": 1.3726585105842937, "grad_norm": 0.5560934543609619, "learning_rate": 1.0848943262771377e-05, "loss": 0.0572, "step": 27040 }, { "epoch": 1.372912330575156, "grad_norm": 0.42453646659851074, "learning_rate": 1.0847251129498962e-05, "loss": 0.0521, "step": 27045 }, { "epoch": 1.3731661505660187, "grad_norm": 0.36190876364707947, "learning_rate": 1.0845558996226544e-05, "loss": 0.0531, "step": 27050 }, { "epoch": 1.373419970556881, "grad_norm": 0.6442621946334839, "learning_rate": 1.0843866862954126e-05, "loss": 0.0482, "step": 27055 }, { "epoch": 1.3736737905477434, "grad_norm": 0.34046563506126404, "learning_rate": 1.0842174729681711e-05, "loss": 0.0421, "step": 27060 }, { "epoch": 1.373927610538606, "grad_norm": 0.30943602323532104, "learning_rate": 1.0840482596409295e-05, "loss": 0.0537, "step": 27065 }, { "epoch": 1.3741814305294686, "grad_norm": 0.4879555106163025, "learning_rate": 1.0838790463136876e-05, "loss": 0.0492, "step": 27070 }, { "epoch": 1.374435250520331, "grad_norm": 0.30128493905067444, "learning_rate": 1.0837098329864462e-05, "loss": 0.0496, "step": 27075 }, { "epoch": 1.3746890705111934, "grad_norm": 0.37397828698158264, "learning_rate": 1.0835406196592044e-05, "loss": 0.0532, "step": 27080 }, { "epoch": 1.374942890502056, "grad_norm": 0.2845868468284607, "learning_rate": 1.0833714063319627e-05, "loss": 0.045, "step": 27085 }, { "epoch": 1.3751967104929184, "grad_norm": 0.42873308062553406, "learning_rate": 1.0832021930047212e-05, "loss": 0.0649, "step": 27090 }, { "epoch": 1.375450530483781, "grad_norm": 0.4360988736152649, "learning_rate": 1.0830329796774794e-05, "loss": 0.0467, "step": 27095 }, { "epoch": 1.3757043504746433, "grad_norm": 0.33429446816444397, "learning_rate": 1.082863766350238e-05, "loss": 0.0443, "step": 27100 }, { "epoch": 1.375958170465506, "grad_norm": 0.41631633043289185, "learning_rate": 1.0826945530229961e-05, "loss": 0.0546, "step": 27105 }, { "epoch": 1.3762119904563683, "grad_norm": 0.3739117980003357, "learning_rate": 1.0825253396957545e-05, "loss": 0.0518, "step": 27110 }, { "epoch": 1.3764658104472307, "grad_norm": 0.25905776023864746, "learning_rate": 1.082356126368513e-05, "loss": 0.0444, "step": 27115 }, { "epoch": 1.3767196304380933, "grad_norm": 0.3633892238140106, "learning_rate": 1.0821869130412712e-05, "loss": 0.0469, "step": 27120 }, { "epoch": 1.376973450428956, "grad_norm": 0.31577759981155396, "learning_rate": 1.0820176997140295e-05, "loss": 0.0525, "step": 27125 }, { "epoch": 1.3772272704198183, "grad_norm": 0.5098549723625183, "learning_rate": 1.0818484863867879e-05, "loss": 0.0447, "step": 27130 }, { "epoch": 1.3774810904106807, "grad_norm": 0.2873249650001526, "learning_rate": 1.0816792730595462e-05, "loss": 0.0482, "step": 27135 }, { "epoch": 1.3777349104015433, "grad_norm": 0.4410748779773712, "learning_rate": 1.0815100597323044e-05, "loss": 0.0538, "step": 27140 }, { "epoch": 1.3779887303924057, "grad_norm": 0.4886588454246521, "learning_rate": 1.081340846405063e-05, "loss": 0.0503, "step": 27145 }, { "epoch": 1.3782425503832683, "grad_norm": 0.376172810792923, "learning_rate": 1.0811716330778213e-05, "loss": 0.0482, "step": 27150 }, { "epoch": 1.3784963703741306, "grad_norm": 0.3205304443836212, "learning_rate": 1.0810024197505797e-05, "loss": 0.0458, "step": 27155 }, { "epoch": 1.3787501903649932, "grad_norm": 0.3526761829853058, "learning_rate": 1.080833206423338e-05, "loss": 0.0482, "step": 27160 }, { "epoch": 1.3790040103558556, "grad_norm": 0.3599817454814911, "learning_rate": 1.0806639930960962e-05, "loss": 0.0511, "step": 27165 }, { "epoch": 1.379257830346718, "grad_norm": 0.29613739252090454, "learning_rate": 1.0804947797688547e-05, "loss": 0.0451, "step": 27170 }, { "epoch": 1.3795116503375806, "grad_norm": 0.3637460470199585, "learning_rate": 1.0803255664416131e-05, "loss": 0.0536, "step": 27175 }, { "epoch": 1.379765470328443, "grad_norm": 0.3151790499687195, "learning_rate": 1.0801563531143713e-05, "loss": 0.0455, "step": 27180 }, { "epoch": 1.3800192903193056, "grad_norm": 0.643429160118103, "learning_rate": 1.0799871397871298e-05, "loss": 0.0518, "step": 27185 }, { "epoch": 1.380273110310168, "grad_norm": 0.26551660895347595, "learning_rate": 1.079817926459888e-05, "loss": 0.0418, "step": 27190 }, { "epoch": 1.3805269303010306, "grad_norm": 0.30446264147758484, "learning_rate": 1.0796487131326465e-05, "loss": 0.0431, "step": 27195 }, { "epoch": 1.380780750291893, "grad_norm": 0.5254416465759277, "learning_rate": 1.0794794998054049e-05, "loss": 0.0519, "step": 27200 }, { "epoch": 1.3810345702827553, "grad_norm": 0.33535945415496826, "learning_rate": 1.079310286478163e-05, "loss": 0.0352, "step": 27205 }, { "epoch": 1.381288390273618, "grad_norm": 0.5450144410133362, "learning_rate": 1.0791410731509216e-05, "loss": 0.0412, "step": 27210 }, { "epoch": 1.3815422102644805, "grad_norm": 0.40926575660705566, "learning_rate": 1.0789718598236798e-05, "loss": 0.0526, "step": 27215 }, { "epoch": 1.381796030255343, "grad_norm": 0.3639059066772461, "learning_rate": 1.0788026464964381e-05, "loss": 0.0552, "step": 27220 }, { "epoch": 1.3820498502462053, "grad_norm": 0.5474151968955994, "learning_rate": 1.0786334331691966e-05, "loss": 0.0485, "step": 27225 }, { "epoch": 1.382303670237068, "grad_norm": 0.3799017071723938, "learning_rate": 1.0784642198419548e-05, "loss": 0.0591, "step": 27230 }, { "epoch": 1.3825574902279303, "grad_norm": 0.45561301708221436, "learning_rate": 1.078295006514713e-05, "loss": 0.0433, "step": 27235 }, { "epoch": 1.3828113102187929, "grad_norm": 0.4006117582321167, "learning_rate": 1.0781257931874715e-05, "loss": 0.051, "step": 27240 }, { "epoch": 1.3830651302096553, "grad_norm": 0.477120965719223, "learning_rate": 1.0779565798602299e-05, "loss": 0.0479, "step": 27245 }, { "epoch": 1.3833189502005179, "grad_norm": 0.29165032505989075, "learning_rate": 1.0777873665329884e-05, "loss": 0.0536, "step": 27250 }, { "epoch": 1.3835727701913803, "grad_norm": 0.3408038914203644, "learning_rate": 1.0776181532057466e-05, "loss": 0.0502, "step": 27255 }, { "epoch": 1.3838265901822426, "grad_norm": 1.091934323310852, "learning_rate": 1.0774489398785048e-05, "loss": 0.048, "step": 27260 }, { "epoch": 1.3840804101731052, "grad_norm": 0.330030620098114, "learning_rate": 1.0772797265512633e-05, "loss": 0.0488, "step": 27265 }, { "epoch": 1.3843342301639678, "grad_norm": 0.24753251671791077, "learning_rate": 1.0771105132240217e-05, "loss": 0.0491, "step": 27270 }, { "epoch": 1.3845880501548302, "grad_norm": 0.31028640270233154, "learning_rate": 1.0769412998967798e-05, "loss": 0.0481, "step": 27275 }, { "epoch": 1.3848418701456926, "grad_norm": 0.295444130897522, "learning_rate": 1.0767720865695384e-05, "loss": 0.0543, "step": 27280 }, { "epoch": 1.3850956901365552, "grad_norm": 0.7282490730285645, "learning_rate": 1.0766028732422965e-05, "loss": 0.055, "step": 27285 }, { "epoch": 1.3853495101274176, "grad_norm": 0.41168054938316345, "learning_rate": 1.076433659915055e-05, "loss": 0.0567, "step": 27290 }, { "epoch": 1.3856033301182802, "grad_norm": 0.447490930557251, "learning_rate": 1.0762644465878134e-05, "loss": 0.0501, "step": 27295 }, { "epoch": 1.3858571501091426, "grad_norm": 0.3839946687221527, "learning_rate": 1.0760952332605716e-05, "loss": 0.0479, "step": 27300 }, { "epoch": 1.3861109701000052, "grad_norm": 0.4760317802429199, "learning_rate": 1.0759260199333301e-05, "loss": 0.0464, "step": 27305 }, { "epoch": 1.3863647900908675, "grad_norm": 0.32983019948005676, "learning_rate": 1.0757568066060883e-05, "loss": 0.0453, "step": 27310 }, { "epoch": 1.38661861008173, "grad_norm": 0.7022455930709839, "learning_rate": 1.0755875932788467e-05, "loss": 0.0533, "step": 27315 }, { "epoch": 1.3868724300725925, "grad_norm": 0.44730642437934875, "learning_rate": 1.0754183799516052e-05, "loss": 0.0411, "step": 27320 }, { "epoch": 1.3871262500634551, "grad_norm": 0.25385260581970215, "learning_rate": 1.0752491666243634e-05, "loss": 0.0486, "step": 27325 }, { "epoch": 1.3873800700543175, "grad_norm": 0.29286813735961914, "learning_rate": 1.0750799532971217e-05, "loss": 0.0485, "step": 27330 }, { "epoch": 1.38763389004518, "grad_norm": 0.43927282094955444, "learning_rate": 1.0749107399698801e-05, "loss": 0.0425, "step": 27335 }, { "epoch": 1.3878877100360425, "grad_norm": 0.3693688213825226, "learning_rate": 1.0747415266426384e-05, "loss": 0.0508, "step": 27340 }, { "epoch": 1.3881415300269049, "grad_norm": 0.46396780014038086, "learning_rate": 1.074572313315397e-05, "loss": 0.0429, "step": 27345 }, { "epoch": 1.3883953500177675, "grad_norm": 0.5464805960655212, "learning_rate": 1.0744030999881552e-05, "loss": 0.0536, "step": 27350 }, { "epoch": 1.3886491700086299, "grad_norm": 0.26196610927581787, "learning_rate": 1.0742338866609135e-05, "loss": 0.0437, "step": 27355 }, { "epoch": 1.3889029899994925, "grad_norm": 0.355894535779953, "learning_rate": 1.0740646733336719e-05, "loss": 0.0447, "step": 27360 }, { "epoch": 1.3891568099903548, "grad_norm": 0.2798496186733246, "learning_rate": 1.0738954600064302e-05, "loss": 0.0414, "step": 27365 }, { "epoch": 1.3894106299812172, "grad_norm": 0.36841532588005066, "learning_rate": 1.0737262466791884e-05, "loss": 0.0469, "step": 27370 }, { "epoch": 1.3896644499720798, "grad_norm": 0.36413395404815674, "learning_rate": 1.073557033351947e-05, "loss": 0.0354, "step": 27375 }, { "epoch": 1.3899182699629422, "grad_norm": 0.3879263401031494, "learning_rate": 1.0733878200247053e-05, "loss": 0.0405, "step": 27380 }, { "epoch": 1.3901720899538048, "grad_norm": 0.4517977237701416, "learning_rate": 1.0732186066974635e-05, "loss": 0.0468, "step": 27385 }, { "epoch": 1.3904259099446672, "grad_norm": 0.4055328369140625, "learning_rate": 1.073049393370222e-05, "loss": 0.0457, "step": 27390 }, { "epoch": 1.3906797299355298, "grad_norm": 0.35788199305534363, "learning_rate": 1.0728801800429802e-05, "loss": 0.0427, "step": 27395 }, { "epoch": 1.3909335499263922, "grad_norm": 0.421030730009079, "learning_rate": 1.0727109667157387e-05, "loss": 0.0542, "step": 27400 }, { "epoch": 1.3911873699172546, "grad_norm": 0.28301897644996643, "learning_rate": 1.072541753388497e-05, "loss": 0.0486, "step": 27405 }, { "epoch": 1.3914411899081172, "grad_norm": 0.2675779461860657, "learning_rate": 1.0723725400612552e-05, "loss": 0.0502, "step": 27410 }, { "epoch": 1.3916950098989798, "grad_norm": 0.40590351819992065, "learning_rate": 1.0722033267340138e-05, "loss": 0.0493, "step": 27415 }, { "epoch": 1.3919488298898421, "grad_norm": 0.5221124887466431, "learning_rate": 1.072034113406772e-05, "loss": 0.0591, "step": 27420 }, { "epoch": 1.3922026498807045, "grad_norm": 0.6834927797317505, "learning_rate": 1.0718649000795303e-05, "loss": 0.0441, "step": 27425 }, { "epoch": 1.3924564698715671, "grad_norm": 0.3241395652294159, "learning_rate": 1.0716956867522888e-05, "loss": 0.0395, "step": 27430 }, { "epoch": 1.3927102898624295, "grad_norm": 0.3666060268878937, "learning_rate": 1.071526473425047e-05, "loss": 0.05, "step": 27435 }, { "epoch": 1.392964109853292, "grad_norm": 0.33367741107940674, "learning_rate": 1.0713572600978055e-05, "loss": 0.0406, "step": 27440 }, { "epoch": 1.3932179298441545, "grad_norm": 0.3969685137271881, "learning_rate": 1.0711880467705637e-05, "loss": 0.0427, "step": 27445 }, { "epoch": 1.393471749835017, "grad_norm": 0.3486899137496948, "learning_rate": 1.071018833443322e-05, "loss": 0.0489, "step": 27450 }, { "epoch": 1.3937255698258795, "grad_norm": 0.543165922164917, "learning_rate": 1.0708496201160806e-05, "loss": 0.0464, "step": 27455 }, { "epoch": 1.3939793898167419, "grad_norm": 0.35785973072052, "learning_rate": 1.0706804067888388e-05, "loss": 0.0502, "step": 27460 }, { "epoch": 1.3942332098076045, "grad_norm": 0.4813598692417145, "learning_rate": 1.070511193461597e-05, "loss": 0.0513, "step": 27465 }, { "epoch": 1.394487029798467, "grad_norm": 0.39403215050697327, "learning_rate": 1.0703419801343555e-05, "loss": 0.0488, "step": 27470 }, { "epoch": 1.3947408497893294, "grad_norm": 0.33684828877449036, "learning_rate": 1.0701727668071138e-05, "loss": 0.0431, "step": 27475 }, { "epoch": 1.3949946697801918, "grad_norm": 0.3423417806625366, "learning_rate": 1.070003553479872e-05, "loss": 0.0451, "step": 27480 }, { "epoch": 1.3952484897710544, "grad_norm": 0.3403189480304718, "learning_rate": 1.0698343401526306e-05, "loss": 0.0549, "step": 27485 }, { "epoch": 1.3955023097619168, "grad_norm": 0.4790232479572296, "learning_rate": 1.0696651268253887e-05, "loss": 0.0528, "step": 27490 }, { "epoch": 1.3957561297527794, "grad_norm": 0.4470588266849518, "learning_rate": 1.0694959134981473e-05, "loss": 0.0542, "step": 27495 }, { "epoch": 1.3960099497436418, "grad_norm": 0.30051055550575256, "learning_rate": 1.0693267001709056e-05, "loss": 0.0562, "step": 27500 }, { "epoch": 1.3962637697345044, "grad_norm": 0.4164995849132538, "learning_rate": 1.0691574868436638e-05, "loss": 0.0522, "step": 27505 }, { "epoch": 1.3965175897253668, "grad_norm": 0.4499371647834778, "learning_rate": 1.0689882735164223e-05, "loss": 0.0457, "step": 27510 }, { "epoch": 1.3967714097162292, "grad_norm": 0.36601021885871887, "learning_rate": 1.0688190601891805e-05, "loss": 0.048, "step": 27515 }, { "epoch": 1.3970252297070918, "grad_norm": 0.5158339738845825, "learning_rate": 1.0686498468619389e-05, "loss": 0.0538, "step": 27520 }, { "epoch": 1.3972790496979541, "grad_norm": 0.4598226845264435, "learning_rate": 1.0684806335346974e-05, "loss": 0.0517, "step": 27525 }, { "epoch": 1.3975328696888167, "grad_norm": 0.26244619488716125, "learning_rate": 1.0683114202074556e-05, "loss": 0.0486, "step": 27530 }, { "epoch": 1.3977866896796791, "grad_norm": 0.46184155344963074, "learning_rate": 1.0681422068802141e-05, "loss": 0.0475, "step": 27535 }, { "epoch": 1.3980405096705417, "grad_norm": 0.38430994749069214, "learning_rate": 1.0679729935529723e-05, "loss": 0.0461, "step": 27540 }, { "epoch": 1.398294329661404, "grad_norm": 0.4069960117340088, "learning_rate": 1.0678037802257306e-05, "loss": 0.0479, "step": 27545 }, { "epoch": 1.3985481496522665, "grad_norm": 0.3001312017440796, "learning_rate": 1.0676345668984892e-05, "loss": 0.0547, "step": 27550 }, { "epoch": 1.398801969643129, "grad_norm": 0.42500364780426025, "learning_rate": 1.0674653535712473e-05, "loss": 0.0402, "step": 27555 }, { "epoch": 1.3990557896339917, "grad_norm": 0.26252198219299316, "learning_rate": 1.0672961402440057e-05, "loss": 0.0419, "step": 27560 }, { "epoch": 1.399309609624854, "grad_norm": 0.363121896982193, "learning_rate": 1.067126926916764e-05, "loss": 0.0542, "step": 27565 }, { "epoch": 1.3995634296157164, "grad_norm": 0.3183755576610565, "learning_rate": 1.0669577135895224e-05, "loss": 0.0501, "step": 27570 }, { "epoch": 1.399817249606579, "grad_norm": 0.3802196979522705, "learning_rate": 1.0667885002622806e-05, "loss": 0.0548, "step": 27575 }, { "epoch": 1.4000710695974414, "grad_norm": 0.8211646676063538, "learning_rate": 1.0666192869350391e-05, "loss": 0.0462, "step": 27580 }, { "epoch": 1.400324889588304, "grad_norm": 0.33730578422546387, "learning_rate": 1.0664500736077975e-05, "loss": 0.0501, "step": 27585 }, { "epoch": 1.4005787095791664, "grad_norm": 0.2553010582923889, "learning_rate": 1.0662808602805558e-05, "loss": 0.0426, "step": 27590 }, { "epoch": 1.400832529570029, "grad_norm": 0.3305681049823761, "learning_rate": 1.0661116469533142e-05, "loss": 0.0483, "step": 27595 }, { "epoch": 1.4010863495608914, "grad_norm": 0.34884992241859436, "learning_rate": 1.0659424336260724e-05, "loss": 0.0383, "step": 27600 }, { "epoch": 1.4013401695517538, "grad_norm": 0.5144136548042297, "learning_rate": 1.0657732202988309e-05, "loss": 0.0438, "step": 27605 }, { "epoch": 1.4015939895426164, "grad_norm": 0.3989544212818146, "learning_rate": 1.0656040069715892e-05, "loss": 0.0427, "step": 27610 }, { "epoch": 1.401847809533479, "grad_norm": 0.4733310043811798, "learning_rate": 1.0654347936443474e-05, "loss": 0.053, "step": 27615 }, { "epoch": 1.4021016295243414, "grad_norm": 0.7690626382827759, "learning_rate": 1.065265580317106e-05, "loss": 0.0507, "step": 27620 }, { "epoch": 1.4023554495152037, "grad_norm": 0.4382520318031311, "learning_rate": 1.0650963669898641e-05, "loss": 0.0483, "step": 27625 }, { "epoch": 1.4026092695060663, "grad_norm": 0.40650293231010437, "learning_rate": 1.0649271536626225e-05, "loss": 0.0451, "step": 27630 }, { "epoch": 1.4028630894969287, "grad_norm": 0.39848485589027405, "learning_rate": 1.064757940335381e-05, "loss": 0.0529, "step": 27635 }, { "epoch": 1.4031169094877913, "grad_norm": 0.24001622200012207, "learning_rate": 1.0645887270081392e-05, "loss": 0.0403, "step": 27640 }, { "epoch": 1.4033707294786537, "grad_norm": 0.3177374303340912, "learning_rate": 1.0644195136808977e-05, "loss": 0.0462, "step": 27645 }, { "epoch": 1.4036245494695163, "grad_norm": 0.23650433123111725, "learning_rate": 1.0642503003536559e-05, "loss": 0.0465, "step": 27650 }, { "epoch": 1.4038783694603787, "grad_norm": 0.41654306650161743, "learning_rate": 1.0640810870264143e-05, "loss": 0.0499, "step": 27655 }, { "epoch": 1.404132189451241, "grad_norm": 0.2645888328552246, "learning_rate": 1.0639118736991726e-05, "loss": 0.0526, "step": 27660 }, { "epoch": 1.4043860094421037, "grad_norm": 0.2605307102203369, "learning_rate": 1.063742660371931e-05, "loss": 0.0386, "step": 27665 }, { "epoch": 1.404639829432966, "grad_norm": 0.3085188567638397, "learning_rate": 1.0635734470446892e-05, "loss": 0.0512, "step": 27670 }, { "epoch": 1.4048936494238287, "grad_norm": 0.3979353606700897, "learning_rate": 1.0634042337174477e-05, "loss": 0.05, "step": 27675 }, { "epoch": 1.405147469414691, "grad_norm": 0.8337517380714417, "learning_rate": 1.063235020390206e-05, "loss": 0.0451, "step": 27680 }, { "epoch": 1.4054012894055536, "grad_norm": 0.45434877276420593, "learning_rate": 1.0630658070629644e-05, "loss": 0.0504, "step": 27685 }, { "epoch": 1.405655109396416, "grad_norm": 0.45057016611099243, "learning_rate": 1.0628965937357227e-05, "loss": 0.0448, "step": 27690 }, { "epoch": 1.4059089293872784, "grad_norm": 0.40183669328689575, "learning_rate": 1.062727380408481e-05, "loss": 0.0379, "step": 27695 }, { "epoch": 1.406162749378141, "grad_norm": 0.36201322078704834, "learning_rate": 1.0625581670812395e-05, "loss": 0.0448, "step": 27700 }, { "epoch": 1.4064165693690036, "grad_norm": 0.3698229491710663, "learning_rate": 1.0623889537539978e-05, "loss": 0.0401, "step": 27705 }, { "epoch": 1.406670389359866, "grad_norm": 0.3653453588485718, "learning_rate": 1.062219740426756e-05, "loss": 0.0444, "step": 27710 }, { "epoch": 1.4069242093507284, "grad_norm": 0.5779272317886353, "learning_rate": 1.0620505270995145e-05, "loss": 0.0478, "step": 27715 }, { "epoch": 1.407178029341591, "grad_norm": 0.29561781883239746, "learning_rate": 1.0618813137722727e-05, "loss": 0.0501, "step": 27720 }, { "epoch": 1.4074318493324534, "grad_norm": 0.3608880937099457, "learning_rate": 1.061712100445031e-05, "loss": 0.0487, "step": 27725 }, { "epoch": 1.407685669323316, "grad_norm": 0.2934507131576538, "learning_rate": 1.0615428871177896e-05, "loss": 0.0464, "step": 27730 }, { "epoch": 1.4079394893141783, "grad_norm": 0.28474876284599304, "learning_rate": 1.0613736737905478e-05, "loss": 0.0399, "step": 27735 }, { "epoch": 1.408193309305041, "grad_norm": 0.42325231432914734, "learning_rate": 1.0612044604633063e-05, "loss": 0.0467, "step": 27740 }, { "epoch": 1.4084471292959033, "grad_norm": 0.3682181239128113, "learning_rate": 1.0610352471360645e-05, "loss": 0.0432, "step": 27745 }, { "epoch": 1.4087009492867657, "grad_norm": 0.41442859172821045, "learning_rate": 1.0608660338088228e-05, "loss": 0.0516, "step": 27750 }, { "epoch": 1.4089547692776283, "grad_norm": 0.4755087196826935, "learning_rate": 1.0606968204815814e-05, "loss": 0.0555, "step": 27755 }, { "epoch": 1.409208589268491, "grad_norm": 0.44574877619743347, "learning_rate": 1.0605276071543395e-05, "loss": 0.0485, "step": 27760 }, { "epoch": 1.4094624092593533, "grad_norm": 0.39875462651252747, "learning_rate": 1.0603583938270979e-05, "loss": 0.0537, "step": 27765 }, { "epoch": 1.4097162292502157, "grad_norm": 0.2607244551181793, "learning_rate": 1.0601891804998562e-05, "loss": 0.0465, "step": 27770 }, { "epoch": 1.4099700492410783, "grad_norm": 0.36298197507858276, "learning_rate": 1.0600199671726146e-05, "loss": 0.05, "step": 27775 }, { "epoch": 1.4102238692319407, "grad_norm": 0.5285542011260986, "learning_rate": 1.0598507538453728e-05, "loss": 0.0523, "step": 27780 }, { "epoch": 1.4104776892228033, "grad_norm": 0.49905699491500854, "learning_rate": 1.0596815405181313e-05, "loss": 0.0451, "step": 27785 }, { "epoch": 1.4107315092136656, "grad_norm": 0.5594649314880371, "learning_rate": 1.0595123271908897e-05, "loss": 0.0506, "step": 27790 }, { "epoch": 1.4109853292045282, "grad_norm": 0.2815134525299072, "learning_rate": 1.059343113863648e-05, "loss": 0.048, "step": 27795 }, { "epoch": 1.4112391491953906, "grad_norm": 0.302711546421051, "learning_rate": 1.0591739005364064e-05, "loss": 0.0453, "step": 27800 }, { "epoch": 1.411492969186253, "grad_norm": 0.42682504653930664, "learning_rate": 1.0590046872091646e-05, "loss": 0.0523, "step": 27805 }, { "epoch": 1.4117467891771156, "grad_norm": 0.39569196105003357, "learning_rate": 1.058835473881923e-05, "loss": 0.0556, "step": 27810 }, { "epoch": 1.4120006091679782, "grad_norm": 0.5938218832015991, "learning_rate": 1.0586662605546814e-05, "loss": 0.0525, "step": 27815 }, { "epoch": 1.4122544291588406, "grad_norm": 0.3381215035915375, "learning_rate": 1.0584970472274396e-05, "loss": 0.052, "step": 27820 }, { "epoch": 1.412508249149703, "grad_norm": 0.3109475374221802, "learning_rate": 1.0583278339001981e-05, "loss": 0.0463, "step": 27825 }, { "epoch": 1.4127620691405656, "grad_norm": 0.4209303557872772, "learning_rate": 1.0581586205729563e-05, "loss": 0.0469, "step": 27830 }, { "epoch": 1.413015889131428, "grad_norm": 0.3299580216407776, "learning_rate": 1.0579894072457149e-05, "loss": 0.0453, "step": 27835 }, { "epoch": 1.4132697091222906, "grad_norm": 0.31590738892555237, "learning_rate": 1.057820193918473e-05, "loss": 0.0449, "step": 27840 }, { "epoch": 1.413523529113153, "grad_norm": 0.3760964274406433, "learning_rate": 1.0576509805912314e-05, "loss": 0.0495, "step": 27845 }, { "epoch": 1.4137773491040155, "grad_norm": 0.37932348251342773, "learning_rate": 1.05748176726399e-05, "loss": 0.0494, "step": 27850 }, { "epoch": 1.414031169094878, "grad_norm": 0.4534890055656433, "learning_rate": 1.0573125539367481e-05, "loss": 0.0508, "step": 27855 }, { "epoch": 1.4142849890857403, "grad_norm": 0.323983371257782, "learning_rate": 1.0571433406095065e-05, "loss": 0.055, "step": 27860 }, { "epoch": 1.414538809076603, "grad_norm": 0.34417563676834106, "learning_rate": 1.0569741272822648e-05, "loss": 0.0567, "step": 27865 }, { "epoch": 1.4147926290674653, "grad_norm": 0.36441168189048767, "learning_rate": 1.0568049139550232e-05, "loss": 0.043, "step": 27870 }, { "epoch": 1.4150464490583279, "grad_norm": 0.49125048518180847, "learning_rate": 1.0566357006277814e-05, "loss": 0.0435, "step": 27875 }, { "epoch": 1.4153002690491903, "grad_norm": 0.40159016847610474, "learning_rate": 1.0564664873005399e-05, "loss": 0.0494, "step": 27880 }, { "epoch": 1.4155540890400529, "grad_norm": 0.5556924343109131, "learning_rate": 1.0562972739732982e-05, "loss": 0.0569, "step": 27885 }, { "epoch": 1.4158079090309152, "grad_norm": 0.8219660520553589, "learning_rate": 1.0561280606460566e-05, "loss": 0.0609, "step": 27890 }, { "epoch": 1.4160617290217776, "grad_norm": 0.318620890378952, "learning_rate": 1.055958847318815e-05, "loss": 0.0472, "step": 27895 }, { "epoch": 1.4163155490126402, "grad_norm": 0.2669641971588135, "learning_rate": 1.0557896339915731e-05, "loss": 0.0513, "step": 27900 }, { "epoch": 1.4165693690035028, "grad_norm": 0.5739254355430603, "learning_rate": 1.0556204206643316e-05, "loss": 0.043, "step": 27905 }, { "epoch": 1.4168231889943652, "grad_norm": 0.6050904989242554, "learning_rate": 1.05545120733709e-05, "loss": 0.0492, "step": 27910 }, { "epoch": 1.4170770089852276, "grad_norm": 0.47114139795303345, "learning_rate": 1.0552819940098482e-05, "loss": 0.0483, "step": 27915 }, { "epoch": 1.4173308289760902, "grad_norm": 1.0412184000015259, "learning_rate": 1.0551127806826067e-05, "loss": 0.0563, "step": 27920 }, { "epoch": 1.4175846489669526, "grad_norm": 0.3775009512901306, "learning_rate": 1.0549435673553649e-05, "loss": 0.0506, "step": 27925 }, { "epoch": 1.4178384689578152, "grad_norm": 0.3336693346500397, "learning_rate": 1.0547743540281234e-05, "loss": 0.0525, "step": 27930 }, { "epoch": 1.4180922889486776, "grad_norm": 0.26565924286842346, "learning_rate": 1.0546051407008818e-05, "loss": 0.0468, "step": 27935 }, { "epoch": 1.4183461089395402, "grad_norm": 0.3058395981788635, "learning_rate": 1.05443592737364e-05, "loss": 0.0366, "step": 27940 }, { "epoch": 1.4185999289304025, "grad_norm": 0.3325693905353546, "learning_rate": 1.0542667140463985e-05, "loss": 0.0452, "step": 27945 }, { "epoch": 1.418853748921265, "grad_norm": 0.3314320743083954, "learning_rate": 1.0540975007191567e-05, "loss": 0.0494, "step": 27950 }, { "epoch": 1.4191075689121275, "grad_norm": 0.27716904878616333, "learning_rate": 1.053928287391915e-05, "loss": 0.0416, "step": 27955 }, { "epoch": 1.4193613889029901, "grad_norm": 0.3261719346046448, "learning_rate": 1.0537590740646735e-05, "loss": 0.0557, "step": 27960 }, { "epoch": 1.4196152088938525, "grad_norm": 0.4958856701850891, "learning_rate": 1.0535898607374317e-05, "loss": 0.047, "step": 27965 }, { "epoch": 1.419869028884715, "grad_norm": 0.2569836378097534, "learning_rate": 1.0534206474101901e-05, "loss": 0.0514, "step": 27970 }, { "epoch": 1.4201228488755775, "grad_norm": 0.3573864996433258, "learning_rate": 1.0532514340829484e-05, "loss": 0.0431, "step": 27975 }, { "epoch": 1.4203766688664399, "grad_norm": 1.4592692852020264, "learning_rate": 1.0530822207557068e-05, "loss": 0.0483, "step": 27980 }, { "epoch": 1.4206304888573025, "grad_norm": 0.7606174945831299, "learning_rate": 1.0529130074284653e-05, "loss": 0.0512, "step": 27985 }, { "epoch": 1.4208843088481649, "grad_norm": 0.49157556891441345, "learning_rate": 1.0527437941012235e-05, "loss": 0.0461, "step": 27990 }, { "epoch": 1.4211381288390275, "grad_norm": 1.5763497352600098, "learning_rate": 1.0525745807739819e-05, "loss": 0.056, "step": 27995 }, { "epoch": 1.4213919488298898, "grad_norm": 0.3884940445423126, "learning_rate": 1.0524053674467402e-05, "loss": 0.0425, "step": 28000 }, { "epoch": 1.4216457688207522, "grad_norm": 0.36167848110198975, "learning_rate": 1.0522361541194986e-05, "loss": 0.0472, "step": 28005 }, { "epoch": 1.4218995888116148, "grad_norm": 0.6486092209815979, "learning_rate": 1.0520669407922568e-05, "loss": 0.0487, "step": 28010 }, { "epoch": 1.4221534088024772, "grad_norm": 0.4026010036468506, "learning_rate": 1.0518977274650153e-05, "loss": 0.0511, "step": 28015 }, { "epoch": 1.4224072287933398, "grad_norm": 0.4928683042526245, "learning_rate": 1.0517285141377736e-05, "loss": 0.0583, "step": 28020 }, { "epoch": 1.4226610487842022, "grad_norm": 0.706096351146698, "learning_rate": 1.0515593008105318e-05, "loss": 0.0557, "step": 28025 }, { "epoch": 1.4229148687750648, "grad_norm": 0.33938637375831604, "learning_rate": 1.0513900874832903e-05, "loss": 0.0418, "step": 28030 }, { "epoch": 1.4231686887659272, "grad_norm": 0.3287295699119568, "learning_rate": 1.0512208741560485e-05, "loss": 0.0486, "step": 28035 }, { "epoch": 1.4234225087567896, "grad_norm": 0.3345835506916046, "learning_rate": 1.051051660828807e-05, "loss": 0.054, "step": 28040 }, { "epoch": 1.4236763287476522, "grad_norm": 0.29467958211898804, "learning_rate": 1.0508824475015652e-05, "loss": 0.0495, "step": 28045 }, { "epoch": 1.4239301487385148, "grad_norm": 0.37132373452186584, "learning_rate": 1.0507132341743236e-05, "loss": 0.051, "step": 28050 }, { "epoch": 1.4241839687293771, "grad_norm": 0.385649710893631, "learning_rate": 1.0505440208470821e-05, "loss": 0.0441, "step": 28055 }, { "epoch": 1.4244377887202395, "grad_norm": 0.43290475010871887, "learning_rate": 1.0503748075198403e-05, "loss": 0.0464, "step": 28060 }, { "epoch": 1.4246916087111021, "grad_norm": 0.37711504101753235, "learning_rate": 1.0502055941925987e-05, "loss": 0.0407, "step": 28065 }, { "epoch": 1.4249454287019645, "grad_norm": 0.29967761039733887, "learning_rate": 1.050036380865357e-05, "loss": 0.0472, "step": 28070 }, { "epoch": 1.425199248692827, "grad_norm": 0.3553757667541504, "learning_rate": 1.0498671675381154e-05, "loss": 0.0432, "step": 28075 }, { "epoch": 1.4254530686836895, "grad_norm": 0.5949152708053589, "learning_rate": 1.0496979542108739e-05, "loss": 0.0441, "step": 28080 }, { "epoch": 1.425706888674552, "grad_norm": 0.45510491728782654, "learning_rate": 1.049528740883632e-05, "loss": 0.0473, "step": 28085 }, { "epoch": 1.4259607086654145, "grad_norm": 1.5510005950927734, "learning_rate": 1.0493595275563904e-05, "loss": 0.0497, "step": 28090 }, { "epoch": 1.4262145286562768, "grad_norm": 0.4228087365627289, "learning_rate": 1.0491903142291488e-05, "loss": 0.0436, "step": 28095 }, { "epoch": 1.4264683486471395, "grad_norm": 0.45731356739997864, "learning_rate": 1.0490211009019071e-05, "loss": 0.0584, "step": 28100 }, { "epoch": 1.426722168638002, "grad_norm": 0.3750070631504059, "learning_rate": 1.0488518875746653e-05, "loss": 0.0477, "step": 28105 }, { "epoch": 1.4269759886288644, "grad_norm": 0.530326783657074, "learning_rate": 1.0486826742474238e-05, "loss": 0.0491, "step": 28110 }, { "epoch": 1.4272298086197268, "grad_norm": 0.45974087715148926, "learning_rate": 1.0485134609201822e-05, "loss": 0.0503, "step": 28115 }, { "epoch": 1.4274836286105894, "grad_norm": 0.3488108217716217, "learning_rate": 1.0483442475929404e-05, "loss": 0.0493, "step": 28120 }, { "epoch": 1.4277374486014518, "grad_norm": 0.3617185950279236, "learning_rate": 1.0481750342656989e-05, "loss": 0.0463, "step": 28125 }, { "epoch": 1.4279912685923144, "grad_norm": 0.4106167256832123, "learning_rate": 1.0480058209384571e-05, "loss": 0.0542, "step": 28130 }, { "epoch": 1.4282450885831768, "grad_norm": 0.3748033940792084, "learning_rate": 1.0478366076112156e-05, "loss": 0.0385, "step": 28135 }, { "epoch": 1.4284989085740394, "grad_norm": 0.3587561547756195, "learning_rate": 1.047667394283974e-05, "loss": 0.0561, "step": 28140 }, { "epoch": 1.4287527285649018, "grad_norm": 0.3485838770866394, "learning_rate": 1.0474981809567322e-05, "loss": 0.0468, "step": 28145 }, { "epoch": 1.4290065485557641, "grad_norm": 0.6947336196899414, "learning_rate": 1.0473289676294907e-05, "loss": 0.0433, "step": 28150 }, { "epoch": 1.4292603685466267, "grad_norm": 0.3088648319244385, "learning_rate": 1.0471597543022489e-05, "loss": 0.0476, "step": 28155 }, { "epoch": 1.4295141885374893, "grad_norm": 0.38387876749038696, "learning_rate": 1.0469905409750072e-05, "loss": 0.0406, "step": 28160 }, { "epoch": 1.4297680085283517, "grad_norm": 0.3302471339702606, "learning_rate": 1.0468213276477657e-05, "loss": 0.0406, "step": 28165 }, { "epoch": 1.430021828519214, "grad_norm": 0.9328307509422302, "learning_rate": 1.046652114320524e-05, "loss": 0.0459, "step": 28170 }, { "epoch": 1.4302756485100767, "grad_norm": 0.3534489870071411, "learning_rate": 1.0464829009932825e-05, "loss": 0.0384, "step": 28175 }, { "epoch": 1.430529468500939, "grad_norm": 0.2702120542526245, "learning_rate": 1.0463136876660406e-05, "loss": 0.0431, "step": 28180 }, { "epoch": 1.4307832884918017, "grad_norm": 0.9567867517471313, "learning_rate": 1.046144474338799e-05, "loss": 0.0561, "step": 28185 }, { "epoch": 1.431037108482664, "grad_norm": 0.35010412335395813, "learning_rate": 1.0459752610115575e-05, "loss": 0.0548, "step": 28190 }, { "epoch": 1.4312909284735267, "grad_norm": 0.6569327712059021, "learning_rate": 1.0458060476843157e-05, "loss": 0.0461, "step": 28195 }, { "epoch": 1.431544748464389, "grad_norm": 0.305759996175766, "learning_rate": 1.045636834357074e-05, "loss": 0.0475, "step": 28200 }, { "epoch": 1.4317985684552514, "grad_norm": 0.36200013756752014, "learning_rate": 1.0454676210298324e-05, "loss": 0.0469, "step": 28205 }, { "epoch": 1.432052388446114, "grad_norm": 0.3904263377189636, "learning_rate": 1.0452984077025908e-05, "loss": 0.0448, "step": 28210 }, { "epoch": 1.4323062084369764, "grad_norm": 0.7225484251976013, "learning_rate": 1.045129194375349e-05, "loss": 0.0452, "step": 28215 }, { "epoch": 1.432560028427839, "grad_norm": 0.4833544194698334, "learning_rate": 1.0449599810481075e-05, "loss": 0.0476, "step": 28220 }, { "epoch": 1.4328138484187014, "grad_norm": 0.39925500750541687, "learning_rate": 1.0447907677208657e-05, "loss": 0.0342, "step": 28225 }, { "epoch": 1.433067668409564, "grad_norm": 0.26874077320098877, "learning_rate": 1.0446215543936242e-05, "loss": 0.0444, "step": 28230 }, { "epoch": 1.4333214884004264, "grad_norm": 0.3140677809715271, "learning_rate": 1.0444523410663825e-05, "loss": 0.037, "step": 28235 }, { "epoch": 1.4335753083912888, "grad_norm": 0.3089941740036011, "learning_rate": 1.0442831277391407e-05, "loss": 0.0428, "step": 28240 }, { "epoch": 1.4338291283821514, "grad_norm": 0.7370154857635498, "learning_rate": 1.0441139144118992e-05, "loss": 0.0417, "step": 28245 }, { "epoch": 1.434082948373014, "grad_norm": 0.2998804748058319, "learning_rate": 1.0439447010846574e-05, "loss": 0.0415, "step": 28250 }, { "epoch": 1.4343367683638764, "grad_norm": 0.2329416424036026, "learning_rate": 1.0437754877574158e-05, "loss": 0.0479, "step": 28255 }, { "epoch": 1.4345905883547387, "grad_norm": 0.3600409924983978, "learning_rate": 1.0436062744301743e-05, "loss": 0.052, "step": 28260 }, { "epoch": 1.4348444083456013, "grad_norm": 0.3627881705760956, "learning_rate": 1.0434370611029325e-05, "loss": 0.0457, "step": 28265 }, { "epoch": 1.4350982283364637, "grad_norm": 0.3902629017829895, "learning_rate": 1.0432678477756908e-05, "loss": 0.0484, "step": 28270 }, { "epoch": 1.4353520483273263, "grad_norm": 0.42769598960876465, "learning_rate": 1.0430986344484492e-05, "loss": 0.0504, "step": 28275 }, { "epoch": 1.4356058683181887, "grad_norm": 0.3344421982765198, "learning_rate": 1.0429294211212076e-05, "loss": 0.047, "step": 28280 }, { "epoch": 1.4358596883090513, "grad_norm": 0.24225115776062012, "learning_rate": 1.042760207793966e-05, "loss": 0.0381, "step": 28285 }, { "epoch": 1.4361135082999137, "grad_norm": 0.47621268033981323, "learning_rate": 1.0425909944667243e-05, "loss": 0.0483, "step": 28290 }, { "epoch": 1.436367328290776, "grad_norm": 0.3224206566810608, "learning_rate": 1.0424217811394826e-05, "loss": 0.0391, "step": 28295 }, { "epoch": 1.4366211482816387, "grad_norm": 0.6168380379676819, "learning_rate": 1.042252567812241e-05, "loss": 0.0455, "step": 28300 }, { "epoch": 1.4368749682725013, "grad_norm": 0.6423296928405762, "learning_rate": 1.0420833544849993e-05, "loss": 0.0557, "step": 28305 }, { "epoch": 1.4371287882633637, "grad_norm": 0.4000137150287628, "learning_rate": 1.0419141411577575e-05, "loss": 0.0383, "step": 28310 }, { "epoch": 1.437382608254226, "grad_norm": 0.8485282063484192, "learning_rate": 1.041744927830516e-05, "loss": 0.0499, "step": 28315 }, { "epoch": 1.4376364282450886, "grad_norm": 0.4467218816280365, "learning_rate": 1.0415757145032744e-05, "loss": 0.039, "step": 28320 }, { "epoch": 1.437890248235951, "grad_norm": 0.5513525009155273, "learning_rate": 1.0414065011760327e-05, "loss": 0.0505, "step": 28325 }, { "epoch": 1.4381440682268136, "grad_norm": 0.36793988943099976, "learning_rate": 1.0412372878487911e-05, "loss": 0.0448, "step": 28330 }, { "epoch": 1.438397888217676, "grad_norm": 0.500225305557251, "learning_rate": 1.0410680745215493e-05, "loss": 0.0443, "step": 28335 }, { "epoch": 1.4386517082085386, "grad_norm": 0.4077879786491394, "learning_rate": 1.0408988611943078e-05, "loss": 0.0456, "step": 28340 }, { "epoch": 1.438905528199401, "grad_norm": 0.28105783462524414, "learning_rate": 1.0407296478670662e-05, "loss": 0.0425, "step": 28345 }, { "epoch": 1.4391593481902634, "grad_norm": 1.5346609354019165, "learning_rate": 1.0405604345398243e-05, "loss": 0.0493, "step": 28350 }, { "epoch": 1.439413168181126, "grad_norm": 0.256663978099823, "learning_rate": 1.0403912212125829e-05, "loss": 0.0445, "step": 28355 }, { "epoch": 1.4396669881719883, "grad_norm": 0.5210541486740112, "learning_rate": 1.040222007885341e-05, "loss": 0.0441, "step": 28360 }, { "epoch": 1.439920808162851, "grad_norm": 0.3798723518848419, "learning_rate": 1.0400527945580994e-05, "loss": 0.048, "step": 28365 }, { "epoch": 1.4401746281537133, "grad_norm": 0.3566092550754547, "learning_rate": 1.039883581230858e-05, "loss": 0.0417, "step": 28370 }, { "epoch": 1.440428448144576, "grad_norm": 0.358541339635849, "learning_rate": 1.0397143679036161e-05, "loss": 0.0444, "step": 28375 }, { "epoch": 1.4406822681354383, "grad_norm": 0.3433448374271393, "learning_rate": 1.0395451545763746e-05, "loss": 0.0462, "step": 28380 }, { "epoch": 1.4409360881263007, "grad_norm": 0.3503459095954895, "learning_rate": 1.0393759412491328e-05, "loss": 0.0598, "step": 28385 }, { "epoch": 1.4411899081171633, "grad_norm": 0.5386857986450195, "learning_rate": 1.0392067279218912e-05, "loss": 0.0478, "step": 28390 }, { "epoch": 1.441443728108026, "grad_norm": 0.39092475175857544, "learning_rate": 1.0390375145946497e-05, "loss": 0.0463, "step": 28395 }, { "epoch": 1.4416975480988883, "grad_norm": 1.031701922416687, "learning_rate": 1.0388683012674079e-05, "loss": 0.0485, "step": 28400 }, { "epoch": 1.4419513680897507, "grad_norm": 0.32506614923477173, "learning_rate": 1.038699087940166e-05, "loss": 0.0484, "step": 28405 }, { "epoch": 1.4422051880806133, "grad_norm": 0.4326903522014618, "learning_rate": 1.0385298746129246e-05, "loss": 0.0486, "step": 28410 }, { "epoch": 1.4424590080714756, "grad_norm": 0.36183029413223267, "learning_rate": 1.038360661285683e-05, "loss": 0.0436, "step": 28415 }, { "epoch": 1.4427128280623382, "grad_norm": 0.37883949279785156, "learning_rate": 1.0381914479584415e-05, "loss": 0.0523, "step": 28420 }, { "epoch": 1.4429666480532006, "grad_norm": 0.26860812306404114, "learning_rate": 1.0380222346311997e-05, "loss": 0.0474, "step": 28425 }, { "epoch": 1.4432204680440632, "grad_norm": 0.706049382686615, "learning_rate": 1.0378530213039579e-05, "loss": 0.0511, "step": 28430 }, { "epoch": 1.4434742880349256, "grad_norm": 0.628073513507843, "learning_rate": 1.0376838079767164e-05, "loss": 0.0491, "step": 28435 }, { "epoch": 1.443728108025788, "grad_norm": 0.45285552740097046, "learning_rate": 1.0375145946494747e-05, "loss": 0.0448, "step": 28440 }, { "epoch": 1.4439819280166506, "grad_norm": 0.22899897396564484, "learning_rate": 1.0373453813222329e-05, "loss": 0.0463, "step": 28445 }, { "epoch": 1.4442357480075132, "grad_norm": 0.2924014627933502, "learning_rate": 1.0371761679949914e-05, "loss": 0.0488, "step": 28450 }, { "epoch": 1.4444895679983756, "grad_norm": 0.2901057302951813, "learning_rate": 1.0370069546677496e-05, "loss": 0.043, "step": 28455 }, { "epoch": 1.444743387989238, "grad_norm": 0.24489150941371918, "learning_rate": 1.036837741340508e-05, "loss": 0.0444, "step": 28460 }, { "epoch": 1.4449972079801006, "grad_norm": 0.2966690957546234, "learning_rate": 1.0366685280132665e-05, "loss": 0.0572, "step": 28465 }, { "epoch": 1.445251027970963, "grad_norm": 0.2543412744998932, "learning_rate": 1.0364993146860247e-05, "loss": 0.0413, "step": 28470 }, { "epoch": 1.4455048479618255, "grad_norm": 0.3248431384563446, "learning_rate": 1.0363301013587832e-05, "loss": 0.0474, "step": 28475 }, { "epoch": 1.445758667952688, "grad_norm": 0.2841273248195648, "learning_rate": 1.0361608880315414e-05, "loss": 0.0456, "step": 28480 }, { "epoch": 1.4460124879435505, "grad_norm": 0.7544896006584167, "learning_rate": 1.0359916747042997e-05, "loss": 0.0529, "step": 28485 }, { "epoch": 1.446266307934413, "grad_norm": 0.3466797173023224, "learning_rate": 1.0358224613770583e-05, "loss": 0.043, "step": 28490 }, { "epoch": 1.4465201279252753, "grad_norm": 0.3613210618495941, "learning_rate": 1.0356532480498165e-05, "loss": 0.0458, "step": 28495 }, { "epoch": 1.446773947916138, "grad_norm": 0.32847079634666443, "learning_rate": 1.0354840347225748e-05, "loss": 0.0359, "step": 28500 }, { "epoch": 1.4470277679070003, "grad_norm": 0.9837027192115784, "learning_rate": 1.0353148213953332e-05, "loss": 0.044, "step": 28505 }, { "epoch": 1.4472815878978629, "grad_norm": 0.2532666027545929, "learning_rate": 1.0351456080680915e-05, "loss": 0.0427, "step": 28510 }, { "epoch": 1.4475354078887253, "grad_norm": 0.430247038602829, "learning_rate": 1.0349763947408497e-05, "loss": 0.0456, "step": 28515 }, { "epoch": 1.4477892278795879, "grad_norm": 0.6153171062469482, "learning_rate": 1.0348071814136082e-05, "loss": 0.0404, "step": 28520 }, { "epoch": 1.4480430478704502, "grad_norm": 0.24143315851688385, "learning_rate": 1.0346379680863666e-05, "loss": 0.0368, "step": 28525 }, { "epoch": 1.4482968678613126, "grad_norm": 0.36326682567596436, "learning_rate": 1.034468754759125e-05, "loss": 0.0453, "step": 28530 }, { "epoch": 1.4485506878521752, "grad_norm": 0.34203532338142395, "learning_rate": 1.0342995414318833e-05, "loss": 0.042, "step": 28535 }, { "epoch": 1.4488045078430378, "grad_norm": 0.6145029664039612, "learning_rate": 1.0341303281046415e-05, "loss": 0.0521, "step": 28540 }, { "epoch": 1.4490583278339002, "grad_norm": 0.3803384602069855, "learning_rate": 1.0339611147774e-05, "loss": 0.0453, "step": 28545 }, { "epoch": 1.4493121478247626, "grad_norm": 0.36794525384902954, "learning_rate": 1.0337919014501584e-05, "loss": 0.0524, "step": 28550 }, { "epoch": 1.4495659678156252, "grad_norm": 0.28114748001098633, "learning_rate": 1.0336226881229165e-05, "loss": 0.0347, "step": 28555 }, { "epoch": 1.4498197878064876, "grad_norm": 0.7040995359420776, "learning_rate": 1.033453474795675e-05, "loss": 0.0548, "step": 28560 }, { "epoch": 1.4500736077973502, "grad_norm": 0.3672772943973541, "learning_rate": 1.0332842614684333e-05, "loss": 0.053, "step": 28565 }, { "epoch": 1.4503274277882126, "grad_norm": 0.29484474658966064, "learning_rate": 1.0331150481411918e-05, "loss": 0.0462, "step": 28570 }, { "epoch": 1.4505812477790752, "grad_norm": 0.32553452253341675, "learning_rate": 1.0329458348139501e-05, "loss": 0.0391, "step": 28575 }, { "epoch": 1.4508350677699375, "grad_norm": 0.32741355895996094, "learning_rate": 1.0327766214867083e-05, "loss": 0.0424, "step": 28580 }, { "epoch": 1.4510888877608, "grad_norm": 0.3792947232723236, "learning_rate": 1.0326074081594668e-05, "loss": 0.0457, "step": 28585 }, { "epoch": 1.4513427077516625, "grad_norm": 0.47558140754699707, "learning_rate": 1.032438194832225e-05, "loss": 0.0485, "step": 28590 }, { "epoch": 1.4515965277425251, "grad_norm": 0.4144081175327301, "learning_rate": 1.0322689815049834e-05, "loss": 0.0492, "step": 28595 }, { "epoch": 1.4518503477333875, "grad_norm": 0.41987255215644836, "learning_rate": 1.0320997681777419e-05, "loss": 0.0488, "step": 28600 }, { "epoch": 1.4521041677242499, "grad_norm": 0.3136137127876282, "learning_rate": 1.0319305548505001e-05, "loss": 0.0441, "step": 28605 }, { "epoch": 1.4523579877151125, "grad_norm": 0.4296714663505554, "learning_rate": 1.0317613415232583e-05, "loss": 0.0501, "step": 28610 }, { "epoch": 1.4526118077059749, "grad_norm": 0.3685763478279114, "learning_rate": 1.0315921281960168e-05, "loss": 0.0526, "step": 28615 }, { "epoch": 1.4528656276968375, "grad_norm": 0.426189661026001, "learning_rate": 1.0314229148687752e-05, "loss": 0.0493, "step": 28620 }, { "epoch": 1.4531194476876999, "grad_norm": 0.38130417466163635, "learning_rate": 1.0312537015415337e-05, "loss": 0.0429, "step": 28625 }, { "epoch": 1.4533732676785625, "grad_norm": 0.4051515460014343, "learning_rate": 1.0310844882142919e-05, "loss": 0.0481, "step": 28630 }, { "epoch": 1.4536270876694248, "grad_norm": 0.4713396728038788, "learning_rate": 1.03091527488705e-05, "loss": 0.0461, "step": 28635 }, { "epoch": 1.4538809076602872, "grad_norm": 0.30494242906570435, "learning_rate": 1.0307460615598086e-05, "loss": 0.0418, "step": 28640 }, { "epoch": 1.4541347276511498, "grad_norm": 0.3397122621536255, "learning_rate": 1.030576848232567e-05, "loss": 0.0505, "step": 28645 }, { "epoch": 1.4543885476420124, "grad_norm": 0.4459775686264038, "learning_rate": 1.0304076349053251e-05, "loss": 0.0471, "step": 28650 }, { "epoch": 1.4546423676328748, "grad_norm": 0.6436803936958313, "learning_rate": 1.0302384215780836e-05, "loss": 0.0447, "step": 28655 }, { "epoch": 1.4548961876237372, "grad_norm": 0.3298328220844269, "learning_rate": 1.0300692082508418e-05, "loss": 0.051, "step": 28660 }, { "epoch": 1.4551500076145998, "grad_norm": 0.3109586238861084, "learning_rate": 1.0298999949236003e-05, "loss": 0.0456, "step": 28665 }, { "epoch": 1.4554038276054622, "grad_norm": 0.39373233914375305, "learning_rate": 1.0297307815963587e-05, "loss": 0.0458, "step": 28670 }, { "epoch": 1.4556576475963248, "grad_norm": 0.34695670008659363, "learning_rate": 1.0295615682691169e-05, "loss": 0.045, "step": 28675 }, { "epoch": 1.4559114675871871, "grad_norm": 0.369632750749588, "learning_rate": 1.0293923549418754e-05, "loss": 0.0497, "step": 28680 }, { "epoch": 1.4561652875780497, "grad_norm": 0.41509467363357544, "learning_rate": 1.0292231416146336e-05, "loss": 0.0525, "step": 28685 }, { "epoch": 1.4564191075689121, "grad_norm": 0.4681684672832489, "learning_rate": 1.029053928287392e-05, "loss": 0.0543, "step": 28690 }, { "epoch": 1.4566729275597745, "grad_norm": 0.46059858798980713, "learning_rate": 1.0288847149601505e-05, "loss": 0.0432, "step": 28695 }, { "epoch": 1.4569267475506371, "grad_norm": 0.33197858929634094, "learning_rate": 1.0287155016329087e-05, "loss": 0.0458, "step": 28700 }, { "epoch": 1.4571805675414995, "grad_norm": 0.3484487235546112, "learning_rate": 1.028546288305667e-05, "loss": 0.0455, "step": 28705 }, { "epoch": 1.457434387532362, "grad_norm": 0.3765062689781189, "learning_rate": 1.0283770749784254e-05, "loss": 0.0399, "step": 28710 }, { "epoch": 1.4576882075232245, "grad_norm": 0.3412276804447174, "learning_rate": 1.0282078616511837e-05, "loss": 0.0444, "step": 28715 }, { "epoch": 1.457942027514087, "grad_norm": 0.5508232116699219, "learning_rate": 1.0280386483239422e-05, "loss": 0.0599, "step": 28720 }, { "epoch": 1.4581958475049495, "grad_norm": 0.294949471950531, "learning_rate": 1.0278694349967004e-05, "loss": 0.0422, "step": 28725 }, { "epoch": 1.4584496674958118, "grad_norm": 0.501362681388855, "learning_rate": 1.0277002216694588e-05, "loss": 0.0411, "step": 28730 }, { "epoch": 1.4587034874866744, "grad_norm": 0.3812694847583771, "learning_rate": 1.0275310083422171e-05, "loss": 0.0492, "step": 28735 }, { "epoch": 1.458957307477537, "grad_norm": 0.3240166902542114, "learning_rate": 1.0273617950149755e-05, "loss": 0.0436, "step": 28740 }, { "epoch": 1.4592111274683994, "grad_norm": 0.3954068720340729, "learning_rate": 1.0271925816877337e-05, "loss": 0.0529, "step": 28745 }, { "epoch": 1.4594649474592618, "grad_norm": 0.2987024486064911, "learning_rate": 1.0270233683604922e-05, "loss": 0.0485, "step": 28750 }, { "epoch": 1.4597187674501244, "grad_norm": 0.4231718182563782, "learning_rate": 1.0268541550332506e-05, "loss": 0.0434, "step": 28755 }, { "epoch": 1.4599725874409868, "grad_norm": 1.153788447380066, "learning_rate": 1.0266849417060087e-05, "loss": 0.041, "step": 28760 }, { "epoch": 1.4602264074318494, "grad_norm": 0.3164049983024597, "learning_rate": 1.0265157283787673e-05, "loss": 0.0395, "step": 28765 }, { "epoch": 1.4604802274227118, "grad_norm": 0.36508455872535706, "learning_rate": 1.0263465150515254e-05, "loss": 0.0499, "step": 28770 }, { "epoch": 1.4607340474135744, "grad_norm": 0.26266711950302124, "learning_rate": 1.026177301724284e-05, "loss": 0.0441, "step": 28775 }, { "epoch": 1.4609878674044368, "grad_norm": 0.29270073771476746, "learning_rate": 1.0260080883970423e-05, "loss": 0.0472, "step": 28780 }, { "epoch": 1.4612416873952991, "grad_norm": 0.302064448595047, "learning_rate": 1.0258388750698005e-05, "loss": 0.0453, "step": 28785 }, { "epoch": 1.4614955073861617, "grad_norm": 0.2645045220851898, "learning_rate": 1.025669661742559e-05, "loss": 0.0443, "step": 28790 }, { "epoch": 1.4617493273770243, "grad_norm": 0.3246147334575653, "learning_rate": 1.0255004484153172e-05, "loss": 0.0364, "step": 28795 }, { "epoch": 1.4620031473678867, "grad_norm": 0.3109263479709625, "learning_rate": 1.0253312350880756e-05, "loss": 0.0487, "step": 28800 }, { "epoch": 1.462256967358749, "grad_norm": 0.3940425217151642, "learning_rate": 1.0251620217608341e-05, "loss": 0.0499, "step": 28805 }, { "epoch": 1.4625107873496117, "grad_norm": 0.38812288641929626, "learning_rate": 1.0249928084335923e-05, "loss": 0.0474, "step": 28810 }, { "epoch": 1.462764607340474, "grad_norm": 0.2860491871833801, "learning_rate": 1.0248235951063508e-05, "loss": 0.0391, "step": 28815 }, { "epoch": 1.4630184273313367, "grad_norm": 0.4146775007247925, "learning_rate": 1.024654381779109e-05, "loss": 0.0463, "step": 28820 }, { "epoch": 1.463272247322199, "grad_norm": 0.5292524695396423, "learning_rate": 1.0244851684518673e-05, "loss": 0.0467, "step": 28825 }, { "epoch": 1.4635260673130617, "grad_norm": 0.2877132296562195, "learning_rate": 1.0243159551246257e-05, "loss": 0.0435, "step": 28830 }, { "epoch": 1.463779887303924, "grad_norm": 0.31168654561042786, "learning_rate": 1.024146741797384e-05, "loss": 0.039, "step": 28835 }, { "epoch": 1.4640337072947864, "grad_norm": 0.37059271335601807, "learning_rate": 1.0239775284701422e-05, "loss": 0.0513, "step": 28840 }, { "epoch": 1.464287527285649, "grad_norm": 0.3526490330696106, "learning_rate": 1.0238083151429008e-05, "loss": 0.0445, "step": 28845 }, { "epoch": 1.4645413472765114, "grad_norm": 0.43100833892822266, "learning_rate": 1.0236391018156591e-05, "loss": 0.047, "step": 28850 }, { "epoch": 1.464795167267374, "grad_norm": 0.45979759097099304, "learning_rate": 1.0234698884884173e-05, "loss": 0.0461, "step": 28855 }, { "epoch": 1.4650489872582364, "grad_norm": 0.34121495485305786, "learning_rate": 1.0233006751611758e-05, "loss": 0.0493, "step": 28860 }, { "epoch": 1.465302807249099, "grad_norm": 0.6851451396942139, "learning_rate": 1.023131461833934e-05, "loss": 0.0424, "step": 28865 }, { "epoch": 1.4655566272399614, "grad_norm": 0.3656606674194336, "learning_rate": 1.0229622485066925e-05, "loss": 0.053, "step": 28870 }, { "epoch": 1.4658104472308238, "grad_norm": 0.31337806582450867, "learning_rate": 1.0227930351794509e-05, "loss": 0.047, "step": 28875 }, { "epoch": 1.4660642672216864, "grad_norm": 0.4288000464439392, "learning_rate": 1.022623821852209e-05, "loss": 0.0553, "step": 28880 }, { "epoch": 1.466318087212549, "grad_norm": 0.34418991208076477, "learning_rate": 1.0224546085249676e-05, "loss": 0.0542, "step": 28885 }, { "epoch": 1.4665719072034114, "grad_norm": 0.40049436688423157, "learning_rate": 1.0222853951977258e-05, "loss": 0.0441, "step": 28890 }, { "epoch": 1.4668257271942737, "grad_norm": 0.35026365518569946, "learning_rate": 1.0221161818704841e-05, "loss": 0.0461, "step": 28895 }, { "epoch": 1.4670795471851363, "grad_norm": 0.24924367666244507, "learning_rate": 1.0219469685432427e-05, "loss": 0.0509, "step": 28900 }, { "epoch": 1.4673333671759987, "grad_norm": 0.2462109625339508, "learning_rate": 1.0217777552160008e-05, "loss": 0.0362, "step": 28905 }, { "epoch": 1.4675871871668613, "grad_norm": 0.49023517966270447, "learning_rate": 1.0216085418887594e-05, "loss": 0.044, "step": 28910 }, { "epoch": 1.4678410071577237, "grad_norm": 0.24693015217781067, "learning_rate": 1.0214393285615176e-05, "loss": 0.0455, "step": 28915 }, { "epoch": 1.4680948271485863, "grad_norm": 0.31813114881515503, "learning_rate": 1.0212701152342759e-05, "loss": 0.0462, "step": 28920 }, { "epoch": 1.4683486471394487, "grad_norm": 0.367294579744339, "learning_rate": 1.0211009019070344e-05, "loss": 0.0441, "step": 28925 }, { "epoch": 1.468602467130311, "grad_norm": 0.48306939005851746, "learning_rate": 1.0209316885797926e-05, "loss": 0.0578, "step": 28930 }, { "epoch": 1.4688562871211737, "grad_norm": 0.6266639232635498, "learning_rate": 1.020762475252551e-05, "loss": 0.0431, "step": 28935 }, { "epoch": 1.4691101071120363, "grad_norm": 0.4411011338233948, "learning_rate": 1.0205932619253093e-05, "loss": 0.0436, "step": 28940 }, { "epoch": 1.4693639271028986, "grad_norm": 0.6849778294563293, "learning_rate": 1.0204240485980677e-05, "loss": 0.0499, "step": 28945 }, { "epoch": 1.469617747093761, "grad_norm": 0.2983316481113434, "learning_rate": 1.0202548352708259e-05, "loss": 0.0375, "step": 28950 }, { "epoch": 1.4698715670846236, "grad_norm": 0.3210887908935547, "learning_rate": 1.0200856219435844e-05, "loss": 0.0518, "step": 28955 }, { "epoch": 1.470125387075486, "grad_norm": 0.4435349404811859, "learning_rate": 1.0199164086163427e-05, "loss": 0.0461, "step": 28960 }, { "epoch": 1.4703792070663486, "grad_norm": 0.4102541208267212, "learning_rate": 1.0197471952891011e-05, "loss": 0.0443, "step": 28965 }, { "epoch": 1.470633027057211, "grad_norm": 0.3138459324836731, "learning_rate": 1.0195779819618595e-05, "loss": 0.0485, "step": 28970 }, { "epoch": 1.4708868470480736, "grad_norm": 0.46446776390075684, "learning_rate": 1.0194087686346176e-05, "loss": 0.0462, "step": 28975 }, { "epoch": 1.471140667038936, "grad_norm": 0.3791068494319916, "learning_rate": 1.0192395553073762e-05, "loss": 0.0433, "step": 28980 }, { "epoch": 1.4713944870297984, "grad_norm": 0.4010803997516632, "learning_rate": 1.0190703419801345e-05, "loss": 0.0515, "step": 28985 }, { "epoch": 1.471648307020661, "grad_norm": 0.3441152274608612, "learning_rate": 1.0189011286528927e-05, "loss": 0.0498, "step": 28990 }, { "epoch": 1.4719021270115236, "grad_norm": 0.40290719270706177, "learning_rate": 1.0187319153256512e-05, "loss": 0.0454, "step": 28995 }, { "epoch": 1.472155947002386, "grad_norm": 0.9115328192710876, "learning_rate": 1.0185627019984094e-05, "loss": 0.0412, "step": 29000 }, { "epoch": 1.4724097669932483, "grad_norm": 0.3189408779144287, "learning_rate": 1.0183934886711678e-05, "loss": 0.0493, "step": 29005 }, { "epoch": 1.472663586984111, "grad_norm": 0.3260115385055542, "learning_rate": 1.0182242753439263e-05, "loss": 0.0418, "step": 29010 }, { "epoch": 1.4729174069749733, "grad_norm": 0.3087772727012634, "learning_rate": 1.0180550620166845e-05, "loss": 0.0489, "step": 29015 }, { "epoch": 1.473171226965836, "grad_norm": 0.39418232440948486, "learning_rate": 1.017885848689443e-05, "loss": 0.0396, "step": 29020 }, { "epoch": 1.4734250469566983, "grad_norm": 0.2795569896697998, "learning_rate": 1.0177166353622012e-05, "loss": 0.0512, "step": 29025 }, { "epoch": 1.473678866947561, "grad_norm": 0.5018810629844666, "learning_rate": 1.0175474220349595e-05, "loss": 0.0571, "step": 29030 }, { "epoch": 1.4739326869384233, "grad_norm": 0.49596336483955383, "learning_rate": 1.0173782087077179e-05, "loss": 0.0487, "step": 29035 }, { "epoch": 1.4741865069292857, "grad_norm": 0.3247216045856476, "learning_rate": 1.0172089953804762e-05, "loss": 0.043, "step": 29040 }, { "epoch": 1.4744403269201483, "grad_norm": 0.43180331587791443, "learning_rate": 1.0170397820532344e-05, "loss": 0.0447, "step": 29045 }, { "epoch": 1.4746941469110106, "grad_norm": 0.44530582427978516, "learning_rate": 1.016870568725993e-05, "loss": 0.0385, "step": 29050 }, { "epoch": 1.4749479669018732, "grad_norm": 0.37632861733436584, "learning_rate": 1.0167013553987513e-05, "loss": 0.0455, "step": 29055 }, { "epoch": 1.4752017868927356, "grad_norm": 0.3150545060634613, "learning_rate": 1.0165321420715097e-05, "loss": 0.0464, "step": 29060 }, { "epoch": 1.4754556068835982, "grad_norm": 0.32982978224754333, "learning_rate": 1.016362928744268e-05, "loss": 0.0439, "step": 29065 }, { "epoch": 1.4757094268744606, "grad_norm": 0.22459256649017334, "learning_rate": 1.0161937154170262e-05, "loss": 0.0457, "step": 29070 }, { "epoch": 1.475963246865323, "grad_norm": 0.6925598978996277, "learning_rate": 1.0160245020897847e-05, "loss": 0.0487, "step": 29075 }, { "epoch": 1.4762170668561856, "grad_norm": 0.2393013834953308, "learning_rate": 1.015855288762543e-05, "loss": 0.0414, "step": 29080 }, { "epoch": 1.4764708868470482, "grad_norm": 0.40662673115730286, "learning_rate": 1.0156860754353013e-05, "loss": 0.0491, "step": 29085 }, { "epoch": 1.4767247068379106, "grad_norm": 0.35922446846961975, "learning_rate": 1.0155168621080598e-05, "loss": 0.0472, "step": 29090 }, { "epoch": 1.476978526828773, "grad_norm": 0.284529447555542, "learning_rate": 1.015347648780818e-05, "loss": 0.046, "step": 29095 }, { "epoch": 1.4772323468196356, "grad_norm": 0.3706730604171753, "learning_rate": 1.0151784354535763e-05, "loss": 0.05, "step": 29100 }, { "epoch": 1.477486166810498, "grad_norm": 0.24887824058532715, "learning_rate": 1.0150092221263349e-05, "loss": 0.0528, "step": 29105 }, { "epoch": 1.4777399868013605, "grad_norm": 0.458808571100235, "learning_rate": 1.014840008799093e-05, "loss": 0.0519, "step": 29110 }, { "epoch": 1.477993806792223, "grad_norm": 0.32688531279563904, "learning_rate": 1.0146707954718516e-05, "loss": 0.0529, "step": 29115 }, { "epoch": 1.4782476267830855, "grad_norm": 0.22939306497573853, "learning_rate": 1.0145015821446097e-05, "loss": 0.0368, "step": 29120 }, { "epoch": 1.478501446773948, "grad_norm": 0.38046857714653015, "learning_rate": 1.0143323688173681e-05, "loss": 0.0492, "step": 29125 }, { "epoch": 1.4787552667648103, "grad_norm": 0.5389629602432251, "learning_rate": 1.0141631554901266e-05, "loss": 0.049, "step": 29130 }, { "epoch": 1.4790090867556729, "grad_norm": 0.6151430606842041, "learning_rate": 1.0139939421628848e-05, "loss": 0.0551, "step": 29135 }, { "epoch": 1.4792629067465355, "grad_norm": 0.33038952946662903, "learning_rate": 1.0138247288356432e-05, "loss": 0.0449, "step": 29140 }, { "epoch": 1.4795167267373979, "grad_norm": 0.388681024312973, "learning_rate": 1.0136555155084015e-05, "loss": 0.0511, "step": 29145 }, { "epoch": 1.4797705467282602, "grad_norm": 0.3388515114784241, "learning_rate": 1.0134863021811599e-05, "loss": 0.0479, "step": 29150 }, { "epoch": 1.4800243667191229, "grad_norm": 0.400495707988739, "learning_rate": 1.013317088853918e-05, "loss": 0.0588, "step": 29155 }, { "epoch": 1.4802781867099852, "grad_norm": 0.3645167946815491, "learning_rate": 1.0131478755266766e-05, "loss": 0.0497, "step": 29160 }, { "epoch": 1.4805320067008478, "grad_norm": 0.3763105869293213, "learning_rate": 1.012978662199435e-05, "loss": 0.0515, "step": 29165 }, { "epoch": 1.4807858266917102, "grad_norm": 0.34039294719696045, "learning_rate": 1.0128094488721933e-05, "loss": 0.0427, "step": 29170 }, { "epoch": 1.4810396466825728, "grad_norm": 0.2790921628475189, "learning_rate": 1.0126402355449516e-05, "loss": 0.038, "step": 29175 }, { "epoch": 1.4812934666734352, "grad_norm": 0.41854605078697205, "learning_rate": 1.0124710222177098e-05, "loss": 0.0457, "step": 29180 }, { "epoch": 1.4815472866642976, "grad_norm": 0.2072964310646057, "learning_rate": 1.0123018088904684e-05, "loss": 0.0408, "step": 29185 }, { "epoch": 1.4818011066551602, "grad_norm": 0.4667484164237976, "learning_rate": 1.0121325955632267e-05, "loss": 0.0449, "step": 29190 }, { "epoch": 1.4820549266460226, "grad_norm": 0.445342481136322, "learning_rate": 1.0119633822359849e-05, "loss": 0.0531, "step": 29195 }, { "epoch": 1.4823087466368852, "grad_norm": 0.3367258608341217, "learning_rate": 1.0117941689087434e-05, "loss": 0.0494, "step": 29200 }, { "epoch": 1.4825625666277475, "grad_norm": 0.5606625080108643, "learning_rate": 1.0116249555815016e-05, "loss": 0.0381, "step": 29205 }, { "epoch": 1.4828163866186101, "grad_norm": 0.5759447813034058, "learning_rate": 1.0114557422542601e-05, "loss": 0.042, "step": 29210 }, { "epoch": 1.4830702066094725, "grad_norm": 0.5777857899665833, "learning_rate": 1.0112865289270183e-05, "loss": 0.0453, "step": 29215 }, { "epoch": 1.483324026600335, "grad_norm": 0.5903242230415344, "learning_rate": 1.0111173155997767e-05, "loss": 0.0451, "step": 29220 }, { "epoch": 1.4835778465911975, "grad_norm": 0.39073795080184937, "learning_rate": 1.0109481022725352e-05, "loss": 0.0489, "step": 29225 }, { "epoch": 1.4838316665820601, "grad_norm": 0.4036313593387604, "learning_rate": 1.0107788889452934e-05, "loss": 0.0467, "step": 29230 }, { "epoch": 1.4840854865729225, "grad_norm": 0.35615524649620056, "learning_rate": 1.0106096756180517e-05, "loss": 0.0471, "step": 29235 }, { "epoch": 1.4843393065637849, "grad_norm": 0.34579744935035706, "learning_rate": 1.0104404622908101e-05, "loss": 0.0424, "step": 29240 }, { "epoch": 1.4845931265546475, "grad_norm": 0.2929094731807709, "learning_rate": 1.0102712489635684e-05, "loss": 0.0362, "step": 29245 }, { "epoch": 1.4848469465455099, "grad_norm": 0.39636480808258057, "learning_rate": 1.0101020356363266e-05, "loss": 0.0509, "step": 29250 }, { "epoch": 1.4851007665363725, "grad_norm": 0.33898094296455383, "learning_rate": 1.0099328223090851e-05, "loss": 0.0415, "step": 29255 }, { "epoch": 1.4853545865272348, "grad_norm": 0.4324086904525757, "learning_rate": 1.0097636089818435e-05, "loss": 0.0473, "step": 29260 }, { "epoch": 1.4856084065180974, "grad_norm": 0.37893491983413696, "learning_rate": 1.0095943956546019e-05, "loss": 0.0479, "step": 29265 }, { "epoch": 1.4858622265089598, "grad_norm": 0.4576340615749359, "learning_rate": 1.0094251823273602e-05, "loss": 0.0458, "step": 29270 }, { "epoch": 1.4861160464998222, "grad_norm": 0.2759975492954254, "learning_rate": 1.0092559690001184e-05, "loss": 0.0476, "step": 29275 }, { "epoch": 1.4863698664906848, "grad_norm": 0.3189256489276886, "learning_rate": 1.009086755672877e-05, "loss": 0.0433, "step": 29280 }, { "epoch": 1.4866236864815474, "grad_norm": 0.3150682747364044, "learning_rate": 1.0089175423456353e-05, "loss": 0.0427, "step": 29285 }, { "epoch": 1.4868775064724098, "grad_norm": 0.5731126666069031, "learning_rate": 1.0087483290183935e-05, "loss": 0.0456, "step": 29290 }, { "epoch": 1.4871313264632722, "grad_norm": 0.24449680745601654, "learning_rate": 1.008579115691152e-05, "loss": 0.0426, "step": 29295 }, { "epoch": 1.4873851464541348, "grad_norm": 0.487131804227829, "learning_rate": 1.0084099023639102e-05, "loss": 0.0487, "step": 29300 }, { "epoch": 1.4876389664449972, "grad_norm": 0.37090784311294556, "learning_rate": 1.0082406890366687e-05, "loss": 0.0483, "step": 29305 }, { "epoch": 1.4878927864358598, "grad_norm": 0.32718345522880554, "learning_rate": 1.008071475709427e-05, "loss": 0.0407, "step": 29310 }, { "epoch": 1.4881466064267221, "grad_norm": 0.7287764549255371, "learning_rate": 1.0079022623821852e-05, "loss": 0.0437, "step": 29315 }, { "epoch": 1.4884004264175847, "grad_norm": 0.44735628366470337, "learning_rate": 1.0077330490549438e-05, "loss": 0.0428, "step": 29320 }, { "epoch": 1.4886542464084471, "grad_norm": 0.4608113765716553, "learning_rate": 1.007563835727702e-05, "loss": 0.0481, "step": 29325 }, { "epoch": 1.4889080663993095, "grad_norm": 0.4977395534515381, "learning_rate": 1.0073946224004603e-05, "loss": 0.0496, "step": 29330 }, { "epoch": 1.489161886390172, "grad_norm": 0.308709055185318, "learning_rate": 1.0072254090732188e-05, "loss": 0.0465, "step": 29335 }, { "epoch": 1.4894157063810345, "grad_norm": 0.4213113486766815, "learning_rate": 1.007056195745977e-05, "loss": 0.0398, "step": 29340 }, { "epoch": 1.489669526371897, "grad_norm": 0.6217330694198608, "learning_rate": 1.0068869824187354e-05, "loss": 0.0457, "step": 29345 }, { "epoch": 1.4899233463627595, "grad_norm": 0.583604097366333, "learning_rate": 1.0067177690914937e-05, "loss": 0.0477, "step": 29350 }, { "epoch": 1.490177166353622, "grad_norm": 0.36942973732948303, "learning_rate": 1.006548555764252e-05, "loss": 0.0445, "step": 29355 }, { "epoch": 1.4904309863444845, "grad_norm": 0.28825482726097107, "learning_rate": 1.0063793424370106e-05, "loss": 0.0426, "step": 29360 }, { "epoch": 1.4906848063353468, "grad_norm": 0.3700205981731415, "learning_rate": 1.0062101291097688e-05, "loss": 0.0488, "step": 29365 }, { "epoch": 1.4909386263262094, "grad_norm": 0.9633322954177856, "learning_rate": 1.0060409157825271e-05, "loss": 0.0432, "step": 29370 }, { "epoch": 1.491192446317072, "grad_norm": 0.43203192949295044, "learning_rate": 1.0058717024552855e-05, "loss": 0.0464, "step": 29375 }, { "epoch": 1.4914462663079344, "grad_norm": 0.3489255905151367, "learning_rate": 1.0057024891280438e-05, "loss": 0.0456, "step": 29380 }, { "epoch": 1.4917000862987968, "grad_norm": 0.40560370683670044, "learning_rate": 1.005533275800802e-05, "loss": 0.0448, "step": 29385 }, { "epoch": 1.4919539062896594, "grad_norm": 0.3906811475753784, "learning_rate": 1.0053640624735606e-05, "loss": 0.0447, "step": 29390 }, { "epoch": 1.4922077262805218, "grad_norm": 0.42720428109169006, "learning_rate": 1.0051948491463187e-05, "loss": 0.0376, "step": 29395 }, { "epoch": 1.4924615462713844, "grad_norm": 0.5189972519874573, "learning_rate": 1.0050256358190771e-05, "loss": 0.0532, "step": 29400 }, { "epoch": 1.4927153662622468, "grad_norm": 1.2741813659667969, "learning_rate": 1.0048564224918356e-05, "loss": 0.0445, "step": 29405 }, { "epoch": 1.4929691862531094, "grad_norm": 0.43833017349243164, "learning_rate": 1.0046872091645938e-05, "loss": 0.0491, "step": 29410 }, { "epoch": 1.4932230062439718, "grad_norm": 0.28552454710006714, "learning_rate": 1.0045179958373523e-05, "loss": 0.0411, "step": 29415 }, { "epoch": 1.4934768262348341, "grad_norm": 0.39519840478897095, "learning_rate": 1.0043487825101105e-05, "loss": 0.0441, "step": 29420 }, { "epoch": 1.4937306462256967, "grad_norm": 0.5029251575469971, "learning_rate": 1.0041795691828689e-05, "loss": 0.0521, "step": 29425 }, { "epoch": 1.4939844662165593, "grad_norm": 0.3176358640193939, "learning_rate": 1.0040103558556274e-05, "loss": 0.042, "step": 29430 }, { "epoch": 1.4942382862074217, "grad_norm": 0.3860771059989929, "learning_rate": 1.0038411425283856e-05, "loss": 0.0435, "step": 29435 }, { "epoch": 1.494492106198284, "grad_norm": 0.5486845374107361, "learning_rate": 1.003671929201144e-05, "loss": 0.0478, "step": 29440 }, { "epoch": 1.4947459261891467, "grad_norm": 0.3547370135784149, "learning_rate": 1.0035027158739023e-05, "loss": 0.0401, "step": 29445 }, { "epoch": 1.494999746180009, "grad_norm": 0.21240559220314026, "learning_rate": 1.0033335025466606e-05, "loss": 0.0438, "step": 29450 }, { "epoch": 1.4952535661708717, "grad_norm": 0.2859129309654236, "learning_rate": 1.0031642892194192e-05, "loss": 0.0405, "step": 29455 }, { "epoch": 1.495507386161734, "grad_norm": 0.6197316646575928, "learning_rate": 1.0029950758921773e-05, "loss": 0.0468, "step": 29460 }, { "epoch": 1.4957612061525967, "grad_norm": 0.46611955761909485, "learning_rate": 1.0028258625649357e-05, "loss": 0.0433, "step": 29465 }, { "epoch": 1.496015026143459, "grad_norm": 0.3373320698738098, "learning_rate": 1.002656649237694e-05, "loss": 0.0438, "step": 29470 }, { "epoch": 1.4962688461343214, "grad_norm": 0.40270277857780457, "learning_rate": 1.0024874359104524e-05, "loss": 0.0568, "step": 29475 }, { "epoch": 1.496522666125184, "grad_norm": 0.5500027537345886, "learning_rate": 1.0023182225832106e-05, "loss": 0.0428, "step": 29480 }, { "epoch": 1.4967764861160466, "grad_norm": 0.5985025763511658, "learning_rate": 1.0021490092559691e-05, "loss": 0.053, "step": 29485 }, { "epoch": 1.497030306106909, "grad_norm": 0.39436739683151245, "learning_rate": 1.0019797959287275e-05, "loss": 0.0457, "step": 29490 }, { "epoch": 1.4972841260977714, "grad_norm": 0.8494074940681458, "learning_rate": 1.0018105826014857e-05, "loss": 0.0453, "step": 29495 }, { "epoch": 1.497537946088634, "grad_norm": 0.3544905185699463, "learning_rate": 1.0016413692742442e-05, "loss": 0.0446, "step": 29500 }, { "epoch": 1.4977917660794964, "grad_norm": 0.3078000545501709, "learning_rate": 1.0014721559470024e-05, "loss": 0.0426, "step": 29505 }, { "epoch": 1.498045586070359, "grad_norm": 0.3942676782608032, "learning_rate": 1.0013029426197609e-05, "loss": 0.0475, "step": 29510 }, { "epoch": 1.4982994060612214, "grad_norm": 0.3200058341026306, "learning_rate": 1.0011337292925192e-05, "loss": 0.0461, "step": 29515 }, { "epoch": 1.498553226052084, "grad_norm": 0.4130484163761139, "learning_rate": 1.0009645159652774e-05, "loss": 0.0488, "step": 29520 }, { "epoch": 1.4988070460429463, "grad_norm": 0.44190675020217896, "learning_rate": 1.000795302638036e-05, "loss": 0.048, "step": 29525 }, { "epoch": 1.4990608660338087, "grad_norm": 0.5355786681175232, "learning_rate": 1.0006260893107941e-05, "loss": 0.0464, "step": 29530 }, { "epoch": 1.4993146860246713, "grad_norm": 0.27352702617645264, "learning_rate": 1.0004568759835525e-05, "loss": 0.0422, "step": 29535 }, { "epoch": 1.4995685060155337, "grad_norm": 0.3291056156158447, "learning_rate": 1.000287662656311e-05, "loss": 0.0498, "step": 29540 }, { "epoch": 1.4998223260063963, "grad_norm": 0.35799673199653625, "learning_rate": 1.0001184493290692e-05, "loss": 0.0427, "step": 29545 }, { "epoch": 1.5000761459972587, "grad_norm": 0.33578240871429443, "learning_rate": 9.999492360018276e-06, "loss": 0.046, "step": 29550 }, { "epoch": 1.5003299659881213, "grad_norm": 0.27118977904319763, "learning_rate": 9.997800226745859e-06, "loss": 0.0354, "step": 29555 }, { "epoch": 1.5005837859789837, "grad_norm": 0.5098280906677246, "learning_rate": 9.996108093473443e-06, "loss": 0.051, "step": 29560 }, { "epoch": 1.500837605969846, "grad_norm": 0.34415581822395325, "learning_rate": 9.994415960201026e-06, "loss": 0.0428, "step": 29565 }, { "epoch": 1.5010914259607087, "grad_norm": 0.2864645719528198, "learning_rate": 9.99272382692861e-06, "loss": 0.0371, "step": 29570 }, { "epoch": 1.5013452459515713, "grad_norm": 0.316386878490448, "learning_rate": 9.991031693656193e-06, "loss": 0.0451, "step": 29575 }, { "epoch": 1.5015990659424336, "grad_norm": 0.3148564100265503, "learning_rate": 9.989339560383777e-06, "loss": 0.0447, "step": 29580 }, { "epoch": 1.501852885933296, "grad_norm": 0.28054192662239075, "learning_rate": 9.98764742711136e-06, "loss": 0.0425, "step": 29585 }, { "epoch": 1.5021067059241586, "grad_norm": 0.34092408418655396, "learning_rate": 9.985955293838944e-06, "loss": 0.0434, "step": 29590 }, { "epoch": 1.5023605259150212, "grad_norm": 0.3001309633255005, "learning_rate": 9.984263160566527e-06, "loss": 0.0404, "step": 29595 }, { "epoch": 1.5026143459058834, "grad_norm": 0.36474132537841797, "learning_rate": 9.98257102729411e-06, "loss": 0.0449, "step": 29600 }, { "epoch": 1.502868165896746, "grad_norm": 0.35133421421051025, "learning_rate": 9.980878894021695e-06, "loss": 0.0585, "step": 29605 }, { "epoch": 1.5031219858876086, "grad_norm": 0.4537728726863861, "learning_rate": 9.979186760749278e-06, "loss": 0.0371, "step": 29610 }, { "epoch": 1.503375805878471, "grad_norm": 0.2741606831550598, "learning_rate": 9.977494627476862e-06, "loss": 0.0378, "step": 29615 }, { "epoch": 1.5036296258693334, "grad_norm": 0.30056142807006836, "learning_rate": 9.975802494204443e-06, "loss": 0.0387, "step": 29620 }, { "epoch": 1.503883445860196, "grad_norm": 0.34249797463417053, "learning_rate": 9.974110360932027e-06, "loss": 0.042, "step": 29625 }, { "epoch": 1.5041372658510586, "grad_norm": 0.2840827703475952, "learning_rate": 9.972418227659612e-06, "loss": 0.0473, "step": 29630 }, { "epoch": 1.504391085841921, "grad_norm": 0.40764328837394714, "learning_rate": 9.970726094387196e-06, "loss": 0.044, "step": 29635 }, { "epoch": 1.5046449058327833, "grad_norm": 0.4199784994125366, "learning_rate": 9.969033961114778e-06, "loss": 0.0411, "step": 29640 }, { "epoch": 1.504898725823646, "grad_norm": 0.33828359842300415, "learning_rate": 9.967341827842361e-06, "loss": 0.0427, "step": 29645 }, { "epoch": 1.5051525458145083, "grad_norm": 0.3699682056903839, "learning_rate": 9.965649694569945e-06, "loss": 0.0549, "step": 29650 }, { "epoch": 1.5054063658053707, "grad_norm": 0.41304442286491394, "learning_rate": 9.963957561297528e-06, "loss": 0.0498, "step": 29655 }, { "epoch": 1.5056601857962333, "grad_norm": 0.46015387773513794, "learning_rate": 9.962265428025112e-06, "loss": 0.0422, "step": 29660 }, { "epoch": 1.5059140057870959, "grad_norm": 0.28946584463119507, "learning_rate": 9.960573294752695e-06, "loss": 0.0441, "step": 29665 }, { "epoch": 1.5061678257779583, "grad_norm": 0.592179536819458, "learning_rate": 9.958881161480279e-06, "loss": 0.0466, "step": 29670 }, { "epoch": 1.5064216457688206, "grad_norm": 0.31929734349250793, "learning_rate": 9.957189028207862e-06, "loss": 0.04, "step": 29675 }, { "epoch": 1.5066754657596833, "grad_norm": 0.5968852043151855, "learning_rate": 9.955496894935446e-06, "loss": 0.0539, "step": 29680 }, { "epoch": 1.5069292857505459, "grad_norm": 0.43727225065231323, "learning_rate": 9.95380476166303e-06, "loss": 0.0558, "step": 29685 }, { "epoch": 1.5071831057414082, "grad_norm": 0.2910405397415161, "learning_rate": 9.952112628390613e-06, "loss": 0.042, "step": 29690 }, { "epoch": 1.5074369257322706, "grad_norm": 0.5035144686698914, "learning_rate": 9.950420495118197e-06, "loss": 0.0495, "step": 29695 }, { "epoch": 1.5076907457231332, "grad_norm": 0.8541820645332336, "learning_rate": 9.94872836184578e-06, "loss": 0.0416, "step": 29700 }, { "epoch": 1.5079445657139956, "grad_norm": 0.49969613552093506, "learning_rate": 9.947036228573364e-06, "loss": 0.0468, "step": 29705 }, { "epoch": 1.508198385704858, "grad_norm": 0.22982028126716614, "learning_rate": 9.945344095300947e-06, "loss": 0.0418, "step": 29710 }, { "epoch": 1.5084522056957206, "grad_norm": 0.3136741816997528, "learning_rate": 9.943651962028529e-06, "loss": 0.0452, "step": 29715 }, { "epoch": 1.5087060256865832, "grad_norm": 0.2914392352104187, "learning_rate": 9.941959828756114e-06, "loss": 0.0469, "step": 29720 }, { "epoch": 1.5089598456774456, "grad_norm": 0.390132337808609, "learning_rate": 9.940267695483698e-06, "loss": 0.0549, "step": 29725 }, { "epoch": 1.509213665668308, "grad_norm": 0.4769875109195709, "learning_rate": 9.93857556221128e-06, "loss": 0.0404, "step": 29730 }, { "epoch": 1.5094674856591705, "grad_norm": 0.4255264401435852, "learning_rate": 9.936883428938863e-06, "loss": 0.0423, "step": 29735 }, { "epoch": 1.5097213056500332, "grad_norm": 0.621986448764801, "learning_rate": 9.935191295666447e-06, "loss": 0.043, "step": 29740 }, { "epoch": 1.5099751256408953, "grad_norm": 0.47021618485450745, "learning_rate": 9.933499162394032e-06, "loss": 0.0372, "step": 29745 }, { "epoch": 1.510228945631758, "grad_norm": 0.5070352554321289, "learning_rate": 9.931807029121614e-06, "loss": 0.0458, "step": 29750 }, { "epoch": 1.5104827656226205, "grad_norm": 0.357382595539093, "learning_rate": 9.930114895849197e-06, "loss": 0.0444, "step": 29755 }, { "epoch": 1.510736585613483, "grad_norm": 0.303201287984848, "learning_rate": 9.928422762576781e-06, "loss": 0.0433, "step": 29760 }, { "epoch": 1.5109904056043453, "grad_norm": 0.4169237017631531, "learning_rate": 9.926730629304365e-06, "loss": 0.0507, "step": 29765 }, { "epoch": 1.5112442255952079, "grad_norm": 0.4273361265659332, "learning_rate": 9.925038496031948e-06, "loss": 0.047, "step": 29770 }, { "epoch": 1.5114980455860705, "grad_norm": 0.3794945180416107, "learning_rate": 9.923346362759532e-06, "loss": 0.0432, "step": 29775 }, { "epoch": 1.5117518655769329, "grad_norm": 0.29941776394844055, "learning_rate": 9.921654229487115e-06, "loss": 0.0384, "step": 29780 }, { "epoch": 1.5120056855677952, "grad_norm": 0.4585740566253662, "learning_rate": 9.919962096214699e-06, "loss": 0.0493, "step": 29785 }, { "epoch": 1.5122595055586578, "grad_norm": 0.30415454506874084, "learning_rate": 9.918269962942282e-06, "loss": 0.0423, "step": 29790 }, { "epoch": 1.5125133255495202, "grad_norm": 0.2759764790534973, "learning_rate": 9.916577829669866e-06, "loss": 0.0414, "step": 29795 }, { "epoch": 1.5127671455403826, "grad_norm": 0.3170830011367798, "learning_rate": 9.91488569639745e-06, "loss": 0.043, "step": 29800 }, { "epoch": 1.5130209655312452, "grad_norm": 0.3159930408000946, "learning_rate": 9.913193563125031e-06, "loss": 0.0419, "step": 29805 }, { "epoch": 1.5132747855221078, "grad_norm": 0.35223549604415894, "learning_rate": 9.911501429852616e-06, "loss": 0.044, "step": 29810 }, { "epoch": 1.5135286055129702, "grad_norm": 0.2940140962600708, "learning_rate": 9.9098092965802e-06, "loss": 0.0506, "step": 29815 }, { "epoch": 1.5137824255038326, "grad_norm": 0.44202086329460144, "learning_rate": 9.908117163307784e-06, "loss": 0.0503, "step": 29820 }, { "epoch": 1.5140362454946952, "grad_norm": 0.37494203448295593, "learning_rate": 9.906425030035365e-06, "loss": 0.0557, "step": 29825 }, { "epoch": 1.5142900654855578, "grad_norm": 0.34451350569725037, "learning_rate": 9.904732896762949e-06, "loss": 0.0382, "step": 29830 }, { "epoch": 1.5145438854764202, "grad_norm": 0.35963883996009827, "learning_rate": 9.903040763490534e-06, "loss": 0.0482, "step": 29835 }, { "epoch": 1.5147977054672825, "grad_norm": 0.3412439525127411, "learning_rate": 9.901348630218118e-06, "loss": 0.042, "step": 29840 }, { "epoch": 1.5150515254581451, "grad_norm": 0.32426542043685913, "learning_rate": 9.8996564969457e-06, "loss": 0.045, "step": 29845 }, { "epoch": 1.5153053454490075, "grad_norm": 0.2650112807750702, "learning_rate": 9.897964363673283e-06, "loss": 0.0443, "step": 29850 }, { "epoch": 1.51555916543987, "grad_norm": 0.38973918557167053, "learning_rate": 9.896272230400867e-06, "loss": 0.0402, "step": 29855 }, { "epoch": 1.5158129854307325, "grad_norm": 0.49945682287216187, "learning_rate": 9.894580097128452e-06, "loss": 0.045, "step": 29860 }, { "epoch": 1.516066805421595, "grad_norm": 0.283811092376709, "learning_rate": 9.892887963856034e-06, "loss": 0.034, "step": 29865 }, { "epoch": 1.5163206254124575, "grad_norm": 0.381591260433197, "learning_rate": 9.891195830583617e-06, "loss": 0.0445, "step": 29870 }, { "epoch": 1.5165744454033199, "grad_norm": 0.43761926889419556, "learning_rate": 9.889503697311201e-06, "loss": 0.051, "step": 29875 }, { "epoch": 1.5168282653941825, "grad_norm": 0.3689984381198883, "learning_rate": 9.887811564038784e-06, "loss": 0.0456, "step": 29880 }, { "epoch": 1.517082085385045, "grad_norm": 0.41369885206222534, "learning_rate": 9.886119430766368e-06, "loss": 0.0478, "step": 29885 }, { "epoch": 1.5173359053759075, "grad_norm": 0.3490171730518341, "learning_rate": 9.884427297493951e-06, "loss": 0.0467, "step": 29890 }, { "epoch": 1.5175897253667698, "grad_norm": 0.41968002915382385, "learning_rate": 9.882735164221535e-06, "loss": 0.052, "step": 29895 }, { "epoch": 1.5178435453576324, "grad_norm": 0.3344227075576782, "learning_rate": 9.881043030949119e-06, "loss": 0.0497, "step": 29900 }, { "epoch": 1.5180973653484948, "grad_norm": 0.2427772432565689, "learning_rate": 9.879350897676702e-06, "loss": 0.0369, "step": 29905 }, { "epoch": 1.5183511853393572, "grad_norm": 0.2409926950931549, "learning_rate": 9.877658764404286e-06, "loss": 0.0416, "step": 29910 }, { "epoch": 1.5186050053302198, "grad_norm": 0.31125301122665405, "learning_rate": 9.87596663113187e-06, "loss": 0.0419, "step": 29915 }, { "epoch": 1.5188588253210824, "grad_norm": 0.3816099464893341, "learning_rate": 9.874274497859451e-06, "loss": 0.0452, "step": 29920 }, { "epoch": 1.5191126453119448, "grad_norm": 0.2479274719953537, "learning_rate": 9.872582364587036e-06, "loss": 0.0487, "step": 29925 }, { "epoch": 1.5193664653028072, "grad_norm": 0.4390551447868347, "learning_rate": 9.87089023131462e-06, "loss": 0.0506, "step": 29930 }, { "epoch": 1.5196202852936698, "grad_norm": 0.36978304386138916, "learning_rate": 9.869198098042203e-06, "loss": 0.0438, "step": 29935 }, { "epoch": 1.5198741052845324, "grad_norm": 0.35317033529281616, "learning_rate": 9.867505964769785e-06, "loss": 0.0395, "step": 29940 }, { "epoch": 1.5201279252753945, "grad_norm": 0.3761212229728699, "learning_rate": 9.865813831497369e-06, "loss": 0.0505, "step": 29945 }, { "epoch": 1.5203817452662571, "grad_norm": 0.3977470099925995, "learning_rate": 9.864121698224954e-06, "loss": 0.051, "step": 29950 }, { "epoch": 1.5206355652571197, "grad_norm": 0.43119311332702637, "learning_rate": 9.862429564952538e-06, "loss": 0.0485, "step": 29955 }, { "epoch": 1.5208893852479821, "grad_norm": 0.3914470076560974, "learning_rate": 9.86073743168012e-06, "loss": 0.0425, "step": 29960 }, { "epoch": 1.5211432052388445, "grad_norm": 0.30373236536979675, "learning_rate": 9.859045298407703e-06, "loss": 0.0473, "step": 29965 }, { "epoch": 1.521397025229707, "grad_norm": 1.0446465015411377, "learning_rate": 9.857353165135287e-06, "loss": 0.0439, "step": 29970 }, { "epoch": 1.5216508452205697, "grad_norm": 0.5108188986778259, "learning_rate": 9.85566103186287e-06, "loss": 0.0432, "step": 29975 }, { "epoch": 1.521904665211432, "grad_norm": 0.257059782743454, "learning_rate": 9.853968898590454e-06, "loss": 0.0403, "step": 29980 }, { "epoch": 1.5221584852022945, "grad_norm": 0.3410789370536804, "learning_rate": 9.852276765318037e-06, "loss": 0.0456, "step": 29985 }, { "epoch": 1.522412305193157, "grad_norm": 0.28535956144332886, "learning_rate": 9.85058463204562e-06, "loss": 0.041, "step": 29990 }, { "epoch": 1.5226661251840194, "grad_norm": 0.3559688627719879, "learning_rate": 9.848892498773204e-06, "loss": 0.0401, "step": 29995 }, { "epoch": 1.5229199451748818, "grad_norm": 0.3361237943172455, "learning_rate": 9.847200365500788e-06, "loss": 0.0492, "step": 30000 }, { "epoch": 1.5231737651657444, "grad_norm": 0.535846471786499, "learning_rate": 9.845508232228371e-06, "loss": 0.0549, "step": 30005 }, { "epoch": 1.523427585156607, "grad_norm": 0.4088345170021057, "learning_rate": 9.843816098955955e-06, "loss": 0.0447, "step": 30010 }, { "epoch": 1.5236814051474694, "grad_norm": 0.36553964018821716, "learning_rate": 9.842123965683538e-06, "loss": 0.0458, "step": 30015 }, { "epoch": 1.5239352251383318, "grad_norm": 0.4269677400588989, "learning_rate": 9.840431832411122e-06, "loss": 0.043, "step": 30020 }, { "epoch": 1.5241890451291944, "grad_norm": 0.2518591284751892, "learning_rate": 9.838739699138705e-06, "loss": 0.0414, "step": 30025 }, { "epoch": 1.524442865120057, "grad_norm": 0.2876974940299988, "learning_rate": 9.837047565866289e-06, "loss": 0.039, "step": 30030 }, { "epoch": 1.5246966851109194, "grad_norm": 0.40843939781188965, "learning_rate": 9.835355432593871e-06, "loss": 0.0465, "step": 30035 }, { "epoch": 1.5249505051017818, "grad_norm": 1.2598471641540527, "learning_rate": 9.833663299321456e-06, "loss": 0.04, "step": 30040 }, { "epoch": 1.5252043250926444, "grad_norm": 0.39028775691986084, "learning_rate": 9.83197116604904e-06, "loss": 0.0524, "step": 30045 }, { "epoch": 1.5254581450835067, "grad_norm": 0.5589412450790405, "learning_rate": 9.830279032776622e-06, "loss": 0.0535, "step": 30050 }, { "epoch": 1.5257119650743691, "grad_norm": 0.3849193751811981, "learning_rate": 9.828586899504205e-06, "loss": 0.0482, "step": 30055 }, { "epoch": 1.5259657850652317, "grad_norm": 0.3056574761867523, "learning_rate": 9.826894766231789e-06, "loss": 0.0426, "step": 30060 }, { "epoch": 1.5262196050560943, "grad_norm": 0.43994876742362976, "learning_rate": 9.825202632959374e-06, "loss": 0.0424, "step": 30065 }, { "epoch": 1.5264734250469567, "grad_norm": 0.33953234553337097, "learning_rate": 9.823510499686956e-06, "loss": 0.0438, "step": 30070 }, { "epoch": 1.526727245037819, "grad_norm": 0.4996977746486664, "learning_rate": 9.82181836641454e-06, "loss": 0.0487, "step": 30075 }, { "epoch": 1.5269810650286817, "grad_norm": 0.4753670394420624, "learning_rate": 9.820126233142123e-06, "loss": 0.0401, "step": 30080 }, { "epoch": 1.5272348850195443, "grad_norm": 0.3781532645225525, "learning_rate": 9.818434099869706e-06, "loss": 0.0457, "step": 30085 }, { "epoch": 1.5274887050104065, "grad_norm": 0.4716370105743408, "learning_rate": 9.81674196659729e-06, "loss": 0.0457, "step": 30090 }, { "epoch": 1.527742525001269, "grad_norm": 0.2860237658023834, "learning_rate": 9.815049833324873e-06, "loss": 0.0379, "step": 30095 }, { "epoch": 1.5279963449921317, "grad_norm": 0.5502962470054626, "learning_rate": 9.813357700052457e-06, "loss": 0.0441, "step": 30100 }, { "epoch": 1.528250164982994, "grad_norm": 0.32045990228652954, "learning_rate": 9.81166556678004e-06, "loss": 0.0384, "step": 30105 }, { "epoch": 1.5285039849738564, "grad_norm": 0.2748587727546692, "learning_rate": 9.809973433507624e-06, "loss": 0.0428, "step": 30110 }, { "epoch": 1.528757804964719, "grad_norm": 0.3589244484901428, "learning_rate": 9.808281300235208e-06, "loss": 0.0423, "step": 30115 }, { "epoch": 1.5290116249555816, "grad_norm": 0.30734512209892273, "learning_rate": 9.806589166962791e-06, "loss": 0.0368, "step": 30120 }, { "epoch": 1.529265444946444, "grad_norm": 0.5760895013809204, "learning_rate": 9.804897033690373e-06, "loss": 0.0494, "step": 30125 }, { "epoch": 1.5295192649373064, "grad_norm": 0.3356100916862488, "learning_rate": 9.803204900417958e-06, "loss": 0.0465, "step": 30130 }, { "epoch": 1.529773084928169, "grad_norm": 0.7250293493270874, "learning_rate": 9.801512767145542e-06, "loss": 0.0421, "step": 30135 }, { "epoch": 1.5300269049190314, "grad_norm": 0.22375506162643433, "learning_rate": 9.799820633873125e-06, "loss": 0.0389, "step": 30140 }, { "epoch": 1.5302807249098938, "grad_norm": 0.5991578102111816, "learning_rate": 9.798128500600707e-06, "loss": 0.043, "step": 30145 }, { "epoch": 1.5305345449007564, "grad_norm": 0.3490102291107178, "learning_rate": 9.79643636732829e-06, "loss": 0.0413, "step": 30150 }, { "epoch": 1.530788364891619, "grad_norm": 0.3767857551574707, "learning_rate": 9.794744234055876e-06, "loss": 0.0541, "step": 30155 }, { "epoch": 1.5310421848824813, "grad_norm": 0.6094956994056702, "learning_rate": 9.79305210078346e-06, "loss": 0.041, "step": 30160 }, { "epoch": 1.5312960048733437, "grad_norm": 0.6001025438308716, "learning_rate": 9.791359967511041e-06, "loss": 0.0453, "step": 30165 }, { "epoch": 1.5315498248642063, "grad_norm": 0.36103636026382446, "learning_rate": 9.789667834238625e-06, "loss": 0.0466, "step": 30170 }, { "epoch": 1.531803644855069, "grad_norm": 0.48035770654678345, "learning_rate": 9.787975700966208e-06, "loss": 0.045, "step": 30175 }, { "epoch": 1.5320574648459313, "grad_norm": 0.43839773535728455, "learning_rate": 9.786283567693794e-06, "loss": 0.0489, "step": 30180 }, { "epoch": 1.5323112848367937, "grad_norm": 0.7059385776519775, "learning_rate": 9.784591434421376e-06, "loss": 0.0422, "step": 30185 }, { "epoch": 1.5325651048276563, "grad_norm": 0.3195935785770416, "learning_rate": 9.782899301148959e-06, "loss": 0.0413, "step": 30190 }, { "epoch": 1.5328189248185187, "grad_norm": 0.25066328048706055, "learning_rate": 9.781207167876543e-06, "loss": 0.0379, "step": 30195 }, { "epoch": 1.533072744809381, "grad_norm": 0.3572368025779724, "learning_rate": 9.779515034604126e-06, "loss": 0.0465, "step": 30200 }, { "epoch": 1.5333265648002437, "grad_norm": 0.21863994002342224, "learning_rate": 9.77782290133171e-06, "loss": 0.0421, "step": 30205 }, { "epoch": 1.5335803847911063, "grad_norm": 0.427107036113739, "learning_rate": 9.776130768059293e-06, "loss": 0.0446, "step": 30210 }, { "epoch": 1.5338342047819686, "grad_norm": 0.2890668213367462, "learning_rate": 9.774438634786877e-06, "loss": 0.0491, "step": 30215 }, { "epoch": 1.534088024772831, "grad_norm": 0.34447863698005676, "learning_rate": 9.77274650151446e-06, "loss": 0.0369, "step": 30220 }, { "epoch": 1.5343418447636936, "grad_norm": 0.3056979775428772, "learning_rate": 9.771054368242044e-06, "loss": 0.0544, "step": 30225 }, { "epoch": 1.5345956647545562, "grad_norm": 0.27630293369293213, "learning_rate": 9.769362234969627e-06, "loss": 0.0385, "step": 30230 }, { "epoch": 1.5348494847454186, "grad_norm": 0.42689627408981323, "learning_rate": 9.767670101697211e-06, "loss": 0.0424, "step": 30235 }, { "epoch": 1.535103304736281, "grad_norm": 0.40753817558288574, "learning_rate": 9.765977968424793e-06, "loss": 0.0568, "step": 30240 }, { "epoch": 1.5353571247271436, "grad_norm": 0.2552444040775299, "learning_rate": 9.764285835152378e-06, "loss": 0.0437, "step": 30245 }, { "epoch": 1.535610944718006, "grad_norm": 0.27542778849601746, "learning_rate": 9.762593701879962e-06, "loss": 0.0397, "step": 30250 }, { "epoch": 1.5358647647088683, "grad_norm": 0.257028728723526, "learning_rate": 9.760901568607545e-06, "loss": 0.043, "step": 30255 }, { "epoch": 1.536118584699731, "grad_norm": 0.7336534857749939, "learning_rate": 9.759209435335127e-06, "loss": 0.0448, "step": 30260 }, { "epoch": 1.5363724046905936, "grad_norm": 0.7926486134529114, "learning_rate": 9.75751730206271e-06, "loss": 0.0473, "step": 30265 }, { "epoch": 1.536626224681456, "grad_norm": 0.37469780445098877, "learning_rate": 9.755825168790296e-06, "loss": 0.0434, "step": 30270 }, { "epoch": 1.5368800446723183, "grad_norm": 0.2389148771762848, "learning_rate": 9.75413303551788e-06, "loss": 0.0328, "step": 30275 }, { "epoch": 1.537133864663181, "grad_norm": 0.18880969285964966, "learning_rate": 9.752440902245461e-06, "loss": 0.0333, "step": 30280 }, { "epoch": 1.5373876846540435, "grad_norm": 0.29365938901901245, "learning_rate": 9.750748768973045e-06, "loss": 0.0365, "step": 30285 }, { "epoch": 1.5376415046449057, "grad_norm": 0.4020933508872986, "learning_rate": 9.749056635700628e-06, "loss": 0.0451, "step": 30290 }, { "epoch": 1.5378953246357683, "grad_norm": 0.3207293748855591, "learning_rate": 9.747364502428212e-06, "loss": 0.0465, "step": 30295 }, { "epoch": 1.5381491446266309, "grad_norm": 0.5186519026756287, "learning_rate": 9.745672369155795e-06, "loss": 0.0478, "step": 30300 }, { "epoch": 1.5384029646174933, "grad_norm": 0.657397985458374, "learning_rate": 9.743980235883379e-06, "loss": 0.0366, "step": 30305 }, { "epoch": 1.5386567846083556, "grad_norm": 0.3749079704284668, "learning_rate": 9.742288102610962e-06, "loss": 0.0419, "step": 30310 }, { "epoch": 1.5389106045992182, "grad_norm": 0.47508054971694946, "learning_rate": 9.740595969338546e-06, "loss": 0.0492, "step": 30315 }, { "epoch": 1.5391644245900808, "grad_norm": 0.3623720109462738, "learning_rate": 9.73890383606613e-06, "loss": 0.0413, "step": 30320 }, { "epoch": 1.5394182445809432, "grad_norm": 0.32053571939468384, "learning_rate": 9.737211702793713e-06, "loss": 0.0432, "step": 30325 }, { "epoch": 1.5396720645718056, "grad_norm": 0.3546943962574005, "learning_rate": 9.735519569521297e-06, "loss": 0.0487, "step": 30330 }, { "epoch": 1.5399258845626682, "grad_norm": 0.3554565906524658, "learning_rate": 9.73382743624888e-06, "loss": 0.0424, "step": 30335 }, { "epoch": 1.5401797045535306, "grad_norm": 0.44860467314720154, "learning_rate": 9.732135302976464e-06, "loss": 0.0416, "step": 30340 }, { "epoch": 1.540433524544393, "grad_norm": 0.28419896960258484, "learning_rate": 9.730443169704047e-06, "loss": 0.0422, "step": 30345 }, { "epoch": 1.5406873445352556, "grad_norm": 0.2352607697248459, "learning_rate": 9.72875103643163e-06, "loss": 0.0397, "step": 30350 }, { "epoch": 1.5409411645261182, "grad_norm": 0.348967969417572, "learning_rate": 9.727058903159213e-06, "loss": 0.0443, "step": 30355 }, { "epoch": 1.5411949845169806, "grad_norm": 0.5574663281440735, "learning_rate": 9.725366769886798e-06, "loss": 0.0476, "step": 30360 }, { "epoch": 1.541448804507843, "grad_norm": 0.3044932186603546, "learning_rate": 9.723674636614381e-06, "loss": 0.0497, "step": 30365 }, { "epoch": 1.5417026244987055, "grad_norm": 0.3024437427520752, "learning_rate": 9.721982503341963e-06, "loss": 0.0392, "step": 30370 }, { "epoch": 1.5419564444895681, "grad_norm": 0.9230430126190186, "learning_rate": 9.720290370069547e-06, "loss": 0.0457, "step": 30375 }, { "epoch": 1.5422102644804305, "grad_norm": 0.29543060064315796, "learning_rate": 9.71859823679713e-06, "loss": 0.0407, "step": 30380 }, { "epoch": 1.542464084471293, "grad_norm": 0.3375641107559204, "learning_rate": 9.716906103524714e-06, "loss": 0.04, "step": 30385 }, { "epoch": 1.5427179044621555, "grad_norm": 0.31575915217399597, "learning_rate": 9.715213970252297e-06, "loss": 0.049, "step": 30390 }, { "epoch": 1.542971724453018, "grad_norm": 0.2939968705177307, "learning_rate": 9.713521836979881e-06, "loss": 0.0469, "step": 30395 }, { "epoch": 1.5432255444438803, "grad_norm": 0.47484177350997925, "learning_rate": 9.711829703707465e-06, "loss": 0.0539, "step": 30400 }, { "epoch": 1.5434793644347429, "grad_norm": 0.29723361134529114, "learning_rate": 9.710137570435048e-06, "loss": 0.0388, "step": 30405 }, { "epoch": 1.5437331844256055, "grad_norm": 0.6943442225456238, "learning_rate": 9.708445437162632e-06, "loss": 0.0447, "step": 30410 }, { "epoch": 1.5439870044164679, "grad_norm": 0.3720008134841919, "learning_rate": 9.706753303890215e-06, "loss": 0.0427, "step": 30415 }, { "epoch": 1.5442408244073302, "grad_norm": 0.2879853844642639, "learning_rate": 9.705061170617799e-06, "loss": 0.047, "step": 30420 }, { "epoch": 1.5444946443981928, "grad_norm": 0.5231357216835022, "learning_rate": 9.703369037345382e-06, "loss": 0.0394, "step": 30425 }, { "epoch": 1.5447484643890554, "grad_norm": 0.33853334188461304, "learning_rate": 9.701676904072966e-06, "loss": 0.0488, "step": 30430 }, { "epoch": 1.5450022843799176, "grad_norm": 0.5183348655700684, "learning_rate": 9.69998477080055e-06, "loss": 0.0382, "step": 30435 }, { "epoch": 1.5452561043707802, "grad_norm": 0.34625086188316345, "learning_rate": 9.698292637528133e-06, "loss": 0.038, "step": 30440 }, { "epoch": 1.5455099243616428, "grad_norm": 0.5586714744567871, "learning_rate": 9.696600504255715e-06, "loss": 0.0367, "step": 30445 }, { "epoch": 1.5457637443525052, "grad_norm": 0.7301913499832153, "learning_rate": 9.6949083709833e-06, "loss": 0.0479, "step": 30450 }, { "epoch": 1.5460175643433676, "grad_norm": 0.5144783854484558, "learning_rate": 9.693216237710884e-06, "loss": 0.0416, "step": 30455 }, { "epoch": 1.5462713843342302, "grad_norm": 0.33749499917030334, "learning_rate": 9.691524104438467e-06, "loss": 0.0465, "step": 30460 }, { "epoch": 1.5465252043250928, "grad_norm": 0.28034669160842896, "learning_rate": 9.689831971166049e-06, "loss": 0.0442, "step": 30465 }, { "epoch": 1.5467790243159552, "grad_norm": 0.47718653082847595, "learning_rate": 9.688139837893632e-06, "loss": 0.0417, "step": 30470 }, { "epoch": 1.5470328443068175, "grad_norm": 0.543878436088562, "learning_rate": 9.686447704621216e-06, "loss": 0.0499, "step": 30475 }, { "epoch": 1.5472866642976801, "grad_norm": 0.30951932072639465, "learning_rate": 9.684755571348801e-06, "loss": 0.0548, "step": 30480 }, { "epoch": 1.5475404842885425, "grad_norm": 0.39540088176727295, "learning_rate": 9.683063438076383e-06, "loss": 0.0394, "step": 30485 }, { "epoch": 1.547794304279405, "grad_norm": 0.37335509061813354, "learning_rate": 9.681371304803967e-06, "loss": 0.0466, "step": 30490 }, { "epoch": 1.5480481242702675, "grad_norm": 0.39366480708122253, "learning_rate": 9.67967917153155e-06, "loss": 0.0391, "step": 30495 }, { "epoch": 1.54830194426113, "grad_norm": 0.2801901698112488, "learning_rate": 9.677987038259134e-06, "loss": 0.0421, "step": 30500 }, { "epoch": 1.5485557642519925, "grad_norm": 0.37131232023239136, "learning_rate": 9.676294904986717e-06, "loss": 0.0496, "step": 30505 }, { "epoch": 1.5488095842428549, "grad_norm": 0.3372965455055237, "learning_rate": 9.674602771714301e-06, "loss": 0.0588, "step": 30510 }, { "epoch": 1.5490634042337175, "grad_norm": 0.8736234307289124, "learning_rate": 9.672910638441884e-06, "loss": 0.0464, "step": 30515 }, { "epoch": 1.54931722422458, "grad_norm": 0.24658441543579102, "learning_rate": 9.671218505169468e-06, "loss": 0.0338, "step": 30520 }, { "epoch": 1.5495710442154425, "grad_norm": 0.2501583397388458, "learning_rate": 9.669526371897051e-06, "loss": 0.0386, "step": 30525 }, { "epoch": 1.5498248642063048, "grad_norm": 0.3103393018245697, "learning_rate": 9.667834238624635e-06, "loss": 0.0476, "step": 30530 }, { "epoch": 1.5500786841971674, "grad_norm": 0.36764755845069885, "learning_rate": 9.666142105352219e-06, "loss": 0.0414, "step": 30535 }, { "epoch": 1.5503325041880298, "grad_norm": 0.46840208768844604, "learning_rate": 9.664449972079802e-06, "loss": 0.0451, "step": 30540 }, { "epoch": 1.5505863241788922, "grad_norm": 0.5336713194847107, "learning_rate": 9.662757838807386e-06, "loss": 0.0537, "step": 30545 }, { "epoch": 1.5508401441697548, "grad_norm": 0.4022626578807831, "learning_rate": 9.66106570553497e-06, "loss": 0.0472, "step": 30550 }, { "epoch": 1.5510939641606174, "grad_norm": 0.22053535282611847, "learning_rate": 9.659373572262553e-06, "loss": 0.052, "step": 30555 }, { "epoch": 1.5513477841514798, "grad_norm": 0.3512711226940155, "learning_rate": 9.657681438990135e-06, "loss": 0.044, "step": 30560 }, { "epoch": 1.5516016041423422, "grad_norm": 0.31947484612464905, "learning_rate": 9.655989305717718e-06, "loss": 0.0444, "step": 30565 }, { "epoch": 1.5518554241332048, "grad_norm": 0.6187324523925781, "learning_rate": 9.654297172445303e-06, "loss": 0.0499, "step": 30570 }, { "epoch": 1.5521092441240674, "grad_norm": 0.3712591230869293, "learning_rate": 9.652605039172887e-06, "loss": 0.0443, "step": 30575 }, { "epoch": 1.5523630641149297, "grad_norm": 0.6312601566314697, "learning_rate": 9.650912905900469e-06, "loss": 0.0468, "step": 30580 }, { "epoch": 1.5526168841057921, "grad_norm": 0.3091924786567688, "learning_rate": 9.649220772628052e-06, "loss": 0.0395, "step": 30585 }, { "epoch": 1.5528707040966547, "grad_norm": 0.23264305293560028, "learning_rate": 9.647528639355636e-06, "loss": 0.0401, "step": 30590 }, { "epoch": 1.553124524087517, "grad_norm": 0.4403635263442993, "learning_rate": 9.645836506083221e-06, "loss": 0.0394, "step": 30595 }, { "epoch": 1.5533783440783795, "grad_norm": 0.6066749095916748, "learning_rate": 9.644144372810803e-06, "loss": 0.0555, "step": 30600 }, { "epoch": 1.553632164069242, "grad_norm": 0.43328630924224854, "learning_rate": 9.642452239538387e-06, "loss": 0.0418, "step": 30605 }, { "epoch": 1.5538859840601047, "grad_norm": 0.3233341574668884, "learning_rate": 9.64076010626597e-06, "loss": 0.0411, "step": 30610 }, { "epoch": 1.554139804050967, "grad_norm": 0.47174957394599915, "learning_rate": 9.639067972993554e-06, "loss": 0.0457, "step": 30615 }, { "epoch": 1.5543936240418295, "grad_norm": 0.31749293208122253, "learning_rate": 9.637375839721137e-06, "loss": 0.0467, "step": 30620 }, { "epoch": 1.554647444032692, "grad_norm": 0.7397310137748718, "learning_rate": 9.63568370644872e-06, "loss": 0.0388, "step": 30625 }, { "epoch": 1.5549012640235544, "grad_norm": 0.24703024327754974, "learning_rate": 9.633991573176304e-06, "loss": 0.041, "step": 30630 }, { "epoch": 1.5551550840144168, "grad_norm": 0.3380805253982544, "learning_rate": 9.632299439903888e-06, "loss": 0.039, "step": 30635 }, { "epoch": 1.5554089040052794, "grad_norm": 0.30269336700439453, "learning_rate": 9.630607306631471e-06, "loss": 0.0409, "step": 30640 }, { "epoch": 1.555662723996142, "grad_norm": 0.20982228219509125, "learning_rate": 9.628915173359055e-06, "loss": 0.0426, "step": 30645 }, { "epoch": 1.5559165439870044, "grad_norm": 0.30065563321113586, "learning_rate": 9.627223040086638e-06, "loss": 0.0474, "step": 30650 }, { "epoch": 1.5561703639778668, "grad_norm": 0.5171536803245544, "learning_rate": 9.62553090681422e-06, "loss": 0.0457, "step": 30655 }, { "epoch": 1.5564241839687294, "grad_norm": 0.408906489610672, "learning_rate": 9.623838773541805e-06, "loss": 0.0403, "step": 30660 }, { "epoch": 1.556678003959592, "grad_norm": 0.2871289849281311, "learning_rate": 9.622146640269389e-06, "loss": 0.0333, "step": 30665 }, { "epoch": 1.5569318239504544, "grad_norm": 0.2980833053588867, "learning_rate": 9.620454506996973e-06, "loss": 0.0423, "step": 30670 }, { "epoch": 1.5571856439413168, "grad_norm": 0.38692790269851685, "learning_rate": 9.618762373724554e-06, "loss": 0.048, "step": 30675 }, { "epoch": 1.5574394639321794, "grad_norm": 0.9138740301132202, "learning_rate": 9.617070240452138e-06, "loss": 0.0453, "step": 30680 }, { "epoch": 1.5576932839230417, "grad_norm": 0.30773526430130005, "learning_rate": 9.615378107179723e-06, "loss": 0.0419, "step": 30685 }, { "epoch": 1.5579471039139041, "grad_norm": 0.4494325518608093, "learning_rate": 9.613685973907305e-06, "loss": 0.0355, "step": 30690 }, { "epoch": 1.5582009239047667, "grad_norm": 0.2837882339954376, "learning_rate": 9.611993840634889e-06, "loss": 0.0389, "step": 30695 }, { "epoch": 1.5584547438956293, "grad_norm": 0.3327257037162781, "learning_rate": 9.610301707362472e-06, "loss": 0.0414, "step": 30700 }, { "epoch": 1.5587085638864917, "grad_norm": 0.37678539752960205, "learning_rate": 9.608609574090056e-06, "loss": 0.0365, "step": 30705 }, { "epoch": 1.558962383877354, "grad_norm": 0.326312392950058, "learning_rate": 9.60691744081764e-06, "loss": 0.048, "step": 30710 }, { "epoch": 1.5592162038682167, "grad_norm": 0.4527011215686798, "learning_rate": 9.605225307545223e-06, "loss": 0.0405, "step": 30715 }, { "epoch": 1.5594700238590793, "grad_norm": 0.7663089036941528, "learning_rate": 9.603533174272806e-06, "loss": 0.0455, "step": 30720 }, { "epoch": 1.5597238438499417, "grad_norm": 0.3243597447872162, "learning_rate": 9.60184104100039e-06, "loss": 0.0414, "step": 30725 }, { "epoch": 1.559977663840804, "grad_norm": 0.40656790137290955, "learning_rate": 9.600148907727973e-06, "loss": 0.0553, "step": 30730 }, { "epoch": 1.5602314838316667, "grad_norm": 0.48976048827171326, "learning_rate": 9.598456774455557e-06, "loss": 0.0384, "step": 30735 }, { "epoch": 1.560485303822529, "grad_norm": 0.2989707589149475, "learning_rate": 9.59676464118314e-06, "loss": 0.0446, "step": 30740 }, { "epoch": 1.5607391238133914, "grad_norm": 0.3599499762058258, "learning_rate": 9.595072507910724e-06, "loss": 0.0385, "step": 30745 }, { "epoch": 1.560992943804254, "grad_norm": 0.36865293979644775, "learning_rate": 9.593380374638308e-06, "loss": 0.036, "step": 30750 }, { "epoch": 1.5612467637951166, "grad_norm": 0.41994988918304443, "learning_rate": 9.591688241365891e-06, "loss": 0.0463, "step": 30755 }, { "epoch": 1.561500583785979, "grad_norm": 0.7477067112922668, "learning_rate": 9.589996108093475e-06, "loss": 0.0407, "step": 30760 }, { "epoch": 1.5617544037768414, "grad_norm": 0.2858511805534363, "learning_rate": 9.588303974821058e-06, "loss": 0.0485, "step": 30765 }, { "epoch": 1.562008223767704, "grad_norm": 0.4207206964492798, "learning_rate": 9.58661184154864e-06, "loss": 0.0546, "step": 30770 }, { "epoch": 1.5622620437585666, "grad_norm": 0.7034492492675781, "learning_rate": 9.584919708276225e-06, "loss": 0.0439, "step": 30775 }, { "epoch": 1.5625158637494287, "grad_norm": 0.4226521849632263, "learning_rate": 9.583227575003809e-06, "loss": 0.0437, "step": 30780 }, { "epoch": 1.5627696837402913, "grad_norm": 0.37233424186706543, "learning_rate": 9.58153544173139e-06, "loss": 0.0396, "step": 30785 }, { "epoch": 1.563023503731154, "grad_norm": 0.35524532198905945, "learning_rate": 9.579843308458974e-06, "loss": 0.042, "step": 30790 }, { "epoch": 1.5632773237220163, "grad_norm": 0.24936921894550323, "learning_rate": 9.578151175186558e-06, "loss": 0.037, "step": 30795 }, { "epoch": 1.5635311437128787, "grad_norm": 0.35110658407211304, "learning_rate": 9.576459041914143e-06, "loss": 0.0443, "step": 30800 }, { "epoch": 1.5637849637037413, "grad_norm": 0.339292973279953, "learning_rate": 9.574766908641725e-06, "loss": 0.0433, "step": 30805 }, { "epoch": 1.564038783694604, "grad_norm": 0.39210009574890137, "learning_rate": 9.573074775369308e-06, "loss": 0.0385, "step": 30810 }, { "epoch": 1.5642926036854663, "grad_norm": 0.39238834381103516, "learning_rate": 9.571382642096892e-06, "loss": 0.0415, "step": 30815 }, { "epoch": 1.5645464236763287, "grad_norm": 0.4399608075618744, "learning_rate": 9.569690508824476e-06, "loss": 0.0466, "step": 30820 }, { "epoch": 1.5648002436671913, "grad_norm": 0.34421685338020325, "learning_rate": 9.567998375552059e-06, "loss": 0.0392, "step": 30825 }, { "epoch": 1.5650540636580537, "grad_norm": 0.256448894739151, "learning_rate": 9.566306242279643e-06, "loss": 0.041, "step": 30830 }, { "epoch": 1.565307883648916, "grad_norm": 0.2971365749835968, "learning_rate": 9.564614109007226e-06, "loss": 0.0469, "step": 30835 }, { "epoch": 1.5655617036397786, "grad_norm": 0.37906402349472046, "learning_rate": 9.56292197573481e-06, "loss": 0.0478, "step": 30840 }, { "epoch": 1.5658155236306412, "grad_norm": 0.35806217789649963, "learning_rate": 9.561229842462393e-06, "loss": 0.042, "step": 30845 }, { "epoch": 1.5660693436215036, "grad_norm": 0.5716193914413452, "learning_rate": 9.559537709189977e-06, "loss": 0.045, "step": 30850 }, { "epoch": 1.566323163612366, "grad_norm": 0.2948254644870758, "learning_rate": 9.55784557591756e-06, "loss": 0.0407, "step": 30855 }, { "epoch": 1.5665769836032286, "grad_norm": 0.4924793839454651, "learning_rate": 9.556153442645142e-06, "loss": 0.0405, "step": 30860 }, { "epoch": 1.5668308035940912, "grad_norm": 0.4333508610725403, "learning_rate": 9.554461309372727e-06, "loss": 0.0386, "step": 30865 }, { "epoch": 1.5670846235849536, "grad_norm": 0.35485759377479553, "learning_rate": 9.552769176100311e-06, "loss": 0.0356, "step": 30870 }, { "epoch": 1.567338443575816, "grad_norm": 0.2829851806163788, "learning_rate": 9.551077042827895e-06, "loss": 0.0469, "step": 30875 }, { "epoch": 1.5675922635666786, "grad_norm": 0.4814103841781616, "learning_rate": 9.549384909555476e-06, "loss": 0.05, "step": 30880 }, { "epoch": 1.567846083557541, "grad_norm": 0.3469924032688141, "learning_rate": 9.54769277628306e-06, "loss": 0.0434, "step": 30885 }, { "epoch": 1.5680999035484033, "grad_norm": 0.35943081974983215, "learning_rate": 9.546000643010645e-06, "loss": 0.0472, "step": 30890 }, { "epoch": 1.568353723539266, "grad_norm": 0.3924970030784607, "learning_rate": 9.544308509738229e-06, "loss": 0.0489, "step": 30895 }, { "epoch": 1.5686075435301285, "grad_norm": 0.3497081995010376, "learning_rate": 9.54261637646581e-06, "loss": 0.0456, "step": 30900 }, { "epoch": 1.568861363520991, "grad_norm": 0.3416511118412018, "learning_rate": 9.540924243193394e-06, "loss": 0.0357, "step": 30905 }, { "epoch": 1.5691151835118533, "grad_norm": 0.23118287324905396, "learning_rate": 9.539232109920978e-06, "loss": 0.0412, "step": 30910 }, { "epoch": 1.569369003502716, "grad_norm": 0.4158100485801697, "learning_rate": 9.537539976648563e-06, "loss": 0.0461, "step": 30915 }, { "epoch": 1.5696228234935785, "grad_norm": 0.38120755553245544, "learning_rate": 9.535847843376145e-06, "loss": 0.0384, "step": 30920 }, { "epoch": 1.5698766434844407, "grad_norm": 0.25962239503860474, "learning_rate": 9.534155710103728e-06, "loss": 0.0404, "step": 30925 }, { "epoch": 1.5701304634753033, "grad_norm": 0.31484225392341614, "learning_rate": 9.532463576831312e-06, "loss": 0.0326, "step": 30930 }, { "epoch": 1.5703842834661659, "grad_norm": 0.2863726317882538, "learning_rate": 9.530771443558895e-06, "loss": 0.0465, "step": 30935 }, { "epoch": 1.5706381034570283, "grad_norm": 0.3848249018192291, "learning_rate": 9.529079310286479e-06, "loss": 0.0449, "step": 30940 }, { "epoch": 1.5708919234478906, "grad_norm": 0.5639122724533081, "learning_rate": 9.527387177014062e-06, "loss": 0.0462, "step": 30945 }, { "epoch": 1.5711457434387532, "grad_norm": 0.33197957277297974, "learning_rate": 9.525695043741646e-06, "loss": 0.0353, "step": 30950 }, { "epoch": 1.5713995634296158, "grad_norm": 0.3288804292678833, "learning_rate": 9.52400291046923e-06, "loss": 0.0388, "step": 30955 }, { "epoch": 1.5716533834204782, "grad_norm": 0.3814419209957123, "learning_rate": 9.522310777196813e-06, "loss": 0.0413, "step": 30960 }, { "epoch": 1.5719072034113406, "grad_norm": 0.3379333019256592, "learning_rate": 9.520618643924397e-06, "loss": 0.0477, "step": 30965 }, { "epoch": 1.5721610234022032, "grad_norm": 0.438324898481369, "learning_rate": 9.51892651065198e-06, "loss": 0.0462, "step": 30970 }, { "epoch": 1.5724148433930656, "grad_norm": 0.3242974281311035, "learning_rate": 9.517234377379562e-06, "loss": 0.0372, "step": 30975 }, { "epoch": 1.572668663383928, "grad_norm": 0.9535584449768066, "learning_rate": 9.515542244107147e-06, "loss": 0.0476, "step": 30980 }, { "epoch": 1.5729224833747906, "grad_norm": 0.34823837876319885, "learning_rate": 9.51385011083473e-06, "loss": 0.0507, "step": 30985 }, { "epoch": 1.5731763033656532, "grad_norm": 0.21808657050132751, "learning_rate": 9.512157977562314e-06, "loss": 0.0376, "step": 30990 }, { "epoch": 1.5734301233565156, "grad_norm": 0.3237823247909546, "learning_rate": 9.510465844289896e-06, "loss": 0.0435, "step": 30995 }, { "epoch": 1.573683943347378, "grad_norm": 0.3559344708919525, "learning_rate": 9.50877371101748e-06, "loss": 0.0394, "step": 31000 }, { "epoch": 1.5739377633382405, "grad_norm": 0.43727895617485046, "learning_rate": 9.507081577745065e-06, "loss": 0.0489, "step": 31005 }, { "epoch": 1.5741915833291031, "grad_norm": 0.42054206132888794, "learning_rate": 9.505389444472649e-06, "loss": 0.0442, "step": 31010 }, { "epoch": 1.5744454033199655, "grad_norm": 0.19902174174785614, "learning_rate": 9.50369731120023e-06, "loss": 0.0359, "step": 31015 }, { "epoch": 1.574699223310828, "grad_norm": 0.5150263905525208, "learning_rate": 9.502005177927814e-06, "loss": 0.0499, "step": 31020 }, { "epoch": 1.5749530433016905, "grad_norm": 0.4221786856651306, "learning_rate": 9.500313044655397e-06, "loss": 0.0421, "step": 31025 }, { "epoch": 1.5752068632925529, "grad_norm": 0.5770427584648132, "learning_rate": 9.498620911382981e-06, "loss": 0.0383, "step": 31030 }, { "epoch": 1.5754606832834153, "grad_norm": 0.3271523416042328, "learning_rate": 9.496928778110565e-06, "loss": 0.0401, "step": 31035 }, { "epoch": 1.5757145032742779, "grad_norm": 0.33425188064575195, "learning_rate": 9.495236644838148e-06, "loss": 0.039, "step": 31040 }, { "epoch": 1.5759683232651405, "grad_norm": 0.6423659920692444, "learning_rate": 9.493544511565732e-06, "loss": 0.0469, "step": 31045 }, { "epoch": 1.5762221432560029, "grad_norm": 0.32353103160858154, "learning_rate": 9.491852378293315e-06, "loss": 0.0402, "step": 31050 }, { "epoch": 1.5764759632468652, "grad_norm": 0.446249395608902, "learning_rate": 9.490160245020899e-06, "loss": 0.0404, "step": 31055 }, { "epoch": 1.5767297832377278, "grad_norm": 0.5686723589897156, "learning_rate": 9.488468111748482e-06, "loss": 0.0351, "step": 31060 }, { "epoch": 1.5769836032285904, "grad_norm": 0.334552139043808, "learning_rate": 9.486775978476066e-06, "loss": 0.0442, "step": 31065 }, { "epoch": 1.5772374232194528, "grad_norm": 0.40195050835609436, "learning_rate": 9.48508384520365e-06, "loss": 0.0419, "step": 31070 }, { "epoch": 1.5774912432103152, "grad_norm": 0.36055585741996765, "learning_rate": 9.483391711931233e-06, "loss": 0.0442, "step": 31075 }, { "epoch": 1.5777450632011778, "grad_norm": 0.6583750247955322, "learning_rate": 9.481699578658816e-06, "loss": 0.0342, "step": 31080 }, { "epoch": 1.5779988831920402, "grad_norm": 0.3017454147338867, "learning_rate": 9.4800074453864e-06, "loss": 0.0476, "step": 31085 }, { "epoch": 1.5782527031829026, "grad_norm": 0.2620936334133148, "learning_rate": 9.478315312113982e-06, "loss": 0.0419, "step": 31090 }, { "epoch": 1.5785065231737652, "grad_norm": 0.347018301486969, "learning_rate": 9.476623178841567e-06, "loss": 0.0419, "step": 31095 }, { "epoch": 1.5787603431646278, "grad_norm": 0.3718807101249695, "learning_rate": 9.47493104556915e-06, "loss": 0.0412, "step": 31100 }, { "epoch": 1.5790141631554901, "grad_norm": 0.2230846881866455, "learning_rate": 9.473238912296732e-06, "loss": 0.0351, "step": 31105 }, { "epoch": 1.5792679831463525, "grad_norm": 0.2787439525127411, "learning_rate": 9.471546779024316e-06, "loss": 0.0469, "step": 31110 }, { "epoch": 1.5795218031372151, "grad_norm": 0.46202558279037476, "learning_rate": 9.4698546457519e-06, "loss": 0.0484, "step": 31115 }, { "epoch": 1.5797756231280777, "grad_norm": 0.3812348246574402, "learning_rate": 9.468162512479485e-06, "loss": 0.0389, "step": 31120 }, { "epoch": 1.58002944311894, "grad_norm": 0.2918323278427124, "learning_rate": 9.466470379207067e-06, "loss": 0.0517, "step": 31125 }, { "epoch": 1.5802832631098025, "grad_norm": 0.33513250946998596, "learning_rate": 9.46477824593465e-06, "loss": 0.0481, "step": 31130 }, { "epoch": 1.580537083100665, "grad_norm": 0.5008119344711304, "learning_rate": 9.463086112662234e-06, "loss": 0.0464, "step": 31135 }, { "epoch": 1.5807909030915275, "grad_norm": 0.31832876801490784, "learning_rate": 9.461393979389817e-06, "loss": 0.0357, "step": 31140 }, { "epoch": 1.5810447230823899, "grad_norm": 0.9408812522888184, "learning_rate": 9.4597018461174e-06, "loss": 0.0517, "step": 31145 }, { "epoch": 1.5812985430732525, "grad_norm": 0.3648240268230438, "learning_rate": 9.458009712844984e-06, "loss": 0.0475, "step": 31150 }, { "epoch": 1.581552363064115, "grad_norm": 0.3548484146595001, "learning_rate": 9.456317579572568e-06, "loss": 0.0466, "step": 31155 }, { "epoch": 1.5818061830549774, "grad_norm": 0.6021202802658081, "learning_rate": 9.454625446300151e-06, "loss": 0.0413, "step": 31160 }, { "epoch": 1.5820600030458398, "grad_norm": 0.38574841618537903, "learning_rate": 9.452933313027735e-06, "loss": 0.0476, "step": 31165 }, { "epoch": 1.5823138230367024, "grad_norm": 0.4719664752483368, "learning_rate": 9.451241179755319e-06, "loss": 0.0518, "step": 31170 }, { "epoch": 1.5825676430275648, "grad_norm": 0.2845129072666168, "learning_rate": 9.449549046482902e-06, "loss": 0.045, "step": 31175 }, { "epoch": 1.5828214630184272, "grad_norm": 0.44490012526512146, "learning_rate": 9.447856913210484e-06, "loss": 0.0414, "step": 31180 }, { "epoch": 1.5830752830092898, "grad_norm": 0.6356984376907349, "learning_rate": 9.44616477993807e-06, "loss": 0.0504, "step": 31185 }, { "epoch": 1.5833291030001524, "grad_norm": 0.47728046774864197, "learning_rate": 9.444472646665653e-06, "loss": 0.0471, "step": 31190 }, { "epoch": 1.5835829229910148, "grad_norm": 0.23689168691635132, "learning_rate": 9.442780513393236e-06, "loss": 0.0388, "step": 31195 }, { "epoch": 1.5838367429818772, "grad_norm": 0.2717723548412323, "learning_rate": 9.441088380120818e-06, "loss": 0.0476, "step": 31200 }, { "epoch": 1.5840905629727398, "grad_norm": 0.35897499322891235, "learning_rate": 9.439396246848402e-06, "loss": 0.0521, "step": 31205 }, { "epoch": 1.5843443829636024, "grad_norm": 0.4387221336364746, "learning_rate": 9.437704113575987e-06, "loss": 0.0407, "step": 31210 }, { "epoch": 1.5845982029544647, "grad_norm": 0.2721177637577057, "learning_rate": 9.43601198030357e-06, "loss": 0.0408, "step": 31215 }, { "epoch": 1.5848520229453271, "grad_norm": 0.3234291672706604, "learning_rate": 9.434319847031152e-06, "loss": 0.0348, "step": 31220 }, { "epoch": 1.5851058429361897, "grad_norm": 0.5149333477020264, "learning_rate": 9.432627713758736e-06, "loss": 0.0459, "step": 31225 }, { "epoch": 1.585359662927052, "grad_norm": 0.40333887934684753, "learning_rate": 9.43093558048632e-06, "loss": 0.0494, "step": 31230 }, { "epoch": 1.5856134829179145, "grad_norm": 0.4914284646511078, "learning_rate": 9.429243447213905e-06, "loss": 0.0429, "step": 31235 }, { "epoch": 1.585867302908777, "grad_norm": 0.571137011051178, "learning_rate": 9.427551313941486e-06, "loss": 0.0471, "step": 31240 }, { "epoch": 1.5861211228996397, "grad_norm": 0.2421867698431015, "learning_rate": 9.42585918066907e-06, "loss": 0.0425, "step": 31245 }, { "epoch": 1.586374942890502, "grad_norm": 0.54640793800354, "learning_rate": 9.424167047396654e-06, "loss": 0.0364, "step": 31250 }, { "epoch": 1.5866287628813645, "grad_norm": 0.4234645366668701, "learning_rate": 9.422474914124237e-06, "loss": 0.0562, "step": 31255 }, { "epoch": 1.586882582872227, "grad_norm": 0.44822415709495544, "learning_rate": 9.42078278085182e-06, "loss": 0.0431, "step": 31260 }, { "epoch": 1.5871364028630897, "grad_norm": 0.32454562187194824, "learning_rate": 9.419090647579404e-06, "loss": 0.0349, "step": 31265 }, { "epoch": 1.5873902228539518, "grad_norm": 0.4538845717906952, "learning_rate": 9.417398514306988e-06, "loss": 0.048, "step": 31270 }, { "epoch": 1.5876440428448144, "grad_norm": 0.35510364174842834, "learning_rate": 9.415706381034571e-06, "loss": 0.0498, "step": 31275 }, { "epoch": 1.587897862835677, "grad_norm": 0.26279670000076294, "learning_rate": 9.414014247762155e-06, "loss": 0.0395, "step": 31280 }, { "epoch": 1.5881516828265394, "grad_norm": 0.35393065214157104, "learning_rate": 9.412322114489738e-06, "loss": 0.0424, "step": 31285 }, { "epoch": 1.5884055028174018, "grad_norm": 0.2939152717590332, "learning_rate": 9.410629981217322e-06, "loss": 0.0414, "step": 31290 }, { "epoch": 1.5886593228082644, "grad_norm": 0.35889071226119995, "learning_rate": 9.408937847944904e-06, "loss": 0.0514, "step": 31295 }, { "epoch": 1.588913142799127, "grad_norm": 0.41180306673049927, "learning_rate": 9.407245714672489e-06, "loss": 0.048, "step": 31300 }, { "epoch": 1.5891669627899894, "grad_norm": 0.46586310863494873, "learning_rate": 9.405553581400073e-06, "loss": 0.0521, "step": 31305 }, { "epoch": 1.5894207827808517, "grad_norm": 0.355020672082901, "learning_rate": 9.403861448127656e-06, "loss": 0.0383, "step": 31310 }, { "epoch": 1.5896746027717144, "grad_norm": 0.47443002462387085, "learning_rate": 9.402169314855238e-06, "loss": 0.043, "step": 31315 }, { "epoch": 1.5899284227625767, "grad_norm": 0.37852799892425537, "learning_rate": 9.400477181582822e-06, "loss": 0.0389, "step": 31320 }, { "epoch": 1.5901822427534391, "grad_norm": 0.3380233645439148, "learning_rate": 9.398785048310407e-06, "loss": 0.0397, "step": 31325 }, { "epoch": 1.5904360627443017, "grad_norm": 0.34201329946517944, "learning_rate": 9.39709291503799e-06, "loss": 0.044, "step": 31330 }, { "epoch": 1.5906898827351643, "grad_norm": 0.3614378571510315, "learning_rate": 9.395400781765572e-06, "loss": 0.0437, "step": 31335 }, { "epoch": 1.5909437027260267, "grad_norm": 0.2608165144920349, "learning_rate": 9.393708648493156e-06, "loss": 0.0432, "step": 31340 }, { "epoch": 1.591197522716889, "grad_norm": 0.2886785566806793, "learning_rate": 9.39201651522074e-06, "loss": 0.0394, "step": 31345 }, { "epoch": 1.5914513427077517, "grad_norm": 0.24169431626796722, "learning_rate": 9.390324381948323e-06, "loss": 0.0449, "step": 31350 }, { "epoch": 1.5917051626986143, "grad_norm": 0.2917103171348572, "learning_rate": 9.388632248675906e-06, "loss": 0.0421, "step": 31355 }, { "epoch": 1.5919589826894767, "grad_norm": 0.32198965549468994, "learning_rate": 9.38694011540349e-06, "loss": 0.0373, "step": 31360 }, { "epoch": 1.592212802680339, "grad_norm": 0.27352675795555115, "learning_rate": 9.385247982131073e-06, "loss": 0.0422, "step": 31365 }, { "epoch": 1.5924666226712016, "grad_norm": 0.33560416102409363, "learning_rate": 9.383555848858657e-06, "loss": 0.0437, "step": 31370 }, { "epoch": 1.592720442662064, "grad_norm": 0.24007673561573029, "learning_rate": 9.38186371558624e-06, "loss": 0.0386, "step": 31375 }, { "epoch": 1.5929742626529264, "grad_norm": 0.3025837242603302, "learning_rate": 9.380171582313824e-06, "loss": 0.0498, "step": 31380 }, { "epoch": 1.593228082643789, "grad_norm": 0.4102720618247986, "learning_rate": 9.378479449041408e-06, "loss": 0.0487, "step": 31385 }, { "epoch": 1.5934819026346516, "grad_norm": 0.3684849143028259, "learning_rate": 9.376787315768991e-06, "loss": 0.0399, "step": 31390 }, { "epoch": 1.593735722625514, "grad_norm": 0.39328739047050476, "learning_rate": 9.375095182496575e-06, "loss": 0.0464, "step": 31395 }, { "epoch": 1.5939895426163764, "grad_norm": 0.30063509941101074, "learning_rate": 9.373403049224158e-06, "loss": 0.0506, "step": 31400 }, { "epoch": 1.594243362607239, "grad_norm": 0.26448220014572144, "learning_rate": 9.371710915951742e-06, "loss": 0.0343, "step": 31405 }, { "epoch": 1.5944971825981016, "grad_norm": 0.5844763517379761, "learning_rate": 9.370018782679324e-06, "loss": 0.0436, "step": 31410 }, { "epoch": 1.594751002588964, "grad_norm": 0.35715028643608093, "learning_rate": 9.368326649406909e-06, "loss": 0.0445, "step": 31415 }, { "epoch": 1.5950048225798263, "grad_norm": 0.45810723304748535, "learning_rate": 9.366634516134492e-06, "loss": 0.045, "step": 31420 }, { "epoch": 1.595258642570689, "grad_norm": 0.35017555952072144, "learning_rate": 9.364942382862074e-06, "loss": 0.045, "step": 31425 }, { "epoch": 1.5955124625615513, "grad_norm": 0.3044382631778717, "learning_rate": 9.363250249589658e-06, "loss": 0.0375, "step": 31430 }, { "epoch": 1.5957662825524137, "grad_norm": 0.36368584632873535, "learning_rate": 9.361558116317241e-06, "loss": 0.0423, "step": 31435 }, { "epoch": 1.5960201025432763, "grad_norm": 0.3246999680995941, "learning_rate": 9.359865983044827e-06, "loss": 0.0469, "step": 31440 }, { "epoch": 1.596273922534139, "grad_norm": 0.3725318908691406, "learning_rate": 9.358173849772408e-06, "loss": 0.0493, "step": 31445 }, { "epoch": 1.5965277425250013, "grad_norm": 0.3187475800514221, "learning_rate": 9.356481716499992e-06, "loss": 0.0401, "step": 31450 }, { "epoch": 1.5967815625158637, "grad_norm": 0.3895965814590454, "learning_rate": 9.354789583227576e-06, "loss": 0.0429, "step": 31455 }, { "epoch": 1.5970353825067263, "grad_norm": 0.39535629749298096, "learning_rate": 9.353097449955159e-06, "loss": 0.0391, "step": 31460 }, { "epoch": 1.5972892024975887, "grad_norm": 0.40566694736480713, "learning_rate": 9.351405316682743e-06, "loss": 0.0446, "step": 31465 }, { "epoch": 1.597543022488451, "grad_norm": 0.426129549741745, "learning_rate": 9.349713183410326e-06, "loss": 0.0412, "step": 31470 }, { "epoch": 1.5977968424793136, "grad_norm": 0.3265942931175232, "learning_rate": 9.34802105013791e-06, "loss": 0.0465, "step": 31475 }, { "epoch": 1.5980506624701762, "grad_norm": 0.27125173807144165, "learning_rate": 9.346328916865493e-06, "loss": 0.0362, "step": 31480 }, { "epoch": 1.5983044824610386, "grad_norm": 0.3212997615337372, "learning_rate": 9.344636783593077e-06, "loss": 0.0436, "step": 31485 }, { "epoch": 1.598558302451901, "grad_norm": 0.32562533020973206, "learning_rate": 9.34294465032066e-06, "loss": 0.0474, "step": 31490 }, { "epoch": 1.5988121224427636, "grad_norm": 0.37782034277915955, "learning_rate": 9.341252517048244e-06, "loss": 0.0463, "step": 31495 }, { "epoch": 1.5990659424336262, "grad_norm": 0.33579403162002563, "learning_rate": 9.339560383775826e-06, "loss": 0.0517, "step": 31500 }, { "epoch": 1.5993197624244886, "grad_norm": 0.2755340039730072, "learning_rate": 9.337868250503411e-06, "loss": 0.0348, "step": 31505 }, { "epoch": 1.599573582415351, "grad_norm": 0.3979896306991577, "learning_rate": 9.336176117230995e-06, "loss": 0.0447, "step": 31510 }, { "epoch": 1.5998274024062136, "grad_norm": 0.3220609426498413, "learning_rate": 9.334483983958578e-06, "loss": 0.0437, "step": 31515 }, { "epoch": 1.600081222397076, "grad_norm": 0.30401086807250977, "learning_rate": 9.33279185068616e-06, "loss": 0.0369, "step": 31520 }, { "epoch": 1.6003350423879383, "grad_norm": 0.48431262373924255, "learning_rate": 9.331099717413743e-06, "loss": 0.042, "step": 31525 }, { "epoch": 1.600588862378801, "grad_norm": 0.3727475106716156, "learning_rate": 9.329407584141329e-06, "loss": 0.0476, "step": 31530 }, { "epoch": 1.6008426823696635, "grad_norm": 0.3813401460647583, "learning_rate": 9.327715450868912e-06, "loss": 0.0392, "step": 31535 }, { "epoch": 1.601096502360526, "grad_norm": 0.5032063126564026, "learning_rate": 9.326023317596494e-06, "loss": 0.0458, "step": 31540 }, { "epoch": 1.6013503223513883, "grad_norm": 0.4369184672832489, "learning_rate": 9.324331184324078e-06, "loss": 0.0441, "step": 31545 }, { "epoch": 1.601604142342251, "grad_norm": 0.3979043662548065, "learning_rate": 9.322639051051661e-06, "loss": 0.0507, "step": 31550 }, { "epoch": 1.6018579623331135, "grad_norm": 0.3451641798019409, "learning_rate": 9.320946917779245e-06, "loss": 0.0451, "step": 31555 }, { "epoch": 1.6021117823239759, "grad_norm": 0.30609747767448425, "learning_rate": 9.319254784506828e-06, "loss": 0.0406, "step": 31560 }, { "epoch": 1.6023656023148383, "grad_norm": 0.2856360077857971, "learning_rate": 9.317562651234412e-06, "loss": 0.0471, "step": 31565 }, { "epoch": 1.6026194223057009, "grad_norm": 0.2808777987957001, "learning_rate": 9.315870517961995e-06, "loss": 0.0401, "step": 31570 }, { "epoch": 1.6028732422965632, "grad_norm": 0.27101564407348633, "learning_rate": 9.314178384689579e-06, "loss": 0.0367, "step": 31575 }, { "epoch": 1.6031270622874256, "grad_norm": 0.38390570878982544, "learning_rate": 9.312486251417162e-06, "loss": 0.0496, "step": 31580 }, { "epoch": 1.6033808822782882, "grad_norm": 0.44923219084739685, "learning_rate": 9.310794118144746e-06, "loss": 0.047, "step": 31585 }, { "epoch": 1.6036347022691508, "grad_norm": 0.4212184548377991, "learning_rate": 9.30910198487233e-06, "loss": 0.0504, "step": 31590 }, { "epoch": 1.6038885222600132, "grad_norm": 0.3964141011238098, "learning_rate": 9.307409851599913e-06, "loss": 0.0454, "step": 31595 }, { "epoch": 1.6041423422508756, "grad_norm": 0.4174087643623352, "learning_rate": 9.305717718327497e-06, "loss": 0.0422, "step": 31600 }, { "epoch": 1.6043961622417382, "grad_norm": 0.28624725341796875, "learning_rate": 9.30402558505508e-06, "loss": 0.0412, "step": 31605 }, { "epoch": 1.6046499822326008, "grad_norm": 0.3116838037967682, "learning_rate": 9.302333451782664e-06, "loss": 0.053, "step": 31610 }, { "epoch": 1.604903802223463, "grad_norm": 0.2775300145149231, "learning_rate": 9.300641318510246e-06, "loss": 0.0477, "step": 31615 }, { "epoch": 1.6051576222143256, "grad_norm": 0.3478650748729706, "learning_rate": 9.29894918523783e-06, "loss": 0.046, "step": 31620 }, { "epoch": 1.6054114422051882, "grad_norm": 0.35184112191200256, "learning_rate": 9.297257051965414e-06, "loss": 0.0534, "step": 31625 }, { "epoch": 1.6056652621960505, "grad_norm": 0.9067050218582153, "learning_rate": 9.295564918692998e-06, "loss": 0.0453, "step": 31630 }, { "epoch": 1.605919082186913, "grad_norm": 1.2345666885375977, "learning_rate": 9.29387278542058e-06, "loss": 0.0417, "step": 31635 }, { "epoch": 1.6061729021777755, "grad_norm": 0.39380595088005066, "learning_rate": 9.292180652148163e-06, "loss": 0.0456, "step": 31640 }, { "epoch": 1.6064267221686381, "grad_norm": 0.3252556324005127, "learning_rate": 9.290488518875747e-06, "loss": 0.0375, "step": 31645 }, { "epoch": 1.6066805421595005, "grad_norm": 0.2784633934497833, "learning_rate": 9.288796385603332e-06, "loss": 0.0367, "step": 31650 }, { "epoch": 1.606934362150363, "grad_norm": 0.3153322637081146, "learning_rate": 9.287104252330914e-06, "loss": 0.0403, "step": 31655 }, { "epoch": 1.6071881821412255, "grad_norm": 0.3245221674442291, "learning_rate": 9.285412119058497e-06, "loss": 0.0346, "step": 31660 }, { "epoch": 1.6074420021320879, "grad_norm": 0.3184836506843567, "learning_rate": 9.283719985786081e-06, "loss": 0.0375, "step": 31665 }, { "epoch": 1.6076958221229503, "grad_norm": 0.3533708453178406, "learning_rate": 9.282027852513665e-06, "loss": 0.0468, "step": 31670 }, { "epoch": 1.6079496421138129, "grad_norm": 1.2411906719207764, "learning_rate": 9.280335719241248e-06, "loss": 0.0435, "step": 31675 }, { "epoch": 1.6082034621046755, "grad_norm": 0.3504463732242584, "learning_rate": 9.278643585968832e-06, "loss": 0.0484, "step": 31680 }, { "epoch": 1.6084572820955378, "grad_norm": 0.25611740350723267, "learning_rate": 9.276951452696415e-06, "loss": 0.0379, "step": 31685 }, { "epoch": 1.6087111020864002, "grad_norm": 0.35780343413352966, "learning_rate": 9.275259319423999e-06, "loss": 0.0496, "step": 31690 }, { "epoch": 1.6089649220772628, "grad_norm": 0.3506191074848175, "learning_rate": 9.273567186151582e-06, "loss": 0.0367, "step": 31695 }, { "epoch": 1.6092187420681254, "grad_norm": 0.69794100522995, "learning_rate": 9.271875052879166e-06, "loss": 0.0459, "step": 31700 }, { "epoch": 1.6094725620589878, "grad_norm": 0.29237765073776245, "learning_rate": 9.27018291960675e-06, "loss": 0.041, "step": 31705 }, { "epoch": 1.6097263820498502, "grad_norm": 0.4747942388057709, "learning_rate": 9.268490786334333e-06, "loss": 0.0407, "step": 31710 }, { "epoch": 1.6099802020407128, "grad_norm": 0.29343801736831665, "learning_rate": 9.266798653061916e-06, "loss": 0.0394, "step": 31715 }, { "epoch": 1.6102340220315752, "grad_norm": 0.3256320059299469, "learning_rate": 9.2651065197895e-06, "loss": 0.0485, "step": 31720 }, { "epoch": 1.6104878420224376, "grad_norm": 0.2793024778366089, "learning_rate": 9.263414386517084e-06, "loss": 0.0436, "step": 31725 }, { "epoch": 1.6107416620133002, "grad_norm": 0.5951392650604248, "learning_rate": 9.261722253244665e-06, "loss": 0.0402, "step": 31730 }, { "epoch": 1.6109954820041628, "grad_norm": 0.30947548151016235, "learning_rate": 9.260030119972249e-06, "loss": 0.0373, "step": 31735 }, { "epoch": 1.6112493019950251, "grad_norm": 0.491113543510437, "learning_rate": 9.258337986699834e-06, "loss": 0.0428, "step": 31740 }, { "epoch": 1.6115031219858875, "grad_norm": 0.3150951862335205, "learning_rate": 9.256645853427416e-06, "loss": 0.0411, "step": 31745 }, { "epoch": 1.6117569419767501, "grad_norm": 0.6105877757072449, "learning_rate": 9.254953720155e-06, "loss": 0.0394, "step": 31750 }, { "epoch": 1.6120107619676127, "grad_norm": 0.3246072828769684, "learning_rate": 9.253261586882583e-06, "loss": 0.039, "step": 31755 }, { "epoch": 1.6122645819584749, "grad_norm": 0.44057467579841614, "learning_rate": 9.251569453610167e-06, "loss": 0.042, "step": 31760 }, { "epoch": 1.6125184019493375, "grad_norm": 0.39417964220046997, "learning_rate": 9.24987732033775e-06, "loss": 0.0438, "step": 31765 }, { "epoch": 1.6127722219402, "grad_norm": 0.5825387835502625, "learning_rate": 9.248185187065334e-06, "loss": 0.0444, "step": 31770 }, { "epoch": 1.6130260419310625, "grad_norm": 0.36112135648727417, "learning_rate": 9.246493053792917e-06, "loss": 0.0469, "step": 31775 }, { "epoch": 1.6132798619219249, "grad_norm": 0.488370418548584, "learning_rate": 9.2448009205205e-06, "loss": 0.036, "step": 31780 }, { "epoch": 1.6135336819127875, "grad_norm": 0.3322552442550659, "learning_rate": 9.243108787248084e-06, "loss": 0.0407, "step": 31785 }, { "epoch": 1.61378750190365, "grad_norm": 0.4039710462093353, "learning_rate": 9.241416653975668e-06, "loss": 0.0438, "step": 31790 }, { "epoch": 1.6140413218945124, "grad_norm": 0.34084275364875793, "learning_rate": 9.239724520703251e-06, "loss": 0.0445, "step": 31795 }, { "epoch": 1.6142951418853748, "grad_norm": 0.4430694282054901, "learning_rate": 9.238032387430835e-06, "loss": 0.0505, "step": 31800 }, { "epoch": 1.6145489618762374, "grad_norm": 0.3359103798866272, "learning_rate": 9.236340254158419e-06, "loss": 0.037, "step": 31805 }, { "epoch": 1.6148027818670998, "grad_norm": 0.26327624917030334, "learning_rate": 9.234648120886002e-06, "loss": 0.0391, "step": 31810 }, { "epoch": 1.6150566018579622, "grad_norm": 0.2624158561229706, "learning_rate": 9.232955987613586e-06, "loss": 0.0362, "step": 31815 }, { "epoch": 1.6153104218488248, "grad_norm": 0.36701250076293945, "learning_rate": 9.231263854341167e-06, "loss": 0.0437, "step": 31820 }, { "epoch": 1.6155642418396874, "grad_norm": 0.7499197125434875, "learning_rate": 9.229571721068751e-06, "loss": 0.04, "step": 31825 }, { "epoch": 1.6158180618305498, "grad_norm": 0.3554810881614685, "learning_rate": 9.227879587796336e-06, "loss": 0.0459, "step": 31830 }, { "epoch": 1.6160718818214121, "grad_norm": 0.4646514356136322, "learning_rate": 9.22618745452392e-06, "loss": 0.0395, "step": 31835 }, { "epoch": 1.6163257018122748, "grad_norm": 0.5427611470222473, "learning_rate": 9.224495321251502e-06, "loss": 0.0456, "step": 31840 }, { "epoch": 1.6165795218031374, "grad_norm": 0.346992552280426, "learning_rate": 9.222803187979085e-06, "loss": 0.0344, "step": 31845 }, { "epoch": 1.6168333417939997, "grad_norm": 0.473665714263916, "learning_rate": 9.221111054706669e-06, "loss": 0.0344, "step": 31850 }, { "epoch": 1.6170871617848621, "grad_norm": 0.38679835200309753, "learning_rate": 9.219418921434254e-06, "loss": 0.0375, "step": 31855 }, { "epoch": 1.6173409817757247, "grad_norm": 0.689315676689148, "learning_rate": 9.217726788161836e-06, "loss": 0.0445, "step": 31860 }, { "epoch": 1.617594801766587, "grad_norm": 0.3175433874130249, "learning_rate": 9.21603465488942e-06, "loss": 0.0482, "step": 31865 }, { "epoch": 1.6178486217574495, "grad_norm": 0.301717072725296, "learning_rate": 9.214342521617003e-06, "loss": 0.0426, "step": 31870 }, { "epoch": 1.618102441748312, "grad_norm": 0.24479809403419495, "learning_rate": 9.212650388344586e-06, "loss": 0.0344, "step": 31875 }, { "epoch": 1.6183562617391747, "grad_norm": 0.36919647455215454, "learning_rate": 9.21095825507217e-06, "loss": 0.04, "step": 31880 }, { "epoch": 1.618610081730037, "grad_norm": 0.3051331639289856, "learning_rate": 9.209266121799754e-06, "loss": 0.0337, "step": 31885 }, { "epoch": 1.6188639017208994, "grad_norm": 0.31036415696144104, "learning_rate": 9.207573988527337e-06, "loss": 0.0458, "step": 31890 }, { "epoch": 1.619117721711762, "grad_norm": 0.4276675581932068, "learning_rate": 9.20588185525492e-06, "loss": 0.0368, "step": 31895 }, { "epoch": 1.6193715417026247, "grad_norm": 0.2697307765483856, "learning_rate": 9.204189721982504e-06, "loss": 0.0381, "step": 31900 }, { "epoch": 1.619625361693487, "grad_norm": 0.4047255516052246, "learning_rate": 9.202497588710088e-06, "loss": 0.0473, "step": 31905 }, { "epoch": 1.6198791816843494, "grad_norm": 0.46836021542549133, "learning_rate": 9.200805455437671e-06, "loss": 0.038, "step": 31910 }, { "epoch": 1.620133001675212, "grad_norm": 1.1610099077224731, "learning_rate": 9.199113322165253e-06, "loss": 0.0512, "step": 31915 }, { "epoch": 1.6203868216660744, "grad_norm": 0.3843535780906677, "learning_rate": 9.197421188892838e-06, "loss": 0.042, "step": 31920 }, { "epoch": 1.6206406416569368, "grad_norm": 0.35286080837249756, "learning_rate": 9.195729055620422e-06, "loss": 0.0425, "step": 31925 }, { "epoch": 1.6208944616477994, "grad_norm": 0.30205556750297546, "learning_rate": 9.194036922348005e-06, "loss": 0.0411, "step": 31930 }, { "epoch": 1.621148281638662, "grad_norm": 0.3636847138404846, "learning_rate": 9.192344789075587e-06, "loss": 0.0399, "step": 31935 }, { "epoch": 1.6214021016295244, "grad_norm": 0.2955825626850128, "learning_rate": 9.190652655803171e-06, "loss": 0.0384, "step": 31940 }, { "epoch": 1.6216559216203867, "grad_norm": 0.5489678978919983, "learning_rate": 9.188960522530756e-06, "loss": 0.0461, "step": 31945 }, { "epoch": 1.6219097416112493, "grad_norm": 0.4751637876033783, "learning_rate": 9.18726838925834e-06, "loss": 0.0537, "step": 31950 }, { "epoch": 1.622163561602112, "grad_norm": 0.8122043609619141, "learning_rate": 9.185576255985922e-06, "loss": 0.0493, "step": 31955 }, { "epoch": 1.622417381592974, "grad_norm": 0.32820770144462585, "learning_rate": 9.183884122713505e-06, "loss": 0.0362, "step": 31960 }, { "epoch": 1.6226712015838367, "grad_norm": 0.5642192363739014, "learning_rate": 9.182191989441089e-06, "loss": 0.0402, "step": 31965 }, { "epoch": 1.6229250215746993, "grad_norm": 0.3879733085632324, "learning_rate": 9.180499856168674e-06, "loss": 0.0561, "step": 31970 }, { "epoch": 1.6231788415655617, "grad_norm": 0.4680909812450409, "learning_rate": 9.178807722896256e-06, "loss": 0.039, "step": 31975 }, { "epoch": 1.623432661556424, "grad_norm": 0.30603843927383423, "learning_rate": 9.17711558962384e-06, "loss": 0.0452, "step": 31980 }, { "epoch": 1.6236864815472867, "grad_norm": 0.36126911640167236, "learning_rate": 9.175423456351423e-06, "loss": 0.0455, "step": 31985 }, { "epoch": 1.6239403015381493, "grad_norm": 0.3678168058395386, "learning_rate": 9.173731323079006e-06, "loss": 0.0448, "step": 31990 }, { "epoch": 1.6241941215290117, "grad_norm": 0.28959447145462036, "learning_rate": 9.17203918980659e-06, "loss": 0.0383, "step": 31995 }, { "epoch": 1.624447941519874, "grad_norm": 0.4826527535915375, "learning_rate": 9.170347056534173e-06, "loss": 0.0417, "step": 32000 }, { "epoch": 1.6247017615107366, "grad_norm": 0.31721869111061096, "learning_rate": 9.168654923261757e-06, "loss": 0.0431, "step": 32005 }, { "epoch": 1.624955581501599, "grad_norm": 0.2833889424800873, "learning_rate": 9.16696278998934e-06, "loss": 0.0443, "step": 32010 }, { "epoch": 1.6252094014924614, "grad_norm": 0.37571266293525696, "learning_rate": 9.165270656716924e-06, "loss": 0.0475, "step": 32015 }, { "epoch": 1.625463221483324, "grad_norm": 0.4514489769935608, "learning_rate": 9.163578523444508e-06, "loss": 0.0465, "step": 32020 }, { "epoch": 1.6257170414741866, "grad_norm": 0.5537285208702087, "learning_rate": 9.161886390172091e-06, "loss": 0.0461, "step": 32025 }, { "epoch": 1.625970861465049, "grad_norm": 0.3658086955547333, "learning_rate": 9.160194256899673e-06, "loss": 0.0532, "step": 32030 }, { "epoch": 1.6262246814559114, "grad_norm": 0.2867622375488281, "learning_rate": 9.158502123627258e-06, "loss": 0.0388, "step": 32035 }, { "epoch": 1.626478501446774, "grad_norm": 0.6386134028434753, "learning_rate": 9.156809990354842e-06, "loss": 0.0508, "step": 32040 }, { "epoch": 1.6267323214376366, "grad_norm": 0.2683972716331482, "learning_rate": 9.155117857082425e-06, "loss": 0.0466, "step": 32045 }, { "epoch": 1.626986141428499, "grad_norm": 0.3644627034664154, "learning_rate": 9.153425723810007e-06, "loss": 0.0486, "step": 32050 }, { "epoch": 1.6272399614193613, "grad_norm": 0.4730590283870697, "learning_rate": 9.15173359053759e-06, "loss": 0.0408, "step": 32055 }, { "epoch": 1.627493781410224, "grad_norm": 0.2658335566520691, "learning_rate": 9.150041457265176e-06, "loss": 0.0431, "step": 32060 }, { "epoch": 1.6277476014010863, "grad_norm": 0.2563387453556061, "learning_rate": 9.148349323992758e-06, "loss": 0.0378, "step": 32065 }, { "epoch": 1.6280014213919487, "grad_norm": 0.3779682219028473, "learning_rate": 9.146657190720341e-06, "loss": 0.0524, "step": 32070 }, { "epoch": 1.6282552413828113, "grad_norm": 0.8039404153823853, "learning_rate": 9.144965057447925e-06, "loss": 0.0487, "step": 32075 }, { "epoch": 1.628509061373674, "grad_norm": 0.3716595768928528, "learning_rate": 9.143272924175508e-06, "loss": 0.0451, "step": 32080 }, { "epoch": 1.6287628813645363, "grad_norm": 0.30066925287246704, "learning_rate": 9.141580790903092e-06, "loss": 0.0469, "step": 32085 }, { "epoch": 1.6290167013553987, "grad_norm": 0.36805853247642517, "learning_rate": 9.139888657630676e-06, "loss": 0.0427, "step": 32090 }, { "epoch": 1.6292705213462613, "grad_norm": 0.7319865226745605, "learning_rate": 9.138196524358259e-06, "loss": 0.0382, "step": 32095 }, { "epoch": 1.6295243413371239, "grad_norm": 0.5053632855415344, "learning_rate": 9.136504391085843e-06, "loss": 0.0468, "step": 32100 }, { "epoch": 1.629778161327986, "grad_norm": 0.32306617498397827, "learning_rate": 9.134812257813426e-06, "loss": 0.0435, "step": 32105 }, { "epoch": 1.6300319813188486, "grad_norm": 0.2762267291545868, "learning_rate": 9.13312012454101e-06, "loss": 0.0425, "step": 32110 }, { "epoch": 1.6302858013097112, "grad_norm": 0.350439190864563, "learning_rate": 9.131427991268593e-06, "loss": 0.048, "step": 32115 }, { "epoch": 1.6305396213005736, "grad_norm": 0.296966552734375, "learning_rate": 9.129735857996177e-06, "loss": 0.0403, "step": 32120 }, { "epoch": 1.630793441291436, "grad_norm": 0.4328997731208801, "learning_rate": 9.12804372472376e-06, "loss": 0.0439, "step": 32125 }, { "epoch": 1.6310472612822986, "grad_norm": 0.384617418050766, "learning_rate": 9.126351591451344e-06, "loss": 0.0488, "step": 32130 }, { "epoch": 1.6313010812731612, "grad_norm": 0.43519383668899536, "learning_rate": 9.124659458178927e-06, "loss": 0.0448, "step": 32135 }, { "epoch": 1.6315549012640236, "grad_norm": 0.6686396598815918, "learning_rate": 9.122967324906511e-06, "loss": 0.0449, "step": 32140 }, { "epoch": 1.631808721254886, "grad_norm": 0.44832053780555725, "learning_rate": 9.121275191634093e-06, "loss": 0.0422, "step": 32145 }, { "epoch": 1.6320625412457486, "grad_norm": 0.3811791241168976, "learning_rate": 9.119583058361678e-06, "loss": 0.0378, "step": 32150 }, { "epoch": 1.632316361236611, "grad_norm": 0.3821338713169098, "learning_rate": 9.117890925089262e-06, "loss": 0.0439, "step": 32155 }, { "epoch": 1.6325701812274733, "grad_norm": 0.30220335721969604, "learning_rate": 9.116198791816843e-06, "loss": 0.0387, "step": 32160 }, { "epoch": 1.632824001218336, "grad_norm": 0.456337571144104, "learning_rate": 9.114506658544427e-06, "loss": 0.0492, "step": 32165 }, { "epoch": 1.6330778212091985, "grad_norm": 0.24902650713920593, "learning_rate": 9.11281452527201e-06, "loss": 0.0441, "step": 32170 }, { "epoch": 1.633331641200061, "grad_norm": 0.38890451192855835, "learning_rate": 9.111122391999596e-06, "loss": 0.0418, "step": 32175 }, { "epoch": 1.6335854611909233, "grad_norm": 1.0790613889694214, "learning_rate": 9.109430258727178e-06, "loss": 0.0367, "step": 32180 }, { "epoch": 1.633839281181786, "grad_norm": 0.46890148520469666, "learning_rate": 9.107738125454761e-06, "loss": 0.0406, "step": 32185 }, { "epoch": 1.6340931011726485, "grad_norm": 0.2735448479652405, "learning_rate": 9.106045992182345e-06, "loss": 0.0448, "step": 32190 }, { "epoch": 1.6343469211635109, "grad_norm": 0.3427935838699341, "learning_rate": 9.104353858909928e-06, "loss": 0.0396, "step": 32195 }, { "epoch": 1.6346007411543733, "grad_norm": 0.4376786947250366, "learning_rate": 9.102661725637512e-06, "loss": 0.0445, "step": 32200 }, { "epoch": 1.6348545611452359, "grad_norm": 0.29393336176872253, "learning_rate": 9.100969592365095e-06, "loss": 0.0456, "step": 32205 }, { "epoch": 1.6351083811360982, "grad_norm": 0.389175146818161, "learning_rate": 9.099277459092679e-06, "loss": 0.0441, "step": 32210 }, { "epoch": 1.6353622011269606, "grad_norm": 0.3740473687648773, "learning_rate": 9.097585325820262e-06, "loss": 0.0412, "step": 32215 }, { "epoch": 1.6356160211178232, "grad_norm": 0.3517167270183563, "learning_rate": 9.095893192547846e-06, "loss": 0.0422, "step": 32220 }, { "epoch": 1.6358698411086858, "grad_norm": 0.4503104090690613, "learning_rate": 9.09420105927543e-06, "loss": 0.0432, "step": 32225 }, { "epoch": 1.6361236610995482, "grad_norm": 0.279237300157547, "learning_rate": 9.092508926003013e-06, "loss": 0.0373, "step": 32230 }, { "epoch": 1.6363774810904106, "grad_norm": 0.3596137464046478, "learning_rate": 9.090816792730595e-06, "loss": 0.0477, "step": 32235 }, { "epoch": 1.6366313010812732, "grad_norm": 0.3686954975128174, "learning_rate": 9.08912465945818e-06, "loss": 0.041, "step": 32240 }, { "epoch": 1.6368851210721358, "grad_norm": 0.4615430235862732, "learning_rate": 9.087432526185764e-06, "loss": 0.0353, "step": 32245 }, { "epoch": 1.6371389410629982, "grad_norm": 0.3107963502407074, "learning_rate": 9.085740392913347e-06, "loss": 0.0471, "step": 32250 }, { "epoch": 1.6373927610538606, "grad_norm": 0.3667447566986084, "learning_rate": 9.084048259640929e-06, "loss": 0.0423, "step": 32255 }, { "epoch": 1.6376465810447232, "grad_norm": 0.49654021859169006, "learning_rate": 9.082356126368513e-06, "loss": 0.0432, "step": 32260 }, { "epoch": 1.6379004010355855, "grad_norm": 1.1227554082870483, "learning_rate": 9.080663993096098e-06, "loss": 0.0438, "step": 32265 }, { "epoch": 1.638154221026448, "grad_norm": 0.41635578870773315, "learning_rate": 9.078971859823681e-06, "loss": 0.0554, "step": 32270 }, { "epoch": 1.6384080410173105, "grad_norm": 0.22257228195667267, "learning_rate": 9.077279726551263e-06, "loss": 0.0416, "step": 32275 }, { "epoch": 1.6386618610081731, "grad_norm": 0.2682057321071625, "learning_rate": 9.075587593278847e-06, "loss": 0.0363, "step": 32280 }, { "epoch": 1.6389156809990355, "grad_norm": 0.305270254611969, "learning_rate": 9.07389546000643e-06, "loss": 0.0384, "step": 32285 }, { "epoch": 1.6391695009898979, "grad_norm": 0.5659728050231934, "learning_rate": 9.072203326734016e-06, "loss": 0.0497, "step": 32290 }, { "epoch": 1.6394233209807605, "grad_norm": 0.38251349329948425, "learning_rate": 9.070511193461597e-06, "loss": 0.0437, "step": 32295 }, { "epoch": 1.6396771409716229, "grad_norm": 0.7674602270126343, "learning_rate": 9.068819060189181e-06, "loss": 0.0498, "step": 32300 }, { "epoch": 1.6399309609624853, "grad_norm": 0.49687087535858154, "learning_rate": 9.067126926916765e-06, "loss": 0.0327, "step": 32305 }, { "epoch": 1.6401847809533479, "grad_norm": 0.3052130341529846, "learning_rate": 9.065434793644348e-06, "loss": 0.044, "step": 32310 }, { "epoch": 1.6404386009442105, "grad_norm": 0.31215372681617737, "learning_rate": 9.063742660371932e-06, "loss": 0.0378, "step": 32315 }, { "epoch": 1.6406924209350728, "grad_norm": 0.4322529435157776, "learning_rate": 9.062050527099515e-06, "loss": 0.0389, "step": 32320 }, { "epoch": 1.6409462409259352, "grad_norm": 0.27929675579071045, "learning_rate": 9.060358393827099e-06, "loss": 0.041, "step": 32325 }, { "epoch": 1.6412000609167978, "grad_norm": 0.3943149745464325, "learning_rate": 9.058666260554682e-06, "loss": 0.0454, "step": 32330 }, { "epoch": 1.6414538809076604, "grad_norm": 0.3910643756389618, "learning_rate": 9.056974127282266e-06, "loss": 0.0445, "step": 32335 }, { "epoch": 1.6417077008985228, "grad_norm": 0.42487633228302, "learning_rate": 9.05528199400985e-06, "loss": 0.0507, "step": 32340 }, { "epoch": 1.6419615208893852, "grad_norm": 0.2816854417324066, "learning_rate": 9.053589860737433e-06, "loss": 0.0319, "step": 32345 }, { "epoch": 1.6422153408802478, "grad_norm": 0.28755298256874084, "learning_rate": 9.051897727465015e-06, "loss": 0.0377, "step": 32350 }, { "epoch": 1.6424691608711102, "grad_norm": 0.2777571678161621, "learning_rate": 9.0502055941926e-06, "loss": 0.0413, "step": 32355 }, { "epoch": 1.6427229808619725, "grad_norm": 0.30235791206359863, "learning_rate": 9.048513460920184e-06, "loss": 0.0439, "step": 32360 }, { "epoch": 1.6429768008528352, "grad_norm": 0.40951234102249146, "learning_rate": 9.046821327647767e-06, "loss": 0.0468, "step": 32365 }, { "epoch": 1.6432306208436978, "grad_norm": 0.366265207529068, "learning_rate": 9.045129194375349e-06, "loss": 0.0324, "step": 32370 }, { "epoch": 1.6434844408345601, "grad_norm": 0.39244604110717773, "learning_rate": 9.043437061102932e-06, "loss": 0.0441, "step": 32375 }, { "epoch": 1.6437382608254225, "grad_norm": 0.3864644467830658, "learning_rate": 9.041744927830518e-06, "loss": 0.044, "step": 32380 }, { "epoch": 1.6439920808162851, "grad_norm": 0.4265846312046051, "learning_rate": 9.0400527945581e-06, "loss": 0.0417, "step": 32385 }, { "epoch": 1.6442459008071477, "grad_norm": 0.31097733974456787, "learning_rate": 9.038360661285683e-06, "loss": 0.0418, "step": 32390 }, { "epoch": 1.64449972079801, "grad_norm": 0.26527610421180725, "learning_rate": 9.036668528013267e-06, "loss": 0.0406, "step": 32395 }, { "epoch": 1.6447535407888725, "grad_norm": 0.3447258174419403, "learning_rate": 9.03497639474085e-06, "loss": 0.0448, "step": 32400 }, { "epoch": 1.645007360779735, "grad_norm": 0.4536108374595642, "learning_rate": 9.033284261468434e-06, "loss": 0.0449, "step": 32405 }, { "epoch": 1.6452611807705975, "grad_norm": 0.43144747614860535, "learning_rate": 9.031592128196017e-06, "loss": 0.0414, "step": 32410 }, { "epoch": 1.6455150007614598, "grad_norm": 0.26930710673332214, "learning_rate": 9.0298999949236e-06, "loss": 0.0469, "step": 32415 }, { "epoch": 1.6457688207523224, "grad_norm": 0.33055657148361206, "learning_rate": 9.028207861651184e-06, "loss": 0.0416, "step": 32420 }, { "epoch": 1.646022640743185, "grad_norm": 0.4396286606788635, "learning_rate": 9.026515728378768e-06, "loss": 0.0472, "step": 32425 }, { "epoch": 1.6462764607340474, "grad_norm": 0.3018118441104889, "learning_rate": 9.024823595106351e-06, "loss": 0.0372, "step": 32430 }, { "epoch": 1.6465302807249098, "grad_norm": 0.39372122287750244, "learning_rate": 9.023131461833935e-06, "loss": 0.0495, "step": 32435 }, { "epoch": 1.6467841007157724, "grad_norm": 0.2524060904979706, "learning_rate": 9.021439328561519e-06, "loss": 0.0392, "step": 32440 }, { "epoch": 1.647037920706635, "grad_norm": 0.5259925127029419, "learning_rate": 9.019747195289102e-06, "loss": 0.0401, "step": 32445 }, { "epoch": 1.6472917406974972, "grad_norm": 0.41529425978660583, "learning_rate": 9.018055062016686e-06, "loss": 0.0432, "step": 32450 }, { "epoch": 1.6475455606883598, "grad_norm": 0.3844156861305237, "learning_rate": 9.01636292874427e-06, "loss": 0.0361, "step": 32455 }, { "epoch": 1.6477993806792224, "grad_norm": 0.3346210718154907, "learning_rate": 9.014670795471853e-06, "loss": 0.0454, "step": 32460 }, { "epoch": 1.6480532006700848, "grad_norm": 0.3419366776943207, "learning_rate": 9.012978662199435e-06, "loss": 0.0426, "step": 32465 }, { "epoch": 1.6483070206609471, "grad_norm": 0.40948936343193054, "learning_rate": 9.01128652892702e-06, "loss": 0.0431, "step": 32470 }, { "epoch": 1.6485608406518097, "grad_norm": 0.3462151885032654, "learning_rate": 9.009594395654603e-06, "loss": 0.0424, "step": 32475 }, { "epoch": 1.6488146606426723, "grad_norm": 0.38817259669303894, "learning_rate": 9.007902262382185e-06, "loss": 0.0367, "step": 32480 }, { "epoch": 1.6490684806335347, "grad_norm": 0.33476361632347107, "learning_rate": 9.006210129109769e-06, "loss": 0.0459, "step": 32485 }, { "epoch": 1.649322300624397, "grad_norm": 0.31743040680885315, "learning_rate": 9.004517995837352e-06, "loss": 0.038, "step": 32490 }, { "epoch": 1.6495761206152597, "grad_norm": 0.3448827266693115, "learning_rate": 9.002825862564938e-06, "loss": 0.0388, "step": 32495 }, { "epoch": 1.649829940606122, "grad_norm": 0.277004599571228, "learning_rate": 9.00113372929252e-06, "loss": 0.044, "step": 32500 }, { "epoch": 1.6500837605969845, "grad_norm": 0.3807571232318878, "learning_rate": 8.999441596020103e-06, "loss": 0.0516, "step": 32505 }, { "epoch": 1.650337580587847, "grad_norm": 0.43307530879974365, "learning_rate": 8.997749462747686e-06, "loss": 0.0385, "step": 32510 }, { "epoch": 1.6505914005787097, "grad_norm": 0.36925190687179565, "learning_rate": 8.99605732947527e-06, "loss": 0.0406, "step": 32515 }, { "epoch": 1.650845220569572, "grad_norm": 0.37371015548706055, "learning_rate": 8.994365196202854e-06, "loss": 0.033, "step": 32520 }, { "epoch": 1.6510990405604344, "grad_norm": 0.3058145344257355, "learning_rate": 8.992673062930437e-06, "loss": 0.041, "step": 32525 }, { "epoch": 1.651352860551297, "grad_norm": 0.44341811537742615, "learning_rate": 8.99098092965802e-06, "loss": 0.0517, "step": 32530 }, { "epoch": 1.6516066805421596, "grad_norm": 0.4051477313041687, "learning_rate": 8.989288796385604e-06, "loss": 0.0444, "step": 32535 }, { "epoch": 1.651860500533022, "grad_norm": 0.4557914137840271, "learning_rate": 8.987596663113188e-06, "loss": 0.0471, "step": 32540 }, { "epoch": 1.6521143205238844, "grad_norm": 0.367385596036911, "learning_rate": 8.985904529840771e-06, "loss": 0.0434, "step": 32545 }, { "epoch": 1.652368140514747, "grad_norm": 0.31616297364234924, "learning_rate": 8.984212396568355e-06, "loss": 0.0441, "step": 32550 }, { "epoch": 1.6526219605056094, "grad_norm": 0.47400176525115967, "learning_rate": 8.982520263295937e-06, "loss": 0.0403, "step": 32555 }, { "epoch": 1.6528757804964718, "grad_norm": 0.37616056203842163, "learning_rate": 8.980828130023522e-06, "loss": 0.0516, "step": 32560 }, { "epoch": 1.6531296004873344, "grad_norm": 0.40422409772872925, "learning_rate": 8.979135996751105e-06, "loss": 0.0458, "step": 32565 }, { "epoch": 1.653383420478197, "grad_norm": 0.33850717544555664, "learning_rate": 8.977443863478689e-06, "loss": 0.0454, "step": 32570 }, { "epoch": 1.6536372404690594, "grad_norm": 0.2698414921760559, "learning_rate": 8.975751730206271e-06, "loss": 0.043, "step": 32575 }, { "epoch": 1.6538910604599217, "grad_norm": 0.3722778558731079, "learning_rate": 8.974059596933854e-06, "loss": 0.0477, "step": 32580 }, { "epoch": 1.6541448804507843, "grad_norm": 0.3686807453632355, "learning_rate": 8.97236746366144e-06, "loss": 0.0376, "step": 32585 }, { "epoch": 1.654398700441647, "grad_norm": 0.3176324963569641, "learning_rate": 8.970675330389023e-06, "loss": 0.0437, "step": 32590 }, { "epoch": 1.654652520432509, "grad_norm": 0.48339200019836426, "learning_rate": 8.968983197116605e-06, "loss": 0.047, "step": 32595 }, { "epoch": 1.6549063404233717, "grad_norm": 0.422828733921051, "learning_rate": 8.967291063844189e-06, "loss": 0.0413, "step": 32600 }, { "epoch": 1.6551601604142343, "grad_norm": 0.3320799469947815, "learning_rate": 8.965598930571772e-06, "loss": 0.0358, "step": 32605 }, { "epoch": 1.6554139804050967, "grad_norm": 0.8261854648590088, "learning_rate": 8.963906797299357e-06, "loss": 0.0414, "step": 32610 }, { "epoch": 1.655667800395959, "grad_norm": 0.3974917232990265, "learning_rate": 8.96221466402694e-06, "loss": 0.0353, "step": 32615 }, { "epoch": 1.6559216203868217, "grad_norm": 0.3724062144756317, "learning_rate": 8.960522530754523e-06, "loss": 0.0294, "step": 32620 }, { "epoch": 1.6561754403776843, "grad_norm": 0.4030728340148926, "learning_rate": 8.958830397482106e-06, "loss": 0.0429, "step": 32625 }, { "epoch": 1.6564292603685467, "grad_norm": 0.677786111831665, "learning_rate": 8.95713826420969e-06, "loss": 0.0495, "step": 32630 }, { "epoch": 1.656683080359409, "grad_norm": 0.21341340243816376, "learning_rate": 8.955446130937273e-06, "loss": 0.029, "step": 32635 }, { "epoch": 1.6569369003502716, "grad_norm": 0.45626917481422424, "learning_rate": 8.953753997664857e-06, "loss": 0.0363, "step": 32640 }, { "epoch": 1.657190720341134, "grad_norm": 0.29046255350112915, "learning_rate": 8.95206186439244e-06, "loss": 0.0442, "step": 32645 }, { "epoch": 1.6574445403319964, "grad_norm": 0.6526368260383606, "learning_rate": 8.950369731120024e-06, "loss": 0.0437, "step": 32650 }, { "epoch": 1.657698360322859, "grad_norm": 0.28871995210647583, "learning_rate": 8.948677597847608e-06, "loss": 0.0453, "step": 32655 }, { "epoch": 1.6579521803137216, "grad_norm": 0.3228037357330322, "learning_rate": 8.946985464575191e-06, "loss": 0.0454, "step": 32660 }, { "epoch": 1.658206000304584, "grad_norm": 0.24650952219963074, "learning_rate": 8.945293331302775e-06, "loss": 0.0368, "step": 32665 }, { "epoch": 1.6584598202954464, "grad_norm": 0.26511305570602417, "learning_rate": 8.943601198030357e-06, "loss": 0.0419, "step": 32670 }, { "epoch": 1.658713640286309, "grad_norm": 0.32455992698669434, "learning_rate": 8.941909064757942e-06, "loss": 0.0473, "step": 32675 }, { "epoch": 1.6589674602771716, "grad_norm": 0.3761235475540161, "learning_rate": 8.940216931485525e-06, "loss": 0.0417, "step": 32680 }, { "epoch": 1.659221280268034, "grad_norm": 0.2946665287017822, "learning_rate": 8.938524798213109e-06, "loss": 0.0433, "step": 32685 }, { "epoch": 1.6594751002588963, "grad_norm": 0.3487130403518677, "learning_rate": 8.93683266494069e-06, "loss": 0.0371, "step": 32690 }, { "epoch": 1.659728920249759, "grad_norm": 0.4113446772098541, "learning_rate": 8.935140531668274e-06, "loss": 0.0424, "step": 32695 }, { "epoch": 1.6599827402406213, "grad_norm": 0.22404730319976807, "learning_rate": 8.93344839839586e-06, "loss": 0.042, "step": 32700 }, { "epoch": 1.6602365602314837, "grad_norm": 0.3690703809261322, "learning_rate": 8.931756265123443e-06, "loss": 0.0503, "step": 32705 }, { "epoch": 1.6604903802223463, "grad_norm": 0.26985934376716614, "learning_rate": 8.930064131851025e-06, "loss": 0.0415, "step": 32710 }, { "epoch": 1.660744200213209, "grad_norm": 0.30366820096969604, "learning_rate": 8.928371998578608e-06, "loss": 0.0454, "step": 32715 }, { "epoch": 1.6609980202040713, "grad_norm": 0.3210146725177765, "learning_rate": 8.926679865306192e-06, "loss": 0.038, "step": 32720 }, { "epoch": 1.6612518401949337, "grad_norm": 0.38465768098831177, "learning_rate": 8.924987732033776e-06, "loss": 0.0393, "step": 32725 }, { "epoch": 1.6615056601857963, "grad_norm": 0.32459139823913574, "learning_rate": 8.923295598761359e-06, "loss": 0.0515, "step": 32730 }, { "epoch": 1.6617594801766589, "grad_norm": 0.2934326231479645, "learning_rate": 8.921603465488943e-06, "loss": 0.03, "step": 32735 }, { "epoch": 1.6620133001675212, "grad_norm": 0.35384735465049744, "learning_rate": 8.919911332216526e-06, "loss": 0.0426, "step": 32740 }, { "epoch": 1.6622671201583836, "grad_norm": 0.3279673159122467, "learning_rate": 8.91821919894411e-06, "loss": 0.0416, "step": 32745 }, { "epoch": 1.6625209401492462, "grad_norm": 0.36697596311569214, "learning_rate": 8.916527065671693e-06, "loss": 0.0477, "step": 32750 }, { "epoch": 1.6627747601401086, "grad_norm": 0.2286474108695984, "learning_rate": 8.914834932399277e-06, "loss": 0.043, "step": 32755 }, { "epoch": 1.663028580130971, "grad_norm": 0.33469220995903015, "learning_rate": 8.91314279912686e-06, "loss": 0.0415, "step": 32760 }, { "epoch": 1.6632824001218336, "grad_norm": 0.5135287642478943, "learning_rate": 8.911450665854444e-06, "loss": 0.04, "step": 32765 }, { "epoch": 1.6635362201126962, "grad_norm": 0.3337949216365814, "learning_rate": 8.909758532582027e-06, "loss": 0.0361, "step": 32770 }, { "epoch": 1.6637900401035586, "grad_norm": 0.3559700548648834, "learning_rate": 8.908066399309611e-06, "loss": 0.0422, "step": 32775 }, { "epoch": 1.664043860094421, "grad_norm": 0.44909822940826416, "learning_rate": 8.906374266037194e-06, "loss": 0.0415, "step": 32780 }, { "epoch": 1.6642976800852836, "grad_norm": 0.3622046113014221, "learning_rate": 8.904682132764776e-06, "loss": 0.0418, "step": 32785 }, { "epoch": 1.6645515000761462, "grad_norm": 0.32745665311813354, "learning_rate": 8.902989999492362e-06, "loss": 0.0423, "step": 32790 }, { "epoch": 1.6648053200670083, "grad_norm": 0.47989243268966675, "learning_rate": 8.901297866219945e-06, "loss": 0.05, "step": 32795 }, { "epoch": 1.665059140057871, "grad_norm": 0.2573833465576172, "learning_rate": 8.899605732947527e-06, "loss": 0.0363, "step": 32800 }, { "epoch": 1.6653129600487335, "grad_norm": 0.3501220643520355, "learning_rate": 8.89791359967511e-06, "loss": 0.0475, "step": 32805 }, { "epoch": 1.665566780039596, "grad_norm": 0.3637681305408478, "learning_rate": 8.896221466402694e-06, "loss": 0.0416, "step": 32810 }, { "epoch": 1.6658206000304583, "grad_norm": 0.46171268820762634, "learning_rate": 8.894529333130278e-06, "loss": 0.0412, "step": 32815 }, { "epoch": 1.666074420021321, "grad_norm": 0.3462655544281006, "learning_rate": 8.892837199857861e-06, "loss": 0.0339, "step": 32820 }, { "epoch": 1.6663282400121835, "grad_norm": 0.34022197127342224, "learning_rate": 8.891145066585445e-06, "loss": 0.0401, "step": 32825 }, { "epoch": 1.6665820600030459, "grad_norm": 0.31248193979263306, "learning_rate": 8.889452933313028e-06, "loss": 0.0404, "step": 32830 }, { "epoch": 1.6668358799939083, "grad_norm": 0.49435219168663025, "learning_rate": 8.887760800040612e-06, "loss": 0.0471, "step": 32835 }, { "epoch": 1.6670896999847709, "grad_norm": 0.47587648034095764, "learning_rate": 8.886068666768195e-06, "loss": 0.0406, "step": 32840 }, { "epoch": 1.6673435199756332, "grad_norm": 0.2598819434642792, "learning_rate": 8.884376533495779e-06, "loss": 0.037, "step": 32845 }, { "epoch": 1.6675973399664956, "grad_norm": 0.4888969361782074, "learning_rate": 8.882684400223362e-06, "loss": 0.0397, "step": 32850 }, { "epoch": 1.6678511599573582, "grad_norm": 0.37045595049858093, "learning_rate": 8.880992266950946e-06, "loss": 0.043, "step": 32855 }, { "epoch": 1.6681049799482208, "grad_norm": 0.3329499363899231, "learning_rate": 8.87930013367853e-06, "loss": 0.0469, "step": 32860 }, { "epoch": 1.6683587999390832, "grad_norm": 0.34843963384628296, "learning_rate": 8.877608000406113e-06, "loss": 0.0449, "step": 32865 }, { "epoch": 1.6686126199299456, "grad_norm": 0.45146849751472473, "learning_rate": 8.875915867133697e-06, "loss": 0.0378, "step": 32870 }, { "epoch": 1.6688664399208082, "grad_norm": 0.4130539298057556, "learning_rate": 8.874223733861278e-06, "loss": 0.0458, "step": 32875 }, { "epoch": 1.6691202599116708, "grad_norm": 0.32472503185272217, "learning_rate": 8.872531600588864e-06, "loss": 0.0354, "step": 32880 }, { "epoch": 1.6693740799025332, "grad_norm": 0.20518916845321655, "learning_rate": 8.870839467316447e-06, "loss": 0.0389, "step": 32885 }, { "epoch": 1.6696278998933956, "grad_norm": 0.31469863653182983, "learning_rate": 8.86914733404403e-06, "loss": 0.0447, "step": 32890 }, { "epoch": 1.6698817198842582, "grad_norm": 0.3323427736759186, "learning_rate": 8.867455200771613e-06, "loss": 0.0395, "step": 32895 }, { "epoch": 1.6701355398751205, "grad_norm": 0.5702518224716187, "learning_rate": 8.865763067499196e-06, "loss": 0.0468, "step": 32900 }, { "epoch": 1.670389359865983, "grad_norm": 0.4348698854446411, "learning_rate": 8.86407093422678e-06, "loss": 0.0465, "step": 32905 }, { "epoch": 1.6706431798568455, "grad_norm": 0.23130303621292114, "learning_rate": 8.862378800954365e-06, "loss": 0.0386, "step": 32910 }, { "epoch": 1.6708969998477081, "grad_norm": 0.3477792739868164, "learning_rate": 8.860686667681947e-06, "loss": 0.0371, "step": 32915 }, { "epoch": 1.6711508198385705, "grad_norm": 0.6751791834831238, "learning_rate": 8.85899453440953e-06, "loss": 0.0362, "step": 32920 }, { "epoch": 1.6714046398294329, "grad_norm": 0.29711753129959106, "learning_rate": 8.857302401137114e-06, "loss": 0.0425, "step": 32925 }, { "epoch": 1.6716584598202955, "grad_norm": 0.4057323634624481, "learning_rate": 8.855610267864697e-06, "loss": 0.0387, "step": 32930 }, { "epoch": 1.671912279811158, "grad_norm": 0.288831502199173, "learning_rate": 8.853918134592281e-06, "loss": 0.0411, "step": 32935 }, { "epoch": 1.6721660998020202, "grad_norm": 0.43437838554382324, "learning_rate": 8.852226001319865e-06, "loss": 0.0441, "step": 32940 }, { "epoch": 1.6724199197928828, "grad_norm": 0.3017238974571228, "learning_rate": 8.850533868047448e-06, "loss": 0.0374, "step": 32945 }, { "epoch": 1.6726737397837455, "grad_norm": 0.27872663736343384, "learning_rate": 8.848841734775032e-06, "loss": 0.037, "step": 32950 }, { "epoch": 1.6729275597746078, "grad_norm": 0.4081742763519287, "learning_rate": 8.847149601502615e-06, "loss": 0.0447, "step": 32955 }, { "epoch": 1.6731813797654702, "grad_norm": 0.4713224470615387, "learning_rate": 8.845457468230199e-06, "loss": 0.0402, "step": 32960 }, { "epoch": 1.6734351997563328, "grad_norm": 0.3346846401691437, "learning_rate": 8.843765334957782e-06, "loss": 0.045, "step": 32965 }, { "epoch": 1.6736890197471954, "grad_norm": 0.2436722368001938, "learning_rate": 8.842073201685366e-06, "loss": 0.0377, "step": 32970 }, { "epoch": 1.6739428397380578, "grad_norm": 0.43119826912879944, "learning_rate": 8.84038106841295e-06, "loss": 0.0514, "step": 32975 }, { "epoch": 1.6741966597289202, "grad_norm": 0.36432069540023804, "learning_rate": 8.838688935140533e-06, "loss": 0.0484, "step": 32980 }, { "epoch": 1.6744504797197828, "grad_norm": 0.2824365496635437, "learning_rate": 8.836996801868116e-06, "loss": 0.04, "step": 32985 }, { "epoch": 1.6747042997106452, "grad_norm": 0.38415277004241943, "learning_rate": 8.835304668595698e-06, "loss": 0.0335, "step": 32990 }, { "epoch": 1.6749581197015075, "grad_norm": 0.4238277077674866, "learning_rate": 8.833612535323282e-06, "loss": 0.0407, "step": 32995 }, { "epoch": 1.6752119396923701, "grad_norm": 0.49974820017814636, "learning_rate": 8.831920402050867e-06, "loss": 0.0413, "step": 33000 }, { "epoch": 1.6754657596832327, "grad_norm": 0.33166828751564026, "learning_rate": 8.83022826877845e-06, "loss": 0.0366, "step": 33005 }, { "epoch": 1.6757195796740951, "grad_norm": 0.4129970073699951, "learning_rate": 8.828536135506032e-06, "loss": 0.038, "step": 33010 }, { "epoch": 1.6759733996649575, "grad_norm": 0.23709560930728912, "learning_rate": 8.826844002233616e-06, "loss": 0.0352, "step": 33015 }, { "epoch": 1.67622721965582, "grad_norm": 0.37866827845573425, "learning_rate": 8.8251518689612e-06, "loss": 0.0447, "step": 33020 }, { "epoch": 1.6764810396466827, "grad_norm": 0.3766491711139679, "learning_rate": 8.823459735688785e-06, "loss": 0.0384, "step": 33025 }, { "epoch": 1.676734859637545, "grad_norm": 0.4483327567577362, "learning_rate": 8.821767602416367e-06, "loss": 0.0433, "step": 33030 }, { "epoch": 1.6769886796284075, "grad_norm": 0.33308711647987366, "learning_rate": 8.82007546914395e-06, "loss": 0.0307, "step": 33035 }, { "epoch": 1.67724249961927, "grad_norm": 0.3442741632461548, "learning_rate": 8.818383335871534e-06, "loss": 0.0411, "step": 33040 }, { "epoch": 1.6774963196101325, "grad_norm": 0.3743845820426941, "learning_rate": 8.816691202599117e-06, "loss": 0.0362, "step": 33045 }, { "epoch": 1.6777501396009948, "grad_norm": 0.35795456171035767, "learning_rate": 8.8149990693267e-06, "loss": 0.0414, "step": 33050 }, { "epoch": 1.6780039595918574, "grad_norm": 0.6182012557983398, "learning_rate": 8.813306936054284e-06, "loss": 0.0446, "step": 33055 }, { "epoch": 1.67825777958272, "grad_norm": 0.43161648511886597, "learning_rate": 8.811614802781868e-06, "loss": 0.0362, "step": 33060 }, { "epoch": 1.6785115995735824, "grad_norm": 0.31941068172454834, "learning_rate": 8.809922669509451e-06, "loss": 0.0414, "step": 33065 }, { "epoch": 1.6787654195644448, "grad_norm": 0.3838498890399933, "learning_rate": 8.808230536237035e-06, "loss": 0.0341, "step": 33070 }, { "epoch": 1.6790192395553074, "grad_norm": 0.38141605257987976, "learning_rate": 8.806538402964619e-06, "loss": 0.047, "step": 33075 }, { "epoch": 1.67927305954617, "grad_norm": 0.33075106143951416, "learning_rate": 8.804846269692202e-06, "loss": 0.0402, "step": 33080 }, { "epoch": 1.6795268795370324, "grad_norm": 0.32069718837738037, "learning_rate": 8.803154136419784e-06, "loss": 0.0408, "step": 33085 }, { "epoch": 1.6797806995278948, "grad_norm": 0.3646174371242523, "learning_rate": 8.80146200314737e-06, "loss": 0.0386, "step": 33090 }, { "epoch": 1.6800345195187574, "grad_norm": 0.30178675055503845, "learning_rate": 8.799769869874953e-06, "loss": 0.0466, "step": 33095 }, { "epoch": 1.6802883395096198, "grad_norm": 0.6580922603607178, "learning_rate": 8.798077736602536e-06, "loss": 0.0459, "step": 33100 }, { "epoch": 1.6805421595004821, "grad_norm": 0.48057985305786133, "learning_rate": 8.796385603330118e-06, "loss": 0.0482, "step": 33105 }, { "epoch": 1.6807959794913447, "grad_norm": 0.28072890639305115, "learning_rate": 8.794693470057702e-06, "loss": 0.0441, "step": 33110 }, { "epoch": 1.6810497994822073, "grad_norm": 0.27334949374198914, "learning_rate": 8.793001336785287e-06, "loss": 0.0333, "step": 33115 }, { "epoch": 1.6813036194730697, "grad_norm": 0.42797207832336426, "learning_rate": 8.791309203512869e-06, "loss": 0.041, "step": 33120 }, { "epoch": 1.681557439463932, "grad_norm": 0.3023119866847992, "learning_rate": 8.789617070240452e-06, "loss": 0.0367, "step": 33125 }, { "epoch": 1.6818112594547947, "grad_norm": 0.26819950342178345, "learning_rate": 8.787924936968036e-06, "loss": 0.0446, "step": 33130 }, { "epoch": 1.682065079445657, "grad_norm": 0.3586992919445038, "learning_rate": 8.78623280369562e-06, "loss": 0.0372, "step": 33135 }, { "epoch": 1.6823188994365195, "grad_norm": 0.35732945799827576, "learning_rate": 8.784540670423203e-06, "loss": 0.04, "step": 33140 }, { "epoch": 1.682572719427382, "grad_norm": 0.2647523581981659, "learning_rate": 8.782848537150786e-06, "loss": 0.0321, "step": 33145 }, { "epoch": 1.6828265394182447, "grad_norm": 0.39509347081184387, "learning_rate": 8.78115640387837e-06, "loss": 0.0396, "step": 33150 }, { "epoch": 1.683080359409107, "grad_norm": 0.3287479281425476, "learning_rate": 8.779464270605954e-06, "loss": 0.0356, "step": 33155 }, { "epoch": 1.6833341793999694, "grad_norm": 0.5807609558105469, "learning_rate": 8.777772137333537e-06, "loss": 0.0377, "step": 33160 }, { "epoch": 1.683587999390832, "grad_norm": 0.28490084409713745, "learning_rate": 8.77608000406112e-06, "loss": 0.0422, "step": 33165 }, { "epoch": 1.6838418193816946, "grad_norm": 0.37548598647117615, "learning_rate": 8.774387870788704e-06, "loss": 0.0453, "step": 33170 }, { "epoch": 1.684095639372557, "grad_norm": 0.6309858560562134, "learning_rate": 8.772695737516288e-06, "loss": 0.0359, "step": 33175 }, { "epoch": 1.6843494593634194, "grad_norm": 0.3599869906902313, "learning_rate": 8.771003604243871e-06, "loss": 0.0382, "step": 33180 }, { "epoch": 1.684603279354282, "grad_norm": 0.26254507899284363, "learning_rate": 8.769311470971455e-06, "loss": 0.0406, "step": 33185 }, { "epoch": 1.6848570993451444, "grad_norm": 0.3382471799850464, "learning_rate": 8.767619337699038e-06, "loss": 0.0412, "step": 33190 }, { "epoch": 1.6851109193360068, "grad_norm": 0.5124127268791199, "learning_rate": 8.76592720442662e-06, "loss": 0.0448, "step": 33195 }, { "epoch": 1.6853647393268694, "grad_norm": 0.31272459030151367, "learning_rate": 8.764235071154204e-06, "loss": 0.036, "step": 33200 }, { "epoch": 1.685618559317732, "grad_norm": 0.35631221532821655, "learning_rate": 8.762542937881789e-06, "loss": 0.0332, "step": 33205 }, { "epoch": 1.6858723793085943, "grad_norm": 0.2877142131328583, "learning_rate": 8.760850804609373e-06, "loss": 0.0446, "step": 33210 }, { "epoch": 1.6861261992994567, "grad_norm": 0.44849103689193726, "learning_rate": 8.759158671336954e-06, "loss": 0.0372, "step": 33215 }, { "epoch": 1.6863800192903193, "grad_norm": 0.4658852517604828, "learning_rate": 8.757466538064538e-06, "loss": 0.0372, "step": 33220 }, { "epoch": 1.686633839281182, "grad_norm": 0.6611411571502686, "learning_rate": 8.755774404792121e-06, "loss": 0.0402, "step": 33225 }, { "epoch": 1.6868876592720443, "grad_norm": 0.3149184584617615, "learning_rate": 8.754082271519707e-06, "loss": 0.0437, "step": 33230 }, { "epoch": 1.6871414792629067, "grad_norm": 0.31619152426719666, "learning_rate": 8.752390138247289e-06, "loss": 0.0393, "step": 33235 }, { "epoch": 1.6873952992537693, "grad_norm": 0.35493898391723633, "learning_rate": 8.750698004974872e-06, "loss": 0.0385, "step": 33240 }, { "epoch": 1.6876491192446317, "grad_norm": 0.48094281554222107, "learning_rate": 8.749005871702456e-06, "loss": 0.0413, "step": 33245 }, { "epoch": 1.687902939235494, "grad_norm": 0.26521581411361694, "learning_rate": 8.74731373843004e-06, "loss": 0.0373, "step": 33250 }, { "epoch": 1.6881567592263567, "grad_norm": 0.3604218065738678, "learning_rate": 8.745621605157623e-06, "loss": 0.0418, "step": 33255 }, { "epoch": 1.6884105792172193, "grad_norm": 0.5107678174972534, "learning_rate": 8.743929471885206e-06, "loss": 0.0396, "step": 33260 }, { "epoch": 1.6886643992080816, "grad_norm": 0.2442566156387329, "learning_rate": 8.74223733861279e-06, "loss": 0.038, "step": 33265 }, { "epoch": 1.688918219198944, "grad_norm": 0.3603915572166443, "learning_rate": 8.740545205340373e-06, "loss": 0.0438, "step": 33270 }, { "epoch": 1.6891720391898066, "grad_norm": 0.3329687714576721, "learning_rate": 8.738853072067957e-06, "loss": 0.0483, "step": 33275 }, { "epoch": 1.6894258591806692, "grad_norm": 0.2893288731575012, "learning_rate": 8.73716093879554e-06, "loss": 0.0381, "step": 33280 }, { "epoch": 1.6896796791715314, "grad_norm": 0.55464106798172, "learning_rate": 8.735468805523124e-06, "loss": 0.0475, "step": 33285 }, { "epoch": 1.689933499162394, "grad_norm": 0.8349003791809082, "learning_rate": 8.733776672250706e-06, "loss": 0.0399, "step": 33290 }, { "epoch": 1.6901873191532566, "grad_norm": 0.2722102105617523, "learning_rate": 8.732084538978291e-06, "loss": 0.0358, "step": 33295 }, { "epoch": 1.690441139144119, "grad_norm": 0.27093610167503357, "learning_rate": 8.730392405705875e-06, "loss": 0.0339, "step": 33300 }, { "epoch": 1.6906949591349814, "grad_norm": 0.3764627277851105, "learning_rate": 8.728700272433458e-06, "loss": 0.0397, "step": 33305 }, { "epoch": 1.690948779125844, "grad_norm": 0.3306204378604889, "learning_rate": 8.72700813916104e-06, "loss": 0.042, "step": 33310 }, { "epoch": 1.6912025991167066, "grad_norm": 0.2610977292060852, "learning_rate": 8.725316005888624e-06, "loss": 0.037, "step": 33315 }, { "epoch": 1.691456419107569, "grad_norm": 0.3594244420528412, "learning_rate": 8.723623872616209e-06, "loss": 0.0453, "step": 33320 }, { "epoch": 1.6917102390984313, "grad_norm": 0.31486913561820984, "learning_rate": 8.721931739343792e-06, "loss": 0.044, "step": 33325 }, { "epoch": 1.691964059089294, "grad_norm": 0.3887704610824585, "learning_rate": 8.720239606071374e-06, "loss": 0.0454, "step": 33330 }, { "epoch": 1.6922178790801563, "grad_norm": 0.3932081460952759, "learning_rate": 8.718547472798958e-06, "loss": 0.0446, "step": 33335 }, { "epoch": 1.6924716990710187, "grad_norm": 0.35229092836380005, "learning_rate": 8.716855339526541e-06, "loss": 0.0395, "step": 33340 }, { "epoch": 1.6927255190618813, "grad_norm": 0.3832239508628845, "learning_rate": 8.715163206254127e-06, "loss": 0.0487, "step": 33345 }, { "epoch": 1.692979339052744, "grad_norm": 0.3000941872596741, "learning_rate": 8.713471072981708e-06, "loss": 0.0329, "step": 33350 }, { "epoch": 1.6932331590436063, "grad_norm": 0.4978597164154053, "learning_rate": 8.711778939709292e-06, "loss": 0.046, "step": 33355 }, { "epoch": 1.6934869790344687, "grad_norm": 0.35227295756340027, "learning_rate": 8.710086806436875e-06, "loss": 0.0385, "step": 33360 }, { "epoch": 1.6937407990253313, "grad_norm": 0.46936389803886414, "learning_rate": 8.708394673164459e-06, "loss": 0.0437, "step": 33365 }, { "epoch": 1.6939946190161939, "grad_norm": 0.2642430067062378, "learning_rate": 8.706702539892043e-06, "loss": 0.049, "step": 33370 }, { "epoch": 1.6942484390070562, "grad_norm": 0.2733605206012726, "learning_rate": 8.705010406619626e-06, "loss": 0.0446, "step": 33375 }, { "epoch": 1.6945022589979186, "grad_norm": 0.42943286895751953, "learning_rate": 8.70331827334721e-06, "loss": 0.0432, "step": 33380 }, { "epoch": 1.6947560789887812, "grad_norm": 0.32281479239463806, "learning_rate": 8.701626140074793e-06, "loss": 0.0397, "step": 33385 }, { "epoch": 1.6950098989796436, "grad_norm": 0.2502782940864563, "learning_rate": 8.699934006802377e-06, "loss": 0.0429, "step": 33390 }, { "epoch": 1.695263718970506, "grad_norm": 0.34502100944519043, "learning_rate": 8.69824187352996e-06, "loss": 0.0473, "step": 33395 }, { "epoch": 1.6955175389613686, "grad_norm": 0.3549754023551941, "learning_rate": 8.696549740257544e-06, "loss": 0.0456, "step": 33400 }, { "epoch": 1.6957713589522312, "grad_norm": 0.4002918303012848, "learning_rate": 8.694857606985126e-06, "loss": 0.0429, "step": 33405 }, { "epoch": 1.6960251789430936, "grad_norm": 0.351992130279541, "learning_rate": 8.693165473712711e-06, "loss": 0.0405, "step": 33410 }, { "epoch": 1.696278998933956, "grad_norm": 0.27613869309425354, "learning_rate": 8.691473340440294e-06, "loss": 0.0433, "step": 33415 }, { "epoch": 1.6965328189248186, "grad_norm": 0.42446577548980713, "learning_rate": 8.689781207167878e-06, "loss": 0.0427, "step": 33420 }, { "epoch": 1.6967866389156812, "grad_norm": 0.32727229595184326, "learning_rate": 8.68808907389546e-06, "loss": 0.0358, "step": 33425 }, { "epoch": 1.6970404589065433, "grad_norm": 0.1883794367313385, "learning_rate": 8.686396940623043e-06, "loss": 0.0464, "step": 33430 }, { "epoch": 1.697294278897406, "grad_norm": 0.27226707339286804, "learning_rate": 8.684704807350629e-06, "loss": 0.0349, "step": 33435 }, { "epoch": 1.6975480988882685, "grad_norm": 0.4325341284275055, "learning_rate": 8.68301267407821e-06, "loss": 0.0471, "step": 33440 }, { "epoch": 1.697801918879131, "grad_norm": 0.33074063062667847, "learning_rate": 8.681320540805794e-06, "loss": 0.031, "step": 33445 }, { "epoch": 1.6980557388699933, "grad_norm": 0.3044057786464691, "learning_rate": 8.679628407533378e-06, "loss": 0.043, "step": 33450 }, { "epoch": 1.6983095588608559, "grad_norm": 0.43691715598106384, "learning_rate": 8.677936274260961e-06, "loss": 0.0411, "step": 33455 }, { "epoch": 1.6985633788517185, "grad_norm": 0.3442010283470154, "learning_rate": 8.676244140988545e-06, "loss": 0.0408, "step": 33460 }, { "epoch": 1.6988171988425809, "grad_norm": 0.24146975576877594, "learning_rate": 8.674552007716128e-06, "loss": 0.038, "step": 33465 }, { "epoch": 1.6990710188334432, "grad_norm": 0.28107747435569763, "learning_rate": 8.672859874443712e-06, "loss": 0.0404, "step": 33470 }, { "epoch": 1.6993248388243058, "grad_norm": 0.3662523627281189, "learning_rate": 8.671167741171295e-06, "loss": 0.0434, "step": 33475 }, { "epoch": 1.6995786588151682, "grad_norm": 0.3790246546268463, "learning_rate": 8.669475607898879e-06, "loss": 0.0359, "step": 33480 }, { "epoch": 1.6998324788060306, "grad_norm": 0.3024158179759979, "learning_rate": 8.667783474626462e-06, "loss": 0.0436, "step": 33485 }, { "epoch": 1.7000862987968932, "grad_norm": 0.5186987519264221, "learning_rate": 8.666091341354046e-06, "loss": 0.0437, "step": 33490 }, { "epoch": 1.7003401187877558, "grad_norm": 0.25226029753685, "learning_rate": 8.66439920808163e-06, "loss": 0.0431, "step": 33495 }, { "epoch": 1.7005939387786182, "grad_norm": 0.4354790151119232, "learning_rate": 8.662707074809213e-06, "loss": 0.0469, "step": 33500 }, { "epoch": 1.7008477587694806, "grad_norm": 0.2978040874004364, "learning_rate": 8.661014941536797e-06, "loss": 0.037, "step": 33505 }, { "epoch": 1.7011015787603432, "grad_norm": 0.36080655455589294, "learning_rate": 8.65932280826438e-06, "loss": 0.0396, "step": 33510 }, { "epoch": 1.7013553987512058, "grad_norm": 0.323999285697937, "learning_rate": 8.657630674991962e-06, "loss": 0.0389, "step": 33515 }, { "epoch": 1.7016092187420682, "grad_norm": 0.3238927721977234, "learning_rate": 8.655938541719546e-06, "loss": 0.0394, "step": 33520 }, { "epoch": 1.7018630387329305, "grad_norm": 0.30916842818260193, "learning_rate": 8.65424640844713e-06, "loss": 0.0367, "step": 33525 }, { "epoch": 1.7021168587237931, "grad_norm": 0.30041056871414185, "learning_rate": 8.652554275174714e-06, "loss": 0.0382, "step": 33530 }, { "epoch": 1.7023706787146555, "grad_norm": 0.40629908442497253, "learning_rate": 8.650862141902296e-06, "loss": 0.0408, "step": 33535 }, { "epoch": 1.702624498705518, "grad_norm": 0.3429891765117645, "learning_rate": 8.64917000862988e-06, "loss": 0.0355, "step": 33540 }, { "epoch": 1.7028783186963805, "grad_norm": 0.5190361142158508, "learning_rate": 8.647477875357463e-06, "loss": 0.04, "step": 33545 }, { "epoch": 1.7031321386872431, "grad_norm": 0.36994943022727966, "learning_rate": 8.645785742085048e-06, "loss": 0.04, "step": 33550 }, { "epoch": 1.7033859586781055, "grad_norm": 0.4087146818637848, "learning_rate": 8.64409360881263e-06, "loss": 0.0482, "step": 33555 }, { "epoch": 1.7036397786689679, "grad_norm": 0.44062137603759766, "learning_rate": 8.642401475540214e-06, "loss": 0.0395, "step": 33560 }, { "epoch": 1.7038935986598305, "grad_norm": 0.4262305498123169, "learning_rate": 8.640709342267797e-06, "loss": 0.0402, "step": 33565 }, { "epoch": 1.704147418650693, "grad_norm": 0.30454927682876587, "learning_rate": 8.639017208995381e-06, "loss": 0.0401, "step": 33570 }, { "epoch": 1.7044012386415555, "grad_norm": 0.3262251317501068, "learning_rate": 8.637325075722965e-06, "loss": 0.0435, "step": 33575 }, { "epoch": 1.7046550586324178, "grad_norm": 0.2448095828294754, "learning_rate": 8.635632942450548e-06, "loss": 0.0401, "step": 33580 }, { "epoch": 1.7049088786232804, "grad_norm": 0.3910999894142151, "learning_rate": 8.633940809178132e-06, "loss": 0.0417, "step": 33585 }, { "epoch": 1.7051626986141428, "grad_norm": 0.2758971154689789, "learning_rate": 8.632248675905715e-06, "loss": 0.0458, "step": 33590 }, { "epoch": 1.7054165186050052, "grad_norm": 0.27816465497016907, "learning_rate": 8.630556542633299e-06, "loss": 0.0417, "step": 33595 }, { "epoch": 1.7056703385958678, "grad_norm": 0.30175527930259705, "learning_rate": 8.628864409360882e-06, "loss": 0.0429, "step": 33600 }, { "epoch": 1.7059241585867304, "grad_norm": 0.2667434811592102, "learning_rate": 8.627172276088466e-06, "loss": 0.0343, "step": 33605 }, { "epoch": 1.7061779785775928, "grad_norm": 0.2773091495037079, "learning_rate": 8.625480142816048e-06, "loss": 0.0444, "step": 33610 }, { "epoch": 1.7064317985684552, "grad_norm": 0.29049497842788696, "learning_rate": 8.623788009543633e-06, "loss": 0.0352, "step": 33615 }, { "epoch": 1.7066856185593178, "grad_norm": 0.4345710873603821, "learning_rate": 8.622095876271216e-06, "loss": 0.0348, "step": 33620 }, { "epoch": 1.7069394385501804, "grad_norm": 0.43229636549949646, "learning_rate": 8.6204037429988e-06, "loss": 0.0362, "step": 33625 }, { "epoch": 1.7071932585410425, "grad_norm": 0.4182978570461273, "learning_rate": 8.618711609726382e-06, "loss": 0.0466, "step": 33630 }, { "epoch": 1.7074470785319051, "grad_norm": 0.36239463090896606, "learning_rate": 8.617019476453965e-06, "loss": 0.0415, "step": 33635 }, { "epoch": 1.7077008985227677, "grad_norm": 0.4394817650318146, "learning_rate": 8.61532734318155e-06, "loss": 0.0393, "step": 33640 }, { "epoch": 1.7079547185136301, "grad_norm": 0.323661208152771, "learning_rate": 8.613635209909134e-06, "loss": 0.0381, "step": 33645 }, { "epoch": 1.7082085385044925, "grad_norm": 0.6629905700683594, "learning_rate": 8.611943076636716e-06, "loss": 0.0374, "step": 33650 }, { "epoch": 1.708462358495355, "grad_norm": 0.31542661786079407, "learning_rate": 8.6102509433643e-06, "loss": 0.0445, "step": 33655 }, { "epoch": 1.7087161784862177, "grad_norm": 0.24203170835971832, "learning_rate": 8.608558810091883e-06, "loss": 0.0349, "step": 33660 }, { "epoch": 1.70896999847708, "grad_norm": 0.38370826840400696, "learning_rate": 8.606866676819468e-06, "loss": 0.0479, "step": 33665 }, { "epoch": 1.7092238184679425, "grad_norm": 0.5466206073760986, "learning_rate": 8.60517454354705e-06, "loss": 0.0398, "step": 33670 }, { "epoch": 1.709477638458805, "grad_norm": 0.3482562303543091, "learning_rate": 8.603482410274634e-06, "loss": 0.0326, "step": 33675 }, { "epoch": 1.7097314584496675, "grad_norm": 0.33356785774230957, "learning_rate": 8.601790277002217e-06, "loss": 0.0493, "step": 33680 }, { "epoch": 1.7099852784405298, "grad_norm": 0.24861811101436615, "learning_rate": 8.6000981437298e-06, "loss": 0.0498, "step": 33685 }, { "epoch": 1.7102390984313924, "grad_norm": 0.2985035479068756, "learning_rate": 8.598406010457384e-06, "loss": 0.0345, "step": 33690 }, { "epoch": 1.710492918422255, "grad_norm": 0.34388431906700134, "learning_rate": 8.596713877184968e-06, "loss": 0.0436, "step": 33695 }, { "epoch": 1.7107467384131174, "grad_norm": 0.28177011013031006, "learning_rate": 8.595021743912551e-06, "loss": 0.0412, "step": 33700 }, { "epoch": 1.7110005584039798, "grad_norm": 0.29938557744026184, "learning_rate": 8.593329610640135e-06, "loss": 0.0419, "step": 33705 }, { "epoch": 1.7112543783948424, "grad_norm": 0.3993265628814697, "learning_rate": 8.591637477367719e-06, "loss": 0.0424, "step": 33710 }, { "epoch": 1.711508198385705, "grad_norm": 0.2539103925228119, "learning_rate": 8.589945344095302e-06, "loss": 0.0402, "step": 33715 }, { "epoch": 1.7117620183765674, "grad_norm": 0.3944316506385803, "learning_rate": 8.588253210822886e-06, "loss": 0.049, "step": 33720 }, { "epoch": 1.7120158383674298, "grad_norm": 0.3147892355918884, "learning_rate": 8.586561077550467e-06, "loss": 0.0457, "step": 33725 }, { "epoch": 1.7122696583582924, "grad_norm": 0.3464619517326355, "learning_rate": 8.584868944278053e-06, "loss": 0.041, "step": 33730 }, { "epoch": 1.7125234783491547, "grad_norm": 0.33929938077926636, "learning_rate": 8.583176811005636e-06, "loss": 0.0436, "step": 33735 }, { "epoch": 1.7127772983400171, "grad_norm": 0.28738826513290405, "learning_rate": 8.58148467773322e-06, "loss": 0.0445, "step": 33740 }, { "epoch": 1.7130311183308797, "grad_norm": 0.40138235688209534, "learning_rate": 8.579792544460802e-06, "loss": 0.0483, "step": 33745 }, { "epoch": 1.7132849383217423, "grad_norm": 0.3729156255722046, "learning_rate": 8.578100411188385e-06, "loss": 0.0422, "step": 33750 }, { "epoch": 1.7135387583126047, "grad_norm": 0.32851508259773254, "learning_rate": 8.57640827791597e-06, "loss": 0.0401, "step": 33755 }, { "epoch": 1.713792578303467, "grad_norm": 0.3027333617210388, "learning_rate": 8.574716144643552e-06, "loss": 0.0483, "step": 33760 }, { "epoch": 1.7140463982943297, "grad_norm": 0.305976539850235, "learning_rate": 8.573024011371136e-06, "loss": 0.0382, "step": 33765 }, { "epoch": 1.7143002182851923, "grad_norm": 0.3750983774662018, "learning_rate": 8.57133187809872e-06, "loss": 0.0482, "step": 33770 }, { "epoch": 1.7145540382760545, "grad_norm": 0.4367023706436157, "learning_rate": 8.569639744826303e-06, "loss": 0.0475, "step": 33775 }, { "epoch": 1.714807858266917, "grad_norm": 0.5546810626983643, "learning_rate": 8.567947611553886e-06, "loss": 0.051, "step": 33780 }, { "epoch": 1.7150616782577797, "grad_norm": 0.3331943452358246, "learning_rate": 8.56625547828147e-06, "loss": 0.0453, "step": 33785 }, { "epoch": 1.715315498248642, "grad_norm": 0.5244970321655273, "learning_rate": 8.564563345009054e-06, "loss": 0.0505, "step": 33790 }, { "epoch": 1.7155693182395044, "grad_norm": 0.35232967138290405, "learning_rate": 8.562871211736637e-06, "loss": 0.0435, "step": 33795 }, { "epoch": 1.715823138230367, "grad_norm": 0.24759510159492493, "learning_rate": 8.56117907846422e-06, "loss": 0.0415, "step": 33800 }, { "epoch": 1.7160769582212296, "grad_norm": 0.32473260164260864, "learning_rate": 8.559486945191804e-06, "loss": 0.0373, "step": 33805 }, { "epoch": 1.716330778212092, "grad_norm": 0.5221812129020691, "learning_rate": 8.557794811919388e-06, "loss": 0.0352, "step": 33810 }, { "epoch": 1.7165845982029544, "grad_norm": 0.32362228631973267, "learning_rate": 8.556102678646971e-06, "loss": 0.0428, "step": 33815 }, { "epoch": 1.716838418193817, "grad_norm": 0.3280642628669739, "learning_rate": 8.554410545374555e-06, "loss": 0.0423, "step": 33820 }, { "epoch": 1.7170922381846794, "grad_norm": 0.38787609338760376, "learning_rate": 8.552718412102138e-06, "loss": 0.0425, "step": 33825 }, { "epoch": 1.7173460581755418, "grad_norm": 0.39664000272750854, "learning_rate": 8.551026278829722e-06, "loss": 0.0419, "step": 33830 }, { "epoch": 1.7175998781664044, "grad_norm": 0.3131723999977112, "learning_rate": 8.549334145557305e-06, "loss": 0.0395, "step": 33835 }, { "epoch": 1.717853698157267, "grad_norm": 0.374670147895813, "learning_rate": 8.547642012284887e-06, "loss": 0.0365, "step": 33840 }, { "epoch": 1.7181075181481293, "grad_norm": 0.37094488739967346, "learning_rate": 8.545949879012473e-06, "loss": 0.0479, "step": 33845 }, { "epoch": 1.7183613381389917, "grad_norm": 0.30541858077049255, "learning_rate": 8.544257745740056e-06, "loss": 0.0438, "step": 33850 }, { "epoch": 1.7186151581298543, "grad_norm": 0.3539015054702759, "learning_rate": 8.542565612467638e-06, "loss": 0.0447, "step": 33855 }, { "epoch": 1.718868978120717, "grad_norm": 0.23813435435295105, "learning_rate": 8.540873479195221e-06, "loss": 0.0373, "step": 33860 }, { "epoch": 1.7191227981115793, "grad_norm": 0.2674414813518524, "learning_rate": 8.539181345922805e-06, "loss": 0.0461, "step": 33865 }, { "epoch": 1.7193766181024417, "grad_norm": 0.37278127670288086, "learning_rate": 8.53748921265039e-06, "loss": 0.0405, "step": 33870 }, { "epoch": 1.7196304380933043, "grad_norm": 0.268686443567276, "learning_rate": 8.535797079377972e-06, "loss": 0.0375, "step": 33875 }, { "epoch": 1.7198842580841667, "grad_norm": 0.3514656722545624, "learning_rate": 8.534104946105556e-06, "loss": 0.0381, "step": 33880 }, { "epoch": 1.720138078075029, "grad_norm": 0.2861263155937195, "learning_rate": 8.53241281283314e-06, "loss": 0.0442, "step": 33885 }, { "epoch": 1.7203918980658917, "grad_norm": 0.375036358833313, "learning_rate": 8.530720679560723e-06, "loss": 0.0324, "step": 33890 }, { "epoch": 1.7206457180567543, "grad_norm": 0.2760937511920929, "learning_rate": 8.529028546288306e-06, "loss": 0.0402, "step": 33895 }, { "epoch": 1.7208995380476166, "grad_norm": 0.3707115948200226, "learning_rate": 8.52733641301589e-06, "loss": 0.0406, "step": 33900 }, { "epoch": 1.721153358038479, "grad_norm": 0.31657668948173523, "learning_rate": 8.525644279743473e-06, "loss": 0.0423, "step": 33905 }, { "epoch": 1.7214071780293416, "grad_norm": 0.45224529504776, "learning_rate": 8.523952146471057e-06, "loss": 0.0406, "step": 33910 }, { "epoch": 1.7216609980202042, "grad_norm": 0.3776727616786957, "learning_rate": 8.52226001319864e-06, "loss": 0.0416, "step": 33915 }, { "epoch": 1.7219148180110666, "grad_norm": 0.32330262660980225, "learning_rate": 8.520567879926224e-06, "loss": 0.0366, "step": 33920 }, { "epoch": 1.722168638001929, "grad_norm": 0.3480754494667053, "learning_rate": 8.518875746653808e-06, "loss": 0.0437, "step": 33925 }, { "epoch": 1.7224224579927916, "grad_norm": 0.2876611053943634, "learning_rate": 8.51718361338139e-06, "loss": 0.0355, "step": 33930 }, { "epoch": 1.722676277983654, "grad_norm": 0.24682271480560303, "learning_rate": 8.515491480108975e-06, "loss": 0.0329, "step": 33935 }, { "epoch": 1.7229300979745164, "grad_norm": 0.22861187160015106, "learning_rate": 8.513799346836558e-06, "loss": 0.044, "step": 33940 }, { "epoch": 1.723183917965379, "grad_norm": 0.4073830246925354, "learning_rate": 8.512107213564142e-06, "loss": 0.0445, "step": 33945 }, { "epoch": 1.7234377379562416, "grad_norm": 0.2965492308139801, "learning_rate": 8.510415080291724e-06, "loss": 0.0381, "step": 33950 }, { "epoch": 1.723691557947104, "grad_norm": 0.2638970911502838, "learning_rate": 8.508722947019307e-06, "loss": 0.0382, "step": 33955 }, { "epoch": 1.7239453779379663, "grad_norm": 0.35170966386795044, "learning_rate": 8.507030813746892e-06, "loss": 0.0375, "step": 33960 }, { "epoch": 1.724199197928829, "grad_norm": 0.3397308588027954, "learning_rate": 8.505338680474476e-06, "loss": 0.034, "step": 33965 }, { "epoch": 1.7244530179196913, "grad_norm": 0.47272270917892456, "learning_rate": 8.503646547202058e-06, "loss": 0.0417, "step": 33970 }, { "epoch": 1.7247068379105537, "grad_norm": 0.3830793499946594, "learning_rate": 8.501954413929641e-06, "loss": 0.0333, "step": 33975 }, { "epoch": 1.7249606579014163, "grad_norm": 0.5919451713562012, "learning_rate": 8.500262280657225e-06, "loss": 0.0412, "step": 33980 }, { "epoch": 1.7252144778922789, "grad_norm": 0.3923093378543854, "learning_rate": 8.498570147384808e-06, "loss": 0.0414, "step": 33985 }, { "epoch": 1.7254682978831413, "grad_norm": 0.2887297570705414, "learning_rate": 8.496878014112392e-06, "loss": 0.0359, "step": 33990 }, { "epoch": 1.7257221178740036, "grad_norm": 0.26516517996788025, "learning_rate": 8.495185880839975e-06, "loss": 0.0345, "step": 33995 }, { "epoch": 1.7259759378648662, "grad_norm": 0.359676331281662, "learning_rate": 8.493493747567559e-06, "loss": 0.0428, "step": 34000 }, { "epoch": 1.7262297578557289, "grad_norm": 0.5799069404602051, "learning_rate": 8.491801614295143e-06, "loss": 0.037, "step": 34005 }, { "epoch": 1.7264835778465912, "grad_norm": 0.23710906505584717, "learning_rate": 8.490109481022726e-06, "loss": 0.0419, "step": 34010 }, { "epoch": 1.7267373978374536, "grad_norm": 0.2692362368106842, "learning_rate": 8.48841734775031e-06, "loss": 0.0377, "step": 34015 }, { "epoch": 1.7269912178283162, "grad_norm": 0.6800029873847961, "learning_rate": 8.486725214477893e-06, "loss": 0.0404, "step": 34020 }, { "epoch": 1.7272450378191786, "grad_norm": 0.37395069003105164, "learning_rate": 8.485033081205477e-06, "loss": 0.0511, "step": 34025 }, { "epoch": 1.727498857810041, "grad_norm": 0.5215486884117126, "learning_rate": 8.48334094793306e-06, "loss": 0.0412, "step": 34030 }, { "epoch": 1.7277526778009036, "grad_norm": 0.30943822860717773, "learning_rate": 8.481648814660644e-06, "loss": 0.0424, "step": 34035 }, { "epoch": 1.7280064977917662, "grad_norm": 0.32664647698402405, "learning_rate": 8.479956681388227e-06, "loss": 0.043, "step": 34040 }, { "epoch": 1.7282603177826286, "grad_norm": 2.2540807723999023, "learning_rate": 8.47826454811581e-06, "loss": 0.0425, "step": 34045 }, { "epoch": 1.728514137773491, "grad_norm": 0.43366777896881104, "learning_rate": 8.476572414843394e-06, "loss": 0.0347, "step": 34050 }, { "epoch": 1.7287679577643535, "grad_norm": 0.5059844255447388, "learning_rate": 8.474880281570978e-06, "loss": 0.0385, "step": 34055 }, { "epoch": 1.7290217777552161, "grad_norm": 0.2656039297580719, "learning_rate": 8.473188148298562e-06, "loss": 0.0363, "step": 34060 }, { "epoch": 1.7292755977460785, "grad_norm": 0.3212661147117615, "learning_rate": 8.471496015026143e-06, "loss": 0.049, "step": 34065 }, { "epoch": 1.729529417736941, "grad_norm": 0.6338233947753906, "learning_rate": 8.469803881753727e-06, "loss": 0.0459, "step": 34070 }, { "epoch": 1.7297832377278035, "grad_norm": 0.3562254309654236, "learning_rate": 8.46811174848131e-06, "loss": 0.0384, "step": 34075 }, { "epoch": 1.730037057718666, "grad_norm": 0.25323736667633057, "learning_rate": 8.466419615208894e-06, "loss": 0.0377, "step": 34080 }, { "epoch": 1.7302908777095283, "grad_norm": 0.35645341873168945, "learning_rate": 8.464727481936478e-06, "loss": 0.0425, "step": 34085 }, { "epoch": 1.7305446977003909, "grad_norm": 0.3992117941379547, "learning_rate": 8.463035348664061e-06, "loss": 0.0406, "step": 34090 }, { "epoch": 1.7307985176912535, "grad_norm": 0.40293341875076294, "learning_rate": 8.461343215391645e-06, "loss": 0.0504, "step": 34095 }, { "epoch": 1.7310523376821159, "grad_norm": 0.27027973532676697, "learning_rate": 8.459651082119228e-06, "loss": 0.0476, "step": 34100 }, { "epoch": 1.7313061576729782, "grad_norm": 0.44675561785697937, "learning_rate": 8.457958948846812e-06, "loss": 0.0453, "step": 34105 }, { "epoch": 1.7315599776638408, "grad_norm": 0.2727741301059723, "learning_rate": 8.456266815574395e-06, "loss": 0.036, "step": 34110 }, { "epoch": 1.7318137976547034, "grad_norm": 0.3078666627407074, "learning_rate": 8.454574682301979e-06, "loss": 0.035, "step": 34115 }, { "epoch": 1.7320676176455656, "grad_norm": 0.2535753846168518, "learning_rate": 8.452882549029562e-06, "loss": 0.039, "step": 34120 }, { "epoch": 1.7323214376364282, "grad_norm": 0.3663230538368225, "learning_rate": 8.451190415757146e-06, "loss": 0.0383, "step": 34125 }, { "epoch": 1.7325752576272908, "grad_norm": 0.34004485607147217, "learning_rate": 8.44949828248473e-06, "loss": 0.033, "step": 34130 }, { "epoch": 1.7328290776181532, "grad_norm": 0.25209248065948486, "learning_rate": 8.447806149212313e-06, "loss": 0.0359, "step": 34135 }, { "epoch": 1.7330828976090156, "grad_norm": 0.351085364818573, "learning_rate": 8.446114015939897e-06, "loss": 0.041, "step": 34140 }, { "epoch": 1.7333367175998782, "grad_norm": 0.28116995096206665, "learning_rate": 8.44442188266748e-06, "loss": 0.0351, "step": 34145 }, { "epoch": 1.7335905375907408, "grad_norm": 0.4009757936000824, "learning_rate": 8.442729749395064e-06, "loss": 0.0397, "step": 34150 }, { "epoch": 1.7338443575816032, "grad_norm": 0.5399395227432251, "learning_rate": 8.441037616122647e-06, "loss": 0.0443, "step": 34155 }, { "epoch": 1.7340981775724655, "grad_norm": 0.28884685039520264, "learning_rate": 8.439345482850229e-06, "loss": 0.0359, "step": 34160 }, { "epoch": 1.7343519975633281, "grad_norm": 0.3574964702129364, "learning_rate": 8.437653349577813e-06, "loss": 0.0439, "step": 34165 }, { "epoch": 1.7346058175541905, "grad_norm": 0.28065600991249084, "learning_rate": 8.435961216305398e-06, "loss": 0.0398, "step": 34170 }, { "epoch": 1.734859637545053, "grad_norm": 0.3115881085395813, "learning_rate": 8.43426908303298e-06, "loss": 0.0366, "step": 34175 }, { "epoch": 1.7351134575359155, "grad_norm": 0.3346794545650482, "learning_rate": 8.432576949760563e-06, "loss": 0.0398, "step": 34180 }, { "epoch": 1.735367277526778, "grad_norm": 0.3550897240638733, "learning_rate": 8.430884816488147e-06, "loss": 0.0385, "step": 34185 }, { "epoch": 1.7356210975176405, "grad_norm": 0.2994544208049774, "learning_rate": 8.42919268321573e-06, "loss": 0.033, "step": 34190 }, { "epoch": 1.7358749175085029, "grad_norm": 0.2627989947795868, "learning_rate": 8.427500549943314e-06, "loss": 0.0326, "step": 34195 }, { "epoch": 1.7361287374993655, "grad_norm": 0.3173164427280426, "learning_rate": 8.425808416670897e-06, "loss": 0.0393, "step": 34200 }, { "epoch": 1.736382557490228, "grad_norm": 0.40492770075798035, "learning_rate": 8.424116283398481e-06, "loss": 0.0412, "step": 34205 }, { "epoch": 1.7366363774810905, "grad_norm": 0.5181750059127808, "learning_rate": 8.422424150126065e-06, "loss": 0.0397, "step": 34210 }, { "epoch": 1.7368901974719528, "grad_norm": 0.3461681008338928, "learning_rate": 8.420732016853648e-06, "loss": 0.0413, "step": 34215 }, { "epoch": 1.7371440174628154, "grad_norm": 0.4116683304309845, "learning_rate": 8.419039883581232e-06, "loss": 0.0383, "step": 34220 }, { "epoch": 1.7373978374536778, "grad_norm": 0.3937915861606598, "learning_rate": 8.417347750308815e-06, "loss": 0.0503, "step": 34225 }, { "epoch": 1.7376516574445402, "grad_norm": 0.2615407109260559, "learning_rate": 8.415655617036399e-06, "loss": 0.0401, "step": 34230 }, { "epoch": 1.7379054774354028, "grad_norm": 0.3176824152469635, "learning_rate": 8.413963483763982e-06, "loss": 0.0414, "step": 34235 }, { "epoch": 1.7381592974262654, "grad_norm": 0.3036479353904724, "learning_rate": 8.412271350491566e-06, "loss": 0.0442, "step": 34240 }, { "epoch": 1.7384131174171278, "grad_norm": 0.3770005404949188, "learning_rate": 8.41057921721915e-06, "loss": 0.0486, "step": 34245 }, { "epoch": 1.7386669374079902, "grad_norm": 0.30073773860931396, "learning_rate": 8.408887083946731e-06, "loss": 0.0381, "step": 34250 }, { "epoch": 1.7389207573988528, "grad_norm": 0.38862597942352295, "learning_rate": 8.407194950674315e-06, "loss": 0.0328, "step": 34255 }, { "epoch": 1.7391745773897154, "grad_norm": 0.3898008167743683, "learning_rate": 8.4055028174019e-06, "loss": 0.0441, "step": 34260 }, { "epoch": 1.7394283973805775, "grad_norm": 0.3264569342136383, "learning_rate": 8.403810684129484e-06, "loss": 0.041, "step": 34265 }, { "epoch": 1.7396822173714401, "grad_norm": 0.3390885293483734, "learning_rate": 8.402118550857065e-06, "loss": 0.0515, "step": 34270 }, { "epoch": 1.7399360373623027, "grad_norm": 0.371646910905838, "learning_rate": 8.400426417584649e-06, "loss": 0.0428, "step": 34275 }, { "epoch": 1.7401898573531651, "grad_norm": 0.46684572100639343, "learning_rate": 8.398734284312232e-06, "loss": 0.042, "step": 34280 }, { "epoch": 1.7404436773440275, "grad_norm": 0.2889266610145569, "learning_rate": 8.397042151039818e-06, "loss": 0.0357, "step": 34285 }, { "epoch": 1.74069749733489, "grad_norm": 0.27424633502960205, "learning_rate": 8.3953500177674e-06, "loss": 0.0359, "step": 34290 }, { "epoch": 1.7409513173257527, "grad_norm": 0.2002898007631302, "learning_rate": 8.393657884494983e-06, "loss": 0.036, "step": 34295 }, { "epoch": 1.741205137316615, "grad_norm": 0.3924329876899719, "learning_rate": 8.391965751222567e-06, "loss": 0.0358, "step": 34300 }, { "epoch": 1.7414589573074775, "grad_norm": 0.25661349296569824, "learning_rate": 8.39027361795015e-06, "loss": 0.0328, "step": 34305 }, { "epoch": 1.74171277729834, "grad_norm": 0.19355782866477966, "learning_rate": 8.388581484677734e-06, "loss": 0.0312, "step": 34310 }, { "epoch": 1.7419665972892024, "grad_norm": 0.32729625701904297, "learning_rate": 8.386889351405317e-06, "loss": 0.0468, "step": 34315 }, { "epoch": 1.7422204172800648, "grad_norm": 0.36025574803352356, "learning_rate": 8.3851972181329e-06, "loss": 0.0428, "step": 34320 }, { "epoch": 1.7424742372709274, "grad_norm": 0.39251708984375, "learning_rate": 8.383505084860484e-06, "loss": 0.0366, "step": 34325 }, { "epoch": 1.74272805726179, "grad_norm": 0.5001270771026611, "learning_rate": 8.381812951588068e-06, "loss": 0.0404, "step": 34330 }, { "epoch": 1.7429818772526524, "grad_norm": 0.36959490180015564, "learning_rate": 8.380120818315651e-06, "loss": 0.037, "step": 34335 }, { "epoch": 1.7432356972435148, "grad_norm": 0.3076193928718567, "learning_rate": 8.378428685043235e-06, "loss": 0.0389, "step": 34340 }, { "epoch": 1.7434895172343774, "grad_norm": 0.37228232622146606, "learning_rate": 8.376736551770819e-06, "loss": 0.0495, "step": 34345 }, { "epoch": 1.74374333722524, "grad_norm": 0.3191566467285156, "learning_rate": 8.375044418498402e-06, "loss": 0.0344, "step": 34350 }, { "epoch": 1.7439971572161024, "grad_norm": 0.3981260061264038, "learning_rate": 8.373352285225986e-06, "loss": 0.0434, "step": 34355 }, { "epoch": 1.7442509772069648, "grad_norm": 0.310577392578125, "learning_rate": 8.37166015195357e-06, "loss": 0.0335, "step": 34360 }, { "epoch": 1.7445047971978274, "grad_norm": 0.4347289204597473, "learning_rate": 8.369968018681151e-06, "loss": 0.0336, "step": 34365 }, { "epoch": 1.7447586171886897, "grad_norm": 0.3463854491710663, "learning_rate": 8.368275885408735e-06, "loss": 0.0437, "step": 34370 }, { "epoch": 1.7450124371795521, "grad_norm": 0.32871583104133606, "learning_rate": 8.36658375213632e-06, "loss": 0.04, "step": 34375 }, { "epoch": 1.7452662571704147, "grad_norm": 0.5799885988235474, "learning_rate": 8.364891618863903e-06, "loss": 0.0471, "step": 34380 }, { "epoch": 1.7455200771612773, "grad_norm": 0.49882349371910095, "learning_rate": 8.363199485591485e-06, "loss": 0.0408, "step": 34385 }, { "epoch": 1.7457738971521397, "grad_norm": 0.29698601365089417, "learning_rate": 8.361507352319069e-06, "loss": 0.0415, "step": 34390 }, { "epoch": 1.746027717143002, "grad_norm": 0.3538743555545807, "learning_rate": 8.359815219046652e-06, "loss": 0.0442, "step": 34395 }, { "epoch": 1.7462815371338647, "grad_norm": 0.20709893107414246, "learning_rate": 8.358123085774238e-06, "loss": 0.0355, "step": 34400 }, { "epoch": 1.7465353571247273, "grad_norm": 0.27079060673713684, "learning_rate": 8.35643095250182e-06, "loss": 0.0468, "step": 34405 }, { "epoch": 1.7467891771155897, "grad_norm": 0.37064772844314575, "learning_rate": 8.354738819229403e-06, "loss": 0.0377, "step": 34410 }, { "epoch": 1.747042997106452, "grad_norm": 0.38094353675842285, "learning_rate": 8.353046685956986e-06, "loss": 0.0427, "step": 34415 }, { "epoch": 1.7472968170973147, "grad_norm": 0.3148980140686035, "learning_rate": 8.35135455268457e-06, "loss": 0.0339, "step": 34420 }, { "epoch": 1.747550637088177, "grad_norm": 0.49832138419151306, "learning_rate": 8.349662419412154e-06, "loss": 0.0409, "step": 34425 }, { "epoch": 1.7478044570790394, "grad_norm": 0.3183405101299286, "learning_rate": 8.347970286139737e-06, "loss": 0.0418, "step": 34430 }, { "epoch": 1.748058277069902, "grad_norm": 0.3183467984199524, "learning_rate": 8.34627815286732e-06, "loss": 0.0352, "step": 34435 }, { "epoch": 1.7483120970607646, "grad_norm": 0.38760673999786377, "learning_rate": 8.344586019594904e-06, "loss": 0.0423, "step": 34440 }, { "epoch": 1.748565917051627, "grad_norm": 0.25345578789711, "learning_rate": 8.342893886322488e-06, "loss": 0.0322, "step": 34445 }, { "epoch": 1.7488197370424894, "grad_norm": 0.3874957859516144, "learning_rate": 8.341201753050071e-06, "loss": 0.0399, "step": 34450 }, { "epoch": 1.749073557033352, "grad_norm": 0.31035029888153076, "learning_rate": 8.339509619777655e-06, "loss": 0.0429, "step": 34455 }, { "epoch": 1.7493273770242146, "grad_norm": 0.5256080627441406, "learning_rate": 8.337817486505237e-06, "loss": 0.0369, "step": 34460 }, { "epoch": 1.7495811970150768, "grad_norm": 0.6625921130180359, "learning_rate": 8.336125353232822e-06, "loss": 0.0367, "step": 34465 }, { "epoch": 1.7498350170059394, "grad_norm": 0.2330664098262787, "learning_rate": 8.334433219960405e-06, "loss": 0.0389, "step": 34470 }, { "epoch": 1.750088836996802, "grad_norm": 0.6050800085067749, "learning_rate": 8.332741086687989e-06, "loss": 0.0381, "step": 34475 }, { "epoch": 1.7503426569876643, "grad_norm": 0.3633989691734314, "learning_rate": 8.33104895341557e-06, "loss": 0.0333, "step": 34480 }, { "epoch": 1.7505964769785267, "grad_norm": 0.25826677680015564, "learning_rate": 8.329356820143154e-06, "loss": 0.0405, "step": 34485 }, { "epoch": 1.7508502969693893, "grad_norm": 0.2678346037864685, "learning_rate": 8.32766468687074e-06, "loss": 0.041, "step": 34490 }, { "epoch": 1.751104116960252, "grad_norm": 0.8798556923866272, "learning_rate": 8.325972553598321e-06, "loss": 0.0472, "step": 34495 }, { "epoch": 1.7513579369511143, "grad_norm": 0.4030362367630005, "learning_rate": 8.324280420325905e-06, "loss": 0.0423, "step": 34500 }, { "epoch": 1.7516117569419767, "grad_norm": 0.5761476159095764, "learning_rate": 8.322588287053489e-06, "loss": 0.0368, "step": 34505 }, { "epoch": 1.7518655769328393, "grad_norm": 0.2705753743648529, "learning_rate": 8.320896153781072e-06, "loss": 0.038, "step": 34510 }, { "epoch": 1.7521193969237017, "grad_norm": 0.3345622420310974, "learning_rate": 8.319204020508656e-06, "loss": 0.0366, "step": 34515 }, { "epoch": 1.752373216914564, "grad_norm": 0.2705942392349243, "learning_rate": 8.31751188723624e-06, "loss": 0.0358, "step": 34520 }, { "epoch": 1.7526270369054266, "grad_norm": 0.28924059867858887, "learning_rate": 8.315819753963823e-06, "loss": 0.0347, "step": 34525 }, { "epoch": 1.7528808568962893, "grad_norm": 0.2418852597475052, "learning_rate": 8.314127620691406e-06, "loss": 0.0324, "step": 34530 }, { "epoch": 1.7531346768871516, "grad_norm": 0.399440735578537, "learning_rate": 8.31243548741899e-06, "loss": 0.0415, "step": 34535 }, { "epoch": 1.753388496878014, "grad_norm": 0.37920889258384705, "learning_rate": 8.310743354146573e-06, "loss": 0.0381, "step": 34540 }, { "epoch": 1.7536423168688766, "grad_norm": 0.37504783272743225, "learning_rate": 8.309051220874157e-06, "loss": 0.0441, "step": 34545 }, { "epoch": 1.7538961368597392, "grad_norm": 0.3581880033016205, "learning_rate": 8.30735908760174e-06, "loss": 0.0376, "step": 34550 }, { "epoch": 1.7541499568506016, "grad_norm": 0.4461826980113983, "learning_rate": 8.305666954329324e-06, "loss": 0.0412, "step": 34555 }, { "epoch": 1.754403776841464, "grad_norm": 0.29656851291656494, "learning_rate": 8.303974821056908e-06, "loss": 0.0315, "step": 34560 }, { "epoch": 1.7546575968323266, "grad_norm": 0.3078388273715973, "learning_rate": 8.302282687784491e-06, "loss": 0.0354, "step": 34565 }, { "epoch": 1.754911416823189, "grad_norm": 0.4501648247241974, "learning_rate": 8.300590554512073e-06, "loss": 0.0379, "step": 34570 }, { "epoch": 1.7551652368140513, "grad_norm": 0.6488507986068726, "learning_rate": 8.298898421239656e-06, "loss": 0.0429, "step": 34575 }, { "epoch": 1.755419056804914, "grad_norm": 0.2891724407672882, "learning_rate": 8.297206287967242e-06, "loss": 0.0447, "step": 34580 }, { "epoch": 1.7556728767957765, "grad_norm": 0.4944934546947479, "learning_rate": 8.295514154694825e-06, "loss": 0.032, "step": 34585 }, { "epoch": 1.755926696786639, "grad_norm": 0.3709994852542877, "learning_rate": 8.293822021422407e-06, "loss": 0.0426, "step": 34590 }, { "epoch": 1.7561805167775013, "grad_norm": 0.552474856376648, "learning_rate": 8.29212988814999e-06, "loss": 0.0358, "step": 34595 }, { "epoch": 1.756434336768364, "grad_norm": 0.3508904278278351, "learning_rate": 8.290437754877574e-06, "loss": 0.0324, "step": 34600 }, { "epoch": 1.7566881567592265, "grad_norm": 0.3192765712738037, "learning_rate": 8.28874562160516e-06, "loss": 0.038, "step": 34605 }, { "epoch": 1.7569419767500887, "grad_norm": 0.3167833983898163, "learning_rate": 8.287053488332741e-06, "loss": 0.0431, "step": 34610 }, { "epoch": 1.7571957967409513, "grad_norm": 0.33134904503822327, "learning_rate": 8.285361355060325e-06, "loss": 0.0391, "step": 34615 }, { "epoch": 1.7574496167318139, "grad_norm": 0.5783857107162476, "learning_rate": 8.283669221787908e-06, "loss": 0.0446, "step": 34620 }, { "epoch": 1.7577034367226763, "grad_norm": 0.24570982158184052, "learning_rate": 8.281977088515492e-06, "loss": 0.0376, "step": 34625 }, { "epoch": 1.7579572567135386, "grad_norm": 0.29740071296691895, "learning_rate": 8.280284955243075e-06, "loss": 0.0436, "step": 34630 }, { "epoch": 1.7582110767044012, "grad_norm": 0.4562722444534302, "learning_rate": 8.278592821970659e-06, "loss": 0.0445, "step": 34635 }, { "epoch": 1.7584648966952638, "grad_norm": 0.5596370100975037, "learning_rate": 8.276900688698243e-06, "loss": 0.0402, "step": 34640 }, { "epoch": 1.7587187166861262, "grad_norm": 0.38506823778152466, "learning_rate": 8.275208555425826e-06, "loss": 0.0374, "step": 34645 }, { "epoch": 1.7589725366769886, "grad_norm": 0.2869029641151428, "learning_rate": 8.27351642215341e-06, "loss": 0.0391, "step": 34650 }, { "epoch": 1.7592263566678512, "grad_norm": 0.5008401274681091, "learning_rate": 8.271824288880993e-06, "loss": 0.0458, "step": 34655 }, { "epoch": 1.7594801766587136, "grad_norm": 0.3160930871963501, "learning_rate": 8.270132155608577e-06, "loss": 0.0399, "step": 34660 }, { "epoch": 1.759733996649576, "grad_norm": 0.32366958260536194, "learning_rate": 8.268440022336159e-06, "loss": 0.0427, "step": 34665 }, { "epoch": 1.7599878166404386, "grad_norm": 0.25260210037231445, "learning_rate": 8.266747889063744e-06, "loss": 0.0413, "step": 34670 }, { "epoch": 1.7602416366313012, "grad_norm": 0.33976075053215027, "learning_rate": 8.265055755791327e-06, "loss": 0.0402, "step": 34675 }, { "epoch": 1.7604954566221636, "grad_norm": 0.47365057468414307, "learning_rate": 8.263363622518911e-06, "loss": 0.0412, "step": 34680 }, { "epoch": 1.760749276613026, "grad_norm": 0.28111734986305237, "learning_rate": 8.261671489246493e-06, "loss": 0.0363, "step": 34685 }, { "epoch": 1.7610030966038885, "grad_norm": 0.220921590924263, "learning_rate": 8.259979355974076e-06, "loss": 0.0368, "step": 34690 }, { "epoch": 1.7612569165947511, "grad_norm": 0.3260694146156311, "learning_rate": 8.258287222701662e-06, "loss": 0.0401, "step": 34695 }, { "epoch": 1.7615107365856135, "grad_norm": 0.32202768325805664, "learning_rate": 8.256595089429245e-06, "loss": 0.0422, "step": 34700 }, { "epoch": 1.761764556576476, "grad_norm": 0.3090651333332062, "learning_rate": 8.254902956156827e-06, "loss": 0.0417, "step": 34705 }, { "epoch": 1.7620183765673385, "grad_norm": 0.3767678737640381, "learning_rate": 8.25321082288441e-06, "loss": 0.043, "step": 34710 }, { "epoch": 1.7622721965582009, "grad_norm": 0.4838859736919403, "learning_rate": 8.251518689611994e-06, "loss": 0.036, "step": 34715 }, { "epoch": 1.7625260165490633, "grad_norm": 0.4400741755962372, "learning_rate": 8.24982655633958e-06, "loss": 0.0347, "step": 34720 }, { "epoch": 1.7627798365399259, "grad_norm": 0.31652846932411194, "learning_rate": 8.248134423067161e-06, "loss": 0.0543, "step": 34725 }, { "epoch": 1.7630336565307885, "grad_norm": 0.46276259422302246, "learning_rate": 8.246442289794745e-06, "loss": 0.0405, "step": 34730 }, { "epoch": 1.7632874765216509, "grad_norm": 0.3353394567966461, "learning_rate": 8.244750156522328e-06, "loss": 0.0348, "step": 34735 }, { "epoch": 1.7635412965125132, "grad_norm": 0.7226163148880005, "learning_rate": 8.243058023249912e-06, "loss": 0.0379, "step": 34740 }, { "epoch": 1.7637951165033758, "grad_norm": 0.3163387179374695, "learning_rate": 8.241365889977495e-06, "loss": 0.0415, "step": 34745 }, { "epoch": 1.7640489364942384, "grad_norm": 0.36522141098976135, "learning_rate": 8.239673756705079e-06, "loss": 0.0397, "step": 34750 }, { "epoch": 1.7643027564851008, "grad_norm": 0.4560871124267578, "learning_rate": 8.237981623432662e-06, "loss": 0.0454, "step": 34755 }, { "epoch": 1.7645565764759632, "grad_norm": 0.6275542974472046, "learning_rate": 8.236289490160246e-06, "loss": 0.0401, "step": 34760 }, { "epoch": 1.7648103964668258, "grad_norm": 0.3421061336994171, "learning_rate": 8.23459735688783e-06, "loss": 0.0354, "step": 34765 }, { "epoch": 1.7650642164576882, "grad_norm": 0.2922126054763794, "learning_rate": 8.232905223615413e-06, "loss": 0.0379, "step": 34770 }, { "epoch": 1.7653180364485506, "grad_norm": 0.37141862511634827, "learning_rate": 8.231213090342997e-06, "loss": 0.0377, "step": 34775 }, { "epoch": 1.7655718564394132, "grad_norm": 0.2555692493915558, "learning_rate": 8.229520957070578e-06, "loss": 0.0321, "step": 34780 }, { "epoch": 1.7658256764302758, "grad_norm": 0.47595980763435364, "learning_rate": 8.227828823798164e-06, "loss": 0.046, "step": 34785 }, { "epoch": 1.7660794964211382, "grad_norm": 0.31954678893089294, "learning_rate": 8.226136690525747e-06, "loss": 0.0324, "step": 34790 }, { "epoch": 1.7663333164120005, "grad_norm": 0.30136197805404663, "learning_rate": 8.22444455725333e-06, "loss": 0.0454, "step": 34795 }, { "epoch": 1.7665871364028631, "grad_norm": 0.41031014919281006, "learning_rate": 8.222752423980913e-06, "loss": 0.0378, "step": 34800 }, { "epoch": 1.7668409563937255, "grad_norm": 0.2514888048171997, "learning_rate": 8.221060290708496e-06, "loss": 0.0348, "step": 34805 }, { "epoch": 1.767094776384588, "grad_norm": 0.34350481629371643, "learning_rate": 8.219368157436081e-06, "loss": 0.0353, "step": 34810 }, { "epoch": 1.7673485963754505, "grad_norm": 0.27384987473487854, "learning_rate": 8.217676024163663e-06, "loss": 0.0332, "step": 34815 }, { "epoch": 1.767602416366313, "grad_norm": 0.40239375829696655, "learning_rate": 8.215983890891247e-06, "loss": 0.0414, "step": 34820 }, { "epoch": 1.7678562363571755, "grad_norm": 0.31507280468940735, "learning_rate": 8.21429175761883e-06, "loss": 0.041, "step": 34825 }, { "epoch": 1.7681100563480379, "grad_norm": 0.5452533960342407, "learning_rate": 8.212599624346414e-06, "loss": 0.0472, "step": 34830 }, { "epoch": 1.7683638763389005, "grad_norm": 0.2969473600387573, "learning_rate": 8.210907491073997e-06, "loss": 0.0356, "step": 34835 }, { "epoch": 1.768617696329763, "grad_norm": 0.4441262483596802, "learning_rate": 8.209215357801581e-06, "loss": 0.0414, "step": 34840 }, { "epoch": 1.7688715163206254, "grad_norm": 0.3782421946525574, "learning_rate": 8.207523224529165e-06, "loss": 0.0413, "step": 34845 }, { "epoch": 1.7691253363114878, "grad_norm": 0.3684667646884918, "learning_rate": 8.205831091256748e-06, "loss": 0.0388, "step": 34850 }, { "epoch": 1.7693791563023504, "grad_norm": 0.3120320439338684, "learning_rate": 8.204138957984332e-06, "loss": 0.0404, "step": 34855 }, { "epoch": 1.7696329762932128, "grad_norm": 0.46274301409721375, "learning_rate": 8.202446824711915e-06, "loss": 0.0383, "step": 34860 }, { "epoch": 1.7698867962840752, "grad_norm": 0.3160901367664337, "learning_rate": 8.200754691439499e-06, "loss": 0.0349, "step": 34865 }, { "epoch": 1.7701406162749378, "grad_norm": 0.3432873487472534, "learning_rate": 8.199062558167082e-06, "loss": 0.0451, "step": 34870 }, { "epoch": 1.7703944362658004, "grad_norm": 0.3663712739944458, "learning_rate": 8.197370424894666e-06, "loss": 0.037, "step": 34875 }, { "epoch": 1.7706482562566628, "grad_norm": 0.29719987511634827, "learning_rate": 8.19567829162225e-06, "loss": 0.038, "step": 34880 }, { "epoch": 1.7709020762475252, "grad_norm": 0.3122144639492035, "learning_rate": 8.193986158349833e-06, "loss": 0.0416, "step": 34885 }, { "epoch": 1.7711558962383878, "grad_norm": 0.3486882150173187, "learning_rate": 8.192294025077415e-06, "loss": 0.0453, "step": 34890 }, { "epoch": 1.7714097162292504, "grad_norm": 0.2783128023147583, "learning_rate": 8.190601891804998e-06, "loss": 0.0503, "step": 34895 }, { "epoch": 1.7716635362201127, "grad_norm": 0.36007043719291687, "learning_rate": 8.188909758532583e-06, "loss": 0.0362, "step": 34900 }, { "epoch": 1.7719173562109751, "grad_norm": 0.7849182486534119, "learning_rate": 8.187217625260167e-06, "loss": 0.0401, "step": 34905 }, { "epoch": 1.7721711762018377, "grad_norm": 0.35825115442276, "learning_rate": 8.185525491987749e-06, "loss": 0.0429, "step": 34910 }, { "epoch": 1.7724249961927, "grad_norm": 0.5037504434585571, "learning_rate": 8.183833358715332e-06, "loss": 0.0364, "step": 34915 }, { "epoch": 1.7726788161835625, "grad_norm": 0.47052305936813354, "learning_rate": 8.182141225442916e-06, "loss": 0.0427, "step": 34920 }, { "epoch": 1.772932636174425, "grad_norm": 0.45571646094322205, "learning_rate": 8.180449092170501e-06, "loss": 0.0461, "step": 34925 }, { "epoch": 1.7731864561652877, "grad_norm": 0.32976770401000977, "learning_rate": 8.178756958898083e-06, "loss": 0.0397, "step": 34930 }, { "epoch": 1.77344027615615, "grad_norm": 0.369817852973938, "learning_rate": 8.177064825625667e-06, "loss": 0.0477, "step": 34935 }, { "epoch": 1.7736940961470125, "grad_norm": 0.31425940990448, "learning_rate": 8.17537269235325e-06, "loss": 0.0397, "step": 34940 }, { "epoch": 1.773947916137875, "grad_norm": 0.6579198837280273, "learning_rate": 8.173680559080834e-06, "loss": 0.0482, "step": 34945 }, { "epoch": 1.7742017361287377, "grad_norm": 0.3199804127216339, "learning_rate": 8.171988425808417e-06, "loss": 0.0355, "step": 34950 }, { "epoch": 1.7744555561195998, "grad_norm": 0.3551079332828522, "learning_rate": 8.170296292536e-06, "loss": 0.0411, "step": 34955 }, { "epoch": 1.7747093761104624, "grad_norm": 0.4659683108329773, "learning_rate": 8.168604159263584e-06, "loss": 0.0335, "step": 34960 }, { "epoch": 1.774963196101325, "grad_norm": 0.28088241815567017, "learning_rate": 8.166912025991168e-06, "loss": 0.0415, "step": 34965 }, { "epoch": 1.7752170160921874, "grad_norm": 0.37877973914146423, "learning_rate": 8.165219892718751e-06, "loss": 0.0426, "step": 34970 }, { "epoch": 1.7754708360830498, "grad_norm": 0.8946444392204285, "learning_rate": 8.163527759446335e-06, "loss": 0.0396, "step": 34975 }, { "epoch": 1.7757246560739124, "grad_norm": 0.29175713658332825, "learning_rate": 8.161835626173919e-06, "loss": 0.0315, "step": 34980 }, { "epoch": 1.775978476064775, "grad_norm": 0.7535488605499268, "learning_rate": 8.1601434929015e-06, "loss": 0.0439, "step": 34985 }, { "epoch": 1.7762322960556374, "grad_norm": 0.39257892966270447, "learning_rate": 8.158451359629086e-06, "loss": 0.0476, "step": 34990 }, { "epoch": 1.7764861160464998, "grad_norm": 0.4855389893054962, "learning_rate": 8.156759226356669e-06, "loss": 0.0364, "step": 34995 }, { "epoch": 1.7767399360373624, "grad_norm": 0.3152945935726166, "learning_rate": 8.155067093084253e-06, "loss": 0.0389, "step": 35000 }, { "epoch": 1.7769937560282247, "grad_norm": 0.34567198157310486, "learning_rate": 8.153374959811835e-06, "loss": 0.0413, "step": 35005 }, { "epoch": 1.7772475760190871, "grad_norm": 0.36939743161201477, "learning_rate": 8.151682826539418e-06, "loss": 0.0448, "step": 35010 }, { "epoch": 1.7775013960099497, "grad_norm": 0.3870468735694885, "learning_rate": 8.149990693267003e-06, "loss": 0.037, "step": 35015 }, { "epoch": 1.7777552160008123, "grad_norm": 0.3341517150402069, "learning_rate": 8.148298559994587e-06, "loss": 0.0411, "step": 35020 }, { "epoch": 1.7780090359916747, "grad_norm": 0.5142117738723755, "learning_rate": 8.146606426722169e-06, "loss": 0.0442, "step": 35025 }, { "epoch": 1.778262855982537, "grad_norm": 0.28023678064346313, "learning_rate": 8.144914293449752e-06, "loss": 0.0456, "step": 35030 }, { "epoch": 1.7785166759733997, "grad_norm": 0.6822634935379028, "learning_rate": 8.143222160177336e-06, "loss": 0.041, "step": 35035 }, { "epoch": 1.7787704959642623, "grad_norm": 0.33477500081062317, "learning_rate": 8.141530026904921e-06, "loss": 0.0323, "step": 35040 }, { "epoch": 1.7790243159551247, "grad_norm": 0.17918257415294647, "learning_rate": 8.139837893632503e-06, "loss": 0.0325, "step": 35045 }, { "epoch": 1.779278135945987, "grad_norm": 0.5592842102050781, "learning_rate": 8.138145760360086e-06, "loss": 0.0378, "step": 35050 }, { "epoch": 1.7795319559368497, "grad_norm": 0.2886914610862732, "learning_rate": 8.13645362708767e-06, "loss": 0.0454, "step": 35055 }, { "epoch": 1.779785775927712, "grad_norm": 0.5471748113632202, "learning_rate": 8.134761493815254e-06, "loss": 0.0363, "step": 35060 }, { "epoch": 1.7800395959185744, "grad_norm": 0.347551554441452, "learning_rate": 8.133069360542837e-06, "loss": 0.0323, "step": 35065 }, { "epoch": 1.780293415909437, "grad_norm": 0.38865914940834045, "learning_rate": 8.13137722727042e-06, "loss": 0.0343, "step": 35070 }, { "epoch": 1.7805472359002996, "grad_norm": 0.5800191760063171, "learning_rate": 8.129685093998004e-06, "loss": 0.0358, "step": 35075 }, { "epoch": 1.780801055891162, "grad_norm": 0.4911668300628662, "learning_rate": 8.127992960725588e-06, "loss": 0.045, "step": 35080 }, { "epoch": 1.7810548758820244, "grad_norm": 0.3816292881965637, "learning_rate": 8.126300827453171e-06, "loss": 0.0387, "step": 35085 }, { "epoch": 1.781308695872887, "grad_norm": 0.3930221498012543, "learning_rate": 8.124608694180755e-06, "loss": 0.0391, "step": 35090 }, { "epoch": 1.7815625158637496, "grad_norm": 0.7621637582778931, "learning_rate": 8.122916560908338e-06, "loss": 0.0381, "step": 35095 }, { "epoch": 1.7818163358546117, "grad_norm": 0.2887486517429352, "learning_rate": 8.12122442763592e-06, "loss": 0.0387, "step": 35100 }, { "epoch": 1.7820701558454743, "grad_norm": 0.46208956837654114, "learning_rate": 8.119532294363505e-06, "loss": 0.0422, "step": 35105 }, { "epoch": 1.782323975836337, "grad_norm": 0.34289759397506714, "learning_rate": 8.117840161091089e-06, "loss": 0.0365, "step": 35110 }, { "epoch": 1.7825777958271993, "grad_norm": 0.31571081280708313, "learning_rate": 8.116148027818673e-06, "loss": 0.0305, "step": 35115 }, { "epoch": 1.7828316158180617, "grad_norm": 0.24657775461673737, "learning_rate": 8.114455894546254e-06, "loss": 0.042, "step": 35120 }, { "epoch": 1.7830854358089243, "grad_norm": 0.22031854093074799, "learning_rate": 8.112763761273838e-06, "loss": 0.035, "step": 35125 }, { "epoch": 1.783339255799787, "grad_norm": 0.28018301725387573, "learning_rate": 8.111071628001423e-06, "loss": 0.0338, "step": 35130 }, { "epoch": 1.7835930757906493, "grad_norm": 0.45661869645118713, "learning_rate": 8.109379494729005e-06, "loss": 0.04, "step": 35135 }, { "epoch": 1.7838468957815117, "grad_norm": 0.5636528730392456, "learning_rate": 8.107687361456589e-06, "loss": 0.0386, "step": 35140 }, { "epoch": 1.7841007157723743, "grad_norm": 0.2825608253479004, "learning_rate": 8.105995228184172e-06, "loss": 0.0397, "step": 35145 }, { "epoch": 1.7843545357632367, "grad_norm": 0.3992137312889099, "learning_rate": 8.104303094911756e-06, "loss": 0.0468, "step": 35150 }, { "epoch": 1.784608355754099, "grad_norm": 0.22503553330898285, "learning_rate": 8.10261096163934e-06, "loss": 0.0354, "step": 35155 }, { "epoch": 1.7848621757449616, "grad_norm": 0.36107516288757324, "learning_rate": 8.100918828366923e-06, "loss": 0.0435, "step": 35160 }, { "epoch": 1.7851159957358242, "grad_norm": 0.29463401436805725, "learning_rate": 8.099226695094506e-06, "loss": 0.0403, "step": 35165 }, { "epoch": 1.7853698157266866, "grad_norm": 0.2965399920940399, "learning_rate": 8.09753456182209e-06, "loss": 0.041, "step": 35170 }, { "epoch": 1.785623635717549, "grad_norm": 0.5433416962623596, "learning_rate": 8.095842428549673e-06, "loss": 0.0434, "step": 35175 }, { "epoch": 1.7858774557084116, "grad_norm": 0.29908525943756104, "learning_rate": 8.094150295277257e-06, "loss": 0.0339, "step": 35180 }, { "epoch": 1.7861312756992742, "grad_norm": 0.2853911221027374, "learning_rate": 8.09245816200484e-06, "loss": 0.0397, "step": 35185 }, { "epoch": 1.7863850956901366, "grad_norm": 0.46170076727867126, "learning_rate": 8.090766028732424e-06, "loss": 0.0401, "step": 35190 }, { "epoch": 1.786638915680999, "grad_norm": 0.4234716296195984, "learning_rate": 8.089073895460008e-06, "loss": 0.0353, "step": 35195 }, { "epoch": 1.7868927356718616, "grad_norm": 0.46433013677597046, "learning_rate": 8.087381762187591e-06, "loss": 0.0373, "step": 35200 }, { "epoch": 1.787146555662724, "grad_norm": 0.33198902010917664, "learning_rate": 8.085689628915175e-06, "loss": 0.0353, "step": 35205 }, { "epoch": 1.7874003756535863, "grad_norm": 0.7026520371437073, "learning_rate": 8.083997495642756e-06, "loss": 0.0425, "step": 35210 }, { "epoch": 1.787654195644449, "grad_norm": 0.32603394985198975, "learning_rate": 8.08230536237034e-06, "loss": 0.0376, "step": 35215 }, { "epoch": 1.7879080156353115, "grad_norm": 0.3134501874446869, "learning_rate": 8.080613229097925e-06, "loss": 0.0419, "step": 35220 }, { "epoch": 1.788161835626174, "grad_norm": 0.3281780779361725, "learning_rate": 8.078921095825509e-06, "loss": 0.0538, "step": 35225 }, { "epoch": 1.7884156556170363, "grad_norm": 0.3749331831932068, "learning_rate": 8.07722896255309e-06, "loss": 0.0365, "step": 35230 }, { "epoch": 1.788669475607899, "grad_norm": 0.7166233062744141, "learning_rate": 8.075536829280674e-06, "loss": 0.051, "step": 35235 }, { "epoch": 1.7889232955987615, "grad_norm": 0.8942742347717285, "learning_rate": 8.073844696008258e-06, "loss": 0.0376, "step": 35240 }, { "epoch": 1.789177115589624, "grad_norm": 0.4451989233493805, "learning_rate": 8.072152562735841e-06, "loss": 0.0315, "step": 35245 }, { "epoch": 1.7894309355804863, "grad_norm": 0.3336634635925293, "learning_rate": 8.070460429463425e-06, "loss": 0.0406, "step": 35250 }, { "epoch": 1.7896847555713489, "grad_norm": 0.28594163060188293, "learning_rate": 8.068768296191008e-06, "loss": 0.0388, "step": 35255 }, { "epoch": 1.7899385755622113, "grad_norm": 0.3234853744506836, "learning_rate": 8.067076162918592e-06, "loss": 0.0389, "step": 35260 }, { "epoch": 1.7901923955530736, "grad_norm": 0.3901161551475525, "learning_rate": 8.065384029646175e-06, "loss": 0.0363, "step": 35265 }, { "epoch": 1.7904462155439362, "grad_norm": 0.30935701727867126, "learning_rate": 8.063691896373759e-06, "loss": 0.0399, "step": 35270 }, { "epoch": 1.7907000355347988, "grad_norm": 0.5069534778594971, "learning_rate": 8.061999763101343e-06, "loss": 0.0408, "step": 35275 }, { "epoch": 1.7909538555256612, "grad_norm": 0.20961259305477142, "learning_rate": 8.060307629828926e-06, "loss": 0.0329, "step": 35280 }, { "epoch": 1.7912076755165236, "grad_norm": 0.32768288254737854, "learning_rate": 8.05861549655651e-06, "loss": 0.0393, "step": 35285 }, { "epoch": 1.7914614955073862, "grad_norm": 0.3863822817802429, "learning_rate": 8.056923363284093e-06, "loss": 0.0361, "step": 35290 }, { "epoch": 1.7917153154982488, "grad_norm": 0.40561339259147644, "learning_rate": 8.055231230011677e-06, "loss": 0.043, "step": 35295 }, { "epoch": 1.791969135489111, "grad_norm": 0.43230199813842773, "learning_rate": 8.05353909673926e-06, "loss": 0.0435, "step": 35300 }, { "epoch": 1.7922229554799736, "grad_norm": 0.3429777920246124, "learning_rate": 8.051846963466842e-06, "loss": 0.0356, "step": 35305 }, { "epoch": 1.7924767754708362, "grad_norm": 0.3415292203426361, "learning_rate": 8.050154830194427e-06, "loss": 0.0375, "step": 35310 }, { "epoch": 1.7927305954616986, "grad_norm": 0.260860413312912, "learning_rate": 8.048462696922011e-06, "loss": 0.0343, "step": 35315 }, { "epoch": 1.792984415452561, "grad_norm": 0.41929513216018677, "learning_rate": 8.046770563649594e-06, "loss": 0.0417, "step": 35320 }, { "epoch": 1.7932382354434235, "grad_norm": 0.4116337299346924, "learning_rate": 8.045078430377176e-06, "loss": 0.0359, "step": 35325 }, { "epoch": 1.7934920554342861, "grad_norm": 0.2833936810493469, "learning_rate": 8.04338629710476e-06, "loss": 0.0345, "step": 35330 }, { "epoch": 1.7937458754251485, "grad_norm": 0.36605337262153625, "learning_rate": 8.041694163832343e-06, "loss": 0.0442, "step": 35335 }, { "epoch": 1.793999695416011, "grad_norm": 0.44311726093292236, "learning_rate": 8.040002030559929e-06, "loss": 0.0378, "step": 35340 }, { "epoch": 1.7942535154068735, "grad_norm": 0.3758402466773987, "learning_rate": 8.03830989728751e-06, "loss": 0.0516, "step": 35345 }, { "epoch": 1.7945073353977359, "grad_norm": 0.38618481159210205, "learning_rate": 8.036617764015094e-06, "loss": 0.0403, "step": 35350 }, { "epoch": 1.7947611553885983, "grad_norm": 0.4462096691131592, "learning_rate": 8.034925630742678e-06, "loss": 0.0391, "step": 35355 }, { "epoch": 1.7950149753794609, "grad_norm": 0.34843987226486206, "learning_rate": 8.033233497470261e-06, "loss": 0.038, "step": 35360 }, { "epoch": 1.7952687953703235, "grad_norm": 0.35366612672805786, "learning_rate": 8.031541364197845e-06, "loss": 0.0506, "step": 35365 }, { "epoch": 1.7955226153611858, "grad_norm": 0.2395736426115036, "learning_rate": 8.029849230925428e-06, "loss": 0.0443, "step": 35370 }, { "epoch": 1.7957764353520482, "grad_norm": 0.4678282141685486, "learning_rate": 8.028157097653012e-06, "loss": 0.0418, "step": 35375 }, { "epoch": 1.7960302553429108, "grad_norm": 0.37620967626571655, "learning_rate": 8.026464964380595e-06, "loss": 0.0458, "step": 35380 }, { "epoch": 1.7962840753337734, "grad_norm": 0.4443618059158325, "learning_rate": 8.024772831108179e-06, "loss": 0.0382, "step": 35385 }, { "epoch": 1.7965378953246358, "grad_norm": 0.22134755551815033, "learning_rate": 8.023080697835762e-06, "loss": 0.0361, "step": 35390 }, { "epoch": 1.7967917153154982, "grad_norm": 0.3631151616573334, "learning_rate": 8.021388564563346e-06, "loss": 0.0411, "step": 35395 }, { "epoch": 1.7970455353063608, "grad_norm": 0.29067274928092957, "learning_rate": 8.01969643129093e-06, "loss": 0.0439, "step": 35400 }, { "epoch": 1.7972993552972232, "grad_norm": 0.3182755410671234, "learning_rate": 8.018004298018513e-06, "loss": 0.0455, "step": 35405 }, { "epoch": 1.7975531752880856, "grad_norm": 0.518097996711731, "learning_rate": 8.016312164746097e-06, "loss": 0.041, "step": 35410 }, { "epoch": 1.7978069952789482, "grad_norm": 0.3362354338169098, "learning_rate": 8.01462003147368e-06, "loss": 0.0403, "step": 35415 }, { "epoch": 1.7980608152698108, "grad_norm": 0.23532569408416748, "learning_rate": 8.012927898201262e-06, "loss": 0.0395, "step": 35420 }, { "epoch": 1.7983146352606731, "grad_norm": 0.2703220546245575, "learning_rate": 8.011235764928847e-06, "loss": 0.0434, "step": 35425 }, { "epoch": 1.7985684552515355, "grad_norm": 0.36935853958129883, "learning_rate": 8.00954363165643e-06, "loss": 0.041, "step": 35430 }, { "epoch": 1.7988222752423981, "grad_norm": 0.34497174620628357, "learning_rate": 8.007851498384014e-06, "loss": 0.0353, "step": 35435 }, { "epoch": 1.7990760952332607, "grad_norm": 0.7554081678390503, "learning_rate": 8.006159365111596e-06, "loss": 0.0431, "step": 35440 }, { "epoch": 1.7993299152241229, "grad_norm": 0.32350003719329834, "learning_rate": 8.00446723183918e-06, "loss": 0.0336, "step": 35445 }, { "epoch": 1.7995837352149855, "grad_norm": 0.27758076786994934, "learning_rate": 8.002775098566763e-06, "loss": 0.0492, "step": 35450 }, { "epoch": 1.799837555205848, "grad_norm": 0.29208967089653015, "learning_rate": 8.001082965294347e-06, "loss": 0.0403, "step": 35455 }, { "epoch": 1.8000913751967105, "grad_norm": 0.5288401246070862, "learning_rate": 7.99939083202193e-06, "loss": 0.0398, "step": 35460 }, { "epoch": 1.8003451951875729, "grad_norm": 0.4076966643333435, "learning_rate": 7.997698698749514e-06, "loss": 0.0368, "step": 35465 }, { "epoch": 1.8005990151784355, "grad_norm": 0.3328183889389038, "learning_rate": 7.996006565477097e-06, "loss": 0.0359, "step": 35470 }, { "epoch": 1.800852835169298, "grad_norm": 0.37678319215774536, "learning_rate": 7.994314432204681e-06, "loss": 0.0408, "step": 35475 }, { "epoch": 1.8011066551601604, "grad_norm": 1.1589629650115967, "learning_rate": 7.992622298932265e-06, "loss": 0.0345, "step": 35480 }, { "epoch": 1.8013604751510228, "grad_norm": 0.41570189595222473, "learning_rate": 7.990930165659848e-06, "loss": 0.0382, "step": 35485 }, { "epoch": 1.8016142951418854, "grad_norm": 0.30130791664123535, "learning_rate": 7.989238032387432e-06, "loss": 0.0313, "step": 35490 }, { "epoch": 1.8018681151327478, "grad_norm": 0.5530520081520081, "learning_rate": 7.987545899115015e-06, "loss": 0.0452, "step": 35495 }, { "epoch": 1.8021219351236102, "grad_norm": 0.33491554856300354, "learning_rate": 7.985853765842599e-06, "loss": 0.0381, "step": 35500 }, { "epoch": 1.8023757551144728, "grad_norm": 0.3501150608062744, "learning_rate": 7.984161632570182e-06, "loss": 0.0517, "step": 35505 }, { "epoch": 1.8026295751053354, "grad_norm": 0.25684893131256104, "learning_rate": 7.982469499297766e-06, "loss": 0.0497, "step": 35510 }, { "epoch": 1.8028833950961978, "grad_norm": 0.29228684306144714, "learning_rate": 7.98077736602535e-06, "loss": 0.0396, "step": 35515 }, { "epoch": 1.8031372150870602, "grad_norm": 0.3270423412322998, "learning_rate": 7.979085232752933e-06, "loss": 0.0396, "step": 35520 }, { "epoch": 1.8033910350779228, "grad_norm": 0.3722864091396332, "learning_rate": 7.977393099480516e-06, "loss": 0.0492, "step": 35525 }, { "epoch": 1.8036448550687854, "grad_norm": 0.424862802028656, "learning_rate": 7.9757009662081e-06, "loss": 0.0461, "step": 35530 }, { "epoch": 1.8038986750596477, "grad_norm": 0.4169599413871765, "learning_rate": 7.974008832935682e-06, "loss": 0.0377, "step": 35535 }, { "epoch": 1.8041524950505101, "grad_norm": 0.30000168085098267, "learning_rate": 7.972316699663265e-06, "loss": 0.039, "step": 35540 }, { "epoch": 1.8044063150413727, "grad_norm": 0.4431754946708679, "learning_rate": 7.97062456639085e-06, "loss": 0.0388, "step": 35545 }, { "epoch": 1.804660135032235, "grad_norm": 0.373129278421402, "learning_rate": 7.968932433118432e-06, "loss": 0.0357, "step": 35550 }, { "epoch": 1.8049139550230975, "grad_norm": 0.44162359833717346, "learning_rate": 7.967240299846016e-06, "loss": 0.0477, "step": 35555 }, { "epoch": 1.80516777501396, "grad_norm": 0.35507842898368835, "learning_rate": 7.9655481665736e-06, "loss": 0.0372, "step": 35560 }, { "epoch": 1.8054215950048227, "grad_norm": 0.32913821935653687, "learning_rate": 7.963856033301183e-06, "loss": 0.033, "step": 35565 }, { "epoch": 1.805675414995685, "grad_norm": 0.3151976466178894, "learning_rate": 7.962163900028767e-06, "loss": 0.0387, "step": 35570 }, { "epoch": 1.8059292349865474, "grad_norm": 0.492038369178772, "learning_rate": 7.96047176675635e-06, "loss": 0.0351, "step": 35575 }, { "epoch": 1.80618305497741, "grad_norm": 0.37101173400878906, "learning_rate": 7.958779633483934e-06, "loss": 0.0396, "step": 35580 }, { "epoch": 1.8064368749682727, "grad_norm": 0.24208274483680725, "learning_rate": 7.957087500211517e-06, "loss": 0.0334, "step": 35585 }, { "epoch": 1.806690694959135, "grad_norm": 0.8557137846946716, "learning_rate": 7.9553953669391e-06, "loss": 0.0385, "step": 35590 }, { "epoch": 1.8069445149499974, "grad_norm": 0.33417654037475586, "learning_rate": 7.953703233666684e-06, "loss": 0.0427, "step": 35595 }, { "epoch": 1.80719833494086, "grad_norm": 0.36135685443878174, "learning_rate": 7.952011100394268e-06, "loss": 0.0433, "step": 35600 }, { "epoch": 1.8074521549317224, "grad_norm": 0.6363187432289124, "learning_rate": 7.950318967121851e-06, "loss": 0.053, "step": 35605 }, { "epoch": 1.8077059749225848, "grad_norm": 0.41718024015426636, "learning_rate": 7.948626833849435e-06, "loss": 0.0349, "step": 35610 }, { "epoch": 1.8079597949134474, "grad_norm": 0.3126942813396454, "learning_rate": 7.946934700577019e-06, "loss": 0.039, "step": 35615 }, { "epoch": 1.80821361490431, "grad_norm": 0.34499695897102356, "learning_rate": 7.945242567304602e-06, "loss": 0.039, "step": 35620 }, { "epoch": 1.8084674348951724, "grad_norm": 0.3745400607585907, "learning_rate": 7.943550434032184e-06, "loss": 0.0413, "step": 35625 }, { "epoch": 1.8087212548860347, "grad_norm": 0.34194043278694153, "learning_rate": 7.941858300759767e-06, "loss": 0.0402, "step": 35630 }, { "epoch": 1.8089750748768973, "grad_norm": 0.3365754187107086, "learning_rate": 7.940166167487353e-06, "loss": 0.0386, "step": 35635 }, { "epoch": 1.8092288948677597, "grad_norm": 0.5197813510894775, "learning_rate": 7.938474034214936e-06, "loss": 0.0395, "step": 35640 }, { "epoch": 1.809482714858622, "grad_norm": 0.35244494676589966, "learning_rate": 7.936781900942518e-06, "loss": 0.0412, "step": 35645 }, { "epoch": 1.8097365348494847, "grad_norm": 0.31359171867370605, "learning_rate": 7.935089767670102e-06, "loss": 0.0339, "step": 35650 }, { "epoch": 1.8099903548403473, "grad_norm": 0.32754194736480713, "learning_rate": 7.933397634397685e-06, "loss": 0.0362, "step": 35655 }, { "epoch": 1.8102441748312097, "grad_norm": 0.6635916233062744, "learning_rate": 7.93170550112527e-06, "loss": 0.0353, "step": 35660 }, { "epoch": 1.810497994822072, "grad_norm": 0.3632308840751648, "learning_rate": 7.930013367852852e-06, "loss": 0.0379, "step": 35665 }, { "epoch": 1.8107518148129347, "grad_norm": 0.2154543250799179, "learning_rate": 7.928321234580436e-06, "loss": 0.0557, "step": 35670 }, { "epoch": 1.8110056348037973, "grad_norm": 0.31016165018081665, "learning_rate": 7.92662910130802e-06, "loss": 0.0429, "step": 35675 }, { "epoch": 1.8112594547946597, "grad_norm": 0.3344568610191345, "learning_rate": 7.924936968035603e-06, "loss": 0.0385, "step": 35680 }, { "epoch": 1.811513274785522, "grad_norm": 0.412442684173584, "learning_rate": 7.923244834763186e-06, "loss": 0.0372, "step": 35685 }, { "epoch": 1.8117670947763846, "grad_norm": 0.24281445145606995, "learning_rate": 7.92155270149077e-06, "loss": 0.0332, "step": 35690 }, { "epoch": 1.812020914767247, "grad_norm": 0.512369692325592, "learning_rate": 7.919860568218354e-06, "loss": 0.0458, "step": 35695 }, { "epoch": 1.8122747347581094, "grad_norm": 0.26524874567985535, "learning_rate": 7.918168434945937e-06, "loss": 0.0309, "step": 35700 }, { "epoch": 1.812528554748972, "grad_norm": 0.31035247445106506, "learning_rate": 7.91647630167352e-06, "loss": 0.0422, "step": 35705 }, { "epoch": 1.8127823747398346, "grad_norm": 0.3776507079601288, "learning_rate": 7.914784168401104e-06, "loss": 0.0363, "step": 35710 }, { "epoch": 1.813036194730697, "grad_norm": 0.3676854074001312, "learning_rate": 7.913092035128688e-06, "loss": 0.0418, "step": 35715 }, { "epoch": 1.8132900147215594, "grad_norm": 0.37066182494163513, "learning_rate": 7.91139990185627e-06, "loss": 0.041, "step": 35720 }, { "epoch": 1.813543834712422, "grad_norm": 0.36492326855659485, "learning_rate": 7.909707768583855e-06, "loss": 0.0403, "step": 35725 }, { "epoch": 1.8137976547032846, "grad_norm": 0.36690789461135864, "learning_rate": 7.908015635311438e-06, "loss": 0.0295, "step": 35730 }, { "epoch": 1.814051474694147, "grad_norm": 0.35419976711273193, "learning_rate": 7.906323502039022e-06, "loss": 0.0438, "step": 35735 }, { "epoch": 1.8143052946850093, "grad_norm": 0.24726921319961548, "learning_rate": 7.904631368766604e-06, "loss": 0.0355, "step": 35740 }, { "epoch": 1.814559114675872, "grad_norm": 0.27474740147590637, "learning_rate": 7.902939235494187e-06, "loss": 0.0389, "step": 35745 }, { "epoch": 1.8148129346667343, "grad_norm": 0.37380725145339966, "learning_rate": 7.901247102221773e-06, "loss": 0.035, "step": 35750 }, { "epoch": 1.8150667546575967, "grad_norm": 0.3170168697834015, "learning_rate": 7.899554968949356e-06, "loss": 0.0416, "step": 35755 }, { "epoch": 1.8153205746484593, "grad_norm": 0.26000136137008667, "learning_rate": 7.897862835676938e-06, "loss": 0.0311, "step": 35760 }, { "epoch": 1.815574394639322, "grad_norm": 0.4417473077774048, "learning_rate": 7.896170702404521e-06, "loss": 0.0433, "step": 35765 }, { "epoch": 1.8158282146301843, "grad_norm": 0.23624826967716217, "learning_rate": 7.894478569132105e-06, "loss": 0.0384, "step": 35770 }, { "epoch": 1.8160820346210467, "grad_norm": 0.4043356776237488, "learning_rate": 7.892786435859689e-06, "loss": 0.0307, "step": 35775 }, { "epoch": 1.8163358546119093, "grad_norm": 0.8513171076774597, "learning_rate": 7.891094302587272e-06, "loss": 0.0451, "step": 35780 }, { "epoch": 1.8165896746027719, "grad_norm": 0.4032558500766754, "learning_rate": 7.889402169314856e-06, "loss": 0.0392, "step": 35785 }, { "epoch": 1.816843494593634, "grad_norm": 0.2610934376716614, "learning_rate": 7.88771003604244e-06, "loss": 0.0439, "step": 35790 }, { "epoch": 1.8170973145844966, "grad_norm": 0.2699238061904907, "learning_rate": 7.886017902770023e-06, "loss": 0.0443, "step": 35795 }, { "epoch": 1.8173511345753592, "grad_norm": 0.5964365005493164, "learning_rate": 7.884325769497606e-06, "loss": 0.0401, "step": 35800 }, { "epoch": 1.8176049545662216, "grad_norm": 0.310382217168808, "learning_rate": 7.88263363622519e-06, "loss": 0.0417, "step": 35805 }, { "epoch": 1.817858774557084, "grad_norm": 0.2789522111415863, "learning_rate": 7.880941502952773e-06, "loss": 0.044, "step": 35810 }, { "epoch": 1.8181125945479466, "grad_norm": 0.2450733631849289, "learning_rate": 7.879249369680357e-06, "loss": 0.0302, "step": 35815 }, { "epoch": 1.8183664145388092, "grad_norm": 0.2876873314380646, "learning_rate": 7.87755723640794e-06, "loss": 0.045, "step": 35820 }, { "epoch": 1.8186202345296716, "grad_norm": 0.3161139190196991, "learning_rate": 7.875865103135524e-06, "loss": 0.0382, "step": 35825 }, { "epoch": 1.818874054520534, "grad_norm": 0.3722044825553894, "learning_rate": 7.874172969863108e-06, "loss": 0.0405, "step": 35830 }, { "epoch": 1.8191278745113966, "grad_norm": 0.3855113685131073, "learning_rate": 7.87248083659069e-06, "loss": 0.0355, "step": 35835 }, { "epoch": 1.819381694502259, "grad_norm": 0.7797011733055115, "learning_rate": 7.870788703318275e-06, "loss": 0.0452, "step": 35840 }, { "epoch": 1.8196355144931213, "grad_norm": 0.27733364701271057, "learning_rate": 7.869096570045858e-06, "loss": 0.0355, "step": 35845 }, { "epoch": 1.819889334483984, "grad_norm": 0.3695673942565918, "learning_rate": 7.867404436773442e-06, "loss": 0.0453, "step": 35850 }, { "epoch": 1.8201431544748465, "grad_norm": 0.30087968707084656, "learning_rate": 7.865712303501024e-06, "loss": 0.0298, "step": 35855 }, { "epoch": 1.820396974465709, "grad_norm": 0.2529532313346863, "learning_rate": 7.864020170228607e-06, "loss": 0.031, "step": 35860 }, { "epoch": 1.8206507944565713, "grad_norm": 0.6073928475379944, "learning_rate": 7.862328036956192e-06, "loss": 0.0365, "step": 35865 }, { "epoch": 1.820904614447434, "grad_norm": 0.5613511204719543, "learning_rate": 7.860635903683774e-06, "loss": 0.0441, "step": 35870 }, { "epoch": 1.8211584344382965, "grad_norm": 0.4455784857273102, "learning_rate": 7.858943770411358e-06, "loss": 0.0373, "step": 35875 }, { "epoch": 1.8214122544291589, "grad_norm": 0.5386607050895691, "learning_rate": 7.857251637138941e-06, "loss": 0.0334, "step": 35880 }, { "epoch": 1.8216660744200213, "grad_norm": 0.49189093708992004, "learning_rate": 7.855559503866525e-06, "loss": 0.0315, "step": 35885 }, { "epoch": 1.8219198944108839, "grad_norm": 0.571233332157135, "learning_rate": 7.853867370594108e-06, "loss": 0.0336, "step": 35890 }, { "epoch": 1.8221737144017462, "grad_norm": 0.2096501588821411, "learning_rate": 7.852175237321692e-06, "loss": 0.03, "step": 35895 }, { "epoch": 1.8224275343926086, "grad_norm": 0.32770031690597534, "learning_rate": 7.850483104049275e-06, "loss": 0.0378, "step": 35900 }, { "epoch": 1.8226813543834712, "grad_norm": 0.22983644902706146, "learning_rate": 7.848790970776859e-06, "loss": 0.0384, "step": 35905 }, { "epoch": 1.8229351743743338, "grad_norm": 0.35651785135269165, "learning_rate": 7.847098837504443e-06, "loss": 0.0374, "step": 35910 }, { "epoch": 1.8231889943651962, "grad_norm": 0.3615643084049225, "learning_rate": 7.845406704232026e-06, "loss": 0.0469, "step": 35915 }, { "epoch": 1.8234428143560586, "grad_norm": 0.4260779023170471, "learning_rate": 7.84371457095961e-06, "loss": 0.057, "step": 35920 }, { "epoch": 1.8236966343469212, "grad_norm": 0.2626129388809204, "learning_rate": 7.842022437687193e-06, "loss": 0.0389, "step": 35925 }, { "epoch": 1.8239504543377838, "grad_norm": 0.2752457559108734, "learning_rate": 7.840330304414777e-06, "loss": 0.0312, "step": 35930 }, { "epoch": 1.824204274328646, "grad_norm": 0.41361814737319946, "learning_rate": 7.83863817114236e-06, "loss": 0.0349, "step": 35935 }, { "epoch": 1.8244580943195086, "grad_norm": 0.39695820212364197, "learning_rate": 7.836946037869944e-06, "loss": 0.041, "step": 35940 }, { "epoch": 1.8247119143103712, "grad_norm": 0.3582683503627777, "learning_rate": 7.835253904597526e-06, "loss": 0.0376, "step": 35945 }, { "epoch": 1.8249657343012335, "grad_norm": 0.581875741481781, "learning_rate": 7.83356177132511e-06, "loss": 0.0327, "step": 35950 }, { "epoch": 1.825219554292096, "grad_norm": 0.2829011380672455, "learning_rate": 7.831869638052694e-06, "loss": 0.0395, "step": 35955 }, { "epoch": 1.8254733742829585, "grad_norm": 0.2991785407066345, "learning_rate": 7.830177504780278e-06, "loss": 0.0367, "step": 35960 }, { "epoch": 1.8257271942738211, "grad_norm": 0.6983319520950317, "learning_rate": 7.82848537150786e-06, "loss": 0.0401, "step": 35965 }, { "epoch": 1.8259810142646835, "grad_norm": 0.32356399297714233, "learning_rate": 7.826793238235443e-06, "loss": 0.0331, "step": 35970 }, { "epoch": 1.826234834255546, "grad_norm": 0.4386630058288574, "learning_rate": 7.825101104963027e-06, "loss": 0.036, "step": 35975 }, { "epoch": 1.8264886542464085, "grad_norm": 0.24106809496879578, "learning_rate": 7.823408971690612e-06, "loss": 0.0419, "step": 35980 }, { "epoch": 1.8267424742372709, "grad_norm": 0.2574845552444458, "learning_rate": 7.821716838418194e-06, "loss": 0.0384, "step": 35985 }, { "epoch": 1.8269962942281333, "grad_norm": 0.3407520055770874, "learning_rate": 7.820024705145778e-06, "loss": 0.0337, "step": 35990 }, { "epoch": 1.8272501142189959, "grad_norm": 0.4200335443019867, "learning_rate": 7.818332571873361e-06, "loss": 0.0374, "step": 35995 }, { "epoch": 1.8275039342098585, "grad_norm": 0.434444397687912, "learning_rate": 7.816640438600945e-06, "loss": 0.0364, "step": 36000 }, { "epoch": 1.8277577542007208, "grad_norm": 0.3582698404788971, "learning_rate": 7.814948305328528e-06, "loss": 0.0395, "step": 36005 }, { "epoch": 1.8280115741915832, "grad_norm": 0.3376217484474182, "learning_rate": 7.813256172056112e-06, "loss": 0.0421, "step": 36010 }, { "epoch": 1.8282653941824458, "grad_norm": 0.7544217109680176, "learning_rate": 7.811564038783695e-06, "loss": 0.0339, "step": 36015 }, { "epoch": 1.8285192141733084, "grad_norm": 0.24290993809700012, "learning_rate": 7.809871905511279e-06, "loss": 0.0427, "step": 36020 }, { "epoch": 1.8287730341641708, "grad_norm": 0.24777421355247498, "learning_rate": 7.808179772238862e-06, "loss": 0.0395, "step": 36025 }, { "epoch": 1.8290268541550332, "grad_norm": 0.23828460276126862, "learning_rate": 7.806487638966446e-06, "loss": 0.0364, "step": 36030 }, { "epoch": 1.8292806741458958, "grad_norm": 0.7653145790100098, "learning_rate": 7.80479550569403e-06, "loss": 0.0551, "step": 36035 }, { "epoch": 1.8295344941367582, "grad_norm": 0.519119143486023, "learning_rate": 7.803103372421611e-06, "loss": 0.037, "step": 36040 }, { "epoch": 1.8297883141276206, "grad_norm": 0.3949947953224182, "learning_rate": 7.801411239149197e-06, "loss": 0.0353, "step": 36045 }, { "epoch": 1.8300421341184832, "grad_norm": 0.3932245969772339, "learning_rate": 7.79971910587678e-06, "loss": 0.0326, "step": 36050 }, { "epoch": 1.8302959541093458, "grad_norm": 0.3780907690525055, "learning_rate": 7.798026972604364e-06, "loss": 0.0327, "step": 36055 }, { "epoch": 1.8305497741002081, "grad_norm": 0.22897011041641235, "learning_rate": 7.796334839331946e-06, "loss": 0.0416, "step": 36060 }, { "epoch": 1.8308035940910705, "grad_norm": 0.33317452669143677, "learning_rate": 7.794642706059529e-06, "loss": 0.0406, "step": 36065 }, { "epoch": 1.8310574140819331, "grad_norm": 0.3200877606868744, "learning_rate": 7.792950572787114e-06, "loss": 0.0415, "step": 36070 }, { "epoch": 1.8313112340727957, "grad_norm": 0.47098788619041443, "learning_rate": 7.791258439514698e-06, "loss": 0.0519, "step": 36075 }, { "epoch": 1.831565054063658, "grad_norm": 0.2071748822927475, "learning_rate": 7.78956630624228e-06, "loss": 0.0348, "step": 36080 }, { "epoch": 1.8318188740545205, "grad_norm": 0.8616563081741333, "learning_rate": 7.787874172969863e-06, "loss": 0.0374, "step": 36085 }, { "epoch": 1.832072694045383, "grad_norm": 0.4214298129081726, "learning_rate": 7.786182039697447e-06, "loss": 0.0459, "step": 36090 }, { "epoch": 1.8323265140362455, "grad_norm": 0.6901262998580933, "learning_rate": 7.784489906425032e-06, "loss": 0.0355, "step": 36095 }, { "epoch": 1.8325803340271078, "grad_norm": 0.7623701095581055, "learning_rate": 7.782797773152614e-06, "loss": 0.0494, "step": 36100 }, { "epoch": 1.8328341540179705, "grad_norm": 0.33716341853141785, "learning_rate": 7.781105639880197e-06, "loss": 0.0375, "step": 36105 }, { "epoch": 1.833087974008833, "grad_norm": 0.35571691393852234, "learning_rate": 7.779413506607781e-06, "loss": 0.0388, "step": 36110 }, { "epoch": 1.8333417939996954, "grad_norm": 0.5281639695167542, "learning_rate": 7.777721373335364e-06, "loss": 0.0364, "step": 36115 }, { "epoch": 1.8335956139905578, "grad_norm": 0.5555434823036194, "learning_rate": 7.776029240062948e-06, "loss": 0.0345, "step": 36120 }, { "epoch": 1.8338494339814204, "grad_norm": 0.5790799260139465, "learning_rate": 7.774337106790532e-06, "loss": 0.0356, "step": 36125 }, { "epoch": 1.834103253972283, "grad_norm": 0.5399708151817322, "learning_rate": 7.772644973518115e-06, "loss": 0.0362, "step": 36130 }, { "epoch": 1.8343570739631452, "grad_norm": 0.27483931183815, "learning_rate": 7.770952840245699e-06, "loss": 0.0347, "step": 36135 }, { "epoch": 1.8346108939540078, "grad_norm": 0.29935479164123535, "learning_rate": 7.769260706973282e-06, "loss": 0.0402, "step": 36140 }, { "epoch": 1.8348647139448704, "grad_norm": 0.46089279651641846, "learning_rate": 7.767568573700866e-06, "loss": 0.0359, "step": 36145 }, { "epoch": 1.8351185339357328, "grad_norm": 0.5341860055923462, "learning_rate": 7.76587644042845e-06, "loss": 0.0398, "step": 36150 }, { "epoch": 1.8353723539265951, "grad_norm": 0.3907197117805481, "learning_rate": 7.764184307156031e-06, "loss": 0.044, "step": 36155 }, { "epoch": 1.8356261739174577, "grad_norm": 0.3691658675670624, "learning_rate": 7.762492173883616e-06, "loss": 0.0367, "step": 36160 }, { "epoch": 1.8358799939083204, "grad_norm": 0.40355217456817627, "learning_rate": 7.7608000406112e-06, "loss": 0.0396, "step": 36165 }, { "epoch": 1.8361338138991827, "grad_norm": 0.38757845759391785, "learning_rate": 7.759107907338783e-06, "loss": 0.0404, "step": 36170 }, { "epoch": 1.8363876338900451, "grad_norm": 0.39395466446876526, "learning_rate": 7.757415774066365e-06, "loss": 0.0405, "step": 36175 }, { "epoch": 1.8366414538809077, "grad_norm": 0.41867950558662415, "learning_rate": 7.755723640793949e-06, "loss": 0.0343, "step": 36180 }, { "epoch": 1.83689527387177, "grad_norm": 0.220166876912117, "learning_rate": 7.754031507521534e-06, "loss": 0.0381, "step": 36185 }, { "epoch": 1.8371490938626325, "grad_norm": 0.34739238023757935, "learning_rate": 7.752339374249116e-06, "loss": 0.0341, "step": 36190 }, { "epoch": 1.837402913853495, "grad_norm": 0.37881341576576233, "learning_rate": 7.7506472409767e-06, "loss": 0.0377, "step": 36195 }, { "epoch": 1.8376567338443577, "grad_norm": 0.7510872483253479, "learning_rate": 7.748955107704283e-06, "loss": 0.0342, "step": 36200 }, { "epoch": 1.83791055383522, "grad_norm": 0.7937986254692078, "learning_rate": 7.747262974431867e-06, "loss": 0.0396, "step": 36205 }, { "epoch": 1.8381643738260824, "grad_norm": 0.5486979484558105, "learning_rate": 7.74557084115945e-06, "loss": 0.0402, "step": 36210 }, { "epoch": 1.838418193816945, "grad_norm": 0.31487876176834106, "learning_rate": 7.743878707887034e-06, "loss": 0.0542, "step": 36215 }, { "epoch": 1.8386720138078076, "grad_norm": 0.39621078968048096, "learning_rate": 7.742186574614617e-06, "loss": 0.0357, "step": 36220 }, { "epoch": 1.83892583379867, "grad_norm": 0.4235934913158417, "learning_rate": 7.7404944413422e-06, "loss": 0.0446, "step": 36225 }, { "epoch": 1.8391796537895324, "grad_norm": 0.30609992146492004, "learning_rate": 7.738802308069784e-06, "loss": 0.0413, "step": 36230 }, { "epoch": 1.839433473780395, "grad_norm": 0.47948646545410156, "learning_rate": 7.737110174797368e-06, "loss": 0.0413, "step": 36235 }, { "epoch": 1.8396872937712574, "grad_norm": 0.34571924805641174, "learning_rate": 7.735418041524951e-06, "loss": 0.0296, "step": 36240 }, { "epoch": 1.8399411137621198, "grad_norm": 0.6670829653739929, "learning_rate": 7.733725908252535e-06, "loss": 0.0469, "step": 36245 }, { "epoch": 1.8401949337529824, "grad_norm": 0.29783716797828674, "learning_rate": 7.732033774980119e-06, "loss": 0.0395, "step": 36250 }, { "epoch": 1.840448753743845, "grad_norm": 0.47688236832618713, "learning_rate": 7.730341641707702e-06, "loss": 0.0398, "step": 36255 }, { "epoch": 1.8407025737347074, "grad_norm": 0.278981477022171, "learning_rate": 7.728649508435286e-06, "loss": 0.0375, "step": 36260 }, { "epoch": 1.8409563937255697, "grad_norm": 0.3316904306411743, "learning_rate": 7.726957375162867e-06, "loss": 0.0416, "step": 36265 }, { "epoch": 1.8412102137164323, "grad_norm": 0.33688676357269287, "learning_rate": 7.725265241890451e-06, "loss": 0.0347, "step": 36270 }, { "epoch": 1.841464033707295, "grad_norm": 0.37452802062034607, "learning_rate": 7.723573108618036e-06, "loss": 0.0481, "step": 36275 }, { "epoch": 1.841717853698157, "grad_norm": 0.41721731424331665, "learning_rate": 7.72188097534562e-06, "loss": 0.0358, "step": 36280 }, { "epoch": 1.8419716736890197, "grad_norm": 0.2928161323070526, "learning_rate": 7.720188842073202e-06, "loss": 0.035, "step": 36285 }, { "epoch": 1.8422254936798823, "grad_norm": 0.4405878186225891, "learning_rate": 7.718496708800785e-06, "loss": 0.0356, "step": 36290 }, { "epoch": 1.8424793136707447, "grad_norm": 0.23799636960029602, "learning_rate": 7.716804575528369e-06, "loss": 0.0364, "step": 36295 }, { "epoch": 1.842733133661607, "grad_norm": 0.3568035960197449, "learning_rate": 7.715112442255954e-06, "loss": 0.0375, "step": 36300 }, { "epoch": 1.8429869536524697, "grad_norm": 0.4410528540611267, "learning_rate": 7.713420308983536e-06, "loss": 0.0423, "step": 36305 }, { "epoch": 1.8432407736433323, "grad_norm": 0.30826106667518616, "learning_rate": 7.71172817571112e-06, "loss": 0.0357, "step": 36310 }, { "epoch": 1.8434945936341947, "grad_norm": 0.9397932887077332, "learning_rate": 7.710036042438703e-06, "loss": 0.0384, "step": 36315 }, { "epoch": 1.843748413625057, "grad_norm": 0.4034275412559509, "learning_rate": 7.708343909166286e-06, "loss": 0.0408, "step": 36320 }, { "epoch": 1.8440022336159196, "grad_norm": 0.29389598965644836, "learning_rate": 7.70665177589387e-06, "loss": 0.0356, "step": 36325 }, { "epoch": 1.844256053606782, "grad_norm": 0.3444618284702301, "learning_rate": 7.704959642621454e-06, "loss": 0.0347, "step": 36330 }, { "epoch": 1.8445098735976444, "grad_norm": 0.49377360939979553, "learning_rate": 7.703267509349037e-06, "loss": 0.0443, "step": 36335 }, { "epoch": 1.844763693588507, "grad_norm": 0.2680582106113434, "learning_rate": 7.70157537607662e-06, "loss": 0.0426, "step": 36340 }, { "epoch": 1.8450175135793696, "grad_norm": 0.316648006439209, "learning_rate": 7.699883242804204e-06, "loss": 0.037, "step": 36345 }, { "epoch": 1.845271333570232, "grad_norm": 0.5903269648551941, "learning_rate": 7.698191109531788e-06, "loss": 0.0419, "step": 36350 }, { "epoch": 1.8455251535610944, "grad_norm": 0.35091760754585266, "learning_rate": 7.696498976259371e-06, "loss": 0.0401, "step": 36355 }, { "epoch": 1.845778973551957, "grad_norm": 0.2766816318035126, "learning_rate": 7.694806842986953e-06, "loss": 0.0362, "step": 36360 }, { "epoch": 1.8460327935428196, "grad_norm": 0.38903939723968506, "learning_rate": 7.693114709714538e-06, "loss": 0.0435, "step": 36365 }, { "epoch": 1.846286613533682, "grad_norm": 0.5219350457191467, "learning_rate": 7.691422576442122e-06, "loss": 0.0498, "step": 36370 }, { "epoch": 1.8465404335245443, "grad_norm": 0.26614704728126526, "learning_rate": 7.689730443169705e-06, "loss": 0.0383, "step": 36375 }, { "epoch": 1.846794253515407, "grad_norm": 0.27631068229675293, "learning_rate": 7.688038309897287e-06, "loss": 0.0356, "step": 36380 }, { "epoch": 1.8470480735062693, "grad_norm": 0.3098543584346771, "learning_rate": 7.68634617662487e-06, "loss": 0.0387, "step": 36385 }, { "epoch": 1.8473018934971317, "grad_norm": 0.4202713966369629, "learning_rate": 7.684654043352456e-06, "loss": 0.0392, "step": 36390 }, { "epoch": 1.8475557134879943, "grad_norm": 0.36861708760261536, "learning_rate": 7.68296191008004e-06, "loss": 0.0441, "step": 36395 }, { "epoch": 1.847809533478857, "grad_norm": 0.41563522815704346, "learning_rate": 7.681269776807621e-06, "loss": 0.0439, "step": 36400 }, { "epoch": 1.8480633534697193, "grad_norm": 0.3617202341556549, "learning_rate": 7.679577643535205e-06, "loss": 0.0337, "step": 36405 }, { "epoch": 1.8483171734605817, "grad_norm": 1.1566804647445679, "learning_rate": 7.677885510262789e-06, "loss": 0.0361, "step": 36410 }, { "epoch": 1.8485709934514443, "grad_norm": 0.33016854524612427, "learning_rate": 7.676193376990372e-06, "loss": 0.0341, "step": 36415 }, { "epoch": 1.8488248134423069, "grad_norm": 0.2854905426502228, "learning_rate": 7.674501243717956e-06, "loss": 0.0459, "step": 36420 }, { "epoch": 1.8490786334331692, "grad_norm": 0.3764021694660187, "learning_rate": 7.67280911044554e-06, "loss": 0.0426, "step": 36425 }, { "epoch": 1.8493324534240316, "grad_norm": 0.42459553480148315, "learning_rate": 7.671116977173123e-06, "loss": 0.0445, "step": 36430 }, { "epoch": 1.8495862734148942, "grad_norm": 0.3755717873573303, "learning_rate": 7.669424843900706e-06, "loss": 0.0364, "step": 36435 }, { "epoch": 1.8498400934057566, "grad_norm": 0.3747606575489044, "learning_rate": 7.66773271062829e-06, "loss": 0.0388, "step": 36440 }, { "epoch": 1.850093913396619, "grad_norm": 0.33696579933166504, "learning_rate": 7.666040577355873e-06, "loss": 0.0309, "step": 36445 }, { "epoch": 1.8503477333874816, "grad_norm": 0.37529999017715454, "learning_rate": 7.664348444083457e-06, "loss": 0.0383, "step": 36450 }, { "epoch": 1.8506015533783442, "grad_norm": 0.325459361076355, "learning_rate": 7.66265631081104e-06, "loss": 0.0422, "step": 36455 }, { "epoch": 1.8508553733692066, "grad_norm": 0.33724257349967957, "learning_rate": 7.660964177538624e-06, "loss": 0.0386, "step": 36460 }, { "epoch": 1.851109193360069, "grad_norm": 0.6354991793632507, "learning_rate": 7.659272044266208e-06, "loss": 0.0389, "step": 36465 }, { "epoch": 1.8513630133509316, "grad_norm": 0.5462630391120911, "learning_rate": 7.657579910993791e-06, "loss": 0.0371, "step": 36470 }, { "epoch": 1.8516168333417942, "grad_norm": 0.3498347997665405, "learning_rate": 7.655887777721373e-06, "loss": 0.0401, "step": 36475 }, { "epoch": 1.8518706533326563, "grad_norm": 0.3156206011772156, "learning_rate": 7.654195644448958e-06, "loss": 0.0364, "step": 36480 }, { "epoch": 1.852124473323519, "grad_norm": 0.3194058835506439, "learning_rate": 7.652503511176542e-06, "loss": 0.0408, "step": 36485 }, { "epoch": 1.8523782933143815, "grad_norm": 0.629120409488678, "learning_rate": 7.650811377904125e-06, "loss": 0.0372, "step": 36490 }, { "epoch": 1.852632113305244, "grad_norm": 0.6228652000427246, "learning_rate": 7.649119244631707e-06, "loss": 0.0449, "step": 36495 }, { "epoch": 1.8528859332961063, "grad_norm": 0.3349374532699585, "learning_rate": 7.64742711135929e-06, "loss": 0.0402, "step": 36500 }, { "epoch": 1.853139753286969, "grad_norm": 0.46309956908226013, "learning_rate": 7.645734978086874e-06, "loss": 0.0352, "step": 36505 }, { "epoch": 1.8533935732778315, "grad_norm": 0.49359285831451416, "learning_rate": 7.644042844814458e-06, "loss": 0.0374, "step": 36510 }, { "epoch": 1.8536473932686939, "grad_norm": 0.3246408700942993, "learning_rate": 7.642350711542041e-06, "loss": 0.0414, "step": 36515 }, { "epoch": 1.8539012132595563, "grad_norm": 0.41470158100128174, "learning_rate": 7.640658578269625e-06, "loss": 0.0425, "step": 36520 }, { "epoch": 1.8541550332504189, "grad_norm": 0.28343310952186584, "learning_rate": 7.638966444997208e-06, "loss": 0.0439, "step": 36525 }, { "epoch": 1.8544088532412812, "grad_norm": 0.3936581015586853, "learning_rate": 7.637274311724792e-06, "loss": 0.0375, "step": 36530 }, { "epoch": 1.8546626732321436, "grad_norm": 0.4883519411087036, "learning_rate": 7.635582178452375e-06, "loss": 0.0387, "step": 36535 }, { "epoch": 1.8549164932230062, "grad_norm": 0.368341863155365, "learning_rate": 7.633890045179959e-06, "loss": 0.0373, "step": 36540 }, { "epoch": 1.8551703132138688, "grad_norm": 0.3583613336086273, "learning_rate": 7.632197911907543e-06, "loss": 0.0414, "step": 36545 }, { "epoch": 1.8554241332047312, "grad_norm": 0.36319252848625183, "learning_rate": 7.630505778635126e-06, "loss": 0.0394, "step": 36550 }, { "epoch": 1.8556779531955936, "grad_norm": 0.5051521062850952, "learning_rate": 7.62881364536271e-06, "loss": 0.05, "step": 36555 }, { "epoch": 1.8559317731864562, "grad_norm": 0.357225239276886, "learning_rate": 7.627121512090293e-06, "loss": 0.0406, "step": 36560 }, { "epoch": 1.8561855931773188, "grad_norm": 0.339259535074234, "learning_rate": 7.625429378817877e-06, "loss": 0.0478, "step": 36565 }, { "epoch": 1.8564394131681812, "grad_norm": 0.36899054050445557, "learning_rate": 7.6237372455454594e-06, "loss": 0.0323, "step": 36570 }, { "epoch": 1.8566932331590436, "grad_norm": 0.3415074944496155, "learning_rate": 7.622045112273043e-06, "loss": 0.038, "step": 36575 }, { "epoch": 1.8569470531499062, "grad_norm": 0.39684244990348816, "learning_rate": 7.620352979000627e-06, "loss": 0.0445, "step": 36580 }, { "epoch": 1.8572008731407685, "grad_norm": 0.33541497588157654, "learning_rate": 7.618660845728209e-06, "loss": 0.038, "step": 36585 }, { "epoch": 1.857454693131631, "grad_norm": 0.37271255254745483, "learning_rate": 7.616968712455794e-06, "loss": 0.0448, "step": 36590 }, { "epoch": 1.8577085131224935, "grad_norm": 0.3162631690502167, "learning_rate": 7.615276579183377e-06, "loss": 0.0317, "step": 36595 }, { "epoch": 1.8579623331133561, "grad_norm": 0.3193531036376953, "learning_rate": 7.613584445910961e-06, "loss": 0.0359, "step": 36600 }, { "epoch": 1.8582161531042185, "grad_norm": 0.4613184332847595, "learning_rate": 7.611892312638543e-06, "loss": 0.0466, "step": 36605 }, { "epoch": 1.8584699730950809, "grad_norm": 0.2512896955013275, "learning_rate": 7.610200179366127e-06, "loss": 0.0356, "step": 36610 }, { "epoch": 1.8587237930859435, "grad_norm": 0.44435155391693115, "learning_rate": 7.608508046093711e-06, "loss": 0.0355, "step": 36615 }, { "epoch": 1.858977613076806, "grad_norm": 0.2987118661403656, "learning_rate": 7.606815912821295e-06, "loss": 0.0478, "step": 36620 }, { "epoch": 1.8592314330676682, "grad_norm": 0.33937567472457886, "learning_rate": 7.6051237795488776e-06, "loss": 0.0426, "step": 36625 }, { "epoch": 1.8594852530585309, "grad_norm": 0.3043089509010315, "learning_rate": 7.603431646276461e-06, "loss": 0.0376, "step": 36630 }, { "epoch": 1.8597390730493935, "grad_norm": 0.5690452456474304, "learning_rate": 7.601739513004045e-06, "loss": 0.0451, "step": 36635 }, { "epoch": 1.8599928930402558, "grad_norm": 0.5804987549781799, "learning_rate": 7.600047379731629e-06, "loss": 0.0395, "step": 36640 }, { "epoch": 1.8602467130311182, "grad_norm": 0.37388941645622253, "learning_rate": 7.598355246459212e-06, "loss": 0.036, "step": 36645 }, { "epoch": 1.8605005330219808, "grad_norm": 0.5256741642951965, "learning_rate": 7.596663113186795e-06, "loss": 0.0372, "step": 36650 }, { "epoch": 1.8607543530128434, "grad_norm": 0.30373093485832214, "learning_rate": 7.594970979914379e-06, "loss": 0.0331, "step": 36655 }, { "epoch": 1.8610081730037058, "grad_norm": 0.3162570893764496, "learning_rate": 7.593278846641962e-06, "loss": 0.0348, "step": 36660 }, { "epoch": 1.8612619929945682, "grad_norm": 0.24273575842380524, "learning_rate": 7.591586713369545e-06, "loss": 0.0488, "step": 36665 }, { "epoch": 1.8615158129854308, "grad_norm": 0.4897520840167999, "learning_rate": 7.5898945800971295e-06, "loss": 0.0478, "step": 36670 }, { "epoch": 1.8617696329762932, "grad_norm": 0.3911951780319214, "learning_rate": 7.588202446824713e-06, "loss": 0.0327, "step": 36675 }, { "epoch": 1.8620234529671555, "grad_norm": 0.47243648767471313, "learning_rate": 7.586510313552296e-06, "loss": 0.0383, "step": 36680 }, { "epoch": 1.8622772729580181, "grad_norm": 0.27408725023269653, "learning_rate": 7.584818180279879e-06, "loss": 0.0384, "step": 36685 }, { "epoch": 1.8625310929488808, "grad_norm": 0.34124070405960083, "learning_rate": 7.583126047007463e-06, "loss": 0.0363, "step": 36690 }, { "epoch": 1.8627849129397431, "grad_norm": 0.49087390303611755, "learning_rate": 7.581433913735047e-06, "loss": 0.0414, "step": 36695 }, { "epoch": 1.8630387329306055, "grad_norm": 0.4342269003391266, "learning_rate": 7.579741780462629e-06, "loss": 0.0321, "step": 36700 }, { "epoch": 1.8632925529214681, "grad_norm": 0.26301121711730957, "learning_rate": 7.5780496471902134e-06, "loss": 0.0372, "step": 36705 }, { "epoch": 1.8635463729123307, "grad_norm": 0.543662428855896, "learning_rate": 7.576357513917797e-06, "loss": 0.049, "step": 36710 }, { "epoch": 1.863800192903193, "grad_norm": 0.3795504570007324, "learning_rate": 7.5746653806453805e-06, "loss": 0.0369, "step": 36715 }, { "epoch": 1.8640540128940555, "grad_norm": 0.28618723154067993, "learning_rate": 7.572973247372963e-06, "loss": 0.0373, "step": 36720 }, { "epoch": 1.864307832884918, "grad_norm": 0.33548107743263245, "learning_rate": 7.571281114100547e-06, "loss": 0.0357, "step": 36725 }, { "epoch": 1.8645616528757805, "grad_norm": 0.2927003502845764, "learning_rate": 7.569588980828131e-06, "loss": 0.0354, "step": 36730 }, { "epoch": 1.8648154728666428, "grad_norm": 0.24474462866783142, "learning_rate": 7.567896847555715e-06, "loss": 0.0307, "step": 36735 }, { "epoch": 1.8650692928575054, "grad_norm": 0.2846127152442932, "learning_rate": 7.566204714283297e-06, "loss": 0.0343, "step": 36740 }, { "epoch": 1.865323112848368, "grad_norm": 0.36875009536743164, "learning_rate": 7.564512581010881e-06, "loss": 0.0402, "step": 36745 }, { "epoch": 1.8655769328392304, "grad_norm": 0.36819466948509216, "learning_rate": 7.5628204477384645e-06, "loss": 0.0337, "step": 36750 }, { "epoch": 1.8658307528300928, "grad_norm": 0.2949831187725067, "learning_rate": 7.561128314466047e-06, "loss": 0.0385, "step": 36755 }, { "epoch": 1.8660845728209554, "grad_norm": 0.41179198026657104, "learning_rate": 7.5594361811936316e-06, "loss": 0.0316, "step": 36760 }, { "epoch": 1.866338392811818, "grad_norm": 0.4023495316505432, "learning_rate": 7.557744047921215e-06, "loss": 0.0421, "step": 36765 }, { "epoch": 1.8665922128026802, "grad_norm": 0.5222672820091248, "learning_rate": 7.556051914648799e-06, "loss": 0.0393, "step": 36770 }, { "epoch": 1.8668460327935428, "grad_norm": 0.3291333317756653, "learning_rate": 7.554359781376381e-06, "loss": 0.0345, "step": 36775 }, { "epoch": 1.8670998527844054, "grad_norm": 0.4046681225299835, "learning_rate": 7.552667648103965e-06, "loss": 0.04, "step": 36780 }, { "epoch": 1.8673536727752678, "grad_norm": 0.2864059507846832, "learning_rate": 7.550975514831549e-06, "loss": 0.0417, "step": 36785 }, { "epoch": 1.8676074927661301, "grad_norm": 0.3820878565311432, "learning_rate": 7.549283381559133e-06, "loss": 0.0376, "step": 36790 }, { "epoch": 1.8678613127569927, "grad_norm": 0.3426235020160675, "learning_rate": 7.5475912482867155e-06, "loss": 0.0376, "step": 36795 }, { "epoch": 1.8681151327478553, "grad_norm": 0.43335989117622375, "learning_rate": 7.545899115014299e-06, "loss": 0.0337, "step": 36800 }, { "epoch": 1.8683689527387177, "grad_norm": 0.5109525918960571, "learning_rate": 7.544206981741883e-06, "loss": 0.0363, "step": 36805 }, { "epoch": 1.86862277272958, "grad_norm": 0.3855689465999603, "learning_rate": 7.542514848469466e-06, "loss": 0.0343, "step": 36810 }, { "epoch": 1.8688765927204427, "grad_norm": 0.584434986114502, "learning_rate": 7.540822715197049e-06, "loss": 0.0342, "step": 36815 }, { "epoch": 1.869130412711305, "grad_norm": 1.0270673036575317, "learning_rate": 7.539130581924633e-06, "loss": 0.0351, "step": 36820 }, { "epoch": 1.8693842327021675, "grad_norm": 0.31905609369277954, "learning_rate": 7.537438448652217e-06, "loss": 0.0461, "step": 36825 }, { "epoch": 1.86963805269303, "grad_norm": 0.244539275765419, "learning_rate": 7.5357463153797995e-06, "loss": 0.0428, "step": 36830 }, { "epoch": 1.8698918726838927, "grad_norm": 0.23592790961265564, "learning_rate": 7.534054182107383e-06, "loss": 0.0348, "step": 36835 }, { "epoch": 1.870145692674755, "grad_norm": 0.28931909799575806, "learning_rate": 7.532362048834967e-06, "loss": 0.0474, "step": 36840 }, { "epoch": 1.8703995126656174, "grad_norm": 1.2063060998916626, "learning_rate": 7.530669915562551e-06, "loss": 0.0409, "step": 36845 }, { "epoch": 1.87065333265648, "grad_norm": 1.4404006004333496, "learning_rate": 7.528977782290134e-06, "loss": 0.0443, "step": 36850 }, { "epoch": 1.8709071526473426, "grad_norm": 0.49859338998794556, "learning_rate": 7.527285649017717e-06, "loss": 0.0356, "step": 36855 }, { "epoch": 1.871160972638205, "grad_norm": 0.23417167365550995, "learning_rate": 7.525593515745301e-06, "loss": 0.0371, "step": 36860 }, { "epoch": 1.8714147926290674, "grad_norm": 0.38218954205513, "learning_rate": 7.523901382472884e-06, "loss": 0.0391, "step": 36865 }, { "epoch": 1.87166861261993, "grad_norm": 0.2729610800743103, "learning_rate": 7.522209249200467e-06, "loss": 0.0422, "step": 36870 }, { "epoch": 1.8719224326107924, "grad_norm": 0.2917572855949402, "learning_rate": 7.520517115928051e-06, "loss": 0.0465, "step": 36875 }, { "epoch": 1.8721762526016548, "grad_norm": 0.4478292167186737, "learning_rate": 7.518824982655635e-06, "loss": 0.0403, "step": 36880 }, { "epoch": 1.8724300725925174, "grad_norm": 0.3274686634540558, "learning_rate": 7.5171328493832185e-06, "loss": 0.037, "step": 36885 }, { "epoch": 1.87268389258338, "grad_norm": 0.19864174723625183, "learning_rate": 7.515440716110801e-06, "loss": 0.0332, "step": 36890 }, { "epoch": 1.8729377125742424, "grad_norm": 0.44155529141426086, "learning_rate": 7.513748582838385e-06, "loss": 0.0431, "step": 36895 }, { "epoch": 1.8731915325651047, "grad_norm": 0.3078289031982422, "learning_rate": 7.512056449565968e-06, "loss": 0.0346, "step": 36900 }, { "epoch": 1.8734453525559673, "grad_norm": 0.3366737365722656, "learning_rate": 7.510364316293553e-06, "loss": 0.0356, "step": 36905 }, { "epoch": 1.87369917254683, "grad_norm": 0.35496756434440613, "learning_rate": 7.508672183021135e-06, "loss": 0.0482, "step": 36910 }, { "epoch": 1.8739529925376923, "grad_norm": 0.3293476700782776, "learning_rate": 7.506980049748719e-06, "loss": 0.0388, "step": 36915 }, { "epoch": 1.8742068125285547, "grad_norm": 0.30580469965934753, "learning_rate": 7.5052879164763025e-06, "loss": 0.0437, "step": 36920 }, { "epoch": 1.8744606325194173, "grad_norm": 0.28655534982681274, "learning_rate": 7.503595783203885e-06, "loss": 0.0531, "step": 36925 }, { "epoch": 1.8747144525102797, "grad_norm": 0.33288678526878357, "learning_rate": 7.501903649931469e-06, "loss": 0.0331, "step": 36930 }, { "epoch": 1.874968272501142, "grad_norm": 0.3044753670692444, "learning_rate": 7.500211516659053e-06, "loss": 0.0472, "step": 36935 }, { "epoch": 1.8752220924920047, "grad_norm": 0.3109078109264374, "learning_rate": 7.498519383386637e-06, "loss": 0.0414, "step": 36940 }, { "epoch": 1.8754759124828673, "grad_norm": 0.40895843505859375, "learning_rate": 7.496827250114219e-06, "loss": 0.0332, "step": 36945 }, { "epoch": 1.8757297324737296, "grad_norm": 0.4430285096168518, "learning_rate": 7.495135116841803e-06, "loss": 0.0465, "step": 36950 }, { "epoch": 1.875983552464592, "grad_norm": 0.48985281586647034, "learning_rate": 7.4934429835693864e-06, "loss": 0.0374, "step": 36955 }, { "epoch": 1.8762373724554546, "grad_norm": 0.3518620729446411, "learning_rate": 7.491750850296971e-06, "loss": 0.0314, "step": 36960 }, { "epoch": 1.8764911924463172, "grad_norm": 0.3940987288951874, "learning_rate": 7.4900587170245535e-06, "loss": 0.0288, "step": 36965 }, { "epoch": 1.8767450124371794, "grad_norm": 0.2915908694267273, "learning_rate": 7.488366583752137e-06, "loss": 0.0365, "step": 36970 }, { "epoch": 1.876998832428042, "grad_norm": 0.2620247006416321, "learning_rate": 7.486674450479721e-06, "loss": 0.0432, "step": 36975 }, { "epoch": 1.8772526524189046, "grad_norm": 0.34094831347465515, "learning_rate": 7.484982317207304e-06, "loss": 0.0345, "step": 36980 }, { "epoch": 1.877506472409767, "grad_norm": 0.2771894037723541, "learning_rate": 7.483290183934887e-06, "loss": 0.0347, "step": 36985 }, { "epoch": 1.8777602924006294, "grad_norm": 0.33156320452690125, "learning_rate": 7.48159805066247e-06, "loss": 0.0369, "step": 36990 }, { "epoch": 1.878014112391492, "grad_norm": 0.5050420761108398, "learning_rate": 7.479905917390055e-06, "loss": 0.0445, "step": 36995 }, { "epoch": 1.8782679323823546, "grad_norm": 0.29895123839378357, "learning_rate": 7.4782137841176375e-06, "loss": 0.0419, "step": 37000 }, { "epoch": 1.878521752373217, "grad_norm": 0.27417564392089844, "learning_rate": 7.476521650845221e-06, "loss": 0.0425, "step": 37005 }, { "epoch": 1.8787755723640793, "grad_norm": 0.5192016959190369, "learning_rate": 7.4748295175728046e-06, "loss": 0.0447, "step": 37010 }, { "epoch": 1.879029392354942, "grad_norm": 0.25949370861053467, "learning_rate": 7.473137384300388e-06, "loss": 0.025, "step": 37015 }, { "epoch": 1.8792832123458043, "grad_norm": 0.44494354724884033, "learning_rate": 7.471445251027971e-06, "loss": 0.0337, "step": 37020 }, { "epoch": 1.8795370323366667, "grad_norm": 0.28168588876724243, "learning_rate": 7.469753117755555e-06, "loss": 0.0419, "step": 37025 }, { "epoch": 1.8797908523275293, "grad_norm": 0.2574255168437958, "learning_rate": 7.468060984483139e-06, "loss": 0.0363, "step": 37030 }, { "epoch": 1.880044672318392, "grad_norm": 0.40726467967033386, "learning_rate": 7.466368851210722e-06, "loss": 0.0407, "step": 37035 }, { "epoch": 1.8802984923092543, "grad_norm": 0.3467763364315033, "learning_rate": 7.464676717938305e-06, "loss": 0.0385, "step": 37040 }, { "epoch": 1.8805523123001167, "grad_norm": 0.5550787448883057, "learning_rate": 7.4629845846658885e-06, "loss": 0.0402, "step": 37045 }, { "epoch": 1.8808061322909793, "grad_norm": 0.28454452753067017, "learning_rate": 7.461292451393473e-06, "loss": 0.0379, "step": 37050 }, { "epoch": 1.8810599522818419, "grad_norm": 0.4483475387096405, "learning_rate": 7.4596003181210565e-06, "loss": 0.0384, "step": 37055 }, { "epoch": 1.8813137722727042, "grad_norm": 0.4282529652118683, "learning_rate": 7.457908184848639e-06, "loss": 0.0336, "step": 37060 }, { "epoch": 1.8815675922635666, "grad_norm": 0.532959520816803, "learning_rate": 7.456216051576223e-06, "loss": 0.0462, "step": 37065 }, { "epoch": 1.8818214122544292, "grad_norm": 0.2977970242500305, "learning_rate": 7.454523918303806e-06, "loss": 0.0372, "step": 37070 }, { "epoch": 1.8820752322452916, "grad_norm": 0.2505723536014557, "learning_rate": 7.452831785031389e-06, "loss": 0.0375, "step": 37075 }, { "epoch": 1.882329052236154, "grad_norm": 0.26401594281196594, "learning_rate": 7.4511396517589725e-06, "loss": 0.0342, "step": 37080 }, { "epoch": 1.8825828722270166, "grad_norm": 0.5874828100204468, "learning_rate": 7.449447518486557e-06, "loss": 0.0385, "step": 37085 }, { "epoch": 1.8828366922178792, "grad_norm": 0.33207279443740845, "learning_rate": 7.4477553852141404e-06, "loss": 0.0335, "step": 37090 }, { "epoch": 1.8830905122087416, "grad_norm": 0.4250926077365875, "learning_rate": 7.446063251941723e-06, "loss": 0.0387, "step": 37095 }, { "epoch": 1.883344332199604, "grad_norm": 0.2815552353858948, "learning_rate": 7.444371118669307e-06, "loss": 0.0383, "step": 37100 }, { "epoch": 1.8835981521904666, "grad_norm": 1.2461798191070557, "learning_rate": 7.44267898539689e-06, "loss": 0.0413, "step": 37105 }, { "epoch": 1.8838519721813292, "grad_norm": 0.4171600341796875, "learning_rate": 7.440986852124475e-06, "loss": 0.0392, "step": 37110 }, { "epoch": 1.8841057921721913, "grad_norm": 0.8906102180480957, "learning_rate": 7.439294718852057e-06, "loss": 0.0302, "step": 37115 }, { "epoch": 1.884359612163054, "grad_norm": 0.6476141214370728, "learning_rate": 7.437602585579641e-06, "loss": 0.0388, "step": 37120 }, { "epoch": 1.8846134321539165, "grad_norm": 0.40726473927497864, "learning_rate": 7.435910452307224e-06, "loss": 0.0429, "step": 37125 }, { "epoch": 1.884867252144779, "grad_norm": 0.47000131011009216, "learning_rate": 7.434218319034808e-06, "loss": 0.0444, "step": 37130 }, { "epoch": 1.8851210721356413, "grad_norm": 0.27233800292015076, "learning_rate": 7.432526185762391e-06, "loss": 0.0351, "step": 37135 }, { "epoch": 1.8853748921265039, "grad_norm": 0.3793398141860962, "learning_rate": 7.430834052489975e-06, "loss": 0.0375, "step": 37140 }, { "epoch": 1.8856287121173665, "grad_norm": 0.5338091850280762, "learning_rate": 7.429141919217559e-06, "loss": 0.0372, "step": 37145 }, { "epoch": 1.8858825321082289, "grad_norm": 0.3071553111076355, "learning_rate": 7.427449785945141e-06, "loss": 0.0355, "step": 37150 }, { "epoch": 1.8861363520990913, "grad_norm": 0.39112332463264465, "learning_rate": 7.425757652672725e-06, "loss": 0.0372, "step": 37155 }, { "epoch": 1.8863901720899539, "grad_norm": 1.7440931797027588, "learning_rate": 7.424065519400308e-06, "loss": 0.0374, "step": 37160 }, { "epoch": 1.8866439920808162, "grad_norm": 0.42379260063171387, "learning_rate": 7.422373386127893e-06, "loss": 0.0429, "step": 37165 }, { "epoch": 1.8868978120716786, "grad_norm": 0.2365400493144989, "learning_rate": 7.420681252855475e-06, "loss": 0.0399, "step": 37170 }, { "epoch": 1.8871516320625412, "grad_norm": 0.29219627380371094, "learning_rate": 7.418989119583059e-06, "loss": 0.034, "step": 37175 }, { "epoch": 1.8874054520534038, "grad_norm": 0.42391523718833923, "learning_rate": 7.4172969863106425e-06, "loss": 0.0434, "step": 37180 }, { "epoch": 1.8876592720442662, "grad_norm": 0.7317461967468262, "learning_rate": 7.415604853038226e-06, "loss": 0.0409, "step": 37185 }, { "epoch": 1.8879130920351286, "grad_norm": 0.3624744713306427, "learning_rate": 7.413912719765809e-06, "loss": 0.0398, "step": 37190 }, { "epoch": 1.8881669120259912, "grad_norm": 0.24236305058002472, "learning_rate": 7.412220586493392e-06, "loss": 0.03, "step": 37195 }, { "epoch": 1.8884207320168538, "grad_norm": 0.3188173174858093, "learning_rate": 7.410528453220977e-06, "loss": 0.0333, "step": 37200 }, { "epoch": 1.8886745520077162, "grad_norm": 0.3659869432449341, "learning_rate": 7.40883631994856e-06, "loss": 0.0348, "step": 37205 }, { "epoch": 1.8889283719985785, "grad_norm": 0.4334191679954529, "learning_rate": 7.407144186676143e-06, "loss": 0.0283, "step": 37210 }, { "epoch": 1.8891821919894412, "grad_norm": 0.2886342406272888, "learning_rate": 7.4054520534037265e-06, "loss": 0.0384, "step": 37215 }, { "epoch": 1.8894360119803035, "grad_norm": 0.5727047324180603, "learning_rate": 7.40375992013131e-06, "loss": 0.0387, "step": 37220 }, { "epoch": 1.889689831971166, "grad_norm": 0.2886624336242676, "learning_rate": 7.4020677868588944e-06, "loss": 0.0333, "step": 37225 }, { "epoch": 1.8899436519620285, "grad_norm": 0.9751167893409729, "learning_rate": 7.400375653586477e-06, "loss": 0.043, "step": 37230 }, { "epoch": 1.8901974719528911, "grad_norm": 0.3085179030895233, "learning_rate": 7.398683520314061e-06, "loss": 0.0424, "step": 37235 }, { "epoch": 1.8904512919437535, "grad_norm": 0.29897645115852356, "learning_rate": 7.396991387041644e-06, "loss": 0.0327, "step": 37240 }, { "epoch": 1.8907051119346159, "grad_norm": 0.346880167722702, "learning_rate": 7.395299253769227e-06, "loss": 0.0367, "step": 37245 }, { "epoch": 1.8909589319254785, "grad_norm": 0.4613039195537567, "learning_rate": 7.3936071204968105e-06, "loss": 0.0449, "step": 37250 }, { "epoch": 1.891212751916341, "grad_norm": 0.3410487473011017, "learning_rate": 7.391914987224395e-06, "loss": 0.0376, "step": 37255 }, { "epoch": 1.8914665719072035, "grad_norm": 0.2915606200695038, "learning_rate": 7.390222853951978e-06, "loss": 0.0431, "step": 37260 }, { "epoch": 1.8917203918980658, "grad_norm": 0.48398151993751526, "learning_rate": 7.388530720679561e-06, "loss": 0.029, "step": 37265 }, { "epoch": 1.8919742118889284, "grad_norm": 0.4111987352371216, "learning_rate": 7.386838587407145e-06, "loss": 0.0377, "step": 37270 }, { "epoch": 1.8922280318797908, "grad_norm": 0.3373020887374878, "learning_rate": 7.385146454134728e-06, "loss": 0.0384, "step": 37275 }, { "epoch": 1.8924818518706532, "grad_norm": 0.8100658655166626, "learning_rate": 7.383454320862313e-06, "loss": 0.0403, "step": 37280 }, { "epoch": 1.8927356718615158, "grad_norm": 0.3214988708496094, "learning_rate": 7.3817621875898944e-06, "loss": 0.0343, "step": 37285 }, { "epoch": 1.8929894918523784, "grad_norm": 0.5179448127746582, "learning_rate": 7.380070054317479e-06, "loss": 0.0385, "step": 37290 }, { "epoch": 1.8932433118432408, "grad_norm": 0.28286972641944885, "learning_rate": 7.378377921045062e-06, "loss": 0.034, "step": 37295 }, { "epoch": 1.8934971318341032, "grad_norm": 0.28703486919403076, "learning_rate": 7.376685787772646e-06, "loss": 0.0351, "step": 37300 }, { "epoch": 1.8937509518249658, "grad_norm": 0.23158909380435944, "learning_rate": 7.374993654500229e-06, "loss": 0.0448, "step": 37305 }, { "epoch": 1.8940047718158284, "grad_norm": 0.29262420535087585, "learning_rate": 7.373301521227812e-06, "loss": 0.0383, "step": 37310 }, { "epoch": 1.8942585918066905, "grad_norm": 0.3622038662433624, "learning_rate": 7.3716093879553965e-06, "loss": 0.0384, "step": 37315 }, { "epoch": 1.8945124117975531, "grad_norm": 0.3091678023338318, "learning_rate": 7.369917254682979e-06, "loss": 0.029, "step": 37320 }, { "epoch": 1.8947662317884157, "grad_norm": 0.4038952887058258, "learning_rate": 7.368225121410563e-06, "loss": 0.0333, "step": 37325 }, { "epoch": 1.8950200517792781, "grad_norm": 0.45298081636428833, "learning_rate": 7.366532988138146e-06, "loss": 0.0381, "step": 37330 }, { "epoch": 1.8952738717701405, "grad_norm": 0.3163958191871643, "learning_rate": 7.36484085486573e-06, "loss": 0.0391, "step": 37335 }, { "epoch": 1.895527691761003, "grad_norm": 0.34478557109832764, "learning_rate": 7.363148721593313e-06, "loss": 0.0322, "step": 37340 }, { "epoch": 1.8957815117518657, "grad_norm": 1.6724156141281128, "learning_rate": 7.361456588320897e-06, "loss": 0.0409, "step": 37345 }, { "epoch": 1.896035331742728, "grad_norm": 0.20028600096702576, "learning_rate": 7.3597644550484805e-06, "loss": 0.0356, "step": 37350 }, { "epoch": 1.8962891517335905, "grad_norm": 0.31665441393852234, "learning_rate": 7.358072321776064e-06, "loss": 0.0409, "step": 37355 }, { "epoch": 1.896542971724453, "grad_norm": 0.2620135247707367, "learning_rate": 7.356380188503647e-06, "loss": 0.0352, "step": 37360 }, { "epoch": 1.8967967917153155, "grad_norm": 0.39468491077423096, "learning_rate": 7.35468805523123e-06, "loss": 0.0369, "step": 37365 }, { "epoch": 1.8970506117061778, "grad_norm": 0.2632450759410858, "learning_rate": 7.352995921958815e-06, "loss": 0.0325, "step": 37370 }, { "epoch": 1.8973044316970404, "grad_norm": 0.32689785957336426, "learning_rate": 7.351303788686398e-06, "loss": 0.0406, "step": 37375 }, { "epoch": 1.897558251687903, "grad_norm": 0.4007929563522339, "learning_rate": 7.349611655413981e-06, "loss": 0.0432, "step": 37380 }, { "epoch": 1.8978120716787654, "grad_norm": 0.28323855996131897, "learning_rate": 7.3479195221415645e-06, "loss": 0.0336, "step": 37385 }, { "epoch": 1.8980658916696278, "grad_norm": 0.3580118417739868, "learning_rate": 7.346227388869148e-06, "loss": 0.04, "step": 37390 }, { "epoch": 1.8983197116604904, "grad_norm": 0.31773775815963745, "learning_rate": 7.344535255596731e-06, "loss": 0.0442, "step": 37395 }, { "epoch": 1.898573531651353, "grad_norm": 0.5973715782165527, "learning_rate": 7.342843122324314e-06, "loss": 0.0448, "step": 37400 }, { "epoch": 1.8988273516422154, "grad_norm": 0.30234646797180176, "learning_rate": 7.341150989051899e-06, "loss": 0.0361, "step": 37405 }, { "epoch": 1.8990811716330778, "grad_norm": 0.2853446900844574, "learning_rate": 7.339458855779482e-06, "loss": 0.0339, "step": 37410 }, { "epoch": 1.8993349916239404, "grad_norm": 0.4864497482776642, "learning_rate": 7.337766722507065e-06, "loss": 0.0444, "step": 37415 }, { "epoch": 1.8995888116148028, "grad_norm": 0.35563915967941284, "learning_rate": 7.3360745892346484e-06, "loss": 0.041, "step": 37420 }, { "epoch": 1.8998426316056651, "grad_norm": 0.2955119013786316, "learning_rate": 7.334382455962232e-06, "loss": 0.0385, "step": 37425 }, { "epoch": 1.9000964515965277, "grad_norm": 0.23126500844955444, "learning_rate": 7.332690322689816e-06, "loss": 0.0299, "step": 37430 }, { "epoch": 1.9003502715873903, "grad_norm": 0.2585117220878601, "learning_rate": 7.330998189417399e-06, "loss": 0.0465, "step": 37435 }, { "epoch": 1.9006040915782527, "grad_norm": 0.36390066146850586, "learning_rate": 7.329306056144983e-06, "loss": 0.037, "step": 37440 }, { "epoch": 1.900857911569115, "grad_norm": 0.32259151339530945, "learning_rate": 7.327613922872566e-06, "loss": 0.0421, "step": 37445 }, { "epoch": 1.9011117315599777, "grad_norm": 0.2413652539253235, "learning_rate": 7.32592178960015e-06, "loss": 0.0296, "step": 37450 }, { "epoch": 1.9013655515508403, "grad_norm": 0.25002026557922363, "learning_rate": 7.324229656327732e-06, "loss": 0.0381, "step": 37455 }, { "epoch": 1.9016193715417025, "grad_norm": 0.5289567112922668, "learning_rate": 7.322537523055317e-06, "loss": 0.0359, "step": 37460 }, { "epoch": 1.901873191532565, "grad_norm": 0.2889693081378937, "learning_rate": 7.3208453897829e-06, "loss": 0.0385, "step": 37465 }, { "epoch": 1.9021270115234277, "grad_norm": 0.31875142455101013, "learning_rate": 7.319153256510484e-06, "loss": 0.0346, "step": 37470 }, { "epoch": 1.90238083151429, "grad_norm": 0.5404953360557556, "learning_rate": 7.317461123238067e-06, "loss": 0.037, "step": 37475 }, { "epoch": 1.9026346515051524, "grad_norm": 0.3537548780441284, "learning_rate": 7.31576898996565e-06, "loss": 0.0361, "step": 37480 }, { "epoch": 1.902888471496015, "grad_norm": 0.2968684732913971, "learning_rate": 7.314076856693234e-06, "loss": 0.0325, "step": 37485 }, { "epoch": 1.9031422914868776, "grad_norm": 0.294409841299057, "learning_rate": 7.312384723420816e-06, "loss": 0.0337, "step": 37490 }, { "epoch": 1.90339611147774, "grad_norm": 0.24077461659908295, "learning_rate": 7.310692590148401e-06, "loss": 0.0409, "step": 37495 }, { "epoch": 1.9036499314686024, "grad_norm": 0.3184297978878021, "learning_rate": 7.309000456875984e-06, "loss": 0.0337, "step": 37500 }, { "epoch": 1.903903751459465, "grad_norm": 0.2995074391365051, "learning_rate": 7.307308323603568e-06, "loss": 0.0387, "step": 37505 }, { "epoch": 1.9041575714503274, "grad_norm": 0.587436318397522, "learning_rate": 7.3056161903311506e-06, "loss": 0.037, "step": 37510 }, { "epoch": 1.9044113914411898, "grad_norm": 0.29330456256866455, "learning_rate": 7.303924057058734e-06, "loss": 0.0388, "step": 37515 }, { "epoch": 1.9046652114320524, "grad_norm": 0.24542950093746185, "learning_rate": 7.3022319237863185e-06, "loss": 0.0416, "step": 37520 }, { "epoch": 1.904919031422915, "grad_norm": 0.35747164487838745, "learning_rate": 7.300539790513902e-06, "loss": 0.0415, "step": 37525 }, { "epoch": 1.9051728514137773, "grad_norm": 0.20445160567760468, "learning_rate": 7.298847657241485e-06, "loss": 0.0381, "step": 37530 }, { "epoch": 1.9054266714046397, "grad_norm": 0.8954115509986877, "learning_rate": 7.297155523969068e-06, "loss": 0.0375, "step": 37535 }, { "epoch": 1.9056804913955023, "grad_norm": 0.3642162084579468, "learning_rate": 7.295463390696652e-06, "loss": 0.0391, "step": 37540 }, { "epoch": 1.905934311386365, "grad_norm": 0.2574728727340698, "learning_rate": 7.293771257424236e-06, "loss": 0.0352, "step": 37545 }, { "epoch": 1.9061881313772273, "grad_norm": 0.3355601727962494, "learning_rate": 7.292079124151819e-06, "loss": 0.0345, "step": 37550 }, { "epoch": 1.9064419513680897, "grad_norm": 0.276810884475708, "learning_rate": 7.2903869908794025e-06, "loss": 0.0401, "step": 37555 }, { "epoch": 1.9066957713589523, "grad_norm": 0.31906431913375854, "learning_rate": 7.288694857606986e-06, "loss": 0.0377, "step": 37560 }, { "epoch": 1.9069495913498147, "grad_norm": 0.48261189460754395, "learning_rate": 7.287002724334569e-06, "loss": 0.0369, "step": 37565 }, { "epoch": 1.907203411340677, "grad_norm": 0.34832999110221863, "learning_rate": 7.285310591062152e-06, "loss": 0.0316, "step": 37570 }, { "epoch": 1.9074572313315397, "grad_norm": 0.2636961042881012, "learning_rate": 7.283618457789736e-06, "loss": 0.0417, "step": 37575 }, { "epoch": 1.9077110513224023, "grad_norm": 0.27580955624580383, "learning_rate": 7.28192632451732e-06, "loss": 0.0423, "step": 37580 }, { "epoch": 1.9079648713132646, "grad_norm": 0.2866416573524475, "learning_rate": 7.280234191244903e-06, "loss": 0.0363, "step": 37585 }, { "epoch": 1.908218691304127, "grad_norm": 1.0502089262008667, "learning_rate": 7.278542057972486e-06, "loss": 0.0338, "step": 37590 }, { "epoch": 1.9084725112949896, "grad_norm": 0.6088945865631104, "learning_rate": 7.27684992470007e-06, "loss": 0.0454, "step": 37595 }, { "epoch": 1.9087263312858522, "grad_norm": 0.3789014220237732, "learning_rate": 7.2751577914276535e-06, "loss": 0.034, "step": 37600 }, { "epoch": 1.9089801512767144, "grad_norm": 0.2999269962310791, "learning_rate": 7.273465658155236e-06, "loss": 0.0372, "step": 37605 }, { "epoch": 1.909233971267577, "grad_norm": 0.3169345557689667, "learning_rate": 7.271773524882821e-06, "loss": 0.0334, "step": 37610 }, { "epoch": 1.9094877912584396, "grad_norm": 0.2436423897743225, "learning_rate": 7.270081391610404e-06, "loss": 0.0354, "step": 37615 }, { "epoch": 1.909741611249302, "grad_norm": 0.34217819571495056, "learning_rate": 7.268389258337988e-06, "loss": 0.0417, "step": 37620 }, { "epoch": 1.9099954312401644, "grad_norm": 0.35862669348716736, "learning_rate": 7.26669712506557e-06, "loss": 0.0357, "step": 37625 }, { "epoch": 1.910249251231027, "grad_norm": 0.36769625544548035, "learning_rate": 7.265004991793154e-06, "loss": 0.031, "step": 37630 }, { "epoch": 1.9105030712218896, "grad_norm": 0.2944765090942383, "learning_rate": 7.263312858520738e-06, "loss": 0.0344, "step": 37635 }, { "epoch": 1.910756891212752, "grad_norm": 0.2816435396671295, "learning_rate": 7.261620725248321e-06, "loss": 0.0334, "step": 37640 }, { "epoch": 1.9110107112036143, "grad_norm": 0.369663804769516, "learning_rate": 7.2599285919759046e-06, "loss": 0.0442, "step": 37645 }, { "epoch": 1.911264531194477, "grad_norm": 0.3657083213329315, "learning_rate": 7.258236458703488e-06, "loss": 0.041, "step": 37650 }, { "epoch": 1.9115183511853393, "grad_norm": 0.34254011511802673, "learning_rate": 7.256544325431072e-06, "loss": 0.0373, "step": 37655 }, { "epoch": 1.9117721711762017, "grad_norm": 0.41274362802505493, "learning_rate": 7.254852192158654e-06, "loss": 0.0418, "step": 37660 }, { "epoch": 1.9120259911670643, "grad_norm": 0.33585357666015625, "learning_rate": 7.253160058886238e-06, "loss": 0.0304, "step": 37665 }, { "epoch": 1.912279811157927, "grad_norm": 0.26096150279045105, "learning_rate": 7.251467925613822e-06, "loss": 0.0365, "step": 37670 }, { "epoch": 1.9125336311487893, "grad_norm": 0.4009125828742981, "learning_rate": 7.249775792341406e-06, "loss": 0.0338, "step": 37675 }, { "epoch": 1.9127874511396517, "grad_norm": 0.4178220331668854, "learning_rate": 7.2480836590689885e-06, "loss": 0.0388, "step": 37680 }, { "epoch": 1.9130412711305143, "grad_norm": 0.4047532379627228, "learning_rate": 7.246391525796572e-06, "loss": 0.0393, "step": 37685 }, { "epoch": 1.9132950911213769, "grad_norm": 0.28076454997062683, "learning_rate": 7.244699392524156e-06, "loss": 0.0364, "step": 37690 }, { "epoch": 1.9135489111122392, "grad_norm": 0.4012969136238098, "learning_rate": 7.24300725925174e-06, "loss": 0.0322, "step": 37695 }, { "epoch": 1.9138027311031016, "grad_norm": 0.40359947085380554, "learning_rate": 7.241315125979323e-06, "loss": 0.0397, "step": 37700 }, { "epoch": 1.9140565510939642, "grad_norm": 0.42775213718414307, "learning_rate": 7.239622992706906e-06, "loss": 0.0362, "step": 37705 }, { "epoch": 1.9143103710848266, "grad_norm": 0.2496214509010315, "learning_rate": 7.23793085943449e-06, "loss": 0.0368, "step": 37710 }, { "epoch": 1.914564191075689, "grad_norm": 0.3519715666770935, "learning_rate": 7.2362387261620725e-06, "loss": 0.042, "step": 37715 }, { "epoch": 1.9148180110665516, "grad_norm": 0.30283480882644653, "learning_rate": 7.234546592889656e-06, "loss": 0.0321, "step": 37720 }, { "epoch": 1.9150718310574142, "grad_norm": 0.3165220618247986, "learning_rate": 7.2328544596172404e-06, "loss": 0.0388, "step": 37725 }, { "epoch": 1.9153256510482766, "grad_norm": 0.499083012342453, "learning_rate": 7.231162326344824e-06, "loss": 0.038, "step": 37730 }, { "epoch": 1.915579471039139, "grad_norm": 0.38413795828819275, "learning_rate": 7.229470193072407e-06, "loss": 0.0499, "step": 37735 }, { "epoch": 1.9158332910300016, "grad_norm": 0.5062192678451538, "learning_rate": 7.22777805979999e-06, "loss": 0.0455, "step": 37740 }, { "epoch": 1.9160871110208642, "grad_norm": 0.2117767184972763, "learning_rate": 7.226085926527574e-06, "loss": 0.0374, "step": 37745 }, { "epoch": 1.9163409310117265, "grad_norm": 0.46465274691581726, "learning_rate": 7.224393793255158e-06, "loss": 0.0408, "step": 37750 }, { "epoch": 1.916594751002589, "grad_norm": 0.5478437542915344, "learning_rate": 7.22270165998274e-06, "loss": 0.036, "step": 37755 }, { "epoch": 1.9168485709934515, "grad_norm": 0.30504342913627625, "learning_rate": 7.221009526710324e-06, "loss": 0.0404, "step": 37760 }, { "epoch": 1.917102390984314, "grad_norm": 0.3328668177127838, "learning_rate": 7.219317393437908e-06, "loss": 0.0428, "step": 37765 }, { "epoch": 1.9173562109751763, "grad_norm": 0.276241660118103, "learning_rate": 7.2176252601654915e-06, "loss": 0.0364, "step": 37770 }, { "epoch": 1.9176100309660389, "grad_norm": 0.2813354432582855, "learning_rate": 7.215933126893074e-06, "loss": 0.0316, "step": 37775 }, { "epoch": 1.9178638509569015, "grad_norm": 0.33498886227607727, "learning_rate": 7.214240993620658e-06, "loss": 0.0321, "step": 37780 }, { "epoch": 1.9181176709477639, "grad_norm": 0.5148637890815735, "learning_rate": 7.212548860348242e-06, "loss": 0.0417, "step": 37785 }, { "epoch": 1.9183714909386262, "grad_norm": 0.38031625747680664, "learning_rate": 7.210856727075826e-06, "loss": 0.0364, "step": 37790 }, { "epoch": 1.9186253109294888, "grad_norm": 0.2922528088092804, "learning_rate": 7.209164593803408e-06, "loss": 0.0414, "step": 37795 }, { "epoch": 1.9188791309203515, "grad_norm": 0.27960649132728577, "learning_rate": 7.207472460530992e-06, "loss": 0.035, "step": 37800 }, { "epoch": 1.9191329509112136, "grad_norm": 0.34974753856658936, "learning_rate": 7.2057803272585754e-06, "loss": 0.0354, "step": 37805 }, { "epoch": 1.9193867709020762, "grad_norm": 0.28370293974876404, "learning_rate": 7.204088193986158e-06, "loss": 0.032, "step": 37810 }, { "epoch": 1.9196405908929388, "grad_norm": 0.47823625802993774, "learning_rate": 7.2023960607137425e-06, "loss": 0.0387, "step": 37815 }, { "epoch": 1.9198944108838012, "grad_norm": 0.3467170000076294, "learning_rate": 7.200703927441326e-06, "loss": 0.0345, "step": 37820 }, { "epoch": 1.9201482308746636, "grad_norm": 0.44955742359161377, "learning_rate": 7.19901179416891e-06, "loss": 0.0415, "step": 37825 }, { "epoch": 1.9204020508655262, "grad_norm": 0.4269384741783142, "learning_rate": 7.197319660896492e-06, "loss": 0.037, "step": 37830 }, { "epoch": 1.9206558708563888, "grad_norm": 0.3061307668685913, "learning_rate": 7.195627527624076e-06, "loss": 0.0335, "step": 37835 }, { "epoch": 1.9209096908472512, "grad_norm": 0.34396281838417053, "learning_rate": 7.19393539435166e-06, "loss": 0.0323, "step": 37840 }, { "epoch": 1.9211635108381135, "grad_norm": 0.3059838116168976, "learning_rate": 7.192243261079244e-06, "loss": 0.0388, "step": 37845 }, { "epoch": 1.9214173308289761, "grad_norm": 0.3554377555847168, "learning_rate": 7.1905511278068265e-06, "loss": 0.0445, "step": 37850 }, { "epoch": 1.9216711508198385, "grad_norm": 0.251293420791626, "learning_rate": 7.18885899453441e-06, "loss": 0.0361, "step": 37855 }, { "epoch": 1.921924970810701, "grad_norm": 0.39309418201446533, "learning_rate": 7.187166861261994e-06, "loss": 0.0464, "step": 37860 }, { "epoch": 1.9221787908015635, "grad_norm": 0.5594707727432251, "learning_rate": 7.185474727989578e-06, "loss": 0.0373, "step": 37865 }, { "epoch": 1.922432610792426, "grad_norm": 0.5243818163871765, "learning_rate": 7.18378259471716e-06, "loss": 0.0343, "step": 37870 }, { "epoch": 1.9226864307832885, "grad_norm": 0.49607399106025696, "learning_rate": 7.182090461444744e-06, "loss": 0.0386, "step": 37875 }, { "epoch": 1.9229402507741509, "grad_norm": 0.3219578266143799, "learning_rate": 7.180398328172328e-06, "loss": 0.0405, "step": 37880 }, { "epoch": 1.9231940707650135, "grad_norm": 0.23030859231948853, "learning_rate": 7.1787061948999105e-06, "loss": 0.0472, "step": 37885 }, { "epoch": 1.923447890755876, "grad_norm": 0.3197329640388489, "learning_rate": 7.177014061627494e-06, "loss": 0.0329, "step": 37890 }, { "epoch": 1.9237017107467385, "grad_norm": 0.2778763473033905, "learning_rate": 7.1753219283550776e-06, "loss": 0.0314, "step": 37895 }, { "epoch": 1.9239555307376008, "grad_norm": 0.22582057118415833, "learning_rate": 7.173629795082662e-06, "loss": 0.0375, "step": 37900 }, { "epoch": 1.9242093507284634, "grad_norm": 0.2623133659362793, "learning_rate": 7.171937661810245e-06, "loss": 0.0367, "step": 37905 }, { "epoch": 1.9244631707193258, "grad_norm": 0.30197757482528687, "learning_rate": 7.170245528537828e-06, "loss": 0.0359, "step": 37910 }, { "epoch": 1.9247169907101882, "grad_norm": 0.39974135160446167, "learning_rate": 7.168553395265412e-06, "loss": 0.0438, "step": 37915 }, { "epoch": 1.9249708107010508, "grad_norm": 0.8644199967384338, "learning_rate": 7.166861261992995e-06, "loss": 0.0375, "step": 37920 }, { "epoch": 1.9252246306919134, "grad_norm": 0.33568376302719116, "learning_rate": 7.165169128720578e-06, "loss": 0.0351, "step": 37925 }, { "epoch": 1.9254784506827758, "grad_norm": 0.2705657482147217, "learning_rate": 7.163476995448162e-06, "loss": 0.0408, "step": 37930 }, { "epoch": 1.9257322706736382, "grad_norm": 0.27909040451049805, "learning_rate": 7.161784862175746e-06, "loss": 0.035, "step": 37935 }, { "epoch": 1.9259860906645008, "grad_norm": 1.79963219165802, "learning_rate": 7.1600927289033295e-06, "loss": 0.0386, "step": 37940 }, { "epoch": 1.9262399106553634, "grad_norm": 0.2219754010438919, "learning_rate": 7.158400595630912e-06, "loss": 0.0331, "step": 37945 }, { "epoch": 1.9264937306462255, "grad_norm": 0.5909177660942078, "learning_rate": 7.156708462358496e-06, "loss": 0.0411, "step": 37950 }, { "epoch": 1.9267475506370881, "grad_norm": 0.4270838797092438, "learning_rate": 7.15501632908608e-06, "loss": 0.0323, "step": 37955 }, { "epoch": 1.9270013706279507, "grad_norm": 0.4282520115375519, "learning_rate": 7.153324195813662e-06, "loss": 0.0381, "step": 37960 }, { "epoch": 1.9272551906188131, "grad_norm": 0.2768152952194214, "learning_rate": 7.151632062541246e-06, "loss": 0.047, "step": 37965 }, { "epoch": 1.9275090106096755, "grad_norm": 0.30975621938705444, "learning_rate": 7.14993992926883e-06, "loss": 0.037, "step": 37970 }, { "epoch": 1.927762830600538, "grad_norm": 0.5023228526115417, "learning_rate": 7.148247795996413e-06, "loss": 0.0396, "step": 37975 }, { "epoch": 1.9280166505914007, "grad_norm": 0.4292077124118805, "learning_rate": 7.146555662723996e-06, "loss": 0.0379, "step": 37980 }, { "epoch": 1.928270470582263, "grad_norm": 0.37395018339157104, "learning_rate": 7.14486352945158e-06, "loss": 0.0314, "step": 37985 }, { "epoch": 1.9285242905731255, "grad_norm": 0.24250783026218414, "learning_rate": 7.143171396179164e-06, "loss": 0.0376, "step": 37990 }, { "epoch": 1.928778110563988, "grad_norm": 0.3052719235420227, "learning_rate": 7.141479262906748e-06, "loss": 0.0367, "step": 37995 }, { "epoch": 1.9290319305548504, "grad_norm": 0.34796974062919617, "learning_rate": 7.13978712963433e-06, "loss": 0.0391, "step": 38000 }, { "epoch": 1.9292857505457128, "grad_norm": 0.2718687653541565, "learning_rate": 7.138094996361914e-06, "loss": 0.0285, "step": 38005 }, { "epoch": 1.9295395705365754, "grad_norm": 0.6020230650901794, "learning_rate": 7.136402863089497e-06, "loss": 0.0329, "step": 38010 }, { "epoch": 1.929793390527438, "grad_norm": 0.4138783812522888, "learning_rate": 7.134710729817082e-06, "loss": 0.037, "step": 38015 }, { "epoch": 1.9300472105183004, "grad_norm": 0.31362026929855347, "learning_rate": 7.1330185965446645e-06, "loss": 0.0294, "step": 38020 }, { "epoch": 1.9303010305091628, "grad_norm": 1.2694065570831299, "learning_rate": 7.131326463272248e-06, "loss": 0.035, "step": 38025 }, { "epoch": 1.9305548505000254, "grad_norm": 0.3924097418785095, "learning_rate": 7.1296343299998316e-06, "loss": 0.0359, "step": 38030 }, { "epoch": 1.930808670490888, "grad_norm": 0.3452089726924896, "learning_rate": 7.127942196727415e-06, "loss": 0.0381, "step": 38035 }, { "epoch": 1.9310624904817504, "grad_norm": 0.3343926668167114, "learning_rate": 7.126250063454998e-06, "loss": 0.0392, "step": 38040 }, { "epoch": 1.9313163104726128, "grad_norm": 0.3243504762649536, "learning_rate": 7.124557930182582e-06, "loss": 0.0421, "step": 38045 }, { "epoch": 1.9315701304634754, "grad_norm": 0.30533623695373535, "learning_rate": 7.122865796910166e-06, "loss": 0.0439, "step": 38050 }, { "epoch": 1.9318239504543377, "grad_norm": 0.4469950795173645, "learning_rate": 7.1211736636377484e-06, "loss": 0.0366, "step": 38055 }, { "epoch": 1.9320777704452001, "grad_norm": 0.4529452919960022, "learning_rate": 7.119481530365332e-06, "loss": 0.0326, "step": 38060 }, { "epoch": 1.9323315904360627, "grad_norm": 0.2496260106563568, "learning_rate": 7.1177893970929155e-06, "loss": 0.0322, "step": 38065 }, { "epoch": 1.9325854104269253, "grad_norm": 0.3679202198982239, "learning_rate": 7.116097263820499e-06, "loss": 0.0371, "step": 38070 }, { "epoch": 1.9328392304177877, "grad_norm": 0.527133047580719, "learning_rate": 7.114405130548082e-06, "loss": 0.0381, "step": 38075 }, { "epoch": 1.93309305040865, "grad_norm": 0.7933568358421326, "learning_rate": 7.112712997275666e-06, "loss": 0.0389, "step": 38080 }, { "epoch": 1.9333468703995127, "grad_norm": 0.37088605761528015, "learning_rate": 7.11102086400325e-06, "loss": 0.0372, "step": 38085 }, { "epoch": 1.9336006903903753, "grad_norm": 0.2773051857948303, "learning_rate": 7.109328730730833e-06, "loss": 0.0344, "step": 38090 }, { "epoch": 1.9338545103812377, "grad_norm": 0.4214688837528229, "learning_rate": 7.107636597458416e-06, "loss": 0.0436, "step": 38095 }, { "epoch": 1.9341083303721, "grad_norm": 0.3429591953754425, "learning_rate": 7.1059444641859995e-06, "loss": 0.038, "step": 38100 }, { "epoch": 1.9343621503629627, "grad_norm": 0.6051746606826782, "learning_rate": 7.104252330913584e-06, "loss": 0.0414, "step": 38105 }, { "epoch": 1.934615970353825, "grad_norm": 0.27440497279167175, "learning_rate": 7.1025601976411674e-06, "loss": 0.0397, "step": 38110 }, { "epoch": 1.9348697903446874, "grad_norm": 0.2947607934474945, "learning_rate": 7.10086806436875e-06, "loss": 0.0413, "step": 38115 }, { "epoch": 1.93512361033555, "grad_norm": 0.2716275453567505, "learning_rate": 7.099175931096334e-06, "loss": 0.0338, "step": 38120 }, { "epoch": 1.9353774303264126, "grad_norm": 0.20705972611904144, "learning_rate": 7.097483797823917e-06, "loss": 0.0317, "step": 38125 }, { "epoch": 1.935631250317275, "grad_norm": 0.2767583727836609, "learning_rate": 7.0957916645515e-06, "loss": 0.029, "step": 38130 }, { "epoch": 1.9358850703081374, "grad_norm": 0.31235581636428833, "learning_rate": 7.094099531279084e-06, "loss": 0.0376, "step": 38135 }, { "epoch": 1.936138890299, "grad_norm": 0.26473572850227356, "learning_rate": 7.092407398006668e-06, "loss": 0.0388, "step": 38140 }, { "epoch": 1.9363927102898626, "grad_norm": 0.38204026222229004, "learning_rate": 7.090715264734251e-06, "loss": 0.0362, "step": 38145 }, { "epoch": 1.9366465302807248, "grad_norm": 0.39236733317375183, "learning_rate": 7.089023131461834e-06, "loss": 0.0344, "step": 38150 }, { "epoch": 1.9369003502715874, "grad_norm": 0.27405235171318054, "learning_rate": 7.087330998189418e-06, "loss": 0.0355, "step": 38155 }, { "epoch": 1.93715417026245, "grad_norm": 0.2893589437007904, "learning_rate": 7.085638864917001e-06, "loss": 0.0405, "step": 38160 }, { "epoch": 1.9374079902533123, "grad_norm": 0.3183000981807709, "learning_rate": 7.0839467316445856e-06, "loss": 0.036, "step": 38165 }, { "epoch": 1.9376618102441747, "grad_norm": 0.3648395836353302, "learning_rate": 7.082254598372168e-06, "loss": 0.0423, "step": 38170 }, { "epoch": 1.9379156302350373, "grad_norm": 0.4205324351787567, "learning_rate": 7.080562465099752e-06, "loss": 0.0311, "step": 38175 }, { "epoch": 1.9381694502259, "grad_norm": 0.4777676463127136, "learning_rate": 7.078870331827335e-06, "loss": 0.0374, "step": 38180 }, { "epoch": 1.9384232702167623, "grad_norm": 0.3146344721317291, "learning_rate": 7.077178198554919e-06, "loss": 0.0326, "step": 38185 }, { "epoch": 1.9386770902076247, "grad_norm": 0.38266465067863464, "learning_rate": 7.075486065282502e-06, "loss": 0.0363, "step": 38190 }, { "epoch": 1.9389309101984873, "grad_norm": 0.18472445011138916, "learning_rate": 7.073793932010086e-06, "loss": 0.0414, "step": 38195 }, { "epoch": 1.9391847301893497, "grad_norm": 0.4240174889564514, "learning_rate": 7.0721017987376695e-06, "loss": 0.0421, "step": 38200 }, { "epoch": 1.939438550180212, "grad_norm": 0.3366274833679199, "learning_rate": 7.070409665465252e-06, "loss": 0.0358, "step": 38205 }, { "epoch": 1.9396923701710747, "grad_norm": 0.3867480754852295, "learning_rate": 7.068717532192836e-06, "loss": 0.0391, "step": 38210 }, { "epoch": 1.9399461901619373, "grad_norm": 0.32904496788978577, "learning_rate": 7.067025398920419e-06, "loss": 0.0323, "step": 38215 }, { "epoch": 1.9402000101527996, "grad_norm": 0.3309951424598694, "learning_rate": 7.065333265648004e-06, "loss": 0.0338, "step": 38220 }, { "epoch": 1.940453830143662, "grad_norm": 0.5160368084907532, "learning_rate": 7.063641132375586e-06, "loss": 0.033, "step": 38225 }, { "epoch": 1.9407076501345246, "grad_norm": 0.4503108859062195, "learning_rate": 7.06194899910317e-06, "loss": 0.0369, "step": 38230 }, { "epoch": 1.9409614701253872, "grad_norm": 0.39195284247398376, "learning_rate": 7.0602568658307535e-06, "loss": 0.0303, "step": 38235 }, { "epoch": 1.9412152901162496, "grad_norm": 0.28088903427124023, "learning_rate": 7.058564732558337e-06, "loss": 0.0302, "step": 38240 }, { "epoch": 1.941469110107112, "grad_norm": 0.4266859292984009, "learning_rate": 7.05687259928592e-06, "loss": 0.0408, "step": 38245 }, { "epoch": 1.9417229300979746, "grad_norm": 0.31425270438194275, "learning_rate": 7.055180466013503e-06, "loss": 0.0326, "step": 38250 }, { "epoch": 1.941976750088837, "grad_norm": 0.6080331802368164, "learning_rate": 7.053488332741088e-06, "loss": 0.0427, "step": 38255 }, { "epoch": 1.9422305700796993, "grad_norm": 0.4568061828613281, "learning_rate": 7.051796199468671e-06, "loss": 0.0406, "step": 38260 }, { "epoch": 1.942484390070562, "grad_norm": 0.2914579212665558, "learning_rate": 7.050104066196254e-06, "loss": 0.0364, "step": 38265 }, { "epoch": 1.9427382100614246, "grad_norm": 0.31048232316970825, "learning_rate": 7.0484119329238375e-06, "loss": 0.036, "step": 38270 }, { "epoch": 1.942992030052287, "grad_norm": 0.40617647767066956, "learning_rate": 7.046719799651421e-06, "loss": 0.0346, "step": 38275 }, { "epoch": 1.9432458500431493, "grad_norm": 0.2798089385032654, "learning_rate": 7.045027666379004e-06, "loss": 0.0327, "step": 38280 }, { "epoch": 1.943499670034012, "grad_norm": 1.6191227436065674, "learning_rate": 7.043335533106588e-06, "loss": 0.0295, "step": 38285 }, { "epoch": 1.9437534900248745, "grad_norm": 0.3303340971469879, "learning_rate": 7.041643399834172e-06, "loss": 0.054, "step": 38290 }, { "epoch": 1.9440073100157367, "grad_norm": 0.3839823007583618, "learning_rate": 7.039951266561755e-06, "loss": 0.0386, "step": 38295 }, { "epoch": 1.9442611300065993, "grad_norm": 0.29821422696113586, "learning_rate": 7.038259133289338e-06, "loss": 0.031, "step": 38300 }, { "epoch": 1.9445149499974619, "grad_norm": 0.23630665242671967, "learning_rate": 7.0365670000169214e-06, "loss": 0.0336, "step": 38305 }, { "epoch": 1.9447687699883243, "grad_norm": 0.45323365926742554, "learning_rate": 7.034874866744506e-06, "loss": 0.0312, "step": 38310 }, { "epoch": 1.9450225899791866, "grad_norm": 0.33891797065734863, "learning_rate": 7.033182733472089e-06, "loss": 0.034, "step": 38315 }, { "epoch": 1.9452764099700492, "grad_norm": 0.23988015949726105, "learning_rate": 7.031490600199672e-06, "loss": 0.0358, "step": 38320 }, { "epoch": 1.9455302299609118, "grad_norm": 0.32248857617378235, "learning_rate": 7.029798466927256e-06, "loss": 0.0375, "step": 38325 }, { "epoch": 1.9457840499517742, "grad_norm": 0.34360724687576294, "learning_rate": 7.028106333654839e-06, "loss": 0.0415, "step": 38330 }, { "epoch": 1.9460378699426366, "grad_norm": 0.25492408871650696, "learning_rate": 7.0264142003824235e-06, "loss": 0.0312, "step": 38335 }, { "epoch": 1.9462916899334992, "grad_norm": 0.3143220543861389, "learning_rate": 7.024722067110005e-06, "loss": 0.0292, "step": 38340 }, { "epoch": 1.9465455099243616, "grad_norm": 0.31019699573516846, "learning_rate": 7.02302993383759e-06, "loss": 0.0309, "step": 38345 }, { "epoch": 1.946799329915224, "grad_norm": 0.3752652406692505, "learning_rate": 7.021337800565173e-06, "loss": 0.0387, "step": 38350 }, { "epoch": 1.9470531499060866, "grad_norm": 0.4151340425014496, "learning_rate": 7.019645667292757e-06, "loss": 0.044, "step": 38355 }, { "epoch": 1.9473069698969492, "grad_norm": 0.3452011048793793, "learning_rate": 7.0179535340203396e-06, "loss": 0.0361, "step": 38360 }, { "epoch": 1.9475607898878116, "grad_norm": 0.34209251403808594, "learning_rate": 7.016261400747923e-06, "loss": 0.044, "step": 38365 }, { "epoch": 1.947814609878674, "grad_norm": 0.28460708260536194, "learning_rate": 7.0145692674755075e-06, "loss": 0.0402, "step": 38370 }, { "epoch": 1.9480684298695365, "grad_norm": 0.40208396315574646, "learning_rate": 7.01287713420309e-06, "loss": 0.046, "step": 38375 }, { "epoch": 1.9483222498603991, "grad_norm": 0.4834042489528656, "learning_rate": 7.011185000930674e-06, "loss": 0.0434, "step": 38380 }, { "epoch": 1.9485760698512615, "grad_norm": 0.2941204011440277, "learning_rate": 7.009492867658257e-06, "loss": 0.0431, "step": 38385 }, { "epoch": 1.948829889842124, "grad_norm": 0.34474998712539673, "learning_rate": 7.007800734385841e-06, "loss": 0.0402, "step": 38390 }, { "epoch": 1.9490837098329865, "grad_norm": 0.3636670410633087, "learning_rate": 7.0061086011134235e-06, "loss": 0.038, "step": 38395 }, { "epoch": 1.949337529823849, "grad_norm": 0.2243959754705429, "learning_rate": 7.004416467841008e-06, "loss": 0.036, "step": 38400 }, { "epoch": 1.9495913498147113, "grad_norm": 0.31654590368270874, "learning_rate": 7.0027243345685915e-06, "loss": 0.0382, "step": 38405 }, { "epoch": 1.9498451698055739, "grad_norm": 0.308843195438385, "learning_rate": 7.001032201296175e-06, "loss": 0.036, "step": 38410 }, { "epoch": 1.9500989897964365, "grad_norm": 0.2872798442840576, "learning_rate": 6.999340068023758e-06, "loss": 0.0336, "step": 38415 }, { "epoch": 1.9503528097872989, "grad_norm": 0.42075660824775696, "learning_rate": 6.997647934751341e-06, "loss": 0.0405, "step": 38420 }, { "epoch": 1.9506066297781612, "grad_norm": 0.5195392370223999, "learning_rate": 6.995955801478926e-06, "loss": 0.0454, "step": 38425 }, { "epoch": 1.9508604497690238, "grad_norm": 0.3235759437084198, "learning_rate": 6.994263668206509e-06, "loss": 0.031, "step": 38430 }, { "epoch": 1.9511142697598864, "grad_norm": 0.31546446681022644, "learning_rate": 6.992571534934092e-06, "loss": 0.031, "step": 38435 }, { "epoch": 1.9513680897507488, "grad_norm": 0.4008297026157379, "learning_rate": 6.9908794016616754e-06, "loss": 0.0281, "step": 38440 }, { "epoch": 1.9516219097416112, "grad_norm": 0.3607640862464905, "learning_rate": 6.989187268389259e-06, "loss": 0.041, "step": 38445 }, { "epoch": 1.9518757297324738, "grad_norm": 0.34919342398643494, "learning_rate": 6.987495135116842e-06, "loss": 0.032, "step": 38450 }, { "epoch": 1.9521295497233362, "grad_norm": 0.27828821539878845, "learning_rate": 6.985803001844425e-06, "loss": 0.0367, "step": 38455 }, { "epoch": 1.9523833697141986, "grad_norm": 0.40238264203071594, "learning_rate": 6.98411086857201e-06, "loss": 0.047, "step": 38460 }, { "epoch": 1.9526371897050612, "grad_norm": 0.2481817901134491, "learning_rate": 6.982418735299593e-06, "loss": 0.037, "step": 38465 }, { "epoch": 1.9528910096959238, "grad_norm": 0.3263671100139618, "learning_rate": 6.980726602027176e-06, "loss": 0.0342, "step": 38470 }, { "epoch": 1.9531448296867862, "grad_norm": 0.925449788570404, "learning_rate": 6.979034468754759e-06, "loss": 0.0351, "step": 38475 }, { "epoch": 1.9533986496776485, "grad_norm": 0.2783336341381073, "learning_rate": 6.977342335482343e-06, "loss": 0.0349, "step": 38480 }, { "epoch": 1.9536524696685111, "grad_norm": 0.2663723826408386, "learning_rate": 6.975650202209927e-06, "loss": 0.0349, "step": 38485 }, { "epoch": 1.9539062896593735, "grad_norm": 0.7413889765739441, "learning_rate": 6.97395806893751e-06, "loss": 0.0414, "step": 38490 }, { "epoch": 1.954160109650236, "grad_norm": 0.39749935269355774, "learning_rate": 6.972265935665094e-06, "loss": 0.0397, "step": 38495 }, { "epoch": 1.9544139296410985, "grad_norm": 0.3025566339492798, "learning_rate": 6.970573802392677e-06, "loss": 0.0372, "step": 38500 }, { "epoch": 1.954667749631961, "grad_norm": 0.35124385356903076, "learning_rate": 6.968881669120261e-06, "loss": 0.036, "step": 38505 }, { "epoch": 1.9549215696228235, "grad_norm": 0.4823085367679596, "learning_rate": 6.967189535847843e-06, "loss": 0.035, "step": 38510 }, { "epoch": 1.9551753896136859, "grad_norm": 0.3143712282180786, "learning_rate": 6.965497402575428e-06, "loss": 0.04, "step": 38515 }, { "epoch": 1.9554292096045485, "grad_norm": 0.24416399002075195, "learning_rate": 6.963805269303011e-06, "loss": 0.0351, "step": 38520 }, { "epoch": 1.955683029595411, "grad_norm": 0.3662688732147217, "learning_rate": 6.962113136030594e-06, "loss": 0.0332, "step": 38525 }, { "epoch": 1.9559368495862735, "grad_norm": 0.25640466809272766, "learning_rate": 6.9604210027581775e-06, "loss": 0.0368, "step": 38530 }, { "epoch": 1.9561906695771358, "grad_norm": 0.27777099609375, "learning_rate": 6.958728869485761e-06, "loss": 0.0268, "step": 38535 }, { "epoch": 1.9564444895679984, "grad_norm": 0.7067055106163025, "learning_rate": 6.9570367362133455e-06, "loss": 0.0328, "step": 38540 }, { "epoch": 1.9566983095588608, "grad_norm": 0.3245342969894409, "learning_rate": 6.955344602940927e-06, "loss": 0.0296, "step": 38545 }, { "epoch": 1.9569521295497232, "grad_norm": 0.3933471739292145, "learning_rate": 6.953652469668512e-06, "loss": 0.0396, "step": 38550 }, { "epoch": 1.9572059495405858, "grad_norm": 0.2676612436771393, "learning_rate": 6.951960336396095e-06, "loss": 0.0323, "step": 38555 }, { "epoch": 1.9574597695314484, "grad_norm": 0.3796135485172272, "learning_rate": 6.950268203123679e-06, "loss": 0.0391, "step": 38560 }, { "epoch": 1.9577135895223108, "grad_norm": 0.529050886631012, "learning_rate": 6.9485760698512615e-06, "loss": 0.0365, "step": 38565 }, { "epoch": 1.9579674095131732, "grad_norm": 0.3404449224472046, "learning_rate": 6.946883936578845e-06, "loss": 0.0326, "step": 38570 }, { "epoch": 1.9582212295040358, "grad_norm": 0.30739548802375793, "learning_rate": 6.9451918033064294e-06, "loss": 0.0415, "step": 38575 }, { "epoch": 1.9584750494948984, "grad_norm": 0.2338048368692398, "learning_rate": 6.943499670034013e-06, "loss": 0.0363, "step": 38580 }, { "epoch": 1.9587288694857607, "grad_norm": 0.4167294204235077, "learning_rate": 6.941807536761596e-06, "loss": 0.0339, "step": 38585 }, { "epoch": 1.9589826894766231, "grad_norm": 0.27878156304359436, "learning_rate": 6.940115403489179e-06, "loss": 0.0286, "step": 38590 }, { "epoch": 1.9592365094674857, "grad_norm": 0.3524043560028076, "learning_rate": 6.938423270216763e-06, "loss": 0.0397, "step": 38595 }, { "epoch": 1.9594903294583481, "grad_norm": 0.38988733291625977, "learning_rate": 6.936731136944347e-06, "loss": 0.0358, "step": 38600 }, { "epoch": 1.9597441494492105, "grad_norm": 0.3674320876598358, "learning_rate": 6.93503900367193e-06, "loss": 0.0395, "step": 38605 }, { "epoch": 1.959997969440073, "grad_norm": 0.31990566849708557, "learning_rate": 6.933346870399513e-06, "loss": 0.0271, "step": 38610 }, { "epoch": 1.9602517894309357, "grad_norm": 0.2604389786720276, "learning_rate": 6.931654737127097e-06, "loss": 0.0322, "step": 38615 }, { "epoch": 1.960505609421798, "grad_norm": 0.477633535861969, "learning_rate": 6.92996260385468e-06, "loss": 0.0521, "step": 38620 }, { "epoch": 1.9607594294126605, "grad_norm": 0.3733793795108795, "learning_rate": 6.928270470582263e-06, "loss": 0.0356, "step": 38625 }, { "epoch": 1.961013249403523, "grad_norm": 0.3341461718082428, "learning_rate": 6.926578337309848e-06, "loss": 0.0363, "step": 38630 }, { "epoch": 1.9612670693943857, "grad_norm": 0.29843002557754517, "learning_rate": 6.924886204037431e-06, "loss": 0.0367, "step": 38635 }, { "epoch": 1.9615208893852478, "grad_norm": 0.34912988543510437, "learning_rate": 6.923194070765014e-06, "loss": 0.0367, "step": 38640 }, { "epoch": 1.9617747093761104, "grad_norm": 0.4012235105037689, "learning_rate": 6.921501937492597e-06, "loss": 0.0369, "step": 38645 }, { "epoch": 1.962028529366973, "grad_norm": 0.33369386196136475, "learning_rate": 6.919809804220181e-06, "loss": 0.0381, "step": 38650 }, { "epoch": 1.9622823493578354, "grad_norm": 0.33917364478111267, "learning_rate": 6.9181176709477645e-06, "loss": 0.0394, "step": 38655 }, { "epoch": 1.9625361693486978, "grad_norm": 0.3566811680793762, "learning_rate": 6.916425537675347e-06, "loss": 0.0346, "step": 38660 }, { "epoch": 1.9627899893395604, "grad_norm": 0.35312744975090027, "learning_rate": 6.9147334044029316e-06, "loss": 0.0314, "step": 38665 }, { "epoch": 1.963043809330423, "grad_norm": 0.5093296766281128, "learning_rate": 6.913041271130515e-06, "loss": 0.037, "step": 38670 }, { "epoch": 1.9632976293212854, "grad_norm": 0.3182208836078644, "learning_rate": 6.911349137858099e-06, "loss": 0.033, "step": 38675 }, { "epoch": 1.9635514493121478, "grad_norm": 0.4049065411090851, "learning_rate": 6.909657004585681e-06, "loss": 0.0318, "step": 38680 }, { "epoch": 1.9638052693030104, "grad_norm": 0.2679682970046997, "learning_rate": 6.907964871313265e-06, "loss": 0.0338, "step": 38685 }, { "epoch": 1.9640590892938727, "grad_norm": 0.34067901968955994, "learning_rate": 6.906272738040849e-06, "loss": 0.037, "step": 38690 }, { "epoch": 1.9643129092847351, "grad_norm": 0.4129829406738281, "learning_rate": 6.904580604768432e-06, "loss": 0.0366, "step": 38695 }, { "epoch": 1.9645667292755977, "grad_norm": 0.25145605206489563, "learning_rate": 6.9028884714960155e-06, "loss": 0.0326, "step": 38700 }, { "epoch": 1.9648205492664603, "grad_norm": 0.24122470617294312, "learning_rate": 6.901196338223599e-06, "loss": 0.0414, "step": 38705 }, { "epoch": 1.9650743692573227, "grad_norm": 0.29186809062957764, "learning_rate": 6.899504204951183e-06, "loss": 0.0323, "step": 38710 }, { "epoch": 1.965328189248185, "grad_norm": 0.3512170612812042, "learning_rate": 6.897812071678765e-06, "loss": 0.0324, "step": 38715 }, { "epoch": 1.9655820092390477, "grad_norm": 0.392165869474411, "learning_rate": 6.89611993840635e-06, "loss": 0.0411, "step": 38720 }, { "epoch": 1.9658358292299103, "grad_norm": 0.3638029992580414, "learning_rate": 6.894427805133933e-06, "loss": 0.046, "step": 38725 }, { "epoch": 1.9660896492207727, "grad_norm": 0.2671540379524231, "learning_rate": 6.892735671861517e-06, "loss": 0.0388, "step": 38730 }, { "epoch": 1.966343469211635, "grad_norm": 0.2336130440235138, "learning_rate": 6.8910435385890995e-06, "loss": 0.0372, "step": 38735 }, { "epoch": 1.9665972892024977, "grad_norm": 0.3330386281013489, "learning_rate": 6.889351405316683e-06, "loss": 0.0325, "step": 38740 }, { "epoch": 1.96685110919336, "grad_norm": 1.5531961917877197, "learning_rate": 6.8876592720442666e-06, "loss": 0.0388, "step": 38745 }, { "epoch": 1.9671049291842224, "grad_norm": 0.4123472273349762, "learning_rate": 6.885967138771851e-06, "loss": 0.0439, "step": 38750 }, { "epoch": 1.967358749175085, "grad_norm": 0.3184575140476227, "learning_rate": 6.884275005499434e-06, "loss": 0.032, "step": 38755 }, { "epoch": 1.9676125691659476, "grad_norm": 0.2574605345726013, "learning_rate": 6.882582872227017e-06, "loss": 0.0315, "step": 38760 }, { "epoch": 1.96786638915681, "grad_norm": 0.3253490924835205, "learning_rate": 6.880890738954601e-06, "loss": 0.0389, "step": 38765 }, { "epoch": 1.9681202091476724, "grad_norm": 0.294781893491745, "learning_rate": 6.8791986056821834e-06, "loss": 0.032, "step": 38770 }, { "epoch": 1.968374029138535, "grad_norm": 0.29747629165649414, "learning_rate": 6.877506472409767e-06, "loss": 0.0307, "step": 38775 }, { "epoch": 1.9686278491293976, "grad_norm": 0.2784003019332886, "learning_rate": 6.875814339137351e-06, "loss": 0.0377, "step": 38780 }, { "epoch": 1.9688816691202597, "grad_norm": 0.47071167826652527, "learning_rate": 6.874122205864935e-06, "loss": 0.0408, "step": 38785 }, { "epoch": 1.9691354891111224, "grad_norm": 0.21550071239471436, "learning_rate": 6.872430072592518e-06, "loss": 0.0372, "step": 38790 }, { "epoch": 1.969389309101985, "grad_norm": 0.4224310517311096, "learning_rate": 6.870737939320101e-06, "loss": 0.0378, "step": 38795 }, { "epoch": 1.9696431290928473, "grad_norm": 0.3429742753505707, "learning_rate": 6.869045806047685e-06, "loss": 0.0348, "step": 38800 }, { "epoch": 1.9698969490837097, "grad_norm": 0.3539807200431824, "learning_rate": 6.867353672775269e-06, "loss": 0.0393, "step": 38805 }, { "epoch": 1.9701507690745723, "grad_norm": 0.281963050365448, "learning_rate": 6.865661539502852e-06, "loss": 0.0288, "step": 38810 }, { "epoch": 1.970404589065435, "grad_norm": 0.3246704936027527, "learning_rate": 6.863969406230435e-06, "loss": 0.0326, "step": 38815 }, { "epoch": 1.9706584090562973, "grad_norm": 0.3228529393672943, "learning_rate": 6.862277272958019e-06, "loss": 0.0442, "step": 38820 }, { "epoch": 1.9709122290471597, "grad_norm": 0.31963005661964417, "learning_rate": 6.8605851396856024e-06, "loss": 0.0307, "step": 38825 }, { "epoch": 1.9711660490380223, "grad_norm": 0.47388240694999695, "learning_rate": 6.858893006413185e-06, "loss": 0.0372, "step": 38830 }, { "epoch": 1.9714198690288847, "grad_norm": 0.7682002782821655, "learning_rate": 6.857200873140769e-06, "loss": 0.0369, "step": 38835 }, { "epoch": 1.971673689019747, "grad_norm": 0.23182091116905212, "learning_rate": 6.855508739868353e-06, "loss": 0.0292, "step": 38840 }, { "epoch": 1.9719275090106096, "grad_norm": 0.3109188377857208, "learning_rate": 6.853816606595936e-06, "loss": 0.0361, "step": 38845 }, { "epoch": 1.9721813290014722, "grad_norm": 0.661422848701477, "learning_rate": 6.852124473323519e-06, "loss": 0.0397, "step": 38850 }, { "epoch": 1.9724351489923346, "grad_norm": 0.2877862751483917, "learning_rate": 6.850432340051103e-06, "loss": 0.0346, "step": 38855 }, { "epoch": 1.972688968983197, "grad_norm": 0.5599194169044495, "learning_rate": 6.848740206778686e-06, "loss": 0.0361, "step": 38860 }, { "epoch": 1.9729427889740596, "grad_norm": 0.500221848487854, "learning_rate": 6.847048073506269e-06, "loss": 0.0355, "step": 38865 }, { "epoch": 1.9731966089649222, "grad_norm": 0.2395467460155487, "learning_rate": 6.8453559402338535e-06, "loss": 0.0283, "step": 38870 }, { "epoch": 1.9734504289557846, "grad_norm": 0.39513200521469116, "learning_rate": 6.843663806961437e-06, "loss": 0.033, "step": 38875 }, { "epoch": 1.973704248946647, "grad_norm": 0.2706584632396698, "learning_rate": 6.841971673689021e-06, "loss": 0.038, "step": 38880 }, { "epoch": 1.9739580689375096, "grad_norm": 0.38689085841178894, "learning_rate": 6.840279540416603e-06, "loss": 0.0322, "step": 38885 }, { "epoch": 1.974211888928372, "grad_norm": 0.25408896803855896, "learning_rate": 6.838587407144187e-06, "loss": 0.0393, "step": 38890 }, { "epoch": 1.9744657089192343, "grad_norm": 0.39390990138053894, "learning_rate": 6.836895273871771e-06, "loss": 0.0382, "step": 38895 }, { "epoch": 1.974719528910097, "grad_norm": 0.2625126540660858, "learning_rate": 6.835203140599355e-06, "loss": 0.0336, "step": 38900 }, { "epoch": 1.9749733489009595, "grad_norm": 0.3571978509426117, "learning_rate": 6.8335110073269375e-06, "loss": 0.0365, "step": 38905 }, { "epoch": 1.975227168891822, "grad_norm": 0.41177958250045776, "learning_rate": 6.831818874054521e-06, "loss": 0.0413, "step": 38910 }, { "epoch": 1.9754809888826843, "grad_norm": 0.24215035140514374, "learning_rate": 6.8301267407821045e-06, "loss": 0.0337, "step": 38915 }, { "epoch": 1.975734808873547, "grad_norm": 0.2806743085384369, "learning_rate": 6.828434607509689e-06, "loss": 0.0356, "step": 38920 }, { "epoch": 1.9759886288644095, "grad_norm": 0.26548781991004944, "learning_rate": 6.826742474237271e-06, "loss": 0.0306, "step": 38925 }, { "epoch": 1.976242448855272, "grad_norm": 0.3748215138912201, "learning_rate": 6.825050340964855e-06, "loss": 0.0314, "step": 38930 }, { "epoch": 1.9764962688461343, "grad_norm": 0.5329114198684692, "learning_rate": 6.823358207692439e-06, "loss": 0.0365, "step": 38935 }, { "epoch": 1.9767500888369969, "grad_norm": 0.33160844445228577, "learning_rate": 6.821666074420021e-06, "loss": 0.0403, "step": 38940 }, { "epoch": 1.9770039088278593, "grad_norm": 0.3684629201889038, "learning_rate": 6.819973941147605e-06, "loss": 0.0381, "step": 38945 }, { "epoch": 1.9772577288187216, "grad_norm": 0.6188549995422363, "learning_rate": 6.8182818078751885e-06, "loss": 0.042, "step": 38950 }, { "epoch": 1.9775115488095842, "grad_norm": 0.4174979627132416, "learning_rate": 6.816589674602773e-06, "loss": 0.0329, "step": 38955 }, { "epoch": 1.9777653688004468, "grad_norm": 0.4109318256378174, "learning_rate": 6.814897541330356e-06, "loss": 0.0368, "step": 38960 }, { "epoch": 1.9780191887913092, "grad_norm": 0.3277137577533722, "learning_rate": 6.813205408057939e-06, "loss": 0.0342, "step": 38965 }, { "epoch": 1.9782730087821716, "grad_norm": 0.42303919792175293, "learning_rate": 6.811513274785523e-06, "loss": 0.0453, "step": 38970 }, { "epoch": 1.9785268287730342, "grad_norm": 0.4994072914123535, "learning_rate": 6.809821141513106e-06, "loss": 0.0411, "step": 38975 }, { "epoch": 1.9787806487638968, "grad_norm": 0.6838673949241638, "learning_rate": 6.808129008240689e-06, "loss": 0.0398, "step": 38980 }, { "epoch": 1.979034468754759, "grad_norm": 0.25443944334983826, "learning_rate": 6.806436874968273e-06, "loss": 0.0348, "step": 38985 }, { "epoch": 1.9792882887456216, "grad_norm": 0.2758225202560425, "learning_rate": 6.804744741695857e-06, "loss": 0.0395, "step": 38990 }, { "epoch": 1.9795421087364842, "grad_norm": 0.2721156179904938, "learning_rate": 6.80305260842344e-06, "loss": 0.041, "step": 38995 }, { "epoch": 1.9797959287273466, "grad_norm": 0.27341485023498535, "learning_rate": 6.801360475151023e-06, "loss": 0.037, "step": 39000 }, { "epoch": 1.980049748718209, "grad_norm": 0.3476855456829071, "learning_rate": 6.799668341878607e-06, "loss": 0.0386, "step": 39005 }, { "epoch": 1.9803035687090715, "grad_norm": 0.2492963969707489, "learning_rate": 6.797976208606191e-06, "loss": 0.0339, "step": 39010 }, { "epoch": 1.9805573886999341, "grad_norm": 0.2879369854927063, "learning_rate": 6.796284075333774e-06, "loss": 0.0275, "step": 39015 }, { "epoch": 1.9808112086907965, "grad_norm": 0.32539618015289307, "learning_rate": 6.794591942061357e-06, "loss": 0.0372, "step": 39020 }, { "epoch": 1.981065028681659, "grad_norm": 0.35178670287132263, "learning_rate": 6.792899808788941e-06, "loss": 0.0351, "step": 39025 }, { "epoch": 1.9813188486725215, "grad_norm": 0.3978646993637085, "learning_rate": 6.791207675516524e-06, "loss": 0.0426, "step": 39030 }, { "epoch": 1.9815726686633839, "grad_norm": 0.3202974796295166, "learning_rate": 6.789515542244107e-06, "loss": 0.0365, "step": 39035 }, { "epoch": 1.9818264886542463, "grad_norm": 0.28949302434921265, "learning_rate": 6.787823408971691e-06, "loss": 0.0315, "step": 39040 }, { "epoch": 1.9820803086451089, "grad_norm": 0.5082337856292725, "learning_rate": 6.786131275699275e-06, "loss": 0.0332, "step": 39045 }, { "epoch": 1.9823341286359715, "grad_norm": 0.38315925002098083, "learning_rate": 6.7844391424268586e-06, "loss": 0.0389, "step": 39050 }, { "epoch": 1.9825879486268339, "grad_norm": 0.7824303507804871, "learning_rate": 6.782747009154441e-06, "loss": 0.0341, "step": 39055 }, { "epoch": 1.9828417686176962, "grad_norm": 0.3990626335144043, "learning_rate": 6.781054875882025e-06, "loss": 0.0457, "step": 39060 }, { "epoch": 1.9830955886085588, "grad_norm": 0.3116946816444397, "learning_rate": 6.779362742609608e-06, "loss": 0.0296, "step": 39065 }, { "epoch": 1.9833494085994214, "grad_norm": 0.30906444787979126, "learning_rate": 6.777670609337193e-06, "loss": 0.0322, "step": 39070 }, { "epoch": 1.9836032285902838, "grad_norm": 0.3945848345756531, "learning_rate": 6.7759784760647754e-06, "loss": 0.0448, "step": 39075 }, { "epoch": 1.9838570485811462, "grad_norm": 0.26128843426704407, "learning_rate": 6.774286342792359e-06, "loss": 0.0375, "step": 39080 }, { "epoch": 1.9841108685720088, "grad_norm": 0.32992011308670044, "learning_rate": 6.7725942095199425e-06, "loss": 0.0345, "step": 39085 }, { "epoch": 1.9843646885628712, "grad_norm": 0.2666621208190918, "learning_rate": 6.770902076247525e-06, "loss": 0.0339, "step": 39090 }, { "epoch": 1.9846185085537336, "grad_norm": 0.3576783835887909, "learning_rate": 6.769209942975109e-06, "loss": 0.0338, "step": 39095 }, { "epoch": 1.9848723285445962, "grad_norm": 0.3300950527191162, "learning_rate": 6.767517809702693e-06, "loss": 0.0317, "step": 39100 }, { "epoch": 1.9851261485354588, "grad_norm": 0.43691083788871765, "learning_rate": 6.765825676430277e-06, "loss": 0.0345, "step": 39105 }, { "epoch": 1.9853799685263211, "grad_norm": 0.2713020443916321, "learning_rate": 6.764133543157859e-06, "loss": 0.0293, "step": 39110 }, { "epoch": 1.9856337885171835, "grad_norm": 0.5607559680938721, "learning_rate": 6.762441409885443e-06, "loss": 0.0371, "step": 39115 }, { "epoch": 1.9858876085080461, "grad_norm": 1.484630823135376, "learning_rate": 6.7607492766130265e-06, "loss": 0.0378, "step": 39120 }, { "epoch": 1.9861414284989087, "grad_norm": 0.35799115896224976, "learning_rate": 6.759057143340611e-06, "loss": 0.0351, "step": 39125 }, { "epoch": 1.986395248489771, "grad_norm": 0.30156785249710083, "learning_rate": 6.757365010068193e-06, "loss": 0.0355, "step": 39130 }, { "epoch": 1.9866490684806335, "grad_norm": 0.44789522886276245, "learning_rate": 6.755672876795777e-06, "loss": 0.0395, "step": 39135 }, { "epoch": 1.986902888471496, "grad_norm": 0.5082187652587891, "learning_rate": 6.753980743523361e-06, "loss": 0.0389, "step": 39140 }, { "epoch": 1.9871567084623585, "grad_norm": 0.4441748559474945, "learning_rate": 6.752288610250944e-06, "loss": 0.031, "step": 39145 }, { "epoch": 1.9874105284532209, "grad_norm": 0.2157219499349594, "learning_rate": 6.750596476978527e-06, "loss": 0.0335, "step": 39150 }, { "epoch": 1.9876643484440835, "grad_norm": 0.4951675534248352, "learning_rate": 6.7489043437061104e-06, "loss": 0.0376, "step": 39155 }, { "epoch": 1.987918168434946, "grad_norm": 0.3324943482875824, "learning_rate": 6.747212210433695e-06, "loss": 0.0302, "step": 39160 }, { "epoch": 1.9881719884258084, "grad_norm": 0.32476142048835754, "learning_rate": 6.745520077161278e-06, "loss": 0.0422, "step": 39165 }, { "epoch": 1.9884258084166708, "grad_norm": 0.37469473481178284, "learning_rate": 6.743827943888861e-06, "loss": 0.0432, "step": 39170 }, { "epoch": 1.9886796284075334, "grad_norm": 0.672195553779602, "learning_rate": 6.742135810616445e-06, "loss": 0.0331, "step": 39175 }, { "epoch": 1.9889334483983958, "grad_norm": 0.3643190562725067, "learning_rate": 6.740443677344028e-06, "loss": 0.0344, "step": 39180 }, { "epoch": 1.9891872683892582, "grad_norm": 0.2701443135738373, "learning_rate": 6.738751544071611e-06, "loss": 0.0401, "step": 39185 }, { "epoch": 1.9894410883801208, "grad_norm": 0.4211021661758423, "learning_rate": 6.737059410799195e-06, "loss": 0.0467, "step": 39190 }, { "epoch": 1.9896949083709834, "grad_norm": 0.3575497567653656, "learning_rate": 6.735367277526779e-06, "loss": 0.0416, "step": 39195 }, { "epoch": 1.9899487283618458, "grad_norm": 0.43901878595352173, "learning_rate": 6.733675144254362e-06, "loss": 0.0411, "step": 39200 }, { "epoch": 1.9902025483527082, "grad_norm": 0.29841357469558716, "learning_rate": 6.731983010981945e-06, "loss": 0.04, "step": 39205 }, { "epoch": 1.9904563683435708, "grad_norm": 0.4036899507045746, "learning_rate": 6.730290877709529e-06, "loss": 0.0352, "step": 39210 }, { "epoch": 1.9907101883344334, "grad_norm": 0.3392904996871948, "learning_rate": 6.728598744437113e-06, "loss": 0.0313, "step": 39215 }, { "epoch": 1.9909640083252957, "grad_norm": 0.3535223603248596, "learning_rate": 6.7269066111646965e-06, "loss": 0.038, "step": 39220 }, { "epoch": 1.9912178283161581, "grad_norm": 0.5300154089927673, "learning_rate": 6.725214477892279e-06, "loss": 0.0427, "step": 39225 }, { "epoch": 1.9914716483070207, "grad_norm": 0.2561807632446289, "learning_rate": 6.723522344619863e-06, "loss": 0.0279, "step": 39230 }, { "epoch": 1.991725468297883, "grad_norm": 0.3881128132343292, "learning_rate": 6.721830211347446e-06, "loss": 0.0444, "step": 39235 }, { "epoch": 1.9919792882887455, "grad_norm": 0.340969979763031, "learning_rate": 6.72013807807503e-06, "loss": 0.0374, "step": 39240 }, { "epoch": 1.992233108279608, "grad_norm": 0.3254038989543915, "learning_rate": 6.7184459448026126e-06, "loss": 0.0313, "step": 39245 }, { "epoch": 1.9924869282704707, "grad_norm": 0.5408515930175781, "learning_rate": 6.716753811530197e-06, "loss": 0.038, "step": 39250 }, { "epoch": 1.992740748261333, "grad_norm": 0.2633402347564697, "learning_rate": 6.7150616782577805e-06, "loss": 0.0387, "step": 39255 }, { "epoch": 1.9929945682521955, "grad_norm": 0.39038726687431335, "learning_rate": 6.713369544985363e-06, "loss": 0.0333, "step": 39260 }, { "epoch": 1.993248388243058, "grad_norm": 0.46379637718200684, "learning_rate": 6.711677411712947e-06, "loss": 0.0309, "step": 39265 }, { "epoch": 1.9935022082339207, "grad_norm": 0.44145670533180237, "learning_rate": 6.70998527844053e-06, "loss": 0.0413, "step": 39270 }, { "epoch": 1.993756028224783, "grad_norm": 0.4047604501247406, "learning_rate": 6.708293145168115e-06, "loss": 0.0428, "step": 39275 }, { "epoch": 1.9940098482156454, "grad_norm": 0.4288921356201172, "learning_rate": 6.706601011895697e-06, "loss": 0.0391, "step": 39280 }, { "epoch": 1.994263668206508, "grad_norm": 0.3530460298061371, "learning_rate": 6.704908878623281e-06, "loss": 0.0404, "step": 39285 }, { "epoch": 1.9945174881973704, "grad_norm": 1.06656014919281, "learning_rate": 6.7032167453508645e-06, "loss": 0.0349, "step": 39290 }, { "epoch": 1.9947713081882328, "grad_norm": 0.32023099064826965, "learning_rate": 6.701524612078448e-06, "loss": 0.0314, "step": 39295 }, { "epoch": 1.9950251281790954, "grad_norm": 0.32165810465812683, "learning_rate": 6.699832478806031e-06, "loss": 0.0406, "step": 39300 }, { "epoch": 1.995278948169958, "grad_norm": 0.30110183358192444, "learning_rate": 6.698140345533615e-06, "loss": 0.0301, "step": 39305 }, { "epoch": 1.9955327681608204, "grad_norm": 0.2650599777698517, "learning_rate": 6.696448212261199e-06, "loss": 0.0392, "step": 39310 }, { "epoch": 1.9957865881516827, "grad_norm": 0.6036587953567505, "learning_rate": 6.694756078988782e-06, "loss": 0.0375, "step": 39315 }, { "epoch": 1.9960404081425454, "grad_norm": 1.74124276638031, "learning_rate": 6.693063945716365e-06, "loss": 0.041, "step": 39320 }, { "epoch": 1.9962942281334077, "grad_norm": 0.357609361410141, "learning_rate": 6.691371812443948e-06, "loss": 0.0394, "step": 39325 }, { "epoch": 1.9965480481242701, "grad_norm": 0.271170049905777, "learning_rate": 6.689679679171532e-06, "loss": 0.0385, "step": 39330 }, { "epoch": 1.9968018681151327, "grad_norm": 0.3494093418121338, "learning_rate": 6.687987545899115e-06, "loss": 0.045, "step": 39335 }, { "epoch": 1.9970556881059953, "grad_norm": 0.2796786427497864, "learning_rate": 6.686295412626699e-06, "loss": 0.0405, "step": 39340 }, { "epoch": 1.9973095080968577, "grad_norm": 0.26732075214385986, "learning_rate": 6.684603279354283e-06, "loss": 0.0417, "step": 39345 }, { "epoch": 1.99756332808772, "grad_norm": 0.22764670848846436, "learning_rate": 6.682911146081866e-06, "loss": 0.0277, "step": 39350 }, { "epoch": 1.9978171480785827, "grad_norm": 0.28476661443710327, "learning_rate": 6.681219012809449e-06, "loss": 0.0318, "step": 39355 }, { "epoch": 1.9980709680694453, "grad_norm": 0.21495118737220764, "learning_rate": 6.679526879537032e-06, "loss": 0.0361, "step": 39360 }, { "epoch": 1.9983247880603077, "grad_norm": 0.2934248149394989, "learning_rate": 6.677834746264617e-06, "loss": 0.0395, "step": 39365 }, { "epoch": 1.99857860805117, "grad_norm": 0.4350336790084839, "learning_rate": 6.6761426129922e-06, "loss": 0.0374, "step": 39370 }, { "epoch": 1.9988324280420326, "grad_norm": 0.31187477707862854, "learning_rate": 6.674450479719783e-06, "loss": 0.033, "step": 39375 }, { "epoch": 1.999086248032895, "grad_norm": 0.3732047379016876, "learning_rate": 6.6727583464473666e-06, "loss": 0.036, "step": 39380 }, { "epoch": 1.9993400680237574, "grad_norm": 0.24284490942955017, "learning_rate": 6.67106621317495e-06, "loss": 0.0316, "step": 39385 }, { "epoch": 1.99959388801462, "grad_norm": 0.3445446789264679, "learning_rate": 6.6693740799025345e-06, "loss": 0.0391, "step": 39390 }, { "epoch": 1.9998477080054826, "grad_norm": 0.35594990849494934, "learning_rate": 6.667681946630117e-06, "loss": 0.0401, "step": 39395 }, { "epoch": 2.0, "eval_loss": 0.18550293147563934, "eval_runtime": 1774.6443, "eval_samples_per_second": 70.437, "eval_steps_per_second": 2.202, "step": 39398 }, { "epoch": 2.0001015279963448, "grad_norm": 0.48830828070640564, "learning_rate": 6.665989813357701e-06, "loss": 0.0421, "step": 39400 }, { "epoch": 2.0003553479872074, "grad_norm": 0.43993479013442993, "learning_rate": 6.664297680085284e-06, "loss": 0.0445, "step": 39405 }, { "epoch": 2.00060916797807, "grad_norm": 0.26866963505744934, "learning_rate": 6.662605546812867e-06, "loss": 0.0356, "step": 39410 }, { "epoch": 2.0008629879689326, "grad_norm": 0.356734961271286, "learning_rate": 6.6609134135404505e-06, "loss": 0.0332, "step": 39415 }, { "epoch": 2.0011168079597947, "grad_norm": 0.34094324707984924, "learning_rate": 6.659221280268034e-06, "loss": 0.0433, "step": 39420 }, { "epoch": 2.0013706279506573, "grad_norm": 0.5367624759674072, "learning_rate": 6.6575291469956185e-06, "loss": 0.0418, "step": 39425 }, { "epoch": 2.00162444794152, "grad_norm": 0.2931801676750183, "learning_rate": 6.655837013723201e-06, "loss": 0.041, "step": 39430 }, { "epoch": 2.0018782679323825, "grad_norm": 0.39484718441963196, "learning_rate": 6.654144880450785e-06, "loss": 0.0359, "step": 39435 }, { "epoch": 2.0021320879232447, "grad_norm": 0.5829231142997742, "learning_rate": 6.652452747178368e-06, "loss": 0.0352, "step": 39440 }, { "epoch": 2.0023859079141073, "grad_norm": 0.5522474050521851, "learning_rate": 6.650760613905952e-06, "loss": 0.0503, "step": 39445 }, { "epoch": 2.00263972790497, "grad_norm": 0.5528905987739563, "learning_rate": 6.6490684806335345e-06, "loss": 0.0322, "step": 39450 }, { "epoch": 2.002893547895832, "grad_norm": 0.6939111948013306, "learning_rate": 6.647376347361119e-06, "loss": 0.0387, "step": 39455 }, { "epoch": 2.0031473678866947, "grad_norm": 0.3555048108100891, "learning_rate": 6.6456842140887024e-06, "loss": 0.0384, "step": 39460 }, { "epoch": 2.0034011878775573, "grad_norm": 0.2942870855331421, "learning_rate": 6.643992080816286e-06, "loss": 0.0411, "step": 39465 }, { "epoch": 2.00365500786842, "grad_norm": 0.5334384441375732, "learning_rate": 6.642299947543869e-06, "loss": 0.0389, "step": 39470 }, { "epoch": 2.003908827859282, "grad_norm": 0.4089035987854004, "learning_rate": 6.640607814271452e-06, "loss": 0.0327, "step": 39475 }, { "epoch": 2.0041626478501446, "grad_norm": 0.6318344473838806, "learning_rate": 6.638915680999037e-06, "loss": 0.0398, "step": 39480 }, { "epoch": 2.0044164678410072, "grad_norm": 0.34425875544548035, "learning_rate": 6.63722354772662e-06, "loss": 0.0421, "step": 39485 }, { "epoch": 2.00467028783187, "grad_norm": 0.2746249735355377, "learning_rate": 6.635531414454203e-06, "loss": 0.029, "step": 39490 }, { "epoch": 2.004924107822732, "grad_norm": 0.41694292426109314, "learning_rate": 6.633839281181786e-06, "loss": 0.0364, "step": 39495 }, { "epoch": 2.0051779278135946, "grad_norm": 0.43403369188308716, "learning_rate": 6.63214714790937e-06, "loss": 0.0419, "step": 39500 }, { "epoch": 2.005431747804457, "grad_norm": 0.3920260965824127, "learning_rate": 6.630455014636953e-06, "loss": 0.0372, "step": 39505 }, { "epoch": 2.0056855677953194, "grad_norm": 0.35140088200569153, "learning_rate": 6.628762881364537e-06, "loss": 0.0368, "step": 39510 }, { "epoch": 2.005939387786182, "grad_norm": 0.5638235211372375, "learning_rate": 6.6270707480921206e-06, "loss": 0.031, "step": 39515 }, { "epoch": 2.0061932077770446, "grad_norm": 0.3450993001461029, "learning_rate": 6.625378614819704e-06, "loss": 0.0389, "step": 39520 }, { "epoch": 2.006447027767907, "grad_norm": 0.23240645229816437, "learning_rate": 6.623686481547287e-06, "loss": 0.0317, "step": 39525 }, { "epoch": 2.0067008477587693, "grad_norm": 0.2569294571876526, "learning_rate": 6.62199434827487e-06, "loss": 0.0352, "step": 39530 }, { "epoch": 2.006954667749632, "grad_norm": 0.7697589993476868, "learning_rate": 6.620302215002454e-06, "loss": 0.032, "step": 39535 }, { "epoch": 2.0072084877404945, "grad_norm": 0.39913979172706604, "learning_rate": 6.618610081730038e-06, "loss": 0.0296, "step": 39540 }, { "epoch": 2.007462307731357, "grad_norm": 0.4698142111301422, "learning_rate": 6.616917948457621e-06, "loss": 0.0382, "step": 39545 }, { "epoch": 2.0077161277222193, "grad_norm": 0.30996814370155334, "learning_rate": 6.6152258151852045e-06, "loss": 0.0349, "step": 39550 }, { "epoch": 2.007969947713082, "grad_norm": 0.3366950452327728, "learning_rate": 6.613533681912788e-06, "loss": 0.0393, "step": 39555 }, { "epoch": 2.0082237677039445, "grad_norm": 0.24016176164150238, "learning_rate": 6.611841548640372e-06, "loss": 0.0415, "step": 39560 }, { "epoch": 2.0084775876948067, "grad_norm": 0.669681191444397, "learning_rate": 6.610149415367954e-06, "loss": 0.0363, "step": 39565 }, { "epoch": 2.0087314076856693, "grad_norm": 0.22296953201293945, "learning_rate": 6.608457282095539e-06, "loss": 0.0356, "step": 39570 }, { "epoch": 2.008985227676532, "grad_norm": 0.5446509718894958, "learning_rate": 6.606765148823122e-06, "loss": 0.0316, "step": 39575 }, { "epoch": 2.0092390476673945, "grad_norm": 0.340939462184906, "learning_rate": 6.605073015550705e-06, "loss": 0.0334, "step": 39580 }, { "epoch": 2.0094928676582566, "grad_norm": 0.42687132954597473, "learning_rate": 6.6033808822782885e-06, "loss": 0.0356, "step": 39585 }, { "epoch": 2.0097466876491192, "grad_norm": 0.3017434775829315, "learning_rate": 6.601688749005872e-06, "loss": 0.039, "step": 39590 }, { "epoch": 2.010000507639982, "grad_norm": 0.24050408601760864, "learning_rate": 6.5999966157334564e-06, "loss": 0.0295, "step": 39595 }, { "epoch": 2.010254327630844, "grad_norm": 0.3019162714481354, "learning_rate": 6.598304482461039e-06, "loss": 0.0322, "step": 39600 }, { "epoch": 2.0105081476217066, "grad_norm": 0.38189876079559326, "learning_rate": 6.596612349188623e-06, "loss": 0.0326, "step": 39605 }, { "epoch": 2.010761967612569, "grad_norm": 0.5544765591621399, "learning_rate": 6.594920215916206e-06, "loss": 0.0421, "step": 39610 }, { "epoch": 2.011015787603432, "grad_norm": 0.5120232105255127, "learning_rate": 6.59322808264379e-06, "loss": 0.0424, "step": 39615 }, { "epoch": 2.011269607594294, "grad_norm": 0.29444101452827454, "learning_rate": 6.5915359493713725e-06, "loss": 0.0311, "step": 39620 }, { "epoch": 2.0115234275851566, "grad_norm": 0.24789972603321075, "learning_rate": 6.589843816098956e-06, "loss": 0.0338, "step": 39625 }, { "epoch": 2.011777247576019, "grad_norm": 0.2729002833366394, "learning_rate": 6.58815168282654e-06, "loss": 0.0328, "step": 39630 }, { "epoch": 2.0120310675668818, "grad_norm": 0.4501836895942688, "learning_rate": 6.586459549554124e-06, "loss": 0.0378, "step": 39635 }, { "epoch": 2.012284887557744, "grad_norm": 0.2863801419734955, "learning_rate": 6.584767416281707e-06, "loss": 0.0332, "step": 39640 }, { "epoch": 2.0125387075486065, "grad_norm": 0.2944399118423462, "learning_rate": 6.58307528300929e-06, "loss": 0.0374, "step": 39645 }, { "epoch": 2.012792527539469, "grad_norm": 0.8535482883453369, "learning_rate": 6.581383149736874e-06, "loss": 0.0419, "step": 39650 }, { "epoch": 2.0130463475303313, "grad_norm": 1.3947867155075073, "learning_rate": 6.5796910164644564e-06, "loss": 0.0362, "step": 39655 }, { "epoch": 2.013300167521194, "grad_norm": 0.2795107066631317, "learning_rate": 6.577998883192041e-06, "loss": 0.0351, "step": 39660 }, { "epoch": 2.0135539875120565, "grad_norm": 0.2812153398990631, "learning_rate": 6.576306749919624e-06, "loss": 0.0345, "step": 39665 }, { "epoch": 2.013807807502919, "grad_norm": 0.4150792062282562, "learning_rate": 6.574614616647208e-06, "loss": 0.0368, "step": 39670 }, { "epoch": 2.0140616274937813, "grad_norm": 0.34947192668914795, "learning_rate": 6.572922483374791e-06, "loss": 0.0341, "step": 39675 }, { "epoch": 2.014315447484644, "grad_norm": 0.28611230850219727, "learning_rate": 6.571230350102374e-06, "loss": 0.0315, "step": 39680 }, { "epoch": 2.0145692674755065, "grad_norm": 0.4013167917728424, "learning_rate": 6.5695382168299585e-06, "loss": 0.0336, "step": 39685 }, { "epoch": 2.014823087466369, "grad_norm": 0.2976538836956024, "learning_rate": 6.567846083557542e-06, "loss": 0.0383, "step": 39690 }, { "epoch": 2.0150769074572312, "grad_norm": 0.2748160660266876, "learning_rate": 6.566153950285125e-06, "loss": 0.0379, "step": 39695 }, { "epoch": 2.015330727448094, "grad_norm": 0.244331955909729, "learning_rate": 6.564461817012708e-06, "loss": 0.0316, "step": 39700 }, { "epoch": 2.0155845474389564, "grad_norm": 0.8902464509010315, "learning_rate": 6.562769683740292e-06, "loss": 0.0406, "step": 39705 }, { "epoch": 2.0158383674298186, "grad_norm": 0.3426436185836792, "learning_rate": 6.561077550467876e-06, "loss": 0.0302, "step": 39710 }, { "epoch": 2.016092187420681, "grad_norm": 0.3238757848739624, "learning_rate": 6.559385417195458e-06, "loss": 0.039, "step": 39715 }, { "epoch": 2.016346007411544, "grad_norm": 0.34030091762542725, "learning_rate": 6.5576932839230425e-06, "loss": 0.0382, "step": 39720 }, { "epoch": 2.0165998274024064, "grad_norm": 0.2633964717388153, "learning_rate": 6.556001150650626e-06, "loss": 0.0344, "step": 39725 }, { "epoch": 2.0168536473932686, "grad_norm": 0.23452314734458923, "learning_rate": 6.55430901737821e-06, "loss": 0.0405, "step": 39730 }, { "epoch": 2.017107467384131, "grad_norm": 0.34209662675857544, "learning_rate": 6.552616884105792e-06, "loss": 0.0288, "step": 39735 }, { "epoch": 2.0173612873749938, "grad_norm": 0.23917095363140106, "learning_rate": 6.550924750833376e-06, "loss": 0.0326, "step": 39740 }, { "epoch": 2.017615107365856, "grad_norm": 0.3735106289386749, "learning_rate": 6.54923261756096e-06, "loss": 0.0323, "step": 39745 }, { "epoch": 2.0178689273567185, "grad_norm": 0.517549455165863, "learning_rate": 6.547540484288543e-06, "loss": 0.0403, "step": 39750 }, { "epoch": 2.018122747347581, "grad_norm": 0.3461301922798157, "learning_rate": 6.5458483510161265e-06, "loss": 0.0271, "step": 39755 }, { "epoch": 2.0183765673384437, "grad_norm": 0.3491016626358032, "learning_rate": 6.54415621774371e-06, "loss": 0.0375, "step": 39760 }, { "epoch": 2.018630387329306, "grad_norm": 0.5119760036468506, "learning_rate": 6.5424640844712936e-06, "loss": 0.0357, "step": 39765 }, { "epoch": 2.0188842073201685, "grad_norm": 0.5701979398727417, "learning_rate": 6.540771951198876e-06, "loss": 0.0384, "step": 39770 }, { "epoch": 2.019138027311031, "grad_norm": 0.24367502331733704, "learning_rate": 6.539079817926461e-06, "loss": 0.0379, "step": 39775 }, { "epoch": 2.0193918473018937, "grad_norm": 0.3051294684410095, "learning_rate": 6.537387684654044e-06, "loss": 0.0353, "step": 39780 }, { "epoch": 2.019645667292756, "grad_norm": 0.37204432487487793, "learning_rate": 6.535695551381628e-06, "loss": 0.0352, "step": 39785 }, { "epoch": 2.0198994872836185, "grad_norm": 0.4559103846549988, "learning_rate": 6.5340034181092104e-06, "loss": 0.0304, "step": 39790 }, { "epoch": 2.020153307274481, "grad_norm": 0.37972748279571533, "learning_rate": 6.532311284836794e-06, "loss": 0.0267, "step": 39795 }, { "epoch": 2.020407127265343, "grad_norm": 0.7116328477859497, "learning_rate": 6.530619151564378e-06, "loss": 0.0325, "step": 39800 }, { "epoch": 2.020660947256206, "grad_norm": 0.3754270076751709, "learning_rate": 6.528927018291962e-06, "loss": 0.0358, "step": 39805 }, { "epoch": 2.0209147672470684, "grad_norm": 0.26524120569229126, "learning_rate": 6.527234885019545e-06, "loss": 0.0273, "step": 39810 }, { "epoch": 2.021168587237931, "grad_norm": 0.3698604106903076, "learning_rate": 6.525542751747128e-06, "loss": 0.0376, "step": 39815 }, { "epoch": 2.021422407228793, "grad_norm": 0.3561539947986603, "learning_rate": 6.523850618474712e-06, "loss": 0.0386, "step": 39820 }, { "epoch": 2.021676227219656, "grad_norm": 0.40872108936309814, "learning_rate": 6.522158485202294e-06, "loss": 0.0295, "step": 39825 }, { "epoch": 2.0219300472105184, "grad_norm": 0.3250705301761627, "learning_rate": 6.520466351929878e-06, "loss": 0.0379, "step": 39830 }, { "epoch": 2.022183867201381, "grad_norm": 0.2779284417629242, "learning_rate": 6.518774218657462e-06, "loss": 0.0317, "step": 39835 }, { "epoch": 2.022437687192243, "grad_norm": 0.4617094397544861, "learning_rate": 6.517082085385046e-06, "loss": 0.0326, "step": 39840 }, { "epoch": 2.0226915071831058, "grad_norm": 0.4745062291622162, "learning_rate": 6.515389952112629e-06, "loss": 0.0431, "step": 39845 }, { "epoch": 2.0229453271739684, "grad_norm": 0.6929597854614258, "learning_rate": 6.513697818840212e-06, "loss": 0.0315, "step": 39850 }, { "epoch": 2.0231991471648305, "grad_norm": 0.23170751333236694, "learning_rate": 6.512005685567796e-06, "loss": 0.0319, "step": 39855 }, { "epoch": 2.023452967155693, "grad_norm": 0.5685155391693115, "learning_rate": 6.51031355229538e-06, "loss": 0.0329, "step": 39860 }, { "epoch": 2.0237067871465557, "grad_norm": 0.2968323230743408, "learning_rate": 6.508621419022963e-06, "loss": 0.0354, "step": 39865 }, { "epoch": 2.0239606071374183, "grad_norm": 1.593350887298584, "learning_rate": 6.506929285750546e-06, "loss": 0.0368, "step": 39870 }, { "epoch": 2.0242144271282805, "grad_norm": 0.41603031754493713, "learning_rate": 6.50523715247813e-06, "loss": 0.0348, "step": 39875 }, { "epoch": 2.024468247119143, "grad_norm": 0.2845626473426819, "learning_rate": 6.503545019205713e-06, "loss": 0.0357, "step": 39880 }, { "epoch": 2.0247220671100057, "grad_norm": 0.4017883539199829, "learning_rate": 6.501852885933296e-06, "loss": 0.0423, "step": 39885 }, { "epoch": 2.024975887100868, "grad_norm": 0.23194077610969543, "learning_rate": 6.5001607526608805e-06, "loss": 0.0339, "step": 39890 }, { "epoch": 2.0252297070917304, "grad_norm": 0.629647970199585, "learning_rate": 6.498468619388464e-06, "loss": 0.0371, "step": 39895 }, { "epoch": 2.025483527082593, "grad_norm": 0.2321619689464569, "learning_rate": 6.496776486116047e-06, "loss": 0.0325, "step": 39900 }, { "epoch": 2.0257373470734557, "grad_norm": 0.2681446075439453, "learning_rate": 6.49508435284363e-06, "loss": 0.031, "step": 39905 }, { "epoch": 2.025991167064318, "grad_norm": 0.2765409052371979, "learning_rate": 6.493392219571214e-06, "loss": 0.0347, "step": 39910 }, { "epoch": 2.0262449870551804, "grad_norm": 0.2587858736515045, "learning_rate": 6.491700086298797e-06, "loss": 0.0363, "step": 39915 }, { "epoch": 2.026498807046043, "grad_norm": 0.39432743191719055, "learning_rate": 6.49000795302638e-06, "loss": 0.0331, "step": 39920 }, { "epoch": 2.0267526270369056, "grad_norm": 0.2792506217956543, "learning_rate": 6.4883158197539644e-06, "loss": 0.0332, "step": 39925 }, { "epoch": 2.0270064470277678, "grad_norm": 0.26611894369125366, "learning_rate": 6.486623686481548e-06, "loss": 0.0363, "step": 39930 }, { "epoch": 2.0272602670186304, "grad_norm": 0.34571000933647156, "learning_rate": 6.4849315532091315e-06, "loss": 0.0365, "step": 39935 }, { "epoch": 2.027514087009493, "grad_norm": 0.23014172911643982, "learning_rate": 6.483239419936714e-06, "loss": 0.0393, "step": 39940 }, { "epoch": 2.027767907000355, "grad_norm": 0.26579776406288147, "learning_rate": 6.481547286664298e-06, "loss": 0.0323, "step": 39945 }, { "epoch": 2.0280217269912177, "grad_norm": 0.44567152857780457, "learning_rate": 6.479855153391882e-06, "loss": 0.0447, "step": 39950 }, { "epoch": 2.0282755469820803, "grad_norm": 0.2629788815975189, "learning_rate": 6.478163020119466e-06, "loss": 0.0357, "step": 39955 }, { "epoch": 2.028529366972943, "grad_norm": 0.40638864040374756, "learning_rate": 6.476470886847048e-06, "loss": 0.0291, "step": 39960 }, { "epoch": 2.028783186963805, "grad_norm": 0.36897462606430054, "learning_rate": 6.474778753574632e-06, "loss": 0.0367, "step": 39965 }, { "epoch": 2.0290370069546677, "grad_norm": 0.468357115983963, "learning_rate": 6.4730866203022155e-06, "loss": 0.032, "step": 39970 }, { "epoch": 2.0292908269455303, "grad_norm": 0.37311598658561707, "learning_rate": 6.471394487029798e-06, "loss": 0.0422, "step": 39975 }, { "epoch": 2.029544646936393, "grad_norm": 0.3664664030075073, "learning_rate": 6.469702353757383e-06, "loss": 0.0388, "step": 39980 }, { "epoch": 2.029798466927255, "grad_norm": 0.28249913454055786, "learning_rate": 6.468010220484966e-06, "loss": 0.0364, "step": 39985 }, { "epoch": 2.0300522869181177, "grad_norm": 0.30894264578819275, "learning_rate": 6.46631808721255e-06, "loss": 0.035, "step": 39990 }, { "epoch": 2.0303061069089803, "grad_norm": 0.27944618463516235, "learning_rate": 6.464625953940132e-06, "loss": 0.0374, "step": 39995 }, { "epoch": 2.0305599268998424, "grad_norm": 0.3840200901031494, "learning_rate": 6.462933820667716e-06, "loss": 0.0393, "step": 40000 }, { "epoch": 2.030813746890705, "grad_norm": 0.3649371862411499, "learning_rate": 6.4612416873952995e-06, "loss": 0.0348, "step": 40005 }, { "epoch": 2.0310675668815676, "grad_norm": 0.2791478633880615, "learning_rate": 6.459549554122884e-06, "loss": 0.0452, "step": 40010 }, { "epoch": 2.0313213868724302, "grad_norm": 0.28353622555732727, "learning_rate": 6.4578574208504666e-06, "loss": 0.0345, "step": 40015 }, { "epoch": 2.0315752068632924, "grad_norm": 0.36117586493492126, "learning_rate": 6.45616528757805e-06, "loss": 0.0396, "step": 40020 }, { "epoch": 2.031829026854155, "grad_norm": 0.3081957995891571, "learning_rate": 6.454473154305634e-06, "loss": 0.0271, "step": 40025 }, { "epoch": 2.0320828468450176, "grad_norm": 0.25008490681648254, "learning_rate": 6.452781021033217e-06, "loss": 0.0349, "step": 40030 }, { "epoch": 2.03233666683588, "grad_norm": 0.40683382749557495, "learning_rate": 6.4510888877608e-06, "loss": 0.0342, "step": 40035 }, { "epoch": 2.0325904868267424, "grad_norm": 0.31075912714004517, "learning_rate": 6.449396754488384e-06, "loss": 0.0338, "step": 40040 }, { "epoch": 2.032844306817605, "grad_norm": 0.28491321206092834, "learning_rate": 6.447704621215968e-06, "loss": 0.0463, "step": 40045 }, { "epoch": 2.0330981268084676, "grad_norm": 0.24667423963546753, "learning_rate": 6.446012487943551e-06, "loss": 0.0372, "step": 40050 }, { "epoch": 2.0333519467993297, "grad_norm": 0.2972967028617859, "learning_rate": 6.444320354671134e-06, "loss": 0.029, "step": 40055 }, { "epoch": 2.0336057667901923, "grad_norm": 0.3517604470252991, "learning_rate": 6.442628221398718e-06, "loss": 0.0391, "step": 40060 }, { "epoch": 2.033859586781055, "grad_norm": 0.3772994875907898, "learning_rate": 6.440936088126302e-06, "loss": 0.0242, "step": 40065 }, { "epoch": 2.0341134067719175, "grad_norm": 0.39820396900177, "learning_rate": 6.439243954853885e-06, "loss": 0.0368, "step": 40070 }, { "epoch": 2.0343672267627797, "grad_norm": 0.21249252557754517, "learning_rate": 6.437551821581468e-06, "loss": 0.0421, "step": 40075 }, { "epoch": 2.0346210467536423, "grad_norm": 0.3023703694343567, "learning_rate": 6.435859688309052e-06, "loss": 0.0316, "step": 40080 }, { "epoch": 2.034874866744505, "grad_norm": 0.3016105592250824, "learning_rate": 6.434167555036635e-06, "loss": 0.0365, "step": 40085 }, { "epoch": 2.035128686735367, "grad_norm": 0.38153544068336487, "learning_rate": 6.432475421764218e-06, "loss": 0.0319, "step": 40090 }, { "epoch": 2.0353825067262297, "grad_norm": 0.33254504203796387, "learning_rate": 6.430783288491802e-06, "loss": 0.0287, "step": 40095 }, { "epoch": 2.0356363267170923, "grad_norm": 0.41204509139060974, "learning_rate": 6.429091155219386e-06, "loss": 0.0371, "step": 40100 }, { "epoch": 2.035890146707955, "grad_norm": 0.5164371728897095, "learning_rate": 6.4273990219469695e-06, "loss": 0.0291, "step": 40105 }, { "epoch": 2.036143966698817, "grad_norm": 0.30971401929855347, "learning_rate": 6.425706888674552e-06, "loss": 0.0331, "step": 40110 }, { "epoch": 2.0363977866896796, "grad_norm": 0.4166437089443207, "learning_rate": 6.424014755402136e-06, "loss": 0.0347, "step": 40115 }, { "epoch": 2.0366516066805422, "grad_norm": 0.4730953872203827, "learning_rate": 6.422322622129719e-06, "loss": 0.0418, "step": 40120 }, { "epoch": 2.036905426671405, "grad_norm": 0.2546430826187134, "learning_rate": 6.420630488857304e-06, "loss": 0.0345, "step": 40125 }, { "epoch": 2.037159246662267, "grad_norm": 0.35239869356155396, "learning_rate": 6.418938355584886e-06, "loss": 0.0342, "step": 40130 }, { "epoch": 2.0374130666531296, "grad_norm": 0.34764301776885986, "learning_rate": 6.41724622231247e-06, "loss": 0.0284, "step": 40135 }, { "epoch": 2.037666886643992, "grad_norm": 0.30288177728652954, "learning_rate": 6.4155540890400535e-06, "loss": 0.0407, "step": 40140 }, { "epoch": 2.0379207066348544, "grad_norm": 0.2945058047771454, "learning_rate": 6.413861955767636e-06, "loss": 0.0423, "step": 40145 }, { "epoch": 2.038174526625717, "grad_norm": 0.37763145565986633, "learning_rate": 6.41216982249522e-06, "loss": 0.0324, "step": 40150 }, { "epoch": 2.0384283466165796, "grad_norm": 0.5259355902671814, "learning_rate": 6.410477689222804e-06, "loss": 0.0356, "step": 40155 }, { "epoch": 2.038682166607442, "grad_norm": 0.4084901511669159, "learning_rate": 6.408785555950388e-06, "loss": 0.0414, "step": 40160 }, { "epoch": 2.0389359865983043, "grad_norm": 0.3193150758743286, "learning_rate": 6.40709342267797e-06, "loss": 0.03, "step": 40165 }, { "epoch": 2.039189806589167, "grad_norm": 0.36446109414100647, "learning_rate": 6.405401289405554e-06, "loss": 0.0317, "step": 40170 }, { "epoch": 2.0394436265800295, "grad_norm": 0.40664389729499817, "learning_rate": 6.4037091561331374e-06, "loss": 0.0339, "step": 40175 }, { "epoch": 2.039697446570892, "grad_norm": 0.4111139178276062, "learning_rate": 6.402017022860722e-06, "loss": 0.0389, "step": 40180 }, { "epoch": 2.0399512665617543, "grad_norm": 0.3648948073387146, "learning_rate": 6.4003248895883045e-06, "loss": 0.0265, "step": 40185 }, { "epoch": 2.040205086552617, "grad_norm": 0.27332165837287903, "learning_rate": 6.398632756315888e-06, "loss": 0.0345, "step": 40190 }, { "epoch": 2.0404589065434795, "grad_norm": 0.2328202873468399, "learning_rate": 6.396940623043472e-06, "loss": 0.0295, "step": 40195 }, { "epoch": 2.0407127265343417, "grad_norm": 0.3368794023990631, "learning_rate": 6.395248489771055e-06, "loss": 0.0431, "step": 40200 }, { "epoch": 2.0409665465252043, "grad_norm": 0.2832939624786377, "learning_rate": 6.393556356498638e-06, "loss": 0.0323, "step": 40205 }, { "epoch": 2.041220366516067, "grad_norm": 0.6089285612106323, "learning_rate": 6.391864223226221e-06, "loss": 0.042, "step": 40210 }, { "epoch": 2.0414741865069295, "grad_norm": 0.341775506734848, "learning_rate": 6.390172089953806e-06, "loss": 0.0338, "step": 40215 }, { "epoch": 2.0417280064977916, "grad_norm": 0.24220286309719086, "learning_rate": 6.3884799566813885e-06, "loss": 0.0295, "step": 40220 }, { "epoch": 2.0419818264886542, "grad_norm": 0.31153371930122375, "learning_rate": 6.386787823408972e-06, "loss": 0.035, "step": 40225 }, { "epoch": 2.042235646479517, "grad_norm": 0.3575878441333771, "learning_rate": 6.385095690136556e-06, "loss": 0.0377, "step": 40230 }, { "epoch": 2.042489466470379, "grad_norm": 0.44863882660865784, "learning_rate": 6.383403556864139e-06, "loss": 0.0419, "step": 40235 }, { "epoch": 2.0427432864612416, "grad_norm": 0.24326546490192413, "learning_rate": 6.381711423591722e-06, "loss": 0.0317, "step": 40240 }, { "epoch": 2.042997106452104, "grad_norm": 0.5627332329750061, "learning_rate": 6.380019290319306e-06, "loss": 0.0346, "step": 40245 }, { "epoch": 2.043250926442967, "grad_norm": 0.22772236168384552, "learning_rate": 6.37832715704689e-06, "loss": 0.0389, "step": 40250 }, { "epoch": 2.043504746433829, "grad_norm": 0.383705735206604, "learning_rate": 6.376635023774473e-06, "loss": 0.0403, "step": 40255 }, { "epoch": 2.0437585664246916, "grad_norm": 0.40597113966941833, "learning_rate": 6.374942890502056e-06, "loss": 0.0349, "step": 40260 }, { "epoch": 2.044012386415554, "grad_norm": 0.3816896378993988, "learning_rate": 6.3732507572296395e-06, "loss": 0.0363, "step": 40265 }, { "epoch": 2.0442662064064168, "grad_norm": 0.29251110553741455, "learning_rate": 6.371558623957224e-06, "loss": 0.0321, "step": 40270 }, { "epoch": 2.044520026397279, "grad_norm": 0.33574673533439636, "learning_rate": 6.3698664906848075e-06, "loss": 0.0334, "step": 40275 }, { "epoch": 2.0447738463881415, "grad_norm": 0.24987970292568207, "learning_rate": 6.36817435741239e-06, "loss": 0.0312, "step": 40280 }, { "epoch": 2.045027666379004, "grad_norm": 0.2474692314863205, "learning_rate": 6.366482224139974e-06, "loss": 0.0359, "step": 40285 }, { "epoch": 2.0452814863698663, "grad_norm": 0.24791835248470306, "learning_rate": 6.364790090867557e-06, "loss": 0.0355, "step": 40290 }, { "epoch": 2.045535306360729, "grad_norm": 0.41668781638145447, "learning_rate": 6.363097957595142e-06, "loss": 0.0379, "step": 40295 }, { "epoch": 2.0457891263515915, "grad_norm": 0.4789046347141266, "learning_rate": 6.3614058243227235e-06, "loss": 0.0323, "step": 40300 }, { "epoch": 2.046042946342454, "grad_norm": 0.4169135093688965, "learning_rate": 6.359713691050308e-06, "loss": 0.0282, "step": 40305 }, { "epoch": 2.0462967663333163, "grad_norm": 0.3398691713809967, "learning_rate": 6.3580215577778914e-06, "loss": 0.0372, "step": 40310 }, { "epoch": 2.046550586324179, "grad_norm": 0.3975592255592346, "learning_rate": 6.356329424505474e-06, "loss": 0.0342, "step": 40315 }, { "epoch": 2.0468044063150415, "grad_norm": 0.28930965065956116, "learning_rate": 6.354637291233058e-06, "loss": 0.0398, "step": 40320 }, { "epoch": 2.047058226305904, "grad_norm": 0.311261922121048, "learning_rate": 6.352945157960641e-06, "loss": 0.0354, "step": 40325 }, { "epoch": 2.047312046296766, "grad_norm": 0.441555917263031, "learning_rate": 6.351253024688226e-06, "loss": 0.0357, "step": 40330 }, { "epoch": 2.047565866287629, "grad_norm": 0.8066316843032837, "learning_rate": 6.349560891415808e-06, "loss": 0.038, "step": 40335 }, { "epoch": 2.0478196862784914, "grad_norm": 0.39569395780563354, "learning_rate": 6.347868758143392e-06, "loss": 0.0428, "step": 40340 }, { "epoch": 2.0480735062693536, "grad_norm": 0.28354886174201965, "learning_rate": 6.346176624870975e-06, "loss": 0.0329, "step": 40345 }, { "epoch": 2.048327326260216, "grad_norm": 0.29512515664100647, "learning_rate": 6.344484491598559e-06, "loss": 0.0334, "step": 40350 }, { "epoch": 2.048581146251079, "grad_norm": 0.35090482234954834, "learning_rate": 6.342792358326142e-06, "loss": 0.0335, "step": 40355 }, { "epoch": 2.0488349662419414, "grad_norm": 0.48609450459480286, "learning_rate": 6.341100225053726e-06, "loss": 0.0357, "step": 40360 }, { "epoch": 2.0490887862328035, "grad_norm": 0.2837776839733124, "learning_rate": 6.33940809178131e-06, "loss": 0.0336, "step": 40365 }, { "epoch": 2.049342606223666, "grad_norm": 0.8645208477973938, "learning_rate": 6.337715958508893e-06, "loss": 0.0291, "step": 40370 }, { "epoch": 2.0495964262145288, "grad_norm": 0.3983393907546997, "learning_rate": 6.336023825236476e-06, "loss": 0.0422, "step": 40375 }, { "epoch": 2.0498502462053914, "grad_norm": 0.3151146173477173, "learning_rate": 6.334331691964059e-06, "loss": 0.0325, "step": 40380 }, { "epoch": 2.0501040661962535, "grad_norm": 0.27322548627853394, "learning_rate": 6.332639558691644e-06, "loss": 0.0348, "step": 40385 }, { "epoch": 2.050357886187116, "grad_norm": 0.3726036846637726, "learning_rate": 6.330947425419226e-06, "loss": 0.0324, "step": 40390 }, { "epoch": 2.0506117061779787, "grad_norm": 0.3639608323574066, "learning_rate": 6.32925529214681e-06, "loss": 0.0334, "step": 40395 }, { "epoch": 2.050865526168841, "grad_norm": 0.2874867618083954, "learning_rate": 6.3275631588743936e-06, "loss": 0.0311, "step": 40400 }, { "epoch": 2.0511193461597035, "grad_norm": 0.4578050374984741, "learning_rate": 6.325871025601977e-06, "loss": 0.0338, "step": 40405 }, { "epoch": 2.051373166150566, "grad_norm": 0.5080977082252502, "learning_rate": 6.32417889232956e-06, "loss": 0.0354, "step": 40410 }, { "epoch": 2.0516269861414287, "grad_norm": 0.4109554886817932, "learning_rate": 6.322486759057143e-06, "loss": 0.0367, "step": 40415 }, { "epoch": 2.051880806132291, "grad_norm": 0.5904496312141418, "learning_rate": 6.320794625784728e-06, "loss": 0.0429, "step": 40420 }, { "epoch": 2.0521346261231534, "grad_norm": 0.43072524666786194, "learning_rate": 6.319102492512311e-06, "loss": 0.0319, "step": 40425 }, { "epoch": 2.052388446114016, "grad_norm": 0.29243141412734985, "learning_rate": 6.317410359239894e-06, "loss": 0.034, "step": 40430 }, { "epoch": 2.052642266104878, "grad_norm": 0.35898685455322266, "learning_rate": 6.3157182259674775e-06, "loss": 0.0325, "step": 40435 }, { "epoch": 2.052896086095741, "grad_norm": 1.1760319471359253, "learning_rate": 6.314026092695061e-06, "loss": 0.0293, "step": 40440 }, { "epoch": 2.0531499060866034, "grad_norm": 0.2817917466163635, "learning_rate": 6.3123339594226455e-06, "loss": 0.0349, "step": 40445 }, { "epoch": 2.053403726077466, "grad_norm": 0.35304921865463257, "learning_rate": 6.310641826150228e-06, "loss": 0.0337, "step": 40450 }, { "epoch": 2.053657546068328, "grad_norm": 0.4008921682834625, "learning_rate": 6.308949692877812e-06, "loss": 0.0426, "step": 40455 }, { "epoch": 2.053911366059191, "grad_norm": 0.3372136950492859, "learning_rate": 6.307257559605395e-06, "loss": 0.0345, "step": 40460 }, { "epoch": 2.0541651860500534, "grad_norm": 0.5483426451683044, "learning_rate": 6.305565426332978e-06, "loss": 0.0302, "step": 40465 }, { "epoch": 2.054419006040916, "grad_norm": 0.18743745982646942, "learning_rate": 6.3038732930605615e-06, "loss": 0.0402, "step": 40470 }, { "epoch": 2.054672826031778, "grad_norm": 0.3917071521282196, "learning_rate": 6.302181159788146e-06, "loss": 0.0295, "step": 40475 }, { "epoch": 2.0549266460226407, "grad_norm": 0.3281704783439636, "learning_rate": 6.300489026515729e-06, "loss": 0.036, "step": 40480 }, { "epoch": 2.0551804660135033, "grad_norm": 0.3832402527332306, "learning_rate": 6.298796893243312e-06, "loss": 0.0338, "step": 40485 }, { "epoch": 2.0554342860043655, "grad_norm": 0.4939913749694824, "learning_rate": 6.297104759970896e-06, "loss": 0.037, "step": 40490 }, { "epoch": 2.055688105995228, "grad_norm": 0.34781306982040405, "learning_rate": 6.295412626698479e-06, "loss": 0.0351, "step": 40495 }, { "epoch": 2.0559419259860907, "grad_norm": 0.516234278678894, "learning_rate": 6.293720493426063e-06, "loss": 0.0343, "step": 40500 }, { "epoch": 2.0561957459769533, "grad_norm": 0.3306788206100464, "learning_rate": 6.2920283601536455e-06, "loss": 0.0442, "step": 40505 }, { "epoch": 2.0564495659678155, "grad_norm": 0.2988586127758026, "learning_rate": 6.29033622688123e-06, "loss": 0.0357, "step": 40510 }, { "epoch": 2.056703385958678, "grad_norm": 0.25617992877960205, "learning_rate": 6.288644093608813e-06, "loss": 0.0292, "step": 40515 }, { "epoch": 2.0569572059495407, "grad_norm": 0.2515580356121063, "learning_rate": 6.286951960336397e-06, "loss": 0.0303, "step": 40520 }, { "epoch": 2.0572110259404033, "grad_norm": 0.20995844900608063, "learning_rate": 6.28525982706398e-06, "loss": 0.0347, "step": 40525 }, { "epoch": 2.0574648459312654, "grad_norm": 0.6654472947120667, "learning_rate": 6.283567693791563e-06, "loss": 0.0363, "step": 40530 }, { "epoch": 2.057718665922128, "grad_norm": 0.47852277755737305, "learning_rate": 6.2818755605191476e-06, "loss": 0.0385, "step": 40535 }, { "epoch": 2.0579724859129906, "grad_norm": 0.4235854744911194, "learning_rate": 6.28018342724673e-06, "loss": 0.0353, "step": 40540 }, { "epoch": 2.058226305903853, "grad_norm": 0.4624616801738739, "learning_rate": 6.278491293974314e-06, "loss": 0.0372, "step": 40545 }, { "epoch": 2.0584801258947154, "grad_norm": 0.3644164204597473, "learning_rate": 6.276799160701897e-06, "loss": 0.0349, "step": 40550 }, { "epoch": 2.058733945885578, "grad_norm": 0.39000046253204346, "learning_rate": 6.275107027429481e-06, "loss": 0.0387, "step": 40555 }, { "epoch": 2.0589877658764406, "grad_norm": 0.3902978301048279, "learning_rate": 6.273414894157064e-06, "loss": 0.0314, "step": 40560 }, { "epoch": 2.0592415858673028, "grad_norm": 0.3080569803714752, "learning_rate": 6.271722760884648e-06, "loss": 0.0396, "step": 40565 }, { "epoch": 2.0594954058581654, "grad_norm": 0.4491880536079407, "learning_rate": 6.2700306276122315e-06, "loss": 0.0316, "step": 40570 }, { "epoch": 2.059749225849028, "grad_norm": 0.4176236689090729, "learning_rate": 6.268338494339815e-06, "loss": 0.0359, "step": 40575 }, { "epoch": 2.06000304583989, "grad_norm": 0.334221214056015, "learning_rate": 6.266646361067398e-06, "loss": 0.0385, "step": 40580 }, { "epoch": 2.0602568658307527, "grad_norm": 0.37594276666641235, "learning_rate": 6.264954227794981e-06, "loss": 0.0371, "step": 40585 }, { "epoch": 2.0605106858216153, "grad_norm": 0.50478595495224, "learning_rate": 6.263262094522565e-06, "loss": 0.0351, "step": 40590 }, { "epoch": 2.060764505812478, "grad_norm": 0.31555843353271484, "learning_rate": 6.261569961250149e-06, "loss": 0.0399, "step": 40595 }, { "epoch": 2.06101832580334, "grad_norm": 0.40124326944351196, "learning_rate": 6.259877827977732e-06, "loss": 0.0326, "step": 40600 }, { "epoch": 2.0612721457942027, "grad_norm": 0.2583628296852112, "learning_rate": 6.2581856947053155e-06, "loss": 0.0318, "step": 40605 }, { "epoch": 2.0615259657850653, "grad_norm": 0.24880623817443848, "learning_rate": 6.256493561432899e-06, "loss": 0.0449, "step": 40610 }, { "epoch": 2.061779785775928, "grad_norm": 0.30163612961769104, "learning_rate": 6.254801428160483e-06, "loss": 0.0431, "step": 40615 }, { "epoch": 2.06203360576679, "grad_norm": 0.2596006989479065, "learning_rate": 6.253109294888065e-06, "loss": 0.0284, "step": 40620 }, { "epoch": 2.0622874257576527, "grad_norm": 0.2562941014766693, "learning_rate": 6.25141716161565e-06, "loss": 0.0301, "step": 40625 }, { "epoch": 2.0625412457485153, "grad_norm": 0.3008427619934082, "learning_rate": 6.249725028343233e-06, "loss": 0.0318, "step": 40630 }, { "epoch": 2.0627950657393774, "grad_norm": 0.335764080286026, "learning_rate": 6.248032895070816e-06, "loss": 0.0344, "step": 40635 }, { "epoch": 2.06304888573024, "grad_norm": 0.5868695378303528, "learning_rate": 6.2463407617983995e-06, "loss": 0.0366, "step": 40640 }, { "epoch": 2.0633027057211026, "grad_norm": 0.38782617449760437, "learning_rate": 6.244648628525983e-06, "loss": 0.03, "step": 40645 }, { "epoch": 2.0635565257119652, "grad_norm": 0.5476816296577454, "learning_rate": 6.242956495253567e-06, "loss": 0.0364, "step": 40650 }, { "epoch": 2.0638103457028274, "grad_norm": 0.27162158489227295, "learning_rate": 6.24126436198115e-06, "loss": 0.0307, "step": 40655 }, { "epoch": 2.06406416569369, "grad_norm": 0.41091370582580566, "learning_rate": 6.239572228708734e-06, "loss": 0.0273, "step": 40660 }, { "epoch": 2.0643179856845526, "grad_norm": 0.32241737842559814, "learning_rate": 6.237880095436317e-06, "loss": 0.0356, "step": 40665 }, { "epoch": 2.064571805675415, "grad_norm": 0.25782039761543274, "learning_rate": 6.236187962163901e-06, "loss": 0.0388, "step": 40670 }, { "epoch": 2.0648256256662774, "grad_norm": 0.3799375295639038, "learning_rate": 6.2344958288914834e-06, "loss": 0.0437, "step": 40675 }, { "epoch": 2.06507944565714, "grad_norm": 0.5712692141532898, "learning_rate": 6.232803695619068e-06, "loss": 0.0388, "step": 40680 }, { "epoch": 2.0653332656480026, "grad_norm": 0.47142764925956726, "learning_rate": 6.231111562346651e-06, "loss": 0.0337, "step": 40685 }, { "epoch": 2.0655870856388647, "grad_norm": 0.3855939507484436, "learning_rate": 6.229419429074235e-06, "loss": 0.0405, "step": 40690 }, { "epoch": 2.0658409056297273, "grad_norm": 0.36656343936920166, "learning_rate": 6.227727295801818e-06, "loss": 0.0339, "step": 40695 }, { "epoch": 2.06609472562059, "grad_norm": 0.529723048210144, "learning_rate": 6.226035162529401e-06, "loss": 0.0302, "step": 40700 }, { "epoch": 2.0663485456114525, "grad_norm": 0.41368111968040466, "learning_rate": 6.224343029256985e-06, "loss": 0.0269, "step": 40705 }, { "epoch": 2.0666023656023147, "grad_norm": 0.26356250047683716, "learning_rate": 6.222650895984567e-06, "loss": 0.0311, "step": 40710 }, { "epoch": 2.0668561855931773, "grad_norm": 0.42043641209602356, "learning_rate": 6.220958762712152e-06, "loss": 0.0305, "step": 40715 }, { "epoch": 2.06711000558404, "grad_norm": 0.3392391502857208, "learning_rate": 6.219266629439735e-06, "loss": 0.0395, "step": 40720 }, { "epoch": 2.067363825574902, "grad_norm": 0.43280452489852905, "learning_rate": 6.217574496167319e-06, "loss": 0.0341, "step": 40725 }, { "epoch": 2.0676176455657647, "grad_norm": 0.3381582200527191, "learning_rate": 6.2158823628949016e-06, "loss": 0.0269, "step": 40730 }, { "epoch": 2.0678714655566273, "grad_norm": 0.34322628378868103, "learning_rate": 6.214190229622485e-06, "loss": 0.036, "step": 40735 }, { "epoch": 2.06812528554749, "grad_norm": 0.40060219168663025, "learning_rate": 6.2124980963500695e-06, "loss": 0.033, "step": 40740 }, { "epoch": 2.068379105538352, "grad_norm": 0.21552379429340363, "learning_rate": 6.210805963077653e-06, "loss": 0.0284, "step": 40745 }, { "epoch": 2.0686329255292146, "grad_norm": 0.30288293957710266, "learning_rate": 6.209113829805236e-06, "loss": 0.0363, "step": 40750 }, { "epoch": 2.0688867455200772, "grad_norm": 0.29889294505119324, "learning_rate": 6.207421696532819e-06, "loss": 0.0335, "step": 40755 }, { "epoch": 2.06914056551094, "grad_norm": 0.24941956996917725, "learning_rate": 6.205729563260403e-06, "loss": 0.034, "step": 40760 }, { "epoch": 2.069394385501802, "grad_norm": 0.3261270523071289, "learning_rate": 6.204037429987987e-06, "loss": 0.0371, "step": 40765 }, { "epoch": 2.0696482054926646, "grad_norm": 0.3423829674720764, "learning_rate": 6.20234529671557e-06, "loss": 0.0502, "step": 40770 }, { "epoch": 2.069902025483527, "grad_norm": 0.34635868668556213, "learning_rate": 6.2006531634431535e-06, "loss": 0.0316, "step": 40775 }, { "epoch": 2.0701558454743894, "grad_norm": 0.2521602213382721, "learning_rate": 6.198961030170737e-06, "loss": 0.0366, "step": 40780 }, { "epoch": 2.070409665465252, "grad_norm": 0.22092673182487488, "learning_rate": 6.19726889689832e-06, "loss": 0.0309, "step": 40785 }, { "epoch": 2.0706634854561146, "grad_norm": 0.33411675691604614, "learning_rate": 6.195576763625903e-06, "loss": 0.039, "step": 40790 }, { "epoch": 2.070917305446977, "grad_norm": 0.22911129891872406, "learning_rate": 6.193884630353487e-06, "loss": 0.028, "step": 40795 }, { "epoch": 2.0711711254378393, "grad_norm": 4.144477844238281, "learning_rate": 6.192192497081071e-06, "loss": 0.0308, "step": 40800 }, { "epoch": 2.071424945428702, "grad_norm": 0.24567130208015442, "learning_rate": 6.190500363808654e-06, "loss": 0.0316, "step": 40805 }, { "epoch": 2.0716787654195645, "grad_norm": 0.3412927985191345, "learning_rate": 6.1888082305362374e-06, "loss": 0.0327, "step": 40810 }, { "epoch": 2.071932585410427, "grad_norm": 0.2085569053888321, "learning_rate": 6.187116097263821e-06, "loss": 0.0341, "step": 40815 }, { "epoch": 2.0721864054012893, "grad_norm": 0.2844689190387726, "learning_rate": 6.1854239639914045e-06, "loss": 0.0332, "step": 40820 }, { "epoch": 2.072440225392152, "grad_norm": 0.431959331035614, "learning_rate": 6.183731830718987e-06, "loss": 0.033, "step": 40825 }, { "epoch": 2.0726940453830145, "grad_norm": 0.2555539011955261, "learning_rate": 6.182039697446572e-06, "loss": 0.0349, "step": 40830 }, { "epoch": 2.0729478653738767, "grad_norm": 0.35910919308662415, "learning_rate": 6.180347564174155e-06, "loss": 0.0355, "step": 40835 }, { "epoch": 2.0732016853647393, "grad_norm": 0.34894514083862305, "learning_rate": 6.178655430901739e-06, "loss": 0.0316, "step": 40840 }, { "epoch": 2.073455505355602, "grad_norm": 0.25370797514915466, "learning_rate": 6.176963297629321e-06, "loss": 0.0401, "step": 40845 }, { "epoch": 2.0737093253464645, "grad_norm": 0.3521358072757721, "learning_rate": 6.175271164356905e-06, "loss": 0.0334, "step": 40850 }, { "epoch": 2.0739631453373266, "grad_norm": 0.3908250331878662, "learning_rate": 6.173579031084489e-06, "loss": 0.0355, "step": 40855 }, { "epoch": 2.074216965328189, "grad_norm": 0.31335461139678955, "learning_rate": 6.171886897812073e-06, "loss": 0.035, "step": 40860 }, { "epoch": 2.074470785319052, "grad_norm": 0.404501736164093, "learning_rate": 6.170194764539656e-06, "loss": 0.0357, "step": 40865 }, { "epoch": 2.074724605309914, "grad_norm": 0.39813360571861267, "learning_rate": 6.168502631267239e-06, "loss": 0.0325, "step": 40870 }, { "epoch": 2.0749784253007766, "grad_norm": 0.3120538592338562, "learning_rate": 6.166810497994823e-06, "loss": 0.0332, "step": 40875 }, { "epoch": 2.075232245291639, "grad_norm": 0.24379944801330566, "learning_rate": 6.165118364722405e-06, "loss": 0.0328, "step": 40880 }, { "epoch": 2.075486065282502, "grad_norm": 0.3763482868671417, "learning_rate": 6.163426231449989e-06, "loss": 0.0434, "step": 40885 }, { "epoch": 2.075739885273364, "grad_norm": 0.27463313937187195, "learning_rate": 6.161734098177573e-06, "loss": 0.0314, "step": 40890 }, { "epoch": 2.0759937052642266, "grad_norm": 0.3216022849082947, "learning_rate": 6.160041964905157e-06, "loss": 0.0356, "step": 40895 }, { "epoch": 2.076247525255089, "grad_norm": 0.4241223633289337, "learning_rate": 6.1583498316327395e-06, "loss": 0.0361, "step": 40900 }, { "epoch": 2.0765013452459518, "grad_norm": 0.4173457622528076, "learning_rate": 6.156657698360323e-06, "loss": 0.0287, "step": 40905 }, { "epoch": 2.076755165236814, "grad_norm": 0.4344845712184906, "learning_rate": 6.154965565087907e-06, "loss": 0.0383, "step": 40910 }, { "epoch": 2.0770089852276765, "grad_norm": 0.38608500361442566, "learning_rate": 6.153273431815491e-06, "loss": 0.0305, "step": 40915 }, { "epoch": 2.077262805218539, "grad_norm": 0.37664130330085754, "learning_rate": 6.151581298543074e-06, "loss": 0.0358, "step": 40920 }, { "epoch": 2.0775166252094013, "grad_norm": 0.36590301990509033, "learning_rate": 6.149889165270657e-06, "loss": 0.0372, "step": 40925 }, { "epoch": 2.077770445200264, "grad_norm": 0.2959653437137604, "learning_rate": 6.148197031998241e-06, "loss": 0.0309, "step": 40930 }, { "epoch": 2.0780242651911265, "grad_norm": 0.20683987438678741, "learning_rate": 6.146504898725824e-06, "loss": 0.0359, "step": 40935 }, { "epoch": 2.078278085181989, "grad_norm": 0.4567297101020813, "learning_rate": 6.144812765453407e-06, "loss": 0.0349, "step": 40940 }, { "epoch": 2.0785319051728512, "grad_norm": 0.4936054050922394, "learning_rate": 6.1431206321809914e-06, "loss": 0.0291, "step": 40945 }, { "epoch": 2.078785725163714, "grad_norm": 0.45569735765457153, "learning_rate": 6.141428498908575e-06, "loss": 0.0383, "step": 40950 }, { "epoch": 2.0790395451545765, "grad_norm": 0.26285237073898315, "learning_rate": 6.139736365636158e-06, "loss": 0.0269, "step": 40955 }, { "epoch": 2.079293365145439, "grad_norm": 0.4176620841026306, "learning_rate": 6.138044232363741e-06, "loss": 0.0335, "step": 40960 }, { "epoch": 2.079547185136301, "grad_norm": 0.3247475028038025, "learning_rate": 6.136352099091325e-06, "loss": 0.0369, "step": 40965 }, { "epoch": 2.079801005127164, "grad_norm": 0.37378448247909546, "learning_rate": 6.134659965818909e-06, "loss": 0.0439, "step": 40970 }, { "epoch": 2.0800548251180264, "grad_norm": 0.35308516025543213, "learning_rate": 6.132967832546491e-06, "loss": 0.0385, "step": 40975 }, { "epoch": 2.0803086451088886, "grad_norm": 0.3680042624473572, "learning_rate": 6.131275699274075e-06, "loss": 0.0321, "step": 40980 }, { "epoch": 2.080562465099751, "grad_norm": 0.27900105714797974, "learning_rate": 6.129583566001659e-06, "loss": 0.0325, "step": 40985 }, { "epoch": 2.080816285090614, "grad_norm": 0.3695995509624481, "learning_rate": 6.1278914327292425e-06, "loss": 0.0301, "step": 40990 }, { "epoch": 2.0810701050814764, "grad_norm": 0.5086187124252319, "learning_rate": 6.126199299456825e-06, "loss": 0.0364, "step": 40995 }, { "epoch": 2.0813239250723385, "grad_norm": 0.38260045647621155, "learning_rate": 6.124507166184409e-06, "loss": 0.0364, "step": 41000 }, { "epoch": 2.081577745063201, "grad_norm": 0.3308844566345215, "learning_rate": 6.122815032911993e-06, "loss": 0.028, "step": 41005 }, { "epoch": 2.0818315650540637, "grad_norm": 1.5857940912246704, "learning_rate": 6.121122899639577e-06, "loss": 0.0435, "step": 41010 }, { "epoch": 2.0820853850449264, "grad_norm": 0.3290964365005493, "learning_rate": 6.119430766367159e-06, "loss": 0.03, "step": 41015 }, { "epoch": 2.0823392050357885, "grad_norm": 0.9360296130180359, "learning_rate": 6.117738633094743e-06, "loss": 0.0372, "step": 41020 }, { "epoch": 2.082593025026651, "grad_norm": 0.435088574886322, "learning_rate": 6.1160464998223265e-06, "loss": 0.0323, "step": 41025 }, { "epoch": 2.0828468450175137, "grad_norm": 0.307736873626709, "learning_rate": 6.114354366549909e-06, "loss": 0.0336, "step": 41030 }, { "epoch": 2.083100665008376, "grad_norm": 0.34247100353240967, "learning_rate": 6.1126622332774936e-06, "loss": 0.0398, "step": 41035 }, { "epoch": 2.0833544849992385, "grad_norm": 0.2559192478656769, "learning_rate": 6.110970100005077e-06, "loss": 0.0318, "step": 41040 }, { "epoch": 2.083608304990101, "grad_norm": 0.42116183042526245, "learning_rate": 6.109277966732661e-06, "loss": 0.0294, "step": 41045 }, { "epoch": 2.0838621249809637, "grad_norm": 0.36998796463012695, "learning_rate": 6.107585833460243e-06, "loss": 0.0314, "step": 41050 }, { "epoch": 2.084115944971826, "grad_norm": 0.3762056231498718, "learning_rate": 6.105893700187827e-06, "loss": 0.0422, "step": 41055 }, { "epoch": 2.0843697649626884, "grad_norm": 0.24426433444023132, "learning_rate": 6.104201566915411e-06, "loss": 0.0381, "step": 41060 }, { "epoch": 2.084623584953551, "grad_norm": 0.21887318789958954, "learning_rate": 6.102509433642995e-06, "loss": 0.0313, "step": 41065 }, { "epoch": 2.0848774049444136, "grad_norm": 0.3165340721607208, "learning_rate": 6.1008173003705775e-06, "loss": 0.0316, "step": 41070 }, { "epoch": 2.085131224935276, "grad_norm": 0.4518713355064392, "learning_rate": 6.099125167098161e-06, "loss": 0.0359, "step": 41075 }, { "epoch": 2.0853850449261384, "grad_norm": 0.3209543824195862, "learning_rate": 6.097433033825745e-06, "loss": 0.0383, "step": 41080 }, { "epoch": 2.085638864917001, "grad_norm": 0.2539142668247223, "learning_rate": 6.095740900553328e-06, "loss": 0.0288, "step": 41085 }, { "epoch": 2.085892684907863, "grad_norm": 0.327890008687973, "learning_rate": 6.094048767280911e-06, "loss": 0.0335, "step": 41090 }, { "epoch": 2.0861465048987258, "grad_norm": 0.40916168689727783, "learning_rate": 6.092356634008495e-06, "loss": 0.032, "step": 41095 }, { "epoch": 2.0864003248895884, "grad_norm": 0.5362857580184937, "learning_rate": 6.090664500736079e-06, "loss": 0.0312, "step": 41100 }, { "epoch": 2.086654144880451, "grad_norm": 0.20822367072105408, "learning_rate": 6.0889723674636615e-06, "loss": 0.0347, "step": 41105 }, { "epoch": 2.086907964871313, "grad_norm": 0.25241541862487793, "learning_rate": 6.087280234191245e-06, "loss": 0.0347, "step": 41110 }, { "epoch": 2.0871617848621757, "grad_norm": 0.3435940742492676, "learning_rate": 6.0855881009188286e-06, "loss": 0.0375, "step": 41115 }, { "epoch": 2.0874156048530383, "grad_norm": 0.37276408076286316, "learning_rate": 6.083895967646413e-06, "loss": 0.0413, "step": 41120 }, { "epoch": 2.0876694248439005, "grad_norm": 0.30192676186561584, "learning_rate": 6.082203834373996e-06, "loss": 0.0288, "step": 41125 }, { "epoch": 2.087923244834763, "grad_norm": 0.304162859916687, "learning_rate": 6.080511701101579e-06, "loss": 0.0346, "step": 41130 }, { "epoch": 2.0881770648256257, "grad_norm": 0.3105781078338623, "learning_rate": 6.078819567829163e-06, "loss": 0.0358, "step": 41135 }, { "epoch": 2.0884308848164883, "grad_norm": 0.29993224143981934, "learning_rate": 6.077127434556746e-06, "loss": 0.0362, "step": 41140 }, { "epoch": 2.0886847048073505, "grad_norm": 0.3587764501571655, "learning_rate": 6.075435301284329e-06, "loss": 0.0332, "step": 41145 }, { "epoch": 2.088938524798213, "grad_norm": 0.40033718943595886, "learning_rate": 6.073743168011913e-06, "loss": 0.0415, "step": 41150 }, { "epoch": 2.0891923447890757, "grad_norm": 0.26234421133995056, "learning_rate": 6.072051034739497e-06, "loss": 0.0298, "step": 41155 }, { "epoch": 2.0894461647799383, "grad_norm": 0.6442959904670715, "learning_rate": 6.0703589014670805e-06, "loss": 0.0369, "step": 41160 }, { "epoch": 2.0896999847708004, "grad_norm": 3.047675848007202, "learning_rate": 6.068666768194663e-06, "loss": 0.0298, "step": 41165 }, { "epoch": 2.089953804761663, "grad_norm": 0.3974636197090149, "learning_rate": 6.066974634922247e-06, "loss": 0.0372, "step": 41170 }, { "epoch": 2.0902076247525256, "grad_norm": 0.3301776945590973, "learning_rate": 6.06528250164983e-06, "loss": 0.0358, "step": 41175 }, { "epoch": 2.090461444743388, "grad_norm": 0.4375455975532532, "learning_rate": 6.063590368377415e-06, "loss": 0.0282, "step": 41180 }, { "epoch": 2.0907152647342504, "grad_norm": 0.28423193097114563, "learning_rate": 6.061898235104997e-06, "loss": 0.0269, "step": 41185 }, { "epoch": 2.090969084725113, "grad_norm": 0.3169099986553192, "learning_rate": 6.060206101832581e-06, "loss": 0.0336, "step": 41190 }, { "epoch": 2.0912229047159756, "grad_norm": 0.3071502447128296, "learning_rate": 6.0585139685601644e-06, "loss": 0.041, "step": 41195 }, { "epoch": 2.0914767247068378, "grad_norm": 0.29963594675064087, "learning_rate": 6.056821835287747e-06, "loss": 0.0348, "step": 41200 }, { "epoch": 2.0917305446977004, "grad_norm": 0.3469637632369995, "learning_rate": 6.055129702015331e-06, "loss": 0.0343, "step": 41205 }, { "epoch": 2.091984364688563, "grad_norm": 0.6181375980377197, "learning_rate": 6.053437568742915e-06, "loss": 0.041, "step": 41210 }, { "epoch": 2.0922381846794256, "grad_norm": 0.7354362607002258, "learning_rate": 6.051745435470499e-06, "loss": 0.0401, "step": 41215 }, { "epoch": 2.0924920046702877, "grad_norm": 0.3352373242378235, "learning_rate": 6.050053302198081e-06, "loss": 0.0297, "step": 41220 }, { "epoch": 2.0927458246611503, "grad_norm": 0.4805290102958679, "learning_rate": 6.048361168925665e-06, "loss": 0.0429, "step": 41225 }, { "epoch": 2.092999644652013, "grad_norm": 0.47889429330825806, "learning_rate": 6.046669035653248e-06, "loss": 0.0407, "step": 41230 }, { "epoch": 2.093253464642875, "grad_norm": 0.2577539086341858, "learning_rate": 6.044976902380833e-06, "loss": 0.0312, "step": 41235 }, { "epoch": 2.0935072846337377, "grad_norm": 0.29059699177742004, "learning_rate": 6.0432847691084155e-06, "loss": 0.0312, "step": 41240 }, { "epoch": 2.0937611046246003, "grad_norm": 0.23989014327526093, "learning_rate": 6.041592635835999e-06, "loss": 0.0355, "step": 41245 }, { "epoch": 2.094014924615463, "grad_norm": 0.41318488121032715, "learning_rate": 6.039900502563583e-06, "loss": 0.0325, "step": 41250 }, { "epoch": 2.094268744606325, "grad_norm": 0.37093064188957214, "learning_rate": 6.038208369291166e-06, "loss": 0.0387, "step": 41255 }, { "epoch": 2.0945225645971877, "grad_norm": 0.28160449862480164, "learning_rate": 6.036516236018749e-06, "loss": 0.0339, "step": 41260 }, { "epoch": 2.0947763845880503, "grad_norm": 0.25263282656669617, "learning_rate": 6.034824102746333e-06, "loss": 0.029, "step": 41265 }, { "epoch": 2.0950302045789124, "grad_norm": 0.3500441610813141, "learning_rate": 6.033131969473917e-06, "loss": 0.0303, "step": 41270 }, { "epoch": 2.095284024569775, "grad_norm": 0.25448429584503174, "learning_rate": 6.0314398362014995e-06, "loss": 0.0351, "step": 41275 }, { "epoch": 2.0955378445606376, "grad_norm": 0.3285086154937744, "learning_rate": 6.029747702929083e-06, "loss": 0.0357, "step": 41280 }, { "epoch": 2.0957916645515002, "grad_norm": 0.20502544939517975, "learning_rate": 6.0280555696566665e-06, "loss": 0.0267, "step": 41285 }, { "epoch": 2.0960454845423624, "grad_norm": 0.30411866307258606, "learning_rate": 6.02636343638425e-06, "loss": 0.0342, "step": 41290 }, { "epoch": 2.096299304533225, "grad_norm": 0.3867494761943817, "learning_rate": 6.024671303111833e-06, "loss": 0.0381, "step": 41295 }, { "epoch": 2.0965531245240876, "grad_norm": 0.3713706433773041, "learning_rate": 6.022979169839417e-06, "loss": 0.032, "step": 41300 }, { "epoch": 2.09680694451495, "grad_norm": 0.3718228042125702, "learning_rate": 6.021287036567001e-06, "loss": 0.0355, "step": 41305 }, { "epoch": 2.0970607645058124, "grad_norm": 0.34682971239089966, "learning_rate": 6.019594903294584e-06, "loss": 0.0349, "step": 41310 }, { "epoch": 2.097314584496675, "grad_norm": 0.4789373278617859, "learning_rate": 6.017902770022167e-06, "loss": 0.0343, "step": 41315 }, { "epoch": 2.0975684044875376, "grad_norm": 0.3536965548992157, "learning_rate": 6.0162106367497505e-06, "loss": 0.0354, "step": 41320 }, { "epoch": 2.0978222244783997, "grad_norm": 2.8167803287506104, "learning_rate": 6.014518503477335e-06, "loss": 0.0352, "step": 41325 }, { "epoch": 2.0980760444692623, "grad_norm": 0.3835851550102234, "learning_rate": 6.0128263702049184e-06, "loss": 0.0314, "step": 41330 }, { "epoch": 2.098329864460125, "grad_norm": 0.5738430023193359, "learning_rate": 6.011134236932501e-06, "loss": 0.0293, "step": 41335 }, { "epoch": 2.0985836844509875, "grad_norm": 0.37173283100128174, "learning_rate": 6.009442103660085e-06, "loss": 0.0275, "step": 41340 }, { "epoch": 2.0988375044418497, "grad_norm": 0.4102475643157959, "learning_rate": 6.007749970387668e-06, "loss": 0.0387, "step": 41345 }, { "epoch": 2.0990913244327123, "grad_norm": 0.24621905386447906, "learning_rate": 6.006057837115251e-06, "loss": 0.0317, "step": 41350 }, { "epoch": 2.099345144423575, "grad_norm": 0.35181790590286255, "learning_rate": 6.004365703842835e-06, "loss": 0.0308, "step": 41355 }, { "epoch": 2.0995989644144375, "grad_norm": 0.5117323398590088, "learning_rate": 6.002673570570419e-06, "loss": 0.0285, "step": 41360 }, { "epoch": 2.0998527844052997, "grad_norm": 0.27516618371009827, "learning_rate": 6.000981437298002e-06, "loss": 0.0259, "step": 41365 }, { "epoch": 2.1001066043961623, "grad_norm": 0.369096577167511, "learning_rate": 5.999289304025585e-06, "loss": 0.0331, "step": 41370 }, { "epoch": 2.100360424387025, "grad_norm": 0.3615378737449646, "learning_rate": 5.997597170753169e-06, "loss": 0.0345, "step": 41375 }, { "epoch": 2.100614244377887, "grad_norm": 0.24888181686401367, "learning_rate": 5.995905037480752e-06, "loss": 0.0267, "step": 41380 }, { "epoch": 2.1008680643687496, "grad_norm": 0.5934954881668091, "learning_rate": 5.994212904208337e-06, "loss": 0.0307, "step": 41385 }, { "epoch": 2.1011218843596122, "grad_norm": 0.3345963656902313, "learning_rate": 5.992520770935919e-06, "loss": 0.0291, "step": 41390 }, { "epoch": 2.101375704350475, "grad_norm": 0.25391513109207153, "learning_rate": 5.990828637663503e-06, "loss": 0.0349, "step": 41395 }, { "epoch": 2.101629524341337, "grad_norm": 1.7726422548294067, "learning_rate": 5.989136504391086e-06, "loss": 0.0315, "step": 41400 }, { "epoch": 2.1018833443321996, "grad_norm": 0.37963372468948364, "learning_rate": 5.98744437111867e-06, "loss": 0.0361, "step": 41405 }, { "epoch": 2.102137164323062, "grad_norm": 0.2904397249221802, "learning_rate": 5.985752237846253e-06, "loss": 0.0329, "step": 41410 }, { "epoch": 2.1023909843139243, "grad_norm": 0.2872005105018616, "learning_rate": 5.984060104573837e-06, "loss": 0.0336, "step": 41415 }, { "epoch": 2.102644804304787, "grad_norm": 0.28669625520706177, "learning_rate": 5.9823679713014206e-06, "loss": 0.0311, "step": 41420 }, { "epoch": 2.1028986242956496, "grad_norm": 0.29623928666114807, "learning_rate": 5.980675838029004e-06, "loss": 0.0372, "step": 41425 }, { "epoch": 2.103152444286512, "grad_norm": 0.27423080801963806, "learning_rate": 5.978983704756587e-06, "loss": 0.0305, "step": 41430 }, { "epoch": 2.1034062642773743, "grad_norm": 0.32243502140045166, "learning_rate": 5.97729157148417e-06, "loss": 0.0364, "step": 41435 }, { "epoch": 2.103660084268237, "grad_norm": 0.2616148889064789, "learning_rate": 5.975599438211755e-06, "loss": 0.0269, "step": 41440 }, { "epoch": 2.1039139042590995, "grad_norm": 0.31910741329193115, "learning_rate": 5.9739073049393374e-06, "loss": 0.0337, "step": 41445 }, { "epoch": 2.104167724249962, "grad_norm": 0.21999460458755493, "learning_rate": 5.972215171666921e-06, "loss": 0.0311, "step": 41450 }, { "epoch": 2.1044215442408243, "grad_norm": 0.2903193533420563, "learning_rate": 5.9705230383945045e-06, "loss": 0.0343, "step": 41455 }, { "epoch": 2.104675364231687, "grad_norm": 0.3697342574596405, "learning_rate": 5.968830905122088e-06, "loss": 0.0359, "step": 41460 }, { "epoch": 2.1049291842225495, "grad_norm": 0.7846786975860596, "learning_rate": 5.967138771849671e-06, "loss": 0.0352, "step": 41465 }, { "epoch": 2.1051830042134116, "grad_norm": 0.3305487036705017, "learning_rate": 5.965446638577254e-06, "loss": 0.0393, "step": 41470 }, { "epoch": 2.1054368242042742, "grad_norm": 0.7854042053222656, "learning_rate": 5.963754505304839e-06, "loss": 0.0355, "step": 41475 }, { "epoch": 2.105690644195137, "grad_norm": 0.26362791657447815, "learning_rate": 5.962062372032422e-06, "loss": 0.0368, "step": 41480 }, { "epoch": 2.1059444641859995, "grad_norm": 0.348886102437973, "learning_rate": 5.960370238760005e-06, "loss": 0.0334, "step": 41485 }, { "epoch": 2.1061982841768616, "grad_norm": 0.538774311542511, "learning_rate": 5.9586781054875885e-06, "loss": 0.0375, "step": 41490 }, { "epoch": 2.106452104167724, "grad_norm": 0.31403154134750366, "learning_rate": 5.956985972215172e-06, "loss": 0.0318, "step": 41495 }, { "epoch": 2.106705924158587, "grad_norm": 0.3984089493751526, "learning_rate": 5.955293838942756e-06, "loss": 0.0364, "step": 41500 }, { "epoch": 2.1069597441494494, "grad_norm": 0.2857896089553833, "learning_rate": 5.953601705670339e-06, "loss": 0.0301, "step": 41505 }, { "epoch": 2.1072135641403116, "grad_norm": 0.4356675148010254, "learning_rate": 5.951909572397923e-06, "loss": 0.038, "step": 41510 }, { "epoch": 2.107467384131174, "grad_norm": 0.24989667534828186, "learning_rate": 5.950217439125506e-06, "loss": 0.036, "step": 41515 }, { "epoch": 2.107721204122037, "grad_norm": 0.38163310289382935, "learning_rate": 5.948525305853089e-06, "loss": 0.0369, "step": 41520 }, { "epoch": 2.107975024112899, "grad_norm": 0.2924136221408844, "learning_rate": 5.9468331725806724e-06, "loss": 0.0252, "step": 41525 }, { "epoch": 2.1082288441037615, "grad_norm": 0.6070223450660706, "learning_rate": 5.945141039308257e-06, "loss": 0.0415, "step": 41530 }, { "epoch": 2.108482664094624, "grad_norm": 0.3942844569683075, "learning_rate": 5.94344890603584e-06, "loss": 0.0342, "step": 41535 }, { "epoch": 2.1087364840854868, "grad_norm": 0.2800804376602173, "learning_rate": 5.941756772763423e-06, "loss": 0.0367, "step": 41540 }, { "epoch": 2.108990304076349, "grad_norm": 0.49201416969299316, "learning_rate": 5.940064639491007e-06, "loss": 0.0329, "step": 41545 }, { "epoch": 2.1092441240672115, "grad_norm": 0.35287120938301086, "learning_rate": 5.93837250621859e-06, "loss": 0.0311, "step": 41550 }, { "epoch": 2.109497944058074, "grad_norm": 0.5795586705207825, "learning_rate": 5.9366803729461746e-06, "loss": 0.0402, "step": 41555 }, { "epoch": 2.1097517640489363, "grad_norm": 0.3506717085838318, "learning_rate": 5.934988239673756e-06, "loss": 0.036, "step": 41560 }, { "epoch": 2.110005584039799, "grad_norm": 0.46954965591430664, "learning_rate": 5.933296106401341e-06, "loss": 0.0356, "step": 41565 }, { "epoch": 2.1102594040306615, "grad_norm": 0.3808066248893738, "learning_rate": 5.931603973128924e-06, "loss": 0.0353, "step": 41570 }, { "epoch": 2.110513224021524, "grad_norm": 0.3136650025844574, "learning_rate": 5.929911839856508e-06, "loss": 0.035, "step": 41575 }, { "epoch": 2.1107670440123862, "grad_norm": 0.3546306788921356, "learning_rate": 5.928219706584091e-06, "loss": 0.0314, "step": 41580 }, { "epoch": 2.111020864003249, "grad_norm": 0.2899860441684723, "learning_rate": 5.926527573311674e-06, "loss": 0.032, "step": 41585 }, { "epoch": 2.1112746839941114, "grad_norm": 0.3703233003616333, "learning_rate": 5.9248354400392585e-06, "loss": 0.0407, "step": 41590 }, { "epoch": 2.111528503984974, "grad_norm": 0.3205859661102295, "learning_rate": 5.923143306766841e-06, "loss": 0.0343, "step": 41595 }, { "epoch": 2.111782323975836, "grad_norm": 0.32839179039001465, "learning_rate": 5.921451173494425e-06, "loss": 0.0289, "step": 41600 }, { "epoch": 2.112036143966699, "grad_norm": 0.3662111461162567, "learning_rate": 5.919759040222008e-06, "loss": 0.0358, "step": 41605 }, { "epoch": 2.1122899639575614, "grad_norm": 0.2723025977611542, "learning_rate": 5.918066906949592e-06, "loss": 0.029, "step": 41610 }, { "epoch": 2.1125437839484236, "grad_norm": 0.4181530773639679, "learning_rate": 5.9163747736771746e-06, "loss": 0.0378, "step": 41615 }, { "epoch": 2.112797603939286, "grad_norm": 0.33553892374038696, "learning_rate": 5.914682640404759e-06, "loss": 0.0318, "step": 41620 }, { "epoch": 2.1130514239301488, "grad_norm": 0.3390558958053589, "learning_rate": 5.9129905071323425e-06, "loss": 0.0371, "step": 41625 }, { "epoch": 2.1133052439210114, "grad_norm": 0.4047679007053375, "learning_rate": 5.911298373859926e-06, "loss": 0.0403, "step": 41630 }, { "epoch": 2.1135590639118735, "grad_norm": 0.32319176197052, "learning_rate": 5.909606240587509e-06, "loss": 0.0321, "step": 41635 }, { "epoch": 2.113812883902736, "grad_norm": 0.3039274513721466, "learning_rate": 5.907914107315092e-06, "loss": 0.0327, "step": 41640 }, { "epoch": 2.1140667038935987, "grad_norm": 0.31280750036239624, "learning_rate": 5.906221974042677e-06, "loss": 0.0274, "step": 41645 }, { "epoch": 2.1143205238844613, "grad_norm": 0.4254741966724396, "learning_rate": 5.90452984077026e-06, "loss": 0.0325, "step": 41650 }, { "epoch": 2.1145743438753235, "grad_norm": 0.5669242739677429, "learning_rate": 5.902837707497843e-06, "loss": 0.0361, "step": 41655 }, { "epoch": 2.114828163866186, "grad_norm": 0.2737228572368622, "learning_rate": 5.9011455742254265e-06, "loss": 0.035, "step": 41660 }, { "epoch": 2.1150819838570487, "grad_norm": 0.35538411140441895, "learning_rate": 5.89945344095301e-06, "loss": 0.0387, "step": 41665 }, { "epoch": 2.115335803847911, "grad_norm": 0.23223480582237244, "learning_rate": 5.897761307680593e-06, "loss": 0.0377, "step": 41670 }, { "epoch": 2.1155896238387735, "grad_norm": 0.34843796491622925, "learning_rate": 5.896069174408176e-06, "loss": 0.0363, "step": 41675 }, { "epoch": 2.115843443829636, "grad_norm": 0.3045257329940796, "learning_rate": 5.894377041135761e-06, "loss": 0.0285, "step": 41680 }, { "epoch": 2.1160972638204987, "grad_norm": 0.6797808408737183, "learning_rate": 5.892684907863344e-06, "loss": 0.0337, "step": 41685 }, { "epoch": 2.116351083811361, "grad_norm": 0.8594637513160706, "learning_rate": 5.890992774590927e-06, "loss": 0.0361, "step": 41690 }, { "epoch": 2.1166049038022234, "grad_norm": 0.6106091141700745, "learning_rate": 5.88930064131851e-06, "loss": 0.0309, "step": 41695 }, { "epoch": 2.116858723793086, "grad_norm": 0.3803764879703522, "learning_rate": 5.887608508046094e-06, "loss": 0.0416, "step": 41700 }, { "epoch": 2.117112543783948, "grad_norm": 0.29211854934692383, "learning_rate": 5.885916374773678e-06, "loss": 0.0315, "step": 41705 }, { "epoch": 2.117366363774811, "grad_norm": 0.3258429765701294, "learning_rate": 5.884224241501261e-06, "loss": 0.0343, "step": 41710 }, { "epoch": 2.1176201837656734, "grad_norm": 0.35452401638031006, "learning_rate": 5.882532108228845e-06, "loss": 0.0339, "step": 41715 }, { "epoch": 2.117874003756536, "grad_norm": 0.3310200572013855, "learning_rate": 5.880839974956428e-06, "loss": 0.0315, "step": 41720 }, { "epoch": 2.118127823747398, "grad_norm": 0.43584612011909485, "learning_rate": 5.879147841684012e-06, "loss": 0.0368, "step": 41725 }, { "epoch": 2.1183816437382608, "grad_norm": 0.29797083139419556, "learning_rate": 5.877455708411594e-06, "loss": 0.031, "step": 41730 }, { "epoch": 2.1186354637291234, "grad_norm": 0.3942992389202118, "learning_rate": 5.875763575139179e-06, "loss": 0.0329, "step": 41735 }, { "epoch": 2.118889283719986, "grad_norm": 0.272582083940506, "learning_rate": 5.874071441866762e-06, "loss": 0.0319, "step": 41740 }, { "epoch": 2.119143103710848, "grad_norm": 0.3063197135925293, "learning_rate": 5.872379308594346e-06, "loss": 0.032, "step": 41745 }, { "epoch": 2.1193969237017107, "grad_norm": 0.24674548208713531, "learning_rate": 5.8706871753219286e-06, "loss": 0.0317, "step": 41750 }, { "epoch": 2.1196507436925733, "grad_norm": 0.38824862241744995, "learning_rate": 5.868995042049512e-06, "loss": 0.0332, "step": 41755 }, { "epoch": 2.1199045636834355, "grad_norm": 0.46050703525543213, "learning_rate": 5.867302908777096e-06, "loss": 0.0383, "step": 41760 }, { "epoch": 2.120158383674298, "grad_norm": 0.34193238615989685, "learning_rate": 5.865610775504678e-06, "loss": 0.0346, "step": 41765 }, { "epoch": 2.1204122036651607, "grad_norm": 0.5228917002677917, "learning_rate": 5.863918642232263e-06, "loss": 0.0437, "step": 41770 }, { "epoch": 2.1206660236560233, "grad_norm": 0.33700859546661377, "learning_rate": 5.862226508959846e-06, "loss": 0.0338, "step": 41775 }, { "epoch": 2.1209198436468855, "grad_norm": 1.1780332326889038, "learning_rate": 5.86053437568743e-06, "loss": 0.0357, "step": 41780 }, { "epoch": 2.121173663637748, "grad_norm": 0.34263256192207336, "learning_rate": 5.8588422424150125e-06, "loss": 0.0346, "step": 41785 }, { "epoch": 2.1214274836286107, "grad_norm": 0.35251888632774353, "learning_rate": 5.857150109142596e-06, "loss": 0.039, "step": 41790 }, { "epoch": 2.1216813036194733, "grad_norm": 0.5435646772384644, "learning_rate": 5.8554579758701805e-06, "loss": 0.0302, "step": 41795 }, { "epoch": 2.1219351236103354, "grad_norm": 0.2682788074016571, "learning_rate": 5.853765842597764e-06, "loss": 0.0342, "step": 41800 }, { "epoch": 2.122188943601198, "grad_norm": 0.33146414160728455, "learning_rate": 5.852073709325347e-06, "loss": 0.0371, "step": 41805 }, { "epoch": 2.1224427635920606, "grad_norm": 0.41672345995903015, "learning_rate": 5.85038157605293e-06, "loss": 0.0298, "step": 41810 }, { "epoch": 2.122696583582923, "grad_norm": 0.4548164904117584, "learning_rate": 5.848689442780514e-06, "loss": 0.0322, "step": 41815 }, { "epoch": 2.1229504035737854, "grad_norm": 0.2541426122188568, "learning_rate": 5.846997309508098e-06, "loss": 0.0278, "step": 41820 }, { "epoch": 2.123204223564648, "grad_norm": 0.313416451215744, "learning_rate": 5.845305176235681e-06, "loss": 0.0288, "step": 41825 }, { "epoch": 2.1234580435555106, "grad_norm": 1.0031988620758057, "learning_rate": 5.8436130429632644e-06, "loss": 0.0385, "step": 41830 }, { "epoch": 2.1237118635463728, "grad_norm": 0.3969992399215698, "learning_rate": 5.841920909690848e-06, "loss": 0.0382, "step": 41835 }, { "epoch": 2.1239656835372354, "grad_norm": 0.2746165096759796, "learning_rate": 5.840228776418431e-06, "loss": 0.0272, "step": 41840 }, { "epoch": 2.124219503528098, "grad_norm": 0.257400780916214, "learning_rate": 5.838536643146014e-06, "loss": 0.0276, "step": 41845 }, { "epoch": 2.1244733235189606, "grad_norm": 0.2490048110485077, "learning_rate": 5.836844509873599e-06, "loss": 0.0282, "step": 41850 }, { "epoch": 2.1247271435098227, "grad_norm": 0.45560091733932495, "learning_rate": 5.835152376601182e-06, "loss": 0.0403, "step": 41855 }, { "epoch": 2.1249809635006853, "grad_norm": 0.6095450520515442, "learning_rate": 5.833460243328765e-06, "loss": 0.0305, "step": 41860 }, { "epoch": 2.125234783491548, "grad_norm": 0.3171636164188385, "learning_rate": 5.831768110056348e-06, "loss": 0.0452, "step": 41865 }, { "epoch": 2.12548860348241, "grad_norm": 0.38941559195518494, "learning_rate": 5.830075976783932e-06, "loss": 0.037, "step": 41870 }, { "epoch": 2.1257424234732727, "grad_norm": 0.2944948077201843, "learning_rate": 5.8283838435115155e-06, "loss": 0.0305, "step": 41875 }, { "epoch": 2.1259962434641353, "grad_norm": 0.3468674123287201, "learning_rate": 5.826691710239098e-06, "loss": 0.0391, "step": 41880 }, { "epoch": 2.126250063454998, "grad_norm": 0.305381178855896, "learning_rate": 5.8249995769666826e-06, "loss": 0.0326, "step": 41885 }, { "epoch": 2.12650388344586, "grad_norm": 0.27479448914527893, "learning_rate": 5.823307443694266e-06, "loss": 0.0348, "step": 41890 }, { "epoch": 2.1267577034367227, "grad_norm": 0.2829623222351074, "learning_rate": 5.82161531042185e-06, "loss": 0.0357, "step": 41895 }, { "epoch": 2.1270115234275853, "grad_norm": 0.4399655759334564, "learning_rate": 5.819923177149432e-06, "loss": 0.0402, "step": 41900 }, { "epoch": 2.127265343418448, "grad_norm": 0.31989338994026184, "learning_rate": 5.818231043877016e-06, "loss": 0.0377, "step": 41905 }, { "epoch": 2.12751916340931, "grad_norm": 0.37366315722465515, "learning_rate": 5.8165389106046e-06, "loss": 0.0294, "step": 41910 }, { "epoch": 2.1277729834001726, "grad_norm": 0.2678751051425934, "learning_rate": 5.814846777332183e-06, "loss": 0.0261, "step": 41915 }, { "epoch": 2.1280268033910352, "grad_norm": 0.2653253674507141, "learning_rate": 5.8131546440597665e-06, "loss": 0.0299, "step": 41920 }, { "epoch": 2.1282806233818974, "grad_norm": 0.3034153878688812, "learning_rate": 5.81146251078735e-06, "loss": 0.0295, "step": 41925 }, { "epoch": 2.12853444337276, "grad_norm": 0.31852924823760986, "learning_rate": 5.809770377514934e-06, "loss": 0.0288, "step": 41930 }, { "epoch": 2.1287882633636226, "grad_norm": 0.4647199511528015, "learning_rate": 5.808078244242516e-06, "loss": 0.0321, "step": 41935 }, { "epoch": 2.129042083354485, "grad_norm": 0.22736875712871552, "learning_rate": 5.806386110970101e-06, "loss": 0.0317, "step": 41940 }, { "epoch": 2.1292959033453474, "grad_norm": 0.43213286995887756, "learning_rate": 5.804693977697684e-06, "loss": 0.0463, "step": 41945 }, { "epoch": 2.12954972333621, "grad_norm": 0.26801225543022156, "learning_rate": 5.803001844425268e-06, "loss": 0.0342, "step": 41950 }, { "epoch": 2.1298035433270726, "grad_norm": 0.3421599268913269, "learning_rate": 5.8013097111528505e-06, "loss": 0.0366, "step": 41955 }, { "epoch": 2.1300573633179347, "grad_norm": 0.5038048028945923, "learning_rate": 5.799617577880434e-06, "loss": 0.045, "step": 41960 }, { "epoch": 2.1303111833087973, "grad_norm": 0.3382954001426697, "learning_rate": 5.797925444608018e-06, "loss": 0.0389, "step": 41965 }, { "epoch": 2.13056500329966, "grad_norm": 0.3887106478214264, "learning_rate": 5.796233311335602e-06, "loss": 0.0386, "step": 41970 }, { "epoch": 2.1308188232905225, "grad_norm": 0.3497892916202545, "learning_rate": 5.794541178063185e-06, "loss": 0.0339, "step": 41975 }, { "epoch": 2.1310726432813847, "grad_norm": 0.49205145239830017, "learning_rate": 5.792849044790768e-06, "loss": 0.0336, "step": 41980 }, { "epoch": 2.1313264632722473, "grad_norm": 0.2803315818309784, "learning_rate": 5.791156911518352e-06, "loss": 0.0351, "step": 41985 }, { "epoch": 2.13158028326311, "grad_norm": 0.4156321883201599, "learning_rate": 5.789464778245935e-06, "loss": 0.0338, "step": 41990 }, { "epoch": 2.1318341032539725, "grad_norm": 0.29502180218696594, "learning_rate": 5.787772644973518e-06, "loss": 0.0357, "step": 41995 }, { "epoch": 2.1320879232448346, "grad_norm": 0.3212575614452362, "learning_rate": 5.786080511701102e-06, "loss": 0.0279, "step": 42000 }, { "epoch": 2.1323417432356973, "grad_norm": 0.40423792600631714, "learning_rate": 5.784388378428686e-06, "loss": 0.0396, "step": 42005 }, { "epoch": 2.13259556322656, "grad_norm": 0.31288251280784607, "learning_rate": 5.782696245156269e-06, "loss": 0.0329, "step": 42010 }, { "epoch": 2.132849383217422, "grad_norm": 0.26179739832878113, "learning_rate": 5.781004111883852e-06, "loss": 0.0369, "step": 42015 }, { "epoch": 2.1331032032082846, "grad_norm": 0.3306056559085846, "learning_rate": 5.779311978611436e-06, "loss": 0.0338, "step": 42020 }, { "epoch": 2.133357023199147, "grad_norm": 0.314375102519989, "learning_rate": 5.77761984533902e-06, "loss": 0.0268, "step": 42025 }, { "epoch": 2.13361084319001, "grad_norm": 0.2780863046646118, "learning_rate": 5.775927712066603e-06, "loss": 0.0318, "step": 42030 }, { "epoch": 2.133864663180872, "grad_norm": 0.8828794956207275, "learning_rate": 5.774235578794186e-06, "loss": 0.0348, "step": 42035 }, { "epoch": 2.1341184831717346, "grad_norm": 0.3718717098236084, "learning_rate": 5.77254344552177e-06, "loss": 0.0327, "step": 42040 }, { "epoch": 2.134372303162597, "grad_norm": 0.3083139657974243, "learning_rate": 5.7708513122493535e-06, "loss": 0.0315, "step": 42045 }, { "epoch": 2.13462612315346, "grad_norm": 0.39442887902259827, "learning_rate": 5.769159178976936e-06, "loss": 0.0329, "step": 42050 }, { "epoch": 2.134879943144322, "grad_norm": 0.31543081998825073, "learning_rate": 5.76746704570452e-06, "loss": 0.0283, "step": 42055 }, { "epoch": 2.1351337631351845, "grad_norm": 0.3375324606895447, "learning_rate": 5.765774912432104e-06, "loss": 0.0304, "step": 42060 }, { "epoch": 2.135387583126047, "grad_norm": 0.3673302233219147, "learning_rate": 5.764082779159688e-06, "loss": 0.0307, "step": 42065 }, { "epoch": 2.1356414031169093, "grad_norm": 0.3264317214488983, "learning_rate": 5.76239064588727e-06, "loss": 0.0348, "step": 42070 }, { "epoch": 2.135895223107772, "grad_norm": 0.3236130177974701, "learning_rate": 5.760698512614854e-06, "loss": 0.036, "step": 42075 }, { "epoch": 2.1361490430986345, "grad_norm": 0.46524056792259216, "learning_rate": 5.759006379342437e-06, "loss": 0.0344, "step": 42080 }, { "epoch": 2.136402863089497, "grad_norm": 0.3885677456855774, "learning_rate": 5.75731424607002e-06, "loss": 0.0339, "step": 42085 }, { "epoch": 2.1366566830803593, "grad_norm": 0.3115951418876648, "learning_rate": 5.7556221127976045e-06, "loss": 0.0305, "step": 42090 }, { "epoch": 2.136910503071222, "grad_norm": 0.28554677963256836, "learning_rate": 5.753929979525188e-06, "loss": 0.0333, "step": 42095 }, { "epoch": 2.1371643230620845, "grad_norm": 0.2696676254272461, "learning_rate": 5.752237846252772e-06, "loss": 0.0334, "step": 42100 }, { "epoch": 2.1374181430529466, "grad_norm": 0.2900834083557129, "learning_rate": 5.750545712980354e-06, "loss": 0.038, "step": 42105 }, { "epoch": 2.1376719630438092, "grad_norm": 0.7020983099937439, "learning_rate": 5.748853579707938e-06, "loss": 0.0291, "step": 42110 }, { "epoch": 2.137925783034672, "grad_norm": 0.2436659336090088, "learning_rate": 5.747161446435522e-06, "loss": 0.0376, "step": 42115 }, { "epoch": 2.1381796030255344, "grad_norm": 0.2586725652217865, "learning_rate": 5.745469313163106e-06, "loss": 0.0286, "step": 42120 }, { "epoch": 2.1384334230163966, "grad_norm": 0.414315789937973, "learning_rate": 5.7437771798906885e-06, "loss": 0.03, "step": 42125 }, { "epoch": 2.138687243007259, "grad_norm": 0.3736642897129059, "learning_rate": 5.742085046618272e-06, "loss": 0.0359, "step": 42130 }, { "epoch": 2.138941062998122, "grad_norm": 0.44996002316474915, "learning_rate": 5.7403929133458556e-06, "loss": 0.0366, "step": 42135 }, { "epoch": 2.1391948829889844, "grad_norm": 0.39824178814888, "learning_rate": 5.73870078007344e-06, "loss": 0.0373, "step": 42140 }, { "epoch": 2.1394487029798466, "grad_norm": 0.29749274253845215, "learning_rate": 5.737008646801022e-06, "loss": 0.0359, "step": 42145 }, { "epoch": 2.139702522970709, "grad_norm": 0.2442183643579483, "learning_rate": 5.735316513528606e-06, "loss": 0.0293, "step": 42150 }, { "epoch": 2.139956342961572, "grad_norm": 0.32782718539237976, "learning_rate": 5.73362438025619e-06, "loss": 0.0348, "step": 42155 }, { "epoch": 2.140210162952434, "grad_norm": 0.362077534198761, "learning_rate": 5.7319322469837724e-06, "loss": 0.0315, "step": 42160 }, { "epoch": 2.1404639829432965, "grad_norm": 0.4710308313369751, "learning_rate": 5.730240113711356e-06, "loss": 0.0364, "step": 42165 }, { "epoch": 2.140717802934159, "grad_norm": 0.37128207087516785, "learning_rate": 5.7285479804389395e-06, "loss": 0.0361, "step": 42170 }, { "epoch": 2.1409716229250217, "grad_norm": 0.2547430694103241, "learning_rate": 5.726855847166524e-06, "loss": 0.0332, "step": 42175 }, { "epoch": 2.141225442915884, "grad_norm": 0.2352115660905838, "learning_rate": 5.725163713894107e-06, "loss": 0.0246, "step": 42180 }, { "epoch": 2.1414792629067465, "grad_norm": 0.276828408241272, "learning_rate": 5.72347158062169e-06, "loss": 0.0316, "step": 42185 }, { "epoch": 2.141733082897609, "grad_norm": 0.29261428117752075, "learning_rate": 5.721779447349274e-06, "loss": 0.0292, "step": 42190 }, { "epoch": 2.1419869028884717, "grad_norm": 0.43690693378448486, "learning_rate": 5.720087314076857e-06, "loss": 0.0321, "step": 42195 }, { "epoch": 2.142240722879334, "grad_norm": 0.35671156644821167, "learning_rate": 5.71839518080444e-06, "loss": 0.0402, "step": 42200 }, { "epoch": 2.1424945428701965, "grad_norm": 0.32940271496772766, "learning_rate": 5.716703047532024e-06, "loss": 0.0322, "step": 42205 }, { "epoch": 2.142748362861059, "grad_norm": 0.3026338815689087, "learning_rate": 5.715010914259608e-06, "loss": 0.03, "step": 42210 }, { "epoch": 2.1430021828519212, "grad_norm": 0.29456955194473267, "learning_rate": 5.7133187809871914e-06, "loss": 0.0311, "step": 42215 }, { "epoch": 2.143256002842784, "grad_norm": 0.4180431067943573, "learning_rate": 5.711626647714774e-06, "loss": 0.0335, "step": 42220 }, { "epoch": 2.1435098228336464, "grad_norm": 0.25527551770210266, "learning_rate": 5.709934514442358e-06, "loss": 0.0317, "step": 42225 }, { "epoch": 2.143763642824509, "grad_norm": 0.38631191849708557, "learning_rate": 5.708242381169942e-06, "loss": 0.0316, "step": 42230 }, { "epoch": 2.144017462815371, "grad_norm": 0.4208427667617798, "learning_rate": 5.706550247897526e-06, "loss": 0.0349, "step": 42235 }, { "epoch": 2.144271282806234, "grad_norm": 0.3130198121070862, "learning_rate": 5.704858114625108e-06, "loss": 0.0315, "step": 42240 }, { "epoch": 2.1445251027970964, "grad_norm": 0.3700083792209625, "learning_rate": 5.703165981352692e-06, "loss": 0.0371, "step": 42245 }, { "epoch": 2.1447789227879586, "grad_norm": 0.3512057363986969, "learning_rate": 5.701473848080275e-06, "loss": 0.0342, "step": 42250 }, { "epoch": 2.145032742778821, "grad_norm": 0.34538981318473816, "learning_rate": 5.699781714807858e-06, "loss": 0.0325, "step": 42255 }, { "epoch": 2.1452865627696838, "grad_norm": 0.3043919801712036, "learning_rate": 5.698089581535442e-06, "loss": 0.0276, "step": 42260 }, { "epoch": 2.1455403827605464, "grad_norm": 0.30897659063339233, "learning_rate": 5.696397448263026e-06, "loss": 0.035, "step": 42265 }, { "epoch": 2.1457942027514085, "grad_norm": 0.3704676032066345, "learning_rate": 5.6947053149906096e-06, "loss": 0.0356, "step": 42270 }, { "epoch": 2.146048022742271, "grad_norm": 0.3414614498615265, "learning_rate": 5.693013181718192e-06, "loss": 0.0348, "step": 42275 }, { "epoch": 2.1463018427331337, "grad_norm": 0.3303713798522949, "learning_rate": 5.691321048445776e-06, "loss": 0.0324, "step": 42280 }, { "epoch": 2.1465556627239963, "grad_norm": 0.24423152208328247, "learning_rate": 5.689628915173359e-06, "loss": 0.0315, "step": 42285 }, { "epoch": 2.1468094827148585, "grad_norm": 0.43744367361068726, "learning_rate": 5.687936781900944e-06, "loss": 0.0395, "step": 42290 }, { "epoch": 2.147063302705721, "grad_norm": 0.3257816433906555, "learning_rate": 5.6862446486285264e-06, "loss": 0.0375, "step": 42295 }, { "epoch": 2.1473171226965837, "grad_norm": 0.4677280783653259, "learning_rate": 5.68455251535611e-06, "loss": 0.0381, "step": 42300 }, { "epoch": 2.147570942687446, "grad_norm": 0.5847957134246826, "learning_rate": 5.6828603820836935e-06, "loss": 0.0322, "step": 42305 }, { "epoch": 2.1478247626783085, "grad_norm": 0.34915176033973694, "learning_rate": 5.681168248811277e-06, "loss": 0.0299, "step": 42310 }, { "epoch": 2.148078582669171, "grad_norm": 0.29204973578453064, "learning_rate": 5.67947611553886e-06, "loss": 0.0323, "step": 42315 }, { "epoch": 2.1483324026600337, "grad_norm": 1.1503249406814575, "learning_rate": 5.677783982266444e-06, "loss": 0.0396, "step": 42320 }, { "epoch": 2.148586222650896, "grad_norm": 0.32928937673568726, "learning_rate": 5.676091848994028e-06, "loss": 0.0354, "step": 42325 }, { "epoch": 2.1488400426417584, "grad_norm": 0.28010591864585876, "learning_rate": 5.67439971572161e-06, "loss": 0.0268, "step": 42330 }, { "epoch": 2.149093862632621, "grad_norm": 0.22533346712589264, "learning_rate": 5.672707582449194e-06, "loss": 0.0278, "step": 42335 }, { "epoch": 2.1493476826234836, "grad_norm": 0.27991658449172974, "learning_rate": 5.6710154491767775e-06, "loss": 0.0332, "step": 42340 }, { "epoch": 2.149601502614346, "grad_norm": 0.2624974548816681, "learning_rate": 5.669323315904362e-06, "loss": 0.0388, "step": 42345 }, { "epoch": 2.1498553226052084, "grad_norm": 0.45382624864578247, "learning_rate": 5.667631182631944e-06, "loss": 0.027, "step": 42350 }, { "epoch": 2.150109142596071, "grad_norm": 0.27515923976898193, "learning_rate": 5.665939049359528e-06, "loss": 0.0313, "step": 42355 }, { "epoch": 2.150362962586933, "grad_norm": 0.3498297929763794, "learning_rate": 5.664246916087112e-06, "loss": 0.0286, "step": 42360 }, { "epoch": 2.1506167825777958, "grad_norm": 0.32744863629341125, "learning_rate": 5.662554782814695e-06, "loss": 0.0281, "step": 42365 }, { "epoch": 2.1508706025686584, "grad_norm": 0.4372187852859497, "learning_rate": 5.660862649542278e-06, "loss": 0.0405, "step": 42370 }, { "epoch": 2.151124422559521, "grad_norm": 0.23590414226055145, "learning_rate": 5.6591705162698615e-06, "loss": 0.0291, "step": 42375 }, { "epoch": 2.151378242550383, "grad_norm": 0.33515384793281555, "learning_rate": 5.657478382997446e-06, "loss": 0.0315, "step": 42380 }, { "epoch": 2.1516320625412457, "grad_norm": 0.2991335093975067, "learning_rate": 5.655786249725029e-06, "loss": 0.0414, "step": 42385 }, { "epoch": 2.1518858825321083, "grad_norm": 0.40804606676101685, "learning_rate": 5.654094116452612e-06, "loss": 0.0335, "step": 42390 }, { "epoch": 2.1521397025229705, "grad_norm": 0.42320457100868225, "learning_rate": 5.652401983180196e-06, "loss": 0.0363, "step": 42395 }, { "epoch": 2.152393522513833, "grad_norm": 0.5444016456604004, "learning_rate": 5.650709849907779e-06, "loss": 0.029, "step": 42400 }, { "epoch": 2.1526473425046957, "grad_norm": 0.3651138246059418, "learning_rate": 5.649017716635362e-06, "loss": 0.0399, "step": 42405 }, { "epoch": 2.1529011624955583, "grad_norm": 0.30745598673820496, "learning_rate": 5.647325583362946e-06, "loss": 0.0348, "step": 42410 }, { "epoch": 2.1531549824864205, "grad_norm": 0.2215747833251953, "learning_rate": 5.64563345009053e-06, "loss": 0.0342, "step": 42415 }, { "epoch": 2.153408802477283, "grad_norm": 0.47790786623954773, "learning_rate": 5.643941316818113e-06, "loss": 0.0371, "step": 42420 }, { "epoch": 2.1536626224681457, "grad_norm": 0.4156583547592163, "learning_rate": 5.642249183545696e-06, "loss": 0.0326, "step": 42425 }, { "epoch": 2.1539164424590083, "grad_norm": 0.38761693239212036, "learning_rate": 5.64055705027328e-06, "loss": 0.0358, "step": 42430 }, { "epoch": 2.1541702624498704, "grad_norm": 0.3796803057193756, "learning_rate": 5.638864917000864e-06, "loss": 0.0363, "step": 42435 }, { "epoch": 2.154424082440733, "grad_norm": 0.3898187279701233, "learning_rate": 5.6371727837284475e-06, "loss": 0.0339, "step": 42440 }, { "epoch": 2.1546779024315956, "grad_norm": 0.31213703751564026, "learning_rate": 5.63548065045603e-06, "loss": 0.039, "step": 42445 }, { "epoch": 2.154931722422458, "grad_norm": 0.2264726459980011, "learning_rate": 5.633788517183614e-06, "loss": 0.0302, "step": 42450 }, { "epoch": 2.1551855424133204, "grad_norm": 0.24250595271587372, "learning_rate": 5.632096383911197e-06, "loss": 0.0376, "step": 42455 }, { "epoch": 2.155439362404183, "grad_norm": 0.32829833030700684, "learning_rate": 5.630404250638781e-06, "loss": 0.0352, "step": 42460 }, { "epoch": 2.1556931823950456, "grad_norm": 0.3167964220046997, "learning_rate": 5.6287121173663636e-06, "loss": 0.0368, "step": 42465 }, { "epoch": 2.1559470023859078, "grad_norm": 0.3327024579048157, "learning_rate": 5.627019984093948e-06, "loss": 0.0368, "step": 42470 }, { "epoch": 2.1562008223767704, "grad_norm": 0.337005078792572, "learning_rate": 5.6253278508215315e-06, "loss": 0.0324, "step": 42475 }, { "epoch": 2.156454642367633, "grad_norm": 0.34376224875450134, "learning_rate": 5.623635717549114e-06, "loss": 0.0346, "step": 42480 }, { "epoch": 2.1567084623584956, "grad_norm": 0.41502124071121216, "learning_rate": 5.621943584276698e-06, "loss": 0.0333, "step": 42485 }, { "epoch": 2.1569622823493577, "grad_norm": 0.34838420152664185, "learning_rate": 5.620251451004281e-06, "loss": 0.0334, "step": 42490 }, { "epoch": 2.1572161023402203, "grad_norm": 0.36978450417518616, "learning_rate": 5.618559317731866e-06, "loss": 0.0338, "step": 42495 }, { "epoch": 2.157469922331083, "grad_norm": 0.2665274441242218, "learning_rate": 5.616867184459448e-06, "loss": 0.0275, "step": 42500 }, { "epoch": 2.157723742321945, "grad_norm": 0.389095664024353, "learning_rate": 5.615175051187032e-06, "loss": 0.0333, "step": 42505 }, { "epoch": 2.1579775623128077, "grad_norm": 0.2917749881744385, "learning_rate": 5.6134829179146155e-06, "loss": 0.0332, "step": 42510 }, { "epoch": 2.1582313823036703, "grad_norm": 0.3830176889896393, "learning_rate": 5.611790784642199e-06, "loss": 0.034, "step": 42515 }, { "epoch": 2.158485202294533, "grad_norm": 0.3689527213573456, "learning_rate": 5.610098651369782e-06, "loss": 0.0304, "step": 42520 }, { "epoch": 2.158739022285395, "grad_norm": 0.517031729221344, "learning_rate": 5.608406518097366e-06, "loss": 0.0376, "step": 42525 }, { "epoch": 2.1589928422762577, "grad_norm": 0.3284299671649933, "learning_rate": 5.60671438482495e-06, "loss": 0.039, "step": 42530 }, { "epoch": 2.1592466622671203, "grad_norm": 0.3636420667171478, "learning_rate": 5.605022251552533e-06, "loss": 0.0334, "step": 42535 }, { "epoch": 2.1595004822579824, "grad_norm": 0.5183312892913818, "learning_rate": 5.603330118280116e-06, "loss": 0.0377, "step": 42540 }, { "epoch": 2.159754302248845, "grad_norm": 0.30915188789367676, "learning_rate": 5.6016379850076994e-06, "loss": 0.0271, "step": 42545 }, { "epoch": 2.1600081222397076, "grad_norm": 0.3173023760318756, "learning_rate": 5.599945851735283e-06, "loss": 0.0332, "step": 42550 }, { "epoch": 2.16026194223057, "grad_norm": 0.31991812586784363, "learning_rate": 5.598253718462867e-06, "loss": 0.0442, "step": 42555 }, { "epoch": 2.1605157622214324, "grad_norm": 0.7277874946594238, "learning_rate": 5.59656158519045e-06, "loss": 0.0327, "step": 42560 }, { "epoch": 2.160769582212295, "grad_norm": 0.23353658616542816, "learning_rate": 5.594869451918034e-06, "loss": 0.035, "step": 42565 }, { "epoch": 2.1610234022031576, "grad_norm": 0.5625307559967041, "learning_rate": 5.593177318645617e-06, "loss": 0.0313, "step": 42570 }, { "epoch": 2.16127722219402, "grad_norm": 0.41298919916152954, "learning_rate": 5.5914851853732e-06, "loss": 0.0377, "step": 42575 }, { "epoch": 2.1615310421848823, "grad_norm": 0.26099902391433716, "learning_rate": 5.589793052100783e-06, "loss": 0.0321, "step": 42580 }, { "epoch": 2.161784862175745, "grad_norm": 0.6761044263839722, "learning_rate": 5.588100918828368e-06, "loss": 0.0325, "step": 42585 }, { "epoch": 2.1620386821666076, "grad_norm": 0.3050651550292969, "learning_rate": 5.586408785555951e-06, "loss": 0.0327, "step": 42590 }, { "epoch": 2.16229250215747, "grad_norm": 0.4124506115913391, "learning_rate": 5.584716652283534e-06, "loss": 0.0396, "step": 42595 }, { "epoch": 2.1625463221483323, "grad_norm": 0.5697929263114929, "learning_rate": 5.583024519011118e-06, "loss": 0.0379, "step": 42600 }, { "epoch": 2.162800142139195, "grad_norm": 0.471778929233551, "learning_rate": 5.581332385738701e-06, "loss": 0.0291, "step": 42605 }, { "epoch": 2.1630539621300575, "grad_norm": 0.27507635951042175, "learning_rate": 5.5796402524662855e-06, "loss": 0.0331, "step": 42610 }, { "epoch": 2.1633077821209197, "grad_norm": 0.4065435230731964, "learning_rate": 5.577948119193868e-06, "loss": 0.0319, "step": 42615 }, { "epoch": 2.1635616021117823, "grad_norm": 0.32734885811805725, "learning_rate": 5.576255985921452e-06, "loss": 0.0301, "step": 42620 }, { "epoch": 2.163815422102645, "grad_norm": 0.27780577540397644, "learning_rate": 5.574563852649035e-06, "loss": 0.0255, "step": 42625 }, { "epoch": 2.1640692420935075, "grad_norm": 0.37050920724868774, "learning_rate": 5.572871719376619e-06, "loss": 0.0355, "step": 42630 }, { "epoch": 2.1643230620843696, "grad_norm": 0.3847156763076782, "learning_rate": 5.5711795861042015e-06, "loss": 0.0324, "step": 42635 }, { "epoch": 2.1645768820752322, "grad_norm": 0.4115941822528839, "learning_rate": 5.569487452831785e-06, "loss": 0.0343, "step": 42640 }, { "epoch": 2.164830702066095, "grad_norm": 0.3248206377029419, "learning_rate": 5.5677953195593695e-06, "loss": 0.0293, "step": 42645 }, { "epoch": 2.165084522056957, "grad_norm": 0.5444241762161255, "learning_rate": 5.566103186286952e-06, "loss": 0.0392, "step": 42650 }, { "epoch": 2.1653383420478196, "grad_norm": 0.4807383716106415, "learning_rate": 5.564411053014536e-06, "loss": 0.0359, "step": 42655 }, { "epoch": 2.165592162038682, "grad_norm": 0.2330743968486786, "learning_rate": 5.562718919742119e-06, "loss": 0.0249, "step": 42660 }, { "epoch": 2.165845982029545, "grad_norm": 0.29443061351776123, "learning_rate": 5.561026786469703e-06, "loss": 0.0321, "step": 42665 }, { "epoch": 2.166099802020407, "grad_norm": 0.42073678970336914, "learning_rate": 5.5593346531972855e-06, "loss": 0.0345, "step": 42670 }, { "epoch": 2.1663536220112696, "grad_norm": 0.2619229853153229, "learning_rate": 5.55764251992487e-06, "loss": 0.0242, "step": 42675 }, { "epoch": 2.166607442002132, "grad_norm": 0.37656205892562866, "learning_rate": 5.5559503866524534e-06, "loss": 0.0338, "step": 42680 }, { "epoch": 2.1668612619929943, "grad_norm": 0.3815825283527374, "learning_rate": 5.554258253380037e-06, "loss": 0.0316, "step": 42685 }, { "epoch": 2.167115081983857, "grad_norm": 0.24443581700325012, "learning_rate": 5.55256612010762e-06, "loss": 0.0399, "step": 42690 }, { "epoch": 2.1673689019747195, "grad_norm": 0.6075119376182556, "learning_rate": 5.550873986835203e-06, "loss": 0.0304, "step": 42695 }, { "epoch": 2.167622721965582, "grad_norm": 0.4012862741947174, "learning_rate": 5.549181853562788e-06, "loss": 0.0352, "step": 42700 }, { "epoch": 2.1678765419564443, "grad_norm": 0.5233157277107239, "learning_rate": 5.547489720290371e-06, "loss": 0.0303, "step": 42705 }, { "epoch": 2.168130361947307, "grad_norm": 0.23460358381271362, "learning_rate": 5.545797587017954e-06, "loss": 0.0379, "step": 42710 }, { "epoch": 2.1683841819381695, "grad_norm": 0.3687634766101837, "learning_rate": 5.544105453745537e-06, "loss": 0.035, "step": 42715 }, { "epoch": 2.168638001929032, "grad_norm": 0.49674105644226074, "learning_rate": 5.542413320473121e-06, "loss": 0.0305, "step": 42720 }, { "epoch": 2.1688918219198943, "grad_norm": 0.2974553406238556, "learning_rate": 5.540721187200704e-06, "loss": 0.0301, "step": 42725 }, { "epoch": 2.169145641910757, "grad_norm": 0.3256213068962097, "learning_rate": 5.539029053928287e-06, "loss": 0.0383, "step": 42730 }, { "epoch": 2.1693994619016195, "grad_norm": 0.2907634377479553, "learning_rate": 5.537336920655872e-06, "loss": 0.038, "step": 42735 }, { "epoch": 2.169653281892482, "grad_norm": 0.6651555895805359, "learning_rate": 5.535644787383455e-06, "loss": 0.0322, "step": 42740 }, { "epoch": 2.1699071018833442, "grad_norm": 0.3532056212425232, "learning_rate": 5.533952654111038e-06, "loss": 0.0327, "step": 42745 }, { "epoch": 2.170160921874207, "grad_norm": 0.2968956232070923, "learning_rate": 5.532260520838621e-06, "loss": 0.0339, "step": 42750 }, { "epoch": 2.1704147418650694, "grad_norm": 0.40109357237815857, "learning_rate": 5.530568387566205e-06, "loss": 0.0283, "step": 42755 }, { "epoch": 2.1706685618559316, "grad_norm": 0.44607171416282654, "learning_rate": 5.528876254293789e-06, "loss": 0.0346, "step": 42760 }, { "epoch": 2.170922381846794, "grad_norm": 0.4335692524909973, "learning_rate": 5.527184121021372e-06, "loss": 0.0409, "step": 42765 }, { "epoch": 2.171176201837657, "grad_norm": 0.40640366077423096, "learning_rate": 5.5254919877489556e-06, "loss": 0.0264, "step": 42770 }, { "epoch": 2.1714300218285194, "grad_norm": 0.2704562544822693, "learning_rate": 5.523799854476539e-06, "loss": 0.0287, "step": 42775 }, { "epoch": 2.1716838418193816, "grad_norm": 0.2660040855407715, "learning_rate": 5.522107721204123e-06, "loss": 0.0326, "step": 42780 }, { "epoch": 2.171937661810244, "grad_norm": 0.3302189111709595, "learning_rate": 5.520415587931705e-06, "loss": 0.044, "step": 42785 }, { "epoch": 2.1721914818011068, "grad_norm": 0.300412118434906, "learning_rate": 5.51872345465929e-06, "loss": 0.0349, "step": 42790 }, { "epoch": 2.172445301791969, "grad_norm": 0.26021671295166016, "learning_rate": 5.517031321386873e-06, "loss": 0.0375, "step": 42795 }, { "epoch": 2.1726991217828315, "grad_norm": 0.3481993079185486, "learning_rate": 5.515339188114457e-06, "loss": 0.0339, "step": 42800 }, { "epoch": 2.172952941773694, "grad_norm": 0.37204688787460327, "learning_rate": 5.5136470548420395e-06, "loss": 0.0304, "step": 42805 }, { "epoch": 2.1732067617645567, "grad_norm": 0.453274667263031, "learning_rate": 5.511954921569623e-06, "loss": 0.0302, "step": 42810 }, { "epoch": 2.173460581755419, "grad_norm": 0.4201031029224396, "learning_rate": 5.5102627882972075e-06, "loss": 0.0379, "step": 42815 }, { "epoch": 2.1737144017462815, "grad_norm": 0.3628084063529968, "learning_rate": 5.508570655024789e-06, "loss": 0.0356, "step": 42820 }, { "epoch": 2.173968221737144, "grad_norm": 0.4329802691936493, "learning_rate": 5.506878521752374e-06, "loss": 0.0347, "step": 42825 }, { "epoch": 2.1742220417280067, "grad_norm": 0.32375532388687134, "learning_rate": 5.505186388479957e-06, "loss": 0.039, "step": 42830 }, { "epoch": 2.174475861718869, "grad_norm": 0.7113686800003052, "learning_rate": 5.503494255207541e-06, "loss": 0.0349, "step": 42835 }, { "epoch": 2.1747296817097315, "grad_norm": 0.3115473985671997, "learning_rate": 5.5018021219351235e-06, "loss": 0.0399, "step": 42840 }, { "epoch": 2.174983501700594, "grad_norm": 0.7726762890815735, "learning_rate": 5.500109988662707e-06, "loss": 0.0312, "step": 42845 }, { "epoch": 2.1752373216914562, "grad_norm": 0.34752291440963745, "learning_rate": 5.498417855390291e-06, "loss": 0.037, "step": 42850 }, { "epoch": 2.175491141682319, "grad_norm": 0.8168660998344421, "learning_rate": 5.496725722117875e-06, "loss": 0.029, "step": 42855 }, { "epoch": 2.1757449616731814, "grad_norm": 0.27437952160835266, "learning_rate": 5.495033588845458e-06, "loss": 0.0328, "step": 42860 }, { "epoch": 2.175998781664044, "grad_norm": 0.30334383249282837, "learning_rate": 5.493341455573041e-06, "loss": 0.0389, "step": 42865 }, { "epoch": 2.176252601654906, "grad_norm": 0.2583600878715515, "learning_rate": 5.491649322300625e-06, "loss": 0.0318, "step": 42870 }, { "epoch": 2.176506421645769, "grad_norm": 0.42259642481803894, "learning_rate": 5.489957189028209e-06, "loss": 0.0279, "step": 42875 }, { "epoch": 2.1767602416366314, "grad_norm": 1.2351564168930054, "learning_rate": 5.488265055755792e-06, "loss": 0.0346, "step": 42880 }, { "epoch": 2.177014061627494, "grad_norm": 0.36466923356056213, "learning_rate": 5.486572922483375e-06, "loss": 0.0321, "step": 42885 }, { "epoch": 2.177267881618356, "grad_norm": 0.3623839020729065, "learning_rate": 5.484880789210959e-06, "loss": 0.0354, "step": 42890 }, { "epoch": 2.1775217016092188, "grad_norm": 0.3960568308830261, "learning_rate": 5.483188655938542e-06, "loss": 0.0315, "step": 42895 }, { "epoch": 2.1777755216000814, "grad_norm": 0.23207826912403107, "learning_rate": 5.481496522666125e-06, "loss": 0.0332, "step": 42900 }, { "epoch": 2.1780293415909435, "grad_norm": 0.34986627101898193, "learning_rate": 5.4798043893937096e-06, "loss": 0.0365, "step": 42905 }, { "epoch": 2.178283161581806, "grad_norm": 0.2791183888912201, "learning_rate": 5.478112256121293e-06, "loss": 0.0346, "step": 42910 }, { "epoch": 2.1785369815726687, "grad_norm": 0.4135153889656067, "learning_rate": 5.476420122848876e-06, "loss": 0.0387, "step": 42915 }, { "epoch": 2.1787908015635313, "grad_norm": 0.4401041567325592, "learning_rate": 5.474727989576459e-06, "loss": 0.0348, "step": 42920 }, { "epoch": 2.1790446215543935, "grad_norm": 0.25939372181892395, "learning_rate": 5.473035856304043e-06, "loss": 0.0341, "step": 42925 }, { "epoch": 2.179298441545256, "grad_norm": 0.8912258744239807, "learning_rate": 5.471343723031627e-06, "loss": 0.0365, "step": 42930 }, { "epoch": 2.1795522615361187, "grad_norm": 0.40229907631874084, "learning_rate": 5.469651589759209e-06, "loss": 0.0332, "step": 42935 }, { "epoch": 2.179806081526981, "grad_norm": 0.2511097192764282, "learning_rate": 5.4679594564867935e-06, "loss": 0.0314, "step": 42940 }, { "epoch": 2.1800599015178435, "grad_norm": 0.25943753123283386, "learning_rate": 5.466267323214377e-06, "loss": 0.0307, "step": 42945 }, { "epoch": 2.180313721508706, "grad_norm": 0.3757547438144684, "learning_rate": 5.464575189941961e-06, "loss": 0.0378, "step": 42950 }, { "epoch": 2.1805675414995687, "grad_norm": 0.3526191711425781, "learning_rate": 5.462883056669543e-06, "loss": 0.0337, "step": 42955 }, { "epoch": 2.180821361490431, "grad_norm": 0.2390231043100357, "learning_rate": 5.461190923397127e-06, "loss": 0.0336, "step": 42960 }, { "epoch": 2.1810751814812934, "grad_norm": 0.3768102526664734, "learning_rate": 5.459498790124711e-06, "loss": 0.04, "step": 42965 }, { "epoch": 2.181329001472156, "grad_norm": 0.345241516828537, "learning_rate": 5.457806656852294e-06, "loss": 0.0326, "step": 42970 }, { "epoch": 2.1815828214630186, "grad_norm": 0.3643832504749298, "learning_rate": 5.4561145235798775e-06, "loss": 0.0364, "step": 42975 }, { "epoch": 2.181836641453881, "grad_norm": 0.3406466245651245, "learning_rate": 5.454422390307461e-06, "loss": 0.0287, "step": 42980 }, { "epoch": 2.1820904614447434, "grad_norm": 0.46112820506095886, "learning_rate": 5.452730257035045e-06, "loss": 0.0402, "step": 42985 }, { "epoch": 2.182344281435606, "grad_norm": 0.24325571954250336, "learning_rate": 5.451038123762627e-06, "loss": 0.0299, "step": 42990 }, { "epoch": 2.182598101426468, "grad_norm": 0.3688530921936035, "learning_rate": 5.449345990490212e-06, "loss": 0.0279, "step": 42995 }, { "epoch": 2.1828519214173308, "grad_norm": 0.29081302881240845, "learning_rate": 5.447653857217795e-06, "loss": 0.0299, "step": 43000 }, { "epoch": 2.1831057414081934, "grad_norm": 0.45404350757598877, "learning_rate": 5.445961723945379e-06, "loss": 0.0308, "step": 43005 }, { "epoch": 2.183359561399056, "grad_norm": 0.41532981395721436, "learning_rate": 5.4442695906729615e-06, "loss": 0.0366, "step": 43010 }, { "epoch": 2.183613381389918, "grad_norm": 0.38594257831573486, "learning_rate": 5.442577457400545e-06, "loss": 0.0334, "step": 43015 }, { "epoch": 2.1838672013807807, "grad_norm": 0.2794020473957062, "learning_rate": 5.440885324128129e-06, "loss": 0.0337, "step": 43020 }, { "epoch": 2.1841210213716433, "grad_norm": 0.26633644104003906, "learning_rate": 5.439193190855713e-06, "loss": 0.0313, "step": 43025 }, { "epoch": 2.184374841362506, "grad_norm": 0.41068437695503235, "learning_rate": 5.437501057583296e-06, "loss": 0.0329, "step": 43030 }, { "epoch": 2.184628661353368, "grad_norm": 0.4237484931945801, "learning_rate": 5.435808924310879e-06, "loss": 0.0361, "step": 43035 }, { "epoch": 2.1848824813442307, "grad_norm": 0.2985211908817291, "learning_rate": 5.434116791038463e-06, "loss": 0.0355, "step": 43040 }, { "epoch": 2.1851363013350933, "grad_norm": 0.2558479309082031, "learning_rate": 5.432424657766045e-06, "loss": 0.0307, "step": 43045 }, { "epoch": 2.1853901213259554, "grad_norm": 0.2939911484718323, "learning_rate": 5.430732524493629e-06, "loss": 0.0323, "step": 43050 }, { "epoch": 2.185643941316818, "grad_norm": 0.4152364730834961, "learning_rate": 5.429040391221213e-06, "loss": 0.0306, "step": 43055 }, { "epoch": 2.1858977613076807, "grad_norm": 0.3391212224960327, "learning_rate": 5.427348257948797e-06, "loss": 0.0375, "step": 43060 }, { "epoch": 2.1861515812985433, "grad_norm": 0.3118323087692261, "learning_rate": 5.42565612467638e-06, "loss": 0.0341, "step": 43065 }, { "epoch": 2.1864054012894054, "grad_norm": 0.278476744890213, "learning_rate": 5.423963991403963e-06, "loss": 0.0301, "step": 43070 }, { "epoch": 2.186659221280268, "grad_norm": 0.31256920099258423, "learning_rate": 5.422271858131547e-06, "loss": 0.0342, "step": 43075 }, { "epoch": 2.1869130412711306, "grad_norm": 0.27027949690818787, "learning_rate": 5.420579724859131e-06, "loss": 0.0312, "step": 43080 }, { "epoch": 2.187166861261993, "grad_norm": 1.943433403968811, "learning_rate": 5.418887591586714e-06, "loss": 0.0378, "step": 43085 }, { "epoch": 2.1874206812528554, "grad_norm": 0.5797852873802185, "learning_rate": 5.417195458314297e-06, "loss": 0.0346, "step": 43090 }, { "epoch": 2.187674501243718, "grad_norm": 0.39422717690467834, "learning_rate": 5.415503325041881e-06, "loss": 0.0347, "step": 43095 }, { "epoch": 2.1879283212345806, "grad_norm": 0.26226067543029785, "learning_rate": 5.413811191769464e-06, "loss": 0.0319, "step": 43100 }, { "epoch": 2.1881821412254427, "grad_norm": 0.3121758699417114, "learning_rate": 5.412119058497047e-06, "loss": 0.0367, "step": 43105 }, { "epoch": 2.1884359612163053, "grad_norm": 0.36041760444641113, "learning_rate": 5.4104269252246315e-06, "loss": 0.0336, "step": 43110 }, { "epoch": 2.188689781207168, "grad_norm": 0.25645551085472107, "learning_rate": 5.408734791952215e-06, "loss": 0.028, "step": 43115 }, { "epoch": 2.1889436011980306, "grad_norm": 0.30976834893226624, "learning_rate": 5.407042658679799e-06, "loss": 0.0392, "step": 43120 }, { "epoch": 2.1891974211888927, "grad_norm": 0.3137769103050232, "learning_rate": 5.405350525407381e-06, "loss": 0.0323, "step": 43125 }, { "epoch": 2.1894512411797553, "grad_norm": 0.5139391422271729, "learning_rate": 5.403658392134965e-06, "loss": 0.0342, "step": 43130 }, { "epoch": 2.189705061170618, "grad_norm": 0.32713380455970764, "learning_rate": 5.401966258862548e-06, "loss": 0.0334, "step": 43135 }, { "epoch": 2.18995888116148, "grad_norm": 0.36082905530929565, "learning_rate": 5.400274125590131e-06, "loss": 0.0321, "step": 43140 }, { "epoch": 2.1902127011523427, "grad_norm": 0.2427683025598526, "learning_rate": 5.3985819923177155e-06, "loss": 0.0413, "step": 43145 }, { "epoch": 2.1904665211432053, "grad_norm": 0.6607348322868347, "learning_rate": 5.396889859045299e-06, "loss": 0.0409, "step": 43150 }, { "epoch": 2.190720341134068, "grad_norm": 0.4588119387626648, "learning_rate": 5.3951977257728826e-06, "loss": 0.0358, "step": 43155 }, { "epoch": 2.19097416112493, "grad_norm": 0.3139321208000183, "learning_rate": 5.393505592500465e-06, "loss": 0.0299, "step": 43160 }, { "epoch": 2.1912279811157926, "grad_norm": 0.24087227880954742, "learning_rate": 5.391813459228049e-06, "loss": 0.0316, "step": 43165 }, { "epoch": 2.1914818011066552, "grad_norm": 0.338569313287735, "learning_rate": 5.390121325955633e-06, "loss": 0.0268, "step": 43170 }, { "epoch": 2.191735621097518, "grad_norm": 0.3832125961780548, "learning_rate": 5.388429192683217e-06, "loss": 0.0354, "step": 43175 }, { "epoch": 2.19198944108838, "grad_norm": 0.2771073877811432, "learning_rate": 5.3867370594107994e-06, "loss": 0.0356, "step": 43180 }, { "epoch": 2.1922432610792426, "grad_norm": 0.27626606822013855, "learning_rate": 5.385044926138383e-06, "loss": 0.0333, "step": 43185 }, { "epoch": 2.192497081070105, "grad_norm": 0.3390451669692993, "learning_rate": 5.3833527928659665e-06, "loss": 0.0294, "step": 43190 }, { "epoch": 2.1927509010609674, "grad_norm": 0.2988022565841675, "learning_rate": 5.381660659593551e-06, "loss": 0.034, "step": 43195 }, { "epoch": 2.19300472105183, "grad_norm": 0.41277876496315, "learning_rate": 5.379968526321134e-06, "loss": 0.0328, "step": 43200 }, { "epoch": 2.1932585410426926, "grad_norm": 0.3886488080024719, "learning_rate": 5.378276393048717e-06, "loss": 0.0329, "step": 43205 }, { "epoch": 2.193512361033555, "grad_norm": 0.4166080057621002, "learning_rate": 5.376584259776301e-06, "loss": 0.0345, "step": 43210 }, { "epoch": 2.1937661810244173, "grad_norm": 0.311166912317276, "learning_rate": 5.374892126503883e-06, "loss": 0.0259, "step": 43215 }, { "epoch": 2.19402000101528, "grad_norm": 1.2612603902816772, "learning_rate": 5.373199993231467e-06, "loss": 0.0365, "step": 43220 }, { "epoch": 2.1942738210061425, "grad_norm": 0.2629720866680145, "learning_rate": 5.3715078599590505e-06, "loss": 0.0387, "step": 43225 }, { "epoch": 2.1945276409970047, "grad_norm": 0.41876986622810364, "learning_rate": 5.369815726686635e-06, "loss": 0.0417, "step": 43230 }, { "epoch": 2.1947814609878673, "grad_norm": 0.4134247303009033, "learning_rate": 5.3681235934142176e-06, "loss": 0.043, "step": 43235 }, { "epoch": 2.19503528097873, "grad_norm": 0.7171376347541809, "learning_rate": 5.366431460141801e-06, "loss": 0.0327, "step": 43240 }, { "epoch": 2.1952891009695925, "grad_norm": 0.4790825843811035, "learning_rate": 5.364739326869385e-06, "loss": 0.0343, "step": 43245 }, { "epoch": 2.1955429209604547, "grad_norm": 0.47442811727523804, "learning_rate": 5.363047193596968e-06, "loss": 0.032, "step": 43250 }, { "epoch": 2.1957967409513173, "grad_norm": 0.32504549622535706, "learning_rate": 5.361355060324551e-06, "loss": 0.0348, "step": 43255 }, { "epoch": 2.19605056094218, "grad_norm": 0.3139643371105194, "learning_rate": 5.359662927052135e-06, "loss": 0.0274, "step": 43260 }, { "epoch": 2.1963043809330425, "grad_norm": 0.3576580882072449, "learning_rate": 5.357970793779719e-06, "loss": 0.0382, "step": 43265 }, { "epoch": 2.1965582009239046, "grad_norm": 0.3130418658256531, "learning_rate": 5.356278660507302e-06, "loss": 0.0297, "step": 43270 }, { "epoch": 2.1968120209147672, "grad_norm": 0.2839604616165161, "learning_rate": 5.354586527234885e-06, "loss": 0.0261, "step": 43275 }, { "epoch": 2.19706584090563, "grad_norm": 0.5264848470687866, "learning_rate": 5.352894393962469e-06, "loss": 0.0336, "step": 43280 }, { "epoch": 2.197319660896492, "grad_norm": 0.40158921480178833, "learning_rate": 5.351202260690053e-06, "loss": 0.0384, "step": 43285 }, { "epoch": 2.1975734808873546, "grad_norm": 0.35445520281791687, "learning_rate": 5.349510127417636e-06, "loss": 0.0316, "step": 43290 }, { "epoch": 2.197827300878217, "grad_norm": 0.2767607867717743, "learning_rate": 5.347817994145219e-06, "loss": 0.0325, "step": 43295 }, { "epoch": 2.19808112086908, "grad_norm": 0.2724688649177551, "learning_rate": 5.346125860872803e-06, "loss": 0.0289, "step": 43300 }, { "epoch": 2.198334940859942, "grad_norm": 0.3401924967765808, "learning_rate": 5.344433727600386e-06, "loss": 0.0364, "step": 43305 }, { "epoch": 2.1985887608508046, "grad_norm": 0.27552342414855957, "learning_rate": 5.342741594327969e-06, "loss": 0.0298, "step": 43310 }, { "epoch": 2.198842580841667, "grad_norm": 0.31632283329963684, "learning_rate": 5.341049461055553e-06, "loss": 0.0272, "step": 43315 }, { "epoch": 2.1990964008325298, "grad_norm": 0.5958532094955444, "learning_rate": 5.339357327783137e-06, "loss": 0.0328, "step": 43320 }, { "epoch": 2.199350220823392, "grad_norm": 0.29851803183555603, "learning_rate": 5.3376651945107205e-06, "loss": 0.0306, "step": 43325 }, { "epoch": 2.1996040408142545, "grad_norm": 0.30227798223495483, "learning_rate": 5.335973061238303e-06, "loss": 0.0267, "step": 43330 }, { "epoch": 2.199857860805117, "grad_norm": 0.31471794843673706, "learning_rate": 5.334280927965887e-06, "loss": 0.0315, "step": 43335 }, { "epoch": 2.2001116807959793, "grad_norm": 0.3955003619194031, "learning_rate": 5.33258879469347e-06, "loss": 0.0348, "step": 43340 }, { "epoch": 2.200365500786842, "grad_norm": 0.2753443717956543, "learning_rate": 5.330896661421055e-06, "loss": 0.0343, "step": 43345 }, { "epoch": 2.2006193207777045, "grad_norm": 0.282761812210083, "learning_rate": 5.329204528148637e-06, "loss": 0.0376, "step": 43350 }, { "epoch": 2.200873140768567, "grad_norm": 0.37203195691108704, "learning_rate": 5.327512394876221e-06, "loss": 0.0445, "step": 43355 }, { "epoch": 2.2011269607594293, "grad_norm": 0.4049202501773834, "learning_rate": 5.3258202616038045e-06, "loss": 0.033, "step": 43360 }, { "epoch": 2.201380780750292, "grad_norm": 0.4423353374004364, "learning_rate": 5.324128128331388e-06, "loss": 0.0341, "step": 43365 }, { "epoch": 2.2016346007411545, "grad_norm": 0.3491520881652832, "learning_rate": 5.322435995058971e-06, "loss": 0.0329, "step": 43370 }, { "epoch": 2.2018884207320166, "grad_norm": 0.3284947872161865, "learning_rate": 5.320743861786555e-06, "loss": 0.032, "step": 43375 }, { "epoch": 2.2021422407228792, "grad_norm": 0.36112505197525024, "learning_rate": 5.319051728514139e-06, "loss": 0.0288, "step": 43380 }, { "epoch": 2.202396060713742, "grad_norm": 0.3680697977542877, "learning_rate": 5.317359595241721e-06, "loss": 0.0314, "step": 43385 }, { "epoch": 2.2026498807046044, "grad_norm": 0.26703742146492004, "learning_rate": 5.315667461969305e-06, "loss": 0.0287, "step": 43390 }, { "epoch": 2.2029037006954666, "grad_norm": 0.4103862941265106, "learning_rate": 5.3139753286968885e-06, "loss": 0.034, "step": 43395 }, { "epoch": 2.203157520686329, "grad_norm": 0.34333786368370056, "learning_rate": 5.312283195424473e-06, "loss": 0.0294, "step": 43400 }, { "epoch": 2.203411340677192, "grad_norm": 0.5722442269325256, "learning_rate": 5.310591062152055e-06, "loss": 0.0352, "step": 43405 }, { "epoch": 2.2036651606680544, "grad_norm": 0.44682955741882324, "learning_rate": 5.308898928879639e-06, "loss": 0.0376, "step": 43410 }, { "epoch": 2.2039189806589166, "grad_norm": 1.0077766180038452, "learning_rate": 5.307206795607223e-06, "loss": 0.032, "step": 43415 }, { "epoch": 2.204172800649779, "grad_norm": 0.40870773792266846, "learning_rate": 5.305514662334806e-06, "loss": 0.0338, "step": 43420 }, { "epoch": 2.2044266206406418, "grad_norm": 0.2662786543369293, "learning_rate": 5.303822529062389e-06, "loss": 0.0325, "step": 43425 }, { "epoch": 2.2046804406315044, "grad_norm": 0.6228799223899841, "learning_rate": 5.302130395789972e-06, "loss": 0.0335, "step": 43430 }, { "epoch": 2.2049342606223665, "grad_norm": 0.44985294342041016, "learning_rate": 5.300438262517557e-06, "loss": 0.0378, "step": 43435 }, { "epoch": 2.205188080613229, "grad_norm": 0.36874616146087646, "learning_rate": 5.29874612924514e-06, "loss": 0.033, "step": 43440 }, { "epoch": 2.2054419006040917, "grad_norm": 0.8388116359710693, "learning_rate": 5.297053995972723e-06, "loss": 0.0335, "step": 43445 }, { "epoch": 2.205695720594954, "grad_norm": 0.3380875885486603, "learning_rate": 5.295361862700307e-06, "loss": 0.0337, "step": 43450 }, { "epoch": 2.2059495405858165, "grad_norm": 0.27034997940063477, "learning_rate": 5.29366972942789e-06, "loss": 0.0401, "step": 43455 }, { "epoch": 2.206203360576679, "grad_norm": 0.31091082096099854, "learning_rate": 5.291977596155473e-06, "loss": 0.0298, "step": 43460 }, { "epoch": 2.2064571805675417, "grad_norm": 0.33562663197517395, "learning_rate": 5.290285462883057e-06, "loss": 0.0291, "step": 43465 }, { "epoch": 2.206711000558404, "grad_norm": 0.42769908905029297, "learning_rate": 5.288593329610641e-06, "loss": 0.0266, "step": 43470 }, { "epoch": 2.2069648205492665, "grad_norm": 0.28732359409332275, "learning_rate": 5.286901196338224e-06, "loss": 0.0259, "step": 43475 }, { "epoch": 2.207218640540129, "grad_norm": 0.4161680042743683, "learning_rate": 5.285209063065807e-06, "loss": 0.0345, "step": 43480 }, { "epoch": 2.207472460530991, "grad_norm": 0.3423847556114197, "learning_rate": 5.2835169297933906e-06, "loss": 0.0316, "step": 43485 }, { "epoch": 2.207726280521854, "grad_norm": 0.32624608278274536, "learning_rate": 5.281824796520975e-06, "loss": 0.0316, "step": 43490 }, { "epoch": 2.2079801005127164, "grad_norm": 0.43581536412239075, "learning_rate": 5.2801326632485585e-06, "loss": 0.0351, "step": 43495 }, { "epoch": 2.208233920503579, "grad_norm": 0.3912333846092224, "learning_rate": 5.278440529976141e-06, "loss": 0.0382, "step": 43500 }, { "epoch": 2.208487740494441, "grad_norm": 0.260891318321228, "learning_rate": 5.276748396703725e-06, "loss": 0.0283, "step": 43505 }, { "epoch": 2.208741560485304, "grad_norm": 0.42657461762428284, "learning_rate": 5.275056263431308e-06, "loss": 0.0328, "step": 43510 }, { "epoch": 2.2089953804761664, "grad_norm": 0.3546578288078308, "learning_rate": 5.273364130158893e-06, "loss": 0.0335, "step": 43515 }, { "epoch": 2.2092492004670286, "grad_norm": 0.2557455599308014, "learning_rate": 5.2716719968864745e-06, "loss": 0.0332, "step": 43520 }, { "epoch": 2.209503020457891, "grad_norm": 0.3160721957683563, "learning_rate": 5.269979863614059e-06, "loss": 0.0341, "step": 43525 }, { "epoch": 2.2097568404487538, "grad_norm": 0.27429550886154175, "learning_rate": 5.2682877303416425e-06, "loss": 0.0334, "step": 43530 }, { "epoch": 2.2100106604396164, "grad_norm": 0.2348918616771698, "learning_rate": 5.266595597069225e-06, "loss": 0.034, "step": 43535 }, { "epoch": 2.2102644804304785, "grad_norm": 0.19068334996700287, "learning_rate": 5.264903463796809e-06, "loss": 0.032, "step": 43540 }, { "epoch": 2.210518300421341, "grad_norm": 0.3050518035888672, "learning_rate": 5.263211330524392e-06, "loss": 0.0305, "step": 43545 }, { "epoch": 2.2107721204122037, "grad_norm": 0.5854632258415222, "learning_rate": 5.261519197251977e-06, "loss": 0.0424, "step": 43550 }, { "epoch": 2.2110259404030663, "grad_norm": 0.3303602933883667, "learning_rate": 5.259827063979559e-06, "loss": 0.027, "step": 43555 }, { "epoch": 2.2112797603939285, "grad_norm": 0.304310142993927, "learning_rate": 5.258134930707143e-06, "loss": 0.0386, "step": 43560 }, { "epoch": 2.211533580384791, "grad_norm": 0.38438746333122253, "learning_rate": 5.2564427974347264e-06, "loss": 0.0353, "step": 43565 }, { "epoch": 2.2117874003756537, "grad_norm": 0.20120958983898163, "learning_rate": 5.25475066416231e-06, "loss": 0.034, "step": 43570 }, { "epoch": 2.2120412203665163, "grad_norm": 0.2356599122285843, "learning_rate": 5.253058530889893e-06, "loss": 0.0395, "step": 43575 }, { "epoch": 2.2122950403573785, "grad_norm": 0.3216328024864197, "learning_rate": 5.251366397617477e-06, "loss": 0.0285, "step": 43580 }, { "epoch": 2.212548860348241, "grad_norm": 0.737366259098053, "learning_rate": 5.249674264345061e-06, "loss": 0.0331, "step": 43585 }, { "epoch": 2.2128026803391037, "grad_norm": 0.27575215697288513, "learning_rate": 5.247982131072644e-06, "loss": 0.037, "step": 43590 }, { "epoch": 2.213056500329966, "grad_norm": 0.2615464925765991, "learning_rate": 5.246289997800227e-06, "loss": 0.0316, "step": 43595 }, { "epoch": 2.2133103203208284, "grad_norm": 0.2448047250509262, "learning_rate": 5.24459786452781e-06, "loss": 0.0326, "step": 43600 }, { "epoch": 2.213564140311691, "grad_norm": 0.37527599930763245, "learning_rate": 5.242905731255395e-06, "loss": 0.0335, "step": 43605 }, { "epoch": 2.2138179603025536, "grad_norm": 0.5040178298950195, "learning_rate": 5.241213597982977e-06, "loss": 0.0371, "step": 43610 }, { "epoch": 2.214071780293416, "grad_norm": 0.2776201069355011, "learning_rate": 5.239521464710561e-06, "loss": 0.029, "step": 43615 }, { "epoch": 2.2143256002842784, "grad_norm": 0.25941890478134155, "learning_rate": 5.2378293314381446e-06, "loss": 0.0282, "step": 43620 }, { "epoch": 2.214579420275141, "grad_norm": 0.4714052677154541, "learning_rate": 5.236137198165728e-06, "loss": 0.0397, "step": 43625 }, { "epoch": 2.214833240266003, "grad_norm": 0.1847241073846817, "learning_rate": 5.234445064893311e-06, "loss": 0.0312, "step": 43630 }, { "epoch": 2.2150870602568657, "grad_norm": 0.7910279631614685, "learning_rate": 5.232752931620894e-06, "loss": 0.0323, "step": 43635 }, { "epoch": 2.2153408802477284, "grad_norm": 0.3663308322429657, "learning_rate": 5.231060798348479e-06, "loss": 0.0341, "step": 43640 }, { "epoch": 2.215594700238591, "grad_norm": 0.3408203125, "learning_rate": 5.229368665076062e-06, "loss": 0.0352, "step": 43645 }, { "epoch": 2.215848520229453, "grad_norm": 1.7612500190734863, "learning_rate": 5.227676531803645e-06, "loss": 0.0296, "step": 43650 }, { "epoch": 2.2161023402203157, "grad_norm": 0.46181654930114746, "learning_rate": 5.2259843985312285e-06, "loss": 0.0389, "step": 43655 }, { "epoch": 2.2163561602111783, "grad_norm": 0.25822097063064575, "learning_rate": 5.224292265258812e-06, "loss": 0.0345, "step": 43660 }, { "epoch": 2.216609980202041, "grad_norm": 0.5843444466590881, "learning_rate": 5.2226001319863965e-06, "loss": 0.0396, "step": 43665 }, { "epoch": 2.216863800192903, "grad_norm": 0.31277167797088623, "learning_rate": 5.220907998713979e-06, "loss": 0.0311, "step": 43670 }, { "epoch": 2.2171176201837657, "grad_norm": 0.5508875846862793, "learning_rate": 5.219215865441563e-06, "loss": 0.0385, "step": 43675 }, { "epoch": 2.2173714401746283, "grad_norm": 0.31969529390335083, "learning_rate": 5.217523732169146e-06, "loss": 0.0308, "step": 43680 }, { "epoch": 2.2176252601654904, "grad_norm": 0.357572078704834, "learning_rate": 5.21583159889673e-06, "loss": 0.0301, "step": 43685 }, { "epoch": 2.217879080156353, "grad_norm": 0.3687649667263031, "learning_rate": 5.2141394656243125e-06, "loss": 0.0362, "step": 43690 }, { "epoch": 2.2181329001472156, "grad_norm": 0.4379350543022156, "learning_rate": 5.212447332351897e-06, "loss": 0.0313, "step": 43695 }, { "epoch": 2.2183867201380782, "grad_norm": 0.270060271024704, "learning_rate": 5.2107551990794804e-06, "loss": 0.0282, "step": 43700 }, { "epoch": 2.2186405401289404, "grad_norm": 0.3274651765823364, "learning_rate": 5.209063065807063e-06, "loss": 0.0246, "step": 43705 }, { "epoch": 2.218894360119803, "grad_norm": 0.4745948314666748, "learning_rate": 5.207370932534647e-06, "loss": 0.0292, "step": 43710 }, { "epoch": 2.2191481801106656, "grad_norm": 0.3572133481502533, "learning_rate": 5.20567879926223e-06, "loss": 0.0325, "step": 43715 }, { "epoch": 2.219402000101528, "grad_norm": 0.3448382616043091, "learning_rate": 5.203986665989814e-06, "loss": 0.0356, "step": 43720 }, { "epoch": 2.2196558200923904, "grad_norm": 0.2912856638431549, "learning_rate": 5.2022945327173965e-06, "loss": 0.0295, "step": 43725 }, { "epoch": 2.219909640083253, "grad_norm": 0.41613078117370605, "learning_rate": 5.200602399444981e-06, "loss": 0.0326, "step": 43730 }, { "epoch": 2.2201634600741156, "grad_norm": 0.2688710391521454, "learning_rate": 5.198910266172564e-06, "loss": 0.0296, "step": 43735 }, { "epoch": 2.2204172800649777, "grad_norm": 0.40279969573020935, "learning_rate": 5.197218132900148e-06, "loss": 0.0368, "step": 43740 }, { "epoch": 2.2206711000558403, "grad_norm": 0.34636586904525757, "learning_rate": 5.195525999627731e-06, "loss": 0.0299, "step": 43745 }, { "epoch": 2.220924920046703, "grad_norm": 0.40201225876808167, "learning_rate": 5.193833866355314e-06, "loss": 0.0297, "step": 43750 }, { "epoch": 2.2211787400375655, "grad_norm": 0.2993839383125305, "learning_rate": 5.192141733082899e-06, "loss": 0.0272, "step": 43755 }, { "epoch": 2.2214325600284277, "grad_norm": 0.3276756703853607, "learning_rate": 5.190449599810482e-06, "loss": 0.0333, "step": 43760 }, { "epoch": 2.2216863800192903, "grad_norm": 0.22368964552879333, "learning_rate": 5.188757466538065e-06, "loss": 0.0304, "step": 43765 }, { "epoch": 2.221940200010153, "grad_norm": 0.42625653743743896, "learning_rate": 5.187065333265648e-06, "loss": 0.0367, "step": 43770 }, { "epoch": 2.222194020001015, "grad_norm": 0.3062908947467804, "learning_rate": 5.185373199993232e-06, "loss": 0.0266, "step": 43775 }, { "epoch": 2.2224478399918777, "grad_norm": 0.42325690388679504, "learning_rate": 5.183681066720815e-06, "loss": 0.0298, "step": 43780 }, { "epoch": 2.2227016599827403, "grad_norm": 0.2115492969751358, "learning_rate": 5.181988933448399e-06, "loss": 0.0328, "step": 43785 }, { "epoch": 2.222955479973603, "grad_norm": 0.3463805913925171, "learning_rate": 5.1802968001759825e-06, "loss": 0.0396, "step": 43790 }, { "epoch": 2.223209299964465, "grad_norm": 0.5982481837272644, "learning_rate": 5.178604666903566e-06, "loss": 0.0276, "step": 43795 }, { "epoch": 2.2234631199553276, "grad_norm": 0.4598280191421509, "learning_rate": 5.176912533631149e-06, "loss": 0.0321, "step": 43800 }, { "epoch": 2.2237169399461902, "grad_norm": 0.33554238080978394, "learning_rate": 5.175220400358732e-06, "loss": 0.0358, "step": 43805 }, { "epoch": 2.223970759937053, "grad_norm": 1.4233322143554688, "learning_rate": 5.173528267086316e-06, "loss": 0.037, "step": 43810 }, { "epoch": 2.224224579927915, "grad_norm": 0.27777615189552307, "learning_rate": 5.1718361338139e-06, "loss": 0.0287, "step": 43815 }, { "epoch": 2.2244783999187776, "grad_norm": 0.2759939134120941, "learning_rate": 5.170144000541483e-06, "loss": 0.0329, "step": 43820 }, { "epoch": 2.22473221990964, "grad_norm": 0.35867956280708313, "learning_rate": 5.1684518672690665e-06, "loss": 0.0311, "step": 43825 }, { "epoch": 2.2249860399005024, "grad_norm": 0.39028945565223694, "learning_rate": 5.16675973399665e-06, "loss": 0.0382, "step": 43830 }, { "epoch": 2.225239859891365, "grad_norm": 0.1933843493461609, "learning_rate": 5.165067600724234e-06, "loss": 0.0234, "step": 43835 }, { "epoch": 2.2254936798822276, "grad_norm": 0.35192665457725525, "learning_rate": 5.163375467451816e-06, "loss": 0.0352, "step": 43840 }, { "epoch": 2.22574749987309, "grad_norm": 0.3243769109249115, "learning_rate": 5.161683334179401e-06, "loss": 0.037, "step": 43845 }, { "epoch": 2.2260013198639523, "grad_norm": 0.31889811158180237, "learning_rate": 5.159991200906984e-06, "loss": 0.0351, "step": 43850 }, { "epoch": 2.226255139854815, "grad_norm": 0.3885818421840668, "learning_rate": 5.158299067634567e-06, "loss": 0.0349, "step": 43855 }, { "epoch": 2.2265089598456775, "grad_norm": 0.31781822443008423, "learning_rate": 5.1566069343621505e-06, "loss": 0.036, "step": 43860 }, { "epoch": 2.22676277983654, "grad_norm": 0.24584543704986572, "learning_rate": 5.154914801089734e-06, "loss": 0.0322, "step": 43865 }, { "epoch": 2.2270165998274023, "grad_norm": 0.3724859952926636, "learning_rate": 5.153222667817318e-06, "loss": 0.0459, "step": 43870 }, { "epoch": 2.227270419818265, "grad_norm": 0.37444204092025757, "learning_rate": 5.151530534544901e-06, "loss": 0.034, "step": 43875 }, { "epoch": 2.2275242398091275, "grad_norm": 0.3302791714668274, "learning_rate": 5.149838401272485e-06, "loss": 0.0386, "step": 43880 }, { "epoch": 2.2277780597999897, "grad_norm": 0.331866979598999, "learning_rate": 5.148146268000068e-06, "loss": 0.033, "step": 43885 }, { "epoch": 2.2280318797908523, "grad_norm": 0.2684163451194763, "learning_rate": 5.146454134727652e-06, "loss": 0.0304, "step": 43890 }, { "epoch": 2.228285699781715, "grad_norm": 0.30631887912750244, "learning_rate": 5.1447620014552344e-06, "loss": 0.0383, "step": 43895 }, { "epoch": 2.2285395197725775, "grad_norm": 0.3266892433166504, "learning_rate": 5.143069868182818e-06, "loss": 0.0285, "step": 43900 }, { "epoch": 2.2287933397634396, "grad_norm": 0.31804943084716797, "learning_rate": 5.141377734910402e-06, "loss": 0.0307, "step": 43905 }, { "epoch": 2.2290471597543022, "grad_norm": 0.44756922125816345, "learning_rate": 5.139685601637986e-06, "loss": 0.0385, "step": 43910 }, { "epoch": 2.229300979745165, "grad_norm": 0.2774399220943451, "learning_rate": 5.137993468365569e-06, "loss": 0.0275, "step": 43915 }, { "epoch": 2.229554799736027, "grad_norm": 0.29544955492019653, "learning_rate": 5.136301335093152e-06, "loss": 0.0315, "step": 43920 }, { "epoch": 2.2298086197268896, "grad_norm": 0.2739022672176361, "learning_rate": 5.134609201820736e-06, "loss": 0.0295, "step": 43925 }, { "epoch": 2.230062439717752, "grad_norm": 0.3167058229446411, "learning_rate": 5.13291706854832e-06, "loss": 0.0367, "step": 43930 }, { "epoch": 2.230316259708615, "grad_norm": 0.352201908826828, "learning_rate": 5.131224935275903e-06, "loss": 0.0345, "step": 43935 }, { "epoch": 2.230570079699477, "grad_norm": 0.3670605719089508, "learning_rate": 5.129532802003486e-06, "loss": 0.0388, "step": 43940 }, { "epoch": 2.2308238996903396, "grad_norm": 0.38734838366508484, "learning_rate": 5.12784066873107e-06, "loss": 0.0315, "step": 43945 }, { "epoch": 2.231077719681202, "grad_norm": 0.33268794417381287, "learning_rate": 5.126148535458653e-06, "loss": 0.0336, "step": 43950 }, { "epoch": 2.2313315396720648, "grad_norm": 0.42838096618652344, "learning_rate": 5.124456402186236e-06, "loss": 0.028, "step": 43955 }, { "epoch": 2.231585359662927, "grad_norm": 0.4699762761592865, "learning_rate": 5.1227642689138205e-06, "loss": 0.0339, "step": 43960 }, { "epoch": 2.2318391796537895, "grad_norm": 0.6462127566337585, "learning_rate": 5.121072135641404e-06, "loss": 0.0318, "step": 43965 }, { "epoch": 2.232092999644652, "grad_norm": 0.2937588393688202, "learning_rate": 5.119380002368987e-06, "loss": 0.0302, "step": 43970 }, { "epoch": 2.2323468196355143, "grad_norm": 0.2300993949174881, "learning_rate": 5.11768786909657e-06, "loss": 0.0362, "step": 43975 }, { "epoch": 2.232600639626377, "grad_norm": 0.5920124053955078, "learning_rate": 5.115995735824154e-06, "loss": 0.0308, "step": 43980 }, { "epoch": 2.2328544596172395, "grad_norm": 0.23320113122463226, "learning_rate": 5.114303602551738e-06, "loss": 0.0302, "step": 43985 }, { "epoch": 2.233108279608102, "grad_norm": 0.3090137243270874, "learning_rate": 5.11261146927932e-06, "loss": 0.0429, "step": 43990 }, { "epoch": 2.2333620995989643, "grad_norm": 0.44511866569519043, "learning_rate": 5.1109193360069045e-06, "loss": 0.0458, "step": 43995 }, { "epoch": 2.233615919589827, "grad_norm": 0.22914119064807892, "learning_rate": 5.109227202734488e-06, "loss": 0.0425, "step": 44000 }, { "epoch": 2.2338697395806895, "grad_norm": 0.2924869656562805, "learning_rate": 5.1075350694620716e-06, "loss": 0.0371, "step": 44005 }, { "epoch": 2.234123559571552, "grad_norm": 0.7008922100067139, "learning_rate": 5.105842936189654e-06, "loss": 0.0303, "step": 44010 }, { "epoch": 2.2343773795624142, "grad_norm": 0.3576049208641052, "learning_rate": 5.104150802917238e-06, "loss": 0.0336, "step": 44015 }, { "epoch": 2.234631199553277, "grad_norm": 0.37319669127464294, "learning_rate": 5.102458669644822e-06, "loss": 0.0333, "step": 44020 }, { "epoch": 2.2348850195441394, "grad_norm": 0.4137890934944153, "learning_rate": 5.100766536372405e-06, "loss": 0.0373, "step": 44025 }, { "epoch": 2.2351388395350016, "grad_norm": 0.35421955585479736, "learning_rate": 5.0990744030999885e-06, "loss": 0.0333, "step": 44030 }, { "epoch": 2.235392659525864, "grad_norm": 0.2621701955795288, "learning_rate": 5.097382269827572e-06, "loss": 0.0346, "step": 44035 }, { "epoch": 2.235646479516727, "grad_norm": 0.5451110005378723, "learning_rate": 5.0956901365551555e-06, "loss": 0.0326, "step": 44040 }, { "epoch": 2.2359002995075894, "grad_norm": 0.2946808934211731, "learning_rate": 5.093998003282738e-06, "loss": 0.0284, "step": 44045 }, { "epoch": 2.2361541194984516, "grad_norm": 0.37474432587623596, "learning_rate": 5.092305870010323e-06, "loss": 0.0386, "step": 44050 }, { "epoch": 2.236407939489314, "grad_norm": 0.37870481610298157, "learning_rate": 5.090613736737906e-06, "loss": 0.0325, "step": 44055 }, { "epoch": 2.2366617594801768, "grad_norm": 0.29171672463417053, "learning_rate": 5.08892160346549e-06, "loss": 0.0339, "step": 44060 }, { "epoch": 2.236915579471039, "grad_norm": 0.4076695442199707, "learning_rate": 5.087229470193072e-06, "loss": 0.0355, "step": 44065 }, { "epoch": 2.2371693994619015, "grad_norm": 0.30842044949531555, "learning_rate": 5.085537336920656e-06, "loss": 0.0373, "step": 44070 }, { "epoch": 2.237423219452764, "grad_norm": 0.28765350580215454, "learning_rate": 5.08384520364824e-06, "loss": 0.0345, "step": 44075 }, { "epoch": 2.2376770394436267, "grad_norm": 0.28565219044685364, "learning_rate": 5.082153070375824e-06, "loss": 0.0401, "step": 44080 }, { "epoch": 2.237930859434489, "grad_norm": 0.7458963394165039, "learning_rate": 5.080460937103407e-06, "loss": 0.0345, "step": 44085 }, { "epoch": 2.2381846794253515, "grad_norm": 0.4765220284461975, "learning_rate": 5.07876880383099e-06, "loss": 0.0373, "step": 44090 }, { "epoch": 2.238438499416214, "grad_norm": 0.35971006751060486, "learning_rate": 5.077076670558574e-06, "loss": 0.0373, "step": 44095 }, { "epoch": 2.2386923194070767, "grad_norm": 0.37267395853996277, "learning_rate": 5.075384537286156e-06, "loss": 0.0402, "step": 44100 }, { "epoch": 2.238946139397939, "grad_norm": 0.30565184354782104, "learning_rate": 5.07369240401374e-06, "loss": 0.0313, "step": 44105 }, { "epoch": 2.2391999593888015, "grad_norm": 0.4393250346183777, "learning_rate": 5.072000270741324e-06, "loss": 0.0292, "step": 44110 }, { "epoch": 2.239453779379664, "grad_norm": 0.30050981044769287, "learning_rate": 5.070308137468908e-06, "loss": 0.0419, "step": 44115 }, { "epoch": 2.2397075993705267, "grad_norm": 0.43722015619277954, "learning_rate": 5.0686160041964906e-06, "loss": 0.0313, "step": 44120 }, { "epoch": 2.239961419361389, "grad_norm": 0.3736498951911926, "learning_rate": 5.066923870924074e-06, "loss": 0.0342, "step": 44125 }, { "epoch": 2.2402152393522514, "grad_norm": 0.5136289596557617, "learning_rate": 5.065231737651658e-06, "loss": 0.0369, "step": 44130 }, { "epoch": 2.240469059343114, "grad_norm": 0.2988525331020355, "learning_rate": 5.063539604379242e-06, "loss": 0.0309, "step": 44135 }, { "epoch": 2.240722879333976, "grad_norm": 0.24521800875663757, "learning_rate": 5.061847471106825e-06, "loss": 0.0296, "step": 44140 }, { "epoch": 2.240976699324839, "grad_norm": 0.5935750007629395, "learning_rate": 5.060155337834408e-06, "loss": 0.038, "step": 44145 }, { "epoch": 2.2412305193157014, "grad_norm": 0.5300387144088745, "learning_rate": 5.058463204561992e-06, "loss": 0.0314, "step": 44150 }, { "epoch": 2.241484339306564, "grad_norm": 0.32966119050979614, "learning_rate": 5.056771071289575e-06, "loss": 0.0302, "step": 44155 }, { "epoch": 2.241738159297426, "grad_norm": 0.35278815031051636, "learning_rate": 5.055078938017158e-06, "loss": 0.0346, "step": 44160 }, { "epoch": 2.2419919792882887, "grad_norm": 0.40568065643310547, "learning_rate": 5.0533868047447425e-06, "loss": 0.0375, "step": 44165 }, { "epoch": 2.2422457992791514, "grad_norm": 0.411802738904953, "learning_rate": 5.051694671472326e-06, "loss": 0.0327, "step": 44170 }, { "epoch": 2.2424996192700135, "grad_norm": 0.347493439912796, "learning_rate": 5.050002538199909e-06, "loss": 0.0342, "step": 44175 }, { "epoch": 2.242753439260876, "grad_norm": 0.4209951162338257, "learning_rate": 5.048310404927492e-06, "loss": 0.0301, "step": 44180 }, { "epoch": 2.2430072592517387, "grad_norm": 0.4024890959262848, "learning_rate": 5.046618271655076e-06, "loss": 0.0287, "step": 44185 }, { "epoch": 2.2432610792426013, "grad_norm": 0.26085108518600464, "learning_rate": 5.04492613838266e-06, "loss": 0.0278, "step": 44190 }, { "epoch": 2.2435148992334635, "grad_norm": 0.33618801832199097, "learning_rate": 5.043234005110242e-06, "loss": 0.0264, "step": 44195 }, { "epoch": 2.243768719224326, "grad_norm": 0.31238695979118347, "learning_rate": 5.041541871837826e-06, "loss": 0.0311, "step": 44200 }, { "epoch": 2.2440225392151887, "grad_norm": 0.29754072427749634, "learning_rate": 5.03984973856541e-06, "loss": 0.0297, "step": 44205 }, { "epoch": 2.244276359206051, "grad_norm": 0.2515721917152405, "learning_rate": 5.0381576052929935e-06, "loss": 0.0347, "step": 44210 }, { "epoch": 2.2445301791969134, "grad_norm": 0.40768948197364807, "learning_rate": 5.036465472020576e-06, "loss": 0.0414, "step": 44215 }, { "epoch": 2.244783999187776, "grad_norm": 0.34544774889945984, "learning_rate": 5.03477333874816e-06, "loss": 0.0377, "step": 44220 }, { "epoch": 2.2450378191786386, "grad_norm": 0.34288665652275085, "learning_rate": 5.033081205475744e-06, "loss": 0.0355, "step": 44225 }, { "epoch": 2.245291639169501, "grad_norm": 0.36351481080055237, "learning_rate": 5.031389072203328e-06, "loss": 0.0385, "step": 44230 }, { "epoch": 2.2455454591603634, "grad_norm": 0.2494805008172989, "learning_rate": 5.02969693893091e-06, "loss": 0.0334, "step": 44235 }, { "epoch": 2.245799279151226, "grad_norm": 0.3414851725101471, "learning_rate": 5.028004805658494e-06, "loss": 0.0328, "step": 44240 }, { "epoch": 2.2460530991420886, "grad_norm": 0.3112623989582062, "learning_rate": 5.0263126723860775e-06, "loss": 0.0365, "step": 44245 }, { "epoch": 2.2463069191329508, "grad_norm": 0.4333570897579193, "learning_rate": 5.024620539113662e-06, "loss": 0.0383, "step": 44250 }, { "epoch": 2.2465607391238134, "grad_norm": 0.49220526218414307, "learning_rate": 5.0229284058412446e-06, "loss": 0.0297, "step": 44255 }, { "epoch": 2.246814559114676, "grad_norm": 0.2645159959793091, "learning_rate": 5.021236272568828e-06, "loss": 0.0296, "step": 44260 }, { "epoch": 2.2470683791055386, "grad_norm": 0.36111289262771606, "learning_rate": 5.019544139296412e-06, "loss": 0.0316, "step": 44265 }, { "epoch": 2.2473221990964007, "grad_norm": 0.3091657757759094, "learning_rate": 5.017852006023994e-06, "loss": 0.0355, "step": 44270 }, { "epoch": 2.2475760190872633, "grad_norm": 0.29647141695022583, "learning_rate": 5.016159872751578e-06, "loss": 0.028, "step": 44275 }, { "epoch": 2.247829839078126, "grad_norm": 0.5130562782287598, "learning_rate": 5.014467739479162e-06, "loss": 0.0334, "step": 44280 }, { "epoch": 2.248083659068988, "grad_norm": 0.21307742595672607, "learning_rate": 5.012775606206746e-06, "loss": 0.0275, "step": 44285 }, { "epoch": 2.2483374790598507, "grad_norm": 0.3160382807254791, "learning_rate": 5.0110834729343285e-06, "loss": 0.0381, "step": 44290 }, { "epoch": 2.2485912990507133, "grad_norm": 0.4054400324821472, "learning_rate": 5.009391339661912e-06, "loss": 0.0332, "step": 44295 }, { "epoch": 2.248845119041576, "grad_norm": 0.39842960238456726, "learning_rate": 5.007699206389496e-06, "loss": 0.0335, "step": 44300 }, { "epoch": 2.249098939032438, "grad_norm": 0.5165933966636658, "learning_rate": 5.006007073117079e-06, "loss": 0.0328, "step": 44305 }, { "epoch": 2.2493527590233007, "grad_norm": 0.42537662386894226, "learning_rate": 5.004314939844662e-06, "loss": 0.0329, "step": 44310 }, { "epoch": 2.2496065790141633, "grad_norm": 0.4120660126209259, "learning_rate": 5.002622806572246e-06, "loss": 0.03, "step": 44315 }, { "epoch": 2.2498603990050254, "grad_norm": 0.30993524193763733, "learning_rate": 5.00093067329983e-06, "loss": 0.0316, "step": 44320 }, { "epoch": 2.250114218995888, "grad_norm": 0.4453542232513428, "learning_rate": 4.999238540027413e-06, "loss": 0.0436, "step": 44325 }, { "epoch": 2.2503680389867506, "grad_norm": 0.40560978651046753, "learning_rate": 4.997546406754997e-06, "loss": 0.0419, "step": 44330 }, { "epoch": 2.2506218589776132, "grad_norm": 0.3937535285949707, "learning_rate": 4.99585427348258e-06, "loss": 0.0367, "step": 44335 }, { "epoch": 2.2508756789684754, "grad_norm": 0.31118619441986084, "learning_rate": 4.994162140210163e-06, "loss": 0.0356, "step": 44340 }, { "epoch": 2.251129498959338, "grad_norm": 0.34185513854026794, "learning_rate": 4.992470006937747e-06, "loss": 0.0341, "step": 44345 }, { "epoch": 2.2513833189502006, "grad_norm": 0.21117255091667175, "learning_rate": 4.99077787366533e-06, "loss": 0.0283, "step": 44350 }, { "epoch": 2.2516371389410628, "grad_norm": 0.2540547847747803, "learning_rate": 4.989085740392914e-06, "loss": 0.0348, "step": 44355 }, { "epoch": 2.2518909589319254, "grad_norm": 0.2739291489124298, "learning_rate": 4.987393607120497e-06, "loss": 0.0227, "step": 44360 }, { "epoch": 2.252144778922788, "grad_norm": 0.32483914494514465, "learning_rate": 4.985701473848081e-06, "loss": 0.0335, "step": 44365 }, { "epoch": 2.2523985989136506, "grad_norm": 0.36847808957099915, "learning_rate": 4.984009340575664e-06, "loss": 0.0349, "step": 44370 }, { "epoch": 2.2526524189045127, "grad_norm": 0.5985268354415894, "learning_rate": 4.982317207303248e-06, "loss": 0.0317, "step": 44375 }, { "epoch": 2.2529062388953753, "grad_norm": 0.3975210189819336, "learning_rate": 4.980625074030831e-06, "loss": 0.033, "step": 44380 }, { "epoch": 2.253160058886238, "grad_norm": 0.27625125646591187, "learning_rate": 4.978932940758415e-06, "loss": 0.0316, "step": 44385 }, { "epoch": 2.2534138788771005, "grad_norm": 0.2550783157348633, "learning_rate": 4.977240807485998e-06, "loss": 0.0329, "step": 44390 }, { "epoch": 2.2536676988679627, "grad_norm": 0.5757700204849243, "learning_rate": 4.975548674213581e-06, "loss": 0.0353, "step": 44395 }, { "epoch": 2.2539215188588253, "grad_norm": 0.2964254319667816, "learning_rate": 4.973856540941165e-06, "loss": 0.0286, "step": 44400 }, { "epoch": 2.254175338849688, "grad_norm": 0.3224160075187683, "learning_rate": 4.972164407668748e-06, "loss": 0.0305, "step": 44405 }, { "epoch": 2.2544291588405505, "grad_norm": 0.3136308789253235, "learning_rate": 4.970472274396332e-06, "loss": 0.0342, "step": 44410 }, { "epoch": 2.2546829788314127, "grad_norm": 0.30831313133239746, "learning_rate": 4.9687801411239155e-06, "loss": 0.0303, "step": 44415 }, { "epoch": 2.2549367988222753, "grad_norm": 0.3761250078678131, "learning_rate": 4.967088007851499e-06, "loss": 0.0355, "step": 44420 }, { "epoch": 2.255190618813138, "grad_norm": 0.4608723521232605, "learning_rate": 4.965395874579082e-06, "loss": 0.0349, "step": 44425 }, { "epoch": 2.255444438804, "grad_norm": 0.35675951838493347, "learning_rate": 4.963703741306666e-06, "loss": 0.0417, "step": 44430 }, { "epoch": 2.2556982587948626, "grad_norm": 0.3406544029712677, "learning_rate": 4.962011608034249e-06, "loss": 0.0341, "step": 44435 }, { "epoch": 2.2559520787857252, "grad_norm": 0.4340391755104065, "learning_rate": 4.960319474761832e-06, "loss": 0.0235, "step": 44440 }, { "epoch": 2.256205898776588, "grad_norm": 0.2823371887207031, "learning_rate": 4.958627341489416e-06, "loss": 0.029, "step": 44445 }, { "epoch": 2.25645971876745, "grad_norm": 0.2538403272628784, "learning_rate": 4.956935208216999e-06, "loss": 0.0271, "step": 44450 }, { "epoch": 2.2567135387583126, "grad_norm": 0.3238656520843506, "learning_rate": 4.955243074944583e-06, "loss": 0.0282, "step": 44455 }, { "epoch": 2.256967358749175, "grad_norm": 0.4126463234424591, "learning_rate": 4.9535509416721665e-06, "loss": 0.0276, "step": 44460 }, { "epoch": 2.2572211787400374, "grad_norm": 0.6289737820625305, "learning_rate": 4.95185880839975e-06, "loss": 0.0316, "step": 44465 }, { "epoch": 2.2574749987309, "grad_norm": 0.3341315686702728, "learning_rate": 4.950166675127334e-06, "loss": 0.0298, "step": 44470 }, { "epoch": 2.2577288187217626, "grad_norm": 0.44910258054733276, "learning_rate": 4.948474541854917e-06, "loss": 0.035, "step": 44475 }, { "epoch": 2.257982638712625, "grad_norm": 0.261214941740036, "learning_rate": 4.946782408582501e-06, "loss": 0.0373, "step": 44480 }, { "epoch": 2.2582364587034873, "grad_norm": 0.44271722435951233, "learning_rate": 4.945090275310083e-06, "loss": 0.0282, "step": 44485 }, { "epoch": 2.25849027869435, "grad_norm": 0.34975486993789673, "learning_rate": 4.943398142037668e-06, "loss": 0.0289, "step": 44490 }, { "epoch": 2.2587440986852125, "grad_norm": 0.6106913089752197, "learning_rate": 4.9417060087652505e-06, "loss": 0.034, "step": 44495 }, { "epoch": 2.2589979186760747, "grad_norm": 0.25883862376213074, "learning_rate": 4.940013875492834e-06, "loss": 0.038, "step": 44500 }, { "epoch": 2.2592517386669373, "grad_norm": 0.39152991771698, "learning_rate": 4.9383217422204176e-06, "loss": 0.0349, "step": 44505 }, { "epoch": 2.2595055586578, "grad_norm": 0.3380178213119507, "learning_rate": 4.936629608948001e-06, "loss": 0.0342, "step": 44510 }, { "epoch": 2.2597593786486625, "grad_norm": 0.36176571249961853, "learning_rate": 4.934937475675585e-06, "loss": 0.0309, "step": 44515 }, { "epoch": 2.2600131986395247, "grad_norm": 0.6492758989334106, "learning_rate": 4.933245342403168e-06, "loss": 0.0348, "step": 44520 }, { "epoch": 2.2602670186303873, "grad_norm": 0.3978842794895172, "learning_rate": 4.931553209130752e-06, "loss": 0.0379, "step": 44525 }, { "epoch": 2.26052083862125, "grad_norm": 0.40266185998916626, "learning_rate": 4.9298610758583344e-06, "loss": 0.0331, "step": 44530 }, { "epoch": 2.2607746586121125, "grad_norm": 0.262500524520874, "learning_rate": 4.928168942585919e-06, "loss": 0.0352, "step": 44535 }, { "epoch": 2.2610284786029746, "grad_norm": 0.389607697725296, "learning_rate": 4.9264768093135015e-06, "loss": 0.0291, "step": 44540 }, { "epoch": 2.2612822985938372, "grad_norm": 0.24380935728549957, "learning_rate": 4.924784676041086e-06, "loss": 0.0312, "step": 44545 }, { "epoch": 2.2615361185847, "grad_norm": 0.5458943247795105, "learning_rate": 4.923092542768669e-06, "loss": 0.0284, "step": 44550 }, { "epoch": 2.2617899385755624, "grad_norm": 1.1159203052520752, "learning_rate": 4.921400409496252e-06, "loss": 0.0305, "step": 44555 }, { "epoch": 2.2620437585664246, "grad_norm": 0.3896821141242981, "learning_rate": 4.919708276223836e-06, "loss": 0.0387, "step": 44560 }, { "epoch": 2.262297578557287, "grad_norm": 0.3972178101539612, "learning_rate": 4.918016142951419e-06, "loss": 0.035, "step": 44565 }, { "epoch": 2.26255139854815, "grad_norm": 0.3106682598590851, "learning_rate": 4.916324009679003e-06, "loss": 0.0276, "step": 44570 }, { "epoch": 2.262805218539012, "grad_norm": 0.24602703750133514, "learning_rate": 4.914631876406586e-06, "loss": 0.0354, "step": 44575 }, { "epoch": 2.2630590385298746, "grad_norm": 0.26132622361183167, "learning_rate": 4.91293974313417e-06, "loss": 0.0274, "step": 44580 }, { "epoch": 2.263312858520737, "grad_norm": 0.34328407049179077, "learning_rate": 4.911247609861753e-06, "loss": 0.029, "step": 44585 }, { "epoch": 2.2635666785115998, "grad_norm": 0.3113946318626404, "learning_rate": 4.909555476589337e-06, "loss": 0.034, "step": 44590 }, { "epoch": 2.263820498502462, "grad_norm": 0.24702712893486023, "learning_rate": 4.90786334331692e-06, "loss": 0.033, "step": 44595 }, { "epoch": 2.2640743184933245, "grad_norm": 0.4547874629497528, "learning_rate": 4.906171210044503e-06, "loss": 0.0334, "step": 44600 }, { "epoch": 2.264328138484187, "grad_norm": 0.31980690360069275, "learning_rate": 4.904479076772087e-06, "loss": 0.0291, "step": 44605 }, { "epoch": 2.2645819584750493, "grad_norm": 0.28869301080703735, "learning_rate": 4.90278694349967e-06, "loss": 0.0295, "step": 44610 }, { "epoch": 2.264835778465912, "grad_norm": 0.3857107162475586, "learning_rate": 4.901094810227254e-06, "loss": 0.0398, "step": 44615 }, { "epoch": 2.2650895984567745, "grad_norm": 0.25656676292419434, "learning_rate": 4.899402676954837e-06, "loss": 0.032, "step": 44620 }, { "epoch": 2.265343418447637, "grad_norm": 0.24995790421962738, "learning_rate": 4.897710543682421e-06, "loss": 0.0271, "step": 44625 }, { "epoch": 2.2655972384384993, "grad_norm": 0.4084688723087311, "learning_rate": 4.8960184104100045e-06, "loss": 0.0327, "step": 44630 }, { "epoch": 2.265851058429362, "grad_norm": 0.2487572878599167, "learning_rate": 4.894326277137588e-06, "loss": 0.0308, "step": 44635 }, { "epoch": 2.2661048784202245, "grad_norm": 0.6064757108688354, "learning_rate": 4.8926341438651716e-06, "loss": 0.0287, "step": 44640 }, { "epoch": 2.2663586984110866, "grad_norm": 0.37674620747566223, "learning_rate": 4.890942010592754e-06, "loss": 0.0289, "step": 44645 }, { "epoch": 2.266612518401949, "grad_norm": 0.27238699793815613, "learning_rate": 4.889249877320339e-06, "loss": 0.0316, "step": 44650 }, { "epoch": 2.266866338392812, "grad_norm": 0.3761901557445526, "learning_rate": 4.887557744047921e-06, "loss": 0.0374, "step": 44655 }, { "epoch": 2.2671201583836744, "grad_norm": 0.4535421133041382, "learning_rate": 4.885865610775505e-06, "loss": 0.0327, "step": 44660 }, { "epoch": 2.267373978374537, "grad_norm": 0.3541247248649597, "learning_rate": 4.8841734775030884e-06, "loss": 0.032, "step": 44665 }, { "epoch": 2.267627798365399, "grad_norm": 0.2639147639274597, "learning_rate": 4.882481344230672e-06, "loss": 0.029, "step": 44670 }, { "epoch": 2.267881618356262, "grad_norm": 0.3500845432281494, "learning_rate": 4.8807892109582555e-06, "loss": 0.0335, "step": 44675 }, { "epoch": 2.2681354383471244, "grad_norm": 0.22339092195034027, "learning_rate": 4.879097077685839e-06, "loss": 0.0304, "step": 44680 }, { "epoch": 2.2683892583379865, "grad_norm": 0.2681799829006195, "learning_rate": 4.877404944413423e-06, "loss": 0.0327, "step": 44685 }, { "epoch": 2.268643078328849, "grad_norm": 0.35728010535240173, "learning_rate": 4.875712811141005e-06, "loss": 0.0318, "step": 44690 }, { "epoch": 2.2688968983197118, "grad_norm": 0.5533677935600281, "learning_rate": 4.87402067786859e-06, "loss": 0.0297, "step": 44695 }, { "epoch": 2.2691507183105744, "grad_norm": 0.3111599087715149, "learning_rate": 4.872328544596172e-06, "loss": 0.032, "step": 44700 }, { "epoch": 2.2694045383014365, "grad_norm": 0.5662405490875244, "learning_rate": 4.870636411323757e-06, "loss": 0.0449, "step": 44705 }, { "epoch": 2.269658358292299, "grad_norm": 0.33681607246398926, "learning_rate": 4.8689442780513395e-06, "loss": 0.0352, "step": 44710 }, { "epoch": 2.2699121782831617, "grad_norm": 0.33438214659690857, "learning_rate": 4.867252144778923e-06, "loss": 0.033, "step": 44715 }, { "epoch": 2.270165998274024, "grad_norm": 0.49922633171081543, "learning_rate": 4.865560011506507e-06, "loss": 0.035, "step": 44720 }, { "epoch": 2.2704198182648865, "grad_norm": 0.32202664017677307, "learning_rate": 4.86386787823409e-06, "loss": 0.0364, "step": 44725 }, { "epoch": 2.270673638255749, "grad_norm": 0.41406023502349854, "learning_rate": 4.862175744961674e-06, "loss": 0.0374, "step": 44730 }, { "epoch": 2.2709274582466117, "grad_norm": 0.404519647359848, "learning_rate": 4.860483611689257e-06, "loss": 0.0337, "step": 44735 }, { "epoch": 2.271181278237474, "grad_norm": 0.3270086348056793, "learning_rate": 4.858791478416841e-06, "loss": 0.0401, "step": 44740 }, { "epoch": 2.2714350982283364, "grad_norm": 0.3245806097984314, "learning_rate": 4.8570993451444235e-06, "loss": 0.0311, "step": 44745 }, { "epoch": 2.271688918219199, "grad_norm": 0.1995822787284851, "learning_rate": 4.855407211872008e-06, "loss": 0.032, "step": 44750 }, { "epoch": 2.271942738210061, "grad_norm": 0.3266526758670807, "learning_rate": 4.8537150785995906e-06, "loss": 0.0243, "step": 44755 }, { "epoch": 2.272196558200924, "grad_norm": 0.26783430576324463, "learning_rate": 4.852022945327174e-06, "loss": 0.0273, "step": 44760 }, { "epoch": 2.2724503781917864, "grad_norm": 0.4026534855365753, "learning_rate": 4.850330812054758e-06, "loss": 0.0275, "step": 44765 }, { "epoch": 2.272704198182649, "grad_norm": 0.33564287424087524, "learning_rate": 4.848638678782341e-06, "loss": 0.0362, "step": 44770 }, { "epoch": 2.272958018173511, "grad_norm": 0.57597815990448, "learning_rate": 4.846946545509925e-06, "loss": 0.0407, "step": 44775 }, { "epoch": 2.2732118381643738, "grad_norm": 0.3466244637966156, "learning_rate": 4.845254412237508e-06, "loss": 0.0361, "step": 44780 }, { "epoch": 2.2734656581552364, "grad_norm": 0.21853549778461456, "learning_rate": 4.843562278965092e-06, "loss": 0.0273, "step": 44785 }, { "epoch": 2.2737194781460985, "grad_norm": 0.32469242811203003, "learning_rate": 4.841870145692675e-06, "loss": 0.0289, "step": 44790 }, { "epoch": 2.273973298136961, "grad_norm": 0.330661416053772, "learning_rate": 4.840178012420259e-06, "loss": 0.0345, "step": 44795 }, { "epoch": 2.2742271181278237, "grad_norm": 0.28515830636024475, "learning_rate": 4.8384858791478425e-06, "loss": 0.0297, "step": 44800 }, { "epoch": 2.2744809381186863, "grad_norm": 0.4901011288166046, "learning_rate": 4.836793745875425e-06, "loss": 0.031, "step": 44805 }, { "epoch": 2.274734758109549, "grad_norm": 0.31480327248573303, "learning_rate": 4.8351016126030095e-06, "loss": 0.0382, "step": 44810 }, { "epoch": 2.274988578100411, "grad_norm": 0.3091256618499756, "learning_rate": 4.833409479330592e-06, "loss": 0.0379, "step": 44815 }, { "epoch": 2.2752423980912737, "grad_norm": 0.2778155207633972, "learning_rate": 4.831717346058176e-06, "loss": 0.035, "step": 44820 }, { "epoch": 2.2754962180821363, "grad_norm": 0.41067826747894287, "learning_rate": 4.830025212785759e-06, "loss": 0.033, "step": 44825 }, { "epoch": 2.2757500380729985, "grad_norm": 0.49526798725128174, "learning_rate": 4.828333079513343e-06, "loss": 0.0311, "step": 44830 }, { "epoch": 2.276003858063861, "grad_norm": 0.3333423137664795, "learning_rate": 4.826640946240926e-06, "loss": 0.028, "step": 44835 }, { "epoch": 2.2762576780547237, "grad_norm": 0.386203795671463, "learning_rate": 4.82494881296851e-06, "loss": 0.0348, "step": 44840 }, { "epoch": 2.2765114980455863, "grad_norm": 0.3671344816684723, "learning_rate": 4.8232566796960935e-06, "loss": 0.0344, "step": 44845 }, { "epoch": 2.2767653180364484, "grad_norm": 0.50922030210495, "learning_rate": 4.821564546423676e-06, "loss": 0.0308, "step": 44850 }, { "epoch": 2.277019138027311, "grad_norm": 0.228525772690773, "learning_rate": 4.819872413151261e-06, "loss": 0.036, "step": 44855 }, { "epoch": 2.2772729580181736, "grad_norm": 0.41872110962867737, "learning_rate": 4.818180279878843e-06, "loss": 0.0392, "step": 44860 }, { "epoch": 2.277526778009036, "grad_norm": 0.32197269797325134, "learning_rate": 4.816488146606428e-06, "loss": 0.0307, "step": 44865 }, { "epoch": 2.2777805979998984, "grad_norm": 0.2782743573188782, "learning_rate": 4.81479601333401e-06, "loss": 0.0312, "step": 44870 }, { "epoch": 2.278034417990761, "grad_norm": 0.40119466185569763, "learning_rate": 4.813103880061594e-06, "loss": 0.027, "step": 44875 }, { "epoch": 2.2782882379816236, "grad_norm": 0.34341195225715637, "learning_rate": 4.8114117467891775e-06, "loss": 0.0322, "step": 44880 }, { "epoch": 2.2785420579724858, "grad_norm": 0.4127197563648224, "learning_rate": 4.809719613516761e-06, "loss": 0.028, "step": 44885 }, { "epoch": 2.2787958779633484, "grad_norm": 0.4768372178077698, "learning_rate": 4.8080274802443446e-06, "loss": 0.0325, "step": 44890 }, { "epoch": 2.279049697954211, "grad_norm": 0.30066752433776855, "learning_rate": 4.806335346971928e-06, "loss": 0.0342, "step": 44895 }, { "epoch": 2.279303517945073, "grad_norm": 0.3694850504398346, "learning_rate": 4.804643213699512e-06, "loss": 0.0417, "step": 44900 }, { "epoch": 2.2795573379359357, "grad_norm": 0.3389255404472351, "learning_rate": 4.802951080427094e-06, "loss": 0.0328, "step": 44905 }, { "epoch": 2.2798111579267983, "grad_norm": 0.32273051142692566, "learning_rate": 4.801258947154679e-06, "loss": 0.0302, "step": 44910 }, { "epoch": 2.280064977917661, "grad_norm": 0.2565813660621643, "learning_rate": 4.7995668138822614e-06, "loss": 0.0373, "step": 44915 }, { "epoch": 2.280318797908523, "grad_norm": 0.17176474630832672, "learning_rate": 4.797874680609845e-06, "loss": 0.0308, "step": 44920 }, { "epoch": 2.2805726178993857, "grad_norm": 0.28532516956329346, "learning_rate": 4.7961825473374285e-06, "loss": 0.0294, "step": 44925 }, { "epoch": 2.2808264378902483, "grad_norm": 0.3874659836292267, "learning_rate": 4.794490414065012e-06, "loss": 0.0294, "step": 44930 }, { "epoch": 2.281080257881111, "grad_norm": 0.3868589997291565, "learning_rate": 4.792798280792596e-06, "loss": 0.0309, "step": 44935 }, { "epoch": 2.281334077871973, "grad_norm": 0.37089648842811584, "learning_rate": 4.791106147520179e-06, "loss": 0.032, "step": 44940 }, { "epoch": 2.2815878978628357, "grad_norm": 0.28405794501304626, "learning_rate": 4.789414014247763e-06, "loss": 0.0304, "step": 44945 }, { "epoch": 2.2818417178536983, "grad_norm": 0.316731721162796, "learning_rate": 4.787721880975346e-06, "loss": 0.0359, "step": 44950 }, { "epoch": 2.282095537844561, "grad_norm": 0.6578758358955383, "learning_rate": 4.78602974770293e-06, "loss": 0.0352, "step": 44955 }, { "epoch": 2.282349357835423, "grad_norm": 0.41155996918678284, "learning_rate": 4.784337614430513e-06, "loss": 0.0313, "step": 44960 }, { "epoch": 2.2826031778262856, "grad_norm": 0.33022230863571167, "learning_rate": 4.782645481158096e-06, "loss": 0.0244, "step": 44965 }, { "epoch": 2.2828569978171482, "grad_norm": 0.285152792930603, "learning_rate": 4.7809533478856804e-06, "loss": 0.0289, "step": 44970 }, { "epoch": 2.2831108178080104, "grad_norm": 0.30267083644866943, "learning_rate": 4.779261214613263e-06, "loss": 0.0341, "step": 44975 }, { "epoch": 2.283364637798873, "grad_norm": 1.3061234951019287, "learning_rate": 4.777569081340847e-06, "loss": 0.0311, "step": 44980 }, { "epoch": 2.2836184577897356, "grad_norm": 0.2621577978134155, "learning_rate": 4.77587694806843e-06, "loss": 0.0288, "step": 44985 }, { "epoch": 2.283872277780598, "grad_norm": 0.5385904312133789, "learning_rate": 4.774184814796014e-06, "loss": 0.037, "step": 44990 }, { "epoch": 2.2841260977714604, "grad_norm": 0.42236170172691345, "learning_rate": 4.772492681523597e-06, "loss": 0.0367, "step": 44995 }, { "epoch": 2.284379917762323, "grad_norm": 0.35834038257598877, "learning_rate": 4.770800548251181e-06, "loss": 0.0349, "step": 45000 }, { "epoch": 2.2846337377531856, "grad_norm": 0.39529144763946533, "learning_rate": 4.769108414978764e-06, "loss": 0.0393, "step": 45005 }, { "epoch": 2.2848875577440477, "grad_norm": 0.3461845815181732, "learning_rate": 4.767416281706347e-06, "loss": 0.0359, "step": 45010 }, { "epoch": 2.2851413777349103, "grad_norm": 0.2342214435338974, "learning_rate": 4.7657241484339315e-06, "loss": 0.0258, "step": 45015 }, { "epoch": 2.285395197725773, "grad_norm": 0.4008984863758087, "learning_rate": 4.764032015161514e-06, "loss": 0.0345, "step": 45020 }, { "epoch": 2.2856490177166355, "grad_norm": 0.3111340403556824, "learning_rate": 4.762339881889098e-06, "loss": 0.0428, "step": 45025 }, { "epoch": 2.2859028377074977, "grad_norm": 0.2649993300437927, "learning_rate": 4.760647748616681e-06, "loss": 0.027, "step": 45030 }, { "epoch": 2.2861566576983603, "grad_norm": 0.8399127721786499, "learning_rate": 4.758955615344265e-06, "loss": 0.0369, "step": 45035 }, { "epoch": 2.286410477689223, "grad_norm": 0.3271864354610443, "learning_rate": 4.757263482071848e-06, "loss": 0.0268, "step": 45040 }, { "epoch": 2.286664297680085, "grad_norm": 0.35605019330978394, "learning_rate": 4.755571348799432e-06, "loss": 0.0318, "step": 45045 }, { "epoch": 2.2869181176709477, "grad_norm": 0.25064730644226074, "learning_rate": 4.7538792155270154e-06, "loss": 0.026, "step": 45050 }, { "epoch": 2.2871719376618103, "grad_norm": 0.2588244676589966, "learning_rate": 4.752187082254599e-06, "loss": 0.0283, "step": 45055 }, { "epoch": 2.287425757652673, "grad_norm": 0.4669940769672394, "learning_rate": 4.7504949489821825e-06, "loss": 0.0341, "step": 45060 }, { "epoch": 2.287679577643535, "grad_norm": 0.2035631388425827, "learning_rate": 4.748802815709765e-06, "loss": 0.0323, "step": 45065 }, { "epoch": 2.2879333976343976, "grad_norm": 0.29153090715408325, "learning_rate": 4.747110682437349e-06, "loss": 0.0329, "step": 45070 }, { "epoch": 2.2881872176252602, "grad_norm": 0.5664026737213135, "learning_rate": 4.745418549164932e-06, "loss": 0.0354, "step": 45075 }, { "epoch": 2.288441037616123, "grad_norm": 0.31028592586517334, "learning_rate": 4.743726415892516e-06, "loss": 0.0361, "step": 45080 }, { "epoch": 2.288694857606985, "grad_norm": 0.29820579290390015, "learning_rate": 4.742034282620099e-06, "loss": 0.0323, "step": 45085 }, { "epoch": 2.2889486775978476, "grad_norm": 0.3673454523086548, "learning_rate": 4.740342149347683e-06, "loss": 0.032, "step": 45090 }, { "epoch": 2.28920249758871, "grad_norm": 0.35142046213150024, "learning_rate": 4.7386500160752665e-06, "loss": 0.0338, "step": 45095 }, { "epoch": 2.289456317579573, "grad_norm": 0.293951153755188, "learning_rate": 4.73695788280285e-06, "loss": 0.0291, "step": 45100 }, { "epoch": 2.289710137570435, "grad_norm": 0.3154962956905365, "learning_rate": 4.735265749530434e-06, "loss": 0.0289, "step": 45105 }, { "epoch": 2.2899639575612976, "grad_norm": 0.3382570147514343, "learning_rate": 4.733573616258017e-06, "loss": 0.0348, "step": 45110 }, { "epoch": 2.29021777755216, "grad_norm": 0.3350144624710083, "learning_rate": 4.7318814829856e-06, "loss": 0.0279, "step": 45115 }, { "epoch": 2.2904715975430223, "grad_norm": 0.5217757821083069, "learning_rate": 4.730189349713184e-06, "loss": 0.0328, "step": 45120 }, { "epoch": 2.290725417533885, "grad_norm": 0.34693512320518494, "learning_rate": 4.728497216440767e-06, "loss": 0.0329, "step": 45125 }, { "epoch": 2.2909792375247475, "grad_norm": 0.41887202858924866, "learning_rate": 4.726805083168351e-06, "loss": 0.0348, "step": 45130 }, { "epoch": 2.29123305751561, "grad_norm": 0.3702602684497833, "learning_rate": 4.725112949895934e-06, "loss": 0.0312, "step": 45135 }, { "epoch": 2.2914868775064723, "grad_norm": 0.4091658294200897, "learning_rate": 4.7234208166235176e-06, "loss": 0.0283, "step": 45140 }, { "epoch": 2.291740697497335, "grad_norm": 0.46928733587265015, "learning_rate": 4.721728683351101e-06, "loss": 0.0297, "step": 45145 }, { "epoch": 2.2919945174881975, "grad_norm": 0.31767764687538147, "learning_rate": 4.720036550078685e-06, "loss": 0.0268, "step": 45150 }, { "epoch": 2.2922483374790596, "grad_norm": 0.2629365622997284, "learning_rate": 4.718344416806268e-06, "loss": 0.0359, "step": 45155 }, { "epoch": 2.2925021574699223, "grad_norm": 0.40257173776626587, "learning_rate": 4.716652283533851e-06, "loss": 0.0369, "step": 45160 }, { "epoch": 2.292755977460785, "grad_norm": 0.29946520924568176, "learning_rate": 4.714960150261435e-06, "loss": 0.0307, "step": 45165 }, { "epoch": 2.2930097974516475, "grad_norm": 0.4067082107067108, "learning_rate": 4.713268016989018e-06, "loss": 0.0354, "step": 45170 }, { "epoch": 2.2932636174425096, "grad_norm": 1.0462335348129272, "learning_rate": 4.711575883716602e-06, "loss": 0.0276, "step": 45175 }, { "epoch": 2.293517437433372, "grad_norm": 0.5343073606491089, "learning_rate": 4.709883750444185e-06, "loss": 0.0288, "step": 45180 }, { "epoch": 2.293771257424235, "grad_norm": 0.601951539516449, "learning_rate": 4.708191617171769e-06, "loss": 0.0341, "step": 45185 }, { "epoch": 2.294025077415097, "grad_norm": 0.3987095355987549, "learning_rate": 4.706499483899352e-06, "loss": 0.0273, "step": 45190 }, { "epoch": 2.2942788974059596, "grad_norm": 0.45923149585723877, "learning_rate": 4.704807350626936e-06, "loss": 0.0414, "step": 45195 }, { "epoch": 2.294532717396822, "grad_norm": 0.3649718165397644, "learning_rate": 4.703115217354519e-06, "loss": 0.0213, "step": 45200 }, { "epoch": 2.294786537387685, "grad_norm": 0.24480339884757996, "learning_rate": 4.701423084082103e-06, "loss": 0.0281, "step": 45205 }, { "epoch": 2.295040357378547, "grad_norm": 0.36629045009613037, "learning_rate": 4.699730950809686e-06, "loss": 0.0317, "step": 45210 }, { "epoch": 2.2952941773694095, "grad_norm": 0.3395744264125824, "learning_rate": 4.69803881753727e-06, "loss": 0.0334, "step": 45215 }, { "epoch": 2.295547997360272, "grad_norm": 0.7202277779579163, "learning_rate": 4.696346684264853e-06, "loss": 0.0329, "step": 45220 }, { "epoch": 2.2958018173511348, "grad_norm": 0.3674011528491974, "learning_rate": 4.694654550992436e-06, "loss": 0.0273, "step": 45225 }, { "epoch": 2.296055637341997, "grad_norm": 0.3968465030193329, "learning_rate": 4.69296241772002e-06, "loss": 0.0245, "step": 45230 }, { "epoch": 2.2963094573328595, "grad_norm": 0.371276319026947, "learning_rate": 4.691270284447603e-06, "loss": 0.0281, "step": 45235 }, { "epoch": 2.296563277323722, "grad_norm": 0.5648274421691895, "learning_rate": 4.689578151175187e-06, "loss": 0.0409, "step": 45240 }, { "epoch": 2.2968170973145847, "grad_norm": 0.7816662788391113, "learning_rate": 4.68788601790277e-06, "loss": 0.0299, "step": 45245 }, { "epoch": 2.297070917305447, "grad_norm": 0.35681354999542236, "learning_rate": 4.686193884630354e-06, "loss": 0.0302, "step": 45250 }, { "epoch": 2.2973247372963095, "grad_norm": 0.3457604646682739, "learning_rate": 4.684501751357937e-06, "loss": 0.0344, "step": 45255 }, { "epoch": 2.297578557287172, "grad_norm": 0.7468198537826538, "learning_rate": 4.682809618085521e-06, "loss": 0.0352, "step": 45260 }, { "epoch": 2.2978323772780342, "grad_norm": 0.2634367048740387, "learning_rate": 4.6811174848131045e-06, "loss": 0.0321, "step": 45265 }, { "epoch": 2.298086197268897, "grad_norm": 0.22003506124019623, "learning_rate": 4.679425351540688e-06, "loss": 0.0338, "step": 45270 }, { "epoch": 2.2983400172597594, "grad_norm": 0.17040963470935822, "learning_rate": 4.677733218268271e-06, "loss": 0.0281, "step": 45275 }, { "epoch": 2.298593837250622, "grad_norm": 0.27573078870773315, "learning_rate": 4.676041084995855e-06, "loss": 0.0304, "step": 45280 }, { "epoch": 2.298847657241484, "grad_norm": 0.9302784204483032, "learning_rate": 4.674348951723438e-06, "loss": 0.0329, "step": 45285 }, { "epoch": 2.299101477232347, "grad_norm": 0.3319515585899353, "learning_rate": 4.672656818451022e-06, "loss": 0.0423, "step": 45290 }, { "epoch": 2.2993552972232094, "grad_norm": 0.7237670421600342, "learning_rate": 4.670964685178605e-06, "loss": 0.033, "step": 45295 }, { "epoch": 2.2996091172140716, "grad_norm": 0.3120134174823761, "learning_rate": 4.6692725519061884e-06, "loss": 0.0396, "step": 45300 }, { "epoch": 2.299862937204934, "grad_norm": 0.47648027539253235, "learning_rate": 4.667580418633772e-06, "loss": 0.03, "step": 45305 }, { "epoch": 2.300116757195797, "grad_norm": 0.24086786806583405, "learning_rate": 4.6658882853613555e-06, "loss": 0.0379, "step": 45310 }, { "epoch": 2.3003705771866594, "grad_norm": 0.2652145326137543, "learning_rate": 4.664196152088939e-06, "loss": 0.0355, "step": 45315 }, { "epoch": 2.3006243971775215, "grad_norm": 0.2888307273387909, "learning_rate": 4.662504018816522e-06, "loss": 0.0265, "step": 45320 }, { "epoch": 2.300878217168384, "grad_norm": 0.3059805929660797, "learning_rate": 4.660811885544106e-06, "loss": 0.0322, "step": 45325 }, { "epoch": 2.3011320371592467, "grad_norm": 0.555613100528717, "learning_rate": 4.659119752271689e-06, "loss": 0.031, "step": 45330 }, { "epoch": 2.301385857150109, "grad_norm": 0.31799572706222534, "learning_rate": 4.657427618999273e-06, "loss": 0.0326, "step": 45335 }, { "epoch": 2.3016396771409715, "grad_norm": 0.2731345295906067, "learning_rate": 4.655735485726856e-06, "loss": 0.0367, "step": 45340 }, { "epoch": 2.301893497131834, "grad_norm": 0.23575860261917114, "learning_rate": 4.6540433524544395e-06, "loss": 0.024, "step": 45345 }, { "epoch": 2.3021473171226967, "grad_norm": 0.43875861167907715, "learning_rate": 4.652351219182023e-06, "loss": 0.0288, "step": 45350 }, { "epoch": 2.302401137113559, "grad_norm": 0.3125607669353485, "learning_rate": 4.650659085909607e-06, "loss": 0.032, "step": 45355 }, { "epoch": 2.3026549571044215, "grad_norm": 0.2842237949371338, "learning_rate": 4.64896695263719e-06, "loss": 0.0329, "step": 45360 }, { "epoch": 2.302908777095284, "grad_norm": 0.3345993459224701, "learning_rate": 4.647274819364774e-06, "loss": 0.0352, "step": 45365 }, { "epoch": 2.3031625970861467, "grad_norm": 0.3049146831035614, "learning_rate": 4.645582686092357e-06, "loss": 0.0305, "step": 45370 }, { "epoch": 2.303416417077009, "grad_norm": 0.3029148280620575, "learning_rate": 4.643890552819941e-06, "loss": 0.0304, "step": 45375 }, { "epoch": 2.3036702370678714, "grad_norm": 0.2953786253929138, "learning_rate": 4.642198419547524e-06, "loss": 0.027, "step": 45380 }, { "epoch": 2.303924057058734, "grad_norm": 0.5618043541908264, "learning_rate": 4.640506286275107e-06, "loss": 0.036, "step": 45385 }, { "epoch": 2.3041778770495966, "grad_norm": 0.36698117852211, "learning_rate": 4.6388141530026905e-06, "loss": 0.0356, "step": 45390 }, { "epoch": 2.304431697040459, "grad_norm": 0.30508795380592346, "learning_rate": 4.637122019730274e-06, "loss": 0.0278, "step": 45395 }, { "epoch": 2.3046855170313214, "grad_norm": 0.39213359355926514, "learning_rate": 4.635429886457858e-06, "loss": 0.0371, "step": 45400 }, { "epoch": 2.304939337022184, "grad_norm": 0.32886233925819397, "learning_rate": 4.633737753185441e-06, "loss": 0.0336, "step": 45405 }, { "epoch": 2.305193157013046, "grad_norm": 0.34936273097991943, "learning_rate": 4.632045619913025e-06, "loss": 0.0377, "step": 45410 }, { "epoch": 2.3054469770039088, "grad_norm": 0.3372868299484253, "learning_rate": 4.630353486640608e-06, "loss": 0.0334, "step": 45415 }, { "epoch": 2.3057007969947714, "grad_norm": 0.29870179295539856, "learning_rate": 4.628661353368192e-06, "loss": 0.0376, "step": 45420 }, { "epoch": 2.305954616985634, "grad_norm": 0.2501830756664276, "learning_rate": 4.626969220095775e-06, "loss": 0.0287, "step": 45425 }, { "epoch": 2.306208436976496, "grad_norm": 0.33491966128349304, "learning_rate": 4.625277086823359e-06, "loss": 0.0283, "step": 45430 }, { "epoch": 2.3064622569673587, "grad_norm": 0.30442988872528076, "learning_rate": 4.623584953550942e-06, "loss": 0.0347, "step": 45435 }, { "epoch": 2.3067160769582213, "grad_norm": 0.44757723808288574, "learning_rate": 4.621892820278526e-06, "loss": 0.0331, "step": 45440 }, { "epoch": 2.3069698969490835, "grad_norm": 0.27261823415756226, "learning_rate": 4.620200687006109e-06, "loss": 0.0305, "step": 45445 }, { "epoch": 2.307223716939946, "grad_norm": 0.27286872267723083, "learning_rate": 4.618508553733693e-06, "loss": 0.0272, "step": 45450 }, { "epoch": 2.3074775369308087, "grad_norm": 0.23978255689144135, "learning_rate": 4.616816420461276e-06, "loss": 0.034, "step": 45455 }, { "epoch": 2.3077313569216713, "grad_norm": 0.406010240316391, "learning_rate": 4.615124287188859e-06, "loss": 0.0358, "step": 45460 }, { "epoch": 2.3079851769125335, "grad_norm": 0.34851107001304626, "learning_rate": 4.613432153916443e-06, "loss": 0.0336, "step": 45465 }, { "epoch": 2.308238996903396, "grad_norm": 0.38284942507743835, "learning_rate": 4.611740020644026e-06, "loss": 0.0297, "step": 45470 }, { "epoch": 2.3084928168942587, "grad_norm": 0.34113410115242004, "learning_rate": 4.61004788737161e-06, "loss": 0.0345, "step": 45475 }, { "epoch": 2.308746636885121, "grad_norm": 0.3386651575565338, "learning_rate": 4.608355754099193e-06, "loss": 0.0318, "step": 45480 }, { "epoch": 2.3090004568759834, "grad_norm": 0.4475891590118408, "learning_rate": 4.606663620826777e-06, "loss": 0.0384, "step": 45485 }, { "epoch": 2.309254276866846, "grad_norm": 0.39710333943367004, "learning_rate": 4.60497148755436e-06, "loss": 0.0338, "step": 45490 }, { "epoch": 2.3095080968577086, "grad_norm": 0.5366309881210327, "learning_rate": 4.603279354281944e-06, "loss": 0.0312, "step": 45495 }, { "epoch": 2.3097619168485712, "grad_norm": 0.5256260633468628, "learning_rate": 4.601587221009527e-06, "loss": 0.0373, "step": 45500 }, { "epoch": 2.3100157368394334, "grad_norm": 0.23877793550491333, "learning_rate": 4.59989508773711e-06, "loss": 0.0307, "step": 45505 }, { "epoch": 2.310269556830296, "grad_norm": 0.24250926077365875, "learning_rate": 4.598202954464694e-06, "loss": 0.0384, "step": 45510 }, { "epoch": 2.3105233768211586, "grad_norm": 0.3884424567222595, "learning_rate": 4.5965108211922775e-06, "loss": 0.0455, "step": 45515 }, { "epoch": 2.3107771968120208, "grad_norm": 0.3509715795516968, "learning_rate": 4.594818687919861e-06, "loss": 0.0321, "step": 45520 }, { "epoch": 2.3110310168028834, "grad_norm": 0.3542064428329468, "learning_rate": 4.5931265546474446e-06, "loss": 0.0336, "step": 45525 }, { "epoch": 2.311284836793746, "grad_norm": 0.43271404504776, "learning_rate": 4.591434421375028e-06, "loss": 0.037, "step": 45530 }, { "epoch": 2.3115386567846086, "grad_norm": 0.5255846381187439, "learning_rate": 4.589742288102612e-06, "loss": 0.0345, "step": 45535 }, { "epoch": 2.3117924767754707, "grad_norm": 0.36676549911499023, "learning_rate": 4.588050154830195e-06, "loss": 0.029, "step": 45540 }, { "epoch": 2.3120462967663333, "grad_norm": 0.3285497725009918, "learning_rate": 4.586358021557779e-06, "loss": 0.0299, "step": 45545 }, { "epoch": 2.312300116757196, "grad_norm": 0.4070643186569214, "learning_rate": 4.5846658882853614e-06, "loss": 0.0324, "step": 45550 }, { "epoch": 2.312553936748058, "grad_norm": 0.22612564265727997, "learning_rate": 4.582973755012945e-06, "loss": 0.0295, "step": 45555 }, { "epoch": 2.3128077567389207, "grad_norm": 0.527091383934021, "learning_rate": 4.5812816217405285e-06, "loss": 0.0292, "step": 45560 }, { "epoch": 2.3130615767297833, "grad_norm": 0.36508095264434814, "learning_rate": 4.579589488468112e-06, "loss": 0.0351, "step": 45565 }, { "epoch": 2.313315396720646, "grad_norm": 0.2954444885253906, "learning_rate": 4.577897355195696e-06, "loss": 0.0373, "step": 45570 }, { "epoch": 2.313569216711508, "grad_norm": 0.48034361004829407, "learning_rate": 4.576205221923279e-06, "loss": 0.039, "step": 45575 }, { "epoch": 2.3138230367023707, "grad_norm": 0.35288605093955994, "learning_rate": 4.574513088650863e-06, "loss": 0.0271, "step": 45580 }, { "epoch": 2.3140768566932333, "grad_norm": 0.24266214668750763, "learning_rate": 4.572820955378446e-06, "loss": 0.0267, "step": 45585 }, { "epoch": 2.3143306766840954, "grad_norm": 0.2815217077732086, "learning_rate": 4.57112882210603e-06, "loss": 0.0294, "step": 45590 }, { "epoch": 2.314584496674958, "grad_norm": 0.27903756499290466, "learning_rate": 4.5694366888336125e-06, "loss": 0.0332, "step": 45595 }, { "epoch": 2.3148383166658206, "grad_norm": 0.43626129627227783, "learning_rate": 4.567744555561197e-06, "loss": 0.0358, "step": 45600 }, { "epoch": 2.3150921366566832, "grad_norm": 0.3123166561126709, "learning_rate": 4.5660524222887796e-06, "loss": 0.0352, "step": 45605 }, { "epoch": 2.3153459566475454, "grad_norm": 0.3405829966068268, "learning_rate": 4.564360289016363e-06, "loss": 0.0346, "step": 45610 }, { "epoch": 2.315599776638408, "grad_norm": 0.5032159090042114, "learning_rate": 4.562668155743947e-06, "loss": 0.0311, "step": 45615 }, { "epoch": 2.3158535966292706, "grad_norm": 0.36772871017456055, "learning_rate": 4.56097602247153e-06, "loss": 0.0308, "step": 45620 }, { "epoch": 2.3161074166201328, "grad_norm": 0.49046018719673157, "learning_rate": 4.559283889199114e-06, "loss": 0.0309, "step": 45625 }, { "epoch": 2.3163612366109954, "grad_norm": 0.379315584897995, "learning_rate": 4.557591755926697e-06, "loss": 0.0326, "step": 45630 }, { "epoch": 2.316615056601858, "grad_norm": 0.38047313690185547, "learning_rate": 4.555899622654281e-06, "loss": 0.0437, "step": 45635 }, { "epoch": 2.3168688765927206, "grad_norm": 1.0208139419555664, "learning_rate": 4.5542074893818635e-06, "loss": 0.028, "step": 45640 }, { "epoch": 2.317122696583583, "grad_norm": 0.3524598479270935, "learning_rate": 4.552515356109448e-06, "loss": 0.0403, "step": 45645 }, { "epoch": 2.3173765165744453, "grad_norm": 0.3423077166080475, "learning_rate": 4.550823222837031e-06, "loss": 0.0431, "step": 45650 }, { "epoch": 2.317630336565308, "grad_norm": 0.72367924451828, "learning_rate": 4.549131089564614e-06, "loss": 0.0309, "step": 45655 }, { "epoch": 2.3178841565561705, "grad_norm": 0.5329000949859619, "learning_rate": 4.547438956292198e-06, "loss": 0.0427, "step": 45660 }, { "epoch": 2.3181379765470327, "grad_norm": 0.2928794026374817, "learning_rate": 4.545746823019781e-06, "loss": 0.0353, "step": 45665 }, { "epoch": 2.3183917965378953, "grad_norm": 0.3894771933555603, "learning_rate": 4.544054689747365e-06, "loss": 0.031, "step": 45670 }, { "epoch": 2.318645616528758, "grad_norm": 0.40756314992904663, "learning_rate": 4.542362556474948e-06, "loss": 0.0284, "step": 45675 }, { "epoch": 2.3188994365196205, "grad_norm": 0.2886298894882202, "learning_rate": 4.540670423202532e-06, "loss": 0.0348, "step": 45680 }, { "epoch": 2.3191532565104827, "grad_norm": 0.4084828495979309, "learning_rate": 4.5389782899301154e-06, "loss": 0.0368, "step": 45685 }, { "epoch": 2.3194070765013453, "grad_norm": 0.6314843893051147, "learning_rate": 4.537286156657699e-06, "loss": 0.0375, "step": 45690 }, { "epoch": 2.319660896492208, "grad_norm": 0.21973839402198792, "learning_rate": 4.5355940233852825e-06, "loss": 0.0317, "step": 45695 }, { "epoch": 2.31991471648307, "grad_norm": 0.2902248799800873, "learning_rate": 4.533901890112865e-06, "loss": 0.0299, "step": 45700 }, { "epoch": 2.3201685364739326, "grad_norm": 0.3448190987110138, "learning_rate": 4.53220975684045e-06, "loss": 0.0449, "step": 45705 }, { "epoch": 2.320422356464795, "grad_norm": 0.2905779480934143, "learning_rate": 4.530517623568032e-06, "loss": 0.0309, "step": 45710 }, { "epoch": 2.320676176455658, "grad_norm": 0.3386252820491791, "learning_rate": 4.528825490295616e-06, "loss": 0.0302, "step": 45715 }, { "epoch": 2.32092999644652, "grad_norm": 0.31150320172309875, "learning_rate": 4.527133357023199e-06, "loss": 0.0295, "step": 45720 }, { "epoch": 2.3211838164373826, "grad_norm": 0.20965662598609924, "learning_rate": 4.525441223750783e-06, "loss": 0.0283, "step": 45725 }, { "epoch": 2.321437636428245, "grad_norm": 0.31546589732170105, "learning_rate": 4.5237490904783665e-06, "loss": 0.0312, "step": 45730 }, { "epoch": 2.3216914564191073, "grad_norm": 0.4126461148262024, "learning_rate": 4.52205695720595e-06, "loss": 0.0347, "step": 45735 }, { "epoch": 2.32194527640997, "grad_norm": 0.3679712116718292, "learning_rate": 4.520364823933534e-06, "loss": 0.029, "step": 45740 }, { "epoch": 2.3221990964008326, "grad_norm": 0.7221439480781555, "learning_rate": 4.518672690661116e-06, "loss": 0.0276, "step": 45745 }, { "epoch": 2.322452916391695, "grad_norm": 0.4402199387550354, "learning_rate": 4.516980557388701e-06, "loss": 0.0336, "step": 45750 }, { "epoch": 2.3227067363825573, "grad_norm": 0.32814040780067444, "learning_rate": 4.515288424116283e-06, "loss": 0.0336, "step": 45755 }, { "epoch": 2.32296055637342, "grad_norm": 0.4378836154937744, "learning_rate": 4.513596290843868e-06, "loss": 0.0465, "step": 45760 }, { "epoch": 2.3232143763642825, "grad_norm": 0.38695916533470154, "learning_rate": 4.5119041575714505e-06, "loss": 0.0375, "step": 45765 }, { "epoch": 2.323468196355145, "grad_norm": 0.7761498689651489, "learning_rate": 4.510212024299034e-06, "loss": 0.0357, "step": 45770 }, { "epoch": 2.3237220163460073, "grad_norm": 0.4433533549308777, "learning_rate": 4.5085198910266175e-06, "loss": 0.0413, "step": 45775 }, { "epoch": 2.32397583633687, "grad_norm": 0.2922021746635437, "learning_rate": 4.506827757754201e-06, "loss": 0.0351, "step": 45780 }, { "epoch": 2.3242296563277325, "grad_norm": 0.32604724168777466, "learning_rate": 4.505135624481785e-06, "loss": 0.0337, "step": 45785 }, { "epoch": 2.324483476318595, "grad_norm": 0.3584740459918976, "learning_rate": 4.503443491209367e-06, "loss": 0.0372, "step": 45790 }, { "epoch": 2.3247372963094572, "grad_norm": 0.5668825507164001, "learning_rate": 4.501751357936952e-06, "loss": 0.0413, "step": 45795 }, { "epoch": 2.32499111630032, "grad_norm": 0.3352181613445282, "learning_rate": 4.500059224664534e-06, "loss": 0.0373, "step": 45800 }, { "epoch": 2.3252449362911825, "grad_norm": 0.32401424646377563, "learning_rate": 4.498367091392119e-06, "loss": 0.0341, "step": 45805 }, { "epoch": 2.3254987562820446, "grad_norm": 0.3033709228038788, "learning_rate": 4.4966749581197015e-06, "loss": 0.0288, "step": 45810 }, { "epoch": 2.325752576272907, "grad_norm": 0.30588579177856445, "learning_rate": 4.494982824847285e-06, "loss": 0.0268, "step": 45815 }, { "epoch": 2.32600639626377, "grad_norm": 0.40908050537109375, "learning_rate": 4.493290691574869e-06, "loss": 0.0363, "step": 45820 }, { "epoch": 2.3262602162546324, "grad_norm": 0.5321323871612549, "learning_rate": 4.491598558302452e-06, "loss": 0.033, "step": 45825 }, { "epoch": 2.3265140362454946, "grad_norm": 0.20381450653076172, "learning_rate": 4.489906425030036e-06, "loss": 0.0328, "step": 45830 }, { "epoch": 2.326767856236357, "grad_norm": 0.9036016464233398, "learning_rate": 4.488214291757619e-06, "loss": 0.0495, "step": 45835 }, { "epoch": 2.32702167622722, "grad_norm": 0.2598854601383209, "learning_rate": 4.486522158485203e-06, "loss": 0.0328, "step": 45840 }, { "epoch": 2.327275496218082, "grad_norm": 0.2837057411670685, "learning_rate": 4.484830025212786e-06, "loss": 0.0267, "step": 45845 }, { "epoch": 2.3275293162089445, "grad_norm": 0.33842211961746216, "learning_rate": 4.48313789194037e-06, "loss": 0.0313, "step": 45850 }, { "epoch": 2.327783136199807, "grad_norm": 0.3597102761268616, "learning_rate": 4.481445758667953e-06, "loss": 0.034, "step": 45855 }, { "epoch": 2.3280369561906697, "grad_norm": 0.33887141942977905, "learning_rate": 4.479753625395536e-06, "loss": 0.0269, "step": 45860 }, { "epoch": 2.328290776181532, "grad_norm": 0.24716730415821075, "learning_rate": 4.4780614921231205e-06, "loss": 0.03, "step": 45865 }, { "epoch": 2.3285445961723945, "grad_norm": 0.4096173048019409, "learning_rate": 4.476369358850703e-06, "loss": 0.0298, "step": 45870 }, { "epoch": 2.328798416163257, "grad_norm": 0.34281015396118164, "learning_rate": 4.474677225578287e-06, "loss": 0.0224, "step": 45875 }, { "epoch": 2.3290522361541193, "grad_norm": 0.2912023067474365, "learning_rate": 4.47298509230587e-06, "loss": 0.0281, "step": 45880 }, { "epoch": 2.329306056144982, "grad_norm": 0.3161762058734894, "learning_rate": 4.471292959033454e-06, "loss": 0.0327, "step": 45885 }, { "epoch": 2.3295598761358445, "grad_norm": 0.3971143364906311, "learning_rate": 4.469600825761037e-06, "loss": 0.0245, "step": 45890 }, { "epoch": 2.329813696126707, "grad_norm": 0.24481990933418274, "learning_rate": 4.467908692488621e-06, "loss": 0.0262, "step": 45895 }, { "epoch": 2.3300675161175692, "grad_norm": 0.4387982189655304, "learning_rate": 4.4662165592162045e-06, "loss": 0.028, "step": 45900 }, { "epoch": 2.330321336108432, "grad_norm": 0.3262830674648285, "learning_rate": 4.464524425943787e-06, "loss": 0.0432, "step": 45905 }, { "epoch": 2.3305751560992944, "grad_norm": 0.5117439031600952, "learning_rate": 4.4628322926713716e-06, "loss": 0.0405, "step": 45910 }, { "epoch": 2.330828976090157, "grad_norm": 0.4031218886375427, "learning_rate": 4.461140159398954e-06, "loss": 0.0318, "step": 45915 }, { "epoch": 2.331082796081019, "grad_norm": 0.31962868571281433, "learning_rate": 4.459448026126539e-06, "loss": 0.0303, "step": 45920 }, { "epoch": 2.331336616071882, "grad_norm": 0.3103834390640259, "learning_rate": 4.457755892854121e-06, "loss": 0.0375, "step": 45925 }, { "epoch": 2.3315904360627444, "grad_norm": 0.41599103808403015, "learning_rate": 4.456063759581705e-06, "loss": 0.0323, "step": 45930 }, { "epoch": 2.331844256053607, "grad_norm": 0.38975197076797485, "learning_rate": 4.4543716263092884e-06, "loss": 0.0345, "step": 45935 }, { "epoch": 2.332098076044469, "grad_norm": 0.22098346054553986, "learning_rate": 4.452679493036872e-06, "loss": 0.0255, "step": 45940 }, { "epoch": 2.3323518960353318, "grad_norm": 0.26242735981941223, "learning_rate": 4.4509873597644555e-06, "loss": 0.0331, "step": 45945 }, { "epoch": 2.3326057160261944, "grad_norm": 0.2703067660331726, "learning_rate": 4.449295226492038e-06, "loss": 0.0303, "step": 45950 }, { "epoch": 2.3328595360170565, "grad_norm": 0.42793992161750793, "learning_rate": 4.447603093219623e-06, "loss": 0.042, "step": 45955 }, { "epoch": 2.333113356007919, "grad_norm": 0.28537246584892273, "learning_rate": 4.445910959947205e-06, "loss": 0.0342, "step": 45960 }, { "epoch": 2.3333671759987817, "grad_norm": 0.338586688041687, "learning_rate": 4.44421882667479e-06, "loss": 0.0304, "step": 45965 }, { "epoch": 2.3336209959896443, "grad_norm": 0.2869323790073395, "learning_rate": 4.442526693402372e-06, "loss": 0.0335, "step": 45970 }, { "epoch": 2.3338748159805065, "grad_norm": 0.21080097556114197, "learning_rate": 4.440834560129956e-06, "loss": 0.0275, "step": 45975 }, { "epoch": 2.334128635971369, "grad_norm": 0.8009614944458008, "learning_rate": 4.4391424268575395e-06, "loss": 0.0291, "step": 45980 }, { "epoch": 2.3343824559622317, "grad_norm": 0.4224371910095215, "learning_rate": 4.437450293585123e-06, "loss": 0.0277, "step": 45985 }, { "epoch": 2.334636275953094, "grad_norm": 0.3391774296760559, "learning_rate": 4.4357581603127066e-06, "loss": 0.0242, "step": 45990 }, { "epoch": 2.3348900959439565, "grad_norm": 0.27695131301879883, "learning_rate": 4.43406602704029e-06, "loss": 0.0334, "step": 45995 }, { "epoch": 2.335143915934819, "grad_norm": 0.2666962742805481, "learning_rate": 4.432373893767874e-06, "loss": 0.031, "step": 46000 }, { "epoch": 2.3353977359256817, "grad_norm": 0.3789578378200531, "learning_rate": 4.430681760495457e-06, "loss": 0.0369, "step": 46005 }, { "epoch": 2.335651555916544, "grad_norm": 0.37819820642471313, "learning_rate": 4.428989627223041e-06, "loss": 0.0306, "step": 46010 }, { "epoch": 2.3359053759074064, "grad_norm": 0.4716276526451111, "learning_rate": 4.427297493950624e-06, "loss": 0.0484, "step": 46015 }, { "epoch": 2.336159195898269, "grad_norm": 0.3425740897655487, "learning_rate": 4.425605360678207e-06, "loss": 0.0333, "step": 46020 }, { "epoch": 2.336413015889131, "grad_norm": 0.41194647550582886, "learning_rate": 4.423913227405791e-06, "loss": 0.0341, "step": 46025 }, { "epoch": 2.336666835879994, "grad_norm": 0.3530094027519226, "learning_rate": 4.422221094133374e-06, "loss": 0.0293, "step": 46030 }, { "epoch": 2.3369206558708564, "grad_norm": 0.31580179929733276, "learning_rate": 4.420528960860958e-06, "loss": 0.0295, "step": 46035 }, { "epoch": 2.337174475861719, "grad_norm": 0.35331493616104126, "learning_rate": 4.418836827588541e-06, "loss": 0.0366, "step": 46040 }, { "epoch": 2.337428295852581, "grad_norm": 0.47498664259910583, "learning_rate": 4.417144694316125e-06, "loss": 0.0372, "step": 46045 }, { "epoch": 2.3376821158434438, "grad_norm": 0.31873664259910583, "learning_rate": 4.415452561043708e-06, "loss": 0.0284, "step": 46050 }, { "epoch": 2.3379359358343064, "grad_norm": 0.41014498472213745, "learning_rate": 4.413760427771292e-06, "loss": 0.0403, "step": 46055 }, { "epoch": 2.338189755825169, "grad_norm": 0.43682634830474854, "learning_rate": 4.412068294498875e-06, "loss": 0.0445, "step": 46060 }, { "epoch": 2.338443575816031, "grad_norm": 0.5572406649589539, "learning_rate": 4.410376161226458e-06, "loss": 0.0274, "step": 46065 }, { "epoch": 2.3386973958068937, "grad_norm": 0.4906730353832245, "learning_rate": 4.4086840279540424e-06, "loss": 0.0288, "step": 46070 }, { "epoch": 2.3389512157977563, "grad_norm": 0.26693737506866455, "learning_rate": 4.406991894681625e-06, "loss": 0.0349, "step": 46075 }, { "epoch": 2.339205035788619, "grad_norm": 0.2562685012817383, "learning_rate": 4.4052997614092095e-06, "loss": 0.0286, "step": 46080 }, { "epoch": 2.339458855779481, "grad_norm": 0.3544439375400543, "learning_rate": 4.403607628136792e-06, "loss": 0.0291, "step": 46085 }, { "epoch": 2.3397126757703437, "grad_norm": 0.5881972908973694, "learning_rate": 4.401915494864376e-06, "loss": 0.0264, "step": 46090 }, { "epoch": 2.3399664957612063, "grad_norm": 0.304453045129776, "learning_rate": 4.400223361591959e-06, "loss": 0.0334, "step": 46095 }, { "epoch": 2.3402203157520685, "grad_norm": 0.26709556579589844, "learning_rate": 4.398531228319543e-06, "loss": 0.0366, "step": 46100 }, { "epoch": 2.340474135742931, "grad_norm": 0.4727105498313904, "learning_rate": 4.396839095047126e-06, "loss": 0.0335, "step": 46105 }, { "epoch": 2.3407279557337937, "grad_norm": 0.27335217595100403, "learning_rate": 4.39514696177471e-06, "loss": 0.0283, "step": 46110 }, { "epoch": 2.3409817757246563, "grad_norm": 0.544344961643219, "learning_rate": 4.3934548285022935e-06, "loss": 0.0305, "step": 46115 }, { "epoch": 2.3412355957155184, "grad_norm": 0.2982689440250397, "learning_rate": 4.391762695229876e-06, "loss": 0.0301, "step": 46120 }, { "epoch": 2.341489415706381, "grad_norm": 0.4149956703186035, "learning_rate": 4.390070561957461e-06, "loss": 0.0338, "step": 46125 }, { "epoch": 2.3417432356972436, "grad_norm": 0.28789716958999634, "learning_rate": 4.388378428685043e-06, "loss": 0.0364, "step": 46130 }, { "epoch": 2.341997055688106, "grad_norm": 0.32526326179504395, "learning_rate": 4.386686295412627e-06, "loss": 0.0333, "step": 46135 }, { "epoch": 2.3422508756789684, "grad_norm": 0.3240688145160675, "learning_rate": 4.38499416214021e-06, "loss": 0.0303, "step": 46140 }, { "epoch": 2.342504695669831, "grad_norm": 0.3309798836708069, "learning_rate": 4.383302028867794e-06, "loss": 0.0346, "step": 46145 }, { "epoch": 2.3427585156606936, "grad_norm": 0.4500001072883606, "learning_rate": 4.3816098955953775e-06, "loss": 0.0369, "step": 46150 }, { "epoch": 2.3430123356515558, "grad_norm": 0.32434019446372986, "learning_rate": 4.379917762322961e-06, "loss": 0.0362, "step": 46155 }, { "epoch": 2.3432661556424184, "grad_norm": 0.40813419222831726, "learning_rate": 4.3782256290505445e-06, "loss": 0.0308, "step": 46160 }, { "epoch": 2.343519975633281, "grad_norm": 0.3180595636367798, "learning_rate": 4.376533495778128e-06, "loss": 0.0303, "step": 46165 }, { "epoch": 2.343773795624143, "grad_norm": 0.3424380123615265, "learning_rate": 4.374841362505712e-06, "loss": 0.0317, "step": 46170 }, { "epoch": 2.3440276156150057, "grad_norm": 0.4235934019088745, "learning_rate": 4.373149229233295e-06, "loss": 0.0366, "step": 46175 }, { "epoch": 2.3442814356058683, "grad_norm": 0.29761865735054016, "learning_rate": 4.371457095960878e-06, "loss": 0.0312, "step": 46180 }, { "epoch": 2.344535255596731, "grad_norm": 0.34761884808540344, "learning_rate": 4.369764962688462e-06, "loss": 0.0273, "step": 46185 }, { "epoch": 2.344789075587593, "grad_norm": 0.7427986860275269, "learning_rate": 4.368072829416045e-06, "loss": 0.0321, "step": 46190 }, { "epoch": 2.3450428955784557, "grad_norm": 0.42068642377853394, "learning_rate": 4.3663806961436285e-06, "loss": 0.0328, "step": 46195 }, { "epoch": 2.3452967155693183, "grad_norm": 0.23314493894577026, "learning_rate": 4.364688562871212e-06, "loss": 0.0296, "step": 46200 }, { "epoch": 2.345550535560181, "grad_norm": 0.561964750289917, "learning_rate": 4.362996429598796e-06, "loss": 0.0316, "step": 46205 }, { "epoch": 2.345804355551043, "grad_norm": 0.2341952621936798, "learning_rate": 4.361304296326379e-06, "loss": 0.0307, "step": 46210 }, { "epoch": 2.3460581755419057, "grad_norm": 0.3044934868812561, "learning_rate": 4.359612163053963e-06, "loss": 0.0316, "step": 46215 }, { "epoch": 2.3463119955327683, "grad_norm": 0.29033297300338745, "learning_rate": 4.357920029781546e-06, "loss": 0.0257, "step": 46220 }, { "epoch": 2.346565815523631, "grad_norm": 0.548446774482727, "learning_rate": 4.356227896509129e-06, "loss": 0.0297, "step": 46225 }, { "epoch": 2.346819635514493, "grad_norm": 0.43785858154296875, "learning_rate": 4.354535763236713e-06, "loss": 0.0323, "step": 46230 }, { "epoch": 2.3470734555053556, "grad_norm": 0.303646445274353, "learning_rate": 4.352843629964296e-06, "loss": 0.0285, "step": 46235 }, { "epoch": 2.3473272754962182, "grad_norm": 0.32536157965660095, "learning_rate": 4.3511514966918796e-06, "loss": 0.0359, "step": 46240 }, { "epoch": 2.3475810954870804, "grad_norm": 0.22959794104099274, "learning_rate": 4.349459363419463e-06, "loss": 0.0364, "step": 46245 }, { "epoch": 2.347834915477943, "grad_norm": 0.24991437792778015, "learning_rate": 4.347767230147047e-06, "loss": 0.0268, "step": 46250 }, { "epoch": 2.3480887354688056, "grad_norm": 0.4207878112792969, "learning_rate": 4.34607509687463e-06, "loss": 0.0363, "step": 46255 }, { "epoch": 2.348342555459668, "grad_norm": 0.2433720827102661, "learning_rate": 4.344382963602214e-06, "loss": 0.0365, "step": 46260 }, { "epoch": 2.3485963754505303, "grad_norm": 0.4478245675563812, "learning_rate": 4.342690830329797e-06, "loss": 0.0351, "step": 46265 }, { "epoch": 2.348850195441393, "grad_norm": 0.3620702922344208, "learning_rate": 4.340998697057381e-06, "loss": 0.0285, "step": 46270 }, { "epoch": 2.3491040154322556, "grad_norm": 0.46670037508010864, "learning_rate": 4.339306563784964e-06, "loss": 0.0309, "step": 46275 }, { "epoch": 2.3493578354231177, "grad_norm": 0.5231664180755615, "learning_rate": 4.337614430512547e-06, "loss": 0.0299, "step": 46280 }, { "epoch": 2.3496116554139803, "grad_norm": 0.32671064138412476, "learning_rate": 4.335922297240131e-06, "loss": 0.0333, "step": 46285 }, { "epoch": 2.349865475404843, "grad_norm": 0.6618512272834778, "learning_rate": 4.334230163967714e-06, "loss": 0.0348, "step": 46290 }, { "epoch": 2.3501192953957055, "grad_norm": 0.2867075800895691, "learning_rate": 4.332538030695298e-06, "loss": 0.0317, "step": 46295 }, { "epoch": 2.3503731153865677, "grad_norm": 0.31707561016082764, "learning_rate": 4.330845897422881e-06, "loss": 0.0347, "step": 46300 }, { "epoch": 2.3506269353774303, "grad_norm": 0.21336521208286285, "learning_rate": 4.329153764150465e-06, "loss": 0.0283, "step": 46305 }, { "epoch": 2.350880755368293, "grad_norm": 0.24199140071868896, "learning_rate": 4.327461630878048e-06, "loss": 0.0398, "step": 46310 }, { "epoch": 2.351134575359155, "grad_norm": 0.390618234872818, "learning_rate": 4.325769497605632e-06, "loss": 0.0352, "step": 46315 }, { "epoch": 2.3513883953500176, "grad_norm": 0.32350775599479675, "learning_rate": 4.3240773643332154e-06, "loss": 0.0362, "step": 46320 }, { "epoch": 2.3516422153408802, "grad_norm": 0.3590441346168518, "learning_rate": 4.322385231060799e-06, "loss": 0.0316, "step": 46325 }, { "epoch": 2.351896035331743, "grad_norm": 0.33314502239227295, "learning_rate": 4.320693097788382e-06, "loss": 0.0355, "step": 46330 }, { "epoch": 2.3521498553226055, "grad_norm": 0.3136577606201172, "learning_rate": 4.319000964515966e-06, "loss": 0.0377, "step": 46335 }, { "epoch": 2.3524036753134676, "grad_norm": 0.29925400018692017, "learning_rate": 4.317308831243549e-06, "loss": 0.0308, "step": 46340 }, { "epoch": 2.35265749530433, "grad_norm": 0.37032827734947205, "learning_rate": 4.315616697971133e-06, "loss": 0.0302, "step": 46345 }, { "epoch": 2.352911315295193, "grad_norm": 0.2775189280509949, "learning_rate": 4.313924564698716e-06, "loss": 0.0332, "step": 46350 }, { "epoch": 2.353165135286055, "grad_norm": 0.507088840007782, "learning_rate": 4.312232431426299e-06, "loss": 0.0291, "step": 46355 }, { "epoch": 2.3534189552769176, "grad_norm": 0.4810927212238312, "learning_rate": 4.310540298153883e-06, "loss": 0.0331, "step": 46360 }, { "epoch": 2.35367277526778, "grad_norm": 0.3024055063724518, "learning_rate": 4.3088481648814665e-06, "loss": 0.0303, "step": 46365 }, { "epoch": 2.353926595258643, "grad_norm": 0.3359050452709198, "learning_rate": 4.30715603160905e-06, "loss": 0.0359, "step": 46370 }, { "epoch": 2.354180415249505, "grad_norm": 0.4089173376560211, "learning_rate": 4.305463898336633e-06, "loss": 0.0406, "step": 46375 }, { "epoch": 2.3544342352403675, "grad_norm": 0.2997671067714691, "learning_rate": 4.303771765064217e-06, "loss": 0.0391, "step": 46380 }, { "epoch": 2.35468805523123, "grad_norm": 0.2722918391227722, "learning_rate": 4.3020796317918e-06, "loss": 0.0277, "step": 46385 }, { "epoch": 2.3549418752220923, "grad_norm": 0.29539498686790466, "learning_rate": 4.300387498519384e-06, "loss": 0.026, "step": 46390 }, { "epoch": 2.355195695212955, "grad_norm": 0.30494311451911926, "learning_rate": 4.298695365246967e-06, "loss": 0.0283, "step": 46395 }, { "epoch": 2.3554495152038175, "grad_norm": 0.3142552971839905, "learning_rate": 4.2970032319745504e-06, "loss": 0.031, "step": 46400 }, { "epoch": 2.35570333519468, "grad_norm": 0.3581981658935547, "learning_rate": 4.295311098702134e-06, "loss": 0.0342, "step": 46405 }, { "epoch": 2.3559571551855423, "grad_norm": 0.2926599979400635, "learning_rate": 4.2936189654297175e-06, "loss": 0.028, "step": 46410 }, { "epoch": 2.356210975176405, "grad_norm": 0.44851022958755493, "learning_rate": 4.291926832157301e-06, "loss": 0.0327, "step": 46415 }, { "epoch": 2.3564647951672675, "grad_norm": 0.5071258544921875, "learning_rate": 4.290234698884885e-06, "loss": 0.026, "step": 46420 }, { "epoch": 2.3567186151581296, "grad_norm": 0.48489269614219666, "learning_rate": 4.288542565612468e-06, "loss": 0.0395, "step": 46425 }, { "epoch": 2.3569724351489922, "grad_norm": 0.34989824891090393, "learning_rate": 4.286850432340052e-06, "loss": 0.0262, "step": 46430 }, { "epoch": 2.357226255139855, "grad_norm": 0.2624622881412506, "learning_rate": 4.285158299067635e-06, "loss": 0.0319, "step": 46435 }, { "epoch": 2.3574800751307174, "grad_norm": 0.2850838005542755, "learning_rate": 4.283466165795218e-06, "loss": 0.0328, "step": 46440 }, { "epoch": 2.3577338951215796, "grad_norm": 0.38306334614753723, "learning_rate": 4.2817740325228015e-06, "loss": 0.0286, "step": 46445 }, { "epoch": 2.357987715112442, "grad_norm": 0.2267332822084427, "learning_rate": 4.280081899250385e-06, "loss": 0.0293, "step": 46450 }, { "epoch": 2.358241535103305, "grad_norm": 0.4027699828147888, "learning_rate": 4.278389765977969e-06, "loss": 0.0368, "step": 46455 }, { "epoch": 2.358495355094167, "grad_norm": 0.2977030575275421, "learning_rate": 4.276697632705552e-06, "loss": 0.0298, "step": 46460 }, { "epoch": 2.3587491750850296, "grad_norm": 0.3098295331001282, "learning_rate": 4.275005499433136e-06, "loss": 0.0395, "step": 46465 }, { "epoch": 2.359002995075892, "grad_norm": 0.3363555371761322, "learning_rate": 4.273313366160719e-06, "loss": 0.0312, "step": 46470 }, { "epoch": 2.3592568150667548, "grad_norm": 1.3011178970336914, "learning_rate": 4.271621232888303e-06, "loss": 0.0378, "step": 46475 }, { "epoch": 2.3595106350576174, "grad_norm": 0.28810590505599976, "learning_rate": 4.269929099615886e-06, "loss": 0.0379, "step": 46480 }, { "epoch": 2.3597644550484795, "grad_norm": 0.36974385380744934, "learning_rate": 4.26823696634347e-06, "loss": 0.0277, "step": 46485 }, { "epoch": 2.360018275039342, "grad_norm": 0.2768723666667938, "learning_rate": 4.2665448330710526e-06, "loss": 0.0357, "step": 46490 }, { "epoch": 2.3602720950302047, "grad_norm": 0.25840070843696594, "learning_rate": 4.264852699798637e-06, "loss": 0.0305, "step": 46495 }, { "epoch": 2.360525915021067, "grad_norm": 0.5833351612091064, "learning_rate": 4.26316056652622e-06, "loss": 0.0462, "step": 46500 }, { "epoch": 2.3607797350119295, "grad_norm": 0.30810680985450745, "learning_rate": 4.261468433253804e-06, "loss": 0.0297, "step": 46505 }, { "epoch": 2.361033555002792, "grad_norm": 0.27955737709999084, "learning_rate": 4.259776299981387e-06, "loss": 0.0311, "step": 46510 }, { "epoch": 2.3612873749936547, "grad_norm": 0.2240092009305954, "learning_rate": 4.25808416670897e-06, "loss": 0.0276, "step": 46515 }, { "epoch": 2.361541194984517, "grad_norm": 0.5027244091033936, "learning_rate": 4.256392033436554e-06, "loss": 0.0285, "step": 46520 }, { "epoch": 2.3617950149753795, "grad_norm": 0.32359835505485535, "learning_rate": 4.254699900164137e-06, "loss": 0.033, "step": 46525 }, { "epoch": 2.362048834966242, "grad_norm": 0.3147777318954468, "learning_rate": 4.253007766891721e-06, "loss": 0.0302, "step": 46530 }, { "epoch": 2.3623026549571042, "grad_norm": 0.36554408073425293, "learning_rate": 4.251315633619304e-06, "loss": 0.0303, "step": 46535 }, { "epoch": 2.362556474947967, "grad_norm": 0.7193531394004822, "learning_rate": 4.249623500346888e-06, "loss": 0.035, "step": 46540 }, { "epoch": 2.3628102949388294, "grad_norm": 0.16339777410030365, "learning_rate": 4.247931367074471e-06, "loss": 0.0345, "step": 46545 }, { "epoch": 2.363064114929692, "grad_norm": 0.294882208108902, "learning_rate": 4.246239233802055e-06, "loss": 0.0387, "step": 46550 }, { "epoch": 2.363317934920554, "grad_norm": 0.3476087749004364, "learning_rate": 4.244547100529638e-06, "loss": 0.0319, "step": 46555 }, { "epoch": 2.363571754911417, "grad_norm": 0.43848177790641785, "learning_rate": 4.242854967257221e-06, "loss": 0.0282, "step": 46560 }, { "epoch": 2.3638255749022794, "grad_norm": 0.30569490790367126, "learning_rate": 4.241162833984805e-06, "loss": 0.0381, "step": 46565 }, { "epoch": 2.3640793948931416, "grad_norm": 0.39467477798461914, "learning_rate": 4.239470700712388e-06, "loss": 0.0347, "step": 46570 }, { "epoch": 2.364333214884004, "grad_norm": 0.25366389751434326, "learning_rate": 4.237778567439972e-06, "loss": 0.0332, "step": 46575 }, { "epoch": 2.3645870348748668, "grad_norm": 0.31514278054237366, "learning_rate": 4.2360864341675555e-06, "loss": 0.0361, "step": 46580 }, { "epoch": 2.3648408548657294, "grad_norm": 0.51214200258255, "learning_rate": 4.234394300895139e-06, "loss": 0.0289, "step": 46585 }, { "epoch": 2.3650946748565915, "grad_norm": 0.25476548075675964, "learning_rate": 4.232702167622723e-06, "loss": 0.0329, "step": 46590 }, { "epoch": 2.365348494847454, "grad_norm": 0.30458933115005493, "learning_rate": 4.231010034350306e-06, "loss": 0.0289, "step": 46595 }, { "epoch": 2.3656023148383167, "grad_norm": 0.2518400251865387, "learning_rate": 4.229317901077889e-06, "loss": 0.0251, "step": 46600 }, { "epoch": 2.3658561348291793, "grad_norm": 0.23019066452980042, "learning_rate": 4.227625767805472e-06, "loss": 0.0323, "step": 46605 }, { "epoch": 2.3661099548200415, "grad_norm": 0.2931053042411804, "learning_rate": 4.225933634533056e-06, "loss": 0.0279, "step": 46610 }, { "epoch": 2.366363774810904, "grad_norm": 0.8957230448722839, "learning_rate": 4.2242415012606395e-06, "loss": 0.0303, "step": 46615 }, { "epoch": 2.3666175948017667, "grad_norm": 0.35307177901268005, "learning_rate": 4.222549367988223e-06, "loss": 0.0255, "step": 46620 }, { "epoch": 2.3668714147926293, "grad_norm": 0.3644431233406067, "learning_rate": 4.2208572347158066e-06, "loss": 0.0366, "step": 46625 }, { "epoch": 2.3671252347834915, "grad_norm": 0.2834033668041229, "learning_rate": 4.21916510144339e-06, "loss": 0.0338, "step": 46630 }, { "epoch": 2.367379054774354, "grad_norm": 0.29104098677635193, "learning_rate": 4.217472968170974e-06, "loss": 0.0363, "step": 46635 }, { "epoch": 2.3676328747652167, "grad_norm": 0.26519590616226196, "learning_rate": 4.215780834898557e-06, "loss": 0.0341, "step": 46640 }, { "epoch": 2.367886694756079, "grad_norm": 0.6125626564025879, "learning_rate": 4.214088701626141e-06, "loss": 0.0253, "step": 46645 }, { "epoch": 2.3681405147469414, "grad_norm": 0.3606280982494354, "learning_rate": 4.2123965683537234e-06, "loss": 0.0269, "step": 46650 }, { "epoch": 2.368394334737804, "grad_norm": 0.4279687702655792, "learning_rate": 4.210704435081308e-06, "loss": 0.0278, "step": 46655 }, { "epoch": 2.3686481547286666, "grad_norm": 0.6018189191818237, "learning_rate": 4.2090123018088905e-06, "loss": 0.0353, "step": 46660 }, { "epoch": 2.368901974719529, "grad_norm": 0.38729801774024963, "learning_rate": 4.207320168536475e-06, "loss": 0.0284, "step": 46665 }, { "epoch": 2.3691557947103914, "grad_norm": 0.35448938608169556, "learning_rate": 4.205628035264058e-06, "loss": 0.0268, "step": 46670 }, { "epoch": 2.369409614701254, "grad_norm": 0.25390028953552246, "learning_rate": 4.203935901991641e-06, "loss": 0.025, "step": 46675 }, { "epoch": 2.369663434692116, "grad_norm": 0.22552552819252014, "learning_rate": 4.202243768719225e-06, "loss": 0.0268, "step": 46680 }, { "epoch": 2.3699172546829788, "grad_norm": 0.6019819378852844, "learning_rate": 4.200551635446808e-06, "loss": 0.0412, "step": 46685 }, { "epoch": 2.3701710746738414, "grad_norm": 0.3028351068496704, "learning_rate": 4.198859502174392e-06, "loss": 0.0335, "step": 46690 }, { "epoch": 2.370424894664704, "grad_norm": 0.4909636378288269, "learning_rate": 4.1971673689019745e-06, "loss": 0.038, "step": 46695 }, { "epoch": 2.370678714655566, "grad_norm": 0.37978753447532654, "learning_rate": 4.195475235629559e-06, "loss": 0.0307, "step": 46700 }, { "epoch": 2.3709325346464287, "grad_norm": 0.16427743434906006, "learning_rate": 4.193783102357142e-06, "loss": 0.0276, "step": 46705 }, { "epoch": 2.3711863546372913, "grad_norm": 0.4129177927970886, "learning_rate": 4.192090969084726e-06, "loss": 0.0313, "step": 46710 }, { "epoch": 2.3714401746281535, "grad_norm": 0.5559883117675781, "learning_rate": 4.190398835812309e-06, "loss": 0.0342, "step": 46715 }, { "epoch": 2.371693994619016, "grad_norm": 0.32237133383750916, "learning_rate": 4.188706702539892e-06, "loss": 0.0245, "step": 46720 }, { "epoch": 2.3719478146098787, "grad_norm": 0.2823083996772766, "learning_rate": 4.187014569267476e-06, "loss": 0.0255, "step": 46725 }, { "epoch": 2.3722016346007413, "grad_norm": 0.5001686215400696, "learning_rate": 4.185322435995059e-06, "loss": 0.0336, "step": 46730 }, { "epoch": 2.3724554545916035, "grad_norm": 0.26875582337379456, "learning_rate": 4.183630302722643e-06, "loss": 0.0395, "step": 46735 }, { "epoch": 2.372709274582466, "grad_norm": 0.4248732030391693, "learning_rate": 4.181938169450226e-06, "loss": 0.0278, "step": 46740 }, { "epoch": 2.3729630945733287, "grad_norm": 0.3200513422489166, "learning_rate": 4.18024603617781e-06, "loss": 0.0329, "step": 46745 }, { "epoch": 2.3732169145641913, "grad_norm": 0.38520458340644836, "learning_rate": 4.1785539029053935e-06, "loss": 0.0367, "step": 46750 }, { "epoch": 2.3734707345550534, "grad_norm": 0.3649415671825409, "learning_rate": 4.176861769632977e-06, "loss": 0.033, "step": 46755 }, { "epoch": 2.373724554545916, "grad_norm": 0.38120341300964355, "learning_rate": 4.17516963636056e-06, "loss": 0.0311, "step": 46760 }, { "epoch": 2.3739783745367786, "grad_norm": 0.2855028212070465, "learning_rate": 4.173477503088143e-06, "loss": 0.036, "step": 46765 }, { "epoch": 2.3742321945276412, "grad_norm": 0.4050988554954529, "learning_rate": 4.171785369815727e-06, "loss": 0.0362, "step": 46770 }, { "epoch": 2.3744860145185034, "grad_norm": 0.6171091198921204, "learning_rate": 4.17009323654331e-06, "loss": 0.0407, "step": 46775 }, { "epoch": 2.374739834509366, "grad_norm": 0.29305702447891235, "learning_rate": 4.168401103270894e-06, "loss": 0.0277, "step": 46780 }, { "epoch": 2.3749936545002286, "grad_norm": 0.3541305363178253, "learning_rate": 4.1667089699984774e-06, "loss": 0.033, "step": 46785 }, { "epoch": 2.3752474744910907, "grad_norm": 0.34138309955596924, "learning_rate": 4.165016836726061e-06, "loss": 0.0317, "step": 46790 }, { "epoch": 2.3755012944819534, "grad_norm": 0.34759989380836487, "learning_rate": 4.1633247034536445e-06, "loss": 0.0289, "step": 46795 }, { "epoch": 2.375755114472816, "grad_norm": 0.4288697838783264, "learning_rate": 4.161632570181228e-06, "loss": 0.0336, "step": 46800 }, { "epoch": 2.3760089344636786, "grad_norm": 0.31496793031692505, "learning_rate": 4.159940436908812e-06, "loss": 0.0328, "step": 46805 }, { "epoch": 2.3762627544545407, "grad_norm": 0.3088779151439667, "learning_rate": 4.158248303636394e-06, "loss": 0.0293, "step": 46810 }, { "epoch": 2.3765165744454033, "grad_norm": 0.34354227781295776, "learning_rate": 4.156556170363979e-06, "loss": 0.0322, "step": 46815 }, { "epoch": 2.376770394436266, "grad_norm": 0.3034856617450714, "learning_rate": 4.154864037091561e-06, "loss": 0.0389, "step": 46820 }, { "epoch": 2.377024214427128, "grad_norm": 0.47851690649986267, "learning_rate": 4.153171903819145e-06, "loss": 0.0375, "step": 46825 }, { "epoch": 2.3772780344179907, "grad_norm": 0.3652953505516052, "learning_rate": 4.1514797705467285e-06, "loss": 0.0355, "step": 46830 }, { "epoch": 2.3775318544088533, "grad_norm": 0.5073241591453552, "learning_rate": 4.149787637274312e-06, "loss": 0.027, "step": 46835 }, { "epoch": 2.377785674399716, "grad_norm": 0.27668410539627075, "learning_rate": 4.148095504001896e-06, "loss": 0.0298, "step": 46840 }, { "epoch": 2.378039494390578, "grad_norm": 0.47308704257011414, "learning_rate": 4.146403370729479e-06, "loss": 0.0376, "step": 46845 }, { "epoch": 2.3782933143814406, "grad_norm": 0.35060396790504456, "learning_rate": 4.144711237457063e-06, "loss": 0.0356, "step": 46850 }, { "epoch": 2.3785471343723033, "grad_norm": 0.28105148673057556, "learning_rate": 4.143019104184645e-06, "loss": 0.0359, "step": 46855 }, { "epoch": 2.3788009543631654, "grad_norm": 0.39512941241264343, "learning_rate": 4.14132697091223e-06, "loss": 0.0326, "step": 46860 }, { "epoch": 2.379054774354028, "grad_norm": 0.36061257123947144, "learning_rate": 4.1396348376398125e-06, "loss": 0.0334, "step": 46865 }, { "epoch": 2.3793085943448906, "grad_norm": 1.4042720794677734, "learning_rate": 4.137942704367396e-06, "loss": 0.0331, "step": 46870 }, { "epoch": 2.379562414335753, "grad_norm": 0.33557531237602234, "learning_rate": 4.1362505710949796e-06, "loss": 0.0289, "step": 46875 }, { "epoch": 2.3798162343266154, "grad_norm": 0.2567538619041443, "learning_rate": 4.134558437822563e-06, "loss": 0.0302, "step": 46880 }, { "epoch": 2.380070054317478, "grad_norm": 0.48120400309562683, "learning_rate": 4.132866304550147e-06, "loss": 0.0302, "step": 46885 }, { "epoch": 2.3803238743083406, "grad_norm": 0.30264732241630554, "learning_rate": 4.13117417127773e-06, "loss": 0.0312, "step": 46890 }, { "epoch": 2.380577694299203, "grad_norm": 0.2624591588973999, "learning_rate": 4.129482038005314e-06, "loss": 0.0309, "step": 46895 }, { "epoch": 2.3808315142900653, "grad_norm": 0.2809188663959503, "learning_rate": 4.127789904732897e-06, "loss": 0.0273, "step": 46900 }, { "epoch": 2.381085334280928, "grad_norm": 0.5473712086677551, "learning_rate": 4.126097771460481e-06, "loss": 0.0314, "step": 46905 }, { "epoch": 2.3813391542717905, "grad_norm": 0.3612068295478821, "learning_rate": 4.124405638188064e-06, "loss": 0.0324, "step": 46910 }, { "epoch": 2.381592974262653, "grad_norm": 0.30368199944496155, "learning_rate": 4.122713504915647e-06, "loss": 0.0367, "step": 46915 }, { "epoch": 2.3818467942535153, "grad_norm": 0.35154810547828674, "learning_rate": 4.121021371643231e-06, "loss": 0.0321, "step": 46920 }, { "epoch": 2.382100614244378, "grad_norm": 0.48841744661331177, "learning_rate": 4.119329238370814e-06, "loss": 0.0326, "step": 46925 }, { "epoch": 2.3823544342352405, "grad_norm": 0.3970104455947876, "learning_rate": 4.117637105098398e-06, "loss": 0.0288, "step": 46930 }, { "epoch": 2.3826082542261027, "grad_norm": 0.32299691438674927, "learning_rate": 4.115944971825981e-06, "loss": 0.0282, "step": 46935 }, { "epoch": 2.3828620742169653, "grad_norm": 0.41886699199676514, "learning_rate": 4.114252838553565e-06, "loss": 0.0368, "step": 46940 }, { "epoch": 2.383115894207828, "grad_norm": 0.36832237243652344, "learning_rate": 4.112560705281148e-06, "loss": 0.033, "step": 46945 }, { "epoch": 2.3833697141986905, "grad_norm": 0.48107612133026123, "learning_rate": 4.110868572008732e-06, "loss": 0.0363, "step": 46950 }, { "epoch": 2.3836235341895526, "grad_norm": 0.31277942657470703, "learning_rate": 4.109176438736315e-06, "loss": 0.0266, "step": 46955 }, { "epoch": 2.3838773541804152, "grad_norm": 0.28694868087768555, "learning_rate": 4.107484305463898e-06, "loss": 0.032, "step": 46960 }, { "epoch": 2.384131174171278, "grad_norm": 0.3679179847240448, "learning_rate": 4.1057921721914825e-06, "loss": 0.0279, "step": 46965 }, { "epoch": 2.38438499416214, "grad_norm": 0.19425620138645172, "learning_rate": 4.104100038919065e-06, "loss": 0.0261, "step": 46970 }, { "epoch": 2.3846388141530026, "grad_norm": 0.46411842107772827, "learning_rate": 4.10240790564665e-06, "loss": 0.0361, "step": 46975 }, { "epoch": 2.384892634143865, "grad_norm": 0.6477605700492859, "learning_rate": 4.100715772374232e-06, "loss": 0.0352, "step": 46980 }, { "epoch": 2.385146454134728, "grad_norm": 0.43195345997810364, "learning_rate": 4.099023639101816e-06, "loss": 0.029, "step": 46985 }, { "epoch": 2.38540027412559, "grad_norm": 0.2981319725513458, "learning_rate": 4.097331505829399e-06, "loss": 0.0301, "step": 46990 }, { "epoch": 2.3856540941164526, "grad_norm": 0.2841643989086151, "learning_rate": 4.095639372556983e-06, "loss": 0.0283, "step": 46995 }, { "epoch": 2.385907914107315, "grad_norm": 0.37677624821662903, "learning_rate": 4.0939472392845665e-06, "loss": 0.0302, "step": 47000 }, { "epoch": 2.3861617340981773, "grad_norm": 0.2917637825012207, "learning_rate": 4.092255106012149e-06, "loss": 0.0326, "step": 47005 }, { "epoch": 2.38641555408904, "grad_norm": 0.23505131900310516, "learning_rate": 4.0905629727397336e-06, "loss": 0.0268, "step": 47010 }, { "epoch": 2.3866693740799025, "grad_norm": 0.36129844188690186, "learning_rate": 4.088870839467316e-06, "loss": 0.0317, "step": 47015 }, { "epoch": 2.386923194070765, "grad_norm": 0.4052008092403412, "learning_rate": 4.087178706194901e-06, "loss": 0.026, "step": 47020 }, { "epoch": 2.3871770140616273, "grad_norm": 0.35589584708213806, "learning_rate": 4.085486572922483e-06, "loss": 0.0349, "step": 47025 }, { "epoch": 2.38743083405249, "grad_norm": 0.19920487701892853, "learning_rate": 4.083794439650067e-06, "loss": 0.0345, "step": 47030 }, { "epoch": 2.3876846540433525, "grad_norm": 0.7072770595550537, "learning_rate": 4.0821023063776504e-06, "loss": 0.0264, "step": 47035 }, { "epoch": 2.387938474034215, "grad_norm": 0.5042538046836853, "learning_rate": 4.080410173105234e-06, "loss": 0.0352, "step": 47040 }, { "epoch": 2.3881922940250773, "grad_norm": 0.3139657974243164, "learning_rate": 4.0787180398328175e-06, "loss": 0.0353, "step": 47045 }, { "epoch": 2.38844611401594, "grad_norm": 0.7741235494613647, "learning_rate": 4.077025906560401e-06, "loss": 0.0345, "step": 47050 }, { "epoch": 2.3886999340068025, "grad_norm": 0.26117607951164246, "learning_rate": 4.075333773287985e-06, "loss": 0.0259, "step": 47055 }, { "epoch": 2.388953753997665, "grad_norm": 0.28457748889923096, "learning_rate": 4.073641640015568e-06, "loss": 0.0353, "step": 47060 }, { "epoch": 2.3892075739885272, "grad_norm": 0.8835075497627258, "learning_rate": 4.071949506743152e-06, "loss": 0.0281, "step": 47065 }, { "epoch": 2.38946139397939, "grad_norm": 0.5735437870025635, "learning_rate": 4.070257373470735e-06, "loss": 0.0321, "step": 47070 }, { "epoch": 2.3897152139702524, "grad_norm": 0.3915027379989624, "learning_rate": 4.068565240198318e-06, "loss": 0.0358, "step": 47075 }, { "epoch": 2.3899690339611146, "grad_norm": 0.2943165600299835, "learning_rate": 4.066873106925902e-06, "loss": 0.0302, "step": 47080 }, { "epoch": 2.390222853951977, "grad_norm": 0.3288983106613159, "learning_rate": 4.065180973653485e-06, "loss": 0.0345, "step": 47085 }, { "epoch": 2.39047667394284, "grad_norm": 0.23327219486236572, "learning_rate": 4.063488840381069e-06, "loss": 0.0308, "step": 47090 }, { "epoch": 2.3907304939337024, "grad_norm": 0.3964608907699585, "learning_rate": 4.061796707108652e-06, "loss": 0.0268, "step": 47095 }, { "epoch": 2.3909843139245646, "grad_norm": 0.4795733094215393, "learning_rate": 4.060104573836236e-06, "loss": 0.0289, "step": 47100 }, { "epoch": 2.391238133915427, "grad_norm": 0.39220699667930603, "learning_rate": 4.058412440563819e-06, "loss": 0.0257, "step": 47105 }, { "epoch": 2.3914919539062898, "grad_norm": 0.26814162731170654, "learning_rate": 4.056720307291403e-06, "loss": 0.0325, "step": 47110 }, { "epoch": 2.391745773897152, "grad_norm": 0.3248618543148041, "learning_rate": 4.055028174018986e-06, "loss": 0.0287, "step": 47115 }, { "epoch": 2.3919995938880145, "grad_norm": 0.2833445966243744, "learning_rate": 4.053336040746569e-06, "loss": 0.0299, "step": 47120 }, { "epoch": 2.392253413878877, "grad_norm": 0.34968307614326477, "learning_rate": 4.051643907474153e-06, "loss": 0.0329, "step": 47125 }, { "epoch": 2.3925072338697397, "grad_norm": 0.34798914194107056, "learning_rate": 4.049951774201736e-06, "loss": 0.0359, "step": 47130 }, { "epoch": 2.392761053860602, "grad_norm": 0.4476436674594879, "learning_rate": 4.0482596409293205e-06, "loss": 0.0342, "step": 47135 }, { "epoch": 2.3930148738514645, "grad_norm": 0.366957426071167, "learning_rate": 4.046567507656903e-06, "loss": 0.0357, "step": 47140 }, { "epoch": 2.393268693842327, "grad_norm": 0.24836216866970062, "learning_rate": 4.044875374384487e-06, "loss": 0.0335, "step": 47145 }, { "epoch": 2.3935225138331893, "grad_norm": 0.3659680187702179, "learning_rate": 4.04318324111207e-06, "loss": 0.0311, "step": 47150 }, { "epoch": 2.393776333824052, "grad_norm": 0.19439701735973358, "learning_rate": 4.041491107839654e-06, "loss": 0.0325, "step": 47155 }, { "epoch": 2.3940301538149145, "grad_norm": 0.3122596740722656, "learning_rate": 4.039798974567237e-06, "loss": 0.0356, "step": 47160 }, { "epoch": 2.394283973805777, "grad_norm": 0.2920304834842682, "learning_rate": 4.03810684129482e-06, "loss": 0.0359, "step": 47165 }, { "epoch": 2.3945377937966397, "grad_norm": 0.2468995451927185, "learning_rate": 4.0364147080224044e-06, "loss": 0.0234, "step": 47170 }, { "epoch": 2.394791613787502, "grad_norm": 0.4460321068763733, "learning_rate": 4.034722574749987e-06, "loss": 0.0314, "step": 47175 }, { "epoch": 2.3950454337783644, "grad_norm": 0.40437963604927063, "learning_rate": 4.0330304414775715e-06, "loss": 0.033, "step": 47180 }, { "epoch": 2.395299253769227, "grad_norm": 0.30205151438713074, "learning_rate": 4.031338308205154e-06, "loss": 0.0348, "step": 47185 }, { "epoch": 2.395553073760089, "grad_norm": 0.2941863238811493, "learning_rate": 4.029646174932738e-06, "loss": 0.0267, "step": 47190 }, { "epoch": 2.395806893750952, "grad_norm": 0.3476633131504059, "learning_rate": 4.027954041660321e-06, "loss": 0.0342, "step": 47195 }, { "epoch": 2.3960607137418144, "grad_norm": 0.21167103946208954, "learning_rate": 4.026261908387905e-06, "loss": 0.0364, "step": 47200 }, { "epoch": 2.396314533732677, "grad_norm": 0.30532538890838623, "learning_rate": 4.024569775115488e-06, "loss": 0.0387, "step": 47205 }, { "epoch": 2.396568353723539, "grad_norm": 0.3205692172050476, "learning_rate": 4.022877641843072e-06, "loss": 0.0312, "step": 47210 }, { "epoch": 2.3968221737144018, "grad_norm": 0.428493857383728, "learning_rate": 4.0211855085706555e-06, "loss": 0.0291, "step": 47215 }, { "epoch": 2.3970759937052644, "grad_norm": 0.3529970943927765, "learning_rate": 4.019493375298239e-06, "loss": 0.0329, "step": 47220 }, { "epoch": 2.3973298136961265, "grad_norm": 0.24430900812149048, "learning_rate": 4.017801242025823e-06, "loss": 0.0268, "step": 47225 }, { "epoch": 2.397583633686989, "grad_norm": 0.2590664029121399, "learning_rate": 4.016109108753406e-06, "loss": 0.036, "step": 47230 }, { "epoch": 2.3978374536778517, "grad_norm": 0.3526705503463745, "learning_rate": 4.014416975480989e-06, "loss": 0.0345, "step": 47235 }, { "epoch": 2.3980912736687143, "grad_norm": 0.7975666522979736, "learning_rate": 4.012724842208573e-06, "loss": 0.0364, "step": 47240 }, { "epoch": 2.3983450936595765, "grad_norm": 0.23875541985034943, "learning_rate": 4.011032708936156e-06, "loss": 0.0347, "step": 47245 }, { "epoch": 2.398598913650439, "grad_norm": 0.23341935873031616, "learning_rate": 4.0093405756637395e-06, "loss": 0.0313, "step": 47250 }, { "epoch": 2.3988527336413017, "grad_norm": 0.2588426470756531, "learning_rate": 4.007648442391323e-06, "loss": 0.0337, "step": 47255 }, { "epoch": 2.399106553632164, "grad_norm": 0.4820748269557953, "learning_rate": 4.0059563091189066e-06, "loss": 0.0295, "step": 47260 }, { "epoch": 2.3993603736230265, "grad_norm": 0.623077929019928, "learning_rate": 4.00426417584649e-06, "loss": 0.0366, "step": 47265 }, { "epoch": 2.399614193613889, "grad_norm": 0.6706781387329102, "learning_rate": 4.002572042574074e-06, "loss": 0.0342, "step": 47270 }, { "epoch": 2.3998680136047517, "grad_norm": 0.32776203751564026, "learning_rate": 4.000879909301657e-06, "loss": 0.0244, "step": 47275 }, { "epoch": 2.400121833595614, "grad_norm": 0.29026415944099426, "learning_rate": 3.99918777602924e-06, "loss": 0.0299, "step": 47280 }, { "epoch": 2.4003756535864764, "grad_norm": 0.32502275705337524, "learning_rate": 3.997495642756824e-06, "loss": 0.0291, "step": 47285 }, { "epoch": 2.400629473577339, "grad_norm": 0.2750031352043152, "learning_rate": 3.995803509484407e-06, "loss": 0.0318, "step": 47290 }, { "epoch": 2.400883293568201, "grad_norm": 0.28516432642936707, "learning_rate": 3.994111376211991e-06, "loss": 0.0301, "step": 47295 }, { "epoch": 2.401137113559064, "grad_norm": 0.3644152283668518, "learning_rate": 3.992419242939574e-06, "loss": 0.0382, "step": 47300 }, { "epoch": 2.4013909335499264, "grad_norm": 0.3614419996738434, "learning_rate": 3.990727109667158e-06, "loss": 0.0358, "step": 47305 }, { "epoch": 2.401644753540789, "grad_norm": 0.3342830538749695, "learning_rate": 3.989034976394741e-06, "loss": 0.036, "step": 47310 }, { "epoch": 2.4018985735316516, "grad_norm": 0.26829349994659424, "learning_rate": 3.987342843122325e-06, "loss": 0.0273, "step": 47315 }, { "epoch": 2.4021523935225138, "grad_norm": 0.21632443368434906, "learning_rate": 3.985650709849908e-06, "loss": 0.028, "step": 47320 }, { "epoch": 2.4024062135133764, "grad_norm": 0.3586091697216034, "learning_rate": 3.983958576577491e-06, "loss": 0.0285, "step": 47325 }, { "epoch": 2.402660033504239, "grad_norm": 0.3348862826824188, "learning_rate": 3.982266443305075e-06, "loss": 0.0307, "step": 47330 }, { "epoch": 2.402913853495101, "grad_norm": 0.24888646602630615, "learning_rate": 3.980574310032658e-06, "loss": 0.0339, "step": 47335 }, { "epoch": 2.4031676734859637, "grad_norm": 0.5736678838729858, "learning_rate": 3.978882176760242e-06, "loss": 0.0384, "step": 47340 }, { "epoch": 2.4034214934768263, "grad_norm": 0.43109336495399475, "learning_rate": 3.977190043487825e-06, "loss": 0.0296, "step": 47345 }, { "epoch": 2.403675313467689, "grad_norm": 0.26424750685691833, "learning_rate": 3.975497910215409e-06, "loss": 0.0339, "step": 47350 }, { "epoch": 2.403929133458551, "grad_norm": 0.3025932312011719, "learning_rate": 3.973805776942992e-06, "loss": 0.0265, "step": 47355 }, { "epoch": 2.4041829534494137, "grad_norm": 0.5657501220703125, "learning_rate": 3.972113643670576e-06, "loss": 0.0304, "step": 47360 }, { "epoch": 2.4044367734402763, "grad_norm": 0.2696693539619446, "learning_rate": 3.970421510398159e-06, "loss": 0.0253, "step": 47365 }, { "epoch": 2.4046905934311384, "grad_norm": 0.2921593487262726, "learning_rate": 3.968729377125743e-06, "loss": 0.0307, "step": 47370 }, { "epoch": 2.404944413422001, "grad_norm": 0.4239245057106018, "learning_rate": 3.967037243853326e-06, "loss": 0.0326, "step": 47375 }, { "epoch": 2.4051982334128637, "grad_norm": 0.28727543354034424, "learning_rate": 3.96534511058091e-06, "loss": 0.034, "step": 47380 }, { "epoch": 2.4054520534037263, "grad_norm": 0.2851596176624298, "learning_rate": 3.9636529773084935e-06, "loss": 0.0341, "step": 47385 }, { "epoch": 2.4057058733945884, "grad_norm": 0.3607095777988434, "learning_rate": 3.961960844036077e-06, "loss": 0.0344, "step": 47390 }, { "epoch": 2.405959693385451, "grad_norm": 0.35408905148506165, "learning_rate": 3.96026871076366e-06, "loss": 0.0355, "step": 47395 }, { "epoch": 2.4062135133763136, "grad_norm": 0.23963472247123718, "learning_rate": 3.958576577491244e-06, "loss": 0.0281, "step": 47400 }, { "epoch": 2.4064673333671758, "grad_norm": 0.5606070756912231, "learning_rate": 3.956884444218827e-06, "loss": 0.0296, "step": 47405 }, { "epoch": 2.4067211533580384, "grad_norm": 0.25117459893226624, "learning_rate": 3.95519231094641e-06, "loss": 0.0283, "step": 47410 }, { "epoch": 2.406974973348901, "grad_norm": 0.2868806719779968, "learning_rate": 3.953500177673994e-06, "loss": 0.0323, "step": 47415 }, { "epoch": 2.4072287933397636, "grad_norm": 0.30753445625305176, "learning_rate": 3.9518080444015774e-06, "loss": 0.0343, "step": 47420 }, { "epoch": 2.4074826133306257, "grad_norm": 0.5894188284873962, "learning_rate": 3.950115911129161e-06, "loss": 0.0341, "step": 47425 }, { "epoch": 2.4077364333214883, "grad_norm": 0.35693567991256714, "learning_rate": 3.9484237778567445e-06, "loss": 0.0314, "step": 47430 }, { "epoch": 2.407990253312351, "grad_norm": 0.3354618549346924, "learning_rate": 3.946731644584328e-06, "loss": 0.0307, "step": 47435 }, { "epoch": 2.4082440733032136, "grad_norm": 0.276893675327301, "learning_rate": 3.945039511311911e-06, "loss": 0.0289, "step": 47440 }, { "epoch": 2.4084978932940757, "grad_norm": 0.29656293988227844, "learning_rate": 3.943347378039495e-06, "loss": 0.0303, "step": 47445 }, { "epoch": 2.4087517132849383, "grad_norm": 0.33579137921333313, "learning_rate": 3.941655244767078e-06, "loss": 0.0248, "step": 47450 }, { "epoch": 2.409005533275801, "grad_norm": 0.25143346190452576, "learning_rate": 3.939963111494661e-06, "loss": 0.0272, "step": 47455 }, { "epoch": 2.4092593532666635, "grad_norm": 0.3469618856906891, "learning_rate": 3.938270978222245e-06, "loss": 0.0239, "step": 47460 }, { "epoch": 2.4095131732575257, "grad_norm": 0.27013543248176575, "learning_rate": 3.9365788449498285e-06, "loss": 0.026, "step": 47465 }, { "epoch": 2.4097669932483883, "grad_norm": 0.28849682211875916, "learning_rate": 3.934886711677412e-06, "loss": 0.0268, "step": 47470 }, { "epoch": 2.410020813239251, "grad_norm": 0.21555496752262115, "learning_rate": 3.933194578404996e-06, "loss": 0.0282, "step": 47475 }, { "epoch": 2.410274633230113, "grad_norm": 0.2733899652957916, "learning_rate": 3.931502445132579e-06, "loss": 0.0375, "step": 47480 }, { "epoch": 2.4105284532209756, "grad_norm": 0.9940813183784485, "learning_rate": 3.929810311860162e-06, "loss": 0.0307, "step": 47485 }, { "epoch": 2.4107822732118382, "grad_norm": 0.3628968596458435, "learning_rate": 3.928118178587746e-06, "loss": 0.0269, "step": 47490 }, { "epoch": 2.411036093202701, "grad_norm": 0.2938300371170044, "learning_rate": 3.926426045315329e-06, "loss": 0.031, "step": 47495 }, { "epoch": 2.411289913193563, "grad_norm": 0.42575541138648987, "learning_rate": 3.9247339120429125e-06, "loss": 0.034, "step": 47500 }, { "epoch": 2.4115437331844256, "grad_norm": 0.2942411005496979, "learning_rate": 3.923041778770496e-06, "loss": 0.0319, "step": 47505 }, { "epoch": 2.411797553175288, "grad_norm": 0.533082127571106, "learning_rate": 3.9213496454980795e-06, "loss": 0.026, "step": 47510 }, { "epoch": 2.4120513731661504, "grad_norm": 0.4928203523159027, "learning_rate": 3.919657512225663e-06, "loss": 0.0393, "step": 47515 }, { "epoch": 2.412305193157013, "grad_norm": 0.28853780031204224, "learning_rate": 3.917965378953247e-06, "loss": 0.0328, "step": 47520 }, { "epoch": 2.4125590131478756, "grad_norm": 0.3255179524421692, "learning_rate": 3.91627324568083e-06, "loss": 0.0334, "step": 47525 }, { "epoch": 2.412812833138738, "grad_norm": 0.2753036618232727, "learning_rate": 3.914581112408414e-06, "loss": 0.0364, "step": 47530 }, { "epoch": 2.4130666531296003, "grad_norm": 0.43244901299476624, "learning_rate": 3.912888979135997e-06, "loss": 0.026, "step": 47535 }, { "epoch": 2.413320473120463, "grad_norm": 0.2927841246128082, "learning_rate": 3.911196845863581e-06, "loss": 0.0327, "step": 47540 }, { "epoch": 2.4135742931113255, "grad_norm": 0.4211539626121521, "learning_rate": 3.9095047125911635e-06, "loss": 0.0251, "step": 47545 }, { "epoch": 2.4138281131021877, "grad_norm": 0.28814446926116943, "learning_rate": 3.907812579318748e-06, "loss": 0.0291, "step": 47550 }, { "epoch": 2.4140819330930503, "grad_norm": 0.42922118306159973, "learning_rate": 3.906120446046331e-06, "loss": 0.0336, "step": 47555 }, { "epoch": 2.414335753083913, "grad_norm": 0.33254024386405945, "learning_rate": 3.904428312773915e-06, "loss": 0.0341, "step": 47560 }, { "epoch": 2.4145895730747755, "grad_norm": 0.40885141491889954, "learning_rate": 3.902736179501498e-06, "loss": 0.0278, "step": 47565 }, { "epoch": 2.4148433930656377, "grad_norm": 0.3493020534515381, "learning_rate": 3.901044046229081e-06, "loss": 0.0309, "step": 47570 }, { "epoch": 2.4150972130565003, "grad_norm": 0.2720143496990204, "learning_rate": 3.899351912956665e-06, "loss": 0.0309, "step": 47575 }, { "epoch": 2.415351033047363, "grad_norm": 0.44273534417152405, "learning_rate": 3.897659779684248e-06, "loss": 0.0346, "step": 47580 }, { "epoch": 2.4156048530382255, "grad_norm": 0.3117941617965698, "learning_rate": 3.895967646411832e-06, "loss": 0.0265, "step": 47585 }, { "epoch": 2.4158586730290876, "grad_norm": 0.29840338230133057, "learning_rate": 3.8942755131394146e-06, "loss": 0.031, "step": 47590 }, { "epoch": 2.4161124930199502, "grad_norm": 0.33411332964897156, "learning_rate": 3.892583379866999e-06, "loss": 0.025, "step": 47595 }, { "epoch": 2.416366313010813, "grad_norm": 0.4123666286468506, "learning_rate": 3.890891246594582e-06, "loss": 0.038, "step": 47600 }, { "epoch": 2.4166201330016754, "grad_norm": 0.3992561101913452, "learning_rate": 3.889199113322166e-06, "loss": 0.033, "step": 47605 }, { "epoch": 2.4168739529925376, "grad_norm": 0.26997870206832886, "learning_rate": 3.887506980049749e-06, "loss": 0.033, "step": 47610 }, { "epoch": 2.4171277729834, "grad_norm": 0.24809785187244415, "learning_rate": 3.885814846777332e-06, "loss": 0.0288, "step": 47615 }, { "epoch": 2.417381592974263, "grad_norm": 0.27336522936820984, "learning_rate": 3.884122713504916e-06, "loss": 0.0205, "step": 47620 }, { "epoch": 2.417635412965125, "grad_norm": 0.2975972890853882, "learning_rate": 3.882430580232499e-06, "loss": 0.027, "step": 47625 }, { "epoch": 2.4178892329559876, "grad_norm": 0.23915809392929077, "learning_rate": 3.880738446960083e-06, "loss": 0.036, "step": 47630 }, { "epoch": 2.41814305294685, "grad_norm": 0.4225624203681946, "learning_rate": 3.8790463136876665e-06, "loss": 0.033, "step": 47635 }, { "epoch": 2.4183968729377128, "grad_norm": 0.2970333397388458, "learning_rate": 3.87735418041525e-06, "loss": 0.0337, "step": 47640 }, { "epoch": 2.418650692928575, "grad_norm": 0.2330479919910431, "learning_rate": 3.8756620471428336e-06, "loss": 0.0286, "step": 47645 }, { "epoch": 2.4189045129194375, "grad_norm": 0.23988056182861328, "learning_rate": 3.873969913870417e-06, "loss": 0.0239, "step": 47650 }, { "epoch": 2.4191583329103, "grad_norm": 0.2401759773492813, "learning_rate": 3.872277780598e-06, "loss": 0.0339, "step": 47655 }, { "epoch": 2.4194121529011623, "grad_norm": 0.2513987720012665, "learning_rate": 3.870585647325583e-06, "loss": 0.0331, "step": 47660 }, { "epoch": 2.419665972892025, "grad_norm": 0.28474968671798706, "learning_rate": 3.868893514053167e-06, "loss": 0.0278, "step": 47665 }, { "epoch": 2.4199197928828875, "grad_norm": 0.35355323553085327, "learning_rate": 3.8672013807807504e-06, "loss": 0.0262, "step": 47670 }, { "epoch": 2.42017361287375, "grad_norm": 0.43043777346611023, "learning_rate": 3.865509247508334e-06, "loss": 0.0339, "step": 47675 }, { "epoch": 2.4204274328646123, "grad_norm": 0.4528213143348694, "learning_rate": 3.8638171142359175e-06, "loss": 0.0302, "step": 47680 }, { "epoch": 2.420681252855475, "grad_norm": 0.34391161799430847, "learning_rate": 3.862124980963501e-06, "loss": 0.028, "step": 47685 }, { "epoch": 2.4209350728463375, "grad_norm": 0.3792388141155243, "learning_rate": 3.860432847691085e-06, "loss": 0.0269, "step": 47690 }, { "epoch": 2.4211888928371996, "grad_norm": 0.3140493631362915, "learning_rate": 3.858740714418668e-06, "loss": 0.0372, "step": 47695 }, { "epoch": 2.4214427128280622, "grad_norm": 0.28535985946655273, "learning_rate": 3.857048581146252e-06, "loss": 0.0346, "step": 47700 }, { "epoch": 2.421696532818925, "grad_norm": 0.21206805109977722, "learning_rate": 3.855356447873834e-06, "loss": 0.0302, "step": 47705 }, { "epoch": 2.4219503528097874, "grad_norm": 0.252411812543869, "learning_rate": 3.853664314601419e-06, "loss": 0.0266, "step": 47710 }, { "epoch": 2.4222041728006496, "grad_norm": 0.4648708701133728, "learning_rate": 3.8519721813290015e-06, "loss": 0.0249, "step": 47715 }, { "epoch": 2.422457992791512, "grad_norm": 0.27817830443382263, "learning_rate": 3.850280048056586e-06, "loss": 0.0226, "step": 47720 }, { "epoch": 2.422711812782375, "grad_norm": 0.2477516084909439, "learning_rate": 3.8485879147841686e-06, "loss": 0.0267, "step": 47725 }, { "epoch": 2.4229656327732374, "grad_norm": 0.3148895502090454, "learning_rate": 3.846895781511752e-06, "loss": 0.0294, "step": 47730 }, { "epoch": 2.4232194527640996, "grad_norm": 0.6183810830116272, "learning_rate": 3.845203648239336e-06, "loss": 0.0287, "step": 47735 }, { "epoch": 2.423473272754962, "grad_norm": 0.20661011338233948, "learning_rate": 3.843511514966919e-06, "loss": 0.0287, "step": 47740 }, { "epoch": 2.4237270927458248, "grad_norm": 0.5037245750427246, "learning_rate": 3.841819381694503e-06, "loss": 0.0366, "step": 47745 }, { "epoch": 2.4239809127366874, "grad_norm": 0.5295654535293579, "learning_rate": 3.8401272484220855e-06, "loss": 0.0314, "step": 47750 }, { "epoch": 2.4242347327275495, "grad_norm": 0.22970163822174072, "learning_rate": 3.83843511514967e-06, "loss": 0.0298, "step": 47755 }, { "epoch": 2.424488552718412, "grad_norm": 0.3270503580570221, "learning_rate": 3.8367429818772525e-06, "loss": 0.0317, "step": 47760 }, { "epoch": 2.4247423727092747, "grad_norm": 0.3448561131954193, "learning_rate": 3.835050848604837e-06, "loss": 0.0388, "step": 47765 }, { "epoch": 2.424996192700137, "grad_norm": 0.23250630497932434, "learning_rate": 3.83335871533242e-06, "loss": 0.0291, "step": 47770 }, { "epoch": 2.4252500126909995, "grad_norm": 0.37792283296585083, "learning_rate": 3.831666582060003e-06, "loss": 0.0357, "step": 47775 }, { "epoch": 2.425503832681862, "grad_norm": 0.44455191493034363, "learning_rate": 3.829974448787587e-06, "loss": 0.0317, "step": 47780 }, { "epoch": 2.4257576526727247, "grad_norm": 0.2922888696193695, "learning_rate": 3.82828231551517e-06, "loss": 0.03, "step": 47785 }, { "epoch": 2.426011472663587, "grad_norm": 0.33817926049232483, "learning_rate": 3.826590182242754e-06, "loss": 0.0456, "step": 47790 }, { "epoch": 2.4262652926544495, "grad_norm": 0.46196913719177246, "learning_rate": 3.824898048970337e-06, "loss": 0.0399, "step": 47795 }, { "epoch": 2.426519112645312, "grad_norm": 0.27602121233940125, "learning_rate": 3.823205915697921e-06, "loss": 0.0359, "step": 47800 }, { "epoch": 2.426772932636174, "grad_norm": 0.2666013538837433, "learning_rate": 3.8215137824255044e-06, "loss": 0.0256, "step": 47805 }, { "epoch": 2.427026752627037, "grad_norm": 0.22680050134658813, "learning_rate": 3.819821649153088e-06, "loss": 0.0296, "step": 47810 }, { "epoch": 2.4272805726178994, "grad_norm": 0.3893703818321228, "learning_rate": 3.818129515880671e-06, "loss": 0.0377, "step": 47815 }, { "epoch": 2.427534392608762, "grad_norm": 0.31379884481430054, "learning_rate": 3.816437382608254e-06, "loss": 0.0307, "step": 47820 }, { "epoch": 2.427788212599624, "grad_norm": 0.32073163986206055, "learning_rate": 3.814745249335838e-06, "loss": 0.0313, "step": 47825 }, { "epoch": 2.428042032590487, "grad_norm": 0.345052570104599, "learning_rate": 3.8130531160634217e-06, "loss": 0.0328, "step": 47830 }, { "epoch": 2.4282958525813494, "grad_norm": 0.3013349175453186, "learning_rate": 3.811360982791005e-06, "loss": 0.0258, "step": 47835 }, { "epoch": 2.4285496725722115, "grad_norm": 0.4422132074832916, "learning_rate": 3.8096688495185884e-06, "loss": 0.0376, "step": 47840 }, { "epoch": 2.428803492563074, "grad_norm": 0.3477928340435028, "learning_rate": 3.8079767162461715e-06, "loss": 0.027, "step": 47845 }, { "epoch": 2.4290573125539368, "grad_norm": 0.4966389536857605, "learning_rate": 3.8062845829737555e-06, "loss": 0.0316, "step": 47850 }, { "epoch": 2.4293111325447994, "grad_norm": 0.24931129813194275, "learning_rate": 3.8045924497013386e-06, "loss": 0.0271, "step": 47855 }, { "epoch": 2.4295649525356615, "grad_norm": 0.3480713367462158, "learning_rate": 3.8029003164289226e-06, "loss": 0.0315, "step": 47860 }, { "epoch": 2.429818772526524, "grad_norm": 0.20172403752803802, "learning_rate": 3.8012081831565057e-06, "loss": 0.029, "step": 47865 }, { "epoch": 2.4300725925173867, "grad_norm": 0.22433215379714966, "learning_rate": 3.7995160498840892e-06, "loss": 0.0294, "step": 47870 }, { "epoch": 2.4303264125082493, "grad_norm": 0.42179742455482483, "learning_rate": 3.797823916611673e-06, "loss": 0.0302, "step": 47875 }, { "epoch": 2.4305802324991115, "grad_norm": 0.345768541097641, "learning_rate": 3.7961317833392563e-06, "loss": 0.0395, "step": 47880 }, { "epoch": 2.430834052489974, "grad_norm": 0.30003654956817627, "learning_rate": 3.7944396500668395e-06, "loss": 0.0304, "step": 47885 }, { "epoch": 2.4310878724808367, "grad_norm": 0.7992334365844727, "learning_rate": 3.7927475167944226e-06, "loss": 0.0282, "step": 47890 }, { "epoch": 2.4313416924716993, "grad_norm": 1.201789140701294, "learning_rate": 3.7910553835220065e-06, "loss": 0.0388, "step": 47895 }, { "epoch": 2.4315955124625614, "grad_norm": 0.2232540100812912, "learning_rate": 3.7893632502495897e-06, "loss": 0.0312, "step": 47900 }, { "epoch": 2.431849332453424, "grad_norm": 0.39150550961494446, "learning_rate": 3.7876711169771736e-06, "loss": 0.0396, "step": 47905 }, { "epoch": 2.4321031524442867, "grad_norm": 0.25751233100891113, "learning_rate": 3.7859789837047568e-06, "loss": 0.0313, "step": 47910 }, { "epoch": 2.432356972435149, "grad_norm": 0.38640815019607544, "learning_rate": 3.7842868504323403e-06, "loss": 0.0364, "step": 47915 }, { "epoch": 2.4326107924260114, "grad_norm": 0.30392131209373474, "learning_rate": 3.782594717159924e-06, "loss": 0.0295, "step": 47920 }, { "epoch": 2.432864612416874, "grad_norm": 0.36660847067832947, "learning_rate": 3.7809025838875074e-06, "loss": 0.0348, "step": 47925 }, { "epoch": 2.4331184324077366, "grad_norm": 0.31865790486335754, "learning_rate": 3.7792104506150905e-06, "loss": 0.0343, "step": 47930 }, { "epoch": 2.433372252398599, "grad_norm": 0.44969046115875244, "learning_rate": 3.7775183173426745e-06, "loss": 0.0335, "step": 47935 }, { "epoch": 2.4336260723894614, "grad_norm": 0.30257928371429443, "learning_rate": 3.7758261840702576e-06, "loss": 0.0323, "step": 47940 }, { "epoch": 2.433879892380324, "grad_norm": 0.3042431175708771, "learning_rate": 3.774134050797841e-06, "loss": 0.0323, "step": 47945 }, { "epoch": 2.434133712371186, "grad_norm": 0.302280992269516, "learning_rate": 3.7724419175254247e-06, "loss": 0.0312, "step": 47950 }, { "epoch": 2.4343875323620487, "grad_norm": 0.2679111063480377, "learning_rate": 3.7707497842530082e-06, "loss": 0.0268, "step": 47955 }, { "epoch": 2.4346413523529113, "grad_norm": 0.2837721109390259, "learning_rate": 3.7690576509805914e-06, "loss": 0.0316, "step": 47960 }, { "epoch": 2.434895172343774, "grad_norm": 0.3853626847267151, "learning_rate": 3.7673655177081753e-06, "loss": 0.0325, "step": 47965 }, { "epoch": 2.435148992334636, "grad_norm": 0.2845440208911896, "learning_rate": 3.7656733844357584e-06, "loss": 0.035, "step": 47970 }, { "epoch": 2.4354028123254987, "grad_norm": 0.34717926383018494, "learning_rate": 3.7639812511633416e-06, "loss": 0.0338, "step": 47975 }, { "epoch": 2.4356566323163613, "grad_norm": 0.1998908966779709, "learning_rate": 3.7622891178909255e-06, "loss": 0.0333, "step": 47980 }, { "epoch": 2.4359104523072235, "grad_norm": 0.19842973351478577, "learning_rate": 3.7605969846185087e-06, "loss": 0.0266, "step": 47985 }, { "epoch": 2.436164272298086, "grad_norm": 0.3331410586833954, "learning_rate": 3.758904851346092e-06, "loss": 0.0254, "step": 47990 }, { "epoch": 2.4364180922889487, "grad_norm": 0.33584150671958923, "learning_rate": 3.7572127180736757e-06, "loss": 0.0325, "step": 47995 }, { "epoch": 2.4366719122798113, "grad_norm": 0.3402474820613861, "learning_rate": 3.7555205848012593e-06, "loss": 0.0288, "step": 48000 }, { "epoch": 2.436925732270674, "grad_norm": 0.5381267666816711, "learning_rate": 3.7538284515288424e-06, "loss": 0.0346, "step": 48005 }, { "epoch": 2.437179552261536, "grad_norm": 0.447458416223526, "learning_rate": 3.7521363182564264e-06, "loss": 0.0301, "step": 48010 }, { "epoch": 2.4374333722523986, "grad_norm": 0.2832920253276825, "learning_rate": 3.7504441849840095e-06, "loss": 0.0349, "step": 48015 }, { "epoch": 2.4376871922432612, "grad_norm": 0.5114197731018066, "learning_rate": 3.7487520517115935e-06, "loss": 0.0337, "step": 48020 }, { "epoch": 2.4379410122341234, "grad_norm": 2.0609970092773438, "learning_rate": 3.7470599184391766e-06, "loss": 0.0272, "step": 48025 }, { "epoch": 2.438194832224986, "grad_norm": 0.38977518677711487, "learning_rate": 3.74536778516676e-06, "loss": 0.0323, "step": 48030 }, { "epoch": 2.4384486522158486, "grad_norm": 0.3010084331035614, "learning_rate": 3.7436756518943433e-06, "loss": 0.0305, "step": 48035 }, { "epoch": 2.438702472206711, "grad_norm": 0.2480255663394928, "learning_rate": 3.7419835186219272e-06, "loss": 0.0282, "step": 48040 }, { "epoch": 2.4389562921975734, "grad_norm": 0.18872040510177612, "learning_rate": 3.7402913853495103e-06, "loss": 0.0261, "step": 48045 }, { "epoch": 2.439210112188436, "grad_norm": 0.3619585931301117, "learning_rate": 3.7385992520770935e-06, "loss": 0.0286, "step": 48050 }, { "epoch": 2.4394639321792986, "grad_norm": 0.35356375575065613, "learning_rate": 3.7369071188046774e-06, "loss": 0.0305, "step": 48055 }, { "epoch": 2.4397177521701607, "grad_norm": 0.26403146982192993, "learning_rate": 3.7352149855322606e-06, "loss": 0.0273, "step": 48060 }, { "epoch": 2.4399715721610233, "grad_norm": 0.2659587860107422, "learning_rate": 3.7335228522598445e-06, "loss": 0.0304, "step": 48065 }, { "epoch": 2.440225392151886, "grad_norm": 0.32062849402427673, "learning_rate": 3.7318307189874276e-06, "loss": 0.0296, "step": 48070 }, { "epoch": 2.4404792121427485, "grad_norm": 0.35643866658210754, "learning_rate": 3.730138585715011e-06, "loss": 0.0291, "step": 48075 }, { "epoch": 2.4407330321336107, "grad_norm": 0.305879145860672, "learning_rate": 3.7284464524425943e-06, "loss": 0.0273, "step": 48080 }, { "epoch": 2.4409868521244733, "grad_norm": 0.275307297706604, "learning_rate": 3.7267543191701783e-06, "loss": 0.036, "step": 48085 }, { "epoch": 2.441240672115336, "grad_norm": 0.34820234775543213, "learning_rate": 3.7250621858977614e-06, "loss": 0.0297, "step": 48090 }, { "epoch": 2.441494492106198, "grad_norm": 0.25399303436279297, "learning_rate": 3.7233700526253454e-06, "loss": 0.0321, "step": 48095 }, { "epoch": 2.4417483120970607, "grad_norm": 0.27098965644836426, "learning_rate": 3.7216779193529285e-06, "loss": 0.0346, "step": 48100 }, { "epoch": 2.4420021320879233, "grad_norm": 0.3064155876636505, "learning_rate": 3.719985786080512e-06, "loss": 0.0281, "step": 48105 }, { "epoch": 2.442255952078786, "grad_norm": 0.3673038184642792, "learning_rate": 3.7182936528080956e-06, "loss": 0.0254, "step": 48110 }, { "epoch": 2.442509772069648, "grad_norm": 0.39501214027404785, "learning_rate": 3.716601519535679e-06, "loss": 0.039, "step": 48115 }, { "epoch": 2.4427635920605106, "grad_norm": 0.37488582730293274, "learning_rate": 3.7149093862632622e-06, "loss": 0.0285, "step": 48120 }, { "epoch": 2.4430174120513732, "grad_norm": 0.20910173654556274, "learning_rate": 3.713217252990846e-06, "loss": 0.0288, "step": 48125 }, { "epoch": 2.4432712320422354, "grad_norm": 0.32626721262931824, "learning_rate": 3.7115251197184293e-06, "loss": 0.0255, "step": 48130 }, { "epoch": 2.443525052033098, "grad_norm": 0.27159538865089417, "learning_rate": 3.7098329864460125e-06, "loss": 0.0283, "step": 48135 }, { "epoch": 2.4437788720239606, "grad_norm": 0.6607284545898438, "learning_rate": 3.7081408531735964e-06, "loss": 0.0311, "step": 48140 }, { "epoch": 2.444032692014823, "grad_norm": 0.23017600178718567, "learning_rate": 3.7064487199011795e-06, "loss": 0.037, "step": 48145 }, { "epoch": 2.444286512005686, "grad_norm": 0.3214363753795624, "learning_rate": 3.704756586628763e-06, "loss": 0.0278, "step": 48150 }, { "epoch": 2.444540331996548, "grad_norm": 0.3399595618247986, "learning_rate": 3.7030644533563466e-06, "loss": 0.0339, "step": 48155 }, { "epoch": 2.4447941519874106, "grad_norm": 0.277424693107605, "learning_rate": 3.70137232008393e-06, "loss": 0.0312, "step": 48160 }, { "epoch": 2.445047971978273, "grad_norm": 0.3377756178379059, "learning_rate": 3.6996801868115133e-06, "loss": 0.0319, "step": 48165 }, { "epoch": 2.4453017919691353, "grad_norm": 1.510842204093933, "learning_rate": 3.6979880535390973e-06, "loss": 0.0298, "step": 48170 }, { "epoch": 2.445555611959998, "grad_norm": 0.2741282880306244, "learning_rate": 3.6962959202666804e-06, "loss": 0.029, "step": 48175 }, { "epoch": 2.4458094319508605, "grad_norm": 0.3779626786708832, "learning_rate": 3.6946037869942644e-06, "loss": 0.0246, "step": 48180 }, { "epoch": 2.446063251941723, "grad_norm": 0.44682037830352783, "learning_rate": 3.6929116537218475e-06, "loss": 0.0309, "step": 48185 }, { "epoch": 2.4463170719325853, "grad_norm": 0.2568581700325012, "learning_rate": 3.691219520449431e-06, "loss": 0.0268, "step": 48190 }, { "epoch": 2.446570891923448, "grad_norm": 0.2952916920185089, "learning_rate": 3.689527387177014e-06, "loss": 0.028, "step": 48195 }, { "epoch": 2.4468247119143105, "grad_norm": 0.49456146359443665, "learning_rate": 3.687835253904598e-06, "loss": 0.0331, "step": 48200 }, { "epoch": 2.4470785319051727, "grad_norm": 0.2728605568408966, "learning_rate": 3.6861431206321812e-06, "loss": 0.0365, "step": 48205 }, { "epoch": 2.4473323518960353, "grad_norm": 0.37636256217956543, "learning_rate": 3.684450987359765e-06, "loss": 0.0397, "step": 48210 }, { "epoch": 2.447586171886898, "grad_norm": 0.241055428981781, "learning_rate": 3.6827588540873483e-06, "loss": 0.0403, "step": 48215 }, { "epoch": 2.4478399918777605, "grad_norm": 0.2556413412094116, "learning_rate": 3.6810667208149314e-06, "loss": 0.0308, "step": 48220 }, { "epoch": 2.4480938118686226, "grad_norm": 0.3312203288078308, "learning_rate": 3.6793745875425154e-06, "loss": 0.027, "step": 48225 }, { "epoch": 2.4483476318594852, "grad_norm": 0.34544646739959717, "learning_rate": 3.6776824542700985e-06, "loss": 0.0294, "step": 48230 }, { "epoch": 2.448601451850348, "grad_norm": 0.3323667347431183, "learning_rate": 3.675990320997682e-06, "loss": 0.0343, "step": 48235 }, { "epoch": 2.44885527184121, "grad_norm": 0.2082269936800003, "learning_rate": 3.674298187725265e-06, "loss": 0.0314, "step": 48240 }, { "epoch": 2.4491090918320726, "grad_norm": 0.2525387108325958, "learning_rate": 3.672606054452849e-06, "loss": 0.0331, "step": 48245 }, { "epoch": 2.449362911822935, "grad_norm": 0.34921693801879883, "learning_rate": 3.6709139211804323e-06, "loss": 0.0367, "step": 48250 }, { "epoch": 2.449616731813798, "grad_norm": 0.4713902175426483, "learning_rate": 3.6692217879080162e-06, "loss": 0.0332, "step": 48255 }, { "epoch": 2.44987055180466, "grad_norm": 0.5320238471031189, "learning_rate": 3.6675296546355994e-06, "loss": 0.0312, "step": 48260 }, { "epoch": 2.4501243717955226, "grad_norm": 0.3506769835948944, "learning_rate": 3.665837521363183e-06, "loss": 0.0305, "step": 48265 }, { "epoch": 2.450378191786385, "grad_norm": 0.2673940658569336, "learning_rate": 3.6641453880907665e-06, "loss": 0.0326, "step": 48270 }, { "epoch": 2.4506320117772478, "grad_norm": 0.259799599647522, "learning_rate": 3.66245325481835e-06, "loss": 0.0259, "step": 48275 }, { "epoch": 2.45088583176811, "grad_norm": 0.2402365803718567, "learning_rate": 3.660761121545933e-06, "loss": 0.0234, "step": 48280 }, { "epoch": 2.4511396517589725, "grad_norm": 0.28318873047828674, "learning_rate": 3.659068988273517e-06, "loss": 0.0363, "step": 48285 }, { "epoch": 2.451393471749835, "grad_norm": 0.218606099486351, "learning_rate": 3.6573768550011002e-06, "loss": 0.0287, "step": 48290 }, { "epoch": 2.4516472917406977, "grad_norm": 0.35069960355758667, "learning_rate": 3.6556847217286833e-06, "loss": 0.0276, "step": 48295 }, { "epoch": 2.45190111173156, "grad_norm": 0.2726610600948334, "learning_rate": 3.6539925884562673e-06, "loss": 0.0318, "step": 48300 }, { "epoch": 2.4521549317224225, "grad_norm": 0.4079586863517761, "learning_rate": 3.6523004551838504e-06, "loss": 0.0314, "step": 48305 }, { "epoch": 2.452408751713285, "grad_norm": 0.2880258858203888, "learning_rate": 3.650608321911434e-06, "loss": 0.0391, "step": 48310 }, { "epoch": 2.4526625717041473, "grad_norm": 0.3445485234260559, "learning_rate": 3.6489161886390175e-06, "loss": 0.0327, "step": 48315 }, { "epoch": 2.45291639169501, "grad_norm": 0.23641039431095123, "learning_rate": 3.647224055366601e-06, "loss": 0.0269, "step": 48320 }, { "epoch": 2.4531702116858725, "grad_norm": 0.3212410509586334, "learning_rate": 3.645531922094184e-06, "loss": 0.0325, "step": 48325 }, { "epoch": 2.453424031676735, "grad_norm": 0.2834191918373108, "learning_rate": 3.643839788821768e-06, "loss": 0.0288, "step": 48330 }, { "epoch": 2.453677851667597, "grad_norm": 0.26539257168769836, "learning_rate": 3.6421476555493513e-06, "loss": 0.0338, "step": 48335 }, { "epoch": 2.45393167165846, "grad_norm": 0.4019145369529724, "learning_rate": 3.640455522276935e-06, "loss": 0.0309, "step": 48340 }, { "epoch": 2.4541854916493224, "grad_norm": 0.6921430230140686, "learning_rate": 3.6387633890045184e-06, "loss": 0.0302, "step": 48345 }, { "epoch": 2.4544393116401846, "grad_norm": 0.3001328408718109, "learning_rate": 3.637071255732102e-06, "loss": 0.0314, "step": 48350 }, { "epoch": 2.454693131631047, "grad_norm": 0.34600576758384705, "learning_rate": 3.635379122459685e-06, "loss": 0.0354, "step": 48355 }, { "epoch": 2.45494695162191, "grad_norm": 0.3184991478919983, "learning_rate": 3.633686989187269e-06, "loss": 0.0294, "step": 48360 }, { "epoch": 2.4552007716127724, "grad_norm": 0.2975368797779083, "learning_rate": 3.631994855914852e-06, "loss": 0.0234, "step": 48365 }, { "epoch": 2.4554545916036346, "grad_norm": 0.40252795815467834, "learning_rate": 3.630302722642436e-06, "loss": 0.0308, "step": 48370 }, { "epoch": 2.455708411594497, "grad_norm": 0.2199138104915619, "learning_rate": 3.628610589370019e-06, "loss": 0.0296, "step": 48375 }, { "epoch": 2.4559622315853598, "grad_norm": 0.2915712296962738, "learning_rate": 3.6269184560976023e-06, "loss": 0.0249, "step": 48380 }, { "epoch": 2.456216051576222, "grad_norm": 0.4096558690071106, "learning_rate": 3.625226322825186e-06, "loss": 0.0281, "step": 48385 }, { "epoch": 2.4564698715670845, "grad_norm": 0.22814741730690002, "learning_rate": 3.6235341895527694e-06, "loss": 0.0354, "step": 48390 }, { "epoch": 2.456723691557947, "grad_norm": 0.2444760948419571, "learning_rate": 3.621842056280353e-06, "loss": 0.0294, "step": 48395 }, { "epoch": 2.4569775115488097, "grad_norm": 0.35308727622032166, "learning_rate": 3.620149923007936e-06, "loss": 0.0331, "step": 48400 }, { "epoch": 2.457231331539672, "grad_norm": 0.3179206848144531, "learning_rate": 3.61845778973552e-06, "loss": 0.0326, "step": 48405 }, { "epoch": 2.4574851515305345, "grad_norm": 0.2587548792362213, "learning_rate": 3.616765656463103e-06, "loss": 0.036, "step": 48410 }, { "epoch": 2.457738971521397, "grad_norm": 0.44887417554855347, "learning_rate": 3.615073523190687e-06, "loss": 0.0327, "step": 48415 }, { "epoch": 2.4579927915122597, "grad_norm": 0.6582462191581726, "learning_rate": 3.6133813899182703e-06, "loss": 0.0416, "step": 48420 }, { "epoch": 2.458246611503122, "grad_norm": 0.3380301892757416, "learning_rate": 3.611689256645854e-06, "loss": 0.0378, "step": 48425 }, { "epoch": 2.4585004314939845, "grad_norm": 0.3325098156929016, "learning_rate": 3.609997123373437e-06, "loss": 0.0411, "step": 48430 }, { "epoch": 2.458754251484847, "grad_norm": 0.4534139633178711, "learning_rate": 3.608304990101021e-06, "loss": 0.0336, "step": 48435 }, { "epoch": 2.4590080714757097, "grad_norm": 0.3075084090232849, "learning_rate": 3.606612856828604e-06, "loss": 0.0394, "step": 48440 }, { "epoch": 2.459261891466572, "grad_norm": 0.2668818533420563, "learning_rate": 3.604920723556188e-06, "loss": 0.0348, "step": 48445 }, { "epoch": 2.4595157114574344, "grad_norm": 0.2585658133029938, "learning_rate": 3.603228590283771e-06, "loss": 0.0367, "step": 48450 }, { "epoch": 2.459769531448297, "grad_norm": 0.246776282787323, "learning_rate": 3.6015364570113542e-06, "loss": 0.0284, "step": 48455 }, { "epoch": 2.460023351439159, "grad_norm": 0.36037343740463257, "learning_rate": 3.599844323738938e-06, "loss": 0.0256, "step": 48460 }, { "epoch": 2.460277171430022, "grad_norm": 0.5250141620635986, "learning_rate": 3.5981521904665213e-06, "loss": 0.035, "step": 48465 }, { "epoch": 2.4605309914208844, "grad_norm": 0.340463787317276, "learning_rate": 3.596460057194105e-06, "loss": 0.0289, "step": 48470 }, { "epoch": 2.460784811411747, "grad_norm": 0.4077812433242798, "learning_rate": 3.594767923921688e-06, "loss": 0.0278, "step": 48475 }, { "epoch": 2.461038631402609, "grad_norm": 0.32608455419540405, "learning_rate": 3.593075790649272e-06, "loss": 0.0344, "step": 48480 }, { "epoch": 2.4612924513934717, "grad_norm": 0.28923162817955017, "learning_rate": 3.591383657376855e-06, "loss": 0.0244, "step": 48485 }, { "epoch": 2.4615462713843344, "grad_norm": 0.5158969163894653, "learning_rate": 3.589691524104439e-06, "loss": 0.034, "step": 48490 }, { "epoch": 2.4618000913751965, "grad_norm": 0.24424219131469727, "learning_rate": 3.587999390832022e-06, "loss": 0.0281, "step": 48495 }, { "epoch": 2.462053911366059, "grad_norm": 0.27959030866622925, "learning_rate": 3.5863072575596057e-06, "loss": 0.0379, "step": 48500 }, { "epoch": 2.4623077313569217, "grad_norm": 0.307492196559906, "learning_rate": 3.5846151242871892e-06, "loss": 0.0323, "step": 48505 }, { "epoch": 2.4625615513477843, "grad_norm": 0.27596306800842285, "learning_rate": 3.5829229910147728e-06, "loss": 0.0291, "step": 48510 }, { "epoch": 2.4628153713386465, "grad_norm": 0.2955135107040405, "learning_rate": 3.581230857742356e-06, "loss": 0.0258, "step": 48515 }, { "epoch": 2.463069191329509, "grad_norm": 0.30613574385643005, "learning_rate": 3.57953872446994e-06, "loss": 0.0323, "step": 48520 }, { "epoch": 2.4633230113203717, "grad_norm": 0.30772191286087036, "learning_rate": 3.577846591197523e-06, "loss": 0.0337, "step": 48525 }, { "epoch": 2.463576831311234, "grad_norm": 0.32587411999702454, "learning_rate": 3.5761544579251065e-06, "loss": 0.0313, "step": 48530 }, { "epoch": 2.4638306513020964, "grad_norm": 0.5187804698944092, "learning_rate": 3.57446232465269e-06, "loss": 0.0365, "step": 48535 }, { "epoch": 2.464084471292959, "grad_norm": 0.3063710629940033, "learning_rate": 3.572770191380273e-06, "loss": 0.0365, "step": 48540 }, { "epoch": 2.4643382912838216, "grad_norm": 0.44692617654800415, "learning_rate": 3.5710780581078568e-06, "loss": 0.0304, "step": 48545 }, { "epoch": 2.464592111274684, "grad_norm": 0.2769562304019928, "learning_rate": 3.5693859248354403e-06, "loss": 0.0299, "step": 48550 }, { "epoch": 2.4648459312655464, "grad_norm": 0.3721355199813843, "learning_rate": 3.567693791563024e-06, "loss": 0.0328, "step": 48555 }, { "epoch": 2.465099751256409, "grad_norm": 0.36836931109428406, "learning_rate": 3.566001658290607e-06, "loss": 0.031, "step": 48560 }, { "epoch": 2.4653535712472716, "grad_norm": 0.3551080822944641, "learning_rate": 3.564309525018191e-06, "loss": 0.0316, "step": 48565 }, { "epoch": 2.4656073912381338, "grad_norm": 0.3343908488750458, "learning_rate": 3.562617391745774e-06, "loss": 0.0345, "step": 48570 }, { "epoch": 2.4658612112289964, "grad_norm": 0.3180460035800934, "learning_rate": 3.5609252584733576e-06, "loss": 0.0383, "step": 48575 }, { "epoch": 2.466115031219859, "grad_norm": 0.2818259298801422, "learning_rate": 3.559233125200941e-06, "loss": 0.0331, "step": 48580 }, { "epoch": 2.4663688512107216, "grad_norm": 0.1655755341053009, "learning_rate": 3.5575409919285247e-06, "loss": 0.0304, "step": 48585 }, { "epoch": 2.4666226712015837, "grad_norm": 0.7998566627502441, "learning_rate": 3.555848858656108e-06, "loss": 0.0267, "step": 48590 }, { "epoch": 2.4668764911924463, "grad_norm": 0.4926971197128296, "learning_rate": 3.5541567253836918e-06, "loss": 0.0332, "step": 48595 }, { "epoch": 2.467130311183309, "grad_norm": 0.2525290548801422, "learning_rate": 3.552464592111275e-06, "loss": 0.0444, "step": 48600 }, { "epoch": 2.467384131174171, "grad_norm": 0.3362772464752197, "learning_rate": 3.550772458838859e-06, "loss": 0.0286, "step": 48605 }, { "epoch": 2.4676379511650337, "grad_norm": 0.2079012095928192, "learning_rate": 3.549080325566442e-06, "loss": 0.0292, "step": 48610 }, { "epoch": 2.4678917711558963, "grad_norm": 0.2826537489891052, "learning_rate": 3.547388192294025e-06, "loss": 0.0311, "step": 48615 }, { "epoch": 2.468145591146759, "grad_norm": 0.5033324360847473, "learning_rate": 3.5456960590216086e-06, "loss": 0.0294, "step": 48620 }, { "epoch": 2.468399411137621, "grad_norm": 0.21514873206615448, "learning_rate": 3.544003925749192e-06, "loss": 0.0334, "step": 48625 }, { "epoch": 2.4686532311284837, "grad_norm": 0.42751839756965637, "learning_rate": 3.5423117924767757e-06, "loss": 0.0295, "step": 48630 }, { "epoch": 2.4689070511193463, "grad_norm": 0.3107595145702362, "learning_rate": 3.540619659204359e-06, "loss": 0.0312, "step": 48635 }, { "epoch": 2.4691608711102084, "grad_norm": 0.3402955234050751, "learning_rate": 3.538927525931943e-06, "loss": 0.0338, "step": 48640 }, { "epoch": 2.469414691101071, "grad_norm": 0.20256027579307556, "learning_rate": 3.537235392659526e-06, "loss": 0.0226, "step": 48645 }, { "epoch": 2.4696685110919336, "grad_norm": 0.28199639916419983, "learning_rate": 3.53554325938711e-06, "loss": 0.0284, "step": 48650 }, { "epoch": 2.4699223310827962, "grad_norm": 0.37197086215019226, "learning_rate": 3.533851126114693e-06, "loss": 0.0344, "step": 48655 }, { "epoch": 2.4701761510736584, "grad_norm": 0.5461879968643188, "learning_rate": 3.5321589928422766e-06, "loss": 0.0337, "step": 48660 }, { "epoch": 2.470429971064521, "grad_norm": 0.32721325755119324, "learning_rate": 3.5304668595698597e-06, "loss": 0.0353, "step": 48665 }, { "epoch": 2.4706837910553836, "grad_norm": 0.28904861211776733, "learning_rate": 3.5287747262974437e-06, "loss": 0.0297, "step": 48670 }, { "epoch": 2.4709376110462458, "grad_norm": 0.36164960265159607, "learning_rate": 3.527082593025027e-06, "loss": 0.0368, "step": 48675 }, { "epoch": 2.4711914310371084, "grad_norm": 0.4914800226688385, "learning_rate": 3.5253904597526108e-06, "loss": 0.0339, "step": 48680 }, { "epoch": 2.471445251027971, "grad_norm": 0.359645277261734, "learning_rate": 3.523698326480194e-06, "loss": 0.0308, "step": 48685 }, { "epoch": 2.4716990710188336, "grad_norm": 0.2739097476005554, "learning_rate": 3.5220061932077774e-06, "loss": 0.037, "step": 48690 }, { "epoch": 2.4719528910096957, "grad_norm": 0.27334341406822205, "learning_rate": 3.520314059935361e-06, "loss": 0.0206, "step": 48695 }, { "epoch": 2.4722067110005583, "grad_norm": 0.47454833984375, "learning_rate": 3.518621926662944e-06, "loss": 0.0348, "step": 48700 }, { "epoch": 2.472460530991421, "grad_norm": 0.3284585177898407, "learning_rate": 3.5169297933905276e-06, "loss": 0.032, "step": 48705 }, { "epoch": 2.4727143509822835, "grad_norm": 0.3352780044078827, "learning_rate": 3.5152376601181108e-06, "loss": 0.0308, "step": 48710 }, { "epoch": 2.4729681709731457, "grad_norm": 0.4959048628807068, "learning_rate": 3.5135455268456947e-06, "loss": 0.031, "step": 48715 }, { "epoch": 2.4732219909640083, "grad_norm": 0.3201127052307129, "learning_rate": 3.511853393573278e-06, "loss": 0.0273, "step": 48720 }, { "epoch": 2.473475810954871, "grad_norm": 0.2874223589897156, "learning_rate": 3.510161260300862e-06, "loss": 0.0346, "step": 48725 }, { "epoch": 2.4737296309457335, "grad_norm": 0.38007476925849915, "learning_rate": 3.508469127028445e-06, "loss": 0.0364, "step": 48730 }, { "epoch": 2.4739834509365957, "grad_norm": 0.7429245710372925, "learning_rate": 3.5067769937560285e-06, "loss": 0.031, "step": 48735 }, { "epoch": 2.4742372709274583, "grad_norm": 0.4520021080970764, "learning_rate": 3.505084860483612e-06, "loss": 0.0389, "step": 48740 }, { "epoch": 2.474491090918321, "grad_norm": 0.3023478388786316, "learning_rate": 3.5033927272111956e-06, "loss": 0.0335, "step": 48745 }, { "epoch": 2.474744910909183, "grad_norm": 0.28845712542533875, "learning_rate": 3.5017005939387787e-06, "loss": 0.0255, "step": 48750 }, { "epoch": 2.4749987309000456, "grad_norm": 0.22676537930965424, "learning_rate": 3.5000084606663627e-06, "loss": 0.0263, "step": 48755 }, { "epoch": 2.4752525508909082, "grad_norm": 0.2971080243587494, "learning_rate": 3.4983163273939458e-06, "loss": 0.0307, "step": 48760 }, { "epoch": 2.475506370881771, "grad_norm": 0.47433048486709595, "learning_rate": 3.4966241941215297e-06, "loss": 0.0292, "step": 48765 }, { "epoch": 2.475760190872633, "grad_norm": 0.32119104266166687, "learning_rate": 3.494932060849113e-06, "loss": 0.0315, "step": 48770 }, { "epoch": 2.4760140108634956, "grad_norm": 0.3103313446044922, "learning_rate": 3.4932399275766964e-06, "loss": 0.0354, "step": 48775 }, { "epoch": 2.476267830854358, "grad_norm": 0.2995244860649109, "learning_rate": 3.4915477943042795e-06, "loss": 0.0315, "step": 48780 }, { "epoch": 2.4765216508452204, "grad_norm": 0.36046555638313293, "learning_rate": 3.489855661031863e-06, "loss": 0.0316, "step": 48785 }, { "epoch": 2.476775470836083, "grad_norm": 0.33610787987709045, "learning_rate": 3.4881635277594466e-06, "loss": 0.032, "step": 48790 }, { "epoch": 2.4770292908269456, "grad_norm": 0.29550817608833313, "learning_rate": 3.4864713944870297e-06, "loss": 0.0359, "step": 48795 }, { "epoch": 2.477283110817808, "grad_norm": 0.26815691590309143, "learning_rate": 3.4847792612146137e-06, "loss": 0.0299, "step": 48800 }, { "epoch": 2.4775369308086703, "grad_norm": 0.2790869474411011, "learning_rate": 3.483087127942197e-06, "loss": 0.033, "step": 48805 }, { "epoch": 2.477790750799533, "grad_norm": 0.3197694718837738, "learning_rate": 3.481394994669781e-06, "loss": 0.0301, "step": 48810 }, { "epoch": 2.4780445707903955, "grad_norm": 0.521484911441803, "learning_rate": 3.479702861397364e-06, "loss": 0.0286, "step": 48815 }, { "epoch": 2.4782983907812577, "grad_norm": 0.26468929648399353, "learning_rate": 3.4780107281249475e-06, "loss": 0.0333, "step": 48820 }, { "epoch": 2.4785522107721203, "grad_norm": 0.3268936276435852, "learning_rate": 3.4763185948525306e-06, "loss": 0.026, "step": 48825 }, { "epoch": 2.478806030762983, "grad_norm": 0.7504985332489014, "learning_rate": 3.4746264615801146e-06, "loss": 0.0318, "step": 48830 }, { "epoch": 2.4790598507538455, "grad_norm": 0.25839510560035706, "learning_rate": 3.4729343283076977e-06, "loss": 0.0334, "step": 48835 }, { "epoch": 2.479313670744708, "grad_norm": 0.38872215151786804, "learning_rate": 3.4712421950352816e-06, "loss": 0.0286, "step": 48840 }, { "epoch": 2.4795674907355703, "grad_norm": 0.44475406408309937, "learning_rate": 3.4695500617628648e-06, "loss": 0.0341, "step": 48845 }, { "epoch": 2.479821310726433, "grad_norm": 0.2652853727340698, "learning_rate": 3.4678579284904483e-06, "loss": 0.0308, "step": 48850 }, { "epoch": 2.4800751307172955, "grad_norm": 0.2847198247909546, "learning_rate": 3.466165795218032e-06, "loss": 0.0342, "step": 48855 }, { "epoch": 2.4803289507081576, "grad_norm": 0.33611080050468445, "learning_rate": 3.464473661945615e-06, "loss": 0.0349, "step": 48860 }, { "epoch": 2.4805827706990202, "grad_norm": 0.3062199056148529, "learning_rate": 3.4627815286731985e-06, "loss": 0.031, "step": 48865 }, { "epoch": 2.480836590689883, "grad_norm": 0.272950142621994, "learning_rate": 3.4610893954007816e-06, "loss": 0.0328, "step": 48870 }, { "epoch": 2.4810904106807454, "grad_norm": 0.27738696336746216, "learning_rate": 3.4593972621283656e-06, "loss": 0.031, "step": 48875 }, { "epoch": 2.4813442306716076, "grad_norm": 0.3519563674926758, "learning_rate": 3.4577051288559487e-06, "loss": 0.0302, "step": 48880 }, { "epoch": 2.48159805066247, "grad_norm": 0.4059299826622009, "learning_rate": 3.4560129955835327e-06, "loss": 0.0306, "step": 48885 }, { "epoch": 2.481851870653333, "grad_norm": 0.6799148321151733, "learning_rate": 3.454320862311116e-06, "loss": 0.0355, "step": 48890 }, { "epoch": 2.482105690644195, "grad_norm": 0.6731832027435303, "learning_rate": 3.4526287290386994e-06, "loss": 0.0288, "step": 48895 }, { "epoch": 2.4823595106350576, "grad_norm": 0.42268016934394836, "learning_rate": 3.450936595766283e-06, "loss": 0.0309, "step": 48900 }, { "epoch": 2.48261333062592, "grad_norm": 0.6146883368492126, "learning_rate": 3.4492444624938665e-06, "loss": 0.0376, "step": 48905 }, { "epoch": 2.4828671506167828, "grad_norm": 0.22841694951057434, "learning_rate": 3.4475523292214496e-06, "loss": 0.0302, "step": 48910 }, { "epoch": 2.483120970607645, "grad_norm": 0.2890803813934326, "learning_rate": 3.4458601959490335e-06, "loss": 0.0251, "step": 48915 }, { "epoch": 2.4833747905985075, "grad_norm": 0.4178946912288666, "learning_rate": 3.4441680626766167e-06, "loss": 0.0378, "step": 48920 }, { "epoch": 2.48362861058937, "grad_norm": 0.43672826886177063, "learning_rate": 3.4424759294042e-06, "loss": 0.0344, "step": 48925 }, { "epoch": 2.4838824305802323, "grad_norm": 0.32364997267723083, "learning_rate": 3.4407837961317838e-06, "loss": 0.0303, "step": 48930 }, { "epoch": 2.484136250571095, "grad_norm": 0.37183573842048645, "learning_rate": 3.4390916628593673e-06, "loss": 0.0318, "step": 48935 }, { "epoch": 2.4843900705619575, "grad_norm": 0.2628357410430908, "learning_rate": 3.4373995295869504e-06, "loss": 0.0271, "step": 48940 }, { "epoch": 2.48464389055282, "grad_norm": 0.290749192237854, "learning_rate": 3.435707396314534e-06, "loss": 0.0273, "step": 48945 }, { "epoch": 2.4848977105436822, "grad_norm": 0.8906141519546509, "learning_rate": 3.4340152630421175e-06, "loss": 0.0314, "step": 48950 }, { "epoch": 2.485151530534545, "grad_norm": 0.2430388629436493, "learning_rate": 3.4323231297697006e-06, "loss": 0.0212, "step": 48955 }, { "epoch": 2.4854053505254075, "grad_norm": 0.28215649724006653, "learning_rate": 3.4306309964972846e-06, "loss": 0.0382, "step": 48960 }, { "epoch": 2.48565917051627, "grad_norm": 0.3023797869682312, "learning_rate": 3.4289388632248677e-06, "loss": 0.0352, "step": 48965 }, { "epoch": 2.485912990507132, "grad_norm": 0.34995901584625244, "learning_rate": 3.4272467299524513e-06, "loss": 0.032, "step": 48970 }, { "epoch": 2.486166810497995, "grad_norm": 0.3282700181007385, "learning_rate": 3.425554596680035e-06, "loss": 0.0299, "step": 48975 }, { "epoch": 2.4864206304888574, "grad_norm": 0.288655549287796, "learning_rate": 3.4238624634076183e-06, "loss": 0.0281, "step": 48980 }, { "epoch": 2.48667445047972, "grad_norm": 0.5877058506011963, "learning_rate": 3.4221703301352015e-06, "loss": 0.0331, "step": 48985 }, { "epoch": 2.486928270470582, "grad_norm": 0.27871277928352356, "learning_rate": 3.4204781968627854e-06, "loss": 0.0335, "step": 48990 }, { "epoch": 2.487182090461445, "grad_norm": 0.37147513031959534, "learning_rate": 3.4187860635903686e-06, "loss": 0.0345, "step": 48995 }, { "epoch": 2.4874359104523074, "grad_norm": 0.3929043710231781, "learning_rate": 3.4170939303179525e-06, "loss": 0.0441, "step": 49000 }, { "epoch": 2.4876897304431695, "grad_norm": 0.32876530289649963, "learning_rate": 3.4154017970455356e-06, "loss": 0.0284, "step": 49005 }, { "epoch": 2.487943550434032, "grad_norm": 0.34745267033576965, "learning_rate": 3.413709663773119e-06, "loss": 0.0268, "step": 49010 }, { "epoch": 2.4881973704248947, "grad_norm": 0.489314466714859, "learning_rate": 3.4120175305007023e-06, "loss": 0.0364, "step": 49015 }, { "epoch": 2.4884511904157574, "grad_norm": 0.8166823983192444, "learning_rate": 3.410325397228286e-06, "loss": 0.035, "step": 49020 }, { "epoch": 2.4887050104066195, "grad_norm": 0.2987882196903229, "learning_rate": 3.4086332639558694e-06, "loss": 0.0366, "step": 49025 }, { "epoch": 2.488958830397482, "grad_norm": 0.25694194436073303, "learning_rate": 3.4069411306834525e-06, "loss": 0.0292, "step": 49030 }, { "epoch": 2.4892126503883447, "grad_norm": 0.9844125509262085, "learning_rate": 3.4052489974110365e-06, "loss": 0.0358, "step": 49035 }, { "epoch": 2.489466470379207, "grad_norm": 0.287350594997406, "learning_rate": 3.4035568641386196e-06, "loss": 0.0298, "step": 49040 }, { "epoch": 2.4897202903700695, "grad_norm": 0.5002569556236267, "learning_rate": 3.4018647308662036e-06, "loss": 0.0301, "step": 49045 }, { "epoch": 2.489974110360932, "grad_norm": 0.36009514331817627, "learning_rate": 3.4001725975937867e-06, "loss": 0.0319, "step": 49050 }, { "epoch": 2.4902279303517947, "grad_norm": 0.27335289120674133, "learning_rate": 3.3984804643213702e-06, "loss": 0.0241, "step": 49055 }, { "epoch": 2.490481750342657, "grad_norm": 0.43379515409469604, "learning_rate": 3.3967883310489534e-06, "loss": 0.0336, "step": 49060 }, { "epoch": 2.4907355703335194, "grad_norm": 0.5943365693092346, "learning_rate": 3.3950961977765373e-06, "loss": 0.0305, "step": 49065 }, { "epoch": 2.490989390324382, "grad_norm": 0.3196374475955963, "learning_rate": 3.3934040645041205e-06, "loss": 0.0322, "step": 49070 }, { "epoch": 2.491243210315244, "grad_norm": 0.34475764632225037, "learning_rate": 3.3917119312317044e-06, "loss": 0.032, "step": 49075 }, { "epoch": 2.491497030306107, "grad_norm": 0.27804192900657654, "learning_rate": 3.3900197979592875e-06, "loss": 0.0314, "step": 49080 }, { "epoch": 2.4917508502969694, "grad_norm": 0.3123543858528137, "learning_rate": 3.388327664686871e-06, "loss": 0.0288, "step": 49085 }, { "epoch": 2.492004670287832, "grad_norm": 0.2633233368396759, "learning_rate": 3.3866355314144546e-06, "loss": 0.0332, "step": 49090 }, { "epoch": 2.492258490278694, "grad_norm": 0.3150838613510132, "learning_rate": 3.384943398142038e-06, "loss": 0.0298, "step": 49095 }, { "epoch": 2.4925123102695568, "grad_norm": 0.2555038332939148, "learning_rate": 3.3832512648696213e-06, "loss": 0.0304, "step": 49100 }, { "epoch": 2.4927661302604194, "grad_norm": 0.2612498700618744, "learning_rate": 3.3815591315972044e-06, "loss": 0.0283, "step": 49105 }, { "epoch": 2.493019950251282, "grad_norm": 0.26925814151763916, "learning_rate": 3.3798669983247884e-06, "loss": 0.0275, "step": 49110 }, { "epoch": 2.493273770242144, "grad_norm": 0.2616172134876251, "learning_rate": 3.3781748650523715e-06, "loss": 0.0345, "step": 49115 }, { "epoch": 2.4935275902330067, "grad_norm": 0.24253271520137787, "learning_rate": 3.3764827317799555e-06, "loss": 0.0232, "step": 49120 }, { "epoch": 2.4937814102238693, "grad_norm": 0.2660481929779053, "learning_rate": 3.3747905985075386e-06, "loss": 0.0302, "step": 49125 }, { "epoch": 2.494035230214732, "grad_norm": 0.39158353209495544, "learning_rate": 3.373098465235122e-06, "loss": 0.0331, "step": 49130 }, { "epoch": 2.494289050205594, "grad_norm": 0.2616908550262451, "learning_rate": 3.3714063319627057e-06, "loss": 0.0333, "step": 49135 }, { "epoch": 2.4945428701964567, "grad_norm": 0.34958600997924805, "learning_rate": 3.3697141986902892e-06, "loss": 0.0341, "step": 49140 }, { "epoch": 2.4947966901873193, "grad_norm": 0.23418071866035461, "learning_rate": 3.3680220654178724e-06, "loss": 0.0362, "step": 49145 }, { "epoch": 2.4950505101781815, "grad_norm": 0.34714365005493164, "learning_rate": 3.3663299321454563e-06, "loss": 0.0299, "step": 49150 }, { "epoch": 2.495304330169044, "grad_norm": 0.47345075011253357, "learning_rate": 3.3646377988730394e-06, "loss": 0.0311, "step": 49155 }, { "epoch": 2.4955581501599067, "grad_norm": 0.26879772543907166, "learning_rate": 3.362945665600623e-06, "loss": 0.0292, "step": 49160 }, { "epoch": 2.4958119701507693, "grad_norm": 0.4332658350467682, "learning_rate": 3.3612535323282065e-06, "loss": 0.0307, "step": 49165 }, { "epoch": 2.4960657901416314, "grad_norm": 0.23341147601604462, "learning_rate": 3.35956139905579e-06, "loss": 0.0325, "step": 49170 }, { "epoch": 2.496319610132494, "grad_norm": 0.30529898405075073, "learning_rate": 3.357869265783373e-06, "loss": 0.0297, "step": 49175 }, { "epoch": 2.4965734301233566, "grad_norm": 0.2946845293045044, "learning_rate": 3.356177132510957e-06, "loss": 0.0252, "step": 49180 }, { "epoch": 2.496827250114219, "grad_norm": 0.32320156693458557, "learning_rate": 3.3544849992385403e-06, "loss": 0.03, "step": 49185 }, { "epoch": 2.4970810701050814, "grad_norm": 0.6085475087165833, "learning_rate": 3.3527928659661234e-06, "loss": 0.025, "step": 49190 }, { "epoch": 2.497334890095944, "grad_norm": 1.414477825164795, "learning_rate": 3.3511007326937074e-06, "loss": 0.0313, "step": 49195 }, { "epoch": 2.4975887100868066, "grad_norm": 0.36679041385650635, "learning_rate": 3.3494085994212905e-06, "loss": 0.0366, "step": 49200 }, { "epoch": 2.4978425300776688, "grad_norm": 0.4237407147884369, "learning_rate": 3.347716466148874e-06, "loss": 0.0366, "step": 49205 }, { "epoch": 2.4980963500685314, "grad_norm": 0.2822991907596588, "learning_rate": 3.3460243328764576e-06, "loss": 0.0307, "step": 49210 }, { "epoch": 2.498350170059394, "grad_norm": 0.21562591195106506, "learning_rate": 3.344332199604041e-06, "loss": 0.0298, "step": 49215 }, { "epoch": 2.498603990050256, "grad_norm": 0.3568347096443176, "learning_rate": 3.3426400663316243e-06, "loss": 0.0308, "step": 49220 }, { "epoch": 2.4988578100411187, "grad_norm": 0.42248597741127014, "learning_rate": 3.3409479330592082e-06, "loss": 0.0291, "step": 49225 }, { "epoch": 2.4991116300319813, "grad_norm": 0.3704330623149872, "learning_rate": 3.3392557997867913e-06, "loss": 0.0299, "step": 49230 }, { "epoch": 2.499365450022844, "grad_norm": 0.2916848659515381, "learning_rate": 3.3375636665143753e-06, "loss": 0.0315, "step": 49235 }, { "epoch": 2.499619270013706, "grad_norm": 0.25191444158554077, "learning_rate": 3.3358715332419584e-06, "loss": 0.0293, "step": 49240 }, { "epoch": 2.4998730900045687, "grad_norm": 0.3909609019756317, "learning_rate": 3.334179399969542e-06, "loss": 0.0355, "step": 49245 }, { "epoch": 2.5001269099954313, "grad_norm": 0.3124801218509674, "learning_rate": 3.332487266697125e-06, "loss": 0.0252, "step": 49250 }, { "epoch": 2.5003807299862935, "grad_norm": 1.1593356132507324, "learning_rate": 3.330795133424709e-06, "loss": 0.0293, "step": 49255 }, { "epoch": 2.500634549977156, "grad_norm": 0.318891704082489, "learning_rate": 3.329103000152292e-06, "loss": 0.0277, "step": 49260 }, { "epoch": 2.5008883699680187, "grad_norm": 0.25046369433403015, "learning_rate": 3.3274108668798753e-06, "loss": 0.0236, "step": 49265 }, { "epoch": 2.5011421899588813, "grad_norm": 0.8154666423797607, "learning_rate": 3.3257187336074593e-06, "loss": 0.04, "step": 49270 }, { "epoch": 2.501396009949744, "grad_norm": 0.5566936731338501, "learning_rate": 3.3240266003350424e-06, "loss": 0.0282, "step": 49275 }, { "epoch": 2.501649829940606, "grad_norm": 0.3415115177631378, "learning_rate": 3.3223344670626264e-06, "loss": 0.0314, "step": 49280 }, { "epoch": 2.5019036499314686, "grad_norm": 0.2734259366989136, "learning_rate": 3.3206423337902095e-06, "loss": 0.031, "step": 49285 }, { "epoch": 2.5021574699223312, "grad_norm": 0.179351806640625, "learning_rate": 3.318950200517793e-06, "loss": 0.025, "step": 49290 }, { "epoch": 2.5024112899131934, "grad_norm": 0.3378109335899353, "learning_rate": 3.317258067245376e-06, "loss": 0.0262, "step": 49295 }, { "epoch": 2.502665109904056, "grad_norm": 0.38636019825935364, "learning_rate": 3.31556593397296e-06, "loss": 0.0248, "step": 49300 }, { "epoch": 2.5029189298949186, "grad_norm": 0.21963395178318024, "learning_rate": 3.3138738007005432e-06, "loss": 0.027, "step": 49305 }, { "epoch": 2.503172749885781, "grad_norm": 0.2768746614456177, "learning_rate": 3.312181667428127e-06, "loss": 0.0253, "step": 49310 }, { "epoch": 2.5034265698766434, "grad_norm": 0.41382497549057007, "learning_rate": 3.3104895341557103e-06, "loss": 0.0315, "step": 49315 }, { "epoch": 2.503680389867506, "grad_norm": 0.5263127088546753, "learning_rate": 3.308797400883294e-06, "loss": 0.0287, "step": 49320 }, { "epoch": 2.5039342098583686, "grad_norm": 0.33877232670783997, "learning_rate": 3.3071052676108774e-06, "loss": 0.0286, "step": 49325 }, { "epoch": 2.5041880298492307, "grad_norm": 0.27942949533462524, "learning_rate": 3.305413134338461e-06, "loss": 0.0354, "step": 49330 }, { "epoch": 2.5044418498400933, "grad_norm": 0.35031718015670776, "learning_rate": 3.303721001066044e-06, "loss": 0.0369, "step": 49335 }, { "epoch": 2.504695669830956, "grad_norm": 0.32853952050209045, "learning_rate": 3.302028867793628e-06, "loss": 0.0297, "step": 49340 }, { "epoch": 2.5049494898218185, "grad_norm": 0.2991621792316437, "learning_rate": 3.300336734521211e-06, "loss": 0.0328, "step": 49345 }, { "epoch": 2.5052033098126807, "grad_norm": 0.28316929936408997, "learning_rate": 3.2986446012487943e-06, "loss": 0.0245, "step": 49350 }, { "epoch": 2.5054571298035433, "grad_norm": 0.20764075219631195, "learning_rate": 3.2969524679763783e-06, "loss": 0.0282, "step": 49355 }, { "epoch": 2.505710949794406, "grad_norm": 0.19954226911067963, "learning_rate": 3.2952603347039614e-06, "loss": 0.0246, "step": 49360 }, { "epoch": 2.505964769785268, "grad_norm": 0.3868962526321411, "learning_rate": 3.293568201431545e-06, "loss": 0.0236, "step": 49365 }, { "epoch": 2.5062185897761307, "grad_norm": 0.23645120859146118, "learning_rate": 3.2918760681591285e-06, "loss": 0.0395, "step": 49370 }, { "epoch": 2.5064724097669933, "grad_norm": 0.504305362701416, "learning_rate": 3.290183934886712e-06, "loss": 0.031, "step": 49375 }, { "epoch": 2.506726229757856, "grad_norm": 0.3593463599681854, "learning_rate": 3.288491801614295e-06, "loss": 0.0329, "step": 49380 }, { "epoch": 2.5069800497487185, "grad_norm": 0.3594639301300049, "learning_rate": 3.286799668341879e-06, "loss": 0.0345, "step": 49385 }, { "epoch": 2.5072338697395806, "grad_norm": 0.40261438488960266, "learning_rate": 3.2851075350694622e-06, "loss": 0.0306, "step": 49390 }, { "epoch": 2.5074876897304432, "grad_norm": 0.1871395707130432, "learning_rate": 3.283415401797046e-06, "loss": 0.0269, "step": 49395 }, { "epoch": 2.5077415097213054, "grad_norm": 0.26229771971702576, "learning_rate": 3.2817232685246293e-06, "loss": 0.0298, "step": 49400 }, { "epoch": 2.507995329712168, "grad_norm": 0.32470229268074036, "learning_rate": 3.280031135252213e-06, "loss": 0.0231, "step": 49405 }, { "epoch": 2.5082491497030306, "grad_norm": 0.24094648659229279, "learning_rate": 3.278339001979796e-06, "loss": 0.0268, "step": 49410 }, { "epoch": 2.508502969693893, "grad_norm": 0.30596256256103516, "learning_rate": 3.27664686870738e-06, "loss": 0.0267, "step": 49415 }, { "epoch": 2.508756789684756, "grad_norm": 0.6077698469161987, "learning_rate": 3.274954735434963e-06, "loss": 0.0372, "step": 49420 }, { "epoch": 2.509010609675618, "grad_norm": 0.3845747411251068, "learning_rate": 3.273262602162546e-06, "loss": 0.04, "step": 49425 }, { "epoch": 2.5092644296664806, "grad_norm": 0.3875996172428131, "learning_rate": 3.27157046889013e-06, "loss": 0.0307, "step": 49430 }, { "epoch": 2.509518249657343, "grad_norm": 0.33381351828575134, "learning_rate": 3.2698783356177133e-06, "loss": 0.034, "step": 49435 }, { "epoch": 2.5097720696482053, "grad_norm": 0.6025996208190918, "learning_rate": 3.2681862023452972e-06, "loss": 0.0387, "step": 49440 }, { "epoch": 2.510025889639068, "grad_norm": 0.2946641147136688, "learning_rate": 3.2664940690728804e-06, "loss": 0.0315, "step": 49445 }, { "epoch": 2.5102797096299305, "grad_norm": 0.6482958793640137, "learning_rate": 3.264801935800464e-06, "loss": 0.0308, "step": 49450 }, { "epoch": 2.510533529620793, "grad_norm": 0.26497596502304077, "learning_rate": 3.263109802528047e-06, "loss": 0.0322, "step": 49455 }, { "epoch": 2.5107873496116553, "grad_norm": 0.5210638046264648, "learning_rate": 3.261417669255631e-06, "loss": 0.0343, "step": 49460 }, { "epoch": 2.511041169602518, "grad_norm": 0.23202571272850037, "learning_rate": 3.259725535983214e-06, "loss": 0.0296, "step": 49465 }, { "epoch": 2.5112949895933805, "grad_norm": 0.3447119891643524, "learning_rate": 3.258033402710798e-06, "loss": 0.0361, "step": 49470 }, { "epoch": 2.5115488095842426, "grad_norm": 0.22966940701007843, "learning_rate": 3.2563412694383812e-06, "loss": 0.0253, "step": 49475 }, { "epoch": 2.5118026295751053, "grad_norm": 0.43663614988327026, "learning_rate": 3.2546491361659648e-06, "loss": 0.0384, "step": 49480 }, { "epoch": 2.512056449565968, "grad_norm": 0.3742697238922119, "learning_rate": 3.2529570028935483e-06, "loss": 0.0419, "step": 49485 }, { "epoch": 2.5123102695568305, "grad_norm": 0.3168654441833496, "learning_rate": 3.251264869621132e-06, "loss": 0.0234, "step": 49490 }, { "epoch": 2.5125640895476926, "grad_norm": 0.35261592268943787, "learning_rate": 3.249572736348715e-06, "loss": 0.0348, "step": 49495 }, { "epoch": 2.512817909538555, "grad_norm": 0.3630485534667969, "learning_rate": 3.247880603076299e-06, "loss": 0.0372, "step": 49500 }, { "epoch": 2.513071729529418, "grad_norm": 0.32719823718070984, "learning_rate": 3.246188469803882e-06, "loss": 0.0292, "step": 49505 }, { "epoch": 2.51332554952028, "grad_norm": 0.3303397297859192, "learning_rate": 3.244496336531465e-06, "loss": 0.0361, "step": 49510 }, { "epoch": 2.5135793695111426, "grad_norm": 0.3644379675388336, "learning_rate": 3.242804203259049e-06, "loss": 0.0334, "step": 49515 }, { "epoch": 2.513833189502005, "grad_norm": 0.2639160752296448, "learning_rate": 3.2411120699866323e-06, "loss": 0.0218, "step": 49520 }, { "epoch": 2.514087009492868, "grad_norm": 0.408854216337204, "learning_rate": 3.239419936714216e-06, "loss": 0.03, "step": 49525 }, { "epoch": 2.5143408294837304, "grad_norm": 0.29264143109321594, "learning_rate": 3.2377278034417994e-06, "loss": 0.0264, "step": 49530 }, { "epoch": 2.5145946494745925, "grad_norm": 0.25693920254707336, "learning_rate": 3.236035670169383e-06, "loss": 0.0387, "step": 49535 }, { "epoch": 2.514848469465455, "grad_norm": 0.36717018485069275, "learning_rate": 3.234343536896966e-06, "loss": 0.0352, "step": 49540 }, { "epoch": 2.5151022894563173, "grad_norm": 0.2756657004356384, "learning_rate": 3.23265140362455e-06, "loss": 0.0352, "step": 49545 }, { "epoch": 2.51535610944718, "grad_norm": 0.2907589077949524, "learning_rate": 3.230959270352133e-06, "loss": 0.0312, "step": 49550 }, { "epoch": 2.5156099294380425, "grad_norm": 0.3871802091598511, "learning_rate": 3.2292671370797167e-06, "loss": 0.041, "step": 49555 }, { "epoch": 2.515863749428905, "grad_norm": 0.3290282189846039, "learning_rate": 3.2275750038073e-06, "loss": 0.0229, "step": 49560 }, { "epoch": 2.5161175694197677, "grad_norm": 0.8194361925125122, "learning_rate": 3.2258828705348837e-06, "loss": 0.0366, "step": 49565 }, { "epoch": 2.51637138941063, "grad_norm": 0.3643014430999756, "learning_rate": 3.224190737262467e-06, "loss": 0.0345, "step": 49570 }, { "epoch": 2.5166252094014925, "grad_norm": 0.5090135931968689, "learning_rate": 3.222498603990051e-06, "loss": 0.0307, "step": 49575 }, { "epoch": 2.516879029392355, "grad_norm": 0.31708189845085144, "learning_rate": 3.220806470717634e-06, "loss": 0.0289, "step": 49580 }, { "epoch": 2.5171328493832172, "grad_norm": 0.4696919023990631, "learning_rate": 3.219114337445217e-06, "loss": 0.0393, "step": 49585 }, { "epoch": 2.51738666937408, "grad_norm": 0.4421710669994354, "learning_rate": 3.217422204172801e-06, "loss": 0.0371, "step": 49590 }, { "epoch": 2.5176404893649424, "grad_norm": 0.38796231150627136, "learning_rate": 3.215730070900384e-06, "loss": 0.0321, "step": 49595 }, { "epoch": 2.517894309355805, "grad_norm": 0.27723702788352966, "learning_rate": 3.2140379376279677e-06, "loss": 0.0235, "step": 49600 }, { "epoch": 2.518148129346667, "grad_norm": 0.27128395438194275, "learning_rate": 3.2123458043555513e-06, "loss": 0.0289, "step": 49605 }, { "epoch": 2.51840194933753, "grad_norm": 0.31028029322624207, "learning_rate": 3.210653671083135e-06, "loss": 0.0296, "step": 49610 }, { "epoch": 2.5186557693283924, "grad_norm": 0.2169944792985916, "learning_rate": 3.208961537810718e-06, "loss": 0.0299, "step": 49615 }, { "epoch": 2.5189095893192546, "grad_norm": 0.4345290958881378, "learning_rate": 3.207269404538302e-06, "loss": 0.0319, "step": 49620 }, { "epoch": 2.519163409310117, "grad_norm": 0.32916420698165894, "learning_rate": 3.205577271265885e-06, "loss": 0.0314, "step": 49625 }, { "epoch": 2.5194172293009798, "grad_norm": 0.2635617256164551, "learning_rate": 3.203885137993469e-06, "loss": 0.026, "step": 49630 }, { "epoch": 2.5196710492918424, "grad_norm": 0.5266566276550293, "learning_rate": 3.202193004721052e-06, "loss": 0.0374, "step": 49635 }, { "epoch": 2.519924869282705, "grad_norm": 0.35786283016204834, "learning_rate": 3.2005008714486356e-06, "loss": 0.0336, "step": 49640 }, { "epoch": 2.520178689273567, "grad_norm": 0.34204965829849243, "learning_rate": 3.1988087381762188e-06, "loss": 0.0341, "step": 49645 }, { "epoch": 2.5204325092644297, "grad_norm": 2.4020159244537354, "learning_rate": 3.1971166049038027e-06, "loss": 0.0332, "step": 49650 }, { "epoch": 2.520686329255292, "grad_norm": 0.30302706360816956, "learning_rate": 3.195424471631386e-06, "loss": 0.0282, "step": 49655 }, { "epoch": 2.5209401492461545, "grad_norm": 0.3472190201282501, "learning_rate": 3.19373233835897e-06, "loss": 0.0292, "step": 49660 }, { "epoch": 2.521193969237017, "grad_norm": 0.31304502487182617, "learning_rate": 3.192040205086553e-06, "loss": 0.0257, "step": 49665 }, { "epoch": 2.5214477892278797, "grad_norm": 0.3431473970413208, "learning_rate": 3.190348071814136e-06, "loss": 0.0325, "step": 49670 }, { "epoch": 2.5217016092187423, "grad_norm": 0.22988146543502808, "learning_rate": 3.18865593854172e-06, "loss": 0.0266, "step": 49675 }, { "epoch": 2.5219554292096045, "grad_norm": 0.32324403524398804, "learning_rate": 3.186963805269303e-06, "loss": 0.0264, "step": 49680 }, { "epoch": 2.522209249200467, "grad_norm": 0.374230295419693, "learning_rate": 3.1852716719968867e-06, "loss": 0.0336, "step": 49685 }, { "epoch": 2.5224630691913292, "grad_norm": 0.37457334995269775, "learning_rate": 3.18357953872447e-06, "loss": 0.0384, "step": 49690 }, { "epoch": 2.522716889182192, "grad_norm": 0.35331031680107117, "learning_rate": 3.1818874054520538e-06, "loss": 0.0324, "step": 49695 }, { "epoch": 2.5229707091730544, "grad_norm": 0.24105656147003174, "learning_rate": 3.180195272179637e-06, "loss": 0.0325, "step": 49700 }, { "epoch": 2.523224529163917, "grad_norm": 0.6230748891830444, "learning_rate": 3.178503138907221e-06, "loss": 0.0341, "step": 49705 }, { "epoch": 2.5234783491547796, "grad_norm": 0.27622297406196594, "learning_rate": 3.176811005634804e-06, "loss": 0.0314, "step": 49710 }, { "epoch": 2.523732169145642, "grad_norm": 0.4867340922355652, "learning_rate": 3.1751188723623875e-06, "loss": 0.0327, "step": 49715 }, { "epoch": 2.5239859891365044, "grad_norm": 0.19795769453048706, "learning_rate": 3.173426739089971e-06, "loss": 0.0297, "step": 49720 }, { "epoch": 2.524239809127367, "grad_norm": 0.32494819164276123, "learning_rate": 3.1717346058175546e-06, "loss": 0.0239, "step": 49725 }, { "epoch": 2.524493629118229, "grad_norm": 0.3516275882720947, "learning_rate": 3.1700424725451377e-06, "loss": 0.034, "step": 49730 }, { "epoch": 2.5247474491090918, "grad_norm": 0.3350643217563629, "learning_rate": 3.1683503392727217e-06, "loss": 0.0334, "step": 49735 }, { "epoch": 2.5250012690999544, "grad_norm": 0.24879878759384155, "learning_rate": 3.166658206000305e-06, "loss": 0.0302, "step": 49740 }, { "epoch": 2.525255089090817, "grad_norm": 0.28340357542037964, "learning_rate": 3.1649660727278884e-06, "loss": 0.0348, "step": 49745 }, { "epoch": 2.525508909081679, "grad_norm": 0.2749658524990082, "learning_rate": 3.163273939455472e-06, "loss": 0.0321, "step": 49750 }, { "epoch": 2.5257627290725417, "grad_norm": 0.45516085624694824, "learning_rate": 3.161581806183055e-06, "loss": 0.0397, "step": 49755 }, { "epoch": 2.5260165490634043, "grad_norm": 0.514147937297821, "learning_rate": 3.1598896729106386e-06, "loss": 0.0381, "step": 49760 }, { "epoch": 2.5262703690542665, "grad_norm": 0.2310241311788559, "learning_rate": 3.158197539638222e-06, "loss": 0.0309, "step": 49765 }, { "epoch": 2.526524189045129, "grad_norm": 0.33417701721191406, "learning_rate": 3.1565054063658057e-06, "loss": 0.0352, "step": 49770 }, { "epoch": 2.5267780090359917, "grad_norm": 0.19402511417865753, "learning_rate": 3.154813273093389e-06, "loss": 0.0324, "step": 49775 }, { "epoch": 2.5270318290268543, "grad_norm": 0.4040732681751251, "learning_rate": 3.1531211398209728e-06, "loss": 0.0405, "step": 49780 }, { "epoch": 2.527285649017717, "grad_norm": 0.45084550976753235, "learning_rate": 3.151429006548556e-06, "loss": 0.0321, "step": 49785 }, { "epoch": 2.527539469008579, "grad_norm": 0.3171873390674591, "learning_rate": 3.1497368732761394e-06, "loss": 0.0253, "step": 49790 }, { "epoch": 2.5277932889994417, "grad_norm": 0.33110058307647705, "learning_rate": 3.148044740003723e-06, "loss": 0.0292, "step": 49795 }, { "epoch": 2.528047108990304, "grad_norm": 0.3826856315135956, "learning_rate": 3.1463526067313065e-06, "loss": 0.031, "step": 49800 }, { "epoch": 2.5283009289811664, "grad_norm": 0.2924165725708008, "learning_rate": 3.1446604734588896e-06, "loss": 0.0296, "step": 49805 }, { "epoch": 2.528554748972029, "grad_norm": 0.22940395772457123, "learning_rate": 3.1429683401864736e-06, "loss": 0.0255, "step": 49810 }, { "epoch": 2.5288085689628916, "grad_norm": 0.20305858552455902, "learning_rate": 3.1412762069140567e-06, "loss": 0.0251, "step": 49815 }, { "epoch": 2.5290623889537542, "grad_norm": 0.36855000257492065, "learning_rate": 3.1395840736416407e-06, "loss": 0.03, "step": 49820 }, { "epoch": 2.5293162089446164, "grad_norm": 0.3701513409614563, "learning_rate": 3.137891940369224e-06, "loss": 0.0385, "step": 49825 }, { "epoch": 2.529570028935479, "grad_norm": 0.368150919675827, "learning_rate": 3.136199807096807e-06, "loss": 0.0305, "step": 49830 }, { "epoch": 2.5298238489263416, "grad_norm": 0.407752126455307, "learning_rate": 3.1345076738243905e-06, "loss": 0.038, "step": 49835 }, { "epoch": 2.5300776689172038, "grad_norm": 0.28789639472961426, "learning_rate": 3.132815540551974e-06, "loss": 0.0259, "step": 49840 }, { "epoch": 2.5303314889080664, "grad_norm": 0.30561700463294983, "learning_rate": 3.1311234072795576e-06, "loss": 0.0359, "step": 49845 }, { "epoch": 2.530585308898929, "grad_norm": 0.4357641339302063, "learning_rate": 3.1294312740071407e-06, "loss": 0.0244, "step": 49850 }, { "epoch": 2.5308391288897916, "grad_norm": 0.3821885287761688, "learning_rate": 3.1277391407347247e-06, "loss": 0.033, "step": 49855 }, { "epoch": 2.5310929488806537, "grad_norm": 0.28230446577072144, "learning_rate": 3.126047007462308e-06, "loss": 0.0385, "step": 49860 }, { "epoch": 2.5313467688715163, "grad_norm": 0.39753055572509766, "learning_rate": 3.1243548741898918e-06, "loss": 0.0361, "step": 49865 }, { "epoch": 2.531600588862379, "grad_norm": 0.2797062397003174, "learning_rate": 3.122662740917475e-06, "loss": 0.0287, "step": 49870 }, { "epoch": 2.531854408853241, "grad_norm": 0.27603286504745483, "learning_rate": 3.1209706076450584e-06, "loss": 0.0242, "step": 49875 }, { "epoch": 2.5321082288441037, "grad_norm": 0.3599488139152527, "learning_rate": 3.1192784743726415e-06, "loss": 0.0369, "step": 49880 }, { "epoch": 2.5323620488349663, "grad_norm": 0.2599182426929474, "learning_rate": 3.1175863411002255e-06, "loss": 0.0299, "step": 49885 }, { "epoch": 2.532615868825829, "grad_norm": 0.3144007921218872, "learning_rate": 3.1158942078278086e-06, "loss": 0.0284, "step": 49890 }, { "epoch": 2.532869688816691, "grad_norm": 0.24892964959144592, "learning_rate": 3.1142020745553926e-06, "loss": 0.0319, "step": 49895 }, { "epoch": 2.5331235088075537, "grad_norm": 0.3454008102416992, "learning_rate": 3.1125099412829757e-06, "loss": 0.0287, "step": 49900 }, { "epoch": 2.5333773287984163, "grad_norm": 0.6400479674339294, "learning_rate": 3.1108178080105593e-06, "loss": 0.0318, "step": 49905 }, { "epoch": 2.5336311487892784, "grad_norm": 0.2760767936706543, "learning_rate": 3.109125674738143e-06, "loss": 0.0311, "step": 49910 }, { "epoch": 2.533884968780141, "grad_norm": 0.40467485785484314, "learning_rate": 3.107433541465726e-06, "loss": 0.0323, "step": 49915 }, { "epoch": 2.5341387887710036, "grad_norm": 0.46811002492904663, "learning_rate": 3.1057414081933095e-06, "loss": 0.0348, "step": 49920 }, { "epoch": 2.5343926087618662, "grad_norm": 0.43257901072502136, "learning_rate": 3.1040492749208926e-06, "loss": 0.0345, "step": 49925 }, { "epoch": 2.534646428752729, "grad_norm": 0.2802751660346985, "learning_rate": 3.1023571416484766e-06, "loss": 0.0319, "step": 49930 }, { "epoch": 2.534900248743591, "grad_norm": 0.2783406376838684, "learning_rate": 3.1006650083760597e-06, "loss": 0.0304, "step": 49935 }, { "epoch": 2.5351540687344536, "grad_norm": 0.3503018021583557, "learning_rate": 3.0989728751036437e-06, "loss": 0.029, "step": 49940 }, { "epoch": 2.5354078887253158, "grad_norm": 0.42926421761512756, "learning_rate": 3.0972807418312268e-06, "loss": 0.0306, "step": 49945 }, { "epoch": 2.5356617087161784, "grad_norm": 0.2726653516292572, "learning_rate": 3.0955886085588103e-06, "loss": 0.0237, "step": 49950 }, { "epoch": 2.535915528707041, "grad_norm": 0.3227391242980957, "learning_rate": 3.093896475286394e-06, "loss": 0.0301, "step": 49955 }, { "epoch": 2.5361693486979036, "grad_norm": 0.28392598032951355, "learning_rate": 3.0922043420139774e-06, "loss": 0.0299, "step": 49960 }, { "epoch": 2.536423168688766, "grad_norm": 0.3558497726917267, "learning_rate": 3.0905122087415605e-06, "loss": 0.0283, "step": 49965 }, { "epoch": 2.5366769886796283, "grad_norm": 0.16093190014362335, "learning_rate": 3.0888200754691445e-06, "loss": 0.0243, "step": 49970 }, { "epoch": 2.536930808670491, "grad_norm": 0.3414202034473419, "learning_rate": 3.0871279421967276e-06, "loss": 0.035, "step": 49975 }, { "epoch": 2.5371846286613535, "grad_norm": 0.2506318688392639, "learning_rate": 3.0854358089243116e-06, "loss": 0.0319, "step": 49980 }, { "epoch": 2.5374384486522157, "grad_norm": 0.350211501121521, "learning_rate": 3.0837436756518947e-06, "loss": 0.0339, "step": 49985 }, { "epoch": 2.5376922686430783, "grad_norm": 0.4482111632823944, "learning_rate": 3.082051542379478e-06, "loss": 0.0364, "step": 49990 }, { "epoch": 2.537946088633941, "grad_norm": 0.25266894698143005, "learning_rate": 3.0803594091070614e-06, "loss": 0.0314, "step": 49995 }, { "epoch": 2.5381999086248035, "grad_norm": 0.2782626748085022, "learning_rate": 3.078667275834645e-06, "loss": 0.0269, "step": 50000 }, { "epoch": 2.5384537286156656, "grad_norm": 0.3150825798511505, "learning_rate": 3.0769751425622285e-06, "loss": 0.0337, "step": 50005 }, { "epoch": 2.5387075486065283, "grad_norm": 0.33296340703964233, "learning_rate": 3.0752830092898116e-06, "loss": 0.0312, "step": 50010 }, { "epoch": 2.538961368597391, "grad_norm": 0.4527866840362549, "learning_rate": 3.0735908760173956e-06, "loss": 0.0298, "step": 50015 }, { "epoch": 2.539215188588253, "grad_norm": 0.33419156074523926, "learning_rate": 3.0718987427449787e-06, "loss": 0.0305, "step": 50020 }, { "epoch": 2.5394690085791156, "grad_norm": 0.29681098461151123, "learning_rate": 3.0702066094725626e-06, "loss": 0.0314, "step": 50025 }, { "epoch": 2.539722828569978, "grad_norm": 0.3465264141559601, "learning_rate": 3.0685144762001458e-06, "loss": 0.027, "step": 50030 }, { "epoch": 2.539976648560841, "grad_norm": 0.3378564417362213, "learning_rate": 3.0668223429277293e-06, "loss": 0.0308, "step": 50035 }, { "epoch": 2.540230468551703, "grad_norm": 0.2836815118789673, "learning_rate": 3.0651302096553124e-06, "loss": 0.0269, "step": 50040 }, { "epoch": 2.5404842885425656, "grad_norm": 0.32169854640960693, "learning_rate": 3.0634380763828964e-06, "loss": 0.0253, "step": 50045 }, { "epoch": 2.540738108533428, "grad_norm": 0.29076623916625977, "learning_rate": 3.0617459431104795e-06, "loss": 0.0341, "step": 50050 }, { "epoch": 2.5409919285242903, "grad_norm": 0.36522313952445984, "learning_rate": 3.0600538098380635e-06, "loss": 0.0313, "step": 50055 }, { "epoch": 2.541245748515153, "grad_norm": 0.4595192074775696, "learning_rate": 3.0583616765656466e-06, "loss": 0.0383, "step": 50060 }, { "epoch": 2.5414995685060155, "grad_norm": 0.18228591978549957, "learning_rate": 3.05666954329323e-06, "loss": 0.0287, "step": 50065 }, { "epoch": 2.541753388496878, "grad_norm": 0.3524336814880371, "learning_rate": 3.0549774100208137e-06, "loss": 0.0272, "step": 50070 }, { "epoch": 2.5420072084877408, "grad_norm": 0.2865443825721741, "learning_rate": 3.053285276748397e-06, "loss": 0.031, "step": 50075 }, { "epoch": 2.542261028478603, "grad_norm": 0.3282105028629303, "learning_rate": 3.0515931434759804e-06, "loss": 0.0278, "step": 50080 }, { "epoch": 2.5425148484694655, "grad_norm": 0.27646273374557495, "learning_rate": 3.0499010102035635e-06, "loss": 0.0293, "step": 50085 }, { "epoch": 2.5427686684603277, "grad_norm": 0.3110489547252655, "learning_rate": 3.0482088769311475e-06, "loss": 0.0237, "step": 50090 }, { "epoch": 2.5430224884511903, "grad_norm": 0.4063291549682617, "learning_rate": 3.0465167436587306e-06, "loss": 0.0306, "step": 50095 }, { "epoch": 2.543276308442053, "grad_norm": 0.3129997253417969, "learning_rate": 3.0448246103863145e-06, "loss": 0.0305, "step": 50100 }, { "epoch": 2.5435301284329155, "grad_norm": 0.39521104097366333, "learning_rate": 3.0431324771138977e-06, "loss": 0.0406, "step": 50105 }, { "epoch": 2.543783948423778, "grad_norm": 0.29474636912345886, "learning_rate": 3.041440343841481e-06, "loss": 0.0318, "step": 50110 }, { "epoch": 2.5440377684146402, "grad_norm": 0.2978830933570862, "learning_rate": 3.0397482105690647e-06, "loss": 0.0348, "step": 50115 }, { "epoch": 2.544291588405503, "grad_norm": 0.21669217944145203, "learning_rate": 3.0380560772966483e-06, "loss": 0.0252, "step": 50120 }, { "epoch": 2.5445454083963654, "grad_norm": 0.429474413394928, "learning_rate": 3.0363639440242314e-06, "loss": 0.0327, "step": 50125 }, { "epoch": 2.5447992283872276, "grad_norm": 0.27436283230781555, "learning_rate": 3.0346718107518154e-06, "loss": 0.0335, "step": 50130 }, { "epoch": 2.54505304837809, "grad_norm": 0.22250846028327942, "learning_rate": 3.0329796774793985e-06, "loss": 0.0285, "step": 50135 }, { "epoch": 2.545306868368953, "grad_norm": 0.33518174290657043, "learning_rate": 3.031287544206982e-06, "loss": 0.0315, "step": 50140 }, { "epoch": 2.5455606883598154, "grad_norm": 0.24940243363380432, "learning_rate": 3.0295954109345656e-06, "loss": 0.0306, "step": 50145 }, { "epoch": 2.5458145083506776, "grad_norm": 0.44675156474113464, "learning_rate": 3.0279032776621487e-06, "loss": 0.0356, "step": 50150 }, { "epoch": 2.54606832834154, "grad_norm": 0.28247472643852234, "learning_rate": 3.0262111443897323e-06, "loss": 0.0397, "step": 50155 }, { "epoch": 2.546322148332403, "grad_norm": 0.3223744034767151, "learning_rate": 3.024519011117316e-06, "loss": 0.0289, "step": 50160 }, { "epoch": 2.546575968323265, "grad_norm": 0.3707750141620636, "learning_rate": 3.0228268778448993e-06, "loss": 0.0246, "step": 50165 }, { "epoch": 2.5468297883141275, "grad_norm": 0.27658745646476746, "learning_rate": 3.0211347445724825e-06, "loss": 0.033, "step": 50170 }, { "epoch": 2.54708360830499, "grad_norm": 0.42075684666633606, "learning_rate": 3.0194426113000664e-06, "loss": 0.0345, "step": 50175 }, { "epoch": 2.5473374282958527, "grad_norm": 0.2999601364135742, "learning_rate": 3.0177504780276496e-06, "loss": 0.0333, "step": 50180 }, { "epoch": 2.547591248286715, "grad_norm": 0.3456297218799591, "learning_rate": 3.016058344755233e-06, "loss": 0.0355, "step": 50185 }, { "epoch": 2.5478450682775775, "grad_norm": 0.24620822072029114, "learning_rate": 3.0143662114828166e-06, "loss": 0.027, "step": 50190 }, { "epoch": 2.54809888826844, "grad_norm": 0.4098372757434845, "learning_rate": 3.0126740782104e-06, "loss": 0.029, "step": 50195 }, { "epoch": 2.5483527082593023, "grad_norm": 0.2857387065887451, "learning_rate": 3.0109819449379833e-06, "loss": 0.0321, "step": 50200 }, { "epoch": 2.548606528250165, "grad_norm": 0.345076322555542, "learning_rate": 3.0092898116655673e-06, "loss": 0.0363, "step": 50205 }, { "epoch": 2.5488603482410275, "grad_norm": 0.3622076213359833, "learning_rate": 3.0075976783931504e-06, "loss": 0.0325, "step": 50210 }, { "epoch": 2.54911416823189, "grad_norm": 0.24855151772499084, "learning_rate": 3.0059055451207344e-06, "loss": 0.0286, "step": 50215 }, { "epoch": 2.5493679882227527, "grad_norm": 0.4033049941062927, "learning_rate": 3.0042134118483175e-06, "loss": 0.0244, "step": 50220 }, { "epoch": 2.549621808213615, "grad_norm": 0.16056959331035614, "learning_rate": 3.002521278575901e-06, "loss": 0.0292, "step": 50225 }, { "epoch": 2.5498756282044774, "grad_norm": 0.3058248460292816, "learning_rate": 3.000829145303484e-06, "loss": 0.0342, "step": 50230 }, { "epoch": 2.5501294481953396, "grad_norm": 0.5386940836906433, "learning_rate": 2.9991370120310677e-06, "loss": 0.0327, "step": 50235 }, { "epoch": 2.550383268186202, "grad_norm": 0.2744958698749542, "learning_rate": 2.9974448787586512e-06, "loss": 0.0252, "step": 50240 }, { "epoch": 2.550637088177065, "grad_norm": 0.325539767742157, "learning_rate": 2.9957527454862344e-06, "loss": 0.0305, "step": 50245 }, { "epoch": 2.5508909081679274, "grad_norm": 0.3471980392932892, "learning_rate": 2.9940606122138183e-06, "loss": 0.0335, "step": 50250 }, { "epoch": 2.55114472815879, "grad_norm": 0.5069236755371094, "learning_rate": 2.9923684789414015e-06, "loss": 0.0347, "step": 50255 }, { "epoch": 2.551398548149652, "grad_norm": 0.34677132964134216, "learning_rate": 2.9906763456689854e-06, "loss": 0.0312, "step": 50260 }, { "epoch": 2.5516523681405148, "grad_norm": 0.6088144183158875, "learning_rate": 2.9889842123965685e-06, "loss": 0.0342, "step": 50265 }, { "epoch": 2.5519061881313774, "grad_norm": 0.27897128462791443, "learning_rate": 2.987292079124152e-06, "loss": 0.0325, "step": 50270 }, { "epoch": 2.5521600081222395, "grad_norm": 0.2830289304256439, "learning_rate": 2.985599945851735e-06, "loss": 0.0338, "step": 50275 }, { "epoch": 2.552413828113102, "grad_norm": 0.3052821457386017, "learning_rate": 2.983907812579319e-06, "loss": 0.0302, "step": 50280 }, { "epoch": 2.5526676481039647, "grad_norm": 0.3779296576976776, "learning_rate": 2.9822156793069023e-06, "loss": 0.0309, "step": 50285 }, { "epoch": 2.5529214680948273, "grad_norm": 0.343472957611084, "learning_rate": 2.9805235460344863e-06, "loss": 0.0321, "step": 50290 }, { "epoch": 2.5531752880856895, "grad_norm": 0.299538254737854, "learning_rate": 2.9788314127620694e-06, "loss": 0.0349, "step": 50295 }, { "epoch": 2.553429108076552, "grad_norm": 0.24920311570167542, "learning_rate": 2.977139279489653e-06, "loss": 0.0271, "step": 50300 }, { "epoch": 2.5536829280674147, "grad_norm": 0.4240674376487732, "learning_rate": 2.9754471462172365e-06, "loss": 0.0382, "step": 50305 }, { "epoch": 2.553936748058277, "grad_norm": 0.17833265662193298, "learning_rate": 2.97375501294482e-06, "loss": 0.0297, "step": 50310 }, { "epoch": 2.5541905680491395, "grad_norm": 0.3323668837547302, "learning_rate": 2.972062879672403e-06, "loss": 0.029, "step": 50315 }, { "epoch": 2.554444388040002, "grad_norm": 0.23749475181102753, "learning_rate": 2.9703707463999863e-06, "loss": 0.0328, "step": 50320 }, { "epoch": 2.5546982080308647, "grad_norm": 0.25050121545791626, "learning_rate": 2.9686786131275702e-06, "loss": 0.0266, "step": 50325 }, { "epoch": 2.554952028021727, "grad_norm": 0.2653757333755493, "learning_rate": 2.9669864798551534e-06, "loss": 0.0302, "step": 50330 }, { "epoch": 2.5552058480125894, "grad_norm": 0.4240018427371979, "learning_rate": 2.9652943465827373e-06, "loss": 0.0373, "step": 50335 }, { "epoch": 2.555459668003452, "grad_norm": 0.2510708272457123, "learning_rate": 2.9636022133103204e-06, "loss": 0.028, "step": 50340 }, { "epoch": 2.555713487994314, "grad_norm": 0.4198581874370575, "learning_rate": 2.961910080037904e-06, "loss": 0.0411, "step": 50345 }, { "epoch": 2.555967307985177, "grad_norm": 0.40445321798324585, "learning_rate": 2.9602179467654875e-06, "loss": 0.0378, "step": 50350 }, { "epoch": 2.5562211279760394, "grad_norm": 0.3391188681125641, "learning_rate": 2.958525813493071e-06, "loss": 0.041, "step": 50355 }, { "epoch": 2.556474947966902, "grad_norm": 0.4442570209503174, "learning_rate": 2.956833680220654e-06, "loss": 0.0288, "step": 50360 }, { "epoch": 2.5567287679577646, "grad_norm": 0.3415834605693817, "learning_rate": 2.955141546948238e-06, "loss": 0.0284, "step": 50365 }, { "epoch": 2.5569825879486268, "grad_norm": 0.35875076055526733, "learning_rate": 2.9534494136758213e-06, "loss": 0.0305, "step": 50370 }, { "epoch": 2.5572364079394894, "grad_norm": 0.26490122079849243, "learning_rate": 2.951757280403405e-06, "loss": 0.0333, "step": 50375 }, { "epoch": 2.5574902279303515, "grad_norm": 0.31595557928085327, "learning_rate": 2.9500651471309884e-06, "loss": 0.0283, "step": 50380 }, { "epoch": 2.557744047921214, "grad_norm": 0.36508527398109436, "learning_rate": 2.948373013858572e-06, "loss": 0.0388, "step": 50385 }, { "epoch": 2.5579978679120767, "grad_norm": 0.2791566848754883, "learning_rate": 2.946680880586155e-06, "loss": 0.0299, "step": 50390 }, { "epoch": 2.5582516879029393, "grad_norm": 0.2070358693599701, "learning_rate": 2.9449887473137386e-06, "loss": 0.0326, "step": 50395 }, { "epoch": 2.558505507893802, "grad_norm": 0.2233690321445465, "learning_rate": 2.943296614041322e-06, "loss": 0.0283, "step": 50400 }, { "epoch": 2.558759327884664, "grad_norm": 0.3481098711490631, "learning_rate": 2.9416044807689053e-06, "loss": 0.0272, "step": 50405 }, { "epoch": 2.5590131478755267, "grad_norm": 0.41446927189826965, "learning_rate": 2.9399123474964892e-06, "loss": 0.0264, "step": 50410 }, { "epoch": 2.5592669678663893, "grad_norm": 0.20012246072292328, "learning_rate": 2.9382202142240723e-06, "loss": 0.0297, "step": 50415 }, { "epoch": 2.5595207878572515, "grad_norm": 0.6422848701477051, "learning_rate": 2.936528080951656e-06, "loss": 0.0281, "step": 50420 }, { "epoch": 2.559774607848114, "grad_norm": 0.3305734694004059, "learning_rate": 2.9348359476792394e-06, "loss": 0.0264, "step": 50425 }, { "epoch": 2.5600284278389767, "grad_norm": 0.25522372126579285, "learning_rate": 2.933143814406823e-06, "loss": 0.0337, "step": 50430 }, { "epoch": 2.5602822478298393, "grad_norm": 0.19020916521549225, "learning_rate": 2.931451681134406e-06, "loss": 0.0351, "step": 50435 }, { "epoch": 2.5605360678207014, "grad_norm": 0.3231838643550873, "learning_rate": 2.92975954786199e-06, "loss": 0.028, "step": 50440 }, { "epoch": 2.560789887811564, "grad_norm": 0.3561446964740753, "learning_rate": 2.928067414589573e-06, "loss": 0.0377, "step": 50445 }, { "epoch": 2.5610437078024266, "grad_norm": 0.25820067524909973, "learning_rate": 2.926375281317157e-06, "loss": 0.029, "step": 50450 }, { "epoch": 2.561297527793289, "grad_norm": 0.2987731397151947, "learning_rate": 2.9246831480447403e-06, "loss": 0.0316, "step": 50455 }, { "epoch": 2.5615513477841514, "grad_norm": 0.39317697286605835, "learning_rate": 2.922991014772324e-06, "loss": 0.0377, "step": 50460 }, { "epoch": 2.561805167775014, "grad_norm": 0.26665055751800537, "learning_rate": 2.921298881499907e-06, "loss": 0.0391, "step": 50465 }, { "epoch": 2.5620589877658766, "grad_norm": 0.4422428011894226, "learning_rate": 2.919606748227491e-06, "loss": 0.0285, "step": 50470 }, { "epoch": 2.562312807756739, "grad_norm": 1.0131126642227173, "learning_rate": 2.917914614955074e-06, "loss": 0.0384, "step": 50475 }, { "epoch": 2.5625666277476014, "grad_norm": 0.20424379408359528, "learning_rate": 2.916222481682657e-06, "loss": 0.0305, "step": 50480 }, { "epoch": 2.562820447738464, "grad_norm": 0.8846186399459839, "learning_rate": 2.914530348410241e-06, "loss": 0.0286, "step": 50485 }, { "epoch": 2.563074267729326, "grad_norm": 0.5644127726554871, "learning_rate": 2.9128382151378242e-06, "loss": 0.0321, "step": 50490 }, { "epoch": 2.5633280877201887, "grad_norm": 0.24908645451068878, "learning_rate": 2.911146081865408e-06, "loss": 0.0316, "step": 50495 }, { "epoch": 2.5635819077110513, "grad_norm": 0.2304484248161316, "learning_rate": 2.9094539485929913e-06, "loss": 0.0278, "step": 50500 }, { "epoch": 2.563835727701914, "grad_norm": 0.3092939257621765, "learning_rate": 2.907761815320575e-06, "loss": 0.0268, "step": 50505 }, { "epoch": 2.5640895476927765, "grad_norm": 0.18987365067005157, "learning_rate": 2.906069682048158e-06, "loss": 0.0242, "step": 50510 }, { "epoch": 2.5643433676836387, "grad_norm": 0.3280569016933441, "learning_rate": 2.904377548775742e-06, "loss": 0.0314, "step": 50515 }, { "epoch": 2.5645971876745013, "grad_norm": 0.26012980937957764, "learning_rate": 2.902685415503325e-06, "loss": 0.0333, "step": 50520 }, { "epoch": 2.5648510076653634, "grad_norm": 0.34960871934890747, "learning_rate": 2.900993282230909e-06, "loss": 0.0284, "step": 50525 }, { "epoch": 2.565104827656226, "grad_norm": 0.3368133306503296, "learning_rate": 2.899301148958492e-06, "loss": 0.0401, "step": 50530 }, { "epoch": 2.5653586476470887, "grad_norm": 0.24301111698150635, "learning_rate": 2.8976090156860757e-06, "loss": 0.0288, "step": 50535 }, { "epoch": 2.5656124676379513, "grad_norm": 0.33640941977500916, "learning_rate": 2.8959168824136593e-06, "loss": 0.029, "step": 50540 }, { "epoch": 2.565866287628814, "grad_norm": 0.2389553189277649, "learning_rate": 2.894224749141243e-06, "loss": 0.0349, "step": 50545 }, { "epoch": 2.566120107619676, "grad_norm": 0.34752050042152405, "learning_rate": 2.892532615868826e-06, "loss": 0.0256, "step": 50550 }, { "epoch": 2.5663739276105386, "grad_norm": 0.48742684721946716, "learning_rate": 2.8908404825964095e-06, "loss": 0.0262, "step": 50555 }, { "epoch": 2.566627747601401, "grad_norm": 0.2560529410839081, "learning_rate": 2.889148349323993e-06, "loss": 0.0298, "step": 50560 }, { "epoch": 2.5668815675922634, "grad_norm": 0.3415350317955017, "learning_rate": 2.887456216051576e-06, "loss": 0.029, "step": 50565 }, { "epoch": 2.567135387583126, "grad_norm": 0.26284313201904297, "learning_rate": 2.88576408277916e-06, "loss": 0.0316, "step": 50570 }, { "epoch": 2.5673892075739886, "grad_norm": 0.29155880212783813, "learning_rate": 2.8840719495067432e-06, "loss": 0.0358, "step": 50575 }, { "epoch": 2.567643027564851, "grad_norm": 0.3447701334953308, "learning_rate": 2.8823798162343268e-06, "loss": 0.0312, "step": 50580 }, { "epoch": 2.5678968475557133, "grad_norm": 0.3020358085632324, "learning_rate": 2.8806876829619103e-06, "loss": 0.0259, "step": 50585 }, { "epoch": 2.568150667546576, "grad_norm": 0.6446263790130615, "learning_rate": 2.878995549689494e-06, "loss": 0.0411, "step": 50590 }, { "epoch": 2.5684044875374386, "grad_norm": 0.6810723543167114, "learning_rate": 2.877303416417077e-06, "loss": 0.0298, "step": 50595 }, { "epoch": 2.5686583075283007, "grad_norm": 0.2936282157897949, "learning_rate": 2.875611283144661e-06, "loss": 0.0311, "step": 50600 }, { "epoch": 2.5689121275191633, "grad_norm": 0.23811903595924377, "learning_rate": 2.873919149872244e-06, "loss": 0.0336, "step": 50605 }, { "epoch": 2.569165947510026, "grad_norm": 0.29496702551841736, "learning_rate": 2.872227016599828e-06, "loss": 0.0324, "step": 50610 }, { "epoch": 2.5694197675008885, "grad_norm": 0.293457567691803, "learning_rate": 2.870534883327411e-06, "loss": 0.0335, "step": 50615 }, { "epoch": 2.569673587491751, "grad_norm": 0.36301565170288086, "learning_rate": 2.8688427500549947e-06, "loss": 0.0312, "step": 50620 }, { "epoch": 2.5699274074826133, "grad_norm": 0.32104015350341797, "learning_rate": 2.867150616782578e-06, "loss": 0.0289, "step": 50625 }, { "epoch": 2.570181227473476, "grad_norm": 0.3693762421607971, "learning_rate": 2.865458483510162e-06, "loss": 0.0374, "step": 50630 }, { "epoch": 2.570435047464338, "grad_norm": 0.4718092978000641, "learning_rate": 2.863766350237745e-06, "loss": 0.0373, "step": 50635 }, { "epoch": 2.5706888674552006, "grad_norm": 0.3577287197113037, "learning_rate": 2.862074216965328e-06, "loss": 0.0311, "step": 50640 }, { "epoch": 2.5709426874460632, "grad_norm": 0.3808220624923706, "learning_rate": 2.860382083692912e-06, "loss": 0.0357, "step": 50645 }, { "epoch": 2.571196507436926, "grad_norm": 0.41456541419029236, "learning_rate": 2.858689950420495e-06, "loss": 0.0367, "step": 50650 }, { "epoch": 2.5714503274277885, "grad_norm": 0.33753418922424316, "learning_rate": 2.856997817148079e-06, "loss": 0.0277, "step": 50655 }, { "epoch": 2.5717041474186506, "grad_norm": 0.30118709802627563, "learning_rate": 2.855305683875662e-06, "loss": 0.0342, "step": 50660 }, { "epoch": 2.571957967409513, "grad_norm": 0.3879167437553406, "learning_rate": 2.8536135506032458e-06, "loss": 0.0329, "step": 50665 }, { "epoch": 2.572211787400376, "grad_norm": 0.3201780617237091, "learning_rate": 2.851921417330829e-06, "loss": 0.0292, "step": 50670 }, { "epoch": 2.572465607391238, "grad_norm": 0.28086307644844055, "learning_rate": 2.850229284058413e-06, "loss": 0.0301, "step": 50675 }, { "epoch": 2.5727194273821006, "grad_norm": 0.8992939591407776, "learning_rate": 2.848537150785996e-06, "loss": 0.0234, "step": 50680 }, { "epoch": 2.572973247372963, "grad_norm": 0.3172586262226105, "learning_rate": 2.84684501751358e-06, "loss": 0.0268, "step": 50685 }, { "epoch": 2.573227067363826, "grad_norm": 0.25958219170570374, "learning_rate": 2.845152884241163e-06, "loss": 0.026, "step": 50690 }, { "epoch": 2.573480887354688, "grad_norm": 0.22440466284751892, "learning_rate": 2.8434607509687466e-06, "loss": 0.0292, "step": 50695 }, { "epoch": 2.5737347073455505, "grad_norm": 0.34167757630348206, "learning_rate": 2.84176861769633e-06, "loss": 0.031, "step": 50700 }, { "epoch": 2.573988527336413, "grad_norm": 0.3714410960674286, "learning_rate": 2.8400764844239137e-06, "loss": 0.0321, "step": 50705 }, { "epoch": 2.5742423473272753, "grad_norm": 0.38244038820266724, "learning_rate": 2.838384351151497e-06, "loss": 0.0268, "step": 50710 }, { "epoch": 2.574496167318138, "grad_norm": 0.42816251516342163, "learning_rate": 2.83669221787908e-06, "loss": 0.0337, "step": 50715 }, { "epoch": 2.5747499873090005, "grad_norm": 0.27166709303855896, "learning_rate": 2.835000084606664e-06, "loss": 0.0317, "step": 50720 }, { "epoch": 2.575003807299863, "grad_norm": 0.2813893258571625, "learning_rate": 2.833307951334247e-06, "loss": 0.0301, "step": 50725 }, { "epoch": 2.5752576272907253, "grad_norm": 0.6570308208465576, "learning_rate": 2.831615818061831e-06, "loss": 0.038, "step": 50730 }, { "epoch": 2.575511447281588, "grad_norm": 0.31474921107292175, "learning_rate": 2.829923684789414e-06, "loss": 0.0252, "step": 50735 }, { "epoch": 2.5757652672724505, "grad_norm": 0.21304818987846375, "learning_rate": 2.8282315515169977e-06, "loss": 0.0347, "step": 50740 }, { "epoch": 2.5760190872633126, "grad_norm": 0.5252552032470703, "learning_rate": 2.826539418244581e-06, "loss": 0.0312, "step": 50745 }, { "epoch": 2.5762729072541752, "grad_norm": 0.355497807264328, "learning_rate": 2.8248472849721647e-06, "loss": 0.026, "step": 50750 }, { "epoch": 2.576526727245038, "grad_norm": 0.2816254496574402, "learning_rate": 2.823155151699748e-06, "loss": 0.0286, "step": 50755 }, { "epoch": 2.5767805472359004, "grad_norm": 0.35320496559143066, "learning_rate": 2.821463018427332e-06, "loss": 0.0251, "step": 50760 }, { "epoch": 2.577034367226763, "grad_norm": 0.36286720633506775, "learning_rate": 2.819770885154915e-06, "loss": 0.0306, "step": 50765 }, { "epoch": 2.577288187217625, "grad_norm": 0.25672924518585205, "learning_rate": 2.8180787518824985e-06, "loss": 0.0244, "step": 50770 }, { "epoch": 2.577542007208488, "grad_norm": 0.2837968170642853, "learning_rate": 2.816386618610082e-06, "loss": 0.0316, "step": 50775 }, { "epoch": 2.57779582719935, "grad_norm": 0.24833479523658752, "learning_rate": 2.8146944853376656e-06, "loss": 0.0274, "step": 50780 }, { "epoch": 2.5780496471902126, "grad_norm": 0.3615385591983795, "learning_rate": 2.8130023520652487e-06, "loss": 0.0261, "step": 50785 }, { "epoch": 2.578303467181075, "grad_norm": 0.24618951976299286, "learning_rate": 2.8113102187928327e-06, "loss": 0.0318, "step": 50790 }, { "epoch": 2.5785572871719378, "grad_norm": 0.3765259385108948, "learning_rate": 2.809618085520416e-06, "loss": 0.0413, "step": 50795 }, { "epoch": 2.5788111071628004, "grad_norm": 0.27768805623054504, "learning_rate": 2.807925952247999e-06, "loss": 0.0261, "step": 50800 }, { "epoch": 2.5790649271536625, "grad_norm": 0.3444536030292511, "learning_rate": 2.806233818975583e-06, "loss": 0.0369, "step": 50805 }, { "epoch": 2.579318747144525, "grad_norm": 0.24263224005699158, "learning_rate": 2.804541685703166e-06, "loss": 0.0336, "step": 50810 }, { "epoch": 2.5795725671353877, "grad_norm": 0.33877697587013245, "learning_rate": 2.8028495524307496e-06, "loss": 0.032, "step": 50815 }, { "epoch": 2.57982638712625, "grad_norm": 0.2877173125743866, "learning_rate": 2.801157419158333e-06, "loss": 0.0345, "step": 50820 }, { "epoch": 2.5800802071171125, "grad_norm": 0.3200043737888336, "learning_rate": 2.7994652858859166e-06, "loss": 0.0354, "step": 50825 }, { "epoch": 2.580334027107975, "grad_norm": 0.29038625955581665, "learning_rate": 2.7977731526134998e-06, "loss": 0.0324, "step": 50830 }, { "epoch": 2.5805878470988377, "grad_norm": 0.24091961979866028, "learning_rate": 2.7960810193410837e-06, "loss": 0.0297, "step": 50835 }, { "epoch": 2.5808416670897, "grad_norm": 0.3212089538574219, "learning_rate": 2.794388886068667e-06, "loss": 0.0308, "step": 50840 }, { "epoch": 2.5810954870805625, "grad_norm": 0.33707278966903687, "learning_rate": 2.792696752796251e-06, "loss": 0.0366, "step": 50845 }, { "epoch": 2.581349307071425, "grad_norm": 0.6692639589309692, "learning_rate": 2.791004619523834e-06, "loss": 0.0304, "step": 50850 }, { "epoch": 2.5816031270622872, "grad_norm": 0.46995946764945984, "learning_rate": 2.7893124862514175e-06, "loss": 0.0363, "step": 50855 }, { "epoch": 2.58185694705315, "grad_norm": 0.2652070224285126, "learning_rate": 2.7876203529790006e-06, "loss": 0.0245, "step": 50860 }, { "epoch": 2.5821107670440124, "grad_norm": 0.24837477505207062, "learning_rate": 2.7859282197065846e-06, "loss": 0.0295, "step": 50865 }, { "epoch": 2.582364587034875, "grad_norm": 0.3100050389766693, "learning_rate": 2.7842360864341677e-06, "loss": 0.0354, "step": 50870 }, { "epoch": 2.582618407025737, "grad_norm": 0.22369235754013062, "learning_rate": 2.7825439531617517e-06, "loss": 0.0359, "step": 50875 }, { "epoch": 2.5828722270166, "grad_norm": 0.32840296626091003, "learning_rate": 2.7808518198893348e-06, "loss": 0.0281, "step": 50880 }, { "epoch": 2.5831260470074624, "grad_norm": 0.2909887135028839, "learning_rate": 2.779159686616918e-06, "loss": 0.0323, "step": 50885 }, { "epoch": 2.5833798669983246, "grad_norm": 0.4277086555957794, "learning_rate": 2.777467553344502e-06, "loss": 0.0356, "step": 50890 }, { "epoch": 2.583633686989187, "grad_norm": 0.33212006092071533, "learning_rate": 2.775775420072085e-06, "loss": 0.034, "step": 50895 }, { "epoch": 2.5838875069800498, "grad_norm": 0.30871519446372986, "learning_rate": 2.7740832867996685e-06, "loss": 0.0257, "step": 50900 }, { "epoch": 2.5841413269709124, "grad_norm": 0.26937437057495117, "learning_rate": 2.7723911535272517e-06, "loss": 0.0301, "step": 50905 }, { "epoch": 2.584395146961775, "grad_norm": 0.6625112891197205, "learning_rate": 2.7706990202548356e-06, "loss": 0.0302, "step": 50910 }, { "epoch": 2.584648966952637, "grad_norm": 0.5626937747001648, "learning_rate": 2.7690068869824187e-06, "loss": 0.0266, "step": 50915 }, { "epoch": 2.5849027869434997, "grad_norm": 0.29541710019111633, "learning_rate": 2.7673147537100027e-06, "loss": 0.0308, "step": 50920 }, { "epoch": 2.585156606934362, "grad_norm": 0.31842437386512756, "learning_rate": 2.765622620437586e-06, "loss": 0.0303, "step": 50925 }, { "epoch": 2.5854104269252245, "grad_norm": 0.35333383083343506, "learning_rate": 2.7639304871651694e-06, "loss": 0.0245, "step": 50930 }, { "epoch": 2.585664246916087, "grad_norm": 0.2786847949028015, "learning_rate": 2.762238353892753e-06, "loss": 0.0235, "step": 50935 }, { "epoch": 2.5859180669069497, "grad_norm": 0.36661455035209656, "learning_rate": 2.7605462206203365e-06, "loss": 0.0327, "step": 50940 }, { "epoch": 2.5861718868978123, "grad_norm": 0.42655545473098755, "learning_rate": 2.7588540873479196e-06, "loss": 0.0285, "step": 50945 }, { "epoch": 2.5864257068886745, "grad_norm": 0.3568045198917389, "learning_rate": 2.7571619540755036e-06, "loss": 0.0232, "step": 50950 }, { "epoch": 2.586679526879537, "grad_norm": 0.34968164563179016, "learning_rate": 2.7554698208030867e-06, "loss": 0.0268, "step": 50955 }, { "epoch": 2.5869333468703997, "grad_norm": 0.26196905970573425, "learning_rate": 2.75377768753067e-06, "loss": 0.0259, "step": 50960 }, { "epoch": 2.587187166861262, "grad_norm": 0.3904362618923187, "learning_rate": 2.7520855542582538e-06, "loss": 0.0343, "step": 50965 }, { "epoch": 2.5874409868521244, "grad_norm": 0.3048829138278961, "learning_rate": 2.750393420985837e-06, "loss": 0.028, "step": 50970 }, { "epoch": 2.587694806842987, "grad_norm": 0.365691214799881, "learning_rate": 2.7487012877134204e-06, "loss": 0.0261, "step": 50975 }, { "epoch": 2.5879486268338496, "grad_norm": 0.37310320138931274, "learning_rate": 2.747009154441004e-06, "loss": 0.0307, "step": 50980 }, { "epoch": 2.588202446824712, "grad_norm": 0.2797809839248657, "learning_rate": 2.7453170211685875e-06, "loss": 0.0265, "step": 50985 }, { "epoch": 2.5884562668155744, "grad_norm": 1.3563828468322754, "learning_rate": 2.7436248878961706e-06, "loss": 0.0269, "step": 50990 }, { "epoch": 2.588710086806437, "grad_norm": 0.2800024747848511, "learning_rate": 2.7419327546237546e-06, "loss": 0.0282, "step": 50995 }, { "epoch": 2.588963906797299, "grad_norm": 0.4817359745502472, "learning_rate": 2.7402406213513377e-06, "loss": 0.0353, "step": 51000 }, { "epoch": 2.5892177267881618, "grad_norm": 0.28154054284095764, "learning_rate": 2.7385484880789213e-06, "loss": 0.0306, "step": 51005 }, { "epoch": 2.5894715467790244, "grad_norm": 1.3750730752944946, "learning_rate": 2.736856354806505e-06, "loss": 0.0247, "step": 51010 }, { "epoch": 2.589725366769887, "grad_norm": 0.4572613537311554, "learning_rate": 2.7351642215340884e-06, "loss": 0.0349, "step": 51015 }, { "epoch": 2.589979186760749, "grad_norm": 0.28127723932266235, "learning_rate": 2.7334720882616715e-06, "loss": 0.0331, "step": 51020 }, { "epoch": 2.5902330067516117, "grad_norm": 0.3305010497570038, "learning_rate": 2.7317799549892555e-06, "loss": 0.0323, "step": 51025 }, { "epoch": 2.5904868267424743, "grad_norm": 0.3264330327510834, "learning_rate": 2.7300878217168386e-06, "loss": 0.0311, "step": 51030 }, { "epoch": 2.5907406467333365, "grad_norm": 0.351087749004364, "learning_rate": 2.7283956884444225e-06, "loss": 0.0298, "step": 51035 }, { "epoch": 2.590994466724199, "grad_norm": 0.3994324505329132, "learning_rate": 2.7267035551720057e-06, "loss": 0.0315, "step": 51040 }, { "epoch": 2.5912482867150617, "grad_norm": 0.31754663586616516, "learning_rate": 2.7250114218995888e-06, "loss": 0.0363, "step": 51045 }, { "epoch": 2.5915021067059243, "grad_norm": 0.2738948464393616, "learning_rate": 2.7233192886271723e-06, "loss": 0.0315, "step": 51050 }, { "epoch": 2.591755926696787, "grad_norm": 0.2503436803817749, "learning_rate": 2.721627155354756e-06, "loss": 0.03, "step": 51055 }, { "epoch": 2.592009746687649, "grad_norm": 0.32593145966529846, "learning_rate": 2.7199350220823394e-06, "loss": 0.0317, "step": 51060 }, { "epoch": 2.5922635666785117, "grad_norm": 0.23604324460029602, "learning_rate": 2.7182428888099225e-06, "loss": 0.0271, "step": 51065 }, { "epoch": 2.592517386669374, "grad_norm": 0.2927345335483551, "learning_rate": 2.7165507555375065e-06, "loss": 0.0335, "step": 51070 }, { "epoch": 2.5927712066602364, "grad_norm": 0.27732741832733154, "learning_rate": 2.7148586222650896e-06, "loss": 0.0304, "step": 51075 }, { "epoch": 2.593025026651099, "grad_norm": 0.28966188430786133, "learning_rate": 2.7131664889926736e-06, "loss": 0.0279, "step": 51080 }, { "epoch": 2.5932788466419616, "grad_norm": 0.4134257435798645, "learning_rate": 2.7114743557202567e-06, "loss": 0.0316, "step": 51085 }, { "epoch": 2.5935326666328242, "grad_norm": 0.8288419246673584, "learning_rate": 2.7097822224478403e-06, "loss": 0.0348, "step": 51090 }, { "epoch": 2.5937864866236864, "grad_norm": 0.45848527550697327, "learning_rate": 2.708090089175424e-06, "loss": 0.0364, "step": 51095 }, { "epoch": 2.594040306614549, "grad_norm": 0.3152906000614166, "learning_rate": 2.7063979559030074e-06, "loss": 0.029, "step": 51100 }, { "epoch": 2.5942941266054116, "grad_norm": 0.31780606508255005, "learning_rate": 2.7047058226305905e-06, "loss": 0.0339, "step": 51105 }, { "epoch": 2.5945479465962737, "grad_norm": 0.3507641851902008, "learning_rate": 2.7030136893581744e-06, "loss": 0.0339, "step": 51110 }, { "epoch": 2.5948017665871363, "grad_norm": 0.2539495527744293, "learning_rate": 2.7013215560857576e-06, "loss": 0.0257, "step": 51115 }, { "epoch": 2.595055586577999, "grad_norm": 0.3520859479904175, "learning_rate": 2.6996294228133407e-06, "loss": 0.032, "step": 51120 }, { "epoch": 2.5953094065688616, "grad_norm": 0.27110981941223145, "learning_rate": 2.6979372895409247e-06, "loss": 0.0285, "step": 51125 }, { "epoch": 2.5955632265597237, "grad_norm": 0.3485463857650757, "learning_rate": 2.6962451562685078e-06, "loss": 0.0292, "step": 51130 }, { "epoch": 2.5958170465505863, "grad_norm": 0.5310997366905212, "learning_rate": 2.6945530229960913e-06, "loss": 0.039, "step": 51135 }, { "epoch": 2.596070866541449, "grad_norm": 0.2819546163082123, "learning_rate": 2.692860889723675e-06, "loss": 0.0298, "step": 51140 }, { "epoch": 2.596324686532311, "grad_norm": 0.5256403088569641, "learning_rate": 2.6911687564512584e-06, "loss": 0.0371, "step": 51145 }, { "epoch": 2.5965785065231737, "grad_norm": 0.21236854791641235, "learning_rate": 2.6894766231788415e-06, "loss": 0.0273, "step": 51150 }, { "epoch": 2.5968323265140363, "grad_norm": 0.32847344875335693, "learning_rate": 2.6877844899064255e-06, "loss": 0.0307, "step": 51155 }, { "epoch": 2.597086146504899, "grad_norm": 0.3638588488101959, "learning_rate": 2.6860923566340086e-06, "loss": 0.035, "step": 51160 }, { "epoch": 2.597339966495761, "grad_norm": 0.584497332572937, "learning_rate": 2.684400223361592e-06, "loss": 0.0305, "step": 51165 }, { "epoch": 2.5975937864866236, "grad_norm": 0.2996261715888977, "learning_rate": 2.6827080900891757e-06, "loss": 0.0295, "step": 51170 }, { "epoch": 2.5978476064774862, "grad_norm": 0.35180866718292236, "learning_rate": 2.6810159568167593e-06, "loss": 0.0264, "step": 51175 }, { "epoch": 2.5981014264683484, "grad_norm": 0.29140573740005493, "learning_rate": 2.6793238235443424e-06, "loss": 0.0358, "step": 51180 }, { "epoch": 2.598355246459211, "grad_norm": 0.3289799690246582, "learning_rate": 2.6776316902719263e-06, "loss": 0.0268, "step": 51185 }, { "epoch": 2.5986090664500736, "grad_norm": 0.3575092554092407, "learning_rate": 2.6759395569995095e-06, "loss": 0.0291, "step": 51190 }, { "epoch": 2.598862886440936, "grad_norm": 0.3847915232181549, "learning_rate": 2.6742474237270934e-06, "loss": 0.0379, "step": 51195 }, { "epoch": 2.599116706431799, "grad_norm": 0.44137245416641235, "learning_rate": 2.6725552904546766e-06, "loss": 0.0276, "step": 51200 }, { "epoch": 2.599370526422661, "grad_norm": 0.3935795724391937, "learning_rate": 2.6708631571822597e-06, "loss": 0.0337, "step": 51205 }, { "epoch": 2.5996243464135236, "grad_norm": 0.3812408149242401, "learning_rate": 2.6691710239098432e-06, "loss": 0.0342, "step": 51210 }, { "epoch": 2.5998781664043857, "grad_norm": 0.3082849383354187, "learning_rate": 2.6674788906374268e-06, "loss": 0.0262, "step": 51215 }, { "epoch": 2.6001319863952483, "grad_norm": 0.4165632128715515, "learning_rate": 2.6657867573650103e-06, "loss": 0.0226, "step": 51220 }, { "epoch": 2.600385806386111, "grad_norm": 0.9450509548187256, "learning_rate": 2.6640946240925934e-06, "loss": 0.0395, "step": 51225 }, { "epoch": 2.6006396263769735, "grad_norm": 0.6938175559043884, "learning_rate": 2.6624024908201774e-06, "loss": 0.0318, "step": 51230 }, { "epoch": 2.600893446367836, "grad_norm": 0.23680560290813446, "learning_rate": 2.6607103575477605e-06, "loss": 0.0267, "step": 51235 }, { "epoch": 2.6011472663586983, "grad_norm": 0.25722989439964294, "learning_rate": 2.6590182242753445e-06, "loss": 0.0328, "step": 51240 }, { "epoch": 2.601401086349561, "grad_norm": 0.22755849361419678, "learning_rate": 2.6573260910029276e-06, "loss": 0.033, "step": 51245 }, { "epoch": 2.6016549063404235, "grad_norm": 0.3570180833339691, "learning_rate": 2.655633957730511e-06, "loss": 0.0372, "step": 51250 }, { "epoch": 2.6019087263312857, "grad_norm": 0.3486759662628174, "learning_rate": 2.6539418244580943e-06, "loss": 0.0295, "step": 51255 }, { "epoch": 2.6021625463221483, "grad_norm": 0.27461516857147217, "learning_rate": 2.6522496911856782e-06, "loss": 0.0279, "step": 51260 }, { "epoch": 2.602416366313011, "grad_norm": 0.24911971390247345, "learning_rate": 2.6505575579132614e-06, "loss": 0.0291, "step": 51265 }, { "epoch": 2.6026701863038735, "grad_norm": 0.23654115200042725, "learning_rate": 2.6488654246408453e-06, "loss": 0.0233, "step": 51270 }, { "epoch": 2.6029240062947356, "grad_norm": 0.4130326807498932, "learning_rate": 2.6471732913684284e-06, "loss": 0.0386, "step": 51275 }, { "epoch": 2.6031778262855982, "grad_norm": 0.19910484552383423, "learning_rate": 2.6454811580960116e-06, "loss": 0.0304, "step": 51280 }, { "epoch": 2.603431646276461, "grad_norm": 0.3008933961391449, "learning_rate": 2.6437890248235955e-06, "loss": 0.0343, "step": 51285 }, { "epoch": 2.603685466267323, "grad_norm": 0.38071808218955994, "learning_rate": 2.6420968915511787e-06, "loss": 0.0278, "step": 51290 }, { "epoch": 2.6039392862581856, "grad_norm": 0.22071151435375214, "learning_rate": 2.640404758278762e-06, "loss": 0.0262, "step": 51295 }, { "epoch": 2.604193106249048, "grad_norm": 0.4619128108024597, "learning_rate": 2.6387126250063453e-06, "loss": 0.0307, "step": 51300 }, { "epoch": 2.604446926239911, "grad_norm": 0.6991639733314514, "learning_rate": 2.6370204917339293e-06, "loss": 0.0329, "step": 51305 }, { "epoch": 2.6047007462307734, "grad_norm": 0.3269241154193878, "learning_rate": 2.6353283584615124e-06, "loss": 0.0281, "step": 51310 }, { "epoch": 2.6049545662216356, "grad_norm": 0.4293957054615021, "learning_rate": 2.6336362251890964e-06, "loss": 0.037, "step": 51315 }, { "epoch": 2.605208386212498, "grad_norm": 0.211612269282341, "learning_rate": 2.6319440919166795e-06, "loss": 0.0286, "step": 51320 }, { "epoch": 2.6054622062033603, "grad_norm": 0.2835038900375366, "learning_rate": 2.630251958644263e-06, "loss": 0.032, "step": 51325 }, { "epoch": 2.605716026194223, "grad_norm": 0.507549524307251, "learning_rate": 2.6285598253718466e-06, "loss": 0.0345, "step": 51330 }, { "epoch": 2.6059698461850855, "grad_norm": 0.4087376296520233, "learning_rate": 2.62686769209943e-06, "loss": 0.0322, "step": 51335 }, { "epoch": 2.606223666175948, "grad_norm": 0.3005754053592682, "learning_rate": 2.6251755588270133e-06, "loss": 0.0349, "step": 51340 }, { "epoch": 2.6064774861668107, "grad_norm": 0.4824678897857666, "learning_rate": 2.6234834255545972e-06, "loss": 0.0309, "step": 51345 }, { "epoch": 2.606731306157673, "grad_norm": 0.3900478780269623, "learning_rate": 2.6217912922821803e-06, "loss": 0.0324, "step": 51350 }, { "epoch": 2.6069851261485355, "grad_norm": 0.326042115688324, "learning_rate": 2.620099159009764e-06, "loss": 0.0299, "step": 51355 }, { "epoch": 2.6072389461393977, "grad_norm": 0.29876914620399475, "learning_rate": 2.6184070257373474e-06, "loss": 0.0265, "step": 51360 }, { "epoch": 2.6074927661302603, "grad_norm": 0.26470544934272766, "learning_rate": 2.6167148924649306e-06, "loss": 0.0289, "step": 51365 }, { "epoch": 2.607746586121123, "grad_norm": 0.31948786973953247, "learning_rate": 2.615022759192514e-06, "loss": 0.0273, "step": 51370 }, { "epoch": 2.6080004061119855, "grad_norm": 0.2920685112476349, "learning_rate": 2.6133306259200976e-06, "loss": 0.0326, "step": 51375 }, { "epoch": 2.608254226102848, "grad_norm": 0.4184175133705139, "learning_rate": 2.611638492647681e-06, "loss": 0.0311, "step": 51380 }, { "epoch": 2.6085080460937102, "grad_norm": 0.30750107765197754, "learning_rate": 2.6099463593752643e-06, "loss": 0.0313, "step": 51385 }, { "epoch": 2.608761866084573, "grad_norm": 0.37827202677726746, "learning_rate": 2.6082542261028483e-06, "loss": 0.0315, "step": 51390 }, { "epoch": 2.6090156860754354, "grad_norm": 0.2605781555175781, "learning_rate": 2.6065620928304314e-06, "loss": 0.0263, "step": 51395 }, { "epoch": 2.6092695060662976, "grad_norm": 0.40529435873031616, "learning_rate": 2.604869959558015e-06, "loss": 0.0262, "step": 51400 }, { "epoch": 2.60952332605716, "grad_norm": 0.4452066123485565, "learning_rate": 2.6031778262855985e-06, "loss": 0.0391, "step": 51405 }, { "epoch": 2.609777146048023, "grad_norm": 0.36278364062309265, "learning_rate": 2.601485693013182e-06, "loss": 0.0311, "step": 51410 }, { "epoch": 2.6100309660388854, "grad_norm": 0.25109902024269104, "learning_rate": 2.599793559740765e-06, "loss": 0.0296, "step": 51415 }, { "epoch": 2.6102847860297476, "grad_norm": 0.32898685336112976, "learning_rate": 2.598101426468349e-06, "loss": 0.0298, "step": 51420 }, { "epoch": 2.61053860602061, "grad_norm": 0.3027734160423279, "learning_rate": 2.5964092931959322e-06, "loss": 0.0232, "step": 51425 }, { "epoch": 2.6107924260114728, "grad_norm": 0.24649985134601593, "learning_rate": 2.594717159923516e-06, "loss": 0.0285, "step": 51430 }, { "epoch": 2.611046246002335, "grad_norm": 0.3023594319820404, "learning_rate": 2.5930250266510993e-06, "loss": 0.0275, "step": 51435 }, { "epoch": 2.6113000659931975, "grad_norm": 0.31649115681648254, "learning_rate": 2.591332893378683e-06, "loss": 0.0319, "step": 51440 }, { "epoch": 2.61155388598406, "grad_norm": 0.6469734311103821, "learning_rate": 2.589640760106266e-06, "loss": 0.0289, "step": 51445 }, { "epoch": 2.6118077059749227, "grad_norm": 0.415568470954895, "learning_rate": 2.5879486268338495e-06, "loss": 0.0256, "step": 51450 }, { "epoch": 2.6120615259657853, "grad_norm": 0.3895602822303772, "learning_rate": 2.586256493561433e-06, "loss": 0.0302, "step": 51455 }, { "epoch": 2.6123153459566475, "grad_norm": 0.33492904901504517, "learning_rate": 2.584564360289016e-06, "loss": 0.0388, "step": 51460 }, { "epoch": 2.61256916594751, "grad_norm": 0.5842387080192566, "learning_rate": 2.5828722270166e-06, "loss": 0.0331, "step": 51465 }, { "epoch": 2.6128229859383723, "grad_norm": 0.2856934368610382, "learning_rate": 2.5811800937441833e-06, "loss": 0.0305, "step": 51470 }, { "epoch": 2.613076805929235, "grad_norm": 0.35018250346183777, "learning_rate": 2.5794879604717673e-06, "loss": 0.0348, "step": 51475 }, { "epoch": 2.6133306259200975, "grad_norm": 0.2299485057592392, "learning_rate": 2.5777958271993504e-06, "loss": 0.0262, "step": 51480 }, { "epoch": 2.61358444591096, "grad_norm": 0.7264879941940308, "learning_rate": 2.576103693926934e-06, "loss": 0.0276, "step": 51485 }, { "epoch": 2.6138382659018227, "grad_norm": 0.30724015831947327, "learning_rate": 2.574411560654517e-06, "loss": 0.0316, "step": 51490 }, { "epoch": 2.614092085892685, "grad_norm": 0.309587299823761, "learning_rate": 2.572719427382101e-06, "loss": 0.0355, "step": 51495 }, { "epoch": 2.6143459058835474, "grad_norm": 1.0329458713531494, "learning_rate": 2.571027294109684e-06, "loss": 0.0293, "step": 51500 }, { "epoch": 2.61459972587441, "grad_norm": 0.30419212579727173, "learning_rate": 2.569335160837268e-06, "loss": 0.0299, "step": 51505 }, { "epoch": 2.614853545865272, "grad_norm": 0.336566686630249, "learning_rate": 2.5676430275648512e-06, "loss": 0.0285, "step": 51510 }, { "epoch": 2.615107365856135, "grad_norm": 0.3229808807373047, "learning_rate": 2.5659508942924348e-06, "loss": 0.0327, "step": 51515 }, { "epoch": 2.6153611858469974, "grad_norm": 0.25162097811698914, "learning_rate": 2.5642587610200183e-06, "loss": 0.031, "step": 51520 }, { "epoch": 2.61561500583786, "grad_norm": 0.5137783288955688, "learning_rate": 2.5625666277476014e-06, "loss": 0.0284, "step": 51525 }, { "epoch": 2.615868825828722, "grad_norm": 0.2492351233959198, "learning_rate": 2.560874494475185e-06, "loss": 0.0274, "step": 51530 }, { "epoch": 2.6161226458195848, "grad_norm": 0.5326012372970581, "learning_rate": 2.559182361202768e-06, "loss": 0.0369, "step": 51535 }, { "epoch": 2.6163764658104474, "grad_norm": 0.37052249908447266, "learning_rate": 2.557490227930352e-06, "loss": 0.0359, "step": 51540 }, { "epoch": 2.6166302858013095, "grad_norm": 0.20791205763816833, "learning_rate": 2.555798094657935e-06, "loss": 0.021, "step": 51545 }, { "epoch": 2.616884105792172, "grad_norm": 0.2117859572172165, "learning_rate": 2.554105961385519e-06, "loss": 0.0484, "step": 51550 }, { "epoch": 2.6171379257830347, "grad_norm": 0.452799528837204, "learning_rate": 2.5524138281131023e-06, "loss": 0.0291, "step": 51555 }, { "epoch": 2.6173917457738973, "grad_norm": 0.2676321864128113, "learning_rate": 2.550721694840686e-06, "loss": 0.0321, "step": 51560 }, { "epoch": 2.6176455657647595, "grad_norm": 0.39478787779808044, "learning_rate": 2.5490295615682694e-06, "loss": 0.034, "step": 51565 }, { "epoch": 2.617899385755622, "grad_norm": 0.3360627293586731, "learning_rate": 2.547337428295853e-06, "loss": 0.0293, "step": 51570 }, { "epoch": 2.6181532057464847, "grad_norm": 0.3433999717235565, "learning_rate": 2.545645295023436e-06, "loss": 0.0314, "step": 51575 }, { "epoch": 2.618407025737347, "grad_norm": 0.37200894951820374, "learning_rate": 2.54395316175102e-06, "loss": 0.0324, "step": 51580 }, { "epoch": 2.6186608457282095, "grad_norm": 0.4213031232357025, "learning_rate": 2.542261028478603e-06, "loss": 0.0339, "step": 51585 }, { "epoch": 2.618914665719072, "grad_norm": 0.4242783784866333, "learning_rate": 2.5405688952061867e-06, "loss": 0.0406, "step": 51590 }, { "epoch": 2.6191684857099347, "grad_norm": 0.3434397280216217, "learning_rate": 2.5388767619337702e-06, "loss": 0.03, "step": 51595 }, { "epoch": 2.6194223057007973, "grad_norm": 0.3476824462413788, "learning_rate": 2.5371846286613538e-06, "loss": 0.0326, "step": 51600 }, { "epoch": 2.6196761256916594, "grad_norm": 0.3337551951408386, "learning_rate": 2.535492495388937e-06, "loss": 0.0322, "step": 51605 }, { "epoch": 2.619929945682522, "grad_norm": 0.2564079761505127, "learning_rate": 2.5338003621165204e-06, "loss": 0.0347, "step": 51610 }, { "epoch": 2.620183765673384, "grad_norm": 0.4180367887020111, "learning_rate": 2.532108228844104e-06, "loss": 0.0297, "step": 51615 }, { "epoch": 2.620437585664247, "grad_norm": 0.3430015444755554, "learning_rate": 2.530416095571687e-06, "loss": 0.0303, "step": 51620 }, { "epoch": 2.6206914056551094, "grad_norm": 0.3892328143119812, "learning_rate": 2.528723962299271e-06, "loss": 0.0341, "step": 51625 }, { "epoch": 2.620945225645972, "grad_norm": 0.32110515236854553, "learning_rate": 2.527031829026854e-06, "loss": 0.0293, "step": 51630 }, { "epoch": 2.6211990456368346, "grad_norm": 0.3606153428554535, "learning_rate": 2.5253396957544377e-06, "loss": 0.033, "step": 51635 }, { "epoch": 2.6214528656276967, "grad_norm": 0.49945294857025146, "learning_rate": 2.5236475624820213e-06, "loss": 0.0346, "step": 51640 }, { "epoch": 2.6217066856185594, "grad_norm": 0.9243090748786926, "learning_rate": 2.521955429209605e-06, "loss": 0.029, "step": 51645 }, { "epoch": 2.621960505609422, "grad_norm": 0.20461325347423553, "learning_rate": 2.520263295937188e-06, "loss": 0.0294, "step": 51650 }, { "epoch": 2.622214325600284, "grad_norm": 0.19676488637924194, "learning_rate": 2.518571162664772e-06, "loss": 0.0318, "step": 51655 }, { "epoch": 2.6224681455911467, "grad_norm": 0.5037747621536255, "learning_rate": 2.516879029392355e-06, "loss": 0.0273, "step": 51660 }, { "epoch": 2.6227219655820093, "grad_norm": 0.37965553998947144, "learning_rate": 2.515186896119939e-06, "loss": 0.0333, "step": 51665 }, { "epoch": 2.622975785572872, "grad_norm": 0.6465805768966675, "learning_rate": 2.513494762847522e-06, "loss": 0.0321, "step": 51670 }, { "epoch": 2.623229605563734, "grad_norm": 0.2858799993991852, "learning_rate": 2.5118026295751057e-06, "loss": 0.0298, "step": 51675 }, { "epoch": 2.6234834255545967, "grad_norm": 0.4108503460884094, "learning_rate": 2.510110496302689e-06, "loss": 0.0394, "step": 51680 }, { "epoch": 2.6237372455454593, "grad_norm": 0.3374457359313965, "learning_rate": 2.5084183630302723e-06, "loss": 0.0277, "step": 51685 }, { "epoch": 2.6239910655363214, "grad_norm": 0.3668394386768341, "learning_rate": 2.506726229757856e-06, "loss": 0.0354, "step": 51690 }, { "epoch": 2.624244885527184, "grad_norm": 0.4046284854412079, "learning_rate": 2.505034096485439e-06, "loss": 0.0336, "step": 51695 }, { "epoch": 2.6244987055180466, "grad_norm": 0.32230740785598755, "learning_rate": 2.503341963213023e-06, "loss": 0.0322, "step": 51700 }, { "epoch": 2.6247525255089093, "grad_norm": 0.4608549475669861, "learning_rate": 2.501649829940606e-06, "loss": 0.0316, "step": 51705 }, { "epoch": 2.6250063454997714, "grad_norm": 0.43577080965042114, "learning_rate": 2.49995769666819e-06, "loss": 0.0345, "step": 51710 }, { "epoch": 2.625260165490634, "grad_norm": 0.21909955143928528, "learning_rate": 2.498265563395773e-06, "loss": 0.034, "step": 51715 }, { "epoch": 2.6255139854814966, "grad_norm": 0.31630709767341614, "learning_rate": 2.4965734301233567e-06, "loss": 0.0248, "step": 51720 }, { "epoch": 2.6257678054723588, "grad_norm": 0.23227165639400482, "learning_rate": 2.4948812968509403e-06, "loss": 0.0258, "step": 51725 }, { "epoch": 2.6260216254632214, "grad_norm": 0.3392637372016907, "learning_rate": 2.4931891635785234e-06, "loss": 0.0344, "step": 51730 }, { "epoch": 2.626275445454084, "grad_norm": 0.4881313145160675, "learning_rate": 2.491497030306107e-06, "loss": 0.033, "step": 51735 }, { "epoch": 2.6265292654449466, "grad_norm": 0.27817487716674805, "learning_rate": 2.4898048970336905e-06, "loss": 0.0287, "step": 51740 }, { "epoch": 2.626783085435809, "grad_norm": 0.31453368067741394, "learning_rate": 2.488112763761274e-06, "loss": 0.0288, "step": 51745 }, { "epoch": 2.6270369054266713, "grad_norm": 0.2960861921310425, "learning_rate": 2.4864206304888576e-06, "loss": 0.0319, "step": 51750 }, { "epoch": 2.627290725417534, "grad_norm": 0.34762755036354065, "learning_rate": 2.484728497216441e-06, "loss": 0.0285, "step": 51755 }, { "epoch": 2.627544545408396, "grad_norm": 0.4663408398628235, "learning_rate": 2.4830363639440242e-06, "loss": 0.0329, "step": 51760 }, { "epoch": 2.6277983653992587, "grad_norm": 0.3391950726509094, "learning_rate": 2.4813442306716078e-06, "loss": 0.0277, "step": 51765 }, { "epoch": 2.6280521853901213, "grad_norm": 0.27818772196769714, "learning_rate": 2.4796520973991913e-06, "loss": 0.0241, "step": 51770 }, { "epoch": 2.628306005380984, "grad_norm": 0.5784787535667419, "learning_rate": 2.477959964126775e-06, "loss": 0.0338, "step": 51775 }, { "epoch": 2.6285598253718465, "grad_norm": 0.25116094946861267, "learning_rate": 2.4762678308543584e-06, "loss": 0.0267, "step": 51780 }, { "epoch": 2.6288136453627087, "grad_norm": 0.4190313518047333, "learning_rate": 2.474575697581942e-06, "loss": 0.0366, "step": 51785 }, { "epoch": 2.6290674653535713, "grad_norm": 0.3404175341129303, "learning_rate": 2.4728835643095255e-06, "loss": 0.03, "step": 51790 }, { "epoch": 2.629321285344434, "grad_norm": 0.3389876186847687, "learning_rate": 2.4711914310371086e-06, "loss": 0.0292, "step": 51795 }, { "epoch": 2.629575105335296, "grad_norm": 0.8526211977005005, "learning_rate": 2.469499297764692e-06, "loss": 0.0283, "step": 51800 }, { "epoch": 2.6298289253261586, "grad_norm": 0.6599206924438477, "learning_rate": 2.4678071644922753e-06, "loss": 0.0448, "step": 51805 }, { "epoch": 2.6300827453170212, "grad_norm": 0.29481494426727295, "learning_rate": 2.466115031219859e-06, "loss": 0.0267, "step": 51810 }, { "epoch": 2.630336565307884, "grad_norm": 0.2825844883918762, "learning_rate": 2.4644228979474424e-06, "loss": 0.0232, "step": 51815 }, { "epoch": 2.630590385298746, "grad_norm": 0.3380671739578247, "learning_rate": 2.462730764675026e-06, "loss": 0.0391, "step": 51820 }, { "epoch": 2.6308442052896086, "grad_norm": 0.21934860944747925, "learning_rate": 2.4610386314026095e-06, "loss": 0.028, "step": 51825 }, { "epoch": 2.631098025280471, "grad_norm": 1.0363450050354004, "learning_rate": 2.459346498130193e-06, "loss": 0.0367, "step": 51830 }, { "epoch": 2.6313518452713334, "grad_norm": 0.4414408504962921, "learning_rate": 2.4576543648577765e-06, "loss": 0.0292, "step": 51835 }, { "epoch": 2.631605665262196, "grad_norm": 0.29103752970695496, "learning_rate": 2.4559622315853597e-06, "loss": 0.0279, "step": 51840 }, { "epoch": 2.6318594852530586, "grad_norm": 0.247410848736763, "learning_rate": 2.454270098312943e-06, "loss": 0.0352, "step": 51845 }, { "epoch": 2.632113305243921, "grad_norm": 0.36777010560035706, "learning_rate": 2.4525779650405268e-06, "loss": 0.0389, "step": 51850 }, { "epoch": 2.6323671252347833, "grad_norm": 0.3306533396244049, "learning_rate": 2.4508858317681103e-06, "loss": 0.0315, "step": 51855 }, { "epoch": 2.632620945225646, "grad_norm": 0.336128294467926, "learning_rate": 2.449193698495694e-06, "loss": 0.0365, "step": 51860 }, { "epoch": 2.6328747652165085, "grad_norm": 0.7833350896835327, "learning_rate": 2.4475015652232774e-06, "loss": 0.0352, "step": 51865 }, { "epoch": 2.6331285852073707, "grad_norm": 0.28254038095474243, "learning_rate": 2.445809431950861e-06, "loss": 0.0274, "step": 51870 }, { "epoch": 2.6333824051982333, "grad_norm": 0.21460244059562683, "learning_rate": 2.444117298678444e-06, "loss": 0.0301, "step": 51875 }, { "epoch": 2.633636225189096, "grad_norm": 0.2699926495552063, "learning_rate": 2.4424251654060276e-06, "loss": 0.0322, "step": 51880 }, { "epoch": 2.6338900451799585, "grad_norm": 0.20997563004493713, "learning_rate": 2.440733032133611e-06, "loss": 0.0222, "step": 51885 }, { "epoch": 2.634143865170821, "grad_norm": 0.5877103805541992, "learning_rate": 2.4390408988611943e-06, "loss": 0.037, "step": 51890 }, { "epoch": 2.6343976851616833, "grad_norm": 0.3468745946884155, "learning_rate": 2.437348765588778e-06, "loss": 0.0283, "step": 51895 }, { "epoch": 2.634651505152546, "grad_norm": 0.32325586676597595, "learning_rate": 2.4356566323163614e-06, "loss": 0.0345, "step": 51900 }, { "epoch": 2.634905325143408, "grad_norm": 0.3084450662136078, "learning_rate": 2.433964499043945e-06, "loss": 0.0383, "step": 51905 }, { "epoch": 2.6351591451342706, "grad_norm": 0.2576277554035187, "learning_rate": 2.4322723657715284e-06, "loss": 0.0308, "step": 51910 }, { "epoch": 2.6354129651251332, "grad_norm": 0.4685831367969513, "learning_rate": 2.430580232499112e-06, "loss": 0.0384, "step": 51915 }, { "epoch": 2.635666785115996, "grad_norm": 0.8822712302207947, "learning_rate": 2.428888099226695e-06, "loss": 0.0363, "step": 51920 }, { "epoch": 2.6359206051068584, "grad_norm": 0.2899301052093506, "learning_rate": 2.4271959659542787e-06, "loss": 0.0258, "step": 51925 }, { "epoch": 2.6361744250977206, "grad_norm": 0.40878719091415405, "learning_rate": 2.425503832681862e-06, "loss": 0.0341, "step": 51930 }, { "epoch": 2.636428245088583, "grad_norm": 0.37059518694877625, "learning_rate": 2.4238116994094457e-06, "loss": 0.0324, "step": 51935 }, { "epoch": 2.636682065079446, "grad_norm": 0.2958579957485199, "learning_rate": 2.4221195661370293e-06, "loss": 0.029, "step": 51940 }, { "epoch": 2.636935885070308, "grad_norm": 0.31784969568252563, "learning_rate": 2.420427432864613e-06, "loss": 0.024, "step": 51945 }, { "epoch": 2.6371897050611706, "grad_norm": 0.2704399526119232, "learning_rate": 2.4187352995921964e-06, "loss": 0.032, "step": 51950 }, { "epoch": 2.637443525052033, "grad_norm": 0.28617197275161743, "learning_rate": 2.4170431663197795e-06, "loss": 0.0315, "step": 51955 }, { "epoch": 2.6376973450428958, "grad_norm": 0.257956862449646, "learning_rate": 2.415351033047363e-06, "loss": 0.0271, "step": 51960 }, { "epoch": 2.637951165033758, "grad_norm": 0.25092050433158875, "learning_rate": 2.4136588997749466e-06, "loss": 0.0242, "step": 51965 }, { "epoch": 2.6382049850246205, "grad_norm": 0.36452189087867737, "learning_rate": 2.4119667665025297e-06, "loss": 0.0324, "step": 51970 }, { "epoch": 2.638458805015483, "grad_norm": 0.44480210542678833, "learning_rate": 2.4102746332301132e-06, "loss": 0.034, "step": 51975 }, { "epoch": 2.6387126250063453, "grad_norm": 0.1927006095647812, "learning_rate": 2.408582499957697e-06, "loss": 0.0254, "step": 51980 }, { "epoch": 2.638966444997208, "grad_norm": 0.2507188022136688, "learning_rate": 2.4068903666852803e-06, "loss": 0.0291, "step": 51985 }, { "epoch": 2.6392202649880705, "grad_norm": 0.3480997681617737, "learning_rate": 2.405198233412864e-06, "loss": 0.0336, "step": 51990 }, { "epoch": 2.639474084978933, "grad_norm": 0.28692418336868286, "learning_rate": 2.4035061001404474e-06, "loss": 0.0247, "step": 51995 }, { "epoch": 2.6397279049697953, "grad_norm": 0.3567029535770416, "learning_rate": 2.4018139668680305e-06, "loss": 0.0361, "step": 52000 }, { "epoch": 2.639981724960658, "grad_norm": 0.24052301049232483, "learning_rate": 2.400121833595614e-06, "loss": 0.0249, "step": 52005 }, { "epoch": 2.6402355449515205, "grad_norm": 0.34244805574417114, "learning_rate": 2.3984297003231976e-06, "loss": 0.0312, "step": 52010 }, { "epoch": 2.6404893649423826, "grad_norm": 1.291332483291626, "learning_rate": 2.396737567050781e-06, "loss": 0.032, "step": 52015 }, { "epoch": 2.6407431849332452, "grad_norm": 0.32589441537857056, "learning_rate": 2.3950454337783647e-06, "loss": 0.0273, "step": 52020 }, { "epoch": 2.640997004924108, "grad_norm": 0.2426830381155014, "learning_rate": 2.3933533005059483e-06, "loss": 0.0274, "step": 52025 }, { "epoch": 2.6412508249149704, "grad_norm": 0.2730412483215332, "learning_rate": 2.3916611672335314e-06, "loss": 0.0346, "step": 52030 }, { "epoch": 2.641504644905833, "grad_norm": 0.23146089911460876, "learning_rate": 2.389969033961115e-06, "loss": 0.0236, "step": 52035 }, { "epoch": 2.641758464896695, "grad_norm": 0.20168402791023254, "learning_rate": 2.3882769006886985e-06, "loss": 0.0281, "step": 52040 }, { "epoch": 2.642012284887558, "grad_norm": 0.3644895851612091, "learning_rate": 2.386584767416282e-06, "loss": 0.0337, "step": 52045 }, { "epoch": 2.64226610487842, "grad_norm": 0.47276461124420166, "learning_rate": 2.384892634143865e-06, "loss": 0.0304, "step": 52050 }, { "epoch": 2.6425199248692826, "grad_norm": 0.3690647780895233, "learning_rate": 2.3832005008714487e-06, "loss": 0.032, "step": 52055 }, { "epoch": 2.642773744860145, "grad_norm": 0.33413854241371155, "learning_rate": 2.3815083675990322e-06, "loss": 0.0267, "step": 52060 }, { "epoch": 2.6430275648510078, "grad_norm": 1.1163268089294434, "learning_rate": 2.3798162343266158e-06, "loss": 0.0275, "step": 52065 }, { "epoch": 2.6432813848418704, "grad_norm": 0.34472498297691345, "learning_rate": 2.3781241010541993e-06, "loss": 0.0305, "step": 52070 }, { "epoch": 2.6435352048327325, "grad_norm": 0.2940658926963806, "learning_rate": 2.3764319677817824e-06, "loss": 0.0293, "step": 52075 }, { "epoch": 2.643789024823595, "grad_norm": 0.24414999783039093, "learning_rate": 2.374739834509366e-06, "loss": 0.025, "step": 52080 }, { "epoch": 2.6440428448144577, "grad_norm": 0.2933432161808014, "learning_rate": 2.3730477012369495e-06, "loss": 0.0302, "step": 52085 }, { "epoch": 2.64429666480532, "grad_norm": 0.5342914462089539, "learning_rate": 2.371355567964533e-06, "loss": 0.0273, "step": 52090 }, { "epoch": 2.6445504847961825, "grad_norm": 0.35974082350730896, "learning_rate": 2.3696634346921166e-06, "loss": 0.0255, "step": 52095 }, { "epoch": 2.644804304787045, "grad_norm": 0.27689170837402344, "learning_rate": 2.3679713014197e-06, "loss": 0.0277, "step": 52100 }, { "epoch": 2.6450581247779077, "grad_norm": 0.5819672346115112, "learning_rate": 2.3662791681472837e-06, "loss": 0.0255, "step": 52105 }, { "epoch": 2.64531194476877, "grad_norm": 0.3903255760669708, "learning_rate": 2.364587034874867e-06, "loss": 0.0361, "step": 52110 }, { "epoch": 2.6455657647596325, "grad_norm": 0.2258889079093933, "learning_rate": 2.3628949016024504e-06, "loss": 0.0289, "step": 52115 }, { "epoch": 2.645819584750495, "grad_norm": 0.32111355662345886, "learning_rate": 2.361202768330034e-06, "loss": 0.023, "step": 52120 }, { "epoch": 2.646073404741357, "grad_norm": 0.2620217800140381, "learning_rate": 2.3595106350576175e-06, "loss": 0.0259, "step": 52125 }, { "epoch": 2.64632722473222, "grad_norm": 0.3304223120212555, "learning_rate": 2.3578185017852006e-06, "loss": 0.0291, "step": 52130 }, { "epoch": 2.6465810447230824, "grad_norm": 0.2463671863079071, "learning_rate": 2.356126368512784e-06, "loss": 0.0274, "step": 52135 }, { "epoch": 2.646834864713945, "grad_norm": 0.4745607376098633, "learning_rate": 2.3544342352403677e-06, "loss": 0.0259, "step": 52140 }, { "epoch": 2.6470886847048076, "grad_norm": 0.18332301080226898, "learning_rate": 2.3527421019679512e-06, "loss": 0.0306, "step": 52145 }, { "epoch": 2.64734250469567, "grad_norm": 0.37587037682533264, "learning_rate": 2.3510499686955348e-06, "loss": 0.0299, "step": 52150 }, { "epoch": 2.6475963246865324, "grad_norm": 0.4496591091156006, "learning_rate": 2.349357835423118e-06, "loss": 0.0279, "step": 52155 }, { "epoch": 2.6478501446773945, "grad_norm": 0.6839533448219299, "learning_rate": 2.3476657021507014e-06, "loss": 0.0284, "step": 52160 }, { "epoch": 2.648103964668257, "grad_norm": 0.32634589076042175, "learning_rate": 2.345973568878285e-06, "loss": 0.0281, "step": 52165 }, { "epoch": 2.6483577846591198, "grad_norm": 0.2792976200580597, "learning_rate": 2.3442814356058685e-06, "loss": 0.0332, "step": 52170 }, { "epoch": 2.6486116046499824, "grad_norm": 0.4249747693538666, "learning_rate": 2.342589302333452e-06, "loss": 0.0297, "step": 52175 }, { "epoch": 2.648865424640845, "grad_norm": 0.28616654872894287, "learning_rate": 2.3408971690610356e-06, "loss": 0.0327, "step": 52180 }, { "epoch": 2.649119244631707, "grad_norm": 0.2563963830471039, "learning_rate": 2.339205035788619e-06, "loss": 0.0272, "step": 52185 }, { "epoch": 2.6493730646225697, "grad_norm": 0.4164884388446808, "learning_rate": 2.3375129025162023e-06, "loss": 0.0285, "step": 52190 }, { "epoch": 2.649626884613432, "grad_norm": 0.4970816969871521, "learning_rate": 2.335820769243786e-06, "loss": 0.0301, "step": 52195 }, { "epoch": 2.6498807046042945, "grad_norm": 0.2564595341682434, "learning_rate": 2.3341286359713694e-06, "loss": 0.0295, "step": 52200 }, { "epoch": 2.650134524595157, "grad_norm": 0.343791127204895, "learning_rate": 2.332436502698953e-06, "loss": 0.0295, "step": 52205 }, { "epoch": 2.6503883445860197, "grad_norm": 0.2889670133590698, "learning_rate": 2.330744369426536e-06, "loss": 0.0255, "step": 52210 }, { "epoch": 2.6506421645768823, "grad_norm": 0.5983048677444458, "learning_rate": 2.3290522361541196e-06, "loss": 0.0273, "step": 52215 }, { "epoch": 2.6508959845677444, "grad_norm": 0.5745328664779663, "learning_rate": 2.327360102881703e-06, "loss": 0.0264, "step": 52220 }, { "epoch": 2.651149804558607, "grad_norm": 0.28300371766090393, "learning_rate": 2.3256679696092867e-06, "loss": 0.0311, "step": 52225 }, { "epoch": 2.6514036245494697, "grad_norm": 0.2970012128353119, "learning_rate": 2.32397583633687e-06, "loss": 0.0338, "step": 52230 }, { "epoch": 2.651657444540332, "grad_norm": 0.4181409776210785, "learning_rate": 2.3222837030644533e-06, "loss": 0.026, "step": 52235 }, { "epoch": 2.6519112645311944, "grad_norm": 0.31793472170829773, "learning_rate": 2.320591569792037e-06, "loss": 0.0355, "step": 52240 }, { "epoch": 2.652165084522057, "grad_norm": 0.3255579173564911, "learning_rate": 2.3188994365196204e-06, "loss": 0.0306, "step": 52245 }, { "epoch": 2.6524189045129196, "grad_norm": 0.35661157965660095, "learning_rate": 2.317207303247204e-06, "loss": 0.0335, "step": 52250 }, { "epoch": 2.6526727245037818, "grad_norm": 0.2453710436820984, "learning_rate": 2.3155151699747875e-06, "loss": 0.0285, "step": 52255 }, { "epoch": 2.6529265444946444, "grad_norm": 0.36720964312553406, "learning_rate": 2.313823036702371e-06, "loss": 0.034, "step": 52260 }, { "epoch": 2.653180364485507, "grad_norm": 0.32312971353530884, "learning_rate": 2.3121309034299546e-06, "loss": 0.0299, "step": 52265 }, { "epoch": 2.653434184476369, "grad_norm": 0.39574339985847473, "learning_rate": 2.3104387701575377e-06, "loss": 0.0318, "step": 52270 }, { "epoch": 2.6536880044672317, "grad_norm": 0.21212691068649292, "learning_rate": 2.3087466368851213e-06, "loss": 0.0292, "step": 52275 }, { "epoch": 2.6539418244580943, "grad_norm": 0.2754327356815338, "learning_rate": 2.307054503612705e-06, "loss": 0.0278, "step": 52280 }, { "epoch": 2.654195644448957, "grad_norm": 0.3506637513637543, "learning_rate": 2.3053623703402884e-06, "loss": 0.0373, "step": 52285 }, { "epoch": 2.6544494644398196, "grad_norm": 0.3828669786453247, "learning_rate": 2.3036702370678715e-06, "loss": 0.0338, "step": 52290 }, { "epoch": 2.6547032844306817, "grad_norm": 0.31429722905158997, "learning_rate": 2.301978103795455e-06, "loss": 0.0328, "step": 52295 }, { "epoch": 2.6549571044215443, "grad_norm": 0.3292172849178314, "learning_rate": 2.3002859705230386e-06, "loss": 0.0309, "step": 52300 }, { "epoch": 2.6552109244124065, "grad_norm": 0.3343614339828491, "learning_rate": 2.298593837250622e-06, "loss": 0.0296, "step": 52305 }, { "epoch": 2.655464744403269, "grad_norm": 0.245424285531044, "learning_rate": 2.2969017039782057e-06, "loss": 0.0309, "step": 52310 }, { "epoch": 2.6557185643941317, "grad_norm": 0.3650681674480438, "learning_rate": 2.2952095707057888e-06, "loss": 0.0319, "step": 52315 }, { "epoch": 2.6559723843849943, "grad_norm": 0.5489715337753296, "learning_rate": 2.2935174374333723e-06, "loss": 0.0289, "step": 52320 }, { "epoch": 2.656226204375857, "grad_norm": 0.6062617301940918, "learning_rate": 2.291825304160956e-06, "loss": 0.0304, "step": 52325 }, { "epoch": 2.656480024366719, "grad_norm": 0.27395087480545044, "learning_rate": 2.2901331708885394e-06, "loss": 0.0307, "step": 52330 }, { "epoch": 2.6567338443575816, "grad_norm": 0.2297271490097046, "learning_rate": 2.288441037616123e-06, "loss": 0.0285, "step": 52335 }, { "epoch": 2.6569876643484442, "grad_norm": 0.32028794288635254, "learning_rate": 2.2867489043437065e-06, "loss": 0.0345, "step": 52340 }, { "epoch": 2.6572414843393064, "grad_norm": 0.20256635546684265, "learning_rate": 2.2850567710712896e-06, "loss": 0.027, "step": 52345 }, { "epoch": 2.657495304330169, "grad_norm": 0.7101668119430542, "learning_rate": 2.283364637798873e-06, "loss": 0.0304, "step": 52350 }, { "epoch": 2.6577491243210316, "grad_norm": 0.2871030271053314, "learning_rate": 2.2816725045264567e-06, "loss": 0.0293, "step": 52355 }, { "epoch": 2.658002944311894, "grad_norm": 0.39420485496520996, "learning_rate": 2.2799803712540402e-06, "loss": 0.0349, "step": 52360 }, { "epoch": 2.6582567643027564, "grad_norm": 0.2553163468837738, "learning_rate": 2.278288237981624e-06, "loss": 0.0279, "step": 52365 }, { "epoch": 2.658510584293619, "grad_norm": 0.24502582848072052, "learning_rate": 2.2765961047092073e-06, "loss": 0.028, "step": 52370 }, { "epoch": 2.6587644042844816, "grad_norm": 0.41072553396224976, "learning_rate": 2.2749039714367905e-06, "loss": 0.0246, "step": 52375 }, { "epoch": 2.6590182242753437, "grad_norm": 0.7050197720527649, "learning_rate": 2.273211838164374e-06, "loss": 0.0271, "step": 52380 }, { "epoch": 2.6592720442662063, "grad_norm": 0.6191830039024353, "learning_rate": 2.2715197048919575e-06, "loss": 0.0309, "step": 52385 }, { "epoch": 2.659525864257069, "grad_norm": 0.22344526648521423, "learning_rate": 2.2698275716195407e-06, "loss": 0.0266, "step": 52390 }, { "epoch": 2.6597796842479315, "grad_norm": 0.3696863353252411, "learning_rate": 2.2681354383471242e-06, "loss": 0.0291, "step": 52395 }, { "epoch": 2.6600335042387937, "grad_norm": 0.4067589044570923, "learning_rate": 2.2664433050747078e-06, "loss": 0.0308, "step": 52400 }, { "epoch": 2.6602873242296563, "grad_norm": 0.2778363525867462, "learning_rate": 2.2647511718022913e-06, "loss": 0.0253, "step": 52405 }, { "epoch": 2.660541144220519, "grad_norm": 0.40471798181533813, "learning_rate": 2.263059038529875e-06, "loss": 0.029, "step": 52410 }, { "epoch": 2.660794964211381, "grad_norm": 0.31421124935150146, "learning_rate": 2.2613669052574584e-06, "loss": 0.0298, "step": 52415 }, { "epoch": 2.6610487842022437, "grad_norm": 0.39784589409828186, "learning_rate": 2.259674771985042e-06, "loss": 0.0369, "step": 52420 }, { "epoch": 2.6613026041931063, "grad_norm": 0.3476801812648773, "learning_rate": 2.257982638712625e-06, "loss": 0.0311, "step": 52425 }, { "epoch": 2.661556424183969, "grad_norm": 0.3795233368873596, "learning_rate": 2.2562905054402086e-06, "loss": 0.039, "step": 52430 }, { "epoch": 2.6618102441748315, "grad_norm": 0.6271621584892273, "learning_rate": 2.254598372167792e-06, "loss": 0.0313, "step": 52435 }, { "epoch": 2.6620640641656936, "grad_norm": 0.3377338647842407, "learning_rate": 2.2529062388953757e-06, "loss": 0.0317, "step": 52440 }, { "epoch": 2.6623178841565562, "grad_norm": 0.3012075126171112, "learning_rate": 2.2512141056229592e-06, "loss": 0.0316, "step": 52445 }, { "epoch": 2.6625717041474184, "grad_norm": 0.3194831311702728, "learning_rate": 2.2495219723505428e-06, "loss": 0.0339, "step": 52450 }, { "epoch": 2.662825524138281, "grad_norm": 0.3819662630558014, "learning_rate": 2.247829839078126e-06, "loss": 0.0286, "step": 52455 }, { "epoch": 2.6630793441291436, "grad_norm": 0.24752876162528992, "learning_rate": 2.2461377058057094e-06, "loss": 0.0328, "step": 52460 }, { "epoch": 2.663333164120006, "grad_norm": 0.3253592550754547, "learning_rate": 2.244445572533293e-06, "loss": 0.0269, "step": 52465 }, { "epoch": 2.663586984110869, "grad_norm": 0.3416394591331482, "learning_rate": 2.242753439260876e-06, "loss": 0.028, "step": 52470 }, { "epoch": 2.663840804101731, "grad_norm": 0.3783033490180969, "learning_rate": 2.2410613059884597e-06, "loss": 0.0319, "step": 52475 }, { "epoch": 2.6640946240925936, "grad_norm": 0.43123677372932434, "learning_rate": 2.239369172716043e-06, "loss": 0.0392, "step": 52480 }, { "epoch": 2.664348444083456, "grad_norm": 0.28624406456947327, "learning_rate": 2.2376770394436267e-06, "loss": 0.026, "step": 52485 }, { "epoch": 2.6646022640743183, "grad_norm": 0.3098219335079193, "learning_rate": 2.2359849061712103e-06, "loss": 0.0311, "step": 52490 }, { "epoch": 2.664856084065181, "grad_norm": 0.3867039978504181, "learning_rate": 2.234292772898794e-06, "loss": 0.0354, "step": 52495 }, { "epoch": 2.6651099040560435, "grad_norm": 0.7705091834068298, "learning_rate": 2.2326006396263774e-06, "loss": 0.0286, "step": 52500 }, { "epoch": 2.665363724046906, "grad_norm": 0.3141399025917053, "learning_rate": 2.2309085063539605e-06, "loss": 0.0261, "step": 52505 }, { "epoch": 2.6656175440377683, "grad_norm": 0.9174738526344299, "learning_rate": 2.229216373081544e-06, "loss": 0.0257, "step": 52510 }, { "epoch": 2.665871364028631, "grad_norm": 0.25226977467536926, "learning_rate": 2.2275242398091276e-06, "loss": 0.0275, "step": 52515 }, { "epoch": 2.6661251840194935, "grad_norm": 0.7154881358146667, "learning_rate": 2.225832106536711e-06, "loss": 0.0298, "step": 52520 }, { "epoch": 2.6663790040103557, "grad_norm": 0.5491287708282471, "learning_rate": 2.2241399732642947e-06, "loss": 0.0268, "step": 52525 }, { "epoch": 2.6666328240012183, "grad_norm": 0.24701310694217682, "learning_rate": 2.2224478399918782e-06, "loss": 0.0329, "step": 52530 }, { "epoch": 2.666886643992081, "grad_norm": 1.1633200645446777, "learning_rate": 2.2207557067194613e-06, "loss": 0.0353, "step": 52535 }, { "epoch": 2.6671404639829435, "grad_norm": 0.30238714814186096, "learning_rate": 2.219063573447045e-06, "loss": 0.0331, "step": 52540 }, { "epoch": 2.6673942839738056, "grad_norm": 0.33450546860694885, "learning_rate": 2.2173714401746284e-06, "loss": 0.0367, "step": 52545 }, { "epoch": 2.6676481039646682, "grad_norm": 0.5556016564369202, "learning_rate": 2.2156793069022116e-06, "loss": 0.0366, "step": 52550 }, { "epoch": 2.667901923955531, "grad_norm": 0.30191656947135925, "learning_rate": 2.213987173629795e-06, "loss": 0.0265, "step": 52555 }, { "epoch": 2.668155743946393, "grad_norm": 0.40027421712875366, "learning_rate": 2.2122950403573786e-06, "loss": 0.0304, "step": 52560 }, { "epoch": 2.6684095639372556, "grad_norm": 0.2920042872428894, "learning_rate": 2.210602907084962e-06, "loss": 0.0322, "step": 52565 }, { "epoch": 2.668663383928118, "grad_norm": 0.29836907982826233, "learning_rate": 2.2089107738125457e-06, "loss": 0.036, "step": 52570 }, { "epoch": 2.668917203918981, "grad_norm": 0.2802891135215759, "learning_rate": 2.2072186405401293e-06, "loss": 0.0324, "step": 52575 }, { "epoch": 2.6691710239098434, "grad_norm": 0.282135933637619, "learning_rate": 2.205526507267713e-06, "loss": 0.0262, "step": 52580 }, { "epoch": 2.6694248439007056, "grad_norm": 0.35920336842536926, "learning_rate": 2.203834373995296e-06, "loss": 0.0236, "step": 52585 }, { "epoch": 2.669678663891568, "grad_norm": 0.44473469257354736, "learning_rate": 2.2021422407228795e-06, "loss": 0.03, "step": 52590 }, { "epoch": 2.6699324838824303, "grad_norm": 0.37438032031059265, "learning_rate": 2.200450107450463e-06, "loss": 0.0303, "step": 52595 }, { "epoch": 2.670186303873293, "grad_norm": 0.7606229782104492, "learning_rate": 2.1987579741780466e-06, "loss": 0.0332, "step": 52600 }, { "epoch": 2.6704401238641555, "grad_norm": 0.2716493606567383, "learning_rate": 2.19706584090563e-06, "loss": 0.0284, "step": 52605 }, { "epoch": 2.670693943855018, "grad_norm": 0.38287749886512756, "learning_rate": 2.1953737076332137e-06, "loss": 0.0348, "step": 52610 }, { "epoch": 2.6709477638458807, "grad_norm": 0.31366923451423645, "learning_rate": 2.1936815743607968e-06, "loss": 0.0283, "step": 52615 }, { "epoch": 2.671201583836743, "grad_norm": 0.3830898106098175, "learning_rate": 2.1919894410883803e-06, "loss": 0.0251, "step": 52620 }, { "epoch": 2.6714554038276055, "grad_norm": 0.2692367434501648, "learning_rate": 2.190297307815964e-06, "loss": 0.0276, "step": 52625 }, { "epoch": 2.671709223818468, "grad_norm": 0.27730458974838257, "learning_rate": 2.188605174543547e-06, "loss": 0.026, "step": 52630 }, { "epoch": 2.6719630438093303, "grad_norm": 0.36511486768722534, "learning_rate": 2.1869130412711305e-06, "loss": 0.0258, "step": 52635 }, { "epoch": 2.672216863800193, "grad_norm": 0.3859044909477234, "learning_rate": 2.185220907998714e-06, "loss": 0.0237, "step": 52640 }, { "epoch": 2.6724706837910555, "grad_norm": 0.35351133346557617, "learning_rate": 2.1835287747262976e-06, "loss": 0.0282, "step": 52645 }, { "epoch": 2.672724503781918, "grad_norm": 0.301961749792099, "learning_rate": 2.181836641453881e-06, "loss": 0.0298, "step": 52650 }, { "epoch": 2.67297832377278, "grad_norm": 0.3102557063102722, "learning_rate": 2.1801445081814647e-06, "loss": 0.0286, "step": 52655 }, { "epoch": 2.673232143763643, "grad_norm": 0.3127071261405945, "learning_rate": 2.178452374909048e-06, "loss": 0.033, "step": 52660 }, { "epoch": 2.6734859637545054, "grad_norm": 0.24851199984550476, "learning_rate": 2.1767602416366314e-06, "loss": 0.0271, "step": 52665 }, { "epoch": 2.6737397837453676, "grad_norm": 0.2732860743999481, "learning_rate": 2.175068108364215e-06, "loss": 0.0266, "step": 52670 }, { "epoch": 2.67399360373623, "grad_norm": 0.40874701738357544, "learning_rate": 2.1733759750917985e-06, "loss": 0.0247, "step": 52675 }, { "epoch": 2.674247423727093, "grad_norm": 0.18156781792640686, "learning_rate": 2.171683841819382e-06, "loss": 0.0251, "step": 52680 }, { "epoch": 2.6745012437179554, "grad_norm": 0.4294818043708801, "learning_rate": 2.1699917085469656e-06, "loss": 0.0282, "step": 52685 }, { "epoch": 2.6747550637088175, "grad_norm": 0.4318823218345642, "learning_rate": 2.168299575274549e-06, "loss": 0.0325, "step": 52690 }, { "epoch": 2.67500888369968, "grad_norm": 0.2763345241546631, "learning_rate": 2.1666074420021322e-06, "loss": 0.0276, "step": 52695 }, { "epoch": 2.6752627036905428, "grad_norm": 0.3835084140300751, "learning_rate": 2.1649153087297158e-06, "loss": 0.035, "step": 52700 }, { "epoch": 2.675516523681405, "grad_norm": 0.3311332166194916, "learning_rate": 2.163223175457299e-06, "loss": 0.0235, "step": 52705 }, { "epoch": 2.6757703436722675, "grad_norm": 0.2773440182209015, "learning_rate": 2.1615310421848824e-06, "loss": 0.0273, "step": 52710 }, { "epoch": 2.67602416366313, "grad_norm": 0.508554995059967, "learning_rate": 2.159838908912466e-06, "loss": 0.0305, "step": 52715 }, { "epoch": 2.6762779836539927, "grad_norm": 0.24255989491939545, "learning_rate": 2.1581467756400495e-06, "loss": 0.0314, "step": 52720 }, { "epoch": 2.6765318036448553, "grad_norm": 0.37368252873420715, "learning_rate": 2.156454642367633e-06, "loss": 0.0298, "step": 52725 }, { "epoch": 2.6767856236357175, "grad_norm": 0.41288262605667114, "learning_rate": 2.1547625090952166e-06, "loss": 0.0274, "step": 52730 }, { "epoch": 2.67703944362658, "grad_norm": 0.3736118674278259, "learning_rate": 2.1530703758228e-06, "loss": 0.0297, "step": 52735 }, { "epoch": 2.6772932636174422, "grad_norm": 0.3442663252353668, "learning_rate": 2.1513782425503833e-06, "loss": 0.0319, "step": 52740 }, { "epoch": 2.677547083608305, "grad_norm": 0.2854013741016388, "learning_rate": 2.149686109277967e-06, "loss": 0.0279, "step": 52745 }, { "epoch": 2.6778009035991674, "grad_norm": 0.2770628333091736, "learning_rate": 2.1479939760055504e-06, "loss": 0.031, "step": 52750 }, { "epoch": 2.67805472359003, "grad_norm": 0.2934173345565796, "learning_rate": 2.146301842733134e-06, "loss": 0.0259, "step": 52755 }, { "epoch": 2.6783085435808927, "grad_norm": 0.24669590592384338, "learning_rate": 2.1446097094607175e-06, "loss": 0.0308, "step": 52760 }, { "epoch": 2.678562363571755, "grad_norm": 0.5056079030036926, "learning_rate": 2.142917576188301e-06, "loss": 0.028, "step": 52765 }, { "epoch": 2.6788161835626174, "grad_norm": 0.4430501163005829, "learning_rate": 2.1412254429158845e-06, "loss": 0.0335, "step": 52770 }, { "epoch": 2.67907000355348, "grad_norm": 0.41625797748565674, "learning_rate": 2.1395333096434677e-06, "loss": 0.0381, "step": 52775 }, { "epoch": 2.679323823544342, "grad_norm": 0.2931457757949829, "learning_rate": 2.1378411763710512e-06, "loss": 0.0279, "step": 52780 }, { "epoch": 2.679577643535205, "grad_norm": 0.28468772768974304, "learning_rate": 2.1361490430986343e-06, "loss": 0.0296, "step": 52785 }, { "epoch": 2.6798314635260674, "grad_norm": 0.8410632610321045, "learning_rate": 2.134456909826218e-06, "loss": 0.0332, "step": 52790 }, { "epoch": 2.68008528351693, "grad_norm": 0.31851619482040405, "learning_rate": 2.1327647765538014e-06, "loss": 0.0326, "step": 52795 }, { "epoch": 2.680339103507792, "grad_norm": 0.3129703104496002, "learning_rate": 2.131072643281385e-06, "loss": 0.0289, "step": 52800 }, { "epoch": 2.6805929234986547, "grad_norm": 0.3168695271015167, "learning_rate": 2.1293805100089685e-06, "loss": 0.0252, "step": 52805 }, { "epoch": 2.6808467434895173, "grad_norm": 0.4538348615169525, "learning_rate": 2.127688376736552e-06, "loss": 0.0294, "step": 52810 }, { "epoch": 2.6811005634803795, "grad_norm": 0.9396774768829346, "learning_rate": 2.1259962434641356e-06, "loss": 0.0286, "step": 52815 }, { "epoch": 2.681354383471242, "grad_norm": 0.3485766351222992, "learning_rate": 2.1243041101917187e-06, "loss": 0.0326, "step": 52820 }, { "epoch": 2.6816082034621047, "grad_norm": 0.27177703380584717, "learning_rate": 2.1226119769193023e-06, "loss": 0.0305, "step": 52825 }, { "epoch": 2.6818620234529673, "grad_norm": 0.3578769862651825, "learning_rate": 2.120919843646886e-06, "loss": 0.0273, "step": 52830 }, { "epoch": 2.6821158434438295, "grad_norm": 0.2714287042617798, "learning_rate": 2.1192277103744694e-06, "loss": 0.0323, "step": 52835 }, { "epoch": 2.682369663434692, "grad_norm": 0.22217968106269836, "learning_rate": 2.117535577102053e-06, "loss": 0.0334, "step": 52840 }, { "epoch": 2.6826234834255547, "grad_norm": 0.2560397684574127, "learning_rate": 2.1158434438296364e-06, "loss": 0.032, "step": 52845 }, { "epoch": 2.682877303416417, "grad_norm": 0.4822799861431122, "learning_rate": 2.11415131055722e-06, "loss": 0.0314, "step": 52850 }, { "epoch": 2.6831311234072794, "grad_norm": 0.37949657440185547, "learning_rate": 2.112459177284803e-06, "loss": 0.0335, "step": 52855 }, { "epoch": 2.683384943398142, "grad_norm": 0.4751673638820648, "learning_rate": 2.1107670440123867e-06, "loss": 0.0316, "step": 52860 }, { "epoch": 2.6836387633890046, "grad_norm": 0.17257443070411682, "learning_rate": 2.1090749107399698e-06, "loss": 0.0286, "step": 52865 }, { "epoch": 2.6838925833798672, "grad_norm": 0.28788110613822937, "learning_rate": 2.1073827774675533e-06, "loss": 0.0273, "step": 52870 }, { "epoch": 2.6841464033707294, "grad_norm": 0.37915948033332825, "learning_rate": 2.105690644195137e-06, "loss": 0.0249, "step": 52875 }, { "epoch": 2.684400223361592, "grad_norm": 0.29168131947517395, "learning_rate": 2.1039985109227204e-06, "loss": 0.0308, "step": 52880 }, { "epoch": 2.684654043352454, "grad_norm": 0.43253839015960693, "learning_rate": 2.102306377650304e-06, "loss": 0.0289, "step": 52885 }, { "epoch": 2.6849078633433168, "grad_norm": 0.3503051698207855, "learning_rate": 2.1006142443778875e-06, "loss": 0.0344, "step": 52890 }, { "epoch": 2.6851616833341794, "grad_norm": 0.30547627806663513, "learning_rate": 2.098922111105471e-06, "loss": 0.0327, "step": 52895 }, { "epoch": 2.685415503325042, "grad_norm": 0.5834780931472778, "learning_rate": 2.097229977833054e-06, "loss": 0.0353, "step": 52900 }, { "epoch": 2.6856693233159046, "grad_norm": 0.22508762776851654, "learning_rate": 2.0955378445606377e-06, "loss": 0.0277, "step": 52905 }, { "epoch": 2.6859231433067667, "grad_norm": 0.36343008279800415, "learning_rate": 2.0938457112882213e-06, "loss": 0.0336, "step": 52910 }, { "epoch": 2.6861769632976293, "grad_norm": 0.3569226861000061, "learning_rate": 2.092153578015805e-06, "loss": 0.0343, "step": 52915 }, { "epoch": 2.686430783288492, "grad_norm": 0.5335173010826111, "learning_rate": 2.0904614447433883e-06, "loss": 0.0273, "step": 52920 }, { "epoch": 2.686684603279354, "grad_norm": 2.068007707595825, "learning_rate": 2.088769311470972e-06, "loss": 0.0362, "step": 52925 }, { "epoch": 2.6869384232702167, "grad_norm": 0.3901418447494507, "learning_rate": 2.087077178198555e-06, "loss": 0.0302, "step": 52930 }, { "epoch": 2.6871922432610793, "grad_norm": 0.4862616956233978, "learning_rate": 2.0853850449261386e-06, "loss": 0.0289, "step": 52935 }, { "epoch": 2.687446063251942, "grad_norm": 0.38844648003578186, "learning_rate": 2.083692911653722e-06, "loss": 0.0397, "step": 52940 }, { "epoch": 2.687699883242804, "grad_norm": 0.3375749886035919, "learning_rate": 2.0820007783813052e-06, "loss": 0.0247, "step": 52945 }, { "epoch": 2.6879537032336667, "grad_norm": 0.6121558547019958, "learning_rate": 2.0803086451088888e-06, "loss": 0.0341, "step": 52950 }, { "epoch": 2.6882075232245293, "grad_norm": 0.4033074676990509, "learning_rate": 2.0786165118364723e-06, "loss": 0.033, "step": 52955 }, { "epoch": 2.6884613432153914, "grad_norm": 0.27083489298820496, "learning_rate": 2.076924378564056e-06, "loss": 0.0344, "step": 52960 }, { "epoch": 2.688715163206254, "grad_norm": 0.2858365774154663, "learning_rate": 2.0752322452916394e-06, "loss": 0.0369, "step": 52965 }, { "epoch": 2.6889689831971166, "grad_norm": 0.4232316315174103, "learning_rate": 2.073540112019223e-06, "loss": 0.0317, "step": 52970 }, { "epoch": 2.6892228031879792, "grad_norm": 0.21906213462352753, "learning_rate": 2.071847978746806e-06, "loss": 0.03, "step": 52975 }, { "epoch": 2.689476623178842, "grad_norm": 0.27975738048553467, "learning_rate": 2.0701558454743896e-06, "loss": 0.0287, "step": 52980 }, { "epoch": 2.689730443169704, "grad_norm": 0.23709796369075775, "learning_rate": 2.068463712201973e-06, "loss": 0.0282, "step": 52985 }, { "epoch": 2.6899842631605666, "grad_norm": 0.23567497730255127, "learning_rate": 2.0667715789295567e-06, "loss": 0.0268, "step": 52990 }, { "epoch": 2.6902380831514288, "grad_norm": 0.2758468985557556, "learning_rate": 2.0650794456571402e-06, "loss": 0.0262, "step": 52995 }, { "epoch": 2.6904919031422914, "grad_norm": 0.3465255796909332, "learning_rate": 2.0633873123847238e-06, "loss": 0.0298, "step": 53000 }, { "epoch": 2.690745723133154, "grad_norm": 0.44373437762260437, "learning_rate": 2.0616951791123073e-06, "loss": 0.0269, "step": 53005 }, { "epoch": 2.6909995431240166, "grad_norm": 0.26769983768463135, "learning_rate": 2.0600030458398905e-06, "loss": 0.0326, "step": 53010 }, { "epoch": 2.691253363114879, "grad_norm": 0.38085025548934937, "learning_rate": 2.058310912567474e-06, "loss": 0.0337, "step": 53015 }, { "epoch": 2.6915071831057413, "grad_norm": 0.306956022977829, "learning_rate": 2.056618779295057e-06, "loss": 0.0285, "step": 53020 }, { "epoch": 2.691761003096604, "grad_norm": 0.3227962255477905, "learning_rate": 2.0549266460226407e-06, "loss": 0.0244, "step": 53025 }, { "epoch": 2.6920148230874665, "grad_norm": 0.3663029372692108, "learning_rate": 2.053234512750224e-06, "loss": 0.0341, "step": 53030 }, { "epoch": 2.6922686430783287, "grad_norm": 0.30087295174598694, "learning_rate": 2.0515423794778078e-06, "loss": 0.0226, "step": 53035 }, { "epoch": 2.6925224630691913, "grad_norm": 0.3504243493080139, "learning_rate": 2.0498502462053913e-06, "loss": 0.0256, "step": 53040 }, { "epoch": 2.692776283060054, "grad_norm": 0.28184401988983154, "learning_rate": 2.048158112932975e-06, "loss": 0.0293, "step": 53045 }, { "epoch": 2.6930301030509165, "grad_norm": 0.42434102296829224, "learning_rate": 2.0464659796605584e-06, "loss": 0.035, "step": 53050 }, { "epoch": 2.6932839230417787, "grad_norm": 0.3815975785255432, "learning_rate": 2.0447738463881415e-06, "loss": 0.0282, "step": 53055 }, { "epoch": 2.6935377430326413, "grad_norm": 0.25539329648017883, "learning_rate": 2.043081713115725e-06, "loss": 0.027, "step": 53060 }, { "epoch": 2.693791563023504, "grad_norm": 0.36869537830352783, "learning_rate": 2.0413895798433086e-06, "loss": 0.033, "step": 53065 }, { "epoch": 2.694045383014366, "grad_norm": 0.5211175680160522, "learning_rate": 2.039697446570892e-06, "loss": 0.0354, "step": 53070 }, { "epoch": 2.6942992030052286, "grad_norm": 0.4958113431930542, "learning_rate": 2.0380053132984757e-06, "loss": 0.0351, "step": 53075 }, { "epoch": 2.6945530229960912, "grad_norm": 0.38227608799934387, "learning_rate": 2.0363131800260592e-06, "loss": 0.0254, "step": 53080 }, { "epoch": 2.694806842986954, "grad_norm": 0.31782981753349304, "learning_rate": 2.0346210467536428e-06, "loss": 0.0357, "step": 53085 }, { "epoch": 2.695060662977816, "grad_norm": 0.344621866941452, "learning_rate": 2.032928913481226e-06, "loss": 0.0262, "step": 53090 }, { "epoch": 2.6953144829686786, "grad_norm": 0.2928641438484192, "learning_rate": 2.0312367802088094e-06, "loss": 0.0282, "step": 53095 }, { "epoch": 2.695568302959541, "grad_norm": 0.3690706193447113, "learning_rate": 2.0295446469363926e-06, "loss": 0.0326, "step": 53100 }, { "epoch": 2.6958221229504034, "grad_norm": 0.38905826210975647, "learning_rate": 2.027852513663976e-06, "loss": 0.0347, "step": 53105 }, { "epoch": 2.696075942941266, "grad_norm": 0.4815005660057068, "learning_rate": 2.0261603803915596e-06, "loss": 0.0324, "step": 53110 }, { "epoch": 2.6963297629321286, "grad_norm": 0.36236539483070374, "learning_rate": 2.024468247119143e-06, "loss": 0.0341, "step": 53115 }, { "epoch": 2.696583582922991, "grad_norm": 0.2581886947154999, "learning_rate": 2.0227761138467267e-06, "loss": 0.0253, "step": 53120 }, { "epoch": 2.6968374029138538, "grad_norm": 0.698701798915863, "learning_rate": 2.0210839805743103e-06, "loss": 0.0246, "step": 53125 }, { "epoch": 2.697091222904716, "grad_norm": 0.3778875172138214, "learning_rate": 2.019391847301894e-06, "loss": 0.0335, "step": 53130 }, { "epoch": 2.6973450428955785, "grad_norm": 0.3054998219013214, "learning_rate": 2.017699714029477e-06, "loss": 0.0272, "step": 53135 }, { "epoch": 2.6975988628864407, "grad_norm": 0.3761795163154602, "learning_rate": 2.0160075807570605e-06, "loss": 0.0313, "step": 53140 }, { "epoch": 2.6978526828773033, "grad_norm": 0.41128095984458923, "learning_rate": 2.014315447484644e-06, "loss": 0.0288, "step": 53145 }, { "epoch": 2.698106502868166, "grad_norm": 0.287852942943573, "learning_rate": 2.0126233142122276e-06, "loss": 0.0267, "step": 53150 }, { "epoch": 2.6983603228590285, "grad_norm": 0.32598933577537537, "learning_rate": 2.010931180939811e-06, "loss": 0.0271, "step": 53155 }, { "epoch": 2.698614142849891, "grad_norm": 0.4211830496788025, "learning_rate": 2.0092390476673947e-06, "loss": 0.0277, "step": 53160 }, { "epoch": 2.6988679628407533, "grad_norm": 0.308586448431015, "learning_rate": 2.0075469143949782e-06, "loss": 0.031, "step": 53165 }, { "epoch": 2.699121782831616, "grad_norm": 0.304868221282959, "learning_rate": 2.0058547811225613e-06, "loss": 0.0377, "step": 53170 }, { "epoch": 2.6993756028224785, "grad_norm": 0.2767963707447052, "learning_rate": 2.004162647850145e-06, "loss": 0.0338, "step": 53175 }, { "epoch": 2.6996294228133406, "grad_norm": 0.2587690055370331, "learning_rate": 2.002470514577728e-06, "loss": 0.026, "step": 53180 }, { "epoch": 2.699883242804203, "grad_norm": 0.25689199566841125, "learning_rate": 2.0007783813053115e-06, "loss": 0.0292, "step": 53185 }, { "epoch": 2.700137062795066, "grad_norm": 0.36869364976882935, "learning_rate": 1.999086248032895e-06, "loss": 0.0282, "step": 53190 }, { "epoch": 2.7003908827859284, "grad_norm": 0.34846585988998413, "learning_rate": 1.9973941147604786e-06, "loss": 0.0334, "step": 53195 }, { "epoch": 2.7006447027767906, "grad_norm": 0.31279513239860535, "learning_rate": 1.995701981488062e-06, "loss": 0.0291, "step": 53200 }, { "epoch": 2.700898522767653, "grad_norm": 0.2938686013221741, "learning_rate": 1.9940098482156457e-06, "loss": 0.0273, "step": 53205 }, { "epoch": 2.701152342758516, "grad_norm": 0.4499298930168152, "learning_rate": 1.9923177149432293e-06, "loss": 0.0297, "step": 53210 }, { "epoch": 2.701406162749378, "grad_norm": 0.5888147950172424, "learning_rate": 1.9906255816708124e-06, "loss": 0.0384, "step": 53215 }, { "epoch": 2.7016599827402406, "grad_norm": 0.2620474100112915, "learning_rate": 1.988933448398396e-06, "loss": 0.0301, "step": 53220 }, { "epoch": 2.701913802731103, "grad_norm": 0.4047901928424835, "learning_rate": 1.9872413151259795e-06, "loss": 0.0282, "step": 53225 }, { "epoch": 2.7021676227219658, "grad_norm": 0.42640551924705505, "learning_rate": 1.985549181853563e-06, "loss": 0.036, "step": 53230 }, { "epoch": 2.702421442712828, "grad_norm": 0.28090572357177734, "learning_rate": 1.9838570485811466e-06, "loss": 0.0255, "step": 53235 }, { "epoch": 2.7026752627036905, "grad_norm": 0.3164530396461487, "learning_rate": 1.98216491530873e-06, "loss": 0.0327, "step": 53240 }, { "epoch": 2.702929082694553, "grad_norm": 0.38603898882865906, "learning_rate": 1.9804727820363132e-06, "loss": 0.0295, "step": 53245 }, { "epoch": 2.7031829026854153, "grad_norm": 0.3389573395252228, "learning_rate": 1.9787806487638968e-06, "loss": 0.0362, "step": 53250 }, { "epoch": 2.703436722676278, "grad_norm": 0.2195272594690323, "learning_rate": 1.9770885154914803e-06, "loss": 0.0273, "step": 53255 }, { "epoch": 2.7036905426671405, "grad_norm": 0.505377471446991, "learning_rate": 1.9753963822190634e-06, "loss": 0.0335, "step": 53260 }, { "epoch": 2.703944362658003, "grad_norm": 0.32349297404289246, "learning_rate": 1.973704248946647e-06, "loss": 0.0317, "step": 53265 }, { "epoch": 2.7041981826488657, "grad_norm": 0.27195101976394653, "learning_rate": 1.9720121156742305e-06, "loss": 0.032, "step": 53270 }, { "epoch": 2.704452002639728, "grad_norm": 0.28886640071868896, "learning_rate": 1.970319982401814e-06, "loss": 0.0289, "step": 53275 }, { "epoch": 2.7047058226305905, "grad_norm": 0.44799792766571045, "learning_rate": 1.9686278491293976e-06, "loss": 0.0301, "step": 53280 }, { "epoch": 2.7049596426214526, "grad_norm": 0.44857335090637207, "learning_rate": 1.966935715856981e-06, "loss": 0.0387, "step": 53285 }, { "epoch": 2.705213462612315, "grad_norm": 0.29437506198883057, "learning_rate": 1.9652435825845643e-06, "loss": 0.0274, "step": 53290 }, { "epoch": 2.705467282603178, "grad_norm": 0.309415727853775, "learning_rate": 1.963551449312148e-06, "loss": 0.0368, "step": 53295 }, { "epoch": 2.7057211025940404, "grad_norm": 0.4497421979904175, "learning_rate": 1.9618593160397314e-06, "loss": 0.0299, "step": 53300 }, { "epoch": 2.705974922584903, "grad_norm": 0.3225899934768677, "learning_rate": 1.960167182767315e-06, "loss": 0.0273, "step": 53305 }, { "epoch": 2.706228742575765, "grad_norm": 0.34908631443977356, "learning_rate": 1.9584750494948985e-06, "loss": 0.033, "step": 53310 }, { "epoch": 2.706482562566628, "grad_norm": 0.33565932512283325, "learning_rate": 1.956782916222482e-06, "loss": 0.0309, "step": 53315 }, { "epoch": 2.7067363825574904, "grad_norm": 0.5139685273170471, "learning_rate": 1.9550907829500656e-06, "loss": 0.0297, "step": 53320 }, { "epoch": 2.7069902025483525, "grad_norm": 0.30862730741500854, "learning_rate": 1.9533986496776487e-06, "loss": 0.0273, "step": 53325 }, { "epoch": 2.707244022539215, "grad_norm": 0.4228881895542145, "learning_rate": 1.9517065164052322e-06, "loss": 0.0252, "step": 53330 }, { "epoch": 2.7074978425300777, "grad_norm": 0.278484970331192, "learning_rate": 1.9500143831328158e-06, "loss": 0.0254, "step": 53335 }, { "epoch": 2.7077516625209404, "grad_norm": 0.37600430846214294, "learning_rate": 1.948322249860399e-06, "loss": 0.0273, "step": 53340 }, { "epoch": 2.7080054825118025, "grad_norm": 0.20989076793193817, "learning_rate": 1.9466301165879824e-06, "loss": 0.0306, "step": 53345 }, { "epoch": 2.708259302502665, "grad_norm": 0.5742992162704468, "learning_rate": 1.944937983315566e-06, "loss": 0.0307, "step": 53350 }, { "epoch": 2.7085131224935277, "grad_norm": 0.28505074977874756, "learning_rate": 1.9432458500431495e-06, "loss": 0.0269, "step": 53355 }, { "epoch": 2.70876694248439, "grad_norm": 0.27916303277015686, "learning_rate": 1.941553716770733e-06, "loss": 0.0357, "step": 53360 }, { "epoch": 2.7090207624752525, "grad_norm": 0.261547714471817, "learning_rate": 1.9398615834983166e-06, "loss": 0.0297, "step": 53365 }, { "epoch": 2.709274582466115, "grad_norm": 0.370958149433136, "learning_rate": 1.9381694502258997e-06, "loss": 0.0266, "step": 53370 }, { "epoch": 2.7095284024569777, "grad_norm": 0.3699497580528259, "learning_rate": 1.9364773169534833e-06, "loss": 0.0305, "step": 53375 }, { "epoch": 2.70978222244784, "grad_norm": 0.2690362334251404, "learning_rate": 1.934785183681067e-06, "loss": 0.0262, "step": 53380 }, { "epoch": 2.7100360424387024, "grad_norm": 1.8637694120407104, "learning_rate": 1.9330930504086504e-06, "loss": 0.0365, "step": 53385 }, { "epoch": 2.710289862429565, "grad_norm": 0.32636716961860657, "learning_rate": 1.931400917136234e-06, "loss": 0.0338, "step": 53390 }, { "epoch": 2.710543682420427, "grad_norm": 0.3575417697429657, "learning_rate": 1.9297087838638175e-06, "loss": 0.031, "step": 53395 }, { "epoch": 2.71079750241129, "grad_norm": 0.3870401084423065, "learning_rate": 1.928016650591401e-06, "loss": 0.0277, "step": 53400 }, { "epoch": 2.7110513224021524, "grad_norm": 0.43011873960494995, "learning_rate": 1.926324517318984e-06, "loss": 0.0332, "step": 53405 }, { "epoch": 2.711305142393015, "grad_norm": 0.21395263075828552, "learning_rate": 1.9246323840465677e-06, "loss": 0.0241, "step": 53410 }, { "epoch": 2.7115589623838776, "grad_norm": 0.43874168395996094, "learning_rate": 1.922940250774151e-06, "loss": 0.0323, "step": 53415 }, { "epoch": 2.7118127823747398, "grad_norm": 0.32266518473625183, "learning_rate": 1.9212481175017343e-06, "loss": 0.0239, "step": 53420 }, { "epoch": 2.7120666023656024, "grad_norm": 0.34239718317985535, "learning_rate": 1.919555984229318e-06, "loss": 0.0363, "step": 53425 }, { "epoch": 2.7123204223564645, "grad_norm": 0.45117393136024475, "learning_rate": 1.9178638509569014e-06, "loss": 0.0298, "step": 53430 }, { "epoch": 2.712574242347327, "grad_norm": 0.3619763255119324, "learning_rate": 1.916171717684485e-06, "loss": 0.0256, "step": 53435 }, { "epoch": 2.7128280623381897, "grad_norm": 0.6177137494087219, "learning_rate": 1.9144795844120685e-06, "loss": 0.0242, "step": 53440 }, { "epoch": 2.7130818823290523, "grad_norm": 0.31061235070228577, "learning_rate": 1.912787451139652e-06, "loss": 0.0312, "step": 53445 }, { "epoch": 2.713335702319915, "grad_norm": 0.42823871970176697, "learning_rate": 1.911095317867235e-06, "loss": 0.0333, "step": 53450 }, { "epoch": 2.713589522310777, "grad_norm": 0.42373305559158325, "learning_rate": 1.9094031845948187e-06, "loss": 0.0257, "step": 53455 }, { "epoch": 2.7138433423016397, "grad_norm": 0.27779242396354675, "learning_rate": 1.9077110513224023e-06, "loss": 0.0247, "step": 53460 }, { "epoch": 2.7140971622925023, "grad_norm": 0.22847887873649597, "learning_rate": 1.9060189180499858e-06, "loss": 0.0272, "step": 53465 }, { "epoch": 2.7143509822833645, "grad_norm": 0.3980698883533478, "learning_rate": 1.9043267847775694e-06, "loss": 0.0298, "step": 53470 }, { "epoch": 2.714604802274227, "grad_norm": 0.2841477692127228, "learning_rate": 1.9026346515051527e-06, "loss": 0.0258, "step": 53475 }, { "epoch": 2.7148586222650897, "grad_norm": 0.38664472103118896, "learning_rate": 1.9009425182327362e-06, "loss": 0.03, "step": 53480 }, { "epoch": 2.7151124422559523, "grad_norm": 0.21626536548137665, "learning_rate": 1.8992503849603198e-06, "loss": 0.0284, "step": 53485 }, { "epoch": 2.7153662622468144, "grad_norm": 0.31044235825538635, "learning_rate": 1.897558251687903e-06, "loss": 0.0274, "step": 53490 }, { "epoch": 2.715620082237677, "grad_norm": 0.35324153304100037, "learning_rate": 1.8958661184154866e-06, "loss": 0.0274, "step": 53495 }, { "epoch": 2.7158739022285396, "grad_norm": 0.4163343608379364, "learning_rate": 1.8941739851430702e-06, "loss": 0.0293, "step": 53500 }, { "epoch": 2.716127722219402, "grad_norm": 0.561988890171051, "learning_rate": 1.8924818518706533e-06, "loss": 0.0295, "step": 53505 }, { "epoch": 2.7163815422102644, "grad_norm": 0.30611640214920044, "learning_rate": 1.8907897185982369e-06, "loss": 0.0317, "step": 53510 }, { "epoch": 2.716635362201127, "grad_norm": 0.3293723464012146, "learning_rate": 1.8890975853258204e-06, "loss": 0.0299, "step": 53515 }, { "epoch": 2.7168891821919896, "grad_norm": 1.3542070388793945, "learning_rate": 1.8874054520534037e-06, "loss": 0.0356, "step": 53520 }, { "epoch": 2.7171430021828518, "grad_norm": 0.25533461570739746, "learning_rate": 1.8857133187809873e-06, "loss": 0.035, "step": 53525 }, { "epoch": 2.7173968221737144, "grad_norm": 0.2728980481624603, "learning_rate": 1.8840211855085708e-06, "loss": 0.0321, "step": 53530 }, { "epoch": 2.717650642164577, "grad_norm": 0.3773408830165863, "learning_rate": 1.8823290522361542e-06, "loss": 0.0306, "step": 53535 }, { "epoch": 2.717904462155439, "grad_norm": 0.42744117975234985, "learning_rate": 1.8806369189637377e-06, "loss": 0.0274, "step": 53540 }, { "epoch": 2.7181582821463017, "grad_norm": 0.5459184646606445, "learning_rate": 1.8789447856913212e-06, "loss": 0.0354, "step": 53545 }, { "epoch": 2.7184121021371643, "grad_norm": 0.40570247173309326, "learning_rate": 1.8772526524189048e-06, "loss": 0.0303, "step": 53550 }, { "epoch": 2.718665922128027, "grad_norm": 0.18053355813026428, "learning_rate": 1.8755605191464881e-06, "loss": 0.0224, "step": 53555 }, { "epoch": 2.7189197421188895, "grad_norm": 0.3330439329147339, "learning_rate": 1.8738683858740717e-06, "loss": 0.0277, "step": 53560 }, { "epoch": 2.7191735621097517, "grad_norm": 0.3566732108592987, "learning_rate": 1.8721762526016552e-06, "loss": 0.0321, "step": 53565 }, { "epoch": 2.7194273821006143, "grad_norm": 0.2924792468547821, "learning_rate": 1.8704841193292385e-06, "loss": 0.0279, "step": 53570 }, { "epoch": 2.7196812020914765, "grad_norm": 0.35724127292633057, "learning_rate": 1.868791986056822e-06, "loss": 0.0277, "step": 53575 }, { "epoch": 2.719935022082339, "grad_norm": 0.4432433247566223, "learning_rate": 1.8670998527844056e-06, "loss": 0.035, "step": 53580 }, { "epoch": 2.7201888420732017, "grad_norm": 0.3439810276031494, "learning_rate": 1.8654077195119888e-06, "loss": 0.0321, "step": 53585 }, { "epoch": 2.7204426620640643, "grad_norm": 0.2902897298336029, "learning_rate": 1.8637155862395723e-06, "loss": 0.0339, "step": 53590 }, { "epoch": 2.720696482054927, "grad_norm": 0.565328061580658, "learning_rate": 1.8620234529671558e-06, "loss": 0.0368, "step": 53595 }, { "epoch": 2.720950302045789, "grad_norm": 0.23699818551540375, "learning_rate": 1.8603313196947392e-06, "loss": 0.0275, "step": 53600 }, { "epoch": 2.7212041220366516, "grad_norm": 0.4020783305168152, "learning_rate": 1.8586391864223227e-06, "loss": 0.0244, "step": 53605 }, { "epoch": 2.7214579420275142, "grad_norm": 0.6224753856658936, "learning_rate": 1.8569470531499063e-06, "loss": 0.03, "step": 53610 }, { "epoch": 2.7217117620183764, "grad_norm": 0.21802890300750732, "learning_rate": 1.8552549198774896e-06, "loss": 0.0261, "step": 53615 }, { "epoch": 2.721965582009239, "grad_norm": 0.25149235129356384, "learning_rate": 1.8535627866050731e-06, "loss": 0.0287, "step": 53620 }, { "epoch": 2.7222194020001016, "grad_norm": 0.2931320071220398, "learning_rate": 1.8518706533326567e-06, "loss": 0.0324, "step": 53625 }, { "epoch": 2.722473221990964, "grad_norm": 0.35715875029563904, "learning_rate": 1.85017852006024e-06, "loss": 0.0369, "step": 53630 }, { "epoch": 2.7227270419818264, "grad_norm": 0.31633713841438293, "learning_rate": 1.8484863867878236e-06, "loss": 0.0287, "step": 53635 }, { "epoch": 2.722980861972689, "grad_norm": 0.4361322522163391, "learning_rate": 1.8467942535154071e-06, "loss": 0.0323, "step": 53640 }, { "epoch": 2.7232346819635516, "grad_norm": 0.26381734013557434, "learning_rate": 1.8451021202429907e-06, "loss": 0.0274, "step": 53645 }, { "epoch": 2.7234885019544137, "grad_norm": 0.3084844946861267, "learning_rate": 1.843409986970574e-06, "loss": 0.0297, "step": 53650 }, { "epoch": 2.7237423219452763, "grad_norm": 0.6224455833435059, "learning_rate": 1.8417178536981575e-06, "loss": 0.0349, "step": 53655 }, { "epoch": 2.723996141936139, "grad_norm": 0.44383668899536133, "learning_rate": 1.840025720425741e-06, "loss": 0.0309, "step": 53660 }, { "epoch": 2.7242499619270015, "grad_norm": 0.4933556914329529, "learning_rate": 1.8383335871533242e-06, "loss": 0.036, "step": 53665 }, { "epoch": 2.7245037819178637, "grad_norm": 0.23321112990379333, "learning_rate": 1.8366414538809077e-06, "loss": 0.0227, "step": 53670 }, { "epoch": 2.7247576019087263, "grad_norm": 0.3232824504375458, "learning_rate": 1.834949320608491e-06, "loss": 0.0339, "step": 53675 }, { "epoch": 2.725011421899589, "grad_norm": 0.2447776347398758, "learning_rate": 1.8332571873360746e-06, "loss": 0.0308, "step": 53680 }, { "epoch": 2.725265241890451, "grad_norm": 0.26831647753715515, "learning_rate": 1.8315650540636582e-06, "loss": 0.03, "step": 53685 }, { "epoch": 2.7255190618813137, "grad_norm": 0.2714548110961914, "learning_rate": 1.8298729207912417e-06, "loss": 0.0315, "step": 53690 }, { "epoch": 2.7257728818721763, "grad_norm": 0.2372501641511917, "learning_rate": 1.828180787518825e-06, "loss": 0.0254, "step": 53695 }, { "epoch": 2.726026701863039, "grad_norm": 0.18035678565502167, "learning_rate": 1.8264886542464086e-06, "loss": 0.0257, "step": 53700 }, { "epoch": 2.7262805218539015, "grad_norm": 0.3743029832839966, "learning_rate": 1.8247965209739921e-06, "loss": 0.0365, "step": 53705 }, { "epoch": 2.7265343418447636, "grad_norm": 0.4348129630088806, "learning_rate": 1.8231043877015755e-06, "loss": 0.0311, "step": 53710 }, { "epoch": 2.7267881618356262, "grad_norm": 0.3668404817581177, "learning_rate": 1.821412254429159e-06, "loss": 0.0312, "step": 53715 }, { "epoch": 2.7270419818264884, "grad_norm": 0.35408517718315125, "learning_rate": 1.8197201211567426e-06, "loss": 0.0291, "step": 53720 }, { "epoch": 2.727295801817351, "grad_norm": 0.32698673009872437, "learning_rate": 1.818027987884326e-06, "loss": 0.0252, "step": 53725 }, { "epoch": 2.7275496218082136, "grad_norm": 0.4094547927379608, "learning_rate": 1.8163358546119094e-06, "loss": 0.0381, "step": 53730 }, { "epoch": 2.727803441799076, "grad_norm": 0.38025033473968506, "learning_rate": 1.814643721339493e-06, "loss": 0.0291, "step": 53735 }, { "epoch": 2.728057261789939, "grad_norm": 0.45479652285575867, "learning_rate": 1.8129515880670765e-06, "loss": 0.034, "step": 53740 }, { "epoch": 2.728311081780801, "grad_norm": 0.726309061050415, "learning_rate": 1.8112594547946596e-06, "loss": 0.0354, "step": 53745 }, { "epoch": 2.7285649017716636, "grad_norm": 0.30821505188941956, "learning_rate": 1.8095673215222432e-06, "loss": 0.031, "step": 53750 }, { "epoch": 2.728818721762526, "grad_norm": 0.2646833658218384, "learning_rate": 1.8078751882498265e-06, "loss": 0.0285, "step": 53755 }, { "epoch": 2.7290725417533883, "grad_norm": 0.32509294152259827, "learning_rate": 1.80618305497741e-06, "loss": 0.0295, "step": 53760 }, { "epoch": 2.729326361744251, "grad_norm": 0.2838088274002075, "learning_rate": 1.8044909217049936e-06, "loss": 0.0293, "step": 53765 }, { "epoch": 2.7295801817351135, "grad_norm": 0.34795206785202026, "learning_rate": 1.8027987884325772e-06, "loss": 0.0296, "step": 53770 }, { "epoch": 2.729834001725976, "grad_norm": 0.17932498455047607, "learning_rate": 1.8011066551601605e-06, "loss": 0.0278, "step": 53775 }, { "epoch": 2.7300878217168383, "grad_norm": 0.29193925857543945, "learning_rate": 1.799414521887744e-06, "loss": 0.0348, "step": 53780 }, { "epoch": 2.730341641707701, "grad_norm": 1.6499922275543213, "learning_rate": 1.7977223886153276e-06, "loss": 0.0318, "step": 53785 }, { "epoch": 2.7305954616985635, "grad_norm": 0.2674989402294159, "learning_rate": 1.796030255342911e-06, "loss": 0.0301, "step": 53790 }, { "epoch": 2.7308492816894256, "grad_norm": 0.3045971691608429, "learning_rate": 1.7943381220704945e-06, "loss": 0.0286, "step": 53795 }, { "epoch": 2.7311031016802882, "grad_norm": 0.25646665692329407, "learning_rate": 1.792645988798078e-06, "loss": 0.0254, "step": 53800 }, { "epoch": 2.731356921671151, "grad_norm": 0.2805609703063965, "learning_rate": 1.7909538555256613e-06, "loss": 0.0326, "step": 53805 }, { "epoch": 2.7316107416620135, "grad_norm": 0.3846939504146576, "learning_rate": 1.7892617222532449e-06, "loss": 0.0287, "step": 53810 }, { "epoch": 2.731864561652876, "grad_norm": 0.30910441279411316, "learning_rate": 1.7875695889808284e-06, "loss": 0.0338, "step": 53815 }, { "epoch": 2.732118381643738, "grad_norm": 0.26713868975639343, "learning_rate": 1.785877455708412e-06, "loss": 0.0244, "step": 53820 }, { "epoch": 2.732372201634601, "grad_norm": 0.3901999592781067, "learning_rate": 1.784185322435995e-06, "loss": 0.0247, "step": 53825 }, { "epoch": 2.732626021625463, "grad_norm": 0.260437935590744, "learning_rate": 1.7824931891635786e-06, "loss": 0.0339, "step": 53830 }, { "epoch": 2.7328798416163256, "grad_norm": 0.418204665184021, "learning_rate": 1.780801055891162e-06, "loss": 0.028, "step": 53835 }, { "epoch": 2.733133661607188, "grad_norm": 0.32420992851257324, "learning_rate": 1.7791089226187455e-06, "loss": 0.0309, "step": 53840 }, { "epoch": 2.733387481598051, "grad_norm": 0.32176563143730164, "learning_rate": 1.777416789346329e-06, "loss": 0.025, "step": 53845 }, { "epoch": 2.7336413015889134, "grad_norm": 0.332557737827301, "learning_rate": 1.7757246560739124e-06, "loss": 0.032, "step": 53850 }, { "epoch": 2.7338951215797755, "grad_norm": 0.31119346618652344, "learning_rate": 1.774032522801496e-06, "loss": 0.0231, "step": 53855 }, { "epoch": 2.734148941570638, "grad_norm": 0.33499324321746826, "learning_rate": 1.7723403895290795e-06, "loss": 0.0305, "step": 53860 }, { "epoch": 2.7344027615615007, "grad_norm": 0.4173754155635834, "learning_rate": 1.770648256256663e-06, "loss": 0.0289, "step": 53865 }, { "epoch": 2.734656581552363, "grad_norm": 0.32819196581840515, "learning_rate": 1.7689561229842463e-06, "loss": 0.0312, "step": 53870 }, { "epoch": 2.7349104015432255, "grad_norm": 0.3945378363132477, "learning_rate": 1.7672639897118299e-06, "loss": 0.0383, "step": 53875 }, { "epoch": 2.735164221534088, "grad_norm": 0.34407007694244385, "learning_rate": 1.7655718564394134e-06, "loss": 0.0313, "step": 53880 }, { "epoch": 2.7354180415249507, "grad_norm": 0.37155938148498535, "learning_rate": 1.7638797231669968e-06, "loss": 0.0348, "step": 53885 }, { "epoch": 2.735671861515813, "grad_norm": 0.3493196666240692, "learning_rate": 1.7621875898945803e-06, "loss": 0.0262, "step": 53890 }, { "epoch": 2.7359256815066755, "grad_norm": 0.39568087458610535, "learning_rate": 1.7604954566221639e-06, "loss": 0.0327, "step": 53895 }, { "epoch": 2.736179501497538, "grad_norm": 0.3887282907962799, "learning_rate": 1.7588033233497472e-06, "loss": 0.0314, "step": 53900 }, { "epoch": 2.7364333214884002, "grad_norm": 0.2152509093284607, "learning_rate": 1.7571111900773305e-06, "loss": 0.0319, "step": 53905 }, { "epoch": 2.736687141479263, "grad_norm": 0.24241326749324799, "learning_rate": 1.755419056804914e-06, "loss": 0.0241, "step": 53910 }, { "epoch": 2.7369409614701254, "grad_norm": 0.6015492677688599, "learning_rate": 1.7537269235324974e-06, "loss": 0.0287, "step": 53915 }, { "epoch": 2.737194781460988, "grad_norm": 0.7322681546211243, "learning_rate": 1.752034790260081e-06, "loss": 0.0336, "step": 53920 }, { "epoch": 2.73744860145185, "grad_norm": 0.27525922656059265, "learning_rate": 1.7503426569876645e-06, "loss": 0.0284, "step": 53925 }, { "epoch": 2.737702421442713, "grad_norm": 0.24374821782112122, "learning_rate": 1.7486505237152478e-06, "loss": 0.035, "step": 53930 }, { "epoch": 2.7379562414335754, "grad_norm": 0.2927287518978119, "learning_rate": 1.7469583904428314e-06, "loss": 0.0293, "step": 53935 }, { "epoch": 2.7382100614244376, "grad_norm": 0.44183194637298584, "learning_rate": 1.745266257170415e-06, "loss": 0.0257, "step": 53940 }, { "epoch": 2.7384638814153, "grad_norm": 0.970026433467865, "learning_rate": 1.7435741238979982e-06, "loss": 0.0286, "step": 53945 }, { "epoch": 2.7387177014061628, "grad_norm": 0.5048357844352722, "learning_rate": 1.7418819906255818e-06, "loss": 0.0292, "step": 53950 }, { "epoch": 2.7389715213970254, "grad_norm": 0.46825069189071655, "learning_rate": 1.7401898573531653e-06, "loss": 0.0404, "step": 53955 }, { "epoch": 2.739225341387888, "grad_norm": 0.3677990138530731, "learning_rate": 1.7384977240807489e-06, "loss": 0.0266, "step": 53960 }, { "epoch": 2.73947916137875, "grad_norm": 0.312150776386261, "learning_rate": 1.7368055908083322e-06, "loss": 0.028, "step": 53965 }, { "epoch": 2.7397329813696127, "grad_norm": 0.3144506812095642, "learning_rate": 1.7351134575359158e-06, "loss": 0.026, "step": 53970 }, { "epoch": 2.739986801360475, "grad_norm": 1.072723627090454, "learning_rate": 1.7334213242634993e-06, "loss": 0.0408, "step": 53975 }, { "epoch": 2.7402406213513375, "grad_norm": 0.2998804748058319, "learning_rate": 1.7317291909910826e-06, "loss": 0.0257, "step": 53980 }, { "epoch": 2.7404944413422, "grad_norm": 0.3485616147518158, "learning_rate": 1.7300370577186662e-06, "loss": 0.0289, "step": 53985 }, { "epoch": 2.7407482613330627, "grad_norm": 0.36675891280174255, "learning_rate": 1.7283449244462493e-06, "loss": 0.0333, "step": 53990 }, { "epoch": 2.7410020813239253, "grad_norm": 0.48393315076828003, "learning_rate": 1.7266527911738328e-06, "loss": 0.0316, "step": 53995 }, { "epoch": 2.7412559013147875, "grad_norm": 0.35300666093826294, "learning_rate": 1.7249606579014164e-06, "loss": 0.0271, "step": 54000 }, { "epoch": 2.74150972130565, "grad_norm": 0.2933576703071594, "learning_rate": 1.723268524629e-06, "loss": 0.0377, "step": 54005 }, { "epoch": 2.7417635412965127, "grad_norm": 0.4027237296104431, "learning_rate": 1.7215763913565833e-06, "loss": 0.0307, "step": 54010 }, { "epoch": 2.742017361287375, "grad_norm": 0.2828601598739624, "learning_rate": 1.7198842580841668e-06, "loss": 0.0271, "step": 54015 }, { "epoch": 2.7422711812782374, "grad_norm": 0.3389827609062195, "learning_rate": 1.7181921248117504e-06, "loss": 0.0276, "step": 54020 }, { "epoch": 2.7425250012691, "grad_norm": 0.5130777955055237, "learning_rate": 1.7164999915393337e-06, "loss": 0.0279, "step": 54025 }, { "epoch": 2.7427788212599626, "grad_norm": 0.32711169123649597, "learning_rate": 1.7148078582669172e-06, "loss": 0.0328, "step": 54030 }, { "epoch": 2.743032641250825, "grad_norm": 0.45256665349006653, "learning_rate": 1.7131157249945008e-06, "loss": 0.0365, "step": 54035 }, { "epoch": 2.7432864612416874, "grad_norm": 0.27658596634864807, "learning_rate": 1.7114235917220843e-06, "loss": 0.0289, "step": 54040 }, { "epoch": 2.74354028123255, "grad_norm": 0.35465267300605774, "learning_rate": 1.7097314584496677e-06, "loss": 0.0401, "step": 54045 }, { "epoch": 2.743794101223412, "grad_norm": 0.2526685893535614, "learning_rate": 1.7080393251772512e-06, "loss": 0.0335, "step": 54050 }, { "epoch": 2.7440479212142748, "grad_norm": 0.43355873227119446, "learning_rate": 1.7063471919048347e-06, "loss": 0.03, "step": 54055 }, { "epoch": 2.7443017412051374, "grad_norm": 0.2964344918727875, "learning_rate": 1.704655058632418e-06, "loss": 0.0299, "step": 54060 }, { "epoch": 2.744555561196, "grad_norm": 0.46827706694602966, "learning_rate": 1.7029629253600016e-06, "loss": 0.0427, "step": 54065 }, { "epoch": 2.744809381186862, "grad_norm": 1.2639143466949463, "learning_rate": 1.7012707920875847e-06, "loss": 0.027, "step": 54070 }, { "epoch": 2.7450632011777247, "grad_norm": 0.2572653293609619, "learning_rate": 1.6995786588151683e-06, "loss": 0.0322, "step": 54075 }, { "epoch": 2.7453170211685873, "grad_norm": 0.37225455045700073, "learning_rate": 1.6978865255427518e-06, "loss": 0.0297, "step": 54080 }, { "epoch": 2.7455708411594495, "grad_norm": 0.3000081777572632, "learning_rate": 1.6961943922703354e-06, "loss": 0.0344, "step": 54085 }, { "epoch": 2.745824661150312, "grad_norm": 0.36823397874832153, "learning_rate": 1.6945022589979187e-06, "loss": 0.0302, "step": 54090 }, { "epoch": 2.7460784811411747, "grad_norm": 0.37122178077697754, "learning_rate": 1.6928101257255023e-06, "loss": 0.0316, "step": 54095 }, { "epoch": 2.7463323011320373, "grad_norm": 0.3983829617500305, "learning_rate": 1.6911179924530858e-06, "loss": 0.0296, "step": 54100 }, { "epoch": 2.7465861211229, "grad_norm": 0.40506190061569214, "learning_rate": 1.6894258591806691e-06, "loss": 0.031, "step": 54105 }, { "epoch": 2.746839941113762, "grad_norm": 0.45326289534568787, "learning_rate": 1.6877337259082527e-06, "loss": 0.0232, "step": 54110 }, { "epoch": 2.7470937611046247, "grad_norm": 0.23854999244213104, "learning_rate": 1.6860415926358362e-06, "loss": 0.0361, "step": 54115 }, { "epoch": 2.747347581095487, "grad_norm": 0.3053087890148163, "learning_rate": 1.6843494593634196e-06, "loss": 0.0328, "step": 54120 }, { "epoch": 2.7476014010863494, "grad_norm": 0.47979798913002014, "learning_rate": 1.682657326091003e-06, "loss": 0.0255, "step": 54125 }, { "epoch": 2.747855221077212, "grad_norm": 0.35441768169403076, "learning_rate": 1.6809651928185866e-06, "loss": 0.0294, "step": 54130 }, { "epoch": 2.7481090410680746, "grad_norm": 0.40479037165641785, "learning_rate": 1.6792730595461702e-06, "loss": 0.0313, "step": 54135 }, { "epoch": 2.7483628610589372, "grad_norm": 0.27825799584388733, "learning_rate": 1.6775809262737535e-06, "loss": 0.0245, "step": 54140 }, { "epoch": 2.7486166810497994, "grad_norm": 0.2868492901325226, "learning_rate": 1.675888793001337e-06, "loss": 0.0342, "step": 54145 }, { "epoch": 2.748870501040662, "grad_norm": 0.3349640667438507, "learning_rate": 1.6741966597289202e-06, "loss": 0.027, "step": 54150 }, { "epoch": 2.7491243210315246, "grad_norm": 0.29142335057258606, "learning_rate": 1.6725045264565037e-06, "loss": 0.0333, "step": 54155 }, { "epoch": 2.7493781410223868, "grad_norm": 0.327721506357193, "learning_rate": 1.6708123931840873e-06, "loss": 0.0307, "step": 54160 }, { "epoch": 2.7496319610132494, "grad_norm": 0.3892741799354553, "learning_rate": 1.6691202599116706e-06, "loss": 0.0313, "step": 54165 }, { "epoch": 2.749885781004112, "grad_norm": 0.3699101209640503, "learning_rate": 1.6674281266392542e-06, "loss": 0.0257, "step": 54170 }, { "epoch": 2.7501396009949746, "grad_norm": 0.30111369490623474, "learning_rate": 1.6657359933668377e-06, "loss": 0.032, "step": 54175 }, { "epoch": 2.7503934209858367, "grad_norm": 0.3970666229724884, "learning_rate": 1.6640438600944212e-06, "loss": 0.027, "step": 54180 }, { "epoch": 2.7506472409766993, "grad_norm": 0.600459635257721, "learning_rate": 1.6623517268220046e-06, "loss": 0.0373, "step": 54185 }, { "epoch": 2.750901060967562, "grad_norm": 0.5128913521766663, "learning_rate": 1.6606595935495881e-06, "loss": 0.0366, "step": 54190 }, { "epoch": 2.751154880958424, "grad_norm": 0.4742211103439331, "learning_rate": 1.6589674602771717e-06, "loss": 0.0414, "step": 54195 }, { "epoch": 2.7514087009492867, "grad_norm": 0.5436285734176636, "learning_rate": 1.657275327004755e-06, "loss": 0.0302, "step": 54200 }, { "epoch": 2.7516625209401493, "grad_norm": 0.19674985110759735, "learning_rate": 1.6555831937323385e-06, "loss": 0.0332, "step": 54205 }, { "epoch": 2.751916340931012, "grad_norm": 0.341109961271286, "learning_rate": 1.653891060459922e-06, "loss": 0.0322, "step": 54210 }, { "epoch": 2.752170160921874, "grad_norm": 0.2867673337459564, "learning_rate": 1.6521989271875054e-06, "loss": 0.0313, "step": 54215 }, { "epoch": 2.7524239809127367, "grad_norm": 0.24930787086486816, "learning_rate": 1.650506793915089e-06, "loss": 0.0294, "step": 54220 }, { "epoch": 2.7526778009035993, "grad_norm": 0.3115566074848175, "learning_rate": 1.6488146606426725e-06, "loss": 0.0261, "step": 54225 }, { "epoch": 2.7529316208944614, "grad_norm": 0.36223429441452026, "learning_rate": 1.6471225273702556e-06, "loss": 0.0422, "step": 54230 }, { "epoch": 2.753185440885324, "grad_norm": 0.3967058062553406, "learning_rate": 1.6454303940978392e-06, "loss": 0.0316, "step": 54235 }, { "epoch": 2.7534392608761866, "grad_norm": 0.2525790333747864, "learning_rate": 1.6437382608254227e-06, "loss": 0.029, "step": 54240 }, { "epoch": 2.7536930808670492, "grad_norm": 0.4637283384799957, "learning_rate": 1.642046127553006e-06, "loss": 0.0283, "step": 54245 }, { "epoch": 2.753946900857912, "grad_norm": 0.34502989053726196, "learning_rate": 1.6403539942805896e-06, "loss": 0.0317, "step": 54250 }, { "epoch": 2.754200720848774, "grad_norm": 0.2657451331615448, "learning_rate": 1.6386618610081731e-06, "loss": 0.0304, "step": 54255 }, { "epoch": 2.7544545408396366, "grad_norm": 0.2720555365085602, "learning_rate": 1.6369697277357565e-06, "loss": 0.0266, "step": 54260 }, { "epoch": 2.7547083608304987, "grad_norm": 0.31259456276893616, "learning_rate": 1.63527759446334e-06, "loss": 0.0306, "step": 54265 }, { "epoch": 2.7549621808213614, "grad_norm": 0.23816870152950287, "learning_rate": 1.6335854611909236e-06, "loss": 0.031, "step": 54270 }, { "epoch": 2.755216000812224, "grad_norm": 0.3377796709537506, "learning_rate": 1.631893327918507e-06, "loss": 0.0259, "step": 54275 }, { "epoch": 2.7554698208030866, "grad_norm": 0.4004886746406555, "learning_rate": 1.6302011946460904e-06, "loss": 0.0311, "step": 54280 }, { "epoch": 2.755723640793949, "grad_norm": 0.40718159079551697, "learning_rate": 1.628509061373674e-06, "loss": 0.0317, "step": 54285 }, { "epoch": 2.7559774607848113, "grad_norm": 0.32053378224372864, "learning_rate": 1.6268169281012575e-06, "loss": 0.0399, "step": 54290 }, { "epoch": 2.756231280775674, "grad_norm": 0.24005572497844696, "learning_rate": 1.6251247948288409e-06, "loss": 0.0293, "step": 54295 }, { "epoch": 2.7564851007665365, "grad_norm": 0.20497609674930573, "learning_rate": 1.6234326615564244e-06, "loss": 0.0265, "step": 54300 }, { "epoch": 2.7567389207573987, "grad_norm": 0.23367075622081757, "learning_rate": 1.621740528284008e-06, "loss": 0.0284, "step": 54305 }, { "epoch": 2.7569927407482613, "grad_norm": 0.2720516622066498, "learning_rate": 1.620048395011591e-06, "loss": 0.0291, "step": 54310 }, { "epoch": 2.757246560739124, "grad_norm": 0.5106813907623291, "learning_rate": 1.6183562617391746e-06, "loss": 0.0322, "step": 54315 }, { "epoch": 2.7575003807299865, "grad_norm": 0.36109301447868347, "learning_rate": 1.6166641284667582e-06, "loss": 0.026, "step": 54320 }, { "epoch": 2.7577542007208486, "grad_norm": 0.29374897480010986, "learning_rate": 1.6149719951943415e-06, "loss": 0.0272, "step": 54325 }, { "epoch": 2.7580080207117113, "grad_norm": 0.35226473212242126, "learning_rate": 1.613279861921925e-06, "loss": 0.0372, "step": 54330 }, { "epoch": 2.758261840702574, "grad_norm": 0.36463406682014465, "learning_rate": 1.6115877286495086e-06, "loss": 0.0308, "step": 54335 }, { "epoch": 2.758515660693436, "grad_norm": 0.5041033029556274, "learning_rate": 1.609895595377092e-06, "loss": 0.0309, "step": 54340 }, { "epoch": 2.7587694806842986, "grad_norm": 0.24311576783657074, "learning_rate": 1.6082034621046755e-06, "loss": 0.0296, "step": 54345 }, { "epoch": 2.759023300675161, "grad_norm": 0.5578087568283081, "learning_rate": 1.606511328832259e-06, "loss": 0.0323, "step": 54350 }, { "epoch": 2.759277120666024, "grad_norm": 0.2777861952781677, "learning_rate": 1.6048191955598425e-06, "loss": 0.0297, "step": 54355 }, { "epoch": 2.759530940656886, "grad_norm": 0.3467789590358734, "learning_rate": 1.6031270622874259e-06, "loss": 0.0308, "step": 54360 }, { "epoch": 2.7597847606477486, "grad_norm": 0.3258620500564575, "learning_rate": 1.6014349290150094e-06, "loss": 0.0259, "step": 54365 }, { "epoch": 2.760038580638611, "grad_norm": 0.32327863574028015, "learning_rate": 1.599742795742593e-06, "loss": 0.0298, "step": 54370 }, { "epoch": 2.7602924006294733, "grad_norm": 0.2921620309352875, "learning_rate": 1.5980506624701763e-06, "loss": 0.0314, "step": 54375 }, { "epoch": 2.760546220620336, "grad_norm": 0.3468886911869049, "learning_rate": 1.5963585291977598e-06, "loss": 0.0328, "step": 54380 }, { "epoch": 2.7608000406111985, "grad_norm": 0.4291483759880066, "learning_rate": 1.5946663959253434e-06, "loss": 0.0328, "step": 54385 }, { "epoch": 2.761053860602061, "grad_norm": 0.42713791131973267, "learning_rate": 1.5929742626529265e-06, "loss": 0.0256, "step": 54390 }, { "epoch": 2.7613076805929238, "grad_norm": 0.3330911099910736, "learning_rate": 1.59128212938051e-06, "loss": 0.0306, "step": 54395 }, { "epoch": 2.761561500583786, "grad_norm": 0.23655596375465393, "learning_rate": 1.5895899961080936e-06, "loss": 0.0303, "step": 54400 }, { "epoch": 2.7618153205746485, "grad_norm": 0.22869274020195007, "learning_rate": 1.587897862835677e-06, "loss": 0.0279, "step": 54405 }, { "epoch": 2.7620691405655107, "grad_norm": 0.37720853090286255, "learning_rate": 1.5862057295632605e-06, "loss": 0.0356, "step": 54410 }, { "epoch": 2.7623229605563733, "grad_norm": 0.4346856474876404, "learning_rate": 1.584513596290844e-06, "loss": 0.0294, "step": 54415 }, { "epoch": 2.762576780547236, "grad_norm": 0.3101567029953003, "learning_rate": 1.5828214630184274e-06, "loss": 0.0287, "step": 54420 }, { "epoch": 2.7628306005380985, "grad_norm": 0.46257123351097107, "learning_rate": 1.581129329746011e-06, "loss": 0.0243, "step": 54425 }, { "epoch": 2.763084420528961, "grad_norm": 0.42117181420326233, "learning_rate": 1.5794371964735944e-06, "loss": 0.0328, "step": 54430 }, { "epoch": 2.7633382405198232, "grad_norm": 0.2318694293498993, "learning_rate": 1.5777450632011778e-06, "loss": 0.0335, "step": 54435 }, { "epoch": 2.763592060510686, "grad_norm": 0.1912282556295395, "learning_rate": 1.5760529299287613e-06, "loss": 0.0294, "step": 54440 }, { "epoch": 2.7638458805015484, "grad_norm": 0.42659279704093933, "learning_rate": 1.5743607966563449e-06, "loss": 0.0234, "step": 54445 }, { "epoch": 2.7640997004924106, "grad_norm": 0.3059816360473633, "learning_rate": 1.5726686633839284e-06, "loss": 0.0234, "step": 54450 }, { "epoch": 2.764353520483273, "grad_norm": 0.43603745102882385, "learning_rate": 1.5709765301115117e-06, "loss": 0.0371, "step": 54455 }, { "epoch": 2.764607340474136, "grad_norm": 0.38762128353118896, "learning_rate": 1.5692843968390953e-06, "loss": 0.0337, "step": 54460 }, { "epoch": 2.7648611604649984, "grad_norm": 0.1974819451570511, "learning_rate": 1.5675922635666788e-06, "loss": 0.0254, "step": 54465 }, { "epoch": 2.7651149804558606, "grad_norm": 0.3987434506416321, "learning_rate": 1.565900130294262e-06, "loss": 0.0273, "step": 54470 }, { "epoch": 2.765368800446723, "grad_norm": 0.312901109457016, "learning_rate": 1.5642079970218455e-06, "loss": 0.0346, "step": 54475 }, { "epoch": 2.7656226204375858, "grad_norm": 0.3683776259422302, "learning_rate": 1.5625158637494288e-06, "loss": 0.0341, "step": 54480 }, { "epoch": 2.765876440428448, "grad_norm": 0.3106054365634918, "learning_rate": 1.5608237304770124e-06, "loss": 0.0288, "step": 54485 }, { "epoch": 2.7661302604193105, "grad_norm": 0.275949627161026, "learning_rate": 1.559131597204596e-06, "loss": 0.0275, "step": 54490 }, { "epoch": 2.766384080410173, "grad_norm": 0.39345717430114746, "learning_rate": 1.5574394639321795e-06, "loss": 0.0312, "step": 54495 }, { "epoch": 2.7666379004010357, "grad_norm": 0.4222765862941742, "learning_rate": 1.5557473306597628e-06, "loss": 0.0295, "step": 54500 }, { "epoch": 2.766891720391898, "grad_norm": 0.5655775666236877, "learning_rate": 1.5540551973873463e-06, "loss": 0.0336, "step": 54505 }, { "epoch": 2.7671455403827605, "grad_norm": 0.34256479144096375, "learning_rate": 1.5523630641149299e-06, "loss": 0.0305, "step": 54510 }, { "epoch": 2.767399360373623, "grad_norm": 0.31936198472976685, "learning_rate": 1.5506709308425132e-06, "loss": 0.035, "step": 54515 }, { "epoch": 2.7676531803644853, "grad_norm": 0.5186873078346252, "learning_rate": 1.5489787975700968e-06, "loss": 0.0349, "step": 54520 }, { "epoch": 2.767907000355348, "grad_norm": 0.3005698323249817, "learning_rate": 1.5472866642976803e-06, "loss": 0.0384, "step": 54525 }, { "epoch": 2.7681608203462105, "grad_norm": 0.41957610845565796, "learning_rate": 1.5455945310252636e-06, "loss": 0.033, "step": 54530 }, { "epoch": 2.768414640337073, "grad_norm": 0.21110914647579193, "learning_rate": 1.5439023977528472e-06, "loss": 0.034, "step": 54535 }, { "epoch": 2.7686684603279357, "grad_norm": 0.4496798813343048, "learning_rate": 1.5422102644804307e-06, "loss": 0.0327, "step": 54540 }, { "epoch": 2.768922280318798, "grad_norm": 0.33650708198547363, "learning_rate": 1.5405181312080143e-06, "loss": 0.0265, "step": 54545 }, { "epoch": 2.7691761003096604, "grad_norm": 0.31067219376564026, "learning_rate": 1.5388259979355976e-06, "loss": 0.031, "step": 54550 }, { "epoch": 2.7694299203005226, "grad_norm": 0.4140172600746155, "learning_rate": 1.537133864663181e-06, "loss": 0.0309, "step": 54555 }, { "epoch": 2.769683740291385, "grad_norm": 0.5254555940628052, "learning_rate": 1.5354417313907643e-06, "loss": 0.0329, "step": 54560 }, { "epoch": 2.769937560282248, "grad_norm": 0.3984996974468231, "learning_rate": 1.5337495981183478e-06, "loss": 0.0373, "step": 54565 }, { "epoch": 2.7701913802731104, "grad_norm": 0.3657810091972351, "learning_rate": 1.5320574648459314e-06, "loss": 0.0234, "step": 54570 }, { "epoch": 2.770445200263973, "grad_norm": 0.4152500033378601, "learning_rate": 1.5303653315735147e-06, "loss": 0.0374, "step": 54575 }, { "epoch": 2.770699020254835, "grad_norm": 0.3096051812171936, "learning_rate": 1.5286731983010982e-06, "loss": 0.029, "step": 54580 }, { "epoch": 2.7709528402456978, "grad_norm": 0.27113279700279236, "learning_rate": 1.5269810650286818e-06, "loss": 0.0353, "step": 54585 }, { "epoch": 2.7712066602365604, "grad_norm": 0.8830772638320923, "learning_rate": 1.5252889317562653e-06, "loss": 0.0293, "step": 54590 }, { "epoch": 2.7714604802274225, "grad_norm": 0.2550186812877655, "learning_rate": 1.5235967984838487e-06, "loss": 0.0345, "step": 54595 }, { "epoch": 2.771714300218285, "grad_norm": 0.5828475952148438, "learning_rate": 1.5219046652114322e-06, "loss": 0.0303, "step": 54600 }, { "epoch": 2.7719681202091477, "grad_norm": 0.27280479669570923, "learning_rate": 1.5202125319390158e-06, "loss": 0.0247, "step": 54605 }, { "epoch": 2.7722219402000103, "grad_norm": 0.3151116967201233, "learning_rate": 1.518520398666599e-06, "loss": 0.0344, "step": 54610 }, { "epoch": 2.7724757601908725, "grad_norm": 0.279140442609787, "learning_rate": 1.5168282653941826e-06, "loss": 0.0292, "step": 54615 }, { "epoch": 2.772729580181735, "grad_norm": 0.37659916281700134, "learning_rate": 1.5151361321217662e-06, "loss": 0.0321, "step": 54620 }, { "epoch": 2.7729834001725977, "grad_norm": 0.3131263256072998, "learning_rate": 1.5134439988493497e-06, "loss": 0.026, "step": 54625 }, { "epoch": 2.77323722016346, "grad_norm": 0.20809635519981384, "learning_rate": 1.511751865576933e-06, "loss": 0.0307, "step": 54630 }, { "epoch": 2.7734910401543225, "grad_norm": 0.30809369683265686, "learning_rate": 1.5100597323045164e-06, "loss": 0.0343, "step": 54635 }, { "epoch": 2.773744860145185, "grad_norm": 0.2556045949459076, "learning_rate": 1.5083675990320997e-06, "loss": 0.0235, "step": 54640 }, { "epoch": 2.7739986801360477, "grad_norm": 0.1649613231420517, "learning_rate": 1.5066754657596833e-06, "loss": 0.0305, "step": 54645 }, { "epoch": 2.7742525001269103, "grad_norm": 0.23043885827064514, "learning_rate": 1.5049833324872668e-06, "loss": 0.0304, "step": 54650 }, { "epoch": 2.7745063201177724, "grad_norm": 0.5607897043228149, "learning_rate": 1.5032911992148501e-06, "loss": 0.0281, "step": 54655 }, { "epoch": 2.774760140108635, "grad_norm": 0.28994089365005493, "learning_rate": 1.5015990659424337e-06, "loss": 0.029, "step": 54660 }, { "epoch": 2.775013960099497, "grad_norm": 0.3250032961368561, "learning_rate": 1.4999069326700172e-06, "loss": 0.0268, "step": 54665 }, { "epoch": 2.77526778009036, "grad_norm": 0.5446613430976868, "learning_rate": 1.4982147993976008e-06, "loss": 0.0253, "step": 54670 }, { "epoch": 2.7755216000812224, "grad_norm": 0.33558404445648193, "learning_rate": 1.496522666125184e-06, "loss": 0.0323, "step": 54675 }, { "epoch": 2.775775420072085, "grad_norm": 0.37360864877700806, "learning_rate": 1.4948305328527676e-06, "loss": 0.0341, "step": 54680 }, { "epoch": 2.7760292400629476, "grad_norm": 0.39937227964401245, "learning_rate": 1.4931383995803512e-06, "loss": 0.0308, "step": 54685 }, { "epoch": 2.7762830600538098, "grad_norm": 0.4435569941997528, "learning_rate": 1.4914462663079345e-06, "loss": 0.0347, "step": 54690 }, { "epoch": 2.7765368800446724, "grad_norm": 0.2976076602935791, "learning_rate": 1.489754133035518e-06, "loss": 0.0308, "step": 54695 }, { "epoch": 2.776790700035535, "grad_norm": 0.5093637704849243, "learning_rate": 1.4880619997631016e-06, "loss": 0.0256, "step": 54700 }, { "epoch": 2.777044520026397, "grad_norm": 0.2233898788690567, "learning_rate": 1.486369866490685e-06, "loss": 0.0328, "step": 54705 }, { "epoch": 2.7772983400172597, "grad_norm": 0.3220416009426117, "learning_rate": 1.4846777332182685e-06, "loss": 0.0316, "step": 54710 }, { "epoch": 2.7775521600081223, "grad_norm": 0.3018849194049835, "learning_rate": 1.4829855999458518e-06, "loss": 0.0328, "step": 54715 }, { "epoch": 2.777805979998985, "grad_norm": 0.29990217089653015, "learning_rate": 1.4812934666734352e-06, "loss": 0.025, "step": 54720 }, { "epoch": 2.778059799989847, "grad_norm": 0.2418762594461441, "learning_rate": 1.4796013334010187e-06, "loss": 0.0323, "step": 54725 }, { "epoch": 2.7783136199807097, "grad_norm": 0.46849316358566284, "learning_rate": 1.4779092001286022e-06, "loss": 0.0352, "step": 54730 }, { "epoch": 2.7785674399715723, "grad_norm": 0.4002370536327362, "learning_rate": 1.4762170668561856e-06, "loss": 0.0331, "step": 54735 }, { "epoch": 2.7788212599624345, "grad_norm": 0.37340641021728516, "learning_rate": 1.4745249335837691e-06, "loss": 0.028, "step": 54740 }, { "epoch": 2.779075079953297, "grad_norm": 0.20235440135002136, "learning_rate": 1.4728328003113527e-06, "loss": 0.0298, "step": 54745 }, { "epoch": 2.7793288999441597, "grad_norm": 0.379058301448822, "learning_rate": 1.471140667038936e-06, "loss": 0.0319, "step": 54750 }, { "epoch": 2.7795827199350223, "grad_norm": 0.7213035225868225, "learning_rate": 1.4694485337665195e-06, "loss": 0.0343, "step": 54755 }, { "epoch": 2.7798365399258844, "grad_norm": 0.4405682384967804, "learning_rate": 1.467756400494103e-06, "loss": 0.0266, "step": 54760 }, { "epoch": 2.780090359916747, "grad_norm": 0.43502214550971985, "learning_rate": 1.4660642672216866e-06, "loss": 0.026, "step": 54765 }, { "epoch": 2.7803441799076096, "grad_norm": 0.3652034103870392, "learning_rate": 1.46437213394927e-06, "loss": 0.0295, "step": 54770 }, { "epoch": 2.780597999898472, "grad_norm": 0.2877683937549591, "learning_rate": 1.4626800006768535e-06, "loss": 0.0298, "step": 54775 }, { "epoch": 2.7808518198893344, "grad_norm": 0.24761992692947388, "learning_rate": 1.460987867404437e-06, "loss": 0.0292, "step": 54780 }, { "epoch": 2.781105639880197, "grad_norm": 0.2824486792087555, "learning_rate": 1.4592957341320204e-06, "loss": 0.0317, "step": 54785 }, { "epoch": 2.7813594598710596, "grad_norm": 0.33154594898223877, "learning_rate": 1.457603600859604e-06, "loss": 0.036, "step": 54790 }, { "epoch": 2.781613279861922, "grad_norm": 0.3138459324836731, "learning_rate": 1.455911467587187e-06, "loss": 0.0263, "step": 54795 }, { "epoch": 2.7818670998527844, "grad_norm": 0.39758846163749695, "learning_rate": 1.4542193343147706e-06, "loss": 0.0324, "step": 54800 }, { "epoch": 2.782120919843647, "grad_norm": 0.2929428219795227, "learning_rate": 1.4525272010423541e-06, "loss": 0.0262, "step": 54805 }, { "epoch": 2.782374739834509, "grad_norm": 0.3221243619918823, "learning_rate": 1.4508350677699377e-06, "loss": 0.0363, "step": 54810 }, { "epoch": 2.7826285598253717, "grad_norm": 0.4146369397640228, "learning_rate": 1.449142934497521e-06, "loss": 0.0335, "step": 54815 }, { "epoch": 2.7828823798162343, "grad_norm": 0.48562976717948914, "learning_rate": 1.4474508012251046e-06, "loss": 0.0375, "step": 54820 }, { "epoch": 2.783136199807097, "grad_norm": 0.7758735418319702, "learning_rate": 1.4457586679526881e-06, "loss": 0.0289, "step": 54825 }, { "epoch": 2.7833900197979595, "grad_norm": 0.33634909987449646, "learning_rate": 1.4440665346802714e-06, "loss": 0.0261, "step": 54830 }, { "epoch": 2.7836438397888217, "grad_norm": 0.3155843913555145, "learning_rate": 1.442374401407855e-06, "loss": 0.0344, "step": 54835 }, { "epoch": 2.7838976597796843, "grad_norm": 0.2868061363697052, "learning_rate": 1.4406822681354385e-06, "loss": 0.0326, "step": 54840 }, { "epoch": 2.784151479770547, "grad_norm": 0.35042792558670044, "learning_rate": 1.4389901348630219e-06, "loss": 0.0284, "step": 54845 }, { "epoch": 2.784405299761409, "grad_norm": 0.2201160043478012, "learning_rate": 1.4372980015906054e-06, "loss": 0.0221, "step": 54850 }, { "epoch": 2.7846591197522716, "grad_norm": 0.3119716942310333, "learning_rate": 1.435605868318189e-06, "loss": 0.0265, "step": 54855 }, { "epoch": 2.7849129397431343, "grad_norm": 0.4090033173561096, "learning_rate": 1.4339137350457725e-06, "loss": 0.0345, "step": 54860 }, { "epoch": 2.785166759733997, "grad_norm": 0.2550966143608093, "learning_rate": 1.4322216017733558e-06, "loss": 0.0311, "step": 54865 }, { "epoch": 2.785420579724859, "grad_norm": 0.27337196469306946, "learning_rate": 1.4305294685009394e-06, "loss": 0.0361, "step": 54870 }, { "epoch": 2.7856743997157216, "grad_norm": 0.26968151330947876, "learning_rate": 1.4288373352285225e-06, "loss": 0.0265, "step": 54875 }, { "epoch": 2.785928219706584, "grad_norm": 0.3220800757408142, "learning_rate": 1.427145201956106e-06, "loss": 0.0278, "step": 54880 }, { "epoch": 2.7861820396974464, "grad_norm": 0.2650590240955353, "learning_rate": 1.4254530686836896e-06, "loss": 0.0317, "step": 54885 }, { "epoch": 2.786435859688309, "grad_norm": 0.2614637017250061, "learning_rate": 1.4237609354112731e-06, "loss": 0.0273, "step": 54890 }, { "epoch": 2.7866896796791716, "grad_norm": 0.34072941541671753, "learning_rate": 1.4220688021388565e-06, "loss": 0.0249, "step": 54895 }, { "epoch": 2.786943499670034, "grad_norm": 0.2870447337627411, "learning_rate": 1.42037666886644e-06, "loss": 0.0274, "step": 54900 }, { "epoch": 2.7871973196608963, "grad_norm": 0.28836342692375183, "learning_rate": 1.4186845355940236e-06, "loss": 0.0328, "step": 54905 }, { "epoch": 2.787451139651759, "grad_norm": 0.4721055030822754, "learning_rate": 1.4169924023216069e-06, "loss": 0.0343, "step": 54910 }, { "epoch": 2.7877049596426215, "grad_norm": 0.23974347114562988, "learning_rate": 1.4153002690491904e-06, "loss": 0.027, "step": 54915 }, { "epoch": 2.7879587796334837, "grad_norm": 0.31404948234558105, "learning_rate": 1.413608135776774e-06, "loss": 0.0383, "step": 54920 }, { "epoch": 2.7882125996243463, "grad_norm": 0.26835718750953674, "learning_rate": 1.4119160025043573e-06, "loss": 0.0234, "step": 54925 }, { "epoch": 2.788466419615209, "grad_norm": 0.4135149419307709, "learning_rate": 1.4102238692319409e-06, "loss": 0.0293, "step": 54930 }, { "epoch": 2.7887202396060715, "grad_norm": 0.3677108883857727, "learning_rate": 1.4085317359595244e-06, "loss": 0.026, "step": 54935 }, { "epoch": 2.788974059596934, "grad_norm": 0.28418686985969543, "learning_rate": 1.406839602687108e-06, "loss": 0.0255, "step": 54940 }, { "epoch": 2.7892278795877963, "grad_norm": 0.369150847196579, "learning_rate": 1.4051474694146913e-06, "loss": 0.0335, "step": 54945 }, { "epoch": 2.789481699578659, "grad_norm": 0.23390887677669525, "learning_rate": 1.4034553361422748e-06, "loss": 0.0245, "step": 54950 }, { "epoch": 2.789735519569521, "grad_norm": 0.5765233039855957, "learning_rate": 1.401763202869858e-06, "loss": 0.026, "step": 54955 }, { "epoch": 2.7899893395603836, "grad_norm": 0.35066676139831543, "learning_rate": 1.4000710695974415e-06, "loss": 0.0316, "step": 54960 }, { "epoch": 2.7902431595512462, "grad_norm": 0.282090425491333, "learning_rate": 1.398378936325025e-06, "loss": 0.029, "step": 54965 }, { "epoch": 2.790496979542109, "grad_norm": 0.24391506612300873, "learning_rate": 1.3966868030526084e-06, "loss": 0.0289, "step": 54970 }, { "epoch": 2.7907507995329714, "grad_norm": 0.3046450912952423, "learning_rate": 1.394994669780192e-06, "loss": 0.0325, "step": 54975 }, { "epoch": 2.7910046195238336, "grad_norm": 0.2391621321439743, "learning_rate": 1.3933025365077755e-06, "loss": 0.0363, "step": 54980 }, { "epoch": 2.791258439514696, "grad_norm": 0.3334785997867584, "learning_rate": 1.391610403235359e-06, "loss": 0.0321, "step": 54985 }, { "epoch": 2.791512259505559, "grad_norm": 0.2553236782550812, "learning_rate": 1.3899182699629423e-06, "loss": 0.0232, "step": 54990 }, { "epoch": 2.791766079496421, "grad_norm": 0.2845880687236786, "learning_rate": 1.3882261366905259e-06, "loss": 0.0403, "step": 54995 }, { "epoch": 2.7920198994872836, "grad_norm": 0.23673295974731445, "learning_rate": 1.3865340034181094e-06, "loss": 0.0299, "step": 55000 }, { "epoch": 2.792273719478146, "grad_norm": 0.37878087162971497, "learning_rate": 1.3848418701456927e-06, "loss": 0.0454, "step": 55005 }, { "epoch": 2.792527539469009, "grad_norm": 0.3362523019313812, "learning_rate": 1.3831497368732763e-06, "loss": 0.0273, "step": 55010 }, { "epoch": 2.792781359459871, "grad_norm": 0.3414081335067749, "learning_rate": 1.3814576036008598e-06, "loss": 0.0353, "step": 55015 }, { "epoch": 2.7930351794507335, "grad_norm": 0.5782923102378845, "learning_rate": 1.3797654703284432e-06, "loss": 0.0391, "step": 55020 }, { "epoch": 2.793288999441596, "grad_norm": 0.33037516474723816, "learning_rate": 1.3780733370560267e-06, "loss": 0.0296, "step": 55025 }, { "epoch": 2.7935428194324583, "grad_norm": 0.3172570466995239, "learning_rate": 1.3763812037836103e-06, "loss": 0.0353, "step": 55030 }, { "epoch": 2.793796639423321, "grad_norm": 0.3240835964679718, "learning_rate": 1.3746890705111934e-06, "loss": 0.0338, "step": 55035 }, { "epoch": 2.7940504594141835, "grad_norm": 0.2889838218688965, "learning_rate": 1.372996937238777e-06, "loss": 0.0278, "step": 55040 }, { "epoch": 2.794304279405046, "grad_norm": 0.5404177308082581, "learning_rate": 1.3713048039663605e-06, "loss": 0.0267, "step": 55045 }, { "epoch": 2.7945580993959083, "grad_norm": 0.27731940150260925, "learning_rate": 1.3696126706939438e-06, "loss": 0.0269, "step": 55050 }, { "epoch": 2.794811919386771, "grad_norm": 0.46202221512794495, "learning_rate": 1.3679205374215273e-06, "loss": 0.0344, "step": 55055 }, { "epoch": 2.7950657393776335, "grad_norm": 0.4258192181587219, "learning_rate": 1.3662284041491109e-06, "loss": 0.0289, "step": 55060 }, { "epoch": 2.7953195593684956, "grad_norm": 0.574909508228302, "learning_rate": 1.3645362708766942e-06, "loss": 0.0306, "step": 55065 }, { "epoch": 2.7955733793593582, "grad_norm": 0.5580008625984192, "learning_rate": 1.3628441376042778e-06, "loss": 0.0283, "step": 55070 }, { "epoch": 2.795827199350221, "grad_norm": 0.24519678950309753, "learning_rate": 1.3611520043318613e-06, "loss": 0.0294, "step": 55075 }, { "epoch": 2.7960810193410834, "grad_norm": 0.351688414812088, "learning_rate": 1.3594598710594449e-06, "loss": 0.0326, "step": 55080 }, { "epoch": 2.796334839331946, "grad_norm": 0.544750452041626, "learning_rate": 1.3577677377870282e-06, "loss": 0.0308, "step": 55085 }, { "epoch": 2.796588659322808, "grad_norm": 0.2852821350097656, "learning_rate": 1.3560756045146117e-06, "loss": 0.0298, "step": 55090 }, { "epoch": 2.796842479313671, "grad_norm": 0.2739110589027405, "learning_rate": 1.3543834712421953e-06, "loss": 0.0313, "step": 55095 }, { "epoch": 2.797096299304533, "grad_norm": 0.26846620440483093, "learning_rate": 1.3526913379697786e-06, "loss": 0.0255, "step": 55100 }, { "epoch": 2.7973501192953956, "grad_norm": 0.3372206389904022, "learning_rate": 1.3509992046973622e-06, "loss": 0.0261, "step": 55105 }, { "epoch": 2.797603939286258, "grad_norm": 0.2873419523239136, "learning_rate": 1.3493070714249457e-06, "loss": 0.0322, "step": 55110 }, { "epoch": 2.7978577592771208, "grad_norm": 0.3785225749015808, "learning_rate": 1.347614938152529e-06, "loss": 0.0282, "step": 55115 }, { "epoch": 2.7981115792679834, "grad_norm": 0.3132956624031067, "learning_rate": 1.3459228048801124e-06, "loss": 0.0271, "step": 55120 }, { "epoch": 2.7983653992588455, "grad_norm": 0.22603264451026917, "learning_rate": 1.344230671607696e-06, "loss": 0.0328, "step": 55125 }, { "epoch": 2.798619219249708, "grad_norm": 0.2586395740509033, "learning_rate": 1.3425385383352792e-06, "loss": 0.0207, "step": 55130 }, { "epoch": 2.7988730392405707, "grad_norm": 0.8957403898239136, "learning_rate": 1.3408464050628628e-06, "loss": 0.0314, "step": 55135 }, { "epoch": 2.799126859231433, "grad_norm": 0.24698592722415924, "learning_rate": 1.3391542717904463e-06, "loss": 0.0292, "step": 55140 }, { "epoch": 2.7993806792222955, "grad_norm": 0.2640494704246521, "learning_rate": 1.3374621385180297e-06, "loss": 0.0289, "step": 55145 }, { "epoch": 2.799634499213158, "grad_norm": 0.2573885917663574, "learning_rate": 1.3357700052456132e-06, "loss": 0.0295, "step": 55150 }, { "epoch": 2.7998883192040207, "grad_norm": 0.2399040311574936, "learning_rate": 1.3340778719731968e-06, "loss": 0.028, "step": 55155 }, { "epoch": 2.800142139194883, "grad_norm": 0.41929957270622253, "learning_rate": 1.3323857387007803e-06, "loss": 0.0251, "step": 55160 }, { "epoch": 2.8003959591857455, "grad_norm": 0.40195733308792114, "learning_rate": 1.3306936054283636e-06, "loss": 0.022, "step": 55165 }, { "epoch": 2.800649779176608, "grad_norm": 0.31589677929878235, "learning_rate": 1.3290014721559472e-06, "loss": 0.0308, "step": 55170 }, { "epoch": 2.8009035991674702, "grad_norm": 0.3632115423679352, "learning_rate": 1.3273093388835307e-06, "loss": 0.0315, "step": 55175 }, { "epoch": 2.801157419158333, "grad_norm": 0.37809327244758606, "learning_rate": 1.325617205611114e-06, "loss": 0.0344, "step": 55180 }, { "epoch": 2.8014112391491954, "grad_norm": 0.3532311022281647, "learning_rate": 1.3239250723386976e-06, "loss": 0.0318, "step": 55185 }, { "epoch": 2.801665059140058, "grad_norm": 0.3482745885848999, "learning_rate": 1.3222329390662811e-06, "loss": 0.0354, "step": 55190 }, { "epoch": 2.80191887913092, "grad_norm": 0.3260227143764496, "learning_rate": 1.3205408057938645e-06, "loss": 0.0296, "step": 55195 }, { "epoch": 2.802172699121783, "grad_norm": 0.2941901385784149, "learning_rate": 1.3188486725214478e-06, "loss": 0.028, "step": 55200 }, { "epoch": 2.8024265191126454, "grad_norm": 0.3940282464027405, "learning_rate": 1.3171565392490314e-06, "loss": 0.0374, "step": 55205 }, { "epoch": 2.8026803391035076, "grad_norm": 0.2064363658428192, "learning_rate": 1.3154644059766147e-06, "loss": 0.027, "step": 55210 }, { "epoch": 2.80293415909437, "grad_norm": 0.303488165140152, "learning_rate": 1.3137722727041982e-06, "loss": 0.0314, "step": 55215 }, { "epoch": 2.8031879790852328, "grad_norm": 0.49318596720695496, "learning_rate": 1.3120801394317818e-06, "loss": 0.0298, "step": 55220 }, { "epoch": 2.8034417990760954, "grad_norm": 0.2664399743080139, "learning_rate": 1.3103880061593651e-06, "loss": 0.0303, "step": 55225 }, { "epoch": 2.803695619066958, "grad_norm": 0.2834978699684143, "learning_rate": 1.3086958728869487e-06, "loss": 0.0252, "step": 55230 }, { "epoch": 2.80394943905782, "grad_norm": 0.3490089178085327, "learning_rate": 1.3070037396145322e-06, "loss": 0.0221, "step": 55235 }, { "epoch": 2.8042032590486827, "grad_norm": 0.4803531765937805, "learning_rate": 1.3053116063421155e-06, "loss": 0.0222, "step": 55240 }, { "epoch": 2.804457079039545, "grad_norm": 0.4122765064239502, "learning_rate": 1.303619473069699e-06, "loss": 0.0318, "step": 55245 }, { "epoch": 2.8047108990304075, "grad_norm": 0.4039084315299988, "learning_rate": 1.3019273397972826e-06, "loss": 0.0321, "step": 55250 }, { "epoch": 2.80496471902127, "grad_norm": 0.32391849160194397, "learning_rate": 1.3002352065248662e-06, "loss": 0.0344, "step": 55255 }, { "epoch": 2.8052185390121327, "grad_norm": 0.3236011564731598, "learning_rate": 1.2985430732524495e-06, "loss": 0.0245, "step": 55260 }, { "epoch": 2.8054723590029953, "grad_norm": 0.41068461537361145, "learning_rate": 1.296850939980033e-06, "loss": 0.0278, "step": 55265 }, { "epoch": 2.8057261789938575, "grad_norm": 0.2617715895175934, "learning_rate": 1.2951588067076166e-06, "loss": 0.0337, "step": 55270 }, { "epoch": 2.80597999898472, "grad_norm": 0.32993102073669434, "learning_rate": 1.2934666734352e-06, "loss": 0.0315, "step": 55275 }, { "epoch": 2.8062338189755827, "grad_norm": 0.29497578740119934, "learning_rate": 1.2917745401627833e-06, "loss": 0.0282, "step": 55280 }, { "epoch": 2.806487638966445, "grad_norm": 0.3863518238067627, "learning_rate": 1.2900824068903666e-06, "loss": 0.0311, "step": 55285 }, { "epoch": 2.8067414589573074, "grad_norm": 0.29784294962882996, "learning_rate": 1.2883902736179501e-06, "loss": 0.0233, "step": 55290 }, { "epoch": 2.80699527894817, "grad_norm": 0.2937968671321869, "learning_rate": 1.2866981403455337e-06, "loss": 0.0351, "step": 55295 }, { "epoch": 2.8072490989390326, "grad_norm": 0.3985872268676758, "learning_rate": 1.2850060070731172e-06, "loss": 0.0356, "step": 55300 }, { "epoch": 2.807502918929895, "grad_norm": 0.3850856423377991, "learning_rate": 1.2833138738007006e-06, "loss": 0.0315, "step": 55305 }, { "epoch": 2.8077567389207574, "grad_norm": 0.3013561964035034, "learning_rate": 1.281621740528284e-06, "loss": 0.0286, "step": 55310 }, { "epoch": 2.80801055891162, "grad_norm": 0.40981554985046387, "learning_rate": 1.2799296072558676e-06, "loss": 0.0333, "step": 55315 }, { "epoch": 2.808264378902482, "grad_norm": 0.2424926608800888, "learning_rate": 1.278237473983451e-06, "loss": 0.0323, "step": 55320 }, { "epoch": 2.8085181988933448, "grad_norm": 0.4015446901321411, "learning_rate": 1.2765453407110345e-06, "loss": 0.0266, "step": 55325 }, { "epoch": 2.8087720188842074, "grad_norm": 0.7582650780677795, "learning_rate": 1.274853207438618e-06, "loss": 0.0392, "step": 55330 }, { "epoch": 2.80902583887507, "grad_norm": 1.0067224502563477, "learning_rate": 1.2731610741662014e-06, "loss": 0.0278, "step": 55335 }, { "epoch": 2.809279658865932, "grad_norm": 0.27800679206848145, "learning_rate": 1.271468940893785e-06, "loss": 0.0315, "step": 55340 }, { "epoch": 2.8095334788567947, "grad_norm": 0.5860305428504944, "learning_rate": 1.2697768076213685e-06, "loss": 0.0311, "step": 55345 }, { "epoch": 2.8097872988476573, "grad_norm": 0.34320294857025146, "learning_rate": 1.268084674348952e-06, "loss": 0.0297, "step": 55350 }, { "epoch": 2.8100411188385195, "grad_norm": 0.3073110580444336, "learning_rate": 1.2663925410765354e-06, "loss": 0.0262, "step": 55355 }, { "epoch": 2.810294938829382, "grad_norm": 0.6585887670516968, "learning_rate": 1.2647004078041187e-06, "loss": 0.0306, "step": 55360 }, { "epoch": 2.8105487588202447, "grad_norm": 0.9610297083854675, "learning_rate": 1.263008274531702e-06, "loss": 0.0327, "step": 55365 }, { "epoch": 2.8108025788111073, "grad_norm": 0.2548528015613556, "learning_rate": 1.2613161412592856e-06, "loss": 0.0327, "step": 55370 }, { "epoch": 2.81105639880197, "grad_norm": 0.4964401423931122, "learning_rate": 1.2596240079868691e-06, "loss": 0.0359, "step": 55375 }, { "epoch": 2.811310218792832, "grad_norm": 0.27559715509414673, "learning_rate": 1.2579318747144524e-06, "loss": 0.0302, "step": 55380 }, { "epoch": 2.8115640387836947, "grad_norm": 0.6710149645805359, "learning_rate": 1.256239741442036e-06, "loss": 0.0304, "step": 55385 }, { "epoch": 2.811817858774557, "grad_norm": 1.0265967845916748, "learning_rate": 1.2545476081696195e-06, "loss": 0.029, "step": 55390 }, { "epoch": 2.8120716787654194, "grad_norm": 0.46441468596458435, "learning_rate": 1.252855474897203e-06, "loss": 0.0356, "step": 55395 }, { "epoch": 2.812325498756282, "grad_norm": 0.21976347267627716, "learning_rate": 1.2511633416247864e-06, "loss": 0.0228, "step": 55400 }, { "epoch": 2.8125793187471446, "grad_norm": 0.7399079203605652, "learning_rate": 1.24947120835237e-06, "loss": 0.0308, "step": 55405 }, { "epoch": 2.812833138738007, "grad_norm": 0.38101959228515625, "learning_rate": 1.2477790750799535e-06, "loss": 0.0259, "step": 55410 }, { "epoch": 2.8130869587288694, "grad_norm": 0.30995088815689087, "learning_rate": 1.2460869418075368e-06, "loss": 0.0305, "step": 55415 }, { "epoch": 2.813340778719732, "grad_norm": 0.4461381137371063, "learning_rate": 1.2443948085351202e-06, "loss": 0.0239, "step": 55420 }, { "epoch": 2.8135945987105946, "grad_norm": 0.33550941944122314, "learning_rate": 1.2427026752627037e-06, "loss": 0.0292, "step": 55425 }, { "epoch": 2.8138484187014567, "grad_norm": 0.25769856572151184, "learning_rate": 1.2410105419902873e-06, "loss": 0.0264, "step": 55430 }, { "epoch": 2.8141022386923193, "grad_norm": 0.39704206585884094, "learning_rate": 1.2393184087178708e-06, "loss": 0.0312, "step": 55435 }, { "epoch": 2.814356058683182, "grad_norm": 0.4254438877105713, "learning_rate": 1.2376262754454541e-06, "loss": 0.0329, "step": 55440 }, { "epoch": 2.8146098786740446, "grad_norm": 0.2788540720939636, "learning_rate": 1.2359341421730377e-06, "loss": 0.0289, "step": 55445 }, { "epoch": 2.8148636986649067, "grad_norm": 0.37466317415237427, "learning_rate": 1.2342420089006212e-06, "loss": 0.0301, "step": 55450 }, { "epoch": 2.8151175186557693, "grad_norm": 0.2767610251903534, "learning_rate": 1.2325498756282046e-06, "loss": 0.0343, "step": 55455 }, { "epoch": 2.815371338646632, "grad_norm": 0.30491724610328674, "learning_rate": 1.2308577423557879e-06, "loss": 0.0295, "step": 55460 }, { "epoch": 2.815625158637494, "grad_norm": 0.3644537031650543, "learning_rate": 1.2291656090833714e-06, "loss": 0.027, "step": 55465 }, { "epoch": 2.8158789786283567, "grad_norm": 0.5963749885559082, "learning_rate": 1.227473475810955e-06, "loss": 0.0304, "step": 55470 }, { "epoch": 2.8161327986192193, "grad_norm": 0.31018519401550293, "learning_rate": 1.2257813425385385e-06, "loss": 0.0256, "step": 55475 }, { "epoch": 2.816386618610082, "grad_norm": 0.311709463596344, "learning_rate": 1.2240892092661219e-06, "loss": 0.0264, "step": 55480 }, { "epoch": 2.8166404386009445, "grad_norm": 0.4451717734336853, "learning_rate": 1.2223970759937054e-06, "loss": 0.0304, "step": 55485 }, { "epoch": 2.8168942585918066, "grad_norm": 0.3190285265445709, "learning_rate": 1.220704942721289e-06, "loss": 0.0262, "step": 55490 }, { "epoch": 2.8171480785826692, "grad_norm": 0.29641619324684143, "learning_rate": 1.2190128094488723e-06, "loss": 0.0332, "step": 55495 }, { "epoch": 2.8174018985735314, "grad_norm": 0.3630962371826172, "learning_rate": 1.2173206761764556e-06, "loss": 0.0196, "step": 55500 }, { "epoch": 2.817655718564394, "grad_norm": 0.26542800664901733, "learning_rate": 1.2156285429040392e-06, "loss": 0.0379, "step": 55505 }, { "epoch": 2.8179095385552566, "grad_norm": 0.45730018615722656, "learning_rate": 1.2139364096316227e-06, "loss": 0.0365, "step": 55510 }, { "epoch": 2.818163358546119, "grad_norm": 0.3015592694282532, "learning_rate": 1.212244276359206e-06, "loss": 0.0382, "step": 55515 }, { "epoch": 2.818417178536982, "grad_norm": 0.2231670320034027, "learning_rate": 1.2105521430867896e-06, "loss": 0.0276, "step": 55520 }, { "epoch": 2.818670998527844, "grad_norm": 0.3285776674747467, "learning_rate": 1.2088600098143731e-06, "loss": 0.0272, "step": 55525 }, { "epoch": 2.8189248185187066, "grad_norm": 0.23628677427768707, "learning_rate": 1.2071678765419567e-06, "loss": 0.0328, "step": 55530 }, { "epoch": 2.819178638509569, "grad_norm": 0.5668171644210815, "learning_rate": 1.20547574326954e-06, "loss": 0.0319, "step": 55535 }, { "epoch": 2.8194324585004313, "grad_norm": 0.3623851537704468, "learning_rate": 1.2037836099971233e-06, "loss": 0.0282, "step": 55540 }, { "epoch": 2.819686278491294, "grad_norm": 0.2985495626926422, "learning_rate": 1.2020914767247069e-06, "loss": 0.0288, "step": 55545 }, { "epoch": 2.8199400984821565, "grad_norm": 0.32171109318733215, "learning_rate": 1.2003993434522904e-06, "loss": 0.03, "step": 55550 }, { "epoch": 2.820193918473019, "grad_norm": 0.3371548056602478, "learning_rate": 1.1987072101798738e-06, "loss": 0.0345, "step": 55555 }, { "epoch": 2.8204477384638813, "grad_norm": 0.3954242467880249, "learning_rate": 1.1970150769074573e-06, "loss": 0.0306, "step": 55560 }, { "epoch": 2.820701558454744, "grad_norm": 0.34994304180145264, "learning_rate": 1.1953229436350408e-06, "loss": 0.0276, "step": 55565 }, { "epoch": 2.8209553784456065, "grad_norm": 0.34854093194007874, "learning_rate": 1.1936308103626244e-06, "loss": 0.031, "step": 55570 }, { "epoch": 2.8212091984364687, "grad_norm": 0.3053123652935028, "learning_rate": 1.1919386770902077e-06, "loss": 0.0298, "step": 55575 }, { "epoch": 2.8214630184273313, "grad_norm": 0.3606565296649933, "learning_rate": 1.1902465438177913e-06, "loss": 0.0282, "step": 55580 }, { "epoch": 2.821716838418194, "grad_norm": 0.3627634048461914, "learning_rate": 1.1885544105453746e-06, "loss": 0.031, "step": 55585 }, { "epoch": 2.8219706584090565, "grad_norm": 0.30210357904434204, "learning_rate": 1.1868622772729581e-06, "loss": 0.0233, "step": 55590 }, { "epoch": 2.8222244783999186, "grad_norm": 0.318068265914917, "learning_rate": 1.1851701440005415e-06, "loss": 0.0287, "step": 55595 }, { "epoch": 2.8224782983907812, "grad_norm": 0.24310652911663055, "learning_rate": 1.183478010728125e-06, "loss": 0.0317, "step": 55600 }, { "epoch": 2.822732118381644, "grad_norm": 0.33054736256599426, "learning_rate": 1.1817858774557086e-06, "loss": 0.0297, "step": 55605 }, { "epoch": 2.822985938372506, "grad_norm": 0.33596691489219666, "learning_rate": 1.1800937441832921e-06, "loss": 0.0331, "step": 55610 }, { "epoch": 2.8232397583633686, "grad_norm": 0.3044825792312622, "learning_rate": 1.1784016109108754e-06, "loss": 0.0281, "step": 55615 }, { "epoch": 2.823493578354231, "grad_norm": 0.3748861253261566, "learning_rate": 1.176709477638459e-06, "loss": 0.0272, "step": 55620 }, { "epoch": 2.823747398345094, "grad_norm": 0.28317007422447205, "learning_rate": 1.1750173443660423e-06, "loss": 0.027, "step": 55625 }, { "epoch": 2.8240012183359564, "grad_norm": 0.3502991795539856, "learning_rate": 1.1733252110936259e-06, "loss": 0.0318, "step": 55630 }, { "epoch": 2.8242550383268186, "grad_norm": 0.42141517996788025, "learning_rate": 1.1716330778212092e-06, "loss": 0.0389, "step": 55635 }, { "epoch": 2.824508858317681, "grad_norm": 0.30312493443489075, "learning_rate": 1.1699409445487927e-06, "loss": 0.0276, "step": 55640 }, { "epoch": 2.8247626783085433, "grad_norm": 0.34598830342292786, "learning_rate": 1.1682488112763763e-06, "loss": 0.0298, "step": 55645 }, { "epoch": 2.825016498299406, "grad_norm": 0.5238077640533447, "learning_rate": 1.1665566780039596e-06, "loss": 0.0283, "step": 55650 }, { "epoch": 2.8252703182902685, "grad_norm": 0.2969505190849304, "learning_rate": 1.1648645447315432e-06, "loss": 0.0256, "step": 55655 }, { "epoch": 2.825524138281131, "grad_norm": 0.18817229568958282, "learning_rate": 1.1631724114591267e-06, "loss": 0.0243, "step": 55660 }, { "epoch": 2.8257779582719937, "grad_norm": 0.2759462594985962, "learning_rate": 1.16148027818671e-06, "loss": 0.0291, "step": 55665 }, { "epoch": 2.826031778262856, "grad_norm": 0.26524046063423157, "learning_rate": 1.1597881449142936e-06, "loss": 0.0374, "step": 55670 }, { "epoch": 2.8262855982537185, "grad_norm": 0.2896076440811157, "learning_rate": 1.158096011641877e-06, "loss": 0.0283, "step": 55675 }, { "epoch": 2.826539418244581, "grad_norm": 0.598273754119873, "learning_rate": 1.1564038783694605e-06, "loss": 0.0348, "step": 55680 }, { "epoch": 2.8267932382354433, "grad_norm": 0.38251566886901855, "learning_rate": 1.154711745097044e-06, "loss": 0.0281, "step": 55685 }, { "epoch": 2.827047058226306, "grad_norm": 0.32450079917907715, "learning_rate": 1.1530196118246273e-06, "loss": 0.0327, "step": 55690 }, { "epoch": 2.8273008782171685, "grad_norm": 0.24178165197372437, "learning_rate": 1.1513274785522109e-06, "loss": 0.0259, "step": 55695 }, { "epoch": 2.827554698208031, "grad_norm": 0.4267941117286682, "learning_rate": 1.1496353452797944e-06, "loss": 0.0364, "step": 55700 }, { "epoch": 2.8278085181988932, "grad_norm": 0.2857091426849365, "learning_rate": 1.1479432120073778e-06, "loss": 0.0295, "step": 55705 }, { "epoch": 2.828062338189756, "grad_norm": 0.24375779926776886, "learning_rate": 1.1462510787349613e-06, "loss": 0.0227, "step": 55710 }, { "epoch": 2.8283161581806184, "grad_norm": 0.3215489685535431, "learning_rate": 1.1445589454625446e-06, "loss": 0.0341, "step": 55715 }, { "epoch": 2.8285699781714806, "grad_norm": 0.23902076482772827, "learning_rate": 1.1428668121901282e-06, "loss": 0.0302, "step": 55720 }, { "epoch": 2.828823798162343, "grad_norm": 0.3301446735858917, "learning_rate": 1.1411746789177117e-06, "loss": 0.0301, "step": 55725 }, { "epoch": 2.829077618153206, "grad_norm": 0.516697108745575, "learning_rate": 1.139482545645295e-06, "loss": 0.0381, "step": 55730 }, { "epoch": 2.8293314381440684, "grad_norm": 0.30288365483283997, "learning_rate": 1.1377904123728786e-06, "loss": 0.0317, "step": 55735 }, { "epoch": 2.8295852581349306, "grad_norm": 0.42075833678245544, "learning_rate": 1.1360982791004622e-06, "loss": 0.0278, "step": 55740 }, { "epoch": 2.829839078125793, "grad_norm": 0.5544710755348206, "learning_rate": 1.1344061458280455e-06, "loss": 0.0245, "step": 55745 }, { "epoch": 2.8300928981166558, "grad_norm": 0.5366796851158142, "learning_rate": 1.132714012555629e-06, "loss": 0.0281, "step": 55750 }, { "epoch": 2.830346718107518, "grad_norm": 0.2638997435569763, "learning_rate": 1.1310218792832124e-06, "loss": 0.0318, "step": 55755 }, { "epoch": 2.8306005380983805, "grad_norm": 0.449851393699646, "learning_rate": 1.129329746010796e-06, "loss": 0.0354, "step": 55760 }, { "epoch": 2.830854358089243, "grad_norm": 0.4421868324279785, "learning_rate": 1.1276376127383794e-06, "loss": 0.0262, "step": 55765 }, { "epoch": 2.8311081780801057, "grad_norm": 0.18787184357643127, "learning_rate": 1.1259454794659628e-06, "loss": 0.0308, "step": 55770 }, { "epoch": 2.8313619980709683, "grad_norm": 0.7075026631355286, "learning_rate": 1.1242533461935463e-06, "loss": 0.0309, "step": 55775 }, { "epoch": 2.8316158180618305, "grad_norm": 0.17095202207565308, "learning_rate": 1.1225612129211299e-06, "loss": 0.0243, "step": 55780 }, { "epoch": 2.831869638052693, "grad_norm": 0.38210830092430115, "learning_rate": 1.1208690796487132e-06, "loss": 0.0353, "step": 55785 }, { "epoch": 2.8321234580435553, "grad_norm": 0.23966944217681885, "learning_rate": 1.1191769463762967e-06, "loss": 0.0292, "step": 55790 }, { "epoch": 2.832377278034418, "grad_norm": 0.18793806433677673, "learning_rate": 1.11748481310388e-06, "loss": 0.0326, "step": 55795 }, { "epoch": 2.8326310980252805, "grad_norm": 0.652092695236206, "learning_rate": 1.1157926798314636e-06, "loss": 0.0278, "step": 55800 }, { "epoch": 2.832884918016143, "grad_norm": 0.30326351523399353, "learning_rate": 1.1141005465590472e-06, "loss": 0.0267, "step": 55805 }, { "epoch": 2.8331387380070057, "grad_norm": 0.34887462854385376, "learning_rate": 1.1124084132866305e-06, "loss": 0.0313, "step": 55810 }, { "epoch": 2.833392557997868, "grad_norm": 0.48567840456962585, "learning_rate": 1.110716280014214e-06, "loss": 0.0318, "step": 55815 }, { "epoch": 2.8336463779887304, "grad_norm": 0.22675849497318268, "learning_rate": 1.1090241467417976e-06, "loss": 0.0284, "step": 55820 }, { "epoch": 2.833900197979593, "grad_norm": 0.36345478892326355, "learning_rate": 1.107332013469381e-06, "loss": 0.028, "step": 55825 }, { "epoch": 2.834154017970455, "grad_norm": 0.24584625661373138, "learning_rate": 1.1056398801969643e-06, "loss": 0.0317, "step": 55830 }, { "epoch": 2.834407837961318, "grad_norm": 0.18756209313869476, "learning_rate": 1.1039477469245478e-06, "loss": 0.0279, "step": 55835 }, { "epoch": 2.8346616579521804, "grad_norm": 0.2258630394935608, "learning_rate": 1.1022556136521313e-06, "loss": 0.0256, "step": 55840 }, { "epoch": 2.834915477943043, "grad_norm": 0.39259475469589233, "learning_rate": 1.1005634803797149e-06, "loss": 0.0267, "step": 55845 }, { "epoch": 2.835169297933905, "grad_norm": 0.43295443058013916, "learning_rate": 1.0988713471072982e-06, "loss": 0.0394, "step": 55850 }, { "epoch": 2.8354231179247678, "grad_norm": 0.5019866228103638, "learning_rate": 1.0971792138348818e-06, "loss": 0.0359, "step": 55855 }, { "epoch": 2.8356769379156304, "grad_norm": 0.3963499069213867, "learning_rate": 1.0954870805624653e-06, "loss": 0.0316, "step": 55860 }, { "epoch": 2.8359307579064925, "grad_norm": 0.5169487595558167, "learning_rate": 1.0937949472900486e-06, "loss": 0.0368, "step": 55865 }, { "epoch": 2.836184577897355, "grad_norm": 0.3360297679901123, "learning_rate": 1.092102814017632e-06, "loss": 0.0338, "step": 55870 }, { "epoch": 2.8364383978882177, "grad_norm": 0.3319953680038452, "learning_rate": 1.0904106807452155e-06, "loss": 0.0289, "step": 55875 }, { "epoch": 2.8366922178790803, "grad_norm": 0.24195292592048645, "learning_rate": 1.088718547472799e-06, "loss": 0.0266, "step": 55880 }, { "epoch": 2.8369460378699425, "grad_norm": 0.327484130859375, "learning_rate": 1.0870264142003826e-06, "loss": 0.0274, "step": 55885 }, { "epoch": 2.837199857860805, "grad_norm": 0.3096347451210022, "learning_rate": 1.085334280927966e-06, "loss": 0.0255, "step": 55890 }, { "epoch": 2.8374536778516677, "grad_norm": 0.2832336723804474, "learning_rate": 1.0836421476555495e-06, "loss": 0.0422, "step": 55895 }, { "epoch": 2.83770749784253, "grad_norm": 0.38523250818252563, "learning_rate": 1.081950014383133e-06, "loss": 0.0344, "step": 55900 }, { "epoch": 2.8379613178333924, "grad_norm": 0.331540584564209, "learning_rate": 1.0802578811107164e-06, "loss": 0.0305, "step": 55905 }, { "epoch": 2.838215137824255, "grad_norm": 0.25240784883499146, "learning_rate": 1.0785657478382997e-06, "loss": 0.0321, "step": 55910 }, { "epoch": 2.8384689578151177, "grad_norm": 0.26983770728111267, "learning_rate": 1.0768736145658832e-06, "loss": 0.027, "step": 55915 }, { "epoch": 2.8387227778059803, "grad_norm": 0.30436670780181885, "learning_rate": 1.0751814812934668e-06, "loss": 0.0244, "step": 55920 }, { "epoch": 2.8389765977968424, "grad_norm": 0.2570286989212036, "learning_rate": 1.0734893480210503e-06, "loss": 0.0286, "step": 55925 }, { "epoch": 2.839230417787705, "grad_norm": 0.3244872987270355, "learning_rate": 1.0717972147486337e-06, "loss": 0.0287, "step": 55930 }, { "epoch": 2.839484237778567, "grad_norm": 0.2520430088043213, "learning_rate": 1.0701050814762172e-06, "loss": 0.0339, "step": 55935 }, { "epoch": 2.83973805776943, "grad_norm": 0.2672327160835266, "learning_rate": 1.0684129482038008e-06, "loss": 0.0219, "step": 55940 }, { "epoch": 2.8399918777602924, "grad_norm": 0.2933124899864197, "learning_rate": 1.066720814931384e-06, "loss": 0.0357, "step": 55945 }, { "epoch": 2.840245697751155, "grad_norm": 0.43850472569465637, "learning_rate": 1.0650286816589674e-06, "loss": 0.0321, "step": 55950 }, { "epoch": 2.8404995177420176, "grad_norm": 0.412503719329834, "learning_rate": 1.063336548386551e-06, "loss": 0.0319, "step": 55955 }, { "epoch": 2.8407533377328797, "grad_norm": 0.3230607509613037, "learning_rate": 1.0616444151141345e-06, "loss": 0.0291, "step": 55960 }, { "epoch": 2.8410071577237423, "grad_norm": 0.25596120953559875, "learning_rate": 1.0599522818417178e-06, "loss": 0.0347, "step": 55965 }, { "epoch": 2.841260977714605, "grad_norm": 0.2851473093032837, "learning_rate": 1.0582601485693014e-06, "loss": 0.0307, "step": 55970 }, { "epoch": 2.841514797705467, "grad_norm": 0.3201811611652374, "learning_rate": 1.056568015296885e-06, "loss": 0.0296, "step": 55975 }, { "epoch": 2.8417686176963297, "grad_norm": 0.3185413181781769, "learning_rate": 1.0548758820244685e-06, "loss": 0.0304, "step": 55980 }, { "epoch": 2.8420224376871923, "grad_norm": 0.3229440748691559, "learning_rate": 1.0531837487520518e-06, "loss": 0.0314, "step": 55985 }, { "epoch": 2.842276257678055, "grad_norm": 0.39506053924560547, "learning_rate": 1.0514916154796351e-06, "loss": 0.0296, "step": 55990 }, { "epoch": 2.842530077668917, "grad_norm": 0.37298551201820374, "learning_rate": 1.0497994822072187e-06, "loss": 0.0321, "step": 55995 }, { "epoch": 2.8427838976597797, "grad_norm": 0.4710085690021515, "learning_rate": 1.0481073489348022e-06, "loss": 0.0377, "step": 56000 }, { "epoch": 2.8430377176506423, "grad_norm": 0.2344180792570114, "learning_rate": 1.0464152156623856e-06, "loss": 0.034, "step": 56005 }, { "epoch": 2.8432915376415044, "grad_norm": 0.42132675647735596, "learning_rate": 1.0447230823899691e-06, "loss": 0.0272, "step": 56010 }, { "epoch": 2.843545357632367, "grad_norm": 0.29779860377311707, "learning_rate": 1.0430309491175527e-06, "loss": 0.0289, "step": 56015 }, { "epoch": 2.8437991776232296, "grad_norm": 0.1912485510110855, "learning_rate": 1.0413388158451362e-06, "loss": 0.025, "step": 56020 }, { "epoch": 2.8440529976140922, "grad_norm": 0.33178281784057617, "learning_rate": 1.0396466825727195e-06, "loss": 0.029, "step": 56025 }, { "epoch": 2.8443068176049544, "grad_norm": 0.28970810770988464, "learning_rate": 1.0379545493003029e-06, "loss": 0.0282, "step": 56030 }, { "epoch": 2.844560637595817, "grad_norm": 0.3231714069843292, "learning_rate": 1.0362624160278864e-06, "loss": 0.0353, "step": 56035 }, { "epoch": 2.8448144575866796, "grad_norm": 0.4571632742881775, "learning_rate": 1.03457028275547e-06, "loss": 0.0315, "step": 56040 }, { "epoch": 2.8450682775775418, "grad_norm": 0.31172847747802734, "learning_rate": 1.0328781494830533e-06, "loss": 0.036, "step": 56045 }, { "epoch": 2.8453220975684044, "grad_norm": 0.5730637907981873, "learning_rate": 1.0311860162106368e-06, "loss": 0.0227, "step": 56050 }, { "epoch": 2.845575917559267, "grad_norm": 0.3472713232040405, "learning_rate": 1.0294938829382204e-06, "loss": 0.0233, "step": 56055 }, { "epoch": 2.8458297375501296, "grad_norm": 0.554363489151001, "learning_rate": 1.027801749665804e-06, "loss": 0.0312, "step": 56060 }, { "epoch": 2.846083557540992, "grad_norm": 0.38508570194244385, "learning_rate": 1.0261096163933873e-06, "loss": 0.0311, "step": 56065 }, { "epoch": 2.8463373775318543, "grad_norm": 0.3509955108165741, "learning_rate": 1.0244174831209706e-06, "loss": 0.0279, "step": 56070 }, { "epoch": 2.846591197522717, "grad_norm": 0.2738310992717743, "learning_rate": 1.0227253498485541e-06, "loss": 0.0352, "step": 56075 }, { "epoch": 2.846845017513579, "grad_norm": 0.2927628755569458, "learning_rate": 1.0210332165761377e-06, "loss": 0.028, "step": 56080 }, { "epoch": 2.8470988375044417, "grad_norm": 0.3419632017612457, "learning_rate": 1.019341083303721e-06, "loss": 0.0321, "step": 56085 }, { "epoch": 2.8473526574953043, "grad_norm": 0.26105278730392456, "learning_rate": 1.0176489500313046e-06, "loss": 0.0235, "step": 56090 }, { "epoch": 2.847606477486167, "grad_norm": 0.35791948437690735, "learning_rate": 1.015956816758888e-06, "loss": 0.0211, "step": 56095 }, { "epoch": 2.8478602974770295, "grad_norm": 0.4351104199886322, "learning_rate": 1.0142646834864714e-06, "loss": 0.0249, "step": 56100 }, { "epoch": 2.8481141174678917, "grad_norm": 0.31804075837135315, "learning_rate": 1.012572550214055e-06, "loss": 0.0288, "step": 56105 }, { "epoch": 2.8483679374587543, "grad_norm": 0.3467938005924225, "learning_rate": 1.0108804169416383e-06, "loss": 0.033, "step": 56110 }, { "epoch": 2.848621757449617, "grad_norm": 0.2748010754585266, "learning_rate": 1.0091882836692218e-06, "loss": 0.0314, "step": 56115 }, { "epoch": 2.848875577440479, "grad_norm": 0.23437300324440002, "learning_rate": 1.0074961503968054e-06, "loss": 0.0271, "step": 56120 }, { "epoch": 2.8491293974313416, "grad_norm": 0.3459556996822357, "learning_rate": 1.0058040171243887e-06, "loss": 0.0329, "step": 56125 }, { "epoch": 2.8493832174222042, "grad_norm": 0.33637312054634094, "learning_rate": 1.0041118838519723e-06, "loss": 0.0289, "step": 56130 }, { "epoch": 2.849637037413067, "grad_norm": 0.27937188744544983, "learning_rate": 1.0024197505795558e-06, "loss": 0.0321, "step": 56135 }, { "epoch": 2.849890857403929, "grad_norm": 0.21973158419132233, "learning_rate": 1.0007276173071391e-06, "loss": 0.0301, "step": 56140 }, { "epoch": 2.8501446773947916, "grad_norm": 0.5233383774757385, "learning_rate": 9.990354840347227e-07, "loss": 0.0283, "step": 56145 }, { "epoch": 2.850398497385654, "grad_norm": 0.3020778000354767, "learning_rate": 9.97343350762306e-07, "loss": 0.0265, "step": 56150 }, { "epoch": 2.8506523173765164, "grad_norm": 0.7818123698234558, "learning_rate": 9.956512174898896e-07, "loss": 0.0327, "step": 56155 }, { "epoch": 2.850906137367379, "grad_norm": 0.41146236658096313, "learning_rate": 9.939590842174731e-07, "loss": 0.0399, "step": 56160 }, { "epoch": 2.8511599573582416, "grad_norm": 0.3017003536224365, "learning_rate": 9.922669509450564e-07, "loss": 0.0287, "step": 56165 }, { "epoch": 2.851413777349104, "grad_norm": 0.2691115438938141, "learning_rate": 9.9057481767264e-07, "loss": 0.0261, "step": 56170 }, { "epoch": 2.8516675973399663, "grad_norm": 0.4630514979362488, "learning_rate": 9.888826844002235e-07, "loss": 0.0304, "step": 56175 }, { "epoch": 2.851921417330829, "grad_norm": 0.4147006869316101, "learning_rate": 9.871905511278069e-07, "loss": 0.0279, "step": 56180 }, { "epoch": 2.8521752373216915, "grad_norm": 0.3111046254634857, "learning_rate": 9.854984178553904e-07, "loss": 0.0283, "step": 56185 }, { "epoch": 2.8524290573125537, "grad_norm": 0.42208629846572876, "learning_rate": 9.838062845829737e-07, "loss": 0.0317, "step": 56190 }, { "epoch": 2.8526828773034163, "grad_norm": 0.3102574348449707, "learning_rate": 9.821141513105573e-07, "loss": 0.0269, "step": 56195 }, { "epoch": 2.852936697294279, "grad_norm": 0.29505234956741333, "learning_rate": 9.804220180381408e-07, "loss": 0.0266, "step": 56200 }, { "epoch": 2.8531905172851415, "grad_norm": 0.2698964476585388, "learning_rate": 9.787298847657242e-07, "loss": 0.031, "step": 56205 }, { "epoch": 2.853444337276004, "grad_norm": 0.722200334072113, "learning_rate": 9.770377514933077e-07, "loss": 0.0329, "step": 56210 }, { "epoch": 2.8536981572668663, "grad_norm": 0.41778144240379333, "learning_rate": 9.753456182208913e-07, "loss": 0.0313, "step": 56215 }, { "epoch": 2.853951977257729, "grad_norm": 0.33847302198410034, "learning_rate": 9.736534849484746e-07, "loss": 0.0264, "step": 56220 }, { "epoch": 2.854205797248591, "grad_norm": 0.36504846811294556, "learning_rate": 9.719613516760581e-07, "loss": 0.0334, "step": 56225 }, { "epoch": 2.8544596172394536, "grad_norm": 0.3869740664958954, "learning_rate": 9.702692184036415e-07, "loss": 0.0328, "step": 56230 }, { "epoch": 2.8547134372303162, "grad_norm": 0.41756024956703186, "learning_rate": 9.68577085131225e-07, "loss": 0.0298, "step": 56235 }, { "epoch": 2.854967257221179, "grad_norm": 0.3898419439792633, "learning_rate": 9.668849518588086e-07, "loss": 0.0318, "step": 56240 }, { "epoch": 2.8552210772120414, "grad_norm": 0.2647220194339752, "learning_rate": 9.651928185863919e-07, "loss": 0.0311, "step": 56245 }, { "epoch": 2.8554748972029036, "grad_norm": 0.4688816964626312, "learning_rate": 9.635006853139754e-07, "loss": 0.0319, "step": 56250 }, { "epoch": 2.855728717193766, "grad_norm": 0.36906033754348755, "learning_rate": 9.61808552041559e-07, "loss": 0.0275, "step": 56255 }, { "epoch": 2.855982537184629, "grad_norm": 0.4400111138820648, "learning_rate": 9.601164187691423e-07, "loss": 0.0286, "step": 56260 }, { "epoch": 2.856236357175491, "grad_norm": 0.2533106505870819, "learning_rate": 9.584242854967259e-07, "loss": 0.0241, "step": 56265 }, { "epoch": 2.8564901771663536, "grad_norm": 0.29996147751808167, "learning_rate": 9.567321522243092e-07, "loss": 0.0299, "step": 56270 }, { "epoch": 2.856743997157216, "grad_norm": 0.2994527220726013, "learning_rate": 9.550400189518927e-07, "loss": 0.0298, "step": 56275 }, { "epoch": 2.8569978171480788, "grad_norm": 0.34880349040031433, "learning_rate": 9.533478856794762e-07, "loss": 0.0247, "step": 56280 }, { "epoch": 2.857251637138941, "grad_norm": 0.43103355169296265, "learning_rate": 9.516557524070596e-07, "loss": 0.037, "step": 56285 }, { "epoch": 2.8575054571298035, "grad_norm": 0.7479355931282043, "learning_rate": 9.499636191346432e-07, "loss": 0.0298, "step": 56290 }, { "epoch": 2.857759277120666, "grad_norm": 0.27349087595939636, "learning_rate": 9.482714858622266e-07, "loss": 0.0244, "step": 56295 }, { "epoch": 2.8580130971115283, "grad_norm": 0.6969117522239685, "learning_rate": 9.465793525898101e-07, "loss": 0.0278, "step": 56300 }, { "epoch": 2.858266917102391, "grad_norm": 0.30732694268226624, "learning_rate": 9.448872193173936e-07, "loss": 0.0295, "step": 56305 }, { "epoch": 2.8585207370932535, "grad_norm": 0.3011256158351898, "learning_rate": 9.431950860449769e-07, "loss": 0.0267, "step": 56310 }, { "epoch": 2.858774557084116, "grad_norm": 1.600549578666687, "learning_rate": 9.415029527725603e-07, "loss": 0.0254, "step": 56315 }, { "epoch": 2.8590283770749787, "grad_norm": 0.5621958374977112, "learning_rate": 9.398108195001439e-07, "loss": 0.0373, "step": 56320 }, { "epoch": 2.859282197065841, "grad_norm": 0.2704627513885498, "learning_rate": 9.381186862277273e-07, "loss": 0.0272, "step": 56325 }, { "epoch": 2.8595360170567035, "grad_norm": 0.3137623965740204, "learning_rate": 9.364265529553109e-07, "loss": 0.0319, "step": 56330 }, { "epoch": 2.8597898370475656, "grad_norm": 0.3069267272949219, "learning_rate": 9.347344196828943e-07, "loss": 0.0285, "step": 56335 }, { "epoch": 2.860043657038428, "grad_norm": 0.2506422698497772, "learning_rate": 9.330422864104779e-07, "loss": 0.0217, "step": 56340 }, { "epoch": 2.860297477029291, "grad_norm": 0.30039626359939575, "learning_rate": 9.313501531380613e-07, "loss": 0.0241, "step": 56345 }, { "epoch": 2.8605512970201534, "grad_norm": 0.3857613503932953, "learning_rate": 9.296580198656446e-07, "loss": 0.0286, "step": 56350 }, { "epoch": 2.860805117011016, "grad_norm": 0.4006558656692505, "learning_rate": 9.279658865932281e-07, "loss": 0.0284, "step": 56355 }, { "epoch": 2.861058937001878, "grad_norm": 0.28864726424217224, "learning_rate": 9.262737533208116e-07, "loss": 0.0362, "step": 56360 }, { "epoch": 2.861312756992741, "grad_norm": 0.31609058380126953, "learning_rate": 9.245816200483951e-07, "loss": 0.0254, "step": 56365 }, { "epoch": 2.8615665769836034, "grad_norm": 0.46757835149765015, "learning_rate": 9.228894867759786e-07, "loss": 0.0291, "step": 56370 }, { "epoch": 2.8618203969744656, "grad_norm": 0.46682101488113403, "learning_rate": 9.21197353503562e-07, "loss": 0.0265, "step": 56375 }, { "epoch": 2.862074216965328, "grad_norm": 0.599969208240509, "learning_rate": 9.195052202311455e-07, "loss": 0.0289, "step": 56380 }, { "epoch": 2.8623280369561908, "grad_norm": 0.39302515983581543, "learning_rate": 9.17813086958729e-07, "loss": 0.0307, "step": 56385 }, { "epoch": 2.8625818569470534, "grad_norm": 0.2010660022497177, "learning_rate": 9.161209536863124e-07, "loss": 0.0275, "step": 56390 }, { "epoch": 2.8628356769379155, "grad_norm": 0.30817094445228577, "learning_rate": 9.144288204138958e-07, "loss": 0.026, "step": 56395 }, { "epoch": 2.863089496928778, "grad_norm": 0.543203592300415, "learning_rate": 9.127366871414793e-07, "loss": 0.03, "step": 56400 }, { "epoch": 2.8633433169196407, "grad_norm": 0.3976742923259735, "learning_rate": 9.110445538690628e-07, "loss": 0.0294, "step": 56405 }, { "epoch": 2.863597136910503, "grad_norm": 0.28443464636802673, "learning_rate": 9.093524205966463e-07, "loss": 0.0315, "step": 56410 }, { "epoch": 2.8638509569013655, "grad_norm": 0.3199726343154907, "learning_rate": 9.076602873242298e-07, "loss": 0.0293, "step": 56415 }, { "epoch": 2.864104776892228, "grad_norm": 0.27055320143699646, "learning_rate": 9.059681540518132e-07, "loss": 0.026, "step": 56420 }, { "epoch": 2.8643585968830907, "grad_norm": 0.336502343416214, "learning_rate": 9.042760207793967e-07, "loss": 0.0198, "step": 56425 }, { "epoch": 2.864612416873953, "grad_norm": 0.33029425144195557, "learning_rate": 9.025838875069801e-07, "loss": 0.0288, "step": 56430 }, { "epoch": 2.8648662368648155, "grad_norm": 0.3340581953525543, "learning_rate": 9.008917542345635e-07, "loss": 0.0248, "step": 56435 }, { "epoch": 2.865120056855678, "grad_norm": 0.47649848461151123, "learning_rate": 8.991996209621471e-07, "loss": 0.0283, "step": 56440 }, { "epoch": 2.86537387684654, "grad_norm": 0.6935096979141235, "learning_rate": 8.975074876897305e-07, "loss": 0.0334, "step": 56445 }, { "epoch": 2.865627696837403, "grad_norm": 0.30507203936576843, "learning_rate": 8.958153544173139e-07, "loss": 0.0286, "step": 56450 }, { "epoch": 2.8658815168282654, "grad_norm": 0.30034294724464417, "learning_rate": 8.941232211448975e-07, "loss": 0.0333, "step": 56455 }, { "epoch": 2.866135336819128, "grad_norm": 0.3122558295726776, "learning_rate": 8.924310878724809e-07, "loss": 0.0338, "step": 56460 }, { "epoch": 2.8663891568099906, "grad_norm": 0.5069664716720581, "learning_rate": 8.907389546000645e-07, "loss": 0.027, "step": 56465 }, { "epoch": 2.866642976800853, "grad_norm": 0.26773950457572937, "learning_rate": 8.890468213276478e-07, "loss": 0.0275, "step": 56470 }, { "epoch": 2.8668967967917154, "grad_norm": 0.46711573004722595, "learning_rate": 8.873546880552312e-07, "loss": 0.03, "step": 56475 }, { "epoch": 2.8671506167825775, "grad_norm": 0.5285343527793884, "learning_rate": 8.856625547828148e-07, "loss": 0.0329, "step": 56480 }, { "epoch": 2.86740443677344, "grad_norm": 0.32522931694984436, "learning_rate": 8.839704215103982e-07, "loss": 0.0326, "step": 56485 }, { "epoch": 2.8676582567643027, "grad_norm": 0.21851252019405365, "learning_rate": 8.822782882379817e-07, "loss": 0.0244, "step": 56490 }, { "epoch": 2.8679120767551654, "grad_norm": 0.3424266576766968, "learning_rate": 8.805861549655652e-07, "loss": 0.0306, "step": 56495 }, { "epoch": 2.868165896746028, "grad_norm": 0.27859312295913696, "learning_rate": 8.788940216931486e-07, "loss": 0.0323, "step": 56500 }, { "epoch": 2.86841971673689, "grad_norm": 0.3467095196247101, "learning_rate": 8.772018884207322e-07, "loss": 0.0296, "step": 56505 }, { "epoch": 2.8686735367277527, "grad_norm": 0.2763430178165436, "learning_rate": 8.755097551483155e-07, "loss": 0.0241, "step": 56510 }, { "epoch": 2.8689273567186153, "grad_norm": 0.5489804744720459, "learning_rate": 8.73817621875899e-07, "loss": 0.0351, "step": 56515 }, { "epoch": 2.8691811767094775, "grad_norm": 0.22821855545043945, "learning_rate": 8.721254886034825e-07, "loss": 0.0299, "step": 56520 }, { "epoch": 2.86943499670034, "grad_norm": 0.26971396803855896, "learning_rate": 8.704333553310659e-07, "loss": 0.029, "step": 56525 }, { "epoch": 2.8696888166912027, "grad_norm": 0.35413697361946106, "learning_rate": 8.687412220586494e-07, "loss": 0.0328, "step": 56530 }, { "epoch": 2.8699426366820653, "grad_norm": 0.58110511302948, "learning_rate": 8.670490887862329e-07, "loss": 0.0304, "step": 56535 }, { "epoch": 2.8701964566729274, "grad_norm": 0.40751639008522034, "learning_rate": 8.653569555138164e-07, "loss": 0.0277, "step": 56540 }, { "epoch": 2.87045027666379, "grad_norm": 0.27954888343811035, "learning_rate": 8.636648222413999e-07, "loss": 0.0326, "step": 56545 }, { "epoch": 2.8707040966546526, "grad_norm": 0.29035764932632446, "learning_rate": 8.619726889689832e-07, "loss": 0.0219, "step": 56550 }, { "epoch": 2.870957916645515, "grad_norm": 0.30947911739349365, "learning_rate": 8.602805556965667e-07, "loss": 0.0294, "step": 56555 }, { "epoch": 2.8712117366363774, "grad_norm": 0.22087480127811432, "learning_rate": 8.585884224241501e-07, "loss": 0.0347, "step": 56560 }, { "epoch": 2.87146555662724, "grad_norm": 0.2977849245071411, "learning_rate": 8.568962891517337e-07, "loss": 0.0283, "step": 56565 }, { "epoch": 2.8717193766181026, "grad_norm": 0.34214112162590027, "learning_rate": 8.552041558793171e-07, "loss": 0.0275, "step": 56570 }, { "epoch": 2.8719731966089648, "grad_norm": 0.2554374039173126, "learning_rate": 8.535120226069006e-07, "loss": 0.0325, "step": 56575 }, { "epoch": 2.8722270165998274, "grad_norm": 0.4054310917854309, "learning_rate": 8.518198893344841e-07, "loss": 0.0283, "step": 56580 }, { "epoch": 2.87248083659069, "grad_norm": 0.26180627942085266, "learning_rate": 8.501277560620675e-07, "loss": 0.0242, "step": 56585 }, { "epoch": 2.872734656581552, "grad_norm": 0.41069480776786804, "learning_rate": 8.48435622789651e-07, "loss": 0.035, "step": 56590 }, { "epoch": 2.8729884765724147, "grad_norm": 0.2548157870769501, "learning_rate": 8.467434895172344e-07, "loss": 0.0236, "step": 56595 }, { "epoch": 2.8732422965632773, "grad_norm": 0.35240301489830017, "learning_rate": 8.450513562448178e-07, "loss": 0.0239, "step": 56600 }, { "epoch": 2.87349611655414, "grad_norm": 0.38352060317993164, "learning_rate": 8.433592229724014e-07, "loss": 0.034, "step": 56605 }, { "epoch": 2.8737499365450025, "grad_norm": 0.3092139661312103, "learning_rate": 8.416670896999848e-07, "loss": 0.0348, "step": 56610 }, { "epoch": 2.8740037565358647, "grad_norm": 0.3200393319129944, "learning_rate": 8.399749564275684e-07, "loss": 0.034, "step": 56615 }, { "epoch": 2.8742575765267273, "grad_norm": 0.31458497047424316, "learning_rate": 8.382828231551518e-07, "loss": 0.0338, "step": 56620 }, { "epoch": 2.8745113965175895, "grad_norm": 0.4197658896446228, "learning_rate": 8.365906898827352e-07, "loss": 0.0319, "step": 56625 }, { "epoch": 2.874765216508452, "grad_norm": 0.2996726632118225, "learning_rate": 8.348985566103187e-07, "loss": 0.0334, "step": 56630 }, { "epoch": 2.8750190364993147, "grad_norm": 0.3455791771411896, "learning_rate": 8.332064233379021e-07, "loss": 0.0277, "step": 56635 }, { "epoch": 2.8752728564901773, "grad_norm": 0.30782240629196167, "learning_rate": 8.315142900654856e-07, "loss": 0.0331, "step": 56640 }, { "epoch": 2.87552667648104, "grad_norm": 0.584852397441864, "learning_rate": 8.298221567930691e-07, "loss": 0.036, "step": 56645 }, { "epoch": 2.875780496471902, "grad_norm": 0.41536372900009155, "learning_rate": 8.281300235206525e-07, "loss": 0.0309, "step": 56650 }, { "epoch": 2.8760343164627646, "grad_norm": 0.3044881522655487, "learning_rate": 8.264378902482361e-07, "loss": 0.0262, "step": 56655 }, { "epoch": 2.8762881364536272, "grad_norm": 0.2610401213169098, "learning_rate": 8.247457569758195e-07, "loss": 0.0226, "step": 56660 }, { "epoch": 2.8765419564444894, "grad_norm": 0.3302207589149475, "learning_rate": 8.23053623703403e-07, "loss": 0.0249, "step": 56665 }, { "epoch": 2.876795776435352, "grad_norm": 0.3605993092060089, "learning_rate": 8.213614904309865e-07, "loss": 0.0303, "step": 56670 }, { "epoch": 2.8770495964262146, "grad_norm": 0.38024815917015076, "learning_rate": 8.196693571585698e-07, "loss": 0.0245, "step": 56675 }, { "epoch": 2.877303416417077, "grad_norm": 0.33744293451309204, "learning_rate": 8.179772238861533e-07, "loss": 0.0309, "step": 56680 }, { "epoch": 2.8775572364079394, "grad_norm": 0.3316429853439331, "learning_rate": 8.162850906137368e-07, "loss": 0.0289, "step": 56685 }, { "epoch": 2.877811056398802, "grad_norm": 0.37818485498428345, "learning_rate": 8.145929573413203e-07, "loss": 0.0296, "step": 56690 }, { "epoch": 2.8780648763896646, "grad_norm": 0.4408175051212311, "learning_rate": 8.129008240689037e-07, "loss": 0.035, "step": 56695 }, { "epoch": 2.8783186963805267, "grad_norm": 0.3286103308200836, "learning_rate": 8.112086907964872e-07, "loss": 0.0305, "step": 56700 }, { "epoch": 2.8785725163713893, "grad_norm": 0.6499526500701904, "learning_rate": 8.095165575240707e-07, "loss": 0.0297, "step": 56705 }, { "epoch": 2.878826336362252, "grad_norm": 0.8811410069465637, "learning_rate": 8.078244242516542e-07, "loss": 0.0308, "step": 56710 }, { "epoch": 2.8790801563531145, "grad_norm": 0.6163033843040466, "learning_rate": 8.061322909792376e-07, "loss": 0.0359, "step": 56715 }, { "epoch": 2.8793339763439767, "grad_norm": 0.48688793182373047, "learning_rate": 8.04440157706821e-07, "loss": 0.0281, "step": 56720 }, { "epoch": 2.8795877963348393, "grad_norm": 0.5693960189819336, "learning_rate": 8.027480244344045e-07, "loss": 0.026, "step": 56725 }, { "epoch": 2.879841616325702, "grad_norm": 0.24559521675109863, "learning_rate": 8.01055891161988e-07, "loss": 0.029, "step": 56730 }, { "epoch": 2.880095436316564, "grad_norm": 0.38832828402519226, "learning_rate": 7.993637578895714e-07, "loss": 0.0371, "step": 56735 }, { "epoch": 2.8803492563074267, "grad_norm": 0.32193049788475037, "learning_rate": 7.97671624617155e-07, "loss": 0.0353, "step": 56740 }, { "epoch": 2.8806030762982893, "grad_norm": 0.2351982444524765, "learning_rate": 7.959794913447384e-07, "loss": 0.0237, "step": 56745 }, { "epoch": 2.880856896289152, "grad_norm": 0.24744842946529388, "learning_rate": 7.94287358072322e-07, "loss": 0.0244, "step": 56750 }, { "epoch": 2.8811107162800145, "grad_norm": 0.4948106110095978, "learning_rate": 7.925952247999053e-07, "loss": 0.042, "step": 56755 }, { "epoch": 2.8813645362708766, "grad_norm": 0.36634719371795654, "learning_rate": 7.909030915274887e-07, "loss": 0.0388, "step": 56760 }, { "epoch": 2.8816183562617392, "grad_norm": 0.3451341390609741, "learning_rate": 7.892109582550723e-07, "loss": 0.035, "step": 56765 }, { "epoch": 2.8818721762526014, "grad_norm": 0.3345429599285126, "learning_rate": 7.875188249826557e-07, "loss": 0.0323, "step": 56770 }, { "epoch": 2.882125996243464, "grad_norm": 0.35555848479270935, "learning_rate": 7.858266917102391e-07, "loss": 0.0284, "step": 56775 }, { "epoch": 2.8823798162343266, "grad_norm": 0.36172765493392944, "learning_rate": 7.841345584378227e-07, "loss": 0.0362, "step": 56780 }, { "epoch": 2.882633636225189, "grad_norm": 0.262870192527771, "learning_rate": 7.824424251654061e-07, "loss": 0.03, "step": 56785 }, { "epoch": 2.882887456216052, "grad_norm": 0.5119378566741943, "learning_rate": 7.807502918929897e-07, "loss": 0.0331, "step": 56790 }, { "epoch": 2.883141276206914, "grad_norm": 0.7077087759971619, "learning_rate": 7.79058158620573e-07, "loss": 0.0265, "step": 56795 }, { "epoch": 2.8833950961977766, "grad_norm": 0.31209030747413635, "learning_rate": 7.773660253481564e-07, "loss": 0.0311, "step": 56800 }, { "epoch": 2.883648916188639, "grad_norm": 0.5953086018562317, "learning_rate": 7.756738920757399e-07, "loss": 0.0343, "step": 56805 }, { "epoch": 2.8839027361795013, "grad_norm": 0.38027554750442505, "learning_rate": 7.739817588033234e-07, "loss": 0.0277, "step": 56810 }, { "epoch": 2.884156556170364, "grad_norm": 0.3770337700843811, "learning_rate": 7.722896255309069e-07, "loss": 0.0287, "step": 56815 }, { "epoch": 2.8844103761612265, "grad_norm": 0.3363008201122284, "learning_rate": 7.705974922584904e-07, "loss": 0.0334, "step": 56820 }, { "epoch": 2.884664196152089, "grad_norm": 1.4316260814666748, "learning_rate": 7.689053589860738e-07, "loss": 0.0302, "step": 56825 }, { "epoch": 2.8849180161429513, "grad_norm": 0.36405399441719055, "learning_rate": 7.672132257136573e-07, "loss": 0.0239, "step": 56830 }, { "epoch": 2.885171836133814, "grad_norm": 0.36887475848197937, "learning_rate": 7.655210924412407e-07, "loss": 0.0328, "step": 56835 }, { "epoch": 2.8854256561246765, "grad_norm": 0.25625044107437134, "learning_rate": 7.638289591688242e-07, "loss": 0.0287, "step": 56840 }, { "epoch": 2.8856794761155387, "grad_norm": 0.25802773237228394, "learning_rate": 7.621368258964076e-07, "loss": 0.0221, "step": 56845 }, { "epoch": 2.8859332961064013, "grad_norm": 0.3497041165828705, "learning_rate": 7.604446926239911e-07, "loss": 0.0292, "step": 56850 }, { "epoch": 2.886187116097264, "grad_norm": 0.3454679548740387, "learning_rate": 7.587525593515746e-07, "loss": 0.0331, "step": 56855 }, { "epoch": 2.8864409360881265, "grad_norm": 0.5030285716056824, "learning_rate": 7.570604260791581e-07, "loss": 0.0359, "step": 56860 }, { "epoch": 2.8866947560789886, "grad_norm": 0.44325920939445496, "learning_rate": 7.553682928067416e-07, "loss": 0.0296, "step": 56865 }, { "epoch": 2.8869485760698512, "grad_norm": 0.4056631326675415, "learning_rate": 7.53676159534325e-07, "loss": 0.0272, "step": 56870 }, { "epoch": 2.887202396060714, "grad_norm": 0.26784148812294006, "learning_rate": 7.519840262619083e-07, "loss": 0.0262, "step": 56875 }, { "epoch": 2.887456216051576, "grad_norm": 0.33540356159210205, "learning_rate": 7.502918929894919e-07, "loss": 0.035, "step": 56880 }, { "epoch": 2.8877100360424386, "grad_norm": 0.30647772550582886, "learning_rate": 7.485997597170753e-07, "loss": 0.0313, "step": 56885 }, { "epoch": 2.887963856033301, "grad_norm": 0.25061747431755066, "learning_rate": 7.469076264446589e-07, "loss": 0.0249, "step": 56890 }, { "epoch": 2.888217676024164, "grad_norm": 0.5146888494491577, "learning_rate": 7.452154931722423e-07, "loss": 0.0272, "step": 56895 }, { "epoch": 2.8884714960150264, "grad_norm": 0.2966390550136566, "learning_rate": 7.435233598998258e-07, "loss": 0.0299, "step": 56900 }, { "epoch": 2.8887253160058886, "grad_norm": 0.37970107793807983, "learning_rate": 7.418312266274093e-07, "loss": 0.0322, "step": 56905 }, { "epoch": 2.888979135996751, "grad_norm": 0.2674563527107239, "learning_rate": 7.401390933549927e-07, "loss": 0.0245, "step": 56910 }, { "epoch": 2.8892329559876133, "grad_norm": 0.44810232520103455, "learning_rate": 7.384469600825761e-07, "loss": 0.0248, "step": 56915 }, { "epoch": 2.889486775978476, "grad_norm": 0.25780561566352844, "learning_rate": 7.367548268101596e-07, "loss": 0.0242, "step": 56920 }, { "epoch": 2.8897405959693385, "grad_norm": 0.35203060507774353, "learning_rate": 7.35062693537743e-07, "loss": 0.0284, "step": 56925 }, { "epoch": 2.889994415960201, "grad_norm": 0.26767706871032715, "learning_rate": 7.333705602653266e-07, "loss": 0.0262, "step": 56930 }, { "epoch": 2.8902482359510637, "grad_norm": 0.3971843719482422, "learning_rate": 7.3167842699291e-07, "loss": 0.0454, "step": 56935 }, { "epoch": 2.890502055941926, "grad_norm": 0.22796855866909027, "learning_rate": 7.299862937204935e-07, "loss": 0.0319, "step": 56940 }, { "epoch": 2.8907558759327885, "grad_norm": 0.32220014929771423, "learning_rate": 7.28294160448077e-07, "loss": 0.0233, "step": 56945 }, { "epoch": 2.891009695923651, "grad_norm": 0.27687567472457886, "learning_rate": 7.266020271756604e-07, "loss": 0.0278, "step": 56950 }, { "epoch": 2.8912635159145132, "grad_norm": 0.2914799749851227, "learning_rate": 7.249098939032438e-07, "loss": 0.0265, "step": 56955 }, { "epoch": 2.891517335905376, "grad_norm": 0.3295184373855591, "learning_rate": 7.232177606308273e-07, "loss": 0.0264, "step": 56960 }, { "epoch": 2.8917711558962385, "grad_norm": 0.2902180850505829, "learning_rate": 7.215256273584108e-07, "loss": 0.0259, "step": 56965 }, { "epoch": 2.892024975887101, "grad_norm": 0.21984949707984924, "learning_rate": 7.198334940859943e-07, "loss": 0.0201, "step": 56970 }, { "epoch": 2.892278795877963, "grad_norm": 0.2158031314611435, "learning_rate": 7.181413608135777e-07, "loss": 0.0327, "step": 56975 }, { "epoch": 2.892532615868826, "grad_norm": 0.27695468068122864, "learning_rate": 7.164492275411612e-07, "loss": 0.0281, "step": 56980 }, { "epoch": 2.8927864358596884, "grad_norm": 0.23929698765277863, "learning_rate": 7.147570942687447e-07, "loss": 0.0273, "step": 56985 }, { "epoch": 2.8930402558505506, "grad_norm": 0.3177143335342407, "learning_rate": 7.130649609963282e-07, "loss": 0.0367, "step": 56990 }, { "epoch": 2.893294075841413, "grad_norm": 0.3839656114578247, "learning_rate": 7.113728277239115e-07, "loss": 0.0342, "step": 56995 }, { "epoch": 2.893547895832276, "grad_norm": 0.30691877007484436, "learning_rate": 7.09680694451495e-07, "loss": 0.0257, "step": 57000 }, { "epoch": 2.8938017158231384, "grad_norm": 0.2793743312358856, "learning_rate": 7.079885611790785e-07, "loss": 0.0404, "step": 57005 }, { "epoch": 2.8940555358140005, "grad_norm": 0.5192366242408752, "learning_rate": 7.062964279066619e-07, "loss": 0.0299, "step": 57010 }, { "epoch": 2.894309355804863, "grad_norm": 0.3496776521205902, "learning_rate": 7.046042946342455e-07, "loss": 0.0346, "step": 57015 }, { "epoch": 2.8945631757957258, "grad_norm": 0.24379898607730865, "learning_rate": 7.029121613618289e-07, "loss": 0.0283, "step": 57020 }, { "epoch": 2.894816995786588, "grad_norm": 0.41297459602355957, "learning_rate": 7.012200280894125e-07, "loss": 0.023, "step": 57025 }, { "epoch": 2.8950708157774505, "grad_norm": 0.2837767004966736, "learning_rate": 6.995278948169959e-07, "loss": 0.0295, "step": 57030 }, { "epoch": 2.895324635768313, "grad_norm": 0.33692866563796997, "learning_rate": 6.978357615445792e-07, "loss": 0.0325, "step": 57035 }, { "epoch": 2.8955784557591757, "grad_norm": 0.32200172543525696, "learning_rate": 6.961436282721628e-07, "loss": 0.0299, "step": 57040 }, { "epoch": 2.8958322757500383, "grad_norm": 0.3966142535209656, "learning_rate": 6.944514949997462e-07, "loss": 0.0302, "step": 57045 }, { "epoch": 2.8960860957409005, "grad_norm": 0.36363843083381653, "learning_rate": 6.927593617273296e-07, "loss": 0.0283, "step": 57050 }, { "epoch": 2.896339915731763, "grad_norm": 0.3507053852081299, "learning_rate": 6.910672284549132e-07, "loss": 0.028, "step": 57055 }, { "epoch": 2.8965937357226252, "grad_norm": 0.2757592499256134, "learning_rate": 6.893750951824966e-07, "loss": 0.0231, "step": 57060 }, { "epoch": 2.896847555713488, "grad_norm": 0.3220710754394531, "learning_rate": 6.876829619100802e-07, "loss": 0.0277, "step": 57065 }, { "epoch": 2.8971013757043504, "grad_norm": 0.25192391872406006, "learning_rate": 6.859908286376636e-07, "loss": 0.0283, "step": 57070 }, { "epoch": 2.897355195695213, "grad_norm": 0.40557751059532166, "learning_rate": 6.842986953652469e-07, "loss": 0.0357, "step": 57075 }, { "epoch": 2.8976090156860757, "grad_norm": 0.24660883843898773, "learning_rate": 6.826065620928305e-07, "loss": 0.0326, "step": 57080 }, { "epoch": 2.897862835676938, "grad_norm": 0.3630150556564331, "learning_rate": 6.809144288204139e-07, "loss": 0.0296, "step": 57085 }, { "epoch": 2.8981166556678004, "grad_norm": 0.5821228623390198, "learning_rate": 6.792222955479974e-07, "loss": 0.03, "step": 57090 }, { "epoch": 2.898370475658663, "grad_norm": 0.3000786602497101, "learning_rate": 6.775301622755809e-07, "loss": 0.0325, "step": 57095 }, { "epoch": 2.898624295649525, "grad_norm": 0.5774555206298828, "learning_rate": 6.758380290031643e-07, "loss": 0.0313, "step": 57100 }, { "epoch": 2.8988781156403878, "grad_norm": 0.23967984318733215, "learning_rate": 6.741458957307479e-07, "loss": 0.0317, "step": 57105 }, { "epoch": 2.8991319356312504, "grad_norm": 0.2716747522354126, "learning_rate": 6.724537624583313e-07, "loss": 0.0365, "step": 57110 }, { "epoch": 2.899385755622113, "grad_norm": 0.20131956040859222, "learning_rate": 6.707616291859147e-07, "loss": 0.029, "step": 57115 }, { "epoch": 2.899639575612975, "grad_norm": 0.2939135432243347, "learning_rate": 6.690694959134981e-07, "loss": 0.0263, "step": 57120 }, { "epoch": 2.8998933956038377, "grad_norm": 0.4463021755218506, "learning_rate": 6.673773626410816e-07, "loss": 0.0323, "step": 57125 }, { "epoch": 2.9001472155947003, "grad_norm": 0.36133143305778503, "learning_rate": 6.656852293686651e-07, "loss": 0.037, "step": 57130 }, { "epoch": 2.9004010355855625, "grad_norm": 0.28862321376800537, "learning_rate": 6.639930960962486e-07, "loss": 0.0256, "step": 57135 }, { "epoch": 2.900654855576425, "grad_norm": 0.3055947422981262, "learning_rate": 6.623009628238321e-07, "loss": 0.0334, "step": 57140 }, { "epoch": 2.9009086755672877, "grad_norm": 0.31747281551361084, "learning_rate": 6.606088295514155e-07, "loss": 0.0257, "step": 57145 }, { "epoch": 2.9011624955581503, "grad_norm": 0.3737865388393402, "learning_rate": 6.589166962789991e-07, "loss": 0.0325, "step": 57150 }, { "epoch": 2.901416315549013, "grad_norm": 0.31869587302207947, "learning_rate": 6.572245630065824e-07, "loss": 0.0358, "step": 57155 }, { "epoch": 2.901670135539875, "grad_norm": 0.33540067076683044, "learning_rate": 6.555324297341658e-07, "loss": 0.0277, "step": 57160 }, { "epoch": 2.9019239555307377, "grad_norm": 0.42595553398132324, "learning_rate": 6.538402964617494e-07, "loss": 0.0333, "step": 57165 }, { "epoch": 2.9021777755216, "grad_norm": 0.2767331302165985, "learning_rate": 6.521481631893328e-07, "loss": 0.0278, "step": 57170 }, { "epoch": 2.9024315955124624, "grad_norm": 0.31812888383865356, "learning_rate": 6.504560299169164e-07, "loss": 0.0263, "step": 57175 }, { "epoch": 2.902685415503325, "grad_norm": 0.27926456928253174, "learning_rate": 6.487638966444998e-07, "loss": 0.0285, "step": 57180 }, { "epoch": 2.9029392354941876, "grad_norm": 0.5825649499893188, "learning_rate": 6.470717633720832e-07, "loss": 0.0298, "step": 57185 }, { "epoch": 2.9031930554850502, "grad_norm": 0.35669177770614624, "learning_rate": 6.453796300996668e-07, "loss": 0.0377, "step": 57190 }, { "epoch": 2.9034468754759124, "grad_norm": 0.3390977382659912, "learning_rate": 6.436874968272502e-07, "loss": 0.0281, "step": 57195 }, { "epoch": 2.903700695466775, "grad_norm": 0.25372812151908875, "learning_rate": 6.419953635548335e-07, "loss": 0.0325, "step": 57200 }, { "epoch": 2.9039545154576376, "grad_norm": 0.48622772097587585, "learning_rate": 6.403032302824171e-07, "loss": 0.0322, "step": 57205 }, { "epoch": 2.9042083354484998, "grad_norm": 0.27143144607543945, "learning_rate": 6.386110970100005e-07, "loss": 0.0299, "step": 57210 }, { "epoch": 2.9044621554393624, "grad_norm": 0.3860352039337158, "learning_rate": 6.369189637375841e-07, "loss": 0.0412, "step": 57215 }, { "epoch": 2.904715975430225, "grad_norm": 0.34994158148765564, "learning_rate": 6.352268304651675e-07, "loss": 0.029, "step": 57220 }, { "epoch": 2.9049697954210876, "grad_norm": 0.3273712694644928, "learning_rate": 6.33534697192751e-07, "loss": 0.0279, "step": 57225 }, { "epoch": 2.9052236154119497, "grad_norm": 0.5091918110847473, "learning_rate": 6.318425639203345e-07, "loss": 0.0296, "step": 57230 }, { "epoch": 2.9054774354028123, "grad_norm": 0.33736932277679443, "learning_rate": 6.301504306479179e-07, "loss": 0.0319, "step": 57235 }, { "epoch": 2.905731255393675, "grad_norm": 0.19080248475074768, "learning_rate": 6.284582973755013e-07, "loss": 0.0273, "step": 57240 }, { "epoch": 2.905985075384537, "grad_norm": 0.4544998109340668, "learning_rate": 6.267661641030848e-07, "loss": 0.0302, "step": 57245 }, { "epoch": 2.9062388953753997, "grad_norm": 0.342433363199234, "learning_rate": 6.250740308306682e-07, "loss": 0.0297, "step": 57250 }, { "epoch": 2.9064927153662623, "grad_norm": 0.28809770941734314, "learning_rate": 6.233818975582517e-07, "loss": 0.0266, "step": 57255 }, { "epoch": 2.906746535357125, "grad_norm": 0.3601595461368561, "learning_rate": 6.216897642858352e-07, "loss": 0.0282, "step": 57260 }, { "epoch": 2.907000355347987, "grad_norm": 0.3118194341659546, "learning_rate": 6.199976310134187e-07, "loss": 0.0333, "step": 57265 }, { "epoch": 2.9072541753388497, "grad_norm": 0.28417107462882996, "learning_rate": 6.183054977410021e-07, "loss": 0.0222, "step": 57270 }, { "epoch": 2.9075079953297123, "grad_norm": 0.3415754437446594, "learning_rate": 6.166133644685855e-07, "loss": 0.0274, "step": 57275 }, { "epoch": 2.9077618153205744, "grad_norm": 0.34622934460639954, "learning_rate": 6.149212311961691e-07, "loss": 0.0242, "step": 57280 }, { "epoch": 2.908015635311437, "grad_norm": 0.2505379021167755, "learning_rate": 6.132290979237525e-07, "loss": 0.0259, "step": 57285 }, { "epoch": 2.9082694553022996, "grad_norm": 0.30466291308403015, "learning_rate": 6.11536964651336e-07, "loss": 0.0341, "step": 57290 }, { "epoch": 2.9085232752931622, "grad_norm": 0.33808496594429016, "learning_rate": 6.098448313789194e-07, "loss": 0.0312, "step": 57295 }, { "epoch": 2.908777095284025, "grad_norm": 0.38466644287109375, "learning_rate": 6.08152698106503e-07, "loss": 0.0384, "step": 57300 }, { "epoch": 2.909030915274887, "grad_norm": 0.41694745421409607, "learning_rate": 6.064605648340864e-07, "loss": 0.0285, "step": 57305 }, { "epoch": 2.9092847352657496, "grad_norm": 0.4306418001651764, "learning_rate": 6.047684315616698e-07, "loss": 0.0294, "step": 57310 }, { "epoch": 2.9095385552566118, "grad_norm": 0.35518577694892883, "learning_rate": 6.030762982892533e-07, "loss": 0.0276, "step": 57315 }, { "epoch": 2.9097923752474744, "grad_norm": 0.4232032001018524, "learning_rate": 6.013841650168368e-07, "loss": 0.0437, "step": 57320 }, { "epoch": 2.910046195238337, "grad_norm": 0.2620871067047119, "learning_rate": 5.996920317444203e-07, "loss": 0.029, "step": 57325 }, { "epoch": 2.9103000152291996, "grad_norm": 0.3478947877883911, "learning_rate": 5.979998984720037e-07, "loss": 0.0334, "step": 57330 }, { "epoch": 2.910553835220062, "grad_norm": 0.3235359489917755, "learning_rate": 5.963077651995871e-07, "loss": 0.0304, "step": 57335 }, { "epoch": 2.9108076552109243, "grad_norm": 0.4142983555793762, "learning_rate": 5.946156319271707e-07, "loss": 0.0341, "step": 57340 }, { "epoch": 2.911061475201787, "grad_norm": 0.3530924916267395, "learning_rate": 5.929234986547541e-07, "loss": 0.0366, "step": 57345 }, { "epoch": 2.9113152951926495, "grad_norm": 0.26504990458488464, "learning_rate": 5.912313653823376e-07, "loss": 0.0246, "step": 57350 }, { "epoch": 2.9115691151835117, "grad_norm": 0.16128307580947876, "learning_rate": 5.89539232109921e-07, "loss": 0.0268, "step": 57355 }, { "epoch": 2.9118229351743743, "grad_norm": 0.26564478874206543, "learning_rate": 5.878470988375045e-07, "loss": 0.0308, "step": 57360 }, { "epoch": 2.912076755165237, "grad_norm": 0.3302893042564392, "learning_rate": 5.86154965565088e-07, "loss": 0.0341, "step": 57365 }, { "epoch": 2.9123305751560995, "grad_norm": 0.2747202515602112, "learning_rate": 5.844628322926714e-07, "loss": 0.0299, "step": 57370 }, { "epoch": 2.9125843951469617, "grad_norm": 0.2258002609014511, "learning_rate": 5.827706990202549e-07, "loss": 0.0292, "step": 57375 }, { "epoch": 2.9128382151378243, "grad_norm": 0.3801233172416687, "learning_rate": 5.810785657478384e-07, "loss": 0.0297, "step": 57380 }, { "epoch": 2.913092035128687, "grad_norm": 0.3353663682937622, "learning_rate": 5.793864324754218e-07, "loss": 0.0289, "step": 57385 }, { "epoch": 2.913345855119549, "grad_norm": 0.3122028708457947, "learning_rate": 5.776942992030053e-07, "loss": 0.0298, "step": 57390 }, { "epoch": 2.9135996751104116, "grad_norm": 0.26420414447784424, "learning_rate": 5.760021659305887e-07, "loss": 0.0303, "step": 57395 }, { "epoch": 2.9138534951012742, "grad_norm": 0.5072758197784424, "learning_rate": 5.743100326581723e-07, "loss": 0.027, "step": 57400 }, { "epoch": 2.914107315092137, "grad_norm": 1.083329677581787, "learning_rate": 5.726178993857557e-07, "loss": 0.0313, "step": 57405 }, { "epoch": 2.914361135082999, "grad_norm": 0.2915167510509491, "learning_rate": 5.709257661133391e-07, "loss": 0.0289, "step": 57410 }, { "epoch": 2.9146149550738616, "grad_norm": 0.2800576388835907, "learning_rate": 5.692336328409226e-07, "loss": 0.0282, "step": 57415 }, { "epoch": 2.914868775064724, "grad_norm": 0.32842501997947693, "learning_rate": 5.675414995685061e-07, "loss": 0.0312, "step": 57420 }, { "epoch": 2.9151225950555864, "grad_norm": 0.28682515025138855, "learning_rate": 5.658493662960896e-07, "loss": 0.0257, "step": 57425 }, { "epoch": 2.915376415046449, "grad_norm": 0.28181514143943787, "learning_rate": 5.64157233023673e-07, "loss": 0.0241, "step": 57430 }, { "epoch": 2.9156302350373116, "grad_norm": 0.4202750027179718, "learning_rate": 5.624650997512564e-07, "loss": 0.0271, "step": 57435 }, { "epoch": 2.915884055028174, "grad_norm": 0.29935333132743835, "learning_rate": 5.6077296647884e-07, "loss": 0.0234, "step": 57440 }, { "epoch": 2.9161378750190368, "grad_norm": 0.38342633843421936, "learning_rate": 5.590808332064234e-07, "loss": 0.0228, "step": 57445 }, { "epoch": 2.916391695009899, "grad_norm": 0.2808830440044403, "learning_rate": 5.573886999340069e-07, "loss": 0.033, "step": 57450 }, { "epoch": 2.9166455150007615, "grad_norm": 0.3798730671405792, "learning_rate": 5.556965666615903e-07, "loss": 0.0295, "step": 57455 }, { "epoch": 2.9168993349916237, "grad_norm": 0.29656025767326355, "learning_rate": 5.540044333891737e-07, "loss": 0.0312, "step": 57460 }, { "epoch": 2.9171531549824863, "grad_norm": 0.520729660987854, "learning_rate": 5.523123001167573e-07, "loss": 0.0337, "step": 57465 }, { "epoch": 2.917406974973349, "grad_norm": 0.3630119562149048, "learning_rate": 5.506201668443407e-07, "loss": 0.0313, "step": 57470 }, { "epoch": 2.9176607949642115, "grad_norm": 0.3356974422931671, "learning_rate": 5.489280335719242e-07, "loss": 0.0287, "step": 57475 }, { "epoch": 2.917914614955074, "grad_norm": 0.22973386943340302, "learning_rate": 5.472359002995076e-07, "loss": 0.0267, "step": 57480 }, { "epoch": 2.9181684349459363, "grad_norm": 0.5934128761291504, "learning_rate": 5.455437670270911e-07, "loss": 0.031, "step": 57485 }, { "epoch": 2.918422254936799, "grad_norm": 0.24467769265174866, "learning_rate": 5.438516337546746e-07, "loss": 0.0236, "step": 57490 }, { "epoch": 2.9186760749276615, "grad_norm": 0.2057151347398758, "learning_rate": 5.42159500482258e-07, "loss": 0.024, "step": 57495 }, { "epoch": 2.9189298949185236, "grad_norm": 0.4651114344596863, "learning_rate": 5.404673672098415e-07, "loss": 0.0253, "step": 57500 }, { "epoch": 2.919183714909386, "grad_norm": 0.4996252954006195, "learning_rate": 5.38775233937425e-07, "loss": 0.0282, "step": 57505 }, { "epoch": 2.919437534900249, "grad_norm": 1.3613667488098145, "learning_rate": 5.370831006650084e-07, "loss": 0.0304, "step": 57510 }, { "epoch": 2.9196913548911114, "grad_norm": 0.30651411414146423, "learning_rate": 5.353909673925919e-07, "loss": 0.0305, "step": 57515 }, { "epoch": 2.9199451748819736, "grad_norm": 0.35898858308792114, "learning_rate": 5.336988341201753e-07, "loss": 0.03, "step": 57520 }, { "epoch": 2.920198994872836, "grad_norm": 0.27816376090049744, "learning_rate": 5.320067008477589e-07, "loss": 0.0307, "step": 57525 }, { "epoch": 2.920452814863699, "grad_norm": 0.561227560043335, "learning_rate": 5.303145675753423e-07, "loss": 0.0262, "step": 57530 }, { "epoch": 2.920706634854561, "grad_norm": 0.4042511582374573, "learning_rate": 5.286224343029257e-07, "loss": 0.0344, "step": 57535 }, { "epoch": 2.9209604548454235, "grad_norm": 0.519243061542511, "learning_rate": 5.269303010305092e-07, "loss": 0.0313, "step": 57540 }, { "epoch": 2.921214274836286, "grad_norm": 0.35028374195098877, "learning_rate": 5.252381677580927e-07, "loss": 0.0333, "step": 57545 }, { "epoch": 2.9214680948271488, "grad_norm": 0.4332272410392761, "learning_rate": 5.235460344856762e-07, "loss": 0.0286, "step": 57550 }, { "epoch": 2.921721914818011, "grad_norm": 0.2854234576225281, "learning_rate": 5.218539012132596e-07, "loss": 0.0321, "step": 57555 }, { "epoch": 2.9219757348088735, "grad_norm": 0.3323298394680023, "learning_rate": 5.20161767940843e-07, "loss": 0.0301, "step": 57560 }, { "epoch": 2.922229554799736, "grad_norm": 0.3795822858810425, "learning_rate": 5.184696346684266e-07, "loss": 0.0266, "step": 57565 }, { "epoch": 2.9224833747905983, "grad_norm": 0.3280698359012604, "learning_rate": 5.167775013960099e-07, "loss": 0.0248, "step": 57570 }, { "epoch": 2.922737194781461, "grad_norm": 0.3984909951686859, "learning_rate": 5.150853681235935e-07, "loss": 0.0293, "step": 57575 }, { "epoch": 2.9229910147723235, "grad_norm": 0.28477180004119873, "learning_rate": 5.133932348511769e-07, "loss": 0.0221, "step": 57580 }, { "epoch": 2.923244834763186, "grad_norm": 0.4867945909500122, "learning_rate": 5.117011015787604e-07, "loss": 0.0366, "step": 57585 }, { "epoch": 2.9234986547540487, "grad_norm": 0.5254274010658264, "learning_rate": 5.100089683063438e-07, "loss": 0.0286, "step": 57590 }, { "epoch": 2.923752474744911, "grad_norm": 0.3182218372821808, "learning_rate": 5.083168350339273e-07, "loss": 0.0236, "step": 57595 }, { "epoch": 2.9240062947357734, "grad_norm": 0.26986467838287354, "learning_rate": 5.066247017615108e-07, "loss": 0.0277, "step": 57600 }, { "epoch": 2.9242601147266356, "grad_norm": 0.7450768947601318, "learning_rate": 5.049325684890943e-07, "loss": 0.0278, "step": 57605 }, { "epoch": 2.924513934717498, "grad_norm": 0.3461737334728241, "learning_rate": 5.032404352166776e-07, "loss": 0.0296, "step": 57610 }, { "epoch": 2.924767754708361, "grad_norm": 0.36083826422691345, "learning_rate": 5.015483019442612e-07, "loss": 0.0275, "step": 57615 }, { "epoch": 2.9250215746992234, "grad_norm": 0.5787628293037415, "learning_rate": 4.998561686718446e-07, "loss": 0.0342, "step": 57620 }, { "epoch": 2.925275394690086, "grad_norm": 0.37813305854797363, "learning_rate": 4.981640353994282e-07, "loss": 0.0266, "step": 57625 }, { "epoch": 2.925529214680948, "grad_norm": 0.5129855275154114, "learning_rate": 4.964719021270115e-07, "loss": 0.0288, "step": 57630 }, { "epoch": 2.925783034671811, "grad_norm": 0.3590392470359802, "learning_rate": 4.94779768854595e-07, "loss": 0.026, "step": 57635 }, { "epoch": 2.9260368546626734, "grad_norm": 0.22634489834308624, "learning_rate": 4.930876355821785e-07, "loss": 0.0237, "step": 57640 }, { "epoch": 2.9262906746535355, "grad_norm": 0.4361048638820648, "learning_rate": 4.91395502309762e-07, "loss": 0.0256, "step": 57645 }, { "epoch": 2.926544494644398, "grad_norm": 0.2713603973388672, "learning_rate": 4.897033690373454e-07, "loss": 0.0246, "step": 57650 }, { "epoch": 2.9267983146352607, "grad_norm": 0.4372120797634125, "learning_rate": 4.880112357649289e-07, "loss": 0.0245, "step": 57655 }, { "epoch": 2.9270521346261233, "grad_norm": 0.2845839858055115, "learning_rate": 4.863191024925123e-07, "loss": 0.0331, "step": 57660 }, { "epoch": 2.9273059546169855, "grad_norm": 0.21914979815483093, "learning_rate": 4.846269692200959e-07, "loss": 0.0255, "step": 57665 }, { "epoch": 2.927559774607848, "grad_norm": 0.14833270013332367, "learning_rate": 4.829348359476792e-07, "loss": 0.0302, "step": 57670 }, { "epoch": 2.9278135945987107, "grad_norm": 0.25538668036460876, "learning_rate": 4.812427026752628e-07, "loss": 0.0234, "step": 57675 }, { "epoch": 2.928067414589573, "grad_norm": 0.47089454531669617, "learning_rate": 4.795505694028462e-07, "loss": 0.025, "step": 57680 }, { "epoch": 2.9283212345804355, "grad_norm": 0.3125099539756775, "learning_rate": 4.778584361304296e-07, "loss": 0.0275, "step": 57685 }, { "epoch": 2.928575054571298, "grad_norm": 0.2715403139591217, "learning_rate": 4.761663028580131e-07, "loss": 0.0343, "step": 57690 }, { "epoch": 2.9288288745621607, "grad_norm": 0.3333417475223541, "learning_rate": 4.7447416958559657e-07, "loss": 0.025, "step": 57695 }, { "epoch": 2.929082694553023, "grad_norm": 0.3405666649341583, "learning_rate": 4.7278203631318006e-07, "loss": 0.0291, "step": 57700 }, { "epoch": 2.9293365145438854, "grad_norm": 0.2805671691894531, "learning_rate": 4.7108990304076355e-07, "loss": 0.0354, "step": 57705 }, { "epoch": 2.929590334534748, "grad_norm": 0.43839380145072937, "learning_rate": 4.6939776976834704e-07, "loss": 0.0271, "step": 57710 }, { "epoch": 2.92984415452561, "grad_norm": 0.50478595495224, "learning_rate": 4.6770563649593043e-07, "loss": 0.0301, "step": 57715 }, { "epoch": 2.930097974516473, "grad_norm": 0.5127033591270447, "learning_rate": 4.660135032235139e-07, "loss": 0.0298, "step": 57720 }, { "epoch": 2.9303517945073354, "grad_norm": 0.31215181946754456, "learning_rate": 4.643213699510974e-07, "loss": 0.0321, "step": 57725 }, { "epoch": 2.930605614498198, "grad_norm": 0.35444653034210205, "learning_rate": 4.626292366786809e-07, "loss": 0.0316, "step": 57730 }, { "epoch": 2.9308594344890606, "grad_norm": 0.39015939831733704, "learning_rate": 4.609371034062643e-07, "loss": 0.0222, "step": 57735 }, { "epoch": 2.9311132544799228, "grad_norm": 0.9289717078208923, "learning_rate": 4.592449701338478e-07, "loss": 0.0307, "step": 57740 }, { "epoch": 2.9313670744707854, "grad_norm": 0.33057901263237, "learning_rate": 4.5755283686143127e-07, "loss": 0.0324, "step": 57745 }, { "epoch": 2.9316208944616475, "grad_norm": 0.31482622027397156, "learning_rate": 4.5586070358901476e-07, "loss": 0.0265, "step": 57750 }, { "epoch": 2.93187471445251, "grad_norm": 0.5130469799041748, "learning_rate": 4.5416857031659815e-07, "loss": 0.0276, "step": 57755 }, { "epoch": 2.9321285344433727, "grad_norm": 0.5404413938522339, "learning_rate": 4.5247643704418164e-07, "loss": 0.0329, "step": 57760 }, { "epoch": 2.9323823544342353, "grad_norm": 0.5473409295082092, "learning_rate": 4.5078430377176513e-07, "loss": 0.0295, "step": 57765 }, { "epoch": 2.932636174425098, "grad_norm": 0.30557894706726074, "learning_rate": 4.490921704993486e-07, "loss": 0.037, "step": 57770 }, { "epoch": 2.93288999441596, "grad_norm": 0.28683364391326904, "learning_rate": 4.47400037226932e-07, "loss": 0.0271, "step": 57775 }, { "epoch": 2.9331438144068227, "grad_norm": 0.3118693232536316, "learning_rate": 4.457079039545155e-07, "loss": 0.0264, "step": 57780 }, { "epoch": 2.9333976343976853, "grad_norm": 0.23888196051120758, "learning_rate": 4.44015770682099e-07, "loss": 0.0351, "step": 57785 }, { "epoch": 2.9336514543885475, "grad_norm": 0.36562538146972656, "learning_rate": 4.4232363740968243e-07, "loss": 0.036, "step": 57790 }, { "epoch": 2.93390527437941, "grad_norm": 0.26900336146354675, "learning_rate": 4.4063150413726587e-07, "loss": 0.0271, "step": 57795 }, { "epoch": 2.9341590943702727, "grad_norm": 0.23322747647762299, "learning_rate": 4.3893937086484936e-07, "loss": 0.0243, "step": 57800 }, { "epoch": 2.9344129143611353, "grad_norm": 0.3980046510696411, "learning_rate": 4.3724723759243285e-07, "loss": 0.0262, "step": 57805 }, { "epoch": 2.9346667343519974, "grad_norm": 0.3880888521671295, "learning_rate": 4.355551043200163e-07, "loss": 0.027, "step": 57810 }, { "epoch": 2.93492055434286, "grad_norm": 0.2841983139514923, "learning_rate": 4.3386297104759973e-07, "loss": 0.0331, "step": 57815 }, { "epoch": 2.9351743743337226, "grad_norm": 0.5500996112823486, "learning_rate": 4.321708377751832e-07, "loss": 0.0257, "step": 57820 }, { "epoch": 2.935428194324585, "grad_norm": 0.3994247615337372, "learning_rate": 4.3047870450276666e-07, "loss": 0.0337, "step": 57825 }, { "epoch": 2.9356820143154474, "grad_norm": 0.23893968760967255, "learning_rate": 4.2878657123035015e-07, "loss": 0.0373, "step": 57830 }, { "epoch": 2.93593583430631, "grad_norm": 0.39422041177749634, "learning_rate": 4.270944379579336e-07, "loss": 0.0282, "step": 57835 }, { "epoch": 2.9361896542971726, "grad_norm": 0.30572760105133057, "learning_rate": 4.254023046855171e-07, "loss": 0.0367, "step": 57840 }, { "epoch": 2.9364434742880348, "grad_norm": 0.23431634902954102, "learning_rate": 4.237101714131005e-07, "loss": 0.0294, "step": 57845 }, { "epoch": 2.9366972942788974, "grad_norm": 0.46416381001472473, "learning_rate": 4.22018038140684e-07, "loss": 0.0334, "step": 57850 }, { "epoch": 2.93695111426976, "grad_norm": 0.29117438197135925, "learning_rate": 4.2032590486826745e-07, "loss": 0.0255, "step": 57855 }, { "epoch": 2.937204934260622, "grad_norm": 0.2204287201166153, "learning_rate": 4.1863377159585094e-07, "loss": 0.0277, "step": 57860 }, { "epoch": 2.9374587542514847, "grad_norm": 0.2894386649131775, "learning_rate": 4.169416383234344e-07, "loss": 0.0247, "step": 57865 }, { "epoch": 2.9377125742423473, "grad_norm": 0.2967325747013092, "learning_rate": 4.1524950505101787e-07, "loss": 0.0255, "step": 57870 }, { "epoch": 2.93796639423321, "grad_norm": 0.26310840249061584, "learning_rate": 4.135573717786013e-07, "loss": 0.0335, "step": 57875 }, { "epoch": 2.9382202142240725, "grad_norm": 0.3313550353050232, "learning_rate": 4.1186523850618475e-07, "loss": 0.035, "step": 57880 }, { "epoch": 2.9384740342149347, "grad_norm": 0.3628803491592407, "learning_rate": 4.1017310523376824e-07, "loss": 0.0307, "step": 57885 }, { "epoch": 2.9387278542057973, "grad_norm": 0.2926108241081238, "learning_rate": 4.0848097196135173e-07, "loss": 0.0278, "step": 57890 }, { "epoch": 2.9389816741966595, "grad_norm": 0.2612711787223816, "learning_rate": 4.0678883868893517e-07, "loss": 0.0291, "step": 57895 }, { "epoch": 2.939235494187522, "grad_norm": 0.35889166593551636, "learning_rate": 4.050967054165186e-07, "loss": 0.0278, "step": 57900 }, { "epoch": 2.9394893141783847, "grad_norm": 0.34467950463294983, "learning_rate": 4.034045721441021e-07, "loss": 0.0264, "step": 57905 }, { "epoch": 2.9397431341692473, "grad_norm": 0.2898823320865631, "learning_rate": 4.017124388716856e-07, "loss": 0.0303, "step": 57910 }, { "epoch": 2.93999695416011, "grad_norm": 0.4356616735458374, "learning_rate": 4.0002030559926903e-07, "loss": 0.0296, "step": 57915 }, { "epoch": 2.940250774150972, "grad_norm": 0.46433550119400024, "learning_rate": 3.9832817232685247e-07, "loss": 0.0294, "step": 57920 }, { "epoch": 2.9405045941418346, "grad_norm": 0.4361477196216583, "learning_rate": 3.9663603905443596e-07, "loss": 0.033, "step": 57925 }, { "epoch": 2.9407584141326972, "grad_norm": 0.2655782103538513, "learning_rate": 3.9494390578201946e-07, "loss": 0.0339, "step": 57930 }, { "epoch": 2.9410122341235594, "grad_norm": 0.28838610649108887, "learning_rate": 3.9325177250960284e-07, "loss": 0.0277, "step": 57935 }, { "epoch": 2.941266054114422, "grad_norm": 0.32494908571243286, "learning_rate": 3.9155963923718633e-07, "loss": 0.0348, "step": 57940 }, { "epoch": 2.9415198741052846, "grad_norm": 0.23130418360233307, "learning_rate": 3.898675059647698e-07, "loss": 0.0311, "step": 57945 }, { "epoch": 2.941773694096147, "grad_norm": 0.3082735538482666, "learning_rate": 3.881753726923533e-07, "loss": 0.0302, "step": 57950 }, { "epoch": 2.9420275140870094, "grad_norm": 0.2921549379825592, "learning_rate": 3.864832394199367e-07, "loss": 0.0262, "step": 57955 }, { "epoch": 2.942281334077872, "grad_norm": 0.2690448760986328, "learning_rate": 3.847911061475202e-07, "loss": 0.0296, "step": 57960 }, { "epoch": 2.9425351540687346, "grad_norm": 0.27467915415763855, "learning_rate": 3.830989728751037e-07, "loss": 0.0311, "step": 57965 }, { "epoch": 2.9427889740595967, "grad_norm": 0.3122628927230835, "learning_rate": 3.814068396026872e-07, "loss": 0.0314, "step": 57970 }, { "epoch": 2.9430427940504593, "grad_norm": 0.20584361255168915, "learning_rate": 3.7971470633027056e-07, "loss": 0.0256, "step": 57975 }, { "epoch": 2.943296614041322, "grad_norm": 0.3449113667011261, "learning_rate": 3.7802257305785405e-07, "loss": 0.0316, "step": 57980 }, { "epoch": 2.9435504340321845, "grad_norm": 0.24136188626289368, "learning_rate": 3.7633043978543755e-07, "loss": 0.0254, "step": 57985 }, { "epoch": 2.943804254023047, "grad_norm": 0.314791738986969, "learning_rate": 3.7463830651302104e-07, "loss": 0.0296, "step": 57990 }, { "epoch": 2.9440580740139093, "grad_norm": 0.41983452439308167, "learning_rate": 3.729461732406044e-07, "loss": 0.0303, "step": 57995 }, { "epoch": 2.944311894004772, "grad_norm": 0.41986700892448425, "learning_rate": 3.712540399681879e-07, "loss": 0.0397, "step": 58000 }, { "epoch": 2.944565713995634, "grad_norm": 0.39581939578056335, "learning_rate": 3.695619066957714e-07, "loss": 0.0319, "step": 58005 }, { "epoch": 2.9448195339864967, "grad_norm": 0.33107373118400574, "learning_rate": 3.678697734233549e-07, "loss": 0.0375, "step": 58010 }, { "epoch": 2.9450733539773593, "grad_norm": 0.25613731145858765, "learning_rate": 3.661776401509383e-07, "loss": 0.0309, "step": 58015 }, { "epoch": 2.945327173968222, "grad_norm": 0.42751434445381165, "learning_rate": 3.644855068785218e-07, "loss": 0.0291, "step": 58020 }, { "epoch": 2.9455809939590845, "grad_norm": 0.29339128732681274, "learning_rate": 3.6279337360610527e-07, "loss": 0.0324, "step": 58025 }, { "epoch": 2.9458348139499466, "grad_norm": 0.3385933041572571, "learning_rate": 3.6110124033368876e-07, "loss": 0.0229, "step": 58030 }, { "epoch": 2.946088633940809, "grad_norm": 0.37300586700439453, "learning_rate": 3.5940910706127214e-07, "loss": 0.0266, "step": 58035 }, { "epoch": 2.946342453931672, "grad_norm": 0.33182457089424133, "learning_rate": 3.5771697378885564e-07, "loss": 0.0213, "step": 58040 }, { "epoch": 2.946596273922534, "grad_norm": 0.4372585713863373, "learning_rate": 3.5602484051643913e-07, "loss": 0.0266, "step": 58045 }, { "epoch": 2.9468500939133966, "grad_norm": 0.3945082426071167, "learning_rate": 3.543327072440226e-07, "loss": 0.0316, "step": 58050 }, { "epoch": 2.947103913904259, "grad_norm": 0.3530140221118927, "learning_rate": 3.52640573971606e-07, "loss": 0.0248, "step": 58055 }, { "epoch": 2.947357733895122, "grad_norm": 0.2474001795053482, "learning_rate": 3.509484406991895e-07, "loss": 0.0307, "step": 58060 }, { "epoch": 2.947611553885984, "grad_norm": 0.37006187438964844, "learning_rate": 3.49256307426773e-07, "loss": 0.028, "step": 58065 }, { "epoch": 2.9478653738768466, "grad_norm": 0.2380700260400772, "learning_rate": 3.475641741543564e-07, "loss": 0.0254, "step": 58070 }, { "epoch": 2.948119193867709, "grad_norm": 0.2604024112224579, "learning_rate": 3.4587204088193986e-07, "loss": 0.0362, "step": 58075 }, { "epoch": 2.9483730138585713, "grad_norm": 0.32506248354911804, "learning_rate": 3.4417990760952336e-07, "loss": 0.0325, "step": 58080 }, { "epoch": 2.948626833849434, "grad_norm": 0.22234822809696198, "learning_rate": 3.4248777433710685e-07, "loss": 0.0306, "step": 58085 }, { "epoch": 2.9488806538402965, "grad_norm": 0.7750864028930664, "learning_rate": 3.407956410646903e-07, "loss": 0.0376, "step": 58090 }, { "epoch": 2.949134473831159, "grad_norm": 0.3774649202823639, "learning_rate": 3.391035077922737e-07, "loss": 0.0311, "step": 58095 }, { "epoch": 2.9493882938220213, "grad_norm": 0.358844518661499, "learning_rate": 3.374113745198572e-07, "loss": 0.0335, "step": 58100 }, { "epoch": 2.949642113812884, "grad_norm": 0.4665594696998596, "learning_rate": 3.3571924124744066e-07, "loss": 0.0314, "step": 58105 }, { "epoch": 2.9498959338037465, "grad_norm": 0.28644874691963196, "learning_rate": 3.3402710797502415e-07, "loss": 0.0279, "step": 58110 }, { "epoch": 2.9501497537946086, "grad_norm": 0.31177669763565063, "learning_rate": 3.323349747026076e-07, "loss": 0.0291, "step": 58115 }, { "epoch": 2.9504035737854712, "grad_norm": 0.26134926080703735, "learning_rate": 3.306428414301911e-07, "loss": 0.0305, "step": 58120 }, { "epoch": 2.950657393776334, "grad_norm": 0.3544405996799469, "learning_rate": 3.289507081577745e-07, "loss": 0.0317, "step": 58125 }, { "epoch": 2.9509112137671965, "grad_norm": 0.39834409952163696, "learning_rate": 3.27258574885358e-07, "loss": 0.0304, "step": 58130 }, { "epoch": 2.951165033758059, "grad_norm": 0.4057234823703766, "learning_rate": 3.2556644161294145e-07, "loss": 0.0343, "step": 58135 }, { "epoch": 2.951418853748921, "grad_norm": 0.24034041166305542, "learning_rate": 3.2387430834052494e-07, "loss": 0.0242, "step": 58140 }, { "epoch": 2.951672673739784, "grad_norm": 0.2940845191478729, "learning_rate": 3.221821750681084e-07, "loss": 0.0236, "step": 58145 }, { "epoch": 2.951926493730646, "grad_norm": 0.3044266998767853, "learning_rate": 3.2049004179569187e-07, "loss": 0.0257, "step": 58150 }, { "epoch": 2.9521803137215086, "grad_norm": 0.27591419219970703, "learning_rate": 3.187979085232753e-07, "loss": 0.0271, "step": 58155 }, { "epoch": 2.952434133712371, "grad_norm": 0.277483731508255, "learning_rate": 3.1710577525085875e-07, "loss": 0.0346, "step": 58160 }, { "epoch": 2.952687953703234, "grad_norm": 0.45967957377433777, "learning_rate": 3.1541364197844224e-07, "loss": 0.0296, "step": 58165 }, { "epoch": 2.9529417736940964, "grad_norm": 0.32595378160476685, "learning_rate": 3.1372150870602573e-07, "loss": 0.0318, "step": 58170 }, { "epoch": 2.9531955936849585, "grad_norm": 0.25108686089515686, "learning_rate": 3.1202937543360917e-07, "loss": 0.0279, "step": 58175 }, { "epoch": 2.953449413675821, "grad_norm": 0.3554666340351105, "learning_rate": 3.103372421611926e-07, "loss": 0.0232, "step": 58180 }, { "epoch": 2.9537032336666837, "grad_norm": 0.324294775724411, "learning_rate": 3.086451088887761e-07, "loss": 0.0262, "step": 58185 }, { "epoch": 2.953957053657546, "grad_norm": 0.27495795488357544, "learning_rate": 3.0695297561635954e-07, "loss": 0.0265, "step": 58190 }, { "epoch": 2.9542108736484085, "grad_norm": 0.3297526240348816, "learning_rate": 3.0526084234394303e-07, "loss": 0.0254, "step": 58195 }, { "epoch": 2.954464693639271, "grad_norm": 0.37720391154289246, "learning_rate": 3.0356870907152647e-07, "loss": 0.0295, "step": 58200 }, { "epoch": 2.9547185136301337, "grad_norm": 0.3667004108428955, "learning_rate": 3.0187657579910996e-07, "loss": 0.0364, "step": 58205 }, { "epoch": 2.954972333620996, "grad_norm": 0.6564368605613708, "learning_rate": 3.001844425266934e-07, "loss": 0.0348, "step": 58210 }, { "epoch": 2.9552261536118585, "grad_norm": 0.31273505091667175, "learning_rate": 2.984923092542769e-07, "loss": 0.0272, "step": 58215 }, { "epoch": 2.955479973602721, "grad_norm": 0.35449856519699097, "learning_rate": 2.9680017598186033e-07, "loss": 0.0266, "step": 58220 }, { "epoch": 2.9557337935935832, "grad_norm": 0.3688802719116211, "learning_rate": 2.951080427094438e-07, "loss": 0.0275, "step": 58225 }, { "epoch": 2.955987613584446, "grad_norm": 0.525647759437561, "learning_rate": 2.9341590943702726e-07, "loss": 0.0292, "step": 58230 }, { "epoch": 2.9562414335753084, "grad_norm": 0.3015739619731903, "learning_rate": 2.9172377616461075e-07, "loss": 0.0299, "step": 58235 }, { "epoch": 2.956495253566171, "grad_norm": 0.26125138998031616, "learning_rate": 2.900316428921942e-07, "loss": 0.0329, "step": 58240 }, { "epoch": 2.956749073557033, "grad_norm": 0.2997949421405792, "learning_rate": 2.883395096197777e-07, "loss": 0.0279, "step": 58245 }, { "epoch": 2.957002893547896, "grad_norm": 0.3520687222480774, "learning_rate": 2.8664737634736117e-07, "loss": 0.0301, "step": 58250 }, { "epoch": 2.9572567135387584, "grad_norm": 0.3813325762748718, "learning_rate": 2.849552430749446e-07, "loss": 0.0309, "step": 58255 }, { "epoch": 2.9575105335296206, "grad_norm": 0.3722776770591736, "learning_rate": 2.832631098025281e-07, "loss": 0.0282, "step": 58260 }, { "epoch": 2.957764353520483, "grad_norm": 0.3159470558166504, "learning_rate": 2.8157097653011154e-07, "loss": 0.0273, "step": 58265 }, { "epoch": 2.9580181735113458, "grad_norm": 0.21048100292682648, "learning_rate": 2.7987884325769503e-07, "loss": 0.0231, "step": 58270 }, { "epoch": 2.9582719935022084, "grad_norm": 0.2904769480228424, "learning_rate": 2.7818670998527847e-07, "loss": 0.0284, "step": 58275 }, { "epoch": 2.958525813493071, "grad_norm": 0.2318405956029892, "learning_rate": 2.7649457671286196e-07, "loss": 0.0293, "step": 58280 }, { "epoch": 2.958779633483933, "grad_norm": 0.323593407869339, "learning_rate": 2.748024434404454e-07, "loss": 0.0225, "step": 58285 }, { "epoch": 2.9590334534747957, "grad_norm": 0.31280264258384705, "learning_rate": 2.731103101680289e-07, "loss": 0.0293, "step": 58290 }, { "epoch": 2.959287273465658, "grad_norm": 0.3686768412590027, "learning_rate": 2.7141817689561233e-07, "loss": 0.0303, "step": 58295 }, { "epoch": 2.9595410934565205, "grad_norm": 0.31249767541885376, "learning_rate": 2.697260436231958e-07, "loss": 0.0353, "step": 58300 }, { "epoch": 2.959794913447383, "grad_norm": 0.33410879969596863, "learning_rate": 2.6803391035077926e-07, "loss": 0.0337, "step": 58305 }, { "epoch": 2.9600487334382457, "grad_norm": 0.4810965359210968, "learning_rate": 2.6634177707836275e-07, "loss": 0.0336, "step": 58310 }, { "epoch": 2.9603025534291083, "grad_norm": 0.24375031888484955, "learning_rate": 2.646496438059462e-07, "loss": 0.0263, "step": 58315 }, { "epoch": 2.9605563734199705, "grad_norm": 0.314066082239151, "learning_rate": 2.629575105335297e-07, "loss": 0.0351, "step": 58320 }, { "epoch": 2.960810193410833, "grad_norm": 0.6520958542823792, "learning_rate": 2.612653772611131e-07, "loss": 0.0339, "step": 58325 }, { "epoch": 2.9610640134016957, "grad_norm": 0.4829593300819397, "learning_rate": 2.5957324398869656e-07, "loss": 0.0292, "step": 58330 }, { "epoch": 2.961317833392558, "grad_norm": 0.39782652258872986, "learning_rate": 2.5788111071628005e-07, "loss": 0.0302, "step": 58335 }, { "epoch": 2.9615716533834204, "grad_norm": 0.3647368550300598, "learning_rate": 2.561889774438635e-07, "loss": 0.0288, "step": 58340 }, { "epoch": 2.961825473374283, "grad_norm": 0.3723721504211426, "learning_rate": 2.54496844171447e-07, "loss": 0.0271, "step": 58345 }, { "epoch": 2.9620792933651456, "grad_norm": 0.5521222352981567, "learning_rate": 2.528047108990304e-07, "loss": 0.0264, "step": 58350 }, { "epoch": 2.962333113356008, "grad_norm": 0.2993130385875702, "learning_rate": 2.511125776266139e-07, "loss": 0.0284, "step": 58355 }, { "epoch": 2.9625869333468704, "grad_norm": 0.827579915523529, "learning_rate": 2.4942044435419735e-07, "loss": 0.0343, "step": 58360 }, { "epoch": 2.962840753337733, "grad_norm": 0.3333107829093933, "learning_rate": 2.4772831108178084e-07, "loss": 0.0364, "step": 58365 }, { "epoch": 2.963094573328595, "grad_norm": 0.3528420925140381, "learning_rate": 2.460361778093643e-07, "loss": 0.0308, "step": 58370 }, { "epoch": 2.9633483933194578, "grad_norm": 0.2768530249595642, "learning_rate": 2.443440445369477e-07, "loss": 0.0272, "step": 58375 }, { "epoch": 2.9636022133103204, "grad_norm": 0.2978229820728302, "learning_rate": 2.426519112645312e-07, "loss": 0.0282, "step": 58380 }, { "epoch": 2.963856033301183, "grad_norm": 0.24835215508937836, "learning_rate": 2.4095977799211465e-07, "loss": 0.026, "step": 58385 }, { "epoch": 2.964109853292045, "grad_norm": 0.2698346674442291, "learning_rate": 2.3926764471969814e-07, "loss": 0.0254, "step": 58390 }, { "epoch": 2.9643636732829077, "grad_norm": 0.28580522537231445, "learning_rate": 2.375755114472816e-07, "loss": 0.0268, "step": 58395 }, { "epoch": 2.9646174932737703, "grad_norm": 0.31052786111831665, "learning_rate": 2.3588337817486507e-07, "loss": 0.0238, "step": 58400 }, { "epoch": 2.9648713132646325, "grad_norm": 0.35599517822265625, "learning_rate": 2.3419124490244854e-07, "loss": 0.032, "step": 58405 }, { "epoch": 2.965125133255495, "grad_norm": 0.3290466070175171, "learning_rate": 2.32499111630032e-07, "loss": 0.0326, "step": 58410 }, { "epoch": 2.9653789532463577, "grad_norm": 0.46071192622184753, "learning_rate": 2.3080697835761547e-07, "loss": 0.0305, "step": 58415 }, { "epoch": 2.9656327732372203, "grad_norm": 0.20090284943580627, "learning_rate": 2.2911484508519893e-07, "loss": 0.0259, "step": 58420 }, { "epoch": 2.965886593228083, "grad_norm": 0.47280457615852356, "learning_rate": 2.274227118127824e-07, "loss": 0.0251, "step": 58425 }, { "epoch": 2.966140413218945, "grad_norm": 0.3381558656692505, "learning_rate": 2.2573057854036586e-07, "loss": 0.0239, "step": 58430 }, { "epoch": 2.9663942332098077, "grad_norm": 0.27177587151527405, "learning_rate": 2.2403844526794933e-07, "loss": 0.0262, "step": 58435 }, { "epoch": 2.96664805320067, "grad_norm": 0.2976479232311249, "learning_rate": 2.223463119955328e-07, "loss": 0.0264, "step": 58440 }, { "epoch": 2.9669018731915324, "grad_norm": 0.3702794909477234, "learning_rate": 2.2065417872311623e-07, "loss": 0.0255, "step": 58445 }, { "epoch": 2.967155693182395, "grad_norm": 0.31776705384254456, "learning_rate": 2.1896204545069972e-07, "loss": 0.0272, "step": 58450 }, { "epoch": 2.9674095131732576, "grad_norm": 0.5179955959320068, "learning_rate": 2.1726991217828316e-07, "loss": 0.025, "step": 58455 }, { "epoch": 2.9676633331641202, "grad_norm": 0.2706942558288574, "learning_rate": 2.1557777890586665e-07, "loss": 0.0315, "step": 58460 }, { "epoch": 2.9679171531549824, "grad_norm": 0.3058590590953827, "learning_rate": 2.138856456334501e-07, "loss": 0.03, "step": 58465 }, { "epoch": 2.968170973145845, "grad_norm": 0.33392003178596497, "learning_rate": 2.1219351236103358e-07, "loss": 0.0252, "step": 58470 }, { "epoch": 2.9684247931367076, "grad_norm": 1.4577997922897339, "learning_rate": 2.1050137908861702e-07, "loss": 0.0249, "step": 58475 }, { "epoch": 2.9686786131275698, "grad_norm": 0.37016749382019043, "learning_rate": 2.0880924581620051e-07, "loss": 0.0295, "step": 58480 }, { "epoch": 2.9689324331184324, "grad_norm": 0.31658273935317993, "learning_rate": 2.0711711254378395e-07, "loss": 0.031, "step": 58485 }, { "epoch": 2.969186253109295, "grad_norm": 0.5395975112915039, "learning_rate": 2.0542497927136744e-07, "loss": 0.0305, "step": 58490 }, { "epoch": 2.9694400731001576, "grad_norm": 0.5022832155227661, "learning_rate": 2.0373284599895088e-07, "loss": 0.0328, "step": 58495 }, { "epoch": 2.9696938930910197, "grad_norm": 0.46155476570129395, "learning_rate": 2.0204071272653437e-07, "loss": 0.031, "step": 58500 }, { "epoch": 2.9699477130818823, "grad_norm": 0.26691576838493347, "learning_rate": 2.003485794541178e-07, "loss": 0.0263, "step": 58505 }, { "epoch": 2.970201533072745, "grad_norm": 0.2853245437145233, "learning_rate": 1.986564461817013e-07, "loss": 0.0292, "step": 58510 }, { "epoch": 2.970455353063607, "grad_norm": 0.3737592101097107, "learning_rate": 1.9696431290928474e-07, "loss": 0.0229, "step": 58515 }, { "epoch": 2.9707091730544697, "grad_norm": 0.30825868248939514, "learning_rate": 1.9527217963686823e-07, "loss": 0.0284, "step": 58520 }, { "epoch": 2.9709629930453323, "grad_norm": 0.2496730536222458, "learning_rate": 1.9358004636445167e-07, "loss": 0.0246, "step": 58525 }, { "epoch": 2.971216813036195, "grad_norm": 0.3173964023590088, "learning_rate": 1.9188791309203517e-07, "loss": 0.0276, "step": 58530 }, { "epoch": 2.971470633027057, "grad_norm": 0.44984614849090576, "learning_rate": 1.901957798196186e-07, "loss": 0.0288, "step": 58535 }, { "epoch": 2.9717244530179197, "grad_norm": 0.3062257468700409, "learning_rate": 1.8850364654720207e-07, "loss": 0.021, "step": 58540 }, { "epoch": 2.9719782730087823, "grad_norm": 0.36753204464912415, "learning_rate": 1.8681151327478553e-07, "loss": 0.0302, "step": 58545 }, { "epoch": 2.9722320929996444, "grad_norm": 0.2601029574871063, "learning_rate": 1.85119380002369e-07, "loss": 0.0235, "step": 58550 }, { "epoch": 2.972485912990507, "grad_norm": 0.2703913152217865, "learning_rate": 1.8342724672995246e-07, "loss": 0.0238, "step": 58555 }, { "epoch": 2.9727397329813696, "grad_norm": 0.3573007881641388, "learning_rate": 1.8173511345753593e-07, "loss": 0.0346, "step": 58560 }, { "epoch": 2.9729935529722322, "grad_norm": 0.35567259788513184, "learning_rate": 1.800429801851194e-07, "loss": 0.0295, "step": 58565 }, { "epoch": 2.973247372963095, "grad_norm": 0.3184675872325897, "learning_rate": 1.7835084691270286e-07, "loss": 0.0256, "step": 58570 }, { "epoch": 2.973501192953957, "grad_norm": 0.27316588163375854, "learning_rate": 1.7665871364028632e-07, "loss": 0.0245, "step": 58575 }, { "epoch": 2.9737550129448196, "grad_norm": 0.27470746636390686, "learning_rate": 1.749665803678698e-07, "loss": 0.0225, "step": 58580 }, { "epoch": 2.9740088329356817, "grad_norm": 0.23476144671440125, "learning_rate": 1.7327444709545323e-07, "loss": 0.0242, "step": 58585 }, { "epoch": 2.9742626529265443, "grad_norm": 0.38638877868652344, "learning_rate": 1.7158231382303672e-07, "loss": 0.0277, "step": 58590 }, { "epoch": 2.974516472917407, "grad_norm": 0.4438152611255646, "learning_rate": 1.6989018055062016e-07, "loss": 0.0358, "step": 58595 }, { "epoch": 2.9747702929082696, "grad_norm": 0.26275134086608887, "learning_rate": 1.6819804727820365e-07, "loss": 0.0334, "step": 58600 }, { "epoch": 2.975024112899132, "grad_norm": 0.5719013214111328, "learning_rate": 1.665059140057871e-07, "loss": 0.0332, "step": 58605 }, { "epoch": 2.9752779328899943, "grad_norm": 0.3588959574699402, "learning_rate": 1.6481378073337058e-07, "loss": 0.0316, "step": 58610 }, { "epoch": 2.975531752880857, "grad_norm": 0.34206733107566833, "learning_rate": 1.6312164746095402e-07, "loss": 0.0337, "step": 58615 }, { "epoch": 2.9757855728717195, "grad_norm": 0.3827134966850281, "learning_rate": 1.614295141885375e-07, "loss": 0.0313, "step": 58620 }, { "epoch": 2.9760393928625817, "grad_norm": 0.32642507553100586, "learning_rate": 1.5973738091612095e-07, "loss": 0.0275, "step": 58625 }, { "epoch": 2.9762932128534443, "grad_norm": 0.3430155813694, "learning_rate": 1.5804524764370444e-07, "loss": 0.0273, "step": 58630 }, { "epoch": 2.976547032844307, "grad_norm": 0.37102141976356506, "learning_rate": 1.5635311437128788e-07, "loss": 0.0285, "step": 58635 }, { "epoch": 2.9768008528351695, "grad_norm": 0.2242543250322342, "learning_rate": 1.5466098109887137e-07, "loss": 0.0283, "step": 58640 }, { "epoch": 2.9770546728260316, "grad_norm": 0.3857623040676117, "learning_rate": 1.5296884782645484e-07, "loss": 0.0274, "step": 58645 }, { "epoch": 2.9773084928168942, "grad_norm": 0.18669942021369934, "learning_rate": 1.512767145540383e-07, "loss": 0.0263, "step": 58650 }, { "epoch": 2.977562312807757, "grad_norm": 0.40870609879493713, "learning_rate": 1.4958458128162177e-07, "loss": 0.0365, "step": 58655 }, { "epoch": 2.977816132798619, "grad_norm": 0.28093990683555603, "learning_rate": 1.4789244800920523e-07, "loss": 0.0243, "step": 58660 }, { "epoch": 2.9780699527894816, "grad_norm": 0.34547853469848633, "learning_rate": 1.462003147367887e-07, "loss": 0.0299, "step": 58665 }, { "epoch": 2.978323772780344, "grad_norm": 0.23397915065288544, "learning_rate": 1.4450818146437216e-07, "loss": 0.038, "step": 58670 }, { "epoch": 2.978577592771207, "grad_norm": 0.2273659110069275, "learning_rate": 1.428160481919556e-07, "loss": 0.0255, "step": 58675 }, { "epoch": 2.978831412762069, "grad_norm": 0.2079605758190155, "learning_rate": 1.4112391491953907e-07, "loss": 0.0347, "step": 58680 }, { "epoch": 2.9790852327529316, "grad_norm": 0.31091585755348206, "learning_rate": 1.3943178164712253e-07, "loss": 0.0299, "step": 58685 }, { "epoch": 2.979339052743794, "grad_norm": 0.28483375906944275, "learning_rate": 1.37739648374706e-07, "loss": 0.0327, "step": 58690 }, { "epoch": 2.9795928727346563, "grad_norm": 0.3354802429676056, "learning_rate": 1.3604751510228946e-07, "loss": 0.0228, "step": 58695 }, { "epoch": 2.979846692725519, "grad_norm": 0.18329808115959167, "learning_rate": 1.3435538182987293e-07, "loss": 0.024, "step": 58700 }, { "epoch": 2.9801005127163815, "grad_norm": 0.3315074145793915, "learning_rate": 1.326632485574564e-07, "loss": 0.0259, "step": 58705 }, { "epoch": 2.980354332707244, "grad_norm": 0.32475075125694275, "learning_rate": 1.3097111528503986e-07, "loss": 0.0256, "step": 58710 }, { "epoch": 2.9806081526981067, "grad_norm": 0.33447596430778503, "learning_rate": 1.2927898201262332e-07, "loss": 0.0247, "step": 58715 }, { "epoch": 2.980861972688969, "grad_norm": 0.32187217473983765, "learning_rate": 1.275868487402068e-07, "loss": 0.0233, "step": 58720 }, { "epoch": 2.9811157926798315, "grad_norm": 0.34107112884521484, "learning_rate": 1.2589471546779025e-07, "loss": 0.0335, "step": 58725 }, { "epoch": 2.9813696126706937, "grad_norm": 0.4276149272918701, "learning_rate": 1.2420258219537372e-07, "loss": 0.0261, "step": 58730 }, { "epoch": 2.9816234326615563, "grad_norm": 0.3580925166606903, "learning_rate": 1.2251044892295718e-07, "loss": 0.0309, "step": 58735 }, { "epoch": 2.981877252652419, "grad_norm": 0.2032964825630188, "learning_rate": 1.2081831565054065e-07, "loss": 0.0283, "step": 58740 }, { "epoch": 2.9821310726432815, "grad_norm": 0.27535682916641235, "learning_rate": 1.1912618237812411e-07, "loss": 0.0278, "step": 58745 }, { "epoch": 2.982384892634144, "grad_norm": 0.7258585691452026, "learning_rate": 1.1743404910570758e-07, "loss": 0.0306, "step": 58750 }, { "epoch": 2.9826387126250062, "grad_norm": 0.2572552561759949, "learning_rate": 1.1574191583329104e-07, "loss": 0.0224, "step": 58755 }, { "epoch": 2.982892532615869, "grad_norm": 0.42224279046058655, "learning_rate": 1.1404978256087451e-07, "loss": 0.0292, "step": 58760 }, { "epoch": 2.9831463526067314, "grad_norm": 0.33688056468963623, "learning_rate": 1.1235764928845797e-07, "loss": 0.0328, "step": 58765 }, { "epoch": 2.9834001725975936, "grad_norm": 0.25573021173477173, "learning_rate": 1.1066551601604144e-07, "loss": 0.0264, "step": 58770 }, { "epoch": 2.983653992588456, "grad_norm": 0.2507616877555847, "learning_rate": 1.0897338274362489e-07, "loss": 0.0344, "step": 58775 }, { "epoch": 2.983907812579319, "grad_norm": 0.3209986388683319, "learning_rate": 1.0728124947120836e-07, "loss": 0.0228, "step": 58780 }, { "epoch": 2.9841616325701814, "grad_norm": 0.6421720385551453, "learning_rate": 1.0558911619879182e-07, "loss": 0.022, "step": 58785 }, { "epoch": 2.9844154525610436, "grad_norm": 0.3483797311782837, "learning_rate": 1.0389698292637529e-07, "loss": 0.0292, "step": 58790 }, { "epoch": 2.984669272551906, "grad_norm": 0.2420889437198639, "learning_rate": 1.0220484965395875e-07, "loss": 0.035, "step": 58795 }, { "epoch": 2.9849230925427688, "grad_norm": 0.4202343821525574, "learning_rate": 1.0051271638154222e-07, "loss": 0.033, "step": 58800 }, { "epoch": 2.985176912533631, "grad_norm": 0.247958704829216, "learning_rate": 9.882058310912568e-08, "loss": 0.0231, "step": 58805 }, { "epoch": 2.9854307325244935, "grad_norm": 0.3831901252269745, "learning_rate": 9.712844983670915e-08, "loss": 0.0388, "step": 58810 }, { "epoch": 2.985684552515356, "grad_norm": 0.34248149394989014, "learning_rate": 9.543631656429261e-08, "loss": 0.028, "step": 58815 }, { "epoch": 2.9859383725062187, "grad_norm": 0.2834693491458893, "learning_rate": 9.374418329187608e-08, "loss": 0.0308, "step": 58820 }, { "epoch": 2.9861921924970813, "grad_norm": 0.3265041410923004, "learning_rate": 9.205205001945954e-08, "loss": 0.0333, "step": 58825 }, { "epoch": 2.9864460124879435, "grad_norm": 0.34004950523376465, "learning_rate": 9.035991674704301e-08, "loss": 0.0277, "step": 58830 }, { "epoch": 2.986699832478806, "grad_norm": 0.32642513513565063, "learning_rate": 8.866778347462647e-08, "loss": 0.0252, "step": 58835 }, { "epoch": 2.9869536524696683, "grad_norm": 0.30227944254875183, "learning_rate": 8.697565020220994e-08, "loss": 0.0248, "step": 58840 }, { "epoch": 2.987207472460531, "grad_norm": 0.8182564377784729, "learning_rate": 8.528351692979339e-08, "loss": 0.029, "step": 58845 }, { "epoch": 2.9874612924513935, "grad_norm": 0.32346653938293457, "learning_rate": 8.359138365737685e-08, "loss": 0.0269, "step": 58850 }, { "epoch": 2.987715112442256, "grad_norm": 0.2588391900062561, "learning_rate": 8.189925038496032e-08, "loss": 0.0238, "step": 58855 }, { "epoch": 2.9879689324331187, "grad_norm": 0.44902580976486206, "learning_rate": 8.020711711254378e-08, "loss": 0.0275, "step": 58860 }, { "epoch": 2.988222752423981, "grad_norm": 0.3256726861000061, "learning_rate": 7.851498384012725e-08, "loss": 0.0243, "step": 58865 }, { "epoch": 2.9884765724148434, "grad_norm": 0.2934799790382385, "learning_rate": 7.682285056771071e-08, "loss": 0.0308, "step": 58870 }, { "epoch": 2.988730392405706, "grad_norm": 0.8592537045478821, "learning_rate": 7.513071729529418e-08, "loss": 0.0336, "step": 58875 }, { "epoch": 2.988984212396568, "grad_norm": 0.25373250246047974, "learning_rate": 7.343858402287764e-08, "loss": 0.029, "step": 58880 }, { "epoch": 2.989238032387431, "grad_norm": 0.2816363573074341, "learning_rate": 7.174645075046111e-08, "loss": 0.0299, "step": 58885 }, { "epoch": 2.9894918523782934, "grad_norm": 0.26894432306289673, "learning_rate": 7.005431747804458e-08, "loss": 0.0323, "step": 58890 }, { "epoch": 2.989745672369156, "grad_norm": 0.2629813551902771, "learning_rate": 6.836218420562804e-08, "loss": 0.0294, "step": 58895 }, { "epoch": 2.989999492360018, "grad_norm": 0.24025852978229523, "learning_rate": 6.66700509332115e-08, "loss": 0.0305, "step": 58900 }, { "epoch": 2.9902533123508808, "grad_norm": 0.26809507608413696, "learning_rate": 6.497791766079497e-08, "loss": 0.0284, "step": 58905 }, { "epoch": 2.9905071323417434, "grad_norm": 0.32450565695762634, "learning_rate": 6.328578438837844e-08, "loss": 0.0265, "step": 58910 }, { "epoch": 2.9907609523326055, "grad_norm": 0.3137778043746948, "learning_rate": 6.15936511159619e-08, "loss": 0.0296, "step": 58915 }, { "epoch": 2.991014772323468, "grad_norm": 0.472679078578949, "learning_rate": 5.990151784354537e-08, "loss": 0.0265, "step": 58920 }, { "epoch": 2.9912685923143307, "grad_norm": 0.2648654580116272, "learning_rate": 5.820938457112883e-08, "loss": 0.0259, "step": 58925 }, { "epoch": 2.9915224123051933, "grad_norm": 0.25962552428245544, "learning_rate": 5.6517251298712296e-08, "loss": 0.0246, "step": 58930 }, { "epoch": 2.9917762322960555, "grad_norm": 0.32846131920814514, "learning_rate": 5.482511802629576e-08, "loss": 0.0249, "step": 58935 }, { "epoch": 2.992030052286918, "grad_norm": 0.3609420657157898, "learning_rate": 5.313298475387922e-08, "loss": 0.026, "step": 58940 }, { "epoch": 2.9922838722777807, "grad_norm": 0.33980563282966614, "learning_rate": 5.1440851481462685e-08, "loss": 0.0307, "step": 58945 }, { "epoch": 2.992537692268643, "grad_norm": 0.2794458270072937, "learning_rate": 4.974871820904615e-08, "loss": 0.0314, "step": 58950 }, { "epoch": 2.9927915122595055, "grad_norm": 0.536361038684845, "learning_rate": 4.8056584936629615e-08, "loss": 0.0329, "step": 58955 }, { "epoch": 2.993045332250368, "grad_norm": 0.5886624455451965, "learning_rate": 4.636445166421308e-08, "loss": 0.0385, "step": 58960 }, { "epoch": 2.9932991522412307, "grad_norm": 0.3537057638168335, "learning_rate": 4.4672318391796546e-08, "loss": 0.0313, "step": 58965 }, { "epoch": 2.9935529722320933, "grad_norm": 0.2326638102531433, "learning_rate": 4.298018511938001e-08, "loss": 0.024, "step": 58970 }, { "epoch": 2.9938067922229554, "grad_norm": 0.47342896461486816, "learning_rate": 4.128805184696347e-08, "loss": 0.0358, "step": 58975 }, { "epoch": 2.994060612213818, "grad_norm": 0.27559787034988403, "learning_rate": 3.9595918574546934e-08, "loss": 0.0317, "step": 58980 }, { "epoch": 2.99431443220468, "grad_norm": 0.34952622652053833, "learning_rate": 3.79037853021304e-08, "loss": 0.0322, "step": 58985 }, { "epoch": 2.994568252195543, "grad_norm": 0.5940717458724976, "learning_rate": 3.6211652029713865e-08, "loss": 0.0301, "step": 58990 }, { "epoch": 2.9948220721864054, "grad_norm": 0.31109818816185, "learning_rate": 3.451951875729733e-08, "loss": 0.0338, "step": 58995 }, { "epoch": 2.995075892177268, "grad_norm": 0.37035757303237915, "learning_rate": 3.2827385484880795e-08, "loss": 0.03, "step": 59000 }, { "epoch": 2.9953297121681306, "grad_norm": 0.4075457751750946, "learning_rate": 3.113525221246426e-08, "loss": 0.032, "step": 59005 }, { "epoch": 2.9955835321589928, "grad_norm": 0.2673121392726898, "learning_rate": 2.9443118940047722e-08, "loss": 0.0286, "step": 59010 }, { "epoch": 2.9958373521498554, "grad_norm": 0.2799220383167267, "learning_rate": 2.7750985667631187e-08, "loss": 0.0297, "step": 59015 }, { "epoch": 2.996091172140718, "grad_norm": 0.232395201921463, "learning_rate": 2.605885239521465e-08, "loss": 0.0282, "step": 59020 }, { "epoch": 2.99634499213158, "grad_norm": 0.34497490525245667, "learning_rate": 2.4366719122798114e-08, "loss": 0.0311, "step": 59025 }, { "epoch": 2.9965988121224427, "grad_norm": 0.39901643991470337, "learning_rate": 2.267458585038158e-08, "loss": 0.0312, "step": 59030 }, { "epoch": 2.9968526321133053, "grad_norm": 0.31049707531929016, "learning_rate": 2.098245257796504e-08, "loss": 0.0291, "step": 59035 }, { "epoch": 2.997106452104168, "grad_norm": 0.374683141708374, "learning_rate": 1.9290319305548506e-08, "loss": 0.0324, "step": 59040 }, { "epoch": 2.99736027209503, "grad_norm": 0.3533037602901459, "learning_rate": 1.759818603313197e-08, "loss": 0.0333, "step": 59045 }, { "epoch": 2.9976140920858927, "grad_norm": 0.35654324293136597, "learning_rate": 1.5906052760715436e-08, "loss": 0.0313, "step": 59050 }, { "epoch": 2.9978679120767553, "grad_norm": 0.25922712683677673, "learning_rate": 1.42139194882989e-08, "loss": 0.0312, "step": 59055 }, { "epoch": 2.9981217320676175, "grad_norm": 0.28118982911109924, "learning_rate": 1.2521786215882363e-08, "loss": 0.0212, "step": 59060 }, { "epoch": 2.99837555205848, "grad_norm": 0.23331542313098907, "learning_rate": 1.0829652943465827e-08, "loss": 0.0289, "step": 59065 }, { "epoch": 2.9986293720493427, "grad_norm": 0.3286423683166504, "learning_rate": 9.137519671049294e-09, "loss": 0.0252, "step": 59070 }, { "epoch": 2.9988831920402053, "grad_norm": 0.38054725527763367, "learning_rate": 7.445386398632757e-09, "loss": 0.0272, "step": 59075 }, { "epoch": 2.9991370120310674, "grad_norm": 0.23584502935409546, "learning_rate": 5.753253126216221e-09, "loss": 0.0302, "step": 59080 }, { "epoch": 2.99939083202193, "grad_norm": 0.40845179557800293, "learning_rate": 4.061119853799686e-09, "loss": 0.0338, "step": 59085 }, { "epoch": 2.9996446520127926, "grad_norm": 0.2870352864265442, "learning_rate": 2.36898658138315e-09, "loss": 0.0369, "step": 59090 }, { "epoch": 2.999898472003655, "grad_norm": 0.2551342248916626, "learning_rate": 6.768533089666142e-10, "loss": 0.0264, "step": 59095 }, { "epoch": 3.0, "eval_loss": 0.197622150182724, "eval_runtime": 1775.8365, "eval_samples_per_second": 70.389, "eval_steps_per_second": 2.2, "step": 59097 } ], "logging_steps": 5, "max_steps": 59097, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.1337388851021742e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }