{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 4118, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00024283632831471587, "grad_norm": 1.154969573020935, "learning_rate": 0.0001, "loss": 2.074, "step": 1 }, { "epoch": 0.00048567265662943174, "grad_norm": 0.5944371223449707, "learning_rate": 0.0001, "loss": 1.9917, "step": 2 }, { "epoch": 0.0007285089849441476, "grad_norm": 2.7935380935668945, "learning_rate": 0.0001, "loss": 2.0706, "step": 3 }, { "epoch": 0.0009713453132588635, "grad_norm": 1.0198408365249634, "learning_rate": 0.0001, "loss": 1.9375, "step": 4 }, { "epoch": 0.0012141816415735794, "grad_norm": 0.4888165295124054, "learning_rate": 0.0001, "loss": 1.8088, "step": 5 }, { "epoch": 0.0014570179698882952, "grad_norm": 0.5396842956542969, "learning_rate": 0.0001, "loss": 1.889, "step": 6 }, { "epoch": 0.001699854298203011, "grad_norm": 0.5880922079086304, "learning_rate": 0.0001, "loss": 2.0318, "step": 7 }, { "epoch": 0.001942690626517727, "grad_norm": 0.48738348484039307, "learning_rate": 0.0001, "loss": 2.0962, "step": 8 }, { "epoch": 0.002185526954832443, "grad_norm": 0.37848764657974243, "learning_rate": 0.0001, "loss": 1.8828, "step": 9 }, { "epoch": 0.0024283632831471587, "grad_norm": 0.47919952869415283, "learning_rate": 0.0001, "loss": 1.8523, "step": 10 }, { "epoch": 0.0026711996114618746, "grad_norm": 0.45462319254875183, "learning_rate": 0.0001, "loss": 1.6941, "step": 11 }, { "epoch": 0.0029140359397765905, "grad_norm": 0.4438392221927643, "learning_rate": 0.0001, "loss": 1.9583, "step": 12 }, { "epoch": 0.0031568722680913063, "grad_norm": 0.35694992542266846, "learning_rate": 0.0001, "loss": 1.8645, "step": 13 }, { "epoch": 0.003399708596406022, "grad_norm": 0.37169331312179565, "learning_rate": 0.0001, "loss": 1.8335, "step": 14 }, { "epoch": 0.003642544924720738, "grad_norm": 0.39208415150642395, "learning_rate": 0.0001, "loss": 1.9256, "step": 15 }, { "epoch": 0.003885381253035454, "grad_norm": 0.39922136068344116, "learning_rate": 0.0001, "loss": 1.921, "step": 16 }, { "epoch": 0.00412821758135017, "grad_norm": 0.360889732837677, "learning_rate": 0.0001, "loss": 1.772, "step": 17 }, { "epoch": 0.004371053909664886, "grad_norm": 0.3539641499519348, "learning_rate": 0.0001, "loss": 1.8403, "step": 18 }, { "epoch": 0.004613890237979602, "grad_norm": 0.3484066128730774, "learning_rate": 0.0001, "loss": 1.877, "step": 19 }, { "epoch": 0.0048567265662943174, "grad_norm": 0.36553072929382324, "learning_rate": 0.0001, "loss": 1.867, "step": 20 }, { "epoch": 0.005099562894609034, "grad_norm": 0.38572415709495544, "learning_rate": 0.0001, "loss": 2.0538, "step": 21 }, { "epoch": 0.005342399222923749, "grad_norm": 0.38361066579818726, "learning_rate": 0.0001, "loss": 1.9438, "step": 22 }, { "epoch": 0.0055852355512384655, "grad_norm": 0.38020607829093933, "learning_rate": 0.0001, "loss": 1.8955, "step": 23 }, { "epoch": 0.005828071879553181, "grad_norm": 0.37575146555900574, "learning_rate": 0.0001, "loss": 1.6813, "step": 24 }, { "epoch": 0.006070908207867897, "grad_norm": 0.3370920419692993, "learning_rate": 0.0001, "loss": 1.7244, "step": 25 }, { "epoch": 0.006313744536182613, "grad_norm": 0.38152262568473816, "learning_rate": 0.0001, "loss": 1.8836, "step": 26 }, { "epoch": 0.006556580864497329, "grad_norm": 0.3809744119644165, "learning_rate": 0.0001, "loss": 1.8553, "step": 27 }, { "epoch": 0.006799417192812044, "grad_norm": 0.3849543035030365, "learning_rate": 0.0001, "loss": 1.9847, "step": 28 }, { "epoch": 0.007042253521126761, "grad_norm": 0.3533986806869507, "learning_rate": 0.0001, "loss": 1.8386, "step": 29 }, { "epoch": 0.007285089849441476, "grad_norm": 0.364499568939209, "learning_rate": 0.0001, "loss": 1.9516, "step": 30 }, { "epoch": 0.0075279261777561925, "grad_norm": 0.3528093099594116, "learning_rate": 0.0001, "loss": 1.8593, "step": 31 }, { "epoch": 0.007770762506070908, "grad_norm": 0.3829546570777893, "learning_rate": 0.0001, "loss": 1.7068, "step": 32 }, { "epoch": 0.008013598834385623, "grad_norm": 0.3647037148475647, "learning_rate": 0.0001, "loss": 1.6717, "step": 33 }, { "epoch": 0.00825643516270034, "grad_norm": 0.35392895340919495, "learning_rate": 0.0001, "loss": 1.9651, "step": 34 }, { "epoch": 0.008499271491015056, "grad_norm": 0.3464745879173279, "learning_rate": 0.0001, "loss": 1.7915, "step": 35 }, { "epoch": 0.008742107819329771, "grad_norm": 0.34323614835739136, "learning_rate": 0.0001, "loss": 1.8433, "step": 36 }, { "epoch": 0.008984944147644487, "grad_norm": 0.3553076982498169, "learning_rate": 0.0001, "loss": 1.863, "step": 37 }, { "epoch": 0.009227780475959204, "grad_norm": 0.39482808113098145, "learning_rate": 0.0001, "loss": 2.0614, "step": 38 }, { "epoch": 0.00947061680427392, "grad_norm": 0.34027889370918274, "learning_rate": 0.0001, "loss": 1.7918, "step": 39 }, { "epoch": 0.009713453132588635, "grad_norm": 0.3214944005012512, "learning_rate": 0.0001, "loss": 1.8169, "step": 40 }, { "epoch": 0.00995628946090335, "grad_norm": 0.3573228716850281, "learning_rate": 0.0001, "loss": 1.8388, "step": 41 }, { "epoch": 0.010199125789218067, "grad_norm": 0.34565800428390503, "learning_rate": 0.0001, "loss": 1.6319, "step": 42 }, { "epoch": 0.010441962117532783, "grad_norm": 0.3430301249027252, "learning_rate": 0.0001, "loss": 1.9434, "step": 43 }, { "epoch": 0.010684798445847498, "grad_norm": 0.3680773675441742, "learning_rate": 0.0001, "loss": 1.9171, "step": 44 }, { "epoch": 0.010927634774162214, "grad_norm": 0.29709112644195557, "learning_rate": 0.0001, "loss": 1.6346, "step": 45 }, { "epoch": 0.011170471102476931, "grad_norm": 0.31409093737602234, "learning_rate": 0.0001, "loss": 1.8279, "step": 46 }, { "epoch": 0.011413307430791646, "grad_norm": 0.3398358225822449, "learning_rate": 0.0001, "loss": 2.0094, "step": 47 }, { "epoch": 0.011656143759106362, "grad_norm": 0.35688671469688416, "learning_rate": 0.0001, "loss": 1.9384, "step": 48 }, { "epoch": 0.011898980087421079, "grad_norm": 0.33628571033477783, "learning_rate": 0.0001, "loss": 1.8191, "step": 49 }, { "epoch": 0.012141816415735794, "grad_norm": 0.3309676945209503, "learning_rate": 0.0001, "loss": 1.8202, "step": 50 }, { "epoch": 0.01238465274405051, "grad_norm": 0.3198463022708893, "learning_rate": 0.0001, "loss": 1.7509, "step": 51 }, { "epoch": 0.012627489072365225, "grad_norm": 0.3255227208137512, "learning_rate": 0.0001, "loss": 1.7996, "step": 52 }, { "epoch": 0.012870325400679943, "grad_norm": 0.3298192322254181, "learning_rate": 0.0001, "loss": 1.8607, "step": 53 }, { "epoch": 0.013113161728994658, "grad_norm": 0.33618688583374023, "learning_rate": 0.0001, "loss": 1.7293, "step": 54 }, { "epoch": 0.013355998057309373, "grad_norm": 0.37747520208358765, "learning_rate": 0.0001, "loss": 2.0701, "step": 55 }, { "epoch": 0.013598834385624089, "grad_norm": 0.3369693458080292, "learning_rate": 0.0001, "loss": 1.728, "step": 56 }, { "epoch": 0.013841670713938806, "grad_norm": 0.34572532773017883, "learning_rate": 0.0001, "loss": 1.9147, "step": 57 }, { "epoch": 0.014084507042253521, "grad_norm": 0.3507564067840576, "learning_rate": 0.0001, "loss": 1.802, "step": 58 }, { "epoch": 0.014327343370568237, "grad_norm": 0.32496514916419983, "learning_rate": 0.0001, "loss": 1.8054, "step": 59 }, { "epoch": 0.014570179698882952, "grad_norm": 0.31435415148735046, "learning_rate": 0.0001, "loss": 1.764, "step": 60 }, { "epoch": 0.01481301602719767, "grad_norm": 0.3256989121437073, "learning_rate": 0.0001, "loss": 1.76, "step": 61 }, { "epoch": 0.015055852355512385, "grad_norm": 0.346720814704895, "learning_rate": 0.0001, "loss": 1.8155, "step": 62 }, { "epoch": 0.0152986886838271, "grad_norm": 0.3564370274543762, "learning_rate": 0.0001, "loss": 1.8645, "step": 63 }, { "epoch": 0.015541525012141816, "grad_norm": 0.3128412663936615, "learning_rate": 0.0001, "loss": 1.7537, "step": 64 }, { "epoch": 0.01578436134045653, "grad_norm": 0.3623558580875397, "learning_rate": 0.0001, "loss": 1.8776, "step": 65 }, { "epoch": 0.016027197668771247, "grad_norm": 0.34936532378196716, "learning_rate": 0.0001, "loss": 1.7606, "step": 66 }, { "epoch": 0.016270033997085966, "grad_norm": 0.32261672616004944, "learning_rate": 0.0001, "loss": 1.8787, "step": 67 }, { "epoch": 0.01651287032540068, "grad_norm": 0.3463958203792572, "learning_rate": 0.0001, "loss": 1.8079, "step": 68 }, { "epoch": 0.016755706653715396, "grad_norm": 0.3851228654384613, "learning_rate": 0.0001, "loss": 2.0075, "step": 69 }, { "epoch": 0.016998542982030112, "grad_norm": 0.347501277923584, "learning_rate": 0.0001, "loss": 1.8561, "step": 70 }, { "epoch": 0.017241379310344827, "grad_norm": 0.33600863814353943, "learning_rate": 0.0001, "loss": 1.9005, "step": 71 }, { "epoch": 0.017484215638659543, "grad_norm": 0.3174665868282318, "learning_rate": 0.0001, "loss": 1.7522, "step": 72 }, { "epoch": 0.017727051966974258, "grad_norm": 0.3470480740070343, "learning_rate": 0.0001, "loss": 1.7866, "step": 73 }, { "epoch": 0.017969888295288974, "grad_norm": 0.35113289952278137, "learning_rate": 0.0001, "loss": 1.9659, "step": 74 }, { "epoch": 0.018212724623603693, "grad_norm": 0.35351213812828064, "learning_rate": 0.0001, "loss": 2.0076, "step": 75 }, { "epoch": 0.018455560951918408, "grad_norm": 0.3192991614341736, "learning_rate": 0.0001, "loss": 1.7576, "step": 76 }, { "epoch": 0.018698397280233123, "grad_norm": 0.3431519567966461, "learning_rate": 0.0001, "loss": 1.7957, "step": 77 }, { "epoch": 0.01894123360854784, "grad_norm": 0.3162384033203125, "learning_rate": 0.0001, "loss": 1.6776, "step": 78 }, { "epoch": 0.019184069936862554, "grad_norm": 0.31073257327079773, "learning_rate": 0.0001, "loss": 1.7227, "step": 79 }, { "epoch": 0.01942690626517727, "grad_norm": 0.319278359413147, "learning_rate": 0.0001, "loss": 1.7953, "step": 80 }, { "epoch": 0.019669742593491985, "grad_norm": 0.34543249011039734, "learning_rate": 0.0001, "loss": 1.8845, "step": 81 }, { "epoch": 0.0199125789218067, "grad_norm": 0.35296866297721863, "learning_rate": 0.0001, "loss": 2.0548, "step": 82 }, { "epoch": 0.02015541525012142, "grad_norm": 0.31435465812683105, "learning_rate": 0.0001, "loss": 1.7889, "step": 83 }, { "epoch": 0.020398251578436135, "grad_norm": 0.34530073404312134, "learning_rate": 0.0001, "loss": 1.8978, "step": 84 }, { "epoch": 0.02064108790675085, "grad_norm": 0.34861063957214355, "learning_rate": 0.0001, "loss": 1.7865, "step": 85 }, { "epoch": 0.020883924235065566, "grad_norm": 0.3257801830768585, "learning_rate": 0.0001, "loss": 1.8877, "step": 86 }, { "epoch": 0.02112676056338028, "grad_norm": 0.33134183287620544, "learning_rate": 0.0001, "loss": 1.9007, "step": 87 }, { "epoch": 0.021369596891694997, "grad_norm": 0.3427002727985382, "learning_rate": 0.0001, "loss": 1.8455, "step": 88 }, { "epoch": 0.021612433220009712, "grad_norm": 0.3223351836204529, "learning_rate": 0.0001, "loss": 1.8392, "step": 89 }, { "epoch": 0.021855269548324428, "grad_norm": 0.31952205300331116, "learning_rate": 0.0001, "loss": 1.805, "step": 90 }, { "epoch": 0.022098105876639147, "grad_norm": 0.33756670355796814, "learning_rate": 0.0001, "loss": 1.7672, "step": 91 }, { "epoch": 0.022340942204953862, "grad_norm": 0.32219502329826355, "learning_rate": 0.0001, "loss": 1.7457, "step": 92 }, { "epoch": 0.022583778533268577, "grad_norm": 0.3273763060569763, "learning_rate": 0.0001, "loss": 1.9236, "step": 93 }, { "epoch": 0.022826614861583293, "grad_norm": 0.34903284907341003, "learning_rate": 0.0001, "loss": 1.9005, "step": 94 }, { "epoch": 0.02306945118989801, "grad_norm": 0.31824183464050293, "learning_rate": 0.0001, "loss": 1.7147, "step": 95 }, { "epoch": 0.023312287518212724, "grad_norm": 0.3329814076423645, "learning_rate": 0.0001, "loss": 1.8056, "step": 96 }, { "epoch": 0.02355512384652744, "grad_norm": 0.3619345426559448, "learning_rate": 0.0001, "loss": 1.9174, "step": 97 }, { "epoch": 0.023797960174842158, "grad_norm": 0.32598811388015747, "learning_rate": 0.0001, "loss": 1.6829, "step": 98 }, { "epoch": 0.024040796503156873, "grad_norm": 0.33037275075912476, "learning_rate": 0.0001, "loss": 1.8287, "step": 99 }, { "epoch": 0.02428363283147159, "grad_norm": 0.3756055533885956, "learning_rate": 0.0001, "loss": 1.907, "step": 100 }, { "epoch": 0.024526469159786304, "grad_norm": 0.34237828850746155, "learning_rate": 0.0001, "loss": 1.8935, "step": 101 }, { "epoch": 0.02476930548810102, "grad_norm": 0.3579355478286743, "learning_rate": 0.0001, "loss": 1.8596, "step": 102 }, { "epoch": 0.025012141816415735, "grad_norm": 0.3444015085697174, "learning_rate": 0.0001, "loss": 1.8065, "step": 103 }, { "epoch": 0.02525497814473045, "grad_norm": 0.3132593333721161, "learning_rate": 0.0001, "loss": 1.4983, "step": 104 }, { "epoch": 0.025497814473045166, "grad_norm": 0.3273251950740814, "learning_rate": 0.0001, "loss": 1.7559, "step": 105 }, { "epoch": 0.025740650801359885, "grad_norm": 0.34056222438812256, "learning_rate": 0.0001, "loss": 1.7862, "step": 106 }, { "epoch": 0.0259834871296746, "grad_norm": 0.36062437295913696, "learning_rate": 0.0001, "loss": 2.0101, "step": 107 }, { "epoch": 0.026226323457989316, "grad_norm": 0.30914780497550964, "learning_rate": 0.0001, "loss": 1.7174, "step": 108 }, { "epoch": 0.02646915978630403, "grad_norm": 0.34357085824012756, "learning_rate": 0.0001, "loss": 1.9291, "step": 109 }, { "epoch": 0.026711996114618747, "grad_norm": 0.32527053356170654, "learning_rate": 0.0001, "loss": 1.7997, "step": 110 }, { "epoch": 0.026954832442933462, "grad_norm": 0.3142812252044678, "learning_rate": 0.0001, "loss": 1.697, "step": 111 }, { "epoch": 0.027197668771248178, "grad_norm": 0.32341161370277405, "learning_rate": 0.0001, "loss": 1.8289, "step": 112 }, { "epoch": 0.027440505099562893, "grad_norm": 0.3451857566833496, "learning_rate": 0.0001, "loss": 1.8117, "step": 113 }, { "epoch": 0.027683341427877612, "grad_norm": 0.3279164433479309, "learning_rate": 0.0001, "loss": 1.6973, "step": 114 }, { "epoch": 0.027926177756192327, "grad_norm": 0.31405824422836304, "learning_rate": 0.0001, "loss": 1.8133, "step": 115 }, { "epoch": 0.028169014084507043, "grad_norm": 0.3456348478794098, "learning_rate": 0.0001, "loss": 1.7479, "step": 116 }, { "epoch": 0.02841185041282176, "grad_norm": 0.31980592012405396, "learning_rate": 0.0001, "loss": 1.6188, "step": 117 }, { "epoch": 0.028654686741136474, "grad_norm": 0.3357499837875366, "learning_rate": 0.0001, "loss": 1.8281, "step": 118 }, { "epoch": 0.02889752306945119, "grad_norm": 0.32338961958885193, "learning_rate": 0.0001, "loss": 1.6977, "step": 119 }, { "epoch": 0.029140359397765905, "grad_norm": 0.3575379550457001, "learning_rate": 0.0001, "loss": 1.8371, "step": 120 }, { "epoch": 0.02938319572608062, "grad_norm": 0.35191610455513, "learning_rate": 0.0001, "loss": 1.8743, "step": 121 }, { "epoch": 0.02962603205439534, "grad_norm": 0.3267911374568939, "learning_rate": 0.0001, "loss": 1.8122, "step": 122 }, { "epoch": 0.029868868382710054, "grad_norm": 0.37161776423454285, "learning_rate": 0.0001, "loss": 1.8069, "step": 123 }, { "epoch": 0.03011170471102477, "grad_norm": 0.3311915397644043, "learning_rate": 0.0001, "loss": 1.8613, "step": 124 }, { "epoch": 0.030354541039339485, "grad_norm": 0.34188058972358704, "learning_rate": 0.0001, "loss": 1.9314, "step": 125 }, { "epoch": 0.0305973773676542, "grad_norm": 0.3299490213394165, "learning_rate": 0.0001, "loss": 1.737, "step": 126 }, { "epoch": 0.030840213695968916, "grad_norm": 0.3847339451313019, "learning_rate": 0.0001, "loss": 1.8562, "step": 127 }, { "epoch": 0.03108305002428363, "grad_norm": 0.34246835112571716, "learning_rate": 0.0001, "loss": 1.9026, "step": 128 }, { "epoch": 0.03132588635259835, "grad_norm": 0.33350276947021484, "learning_rate": 0.0001, "loss": 1.7956, "step": 129 }, { "epoch": 0.03156872268091306, "grad_norm": 0.3377183675765991, "learning_rate": 0.0001, "loss": 1.9385, "step": 130 }, { "epoch": 0.03181155900922778, "grad_norm": 0.36835625767707825, "learning_rate": 0.0001, "loss": 1.9134, "step": 131 }, { "epoch": 0.03205439533754249, "grad_norm": 0.3150362968444824, "learning_rate": 0.0001, "loss": 1.8216, "step": 132 }, { "epoch": 0.03229723166585721, "grad_norm": 0.31782564520835876, "learning_rate": 0.0001, "loss": 1.7913, "step": 133 }, { "epoch": 0.03254006799417193, "grad_norm": 0.3307948112487793, "learning_rate": 0.0001, "loss": 1.9733, "step": 134 }, { "epoch": 0.03278290432248664, "grad_norm": 0.3569931089878082, "learning_rate": 0.0001, "loss": 1.7413, "step": 135 }, { "epoch": 0.03302574065080136, "grad_norm": 0.325895756483078, "learning_rate": 0.0001, "loss": 1.73, "step": 136 }, { "epoch": 0.033268576979116074, "grad_norm": 0.33818817138671875, "learning_rate": 0.0001, "loss": 1.9272, "step": 137 }, { "epoch": 0.03351141330743079, "grad_norm": 0.3144405782222748, "learning_rate": 0.0001, "loss": 1.6437, "step": 138 }, { "epoch": 0.033754249635745505, "grad_norm": 0.3207385540008545, "learning_rate": 0.0001, "loss": 1.7101, "step": 139 }, { "epoch": 0.033997085964060224, "grad_norm": 0.34677934646606445, "learning_rate": 0.0001, "loss": 1.7878, "step": 140 }, { "epoch": 0.03423992229237494, "grad_norm": 0.3334146738052368, "learning_rate": 0.0001, "loss": 1.9245, "step": 141 }, { "epoch": 0.034482758620689655, "grad_norm": 0.3302057087421417, "learning_rate": 0.0001, "loss": 1.8471, "step": 142 }, { "epoch": 0.034725594949004374, "grad_norm": 0.3368750214576721, "learning_rate": 0.0001, "loss": 1.8592, "step": 143 }, { "epoch": 0.034968431277319086, "grad_norm": 0.3442443311214447, "learning_rate": 0.0001, "loss": 1.7756, "step": 144 }, { "epoch": 0.035211267605633804, "grad_norm": 0.311424195766449, "learning_rate": 0.0001, "loss": 1.6777, "step": 145 }, { "epoch": 0.035454103933948516, "grad_norm": 0.3602541387081146, "learning_rate": 0.0001, "loss": 1.9142, "step": 146 }, { "epoch": 0.035696940262263235, "grad_norm": 0.34334802627563477, "learning_rate": 0.0001, "loss": 1.8417, "step": 147 }, { "epoch": 0.03593977659057795, "grad_norm": 0.3241439461708069, "learning_rate": 0.0001, "loss": 1.7917, "step": 148 }, { "epoch": 0.036182612918892666, "grad_norm": 0.3447269797325134, "learning_rate": 0.0001, "loss": 1.9037, "step": 149 }, { "epoch": 0.036425449247207385, "grad_norm": 0.3451700508594513, "learning_rate": 0.0001, "loss": 1.9233, "step": 150 }, { "epoch": 0.0366682855755221, "grad_norm": 0.3239392638206482, "learning_rate": 0.0001, "loss": 1.7476, "step": 151 }, { "epoch": 0.036911121903836816, "grad_norm": 0.3280225694179535, "learning_rate": 0.0001, "loss": 1.6909, "step": 152 }, { "epoch": 0.03715395823215153, "grad_norm": 0.304781436920166, "learning_rate": 0.0001, "loss": 1.7175, "step": 153 }, { "epoch": 0.03739679456046625, "grad_norm": 0.325508177280426, "learning_rate": 0.0001, "loss": 1.8113, "step": 154 }, { "epoch": 0.03763963088878096, "grad_norm": 0.35371872782707214, "learning_rate": 0.0001, "loss": 1.7723, "step": 155 }, { "epoch": 0.03788246721709568, "grad_norm": 0.3424810469150543, "learning_rate": 0.0001, "loss": 1.8947, "step": 156 }, { "epoch": 0.0381253035454104, "grad_norm": 0.32752588391304016, "learning_rate": 0.0001, "loss": 1.6501, "step": 157 }, { "epoch": 0.03836813987372511, "grad_norm": 0.345795214176178, "learning_rate": 0.0001, "loss": 1.9681, "step": 158 }, { "epoch": 0.03861097620203983, "grad_norm": 0.33940884470939636, "learning_rate": 0.0001, "loss": 1.7907, "step": 159 }, { "epoch": 0.03885381253035454, "grad_norm": 0.3449257016181946, "learning_rate": 0.0001, "loss": 1.8764, "step": 160 }, { "epoch": 0.03909664885866926, "grad_norm": 0.3274487257003784, "learning_rate": 0.0001, "loss": 1.7455, "step": 161 }, { "epoch": 0.03933948518698397, "grad_norm": 0.3211442828178406, "learning_rate": 0.0001, "loss": 1.7798, "step": 162 }, { "epoch": 0.03958232151529869, "grad_norm": 0.3389868140220642, "learning_rate": 0.0001, "loss": 1.9089, "step": 163 }, { "epoch": 0.0398251578436134, "grad_norm": 0.3453965485095978, "learning_rate": 0.0001, "loss": 1.7192, "step": 164 }, { "epoch": 0.04006799417192812, "grad_norm": 0.3375605344772339, "learning_rate": 0.0001, "loss": 1.8576, "step": 165 }, { "epoch": 0.04031083050024284, "grad_norm": 0.3315996527671814, "learning_rate": 0.0001, "loss": 1.8289, "step": 166 }, { "epoch": 0.04055366682855755, "grad_norm": 0.3440003991127014, "learning_rate": 0.0001, "loss": 1.8337, "step": 167 }, { "epoch": 0.04079650315687227, "grad_norm": 0.3501895070075989, "learning_rate": 0.0001, "loss": 1.8386, "step": 168 }, { "epoch": 0.04103933948518698, "grad_norm": 0.3258925974369049, "learning_rate": 0.0001, "loss": 1.8062, "step": 169 }, { "epoch": 0.0412821758135017, "grad_norm": 0.3586982488632202, "learning_rate": 0.0001, "loss": 1.7095, "step": 170 }, { "epoch": 0.04152501214181641, "grad_norm": 0.33566269278526306, "learning_rate": 0.0001, "loss": 1.8377, "step": 171 }, { "epoch": 0.04176784847013113, "grad_norm": 0.3345046639442444, "learning_rate": 0.0001, "loss": 1.7932, "step": 172 }, { "epoch": 0.04201068479844585, "grad_norm": 0.35495448112487793, "learning_rate": 0.0001, "loss": 1.971, "step": 173 }, { "epoch": 0.04225352112676056, "grad_norm": 0.3234632611274719, "learning_rate": 0.0001, "loss": 1.6855, "step": 174 }, { "epoch": 0.04249635745507528, "grad_norm": 0.33416181802749634, "learning_rate": 0.0001, "loss": 1.7332, "step": 175 }, { "epoch": 0.042739193783389993, "grad_norm": 0.3306339979171753, "learning_rate": 0.0001, "loss": 1.8605, "step": 176 }, { "epoch": 0.04298203011170471, "grad_norm": 0.3303837478160858, "learning_rate": 0.0001, "loss": 1.7574, "step": 177 }, { "epoch": 0.043224866440019424, "grad_norm": 0.3273058235645294, "learning_rate": 0.0001, "loss": 1.7745, "step": 178 }, { "epoch": 0.04346770276833414, "grad_norm": 0.3233434557914734, "learning_rate": 0.0001, "loss": 1.6972, "step": 179 }, { "epoch": 0.043710539096648855, "grad_norm": 0.3248269855976105, "learning_rate": 0.0001, "loss": 1.7851, "step": 180 }, { "epoch": 0.043953375424963574, "grad_norm": 0.3396579921245575, "learning_rate": 0.0001, "loss": 1.85, "step": 181 }, { "epoch": 0.04419621175327829, "grad_norm": 0.32688796520233154, "learning_rate": 0.0001, "loss": 1.8739, "step": 182 }, { "epoch": 0.044439048081593005, "grad_norm": 0.3261704444885254, "learning_rate": 0.0001, "loss": 1.8259, "step": 183 }, { "epoch": 0.044681884409907724, "grad_norm": 0.31057843565940857, "learning_rate": 0.0001, "loss": 1.4949, "step": 184 }, { "epoch": 0.044924720738222436, "grad_norm": 0.32633113861083984, "learning_rate": 0.0001, "loss": 1.8141, "step": 185 }, { "epoch": 0.045167557066537155, "grad_norm": 0.33019405603408813, "learning_rate": 0.0001, "loss": 1.9112, "step": 186 }, { "epoch": 0.04541039339485187, "grad_norm": 0.3250356614589691, "learning_rate": 0.0001, "loss": 1.6517, "step": 187 }, { "epoch": 0.045653229723166586, "grad_norm": 0.3314146101474762, "learning_rate": 0.0001, "loss": 1.774, "step": 188 }, { "epoch": 0.045896066051481305, "grad_norm": 0.31466880440711975, "learning_rate": 0.0001, "loss": 1.6895, "step": 189 }, { "epoch": 0.04613890237979602, "grad_norm": 0.32345980405807495, "learning_rate": 0.0001, "loss": 1.6928, "step": 190 }, { "epoch": 0.046381738708110735, "grad_norm": 0.3433418273925781, "learning_rate": 0.0001, "loss": 1.8835, "step": 191 }, { "epoch": 0.04662457503642545, "grad_norm": 0.31912872195243835, "learning_rate": 0.0001, "loss": 1.5716, "step": 192 }, { "epoch": 0.046867411364740166, "grad_norm": 0.32511550188064575, "learning_rate": 0.0001, "loss": 1.7293, "step": 193 }, { "epoch": 0.04711024769305488, "grad_norm": 0.35534897446632385, "learning_rate": 0.0001, "loss": 1.8179, "step": 194 }, { "epoch": 0.0473530840213696, "grad_norm": 0.408071368932724, "learning_rate": 0.0001, "loss": 1.6752, "step": 195 }, { "epoch": 0.047595920349684316, "grad_norm": 0.349654883146286, "learning_rate": 0.0001, "loss": 1.805, "step": 196 }, { "epoch": 0.04783875667799903, "grad_norm": 0.351043701171875, "learning_rate": 0.0001, "loss": 1.7478, "step": 197 }, { "epoch": 0.04808159300631375, "grad_norm": 0.33979853987693787, "learning_rate": 0.0001, "loss": 1.778, "step": 198 }, { "epoch": 0.04832442933462846, "grad_norm": 0.32497379183769226, "learning_rate": 0.0001, "loss": 1.7174, "step": 199 }, { "epoch": 0.04856726566294318, "grad_norm": 0.3525506854057312, "learning_rate": 0.0001, "loss": 1.8249, "step": 200 }, { "epoch": 0.04881010199125789, "grad_norm": 0.347342312335968, "learning_rate": 0.0001, "loss": 1.7451, "step": 201 }, { "epoch": 0.04905293831957261, "grad_norm": 0.3493291139602661, "learning_rate": 0.0001, "loss": 1.9292, "step": 202 }, { "epoch": 0.04929577464788732, "grad_norm": 0.33886533975601196, "learning_rate": 0.0001, "loss": 1.7692, "step": 203 }, { "epoch": 0.04953861097620204, "grad_norm": 0.3711925148963928, "learning_rate": 0.0001, "loss": 2.0593, "step": 204 }, { "epoch": 0.04978144730451676, "grad_norm": 0.35785242915153503, "learning_rate": 0.0001, "loss": 1.8335, "step": 205 }, { "epoch": 0.05002428363283147, "grad_norm": 0.33297452330589294, "learning_rate": 0.0001, "loss": 1.6759, "step": 206 }, { "epoch": 0.05026711996114619, "grad_norm": 0.3229871988296509, "learning_rate": 0.0001, "loss": 1.767, "step": 207 }, { "epoch": 0.0505099562894609, "grad_norm": 0.328980952501297, "learning_rate": 0.0001, "loss": 1.7328, "step": 208 }, { "epoch": 0.05075279261777562, "grad_norm": 0.324479341506958, "learning_rate": 0.0001, "loss": 1.7687, "step": 209 }, { "epoch": 0.05099562894609033, "grad_norm": 0.33051010966300964, "learning_rate": 0.0001, "loss": 1.7712, "step": 210 }, { "epoch": 0.05123846527440505, "grad_norm": 0.32872438430786133, "learning_rate": 0.0001, "loss": 1.9356, "step": 211 }, { "epoch": 0.05148130160271977, "grad_norm": 0.314836710691452, "learning_rate": 0.0001, "loss": 1.6447, "step": 212 }, { "epoch": 0.05172413793103448, "grad_norm": 0.3571523129940033, "learning_rate": 0.0001, "loss": 1.8033, "step": 213 }, { "epoch": 0.0519669742593492, "grad_norm": 0.34860020875930786, "learning_rate": 0.0001, "loss": 1.7296, "step": 214 }, { "epoch": 0.05220981058766391, "grad_norm": 0.33273938298225403, "learning_rate": 0.0001, "loss": 1.8285, "step": 215 }, { "epoch": 0.05245264691597863, "grad_norm": 0.3613104522228241, "learning_rate": 0.0001, "loss": 1.9188, "step": 216 }, { "epoch": 0.052695483244293344, "grad_norm": 0.33187010884284973, "learning_rate": 0.0001, "loss": 1.8428, "step": 217 }, { "epoch": 0.05293831957260806, "grad_norm": 0.34256044030189514, "learning_rate": 0.0001, "loss": 1.9037, "step": 218 }, { "epoch": 0.053181155900922775, "grad_norm": 0.34337806701660156, "learning_rate": 0.0001, "loss": 1.7928, "step": 219 }, { "epoch": 0.053423992229237494, "grad_norm": 0.3422665596008301, "learning_rate": 0.0001, "loss": 1.9033, "step": 220 }, { "epoch": 0.05366682855755221, "grad_norm": 0.3296110928058624, "learning_rate": 0.0001, "loss": 1.8822, "step": 221 }, { "epoch": 0.053909664885866924, "grad_norm": 0.3197755515575409, "learning_rate": 0.0001, "loss": 1.6152, "step": 222 }, { "epoch": 0.05415250121418164, "grad_norm": 0.3136097490787506, "learning_rate": 0.0001, "loss": 1.6212, "step": 223 }, { "epoch": 0.054395337542496355, "grad_norm": 0.32114914059638977, "learning_rate": 0.0001, "loss": 1.8609, "step": 224 }, { "epoch": 0.054638173870811074, "grad_norm": 0.3307587206363678, "learning_rate": 0.0001, "loss": 1.6285, "step": 225 }, { "epoch": 0.054881010199125786, "grad_norm": 0.3631536364555359, "learning_rate": 0.0001, "loss": 1.8162, "step": 226 }, { "epoch": 0.055123846527440505, "grad_norm": 0.3194067180156708, "learning_rate": 0.0001, "loss": 1.7193, "step": 227 }, { "epoch": 0.055366682855755224, "grad_norm": 0.32269614934921265, "learning_rate": 0.0001, "loss": 1.6614, "step": 228 }, { "epoch": 0.055609519184069936, "grad_norm": 0.3524293303489685, "learning_rate": 0.0001, "loss": 1.7887, "step": 229 }, { "epoch": 0.055852355512384655, "grad_norm": 0.346718966960907, "learning_rate": 0.0001, "loss": 1.837, "step": 230 }, { "epoch": 0.05609519184069937, "grad_norm": 0.33732110261917114, "learning_rate": 0.0001, "loss": 1.7503, "step": 231 }, { "epoch": 0.056338028169014086, "grad_norm": 0.3578680753707886, "learning_rate": 0.0001, "loss": 1.9632, "step": 232 }, { "epoch": 0.0565808644973288, "grad_norm": 0.29416367411613464, "learning_rate": 0.0001, "loss": 1.5471, "step": 233 }, { "epoch": 0.05682370082564352, "grad_norm": 0.33240172266960144, "learning_rate": 0.0001, "loss": 1.8694, "step": 234 }, { "epoch": 0.057066537153958236, "grad_norm": 0.2979382574558258, "learning_rate": 0.0001, "loss": 1.5838, "step": 235 }, { "epoch": 0.05730937348227295, "grad_norm": 0.36220940947532654, "learning_rate": 0.0001, "loss": 1.9537, "step": 236 }, { "epoch": 0.057552209810587666, "grad_norm": 0.3136692941188812, "learning_rate": 0.0001, "loss": 1.8313, "step": 237 }, { "epoch": 0.05779504613890238, "grad_norm": 0.323866069316864, "learning_rate": 0.0001, "loss": 1.881, "step": 238 }, { "epoch": 0.0580378824672171, "grad_norm": 0.32461974024772644, "learning_rate": 0.0001, "loss": 1.6583, "step": 239 }, { "epoch": 0.05828071879553181, "grad_norm": 0.34341827034950256, "learning_rate": 0.0001, "loss": 1.8666, "step": 240 }, { "epoch": 0.05852355512384653, "grad_norm": 0.3396712839603424, "learning_rate": 0.0001, "loss": 1.8707, "step": 241 }, { "epoch": 0.05876639145216124, "grad_norm": 0.32624468207359314, "learning_rate": 0.0001, "loss": 1.8068, "step": 242 }, { "epoch": 0.05900922778047596, "grad_norm": 0.36357343196868896, "learning_rate": 0.0001, "loss": 1.9051, "step": 243 }, { "epoch": 0.05925206410879068, "grad_norm": 0.3347633481025696, "learning_rate": 0.0001, "loss": 1.7729, "step": 244 }, { "epoch": 0.05949490043710539, "grad_norm": 0.33824750781059265, "learning_rate": 0.0001, "loss": 1.8523, "step": 245 }, { "epoch": 0.05973773676542011, "grad_norm": 0.3478322923183441, "learning_rate": 0.0001, "loss": 1.8405, "step": 246 }, { "epoch": 0.05998057309373482, "grad_norm": 0.33303165435791016, "learning_rate": 0.0001, "loss": 1.78, "step": 247 }, { "epoch": 0.06022340942204954, "grad_norm": 0.319014310836792, "learning_rate": 0.0001, "loss": 1.7309, "step": 248 }, { "epoch": 0.06046624575036425, "grad_norm": 0.3302060067653656, "learning_rate": 0.0001, "loss": 1.7848, "step": 249 }, { "epoch": 0.06070908207867897, "grad_norm": 0.3306678831577301, "learning_rate": 0.0001, "loss": 1.7027, "step": 250 }, { "epoch": 0.06095191840699369, "grad_norm": 0.3561919629573822, "learning_rate": 0.0001, "loss": 1.8096, "step": 251 }, { "epoch": 0.0611947547353084, "grad_norm": 0.3397621214389801, "learning_rate": 0.0001, "loss": 1.8675, "step": 252 }, { "epoch": 0.06143759106362312, "grad_norm": 0.3315463066101074, "learning_rate": 0.0001, "loss": 1.6153, "step": 253 }, { "epoch": 0.06168042739193783, "grad_norm": 0.33696696162223816, "learning_rate": 0.0001, "loss": 1.8535, "step": 254 }, { "epoch": 0.06192326372025255, "grad_norm": 0.33258795738220215, "learning_rate": 0.0001, "loss": 1.92, "step": 255 }, { "epoch": 0.06216610004856726, "grad_norm": 0.3105964660644531, "learning_rate": 0.0001, "loss": 1.6666, "step": 256 }, { "epoch": 0.06240893637688198, "grad_norm": 0.3291695713996887, "learning_rate": 0.0001, "loss": 1.7214, "step": 257 }, { "epoch": 0.0626517727051967, "grad_norm": 0.3137649893760681, "learning_rate": 0.0001, "loss": 1.7208, "step": 258 }, { "epoch": 0.06289460903351142, "grad_norm": 0.32082751393318176, "learning_rate": 0.0001, "loss": 1.6783, "step": 259 }, { "epoch": 0.06313744536182612, "grad_norm": 0.33842983841896057, "learning_rate": 0.0001, "loss": 1.8637, "step": 260 }, { "epoch": 0.06338028169014084, "grad_norm": 0.31122681498527527, "learning_rate": 0.0001, "loss": 1.6713, "step": 261 }, { "epoch": 0.06362311801845556, "grad_norm": 0.3500661253929138, "learning_rate": 0.0001, "loss": 1.9031, "step": 262 }, { "epoch": 0.06386595434677028, "grad_norm": 0.3374018371105194, "learning_rate": 0.0001, "loss": 1.7129, "step": 263 }, { "epoch": 0.06410879067508499, "grad_norm": 0.3464416563510895, "learning_rate": 0.0001, "loss": 1.8661, "step": 264 }, { "epoch": 0.0643516270033997, "grad_norm": 0.34645119309425354, "learning_rate": 0.0001, "loss": 1.7762, "step": 265 }, { "epoch": 0.06459446333171442, "grad_norm": 0.3312506079673767, "learning_rate": 0.0001, "loss": 1.7213, "step": 266 }, { "epoch": 0.06483729966002914, "grad_norm": 0.33370789885520935, "learning_rate": 0.0001, "loss": 1.8174, "step": 267 }, { "epoch": 0.06508013598834386, "grad_norm": 0.36095327138900757, "learning_rate": 0.0001, "loss": 1.864, "step": 268 }, { "epoch": 0.06532297231665857, "grad_norm": 0.35194554924964905, "learning_rate": 0.0001, "loss": 1.9681, "step": 269 }, { "epoch": 0.06556580864497329, "grad_norm": 0.321267306804657, "learning_rate": 0.0001, "loss": 1.8207, "step": 270 }, { "epoch": 0.065808644973288, "grad_norm": 0.3664778769016266, "learning_rate": 0.0001, "loss": 1.8354, "step": 271 }, { "epoch": 0.06605148130160272, "grad_norm": 0.37435176968574524, "learning_rate": 0.0001, "loss": 1.8364, "step": 272 }, { "epoch": 0.06629431762991743, "grad_norm": 0.352040559053421, "learning_rate": 0.0001, "loss": 1.7374, "step": 273 }, { "epoch": 0.06653715395823215, "grad_norm": 0.3306851089000702, "learning_rate": 0.0001, "loss": 1.6247, "step": 274 }, { "epoch": 0.06677999028654687, "grad_norm": 0.3537105321884155, "learning_rate": 0.0001, "loss": 2.0477, "step": 275 }, { "epoch": 0.06702282661486159, "grad_norm": 0.33372071385383606, "learning_rate": 0.0001, "loss": 1.6669, "step": 276 }, { "epoch": 0.0672656629431763, "grad_norm": 0.3090078830718994, "learning_rate": 0.0001, "loss": 1.6595, "step": 277 }, { "epoch": 0.06750849927149101, "grad_norm": 0.3390367031097412, "learning_rate": 0.0001, "loss": 1.8347, "step": 278 }, { "epoch": 0.06775133559980573, "grad_norm": 0.34644871950149536, "learning_rate": 0.0001, "loss": 1.7536, "step": 279 }, { "epoch": 0.06799417192812045, "grad_norm": 0.3659220337867737, "learning_rate": 0.0001, "loss": 1.9978, "step": 280 }, { "epoch": 0.06823700825643517, "grad_norm": 0.3907443881034851, "learning_rate": 0.0001, "loss": 1.7997, "step": 281 }, { "epoch": 0.06847984458474989, "grad_norm": 0.3244320750236511, "learning_rate": 0.0001, "loss": 1.7634, "step": 282 }, { "epoch": 0.06872268091306459, "grad_norm": 0.3205203115940094, "learning_rate": 0.0001, "loss": 1.7509, "step": 283 }, { "epoch": 0.06896551724137931, "grad_norm": 0.3349047303199768, "learning_rate": 0.0001, "loss": 1.8051, "step": 284 }, { "epoch": 0.06920835356969403, "grad_norm": 0.3221355080604553, "learning_rate": 0.0001, "loss": 1.7895, "step": 285 }, { "epoch": 0.06945118989800875, "grad_norm": 0.3335476517677307, "learning_rate": 0.0001, "loss": 1.6604, "step": 286 }, { "epoch": 0.06969402622632345, "grad_norm": 0.34389203786849976, "learning_rate": 0.0001, "loss": 1.8544, "step": 287 }, { "epoch": 0.06993686255463817, "grad_norm": 0.3776070773601532, "learning_rate": 0.0001, "loss": 2.0003, "step": 288 }, { "epoch": 0.07017969888295289, "grad_norm": 0.3268282115459442, "learning_rate": 0.0001, "loss": 1.8411, "step": 289 }, { "epoch": 0.07042253521126761, "grad_norm": 0.33194541931152344, "learning_rate": 0.0001, "loss": 1.6502, "step": 290 }, { "epoch": 0.07066537153958233, "grad_norm": 0.3324213922023773, "learning_rate": 0.0001, "loss": 1.8558, "step": 291 }, { "epoch": 0.07090820786789703, "grad_norm": 0.3163861930370331, "learning_rate": 0.0001, "loss": 1.7196, "step": 292 }, { "epoch": 0.07115104419621175, "grad_norm": 0.3342863917350769, "learning_rate": 0.0001, "loss": 1.7527, "step": 293 }, { "epoch": 0.07139388052452647, "grad_norm": 0.32636377215385437, "learning_rate": 0.0001, "loss": 1.7561, "step": 294 }, { "epoch": 0.07163671685284119, "grad_norm": 0.3220379650592804, "learning_rate": 0.0001, "loss": 1.8233, "step": 295 }, { "epoch": 0.0718795531811559, "grad_norm": 0.3426109552383423, "learning_rate": 0.0001, "loss": 1.772, "step": 296 }, { "epoch": 0.07212238950947061, "grad_norm": 0.31494760513305664, "learning_rate": 0.0001, "loss": 1.71, "step": 297 }, { "epoch": 0.07236522583778533, "grad_norm": 0.3238487243652344, "learning_rate": 0.0001, "loss": 1.7168, "step": 298 }, { "epoch": 0.07260806216610005, "grad_norm": 0.34287282824516296, "learning_rate": 0.0001, "loss": 1.7304, "step": 299 }, { "epoch": 0.07285089849441477, "grad_norm": 0.3240461051464081, "learning_rate": 0.0001, "loss": 1.7774, "step": 300 }, { "epoch": 0.07309373482272948, "grad_norm": 0.33182549476623535, "learning_rate": 0.0001, "loss": 1.8504, "step": 301 }, { "epoch": 0.0733365711510442, "grad_norm": 0.3246839940547943, "learning_rate": 0.0001, "loss": 1.6562, "step": 302 }, { "epoch": 0.07357940747935891, "grad_norm": 0.33326035737991333, "learning_rate": 0.0001, "loss": 1.7738, "step": 303 }, { "epoch": 0.07382224380767363, "grad_norm": 0.3405746817588806, "learning_rate": 0.0001, "loss": 1.8336, "step": 304 }, { "epoch": 0.07406508013598834, "grad_norm": 0.328939825296402, "learning_rate": 0.0001, "loss": 1.5373, "step": 305 }, { "epoch": 0.07430791646430306, "grad_norm": 0.3274134397506714, "learning_rate": 0.0001, "loss": 1.834, "step": 306 }, { "epoch": 0.07455075279261777, "grad_norm": 0.3510563373565674, "learning_rate": 0.0001, "loss": 1.8107, "step": 307 }, { "epoch": 0.0747935891209325, "grad_norm": 0.3266802728176117, "learning_rate": 0.0001, "loss": 1.8049, "step": 308 }, { "epoch": 0.07503642544924721, "grad_norm": 0.3214014172554016, "learning_rate": 0.0001, "loss": 1.7304, "step": 309 }, { "epoch": 0.07527926177756192, "grad_norm": 0.3484552800655365, "learning_rate": 0.0001, "loss": 1.8983, "step": 310 }, { "epoch": 0.07552209810587664, "grad_norm": 0.32560989260673523, "learning_rate": 0.0001, "loss": 1.7182, "step": 311 }, { "epoch": 0.07576493443419136, "grad_norm": 0.31363043189048767, "learning_rate": 0.0001, "loss": 1.6791, "step": 312 }, { "epoch": 0.07600777076250607, "grad_norm": 0.30790412425994873, "learning_rate": 0.0001, "loss": 1.646, "step": 313 }, { "epoch": 0.0762506070908208, "grad_norm": 0.35224685072898865, "learning_rate": 0.0001, "loss": 1.7997, "step": 314 }, { "epoch": 0.0764934434191355, "grad_norm": 0.3509777784347534, "learning_rate": 0.0001, "loss": 1.8086, "step": 315 }, { "epoch": 0.07673627974745022, "grad_norm": 0.45471304655075073, "learning_rate": 0.0001, "loss": 1.7234, "step": 316 }, { "epoch": 0.07697911607576494, "grad_norm": 0.3238165080547333, "learning_rate": 0.0001, "loss": 1.8179, "step": 317 }, { "epoch": 0.07722195240407966, "grad_norm": 0.31785327196121216, "learning_rate": 0.0001, "loss": 1.6927, "step": 318 }, { "epoch": 0.07746478873239436, "grad_norm": 0.33723047375679016, "learning_rate": 0.0001, "loss": 1.807, "step": 319 }, { "epoch": 0.07770762506070908, "grad_norm": 0.3171544373035431, "learning_rate": 0.0001, "loss": 1.6861, "step": 320 }, { "epoch": 0.0779504613890238, "grad_norm": 0.31941697001457214, "learning_rate": 0.0001, "loss": 1.7365, "step": 321 }, { "epoch": 0.07819329771733852, "grad_norm": 0.348278284072876, "learning_rate": 0.0001, "loss": 1.7495, "step": 322 }, { "epoch": 0.07843613404565324, "grad_norm": 0.33012667298316956, "learning_rate": 0.0001, "loss": 1.6431, "step": 323 }, { "epoch": 0.07867897037396794, "grad_norm": 0.3364087641239166, "learning_rate": 0.0001, "loss": 1.8088, "step": 324 }, { "epoch": 0.07892180670228266, "grad_norm": 0.31294387578964233, "learning_rate": 0.0001, "loss": 1.7033, "step": 325 }, { "epoch": 0.07916464303059738, "grad_norm": 0.32259300351142883, "learning_rate": 0.0001, "loss": 1.7142, "step": 326 }, { "epoch": 0.0794074793589121, "grad_norm": 0.34282100200653076, "learning_rate": 0.0001, "loss": 1.8833, "step": 327 }, { "epoch": 0.0796503156872268, "grad_norm": 0.34852755069732666, "learning_rate": 0.0001, "loss": 1.8127, "step": 328 }, { "epoch": 0.07989315201554152, "grad_norm": 0.31135955452919006, "learning_rate": 0.0001, "loss": 1.6054, "step": 329 }, { "epoch": 0.08013598834385624, "grad_norm": 0.34507012367248535, "learning_rate": 0.0001, "loss": 1.8801, "step": 330 }, { "epoch": 0.08037882467217096, "grad_norm": 0.33243077993392944, "learning_rate": 0.0001, "loss": 1.7688, "step": 331 }, { "epoch": 0.08062166100048568, "grad_norm": 0.32714489102363586, "learning_rate": 0.0001, "loss": 1.7063, "step": 332 }, { "epoch": 0.08086449732880038, "grad_norm": 0.337786465883255, "learning_rate": 0.0001, "loss": 1.8207, "step": 333 }, { "epoch": 0.0811073336571151, "grad_norm": 0.3262454569339752, "learning_rate": 0.0001, "loss": 1.7529, "step": 334 }, { "epoch": 0.08135016998542982, "grad_norm": 0.29085323214530945, "learning_rate": 0.0001, "loss": 1.5068, "step": 335 }, { "epoch": 0.08159300631374454, "grad_norm": 0.32306885719299316, "learning_rate": 0.0001, "loss": 1.7703, "step": 336 }, { "epoch": 0.08183584264205926, "grad_norm": 0.3417051434516907, "learning_rate": 0.0001, "loss": 1.8377, "step": 337 }, { "epoch": 0.08207867897037396, "grad_norm": 0.3171888589859009, "learning_rate": 0.0001, "loss": 1.7178, "step": 338 }, { "epoch": 0.08232151529868868, "grad_norm": 0.31271278858184814, "learning_rate": 0.0001, "loss": 1.6236, "step": 339 }, { "epoch": 0.0825643516270034, "grad_norm": 0.3355056047439575, "learning_rate": 0.0001, "loss": 1.744, "step": 340 }, { "epoch": 0.08280718795531812, "grad_norm": 0.3089101016521454, "learning_rate": 0.0001, "loss": 1.5724, "step": 341 }, { "epoch": 0.08305002428363283, "grad_norm": 0.3490867614746094, "learning_rate": 0.0001, "loss": 1.8589, "step": 342 }, { "epoch": 0.08329286061194754, "grad_norm": 0.3519854247570038, "learning_rate": 0.0001, "loss": 1.8846, "step": 343 }, { "epoch": 0.08353569694026226, "grad_norm": 0.3247239291667938, "learning_rate": 0.0001, "loss": 1.8309, "step": 344 }, { "epoch": 0.08377853326857698, "grad_norm": 0.3213523030281067, "learning_rate": 0.0001, "loss": 1.652, "step": 345 }, { "epoch": 0.0840213695968917, "grad_norm": 0.32256901264190674, "learning_rate": 0.0001, "loss": 1.6963, "step": 346 }, { "epoch": 0.0842642059252064, "grad_norm": 0.3471747934818268, "learning_rate": 0.0001, "loss": 1.786, "step": 347 }, { "epoch": 0.08450704225352113, "grad_norm": 0.2998027503490448, "learning_rate": 0.0001, "loss": 1.5939, "step": 348 }, { "epoch": 0.08474987858183584, "grad_norm": 0.3406505286693573, "learning_rate": 0.0001, "loss": 1.7973, "step": 349 }, { "epoch": 0.08499271491015056, "grad_norm": 0.3450128734111786, "learning_rate": 0.0001, "loss": 1.7727, "step": 350 }, { "epoch": 0.08523555123846527, "grad_norm": 0.33610254526138306, "learning_rate": 0.0001, "loss": 1.8183, "step": 351 }, { "epoch": 0.08547838756677999, "grad_norm": 0.3284231126308441, "learning_rate": 0.0001, "loss": 1.773, "step": 352 }, { "epoch": 0.0857212238950947, "grad_norm": 0.31227028369903564, "learning_rate": 0.0001, "loss": 1.7105, "step": 353 }, { "epoch": 0.08596406022340942, "grad_norm": 0.32455405592918396, "learning_rate": 0.0001, "loss": 1.6372, "step": 354 }, { "epoch": 0.08620689655172414, "grad_norm": 0.3156833052635193, "learning_rate": 0.0001, "loss": 1.8633, "step": 355 }, { "epoch": 0.08644973288003885, "grad_norm": 0.33389779925346375, "learning_rate": 0.0001, "loss": 1.6658, "step": 356 }, { "epoch": 0.08669256920835357, "grad_norm": 0.3363843560218811, "learning_rate": 0.0001, "loss": 1.751, "step": 357 }, { "epoch": 0.08693540553666829, "grad_norm": 0.33338844776153564, "learning_rate": 0.0001, "loss": 1.6921, "step": 358 }, { "epoch": 0.087178241864983, "grad_norm": 0.33218914270401, "learning_rate": 0.0001, "loss": 1.9268, "step": 359 }, { "epoch": 0.08742107819329771, "grad_norm": 0.32392585277557373, "learning_rate": 0.0001, "loss": 1.7296, "step": 360 }, { "epoch": 0.08766391452161243, "grad_norm": 0.33016619086265564, "learning_rate": 0.0001, "loss": 1.5462, "step": 361 }, { "epoch": 0.08790675084992715, "grad_norm": 0.34675028920173645, "learning_rate": 0.0001, "loss": 1.8096, "step": 362 }, { "epoch": 0.08814958717824187, "grad_norm": 0.36310189962387085, "learning_rate": 0.0001, "loss": 1.8494, "step": 363 }, { "epoch": 0.08839242350655659, "grad_norm": 0.31934189796447754, "learning_rate": 0.0001, "loss": 1.7754, "step": 364 }, { "epoch": 0.08863525983487129, "grad_norm": 0.3338501453399658, "learning_rate": 0.0001, "loss": 1.7632, "step": 365 }, { "epoch": 0.08887809616318601, "grad_norm": 0.361849308013916, "learning_rate": 0.0001, "loss": 1.9469, "step": 366 }, { "epoch": 0.08912093249150073, "grad_norm": 0.3035111129283905, "learning_rate": 0.0001, "loss": 1.6553, "step": 367 }, { "epoch": 0.08936376881981545, "grad_norm": 0.3231661021709442, "learning_rate": 0.0001, "loss": 1.8295, "step": 368 }, { "epoch": 0.08960660514813017, "grad_norm": 0.33550918102264404, "learning_rate": 0.0001, "loss": 1.8573, "step": 369 }, { "epoch": 0.08984944147644487, "grad_norm": 0.3429529666900635, "learning_rate": 0.0001, "loss": 1.8223, "step": 370 }, { "epoch": 0.09009227780475959, "grad_norm": 0.31798985600471497, "learning_rate": 0.0001, "loss": 1.6636, "step": 371 }, { "epoch": 0.09033511413307431, "grad_norm": 0.3305976986885071, "learning_rate": 0.0001, "loss": 1.7622, "step": 372 }, { "epoch": 0.09057795046138903, "grad_norm": 0.32804372906684875, "learning_rate": 0.0001, "loss": 1.7525, "step": 373 }, { "epoch": 0.09082078678970373, "grad_norm": 0.33984291553497314, "learning_rate": 0.0001, "loss": 1.9339, "step": 374 }, { "epoch": 0.09106362311801845, "grad_norm": 0.29461875557899475, "learning_rate": 0.0001, "loss": 1.7179, "step": 375 }, { "epoch": 0.09130645944633317, "grad_norm": 0.3483908474445343, "learning_rate": 0.0001, "loss": 1.8206, "step": 376 }, { "epoch": 0.09154929577464789, "grad_norm": 0.30330803990364075, "learning_rate": 0.0001, "loss": 1.6415, "step": 377 }, { "epoch": 0.09179213210296261, "grad_norm": 0.34878110885620117, "learning_rate": 0.0001, "loss": 1.7985, "step": 378 }, { "epoch": 0.09203496843127731, "grad_norm": 0.32693809270858765, "learning_rate": 0.0001, "loss": 1.7828, "step": 379 }, { "epoch": 0.09227780475959203, "grad_norm": 0.3255230486392975, "learning_rate": 0.0001, "loss": 1.7238, "step": 380 }, { "epoch": 0.09252064108790675, "grad_norm": 0.3401491045951843, "learning_rate": 0.0001, "loss": 1.7669, "step": 381 }, { "epoch": 0.09276347741622147, "grad_norm": 0.3755502998828888, "learning_rate": 0.0001, "loss": 1.8206, "step": 382 }, { "epoch": 0.09300631374453618, "grad_norm": 0.3082270920276642, "learning_rate": 0.0001, "loss": 1.7214, "step": 383 }, { "epoch": 0.0932491500728509, "grad_norm": 0.3387890160083771, "learning_rate": 0.0001, "loss": 1.8306, "step": 384 }, { "epoch": 0.09349198640116561, "grad_norm": 0.3410017192363739, "learning_rate": 0.0001, "loss": 1.7629, "step": 385 }, { "epoch": 0.09373482272948033, "grad_norm": 0.32617995142936707, "learning_rate": 0.0001, "loss": 1.6484, "step": 386 }, { "epoch": 0.09397765905779505, "grad_norm": 0.3287573754787445, "learning_rate": 0.0001, "loss": 1.8165, "step": 387 }, { "epoch": 0.09422049538610976, "grad_norm": 0.35360467433929443, "learning_rate": 0.0001, "loss": 1.8817, "step": 388 }, { "epoch": 0.09446333171442448, "grad_norm": 0.3364380896091461, "learning_rate": 0.0001, "loss": 1.8327, "step": 389 }, { "epoch": 0.0947061680427392, "grad_norm": 0.3223830461502075, "learning_rate": 0.0001, "loss": 1.7869, "step": 390 }, { "epoch": 0.09494900437105391, "grad_norm": 0.34865981340408325, "learning_rate": 0.0001, "loss": 1.7537, "step": 391 }, { "epoch": 0.09519184069936863, "grad_norm": 0.32730600237846375, "learning_rate": 0.0001, "loss": 1.7977, "step": 392 }, { "epoch": 0.09543467702768334, "grad_norm": 0.3286747336387634, "learning_rate": 0.0001, "loss": 1.6173, "step": 393 }, { "epoch": 0.09567751335599806, "grad_norm": 0.3481329679489136, "learning_rate": 0.0001, "loss": 1.837, "step": 394 }, { "epoch": 0.09592034968431278, "grad_norm": 0.3496640920639038, "learning_rate": 0.0001, "loss": 1.7007, "step": 395 }, { "epoch": 0.0961631860126275, "grad_norm": 0.35474106669425964, "learning_rate": 0.0001, "loss": 1.9123, "step": 396 }, { "epoch": 0.0964060223409422, "grad_norm": 0.3372008204460144, "learning_rate": 0.0001, "loss": 1.814, "step": 397 }, { "epoch": 0.09664885866925692, "grad_norm": 0.33549004793167114, "learning_rate": 0.0001, "loss": 1.8799, "step": 398 }, { "epoch": 0.09689169499757164, "grad_norm": 0.3251035213470459, "learning_rate": 0.0001, "loss": 1.6462, "step": 399 }, { "epoch": 0.09713453132588636, "grad_norm": 0.3387112021446228, "learning_rate": 0.0001, "loss": 1.8511, "step": 400 }, { "epoch": 0.09737736765420107, "grad_norm": 0.32067248225212097, "learning_rate": 0.0001, "loss": 1.922, "step": 401 }, { "epoch": 0.09762020398251578, "grad_norm": 0.3748277425765991, "learning_rate": 0.0001, "loss": 1.7758, "step": 402 }, { "epoch": 0.0978630403108305, "grad_norm": 0.3434027135372162, "learning_rate": 0.0001, "loss": 1.7802, "step": 403 }, { "epoch": 0.09810587663914522, "grad_norm": 0.31411343812942505, "learning_rate": 0.0001, "loss": 1.5578, "step": 404 }, { "epoch": 0.09834871296745994, "grad_norm": 0.3480266332626343, "learning_rate": 0.0001, "loss": 1.801, "step": 405 }, { "epoch": 0.09859154929577464, "grad_norm": 0.3487769663333893, "learning_rate": 0.0001, "loss": 1.8862, "step": 406 }, { "epoch": 0.09883438562408936, "grad_norm": 0.3437488079071045, "learning_rate": 0.0001, "loss": 1.9915, "step": 407 }, { "epoch": 0.09907722195240408, "grad_norm": 0.3372859060764313, "learning_rate": 0.0001, "loss": 1.8603, "step": 408 }, { "epoch": 0.0993200582807188, "grad_norm": 0.3287096321582794, "learning_rate": 0.0001, "loss": 1.7998, "step": 409 }, { "epoch": 0.09956289460903352, "grad_norm": 0.35017016530036926, "learning_rate": 0.0001, "loss": 1.9084, "step": 410 }, { "epoch": 0.09980573093734822, "grad_norm": 0.3446860909461975, "learning_rate": 0.0001, "loss": 1.9251, "step": 411 }, { "epoch": 0.10004856726566294, "grad_norm": 0.3437003493309021, "learning_rate": 0.0001, "loss": 1.9018, "step": 412 }, { "epoch": 0.10029140359397766, "grad_norm": 0.33558472990989685, "learning_rate": 0.0001, "loss": 1.7663, "step": 413 }, { "epoch": 0.10053423992229238, "grad_norm": 0.3409295082092285, "learning_rate": 0.0001, "loss": 1.9402, "step": 414 }, { "epoch": 0.1007770762506071, "grad_norm": 0.3394022285938263, "learning_rate": 0.0001, "loss": 1.8369, "step": 415 }, { "epoch": 0.1010199125789218, "grad_norm": 0.33328884840011597, "learning_rate": 0.0001, "loss": 1.8291, "step": 416 }, { "epoch": 0.10126274890723652, "grad_norm": 0.35661789774894714, "learning_rate": 0.0001, "loss": 1.8364, "step": 417 }, { "epoch": 0.10150558523555124, "grad_norm": 0.34863409399986267, "learning_rate": 0.0001, "loss": 1.9184, "step": 418 }, { "epoch": 0.10174842156386596, "grad_norm": 0.3370678424835205, "learning_rate": 0.0001, "loss": 1.728, "step": 419 }, { "epoch": 0.10199125789218066, "grad_norm": 0.3543989360332489, "learning_rate": 0.0001, "loss": 1.7346, "step": 420 }, { "epoch": 0.10223409422049538, "grad_norm": 0.3625810444355011, "learning_rate": 0.0001, "loss": 1.8968, "step": 421 }, { "epoch": 0.1024769305488101, "grad_norm": 0.31931373476982117, "learning_rate": 0.0001, "loss": 1.6817, "step": 422 }, { "epoch": 0.10271976687712482, "grad_norm": 0.33884134888648987, "learning_rate": 0.0001, "loss": 1.7368, "step": 423 }, { "epoch": 0.10296260320543954, "grad_norm": 0.32916051149368286, "learning_rate": 0.0001, "loss": 1.7307, "step": 424 }, { "epoch": 0.10320543953375425, "grad_norm": 0.3354704976081848, "learning_rate": 0.0001, "loss": 1.8176, "step": 425 }, { "epoch": 0.10344827586206896, "grad_norm": 0.32648178935050964, "learning_rate": 0.0001, "loss": 1.7642, "step": 426 }, { "epoch": 0.10369111219038368, "grad_norm": 0.3041088283061981, "learning_rate": 0.0001, "loss": 1.5416, "step": 427 }, { "epoch": 0.1039339485186984, "grad_norm": 0.34351372718811035, "learning_rate": 0.0001, "loss": 1.8235, "step": 428 }, { "epoch": 0.1041767848470131, "grad_norm": 0.33838582038879395, "learning_rate": 0.0001, "loss": 1.7042, "step": 429 }, { "epoch": 0.10441962117532783, "grad_norm": 0.34839949011802673, "learning_rate": 0.0001, "loss": 1.8394, "step": 430 }, { "epoch": 0.10466245750364254, "grad_norm": 0.32552745938301086, "learning_rate": 0.0001, "loss": 1.548, "step": 431 }, { "epoch": 0.10490529383195726, "grad_norm": 0.32809004187583923, "learning_rate": 0.0001, "loss": 1.861, "step": 432 }, { "epoch": 0.10514813016027198, "grad_norm": 0.35891664028167725, "learning_rate": 0.0001, "loss": 1.7159, "step": 433 }, { "epoch": 0.10539096648858669, "grad_norm": 0.3640296757221222, "learning_rate": 0.0001, "loss": 1.7317, "step": 434 }, { "epoch": 0.1056338028169014, "grad_norm": 0.3368436396121979, "learning_rate": 0.0001, "loss": 1.7596, "step": 435 }, { "epoch": 0.10587663914521613, "grad_norm": 0.31572410464286804, "learning_rate": 0.0001, "loss": 1.6735, "step": 436 }, { "epoch": 0.10611947547353084, "grad_norm": 0.341825395822525, "learning_rate": 0.0001, "loss": 1.8139, "step": 437 }, { "epoch": 0.10636231180184555, "grad_norm": 0.3547295928001404, "learning_rate": 0.0001, "loss": 1.8928, "step": 438 }, { "epoch": 0.10660514813016027, "grad_norm": 0.3295591175556183, "learning_rate": 0.0001, "loss": 1.6168, "step": 439 }, { "epoch": 0.10684798445847499, "grad_norm": 0.3370591104030609, "learning_rate": 0.0001, "loss": 1.9244, "step": 440 }, { "epoch": 0.1070908207867897, "grad_norm": 0.33818763494491577, "learning_rate": 0.0001, "loss": 1.7455, "step": 441 }, { "epoch": 0.10733365711510442, "grad_norm": 0.35193806886672974, "learning_rate": 0.0001, "loss": 1.7823, "step": 442 }, { "epoch": 0.10757649344341913, "grad_norm": 0.3400358557701111, "learning_rate": 0.0001, "loss": 1.8871, "step": 443 }, { "epoch": 0.10781932977173385, "grad_norm": 0.3593761622905731, "learning_rate": 0.0001, "loss": 1.8103, "step": 444 }, { "epoch": 0.10806216610004857, "grad_norm": 0.3300077021121979, "learning_rate": 0.0001, "loss": 1.786, "step": 445 }, { "epoch": 0.10830500242836329, "grad_norm": 0.3168260455131531, "learning_rate": 0.0001, "loss": 1.6937, "step": 446 }, { "epoch": 0.108547838756678, "grad_norm": 0.36123761534690857, "learning_rate": 0.0001, "loss": 1.9772, "step": 447 }, { "epoch": 0.10879067508499271, "grad_norm": 0.3371562659740448, "learning_rate": 0.0001, "loss": 1.8531, "step": 448 }, { "epoch": 0.10903351141330743, "grad_norm": 0.34795111417770386, "learning_rate": 0.0001, "loss": 1.7316, "step": 449 }, { "epoch": 0.10927634774162215, "grad_norm": 0.3352266252040863, "learning_rate": 0.0001, "loss": 1.799, "step": 450 }, { "epoch": 0.10951918406993687, "grad_norm": 0.31838861107826233, "learning_rate": 0.0001, "loss": 1.7578, "step": 451 }, { "epoch": 0.10976202039825157, "grad_norm": 0.3576313555240631, "learning_rate": 0.0001, "loss": 1.9083, "step": 452 }, { "epoch": 0.11000485672656629, "grad_norm": 0.3273816406726837, "learning_rate": 0.0001, "loss": 1.8002, "step": 453 }, { "epoch": 0.11024769305488101, "grad_norm": 0.3398890197277069, "learning_rate": 0.0001, "loss": 1.7914, "step": 454 }, { "epoch": 0.11049052938319573, "grad_norm": 0.3422471582889557, "learning_rate": 0.0001, "loss": 1.7997, "step": 455 }, { "epoch": 0.11073336571151045, "grad_norm": 0.3258894681930542, "learning_rate": 0.0001, "loss": 1.6239, "step": 456 }, { "epoch": 0.11097620203982515, "grad_norm": 0.3137560486793518, "learning_rate": 0.0001, "loss": 1.6991, "step": 457 }, { "epoch": 0.11121903836813987, "grad_norm": 0.3078104555606842, "learning_rate": 0.0001, "loss": 1.6844, "step": 458 }, { "epoch": 0.11146187469645459, "grad_norm": 0.3332054615020752, "learning_rate": 0.0001, "loss": 1.6455, "step": 459 }, { "epoch": 0.11170471102476931, "grad_norm": 0.3348306119441986, "learning_rate": 0.0001, "loss": 1.8087, "step": 460 }, { "epoch": 0.11194754735308401, "grad_norm": 0.3279060423374176, "learning_rate": 0.0001, "loss": 1.7843, "step": 461 }, { "epoch": 0.11219038368139873, "grad_norm": 0.330898255109787, "learning_rate": 0.0001, "loss": 1.7168, "step": 462 }, { "epoch": 0.11243322000971345, "grad_norm": 0.3327697813510895, "learning_rate": 0.0001, "loss": 1.7686, "step": 463 }, { "epoch": 0.11267605633802817, "grad_norm": 0.32823678851127625, "learning_rate": 0.0001, "loss": 1.6963, "step": 464 }, { "epoch": 0.11291889266634289, "grad_norm": 0.3418535590171814, "learning_rate": 0.0001, "loss": 1.794, "step": 465 }, { "epoch": 0.1131617289946576, "grad_norm": 0.34460851550102234, "learning_rate": 0.0001, "loss": 1.6965, "step": 466 }, { "epoch": 0.11340456532297231, "grad_norm": 0.32411473989486694, "learning_rate": 0.0001, "loss": 1.6109, "step": 467 }, { "epoch": 0.11364740165128703, "grad_norm": 0.3328358232975006, "learning_rate": 0.0001, "loss": 1.6898, "step": 468 }, { "epoch": 0.11389023797960175, "grad_norm": 0.35975319147109985, "learning_rate": 0.0001, "loss": 1.8466, "step": 469 }, { "epoch": 0.11413307430791647, "grad_norm": 0.32818377017974854, "learning_rate": 0.0001, "loss": 1.7855, "step": 470 }, { "epoch": 0.11437591063623118, "grad_norm": 0.29983869194984436, "learning_rate": 0.0001, "loss": 1.7306, "step": 471 }, { "epoch": 0.1146187469645459, "grad_norm": 0.3202539384365082, "learning_rate": 0.0001, "loss": 1.7312, "step": 472 }, { "epoch": 0.11486158329286061, "grad_norm": 0.3432933986186981, "learning_rate": 0.0001, "loss": 1.6656, "step": 473 }, { "epoch": 0.11510441962117533, "grad_norm": 0.32516351342201233, "learning_rate": 0.0001, "loss": 1.5996, "step": 474 }, { "epoch": 0.11534725594949004, "grad_norm": 0.3278566598892212, "learning_rate": 0.0001, "loss": 1.7985, "step": 475 }, { "epoch": 0.11559009227780476, "grad_norm": 0.32134944200515747, "learning_rate": 0.0001, "loss": 1.7525, "step": 476 }, { "epoch": 0.11583292860611948, "grad_norm": 0.3515169024467468, "learning_rate": 0.0001, "loss": 1.831, "step": 477 }, { "epoch": 0.1160757649344342, "grad_norm": 0.31824618577957153, "learning_rate": 0.0001, "loss": 1.6206, "step": 478 }, { "epoch": 0.11631860126274891, "grad_norm": 0.3707296550273895, "learning_rate": 0.0001, "loss": 1.9, "step": 479 }, { "epoch": 0.11656143759106362, "grad_norm": 0.33282679319381714, "learning_rate": 0.0001, "loss": 1.6946, "step": 480 }, { "epoch": 0.11680427391937834, "grad_norm": 0.33055081963539124, "learning_rate": 0.0001, "loss": 1.785, "step": 481 }, { "epoch": 0.11704711024769306, "grad_norm": 0.324970543384552, "learning_rate": 0.0001, "loss": 1.6858, "step": 482 }, { "epoch": 0.11728994657600778, "grad_norm": 0.3231464922428131, "learning_rate": 0.0001, "loss": 1.6393, "step": 483 }, { "epoch": 0.11753278290432248, "grad_norm": 0.34346282482147217, "learning_rate": 0.0001, "loss": 1.8243, "step": 484 }, { "epoch": 0.1177756192326372, "grad_norm": 0.3255658447742462, "learning_rate": 0.0001, "loss": 1.6712, "step": 485 }, { "epoch": 0.11801845556095192, "grad_norm": 0.3029419779777527, "learning_rate": 0.0001, "loss": 1.6564, "step": 486 }, { "epoch": 0.11826129188926664, "grad_norm": 0.3497994542121887, "learning_rate": 0.0001, "loss": 1.6322, "step": 487 }, { "epoch": 0.11850412821758136, "grad_norm": 0.3377457559108734, "learning_rate": 0.0001, "loss": 1.7269, "step": 488 }, { "epoch": 0.11874696454589606, "grad_norm": 0.3266313970088959, "learning_rate": 0.0001, "loss": 1.7791, "step": 489 }, { "epoch": 0.11898980087421078, "grad_norm": 0.352076917886734, "learning_rate": 0.0001, "loss": 1.9166, "step": 490 }, { "epoch": 0.1192326372025255, "grad_norm": 0.33107295632362366, "learning_rate": 0.0001, "loss": 1.8014, "step": 491 }, { "epoch": 0.11947547353084022, "grad_norm": 0.3481456935405731, "learning_rate": 0.0001, "loss": 1.8638, "step": 492 }, { "epoch": 0.11971830985915492, "grad_norm": 0.3252372741699219, "learning_rate": 0.0001, "loss": 1.7377, "step": 493 }, { "epoch": 0.11996114618746964, "grad_norm": 0.3195580542087555, "learning_rate": 0.0001, "loss": 1.7241, "step": 494 }, { "epoch": 0.12020398251578436, "grad_norm": 0.34002894163131714, "learning_rate": 0.0001, "loss": 1.6945, "step": 495 }, { "epoch": 0.12044681884409908, "grad_norm": 0.31778785586357117, "learning_rate": 0.0001, "loss": 1.6256, "step": 496 }, { "epoch": 0.1206896551724138, "grad_norm": 0.34070226550102234, "learning_rate": 0.0001, "loss": 1.8871, "step": 497 }, { "epoch": 0.1209324915007285, "grad_norm": 0.34664463996887207, "learning_rate": 0.0001, "loss": 1.9494, "step": 498 }, { "epoch": 0.12117532782904322, "grad_norm": 0.32989418506622314, "learning_rate": 0.0001, "loss": 1.7978, "step": 499 }, { "epoch": 0.12141816415735794, "grad_norm": 0.34744179248809814, "learning_rate": 0.0001, "loss": 1.8161, "step": 500 }, { "epoch": 0.12166100048567266, "grad_norm": 0.329497367143631, "learning_rate": 0.0001, "loss": 1.6066, "step": 501 }, { "epoch": 0.12190383681398738, "grad_norm": 0.3245505392551422, "learning_rate": 0.0001, "loss": 1.6534, "step": 502 }, { "epoch": 0.12214667314230208, "grad_norm": 0.35946330428123474, "learning_rate": 0.0001, "loss": 1.9062, "step": 503 }, { "epoch": 0.1223895094706168, "grad_norm": 0.36018314957618713, "learning_rate": 0.0001, "loss": 1.6777, "step": 504 }, { "epoch": 0.12263234579893152, "grad_norm": 0.35960716009140015, "learning_rate": 0.0001, "loss": 1.7391, "step": 505 }, { "epoch": 0.12287518212724624, "grad_norm": 0.32001397013664246, "learning_rate": 0.0001, "loss": 1.7721, "step": 506 }, { "epoch": 0.12311801845556095, "grad_norm": 0.31397107243537903, "learning_rate": 0.0001, "loss": 1.5642, "step": 507 }, { "epoch": 0.12336085478387566, "grad_norm": 0.358695924282074, "learning_rate": 0.0001, "loss": 1.8194, "step": 508 }, { "epoch": 0.12360369111219038, "grad_norm": 0.3431941270828247, "learning_rate": 0.0001, "loss": 1.8609, "step": 509 }, { "epoch": 0.1238465274405051, "grad_norm": 0.3243958652019501, "learning_rate": 0.0001, "loss": 1.844, "step": 510 }, { "epoch": 0.12408936376881982, "grad_norm": 0.3306693136692047, "learning_rate": 0.0001, "loss": 1.6731, "step": 511 }, { "epoch": 0.12433220009713453, "grad_norm": 0.31046029925346375, "learning_rate": 0.0001, "loss": 1.6751, "step": 512 }, { "epoch": 0.12457503642544925, "grad_norm": 0.3342955708503723, "learning_rate": 0.0001, "loss": 1.701, "step": 513 }, { "epoch": 0.12481787275376396, "grad_norm": 0.3202895224094391, "learning_rate": 0.0001, "loss": 1.6878, "step": 514 }, { "epoch": 0.12506070908207867, "grad_norm": 0.37030142545700073, "learning_rate": 0.0001, "loss": 1.9408, "step": 515 }, { "epoch": 0.1253035454103934, "grad_norm": 0.3309297561645508, "learning_rate": 0.0001, "loss": 1.7087, "step": 516 }, { "epoch": 0.1255463817387081, "grad_norm": 0.3122987151145935, "learning_rate": 0.0001, "loss": 1.7112, "step": 517 }, { "epoch": 0.12578921806702284, "grad_norm": 0.32288306951522827, "learning_rate": 0.0001, "loss": 1.6103, "step": 518 }, { "epoch": 0.12603205439533754, "grad_norm": 0.34724926948547363, "learning_rate": 0.0001, "loss": 1.9673, "step": 519 }, { "epoch": 0.12627489072365225, "grad_norm": 0.3623920679092407, "learning_rate": 0.0001, "loss": 1.8321, "step": 520 }, { "epoch": 0.12651772705196698, "grad_norm": 0.3387831151485443, "learning_rate": 0.0001, "loss": 1.8256, "step": 521 }, { "epoch": 0.1267605633802817, "grad_norm": 0.330636590719223, "learning_rate": 0.0001, "loss": 1.5437, "step": 522 }, { "epoch": 0.1270033997085964, "grad_norm": 0.36128905415534973, "learning_rate": 0.0001, "loss": 1.8782, "step": 523 }, { "epoch": 0.12724623603691113, "grad_norm": 0.33911311626434326, "learning_rate": 0.0001, "loss": 1.8102, "step": 524 }, { "epoch": 0.12748907236522583, "grad_norm": 0.3348335921764374, "learning_rate": 0.0001, "loss": 1.7712, "step": 525 }, { "epoch": 0.12773190869354056, "grad_norm": 0.3538147807121277, "learning_rate": 0.0001, "loss": 1.9288, "step": 526 }, { "epoch": 0.12797474502185527, "grad_norm": 0.3452605605125427, "learning_rate": 0.0001, "loss": 1.7673, "step": 527 }, { "epoch": 0.12821758135016997, "grad_norm": 0.31932786107063293, "learning_rate": 0.0001, "loss": 1.7969, "step": 528 }, { "epoch": 0.1284604176784847, "grad_norm": 0.34146761894226074, "learning_rate": 0.0001, "loss": 1.8897, "step": 529 }, { "epoch": 0.1287032540067994, "grad_norm": 0.333464652299881, "learning_rate": 0.0001, "loss": 1.8103, "step": 530 }, { "epoch": 0.12894609033511414, "grad_norm": 0.33564871549606323, "learning_rate": 0.0001, "loss": 1.7477, "step": 531 }, { "epoch": 0.12918892666342885, "grad_norm": 0.33123332262039185, "learning_rate": 0.0001, "loss": 1.8184, "step": 532 }, { "epoch": 0.12943176299174355, "grad_norm": 0.33338358998298645, "learning_rate": 0.0001, "loss": 1.7733, "step": 533 }, { "epoch": 0.1296745993200583, "grad_norm": 0.3367150127887726, "learning_rate": 0.0001, "loss": 1.8073, "step": 534 }, { "epoch": 0.129917435648373, "grad_norm": 0.3337629735469818, "learning_rate": 0.0001, "loss": 1.6832, "step": 535 }, { "epoch": 0.13016027197668772, "grad_norm": 0.33553630113601685, "learning_rate": 0.0001, "loss": 1.7301, "step": 536 }, { "epoch": 0.13040310830500243, "grad_norm": 0.32271620631217957, "learning_rate": 0.0001, "loss": 1.7196, "step": 537 }, { "epoch": 0.13064594463331713, "grad_norm": 0.3418119251728058, "learning_rate": 0.0001, "loss": 1.7169, "step": 538 }, { "epoch": 0.13088878096163187, "grad_norm": 0.3349496126174927, "learning_rate": 0.0001, "loss": 1.7522, "step": 539 }, { "epoch": 0.13113161728994657, "grad_norm": 0.35157325863838196, "learning_rate": 0.0001, "loss": 1.7772, "step": 540 }, { "epoch": 0.1313744536182613, "grad_norm": 0.3331843316555023, "learning_rate": 0.0001, "loss": 1.624, "step": 541 }, { "epoch": 0.131617289946576, "grad_norm": 0.34814342856407166, "learning_rate": 0.0001, "loss": 1.8239, "step": 542 }, { "epoch": 0.13186012627489072, "grad_norm": 0.326874703168869, "learning_rate": 0.0001, "loss": 1.5981, "step": 543 }, { "epoch": 0.13210296260320545, "grad_norm": 0.32875895500183105, "learning_rate": 0.0001, "loss": 1.6496, "step": 544 }, { "epoch": 0.13234579893152015, "grad_norm": 0.36154043674468994, "learning_rate": 0.0001, "loss": 1.8287, "step": 545 }, { "epoch": 0.13258863525983486, "grad_norm": 0.3320012092590332, "learning_rate": 0.0001, "loss": 1.8379, "step": 546 }, { "epoch": 0.1328314715881496, "grad_norm": 0.3641113340854645, "learning_rate": 0.0001, "loss": 1.8627, "step": 547 }, { "epoch": 0.1330743079164643, "grad_norm": 0.33732619881629944, "learning_rate": 0.0001, "loss": 1.755, "step": 548 }, { "epoch": 0.13331714424477903, "grad_norm": 0.32615527510643005, "learning_rate": 0.0001, "loss": 1.637, "step": 549 }, { "epoch": 0.13355998057309373, "grad_norm": 0.32284268736839294, "learning_rate": 0.0001, "loss": 1.8013, "step": 550 }, { "epoch": 0.13380281690140844, "grad_norm": 0.3423888683319092, "learning_rate": 0.0001, "loss": 1.7242, "step": 551 }, { "epoch": 0.13404565322972317, "grad_norm": 0.3580892086029053, "learning_rate": 0.0001, "loss": 1.9369, "step": 552 }, { "epoch": 0.13428848955803788, "grad_norm": 0.32854658365249634, "learning_rate": 0.0001, "loss": 1.6249, "step": 553 }, { "epoch": 0.1345313258863526, "grad_norm": 0.3406185507774353, "learning_rate": 0.0001, "loss": 1.9058, "step": 554 }, { "epoch": 0.13477416221466731, "grad_norm": 0.34051644802093506, "learning_rate": 0.0001, "loss": 1.8254, "step": 555 }, { "epoch": 0.13501699854298202, "grad_norm": 0.33127209544181824, "learning_rate": 0.0001, "loss": 1.6908, "step": 556 }, { "epoch": 0.13525983487129675, "grad_norm": 0.3405444622039795, "learning_rate": 0.0001, "loss": 1.9559, "step": 557 }, { "epoch": 0.13550267119961146, "grad_norm": 0.33380869030952454, "learning_rate": 0.0001, "loss": 1.8753, "step": 558 }, { "epoch": 0.1357455075279262, "grad_norm": 0.3485771119594574, "learning_rate": 0.0001, "loss": 1.9057, "step": 559 }, { "epoch": 0.1359883438562409, "grad_norm": 0.36507877707481384, "learning_rate": 0.0001, "loss": 1.8183, "step": 560 }, { "epoch": 0.1362311801845556, "grad_norm": 0.3692066967487335, "learning_rate": 0.0001, "loss": 1.7619, "step": 561 }, { "epoch": 0.13647401651287033, "grad_norm": 0.32849472761154175, "learning_rate": 0.0001, "loss": 1.5954, "step": 562 }, { "epoch": 0.13671685284118504, "grad_norm": 0.3511491119861603, "learning_rate": 0.0001, "loss": 1.7778, "step": 563 }, { "epoch": 0.13695968916949977, "grad_norm": 0.3083047866821289, "learning_rate": 0.0001, "loss": 1.5886, "step": 564 }, { "epoch": 0.13720252549781448, "grad_norm": 0.3148990571498871, "learning_rate": 0.0001, "loss": 1.6174, "step": 565 }, { "epoch": 0.13744536182612918, "grad_norm": 0.34316617250442505, "learning_rate": 0.0001, "loss": 1.7766, "step": 566 }, { "epoch": 0.1376881981544439, "grad_norm": 0.34978803992271423, "learning_rate": 0.0001, "loss": 1.8194, "step": 567 }, { "epoch": 0.13793103448275862, "grad_norm": 0.3379075229167938, "learning_rate": 0.0001, "loss": 1.8286, "step": 568 }, { "epoch": 0.13817387081107332, "grad_norm": 0.3451768755912781, "learning_rate": 0.0001, "loss": 1.7955, "step": 569 }, { "epoch": 0.13841670713938806, "grad_norm": 0.3590940535068512, "learning_rate": 0.0001, "loss": 1.87, "step": 570 }, { "epoch": 0.13865954346770276, "grad_norm": 0.3292856216430664, "learning_rate": 0.0001, "loss": 1.8424, "step": 571 }, { "epoch": 0.1389023797960175, "grad_norm": 0.3355911672115326, "learning_rate": 0.0001, "loss": 1.7929, "step": 572 }, { "epoch": 0.1391452161243322, "grad_norm": 0.33305859565734863, "learning_rate": 0.0001, "loss": 1.8061, "step": 573 }, { "epoch": 0.1393880524526469, "grad_norm": 0.3276538550853729, "learning_rate": 0.0001, "loss": 1.8168, "step": 574 }, { "epoch": 0.13963088878096164, "grad_norm": 0.32917794585227966, "learning_rate": 0.0001, "loss": 1.7679, "step": 575 }, { "epoch": 0.13987372510927634, "grad_norm": 0.32212531566619873, "learning_rate": 0.0001, "loss": 1.8593, "step": 576 }, { "epoch": 0.14011656143759108, "grad_norm": 0.3472840487957001, "learning_rate": 0.0001, "loss": 1.7646, "step": 577 }, { "epoch": 0.14035939776590578, "grad_norm": 0.3321049213409424, "learning_rate": 0.0001, "loss": 1.7385, "step": 578 }, { "epoch": 0.14060223409422049, "grad_norm": 0.316599041223526, "learning_rate": 0.0001, "loss": 1.7995, "step": 579 }, { "epoch": 0.14084507042253522, "grad_norm": 0.3480801284313202, "learning_rate": 0.0001, "loss": 1.7401, "step": 580 }, { "epoch": 0.14108790675084992, "grad_norm": 0.3251743018627167, "learning_rate": 0.0001, "loss": 1.6566, "step": 581 }, { "epoch": 0.14133074307916466, "grad_norm": 0.3258562982082367, "learning_rate": 0.0001, "loss": 1.7591, "step": 582 }, { "epoch": 0.14157357940747936, "grad_norm": 0.3337818682193756, "learning_rate": 0.0001, "loss": 1.7909, "step": 583 }, { "epoch": 0.14181641573579407, "grad_norm": 0.32916077971458435, "learning_rate": 0.0001, "loss": 1.768, "step": 584 }, { "epoch": 0.1420592520641088, "grad_norm": 0.3424016833305359, "learning_rate": 0.0001, "loss": 1.7864, "step": 585 }, { "epoch": 0.1423020883924235, "grad_norm": 0.3284737765789032, "learning_rate": 0.0001, "loss": 1.7983, "step": 586 }, { "epoch": 0.1425449247207382, "grad_norm": 0.3327319920063019, "learning_rate": 0.0001, "loss": 1.8206, "step": 587 }, { "epoch": 0.14278776104905294, "grad_norm": 0.34315967559814453, "learning_rate": 0.0001, "loss": 1.8132, "step": 588 }, { "epoch": 0.14303059737736765, "grad_norm": 0.31191959977149963, "learning_rate": 0.0001, "loss": 1.5604, "step": 589 }, { "epoch": 0.14327343370568238, "grad_norm": 0.35059061646461487, "learning_rate": 0.0001, "loss": 1.9002, "step": 590 }, { "epoch": 0.14351627003399708, "grad_norm": 0.3173618018627167, "learning_rate": 0.0001, "loss": 1.7253, "step": 591 }, { "epoch": 0.1437591063623118, "grad_norm": 0.3325091302394867, "learning_rate": 0.0001, "loss": 1.7917, "step": 592 }, { "epoch": 0.14400194269062652, "grad_norm": 0.3475317656993866, "learning_rate": 0.0001, "loss": 1.7804, "step": 593 }, { "epoch": 0.14424477901894123, "grad_norm": 0.34290921688079834, "learning_rate": 0.0001, "loss": 1.9093, "step": 594 }, { "epoch": 0.14448761534725596, "grad_norm": 0.32887551188468933, "learning_rate": 0.0001, "loss": 1.848, "step": 595 }, { "epoch": 0.14473045167557066, "grad_norm": 0.307198166847229, "learning_rate": 0.0001, "loss": 1.5295, "step": 596 }, { "epoch": 0.14497328800388537, "grad_norm": 0.31939375400543213, "learning_rate": 0.0001, "loss": 1.7991, "step": 597 }, { "epoch": 0.1452161243322001, "grad_norm": 0.3266233205795288, "learning_rate": 0.0001, "loss": 1.7458, "step": 598 }, { "epoch": 0.1454589606605148, "grad_norm": 0.3055732846260071, "learning_rate": 0.0001, "loss": 1.7317, "step": 599 }, { "epoch": 0.14570179698882954, "grad_norm": 0.31025564670562744, "learning_rate": 0.0001, "loss": 1.6654, "step": 600 }, { "epoch": 0.14594463331714425, "grad_norm": 0.34017419815063477, "learning_rate": 0.0001, "loss": 1.8332, "step": 601 }, { "epoch": 0.14618746964545895, "grad_norm": 0.32183635234832764, "learning_rate": 0.0001, "loss": 1.7224, "step": 602 }, { "epoch": 0.14643030597377368, "grad_norm": 0.35014277696609497, "learning_rate": 0.0001, "loss": 1.7805, "step": 603 }, { "epoch": 0.1466731423020884, "grad_norm": 0.35021066665649414, "learning_rate": 0.0001, "loss": 1.8568, "step": 604 }, { "epoch": 0.14691597863040312, "grad_norm": 0.3157992660999298, "learning_rate": 0.0001, "loss": 1.69, "step": 605 }, { "epoch": 0.14715881495871783, "grad_norm": 0.351871132850647, "learning_rate": 0.0001, "loss": 1.7699, "step": 606 }, { "epoch": 0.14740165128703253, "grad_norm": 0.3164587914943695, "learning_rate": 0.0001, "loss": 1.5622, "step": 607 }, { "epoch": 0.14764448761534726, "grad_norm": 0.35028359293937683, "learning_rate": 0.0001, "loss": 1.7516, "step": 608 }, { "epoch": 0.14788732394366197, "grad_norm": 0.3479732871055603, "learning_rate": 0.0001, "loss": 1.7743, "step": 609 }, { "epoch": 0.14813016027197667, "grad_norm": 0.3294665515422821, "learning_rate": 0.0001, "loss": 1.8481, "step": 610 }, { "epoch": 0.1483729966002914, "grad_norm": 0.319428026676178, "learning_rate": 0.0001, "loss": 1.8482, "step": 611 }, { "epoch": 0.1486158329286061, "grad_norm": 0.3437284827232361, "learning_rate": 0.0001, "loss": 1.8229, "step": 612 }, { "epoch": 0.14885866925692084, "grad_norm": 0.3444206118583679, "learning_rate": 0.0001, "loss": 1.958, "step": 613 }, { "epoch": 0.14910150558523555, "grad_norm": 0.3380897343158722, "learning_rate": 0.0001, "loss": 1.7921, "step": 614 }, { "epoch": 0.14934434191355025, "grad_norm": 0.33966124057769775, "learning_rate": 0.0001, "loss": 1.8157, "step": 615 }, { "epoch": 0.149587178241865, "grad_norm": 0.32820945978164673, "learning_rate": 0.0001, "loss": 1.805, "step": 616 }, { "epoch": 0.1498300145701797, "grad_norm": 0.31519755721092224, "learning_rate": 0.0001, "loss": 1.6555, "step": 617 }, { "epoch": 0.15007285089849443, "grad_norm": 0.3371162414550781, "learning_rate": 0.0001, "loss": 1.8202, "step": 618 }, { "epoch": 0.15031568722680913, "grad_norm": 0.33624976873397827, "learning_rate": 0.0001, "loss": 1.8528, "step": 619 }, { "epoch": 0.15055852355512384, "grad_norm": 0.3345712423324585, "learning_rate": 0.0001, "loss": 1.6205, "step": 620 }, { "epoch": 0.15080135988343857, "grad_norm": 0.32715481519699097, "learning_rate": 0.0001, "loss": 1.7635, "step": 621 }, { "epoch": 0.15104419621175327, "grad_norm": 0.35149484872817993, "learning_rate": 0.0001, "loss": 1.7891, "step": 622 }, { "epoch": 0.151287032540068, "grad_norm": 0.3212732672691345, "learning_rate": 0.0001, "loss": 1.7351, "step": 623 }, { "epoch": 0.1515298688683827, "grad_norm": 0.33642488718032837, "learning_rate": 0.0001, "loss": 1.8604, "step": 624 }, { "epoch": 0.15177270519669742, "grad_norm": 0.35052698850631714, "learning_rate": 0.0001, "loss": 1.9189, "step": 625 }, { "epoch": 0.15201554152501215, "grad_norm": 0.32550784945487976, "learning_rate": 0.0001, "loss": 1.5858, "step": 626 }, { "epoch": 0.15225837785332685, "grad_norm": 0.3275245428085327, "learning_rate": 0.0001, "loss": 1.6605, "step": 627 }, { "epoch": 0.1525012141816416, "grad_norm": 0.3372390568256378, "learning_rate": 0.0001, "loss": 1.7338, "step": 628 }, { "epoch": 0.1527440505099563, "grad_norm": 0.32684433460235596, "learning_rate": 0.0001, "loss": 1.6609, "step": 629 }, { "epoch": 0.152986886838271, "grad_norm": 0.3422621786594391, "learning_rate": 0.0001, "loss": 1.7199, "step": 630 }, { "epoch": 0.15322972316658573, "grad_norm": 0.3409489095211029, "learning_rate": 0.0001, "loss": 1.8106, "step": 631 }, { "epoch": 0.15347255949490043, "grad_norm": 0.3328961730003357, "learning_rate": 0.0001, "loss": 1.7863, "step": 632 }, { "epoch": 0.15371539582321514, "grad_norm": 0.32431545853614807, "learning_rate": 0.0001, "loss": 1.7251, "step": 633 }, { "epoch": 0.15395823215152987, "grad_norm": 0.3159632384777069, "learning_rate": 0.0001, "loss": 1.581, "step": 634 }, { "epoch": 0.15420106847984458, "grad_norm": 0.325759619474411, "learning_rate": 0.0001, "loss": 1.7943, "step": 635 }, { "epoch": 0.1544439048081593, "grad_norm": 0.34994059801101685, "learning_rate": 0.0001, "loss": 1.9051, "step": 636 }, { "epoch": 0.15468674113647402, "grad_norm": 0.33518439531326294, "learning_rate": 0.0001, "loss": 1.7541, "step": 637 }, { "epoch": 0.15492957746478872, "grad_norm": 0.32423776388168335, "learning_rate": 0.0001, "loss": 1.753, "step": 638 }, { "epoch": 0.15517241379310345, "grad_norm": 0.3157038688659668, "learning_rate": 0.0001, "loss": 1.7187, "step": 639 }, { "epoch": 0.15541525012141816, "grad_norm": 0.33567070960998535, "learning_rate": 0.0001, "loss": 1.6209, "step": 640 }, { "epoch": 0.1556580864497329, "grad_norm": 0.3170723617076874, "learning_rate": 0.0001, "loss": 1.7539, "step": 641 }, { "epoch": 0.1559009227780476, "grad_norm": 0.32920214533805847, "learning_rate": 0.0001, "loss": 1.7689, "step": 642 }, { "epoch": 0.1561437591063623, "grad_norm": 0.3828733265399933, "learning_rate": 0.0001, "loss": 1.7785, "step": 643 }, { "epoch": 0.15638659543467703, "grad_norm": 0.35701796412467957, "learning_rate": 0.0001, "loss": 1.7291, "step": 644 }, { "epoch": 0.15662943176299174, "grad_norm": 0.3379092812538147, "learning_rate": 0.0001, "loss": 1.9058, "step": 645 }, { "epoch": 0.15687226809130647, "grad_norm": 0.35852211713790894, "learning_rate": 0.0001, "loss": 1.979, "step": 646 }, { "epoch": 0.15711510441962118, "grad_norm": 0.344271183013916, "learning_rate": 0.0001, "loss": 1.7638, "step": 647 }, { "epoch": 0.15735794074793588, "grad_norm": 0.34935322403907776, "learning_rate": 0.0001, "loss": 1.8539, "step": 648 }, { "epoch": 0.15760077707625061, "grad_norm": 0.3357618451118469, "learning_rate": 0.0001, "loss": 1.9117, "step": 649 }, { "epoch": 0.15784361340456532, "grad_norm": 0.3315989673137665, "learning_rate": 0.0001, "loss": 1.7258, "step": 650 }, { "epoch": 0.15808644973288005, "grad_norm": 0.33593153953552246, "learning_rate": 0.0001, "loss": 1.734, "step": 651 }, { "epoch": 0.15832928606119476, "grad_norm": 0.352849543094635, "learning_rate": 0.0001, "loss": 1.9499, "step": 652 }, { "epoch": 0.15857212238950946, "grad_norm": 0.3458506762981415, "learning_rate": 0.0001, "loss": 1.7657, "step": 653 }, { "epoch": 0.1588149587178242, "grad_norm": 0.33314961194992065, "learning_rate": 0.0001, "loss": 1.8456, "step": 654 }, { "epoch": 0.1590577950461389, "grad_norm": 0.3287748098373413, "learning_rate": 0.0001, "loss": 1.7065, "step": 655 }, { "epoch": 0.1593006313744536, "grad_norm": 0.3667800724506378, "learning_rate": 0.0001, "loss": 1.8753, "step": 656 }, { "epoch": 0.15954346770276834, "grad_norm": 0.3313208520412445, "learning_rate": 0.0001, "loss": 1.7463, "step": 657 }, { "epoch": 0.15978630403108304, "grad_norm": 0.33500197529792786, "learning_rate": 0.0001, "loss": 1.8661, "step": 658 }, { "epoch": 0.16002914035939778, "grad_norm": 0.3511989414691925, "learning_rate": 0.0001, "loss": 1.8231, "step": 659 }, { "epoch": 0.16027197668771248, "grad_norm": 0.343047559261322, "learning_rate": 0.0001, "loss": 1.7863, "step": 660 }, { "epoch": 0.16051481301602719, "grad_norm": 0.32878488302230835, "learning_rate": 0.0001, "loss": 1.749, "step": 661 }, { "epoch": 0.16075764934434192, "grad_norm": 0.3422974646091461, "learning_rate": 0.0001, "loss": 1.8309, "step": 662 }, { "epoch": 0.16100048567265662, "grad_norm": 0.32340022921562195, "learning_rate": 0.0001, "loss": 1.7192, "step": 663 }, { "epoch": 0.16124332200097136, "grad_norm": 0.35579779744148254, "learning_rate": 0.0001, "loss": 1.7947, "step": 664 }, { "epoch": 0.16148615832928606, "grad_norm": 0.34640470147132874, "learning_rate": 0.0001, "loss": 1.7191, "step": 665 }, { "epoch": 0.16172899465760077, "grad_norm": 0.3360711932182312, "learning_rate": 0.0001, "loss": 1.7657, "step": 666 }, { "epoch": 0.1619718309859155, "grad_norm": 0.3053452968597412, "learning_rate": 0.0001, "loss": 1.5486, "step": 667 }, { "epoch": 0.1622146673142302, "grad_norm": 0.34008896350860596, "learning_rate": 0.0001, "loss": 1.7507, "step": 668 }, { "epoch": 0.16245750364254494, "grad_norm": 0.374798059463501, "learning_rate": 0.0001, "loss": 1.8685, "step": 669 }, { "epoch": 0.16270033997085964, "grad_norm": 0.32496216893196106, "learning_rate": 0.0001, "loss": 1.7575, "step": 670 }, { "epoch": 0.16294317629917435, "grad_norm": 0.3341841697692871, "learning_rate": 0.0001, "loss": 1.6965, "step": 671 }, { "epoch": 0.16318601262748908, "grad_norm": 0.33429092168807983, "learning_rate": 0.0001, "loss": 1.702, "step": 672 }, { "epoch": 0.16342884895580378, "grad_norm": 0.3410471975803375, "learning_rate": 0.0001, "loss": 1.6771, "step": 673 }, { "epoch": 0.16367168528411852, "grad_norm": 0.35043495893478394, "learning_rate": 0.0001, "loss": 1.8804, "step": 674 }, { "epoch": 0.16391452161243322, "grad_norm": 0.37732118368148804, "learning_rate": 0.0001, "loss": 2.0228, "step": 675 }, { "epoch": 0.16415735794074793, "grad_norm": 0.36240872740745544, "learning_rate": 0.0001, "loss": 1.8744, "step": 676 }, { "epoch": 0.16440019426906266, "grad_norm": 0.37208640575408936, "learning_rate": 0.0001, "loss": 1.7277, "step": 677 }, { "epoch": 0.16464303059737737, "grad_norm": 0.3265897035598755, "learning_rate": 0.0001, "loss": 1.7531, "step": 678 }, { "epoch": 0.16488586692569207, "grad_norm": 0.3237488865852356, "learning_rate": 0.0001, "loss": 1.8174, "step": 679 }, { "epoch": 0.1651287032540068, "grad_norm": 0.33787935972213745, "learning_rate": 0.0001, "loss": 1.7959, "step": 680 }, { "epoch": 0.1653715395823215, "grad_norm": 0.3404181897640228, "learning_rate": 0.0001, "loss": 1.7234, "step": 681 }, { "epoch": 0.16561437591063624, "grad_norm": 0.3305708169937134, "learning_rate": 0.0001, "loss": 1.768, "step": 682 }, { "epoch": 0.16585721223895095, "grad_norm": 0.305209755897522, "learning_rate": 0.0001, "loss": 1.6544, "step": 683 }, { "epoch": 0.16610004856726565, "grad_norm": 0.3397432267665863, "learning_rate": 0.0001, "loss": 1.6999, "step": 684 }, { "epoch": 0.16634288489558038, "grad_norm": 0.3438527286052704, "learning_rate": 0.0001, "loss": 1.8724, "step": 685 }, { "epoch": 0.1665857212238951, "grad_norm": 0.34619447588920593, "learning_rate": 0.0001, "loss": 1.8039, "step": 686 }, { "epoch": 0.16682855755220982, "grad_norm": 0.33126625418663025, "learning_rate": 0.0001, "loss": 1.6389, "step": 687 }, { "epoch": 0.16707139388052453, "grad_norm": 0.34118935465812683, "learning_rate": 0.0001, "loss": 1.8495, "step": 688 }, { "epoch": 0.16731423020883923, "grad_norm": 0.3436391353607178, "learning_rate": 0.0001, "loss": 1.8543, "step": 689 }, { "epoch": 0.16755706653715396, "grad_norm": 0.3279227912425995, "learning_rate": 0.0001, "loss": 1.8061, "step": 690 }, { "epoch": 0.16779990286546867, "grad_norm": 0.32945412397384644, "learning_rate": 0.0001, "loss": 1.7128, "step": 691 }, { "epoch": 0.1680427391937834, "grad_norm": 0.32346343994140625, "learning_rate": 0.0001, "loss": 1.7311, "step": 692 }, { "epoch": 0.1682855755220981, "grad_norm": 0.31251630187034607, "learning_rate": 0.0001, "loss": 1.4493, "step": 693 }, { "epoch": 0.1685284118504128, "grad_norm": 0.3643507957458496, "learning_rate": 0.0001, "loss": 1.9172, "step": 694 }, { "epoch": 0.16877124817872755, "grad_norm": 0.34988000988960266, "learning_rate": 0.0001, "loss": 1.7222, "step": 695 }, { "epoch": 0.16901408450704225, "grad_norm": 0.3611491322517395, "learning_rate": 0.0001, "loss": 1.8329, "step": 696 }, { "epoch": 0.16925692083535698, "grad_norm": 0.32318976521492004, "learning_rate": 0.0001, "loss": 1.6282, "step": 697 }, { "epoch": 0.1694997571636717, "grad_norm": 0.34611061215400696, "learning_rate": 0.0001, "loss": 1.756, "step": 698 }, { "epoch": 0.1697425934919864, "grad_norm": 0.33741113543510437, "learning_rate": 0.0001, "loss": 1.8066, "step": 699 }, { "epoch": 0.16998542982030113, "grad_norm": 0.3311440944671631, "learning_rate": 0.0001, "loss": 1.6861, "step": 700 }, { "epoch": 0.17022826614861583, "grad_norm": 0.3328585624694824, "learning_rate": 0.0001, "loss": 1.7464, "step": 701 }, { "epoch": 0.17047110247693054, "grad_norm": 0.32861262559890747, "learning_rate": 0.0001, "loss": 1.7738, "step": 702 }, { "epoch": 0.17071393880524527, "grad_norm": 0.3296881318092346, "learning_rate": 0.0001, "loss": 1.6663, "step": 703 }, { "epoch": 0.17095677513355997, "grad_norm": 0.3431016206741333, "learning_rate": 0.0001, "loss": 1.8015, "step": 704 }, { "epoch": 0.1711996114618747, "grad_norm": 0.32219427824020386, "learning_rate": 0.0001, "loss": 1.6279, "step": 705 }, { "epoch": 0.1714424477901894, "grad_norm": 0.32654130458831787, "learning_rate": 0.0001, "loss": 1.6762, "step": 706 }, { "epoch": 0.17168528411850412, "grad_norm": 0.33777257800102234, "learning_rate": 0.0001, "loss": 1.9124, "step": 707 }, { "epoch": 0.17192812044681885, "grad_norm": 0.3405590057373047, "learning_rate": 0.0001, "loss": 1.7402, "step": 708 }, { "epoch": 0.17217095677513355, "grad_norm": 0.36250537633895874, "learning_rate": 0.0001, "loss": 1.8756, "step": 709 }, { "epoch": 0.1724137931034483, "grad_norm": 0.33841243386268616, "learning_rate": 0.0001, "loss": 1.8326, "step": 710 }, { "epoch": 0.172656629431763, "grad_norm": 0.33011090755462646, "learning_rate": 0.0001, "loss": 1.7608, "step": 711 }, { "epoch": 0.1728994657600777, "grad_norm": 0.3336141109466553, "learning_rate": 0.0001, "loss": 1.7569, "step": 712 }, { "epoch": 0.17314230208839243, "grad_norm": 0.33510497212409973, "learning_rate": 0.0001, "loss": 1.6263, "step": 713 }, { "epoch": 0.17338513841670714, "grad_norm": 0.32303619384765625, "learning_rate": 0.0001, "loss": 1.7252, "step": 714 }, { "epoch": 0.17362797474502187, "grad_norm": 0.34422940015792847, "learning_rate": 0.0001, "loss": 1.8453, "step": 715 }, { "epoch": 0.17387081107333657, "grad_norm": 0.3252314329147339, "learning_rate": 0.0001, "loss": 1.7176, "step": 716 }, { "epoch": 0.17411364740165128, "grad_norm": 0.34678858518600464, "learning_rate": 0.0001, "loss": 1.798, "step": 717 }, { "epoch": 0.174356483729966, "grad_norm": 0.3389412760734558, "learning_rate": 0.0001, "loss": 1.8228, "step": 718 }, { "epoch": 0.17459932005828072, "grad_norm": 0.33434489369392395, "learning_rate": 0.0001, "loss": 1.809, "step": 719 }, { "epoch": 0.17484215638659542, "grad_norm": 0.31604427099227905, "learning_rate": 0.0001, "loss": 1.3931, "step": 720 }, { "epoch": 0.17508499271491015, "grad_norm": 0.3255927264690399, "learning_rate": 0.0001, "loss": 1.6887, "step": 721 }, { "epoch": 0.17532782904322486, "grad_norm": 0.35983723402023315, "learning_rate": 0.0001, "loss": 1.994, "step": 722 }, { "epoch": 0.1755706653715396, "grad_norm": 0.32813572883605957, "learning_rate": 0.0001, "loss": 1.6857, "step": 723 }, { "epoch": 0.1758135016998543, "grad_norm": 0.3567933440208435, "learning_rate": 0.0001, "loss": 1.887, "step": 724 }, { "epoch": 0.176056338028169, "grad_norm": 0.34803661704063416, "learning_rate": 0.0001, "loss": 1.9376, "step": 725 }, { "epoch": 0.17629917435648373, "grad_norm": 0.34169384837150574, "learning_rate": 0.0001, "loss": 1.7396, "step": 726 }, { "epoch": 0.17654201068479844, "grad_norm": 0.3188161551952362, "learning_rate": 0.0001, "loss": 1.6764, "step": 727 }, { "epoch": 0.17678484701311317, "grad_norm": 0.33624914288520813, "learning_rate": 0.0001, "loss": 1.8037, "step": 728 }, { "epoch": 0.17702768334142788, "grad_norm": 0.3575475811958313, "learning_rate": 0.0001, "loss": 1.8988, "step": 729 }, { "epoch": 0.17727051966974258, "grad_norm": 0.3442639708518982, "learning_rate": 0.0001, "loss": 1.6926, "step": 730 }, { "epoch": 0.17751335599805732, "grad_norm": 0.327396959066391, "learning_rate": 0.0001, "loss": 1.7721, "step": 731 }, { "epoch": 0.17775619232637202, "grad_norm": 0.3187220096588135, "learning_rate": 0.0001, "loss": 1.6799, "step": 732 }, { "epoch": 0.17799902865468675, "grad_norm": 0.3227187395095825, "learning_rate": 0.0001, "loss": 1.6265, "step": 733 }, { "epoch": 0.17824186498300146, "grad_norm": 0.3068956732749939, "learning_rate": 0.0001, "loss": 1.6813, "step": 734 }, { "epoch": 0.17848470131131616, "grad_norm": 0.3299641013145447, "learning_rate": 0.0001, "loss": 1.7025, "step": 735 }, { "epoch": 0.1787275376396309, "grad_norm": 0.34794217348098755, "learning_rate": 0.0001, "loss": 1.7394, "step": 736 }, { "epoch": 0.1789703739679456, "grad_norm": 0.361322283744812, "learning_rate": 0.0001, "loss": 1.9326, "step": 737 }, { "epoch": 0.17921321029626033, "grad_norm": 0.32667848467826843, "learning_rate": 0.0001, "loss": 1.7336, "step": 738 }, { "epoch": 0.17945604662457504, "grad_norm": 0.32748880982398987, "learning_rate": 0.0001, "loss": 1.7288, "step": 739 }, { "epoch": 0.17969888295288974, "grad_norm": 0.30917081236839294, "learning_rate": 0.0001, "loss": 1.5652, "step": 740 }, { "epoch": 0.17994171928120448, "grad_norm": 0.32722023129463196, "learning_rate": 0.0001, "loss": 1.8459, "step": 741 }, { "epoch": 0.18018455560951918, "grad_norm": 0.346816748380661, "learning_rate": 0.0001, "loss": 1.8984, "step": 742 }, { "epoch": 0.1804273919378339, "grad_norm": 0.32882779836654663, "learning_rate": 0.0001, "loss": 1.8174, "step": 743 }, { "epoch": 0.18067022826614862, "grad_norm": 0.3457486629486084, "learning_rate": 0.0001, "loss": 1.848, "step": 744 }, { "epoch": 0.18091306459446332, "grad_norm": 0.3878709673881531, "learning_rate": 0.0001, "loss": 1.9496, "step": 745 }, { "epoch": 0.18115590092277806, "grad_norm": 0.3353903889656067, "learning_rate": 0.0001, "loss": 1.868, "step": 746 }, { "epoch": 0.18139873725109276, "grad_norm": 0.3408859372138977, "learning_rate": 0.0001, "loss": 1.6315, "step": 747 }, { "epoch": 0.18164157357940747, "grad_norm": 0.34258410334587097, "learning_rate": 0.0001, "loss": 1.7475, "step": 748 }, { "epoch": 0.1818844099077222, "grad_norm": 0.3541860580444336, "learning_rate": 0.0001, "loss": 1.7701, "step": 749 }, { "epoch": 0.1821272462360369, "grad_norm": 0.3331660330295563, "learning_rate": 0.0001, "loss": 1.8313, "step": 750 }, { "epoch": 0.18237008256435164, "grad_norm": 0.33861926198005676, "learning_rate": 0.0001, "loss": 1.6838, "step": 751 }, { "epoch": 0.18261291889266634, "grad_norm": 0.3338991105556488, "learning_rate": 0.0001, "loss": 1.7838, "step": 752 }, { "epoch": 0.18285575522098105, "grad_norm": 0.342155784368515, "learning_rate": 0.0001, "loss": 1.8657, "step": 753 }, { "epoch": 0.18309859154929578, "grad_norm": 0.3422032594680786, "learning_rate": 0.0001, "loss": 1.5988, "step": 754 }, { "epoch": 0.18334142787761049, "grad_norm": 0.3455490469932556, "learning_rate": 0.0001, "loss": 1.858, "step": 755 }, { "epoch": 0.18358426420592522, "grad_norm": 0.3422670364379883, "learning_rate": 0.0001, "loss": 1.7285, "step": 756 }, { "epoch": 0.18382710053423992, "grad_norm": 0.3451416790485382, "learning_rate": 0.0001, "loss": 1.8276, "step": 757 }, { "epoch": 0.18406993686255463, "grad_norm": 0.3365596532821655, "learning_rate": 0.0001, "loss": 1.7377, "step": 758 }, { "epoch": 0.18431277319086936, "grad_norm": 0.3370806872844696, "learning_rate": 0.0001, "loss": 1.763, "step": 759 }, { "epoch": 0.18455560951918407, "grad_norm": 0.3182312846183777, "learning_rate": 0.0001, "loss": 1.6808, "step": 760 }, { "epoch": 0.1847984458474988, "grad_norm": 0.3098803460597992, "learning_rate": 0.0001, "loss": 1.5653, "step": 761 }, { "epoch": 0.1850412821758135, "grad_norm": 0.34688398241996765, "learning_rate": 0.0001, "loss": 1.8465, "step": 762 }, { "epoch": 0.1852841185041282, "grad_norm": 0.3338069021701813, "learning_rate": 0.0001, "loss": 1.5843, "step": 763 }, { "epoch": 0.18552695483244294, "grad_norm": 0.3415437340736389, "learning_rate": 0.0001, "loss": 1.6496, "step": 764 }, { "epoch": 0.18576979116075765, "grad_norm": 0.33651795983314514, "learning_rate": 0.0001, "loss": 1.7268, "step": 765 }, { "epoch": 0.18601262748907235, "grad_norm": 0.31493595242500305, "learning_rate": 0.0001, "loss": 1.6041, "step": 766 }, { "epoch": 0.18625546381738708, "grad_norm": 0.35351628065109253, "learning_rate": 0.0001, "loss": 1.9251, "step": 767 }, { "epoch": 0.1864983001457018, "grad_norm": 0.3334658741950989, "learning_rate": 0.0001, "loss": 1.6445, "step": 768 }, { "epoch": 0.18674113647401652, "grad_norm": 0.33200061321258545, "learning_rate": 0.0001, "loss": 1.8252, "step": 769 }, { "epoch": 0.18698397280233123, "grad_norm": 0.32640960812568665, "learning_rate": 0.0001, "loss": 1.7991, "step": 770 }, { "epoch": 0.18722680913064593, "grad_norm": 0.3249363601207733, "learning_rate": 0.0001, "loss": 1.8871, "step": 771 }, { "epoch": 0.18746964545896067, "grad_norm": 0.33192652463912964, "learning_rate": 0.0001, "loss": 1.7089, "step": 772 }, { "epoch": 0.18771248178727537, "grad_norm": 0.3350042700767517, "learning_rate": 0.0001, "loss": 1.7861, "step": 773 }, { "epoch": 0.1879553181155901, "grad_norm": 0.34054598212242126, "learning_rate": 0.0001, "loss": 1.6476, "step": 774 }, { "epoch": 0.1881981544439048, "grad_norm": 0.3331514894962311, "learning_rate": 0.0001, "loss": 1.6713, "step": 775 }, { "epoch": 0.1884409907722195, "grad_norm": 0.34734639525413513, "learning_rate": 0.0001, "loss": 1.6703, "step": 776 }, { "epoch": 0.18868382710053425, "grad_norm": 0.3209059238433838, "learning_rate": 0.0001, "loss": 1.6813, "step": 777 }, { "epoch": 0.18892666342884895, "grad_norm": 0.33769097924232483, "learning_rate": 0.0001, "loss": 1.6145, "step": 778 }, { "epoch": 0.18916949975716368, "grad_norm": 0.3314269483089447, "learning_rate": 0.0001, "loss": 1.891, "step": 779 }, { "epoch": 0.1894123360854784, "grad_norm": 0.3271227478981018, "learning_rate": 0.0001, "loss": 1.7157, "step": 780 }, { "epoch": 0.1896551724137931, "grad_norm": 0.33472874760627747, "learning_rate": 0.0001, "loss": 1.8272, "step": 781 }, { "epoch": 0.18989800874210783, "grad_norm": 0.3343239426612854, "learning_rate": 0.0001, "loss": 1.8228, "step": 782 }, { "epoch": 0.19014084507042253, "grad_norm": 0.32455193996429443, "learning_rate": 0.0001, "loss": 1.8218, "step": 783 }, { "epoch": 0.19038368139873726, "grad_norm": 0.3229330778121948, "learning_rate": 0.0001, "loss": 1.6711, "step": 784 }, { "epoch": 0.19062651772705197, "grad_norm": 0.32274356484413147, "learning_rate": 0.0001, "loss": 1.6694, "step": 785 }, { "epoch": 0.19086935405536667, "grad_norm": 0.34533029794692993, "learning_rate": 0.0001, "loss": 1.9482, "step": 786 }, { "epoch": 0.1911121903836814, "grad_norm": 0.33702874183654785, "learning_rate": 0.0001, "loss": 1.6929, "step": 787 }, { "epoch": 0.1913550267119961, "grad_norm": 0.3319770097732544, "learning_rate": 0.0001, "loss": 1.74, "step": 788 }, { "epoch": 0.19159786304031082, "grad_norm": 0.35082754492759705, "learning_rate": 0.0001, "loss": 1.8279, "step": 789 }, { "epoch": 0.19184069936862555, "grad_norm": 0.36771881580352783, "learning_rate": 0.0001, "loss": 1.9524, "step": 790 }, { "epoch": 0.19208353569694026, "grad_norm": 0.3544480502605438, "learning_rate": 0.0001, "loss": 1.7698, "step": 791 }, { "epoch": 0.192326372025255, "grad_norm": 0.3526579737663269, "learning_rate": 0.0001, "loss": 1.7691, "step": 792 }, { "epoch": 0.1925692083535697, "grad_norm": 0.34120672941207886, "learning_rate": 0.0001, "loss": 1.5646, "step": 793 }, { "epoch": 0.1928120446818844, "grad_norm": 0.31328442692756653, "learning_rate": 0.0001, "loss": 1.6347, "step": 794 }, { "epoch": 0.19305488101019913, "grad_norm": 0.3437521159648895, "learning_rate": 0.0001, "loss": 1.7563, "step": 795 }, { "epoch": 0.19329771733851384, "grad_norm": 0.35751140117645264, "learning_rate": 0.0001, "loss": 1.8915, "step": 796 }, { "epoch": 0.19354055366682857, "grad_norm": 0.3591373562812805, "learning_rate": 0.0001, "loss": 1.9232, "step": 797 }, { "epoch": 0.19378338999514327, "grad_norm": 0.3579035997390747, "learning_rate": 0.0001, "loss": 1.7625, "step": 798 }, { "epoch": 0.19402622632345798, "grad_norm": 0.32526394724845886, "learning_rate": 0.0001, "loss": 1.6856, "step": 799 }, { "epoch": 0.1942690626517727, "grad_norm": 0.3462944030761719, "learning_rate": 0.0001, "loss": 1.9237, "step": 800 }, { "epoch": 0.19451189898008742, "grad_norm": 0.3462536633014679, "learning_rate": 0.0001, "loss": 1.7484, "step": 801 }, { "epoch": 0.19475473530840215, "grad_norm": 0.343323290348053, "learning_rate": 0.0001, "loss": 1.8536, "step": 802 }, { "epoch": 0.19499757163671685, "grad_norm": 0.3693440556526184, "learning_rate": 0.0001, "loss": 1.931, "step": 803 }, { "epoch": 0.19524040796503156, "grad_norm": 0.3456905782222748, "learning_rate": 0.0001, "loss": 1.8112, "step": 804 }, { "epoch": 0.1954832442933463, "grad_norm": 0.3245755136013031, "learning_rate": 0.0001, "loss": 1.8371, "step": 805 }, { "epoch": 0.195726080621661, "grad_norm": 0.3217308223247528, "learning_rate": 0.0001, "loss": 1.6225, "step": 806 }, { "epoch": 0.19596891694997573, "grad_norm": 0.3111560642719269, "learning_rate": 0.0001, "loss": 1.6079, "step": 807 }, { "epoch": 0.19621175327829043, "grad_norm": 0.33693355321884155, "learning_rate": 0.0001, "loss": 1.5574, "step": 808 }, { "epoch": 0.19645458960660514, "grad_norm": 0.3521810472011566, "learning_rate": 0.0001, "loss": 1.7933, "step": 809 }, { "epoch": 0.19669742593491987, "grad_norm": 0.3548601269721985, "learning_rate": 0.0001, "loss": 1.8576, "step": 810 }, { "epoch": 0.19694026226323458, "grad_norm": 0.33542320132255554, "learning_rate": 0.0001, "loss": 1.8073, "step": 811 }, { "epoch": 0.19718309859154928, "grad_norm": 0.3425523638725281, "learning_rate": 0.0001, "loss": 1.7489, "step": 812 }, { "epoch": 0.19742593491986402, "grad_norm": 0.34044787287712097, "learning_rate": 0.0001, "loss": 1.8246, "step": 813 }, { "epoch": 0.19766877124817872, "grad_norm": 0.33918073773384094, "learning_rate": 0.0001, "loss": 1.8108, "step": 814 }, { "epoch": 0.19791160757649345, "grad_norm": 0.35202568769454956, "learning_rate": 0.0001, "loss": 1.6453, "step": 815 }, { "epoch": 0.19815444390480816, "grad_norm": 0.33459073305130005, "learning_rate": 0.0001, "loss": 1.6348, "step": 816 }, { "epoch": 0.19839728023312286, "grad_norm": 0.3267429769039154, "learning_rate": 0.0001, "loss": 1.7359, "step": 817 }, { "epoch": 0.1986401165614376, "grad_norm": 0.3398960530757904, "learning_rate": 0.0001, "loss": 1.9159, "step": 818 }, { "epoch": 0.1988829528897523, "grad_norm": 0.35579246282577515, "learning_rate": 0.0001, "loss": 1.8049, "step": 819 }, { "epoch": 0.19912578921806703, "grad_norm": 0.34332728385925293, "learning_rate": 0.0001, "loss": 1.7352, "step": 820 }, { "epoch": 0.19936862554638174, "grad_norm": 0.3549124598503113, "learning_rate": 0.0001, "loss": 1.844, "step": 821 }, { "epoch": 0.19961146187469644, "grad_norm": 0.35614240169525146, "learning_rate": 0.0001, "loss": 1.768, "step": 822 }, { "epoch": 0.19985429820301118, "grad_norm": 0.32354140281677246, "learning_rate": 0.0001, "loss": 1.7093, "step": 823 }, { "epoch": 0.20009713453132588, "grad_norm": 0.3101747930049896, "learning_rate": 0.0001, "loss": 1.604, "step": 824 }, { "epoch": 0.20033997085964061, "grad_norm": 0.340231329202652, "learning_rate": 0.0001, "loss": 1.7756, "step": 825 }, { "epoch": 0.20058280718795532, "grad_norm": 0.3541657030582428, "learning_rate": 0.0001, "loss": 1.9198, "step": 826 }, { "epoch": 0.20082564351627002, "grad_norm": 0.34510529041290283, "learning_rate": 0.0001, "loss": 1.7255, "step": 827 }, { "epoch": 0.20106847984458476, "grad_norm": 0.31408369541168213, "learning_rate": 0.0001, "loss": 1.6342, "step": 828 }, { "epoch": 0.20131131617289946, "grad_norm": 0.33251187205314636, "learning_rate": 0.0001, "loss": 1.6463, "step": 829 }, { "epoch": 0.2015541525012142, "grad_norm": 0.368576318025589, "learning_rate": 0.0001, "loss": 1.8156, "step": 830 }, { "epoch": 0.2017969888295289, "grad_norm": 0.3493247926235199, "learning_rate": 0.0001, "loss": 1.8595, "step": 831 }, { "epoch": 0.2020398251578436, "grad_norm": 0.3393223285675049, "learning_rate": 0.0001, "loss": 1.7177, "step": 832 }, { "epoch": 0.20228266148615834, "grad_norm": 0.33512645959854126, "learning_rate": 0.0001, "loss": 1.7787, "step": 833 }, { "epoch": 0.20252549781447304, "grad_norm": 0.3425748646259308, "learning_rate": 0.0001, "loss": 1.7336, "step": 834 }, { "epoch": 0.20276833414278775, "grad_norm": 0.3310282826423645, "learning_rate": 0.0001, "loss": 1.67, "step": 835 }, { "epoch": 0.20301117047110248, "grad_norm": 0.34667834639549255, "learning_rate": 0.0001, "loss": 1.7295, "step": 836 }, { "epoch": 0.2032540067994172, "grad_norm": 0.3334478437900543, "learning_rate": 0.0001, "loss": 1.7254, "step": 837 }, { "epoch": 0.20349684312773192, "grad_norm": 0.3700467646121979, "learning_rate": 0.0001, "loss": 1.7012, "step": 838 }, { "epoch": 0.20373967945604662, "grad_norm": 0.36770233511924744, "learning_rate": 0.0001, "loss": 1.8803, "step": 839 }, { "epoch": 0.20398251578436133, "grad_norm": 0.38123154640197754, "learning_rate": 0.0001, "loss": 1.8847, "step": 840 }, { "epoch": 0.20422535211267606, "grad_norm": 0.35769400000572205, "learning_rate": 0.0001, "loss": 1.8712, "step": 841 }, { "epoch": 0.20446818844099077, "grad_norm": 0.3599579632282257, "learning_rate": 0.0001, "loss": 1.7323, "step": 842 }, { "epoch": 0.2047110247693055, "grad_norm": 0.3479319214820862, "learning_rate": 0.0001, "loss": 1.6642, "step": 843 }, { "epoch": 0.2049538610976202, "grad_norm": 0.33099737763404846, "learning_rate": 0.0001, "loss": 1.6858, "step": 844 }, { "epoch": 0.2051966974259349, "grad_norm": 0.3572336733341217, "learning_rate": 0.0001, "loss": 1.7512, "step": 845 }, { "epoch": 0.20543953375424964, "grad_norm": 0.36039644479751587, "learning_rate": 0.0001, "loss": 1.6811, "step": 846 }, { "epoch": 0.20568237008256435, "grad_norm": 0.33296865224838257, "learning_rate": 0.0001, "loss": 1.7238, "step": 847 }, { "epoch": 0.20592520641087908, "grad_norm": 0.32468274235725403, "learning_rate": 0.0001, "loss": 1.7885, "step": 848 }, { "epoch": 0.20616804273919379, "grad_norm": 0.34729287028312683, "learning_rate": 0.0001, "loss": 1.7283, "step": 849 }, { "epoch": 0.2064108790675085, "grad_norm": 0.32223427295684814, "learning_rate": 0.0001, "loss": 1.8602, "step": 850 }, { "epoch": 0.20665371539582322, "grad_norm": 0.33634892106056213, "learning_rate": 0.0001, "loss": 1.762, "step": 851 }, { "epoch": 0.20689655172413793, "grad_norm": 0.3353589177131653, "learning_rate": 0.0001, "loss": 1.7168, "step": 852 }, { "epoch": 0.20713938805245263, "grad_norm": 0.3339422047138214, "learning_rate": 0.0001, "loss": 1.6908, "step": 853 }, { "epoch": 0.20738222438076737, "grad_norm": 0.3292504847049713, "learning_rate": 0.0001, "loss": 1.6529, "step": 854 }, { "epoch": 0.20762506070908207, "grad_norm": 0.33124348521232605, "learning_rate": 0.0001, "loss": 1.666, "step": 855 }, { "epoch": 0.2078678970373968, "grad_norm": 0.3586646616458893, "learning_rate": 0.0001, "loss": 1.8378, "step": 856 }, { "epoch": 0.2081107333657115, "grad_norm": 0.3264204263687134, "learning_rate": 0.0001, "loss": 1.649, "step": 857 }, { "epoch": 0.2083535696940262, "grad_norm": 0.3210090100765228, "learning_rate": 0.0001, "loss": 1.7189, "step": 858 }, { "epoch": 0.20859640602234095, "grad_norm": 0.33976536989212036, "learning_rate": 0.0001, "loss": 1.8222, "step": 859 }, { "epoch": 0.20883924235065565, "grad_norm": 0.3441348373889923, "learning_rate": 0.0001, "loss": 1.834, "step": 860 }, { "epoch": 0.20908207867897038, "grad_norm": 0.356228768825531, "learning_rate": 0.0001, "loss": 1.7859, "step": 861 }, { "epoch": 0.2093249150072851, "grad_norm": 0.3454718589782715, "learning_rate": 0.0001, "loss": 1.7108, "step": 862 }, { "epoch": 0.2095677513355998, "grad_norm": 0.3440695106983185, "learning_rate": 0.0001, "loss": 1.757, "step": 863 }, { "epoch": 0.20981058766391453, "grad_norm": 0.368131548166275, "learning_rate": 0.0001, "loss": 1.8494, "step": 864 }, { "epoch": 0.21005342399222923, "grad_norm": 0.33047816157341003, "learning_rate": 0.0001, "loss": 1.7694, "step": 865 }, { "epoch": 0.21029626032054397, "grad_norm": 0.4003385603427887, "learning_rate": 0.0001, "loss": 1.7922, "step": 866 }, { "epoch": 0.21053909664885867, "grad_norm": 0.3227761387825012, "learning_rate": 0.0001, "loss": 1.6887, "step": 867 }, { "epoch": 0.21078193297717338, "grad_norm": 0.3393837511539459, "learning_rate": 0.0001, "loss": 1.7138, "step": 868 }, { "epoch": 0.2110247693054881, "grad_norm": 0.33335569500923157, "learning_rate": 0.0001, "loss": 1.7631, "step": 869 }, { "epoch": 0.2112676056338028, "grad_norm": 0.3336202800273895, "learning_rate": 0.0001, "loss": 1.6655, "step": 870 }, { "epoch": 0.21151044196211755, "grad_norm": 0.3336716592311859, "learning_rate": 0.0001, "loss": 1.6685, "step": 871 }, { "epoch": 0.21175327829043225, "grad_norm": 0.34575212001800537, "learning_rate": 0.0001, "loss": 1.8437, "step": 872 }, { "epoch": 0.21199611461874696, "grad_norm": 0.3509778678417206, "learning_rate": 0.0001, "loss": 1.674, "step": 873 }, { "epoch": 0.2122389509470617, "grad_norm": 0.32758817076683044, "learning_rate": 0.0001, "loss": 1.6476, "step": 874 }, { "epoch": 0.2124817872753764, "grad_norm": 0.3272348940372467, "learning_rate": 0.0001, "loss": 1.5082, "step": 875 }, { "epoch": 0.2127246236036911, "grad_norm": 0.3567332625389099, "learning_rate": 0.0001, "loss": 1.8242, "step": 876 }, { "epoch": 0.21296745993200583, "grad_norm": 0.36553505063056946, "learning_rate": 0.0001, "loss": 1.7268, "step": 877 }, { "epoch": 0.21321029626032054, "grad_norm": 0.3332120180130005, "learning_rate": 0.0001, "loss": 1.7061, "step": 878 }, { "epoch": 0.21345313258863527, "grad_norm": 0.3399730920791626, "learning_rate": 0.0001, "loss": 1.8343, "step": 879 }, { "epoch": 0.21369596891694997, "grad_norm": 0.346131831407547, "learning_rate": 0.0001, "loss": 1.7754, "step": 880 }, { "epoch": 0.21393880524526468, "grad_norm": 0.35501691699028015, "learning_rate": 0.0001, "loss": 1.7037, "step": 881 }, { "epoch": 0.2141816415735794, "grad_norm": 0.3296474516391754, "learning_rate": 0.0001, "loss": 1.7668, "step": 882 }, { "epoch": 0.21442447790189412, "grad_norm": 0.34373652935028076, "learning_rate": 0.0001, "loss": 1.9377, "step": 883 }, { "epoch": 0.21466731423020885, "grad_norm": 0.36046624183654785, "learning_rate": 0.0001, "loss": 1.6914, "step": 884 }, { "epoch": 0.21491015055852355, "grad_norm": 0.3706079125404358, "learning_rate": 0.0001, "loss": 1.835, "step": 885 }, { "epoch": 0.21515298688683826, "grad_norm": 0.3273414969444275, "learning_rate": 0.0001, "loss": 1.5998, "step": 886 }, { "epoch": 0.215395823215153, "grad_norm": 0.3641795814037323, "learning_rate": 0.0001, "loss": 1.7866, "step": 887 }, { "epoch": 0.2156386595434677, "grad_norm": 0.3411569595336914, "learning_rate": 0.0001, "loss": 1.5151, "step": 888 }, { "epoch": 0.21588149587178243, "grad_norm": 0.3366343080997467, "learning_rate": 0.0001, "loss": 1.8079, "step": 889 }, { "epoch": 0.21612433220009714, "grad_norm": 0.3335423171520233, "learning_rate": 0.0001, "loss": 1.7633, "step": 890 }, { "epoch": 0.21636716852841184, "grad_norm": 0.3207727074623108, "learning_rate": 0.0001, "loss": 1.5874, "step": 891 }, { "epoch": 0.21661000485672657, "grad_norm": 0.3799630403518677, "learning_rate": 0.0001, "loss": 1.6903, "step": 892 }, { "epoch": 0.21685284118504128, "grad_norm": 0.3363555669784546, "learning_rate": 0.0001, "loss": 1.7002, "step": 893 }, { "epoch": 0.217095677513356, "grad_norm": 0.3288339376449585, "learning_rate": 0.0001, "loss": 1.7877, "step": 894 }, { "epoch": 0.21733851384167072, "grad_norm": 0.3369339406490326, "learning_rate": 0.0001, "loss": 1.6823, "step": 895 }, { "epoch": 0.21758135016998542, "grad_norm": 0.3292815387248993, "learning_rate": 0.0001, "loss": 1.6129, "step": 896 }, { "epoch": 0.21782418649830015, "grad_norm": 0.3598136901855469, "learning_rate": 0.0001, "loss": 1.9099, "step": 897 }, { "epoch": 0.21806702282661486, "grad_norm": 0.3366025984287262, "learning_rate": 0.0001, "loss": 1.7813, "step": 898 }, { "epoch": 0.21830985915492956, "grad_norm": 0.33517763018608093, "learning_rate": 0.0001, "loss": 1.7882, "step": 899 }, { "epoch": 0.2185526954832443, "grad_norm": 0.36312445998191833, "learning_rate": 0.0001, "loss": 1.7446, "step": 900 }, { "epoch": 0.218795531811559, "grad_norm": 0.32939261198043823, "learning_rate": 0.0001, "loss": 1.7117, "step": 901 }, { "epoch": 0.21903836813987373, "grad_norm": 0.32495957612991333, "learning_rate": 0.0001, "loss": 1.5321, "step": 902 }, { "epoch": 0.21928120446818844, "grad_norm": 0.32330915331840515, "learning_rate": 0.0001, "loss": 1.7465, "step": 903 }, { "epoch": 0.21952404079650314, "grad_norm": 0.3653425872325897, "learning_rate": 0.0001, "loss": 1.9813, "step": 904 }, { "epoch": 0.21976687712481788, "grad_norm": 0.3383382260799408, "learning_rate": 0.0001, "loss": 1.706, "step": 905 }, { "epoch": 0.22000971345313258, "grad_norm": 0.31804466247558594, "learning_rate": 0.0001, "loss": 1.752, "step": 906 }, { "epoch": 0.22025254978144732, "grad_norm": 0.3489072322845459, "learning_rate": 0.0001, "loss": 1.9311, "step": 907 }, { "epoch": 0.22049538610976202, "grad_norm": 0.34647881984710693, "learning_rate": 0.0001, "loss": 1.9223, "step": 908 }, { "epoch": 0.22073822243807673, "grad_norm": 0.35893869400024414, "learning_rate": 0.0001, "loss": 1.7646, "step": 909 }, { "epoch": 0.22098105876639146, "grad_norm": 0.34356799721717834, "learning_rate": 0.0001, "loss": 1.7663, "step": 910 }, { "epoch": 0.22122389509470616, "grad_norm": 0.3178933262825012, "learning_rate": 0.0001, "loss": 1.5849, "step": 911 }, { "epoch": 0.2214667314230209, "grad_norm": 0.3426609933376312, "learning_rate": 0.0001, "loss": 1.7133, "step": 912 }, { "epoch": 0.2217095677513356, "grad_norm": 0.3565383851528168, "learning_rate": 0.0001, "loss": 1.7553, "step": 913 }, { "epoch": 0.2219524040796503, "grad_norm": 0.3379584550857544, "learning_rate": 0.0001, "loss": 1.6644, "step": 914 }, { "epoch": 0.22219524040796504, "grad_norm": 0.33079883456230164, "learning_rate": 0.0001, "loss": 1.609, "step": 915 }, { "epoch": 0.22243807673627974, "grad_norm": 0.32773423194885254, "learning_rate": 0.0001, "loss": 1.7287, "step": 916 }, { "epoch": 0.22268091306459448, "grad_norm": 0.3350023329257965, "learning_rate": 0.0001, "loss": 1.7385, "step": 917 }, { "epoch": 0.22292374939290918, "grad_norm": 0.34006428718566895, "learning_rate": 0.0001, "loss": 1.6834, "step": 918 }, { "epoch": 0.2231665857212239, "grad_norm": 0.33220747113227844, "learning_rate": 0.0001, "loss": 1.6562, "step": 919 }, { "epoch": 0.22340942204953862, "grad_norm": 0.3405970335006714, "learning_rate": 0.0001, "loss": 1.7838, "step": 920 }, { "epoch": 0.22365225837785332, "grad_norm": 0.32862991094589233, "learning_rate": 0.0001, "loss": 1.5924, "step": 921 }, { "epoch": 0.22389509470616803, "grad_norm": 0.33819305896759033, "learning_rate": 0.0001, "loss": 1.7711, "step": 922 }, { "epoch": 0.22413793103448276, "grad_norm": 0.3736564815044403, "learning_rate": 0.0001, "loss": 1.808, "step": 923 }, { "epoch": 0.22438076736279747, "grad_norm": 0.37374764680862427, "learning_rate": 0.0001, "loss": 1.9576, "step": 924 }, { "epoch": 0.2246236036911122, "grad_norm": 0.35444918274879456, "learning_rate": 0.0001, "loss": 1.896, "step": 925 }, { "epoch": 0.2248664400194269, "grad_norm": 0.34131696820259094, "learning_rate": 0.0001, "loss": 1.7908, "step": 926 }, { "epoch": 0.2251092763477416, "grad_norm": 0.3766651451587677, "learning_rate": 0.0001, "loss": 2.0439, "step": 927 }, { "epoch": 0.22535211267605634, "grad_norm": 0.343290239572525, "learning_rate": 0.0001, "loss": 1.8129, "step": 928 }, { "epoch": 0.22559494900437105, "grad_norm": 0.32844051718711853, "learning_rate": 0.0001, "loss": 1.6883, "step": 929 }, { "epoch": 0.22583778533268578, "grad_norm": 0.3543758988380432, "learning_rate": 0.0001, "loss": 1.8604, "step": 930 }, { "epoch": 0.22608062166100049, "grad_norm": 0.3468215763568878, "learning_rate": 0.0001, "loss": 1.8882, "step": 931 }, { "epoch": 0.2263234579893152, "grad_norm": 0.3537542521953583, "learning_rate": 0.0001, "loss": 1.8822, "step": 932 }, { "epoch": 0.22656629431762992, "grad_norm": 0.3319475054740906, "learning_rate": 0.0001, "loss": 1.6875, "step": 933 }, { "epoch": 0.22680913064594463, "grad_norm": 0.31763777136802673, "learning_rate": 0.0001, "loss": 1.7159, "step": 934 }, { "epoch": 0.22705196697425936, "grad_norm": 0.34334397315979004, "learning_rate": 0.0001, "loss": 1.6944, "step": 935 }, { "epoch": 0.22729480330257407, "grad_norm": 0.3653299808502197, "learning_rate": 0.0001, "loss": 1.8486, "step": 936 }, { "epoch": 0.22753763963088877, "grad_norm": 0.3533567786216736, "learning_rate": 0.0001, "loss": 1.8319, "step": 937 }, { "epoch": 0.2277804759592035, "grad_norm": 0.3710135221481323, "learning_rate": 0.0001, "loss": 1.7908, "step": 938 }, { "epoch": 0.2280233122875182, "grad_norm": 0.3512992262840271, "learning_rate": 0.0001, "loss": 1.891, "step": 939 }, { "epoch": 0.22826614861583294, "grad_norm": 0.34045422077178955, "learning_rate": 0.0001, "loss": 1.8874, "step": 940 }, { "epoch": 0.22850898494414765, "grad_norm": 0.30804723501205444, "learning_rate": 0.0001, "loss": 1.5742, "step": 941 }, { "epoch": 0.22875182127246235, "grad_norm": 0.3212561011314392, "learning_rate": 0.0001, "loss": 1.682, "step": 942 }, { "epoch": 0.22899465760077709, "grad_norm": 0.30441156029701233, "learning_rate": 0.0001, "loss": 1.6751, "step": 943 }, { "epoch": 0.2292374939290918, "grad_norm": 0.3184438645839691, "learning_rate": 0.0001, "loss": 1.6578, "step": 944 }, { "epoch": 0.2294803302574065, "grad_norm": 0.35863617062568665, "learning_rate": 0.0001, "loss": 1.7332, "step": 945 }, { "epoch": 0.22972316658572123, "grad_norm": 0.3359723687171936, "learning_rate": 0.0001, "loss": 1.7791, "step": 946 }, { "epoch": 0.22996600291403593, "grad_norm": 0.32070714235305786, "learning_rate": 0.0001, "loss": 1.6934, "step": 947 }, { "epoch": 0.23020883924235067, "grad_norm": 0.34434184432029724, "learning_rate": 0.0001, "loss": 1.7682, "step": 948 }, { "epoch": 0.23045167557066537, "grad_norm": 0.3323972225189209, "learning_rate": 0.0001, "loss": 1.8066, "step": 949 }, { "epoch": 0.23069451189898008, "grad_norm": 0.34174755215644836, "learning_rate": 0.0001, "loss": 1.6917, "step": 950 }, { "epoch": 0.2309373482272948, "grad_norm": 0.34504199028015137, "learning_rate": 0.0001, "loss": 1.9412, "step": 951 }, { "epoch": 0.2311801845556095, "grad_norm": 0.36647096276283264, "learning_rate": 0.0001, "loss": 1.8285, "step": 952 }, { "epoch": 0.23142302088392425, "grad_norm": 0.34093159437179565, "learning_rate": 0.0001, "loss": 1.8101, "step": 953 }, { "epoch": 0.23166585721223895, "grad_norm": 0.33534058928489685, "learning_rate": 0.0001, "loss": 1.6945, "step": 954 }, { "epoch": 0.23190869354055366, "grad_norm": 0.3567858934402466, "learning_rate": 0.0001, "loss": 1.9115, "step": 955 }, { "epoch": 0.2321515298688684, "grad_norm": 0.3529800474643707, "learning_rate": 0.0001, "loss": 1.9022, "step": 956 }, { "epoch": 0.2323943661971831, "grad_norm": 0.345431923866272, "learning_rate": 0.0001, "loss": 1.7276, "step": 957 }, { "epoch": 0.23263720252549783, "grad_norm": 0.3385120630264282, "learning_rate": 0.0001, "loss": 1.8453, "step": 958 }, { "epoch": 0.23288003885381253, "grad_norm": 0.3333473205566406, "learning_rate": 0.0001, "loss": 1.702, "step": 959 }, { "epoch": 0.23312287518212724, "grad_norm": 0.33618485927581787, "learning_rate": 0.0001, "loss": 1.6041, "step": 960 }, { "epoch": 0.23336571151044197, "grad_norm": 0.33134040236473083, "learning_rate": 0.0001, "loss": 1.681, "step": 961 }, { "epoch": 0.23360854783875667, "grad_norm": 0.3481343984603882, "learning_rate": 0.0001, "loss": 1.7838, "step": 962 }, { "epoch": 0.2338513841670714, "grad_norm": 0.3489163815975189, "learning_rate": 0.0001, "loss": 1.6594, "step": 963 }, { "epoch": 0.2340942204953861, "grad_norm": 0.3307543992996216, "learning_rate": 0.0001, "loss": 1.5912, "step": 964 }, { "epoch": 0.23433705682370082, "grad_norm": 0.3228107690811157, "learning_rate": 0.0001, "loss": 1.6931, "step": 965 }, { "epoch": 0.23457989315201555, "grad_norm": 0.3382891118526459, "learning_rate": 0.0001, "loss": 1.7528, "step": 966 }, { "epoch": 0.23482272948033026, "grad_norm": 0.325954794883728, "learning_rate": 0.0001, "loss": 1.7212, "step": 967 }, { "epoch": 0.23506556580864496, "grad_norm": 0.3358007073402405, "learning_rate": 0.0001, "loss": 1.8042, "step": 968 }, { "epoch": 0.2353084021369597, "grad_norm": 0.32503238320350647, "learning_rate": 0.0001, "loss": 1.6813, "step": 969 }, { "epoch": 0.2355512384652744, "grad_norm": 0.32884976267814636, "learning_rate": 0.0001, "loss": 1.6773, "step": 970 }, { "epoch": 0.23579407479358913, "grad_norm": 0.3466799259185791, "learning_rate": 0.0001, "loss": 1.8319, "step": 971 }, { "epoch": 0.23603691112190384, "grad_norm": 0.36012017726898193, "learning_rate": 0.0001, "loss": 1.8824, "step": 972 }, { "epoch": 0.23627974745021854, "grad_norm": 0.3252657353878021, "learning_rate": 0.0001, "loss": 1.7845, "step": 973 }, { "epoch": 0.23652258377853327, "grad_norm": 0.36254772543907166, "learning_rate": 0.0001, "loss": 1.776, "step": 974 }, { "epoch": 0.23676542010684798, "grad_norm": 0.369875431060791, "learning_rate": 0.0001, "loss": 1.7605, "step": 975 }, { "epoch": 0.2370082564351627, "grad_norm": 0.36949196457862854, "learning_rate": 0.0001, "loss": 1.9715, "step": 976 }, { "epoch": 0.23725109276347742, "grad_norm": 0.35166341066360474, "learning_rate": 0.0001, "loss": 1.8055, "step": 977 }, { "epoch": 0.23749392909179212, "grad_norm": 0.364952027797699, "learning_rate": 0.0001, "loss": 1.8131, "step": 978 }, { "epoch": 0.23773676542010685, "grad_norm": 0.3355357348918915, "learning_rate": 0.0001, "loss": 1.8336, "step": 979 }, { "epoch": 0.23797960174842156, "grad_norm": 0.34965017437934875, "learning_rate": 0.0001, "loss": 1.6352, "step": 980 }, { "epoch": 0.2382224380767363, "grad_norm": 0.350437194108963, "learning_rate": 0.0001, "loss": 1.6983, "step": 981 }, { "epoch": 0.238465274405051, "grad_norm": 0.32104507088661194, "learning_rate": 0.0001, "loss": 1.6894, "step": 982 }, { "epoch": 0.2387081107333657, "grad_norm": 0.32047441601753235, "learning_rate": 0.0001, "loss": 1.4182, "step": 983 }, { "epoch": 0.23895094706168044, "grad_norm": 0.3423304855823517, "learning_rate": 0.0001, "loss": 1.7036, "step": 984 }, { "epoch": 0.23919378338999514, "grad_norm": 0.3440588712692261, "learning_rate": 0.0001, "loss": 1.8234, "step": 985 }, { "epoch": 0.23943661971830985, "grad_norm": 0.32514244318008423, "learning_rate": 0.0001, "loss": 1.7542, "step": 986 }, { "epoch": 0.23967945604662458, "grad_norm": 0.32740339636802673, "learning_rate": 0.0001, "loss": 1.572, "step": 987 }, { "epoch": 0.23992229237493928, "grad_norm": 0.4369216561317444, "learning_rate": 0.0001, "loss": 1.7198, "step": 988 }, { "epoch": 0.24016512870325402, "grad_norm": 0.35954874753952026, "learning_rate": 0.0001, "loss": 1.899, "step": 989 }, { "epoch": 0.24040796503156872, "grad_norm": 0.3517872393131256, "learning_rate": 0.0001, "loss": 1.8624, "step": 990 }, { "epoch": 0.24065080135988343, "grad_norm": 0.36518657207489014, "learning_rate": 0.0001, "loss": 1.9199, "step": 991 }, { "epoch": 0.24089363768819816, "grad_norm": 0.3197648525238037, "learning_rate": 0.0001, "loss": 1.5309, "step": 992 }, { "epoch": 0.24113647401651286, "grad_norm": 0.35127437114715576, "learning_rate": 0.0001, "loss": 1.7003, "step": 993 }, { "epoch": 0.2413793103448276, "grad_norm": 0.355819433927536, "learning_rate": 0.0001, "loss": 1.8029, "step": 994 }, { "epoch": 0.2416221466731423, "grad_norm": 0.350644588470459, "learning_rate": 0.0001, "loss": 1.8459, "step": 995 }, { "epoch": 0.241864983001457, "grad_norm": 0.3549037575721741, "learning_rate": 0.0001, "loss": 1.6761, "step": 996 }, { "epoch": 0.24210781932977174, "grad_norm": 0.335479199886322, "learning_rate": 0.0001, "loss": 1.6987, "step": 997 }, { "epoch": 0.24235065565808644, "grad_norm": 0.3091221749782562, "learning_rate": 0.0001, "loss": 1.7735, "step": 998 }, { "epoch": 0.24259349198640118, "grad_norm": 0.3324226140975952, "learning_rate": 0.0001, "loss": 1.7726, "step": 999 }, { "epoch": 0.24283632831471588, "grad_norm": 0.35145866870880127, "learning_rate": 0.0001, "loss": 1.8383, "step": 1000 }, { "epoch": 0.2430791646430306, "grad_norm": 0.6738609671592712, "learning_rate": 0.0001, "loss": 1.7831, "step": 1001 }, { "epoch": 0.24332200097134532, "grad_norm": 0.34129729866981506, "learning_rate": 0.0001, "loss": 1.6946, "step": 1002 }, { "epoch": 0.24356483729966003, "grad_norm": 0.34623393416404724, "learning_rate": 0.0001, "loss": 1.8287, "step": 1003 }, { "epoch": 0.24380767362797476, "grad_norm": 0.32859230041503906, "learning_rate": 0.0001, "loss": 1.6113, "step": 1004 }, { "epoch": 0.24405050995628946, "grad_norm": 0.3451049327850342, "learning_rate": 0.0001, "loss": 1.8371, "step": 1005 }, { "epoch": 0.24429334628460417, "grad_norm": 0.3516858220100403, "learning_rate": 0.0001, "loss": 1.7367, "step": 1006 }, { "epoch": 0.2445361826129189, "grad_norm": 0.35251277685165405, "learning_rate": 0.0001, "loss": 1.8181, "step": 1007 }, { "epoch": 0.2447790189412336, "grad_norm": 0.3534775972366333, "learning_rate": 0.0001, "loss": 1.779, "step": 1008 }, { "epoch": 0.2450218552695483, "grad_norm": 0.35041284561157227, "learning_rate": 0.0001, "loss": 1.6782, "step": 1009 }, { "epoch": 0.24526469159786304, "grad_norm": 0.34657296538352966, "learning_rate": 0.0001, "loss": 1.7771, "step": 1010 }, { "epoch": 0.24550752792617775, "grad_norm": 0.33760038018226624, "learning_rate": 0.0001, "loss": 1.6074, "step": 1011 }, { "epoch": 0.24575036425449248, "grad_norm": 0.33262863755226135, "learning_rate": 0.0001, "loss": 1.5741, "step": 1012 }, { "epoch": 0.2459932005828072, "grad_norm": 0.358273983001709, "learning_rate": 0.0001, "loss": 1.8846, "step": 1013 }, { "epoch": 0.2462360369111219, "grad_norm": 0.33746251463890076, "learning_rate": 0.0001, "loss": 1.7365, "step": 1014 }, { "epoch": 0.24647887323943662, "grad_norm": 0.3334433436393738, "learning_rate": 0.0001, "loss": 1.8094, "step": 1015 }, { "epoch": 0.24672170956775133, "grad_norm": 0.3502381443977356, "learning_rate": 0.0001, "loss": 1.8298, "step": 1016 }, { "epoch": 0.24696454589606606, "grad_norm": 0.3290548026561737, "learning_rate": 0.0001, "loss": 1.6413, "step": 1017 }, { "epoch": 0.24720738222438077, "grad_norm": 0.3583875000476837, "learning_rate": 0.0001, "loss": 1.7214, "step": 1018 }, { "epoch": 0.24745021855269547, "grad_norm": 0.3353625535964966, "learning_rate": 0.0001, "loss": 1.7246, "step": 1019 }, { "epoch": 0.2476930548810102, "grad_norm": 0.3170337975025177, "learning_rate": 0.0001, "loss": 1.6516, "step": 1020 }, { "epoch": 0.2479358912093249, "grad_norm": 0.3242403268814087, "learning_rate": 0.0001, "loss": 1.6552, "step": 1021 }, { "epoch": 0.24817872753763964, "grad_norm": 0.3511442542076111, "learning_rate": 0.0001, "loss": 1.5973, "step": 1022 }, { "epoch": 0.24842156386595435, "grad_norm": 0.33678367733955383, "learning_rate": 0.0001, "loss": 1.7889, "step": 1023 }, { "epoch": 0.24866440019426905, "grad_norm": 0.37609171867370605, "learning_rate": 0.0001, "loss": 1.9319, "step": 1024 }, { "epoch": 0.24890723652258379, "grad_norm": 0.33121398091316223, "learning_rate": 0.0001, "loss": 1.5948, "step": 1025 }, { "epoch": 0.2491500728508985, "grad_norm": 0.3493736684322357, "learning_rate": 0.0001, "loss": 1.8547, "step": 1026 }, { "epoch": 0.24939290917921322, "grad_norm": 0.35862937569618225, "learning_rate": 0.0001, "loss": 1.6668, "step": 1027 }, { "epoch": 0.24963574550752793, "grad_norm": 0.34165531396865845, "learning_rate": 0.0001, "loss": 1.6661, "step": 1028 }, { "epoch": 0.24987858183584263, "grad_norm": 0.33531108498573303, "learning_rate": 0.0001, "loss": 1.8937, "step": 1029 }, { "epoch": 0.25012141816415734, "grad_norm": 0.34681662917137146, "learning_rate": 0.0001, "loss": 1.7362, "step": 1030 }, { "epoch": 0.25036425449247207, "grad_norm": 0.3579552471637726, "learning_rate": 0.0001, "loss": 1.8967, "step": 1031 }, { "epoch": 0.2506070908207868, "grad_norm": 0.3542710542678833, "learning_rate": 0.0001, "loss": 1.6862, "step": 1032 }, { "epoch": 0.2508499271491015, "grad_norm": 0.3317181169986725, "learning_rate": 0.0001, "loss": 1.816, "step": 1033 }, { "epoch": 0.2510927634774162, "grad_norm": 0.34695062041282654, "learning_rate": 0.0001, "loss": 1.8554, "step": 1034 }, { "epoch": 0.25133559980573095, "grad_norm": 0.3395116329193115, "learning_rate": 0.0001, "loss": 1.8296, "step": 1035 }, { "epoch": 0.2515784361340457, "grad_norm": 0.3394494354724884, "learning_rate": 0.0001, "loss": 1.7659, "step": 1036 }, { "epoch": 0.25182127246236036, "grad_norm": 0.34419873356819153, "learning_rate": 0.0001, "loss": 1.8675, "step": 1037 }, { "epoch": 0.2520641087906751, "grad_norm": 0.32015788555145264, "learning_rate": 0.0001, "loss": 1.8231, "step": 1038 }, { "epoch": 0.2523069451189898, "grad_norm": 0.33248281478881836, "learning_rate": 0.0001, "loss": 1.6975, "step": 1039 }, { "epoch": 0.2525497814473045, "grad_norm": 0.3703489303588867, "learning_rate": 0.0001, "loss": 1.7294, "step": 1040 }, { "epoch": 0.25279261777561923, "grad_norm": 0.353352427482605, "learning_rate": 0.0001, "loss": 1.8938, "step": 1041 }, { "epoch": 0.25303545410393397, "grad_norm": 0.3542862832546234, "learning_rate": 0.0001, "loss": 1.744, "step": 1042 }, { "epoch": 0.25327829043224864, "grad_norm": 0.3363003134727478, "learning_rate": 0.0001, "loss": 1.6655, "step": 1043 }, { "epoch": 0.2535211267605634, "grad_norm": 0.32284411787986755, "learning_rate": 0.0001, "loss": 1.6319, "step": 1044 }, { "epoch": 0.2537639630888781, "grad_norm": 0.33836817741394043, "learning_rate": 0.0001, "loss": 1.6846, "step": 1045 }, { "epoch": 0.2540067994171928, "grad_norm": 0.34558039903640747, "learning_rate": 0.0001, "loss": 1.6283, "step": 1046 }, { "epoch": 0.2542496357455075, "grad_norm": 0.3438437879085541, "learning_rate": 0.0001, "loss": 1.7201, "step": 1047 }, { "epoch": 0.25449247207382225, "grad_norm": 0.3524024486541748, "learning_rate": 0.0001, "loss": 1.8669, "step": 1048 }, { "epoch": 0.254735308402137, "grad_norm": 0.34446337819099426, "learning_rate": 0.0001, "loss": 1.838, "step": 1049 }, { "epoch": 0.25497814473045166, "grad_norm": 0.3405302166938782, "learning_rate": 0.0001, "loss": 1.5848, "step": 1050 }, { "epoch": 0.2552209810587664, "grad_norm": 0.3391573429107666, "learning_rate": 0.0001, "loss": 1.7496, "step": 1051 }, { "epoch": 0.2554638173870811, "grad_norm": 0.37194111943244934, "learning_rate": 0.0001, "loss": 1.7892, "step": 1052 }, { "epoch": 0.2557066537153958, "grad_norm": 0.37367933988571167, "learning_rate": 0.0001, "loss": 1.8058, "step": 1053 }, { "epoch": 0.25594949004371054, "grad_norm": 0.3580639958381653, "learning_rate": 0.0001, "loss": 1.6933, "step": 1054 }, { "epoch": 0.25619232637202527, "grad_norm": 0.3281092941761017, "learning_rate": 0.0001, "loss": 1.5708, "step": 1055 }, { "epoch": 0.25643516270033995, "grad_norm": 0.3570118546485901, "learning_rate": 0.0001, "loss": 1.8818, "step": 1056 }, { "epoch": 0.2566779990286547, "grad_norm": 0.358441561460495, "learning_rate": 0.0001, "loss": 1.943, "step": 1057 }, { "epoch": 0.2569208353569694, "grad_norm": 0.3399358093738556, "learning_rate": 0.0001, "loss": 1.746, "step": 1058 }, { "epoch": 0.25716367168528415, "grad_norm": 0.34234362840652466, "learning_rate": 0.0001, "loss": 1.6475, "step": 1059 }, { "epoch": 0.2574065080135988, "grad_norm": 0.34568747878074646, "learning_rate": 0.0001, "loss": 1.6586, "step": 1060 }, { "epoch": 0.25764934434191356, "grad_norm": 0.38038501143455505, "learning_rate": 0.0001, "loss": 1.6381, "step": 1061 }, { "epoch": 0.2578921806702283, "grad_norm": 0.33781126141548157, "learning_rate": 0.0001, "loss": 1.7275, "step": 1062 }, { "epoch": 0.25813501699854297, "grad_norm": 0.36487317085266113, "learning_rate": 0.0001, "loss": 1.7277, "step": 1063 }, { "epoch": 0.2583778533268577, "grad_norm": 0.35598310828208923, "learning_rate": 0.0001, "loss": 1.7926, "step": 1064 }, { "epoch": 0.25862068965517243, "grad_norm": 0.3338935673236847, "learning_rate": 0.0001, "loss": 1.5767, "step": 1065 }, { "epoch": 0.2588635259834871, "grad_norm": 0.3760107159614563, "learning_rate": 0.0001, "loss": 2.0399, "step": 1066 }, { "epoch": 0.25910636231180184, "grad_norm": 0.3443123698234558, "learning_rate": 0.0001, "loss": 1.6412, "step": 1067 }, { "epoch": 0.2593491986401166, "grad_norm": 0.32958465814590454, "learning_rate": 0.0001, "loss": 1.6551, "step": 1068 }, { "epoch": 0.25959203496843125, "grad_norm": 0.36839255690574646, "learning_rate": 0.0001, "loss": 1.805, "step": 1069 }, { "epoch": 0.259834871296746, "grad_norm": 0.33118322491645813, "learning_rate": 0.0001, "loss": 1.811, "step": 1070 }, { "epoch": 0.2600777076250607, "grad_norm": 0.33004656434059143, "learning_rate": 0.0001, "loss": 1.7253, "step": 1071 }, { "epoch": 0.26032054395337545, "grad_norm": 0.34900596737861633, "learning_rate": 0.0001, "loss": 1.8114, "step": 1072 }, { "epoch": 0.2605633802816901, "grad_norm": 0.31525158882141113, "learning_rate": 0.0001, "loss": 1.5306, "step": 1073 }, { "epoch": 0.26080621661000486, "grad_norm": 0.3604126274585724, "learning_rate": 0.0001, "loss": 1.9126, "step": 1074 }, { "epoch": 0.2610490529383196, "grad_norm": 0.36580803990364075, "learning_rate": 0.0001, "loss": 1.7778, "step": 1075 }, { "epoch": 0.26129188926663427, "grad_norm": 0.32837146520614624, "learning_rate": 0.0001, "loss": 1.5917, "step": 1076 }, { "epoch": 0.261534725594949, "grad_norm": 0.35636448860168457, "learning_rate": 0.0001, "loss": 1.8045, "step": 1077 }, { "epoch": 0.26177756192326374, "grad_norm": 0.3386078476905823, "learning_rate": 0.0001, "loss": 1.8125, "step": 1078 }, { "epoch": 0.2620203982515784, "grad_norm": 0.32973551750183105, "learning_rate": 0.0001, "loss": 1.7973, "step": 1079 }, { "epoch": 0.26226323457989315, "grad_norm": 0.3724091947078705, "learning_rate": 0.0001, "loss": 1.9441, "step": 1080 }, { "epoch": 0.2625060709082079, "grad_norm": 0.345823734998703, "learning_rate": 0.0001, "loss": 1.7337, "step": 1081 }, { "epoch": 0.2627489072365226, "grad_norm": 0.369800329208374, "learning_rate": 0.0001, "loss": 1.7492, "step": 1082 }, { "epoch": 0.2629917435648373, "grad_norm": 0.35781586170196533, "learning_rate": 0.0001, "loss": 1.8698, "step": 1083 }, { "epoch": 0.263234579893152, "grad_norm": 0.3464943766593933, "learning_rate": 0.0001, "loss": 1.7151, "step": 1084 }, { "epoch": 0.26347741622146675, "grad_norm": 0.34492209553718567, "learning_rate": 0.0001, "loss": 1.8346, "step": 1085 }, { "epoch": 0.26372025254978143, "grad_norm": 0.32053568959236145, "learning_rate": 0.0001, "loss": 1.7688, "step": 1086 }, { "epoch": 0.26396308887809616, "grad_norm": 0.30503571033477783, "learning_rate": 0.0001, "loss": 1.6619, "step": 1087 }, { "epoch": 0.2642059252064109, "grad_norm": 0.3440854847431183, "learning_rate": 0.0001, "loss": 1.9203, "step": 1088 }, { "epoch": 0.2644487615347256, "grad_norm": 0.3316665589809418, "learning_rate": 0.0001, "loss": 1.833, "step": 1089 }, { "epoch": 0.2646915978630403, "grad_norm": 0.34309491515159607, "learning_rate": 0.0001, "loss": 1.9606, "step": 1090 }, { "epoch": 0.26493443419135504, "grad_norm": 0.3301270008087158, "learning_rate": 0.0001, "loss": 1.8049, "step": 1091 }, { "epoch": 0.2651772705196697, "grad_norm": 0.32701781392097473, "learning_rate": 0.0001, "loss": 1.5786, "step": 1092 }, { "epoch": 0.26542010684798445, "grad_norm": 0.3516250550746918, "learning_rate": 0.0001, "loss": 1.8085, "step": 1093 }, { "epoch": 0.2656629431762992, "grad_norm": 0.34311559796333313, "learning_rate": 0.0001, "loss": 1.8699, "step": 1094 }, { "epoch": 0.2659057795046139, "grad_norm": 0.3706408441066742, "learning_rate": 0.0001, "loss": 1.621, "step": 1095 }, { "epoch": 0.2661486158329286, "grad_norm": 0.43447747826576233, "learning_rate": 0.0001, "loss": 1.6379, "step": 1096 }, { "epoch": 0.2663914521612433, "grad_norm": 0.35497137904167175, "learning_rate": 0.0001, "loss": 1.8351, "step": 1097 }, { "epoch": 0.26663428848955806, "grad_norm": 0.3245193064212799, "learning_rate": 0.0001, "loss": 1.5895, "step": 1098 }, { "epoch": 0.26687712481787274, "grad_norm": 0.3037337362766266, "learning_rate": 0.0001, "loss": 1.5043, "step": 1099 }, { "epoch": 0.26711996114618747, "grad_norm": 0.35018908977508545, "learning_rate": 0.0001, "loss": 1.7701, "step": 1100 }, { "epoch": 0.2673627974745022, "grad_norm": 0.3398996889591217, "learning_rate": 0.0001, "loss": 1.6926, "step": 1101 }, { "epoch": 0.2676056338028169, "grad_norm": 0.36320433020591736, "learning_rate": 0.0001, "loss": 1.8357, "step": 1102 }, { "epoch": 0.2678484701311316, "grad_norm": 0.3703799843788147, "learning_rate": 0.0001, "loss": 1.7514, "step": 1103 }, { "epoch": 0.26809130645944634, "grad_norm": 0.34844735264778137, "learning_rate": 0.0001, "loss": 1.7834, "step": 1104 }, { "epoch": 0.2683341427877611, "grad_norm": 0.3272978961467743, "learning_rate": 0.0001, "loss": 1.7616, "step": 1105 }, { "epoch": 0.26857697911607575, "grad_norm": 0.33166933059692383, "learning_rate": 0.0001, "loss": 1.6827, "step": 1106 }, { "epoch": 0.2688198154443905, "grad_norm": 0.32959240674972534, "learning_rate": 0.0001, "loss": 1.8041, "step": 1107 }, { "epoch": 0.2690626517727052, "grad_norm": 0.3293261229991913, "learning_rate": 0.0001, "loss": 1.5557, "step": 1108 }, { "epoch": 0.2693054881010199, "grad_norm": 0.3458395004272461, "learning_rate": 0.0001, "loss": 1.864, "step": 1109 }, { "epoch": 0.26954832442933463, "grad_norm": 0.3345559537410736, "learning_rate": 0.0001, "loss": 1.8151, "step": 1110 }, { "epoch": 0.26979116075764936, "grad_norm": 0.3532974123954773, "learning_rate": 0.0001, "loss": 1.7696, "step": 1111 }, { "epoch": 0.27003399708596404, "grad_norm": 0.32688626646995544, "learning_rate": 0.0001, "loss": 1.6863, "step": 1112 }, { "epoch": 0.27027683341427877, "grad_norm": 0.35777220129966736, "learning_rate": 0.0001, "loss": 1.7081, "step": 1113 }, { "epoch": 0.2705196697425935, "grad_norm": 0.3583647906780243, "learning_rate": 0.0001, "loss": 1.7594, "step": 1114 }, { "epoch": 0.2707625060709082, "grad_norm": 0.34510111808776855, "learning_rate": 0.0001, "loss": 1.7195, "step": 1115 }, { "epoch": 0.2710053423992229, "grad_norm": 0.3345058858394623, "learning_rate": 0.0001, "loss": 1.8784, "step": 1116 }, { "epoch": 0.27124817872753765, "grad_norm": 0.34480851888656616, "learning_rate": 0.0001, "loss": 1.6358, "step": 1117 }, { "epoch": 0.2714910150558524, "grad_norm": 0.33593034744262695, "learning_rate": 0.0001, "loss": 1.7992, "step": 1118 }, { "epoch": 0.27173385138416706, "grad_norm": 0.34252050518989563, "learning_rate": 0.0001, "loss": 1.6939, "step": 1119 }, { "epoch": 0.2719766877124818, "grad_norm": 0.3233051598072052, "learning_rate": 0.0001, "loss": 1.5801, "step": 1120 }, { "epoch": 0.2722195240407965, "grad_norm": 0.32091420888900757, "learning_rate": 0.0001, "loss": 1.8013, "step": 1121 }, { "epoch": 0.2724623603691112, "grad_norm": 0.35031384229660034, "learning_rate": 0.0001, "loss": 1.7404, "step": 1122 }, { "epoch": 0.27270519669742593, "grad_norm": 0.33827584981918335, "learning_rate": 0.0001, "loss": 1.9034, "step": 1123 }, { "epoch": 0.27294803302574067, "grad_norm": 0.3441354036331177, "learning_rate": 0.0001, "loss": 1.7871, "step": 1124 }, { "epoch": 0.27319086935405534, "grad_norm": 0.33458590507507324, "learning_rate": 0.0001, "loss": 1.6163, "step": 1125 }, { "epoch": 0.2734337056823701, "grad_norm": 0.36066651344299316, "learning_rate": 0.0001, "loss": 1.9302, "step": 1126 }, { "epoch": 0.2736765420106848, "grad_norm": 0.3288789689540863, "learning_rate": 0.0001, "loss": 1.6241, "step": 1127 }, { "epoch": 0.27391937833899954, "grad_norm": 0.35188278555870056, "learning_rate": 0.0001, "loss": 1.8012, "step": 1128 }, { "epoch": 0.2741622146673142, "grad_norm": 0.33101344108581543, "learning_rate": 0.0001, "loss": 1.6815, "step": 1129 }, { "epoch": 0.27440505099562895, "grad_norm": 0.3569447100162506, "learning_rate": 0.0001, "loss": 1.7191, "step": 1130 }, { "epoch": 0.2746478873239437, "grad_norm": 0.35680562257766724, "learning_rate": 0.0001, "loss": 1.7218, "step": 1131 }, { "epoch": 0.27489072365225836, "grad_norm": 0.33136245608329773, "learning_rate": 0.0001, "loss": 1.6662, "step": 1132 }, { "epoch": 0.2751335599805731, "grad_norm": 0.3718142807483673, "learning_rate": 0.0001, "loss": 1.9081, "step": 1133 }, { "epoch": 0.2753763963088878, "grad_norm": 0.32091063261032104, "learning_rate": 0.0001, "loss": 1.7089, "step": 1134 }, { "epoch": 0.2756192326372025, "grad_norm": 0.3750685453414917, "learning_rate": 0.0001, "loss": 1.9496, "step": 1135 }, { "epoch": 0.27586206896551724, "grad_norm": 0.36933282017707825, "learning_rate": 0.0001, "loss": 1.7596, "step": 1136 }, { "epoch": 0.27610490529383197, "grad_norm": 0.32732218503952026, "learning_rate": 0.0001, "loss": 1.7492, "step": 1137 }, { "epoch": 0.27634774162214665, "grad_norm": 0.3513854444026947, "learning_rate": 0.0001, "loss": 1.9196, "step": 1138 }, { "epoch": 0.2765905779504614, "grad_norm": 0.3355165123939514, "learning_rate": 0.0001, "loss": 1.7641, "step": 1139 }, { "epoch": 0.2768334142787761, "grad_norm": 0.3600131869316101, "learning_rate": 0.0001, "loss": 1.8531, "step": 1140 }, { "epoch": 0.27707625060709085, "grad_norm": 0.34687429666519165, "learning_rate": 0.0001, "loss": 1.7288, "step": 1141 }, { "epoch": 0.2773190869354055, "grad_norm": 0.3468457758426666, "learning_rate": 0.0001, "loss": 1.8647, "step": 1142 }, { "epoch": 0.27756192326372026, "grad_norm": 0.36524146795272827, "learning_rate": 0.0001, "loss": 1.745, "step": 1143 }, { "epoch": 0.277804759592035, "grad_norm": 0.3697652220726013, "learning_rate": 0.0001, "loss": 1.7623, "step": 1144 }, { "epoch": 0.27804759592034967, "grad_norm": 0.3558076024055481, "learning_rate": 0.0001, "loss": 1.5631, "step": 1145 }, { "epoch": 0.2782904322486644, "grad_norm": 0.3579453229904175, "learning_rate": 0.0001, "loss": 1.7172, "step": 1146 }, { "epoch": 0.27853326857697913, "grad_norm": 0.35010629892349243, "learning_rate": 0.0001, "loss": 1.7867, "step": 1147 }, { "epoch": 0.2787761049052938, "grad_norm": 0.3663645386695862, "learning_rate": 0.0001, "loss": 1.8068, "step": 1148 }, { "epoch": 0.27901894123360854, "grad_norm": 0.34487295150756836, "learning_rate": 0.0001, "loss": 1.7174, "step": 1149 }, { "epoch": 0.2792617775619233, "grad_norm": 0.35026541352272034, "learning_rate": 0.0001, "loss": 1.6901, "step": 1150 }, { "epoch": 0.279504613890238, "grad_norm": 0.33784544467926025, "learning_rate": 0.0001, "loss": 1.8176, "step": 1151 }, { "epoch": 0.2797474502185527, "grad_norm": 0.35948890447616577, "learning_rate": 0.0001, "loss": 1.6484, "step": 1152 }, { "epoch": 0.2799902865468674, "grad_norm": 0.33784404397010803, "learning_rate": 0.0001, "loss": 1.829, "step": 1153 }, { "epoch": 0.28023312287518215, "grad_norm": 0.36918583512306213, "learning_rate": 0.0001, "loss": 1.691, "step": 1154 }, { "epoch": 0.2804759592034968, "grad_norm": 0.32872992753982544, "learning_rate": 0.0001, "loss": 1.4461, "step": 1155 }, { "epoch": 0.28071879553181156, "grad_norm": 0.3497970402240753, "learning_rate": 0.0001, "loss": 1.8181, "step": 1156 }, { "epoch": 0.2809616318601263, "grad_norm": 0.34661903977394104, "learning_rate": 0.0001, "loss": 1.9472, "step": 1157 }, { "epoch": 0.28120446818844097, "grad_norm": 0.34361758828163147, "learning_rate": 0.0001, "loss": 1.7651, "step": 1158 }, { "epoch": 0.2814473045167557, "grad_norm": 0.32696354389190674, "learning_rate": 0.0001, "loss": 1.4568, "step": 1159 }, { "epoch": 0.28169014084507044, "grad_norm": 0.3309074640274048, "learning_rate": 0.0001, "loss": 1.6149, "step": 1160 }, { "epoch": 0.2819329771733851, "grad_norm": 0.32438015937805176, "learning_rate": 0.0001, "loss": 1.5894, "step": 1161 }, { "epoch": 0.28217581350169985, "grad_norm": 0.36756324768066406, "learning_rate": 0.0001, "loss": 1.8005, "step": 1162 }, { "epoch": 0.2824186498300146, "grad_norm": 0.3359546661376953, "learning_rate": 0.0001, "loss": 1.6269, "step": 1163 }, { "epoch": 0.2826614861583293, "grad_norm": 0.3752913475036621, "learning_rate": 0.0001, "loss": 1.8105, "step": 1164 }, { "epoch": 0.282904322486644, "grad_norm": 0.33042916655540466, "learning_rate": 0.0001, "loss": 1.6781, "step": 1165 }, { "epoch": 0.2831471588149587, "grad_norm": 0.3419846296310425, "learning_rate": 0.0001, "loss": 1.7958, "step": 1166 }, { "epoch": 0.28338999514327345, "grad_norm": 0.354832261800766, "learning_rate": 0.0001, "loss": 1.8978, "step": 1167 }, { "epoch": 0.28363283147158813, "grad_norm": 0.3304416835308075, "learning_rate": 0.0001, "loss": 1.6946, "step": 1168 }, { "epoch": 0.28387566779990286, "grad_norm": 0.3565024435520172, "learning_rate": 0.0001, "loss": 1.7724, "step": 1169 }, { "epoch": 0.2841185041282176, "grad_norm": 0.341604083776474, "learning_rate": 0.0001, "loss": 1.764, "step": 1170 }, { "epoch": 0.2843613404565323, "grad_norm": 0.35261428356170654, "learning_rate": 0.0001, "loss": 1.7225, "step": 1171 }, { "epoch": 0.284604176784847, "grad_norm": 0.32757368683815, "learning_rate": 0.0001, "loss": 1.4222, "step": 1172 }, { "epoch": 0.28484701311316174, "grad_norm": 0.3484155237674713, "learning_rate": 0.0001, "loss": 1.7307, "step": 1173 }, { "epoch": 0.2850898494414764, "grad_norm": 0.3501245379447937, "learning_rate": 0.0001, "loss": 1.7643, "step": 1174 }, { "epoch": 0.28533268576979115, "grad_norm": 0.3286750614643097, "learning_rate": 0.0001, "loss": 1.6124, "step": 1175 }, { "epoch": 0.2855755220981059, "grad_norm": 0.3350943624973297, "learning_rate": 0.0001, "loss": 1.7167, "step": 1176 }, { "epoch": 0.2858183584264206, "grad_norm": 0.3862624764442444, "learning_rate": 0.0001, "loss": 1.8182, "step": 1177 }, { "epoch": 0.2860611947547353, "grad_norm": 0.4132660925388336, "learning_rate": 0.0001, "loss": 1.8159, "step": 1178 }, { "epoch": 0.28630403108305, "grad_norm": 0.3466343283653259, "learning_rate": 0.0001, "loss": 1.8844, "step": 1179 }, { "epoch": 0.28654686741136476, "grad_norm": 0.342422217130661, "learning_rate": 0.0001, "loss": 1.8049, "step": 1180 }, { "epoch": 0.28678970373967944, "grad_norm": 0.3274907171726227, "learning_rate": 0.0001, "loss": 1.8003, "step": 1181 }, { "epoch": 0.28703254006799417, "grad_norm": 0.34425753355026245, "learning_rate": 0.0001, "loss": 1.8901, "step": 1182 }, { "epoch": 0.2872753763963089, "grad_norm": 0.3420788645744324, "learning_rate": 0.0001, "loss": 1.8626, "step": 1183 }, { "epoch": 0.2875182127246236, "grad_norm": 0.37537306547164917, "learning_rate": 0.0001, "loss": 1.768, "step": 1184 }, { "epoch": 0.2877610490529383, "grad_norm": 0.3327960968017578, "learning_rate": 0.0001, "loss": 1.743, "step": 1185 }, { "epoch": 0.28800388538125304, "grad_norm": 0.3642200231552124, "learning_rate": 0.0001, "loss": 1.9741, "step": 1186 }, { "epoch": 0.2882467217095678, "grad_norm": 0.3397223949432373, "learning_rate": 0.0001, "loss": 1.8706, "step": 1187 }, { "epoch": 0.28848955803788245, "grad_norm": 0.32793283462524414, "learning_rate": 0.0001, "loss": 1.6044, "step": 1188 }, { "epoch": 0.2887323943661972, "grad_norm": 0.3629734516143799, "learning_rate": 0.0001, "loss": 1.7398, "step": 1189 }, { "epoch": 0.2889752306945119, "grad_norm": 0.34847310185432434, "learning_rate": 0.0001, "loss": 1.687, "step": 1190 }, { "epoch": 0.2892180670228266, "grad_norm": 0.34422287344932556, "learning_rate": 0.0001, "loss": 1.7657, "step": 1191 }, { "epoch": 0.28946090335114133, "grad_norm": 0.34734368324279785, "learning_rate": 0.0001, "loss": 1.8799, "step": 1192 }, { "epoch": 0.28970373967945606, "grad_norm": 0.3396027386188507, "learning_rate": 0.0001, "loss": 1.7647, "step": 1193 }, { "epoch": 0.28994657600777074, "grad_norm": 0.3526374399662018, "learning_rate": 0.0001, "loss": 1.9586, "step": 1194 }, { "epoch": 0.2901894123360855, "grad_norm": 0.34345927834510803, "learning_rate": 0.0001, "loss": 1.7944, "step": 1195 }, { "epoch": 0.2904322486644002, "grad_norm": 0.3783220052719116, "learning_rate": 0.0001, "loss": 1.681, "step": 1196 }, { "epoch": 0.2906750849927149, "grad_norm": 0.35073280334472656, "learning_rate": 0.0001, "loss": 1.5092, "step": 1197 }, { "epoch": 0.2909179213210296, "grad_norm": 0.352989137172699, "learning_rate": 0.0001, "loss": 1.7788, "step": 1198 }, { "epoch": 0.29116075764934435, "grad_norm": 0.346623033285141, "learning_rate": 0.0001, "loss": 1.6689, "step": 1199 }, { "epoch": 0.2914035939776591, "grad_norm": 0.3349616825580597, "learning_rate": 0.0001, "loss": 1.8613, "step": 1200 }, { "epoch": 0.29164643030597376, "grad_norm": 0.3176116943359375, "learning_rate": 0.0001, "loss": 1.5567, "step": 1201 }, { "epoch": 0.2918892666342885, "grad_norm": 0.316850870847702, "learning_rate": 0.0001, "loss": 1.6958, "step": 1202 }, { "epoch": 0.2921321029626032, "grad_norm": 0.36198094487190247, "learning_rate": 0.0001, "loss": 1.9053, "step": 1203 }, { "epoch": 0.2923749392909179, "grad_norm": 0.33802559971809387, "learning_rate": 0.0001, "loss": 1.6131, "step": 1204 }, { "epoch": 0.29261777561923263, "grad_norm": 0.353243887424469, "learning_rate": 0.0001, "loss": 1.6892, "step": 1205 }, { "epoch": 0.29286061194754737, "grad_norm": 0.3540343642234802, "learning_rate": 0.0001, "loss": 1.6646, "step": 1206 }, { "epoch": 0.29310344827586204, "grad_norm": 0.3387995958328247, "learning_rate": 0.0001, "loss": 1.7045, "step": 1207 }, { "epoch": 0.2933462846041768, "grad_norm": 0.3644068241119385, "learning_rate": 0.0001, "loss": 1.8486, "step": 1208 }, { "epoch": 0.2935891209324915, "grad_norm": 0.35250940918922424, "learning_rate": 0.0001, "loss": 1.7084, "step": 1209 }, { "epoch": 0.29383195726080624, "grad_norm": 0.3456801176071167, "learning_rate": 0.0001, "loss": 1.6556, "step": 1210 }, { "epoch": 0.2940747935891209, "grad_norm": 0.35365554690361023, "learning_rate": 0.0001, "loss": 1.8963, "step": 1211 }, { "epoch": 0.29431762991743565, "grad_norm": 0.3480612337589264, "learning_rate": 0.0001, "loss": 1.6345, "step": 1212 }, { "epoch": 0.2945604662457504, "grad_norm": 0.3324591815471649, "learning_rate": 0.0001, "loss": 1.7345, "step": 1213 }, { "epoch": 0.29480330257406506, "grad_norm": 0.36215439438819885, "learning_rate": 0.0001, "loss": 1.7907, "step": 1214 }, { "epoch": 0.2950461389023798, "grad_norm": 0.36731013655662537, "learning_rate": 0.0001, "loss": 1.7969, "step": 1215 }, { "epoch": 0.29528897523069453, "grad_norm": 0.3301365375518799, "learning_rate": 0.0001, "loss": 1.7202, "step": 1216 }, { "epoch": 0.2955318115590092, "grad_norm": 0.3361029624938965, "learning_rate": 0.0001, "loss": 1.6314, "step": 1217 }, { "epoch": 0.29577464788732394, "grad_norm": 0.3317160904407501, "learning_rate": 0.0001, "loss": 1.6849, "step": 1218 }, { "epoch": 0.29601748421563867, "grad_norm": 0.32502481341362, "learning_rate": 0.0001, "loss": 1.4891, "step": 1219 }, { "epoch": 0.29626032054395335, "grad_norm": 0.3280821740627289, "learning_rate": 0.0001, "loss": 1.6065, "step": 1220 }, { "epoch": 0.2965031568722681, "grad_norm": 0.3488386869430542, "learning_rate": 0.0001, "loss": 1.7172, "step": 1221 }, { "epoch": 0.2967459932005828, "grad_norm": 0.3545106053352356, "learning_rate": 0.0001, "loss": 1.7147, "step": 1222 }, { "epoch": 0.29698882952889755, "grad_norm": 0.34625324606895447, "learning_rate": 0.0001, "loss": 1.7451, "step": 1223 }, { "epoch": 0.2972316658572122, "grad_norm": 0.365535169839859, "learning_rate": 0.0001, "loss": 1.9306, "step": 1224 }, { "epoch": 0.29747450218552696, "grad_norm": 0.3703577518463135, "learning_rate": 0.0001, "loss": 1.7975, "step": 1225 }, { "epoch": 0.2977173385138417, "grad_norm": 0.4047102630138397, "learning_rate": 0.0001, "loss": 2.0506, "step": 1226 }, { "epoch": 0.29796017484215637, "grad_norm": 0.35949185490608215, "learning_rate": 0.0001, "loss": 1.9218, "step": 1227 }, { "epoch": 0.2982030111704711, "grad_norm": 0.34228986501693726, "learning_rate": 0.0001, "loss": 1.6128, "step": 1228 }, { "epoch": 0.29844584749878583, "grad_norm": 0.3485621511936188, "learning_rate": 0.0001, "loss": 1.8851, "step": 1229 }, { "epoch": 0.2986886838271005, "grad_norm": 0.3268779516220093, "learning_rate": 0.0001, "loss": 1.68, "step": 1230 }, { "epoch": 0.29893152015541524, "grad_norm": 0.3476763069629669, "learning_rate": 0.0001, "loss": 1.7868, "step": 1231 }, { "epoch": 0.29917435648373, "grad_norm": 0.34079062938690186, "learning_rate": 0.0001, "loss": 1.6513, "step": 1232 }, { "epoch": 0.2994171928120447, "grad_norm": 0.3342800736427307, "learning_rate": 0.0001, "loss": 1.6783, "step": 1233 }, { "epoch": 0.2996600291403594, "grad_norm": 0.3746899366378784, "learning_rate": 0.0001, "loss": 1.7811, "step": 1234 }, { "epoch": 0.2999028654686741, "grad_norm": 0.362958699464798, "learning_rate": 0.0001, "loss": 1.8097, "step": 1235 }, { "epoch": 0.30014570179698885, "grad_norm": 0.34721145033836365, "learning_rate": 0.0001, "loss": 1.8311, "step": 1236 }, { "epoch": 0.30038853812530353, "grad_norm": 0.3652319610118866, "learning_rate": 0.0001, "loss": 1.948, "step": 1237 }, { "epoch": 0.30063137445361826, "grad_norm": 0.331961452960968, "learning_rate": 0.0001, "loss": 1.6468, "step": 1238 }, { "epoch": 0.300874210781933, "grad_norm": 0.3217456638813019, "learning_rate": 0.0001, "loss": 1.7451, "step": 1239 }, { "epoch": 0.30111704711024767, "grad_norm": 0.3655998408794403, "learning_rate": 0.0001, "loss": 1.8643, "step": 1240 }, { "epoch": 0.3013598834385624, "grad_norm": 0.32531607151031494, "learning_rate": 0.0001, "loss": 1.713, "step": 1241 }, { "epoch": 0.30160271976687714, "grad_norm": 0.3473297357559204, "learning_rate": 0.0001, "loss": 1.7422, "step": 1242 }, { "epoch": 0.3018455560951918, "grad_norm": 0.33475473523139954, "learning_rate": 0.0001, "loss": 1.5828, "step": 1243 }, { "epoch": 0.30208839242350655, "grad_norm": 0.3307666480541229, "learning_rate": 0.0001, "loss": 1.6227, "step": 1244 }, { "epoch": 0.3023312287518213, "grad_norm": 0.345688134431839, "learning_rate": 0.0001, "loss": 1.8162, "step": 1245 }, { "epoch": 0.302574065080136, "grad_norm": 0.37213560938835144, "learning_rate": 0.0001, "loss": 1.777, "step": 1246 }, { "epoch": 0.3028169014084507, "grad_norm": 0.3556644320487976, "learning_rate": 0.0001, "loss": 1.7458, "step": 1247 }, { "epoch": 0.3030597377367654, "grad_norm": 0.31667470932006836, "learning_rate": 0.0001, "loss": 1.6664, "step": 1248 }, { "epoch": 0.30330257406508015, "grad_norm": 0.3247813284397125, "learning_rate": 0.0001, "loss": 1.5334, "step": 1249 }, { "epoch": 0.30354541039339483, "grad_norm": 0.3366319239139557, "learning_rate": 0.0001, "loss": 1.7826, "step": 1250 }, { "epoch": 0.30378824672170957, "grad_norm": 0.3648545444011688, "learning_rate": 0.0001, "loss": 1.8183, "step": 1251 }, { "epoch": 0.3040310830500243, "grad_norm": 0.33485549688339233, "learning_rate": 0.0001, "loss": 1.7598, "step": 1252 }, { "epoch": 0.304273919378339, "grad_norm": 0.3424319624900818, "learning_rate": 0.0001, "loss": 1.73, "step": 1253 }, { "epoch": 0.3045167557066537, "grad_norm": 0.3423166871070862, "learning_rate": 0.0001, "loss": 1.9582, "step": 1254 }, { "epoch": 0.30475959203496844, "grad_norm": 0.3264666795730591, "learning_rate": 0.0001, "loss": 1.6974, "step": 1255 }, { "epoch": 0.3050024283632832, "grad_norm": 0.3453938663005829, "learning_rate": 0.0001, "loss": 1.7855, "step": 1256 }, { "epoch": 0.30524526469159785, "grad_norm": 0.34164607524871826, "learning_rate": 0.0001, "loss": 1.7747, "step": 1257 }, { "epoch": 0.3054881010199126, "grad_norm": 0.3310208022594452, "learning_rate": 0.0001, "loss": 1.6425, "step": 1258 }, { "epoch": 0.3057309373482273, "grad_norm": 0.37676697969436646, "learning_rate": 0.0001, "loss": 1.7901, "step": 1259 }, { "epoch": 0.305973773676542, "grad_norm": 0.3380137085914612, "learning_rate": 0.0001, "loss": 1.7139, "step": 1260 }, { "epoch": 0.3062166100048567, "grad_norm": 0.3388557732105255, "learning_rate": 0.0001, "loss": 1.7235, "step": 1261 }, { "epoch": 0.30645944633317146, "grad_norm": 0.31976181268692017, "learning_rate": 0.0001, "loss": 1.6238, "step": 1262 }, { "epoch": 0.30670228266148614, "grad_norm": 0.3477676808834076, "learning_rate": 0.0001, "loss": 1.7641, "step": 1263 }, { "epoch": 0.30694511898980087, "grad_norm": 0.3569043278694153, "learning_rate": 0.0001, "loss": 1.852, "step": 1264 }, { "epoch": 0.3071879553181156, "grad_norm": 0.3672388792037964, "learning_rate": 0.0001, "loss": 1.9131, "step": 1265 }, { "epoch": 0.3074307916464303, "grad_norm": 0.3455599546432495, "learning_rate": 0.0001, "loss": 1.5969, "step": 1266 }, { "epoch": 0.307673627974745, "grad_norm": 0.33061739802360535, "learning_rate": 0.0001, "loss": 1.5682, "step": 1267 }, { "epoch": 0.30791646430305974, "grad_norm": 0.34152230620384216, "learning_rate": 0.0001, "loss": 1.5829, "step": 1268 }, { "epoch": 0.3081593006313745, "grad_norm": 0.36030837893486023, "learning_rate": 0.0001, "loss": 1.696, "step": 1269 }, { "epoch": 0.30840213695968915, "grad_norm": 0.3398350179195404, "learning_rate": 0.0001, "loss": 1.9147, "step": 1270 }, { "epoch": 0.3086449732880039, "grad_norm": 0.33502107858657837, "learning_rate": 0.0001, "loss": 1.696, "step": 1271 }, { "epoch": 0.3088878096163186, "grad_norm": 0.34701743721961975, "learning_rate": 0.0001, "loss": 1.625, "step": 1272 }, { "epoch": 0.3091306459446333, "grad_norm": 0.3735635578632355, "learning_rate": 0.0001, "loss": 1.7718, "step": 1273 }, { "epoch": 0.30937348227294803, "grad_norm": 0.35327258706092834, "learning_rate": 0.0001, "loss": 1.7464, "step": 1274 }, { "epoch": 0.30961631860126276, "grad_norm": 0.35081592202186584, "learning_rate": 0.0001, "loss": 1.7941, "step": 1275 }, { "epoch": 0.30985915492957744, "grad_norm": 0.32337236404418945, "learning_rate": 0.0001, "loss": 1.5163, "step": 1276 }, { "epoch": 0.3101019912578922, "grad_norm": 0.32143720984458923, "learning_rate": 0.0001, "loss": 1.5397, "step": 1277 }, { "epoch": 0.3103448275862069, "grad_norm": 0.3316914439201355, "learning_rate": 0.0001, "loss": 1.7304, "step": 1278 }, { "epoch": 0.31058766391452164, "grad_norm": 0.336727112531662, "learning_rate": 0.0001, "loss": 1.6108, "step": 1279 }, { "epoch": 0.3108305002428363, "grad_norm": 0.33665451407432556, "learning_rate": 0.0001, "loss": 1.6865, "step": 1280 }, { "epoch": 0.31107333657115105, "grad_norm": 0.34888777136802673, "learning_rate": 0.0001, "loss": 1.707, "step": 1281 }, { "epoch": 0.3113161728994658, "grad_norm": 0.3641833961009979, "learning_rate": 0.0001, "loss": 1.9235, "step": 1282 }, { "epoch": 0.31155900922778046, "grad_norm": 0.3506174087524414, "learning_rate": 0.0001, "loss": 1.7019, "step": 1283 }, { "epoch": 0.3118018455560952, "grad_norm": 0.33513206243515015, "learning_rate": 0.0001, "loss": 1.7835, "step": 1284 }, { "epoch": 0.3120446818844099, "grad_norm": 0.31959009170532227, "learning_rate": 0.0001, "loss": 1.6274, "step": 1285 }, { "epoch": 0.3122875182127246, "grad_norm": 0.33899250626564026, "learning_rate": 0.0001, "loss": 1.6284, "step": 1286 }, { "epoch": 0.31253035454103933, "grad_norm": 0.32353276014328003, "learning_rate": 0.0001, "loss": 1.7145, "step": 1287 }, { "epoch": 0.31277319086935407, "grad_norm": 0.3442525565624237, "learning_rate": 0.0001, "loss": 1.8363, "step": 1288 }, { "epoch": 0.31301602719766874, "grad_norm": 0.3439512550830841, "learning_rate": 0.0001, "loss": 1.76, "step": 1289 }, { "epoch": 0.3132588635259835, "grad_norm": 0.32078948616981506, "learning_rate": 0.0001, "loss": 1.7175, "step": 1290 }, { "epoch": 0.3135016998542982, "grad_norm": 0.33406755328178406, "learning_rate": 0.0001, "loss": 1.8142, "step": 1291 }, { "epoch": 0.31374453618261294, "grad_norm": 0.36002907156944275, "learning_rate": 0.0001, "loss": 1.7645, "step": 1292 }, { "epoch": 0.3139873725109276, "grad_norm": 0.33843985199928284, "learning_rate": 0.0001, "loss": 1.6883, "step": 1293 }, { "epoch": 0.31423020883924235, "grad_norm": 0.33737096190452576, "learning_rate": 0.0001, "loss": 1.7249, "step": 1294 }, { "epoch": 0.3144730451675571, "grad_norm": 0.36410626769065857, "learning_rate": 0.0001, "loss": 1.7059, "step": 1295 }, { "epoch": 0.31471588149587176, "grad_norm": 0.35024207830429077, "learning_rate": 0.0001, "loss": 1.8031, "step": 1296 }, { "epoch": 0.3149587178241865, "grad_norm": 0.35997697710990906, "learning_rate": 0.0001, "loss": 1.8436, "step": 1297 }, { "epoch": 0.31520155415250123, "grad_norm": 0.34690675139427185, "learning_rate": 0.0001, "loss": 1.8562, "step": 1298 }, { "epoch": 0.3154443904808159, "grad_norm": 0.3096066415309906, "learning_rate": 0.0001, "loss": 1.3673, "step": 1299 }, { "epoch": 0.31568722680913064, "grad_norm": 0.3499063551425934, "learning_rate": 0.0001, "loss": 1.7165, "step": 1300 }, { "epoch": 0.31593006313744537, "grad_norm": 0.3726784288883209, "learning_rate": 0.0001, "loss": 1.9799, "step": 1301 }, { "epoch": 0.3161728994657601, "grad_norm": 0.32575857639312744, "learning_rate": 0.0001, "loss": 1.6414, "step": 1302 }, { "epoch": 0.3164157357940748, "grad_norm": 0.3620957136154175, "learning_rate": 0.0001, "loss": 1.7306, "step": 1303 }, { "epoch": 0.3166585721223895, "grad_norm": 0.3882763683795929, "learning_rate": 0.0001, "loss": 1.8216, "step": 1304 }, { "epoch": 0.31690140845070425, "grad_norm": 0.37228524684906006, "learning_rate": 0.0001, "loss": 1.9724, "step": 1305 }, { "epoch": 0.3171442447790189, "grad_norm": 0.3398188650608063, "learning_rate": 0.0001, "loss": 1.7181, "step": 1306 }, { "epoch": 0.31738708110733366, "grad_norm": 0.3388833701610565, "learning_rate": 0.0001, "loss": 1.6542, "step": 1307 }, { "epoch": 0.3176299174356484, "grad_norm": 0.3429461717605591, "learning_rate": 0.0001, "loss": 1.6822, "step": 1308 }, { "epoch": 0.31787275376396307, "grad_norm": 0.3804921805858612, "learning_rate": 0.0001, "loss": 2.0971, "step": 1309 }, { "epoch": 0.3181155900922778, "grad_norm": 0.3579254448413849, "learning_rate": 0.0001, "loss": 1.7412, "step": 1310 }, { "epoch": 0.31835842642059253, "grad_norm": 0.3545834422111511, "learning_rate": 0.0001, "loss": 1.7221, "step": 1311 }, { "epoch": 0.3186012627489072, "grad_norm": 0.3544105887413025, "learning_rate": 0.0001, "loss": 1.8662, "step": 1312 }, { "epoch": 0.31884409907722194, "grad_norm": 0.35376471281051636, "learning_rate": 0.0001, "loss": 1.7822, "step": 1313 }, { "epoch": 0.3190869354055367, "grad_norm": 0.34648221731185913, "learning_rate": 0.0001, "loss": 1.6543, "step": 1314 }, { "epoch": 0.3193297717338514, "grad_norm": 0.35739588737487793, "learning_rate": 0.0001, "loss": 1.7637, "step": 1315 }, { "epoch": 0.3195726080621661, "grad_norm": 0.3418010473251343, "learning_rate": 0.0001, "loss": 1.7071, "step": 1316 }, { "epoch": 0.3198154443904808, "grad_norm": 0.34581372141838074, "learning_rate": 0.0001, "loss": 1.7706, "step": 1317 }, { "epoch": 0.32005828071879555, "grad_norm": 0.3538050651550293, "learning_rate": 0.0001, "loss": 1.6932, "step": 1318 }, { "epoch": 0.32030111704711023, "grad_norm": 0.3454187214374542, "learning_rate": 0.0001, "loss": 1.719, "step": 1319 }, { "epoch": 0.32054395337542496, "grad_norm": 0.35107696056365967, "learning_rate": 0.0001, "loss": 1.7574, "step": 1320 }, { "epoch": 0.3207867897037397, "grad_norm": 0.3261638879776001, "learning_rate": 0.0001, "loss": 1.6096, "step": 1321 }, { "epoch": 0.32102962603205437, "grad_norm": 0.36752843856811523, "learning_rate": 0.0001, "loss": 1.8988, "step": 1322 }, { "epoch": 0.3212724623603691, "grad_norm": 0.3847307562828064, "learning_rate": 0.0001, "loss": 1.9061, "step": 1323 }, { "epoch": 0.32151529868868384, "grad_norm": 0.3572913110256195, "learning_rate": 0.0001, "loss": 1.7083, "step": 1324 }, { "epoch": 0.32175813501699857, "grad_norm": 0.34111347794532776, "learning_rate": 0.0001, "loss": 1.6437, "step": 1325 }, { "epoch": 0.32200097134531325, "grad_norm": 0.346483439207077, "learning_rate": 0.0001, "loss": 1.6687, "step": 1326 }, { "epoch": 0.322243807673628, "grad_norm": 0.35447895526885986, "learning_rate": 0.0001, "loss": 1.83, "step": 1327 }, { "epoch": 0.3224866440019427, "grad_norm": 0.3246619999408722, "learning_rate": 0.0001, "loss": 1.5981, "step": 1328 }, { "epoch": 0.3227294803302574, "grad_norm": 0.3411337435245514, "learning_rate": 0.0001, "loss": 1.8093, "step": 1329 }, { "epoch": 0.3229723166585721, "grad_norm": 0.33755388855934143, "learning_rate": 0.0001, "loss": 1.7882, "step": 1330 }, { "epoch": 0.32321515298688686, "grad_norm": 0.3567180335521698, "learning_rate": 0.0001, "loss": 1.6274, "step": 1331 }, { "epoch": 0.32345798931520153, "grad_norm": 0.3355996906757355, "learning_rate": 0.0001, "loss": 1.6726, "step": 1332 }, { "epoch": 0.32370082564351627, "grad_norm": 0.34801527857780457, "learning_rate": 0.0001, "loss": 1.651, "step": 1333 }, { "epoch": 0.323943661971831, "grad_norm": 0.34723374247550964, "learning_rate": 0.0001, "loss": 1.8538, "step": 1334 }, { "epoch": 0.3241864983001457, "grad_norm": 0.33615392446517944, "learning_rate": 0.0001, "loss": 1.5849, "step": 1335 }, { "epoch": 0.3244293346284604, "grad_norm": 0.3725658655166626, "learning_rate": 0.0001, "loss": 1.8235, "step": 1336 }, { "epoch": 0.32467217095677514, "grad_norm": 0.3329296410083771, "learning_rate": 0.0001, "loss": 1.7168, "step": 1337 }, { "epoch": 0.3249150072850899, "grad_norm": 0.3577701449394226, "learning_rate": 0.0001, "loss": 1.7251, "step": 1338 }, { "epoch": 0.32515784361340455, "grad_norm": 0.33400166034698486, "learning_rate": 0.0001, "loss": 1.7042, "step": 1339 }, { "epoch": 0.3254006799417193, "grad_norm": 0.3208601474761963, "learning_rate": 0.0001, "loss": 1.5577, "step": 1340 }, { "epoch": 0.325643516270034, "grad_norm": 0.35561156272888184, "learning_rate": 0.0001, "loss": 1.7421, "step": 1341 }, { "epoch": 0.3258863525983487, "grad_norm": 0.3534224331378937, "learning_rate": 0.0001, "loss": 1.6784, "step": 1342 }, { "epoch": 0.3261291889266634, "grad_norm": 0.34640082716941833, "learning_rate": 0.0001, "loss": 1.7314, "step": 1343 }, { "epoch": 0.32637202525497816, "grad_norm": 0.35193905234336853, "learning_rate": 0.0001, "loss": 1.7245, "step": 1344 }, { "epoch": 0.32661486158329284, "grad_norm": 0.3425352871417999, "learning_rate": 0.0001, "loss": 1.7848, "step": 1345 }, { "epoch": 0.32685769791160757, "grad_norm": 0.343161016702652, "learning_rate": 0.0001, "loss": 1.6993, "step": 1346 }, { "epoch": 0.3271005342399223, "grad_norm": 0.36429017782211304, "learning_rate": 0.0001, "loss": 1.7049, "step": 1347 }, { "epoch": 0.32734337056823704, "grad_norm": 0.3147446811199188, "learning_rate": 0.0001, "loss": 1.6569, "step": 1348 }, { "epoch": 0.3275862068965517, "grad_norm": 0.3437577188014984, "learning_rate": 0.0001, "loss": 1.6792, "step": 1349 }, { "epoch": 0.32782904322486645, "grad_norm": 0.37101826071739197, "learning_rate": 0.0001, "loss": 1.6529, "step": 1350 }, { "epoch": 0.3280718795531812, "grad_norm": 0.36280542612075806, "learning_rate": 0.0001, "loss": 1.7888, "step": 1351 }, { "epoch": 0.32831471588149586, "grad_norm": 0.3654901087284088, "learning_rate": 0.0001, "loss": 1.9293, "step": 1352 }, { "epoch": 0.3285575522098106, "grad_norm": 0.3405821621417999, "learning_rate": 0.0001, "loss": 1.5597, "step": 1353 }, { "epoch": 0.3288003885381253, "grad_norm": 0.33608195185661316, "learning_rate": 0.0001, "loss": 1.7485, "step": 1354 }, { "epoch": 0.32904322486644, "grad_norm": 0.3530266284942627, "learning_rate": 0.0001, "loss": 1.7163, "step": 1355 }, { "epoch": 0.32928606119475473, "grad_norm": 0.35584160685539246, "learning_rate": 0.0001, "loss": 1.6821, "step": 1356 }, { "epoch": 0.32952889752306946, "grad_norm": 0.33496901392936707, "learning_rate": 0.0001, "loss": 1.5494, "step": 1357 }, { "epoch": 0.32977173385138414, "grad_norm": 0.33359116315841675, "learning_rate": 0.0001, "loss": 1.7098, "step": 1358 }, { "epoch": 0.3300145701796989, "grad_norm": 0.34813758730888367, "learning_rate": 0.0001, "loss": 1.8438, "step": 1359 }, { "epoch": 0.3302574065080136, "grad_norm": 0.37358787655830383, "learning_rate": 0.0001, "loss": 1.8214, "step": 1360 }, { "epoch": 0.33050024283632834, "grad_norm": 0.3805057406425476, "learning_rate": 0.0001, "loss": 1.8504, "step": 1361 }, { "epoch": 0.330743079164643, "grad_norm": 0.35961398482322693, "learning_rate": 0.0001, "loss": 1.8235, "step": 1362 }, { "epoch": 0.33098591549295775, "grad_norm": 0.37374040484428406, "learning_rate": 0.0001, "loss": 1.8868, "step": 1363 }, { "epoch": 0.3312287518212725, "grad_norm": 0.33554673194885254, "learning_rate": 0.0001, "loss": 1.6591, "step": 1364 }, { "epoch": 0.33147158814958716, "grad_norm": 0.3829343318939209, "learning_rate": 0.0001, "loss": 1.9936, "step": 1365 }, { "epoch": 0.3317144244779019, "grad_norm": 0.35701921582221985, "learning_rate": 0.0001, "loss": 1.7347, "step": 1366 }, { "epoch": 0.3319572608062166, "grad_norm": 0.35286015272140503, "learning_rate": 0.0001, "loss": 1.7225, "step": 1367 }, { "epoch": 0.3322000971345313, "grad_norm": 0.3747299015522003, "learning_rate": 0.0001, "loss": 1.9376, "step": 1368 }, { "epoch": 0.33244293346284604, "grad_norm": 0.3595302402973175, "learning_rate": 0.0001, "loss": 1.7511, "step": 1369 }, { "epoch": 0.33268576979116077, "grad_norm": 0.36371472477912903, "learning_rate": 0.0001, "loss": 1.6936, "step": 1370 }, { "epoch": 0.3329286061194755, "grad_norm": 0.3578799068927765, "learning_rate": 0.0001, "loss": 1.7735, "step": 1371 }, { "epoch": 0.3331714424477902, "grad_norm": 0.3342965245246887, "learning_rate": 0.0001, "loss": 1.6468, "step": 1372 }, { "epoch": 0.3334142787761049, "grad_norm": 0.41231048107147217, "learning_rate": 0.0001, "loss": 1.7679, "step": 1373 }, { "epoch": 0.33365711510441964, "grad_norm": 0.3757196068763733, "learning_rate": 0.0001, "loss": 1.8346, "step": 1374 }, { "epoch": 0.3338999514327343, "grad_norm": 0.3343157470226288, "learning_rate": 0.0001, "loss": 1.5956, "step": 1375 }, { "epoch": 0.33414278776104905, "grad_norm": 0.3424040675163269, "learning_rate": 0.0001, "loss": 1.7438, "step": 1376 }, { "epoch": 0.3343856240893638, "grad_norm": 0.35042518377304077, "learning_rate": 0.0001, "loss": 1.6813, "step": 1377 }, { "epoch": 0.33462846041767846, "grad_norm": 0.3496730327606201, "learning_rate": 0.0001, "loss": 1.7828, "step": 1378 }, { "epoch": 0.3348712967459932, "grad_norm": 0.3514433801174164, "learning_rate": 0.0001, "loss": 1.6792, "step": 1379 }, { "epoch": 0.33511413307430793, "grad_norm": 0.37553372979164124, "learning_rate": 0.0001, "loss": 1.8507, "step": 1380 }, { "epoch": 0.3353569694026226, "grad_norm": 0.3467164933681488, "learning_rate": 0.0001, "loss": 1.7598, "step": 1381 }, { "epoch": 0.33559980573093734, "grad_norm": 0.3462772071361542, "learning_rate": 0.0001, "loss": 1.8121, "step": 1382 }, { "epoch": 0.3358426420592521, "grad_norm": 0.3283761441707611, "learning_rate": 0.0001, "loss": 1.6598, "step": 1383 }, { "epoch": 0.3360854783875668, "grad_norm": 0.34071117639541626, "learning_rate": 0.0001, "loss": 1.6984, "step": 1384 }, { "epoch": 0.3363283147158815, "grad_norm": 0.3350028395652771, "learning_rate": 0.0001, "loss": 1.7327, "step": 1385 }, { "epoch": 0.3365711510441962, "grad_norm": 0.33372920751571655, "learning_rate": 0.0001, "loss": 1.7262, "step": 1386 }, { "epoch": 0.33681398737251095, "grad_norm": 0.36559152603149414, "learning_rate": 0.0001, "loss": 1.6677, "step": 1387 }, { "epoch": 0.3370568237008256, "grad_norm": 0.3441181480884552, "learning_rate": 0.0001, "loss": 1.6399, "step": 1388 }, { "epoch": 0.33729966002914036, "grad_norm": 0.37654390931129456, "learning_rate": 0.0001, "loss": 1.8389, "step": 1389 }, { "epoch": 0.3375424963574551, "grad_norm": 0.3329363465309143, "learning_rate": 0.0001, "loss": 1.6735, "step": 1390 }, { "epoch": 0.33778533268576977, "grad_norm": 0.34903109073638916, "learning_rate": 0.0001, "loss": 1.7471, "step": 1391 }, { "epoch": 0.3380281690140845, "grad_norm": 0.31249791383743286, "learning_rate": 0.0001, "loss": 1.659, "step": 1392 }, { "epoch": 0.33827100534239923, "grad_norm": 0.3493087887763977, "learning_rate": 0.0001, "loss": 1.7833, "step": 1393 }, { "epoch": 0.33851384167071397, "grad_norm": 0.3338823616504669, "learning_rate": 0.0001, "loss": 1.7842, "step": 1394 }, { "epoch": 0.33875667799902864, "grad_norm": 0.33419299125671387, "learning_rate": 0.0001, "loss": 1.7272, "step": 1395 }, { "epoch": 0.3389995143273434, "grad_norm": 0.35470160841941833, "learning_rate": 0.0001, "loss": 1.5258, "step": 1396 }, { "epoch": 0.3392423506556581, "grad_norm": 0.33839255571365356, "learning_rate": 0.0001, "loss": 1.7123, "step": 1397 }, { "epoch": 0.3394851869839728, "grad_norm": 0.36841094493865967, "learning_rate": 0.0001, "loss": 1.8869, "step": 1398 }, { "epoch": 0.3397280233122875, "grad_norm": 0.3372727334499359, "learning_rate": 0.0001, "loss": 1.6797, "step": 1399 }, { "epoch": 0.33997085964060225, "grad_norm": 0.30915823578834534, "learning_rate": 0.0001, "loss": 1.5637, "step": 1400 }, { "epoch": 0.34021369596891693, "grad_norm": 0.31683653593063354, "learning_rate": 0.0001, "loss": 1.6456, "step": 1401 }, { "epoch": 0.34045653229723166, "grad_norm": 0.32861605286598206, "learning_rate": 0.0001, "loss": 1.7809, "step": 1402 }, { "epoch": 0.3406993686255464, "grad_norm": 0.3321719765663147, "learning_rate": 0.0001, "loss": 1.7957, "step": 1403 }, { "epoch": 0.34094220495386107, "grad_norm": 0.34554940462112427, "learning_rate": 0.0001, "loss": 1.6703, "step": 1404 }, { "epoch": 0.3411850412821758, "grad_norm": 0.4025746285915375, "learning_rate": 0.0001, "loss": 1.8275, "step": 1405 }, { "epoch": 0.34142787761049054, "grad_norm": 0.34001925587654114, "learning_rate": 0.0001, "loss": 1.6531, "step": 1406 }, { "epoch": 0.34167071393880527, "grad_norm": 0.38130107522010803, "learning_rate": 0.0001, "loss": 1.8164, "step": 1407 }, { "epoch": 0.34191355026711995, "grad_norm": 0.3359420597553253, "learning_rate": 0.0001, "loss": 1.5757, "step": 1408 }, { "epoch": 0.3421563865954347, "grad_norm": 0.32789063453674316, "learning_rate": 0.0001, "loss": 1.5708, "step": 1409 }, { "epoch": 0.3423992229237494, "grad_norm": 0.3676958680152893, "learning_rate": 0.0001, "loss": 1.7915, "step": 1410 }, { "epoch": 0.3426420592520641, "grad_norm": 0.376945436000824, "learning_rate": 0.0001, "loss": 1.8536, "step": 1411 }, { "epoch": 0.3428848955803788, "grad_norm": 0.32787764072418213, "learning_rate": 0.0001, "loss": 1.7462, "step": 1412 }, { "epoch": 0.34312773190869356, "grad_norm": 0.40259850025177, "learning_rate": 0.0001, "loss": 1.8247, "step": 1413 }, { "epoch": 0.34337056823700823, "grad_norm": 0.36955440044403076, "learning_rate": 0.0001, "loss": 1.8181, "step": 1414 }, { "epoch": 0.34361340456532297, "grad_norm": 0.336770623922348, "learning_rate": 0.0001, "loss": 1.7475, "step": 1415 }, { "epoch": 0.3438562408936377, "grad_norm": 0.360385537147522, "learning_rate": 0.0001, "loss": 1.7349, "step": 1416 }, { "epoch": 0.3440990772219524, "grad_norm": 0.36137905716896057, "learning_rate": 0.0001, "loss": 1.7219, "step": 1417 }, { "epoch": 0.3443419135502671, "grad_norm": 0.3530389368534088, "learning_rate": 0.0001, "loss": 1.7545, "step": 1418 }, { "epoch": 0.34458474987858184, "grad_norm": 0.3292929232120514, "learning_rate": 0.0001, "loss": 1.7318, "step": 1419 }, { "epoch": 0.3448275862068966, "grad_norm": 0.3354680836200714, "learning_rate": 0.0001, "loss": 1.7757, "step": 1420 }, { "epoch": 0.34507042253521125, "grad_norm": 0.3158077895641327, "learning_rate": 0.0001, "loss": 1.4464, "step": 1421 }, { "epoch": 0.345313258863526, "grad_norm": 0.3723140060901642, "learning_rate": 0.0001, "loss": 1.7779, "step": 1422 }, { "epoch": 0.3455560951918407, "grad_norm": 0.3340955078601837, "learning_rate": 0.0001, "loss": 1.8542, "step": 1423 }, { "epoch": 0.3457989315201554, "grad_norm": 0.3777885437011719, "learning_rate": 0.0001, "loss": 1.7496, "step": 1424 }, { "epoch": 0.3460417678484701, "grad_norm": 0.352083295583725, "learning_rate": 0.0001, "loss": 1.8658, "step": 1425 }, { "epoch": 0.34628460417678486, "grad_norm": 0.3653123378753662, "learning_rate": 0.0001, "loss": 1.7476, "step": 1426 }, { "epoch": 0.34652744050509954, "grad_norm": 0.35643982887268066, "learning_rate": 0.0001, "loss": 1.6245, "step": 1427 }, { "epoch": 0.34677027683341427, "grad_norm": 0.3376123309135437, "learning_rate": 0.0001, "loss": 1.7228, "step": 1428 }, { "epoch": 0.347013113161729, "grad_norm": 0.3336902856826782, "learning_rate": 0.0001, "loss": 1.5102, "step": 1429 }, { "epoch": 0.34725594949004374, "grad_norm": 0.35413724184036255, "learning_rate": 0.0001, "loss": 1.7126, "step": 1430 }, { "epoch": 0.3474987858183584, "grad_norm": 0.3520013093948364, "learning_rate": 0.0001, "loss": 1.7388, "step": 1431 }, { "epoch": 0.34774162214667315, "grad_norm": 0.3354269564151764, "learning_rate": 0.0001, "loss": 1.7037, "step": 1432 }, { "epoch": 0.3479844584749879, "grad_norm": 0.37913069128990173, "learning_rate": 0.0001, "loss": 1.615, "step": 1433 }, { "epoch": 0.34822729480330256, "grad_norm": 0.35012054443359375, "learning_rate": 0.0001, "loss": 1.7384, "step": 1434 }, { "epoch": 0.3484701311316173, "grad_norm": 0.32427114248275757, "learning_rate": 0.0001, "loss": 1.5869, "step": 1435 }, { "epoch": 0.348712967459932, "grad_norm": 0.35008156299591064, "learning_rate": 0.0001, "loss": 1.7277, "step": 1436 }, { "epoch": 0.3489558037882467, "grad_norm": 0.3610100746154785, "learning_rate": 0.0001, "loss": 1.654, "step": 1437 }, { "epoch": 0.34919864011656143, "grad_norm": 0.34857314825057983, "learning_rate": 0.0001, "loss": 1.748, "step": 1438 }, { "epoch": 0.34944147644487616, "grad_norm": 0.3715338706970215, "learning_rate": 0.0001, "loss": 1.5845, "step": 1439 }, { "epoch": 0.34968431277319084, "grad_norm": 0.3498457074165344, "learning_rate": 0.0001, "loss": 1.78, "step": 1440 }, { "epoch": 0.3499271491015056, "grad_norm": 0.33495819568634033, "learning_rate": 0.0001, "loss": 1.6412, "step": 1441 }, { "epoch": 0.3501699854298203, "grad_norm": 0.38084495067596436, "learning_rate": 0.0001, "loss": 1.5973, "step": 1442 }, { "epoch": 0.35041282175813504, "grad_norm": 0.348736047744751, "learning_rate": 0.0001, "loss": 1.739, "step": 1443 }, { "epoch": 0.3506556580864497, "grad_norm": 0.33308863639831543, "learning_rate": 0.0001, "loss": 1.7768, "step": 1444 }, { "epoch": 0.35089849441476445, "grad_norm": 0.3505055606365204, "learning_rate": 0.0001, "loss": 1.7172, "step": 1445 }, { "epoch": 0.3511413307430792, "grad_norm": 0.3615930676460266, "learning_rate": 0.0001, "loss": 1.8188, "step": 1446 }, { "epoch": 0.35138416707139386, "grad_norm": 0.33312997221946716, "learning_rate": 0.0001, "loss": 1.6119, "step": 1447 }, { "epoch": 0.3516270033997086, "grad_norm": 0.34861356019973755, "learning_rate": 0.0001, "loss": 1.7229, "step": 1448 }, { "epoch": 0.3518698397280233, "grad_norm": 0.34396806359291077, "learning_rate": 0.0001, "loss": 1.6834, "step": 1449 }, { "epoch": 0.352112676056338, "grad_norm": 0.3472231924533844, "learning_rate": 0.0001, "loss": 1.6941, "step": 1450 }, { "epoch": 0.35235551238465274, "grad_norm": 0.33803194761276245, "learning_rate": 0.0001, "loss": 1.663, "step": 1451 }, { "epoch": 0.35259834871296747, "grad_norm": 0.3511747419834137, "learning_rate": 0.0001, "loss": 1.7896, "step": 1452 }, { "epoch": 0.3528411850412822, "grad_norm": 0.3400138318538666, "learning_rate": 0.0001, "loss": 1.7776, "step": 1453 }, { "epoch": 0.3530840213695969, "grad_norm": 0.34329378604888916, "learning_rate": 0.0001, "loss": 1.7485, "step": 1454 }, { "epoch": 0.3533268576979116, "grad_norm": 0.3581506311893463, "learning_rate": 0.0001, "loss": 1.8759, "step": 1455 }, { "epoch": 0.35356969402622634, "grad_norm": 0.3372110426425934, "learning_rate": 0.0001, "loss": 1.7721, "step": 1456 }, { "epoch": 0.353812530354541, "grad_norm": 0.33393704891204834, "learning_rate": 0.0001, "loss": 1.7497, "step": 1457 }, { "epoch": 0.35405536668285575, "grad_norm": 0.3570740818977356, "learning_rate": 0.0001, "loss": 1.8485, "step": 1458 }, { "epoch": 0.3542982030111705, "grad_norm": 0.3288159668445587, "learning_rate": 0.0001, "loss": 1.7145, "step": 1459 }, { "epoch": 0.35454103933948516, "grad_norm": 0.352760910987854, "learning_rate": 0.0001, "loss": 1.7081, "step": 1460 }, { "epoch": 0.3547838756677999, "grad_norm": 0.33334290981292725, "learning_rate": 0.0001, "loss": 1.6774, "step": 1461 }, { "epoch": 0.35502671199611463, "grad_norm": 0.36578717827796936, "learning_rate": 0.0001, "loss": 1.6655, "step": 1462 }, { "epoch": 0.3552695483244293, "grad_norm": 0.34803271293640137, "learning_rate": 0.0001, "loss": 1.8048, "step": 1463 }, { "epoch": 0.35551238465274404, "grad_norm": 0.34706413745880127, "learning_rate": 0.0001, "loss": 1.666, "step": 1464 }, { "epoch": 0.3557552209810588, "grad_norm": 0.3548772633075714, "learning_rate": 0.0001, "loss": 1.7261, "step": 1465 }, { "epoch": 0.3559980573093735, "grad_norm": 0.34797123074531555, "learning_rate": 0.0001, "loss": 1.7126, "step": 1466 }, { "epoch": 0.3562408936376882, "grad_norm": 0.3482211232185364, "learning_rate": 0.0001, "loss": 1.703, "step": 1467 }, { "epoch": 0.3564837299660029, "grad_norm": 0.380947083234787, "learning_rate": 0.0001, "loss": 1.8435, "step": 1468 }, { "epoch": 0.35672656629431765, "grad_norm": 0.33425045013427734, "learning_rate": 0.0001, "loss": 1.5974, "step": 1469 }, { "epoch": 0.3569694026226323, "grad_norm": 0.33053338527679443, "learning_rate": 0.0001, "loss": 1.5485, "step": 1470 }, { "epoch": 0.35721223895094706, "grad_norm": 0.33164921402931213, "learning_rate": 0.0001, "loss": 1.7375, "step": 1471 }, { "epoch": 0.3574550752792618, "grad_norm": 0.3542589843273163, "learning_rate": 0.0001, "loss": 1.8596, "step": 1472 }, { "epoch": 0.35769791160757647, "grad_norm": 0.36848315596580505, "learning_rate": 0.0001, "loss": 1.8971, "step": 1473 }, { "epoch": 0.3579407479358912, "grad_norm": 0.3263801336288452, "learning_rate": 0.0001, "loss": 1.6098, "step": 1474 }, { "epoch": 0.35818358426420593, "grad_norm": 0.3408321142196655, "learning_rate": 0.0001, "loss": 1.7054, "step": 1475 }, { "epoch": 0.35842642059252067, "grad_norm": 0.3380805552005768, "learning_rate": 0.0001, "loss": 1.7244, "step": 1476 }, { "epoch": 0.35866925692083534, "grad_norm": 0.3343592882156372, "learning_rate": 0.0001, "loss": 1.6569, "step": 1477 }, { "epoch": 0.3589120932491501, "grad_norm": 0.3570973873138428, "learning_rate": 0.0001, "loss": 1.7114, "step": 1478 }, { "epoch": 0.3591549295774648, "grad_norm": 0.3254868984222412, "learning_rate": 0.0001, "loss": 1.7063, "step": 1479 }, { "epoch": 0.3593977659057795, "grad_norm": 0.33075040578842163, "learning_rate": 0.0001, "loss": 1.6969, "step": 1480 }, { "epoch": 0.3596406022340942, "grad_norm": 0.3524262011051178, "learning_rate": 0.0001, "loss": 1.7907, "step": 1481 }, { "epoch": 0.35988343856240895, "grad_norm": 0.36520183086395264, "learning_rate": 0.0001, "loss": 1.8654, "step": 1482 }, { "epoch": 0.36012627489072363, "grad_norm": 0.3641956150531769, "learning_rate": 0.0001, "loss": 1.8671, "step": 1483 }, { "epoch": 0.36036911121903836, "grad_norm": 0.3406789004802704, "learning_rate": 0.0001, "loss": 1.7531, "step": 1484 }, { "epoch": 0.3606119475473531, "grad_norm": 0.33503258228302, "learning_rate": 0.0001, "loss": 1.6198, "step": 1485 }, { "epoch": 0.3608547838756678, "grad_norm": 0.3479795753955841, "learning_rate": 0.0001, "loss": 1.8708, "step": 1486 }, { "epoch": 0.3610976202039825, "grad_norm": 0.33785268664360046, "learning_rate": 0.0001, "loss": 1.772, "step": 1487 }, { "epoch": 0.36134045653229724, "grad_norm": 0.37516918778419495, "learning_rate": 0.0001, "loss": 1.7923, "step": 1488 }, { "epoch": 0.36158329286061197, "grad_norm": 0.35505229234695435, "learning_rate": 0.0001, "loss": 1.8019, "step": 1489 }, { "epoch": 0.36182612918892665, "grad_norm": 0.3542248606681824, "learning_rate": 0.0001, "loss": 1.7801, "step": 1490 }, { "epoch": 0.3620689655172414, "grad_norm": 0.36539554595947266, "learning_rate": 0.0001, "loss": 1.8357, "step": 1491 }, { "epoch": 0.3623118018455561, "grad_norm": 0.33330610394477844, "learning_rate": 0.0001, "loss": 1.6645, "step": 1492 }, { "epoch": 0.3625546381738708, "grad_norm": 0.331411749124527, "learning_rate": 0.0001, "loss": 1.6139, "step": 1493 }, { "epoch": 0.3627974745021855, "grad_norm": 0.35043078660964966, "learning_rate": 0.0001, "loss": 1.7627, "step": 1494 }, { "epoch": 0.36304031083050026, "grad_norm": 0.33841216564178467, "learning_rate": 0.0001, "loss": 1.6763, "step": 1495 }, { "epoch": 0.36328314715881493, "grad_norm": 0.3518892526626587, "learning_rate": 0.0001, "loss": 1.6667, "step": 1496 }, { "epoch": 0.36352598348712967, "grad_norm": 0.3356791138648987, "learning_rate": 0.0001, "loss": 1.7648, "step": 1497 }, { "epoch": 0.3637688198154444, "grad_norm": 0.33415839076042175, "learning_rate": 0.0001, "loss": 1.6838, "step": 1498 }, { "epoch": 0.36401165614375913, "grad_norm": 0.35371869802474976, "learning_rate": 0.0001, "loss": 1.7046, "step": 1499 }, { "epoch": 0.3642544924720738, "grad_norm": 0.3273749053478241, "learning_rate": 0.0001, "loss": 1.5889, "step": 1500 }, { "epoch": 0.36449732880038854, "grad_norm": 0.39279237389564514, "learning_rate": 0.0001, "loss": 1.8744, "step": 1501 }, { "epoch": 0.3647401651287033, "grad_norm": 0.3533165752887726, "learning_rate": 0.0001, "loss": 1.7483, "step": 1502 }, { "epoch": 0.36498300145701795, "grad_norm": 0.34811070561408997, "learning_rate": 0.0001, "loss": 1.624, "step": 1503 }, { "epoch": 0.3652258377853327, "grad_norm": 0.36167362332344055, "learning_rate": 0.0001, "loss": 1.6942, "step": 1504 }, { "epoch": 0.3654686741136474, "grad_norm": 0.33047184348106384, "learning_rate": 0.0001, "loss": 1.5787, "step": 1505 }, { "epoch": 0.3657115104419621, "grad_norm": 0.36502954363822937, "learning_rate": 0.0001, "loss": 1.76, "step": 1506 }, { "epoch": 0.36595434677027683, "grad_norm": 0.34943050146102905, "learning_rate": 0.0001, "loss": 1.7061, "step": 1507 }, { "epoch": 0.36619718309859156, "grad_norm": 0.3463122546672821, "learning_rate": 0.0001, "loss": 1.8641, "step": 1508 }, { "epoch": 0.36644001942690624, "grad_norm": 0.3423214256763458, "learning_rate": 0.0001, "loss": 1.6902, "step": 1509 }, { "epoch": 0.36668285575522097, "grad_norm": 0.3398582339286804, "learning_rate": 0.0001, "loss": 1.569, "step": 1510 }, { "epoch": 0.3669256920835357, "grad_norm": 0.3707105815410614, "learning_rate": 0.0001, "loss": 1.8673, "step": 1511 }, { "epoch": 0.36716852841185044, "grad_norm": 0.32776641845703125, "learning_rate": 0.0001, "loss": 1.5771, "step": 1512 }, { "epoch": 0.3674113647401651, "grad_norm": 0.3616585433483124, "learning_rate": 0.0001, "loss": 1.7499, "step": 1513 }, { "epoch": 0.36765420106847985, "grad_norm": 0.3802650272846222, "learning_rate": 0.0001, "loss": 1.8077, "step": 1514 }, { "epoch": 0.3678970373967946, "grad_norm": 0.3210430145263672, "learning_rate": 0.0001, "loss": 1.5099, "step": 1515 }, { "epoch": 0.36813987372510926, "grad_norm": 0.33786076307296753, "learning_rate": 0.0001, "loss": 1.7593, "step": 1516 }, { "epoch": 0.368382710053424, "grad_norm": 0.3465520441532135, "learning_rate": 0.0001, "loss": 1.5437, "step": 1517 }, { "epoch": 0.3686255463817387, "grad_norm": 0.3694959580898285, "learning_rate": 0.0001, "loss": 1.6638, "step": 1518 }, { "epoch": 0.3688683827100534, "grad_norm": 0.3485603332519531, "learning_rate": 0.0001, "loss": 1.6426, "step": 1519 }, { "epoch": 0.36911121903836813, "grad_norm": 0.3477795422077179, "learning_rate": 0.0001, "loss": 1.851, "step": 1520 }, { "epoch": 0.36935405536668287, "grad_norm": 0.3689527213573456, "learning_rate": 0.0001, "loss": 1.8298, "step": 1521 }, { "epoch": 0.3695968916949976, "grad_norm": 0.337512344121933, "learning_rate": 0.0001, "loss": 1.7631, "step": 1522 }, { "epoch": 0.3698397280233123, "grad_norm": 0.3260522484779358, "learning_rate": 0.0001, "loss": 1.5334, "step": 1523 }, { "epoch": 0.370082564351627, "grad_norm": 0.3651309013366699, "learning_rate": 0.0001, "loss": 1.766, "step": 1524 }, { "epoch": 0.37032540067994174, "grad_norm": 0.345079630613327, "learning_rate": 0.0001, "loss": 1.7777, "step": 1525 }, { "epoch": 0.3705682370082564, "grad_norm": 0.36292821168899536, "learning_rate": 0.0001, "loss": 1.7766, "step": 1526 }, { "epoch": 0.37081107333657115, "grad_norm": 0.34273114800453186, "learning_rate": 0.0001, "loss": 1.8465, "step": 1527 }, { "epoch": 0.3710539096648859, "grad_norm": 0.3660784363746643, "learning_rate": 0.0001, "loss": 1.7597, "step": 1528 }, { "epoch": 0.37129674599320056, "grad_norm": 0.3469104468822479, "learning_rate": 0.0001, "loss": 1.6156, "step": 1529 }, { "epoch": 0.3715395823215153, "grad_norm": 0.3330024778842926, "learning_rate": 0.0001, "loss": 1.6671, "step": 1530 }, { "epoch": 0.37178241864983, "grad_norm": 0.3564825654029846, "learning_rate": 0.0001, "loss": 1.814, "step": 1531 }, { "epoch": 0.3720252549781447, "grad_norm": 0.3567167818546295, "learning_rate": 0.0001, "loss": 1.7433, "step": 1532 }, { "epoch": 0.37226809130645944, "grad_norm": 0.3620092272758484, "learning_rate": 0.0001, "loss": 1.7187, "step": 1533 }, { "epoch": 0.37251092763477417, "grad_norm": 0.36428067088127136, "learning_rate": 0.0001, "loss": 1.7305, "step": 1534 }, { "epoch": 0.3727537639630889, "grad_norm": 0.3668808043003082, "learning_rate": 0.0001, "loss": 1.7448, "step": 1535 }, { "epoch": 0.3729966002914036, "grad_norm": 0.35790756344795227, "learning_rate": 0.0001, "loss": 1.6841, "step": 1536 }, { "epoch": 0.3732394366197183, "grad_norm": 0.3803383409976959, "learning_rate": 0.0001, "loss": 1.6986, "step": 1537 }, { "epoch": 0.37348227294803304, "grad_norm": 0.35111626982688904, "learning_rate": 0.0001, "loss": 1.6785, "step": 1538 }, { "epoch": 0.3737251092763477, "grad_norm": 0.3548724055290222, "learning_rate": 0.0001, "loss": 1.6832, "step": 1539 }, { "epoch": 0.37396794560466246, "grad_norm": 0.3986164629459381, "learning_rate": 0.0001, "loss": 1.8182, "step": 1540 }, { "epoch": 0.3742107819329772, "grad_norm": 0.35997113585472107, "learning_rate": 0.0001, "loss": 1.8724, "step": 1541 }, { "epoch": 0.37445361826129187, "grad_norm": 0.35795292258262634, "learning_rate": 0.0001, "loss": 1.7193, "step": 1542 }, { "epoch": 0.3746964545896066, "grad_norm": 0.35861730575561523, "learning_rate": 0.0001, "loss": 1.7895, "step": 1543 }, { "epoch": 0.37493929091792133, "grad_norm": 0.3538043200969696, "learning_rate": 0.0001, "loss": 1.7324, "step": 1544 }, { "epoch": 0.37518212724623606, "grad_norm": 0.3179115355014801, "learning_rate": 0.0001, "loss": 1.728, "step": 1545 }, { "epoch": 0.37542496357455074, "grad_norm": 0.3643108606338501, "learning_rate": 0.0001, "loss": 1.7634, "step": 1546 }, { "epoch": 0.3756677999028655, "grad_norm": 0.342984676361084, "learning_rate": 0.0001, "loss": 1.7601, "step": 1547 }, { "epoch": 0.3759106362311802, "grad_norm": 0.3417559266090393, "learning_rate": 0.0001, "loss": 1.7127, "step": 1548 }, { "epoch": 0.3761534725594949, "grad_norm": 0.3582463264465332, "learning_rate": 0.0001, "loss": 1.8364, "step": 1549 }, { "epoch": 0.3763963088878096, "grad_norm": 0.3490729331970215, "learning_rate": 0.0001, "loss": 1.6524, "step": 1550 }, { "epoch": 0.37663914521612435, "grad_norm": 0.33228304982185364, "learning_rate": 0.0001, "loss": 1.6648, "step": 1551 }, { "epoch": 0.376881981544439, "grad_norm": 0.3435888886451721, "learning_rate": 0.0001, "loss": 1.6654, "step": 1552 }, { "epoch": 0.37712481787275376, "grad_norm": 0.34257256984710693, "learning_rate": 0.0001, "loss": 1.6772, "step": 1553 }, { "epoch": 0.3773676542010685, "grad_norm": 0.3472994267940521, "learning_rate": 0.0001, "loss": 1.6576, "step": 1554 }, { "epoch": 0.37761049052938317, "grad_norm": 0.36370810866355896, "learning_rate": 0.0001, "loss": 1.7062, "step": 1555 }, { "epoch": 0.3778533268576979, "grad_norm": 0.33490875363349915, "learning_rate": 0.0001, "loss": 1.7911, "step": 1556 }, { "epoch": 0.37809616318601263, "grad_norm": 0.3443796932697296, "learning_rate": 0.0001, "loss": 1.6962, "step": 1557 }, { "epoch": 0.37833899951432737, "grad_norm": 0.3875202536582947, "learning_rate": 0.0001, "loss": 1.7595, "step": 1558 }, { "epoch": 0.37858183584264204, "grad_norm": 0.3398173451423645, "learning_rate": 0.0001, "loss": 1.7226, "step": 1559 }, { "epoch": 0.3788246721709568, "grad_norm": 0.3591778576374054, "learning_rate": 0.0001, "loss": 1.7631, "step": 1560 }, { "epoch": 0.3790675084992715, "grad_norm": 0.3375695049762726, "learning_rate": 0.0001, "loss": 1.7873, "step": 1561 }, { "epoch": 0.3793103448275862, "grad_norm": 0.3466924726963043, "learning_rate": 0.0001, "loss": 1.8187, "step": 1562 }, { "epoch": 0.3795531811559009, "grad_norm": 0.3472690284252167, "learning_rate": 0.0001, "loss": 1.7919, "step": 1563 }, { "epoch": 0.37979601748421565, "grad_norm": 0.3553646206855774, "learning_rate": 0.0001, "loss": 1.7024, "step": 1564 }, { "epoch": 0.38003885381253033, "grad_norm": 0.36639171838760376, "learning_rate": 0.0001, "loss": 1.7116, "step": 1565 }, { "epoch": 0.38028169014084506, "grad_norm": 0.3448989689350128, "learning_rate": 0.0001, "loss": 1.8521, "step": 1566 }, { "epoch": 0.3805245264691598, "grad_norm": 0.3462245762348175, "learning_rate": 0.0001, "loss": 1.7795, "step": 1567 }, { "epoch": 0.38076736279747453, "grad_norm": 0.3502362370491028, "learning_rate": 0.0001, "loss": 1.8843, "step": 1568 }, { "epoch": 0.3810101991257892, "grad_norm": 0.36486515402793884, "learning_rate": 0.0001, "loss": 1.7465, "step": 1569 }, { "epoch": 0.38125303545410394, "grad_norm": 0.3245135545730591, "learning_rate": 0.0001, "loss": 1.5685, "step": 1570 }, { "epoch": 0.38149587178241867, "grad_norm": 0.33357447385787964, "learning_rate": 0.0001, "loss": 1.849, "step": 1571 }, { "epoch": 0.38173870811073335, "grad_norm": 0.3470303416252136, "learning_rate": 0.0001, "loss": 1.7443, "step": 1572 }, { "epoch": 0.3819815444390481, "grad_norm": 0.3471367061138153, "learning_rate": 0.0001, "loss": 1.6293, "step": 1573 }, { "epoch": 0.3822243807673628, "grad_norm": 0.32362157106399536, "learning_rate": 0.0001, "loss": 1.557, "step": 1574 }, { "epoch": 0.3824672170956775, "grad_norm": 0.33684155344963074, "learning_rate": 0.0001, "loss": 1.7009, "step": 1575 }, { "epoch": 0.3827100534239922, "grad_norm": 0.3579930067062378, "learning_rate": 0.0001, "loss": 1.8257, "step": 1576 }, { "epoch": 0.38295288975230696, "grad_norm": 0.3533737063407898, "learning_rate": 0.0001, "loss": 1.8456, "step": 1577 }, { "epoch": 0.38319572608062163, "grad_norm": 0.36287230253219604, "learning_rate": 0.0001, "loss": 1.7177, "step": 1578 }, { "epoch": 0.38343856240893637, "grad_norm": 0.35856160521507263, "learning_rate": 0.0001, "loss": 1.7155, "step": 1579 }, { "epoch": 0.3836813987372511, "grad_norm": 0.3617909550666809, "learning_rate": 0.0001, "loss": 1.7741, "step": 1580 }, { "epoch": 0.38392423506556583, "grad_norm": 0.35343724489212036, "learning_rate": 0.0001, "loss": 1.7358, "step": 1581 }, { "epoch": 0.3841670713938805, "grad_norm": 0.3512588441371918, "learning_rate": 0.0001, "loss": 1.8715, "step": 1582 }, { "epoch": 0.38440990772219524, "grad_norm": 0.3507743775844574, "learning_rate": 0.0001, "loss": 1.7062, "step": 1583 }, { "epoch": 0.38465274405051, "grad_norm": 0.36896374821662903, "learning_rate": 0.0001, "loss": 1.7494, "step": 1584 }, { "epoch": 0.38489558037882465, "grad_norm": 0.3897750675678253, "learning_rate": 0.0001, "loss": 1.7953, "step": 1585 }, { "epoch": 0.3851384167071394, "grad_norm": 0.3406773805618286, "learning_rate": 0.0001, "loss": 1.7495, "step": 1586 }, { "epoch": 0.3853812530354541, "grad_norm": 0.35307011008262634, "learning_rate": 0.0001, "loss": 1.6683, "step": 1587 }, { "epoch": 0.3856240893637688, "grad_norm": 0.32022756338119507, "learning_rate": 0.0001, "loss": 1.5555, "step": 1588 }, { "epoch": 0.38586692569208353, "grad_norm": 0.3569686710834503, "learning_rate": 0.0001, "loss": 1.8626, "step": 1589 }, { "epoch": 0.38610976202039826, "grad_norm": 0.3682452142238617, "learning_rate": 0.0001, "loss": 1.8699, "step": 1590 }, { "epoch": 0.386352598348713, "grad_norm": 0.3267800211906433, "learning_rate": 0.0001, "loss": 1.5346, "step": 1591 }, { "epoch": 0.38659543467702767, "grad_norm": 0.34441307187080383, "learning_rate": 0.0001, "loss": 1.7764, "step": 1592 }, { "epoch": 0.3868382710053424, "grad_norm": 0.33361029624938965, "learning_rate": 0.0001, "loss": 1.6749, "step": 1593 }, { "epoch": 0.38708110733365714, "grad_norm": 0.34326493740081787, "learning_rate": 0.0001, "loss": 1.783, "step": 1594 }, { "epoch": 0.3873239436619718, "grad_norm": 0.3545917868614197, "learning_rate": 0.0001, "loss": 1.7397, "step": 1595 }, { "epoch": 0.38756677999028655, "grad_norm": 0.3923596143722534, "learning_rate": 0.0001, "loss": 1.7354, "step": 1596 }, { "epoch": 0.3878096163186013, "grad_norm": 0.35300305485725403, "learning_rate": 0.0001, "loss": 1.7267, "step": 1597 }, { "epoch": 0.38805245264691596, "grad_norm": 0.42850735783576965, "learning_rate": 0.0001, "loss": 1.8924, "step": 1598 }, { "epoch": 0.3882952889752307, "grad_norm": 0.361499160528183, "learning_rate": 0.0001, "loss": 1.7558, "step": 1599 }, { "epoch": 0.3885381253035454, "grad_norm": 0.35589104890823364, "learning_rate": 0.0001, "loss": 1.634, "step": 1600 }, { "epoch": 0.3887809616318601, "grad_norm": 0.37668150663375854, "learning_rate": 0.0001, "loss": 1.799, "step": 1601 }, { "epoch": 0.38902379796017483, "grad_norm": 0.34165313839912415, "learning_rate": 0.0001, "loss": 1.7319, "step": 1602 }, { "epoch": 0.38926663428848957, "grad_norm": 0.341299831867218, "learning_rate": 0.0001, "loss": 1.7136, "step": 1603 }, { "epoch": 0.3895094706168043, "grad_norm": 0.34204524755477905, "learning_rate": 0.0001, "loss": 1.5748, "step": 1604 }, { "epoch": 0.389752306945119, "grad_norm": 0.3674558401107788, "learning_rate": 0.0001, "loss": 1.7721, "step": 1605 }, { "epoch": 0.3899951432734337, "grad_norm": 0.33253777027130127, "learning_rate": 0.0001, "loss": 1.6459, "step": 1606 }, { "epoch": 0.39023797960174844, "grad_norm": 0.36387547850608826, "learning_rate": 0.0001, "loss": 1.6895, "step": 1607 }, { "epoch": 0.3904808159300631, "grad_norm": 0.3665448725223541, "learning_rate": 0.0001, "loss": 1.7362, "step": 1608 }, { "epoch": 0.39072365225837785, "grad_norm": 0.3469473123550415, "learning_rate": 0.0001, "loss": 1.8904, "step": 1609 }, { "epoch": 0.3909664885866926, "grad_norm": 0.365561306476593, "learning_rate": 0.0001, "loss": 1.661, "step": 1610 }, { "epoch": 0.39120932491500726, "grad_norm": 0.340475469827652, "learning_rate": 0.0001, "loss": 1.7433, "step": 1611 }, { "epoch": 0.391452161243322, "grad_norm": 0.3633958697319031, "learning_rate": 0.0001, "loss": 1.6913, "step": 1612 }, { "epoch": 0.3916949975716367, "grad_norm": 0.3879851996898651, "learning_rate": 0.0001, "loss": 1.7817, "step": 1613 }, { "epoch": 0.39193783389995146, "grad_norm": 0.3605000376701355, "learning_rate": 0.0001, "loss": 1.7143, "step": 1614 }, { "epoch": 0.39218067022826614, "grad_norm": 0.3635120689868927, "learning_rate": 0.0001, "loss": 1.8388, "step": 1615 }, { "epoch": 0.39242350655658087, "grad_norm": 0.3307245969772339, "learning_rate": 0.0001, "loss": 1.6376, "step": 1616 }, { "epoch": 0.3926663428848956, "grad_norm": 0.3344554603099823, "learning_rate": 0.0001, "loss": 1.7015, "step": 1617 }, { "epoch": 0.3929091792132103, "grad_norm": 0.3535119891166687, "learning_rate": 0.0001, "loss": 1.6508, "step": 1618 }, { "epoch": 0.393152015541525, "grad_norm": 0.35171249508857727, "learning_rate": 0.0001, "loss": 1.8599, "step": 1619 }, { "epoch": 0.39339485186983975, "grad_norm": 0.34228235483169556, "learning_rate": 0.0001, "loss": 1.7377, "step": 1620 }, { "epoch": 0.3936376881981544, "grad_norm": 0.3834402859210968, "learning_rate": 0.0001, "loss": 1.7709, "step": 1621 }, { "epoch": 0.39388052452646916, "grad_norm": 0.34480172395706177, "learning_rate": 0.0001, "loss": 1.7076, "step": 1622 }, { "epoch": 0.3941233608547839, "grad_norm": 0.35638877749443054, "learning_rate": 0.0001, "loss": 1.7495, "step": 1623 }, { "epoch": 0.39436619718309857, "grad_norm": 0.3370180130004883, "learning_rate": 0.0001, "loss": 1.6121, "step": 1624 }, { "epoch": 0.3946090335114133, "grad_norm": 0.37028077244758606, "learning_rate": 0.0001, "loss": 1.9731, "step": 1625 }, { "epoch": 0.39485186983972803, "grad_norm": 0.3528951108455658, "learning_rate": 0.0001, "loss": 1.6877, "step": 1626 }, { "epoch": 0.39509470616804276, "grad_norm": 0.3634454905986786, "learning_rate": 0.0001, "loss": 1.7671, "step": 1627 }, { "epoch": 0.39533754249635744, "grad_norm": 0.3391231894493103, "learning_rate": 0.0001, "loss": 1.6827, "step": 1628 }, { "epoch": 0.3955803788246722, "grad_norm": 0.3546386957168579, "learning_rate": 0.0001, "loss": 1.6595, "step": 1629 }, { "epoch": 0.3958232151529869, "grad_norm": 0.34772059321403503, "learning_rate": 0.0001, "loss": 1.7565, "step": 1630 }, { "epoch": 0.3960660514813016, "grad_norm": 0.3234846591949463, "learning_rate": 0.0001, "loss": 1.6029, "step": 1631 }, { "epoch": 0.3963088878096163, "grad_norm": 0.34120452404022217, "learning_rate": 0.0001, "loss": 1.612, "step": 1632 }, { "epoch": 0.39655172413793105, "grad_norm": 0.34159693121910095, "learning_rate": 0.0001, "loss": 1.7595, "step": 1633 }, { "epoch": 0.3967945604662457, "grad_norm": 0.35023289918899536, "learning_rate": 0.0001, "loss": 1.7803, "step": 1634 }, { "epoch": 0.39703739679456046, "grad_norm": 0.3789989650249481, "learning_rate": 0.0001, "loss": 1.7713, "step": 1635 }, { "epoch": 0.3972802331228752, "grad_norm": 0.36430492997169495, "learning_rate": 0.0001, "loss": 1.8014, "step": 1636 }, { "epoch": 0.3975230694511899, "grad_norm": 0.3651634156703949, "learning_rate": 0.0001, "loss": 1.9387, "step": 1637 }, { "epoch": 0.3977659057795046, "grad_norm": 0.33149656653404236, "learning_rate": 0.0001, "loss": 1.7681, "step": 1638 }, { "epoch": 0.39800874210781934, "grad_norm": 0.3321326971054077, "learning_rate": 0.0001, "loss": 1.7215, "step": 1639 }, { "epoch": 0.39825157843613407, "grad_norm": 0.36206716299057007, "learning_rate": 0.0001, "loss": 1.7122, "step": 1640 }, { "epoch": 0.39849441476444875, "grad_norm": 0.3373028039932251, "learning_rate": 0.0001, "loss": 1.696, "step": 1641 }, { "epoch": 0.3987372510927635, "grad_norm": 0.34143638610839844, "learning_rate": 0.0001, "loss": 1.7125, "step": 1642 }, { "epoch": 0.3989800874210782, "grad_norm": 0.3449946343898773, "learning_rate": 0.0001, "loss": 1.7054, "step": 1643 }, { "epoch": 0.3992229237493929, "grad_norm": 0.36091458797454834, "learning_rate": 0.0001, "loss": 1.6785, "step": 1644 }, { "epoch": 0.3994657600777076, "grad_norm": 0.35519731044769287, "learning_rate": 0.0001, "loss": 1.7946, "step": 1645 }, { "epoch": 0.39970859640602235, "grad_norm": 0.34838569164276123, "learning_rate": 0.0001, "loss": 1.7462, "step": 1646 }, { "epoch": 0.39995143273433703, "grad_norm": 0.3532315194606781, "learning_rate": 0.0001, "loss": 1.7446, "step": 1647 }, { "epoch": 0.40019426906265176, "grad_norm": 0.34514230489730835, "learning_rate": 0.0001, "loss": 1.6328, "step": 1648 }, { "epoch": 0.4004371053909665, "grad_norm": 0.3498397767543793, "learning_rate": 0.0001, "loss": 1.9101, "step": 1649 }, { "epoch": 0.40067994171928123, "grad_norm": 0.36845964193344116, "learning_rate": 0.0001, "loss": 1.7796, "step": 1650 }, { "epoch": 0.4009227780475959, "grad_norm": 0.3510598838329315, "learning_rate": 0.0001, "loss": 1.6093, "step": 1651 }, { "epoch": 0.40116561437591064, "grad_norm": 0.33339422941207886, "learning_rate": 0.0001, "loss": 1.6679, "step": 1652 }, { "epoch": 0.4014084507042254, "grad_norm": 0.33257877826690674, "learning_rate": 0.0001, "loss": 1.7101, "step": 1653 }, { "epoch": 0.40165128703254005, "grad_norm": 0.3461006283760071, "learning_rate": 0.0001, "loss": 1.6459, "step": 1654 }, { "epoch": 0.4018941233608548, "grad_norm": 0.3384184241294861, "learning_rate": 0.0001, "loss": 1.6637, "step": 1655 }, { "epoch": 0.4021369596891695, "grad_norm": 0.34670189023017883, "learning_rate": 0.0001, "loss": 1.6808, "step": 1656 }, { "epoch": 0.4023797960174842, "grad_norm": 0.34890082478523254, "learning_rate": 0.0001, "loss": 1.7313, "step": 1657 }, { "epoch": 0.4026226323457989, "grad_norm": 0.3604244887828827, "learning_rate": 0.0001, "loss": 1.92, "step": 1658 }, { "epoch": 0.40286546867411366, "grad_norm": 0.3499077260494232, "learning_rate": 0.0001, "loss": 1.7897, "step": 1659 }, { "epoch": 0.4031083050024284, "grad_norm": 0.3570781648159027, "learning_rate": 0.0001, "loss": 1.7975, "step": 1660 }, { "epoch": 0.40335114133074307, "grad_norm": 0.39222121238708496, "learning_rate": 0.0001, "loss": 1.6846, "step": 1661 }, { "epoch": 0.4035939776590578, "grad_norm": 0.34657740592956543, "learning_rate": 0.0001, "loss": 1.6149, "step": 1662 }, { "epoch": 0.40383681398737253, "grad_norm": 0.34491151571273804, "learning_rate": 0.0001, "loss": 1.85, "step": 1663 }, { "epoch": 0.4040796503156872, "grad_norm": 0.3567986488342285, "learning_rate": 0.0001, "loss": 1.8325, "step": 1664 }, { "epoch": 0.40432248664400194, "grad_norm": 0.34347212314605713, "learning_rate": 0.0001, "loss": 1.7163, "step": 1665 }, { "epoch": 0.4045653229723167, "grad_norm": 0.36440059542655945, "learning_rate": 0.0001, "loss": 1.8832, "step": 1666 }, { "epoch": 0.40480815930063135, "grad_norm": 0.3367612063884735, "learning_rate": 0.0001, "loss": 1.7437, "step": 1667 }, { "epoch": 0.4050509956289461, "grad_norm": 0.3793525993824005, "learning_rate": 0.0001, "loss": 1.6954, "step": 1668 }, { "epoch": 0.4052938319572608, "grad_norm": 0.34889304637908936, "learning_rate": 0.0001, "loss": 1.6213, "step": 1669 }, { "epoch": 0.4055366682855755, "grad_norm": 0.33768993616104126, "learning_rate": 0.0001, "loss": 1.6736, "step": 1670 }, { "epoch": 0.40577950461389023, "grad_norm": 0.349176824092865, "learning_rate": 0.0001, "loss": 1.8238, "step": 1671 }, { "epoch": 0.40602234094220496, "grad_norm": 0.35951730608940125, "learning_rate": 0.0001, "loss": 1.8022, "step": 1672 }, { "epoch": 0.4062651772705197, "grad_norm": 0.36430323123931885, "learning_rate": 0.0001, "loss": 1.8402, "step": 1673 }, { "epoch": 0.4065080135988344, "grad_norm": 0.3360004723072052, "learning_rate": 0.0001, "loss": 1.5745, "step": 1674 }, { "epoch": 0.4067508499271491, "grad_norm": 0.3629210293292999, "learning_rate": 0.0001, "loss": 1.825, "step": 1675 }, { "epoch": 0.40699368625546384, "grad_norm": 0.35018420219421387, "learning_rate": 0.0001, "loss": 1.7295, "step": 1676 }, { "epoch": 0.4072365225837785, "grad_norm": 0.3400278687477112, "learning_rate": 0.0001, "loss": 1.6958, "step": 1677 }, { "epoch": 0.40747935891209325, "grad_norm": 0.34914901852607727, "learning_rate": 0.0001, "loss": 1.6115, "step": 1678 }, { "epoch": 0.407722195240408, "grad_norm": 0.3571421504020691, "learning_rate": 0.0001, "loss": 1.7384, "step": 1679 }, { "epoch": 0.40796503156872266, "grad_norm": 0.3605473041534424, "learning_rate": 0.0001, "loss": 1.8193, "step": 1680 }, { "epoch": 0.4082078678970374, "grad_norm": 0.37068045139312744, "learning_rate": 0.0001, "loss": 1.9287, "step": 1681 }, { "epoch": 0.4084507042253521, "grad_norm": 0.34394922852516174, "learning_rate": 0.0001, "loss": 1.7579, "step": 1682 }, { "epoch": 0.4086935405536668, "grad_norm": 0.3192780315876007, "learning_rate": 0.0001, "loss": 1.5888, "step": 1683 }, { "epoch": 0.40893637688198153, "grad_norm": 0.3368231952190399, "learning_rate": 0.0001, "loss": 1.7074, "step": 1684 }, { "epoch": 0.40917921321029627, "grad_norm": 0.3436574935913086, "learning_rate": 0.0001, "loss": 1.6942, "step": 1685 }, { "epoch": 0.409422049538611, "grad_norm": 0.3476155996322632, "learning_rate": 0.0001, "loss": 1.7639, "step": 1686 }, { "epoch": 0.4096648858669257, "grad_norm": 0.33362385630607605, "learning_rate": 0.0001, "loss": 1.6146, "step": 1687 }, { "epoch": 0.4099077221952404, "grad_norm": 0.3497028052806854, "learning_rate": 0.0001, "loss": 1.5816, "step": 1688 }, { "epoch": 0.41015055852355514, "grad_norm": 0.3652702569961548, "learning_rate": 0.0001, "loss": 1.8025, "step": 1689 }, { "epoch": 0.4103933948518698, "grad_norm": 0.3235238194465637, "learning_rate": 0.0001, "loss": 1.5762, "step": 1690 }, { "epoch": 0.41063623118018455, "grad_norm": 0.3403080403804779, "learning_rate": 0.0001, "loss": 1.7607, "step": 1691 }, { "epoch": 0.4108790675084993, "grad_norm": 0.3631891906261444, "learning_rate": 0.0001, "loss": 1.8404, "step": 1692 }, { "epoch": 0.41112190383681396, "grad_norm": 0.3438990116119385, "learning_rate": 0.0001, "loss": 1.7936, "step": 1693 }, { "epoch": 0.4113647401651287, "grad_norm": 0.35403940081596375, "learning_rate": 0.0001, "loss": 1.7144, "step": 1694 }, { "epoch": 0.41160757649344343, "grad_norm": 0.3484737277030945, "learning_rate": 0.0001, "loss": 1.6433, "step": 1695 }, { "epoch": 0.41185041282175816, "grad_norm": 0.3359359800815582, "learning_rate": 0.0001, "loss": 1.583, "step": 1696 }, { "epoch": 0.41209324915007284, "grad_norm": 0.35734885931015015, "learning_rate": 0.0001, "loss": 1.8943, "step": 1697 }, { "epoch": 0.41233608547838757, "grad_norm": 0.37979212403297424, "learning_rate": 0.0001, "loss": 1.7848, "step": 1698 }, { "epoch": 0.4125789218067023, "grad_norm": 0.3913626968860626, "learning_rate": 0.0001, "loss": 1.9229, "step": 1699 }, { "epoch": 0.412821758135017, "grad_norm": 0.38415777683258057, "learning_rate": 0.0001, "loss": 1.8476, "step": 1700 }, { "epoch": 0.4130645944633317, "grad_norm": 0.3729405403137207, "learning_rate": 0.0001, "loss": 1.8208, "step": 1701 }, { "epoch": 0.41330743079164645, "grad_norm": 0.36757805943489075, "learning_rate": 0.0001, "loss": 1.6945, "step": 1702 }, { "epoch": 0.4135502671199611, "grad_norm": 0.34475553035736084, "learning_rate": 0.0001, "loss": 1.6519, "step": 1703 }, { "epoch": 0.41379310344827586, "grad_norm": 0.34733209013938904, "learning_rate": 0.0001, "loss": 1.5978, "step": 1704 }, { "epoch": 0.4140359397765906, "grad_norm": 0.3799619674682617, "learning_rate": 0.0001, "loss": 1.677, "step": 1705 }, { "epoch": 0.41427877610490527, "grad_norm": 0.3415276110172272, "learning_rate": 0.0001, "loss": 1.736, "step": 1706 }, { "epoch": 0.41452161243322, "grad_norm": 0.35778936743736267, "learning_rate": 0.0001, "loss": 1.8407, "step": 1707 }, { "epoch": 0.41476444876153473, "grad_norm": 0.347223699092865, "learning_rate": 0.0001, "loss": 1.6321, "step": 1708 }, { "epoch": 0.41500728508984946, "grad_norm": 0.36570411920547485, "learning_rate": 0.0001, "loss": 1.8242, "step": 1709 }, { "epoch": 0.41525012141816414, "grad_norm": 0.3869484066963196, "learning_rate": 0.0001, "loss": 1.734, "step": 1710 }, { "epoch": 0.4154929577464789, "grad_norm": 0.36324143409729004, "learning_rate": 0.0001, "loss": 1.6839, "step": 1711 }, { "epoch": 0.4157357940747936, "grad_norm": 0.35620391368865967, "learning_rate": 0.0001, "loss": 1.7555, "step": 1712 }, { "epoch": 0.4159786304031083, "grad_norm": 0.3773002028465271, "learning_rate": 0.0001, "loss": 1.8965, "step": 1713 }, { "epoch": 0.416221466731423, "grad_norm": 0.3578970432281494, "learning_rate": 0.0001, "loss": 1.6807, "step": 1714 }, { "epoch": 0.41646430305973775, "grad_norm": 0.36377522349357605, "learning_rate": 0.0001, "loss": 1.9471, "step": 1715 }, { "epoch": 0.4167071393880524, "grad_norm": 0.3433545231819153, "learning_rate": 0.0001, "loss": 1.7947, "step": 1716 }, { "epoch": 0.41694997571636716, "grad_norm": 0.3755989968776703, "learning_rate": 0.0001, "loss": 1.8395, "step": 1717 }, { "epoch": 0.4171928120446819, "grad_norm": 0.33563217520713806, "learning_rate": 0.0001, "loss": 1.6544, "step": 1718 }, { "epoch": 0.4174356483729966, "grad_norm": 0.33854302763938904, "learning_rate": 0.0001, "loss": 1.6766, "step": 1719 }, { "epoch": 0.4176784847013113, "grad_norm": 0.3348889648914337, "learning_rate": 0.0001, "loss": 1.7056, "step": 1720 }, { "epoch": 0.41792132102962604, "grad_norm": 0.3639450669288635, "learning_rate": 0.0001, "loss": 1.7382, "step": 1721 }, { "epoch": 0.41816415735794077, "grad_norm": 0.34863460063934326, "learning_rate": 0.0001, "loss": 1.6771, "step": 1722 }, { "epoch": 0.41840699368625545, "grad_norm": 0.35017862915992737, "learning_rate": 0.0001, "loss": 1.7366, "step": 1723 }, { "epoch": 0.4186498300145702, "grad_norm": 0.3767487108707428, "learning_rate": 0.0001, "loss": 1.8036, "step": 1724 }, { "epoch": 0.4188926663428849, "grad_norm": 0.35246649384498596, "learning_rate": 0.0001, "loss": 1.7452, "step": 1725 }, { "epoch": 0.4191355026711996, "grad_norm": 0.33406075835227966, "learning_rate": 0.0001, "loss": 1.7114, "step": 1726 }, { "epoch": 0.4193783389995143, "grad_norm": 0.35864290595054626, "learning_rate": 0.0001, "loss": 1.7944, "step": 1727 }, { "epoch": 0.41962117532782905, "grad_norm": 0.37582850456237793, "learning_rate": 0.0001, "loss": 1.8609, "step": 1728 }, { "epoch": 0.41986401165614373, "grad_norm": 0.3527149558067322, "learning_rate": 0.0001, "loss": 1.6799, "step": 1729 }, { "epoch": 0.42010684798445846, "grad_norm": 0.3358190059661865, "learning_rate": 0.0001, "loss": 1.6489, "step": 1730 }, { "epoch": 0.4203496843127732, "grad_norm": 0.3604898750782013, "learning_rate": 0.0001, "loss": 1.6847, "step": 1731 }, { "epoch": 0.42059252064108793, "grad_norm": 0.3554806113243103, "learning_rate": 0.0001, "loss": 1.7477, "step": 1732 }, { "epoch": 0.4208353569694026, "grad_norm": 0.3845835328102112, "learning_rate": 0.0001, "loss": 1.8873, "step": 1733 }, { "epoch": 0.42107819329771734, "grad_norm": 0.37965497374534607, "learning_rate": 0.0001, "loss": 1.8029, "step": 1734 }, { "epoch": 0.4213210296260321, "grad_norm": 0.3325018882751465, "learning_rate": 0.0001, "loss": 1.6127, "step": 1735 }, { "epoch": 0.42156386595434675, "grad_norm": 0.3592410981655121, "learning_rate": 0.0001, "loss": 1.7491, "step": 1736 }, { "epoch": 0.4218067022826615, "grad_norm": 0.3251824378967285, "learning_rate": 0.0001, "loss": 1.5902, "step": 1737 }, { "epoch": 0.4220495386109762, "grad_norm": 0.36237943172454834, "learning_rate": 0.0001, "loss": 1.7964, "step": 1738 }, { "epoch": 0.4222923749392909, "grad_norm": 0.320538729429245, "learning_rate": 0.0001, "loss": 1.6364, "step": 1739 }, { "epoch": 0.4225352112676056, "grad_norm": 0.34662577509880066, "learning_rate": 0.0001, "loss": 1.6954, "step": 1740 }, { "epoch": 0.42277804759592036, "grad_norm": 0.35295093059539795, "learning_rate": 0.0001, "loss": 1.7166, "step": 1741 }, { "epoch": 0.4230208839242351, "grad_norm": 0.3445799648761749, "learning_rate": 0.0001, "loss": 1.6879, "step": 1742 }, { "epoch": 0.42326372025254977, "grad_norm": 0.368622362613678, "learning_rate": 0.0001, "loss": 1.8182, "step": 1743 }, { "epoch": 0.4235065565808645, "grad_norm": 0.3579779267311096, "learning_rate": 0.0001, "loss": 1.8737, "step": 1744 }, { "epoch": 0.42374939290917923, "grad_norm": 0.35230371356010437, "learning_rate": 0.0001, "loss": 1.6501, "step": 1745 }, { "epoch": 0.4239922292374939, "grad_norm": 0.3304905593395233, "learning_rate": 0.0001, "loss": 1.7073, "step": 1746 }, { "epoch": 0.42423506556580864, "grad_norm": 0.34176966547966003, "learning_rate": 0.0001, "loss": 1.6926, "step": 1747 }, { "epoch": 0.4244779018941234, "grad_norm": 0.3877265453338623, "learning_rate": 0.0001, "loss": 1.8788, "step": 1748 }, { "epoch": 0.42472073822243805, "grad_norm": 0.35358646512031555, "learning_rate": 0.0001, "loss": 1.7133, "step": 1749 }, { "epoch": 0.4249635745507528, "grad_norm": 0.3553861677646637, "learning_rate": 0.0001, "loss": 1.7243, "step": 1750 }, { "epoch": 0.4252064108790675, "grad_norm": 0.3517538607120514, "learning_rate": 0.0001, "loss": 1.751, "step": 1751 }, { "epoch": 0.4254492472073822, "grad_norm": 0.36939677596092224, "learning_rate": 0.0001, "loss": 1.8552, "step": 1752 }, { "epoch": 0.42569208353569693, "grad_norm": 0.33711880445480347, "learning_rate": 0.0001, "loss": 1.6337, "step": 1753 }, { "epoch": 0.42593491986401166, "grad_norm": 0.3469174802303314, "learning_rate": 0.0001, "loss": 1.6768, "step": 1754 }, { "epoch": 0.4261777561923264, "grad_norm": 0.4004037380218506, "learning_rate": 0.0001, "loss": 1.888, "step": 1755 }, { "epoch": 0.4264205925206411, "grad_norm": 0.378012478351593, "learning_rate": 0.0001, "loss": 1.8775, "step": 1756 }, { "epoch": 0.4266634288489558, "grad_norm": 0.3540249764919281, "learning_rate": 0.0001, "loss": 1.8041, "step": 1757 }, { "epoch": 0.42690626517727054, "grad_norm": 0.36658915877342224, "learning_rate": 0.0001, "loss": 1.6588, "step": 1758 }, { "epoch": 0.4271491015055852, "grad_norm": 0.36717551946640015, "learning_rate": 0.0001, "loss": 1.7617, "step": 1759 }, { "epoch": 0.42739193783389995, "grad_norm": 0.3298027217388153, "learning_rate": 0.0001, "loss": 1.5488, "step": 1760 }, { "epoch": 0.4276347741622147, "grad_norm": 0.34846270084381104, "learning_rate": 0.0001, "loss": 1.8168, "step": 1761 }, { "epoch": 0.42787761049052936, "grad_norm": 0.3526111841201782, "learning_rate": 0.0001, "loss": 1.7507, "step": 1762 }, { "epoch": 0.4281204468188441, "grad_norm": 0.35745999217033386, "learning_rate": 0.0001, "loss": 1.7467, "step": 1763 }, { "epoch": 0.4283632831471588, "grad_norm": 0.360914409160614, "learning_rate": 0.0001, "loss": 1.587, "step": 1764 }, { "epoch": 0.42860611947547356, "grad_norm": 0.3446374535560608, "learning_rate": 0.0001, "loss": 1.6978, "step": 1765 }, { "epoch": 0.42884895580378823, "grad_norm": 0.3785291910171509, "learning_rate": 0.0001, "loss": 1.8251, "step": 1766 }, { "epoch": 0.42909179213210297, "grad_norm": 0.35578465461730957, "learning_rate": 0.0001, "loss": 1.7313, "step": 1767 }, { "epoch": 0.4293346284604177, "grad_norm": 0.39511141180992126, "learning_rate": 0.0001, "loss": 1.8263, "step": 1768 }, { "epoch": 0.4295774647887324, "grad_norm": 0.36311453580856323, "learning_rate": 0.0001, "loss": 1.8702, "step": 1769 }, { "epoch": 0.4298203011170471, "grad_norm": 0.39010193943977356, "learning_rate": 0.0001, "loss": 1.868, "step": 1770 }, { "epoch": 0.43006313744536184, "grad_norm": 0.3557552099227905, "learning_rate": 0.0001, "loss": 1.738, "step": 1771 }, { "epoch": 0.4303059737736765, "grad_norm": 0.34697064757347107, "learning_rate": 0.0001, "loss": 1.7679, "step": 1772 }, { "epoch": 0.43054881010199125, "grad_norm": 0.3562696576118469, "learning_rate": 0.0001, "loss": 1.6916, "step": 1773 }, { "epoch": 0.430791646430306, "grad_norm": 0.3603959381580353, "learning_rate": 0.0001, "loss": 1.8988, "step": 1774 }, { "epoch": 0.43103448275862066, "grad_norm": 0.34189701080322266, "learning_rate": 0.0001, "loss": 1.628, "step": 1775 }, { "epoch": 0.4312773190869354, "grad_norm": 0.3767715394496918, "learning_rate": 0.0001, "loss": 1.7701, "step": 1776 }, { "epoch": 0.43152015541525013, "grad_norm": 0.3822302222251892, "learning_rate": 0.0001, "loss": 1.7353, "step": 1777 }, { "epoch": 0.43176299174356486, "grad_norm": 0.37838810682296753, "learning_rate": 0.0001, "loss": 1.866, "step": 1778 }, { "epoch": 0.43200582807187954, "grad_norm": 0.36003077030181885, "learning_rate": 0.0001, "loss": 1.8084, "step": 1779 }, { "epoch": 0.43224866440019427, "grad_norm": 0.3723047375679016, "learning_rate": 0.0001, "loss": 1.8549, "step": 1780 }, { "epoch": 0.432491500728509, "grad_norm": 0.3599883019924164, "learning_rate": 0.0001, "loss": 1.692, "step": 1781 }, { "epoch": 0.4327343370568237, "grad_norm": 0.35087719559669495, "learning_rate": 0.0001, "loss": 1.7804, "step": 1782 }, { "epoch": 0.4329771733851384, "grad_norm": 0.3767469823360443, "learning_rate": 0.0001, "loss": 1.7284, "step": 1783 }, { "epoch": 0.43322000971345315, "grad_norm": 0.33156484365463257, "learning_rate": 0.0001, "loss": 1.6055, "step": 1784 }, { "epoch": 0.4334628460417678, "grad_norm": 0.3367984890937805, "learning_rate": 0.0001, "loss": 1.5564, "step": 1785 }, { "epoch": 0.43370568237008256, "grad_norm": 0.3552202880382538, "learning_rate": 0.0001, "loss": 1.829, "step": 1786 }, { "epoch": 0.4339485186983973, "grad_norm": 0.353182852268219, "learning_rate": 0.0001, "loss": 1.6823, "step": 1787 }, { "epoch": 0.434191355026712, "grad_norm": 0.3680180013179779, "learning_rate": 0.0001, "loss": 1.7933, "step": 1788 }, { "epoch": 0.4344341913550267, "grad_norm": 0.338387668132782, "learning_rate": 0.0001, "loss": 1.7065, "step": 1789 }, { "epoch": 0.43467702768334143, "grad_norm": 0.36020609736442566, "learning_rate": 0.0001, "loss": 1.8273, "step": 1790 }, { "epoch": 0.43491986401165617, "grad_norm": 0.3294867277145386, "learning_rate": 0.0001, "loss": 1.6185, "step": 1791 }, { "epoch": 0.43516270033997084, "grad_norm": 0.36147022247314453, "learning_rate": 0.0001, "loss": 1.4078, "step": 1792 }, { "epoch": 0.4354055366682856, "grad_norm": 0.35484182834625244, "learning_rate": 0.0001, "loss": 1.6535, "step": 1793 }, { "epoch": 0.4356483729966003, "grad_norm": 0.33079761266708374, "learning_rate": 0.0001, "loss": 1.585, "step": 1794 }, { "epoch": 0.435891209324915, "grad_norm": 0.35023409128189087, "learning_rate": 0.0001, "loss": 1.73, "step": 1795 }, { "epoch": 0.4361340456532297, "grad_norm": 0.3546440601348877, "learning_rate": 0.0001, "loss": 1.7474, "step": 1796 }, { "epoch": 0.43637688198154445, "grad_norm": 0.35574740171432495, "learning_rate": 0.0001, "loss": 1.9602, "step": 1797 }, { "epoch": 0.43661971830985913, "grad_norm": 0.3341620862483978, "learning_rate": 0.0001, "loss": 1.6204, "step": 1798 }, { "epoch": 0.43686255463817386, "grad_norm": 0.3560903072357178, "learning_rate": 0.0001, "loss": 1.6419, "step": 1799 }, { "epoch": 0.4371053909664886, "grad_norm": 0.35073208808898926, "learning_rate": 0.0001, "loss": 1.6708, "step": 1800 }, { "epoch": 0.4373482272948033, "grad_norm": 0.35862451791763306, "learning_rate": 0.0001, "loss": 1.7134, "step": 1801 }, { "epoch": 0.437591063623118, "grad_norm": 0.36779993772506714, "learning_rate": 0.0001, "loss": 1.8379, "step": 1802 }, { "epoch": 0.43783389995143274, "grad_norm": 0.35280412435531616, "learning_rate": 0.0001, "loss": 1.9213, "step": 1803 }, { "epoch": 0.43807673627974747, "grad_norm": 0.3488386869430542, "learning_rate": 0.0001, "loss": 1.7665, "step": 1804 }, { "epoch": 0.43831957260806215, "grad_norm": 0.3537375330924988, "learning_rate": 0.0001, "loss": 1.6809, "step": 1805 }, { "epoch": 0.4385624089363769, "grad_norm": 0.3733062148094177, "learning_rate": 0.0001, "loss": 1.9652, "step": 1806 }, { "epoch": 0.4388052452646916, "grad_norm": 0.3548164367675781, "learning_rate": 0.0001, "loss": 1.8302, "step": 1807 }, { "epoch": 0.4390480815930063, "grad_norm": 0.35301345586776733, "learning_rate": 0.0001, "loss": 1.861, "step": 1808 }, { "epoch": 0.439290917921321, "grad_norm": 0.34159356355667114, "learning_rate": 0.0001, "loss": 1.6349, "step": 1809 }, { "epoch": 0.43953375424963576, "grad_norm": 0.36830708384513855, "learning_rate": 0.0001, "loss": 1.8702, "step": 1810 }, { "epoch": 0.4397765905779505, "grad_norm": 0.3883112370967865, "learning_rate": 0.0001, "loss": 1.9395, "step": 1811 }, { "epoch": 0.44001942690626517, "grad_norm": 0.34868156909942627, "learning_rate": 0.0001, "loss": 1.7817, "step": 1812 }, { "epoch": 0.4402622632345799, "grad_norm": 0.36234238743782043, "learning_rate": 0.0001, "loss": 1.8744, "step": 1813 }, { "epoch": 0.44050509956289463, "grad_norm": 0.35224321484565735, "learning_rate": 0.0001, "loss": 1.8057, "step": 1814 }, { "epoch": 0.4407479358912093, "grad_norm": 0.3585010766983032, "learning_rate": 0.0001, "loss": 1.7047, "step": 1815 }, { "epoch": 0.44099077221952404, "grad_norm": 0.3444727957248688, "learning_rate": 0.0001, "loss": 1.6929, "step": 1816 }, { "epoch": 0.4412336085478388, "grad_norm": 0.34820786118507385, "learning_rate": 0.0001, "loss": 1.7742, "step": 1817 }, { "epoch": 0.44147644487615345, "grad_norm": 0.32335302233695984, "learning_rate": 0.0001, "loss": 1.6456, "step": 1818 }, { "epoch": 0.4417192812044682, "grad_norm": 0.3473454713821411, "learning_rate": 0.0001, "loss": 1.6254, "step": 1819 }, { "epoch": 0.4419621175327829, "grad_norm": 0.32856178283691406, "learning_rate": 0.0001, "loss": 1.5016, "step": 1820 }, { "epoch": 0.4422049538610976, "grad_norm": 0.346571147441864, "learning_rate": 0.0001, "loss": 1.7544, "step": 1821 }, { "epoch": 0.4424477901894123, "grad_norm": 0.35231831669807434, "learning_rate": 0.0001, "loss": 1.6208, "step": 1822 }, { "epoch": 0.44269062651772706, "grad_norm": 0.35263708233833313, "learning_rate": 0.0001, "loss": 1.7064, "step": 1823 }, { "epoch": 0.4429334628460418, "grad_norm": 0.3441479206085205, "learning_rate": 0.0001, "loss": 1.7732, "step": 1824 }, { "epoch": 0.44317629917435647, "grad_norm": 0.3736101984977722, "learning_rate": 0.0001, "loss": 1.7838, "step": 1825 }, { "epoch": 0.4434191355026712, "grad_norm": 0.3775104880332947, "learning_rate": 0.0001, "loss": 1.6946, "step": 1826 }, { "epoch": 0.44366197183098594, "grad_norm": 0.36177685856819153, "learning_rate": 0.0001, "loss": 1.9272, "step": 1827 }, { "epoch": 0.4439048081593006, "grad_norm": 0.35995036363601685, "learning_rate": 0.0001, "loss": 1.8043, "step": 1828 }, { "epoch": 0.44414764448761535, "grad_norm": 0.36013609170913696, "learning_rate": 0.0001, "loss": 1.8527, "step": 1829 }, { "epoch": 0.4443904808159301, "grad_norm": 0.35395514965057373, "learning_rate": 0.0001, "loss": 1.7323, "step": 1830 }, { "epoch": 0.44463331714424476, "grad_norm": 0.3533592224121094, "learning_rate": 0.0001, "loss": 1.7319, "step": 1831 }, { "epoch": 0.4448761534725595, "grad_norm": 0.350333034992218, "learning_rate": 0.0001, "loss": 1.6835, "step": 1832 }, { "epoch": 0.4451189898008742, "grad_norm": 0.3550693094730377, "learning_rate": 0.0001, "loss": 1.8188, "step": 1833 }, { "epoch": 0.44536182612918895, "grad_norm": 0.3411439061164856, "learning_rate": 0.0001, "loss": 1.7458, "step": 1834 }, { "epoch": 0.44560466245750363, "grad_norm": 0.3670227825641632, "learning_rate": 0.0001, "loss": 1.7695, "step": 1835 }, { "epoch": 0.44584749878581836, "grad_norm": 0.333892822265625, "learning_rate": 0.0001, "loss": 1.6497, "step": 1836 }, { "epoch": 0.4460903351141331, "grad_norm": 0.3675907850265503, "learning_rate": 0.0001, "loss": 1.7669, "step": 1837 }, { "epoch": 0.4463331714424478, "grad_norm": 0.36830997467041016, "learning_rate": 0.0001, "loss": 1.9007, "step": 1838 }, { "epoch": 0.4465760077707625, "grad_norm": 0.34332361817359924, "learning_rate": 0.0001, "loss": 1.62, "step": 1839 }, { "epoch": 0.44681884409907724, "grad_norm": 0.3424611985683441, "learning_rate": 0.0001, "loss": 1.5369, "step": 1840 }, { "epoch": 0.4470616804273919, "grad_norm": 0.3539303243160248, "learning_rate": 0.0001, "loss": 1.7515, "step": 1841 }, { "epoch": 0.44730451675570665, "grad_norm": 0.3896627724170685, "learning_rate": 0.0001, "loss": 1.861, "step": 1842 }, { "epoch": 0.4475473530840214, "grad_norm": 0.3435671031475067, "learning_rate": 0.0001, "loss": 1.6936, "step": 1843 }, { "epoch": 0.44779018941233606, "grad_norm": 0.3572455942630768, "learning_rate": 0.0001, "loss": 1.7223, "step": 1844 }, { "epoch": 0.4480330257406508, "grad_norm": 0.3949750065803528, "learning_rate": 0.0001, "loss": 1.8012, "step": 1845 }, { "epoch": 0.4482758620689655, "grad_norm": 0.3492611050605774, "learning_rate": 0.0001, "loss": 1.645, "step": 1846 }, { "epoch": 0.44851869839728026, "grad_norm": 0.33171871304512024, "learning_rate": 0.0001, "loss": 1.5878, "step": 1847 }, { "epoch": 0.44876153472559493, "grad_norm": 0.3477494418621063, "learning_rate": 0.0001, "loss": 1.7476, "step": 1848 }, { "epoch": 0.44900437105390967, "grad_norm": 0.35885927081108093, "learning_rate": 0.0001, "loss": 1.8095, "step": 1849 }, { "epoch": 0.4492472073822244, "grad_norm": 0.35665690898895264, "learning_rate": 0.0001, "loss": 1.8063, "step": 1850 }, { "epoch": 0.4494900437105391, "grad_norm": 0.31958791613578796, "learning_rate": 0.0001, "loss": 1.5572, "step": 1851 }, { "epoch": 0.4497328800388538, "grad_norm": 0.34891462326049805, "learning_rate": 0.0001, "loss": 1.5507, "step": 1852 }, { "epoch": 0.44997571636716854, "grad_norm": 0.34152233600616455, "learning_rate": 0.0001, "loss": 1.7601, "step": 1853 }, { "epoch": 0.4502185526954832, "grad_norm": 0.37790095806121826, "learning_rate": 0.0001, "loss": 1.8899, "step": 1854 }, { "epoch": 0.45046138902379795, "grad_norm": 0.35623136162757874, "learning_rate": 0.0001, "loss": 1.8179, "step": 1855 }, { "epoch": 0.4507042253521127, "grad_norm": 0.3646155297756195, "learning_rate": 0.0001, "loss": 1.8371, "step": 1856 }, { "epoch": 0.4509470616804274, "grad_norm": 0.3449878692626953, "learning_rate": 0.0001, "loss": 1.7263, "step": 1857 }, { "epoch": 0.4511898980087421, "grad_norm": 0.3796631991863251, "learning_rate": 0.0001, "loss": 1.8453, "step": 1858 }, { "epoch": 0.45143273433705683, "grad_norm": 0.36037197709083557, "learning_rate": 0.0001, "loss": 1.7422, "step": 1859 }, { "epoch": 0.45167557066537156, "grad_norm": 0.3578833341598511, "learning_rate": 0.0001, "loss": 1.7522, "step": 1860 }, { "epoch": 0.45191840699368624, "grad_norm": 0.36234596371650696, "learning_rate": 0.0001, "loss": 1.9205, "step": 1861 }, { "epoch": 0.45216124332200097, "grad_norm": 0.34786903858184814, "learning_rate": 0.0001, "loss": 1.6425, "step": 1862 }, { "epoch": 0.4524040796503157, "grad_norm": 0.3553171455860138, "learning_rate": 0.0001, "loss": 1.7245, "step": 1863 }, { "epoch": 0.4526469159786304, "grad_norm": 0.3154301345348358, "learning_rate": 0.0001, "loss": 1.4654, "step": 1864 }, { "epoch": 0.4528897523069451, "grad_norm": 0.34884896874427795, "learning_rate": 0.0001, "loss": 1.6409, "step": 1865 }, { "epoch": 0.45313258863525985, "grad_norm": 0.353974848985672, "learning_rate": 0.0001, "loss": 1.6605, "step": 1866 }, { "epoch": 0.4533754249635745, "grad_norm": 0.3681231141090393, "learning_rate": 0.0001, "loss": 1.7232, "step": 1867 }, { "epoch": 0.45361826129188926, "grad_norm": 0.3452052175998688, "learning_rate": 0.0001, "loss": 1.6828, "step": 1868 }, { "epoch": 0.453861097620204, "grad_norm": 0.3314920663833618, "learning_rate": 0.0001, "loss": 1.609, "step": 1869 }, { "epoch": 0.4541039339485187, "grad_norm": 0.36491215229034424, "learning_rate": 0.0001, "loss": 1.683, "step": 1870 }, { "epoch": 0.4543467702768334, "grad_norm": 0.3672131896018982, "learning_rate": 0.0001, "loss": 1.7338, "step": 1871 }, { "epoch": 0.45458960660514813, "grad_norm": 0.3563331663608551, "learning_rate": 0.0001, "loss": 1.877, "step": 1872 }, { "epoch": 0.45483244293346287, "grad_norm": 0.35385245084762573, "learning_rate": 0.0001, "loss": 1.6971, "step": 1873 }, { "epoch": 0.45507527926177754, "grad_norm": 0.33963268995285034, "learning_rate": 0.0001, "loss": 1.5958, "step": 1874 }, { "epoch": 0.4553181155900923, "grad_norm": 0.3327486515045166, "learning_rate": 0.0001, "loss": 1.7109, "step": 1875 }, { "epoch": 0.455560951918407, "grad_norm": 0.35015425086021423, "learning_rate": 0.0001, "loss": 1.7014, "step": 1876 }, { "epoch": 0.4558037882467217, "grad_norm": 0.3478430211544037, "learning_rate": 0.0001, "loss": 1.7751, "step": 1877 }, { "epoch": 0.4560466245750364, "grad_norm": 0.3734671175479889, "learning_rate": 0.0001, "loss": 1.8478, "step": 1878 }, { "epoch": 0.45628946090335115, "grad_norm": 0.3410831391811371, "learning_rate": 0.0001, "loss": 1.5819, "step": 1879 }, { "epoch": 0.4565322972316659, "grad_norm": 0.3469892740249634, "learning_rate": 0.0001, "loss": 1.706, "step": 1880 }, { "epoch": 0.45677513355998056, "grad_norm": 0.35892051458358765, "learning_rate": 0.0001, "loss": 1.6773, "step": 1881 }, { "epoch": 0.4570179698882953, "grad_norm": 0.3807806372642517, "learning_rate": 0.0001, "loss": 1.8788, "step": 1882 }, { "epoch": 0.45726080621661, "grad_norm": 0.34584805369377136, "learning_rate": 0.0001, "loss": 1.7598, "step": 1883 }, { "epoch": 0.4575036425449247, "grad_norm": 0.4014298617839813, "learning_rate": 0.0001, "loss": 1.932, "step": 1884 }, { "epoch": 0.45774647887323944, "grad_norm": 0.36960092186927795, "learning_rate": 0.0001, "loss": 1.8015, "step": 1885 }, { "epoch": 0.45798931520155417, "grad_norm": 0.34163033962249756, "learning_rate": 0.0001, "loss": 1.593, "step": 1886 }, { "epoch": 0.45823215152986885, "grad_norm": 0.36526426672935486, "learning_rate": 0.0001, "loss": 1.8164, "step": 1887 }, { "epoch": 0.4584749878581836, "grad_norm": 0.31698787212371826, "learning_rate": 0.0001, "loss": 1.65, "step": 1888 }, { "epoch": 0.4587178241864983, "grad_norm": 0.3732241988182068, "learning_rate": 0.0001, "loss": 1.8166, "step": 1889 }, { "epoch": 0.458960660514813, "grad_norm": 0.38333624601364136, "learning_rate": 0.0001, "loss": 1.8435, "step": 1890 }, { "epoch": 0.4592034968431277, "grad_norm": 0.3748052716255188, "learning_rate": 0.0001, "loss": 1.6272, "step": 1891 }, { "epoch": 0.45944633317144246, "grad_norm": 0.34906819462776184, "learning_rate": 0.0001, "loss": 1.719, "step": 1892 }, { "epoch": 0.4596891694997572, "grad_norm": 0.3537774384021759, "learning_rate": 0.0001, "loss": 1.6984, "step": 1893 }, { "epoch": 0.45993200582807187, "grad_norm": 0.3544154167175293, "learning_rate": 0.0001, "loss": 1.8092, "step": 1894 }, { "epoch": 0.4601748421563866, "grad_norm": 0.3754320740699768, "learning_rate": 0.0001, "loss": 1.5766, "step": 1895 }, { "epoch": 0.46041767848470133, "grad_norm": 0.34848159551620483, "learning_rate": 0.0001, "loss": 1.6516, "step": 1896 }, { "epoch": 0.460660514813016, "grad_norm": 0.3676202893257141, "learning_rate": 0.0001, "loss": 1.6977, "step": 1897 }, { "epoch": 0.46090335114133074, "grad_norm": 0.323017954826355, "learning_rate": 0.0001, "loss": 1.605, "step": 1898 }, { "epoch": 0.4611461874696455, "grad_norm": 0.3825530707836151, "learning_rate": 0.0001, "loss": 1.7385, "step": 1899 }, { "epoch": 0.46138902379796015, "grad_norm": 0.3630683124065399, "learning_rate": 0.0001, "loss": 1.786, "step": 1900 }, { "epoch": 0.4616318601262749, "grad_norm": 0.33266085386276245, "learning_rate": 0.0001, "loss": 1.6726, "step": 1901 }, { "epoch": 0.4618746964545896, "grad_norm": 0.3453136384487152, "learning_rate": 0.0001, "loss": 1.5764, "step": 1902 }, { "epoch": 0.46211753278290435, "grad_norm": 0.3523610830307007, "learning_rate": 0.0001, "loss": 1.7086, "step": 1903 }, { "epoch": 0.462360369111219, "grad_norm": 0.34262746572494507, "learning_rate": 0.0001, "loss": 1.6824, "step": 1904 }, { "epoch": 0.46260320543953376, "grad_norm": 0.3551229238510132, "learning_rate": 0.0001, "loss": 1.7265, "step": 1905 }, { "epoch": 0.4628460417678485, "grad_norm": 0.3580963909626007, "learning_rate": 0.0001, "loss": 1.7887, "step": 1906 }, { "epoch": 0.46308887809616317, "grad_norm": 0.32605284452438354, "learning_rate": 0.0001, "loss": 1.5617, "step": 1907 }, { "epoch": 0.4633317144244779, "grad_norm": 0.3345804512500763, "learning_rate": 0.0001, "loss": 1.5542, "step": 1908 }, { "epoch": 0.46357455075279264, "grad_norm": 0.35500892996788025, "learning_rate": 0.0001, "loss": 1.6323, "step": 1909 }, { "epoch": 0.4638173870811073, "grad_norm": 0.3939019739627838, "learning_rate": 0.0001, "loss": 1.6667, "step": 1910 }, { "epoch": 0.46406022340942205, "grad_norm": 0.3847699463367462, "learning_rate": 0.0001, "loss": 1.869, "step": 1911 }, { "epoch": 0.4643030597377368, "grad_norm": 0.35265713930130005, "learning_rate": 0.0001, "loss": 1.8361, "step": 1912 }, { "epoch": 0.46454589606605146, "grad_norm": 0.3612956404685974, "learning_rate": 0.0001, "loss": 1.867, "step": 1913 }, { "epoch": 0.4647887323943662, "grad_norm": 0.31383344531059265, "learning_rate": 0.0001, "loss": 1.4885, "step": 1914 }, { "epoch": 0.4650315687226809, "grad_norm": 0.3553354740142822, "learning_rate": 0.0001, "loss": 1.7453, "step": 1915 }, { "epoch": 0.46527440505099565, "grad_norm": 0.34411731362342834, "learning_rate": 0.0001, "loss": 1.5518, "step": 1916 }, { "epoch": 0.46551724137931033, "grad_norm": 0.3603088855743408, "learning_rate": 0.0001, "loss": 1.7127, "step": 1917 }, { "epoch": 0.46576007770762506, "grad_norm": 0.36315515637397766, "learning_rate": 0.0001, "loss": 1.7099, "step": 1918 }, { "epoch": 0.4660029140359398, "grad_norm": 0.33600983023643494, "learning_rate": 0.0001, "loss": 1.6182, "step": 1919 }, { "epoch": 0.4662457503642545, "grad_norm": 0.3534862995147705, "learning_rate": 0.0001, "loss": 1.6327, "step": 1920 }, { "epoch": 0.4664885866925692, "grad_norm": 0.3495807647705078, "learning_rate": 0.0001, "loss": 1.7932, "step": 1921 }, { "epoch": 0.46673142302088394, "grad_norm": 0.3636340796947479, "learning_rate": 0.0001, "loss": 1.7882, "step": 1922 }, { "epoch": 0.4669742593491986, "grad_norm": 0.3711027503013611, "learning_rate": 0.0001, "loss": 1.7182, "step": 1923 }, { "epoch": 0.46721709567751335, "grad_norm": 0.34208375215530396, "learning_rate": 0.0001, "loss": 1.6319, "step": 1924 }, { "epoch": 0.4674599320058281, "grad_norm": 0.3585227429866791, "learning_rate": 0.0001, "loss": 1.7672, "step": 1925 }, { "epoch": 0.4677027683341428, "grad_norm": 0.36455008387565613, "learning_rate": 0.0001, "loss": 1.7176, "step": 1926 }, { "epoch": 0.4679456046624575, "grad_norm": 0.3617648184299469, "learning_rate": 0.0001, "loss": 1.77, "step": 1927 }, { "epoch": 0.4681884409907722, "grad_norm": 0.3547668755054474, "learning_rate": 0.0001, "loss": 1.6885, "step": 1928 }, { "epoch": 0.46843127731908696, "grad_norm": 0.3823624849319458, "learning_rate": 0.0001, "loss": 1.8885, "step": 1929 }, { "epoch": 0.46867411364740164, "grad_norm": 0.36009302735328674, "learning_rate": 0.0001, "loss": 1.6773, "step": 1930 }, { "epoch": 0.46891694997571637, "grad_norm": 0.34541547298431396, "learning_rate": 0.0001, "loss": 1.7179, "step": 1931 }, { "epoch": 0.4691597863040311, "grad_norm": 0.3517388105392456, "learning_rate": 0.0001, "loss": 1.8771, "step": 1932 }, { "epoch": 0.4694026226323458, "grad_norm": 0.3726664185523987, "learning_rate": 0.0001, "loss": 1.9417, "step": 1933 }, { "epoch": 0.4696454589606605, "grad_norm": 0.3713761568069458, "learning_rate": 0.0001, "loss": 1.8633, "step": 1934 }, { "epoch": 0.46988829528897524, "grad_norm": 0.34845829010009766, "learning_rate": 0.0001, "loss": 1.6415, "step": 1935 }, { "epoch": 0.4701311316172899, "grad_norm": 0.3643852770328522, "learning_rate": 0.0001, "loss": 1.8709, "step": 1936 }, { "epoch": 0.47037396794560465, "grad_norm": 0.37239712476730347, "learning_rate": 0.0001, "loss": 1.8805, "step": 1937 }, { "epoch": 0.4706168042739194, "grad_norm": 0.34127670526504517, "learning_rate": 0.0001, "loss": 1.7415, "step": 1938 }, { "epoch": 0.4708596406022341, "grad_norm": 0.3482852876186371, "learning_rate": 0.0001, "loss": 1.8389, "step": 1939 }, { "epoch": 0.4711024769305488, "grad_norm": 0.35871124267578125, "learning_rate": 0.0001, "loss": 1.7189, "step": 1940 }, { "epoch": 0.47134531325886353, "grad_norm": 0.33642691373825073, "learning_rate": 0.0001, "loss": 1.5999, "step": 1941 }, { "epoch": 0.47158814958717826, "grad_norm": 0.36310839653015137, "learning_rate": 0.0001, "loss": 1.7654, "step": 1942 }, { "epoch": 0.47183098591549294, "grad_norm": 0.334359735250473, "learning_rate": 0.0001, "loss": 1.5615, "step": 1943 }, { "epoch": 0.4720738222438077, "grad_norm": 0.3224147856235504, "learning_rate": 0.0001, "loss": 1.5394, "step": 1944 }, { "epoch": 0.4723166585721224, "grad_norm": 0.34409651160240173, "learning_rate": 0.0001, "loss": 1.6985, "step": 1945 }, { "epoch": 0.4725594949004371, "grad_norm": 0.346697598695755, "learning_rate": 0.0001, "loss": 1.5183, "step": 1946 }, { "epoch": 0.4728023312287518, "grad_norm": 0.33633822202682495, "learning_rate": 0.0001, "loss": 1.6303, "step": 1947 }, { "epoch": 0.47304516755706655, "grad_norm": 0.3662882447242737, "learning_rate": 0.0001, "loss": 1.9724, "step": 1948 }, { "epoch": 0.4732880038853812, "grad_norm": 0.35439127683639526, "learning_rate": 0.0001, "loss": 1.5703, "step": 1949 }, { "epoch": 0.47353084021369596, "grad_norm": 0.34117916226387024, "learning_rate": 0.0001, "loss": 1.5617, "step": 1950 }, { "epoch": 0.4737736765420107, "grad_norm": 0.3378322124481201, "learning_rate": 0.0001, "loss": 1.704, "step": 1951 }, { "epoch": 0.4740165128703254, "grad_norm": 0.32980063557624817, "learning_rate": 0.0001, "loss": 1.631, "step": 1952 }, { "epoch": 0.4742593491986401, "grad_norm": 0.37976697087287903, "learning_rate": 0.0001, "loss": 1.8591, "step": 1953 }, { "epoch": 0.47450218552695483, "grad_norm": 0.35602810978889465, "learning_rate": 0.0001, "loss": 1.6447, "step": 1954 }, { "epoch": 0.47474502185526957, "grad_norm": 0.34376612305641174, "learning_rate": 0.0001, "loss": 1.7337, "step": 1955 }, { "epoch": 0.47498785818358424, "grad_norm": 0.3398648798465729, "learning_rate": 0.0001, "loss": 1.7641, "step": 1956 }, { "epoch": 0.475230694511899, "grad_norm": 0.3545699119567871, "learning_rate": 0.0001, "loss": 1.7535, "step": 1957 }, { "epoch": 0.4754735308402137, "grad_norm": 0.31818118691444397, "learning_rate": 0.0001, "loss": 1.6283, "step": 1958 }, { "epoch": 0.4757163671685284, "grad_norm": 0.3350203335285187, "learning_rate": 0.0001, "loss": 1.6672, "step": 1959 }, { "epoch": 0.4759592034968431, "grad_norm": 0.37390241026878357, "learning_rate": 0.0001, "loss": 1.7388, "step": 1960 }, { "epoch": 0.47620203982515785, "grad_norm": 0.35129526257514954, "learning_rate": 0.0001, "loss": 1.7303, "step": 1961 }, { "epoch": 0.4764448761534726, "grad_norm": 0.35428422689437866, "learning_rate": 0.0001, "loss": 1.8535, "step": 1962 }, { "epoch": 0.47668771248178726, "grad_norm": 0.3445446491241455, "learning_rate": 0.0001, "loss": 1.7052, "step": 1963 }, { "epoch": 0.476930548810102, "grad_norm": 0.35369494557380676, "learning_rate": 0.0001, "loss": 1.7234, "step": 1964 }, { "epoch": 0.47717338513841673, "grad_norm": 0.3897012770175934, "learning_rate": 0.0001, "loss": 1.7988, "step": 1965 }, { "epoch": 0.4774162214667314, "grad_norm": 0.3370416760444641, "learning_rate": 0.0001, "loss": 1.7065, "step": 1966 }, { "epoch": 0.47765905779504614, "grad_norm": 0.3592875301837921, "learning_rate": 0.0001, "loss": 1.7909, "step": 1967 }, { "epoch": 0.47790189412336087, "grad_norm": 0.3603609800338745, "learning_rate": 0.0001, "loss": 1.5402, "step": 1968 }, { "epoch": 0.47814473045167555, "grad_norm": 0.34685638546943665, "learning_rate": 0.0001, "loss": 1.734, "step": 1969 }, { "epoch": 0.4783875667799903, "grad_norm": 0.3325386643409729, "learning_rate": 0.0001, "loss": 1.4731, "step": 1970 }, { "epoch": 0.478630403108305, "grad_norm": 0.3731539249420166, "learning_rate": 0.0001, "loss": 1.7351, "step": 1971 }, { "epoch": 0.4788732394366197, "grad_norm": 0.34925469756126404, "learning_rate": 0.0001, "loss": 1.7128, "step": 1972 }, { "epoch": 0.4791160757649344, "grad_norm": 0.3686675429344177, "learning_rate": 0.0001, "loss": 1.7226, "step": 1973 }, { "epoch": 0.47935891209324916, "grad_norm": 0.3480817675590515, "learning_rate": 0.0001, "loss": 1.6505, "step": 1974 }, { "epoch": 0.4796017484215639, "grad_norm": 0.3474791944026947, "learning_rate": 0.0001, "loss": 1.5714, "step": 1975 }, { "epoch": 0.47984458474987857, "grad_norm": 0.36596643924713135, "learning_rate": 0.0001, "loss": 1.8255, "step": 1976 }, { "epoch": 0.4800874210781933, "grad_norm": 0.3744461238384247, "learning_rate": 0.0001, "loss": 1.8206, "step": 1977 }, { "epoch": 0.48033025740650803, "grad_norm": 0.3452211618423462, "learning_rate": 0.0001, "loss": 1.7232, "step": 1978 }, { "epoch": 0.4805730937348227, "grad_norm": 0.3522043824195862, "learning_rate": 0.0001, "loss": 1.5923, "step": 1979 }, { "epoch": 0.48081593006313744, "grad_norm": 0.36870819330215454, "learning_rate": 0.0001, "loss": 1.8372, "step": 1980 }, { "epoch": 0.4810587663914522, "grad_norm": 0.36815735697746277, "learning_rate": 0.0001, "loss": 1.7929, "step": 1981 }, { "epoch": 0.48130160271976685, "grad_norm": 0.3665809631347656, "learning_rate": 0.0001, "loss": 1.7355, "step": 1982 }, { "epoch": 0.4815444390480816, "grad_norm": 0.4022822678089142, "learning_rate": 0.0001, "loss": 2.1035, "step": 1983 }, { "epoch": 0.4817872753763963, "grad_norm": 0.336421400308609, "learning_rate": 0.0001, "loss": 1.6003, "step": 1984 }, { "epoch": 0.48203011170471105, "grad_norm": 0.35770168900489807, "learning_rate": 0.0001, "loss": 1.7644, "step": 1985 }, { "epoch": 0.48227294803302573, "grad_norm": 0.3650563657283783, "learning_rate": 0.0001, "loss": 1.8717, "step": 1986 }, { "epoch": 0.48251578436134046, "grad_norm": 0.39090093970298767, "learning_rate": 0.0001, "loss": 1.7926, "step": 1987 }, { "epoch": 0.4827586206896552, "grad_norm": 0.37530893087387085, "learning_rate": 0.0001, "loss": 1.6973, "step": 1988 }, { "epoch": 0.48300145701796987, "grad_norm": 0.3707762062549591, "learning_rate": 0.0001, "loss": 1.8737, "step": 1989 }, { "epoch": 0.4832442933462846, "grad_norm": 0.35839855670928955, "learning_rate": 0.0001, "loss": 1.5889, "step": 1990 }, { "epoch": 0.48348712967459934, "grad_norm": 0.3365665674209595, "learning_rate": 0.0001, "loss": 1.5985, "step": 1991 }, { "epoch": 0.483729966002914, "grad_norm": 0.3621251583099365, "learning_rate": 0.0001, "loss": 1.8714, "step": 1992 }, { "epoch": 0.48397280233122875, "grad_norm": 0.34295421838760376, "learning_rate": 0.0001, "loss": 1.6912, "step": 1993 }, { "epoch": 0.4842156386595435, "grad_norm": 0.3422091007232666, "learning_rate": 0.0001, "loss": 1.7885, "step": 1994 }, { "epoch": 0.48445847498785816, "grad_norm": 0.3517985939979553, "learning_rate": 0.0001, "loss": 1.7095, "step": 1995 }, { "epoch": 0.4847013113161729, "grad_norm": 0.36482927203178406, "learning_rate": 0.0001, "loss": 1.7971, "step": 1996 }, { "epoch": 0.4849441476444876, "grad_norm": 0.359244167804718, "learning_rate": 0.0001, "loss": 1.5207, "step": 1997 }, { "epoch": 0.48518698397280235, "grad_norm": 0.3884401321411133, "learning_rate": 0.0001, "loss": 1.8081, "step": 1998 }, { "epoch": 0.48542982030111703, "grad_norm": 0.3618896007537842, "learning_rate": 0.0001, "loss": 1.805, "step": 1999 }, { "epoch": 0.48567265662943176, "grad_norm": 0.3770894706249237, "learning_rate": 0.0001, "loss": 1.7803, "step": 2000 }, { "epoch": 0.4859154929577465, "grad_norm": 0.3363848030567169, "learning_rate": 0.0001, "loss": 1.5195, "step": 2001 }, { "epoch": 0.4861583292860612, "grad_norm": 0.3846726417541504, "learning_rate": 0.0001, "loss": 1.7128, "step": 2002 }, { "epoch": 0.4864011656143759, "grad_norm": 0.3797794282436371, "learning_rate": 0.0001, "loss": 1.769, "step": 2003 }, { "epoch": 0.48664400194269064, "grad_norm": 0.3585468530654907, "learning_rate": 0.0001, "loss": 1.8298, "step": 2004 }, { "epoch": 0.4868868382710053, "grad_norm": 0.34364134073257446, "learning_rate": 0.0001, "loss": 1.8856, "step": 2005 }, { "epoch": 0.48712967459932005, "grad_norm": 0.3312186896800995, "learning_rate": 0.0001, "loss": 1.5684, "step": 2006 }, { "epoch": 0.4873725109276348, "grad_norm": 0.3411174416542053, "learning_rate": 0.0001, "loss": 1.6713, "step": 2007 }, { "epoch": 0.4876153472559495, "grad_norm": 0.36895158886909485, "learning_rate": 0.0001, "loss": 1.8552, "step": 2008 }, { "epoch": 0.4878581835842642, "grad_norm": 0.3716360032558441, "learning_rate": 0.0001, "loss": 1.886, "step": 2009 }, { "epoch": 0.4881010199125789, "grad_norm": 0.3676500916481018, "learning_rate": 0.0001, "loss": 1.7994, "step": 2010 }, { "epoch": 0.48834385624089366, "grad_norm": 0.35117608308792114, "learning_rate": 0.0001, "loss": 1.6921, "step": 2011 }, { "epoch": 0.48858669256920834, "grad_norm": 0.3696017265319824, "learning_rate": 0.0001, "loss": 1.8051, "step": 2012 }, { "epoch": 0.48882952889752307, "grad_norm": 0.3984812796115875, "learning_rate": 0.0001, "loss": 1.8622, "step": 2013 }, { "epoch": 0.4890723652258378, "grad_norm": 0.3565909266471863, "learning_rate": 0.0001, "loss": 1.731, "step": 2014 }, { "epoch": 0.4893152015541525, "grad_norm": 0.35164588689804077, "learning_rate": 0.0001, "loss": 1.7458, "step": 2015 }, { "epoch": 0.4895580378824672, "grad_norm": 0.36790066957473755, "learning_rate": 0.0001, "loss": 1.9291, "step": 2016 }, { "epoch": 0.48980087421078194, "grad_norm": 0.36064937710762024, "learning_rate": 0.0001, "loss": 1.7073, "step": 2017 }, { "epoch": 0.4900437105390966, "grad_norm": 0.3749876916408539, "learning_rate": 0.0001, "loss": 1.7085, "step": 2018 }, { "epoch": 0.49028654686741135, "grad_norm": 0.3429160416126251, "learning_rate": 0.0001, "loss": 1.619, "step": 2019 }, { "epoch": 0.4905293831957261, "grad_norm": 0.3696604371070862, "learning_rate": 0.0001, "loss": 1.7821, "step": 2020 }, { "epoch": 0.4907722195240408, "grad_norm": 0.3488297760486603, "learning_rate": 0.0001, "loss": 1.7989, "step": 2021 }, { "epoch": 0.4910150558523555, "grad_norm": 0.3773786127567291, "learning_rate": 0.0001, "loss": 1.8005, "step": 2022 }, { "epoch": 0.49125789218067023, "grad_norm": 0.36527174711227417, "learning_rate": 0.0001, "loss": 1.8716, "step": 2023 }, { "epoch": 0.49150072850898496, "grad_norm": 0.3569713830947876, "learning_rate": 0.0001, "loss": 1.7227, "step": 2024 }, { "epoch": 0.49174356483729964, "grad_norm": 0.358445942401886, "learning_rate": 0.0001, "loss": 1.802, "step": 2025 }, { "epoch": 0.4919864011656144, "grad_norm": 0.34071841835975647, "learning_rate": 0.0001, "loss": 1.7493, "step": 2026 }, { "epoch": 0.4922292374939291, "grad_norm": 0.37970632314682007, "learning_rate": 0.0001, "loss": 1.7215, "step": 2027 }, { "epoch": 0.4924720738222438, "grad_norm": 0.3528176546096802, "learning_rate": 0.0001, "loss": 1.6993, "step": 2028 }, { "epoch": 0.4927149101505585, "grad_norm": 0.36992496252059937, "learning_rate": 0.0001, "loss": 1.8592, "step": 2029 }, { "epoch": 0.49295774647887325, "grad_norm": 0.36124253273010254, "learning_rate": 0.0001, "loss": 1.703, "step": 2030 }, { "epoch": 0.493200582807188, "grad_norm": 0.35745152831077576, "learning_rate": 0.0001, "loss": 1.7555, "step": 2031 }, { "epoch": 0.49344341913550266, "grad_norm": 0.36192139983177185, "learning_rate": 0.0001, "loss": 1.8643, "step": 2032 }, { "epoch": 0.4936862554638174, "grad_norm": 0.3453996777534485, "learning_rate": 0.0001, "loss": 1.4152, "step": 2033 }, { "epoch": 0.4939290917921321, "grad_norm": 0.36538147926330566, "learning_rate": 0.0001, "loss": 1.7344, "step": 2034 }, { "epoch": 0.4941719281204468, "grad_norm": 0.3792535066604614, "learning_rate": 0.0001, "loss": 1.8989, "step": 2035 }, { "epoch": 0.49441476444876153, "grad_norm": 0.3576485216617584, "learning_rate": 0.0001, "loss": 1.7443, "step": 2036 }, { "epoch": 0.49465760077707627, "grad_norm": 0.3794737756252289, "learning_rate": 0.0001, "loss": 1.8746, "step": 2037 }, { "epoch": 0.49490043710539094, "grad_norm": 0.35361653566360474, "learning_rate": 0.0001, "loss": 1.6875, "step": 2038 }, { "epoch": 0.4951432734337057, "grad_norm": 0.34948018193244934, "learning_rate": 0.0001, "loss": 1.7099, "step": 2039 }, { "epoch": 0.4953861097620204, "grad_norm": 0.3640398383140564, "learning_rate": 0.0001, "loss": 1.7508, "step": 2040 }, { "epoch": 0.4956289460903351, "grad_norm": 0.3670260012149811, "learning_rate": 0.0001, "loss": 1.7846, "step": 2041 }, { "epoch": 0.4958717824186498, "grad_norm": 0.35220327973365784, "learning_rate": 0.0001, "loss": 1.7981, "step": 2042 }, { "epoch": 0.49611461874696455, "grad_norm": 0.36040714383125305, "learning_rate": 0.0001, "loss": 1.756, "step": 2043 }, { "epoch": 0.4963574550752793, "grad_norm": 0.3941101133823395, "learning_rate": 0.0001, "loss": 1.7831, "step": 2044 }, { "epoch": 0.49660029140359396, "grad_norm": 0.37837153673171997, "learning_rate": 0.0001, "loss": 1.7308, "step": 2045 }, { "epoch": 0.4968431277319087, "grad_norm": 0.3587818145751953, "learning_rate": 0.0001, "loss": 1.6994, "step": 2046 }, { "epoch": 0.49708596406022343, "grad_norm": 0.33974432945251465, "learning_rate": 0.0001, "loss": 1.7277, "step": 2047 }, { "epoch": 0.4973288003885381, "grad_norm": 0.34887629747390747, "learning_rate": 0.0001, "loss": 1.6602, "step": 2048 }, { "epoch": 0.49757163671685284, "grad_norm": 0.348217636346817, "learning_rate": 0.0001, "loss": 1.7607, "step": 2049 }, { "epoch": 0.49781447304516757, "grad_norm": 0.38094449043273926, "learning_rate": 0.0001, "loss": 1.8629, "step": 2050 }, { "epoch": 0.49805730937348225, "grad_norm": 0.3522297739982605, "learning_rate": 0.0001, "loss": 1.8177, "step": 2051 }, { "epoch": 0.498300145701797, "grad_norm": 0.35744819045066833, "learning_rate": 0.0001, "loss": 1.637, "step": 2052 }, { "epoch": 0.4985429820301117, "grad_norm": 0.3388936221599579, "learning_rate": 0.0001, "loss": 1.5783, "step": 2053 }, { "epoch": 0.49878581835842645, "grad_norm": 0.35953521728515625, "learning_rate": 0.0001, "loss": 1.6525, "step": 2054 }, { "epoch": 0.4990286546867411, "grad_norm": 0.3640817105770111, "learning_rate": 0.0001, "loss": 1.676, "step": 2055 }, { "epoch": 0.49927149101505586, "grad_norm": 0.3549385666847229, "learning_rate": 0.0001, "loss": 1.6348, "step": 2056 }, { "epoch": 0.4995143273433706, "grad_norm": 0.36192211508750916, "learning_rate": 0.0001, "loss": 1.7436, "step": 2057 }, { "epoch": 0.49975716367168527, "grad_norm": 0.33218875527381897, "learning_rate": 0.0001, "loss": 1.7101, "step": 2058 }, { "epoch": 0.5, "grad_norm": 0.3624739944934845, "learning_rate": 0.0001, "loss": 1.7806, "step": 2059 }, { "epoch": 0.5002428363283147, "grad_norm": 0.33389827609062195, "learning_rate": 0.0001, "loss": 1.6401, "step": 2060 }, { "epoch": 0.5004856726566295, "grad_norm": 0.3476710915565491, "learning_rate": 0.0001, "loss": 1.64, "step": 2061 }, { "epoch": 0.5007285089849441, "grad_norm": 0.37466832995414734, "learning_rate": 0.0001, "loss": 1.7737, "step": 2062 }, { "epoch": 0.5009713453132588, "grad_norm": 0.34958529472351074, "learning_rate": 0.0001, "loss": 1.7622, "step": 2063 }, { "epoch": 0.5012141816415736, "grad_norm": 0.3609834611415863, "learning_rate": 0.0001, "loss": 1.6627, "step": 2064 }, { "epoch": 0.5014570179698883, "grad_norm": 0.3426356315612793, "learning_rate": 0.0001, "loss": 1.7813, "step": 2065 }, { "epoch": 0.501699854298203, "grad_norm": 0.3439556360244751, "learning_rate": 0.0001, "loss": 1.6216, "step": 2066 }, { "epoch": 0.5019426906265178, "grad_norm": 0.34030354022979736, "learning_rate": 0.0001, "loss": 1.7757, "step": 2067 }, { "epoch": 0.5021855269548324, "grad_norm": 0.3406347632408142, "learning_rate": 0.0001, "loss": 1.776, "step": 2068 }, { "epoch": 0.5024283632831471, "grad_norm": 0.3400167226791382, "learning_rate": 0.0001, "loss": 1.6481, "step": 2069 }, { "epoch": 0.5026711996114619, "grad_norm": 0.3473275899887085, "learning_rate": 0.0001, "loss": 1.7927, "step": 2070 }, { "epoch": 0.5029140359397766, "grad_norm": 0.40642112493515015, "learning_rate": 0.0001, "loss": 1.7355, "step": 2071 }, { "epoch": 0.5031568722680914, "grad_norm": 0.3894907832145691, "learning_rate": 0.0001, "loss": 1.7494, "step": 2072 }, { "epoch": 0.503399708596406, "grad_norm": 0.34641486406326294, "learning_rate": 0.0001, "loss": 1.6575, "step": 2073 }, { "epoch": 0.5036425449247207, "grad_norm": 0.34368327260017395, "learning_rate": 0.0001, "loss": 1.706, "step": 2074 }, { "epoch": 0.5038853812530355, "grad_norm": 0.3774399161338806, "learning_rate": 0.0001, "loss": 1.809, "step": 2075 }, { "epoch": 0.5041282175813502, "grad_norm": 0.3743821084499359, "learning_rate": 0.0001, "loss": 1.6534, "step": 2076 }, { "epoch": 0.5043710539096649, "grad_norm": 0.3586447834968567, "learning_rate": 0.0001, "loss": 1.7862, "step": 2077 }, { "epoch": 0.5046138902379796, "grad_norm": 0.3688397705554962, "learning_rate": 0.0001, "loss": 1.6918, "step": 2078 }, { "epoch": 0.5048567265662943, "grad_norm": 0.3526496887207031, "learning_rate": 0.0001, "loss": 1.7641, "step": 2079 }, { "epoch": 0.505099562894609, "grad_norm": 0.35157665610313416, "learning_rate": 0.0001, "loss": 1.6627, "step": 2080 }, { "epoch": 0.5053423992229238, "grad_norm": 0.3329943120479584, "learning_rate": 0.0001, "loss": 1.5457, "step": 2081 }, { "epoch": 0.5055852355512385, "grad_norm": 0.34889695048332214, "learning_rate": 0.0001, "loss": 1.683, "step": 2082 }, { "epoch": 0.5058280718795531, "grad_norm": 0.34723854064941406, "learning_rate": 0.0001, "loss": 1.7536, "step": 2083 }, { "epoch": 0.5060709082078679, "grad_norm": 0.36458468437194824, "learning_rate": 0.0001, "loss": 1.7602, "step": 2084 }, { "epoch": 0.5063137445361826, "grad_norm": 0.369058221578598, "learning_rate": 0.0001, "loss": 1.7215, "step": 2085 }, { "epoch": 0.5065565808644973, "grad_norm": 0.35088416934013367, "learning_rate": 0.0001, "loss": 1.7839, "step": 2086 }, { "epoch": 0.5067994171928121, "grad_norm": 0.389716774225235, "learning_rate": 0.0001, "loss": 1.7872, "step": 2087 }, { "epoch": 0.5070422535211268, "grad_norm": 0.36617782711982727, "learning_rate": 0.0001, "loss": 1.7071, "step": 2088 }, { "epoch": 0.5072850898494414, "grad_norm": 0.36376142501831055, "learning_rate": 0.0001, "loss": 1.6022, "step": 2089 }, { "epoch": 0.5075279261777562, "grad_norm": 0.36511048674583435, "learning_rate": 0.0001, "loss": 1.7629, "step": 2090 }, { "epoch": 0.5077707625060709, "grad_norm": 0.3856488764286041, "learning_rate": 0.0001, "loss": 1.7168, "step": 2091 }, { "epoch": 0.5080135988343856, "grad_norm": 0.37251293659210205, "learning_rate": 0.0001, "loss": 1.6808, "step": 2092 }, { "epoch": 0.5082564351627004, "grad_norm": 0.35156142711639404, "learning_rate": 0.0001, "loss": 1.7066, "step": 2093 }, { "epoch": 0.508499271491015, "grad_norm": 0.32686617970466614, "learning_rate": 0.0001, "loss": 1.6424, "step": 2094 }, { "epoch": 0.5087421078193298, "grad_norm": 0.34049227833747864, "learning_rate": 0.0001, "loss": 1.5752, "step": 2095 }, { "epoch": 0.5089849441476445, "grad_norm": 0.3699936866760254, "learning_rate": 0.0001, "loss": 1.7793, "step": 2096 }, { "epoch": 0.5092277804759592, "grad_norm": 0.3387681543827057, "learning_rate": 0.0001, "loss": 1.762, "step": 2097 }, { "epoch": 0.509470616804274, "grad_norm": 0.34820428490638733, "learning_rate": 0.0001, "loss": 1.6655, "step": 2098 }, { "epoch": 0.5097134531325886, "grad_norm": 0.3199908435344696, "learning_rate": 0.0001, "loss": 1.5278, "step": 2099 }, { "epoch": 0.5099562894609033, "grad_norm": 0.33102068305015564, "learning_rate": 0.0001, "loss": 1.4953, "step": 2100 }, { "epoch": 0.5101991257892181, "grad_norm": 0.351879358291626, "learning_rate": 0.0001, "loss": 1.6847, "step": 2101 }, { "epoch": 0.5104419621175328, "grad_norm": 0.3446686863899231, "learning_rate": 0.0001, "loss": 1.6902, "step": 2102 }, { "epoch": 0.5106847984458475, "grad_norm": 0.3876822888851166, "learning_rate": 0.0001, "loss": 1.8998, "step": 2103 }, { "epoch": 0.5109276347741623, "grad_norm": 0.3358386158943176, "learning_rate": 0.0001, "loss": 1.5755, "step": 2104 }, { "epoch": 0.5111704711024769, "grad_norm": 0.3508923053741455, "learning_rate": 0.0001, "loss": 1.7499, "step": 2105 }, { "epoch": 0.5114133074307916, "grad_norm": 0.3613468110561371, "learning_rate": 0.0001, "loss": 1.8817, "step": 2106 }, { "epoch": 0.5116561437591064, "grad_norm": 0.3340017795562744, "learning_rate": 0.0001, "loss": 1.5897, "step": 2107 }, { "epoch": 0.5118989800874211, "grad_norm": 0.3561114966869354, "learning_rate": 0.0001, "loss": 1.6141, "step": 2108 }, { "epoch": 0.5121418164157358, "grad_norm": 0.3637521266937256, "learning_rate": 0.0001, "loss": 1.6148, "step": 2109 }, { "epoch": 0.5123846527440505, "grad_norm": 0.3636873662471771, "learning_rate": 0.0001, "loss": 1.8358, "step": 2110 }, { "epoch": 0.5126274890723652, "grad_norm": 0.3566911220550537, "learning_rate": 0.0001, "loss": 1.7366, "step": 2111 }, { "epoch": 0.5128703254006799, "grad_norm": 0.33937013149261475, "learning_rate": 0.0001, "loss": 1.7309, "step": 2112 }, { "epoch": 0.5131131617289947, "grad_norm": 0.36434876918792725, "learning_rate": 0.0001, "loss": 1.7042, "step": 2113 }, { "epoch": 0.5133559980573094, "grad_norm": 0.35941821336746216, "learning_rate": 0.0001, "loss": 1.8072, "step": 2114 }, { "epoch": 0.513598834385624, "grad_norm": 0.36316072940826416, "learning_rate": 0.0001, "loss": 1.5349, "step": 2115 }, { "epoch": 0.5138416707139388, "grad_norm": 0.37612438201904297, "learning_rate": 0.0001, "loss": 1.8388, "step": 2116 }, { "epoch": 0.5140845070422535, "grad_norm": 0.3560262620449066, "learning_rate": 0.0001, "loss": 1.663, "step": 2117 }, { "epoch": 0.5143273433705683, "grad_norm": 0.37767353653907776, "learning_rate": 0.0001, "loss": 1.9991, "step": 2118 }, { "epoch": 0.514570179698883, "grad_norm": 0.33369192481040955, "learning_rate": 0.0001, "loss": 1.6447, "step": 2119 }, { "epoch": 0.5148130160271976, "grad_norm": 0.38418659567832947, "learning_rate": 0.0001, "loss": 1.8411, "step": 2120 }, { "epoch": 0.5150558523555124, "grad_norm": 0.3417730927467346, "learning_rate": 0.0001, "loss": 1.701, "step": 2121 }, { "epoch": 0.5152986886838271, "grad_norm": 0.3553977608680725, "learning_rate": 0.0001, "loss": 1.6914, "step": 2122 }, { "epoch": 0.5155415250121418, "grad_norm": 0.3727787435054779, "learning_rate": 0.0001, "loss": 1.8149, "step": 2123 }, { "epoch": 0.5157843613404566, "grad_norm": 0.38117727637290955, "learning_rate": 0.0001, "loss": 1.7707, "step": 2124 }, { "epoch": 0.5160271976687713, "grad_norm": 0.378630131483078, "learning_rate": 0.0001, "loss": 1.7856, "step": 2125 }, { "epoch": 0.5162700339970859, "grad_norm": 0.36176884174346924, "learning_rate": 0.0001, "loss": 1.8372, "step": 2126 }, { "epoch": 0.5165128703254007, "grad_norm": 0.3552527129650116, "learning_rate": 0.0001, "loss": 1.6077, "step": 2127 }, { "epoch": 0.5167557066537154, "grad_norm": 0.3917563855648041, "learning_rate": 0.0001, "loss": 1.825, "step": 2128 }, { "epoch": 0.5169985429820301, "grad_norm": 0.34401583671569824, "learning_rate": 0.0001, "loss": 1.7408, "step": 2129 }, { "epoch": 0.5172413793103449, "grad_norm": 0.36782148480415344, "learning_rate": 0.0001, "loss": 1.7808, "step": 2130 }, { "epoch": 0.5174842156386595, "grad_norm": 0.342189759016037, "learning_rate": 0.0001, "loss": 1.6103, "step": 2131 }, { "epoch": 0.5177270519669742, "grad_norm": 0.353523313999176, "learning_rate": 0.0001, "loss": 1.7307, "step": 2132 }, { "epoch": 0.517969888295289, "grad_norm": 0.3676711320877075, "learning_rate": 0.0001, "loss": 1.8342, "step": 2133 }, { "epoch": 0.5182127246236037, "grad_norm": 0.32856231927871704, "learning_rate": 0.0001, "loss": 1.6015, "step": 2134 }, { "epoch": 0.5184555609519184, "grad_norm": 0.35762903094291687, "learning_rate": 0.0001, "loss": 1.7695, "step": 2135 }, { "epoch": 0.5186983972802331, "grad_norm": 0.3561696708202362, "learning_rate": 0.0001, "loss": 1.8157, "step": 2136 }, { "epoch": 0.5189412336085478, "grad_norm": 0.343925416469574, "learning_rate": 0.0001, "loss": 1.7213, "step": 2137 }, { "epoch": 0.5191840699368625, "grad_norm": 0.356314480304718, "learning_rate": 0.0001, "loss": 1.7979, "step": 2138 }, { "epoch": 0.5194269062651773, "grad_norm": 0.37433797121047974, "learning_rate": 0.0001, "loss": 1.8121, "step": 2139 }, { "epoch": 0.519669742593492, "grad_norm": 0.34445253014564514, "learning_rate": 0.0001, "loss": 1.7456, "step": 2140 }, { "epoch": 0.5199125789218068, "grad_norm": 0.36527112126350403, "learning_rate": 0.0001, "loss": 1.7099, "step": 2141 }, { "epoch": 0.5201554152501214, "grad_norm": 0.3757280707359314, "learning_rate": 0.0001, "loss": 1.7387, "step": 2142 }, { "epoch": 0.5203982515784361, "grad_norm": 0.3530278503894806, "learning_rate": 0.0001, "loss": 1.7446, "step": 2143 }, { "epoch": 0.5206410879067509, "grad_norm": 0.36654311418533325, "learning_rate": 0.0001, "loss": 1.6952, "step": 2144 }, { "epoch": 0.5208839242350656, "grad_norm": 0.3734758198261261, "learning_rate": 0.0001, "loss": 1.6797, "step": 2145 }, { "epoch": 0.5211267605633803, "grad_norm": 0.3684309422969818, "learning_rate": 0.0001, "loss": 1.746, "step": 2146 }, { "epoch": 0.521369596891695, "grad_norm": 0.3862002491950989, "learning_rate": 0.0001, "loss": 1.8234, "step": 2147 }, { "epoch": 0.5216124332200097, "grad_norm": 0.3541664183139801, "learning_rate": 0.0001, "loss": 1.6616, "step": 2148 }, { "epoch": 0.5218552695483244, "grad_norm": 0.35986196994781494, "learning_rate": 0.0001, "loss": 1.711, "step": 2149 }, { "epoch": 0.5220981058766392, "grad_norm": 0.3609998822212219, "learning_rate": 0.0001, "loss": 1.8106, "step": 2150 }, { "epoch": 0.5223409422049539, "grad_norm": 0.3694157004356384, "learning_rate": 0.0001, "loss": 1.7587, "step": 2151 }, { "epoch": 0.5225837785332685, "grad_norm": 0.3328161835670471, "learning_rate": 0.0001, "loss": 1.5532, "step": 2152 }, { "epoch": 0.5228266148615833, "grad_norm": 0.32815298438072205, "learning_rate": 0.0001, "loss": 1.5474, "step": 2153 }, { "epoch": 0.523069451189898, "grad_norm": 0.37450891733169556, "learning_rate": 0.0001, "loss": 1.6402, "step": 2154 }, { "epoch": 0.5233122875182127, "grad_norm": 0.3439551591873169, "learning_rate": 0.0001, "loss": 1.7877, "step": 2155 }, { "epoch": 0.5235551238465275, "grad_norm": 0.3464338183403015, "learning_rate": 0.0001, "loss": 1.6436, "step": 2156 }, { "epoch": 0.5237979601748421, "grad_norm": 0.3607020378112793, "learning_rate": 0.0001, "loss": 1.7033, "step": 2157 }, { "epoch": 0.5240407965031568, "grad_norm": 0.36788424849510193, "learning_rate": 0.0001, "loss": 1.8079, "step": 2158 }, { "epoch": 0.5242836328314716, "grad_norm": 0.36867842078208923, "learning_rate": 0.0001, "loss": 1.7583, "step": 2159 }, { "epoch": 0.5245264691597863, "grad_norm": 0.34436655044555664, "learning_rate": 0.0001, "loss": 1.6348, "step": 2160 }, { "epoch": 0.524769305488101, "grad_norm": 0.33278247714042664, "learning_rate": 0.0001, "loss": 1.6667, "step": 2161 }, { "epoch": 0.5250121418164158, "grad_norm": 0.34308114647865295, "learning_rate": 0.0001, "loss": 1.6579, "step": 2162 }, { "epoch": 0.5252549781447304, "grad_norm": 0.33665505051612854, "learning_rate": 0.0001, "loss": 1.5245, "step": 2163 }, { "epoch": 0.5254978144730452, "grad_norm": 0.34041884541511536, "learning_rate": 0.0001, "loss": 1.6513, "step": 2164 }, { "epoch": 0.5257406508013599, "grad_norm": 0.6450101137161255, "learning_rate": 0.0001, "loss": 1.8578, "step": 2165 }, { "epoch": 0.5259834871296746, "grad_norm": 0.3492946922779083, "learning_rate": 0.0001, "loss": 1.6815, "step": 2166 }, { "epoch": 0.5262263234579894, "grad_norm": 0.39884358644485474, "learning_rate": 0.0001, "loss": 1.8944, "step": 2167 }, { "epoch": 0.526469159786304, "grad_norm": 0.3553820848464966, "learning_rate": 0.0001, "loss": 1.7083, "step": 2168 }, { "epoch": 0.5267119961146187, "grad_norm": 0.35157763957977295, "learning_rate": 0.0001, "loss": 1.7117, "step": 2169 }, { "epoch": 0.5269548324429335, "grad_norm": 0.33850955963134766, "learning_rate": 0.0001, "loss": 1.7726, "step": 2170 }, { "epoch": 0.5271976687712482, "grad_norm": 0.3849356472492218, "learning_rate": 0.0001, "loss": 1.8159, "step": 2171 }, { "epoch": 0.5274405050995629, "grad_norm": 0.3549228012561798, "learning_rate": 0.0001, "loss": 1.7322, "step": 2172 }, { "epoch": 0.5276833414278777, "grad_norm": 0.36918267607688904, "learning_rate": 0.0001, "loss": 1.8431, "step": 2173 }, { "epoch": 0.5279261777561923, "grad_norm": 0.3418583273887634, "learning_rate": 0.0001, "loss": 1.6985, "step": 2174 }, { "epoch": 0.528169014084507, "grad_norm": 0.34361299872398376, "learning_rate": 0.0001, "loss": 1.632, "step": 2175 }, { "epoch": 0.5284118504128218, "grad_norm": 0.35774165391921997, "learning_rate": 0.0001, "loss": 1.7134, "step": 2176 }, { "epoch": 0.5286546867411365, "grad_norm": 0.35581788420677185, "learning_rate": 0.0001, "loss": 1.7574, "step": 2177 }, { "epoch": 0.5288975230694511, "grad_norm": 0.3620051443576813, "learning_rate": 0.0001, "loss": 1.6363, "step": 2178 }, { "epoch": 0.5291403593977659, "grad_norm": 0.3930107355117798, "learning_rate": 0.0001, "loss": 1.9798, "step": 2179 }, { "epoch": 0.5293831957260806, "grad_norm": 0.37771743535995483, "learning_rate": 0.0001, "loss": 1.93, "step": 2180 }, { "epoch": 0.5296260320543953, "grad_norm": 0.3509754240512848, "learning_rate": 0.0001, "loss": 1.6358, "step": 2181 }, { "epoch": 0.5298688683827101, "grad_norm": 0.3814009428024292, "learning_rate": 0.0001, "loss": 1.8188, "step": 2182 }, { "epoch": 0.5301117047110248, "grad_norm": 0.34286820888519287, "learning_rate": 0.0001, "loss": 1.7615, "step": 2183 }, { "epoch": 0.5303545410393394, "grad_norm": 0.3434714674949646, "learning_rate": 0.0001, "loss": 1.8686, "step": 2184 }, { "epoch": 0.5305973773676542, "grad_norm": 0.35580405592918396, "learning_rate": 0.0001, "loss": 1.7198, "step": 2185 }, { "epoch": 0.5308402136959689, "grad_norm": 0.34779301285743713, "learning_rate": 0.0001, "loss": 1.5524, "step": 2186 }, { "epoch": 0.5310830500242837, "grad_norm": 0.37721124291419983, "learning_rate": 0.0001, "loss": 1.77, "step": 2187 }, { "epoch": 0.5313258863525984, "grad_norm": 0.32382553815841675, "learning_rate": 0.0001, "loss": 1.5097, "step": 2188 }, { "epoch": 0.531568722680913, "grad_norm": 0.3677857518196106, "learning_rate": 0.0001, "loss": 1.8567, "step": 2189 }, { "epoch": 0.5318115590092278, "grad_norm": 0.3528325855731964, "learning_rate": 0.0001, "loss": 1.781, "step": 2190 }, { "epoch": 0.5320543953375425, "grad_norm": 0.35266974568367004, "learning_rate": 0.0001, "loss": 1.6727, "step": 2191 }, { "epoch": 0.5322972316658572, "grad_norm": 0.38058826327323914, "learning_rate": 0.0001, "loss": 1.9637, "step": 2192 }, { "epoch": 0.532540067994172, "grad_norm": 0.37474575638771057, "learning_rate": 0.0001, "loss": 1.8031, "step": 2193 }, { "epoch": 0.5327829043224867, "grad_norm": 0.3883814215660095, "learning_rate": 0.0001, "loss": 1.8517, "step": 2194 }, { "epoch": 0.5330257406508013, "grad_norm": 0.37679925560951233, "learning_rate": 0.0001, "loss": 1.8224, "step": 2195 }, { "epoch": 0.5332685769791161, "grad_norm": 0.3672942817211151, "learning_rate": 0.0001, "loss": 1.9133, "step": 2196 }, { "epoch": 0.5335114133074308, "grad_norm": 0.35203585028648376, "learning_rate": 0.0001, "loss": 1.782, "step": 2197 }, { "epoch": 0.5337542496357455, "grad_norm": 0.3779953420162201, "learning_rate": 0.0001, "loss": 1.7499, "step": 2198 }, { "epoch": 0.5339970859640603, "grad_norm": 0.34961748123168945, "learning_rate": 0.0001, "loss": 1.8082, "step": 2199 }, { "epoch": 0.5342399222923749, "grad_norm": 0.36361628770828247, "learning_rate": 0.0001, "loss": 1.9355, "step": 2200 }, { "epoch": 0.5344827586206896, "grad_norm": 0.3708609342575073, "learning_rate": 0.0001, "loss": 1.7396, "step": 2201 }, { "epoch": 0.5347255949490044, "grad_norm": 0.3699905276298523, "learning_rate": 0.0001, "loss": 1.8169, "step": 2202 }, { "epoch": 0.5349684312773191, "grad_norm": 0.34191349148750305, "learning_rate": 0.0001, "loss": 1.6469, "step": 2203 }, { "epoch": 0.5352112676056338, "grad_norm": 0.34473779797554016, "learning_rate": 0.0001, "loss": 1.6362, "step": 2204 }, { "epoch": 0.5354541039339485, "grad_norm": 0.34325653314590454, "learning_rate": 0.0001, "loss": 1.623, "step": 2205 }, { "epoch": 0.5356969402622632, "grad_norm": 0.386184424161911, "learning_rate": 0.0001, "loss": 1.8688, "step": 2206 }, { "epoch": 0.5359397765905779, "grad_norm": 0.33374670147895813, "learning_rate": 0.0001, "loss": 1.4897, "step": 2207 }, { "epoch": 0.5361826129188927, "grad_norm": 0.3500770330429077, "learning_rate": 0.0001, "loss": 1.6829, "step": 2208 }, { "epoch": 0.5364254492472074, "grad_norm": 0.35503390431404114, "learning_rate": 0.0001, "loss": 1.7536, "step": 2209 }, { "epoch": 0.5366682855755222, "grad_norm": 0.3908369243144989, "learning_rate": 0.0001, "loss": 1.9048, "step": 2210 }, { "epoch": 0.5369111219038368, "grad_norm": 0.400844544172287, "learning_rate": 0.0001, "loss": 1.8024, "step": 2211 }, { "epoch": 0.5371539582321515, "grad_norm": 0.3470640480518341, "learning_rate": 0.0001, "loss": 1.7288, "step": 2212 }, { "epoch": 0.5373967945604663, "grad_norm": 0.34582623839378357, "learning_rate": 0.0001, "loss": 1.668, "step": 2213 }, { "epoch": 0.537639630888781, "grad_norm": 0.3589247763156891, "learning_rate": 0.0001, "loss": 1.8759, "step": 2214 }, { "epoch": 0.5378824672170957, "grad_norm": 0.313040167093277, "learning_rate": 0.0001, "loss": 1.5255, "step": 2215 }, { "epoch": 0.5381253035454104, "grad_norm": 0.36275044083595276, "learning_rate": 0.0001, "loss": 1.765, "step": 2216 }, { "epoch": 0.5383681398737251, "grad_norm": 0.35852962732315063, "learning_rate": 0.0001, "loss": 1.7484, "step": 2217 }, { "epoch": 0.5386109762020398, "grad_norm": 0.3503633737564087, "learning_rate": 0.0001, "loss": 1.6479, "step": 2218 }, { "epoch": 0.5388538125303546, "grad_norm": 0.34514978528022766, "learning_rate": 0.0001, "loss": 1.6507, "step": 2219 }, { "epoch": 0.5390966488586693, "grad_norm": 0.3434945344924927, "learning_rate": 0.0001, "loss": 1.5982, "step": 2220 }, { "epoch": 0.5393394851869839, "grad_norm": 0.3565481901168823, "learning_rate": 0.0001, "loss": 1.6312, "step": 2221 }, { "epoch": 0.5395823215152987, "grad_norm": 0.34751904010772705, "learning_rate": 0.0001, "loss": 1.5949, "step": 2222 }, { "epoch": 0.5398251578436134, "grad_norm": 0.36038702726364136, "learning_rate": 0.0001, "loss": 1.8388, "step": 2223 }, { "epoch": 0.5400679941719281, "grad_norm": 0.3723470866680145, "learning_rate": 0.0001, "loss": 1.7968, "step": 2224 }, { "epoch": 0.5403108305002429, "grad_norm": 0.35585710406303406, "learning_rate": 0.0001, "loss": 1.6106, "step": 2225 }, { "epoch": 0.5405536668285575, "grad_norm": 0.48469194769859314, "learning_rate": 0.0001, "loss": 1.5693, "step": 2226 }, { "epoch": 0.5407965031568722, "grad_norm": 0.37182337045669556, "learning_rate": 0.0001, "loss": 1.8367, "step": 2227 }, { "epoch": 0.541039339485187, "grad_norm": 0.3512868285179138, "learning_rate": 0.0001, "loss": 1.7117, "step": 2228 }, { "epoch": 0.5412821758135017, "grad_norm": 0.37100744247436523, "learning_rate": 0.0001, "loss": 1.6873, "step": 2229 }, { "epoch": 0.5415250121418164, "grad_norm": 0.3717704117298126, "learning_rate": 0.0001, "loss": 1.815, "step": 2230 }, { "epoch": 0.5417678484701312, "grad_norm": 0.3247489333152771, "learning_rate": 0.0001, "loss": 1.5791, "step": 2231 }, { "epoch": 0.5420106847984458, "grad_norm": 0.3410359025001526, "learning_rate": 0.0001, "loss": 1.7587, "step": 2232 }, { "epoch": 0.5422535211267606, "grad_norm": 0.3632831275463104, "learning_rate": 0.0001, "loss": 1.7161, "step": 2233 }, { "epoch": 0.5424963574550753, "grad_norm": 0.34458988904953003, "learning_rate": 0.0001, "loss": 1.8121, "step": 2234 }, { "epoch": 0.54273919378339, "grad_norm": 0.3627568185329437, "learning_rate": 0.0001, "loss": 1.729, "step": 2235 }, { "epoch": 0.5429820301117048, "grad_norm": 0.3488233983516693, "learning_rate": 0.0001, "loss": 1.8708, "step": 2236 }, { "epoch": 0.5432248664400194, "grad_norm": 0.37403398752212524, "learning_rate": 0.0001, "loss": 1.8789, "step": 2237 }, { "epoch": 0.5434677027683341, "grad_norm": 0.34006038308143616, "learning_rate": 0.0001, "loss": 1.6338, "step": 2238 }, { "epoch": 0.5437105390966489, "grad_norm": 0.3680495321750641, "learning_rate": 0.0001, "loss": 1.8293, "step": 2239 }, { "epoch": 0.5439533754249636, "grad_norm": 0.33197692036628723, "learning_rate": 0.0001, "loss": 1.6022, "step": 2240 }, { "epoch": 0.5441962117532783, "grad_norm": 0.3618168532848358, "learning_rate": 0.0001, "loss": 1.7324, "step": 2241 }, { "epoch": 0.544439048081593, "grad_norm": 0.357052206993103, "learning_rate": 0.0001, "loss": 1.7062, "step": 2242 }, { "epoch": 0.5446818844099077, "grad_norm": 0.346254825592041, "learning_rate": 0.0001, "loss": 1.6179, "step": 2243 }, { "epoch": 0.5449247207382224, "grad_norm": 0.37603044509887695, "learning_rate": 0.0001, "loss": 1.8327, "step": 2244 }, { "epoch": 0.5451675570665372, "grad_norm": 0.3336375951766968, "learning_rate": 0.0001, "loss": 1.6047, "step": 2245 }, { "epoch": 0.5454103933948519, "grad_norm": 0.354708194732666, "learning_rate": 0.0001, "loss": 1.7201, "step": 2246 }, { "epoch": 0.5456532297231665, "grad_norm": 0.35244420170783997, "learning_rate": 0.0001, "loss": 1.6719, "step": 2247 }, { "epoch": 0.5458960660514813, "grad_norm": 0.3440207839012146, "learning_rate": 0.0001, "loss": 1.5432, "step": 2248 }, { "epoch": 0.546138902379796, "grad_norm": 0.36056414246559143, "learning_rate": 0.0001, "loss": 1.8697, "step": 2249 }, { "epoch": 0.5463817387081107, "grad_norm": 0.32609304785728455, "learning_rate": 0.0001, "loss": 1.5132, "step": 2250 }, { "epoch": 0.5466245750364255, "grad_norm": 0.3519289493560791, "learning_rate": 0.0001, "loss": 1.4774, "step": 2251 }, { "epoch": 0.5468674113647402, "grad_norm": 0.35858359932899475, "learning_rate": 0.0001, "loss": 1.6757, "step": 2252 }, { "epoch": 0.5471102476930548, "grad_norm": 0.3339728116989136, "learning_rate": 0.0001, "loss": 1.5052, "step": 2253 }, { "epoch": 0.5473530840213696, "grad_norm": 0.36226099729537964, "learning_rate": 0.0001, "loss": 1.7865, "step": 2254 }, { "epoch": 0.5475959203496843, "grad_norm": 0.3570326864719391, "learning_rate": 0.0001, "loss": 1.7404, "step": 2255 }, { "epoch": 0.5478387566779991, "grad_norm": 0.376078724861145, "learning_rate": 0.0001, "loss": 1.8135, "step": 2256 }, { "epoch": 0.5480815930063138, "grad_norm": 0.3843255937099457, "learning_rate": 0.0001, "loss": 1.6103, "step": 2257 }, { "epoch": 0.5483244293346284, "grad_norm": 0.3383480906486511, "learning_rate": 0.0001, "loss": 1.6364, "step": 2258 }, { "epoch": 0.5485672656629432, "grad_norm": 0.36217546463012695, "learning_rate": 0.0001, "loss": 1.7194, "step": 2259 }, { "epoch": 0.5488101019912579, "grad_norm": 0.36972519755363464, "learning_rate": 0.0001, "loss": 1.7844, "step": 2260 }, { "epoch": 0.5490529383195726, "grad_norm": 0.3472967743873596, "learning_rate": 0.0001, "loss": 1.6209, "step": 2261 }, { "epoch": 0.5492957746478874, "grad_norm": 0.3597005009651184, "learning_rate": 0.0001, "loss": 1.6316, "step": 2262 }, { "epoch": 0.549538610976202, "grad_norm": 0.34558573365211487, "learning_rate": 0.0001, "loss": 1.6492, "step": 2263 }, { "epoch": 0.5497814473045167, "grad_norm": 0.3785961866378784, "learning_rate": 0.0001, "loss": 1.8447, "step": 2264 }, { "epoch": 0.5500242836328315, "grad_norm": 0.3740163743495941, "learning_rate": 0.0001, "loss": 1.7282, "step": 2265 }, { "epoch": 0.5502671199611462, "grad_norm": 0.37089261412620544, "learning_rate": 0.0001, "loss": 1.7876, "step": 2266 }, { "epoch": 0.5505099562894609, "grad_norm": 0.36666280031204224, "learning_rate": 0.0001, "loss": 1.7471, "step": 2267 }, { "epoch": 0.5507527926177757, "grad_norm": 0.36859360337257385, "learning_rate": 0.0001, "loss": 1.7612, "step": 2268 }, { "epoch": 0.5509956289460903, "grad_norm": 0.3185975253582001, "learning_rate": 0.0001, "loss": 1.5243, "step": 2269 }, { "epoch": 0.551238465274405, "grad_norm": 0.35294508934020996, "learning_rate": 0.0001, "loss": 1.7157, "step": 2270 }, { "epoch": 0.5514813016027198, "grad_norm": 0.3579111099243164, "learning_rate": 0.0001, "loss": 1.7531, "step": 2271 }, { "epoch": 0.5517241379310345, "grad_norm": 0.33986896276474, "learning_rate": 0.0001, "loss": 1.5978, "step": 2272 }, { "epoch": 0.5519669742593492, "grad_norm": 0.3544631600379944, "learning_rate": 0.0001, "loss": 1.7566, "step": 2273 }, { "epoch": 0.5522098105876639, "grad_norm": 0.3550451099872589, "learning_rate": 0.0001, "loss": 1.6645, "step": 2274 }, { "epoch": 0.5524526469159786, "grad_norm": 0.3410367965698242, "learning_rate": 0.0001, "loss": 1.7098, "step": 2275 }, { "epoch": 0.5526954832442933, "grad_norm": 0.37646061182022095, "learning_rate": 0.0001, "loss": 1.6585, "step": 2276 }, { "epoch": 0.5529383195726081, "grad_norm": 0.3732985854148865, "learning_rate": 0.0001, "loss": 1.7198, "step": 2277 }, { "epoch": 0.5531811559009228, "grad_norm": 0.3641611337661743, "learning_rate": 0.0001, "loss": 1.8386, "step": 2278 }, { "epoch": 0.5534239922292375, "grad_norm": 0.3658776879310608, "learning_rate": 0.0001, "loss": 1.7795, "step": 2279 }, { "epoch": 0.5536668285575522, "grad_norm": 0.37467020750045776, "learning_rate": 0.0001, "loss": 1.7865, "step": 2280 }, { "epoch": 0.5539096648858669, "grad_norm": 0.3677930533885956, "learning_rate": 0.0001, "loss": 1.7656, "step": 2281 }, { "epoch": 0.5541525012141817, "grad_norm": 0.3741012513637543, "learning_rate": 0.0001, "loss": 1.6091, "step": 2282 }, { "epoch": 0.5543953375424964, "grad_norm": 0.34404319524765015, "learning_rate": 0.0001, "loss": 1.6146, "step": 2283 }, { "epoch": 0.554638173870811, "grad_norm": 0.363204687833786, "learning_rate": 0.0001, "loss": 1.7769, "step": 2284 }, { "epoch": 0.5548810101991258, "grad_norm": 0.3678099811077118, "learning_rate": 0.0001, "loss": 1.7809, "step": 2285 }, { "epoch": 0.5551238465274405, "grad_norm": 0.3772222697734833, "learning_rate": 0.0001, "loss": 1.7406, "step": 2286 }, { "epoch": 0.5553666828557552, "grad_norm": 0.3508082330226898, "learning_rate": 0.0001, "loss": 1.772, "step": 2287 }, { "epoch": 0.55560951918407, "grad_norm": 0.35846176743507385, "learning_rate": 0.0001, "loss": 1.8131, "step": 2288 }, { "epoch": 0.5558523555123847, "grad_norm": 0.35966917872428894, "learning_rate": 0.0001, "loss": 1.677, "step": 2289 }, { "epoch": 0.5560951918406993, "grad_norm": 0.35907042026519775, "learning_rate": 0.0001, "loss": 1.7196, "step": 2290 }, { "epoch": 0.5563380281690141, "grad_norm": 0.36267662048339844, "learning_rate": 0.0001, "loss": 1.8167, "step": 2291 }, { "epoch": 0.5565808644973288, "grad_norm": 0.3560062050819397, "learning_rate": 0.0001, "loss": 1.7889, "step": 2292 }, { "epoch": 0.5568237008256435, "grad_norm": 0.3562415540218353, "learning_rate": 0.0001, "loss": 1.6888, "step": 2293 }, { "epoch": 0.5570665371539583, "grad_norm": 0.365655779838562, "learning_rate": 0.0001, "loss": 1.8064, "step": 2294 }, { "epoch": 0.5573093734822729, "grad_norm": 0.35320165753364563, "learning_rate": 0.0001, "loss": 1.7139, "step": 2295 }, { "epoch": 0.5575522098105876, "grad_norm": 0.39178183674812317, "learning_rate": 0.0001, "loss": 1.6381, "step": 2296 }, { "epoch": 0.5577950461389024, "grad_norm": 0.36920660734176636, "learning_rate": 0.0001, "loss": 1.81, "step": 2297 }, { "epoch": 0.5580378824672171, "grad_norm": 0.362719863653183, "learning_rate": 0.0001, "loss": 1.6541, "step": 2298 }, { "epoch": 0.5582807187955318, "grad_norm": 0.3680153489112854, "learning_rate": 0.0001, "loss": 1.9173, "step": 2299 }, { "epoch": 0.5585235551238465, "grad_norm": 0.37122082710266113, "learning_rate": 0.0001, "loss": 1.8223, "step": 2300 }, { "epoch": 0.5587663914521612, "grad_norm": 0.37359800934791565, "learning_rate": 0.0001, "loss": 1.7213, "step": 2301 }, { "epoch": 0.559009227780476, "grad_norm": 0.36098548769950867, "learning_rate": 0.0001, "loss": 1.7291, "step": 2302 }, { "epoch": 0.5592520641087907, "grad_norm": 0.3613625466823578, "learning_rate": 0.0001, "loss": 1.7256, "step": 2303 }, { "epoch": 0.5594949004371054, "grad_norm": 0.35531091690063477, "learning_rate": 0.0001, "loss": 1.7711, "step": 2304 }, { "epoch": 0.5597377367654202, "grad_norm": 0.37719884514808655, "learning_rate": 0.0001, "loss": 1.7064, "step": 2305 }, { "epoch": 0.5599805730937348, "grad_norm": 0.33986160159111023, "learning_rate": 0.0001, "loss": 1.6904, "step": 2306 }, { "epoch": 0.5602234094220495, "grad_norm": 0.3476993143558502, "learning_rate": 0.0001, "loss": 1.757, "step": 2307 }, { "epoch": 0.5604662457503643, "grad_norm": 0.3476622402667999, "learning_rate": 0.0001, "loss": 1.6747, "step": 2308 }, { "epoch": 0.560709082078679, "grad_norm": 0.33027929067611694, "learning_rate": 0.0001, "loss": 1.4717, "step": 2309 }, { "epoch": 0.5609519184069937, "grad_norm": 0.3475174307823181, "learning_rate": 0.0001, "loss": 1.7306, "step": 2310 }, { "epoch": 0.5611947547353084, "grad_norm": 0.3623664081096649, "learning_rate": 0.0001, "loss": 1.6337, "step": 2311 }, { "epoch": 0.5614375910636231, "grad_norm": 0.3583698272705078, "learning_rate": 0.0001, "loss": 1.7394, "step": 2312 }, { "epoch": 0.5616804273919378, "grad_norm": 0.3368013799190521, "learning_rate": 0.0001, "loss": 1.7616, "step": 2313 }, { "epoch": 0.5619232637202526, "grad_norm": 0.3758639395236969, "learning_rate": 0.0001, "loss": 1.6414, "step": 2314 }, { "epoch": 0.5621661000485673, "grad_norm": 0.3569642901420593, "learning_rate": 0.0001, "loss": 1.7582, "step": 2315 }, { "epoch": 0.5624089363768819, "grad_norm": 0.38806402683258057, "learning_rate": 0.0001, "loss": 1.9369, "step": 2316 }, { "epoch": 0.5626517727051967, "grad_norm": 0.3777295649051666, "learning_rate": 0.0001, "loss": 1.8661, "step": 2317 }, { "epoch": 0.5628946090335114, "grad_norm": 0.33787399530410767, "learning_rate": 0.0001, "loss": 1.6169, "step": 2318 }, { "epoch": 0.5631374453618261, "grad_norm": 0.3719368875026703, "learning_rate": 0.0001, "loss": 1.7745, "step": 2319 }, { "epoch": 0.5633802816901409, "grad_norm": 0.3340829610824585, "learning_rate": 0.0001, "loss": 1.5761, "step": 2320 }, { "epoch": 0.5636231180184555, "grad_norm": 0.3609652519226074, "learning_rate": 0.0001, "loss": 1.7269, "step": 2321 }, { "epoch": 0.5638659543467702, "grad_norm": 0.34608790278434753, "learning_rate": 0.0001, "loss": 1.5749, "step": 2322 }, { "epoch": 0.564108790675085, "grad_norm": 0.3639295697212219, "learning_rate": 0.0001, "loss": 1.7738, "step": 2323 }, { "epoch": 0.5643516270033997, "grad_norm": 0.3458191454410553, "learning_rate": 0.0001, "loss": 1.7874, "step": 2324 }, { "epoch": 0.5645944633317144, "grad_norm": 0.36620959639549255, "learning_rate": 0.0001, "loss": 1.7537, "step": 2325 }, { "epoch": 0.5648372996600292, "grad_norm": 0.3745834231376648, "learning_rate": 0.0001, "loss": 1.7993, "step": 2326 }, { "epoch": 0.5650801359883438, "grad_norm": 0.3734908401966095, "learning_rate": 0.0001, "loss": 1.6732, "step": 2327 }, { "epoch": 0.5653229723166586, "grad_norm": 0.35607555508613586, "learning_rate": 0.0001, "loss": 1.831, "step": 2328 }, { "epoch": 0.5655658086449733, "grad_norm": 0.3680264949798584, "learning_rate": 0.0001, "loss": 1.671, "step": 2329 }, { "epoch": 0.565808644973288, "grad_norm": 0.33606141805648804, "learning_rate": 0.0001, "loss": 1.4986, "step": 2330 }, { "epoch": 0.5660514813016028, "grad_norm": 0.3809346854686737, "learning_rate": 0.0001, "loss": 1.8307, "step": 2331 }, { "epoch": 0.5662943176299174, "grad_norm": 0.3508315682411194, "learning_rate": 0.0001, "loss": 1.6756, "step": 2332 }, { "epoch": 0.5665371539582321, "grad_norm": 0.34755823016166687, "learning_rate": 0.0001, "loss": 1.6317, "step": 2333 }, { "epoch": 0.5667799902865469, "grad_norm": 0.3529258966445923, "learning_rate": 0.0001, "loss": 1.6157, "step": 2334 }, { "epoch": 0.5670228266148616, "grad_norm": 0.3624809980392456, "learning_rate": 0.0001, "loss": 1.7348, "step": 2335 }, { "epoch": 0.5672656629431763, "grad_norm": 0.36164456605911255, "learning_rate": 0.0001, "loss": 1.7053, "step": 2336 }, { "epoch": 0.567508499271491, "grad_norm": 0.3604152500629425, "learning_rate": 0.0001, "loss": 1.7097, "step": 2337 }, { "epoch": 0.5677513355998057, "grad_norm": 0.3611051142215729, "learning_rate": 0.0001, "loss": 1.7406, "step": 2338 }, { "epoch": 0.5679941719281204, "grad_norm": 0.3456195294857025, "learning_rate": 0.0001, "loss": 1.7376, "step": 2339 }, { "epoch": 0.5682370082564352, "grad_norm": 0.34690234065055847, "learning_rate": 0.0001, "loss": 1.7485, "step": 2340 }, { "epoch": 0.5684798445847499, "grad_norm": 0.39296525716781616, "learning_rate": 0.0001, "loss": 1.7647, "step": 2341 }, { "epoch": 0.5687226809130645, "grad_norm": 0.36980095505714417, "learning_rate": 0.0001, "loss": 1.8579, "step": 2342 }, { "epoch": 0.5689655172413793, "grad_norm": 0.36739233136177063, "learning_rate": 0.0001, "loss": 1.7161, "step": 2343 }, { "epoch": 0.569208353569694, "grad_norm": 0.3408131003379822, "learning_rate": 0.0001, "loss": 1.5564, "step": 2344 }, { "epoch": 0.5694511898980087, "grad_norm": 0.3714952766895294, "learning_rate": 0.0001, "loss": 1.9206, "step": 2345 }, { "epoch": 0.5696940262263235, "grad_norm": 0.34982210397720337, "learning_rate": 0.0001, "loss": 1.5884, "step": 2346 }, { "epoch": 0.5699368625546382, "grad_norm": 0.35678350925445557, "learning_rate": 0.0001, "loss": 1.7792, "step": 2347 }, { "epoch": 0.5701796988829528, "grad_norm": 0.35591959953308105, "learning_rate": 0.0001, "loss": 1.7195, "step": 2348 }, { "epoch": 0.5704225352112676, "grad_norm": 0.3432258665561676, "learning_rate": 0.0001, "loss": 1.7125, "step": 2349 }, { "epoch": 0.5706653715395823, "grad_norm": 0.37227383255958557, "learning_rate": 0.0001, "loss": 1.7048, "step": 2350 }, { "epoch": 0.5709082078678971, "grad_norm": 0.34763336181640625, "learning_rate": 0.0001, "loss": 1.6562, "step": 2351 }, { "epoch": 0.5711510441962118, "grad_norm": 0.3854842185974121, "learning_rate": 0.0001, "loss": 1.5957, "step": 2352 }, { "epoch": 0.5713938805245264, "grad_norm": 0.364199697971344, "learning_rate": 0.0001, "loss": 1.7444, "step": 2353 }, { "epoch": 0.5716367168528412, "grad_norm": 0.35069018602371216, "learning_rate": 0.0001, "loss": 1.6804, "step": 2354 }, { "epoch": 0.5718795531811559, "grad_norm": 0.34375110268592834, "learning_rate": 0.0001, "loss": 1.6428, "step": 2355 }, { "epoch": 0.5721223895094706, "grad_norm": 0.3363782465457916, "learning_rate": 0.0001, "loss": 1.6488, "step": 2356 }, { "epoch": 0.5723652258377854, "grad_norm": 0.3884119987487793, "learning_rate": 0.0001, "loss": 1.7942, "step": 2357 }, { "epoch": 0.5726080621661, "grad_norm": 0.3595485985279083, "learning_rate": 0.0001, "loss": 1.7423, "step": 2358 }, { "epoch": 0.5728508984944147, "grad_norm": 0.3650807738304138, "learning_rate": 0.0001, "loss": 1.751, "step": 2359 }, { "epoch": 0.5730937348227295, "grad_norm": 0.3601040244102478, "learning_rate": 0.0001, "loss": 1.6485, "step": 2360 }, { "epoch": 0.5733365711510442, "grad_norm": 0.3422304689884186, "learning_rate": 0.0001, "loss": 1.6074, "step": 2361 }, { "epoch": 0.5735794074793589, "grad_norm": 0.35458236932754517, "learning_rate": 0.0001, "loss": 1.6944, "step": 2362 }, { "epoch": 0.5738222438076737, "grad_norm": 0.35751640796661377, "learning_rate": 0.0001, "loss": 1.7291, "step": 2363 }, { "epoch": 0.5740650801359883, "grad_norm": 0.34842342138290405, "learning_rate": 0.0001, "loss": 1.6863, "step": 2364 }, { "epoch": 0.574307916464303, "grad_norm": 0.352594256401062, "learning_rate": 0.0001, "loss": 1.7502, "step": 2365 }, { "epoch": 0.5745507527926178, "grad_norm": 0.36940956115722656, "learning_rate": 0.0001, "loss": 1.723, "step": 2366 }, { "epoch": 0.5747935891209325, "grad_norm": 0.34755411744117737, "learning_rate": 0.0001, "loss": 1.7728, "step": 2367 }, { "epoch": 0.5750364254492472, "grad_norm": 0.34088075160980225, "learning_rate": 0.0001, "loss": 1.6711, "step": 2368 }, { "epoch": 0.575279261777562, "grad_norm": 0.36846232414245605, "learning_rate": 0.0001, "loss": 1.695, "step": 2369 }, { "epoch": 0.5755220981058766, "grad_norm": 0.3828352391719818, "learning_rate": 0.0001, "loss": 1.8301, "step": 2370 }, { "epoch": 0.5757649344341913, "grad_norm": 0.3610253930091858, "learning_rate": 0.0001, "loss": 1.6749, "step": 2371 }, { "epoch": 0.5760077707625061, "grad_norm": 0.34908774495124817, "learning_rate": 0.0001, "loss": 1.7736, "step": 2372 }, { "epoch": 0.5762506070908208, "grad_norm": 0.3399523198604584, "learning_rate": 0.0001, "loss": 1.7515, "step": 2373 }, { "epoch": 0.5764934434191356, "grad_norm": 0.3800559341907501, "learning_rate": 0.0001, "loss": 1.828, "step": 2374 }, { "epoch": 0.5767362797474502, "grad_norm": 0.33364370465278625, "learning_rate": 0.0001, "loss": 1.6143, "step": 2375 }, { "epoch": 0.5769791160757649, "grad_norm": 0.3978668451309204, "learning_rate": 0.0001, "loss": 1.8787, "step": 2376 }, { "epoch": 0.5772219524040797, "grad_norm": 0.3593222498893738, "learning_rate": 0.0001, "loss": 1.734, "step": 2377 }, { "epoch": 0.5774647887323944, "grad_norm": 0.3668678104877472, "learning_rate": 0.0001, "loss": 1.7804, "step": 2378 }, { "epoch": 0.577707625060709, "grad_norm": 0.36095866560935974, "learning_rate": 0.0001, "loss": 1.6346, "step": 2379 }, { "epoch": 0.5779504613890238, "grad_norm": 0.33825570344924927, "learning_rate": 0.0001, "loss": 1.6229, "step": 2380 }, { "epoch": 0.5781932977173385, "grad_norm": 0.3309857249259949, "learning_rate": 0.0001, "loss": 1.4445, "step": 2381 }, { "epoch": 0.5784361340456532, "grad_norm": 0.3851270079612732, "learning_rate": 0.0001, "loss": 1.8312, "step": 2382 }, { "epoch": 0.578678970373968, "grad_norm": 0.3448592722415924, "learning_rate": 0.0001, "loss": 1.7032, "step": 2383 }, { "epoch": 0.5789218067022827, "grad_norm": 0.3687342405319214, "learning_rate": 0.0001, "loss": 1.7347, "step": 2384 }, { "epoch": 0.5791646430305973, "grad_norm": 0.3448163568973541, "learning_rate": 0.0001, "loss": 1.6357, "step": 2385 }, { "epoch": 0.5794074793589121, "grad_norm": 0.3667164742946625, "learning_rate": 0.0001, "loss": 1.7147, "step": 2386 }, { "epoch": 0.5796503156872268, "grad_norm": 0.3618122935295105, "learning_rate": 0.0001, "loss": 1.746, "step": 2387 }, { "epoch": 0.5798931520155415, "grad_norm": 0.3543204963207245, "learning_rate": 0.0001, "loss": 1.7053, "step": 2388 }, { "epoch": 0.5801359883438563, "grad_norm": 0.3682529032230377, "learning_rate": 0.0001, "loss": 1.7884, "step": 2389 }, { "epoch": 0.580378824672171, "grad_norm": 0.3413553237915039, "learning_rate": 0.0001, "loss": 1.619, "step": 2390 }, { "epoch": 0.5806216610004856, "grad_norm": 0.3471716642379761, "learning_rate": 0.0001, "loss": 1.6995, "step": 2391 }, { "epoch": 0.5808644973288004, "grad_norm": 0.34798210859298706, "learning_rate": 0.0001, "loss": 1.6615, "step": 2392 }, { "epoch": 0.5811073336571151, "grad_norm": 0.3592934012413025, "learning_rate": 0.0001, "loss": 1.603, "step": 2393 }, { "epoch": 0.5813501699854298, "grad_norm": 0.3641080856323242, "learning_rate": 0.0001, "loss": 1.8342, "step": 2394 }, { "epoch": 0.5815930063137446, "grad_norm": 0.3546813726425171, "learning_rate": 0.0001, "loss": 1.7738, "step": 2395 }, { "epoch": 0.5818358426420592, "grad_norm": 0.3424903452396393, "learning_rate": 0.0001, "loss": 1.6683, "step": 2396 }, { "epoch": 0.582078678970374, "grad_norm": 0.3579918444156647, "learning_rate": 0.0001, "loss": 1.6985, "step": 2397 }, { "epoch": 0.5823215152986887, "grad_norm": 0.35348811745643616, "learning_rate": 0.0001, "loss": 1.6904, "step": 2398 }, { "epoch": 0.5825643516270034, "grad_norm": 0.3623894155025482, "learning_rate": 0.0001, "loss": 1.7517, "step": 2399 }, { "epoch": 0.5828071879553182, "grad_norm": 0.36576738953590393, "learning_rate": 0.0001, "loss": 1.7106, "step": 2400 }, { "epoch": 0.5830500242836328, "grad_norm": 0.3525370657444, "learning_rate": 0.0001, "loss": 1.5246, "step": 2401 }, { "epoch": 0.5832928606119475, "grad_norm": 0.3752543330192566, "learning_rate": 0.0001, "loss": 1.758, "step": 2402 }, { "epoch": 0.5835356969402623, "grad_norm": 0.364324152469635, "learning_rate": 0.0001, "loss": 1.7315, "step": 2403 }, { "epoch": 0.583778533268577, "grad_norm": 0.3631533086299896, "learning_rate": 0.0001, "loss": 1.6388, "step": 2404 }, { "epoch": 0.5840213695968917, "grad_norm": 0.3875806927680969, "learning_rate": 0.0001, "loss": 1.9683, "step": 2405 }, { "epoch": 0.5842642059252064, "grad_norm": 0.35269179940223694, "learning_rate": 0.0001, "loss": 1.6662, "step": 2406 }, { "epoch": 0.5845070422535211, "grad_norm": 0.3396052122116089, "learning_rate": 0.0001, "loss": 1.6706, "step": 2407 }, { "epoch": 0.5847498785818358, "grad_norm": 0.3488730490207672, "learning_rate": 0.0001, "loss": 1.5859, "step": 2408 }, { "epoch": 0.5849927149101506, "grad_norm": 0.356308251619339, "learning_rate": 0.0001, "loss": 1.6471, "step": 2409 }, { "epoch": 0.5852355512384653, "grad_norm": 0.366447389125824, "learning_rate": 0.0001, "loss": 1.6691, "step": 2410 }, { "epoch": 0.58547838756678, "grad_norm": 0.35855424404144287, "learning_rate": 0.0001, "loss": 1.7121, "step": 2411 }, { "epoch": 0.5857212238950947, "grad_norm": 0.3608049154281616, "learning_rate": 0.0001, "loss": 1.6389, "step": 2412 }, { "epoch": 0.5859640602234094, "grad_norm": 0.3465087115764618, "learning_rate": 0.0001, "loss": 1.7342, "step": 2413 }, { "epoch": 0.5862068965517241, "grad_norm": 0.38191014528274536, "learning_rate": 0.0001, "loss": 1.7077, "step": 2414 }, { "epoch": 0.5864497328800389, "grad_norm": 0.36607179045677185, "learning_rate": 0.0001, "loss": 1.6006, "step": 2415 }, { "epoch": 0.5866925692083536, "grad_norm": 0.3555096983909607, "learning_rate": 0.0001, "loss": 1.6222, "step": 2416 }, { "epoch": 0.5869354055366682, "grad_norm": 0.36466822028160095, "learning_rate": 0.0001, "loss": 1.8408, "step": 2417 }, { "epoch": 0.587178241864983, "grad_norm": 0.36148613691329956, "learning_rate": 0.0001, "loss": 1.8149, "step": 2418 }, { "epoch": 0.5874210781932977, "grad_norm": 0.36628925800323486, "learning_rate": 0.0001, "loss": 1.7473, "step": 2419 }, { "epoch": 0.5876639145216125, "grad_norm": 0.32230299711227417, "learning_rate": 0.0001, "loss": 1.5559, "step": 2420 }, { "epoch": 0.5879067508499272, "grad_norm": 0.3491017818450928, "learning_rate": 0.0001, "loss": 1.6424, "step": 2421 }, { "epoch": 0.5881495871782418, "grad_norm": 0.3417803645133972, "learning_rate": 0.0001, "loss": 1.5939, "step": 2422 }, { "epoch": 0.5883924235065566, "grad_norm": 0.3433781862258911, "learning_rate": 0.0001, "loss": 1.6713, "step": 2423 }, { "epoch": 0.5886352598348713, "grad_norm": 0.35483452677726746, "learning_rate": 0.0001, "loss": 1.6194, "step": 2424 }, { "epoch": 0.588878096163186, "grad_norm": 0.3255854547023773, "learning_rate": 0.0001, "loss": 1.6343, "step": 2425 }, { "epoch": 0.5891209324915008, "grad_norm": 0.3660036325454712, "learning_rate": 0.0001, "loss": 1.7423, "step": 2426 }, { "epoch": 0.5893637688198154, "grad_norm": 0.36366453766822815, "learning_rate": 0.0001, "loss": 1.7891, "step": 2427 }, { "epoch": 0.5896066051481301, "grad_norm": 0.36128878593444824, "learning_rate": 0.0001, "loss": 1.7371, "step": 2428 }, { "epoch": 0.5898494414764449, "grad_norm": 0.3683059811592102, "learning_rate": 0.0001, "loss": 1.7501, "step": 2429 }, { "epoch": 0.5900922778047596, "grad_norm": 0.37489473819732666, "learning_rate": 0.0001, "loss": 1.7181, "step": 2430 }, { "epoch": 0.5903351141330743, "grad_norm": 0.34531599283218384, "learning_rate": 0.0001, "loss": 1.541, "step": 2431 }, { "epoch": 0.5905779504613891, "grad_norm": 0.37469252943992615, "learning_rate": 0.0001, "loss": 1.8827, "step": 2432 }, { "epoch": 0.5908207867897037, "grad_norm": 0.36275172233581543, "learning_rate": 0.0001, "loss": 1.7681, "step": 2433 }, { "epoch": 0.5910636231180184, "grad_norm": 0.35241881012916565, "learning_rate": 0.0001, "loss": 1.6363, "step": 2434 }, { "epoch": 0.5913064594463332, "grad_norm": 0.36884981393814087, "learning_rate": 0.0001, "loss": 1.6707, "step": 2435 }, { "epoch": 0.5915492957746479, "grad_norm": 0.353599488735199, "learning_rate": 0.0001, "loss": 1.7469, "step": 2436 }, { "epoch": 0.5917921321029626, "grad_norm": 0.3728369176387787, "learning_rate": 0.0001, "loss": 1.8387, "step": 2437 }, { "epoch": 0.5920349684312773, "grad_norm": 0.3472503125667572, "learning_rate": 0.0001, "loss": 1.6978, "step": 2438 }, { "epoch": 0.592277804759592, "grad_norm": 0.3670949339866638, "learning_rate": 0.0001, "loss": 1.7349, "step": 2439 }, { "epoch": 0.5925206410879067, "grad_norm": 0.3487583100795746, "learning_rate": 0.0001, "loss": 1.8164, "step": 2440 }, { "epoch": 0.5927634774162215, "grad_norm": 0.37606680393218994, "learning_rate": 0.0001, "loss": 1.7293, "step": 2441 }, { "epoch": 0.5930063137445362, "grad_norm": 0.3815697729587555, "learning_rate": 0.0001, "loss": 1.6828, "step": 2442 }, { "epoch": 0.593249150072851, "grad_norm": 0.35163789987564087, "learning_rate": 0.0001, "loss": 1.7363, "step": 2443 }, { "epoch": 0.5934919864011656, "grad_norm": 0.3514600396156311, "learning_rate": 0.0001, "loss": 1.7021, "step": 2444 }, { "epoch": 0.5937348227294803, "grad_norm": 0.3635815978050232, "learning_rate": 0.0001, "loss": 1.6672, "step": 2445 }, { "epoch": 0.5939776590577951, "grad_norm": 0.37155693769454956, "learning_rate": 0.0001, "loss": 1.6893, "step": 2446 }, { "epoch": 0.5942204953861098, "grad_norm": 0.3761202394962311, "learning_rate": 0.0001, "loss": 1.777, "step": 2447 }, { "epoch": 0.5944633317144244, "grad_norm": 0.3590002954006195, "learning_rate": 0.0001, "loss": 1.7537, "step": 2448 }, { "epoch": 0.5947061680427392, "grad_norm": 0.35956037044525146, "learning_rate": 0.0001, "loss": 1.6303, "step": 2449 }, { "epoch": 0.5949490043710539, "grad_norm": 0.345742791891098, "learning_rate": 0.0001, "loss": 1.8315, "step": 2450 }, { "epoch": 0.5951918406993686, "grad_norm": 0.35536545515060425, "learning_rate": 0.0001, "loss": 1.7572, "step": 2451 }, { "epoch": 0.5954346770276834, "grad_norm": 0.35663557052612305, "learning_rate": 0.0001, "loss": 1.8523, "step": 2452 }, { "epoch": 0.5956775133559981, "grad_norm": 0.341354101896286, "learning_rate": 0.0001, "loss": 1.7301, "step": 2453 }, { "epoch": 0.5959203496843127, "grad_norm": 0.3717452883720398, "learning_rate": 0.0001, "loss": 1.7591, "step": 2454 }, { "epoch": 0.5961631860126275, "grad_norm": 0.3513641357421875, "learning_rate": 0.0001, "loss": 1.565, "step": 2455 }, { "epoch": 0.5964060223409422, "grad_norm": 0.35649535059928894, "learning_rate": 0.0001, "loss": 1.6757, "step": 2456 }, { "epoch": 0.5966488586692569, "grad_norm": 0.375552237033844, "learning_rate": 0.0001, "loss": 1.7401, "step": 2457 }, { "epoch": 0.5968916949975717, "grad_norm": 0.3812007009983063, "learning_rate": 0.0001, "loss": 1.7399, "step": 2458 }, { "epoch": 0.5971345313258863, "grad_norm": 0.3755456805229187, "learning_rate": 0.0001, "loss": 1.7869, "step": 2459 }, { "epoch": 0.597377367654201, "grad_norm": 0.357443243265152, "learning_rate": 0.0001, "loss": 1.7365, "step": 2460 }, { "epoch": 0.5976202039825158, "grad_norm": 0.3731152415275574, "learning_rate": 0.0001, "loss": 1.677, "step": 2461 }, { "epoch": 0.5978630403108305, "grad_norm": 0.3395928144454956, "learning_rate": 0.0001, "loss": 1.4929, "step": 2462 }, { "epoch": 0.5981058766391452, "grad_norm": 0.36976638436317444, "learning_rate": 0.0001, "loss": 1.864, "step": 2463 }, { "epoch": 0.59834871296746, "grad_norm": 0.3605227470397949, "learning_rate": 0.0001, "loss": 1.733, "step": 2464 }, { "epoch": 0.5985915492957746, "grad_norm": 0.3721519112586975, "learning_rate": 0.0001, "loss": 1.5462, "step": 2465 }, { "epoch": 0.5988343856240894, "grad_norm": 0.3864923119544983, "learning_rate": 0.0001, "loss": 1.7985, "step": 2466 }, { "epoch": 0.5990772219524041, "grad_norm": 0.35037946701049805, "learning_rate": 0.0001, "loss": 1.8543, "step": 2467 }, { "epoch": 0.5993200582807188, "grad_norm": 0.3450808525085449, "learning_rate": 0.0001, "loss": 1.6291, "step": 2468 }, { "epoch": 0.5995628946090336, "grad_norm": 0.3674449920654297, "learning_rate": 0.0001, "loss": 1.7048, "step": 2469 }, { "epoch": 0.5998057309373482, "grad_norm": 0.3634595274925232, "learning_rate": 0.0001, "loss": 1.8068, "step": 2470 }, { "epoch": 0.6000485672656629, "grad_norm": 0.36641907691955566, "learning_rate": 0.0001, "loss": 1.7807, "step": 2471 }, { "epoch": 0.6002914035939777, "grad_norm": 0.3526882231235504, "learning_rate": 0.0001, "loss": 1.6572, "step": 2472 }, { "epoch": 0.6005342399222924, "grad_norm": 0.33994728326797485, "learning_rate": 0.0001, "loss": 1.6133, "step": 2473 }, { "epoch": 0.6007770762506071, "grad_norm": 0.35702425241470337, "learning_rate": 0.0001, "loss": 1.75, "step": 2474 }, { "epoch": 0.6010199125789218, "grad_norm": 0.3425130546092987, "learning_rate": 0.0001, "loss": 1.651, "step": 2475 }, { "epoch": 0.6012627489072365, "grad_norm": 0.37431252002716064, "learning_rate": 0.0001, "loss": 1.5988, "step": 2476 }, { "epoch": 0.6015055852355512, "grad_norm": 0.3774125874042511, "learning_rate": 0.0001, "loss": 1.8422, "step": 2477 }, { "epoch": 0.601748421563866, "grad_norm": 0.36197513341903687, "learning_rate": 0.0001, "loss": 1.7569, "step": 2478 }, { "epoch": 0.6019912578921807, "grad_norm": 0.37086841464042664, "learning_rate": 0.0001, "loss": 1.6805, "step": 2479 }, { "epoch": 0.6022340942204953, "grad_norm": 0.3471476137638092, "learning_rate": 0.0001, "loss": 1.7082, "step": 2480 }, { "epoch": 0.6024769305488101, "grad_norm": 0.34941521286964417, "learning_rate": 0.0001, "loss": 1.6494, "step": 2481 }, { "epoch": 0.6027197668771248, "grad_norm": 0.3552822768688202, "learning_rate": 0.0001, "loss": 1.6597, "step": 2482 }, { "epoch": 0.6029626032054395, "grad_norm": 0.3908756673336029, "learning_rate": 0.0001, "loss": 1.7776, "step": 2483 }, { "epoch": 0.6032054395337543, "grad_norm": 0.3638925850391388, "learning_rate": 0.0001, "loss": 1.6662, "step": 2484 }, { "epoch": 0.603448275862069, "grad_norm": 0.35275381803512573, "learning_rate": 0.0001, "loss": 1.6222, "step": 2485 }, { "epoch": 0.6036911121903836, "grad_norm": 0.34850969910621643, "learning_rate": 0.0001, "loss": 1.7403, "step": 2486 }, { "epoch": 0.6039339485186984, "grad_norm": 0.3468134105205536, "learning_rate": 0.0001, "loss": 1.7265, "step": 2487 }, { "epoch": 0.6041767848470131, "grad_norm": 0.36274734139442444, "learning_rate": 0.0001, "loss": 1.6525, "step": 2488 }, { "epoch": 0.6044196211753279, "grad_norm": 0.3470844328403473, "learning_rate": 0.0001, "loss": 1.699, "step": 2489 }, { "epoch": 0.6046624575036426, "grad_norm": 0.3809892535209656, "learning_rate": 0.0001, "loss": 1.8901, "step": 2490 }, { "epoch": 0.6049052938319572, "grad_norm": 0.35343703627586365, "learning_rate": 0.0001, "loss": 1.6325, "step": 2491 }, { "epoch": 0.605148130160272, "grad_norm": 0.345963716506958, "learning_rate": 0.0001, "loss": 1.7031, "step": 2492 }, { "epoch": 0.6053909664885867, "grad_norm": 0.37025344371795654, "learning_rate": 0.0001, "loss": 1.8492, "step": 2493 }, { "epoch": 0.6056338028169014, "grad_norm": 0.3486054539680481, "learning_rate": 0.0001, "loss": 1.6296, "step": 2494 }, { "epoch": 0.6058766391452162, "grad_norm": 0.34594064950942993, "learning_rate": 0.0001, "loss": 1.5821, "step": 2495 }, { "epoch": 0.6061194754735308, "grad_norm": 0.37135642766952515, "learning_rate": 0.0001, "loss": 1.7481, "step": 2496 }, { "epoch": 0.6063623118018455, "grad_norm": 0.3677951693534851, "learning_rate": 0.0001, "loss": 1.7435, "step": 2497 }, { "epoch": 0.6066051481301603, "grad_norm": 0.34797558188438416, "learning_rate": 0.0001, "loss": 1.6978, "step": 2498 }, { "epoch": 0.606847984458475, "grad_norm": 0.37647685408592224, "learning_rate": 0.0001, "loss": 1.7424, "step": 2499 }, { "epoch": 0.6070908207867897, "grad_norm": 0.35586169362068176, "learning_rate": 0.0001, "loss": 1.6601, "step": 2500 }, { "epoch": 0.6073336571151045, "grad_norm": 0.37027135491371155, "learning_rate": 0.0001, "loss": 1.7841, "step": 2501 }, { "epoch": 0.6075764934434191, "grad_norm": 0.3504107892513275, "learning_rate": 0.0001, "loss": 1.6539, "step": 2502 }, { "epoch": 0.6078193297717338, "grad_norm": 0.3658817708492279, "learning_rate": 0.0001, "loss": 1.6151, "step": 2503 }, { "epoch": 0.6080621661000486, "grad_norm": 0.3606242835521698, "learning_rate": 0.0001, "loss": 1.7228, "step": 2504 }, { "epoch": 0.6083050024283633, "grad_norm": 0.3518098294734955, "learning_rate": 0.0001, "loss": 1.7997, "step": 2505 }, { "epoch": 0.608547838756678, "grad_norm": 0.3385467827320099, "learning_rate": 0.0001, "loss": 1.6134, "step": 2506 }, { "epoch": 0.6087906750849927, "grad_norm": 0.35489702224731445, "learning_rate": 0.0001, "loss": 1.6014, "step": 2507 }, { "epoch": 0.6090335114133074, "grad_norm": 0.341196209192276, "learning_rate": 0.0001, "loss": 1.6125, "step": 2508 }, { "epoch": 0.6092763477416221, "grad_norm": 0.3764464557170868, "learning_rate": 0.0001, "loss": 1.7118, "step": 2509 }, { "epoch": 0.6095191840699369, "grad_norm": 0.3787265121936798, "learning_rate": 0.0001, "loss": 1.8545, "step": 2510 }, { "epoch": 0.6097620203982516, "grad_norm": 0.3660854399204254, "learning_rate": 0.0001, "loss": 1.7869, "step": 2511 }, { "epoch": 0.6100048567265663, "grad_norm": 0.32687124609947205, "learning_rate": 0.0001, "loss": 1.5415, "step": 2512 }, { "epoch": 0.610247693054881, "grad_norm": 0.3694194257259369, "learning_rate": 0.0001, "loss": 1.7856, "step": 2513 }, { "epoch": 0.6104905293831957, "grad_norm": 0.37333932518959045, "learning_rate": 0.0001, "loss": 1.6478, "step": 2514 }, { "epoch": 0.6107333657115105, "grad_norm": 0.363366961479187, "learning_rate": 0.0001, "loss": 1.8028, "step": 2515 }, { "epoch": 0.6109762020398252, "grad_norm": 0.3533819615840912, "learning_rate": 0.0001, "loss": 1.8264, "step": 2516 }, { "epoch": 0.6112190383681398, "grad_norm": 0.3544665575027466, "learning_rate": 0.0001, "loss": 1.6421, "step": 2517 }, { "epoch": 0.6114618746964546, "grad_norm": 0.358873575925827, "learning_rate": 0.0001, "loss": 1.5992, "step": 2518 }, { "epoch": 0.6117047110247693, "grad_norm": 0.3534395098686218, "learning_rate": 0.0001, "loss": 1.5811, "step": 2519 }, { "epoch": 0.611947547353084, "grad_norm": 0.3571525812149048, "learning_rate": 0.0001, "loss": 1.6819, "step": 2520 }, { "epoch": 0.6121903836813988, "grad_norm": 0.35596469044685364, "learning_rate": 0.0001, "loss": 1.7233, "step": 2521 }, { "epoch": 0.6124332200097135, "grad_norm": 0.36680924892425537, "learning_rate": 0.0001, "loss": 1.653, "step": 2522 }, { "epoch": 0.6126760563380281, "grad_norm": 0.3670637905597687, "learning_rate": 0.0001, "loss": 1.7633, "step": 2523 }, { "epoch": 0.6129188926663429, "grad_norm": 0.3535623550415039, "learning_rate": 0.0001, "loss": 1.7664, "step": 2524 }, { "epoch": 0.6131617289946576, "grad_norm": 0.36691170930862427, "learning_rate": 0.0001, "loss": 1.811, "step": 2525 }, { "epoch": 0.6134045653229723, "grad_norm": 0.3531697690486908, "learning_rate": 0.0001, "loss": 1.7234, "step": 2526 }, { "epoch": 0.6136474016512871, "grad_norm": 0.3551914691925049, "learning_rate": 0.0001, "loss": 1.6729, "step": 2527 }, { "epoch": 0.6138902379796017, "grad_norm": 0.36252561211586, "learning_rate": 0.0001, "loss": 1.7676, "step": 2528 }, { "epoch": 0.6141330743079164, "grad_norm": 0.3431050181388855, "learning_rate": 0.0001, "loss": 1.7429, "step": 2529 }, { "epoch": 0.6143759106362312, "grad_norm": 0.34209996461868286, "learning_rate": 0.0001, "loss": 1.6347, "step": 2530 }, { "epoch": 0.6146187469645459, "grad_norm": 0.34176865220069885, "learning_rate": 0.0001, "loss": 1.5931, "step": 2531 }, { "epoch": 0.6148615832928606, "grad_norm": 0.3619667887687683, "learning_rate": 0.0001, "loss": 1.6407, "step": 2532 }, { "epoch": 0.6151044196211753, "grad_norm": 0.3568732738494873, "learning_rate": 0.0001, "loss": 1.5506, "step": 2533 }, { "epoch": 0.61534725594949, "grad_norm": 0.3575170040130615, "learning_rate": 0.0001, "loss": 1.6636, "step": 2534 }, { "epoch": 0.6155900922778048, "grad_norm": 0.3435920774936676, "learning_rate": 0.0001, "loss": 1.6788, "step": 2535 }, { "epoch": 0.6158329286061195, "grad_norm": 0.3486897945404053, "learning_rate": 0.0001, "loss": 1.6519, "step": 2536 }, { "epoch": 0.6160757649344342, "grad_norm": 0.36233004927635193, "learning_rate": 0.0001, "loss": 1.7229, "step": 2537 }, { "epoch": 0.616318601262749, "grad_norm": 0.36419281363487244, "learning_rate": 0.0001, "loss": 1.7536, "step": 2538 }, { "epoch": 0.6165614375910636, "grad_norm": 0.36152634024620056, "learning_rate": 0.0001, "loss": 1.7739, "step": 2539 }, { "epoch": 0.6168042739193783, "grad_norm": 0.3492983877658844, "learning_rate": 0.0001, "loss": 1.6995, "step": 2540 }, { "epoch": 0.6170471102476931, "grad_norm": 0.35183224081993103, "learning_rate": 0.0001, "loss": 1.5924, "step": 2541 }, { "epoch": 0.6172899465760078, "grad_norm": 0.3499293029308319, "learning_rate": 0.0001, "loss": 1.6334, "step": 2542 }, { "epoch": 0.6175327829043225, "grad_norm": 0.36725538969039917, "learning_rate": 0.0001, "loss": 1.8583, "step": 2543 }, { "epoch": 0.6177756192326372, "grad_norm": 0.3790731430053711, "learning_rate": 0.0001, "loss": 1.729, "step": 2544 }, { "epoch": 0.6180184555609519, "grad_norm": 0.3718978464603424, "learning_rate": 0.0001, "loss": 1.7639, "step": 2545 }, { "epoch": 0.6182612918892666, "grad_norm": 0.3763367831707001, "learning_rate": 0.0001, "loss": 1.8659, "step": 2546 }, { "epoch": 0.6185041282175814, "grad_norm": 0.36928942799568176, "learning_rate": 0.0001, "loss": 1.8138, "step": 2547 }, { "epoch": 0.6187469645458961, "grad_norm": 0.36250531673431396, "learning_rate": 0.0001, "loss": 1.623, "step": 2548 }, { "epoch": 0.6189898008742107, "grad_norm": 0.36561158299446106, "learning_rate": 0.0001, "loss": 1.6913, "step": 2549 }, { "epoch": 0.6192326372025255, "grad_norm": 0.3352079689502716, "learning_rate": 0.0001, "loss": 1.5639, "step": 2550 }, { "epoch": 0.6194754735308402, "grad_norm": 0.3718976378440857, "learning_rate": 0.0001, "loss": 1.6785, "step": 2551 }, { "epoch": 0.6197183098591549, "grad_norm": 0.33391445875167847, "learning_rate": 0.0001, "loss": 1.4878, "step": 2552 }, { "epoch": 0.6199611461874697, "grad_norm": 0.34767550230026245, "learning_rate": 0.0001, "loss": 1.6781, "step": 2553 }, { "epoch": 0.6202039825157843, "grad_norm": 0.41571366786956787, "learning_rate": 0.0001, "loss": 2.0179, "step": 2554 }, { "epoch": 0.620446818844099, "grad_norm": 0.3613235354423523, "learning_rate": 0.0001, "loss": 1.8425, "step": 2555 }, { "epoch": 0.6206896551724138, "grad_norm": 0.35716602206230164, "learning_rate": 0.0001, "loss": 1.7749, "step": 2556 }, { "epoch": 0.6209324915007285, "grad_norm": 0.3657638728618622, "learning_rate": 0.0001, "loss": 1.7389, "step": 2557 }, { "epoch": 0.6211753278290433, "grad_norm": 0.354874849319458, "learning_rate": 0.0001, "loss": 1.7393, "step": 2558 }, { "epoch": 0.621418164157358, "grad_norm": 0.3623853325843811, "learning_rate": 0.0001, "loss": 1.6127, "step": 2559 }, { "epoch": 0.6216610004856726, "grad_norm": 0.3865921199321747, "learning_rate": 0.0001, "loss": 1.7271, "step": 2560 }, { "epoch": 0.6219038368139874, "grad_norm": 0.38097888231277466, "learning_rate": 0.0001, "loss": 1.8403, "step": 2561 }, { "epoch": 0.6221466731423021, "grad_norm": 0.34053757786750793, "learning_rate": 0.0001, "loss": 1.4744, "step": 2562 }, { "epoch": 0.6223895094706168, "grad_norm": 0.35708364844322205, "learning_rate": 0.0001, "loss": 1.6543, "step": 2563 }, { "epoch": 0.6226323457989316, "grad_norm": 0.3617381155490875, "learning_rate": 0.0001, "loss": 1.7614, "step": 2564 }, { "epoch": 0.6228751821272462, "grad_norm": 0.38946405053138733, "learning_rate": 0.0001, "loss": 1.6281, "step": 2565 }, { "epoch": 0.6231180184555609, "grad_norm": 0.3813820481300354, "learning_rate": 0.0001, "loss": 1.7442, "step": 2566 }, { "epoch": 0.6233608547838757, "grad_norm": 0.3859807848930359, "learning_rate": 0.0001, "loss": 1.88, "step": 2567 }, { "epoch": 0.6236036911121904, "grad_norm": 0.3598704934120178, "learning_rate": 0.0001, "loss": 1.6405, "step": 2568 }, { "epoch": 0.6238465274405051, "grad_norm": 0.37942245602607727, "learning_rate": 0.0001, "loss": 1.782, "step": 2569 }, { "epoch": 0.6240893637688198, "grad_norm": 0.34600645303726196, "learning_rate": 0.0001, "loss": 1.591, "step": 2570 }, { "epoch": 0.6243322000971345, "grad_norm": 0.35516923666000366, "learning_rate": 0.0001, "loss": 1.6701, "step": 2571 }, { "epoch": 0.6245750364254492, "grad_norm": 0.365143746137619, "learning_rate": 0.0001, "loss": 1.7584, "step": 2572 }, { "epoch": 0.624817872753764, "grad_norm": 0.3468589782714844, "learning_rate": 0.0001, "loss": 1.5642, "step": 2573 }, { "epoch": 0.6250607090820787, "grad_norm": 0.3397524952888489, "learning_rate": 0.0001, "loss": 1.7321, "step": 2574 }, { "epoch": 0.6253035454103933, "grad_norm": 0.3729221224784851, "learning_rate": 0.0001, "loss": 1.7544, "step": 2575 }, { "epoch": 0.6255463817387081, "grad_norm": 0.3533952236175537, "learning_rate": 0.0001, "loss": 1.7231, "step": 2576 }, { "epoch": 0.6257892180670228, "grad_norm": 0.38188955187797546, "learning_rate": 0.0001, "loss": 1.6541, "step": 2577 }, { "epoch": 0.6260320543953375, "grad_norm": 0.38412585854530334, "learning_rate": 0.0001, "loss": 1.7092, "step": 2578 }, { "epoch": 0.6262748907236523, "grad_norm": 0.40415629744529724, "learning_rate": 0.0001, "loss": 1.8571, "step": 2579 }, { "epoch": 0.626517727051967, "grad_norm": 0.36079704761505127, "learning_rate": 0.0001, "loss": 1.6119, "step": 2580 }, { "epoch": 0.6267605633802817, "grad_norm": 0.3780444264411926, "learning_rate": 0.0001, "loss": 1.7924, "step": 2581 }, { "epoch": 0.6270033997085964, "grad_norm": 0.3634578287601471, "learning_rate": 0.0001, "loss": 1.7201, "step": 2582 }, { "epoch": 0.6272462360369111, "grad_norm": 0.3554176688194275, "learning_rate": 0.0001, "loss": 1.6298, "step": 2583 }, { "epoch": 0.6274890723652259, "grad_norm": 0.34220534563064575, "learning_rate": 0.0001, "loss": 1.6022, "step": 2584 }, { "epoch": 0.6277319086935406, "grad_norm": 0.38494858145713806, "learning_rate": 0.0001, "loss": 1.742, "step": 2585 }, { "epoch": 0.6279747450218552, "grad_norm": 0.3729349970817566, "learning_rate": 0.0001, "loss": 1.7127, "step": 2586 }, { "epoch": 0.62821758135017, "grad_norm": 0.35282763838768005, "learning_rate": 0.0001, "loss": 1.6586, "step": 2587 }, { "epoch": 0.6284604176784847, "grad_norm": 0.3638066053390503, "learning_rate": 0.0001, "loss": 1.8, "step": 2588 }, { "epoch": 0.6287032540067994, "grad_norm": 0.36281734704971313, "learning_rate": 0.0001, "loss": 1.7697, "step": 2589 }, { "epoch": 0.6289460903351142, "grad_norm": 0.38954979181289673, "learning_rate": 0.0001, "loss": 1.8074, "step": 2590 }, { "epoch": 0.6291889266634288, "grad_norm": 0.3460639715194702, "learning_rate": 0.0001, "loss": 1.7445, "step": 2591 }, { "epoch": 0.6294317629917435, "grad_norm": 0.3687112033367157, "learning_rate": 0.0001, "loss": 1.7311, "step": 2592 }, { "epoch": 0.6296745993200583, "grad_norm": 0.3805738091468811, "learning_rate": 0.0001, "loss": 1.8862, "step": 2593 }, { "epoch": 0.629917435648373, "grad_norm": 0.346627801656723, "learning_rate": 0.0001, "loss": 1.6794, "step": 2594 }, { "epoch": 0.6301602719766877, "grad_norm": 0.34023693203926086, "learning_rate": 0.0001, "loss": 1.5122, "step": 2595 }, { "epoch": 0.6304031083050025, "grad_norm": 0.373219758272171, "learning_rate": 0.0001, "loss": 1.6898, "step": 2596 }, { "epoch": 0.6306459446333171, "grad_norm": 0.37325000762939453, "learning_rate": 0.0001, "loss": 1.789, "step": 2597 }, { "epoch": 0.6308887809616318, "grad_norm": 0.38243454694747925, "learning_rate": 0.0001, "loss": 1.8489, "step": 2598 }, { "epoch": 0.6311316172899466, "grad_norm": 0.35410985350608826, "learning_rate": 0.0001, "loss": 1.6322, "step": 2599 }, { "epoch": 0.6313744536182613, "grad_norm": 0.3585367798805237, "learning_rate": 0.0001, "loss": 1.805, "step": 2600 }, { "epoch": 0.631617289946576, "grad_norm": 0.37655138969421387, "learning_rate": 0.0001, "loss": 1.7564, "step": 2601 }, { "epoch": 0.6318601262748907, "grad_norm": 0.3572215139865875, "learning_rate": 0.0001, "loss": 1.7444, "step": 2602 }, { "epoch": 0.6321029626032054, "grad_norm": 0.39637553691864014, "learning_rate": 0.0001, "loss": 1.8615, "step": 2603 }, { "epoch": 0.6323457989315202, "grad_norm": 0.35991159081459045, "learning_rate": 0.0001, "loss": 1.7543, "step": 2604 }, { "epoch": 0.6325886352598349, "grad_norm": 0.32327163219451904, "learning_rate": 0.0001, "loss": 1.3716, "step": 2605 }, { "epoch": 0.6328314715881496, "grad_norm": 0.3595682978630066, "learning_rate": 0.0001, "loss": 1.8769, "step": 2606 }, { "epoch": 0.6330743079164644, "grad_norm": 0.3780916929244995, "learning_rate": 0.0001, "loss": 1.7702, "step": 2607 }, { "epoch": 0.633317144244779, "grad_norm": 0.3648173213005066, "learning_rate": 0.0001, "loss": 1.774, "step": 2608 }, { "epoch": 0.6335599805730937, "grad_norm": 0.3711946904659271, "learning_rate": 0.0001, "loss": 1.7383, "step": 2609 }, { "epoch": 0.6338028169014085, "grad_norm": 0.3568876385688782, "learning_rate": 0.0001, "loss": 1.6766, "step": 2610 }, { "epoch": 0.6340456532297232, "grad_norm": 0.3626377284526825, "learning_rate": 0.0001, "loss": 1.6791, "step": 2611 }, { "epoch": 0.6342884895580378, "grad_norm": 0.340934157371521, "learning_rate": 0.0001, "loss": 1.6389, "step": 2612 }, { "epoch": 0.6345313258863526, "grad_norm": 0.3646719455718994, "learning_rate": 0.0001, "loss": 1.8103, "step": 2613 }, { "epoch": 0.6347741622146673, "grad_norm": 0.36216479539871216, "learning_rate": 0.0001, "loss": 1.8449, "step": 2614 }, { "epoch": 0.635016998542982, "grad_norm": 0.36999836564064026, "learning_rate": 0.0001, "loss": 1.8061, "step": 2615 }, { "epoch": 0.6352598348712968, "grad_norm": 0.37942442297935486, "learning_rate": 0.0001, "loss": 1.7946, "step": 2616 }, { "epoch": 0.6355026711996115, "grad_norm": 0.3583141565322876, "learning_rate": 0.0001, "loss": 1.6131, "step": 2617 }, { "epoch": 0.6357455075279261, "grad_norm": 0.35164546966552734, "learning_rate": 0.0001, "loss": 1.771, "step": 2618 }, { "epoch": 0.6359883438562409, "grad_norm": 0.35949042439460754, "learning_rate": 0.0001, "loss": 1.6526, "step": 2619 }, { "epoch": 0.6362311801845556, "grad_norm": 0.38381242752075195, "learning_rate": 0.0001, "loss": 1.6743, "step": 2620 }, { "epoch": 0.6364740165128703, "grad_norm": 0.3700384497642517, "learning_rate": 0.0001, "loss": 1.721, "step": 2621 }, { "epoch": 0.6367168528411851, "grad_norm": 0.3850778043270111, "learning_rate": 0.0001, "loss": 1.9642, "step": 2622 }, { "epoch": 0.6369596891694997, "grad_norm": 0.3443865180015564, "learning_rate": 0.0001, "loss": 1.6247, "step": 2623 }, { "epoch": 0.6372025254978144, "grad_norm": 0.39186325669288635, "learning_rate": 0.0001, "loss": 1.8336, "step": 2624 }, { "epoch": 0.6374453618261292, "grad_norm": 0.404778391122818, "learning_rate": 0.0001, "loss": 1.622, "step": 2625 }, { "epoch": 0.6376881981544439, "grad_norm": 0.35440367460250854, "learning_rate": 0.0001, "loss": 1.8359, "step": 2626 }, { "epoch": 0.6379310344827587, "grad_norm": 0.387531042098999, "learning_rate": 0.0001, "loss": 1.7942, "step": 2627 }, { "epoch": 0.6381738708110734, "grad_norm": 0.35026273131370544, "learning_rate": 0.0001, "loss": 1.8036, "step": 2628 }, { "epoch": 0.638416707139388, "grad_norm": 0.35991358757019043, "learning_rate": 0.0001, "loss": 1.6788, "step": 2629 }, { "epoch": 0.6386595434677028, "grad_norm": 0.3719328045845032, "learning_rate": 0.0001, "loss": 1.7535, "step": 2630 }, { "epoch": 0.6389023797960175, "grad_norm": 0.38722363114356995, "learning_rate": 0.0001, "loss": 1.741, "step": 2631 }, { "epoch": 0.6391452161243322, "grad_norm": 0.3769434690475464, "learning_rate": 0.0001, "loss": 1.7619, "step": 2632 }, { "epoch": 0.639388052452647, "grad_norm": 0.3432542085647583, "learning_rate": 0.0001, "loss": 1.7678, "step": 2633 }, { "epoch": 0.6396308887809616, "grad_norm": 0.36289897561073303, "learning_rate": 0.0001, "loss": 1.5774, "step": 2634 }, { "epoch": 0.6398737251092763, "grad_norm": 0.37560367584228516, "learning_rate": 0.0001, "loss": 1.8036, "step": 2635 }, { "epoch": 0.6401165614375911, "grad_norm": 0.33592575788497925, "learning_rate": 0.0001, "loss": 1.6657, "step": 2636 }, { "epoch": 0.6403593977659058, "grad_norm": 0.34603649377822876, "learning_rate": 0.0001, "loss": 1.6675, "step": 2637 }, { "epoch": 0.6406022340942205, "grad_norm": 0.35474568605422974, "learning_rate": 0.0001, "loss": 1.5955, "step": 2638 }, { "epoch": 0.6408450704225352, "grad_norm": 0.37561899423599243, "learning_rate": 0.0001, "loss": 1.79, "step": 2639 }, { "epoch": 0.6410879067508499, "grad_norm": 0.3733697533607483, "learning_rate": 0.0001, "loss": 1.6762, "step": 2640 }, { "epoch": 0.6413307430791646, "grad_norm": 0.3769213557243347, "learning_rate": 0.0001, "loss": 1.74, "step": 2641 }, { "epoch": 0.6415735794074794, "grad_norm": 0.3656337857246399, "learning_rate": 0.0001, "loss": 1.7458, "step": 2642 }, { "epoch": 0.6418164157357941, "grad_norm": 0.358331561088562, "learning_rate": 0.0001, "loss": 1.7657, "step": 2643 }, { "epoch": 0.6420592520641087, "grad_norm": 0.36741209030151367, "learning_rate": 0.0001, "loss": 1.676, "step": 2644 }, { "epoch": 0.6423020883924235, "grad_norm": 0.35470911860466003, "learning_rate": 0.0001, "loss": 1.7777, "step": 2645 }, { "epoch": 0.6425449247207382, "grad_norm": 0.3638315200805664, "learning_rate": 0.0001, "loss": 1.8138, "step": 2646 }, { "epoch": 0.6427877610490529, "grad_norm": 0.36279577016830444, "learning_rate": 0.0001, "loss": 1.6799, "step": 2647 }, { "epoch": 0.6430305973773677, "grad_norm": 0.38609495759010315, "learning_rate": 0.0001, "loss": 1.8977, "step": 2648 }, { "epoch": 0.6432734337056824, "grad_norm": 0.379506379365921, "learning_rate": 0.0001, "loss": 1.7512, "step": 2649 }, { "epoch": 0.6435162700339971, "grad_norm": 0.3665315508842468, "learning_rate": 0.0001, "loss": 1.8016, "step": 2650 }, { "epoch": 0.6437591063623118, "grad_norm": 0.36043789982795715, "learning_rate": 0.0001, "loss": 1.7292, "step": 2651 }, { "epoch": 0.6440019426906265, "grad_norm": 0.3489297330379486, "learning_rate": 0.0001, "loss": 1.6868, "step": 2652 }, { "epoch": 0.6442447790189413, "grad_norm": 0.37208572030067444, "learning_rate": 0.0001, "loss": 1.8481, "step": 2653 }, { "epoch": 0.644487615347256, "grad_norm": 0.35702675580978394, "learning_rate": 0.0001, "loss": 1.7971, "step": 2654 }, { "epoch": 0.6447304516755706, "grad_norm": 0.36701202392578125, "learning_rate": 0.0001, "loss": 1.7149, "step": 2655 }, { "epoch": 0.6449732880038854, "grad_norm": 0.35397055745124817, "learning_rate": 0.0001, "loss": 1.615, "step": 2656 }, { "epoch": 0.6452161243322001, "grad_norm": 0.3622033894062042, "learning_rate": 0.0001, "loss": 1.7409, "step": 2657 }, { "epoch": 0.6454589606605148, "grad_norm": 0.35813143849372864, "learning_rate": 0.0001, "loss": 1.5353, "step": 2658 }, { "epoch": 0.6457017969888296, "grad_norm": 0.352824330329895, "learning_rate": 0.0001, "loss": 1.6359, "step": 2659 }, { "epoch": 0.6459446333171442, "grad_norm": 0.37167853116989136, "learning_rate": 0.0001, "loss": 1.7035, "step": 2660 }, { "epoch": 0.6461874696454589, "grad_norm": 0.360668808221817, "learning_rate": 0.0001, "loss": 1.7314, "step": 2661 }, { "epoch": 0.6464303059737737, "grad_norm": 0.36218956112861633, "learning_rate": 0.0001, "loss": 1.7743, "step": 2662 }, { "epoch": 0.6466731423020884, "grad_norm": 0.3747027814388275, "learning_rate": 0.0001, "loss": 1.7106, "step": 2663 }, { "epoch": 0.6469159786304031, "grad_norm": 0.33726468682289124, "learning_rate": 0.0001, "loss": 1.5821, "step": 2664 }, { "epoch": 0.6471588149587179, "grad_norm": 0.3692800998687744, "learning_rate": 0.0001, "loss": 1.7861, "step": 2665 }, { "epoch": 0.6474016512870325, "grad_norm": 0.35162967443466187, "learning_rate": 0.0001, "loss": 1.7523, "step": 2666 }, { "epoch": 0.6476444876153472, "grad_norm": 0.3360744118690491, "learning_rate": 0.0001, "loss": 1.6948, "step": 2667 }, { "epoch": 0.647887323943662, "grad_norm": 0.3539160192012787, "learning_rate": 0.0001, "loss": 1.6048, "step": 2668 }, { "epoch": 0.6481301602719767, "grad_norm": 0.3462900221347809, "learning_rate": 0.0001, "loss": 1.7686, "step": 2669 }, { "epoch": 0.6483729966002914, "grad_norm": 0.35002318024635315, "learning_rate": 0.0001, "loss": 1.7655, "step": 2670 }, { "epoch": 0.6486158329286061, "grad_norm": 0.3510165810585022, "learning_rate": 0.0001, "loss": 1.6869, "step": 2671 }, { "epoch": 0.6488586692569208, "grad_norm": 0.3706774115562439, "learning_rate": 0.0001, "loss": 1.8192, "step": 2672 }, { "epoch": 0.6491015055852356, "grad_norm": 0.35293927788734436, "learning_rate": 0.0001, "loss": 1.7139, "step": 2673 }, { "epoch": 0.6493443419135503, "grad_norm": 0.35285595059394836, "learning_rate": 0.0001, "loss": 1.7176, "step": 2674 }, { "epoch": 0.649587178241865, "grad_norm": 0.3747918903827667, "learning_rate": 0.0001, "loss": 1.8401, "step": 2675 }, { "epoch": 0.6498300145701797, "grad_norm": 0.36907392740249634, "learning_rate": 0.0001, "loss": 1.8169, "step": 2676 }, { "epoch": 0.6500728508984944, "grad_norm": 0.37845489382743835, "learning_rate": 0.0001, "loss": 1.832, "step": 2677 }, { "epoch": 0.6503156872268091, "grad_norm": 0.3632284998893738, "learning_rate": 0.0001, "loss": 1.7413, "step": 2678 }, { "epoch": 0.6505585235551239, "grad_norm": 0.36341753602027893, "learning_rate": 0.0001, "loss": 1.7788, "step": 2679 }, { "epoch": 0.6508013598834386, "grad_norm": 0.3553708791732788, "learning_rate": 0.0001, "loss": 1.6001, "step": 2680 }, { "epoch": 0.6510441962117532, "grad_norm": 0.3506467640399933, "learning_rate": 0.0001, "loss": 1.656, "step": 2681 }, { "epoch": 0.651287032540068, "grad_norm": 0.3974582850933075, "learning_rate": 0.0001, "loss": 1.958, "step": 2682 }, { "epoch": 0.6515298688683827, "grad_norm": 0.3517482876777649, "learning_rate": 0.0001, "loss": 1.6594, "step": 2683 }, { "epoch": 0.6517727051966974, "grad_norm": 0.3600420653820038, "learning_rate": 0.0001, "loss": 1.4849, "step": 2684 }, { "epoch": 0.6520155415250122, "grad_norm": 0.36825674772262573, "learning_rate": 0.0001, "loss": 1.8789, "step": 2685 }, { "epoch": 0.6522583778533269, "grad_norm": 0.37107062339782715, "learning_rate": 0.0001, "loss": 1.7713, "step": 2686 }, { "epoch": 0.6525012141816415, "grad_norm": 0.3648766875267029, "learning_rate": 0.0001, "loss": 1.7974, "step": 2687 }, { "epoch": 0.6527440505099563, "grad_norm": 0.34810009598731995, "learning_rate": 0.0001, "loss": 1.693, "step": 2688 }, { "epoch": 0.652986886838271, "grad_norm": 0.3525703549385071, "learning_rate": 0.0001, "loss": 1.641, "step": 2689 }, { "epoch": 0.6532297231665857, "grad_norm": 0.39204278588294983, "learning_rate": 0.0001, "loss": 1.8355, "step": 2690 }, { "epoch": 0.6534725594949005, "grad_norm": 0.3839750289916992, "learning_rate": 0.0001, "loss": 1.7873, "step": 2691 }, { "epoch": 0.6537153958232151, "grad_norm": 0.3619327247142792, "learning_rate": 0.0001, "loss": 1.7816, "step": 2692 }, { "epoch": 0.6539582321515298, "grad_norm": 0.34967416524887085, "learning_rate": 0.0001, "loss": 1.5844, "step": 2693 }, { "epoch": 0.6542010684798446, "grad_norm": 0.35875624418258667, "learning_rate": 0.0001, "loss": 1.6636, "step": 2694 }, { "epoch": 0.6544439048081593, "grad_norm": 0.35244202613830566, "learning_rate": 0.0001, "loss": 1.5934, "step": 2695 }, { "epoch": 0.6546867411364741, "grad_norm": 0.37988707423210144, "learning_rate": 0.0001, "loss": 1.6672, "step": 2696 }, { "epoch": 0.6549295774647887, "grad_norm": 0.3807011544704437, "learning_rate": 0.0001, "loss": 1.7633, "step": 2697 }, { "epoch": 0.6551724137931034, "grad_norm": 0.3748406171798706, "learning_rate": 0.0001, "loss": 1.7146, "step": 2698 }, { "epoch": 0.6554152501214182, "grad_norm": 0.3780806362628937, "learning_rate": 0.0001, "loss": 1.786, "step": 2699 }, { "epoch": 0.6556580864497329, "grad_norm": 0.37719249725341797, "learning_rate": 0.0001, "loss": 1.7855, "step": 2700 }, { "epoch": 0.6559009227780476, "grad_norm": 0.36691877245903015, "learning_rate": 0.0001, "loss": 1.6269, "step": 2701 }, { "epoch": 0.6561437591063624, "grad_norm": 0.3625624477863312, "learning_rate": 0.0001, "loss": 1.5904, "step": 2702 }, { "epoch": 0.656386595434677, "grad_norm": 0.3556610345840454, "learning_rate": 0.0001, "loss": 1.9223, "step": 2703 }, { "epoch": 0.6566294317629917, "grad_norm": 0.3541751801967621, "learning_rate": 0.0001, "loss": 1.7873, "step": 2704 }, { "epoch": 0.6568722680913065, "grad_norm": 0.39495357871055603, "learning_rate": 0.0001, "loss": 1.8065, "step": 2705 }, { "epoch": 0.6571151044196212, "grad_norm": 0.362796425819397, "learning_rate": 0.0001, "loss": 1.6427, "step": 2706 }, { "epoch": 0.6573579407479359, "grad_norm": 0.35276418924331665, "learning_rate": 0.0001, "loss": 1.7092, "step": 2707 }, { "epoch": 0.6576007770762506, "grad_norm": 0.36206701397895813, "learning_rate": 0.0001, "loss": 1.6966, "step": 2708 }, { "epoch": 0.6578436134045653, "grad_norm": 0.36278581619262695, "learning_rate": 0.0001, "loss": 1.7378, "step": 2709 }, { "epoch": 0.65808644973288, "grad_norm": 0.38123491406440735, "learning_rate": 0.0001, "loss": 1.7312, "step": 2710 }, { "epoch": 0.6583292860611948, "grad_norm": 0.3612028658390045, "learning_rate": 0.0001, "loss": 1.8957, "step": 2711 }, { "epoch": 0.6585721223895095, "grad_norm": 0.3690524101257324, "learning_rate": 0.0001, "loss": 1.712, "step": 2712 }, { "epoch": 0.6588149587178241, "grad_norm": 0.35725656151771545, "learning_rate": 0.0001, "loss": 1.5862, "step": 2713 }, { "epoch": 0.6590577950461389, "grad_norm": 0.3594600260257721, "learning_rate": 0.0001, "loss": 1.6163, "step": 2714 }, { "epoch": 0.6593006313744536, "grad_norm": 0.34838706254959106, "learning_rate": 0.0001, "loss": 1.6886, "step": 2715 }, { "epoch": 0.6595434677027683, "grad_norm": 0.34556108713150024, "learning_rate": 0.0001, "loss": 1.6638, "step": 2716 }, { "epoch": 0.6597863040310831, "grad_norm": 0.34152528643608093, "learning_rate": 0.0001, "loss": 1.6916, "step": 2717 }, { "epoch": 0.6600291403593977, "grad_norm": 0.36784660816192627, "learning_rate": 0.0001, "loss": 1.8092, "step": 2718 }, { "epoch": 0.6602719766877125, "grad_norm": 0.35600847005844116, "learning_rate": 0.0001, "loss": 1.7819, "step": 2719 }, { "epoch": 0.6605148130160272, "grad_norm": 0.36031457781791687, "learning_rate": 0.0001, "loss": 1.6415, "step": 2720 }, { "epoch": 0.6607576493443419, "grad_norm": 0.3358955681324005, "learning_rate": 0.0001, "loss": 1.6377, "step": 2721 }, { "epoch": 0.6610004856726567, "grad_norm": 0.39089998602867126, "learning_rate": 0.0001, "loss": 1.8941, "step": 2722 }, { "epoch": 0.6612433220009714, "grad_norm": 0.3684552311897278, "learning_rate": 0.0001, "loss": 1.5994, "step": 2723 }, { "epoch": 0.661486158329286, "grad_norm": 0.38114041090011597, "learning_rate": 0.0001, "loss": 1.7248, "step": 2724 }, { "epoch": 0.6617289946576008, "grad_norm": 0.3674573600292206, "learning_rate": 0.0001, "loss": 1.653, "step": 2725 }, { "epoch": 0.6619718309859155, "grad_norm": 0.35975441336631775, "learning_rate": 0.0001, "loss": 1.6908, "step": 2726 }, { "epoch": 0.6622146673142302, "grad_norm": 0.37502220273017883, "learning_rate": 0.0001, "loss": 1.8135, "step": 2727 }, { "epoch": 0.662457503642545, "grad_norm": 0.3751458525657654, "learning_rate": 0.0001, "loss": 1.4792, "step": 2728 }, { "epoch": 0.6627003399708596, "grad_norm": 0.376924991607666, "learning_rate": 0.0001, "loss": 1.7133, "step": 2729 }, { "epoch": 0.6629431762991743, "grad_norm": 0.3626424968242645, "learning_rate": 0.0001, "loss": 1.7239, "step": 2730 }, { "epoch": 0.6631860126274891, "grad_norm": 0.4036501348018646, "learning_rate": 0.0001, "loss": 1.7198, "step": 2731 }, { "epoch": 0.6634288489558038, "grad_norm": 0.38245660066604614, "learning_rate": 0.0001, "loss": 1.6446, "step": 2732 }, { "epoch": 0.6636716852841185, "grad_norm": 0.3560940623283386, "learning_rate": 0.0001, "loss": 1.6699, "step": 2733 }, { "epoch": 0.6639145216124333, "grad_norm": 0.37661460041999817, "learning_rate": 0.0001, "loss": 1.8264, "step": 2734 }, { "epoch": 0.6641573579407479, "grad_norm": 0.38734447956085205, "learning_rate": 0.0001, "loss": 1.6237, "step": 2735 }, { "epoch": 0.6644001942690626, "grad_norm": 0.37723541259765625, "learning_rate": 0.0001, "loss": 1.625, "step": 2736 }, { "epoch": 0.6646430305973774, "grad_norm": 0.358579158782959, "learning_rate": 0.0001, "loss": 1.8073, "step": 2737 }, { "epoch": 0.6648858669256921, "grad_norm": 0.3974575400352478, "learning_rate": 0.0001, "loss": 1.8406, "step": 2738 }, { "epoch": 0.6651287032540067, "grad_norm": 0.39381760358810425, "learning_rate": 0.0001, "loss": 1.7404, "step": 2739 }, { "epoch": 0.6653715395823215, "grad_norm": 0.38782989978790283, "learning_rate": 0.0001, "loss": 1.6999, "step": 2740 }, { "epoch": 0.6656143759106362, "grad_norm": 0.38447389006614685, "learning_rate": 0.0001, "loss": 1.8801, "step": 2741 }, { "epoch": 0.665857212238951, "grad_norm": 0.3697817921638489, "learning_rate": 0.0001, "loss": 1.7613, "step": 2742 }, { "epoch": 0.6661000485672657, "grad_norm": 0.3970728814601898, "learning_rate": 0.0001, "loss": 1.6759, "step": 2743 }, { "epoch": 0.6663428848955804, "grad_norm": 0.3800773620605469, "learning_rate": 0.0001, "loss": 1.6984, "step": 2744 }, { "epoch": 0.6665857212238951, "grad_norm": 0.35148799419403076, "learning_rate": 0.0001, "loss": 1.6721, "step": 2745 }, { "epoch": 0.6668285575522098, "grad_norm": 0.3488072156906128, "learning_rate": 0.0001, "loss": 1.8215, "step": 2746 }, { "epoch": 0.6670713938805245, "grad_norm": 0.39117431640625, "learning_rate": 0.0001, "loss": 1.6453, "step": 2747 }, { "epoch": 0.6673142302088393, "grad_norm": 0.3691113591194153, "learning_rate": 0.0001, "loss": 1.709, "step": 2748 }, { "epoch": 0.667557066537154, "grad_norm": 0.3842622935771942, "learning_rate": 0.0001, "loss": 1.837, "step": 2749 }, { "epoch": 0.6677999028654686, "grad_norm": 0.36402004957199097, "learning_rate": 0.0001, "loss": 1.7675, "step": 2750 }, { "epoch": 0.6680427391937834, "grad_norm": 0.35241395235061646, "learning_rate": 0.0001, "loss": 1.71, "step": 2751 }, { "epoch": 0.6682855755220981, "grad_norm": 0.35116317868232727, "learning_rate": 0.0001, "loss": 1.6839, "step": 2752 }, { "epoch": 0.6685284118504128, "grad_norm": 0.3499755561351776, "learning_rate": 0.0001, "loss": 1.761, "step": 2753 }, { "epoch": 0.6687712481787276, "grad_norm": 0.3414444625377655, "learning_rate": 0.0001, "loss": 1.6769, "step": 2754 }, { "epoch": 0.6690140845070423, "grad_norm": 0.3383598029613495, "learning_rate": 0.0001, "loss": 1.3583, "step": 2755 }, { "epoch": 0.6692569208353569, "grad_norm": 0.3479803204536438, "learning_rate": 0.0001, "loss": 1.778, "step": 2756 }, { "epoch": 0.6694997571636717, "grad_norm": 0.3396441340446472, "learning_rate": 0.0001, "loss": 1.6624, "step": 2757 }, { "epoch": 0.6697425934919864, "grad_norm": 0.36957108974456787, "learning_rate": 0.0001, "loss": 1.7515, "step": 2758 }, { "epoch": 0.6699854298203011, "grad_norm": 0.37735646963119507, "learning_rate": 0.0001, "loss": 1.8101, "step": 2759 }, { "epoch": 0.6702282661486159, "grad_norm": 0.37875017523765564, "learning_rate": 0.0001, "loss": 1.9074, "step": 2760 }, { "epoch": 0.6704711024769305, "grad_norm": 0.37791571021080017, "learning_rate": 0.0001, "loss": 1.9334, "step": 2761 }, { "epoch": 0.6707139388052452, "grad_norm": 0.36337870359420776, "learning_rate": 0.0001, "loss": 1.7473, "step": 2762 }, { "epoch": 0.67095677513356, "grad_norm": 0.4029197096824646, "learning_rate": 0.0001, "loss": 1.8366, "step": 2763 }, { "epoch": 0.6711996114618747, "grad_norm": 0.3762977421283722, "learning_rate": 0.0001, "loss": 1.6142, "step": 2764 }, { "epoch": 0.6714424477901895, "grad_norm": 0.3758258819580078, "learning_rate": 0.0001, "loss": 1.8017, "step": 2765 }, { "epoch": 0.6716852841185041, "grad_norm": 0.3770975172519684, "learning_rate": 0.0001, "loss": 1.8988, "step": 2766 }, { "epoch": 0.6719281204468188, "grad_norm": 0.35840946435928345, "learning_rate": 0.0001, "loss": 1.6796, "step": 2767 }, { "epoch": 0.6721709567751336, "grad_norm": 0.3695116937160492, "learning_rate": 0.0001, "loss": 1.6958, "step": 2768 }, { "epoch": 0.6724137931034483, "grad_norm": 0.37600207328796387, "learning_rate": 0.0001, "loss": 1.7066, "step": 2769 }, { "epoch": 0.672656629431763, "grad_norm": 0.3682120740413666, "learning_rate": 0.0001, "loss": 1.8241, "step": 2770 }, { "epoch": 0.6728994657600778, "grad_norm": 0.3645362854003906, "learning_rate": 0.0001, "loss": 1.5875, "step": 2771 }, { "epoch": 0.6731423020883924, "grad_norm": 0.39225202798843384, "learning_rate": 0.0001, "loss": 1.7588, "step": 2772 }, { "epoch": 0.6733851384167071, "grad_norm": 0.3571353852748871, "learning_rate": 0.0001, "loss": 1.6266, "step": 2773 }, { "epoch": 0.6736279747450219, "grad_norm": 0.3667353689670563, "learning_rate": 0.0001, "loss": 1.7951, "step": 2774 }, { "epoch": 0.6738708110733366, "grad_norm": 0.3817640542984009, "learning_rate": 0.0001, "loss": 1.8936, "step": 2775 }, { "epoch": 0.6741136474016513, "grad_norm": 0.35165515542030334, "learning_rate": 0.0001, "loss": 1.8528, "step": 2776 }, { "epoch": 0.674356483729966, "grad_norm": 0.38937145471572876, "learning_rate": 0.0001, "loss": 1.7106, "step": 2777 }, { "epoch": 0.6745993200582807, "grad_norm": 0.3663376271724701, "learning_rate": 0.0001, "loss": 1.7573, "step": 2778 }, { "epoch": 0.6748421563865954, "grad_norm": 0.3608282506465912, "learning_rate": 0.0001, "loss": 1.6956, "step": 2779 }, { "epoch": 0.6750849927149102, "grad_norm": 0.3559147119522095, "learning_rate": 0.0001, "loss": 1.6093, "step": 2780 }, { "epoch": 0.6753278290432249, "grad_norm": 0.3778630197048187, "learning_rate": 0.0001, "loss": 1.8678, "step": 2781 }, { "epoch": 0.6755706653715395, "grad_norm": 0.35783466696739197, "learning_rate": 0.0001, "loss": 1.6642, "step": 2782 }, { "epoch": 0.6758135016998543, "grad_norm": 0.3654789924621582, "learning_rate": 0.0001, "loss": 1.7328, "step": 2783 }, { "epoch": 0.676056338028169, "grad_norm": 0.35034000873565674, "learning_rate": 0.0001, "loss": 1.4726, "step": 2784 }, { "epoch": 0.6762991743564837, "grad_norm": 0.35254645347595215, "learning_rate": 0.0001, "loss": 1.6496, "step": 2785 }, { "epoch": 0.6765420106847985, "grad_norm": 0.35973024368286133, "learning_rate": 0.0001, "loss": 1.6284, "step": 2786 }, { "epoch": 0.6767848470131131, "grad_norm": 0.35698050260543823, "learning_rate": 0.0001, "loss": 1.7358, "step": 2787 }, { "epoch": 0.6770276833414279, "grad_norm": 0.38141682744026184, "learning_rate": 0.0001, "loss": 1.8136, "step": 2788 }, { "epoch": 0.6772705196697426, "grad_norm": 0.33951547741889954, "learning_rate": 0.0001, "loss": 1.6219, "step": 2789 }, { "epoch": 0.6775133559980573, "grad_norm": 0.3280647397041321, "learning_rate": 0.0001, "loss": 1.4926, "step": 2790 }, { "epoch": 0.6777561923263721, "grad_norm": 0.38123634457588196, "learning_rate": 0.0001, "loss": 1.7184, "step": 2791 }, { "epoch": 0.6779990286546868, "grad_norm": 0.4019968807697296, "learning_rate": 0.0001, "loss": 1.9837, "step": 2792 }, { "epoch": 0.6782418649830014, "grad_norm": 0.3574753701686859, "learning_rate": 0.0001, "loss": 1.584, "step": 2793 }, { "epoch": 0.6784847013113162, "grad_norm": 0.36737924814224243, "learning_rate": 0.0001, "loss": 1.5873, "step": 2794 }, { "epoch": 0.6787275376396309, "grad_norm": 0.3804500102996826, "learning_rate": 0.0001, "loss": 1.7311, "step": 2795 }, { "epoch": 0.6789703739679456, "grad_norm": 0.34975457191467285, "learning_rate": 0.0001, "loss": 1.5167, "step": 2796 }, { "epoch": 0.6792132102962604, "grad_norm": 0.3589155673980713, "learning_rate": 0.0001, "loss": 1.756, "step": 2797 }, { "epoch": 0.679456046624575, "grad_norm": 0.3515421152114868, "learning_rate": 0.0001, "loss": 1.7368, "step": 2798 }, { "epoch": 0.6796988829528897, "grad_norm": 0.3487219512462616, "learning_rate": 0.0001, "loss": 1.588, "step": 2799 }, { "epoch": 0.6799417192812045, "grad_norm": 0.3846356272697449, "learning_rate": 0.0001, "loss": 1.6109, "step": 2800 }, { "epoch": 0.6801845556095192, "grad_norm": 0.3570033609867096, "learning_rate": 0.0001, "loss": 1.6234, "step": 2801 }, { "epoch": 0.6804273919378339, "grad_norm": 0.3869325518608093, "learning_rate": 0.0001, "loss": 1.7455, "step": 2802 }, { "epoch": 0.6806702282661486, "grad_norm": 0.35647138953208923, "learning_rate": 0.0001, "loss": 1.6681, "step": 2803 }, { "epoch": 0.6809130645944633, "grad_norm": 0.3512788414955139, "learning_rate": 0.0001, "loss": 1.5214, "step": 2804 }, { "epoch": 0.681155900922778, "grad_norm": 0.38840922713279724, "learning_rate": 0.0001, "loss": 1.8565, "step": 2805 }, { "epoch": 0.6813987372510928, "grad_norm": 0.3611179292201996, "learning_rate": 0.0001, "loss": 1.6661, "step": 2806 }, { "epoch": 0.6816415735794075, "grad_norm": 0.3591524660587311, "learning_rate": 0.0001, "loss": 1.791, "step": 2807 }, { "epoch": 0.6818844099077221, "grad_norm": 0.38657450675964355, "learning_rate": 0.0001, "loss": 1.7421, "step": 2808 }, { "epoch": 0.6821272462360369, "grad_norm": 0.36568185687065125, "learning_rate": 0.0001, "loss": 1.732, "step": 2809 }, { "epoch": 0.6823700825643516, "grad_norm": 0.36642563343048096, "learning_rate": 0.0001, "loss": 1.7422, "step": 2810 }, { "epoch": 0.6826129188926664, "grad_norm": 0.39892640709877014, "learning_rate": 0.0001, "loss": 1.965, "step": 2811 }, { "epoch": 0.6828557552209811, "grad_norm": 0.39376381039619446, "learning_rate": 0.0001, "loss": 1.8844, "step": 2812 }, { "epoch": 0.6830985915492958, "grad_norm": 0.35928022861480713, "learning_rate": 0.0001, "loss": 1.6422, "step": 2813 }, { "epoch": 0.6833414278776105, "grad_norm": 0.3709716796875, "learning_rate": 0.0001, "loss": 1.721, "step": 2814 }, { "epoch": 0.6835842642059252, "grad_norm": 0.3744778037071228, "learning_rate": 0.0001, "loss": 1.8316, "step": 2815 }, { "epoch": 0.6838271005342399, "grad_norm": 0.3626949191093445, "learning_rate": 0.0001, "loss": 1.608, "step": 2816 }, { "epoch": 0.6840699368625547, "grad_norm": 0.37098953127861023, "learning_rate": 0.0001, "loss": 1.8472, "step": 2817 }, { "epoch": 0.6843127731908694, "grad_norm": 0.35896000266075134, "learning_rate": 0.0001, "loss": 1.8092, "step": 2818 }, { "epoch": 0.684555609519184, "grad_norm": 0.3691065013408661, "learning_rate": 0.0001, "loss": 1.792, "step": 2819 }, { "epoch": 0.6847984458474988, "grad_norm": 0.3581167161464691, "learning_rate": 0.0001, "loss": 1.6491, "step": 2820 }, { "epoch": 0.6850412821758135, "grad_norm": 0.3448159098625183, "learning_rate": 0.0001, "loss": 1.7149, "step": 2821 }, { "epoch": 0.6852841185041282, "grad_norm": 0.3719812035560608, "learning_rate": 0.0001, "loss": 1.7454, "step": 2822 }, { "epoch": 0.685526954832443, "grad_norm": 0.35453951358795166, "learning_rate": 0.0001, "loss": 1.7427, "step": 2823 }, { "epoch": 0.6857697911607576, "grad_norm": 0.3828970193862915, "learning_rate": 0.0001, "loss": 1.7638, "step": 2824 }, { "epoch": 0.6860126274890723, "grad_norm": 0.34769728779792786, "learning_rate": 0.0001, "loss": 1.6054, "step": 2825 }, { "epoch": 0.6862554638173871, "grad_norm": 0.3649182915687561, "learning_rate": 0.0001, "loss": 1.8765, "step": 2826 }, { "epoch": 0.6864983001457018, "grad_norm": 0.3718123137950897, "learning_rate": 0.0001, "loss": 1.8563, "step": 2827 }, { "epoch": 0.6867411364740165, "grad_norm": 0.38165369629859924, "learning_rate": 0.0001, "loss": 1.7033, "step": 2828 }, { "epoch": 0.6869839728023313, "grad_norm": 0.3523254990577698, "learning_rate": 0.0001, "loss": 1.6182, "step": 2829 }, { "epoch": 0.6872268091306459, "grad_norm": 0.3618701994419098, "learning_rate": 0.0001, "loss": 1.6489, "step": 2830 }, { "epoch": 0.6874696454589606, "grad_norm": 0.3542143404483795, "learning_rate": 0.0001, "loss": 1.7151, "step": 2831 }, { "epoch": 0.6877124817872754, "grad_norm": 0.3631667494773865, "learning_rate": 0.0001, "loss": 1.7198, "step": 2832 }, { "epoch": 0.6879553181155901, "grad_norm": 0.35690179467201233, "learning_rate": 0.0001, "loss": 1.7414, "step": 2833 }, { "epoch": 0.6881981544439048, "grad_norm": 0.3542751371860504, "learning_rate": 0.0001, "loss": 1.7052, "step": 2834 }, { "epoch": 0.6884409907722195, "grad_norm": 0.36255085468292236, "learning_rate": 0.0001, "loss": 1.7469, "step": 2835 }, { "epoch": 0.6886838271005342, "grad_norm": 0.3458736836910248, "learning_rate": 0.0001, "loss": 1.6074, "step": 2836 }, { "epoch": 0.688926663428849, "grad_norm": 0.35049203038215637, "learning_rate": 0.0001, "loss": 1.6416, "step": 2837 }, { "epoch": 0.6891694997571637, "grad_norm": 0.3697221875190735, "learning_rate": 0.0001, "loss": 1.7879, "step": 2838 }, { "epoch": 0.6894123360854784, "grad_norm": 0.34680137038230896, "learning_rate": 0.0001, "loss": 1.6623, "step": 2839 }, { "epoch": 0.6896551724137931, "grad_norm": 0.4107730984687805, "learning_rate": 0.0001, "loss": 1.7127, "step": 2840 }, { "epoch": 0.6898980087421078, "grad_norm": 0.36184608936309814, "learning_rate": 0.0001, "loss": 1.7176, "step": 2841 }, { "epoch": 0.6901408450704225, "grad_norm": 0.3733268082141876, "learning_rate": 0.0001, "loss": 1.7568, "step": 2842 }, { "epoch": 0.6903836813987373, "grad_norm": 0.3708178997039795, "learning_rate": 0.0001, "loss": 1.8397, "step": 2843 }, { "epoch": 0.690626517727052, "grad_norm": 0.34265971183776855, "learning_rate": 0.0001, "loss": 1.7555, "step": 2844 }, { "epoch": 0.6908693540553666, "grad_norm": 0.3513777256011963, "learning_rate": 0.0001, "loss": 1.6347, "step": 2845 }, { "epoch": 0.6911121903836814, "grad_norm": 0.3604748249053955, "learning_rate": 0.0001, "loss": 1.8085, "step": 2846 }, { "epoch": 0.6913550267119961, "grad_norm": 0.36842814087867737, "learning_rate": 0.0001, "loss": 1.7507, "step": 2847 }, { "epoch": 0.6915978630403108, "grad_norm": 0.3745664656162262, "learning_rate": 0.0001, "loss": 1.6002, "step": 2848 }, { "epoch": 0.6918406993686256, "grad_norm": 0.3670363128185272, "learning_rate": 0.0001, "loss": 1.714, "step": 2849 }, { "epoch": 0.6920835356969403, "grad_norm": 0.3517747223377228, "learning_rate": 0.0001, "loss": 1.6155, "step": 2850 }, { "epoch": 0.6923263720252549, "grad_norm": 0.3622724115848541, "learning_rate": 0.0001, "loss": 1.7528, "step": 2851 }, { "epoch": 0.6925692083535697, "grad_norm": 0.36922353506088257, "learning_rate": 0.0001, "loss": 1.6394, "step": 2852 }, { "epoch": 0.6928120446818844, "grad_norm": 0.3616962432861328, "learning_rate": 0.0001, "loss": 1.6272, "step": 2853 }, { "epoch": 0.6930548810101991, "grad_norm": 0.36178451776504517, "learning_rate": 0.0001, "loss": 1.6249, "step": 2854 }, { "epoch": 0.6932977173385139, "grad_norm": 0.3681275248527527, "learning_rate": 0.0001, "loss": 1.7381, "step": 2855 }, { "epoch": 0.6935405536668285, "grad_norm": 0.37139177322387695, "learning_rate": 0.0001, "loss": 1.6641, "step": 2856 }, { "epoch": 0.6937833899951432, "grad_norm": 0.4304312765598297, "learning_rate": 0.0001, "loss": 1.6573, "step": 2857 }, { "epoch": 0.694026226323458, "grad_norm": 0.35460364818573, "learning_rate": 0.0001, "loss": 1.6168, "step": 2858 }, { "epoch": 0.6942690626517727, "grad_norm": 0.35733842849731445, "learning_rate": 0.0001, "loss": 1.6385, "step": 2859 }, { "epoch": 0.6945118989800875, "grad_norm": 0.3486827611923218, "learning_rate": 0.0001, "loss": 1.6365, "step": 2860 }, { "epoch": 0.6947547353084021, "grad_norm": 0.3677719235420227, "learning_rate": 0.0001, "loss": 1.7202, "step": 2861 }, { "epoch": 0.6949975716367168, "grad_norm": 0.4203099310398102, "learning_rate": 0.0001, "loss": 1.806, "step": 2862 }, { "epoch": 0.6952404079650316, "grad_norm": 0.3546834886074066, "learning_rate": 0.0001, "loss": 1.6461, "step": 2863 }, { "epoch": 0.6954832442933463, "grad_norm": 0.34874799847602844, "learning_rate": 0.0001, "loss": 1.6211, "step": 2864 }, { "epoch": 0.695726080621661, "grad_norm": 0.3491641581058502, "learning_rate": 0.0001, "loss": 1.6389, "step": 2865 }, { "epoch": 0.6959689169499758, "grad_norm": 0.3777684271335602, "learning_rate": 0.0001, "loss": 1.6018, "step": 2866 }, { "epoch": 0.6962117532782904, "grad_norm": 0.35895413160324097, "learning_rate": 0.0001, "loss": 1.5895, "step": 2867 }, { "epoch": 0.6964545896066051, "grad_norm": 0.3357354402542114, "learning_rate": 0.0001, "loss": 1.6225, "step": 2868 }, { "epoch": 0.6966974259349199, "grad_norm": 0.36405596137046814, "learning_rate": 0.0001, "loss": 1.6561, "step": 2869 }, { "epoch": 0.6969402622632346, "grad_norm": 0.3359208405017853, "learning_rate": 0.0001, "loss": 1.6273, "step": 2870 }, { "epoch": 0.6971830985915493, "grad_norm": 0.333678662776947, "learning_rate": 0.0001, "loss": 1.544, "step": 2871 }, { "epoch": 0.697425934919864, "grad_norm": 0.35640749335289, "learning_rate": 0.0001, "loss": 1.6572, "step": 2872 }, { "epoch": 0.6976687712481787, "grad_norm": 0.34949982166290283, "learning_rate": 0.0001, "loss": 1.7163, "step": 2873 }, { "epoch": 0.6979116075764934, "grad_norm": 0.3794506788253784, "learning_rate": 0.0001, "loss": 1.8469, "step": 2874 }, { "epoch": 0.6981544439048082, "grad_norm": 0.36223942041397095, "learning_rate": 0.0001, "loss": 1.802, "step": 2875 }, { "epoch": 0.6983972802331229, "grad_norm": 0.3552629351615906, "learning_rate": 0.0001, "loss": 1.6599, "step": 2876 }, { "epoch": 0.6986401165614375, "grad_norm": 0.35770854353904724, "learning_rate": 0.0001, "loss": 1.7617, "step": 2877 }, { "epoch": 0.6988829528897523, "grad_norm": 0.35151875019073486, "learning_rate": 0.0001, "loss": 1.6911, "step": 2878 }, { "epoch": 0.699125789218067, "grad_norm": 0.3744480013847351, "learning_rate": 0.0001, "loss": 1.6648, "step": 2879 }, { "epoch": 0.6993686255463817, "grad_norm": 0.3655317723751068, "learning_rate": 0.0001, "loss": 1.7782, "step": 2880 }, { "epoch": 0.6996114618746965, "grad_norm": 0.3632819950580597, "learning_rate": 0.0001, "loss": 1.6503, "step": 2881 }, { "epoch": 0.6998542982030111, "grad_norm": 0.365756094455719, "learning_rate": 0.0001, "loss": 1.6464, "step": 2882 }, { "epoch": 0.7000971345313259, "grad_norm": 0.34989890456199646, "learning_rate": 0.0001, "loss": 1.6492, "step": 2883 }, { "epoch": 0.7003399708596406, "grad_norm": 0.35626035928726196, "learning_rate": 0.0001, "loss": 1.4924, "step": 2884 }, { "epoch": 0.7005828071879553, "grad_norm": 0.38774046301841736, "learning_rate": 0.0001, "loss": 1.7844, "step": 2885 }, { "epoch": 0.7008256435162701, "grad_norm": 0.3692142963409424, "learning_rate": 0.0001, "loss": 1.7175, "step": 2886 }, { "epoch": 0.7010684798445848, "grad_norm": 0.33797192573547363, "learning_rate": 0.0001, "loss": 1.6566, "step": 2887 }, { "epoch": 0.7013113161728994, "grad_norm": 0.3465418219566345, "learning_rate": 0.0001, "loss": 1.595, "step": 2888 }, { "epoch": 0.7015541525012142, "grad_norm": 0.353791743516922, "learning_rate": 0.0001, "loss": 1.8116, "step": 2889 }, { "epoch": 0.7017969888295289, "grad_norm": 0.3460966944694519, "learning_rate": 0.0001, "loss": 1.6999, "step": 2890 }, { "epoch": 0.7020398251578436, "grad_norm": 0.35177546739578247, "learning_rate": 0.0001, "loss": 1.7354, "step": 2891 }, { "epoch": 0.7022826614861584, "grad_norm": 0.3689141869544983, "learning_rate": 0.0001, "loss": 1.704, "step": 2892 }, { "epoch": 0.702525497814473, "grad_norm": 0.35670557618141174, "learning_rate": 0.0001, "loss": 1.6332, "step": 2893 }, { "epoch": 0.7027683341427877, "grad_norm": 0.32746395468711853, "learning_rate": 0.0001, "loss": 1.3454, "step": 2894 }, { "epoch": 0.7030111704711025, "grad_norm": 0.351516455411911, "learning_rate": 0.0001, "loss": 1.7503, "step": 2895 }, { "epoch": 0.7032540067994172, "grad_norm": 0.3373326361179352, "learning_rate": 0.0001, "loss": 1.5746, "step": 2896 }, { "epoch": 0.7034968431277319, "grad_norm": 0.36539536714553833, "learning_rate": 0.0001, "loss": 1.6384, "step": 2897 }, { "epoch": 0.7037396794560467, "grad_norm": 0.6192057728767395, "learning_rate": 0.0001, "loss": 1.7346, "step": 2898 }, { "epoch": 0.7039825157843613, "grad_norm": 0.3435933291912079, "learning_rate": 0.0001, "loss": 1.6926, "step": 2899 }, { "epoch": 0.704225352112676, "grad_norm": 0.3592207729816437, "learning_rate": 0.0001, "loss": 1.686, "step": 2900 }, { "epoch": 0.7044681884409908, "grad_norm": 0.33544716238975525, "learning_rate": 0.0001, "loss": 1.6719, "step": 2901 }, { "epoch": 0.7047110247693055, "grad_norm": 0.379833847284317, "learning_rate": 0.0001, "loss": 1.7507, "step": 2902 }, { "epoch": 0.7049538610976201, "grad_norm": 0.3567056655883789, "learning_rate": 0.0001, "loss": 1.5885, "step": 2903 }, { "epoch": 0.7051966974259349, "grad_norm": 0.36313581466674805, "learning_rate": 0.0001, "loss": 1.773, "step": 2904 }, { "epoch": 0.7054395337542496, "grad_norm": 0.35985293984413147, "learning_rate": 0.0001, "loss": 1.8007, "step": 2905 }, { "epoch": 0.7056823700825644, "grad_norm": 0.3519170582294464, "learning_rate": 0.0001, "loss": 1.7713, "step": 2906 }, { "epoch": 0.7059252064108791, "grad_norm": 0.35734522342681885, "learning_rate": 0.0001, "loss": 1.5381, "step": 2907 }, { "epoch": 0.7061680427391938, "grad_norm": 0.38351139426231384, "learning_rate": 0.0001, "loss": 1.621, "step": 2908 }, { "epoch": 0.7064108790675085, "grad_norm": 0.39112213253974915, "learning_rate": 0.0001, "loss": 1.8291, "step": 2909 }, { "epoch": 0.7066537153958232, "grad_norm": 0.3656308948993683, "learning_rate": 0.0001, "loss": 1.8392, "step": 2910 }, { "epoch": 0.7068965517241379, "grad_norm": 0.3724428713321686, "learning_rate": 0.0001, "loss": 1.8628, "step": 2911 }, { "epoch": 0.7071393880524527, "grad_norm": 0.38988369703292847, "learning_rate": 0.0001, "loss": 1.7934, "step": 2912 }, { "epoch": 0.7073822243807674, "grad_norm": 0.34413278102874756, "learning_rate": 0.0001, "loss": 1.6022, "step": 2913 }, { "epoch": 0.707625060709082, "grad_norm": 0.3609641492366791, "learning_rate": 0.0001, "loss": 1.7286, "step": 2914 }, { "epoch": 0.7078678970373968, "grad_norm": 0.3715507984161377, "learning_rate": 0.0001, "loss": 1.8692, "step": 2915 }, { "epoch": 0.7081107333657115, "grad_norm": 0.3761596083641052, "learning_rate": 0.0001, "loss": 1.7286, "step": 2916 }, { "epoch": 0.7083535696940262, "grad_norm": 0.35243546962738037, "learning_rate": 0.0001, "loss": 1.7567, "step": 2917 }, { "epoch": 0.708596406022341, "grad_norm": 0.36848515272140503, "learning_rate": 0.0001, "loss": 1.7247, "step": 2918 }, { "epoch": 0.7088392423506557, "grad_norm": 0.36660486459732056, "learning_rate": 0.0001, "loss": 1.9038, "step": 2919 }, { "epoch": 0.7090820786789703, "grad_norm": 0.3525727093219757, "learning_rate": 0.0001, "loss": 1.7261, "step": 2920 }, { "epoch": 0.7093249150072851, "grad_norm": 0.402280330657959, "learning_rate": 0.0001, "loss": 1.8771, "step": 2921 }, { "epoch": 0.7095677513355998, "grad_norm": 0.3789054751396179, "learning_rate": 0.0001, "loss": 1.7986, "step": 2922 }, { "epoch": 0.7098105876639145, "grad_norm": 0.36707112193107605, "learning_rate": 0.0001, "loss": 1.7941, "step": 2923 }, { "epoch": 0.7100534239922293, "grad_norm": 0.35683396458625793, "learning_rate": 0.0001, "loss": 1.7371, "step": 2924 }, { "epoch": 0.7102962603205439, "grad_norm": 0.37030884623527527, "learning_rate": 0.0001, "loss": 1.6644, "step": 2925 }, { "epoch": 0.7105390966488586, "grad_norm": 0.38591259717941284, "learning_rate": 0.0001, "loss": 1.7018, "step": 2926 }, { "epoch": 0.7107819329771734, "grad_norm": 0.3896026611328125, "learning_rate": 0.0001, "loss": 1.7778, "step": 2927 }, { "epoch": 0.7110247693054881, "grad_norm": 0.3603595793247223, "learning_rate": 0.0001, "loss": 1.5979, "step": 2928 }, { "epoch": 0.7112676056338029, "grad_norm": 0.3804043233394623, "learning_rate": 0.0001, "loss": 1.7772, "step": 2929 }, { "epoch": 0.7115104419621175, "grad_norm": 0.3650084435939789, "learning_rate": 0.0001, "loss": 1.7025, "step": 2930 }, { "epoch": 0.7117532782904322, "grad_norm": 0.37033146619796753, "learning_rate": 0.0001, "loss": 1.8184, "step": 2931 }, { "epoch": 0.711996114618747, "grad_norm": 0.3535173237323761, "learning_rate": 0.0001, "loss": 1.6509, "step": 2932 }, { "epoch": 0.7122389509470617, "grad_norm": 0.3423483967781067, "learning_rate": 0.0001, "loss": 1.5214, "step": 2933 }, { "epoch": 0.7124817872753764, "grad_norm": 0.36774322390556335, "learning_rate": 0.0001, "loss": 1.8374, "step": 2934 }, { "epoch": 0.7127246236036912, "grad_norm": 0.3929206430912018, "learning_rate": 0.0001, "loss": 1.8696, "step": 2935 }, { "epoch": 0.7129674599320058, "grad_norm": 0.36784839630126953, "learning_rate": 0.0001, "loss": 1.6886, "step": 2936 }, { "epoch": 0.7132102962603205, "grad_norm": 0.39813923835754395, "learning_rate": 0.0001, "loss": 1.8575, "step": 2937 }, { "epoch": 0.7134531325886353, "grad_norm": 0.3539840877056122, "learning_rate": 0.0001, "loss": 1.7424, "step": 2938 }, { "epoch": 0.71369596891695, "grad_norm": 0.35620298981666565, "learning_rate": 0.0001, "loss": 1.6124, "step": 2939 }, { "epoch": 0.7139388052452647, "grad_norm": 0.35802677273750305, "learning_rate": 0.0001, "loss": 1.6363, "step": 2940 }, { "epoch": 0.7141816415735794, "grad_norm": 0.3788433074951172, "learning_rate": 0.0001, "loss": 1.8535, "step": 2941 }, { "epoch": 0.7144244779018941, "grad_norm": 0.3388618230819702, "learning_rate": 0.0001, "loss": 1.5753, "step": 2942 }, { "epoch": 0.7146673142302088, "grad_norm": 0.37521353363990784, "learning_rate": 0.0001, "loss": 1.7129, "step": 2943 }, { "epoch": 0.7149101505585236, "grad_norm": 0.3805568218231201, "learning_rate": 0.0001, "loss": 1.815, "step": 2944 }, { "epoch": 0.7151529868868383, "grad_norm": 0.3507835865020752, "learning_rate": 0.0001, "loss": 1.6339, "step": 2945 }, { "epoch": 0.7153958232151529, "grad_norm": 0.36764758825302124, "learning_rate": 0.0001, "loss": 1.5961, "step": 2946 }, { "epoch": 0.7156386595434677, "grad_norm": 0.3647833466529846, "learning_rate": 0.0001, "loss": 1.6382, "step": 2947 }, { "epoch": 0.7158814958717824, "grad_norm": 0.38218680024147034, "learning_rate": 0.0001, "loss": 1.9101, "step": 2948 }, { "epoch": 0.7161243322000971, "grad_norm": 0.34345221519470215, "learning_rate": 0.0001, "loss": 1.7023, "step": 2949 }, { "epoch": 0.7163671685284119, "grad_norm": 0.3746163249015808, "learning_rate": 0.0001, "loss": 1.7223, "step": 2950 }, { "epoch": 0.7166100048567265, "grad_norm": 0.3593830168247223, "learning_rate": 0.0001, "loss": 1.5907, "step": 2951 }, { "epoch": 0.7168528411850413, "grad_norm": 0.35187071561813354, "learning_rate": 0.0001, "loss": 1.6425, "step": 2952 }, { "epoch": 0.717095677513356, "grad_norm": 0.36968597769737244, "learning_rate": 0.0001, "loss": 1.5398, "step": 2953 }, { "epoch": 0.7173385138416707, "grad_norm": 0.35344114899635315, "learning_rate": 0.0001, "loss": 1.5868, "step": 2954 }, { "epoch": 0.7175813501699855, "grad_norm": 0.3591856360435486, "learning_rate": 0.0001, "loss": 1.5745, "step": 2955 }, { "epoch": 0.7178241864983002, "grad_norm": 0.3720721900463104, "learning_rate": 0.0001, "loss": 1.776, "step": 2956 }, { "epoch": 0.7180670228266148, "grad_norm": 0.36205706000328064, "learning_rate": 0.0001, "loss": 1.7799, "step": 2957 }, { "epoch": 0.7183098591549296, "grad_norm": 0.3556406497955322, "learning_rate": 0.0001, "loss": 1.7076, "step": 2958 }, { "epoch": 0.7185526954832443, "grad_norm": 0.3606123924255371, "learning_rate": 0.0001, "loss": 1.7962, "step": 2959 }, { "epoch": 0.718795531811559, "grad_norm": 0.35451430082321167, "learning_rate": 0.0001, "loss": 1.6689, "step": 2960 }, { "epoch": 0.7190383681398738, "grad_norm": 0.3367184102535248, "learning_rate": 0.0001, "loss": 1.6781, "step": 2961 }, { "epoch": 0.7192812044681884, "grad_norm": 0.356923907995224, "learning_rate": 0.0001, "loss": 1.6426, "step": 2962 }, { "epoch": 0.7195240407965031, "grad_norm": 0.38560399413108826, "learning_rate": 0.0001, "loss": 2.0482, "step": 2963 }, { "epoch": 0.7197668771248179, "grad_norm": 0.357217401266098, "learning_rate": 0.0001, "loss": 1.8023, "step": 2964 }, { "epoch": 0.7200097134531326, "grad_norm": 0.36859866976737976, "learning_rate": 0.0001, "loss": 1.6244, "step": 2965 }, { "epoch": 0.7202525497814473, "grad_norm": 0.3331631124019623, "learning_rate": 0.0001, "loss": 1.6152, "step": 2966 }, { "epoch": 0.720495386109762, "grad_norm": 0.3537011444568634, "learning_rate": 0.0001, "loss": 1.8049, "step": 2967 }, { "epoch": 0.7207382224380767, "grad_norm": 0.38022685050964355, "learning_rate": 0.0001, "loss": 1.8788, "step": 2968 }, { "epoch": 0.7209810587663914, "grad_norm": 0.3764863610267639, "learning_rate": 0.0001, "loss": 1.7169, "step": 2969 }, { "epoch": 0.7212238950947062, "grad_norm": 0.36463648080825806, "learning_rate": 0.0001, "loss": 1.8591, "step": 2970 }, { "epoch": 0.7214667314230209, "grad_norm": 0.34684088826179504, "learning_rate": 0.0001, "loss": 1.6458, "step": 2971 }, { "epoch": 0.7217095677513355, "grad_norm": 0.3718288838863373, "learning_rate": 0.0001, "loss": 1.7119, "step": 2972 }, { "epoch": 0.7219524040796503, "grad_norm": 0.3367420732975006, "learning_rate": 0.0001, "loss": 1.6449, "step": 2973 }, { "epoch": 0.722195240407965, "grad_norm": 0.3648276627063751, "learning_rate": 0.0001, "loss": 1.6718, "step": 2974 }, { "epoch": 0.7224380767362798, "grad_norm": 0.36360329389572144, "learning_rate": 0.0001, "loss": 1.8129, "step": 2975 }, { "epoch": 0.7226809130645945, "grad_norm": 0.38630610704421997, "learning_rate": 0.0001, "loss": 1.6333, "step": 2976 }, { "epoch": 0.7229237493929092, "grad_norm": 0.36072230339050293, "learning_rate": 0.0001, "loss": 1.7083, "step": 2977 }, { "epoch": 0.7231665857212239, "grad_norm": 0.3993285000324249, "learning_rate": 0.0001, "loss": 1.7232, "step": 2978 }, { "epoch": 0.7234094220495386, "grad_norm": 0.36294326186180115, "learning_rate": 0.0001, "loss": 1.7431, "step": 2979 }, { "epoch": 0.7236522583778533, "grad_norm": 0.35069945454597473, "learning_rate": 0.0001, "loss": 1.6702, "step": 2980 }, { "epoch": 0.7238950947061681, "grad_norm": 0.35238704085350037, "learning_rate": 0.0001, "loss": 1.6071, "step": 2981 }, { "epoch": 0.7241379310344828, "grad_norm": 0.3727080225944519, "learning_rate": 0.0001, "loss": 1.8298, "step": 2982 }, { "epoch": 0.7243807673627974, "grad_norm": 0.3415742516517639, "learning_rate": 0.0001, "loss": 1.6771, "step": 2983 }, { "epoch": 0.7246236036911122, "grad_norm": 0.3733143210411072, "learning_rate": 0.0001, "loss": 1.7415, "step": 2984 }, { "epoch": 0.7248664400194269, "grad_norm": 0.4065549671649933, "learning_rate": 0.0001, "loss": 1.6586, "step": 2985 }, { "epoch": 0.7251092763477416, "grad_norm": 0.3903333246707916, "learning_rate": 0.0001, "loss": 1.8753, "step": 2986 }, { "epoch": 0.7253521126760564, "grad_norm": 0.36670219898223877, "learning_rate": 0.0001, "loss": 1.7198, "step": 2987 }, { "epoch": 0.725594949004371, "grad_norm": 0.372465580701828, "learning_rate": 0.0001, "loss": 1.6701, "step": 2988 }, { "epoch": 0.7258377853326857, "grad_norm": 0.3704904317855835, "learning_rate": 0.0001, "loss": 1.7167, "step": 2989 }, { "epoch": 0.7260806216610005, "grad_norm": 0.36772340536117554, "learning_rate": 0.0001, "loss": 1.7857, "step": 2990 }, { "epoch": 0.7263234579893152, "grad_norm": 0.3715022802352905, "learning_rate": 0.0001, "loss": 1.7589, "step": 2991 }, { "epoch": 0.7265662943176299, "grad_norm": 0.35818248987197876, "learning_rate": 0.0001, "loss": 1.646, "step": 2992 }, { "epoch": 0.7268091306459447, "grad_norm": 0.3635038435459137, "learning_rate": 0.0001, "loss": 1.6426, "step": 2993 }, { "epoch": 0.7270519669742593, "grad_norm": 0.3737863004207611, "learning_rate": 0.0001, "loss": 1.7217, "step": 2994 }, { "epoch": 0.727294803302574, "grad_norm": 0.37764856219291687, "learning_rate": 0.0001, "loss": 1.6232, "step": 2995 }, { "epoch": 0.7275376396308888, "grad_norm": 0.34850314259529114, "learning_rate": 0.0001, "loss": 1.5159, "step": 2996 }, { "epoch": 0.7277804759592035, "grad_norm": 0.36448803544044495, "learning_rate": 0.0001, "loss": 1.6992, "step": 2997 }, { "epoch": 0.7280233122875183, "grad_norm": 0.36929845809936523, "learning_rate": 0.0001, "loss": 1.7365, "step": 2998 }, { "epoch": 0.7282661486158329, "grad_norm": 0.3795127868652344, "learning_rate": 0.0001, "loss": 1.7614, "step": 2999 }, { "epoch": 0.7285089849441476, "grad_norm": 0.3693547248840332, "learning_rate": 0.0001, "loss": 1.8719, "step": 3000 }, { "epoch": 0.7287518212724624, "grad_norm": 0.33187270164489746, "learning_rate": 0.0001, "loss": 1.6259, "step": 3001 }, { "epoch": 0.7289946576007771, "grad_norm": 0.3986161947250366, "learning_rate": 0.0001, "loss": 1.7832, "step": 3002 }, { "epoch": 0.7292374939290918, "grad_norm": 0.36065754294395447, "learning_rate": 0.0001, "loss": 1.8036, "step": 3003 }, { "epoch": 0.7294803302574066, "grad_norm": 0.35556501150131226, "learning_rate": 0.0001, "loss": 1.5954, "step": 3004 }, { "epoch": 0.7297231665857212, "grad_norm": 0.3530411720275879, "learning_rate": 0.0001, "loss": 1.7182, "step": 3005 }, { "epoch": 0.7299660029140359, "grad_norm": 0.40550586581230164, "learning_rate": 0.0001, "loss": 1.6723, "step": 3006 }, { "epoch": 0.7302088392423507, "grad_norm": 0.37494075298309326, "learning_rate": 0.0001, "loss": 1.7971, "step": 3007 }, { "epoch": 0.7304516755706654, "grad_norm": 0.38023585081100464, "learning_rate": 0.0001, "loss": 1.6531, "step": 3008 }, { "epoch": 0.73069451189898, "grad_norm": 0.3713429868221283, "learning_rate": 0.0001, "loss": 1.9022, "step": 3009 }, { "epoch": 0.7309373482272948, "grad_norm": 0.41121378540992737, "learning_rate": 0.0001, "loss": 1.7952, "step": 3010 }, { "epoch": 0.7311801845556095, "grad_norm": 0.3646053373813629, "learning_rate": 0.0001, "loss": 1.8038, "step": 3011 }, { "epoch": 0.7314230208839242, "grad_norm": 0.34566327929496765, "learning_rate": 0.0001, "loss": 1.4985, "step": 3012 }, { "epoch": 0.731665857212239, "grad_norm": 0.3599144220352173, "learning_rate": 0.0001, "loss": 1.6797, "step": 3013 }, { "epoch": 0.7319086935405537, "grad_norm": 0.34851646423339844, "learning_rate": 0.0001, "loss": 1.5837, "step": 3014 }, { "epoch": 0.7321515298688683, "grad_norm": 0.3771142065525055, "learning_rate": 0.0001, "loss": 1.6967, "step": 3015 }, { "epoch": 0.7323943661971831, "grad_norm": 0.34787657856941223, "learning_rate": 0.0001, "loss": 1.6806, "step": 3016 }, { "epoch": 0.7326372025254978, "grad_norm": 0.3611025810241699, "learning_rate": 0.0001, "loss": 1.5241, "step": 3017 }, { "epoch": 0.7328800388538125, "grad_norm": 0.3520788848400116, "learning_rate": 0.0001, "loss": 1.7057, "step": 3018 }, { "epoch": 0.7331228751821273, "grad_norm": 0.3628937900066376, "learning_rate": 0.0001, "loss": 1.5687, "step": 3019 }, { "epoch": 0.7333657115104419, "grad_norm": 0.35097360610961914, "learning_rate": 0.0001, "loss": 1.61, "step": 3020 }, { "epoch": 0.7336085478387567, "grad_norm": 0.37622305750846863, "learning_rate": 0.0001, "loss": 1.7781, "step": 3021 }, { "epoch": 0.7338513841670714, "grad_norm": 0.39038702845573425, "learning_rate": 0.0001, "loss": 1.8756, "step": 3022 }, { "epoch": 0.7340942204953861, "grad_norm": 0.381448358297348, "learning_rate": 0.0001, "loss": 1.6518, "step": 3023 }, { "epoch": 0.7343370568237009, "grad_norm": 0.3688746988773346, "learning_rate": 0.0001, "loss": 1.761, "step": 3024 }, { "epoch": 0.7345798931520156, "grad_norm": 0.37213847041130066, "learning_rate": 0.0001, "loss": 1.7584, "step": 3025 }, { "epoch": 0.7348227294803302, "grad_norm": 0.37292101979255676, "learning_rate": 0.0001, "loss": 1.686, "step": 3026 }, { "epoch": 0.735065565808645, "grad_norm": 0.3280561566352844, "learning_rate": 0.0001, "loss": 1.4754, "step": 3027 }, { "epoch": 0.7353084021369597, "grad_norm": 0.3627604842185974, "learning_rate": 0.0001, "loss": 1.6521, "step": 3028 }, { "epoch": 0.7355512384652744, "grad_norm": 0.3830265402793884, "learning_rate": 0.0001, "loss": 1.8071, "step": 3029 }, { "epoch": 0.7357940747935892, "grad_norm": 0.3494594991207123, "learning_rate": 0.0001, "loss": 1.5432, "step": 3030 }, { "epoch": 0.7360369111219038, "grad_norm": 0.3553903102874756, "learning_rate": 0.0001, "loss": 1.7695, "step": 3031 }, { "epoch": 0.7362797474502185, "grad_norm": 0.3740635812282562, "learning_rate": 0.0001, "loss": 1.7267, "step": 3032 }, { "epoch": 0.7365225837785333, "grad_norm": 0.34639549255371094, "learning_rate": 0.0001, "loss": 1.6156, "step": 3033 }, { "epoch": 0.736765420106848, "grad_norm": 0.4009366035461426, "learning_rate": 0.0001, "loss": 1.6878, "step": 3034 }, { "epoch": 0.7370082564351627, "grad_norm": 0.3689112067222595, "learning_rate": 0.0001, "loss": 1.6398, "step": 3035 }, { "epoch": 0.7372510927634774, "grad_norm": 0.3872334063053131, "learning_rate": 0.0001, "loss": 1.769, "step": 3036 }, { "epoch": 0.7374939290917921, "grad_norm": 0.3638404607772827, "learning_rate": 0.0001, "loss": 1.6397, "step": 3037 }, { "epoch": 0.7377367654201068, "grad_norm": 0.3534509241580963, "learning_rate": 0.0001, "loss": 1.6901, "step": 3038 }, { "epoch": 0.7379796017484216, "grad_norm": 0.36130160093307495, "learning_rate": 0.0001, "loss": 1.628, "step": 3039 }, { "epoch": 0.7382224380767363, "grad_norm": 0.35487523674964905, "learning_rate": 0.0001, "loss": 1.6811, "step": 3040 }, { "epoch": 0.7384652744050509, "grad_norm": 0.3612043857574463, "learning_rate": 0.0001, "loss": 1.7166, "step": 3041 }, { "epoch": 0.7387081107333657, "grad_norm": 0.37539663910865784, "learning_rate": 0.0001, "loss": 1.8484, "step": 3042 }, { "epoch": 0.7389509470616804, "grad_norm": 0.35037562251091003, "learning_rate": 0.0001, "loss": 1.6824, "step": 3043 }, { "epoch": 0.7391937833899952, "grad_norm": 0.36021849513053894, "learning_rate": 0.0001, "loss": 1.7272, "step": 3044 }, { "epoch": 0.7394366197183099, "grad_norm": 0.38384193181991577, "learning_rate": 0.0001, "loss": 1.7511, "step": 3045 }, { "epoch": 0.7396794560466246, "grad_norm": 0.3991917669773102, "learning_rate": 0.0001, "loss": 1.6954, "step": 3046 }, { "epoch": 0.7399222923749393, "grad_norm": 0.3840140402317047, "learning_rate": 0.0001, "loss": 1.6328, "step": 3047 }, { "epoch": 0.740165128703254, "grad_norm": 0.36042118072509766, "learning_rate": 0.0001, "loss": 1.6662, "step": 3048 }, { "epoch": 0.7404079650315687, "grad_norm": 0.3783377707004547, "learning_rate": 0.0001, "loss": 1.6943, "step": 3049 }, { "epoch": 0.7406508013598835, "grad_norm": 0.3565843403339386, "learning_rate": 0.0001, "loss": 1.7268, "step": 3050 }, { "epoch": 0.7408936376881982, "grad_norm": 0.34464597702026367, "learning_rate": 0.0001, "loss": 1.7426, "step": 3051 }, { "epoch": 0.7411364740165128, "grad_norm": 0.36159613728523254, "learning_rate": 0.0001, "loss": 1.7378, "step": 3052 }, { "epoch": 0.7413793103448276, "grad_norm": 0.3757307231426239, "learning_rate": 0.0001, "loss": 1.7074, "step": 3053 }, { "epoch": 0.7416221466731423, "grad_norm": 0.3887554705142975, "learning_rate": 0.0001, "loss": 1.8408, "step": 3054 }, { "epoch": 0.741864983001457, "grad_norm": 0.38285601139068604, "learning_rate": 0.0001, "loss": 1.7693, "step": 3055 }, { "epoch": 0.7421078193297718, "grad_norm": 0.3463350236415863, "learning_rate": 0.0001, "loss": 1.6086, "step": 3056 }, { "epoch": 0.7423506556580864, "grad_norm": 0.3648509979248047, "learning_rate": 0.0001, "loss": 1.6091, "step": 3057 }, { "epoch": 0.7425934919864011, "grad_norm": 0.34089362621307373, "learning_rate": 0.0001, "loss": 1.5889, "step": 3058 }, { "epoch": 0.7428363283147159, "grad_norm": 0.38336682319641113, "learning_rate": 0.0001, "loss": 1.6736, "step": 3059 }, { "epoch": 0.7430791646430306, "grad_norm": 0.37535834312438965, "learning_rate": 0.0001, "loss": 1.8989, "step": 3060 }, { "epoch": 0.7433220009713453, "grad_norm": 0.3540712594985962, "learning_rate": 0.0001, "loss": 1.7103, "step": 3061 }, { "epoch": 0.74356483729966, "grad_norm": 0.3841947019100189, "learning_rate": 0.0001, "loss": 1.7098, "step": 3062 }, { "epoch": 0.7438076736279747, "grad_norm": 0.38840192556381226, "learning_rate": 0.0001, "loss": 1.5798, "step": 3063 }, { "epoch": 0.7440505099562894, "grad_norm": 0.3693578243255615, "learning_rate": 0.0001, "loss": 1.6931, "step": 3064 }, { "epoch": 0.7442933462846042, "grad_norm": 0.3905344605445862, "learning_rate": 0.0001, "loss": 1.6618, "step": 3065 }, { "epoch": 0.7445361826129189, "grad_norm": 0.37690243124961853, "learning_rate": 0.0001, "loss": 1.7186, "step": 3066 }, { "epoch": 0.7447790189412337, "grad_norm": 0.37685608863830566, "learning_rate": 0.0001, "loss": 1.6061, "step": 3067 }, { "epoch": 0.7450218552695483, "grad_norm": 0.33402764797210693, "learning_rate": 0.0001, "loss": 1.581, "step": 3068 }, { "epoch": 0.745264691597863, "grad_norm": 0.3556559085845947, "learning_rate": 0.0001, "loss": 1.6959, "step": 3069 }, { "epoch": 0.7455075279261778, "grad_norm": 0.3709651231765747, "learning_rate": 0.0001, "loss": 1.6965, "step": 3070 }, { "epoch": 0.7457503642544925, "grad_norm": 0.37813064455986023, "learning_rate": 0.0001, "loss": 1.521, "step": 3071 }, { "epoch": 0.7459932005828072, "grad_norm": 0.3927704691886902, "learning_rate": 0.0001, "loss": 1.7058, "step": 3072 }, { "epoch": 0.746236036911122, "grad_norm": 0.3572740852832794, "learning_rate": 0.0001, "loss": 1.8172, "step": 3073 }, { "epoch": 0.7464788732394366, "grad_norm": 0.39202433824539185, "learning_rate": 0.0001, "loss": 1.6642, "step": 3074 }, { "epoch": 0.7467217095677513, "grad_norm": 0.37363842129707336, "learning_rate": 0.0001, "loss": 1.6773, "step": 3075 }, { "epoch": 0.7469645458960661, "grad_norm": 0.375654399394989, "learning_rate": 0.0001, "loss": 1.7064, "step": 3076 }, { "epoch": 0.7472073822243808, "grad_norm": 0.359342485666275, "learning_rate": 0.0001, "loss": 1.6948, "step": 3077 }, { "epoch": 0.7474502185526954, "grad_norm": 0.3476933538913727, "learning_rate": 0.0001, "loss": 1.6175, "step": 3078 }, { "epoch": 0.7476930548810102, "grad_norm": 0.38797739148139954, "learning_rate": 0.0001, "loss": 1.741, "step": 3079 }, { "epoch": 0.7479358912093249, "grad_norm": 0.3711453676223755, "learning_rate": 0.0001, "loss": 1.6954, "step": 3080 }, { "epoch": 0.7481787275376396, "grad_norm": 0.37687355279922485, "learning_rate": 0.0001, "loss": 1.7159, "step": 3081 }, { "epoch": 0.7484215638659544, "grad_norm": 0.36160585284233093, "learning_rate": 0.0001, "loss": 1.7158, "step": 3082 }, { "epoch": 0.748664400194269, "grad_norm": 0.3428528606891632, "learning_rate": 0.0001, "loss": 1.5954, "step": 3083 }, { "epoch": 0.7489072365225837, "grad_norm": 0.38035425543785095, "learning_rate": 0.0001, "loss": 1.7305, "step": 3084 }, { "epoch": 0.7491500728508985, "grad_norm": 0.37780630588531494, "learning_rate": 0.0001, "loss": 1.6278, "step": 3085 }, { "epoch": 0.7493929091792132, "grad_norm": 0.3517446219921112, "learning_rate": 0.0001, "loss": 1.7097, "step": 3086 }, { "epoch": 0.7496357455075279, "grad_norm": 0.384857177734375, "learning_rate": 0.0001, "loss": 1.7569, "step": 3087 }, { "epoch": 0.7498785818358427, "grad_norm": 0.35901719331741333, "learning_rate": 0.0001, "loss": 1.6987, "step": 3088 }, { "epoch": 0.7501214181641573, "grad_norm": 0.3643746078014374, "learning_rate": 0.0001, "loss": 1.8, "step": 3089 }, { "epoch": 0.7503642544924721, "grad_norm": 0.34800952672958374, "learning_rate": 0.0001, "loss": 1.802, "step": 3090 }, { "epoch": 0.7506070908207868, "grad_norm": 0.3544056713581085, "learning_rate": 0.0001, "loss": 1.7547, "step": 3091 }, { "epoch": 0.7508499271491015, "grad_norm": 0.32631686329841614, "learning_rate": 0.0001, "loss": 1.6443, "step": 3092 }, { "epoch": 0.7510927634774163, "grad_norm": 0.368476927280426, "learning_rate": 0.0001, "loss": 1.6593, "step": 3093 }, { "epoch": 0.751335599805731, "grad_norm": 0.3730914890766144, "learning_rate": 0.0001, "loss": 1.7031, "step": 3094 }, { "epoch": 0.7515784361340456, "grad_norm": 0.3924466669559479, "learning_rate": 0.0001, "loss": 1.708, "step": 3095 }, { "epoch": 0.7518212724623604, "grad_norm": 0.3600999414920807, "learning_rate": 0.0001, "loss": 1.5777, "step": 3096 }, { "epoch": 0.7520641087906751, "grad_norm": 0.3866841197013855, "learning_rate": 0.0001, "loss": 1.6916, "step": 3097 }, { "epoch": 0.7523069451189898, "grad_norm": 0.39760568737983704, "learning_rate": 0.0001, "loss": 1.7249, "step": 3098 }, { "epoch": 0.7525497814473046, "grad_norm": 0.3764370083808899, "learning_rate": 0.0001, "loss": 1.8291, "step": 3099 }, { "epoch": 0.7527926177756192, "grad_norm": 0.3639552593231201, "learning_rate": 0.0001, "loss": 1.8444, "step": 3100 }, { "epoch": 0.7530354541039339, "grad_norm": 0.34328848123550415, "learning_rate": 0.0001, "loss": 1.5405, "step": 3101 }, { "epoch": 0.7532782904322487, "grad_norm": 0.36111384630203247, "learning_rate": 0.0001, "loss": 1.7172, "step": 3102 }, { "epoch": 0.7535211267605634, "grad_norm": 0.3573237955570221, "learning_rate": 0.0001, "loss": 1.6974, "step": 3103 }, { "epoch": 0.753763963088878, "grad_norm": 0.36783355474472046, "learning_rate": 0.0001, "loss": 1.7718, "step": 3104 }, { "epoch": 0.7540067994171928, "grad_norm": 0.3471871614456177, "learning_rate": 0.0001, "loss": 1.7798, "step": 3105 }, { "epoch": 0.7542496357455075, "grad_norm": 0.37970730662345886, "learning_rate": 0.0001, "loss": 1.8948, "step": 3106 }, { "epoch": 0.7544924720738222, "grad_norm": 0.37085381150245667, "learning_rate": 0.0001, "loss": 1.6477, "step": 3107 }, { "epoch": 0.754735308402137, "grad_norm": 0.39417344331741333, "learning_rate": 0.0001, "loss": 1.8159, "step": 3108 }, { "epoch": 0.7549781447304517, "grad_norm": 0.3625888526439667, "learning_rate": 0.0001, "loss": 1.6386, "step": 3109 }, { "epoch": 0.7552209810587663, "grad_norm": 0.3569849729537964, "learning_rate": 0.0001, "loss": 1.644, "step": 3110 }, { "epoch": 0.7554638173870811, "grad_norm": 0.3573492169380188, "learning_rate": 0.0001, "loss": 1.7321, "step": 3111 }, { "epoch": 0.7557066537153958, "grad_norm": 0.3579084873199463, "learning_rate": 0.0001, "loss": 1.6692, "step": 3112 }, { "epoch": 0.7559494900437106, "grad_norm": 0.3422337770462036, "learning_rate": 0.0001, "loss": 1.7031, "step": 3113 }, { "epoch": 0.7561923263720253, "grad_norm": 0.3654865026473999, "learning_rate": 0.0001, "loss": 1.806, "step": 3114 }, { "epoch": 0.75643516270034, "grad_norm": 0.34083136916160583, "learning_rate": 0.0001, "loss": 1.5453, "step": 3115 }, { "epoch": 0.7566779990286547, "grad_norm": 0.3878861665725708, "learning_rate": 0.0001, "loss": 1.8776, "step": 3116 }, { "epoch": 0.7569208353569694, "grad_norm": 0.3917778730392456, "learning_rate": 0.0001, "loss": 1.7033, "step": 3117 }, { "epoch": 0.7571636716852841, "grad_norm": 0.3512471914291382, "learning_rate": 0.0001, "loss": 1.6972, "step": 3118 }, { "epoch": 0.7574065080135989, "grad_norm": 0.3681905269622803, "learning_rate": 0.0001, "loss": 1.6938, "step": 3119 }, { "epoch": 0.7576493443419136, "grad_norm": 0.373844712972641, "learning_rate": 0.0001, "loss": 1.6651, "step": 3120 }, { "epoch": 0.7578921806702282, "grad_norm": 0.36612239480018616, "learning_rate": 0.0001, "loss": 1.6204, "step": 3121 }, { "epoch": 0.758135016998543, "grad_norm": 0.35830995440483093, "learning_rate": 0.0001, "loss": 1.6255, "step": 3122 }, { "epoch": 0.7583778533268577, "grad_norm": 0.3689367473125458, "learning_rate": 0.0001, "loss": 1.6545, "step": 3123 }, { "epoch": 0.7586206896551724, "grad_norm": 0.36791351437568665, "learning_rate": 0.0001, "loss": 1.702, "step": 3124 }, { "epoch": 0.7588635259834872, "grad_norm": 0.37115055322647095, "learning_rate": 0.0001, "loss": 1.7153, "step": 3125 }, { "epoch": 0.7591063623118018, "grad_norm": 0.3720249831676483, "learning_rate": 0.0001, "loss": 1.7402, "step": 3126 }, { "epoch": 0.7593491986401165, "grad_norm": 0.3667154014110565, "learning_rate": 0.0001, "loss": 1.8012, "step": 3127 }, { "epoch": 0.7595920349684313, "grad_norm": 0.369220495223999, "learning_rate": 0.0001, "loss": 1.8846, "step": 3128 }, { "epoch": 0.759834871296746, "grad_norm": 0.37342166900634766, "learning_rate": 0.0001, "loss": 1.7446, "step": 3129 }, { "epoch": 0.7600777076250607, "grad_norm": 0.34881383180618286, "learning_rate": 0.0001, "loss": 1.6816, "step": 3130 }, { "epoch": 0.7603205439533754, "grad_norm": 0.34620028734207153, "learning_rate": 0.0001, "loss": 1.6644, "step": 3131 }, { "epoch": 0.7605633802816901, "grad_norm": 0.3626459538936615, "learning_rate": 0.0001, "loss": 1.7328, "step": 3132 }, { "epoch": 0.7608062166100048, "grad_norm": 0.3597855567932129, "learning_rate": 0.0001, "loss": 1.7039, "step": 3133 }, { "epoch": 0.7610490529383196, "grad_norm": 0.37836751341819763, "learning_rate": 0.0001, "loss": 1.916, "step": 3134 }, { "epoch": 0.7612918892666343, "grad_norm": 0.3582778871059418, "learning_rate": 0.0001, "loss": 1.6265, "step": 3135 }, { "epoch": 0.7615347255949491, "grad_norm": 0.34617099165916443, "learning_rate": 0.0001, "loss": 1.6059, "step": 3136 }, { "epoch": 0.7617775619232637, "grad_norm": 0.3793935775756836, "learning_rate": 0.0001, "loss": 1.7159, "step": 3137 }, { "epoch": 0.7620203982515784, "grad_norm": 0.3514363765716553, "learning_rate": 0.0001, "loss": 1.7584, "step": 3138 }, { "epoch": 0.7622632345798932, "grad_norm": 0.36340010166168213, "learning_rate": 0.0001, "loss": 1.7109, "step": 3139 }, { "epoch": 0.7625060709082079, "grad_norm": 0.3772117793560028, "learning_rate": 0.0001, "loss": 1.8007, "step": 3140 }, { "epoch": 0.7627489072365226, "grad_norm": 0.3619104027748108, "learning_rate": 0.0001, "loss": 1.7076, "step": 3141 }, { "epoch": 0.7629917435648373, "grad_norm": 0.366484671831131, "learning_rate": 0.0001, "loss": 1.749, "step": 3142 }, { "epoch": 0.763234579893152, "grad_norm": 0.37760522961616516, "learning_rate": 0.0001, "loss": 1.6653, "step": 3143 }, { "epoch": 0.7634774162214667, "grad_norm": 0.35971128940582275, "learning_rate": 0.0001, "loss": 1.7426, "step": 3144 }, { "epoch": 0.7637202525497815, "grad_norm": 0.3709493577480316, "learning_rate": 0.0001, "loss": 1.8147, "step": 3145 }, { "epoch": 0.7639630888780962, "grad_norm": 0.3713069558143616, "learning_rate": 0.0001, "loss": 1.6445, "step": 3146 }, { "epoch": 0.7642059252064108, "grad_norm": 0.3586064577102661, "learning_rate": 0.0001, "loss": 1.6478, "step": 3147 }, { "epoch": 0.7644487615347256, "grad_norm": 0.34258317947387695, "learning_rate": 0.0001, "loss": 1.696, "step": 3148 }, { "epoch": 0.7646915978630403, "grad_norm": 0.3723694682121277, "learning_rate": 0.0001, "loss": 1.7666, "step": 3149 }, { "epoch": 0.764934434191355, "grad_norm": 0.41363194584846497, "learning_rate": 0.0001, "loss": 1.7405, "step": 3150 }, { "epoch": 0.7651772705196698, "grad_norm": 0.36498725414276123, "learning_rate": 0.0001, "loss": 1.7503, "step": 3151 }, { "epoch": 0.7654201068479844, "grad_norm": 0.3648782968521118, "learning_rate": 0.0001, "loss": 1.7038, "step": 3152 }, { "epoch": 0.7656629431762991, "grad_norm": 0.3406696617603302, "learning_rate": 0.0001, "loss": 1.6182, "step": 3153 }, { "epoch": 0.7659057795046139, "grad_norm": 0.34064993262290955, "learning_rate": 0.0001, "loss": 1.6491, "step": 3154 }, { "epoch": 0.7661486158329286, "grad_norm": 0.3697326183319092, "learning_rate": 0.0001, "loss": 1.7287, "step": 3155 }, { "epoch": 0.7663914521612433, "grad_norm": 0.3929752707481384, "learning_rate": 0.0001, "loss": 1.7711, "step": 3156 }, { "epoch": 0.7666342884895581, "grad_norm": 0.3791561424732208, "learning_rate": 0.0001, "loss": 1.5454, "step": 3157 }, { "epoch": 0.7668771248178727, "grad_norm": 0.35193535685539246, "learning_rate": 0.0001, "loss": 1.6301, "step": 3158 }, { "epoch": 0.7671199611461875, "grad_norm": 0.3666219711303711, "learning_rate": 0.0001, "loss": 1.7659, "step": 3159 }, { "epoch": 0.7673627974745022, "grad_norm": 0.3909856379032135, "learning_rate": 0.0001, "loss": 1.6792, "step": 3160 }, { "epoch": 0.7676056338028169, "grad_norm": 0.38402125239372253, "learning_rate": 0.0001, "loss": 1.6724, "step": 3161 }, { "epoch": 0.7678484701311317, "grad_norm": 0.3770609498023987, "learning_rate": 0.0001, "loss": 1.6962, "step": 3162 }, { "epoch": 0.7680913064594463, "grad_norm": 0.3623834550380707, "learning_rate": 0.0001, "loss": 1.6792, "step": 3163 }, { "epoch": 0.768334142787761, "grad_norm": 0.38644856214523315, "learning_rate": 0.0001, "loss": 1.862, "step": 3164 }, { "epoch": 0.7685769791160758, "grad_norm": 0.36994674801826477, "learning_rate": 0.0001, "loss": 1.8121, "step": 3165 }, { "epoch": 0.7688198154443905, "grad_norm": 0.3379240334033966, "learning_rate": 0.0001, "loss": 1.6578, "step": 3166 }, { "epoch": 0.7690626517727052, "grad_norm": 0.3766805827617645, "learning_rate": 0.0001, "loss": 1.7598, "step": 3167 }, { "epoch": 0.76930548810102, "grad_norm": 0.3783973455429077, "learning_rate": 0.0001, "loss": 1.7333, "step": 3168 }, { "epoch": 0.7695483244293346, "grad_norm": 0.354950487613678, "learning_rate": 0.0001, "loss": 1.6588, "step": 3169 }, { "epoch": 0.7697911607576493, "grad_norm": 0.37209805846214294, "learning_rate": 0.0001, "loss": 1.6948, "step": 3170 }, { "epoch": 0.7700339970859641, "grad_norm": 0.38874351978302, "learning_rate": 0.0001, "loss": 1.7529, "step": 3171 }, { "epoch": 0.7702768334142788, "grad_norm": 0.3658055365085602, "learning_rate": 0.0001, "loss": 1.7363, "step": 3172 }, { "epoch": 0.7705196697425934, "grad_norm": 0.37785252928733826, "learning_rate": 0.0001, "loss": 1.6342, "step": 3173 }, { "epoch": 0.7707625060709082, "grad_norm": 0.35146087408065796, "learning_rate": 0.0001, "loss": 1.6762, "step": 3174 }, { "epoch": 0.7710053423992229, "grad_norm": 0.3499802350997925, "learning_rate": 0.0001, "loss": 1.7154, "step": 3175 }, { "epoch": 0.7712481787275376, "grad_norm": 0.3618769943714142, "learning_rate": 0.0001, "loss": 1.7542, "step": 3176 }, { "epoch": 0.7714910150558524, "grad_norm": 0.37386730313301086, "learning_rate": 0.0001, "loss": 1.7876, "step": 3177 }, { "epoch": 0.7717338513841671, "grad_norm": 0.3643173575401306, "learning_rate": 0.0001, "loss": 1.6575, "step": 3178 }, { "epoch": 0.7719766877124817, "grad_norm": 0.3616684377193451, "learning_rate": 0.0001, "loss": 1.6947, "step": 3179 }, { "epoch": 0.7722195240407965, "grad_norm": 0.3882141709327698, "learning_rate": 0.0001, "loss": 1.5895, "step": 3180 }, { "epoch": 0.7724623603691112, "grad_norm": 0.35174623131752014, "learning_rate": 0.0001, "loss": 1.5863, "step": 3181 }, { "epoch": 0.772705196697426, "grad_norm": 0.3746509552001953, "learning_rate": 0.0001, "loss": 1.7051, "step": 3182 }, { "epoch": 0.7729480330257407, "grad_norm": 0.3511878252029419, "learning_rate": 0.0001, "loss": 1.6308, "step": 3183 }, { "epoch": 0.7731908693540553, "grad_norm": 0.36943355202674866, "learning_rate": 0.0001, "loss": 1.8696, "step": 3184 }, { "epoch": 0.7734337056823701, "grad_norm": 0.3677257299423218, "learning_rate": 0.0001, "loss": 1.6531, "step": 3185 }, { "epoch": 0.7736765420106848, "grad_norm": 0.37608298659324646, "learning_rate": 0.0001, "loss": 1.7762, "step": 3186 }, { "epoch": 0.7739193783389995, "grad_norm": 0.35966455936431885, "learning_rate": 0.0001, "loss": 1.6512, "step": 3187 }, { "epoch": 0.7741622146673143, "grad_norm": 0.35690024495124817, "learning_rate": 0.0001, "loss": 1.5475, "step": 3188 }, { "epoch": 0.774405050995629, "grad_norm": 0.3662291169166565, "learning_rate": 0.0001, "loss": 1.7397, "step": 3189 }, { "epoch": 0.7746478873239436, "grad_norm": 0.3676881492137909, "learning_rate": 0.0001, "loss": 1.7465, "step": 3190 }, { "epoch": 0.7748907236522584, "grad_norm": 0.36643221974372864, "learning_rate": 0.0001, "loss": 1.6811, "step": 3191 }, { "epoch": 0.7751335599805731, "grad_norm": 0.3677716553211212, "learning_rate": 0.0001, "loss": 1.7394, "step": 3192 }, { "epoch": 0.7753763963088878, "grad_norm": 0.3882474899291992, "learning_rate": 0.0001, "loss": 1.9068, "step": 3193 }, { "epoch": 0.7756192326372026, "grad_norm": 0.36003318428993225, "learning_rate": 0.0001, "loss": 1.5764, "step": 3194 }, { "epoch": 0.7758620689655172, "grad_norm": 0.3616494834423065, "learning_rate": 0.0001, "loss": 1.7874, "step": 3195 }, { "epoch": 0.7761049052938319, "grad_norm": 0.36378318071365356, "learning_rate": 0.0001, "loss": 1.8228, "step": 3196 }, { "epoch": 0.7763477416221467, "grad_norm": 0.3754560053348541, "learning_rate": 0.0001, "loss": 1.8119, "step": 3197 }, { "epoch": 0.7765905779504614, "grad_norm": 0.38387665152549744, "learning_rate": 0.0001, "loss": 1.7857, "step": 3198 }, { "epoch": 0.7768334142787761, "grad_norm": 0.3557393550872803, "learning_rate": 0.0001, "loss": 1.704, "step": 3199 }, { "epoch": 0.7770762506070908, "grad_norm": 0.36891213059425354, "learning_rate": 0.0001, "loss": 1.6919, "step": 3200 }, { "epoch": 0.7773190869354055, "grad_norm": 0.38402387499809265, "learning_rate": 0.0001, "loss": 1.7416, "step": 3201 }, { "epoch": 0.7775619232637202, "grad_norm": 0.37884989380836487, "learning_rate": 0.0001, "loss": 1.8708, "step": 3202 }, { "epoch": 0.777804759592035, "grad_norm": 0.37382736802101135, "learning_rate": 0.0001, "loss": 1.8483, "step": 3203 }, { "epoch": 0.7780475959203497, "grad_norm": 0.38139647245407104, "learning_rate": 0.0001, "loss": 1.8322, "step": 3204 }, { "epoch": 0.7782904322486645, "grad_norm": 0.3646901547908783, "learning_rate": 0.0001, "loss": 1.6103, "step": 3205 }, { "epoch": 0.7785332685769791, "grad_norm": 0.3592281937599182, "learning_rate": 0.0001, "loss": 1.7267, "step": 3206 }, { "epoch": 0.7787761049052938, "grad_norm": 0.40910977125167847, "learning_rate": 0.0001, "loss": 1.7943, "step": 3207 }, { "epoch": 0.7790189412336086, "grad_norm": 0.3776848316192627, "learning_rate": 0.0001, "loss": 1.7475, "step": 3208 }, { "epoch": 0.7792617775619233, "grad_norm": 0.38275858759880066, "learning_rate": 0.0001, "loss": 1.7987, "step": 3209 }, { "epoch": 0.779504613890238, "grad_norm": 0.38461169600486755, "learning_rate": 0.0001, "loss": 1.7708, "step": 3210 }, { "epoch": 0.7797474502185527, "grad_norm": 0.36892619729042053, "learning_rate": 0.0001, "loss": 1.6829, "step": 3211 }, { "epoch": 0.7799902865468674, "grad_norm": 0.3585686981678009, "learning_rate": 0.0001, "loss": 1.6911, "step": 3212 }, { "epoch": 0.7802331228751821, "grad_norm": 0.358227014541626, "learning_rate": 0.0001, "loss": 1.5892, "step": 3213 }, { "epoch": 0.7804759592034969, "grad_norm": 0.37613505125045776, "learning_rate": 0.0001, "loss": 1.7145, "step": 3214 }, { "epoch": 0.7807187955318116, "grad_norm": 0.3856728672981262, "learning_rate": 0.0001, "loss": 1.7167, "step": 3215 }, { "epoch": 0.7809616318601262, "grad_norm": 0.3552917540073395, "learning_rate": 0.0001, "loss": 1.6204, "step": 3216 }, { "epoch": 0.781204468188441, "grad_norm": 0.3964640498161316, "learning_rate": 0.0001, "loss": 1.9078, "step": 3217 }, { "epoch": 0.7814473045167557, "grad_norm": 0.3828042447566986, "learning_rate": 0.0001, "loss": 1.7612, "step": 3218 }, { "epoch": 0.7816901408450704, "grad_norm": 0.37503907084465027, "learning_rate": 0.0001, "loss": 1.5186, "step": 3219 }, { "epoch": 0.7819329771733852, "grad_norm": 0.395419716835022, "learning_rate": 0.0001, "loss": 1.5914, "step": 3220 }, { "epoch": 0.7821758135016998, "grad_norm": 0.34283438324928284, "learning_rate": 0.0001, "loss": 1.6393, "step": 3221 }, { "epoch": 0.7824186498300145, "grad_norm": 0.3886133134365082, "learning_rate": 0.0001, "loss": 1.7853, "step": 3222 }, { "epoch": 0.7826614861583293, "grad_norm": 0.3807658851146698, "learning_rate": 0.0001, "loss": 1.6544, "step": 3223 }, { "epoch": 0.782904322486644, "grad_norm": 0.3506130874156952, "learning_rate": 0.0001, "loss": 1.6089, "step": 3224 }, { "epoch": 0.7831471588149587, "grad_norm": 0.3771856129169464, "learning_rate": 0.0001, "loss": 1.7467, "step": 3225 }, { "epoch": 0.7833899951432735, "grad_norm": 0.37501615285873413, "learning_rate": 0.0001, "loss": 1.6253, "step": 3226 }, { "epoch": 0.7836328314715881, "grad_norm": 0.3541441261768341, "learning_rate": 0.0001, "loss": 1.6923, "step": 3227 }, { "epoch": 0.7838756677999029, "grad_norm": 0.3698452413082123, "learning_rate": 0.0001, "loss": 1.7794, "step": 3228 }, { "epoch": 0.7841185041282176, "grad_norm": 0.3618011176586151, "learning_rate": 0.0001, "loss": 1.6233, "step": 3229 }, { "epoch": 0.7843613404565323, "grad_norm": 0.3783111870288849, "learning_rate": 0.0001, "loss": 1.7096, "step": 3230 }, { "epoch": 0.7846041767848471, "grad_norm": 0.3707350790500641, "learning_rate": 0.0001, "loss": 1.7606, "step": 3231 }, { "epoch": 0.7848470131131617, "grad_norm": 0.34714633226394653, "learning_rate": 0.0001, "loss": 1.6162, "step": 3232 }, { "epoch": 0.7850898494414764, "grad_norm": 0.3586050868034363, "learning_rate": 0.0001, "loss": 1.6275, "step": 3233 }, { "epoch": 0.7853326857697912, "grad_norm": 0.3658125400543213, "learning_rate": 0.0001, "loss": 1.7027, "step": 3234 }, { "epoch": 0.7855755220981059, "grad_norm": 0.35601937770843506, "learning_rate": 0.0001, "loss": 1.6929, "step": 3235 }, { "epoch": 0.7858183584264206, "grad_norm": 0.377439945936203, "learning_rate": 0.0001, "loss": 1.7305, "step": 3236 }, { "epoch": 0.7860611947547353, "grad_norm": 0.3666037619113922, "learning_rate": 0.0001, "loss": 1.7382, "step": 3237 }, { "epoch": 0.78630403108305, "grad_norm": 0.3553290367126465, "learning_rate": 0.0001, "loss": 1.6883, "step": 3238 }, { "epoch": 0.7865468674113647, "grad_norm": 0.36804431676864624, "learning_rate": 0.0001, "loss": 1.7385, "step": 3239 }, { "epoch": 0.7867897037396795, "grad_norm": 0.3820963501930237, "learning_rate": 0.0001, "loss": 1.831, "step": 3240 }, { "epoch": 0.7870325400679942, "grad_norm": 0.3654751181602478, "learning_rate": 0.0001, "loss": 1.6394, "step": 3241 }, { "epoch": 0.7872753763963088, "grad_norm": 0.37659505009651184, "learning_rate": 0.0001, "loss": 1.7862, "step": 3242 }, { "epoch": 0.7875182127246236, "grad_norm": 0.3688657283782959, "learning_rate": 0.0001, "loss": 1.6812, "step": 3243 }, { "epoch": 0.7877610490529383, "grad_norm": 0.37824395298957825, "learning_rate": 0.0001, "loss": 1.6927, "step": 3244 }, { "epoch": 0.788003885381253, "grad_norm": 0.36304405331611633, "learning_rate": 0.0001, "loss": 1.5247, "step": 3245 }, { "epoch": 0.7882467217095678, "grad_norm": 0.36583948135375977, "learning_rate": 0.0001, "loss": 1.7472, "step": 3246 }, { "epoch": 0.7884895580378825, "grad_norm": 0.3592684864997864, "learning_rate": 0.0001, "loss": 1.7444, "step": 3247 }, { "epoch": 0.7887323943661971, "grad_norm": 0.3583742380142212, "learning_rate": 0.0001, "loss": 1.6766, "step": 3248 }, { "epoch": 0.7889752306945119, "grad_norm": 0.3744942247867584, "learning_rate": 0.0001, "loss": 1.6817, "step": 3249 }, { "epoch": 0.7892180670228266, "grad_norm": 0.38749590516090393, "learning_rate": 0.0001, "loss": 1.5955, "step": 3250 }, { "epoch": 0.7894609033511414, "grad_norm": 0.35330215096473694, "learning_rate": 0.0001, "loss": 1.755, "step": 3251 }, { "epoch": 0.7897037396794561, "grad_norm": 0.36863937973976135, "learning_rate": 0.0001, "loss": 1.7673, "step": 3252 }, { "epoch": 0.7899465760077707, "grad_norm": 0.4081970751285553, "learning_rate": 0.0001, "loss": 1.7699, "step": 3253 }, { "epoch": 0.7901894123360855, "grad_norm": 0.3634713888168335, "learning_rate": 0.0001, "loss": 1.7146, "step": 3254 }, { "epoch": 0.7904322486644002, "grad_norm": 0.35160213708877563, "learning_rate": 0.0001, "loss": 1.7863, "step": 3255 }, { "epoch": 0.7906750849927149, "grad_norm": 0.3982776403427124, "learning_rate": 0.0001, "loss": 1.9224, "step": 3256 }, { "epoch": 0.7909179213210297, "grad_norm": 0.381833016872406, "learning_rate": 0.0001, "loss": 1.6776, "step": 3257 }, { "epoch": 0.7911607576493443, "grad_norm": 0.3521765172481537, "learning_rate": 0.0001, "loss": 1.7072, "step": 3258 }, { "epoch": 0.791403593977659, "grad_norm": 0.37415578961372375, "learning_rate": 0.0001, "loss": 1.815, "step": 3259 }, { "epoch": 0.7916464303059738, "grad_norm": 0.3757515549659729, "learning_rate": 0.0001, "loss": 1.7893, "step": 3260 }, { "epoch": 0.7918892666342885, "grad_norm": 0.3607475459575653, "learning_rate": 0.0001, "loss": 1.8022, "step": 3261 }, { "epoch": 0.7921321029626032, "grad_norm": 0.3828256130218506, "learning_rate": 0.0001, "loss": 1.7075, "step": 3262 }, { "epoch": 0.792374939290918, "grad_norm": 0.3982337415218353, "learning_rate": 0.0001, "loss": 1.9973, "step": 3263 }, { "epoch": 0.7926177756192326, "grad_norm": 0.35930705070495605, "learning_rate": 0.0001, "loss": 1.6939, "step": 3264 }, { "epoch": 0.7928606119475473, "grad_norm": 0.3679361641407013, "learning_rate": 0.0001, "loss": 1.7, "step": 3265 }, { "epoch": 0.7931034482758621, "grad_norm": 0.3649376928806305, "learning_rate": 0.0001, "loss": 1.6656, "step": 3266 }, { "epoch": 0.7933462846041768, "grad_norm": 0.3413490951061249, "learning_rate": 0.0001, "loss": 1.5184, "step": 3267 }, { "epoch": 0.7935891209324915, "grad_norm": 0.3625178039073944, "learning_rate": 0.0001, "loss": 1.6144, "step": 3268 }, { "epoch": 0.7938319572608062, "grad_norm": 0.36175259947776794, "learning_rate": 0.0001, "loss": 1.7998, "step": 3269 }, { "epoch": 0.7940747935891209, "grad_norm": 0.35525405406951904, "learning_rate": 0.0001, "loss": 1.831, "step": 3270 }, { "epoch": 0.7943176299174356, "grad_norm": 0.3651837706565857, "learning_rate": 0.0001, "loss": 1.7108, "step": 3271 }, { "epoch": 0.7945604662457504, "grad_norm": 0.34924477338790894, "learning_rate": 0.0001, "loss": 1.6447, "step": 3272 }, { "epoch": 0.7948033025740651, "grad_norm": 0.37113112211227417, "learning_rate": 0.0001, "loss": 1.7026, "step": 3273 }, { "epoch": 0.7950461389023799, "grad_norm": 0.3404080867767334, "learning_rate": 0.0001, "loss": 1.613, "step": 3274 }, { "epoch": 0.7952889752306945, "grad_norm": 0.38144057989120483, "learning_rate": 0.0001, "loss": 1.9026, "step": 3275 }, { "epoch": 0.7955318115590092, "grad_norm": 0.3685757517814636, "learning_rate": 0.0001, "loss": 1.7348, "step": 3276 }, { "epoch": 0.795774647887324, "grad_norm": 0.3650652766227722, "learning_rate": 0.0001, "loss": 1.7319, "step": 3277 }, { "epoch": 0.7960174842156387, "grad_norm": 0.3495384752750397, "learning_rate": 0.0001, "loss": 1.5908, "step": 3278 }, { "epoch": 0.7962603205439533, "grad_norm": 0.4011358618736267, "learning_rate": 0.0001, "loss": 1.6868, "step": 3279 }, { "epoch": 0.7965031568722681, "grad_norm": 0.37123218178749084, "learning_rate": 0.0001, "loss": 1.7107, "step": 3280 }, { "epoch": 0.7967459932005828, "grad_norm": 0.3909901976585388, "learning_rate": 0.0001, "loss": 1.7587, "step": 3281 }, { "epoch": 0.7969888295288975, "grad_norm": 0.39892497658729553, "learning_rate": 0.0001, "loss": 1.8257, "step": 3282 }, { "epoch": 0.7972316658572123, "grad_norm": 0.3590630888938904, "learning_rate": 0.0001, "loss": 1.6685, "step": 3283 }, { "epoch": 0.797474502185527, "grad_norm": 0.3754388093948364, "learning_rate": 0.0001, "loss": 1.8419, "step": 3284 }, { "epoch": 0.7977173385138416, "grad_norm": 0.39368849992752075, "learning_rate": 0.0001, "loss": 1.6508, "step": 3285 }, { "epoch": 0.7979601748421564, "grad_norm": 0.3552359938621521, "learning_rate": 0.0001, "loss": 1.7739, "step": 3286 }, { "epoch": 0.7982030111704711, "grad_norm": 0.3980265259742737, "learning_rate": 0.0001, "loss": 1.7555, "step": 3287 }, { "epoch": 0.7984458474987858, "grad_norm": 0.36231398582458496, "learning_rate": 0.0001, "loss": 1.5782, "step": 3288 }, { "epoch": 0.7986886838271006, "grad_norm": 0.3526536822319031, "learning_rate": 0.0001, "loss": 1.6628, "step": 3289 }, { "epoch": 0.7989315201554152, "grad_norm": 0.401947557926178, "learning_rate": 0.0001, "loss": 1.7001, "step": 3290 }, { "epoch": 0.7991743564837299, "grad_norm": 0.3727927505970001, "learning_rate": 0.0001, "loss": 1.6996, "step": 3291 }, { "epoch": 0.7994171928120447, "grad_norm": 0.37071672081947327, "learning_rate": 0.0001, "loss": 1.6681, "step": 3292 }, { "epoch": 0.7996600291403594, "grad_norm": 0.38639959692955017, "learning_rate": 0.0001, "loss": 1.7947, "step": 3293 }, { "epoch": 0.7999028654686741, "grad_norm": 0.35984519124031067, "learning_rate": 0.0001, "loss": 1.7382, "step": 3294 }, { "epoch": 0.8001457017969889, "grad_norm": 0.3773345947265625, "learning_rate": 0.0001, "loss": 1.8011, "step": 3295 }, { "epoch": 0.8003885381253035, "grad_norm": 0.3689408004283905, "learning_rate": 0.0001, "loss": 1.6109, "step": 3296 }, { "epoch": 0.8006313744536183, "grad_norm": 0.36196985840797424, "learning_rate": 0.0001, "loss": 1.6933, "step": 3297 }, { "epoch": 0.800874210781933, "grad_norm": 0.3734223544597626, "learning_rate": 0.0001, "loss": 1.6886, "step": 3298 }, { "epoch": 0.8011170471102477, "grad_norm": 0.38002875447273254, "learning_rate": 0.0001, "loss": 1.7362, "step": 3299 }, { "epoch": 0.8013598834385625, "grad_norm": 0.3728928864002228, "learning_rate": 0.0001, "loss": 1.8203, "step": 3300 }, { "epoch": 0.8016027197668771, "grad_norm": 0.3470155894756317, "learning_rate": 0.0001, "loss": 1.6447, "step": 3301 }, { "epoch": 0.8018455560951918, "grad_norm": 0.36024266481399536, "learning_rate": 0.0001, "loss": 1.8473, "step": 3302 }, { "epoch": 0.8020883924235066, "grad_norm": 0.3593677282333374, "learning_rate": 0.0001, "loss": 1.7207, "step": 3303 }, { "epoch": 0.8023312287518213, "grad_norm": 0.37037888169288635, "learning_rate": 0.0001, "loss": 1.7462, "step": 3304 }, { "epoch": 0.802574065080136, "grad_norm": 0.37236881256103516, "learning_rate": 0.0001, "loss": 1.7995, "step": 3305 }, { "epoch": 0.8028169014084507, "grad_norm": 0.3446558117866516, "learning_rate": 0.0001, "loss": 1.6447, "step": 3306 }, { "epoch": 0.8030597377367654, "grad_norm": 0.3672121465206146, "learning_rate": 0.0001, "loss": 1.7503, "step": 3307 }, { "epoch": 0.8033025740650801, "grad_norm": 0.3923403322696686, "learning_rate": 0.0001, "loss": 1.717, "step": 3308 }, { "epoch": 0.8035454103933949, "grad_norm": 0.39351925253868103, "learning_rate": 0.0001, "loss": 1.6714, "step": 3309 }, { "epoch": 0.8037882467217096, "grad_norm": 0.37669646739959717, "learning_rate": 0.0001, "loss": 1.7114, "step": 3310 }, { "epoch": 0.8040310830500242, "grad_norm": 0.3658166825771332, "learning_rate": 0.0001, "loss": 1.7235, "step": 3311 }, { "epoch": 0.804273919378339, "grad_norm": 0.3610089421272278, "learning_rate": 0.0001, "loss": 1.6012, "step": 3312 }, { "epoch": 0.8045167557066537, "grad_norm": 0.3617137670516968, "learning_rate": 0.0001, "loss": 1.6845, "step": 3313 }, { "epoch": 0.8047595920349684, "grad_norm": 0.37453964352607727, "learning_rate": 0.0001, "loss": 1.7513, "step": 3314 }, { "epoch": 0.8050024283632832, "grad_norm": 0.38044503331184387, "learning_rate": 0.0001, "loss": 1.7897, "step": 3315 }, { "epoch": 0.8052452646915979, "grad_norm": 0.3552364408969879, "learning_rate": 0.0001, "loss": 1.545, "step": 3316 }, { "epoch": 0.8054881010199125, "grad_norm": 0.38444262742996216, "learning_rate": 0.0001, "loss": 1.7455, "step": 3317 }, { "epoch": 0.8057309373482273, "grad_norm": 0.36202672123908997, "learning_rate": 0.0001, "loss": 1.6165, "step": 3318 }, { "epoch": 0.805973773676542, "grad_norm": 0.38152796030044556, "learning_rate": 0.0001, "loss": 1.8369, "step": 3319 }, { "epoch": 0.8062166100048568, "grad_norm": 0.3856159448623657, "learning_rate": 0.0001, "loss": 1.7539, "step": 3320 }, { "epoch": 0.8064594463331715, "grad_norm": 0.37758535146713257, "learning_rate": 0.0001, "loss": 1.7429, "step": 3321 }, { "epoch": 0.8067022826614861, "grad_norm": 0.4032396376132965, "learning_rate": 0.0001, "loss": 1.7092, "step": 3322 }, { "epoch": 0.8069451189898009, "grad_norm": 0.3742205798625946, "learning_rate": 0.0001, "loss": 1.6522, "step": 3323 }, { "epoch": 0.8071879553181156, "grad_norm": 0.3685240149497986, "learning_rate": 0.0001, "loss": 1.818, "step": 3324 }, { "epoch": 0.8074307916464303, "grad_norm": 0.39305341243743896, "learning_rate": 0.0001, "loss": 1.9695, "step": 3325 }, { "epoch": 0.8076736279747451, "grad_norm": 0.38635092973709106, "learning_rate": 0.0001, "loss": 1.6922, "step": 3326 }, { "epoch": 0.8079164643030597, "grad_norm": 0.3558487594127655, "learning_rate": 0.0001, "loss": 1.588, "step": 3327 }, { "epoch": 0.8081593006313744, "grad_norm": 0.37422701716423035, "learning_rate": 0.0001, "loss": 1.6272, "step": 3328 }, { "epoch": 0.8084021369596892, "grad_norm": 0.3510863184928894, "learning_rate": 0.0001, "loss": 1.7223, "step": 3329 }, { "epoch": 0.8086449732880039, "grad_norm": 0.38053059577941895, "learning_rate": 0.0001, "loss": 1.7258, "step": 3330 }, { "epoch": 0.8088878096163186, "grad_norm": 0.39362215995788574, "learning_rate": 0.0001, "loss": 1.6726, "step": 3331 }, { "epoch": 0.8091306459446334, "grad_norm": 0.3837401866912842, "learning_rate": 0.0001, "loss": 1.8808, "step": 3332 }, { "epoch": 0.809373482272948, "grad_norm": 0.35187309980392456, "learning_rate": 0.0001, "loss": 1.6576, "step": 3333 }, { "epoch": 0.8096163186012627, "grad_norm": 0.4003773331642151, "learning_rate": 0.0001, "loss": 1.8689, "step": 3334 }, { "epoch": 0.8098591549295775, "grad_norm": 0.3761930465698242, "learning_rate": 0.0001, "loss": 1.6872, "step": 3335 }, { "epoch": 0.8101019912578922, "grad_norm": 0.34285593032836914, "learning_rate": 0.0001, "loss": 1.471, "step": 3336 }, { "epoch": 0.8103448275862069, "grad_norm": 0.36871927976608276, "learning_rate": 0.0001, "loss": 1.7828, "step": 3337 }, { "epoch": 0.8105876639145216, "grad_norm": 0.4032946825027466, "learning_rate": 0.0001, "loss": 1.9575, "step": 3338 }, { "epoch": 0.8108305002428363, "grad_norm": 0.3758159875869751, "learning_rate": 0.0001, "loss": 1.702, "step": 3339 }, { "epoch": 0.811073336571151, "grad_norm": 0.396798312664032, "learning_rate": 0.0001, "loss": 1.9003, "step": 3340 }, { "epoch": 0.8113161728994658, "grad_norm": 0.39947810769081116, "learning_rate": 0.0001, "loss": 1.8968, "step": 3341 }, { "epoch": 0.8115590092277805, "grad_norm": 0.36287468671798706, "learning_rate": 0.0001, "loss": 1.6774, "step": 3342 }, { "epoch": 0.8118018455560952, "grad_norm": 0.37061214447021484, "learning_rate": 0.0001, "loss": 1.5444, "step": 3343 }, { "epoch": 0.8120446818844099, "grad_norm": 0.3543872535228729, "learning_rate": 0.0001, "loss": 1.7137, "step": 3344 }, { "epoch": 0.8122875182127246, "grad_norm": 0.40300488471984863, "learning_rate": 0.0001, "loss": 1.764, "step": 3345 }, { "epoch": 0.8125303545410394, "grad_norm": 0.34426894783973694, "learning_rate": 0.0001, "loss": 1.5996, "step": 3346 }, { "epoch": 0.8127731908693541, "grad_norm": 0.3738694489002228, "learning_rate": 0.0001, "loss": 1.6846, "step": 3347 }, { "epoch": 0.8130160271976687, "grad_norm": 0.38625332713127136, "learning_rate": 0.0001, "loss": 1.7664, "step": 3348 }, { "epoch": 0.8132588635259835, "grad_norm": 0.38722801208496094, "learning_rate": 0.0001, "loss": 1.922, "step": 3349 }, { "epoch": 0.8135016998542982, "grad_norm": 0.38053563237190247, "learning_rate": 0.0001, "loss": 1.836, "step": 3350 }, { "epoch": 0.8137445361826129, "grad_norm": 0.3337772786617279, "learning_rate": 0.0001, "loss": 1.4316, "step": 3351 }, { "epoch": 0.8139873725109277, "grad_norm": 0.3726194202899933, "learning_rate": 0.0001, "loss": 1.6717, "step": 3352 }, { "epoch": 0.8142302088392424, "grad_norm": 0.36288002133369446, "learning_rate": 0.0001, "loss": 1.8114, "step": 3353 }, { "epoch": 0.814473045167557, "grad_norm": 0.3562511205673218, "learning_rate": 0.0001, "loss": 1.6636, "step": 3354 }, { "epoch": 0.8147158814958718, "grad_norm": 0.360158234834671, "learning_rate": 0.0001, "loss": 1.6963, "step": 3355 }, { "epoch": 0.8149587178241865, "grad_norm": 0.36954307556152344, "learning_rate": 0.0001, "loss": 1.5675, "step": 3356 }, { "epoch": 0.8152015541525012, "grad_norm": 0.3674972653388977, "learning_rate": 0.0001, "loss": 1.7673, "step": 3357 }, { "epoch": 0.815444390480816, "grad_norm": 0.3780413866043091, "learning_rate": 0.0001, "loss": 1.68, "step": 3358 }, { "epoch": 0.8156872268091306, "grad_norm": 0.3669142723083496, "learning_rate": 0.0001, "loss": 1.7466, "step": 3359 }, { "epoch": 0.8159300631374453, "grad_norm": 0.34627634286880493, "learning_rate": 0.0001, "loss": 1.5616, "step": 3360 }, { "epoch": 0.8161728994657601, "grad_norm": 0.3495223820209503, "learning_rate": 0.0001, "loss": 1.7176, "step": 3361 }, { "epoch": 0.8164157357940748, "grad_norm": 0.3799058198928833, "learning_rate": 0.0001, "loss": 1.8741, "step": 3362 }, { "epoch": 0.8166585721223895, "grad_norm": 0.3761565685272217, "learning_rate": 0.0001, "loss": 1.7821, "step": 3363 }, { "epoch": 0.8169014084507042, "grad_norm": 0.39104485511779785, "learning_rate": 0.0001, "loss": 1.8327, "step": 3364 }, { "epoch": 0.8171442447790189, "grad_norm": 0.4119609296321869, "learning_rate": 0.0001, "loss": 1.8113, "step": 3365 }, { "epoch": 0.8173870811073336, "grad_norm": 0.37373781204223633, "learning_rate": 0.0001, "loss": 1.7457, "step": 3366 }, { "epoch": 0.8176299174356484, "grad_norm": 0.38254791498184204, "learning_rate": 0.0001, "loss": 1.7419, "step": 3367 }, { "epoch": 0.8178727537639631, "grad_norm": 0.36546850204467773, "learning_rate": 0.0001, "loss": 1.8308, "step": 3368 }, { "epoch": 0.8181155900922779, "grad_norm": 0.39306992292404175, "learning_rate": 0.0001, "loss": 1.8687, "step": 3369 }, { "epoch": 0.8183584264205925, "grad_norm": 0.3737972378730774, "learning_rate": 0.0001, "loss": 1.9084, "step": 3370 }, { "epoch": 0.8186012627489072, "grad_norm": 0.3632451891899109, "learning_rate": 0.0001, "loss": 1.7257, "step": 3371 }, { "epoch": 0.818844099077222, "grad_norm": 0.3642801344394684, "learning_rate": 0.0001, "loss": 1.7191, "step": 3372 }, { "epoch": 0.8190869354055367, "grad_norm": 0.3375829756259918, "learning_rate": 0.0001, "loss": 1.4421, "step": 3373 }, { "epoch": 0.8193297717338514, "grad_norm": 0.3556838035583496, "learning_rate": 0.0001, "loss": 1.5868, "step": 3374 }, { "epoch": 0.8195726080621661, "grad_norm": 0.348859041929245, "learning_rate": 0.0001, "loss": 1.5974, "step": 3375 }, { "epoch": 0.8198154443904808, "grad_norm": 0.3708842098712921, "learning_rate": 0.0001, "loss": 1.743, "step": 3376 }, { "epoch": 0.8200582807187955, "grad_norm": 0.36357876658439636, "learning_rate": 0.0001, "loss": 1.7435, "step": 3377 }, { "epoch": 0.8203011170471103, "grad_norm": 0.3622495234012604, "learning_rate": 0.0001, "loss": 1.7109, "step": 3378 }, { "epoch": 0.820543953375425, "grad_norm": 0.35432299971580505, "learning_rate": 0.0001, "loss": 1.7539, "step": 3379 }, { "epoch": 0.8207867897037396, "grad_norm": 0.35265427827835083, "learning_rate": 0.0001, "loss": 1.5913, "step": 3380 }, { "epoch": 0.8210296260320544, "grad_norm": 0.37112993001937866, "learning_rate": 0.0001, "loss": 1.5671, "step": 3381 }, { "epoch": 0.8212724623603691, "grad_norm": 0.35254931449890137, "learning_rate": 0.0001, "loss": 1.7341, "step": 3382 }, { "epoch": 0.8215152986886838, "grad_norm": 0.38896042108535767, "learning_rate": 0.0001, "loss": 1.8627, "step": 3383 }, { "epoch": 0.8217581350169986, "grad_norm": 0.37757647037506104, "learning_rate": 0.0001, "loss": 1.8308, "step": 3384 }, { "epoch": 0.8220009713453132, "grad_norm": 0.3572361171245575, "learning_rate": 0.0001, "loss": 1.6857, "step": 3385 }, { "epoch": 0.8222438076736279, "grad_norm": 0.35898885130882263, "learning_rate": 0.0001, "loss": 1.8175, "step": 3386 }, { "epoch": 0.8224866440019427, "grad_norm": 0.36892169713974, "learning_rate": 0.0001, "loss": 1.675, "step": 3387 }, { "epoch": 0.8227294803302574, "grad_norm": 0.3749498426914215, "learning_rate": 0.0001, "loss": 1.6009, "step": 3388 }, { "epoch": 0.8229723166585721, "grad_norm": 0.3872259557247162, "learning_rate": 0.0001, "loss": 1.8164, "step": 3389 }, { "epoch": 0.8232151529868869, "grad_norm": 0.36481043696403503, "learning_rate": 0.0001, "loss": 1.7736, "step": 3390 }, { "epoch": 0.8234579893152015, "grad_norm": 0.3672960698604584, "learning_rate": 0.0001, "loss": 1.8317, "step": 3391 }, { "epoch": 0.8237008256435163, "grad_norm": 0.5431075692176819, "learning_rate": 0.0001, "loss": 1.7295, "step": 3392 }, { "epoch": 0.823943661971831, "grad_norm": 0.36839866638183594, "learning_rate": 0.0001, "loss": 1.7219, "step": 3393 }, { "epoch": 0.8241864983001457, "grad_norm": 0.36228734254837036, "learning_rate": 0.0001, "loss": 1.7996, "step": 3394 }, { "epoch": 0.8244293346284605, "grad_norm": 0.3687099516391754, "learning_rate": 0.0001, "loss": 1.8457, "step": 3395 }, { "epoch": 0.8246721709567751, "grad_norm": 0.3856877088546753, "learning_rate": 0.0001, "loss": 1.5591, "step": 3396 }, { "epoch": 0.8249150072850898, "grad_norm": 0.3498803675174713, "learning_rate": 0.0001, "loss": 1.6165, "step": 3397 }, { "epoch": 0.8251578436134046, "grad_norm": 0.3575108051300049, "learning_rate": 0.0001, "loss": 1.6455, "step": 3398 }, { "epoch": 0.8254006799417193, "grad_norm": 0.3399507403373718, "learning_rate": 0.0001, "loss": 1.5308, "step": 3399 }, { "epoch": 0.825643516270034, "grad_norm": 0.39992624521255493, "learning_rate": 0.0001, "loss": 1.8403, "step": 3400 }, { "epoch": 0.8258863525983487, "grad_norm": 0.34879302978515625, "learning_rate": 0.0001, "loss": 1.6182, "step": 3401 }, { "epoch": 0.8261291889266634, "grad_norm": 0.3787181079387665, "learning_rate": 0.0001, "loss": 1.8406, "step": 3402 }, { "epoch": 0.8263720252549781, "grad_norm": 0.366046667098999, "learning_rate": 0.0001, "loss": 1.6661, "step": 3403 }, { "epoch": 0.8266148615832929, "grad_norm": 0.38305869698524475, "learning_rate": 0.0001, "loss": 1.6747, "step": 3404 }, { "epoch": 0.8268576979116076, "grad_norm": 0.3716723322868347, "learning_rate": 0.0001, "loss": 1.8145, "step": 3405 }, { "epoch": 0.8271005342399222, "grad_norm": 0.3804088830947876, "learning_rate": 0.0001, "loss": 1.6595, "step": 3406 }, { "epoch": 0.827343370568237, "grad_norm": 0.36902347207069397, "learning_rate": 0.0001, "loss": 1.7445, "step": 3407 }, { "epoch": 0.8275862068965517, "grad_norm": 0.3657947778701782, "learning_rate": 0.0001, "loss": 1.719, "step": 3408 }, { "epoch": 0.8278290432248664, "grad_norm": 0.3669492304325104, "learning_rate": 0.0001, "loss": 1.7128, "step": 3409 }, { "epoch": 0.8280718795531812, "grad_norm": 0.34419408440589905, "learning_rate": 0.0001, "loss": 1.6568, "step": 3410 }, { "epoch": 0.8283147158814959, "grad_norm": 0.39030420780181885, "learning_rate": 0.0001, "loss": 1.899, "step": 3411 }, { "epoch": 0.8285575522098105, "grad_norm": 0.3708794116973877, "learning_rate": 0.0001, "loss": 1.5666, "step": 3412 }, { "epoch": 0.8288003885381253, "grad_norm": 0.3762510120868683, "learning_rate": 0.0001, "loss": 1.6719, "step": 3413 }, { "epoch": 0.82904322486644, "grad_norm": 0.36242151260375977, "learning_rate": 0.0001, "loss": 1.713, "step": 3414 }, { "epoch": 0.8292860611947548, "grad_norm": 0.35218676924705505, "learning_rate": 0.0001, "loss": 1.6207, "step": 3415 }, { "epoch": 0.8295288975230695, "grad_norm": 0.3742830157279968, "learning_rate": 0.0001, "loss": 1.8128, "step": 3416 }, { "epoch": 0.8297717338513841, "grad_norm": 0.360321044921875, "learning_rate": 0.0001, "loss": 1.5888, "step": 3417 }, { "epoch": 0.8300145701796989, "grad_norm": 0.3791470527648926, "learning_rate": 0.0001, "loss": 1.8753, "step": 3418 }, { "epoch": 0.8302574065080136, "grad_norm": 0.3563266396522522, "learning_rate": 0.0001, "loss": 1.6355, "step": 3419 }, { "epoch": 0.8305002428363283, "grad_norm": 0.3572905361652374, "learning_rate": 0.0001, "loss": 1.678, "step": 3420 }, { "epoch": 0.8307430791646431, "grad_norm": 0.3519369065761566, "learning_rate": 0.0001, "loss": 1.6316, "step": 3421 }, { "epoch": 0.8309859154929577, "grad_norm": 0.3865967392921448, "learning_rate": 0.0001, "loss": 1.6014, "step": 3422 }, { "epoch": 0.8312287518212724, "grad_norm": 0.36829259991645813, "learning_rate": 0.0001, "loss": 1.5808, "step": 3423 }, { "epoch": 0.8314715881495872, "grad_norm": 0.5048966407775879, "learning_rate": 0.0001, "loss": 1.783, "step": 3424 }, { "epoch": 0.8317144244779019, "grad_norm": 0.3445897400379181, "learning_rate": 0.0001, "loss": 1.5677, "step": 3425 }, { "epoch": 0.8319572608062166, "grad_norm": 0.4075135588645935, "learning_rate": 0.0001, "loss": 1.8443, "step": 3426 }, { "epoch": 0.8322000971345314, "grad_norm": 0.3757880926132202, "learning_rate": 0.0001, "loss": 1.8096, "step": 3427 }, { "epoch": 0.832442933462846, "grad_norm": 0.4158633351325989, "learning_rate": 0.0001, "loss": 1.722, "step": 3428 }, { "epoch": 0.8326857697911607, "grad_norm": 0.33717289566993713, "learning_rate": 0.0001, "loss": 1.6177, "step": 3429 }, { "epoch": 0.8329286061194755, "grad_norm": 0.3926994204521179, "learning_rate": 0.0001, "loss": 1.7179, "step": 3430 }, { "epoch": 0.8331714424477902, "grad_norm": 0.3628498911857605, "learning_rate": 0.0001, "loss": 1.7354, "step": 3431 }, { "epoch": 0.8334142787761049, "grad_norm": 0.3821134567260742, "learning_rate": 0.0001, "loss": 1.6441, "step": 3432 }, { "epoch": 0.8336571151044196, "grad_norm": 0.3743181526660919, "learning_rate": 0.0001, "loss": 1.7191, "step": 3433 }, { "epoch": 0.8338999514327343, "grad_norm": 0.38392940163612366, "learning_rate": 0.0001, "loss": 1.8581, "step": 3434 }, { "epoch": 0.834142787761049, "grad_norm": 0.3931395411491394, "learning_rate": 0.0001, "loss": 1.7641, "step": 3435 }, { "epoch": 0.8343856240893638, "grad_norm": 0.3985886871814728, "learning_rate": 0.0001, "loss": 1.7006, "step": 3436 }, { "epoch": 0.8346284604176785, "grad_norm": 0.3533487617969513, "learning_rate": 0.0001, "loss": 1.6744, "step": 3437 }, { "epoch": 0.8348712967459933, "grad_norm": 0.3607969582080841, "learning_rate": 0.0001, "loss": 1.7189, "step": 3438 }, { "epoch": 0.8351141330743079, "grad_norm": 0.35621362924575806, "learning_rate": 0.0001, "loss": 1.5766, "step": 3439 }, { "epoch": 0.8353569694026226, "grad_norm": 0.3671231269836426, "learning_rate": 0.0001, "loss": 1.7925, "step": 3440 }, { "epoch": 0.8355998057309374, "grad_norm": 0.3626062572002411, "learning_rate": 0.0001, "loss": 1.5959, "step": 3441 }, { "epoch": 0.8358426420592521, "grad_norm": 0.3930570185184479, "learning_rate": 0.0001, "loss": 1.676, "step": 3442 }, { "epoch": 0.8360854783875667, "grad_norm": 0.3632255792617798, "learning_rate": 0.0001, "loss": 1.8025, "step": 3443 }, { "epoch": 0.8363283147158815, "grad_norm": 0.3855805993080139, "learning_rate": 0.0001, "loss": 1.6734, "step": 3444 }, { "epoch": 0.8365711510441962, "grad_norm": 0.36899393796920776, "learning_rate": 0.0001, "loss": 1.707, "step": 3445 }, { "epoch": 0.8368139873725109, "grad_norm": 0.39814889430999756, "learning_rate": 0.0001, "loss": 1.7721, "step": 3446 }, { "epoch": 0.8370568237008257, "grad_norm": 0.34774938225746155, "learning_rate": 0.0001, "loss": 1.581, "step": 3447 }, { "epoch": 0.8372996600291404, "grad_norm": 0.39525139331817627, "learning_rate": 0.0001, "loss": 1.8362, "step": 3448 }, { "epoch": 0.837542496357455, "grad_norm": 0.37141239643096924, "learning_rate": 0.0001, "loss": 1.7525, "step": 3449 }, { "epoch": 0.8377853326857698, "grad_norm": 0.38118383288383484, "learning_rate": 0.0001, "loss": 1.6791, "step": 3450 }, { "epoch": 0.8380281690140845, "grad_norm": 0.3800981640815735, "learning_rate": 0.0001, "loss": 1.6531, "step": 3451 }, { "epoch": 0.8382710053423992, "grad_norm": 0.38465872406959534, "learning_rate": 0.0001, "loss": 1.8196, "step": 3452 }, { "epoch": 0.838513841670714, "grad_norm": 0.33680155873298645, "learning_rate": 0.0001, "loss": 1.5088, "step": 3453 }, { "epoch": 0.8387566779990286, "grad_norm": 0.3530154228210449, "learning_rate": 0.0001, "loss": 1.7184, "step": 3454 }, { "epoch": 0.8389995143273433, "grad_norm": 0.3624134659767151, "learning_rate": 0.0001, "loss": 1.6819, "step": 3455 }, { "epoch": 0.8392423506556581, "grad_norm": 0.3938109874725342, "learning_rate": 0.0001, "loss": 1.625, "step": 3456 }, { "epoch": 0.8394851869839728, "grad_norm": 0.39321231842041016, "learning_rate": 0.0001, "loss": 1.9044, "step": 3457 }, { "epoch": 0.8397280233122875, "grad_norm": 0.36550790071487427, "learning_rate": 0.0001, "loss": 1.691, "step": 3458 }, { "epoch": 0.8399708596406023, "grad_norm": 0.3689191937446594, "learning_rate": 0.0001, "loss": 1.6638, "step": 3459 }, { "epoch": 0.8402136959689169, "grad_norm": 0.36174771189689636, "learning_rate": 0.0001, "loss": 1.7413, "step": 3460 }, { "epoch": 0.8404565322972317, "grad_norm": 0.37011727690696716, "learning_rate": 0.0001, "loss": 1.794, "step": 3461 }, { "epoch": 0.8406993686255464, "grad_norm": 0.37291502952575684, "learning_rate": 0.0001, "loss": 1.7135, "step": 3462 }, { "epoch": 0.8409422049538611, "grad_norm": 0.40054547786712646, "learning_rate": 0.0001, "loss": 1.651, "step": 3463 }, { "epoch": 0.8411850412821759, "grad_norm": 0.3493271768093109, "learning_rate": 0.0001, "loss": 1.702, "step": 3464 }, { "epoch": 0.8414278776104905, "grad_norm": 0.36861565709114075, "learning_rate": 0.0001, "loss": 1.5792, "step": 3465 }, { "epoch": 0.8416707139388052, "grad_norm": 0.37903013825416565, "learning_rate": 0.0001, "loss": 1.8688, "step": 3466 }, { "epoch": 0.84191355026712, "grad_norm": 0.36558017134666443, "learning_rate": 0.0001, "loss": 1.6547, "step": 3467 }, { "epoch": 0.8421563865954347, "grad_norm": 0.3424556255340576, "learning_rate": 0.0001, "loss": 1.528, "step": 3468 }, { "epoch": 0.8423992229237494, "grad_norm": 0.3787643611431122, "learning_rate": 0.0001, "loss": 1.7236, "step": 3469 }, { "epoch": 0.8426420592520641, "grad_norm": 0.3981590270996094, "learning_rate": 0.0001, "loss": 1.8502, "step": 3470 }, { "epoch": 0.8428848955803788, "grad_norm": 0.35316038131713867, "learning_rate": 0.0001, "loss": 1.5544, "step": 3471 }, { "epoch": 0.8431277319086935, "grad_norm": 0.36104702949523926, "learning_rate": 0.0001, "loss": 1.6113, "step": 3472 }, { "epoch": 0.8433705682370083, "grad_norm": 0.3761642277240753, "learning_rate": 0.0001, "loss": 1.7298, "step": 3473 }, { "epoch": 0.843613404565323, "grad_norm": 0.3798404932022095, "learning_rate": 0.0001, "loss": 1.7458, "step": 3474 }, { "epoch": 0.8438562408936376, "grad_norm": 0.36524298787117004, "learning_rate": 0.0001, "loss": 1.724, "step": 3475 }, { "epoch": 0.8440990772219524, "grad_norm": 0.3702562749385834, "learning_rate": 0.0001, "loss": 1.8075, "step": 3476 }, { "epoch": 0.8443419135502671, "grad_norm": 0.369831383228302, "learning_rate": 0.0001, "loss": 1.6562, "step": 3477 }, { "epoch": 0.8445847498785818, "grad_norm": 0.35366007685661316, "learning_rate": 0.0001, "loss": 1.6473, "step": 3478 }, { "epoch": 0.8448275862068966, "grad_norm": 0.3676561117172241, "learning_rate": 0.0001, "loss": 1.7891, "step": 3479 }, { "epoch": 0.8450704225352113, "grad_norm": 0.36195895075798035, "learning_rate": 0.0001, "loss": 1.6914, "step": 3480 }, { "epoch": 0.8453132588635259, "grad_norm": 0.36972975730895996, "learning_rate": 0.0001, "loss": 1.7172, "step": 3481 }, { "epoch": 0.8455560951918407, "grad_norm": 0.37382155656814575, "learning_rate": 0.0001, "loss": 1.6309, "step": 3482 }, { "epoch": 0.8457989315201554, "grad_norm": 0.3648379445075989, "learning_rate": 0.0001, "loss": 1.7619, "step": 3483 }, { "epoch": 0.8460417678484702, "grad_norm": 0.36794230341911316, "learning_rate": 0.0001, "loss": 1.6669, "step": 3484 }, { "epoch": 0.8462846041767849, "grad_norm": 0.3614242970943451, "learning_rate": 0.0001, "loss": 1.6528, "step": 3485 }, { "epoch": 0.8465274405050995, "grad_norm": 0.3677575886249542, "learning_rate": 0.0001, "loss": 1.7447, "step": 3486 }, { "epoch": 0.8467702768334143, "grad_norm": 0.371786504983902, "learning_rate": 0.0001, "loss": 1.5776, "step": 3487 }, { "epoch": 0.847013113161729, "grad_norm": 0.36296510696411133, "learning_rate": 0.0001, "loss": 1.6704, "step": 3488 }, { "epoch": 0.8472559494900437, "grad_norm": 0.3505253791809082, "learning_rate": 0.0001, "loss": 1.7259, "step": 3489 }, { "epoch": 0.8474987858183585, "grad_norm": 0.34446287155151367, "learning_rate": 0.0001, "loss": 1.504, "step": 3490 }, { "epoch": 0.8477416221466731, "grad_norm": 0.362392783164978, "learning_rate": 0.0001, "loss": 1.5495, "step": 3491 }, { "epoch": 0.8479844584749878, "grad_norm": 0.3405704200267792, "learning_rate": 0.0001, "loss": 1.5627, "step": 3492 }, { "epoch": 0.8482272948033026, "grad_norm": 0.36630943417549133, "learning_rate": 0.0001, "loss": 1.6703, "step": 3493 }, { "epoch": 0.8484701311316173, "grad_norm": 0.35960787534713745, "learning_rate": 0.0001, "loss": 1.7689, "step": 3494 }, { "epoch": 0.848712967459932, "grad_norm": 0.35360845923423767, "learning_rate": 0.0001, "loss": 1.6148, "step": 3495 }, { "epoch": 0.8489558037882468, "grad_norm": 0.3669142723083496, "learning_rate": 0.0001, "loss": 1.683, "step": 3496 }, { "epoch": 0.8491986401165614, "grad_norm": 0.36652398109436035, "learning_rate": 0.0001, "loss": 1.6641, "step": 3497 }, { "epoch": 0.8494414764448761, "grad_norm": 0.4173559844493866, "learning_rate": 0.0001, "loss": 1.846, "step": 3498 }, { "epoch": 0.8496843127731909, "grad_norm": 0.37224480509757996, "learning_rate": 0.0001, "loss": 1.7263, "step": 3499 }, { "epoch": 0.8499271491015056, "grad_norm": 0.35871750116348267, "learning_rate": 0.0001, "loss": 1.5461, "step": 3500 }, { "epoch": 0.8501699854298203, "grad_norm": 0.37326905131340027, "learning_rate": 0.0001, "loss": 1.7384, "step": 3501 }, { "epoch": 0.850412821758135, "grad_norm": 0.4032769799232483, "learning_rate": 0.0001, "loss": 1.7194, "step": 3502 }, { "epoch": 0.8506556580864497, "grad_norm": 0.354282945394516, "learning_rate": 0.0001, "loss": 1.6118, "step": 3503 }, { "epoch": 0.8508984944147644, "grad_norm": 0.3966940939426422, "learning_rate": 0.0001, "loss": 1.7711, "step": 3504 }, { "epoch": 0.8511413307430792, "grad_norm": 0.37445423007011414, "learning_rate": 0.0001, "loss": 1.6806, "step": 3505 }, { "epoch": 0.8513841670713939, "grad_norm": 0.35755404829978943, "learning_rate": 0.0001, "loss": 1.5763, "step": 3506 }, { "epoch": 0.8516270033997086, "grad_norm": 0.407652348279953, "learning_rate": 0.0001, "loss": 1.6067, "step": 3507 }, { "epoch": 0.8518698397280233, "grad_norm": 0.37608498334884644, "learning_rate": 0.0001, "loss": 1.8628, "step": 3508 }, { "epoch": 0.852112676056338, "grad_norm": 0.37260276079177856, "learning_rate": 0.0001, "loss": 1.6454, "step": 3509 }, { "epoch": 0.8523555123846528, "grad_norm": 0.38507553935050964, "learning_rate": 0.0001, "loss": 1.7672, "step": 3510 }, { "epoch": 0.8525983487129675, "grad_norm": 0.3733923137187958, "learning_rate": 0.0001, "loss": 1.6671, "step": 3511 }, { "epoch": 0.8528411850412821, "grad_norm": 0.3613528609275818, "learning_rate": 0.0001, "loss": 1.6734, "step": 3512 }, { "epoch": 0.8530840213695969, "grad_norm": 0.3519417345523834, "learning_rate": 0.0001, "loss": 1.7748, "step": 3513 }, { "epoch": 0.8533268576979116, "grad_norm": 0.37620311975479126, "learning_rate": 0.0001, "loss": 1.6402, "step": 3514 }, { "epoch": 0.8535696940262263, "grad_norm": 0.32235080003738403, "learning_rate": 0.0001, "loss": 1.5878, "step": 3515 }, { "epoch": 0.8538125303545411, "grad_norm": 0.3643632233142853, "learning_rate": 0.0001, "loss": 1.5403, "step": 3516 }, { "epoch": 0.8540553666828558, "grad_norm": 0.37543153762817383, "learning_rate": 0.0001, "loss": 1.8312, "step": 3517 }, { "epoch": 0.8542982030111704, "grad_norm": 0.36344969272613525, "learning_rate": 0.0001, "loss": 1.8156, "step": 3518 }, { "epoch": 0.8545410393394852, "grad_norm": 0.3584924340248108, "learning_rate": 0.0001, "loss": 1.631, "step": 3519 }, { "epoch": 0.8547838756677999, "grad_norm": 0.3910501301288605, "learning_rate": 0.0001, "loss": 1.6077, "step": 3520 }, { "epoch": 0.8550267119961146, "grad_norm": 0.3873438239097595, "learning_rate": 0.0001, "loss": 1.7625, "step": 3521 }, { "epoch": 0.8552695483244294, "grad_norm": 0.37016668915748596, "learning_rate": 0.0001, "loss": 1.6542, "step": 3522 }, { "epoch": 0.855512384652744, "grad_norm": 0.34312549233436584, "learning_rate": 0.0001, "loss": 1.5476, "step": 3523 }, { "epoch": 0.8557552209810587, "grad_norm": 0.3576250672340393, "learning_rate": 0.0001, "loss": 1.7494, "step": 3524 }, { "epoch": 0.8559980573093735, "grad_norm": 0.39972227811813354, "learning_rate": 0.0001, "loss": 1.7725, "step": 3525 }, { "epoch": 0.8562408936376882, "grad_norm": 0.41491860151290894, "learning_rate": 0.0001, "loss": 1.9628, "step": 3526 }, { "epoch": 0.8564837299660029, "grad_norm": 0.35856276750564575, "learning_rate": 0.0001, "loss": 1.6327, "step": 3527 }, { "epoch": 0.8567265662943176, "grad_norm": 0.3800105154514313, "learning_rate": 0.0001, "loss": 1.6284, "step": 3528 }, { "epoch": 0.8569694026226323, "grad_norm": 0.36823752522468567, "learning_rate": 0.0001, "loss": 1.7279, "step": 3529 }, { "epoch": 0.8572122389509471, "grad_norm": 0.40262046456336975, "learning_rate": 0.0001, "loss": 1.8158, "step": 3530 }, { "epoch": 0.8574550752792618, "grad_norm": 0.35609057545661926, "learning_rate": 0.0001, "loss": 1.7015, "step": 3531 }, { "epoch": 0.8576979116075765, "grad_norm": 0.35543057322502136, "learning_rate": 0.0001, "loss": 1.5971, "step": 3532 }, { "epoch": 0.8579407479358913, "grad_norm": 0.3998347520828247, "learning_rate": 0.0001, "loss": 1.755, "step": 3533 }, { "epoch": 0.8581835842642059, "grad_norm": 0.35681387782096863, "learning_rate": 0.0001, "loss": 1.639, "step": 3534 }, { "epoch": 0.8584264205925206, "grad_norm": 0.35703030228614807, "learning_rate": 0.0001, "loss": 1.6203, "step": 3535 }, { "epoch": 0.8586692569208354, "grad_norm": 0.39074602723121643, "learning_rate": 0.0001, "loss": 1.7393, "step": 3536 }, { "epoch": 0.8589120932491501, "grad_norm": 0.37359052896499634, "learning_rate": 0.0001, "loss": 1.7177, "step": 3537 }, { "epoch": 0.8591549295774648, "grad_norm": 0.3567638397216797, "learning_rate": 0.0001, "loss": 1.6187, "step": 3538 }, { "epoch": 0.8593977659057795, "grad_norm": 0.3632083535194397, "learning_rate": 0.0001, "loss": 1.8805, "step": 3539 }, { "epoch": 0.8596406022340942, "grad_norm": 0.36028143763542175, "learning_rate": 0.0001, "loss": 1.7016, "step": 3540 }, { "epoch": 0.8598834385624089, "grad_norm": 0.3658236563205719, "learning_rate": 0.0001, "loss": 1.8488, "step": 3541 }, { "epoch": 0.8601262748907237, "grad_norm": 0.3709207773208618, "learning_rate": 0.0001, "loss": 1.553, "step": 3542 }, { "epoch": 0.8603691112190384, "grad_norm": 0.3640122711658478, "learning_rate": 0.0001, "loss": 1.6917, "step": 3543 }, { "epoch": 0.860611947547353, "grad_norm": 0.36741796135902405, "learning_rate": 0.0001, "loss": 1.7421, "step": 3544 }, { "epoch": 0.8608547838756678, "grad_norm": 0.36788538098335266, "learning_rate": 0.0001, "loss": 1.5018, "step": 3545 }, { "epoch": 0.8610976202039825, "grad_norm": 0.3530402183532715, "learning_rate": 0.0001, "loss": 1.6676, "step": 3546 }, { "epoch": 0.8613404565322972, "grad_norm": 0.39383524656295776, "learning_rate": 0.0001, "loss": 1.7636, "step": 3547 }, { "epoch": 0.861583292860612, "grad_norm": 0.3982522189617157, "learning_rate": 0.0001, "loss": 1.7767, "step": 3548 }, { "epoch": 0.8618261291889266, "grad_norm": 0.3580620586872101, "learning_rate": 0.0001, "loss": 1.597, "step": 3549 }, { "epoch": 0.8620689655172413, "grad_norm": 0.35779640078544617, "learning_rate": 0.0001, "loss": 1.6438, "step": 3550 }, { "epoch": 0.8623118018455561, "grad_norm": 0.3500531017780304, "learning_rate": 0.0001, "loss": 1.6659, "step": 3551 }, { "epoch": 0.8625546381738708, "grad_norm": 0.34910285472869873, "learning_rate": 0.0001, "loss": 1.658, "step": 3552 }, { "epoch": 0.8627974745021856, "grad_norm": 0.35888442397117615, "learning_rate": 0.0001, "loss": 1.5498, "step": 3553 }, { "epoch": 0.8630403108305003, "grad_norm": 0.38242191076278687, "learning_rate": 0.0001, "loss": 1.6856, "step": 3554 }, { "epoch": 0.8632831471588149, "grad_norm": 0.3862399160861969, "learning_rate": 0.0001, "loss": 1.7477, "step": 3555 }, { "epoch": 0.8635259834871297, "grad_norm": 0.3789171576499939, "learning_rate": 0.0001, "loss": 1.6583, "step": 3556 }, { "epoch": 0.8637688198154444, "grad_norm": 0.3593288064002991, "learning_rate": 0.0001, "loss": 1.5699, "step": 3557 }, { "epoch": 0.8640116561437591, "grad_norm": 0.3673674762248993, "learning_rate": 0.0001, "loss": 1.7593, "step": 3558 }, { "epoch": 0.8642544924720739, "grad_norm": 0.38266125321388245, "learning_rate": 0.0001, "loss": 1.6827, "step": 3559 }, { "epoch": 0.8644973288003885, "grad_norm": 0.39288368821144104, "learning_rate": 0.0001, "loss": 1.8141, "step": 3560 }, { "epoch": 0.8647401651287032, "grad_norm": 0.3890032172203064, "learning_rate": 0.0001, "loss": 1.5849, "step": 3561 }, { "epoch": 0.864983001457018, "grad_norm": 0.36325764656066895, "learning_rate": 0.0001, "loss": 1.6591, "step": 3562 }, { "epoch": 0.8652258377853327, "grad_norm": 0.37350988388061523, "learning_rate": 0.0001, "loss": 1.7085, "step": 3563 }, { "epoch": 0.8654686741136474, "grad_norm": 0.3759698271751404, "learning_rate": 0.0001, "loss": 1.6911, "step": 3564 }, { "epoch": 0.8657115104419622, "grad_norm": 0.39357560873031616, "learning_rate": 0.0001, "loss": 1.7312, "step": 3565 }, { "epoch": 0.8659543467702768, "grad_norm": 0.37965479493141174, "learning_rate": 0.0001, "loss": 1.808, "step": 3566 }, { "epoch": 0.8661971830985915, "grad_norm": 0.3746955096721649, "learning_rate": 0.0001, "loss": 1.7676, "step": 3567 }, { "epoch": 0.8664400194269063, "grad_norm": 0.38889482617378235, "learning_rate": 0.0001, "loss": 1.7351, "step": 3568 }, { "epoch": 0.866682855755221, "grad_norm": 0.3991025388240814, "learning_rate": 0.0001, "loss": 1.8694, "step": 3569 }, { "epoch": 0.8669256920835356, "grad_norm": 0.3674953579902649, "learning_rate": 0.0001, "loss": 1.6615, "step": 3570 }, { "epoch": 0.8671685284118504, "grad_norm": 0.3669956922531128, "learning_rate": 0.0001, "loss": 1.8463, "step": 3571 }, { "epoch": 0.8674113647401651, "grad_norm": 0.3681383728981018, "learning_rate": 0.0001, "loss": 1.6519, "step": 3572 }, { "epoch": 0.8676542010684798, "grad_norm": 0.37041378021240234, "learning_rate": 0.0001, "loss": 1.8967, "step": 3573 }, { "epoch": 0.8678970373967946, "grad_norm": 0.34929654002189636, "learning_rate": 0.0001, "loss": 1.4997, "step": 3574 }, { "epoch": 0.8681398737251093, "grad_norm": 0.3579554855823517, "learning_rate": 0.0001, "loss": 1.6965, "step": 3575 }, { "epoch": 0.868382710053424, "grad_norm": 0.3582157492637634, "learning_rate": 0.0001, "loss": 1.6105, "step": 3576 }, { "epoch": 0.8686255463817387, "grad_norm": 0.37660810351371765, "learning_rate": 0.0001, "loss": 1.8288, "step": 3577 }, { "epoch": 0.8688683827100534, "grad_norm": 0.36132362484931946, "learning_rate": 0.0001, "loss": 1.5915, "step": 3578 }, { "epoch": 0.8691112190383682, "grad_norm": 0.37348681688308716, "learning_rate": 0.0001, "loss": 1.6533, "step": 3579 }, { "epoch": 0.8693540553666829, "grad_norm": 0.38948729634284973, "learning_rate": 0.0001, "loss": 1.7371, "step": 3580 }, { "epoch": 0.8695968916949975, "grad_norm": 0.35966864228248596, "learning_rate": 0.0001, "loss": 1.4848, "step": 3581 }, { "epoch": 0.8698397280233123, "grad_norm": 0.37547367811203003, "learning_rate": 0.0001, "loss": 1.8124, "step": 3582 }, { "epoch": 0.870082564351627, "grad_norm": 0.35957401990890503, "learning_rate": 0.0001, "loss": 1.609, "step": 3583 }, { "epoch": 0.8703254006799417, "grad_norm": 0.36093026399612427, "learning_rate": 0.0001, "loss": 1.7151, "step": 3584 }, { "epoch": 0.8705682370082565, "grad_norm": 0.3634076714515686, "learning_rate": 0.0001, "loss": 1.652, "step": 3585 }, { "epoch": 0.8708110733365712, "grad_norm": 0.3747778534889221, "learning_rate": 0.0001, "loss": 1.7686, "step": 3586 }, { "epoch": 0.8710539096648858, "grad_norm": 0.3573356568813324, "learning_rate": 0.0001, "loss": 1.7262, "step": 3587 }, { "epoch": 0.8712967459932006, "grad_norm": 0.3708506226539612, "learning_rate": 0.0001, "loss": 1.7808, "step": 3588 }, { "epoch": 0.8715395823215153, "grad_norm": 0.3369008004665375, "learning_rate": 0.0001, "loss": 1.603, "step": 3589 }, { "epoch": 0.87178241864983, "grad_norm": 0.37004029750823975, "learning_rate": 0.0001, "loss": 1.6321, "step": 3590 }, { "epoch": 0.8720252549781448, "grad_norm": 0.3614467680454254, "learning_rate": 0.0001, "loss": 1.5722, "step": 3591 }, { "epoch": 0.8722680913064594, "grad_norm": 0.3672022223472595, "learning_rate": 0.0001, "loss": 1.7806, "step": 3592 }, { "epoch": 0.8725109276347741, "grad_norm": 0.37763553857803345, "learning_rate": 0.0001, "loss": 1.6137, "step": 3593 }, { "epoch": 0.8727537639630889, "grad_norm": 0.3805335760116577, "learning_rate": 0.0001, "loss": 1.7193, "step": 3594 }, { "epoch": 0.8729966002914036, "grad_norm": 0.3677334785461426, "learning_rate": 0.0001, "loss": 1.7612, "step": 3595 }, { "epoch": 0.8732394366197183, "grad_norm": 0.36495909094810486, "learning_rate": 0.0001, "loss": 1.6714, "step": 3596 }, { "epoch": 0.873482272948033, "grad_norm": 0.37761765718460083, "learning_rate": 0.0001, "loss": 1.7882, "step": 3597 }, { "epoch": 0.8737251092763477, "grad_norm": 0.3680744767189026, "learning_rate": 0.0001, "loss": 1.6574, "step": 3598 }, { "epoch": 0.8739679456046625, "grad_norm": 0.35450413823127747, "learning_rate": 0.0001, "loss": 1.6302, "step": 3599 }, { "epoch": 0.8742107819329772, "grad_norm": 0.36951565742492676, "learning_rate": 0.0001, "loss": 1.7608, "step": 3600 }, { "epoch": 0.8744536182612919, "grad_norm": 0.34576210379600525, "learning_rate": 0.0001, "loss": 1.5916, "step": 3601 }, { "epoch": 0.8746964545896067, "grad_norm": 0.34308797121047974, "learning_rate": 0.0001, "loss": 1.5128, "step": 3602 }, { "epoch": 0.8749392909179213, "grad_norm": 0.3728722333908081, "learning_rate": 0.0001, "loss": 1.6882, "step": 3603 }, { "epoch": 0.875182127246236, "grad_norm": 0.3887479603290558, "learning_rate": 0.0001, "loss": 1.6687, "step": 3604 }, { "epoch": 0.8754249635745508, "grad_norm": 0.37138471007347107, "learning_rate": 0.0001, "loss": 1.6479, "step": 3605 }, { "epoch": 0.8756677999028655, "grad_norm": 0.3760085701942444, "learning_rate": 0.0001, "loss": 1.7381, "step": 3606 }, { "epoch": 0.8759106362311802, "grad_norm": 0.3597986698150635, "learning_rate": 0.0001, "loss": 1.5603, "step": 3607 }, { "epoch": 0.8761534725594949, "grad_norm": 0.3942025899887085, "learning_rate": 0.0001, "loss": 1.8538, "step": 3608 }, { "epoch": 0.8763963088878096, "grad_norm": 0.3929956257343292, "learning_rate": 0.0001, "loss": 1.6928, "step": 3609 }, { "epoch": 0.8766391452161243, "grad_norm": 0.415682315826416, "learning_rate": 0.0001, "loss": 1.584, "step": 3610 }, { "epoch": 0.8768819815444391, "grad_norm": 0.3906037211418152, "learning_rate": 0.0001, "loss": 1.8822, "step": 3611 }, { "epoch": 0.8771248178727538, "grad_norm": 0.3817678391933441, "learning_rate": 0.0001, "loss": 1.6585, "step": 3612 }, { "epoch": 0.8773676542010684, "grad_norm": 0.37527331709861755, "learning_rate": 0.0001, "loss": 1.8332, "step": 3613 }, { "epoch": 0.8776104905293832, "grad_norm": 0.34259480237960815, "learning_rate": 0.0001, "loss": 1.6764, "step": 3614 }, { "epoch": 0.8778533268576979, "grad_norm": 0.37580329179763794, "learning_rate": 0.0001, "loss": 1.5702, "step": 3615 }, { "epoch": 0.8780961631860126, "grad_norm": 0.38391757011413574, "learning_rate": 0.0001, "loss": 1.7783, "step": 3616 }, { "epoch": 0.8783389995143274, "grad_norm": 0.36990684270858765, "learning_rate": 0.0001, "loss": 1.7667, "step": 3617 }, { "epoch": 0.878581835842642, "grad_norm": 0.37066319584846497, "learning_rate": 0.0001, "loss": 1.452, "step": 3618 }, { "epoch": 0.8788246721709567, "grad_norm": 0.3481326401233673, "learning_rate": 0.0001, "loss": 1.6822, "step": 3619 }, { "epoch": 0.8790675084992715, "grad_norm": 0.40429386496543884, "learning_rate": 0.0001, "loss": 1.7546, "step": 3620 }, { "epoch": 0.8793103448275862, "grad_norm": 0.40886256098747253, "learning_rate": 0.0001, "loss": 1.7198, "step": 3621 }, { "epoch": 0.879553181155901, "grad_norm": 0.38799768686294556, "learning_rate": 0.0001, "loss": 1.9331, "step": 3622 }, { "epoch": 0.8797960174842157, "grad_norm": 0.37042146921157837, "learning_rate": 0.0001, "loss": 1.6779, "step": 3623 }, { "epoch": 0.8800388538125303, "grad_norm": 0.3842093348503113, "learning_rate": 0.0001, "loss": 1.6768, "step": 3624 }, { "epoch": 0.8802816901408451, "grad_norm": 0.39390814304351807, "learning_rate": 0.0001, "loss": 1.7565, "step": 3625 }, { "epoch": 0.8805245264691598, "grad_norm": 0.3362022340297699, "learning_rate": 0.0001, "loss": 1.4937, "step": 3626 }, { "epoch": 0.8807673627974745, "grad_norm": 0.36625936627388, "learning_rate": 0.0001, "loss": 1.7513, "step": 3627 }, { "epoch": 0.8810101991257893, "grad_norm": 0.3637999892234802, "learning_rate": 0.0001, "loss": 1.5824, "step": 3628 }, { "epoch": 0.8812530354541039, "grad_norm": 0.3684556484222412, "learning_rate": 0.0001, "loss": 1.742, "step": 3629 }, { "epoch": 0.8814958717824186, "grad_norm": 0.3652805685997009, "learning_rate": 0.0001, "loss": 1.4867, "step": 3630 }, { "epoch": 0.8817387081107334, "grad_norm": 0.3626108765602112, "learning_rate": 0.0001, "loss": 1.5675, "step": 3631 }, { "epoch": 0.8819815444390481, "grad_norm": 0.40082186460494995, "learning_rate": 0.0001, "loss": 1.6533, "step": 3632 }, { "epoch": 0.8822243807673628, "grad_norm": 0.34865519404411316, "learning_rate": 0.0001, "loss": 1.5657, "step": 3633 }, { "epoch": 0.8824672170956775, "grad_norm": 0.3757842779159546, "learning_rate": 0.0001, "loss": 1.7309, "step": 3634 }, { "epoch": 0.8827100534239922, "grad_norm": 0.36966630816459656, "learning_rate": 0.0001, "loss": 1.7184, "step": 3635 }, { "epoch": 0.8829528897523069, "grad_norm": 0.409877210855484, "learning_rate": 0.0001, "loss": 1.8053, "step": 3636 }, { "epoch": 0.8831957260806217, "grad_norm": 0.3520601689815521, "learning_rate": 0.0001, "loss": 1.4993, "step": 3637 }, { "epoch": 0.8834385624089364, "grad_norm": 0.3983246088027954, "learning_rate": 0.0001, "loss": 1.7949, "step": 3638 }, { "epoch": 0.883681398737251, "grad_norm": 0.3668357729911804, "learning_rate": 0.0001, "loss": 1.7983, "step": 3639 }, { "epoch": 0.8839242350655658, "grad_norm": 0.3689560294151306, "learning_rate": 0.0001, "loss": 1.806, "step": 3640 }, { "epoch": 0.8841670713938805, "grad_norm": 0.37663447856903076, "learning_rate": 0.0001, "loss": 1.7754, "step": 3641 }, { "epoch": 0.8844099077221952, "grad_norm": 0.3643248677253723, "learning_rate": 0.0001, "loss": 1.7468, "step": 3642 }, { "epoch": 0.88465274405051, "grad_norm": 0.36683642864227295, "learning_rate": 0.0001, "loss": 1.815, "step": 3643 }, { "epoch": 0.8848955803788247, "grad_norm": 0.3876796364784241, "learning_rate": 0.0001, "loss": 1.7493, "step": 3644 }, { "epoch": 0.8851384167071394, "grad_norm": 0.3821657598018646, "learning_rate": 0.0001, "loss": 1.6686, "step": 3645 }, { "epoch": 0.8853812530354541, "grad_norm": 0.3620165288448334, "learning_rate": 0.0001, "loss": 1.57, "step": 3646 }, { "epoch": 0.8856240893637688, "grad_norm": 0.3420584499835968, "learning_rate": 0.0001, "loss": 1.5326, "step": 3647 }, { "epoch": 0.8858669256920836, "grad_norm": 0.37859800457954407, "learning_rate": 0.0001, "loss": 1.7683, "step": 3648 }, { "epoch": 0.8861097620203983, "grad_norm": 0.369691401720047, "learning_rate": 0.0001, "loss": 1.7959, "step": 3649 }, { "epoch": 0.8863525983487129, "grad_norm": 0.37752020359039307, "learning_rate": 0.0001, "loss": 1.6003, "step": 3650 }, { "epoch": 0.8865954346770277, "grad_norm": 0.3806999921798706, "learning_rate": 0.0001, "loss": 1.8619, "step": 3651 }, { "epoch": 0.8868382710053424, "grad_norm": 0.3760840594768524, "learning_rate": 0.0001, "loss": 1.7124, "step": 3652 }, { "epoch": 0.8870811073336571, "grad_norm": 0.3627361059188843, "learning_rate": 0.0001, "loss": 1.7258, "step": 3653 }, { "epoch": 0.8873239436619719, "grad_norm": 0.3710281252861023, "learning_rate": 0.0001, "loss": 1.6779, "step": 3654 }, { "epoch": 0.8875667799902865, "grad_norm": 0.36439019441604614, "learning_rate": 0.0001, "loss": 1.8435, "step": 3655 }, { "epoch": 0.8878096163186012, "grad_norm": 0.385211318731308, "learning_rate": 0.0001, "loss": 1.8635, "step": 3656 }, { "epoch": 0.888052452646916, "grad_norm": 0.3514195382595062, "learning_rate": 0.0001, "loss": 1.5961, "step": 3657 }, { "epoch": 0.8882952889752307, "grad_norm": 0.3596414625644684, "learning_rate": 0.0001, "loss": 1.6581, "step": 3658 }, { "epoch": 0.8885381253035454, "grad_norm": 0.37253424525260925, "learning_rate": 0.0001, "loss": 1.7471, "step": 3659 }, { "epoch": 0.8887809616318602, "grad_norm": 0.3801403343677521, "learning_rate": 0.0001, "loss": 1.7299, "step": 3660 }, { "epoch": 0.8890237979601748, "grad_norm": 0.3856620192527771, "learning_rate": 0.0001, "loss": 1.7593, "step": 3661 }, { "epoch": 0.8892666342884895, "grad_norm": 0.3800942301750183, "learning_rate": 0.0001, "loss": 1.7336, "step": 3662 }, { "epoch": 0.8895094706168043, "grad_norm": 0.3820795714855194, "learning_rate": 0.0001, "loss": 1.8671, "step": 3663 }, { "epoch": 0.889752306945119, "grad_norm": 0.3742009103298187, "learning_rate": 0.0001, "loss": 1.7299, "step": 3664 }, { "epoch": 0.8899951432734337, "grad_norm": 0.33877500891685486, "learning_rate": 0.0001, "loss": 1.6424, "step": 3665 }, { "epoch": 0.8902379796017484, "grad_norm": 0.35122042894363403, "learning_rate": 0.0001, "loss": 1.5245, "step": 3666 }, { "epoch": 0.8904808159300631, "grad_norm": 0.345304936170578, "learning_rate": 0.0001, "loss": 1.5726, "step": 3667 }, { "epoch": 0.8907236522583779, "grad_norm": 0.3724261522293091, "learning_rate": 0.0001, "loss": 1.6823, "step": 3668 }, { "epoch": 0.8909664885866926, "grad_norm": 0.3617289066314697, "learning_rate": 0.0001, "loss": 1.6174, "step": 3669 }, { "epoch": 0.8912093249150073, "grad_norm": 0.36587101221084595, "learning_rate": 0.0001, "loss": 1.6748, "step": 3670 }, { "epoch": 0.891452161243322, "grad_norm": 0.3875347673892975, "learning_rate": 0.0001, "loss": 1.8046, "step": 3671 }, { "epoch": 0.8916949975716367, "grad_norm": 0.3689577579498291, "learning_rate": 0.0001, "loss": 1.6712, "step": 3672 }, { "epoch": 0.8919378338999514, "grad_norm": 0.3996396064758301, "learning_rate": 0.0001, "loss": 1.7927, "step": 3673 }, { "epoch": 0.8921806702282662, "grad_norm": 0.3754492700099945, "learning_rate": 0.0001, "loss": 1.6346, "step": 3674 }, { "epoch": 0.8924235065565809, "grad_norm": 0.37411370873451233, "learning_rate": 0.0001, "loss": 1.72, "step": 3675 }, { "epoch": 0.8926663428848955, "grad_norm": 0.35851696133613586, "learning_rate": 0.0001, "loss": 1.7167, "step": 3676 }, { "epoch": 0.8929091792132103, "grad_norm": 0.4156700372695923, "learning_rate": 0.0001, "loss": 1.7594, "step": 3677 }, { "epoch": 0.893152015541525, "grad_norm": 0.3887973129749298, "learning_rate": 0.0001, "loss": 1.8126, "step": 3678 }, { "epoch": 0.8933948518698397, "grad_norm": 0.38091030716896057, "learning_rate": 0.0001, "loss": 1.8359, "step": 3679 }, { "epoch": 0.8936376881981545, "grad_norm": 0.40002167224884033, "learning_rate": 0.0001, "loss": 1.5869, "step": 3680 }, { "epoch": 0.8938805245264692, "grad_norm": 0.3787648379802704, "learning_rate": 0.0001, "loss": 1.6256, "step": 3681 }, { "epoch": 0.8941233608547838, "grad_norm": 0.3811265230178833, "learning_rate": 0.0001, "loss": 1.6876, "step": 3682 }, { "epoch": 0.8943661971830986, "grad_norm": 0.3632388114929199, "learning_rate": 0.0001, "loss": 1.7248, "step": 3683 }, { "epoch": 0.8946090335114133, "grad_norm": 0.3976041376590729, "learning_rate": 0.0001, "loss": 1.667, "step": 3684 }, { "epoch": 0.894851869839728, "grad_norm": 0.37803617119789124, "learning_rate": 0.0001, "loss": 1.6494, "step": 3685 }, { "epoch": 0.8950947061680428, "grad_norm": 0.3664271831512451, "learning_rate": 0.0001, "loss": 1.7021, "step": 3686 }, { "epoch": 0.8953375424963574, "grad_norm": 0.35976269841194153, "learning_rate": 0.0001, "loss": 1.6618, "step": 3687 }, { "epoch": 0.8955803788246721, "grad_norm": 0.35800105333328247, "learning_rate": 0.0001, "loss": 1.6671, "step": 3688 }, { "epoch": 0.8958232151529869, "grad_norm": 0.342416375875473, "learning_rate": 0.0001, "loss": 1.6609, "step": 3689 }, { "epoch": 0.8960660514813016, "grad_norm": 0.3788902759552002, "learning_rate": 0.0001, "loss": 1.6947, "step": 3690 }, { "epoch": 0.8963088878096164, "grad_norm": 0.4403901696205139, "learning_rate": 0.0001, "loss": 1.6916, "step": 3691 }, { "epoch": 0.896551724137931, "grad_norm": 0.3746020197868347, "learning_rate": 0.0001, "loss": 1.7877, "step": 3692 }, { "epoch": 0.8967945604662457, "grad_norm": 0.3692503273487091, "learning_rate": 0.0001, "loss": 1.6245, "step": 3693 }, { "epoch": 0.8970373967945605, "grad_norm": 0.38501957058906555, "learning_rate": 0.0001, "loss": 1.8146, "step": 3694 }, { "epoch": 0.8972802331228752, "grad_norm": 0.3959503769874573, "learning_rate": 0.0001, "loss": 1.7034, "step": 3695 }, { "epoch": 0.8975230694511899, "grad_norm": 0.35865819454193115, "learning_rate": 0.0001, "loss": 1.7013, "step": 3696 }, { "epoch": 0.8977659057795047, "grad_norm": 0.3601124584674835, "learning_rate": 0.0001, "loss": 1.6855, "step": 3697 }, { "epoch": 0.8980087421078193, "grad_norm": 0.40033718943595886, "learning_rate": 0.0001, "loss": 1.8622, "step": 3698 }, { "epoch": 0.898251578436134, "grad_norm": 0.35714730620384216, "learning_rate": 0.0001, "loss": 1.718, "step": 3699 }, { "epoch": 0.8984944147644488, "grad_norm": 0.37696969509124756, "learning_rate": 0.0001, "loss": 1.6804, "step": 3700 }, { "epoch": 0.8987372510927635, "grad_norm": 0.3685283660888672, "learning_rate": 0.0001, "loss": 1.6738, "step": 3701 }, { "epoch": 0.8989800874210782, "grad_norm": 0.3915679156780243, "learning_rate": 0.0001, "loss": 1.7391, "step": 3702 }, { "epoch": 0.8992229237493929, "grad_norm": 0.37142446637153625, "learning_rate": 0.0001, "loss": 1.7583, "step": 3703 }, { "epoch": 0.8994657600777076, "grad_norm": 0.3911544978618622, "learning_rate": 0.0001, "loss": 1.8277, "step": 3704 }, { "epoch": 0.8997085964060223, "grad_norm": 0.3824489116668701, "learning_rate": 0.0001, "loss": 1.7645, "step": 3705 }, { "epoch": 0.8999514327343371, "grad_norm": 0.36203762888908386, "learning_rate": 0.0001, "loss": 1.7458, "step": 3706 }, { "epoch": 0.9001942690626518, "grad_norm": 0.3630374073982239, "learning_rate": 0.0001, "loss": 1.6611, "step": 3707 }, { "epoch": 0.9004371053909664, "grad_norm": 0.3603218197822571, "learning_rate": 0.0001, "loss": 1.5932, "step": 3708 }, { "epoch": 0.9006799417192812, "grad_norm": 0.34822002053260803, "learning_rate": 0.0001, "loss": 1.5359, "step": 3709 }, { "epoch": 0.9009227780475959, "grad_norm": 0.36869946122169495, "learning_rate": 0.0001, "loss": 1.6381, "step": 3710 }, { "epoch": 0.9011656143759106, "grad_norm": 0.37184393405914307, "learning_rate": 0.0001, "loss": 1.7159, "step": 3711 }, { "epoch": 0.9014084507042254, "grad_norm": 0.3584497272968292, "learning_rate": 0.0001, "loss": 1.7466, "step": 3712 }, { "epoch": 0.90165128703254, "grad_norm": 0.3819580674171448, "learning_rate": 0.0001, "loss": 1.7638, "step": 3713 }, { "epoch": 0.9018941233608548, "grad_norm": 0.3784763514995575, "learning_rate": 0.0001, "loss": 1.7203, "step": 3714 }, { "epoch": 0.9021369596891695, "grad_norm": 0.3671453297138214, "learning_rate": 0.0001, "loss": 1.7007, "step": 3715 }, { "epoch": 0.9023797960174842, "grad_norm": 0.3914904296398163, "learning_rate": 0.0001, "loss": 1.8468, "step": 3716 }, { "epoch": 0.902622632345799, "grad_norm": 0.3483647406101227, "learning_rate": 0.0001, "loss": 1.5126, "step": 3717 }, { "epoch": 0.9028654686741137, "grad_norm": 0.3823911249637604, "learning_rate": 0.0001, "loss": 1.6252, "step": 3718 }, { "epoch": 0.9031083050024283, "grad_norm": 0.3677743673324585, "learning_rate": 0.0001, "loss": 1.7183, "step": 3719 }, { "epoch": 0.9033511413307431, "grad_norm": 0.38830864429473877, "learning_rate": 0.0001, "loss": 1.757, "step": 3720 }, { "epoch": 0.9035939776590578, "grad_norm": 0.3823908865451813, "learning_rate": 0.0001, "loss": 1.6822, "step": 3721 }, { "epoch": 0.9038368139873725, "grad_norm": 0.3889397382736206, "learning_rate": 0.0001, "loss": 1.8468, "step": 3722 }, { "epoch": 0.9040796503156873, "grad_norm": 0.3682743012905121, "learning_rate": 0.0001, "loss": 1.6469, "step": 3723 }, { "epoch": 0.9043224866440019, "grad_norm": 0.41113588213920593, "learning_rate": 0.0001, "loss": 1.9098, "step": 3724 }, { "epoch": 0.9045653229723166, "grad_norm": 0.3677564859390259, "learning_rate": 0.0001, "loss": 1.6514, "step": 3725 }, { "epoch": 0.9048081593006314, "grad_norm": 0.3636486232280731, "learning_rate": 0.0001, "loss": 1.527, "step": 3726 }, { "epoch": 0.9050509956289461, "grad_norm": 0.4054773151874542, "learning_rate": 0.0001, "loss": 1.6989, "step": 3727 }, { "epoch": 0.9052938319572608, "grad_norm": 0.3783499598503113, "learning_rate": 0.0001, "loss": 1.7008, "step": 3728 }, { "epoch": 0.9055366682855756, "grad_norm": 0.3834938406944275, "learning_rate": 0.0001, "loss": 1.7659, "step": 3729 }, { "epoch": 0.9057795046138902, "grad_norm": 0.3626881241798401, "learning_rate": 0.0001, "loss": 1.7242, "step": 3730 }, { "epoch": 0.9060223409422049, "grad_norm": 0.37479889392852783, "learning_rate": 0.0001, "loss": 1.6904, "step": 3731 }, { "epoch": 0.9062651772705197, "grad_norm": 0.38191792368888855, "learning_rate": 0.0001, "loss": 1.7189, "step": 3732 }, { "epoch": 0.9065080135988344, "grad_norm": 0.3727664649486542, "learning_rate": 0.0001, "loss": 1.5817, "step": 3733 }, { "epoch": 0.906750849927149, "grad_norm": 0.3694825768470764, "learning_rate": 0.0001, "loss": 1.6841, "step": 3734 }, { "epoch": 0.9069936862554638, "grad_norm": 0.36148035526275635, "learning_rate": 0.0001, "loss": 1.7172, "step": 3735 }, { "epoch": 0.9072365225837785, "grad_norm": 0.42565613985061646, "learning_rate": 0.0001, "loss": 1.7159, "step": 3736 }, { "epoch": 0.9074793589120933, "grad_norm": 0.4023953080177307, "learning_rate": 0.0001, "loss": 1.7227, "step": 3737 }, { "epoch": 0.907722195240408, "grad_norm": 0.35882890224456787, "learning_rate": 0.0001, "loss": 1.6104, "step": 3738 }, { "epoch": 0.9079650315687227, "grad_norm": 0.4042493999004364, "learning_rate": 0.0001, "loss": 1.6337, "step": 3739 }, { "epoch": 0.9082078678970374, "grad_norm": 0.3844760060310364, "learning_rate": 0.0001, "loss": 1.7242, "step": 3740 }, { "epoch": 0.9084507042253521, "grad_norm": 0.3652195930480957, "learning_rate": 0.0001, "loss": 1.785, "step": 3741 }, { "epoch": 0.9086935405536668, "grad_norm": 0.38959187269210815, "learning_rate": 0.0001, "loss": 1.8141, "step": 3742 }, { "epoch": 0.9089363768819816, "grad_norm": 0.37486904859542847, "learning_rate": 0.0001, "loss": 1.7031, "step": 3743 }, { "epoch": 0.9091792132102963, "grad_norm": 0.360111266374588, "learning_rate": 0.0001, "loss": 1.4131, "step": 3744 }, { "epoch": 0.9094220495386109, "grad_norm": 0.37102243304252625, "learning_rate": 0.0001, "loss": 1.8195, "step": 3745 }, { "epoch": 0.9096648858669257, "grad_norm": 0.36768385767936707, "learning_rate": 0.0001, "loss": 1.6769, "step": 3746 }, { "epoch": 0.9099077221952404, "grad_norm": 0.37154293060302734, "learning_rate": 0.0001, "loss": 1.7091, "step": 3747 }, { "epoch": 0.9101505585235551, "grad_norm": 0.3653973639011383, "learning_rate": 0.0001, "loss": 1.6808, "step": 3748 }, { "epoch": 0.9103933948518699, "grad_norm": 0.3743472695350647, "learning_rate": 0.0001, "loss": 1.7694, "step": 3749 }, { "epoch": 0.9106362311801846, "grad_norm": 0.3726973831653595, "learning_rate": 0.0001, "loss": 1.7917, "step": 3750 }, { "epoch": 0.9108790675084992, "grad_norm": 0.36880922317504883, "learning_rate": 0.0001, "loss": 1.703, "step": 3751 }, { "epoch": 0.911121903836814, "grad_norm": 0.3703029751777649, "learning_rate": 0.0001, "loss": 1.7295, "step": 3752 }, { "epoch": 0.9113647401651287, "grad_norm": 0.3539191484451294, "learning_rate": 0.0001, "loss": 1.7341, "step": 3753 }, { "epoch": 0.9116075764934434, "grad_norm": 0.3663886785507202, "learning_rate": 0.0001, "loss": 1.6153, "step": 3754 }, { "epoch": 0.9118504128217582, "grad_norm": 0.3673847019672394, "learning_rate": 0.0001, "loss": 1.7479, "step": 3755 }, { "epoch": 0.9120932491500728, "grad_norm": 0.3632906973361969, "learning_rate": 0.0001, "loss": 1.7295, "step": 3756 }, { "epoch": 0.9123360854783875, "grad_norm": 0.37695321440696716, "learning_rate": 0.0001, "loss": 1.6302, "step": 3757 }, { "epoch": 0.9125789218067023, "grad_norm": 0.3578214645385742, "learning_rate": 0.0001, "loss": 1.6948, "step": 3758 }, { "epoch": 0.912821758135017, "grad_norm": 0.4027178883552551, "learning_rate": 0.0001, "loss": 1.7601, "step": 3759 }, { "epoch": 0.9130645944633318, "grad_norm": 0.38166484236717224, "learning_rate": 0.0001, "loss": 1.7154, "step": 3760 }, { "epoch": 0.9133074307916464, "grad_norm": 0.3768790364265442, "learning_rate": 0.0001, "loss": 1.7746, "step": 3761 }, { "epoch": 0.9135502671199611, "grad_norm": 0.3585771322250366, "learning_rate": 0.0001, "loss": 1.5268, "step": 3762 }, { "epoch": 0.9137931034482759, "grad_norm": 0.35531941056251526, "learning_rate": 0.0001, "loss": 1.6352, "step": 3763 }, { "epoch": 0.9140359397765906, "grad_norm": 0.37948161363601685, "learning_rate": 0.0001, "loss": 1.7789, "step": 3764 }, { "epoch": 0.9142787761049053, "grad_norm": 0.3917665183544159, "learning_rate": 0.0001, "loss": 1.8557, "step": 3765 }, { "epoch": 0.91452161243322, "grad_norm": 0.3560101091861725, "learning_rate": 0.0001, "loss": 1.6469, "step": 3766 }, { "epoch": 0.9147644487615347, "grad_norm": 0.37302523851394653, "learning_rate": 0.0001, "loss": 1.6948, "step": 3767 }, { "epoch": 0.9150072850898494, "grad_norm": 0.3936711251735687, "learning_rate": 0.0001, "loss": 1.7581, "step": 3768 }, { "epoch": 0.9152501214181642, "grad_norm": 0.3600723445415497, "learning_rate": 0.0001, "loss": 1.6131, "step": 3769 }, { "epoch": 0.9154929577464789, "grad_norm": 0.3593932092189789, "learning_rate": 0.0001, "loss": 1.5959, "step": 3770 }, { "epoch": 0.9157357940747936, "grad_norm": 0.3671623468399048, "learning_rate": 0.0001, "loss": 1.6203, "step": 3771 }, { "epoch": 0.9159786304031083, "grad_norm": 0.3588806986808777, "learning_rate": 0.0001, "loss": 1.616, "step": 3772 }, { "epoch": 0.916221466731423, "grad_norm": 0.3804694712162018, "learning_rate": 0.0001, "loss": 1.7997, "step": 3773 }, { "epoch": 0.9164643030597377, "grad_norm": 0.36302250623703003, "learning_rate": 0.0001, "loss": 1.7443, "step": 3774 }, { "epoch": 0.9167071393880525, "grad_norm": 0.34705448150634766, "learning_rate": 0.0001, "loss": 1.578, "step": 3775 }, { "epoch": 0.9169499757163672, "grad_norm": 0.359637975692749, "learning_rate": 0.0001, "loss": 1.6751, "step": 3776 }, { "epoch": 0.9171928120446818, "grad_norm": 0.3869219422340393, "learning_rate": 0.0001, "loss": 1.7907, "step": 3777 }, { "epoch": 0.9174356483729966, "grad_norm": 0.3761953115463257, "learning_rate": 0.0001, "loss": 1.8541, "step": 3778 }, { "epoch": 0.9176784847013113, "grad_norm": 0.3853410482406616, "learning_rate": 0.0001, "loss": 1.7271, "step": 3779 }, { "epoch": 0.917921321029626, "grad_norm": 0.37119677662849426, "learning_rate": 0.0001, "loss": 1.7134, "step": 3780 }, { "epoch": 0.9181641573579408, "grad_norm": 0.369057834148407, "learning_rate": 0.0001, "loss": 1.7957, "step": 3781 }, { "epoch": 0.9184069936862554, "grad_norm": 0.37399792671203613, "learning_rate": 0.0001, "loss": 1.7382, "step": 3782 }, { "epoch": 0.9186498300145702, "grad_norm": 0.38667723536491394, "learning_rate": 0.0001, "loss": 1.7569, "step": 3783 }, { "epoch": 0.9188926663428849, "grad_norm": 0.39617523550987244, "learning_rate": 0.0001, "loss": 1.6252, "step": 3784 }, { "epoch": 0.9191355026711996, "grad_norm": 0.38363882899284363, "learning_rate": 0.0001, "loss": 1.8444, "step": 3785 }, { "epoch": 0.9193783389995144, "grad_norm": 0.4173693358898163, "learning_rate": 0.0001, "loss": 1.9011, "step": 3786 }, { "epoch": 0.919621175327829, "grad_norm": 0.4081033766269684, "learning_rate": 0.0001, "loss": 1.772, "step": 3787 }, { "epoch": 0.9198640116561437, "grad_norm": 0.3917405903339386, "learning_rate": 0.0001, "loss": 1.8649, "step": 3788 }, { "epoch": 0.9201068479844585, "grad_norm": 0.36920738220214844, "learning_rate": 0.0001, "loss": 1.6866, "step": 3789 }, { "epoch": 0.9203496843127732, "grad_norm": 0.359141081571579, "learning_rate": 0.0001, "loss": 1.8148, "step": 3790 }, { "epoch": 0.9205925206410879, "grad_norm": 0.35884061455726624, "learning_rate": 0.0001, "loss": 1.5744, "step": 3791 }, { "epoch": 0.9208353569694027, "grad_norm": 0.38075488805770874, "learning_rate": 0.0001, "loss": 1.7728, "step": 3792 }, { "epoch": 0.9210781932977173, "grad_norm": 0.36001816391944885, "learning_rate": 0.0001, "loss": 1.6481, "step": 3793 }, { "epoch": 0.921321029626032, "grad_norm": 0.37972912192344666, "learning_rate": 0.0001, "loss": 1.601, "step": 3794 }, { "epoch": 0.9215638659543468, "grad_norm": 0.36784154176712036, "learning_rate": 0.0001, "loss": 1.679, "step": 3795 }, { "epoch": 0.9218067022826615, "grad_norm": 0.36483627557754517, "learning_rate": 0.0001, "loss": 1.5875, "step": 3796 }, { "epoch": 0.9220495386109762, "grad_norm": 0.3781699538230896, "learning_rate": 0.0001, "loss": 1.8234, "step": 3797 }, { "epoch": 0.922292374939291, "grad_norm": 0.3724982440471649, "learning_rate": 0.0001, "loss": 1.6933, "step": 3798 }, { "epoch": 0.9225352112676056, "grad_norm": 0.39953988790512085, "learning_rate": 0.0001, "loss": 1.7848, "step": 3799 }, { "epoch": 0.9227780475959203, "grad_norm": 0.3567099869251251, "learning_rate": 0.0001, "loss": 1.7083, "step": 3800 }, { "epoch": 0.9230208839242351, "grad_norm": 0.38667944073677063, "learning_rate": 0.0001, "loss": 1.7049, "step": 3801 }, { "epoch": 0.9232637202525498, "grad_norm": 0.4046122133731842, "learning_rate": 0.0001, "loss": 1.7332, "step": 3802 }, { "epoch": 0.9235065565808644, "grad_norm": 0.3688383400440216, "learning_rate": 0.0001, "loss": 1.5917, "step": 3803 }, { "epoch": 0.9237493929091792, "grad_norm": 0.3840380609035492, "learning_rate": 0.0001, "loss": 1.555, "step": 3804 }, { "epoch": 0.9239922292374939, "grad_norm": 0.36803650856018066, "learning_rate": 0.0001, "loss": 1.8166, "step": 3805 }, { "epoch": 0.9242350655658087, "grad_norm": 0.3958253860473633, "learning_rate": 0.0001, "loss": 1.8586, "step": 3806 }, { "epoch": 0.9244779018941234, "grad_norm": 0.3751855492591858, "learning_rate": 0.0001, "loss": 1.7138, "step": 3807 }, { "epoch": 0.924720738222438, "grad_norm": 0.3692433834075928, "learning_rate": 0.0001, "loss": 1.6246, "step": 3808 }, { "epoch": 0.9249635745507528, "grad_norm": 0.3688707947731018, "learning_rate": 0.0001, "loss": 1.8355, "step": 3809 }, { "epoch": 0.9252064108790675, "grad_norm": 0.39333146810531616, "learning_rate": 0.0001, "loss": 1.7563, "step": 3810 }, { "epoch": 0.9254492472073822, "grad_norm": 0.3726106584072113, "learning_rate": 0.0001, "loss": 1.6823, "step": 3811 }, { "epoch": 0.925692083535697, "grad_norm": 0.36215999722480774, "learning_rate": 0.0001, "loss": 1.6521, "step": 3812 }, { "epoch": 0.9259349198640117, "grad_norm": 0.38629502058029175, "learning_rate": 0.0001, "loss": 1.7459, "step": 3813 }, { "epoch": 0.9261777561923263, "grad_norm": 0.3587053418159485, "learning_rate": 0.0001, "loss": 1.6508, "step": 3814 }, { "epoch": 0.9264205925206411, "grad_norm": 0.3717343509197235, "learning_rate": 0.0001, "loss": 1.7678, "step": 3815 }, { "epoch": 0.9266634288489558, "grad_norm": 0.37315699458122253, "learning_rate": 0.0001, "loss": 1.6838, "step": 3816 }, { "epoch": 0.9269062651772705, "grad_norm": 0.3515320122241974, "learning_rate": 0.0001, "loss": 1.524, "step": 3817 }, { "epoch": 0.9271491015055853, "grad_norm": 0.36067575216293335, "learning_rate": 0.0001, "loss": 1.7694, "step": 3818 }, { "epoch": 0.9273919378339, "grad_norm": 0.37640073895454407, "learning_rate": 0.0001, "loss": 1.7702, "step": 3819 }, { "epoch": 0.9276347741622146, "grad_norm": 0.3961258828639984, "learning_rate": 0.0001, "loss": 1.8072, "step": 3820 }, { "epoch": 0.9278776104905294, "grad_norm": 0.373757004737854, "learning_rate": 0.0001, "loss": 1.6027, "step": 3821 }, { "epoch": 0.9281204468188441, "grad_norm": 0.36781755089759827, "learning_rate": 0.0001, "loss": 1.6513, "step": 3822 }, { "epoch": 0.9283632831471588, "grad_norm": 0.3814017176628113, "learning_rate": 0.0001, "loss": 1.7724, "step": 3823 }, { "epoch": 0.9286061194754736, "grad_norm": 0.40861770510673523, "learning_rate": 0.0001, "loss": 1.6388, "step": 3824 }, { "epoch": 0.9288489558037882, "grad_norm": 0.3678155839443207, "learning_rate": 0.0001, "loss": 1.5734, "step": 3825 }, { "epoch": 0.9290917921321029, "grad_norm": 0.3855389654636383, "learning_rate": 0.0001, "loss": 1.7397, "step": 3826 }, { "epoch": 0.9293346284604177, "grad_norm": 0.3705948293209076, "learning_rate": 0.0001, "loss": 1.6456, "step": 3827 }, { "epoch": 0.9295774647887324, "grad_norm": 0.3740312159061432, "learning_rate": 0.0001, "loss": 1.7897, "step": 3828 }, { "epoch": 0.9298203011170472, "grad_norm": 0.38665881752967834, "learning_rate": 0.0001, "loss": 1.8266, "step": 3829 }, { "epoch": 0.9300631374453618, "grad_norm": 0.36732590198516846, "learning_rate": 0.0001, "loss": 1.7069, "step": 3830 }, { "epoch": 0.9303059737736765, "grad_norm": 0.37280961871147156, "learning_rate": 0.0001, "loss": 1.8124, "step": 3831 }, { "epoch": 0.9305488101019913, "grad_norm": 0.4017069637775421, "learning_rate": 0.0001, "loss": 1.8979, "step": 3832 }, { "epoch": 0.930791646430306, "grad_norm": 0.3593195378780365, "learning_rate": 0.0001, "loss": 1.7046, "step": 3833 }, { "epoch": 0.9310344827586207, "grad_norm": 0.3705736994743347, "learning_rate": 0.0001, "loss": 1.6845, "step": 3834 }, { "epoch": 0.9312773190869355, "grad_norm": 0.38185861706733704, "learning_rate": 0.0001, "loss": 1.6715, "step": 3835 }, { "epoch": 0.9315201554152501, "grad_norm": 0.37733927369117737, "learning_rate": 0.0001, "loss": 1.7452, "step": 3836 }, { "epoch": 0.9317629917435648, "grad_norm": 0.4084572196006775, "learning_rate": 0.0001, "loss": 1.8061, "step": 3837 }, { "epoch": 0.9320058280718796, "grad_norm": 0.3832477927207947, "learning_rate": 0.0001, "loss": 1.7575, "step": 3838 }, { "epoch": 0.9322486644001943, "grad_norm": 0.4194474518299103, "learning_rate": 0.0001, "loss": 1.8246, "step": 3839 }, { "epoch": 0.932491500728509, "grad_norm": 0.36019614338874817, "learning_rate": 0.0001, "loss": 1.7305, "step": 3840 }, { "epoch": 0.9327343370568237, "grad_norm": 0.3596409857273102, "learning_rate": 0.0001, "loss": 1.6803, "step": 3841 }, { "epoch": 0.9329771733851384, "grad_norm": 0.3968151807785034, "learning_rate": 0.0001, "loss": 1.8563, "step": 3842 }, { "epoch": 0.9332200097134531, "grad_norm": 0.33716362714767456, "learning_rate": 0.0001, "loss": 1.6342, "step": 3843 }, { "epoch": 0.9334628460417679, "grad_norm": 0.3743828535079956, "learning_rate": 0.0001, "loss": 1.7532, "step": 3844 }, { "epoch": 0.9337056823700826, "grad_norm": 0.355603963136673, "learning_rate": 0.0001, "loss": 1.5373, "step": 3845 }, { "epoch": 0.9339485186983972, "grad_norm": 0.36174502968788147, "learning_rate": 0.0001, "loss": 1.6813, "step": 3846 }, { "epoch": 0.934191355026712, "grad_norm": 0.355954110622406, "learning_rate": 0.0001, "loss": 1.5443, "step": 3847 }, { "epoch": 0.9344341913550267, "grad_norm": 0.3647748529911041, "learning_rate": 0.0001, "loss": 1.4645, "step": 3848 }, { "epoch": 0.9346770276833414, "grad_norm": 0.3363376259803772, "learning_rate": 0.0001, "loss": 1.52, "step": 3849 }, { "epoch": 0.9349198640116562, "grad_norm": 0.3913493752479553, "learning_rate": 0.0001, "loss": 1.7376, "step": 3850 }, { "epoch": 0.9351627003399708, "grad_norm": 0.4021153748035431, "learning_rate": 0.0001, "loss": 1.9212, "step": 3851 }, { "epoch": 0.9354055366682856, "grad_norm": 0.3638807237148285, "learning_rate": 0.0001, "loss": 1.7574, "step": 3852 }, { "epoch": 0.9356483729966003, "grad_norm": 0.36912328004837036, "learning_rate": 0.0001, "loss": 1.8145, "step": 3853 }, { "epoch": 0.935891209324915, "grad_norm": 0.3873084485530853, "learning_rate": 0.0001, "loss": 1.7894, "step": 3854 }, { "epoch": 0.9361340456532298, "grad_norm": 0.3698597848415375, "learning_rate": 0.0001, "loss": 1.76, "step": 3855 }, { "epoch": 0.9363768819815445, "grad_norm": 0.37964126467704773, "learning_rate": 0.0001, "loss": 1.8156, "step": 3856 }, { "epoch": 0.9366197183098591, "grad_norm": 0.3631061911582947, "learning_rate": 0.0001, "loss": 1.6366, "step": 3857 }, { "epoch": 0.9368625546381739, "grad_norm": 0.3854769766330719, "learning_rate": 0.0001, "loss": 1.6486, "step": 3858 }, { "epoch": 0.9371053909664886, "grad_norm": 0.3673323094844818, "learning_rate": 0.0001, "loss": 1.741, "step": 3859 }, { "epoch": 0.9373482272948033, "grad_norm": 0.3874039947986603, "learning_rate": 0.0001, "loss": 1.8782, "step": 3860 }, { "epoch": 0.9375910636231181, "grad_norm": 0.3830685615539551, "learning_rate": 0.0001, "loss": 1.7935, "step": 3861 }, { "epoch": 0.9378338999514327, "grad_norm": 0.3593977689743042, "learning_rate": 0.0001, "loss": 1.6822, "step": 3862 }, { "epoch": 0.9380767362797474, "grad_norm": 0.3805672228336334, "learning_rate": 0.0001, "loss": 1.8244, "step": 3863 }, { "epoch": 0.9383195726080622, "grad_norm": 0.35292261838912964, "learning_rate": 0.0001, "loss": 1.6097, "step": 3864 }, { "epoch": 0.9385624089363769, "grad_norm": 0.3597135543823242, "learning_rate": 0.0001, "loss": 1.5708, "step": 3865 }, { "epoch": 0.9388052452646916, "grad_norm": 0.3945610523223877, "learning_rate": 0.0001, "loss": 1.8461, "step": 3866 }, { "epoch": 0.9390480815930063, "grad_norm": 0.36307209730148315, "learning_rate": 0.0001, "loss": 1.655, "step": 3867 }, { "epoch": 0.939290917921321, "grad_norm": 0.3550439178943634, "learning_rate": 0.0001, "loss": 1.616, "step": 3868 }, { "epoch": 0.9395337542496357, "grad_norm": 0.35855671763420105, "learning_rate": 0.0001, "loss": 1.6995, "step": 3869 }, { "epoch": 0.9397765905779505, "grad_norm": 0.3556819260120392, "learning_rate": 0.0001, "loss": 1.486, "step": 3870 }, { "epoch": 0.9400194269062652, "grad_norm": 0.38059186935424805, "learning_rate": 0.0001, "loss": 1.7648, "step": 3871 }, { "epoch": 0.9402622632345798, "grad_norm": 0.35753384232521057, "learning_rate": 0.0001, "loss": 1.5893, "step": 3872 }, { "epoch": 0.9405050995628946, "grad_norm": 0.37142637372016907, "learning_rate": 0.0001, "loss": 1.7485, "step": 3873 }, { "epoch": 0.9407479358912093, "grad_norm": 0.3409467041492462, "learning_rate": 0.0001, "loss": 1.5089, "step": 3874 }, { "epoch": 0.940990772219524, "grad_norm": 0.35744860768318176, "learning_rate": 0.0001, "loss": 1.7779, "step": 3875 }, { "epoch": 0.9412336085478388, "grad_norm": 0.3719422221183777, "learning_rate": 0.0001, "loss": 1.5868, "step": 3876 }, { "epoch": 0.9414764448761535, "grad_norm": 0.3818061649799347, "learning_rate": 0.0001, "loss": 1.7106, "step": 3877 }, { "epoch": 0.9417192812044682, "grad_norm": 0.3610047399997711, "learning_rate": 0.0001, "loss": 1.5557, "step": 3878 }, { "epoch": 0.9419621175327829, "grad_norm": 0.36827659606933594, "learning_rate": 0.0001, "loss": 1.7735, "step": 3879 }, { "epoch": 0.9422049538610976, "grad_norm": 0.3562065660953522, "learning_rate": 0.0001, "loss": 1.6174, "step": 3880 }, { "epoch": 0.9424477901894124, "grad_norm": 0.3670153021812439, "learning_rate": 0.0001, "loss": 1.7448, "step": 3881 }, { "epoch": 0.9426906265177271, "grad_norm": 0.37725064158439636, "learning_rate": 0.0001, "loss": 1.6493, "step": 3882 }, { "epoch": 0.9429334628460417, "grad_norm": 0.3852989375591278, "learning_rate": 0.0001, "loss": 1.8689, "step": 3883 }, { "epoch": 0.9431762991743565, "grad_norm": 0.35966503620147705, "learning_rate": 0.0001, "loss": 1.5603, "step": 3884 }, { "epoch": 0.9434191355026712, "grad_norm": 0.36065688729286194, "learning_rate": 0.0001, "loss": 1.6563, "step": 3885 }, { "epoch": 0.9436619718309859, "grad_norm": 0.37432312965393066, "learning_rate": 0.0001, "loss": 1.7617, "step": 3886 }, { "epoch": 0.9439048081593007, "grad_norm": 0.372538298368454, "learning_rate": 0.0001, "loss": 1.8255, "step": 3887 }, { "epoch": 0.9441476444876153, "grad_norm": 0.3747842013835907, "learning_rate": 0.0001, "loss": 1.8332, "step": 3888 }, { "epoch": 0.94439048081593, "grad_norm": 0.367948055267334, "learning_rate": 0.0001, "loss": 1.7293, "step": 3889 }, { "epoch": 0.9446333171442448, "grad_norm": 0.3862752914428711, "learning_rate": 0.0001, "loss": 1.8169, "step": 3890 }, { "epoch": 0.9448761534725595, "grad_norm": 0.38457968831062317, "learning_rate": 0.0001, "loss": 1.654, "step": 3891 }, { "epoch": 0.9451189898008742, "grad_norm": 0.37712398171424866, "learning_rate": 0.0001, "loss": 1.8021, "step": 3892 }, { "epoch": 0.945361826129189, "grad_norm": 0.36352089047431946, "learning_rate": 0.0001, "loss": 1.6227, "step": 3893 }, { "epoch": 0.9456046624575036, "grad_norm": 0.37528321146965027, "learning_rate": 0.0001, "loss": 1.629, "step": 3894 }, { "epoch": 0.9458474987858183, "grad_norm": 0.3859131634235382, "learning_rate": 0.0001, "loss": 1.7157, "step": 3895 }, { "epoch": 0.9460903351141331, "grad_norm": 0.367913156747818, "learning_rate": 0.0001, "loss": 1.773, "step": 3896 }, { "epoch": 0.9463331714424478, "grad_norm": 0.3919924199581146, "learning_rate": 0.0001, "loss": 1.7986, "step": 3897 }, { "epoch": 0.9465760077707625, "grad_norm": 0.3621267080307007, "learning_rate": 0.0001, "loss": 1.766, "step": 3898 }, { "epoch": 0.9468188440990772, "grad_norm": 0.375805526971817, "learning_rate": 0.0001, "loss": 1.7918, "step": 3899 }, { "epoch": 0.9470616804273919, "grad_norm": 0.38171714544296265, "learning_rate": 0.0001, "loss": 1.779, "step": 3900 }, { "epoch": 0.9473045167557067, "grad_norm": 0.3639489710330963, "learning_rate": 0.0001, "loss": 1.7265, "step": 3901 }, { "epoch": 0.9475473530840214, "grad_norm": 0.3477586805820465, "learning_rate": 0.0001, "loss": 1.6296, "step": 3902 }, { "epoch": 0.9477901894123361, "grad_norm": 0.37721481919288635, "learning_rate": 0.0001, "loss": 1.6934, "step": 3903 }, { "epoch": 0.9480330257406508, "grad_norm": 0.3980064392089844, "learning_rate": 0.0001, "loss": 1.736, "step": 3904 }, { "epoch": 0.9482758620689655, "grad_norm": 0.3610898554325104, "learning_rate": 0.0001, "loss": 1.6393, "step": 3905 }, { "epoch": 0.9485186983972802, "grad_norm": 0.3711906671524048, "learning_rate": 0.0001, "loss": 1.7033, "step": 3906 }, { "epoch": 0.948761534725595, "grad_norm": 0.38572028279304504, "learning_rate": 0.0001, "loss": 1.754, "step": 3907 }, { "epoch": 0.9490043710539097, "grad_norm": 0.36621928215026855, "learning_rate": 0.0001, "loss": 1.6322, "step": 3908 }, { "epoch": 0.9492472073822243, "grad_norm": 0.3782571256160736, "learning_rate": 0.0001, "loss": 1.8551, "step": 3909 }, { "epoch": 0.9494900437105391, "grad_norm": 0.36709949374198914, "learning_rate": 0.0001, "loss": 1.5676, "step": 3910 }, { "epoch": 0.9497328800388538, "grad_norm": 0.38570162653923035, "learning_rate": 0.0001, "loss": 1.7557, "step": 3911 }, { "epoch": 0.9499757163671685, "grad_norm": 0.3895321488380432, "learning_rate": 0.0001, "loss": 1.7039, "step": 3912 }, { "epoch": 0.9502185526954833, "grad_norm": 0.35144907236099243, "learning_rate": 0.0001, "loss": 1.5781, "step": 3913 }, { "epoch": 0.950461389023798, "grad_norm": 0.38092508912086487, "learning_rate": 0.0001, "loss": 1.6042, "step": 3914 }, { "epoch": 0.9507042253521126, "grad_norm": 0.3619476854801178, "learning_rate": 0.0001, "loss": 1.5631, "step": 3915 }, { "epoch": 0.9509470616804274, "grad_norm": 0.40158382058143616, "learning_rate": 0.0001, "loss": 1.7766, "step": 3916 }, { "epoch": 0.9511898980087421, "grad_norm": 0.3472714424133301, "learning_rate": 0.0001, "loss": 1.5479, "step": 3917 }, { "epoch": 0.9514327343370568, "grad_norm": 0.36904680728912354, "learning_rate": 0.0001, "loss": 1.7577, "step": 3918 }, { "epoch": 0.9516755706653716, "grad_norm": 0.3941371440887451, "learning_rate": 0.0001, "loss": 1.8633, "step": 3919 }, { "epoch": 0.9519184069936862, "grad_norm": 0.3843080401420593, "learning_rate": 0.0001, "loss": 1.7322, "step": 3920 }, { "epoch": 0.9521612433220009, "grad_norm": 0.3651866912841797, "learning_rate": 0.0001, "loss": 1.6752, "step": 3921 }, { "epoch": 0.9524040796503157, "grad_norm": 0.38083866238594055, "learning_rate": 0.0001, "loss": 1.6955, "step": 3922 }, { "epoch": 0.9526469159786304, "grad_norm": 0.3733740746974945, "learning_rate": 0.0001, "loss": 1.5993, "step": 3923 }, { "epoch": 0.9528897523069452, "grad_norm": 0.380954384803772, "learning_rate": 0.0001, "loss": 1.7203, "step": 3924 }, { "epoch": 0.9531325886352598, "grad_norm": 0.3862300515174866, "learning_rate": 0.0001, "loss": 1.6818, "step": 3925 }, { "epoch": 0.9533754249635745, "grad_norm": 0.40166255831718445, "learning_rate": 0.0001, "loss": 1.7067, "step": 3926 }, { "epoch": 0.9536182612918893, "grad_norm": 0.3671046495437622, "learning_rate": 0.0001, "loss": 1.6741, "step": 3927 }, { "epoch": 0.953861097620204, "grad_norm": 0.3554973602294922, "learning_rate": 0.0001, "loss": 1.6256, "step": 3928 }, { "epoch": 0.9541039339485187, "grad_norm": 0.3730063736438751, "learning_rate": 0.0001, "loss": 1.7237, "step": 3929 }, { "epoch": 0.9543467702768335, "grad_norm": 0.36285051703453064, "learning_rate": 0.0001, "loss": 1.6649, "step": 3930 }, { "epoch": 0.9545896066051481, "grad_norm": 0.36590075492858887, "learning_rate": 0.0001, "loss": 1.4982, "step": 3931 }, { "epoch": 0.9548324429334628, "grad_norm": 0.38101863861083984, "learning_rate": 0.0001, "loss": 1.7006, "step": 3932 }, { "epoch": 0.9550752792617776, "grad_norm": 0.38884878158569336, "learning_rate": 0.0001, "loss": 1.7229, "step": 3933 }, { "epoch": 0.9553181155900923, "grad_norm": 0.40515512228012085, "learning_rate": 0.0001, "loss": 1.7678, "step": 3934 }, { "epoch": 0.955560951918407, "grad_norm": 0.4170911908149719, "learning_rate": 0.0001, "loss": 1.7991, "step": 3935 }, { "epoch": 0.9558037882467217, "grad_norm": 0.36002835631370544, "learning_rate": 0.0001, "loss": 1.6004, "step": 3936 }, { "epoch": 0.9560466245750364, "grad_norm": 0.38889098167419434, "learning_rate": 0.0001, "loss": 1.51, "step": 3937 }, { "epoch": 0.9562894609033511, "grad_norm": 0.393414169549942, "learning_rate": 0.0001, "loss": 1.6472, "step": 3938 }, { "epoch": 0.9565322972316659, "grad_norm": 0.38613879680633545, "learning_rate": 0.0001, "loss": 1.7916, "step": 3939 }, { "epoch": 0.9567751335599806, "grad_norm": 0.384308785200119, "learning_rate": 0.0001, "loss": 1.8509, "step": 3940 }, { "epoch": 0.9570179698882952, "grad_norm": 0.39423057436943054, "learning_rate": 0.0001, "loss": 1.7192, "step": 3941 }, { "epoch": 0.95726080621661, "grad_norm": 0.3493192195892334, "learning_rate": 0.0001, "loss": 1.4989, "step": 3942 }, { "epoch": 0.9575036425449247, "grad_norm": 0.3633043169975281, "learning_rate": 0.0001, "loss": 1.6156, "step": 3943 }, { "epoch": 0.9577464788732394, "grad_norm": 0.3607250154018402, "learning_rate": 0.0001, "loss": 1.6399, "step": 3944 }, { "epoch": 0.9579893152015542, "grad_norm": 0.3949401080608368, "learning_rate": 0.0001, "loss": 1.7105, "step": 3945 }, { "epoch": 0.9582321515298688, "grad_norm": 0.4055007994174957, "learning_rate": 0.0001, "loss": 1.8245, "step": 3946 }, { "epoch": 0.9584749878581836, "grad_norm": 0.368541955947876, "learning_rate": 0.0001, "loss": 1.7052, "step": 3947 }, { "epoch": 0.9587178241864983, "grad_norm": 0.39234501123428345, "learning_rate": 0.0001, "loss": 1.7882, "step": 3948 }, { "epoch": 0.958960660514813, "grad_norm": 0.37539973855018616, "learning_rate": 0.0001, "loss": 1.768, "step": 3949 }, { "epoch": 0.9592034968431278, "grad_norm": 0.3946327269077301, "learning_rate": 0.0001, "loss": 1.6774, "step": 3950 }, { "epoch": 0.9594463331714425, "grad_norm": 0.399637371301651, "learning_rate": 0.0001, "loss": 1.9732, "step": 3951 }, { "epoch": 0.9596891694997571, "grad_norm": 0.4541732668876648, "learning_rate": 0.0001, "loss": 1.6445, "step": 3952 }, { "epoch": 0.9599320058280719, "grad_norm": 0.3677199184894562, "learning_rate": 0.0001, "loss": 1.6625, "step": 3953 }, { "epoch": 0.9601748421563866, "grad_norm": 0.3860390782356262, "learning_rate": 0.0001, "loss": 1.8102, "step": 3954 }, { "epoch": 0.9604176784847013, "grad_norm": 0.36609208583831787, "learning_rate": 0.0001, "loss": 1.6788, "step": 3955 }, { "epoch": 0.9606605148130161, "grad_norm": 0.39333397150039673, "learning_rate": 0.0001, "loss": 1.6569, "step": 3956 }, { "epoch": 0.9609033511413307, "grad_norm": 0.39077579975128174, "learning_rate": 0.0001, "loss": 1.826, "step": 3957 }, { "epoch": 0.9611461874696454, "grad_norm": 0.37753769755363464, "learning_rate": 0.0001, "loss": 1.6429, "step": 3958 }, { "epoch": 0.9613890237979602, "grad_norm": 0.36394500732421875, "learning_rate": 0.0001, "loss": 1.6673, "step": 3959 }, { "epoch": 0.9616318601262749, "grad_norm": 0.35576146841049194, "learning_rate": 0.0001, "loss": 1.7007, "step": 3960 }, { "epoch": 0.9618746964545896, "grad_norm": 0.35573631525039673, "learning_rate": 0.0001, "loss": 1.6759, "step": 3961 }, { "epoch": 0.9621175327829043, "grad_norm": 0.40379825234413147, "learning_rate": 0.0001, "loss": 1.8536, "step": 3962 }, { "epoch": 0.962360369111219, "grad_norm": 0.35767439007759094, "learning_rate": 0.0001, "loss": 1.6918, "step": 3963 }, { "epoch": 0.9626032054395337, "grad_norm": 0.4079418182373047, "learning_rate": 0.0001, "loss": 1.9354, "step": 3964 }, { "epoch": 0.9628460417678485, "grad_norm": 0.37000563740730286, "learning_rate": 0.0001, "loss": 1.6034, "step": 3965 }, { "epoch": 0.9630888780961632, "grad_norm": 0.36689653992652893, "learning_rate": 0.0001, "loss": 1.6183, "step": 3966 }, { "epoch": 0.9633317144244778, "grad_norm": 0.3575601577758789, "learning_rate": 0.0001, "loss": 1.7348, "step": 3967 }, { "epoch": 0.9635745507527926, "grad_norm": 0.3624182641506195, "learning_rate": 0.0001, "loss": 1.7706, "step": 3968 }, { "epoch": 0.9638173870811073, "grad_norm": 0.3750225603580475, "learning_rate": 0.0001, "loss": 1.8725, "step": 3969 }, { "epoch": 0.9640602234094221, "grad_norm": 0.4041099548339844, "learning_rate": 0.0001, "loss": 1.9005, "step": 3970 }, { "epoch": 0.9643030597377368, "grad_norm": 0.38125571608543396, "learning_rate": 0.0001, "loss": 1.7553, "step": 3971 }, { "epoch": 0.9645458960660515, "grad_norm": 0.3875647783279419, "learning_rate": 0.0001, "loss": 1.8006, "step": 3972 }, { "epoch": 0.9647887323943662, "grad_norm": 0.3712661564350128, "learning_rate": 0.0001, "loss": 1.6987, "step": 3973 }, { "epoch": 0.9650315687226809, "grad_norm": 0.35640186071395874, "learning_rate": 0.0001, "loss": 1.7312, "step": 3974 }, { "epoch": 0.9652744050509956, "grad_norm": 0.3601911962032318, "learning_rate": 0.0001, "loss": 1.74, "step": 3975 }, { "epoch": 0.9655172413793104, "grad_norm": 0.3869849443435669, "learning_rate": 0.0001, "loss": 1.9955, "step": 3976 }, { "epoch": 0.9657600777076251, "grad_norm": 0.38196122646331787, "learning_rate": 0.0001, "loss": 1.8113, "step": 3977 }, { "epoch": 0.9660029140359397, "grad_norm": 0.3775624930858612, "learning_rate": 0.0001, "loss": 1.664, "step": 3978 }, { "epoch": 0.9662457503642545, "grad_norm": 0.3636208176612854, "learning_rate": 0.0001, "loss": 1.7108, "step": 3979 }, { "epoch": 0.9664885866925692, "grad_norm": 0.37071430683135986, "learning_rate": 0.0001, "loss": 1.7082, "step": 3980 }, { "epoch": 0.9667314230208839, "grad_norm": 0.37515515089035034, "learning_rate": 0.0001, "loss": 1.6976, "step": 3981 }, { "epoch": 0.9669742593491987, "grad_norm": 0.3826182186603546, "learning_rate": 0.0001, "loss": 1.6529, "step": 3982 }, { "epoch": 0.9672170956775133, "grad_norm": 0.3860194683074951, "learning_rate": 0.0001, "loss": 1.8768, "step": 3983 }, { "epoch": 0.967459932005828, "grad_norm": 0.4301397502422333, "learning_rate": 0.0001, "loss": 1.8274, "step": 3984 }, { "epoch": 0.9677027683341428, "grad_norm": 0.37735873460769653, "learning_rate": 0.0001, "loss": 1.7217, "step": 3985 }, { "epoch": 0.9679456046624575, "grad_norm": 0.40235722064971924, "learning_rate": 0.0001, "loss": 1.8216, "step": 3986 }, { "epoch": 0.9681884409907722, "grad_norm": 0.38523074984550476, "learning_rate": 0.0001, "loss": 1.7234, "step": 3987 }, { "epoch": 0.968431277319087, "grad_norm": 0.4025649428367615, "learning_rate": 0.0001, "loss": 1.7751, "step": 3988 }, { "epoch": 0.9686741136474016, "grad_norm": 0.3688255250453949, "learning_rate": 0.0001, "loss": 1.7218, "step": 3989 }, { "epoch": 0.9689169499757163, "grad_norm": 0.4351263642311096, "learning_rate": 0.0001, "loss": 1.8507, "step": 3990 }, { "epoch": 0.9691597863040311, "grad_norm": 0.37375950813293457, "learning_rate": 0.0001, "loss": 1.5947, "step": 3991 }, { "epoch": 0.9694026226323458, "grad_norm": 0.39403340220451355, "learning_rate": 0.0001, "loss": 1.7429, "step": 3992 }, { "epoch": 0.9696454589606606, "grad_norm": 0.38074350357055664, "learning_rate": 0.0001, "loss": 1.6262, "step": 3993 }, { "epoch": 0.9698882952889752, "grad_norm": 0.3697483241558075, "learning_rate": 0.0001, "loss": 1.5779, "step": 3994 }, { "epoch": 0.9701311316172899, "grad_norm": 0.3529646694660187, "learning_rate": 0.0001, "loss": 1.5475, "step": 3995 }, { "epoch": 0.9703739679456047, "grad_norm": 0.3757368326187134, "learning_rate": 0.0001, "loss": 1.6681, "step": 3996 }, { "epoch": 0.9706168042739194, "grad_norm": 0.41512370109558105, "learning_rate": 0.0001, "loss": 1.803, "step": 3997 }, { "epoch": 0.9708596406022341, "grad_norm": 0.3747655153274536, "learning_rate": 0.0001, "loss": 1.7778, "step": 3998 }, { "epoch": 0.9711024769305489, "grad_norm": 0.3599449396133423, "learning_rate": 0.0001, "loss": 1.6616, "step": 3999 }, { "epoch": 0.9713453132588635, "grad_norm": 0.39672431349754333, "learning_rate": 0.0001, "loss": 1.8923, "step": 4000 }, { "epoch": 0.9715881495871782, "grad_norm": 0.36832189559936523, "learning_rate": 0.0001, "loss": 1.5115, "step": 4001 }, { "epoch": 0.971830985915493, "grad_norm": 0.37256690859794617, "learning_rate": 0.0001, "loss": 1.8418, "step": 4002 }, { "epoch": 0.9720738222438077, "grad_norm": 0.39560651779174805, "learning_rate": 0.0001, "loss": 1.6514, "step": 4003 }, { "epoch": 0.9723166585721223, "grad_norm": 0.377238005399704, "learning_rate": 0.0001, "loss": 1.7049, "step": 4004 }, { "epoch": 0.9725594949004371, "grad_norm": 0.354621559381485, "learning_rate": 0.0001, "loss": 1.6693, "step": 4005 }, { "epoch": 0.9728023312287518, "grad_norm": 0.368179589509964, "learning_rate": 0.0001, "loss": 1.7526, "step": 4006 }, { "epoch": 0.9730451675570665, "grad_norm": 0.4011801481246948, "learning_rate": 0.0001, "loss": 1.705, "step": 4007 }, { "epoch": 0.9732880038853813, "grad_norm": 0.3765765130519867, "learning_rate": 0.0001, "loss": 1.6606, "step": 4008 }, { "epoch": 0.973530840213696, "grad_norm": 0.38201904296875, "learning_rate": 0.0001, "loss": 1.7826, "step": 4009 }, { "epoch": 0.9737736765420106, "grad_norm": 0.376761257648468, "learning_rate": 0.0001, "loss": 1.5094, "step": 4010 }, { "epoch": 0.9740165128703254, "grad_norm": 0.36620327830314636, "learning_rate": 0.0001, "loss": 1.6652, "step": 4011 }, { "epoch": 0.9742593491986401, "grad_norm": 0.39449259638786316, "learning_rate": 0.0001, "loss": 1.8538, "step": 4012 }, { "epoch": 0.9745021855269548, "grad_norm": 0.4007440507411957, "learning_rate": 0.0001, "loss": 1.7131, "step": 4013 }, { "epoch": 0.9747450218552696, "grad_norm": 0.36265507340431213, "learning_rate": 0.0001, "loss": 1.5114, "step": 4014 }, { "epoch": 0.9749878581835842, "grad_norm": 0.35982275009155273, "learning_rate": 0.0001, "loss": 1.647, "step": 4015 }, { "epoch": 0.975230694511899, "grad_norm": 0.4066230058670044, "learning_rate": 0.0001, "loss": 1.8601, "step": 4016 }, { "epoch": 0.9754735308402137, "grad_norm": 0.3756217658519745, "learning_rate": 0.0001, "loss": 1.6316, "step": 4017 }, { "epoch": 0.9757163671685284, "grad_norm": 0.39345574378967285, "learning_rate": 0.0001, "loss": 1.7711, "step": 4018 }, { "epoch": 0.9759592034968432, "grad_norm": 0.37413719296455383, "learning_rate": 0.0001, "loss": 1.7526, "step": 4019 }, { "epoch": 0.9762020398251579, "grad_norm": 0.37252193689346313, "learning_rate": 0.0001, "loss": 1.6338, "step": 4020 }, { "epoch": 0.9764448761534725, "grad_norm": 0.37571969628334045, "learning_rate": 0.0001, "loss": 1.6935, "step": 4021 }, { "epoch": 0.9766877124817873, "grad_norm": 0.3627742826938629, "learning_rate": 0.0001, "loss": 1.6191, "step": 4022 }, { "epoch": 0.976930548810102, "grad_norm": 0.4085633158683777, "learning_rate": 0.0001, "loss": 1.7949, "step": 4023 }, { "epoch": 0.9771733851384167, "grad_norm": 0.3718832731246948, "learning_rate": 0.0001, "loss": 1.7685, "step": 4024 }, { "epoch": 0.9774162214667315, "grad_norm": 0.3533092439174652, "learning_rate": 0.0001, "loss": 1.5106, "step": 4025 }, { "epoch": 0.9776590577950461, "grad_norm": 0.37770017981529236, "learning_rate": 0.0001, "loss": 1.702, "step": 4026 }, { "epoch": 0.9779018941233608, "grad_norm": 0.37683406472206116, "learning_rate": 0.0001, "loss": 1.7316, "step": 4027 }, { "epoch": 0.9781447304516756, "grad_norm": 0.3591940999031067, "learning_rate": 0.0001, "loss": 1.6739, "step": 4028 }, { "epoch": 0.9783875667799903, "grad_norm": 0.36840593814849854, "learning_rate": 0.0001, "loss": 1.679, "step": 4029 }, { "epoch": 0.978630403108305, "grad_norm": 0.35593312978744507, "learning_rate": 0.0001, "loss": 1.5049, "step": 4030 }, { "epoch": 0.9788732394366197, "grad_norm": 0.3793998062610626, "learning_rate": 0.0001, "loss": 1.6473, "step": 4031 }, { "epoch": 0.9791160757649344, "grad_norm": 0.3793684244155884, "learning_rate": 0.0001, "loss": 1.7173, "step": 4032 }, { "epoch": 0.9793589120932491, "grad_norm": 0.3691100776195526, "learning_rate": 0.0001, "loss": 1.6269, "step": 4033 }, { "epoch": 0.9796017484215639, "grad_norm": 0.3731459379196167, "learning_rate": 0.0001, "loss": 1.6124, "step": 4034 }, { "epoch": 0.9798445847498786, "grad_norm": 0.3664308786392212, "learning_rate": 0.0001, "loss": 1.6795, "step": 4035 }, { "epoch": 0.9800874210781932, "grad_norm": 0.3941652774810791, "learning_rate": 0.0001, "loss": 1.6496, "step": 4036 }, { "epoch": 0.980330257406508, "grad_norm": 0.3622600734233856, "learning_rate": 0.0001, "loss": 1.6641, "step": 4037 }, { "epoch": 0.9805730937348227, "grad_norm": 0.38590314984321594, "learning_rate": 0.0001, "loss": 1.8057, "step": 4038 }, { "epoch": 0.9808159300631375, "grad_norm": 0.4126585125923157, "learning_rate": 0.0001, "loss": 1.9473, "step": 4039 }, { "epoch": 0.9810587663914522, "grad_norm": 0.37464776635169983, "learning_rate": 0.0001, "loss": 1.7544, "step": 4040 }, { "epoch": 0.9813016027197669, "grad_norm": 0.3711651861667633, "learning_rate": 0.0001, "loss": 1.7174, "step": 4041 }, { "epoch": 0.9815444390480816, "grad_norm": 0.36861082911491394, "learning_rate": 0.0001, "loss": 1.6576, "step": 4042 }, { "epoch": 0.9817872753763963, "grad_norm": 0.3674752414226532, "learning_rate": 0.0001, "loss": 1.8249, "step": 4043 }, { "epoch": 0.982030111704711, "grad_norm": 0.3985970616340637, "learning_rate": 0.0001, "loss": 1.7037, "step": 4044 }, { "epoch": 0.9822729480330258, "grad_norm": 0.4111131727695465, "learning_rate": 0.0001, "loss": 1.8352, "step": 4045 }, { "epoch": 0.9825157843613405, "grad_norm": 0.39453092217445374, "learning_rate": 0.0001, "loss": 1.7642, "step": 4046 }, { "epoch": 0.9827586206896551, "grad_norm": 0.37974950671195984, "learning_rate": 0.0001, "loss": 1.6592, "step": 4047 }, { "epoch": 0.9830014570179699, "grad_norm": 0.3616590201854706, "learning_rate": 0.0001, "loss": 1.7328, "step": 4048 }, { "epoch": 0.9832442933462846, "grad_norm": 0.34769493341445923, "learning_rate": 0.0001, "loss": 1.6637, "step": 4049 }, { "epoch": 0.9834871296745993, "grad_norm": 0.36645328998565674, "learning_rate": 0.0001, "loss": 1.7429, "step": 4050 }, { "epoch": 0.9837299660029141, "grad_norm": 0.3879911005496979, "learning_rate": 0.0001, "loss": 1.689, "step": 4051 }, { "epoch": 0.9839728023312287, "grad_norm": 0.3560008108615875, "learning_rate": 0.0001, "loss": 1.55, "step": 4052 }, { "epoch": 0.9842156386595434, "grad_norm": 0.35079431533813477, "learning_rate": 0.0001, "loss": 1.6343, "step": 4053 }, { "epoch": 0.9844584749878582, "grad_norm": 0.3875662684440613, "learning_rate": 0.0001, "loss": 1.6194, "step": 4054 }, { "epoch": 0.9847013113161729, "grad_norm": 0.39650678634643555, "learning_rate": 0.0001, "loss": 1.8524, "step": 4055 }, { "epoch": 0.9849441476444876, "grad_norm": 0.34503695368766785, "learning_rate": 0.0001, "loss": 1.6514, "step": 4056 }, { "epoch": 0.9851869839728024, "grad_norm": 0.3781222403049469, "learning_rate": 0.0001, "loss": 1.8418, "step": 4057 }, { "epoch": 0.985429820301117, "grad_norm": 0.3813125789165497, "learning_rate": 0.0001, "loss": 1.6916, "step": 4058 }, { "epoch": 0.9856726566294317, "grad_norm": 0.3896786570549011, "learning_rate": 0.0001, "loss": 1.7672, "step": 4059 }, { "epoch": 0.9859154929577465, "grad_norm": 0.3770349323749542, "learning_rate": 0.0001, "loss": 1.6771, "step": 4060 }, { "epoch": 0.9861583292860612, "grad_norm": 0.3951159417629242, "learning_rate": 0.0001, "loss": 1.7921, "step": 4061 }, { "epoch": 0.986401165614376, "grad_norm": 0.3489895462989807, "learning_rate": 0.0001, "loss": 1.5675, "step": 4062 }, { "epoch": 0.9866440019426906, "grad_norm": 0.3497614860534668, "learning_rate": 0.0001, "loss": 1.6035, "step": 4063 }, { "epoch": 0.9868868382710053, "grad_norm": 0.3808273375034332, "learning_rate": 0.0001, "loss": 1.7012, "step": 4064 }, { "epoch": 0.9871296745993201, "grad_norm": 0.38426393270492554, "learning_rate": 0.0001, "loss": 1.6891, "step": 4065 }, { "epoch": 0.9873725109276348, "grad_norm": 0.38831883668899536, "learning_rate": 0.0001, "loss": 1.8116, "step": 4066 }, { "epoch": 0.9876153472559495, "grad_norm": 0.3684384524822235, "learning_rate": 0.0001, "loss": 1.6775, "step": 4067 }, { "epoch": 0.9878581835842642, "grad_norm": 0.3812960088253021, "learning_rate": 0.0001, "loss": 1.729, "step": 4068 }, { "epoch": 0.9881010199125789, "grad_norm": 0.35406169295310974, "learning_rate": 0.0001, "loss": 1.4515, "step": 4069 }, { "epoch": 0.9883438562408936, "grad_norm": 0.3798271715641022, "learning_rate": 0.0001, "loss": 1.7221, "step": 4070 }, { "epoch": 0.9885866925692084, "grad_norm": 0.3925783038139343, "learning_rate": 0.0001, "loss": 1.6974, "step": 4071 }, { "epoch": 0.9888295288975231, "grad_norm": 0.3802926540374756, "learning_rate": 0.0001, "loss": 1.5563, "step": 4072 }, { "epoch": 0.9890723652258377, "grad_norm": 0.3390859365463257, "learning_rate": 0.0001, "loss": 1.5779, "step": 4073 }, { "epoch": 0.9893152015541525, "grad_norm": 0.44702109694480896, "learning_rate": 0.0001, "loss": 1.8813, "step": 4074 }, { "epoch": 0.9895580378824672, "grad_norm": 0.37999674677848816, "learning_rate": 0.0001, "loss": 1.6977, "step": 4075 }, { "epoch": 0.9898008742107819, "grad_norm": 0.3615684509277344, "learning_rate": 0.0001, "loss": 1.6233, "step": 4076 }, { "epoch": 0.9900437105390967, "grad_norm": 0.3654703199863434, "learning_rate": 0.0001, "loss": 1.7714, "step": 4077 }, { "epoch": 0.9902865468674114, "grad_norm": 0.3864661157131195, "learning_rate": 0.0001, "loss": 1.7514, "step": 4078 }, { "epoch": 0.990529383195726, "grad_norm": 0.3851757347583771, "learning_rate": 0.0001, "loss": 1.642, "step": 4079 }, { "epoch": 0.9907722195240408, "grad_norm": 0.35797446966171265, "learning_rate": 0.0001, "loss": 1.7346, "step": 4080 }, { "epoch": 0.9910150558523555, "grad_norm": 0.3770246207714081, "learning_rate": 0.0001, "loss": 1.5539, "step": 4081 }, { "epoch": 0.9912578921806702, "grad_norm": 0.3706028461456299, "learning_rate": 0.0001, "loss": 1.6336, "step": 4082 }, { "epoch": 0.991500728508985, "grad_norm": 0.362161248922348, "learning_rate": 0.0001, "loss": 1.6249, "step": 4083 }, { "epoch": 0.9917435648372996, "grad_norm": 0.34968650341033936, "learning_rate": 0.0001, "loss": 1.5998, "step": 4084 }, { "epoch": 0.9919864011656144, "grad_norm": 0.38522493839263916, "learning_rate": 0.0001, "loss": 1.7859, "step": 4085 }, { "epoch": 0.9922292374939291, "grad_norm": 0.3981729745864868, "learning_rate": 0.0001, "loss": 1.6727, "step": 4086 }, { "epoch": 0.9924720738222438, "grad_norm": 0.3760201632976532, "learning_rate": 0.0001, "loss": 1.6928, "step": 4087 }, { "epoch": 0.9927149101505586, "grad_norm": 0.3839087188243866, "learning_rate": 0.0001, "loss": 1.8002, "step": 4088 }, { "epoch": 0.9929577464788732, "grad_norm": 0.4088483452796936, "learning_rate": 0.0001, "loss": 1.8349, "step": 4089 }, { "epoch": 0.9932005828071879, "grad_norm": 0.37059393525123596, "learning_rate": 0.0001, "loss": 1.7424, "step": 4090 }, { "epoch": 0.9934434191355027, "grad_norm": 0.4004857838153839, "learning_rate": 0.0001, "loss": 1.7749, "step": 4091 }, { "epoch": 0.9936862554638174, "grad_norm": 0.37603604793548584, "learning_rate": 0.0001, "loss": 1.624, "step": 4092 }, { "epoch": 0.9939290917921321, "grad_norm": 0.38639020919799805, "learning_rate": 0.0001, "loss": 1.5217, "step": 4093 }, { "epoch": 0.9941719281204469, "grad_norm": 0.3561013340950012, "learning_rate": 0.0001, "loss": 1.6206, "step": 4094 }, { "epoch": 0.9944147644487615, "grad_norm": 0.38830888271331787, "learning_rate": 0.0001, "loss": 1.7821, "step": 4095 }, { "epoch": 0.9946576007770762, "grad_norm": 0.37778744101524353, "learning_rate": 0.0001, "loss": 1.6911, "step": 4096 }, { "epoch": 0.994900437105391, "grad_norm": 0.3863472044467926, "learning_rate": 0.0001, "loss": 1.7793, "step": 4097 }, { "epoch": 0.9951432734337057, "grad_norm": 0.39536169171333313, "learning_rate": 0.0001, "loss": 1.8105, "step": 4098 }, { "epoch": 0.9953861097620204, "grad_norm": 0.38470953702926636, "learning_rate": 0.0001, "loss": 1.6353, "step": 4099 }, { "epoch": 0.9956289460903351, "grad_norm": 0.36688435077667236, "learning_rate": 0.0001, "loss": 1.8089, "step": 4100 }, { "epoch": 0.9958717824186498, "grad_norm": 0.3831002116203308, "learning_rate": 0.0001, "loss": 1.8249, "step": 4101 }, { "epoch": 0.9961146187469645, "grad_norm": 0.3933352530002594, "learning_rate": 0.0001, "loss": 1.7054, "step": 4102 }, { "epoch": 0.9963574550752793, "grad_norm": 0.36921703815460205, "learning_rate": 0.0001, "loss": 1.6879, "step": 4103 }, { "epoch": 0.996600291403594, "grad_norm": 0.39410069584846497, "learning_rate": 0.0001, "loss": 1.7148, "step": 4104 }, { "epoch": 0.9968431277319086, "grad_norm": 0.3676906228065491, "learning_rate": 0.0001, "loss": 1.6285, "step": 4105 }, { "epoch": 0.9970859640602234, "grad_norm": 0.3701580762863159, "learning_rate": 0.0001, "loss": 1.6518, "step": 4106 }, { "epoch": 0.9973288003885381, "grad_norm": 0.36121150851249695, "learning_rate": 0.0001, "loss": 1.6662, "step": 4107 }, { "epoch": 0.9975716367168529, "grad_norm": 0.36982738971710205, "learning_rate": 0.0001, "loss": 1.6054, "step": 4108 }, { "epoch": 0.9978144730451676, "grad_norm": 0.37975645065307617, "learning_rate": 0.0001, "loss": 1.7856, "step": 4109 }, { "epoch": 0.9980573093734822, "grad_norm": 0.3816273808479309, "learning_rate": 0.0001, "loss": 1.5978, "step": 4110 }, { "epoch": 0.998300145701797, "grad_norm": 0.3661021292209625, "learning_rate": 0.0001, "loss": 1.6231, "step": 4111 }, { "epoch": 0.9985429820301117, "grad_norm": 0.3899565041065216, "learning_rate": 0.0001, "loss": 1.7643, "step": 4112 }, { "epoch": 0.9987858183584264, "grad_norm": 0.39457520842552185, "learning_rate": 0.0001, "loss": 1.8735, "step": 4113 }, { "epoch": 0.9990286546867412, "grad_norm": 0.3817576766014099, "learning_rate": 0.0001, "loss": 1.7596, "step": 4114 }, { "epoch": 0.9992714910150559, "grad_norm": 0.4239327609539032, "learning_rate": 0.0001, "loss": 1.9292, "step": 4115 }, { "epoch": 0.9995143273433705, "grad_norm": 0.4147722125053406, "learning_rate": 0.0001, "loss": 1.7747, "step": 4116 }, { "epoch": 0.9997571636716853, "grad_norm": 0.3927810788154602, "learning_rate": 0.0001, "loss": 1.6095, "step": 4117 }, { "epoch": 1.0, "grad_norm": 0.3737819790840149, "learning_rate": 0.0001, "loss": 1.7487, "step": 4118 }, { "epoch": 1.0, "step": 4118, "total_flos": 6.212058917275435e+18, "train_loss": 1.7339236946173118, "train_runtime": 80202.1439, "train_samples_per_second": 0.205, "train_steps_per_second": 0.051 } ], "logging_steps": 1.0, "max_steps": 4118, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 6.212058917275435e+18, "train_batch_size": 2, "trial_name": null, "trial_params": null }