{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9896907216494846, "eval_steps": 500, "global_step": 435, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.006872852233676976, "grad_norm": 0.8808155010538851, "learning_rate": 4.5454545454545455e-06, "loss": 1.3046, "step": 1 }, { "epoch": 0.013745704467353952, "grad_norm": 0.8837076421227394, "learning_rate": 9.090909090909091e-06, "loss": 1.3105, "step": 2 }, { "epoch": 0.020618556701030927, "grad_norm": 0.8883116505084088, "learning_rate": 1.3636363636363637e-05, "loss": 1.3429, "step": 3 }, { "epoch": 0.027491408934707903, "grad_norm": 0.8774062532818568, "learning_rate": 1.8181818181818182e-05, "loss": 1.3362, "step": 4 }, { "epoch": 0.03436426116838488, "grad_norm": 0.8400804960821896, "learning_rate": 2.272727272727273e-05, "loss": 1.2737, "step": 5 }, { "epoch": 0.041237113402061855, "grad_norm": 0.8061300676137277, "learning_rate": 2.7272727272727273e-05, "loss": 1.287, "step": 6 }, { "epoch": 0.048109965635738834, "grad_norm": 0.7020133809368244, "learning_rate": 3.181818181818182e-05, "loss": 1.2053, "step": 7 }, { "epoch": 0.054982817869415807, "grad_norm": 0.5634654476509289, "learning_rate": 3.6363636363636364e-05, "loss": 1.1234, "step": 8 }, { "epoch": 0.061855670103092786, "grad_norm": 0.5203352830419217, "learning_rate": 4.0909090909090915e-05, "loss": 1.08, "step": 9 }, { "epoch": 0.06872852233676977, "grad_norm": 0.5411670544659613, "learning_rate": 4.545454545454546e-05, "loss": 1.0238, "step": 10 }, { "epoch": 0.07560137457044673, "grad_norm": 0.5855321223758054, "learning_rate": 5e-05, "loss": 0.9608, "step": 11 }, { "epoch": 0.08247422680412371, "grad_norm": 0.5791005231127199, "learning_rate": 5.4545454545454546e-05, "loss": 0.8885, "step": 12 }, { "epoch": 0.08934707903780069, "grad_norm": 0.553535114528042, "learning_rate": 5.90909090909091e-05, "loss": 0.8041, "step": 13 }, { "epoch": 0.09621993127147767, "grad_norm": 0.5156049248171256, "learning_rate": 6.363636363636364e-05, "loss": 0.7594, "step": 14 }, { "epoch": 0.10309278350515463, "grad_norm": 0.5150676508038536, "learning_rate": 6.818181818181818e-05, "loss": 0.7018, "step": 15 }, { "epoch": 0.10996563573883161, "grad_norm": 0.43964075096694194, "learning_rate": 7.272727272727273e-05, "loss": 0.6468, "step": 16 }, { "epoch": 0.11683848797250859, "grad_norm": 0.3808296402586853, "learning_rate": 7.727272727272727e-05, "loss": 0.6008, "step": 17 }, { "epoch": 0.12371134020618557, "grad_norm": 0.2512564695769253, "learning_rate": 8.181818181818183e-05, "loss": 0.5707, "step": 18 }, { "epoch": 0.13058419243986255, "grad_norm": 0.2660044368451707, "learning_rate": 8.636363636363637e-05, "loss": 0.5493, "step": 19 }, { "epoch": 0.13745704467353953, "grad_norm": 0.23218268140828116, "learning_rate": 9.090909090909092e-05, "loss": 0.5425, "step": 20 }, { "epoch": 0.14432989690721648, "grad_norm": 0.23816008579498857, "learning_rate": 9.545454545454546e-05, "loss": 0.5089, "step": 21 }, { "epoch": 0.15120274914089346, "grad_norm": 0.2223913690275482, "learning_rate": 0.0001, "loss": 0.5108, "step": 22 }, { "epoch": 0.15807560137457044, "grad_norm": 0.22759090811628024, "learning_rate": 0.00010454545454545455, "loss": 0.531, "step": 23 }, { "epoch": 0.16494845360824742, "grad_norm": 0.20521730744998928, "learning_rate": 0.00010909090909090909, "loss": 0.5149, "step": 24 }, { "epoch": 0.1718213058419244, "grad_norm": 0.18339095123680196, "learning_rate": 0.00011363636363636365, "loss": 0.4889, "step": 25 }, { "epoch": 0.17869415807560138, "grad_norm": 0.1410652471512316, "learning_rate": 0.0001181818181818182, "loss": 0.4817, "step": 26 }, { "epoch": 0.18556701030927836, "grad_norm": 0.14181357870570988, "learning_rate": 0.00012272727272727272, "loss": 0.4693, "step": 27 }, { "epoch": 0.19243986254295534, "grad_norm": 0.13430416072548018, "learning_rate": 0.00012727272727272728, "loss": 0.4608, "step": 28 }, { "epoch": 0.19931271477663232, "grad_norm": 0.11567186906866042, "learning_rate": 0.0001318181818181818, "loss": 0.4484, "step": 29 }, { "epoch": 0.20618556701030927, "grad_norm": 0.12218477154105682, "learning_rate": 0.00013636363636363637, "loss": 0.4569, "step": 30 }, { "epoch": 0.21305841924398625, "grad_norm": 0.12536509367629275, "learning_rate": 0.00014090909090909093, "loss": 0.453, "step": 31 }, { "epoch": 0.21993127147766323, "grad_norm": 0.118995576206575, "learning_rate": 0.00014545454545454546, "loss": 0.4439, "step": 32 }, { "epoch": 0.2268041237113402, "grad_norm": 0.12172401901535913, "learning_rate": 0.00015000000000000001, "loss": 0.4467, "step": 33 }, { "epoch": 0.23367697594501718, "grad_norm": 0.1108164025517821, "learning_rate": 0.00015454545454545454, "loss": 0.458, "step": 34 }, { "epoch": 0.24054982817869416, "grad_norm": 0.11894024672570398, "learning_rate": 0.0001590909090909091, "loss": 0.4625, "step": 35 }, { "epoch": 0.24742268041237114, "grad_norm": 0.10916146722689778, "learning_rate": 0.00016363636363636366, "loss": 0.4353, "step": 36 }, { "epoch": 0.2542955326460481, "grad_norm": 0.10204842768302742, "learning_rate": 0.0001681818181818182, "loss": 0.434, "step": 37 }, { "epoch": 0.2611683848797251, "grad_norm": 0.10533296570452817, "learning_rate": 0.00017272727272727275, "loss": 0.4316, "step": 38 }, { "epoch": 0.26804123711340205, "grad_norm": 0.10077433186165685, "learning_rate": 0.00017727272727272728, "loss": 0.4179, "step": 39 }, { "epoch": 0.27491408934707906, "grad_norm": 0.11807660854841677, "learning_rate": 0.00018181818181818183, "loss": 0.4262, "step": 40 }, { "epoch": 0.281786941580756, "grad_norm": 0.0989056129554379, "learning_rate": 0.00018636363636363636, "loss": 0.4194, "step": 41 }, { "epoch": 0.28865979381443296, "grad_norm": 0.09850476327642269, "learning_rate": 0.00019090909090909092, "loss": 0.4253, "step": 42 }, { "epoch": 0.29553264604810997, "grad_norm": 0.10213513575040335, "learning_rate": 0.00019545454545454548, "loss": 0.4154, "step": 43 }, { "epoch": 0.3024054982817869, "grad_norm": 0.10039613584445682, "learning_rate": 0.0002, "loss": 0.4131, "step": 44 }, { "epoch": 0.30927835051546393, "grad_norm": 0.09548916532972716, "learning_rate": 0.00019999677214588312, "loss": 0.4098, "step": 45 }, { "epoch": 0.3161512027491409, "grad_norm": 0.08875143854200031, "learning_rate": 0.00019998708879191335, "loss": 0.4002, "step": 46 }, { "epoch": 0.3230240549828179, "grad_norm": 0.09227050448571845, "learning_rate": 0.00019997095056321971, "loss": 0.401, "step": 47 }, { "epoch": 0.32989690721649484, "grad_norm": 0.09734533604430338, "learning_rate": 0.00019994835850163924, "loss": 0.4018, "step": 48 }, { "epoch": 0.33676975945017185, "grad_norm": 0.0934892591292568, "learning_rate": 0.00019991931406564944, "loss": 0.4093, "step": 49 }, { "epoch": 0.3436426116838488, "grad_norm": 0.09630034700320117, "learning_rate": 0.00019988381913027442, "loss": 0.4148, "step": 50 }, { "epoch": 0.35051546391752575, "grad_norm": 0.09691841534500147, "learning_rate": 0.00019984187598696363, "loss": 0.4042, "step": 51 }, { "epoch": 0.35738831615120276, "grad_norm": 0.09717400846861948, "learning_rate": 0.00019979348734344398, "loss": 0.3978, "step": 52 }, { "epoch": 0.3642611683848797, "grad_norm": 0.08983890835940804, "learning_rate": 0.00019973865632354516, "loss": 0.3996, "step": 53 }, { "epoch": 0.3711340206185567, "grad_norm": 0.08949834924068677, "learning_rate": 0.0001996773864669978, "loss": 0.394, "step": 54 }, { "epoch": 0.37800687285223367, "grad_norm": 0.09266153338513465, "learning_rate": 0.00019960968172920516, "loss": 0.3788, "step": 55 }, { "epoch": 0.3848797250859107, "grad_norm": 0.09025552188224956, "learning_rate": 0.00019953554648098748, "loss": 0.4041, "step": 56 }, { "epoch": 0.3917525773195876, "grad_norm": 0.08488224181630279, "learning_rate": 0.0001994549855083001, "loss": 0.3848, "step": 57 }, { "epoch": 0.39862542955326463, "grad_norm": 0.09482153746102255, "learning_rate": 0.0001993680040119244, "loss": 0.3959, "step": 58 }, { "epoch": 0.4054982817869416, "grad_norm": 0.09328848987824308, "learning_rate": 0.00019927460760713197, "loss": 0.3851, "step": 59 }, { "epoch": 0.41237113402061853, "grad_norm": 0.08618631131441057, "learning_rate": 0.00019917480232332224, "loss": 0.389, "step": 60 }, { "epoch": 0.41924398625429554, "grad_norm": 0.08833677141910345, "learning_rate": 0.00019906859460363307, "loss": 0.397, "step": 61 }, { "epoch": 0.4261168384879725, "grad_norm": 0.08599936842298324, "learning_rate": 0.00019895599130452505, "loss": 0.3832, "step": 62 }, { "epoch": 0.4329896907216495, "grad_norm": 0.09103734223309105, "learning_rate": 0.0001988369996953386, "loss": 0.3932, "step": 63 }, { "epoch": 0.43986254295532645, "grad_norm": 0.0897659233390075, "learning_rate": 0.00019871162745782478, "loss": 0.3815, "step": 64 }, { "epoch": 0.44673539518900346, "grad_norm": 0.10726255183504214, "learning_rate": 0.00019857988268564953, "loss": 0.3832, "step": 65 }, { "epoch": 0.4536082474226804, "grad_norm": 0.08817755172035288, "learning_rate": 0.0001984417738838709, "loss": 0.3795, "step": 66 }, { "epoch": 0.46048109965635736, "grad_norm": 0.09345043988393144, "learning_rate": 0.0001982973099683902, "loss": 0.3815, "step": 67 }, { "epoch": 0.46735395189003437, "grad_norm": 0.09305840536421162, "learning_rate": 0.0001981465002653763, "loss": 0.4007, "step": 68 }, { "epoch": 0.4742268041237113, "grad_norm": 0.09607476739884908, "learning_rate": 0.00019798935451066361, "loss": 0.3809, "step": 69 }, { "epoch": 0.48109965635738833, "grad_norm": 0.0901546218853118, "learning_rate": 0.0001978258828491236, "loss": 0.3943, "step": 70 }, { "epoch": 0.4879725085910653, "grad_norm": 0.0894295974911312, "learning_rate": 0.00019765609583400977, "loss": 0.3871, "step": 71 }, { "epoch": 0.4948453608247423, "grad_norm": 0.09070234315219955, "learning_rate": 0.0001974800044262764, "loss": 0.3829, "step": 72 }, { "epoch": 0.5017182130584192, "grad_norm": 0.09202244079027587, "learning_rate": 0.00019729761999387103, "loss": 0.3986, "step": 73 }, { "epoch": 0.5085910652920962, "grad_norm": 0.09632314725424158, "learning_rate": 0.00019710895431100046, "loss": 0.3898, "step": 74 }, { "epoch": 0.5154639175257731, "grad_norm": 0.08733172925389271, "learning_rate": 0.00019691401955737072, "loss": 0.3837, "step": 75 }, { "epoch": 0.5223367697594502, "grad_norm": 0.10056720034956684, "learning_rate": 0.00019671282831740076, "loss": 0.3686, "step": 76 }, { "epoch": 0.5292096219931272, "grad_norm": 0.09612617875511892, "learning_rate": 0.00019650539357941003, "loss": 0.3838, "step": 77 }, { "epoch": 0.5360824742268041, "grad_norm": 0.10208272966951391, "learning_rate": 0.00019629172873477995, "loss": 0.3838, "step": 78 }, { "epoch": 0.5429553264604811, "grad_norm": 0.09681279787064641, "learning_rate": 0.00019607184757708951, "loss": 0.3749, "step": 79 }, { "epoch": 0.5498281786941581, "grad_norm": 0.09211778867112251, "learning_rate": 0.00019584576430122473, "loss": 0.3687, "step": 80 }, { "epoch": 0.5567010309278351, "grad_norm": 0.09404914729887474, "learning_rate": 0.00019561349350246226, "loss": 0.3843, "step": 81 }, { "epoch": 0.563573883161512, "grad_norm": 0.08829555711343479, "learning_rate": 0.00019537505017552716, "loss": 0.3593, "step": 82 }, { "epoch": 0.570446735395189, "grad_norm": 0.08892007285913149, "learning_rate": 0.00019513044971362494, "loss": 0.3606, "step": 83 }, { "epoch": 0.5773195876288659, "grad_norm": 0.08928642771485325, "learning_rate": 0.00019487970790744774, "loss": 0.3768, "step": 84 }, { "epoch": 0.584192439862543, "grad_norm": 0.09522002456888026, "learning_rate": 0.000194622840944155, "loss": 0.3722, "step": 85 }, { "epoch": 0.5910652920962199, "grad_norm": 0.0909488382487123, "learning_rate": 0.00019435986540632843, "loss": 0.3708, "step": 86 }, { "epoch": 0.5979381443298969, "grad_norm": 0.09831046466987592, "learning_rate": 0.00019409079827090145, "loss": 0.3638, "step": 87 }, { "epoch": 0.6048109965635738, "grad_norm": 0.0983620918050961, "learning_rate": 0.00019381565690806328, "loss": 0.3601, "step": 88 }, { "epoch": 0.6116838487972509, "grad_norm": 0.09644986661420377, "learning_rate": 0.00019353445908013755, "loss": 0.373, "step": 89 }, { "epoch": 0.6185567010309279, "grad_norm": 0.09539056648598977, "learning_rate": 0.00019324722294043558, "loss": 0.3817, "step": 90 }, { "epoch": 0.6254295532646048, "grad_norm": 0.08885977439706004, "learning_rate": 0.00019295396703208453, "loss": 0.3605, "step": 91 }, { "epoch": 0.6323024054982818, "grad_norm": 0.08772187936350022, "learning_rate": 0.00019265471028683014, "loss": 0.3624, "step": 92 }, { "epoch": 0.6391752577319587, "grad_norm": 0.09711808830918083, "learning_rate": 0.00019234947202381486, "loss": 0.3715, "step": 93 }, { "epoch": 0.6460481099656358, "grad_norm": 0.0847748744771232, "learning_rate": 0.00019203827194833026, "loss": 0.384, "step": 94 }, { "epoch": 0.6529209621993127, "grad_norm": 0.09372490667720777, "learning_rate": 0.00019172113015054532, "loss": 0.3715, "step": 95 }, { "epoch": 0.6597938144329897, "grad_norm": 0.08794740597609421, "learning_rate": 0.00019139806710420914, "loss": 0.3674, "step": 96 }, { "epoch": 0.6666666666666666, "grad_norm": 0.08904937124808622, "learning_rate": 0.00019106910366532942, "loss": 0.3523, "step": 97 }, { "epoch": 0.6735395189003437, "grad_norm": 0.09441908555917516, "learning_rate": 0.000190734261070826, "loss": 0.3591, "step": 98 }, { "epoch": 0.6804123711340206, "grad_norm": 0.08942655574279681, "learning_rate": 0.00019039356093715975, "loss": 0.363, "step": 99 }, { "epoch": 0.6872852233676976, "grad_norm": 0.09460143758744942, "learning_rate": 0.00019004702525893732, "loss": 0.3748, "step": 100 }, { "epoch": 0.6941580756013745, "grad_norm": 0.09082033159745441, "learning_rate": 0.000189694676407491, "loss": 0.3566, "step": 101 }, { "epoch": 0.7010309278350515, "grad_norm": 0.08958646989363664, "learning_rate": 0.0001893365371294346, "loss": 0.3583, "step": 102 }, { "epoch": 0.7079037800687286, "grad_norm": 0.08849903305951616, "learning_rate": 0.00018897263054519498, "loss": 0.3524, "step": 103 }, { "epoch": 0.7147766323024055, "grad_norm": 0.08872183059116338, "learning_rate": 0.00018860298014751944, "loss": 0.3782, "step": 104 }, { "epoch": 0.7216494845360825, "grad_norm": 0.08675401659801961, "learning_rate": 0.0001882276097999592, "loss": 0.3653, "step": 105 }, { "epoch": 0.7285223367697594, "grad_norm": 0.08529423927605412, "learning_rate": 0.00018784654373532866, "loss": 0.352, "step": 106 }, { "epoch": 0.7353951890034365, "grad_norm": 0.09349610844394966, "learning_rate": 0.00018745980655414114, "loss": 0.3779, "step": 107 }, { "epoch": 0.7422680412371134, "grad_norm": 0.08828855293867635, "learning_rate": 0.00018706742322302064, "loss": 0.3773, "step": 108 }, { "epoch": 0.7491408934707904, "grad_norm": 0.08977338477187427, "learning_rate": 0.00018666941907309026, "loss": 0.3651, "step": 109 }, { "epoch": 0.7560137457044673, "grad_norm": 0.0892290194907705, "learning_rate": 0.0001862658197983366, "loss": 0.3626, "step": 110 }, { "epoch": 0.7628865979381443, "grad_norm": 0.0856666054934264, "learning_rate": 0.0001858566514539513, "loss": 0.348, "step": 111 }, { "epoch": 0.7697594501718213, "grad_norm": 0.09180917706699249, "learning_rate": 0.00018544194045464886, "loss": 0.3669, "step": 112 }, { "epoch": 0.7766323024054983, "grad_norm": 0.09276555250612153, "learning_rate": 0.00018502171357296144, "loss": 0.3628, "step": 113 }, { "epoch": 0.7835051546391752, "grad_norm": 0.09289466268533818, "learning_rate": 0.0001845959979375104, "loss": 0.3526, "step": 114 }, { "epoch": 0.7903780068728522, "grad_norm": 0.09726599258876537, "learning_rate": 0.00018416482103125506, "loss": 0.3704, "step": 115 }, { "epoch": 0.7972508591065293, "grad_norm": 0.08564494364397046, "learning_rate": 0.0001837282106897185, "loss": 0.3553, "step": 116 }, { "epoch": 0.8041237113402062, "grad_norm": 0.08954111754416803, "learning_rate": 0.00018328619509919044, "loss": 0.3491, "step": 117 }, { "epoch": 0.8109965635738832, "grad_norm": 0.0913094403572883, "learning_rate": 0.0001828388027949078, "loss": 0.3637, "step": 118 }, { "epoch": 0.8178694158075601, "grad_norm": 0.08662716026489146, "learning_rate": 0.00018238606265921238, "loss": 0.3537, "step": 119 }, { "epoch": 0.8247422680412371, "grad_norm": 0.08813455001635272, "learning_rate": 0.00018192800391968642, "loss": 0.3691, "step": 120 }, { "epoch": 0.8316151202749141, "grad_norm": 0.09371831479415668, "learning_rate": 0.00018146465614726567, "loss": 0.3707, "step": 121 }, { "epoch": 0.8384879725085911, "grad_norm": 0.08359086307507001, "learning_rate": 0.00018099604925433043, "loss": 0.3645, "step": 122 }, { "epoch": 0.845360824742268, "grad_norm": 0.09216880844230459, "learning_rate": 0.00018052221349277442, "loss": 0.3565, "step": 123 }, { "epoch": 0.852233676975945, "grad_norm": 0.08471708919906472, "learning_rate": 0.00018004317945205197, "loss": 0.3545, "step": 124 }, { "epoch": 0.8591065292096219, "grad_norm": 0.08703130501246686, "learning_rate": 0.0001795589780572031, "loss": 0.3639, "step": 125 }, { "epoch": 0.865979381443299, "grad_norm": 0.09057730560122727, "learning_rate": 0.00017906964056685706, "loss": 0.3507, "step": 126 }, { "epoch": 0.872852233676976, "grad_norm": 0.09292205553606385, "learning_rate": 0.00017857519857121458, "loss": 0.3659, "step": 127 }, { "epoch": 0.8797250859106529, "grad_norm": 0.0849541320516587, "learning_rate": 0.00017807568399000822, "loss": 0.3501, "step": 128 }, { "epoch": 0.8865979381443299, "grad_norm": 0.09274091459468825, "learning_rate": 0.000177571129070442, "loss": 0.3537, "step": 129 }, { "epoch": 0.8934707903780069, "grad_norm": 0.09037805593626291, "learning_rate": 0.0001770615663851093, "loss": 0.3615, "step": 130 }, { "epoch": 0.9003436426116839, "grad_norm": 0.08501414876879057, "learning_rate": 0.0001765470288298905, "loss": 0.364, "step": 131 }, { "epoch": 0.9072164948453608, "grad_norm": 0.0873618917017783, "learning_rate": 0.0001760275496218288, "loss": 0.3661, "step": 132 }, { "epoch": 0.9140893470790378, "grad_norm": 0.09203391208780351, "learning_rate": 0.0001755031622969862, "loss": 0.3619, "step": 133 }, { "epoch": 0.9209621993127147, "grad_norm": 0.08308747880797346, "learning_rate": 0.00017497390070827848, "loss": 0.3467, "step": 134 }, { "epoch": 0.9278350515463918, "grad_norm": 0.09386492668545833, "learning_rate": 0.00017443979902328956, "loss": 0.36, "step": 135 }, { "epoch": 0.9347079037800687, "grad_norm": 0.09261241215288052, "learning_rate": 0.00017390089172206592, "loss": 0.3609, "step": 136 }, { "epoch": 0.9415807560137457, "grad_norm": 0.08879952645877409, "learning_rate": 0.00017335721359489057, "loss": 0.3603, "step": 137 }, { "epoch": 0.9484536082474226, "grad_norm": 0.08265878491768738, "learning_rate": 0.00017280879974003707, "loss": 0.3566, "step": 138 }, { "epoch": 0.9553264604810997, "grad_norm": 0.08716647280731805, "learning_rate": 0.0001722556855615039, "loss": 0.3519, "step": 139 }, { "epoch": 0.9621993127147767, "grad_norm": 0.0922933236258279, "learning_rate": 0.00017169790676672858, "loss": 0.3552, "step": 140 }, { "epoch": 0.9690721649484536, "grad_norm": 0.08888211633055264, "learning_rate": 0.0001711354993642827, "loss": 0.3461, "step": 141 }, { "epoch": 0.9759450171821306, "grad_norm": 0.08595044701350857, "learning_rate": 0.0001705684996615472, "loss": 0.3651, "step": 142 }, { "epoch": 0.9828178694158075, "grad_norm": 0.0863466471667844, "learning_rate": 0.0001699969442623686, "loss": 0.3601, "step": 143 }, { "epoch": 0.9896907216494846, "grad_norm": 0.09313568523721967, "learning_rate": 0.00016942087006469592, "loss": 0.3569, "step": 144 }, { "epoch": 0.9965635738831615, "grad_norm": 0.0903996011360782, "learning_rate": 0.00016884031425819853, "loss": 0.3481, "step": 145 }, { "epoch": 0.9965635738831615, "eval_loss": 0.3567394018173218, "eval_runtime": 35.6889, "eval_samples_per_second": 27.432, "eval_steps_per_second": 0.869, "step": 145 }, { "epoch": 1.0034364261168385, "grad_norm": 0.08782681595861347, "learning_rate": 0.00016825531432186543, "loss": 0.3375, "step": 146 }, { "epoch": 1.0103092783505154, "grad_norm": 0.08803384953829434, "learning_rate": 0.00016766590802158566, "loss": 0.3619, "step": 147 }, { "epoch": 1.0171821305841924, "grad_norm": 0.0816994830925102, "learning_rate": 0.0001670721334077103, "loss": 0.3466, "step": 148 }, { "epoch": 1.0240549828178693, "grad_norm": 0.0827010426630684, "learning_rate": 0.00016647402881259598, "loss": 0.3312, "step": 149 }, { "epoch": 1.0309278350515463, "grad_norm": 0.09258788228951616, "learning_rate": 0.00016587163284813032, "loss": 0.3414, "step": 150 }, { "epoch": 1.0378006872852235, "grad_norm": 0.09085251831889371, "learning_rate": 0.00016526498440323914, "loss": 0.3389, "step": 151 }, { "epoch": 1.0446735395189004, "grad_norm": 0.09192659698142644, "learning_rate": 0.0001646541226413761, "loss": 0.351, "step": 152 }, { "epoch": 1.0515463917525774, "grad_norm": 0.08501861411219969, "learning_rate": 0.00016403908699799425, "loss": 0.3368, "step": 153 }, { "epoch": 1.0584192439862543, "grad_norm": 0.0908688248172718, "learning_rate": 0.00016341991717800023, "loss": 0.3392, "step": 154 }, { "epoch": 1.0652920962199313, "grad_norm": 0.0862317294687601, "learning_rate": 0.00016279665315319114, "loss": 0.3419, "step": 155 }, { "epoch": 1.0721649484536082, "grad_norm": 0.09549298527828191, "learning_rate": 0.0001621693351596739, "loss": 0.3326, "step": 156 }, { "epoch": 1.0790378006872852, "grad_norm": 0.08932594426411528, "learning_rate": 0.00016153800369526788, "loss": 0.3421, "step": 157 }, { "epoch": 1.0859106529209621, "grad_norm": 0.08944710132986658, "learning_rate": 0.0001609026995168904, "loss": 0.3352, "step": 158 }, { "epoch": 1.0927835051546393, "grad_norm": 0.08841072232596323, "learning_rate": 0.00016026346363792567, "loss": 0.3462, "step": 159 }, { "epoch": 1.0996563573883162, "grad_norm": 0.08644545934177786, "learning_rate": 0.00015962033732557686, "loss": 0.3333, "step": 160 }, { "epoch": 1.1065292096219932, "grad_norm": 0.08575048202262156, "learning_rate": 0.00015897336209820239, "loss": 0.337, "step": 161 }, { "epoch": 1.1134020618556701, "grad_norm": 0.08846069353271505, "learning_rate": 0.00015832257972263523, "loss": 0.335, "step": 162 }, { "epoch": 1.120274914089347, "grad_norm": 0.08639733609654054, "learning_rate": 0.00015766803221148673, "loss": 0.3377, "step": 163 }, { "epoch": 1.127147766323024, "grad_norm": 0.08952723466390139, "learning_rate": 0.0001570097618204345, "loss": 0.3313, "step": 164 }, { "epoch": 1.134020618556701, "grad_norm": 0.09160126155421501, "learning_rate": 0.00015634781104549442, "loss": 0.3374, "step": 165 }, { "epoch": 1.140893470790378, "grad_norm": 0.0861415865118192, "learning_rate": 0.00015568222262027717, "loss": 0.3385, "step": 166 }, { "epoch": 1.147766323024055, "grad_norm": 0.0892272267738352, "learning_rate": 0.00015501303951322943, "loss": 0.3347, "step": 167 }, { "epoch": 1.1546391752577319, "grad_norm": 0.08908420140050345, "learning_rate": 0.00015434030492486023, "loss": 0.3304, "step": 168 }, { "epoch": 1.161512027491409, "grad_norm": 0.08770848161332553, "learning_rate": 0.00015366406228495172, "loss": 0.3295, "step": 169 }, { "epoch": 1.168384879725086, "grad_norm": 0.08965869421343549, "learning_rate": 0.00015298435524975572, "loss": 0.3359, "step": 170 }, { "epoch": 1.175257731958763, "grad_norm": 0.08642899060579919, "learning_rate": 0.00015230122769917527, "loss": 0.334, "step": 171 }, { "epoch": 1.1821305841924399, "grad_norm": 0.08975773852908198, "learning_rate": 0.00015161472373393186, "loss": 0.3398, "step": 172 }, { "epoch": 1.1890034364261168, "grad_norm": 0.08518298546780753, "learning_rate": 0.00015092488767271857, "loss": 0.3313, "step": 173 }, { "epoch": 1.1958762886597938, "grad_norm": 0.08941986976791949, "learning_rate": 0.00015023176404933874, "loss": 0.3253, "step": 174 }, { "epoch": 1.2027491408934707, "grad_norm": 0.09160487176057319, "learning_rate": 0.00014953539760983122, "loss": 0.3379, "step": 175 }, { "epoch": 1.2096219931271477, "grad_norm": 0.0849090301975046, "learning_rate": 0.0001488358333095816, "loss": 0.3296, "step": 176 }, { "epoch": 1.2164948453608249, "grad_norm": 0.09285043330112927, "learning_rate": 0.00014813311631041995, "loss": 0.3364, "step": 177 }, { "epoch": 1.2233676975945018, "grad_norm": 0.0897620035811687, "learning_rate": 0.00014742729197770552, "loss": 0.3354, "step": 178 }, { "epoch": 1.2302405498281788, "grad_norm": 0.08914984409937951, "learning_rate": 0.00014671840587739783, "loss": 0.3458, "step": 179 }, { "epoch": 1.2371134020618557, "grad_norm": 0.09040579690749873, "learning_rate": 0.00014600650377311522, "loss": 0.3403, "step": 180 }, { "epoch": 1.2439862542955327, "grad_norm": 0.08605565292313601, "learning_rate": 0.0001452916316231805, "loss": 0.331, "step": 181 }, { "epoch": 1.2508591065292096, "grad_norm": 0.08979636897760025, "learning_rate": 0.00014457383557765386, "loss": 0.3311, "step": 182 }, { "epoch": 1.2577319587628866, "grad_norm": 0.08586694856320021, "learning_rate": 0.00014385316197535372, "loss": 0.3426, "step": 183 }, { "epoch": 1.2646048109965635, "grad_norm": 0.09048157344056702, "learning_rate": 0.00014312965734086518, "loss": 0.3383, "step": 184 }, { "epoch": 1.2714776632302405, "grad_norm": 0.09075786427342458, "learning_rate": 0.0001424033683815365, "loss": 0.336, "step": 185 }, { "epoch": 1.2783505154639174, "grad_norm": 0.0879839237048081, "learning_rate": 0.00014167434198446383, "loss": 0.337, "step": 186 }, { "epoch": 1.2852233676975944, "grad_norm": 0.08693212719565475, "learning_rate": 0.00014094262521346427, "loss": 0.3253, "step": 187 }, { "epoch": 1.2920962199312716, "grad_norm": 0.0885405535824916, "learning_rate": 0.00014020826530603776, "loss": 0.3305, "step": 188 }, { "epoch": 1.2989690721649485, "grad_norm": 0.0904204838771864, "learning_rate": 0.00013947130967031717, "loss": 0.3413, "step": 189 }, { "epoch": 1.3058419243986255, "grad_norm": 0.08974576533367018, "learning_rate": 0.00013873180588200827, "loss": 0.3393, "step": 190 }, { "epoch": 1.3127147766323024, "grad_norm": 0.09291805913909876, "learning_rate": 0.00013798980168131794, "loss": 0.3389, "step": 191 }, { "epoch": 1.3195876288659794, "grad_norm": 0.09044119443531376, "learning_rate": 0.00013724534496987247, "loss": 0.3341, "step": 192 }, { "epoch": 1.3264604810996563, "grad_norm": 0.09284731464676423, "learning_rate": 0.00013649848380762513, "loss": 0.3332, "step": 193 }, { "epoch": 1.3333333333333333, "grad_norm": 0.09496521530156195, "learning_rate": 0.0001357492664097534, "loss": 0.3418, "step": 194 }, { "epoch": 1.3402061855670104, "grad_norm": 0.09300741794557242, "learning_rate": 0.00013499774114354655, "loss": 0.335, "step": 195 }, { "epoch": 1.3470790378006874, "grad_norm": 0.09064966485662805, "learning_rate": 0.0001342439565252831, "loss": 0.324, "step": 196 }, { "epoch": 1.3539518900343643, "grad_norm": 0.0964677730019416, "learning_rate": 0.00013348796121709862, "loss": 0.3336, "step": 197 }, { "epoch": 1.3608247422680413, "grad_norm": 0.09295059509899933, "learning_rate": 0.0001327298040238446, "loss": 0.3339, "step": 198 }, { "epoch": 1.3676975945017182, "grad_norm": 0.09314673210920393, "learning_rate": 0.00013196953388993726, "loss": 0.3286, "step": 199 }, { "epoch": 1.3745704467353952, "grad_norm": 0.0889292114826813, "learning_rate": 0.00013120719989619833, "loss": 0.3358, "step": 200 }, { "epoch": 1.3814432989690721, "grad_norm": 0.09124463200105672, "learning_rate": 0.00013044285125668614, "loss": 0.3438, "step": 201 }, { "epoch": 1.388316151202749, "grad_norm": 0.08897358501358303, "learning_rate": 0.0001296765373155188, "loss": 0.3411, "step": 202 }, { "epoch": 1.395189003436426, "grad_norm": 0.09041374802400134, "learning_rate": 0.00012890830754368855, "loss": 0.3398, "step": 203 }, { "epoch": 1.402061855670103, "grad_norm": 0.08521282966783036, "learning_rate": 0.0001281382115358679, "loss": 0.3374, "step": 204 }, { "epoch": 1.40893470790378, "grad_norm": 0.08813226433937452, "learning_rate": 0.0001273662990072083, "loss": 0.3357, "step": 205 }, { "epoch": 1.4158075601374571, "grad_norm": 0.08745232388121076, "learning_rate": 0.00012659261979013043, "loss": 0.3222, "step": 206 }, { "epoch": 1.422680412371134, "grad_norm": 0.0898416285395422, "learning_rate": 0.00012581722383110718, "loss": 0.3352, "step": 207 }, { "epoch": 1.429553264604811, "grad_norm": 0.08787574891295864, "learning_rate": 0.00012504016118743935, "loss": 0.3386, "step": 208 }, { "epoch": 1.436426116838488, "grad_norm": 0.08541270802176625, "learning_rate": 0.00012426148202402404, "loss": 0.3434, "step": 209 }, { "epoch": 1.443298969072165, "grad_norm": 0.08637908829018968, "learning_rate": 0.00012348123661011601, "loss": 0.3317, "step": 210 }, { "epoch": 1.4501718213058419, "grad_norm": 0.08749867010652411, "learning_rate": 0.00012269947531608276, "loss": 0.3446, "step": 211 }, { "epoch": 1.4570446735395188, "grad_norm": 0.08640325511022803, "learning_rate": 0.00012191624861015254, "loss": 0.343, "step": 212 }, { "epoch": 1.463917525773196, "grad_norm": 0.09199913362313598, "learning_rate": 0.00012113160705515625, "loss": 0.3385, "step": 213 }, { "epoch": 1.470790378006873, "grad_norm": 0.08992246148202934, "learning_rate": 0.0001203456013052634, "loss": 0.3376, "step": 214 }, { "epoch": 1.47766323024055, "grad_norm": 0.09160087841822283, "learning_rate": 0.00011955828210271187, "loss": 0.3403, "step": 215 }, { "epoch": 1.4845360824742269, "grad_norm": 0.0887799155644521, "learning_rate": 0.00011876970027453222, "loss": 0.3303, "step": 216 }, { "epoch": 1.4914089347079038, "grad_norm": 0.08886359485860894, "learning_rate": 0.00011797990672926652, "loss": 0.3344, "step": 217 }, { "epoch": 1.4982817869415808, "grad_norm": 0.09255091225198908, "learning_rate": 0.00011718895245368167, "loss": 0.328, "step": 218 }, { "epoch": 1.5051546391752577, "grad_norm": 0.08667303799347106, "learning_rate": 0.00011639688850947799, "loss": 0.3315, "step": 219 }, { "epoch": 1.5120274914089347, "grad_norm": 0.09175513887904742, "learning_rate": 0.00011560376602999272, "loss": 0.3387, "step": 220 }, { "epoch": 1.5189003436426116, "grad_norm": 0.09367250024088228, "learning_rate": 0.00011480963621689905, "loss": 0.3478, "step": 221 }, { "epoch": 1.5257731958762886, "grad_norm": 0.09109402811269762, "learning_rate": 0.00011401455033690076, "loss": 0.3315, "step": 222 }, { "epoch": 1.5326460481099655, "grad_norm": 0.08760164800021272, "learning_rate": 0.00011321855971842243, "loss": 0.3261, "step": 223 }, { "epoch": 1.5395189003436425, "grad_norm": 0.08849686044274614, "learning_rate": 0.00011242171574829599, "loss": 0.3268, "step": 224 }, { "epoch": 1.5463917525773194, "grad_norm": 0.09158926339030027, "learning_rate": 0.00011162406986844323, "loss": 0.3315, "step": 225 }, { "epoch": 1.5532646048109966, "grad_norm": 0.09713922336845163, "learning_rate": 0.00011082567357255484, "loss": 0.3343, "step": 226 }, { "epoch": 1.5601374570446735, "grad_norm": 0.0898092630687934, "learning_rate": 0.00011002657840276627, "loss": 0.3483, "step": 227 }, { "epoch": 1.5670103092783505, "grad_norm": 0.0874368101733527, "learning_rate": 0.00010922683594633021, "loss": 0.3315, "step": 228 }, { "epoch": 1.5738831615120275, "grad_norm": 0.0935456185158146, "learning_rate": 0.00010842649783228624, "loss": 0.3268, "step": 229 }, { "epoch": 1.5807560137457046, "grad_norm": 0.09041381874473402, "learning_rate": 0.00010762561572812788, "loss": 0.3362, "step": 230 }, { "epoch": 1.5876288659793816, "grad_norm": 0.09201387286010286, "learning_rate": 0.0001068242413364671, "loss": 0.3376, "step": 231 }, { "epoch": 1.5945017182130585, "grad_norm": 0.08811093497928178, "learning_rate": 0.00010602242639169648, "loss": 0.3436, "step": 232 }, { "epoch": 1.6013745704467355, "grad_norm": 0.08723599950744933, "learning_rate": 0.0001052202226566494, "loss": 0.3406, "step": 233 }, { "epoch": 1.6082474226804124, "grad_norm": 0.08674469336006953, "learning_rate": 0.00010441768191925847, "loss": 0.3291, "step": 234 }, { "epoch": 1.6151202749140894, "grad_norm": 0.08464056008589892, "learning_rate": 0.00010361485598921212, "loss": 0.324, "step": 235 }, { "epoch": 1.6219931271477663, "grad_norm": 0.08422702725473777, "learning_rate": 0.00010281179669461005, "loss": 0.3278, "step": 236 }, { "epoch": 1.6288659793814433, "grad_norm": 0.08295666465456814, "learning_rate": 0.00010200855587861724, "loss": 0.3241, "step": 237 }, { "epoch": 1.6357388316151202, "grad_norm": 0.08548502829224709, "learning_rate": 0.0001012051853961172, "loss": 0.323, "step": 238 }, { "epoch": 1.6426116838487972, "grad_norm": 0.08504257743951933, "learning_rate": 0.00010040173711036431, "loss": 0.3319, "step": 239 }, { "epoch": 1.6494845360824741, "grad_norm": 0.08897006984234093, "learning_rate": 9.959826288963571e-05, "loss": 0.3298, "step": 240 }, { "epoch": 1.656357388316151, "grad_norm": 0.08569379226855248, "learning_rate": 9.879481460388282e-05, "loss": 0.3324, "step": 241 }, { "epoch": 1.663230240549828, "grad_norm": 0.08807735613767498, "learning_rate": 9.799144412138275e-05, "loss": 0.3325, "step": 242 }, { "epoch": 1.670103092783505, "grad_norm": 0.08151407457258486, "learning_rate": 9.718820330538998e-05, "loss": 0.3232, "step": 243 }, { "epoch": 1.6769759450171822, "grad_norm": 0.087023659443741, "learning_rate": 9.638514401078788e-05, "loss": 0.3318, "step": 244 }, { "epoch": 1.6838487972508591, "grad_norm": 0.08594518906158083, "learning_rate": 9.558231808074156e-05, "loss": 0.3453, "step": 245 }, { "epoch": 1.690721649484536, "grad_norm": 0.08352272303412105, "learning_rate": 9.477977734335061e-05, "loss": 0.3166, "step": 246 }, { "epoch": 1.697594501718213, "grad_norm": 0.08568852091684943, "learning_rate": 9.397757360830353e-05, "loss": 0.3264, "step": 247 }, { "epoch": 1.7044673539518902, "grad_norm": 0.08885733489863677, "learning_rate": 9.317575866353292e-05, "loss": 0.3266, "step": 248 }, { "epoch": 1.7113402061855671, "grad_norm": 0.08463037683241642, "learning_rate": 9.23743842718721e-05, "loss": 0.3205, "step": 249 }, { "epoch": 1.718213058419244, "grad_norm": 0.0843390482517884, "learning_rate": 9.157350216771378e-05, "loss": 0.3269, "step": 250 }, { "epoch": 1.725085910652921, "grad_norm": 0.09007246070238653, "learning_rate": 9.077316405366981e-05, "loss": 0.3276, "step": 251 }, { "epoch": 1.731958762886598, "grad_norm": 0.09244843516410303, "learning_rate": 8.997342159723371e-05, "loss": 0.3401, "step": 252 }, { "epoch": 1.738831615120275, "grad_norm": 0.08881441822645278, "learning_rate": 8.917432642744518e-05, "loss": 0.3281, "step": 253 }, { "epoch": 1.745704467353952, "grad_norm": 0.09189583409616871, "learning_rate": 8.83759301315568e-05, "loss": 0.3455, "step": 254 }, { "epoch": 1.7525773195876289, "grad_norm": 0.09044573284661134, "learning_rate": 8.757828425170404e-05, "loss": 0.3303, "step": 255 }, { "epoch": 1.7594501718213058, "grad_norm": 0.08705355037506475, "learning_rate": 8.678144028157759e-05, "loss": 0.322, "step": 256 }, { "epoch": 1.7663230240549828, "grad_norm": 0.08508811778261953, "learning_rate": 8.598544966309925e-05, "loss": 0.3293, "step": 257 }, { "epoch": 1.7731958762886597, "grad_norm": 0.08413556741266857, "learning_rate": 8.519036378310096e-05, "loss": 0.325, "step": 258 }, { "epoch": 1.7800687285223367, "grad_norm": 0.08741466736761512, "learning_rate": 8.43962339700073e-05, "loss": 0.3331, "step": 259 }, { "epoch": 1.7869415807560136, "grad_norm": 0.0838872436233333, "learning_rate": 8.360311149052205e-05, "loss": 0.3157, "step": 260 }, { "epoch": 1.7938144329896906, "grad_norm": 0.08675166520815596, "learning_rate": 8.281104754631835e-05, "loss": 0.3295, "step": 261 }, { "epoch": 1.8006872852233677, "grad_norm": 0.0891584493314357, "learning_rate": 8.20200932707335e-05, "loss": 0.3293, "step": 262 }, { "epoch": 1.8075601374570447, "grad_norm": 0.08838716486686246, "learning_rate": 8.123029972546781e-05, "loss": 0.339, "step": 263 }, { "epoch": 1.8144329896907216, "grad_norm": 0.08490002194019693, "learning_rate": 8.044171789728816e-05, "loss": 0.3166, "step": 264 }, { "epoch": 1.8213058419243986, "grad_norm": 0.08879467710865714, "learning_rate": 7.965439869473664e-05, "loss": 0.3225, "step": 265 }, { "epoch": 1.8281786941580758, "grad_norm": 0.08685788646044905, "learning_rate": 7.886839294484377e-05, "loss": 0.3379, "step": 266 }, { "epoch": 1.8350515463917527, "grad_norm": 0.0847245750551125, "learning_rate": 7.808375138984745e-05, "loss": 0.3204, "step": 267 }, { "epoch": 1.8419243986254297, "grad_norm": 0.08832244387556033, "learning_rate": 7.730052468391725e-05, "loss": 0.3342, "step": 268 }, { "epoch": 1.8487972508591066, "grad_norm": 0.08433464283867892, "learning_rate": 7.6518763389884e-05, "loss": 0.3204, "step": 269 }, { "epoch": 1.8556701030927836, "grad_norm": 0.08916030625620067, "learning_rate": 7.573851797597602e-05, "loss": 0.3259, "step": 270 }, { "epoch": 1.8625429553264605, "grad_norm": 0.08838217497382118, "learning_rate": 7.495983881256067e-05, "loss": 0.3286, "step": 271 }, { "epoch": 1.8694158075601375, "grad_norm": 0.08681625916427234, "learning_rate": 7.418277616889282e-05, "loss": 0.3383, "step": 272 }, { "epoch": 1.8762886597938144, "grad_norm": 0.08607392773613289, "learning_rate": 7.340738020986961e-05, "loss": 0.3399, "step": 273 }, { "epoch": 1.8831615120274914, "grad_norm": 0.08394316266478502, "learning_rate": 7.263370099279172e-05, "loss": 0.336, "step": 274 }, { "epoch": 1.8900343642611683, "grad_norm": 0.08506010816714532, "learning_rate": 7.186178846413214e-05, "loss": 0.3209, "step": 275 }, { "epoch": 1.8969072164948453, "grad_norm": 0.0862849296740383, "learning_rate": 7.109169245631149e-05, "loss": 0.3299, "step": 276 }, { "epoch": 1.9037800687285222, "grad_norm": 0.08618319460830526, "learning_rate": 7.032346268448118e-05, "loss": 0.3334, "step": 277 }, { "epoch": 1.9106529209621992, "grad_norm": 0.08606262384609771, "learning_rate": 6.955714874331387e-05, "loss": 0.3196, "step": 278 }, { "epoch": 1.9175257731958761, "grad_norm": 0.08598722768681304, "learning_rate": 6.87928001038017e-05, "loss": 0.3332, "step": 279 }, { "epoch": 1.9243986254295533, "grad_norm": 0.09354335747737587, "learning_rate": 6.803046611006278e-05, "loss": 0.3388, "step": 280 }, { "epoch": 1.9312714776632303, "grad_norm": 0.08678201560782468, "learning_rate": 6.727019597615545e-05, "loss": 0.3375, "step": 281 }, { "epoch": 1.9381443298969072, "grad_norm": 0.0908176800809841, "learning_rate": 6.651203878290139e-05, "loss": 0.3367, "step": 282 }, { "epoch": 1.9450171821305842, "grad_norm": 0.0889822547044272, "learning_rate": 6.575604347471695e-05, "loss": 0.3323, "step": 283 }, { "epoch": 1.9518900343642611, "grad_norm": 0.08764015474023973, "learning_rate": 6.500225885645346e-05, "loss": 0.3414, "step": 284 }, { "epoch": 1.9587628865979383, "grad_norm": 0.08874567526431988, "learning_rate": 6.425073359024663e-05, "loss": 0.3219, "step": 285 }, { "epoch": 1.9656357388316152, "grad_norm": 0.08658768880053598, "learning_rate": 6.350151619237488e-05, "loss": 0.3195, "step": 286 }, { "epoch": 1.9725085910652922, "grad_norm": 0.08643147213647358, "learning_rate": 6.275465503012751e-05, "loss": 0.3248, "step": 287 }, { "epoch": 1.9793814432989691, "grad_norm": 0.08664505956001961, "learning_rate": 6.201019831868208e-05, "loss": 0.3243, "step": 288 }, { "epoch": 1.986254295532646, "grad_norm": 0.09093058888184664, "learning_rate": 6.126819411799175e-05, "loss": 0.3317, "step": 289 }, { "epoch": 1.993127147766323, "grad_norm": 0.08786573282756825, "learning_rate": 6.052869032968285e-05, "loss": 0.3215, "step": 290 }, { "epoch": 2.0, "grad_norm": 0.08542610384961091, "learning_rate": 5.979173469396227e-05, "loss": 0.3218, "step": 291 }, { "epoch": 2.0, "eval_loss": 0.33904463052749634, "eval_runtime": 30.6784, "eval_samples_per_second": 31.912, "eval_steps_per_second": 1.01, "step": 291 }, { "epoch": 2.006872852233677, "grad_norm": 0.08934965744345454, "learning_rate": 5.905737478653572e-05, "loss": 0.3054, "step": 292 }, { "epoch": 2.013745704467354, "grad_norm": 0.0893916377704053, "learning_rate": 5.83256580155362e-05, "loss": 0.322, "step": 293 }, { "epoch": 2.020618556701031, "grad_norm": 0.08725809210257407, "learning_rate": 5.7596631618463514e-05, "loss": 0.3058, "step": 294 }, { "epoch": 2.027491408934708, "grad_norm": 0.08691258758136665, "learning_rate": 5.687034265913485e-05, "loss": 0.3099, "step": 295 }, { "epoch": 2.0343642611683848, "grad_norm": 0.08947653259802571, "learning_rate": 5.614683802464631e-05, "loss": 0.3183, "step": 296 }, { "epoch": 2.0412371134020617, "grad_norm": 0.09664420937426539, "learning_rate": 5.542616442234618e-05, "loss": 0.3061, "step": 297 }, { "epoch": 2.0481099656357387, "grad_norm": 0.08974986149039771, "learning_rate": 5.470836837681954e-05, "loss": 0.3091, "step": 298 }, { "epoch": 2.0549828178694156, "grad_norm": 0.0940492568909753, "learning_rate": 5.399349622688479e-05, "loss": 0.3041, "step": 299 }, { "epoch": 2.0618556701030926, "grad_norm": 0.09524768270421514, "learning_rate": 5.32815941226022e-05, "loss": 0.3141, "step": 300 }, { "epoch": 2.06872852233677, "grad_norm": 0.09202928786242714, "learning_rate": 5.2572708022294504e-05, "loss": 0.3029, "step": 301 }, { "epoch": 2.075601374570447, "grad_norm": 0.08814430236454712, "learning_rate": 5.1866883689580056e-05, "loss": 0.3091, "step": 302 }, { "epoch": 2.082474226804124, "grad_norm": 0.09147944152132396, "learning_rate": 5.116416669041843e-05, "loss": 0.3213, "step": 303 }, { "epoch": 2.089347079037801, "grad_norm": 0.09357164146140912, "learning_rate": 5.046460239016879e-05, "loss": 0.3186, "step": 304 }, { "epoch": 2.0962199312714778, "grad_norm": 0.09664634774615204, "learning_rate": 4.976823595066128e-05, "loss": 0.311, "step": 305 }, { "epoch": 2.1030927835051547, "grad_norm": 0.0970574243672544, "learning_rate": 4.907511232728145e-05, "loss": 0.3052, "step": 306 }, { "epoch": 2.1099656357388317, "grad_norm": 0.09092882502601024, "learning_rate": 4.8385276266068146e-05, "loss": 0.3199, "step": 307 }, { "epoch": 2.1168384879725086, "grad_norm": 0.09255706363584175, "learning_rate": 4.7698772300824756e-05, "loss": 0.2993, "step": 308 }, { "epoch": 2.1237113402061856, "grad_norm": 0.09624778249540229, "learning_rate": 4.7015644750244306e-05, "loss": 0.3189, "step": 309 }, { "epoch": 2.1305841924398625, "grad_norm": 0.09389052956911129, "learning_rate": 4.6335937715048306e-05, "loss": 0.324, "step": 310 }, { "epoch": 2.1374570446735395, "grad_norm": 0.09427586727237926, "learning_rate": 4.565969507513981e-05, "loss": 0.3004, "step": 311 }, { "epoch": 2.1443298969072164, "grad_norm": 0.09019034631046423, "learning_rate": 4.498696048677059e-05, "loss": 0.3091, "step": 312 }, { "epoch": 2.1512027491408934, "grad_norm": 0.09167012868845459, "learning_rate": 4.4317777379722866e-05, "loss": 0.3094, "step": 313 }, { "epoch": 2.1580756013745703, "grad_norm": 0.09211532597283713, "learning_rate": 4.365218895450558e-05, "loss": 0.3118, "step": 314 }, { "epoch": 2.1649484536082473, "grad_norm": 0.0924637021465344, "learning_rate": 4.29902381795655e-05, "loss": 0.3092, "step": 315 }, { "epoch": 2.1718213058419242, "grad_norm": 0.0955910932252606, "learning_rate": 4.2331967788513295e-05, "loss": 0.3068, "step": 316 }, { "epoch": 2.178694158075601, "grad_norm": 0.09271063523116922, "learning_rate": 4.167742027736482e-05, "loss": 0.3178, "step": 317 }, { "epoch": 2.1855670103092786, "grad_norm": 0.0891299836263922, "learning_rate": 4.102663790179764e-05, "loss": 0.3045, "step": 318 }, { "epoch": 2.1924398625429555, "grad_norm": 0.09593320928446104, "learning_rate": 4.037966267442315e-05, "loss": 0.3175, "step": 319 }, { "epoch": 2.1993127147766325, "grad_norm": 0.09078840359431019, "learning_rate": 3.973653636207437e-05, "loss": 0.3143, "step": 320 }, { "epoch": 2.2061855670103094, "grad_norm": 0.09211197188172149, "learning_rate": 3.909730048310962e-05, "loss": 0.3236, "step": 321 }, { "epoch": 2.2130584192439864, "grad_norm": 0.09145307720913523, "learning_rate": 3.846199630473216e-05, "loss": 0.3053, "step": 322 }, { "epoch": 2.2199312714776633, "grad_norm": 0.0939124948752541, "learning_rate": 3.7830664840326145e-05, "loss": 0.3173, "step": 323 }, { "epoch": 2.2268041237113403, "grad_norm": 0.09108067374454602, "learning_rate": 3.720334684680889e-05, "loss": 0.2956, "step": 324 }, { "epoch": 2.2336769759450172, "grad_norm": 0.09557933138068328, "learning_rate": 3.6580082821999786e-05, "loss": 0.32, "step": 325 }, { "epoch": 2.240549828178694, "grad_norm": 0.09573407011620999, "learning_rate": 3.596091300200578e-05, "loss": 0.3138, "step": 326 }, { "epoch": 2.247422680412371, "grad_norm": 0.09493927500941335, "learning_rate": 3.534587735862391e-05, "loss": 0.3151, "step": 327 }, { "epoch": 2.254295532646048, "grad_norm": 0.09378564206143014, "learning_rate": 3.473501559676088e-05, "loss": 0.3223, "step": 328 }, { "epoch": 2.261168384879725, "grad_norm": 0.09099664156013709, "learning_rate": 3.4128367151869714e-05, "loss": 0.2983, "step": 329 }, { "epoch": 2.268041237113402, "grad_norm": 0.09148661096549023, "learning_rate": 3.352597118740404e-05, "loss": 0.3157, "step": 330 }, { "epoch": 2.274914089347079, "grad_norm": 0.09016840919131286, "learning_rate": 3.292786659228973e-05, "loss": 0.3004, "step": 331 }, { "epoch": 2.281786941580756, "grad_norm": 0.09050286397229691, "learning_rate": 3.233409197841437e-05, "loss": 0.3072, "step": 332 }, { "epoch": 2.288659793814433, "grad_norm": 0.09237950835807537, "learning_rate": 3.174468567813461e-05, "loss": 0.2993, "step": 333 }, { "epoch": 2.29553264604811, "grad_norm": 0.09430237581871379, "learning_rate": 3.115968574180149e-05, "loss": 0.3043, "step": 334 }, { "epoch": 2.3024054982817868, "grad_norm": 0.09164183964007294, "learning_rate": 3.0579129935304066e-05, "loss": 0.311, "step": 335 }, { "epoch": 2.3092783505154637, "grad_norm": 0.09152612089667635, "learning_rate": 3.0003055737631403e-05, "loss": 0.313, "step": 336 }, { "epoch": 2.3161512027491407, "grad_norm": 0.09442445367811866, "learning_rate": 2.9431500338452832e-05, "loss": 0.3008, "step": 337 }, { "epoch": 2.323024054982818, "grad_norm": 0.09110084268893474, "learning_rate": 2.886450063571735e-05, "loss": 0.3088, "step": 338 }, { "epoch": 2.329896907216495, "grad_norm": 0.0921353275111459, "learning_rate": 2.8302093233271453e-05, "loss": 0.3055, "step": 339 }, { "epoch": 2.336769759450172, "grad_norm": 0.09334871004249388, "learning_rate": 2.7744314438496088e-05, "loss": 0.3072, "step": 340 }, { "epoch": 2.343642611683849, "grad_norm": 0.0938802497823231, "learning_rate": 2.7191200259962934e-05, "loss": 0.308, "step": 341 }, { "epoch": 2.350515463917526, "grad_norm": 0.0898567021536528, "learning_rate": 2.6642786405109475e-05, "loss": 0.3034, "step": 342 }, { "epoch": 2.357388316151203, "grad_norm": 0.09293096177060846, "learning_rate": 2.6099108277934103e-05, "loss": 0.3048, "step": 343 }, { "epoch": 2.3642611683848798, "grad_norm": 0.08844746597613538, "learning_rate": 2.556020097671046e-05, "loss": 0.2952, "step": 344 }, { "epoch": 2.3711340206185567, "grad_norm": 0.09287553148248984, "learning_rate": 2.5026099291721516e-05, "loss": 0.2995, "step": 345 }, { "epoch": 2.3780068728522337, "grad_norm": 0.09264677351652409, "learning_rate": 2.449683770301382e-05, "loss": 0.2894, "step": 346 }, { "epoch": 2.3848797250859106, "grad_norm": 0.09576792377226563, "learning_rate": 2.397245037817125e-05, "loss": 0.3109, "step": 347 }, { "epoch": 2.3917525773195876, "grad_norm": 0.09311833469289232, "learning_rate": 2.345297117010954e-05, "loss": 0.3076, "step": 348 }, { "epoch": 2.3986254295532645, "grad_norm": 0.09019455931992065, "learning_rate": 2.2938433614890697e-05, "loss": 0.3044, "step": 349 }, { "epoch": 2.4054982817869415, "grad_norm": 0.09266164178819782, "learning_rate": 2.242887092955801e-05, "loss": 0.3079, "step": 350 }, { "epoch": 2.4123711340206184, "grad_norm": 0.09268050904966632, "learning_rate": 2.1924316009991787e-05, "loss": 0.3102, "step": 351 }, { "epoch": 2.4192439862542954, "grad_norm": 0.09147715690303095, "learning_rate": 2.1424801428785447e-05, "loss": 0.304, "step": 352 }, { "epoch": 2.4261168384879723, "grad_norm": 0.09374390633466752, "learning_rate": 2.0930359433142932e-05, "loss": 0.3133, "step": 353 }, { "epoch": 2.4329896907216497, "grad_norm": 0.09240848129888696, "learning_rate": 2.0441021942796944e-05, "loss": 0.3093, "step": 354 }, { "epoch": 2.4398625429553267, "grad_norm": 0.0908201115114702, "learning_rate": 1.995682054794803e-05, "loss": 0.2979, "step": 355 }, { "epoch": 2.4467353951890036, "grad_norm": 0.08951402036487009, "learning_rate": 1.9477786507225616e-05, "loss": 0.3062, "step": 356 }, { "epoch": 2.4536082474226806, "grad_norm": 0.09253412233961794, "learning_rate": 1.900395074566962e-05, "loss": 0.3068, "step": 357 }, { "epoch": 2.4604810996563575, "grad_norm": 0.09564887234650561, "learning_rate": 1.8535343852734332e-05, "loss": 0.3247, "step": 358 }, { "epoch": 2.4673539518900345, "grad_norm": 0.09078364644099739, "learning_rate": 1.8071996080313602e-05, "loss": 0.3054, "step": 359 }, { "epoch": 2.4742268041237114, "grad_norm": 0.09423638948358164, "learning_rate": 1.76139373407876e-05, "loss": 0.2901, "step": 360 }, { "epoch": 2.4810996563573884, "grad_norm": 0.09417570817189251, "learning_rate": 1.7161197205092216e-05, "loss": 0.3074, "step": 361 }, { "epoch": 2.4879725085910653, "grad_norm": 0.09280056424793226, "learning_rate": 1.6713804900809582e-05, "loss": 0.3046, "step": 362 }, { "epoch": 2.4948453608247423, "grad_norm": 0.09481341645270996, "learning_rate": 1.6271789310281517e-05, "loss": 0.3055, "step": 363 }, { "epoch": 2.5017182130584192, "grad_norm": 0.09374100014573863, "learning_rate": 1.583517896874498e-05, "loss": 0.3088, "step": 364 }, { "epoch": 2.508591065292096, "grad_norm": 0.09398340253654071, "learning_rate": 1.540400206248963e-05, "loss": 0.3031, "step": 365 }, { "epoch": 2.515463917525773, "grad_norm": 0.09704966740341212, "learning_rate": 1.4978286427038601e-05, "loss": 0.3035, "step": 366 }, { "epoch": 2.52233676975945, "grad_norm": 0.0942317189363672, "learning_rate": 1.4558059545351143e-05, "loss": 0.3017, "step": 367 }, { "epoch": 2.529209621993127, "grad_norm": 0.09410721020239764, "learning_rate": 1.4143348546048707e-05, "loss": 0.2993, "step": 368 }, { "epoch": 2.536082474226804, "grad_norm": 0.09140901915265269, "learning_rate": 1.3734180201663439e-05, "loss": 0.2992, "step": 369 }, { "epoch": 2.542955326460481, "grad_norm": 0.09282138740607261, "learning_rate": 1.3330580926909763e-05, "loss": 0.3031, "step": 370 }, { "epoch": 2.549828178694158, "grad_norm": 0.09306222409559974, "learning_rate": 1.2932576776979377e-05, "loss": 0.304, "step": 371 }, { "epoch": 2.556701030927835, "grad_norm": 0.09315714906555249, "learning_rate": 1.2540193445858883e-05, "loss": 0.3034, "step": 372 }, { "epoch": 2.563573883161512, "grad_norm": 0.09239490285360592, "learning_rate": 1.2153456264671337e-05, "loss": 0.302, "step": 373 }, { "epoch": 2.5704467353951888, "grad_norm": 0.09388397576387222, "learning_rate": 1.1772390200040817e-05, "loss": 0.3047, "step": 374 }, { "epoch": 2.5773195876288657, "grad_norm": 0.09296980432149467, "learning_rate": 1.139701985248055e-05, "loss": 0.3044, "step": 375 }, { "epoch": 2.584192439862543, "grad_norm": 0.09364059163575278, "learning_rate": 1.1027369454805058e-05, "loss": 0.3136, "step": 376 }, { "epoch": 2.59106529209622, "grad_norm": 0.09635834843398819, "learning_rate": 1.0663462870565411e-05, "loss": 0.3108, "step": 377 }, { "epoch": 2.597938144329897, "grad_norm": 0.09391819390703894, "learning_rate": 1.0305323592509009e-05, "loss": 0.3097, "step": 378 }, { "epoch": 2.604810996563574, "grad_norm": 0.0929082468697906, "learning_rate": 9.952974741062703e-06, "loss": 0.3114, "step": 379 }, { "epoch": 2.611683848797251, "grad_norm": 0.09217596739130325, "learning_rate": 9.606439062840256e-06, "loss": 0.296, "step": 380 }, { "epoch": 2.618556701030928, "grad_norm": 0.09142311512733288, "learning_rate": 9.265738929174051e-06, "loss": 0.3048, "step": 381 }, { "epoch": 2.625429553264605, "grad_norm": 0.0904838758026923, "learning_rate": 8.93089633467058e-06, "loss": 0.2993, "step": 382 }, { "epoch": 2.6323024054982818, "grad_norm": 0.09155038949290019, "learning_rate": 8.601932895790877e-06, "loss": 0.3089, "step": 383 }, { "epoch": 2.6391752577319587, "grad_norm": 0.09253938327453687, "learning_rate": 8.278869849454718e-06, "loss": 0.3091, "step": 384 }, { "epoch": 2.6460481099656357, "grad_norm": 0.09516003752782264, "learning_rate": 7.961728051669737e-06, "loss": 0.2986, "step": 385 }, { "epoch": 2.6529209621993126, "grad_norm": 0.09421805049010851, "learning_rate": 7.650527976185173e-06, "loss": 0.3155, "step": 386 }, { "epoch": 2.6597938144329896, "grad_norm": 0.09154788639674317, "learning_rate": 7.3452897131698564e-06, "loss": 0.3062, "step": 387 }, { "epoch": 2.6666666666666665, "grad_norm": 0.09122873803065291, "learning_rate": 7.046032967915483e-06, "loss": 0.3185, "step": 388 }, { "epoch": 2.673539518900344, "grad_norm": 0.09367952245805052, "learning_rate": 6.75277705956443e-06, "loss": 0.3068, "step": 389 }, { "epoch": 2.680412371134021, "grad_norm": 0.09214905438448907, "learning_rate": 6.465540919862456e-06, "loss": 0.3065, "step": 390 }, { "epoch": 2.687285223367698, "grad_norm": 0.0916570054323852, "learning_rate": 6.184343091936751e-06, "loss": 0.3044, "step": 391 }, { "epoch": 2.6941580756013748, "grad_norm": 0.0931819392758384, "learning_rate": 5.909201729098579e-06, "loss": 0.2959, "step": 392 }, { "epoch": 2.7010309278350517, "grad_norm": 0.09268359375004617, "learning_rate": 5.640134593671598e-06, "loss": 0.3105, "step": 393 }, { "epoch": 2.7079037800687287, "grad_norm": 0.09257736661481182, "learning_rate": 5.3771590558450265e-06, "loss": 0.2992, "step": 394 }, { "epoch": 2.7147766323024056, "grad_norm": 0.09089249254613374, "learning_rate": 5.12029209255227e-06, "loss": 0.2991, "step": 395 }, { "epoch": 2.7216494845360826, "grad_norm": 0.09230389852286562, "learning_rate": 4.869550286375091e-06, "loss": 0.2991, "step": 396 }, { "epoch": 2.7285223367697595, "grad_norm": 0.0939308714072509, "learning_rate": 4.624949824472858e-06, "loss": 0.3167, "step": 397 }, { "epoch": 2.7353951890034365, "grad_norm": 0.09423045409883361, "learning_rate": 4.386506497537757e-06, "loss": 0.2964, "step": 398 }, { "epoch": 2.7422680412371134, "grad_norm": 0.09442465096579741, "learning_rate": 4.154235698775277e-06, "loss": 0.3117, "step": 399 }, { "epoch": 2.7491408934707904, "grad_norm": 0.09404849451544063, "learning_rate": 3.928152422910491e-06, "loss": 0.3075, "step": 400 }, { "epoch": 2.7560137457044673, "grad_norm": 0.09032494191687969, "learning_rate": 3.7082712652200867e-06, "loss": 0.3002, "step": 401 }, { "epoch": 2.7628865979381443, "grad_norm": 0.09338676868642957, "learning_rate": 3.4946064205899965e-06, "loss": 0.3102, "step": 402 }, { "epoch": 2.7697594501718212, "grad_norm": 0.09058201872565531, "learning_rate": 3.287171682599255e-06, "loss": 0.3005, "step": 403 }, { "epoch": 2.776632302405498, "grad_norm": 0.09124885428190838, "learning_rate": 3.085980442629288e-06, "loss": 0.3102, "step": 404 }, { "epoch": 2.783505154639175, "grad_norm": 0.097707714719703, "learning_rate": 2.8910456889995498e-06, "loss": 0.3101, "step": 405 }, { "epoch": 2.790378006872852, "grad_norm": 0.09406199842124141, "learning_rate": 2.7023800061289907e-06, "loss": 0.3029, "step": 406 }, { "epoch": 2.797250859106529, "grad_norm": 0.0937270430771374, "learning_rate": 2.5199955737236104e-06, "loss": 0.3138, "step": 407 }, { "epoch": 2.804123711340206, "grad_norm": 0.09351577018221054, "learning_rate": 2.3439041659902407e-06, "loss": 0.2946, "step": 408 }, { "epoch": 2.810996563573883, "grad_norm": 0.09264665661951106, "learning_rate": 2.174117150876398e-06, "loss": 0.3004, "step": 409 }, { "epoch": 2.81786941580756, "grad_norm": 0.09238906202514245, "learning_rate": 2.010645489336382e-06, "loss": 0.3097, "step": 410 }, { "epoch": 2.824742268041237, "grad_norm": 0.09190554668546122, "learning_rate": 1.8534997346237093e-06, "loss": 0.2975, "step": 411 }, { "epoch": 2.8316151202749142, "grad_norm": 0.09181507313193155, "learning_rate": 1.7026900316098215e-06, "loss": 0.3003, "step": 412 }, { "epoch": 2.838487972508591, "grad_norm": 0.09143861144115928, "learning_rate": 1.5582261161291245e-06, "loss": 0.3034, "step": 413 }, { "epoch": 2.845360824742268, "grad_norm": 0.09247995886089962, "learning_rate": 1.4201173143504888e-06, "loss": 0.3049, "step": 414 }, { "epoch": 2.852233676975945, "grad_norm": 0.09188706812752409, "learning_rate": 1.2883725421752201e-06, "loss": 0.3021, "step": 415 }, { "epoch": 2.859106529209622, "grad_norm": 0.09205248578883383, "learning_rate": 1.1630003046614323e-06, "loss": 0.3057, "step": 416 }, { "epoch": 2.865979381443299, "grad_norm": 0.09083436125388404, "learning_rate": 1.0440086954749517e-06, "loss": 0.2989, "step": 417 }, { "epoch": 2.872852233676976, "grad_norm": 0.09374039579864873, "learning_rate": 9.314053963669245e-07, "loss": 0.3068, "step": 418 }, { "epoch": 2.879725085910653, "grad_norm": 0.09335523461956806, "learning_rate": 8.251976766777913e-07, "loss": 0.3083, "step": 419 }, { "epoch": 2.88659793814433, "grad_norm": 0.09501160485547541, "learning_rate": 7.253923928680406e-07, "loss": 0.3168, "step": 420 }, { "epoch": 2.893470790378007, "grad_norm": 0.09249423661472182, "learning_rate": 6.319959880756177e-07, "loss": 0.3053, "step": 421 }, { "epoch": 2.9003436426116838, "grad_norm": 0.0929914691504264, "learning_rate": 5.450144916999134e-07, "loss": 0.3036, "step": 422 }, { "epoch": 2.9072164948453607, "grad_norm": 0.09382724419135216, "learning_rate": 4.644535190125421e-07, "loss": 0.3117, "step": 423 }, { "epoch": 2.9140893470790377, "grad_norm": 0.09312281718144377, "learning_rate": 3.903182707948649e-07, "loss": 0.3144, "step": 424 }, { "epoch": 2.9209621993127146, "grad_norm": 0.09110190356198779, "learning_rate": 3.2261353300219176e-07, "loss": 0.2994, "step": 425 }, { "epoch": 2.927835051546392, "grad_norm": 0.09128452604561245, "learning_rate": 2.613436764548505e-07, "loss": 0.2993, "step": 426 }, { "epoch": 2.934707903780069, "grad_norm": 0.09212545839697747, "learning_rate": 2.0651265655603492e-07, "loss": 0.2946, "step": 427 }, { "epoch": 2.941580756013746, "grad_norm": 0.0913506938676497, "learning_rate": 1.5812401303639813e-07, "loss": 0.3039, "step": 428 }, { "epoch": 2.948453608247423, "grad_norm": 0.08983898980081578, "learning_rate": 1.1618086972559062e-07, "loss": 0.2986, "step": 429 }, { "epoch": 2.9553264604811, "grad_norm": 0.09008475970418757, "learning_rate": 8.068593435055505e-08, "loss": 0.3005, "step": 430 }, { "epoch": 2.9621993127147768, "grad_norm": 0.09195705112033116, "learning_rate": 5.164149836077714e-08, "loss": 0.313, "step": 431 }, { "epoch": 2.9690721649484537, "grad_norm": 0.09294231579523113, "learning_rate": 2.9049436780281825e-08, "loss": 0.3128, "step": 432 }, { "epoch": 2.9759450171821307, "grad_norm": 0.09069531595263271, "learning_rate": 1.2911208086663351e-08, "loss": 0.3044, "step": 433 }, { "epoch": 2.9828178694158076, "grad_norm": 0.09077240628653975, "learning_rate": 3.2278541168717646e-09, "loss": 0.2964, "step": 434 }, { "epoch": 2.9896907216494846, "grad_norm": 0.09402049762344616, "learning_rate": 0.0, "loss": 0.3118, "step": 435 }, { "epoch": 2.9896907216494846, "eval_loss": 0.3359867036342621, "eval_runtime": 29.8961, "eval_samples_per_second": 32.747, "eval_steps_per_second": 1.037, "step": 435 }, { "epoch": 2.9896907216494846, "step": 435, "total_flos": 1.5583643170557133e+17, "train_loss": 0.3695695283769191, "train_runtime": 4934.2641, "train_samples_per_second": 11.307, "train_steps_per_second": 0.088 } ], "logging_steps": 1, "max_steps": 435, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.5583643170557133e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }