{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9982631930527722, "eval_steps": 400, "global_step": 467, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01068804275217101, "grad_norm": 7.6839051021356335, "learning_rate": 1.0638297872340425e-07, "logits/chosen": 0.030031317844986916, "logits/rejected": -0.005169146694242954, "logps/chosen": -0.2549566626548767, "logps/rejected": -0.26970332860946655, "loss": 1.2612, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2549566626548767, "rewards/margins": 0.014746698550879955, "rewards/rejected": -0.26970332860946655, "step": 5 }, { "epoch": 0.02137608550434202, "grad_norm": 6.455969735668311, "learning_rate": 2.127659574468085e-07, "logits/chosen": -0.030284494161605835, "logits/rejected": -0.02130025625228882, "logps/chosen": -0.27859872579574585, "logps/rejected": -0.2721685469150543, "loss": 1.2728, "rewards/accuracies": 0.45625001192092896, "rewards/chosen": -0.27859872579574585, "rewards/margins": -0.006430179812014103, "rewards/rejected": -0.2721685469150543, "step": 10 }, { "epoch": 0.03206412825651302, "grad_norm": 6.3108235358652385, "learning_rate": 3.1914893617021275e-07, "logits/chosen": 0.05979523807764053, "logits/rejected": 0.05295870825648308, "logps/chosen": -0.2907688021659851, "logps/rejected": -0.305183470249176, "loss": 1.2799, "rewards/accuracies": 0.5, "rewards/chosen": -0.2907688021659851, "rewards/margins": 0.014414620585739613, "rewards/rejected": -0.305183470249176, "step": 15 }, { "epoch": 0.04275217100868404, "grad_norm": 6.093982428242224, "learning_rate": 4.25531914893617e-07, "logits/chosen": 0.03507867455482483, "logits/rejected": 0.03253958001732826, "logps/chosen": -0.261181116104126, "logps/rejected": -0.27250123023986816, "loss": 1.2661, "rewards/accuracies": 0.581250011920929, "rewards/chosen": -0.261181116104126, "rewards/margins": 0.011320129036903381, "rewards/rejected": -0.27250123023986816, "step": 20 }, { "epoch": 0.053440213760855046, "grad_norm": 5.3174858162797864, "learning_rate": 5.319148936170212e-07, "logits/chosen": 0.025609856471419334, "logits/rejected": 0.009822970256209373, "logps/chosen": -0.2632735073566437, "logps/rejected": -0.28461751341819763, "loss": 1.2665, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.2632735073566437, "rewards/margins": 0.02134399674832821, "rewards/rejected": -0.28461751341819763, "step": 25 }, { "epoch": 0.06412825651302605, "grad_norm": 5.716113428524292, "learning_rate": 6.382978723404255e-07, "logits/chosen": -0.0970643013715744, "logits/rejected": -0.07809214293956757, "logps/chosen": -0.2763083279132843, "logps/rejected": -0.2804708778858185, "loss": 1.2658, "rewards/accuracies": 0.4437499940395355, "rewards/chosen": -0.2763083279132843, "rewards/margins": 0.0041625602170825005, "rewards/rejected": -0.2804708778858185, "step": 30 }, { "epoch": 0.07481629926519706, "grad_norm": 4.978401481602216, "learning_rate": 7.446808510638297e-07, "logits/chosen": -0.0406271293759346, "logits/rejected": -0.07336937636137009, "logps/chosen": -0.29073840379714966, "logps/rejected": -0.293030709028244, "loss": 1.2797, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.29073840379714966, "rewards/margins": 0.0022922889329493046, "rewards/rejected": -0.293030709028244, "step": 35 }, { "epoch": 0.08550434201736808, "grad_norm": 6.722458816777254, "learning_rate": 8.51063829787234e-07, "logits/chosen": -0.12586958706378937, "logits/rejected": -0.1309432089328766, "logps/chosen": -0.2860681414604187, "logps/rejected": -0.29788246750831604, "loss": 1.2768, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.2860681414604187, "rewards/margins": 0.01181434839963913, "rewards/rejected": -0.29788246750831604, "step": 40 }, { "epoch": 0.09619238476953908, "grad_norm": 9.862978906902363, "learning_rate": 9.574468085106384e-07, "logits/chosen": -0.0928565114736557, "logits/rejected": -0.08263979852199554, "logps/chosen": -0.28130441904067993, "logps/rejected": -0.306749552488327, "loss": 1.2561, "rewards/accuracies": 0.543749988079071, "rewards/chosen": -0.28130441904067993, "rewards/margins": 0.0254451222717762, "rewards/rejected": -0.306749552488327, "step": 45 }, { "epoch": 0.10688042752171009, "grad_norm": 6.916554774701635, "learning_rate": 9.998741174712533e-07, "logits/chosen": -0.1109582781791687, "logits/rejected": -0.13332585990428925, "logps/chosen": -0.2940751612186432, "logps/rejected": -0.33872976899147034, "loss": 1.2504, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.2940751612186432, "rewards/margins": 0.04465465992689133, "rewards/rejected": -0.33872976899147034, "step": 50 }, { "epoch": 0.11756847027388109, "grad_norm": 6.886280103270556, "learning_rate": 9.991050648838675e-07, "logits/chosen": -0.07307116687297821, "logits/rejected": -0.09735921025276184, "logps/chosen": -0.29753613471984863, "logps/rejected": -0.36083537340164185, "loss": 1.2622, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.29753613471984863, "rewards/margins": 0.06329929828643799, "rewards/rejected": -0.36083537340164185, "step": 55 }, { "epoch": 0.1282565130260521, "grad_norm": 6.581763598962665, "learning_rate": 9.97637968732563e-07, "logits/chosen": -0.08233795315027237, "logits/rejected": -0.08182443678379059, "logps/chosen": -0.3031911253929138, "logps/rejected": -0.34265732765197754, "loss": 1.2444, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.3031911253929138, "rewards/margins": 0.03946622088551521, "rewards/rejected": -0.34265732765197754, "step": 60 }, { "epoch": 0.13894455577822312, "grad_norm": 9.155624459526475, "learning_rate": 9.954748808839674e-07, "logits/chosen": -0.12494422495365143, "logits/rejected": -0.10039836168289185, "logps/chosen": -0.31139710545539856, "logps/rejected": -0.40564531087875366, "loss": 1.2551, "rewards/accuracies": 0.53125, "rewards/chosen": -0.31139710545539856, "rewards/margins": 0.0942481979727745, "rewards/rejected": -0.40564531087875366, "step": 65 }, { "epoch": 0.14963259853039412, "grad_norm": 6.234129539834448, "learning_rate": 9.926188266120295e-07, "logits/chosen": -0.12459670007228851, "logits/rejected": -0.1347140073776245, "logps/chosen": -0.2900499701499939, "logps/rejected": -0.34025058150291443, "loss": 1.2499, "rewards/accuracies": 0.5562499761581421, "rewards/chosen": -0.2900499701499939, "rewards/margins": 0.05020058900117874, "rewards/rejected": -0.34025058150291443, "step": 70 }, { "epoch": 0.16032064128256512, "grad_norm": 6.758490065806782, "learning_rate": 9.890738003669027e-07, "logits/chosen": -0.04522291570901871, "logits/rejected": -0.10806401073932648, "logps/chosen": -0.36918264627456665, "logps/rejected": -0.37469878792762756, "loss": 1.255, "rewards/accuracies": 0.48750001192092896, "rewards/chosen": -0.36918264627456665, "rewards/margins": 0.005516159348189831, "rewards/rejected": -0.37469878792762756, "step": 75 }, { "epoch": 0.17100868403473615, "grad_norm": 10.191842225644526, "learning_rate": 9.848447601883433e-07, "logits/chosen": -0.08387650549411774, "logits/rejected": -0.07887469977140427, "logps/chosen": -0.33048170804977417, "logps/rejected": -0.4197458326816559, "loss": 1.2368, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.33048170804977417, "rewards/margins": 0.0892641618847847, "rewards/rejected": -0.4197458326816559, "step": 80 }, { "epoch": 0.18169672678690715, "grad_norm": 9.352000749502443, "learning_rate": 9.799376207714444e-07, "logits/chosen": -0.13144788146018982, "logits/rejected": -0.13928399980068207, "logps/chosen": -0.3154647946357727, "logps/rejected": -0.38948363065719604, "loss": 1.2319, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.3154647946357727, "rewards/margins": 0.07401885092258453, "rewards/rejected": -0.38948363065719604, "step": 85 }, { "epoch": 0.19238476953907815, "grad_norm": 6.072420839953192, "learning_rate": 9.743592451943998e-07, "logits/chosen": -0.1306827962398529, "logits/rejected": -0.1379804015159607, "logps/chosen": -0.33726412057876587, "logps/rejected": -0.3857743442058563, "loss": 1.2455, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.33726412057876587, "rewards/margins": 0.04851023852825165, "rewards/rejected": -0.3857743442058563, "step": 90 }, { "epoch": 0.20307281229124916, "grad_norm": 8.382063384630092, "learning_rate": 9.681174353198686e-07, "logits/chosen": -0.12879344820976257, "logits/rejected": -0.14443747699260712, "logps/chosen": -0.35025691986083984, "logps/rejected": -0.40873831510543823, "loss": 1.2414, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.35025691986083984, "rewards/margins": 0.058481425046920776, "rewards/rejected": -0.40873831510543823, "step": 95 }, { "epoch": 0.21376085504342018, "grad_norm": 8.051181065225872, "learning_rate": 9.612209208833646e-07, "logits/chosen": -0.0478597953915596, "logits/rejected": -0.0929640680551529, "logps/chosen": -0.366682231426239, "logps/rejected": -0.42526760697364807, "loss": 1.2454, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.366682231426239, "rewards/margins": 0.05858539417386055, "rewards/rejected": -0.42526760697364807, "step": 100 }, { "epoch": 0.22444889779559118, "grad_norm": 8.423595722166493, "learning_rate": 9.536793472839324e-07, "logits/chosen": -0.024932144209742546, "logits/rejected": -0.029018620029091835, "logps/chosen": -0.3645615577697754, "logps/rejected": -0.4377099871635437, "loss": 1.2321, "rewards/accuracies": 0.65625, "rewards/chosen": -0.3645615577697754, "rewards/margins": 0.07314839214086533, "rewards/rejected": -0.4377099871635437, "step": 105 }, { "epoch": 0.23513694054776219, "grad_norm": 8.583118717745613, "learning_rate": 9.455032620941839e-07, "logits/chosen": -0.08025398850440979, "logits/rejected": -0.11082024872303009, "logps/chosen": -0.37390726804733276, "logps/rejected": -0.44736775755882263, "loss": 1.2231, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.37390726804733276, "rewards/margins": 0.07346051186323166, "rewards/rejected": -0.44736775755882263, "step": 110 }, { "epoch": 0.2458249832999332, "grad_norm": 8.813654782582985, "learning_rate": 9.367041003085648e-07, "logits/chosen": -0.05264202877879143, "logits/rejected": -0.08138756453990936, "logps/chosen": -0.3586878478527069, "logps/rejected": -0.4460626244544983, "loss": 1.2357, "rewards/accuracies": 0.643750011920929, "rewards/chosen": -0.3586878478527069, "rewards/margins": 0.08737480640411377, "rewards/rejected": -0.4460626244544983, "step": 115 }, { "epoch": 0.2565130260521042, "grad_norm": 18.26289480767173, "learning_rate": 9.272941683504808e-07, "logits/chosen": -0.0790601596236229, "logits/rejected": -0.10665098577737808, "logps/chosen": -0.3613402545452118, "logps/rejected": -0.43312540650367737, "loss": 1.2356, "rewards/accuracies": 0.6312500238418579, "rewards/chosen": -0.3613402545452118, "rewards/margins": 0.07178511470556259, "rewards/rejected": -0.43312540650367737, "step": 120 }, { "epoch": 0.26720106880427524, "grad_norm": 8.650831994920619, "learning_rate": 9.172866268606513e-07, "logits/chosen": -0.1340969204902649, "logits/rejected": -0.1336316168308258, "logps/chosen": -0.3937236964702606, "logps/rejected": -0.5055503249168396, "loss": 1.2281, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.3937236964702606, "rewards/margins": 0.11182668060064316, "rewards/rejected": -0.5055503249168396, "step": 125 }, { "epoch": 0.27788911155644624, "grad_norm": 7.496527663898527, "learning_rate": 9.066954722907638e-07, "logits/chosen": -0.1845657378435135, "logits/rejected": -0.24125418066978455, "logps/chosen": -0.38387566804885864, "logps/rejected": -0.452624648809433, "loss": 1.2223, "rewards/accuracies": 0.6187499761581421, "rewards/chosen": -0.38387566804885864, "rewards/margins": 0.06874893605709076, "rewards/rejected": -0.452624648809433, "step": 130 }, { "epoch": 0.28857715430861725, "grad_norm": 6.7313487554996545, "learning_rate": 8.955355173281707e-07, "logits/chosen": -0.1565982848405838, "logits/rejected": -0.15710704028606415, "logps/chosen": -0.3900142312049866, "logps/rejected": -0.5559936761856079, "loss": 1.2183, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.3900142312049866, "rewards/margins": 0.16597944498062134, "rewards/rejected": -0.5559936761856079, "step": 135 }, { "epoch": 0.29926519706078825, "grad_norm": 6.459204367259454, "learning_rate": 8.838223701790055e-07, "logits/chosen": -0.19400301575660706, "logits/rejected": -0.23119351267814636, "logps/chosen": -0.37699031829833984, "logps/rejected": -0.4609736502170563, "loss": 1.2272, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.37699031829833984, "rewards/margins": 0.0839833989739418, "rewards/rejected": -0.4609736502170563, "step": 140 }, { "epoch": 0.30995323981295925, "grad_norm": 9.060033916313321, "learning_rate": 8.71572412738697e-07, "logits/chosen": -0.07448846101760864, "logits/rejected": -0.16315212845802307, "logps/chosen": -0.4142521917819977, "logps/rejected": -0.5054866075515747, "loss": 1.2179, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.4142521917819977, "rewards/margins": 0.09123442322015762, "rewards/rejected": -0.5054866075515747, "step": 145 }, { "epoch": 0.32064128256513025, "grad_norm": 5.736273343276732, "learning_rate": 8.588027776804058e-07, "logits/chosen": -0.10538250207901001, "logits/rejected": -0.1413673311471939, "logps/chosen": -0.41122564673423767, "logps/rejected": -0.5390647053718567, "loss": 1.2124, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.41122564673423767, "rewards/margins": 0.12783899903297424, "rewards/rejected": -0.5390647053718567, "step": 150 }, { "epoch": 0.33132932531730125, "grad_norm": 6.063903579743197, "learning_rate": 8.455313244934324e-07, "logits/chosen": -0.11919336020946503, "logits/rejected": -0.1652189940214157, "logps/chosen": -0.4010487496852875, "logps/rejected": -0.5092573165893555, "loss": 1.2134, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -0.4010487496852875, "rewards/margins": 0.1082085520029068, "rewards/rejected": -0.5092573165893555, "step": 155 }, { "epoch": 0.3420173680694723, "grad_norm": 9.279674751992077, "learning_rate": 8.317766145051057e-07, "logits/chosen": -0.08387523144483566, "logits/rejected": -0.14803090691566467, "logps/chosen": -0.43099141120910645, "logps/rejected": -0.5257623791694641, "loss": 1.2144, "rewards/accuracies": 0.65625, "rewards/chosen": -0.43099141120910645, "rewards/margins": 0.09477093070745468, "rewards/rejected": -0.5257623791694641, "step": 160 }, { "epoch": 0.3527054108216433, "grad_norm": 8.059439047004146, "learning_rate": 8.175578849210894e-07, "logits/chosen": -0.16660968959331512, "logits/rejected": -0.18010763823986053, "logps/chosen": -0.4078282415866852, "logps/rejected": -0.5577529668807983, "loss": 1.201, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4078282415866852, "rewards/margins": 0.14992472529411316, "rewards/rejected": -0.5577529668807983, "step": 165 }, { "epoch": 0.3633934535738143, "grad_norm": 8.63786807542291, "learning_rate": 8.028950219204099e-07, "logits/chosen": -0.052240192890167236, "logits/rejected": -0.06318216025829315, "logps/chosen": -0.4152770936489105, "logps/rejected": -0.6054114103317261, "loss": 1.203, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.4152770936489105, "rewards/margins": 0.19013427197933197, "rewards/rejected": -0.6054114103317261, "step": 170 }, { "epoch": 0.3740814963259853, "grad_norm": 10.518951318209595, "learning_rate": 7.878085328428368e-07, "logits/chosen": -0.11517999321222305, "logits/rejected": -0.1849624663591385, "logps/chosen": -0.40625524520874023, "logps/rejected": -0.5451288819313049, "loss": 1.212, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -0.40625524520874023, "rewards/margins": 0.1388736218214035, "rewards/rejected": -0.5451288819313049, "step": 175 }, { "epoch": 0.3847695390781563, "grad_norm": 8.417946401352186, "learning_rate": 7.723195175075135e-07, "logits/chosen": -0.06935660541057587, "logits/rejected": -0.1301901787519455, "logps/chosen": -0.4220534861087799, "logps/rejected": -0.5573703050613403, "loss": 1.2083, "rewards/accuracies": 0.78125, "rewards/chosen": -0.4220534861087799, "rewards/margins": 0.13531681895256042, "rewards/rejected": -0.5573703050613403, "step": 180 }, { "epoch": 0.3954575818303273, "grad_norm": 7.995331136842524, "learning_rate": 7.564496387029531e-07, "logits/chosen": -0.08758817613124847, "logits/rejected": -0.14539772272109985, "logps/chosen": -0.4507700502872467, "logps/rejected": -0.5500799417495728, "loss": 1.1962, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -0.4507700502872467, "rewards/margins": 0.09930990636348724, "rewards/rejected": -0.5500799417495728, "step": 185 }, { "epoch": 0.4061456245824983, "grad_norm": 6.542669653809171, "learning_rate": 7.402210918896689e-07, "logits/chosen": -0.0885528177022934, "logits/rejected": -0.1532018929719925, "logps/chosen": -0.39389023184776306, "logps/rejected": -0.5325407981872559, "loss": 1.2053, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.39389023184776306, "rewards/margins": 0.13865062594413757, "rewards/rejected": -0.5325407981872559, "step": 190 }, { "epoch": 0.4168336673346693, "grad_norm": 8.450137364374076, "learning_rate": 7.236565741578162e-07, "logits/chosen": -0.07352186739444733, "logits/rejected": -0.15036916732788086, "logps/chosen": -0.42540302872657776, "logps/rejected": -0.5537828207015991, "loss": 1.2084, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.42540302872657776, "rewards/margins": 0.12837986648082733, "rewards/rejected": -0.5537828207015991, "step": 195 }, { "epoch": 0.42752171008684037, "grad_norm": 8.694643710416935, "learning_rate": 7.067792524832603e-07, "logits/chosen": -0.09027515351772308, "logits/rejected": -0.1412956416606903, "logps/chosen": -0.40201014280319214, "logps/rejected": -0.5238697528839111, "loss": 1.1957, "rewards/accuracies": 0.706250011920929, "rewards/chosen": -0.40201014280319214, "rewards/margins": 0.12185963243246078, "rewards/rejected": -0.5238697528839111, "step": 200 }, { "epoch": 0.43820975283901137, "grad_norm": 8.952421817833013, "learning_rate": 6.896127313264642e-07, "logits/chosen": -0.028889209032058716, "logits/rejected": -0.11139396578073502, "logps/chosen": -0.41819238662719727, "logps/rejected": -0.5573530197143555, "loss": 1.192, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.41819238662719727, "rewards/margins": 0.13916069269180298, "rewards/rejected": -0.5573530197143555, "step": 205 }, { "epoch": 0.44889779559118237, "grad_norm": 8.187027602318263, "learning_rate": 6.721810196195174e-07, "logits/chosen": -0.21568699181079865, "logits/rejected": -0.21867302060127258, "logps/chosen": -0.43785786628723145, "logps/rejected": -0.5911111235618591, "loss": 1.185, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.43785786628723145, "rewards/margins": 0.1532532274723053, "rewards/rejected": -0.5911111235618591, "step": 210 }, { "epoch": 0.45958583834335337, "grad_norm": 5.9916116804065584, "learning_rate": 6.545084971874736e-07, "logits/chosen": -0.12575657665729523, "logits/rejected": -0.1455686092376709, "logps/chosen": -0.4271029531955719, "logps/rejected": -0.5920487642288208, "loss": 1.1742, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4271029531955719, "rewards/margins": 0.1649458110332489, "rewards/rejected": -0.5920487642288208, "step": 215 }, { "epoch": 0.47027388109552437, "grad_norm": 10.450264869504016, "learning_rate": 6.3661988065096e-07, "logits/chosen": -0.18063326179981232, "logits/rejected": -0.23072651028633118, "logps/chosen": -0.44927778840065, "logps/rejected": -0.6041940450668335, "loss": 1.1923, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.44927778840065, "rewards/margins": 0.1549161970615387, "rewards/rejected": -0.6041940450668335, "step": 220 }, { "epoch": 0.48096192384769537, "grad_norm": 9.723089980517251, "learning_rate": 6.185401888577487e-07, "logits/chosen": -0.17207232117652893, "logits/rejected": -0.20382137596607208, "logps/chosen": -0.4381163716316223, "logps/rejected": -0.604756236076355, "loss": 1.192, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.4381163716316223, "rewards/margins": 0.16663983464241028, "rewards/rejected": -0.604756236076355, "step": 225 }, { "epoch": 0.4916499665998664, "grad_norm": 9.054770077776112, "learning_rate": 6.002947078916364e-07, "logits/chosen": -0.14859004318714142, "logits/rejected": -0.15977120399475098, "logps/chosen": -0.4646037220954895, "logps/rejected": -0.6633389592170715, "loss": 1.1958, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.4646037220954895, "rewards/margins": 0.198735311627388, "rewards/rejected": -0.6633389592170715, "step": 230 }, { "epoch": 0.5023380093520374, "grad_norm": 17.895526950620496, "learning_rate": 5.819089557075688e-07, "logits/chosen": -0.12733140587806702, "logits/rejected": -0.18139609694480896, "logps/chosen": -0.4529820382595062, "logps/rejected": -0.592745304107666, "loss": 1.1902, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.4529820382595062, "rewards/margins": 0.1397632509469986, "rewards/rejected": -0.592745304107666, "step": 235 }, { "epoch": 0.5130260521042084, "grad_norm": 9.140622097956182, "learning_rate": 5.634086464424742e-07, "logits/chosen": -0.12596455216407776, "logits/rejected": -0.18648605048656464, "logps/chosen": -0.4592694640159607, "logps/rejected": -0.6695916056632996, "loss": 1.1965, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.4592694640159607, "rewards/margins": 0.21032221615314484, "rewards/rejected": -0.6695916056632996, "step": 240 }, { "epoch": 0.5237140948563794, "grad_norm": 7.85051968813952, "learning_rate": 5.448196544517167e-07, "logits/chosen": -0.15113191306591034, "logits/rejected": -0.20874838531017303, "logps/chosen": -0.448641836643219, "logps/rejected": -0.6651071906089783, "loss": 1.2037, "rewards/accuracies": 0.75, "rewards/chosen": -0.448641836643219, "rewards/margins": 0.21646539866924286, "rewards/rejected": -0.6651071906089783, "step": 245 }, { "epoch": 0.5344021376085505, "grad_norm": 8.655989549308861, "learning_rate": 5.26167978121472e-07, "logits/chosen": -0.10786600410938263, "logits/rejected": -0.13217635452747345, "logps/chosen": -0.42935776710510254, "logps/rejected": -0.6120445132255554, "loss": 1.1759, "rewards/accuracies": 0.75, "rewards/chosen": -0.42935776710510254, "rewards/margins": 0.1826867312192917, "rewards/rejected": -0.6120445132255554, "step": 250 }, { "epoch": 0.5450901803607214, "grad_norm": 9.048614473634702, "learning_rate": 5.074797035076318e-07, "logits/chosen": -0.18685856461524963, "logits/rejected": -0.2535242736339569, "logps/chosen": -0.441723495721817, "logps/rejected": -0.6506599187850952, "loss": 1.1816, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.441723495721817, "rewards/margins": 0.2089364230632782, "rewards/rejected": -0.6506599187850952, "step": 255 }, { "epoch": 0.5557782231128925, "grad_norm": 7.6389154059915105, "learning_rate": 4.887809678520975e-07, "logits/chosen": -0.19917742908000946, "logits/rejected": -0.27246275544166565, "logps/chosen": -0.46937423944473267, "logps/rejected": -0.6870771646499634, "loss": 1.1951, "rewards/accuracies": 0.78125, "rewards/chosen": -0.46937423944473267, "rewards/margins": 0.21770286560058594, "rewards/rejected": -0.6870771646499634, "step": 260 }, { "epoch": 0.5664662658650634, "grad_norm": 9.10073399455548, "learning_rate": 4.700979230274829e-07, "logits/chosen": -0.11132203042507172, "logits/rejected": -0.09603560715913773, "logps/chosen": -0.44644594192504883, "logps/rejected": -0.6944222450256348, "loss": 1.1747, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.44644594192504883, "rewards/margins": 0.24797634780406952, "rewards/rejected": -0.6944222450256348, "step": 265 }, { "epoch": 0.5771543086172345, "grad_norm": 8.916003107478165, "learning_rate": 4.514566989613559e-07, "logits/chosen": -0.15380506217479706, "logits/rejected": -0.19326263666152954, "logps/chosen": -0.446754515171051, "logps/rejected": -0.6446987390518188, "loss": 1.1864, "rewards/accuracies": 0.78125, "rewards/chosen": -0.446754515171051, "rewards/margins": 0.19794420897960663, "rewards/rejected": -0.6446987390518188, "step": 270 }, { "epoch": 0.5878423513694054, "grad_norm": 10.314754743141027, "learning_rate": 4.328833670911724e-07, "logits/chosen": -0.15855953097343445, "logits/rejected": -0.20561406016349792, "logps/chosen": -0.4342300295829773, "logps/rejected": -0.6545408964157104, "loss": 1.1667, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.4342300295829773, "rewards/margins": 0.22031080722808838, "rewards/rejected": -0.6545408964157104, "step": 275 }, { "epoch": 0.5985303941215765, "grad_norm": 9.8305630967928, "learning_rate": 4.144039039010124e-07, "logits/chosen": -0.12788251042366028, "logits/rejected": -0.127780020236969, "logps/chosen": -0.43477168679237366, "logps/rejected": -0.7111866474151611, "loss": 1.1802, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.43477168679237366, "rewards/margins": 0.27641505002975464, "rewards/rejected": -0.7111866474151611, "step": 280 }, { "epoch": 0.6092184368737475, "grad_norm": 8.860790080933853, "learning_rate": 3.960441545911204e-07, "logits/chosen": -0.161110520362854, "logits/rejected": -0.17115116119384766, "logps/chosen": -0.42666491866111755, "logps/rejected": -0.6175069808959961, "loss": 1.1752, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.42666491866111755, "rewards/margins": 0.19084201753139496, "rewards/rejected": -0.6175069808959961, "step": 285 }, { "epoch": 0.6199064796259185, "grad_norm": 7.06233886841465, "learning_rate": 3.778297969310529e-07, "logits/chosen": -0.1376260668039322, "logits/rejected": -0.16905562579631805, "logps/chosen": -0.4378681182861328, "logps/rejected": -0.6595510840415955, "loss": 1.1705, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4378681182861328, "rewards/margins": 0.22168295085430145, "rewards/rejected": -0.6595510840415955, "step": 290 }, { "epoch": 0.6305945223780896, "grad_norm": 10.466639287529468, "learning_rate": 3.5978630534699865e-07, "logits/chosen": -0.2025509625673294, "logits/rejected": -0.24337442219257355, "logps/chosen": -0.42942291498184204, "logps/rejected": -0.618620753288269, "loss": 1.1967, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.42942291498184204, "rewards/margins": 0.18919780850410461, "rewards/rejected": -0.618620753288269, "step": 295 }, { "epoch": 0.6412825651302605, "grad_norm": 8.130739148831358, "learning_rate": 3.4193891529348795e-07, "logits/chosen": -0.16753128170967102, "logits/rejected": -0.21636962890625, "logps/chosen": -0.4428304135799408, "logps/rejected": -0.6359456777572632, "loss": 1.1789, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4428304135799408, "rewards/margins": 0.19311529397964478, "rewards/rejected": -0.6359456777572632, "step": 300 }, { "epoch": 0.6519706078824316, "grad_norm": 7.273224925160923, "learning_rate": 3.243125879593286e-07, "logits/chosen": -0.15392135083675385, "logits/rejected": -0.21642914414405823, "logps/chosen": -0.47176113724708557, "logps/rejected": -0.6521760821342468, "loss": 1.1688, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.47176113724708557, "rewards/margins": 0.18041494488716125, "rewards/rejected": -0.6521760821342468, "step": 305 }, { "epoch": 0.6626586506346025, "grad_norm": 7.301685407436071, "learning_rate": 3.069319753571269e-07, "logits/chosen": -0.08738047629594803, "logits/rejected": -0.17941518127918243, "logps/chosen": -0.4444531500339508, "logps/rejected": -0.6384583711624146, "loss": 1.1782, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.4444531500339508, "rewards/margins": 0.19400522112846375, "rewards/rejected": -0.6384583711624146, "step": 310 }, { "epoch": 0.6733466933867736, "grad_norm": 8.792927929961811, "learning_rate": 2.898213858452173e-07, "logits/chosen": -0.12821796536445618, "logits/rejected": -0.17637769877910614, "logps/chosen": -0.430549293756485, "logps/rejected": -0.673394501209259, "loss": 1.1838, "rewards/accuracies": 0.78125, "rewards/chosen": -0.430549293756485, "rewards/margins": 0.24284520745277405, "rewards/rejected": -0.673394501209259, "step": 315 }, { "epoch": 0.6840347361389446, "grad_norm": 9.135320352888758, "learning_rate": 2.730047501302266e-07, "logits/chosen": -0.1663762778043747, "logits/rejected": -0.1760939061641693, "logps/chosen": -0.44656792283058167, "logps/rejected": -0.696045994758606, "loss": 1.1597, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -0.44656792283058167, "rewards/margins": 0.2494780570268631, "rewards/rejected": -0.696045994758606, "step": 320 }, { "epoch": 0.6947227788911156, "grad_norm": 7.683969138295821, "learning_rate": 2.5650558779781635e-07, "logits/chosen": -0.10967272520065308, "logits/rejected": -0.16051502525806427, "logps/chosen": -0.4491657614707947, "logps/rejected": -0.6456891298294067, "loss": 1.1768, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4491657614707947, "rewards/margins": 0.1965232938528061, "rewards/rejected": -0.6456891298294067, "step": 325 }, { "epoch": 0.7054108216432866, "grad_norm": 7.237560221559409, "learning_rate": 2.403469744184154e-07, "logits/chosen": -0.11157947778701782, "logits/rejected": -0.1505707949399948, "logps/chosen": -0.4335803985595703, "logps/rejected": -0.6525880694389343, "loss": 1.1697, "rewards/accuracies": 0.78125, "rewards/chosen": -0.4335803985595703, "rewards/margins": 0.2190077304840088, "rewards/rejected": -0.6525880694389343, "step": 330 }, { "epoch": 0.7160988643954576, "grad_norm": 10.892277516023302, "learning_rate": 2.2455150927394878e-07, "logits/chosen": -0.07384945452213287, "logits/rejected": -0.11905944347381592, "logps/chosen": -0.44785404205322266, "logps/rejected": -0.6679562926292419, "loss": 1.165, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.44785404205322266, "rewards/margins": 0.22010228037834167, "rewards/rejected": -0.6679562926292419, "step": 335 }, { "epoch": 0.7267869071476286, "grad_norm": 10.223464451109537, "learning_rate": 2.0914128375069722e-07, "logits/chosen": -0.1539086103439331, "logits/rejected": -0.17600053548812866, "logps/chosen": -0.4725138545036316, "logps/rejected": -0.6988605260848999, "loss": 1.1726, "rewards/accuracies": 0.75, "rewards/chosen": -0.4725138545036316, "rewards/margins": 0.2263466864824295, "rewards/rejected": -0.6988605260848999, "step": 340 }, { "epoch": 0.7374749498997996, "grad_norm": 9.502978098916548, "learning_rate": 1.9413785044249676e-07, "logits/chosen": -0.21818223595619202, "logits/rejected": -0.291820764541626, "logps/chosen": -0.4557631015777588, "logps/rejected": -0.6338008642196655, "loss": 1.1847, "rewards/accuracies": 0.75, "rewards/chosen": -0.4557631015777588, "rewards/margins": 0.17803780734539032, "rewards/rejected": -0.6338008642196655, "step": 345 }, { "epoch": 0.7481629926519706, "grad_norm": 8.906197274685542, "learning_rate": 1.7956219300748792e-07, "logits/chosen": -0.16545064747333527, "logits/rejected": -0.2143837958574295, "logps/chosen": -0.485832542181015, "logps/rejected": -0.6850191950798035, "loss": 1.1775, "rewards/accuracies": 0.78125, "rewards/chosen": -0.485832542181015, "rewards/margins": 0.19918662309646606, "rewards/rejected": -0.6850191950798035, "step": 350 }, { "epoch": 0.7588510354041417, "grad_norm": 9.223968907693207, "learning_rate": 1.6543469682057104e-07, "logits/chosen": -0.15154987573623657, "logits/rejected": -0.1329428106546402, "logps/chosen": -0.43399348855018616, "logps/rejected": -0.701296865940094, "loss": 1.1559, "rewards/accuracies": 0.856249988079071, "rewards/chosen": -0.43399348855018616, "rewards/margins": 0.26730337738990784, "rewards/rejected": -0.701296865940094, "step": 355 }, { "epoch": 0.7695390781563126, "grad_norm": 8.817413940297333, "learning_rate": 1.5177512046261666e-07, "logits/chosen": -0.1650848090648651, "logits/rejected": -0.18241888284683228, "logps/chosen": -0.4807559847831726, "logps/rejected": -0.7194213271141052, "loss": 1.1709, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4807559847831726, "rewards/margins": 0.2386653870344162, "rewards/rejected": -0.7194213271141052, "step": 360 }, { "epoch": 0.7802271209084837, "grad_norm": 9.841178407980468, "learning_rate": 1.3860256808630427e-07, "logits/chosen": -0.11777649074792862, "logits/rejected": -0.1842351108789444, "logps/chosen": -0.44159936904907227, "logps/rejected": -0.6455782651901245, "loss": 1.1759, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.44159936904907227, "rewards/margins": 0.20397885143756866, "rewards/rejected": -0.6455782651901245, "step": 365 }, { "epoch": 0.7909151636606546, "grad_norm": 7.821082078374487, "learning_rate": 1.2593546269723647e-07, "logits/chosen": -0.21091553568840027, "logits/rejected": -0.23493704199790955, "logps/chosen": -0.47468656301498413, "logps/rejected": -0.6395518779754639, "loss": 1.1857, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.47468656301498413, "rewards/margins": 0.16486527025699615, "rewards/rejected": -0.6395518779754639, "step": 370 }, { "epoch": 0.8016032064128257, "grad_norm": 8.045168365501775, "learning_rate": 1.1379152038770029e-07, "logits/chosen": -0.13924507796764374, "logits/rejected": -0.159819096326828, "logps/chosen": -0.4886155128479004, "logps/rejected": -0.7014551758766174, "loss": 1.1647, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.4886155128479004, "rewards/margins": 0.21283963322639465, "rewards/rejected": -0.7014551758766174, "step": 375 }, { "epoch": 0.8122912491649966, "grad_norm": 6.9748624620101465, "learning_rate": 1.0218772555910954e-07, "logits/chosen": -0.11520252376794815, "logits/rejected": -0.1667747050523758, "logps/chosen": -0.4690398573875427, "logps/rejected": -0.6327452659606934, "loss": 1.174, "rewards/accuracies": 0.71875, "rewards/chosen": -0.4690398573875427, "rewards/margins": 0.16370537877082825, "rewards/rejected": -0.6327452659606934, "step": 380 }, { "epoch": 0.8229792919171677, "grad_norm": 9.914022835602436, "learning_rate": 9.114030716778432e-08, "logits/chosen": -0.10868623107671738, "logits/rejected": -0.08764289319515228, "logps/chosen": -0.46836796402931213, "logps/rejected": -0.7122930288314819, "loss": 1.1774, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.46836796402931213, "rewards/margins": 0.2439250648021698, "rewards/rejected": -0.7122930288314819, "step": 385 }, { "epoch": 0.8336673346693386, "grad_norm": 8.145036242622055, "learning_rate": 8.066471602728803e-08, "logits/chosen": -0.06981998682022095, "logits/rejected": -0.1801852583885193, "logps/chosen": -0.448079913854599, "logps/rejected": -0.650068461894989, "loss": 1.1708, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.448079913854599, "rewards/margins": 0.20198853313922882, "rewards/rejected": -0.650068461894989, "step": 390 }, { "epoch": 0.8443553774215097, "grad_norm": 9.222740510393672, "learning_rate": 7.077560319906694e-08, "logits/chosen": -0.13624027371406555, "logits/rejected": -0.1553780883550644, "logps/chosen": -0.4544126093387604, "logps/rejected": -0.6581717133522034, "loss": 1.157, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -0.4544126093387604, "rewards/margins": 0.20375914871692657, "rewards/rejected": -0.6581717133522034, "step": 395 }, { "epoch": 0.8550434201736807, "grad_norm": 7.23560205761774, "learning_rate": 6.148679950161672e-08, "logits/chosen": -0.1308642476797104, "logits/rejected": -0.12576356530189514, "logps/chosen": -0.4730139374732971, "logps/rejected": -0.6597884893417358, "loss": 1.1796, "rewards/accuracies": 0.71875, "rewards/chosen": -0.4730139374732971, "rewards/margins": 0.18677455186843872, "rewards/rejected": -0.6597884893417358, "step": 400 }, { "epoch": 0.8550434201736807, "eval_logits/chosen": 0.07805733382701874, "eval_logits/rejected": 0.055379413068294525, "eval_logps/chosen": -0.46297329664230347, "eval_logps/rejected": -0.67759770154953, "eval_loss": 1.1742559671401978, "eval_rewards/accuracies": 0.7682926654815674, "eval_rewards/chosen": -0.46297329664230347, "eval_rewards/margins": 0.21462443470954895, "eval_rewards/rejected": -0.67759770154953, "eval_runtime": 422.9, "eval_samples_per_second": 4.637, "eval_steps_per_second": 0.291, "step": 400 }, { "epoch": 0.8657314629258517, "grad_norm": 14.062888091631285, "learning_rate": 5.2811296166831666e-08, "logits/chosen": -0.14197275042533875, "logits/rejected": -0.19198641180992126, "logps/chosen": -0.4838024973869324, "logps/rejected": -0.6774718165397644, "loss": 1.1821, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.4838024973869324, "rewards/margins": 0.19366928935050964, "rewards/rejected": -0.6774718165397644, "step": 405 }, { "epoch": 0.8764195056780227, "grad_norm": 9.708416047557408, "learning_rate": 4.4761226670592066e-08, "logits/chosen": -0.19223374128341675, "logits/rejected": -0.2349742203950882, "logps/chosen": -0.4497530460357666, "logps/rejected": -0.6498802900314331, "loss": 1.1835, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4497530460357666, "rewards/margins": 0.20012721419334412, "rewards/rejected": -0.6498802900314331, "step": 410 }, { "epoch": 0.8871075484301937, "grad_norm": 10.370830255142504, "learning_rate": 3.734784976300165e-08, "logits/chosen": -0.0883495882153511, "logits/rejected": -0.15084786713123322, "logps/chosen": -0.4854651093482971, "logps/rejected": -0.7252976298332214, "loss": 1.1824, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4854651093482971, "rewards/margins": 0.23983249068260193, "rewards/rejected": -0.7252976298332214, "step": 415 }, { "epoch": 0.8977955911823647, "grad_norm": 10.050934008810426, "learning_rate": 3.058153372200695e-08, "logits/chosen": -0.11994824558496475, "logits/rejected": -0.1958351880311966, "logps/chosen": -0.4402541220188141, "logps/rejected": -0.6553457379341125, "loss": 1.169, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4402541220188141, "rewards/margins": 0.21509161591529846, "rewards/rejected": -0.6553457379341125, "step": 420 }, { "epoch": 0.9084836339345357, "grad_norm": 10.234554818131295, "learning_rate": 2.4471741852423233e-08, "logits/chosen": -0.023924821987748146, "logits/rejected": -0.06178613379597664, "logps/chosen": -0.4181637763977051, "logps/rejected": -0.6148607134819031, "loss": 1.1693, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.4181637763977051, "rewards/margins": 0.1966969072818756, "rewards/rejected": -0.6148607134819031, "step": 425 }, { "epoch": 0.9191716766867067, "grad_norm": 7.946863491070272, "learning_rate": 1.9027019250647036e-08, "logits/chosen": -0.06870210915803909, "logits/rejected": -0.1558951586484909, "logps/chosen": -0.4686119556427002, "logps/rejected": -0.6619764566421509, "loss": 1.1734, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.4686119556427002, "rewards/margins": 0.19336441159248352, "rewards/rejected": -0.6619764566421509, "step": 430 }, { "epoch": 0.9298597194388778, "grad_norm": 7.127358424532503, "learning_rate": 1.4254980853566246e-08, "logits/chosen": -0.16497072577476501, "logits/rejected": -0.2038412094116211, "logps/chosen": -0.4426957666873932, "logps/rejected": -0.6900163888931274, "loss": 1.1724, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4426957666873932, "rewards/margins": 0.2473207414150238, "rewards/rejected": -0.6900163888931274, "step": 435 }, { "epoch": 0.9405477621910487, "grad_norm": 8.700791903961722, "learning_rate": 1.016230078838226e-08, "logits/chosen": -0.16181275248527527, "logits/rejected": -0.19335032999515533, "logps/chosen": -0.4476253092288971, "logps/rejected": -0.6875879168510437, "loss": 1.1756, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -0.4476253092288971, "rewards/margins": 0.2399626225233078, "rewards/rejected": -0.6875879168510437, "step": 440 }, { "epoch": 0.9512358049432198, "grad_norm": 7.814249452801736, "learning_rate": 6.754703038239329e-09, "logits/chosen": -0.1357225775718689, "logits/rejected": -0.13643740117549896, "logps/chosen": -0.44572919607162476, "logps/rejected": -0.6457980871200562, "loss": 1.168, "rewards/accuracies": 0.75, "rewards/chosen": -0.44572919607162476, "rewards/margins": 0.20006892085075378, "rewards/rejected": -0.6457980871200562, "step": 445 }, { "epoch": 0.9619238476953907, "grad_norm": 7.707995425556612, "learning_rate": 4.036953436716895e-09, "logits/chosen": -0.09782582521438599, "logits/rejected": -0.12389625608921051, "logps/chosen": -0.5031002759933472, "logps/rejected": -0.7217256426811218, "loss": 1.1607, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -0.5031002759933472, "rewards/margins": 0.21862535178661346, "rewards/rejected": -0.7217256426811218, "step": 450 }, { "epoch": 0.9726118904475618, "grad_norm": 8.180782374754285, "learning_rate": 2.0128530023804656e-09, "logits/chosen": -0.049927808344364166, "logits/rejected": -0.12839142978191376, "logps/chosen": -0.4278712272644043, "logps/rejected": -0.662420928478241, "loss": 1.1491, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.4278712272644043, "rewards/margins": 0.23454967141151428, "rewards/rejected": -0.662420928478241, "step": 455 }, { "epoch": 0.9832999331997327, "grad_norm": 9.706979128792979, "learning_rate": 6.852326227130833e-10, "logits/chosen": 0.007074593100696802, "logits/rejected": -0.09796188771724701, "logps/chosen": -0.4365665316581726, "logps/rejected": -0.6665615439414978, "loss": 1.183, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4365665316581726, "rewards/margins": 0.2299949824810028, "rewards/rejected": -0.6665615439414978, "step": 460 }, { "epoch": 0.9939879759519038, "grad_norm": 8.36305887438443, "learning_rate": 5.594909486328348e-11, "logits/chosen": -0.06538979709148407, "logits/rejected": -0.11664478480815887, "logps/chosen": -0.4604206681251526, "logps/rejected": -0.6352392435073853, "loss": 1.1796, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4604206681251526, "rewards/margins": 0.17481860518455505, "rewards/rejected": -0.6352392435073853, "step": 465 }, { "epoch": 0.9982631930527722, "step": 467, "total_flos": 0.0, "train_loss": 1.2022870587587866, "train_runtime": 20947.8363, "train_samples_per_second": 2.858, "train_steps_per_second": 0.022 } ], "logging_steps": 5, "max_steps": 467, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }