|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9964868029907215, |
|
"eval_steps": 800, |
|
"global_step": 2079, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0014413115935501305, |
|
"grad_norm": 21.287893295288086, |
|
"learning_rate": 4.807692307692308e-10, |
|
"logits/chosen": -2.3065450191497803, |
|
"logits/rejected": -2.3093364238739014, |
|
"logps/chosen": -43.837303161621094, |
|
"logps/rejected": -48.05693054199219, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.0625, |
|
"rewards/chosen": 9.900308214128017e-06, |
|
"rewards/margins": 0.0009647191036492586, |
|
"rewards/rejected": -0.0009548187954351306, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.014413115935501306, |
|
"grad_norm": 21.111068725585938, |
|
"learning_rate": 4.807692307692308e-09, |
|
"logits/chosen": -2.3277647495269775, |
|
"logits/rejected": -2.3011653423309326, |
|
"logps/chosen": -42.82107162475586, |
|
"logps/rejected": -44.892906188964844, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": -0.001543219666928053, |
|
"rewards/margins": 0.0007266975590027869, |
|
"rewards/rejected": -0.002269917167723179, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02882623187100261, |
|
"grad_norm": 19.57785987854004, |
|
"learning_rate": 9.615384615384615e-09, |
|
"logits/chosen": -2.288435459136963, |
|
"logits/rejected": -2.2758889198303223, |
|
"logps/chosen": -45.44641876220703, |
|
"logps/rejected": -48.17905044555664, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0001232023787451908, |
|
"rewards/margins": 0.0005566190229728818, |
|
"rewards/rejected": -0.00043341662967577577, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04323934780650392, |
|
"grad_norm": 26.79755401611328, |
|
"learning_rate": 1.442307692307692e-08, |
|
"logits/chosen": -2.3148436546325684, |
|
"logits/rejected": -2.3025364875793457, |
|
"logps/chosen": -46.84934616088867, |
|
"logps/rejected": -48.419532775878906, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.00020735207363031805, |
|
"rewards/margins": 0.0002374596951995045, |
|
"rewards/rejected": -0.0004448117106221616, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05765246374200522, |
|
"grad_norm": 22.711183547973633, |
|
"learning_rate": 1.923076923076923e-08, |
|
"logits/chosen": -2.347923517227173, |
|
"logits/rejected": -2.3385748863220215, |
|
"logps/chosen": -50.60676956176758, |
|
"logps/rejected": -52.752296447753906, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.4781250059604645, |
|
"rewards/chosen": -0.00011574259406188503, |
|
"rewards/margins": 0.0005067865131422877, |
|
"rewards/rejected": -0.0006225291872397065, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07206557967750653, |
|
"grad_norm": 23.8702392578125, |
|
"learning_rate": 2.403846153846154e-08, |
|
"logits/chosen": -2.329652786254883, |
|
"logits/rejected": -2.322984218597412, |
|
"logps/chosen": -47.366519927978516, |
|
"logps/rejected": -49.952056884765625, |
|
"loss": 0.6938, |
|
"rewards/accuracies": 0.40937501192092896, |
|
"rewards/chosen": -0.00085345224943012, |
|
"rewards/margins": -0.0012199878692626953, |
|
"rewards/rejected": 0.0003665355616249144, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.08647869561300783, |
|
"grad_norm": 18.53515625, |
|
"learning_rate": 2.884615384615384e-08, |
|
"logits/chosen": -2.3065052032470703, |
|
"logits/rejected": -2.2884154319763184, |
|
"logps/chosen": -46.60871124267578, |
|
"logps/rejected": -48.89433670043945, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.4781250059604645, |
|
"rewards/chosen": -0.0008969244663603604, |
|
"rewards/margins": -0.00045310668065212667, |
|
"rewards/rejected": -0.0004438180476427078, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.10089181154850914, |
|
"grad_norm": 18.022958755493164, |
|
"learning_rate": 3.365384615384615e-08, |
|
"logits/chosen": -2.3429152965545654, |
|
"logits/rejected": -2.327995777130127, |
|
"logps/chosen": -47.61640167236328, |
|
"logps/rejected": -50.77202606201172, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.00031301408307626843, |
|
"rewards/margins": 0.000775355554651469, |
|
"rewards/rejected": -0.0010883695213124156, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.11530492748401044, |
|
"grad_norm": 18.332494735717773, |
|
"learning_rate": 3.846153846153846e-08, |
|
"logits/chosen": -2.3375937938690186, |
|
"logits/rejected": -2.311336040496826, |
|
"logps/chosen": -44.785614013671875, |
|
"logps/rejected": -48.0066032409668, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.00027114865952171385, |
|
"rewards/margins": -9.801401574804913e-06, |
|
"rewards/rejected": 0.00028095004381611943, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.12971804341951176, |
|
"grad_norm": 18.671464920043945, |
|
"learning_rate": 4.326923076923077e-08, |
|
"logits/chosen": -2.3199424743652344, |
|
"logits/rejected": -2.2938828468322754, |
|
"logps/chosen": -44.98554611206055, |
|
"logps/rejected": -46.84703826904297, |
|
"loss": 0.6941, |
|
"rewards/accuracies": 0.46562498807907104, |
|
"rewards/chosen": -0.0013123347889631987, |
|
"rewards/margins": -0.0016984030371531844, |
|
"rewards/rejected": 0.0003860682772938162, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.14413115935501306, |
|
"grad_norm": 23.246063232421875, |
|
"learning_rate": 4.807692307692308e-08, |
|
"logits/chosen": -2.3865551948547363, |
|
"logits/rejected": -2.3804287910461426, |
|
"logps/chosen": -42.85012435913086, |
|
"logps/rejected": -46.24885177612305, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": 0.0006392289651557803, |
|
"rewards/margins": -0.00039641355397179723, |
|
"rewards/rejected": 0.0010356425773352385, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15854427529051437, |
|
"grad_norm": 19.473865509033203, |
|
"learning_rate": 5.288461538461538e-08, |
|
"logits/chosen": -2.3142473697662354, |
|
"logits/rejected": -2.3037219047546387, |
|
"logps/chosen": -45.20660400390625, |
|
"logps/rejected": -47.961936950683594, |
|
"loss": 0.6937, |
|
"rewards/accuracies": 0.453125, |
|
"rewards/chosen": -0.0003208608250133693, |
|
"rewards/margins": -0.0010648202151060104, |
|
"rewards/rejected": 0.0007439593900926411, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.17295739122601567, |
|
"grad_norm": 22.531965255737305, |
|
"learning_rate": 5.769230769230768e-08, |
|
"logits/chosen": -2.3439688682556152, |
|
"logits/rejected": -2.3307394981384277, |
|
"logps/chosen": -46.80133056640625, |
|
"logps/rejected": -49.72489547729492, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.453125, |
|
"rewards/chosen": 1.1269899914623238e-05, |
|
"rewards/margins": -0.00012028318451484665, |
|
"rewards/rejected": 0.0001315530389547348, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.18737050716151699, |
|
"grad_norm": 24.544965744018555, |
|
"learning_rate": 6.25e-08, |
|
"logits/chosen": -2.2930805683135986, |
|
"logits/rejected": -2.279456377029419, |
|
"logps/chosen": -49.83013916015625, |
|
"logps/rejected": -51.1182975769043, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.534375011920929, |
|
"rewards/chosen": 0.0026741260662674904, |
|
"rewards/margins": 0.0031126493122428656, |
|
"rewards/rejected": -0.00043852307135239244, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.20178362309701828, |
|
"grad_norm": 19.973854064941406, |
|
"learning_rate": 6.73076923076923e-08, |
|
"logits/chosen": -2.3479955196380615, |
|
"logits/rejected": -2.3185691833496094, |
|
"logps/chosen": -44.58715057373047, |
|
"logps/rejected": -46.512718200683594, |
|
"loss": 0.691, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0033654593862593174, |
|
"rewards/margins": 0.0043604555539786816, |
|
"rewards/rejected": -0.0009949964005500078, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2161967390325196, |
|
"grad_norm": 20.358179092407227, |
|
"learning_rate": 7.211538461538461e-08, |
|
"logits/chosen": -2.3313281536102295, |
|
"logits/rejected": -2.307065486907959, |
|
"logps/chosen": -46.340797424316406, |
|
"logps/rejected": -48.234344482421875, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": 0.00332791730761528, |
|
"rewards/margins": 0.002700595883652568, |
|
"rewards/rejected": 0.0006273213075473905, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2306098549680209, |
|
"grad_norm": 20.67792510986328, |
|
"learning_rate": 7.692307692307692e-08, |
|
"logits/chosen": -2.305814266204834, |
|
"logits/rejected": -2.290213108062744, |
|
"logps/chosen": -47.53135681152344, |
|
"logps/rejected": -50.77891540527344, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": 0.005107083357870579, |
|
"rewards/margins": 0.002246940741315484, |
|
"rewards/rejected": 0.002860142383724451, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2450229709035222, |
|
"grad_norm": 20.16474723815918, |
|
"learning_rate": 8.173076923076923e-08, |
|
"logits/chosen": -2.349069356918335, |
|
"logits/rejected": -2.331510066986084, |
|
"logps/chosen": -50.888572692871094, |
|
"logps/rejected": -50.80742645263672, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.515625, |
|
"rewards/chosen": 0.0035659130662679672, |
|
"rewards/margins": 0.0006433400558307767, |
|
"rewards/rejected": 0.0029225728940218687, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.2594360868390235, |
|
"grad_norm": 22.5780029296875, |
|
"learning_rate": 8.653846153846154e-08, |
|
"logits/chosen": -2.305760622024536, |
|
"logits/rejected": -2.28718638420105, |
|
"logps/chosen": -49.73812484741211, |
|
"logps/rejected": -52.55815887451172, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": 0.00477216811850667, |
|
"rewards/margins": 0.0024364416021853685, |
|
"rewards/rejected": 0.002335726749151945, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2738492027745248, |
|
"grad_norm": 20.311643600463867, |
|
"learning_rate": 9.134615384615383e-08, |
|
"logits/chosen": -2.3253870010375977, |
|
"logits/rejected": -2.3114845752716064, |
|
"logps/chosen": -47.62921142578125, |
|
"logps/rejected": -50.506046295166016, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.503125011920929, |
|
"rewards/chosen": 0.004789062775671482, |
|
"rewards/margins": 0.0012888021301478148, |
|
"rewards/rejected": 0.003500260878354311, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.2882623187100261, |
|
"grad_norm": 22.185544967651367, |
|
"learning_rate": 9.615384615384616e-08, |
|
"logits/chosen": -2.351552963256836, |
|
"logits/rejected": -2.335294246673584, |
|
"logps/chosen": -46.49347686767578, |
|
"logps/rejected": -48.789695739746094, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": 0.006301163230091333, |
|
"rewards/margins": 0.0041467128321528435, |
|
"rewards/rejected": 0.002154449699446559, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.30267543464552743, |
|
"grad_norm": 19.66486167907715, |
|
"learning_rate": 9.999971806320255e-08, |
|
"logits/chosen": -2.3723862171173096, |
|
"logits/rejected": -2.3726882934570312, |
|
"logps/chosen": -46.52350616455078, |
|
"logps/rejected": -49.28281021118164, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.00785305630415678, |
|
"rewards/margins": 0.004859209060668945, |
|
"rewards/rejected": 0.0029938467778265476, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.31708855058102875, |
|
"grad_norm": 21.849241256713867, |
|
"learning_rate": 9.998985060913876e-08, |
|
"logits/chosen": -2.29600191116333, |
|
"logits/rejected": -2.280897855758667, |
|
"logps/chosen": -45.98343276977539, |
|
"logps/rejected": -47.80237579345703, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": 0.007365316152572632, |
|
"rewards/margins": 0.0048685320653021336, |
|
"rewards/rejected": 0.0024967845529317856, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.33150166651653007, |
|
"grad_norm": 29.88494300842285, |
|
"learning_rate": 9.996588949457546e-08, |
|
"logits/chosen": -2.3516383171081543, |
|
"logits/rejected": -2.3211302757263184, |
|
"logps/chosen": -53.43247604370117, |
|
"logps/rejected": -53.46599197387695, |
|
"loss": 0.6896, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.009954456239938736, |
|
"rewards/margins": 0.007440419401973486, |
|
"rewards/rejected": 0.0025140370707958937, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.34591478245203133, |
|
"grad_norm": 21.79450798034668, |
|
"learning_rate": 9.992784147488017e-08, |
|
"logits/chosen": -2.3784842491149902, |
|
"logits/rejected": -2.3466084003448486, |
|
"logps/chosen": -47.536399841308594, |
|
"logps/rejected": -49.28679275512695, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.008210290223360062, |
|
"rewards/margins": 0.0038244971074163914, |
|
"rewards/rejected": 0.004385794512927532, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.36032789838753265, |
|
"grad_norm": 19.872880935668945, |
|
"learning_rate": 9.987571727694775e-08, |
|
"logits/chosen": -2.3470616340637207, |
|
"logits/rejected": -2.322946548461914, |
|
"logps/chosen": -44.467933654785156, |
|
"logps/rejected": -47.677467346191406, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.565625011920929, |
|
"rewards/chosen": 0.01355994027107954, |
|
"rewards/margins": 0.008894408121705055, |
|
"rewards/rejected": 0.00466553308069706, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.37474101432303397, |
|
"grad_norm": 25.680984497070312, |
|
"learning_rate": 9.98095315961762e-08, |
|
"logits/chosen": -2.3500027656555176, |
|
"logits/rejected": -2.3312158584594727, |
|
"logps/chosen": -49.896331787109375, |
|
"logps/rejected": -51.985511779785156, |
|
"loss": 0.6901, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.012083685956895351, |
|
"rewards/margins": 0.0064600324258208275, |
|
"rewards/rejected": 0.005623653531074524, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.3891541302585353, |
|
"grad_norm": 21.92185401916504, |
|
"learning_rate": 9.97293030923235e-08, |
|
"logits/chosen": -2.3450703620910645, |
|
"logits/rejected": -2.3156611919403076, |
|
"logps/chosen": -46.08183670043945, |
|
"logps/rejected": -47.64861297607422, |
|
"loss": 0.6882, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.01599208638072014, |
|
"rewards/margins": 0.010242667980492115, |
|
"rewards/rejected": 0.005749420262873173, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.40356724619403656, |
|
"grad_norm": 22.253570556640625, |
|
"learning_rate": 9.963505438424693e-08, |
|
"logits/chosen": -2.311044454574585, |
|
"logits/rejected": -2.294201612472534, |
|
"logps/chosen": -48.35077667236328, |
|
"logps/rejected": -49.112300872802734, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": 0.013459725305438042, |
|
"rewards/margins": 0.006622823420912027, |
|
"rewards/rejected": 0.006836901418864727, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.4179803621295379, |
|
"grad_norm": 20.787418365478516, |
|
"learning_rate": 9.952681204352607e-08, |
|
"logits/chosen": -2.333068609237671, |
|
"logits/rejected": -2.299546003341675, |
|
"logps/chosen": -47.46521759033203, |
|
"logps/rejected": -49.262413024902344, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.019271234050393105, |
|
"rewards/margins": 0.013373048976063728, |
|
"rewards/rejected": 0.005898186471313238, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4323934780650392, |
|
"grad_norm": 23.323768615722656, |
|
"learning_rate": 9.94046065869715e-08, |
|
"logits/chosen": -2.343045234680176, |
|
"logits/rejected": -2.330648899078369, |
|
"logps/chosen": -46.3621711730957, |
|
"logps/rejected": -51.10980987548828, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": 0.020211653783917427, |
|
"rewards/margins": 0.01878417655825615, |
|
"rewards/rejected": 0.0014274796703830361, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.4468065940005405, |
|
"grad_norm": 16.884517669677734, |
|
"learning_rate": 9.926847246802116e-08, |
|
"logits/chosen": -2.325950860977173, |
|
"logits/rejected": -2.298354148864746, |
|
"logps/chosen": -46.33721160888672, |
|
"logps/rejected": -47.747737884521484, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": 0.0174238421022892, |
|
"rewards/margins": 0.011514835990965366, |
|
"rewards/rejected": 0.005909005645662546, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.4612197099360418, |
|
"grad_norm": 20.160341262817383, |
|
"learning_rate": 9.911844806702691e-08, |
|
"logits/chosen": -2.324944019317627, |
|
"logits/rejected": -2.314134120941162, |
|
"logps/chosen": -44.099708557128906, |
|
"logps/rejected": -47.27263641357422, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.023020442575216293, |
|
"rewards/margins": 0.019213002175092697, |
|
"rewards/rejected": 0.0038074390031397343, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.4756328258715431, |
|
"grad_norm": 23.56479263305664, |
|
"learning_rate": 9.895457568043387e-08, |
|
"logits/chosen": -2.349403142929077, |
|
"logits/rejected": -2.330629825592041, |
|
"logps/chosen": -46.192623138427734, |
|
"logps/rejected": -47.701568603515625, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.02301434613764286, |
|
"rewards/margins": 0.018842367455363274, |
|
"rewards/rejected": 0.004171978682279587, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.4900459418070444, |
|
"grad_norm": 26.761240005493164, |
|
"learning_rate": 9.877690150885587e-08, |
|
"logits/chosen": -2.29034423828125, |
|
"logits/rejected": -2.259958028793335, |
|
"logps/chosen": -49.6630859375, |
|
"logps/rejected": -50.368370056152344, |
|
"loss": 0.6842, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.02521464228630066, |
|
"rewards/margins": 0.018750475719571114, |
|
"rewards/rejected": 0.006464164704084396, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5044590577425457, |
|
"grad_norm": 17.43931770324707, |
|
"learning_rate": 9.858547564404998e-08, |
|
"logits/chosen": -2.3370158672332764, |
|
"logits/rejected": -2.314396381378174, |
|
"logps/chosen": -47.03008270263672, |
|
"logps/rejected": -49.929176330566406, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.02510705217719078, |
|
"rewards/margins": 0.020147310569882393, |
|
"rewards/rejected": 0.004959740675985813, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.518872173678047, |
|
"grad_norm": 21.918987274169922, |
|
"learning_rate": 9.838035205479418e-08, |
|
"logits/chosen": -2.302408218383789, |
|
"logits/rejected": -2.283946990966797, |
|
"logps/chosen": -45.20514678955078, |
|
"logps/rejected": -47.9190559387207, |
|
"loss": 0.6832, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.025177201256155968, |
|
"rewards/margins": 0.021118884906172752, |
|
"rewards/rejected": 0.004058316815644503, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5332852896135484, |
|
"grad_norm": 24.303964614868164, |
|
"learning_rate": 9.816158857167196e-08, |
|
"logits/chosen": -2.3255884647369385, |
|
"logits/rejected": -2.3051230907440186, |
|
"logps/chosen": -47.729164123535156, |
|
"logps/rejected": -48.6644401550293, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.02227911166846752, |
|
"rewards/margins": 0.016225317493081093, |
|
"rewards/rejected": 0.006053795572370291, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.5476984055490496, |
|
"grad_norm": 24.593324661254883, |
|
"learning_rate": 9.7929246870768e-08, |
|
"logits/chosen": -2.323387384414673, |
|
"logits/rejected": -2.308987855911255, |
|
"logps/chosen": -47.08444595336914, |
|
"logps/rejected": -49.36346435546875, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.028125789016485214, |
|
"rewards/margins": 0.02563219703733921, |
|
"rewards/rejected": 0.0024935940746217966, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.5621115214845509, |
|
"grad_norm": 21.75504493713379, |
|
"learning_rate": 9.768339245627993e-08, |
|
"logits/chosen": -2.294553518295288, |
|
"logits/rejected": -2.2829360961914062, |
|
"logps/chosen": -45.68154525756836, |
|
"logps/rejected": -48.066612243652344, |
|
"loss": 0.6796, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": 0.030594781041145325, |
|
"rewards/margins": 0.02912401221692562, |
|
"rewards/rejected": 0.0014707675436511636, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.5765246374200522, |
|
"grad_norm": 21.862024307250977, |
|
"learning_rate": 9.742409464205059e-08, |
|
"logits/chosen": -2.3316078186035156, |
|
"logits/rejected": -2.3144497871398926, |
|
"logps/chosen": -48.248443603515625, |
|
"logps/rejected": -50.65959548950195, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.032895296812057495, |
|
"rewards/margins": 0.026586730033159256, |
|
"rewards/rejected": 0.006308571435511112, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.5909377533555535, |
|
"grad_norm": 20.746496200561523, |
|
"learning_rate": 9.715142653202644e-08, |
|
"logits/chosen": -2.3111040592193604, |
|
"logits/rejected": -2.293923854827881, |
|
"logps/chosen": -45.711585998535156, |
|
"logps/rejected": -47.74725341796875, |
|
"loss": 0.6801, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.029646728187799454, |
|
"rewards/margins": 0.028508460149168968, |
|
"rewards/rejected": 0.001138267107307911, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6053508692910549, |
|
"grad_norm": 17.426355361938477, |
|
"learning_rate": 9.68654649996473e-08, |
|
"logits/chosen": -2.32612681388855, |
|
"logits/rejected": -2.314548969268799, |
|
"logps/chosen": -44.34481430053711, |
|
"logps/rejected": -46.076744079589844, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.03846784681081772, |
|
"rewards/margins": 0.0339292511343956, |
|
"rewards/rejected": 0.004538598004728556, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6197639852265562, |
|
"grad_norm": 22.193706512451172, |
|
"learning_rate": 9.656629066617335e-08, |
|
"logits/chosen": -2.312929630279541, |
|
"logits/rejected": -2.2936108112335205, |
|
"logps/chosen": -51.8912467956543, |
|
"logps/rejected": -53.661651611328125, |
|
"loss": 0.6762, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.034915387630462646, |
|
"rewards/margins": 0.0364970937371254, |
|
"rewards/rejected": -0.001581709599122405, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.6341771011620575, |
|
"grad_norm": 20.207712173461914, |
|
"learning_rate": 9.62539878779556e-08, |
|
"logits/chosen": -2.3140978813171387, |
|
"logits/rejected": -2.3001463413238525, |
|
"logps/chosen": -46.92143630981445, |
|
"logps/rejected": -49.142173767089844, |
|
"loss": 0.6789, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.0315568745136261, |
|
"rewards/margins": 0.03146868199110031, |
|
"rewards/rejected": 8.819000504445285e-05, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.6485902170975588, |
|
"grad_norm": 26.56516456604004, |
|
"learning_rate": 9.592864468265604e-08, |
|
"logits/chosen": -2.3407697677612305, |
|
"logits/rejected": -2.3281524181365967, |
|
"logps/chosen": -48.421573638916016, |
|
"logps/rejected": -50.97784423828125, |
|
"loss": 0.6779, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": 0.033747684210538864, |
|
"rewards/margins": 0.03344957157969475, |
|
"rewards/rejected": 0.0002981135621666908, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.6630033330330601, |
|
"grad_norm": 23.340192794799805, |
|
"learning_rate": 9.559035280442441e-08, |
|
"logits/chosen": -2.299325942993164, |
|
"logits/rejected": -2.2764008045196533, |
|
"logps/chosen": -46.117820739746094, |
|
"logps/rejected": -47.99044418334961, |
|
"loss": 0.6785, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.03373030200600624, |
|
"rewards/margins": 0.03273017704486847, |
|
"rewards/rejected": 0.0010001230984926224, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.6774164489685613, |
|
"grad_norm": 20.64352798461914, |
|
"learning_rate": 9.523920761803823e-08, |
|
"logits/chosen": -2.3671813011169434, |
|
"logits/rejected": -2.346903085708618, |
|
"logps/chosen": -50.475929260253906, |
|
"logps/rejected": -52.254722595214844, |
|
"loss": 0.6762, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": 0.039838653057813644, |
|
"rewards/margins": 0.036926448345184326, |
|
"rewards/rejected": 0.002912207506597042, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.6918295649040627, |
|
"grad_norm": 22.629377365112305, |
|
"learning_rate": 9.487530812201383e-08, |
|
"logits/chosen": -2.3167202472686768, |
|
"logits/rejected": -2.3114185333251953, |
|
"logps/chosen": -47.46406936645508, |
|
"logps/rejected": -51.130619049072266, |
|
"loss": 0.6793, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.036858368664979935, |
|
"rewards/margins": 0.030934974551200867, |
|
"rewards/rejected": 0.005923392251133919, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.706242680839564, |
|
"grad_norm": 23.737966537475586, |
|
"learning_rate": 9.449875691069571e-08, |
|
"logits/chosen": -2.3150174617767334, |
|
"logits/rejected": -2.3096041679382324, |
|
"logps/chosen": -47.43329620361328, |
|
"logps/rejected": -51.78876876831055, |
|
"loss": 0.6761, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.041950371116399765, |
|
"rewards/margins": 0.03815682604908943, |
|
"rewards/rejected": 0.003793553216382861, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.7206557967750653, |
|
"grad_norm": 22.39310646057129, |
|
"learning_rate": 9.410966014533195e-08, |
|
"logits/chosen": -2.3104045391082764, |
|
"logits/rejected": -2.2926175594329834, |
|
"logps/chosen": -48.61553955078125, |
|
"logps/rejected": -50.52370834350586, |
|
"loss": 0.6772, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": 0.04291251674294472, |
|
"rewards/margins": 0.03616334870457649, |
|
"rewards/rejected": 0.00674917409196496, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.7350689127105666, |
|
"grad_norm": 22.58913803100586, |
|
"learning_rate": 9.37081275241442e-08, |
|
"logits/chosen": -2.308424711227417, |
|
"logits/rejected": -2.2905216217041016, |
|
"logps/chosen": -44.69788360595703, |
|
"logps/rejected": -46.66436004638672, |
|
"loss": 0.679, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": 0.03513062000274658, |
|
"rewards/margins": 0.031742557883262634, |
|
"rewards/rejected": 0.003388059791177511, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.7494820286460679, |
|
"grad_norm": 21.075912475585938, |
|
"learning_rate": 9.329427225140042e-08, |
|
"logits/chosen": -2.297698974609375, |
|
"logits/rejected": -2.280587911605835, |
|
"logps/chosen": -44.73003387451172, |
|
"logps/rejected": -47.60186004638672, |
|
"loss": 0.6711, |
|
"rewards/accuracies": 0.5718749761581421, |
|
"rewards/chosen": 0.04519854113459587, |
|
"rewards/margins": 0.04976027458906174, |
|
"rewards/rejected": -0.004561735317111015, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.7638951445815693, |
|
"grad_norm": 19.522680282592773, |
|
"learning_rate": 9.286821100549906e-08, |
|
"logits/chosen": -2.307309150695801, |
|
"logits/rejected": -2.272381544113159, |
|
"logps/chosen": -44.07297897338867, |
|
"logps/rejected": -46.60788345336914, |
|
"loss": 0.6686, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.04921981692314148, |
|
"rewards/margins": 0.05518011003732681, |
|
"rewards/rejected": -0.005960285663604736, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.7783082605170706, |
|
"grad_norm": 21.739355087280273, |
|
"learning_rate": 9.243006390607402e-08, |
|
"logits/chosen": -2.3263185024261475, |
|
"logits/rejected": -2.316568613052368, |
|
"logps/chosen": -50.72089385986328, |
|
"logps/rejected": -54.4951057434082, |
|
"loss": 0.6673, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": 0.05768689513206482, |
|
"rewards/margins": 0.05719071626663208, |
|
"rewards/rejected": 0.0004961833474226296, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.7927213764525718, |
|
"grad_norm": 22.593128204345703, |
|
"learning_rate": 9.197995448012912e-08, |
|
"logits/chosen": -2.340376853942871, |
|
"logits/rejected": -2.319169044494629, |
|
"logps/chosen": -49.77606964111328, |
|
"logps/rejected": -52.580345153808594, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.042526066303253174, |
|
"rewards/margins": 0.05721992999315262, |
|
"rewards/rejected": -0.014693861827254295, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.8071344923880731, |
|
"grad_norm": 19.409584045410156, |
|
"learning_rate": 9.151800962721217e-08, |
|
"logits/chosen": -2.282543182373047, |
|
"logits/rejected": -2.2616894245147705, |
|
"logps/chosen": -45.87504959106445, |
|
"logps/rejected": -47.72060775756836, |
|
"loss": 0.668, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": 0.04841776564717293, |
|
"rewards/margins": 0.05708543583750725, |
|
"rewards/rejected": -0.008667677640914917, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.8215476083235744, |
|
"grad_norm": 23.79241371154785, |
|
"learning_rate": 9.104435958363807e-08, |
|
"logits/chosen": -2.3320469856262207, |
|
"logits/rejected": -2.321155309677124, |
|
"logps/chosen": -45.04407501220703, |
|
"logps/rejected": -47.318321228027344, |
|
"loss": 0.67, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": 0.0415559858083725, |
|
"rewards/margins": 0.053853344172239304, |
|
"rewards/rejected": -0.012297360226511955, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.8359607242590757, |
|
"grad_norm": 20.56794548034668, |
|
"learning_rate": 9.055913788577128e-08, |
|
"logits/chosen": -2.300914764404297, |
|
"logits/rejected": -2.283904790878296, |
|
"logps/chosen": -49.61848068237305, |
|
"logps/rejected": -51.29194259643555, |
|
"loss": 0.6718, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": 0.03667648881673813, |
|
"rewards/margins": 0.050280071794986725, |
|
"rewards/rejected": -0.013603581115603447, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.8503738401945771, |
|
"grad_norm": 26.347415924072266, |
|
"learning_rate": 9.006248133237782e-08, |
|
"logits/chosen": -2.337028980255127, |
|
"logits/rejected": -2.301743984222412, |
|
"logps/chosen": -47.668113708496094, |
|
"logps/rejected": -48.43418884277344, |
|
"loss": 0.6724, |
|
"rewards/accuracies": 0.578125, |
|
"rewards/chosen": 0.036908913403749466, |
|
"rewards/margins": 0.04973364248871803, |
|
"rewards/rejected": -0.012824726291000843, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.8647869561300784, |
|
"grad_norm": 23.15882110595703, |
|
"learning_rate": 8.955452994605753e-08, |
|
"logits/chosen": -2.316641330718994, |
|
"logits/rejected": -2.285348653793335, |
|
"logps/chosen": -50.31378173828125, |
|
"logps/rejected": -52.169654846191406, |
|
"loss": 0.67, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": 0.032930124551057816, |
|
"rewards/margins": 0.05345344543457031, |
|
"rewards/rejected": -0.0205233171582222, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.8792000720655797, |
|
"grad_norm": 22.113082885742188, |
|
"learning_rate": 8.903542693376747e-08, |
|
"logits/chosen": -2.2881808280944824, |
|
"logits/rejected": -2.277193784713745, |
|
"logps/chosen": -44.89480209350586, |
|
"logps/rejected": -48.47951126098633, |
|
"loss": 0.6676, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": 0.04625421017408371, |
|
"rewards/margins": 0.06053239107131958, |
|
"rewards/rejected": -0.014278176240622997, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.893613188001081, |
|
"grad_norm": 24.942516326904297, |
|
"learning_rate": 8.850531864644748e-08, |
|
"logits/chosen": -2.297983407974243, |
|
"logits/rejected": -2.265835762023926, |
|
"logps/chosen": -45.724159240722656, |
|
"logps/rejected": -48.599876403808594, |
|
"loss": 0.6627, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": 0.047544609755277634, |
|
"rewards/margins": 0.07158304750919342, |
|
"rewards/rejected": -0.024038437753915787, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.9080263039365822, |
|
"grad_norm": 21.02975845336914, |
|
"learning_rate": 8.796435453775943e-08, |
|
"logits/chosen": -2.3123021125793457, |
|
"logits/rejected": -2.3164889812469482, |
|
"logps/chosen": -48.549461364746094, |
|
"logps/rejected": -53.92346954345703, |
|
"loss": 0.6717, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": 0.031459733843803406, |
|
"rewards/margins": 0.051202088594436646, |
|
"rewards/rejected": -0.01974235475063324, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.9224394198720836, |
|
"grad_norm": 21.564970016479492, |
|
"learning_rate": 8.741268712195164e-08, |
|
"logits/chosen": -2.325171947479248, |
|
"logits/rejected": -2.2981820106506348, |
|
"logps/chosen": -45.690956115722656, |
|
"logps/rejected": -49.10464859008789, |
|
"loss": 0.6558, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.05857594683766365, |
|
"rewards/margins": 0.08950765430927277, |
|
"rewards/rejected": -0.030931716784834862, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.9368525358075849, |
|
"grad_norm": 22.0821475982666, |
|
"learning_rate": 8.685047193086053e-08, |
|
"logits/chosen": -2.336580276489258, |
|
"logits/rejected": -2.329068660736084, |
|
"logps/chosen": -46.42755126953125, |
|
"logps/rejected": -49.21259307861328, |
|
"loss": 0.672, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.034749384969472885, |
|
"rewards/margins": 0.05111519619822502, |
|
"rewards/rejected": -0.016365814954042435, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.9512656517430862, |
|
"grad_norm": 19.028423309326172, |
|
"learning_rate": 8.627786747006144e-08, |
|
"logits/chosen": -2.3250503540039062, |
|
"logits/rejected": -2.3135623931884766, |
|
"logps/chosen": -44.120948791503906, |
|
"logps/rejected": -47.624244689941406, |
|
"loss": 0.6613, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.049294039607048035, |
|
"rewards/margins": 0.07464224100112915, |
|
"rewards/rejected": -0.025348205119371414, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.9656787676785875, |
|
"grad_norm": 22.04196548461914, |
|
"learning_rate": 8.569503517418104e-08, |
|
"logits/chosen": -2.31101131439209, |
|
"logits/rejected": -2.2869081497192383, |
|
"logps/chosen": -48.54884719848633, |
|
"logps/rejected": -51.203269958496094, |
|
"loss": 0.6638, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": 0.04563365504145622, |
|
"rewards/margins": 0.0732460767030716, |
|
"rewards/rejected": -0.027612417936325073, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.9800918836140888, |
|
"grad_norm": 23.0523624420166, |
|
"learning_rate": 8.510213936138402e-08, |
|
"logits/chosen": -2.271580696105957, |
|
"logits/rejected": -2.249781847000122, |
|
"logps/chosen": -47.92513656616211, |
|
"logps/rejected": -49.661014556884766, |
|
"loss": 0.6593, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": 0.055300354957580566, |
|
"rewards/margins": 0.07859370857477188, |
|
"rewards/rejected": -0.023293355479836464, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.9945049995495902, |
|
"grad_norm": 23.048105239868164, |
|
"learning_rate": 8.449934718704685e-08, |
|
"logits/chosen": -2.301927328109741, |
|
"logits/rejected": -2.2857935428619385, |
|
"logps/chosen": -45.03214645385742, |
|
"logps/rejected": -48.306583404541016, |
|
"loss": 0.6656, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": 0.04563567042350769, |
|
"rewards/margins": 0.06622838228940964, |
|
"rewards/rejected": -0.02059270814061165, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.0089181154850915, |
|
"grad_norm": 21.652252197265625, |
|
"learning_rate": 8.388682859663152e-08, |
|
"logits/chosen": -2.284125804901123, |
|
"logits/rejected": -2.2744266986846924, |
|
"logps/chosen": -45.62473678588867, |
|
"logps/rejected": -48.55522918701172, |
|
"loss": 0.6568, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": 0.053375959396362305, |
|
"rewards/margins": 0.08940999954938889, |
|
"rewards/rejected": -0.03603404015302658, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.0233312314205927, |
|
"grad_norm": 22.383621215820312, |
|
"learning_rate": 8.326475627777277e-08, |
|
"logits/chosen": -2.289461612701416, |
|
"logits/rejected": -2.2808353900909424, |
|
"logps/chosen": -46.80120849609375, |
|
"logps/rejected": -50.2525520324707, |
|
"loss": 0.6506, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.05878598242998123, |
|
"rewards/margins": 0.10358556360006332, |
|
"rewards/rejected": -0.04479958117008209, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.037744347356094, |
|
"grad_norm": 21.419694900512695, |
|
"learning_rate": 8.26333056115922e-08, |
|
"logits/chosen": -2.3371403217315674, |
|
"logits/rejected": -2.312253475189209, |
|
"logps/chosen": -47.29637908935547, |
|
"logps/rejected": -48.51680374145508, |
|
"loss": 0.6598, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": 0.04448707401752472, |
|
"rewards/margins": 0.08294164389371872, |
|
"rewards/rejected": -0.038454569876194, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.0521574632915953, |
|
"grad_norm": 25.013229370117188, |
|
"learning_rate": 8.1992654623253e-08, |
|
"logits/chosen": -2.3014025688171387, |
|
"logits/rejected": -2.2878174781799316, |
|
"logps/chosen": -44.94810104370117, |
|
"logps/rejected": -50.638641357421875, |
|
"loss": 0.6423, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.050335705280303955, |
|
"rewards/margins": 0.12491028010845184, |
|
"rewards/rejected": -0.07457457482814789, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.0665705792270967, |
|
"grad_norm": 21.155508041381836, |
|
"learning_rate": 8.134298393176915e-08, |
|
"logits/chosen": -2.2642767429351807, |
|
"logits/rejected": -2.2350783348083496, |
|
"logps/chosen": -44.5008659362793, |
|
"logps/rejected": -48.16975021362305, |
|
"loss": 0.6497, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.045155756175518036, |
|
"rewards/margins": 0.10517505556344986, |
|
"rewards/rejected": -0.06001930311322212, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.080983695162598, |
|
"grad_norm": 20.4642276763916, |
|
"learning_rate": 8.068447669908356e-08, |
|
"logits/chosen": -2.2687973976135254, |
|
"logits/rejected": -2.2399725914001465, |
|
"logps/chosen": -49.161521911621094, |
|
"logps/rejected": -51.75724411010742, |
|
"loss": 0.651, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": 0.048742182552814484, |
|
"rewards/margins": 0.10028008371591568, |
|
"rewards/rejected": -0.0515378937125206, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.0953968110980994, |
|
"grad_norm": 22.40403938293457, |
|
"learning_rate": 8.001731857842906e-08, |
|
"logits/chosen": -2.273935317993164, |
|
"logits/rejected": -2.265725612640381, |
|
"logps/chosen": -48.28545379638672, |
|
"logps/rejected": -49.2262077331543, |
|
"loss": 0.6661, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": 0.03592974692583084, |
|
"rewards/margins": 0.0714699849486351, |
|
"rewards/rejected": -0.03554024174809456, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.1098099270336006, |
|
"grad_norm": 23.13152503967285, |
|
"learning_rate": 7.934169766198712e-08, |
|
"logits/chosen": -2.3057303428649902, |
|
"logits/rejected": -2.281688690185547, |
|
"logps/chosen": -46.85943603515625, |
|
"logps/rejected": -50.839698791503906, |
|
"loss": 0.6527, |
|
"rewards/accuracies": 0.6031249761581421, |
|
"rewards/chosen": 0.0495249405503273, |
|
"rewards/margins": 0.09827858954668045, |
|
"rewards/rejected": -0.04875364899635315, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.1242230429691018, |
|
"grad_norm": 26.3281192779541, |
|
"learning_rate": 7.86578044278589e-08, |
|
"logits/chosen": -2.320192575454712, |
|
"logits/rejected": -2.297276735305786, |
|
"logps/chosen": -48.3865966796875, |
|
"logps/rejected": -51.7015380859375, |
|
"loss": 0.6443, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.057862721383571625, |
|
"rewards/margins": 0.11766588687896729, |
|
"rewards/rejected": -0.05980316549539566, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.1386361589046032, |
|
"grad_norm": 24.804874420166016, |
|
"learning_rate": 7.796583168636375e-08, |
|
"logits/chosen": -2.3215794563293457, |
|
"logits/rejected": -2.305657148361206, |
|
"logps/chosen": -48.32183074951172, |
|
"logps/rejected": -52.143150329589844, |
|
"loss": 0.6451, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": 0.04665009304881096, |
|
"rewards/margins": 0.11924920231103897, |
|
"rewards/rejected": -0.07259909808635712, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.1530492748401044, |
|
"grad_norm": 18.46070671081543, |
|
"learning_rate": 7.726597452568007e-08, |
|
"logits/chosen": -2.300274610519409, |
|
"logits/rejected": -2.2758469581604004, |
|
"logps/chosen": -46.92626190185547, |
|
"logps/rejected": -50.41767501831055, |
|
"loss": 0.6505, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.04316678270697594, |
|
"rewards/margins": 0.10699989646673203, |
|
"rewards/rejected": -0.06383311003446579, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.1674623907756059, |
|
"grad_norm": 25.514638900756836, |
|
"learning_rate": 7.655843025684402e-08, |
|
"logits/chosen": -2.319176435470581, |
|
"logits/rejected": -2.308589458465576, |
|
"logps/chosen": -48.81470489501953, |
|
"logps/rejected": -52.32573318481445, |
|
"loss": 0.6545, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": 0.04523879662156105, |
|
"rewards/margins": 0.09590532630681992, |
|
"rewards/rejected": -0.050666533410549164, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.181875506711107, |
|
"grad_norm": 19.846221923828125, |
|
"learning_rate": 7.584339835812151e-08, |
|
"logits/chosen": -2.2872939109802246, |
|
"logits/rejected": -2.2704885005950928, |
|
"logps/chosen": -48.955596923828125, |
|
"logps/rejected": -49.713871002197266, |
|
"loss": 0.6512, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.0468595027923584, |
|
"rewards/margins": 0.1036025881767273, |
|
"rewards/rejected": -0.0567430779337883, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.1962886226466085, |
|
"grad_norm": 21.35214614868164, |
|
"learning_rate": 7.512108041876924e-08, |
|
"logits/chosen": -2.253537654876709, |
|
"logits/rejected": -2.2463467121124268, |
|
"logps/chosen": -43.18073654174805, |
|
"logps/rejected": -45.50495147705078, |
|
"loss": 0.6565, |
|
"rewards/accuracies": 0.5843750238418579, |
|
"rewards/chosen": 0.0258056428283453, |
|
"rewards/margins": 0.09474330395460129, |
|
"rewards/rejected": -0.06893765181303024, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.2107017385821097, |
|
"grad_norm": 21.493972778320312, |
|
"learning_rate": 7.439168008220056e-08, |
|
"logits/chosen": -2.293834686279297, |
|
"logits/rejected": -2.276794195175171, |
|
"logps/chosen": -45.07364273071289, |
|
"logps/rejected": -49.38530731201172, |
|
"loss": 0.6437, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": 0.04228875786066055, |
|
"rewards/margins": 0.12463889271020889, |
|
"rewards/rejected": -0.08235013484954834, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.225114854517611, |
|
"grad_norm": 19.528154373168945, |
|
"learning_rate": 7.365540298857215e-08, |
|
"logits/chosen": -2.2884204387664795, |
|
"logits/rejected": -2.274423837661743, |
|
"logps/chosen": -50.20172119140625, |
|
"logps/rejected": -53.54350662231445, |
|
"loss": 0.6309, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": 0.06707637757062912, |
|
"rewards/margins": 0.15372148156166077, |
|
"rewards/rejected": -0.08664509654045105, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.2395279704531124, |
|
"grad_norm": 18.76193618774414, |
|
"learning_rate": 7.291245671680781e-08, |
|
"logits/chosen": -2.2782771587371826, |
|
"logits/rejected": -2.2508130073547363, |
|
"logps/chosen": -42.898929595947266, |
|
"logps/rejected": -46.70447540283203, |
|
"loss": 0.6438, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": 0.046091653406620026, |
|
"rewards/margins": 0.12635795772075653, |
|
"rewards/rejected": -0.08026628941297531, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.2539410863886136, |
|
"grad_norm": 21.306549072265625, |
|
"learning_rate": 7.216305072607568e-08, |
|
"logits/chosen": -2.310192823410034, |
|
"logits/rejected": -2.2984118461608887, |
|
"logps/chosen": -47.682533264160156, |
|
"logps/rejected": -50.96216583251953, |
|
"loss": 0.6397, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.029919151216745377, |
|
"rewards/margins": 0.13648976385593414, |
|
"rewards/rejected": -0.10657060146331787, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.268354202324115, |
|
"grad_norm": 23.378387451171875, |
|
"learning_rate": 7.14073962967353e-08, |
|
"logits/chosen": -2.3239712715148926, |
|
"logits/rejected": -2.2972521781921387, |
|
"logps/chosen": -51.19306564331055, |
|
"logps/rejected": -52.882469177246094, |
|
"loss": 0.6301, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.04189233481884003, |
|
"rewards/margins": 0.16474562883377075, |
|
"rewards/rejected": -0.12285330146551132, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.2827673182596162, |
|
"grad_norm": 25.997648239135742, |
|
"learning_rate": 7.064570647077124e-08, |
|
"logits/chosen": -2.3059449195861816, |
|
"logits/rejected": -2.2785696983337402, |
|
"logps/chosen": -48.22880935668945, |
|
"logps/rejected": -50.62089538574219, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": 0.02739674784243107, |
|
"rewards/margins": 0.09727232158184052, |
|
"rewards/rejected": -0.06987558305263519, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.2971804341951176, |
|
"grad_norm": 25.10089111328125, |
|
"learning_rate": 6.987819599173006e-08, |
|
"logits/chosen": -2.296082019805908, |
|
"logits/rejected": -2.2839462757110596, |
|
"logps/chosen": -46.80525207519531, |
|
"logps/rejected": -51.168792724609375, |
|
"loss": 0.6608, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": 0.037016816437244415, |
|
"rewards/margins": 0.09267648309469223, |
|
"rewards/rejected": -0.05565967038273811, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.3115935501306188, |
|
"grad_norm": 23.599550247192383, |
|
"learning_rate": 6.910508124417765e-08, |
|
"logits/chosen": -2.268026113510132, |
|
"logits/rejected": -2.2599265575408936, |
|
"logps/chosen": -43.31938934326172, |
|
"logps/rejected": -47.721927642822266, |
|
"loss": 0.6397, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.03140413016080856, |
|
"rewards/margins": 0.13336870074272156, |
|
"rewards/rejected": -0.101964570581913, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.32600666606612, |
|
"grad_norm": 17.291772842407227, |
|
"learning_rate": 6.832658019269373e-08, |
|
"logits/chosen": -2.2494916915893555, |
|
"logits/rejected": -2.225579261779785, |
|
"logps/chosen": -45.836544036865234, |
|
"logps/rejected": -48.53606414794922, |
|
"loss": 0.6422, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.022828983142971992, |
|
"rewards/margins": 0.13301518559455872, |
|
"rewards/rejected": -0.11018619686365128, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.3404197820016215, |
|
"grad_norm": 21.943674087524414, |
|
"learning_rate": 6.75429123204211e-08, |
|
"logits/chosen": -2.2929205894470215, |
|
"logits/rejected": -2.2728652954101562, |
|
"logps/chosen": -46.83967208862305, |
|
"logps/rejected": -51.28687286376953, |
|
"loss": 0.6367, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": 0.023386573418974876, |
|
"rewards/margins": 0.14642710983753204, |
|
"rewards/rejected": -0.12304054200649261, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.354832897937123, |
|
"grad_norm": 23.507673263549805, |
|
"learning_rate": 6.675429856718652e-08, |
|
"logits/chosen": -2.2670910358428955, |
|
"logits/rejected": -2.2336556911468506, |
|
"logps/chosen": -45.63422393798828, |
|
"logps/rejected": -48.890968322753906, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.033365607261657715, |
|
"rewards/margins": 0.13297542929649353, |
|
"rewards/rejected": -0.09960982948541641, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.3692460138726241, |
|
"grad_norm": 18.229249954223633, |
|
"learning_rate": 6.596096126721123e-08, |
|
"logits/chosen": -2.2298777103424072, |
|
"logits/rejected": -2.223173141479492, |
|
"logps/chosen": -45.84309768676758, |
|
"logps/rejected": -50.1830940246582, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.03936393931508064, |
|
"rewards/margins": 0.13684986531734467, |
|
"rewards/rejected": -0.09748590737581253, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.3836591298081253, |
|
"grad_norm": 19.752758026123047, |
|
"learning_rate": 6.516312408642804e-08, |
|
"logits/chosen": -2.279609203338623, |
|
"logits/rejected": -2.279059410095215, |
|
"logps/chosen": -42.76288604736328, |
|
"logps/rejected": -48.05985641479492, |
|
"loss": 0.6364, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": 0.02452887035906315, |
|
"rewards/margins": 0.1416589766740799, |
|
"rewards/rejected": -0.1171300858259201, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.3980722457436268, |
|
"grad_norm": 23.075225830078125, |
|
"learning_rate": 6.436101195942312e-08, |
|
"logits/chosen": -2.278662919998169, |
|
"logits/rejected": -2.2638564109802246, |
|
"logps/chosen": -48.03376007080078, |
|
"logps/rejected": -50.43840789794922, |
|
"loss": 0.6507, |
|
"rewards/accuracies": 0.621874988079071, |
|
"rewards/chosen": 0.02580978348851204, |
|
"rewards/margins": 0.11886006593704224, |
|
"rewards/rejected": -0.0930502638220787, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.412485361679128, |
|
"grad_norm": 23.39198875427246, |
|
"learning_rate": 6.35548510260201e-08, |
|
"logits/chosen": -2.252816677093506, |
|
"logits/rejected": -2.236603260040283, |
|
"logps/chosen": -44.25969314575195, |
|
"logps/rejected": -48.64405059814453, |
|
"loss": 0.6368, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": 0.021795066073536873, |
|
"rewards/margins": 0.155609592795372, |
|
"rewards/rejected": -0.1338145136833191, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.4268984776146292, |
|
"grad_norm": 19.332801818847656, |
|
"learning_rate": 6.274486856752442e-08, |
|
"logits/chosen": -2.2814013957977295, |
|
"logits/rejected": -2.268404483795166, |
|
"logps/chosen": -43.803199768066406, |
|
"logps/rejected": -48.746334075927734, |
|
"loss": 0.6393, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.0010702230501919985, |
|
"rewards/margins": 0.15793892741203308, |
|
"rewards/rejected": -0.15686871111392975, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.4413115935501306, |
|
"grad_norm": 17.606380462646484, |
|
"learning_rate": 6.193129294264568e-08, |
|
"logits/chosen": -2.286543130874634, |
|
"logits/rejected": -2.2696192264556885, |
|
"logps/chosen": -43.068660736083984, |
|
"logps/rejected": -47.965354919433594, |
|
"loss": 0.6332, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": 0.026370450854301453, |
|
"rewards/margins": 0.15689751505851746, |
|
"rewards/rejected": -0.130527064204216, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.455724709485632, |
|
"grad_norm": 24.786046981811523, |
|
"learning_rate": 6.111435352311653e-08, |
|
"logits/chosen": -2.2797257900238037, |
|
"logits/rejected": -2.264954090118408, |
|
"logps/chosen": -45.28757095336914, |
|
"logps/rejected": -49.00934600830078, |
|
"loss": 0.6405, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.010084993205964565, |
|
"rewards/margins": 0.14780084788799286, |
|
"rewards/rejected": -0.13771584630012512, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.4701378254211332, |
|
"grad_norm": 26.044845581054688, |
|
"learning_rate": 6.02942806290257e-08, |
|
"logits/chosen": -2.2898941040039062, |
|
"logits/rejected": -2.2842609882354736, |
|
"logps/chosen": -48.29700469970703, |
|
"logps/rejected": -52.98006057739258, |
|
"loss": 0.6511, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.018248267471790314, |
|
"rewards/margins": 0.12896670401096344, |
|
"rewards/rejected": -0.11071842908859253, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.4845509413566345, |
|
"grad_norm": 16.25486946105957, |
|
"learning_rate": 5.947130546388376e-08, |
|
"logits/chosen": -2.2709572315216064, |
|
"logits/rejected": -2.2334933280944824, |
|
"logps/chosen": -47.84028244018555, |
|
"logps/rejected": -49.67833709716797, |
|
"loss": 0.6366, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.009173278696835041, |
|
"rewards/margins": 0.1511419117450714, |
|
"rewards/rejected": -0.14196862280368805, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.4989640572921359, |
|
"grad_norm": 21.530025482177734, |
|
"learning_rate": 5.864566004943983e-08, |
|
"logits/chosen": -2.2685012817382812, |
|
"logits/rejected": -2.240506649017334, |
|
"logps/chosen": -50.17546463012695, |
|
"logps/rejected": -53.723243713378906, |
|
"loss": 0.6373, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": 0.00729725044220686, |
|
"rewards/margins": 0.15390922129154205, |
|
"rewards/rejected": -0.14661197364330292, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.513377173227637, |
|
"grad_norm": 23.661149978637695, |
|
"learning_rate": 5.78175771602676e-08, |
|
"logits/chosen": -2.2800815105438232, |
|
"logits/rejected": -2.277493953704834, |
|
"logps/chosen": -43.04485321044922, |
|
"logps/rejected": -46.71100997924805, |
|
"loss": 0.6347, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": 0.01757994294166565, |
|
"rewards/margins": 0.1627650111913681, |
|
"rewards/rejected": -0.14518508315086365, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.5277902891631383, |
|
"grad_norm": 23.422590255737305, |
|
"learning_rate": 5.6987290258139073e-08, |
|
"logits/chosen": -2.2339248657226562, |
|
"logits/rejected": -2.2009925842285156, |
|
"logps/chosen": -48.11365509033203, |
|
"logps/rejected": -51.516197204589844, |
|
"loss": 0.6384, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": 0.02754228189587593, |
|
"rewards/margins": 0.15339332818984985, |
|
"rewards/rejected": -0.12585103511810303, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.5422034050986397, |
|
"grad_norm": 27.1705379486084, |
|
"learning_rate": 5.6155033426204615e-08, |
|
"logits/chosen": -2.2623772621154785, |
|
"logits/rejected": -2.2473068237304688, |
|
"logps/chosen": -51.77325439453125, |
|
"logps/rejected": -54.96337127685547, |
|
"loss": 0.6343, |
|
"rewards/accuracies": 0.6156250238418579, |
|
"rewards/chosen": 0.01034320704638958, |
|
"rewards/margins": 0.15684732794761658, |
|
"rewards/rejected": -0.14650413393974304, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.5566165210341412, |
|
"grad_norm": 25.371421813964844, |
|
"learning_rate": 5.532104130299771e-08, |
|
"logits/chosen": -2.26564359664917, |
|
"logits/rejected": -2.250556230545044, |
|
"logps/chosen": -51.636810302734375, |
|
"logps/rejected": -54.49198532104492, |
|
"loss": 0.634, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": 0.014989291317760944, |
|
"rewards/margins": 0.16550514101982117, |
|
"rewards/rejected": -0.1505158692598343, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.5710296369696424, |
|
"grad_norm": 26.66484260559082, |
|
"learning_rate": 5.448554901628333e-08, |
|
"logits/chosen": -2.2615230083465576, |
|
"logits/rejected": -2.251753330230713, |
|
"logps/chosen": -48.62981033325195, |
|
"logps/rejected": -53.1638069152832, |
|
"loss": 0.6483, |
|
"rewards/accuracies": 0.609375, |
|
"rewards/chosen": 0.001556683098897338, |
|
"rewards/margins": 0.1258353888988495, |
|
"rewards/rejected": -0.124278724193573, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.5854427529051436, |
|
"grad_norm": 23.419185638427734, |
|
"learning_rate": 5.364879211676816e-08, |
|
"logits/chosen": -2.2811994552612305, |
|
"logits/rejected": -2.272005558013916, |
|
"logps/chosen": -48.42817687988281, |
|
"logps/rejected": -53.677734375, |
|
"loss": 0.6268, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": 0.036996982991695404, |
|
"rewards/margins": 0.17727255821228027, |
|
"rewards/rejected": -0.14027558267116547, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.599855868840645, |
|
"grad_norm": 25.174808502197266, |
|
"learning_rate": 5.281100651169175e-08, |
|
"logits/chosen": -2.290276527404785, |
|
"logits/rejected": -2.277024030685425, |
|
"logps/chosen": -50.967430114746094, |
|
"logps/rejected": -55.05602264404297, |
|
"loss": 0.6327, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.041182320564985275, |
|
"rewards/margins": 0.17106422781944275, |
|
"rewards/rejected": -0.12988190352916718, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.6142689847761464, |
|
"grad_norm": 20.062232971191406, |
|
"learning_rate": 5.197242839831706e-08, |
|
"logits/chosen": -2.2445547580718994, |
|
"logits/rejected": -2.235886335372925, |
|
"logps/chosen": -45.176246643066406, |
|
"logps/rejected": -50.29578399658203, |
|
"loss": 0.6329, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -0.004402970429509878, |
|
"rewards/margins": 0.17448854446411133, |
|
"rewards/rejected": -0.17889150977134705, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.6286821007116477, |
|
"grad_norm": 24.2050838470459, |
|
"learning_rate": 5.1133294197339274e-08, |
|
"logits/chosen": -2.2965331077575684, |
|
"logits/rejected": -2.269667387008667, |
|
"logps/chosen": -47.03449630737305, |
|
"logps/rejected": -50.98591232299805, |
|
"loss": 0.6476, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0063371858559548855, |
|
"rewards/margins": 0.13688282668590546, |
|
"rewards/rejected": -0.13054564595222473, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.6430952166471489, |
|
"grad_norm": 22.103654861450195, |
|
"learning_rate": 5.029384048623153e-08, |
|
"logits/chosen": -2.253809690475464, |
|
"logits/rejected": -2.233123779296875, |
|
"logps/chosen": -48.81449508666992, |
|
"logps/rejected": -52.2353515625, |
|
"loss": 0.649, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": 0.0027180719189345837, |
|
"rewards/margins": 0.13031043112277985, |
|
"rewards/rejected": -0.12759235501289368, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.6575083325826503, |
|
"grad_norm": 19.620878219604492, |
|
"learning_rate": 4.9454303932546675e-08, |
|
"logits/chosen": -2.2449498176574707, |
|
"logits/rejected": -2.2211432456970215, |
|
"logps/chosen": -47.44291687011719, |
|
"logps/rejected": -48.738990783691406, |
|
"loss": 0.6305, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": 0.014069506898522377, |
|
"rewards/margins": 0.18172375857830048, |
|
"rewards/rejected": -0.16765424609184265, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.6719214485181515, |
|
"grad_norm": 22.302276611328125, |
|
"learning_rate": 4.861492122719338e-08, |
|
"logits/chosen": -2.2866158485412598, |
|
"logits/rejected": -2.264674425125122, |
|
"logps/chosen": -49.207786560058594, |
|
"logps/rejected": -52.398353576660156, |
|
"loss": 0.6408, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": 0.012931051664054394, |
|
"rewards/margins": 0.15574631094932556, |
|
"rewards/rejected": -0.1428152620792389, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.6863345644536527, |
|
"grad_norm": 23.777292251586914, |
|
"learning_rate": 4.777592901770575e-08, |
|
"logits/chosen": -2.2917380332946777, |
|
"logits/rejected": -2.276853561401367, |
|
"logps/chosen": -42.12696075439453, |
|
"logps/rejected": -46.638572692871094, |
|
"loss": 0.6227, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": 0.00846984051167965, |
|
"rewards/margins": 0.20765790343284607, |
|
"rewards/rejected": -0.19918808341026306, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.7007476803891541, |
|
"grad_norm": 25.883188247680664, |
|
"learning_rate": 4.693756384152529e-08, |
|
"logits/chosen": -2.254065752029419, |
|
"logits/rejected": -2.2268402576446533, |
|
"logps/chosen": -49.0328483581543, |
|
"logps/rejected": -53.10075759887695, |
|
"loss": 0.6234, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -0.0014540397096425295, |
|
"rewards/margins": 0.19669906795024872, |
|
"rewards/rejected": -0.19815312325954437, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.7151607963246556, |
|
"grad_norm": 20.475727081298828, |
|
"learning_rate": 4.610006205931365e-08, |
|
"logits/chosen": -2.2955899238586426, |
|
"logits/rejected": -2.279146194458008, |
|
"logps/chosen": -50.25695037841797, |
|
"logps/rejected": -52.637916564941406, |
|
"loss": 0.6331, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0065743387676775455, |
|
"rewards/margins": 0.1675548255443573, |
|
"rewards/rejected": -0.1609804779291153, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.7295739122601568, |
|
"grad_norm": 24.313840866088867, |
|
"learning_rate": 4.526365978831551e-08, |
|
"logits/chosen": -2.2783608436584473, |
|
"logits/rejected": -2.2607553005218506, |
|
"logps/chosen": -47.65949249267578, |
|
"logps/rejected": -51.959983825683594, |
|
"loss": 0.631, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -0.00622877012938261, |
|
"rewards/margins": 0.17043979465961456, |
|
"rewards/rejected": -0.17666855454444885, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.743987028195658, |
|
"grad_norm": 20.83986473083496, |
|
"learning_rate": 4.442859283578981e-08, |
|
"logits/chosen": -2.2713427543640137, |
|
"logits/rejected": -2.252413511276245, |
|
"logps/chosen": -45.476287841796875, |
|
"logps/rejected": -49.17133331298828, |
|
"loss": 0.6303, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.020858388394117355, |
|
"rewards/margins": 0.1802762746810913, |
|
"rewards/rejected": -0.20113465189933777, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.7584001441311594, |
|
"grad_norm": 23.378725051879883, |
|
"learning_rate": 4.359509663252864e-08, |
|
"logits/chosen": -2.251262903213501, |
|
"logits/rejected": -2.2400929927825928, |
|
"logps/chosen": -46.6330451965332, |
|
"logps/rejected": -51.300270080566406, |
|
"loss": 0.64, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.014711695723235607, |
|
"rewards/margins": 0.15496547520160675, |
|
"rewards/rejected": -0.16967716813087463, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.7728132600666606, |
|
"grad_norm": 20.105392456054688, |
|
"learning_rate": 4.276340616648198e-08, |
|
"logits/chosen": -2.3081746101379395, |
|
"logits/rejected": -2.2849628925323486, |
|
"logps/chosen": -46.36747360229492, |
|
"logps/rejected": -48.48231506347656, |
|
"loss": 0.6288, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": 0.00806148536503315, |
|
"rewards/margins": 0.18995113670825958, |
|
"rewards/rejected": -0.18188965320587158, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.7872263760021618, |
|
"grad_norm": 20.087594985961914, |
|
"learning_rate": 4.193375591650758e-08, |
|
"logits/chosen": -2.2986416816711426, |
|
"logits/rejected": -2.275165557861328, |
|
"logps/chosen": -51.384132385253906, |
|
"logps/rejected": -54.01851272583008, |
|
"loss": 0.6311, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.004061401821672916, |
|
"rewards/margins": 0.17897441983222961, |
|
"rewards/rejected": -0.18303583562374115, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.8016394919376633, |
|
"grad_norm": 21.758268356323242, |
|
"learning_rate": 4.110637978626415e-08, |
|
"logits/chosen": -2.257802963256836, |
|
"logits/rejected": -2.2421045303344727, |
|
"logps/chosen": -44.12052536010742, |
|
"logps/rejected": -49.74687576293945, |
|
"loss": 0.6012, |
|
"rewards/accuracies": 0.703125, |
|
"rewards/chosen": 0.036210522055625916, |
|
"rewards/margins": 0.24105677008628845, |
|
"rewards/rejected": -0.20484623312950134, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.8160526078731647, |
|
"grad_norm": 19.154407501220703, |
|
"learning_rate": 4.0281511038266867e-08, |
|
"logits/chosen": -2.201878309249878, |
|
"logits/rejected": -2.183927059173584, |
|
"logps/chosen": -49.49979782104492, |
|
"logps/rejected": -53.208091735839844, |
|
"loss": 0.6219, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.014157836325466633, |
|
"rewards/margins": 0.20892783999443054, |
|
"rewards/rejected": -0.2230856716632843, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.830465723808666, |
|
"grad_norm": 19.655216217041016, |
|
"learning_rate": 3.9459382228123475e-08, |
|
"logits/chosen": -2.2442800998687744, |
|
"logits/rejected": -2.238391160964966, |
|
"logps/chosen": -42.37008285522461, |
|
"logps/rejected": -47.834983825683594, |
|
"loss": 0.6055, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": 0.017191831022500992, |
|
"rewards/margins": 0.2501511871814728, |
|
"rewards/rejected": -0.2329593151807785, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.844878839744167, |
|
"grad_norm": 24.132625579833984, |
|
"learning_rate": 3.864022513896989e-08, |
|
"logits/chosen": -2.255995750427246, |
|
"logits/rejected": -2.223539113998413, |
|
"logps/chosen": -44.1163330078125, |
|
"logps/rejected": -47.280296325683594, |
|
"loss": 0.6397, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.004155662842094898, |
|
"rewards/margins": 0.1677740067243576, |
|
"rewards/rejected": -0.17192967236042023, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.8592919556796685, |
|
"grad_norm": 24.518510818481445, |
|
"learning_rate": 3.782427071612339e-08, |
|
"logits/chosen": -2.278082847595215, |
|
"logits/rejected": -2.2593398094177246, |
|
"logps/chosen": -48.02725601196289, |
|
"logps/rejected": -51.3366813659668, |
|
"loss": 0.6259, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.005302610341459513, |
|
"rewards/margins": 0.19030724465847015, |
|
"rewards/rejected": -0.19560983777046204, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.87370507161517, |
|
"grad_norm": 23.40630531311035, |
|
"learning_rate": 3.7011749001972174e-08, |
|
"logits/chosen": -2.2769112586975098, |
|
"logits/rejected": -2.2606639862060547, |
|
"logps/chosen": -43.46620559692383, |
|
"logps/rejected": -47.172611236572266, |
|
"loss": 0.6354, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -4.972890019416809e-05, |
|
"rewards/margins": 0.1833050400018692, |
|
"rewards/rejected": -0.18335476517677307, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.888118187550671, |
|
"grad_norm": 22.18378448486328, |
|
"learning_rate": 3.620288907111931e-08, |
|
"logits/chosen": -2.250119686126709, |
|
"logits/rejected": -2.2260870933532715, |
|
"logps/chosen": -45.36948776245117, |
|
"logps/rejected": -48.90538787841797, |
|
"loss": 0.6175, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -0.0030071833170950413, |
|
"rewards/margins": 0.2247433215379715, |
|
"rewards/rejected": -0.2277504950761795, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.9025313034861724, |
|
"grad_norm": 25.240890502929688, |
|
"learning_rate": 3.539791896579978e-08, |
|
"logits/chosen": -2.2886531352996826, |
|
"logits/rejected": -2.2704544067382812, |
|
"logps/chosen": -51.9533805847168, |
|
"logps/rejected": -54.27134323120117, |
|
"loss": 0.6407, |
|
"rewards/accuracies": 0.590624988079071, |
|
"rewards/chosen": -0.04531291872262955, |
|
"rewards/margins": 0.1595449447631836, |
|
"rewards/rejected": -0.20485787093639374, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.9169444194216738, |
|
"grad_norm": 20.785062789916992, |
|
"learning_rate": 3.459706563158828e-08, |
|
"logits/chosen": -2.2486228942871094, |
|
"logits/rejected": -2.2339444160461426, |
|
"logps/chosen": -51.625091552734375, |
|
"logps/rejected": -56.50333786010742, |
|
"loss": 0.6202, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.01646718569099903, |
|
"rewards/margins": 0.2016635239124298, |
|
"rewards/rejected": -0.2181307077407837, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.931357535357175, |
|
"grad_norm": 18.25945281982422, |
|
"learning_rate": 3.380055485341644e-08, |
|
"logits/chosen": -2.2824645042419434, |
|
"logits/rejected": -2.2728466987609863, |
|
"logps/chosen": -47.70806121826172, |
|
"logps/rejected": -52.762123107910156, |
|
"loss": 0.6396, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": -0.014143446460366249, |
|
"rewards/margins": 0.1655816286802292, |
|
"rewards/rejected": -0.17972508072853088, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.9457706512926762, |
|
"grad_norm": 27.237255096435547, |
|
"learning_rate": 3.300861119191718e-08, |
|
"logits/chosen": -2.2605082988739014, |
|
"logits/rejected": -2.240718126296997, |
|
"logps/chosen": -50.2346076965332, |
|
"logps/rejected": -52.86682891845703, |
|
"loss": 0.6388, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.022719861939549446, |
|
"rewards/margins": 0.16368316113948822, |
|
"rewards/rejected": -0.18640300631523132, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.9601837672281777, |
|
"grad_norm": 21.566373825073242, |
|
"learning_rate": 3.2221457920114213e-08, |
|
"logits/chosen": -2.281573534011841, |
|
"logits/rejected": -2.260481595993042, |
|
"logps/chosen": -44.61939239501953, |
|
"logps/rejected": -49.09664535522461, |
|
"loss": 0.6305, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.01838817074894905, |
|
"rewards/margins": 0.18449218571186066, |
|
"rewards/rejected": -0.2028803527355194, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.974596883163679, |
|
"grad_norm": 22.27100944519043, |
|
"learning_rate": 3.143931696047454e-08, |
|
"logits/chosen": -2.26967191696167, |
|
"logits/rejected": -2.254939079284668, |
|
"logps/chosen": -44.52263259887695, |
|
"logps/rejected": -48.41568374633789, |
|
"loss": 0.6323, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -0.027285214513540268, |
|
"rewards/margins": 0.18556539714336395, |
|
"rewards/rejected": -0.21285061538219452, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.9890099990991803, |
|
"grad_norm": 26.420007705688477, |
|
"learning_rate": 3.066240882234186e-08, |
|
"logits/chosen": -2.2749416828155518, |
|
"logits/rejected": -2.275374412536621, |
|
"logps/chosen": -49.047096252441406, |
|
"logps/rejected": -54.0599250793457, |
|
"loss": 0.6264, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.008531761355698109, |
|
"rewards/margins": 0.18480119109153748, |
|
"rewards/rejected": -0.1933329850435257, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.0034231150346815, |
|
"grad_norm": 21.0603084564209, |
|
"learning_rate": 2.989095253976816e-08, |
|
"logits/chosen": -2.261396884918213, |
|
"logits/rejected": -2.252016305923462, |
|
"logps/chosen": -47.794715881347656, |
|
"logps/rejected": -51.5122184753418, |
|
"loss": 0.6434, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.034887559711933136, |
|
"rewards/margins": 0.15125319361686707, |
|
"rewards/rejected": -0.1861407607793808, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 2.017836230970183, |
|
"grad_norm": 21.241836547851562, |
|
"learning_rate": 2.912516560976146e-08, |
|
"logits/chosen": -2.236016035079956, |
|
"logits/rejected": -2.2192585468292236, |
|
"logps/chosen": -45.089847564697266, |
|
"logps/rejected": -50.58442306518555, |
|
"loss": 0.6138, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.017469603568315506, |
|
"rewards/margins": 0.23276808857917786, |
|
"rewards/rejected": -0.25023767352104187, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 2.0322493469056844, |
|
"grad_norm": 21.38671112060547, |
|
"learning_rate": 2.836526393096661e-08, |
|
"logits/chosen": -2.2866103649139404, |
|
"logits/rejected": -2.277172088623047, |
|
"logps/chosen": -47.35311508178711, |
|
"logps/rejected": -52.31220626831055, |
|
"loss": 0.6238, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.022761840373277664, |
|
"rewards/margins": 0.19283010065555573, |
|
"rewards/rejected": -0.2155919373035431, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 2.0466624628411854, |
|
"grad_norm": 21.73073959350586, |
|
"learning_rate": 2.7611461742797165e-08, |
|
"logits/chosen": -2.2647812366485596, |
|
"logits/rejected": -2.2458884716033936, |
|
"logps/chosen": -42.06100082397461, |
|
"logps/rejected": -46.326446533203125, |
|
"loss": 0.6044, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": 0.0016145706176757812, |
|
"rewards/margins": 0.2537813186645508, |
|
"rewards/rejected": -0.2521667778491974, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 2.061075578776687, |
|
"grad_norm": 26.361936569213867, |
|
"learning_rate": 2.686397156503445e-08, |
|
"logits/chosen": -2.266921043395996, |
|
"logits/rejected": -2.2467474937438965, |
|
"logps/chosen": -45.512332916259766, |
|
"logps/rejected": -49.53184127807617, |
|
"loss": 0.6322, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": -0.04077625274658203, |
|
"rewards/margins": 0.18639865517616272, |
|
"rewards/rejected": -0.22717487812042236, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 2.075488694712188, |
|
"grad_norm": 22.614898681640625, |
|
"learning_rate": 2.6123004137912084e-08, |
|
"logits/chosen": -2.2463908195495605, |
|
"logits/rejected": -2.2374794483184814, |
|
"logps/chosen": -44.685935974121094, |
|
"logps/rejected": -49.08884811401367, |
|
"loss": 0.6136, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": 0.0009622380021028221, |
|
"rewards/margins": 0.20883917808532715, |
|
"rewards/rejected": -0.20787692070007324, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 2.089901810647689, |
|
"grad_norm": 24.510360717773438, |
|
"learning_rate": 2.5388768362701585e-08, |
|
"logits/chosen": -2.240734100341797, |
|
"logits/rejected": -2.229846954345703, |
|
"logps/chosen": -50.42451095581055, |
|
"logps/rejected": -54.28529739379883, |
|
"loss": 0.634, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": 0.0019600100349634886, |
|
"rewards/margins": 0.17666472494602203, |
|
"rewards/rejected": -0.17470474541187286, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 2.1043149265831906, |
|
"grad_norm": 19.324567794799805, |
|
"learning_rate": 2.466147124281703e-08, |
|
"logits/chosen": -2.314894437789917, |
|
"logits/rejected": -2.2869911193847656, |
|
"logps/chosen": -46.29553985595703, |
|
"logps/rejected": -49.53280258178711, |
|
"loss": 0.6158, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.03561275079846382, |
|
"rewards/margins": 0.2275541126728058, |
|
"rewards/rejected": -0.2631668448448181, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 2.118728042518692, |
|
"grad_norm": 22.356782913208008, |
|
"learning_rate": 2.3941317825454278e-08, |
|
"logits/chosen": -2.263237476348877, |
|
"logits/rejected": -2.2347958087921143, |
|
"logps/chosen": -47.52599334716797, |
|
"logps/rejected": -51.044593811035156, |
|
"loss": 0.6157, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -0.00275771738961339, |
|
"rewards/margins": 0.22175411880016327, |
|
"rewards/rejected": -0.22451183199882507, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 2.1331411584541935, |
|
"grad_norm": 28.187788009643555, |
|
"learning_rate": 2.322851114378203e-08, |
|
"logits/chosen": -2.234954595565796, |
|
"logits/rejected": -2.228529691696167, |
|
"logps/chosen": -47.602561950683594, |
|
"logps/rejected": -51.5837287902832, |
|
"loss": 0.6259, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.056993234902620316, |
|
"rewards/margins": 0.20701098442077637, |
|
"rewards/rejected": -0.2640042006969452, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 2.1475542743896945, |
|
"grad_norm": 21.61797523498535, |
|
"learning_rate": 2.252325215970059e-08, |
|
"logits/chosen": -2.223036050796509, |
|
"logits/rejected": -2.206120014190674, |
|
"logps/chosen": -47.96808624267578, |
|
"logps/rejected": -53.099937438964844, |
|
"loss": 0.6109, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.013963679783046246, |
|
"rewards/margins": 0.2430466115474701, |
|
"rewards/rejected": -0.2570103108882904, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 2.161967390325196, |
|
"grad_norm": 23.325836181640625, |
|
"learning_rate": 2.182573970718449e-08, |
|
"logits/chosen": -2.2531425952911377, |
|
"logits/rejected": -2.2373714447021484, |
|
"logps/chosen": -48.79794692993164, |
|
"logps/rejected": -53.21220016479492, |
|
"loss": 0.6085, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.012846941128373146, |
|
"rewards/margins": 0.24934491515159607, |
|
"rewards/rejected": -0.2621918320655823, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 2.1763805062606973, |
|
"grad_norm": 24.3940372467041, |
|
"learning_rate": 2.113617043622536e-08, |
|
"logits/chosen": -2.222809076309204, |
|
"logits/rejected": -2.2011048793792725, |
|
"logps/chosen": -45.179893493652344, |
|
"logps/rejected": -49.6671257019043, |
|
"loss": 0.6207, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -0.028503399342298508, |
|
"rewards/margins": 0.22944235801696777, |
|
"rewards/rejected": -0.2579457759857178, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 2.1907936221961988, |
|
"grad_norm": 23.14939308166504, |
|
"learning_rate": 2.045473875739001e-08, |
|
"logits/chosen": -2.262533664703369, |
|
"logits/rejected": -2.2459208965301514, |
|
"logps/chosen": -49.05790328979492, |
|
"logps/rejected": -53.393707275390625, |
|
"loss": 0.6171, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.017335880547761917, |
|
"rewards/margins": 0.2230241745710373, |
|
"rewards/rejected": -0.2403600662946701, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 2.2052067381316998, |
|
"grad_norm": 23.007627487182617, |
|
"learning_rate": 1.9781636787010503e-08, |
|
"logits/chosen": -2.2727057933807373, |
|
"logits/rejected": -2.252056837081909, |
|
"logps/chosen": -50.972694396972656, |
|
"logps/rejected": -55.11802291870117, |
|
"loss": 0.6016, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": 0.011264542117714882, |
|
"rewards/margins": 0.26211434602737427, |
|
"rewards/rejected": -0.25084978342056274, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 2.219619854067201, |
|
"grad_norm": 23.422765731811523, |
|
"learning_rate": 1.911705429302038e-08, |
|
"logits/chosen": -2.2223095893859863, |
|
"logits/rejected": -2.2044150829315186, |
|
"logps/chosen": -47.016273498535156, |
|
"logps/rejected": -49.784297943115234, |
|
"loss": 0.6307, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.02597871981561184, |
|
"rewards/margins": 0.17457632720470428, |
|
"rewards/rejected": -0.20055504143238068, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 2.2340329700027026, |
|
"grad_norm": 20.89065933227539, |
|
"learning_rate": 1.8461178641453617e-08, |
|
"logits/chosen": -2.2396929264068604, |
|
"logits/rejected": -2.2292518615722656, |
|
"logps/chosen": -43.25371551513672, |
|
"logps/rejected": -46.49152755737305, |
|
"loss": 0.6194, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.04524295777082443, |
|
"rewards/margins": 0.21859809756278992, |
|
"rewards/rejected": -0.26384103298187256, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 2.2484460859382036, |
|
"grad_norm": 20.526012420654297, |
|
"learning_rate": 1.781419474362017e-08, |
|
"logits/chosen": -2.2325713634490967, |
|
"logits/rejected": -2.2240681648254395, |
|
"logps/chosen": -45.1739616394043, |
|
"logps/rejected": -50.48863220214844, |
|
"loss": 0.614, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.03004586696624756, |
|
"rewards/margins": 0.22514891624450684, |
|
"rewards/rejected": -0.2551947832107544, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 2.262859201873705, |
|
"grad_norm": 19.403095245361328, |
|
"learning_rate": 1.7176285003974033e-08, |
|
"logits/chosen": -2.2410597801208496, |
|
"logits/rejected": -2.2172932624816895, |
|
"logps/chosen": -46.04480743408203, |
|
"logps/rejected": -49.85961151123047, |
|
"loss": 0.6228, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.009199894964694977, |
|
"rewards/margins": 0.21350452303886414, |
|
"rewards/rejected": -0.2227044403553009, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 2.2772723178092065, |
|
"grad_norm": 22.78799057006836, |
|
"learning_rate": 1.6547629268687786e-08, |
|
"logits/chosen": -2.271934747695923, |
|
"logits/rejected": -2.2632410526275635, |
|
"logps/chosen": -42.374305725097656, |
|
"logps/rejected": -48.10682678222656, |
|
"loss": 0.6102, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": -0.007346804253757, |
|
"rewards/margins": 0.23881065845489502, |
|
"rewards/rejected": -0.2461574524641037, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 2.291685433744708, |
|
"grad_norm": 19.8133487701416, |
|
"learning_rate": 1.59284047749485e-08, |
|
"logits/chosen": -2.239629030227661, |
|
"logits/rejected": -2.217893123626709, |
|
"logps/chosen": -45.59327697753906, |
|
"logps/rejected": -50.000091552734375, |
|
"loss": 0.6122, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.005661819130182266, |
|
"rewards/margins": 0.25302523374557495, |
|
"rewards/rejected": -0.2586870789527893, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 2.306098549680209, |
|
"grad_norm": 26.438047409057617, |
|
"learning_rate": 1.5318786100989188e-08, |
|
"logits/chosen": -2.2031397819519043, |
|
"logits/rejected": -2.195650577545166, |
|
"logps/chosen": -50.8388671875, |
|
"logps/rejected": -54.697113037109375, |
|
"loss": 0.6324, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.06328658759593964, |
|
"rewards/margins": 0.19921903312206268, |
|
"rewards/rejected": -0.2625056207180023, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 2.3205116656157103, |
|
"grad_norm": 24.46269416809082, |
|
"learning_rate": 1.471894511686988e-08, |
|
"logits/chosen": -2.2040247917175293, |
|
"logits/rejected": -2.189892530441284, |
|
"logps/chosen": -48.797935485839844, |
|
"logps/rejected": -50.93223190307617, |
|
"loss": 0.6291, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": -0.05211130529642105, |
|
"rewards/margins": 0.1955440789461136, |
|
"rewards/rejected": -0.24765542149543762, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 2.3349247815512117, |
|
"grad_norm": 20.400657653808594, |
|
"learning_rate": 1.4129050936022214e-08, |
|
"logits/chosen": -2.211453914642334, |
|
"logits/rejected": -2.1973557472229004, |
|
"logps/chosen": -44.81827926635742, |
|
"logps/rejected": -49.232845306396484, |
|
"loss": 0.6262, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.03966861963272095, |
|
"rewards/margins": 0.20301298797130585, |
|
"rewards/rejected": -0.2426815927028656, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 2.3493378974867127, |
|
"grad_norm": 23.146989822387695, |
|
"learning_rate": 1.3549269867571222e-08, |
|
"logits/chosen": -2.2070627212524414, |
|
"logits/rejected": -2.2003607749938965, |
|
"logps/chosen": -50.647216796875, |
|
"logps/rejected": -54.42750930786133, |
|
"loss": 0.6403, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.026784483343362808, |
|
"rewards/margins": 0.1822723001241684, |
|
"rewards/rejected": -0.2090567797422409, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 2.363751013422214, |
|
"grad_norm": 22.72720718383789, |
|
"learning_rate": 1.2979765369447742e-08, |
|
"logits/chosen": -2.2865893840789795, |
|
"logits/rejected": -2.261965036392212, |
|
"logps/chosen": -46.86061477661133, |
|
"logps/rejected": -51.7364616394043, |
|
"loss": 0.616, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.037126921117305756, |
|
"rewards/margins": 0.25349533557891846, |
|
"rewards/rejected": -0.2906222939491272, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 2.3781641293577156, |
|
"grad_norm": 24.311439514160156, |
|
"learning_rate": 1.2420698002304608e-08, |
|
"logits/chosen": -2.2154691219329834, |
|
"logits/rejected": -2.1979966163635254, |
|
"logps/chosen": -41.86544418334961, |
|
"logps/rejected": -47.02150344848633, |
|
"loss": 0.6235, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.06195966154336929, |
|
"rewards/margins": 0.23036758601665497, |
|
"rewards/rejected": -0.29232725501060486, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 2.392577245293217, |
|
"grad_norm": 18.456798553466797, |
|
"learning_rate": 1.1872225384249768e-08, |
|
"logits/chosen": -2.2427659034729004, |
|
"logits/rejected": -2.232330799102783, |
|
"logps/chosen": -48.07630157470703, |
|
"logps/rejected": -53.72954177856445, |
|
"loss": 0.618, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.028111791238188744, |
|
"rewards/margins": 0.22475573420524597, |
|
"rewards/rejected": -0.25286751985549927, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 2.406990361228718, |
|
"grad_norm": 29.3465633392334, |
|
"learning_rate": 1.1334502146408881e-08, |
|
"logits/chosen": -2.2178328037261963, |
|
"logits/rejected": -2.2086894512176514, |
|
"logps/chosen": -48.64324188232422, |
|
"logps/rejected": -52.14066696166992, |
|
"loss": 0.6293, |
|
"rewards/accuracies": 0.6781250238418579, |
|
"rewards/chosen": -0.05821915715932846, |
|
"rewards/margins": 0.20005354285240173, |
|
"rewards/rejected": -0.2582727074623108, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 2.4214034771642194, |
|
"grad_norm": 21.647228240966797, |
|
"learning_rate": 1.0807679889330163e-08, |
|
"logits/chosen": -2.290550708770752, |
|
"logits/rejected": -2.289184331893921, |
|
"logps/chosen": -48.02743148803711, |
|
"logps/rejected": -52.760520935058594, |
|
"loss": 0.6348, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.035547636449337006, |
|
"rewards/margins": 0.17515380680561066, |
|
"rewards/rejected": -0.21070146560668945, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 2.435816593099721, |
|
"grad_norm": 23.48753547668457, |
|
"learning_rate": 1.0291907140243538e-08, |
|
"logits/chosen": -2.237405300140381, |
|
"logits/rejected": -2.217073917388916, |
|
"logps/chosen": -45.324363708496094, |
|
"logps/rejected": -51.532958984375, |
|
"loss": 0.5945, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.014387303963303566, |
|
"rewards/margins": 0.2854435443878174, |
|
"rewards/rejected": -0.2998308539390564, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 2.450229709035222, |
|
"grad_norm": 21.15599822998047, |
|
"learning_rate": 9.787329311186249e-09, |
|
"logits/chosen": -2.2333407402038574, |
|
"logits/rejected": -2.2088422775268555, |
|
"logps/chosen": -45.39387893676758, |
|
"logps/rejected": -48.80021286010742, |
|
"loss": 0.6107, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.010507221333682537, |
|
"rewards/margins": 0.24884942173957825, |
|
"rewards/rejected": -0.25935661792755127, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 2.4646428249707233, |
|
"grad_norm": 23.810028076171875, |
|
"learning_rate": 9.294088658006916e-09, |
|
"logits/chosen": -2.2471401691436768, |
|
"logits/rejected": -2.2246270179748535, |
|
"logps/chosen": -47.628807067871094, |
|
"logps/rejected": -52.19799041748047, |
|
"loss": 0.6198, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.026676829904317856, |
|
"rewards/margins": 0.24438074231147766, |
|
"rewards/rejected": -0.2710575461387634, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 2.4790559409062247, |
|
"grad_norm": 19.998149871826172, |
|
"learning_rate": 8.812324240259094e-09, |
|
"logits/chosen": -2.2432353496551514, |
|
"logits/rejected": -2.2173349857330322, |
|
"logps/chosen": -48.27139663696289, |
|
"logps/rejected": -52.815460205078125, |
|
"loss": 0.6092, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.016811879351735115, |
|
"rewards/margins": 0.2507106065750122, |
|
"rewards/rejected": -0.2675224542617798, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 2.493469056841726, |
|
"grad_norm": 23.85225486755371, |
|
"learning_rate": 8.342171881996351e-09, |
|
"logits/chosen": -2.2501230239868164, |
|
"logits/rejected": -2.2334678173065186, |
|
"logps/chosen": -47.89342498779297, |
|
"logps/rejected": -51.11579132080078, |
|
"loss": 0.6234, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.053554244339466095, |
|
"rewards/margins": 0.2084587812423706, |
|
"rewards/rejected": -0.2620130181312561, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 2.507882172777227, |
|
"grad_norm": 22.4180908203125, |
|
"learning_rate": 7.883764133479137e-09, |
|
"logits/chosen": -2.243783473968506, |
|
"logits/rejected": -2.2160232067108154, |
|
"logps/chosen": -44.910865783691406, |
|
"logps/rejected": -50.00033187866211, |
|
"loss": 0.6146, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.03045772947371006, |
|
"rewards/margins": 0.23907272517681122, |
|
"rewards/rejected": -0.2695304751396179, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 2.5222952887127286, |
|
"grad_norm": 26.53935432434082, |
|
"learning_rate": 7.43723023380502e-09, |
|
"logits/chosen": -2.185051441192627, |
|
"logits/rejected": -2.1775598526000977, |
|
"logps/chosen": -47.21742630004883, |
|
"logps/rejected": -51.211883544921875, |
|
"loss": 0.6049, |
|
"rewards/accuracies": 0.690625011920929, |
|
"rewards/chosen": -0.0470205657184124, |
|
"rewards/margins": 0.2449074536561966, |
|
"rewards/rejected": -0.2919279932975769, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 2.53670840464823, |
|
"grad_norm": 21.709890365600586, |
|
"learning_rate": 7.002696074472075e-09, |
|
"logits/chosen": -2.233487606048584, |
|
"logits/rejected": -2.21376633644104, |
|
"logps/chosen": -50.533409118652344, |
|
"logps/rejected": -53.34821319580078, |
|
"loss": 0.6136, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.058121807873249054, |
|
"rewards/margins": 0.2471979856491089, |
|
"rewards/rejected": -0.30531978607177734, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 2.551121520583731, |
|
"grad_norm": 24.80657958984375, |
|
"learning_rate": 6.580284163886369e-09, |
|
"logits/chosen": -2.237624168395996, |
|
"logits/rejected": -2.225713014602661, |
|
"logps/chosen": -51.041221618652344, |
|
"logps/rejected": -54.869285583496094, |
|
"loss": 0.6162, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -0.038633137941360474, |
|
"rewards/margins": 0.22483810782432556, |
|
"rewards/rejected": -0.2634713053703308, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 2.5655346365192324, |
|
"grad_norm": 21.370935440063477, |
|
"learning_rate": 6.1701135928230566e-09, |
|
"logits/chosen": -2.1967005729675293, |
|
"logits/rejected": -2.1760928630828857, |
|
"logps/chosen": -51.85515594482422, |
|
"logps/rejected": -55.939857482910156, |
|
"loss": 0.6185, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.07306867837905884, |
|
"rewards/margins": 0.2407924234867096, |
|
"rewards/rejected": -0.31386110186576843, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 2.579947752454734, |
|
"grad_norm": 22.11069679260254, |
|
"learning_rate": 5.7723000008510655e-09, |
|
"logits/chosen": -2.251694440841675, |
|
"logits/rejected": -2.226647138595581, |
|
"logps/chosen": -47.68011474609375, |
|
"logps/rejected": -50.42749786376953, |
|
"loss": 0.6207, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.06913193315267563, |
|
"rewards/margins": 0.22913888096809387, |
|
"rewards/rejected": -0.2982707917690277, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 2.5943608683902353, |
|
"grad_norm": 22.6718692779541, |
|
"learning_rate": 5.386955543730798e-09, |
|
"logits/chosen": -2.2583675384521484, |
|
"logits/rejected": -2.234276533126831, |
|
"logps/chosen": -49.61945343017578, |
|
"logps/rejected": -55.055259704589844, |
|
"loss": 0.6101, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.0628320500254631, |
|
"rewards/margins": 0.25214314460754395, |
|
"rewards/rejected": -0.31497520208358765, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 2.6087739843257363, |
|
"grad_norm": 22.774456024169922, |
|
"learning_rate": 5.014188861794e-09, |
|
"logits/chosen": -2.201119899749756, |
|
"logits/rejected": -2.1815037727355957, |
|
"logps/chosen": -48.0115966796875, |
|
"logps/rejected": -53.1959228515625, |
|
"loss": 0.6, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.05451589822769165, |
|
"rewards/margins": 0.2779920697212219, |
|
"rewards/rejected": -0.3325079679489136, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 2.6231871002612377, |
|
"grad_norm": 26.43316650390625, |
|
"learning_rate": 4.654105049314744e-09, |
|
"logits/chosen": -2.257012367248535, |
|
"logits/rejected": -2.2474682331085205, |
|
"logps/chosen": -49.35502243041992, |
|
"logps/rejected": -53.5402946472168, |
|
"loss": 0.62, |
|
"rewards/accuracies": 0.684374988079071, |
|
"rewards/chosen": -0.02924627996981144, |
|
"rewards/margins": 0.22886374592781067, |
|
"rewards/rejected": -0.25811004638671875, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 2.637600216196739, |
|
"grad_norm": 25.40489959716797, |
|
"learning_rate": 4.3068056248801496e-09, |
|
"logits/chosen": -2.2381327152252197, |
|
"logits/rejected": -2.2229650020599365, |
|
"logps/chosen": -47.8486328125, |
|
"logps/rejected": -53.079307556152344, |
|
"loss": 0.624, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.05802469328045845, |
|
"rewards/margins": 0.22190991044044495, |
|
"rewards/rejected": -0.2799345850944519, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 2.65201333213224, |
|
"grad_norm": 24.248754501342773, |
|
"learning_rate": 3.972388502769225e-09, |
|
"logits/chosen": -2.2772481441497803, |
|
"logits/rejected": -2.2594523429870605, |
|
"logps/chosen": -52.78043746948242, |
|
"logps/rejected": -56.90644454956055, |
|
"loss": 0.612, |
|
"rewards/accuracies": 0.671875, |
|
"rewards/chosen": -0.07180557399988174, |
|
"rewards/margins": 0.2366293966770172, |
|
"rewards/rejected": -0.30843502283096313, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 2.6664264480677415, |
|
"grad_norm": 21.849775314331055, |
|
"learning_rate": 3.650947965347817e-09, |
|
"logits/chosen": -2.2574167251586914, |
|
"logits/rejected": -2.242363452911377, |
|
"logps/chosen": -49.745262145996094, |
|
"logps/rejected": -55.80384063720703, |
|
"loss": 0.5987, |
|
"rewards/accuracies": 0.7281249761581421, |
|
"rewards/chosen": -0.021196410059928894, |
|
"rewards/margins": 0.2885274291038513, |
|
"rewards/rejected": -0.3097238540649414, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 2.680839564003243, |
|
"grad_norm": 26.125858306884766, |
|
"learning_rate": 3.342574636487583e-09, |
|
"logits/chosen": -2.2989931106567383, |
|
"logits/rejected": -2.2858481407165527, |
|
"logps/chosen": -48.27467346191406, |
|
"logps/rejected": -52.693336486816406, |
|
"loss": 0.6224, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.0547061562538147, |
|
"rewards/margins": 0.20482540130615234, |
|
"rewards/rejected": -0.25953155755996704, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 2.6952526799387444, |
|
"grad_norm": 21.578413009643555, |
|
"learning_rate": 3.0473554560163207e-09, |
|
"logits/chosen": -2.2350730895996094, |
|
"logits/rejected": -2.2092785835266113, |
|
"logps/chosen": -45.995094299316406, |
|
"logps/rejected": -50.268714904785156, |
|
"loss": 0.6228, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.048373352736234665, |
|
"rewards/margins": 0.21610048413276672, |
|
"rewards/rejected": -0.2644737958908081, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 2.709665795874246, |
|
"grad_norm": 28.808284759521484, |
|
"learning_rate": 2.7653736552070207e-09, |
|
"logits/chosen": -2.26047945022583, |
|
"logits/rejected": -2.2417685985565186, |
|
"logps/chosen": -52.168251037597656, |
|
"logps/rejected": -55.9251594543457, |
|
"loss": 0.6094, |
|
"rewards/accuracies": 0.628125011920929, |
|
"rewards/chosen": -0.016536137089133263, |
|
"rewards/margins": 0.24921099841594696, |
|
"rewards/rejected": -0.2657471299171448, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 2.724078911809747, |
|
"grad_norm": 20.450273513793945, |
|
"learning_rate": 2.496708733312419e-09, |
|
"logits/chosen": -2.2290151119232178, |
|
"logits/rejected": -2.222752094268799, |
|
"logps/chosen": -45.20454025268555, |
|
"logps/rejected": -50.58509826660156, |
|
"loss": 0.6211, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.056550413370132446, |
|
"rewards/margins": 0.211500883102417, |
|
"rewards/rejected": -0.26805129647254944, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 2.7384920277452482, |
|
"grad_norm": 19.10955047607422, |
|
"learning_rate": 2.241436435151717e-09, |
|
"logits/chosen": -2.234227418899536, |
|
"logits/rejected": -2.2173571586608887, |
|
"logps/chosen": -45.842037200927734, |
|
"logps/rejected": -50.64638137817383, |
|
"loss": 0.619, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -0.01847267895936966, |
|
"rewards/margins": 0.2231343686580658, |
|
"rewards/rejected": -0.24160704016685486, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 2.7529051436807492, |
|
"grad_norm": 18.778671264648438, |
|
"learning_rate": 1.9996287297558866e-09, |
|
"logits/chosen": -2.2211852073669434, |
|
"logits/rejected": -2.2099175453186035, |
|
"logps/chosen": -46.6721305847168, |
|
"logps/rejected": -51.1614990234375, |
|
"loss": 0.6287, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.056424111127853394, |
|
"rewards/margins": 0.22064605355262756, |
|
"rewards/rejected": -0.27707016468048096, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 2.7673182596162507, |
|
"grad_norm": 19.748558044433594, |
|
"learning_rate": 1.7713537900772957e-09, |
|
"logits/chosen": -2.2626030445098877, |
|
"logits/rejected": -2.255251407623291, |
|
"logps/chosen": -43.59907913208008, |
|
"logps/rejected": -47.853111267089844, |
|
"loss": 0.6399, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.06305160373449326, |
|
"rewards/margins": 0.17098698019981384, |
|
"rewards/rejected": -0.2340385913848877, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 2.781731375551752, |
|
"grad_norm": 24.59339714050293, |
|
"learning_rate": 1.5566759737697998e-09, |
|
"logits/chosen": -2.2315924167633057, |
|
"logits/rejected": -2.215991258621216, |
|
"logps/chosen": -49.84008026123047, |
|
"logps/rejected": -53.77178955078125, |
|
"loss": 0.6144, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": -0.022936221212148666, |
|
"rewards/margins": 0.2307942807674408, |
|
"rewards/rejected": -0.2537304759025574, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 2.7961444914872535, |
|
"grad_norm": 22.342159271240234, |
|
"learning_rate": 1.3556558050442425e-09, |
|
"logits/chosen": -2.2505908012390137, |
|
"logits/rejected": -2.233022928237915, |
|
"logps/chosen": -48.119747161865234, |
|
"logps/rejected": -54.29258346557617, |
|
"loss": 0.609, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.041114725172519684, |
|
"rewards/margins": 0.2513982951641083, |
|
"rewards/rejected": -0.292512983083725, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 2.810557607422755, |
|
"grad_norm": 23.257844924926758, |
|
"learning_rate": 1.1683499576049583e-09, |
|
"logits/chosen": -2.2333426475524902, |
|
"logits/rejected": -2.222357749938965, |
|
"logps/chosen": -46.32872772216797, |
|
"logps/rejected": -51.00041961669922, |
|
"loss": 0.6181, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.05180941894650459, |
|
"rewards/margins": 0.22404730319976807, |
|
"rewards/rejected": -0.27585673332214355, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 2.824970723358256, |
|
"grad_norm": 20.40143585205078, |
|
"learning_rate": 9.948112386716167e-10, |
|
"logits/chosen": -2.2653684616088867, |
|
"logits/rejected": -2.2385129928588867, |
|
"logps/chosen": -52.7291259765625, |
|
"logps/rejected": -56.984962463378906, |
|
"loss": 0.6034, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": -0.016901496797800064, |
|
"rewards/margins": 0.2864697277545929, |
|
"rewards/rejected": -0.30337125062942505, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 2.8393838392937574, |
|
"grad_norm": 24.3416748046875, |
|
"learning_rate": 8.350885740913416e-10, |
|
"logits/chosen": -2.199658155441284, |
|
"logits/rejected": -2.183741569519043, |
|
"logps/chosen": -48.105308532714844, |
|
"logps/rejected": -52.25703811645508, |
|
"loss": 0.6347, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.06897452473640442, |
|
"rewards/margins": 0.20502345263957977, |
|
"rewards/rejected": -0.273997962474823, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 2.8537969552292584, |
|
"grad_norm": 27.89311408996582, |
|
"learning_rate": 6.89226994544978e-10, |
|
"logits/chosen": -2.202312707901001, |
|
"logits/rejected": -2.1836862564086914, |
|
"logps/chosen": -50.84931564331055, |
|
"logps/rejected": -54.451866149902344, |
|
"loss": 0.63, |
|
"rewards/accuracies": 0.596875011920929, |
|
"rewards/chosen": -0.0366673581302166, |
|
"rewards/margins": 0.19457104802131653, |
|
"rewards/rejected": -0.23123839497566223, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 2.86821007116476, |
|
"grad_norm": 23.623729705810547, |
|
"learning_rate": 5.572676228516038e-10, |
|
"logits/chosen": -2.23040509223938, |
|
"logits/rejected": -2.2104344367980957, |
|
"logps/chosen": -48.78104019165039, |
|
"logps/rejected": -54.99827194213867, |
|
"loss": 0.6092, |
|
"rewards/accuracies": 0.6968749761581421, |
|
"rewards/chosen": -0.02846195362508297, |
|
"rewards/margins": 0.24717013537883759, |
|
"rewards/rejected": -0.2756320834159851, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 2.882623187100261, |
|
"grad_norm": 23.66622543334961, |
|
"learning_rate": 4.3924766237473656e-10, |
|
"logits/chosen": -2.235327959060669, |
|
"logits/rejected": -2.2082302570343018, |
|
"logps/chosen": -47.306785583496094, |
|
"logps/rejected": -52.0076789855957, |
|
"loss": 0.5916, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.014220048673450947, |
|
"rewards/margins": 0.29147493839263916, |
|
"rewards/rejected": -0.2772548794746399, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 2.8970363030357626, |
|
"grad_norm": 22.549758911132812, |
|
"learning_rate": 3.35200386533574e-10, |
|
"logits/chosen": -2.2073681354522705, |
|
"logits/rejected": -2.191131114959717, |
|
"logps/chosen": -48.15802001953125, |
|
"logps/rejected": -52.05684280395508, |
|
"loss": 0.6241, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.039518579840660095, |
|
"rewards/margins": 0.22054967284202576, |
|
"rewards/rejected": -0.26006826758384705, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 2.911449418971264, |
|
"grad_norm": 23.719388961791992, |
|
"learning_rate": 2.4515512942220874e-10, |
|
"logits/chosen": -2.253865957260132, |
|
"logits/rejected": -2.2316102981567383, |
|
"logps/chosen": -50.0792236328125, |
|
"logps/rejected": -53.56706619262695, |
|
"loss": 0.6187, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.053325168788433075, |
|
"rewards/margins": 0.22036127746105194, |
|
"rewards/rejected": -0.2736864686012268, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 2.925862534906765, |
|
"grad_norm": 27.85939598083496, |
|
"learning_rate": 1.691372775394717e-10, |
|
"logits/chosen": -2.226738214492798, |
|
"logits/rejected": -2.2154603004455566, |
|
"logps/chosen": -47.156124114990234, |
|
"logps/rejected": -50.58882141113281, |
|
"loss": 0.6321, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.07649682462215424, |
|
"rewards/margins": 0.18964572250843048, |
|
"rewards/rejected": -0.2661425769329071, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 2.9402756508422665, |
|
"grad_norm": 26.678600311279297, |
|
"learning_rate": 1.0716826263165724e-10, |
|
"logits/chosen": -2.2740628719329834, |
|
"logits/rejected": -2.248230218887329, |
|
"logps/chosen": -47.3420295715332, |
|
"logps/rejected": -53.25926971435547, |
|
"loss": 0.6144, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.07511458545923233, |
|
"rewards/margins": 0.2504141330718994, |
|
"rewards/rejected": -0.32552871108055115, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 2.954688766777768, |
|
"grad_norm": 22.650192260742188, |
|
"learning_rate": 5.926555565031743e-11, |
|
"logits/chosen": -2.266533613204956, |
|
"logits/rejected": -2.254985809326172, |
|
"logps/chosen": -49.164058685302734, |
|
"logps/rejected": -53.79437255859375, |
|
"loss": 0.6302, |
|
"rewards/accuracies": 0.6468750238418579, |
|
"rewards/chosen": -0.06538953632116318, |
|
"rewards/margins": 0.20318007469177246, |
|
"rewards/rejected": -0.26856961846351624, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 2.969101882713269, |
|
"grad_norm": 24.89960289001465, |
|
"learning_rate": 2.544266182662458e-11, |
|
"logits/chosen": -2.2338194847106934, |
|
"logits/rejected": -2.210289239883423, |
|
"logps/chosen": -44.41590118408203, |
|
"logps/rejected": -49.89021301269531, |
|
"loss": 0.6038, |
|
"rewards/accuracies": 0.6656249761581421, |
|
"rewards/chosen": -0.04492425546050072, |
|
"rewards/margins": 0.25600799918174744, |
|
"rewards/rejected": -0.30093228816986084, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 2.9835149986487703, |
|
"grad_norm": 23.839759826660156, |
|
"learning_rate": 5.709116863872321e-12, |
|
"logits/chosen": -2.250903606414795, |
|
"logits/rejected": -2.2327446937561035, |
|
"logps/chosen": -45.14078140258789, |
|
"logps/rejected": -48.739810943603516, |
|
"loss": 0.6244, |
|
"rewards/accuracies": 0.659375011920929, |
|
"rewards/chosen": -0.013980092480778694, |
|
"rewards/margins": 0.2065594643354416, |
|
"rewards/rejected": -0.22053952515125275, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 2.9964868029907215, |
|
"step": 2079, |
|
"total_flos": 0.0, |
|
"train_loss": 0.646036940936345, |
|
"train_runtime": 18428.4848, |
|
"train_samples_per_second": 3.614, |
|
"train_steps_per_second": 0.113 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 2079, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|