martimfasantos's picture
Model save
b6d2950 verified
raw
history blame contribute delete
No virus
112 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9964868029907215,
"eval_steps": 800,
"global_step": 2079,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0014413115935501305,
"grad_norm": 21.287893295288086,
"learning_rate": 4.807692307692308e-10,
"logits/chosen": -2.3065450191497803,
"logits/rejected": -2.3093364238739014,
"logps/chosen": -43.837303161621094,
"logps/rejected": -48.05693054199219,
"loss": 0.6927,
"rewards/accuracies": 0.0625,
"rewards/chosen": 9.900308214128017e-06,
"rewards/margins": 0.0009647191036492586,
"rewards/rejected": -0.0009548187954351306,
"step": 1
},
{
"epoch": 0.014413115935501306,
"grad_norm": 21.111068725585938,
"learning_rate": 4.807692307692308e-09,
"logits/chosen": -2.3277647495269775,
"logits/rejected": -2.3011653423309326,
"logps/chosen": -42.82107162475586,
"logps/rejected": -44.892906188964844,
"loss": 0.6928,
"rewards/accuracies": 0.4722222089767456,
"rewards/chosen": -0.001543219666928053,
"rewards/margins": 0.0007266975590027869,
"rewards/rejected": -0.002269917167723179,
"step": 10
},
{
"epoch": 0.02882623187100261,
"grad_norm": 19.57785987854004,
"learning_rate": 9.615384615384615e-09,
"logits/chosen": -2.288435459136963,
"logits/rejected": -2.2758889198303223,
"logps/chosen": -45.44641876220703,
"logps/rejected": -48.17905044555664,
"loss": 0.6929,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.0001232023787451908,
"rewards/margins": 0.0005566190229728818,
"rewards/rejected": -0.00043341662967577577,
"step": 20
},
{
"epoch": 0.04323934780650392,
"grad_norm": 26.79755401611328,
"learning_rate": 1.442307692307692e-08,
"logits/chosen": -2.3148436546325684,
"logits/rejected": -2.3025364875793457,
"logps/chosen": -46.84934616088867,
"logps/rejected": -48.419532775878906,
"loss": 0.6931,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.00020735207363031805,
"rewards/margins": 0.0002374596951995045,
"rewards/rejected": -0.0004448117106221616,
"step": 30
},
{
"epoch": 0.05765246374200522,
"grad_norm": 22.711183547973633,
"learning_rate": 1.923076923076923e-08,
"logits/chosen": -2.347923517227173,
"logits/rejected": -2.3385748863220215,
"logps/chosen": -50.60676956176758,
"logps/rejected": -52.752296447753906,
"loss": 0.6929,
"rewards/accuracies": 0.4781250059604645,
"rewards/chosen": -0.00011574259406188503,
"rewards/margins": 0.0005067865131422877,
"rewards/rejected": -0.0006225291872397065,
"step": 40
},
{
"epoch": 0.07206557967750653,
"grad_norm": 23.8702392578125,
"learning_rate": 2.403846153846154e-08,
"logits/chosen": -2.329652786254883,
"logits/rejected": -2.322984218597412,
"logps/chosen": -47.366519927978516,
"logps/rejected": -49.952056884765625,
"loss": 0.6938,
"rewards/accuracies": 0.40937501192092896,
"rewards/chosen": -0.00085345224943012,
"rewards/margins": -0.0012199878692626953,
"rewards/rejected": 0.0003665355616249144,
"step": 50
},
{
"epoch": 0.08647869561300783,
"grad_norm": 18.53515625,
"learning_rate": 2.884615384615384e-08,
"logits/chosen": -2.3065052032470703,
"logits/rejected": -2.2884154319763184,
"logps/chosen": -46.60871124267578,
"logps/rejected": -48.89433670043945,
"loss": 0.6934,
"rewards/accuracies": 0.4781250059604645,
"rewards/chosen": -0.0008969244663603604,
"rewards/margins": -0.00045310668065212667,
"rewards/rejected": -0.0004438180476427078,
"step": 60
},
{
"epoch": 0.10089181154850914,
"grad_norm": 18.022958755493164,
"learning_rate": 3.365384615384615e-08,
"logits/chosen": -2.3429152965545654,
"logits/rejected": -2.327995777130127,
"logps/chosen": -47.61640167236328,
"logps/rejected": -50.77202606201172,
"loss": 0.6928,
"rewards/accuracies": 0.5062500238418579,
"rewards/chosen": -0.00031301408307626843,
"rewards/margins": 0.000775355554651469,
"rewards/rejected": -0.0010883695213124156,
"step": 70
},
{
"epoch": 0.11530492748401044,
"grad_norm": 18.332494735717773,
"learning_rate": 3.846153846153846e-08,
"logits/chosen": -2.3375937938690186,
"logits/rejected": -2.311336040496826,
"logps/chosen": -44.785614013671875,
"logps/rejected": -48.0066032409668,
"loss": 0.6932,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": 0.00027114865952171385,
"rewards/margins": -9.801401574804913e-06,
"rewards/rejected": 0.00028095004381611943,
"step": 80
},
{
"epoch": 0.12971804341951176,
"grad_norm": 18.671464920043945,
"learning_rate": 4.326923076923077e-08,
"logits/chosen": -2.3199424743652344,
"logits/rejected": -2.2938828468322754,
"logps/chosen": -44.98554611206055,
"logps/rejected": -46.84703826904297,
"loss": 0.6941,
"rewards/accuracies": 0.46562498807907104,
"rewards/chosen": -0.0013123347889631987,
"rewards/margins": -0.0016984030371531844,
"rewards/rejected": 0.0003860682772938162,
"step": 90
},
{
"epoch": 0.14413115935501306,
"grad_norm": 23.246063232421875,
"learning_rate": 4.807692307692308e-08,
"logits/chosen": -2.3865551948547363,
"logits/rejected": -2.3804287910461426,
"logps/chosen": -42.85012435913086,
"logps/rejected": -46.24885177612305,
"loss": 0.6934,
"rewards/accuracies": 0.4437499940395355,
"rewards/chosen": 0.0006392289651557803,
"rewards/margins": -0.00039641355397179723,
"rewards/rejected": 0.0010356425773352385,
"step": 100
},
{
"epoch": 0.15854427529051437,
"grad_norm": 19.473865509033203,
"learning_rate": 5.288461538461538e-08,
"logits/chosen": -2.3142473697662354,
"logits/rejected": -2.3037219047546387,
"logps/chosen": -45.20660400390625,
"logps/rejected": -47.961936950683594,
"loss": 0.6937,
"rewards/accuracies": 0.453125,
"rewards/chosen": -0.0003208608250133693,
"rewards/margins": -0.0010648202151060104,
"rewards/rejected": 0.0007439593900926411,
"step": 110
},
{
"epoch": 0.17295739122601567,
"grad_norm": 22.531965255737305,
"learning_rate": 5.769230769230768e-08,
"logits/chosen": -2.3439688682556152,
"logits/rejected": -2.3307394981384277,
"logps/chosen": -46.80133056640625,
"logps/rejected": -49.72489547729492,
"loss": 0.6933,
"rewards/accuracies": 0.453125,
"rewards/chosen": 1.1269899914623238e-05,
"rewards/margins": -0.00012028318451484665,
"rewards/rejected": 0.0001315530389547348,
"step": 120
},
{
"epoch": 0.18737050716151699,
"grad_norm": 24.544965744018555,
"learning_rate": 6.25e-08,
"logits/chosen": -2.2930805683135986,
"logits/rejected": -2.279456377029419,
"logps/chosen": -49.83013916015625,
"logps/rejected": -51.1182975769043,
"loss": 0.6917,
"rewards/accuracies": 0.534375011920929,
"rewards/chosen": 0.0026741260662674904,
"rewards/margins": 0.0031126493122428656,
"rewards/rejected": -0.00043852307135239244,
"step": 130
},
{
"epoch": 0.20178362309701828,
"grad_norm": 19.973854064941406,
"learning_rate": 6.73076923076923e-08,
"logits/chosen": -2.3479955196380615,
"logits/rejected": -2.3185691833496094,
"logps/chosen": -44.58715057373047,
"logps/rejected": -46.512718200683594,
"loss": 0.691,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.0033654593862593174,
"rewards/margins": 0.0043604555539786816,
"rewards/rejected": -0.0009949964005500078,
"step": 140
},
{
"epoch": 0.2161967390325196,
"grad_norm": 20.358179092407227,
"learning_rate": 7.211538461538461e-08,
"logits/chosen": -2.3313281536102295,
"logits/rejected": -2.307065486907959,
"logps/chosen": -46.340797424316406,
"logps/rejected": -48.234344482421875,
"loss": 0.6919,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": 0.00332791730761528,
"rewards/margins": 0.002700595883652568,
"rewards/rejected": 0.0006273213075473905,
"step": 150
},
{
"epoch": 0.2306098549680209,
"grad_norm": 20.67792510986328,
"learning_rate": 7.692307692307692e-08,
"logits/chosen": -2.305814266204834,
"logits/rejected": -2.290213108062744,
"logps/chosen": -47.53135681152344,
"logps/rejected": -50.77891540527344,
"loss": 0.6921,
"rewards/accuracies": 0.503125011920929,
"rewards/chosen": 0.005107083357870579,
"rewards/margins": 0.002246940741315484,
"rewards/rejected": 0.002860142383724451,
"step": 160
},
{
"epoch": 0.2450229709035222,
"grad_norm": 20.16474723815918,
"learning_rate": 8.173076923076923e-08,
"logits/chosen": -2.349069356918335,
"logits/rejected": -2.331510066986084,
"logps/chosen": -50.888572692871094,
"logps/rejected": -50.80742645263672,
"loss": 0.6929,
"rewards/accuracies": 0.515625,
"rewards/chosen": 0.0035659130662679672,
"rewards/margins": 0.0006433400558307767,
"rewards/rejected": 0.0029225728940218687,
"step": 170
},
{
"epoch": 0.2594360868390235,
"grad_norm": 22.5780029296875,
"learning_rate": 8.653846153846154e-08,
"logits/chosen": -2.305760622024536,
"logits/rejected": -2.28718638420105,
"logps/chosen": -49.73812484741211,
"logps/rejected": -52.55815887451172,
"loss": 0.692,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": 0.00477216811850667,
"rewards/margins": 0.0024364416021853685,
"rewards/rejected": 0.002335726749151945,
"step": 180
},
{
"epoch": 0.2738492027745248,
"grad_norm": 20.311643600463867,
"learning_rate": 9.134615384615383e-08,
"logits/chosen": -2.3253870010375977,
"logits/rejected": -2.3114845752716064,
"logps/chosen": -47.62921142578125,
"logps/rejected": -50.506046295166016,
"loss": 0.6926,
"rewards/accuracies": 0.503125011920929,
"rewards/chosen": 0.004789062775671482,
"rewards/margins": 0.0012888021301478148,
"rewards/rejected": 0.003500260878354311,
"step": 190
},
{
"epoch": 0.2882623187100261,
"grad_norm": 22.185544967651367,
"learning_rate": 9.615384615384616e-08,
"logits/chosen": -2.351552963256836,
"logits/rejected": -2.335294246673584,
"logps/chosen": -46.49347686767578,
"logps/rejected": -48.789695739746094,
"loss": 0.6912,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": 0.006301163230091333,
"rewards/margins": 0.0041467128321528435,
"rewards/rejected": 0.002154449699446559,
"step": 200
},
{
"epoch": 0.30267543464552743,
"grad_norm": 19.66486167907715,
"learning_rate": 9.999971806320255e-08,
"logits/chosen": -2.3723862171173096,
"logits/rejected": -2.3726882934570312,
"logps/chosen": -46.52350616455078,
"logps/rejected": -49.28281021118164,
"loss": 0.6908,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.00785305630415678,
"rewards/margins": 0.004859209060668945,
"rewards/rejected": 0.0029938467778265476,
"step": 210
},
{
"epoch": 0.31708855058102875,
"grad_norm": 21.849241256713867,
"learning_rate": 9.998985060913876e-08,
"logits/chosen": -2.29600191116333,
"logits/rejected": -2.280897855758667,
"logps/chosen": -45.98343276977539,
"logps/rejected": -47.80237579345703,
"loss": 0.6908,
"rewards/accuracies": 0.5406249761581421,
"rewards/chosen": 0.007365316152572632,
"rewards/margins": 0.0048685320653021336,
"rewards/rejected": 0.0024967845529317856,
"step": 220
},
{
"epoch": 0.33150166651653007,
"grad_norm": 29.88494300842285,
"learning_rate": 9.996588949457546e-08,
"logits/chosen": -2.3516383171081543,
"logits/rejected": -2.3211302757263184,
"logps/chosen": -53.43247604370117,
"logps/rejected": -53.46599197387695,
"loss": 0.6896,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": 0.009954456239938736,
"rewards/margins": 0.007440419401973486,
"rewards/rejected": 0.0025140370707958937,
"step": 230
},
{
"epoch": 0.34591478245203133,
"grad_norm": 21.79450798034668,
"learning_rate": 9.992784147488017e-08,
"logits/chosen": -2.3784842491149902,
"logits/rejected": -2.3466084003448486,
"logps/chosen": -47.536399841308594,
"logps/rejected": -49.28679275512695,
"loss": 0.6913,
"rewards/accuracies": 0.518750011920929,
"rewards/chosen": 0.008210290223360062,
"rewards/margins": 0.0038244971074163914,
"rewards/rejected": 0.004385794512927532,
"step": 240
},
{
"epoch": 0.36032789838753265,
"grad_norm": 19.872880935668945,
"learning_rate": 9.987571727694775e-08,
"logits/chosen": -2.3470616340637207,
"logits/rejected": -2.322946548461914,
"logps/chosen": -44.467933654785156,
"logps/rejected": -47.677467346191406,
"loss": 0.6888,
"rewards/accuracies": 0.565625011920929,
"rewards/chosen": 0.01355994027107954,
"rewards/margins": 0.008894408121705055,
"rewards/rejected": 0.00466553308069706,
"step": 250
},
{
"epoch": 0.37474101432303397,
"grad_norm": 25.680984497070312,
"learning_rate": 9.98095315961762e-08,
"logits/chosen": -2.3500027656555176,
"logits/rejected": -2.3312158584594727,
"logps/chosen": -49.896331787109375,
"logps/rejected": -51.985511779785156,
"loss": 0.6901,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": 0.012083685956895351,
"rewards/margins": 0.0064600324258208275,
"rewards/rejected": 0.005623653531074524,
"step": 260
},
{
"epoch": 0.3891541302585353,
"grad_norm": 21.92185401916504,
"learning_rate": 9.97293030923235e-08,
"logits/chosen": -2.3450703620910645,
"logits/rejected": -2.3156611919403076,
"logps/chosen": -46.08183670043945,
"logps/rejected": -47.64861297607422,
"loss": 0.6882,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.01599208638072014,
"rewards/margins": 0.010242667980492115,
"rewards/rejected": 0.005749420262873173,
"step": 270
},
{
"epoch": 0.40356724619403656,
"grad_norm": 22.253570556640625,
"learning_rate": 9.963505438424693e-08,
"logits/chosen": -2.311044454574585,
"logits/rejected": -2.294201612472534,
"logps/chosen": -48.35077667236328,
"logps/rejected": -49.112300872802734,
"loss": 0.69,
"rewards/accuracies": 0.543749988079071,
"rewards/chosen": 0.013459725305438042,
"rewards/margins": 0.006622823420912027,
"rewards/rejected": 0.006836901418864727,
"step": 280
},
{
"epoch": 0.4179803621295379,
"grad_norm": 20.787418365478516,
"learning_rate": 9.952681204352607e-08,
"logits/chosen": -2.333068609237671,
"logits/rejected": -2.299546003341675,
"logps/chosen": -47.46521759033203,
"logps/rejected": -49.262413024902344,
"loss": 0.6867,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.019271234050393105,
"rewards/margins": 0.013373048976063728,
"rewards/rejected": 0.005898186471313238,
"step": 290
},
{
"epoch": 0.4323934780650392,
"grad_norm": 23.323768615722656,
"learning_rate": 9.94046065869715e-08,
"logits/chosen": -2.343045234680176,
"logits/rejected": -2.330648899078369,
"logps/chosen": -46.3621711730957,
"logps/rejected": -51.10980987548828,
"loss": 0.6841,
"rewards/accuracies": 0.6343749761581421,
"rewards/chosen": 0.020211653783917427,
"rewards/margins": 0.01878417655825615,
"rewards/rejected": 0.0014274796703830361,
"step": 300
},
{
"epoch": 0.4468065940005405,
"grad_norm": 16.884517669677734,
"learning_rate": 9.926847246802116e-08,
"logits/chosen": -2.325950860977173,
"logits/rejected": -2.298354148864746,
"logps/chosen": -46.33721160888672,
"logps/rejected": -47.747737884521484,
"loss": 0.6877,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": 0.0174238421022892,
"rewards/margins": 0.011514835990965366,
"rewards/rejected": 0.005909005645662546,
"step": 310
},
{
"epoch": 0.4612197099360418,
"grad_norm": 20.160341262817383,
"learning_rate": 9.911844806702691e-08,
"logits/chosen": -2.324944019317627,
"logits/rejected": -2.314134120941162,
"logps/chosen": -44.099708557128906,
"logps/rejected": -47.27263641357422,
"loss": 0.684,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.023020442575216293,
"rewards/margins": 0.019213002175092697,
"rewards/rejected": 0.0038074390031397343,
"step": 320
},
{
"epoch": 0.4756328258715431,
"grad_norm": 23.56479263305664,
"learning_rate": 9.895457568043387e-08,
"logits/chosen": -2.349403142929077,
"logits/rejected": -2.330629825592041,
"logps/chosen": -46.192623138427734,
"logps/rejected": -47.701568603515625,
"loss": 0.6842,
"rewards/accuracies": 0.578125,
"rewards/chosen": 0.02301434613764286,
"rewards/margins": 0.018842367455363274,
"rewards/rejected": 0.004171978682279587,
"step": 330
},
{
"epoch": 0.4900459418070444,
"grad_norm": 26.761240005493164,
"learning_rate": 9.877690150885587e-08,
"logits/chosen": -2.29034423828125,
"logits/rejected": -2.259958028793335,
"logps/chosen": -49.6630859375,
"logps/rejected": -50.368370056152344,
"loss": 0.6842,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": 0.02521464228630066,
"rewards/margins": 0.018750475719571114,
"rewards/rejected": 0.006464164704084396,
"step": 340
},
{
"epoch": 0.5044590577425457,
"grad_norm": 17.43931770324707,
"learning_rate": 9.858547564404998e-08,
"logits/chosen": -2.3370158672332764,
"logits/rejected": -2.314396381378174,
"logps/chosen": -47.03008270263672,
"logps/rejected": -49.929176330566406,
"loss": 0.6836,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.02510705217719078,
"rewards/margins": 0.020147310569882393,
"rewards/rejected": 0.004959740675985813,
"step": 350
},
{
"epoch": 0.518872173678047,
"grad_norm": 21.918987274169922,
"learning_rate": 9.838035205479418e-08,
"logits/chosen": -2.302408218383789,
"logits/rejected": -2.283946990966797,
"logps/chosen": -45.20514678955078,
"logps/rejected": -47.9190559387207,
"loss": 0.6832,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.025177201256155968,
"rewards/margins": 0.021118884906172752,
"rewards/rejected": 0.004058316815644503,
"step": 360
},
{
"epoch": 0.5332852896135484,
"grad_norm": 24.303964614868164,
"learning_rate": 9.816158857167196e-08,
"logits/chosen": -2.3255884647369385,
"logits/rejected": -2.3051230907440186,
"logps/chosen": -47.729164123535156,
"logps/rejected": -48.6644401550293,
"loss": 0.6856,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": 0.02227911166846752,
"rewards/margins": 0.016225317493081093,
"rewards/rejected": 0.006053795572370291,
"step": 370
},
{
"epoch": 0.5476984055490496,
"grad_norm": 24.593324661254883,
"learning_rate": 9.7929246870768e-08,
"logits/chosen": -2.323387384414673,
"logits/rejected": -2.308987855911255,
"logps/chosen": -47.08444595336914,
"logps/rejected": -49.36346435546875,
"loss": 0.6811,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.028125789016485214,
"rewards/margins": 0.02563219703733921,
"rewards/rejected": 0.0024935940746217966,
"step": 380
},
{
"epoch": 0.5621115214845509,
"grad_norm": 21.75504493713379,
"learning_rate": 9.768339245627993e-08,
"logits/chosen": -2.294553518295288,
"logits/rejected": -2.2829360961914062,
"logps/chosen": -45.68154525756836,
"logps/rejected": -48.066612243652344,
"loss": 0.6796,
"rewards/accuracies": 0.621874988079071,
"rewards/chosen": 0.030594781041145325,
"rewards/margins": 0.02912401221692562,
"rewards/rejected": 0.0014707675436511636,
"step": 390
},
{
"epoch": 0.5765246374200522,
"grad_norm": 21.862024307250977,
"learning_rate": 9.742409464205059e-08,
"logits/chosen": -2.3316078186035156,
"logits/rejected": -2.3144497871398926,
"logps/chosen": -48.248443603515625,
"logps/rejected": -50.65959548950195,
"loss": 0.6809,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": 0.032895296812057495,
"rewards/margins": 0.026586730033159256,
"rewards/rejected": 0.006308571435511112,
"step": 400
},
{
"epoch": 0.5909377533555535,
"grad_norm": 20.746496200561523,
"learning_rate": 9.715142653202644e-08,
"logits/chosen": -2.3111040592193604,
"logits/rejected": -2.293923854827881,
"logps/chosen": -45.711585998535156,
"logps/rejected": -47.74725341796875,
"loss": 0.6801,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.029646728187799454,
"rewards/margins": 0.028508460149168968,
"rewards/rejected": 0.001138267107307911,
"step": 410
},
{
"epoch": 0.6053508692910549,
"grad_norm": 17.426355361938477,
"learning_rate": 9.68654649996473e-08,
"logits/chosen": -2.32612681388855,
"logits/rejected": -2.314548969268799,
"logps/chosen": -44.34481430053711,
"logps/rejected": -46.076744079589844,
"loss": 0.6775,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.03846784681081772,
"rewards/margins": 0.0339292511343956,
"rewards/rejected": 0.004538598004728556,
"step": 420
},
{
"epoch": 0.6197639852265562,
"grad_norm": 22.193706512451172,
"learning_rate": 9.656629066617335e-08,
"logits/chosen": -2.312929630279541,
"logits/rejected": -2.2936108112335205,
"logps/chosen": -51.8912467956543,
"logps/rejected": -53.661651611328125,
"loss": 0.6762,
"rewards/accuracies": 0.640625,
"rewards/chosen": 0.034915387630462646,
"rewards/margins": 0.0364970937371254,
"rewards/rejected": -0.001581709599122405,
"step": 430
},
{
"epoch": 0.6341771011620575,
"grad_norm": 20.207712173461914,
"learning_rate": 9.62539878779556e-08,
"logits/chosen": -2.3140978813171387,
"logits/rejected": -2.3001463413238525,
"logps/chosen": -46.92143630981445,
"logps/rejected": -49.142173767089844,
"loss": 0.6789,
"rewards/accuracies": 0.578125,
"rewards/chosen": 0.0315568745136261,
"rewards/margins": 0.03146868199110031,
"rewards/rejected": 8.819000504445285e-05,
"step": 440
},
{
"epoch": 0.6485902170975588,
"grad_norm": 26.56516456604004,
"learning_rate": 9.592864468265604e-08,
"logits/chosen": -2.3407697677612305,
"logits/rejected": -2.3281524181365967,
"logps/chosen": -48.421573638916016,
"logps/rejected": -50.97784423828125,
"loss": 0.6779,
"rewards/accuracies": 0.6156250238418579,
"rewards/chosen": 0.033747684210538864,
"rewards/margins": 0.03344957157969475,
"rewards/rejected": 0.0002981135621666908,
"step": 450
},
{
"epoch": 0.6630033330330601,
"grad_norm": 23.340192794799805,
"learning_rate": 9.559035280442441e-08,
"logits/chosen": -2.299325942993164,
"logits/rejected": -2.2764008045196533,
"logps/chosen": -46.117820739746094,
"logps/rejected": -47.99044418334961,
"loss": 0.6785,
"rewards/accuracies": 0.59375,
"rewards/chosen": 0.03373030200600624,
"rewards/margins": 0.03273017704486847,
"rewards/rejected": 0.0010001230984926224,
"step": 460
},
{
"epoch": 0.6774164489685613,
"grad_norm": 20.64352798461914,
"learning_rate": 9.523920761803823e-08,
"logits/chosen": -2.3671813011169434,
"logits/rejected": -2.346903085708618,
"logps/chosen": -50.475929260253906,
"logps/rejected": -52.254722595214844,
"loss": 0.6762,
"rewards/accuracies": 0.596875011920929,
"rewards/chosen": 0.039838653057813644,
"rewards/margins": 0.036926448345184326,
"rewards/rejected": 0.002912207506597042,
"step": 470
},
{
"epoch": 0.6918295649040627,
"grad_norm": 22.629377365112305,
"learning_rate": 9.487530812201383e-08,
"logits/chosen": -2.3167202472686768,
"logits/rejected": -2.3114185333251953,
"logps/chosen": -47.46406936645508,
"logps/rejected": -51.130619049072266,
"loss": 0.6793,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.036858368664979935,
"rewards/margins": 0.030934974551200867,
"rewards/rejected": 0.005923392251133919,
"step": 480
},
{
"epoch": 0.706242680839564,
"grad_norm": 23.737966537475586,
"learning_rate": 9.449875691069571e-08,
"logits/chosen": -2.3150174617767334,
"logits/rejected": -2.3096041679382324,
"logps/chosen": -47.43329620361328,
"logps/rejected": -51.78876876831055,
"loss": 0.6761,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.041950371116399765,
"rewards/margins": 0.03815682604908943,
"rewards/rejected": 0.003793553216382861,
"step": 490
},
{
"epoch": 0.7206557967750653,
"grad_norm": 22.39310646057129,
"learning_rate": 9.410966014533195e-08,
"logits/chosen": -2.3104045391082764,
"logits/rejected": -2.2926175594329834,
"logps/chosen": -48.61553955078125,
"logps/rejected": -50.52370834350586,
"loss": 0.6772,
"rewards/accuracies": 0.559374988079071,
"rewards/chosen": 0.04291251674294472,
"rewards/margins": 0.03616334870457649,
"rewards/rejected": 0.00674917409196496,
"step": 500
},
{
"epoch": 0.7350689127105666,
"grad_norm": 22.58913803100586,
"learning_rate": 9.37081275241442e-08,
"logits/chosen": -2.308424711227417,
"logits/rejected": -2.2905216217041016,
"logps/chosen": -44.69788360595703,
"logps/rejected": -46.66436004638672,
"loss": 0.679,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": 0.03513062000274658,
"rewards/margins": 0.031742557883262634,
"rewards/rejected": 0.003388059791177511,
"step": 510
},
{
"epoch": 0.7494820286460679,
"grad_norm": 21.075912475585938,
"learning_rate": 9.329427225140042e-08,
"logits/chosen": -2.297698974609375,
"logits/rejected": -2.280587911605835,
"logps/chosen": -44.73003387451172,
"logps/rejected": -47.60186004638672,
"loss": 0.6711,
"rewards/accuracies": 0.5718749761581421,
"rewards/chosen": 0.04519854113459587,
"rewards/margins": 0.04976027458906174,
"rewards/rejected": -0.004561735317111015,
"step": 520
},
{
"epoch": 0.7638951445815693,
"grad_norm": 19.522680282592773,
"learning_rate": 9.286821100549906e-08,
"logits/chosen": -2.307309150695801,
"logits/rejected": -2.272381544113159,
"logps/chosen": -44.07297897338867,
"logps/rejected": -46.60788345336914,
"loss": 0.6686,
"rewards/accuracies": 0.59375,
"rewards/chosen": 0.04921981692314148,
"rewards/margins": 0.05518011003732681,
"rewards/rejected": -0.005960285663604736,
"step": 530
},
{
"epoch": 0.7783082605170706,
"grad_norm": 21.739355087280273,
"learning_rate": 9.243006390607402e-08,
"logits/chosen": -2.3263185024261475,
"logits/rejected": -2.316568613052368,
"logps/chosen": -50.72089385986328,
"logps/rejected": -54.4951057434082,
"loss": 0.6673,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": 0.05768689513206482,
"rewards/margins": 0.05719071626663208,
"rewards/rejected": 0.0004961833474226296,
"step": 540
},
{
"epoch": 0.7927213764525718,
"grad_norm": 22.593128204345703,
"learning_rate": 9.197995448012912e-08,
"logits/chosen": -2.340376853942871,
"logits/rejected": -2.319169044494629,
"logps/chosen": -49.77606964111328,
"logps/rejected": -52.580345153808594,
"loss": 0.6677,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.042526066303253174,
"rewards/margins": 0.05721992999315262,
"rewards/rejected": -0.014693861827254295,
"step": 550
},
{
"epoch": 0.8071344923880731,
"grad_norm": 19.409584045410156,
"learning_rate": 9.151800962721217e-08,
"logits/chosen": -2.282543182373047,
"logits/rejected": -2.2616894245147705,
"logps/chosen": -45.87504959106445,
"logps/rejected": -47.72060775756836,
"loss": 0.668,
"rewards/accuracies": 0.596875011920929,
"rewards/chosen": 0.04841776564717293,
"rewards/margins": 0.05708543583750725,
"rewards/rejected": -0.008667677640914917,
"step": 560
},
{
"epoch": 0.8215476083235744,
"grad_norm": 23.79241371154785,
"learning_rate": 9.104435958363807e-08,
"logits/chosen": -2.3320469856262207,
"logits/rejected": -2.321155309677124,
"logps/chosen": -45.04407501220703,
"logps/rejected": -47.318321228027344,
"loss": 0.67,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": 0.0415559858083725,
"rewards/margins": 0.053853344172239304,
"rewards/rejected": -0.012297360226511955,
"step": 570
},
{
"epoch": 0.8359607242590757,
"grad_norm": 20.56794548034668,
"learning_rate": 9.055913788577128e-08,
"logits/chosen": -2.300914764404297,
"logits/rejected": -2.283904790878296,
"logps/chosen": -49.61848068237305,
"logps/rejected": -51.29194259643555,
"loss": 0.6718,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": 0.03667648881673813,
"rewards/margins": 0.050280071794986725,
"rewards/rejected": -0.013603581115603447,
"step": 580
},
{
"epoch": 0.8503738401945771,
"grad_norm": 26.347415924072266,
"learning_rate": 9.006248133237782e-08,
"logits/chosen": -2.337028980255127,
"logits/rejected": -2.301743984222412,
"logps/chosen": -47.668113708496094,
"logps/rejected": -48.43418884277344,
"loss": 0.6724,
"rewards/accuracies": 0.578125,
"rewards/chosen": 0.036908913403749466,
"rewards/margins": 0.04973364248871803,
"rewards/rejected": -0.012824726291000843,
"step": 590
},
{
"epoch": 0.8647869561300784,
"grad_norm": 23.15882110595703,
"learning_rate": 8.955452994605753e-08,
"logits/chosen": -2.316641330718994,
"logits/rejected": -2.285348653793335,
"logps/chosen": -50.31378173828125,
"logps/rejected": -52.169654846191406,
"loss": 0.67,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": 0.032930124551057816,
"rewards/margins": 0.05345344543457031,
"rewards/rejected": -0.0205233171582222,
"step": 600
},
{
"epoch": 0.8792000720655797,
"grad_norm": 22.113082885742188,
"learning_rate": 8.903542693376747e-08,
"logits/chosen": -2.2881808280944824,
"logits/rejected": -2.277193784713745,
"logps/chosen": -44.89480209350586,
"logps/rejected": -48.47951126098633,
"loss": 0.6676,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": 0.04625421017408371,
"rewards/margins": 0.06053239107131958,
"rewards/rejected": -0.014278176240622997,
"step": 610
},
{
"epoch": 0.893613188001081,
"grad_norm": 24.942516326904297,
"learning_rate": 8.850531864644748e-08,
"logits/chosen": -2.297983407974243,
"logits/rejected": -2.265835762023926,
"logps/chosen": -45.724159240722656,
"logps/rejected": -48.599876403808594,
"loss": 0.6627,
"rewards/accuracies": 0.596875011920929,
"rewards/chosen": 0.047544609755277634,
"rewards/margins": 0.07158304750919342,
"rewards/rejected": -0.024038437753915787,
"step": 620
},
{
"epoch": 0.9080263039365822,
"grad_norm": 21.02975845336914,
"learning_rate": 8.796435453775943e-08,
"logits/chosen": -2.3123021125793457,
"logits/rejected": -2.3164889812469482,
"logps/chosen": -48.549461364746094,
"logps/rejected": -53.92346954345703,
"loss": 0.6717,
"rewards/accuracies": 0.5562499761581421,
"rewards/chosen": 0.031459733843803406,
"rewards/margins": 0.051202088594436646,
"rewards/rejected": -0.01974235475063324,
"step": 630
},
{
"epoch": 0.9224394198720836,
"grad_norm": 21.564970016479492,
"learning_rate": 8.741268712195164e-08,
"logits/chosen": -2.325171947479248,
"logits/rejected": -2.2981820106506348,
"logps/chosen": -45.690956115722656,
"logps/rejected": -49.10464859008789,
"loss": 0.6558,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.05857594683766365,
"rewards/margins": 0.08950765430927277,
"rewards/rejected": -0.030931716784834862,
"step": 640
},
{
"epoch": 0.9368525358075849,
"grad_norm": 22.0821475982666,
"learning_rate": 8.685047193086053e-08,
"logits/chosen": -2.336580276489258,
"logits/rejected": -2.329068660736084,
"logps/chosen": -46.42755126953125,
"logps/rejected": -49.21259307861328,
"loss": 0.672,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.034749384969472885,
"rewards/margins": 0.05111519619822502,
"rewards/rejected": -0.016365814954042435,
"step": 650
},
{
"epoch": 0.9512656517430862,
"grad_norm": 19.028423309326172,
"learning_rate": 8.627786747006144e-08,
"logits/chosen": -2.3250503540039062,
"logits/rejected": -2.3135623931884766,
"logps/chosen": -44.120948791503906,
"logps/rejected": -47.624244689941406,
"loss": 0.6613,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.049294039607048035,
"rewards/margins": 0.07464224100112915,
"rewards/rejected": -0.025348205119371414,
"step": 660
},
{
"epoch": 0.9656787676785875,
"grad_norm": 22.04196548461914,
"learning_rate": 8.569503517418104e-08,
"logits/chosen": -2.31101131439209,
"logits/rejected": -2.2869081497192383,
"logps/chosen": -48.54884719848633,
"logps/rejected": -51.203269958496094,
"loss": 0.6638,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": 0.04563365504145622,
"rewards/margins": 0.0732460767030716,
"rewards/rejected": -0.027612417936325073,
"step": 670
},
{
"epoch": 0.9800918836140888,
"grad_norm": 23.0523624420166,
"learning_rate": 8.510213936138402e-08,
"logits/chosen": -2.271580696105957,
"logits/rejected": -2.249781847000122,
"logps/chosen": -47.92513656616211,
"logps/rejected": -49.661014556884766,
"loss": 0.6593,
"rewards/accuracies": 0.596875011920929,
"rewards/chosen": 0.055300354957580566,
"rewards/margins": 0.07859370857477188,
"rewards/rejected": -0.023293355479836464,
"step": 680
},
{
"epoch": 0.9945049995495902,
"grad_norm": 23.048105239868164,
"learning_rate": 8.449934718704685e-08,
"logits/chosen": -2.301927328109741,
"logits/rejected": -2.2857935428619385,
"logps/chosen": -45.03214645385742,
"logps/rejected": -48.306583404541016,
"loss": 0.6656,
"rewards/accuracies": 0.6031249761581421,
"rewards/chosen": 0.04563567042350769,
"rewards/margins": 0.06622838228940964,
"rewards/rejected": -0.02059270814061165,
"step": 690
},
{
"epoch": 1.0089181154850915,
"grad_norm": 21.652252197265625,
"learning_rate": 8.388682859663152e-08,
"logits/chosen": -2.284125804901123,
"logits/rejected": -2.2744266986846924,
"logps/chosen": -45.62473678588867,
"logps/rejected": -48.55522918701172,
"loss": 0.6568,
"rewards/accuracies": 0.6031249761581421,
"rewards/chosen": 0.053375959396362305,
"rewards/margins": 0.08940999954938889,
"rewards/rejected": -0.03603404015302658,
"step": 700
},
{
"epoch": 1.0233312314205927,
"grad_norm": 22.383621215820312,
"learning_rate": 8.326475627777277e-08,
"logits/chosen": -2.289461612701416,
"logits/rejected": -2.2808353900909424,
"logps/chosen": -46.80120849609375,
"logps/rejected": -50.2525520324707,
"loss": 0.6506,
"rewards/accuracies": 0.609375,
"rewards/chosen": 0.05878598242998123,
"rewards/margins": 0.10358556360006332,
"rewards/rejected": -0.04479958117008209,
"step": 710
},
{
"epoch": 1.037744347356094,
"grad_norm": 21.419694900512695,
"learning_rate": 8.26333056115922e-08,
"logits/chosen": -2.3371403217315674,
"logits/rejected": -2.312253475189209,
"logps/chosen": -47.29637908935547,
"logps/rejected": -48.51680374145508,
"loss": 0.6598,
"rewards/accuracies": 0.621874988079071,
"rewards/chosen": 0.04448707401752472,
"rewards/margins": 0.08294164389371872,
"rewards/rejected": -0.038454569876194,
"step": 720
},
{
"epoch": 1.0521574632915953,
"grad_norm": 25.013229370117188,
"learning_rate": 8.1992654623253e-08,
"logits/chosen": -2.3014025688171387,
"logits/rejected": -2.2878174781799316,
"logps/chosen": -44.94810104370117,
"logps/rejected": -50.638641357421875,
"loss": 0.6423,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": 0.050335705280303955,
"rewards/margins": 0.12491028010845184,
"rewards/rejected": -0.07457457482814789,
"step": 730
},
{
"epoch": 1.0665705792270967,
"grad_norm": 21.155508041381836,
"learning_rate": 8.134298393176915e-08,
"logits/chosen": -2.2642767429351807,
"logits/rejected": -2.2350783348083496,
"logps/chosen": -44.5008659362793,
"logps/rejected": -48.16975021362305,
"loss": 0.6497,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.045155756175518036,
"rewards/margins": 0.10517505556344986,
"rewards/rejected": -0.06001930311322212,
"step": 740
},
{
"epoch": 1.080983695162598,
"grad_norm": 20.4642276763916,
"learning_rate": 8.068447669908356e-08,
"logits/chosen": -2.2687973976135254,
"logits/rejected": -2.2399725914001465,
"logps/chosen": -49.161521911621094,
"logps/rejected": -51.75724411010742,
"loss": 0.651,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": 0.048742182552814484,
"rewards/margins": 0.10028008371591568,
"rewards/rejected": -0.0515378937125206,
"step": 750
},
{
"epoch": 1.0953968110980994,
"grad_norm": 22.40403938293457,
"learning_rate": 8.001731857842906e-08,
"logits/chosen": -2.273935317993164,
"logits/rejected": -2.265725612640381,
"logps/chosen": -48.28545379638672,
"logps/rejected": -49.2262077331543,
"loss": 0.6661,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": 0.03592974692583084,
"rewards/margins": 0.0714699849486351,
"rewards/rejected": -0.03554024174809456,
"step": 760
},
{
"epoch": 1.1098099270336006,
"grad_norm": 23.13152503967285,
"learning_rate": 7.934169766198712e-08,
"logits/chosen": -2.3057303428649902,
"logits/rejected": -2.281688690185547,
"logps/chosen": -46.85943603515625,
"logps/rejected": -50.839698791503906,
"loss": 0.6527,
"rewards/accuracies": 0.6031249761581421,
"rewards/chosen": 0.0495249405503273,
"rewards/margins": 0.09827858954668045,
"rewards/rejected": -0.04875364899635315,
"step": 770
},
{
"epoch": 1.1242230429691018,
"grad_norm": 26.3281192779541,
"learning_rate": 7.86578044278589e-08,
"logits/chosen": -2.320192575454712,
"logits/rejected": -2.297276735305786,
"logps/chosen": -48.3865966796875,
"logps/rejected": -51.7015380859375,
"loss": 0.6443,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": 0.057862721383571625,
"rewards/margins": 0.11766588687896729,
"rewards/rejected": -0.05980316549539566,
"step": 780
},
{
"epoch": 1.1386361589046032,
"grad_norm": 24.804874420166016,
"learning_rate": 7.796583168636375e-08,
"logits/chosen": -2.3215794563293457,
"logits/rejected": -2.305657148361206,
"logps/chosen": -48.32183074951172,
"logps/rejected": -52.143150329589844,
"loss": 0.6451,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": 0.04665009304881096,
"rewards/margins": 0.11924920231103897,
"rewards/rejected": -0.07259909808635712,
"step": 790
},
{
"epoch": 1.1530492748401044,
"grad_norm": 18.46070671081543,
"learning_rate": 7.726597452568007e-08,
"logits/chosen": -2.300274610519409,
"logits/rejected": -2.2758469581604004,
"logps/chosen": -46.92626190185547,
"logps/rejected": -50.41767501831055,
"loss": 0.6505,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.04316678270697594,
"rewards/margins": 0.10699989646673203,
"rewards/rejected": -0.06383311003446579,
"step": 800
},
{
"epoch": 1.1674623907756059,
"grad_norm": 25.514638900756836,
"learning_rate": 7.655843025684402e-08,
"logits/chosen": -2.319176435470581,
"logits/rejected": -2.308589458465576,
"logps/chosen": -48.81470489501953,
"logps/rejected": -52.32573318481445,
"loss": 0.6545,
"rewards/accuracies": 0.59375,
"rewards/chosen": 0.04523879662156105,
"rewards/margins": 0.09590532630681992,
"rewards/rejected": -0.050666533410549164,
"step": 810
},
{
"epoch": 1.181875506711107,
"grad_norm": 19.846221923828125,
"learning_rate": 7.584339835812151e-08,
"logits/chosen": -2.2872939109802246,
"logits/rejected": -2.2704885005950928,
"logps/chosen": -48.955596923828125,
"logps/rejected": -49.713871002197266,
"loss": 0.6512,
"rewards/accuracies": 0.609375,
"rewards/chosen": 0.0468595027923584,
"rewards/margins": 0.1036025881767273,
"rewards/rejected": -0.0567430779337883,
"step": 820
},
{
"epoch": 1.1962886226466085,
"grad_norm": 21.35214614868164,
"learning_rate": 7.512108041876924e-08,
"logits/chosen": -2.253537654876709,
"logits/rejected": -2.2463467121124268,
"logps/chosen": -43.18073654174805,
"logps/rejected": -45.50495147705078,
"loss": 0.6565,
"rewards/accuracies": 0.5843750238418579,
"rewards/chosen": 0.0258056428283453,
"rewards/margins": 0.09474330395460129,
"rewards/rejected": -0.06893765181303024,
"step": 830
},
{
"epoch": 1.2107017385821097,
"grad_norm": 21.493972778320312,
"learning_rate": 7.439168008220056e-08,
"logits/chosen": -2.293834686279297,
"logits/rejected": -2.276794195175171,
"logps/chosen": -45.07364273071289,
"logps/rejected": -49.38530731201172,
"loss": 0.6437,
"rewards/accuracies": 0.6156250238418579,
"rewards/chosen": 0.04228875786066055,
"rewards/margins": 0.12463889271020889,
"rewards/rejected": -0.08235013484954834,
"step": 840
},
{
"epoch": 1.225114854517611,
"grad_norm": 19.528154373168945,
"learning_rate": 7.365540298857215e-08,
"logits/chosen": -2.2884204387664795,
"logits/rejected": -2.274423837661743,
"logps/chosen": -50.20172119140625,
"logps/rejected": -53.54350662231445,
"loss": 0.6309,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": 0.06707637757062912,
"rewards/margins": 0.15372148156166077,
"rewards/rejected": -0.08664509654045105,
"step": 850
},
{
"epoch": 1.2395279704531124,
"grad_norm": 18.76193618774414,
"learning_rate": 7.291245671680781e-08,
"logits/chosen": -2.2782771587371826,
"logits/rejected": -2.2508130073547363,
"logps/chosen": -42.898929595947266,
"logps/rejected": -46.70447540283203,
"loss": 0.6438,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": 0.046091653406620026,
"rewards/margins": 0.12635795772075653,
"rewards/rejected": -0.08026628941297531,
"step": 860
},
{
"epoch": 1.2539410863886136,
"grad_norm": 21.306549072265625,
"learning_rate": 7.216305072607568e-08,
"logits/chosen": -2.310192823410034,
"logits/rejected": -2.2984118461608887,
"logps/chosen": -47.682533264160156,
"logps/rejected": -50.96216583251953,
"loss": 0.6397,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": 0.029919151216745377,
"rewards/margins": 0.13648976385593414,
"rewards/rejected": -0.10657060146331787,
"step": 870
},
{
"epoch": 1.268354202324115,
"grad_norm": 23.378387451171875,
"learning_rate": 7.14073962967353e-08,
"logits/chosen": -2.3239712715148926,
"logits/rejected": -2.2972521781921387,
"logps/chosen": -51.19306564331055,
"logps/rejected": -52.882469177246094,
"loss": 0.6301,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.04189233481884003,
"rewards/margins": 0.16474562883377075,
"rewards/rejected": -0.12285330146551132,
"step": 880
},
{
"epoch": 1.2827673182596162,
"grad_norm": 25.997648239135742,
"learning_rate": 7.064570647077124e-08,
"logits/chosen": -2.3059449195861816,
"logits/rejected": -2.2785696983337402,
"logps/chosen": -48.22880935668945,
"logps/rejected": -50.62089538574219,
"loss": 0.6557,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": 0.02739674784243107,
"rewards/margins": 0.09727232158184052,
"rewards/rejected": -0.06987558305263519,
"step": 890
},
{
"epoch": 1.2971804341951176,
"grad_norm": 25.10089111328125,
"learning_rate": 6.987819599173006e-08,
"logits/chosen": -2.296082019805908,
"logits/rejected": -2.2839462757110596,
"logps/chosen": -46.80525207519531,
"logps/rejected": -51.168792724609375,
"loss": 0.6608,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": 0.037016816437244415,
"rewards/margins": 0.09267648309469223,
"rewards/rejected": -0.05565967038273811,
"step": 900
},
{
"epoch": 1.3115935501306188,
"grad_norm": 23.599550247192383,
"learning_rate": 6.910508124417765e-08,
"logits/chosen": -2.268026113510132,
"logits/rejected": -2.2599265575408936,
"logps/chosen": -43.31938934326172,
"logps/rejected": -47.721927642822266,
"loss": 0.6397,
"rewards/accuracies": 0.609375,
"rewards/chosen": 0.03140413016080856,
"rewards/margins": 0.13336870074272156,
"rewards/rejected": -0.101964570581913,
"step": 910
},
{
"epoch": 1.32600666606612,
"grad_norm": 17.291772842407227,
"learning_rate": 6.832658019269373e-08,
"logits/chosen": -2.2494916915893555,
"logits/rejected": -2.225579261779785,
"logps/chosen": -45.836544036865234,
"logps/rejected": -48.53606414794922,
"loss": 0.6422,
"rewards/accuracies": 0.640625,
"rewards/chosen": 0.022828983142971992,
"rewards/margins": 0.13301518559455872,
"rewards/rejected": -0.11018619686365128,
"step": 920
},
{
"epoch": 1.3404197820016215,
"grad_norm": 21.943674087524414,
"learning_rate": 6.75429123204211e-08,
"logits/chosen": -2.2929205894470215,
"logits/rejected": -2.2728652954101562,
"logps/chosen": -46.83967208862305,
"logps/rejected": -51.28687286376953,
"loss": 0.6367,
"rewards/accuracies": 0.6468750238418579,
"rewards/chosen": 0.023386573418974876,
"rewards/margins": 0.14642710983753204,
"rewards/rejected": -0.12304054200649261,
"step": 930
},
{
"epoch": 1.354832897937123,
"grad_norm": 23.507673263549805,
"learning_rate": 6.675429856718652e-08,
"logits/chosen": -2.2670910358428955,
"logits/rejected": -2.2336556911468506,
"logps/chosen": -45.63422393798828,
"logps/rejected": -48.890968322753906,
"loss": 0.6405,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.033365607261657715,
"rewards/margins": 0.13297542929649353,
"rewards/rejected": -0.09960982948541641,
"step": 940
},
{
"epoch": 1.3692460138726241,
"grad_norm": 18.229249954223633,
"learning_rate": 6.596096126721123e-08,
"logits/chosen": -2.2298777103424072,
"logits/rejected": -2.223173141479492,
"logps/chosen": -45.84309768676758,
"logps/rejected": -50.1830940246582,
"loss": 0.6405,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": 0.03936393931508064,
"rewards/margins": 0.13684986531734467,
"rewards/rejected": -0.09748590737581253,
"step": 950
},
{
"epoch": 1.3836591298081253,
"grad_norm": 19.752758026123047,
"learning_rate": 6.516312408642804e-08,
"logits/chosen": -2.279609203338623,
"logits/rejected": -2.279059410095215,
"logps/chosen": -42.76288604736328,
"logps/rejected": -48.05985641479492,
"loss": 0.6364,
"rewards/accuracies": 0.621874988079071,
"rewards/chosen": 0.02452887035906315,
"rewards/margins": 0.1416589766740799,
"rewards/rejected": -0.1171300858259201,
"step": 960
},
{
"epoch": 1.3980722457436268,
"grad_norm": 23.075225830078125,
"learning_rate": 6.436101195942312e-08,
"logits/chosen": -2.278662919998169,
"logits/rejected": -2.2638564109802246,
"logps/chosen": -48.03376007080078,
"logps/rejected": -50.43840789794922,
"loss": 0.6507,
"rewards/accuracies": 0.621874988079071,
"rewards/chosen": 0.02580978348851204,
"rewards/margins": 0.11886006593704224,
"rewards/rejected": -0.0930502638220787,
"step": 970
},
{
"epoch": 1.412485361679128,
"grad_norm": 23.39198875427246,
"learning_rate": 6.35548510260201e-08,
"logits/chosen": -2.252816677093506,
"logits/rejected": -2.236603260040283,
"logps/chosen": -44.25969314575195,
"logps/rejected": -48.64405059814453,
"loss": 0.6368,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": 0.021795066073536873,
"rewards/margins": 0.155609592795372,
"rewards/rejected": -0.1338145136833191,
"step": 980
},
{
"epoch": 1.4268984776146292,
"grad_norm": 19.332801818847656,
"learning_rate": 6.274486856752442e-08,
"logits/chosen": -2.2814013957977295,
"logits/rejected": -2.268404483795166,
"logps/chosen": -43.803199768066406,
"logps/rejected": -48.746334075927734,
"loss": 0.6393,
"rewards/accuracies": 0.640625,
"rewards/chosen": 0.0010702230501919985,
"rewards/margins": 0.15793892741203308,
"rewards/rejected": -0.15686871111392975,
"step": 990
},
{
"epoch": 1.4413115935501306,
"grad_norm": 17.606380462646484,
"learning_rate": 6.193129294264568e-08,
"logits/chosen": -2.286543130874634,
"logits/rejected": -2.2696192264556885,
"logps/chosen": -43.068660736083984,
"logps/rejected": -47.965354919433594,
"loss": 0.6332,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": 0.026370450854301453,
"rewards/margins": 0.15689751505851746,
"rewards/rejected": -0.130527064204216,
"step": 1000
},
{
"epoch": 1.455724709485632,
"grad_norm": 24.786046981811523,
"learning_rate": 6.111435352311653e-08,
"logits/chosen": -2.2797257900238037,
"logits/rejected": -2.264954090118408,
"logps/chosen": -45.28757095336914,
"logps/rejected": -49.00934600830078,
"loss": 0.6405,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.010084993205964565,
"rewards/margins": 0.14780084788799286,
"rewards/rejected": -0.13771584630012512,
"step": 1010
},
{
"epoch": 1.4701378254211332,
"grad_norm": 26.044845581054688,
"learning_rate": 6.02942806290257e-08,
"logits/chosen": -2.2898941040039062,
"logits/rejected": -2.2842609882354736,
"logps/chosen": -48.29700469970703,
"logps/rejected": -52.98006057739258,
"loss": 0.6511,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.018248267471790314,
"rewards/margins": 0.12896670401096344,
"rewards/rejected": -0.11071842908859253,
"step": 1020
},
{
"epoch": 1.4845509413566345,
"grad_norm": 16.25486946105957,
"learning_rate": 5.947130546388376e-08,
"logits/chosen": -2.2709572315216064,
"logits/rejected": -2.2334933280944824,
"logps/chosen": -47.84028244018555,
"logps/rejected": -49.67833709716797,
"loss": 0.6366,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.009173278696835041,
"rewards/margins": 0.1511419117450714,
"rewards/rejected": -0.14196862280368805,
"step": 1030
},
{
"epoch": 1.4989640572921359,
"grad_norm": 21.530025482177734,
"learning_rate": 5.864566004943983e-08,
"logits/chosen": -2.2685012817382812,
"logits/rejected": -2.240506649017334,
"logps/chosen": -50.17546463012695,
"logps/rejected": -53.723243713378906,
"loss": 0.6373,
"rewards/accuracies": 0.6156250238418579,
"rewards/chosen": 0.00729725044220686,
"rewards/margins": 0.15390922129154205,
"rewards/rejected": -0.14661197364330292,
"step": 1040
},
{
"epoch": 1.513377173227637,
"grad_norm": 23.661149978637695,
"learning_rate": 5.78175771602676e-08,
"logits/chosen": -2.2800815105438232,
"logits/rejected": -2.277493953704834,
"logps/chosen": -43.04485321044922,
"logps/rejected": -46.71100997924805,
"loss": 0.6347,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": 0.01757994294166565,
"rewards/margins": 0.1627650111913681,
"rewards/rejected": -0.14518508315086365,
"step": 1050
},
{
"epoch": 1.5277902891631383,
"grad_norm": 23.422590255737305,
"learning_rate": 5.6987290258139073e-08,
"logits/chosen": -2.2339248657226562,
"logits/rejected": -2.2009925842285156,
"logps/chosen": -48.11365509033203,
"logps/rejected": -51.516197204589844,
"loss": 0.6384,
"rewards/accuracies": 0.653124988079071,
"rewards/chosen": 0.02754228189587593,
"rewards/margins": 0.15339332818984985,
"rewards/rejected": -0.12585103511810303,
"step": 1060
},
{
"epoch": 1.5422034050986397,
"grad_norm": 27.1705379486084,
"learning_rate": 5.6155033426204615e-08,
"logits/chosen": -2.2623772621154785,
"logits/rejected": -2.2473068237304688,
"logps/chosen": -51.77325439453125,
"logps/rejected": -54.96337127685547,
"loss": 0.6343,
"rewards/accuracies": 0.6156250238418579,
"rewards/chosen": 0.01034320704638958,
"rewards/margins": 0.15684732794761658,
"rewards/rejected": -0.14650413393974304,
"step": 1070
},
{
"epoch": 1.5566165210341412,
"grad_norm": 25.371421813964844,
"learning_rate": 5.532104130299771e-08,
"logits/chosen": -2.26564359664917,
"logits/rejected": -2.250556230545044,
"logps/chosen": -51.636810302734375,
"logps/rejected": -54.49198532104492,
"loss": 0.634,
"rewards/accuracies": 0.671875,
"rewards/chosen": 0.014989291317760944,
"rewards/margins": 0.16550514101982117,
"rewards/rejected": -0.1505158692598343,
"step": 1080
},
{
"epoch": 1.5710296369696424,
"grad_norm": 26.66484260559082,
"learning_rate": 5.448554901628333e-08,
"logits/chosen": -2.2615230083465576,
"logits/rejected": -2.251753330230713,
"logps/chosen": -48.62981033325195,
"logps/rejected": -53.1638069152832,
"loss": 0.6483,
"rewards/accuracies": 0.609375,
"rewards/chosen": 0.001556683098897338,
"rewards/margins": 0.1258353888988495,
"rewards/rejected": -0.124278724193573,
"step": 1090
},
{
"epoch": 1.5854427529051436,
"grad_norm": 23.419185638427734,
"learning_rate": 5.364879211676816e-08,
"logits/chosen": -2.2811994552612305,
"logits/rejected": -2.272005558013916,
"logps/chosen": -48.42817687988281,
"logps/rejected": -53.677734375,
"loss": 0.6268,
"rewards/accuracies": 0.653124988079071,
"rewards/chosen": 0.036996982991695404,
"rewards/margins": 0.17727255821228027,
"rewards/rejected": -0.14027558267116547,
"step": 1100
},
{
"epoch": 1.599855868840645,
"grad_norm": 25.174808502197266,
"learning_rate": 5.281100651169175e-08,
"logits/chosen": -2.290276527404785,
"logits/rejected": -2.277024030685425,
"logps/chosen": -50.967430114746094,
"logps/rejected": -55.05602264404297,
"loss": 0.6327,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.041182320564985275,
"rewards/margins": 0.17106422781944275,
"rewards/rejected": -0.12988190352916718,
"step": 1110
},
{
"epoch": 1.6142689847761464,
"grad_norm": 20.062232971191406,
"learning_rate": 5.197242839831706e-08,
"logits/chosen": -2.2445547580718994,
"logits/rejected": -2.235886335372925,
"logps/chosen": -45.176246643066406,
"logps/rejected": -50.29578399658203,
"loss": 0.6329,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": -0.004402970429509878,
"rewards/margins": 0.17448854446411133,
"rewards/rejected": -0.17889150977134705,
"step": 1120
},
{
"epoch": 1.6286821007116477,
"grad_norm": 24.2050838470459,
"learning_rate": 5.1133294197339274e-08,
"logits/chosen": -2.2965331077575684,
"logits/rejected": -2.269667387008667,
"logps/chosen": -47.03449630737305,
"logps/rejected": -50.98591232299805,
"loss": 0.6476,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.0063371858559548855,
"rewards/margins": 0.13688282668590546,
"rewards/rejected": -0.13054564595222473,
"step": 1130
},
{
"epoch": 1.6430952166471489,
"grad_norm": 22.103654861450195,
"learning_rate": 5.029384048623153e-08,
"logits/chosen": -2.253809690475464,
"logits/rejected": -2.233123779296875,
"logps/chosen": -48.81449508666992,
"logps/rejected": -52.2353515625,
"loss": 0.649,
"rewards/accuracies": 0.640625,
"rewards/chosen": 0.0027180719189345837,
"rewards/margins": 0.13031043112277985,
"rewards/rejected": -0.12759235501289368,
"step": 1140
},
{
"epoch": 1.6575083325826503,
"grad_norm": 19.620878219604492,
"learning_rate": 4.9454303932546675e-08,
"logits/chosen": -2.2449498176574707,
"logits/rejected": -2.2211432456970215,
"logps/chosen": -47.44291687011719,
"logps/rejected": -48.738990783691406,
"loss": 0.6305,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": 0.014069506898522377,
"rewards/margins": 0.18172375857830048,
"rewards/rejected": -0.16765424609184265,
"step": 1150
},
{
"epoch": 1.6719214485181515,
"grad_norm": 22.302276611328125,
"learning_rate": 4.861492122719338e-08,
"logits/chosen": -2.2866158485412598,
"logits/rejected": -2.264674425125122,
"logps/chosen": -49.207786560058594,
"logps/rejected": -52.398353576660156,
"loss": 0.6408,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": 0.012931051664054394,
"rewards/margins": 0.15574631094932556,
"rewards/rejected": -0.1428152620792389,
"step": 1160
},
{
"epoch": 1.6863345644536527,
"grad_norm": 23.777292251586914,
"learning_rate": 4.777592901770575e-08,
"logits/chosen": -2.2917380332946777,
"logits/rejected": -2.276853561401367,
"logps/chosen": -42.12696075439453,
"logps/rejected": -46.638572692871094,
"loss": 0.6227,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": 0.00846984051167965,
"rewards/margins": 0.20765790343284607,
"rewards/rejected": -0.19918808341026306,
"step": 1170
},
{
"epoch": 1.7007476803891541,
"grad_norm": 25.883188247680664,
"learning_rate": 4.693756384152529e-08,
"logits/chosen": -2.254065752029419,
"logits/rejected": -2.2268402576446533,
"logps/chosen": -49.0328483581543,
"logps/rejected": -53.10075759887695,
"loss": 0.6234,
"rewards/accuracies": 0.6468750238418579,
"rewards/chosen": -0.0014540397096425295,
"rewards/margins": 0.19669906795024872,
"rewards/rejected": -0.19815312325954437,
"step": 1180
},
{
"epoch": 1.7151607963246556,
"grad_norm": 20.475727081298828,
"learning_rate": 4.610006205931365e-08,
"logits/chosen": -2.2955899238586426,
"logits/rejected": -2.279146194458008,
"logps/chosen": -50.25695037841797,
"logps/rejected": -52.637916564941406,
"loss": 0.6331,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.0065743387676775455,
"rewards/margins": 0.1675548255443573,
"rewards/rejected": -0.1609804779291153,
"step": 1190
},
{
"epoch": 1.7295739122601568,
"grad_norm": 24.313840866088867,
"learning_rate": 4.526365978831551e-08,
"logits/chosen": -2.2783608436584473,
"logits/rejected": -2.2607553005218506,
"logps/chosen": -47.65949249267578,
"logps/rejected": -51.959983825683594,
"loss": 0.631,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": -0.00622877012938261,
"rewards/margins": 0.17043979465961456,
"rewards/rejected": -0.17666855454444885,
"step": 1200
},
{
"epoch": 1.743987028195658,
"grad_norm": 20.83986473083496,
"learning_rate": 4.442859283578981e-08,
"logits/chosen": -2.2713427543640137,
"logits/rejected": -2.252413511276245,
"logps/chosen": -45.476287841796875,
"logps/rejected": -49.17133331298828,
"loss": 0.6303,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.020858388394117355,
"rewards/margins": 0.1802762746810913,
"rewards/rejected": -0.20113465189933777,
"step": 1210
},
{
"epoch": 1.7584001441311594,
"grad_norm": 23.378725051879883,
"learning_rate": 4.359509663252864e-08,
"logits/chosen": -2.251262903213501,
"logits/rejected": -2.2400929927825928,
"logps/chosen": -46.6330451965332,
"logps/rejected": -51.300270080566406,
"loss": 0.64,
"rewards/accuracies": 0.6187499761581421,
"rewards/chosen": -0.014711695723235607,
"rewards/margins": 0.15496547520160675,
"rewards/rejected": -0.16967716813087463,
"step": 1220
},
{
"epoch": 1.7728132600666606,
"grad_norm": 20.105392456054688,
"learning_rate": 4.276340616648198e-08,
"logits/chosen": -2.3081746101379395,
"logits/rejected": -2.2849628925323486,
"logps/chosen": -46.36747360229492,
"logps/rejected": -48.48231506347656,
"loss": 0.6288,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": 0.00806148536503315,
"rewards/margins": 0.18995113670825958,
"rewards/rejected": -0.18188965320587158,
"step": 1230
},
{
"epoch": 1.7872263760021618,
"grad_norm": 20.087594985961914,
"learning_rate": 4.193375591650758e-08,
"logits/chosen": -2.2986416816711426,
"logits/rejected": -2.275165557861328,
"logps/chosen": -51.384132385253906,
"logps/rejected": -54.01851272583008,
"loss": 0.6311,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.004061401821672916,
"rewards/margins": 0.17897441983222961,
"rewards/rejected": -0.18303583562374115,
"step": 1240
},
{
"epoch": 1.8016394919376633,
"grad_norm": 21.758268356323242,
"learning_rate": 4.110637978626415e-08,
"logits/chosen": -2.257802963256836,
"logits/rejected": -2.2421045303344727,
"logps/chosen": -44.12052536010742,
"logps/rejected": -49.74687576293945,
"loss": 0.6012,
"rewards/accuracies": 0.703125,
"rewards/chosen": 0.036210522055625916,
"rewards/margins": 0.24105677008628845,
"rewards/rejected": -0.20484623312950134,
"step": 1250
},
{
"epoch": 1.8160526078731647,
"grad_norm": 19.154407501220703,
"learning_rate": 4.0281511038266867e-08,
"logits/chosen": -2.201878309249878,
"logits/rejected": -2.183927059173584,
"logps/chosen": -49.49979782104492,
"logps/rejected": -53.208091735839844,
"loss": 0.6219,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.014157836325466633,
"rewards/margins": 0.20892783999443054,
"rewards/rejected": -0.2230856716632843,
"step": 1260
},
{
"epoch": 1.830465723808666,
"grad_norm": 19.655216217041016,
"learning_rate": 3.9459382228123475e-08,
"logits/chosen": -2.2442800998687744,
"logits/rejected": -2.238391160964966,
"logps/chosen": -42.37008285522461,
"logps/rejected": -47.834983825683594,
"loss": 0.6055,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": 0.017191831022500992,
"rewards/margins": 0.2501511871814728,
"rewards/rejected": -0.2329593151807785,
"step": 1270
},
{
"epoch": 1.844878839744167,
"grad_norm": 24.132625579833984,
"learning_rate": 3.864022513896989e-08,
"logits/chosen": -2.255995750427246,
"logits/rejected": -2.223539113998413,
"logps/chosen": -44.1163330078125,
"logps/rejected": -47.280296325683594,
"loss": 0.6397,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.004155662842094898,
"rewards/margins": 0.1677740067243576,
"rewards/rejected": -0.17192967236042023,
"step": 1280
},
{
"epoch": 1.8592919556796685,
"grad_norm": 24.518510818481445,
"learning_rate": 3.782427071612339e-08,
"logits/chosen": -2.278082847595215,
"logits/rejected": -2.2593398094177246,
"logps/chosen": -48.02725601196289,
"logps/rejected": -51.3366813659668,
"loss": 0.6259,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.005302610341459513,
"rewards/margins": 0.19030724465847015,
"rewards/rejected": -0.19560983777046204,
"step": 1290
},
{
"epoch": 1.87370507161517,
"grad_norm": 23.40630531311035,
"learning_rate": 3.7011749001972174e-08,
"logits/chosen": -2.2769112586975098,
"logits/rejected": -2.2606639862060547,
"logps/chosen": -43.46620559692383,
"logps/rejected": -47.172611236572266,
"loss": 0.6354,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": -4.972890019416809e-05,
"rewards/margins": 0.1833050400018692,
"rewards/rejected": -0.18335476517677307,
"step": 1300
},
{
"epoch": 1.888118187550671,
"grad_norm": 22.18378448486328,
"learning_rate": 3.620288907111931e-08,
"logits/chosen": -2.250119686126709,
"logits/rejected": -2.2260870933532715,
"logps/chosen": -45.36948776245117,
"logps/rejected": -48.90538787841797,
"loss": 0.6175,
"rewards/accuracies": 0.6343749761581421,
"rewards/chosen": -0.0030071833170950413,
"rewards/margins": 0.2247433215379715,
"rewards/rejected": -0.2277504950761795,
"step": 1310
},
{
"epoch": 1.9025313034861724,
"grad_norm": 25.240890502929688,
"learning_rate": 3.539791896579978e-08,
"logits/chosen": -2.2886531352996826,
"logits/rejected": -2.2704544067382812,
"logps/chosen": -51.9533805847168,
"logps/rejected": -54.27134323120117,
"loss": 0.6407,
"rewards/accuracies": 0.590624988079071,
"rewards/chosen": -0.04531291872262955,
"rewards/margins": 0.1595449447631836,
"rewards/rejected": -0.20485787093639374,
"step": 1320
},
{
"epoch": 1.9169444194216738,
"grad_norm": 20.785062789916992,
"learning_rate": 3.459706563158828e-08,
"logits/chosen": -2.2486228942871094,
"logits/rejected": -2.2339444160461426,
"logps/chosen": -51.625091552734375,
"logps/rejected": -56.50333786010742,
"loss": 0.6202,
"rewards/accuracies": 0.671875,
"rewards/chosen": -0.01646718569099903,
"rewards/margins": 0.2016635239124298,
"rewards/rejected": -0.2181307077407837,
"step": 1330
},
{
"epoch": 1.931357535357175,
"grad_norm": 18.25945281982422,
"learning_rate": 3.380055485341644e-08,
"logits/chosen": -2.2824645042419434,
"logits/rejected": -2.2728466987609863,
"logps/chosen": -47.70806121826172,
"logps/rejected": -52.762123107910156,
"loss": 0.6396,
"rewards/accuracies": 0.653124988079071,
"rewards/chosen": -0.014143446460366249,
"rewards/margins": 0.1655816286802292,
"rewards/rejected": -0.17972508072853088,
"step": 1340
},
{
"epoch": 1.9457706512926762,
"grad_norm": 27.237255096435547,
"learning_rate": 3.300861119191718e-08,
"logits/chosen": -2.2605082988739014,
"logits/rejected": -2.240718126296997,
"logps/chosen": -50.2346076965332,
"logps/rejected": -52.86682891845703,
"loss": 0.6388,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.022719861939549446,
"rewards/margins": 0.16368316113948822,
"rewards/rejected": -0.18640300631523132,
"step": 1350
},
{
"epoch": 1.9601837672281777,
"grad_norm": 21.566373825073242,
"learning_rate": 3.2221457920114213e-08,
"logits/chosen": -2.281573534011841,
"logits/rejected": -2.260481595993042,
"logps/chosen": -44.61939239501953,
"logps/rejected": -49.09664535522461,
"loss": 0.6305,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.01838817074894905,
"rewards/margins": 0.18449218571186066,
"rewards/rejected": -0.2028803527355194,
"step": 1360
},
{
"epoch": 1.974596883163679,
"grad_norm": 22.27100944519043,
"learning_rate": 3.143931696047454e-08,
"logits/chosen": -2.26967191696167,
"logits/rejected": -2.254939079284668,
"logps/chosen": -44.52263259887695,
"logps/rejected": -48.41568374633789,
"loss": 0.6323,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": -0.027285214513540268,
"rewards/margins": 0.18556539714336395,
"rewards/rejected": -0.21285061538219452,
"step": 1370
},
{
"epoch": 1.9890099990991803,
"grad_norm": 26.420007705688477,
"learning_rate": 3.066240882234186e-08,
"logits/chosen": -2.2749416828155518,
"logits/rejected": -2.275374412536621,
"logps/chosen": -49.047096252441406,
"logps/rejected": -54.0599250793457,
"loss": 0.6264,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": -0.008531761355698109,
"rewards/margins": 0.18480119109153748,
"rewards/rejected": -0.1933329850435257,
"step": 1380
},
{
"epoch": 2.0034231150346815,
"grad_norm": 21.0603084564209,
"learning_rate": 2.989095253976816e-08,
"logits/chosen": -2.261396884918213,
"logits/rejected": -2.252016305923462,
"logps/chosen": -47.794715881347656,
"logps/rejected": -51.5122184753418,
"loss": 0.6434,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.034887559711933136,
"rewards/margins": 0.15125319361686707,
"rewards/rejected": -0.1861407607793808,
"step": 1390
},
{
"epoch": 2.017836230970183,
"grad_norm": 21.241836547851562,
"learning_rate": 2.912516560976146e-08,
"logits/chosen": -2.236016035079956,
"logits/rejected": -2.2192585468292236,
"logps/chosen": -45.089847564697266,
"logps/rejected": -50.58442306518555,
"loss": 0.6138,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.017469603568315506,
"rewards/margins": 0.23276808857917786,
"rewards/rejected": -0.25023767352104187,
"step": 1400
},
{
"epoch": 2.0322493469056844,
"grad_norm": 21.38671112060547,
"learning_rate": 2.836526393096661e-08,
"logits/chosen": -2.2866103649139404,
"logits/rejected": -2.277172088623047,
"logps/chosen": -47.35311508178711,
"logps/rejected": -52.31220626831055,
"loss": 0.6238,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -0.022761840373277664,
"rewards/margins": 0.19283010065555573,
"rewards/rejected": -0.2155919373035431,
"step": 1410
},
{
"epoch": 2.0466624628411854,
"grad_norm": 21.73073959350586,
"learning_rate": 2.7611461742797165e-08,
"logits/chosen": -2.2647812366485596,
"logits/rejected": -2.2458884716033936,
"logps/chosen": -42.06100082397461,
"logps/rejected": -46.326446533203125,
"loss": 0.6044,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": 0.0016145706176757812,
"rewards/margins": 0.2537813186645508,
"rewards/rejected": -0.2521667778491974,
"step": 1420
},
{
"epoch": 2.061075578776687,
"grad_norm": 26.361936569213867,
"learning_rate": 2.686397156503445e-08,
"logits/chosen": -2.266921043395996,
"logits/rejected": -2.2467474937438965,
"logps/chosen": -45.512332916259766,
"logps/rejected": -49.53184127807617,
"loss": 0.6322,
"rewards/accuracies": 0.653124988079071,
"rewards/chosen": -0.04077625274658203,
"rewards/margins": 0.18639865517616272,
"rewards/rejected": -0.22717487812042236,
"step": 1430
},
{
"epoch": 2.075488694712188,
"grad_norm": 22.614898681640625,
"learning_rate": 2.6123004137912084e-08,
"logits/chosen": -2.2463908195495605,
"logits/rejected": -2.2374794483184814,
"logps/chosen": -44.685935974121094,
"logps/rejected": -49.08884811401367,
"loss": 0.6136,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": 0.0009622380021028221,
"rewards/margins": 0.20883917808532715,
"rewards/rejected": -0.20787692070007324,
"step": 1440
},
{
"epoch": 2.089901810647689,
"grad_norm": 24.510360717773438,
"learning_rate": 2.5388768362701585e-08,
"logits/chosen": -2.240734100341797,
"logits/rejected": -2.229846954345703,
"logps/chosen": -50.42451095581055,
"logps/rejected": -54.28529739379883,
"loss": 0.634,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": 0.0019600100349634886,
"rewards/margins": 0.17666472494602203,
"rewards/rejected": -0.17470474541187286,
"step": 1450
},
{
"epoch": 2.1043149265831906,
"grad_norm": 19.324567794799805,
"learning_rate": 2.466147124281703e-08,
"logits/chosen": -2.314894437789917,
"logits/rejected": -2.2869911193847656,
"logps/chosen": -46.29553985595703,
"logps/rejected": -49.53280258178711,
"loss": 0.6158,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": -0.03561275079846382,
"rewards/margins": 0.2275541126728058,
"rewards/rejected": -0.2631668448448181,
"step": 1460
},
{
"epoch": 2.118728042518692,
"grad_norm": 22.356782913208008,
"learning_rate": 2.3941317825454278e-08,
"logits/chosen": -2.263237476348877,
"logits/rejected": -2.2347958087921143,
"logps/chosen": -47.52599334716797,
"logps/rejected": -51.044593811035156,
"loss": 0.6157,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": -0.00275771738961339,
"rewards/margins": 0.22175411880016327,
"rewards/rejected": -0.22451183199882507,
"step": 1470
},
{
"epoch": 2.1331411584541935,
"grad_norm": 28.187788009643555,
"learning_rate": 2.322851114378203e-08,
"logits/chosen": -2.234954595565796,
"logits/rejected": -2.228529691696167,
"logps/chosen": -47.602561950683594,
"logps/rejected": -51.5837287902832,
"loss": 0.6259,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": -0.056993234902620316,
"rewards/margins": 0.20701098442077637,
"rewards/rejected": -0.2640042006969452,
"step": 1480
},
{
"epoch": 2.1475542743896945,
"grad_norm": 21.61797523498535,
"learning_rate": 2.252325215970059e-08,
"logits/chosen": -2.223036050796509,
"logits/rejected": -2.206120014190674,
"logps/chosen": -47.96808624267578,
"logps/rejected": -53.099937438964844,
"loss": 0.6109,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.013963679783046246,
"rewards/margins": 0.2430466115474701,
"rewards/rejected": -0.2570103108882904,
"step": 1490
},
{
"epoch": 2.161967390325196,
"grad_norm": 23.325836181640625,
"learning_rate": 2.182573970718449e-08,
"logits/chosen": -2.2531425952911377,
"logits/rejected": -2.2373714447021484,
"logps/chosen": -48.79794692993164,
"logps/rejected": -53.21220016479492,
"loss": 0.6085,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": -0.012846941128373146,
"rewards/margins": 0.24934491515159607,
"rewards/rejected": -0.2621918320655823,
"step": 1500
},
{
"epoch": 2.1763805062606973,
"grad_norm": 24.3940372467041,
"learning_rate": 2.113617043622536e-08,
"logits/chosen": -2.222809076309204,
"logits/rejected": -2.2011048793792725,
"logps/chosen": -45.179893493652344,
"logps/rejected": -49.6671257019043,
"loss": 0.6207,
"rewards/accuracies": 0.6468750238418579,
"rewards/chosen": -0.028503399342298508,
"rewards/margins": 0.22944235801696777,
"rewards/rejected": -0.2579457759857178,
"step": 1510
},
{
"epoch": 2.1907936221961988,
"grad_norm": 23.14939308166504,
"learning_rate": 2.045473875739001e-08,
"logits/chosen": -2.262533664703369,
"logits/rejected": -2.2459208965301514,
"logps/chosen": -49.05790328979492,
"logps/rejected": -53.393707275390625,
"loss": 0.6171,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": -0.017335880547761917,
"rewards/margins": 0.2230241745710373,
"rewards/rejected": -0.2403600662946701,
"step": 1520
},
{
"epoch": 2.2052067381316998,
"grad_norm": 23.007627487182617,
"learning_rate": 1.9781636787010503e-08,
"logits/chosen": -2.2727057933807373,
"logits/rejected": -2.252056837081909,
"logps/chosen": -50.972694396972656,
"logps/rejected": -55.11802291870117,
"loss": 0.6016,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": 0.011264542117714882,
"rewards/margins": 0.26211434602737427,
"rewards/rejected": -0.25084978342056274,
"step": 1530
},
{
"epoch": 2.219619854067201,
"grad_norm": 23.422765731811523,
"learning_rate": 1.911705429302038e-08,
"logits/chosen": -2.2223095893859863,
"logits/rejected": -2.2044150829315186,
"logps/chosen": -47.016273498535156,
"logps/rejected": -49.784297943115234,
"loss": 0.6307,
"rewards/accuracies": 0.671875,
"rewards/chosen": -0.02597871981561184,
"rewards/margins": 0.17457632720470428,
"rewards/rejected": -0.20055504143238068,
"step": 1540
},
{
"epoch": 2.2340329700027026,
"grad_norm": 20.89065933227539,
"learning_rate": 1.8461178641453617e-08,
"logits/chosen": -2.2396929264068604,
"logits/rejected": -2.2292518615722656,
"logps/chosen": -43.25371551513672,
"logps/rejected": -46.49152755737305,
"loss": 0.6194,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.04524295777082443,
"rewards/margins": 0.21859809756278992,
"rewards/rejected": -0.26384103298187256,
"step": 1550
},
{
"epoch": 2.2484460859382036,
"grad_norm": 20.526012420654297,
"learning_rate": 1.781419474362017e-08,
"logits/chosen": -2.2325713634490967,
"logits/rejected": -2.2240681648254395,
"logps/chosen": -45.1739616394043,
"logps/rejected": -50.48863220214844,
"loss": 0.614,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.03004586696624756,
"rewards/margins": 0.22514891624450684,
"rewards/rejected": -0.2551947832107544,
"step": 1560
},
{
"epoch": 2.262859201873705,
"grad_norm": 19.403095245361328,
"learning_rate": 1.7176285003974033e-08,
"logits/chosen": -2.2410597801208496,
"logits/rejected": -2.2172932624816895,
"logps/chosen": -46.04480743408203,
"logps/rejected": -49.85961151123047,
"loss": 0.6228,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.009199894964694977,
"rewards/margins": 0.21350452303886414,
"rewards/rejected": -0.2227044403553009,
"step": 1570
},
{
"epoch": 2.2772723178092065,
"grad_norm": 22.78799057006836,
"learning_rate": 1.6547629268687786e-08,
"logits/chosen": -2.271934747695923,
"logits/rejected": -2.2632410526275635,
"logps/chosen": -42.374305725097656,
"logps/rejected": -48.10682678222656,
"loss": 0.6102,
"rewards/accuracies": 0.653124988079071,
"rewards/chosen": -0.007346804253757,
"rewards/margins": 0.23881065845489502,
"rewards/rejected": -0.2461574524641037,
"step": 1580
},
{
"epoch": 2.291685433744708,
"grad_norm": 19.8133487701416,
"learning_rate": 1.59284047749485e-08,
"logits/chosen": -2.239629030227661,
"logits/rejected": -2.217893123626709,
"logps/chosen": -45.59327697753906,
"logps/rejected": -50.000091552734375,
"loss": 0.6122,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": -0.005661819130182266,
"rewards/margins": 0.25302523374557495,
"rewards/rejected": -0.2586870789527893,
"step": 1590
},
{
"epoch": 2.306098549680209,
"grad_norm": 26.438047409057617,
"learning_rate": 1.5318786100989188e-08,
"logits/chosen": -2.2031397819519043,
"logits/rejected": -2.195650577545166,
"logps/chosen": -50.8388671875,
"logps/rejected": -54.697113037109375,
"loss": 0.6324,
"rewards/accuracies": 0.6312500238418579,
"rewards/chosen": -0.06328658759593964,
"rewards/margins": 0.19921903312206268,
"rewards/rejected": -0.2625056207180023,
"step": 1600
},
{
"epoch": 2.3205116656157103,
"grad_norm": 24.46269416809082,
"learning_rate": 1.471894511686988e-08,
"logits/chosen": -2.2040247917175293,
"logits/rejected": -2.189892530441284,
"logps/chosen": -48.797935485839844,
"logps/rejected": -50.93223190307617,
"loss": 0.6291,
"rewards/accuracies": 0.653124988079071,
"rewards/chosen": -0.05211130529642105,
"rewards/margins": 0.1955440789461136,
"rewards/rejected": -0.24765542149543762,
"step": 1610
},
{
"epoch": 2.3349247815512117,
"grad_norm": 20.400657653808594,
"learning_rate": 1.4129050936022214e-08,
"logits/chosen": -2.211453914642334,
"logits/rejected": -2.1973557472229004,
"logps/chosen": -44.81827926635742,
"logps/rejected": -49.232845306396484,
"loss": 0.6262,
"rewards/accuracies": 0.643750011920929,
"rewards/chosen": -0.03966861963272095,
"rewards/margins": 0.20301298797130585,
"rewards/rejected": -0.2426815927028656,
"step": 1620
},
{
"epoch": 2.3493378974867127,
"grad_norm": 23.146989822387695,
"learning_rate": 1.3549269867571222e-08,
"logits/chosen": -2.2070627212524414,
"logits/rejected": -2.2003607749938965,
"logps/chosen": -50.647216796875,
"logps/rejected": -54.42750930786133,
"loss": 0.6403,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": -0.026784483343362808,
"rewards/margins": 0.1822723001241684,
"rewards/rejected": -0.2090567797422409,
"step": 1630
},
{
"epoch": 2.363751013422214,
"grad_norm": 22.72720718383789,
"learning_rate": 1.2979765369447742e-08,
"logits/chosen": -2.2865893840789795,
"logits/rejected": -2.261965036392212,
"logps/chosen": -46.86061477661133,
"logps/rejected": -51.7364616394043,
"loss": 0.616,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.037126921117305756,
"rewards/margins": 0.25349533557891846,
"rewards/rejected": -0.2906222939491272,
"step": 1640
},
{
"epoch": 2.3781641293577156,
"grad_norm": 24.311439514160156,
"learning_rate": 1.2420698002304608e-08,
"logits/chosen": -2.2154691219329834,
"logits/rejected": -2.1979966163635254,
"logps/chosen": -41.86544418334961,
"logps/rejected": -47.02150344848633,
"loss": 0.6235,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.06195966154336929,
"rewards/margins": 0.23036758601665497,
"rewards/rejected": -0.29232725501060486,
"step": 1650
},
{
"epoch": 2.392577245293217,
"grad_norm": 18.456798553466797,
"learning_rate": 1.1872225384249768e-08,
"logits/chosen": -2.2427659034729004,
"logits/rejected": -2.232330799102783,
"logps/chosen": -48.07630157470703,
"logps/rejected": -53.72954177856445,
"loss": 0.618,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.028111791238188744,
"rewards/margins": 0.22475573420524597,
"rewards/rejected": -0.25286751985549927,
"step": 1660
},
{
"epoch": 2.406990361228718,
"grad_norm": 29.3465633392334,
"learning_rate": 1.1334502146408881e-08,
"logits/chosen": -2.2178328037261963,
"logits/rejected": -2.2086894512176514,
"logps/chosen": -48.64324188232422,
"logps/rejected": -52.14066696166992,
"loss": 0.6293,
"rewards/accuracies": 0.6781250238418579,
"rewards/chosen": -0.05821915715932846,
"rewards/margins": 0.20005354285240173,
"rewards/rejected": -0.2582727074623108,
"step": 1670
},
{
"epoch": 2.4214034771642194,
"grad_norm": 21.647228240966797,
"learning_rate": 1.0807679889330163e-08,
"logits/chosen": -2.290550708770752,
"logits/rejected": -2.289184331893921,
"logps/chosen": -48.02743148803711,
"logps/rejected": -52.760520935058594,
"loss": 0.6348,
"rewards/accuracies": 0.65625,
"rewards/chosen": -0.035547636449337006,
"rewards/margins": 0.17515380680561066,
"rewards/rejected": -0.21070146560668945,
"step": 1680
},
{
"epoch": 2.435816593099721,
"grad_norm": 23.48753547668457,
"learning_rate": 1.0291907140243538e-08,
"logits/chosen": -2.237405300140381,
"logits/rejected": -2.217073917388916,
"logps/chosen": -45.324363708496094,
"logps/rejected": -51.532958984375,
"loss": 0.5945,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -0.014387303963303566,
"rewards/margins": 0.2854435443878174,
"rewards/rejected": -0.2998308539390564,
"step": 1690
},
{
"epoch": 2.450229709035222,
"grad_norm": 21.15599822998047,
"learning_rate": 9.787329311186249e-09,
"logits/chosen": -2.2333407402038574,
"logits/rejected": -2.2088422775268555,
"logps/chosen": -45.39387893676758,
"logps/rejected": -48.80021286010742,
"loss": 0.6107,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.010507221333682537,
"rewards/margins": 0.24884942173957825,
"rewards/rejected": -0.25935661792755127,
"step": 1700
},
{
"epoch": 2.4646428249707233,
"grad_norm": 23.810028076171875,
"learning_rate": 9.294088658006916e-09,
"logits/chosen": -2.2471401691436768,
"logits/rejected": -2.2246270179748535,
"logps/chosen": -47.628807067871094,
"logps/rejected": -52.19799041748047,
"loss": 0.6198,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": -0.026676829904317856,
"rewards/margins": 0.24438074231147766,
"rewards/rejected": -0.2710575461387634,
"step": 1710
},
{
"epoch": 2.4790559409062247,
"grad_norm": 19.998149871826172,
"learning_rate": 8.812324240259094e-09,
"logits/chosen": -2.2432353496551514,
"logits/rejected": -2.2173349857330322,
"logps/chosen": -48.27139663696289,
"logps/rejected": -52.815460205078125,
"loss": 0.6092,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.016811879351735115,
"rewards/margins": 0.2507106065750122,
"rewards/rejected": -0.2675224542617798,
"step": 1720
},
{
"epoch": 2.493469056841726,
"grad_norm": 23.85225486755371,
"learning_rate": 8.342171881996351e-09,
"logits/chosen": -2.2501230239868164,
"logits/rejected": -2.2334678173065186,
"logps/chosen": -47.89342498779297,
"logps/rejected": -51.11579132080078,
"loss": 0.6234,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.053554244339466095,
"rewards/margins": 0.2084587812423706,
"rewards/rejected": -0.2620130181312561,
"step": 1730
},
{
"epoch": 2.507882172777227,
"grad_norm": 22.4180908203125,
"learning_rate": 7.883764133479137e-09,
"logits/chosen": -2.243783473968506,
"logits/rejected": -2.2160232067108154,
"logps/chosen": -44.910865783691406,
"logps/rejected": -50.00033187866211,
"loss": 0.6146,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": -0.03045772947371006,
"rewards/margins": 0.23907272517681122,
"rewards/rejected": -0.2695304751396179,
"step": 1740
},
{
"epoch": 2.5222952887127286,
"grad_norm": 26.53935432434082,
"learning_rate": 7.43723023380502e-09,
"logits/chosen": -2.185051441192627,
"logits/rejected": -2.1775598526000977,
"logps/chosen": -47.21742630004883,
"logps/rejected": -51.211883544921875,
"loss": 0.6049,
"rewards/accuracies": 0.690625011920929,
"rewards/chosen": -0.0470205657184124,
"rewards/margins": 0.2449074536561966,
"rewards/rejected": -0.2919279932975769,
"step": 1750
},
{
"epoch": 2.53670840464823,
"grad_norm": 21.709890365600586,
"learning_rate": 7.002696074472075e-09,
"logits/chosen": -2.233487606048584,
"logits/rejected": -2.21376633644104,
"logps/chosen": -50.533409118652344,
"logps/rejected": -53.34821319580078,
"loss": 0.6136,
"rewards/accuracies": 0.671875,
"rewards/chosen": -0.058121807873249054,
"rewards/margins": 0.2471979856491089,
"rewards/rejected": -0.30531978607177734,
"step": 1760
},
{
"epoch": 2.551121520583731,
"grad_norm": 24.80657958984375,
"learning_rate": 6.580284163886369e-09,
"logits/chosen": -2.237624168395996,
"logits/rejected": -2.225713014602661,
"logps/chosen": -51.041221618652344,
"logps/rejected": -54.869285583496094,
"loss": 0.6162,
"rewards/accuracies": 0.6468750238418579,
"rewards/chosen": -0.038633137941360474,
"rewards/margins": 0.22483810782432556,
"rewards/rejected": -0.2634713053703308,
"step": 1770
},
{
"epoch": 2.5655346365192324,
"grad_norm": 21.370935440063477,
"learning_rate": 6.1701135928230566e-09,
"logits/chosen": -2.1967005729675293,
"logits/rejected": -2.1760928630828857,
"logps/chosen": -51.85515594482422,
"logps/rejected": -55.939857482910156,
"loss": 0.6185,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.07306867837905884,
"rewards/margins": 0.2407924234867096,
"rewards/rejected": -0.31386110186576843,
"step": 1780
},
{
"epoch": 2.579947752454734,
"grad_norm": 22.11069679260254,
"learning_rate": 5.7723000008510655e-09,
"logits/chosen": -2.251694440841675,
"logits/rejected": -2.226647138595581,
"logps/chosen": -47.68011474609375,
"logps/rejected": -50.42749786376953,
"loss": 0.6207,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": -0.06913193315267563,
"rewards/margins": 0.22913888096809387,
"rewards/rejected": -0.2982707917690277,
"step": 1790
},
{
"epoch": 2.5943608683902353,
"grad_norm": 22.6718692779541,
"learning_rate": 5.386955543730798e-09,
"logits/chosen": -2.2583675384521484,
"logits/rejected": -2.234276533126831,
"logps/chosen": -49.61945343017578,
"logps/rejected": -55.055259704589844,
"loss": 0.6101,
"rewards/accuracies": 0.668749988079071,
"rewards/chosen": -0.0628320500254631,
"rewards/margins": 0.25214314460754395,
"rewards/rejected": -0.31497520208358765,
"step": 1800
},
{
"epoch": 2.6087739843257363,
"grad_norm": 22.774456024169922,
"learning_rate": 5.014188861794e-09,
"logits/chosen": -2.201119899749756,
"logits/rejected": -2.1815037727355957,
"logps/chosen": -48.0115966796875,
"logps/rejected": -53.1959228515625,
"loss": 0.6,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.05451589822769165,
"rewards/margins": 0.2779920697212219,
"rewards/rejected": -0.3325079679489136,
"step": 1810
},
{
"epoch": 2.6231871002612377,
"grad_norm": 26.43316650390625,
"learning_rate": 4.654105049314744e-09,
"logits/chosen": -2.257012367248535,
"logits/rejected": -2.2474682331085205,
"logps/chosen": -49.35502243041992,
"logps/rejected": -53.5402946472168,
"loss": 0.62,
"rewards/accuracies": 0.684374988079071,
"rewards/chosen": -0.02924627996981144,
"rewards/margins": 0.22886374592781067,
"rewards/rejected": -0.25811004638671875,
"step": 1820
},
{
"epoch": 2.637600216196739,
"grad_norm": 25.40489959716797,
"learning_rate": 4.3068056248801496e-09,
"logits/chosen": -2.2381327152252197,
"logits/rejected": -2.2229650020599365,
"logps/chosen": -47.8486328125,
"logps/rejected": -53.079307556152344,
"loss": 0.624,
"rewards/accuracies": 0.640625,
"rewards/chosen": -0.05802469328045845,
"rewards/margins": 0.22190991044044495,
"rewards/rejected": -0.2799345850944519,
"step": 1830
},
{
"epoch": 2.65201333213224,
"grad_norm": 24.248754501342773,
"learning_rate": 3.972388502769225e-09,
"logits/chosen": -2.2772481441497803,
"logits/rejected": -2.2594523429870605,
"logps/chosen": -52.78043746948242,
"logps/rejected": -56.90644454956055,
"loss": 0.612,
"rewards/accuracies": 0.671875,
"rewards/chosen": -0.07180557399988174,
"rewards/margins": 0.2366293966770172,
"rewards/rejected": -0.30843502283096313,
"step": 1840
},
{
"epoch": 2.6664264480677415,
"grad_norm": 21.849775314331055,
"learning_rate": 3.650947965347817e-09,
"logits/chosen": -2.2574167251586914,
"logits/rejected": -2.242363452911377,
"logps/chosen": -49.745262145996094,
"logps/rejected": -55.80384063720703,
"loss": 0.5987,
"rewards/accuracies": 0.7281249761581421,
"rewards/chosen": -0.021196410059928894,
"rewards/margins": 0.2885274291038513,
"rewards/rejected": -0.3097238540649414,
"step": 1850
},
{
"epoch": 2.680839564003243,
"grad_norm": 26.125858306884766,
"learning_rate": 3.342574636487583e-09,
"logits/chosen": -2.2989931106567383,
"logits/rejected": -2.2858481407165527,
"logps/chosen": -48.27467346191406,
"logps/rejected": -52.693336486816406,
"loss": 0.6224,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": -0.0547061562538147,
"rewards/margins": 0.20482540130615234,
"rewards/rejected": -0.25953155755996704,
"step": 1860
},
{
"epoch": 2.6952526799387444,
"grad_norm": 21.578413009643555,
"learning_rate": 3.0473554560163207e-09,
"logits/chosen": -2.2350730895996094,
"logits/rejected": -2.2092785835266113,
"logps/chosen": -45.995094299316406,
"logps/rejected": -50.268714904785156,
"loss": 0.6228,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.048373352736234665,
"rewards/margins": 0.21610048413276672,
"rewards/rejected": -0.2644737958908081,
"step": 1870
},
{
"epoch": 2.709665795874246,
"grad_norm": 28.808284759521484,
"learning_rate": 2.7653736552070207e-09,
"logits/chosen": -2.26047945022583,
"logits/rejected": -2.2417685985565186,
"logps/chosen": -52.168251037597656,
"logps/rejected": -55.9251594543457,
"loss": 0.6094,
"rewards/accuracies": 0.628125011920929,
"rewards/chosen": -0.016536137089133263,
"rewards/margins": 0.24921099841594696,
"rewards/rejected": -0.2657471299171448,
"step": 1880
},
{
"epoch": 2.724078911809747,
"grad_norm": 20.450273513793945,
"learning_rate": 2.496708733312419e-09,
"logits/chosen": -2.2290151119232178,
"logits/rejected": -2.222752094268799,
"logps/chosen": -45.20454025268555,
"logps/rejected": -50.58509826660156,
"loss": 0.6211,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.056550413370132446,
"rewards/margins": 0.211500883102417,
"rewards/rejected": -0.26805129647254944,
"step": 1890
},
{
"epoch": 2.7384920277452482,
"grad_norm": 19.10955047607422,
"learning_rate": 2.241436435151717e-09,
"logits/chosen": -2.234227418899536,
"logits/rejected": -2.2173571586608887,
"logps/chosen": -45.842037200927734,
"logps/rejected": -50.64638137817383,
"loss": 0.619,
"rewards/accuracies": 0.59375,
"rewards/chosen": -0.01847267895936966,
"rewards/margins": 0.2231343686580658,
"rewards/rejected": -0.24160704016685486,
"step": 1900
},
{
"epoch": 2.7529051436807492,
"grad_norm": 18.778671264648438,
"learning_rate": 1.9996287297558866e-09,
"logits/chosen": -2.2211852073669434,
"logits/rejected": -2.2099175453186035,
"logps/chosen": -46.6721305847168,
"logps/rejected": -51.1614990234375,
"loss": 0.6287,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.056424111127853394,
"rewards/margins": 0.22064605355262756,
"rewards/rejected": -0.27707016468048096,
"step": 1910
},
{
"epoch": 2.7673182596162507,
"grad_norm": 19.748558044433594,
"learning_rate": 1.7713537900772957e-09,
"logits/chosen": -2.2626030445098877,
"logits/rejected": -2.255251407623291,
"logps/chosen": -43.59907913208008,
"logps/rejected": -47.853111267089844,
"loss": 0.6399,
"rewards/accuracies": 0.606249988079071,
"rewards/chosen": -0.06305160373449326,
"rewards/margins": 0.17098698019981384,
"rewards/rejected": -0.2340385913848877,
"step": 1920
},
{
"epoch": 2.781731375551752,
"grad_norm": 24.59339714050293,
"learning_rate": 1.5566759737697998e-09,
"logits/chosen": -2.2315924167633057,
"logits/rejected": -2.215991258621216,
"logps/chosen": -49.84008026123047,
"logps/rejected": -53.77178955078125,
"loss": 0.6144,
"rewards/accuracies": 0.653124988079071,
"rewards/chosen": -0.022936221212148666,
"rewards/margins": 0.2307942807674408,
"rewards/rejected": -0.2537304759025574,
"step": 1930
},
{
"epoch": 2.7961444914872535,
"grad_norm": 22.342159271240234,
"learning_rate": 1.3556558050442425e-09,
"logits/chosen": -2.2505908012390137,
"logits/rejected": -2.233022928237915,
"logps/chosen": -48.119747161865234,
"logps/rejected": -54.29258346557617,
"loss": 0.609,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.041114725172519684,
"rewards/margins": 0.2513982951641083,
"rewards/rejected": -0.292512983083725,
"step": 1940
},
{
"epoch": 2.810557607422755,
"grad_norm": 23.257844924926758,
"learning_rate": 1.1683499576049583e-09,
"logits/chosen": -2.2333426475524902,
"logits/rejected": -2.222357749938965,
"logps/chosen": -46.32872772216797,
"logps/rejected": -51.00041961669922,
"loss": 0.6181,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": -0.05180941894650459,
"rewards/margins": 0.22404730319976807,
"rewards/rejected": -0.27585673332214355,
"step": 1950
},
{
"epoch": 2.824970723358256,
"grad_norm": 20.40143585205078,
"learning_rate": 9.948112386716167e-10,
"logits/chosen": -2.2653684616088867,
"logits/rejected": -2.2385129928588867,
"logps/chosen": -52.7291259765625,
"logps/rejected": -56.984962463378906,
"loss": 0.6034,
"rewards/accuracies": 0.653124988079071,
"rewards/chosen": -0.016901496797800064,
"rewards/margins": 0.2864697277545929,
"rewards/rejected": -0.30337125062942505,
"step": 1960
},
{
"epoch": 2.8393838392937574,
"grad_norm": 24.3416748046875,
"learning_rate": 8.350885740913416e-10,
"logits/chosen": -2.199658155441284,
"logits/rejected": -2.183741569519043,
"logps/chosen": -48.105308532714844,
"logps/rejected": -52.25703811645508,
"loss": 0.6347,
"rewards/accuracies": 0.581250011920929,
"rewards/chosen": -0.06897452473640442,
"rewards/margins": 0.20502345263957977,
"rewards/rejected": -0.273997962474823,
"step": 1970
},
{
"epoch": 2.8537969552292584,
"grad_norm": 27.89311408996582,
"learning_rate": 6.89226994544978e-10,
"logits/chosen": -2.202312707901001,
"logits/rejected": -2.1836862564086914,
"logps/chosen": -50.84931564331055,
"logps/rejected": -54.451866149902344,
"loss": 0.63,
"rewards/accuracies": 0.596875011920929,
"rewards/chosen": -0.0366673581302166,
"rewards/margins": 0.19457104802131653,
"rewards/rejected": -0.23123839497566223,
"step": 1980
},
{
"epoch": 2.86821007116476,
"grad_norm": 23.623729705810547,
"learning_rate": 5.572676228516038e-10,
"logits/chosen": -2.23040509223938,
"logits/rejected": -2.2104344367980957,
"logps/chosen": -48.78104019165039,
"logps/rejected": -54.99827194213867,
"loss": 0.6092,
"rewards/accuracies": 0.6968749761581421,
"rewards/chosen": -0.02846195362508297,
"rewards/margins": 0.24717013537883759,
"rewards/rejected": -0.2756320834159851,
"step": 1990
},
{
"epoch": 2.882623187100261,
"grad_norm": 23.66622543334961,
"learning_rate": 4.3924766237473656e-10,
"logits/chosen": -2.235327959060669,
"logits/rejected": -2.2082302570343018,
"logps/chosen": -47.306785583496094,
"logps/rejected": -52.0076789855957,
"loss": 0.5916,
"rewards/accuracies": 0.6937500238418579,
"rewards/chosen": 0.014220048673450947,
"rewards/margins": 0.29147493839263916,
"rewards/rejected": -0.2772548794746399,
"step": 2000
},
{
"epoch": 2.8970363030357626,
"grad_norm": 22.549758911132812,
"learning_rate": 3.35200386533574e-10,
"logits/chosen": -2.2073681354522705,
"logits/rejected": -2.191131114959717,
"logps/chosen": -48.15802001953125,
"logps/rejected": -52.05684280395508,
"loss": 0.6241,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": -0.039518579840660095,
"rewards/margins": 0.22054967284202576,
"rewards/rejected": -0.26006826758384705,
"step": 2010
},
{
"epoch": 2.911449418971264,
"grad_norm": 23.719388961791992,
"learning_rate": 2.4515512942220874e-10,
"logits/chosen": -2.253865957260132,
"logits/rejected": -2.2316102981567383,
"logps/chosen": -50.0792236328125,
"logps/rejected": -53.56706619262695,
"loss": 0.6187,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": -0.053325168788433075,
"rewards/margins": 0.22036127746105194,
"rewards/rejected": -0.2736864686012268,
"step": 2020
},
{
"epoch": 2.925862534906765,
"grad_norm": 27.85939598083496,
"learning_rate": 1.691372775394717e-10,
"logits/chosen": -2.226738214492798,
"logits/rejected": -2.2154603004455566,
"logps/chosen": -47.156124114990234,
"logps/rejected": -50.58882141113281,
"loss": 0.6321,
"rewards/accuracies": 0.640625,
"rewards/chosen": -0.07649682462215424,
"rewards/margins": 0.18964572250843048,
"rewards/rejected": -0.2661425769329071,
"step": 2030
},
{
"epoch": 2.9402756508422665,
"grad_norm": 26.678600311279297,
"learning_rate": 1.0716826263165724e-10,
"logits/chosen": -2.2740628719329834,
"logits/rejected": -2.248230218887329,
"logps/chosen": -47.3420295715332,
"logps/rejected": -53.25926971435547,
"loss": 0.6144,
"rewards/accuracies": 0.6812499761581421,
"rewards/chosen": -0.07511458545923233,
"rewards/margins": 0.2504141330718994,
"rewards/rejected": -0.32552871108055115,
"step": 2040
},
{
"epoch": 2.954688766777768,
"grad_norm": 22.650192260742188,
"learning_rate": 5.926555565031743e-11,
"logits/chosen": -2.266533613204956,
"logits/rejected": -2.254985809326172,
"logps/chosen": -49.164058685302734,
"logps/rejected": -53.79437255859375,
"loss": 0.6302,
"rewards/accuracies": 0.6468750238418579,
"rewards/chosen": -0.06538953632116318,
"rewards/margins": 0.20318007469177246,
"rewards/rejected": -0.26856961846351624,
"step": 2050
},
{
"epoch": 2.969101882713269,
"grad_norm": 24.89960289001465,
"learning_rate": 2.544266182662458e-11,
"logits/chosen": -2.2338194847106934,
"logits/rejected": -2.210289239883423,
"logps/chosen": -44.41590118408203,
"logps/rejected": -49.89021301269531,
"loss": 0.6038,
"rewards/accuracies": 0.6656249761581421,
"rewards/chosen": -0.04492425546050072,
"rewards/margins": 0.25600799918174744,
"rewards/rejected": -0.30093228816986084,
"step": 2060
},
{
"epoch": 2.9835149986487703,
"grad_norm": 23.839759826660156,
"learning_rate": 5.709116863872321e-12,
"logits/chosen": -2.250903606414795,
"logits/rejected": -2.2327446937561035,
"logps/chosen": -45.14078140258789,
"logps/rejected": -48.739810943603516,
"loss": 0.6244,
"rewards/accuracies": 0.659375011920929,
"rewards/chosen": -0.013980092480778694,
"rewards/margins": 0.2065594643354416,
"rewards/rejected": -0.22053952515125275,
"step": 2070
},
{
"epoch": 2.9964868029907215,
"step": 2079,
"total_flos": 0.0,
"train_loss": 0.646036940936345,
"train_runtime": 18428.4848,
"train_samples_per_second": 3.614,
"train_steps_per_second": 0.113
}
],
"logging_steps": 10,
"max_steps": 2079,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}