{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.998691442030882,
  "eval_steps": 500,
  "global_step": 477,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.010468463752944255,
      "grad_norm": 1.1784705641049245,
      "learning_rate": 5.208333333333333e-08,
      "logits/chosen": -1.4463237524032593,
      "logits/rejected": -1.4477096796035767,
      "logps/chosen": -7.9823808670043945,
      "logps/rejected": -8.155555725097656,
      "loss": 1.798,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -7.9823808670043945,
      "rewards/margins": 0.1731749176979065,
      "rewards/rejected": -8.155555725097656,
      "step": 5
    },
    {
      "epoch": 0.02093692750588851,
      "grad_norm": 0.9602764459933493,
      "learning_rate": 1.0416666666666667e-07,
      "logits/chosen": -1.4451031684875488,
      "logits/rejected": -1.436326265335083,
      "logps/chosen": -8.046354293823242,
      "logps/rejected": -7.959715843200684,
      "loss": 1.7982,
      "rewards/accuracies": 0.4937500059604645,
      "rewards/chosen": -8.046354293823242,
      "rewards/margins": -0.0866377204656601,
      "rewards/rejected": -7.959715843200684,
      "step": 10
    },
    {
      "epoch": 0.031405391258832765,
      "grad_norm": 1.689559474976433,
      "learning_rate": 1.5624999999999999e-07,
      "logits/chosen": -1.448919415473938,
      "logits/rejected": -1.444911003112793,
      "logps/chosen": -7.851029872894287,
      "logps/rejected": -7.866568565368652,
      "loss": 1.789,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -7.851029872894287,
      "rewards/margins": 0.015538657084107399,
      "rewards/rejected": -7.866568565368652,
      "step": 15
    },
    {
      "epoch": 0.04187385501177702,
      "grad_norm": 1.4544738071780356,
      "learning_rate": 2.0833333333333333e-07,
      "logits/chosen": -1.4363288879394531,
      "logits/rejected": -1.4409823417663574,
      "logps/chosen": -8.180266380310059,
      "logps/rejected": -8.204001426696777,
      "loss": 1.8048,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -8.180266380310059,
      "rewards/margins": 0.023736288771033287,
      "rewards/rejected": -8.204001426696777,
      "step": 20
    },
    {
      "epoch": 0.05234231876472128,
      "grad_norm": 1.304458351649718,
      "learning_rate": 2.604166666666667e-07,
      "logits/chosen": -1.475970983505249,
      "logits/rejected": -1.4659450054168701,
      "logps/chosen": -8.084822654724121,
      "logps/rejected": -7.987278938293457,
      "loss": 1.8124,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -8.084822654724121,
      "rewards/margins": -0.09754323959350586,
      "rewards/rejected": -7.987278938293457,
      "step": 25
    },
    {
      "epoch": 0.06281078251766553,
      "grad_norm": 1.2140086077938599,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -1.4456737041473389,
      "logits/rejected": -1.4336217641830444,
      "logps/chosen": -7.773144721984863,
      "logps/rejected": -7.694056510925293,
      "loss": 1.8017,
      "rewards/accuracies": 0.46875,
      "rewards/chosen": -7.773144721984863,
      "rewards/margins": -0.0790884867310524,
      "rewards/rejected": -7.694056510925293,
      "step": 30
    },
    {
      "epoch": 0.07327924627060979,
      "grad_norm": 0.9984160768782803,
      "learning_rate": 3.645833333333333e-07,
      "logits/chosen": -1.4582607746124268,
      "logits/rejected": -1.4302797317504883,
      "logps/chosen": -7.980234622955322,
      "logps/rejected": -7.845289707183838,
      "loss": 1.7982,
      "rewards/accuracies": 0.4937500059604645,
      "rewards/chosen": -7.980234622955322,
      "rewards/margins": -0.13494457304477692,
      "rewards/rejected": -7.845289707183838,
      "step": 35
    },
    {
      "epoch": 0.08374771002355404,
      "grad_norm": 0.8920926404480951,
      "learning_rate": 4.1666666666666667e-07,
      "logits/chosen": -1.454561471939087,
      "logits/rejected": -1.4629584550857544,
      "logps/chosen": -7.85833740234375,
      "logps/rejected": -8.055027961730957,
      "loss": 1.8008,
      "rewards/accuracies": 0.48124998807907104,
      "rewards/chosen": -7.85833740234375,
      "rewards/margins": 0.19669005274772644,
      "rewards/rejected": -8.055027961730957,
      "step": 40
    },
    {
      "epoch": 0.0942161737764983,
      "grad_norm": 1.0898338388611548,
      "learning_rate": 4.6874999999999996e-07,
      "logits/chosen": -1.4877290725708008,
      "logits/rejected": -1.485259771347046,
      "logps/chosen": -7.855520725250244,
      "logps/rejected": -7.64023494720459,
      "loss": 1.7987,
      "rewards/accuracies": 0.4625000059604645,
      "rewards/chosen": -7.855520725250244,
      "rewards/margins": -0.21528606116771698,
      "rewards/rejected": -7.64023494720459,
      "step": 45
    },
    {
      "epoch": 0.10468463752944256,
      "grad_norm": 1.166801443928394,
      "learning_rate": 4.999731868769026e-07,
      "logits/chosen": -1.477698564529419,
      "logits/rejected": -1.4901760816574097,
      "logps/chosen": -8.011387825012207,
      "logps/rejected": -8.05154037475586,
      "loss": 1.8065,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": -8.011387825012207,
      "rewards/margins": 0.040151696652173996,
      "rewards/rejected": -8.05154037475586,
      "step": 50
    },
    {
      "epoch": 0.11515310128238682,
      "grad_norm": 0.9751544943818836,
      "learning_rate": 4.996716052911017e-07,
      "logits/chosen": -1.48259699344635,
      "logits/rejected": -1.4775769710540771,
      "logps/chosen": -7.901836395263672,
      "logps/rejected": -7.928590297698975,
      "loss": 1.8086,
      "rewards/accuracies": 0.4312500059604645,
      "rewards/chosen": -7.901836395263672,
      "rewards/margins": 0.0267526563256979,
      "rewards/rejected": -7.928590297698975,
      "step": 55
    },
    {
      "epoch": 0.12562156503533106,
      "grad_norm": 1.453860208239476,
      "learning_rate": 4.990353313429303e-07,
      "logits/chosen": -1.4845072031021118,
      "logits/rejected": -1.495591402053833,
      "logps/chosen": -7.9037041664123535,
      "logps/rejected": -8.206459045410156,
      "loss": 1.7865,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -7.9037041664123535,
      "rewards/margins": 0.30275601148605347,
      "rewards/rejected": -8.206459045410156,
      "step": 60
    },
    {
      "epoch": 0.1360900287882753,
      "grad_norm": 1.734106310553517,
      "learning_rate": 4.980652179769217e-07,
      "logits/chosen": -1.4411917924880981,
      "logits/rejected": -1.4547481536865234,
      "logps/chosen": -7.689724922180176,
      "logps/rejected": -8.240130424499512,
      "loss": 1.7898,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -7.689724922180176,
      "rewards/margins": 0.5504060387611389,
      "rewards/rejected": -8.240130424499512,
      "step": 65
    },
    {
      "epoch": 0.14655849254121958,
      "grad_norm": 1.8736880005786551,
      "learning_rate": 4.967625656594781e-07,
      "logits/chosen": -1.430119514465332,
      "logits/rejected": -1.420650839805603,
      "logps/chosen": -7.873298645019531,
      "logps/rejected": -7.9891533851623535,
      "loss": 1.7922,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": -7.873298645019531,
      "rewards/margins": 0.11585396528244019,
      "rewards/rejected": -7.9891533851623535,
      "step": 70
    },
    {
      "epoch": 0.15702695629416383,
      "grad_norm": 1.3664496432393678,
      "learning_rate": 4.951291206355559e-07,
      "logits/chosen": -1.4261605739593506,
      "logits/rejected": -1.4281210899353027,
      "logps/chosen": -7.770443916320801,
      "logps/rejected": -7.860281467437744,
      "loss": 1.7858,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": -7.770443916320801,
      "rewards/margins": 0.08983758091926575,
      "rewards/rejected": -7.860281467437744,
      "step": 75
    },
    {
      "epoch": 0.16749542004710807,
      "grad_norm": 1.6834122106300342,
      "learning_rate": 4.93167072587771e-07,
      "logits/chosen": -1.4214476346969604,
      "logits/rejected": -1.4338067770004272,
      "logps/chosen": -7.932246208190918,
      "logps/rejected": -8.00547981262207,
      "loss": 1.7858,
      "rewards/accuracies": 0.45625001192092896,
      "rewards/chosen": -7.932246208190918,
      "rewards/margins": 0.07323212176561356,
      "rewards/rejected": -8.00547981262207,
      "step": 80
    },
    {
      "epoch": 0.17796388380005235,
      "grad_norm": 1.2997280215141844,
      "learning_rate": 4.908790517010636e-07,
      "logits/chosen": -1.435379981994629,
      "logits/rejected": -1.4292837381362915,
      "logps/chosen": -8.106579780578613,
      "logps/rejected": -8.394915580749512,
      "loss": 1.7975,
      "rewards/accuracies": 0.5,
      "rewards/chosen": -8.106579780578613,
      "rewards/margins": 0.2883368134498596,
      "rewards/rejected": -8.394915580749512,
      "step": 85
    },
    {
      "epoch": 0.1884323475529966,
      "grad_norm": 1.4395629403420762,
      "learning_rate": 4.882681251368548e-07,
      "logits/chosen": -1.405975341796875,
      "logits/rejected": -1.4189186096191406,
      "logps/chosen": -8.203514099121094,
      "logps/rejected": -8.477225303649902,
      "loss": 1.7939,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -8.203514099121094,
      "rewards/margins": 0.27371111512184143,
      "rewards/rejected": -8.477225303649902,
      "step": 90
    },
    {
      "epoch": 0.19890081130594087,
      "grad_norm": 1.4670380498675017,
      "learning_rate": 4.853377929214243e-07,
      "logits/chosen": -1.4155464172363281,
      "logits/rejected": -1.408503532409668,
      "logps/chosen": -8.385589599609375,
      "logps/rejected": -8.298436164855957,
      "loss": 1.7993,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": -8.385589599609375,
      "rewards/margins": -0.08715416491031647,
      "rewards/rejected": -8.298436164855957,
      "step": 95
    },
    {
      "epoch": 0.2093692750588851,
      "grad_norm": 1.8173814697081554,
      "learning_rate": 4.820919832540181e-07,
      "logits/chosen": -1.4453513622283936,
      "logits/rejected": -1.4449245929718018,
      "logps/chosen": -8.229129791259766,
      "logps/rejected": -8.375448226928711,
      "loss": 1.7935,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": -8.229129791259766,
      "rewards/margins": 0.1463182270526886,
      "rewards/rejected": -8.375448226928711,
      "step": 100
    },
    {
      "epoch": 0.21983773881182936,
      "grad_norm": 1.5123285049323918,
      "learning_rate": 4.785350472409791e-07,
      "logits/chosen": -1.417307734489441,
      "logits/rejected": -1.4141901731491089,
      "logps/chosen": -7.822029113769531,
      "logps/rejected": -8.184598922729492,
      "loss": 1.7821,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -7.822029113769531,
      "rewards/margins": 0.3625703752040863,
      "rewards/rejected": -8.184598922729492,
      "step": 105
    },
    {
      "epoch": 0.23030620256477363,
      "grad_norm": 1.6377413056182937,
      "learning_rate": 4.7467175306295647e-07,
      "logits/chosen": -1.4389078617095947,
      "logits/rejected": -1.4492288827896118,
      "logps/chosen": -8.023492813110352,
      "logps/rejected": -8.443803787231445,
      "loss": 1.7872,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -8.023492813110352,
      "rewards/margins": 0.4203101694583893,
      "rewards/rejected": -8.443803787231445,
      "step": 110
    },
    {
      "epoch": 0.24077466631771788,
      "grad_norm": 98.6962749407612,
      "learning_rate": 4.70507279583015e-07,
      "logits/chosen": -1.370867133140564,
      "logits/rejected": -1.3796217441558838,
      "logps/chosen": -7.94110107421875,
      "logps/rejected": -8.37704086303711,
      "loss": 1.7801,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -7.94110107421875,
      "rewards/margins": 0.4359405040740967,
      "rewards/rejected": -8.37704086303711,
      "step": 115
    },
    {
      "epoch": 0.2512431300706621,
      "grad_norm": 1.8443926963030073,
      "learning_rate": 4.6604720940421207e-07,
      "logits/chosen": -1.3745276927947998,
      "logits/rejected": -1.3777697086334229,
      "logps/chosen": -8.194904327392578,
      "logps/rejected": -8.444576263427734,
      "loss": 1.7749,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": -8.194904327392578,
      "rewards/margins": 0.24967141449451447,
      "rewards/rejected": -8.444576263427734,
      "step": 120
    },
    {
      "epoch": 0.26171159382360637,
      "grad_norm": 2.195114945918261,
      "learning_rate": 4.612975213859487e-07,
      "logits/chosen": -1.4081896543502808,
      "logits/rejected": -1.4049599170684814,
      "logps/chosen": -8.501805305480957,
      "logps/rejected": -8.36412525177002,
      "loss": 1.7924,
      "rewards/accuracies": 0.4625000059604645,
      "rewards/chosen": -8.501805305480957,
      "rewards/margins": -0.13768072426319122,
      "rewards/rejected": -8.36412525177002,
      "step": 125
    },
    {
      "epoch": 0.2721800575765506,
      "grad_norm": 2.0830951299288785,
      "learning_rate": 4.5626458262912735e-07,
      "logits/chosen": -1.3827464580535889,
      "logits/rejected": -1.396815299987793,
      "logps/chosen": -8.410286903381348,
      "logps/rejected": -8.63615608215332,
      "loss": 1.7843,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -8.410286903381348,
      "rewards/margins": 0.225869819521904,
      "rewards/rejected": -8.63615608215332,
      "step": 130
    },
    {
      "epoch": 0.2826485213294949,
      "grad_norm": 2.8119817250060923,
      "learning_rate": 4.5095513994085974e-07,
      "logits/chosen": -1.4122145175933838,
      "logits/rejected": -1.4246305227279663,
      "logps/chosen": -8.588987350463867,
      "logps/rejected": -9.209198951721191,
      "loss": 1.7873,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -8.588987350463867,
      "rewards/margins": 0.6202121376991272,
      "rewards/rejected": -9.209198951721191,
      "step": 135
    },
    {
      "epoch": 0.29311698508243916,
      "grad_norm": 2.927322308508806,
      "learning_rate": 4.453763107901675e-07,
      "logits/chosen": -1.435681700706482,
      "logits/rejected": -1.439152479171753,
      "logps/chosen": -8.261212348937988,
      "logps/rejected": -8.506053924560547,
      "loss": 1.7771,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -8.261212348937988,
      "rewards/margins": 0.2448415756225586,
      "rewards/rejected": -8.506053924560547,
      "step": 140
    },
    {
      "epoch": 0.3035854488353834,
      "grad_norm": 1.7606419766373003,
      "learning_rate": 4.395355737667985e-07,
      "logits/chosen": -1.4473952054977417,
      "logits/rejected": -1.4762256145477295,
      "logps/chosen": -8.1802396774292,
      "logps/rejected": -8.576211929321289,
      "loss": 1.7748,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -8.1802396774292,
      "rewards/margins": 0.3959727883338928,
      "rewards/rejected": -8.576211929321289,
      "step": 145
    },
    {
      "epoch": 0.31405391258832765,
      "grad_norm": 2.502338791234934,
      "learning_rate": 4.3344075855595097e-07,
      "logits/chosen": -1.4698665142059326,
      "logits/rejected": -1.475287914276123,
      "logps/chosen": -8.098420143127441,
      "logps/rejected": -8.450376510620117,
      "loss": 1.7739,
      "rewards/accuracies": 0.518750011920929,
      "rewards/chosen": -8.098420143127441,
      "rewards/margins": 0.3519565463066101,
      "rewards/rejected": -8.450376510620117,
      "step": 150
    },
    {
      "epoch": 0.3245223763412719,
      "grad_norm": 2.2537748410992924,
      "learning_rate": 4.271000354423425e-07,
      "logits/chosen": -1.459938645362854,
      "logits/rejected": -1.4374125003814697,
      "logps/chosen": -8.157195091247559,
      "logps/rejected": -8.490175247192383,
      "loss": 1.7767,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -8.157195091247559,
      "rewards/margins": 0.3329797685146332,
      "rewards/rejected": -8.490175247192383,
      "step": 155
    },
    {
      "epoch": 0.33499084009421615,
      "grad_norm": 2.9160012212300086,
      "learning_rate": 4.2052190435769554e-07,
      "logits/chosen": -1.447819471359253,
      "logits/rejected": -1.4601446390151978,
      "logps/chosen": -8.642538070678711,
      "logps/rejected": -8.85395336151123,
      "loss": 1.7908,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -8.642538070678711,
      "rewards/margins": 0.2114148586988449,
      "rewards/rejected": -8.85395336151123,
      "step": 160
    },
    {
      "epoch": 0.34545930384716045,
      "grad_norm": 2.332693331124937,
      "learning_rate": 4.137151834863213e-07,
      "logits/chosen": -1.4943044185638428,
      "logits/rejected": -1.4839502573013306,
      "logps/chosen": -8.424747467041016,
      "logps/rejected": -8.770976066589355,
      "loss": 1.7765,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": -8.424747467041016,
      "rewards/margins": 0.3462288975715637,
      "rewards/rejected": -8.770976066589355,
      "step": 165
    },
    {
      "epoch": 0.3559277676001047,
      "grad_norm": 2.4152194824308517,
      "learning_rate": 4.0668899744407567e-07,
      "logits/chosen": -1.4917938709259033,
      "logits/rejected": -1.4810343980789185,
      "logps/chosen": -8.424253463745117,
      "logps/rejected": -8.7727632522583,
      "loss": 1.7856,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -8.424253463745117,
      "rewards/margins": 0.34850937128067017,
      "rewards/rejected": -8.7727632522583,
      "step": 170
    },
    {
      "epoch": 0.36639623135304894,
      "grad_norm": 2.653215706659519,
      "learning_rate": 3.994527650465352e-07,
      "logits/chosen": -1.5167206525802612,
      "logits/rejected": -1.5095903873443604,
      "logps/chosen": -8.30429458618164,
      "logps/rejected": -8.76606559753418,
      "loss": 1.7754,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -8.30429458618164,
      "rewards/margins": 0.46177130937576294,
      "rewards/rejected": -8.76606559753418,
      "step": 175
    },
    {
      "epoch": 0.3768646951059932,
      "grad_norm": 3.002251381640332,
      "learning_rate": 3.920161866827889e-07,
      "logits/chosen": -1.5167474746704102,
      "logits/rejected": -1.524743914604187,
      "logps/chosen": -8.360708236694336,
      "logps/rejected": -8.7758207321167,
      "loss": 1.7796,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -8.360708236694336,
      "rewards/margins": 0.41511401534080505,
      "rewards/rejected": -8.7758207321167,
      "step": 180
    },
    {
      "epoch": 0.38733315885893743,
      "grad_norm": 2.621525982629558,
      "learning_rate": 3.8438923131177237e-07,
      "logits/chosen": -1.5086079835891724,
      "logits/rejected": -1.5038981437683105,
      "logps/chosen": -8.088265419006348,
      "logps/rejected": -8.633108139038086,
      "loss": 1.7613,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -8.088265419006348,
      "rewards/margins": 0.5448437929153442,
      "rewards/rejected": -8.633108139038086,
      "step": 185
    },
    {
      "epoch": 0.39780162261188173,
      "grad_norm": 2.7052866783401983,
      "learning_rate": 3.765821230985757e-07,
      "logits/chosen": -1.530807375907898,
      "logits/rejected": -1.5209619998931885,
      "logps/chosen": -8.019875526428223,
      "logps/rejected": -8.384284973144531,
      "loss": 1.7721,
      "rewards/accuracies": 0.4937500059604645,
      "rewards/chosen": -8.019875526428223,
      "rewards/margins": 0.3644091784954071,
      "rewards/rejected": -8.384284973144531,
      "step": 190
    },
    {
      "epoch": 0.408270086364826,
      "grad_norm": 3.1926636795520884,
      "learning_rate": 3.6860532770864005e-07,
      "logits/chosen": -1.4885269403457642,
      "logits/rejected": -1.483224630355835,
      "logps/chosen": -8.213752746582031,
      "logps/rejected": -8.523615837097168,
      "loss": 1.7799,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -8.213752746582031,
      "rewards/margins": 0.3098633289337158,
      "rewards/rejected": -8.523615837097168,
      "step": 195
    },
    {
      "epoch": 0.4187385501177702,
      "grad_norm": 2.929908108293166,
      "learning_rate": 3.604695382782159e-07,
      "logits/chosen": -1.5416061878204346,
      "logits/rejected": -1.5284559726715088,
      "logps/chosen": -8.0154447555542,
      "logps/rejected": -8.428897857666016,
      "loss": 1.7863,
      "rewards/accuracies": 0.5062500238418579,
      "rewards/chosen": -8.0154447555542,
      "rewards/margins": 0.4134535789489746,
      "rewards/rejected": -8.428897857666016,
      "step": 200
    },
    {
      "epoch": 0.42920701387071447,
      "grad_norm": 3.9208987619596365,
      "learning_rate": 3.5218566107988867e-07,
      "logits/chosen": -1.5039266347885132,
      "logits/rejected": -1.495948076248169,
      "logps/chosen": -8.161928176879883,
      "logps/rejected": -8.836877822875977,
      "loss": 1.7649,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -8.161928176879883,
      "rewards/margins": 0.6749483346939087,
      "rewards/rejected": -8.836877822875977,
      "step": 205
    },
    {
      "epoch": 0.4396754776236587,
      "grad_norm": 9.132837726981721,
      "learning_rate": 3.4376480090239047e-07,
      "logits/chosen": -1.4978554248809814,
      "logits/rejected": -1.5001240968704224,
      "logps/chosen": -8.470359802246094,
      "logps/rejected": -8.813634872436523,
      "loss": 1.7745,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -8.470359802246094,
      "rewards/margins": 0.34327542781829834,
      "rewards/rejected": -8.813634872436523,
      "step": 210
    },
    {
      "epoch": 0.45014394137660296,
      "grad_norm": 2.6786514985319276,
      "learning_rate": 3.3521824616429284e-07,
      "logits/chosen": -1.5123167037963867,
      "logits/rejected": -1.5113458633422852,
      "logps/chosen": -7.9097490310668945,
      "logps/rejected": -8.861387252807617,
      "loss": 1.761,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -7.9097490310668945,
      "rewards/margins": 0.9516386985778809,
      "rewards/rejected": -8.861387252807617,
      "step": 215
    },
    {
      "epoch": 0.46061240512954726,
      "grad_norm": 3.81508961382941,
      "learning_rate": 3.265574537815398e-07,
      "logits/chosen": -1.4957890510559082,
      "logits/rejected": -1.503143072128296,
      "logps/chosen": -7.837327480316162,
      "logps/rejected": -8.386968612670898,
      "loss": 1.7778,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -7.837327480316162,
      "rewards/margins": 0.549641489982605,
      "rewards/rejected": -8.386968612670898,
      "step": 220
    },
    {
      "epoch": 0.4710808688824915,
      "grad_norm": 4.889361688771567,
      "learning_rate": 3.1779403380910425e-07,
      "logits/chosen": -1.530035376548767,
      "logits/rejected": -1.5252281427383423,
      "logps/chosen": -7.843958377838135,
      "logps/rejected": -8.631728172302246,
      "loss": 1.7605,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -7.843958377838135,
      "rewards/margins": 0.787769079208374,
      "rewards/rejected": -8.631728172302246,
      "step": 225
    },
    {
      "epoch": 0.48154933263543576,
      "grad_norm": 4.621367765160452,
      "learning_rate": 3.0893973387735683e-07,
      "logits/chosen": -1.5648292303085327,
      "logits/rejected": -1.5472863912582397,
      "logps/chosen": -8.125567436218262,
      "logps/rejected": -9.158833503723145,
      "loss": 1.7587,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -8.125567436218262,
      "rewards/margins": 1.033265471458435,
      "rewards/rejected": -9.158833503723145,
      "step": 230
    },
    {
      "epoch": 0.49201779638838,
      "grad_norm": 4.510315294900107,
      "learning_rate": 3.000064234440111e-07,
      "logits/chosen": -1.6225063800811768,
      "logits/rejected": -1.597538948059082,
      "logps/chosen": -8.116875648498535,
      "logps/rejected": -9.23936939239502,
      "loss": 1.7614,
      "rewards/accuracies": 0.6312500238418579,
      "rewards/chosen": -8.116875648498535,
      "rewards/margins": 1.1224939823150635,
      "rewards/rejected": -9.23936939239502,
      "step": 235
    },
    {
      "epoch": 0.5024862601413242,
      "grad_norm": 7.892385115240221,
      "learning_rate": 2.910060778827554e-07,
      "logits/chosen": -1.5823067426681519,
      "logits/rejected": -1.574851155281067,
      "logps/chosen": -8.326906204223633,
      "logps/rejected": -9.232850074768066,
      "loss": 1.7551,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -8.326906204223633,
      "rewards/margins": 0.9059435129165649,
      "rewards/rejected": -9.232850074768066,
      "step": 240
    },
    {
      "epoch": 0.5129547238942685,
      "grad_norm": 7.898664476513504,
      "learning_rate": 2.8195076242990116e-07,
      "logits/chosen": -1.5616357326507568,
      "logits/rejected": -1.5433732271194458,
      "logps/chosen": -9.017087936401367,
      "logps/rejected": -9.785380363464355,
      "loss": 1.7399,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": -9.017087936401367,
      "rewards/margins": 0.7682939767837524,
      "rewards/rejected": -9.785380363464355,
      "step": 245
    },
    {
      "epoch": 0.5234231876472127,
      "grad_norm": 7.533973692196864,
      "learning_rate": 2.7285261601056697e-07,
      "logits/chosen": -1.5792642831802368,
      "logits/rejected": -1.5693460702896118,
      "logps/chosen": -7.595057010650635,
      "logps/rejected": -8.57075309753418,
      "loss": 1.7492,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -7.595057010650635,
      "rewards/margins": 0.9756966829299927,
      "rewards/rejected": -8.57075309753418,
      "step": 250
    },
    {
      "epoch": 0.533891651400157,
      "grad_norm": 10.624973349509592,
      "learning_rate": 2.6372383496608186e-07,
      "logits/chosen": -1.5440006256103516,
      "logits/rejected": -1.5143473148345947,
      "logps/chosen": -8.250520706176758,
      "logps/rejected": -9.301929473876953,
      "loss": 1.7613,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -8.250520706176758,
      "rewards/margins": 1.0514087677001953,
      "rewards/rejected": -9.301929473876953,
      "step": 255
    },
    {
      "epoch": 0.5443601151531012,
      "grad_norm": 6.827054099571504,
      "learning_rate": 2.5457665670441937e-07,
      "logits/chosen": -1.4768860340118408,
      "logits/rejected": -1.4396953582763672,
      "logps/chosen": -7.612262725830078,
      "logps/rejected": -9.350198745727539,
      "loss": 1.7355,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -7.612262725830078,
      "rewards/margins": 1.7379356622695923,
      "rewards/rejected": -9.350198745727539,
      "step": 260
    },
    {
      "epoch": 0.5548285789060455,
      "grad_norm": 8.933359425352409,
      "learning_rate": 2.454233432955807e-07,
      "logits/chosen": -1.4639561176300049,
      "logits/rejected": -1.453298807144165,
      "logps/chosen": -8.868209838867188,
      "logps/rejected": -9.566887855529785,
      "loss": 1.7463,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -8.868209838867188,
      "rewards/margins": 0.698679506778717,
      "rewards/rejected": -9.566887855529785,
      "step": 265
    },
    {
      "epoch": 0.5652970426589898,
      "grad_norm": 7.0066263897371535,
      "learning_rate": 2.3627616503391812e-07,
      "logits/chosen": -1.4920735359191895,
      "logits/rejected": -1.4540319442749023,
      "logps/chosen": -8.762821197509766,
      "logps/rejected": -9.565961837768555,
      "loss": 1.7653,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -8.762821197509766,
      "rewards/margins": 0.8031412363052368,
      "rewards/rejected": -9.565961837768555,
      "step": 270
    },
    {
      "epoch": 0.575765506411934,
      "grad_norm": 6.881020189800575,
      "learning_rate": 2.2714738398943308e-07,
      "logits/chosen": -1.5479252338409424,
      "logits/rejected": -1.5279855728149414,
      "logps/chosen": -7.815232753753662,
      "logps/rejected": -9.187639236450195,
      "loss": 1.7328,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -7.815232753753662,
      "rewards/margins": 1.3724067211151123,
      "rewards/rejected": -9.187639236450195,
      "step": 275
    },
    {
      "epoch": 0.5862339701648783,
      "grad_norm": 7.517058454845967,
      "learning_rate": 2.1804923757009882e-07,
      "logits/chosen": -1.5499231815338135,
      "logits/rejected": -1.5244100093841553,
      "logps/chosen": -8.39962100982666,
      "logps/rejected": -10.24171257019043,
      "loss": 1.7267,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -8.39962100982666,
      "rewards/margins": 1.842092514038086,
      "rewards/rejected": -10.24171257019043,
      "step": 280
    },
    {
      "epoch": 0.5967024339178225,
      "grad_norm": 8.013738309459756,
      "learning_rate": 2.089939221172446e-07,
      "logits/chosen": -1.5420681238174438,
      "logits/rejected": -1.5153690576553345,
      "logps/chosen": -8.524008750915527,
      "logps/rejected": -9.523444175720215,
      "loss": 1.7487,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -8.524008750915527,
      "rewards/margins": 0.999435305595398,
      "rewards/rejected": -9.523444175720215,
      "step": 285
    },
    {
      "epoch": 0.6071708976707668,
      "grad_norm": 6.486821674482447,
      "learning_rate": 1.9999357655598891e-07,
      "logits/chosen": -1.6035845279693604,
      "logits/rejected": -1.5711686611175537,
      "logps/chosen": -8.237485885620117,
      "logps/rejected": -9.552020072937012,
      "loss": 1.735,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": -8.237485885620117,
      "rewards/margins": 1.3145345449447632,
      "rewards/rejected": -9.552020072937012,
      "step": 290
    },
    {
      "epoch": 0.6176393614237111,
      "grad_norm": 8.83521561541266,
      "learning_rate": 1.9106026612264315e-07,
      "logits/chosen": -1.6141548156738281,
      "logits/rejected": -1.594178557395935,
      "logps/chosen": -7.968149662017822,
      "logps/rejected": -8.619260787963867,
      "loss": 1.7566,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -7.968149662017822,
      "rewards/margins": 0.6511108875274658,
      "rewards/rejected": -8.619260787963867,
      "step": 295
    },
    {
      "epoch": 0.6281078251766553,
      "grad_norm": 6.975353142841212,
      "learning_rate": 1.8220596619089573e-07,
      "logits/chosen": -1.5654271841049194,
      "logits/rejected": -1.546992301940918,
      "logps/chosen": -8.48424243927002,
      "logps/rejected": -9.418001174926758,
      "loss": 1.7583,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -8.48424243927002,
      "rewards/margins": 0.9337590336799622,
      "rewards/rejected": -9.418001174926758,
      "step": 300
    },
    {
      "epoch": 0.6385762889295996,
      "grad_norm": 5.4690365037746815,
      "learning_rate": 1.7344254621846017e-07,
      "logits/chosen": -1.5813648700714111,
      "logits/rejected": -1.5758841037750244,
      "logps/chosen": -8.727537155151367,
      "logps/rejected": -9.66854190826416,
      "loss": 1.732,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -8.727537155151367,
      "rewards/margins": 0.9410043954849243,
      "rewards/rejected": -9.66854190826416,
      "step": 305
    },
    {
      "epoch": 0.6490447526825438,
      "grad_norm": 5.361180962867588,
      "learning_rate": 1.647817538357072e-07,
      "logits/chosen": -1.6157958507537842,
      "logits/rejected": -1.6046196222305298,
      "logps/chosen": -8.187897682189941,
      "logps/rejected": -8.897558212280273,
      "loss": 1.7699,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": -8.187897682189941,
      "rewards/margins": 0.7096610069274902,
      "rewards/rejected": -8.897558212280273,
      "step": 310
    },
    {
      "epoch": 0.6595132164354881,
      "grad_norm": 5.449920555042311,
      "learning_rate": 1.562351990976095e-07,
      "logits/chosen": -1.6411434412002563,
      "logits/rejected": -1.62042236328125,
      "logps/chosen": -7.746342658996582,
      "logps/rejected": -8.609481811523438,
      "loss": 1.7396,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -7.746342658996582,
      "rewards/margins": 0.8631397485733032,
      "rewards/rejected": -8.609481811523438,
      "step": 315
    },
    {
      "epoch": 0.6699816801884323,
      "grad_norm": 6.360997845008894,
      "learning_rate": 1.478143389201113e-07,
      "logits/chosen": -1.6342664957046509,
      "logits/rejected": -1.598560094833374,
      "logps/chosen": -8.291830062866211,
      "logps/rejected": -9.186594009399414,
      "loss": 1.7428,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -8.291830062866211,
      "rewards/margins": 0.8947637677192688,
      "rewards/rejected": -9.186594009399414,
      "step": 320
    },
    {
      "epoch": 0.6804501439413766,
      "grad_norm": 7.837240923658896,
      "learning_rate": 1.3953046172178413e-07,
      "logits/chosen": -1.6066200733184814,
      "logits/rejected": -1.5687167644500732,
      "logps/chosen": -8.601926803588867,
      "logps/rejected": -9.912989616394043,
      "loss": 1.7306,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -8.601926803588867,
      "rewards/margins": 1.3110629320144653,
      "rewards/rejected": -9.912989616394043,
      "step": 325
    },
    {
      "epoch": 0.6909186076943209,
      "grad_norm": 5.555679671882387,
      "learning_rate": 1.3139467229135998e-07,
      "logits/chosen": -1.6012904644012451,
      "logits/rejected": -1.5771445035934448,
      "logps/chosen": -8.177051544189453,
      "logps/rejected": -8.873910903930664,
      "loss": 1.7496,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -8.177051544189453,
      "rewards/margins": 0.6968590021133423,
      "rewards/rejected": -8.873910903930664,
      "step": 330
    },
    {
      "epoch": 0.7013870714472651,
      "grad_norm": 5.848948219669636,
      "learning_rate": 1.2341787690142435e-07,
      "logits/chosen": -1.6410300731658936,
      "logits/rejected": -1.5906805992126465,
      "logps/chosen": -7.888881683349609,
      "logps/rejected": -8.846321105957031,
      "loss": 1.7444,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -7.888881683349609,
      "rewards/margins": 0.9574400782585144,
      "rewards/rejected": -8.846321105957031,
      "step": 335
    },
    {
      "epoch": 0.7118555352002094,
      "grad_norm": 5.607244300528084,
      "learning_rate": 1.1561076868822755e-07,
      "logits/chosen": -1.6162534952163696,
      "logits/rejected": -1.590820074081421,
      "logps/chosen": -8.39061450958252,
      "logps/rejected": -9.30118465423584,
      "loss": 1.7583,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -8.39061450958252,
      "rewards/margins": 0.9105701446533203,
      "rewards/rejected": -9.30118465423584,
      "step": 340
    },
    {
      "epoch": 0.7223239989531536,
      "grad_norm": 7.279367786185165,
      "learning_rate": 1.0798381331721107e-07,
      "logits/chosen": -1.6430072784423828,
      "logits/rejected": -1.6136901378631592,
      "logps/chosen": -8.215340614318848,
      "logps/rejected": -9.647343635559082,
      "loss": 1.7453,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -8.215340614318848,
      "rewards/margins": 1.4320037364959717,
      "rewards/rejected": -9.647343635559082,
      "step": 345
    },
    {
      "epoch": 0.7327924627060979,
      "grad_norm": 7.386464884448549,
      "learning_rate": 1.0054723495346482e-07,
      "logits/chosen": -1.5686066150665283,
      "logits/rejected": -1.5304858684539795,
      "logps/chosen": -8.146261215209961,
      "logps/rejected": -9.257192611694336,
      "loss": 1.7426,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -8.146261215209961,
      "rewards/margins": 1.1109315156936646,
      "rewards/rejected": -9.257192611694336,
      "step": 350
    },
    {
      "epoch": 0.7432609264590422,
      "grad_norm": 5.785018086716381,
      "learning_rate": 9.331100255592436e-08,
      "logits/chosen": -1.6148964166641235,
      "logits/rejected": -1.5602537393569946,
      "logps/chosen": -8.22186279296875,
      "logps/rejected": -9.004861831665039,
      "loss": 1.7401,
      "rewards/accuracies": 0.543749988079071,
      "rewards/chosen": -8.22186279296875,
      "rewards/margins": 0.7829989194869995,
      "rewards/rejected": -9.004861831665039,
      "step": 355
    },
    {
      "epoch": 0.7537293902119864,
      "grad_norm": 6.331566938080069,
      "learning_rate": 8.628481651367875e-08,
      "logits/chosen": -1.6229692697525024,
      "logits/rejected": -1.5903146266937256,
      "logps/chosen": -7.560595512390137,
      "logps/rejected": -8.47083568572998,
      "loss": 1.7385,
      "rewards/accuracies": 0.5562499761581421,
      "rewards/chosen": -7.560595512390137,
      "rewards/margins": 0.9102400541305542,
      "rewards/rejected": -8.47083568572998,
      "step": 360
    },
    {
      "epoch": 0.7641978539649307,
      "grad_norm": 6.319560115409626,
      "learning_rate": 7.947809564230445e-08,
      "logits/chosen": -1.6155359745025635,
      "logits/rejected": -1.5554075241088867,
      "logps/chosen": -7.953149318695068,
      "logps/rejected": -8.690530776977539,
      "loss": 1.7605,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -7.953149318695068,
      "rewards/margins": 0.7373809814453125,
      "rewards/rejected": -8.690530776977539,
      "step": 365
    },
    {
      "epoch": 0.7746663177178749,
      "grad_norm": 6.673242442518388,
      "learning_rate": 7.289996455765748e-08,
      "logits/chosen": -1.6045255661010742,
      "logits/rejected": -1.5444786548614502,
      "logps/chosen": -8.404296875,
      "logps/rejected": -9.452798843383789,
      "loss": 1.7498,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -8.404296875,
      "rewards/margins": 1.04850172996521,
      "rewards/rejected": -9.452798843383789,
      "step": 370
    },
    {
      "epoch": 0.7851347814708192,
      "grad_norm": 7.020475966648844,
      "learning_rate": 6.655924144404906e-08,
      "logits/chosen": -1.5848079919815063,
      "logits/rejected": -1.5374637842178345,
      "logps/chosen": -8.454399108886719,
      "logps/rejected": -9.83703899383545,
      "loss": 1.739,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -8.454399108886719,
      "rewards/margins": 1.3826408386230469,
      "rewards/rejected": -9.83703899383545,
      "step": 375
    },
    {
      "epoch": 0.7956032452237635,
      "grad_norm": 6.653238331291029,
      "learning_rate": 6.046442623320145e-08,
      "logits/chosen": -1.5949599742889404,
      "logits/rejected": -1.5306172370910645,
      "logps/chosen": -8.711164474487305,
      "logps/rejected": -9.62182331085205,
      "loss": 1.7155,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -8.711164474487305,
      "rewards/margins": 0.910659670829773,
      "rewards/rejected": -9.62182331085205,
      "step": 380
    },
    {
      "epoch": 0.8060717089767077,
      "grad_norm": 7.782341216017442,
      "learning_rate": 5.4623689209832484e-08,
      "logits/chosen": -1.577016830444336,
      "logits/rejected": -1.5303077697753906,
      "logps/chosen": -8.757134437561035,
      "logps/rejected": -9.896451950073242,
      "loss": 1.7371,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -8.757134437561035,
      "rewards/margins": 1.1393181085586548,
      "rewards/rejected": -9.896451950073242,
      "step": 385
    },
    {
      "epoch": 0.816540172729652,
      "grad_norm": 6.684411144542067,
      "learning_rate": 4.904486005914027e-08,
      "logits/chosen": -1.5992292165756226,
      "logits/rejected": -1.5533421039581299,
      "logps/chosen": -8.044672012329102,
      "logps/rejected": -9.49129581451416,
      "loss": 1.7299,
      "rewards/accuracies": 0.625,
      "rewards/chosen": -8.044672012329102,
      "rewards/margins": 1.446624517440796,
      "rewards/rejected": -9.49129581451416,
      "step": 390
    },
    {
      "epoch": 0.8270086364825961,
      "grad_norm": 8.354771172189295,
      "learning_rate": 4.373541737087263e-08,
      "logits/chosen": -1.5767982006072998,
      "logits/rejected": -1.5374451875686646,
      "logps/chosen": -7.573851108551025,
      "logps/rejected": -8.567195892333984,
      "loss": 1.7336,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -7.573851108551025,
      "rewards/margins": 0.9933451414108276,
      "rewards/rejected": -8.567195892333984,
      "step": 395
    },
    {
      "epoch": 0.8374771002355405,
      "grad_norm": 7.4009477251723865,
      "learning_rate": 3.8702478614051345e-08,
      "logits/chosen": -1.5977306365966797,
      "logits/rejected": -1.5405915975570679,
      "logps/chosen": -7.894219875335693,
      "logps/rejected": -8.81593132019043,
      "loss": 1.7454,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -7.894219875335693,
      "rewards/margins": 0.9217125177383423,
      "rewards/rejected": -8.81593132019043,
      "step": 400
    },
    {
      "epoch": 0.8479455639884846,
      "grad_norm": 6.210047238602565,
      "learning_rate": 3.3952790595787986e-08,
      "logits/chosen": -1.60799241065979,
      "logits/rejected": -1.6026899814605713,
      "logps/chosen": -8.381356239318848,
      "logps/rejected": -9.069351196289062,
      "loss": 1.742,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": -8.381356239318848,
      "rewards/margins": 0.6879969835281372,
      "rewards/rejected": -9.069351196289062,
      "step": 405
    },
    {
      "epoch": 0.8584140277414289,
      "grad_norm": 6.765719877755184,
      "learning_rate": 2.9492720416985e-08,
      "logits/chosen": -1.6209001541137695,
      "logits/rejected": -1.5718666315078735,
      "logps/chosen": -7.976959228515625,
      "logps/rejected": -8.827299118041992,
      "loss": 1.7364,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -7.976959228515625,
      "rewards/margins": 0.8503401875495911,
      "rewards/rejected": -8.827299118041992,
      "step": 410
    },
    {
      "epoch": 0.8688824914943732,
      "grad_norm": 8.979600106055784,
      "learning_rate": 2.5328246937043525e-08,
      "logits/chosen": -1.6459277868270874,
      "logits/rejected": -1.6195147037506104,
      "logps/chosen": -7.984838008880615,
      "logps/rejected": -9.079233169555664,
      "loss": 1.7264,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -7.984838008880615,
      "rewards/margins": 1.094395637512207,
      "rewards/rejected": -9.079233169555664,
      "step": 415
    },
    {
      "epoch": 0.8793509552473174,
      "grad_norm": 6.181198265400907,
      "learning_rate": 2.1464952759020856e-08,
      "logits/chosen": -1.6045997142791748,
      "logits/rejected": -1.5594054460525513,
      "logps/chosen": -7.9123215675354,
      "logps/rejected": -8.782036781311035,
      "loss": 1.7493,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -7.9123215675354,
      "rewards/margins": 0.8697155117988586,
      "rewards/rejected": -8.782036781311035,
      "step": 420
    },
    {
      "epoch": 0.8898194190002617,
      "grad_norm": 7.693545140925747,
      "learning_rate": 1.7908016745981856e-08,
      "logits/chosen": -1.615017294883728,
      "logits/rejected": -1.5611761808395386,
      "logps/chosen": -8.289026260375977,
      "logps/rejected": -9.341882705688477,
      "loss": 1.733,
      "rewards/accuracies": 0.581250011920929,
      "rewards/chosen": -8.289026260375977,
      "rewards/margins": 1.0528560876846313,
      "rewards/rejected": -9.341882705688477,
      "step": 425
    },
    {
      "epoch": 0.9002878827532059,
      "grad_norm": 6.582142344582445,
      "learning_rate": 1.4662207078575684e-08,
      "logits/chosen": -1.5919125080108643,
      "logits/rejected": -1.5359071493148804,
      "logps/chosen": -7.956709861755371,
      "logps/rejected": -9.315800666809082,
      "loss": 1.7211,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": -7.956709861755371,
      "rewards/margins": 1.3590909242630005,
      "rewards/rejected": -9.315800666809082,
      "step": 430
    },
    {
      "epoch": 0.9107563465061502,
      "grad_norm": 8.523918422589938,
      "learning_rate": 1.1731874863145142e-08,
      "logits/chosen": -1.599135398864746,
      "logits/rejected": -1.5693204402923584,
      "logps/chosen": -8.411205291748047,
      "logps/rejected": -9.192506790161133,
      "loss": 1.7356,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -8.411205291748047,
      "rewards/margins": 0.781301736831665,
      "rewards/rejected": -9.192506790161133,
      "step": 435
    },
    {
      "epoch": 0.9212248102590945,
      "grad_norm": 7.451771387036121,
      "learning_rate": 9.12094829893642e-09,
      "logits/chosen": -1.607877492904663,
      "logits/rejected": -1.5552005767822266,
      "logps/chosen": -8.183059692382812,
      "logps/rejected": -9.428353309631348,
      "loss": 1.7353,
      "rewards/accuracies": 0.5375000238418579,
      "rewards/chosen": -8.183059692382812,
      "rewards/margins": 1.2452945709228516,
      "rewards/rejected": -9.428353309631348,
      "step": 440
    },
    {
      "epoch": 0.9316932740120387,
      "grad_norm": 9.436081897769167,
      "learning_rate": 6.832927412229017e-09,
      "logits/chosen": -1.6107797622680664,
      "logits/rejected": -1.5790627002716064,
      "logps/chosen": -8.090998649597168,
      "logps/rejected": -9.029291152954102,
      "loss": 1.7392,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -8.090998649597168,
      "rewards/margins": 0.9382919073104858,
      "rewards/rejected": -9.029291152954102,
      "step": 445
    },
    {
      "epoch": 0.942161737764983,
      "grad_norm": 8.582428584508216,
      "learning_rate": 4.8708793644441086e-09,
      "logits/chosen": -1.5821881294250488,
      "logits/rejected": -1.5397818088531494,
      "logps/chosen": -8.387347221374512,
      "logps/rejected": -9.367807388305664,
      "loss": 1.746,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": -8.387347221374512,
      "rewards/margins": 0.9804602861404419,
      "rewards/rejected": -9.367807388305664,
      "step": 450
    },
    {
      "epoch": 0.9526302015179272,
      "grad_norm": 6.129069741713946,
      "learning_rate": 3.2374343405217884e-09,
      "logits/chosen": -1.6205213069915771,
      "logits/rejected": -1.573266625404358,
      "logps/chosen": -8.136959075927734,
      "logps/rejected": -9.380440711975098,
      "loss": 1.7286,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -8.136959075927734,
      "rewards/margins": 1.243481993675232,
      "rewards/rejected": -9.380440711975098,
      "step": 455
    },
    {
      "epoch": 0.9630986652708715,
      "grad_norm": 7.975954172184094,
      "learning_rate": 1.9347820230782295e-09,
      "logits/chosen": -1.6145153045654297,
      "logits/rejected": -1.5640535354614258,
      "logps/chosen": -8.394238471984863,
      "logps/rejected": -9.535688400268555,
      "loss": 1.727,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -8.394238471984863,
      "rewards/margins": 1.14145028591156,
      "rewards/rejected": -9.535688400268555,
      "step": 460
    },
    {
      "epoch": 0.9735671290238157,
      "grad_norm": 5.8616098100207195,
      "learning_rate": 9.64668657069706e-10,
      "logits/chosen": -1.639035940170288,
      "logits/rejected": -1.5807130336761475,
      "logps/chosen": -8.210695266723633,
      "logps/rejected": -9.582908630371094,
      "loss": 1.7511,
      "rewards/accuracies": 0.612500011920929,
      "rewards/chosen": -8.210695266723633,
      "rewards/margins": 1.372214674949646,
      "rewards/rejected": -9.582908630371094,
      "step": 465
    },
    {
      "epoch": 0.98403559277676,
      "grad_norm": 6.126688648522468,
      "learning_rate": 3.2839470889836627e-10,
      "logits/chosen": -1.6169319152832031,
      "logits/rejected": -1.5936636924743652,
      "logps/chosen": -8.055643081665039,
      "logps/rejected": -8.981685638427734,
      "loss": 1.739,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": -8.055643081665039,
      "rewards/margins": 0.9260419011116028,
      "rewards/rejected": -8.981685638427734,
      "step": 470
    },
    {
      "epoch": 0.9945040565297043,
      "grad_norm": 7.844036990048118,
      "learning_rate": 2.6813123097352287e-11,
      "logits/chosen": -1.6366430521011353,
      "logits/rejected": -1.5729566812515259,
      "logps/chosen": -8.483617782592773,
      "logps/rejected": -9.541167259216309,
      "loss": 1.7467,
      "rewards/accuracies": 0.59375,
      "rewards/chosen": -8.483617782592773,
      "rewards/margins": 1.0575507879257202,
      "rewards/rejected": -9.541167259216309,
      "step": 475
    },
    {
      "epoch": 0.998691442030882,
      "step": 477,
      "total_flos": 0.0,
      "train_loss": 1.7629729861733299,
      "train_runtime": 8203.4319,
      "train_samples_per_second": 7.452,
      "train_steps_per_second": 0.058
    }
  ],
  "logging_steps": 5,
  "max_steps": 477,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 1000000,
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}