|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9982631930527722, |
|
"eval_steps": 400, |
|
"global_step": 467, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01068804275217101, |
|
"grad_norm": 47.923506570215594, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -1.0110366344451904, |
|
"logits/rejected": -0.9818881750106812, |
|
"logps/chosen": -0.27409863471984863, |
|
"logps/rejected": -0.27151164412498474, |
|
"loss": 3.0607, |
|
"rewards/accuracies": 0.4312500059604645, |
|
"rewards/chosen": -2.7409865856170654, |
|
"rewards/margins": -0.025869915261864662, |
|
"rewards/rejected": -2.715116500854492, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02137608550434202, |
|
"grad_norm": 39.987585891736785, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -1.0418651103973389, |
|
"logits/rejected": -0.9748126864433289, |
|
"logps/chosen": -0.2945522964000702, |
|
"logps/rejected": -0.29994362592697144, |
|
"loss": 3.0104, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -2.9455230236053467, |
|
"rewards/margins": 0.05391312763094902, |
|
"rewards/rejected": -2.999436378479004, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03206412825651302, |
|
"grad_norm": 52.07278122268582, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -0.963701069355011, |
|
"logits/rejected": -0.9835487604141235, |
|
"logps/chosen": -0.2644619345664978, |
|
"logps/rejected": -0.3007102608680725, |
|
"loss": 3.0162, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -2.6446194648742676, |
|
"rewards/margins": 0.362483412027359, |
|
"rewards/rejected": -3.0071024894714355, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04275217100868404, |
|
"grad_norm": 93.33861075914483, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -0.9671205282211304, |
|
"logits/rejected": -0.9406957626342773, |
|
"logps/chosen": -0.27761051058769226, |
|
"logps/rejected": -0.2907746732234955, |
|
"loss": 2.9342, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.7761049270629883, |
|
"rewards/margins": 0.13164177536964417, |
|
"rewards/rejected": -2.9077467918395996, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.053440213760855046, |
|
"grad_norm": 52.349708457694014, |
|
"learning_rate": 5.319148936170212e-07, |
|
"logits/chosen": -1.015834093093872, |
|
"logits/rejected": -0.9864752888679504, |
|
"logps/chosen": -0.2717323899269104, |
|
"logps/rejected": -0.27839282155036926, |
|
"loss": 3.1216, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.7173242568969727, |
|
"rewards/margins": 0.06660404056310654, |
|
"rewards/rejected": -2.783928394317627, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06412825651302605, |
|
"grad_norm": 45.104515251326376, |
|
"learning_rate": 6.382978723404255e-07, |
|
"logits/chosen": -0.9981824159622192, |
|
"logits/rejected": -0.9536676406860352, |
|
"logps/chosen": -0.2733208239078522, |
|
"logps/rejected": -0.2788906693458557, |
|
"loss": 2.9453, |
|
"rewards/accuracies": 0.4437499940395355, |
|
"rewards/chosen": -2.733208179473877, |
|
"rewards/margins": 0.055698495358228683, |
|
"rewards/rejected": -2.7889065742492676, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07481629926519706, |
|
"grad_norm": 61.54928932943931, |
|
"learning_rate": 7.446808510638297e-07, |
|
"logits/chosen": -1.051733136177063, |
|
"logits/rejected": -0.9763606190681458, |
|
"logps/chosen": -0.2938762605190277, |
|
"logps/rejected": -0.3207188844680786, |
|
"loss": 2.9156, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -2.938762664794922, |
|
"rewards/margins": 0.26842620968818665, |
|
"rewards/rejected": -3.2071890830993652, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08550434201736808, |
|
"grad_norm": 55.913783341396325, |
|
"learning_rate": 8.51063829787234e-07, |
|
"logits/chosen": -1.0160491466522217, |
|
"logits/rejected": -0.9717121124267578, |
|
"logps/chosen": -0.27992749214172363, |
|
"logps/rejected": -0.32374969124794006, |
|
"loss": 2.9079, |
|
"rewards/accuracies": 0.59375, |
|
"rewards/chosen": -2.7992749214172363, |
|
"rewards/margins": 0.43822187185287476, |
|
"rewards/rejected": -3.237496852874756, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09619238476953908, |
|
"grad_norm": 38.79733201252679, |
|
"learning_rate": 9.574468085106384e-07, |
|
"logits/chosen": -1.0506359338760376, |
|
"logits/rejected": -1.0073621273040771, |
|
"logps/chosen": -0.3326144218444824, |
|
"logps/rejected": -0.38409319519996643, |
|
"loss": 2.9658, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -3.3261444568634033, |
|
"rewards/margins": 0.5147874355316162, |
|
"rewards/rejected": -3.8409321308135986, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10688042752171009, |
|
"grad_norm": 101.77454221179983, |
|
"learning_rate": 9.998741174712533e-07, |
|
"logits/chosen": -1.028257131576538, |
|
"logits/rejected": -0.9783049821853638, |
|
"logps/chosen": -0.3342127203941345, |
|
"logps/rejected": -0.3756522536277771, |
|
"loss": 2.9987, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -3.342127561569214, |
|
"rewards/margins": 0.4143945574760437, |
|
"rewards/rejected": -3.7565224170684814, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11756847027388109, |
|
"grad_norm": 70.06029649060484, |
|
"learning_rate": 9.991050648838675e-07, |
|
"logits/chosen": -1.0614262819290161, |
|
"logits/rejected": -1.025525689125061, |
|
"logps/chosen": -0.2905944287776947, |
|
"logps/rejected": -0.35211512446403503, |
|
"loss": 2.7815, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.905944347381592, |
|
"rewards/margins": 0.6152070164680481, |
|
"rewards/rejected": -3.521151065826416, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1282565130260521, |
|
"grad_norm": 49.123079394299815, |
|
"learning_rate": 9.97637968732563e-07, |
|
"logits/chosen": -1.0964637994766235, |
|
"logits/rejected": -1.061679720878601, |
|
"logps/chosen": -0.3209289014339447, |
|
"logps/rejected": -0.3418692350387573, |
|
"loss": 2.8596, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -3.209289073944092, |
|
"rewards/margins": 0.2094031274318695, |
|
"rewards/rejected": -3.4186923503875732, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13894455577822312, |
|
"grad_norm": 53.59523574650431, |
|
"learning_rate": 9.954748808839674e-07, |
|
"logits/chosen": -1.0083563327789307, |
|
"logits/rejected": -0.9795120358467102, |
|
"logps/chosen": -0.3694208264350891, |
|
"logps/rejected": -0.4273703694343567, |
|
"loss": 2.7899, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -3.6942081451416016, |
|
"rewards/margins": 0.5794947743415833, |
|
"rewards/rejected": -4.273703098297119, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14963259853039412, |
|
"grad_norm": 39.11989937521066, |
|
"learning_rate": 9.926188266120295e-07, |
|
"logits/chosen": -1.02675461769104, |
|
"logits/rejected": -1.0018466711044312, |
|
"logps/chosen": -0.35180264711380005, |
|
"logps/rejected": -0.4284419119358063, |
|
"loss": 2.8671, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -3.518026828765869, |
|
"rewards/margins": 0.7663925290107727, |
|
"rewards/rejected": -4.284419059753418, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16032064128256512, |
|
"grad_norm": 51.11281867224414, |
|
"learning_rate": 9.890738003669027e-07, |
|
"logits/chosen": -0.9933602213859558, |
|
"logits/rejected": -0.9224111437797546, |
|
"logps/chosen": -0.3594875931739807, |
|
"logps/rejected": -0.40996867418289185, |
|
"loss": 2.7704, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -3.5948760509490967, |
|
"rewards/margins": 0.5048106908798218, |
|
"rewards/rejected": -4.099686622619629, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17100868403473615, |
|
"grad_norm": 48.022103189017436, |
|
"learning_rate": 9.848447601883433e-07, |
|
"logits/chosen": -0.9609634280204773, |
|
"logits/rejected": -0.9471040964126587, |
|
"logps/chosen": -0.35821908712387085, |
|
"logps/rejected": -0.45667845010757446, |
|
"loss": 2.6966, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -3.582190990447998, |
|
"rewards/margins": 0.984593391418457, |
|
"rewards/rejected": -4.566784858703613, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18169672678690715, |
|
"grad_norm": 54.03450562178558, |
|
"learning_rate": 9.799376207714444e-07, |
|
"logits/chosen": -0.9785356521606445, |
|
"logits/rejected": -0.9566847085952759, |
|
"logps/chosen": -0.3405897319316864, |
|
"logps/rejected": -0.4017128050327301, |
|
"loss": 2.6144, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -3.405897617340088, |
|
"rewards/margins": 0.6112309098243713, |
|
"rewards/rejected": -4.0171284675598145, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19238476953907815, |
|
"grad_norm": 62.750052897303675, |
|
"learning_rate": 9.743592451943998e-07, |
|
"logits/chosen": -1.0191900730133057, |
|
"logits/rejected": -0.9845901727676392, |
|
"logps/chosen": -0.4232923090457916, |
|
"logps/rejected": -0.5109944939613342, |
|
"loss": 2.866, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -4.23292350769043, |
|
"rewards/margins": 0.8770216107368469, |
|
"rewards/rejected": -5.109944820404053, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20307281229124916, |
|
"grad_norm": 55.61240306403997, |
|
"learning_rate": 9.681174353198686e-07, |
|
"logits/chosen": -1.1014890670776367, |
|
"logits/rejected": -1.0177241563796997, |
|
"logps/chosen": -0.4533822536468506, |
|
"logps/rejected": -0.4995104670524597, |
|
"loss": 2.7432, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -4.533822536468506, |
|
"rewards/margins": 0.4612821042537689, |
|
"rewards/rejected": -4.995104789733887, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21376085504342018, |
|
"grad_norm": 80.5027346612393, |
|
"learning_rate": 9.612209208833646e-07, |
|
"logits/chosen": -0.9957372546195984, |
|
"logits/rejected": -0.9701834917068481, |
|
"logps/chosen": -0.43816161155700684, |
|
"logps/rejected": -0.5128804445266724, |
|
"loss": 2.7813, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -4.381616592407227, |
|
"rewards/margins": 0.7471875548362732, |
|
"rewards/rejected": -5.128803253173828, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22444889779559118, |
|
"grad_norm": 66.31806821536476, |
|
"learning_rate": 9.536793472839324e-07, |
|
"logits/chosen": -0.9997787475585938, |
|
"logits/rejected": -0.947482705116272, |
|
"logps/chosen": -0.4254922866821289, |
|
"logps/rejected": -0.5347083806991577, |
|
"loss": 2.7046, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -4.254923343658447, |
|
"rewards/margins": 1.092160701751709, |
|
"rewards/rejected": -5.347084045410156, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23513694054776219, |
|
"grad_norm": 61.1266120827584, |
|
"learning_rate": 9.455032620941839e-07, |
|
"logits/chosen": -0.9583929181098938, |
|
"logits/rejected": -0.8993922472000122, |
|
"logps/chosen": -0.4909549355506897, |
|
"logps/rejected": -0.620493471622467, |
|
"loss": 2.6559, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -4.909549713134766, |
|
"rewards/margins": 1.2953848838806152, |
|
"rewards/rejected": -6.204934120178223, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2458249832999332, |
|
"grad_norm": 66.56145340935555, |
|
"learning_rate": 9.367041003085648e-07, |
|
"logits/chosen": -1.019431471824646, |
|
"logits/rejected": -0.9595627784729004, |
|
"logps/chosen": -0.5270282030105591, |
|
"logps/rejected": -0.600238025188446, |
|
"loss": 2.4928, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -5.27028226852417, |
|
"rewards/margins": 0.7320979833602905, |
|
"rewards/rejected": -6.00238037109375, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2565130260521042, |
|
"grad_norm": 66.16205862286387, |
|
"learning_rate": 9.272941683504808e-07, |
|
"logits/chosen": -0.9745362997055054, |
|
"logits/rejected": -0.8843653798103333, |
|
"logps/chosen": -0.5472803115844727, |
|
"logps/rejected": -0.7492850422859192, |
|
"loss": 2.3982, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -5.472803115844727, |
|
"rewards/margins": 2.020047187805176, |
|
"rewards/rejected": -7.492850303649902, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26720106880427524, |
|
"grad_norm": 70.88843943146098, |
|
"learning_rate": 9.172866268606513e-07, |
|
"logits/chosen": -1.0500959157943726, |
|
"logits/rejected": -1.007611632347107, |
|
"logps/chosen": -0.6212247610092163, |
|
"logps/rejected": -0.7247714996337891, |
|
"loss": 2.3233, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -6.212247371673584, |
|
"rewards/margins": 1.0354671478271484, |
|
"rewards/rejected": -7.247714042663574, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.27788911155644624, |
|
"grad_norm": 94.40161191780366, |
|
"learning_rate": 9.066954722907638e-07, |
|
"logits/chosen": -1.0675666332244873, |
|
"logits/rejected": -1.0614221096038818, |
|
"logps/chosen": -0.6142371892929077, |
|
"logps/rejected": -0.8813148736953735, |
|
"loss": 2.1102, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -6.1423726081848145, |
|
"rewards/margins": 2.670776844024658, |
|
"rewards/rejected": -8.813148498535156, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.28857715430861725, |
|
"grad_norm": 71.42739738901432, |
|
"learning_rate": 8.955355173281707e-07, |
|
"logits/chosen": -1.0529481172561646, |
|
"logits/rejected": -1.0047996044158936, |
|
"logps/chosen": -0.7235802412033081, |
|
"logps/rejected": -0.8823626637458801, |
|
"loss": 2.1377, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -7.235803127288818, |
|
"rewards/margins": 1.5878244638442993, |
|
"rewards/rejected": -8.823626518249512, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29926519706078825, |
|
"grad_norm": 87.9759333714625, |
|
"learning_rate": 8.838223701790055e-07, |
|
"logits/chosen": -1.1124293804168701, |
|
"logits/rejected": -1.0896517038345337, |
|
"logps/chosen": -0.862978458404541, |
|
"logps/rejected": -1.0037717819213867, |
|
"loss": 2.1017, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -8.62978458404541, |
|
"rewards/margins": 1.4079326391220093, |
|
"rewards/rejected": -10.037717819213867, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.30995323981295925, |
|
"grad_norm": 78.07225371686874, |
|
"learning_rate": 8.71572412738697e-07, |
|
"logits/chosen": -1.030829906463623, |
|
"logits/rejected": -1.0042556524276733, |
|
"logps/chosen": -0.8588500022888184, |
|
"logps/rejected": -1.1039783954620361, |
|
"loss": 2.0002, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -8.588499069213867, |
|
"rewards/margins": 2.4512839317321777, |
|
"rewards/rejected": -11.039785385131836, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.32064128256513025, |
|
"grad_norm": 70.30730129459549, |
|
"learning_rate": 8.588027776804058e-07, |
|
"logits/chosen": -1.060490369796753, |
|
"logits/rejected": -1.0404036045074463, |
|
"logps/chosen": -0.9423840641975403, |
|
"logps/rejected": -1.1874125003814697, |
|
"loss": 1.9455, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -9.42384147644043, |
|
"rewards/margins": 2.4502837657928467, |
|
"rewards/rejected": -11.874125480651855, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33132932531730125, |
|
"grad_norm": 81.54625041986957, |
|
"learning_rate": 8.455313244934324e-07, |
|
"logits/chosen": -1.0910407304763794, |
|
"logits/rejected": -1.0684020519256592, |
|
"logps/chosen": -0.9991434812545776, |
|
"logps/rejected": -1.3156726360321045, |
|
"loss": 2.0451, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -9.991434097290039, |
|
"rewards/margins": 3.165290355682373, |
|
"rewards/rejected": -13.15672492980957, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3420173680694723, |
|
"grad_norm": 78.5490421908409, |
|
"learning_rate": 8.317766145051057e-07, |
|
"logits/chosen": -1.109403371810913, |
|
"logits/rejected": -1.090001106262207, |
|
"logps/chosen": -1.1215949058532715, |
|
"logps/rejected": -1.5121821165084839, |
|
"loss": 1.9436, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -11.215949058532715, |
|
"rewards/margins": 3.9058711528778076, |
|
"rewards/rejected": -15.121821403503418, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3527054108216433, |
|
"grad_norm": 59.08371857927558, |
|
"learning_rate": 8.175578849210894e-07, |
|
"logits/chosen": -1.1232795715332031, |
|
"logits/rejected": -1.0980435609817505, |
|
"logps/chosen": -1.0903780460357666, |
|
"logps/rejected": -1.459205150604248, |
|
"loss": 1.8384, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -10.903780937194824, |
|
"rewards/margins": 3.688269853591919, |
|
"rewards/rejected": -14.59205150604248, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3633934535738143, |
|
"grad_norm": 85.71218468828272, |
|
"learning_rate": 8.028950219204099e-07, |
|
"logits/chosen": -1.1307361125946045, |
|
"logits/rejected": -1.1074953079223633, |
|
"logps/chosen": -1.0654685497283936, |
|
"logps/rejected": -1.4472792148590088, |
|
"loss": 1.7884, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -10.654685020446777, |
|
"rewards/margins": 3.818106174468994, |
|
"rewards/rejected": -14.47279167175293, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3740814963259853, |
|
"grad_norm": 92.85957749639208, |
|
"learning_rate": 7.878085328428368e-07, |
|
"logits/chosen": -1.1518357992172241, |
|
"logits/rejected": -1.102372407913208, |
|
"logps/chosen": -1.1460392475128174, |
|
"logps/rejected": -1.4155685901641846, |
|
"loss": 1.6771, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -11.460393905639648, |
|
"rewards/margins": 2.695291757583618, |
|
"rewards/rejected": -14.155685424804688, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3847695390781563, |
|
"grad_norm": 75.98858315922392, |
|
"learning_rate": 7.723195175075135e-07, |
|
"logits/chosen": -1.0996112823486328, |
|
"logits/rejected": -1.0788969993591309, |
|
"logps/chosen": -1.1098445653915405, |
|
"logps/rejected": -1.476881504058838, |
|
"loss": 1.6011, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -11.098443984985352, |
|
"rewards/margins": 3.6703686714172363, |
|
"rewards/rejected": -14.76881217956543, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3954575818303273, |
|
"grad_norm": 88.81502196023631, |
|
"learning_rate": 7.564496387029531e-07, |
|
"logits/chosen": -1.1378796100616455, |
|
"logits/rejected": -1.0828906297683716, |
|
"logps/chosen": -1.1474685668945312, |
|
"logps/rejected": -1.5796287059783936, |
|
"loss": 1.6663, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -11.474684715270996, |
|
"rewards/margins": 4.321602821350098, |
|
"rewards/rejected": -15.796287536621094, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4061456245824983, |
|
"grad_norm": 90.03714203036446, |
|
"learning_rate": 7.402210918896689e-07, |
|
"logits/chosen": -1.1278326511383057, |
|
"logits/rejected": -1.1358839273452759, |
|
"logps/chosen": -1.2729408740997314, |
|
"logps/rejected": -1.7558482885360718, |
|
"loss": 1.5442, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -12.729410171508789, |
|
"rewards/margins": 4.829073905944824, |
|
"rewards/rejected": -17.558483123779297, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4168336673346693, |
|
"grad_norm": 72.89600233357321, |
|
"learning_rate": 7.236565741578162e-07, |
|
"logits/chosen": -1.0958189964294434, |
|
"logits/rejected": -1.076554775238037, |
|
"logps/chosen": -1.2896816730499268, |
|
"logps/rejected": -1.6636635065078735, |
|
"loss": 1.6021, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -12.896817207336426, |
|
"rewards/margins": 3.7398200035095215, |
|
"rewards/rejected": -16.636634826660156, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.42752171008684037, |
|
"grad_norm": 93.9340667463585, |
|
"learning_rate": 7.067792524832603e-07, |
|
"logits/chosen": -1.0816549062728882, |
|
"logits/rejected": -1.0706536769866943, |
|
"logps/chosen": -1.3197344541549683, |
|
"logps/rejected": -1.7450058460235596, |
|
"loss": 1.5092, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -13.197346687316895, |
|
"rewards/margins": 4.252710819244385, |
|
"rewards/rejected": -17.450056076049805, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43820975283901137, |
|
"grad_norm": 91.15403821743105, |
|
"learning_rate": 6.896127313264642e-07, |
|
"logits/chosen": -1.1295057535171509, |
|
"logits/rejected": -1.0786478519439697, |
|
"logps/chosen": -1.3944091796875, |
|
"logps/rejected": -1.8417927026748657, |
|
"loss": 1.7223, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -13.944093704223633, |
|
"rewards/margins": 4.473834037780762, |
|
"rewards/rejected": -18.417926788330078, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.44889779559118237, |
|
"grad_norm": 99.88773415242756, |
|
"learning_rate": 6.721810196195174e-07, |
|
"logits/chosen": -1.1591789722442627, |
|
"logits/rejected": -1.147062063217163, |
|
"logps/chosen": -1.3990533351898193, |
|
"logps/rejected": -1.8112404346466064, |
|
"loss": 1.6082, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -13.990533828735352, |
|
"rewards/margins": 4.121870040893555, |
|
"rewards/rejected": -18.112403869628906, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45958583834335337, |
|
"grad_norm": 108.24791172325133, |
|
"learning_rate": 6.545084971874736e-07, |
|
"logits/chosen": -1.1198530197143555, |
|
"logits/rejected": -1.101109504699707, |
|
"logps/chosen": -1.390649437904358, |
|
"logps/rejected": -1.8630450963974, |
|
"loss": 1.4791, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -13.906494140625, |
|
"rewards/margins": 4.7239580154418945, |
|
"rewards/rejected": -18.630451202392578, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.47027388109552437, |
|
"grad_norm": 95.83911690989143, |
|
"learning_rate": 6.3661988065096e-07, |
|
"logits/chosen": -1.1780140399932861, |
|
"logits/rejected": -1.1579878330230713, |
|
"logps/chosen": -1.4568861722946167, |
|
"logps/rejected": -1.9470503330230713, |
|
"loss": 1.4586, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -14.56886100769043, |
|
"rewards/margins": 4.901640892028809, |
|
"rewards/rejected": -19.470502853393555, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48096192384769537, |
|
"grad_norm": 69.10471107204022, |
|
"learning_rate": 6.185401888577487e-07, |
|
"logits/chosen": -1.1474467515945435, |
|
"logits/rejected": -1.1124647855758667, |
|
"logps/chosen": -1.48002028465271, |
|
"logps/rejected": -1.9400886297225952, |
|
"loss": 1.4409, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -14.800203323364258, |
|
"rewards/margins": 4.600685119628906, |
|
"rewards/rejected": -19.400888442993164, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4916499665998664, |
|
"grad_norm": 80.62484140193865, |
|
"learning_rate": 6.002947078916364e-07, |
|
"logits/chosen": -1.2124546766281128, |
|
"logits/rejected": -1.161115050315857, |
|
"logps/chosen": -1.4423153400421143, |
|
"logps/rejected": -1.9036369323730469, |
|
"loss": 1.3817, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -14.423154830932617, |
|
"rewards/margins": 4.613214015960693, |
|
"rewards/rejected": -19.03636932373047, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5023380093520374, |
|
"grad_norm": 84.05912123531321, |
|
"learning_rate": 5.819089557075688e-07, |
|
"logits/chosen": -1.2311934232711792, |
|
"logits/rejected": -1.2020883560180664, |
|
"logps/chosen": -1.4844694137573242, |
|
"logps/rejected": -1.9821853637695312, |
|
"loss": 1.4172, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -14.844694137573242, |
|
"rewards/margins": 4.97715950012207, |
|
"rewards/rejected": -19.821855545043945, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5130260521042084, |
|
"grad_norm": 84.00316536161533, |
|
"learning_rate": 5.634086464424742e-07, |
|
"logits/chosen": -1.198540449142456, |
|
"logits/rejected": -1.1992590427398682, |
|
"logps/chosen": -1.3957428932189941, |
|
"logps/rejected": -1.8944737911224365, |
|
"loss": 1.4343, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -13.957429885864258, |
|
"rewards/margins": 4.987309455871582, |
|
"rewards/rejected": -18.944738388061523, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5237140948563794, |
|
"grad_norm": 137.49078119090206, |
|
"learning_rate": 5.448196544517167e-07, |
|
"logits/chosen": -1.2955886125564575, |
|
"logits/rejected": -1.23685622215271, |
|
"logps/chosen": -1.460442066192627, |
|
"logps/rejected": -2.0612359046936035, |
|
"loss": 1.3532, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -14.60442066192627, |
|
"rewards/margins": 6.007939338684082, |
|
"rewards/rejected": -20.612361907958984, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5344021376085505, |
|
"grad_norm": 129.54289500612722, |
|
"learning_rate": 5.26167978121472e-07, |
|
"logits/chosen": -1.2238231897354126, |
|
"logits/rejected": -1.2080833911895752, |
|
"logps/chosen": -1.5243932008743286, |
|
"logps/rejected": -2.1077561378479004, |
|
"loss": 1.3459, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.243929862976074, |
|
"rewards/margins": 5.833629608154297, |
|
"rewards/rejected": -21.077558517456055, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5450901803607214, |
|
"grad_norm": 102.89768684384153, |
|
"learning_rate": 5.074797035076318e-07, |
|
"logits/chosen": -1.267345905303955, |
|
"logits/rejected": -1.2413192987442017, |
|
"logps/chosen": -1.6330616474151611, |
|
"logps/rejected": -2.104926824569702, |
|
"loss": 1.4434, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -16.330615997314453, |
|
"rewards/margins": 4.718654155731201, |
|
"rewards/rejected": -21.049266815185547, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5557782231128925, |
|
"grad_norm": 89.20095630673174, |
|
"learning_rate": 4.887809678520975e-07, |
|
"logits/chosen": -1.245793104171753, |
|
"logits/rejected": -1.214970350265503, |
|
"logps/chosen": -1.554158091545105, |
|
"logps/rejected": -2.0427088737487793, |
|
"loss": 1.4276, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -15.541582107543945, |
|
"rewards/margins": 4.8855085372924805, |
|
"rewards/rejected": -20.42708969116211, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5664662658650634, |
|
"grad_norm": 76.055827552827, |
|
"learning_rate": 4.700979230274829e-07, |
|
"logits/chosen": -1.2102077007293701, |
|
"logits/rejected": -1.1913068294525146, |
|
"logps/chosen": -1.6448841094970703, |
|
"logps/rejected": -2.155822277069092, |
|
"loss": 1.3609, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -16.448841094970703, |
|
"rewards/margins": 5.109385967254639, |
|
"rewards/rejected": -21.5582275390625, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5771543086172345, |
|
"grad_norm": 129.8909118017969, |
|
"learning_rate": 4.514566989613559e-07, |
|
"logits/chosen": -1.2157796621322632, |
|
"logits/rejected": -1.186073899269104, |
|
"logps/chosen": -1.4407769441604614, |
|
"logps/rejected": -1.9774402379989624, |
|
"loss": 1.2996, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -14.407770156860352, |
|
"rewards/margins": 5.366633415222168, |
|
"rewards/rejected": -19.774402618408203, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5878423513694054, |
|
"grad_norm": 74.47995587471961, |
|
"learning_rate": 4.328833670911724e-07, |
|
"logits/chosen": -1.177504301071167, |
|
"logits/rejected": -1.1408427953720093, |
|
"logps/chosen": -1.4323005676269531, |
|
"logps/rejected": -1.8869625329971313, |
|
"loss": 1.4405, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -14.323003768920898, |
|
"rewards/margins": 4.546619892120361, |
|
"rewards/rejected": -18.869625091552734, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5985303941215765, |
|
"grad_norm": 96.7891504750656, |
|
"learning_rate": 4.144039039010124e-07, |
|
"logits/chosen": -1.262804627418518, |
|
"logits/rejected": -1.2378443479537964, |
|
"logps/chosen": -1.4722181558609009, |
|
"logps/rejected": -2.023758888244629, |
|
"loss": 1.3202, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -14.72218132019043, |
|
"rewards/margins": 5.515408515930176, |
|
"rewards/rejected": -20.23758888244629, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6092184368737475, |
|
"grad_norm": 98.51578082175142, |
|
"learning_rate": 3.960441545911204e-07, |
|
"logits/chosen": -1.2408018112182617, |
|
"logits/rejected": -1.2075875997543335, |
|
"logps/chosen": -1.5188751220703125, |
|
"logps/rejected": -2.0878236293792725, |
|
"loss": 1.0977, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -15.188751220703125, |
|
"rewards/margins": 5.689483642578125, |
|
"rewards/rejected": -20.878236770629883, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6199064796259185, |
|
"grad_norm": 77.81254701258105, |
|
"learning_rate": 3.778297969310529e-07, |
|
"logits/chosen": -1.2707680463790894, |
|
"logits/rejected": -1.2261282205581665, |
|
"logps/chosen": -1.5314843654632568, |
|
"logps/rejected": -2.0101191997528076, |
|
"loss": 1.3577, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -15.314845085144043, |
|
"rewards/margins": 4.786349296569824, |
|
"rewards/rejected": -20.101192474365234, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6305945223780896, |
|
"grad_norm": 87.64994632483507, |
|
"learning_rate": 3.5978630534699865e-07, |
|
"logits/chosen": -1.199864387512207, |
|
"logits/rejected": -1.1842243671417236, |
|
"logps/chosen": -1.545689344406128, |
|
"logps/rejected": -2.0575714111328125, |
|
"loss": 1.1919, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -15.456893920898438, |
|
"rewards/margins": 5.118819713592529, |
|
"rewards/rejected": -20.575714111328125, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6412825651302605, |
|
"grad_norm": 84.61392252215398, |
|
"learning_rate": 3.4193891529348795e-07, |
|
"logits/chosen": -1.1328258514404297, |
|
"logits/rejected": -1.1063092947006226, |
|
"logps/chosen": -1.631317138671875, |
|
"logps/rejected": -2.079132556915283, |
|
"loss": 1.6288, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -16.31317138671875, |
|
"rewards/margins": 4.478152275085449, |
|
"rewards/rejected": -20.791322708129883, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6519706078824316, |
|
"grad_norm": 84.46013666927763, |
|
"learning_rate": 3.243125879593286e-07, |
|
"logits/chosen": -1.2454413175582886, |
|
"logits/rejected": -1.1997601985931396, |
|
"logps/chosen": -1.6037687063217163, |
|
"logps/rejected": -2.0645315647125244, |
|
"loss": 1.3155, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -16.03768539428711, |
|
"rewards/margins": 4.6076273918151855, |
|
"rewards/rejected": -20.645313262939453, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6626586506346025, |
|
"grad_norm": 91.59670184758677, |
|
"learning_rate": 3.069319753571269e-07, |
|
"logits/chosen": -1.2738150358200073, |
|
"logits/rejected": -1.253278136253357, |
|
"logps/chosen": -1.6317838430404663, |
|
"logps/rejected": -2.138291835784912, |
|
"loss": 1.372, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -16.31783676147461, |
|
"rewards/margins": 5.065082550048828, |
|
"rewards/rejected": -21.382923126220703, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6733466933867736, |
|
"grad_norm": 87.02614481244046, |
|
"learning_rate": 2.898213858452173e-07, |
|
"logits/chosen": -1.284517526626587, |
|
"logits/rejected": -1.2254732847213745, |
|
"logps/chosen": -1.618208885192871, |
|
"logps/rejected": -2.1373062133789062, |
|
"loss": 1.3415, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.182092666625977, |
|
"rewards/margins": 5.190975189208984, |
|
"rewards/rejected": -21.373065948486328, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6840347361389446, |
|
"grad_norm": 104.27219685818618, |
|
"learning_rate": 2.730047501302266e-07, |
|
"logits/chosen": -1.2704033851623535, |
|
"logits/rejected": -1.2657862901687622, |
|
"logps/chosen": -1.6442874670028687, |
|
"logps/rejected": -2.2715744972229004, |
|
"loss": 1.2614, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -16.4428768157959, |
|
"rewards/margins": 6.2728681564331055, |
|
"rewards/rejected": -22.715742111206055, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6947227788911156, |
|
"grad_norm": 78.77445808060149, |
|
"learning_rate": 2.5650558779781635e-07, |
|
"logits/chosen": -1.2807691097259521, |
|
"logits/rejected": -1.2303869724273682, |
|
"logps/chosen": -1.6990268230438232, |
|
"logps/rejected": -2.368220329284668, |
|
"loss": 1.3078, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.99026870727539, |
|
"rewards/margins": 6.6919355392456055, |
|
"rewards/rejected": -23.682205200195312, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7054108216432866, |
|
"grad_norm": 70.04351714156043, |
|
"learning_rate": 2.403469744184154e-07, |
|
"logits/chosen": -1.1783530712127686, |
|
"logits/rejected": -1.136584758758545, |
|
"logps/chosen": -1.6521613597869873, |
|
"logps/rejected": -2.1305041313171387, |
|
"loss": 1.3592, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -16.5216121673584, |
|
"rewards/margins": 4.783430099487305, |
|
"rewards/rejected": -21.305042266845703, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7160988643954576, |
|
"grad_norm": 75.03379354143011, |
|
"learning_rate": 2.2455150927394878e-07, |
|
"logits/chosen": -1.2156535387039185, |
|
"logits/rejected": -1.1975212097167969, |
|
"logps/chosen": -1.6360639333724976, |
|
"logps/rejected": -2.187391757965088, |
|
"loss": 1.1952, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -16.360637664794922, |
|
"rewards/margins": 5.513278484344482, |
|
"rewards/rejected": -21.873918533325195, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7267869071476286, |
|
"grad_norm": 103.30210442360509, |
|
"learning_rate": 2.0914128375069722e-07, |
|
"logits/chosen": -1.2379086017608643, |
|
"logits/rejected": -1.2029554843902588, |
|
"logps/chosen": -1.5814708471298218, |
|
"logps/rejected": -2.1416497230529785, |
|
"loss": 1.3219, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -15.814706802368164, |
|
"rewards/margins": 5.601790428161621, |
|
"rewards/rejected": -21.41649627685547, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7374749498997996, |
|
"grad_norm": 87.41940209533863, |
|
"learning_rate": 1.9413785044249676e-07, |
|
"logits/chosen": -1.254070520401001, |
|
"logits/rejected": -1.2306808233261108, |
|
"logps/chosen": -1.665123701095581, |
|
"logps/rejected": -2.303457021713257, |
|
"loss": 1.3788, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -16.65123748779297, |
|
"rewards/margins": 6.3833327293396, |
|
"rewards/rejected": -23.034570693969727, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7481629926519706, |
|
"grad_norm": 147.74903637059256, |
|
"learning_rate": 1.7956219300748792e-07, |
|
"logits/chosen": -1.2324841022491455, |
|
"logits/rejected": -1.2356057167053223, |
|
"logps/chosen": -1.5469470024108887, |
|
"logps/rejected": -2.0821375846862793, |
|
"loss": 1.2883, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -15.469470024108887, |
|
"rewards/margins": 5.351906776428223, |
|
"rewards/rejected": -20.82137680053711, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7588510354041417, |
|
"grad_norm": 66.59688038674247, |
|
"learning_rate": 1.6543469682057104e-07, |
|
"logits/chosen": -1.1590429544448853, |
|
"logits/rejected": -1.1739274263381958, |
|
"logps/chosen": -1.5381479263305664, |
|
"logps/rejected": -2.0882415771484375, |
|
"loss": 1.181, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.381479263305664, |
|
"rewards/margins": 5.500934600830078, |
|
"rewards/rejected": -20.882413864135742, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7695390781563126, |
|
"grad_norm": 79.30848988409956, |
|
"learning_rate": 1.5177512046261666e-07, |
|
"logits/chosen": -1.2186603546142578, |
|
"logits/rejected": -1.2177612781524658, |
|
"logps/chosen": -1.5483216047286987, |
|
"logps/rejected": -2.190535306930542, |
|
"loss": 1.3007, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -15.48321533203125, |
|
"rewards/margins": 6.422137260437012, |
|
"rewards/rejected": -21.905353546142578, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7802271209084837, |
|
"grad_norm": 71.1995833686848, |
|
"learning_rate": 1.3860256808630427e-07, |
|
"logits/chosen": -1.2522964477539062, |
|
"logits/rejected": -1.1880736351013184, |
|
"logps/chosen": -1.599200963973999, |
|
"logps/rejected": -2.2274394035339355, |
|
"loss": 1.294, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -15.992010116577148, |
|
"rewards/margins": 6.28238582611084, |
|
"rewards/rejected": -22.274394989013672, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7909151636606546, |
|
"grad_norm": 97.88071644925103, |
|
"learning_rate": 1.2593546269723647e-07, |
|
"logits/chosen": -1.1726973056793213, |
|
"logits/rejected": -1.1615909337997437, |
|
"logps/chosen": -1.5849040746688843, |
|
"logps/rejected": -2.063690662384033, |
|
"loss": 1.2653, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.849041938781738, |
|
"rewards/margins": 4.787867546081543, |
|
"rewards/rejected": -20.63690948486328, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8016032064128257, |
|
"grad_norm": 93.22521656476633, |
|
"learning_rate": 1.1379152038770029e-07, |
|
"logits/chosen": -1.2195771932601929, |
|
"logits/rejected": -1.223771095275879, |
|
"logps/chosen": -1.7087455987930298, |
|
"logps/rejected": -2.2848830223083496, |
|
"loss": 1.2583, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -17.087453842163086, |
|
"rewards/margins": 5.76137638092041, |
|
"rewards/rejected": -22.848833084106445, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8122912491649966, |
|
"grad_norm": 122.65282224004734, |
|
"learning_rate": 1.0218772555910954e-07, |
|
"logits/chosen": -1.2245140075683594, |
|
"logits/rejected": -1.2064614295959473, |
|
"logps/chosen": -1.5752016305923462, |
|
"logps/rejected": -2.1021199226379395, |
|
"loss": 1.4127, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.752016067504883, |
|
"rewards/margins": 5.2691850662231445, |
|
"rewards/rejected": -21.021198272705078, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8229792919171677, |
|
"grad_norm": 79.05677313510314, |
|
"learning_rate": 9.114030716778432e-08, |
|
"logits/chosen": -1.2155396938323975, |
|
"logits/rejected": -1.194136142730713, |
|
"logps/chosen": -1.5979677438735962, |
|
"logps/rejected": -2.291325330734253, |
|
"loss": 1.0803, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -15.9796781539917, |
|
"rewards/margins": 6.9335784912109375, |
|
"rewards/rejected": -22.91325569152832, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8336673346693386, |
|
"grad_norm": 83.81957692142457, |
|
"learning_rate": 8.066471602728803e-08, |
|
"logits/chosen": -1.223331093788147, |
|
"logits/rejected": -1.209160327911377, |
|
"logps/chosen": -1.668593406677246, |
|
"logps/rejected": -2.259793519973755, |
|
"loss": 1.2346, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -16.68593406677246, |
|
"rewards/margins": 5.911999702453613, |
|
"rewards/rejected": -22.59793472290039, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8443553774215097, |
|
"grad_norm": 76.36054598990746, |
|
"learning_rate": 7.077560319906694e-08, |
|
"logits/chosen": -1.2313239574432373, |
|
"logits/rejected": -1.211395502090454, |
|
"logps/chosen": -1.5763094425201416, |
|
"logps/rejected": -2.1317121982574463, |
|
"loss": 1.2849, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -15.763093948364258, |
|
"rewards/margins": 5.554028511047363, |
|
"rewards/rejected": -21.317119598388672, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"grad_norm": 62.72495111163961, |
|
"learning_rate": 6.148679950161672e-08, |
|
"logits/chosen": -1.2403868436813354, |
|
"logits/rejected": -1.2243949174880981, |
|
"logps/chosen": -1.6198228597640991, |
|
"logps/rejected": -2.1331706047058105, |
|
"loss": 1.2445, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -16.19822883605957, |
|
"rewards/margins": 5.133477687835693, |
|
"rewards/rejected": -21.331707000732422, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"eval_logits/chosen": -1.4456316232681274, |
|
"eval_logits/rejected": -1.4547516107559204, |
|
"eval_logps/chosen": -1.623605728149414, |
|
"eval_logps/rejected": -2.176786422729492, |
|
"eval_loss": 1.3307912349700928, |
|
"eval_rewards/accuracies": 0.8353658318519592, |
|
"eval_rewards/chosen": -16.23605728149414, |
|
"eval_rewards/margins": 5.531808376312256, |
|
"eval_rewards/rejected": -21.767864227294922, |
|
"eval_runtime": 94.8719, |
|
"eval_samples_per_second": 20.67, |
|
"eval_steps_per_second": 1.296, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8657314629258517, |
|
"grad_norm": 99.36460974571031, |
|
"learning_rate": 5.2811296166831666e-08, |
|
"logits/chosen": -1.2008370161056519, |
|
"logits/rejected": -1.2194417715072632, |
|
"logps/chosen": -1.7075388431549072, |
|
"logps/rejected": -2.2549824714660645, |
|
"loss": 1.2048, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -17.075387954711914, |
|
"rewards/margins": 5.474437713623047, |
|
"rewards/rejected": -22.54982566833496, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8764195056780227, |
|
"grad_norm": 138.34780783301264, |
|
"learning_rate": 4.4761226670592066e-08, |
|
"logits/chosen": -1.216778039932251, |
|
"logits/rejected": -1.2035914659500122, |
|
"logps/chosen": -1.6350791454315186, |
|
"logps/rejected": -2.172778844833374, |
|
"loss": 1.3538, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -16.350793838500977, |
|
"rewards/margins": 5.376997947692871, |
|
"rewards/rejected": -21.727787017822266, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8871075484301937, |
|
"grad_norm": 72.98512679113071, |
|
"learning_rate": 3.734784976300165e-08, |
|
"logits/chosen": -1.2200865745544434, |
|
"logits/rejected": -1.1671762466430664, |
|
"logps/chosen": -1.5793800354003906, |
|
"logps/rejected": -2.232057571411133, |
|
"loss": 1.3478, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.793802261352539, |
|
"rewards/margins": 6.5267744064331055, |
|
"rewards/rejected": -22.32057762145996, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8977955911823647, |
|
"grad_norm": 98.07615613251582, |
|
"learning_rate": 3.058153372200695e-08, |
|
"logits/chosen": -1.2454715967178345, |
|
"logits/rejected": -1.195953607559204, |
|
"logps/chosen": -1.5281785726547241, |
|
"logps/rejected": -2.1256656646728516, |
|
"loss": 1.2056, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.28178596496582, |
|
"rewards/margins": 5.974873065948486, |
|
"rewards/rejected": -21.25665855407715, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9084836339345357, |
|
"grad_norm": 106.16711447135498, |
|
"learning_rate": 2.4471741852423233e-08, |
|
"logits/chosen": -1.2416163682937622, |
|
"logits/rejected": -1.231783390045166, |
|
"logps/chosen": -1.7100231647491455, |
|
"logps/rejected": -2.2363858222961426, |
|
"loss": 1.4487, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -17.100229263305664, |
|
"rewards/margins": 5.263625621795654, |
|
"rewards/rejected": -22.36385726928711, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9191716766867067, |
|
"grad_norm": 104.40358802274892, |
|
"learning_rate": 1.9027019250647036e-08, |
|
"logits/chosen": -1.2276403903961182, |
|
"logits/rejected": -1.211700439453125, |
|
"logps/chosen": -1.6992714405059814, |
|
"logps/rejected": -2.2929625511169434, |
|
"loss": 1.2603, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -16.992717742919922, |
|
"rewards/margins": 5.9369096755981445, |
|
"rewards/rejected": -22.92962646484375, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9298597194388778, |
|
"grad_norm": 76.71410236428167, |
|
"learning_rate": 1.4254980853566246e-08, |
|
"logits/chosen": -1.1829754114151, |
|
"logits/rejected": -1.1444637775421143, |
|
"logps/chosen": -1.5611233711242676, |
|
"logps/rejected": -2.1490211486816406, |
|
"loss": 1.2066, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -15.611233711242676, |
|
"rewards/margins": 5.8789777755737305, |
|
"rewards/rejected": -21.490211486816406, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9405477621910487, |
|
"grad_norm": 89.37849635838704, |
|
"learning_rate": 1.016230078838226e-08, |
|
"logits/chosen": -1.2062740325927734, |
|
"logits/rejected": -1.148478627204895, |
|
"logps/chosen": -1.6622101068496704, |
|
"logps/rejected": -2.1926727294921875, |
|
"loss": 1.253, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -16.622098922729492, |
|
"rewards/margins": 5.304628849029541, |
|
"rewards/rejected": -21.926727294921875, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9512358049432198, |
|
"grad_norm": 78.38263983661439, |
|
"learning_rate": 6.754703038239329e-09, |
|
"logits/chosen": -1.1661369800567627, |
|
"logits/rejected": -1.1493126153945923, |
|
"logps/chosen": -1.6586837768554688, |
|
"logps/rejected": -2.293992280960083, |
|
"loss": 1.0815, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -16.58683967590332, |
|
"rewards/margins": 6.353082180023193, |
|
"rewards/rejected": -22.939918518066406, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9619238476953907, |
|
"grad_norm": 85.39108439896182, |
|
"learning_rate": 4.036953436716895e-09, |
|
"logits/chosen": -1.2724522352218628, |
|
"logits/rejected": -1.2523143291473389, |
|
"logps/chosen": -1.607690453529358, |
|
"logps/rejected": -2.168273448944092, |
|
"loss": 1.3282, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -16.076906204223633, |
|
"rewards/margins": 5.605828285217285, |
|
"rewards/rejected": -21.682735443115234, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9726118904475618, |
|
"grad_norm": 102.09796631455698, |
|
"learning_rate": 2.0128530023804656e-09, |
|
"logits/chosen": -1.2248659133911133, |
|
"logits/rejected": -1.1913433074951172, |
|
"logps/chosen": -1.6112314462661743, |
|
"logps/rejected": -2.2650115489959717, |
|
"loss": 1.0445, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -16.112314224243164, |
|
"rewards/margins": 6.537802696228027, |
|
"rewards/rejected": -22.650117874145508, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9832999331997327, |
|
"grad_norm": 90.8560495778911, |
|
"learning_rate": 6.852326227130833e-10, |
|
"logits/chosen": -1.2340514659881592, |
|
"logits/rejected": -1.2225282192230225, |
|
"logps/chosen": -1.6787292957305908, |
|
"logps/rejected": -2.2820496559143066, |
|
"loss": 1.1894, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -16.787290573120117, |
|
"rewards/margins": 6.033202648162842, |
|
"rewards/rejected": -22.82049560546875, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9939879759519038, |
|
"grad_norm": 86.47798441930765, |
|
"learning_rate": 5.594909486328348e-11, |
|
"logits/chosen": -1.2089763879776, |
|
"logits/rejected": -1.2146103382110596, |
|
"logps/chosen": -1.703181266784668, |
|
"logps/rejected": -2.318962335586548, |
|
"loss": 1.3219, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -17.03181266784668, |
|
"rewards/margins": 6.157810688018799, |
|
"rewards/rejected": -23.189624786376953, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9982631930527722, |
|
"step": 467, |
|
"total_flos": 0.0, |
|
"train_loss": 1.8032665589636858, |
|
"train_runtime": 11474.0462, |
|
"train_samples_per_second": 5.218, |
|
"train_steps_per_second": 0.041 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 467, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|