|
{ |
|
"best_metric": 0.6289177536964417, |
|
"best_model_checkpoint": "models/llama-3.2-3b-sft-dpo/checkpoint-500", |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 633, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.004739336492890996, |
|
"grad_norm": 18.306584799400138, |
|
"learning_rate": 5.2631578947368416e-08, |
|
"logits/chosen": 1.1032867431640625, |
|
"logits/rejected": 1.1176480054855347, |
|
"logps/chosen": -175.54205322265625, |
|
"logps/rejected": -196.64266967773438, |
|
"loss": 1.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.04739336492890995, |
|
"grad_norm": 18.19518017806804, |
|
"learning_rate": 5.263157894736842e-07, |
|
"logits/chosen": 0.6209686994552612, |
|
"logits/rejected": 0.7449740171432495, |
|
"logps/chosen": -350.8912658691406, |
|
"logps/rejected": -307.96142578125, |
|
"loss": 0.9979, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.00011829059076262638, |
|
"rewards/margins": 0.016186419874429703, |
|
"rewards/rejected": -0.016068127006292343, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0947867298578199, |
|
"grad_norm": 15.415652807377189, |
|
"learning_rate": 9.99993455114332e-07, |
|
"logits/chosen": 0.9229280352592468, |
|
"logits/rejected": 0.8609384298324585, |
|
"logps/chosen": -252.894775390625, |
|
"logps/rejected": -263.6702575683594, |
|
"loss": 0.9588, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.0033816881477832794, |
|
"rewards/margins": 0.16803663969039917, |
|
"rewards/rejected": -0.164654940366745, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14218009478672985, |
|
"grad_norm": 12.850588595957225, |
|
"learning_rate": 9.992082761369566e-07, |
|
"logits/chosen": 0.8715411424636841, |
|
"logits/rejected": 0.8170267343521118, |
|
"logps/chosen": -296.8494567871094, |
|
"logps/rejected": -305.7926025390625, |
|
"loss": 0.8133, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.6128842830657959, |
|
"rewards/margins": 1.1374889612197876, |
|
"rewards/rejected": -0.5246046781539917, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1895734597156398, |
|
"grad_norm": 14.501186311778227, |
|
"learning_rate": 9.971164749660148e-07, |
|
"logits/chosen": 0.9155582189559937, |
|
"logits/rejected": 0.9567469358444214, |
|
"logps/chosen": -313.08514404296875, |
|
"logps/rejected": -309.0679626464844, |
|
"loss": 0.7405, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.23792271316051483, |
|
"rewards/margins": 2.1163926124572754, |
|
"rewards/rejected": -1.878469467163086, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.23696682464454977, |
|
"grad_norm": 11.740811645701724, |
|
"learning_rate": 9.937235266586424e-07, |
|
"logits/chosen": 0.6986435651779175, |
|
"logits/rejected": 0.8309999704360962, |
|
"logps/chosen": -319.8310852050781, |
|
"logps/rejected": -317.59918212890625, |
|
"loss": 0.6552, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.6028285622596741, |
|
"rewards/margins": 3.663621425628662, |
|
"rewards/rejected": -3.060793161392212, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2843601895734597, |
|
"grad_norm": 14.434952077378005, |
|
"learning_rate": 9.890383118800284e-07, |
|
"logits/chosen": 0.7444020509719849, |
|
"logits/rejected": 0.7484663724899292, |
|
"logps/chosen": -327.59576416015625, |
|
"logps/rejected": -349.929931640625, |
|
"loss": 0.6285, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.3002261221408844, |
|
"rewards/margins": 3.5275771617889404, |
|
"rewards/rejected": -3.227351427078247, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.33175355450236965, |
|
"grad_norm": 10.030890442911925, |
|
"learning_rate": 9.830730936592615e-07, |
|
"logits/chosen": 0.7815200090408325, |
|
"logits/rejected": 0.7069059610366821, |
|
"logps/chosen": -252.94921875, |
|
"logps/rejected": -323.2224426269531, |
|
"loss": 0.6106, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 1.3401187658309937, |
|
"rewards/margins": 5.26017427444458, |
|
"rewards/rejected": -3.920055866241455, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3791469194312796, |
|
"grad_norm": 12.131364583934603, |
|
"learning_rate": 9.758434852922123e-07, |
|
"logits/chosen": 0.7100412249565125, |
|
"logits/rejected": 0.6621907353401184, |
|
"logps/chosen": -271.33331298828125, |
|
"logps/rejected": -328.0660705566406, |
|
"loss": 0.59, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.908360481262207, |
|
"rewards/margins": 4.926724910736084, |
|
"rewards/rejected": -4.018364429473877, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.4265402843601896, |
|
"grad_norm": 11.822232959802975, |
|
"learning_rate": 9.673684094754685e-07, |
|
"logits/chosen": 0.6003296375274658, |
|
"logits/rejected": 0.6765642762184143, |
|
"logps/chosen": -293.85015869140625, |
|
"logps/rejected": -305.929443359375, |
|
"loss": 0.586, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 1.231705904006958, |
|
"rewards/margins": 4.982685089111328, |
|
"rewards/rejected": -3.750978946685791, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.47393364928909953, |
|
"grad_norm": 9.616291876594419, |
|
"learning_rate": 9.576700487782773e-07, |
|
"logits/chosen": 0.6642001867294312, |
|
"logits/rejected": 0.6596721410751343, |
|
"logps/chosen": -326.2373046875, |
|
"logps/rejected": -381.3326110839844, |
|
"loss": 0.5801, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 1.7316535711288452, |
|
"rewards/margins": 6.260350704193115, |
|
"rewards/rejected": -4.5286970138549805, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.47393364928909953, |
|
"eval_logits/chosen": 0.610289990901947, |
|
"eval_logits/rejected": 0.6783497929573059, |
|
"eval_logps/chosen": -339.33251953125, |
|
"eval_logps/rejected": -361.24346923828125, |
|
"eval_loss": 0.6839759349822998, |
|
"eval_rewards/accuracies": 0.6898733973503113, |
|
"eval_rewards/chosen": 0.6485355496406555, |
|
"eval_rewards/margins": 3.587477684020996, |
|
"eval_rewards/rejected": -2.9389421939849854, |
|
"eval_runtime": 76.922, |
|
"eval_samples_per_second": 32.5, |
|
"eval_steps_per_second": 1.027, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.5213270142180095, |
|
"grad_norm": 11.519611398516883, |
|
"learning_rate": 9.467737875821367e-07, |
|
"logits/chosen": 0.659843385219574, |
|
"logits/rejected": 0.6010033488273621, |
|
"logps/chosen": -293.62200927734375, |
|
"logps/rejected": -334.9098205566406, |
|
"loss": 0.5742, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 1.1434353590011597, |
|
"rewards/margins": 5.331825256347656, |
|
"rewards/rejected": -4.188389301300049, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.5687203791469194, |
|
"grad_norm": 10.75922014108817, |
|
"learning_rate": 9.347081456399957e-07, |
|
"logits/chosen": 0.6637296676635742, |
|
"logits/rejected": 0.5958945155143738, |
|
"logps/chosen": -272.2585144042969, |
|
"logps/rejected": -393.41949462890625, |
|
"loss": 0.5821, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.9803568124771118, |
|
"rewards/margins": 6.413501739501953, |
|
"rewards/rejected": -5.433144569396973, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.6161137440758294, |
|
"grad_norm": 11.497074098204886, |
|
"learning_rate": 9.215047034289715e-07, |
|
"logits/chosen": 0.6836856603622437, |
|
"logits/rejected": 0.6638469696044922, |
|
"logps/chosen": -275.0943603515625, |
|
"logps/rejected": -332.6889343261719, |
|
"loss": 0.5752, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.4476346969604492, |
|
"rewards/margins": 6.094024658203125, |
|
"rewards/rejected": -4.646389961242676, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6635071090047393, |
|
"grad_norm": 9.658859904375, |
|
"learning_rate": 9.07198019491959e-07, |
|
"logits/chosen": 0.61662757396698, |
|
"logits/rejected": 0.5779851675033569, |
|
"logps/chosen": -272.382080078125, |
|
"logps/rejected": -355.6089172363281, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.8889511227607727, |
|
"rewards/margins": 5.594452857971191, |
|
"rewards/rejected": -4.705502510070801, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.7109004739336493, |
|
"grad_norm": 10.07652231167762, |
|
"learning_rate": 8.918255399844853e-07, |
|
"logits/chosen": 0.5373108983039856, |
|
"logits/rejected": 0.654308021068573, |
|
"logps/chosen": -330.0559997558594, |
|
"logps/rejected": -349.55224609375, |
|
"loss": 0.5738, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.3335852324962616, |
|
"rewards/margins": 4.550914287567139, |
|
"rewards/rejected": -4.217329502105713, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.7582938388625592, |
|
"grad_norm": 8.965490487953566, |
|
"learning_rate": 8.754275006635572e-07, |
|
"logits/chosen": 0.565764844417572, |
|
"logits/rejected": 0.539226233959198, |
|
"logps/chosen": -269.29742431640625, |
|
"logps/rejected": -355.60589599609375, |
|
"loss": 0.5997, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.5406277179718018, |
|
"rewards/margins": 5.479567527770996, |
|
"rewards/rejected": -4.938939571380615, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.8056872037914692, |
|
"grad_norm": 9.437674903727038, |
|
"learning_rate": 8.580468215750391e-07, |
|
"logits/chosen": 0.6932438611984253, |
|
"logits/rejected": 0.636594831943512, |
|
"logps/chosen": -296.7684631347656, |
|
"logps/rejected": -367.45318603515625, |
|
"loss": 0.5783, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 1.147369146347046, |
|
"rewards/margins": 5.5389909744262695, |
|
"rewards/rejected": -4.391622066497803, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.8530805687203792, |
|
"grad_norm": 8.5658002946873, |
|
"learning_rate": 8.39728994715202e-07, |
|
"logits/chosen": 0.6020892858505249, |
|
"logits/rejected": 0.5168766379356384, |
|
"logps/chosen": -288.558349609375, |
|
"logps/rejected": -348.62640380859375, |
|
"loss": 0.5531, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.6757786870002747, |
|
"rewards/margins": 5.149857997894287, |
|
"rewards/rejected": -4.474079132080078, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.9004739336492891, |
|
"grad_norm": 11.065263225689659, |
|
"learning_rate": 8.20521964960477e-07, |
|
"logits/chosen": 0.6599653363227844, |
|
"logits/rejected": 0.6458830237388611, |
|
"logps/chosen": -289.4867858886719, |
|
"logps/rejected": -342.56243896484375, |
|
"loss": 0.5439, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 1.274778962135315, |
|
"rewards/margins": 6.3435516357421875, |
|
"rewards/rejected": -5.068772792816162, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.9478672985781991, |
|
"grad_norm": 8.426424572195439, |
|
"learning_rate": 8.0047600457707e-07, |
|
"logits/chosen": 0.6277160048484802, |
|
"logits/rejected": 0.6192003488540649, |
|
"logps/chosen": -318.033447265625, |
|
"logps/rejected": -377.3500061035156, |
|
"loss": 0.537, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.3354051113128662, |
|
"rewards/margins": 6.755140781402588, |
|
"rewards/rejected": -5.419735431671143, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9478672985781991, |
|
"eval_logits/chosen": 0.494819700717926, |
|
"eval_logits/rejected": 0.5648438930511475, |
|
"eval_logps/chosen": -343.7730712890625, |
|
"eval_logps/rejected": -372.1695861816406, |
|
"eval_loss": 0.6514427661895752, |
|
"eval_rewards/accuracies": 0.7278481125831604, |
|
"eval_rewards/chosen": 0.20448331534862518, |
|
"eval_rewards/margins": 4.236032485961914, |
|
"eval_rewards/rejected": -4.031548976898193, |
|
"eval_runtime": 74.0508, |
|
"eval_samples_per_second": 33.761, |
|
"eval_steps_per_second": 1.067, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.995260663507109, |
|
"grad_norm": 9.878709661135902, |
|
"learning_rate": 7.796435816388898e-07, |
|
"logits/chosen": 0.6760674118995667, |
|
"logits/rejected": 0.6518660187721252, |
|
"logps/chosen": -284.24749755859375, |
|
"logps/rejected": -363.0601501464844, |
|
"loss": 0.554, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.6821473836898804, |
|
"rewards/margins": 6.51880407333374, |
|
"rewards/rejected": -5.8366570472717285, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.042654028436019, |
|
"grad_norm": 10.875728154843127, |
|
"learning_rate": 7.580792226981954e-07, |
|
"logits/chosen": 0.5221652984619141, |
|
"logits/rejected": 0.44479990005493164, |
|
"logps/chosen": -281.39190673828125, |
|
"logps/rejected": -370.33941650390625, |
|
"loss": 0.4911, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 2.0442254543304443, |
|
"rewards/margins": 7.068573951721191, |
|
"rewards/rejected": -5.024348258972168, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.0900473933649288, |
|
"grad_norm": 10.04148994728917, |
|
"learning_rate": 7.358393700684032e-07, |
|
"logits/chosen": 0.5540430545806885, |
|
"logits/rejected": 0.5128260850906372, |
|
"logps/chosen": -279.4583435058594, |
|
"logps/rejected": -350.32684326171875, |
|
"loss": 0.5022, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.9357398152351379, |
|
"rewards/margins": 5.9159369468688965, |
|
"rewards/rejected": -4.980198383331299, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.1374407582938388, |
|
"grad_norm": 11.466420945945197, |
|
"learning_rate": 7.129822340926043e-07, |
|
"logits/chosen": 0.5252267122268677, |
|
"logits/rejected": 0.6392233371734619, |
|
"logps/chosen": -300.5268859863281, |
|
"logps/rejected": -328.5356750488281, |
|
"loss": 0.4908, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 1.1534405946731567, |
|
"rewards/margins": 6.1857991218566895, |
|
"rewards/rejected": -5.032358169555664, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.1848341232227488, |
|
"grad_norm": 9.714339627017372, |
|
"learning_rate": 6.895676407844586e-07, |
|
"logits/chosen": 0.5342652797698975, |
|
"logits/rejected": 0.5475658178329468, |
|
"logps/chosen": -275.02972412109375, |
|
"logps/rejected": -325.74993896484375, |
|
"loss": 0.4508, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 2.0915255546569824, |
|
"rewards/margins": 6.8750715255737305, |
|
"rewards/rejected": -4.783546447753906, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.2322274881516588, |
|
"grad_norm": 8.702659887264469, |
|
"learning_rate": 6.656568752402521e-07, |
|
"logits/chosen": 0.4584909975528717, |
|
"logits/rejected": 0.5478152632713318, |
|
"logps/chosen": -314.6927185058594, |
|
"logps/rejected": -357.88226318359375, |
|
"loss": 0.4621, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 1.3858213424682617, |
|
"rewards/margins": 6.8659563064575195, |
|
"rewards/rejected": -5.480134963989258, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.2796208530805688, |
|
"grad_norm": 10.924278197277149, |
|
"learning_rate": 6.413125212319663e-07, |
|
"logits/chosen": 0.6362992525100708, |
|
"logits/rejected": 0.6484791040420532, |
|
"logps/chosen": -285.7840270996094, |
|
"logps/rejected": -360.7676086425781, |
|
"loss": 0.4712, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 2.0224599838256836, |
|
"rewards/margins": 7.362783908843994, |
|
"rewards/rejected": -5.3403239250183105, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.3270142180094786, |
|
"grad_norm": 9.286266066829205, |
|
"learning_rate": 6.165982974012104e-07, |
|
"logits/chosen": 0.48062658309936523, |
|
"logits/rejected": 0.4873732626438141, |
|
"logps/chosen": -345.07586669921875, |
|
"logps/rejected": -393.88165283203125, |
|
"loss": 0.4628, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 1.449973225593567, |
|
"rewards/margins": 7.039644718170166, |
|
"rewards/rejected": -5.589670658111572, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.3744075829383886, |
|
"grad_norm": 9.83819564198541, |
|
"learning_rate": 5.915788904827553e-07, |
|
"logits/chosen": 0.43026304244995117, |
|
"logits/rejected": 0.459343820810318, |
|
"logps/chosen": -294.733154296875, |
|
"logps/rejected": -363.80340576171875, |
|
"loss": 0.4507, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 1.6585981845855713, |
|
"rewards/margins": 6.437933444976807, |
|
"rewards/rejected": -4.779335021972656, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.4218009478672986, |
|
"grad_norm": 8.577071743246128, |
|
"learning_rate": 5.663197859941938e-07, |
|
"logits/chosen": 0.6086027026176453, |
|
"logits/rejected": 0.6251193881034851, |
|
"logps/chosen": -262.66644287109375, |
|
"logps/rejected": -320.42974853515625, |
|
"loss": 0.4787, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 1.298060655593872, |
|
"rewards/margins": 6.941515922546387, |
|
"rewards/rejected": -5.643455505371094, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.4218009478672986, |
|
"eval_logits/chosen": 0.45885032415390015, |
|
"eval_logits/rejected": 0.5325651168823242, |
|
"eval_logps/chosen": -341.7187194824219, |
|
"eval_logps/rejected": -371.7361145019531, |
|
"eval_loss": 0.6386769413948059, |
|
"eval_rewards/accuracies": 0.7215189933776855, |
|
"eval_rewards/chosen": 0.40991881489753723, |
|
"eval_rewards/margins": 4.398120880126953, |
|
"eval_rewards/rejected": -3.98820161819458, |
|
"eval_runtime": 72.3153, |
|
"eval_samples_per_second": 34.571, |
|
"eval_steps_per_second": 1.092, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.4691943127962086, |
|
"grad_norm": 12.642599504555136, |
|
"learning_rate": 5.408870968348749e-07, |
|
"logits/chosen": 0.46862930059432983, |
|
"logits/rejected": 0.45317015051841736, |
|
"logps/chosen": -269.1434631347656, |
|
"logps/rejected": -348.3428955078125, |
|
"loss": 0.4684, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 1.3798935413360596, |
|
"rewards/margins": 6.562399864196777, |
|
"rewards/rejected": -5.182506561279297, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.5165876777251186, |
|
"grad_norm": 9.79584839845262, |
|
"learning_rate": 5.153473902427354e-07, |
|
"logits/chosen": 0.47858723998069763, |
|
"logits/rejected": 0.5644794702529907, |
|
"logps/chosen": -321.48345947265625, |
|
"logps/rejected": -343.6278991699219, |
|
"loss": 0.4803, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 1.1607013940811157, |
|
"rewards/margins": 5.799595832824707, |
|
"rewards/rejected": -4.638894557952881, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.5639810426540284, |
|
"grad_norm": 8.875212778872154, |
|
"learning_rate": 4.897675135619516e-07, |
|
"logits/chosen": 0.47927242517471313, |
|
"logits/rejected": 0.605729341506958, |
|
"logps/chosen": -296.8520812988281, |
|
"logps/rejected": -339.26220703125, |
|
"loss": 0.48, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 1.206688404083252, |
|
"rewards/margins": 6.4211745262146, |
|
"rewards/rejected": -5.214486598968506, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.6113744075829384, |
|
"grad_norm": 9.788751062324735, |
|
"learning_rate": 4.642144192774429e-07, |
|
"logits/chosen": 0.6517030000686646, |
|
"logits/rejected": 0.6343492269515991, |
|
"logps/chosen": -256.8311767578125, |
|
"logps/rejected": -318.10504150390625, |
|
"loss": 0.4687, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 1.4574129581451416, |
|
"rewards/margins": 7.180891513824463, |
|
"rewards/rejected": -5.723478317260742, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.6587677725118484, |
|
"grad_norm": 8.123068784558978, |
|
"learning_rate": 4.387549897741825e-07, |
|
"logits/chosen": 0.43539008498191833, |
|
"logits/rejected": 0.4823547303676605, |
|
"logps/chosen": -322.7386474609375, |
|
"logps/rejected": -349.6393127441406, |
|
"loss": 0.4903, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 1.6534090042114258, |
|
"rewards/margins": 6.494222164154053, |
|
"rewards/rejected": -4.840813159942627, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.7061611374407581, |
|
"grad_norm": 10.106462346167355, |
|
"learning_rate": 4.1345586227998634e-07, |
|
"logits/chosen": 0.4860106110572815, |
|
"logits/rejected": 0.48908883333206177, |
|
"logps/chosen": -289.710693359375, |
|
"logps/rejected": -384.22686767578125, |
|
"loss": 0.446, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.587738275527954, |
|
"rewards/margins": 7.2089128494262695, |
|
"rewards/rejected": -5.6211748123168945, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.7535545023696684, |
|
"grad_norm": 10.81635763601606, |
|
"learning_rate": 3.883832544499735e-07, |
|
"logits/chosen": 0.5913195013999939, |
|
"logits/rejected": 0.5606914758682251, |
|
"logps/chosen": -292.9503173828125, |
|
"logps/rejected": -390.93878173828125, |
|
"loss": 0.4592, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.614689588546753, |
|
"rewards/margins": 6.656731605529785, |
|
"rewards/rejected": -5.042041301727295, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.8009478672985781, |
|
"grad_norm": 10.495084061438284, |
|
"learning_rate": 3.636027910492114e-07, |
|
"logits/chosen": 0.4658740162849426, |
|
"logits/rejected": 0.5308722257614136, |
|
"logps/chosen": -305.28753662109375, |
|
"logps/rejected": -352.7513122558594, |
|
"loss": 0.4648, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": 1.0712064504623413, |
|
"rewards/margins": 6.167966365814209, |
|
"rewards/rejected": -5.096759796142578, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.8483412322274881, |
|
"grad_norm": 11.413974134819627, |
|
"learning_rate": 3.3917933218718566e-07, |
|
"logits/chosen": 0.6185089349746704, |
|
"logits/rejected": 0.6838531494140625, |
|
"logps/chosen": -284.1628112792969, |
|
"logps/rejected": -333.17657470703125, |
|
"loss": 0.4426, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.4776874780654907, |
|
"rewards/margins": 6.398137092590332, |
|
"rewards/rejected": -4.920449733734131, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.8957345971563981, |
|
"grad_norm": 9.664147195442332, |
|
"learning_rate": 3.151768035536698e-07, |
|
"logits/chosen": 0.6407091617584229, |
|
"logits/rejected": 0.6542560458183289, |
|
"logps/chosen": -284.20037841796875, |
|
"logps/rejected": -345.27880859375, |
|
"loss": 0.4559, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 2.0247559547424316, |
|
"rewards/margins": 7.09304141998291, |
|
"rewards/rejected": -5.0682854652404785, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.8957345971563981, |
|
"eval_logits/chosen": 0.41101595759391785, |
|
"eval_logits/rejected": 0.4840773642063141, |
|
"eval_logps/chosen": -338.1277160644531, |
|
"eval_logps/rejected": -368.54248046875, |
|
"eval_loss": 0.6332134008407593, |
|
"eval_rewards/accuracies": 0.7341772317886353, |
|
"eval_rewards/chosen": 0.7690173983573914, |
|
"eval_rewards/margins": 4.437857151031494, |
|
"eval_rewards/rejected": -3.668839931488037, |
|
"eval_runtime": 72.5998, |
|
"eval_samples_per_second": 34.435, |
|
"eval_steps_per_second": 1.088, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.943127962085308, |
|
"grad_norm": 10.263641095491934, |
|
"learning_rate": 2.9165802910033603e-07, |
|
"logits/chosen": 0.5565508604049683, |
|
"logits/rejected": 0.5877315402030945, |
|
"logps/chosen": -328.7551574707031, |
|
"logps/rejected": -364.5121154785156, |
|
"loss": 0.4644, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 1.852020502090454, |
|
"rewards/margins": 6.0383710861206055, |
|
"rewards/rejected": -4.186350345611572, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.9905213270142181, |
|
"grad_norm": 8.889403142715599, |
|
"learning_rate": 2.686845666060415e-07, |
|
"logits/chosen": 0.5102426409721375, |
|
"logits/rejected": 0.43454083800315857, |
|
"logps/chosen": -271.08160400390625, |
|
"logps/rejected": -369.26458740234375, |
|
"loss": 0.461, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 1.6376615762710571, |
|
"rewards/margins": 7.588493347167969, |
|
"rewards/rejected": -5.950831413269043, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.037914691943128, |
|
"grad_norm": 7.4495856256114195, |
|
"learning_rate": 2.4631654655618287e-07, |
|
"logits/chosen": 0.37354058027267456, |
|
"logits/rejected": 0.4436867833137512, |
|
"logps/chosen": -310.15802001953125, |
|
"logps/rejected": -382.03253173828125, |
|
"loss": 0.3945, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.8288238048553467, |
|
"rewards/margins": 7.114483833312988, |
|
"rewards/rejected": -5.2856597900390625, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.085308056872038, |
|
"grad_norm": 8.829254132221473, |
|
"learning_rate": 2.2461251475783155e-07, |
|
"logits/chosen": 0.5162326693534851, |
|
"logits/rejected": 0.4021889567375183, |
|
"logps/chosen": -288.923095703125, |
|
"logps/rejected": -389.34979248046875, |
|
"loss": 0.3748, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 1.8741111755371094, |
|
"rewards/margins": 7.6665802001953125, |
|
"rewards/rejected": -5.792468547821045, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.132701421800948, |
|
"grad_norm": 8.156529944948277, |
|
"learning_rate": 2.0362927910258986e-07, |
|
"logits/chosen": 0.45688456296920776, |
|
"logits/rejected": 0.4526469111442566, |
|
"logps/chosen": -253.50131225585938, |
|
"logps/rejected": -349.1957702636719, |
|
"loss": 0.4147, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.0875327587127686, |
|
"rewards/margins": 8.09435749053955, |
|
"rewards/rejected": -6.006823539733887, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.1800947867298577, |
|
"grad_norm": 7.824692642426332, |
|
"learning_rate": 1.8342176087824573e-07, |
|
"logits/chosen": 0.4325633645057678, |
|
"logits/rejected": 0.3565566837787628, |
|
"logps/chosen": -284.46624755859375, |
|
"logps/rejected": -372.12091064453125, |
|
"loss": 0.3992, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 1.8221031427383423, |
|
"rewards/margins": 7.619426727294922, |
|
"rewards/rejected": -5.797322750091553, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.227488151658768, |
|
"grad_norm": 13.407256371457692, |
|
"learning_rate": 1.6404285101840565e-07, |
|
"logits/chosen": 0.3386808931827545, |
|
"logits/rejected": 0.47734910249710083, |
|
"logps/chosen": -331.7251892089844, |
|
"logps/rejected": -367.4866638183594, |
|
"loss": 0.3822, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 1.9130542278289795, |
|
"rewards/margins": 7.692631721496582, |
|
"rewards/rejected": -5.779577732086182, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.2748815165876777, |
|
"grad_norm": 10.86707059625683, |
|
"learning_rate": 1.455432716663517e-07, |
|
"logits/chosen": 0.36686116456985474, |
|
"logits/rejected": 0.48829737305641174, |
|
"logps/chosen": -285.77008056640625, |
|
"logps/rejected": -328.3174743652344, |
|
"loss": 0.4089, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 1.7794748544692993, |
|
"rewards/margins": 6.214818477630615, |
|
"rewards/rejected": -4.435343265533447, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.322274881516588, |
|
"grad_norm": 9.830177502454013, |
|
"learning_rate": 1.2797144341546883e-07, |
|
"logits/chosen": 0.3986554741859436, |
|
"logits/rejected": 0.44396382570266724, |
|
"logps/chosen": -321.13818359375, |
|
"logps/rejected": -390.934326171875, |
|
"loss": 0.4219, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": 1.6029850244522095, |
|
"rewards/margins": 7.5643768310546875, |
|
"rewards/rejected": -5.961391448974609, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.3696682464454977, |
|
"grad_norm": 9.42905977432162, |
|
"learning_rate": 1.1137335857372043e-07, |
|
"logits/chosen": 0.4437794089317322, |
|
"logits/rejected": 0.42870789766311646, |
|
"logps/chosen": -287.81451416015625, |
|
"logps/rejected": -374.01873779296875, |
|
"loss": 0.4028, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 2.1330111026763916, |
|
"rewards/margins": 7.767390251159668, |
|
"rewards/rejected": -5.6343793869018555, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.3696682464454977, |
|
"eval_logits/chosen": 0.3730663061141968, |
|
"eval_logits/rejected": 0.4475269019603729, |
|
"eval_logps/chosen": -338.3392028808594, |
|
"eval_logps/rejected": -370.232666015625, |
|
"eval_loss": 0.6289177536964417, |
|
"eval_rewards/accuracies": 0.7405063509941101, |
|
"eval_rewards/chosen": 0.7478683590888977, |
|
"eval_rewards/margins": 4.585729122161865, |
|
"eval_rewards/rejected": -3.8378612995147705, |
|
"eval_runtime": 73.3012, |
|
"eval_samples_per_second": 34.106, |
|
"eval_steps_per_second": 1.078, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.4170616113744074, |
|
"grad_norm": 10.06462647313331, |
|
"learning_rate": 9.579246078389403e-08, |
|
"logits/chosen": 0.5295278429985046, |
|
"logits/rejected": 0.43623122572898865, |
|
"logps/chosen": -258.68963623046875, |
|
"logps/rejected": -339.7721252441406, |
|
"loss": 0.3858, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 1.592254400253296, |
|
"rewards/margins": 7.2217698097229, |
|
"rewards/rejected": -5.629514694213867, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.4644549763033177, |
|
"grad_norm": 9.022052721765009, |
|
"learning_rate": 8.126953131469228e-08, |
|
"logits/chosen": 0.44106584787368774, |
|
"logits/rejected": 0.39466392993927, |
|
"logps/chosen": -303.3637390136719, |
|
"logps/rejected": -370.74114990234375, |
|
"loss": 0.4143, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.8263496160507202, |
|
"rewards/margins": 7.823184013366699, |
|
"rewards/rejected": -5.996834754943848, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.5118483412322274, |
|
"grad_norm": 8.021054640921763, |
|
"learning_rate": 6.784258232029472e-08, |
|
"logits/chosen": 0.3634105622768402, |
|
"logits/rejected": 0.3859165608882904, |
|
"logps/chosen": -307.2467041015625, |
|
"logps/rejected": -376.1995849609375, |
|
"loss": 0.3822, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": 2.497091770172119, |
|
"rewards/margins": 7.9943437576293945, |
|
"rewards/rejected": -5.497252464294434, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.5592417061611377, |
|
"grad_norm": 10.013425700067337, |
|
"learning_rate": 5.554675734776665e-08, |
|
"logits/chosen": 0.5024563074111938, |
|
"logits/rejected": 0.5056658387184143, |
|
"logps/chosen": -276.1619567871094, |
|
"logps/rejected": -368.4447021484375, |
|
"loss": 0.4035, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 1.820339560508728, |
|
"rewards/margins": 8.141976356506348, |
|
"rewards/rejected": -6.321636199951172, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.6066350710900474, |
|
"grad_norm": 9.209955480260117, |
|
"learning_rate": 4.4414239352730867e-08, |
|
"logits/chosen": 0.42310771346092224, |
|
"logits/rejected": 0.48689502477645874, |
|
"logps/chosen": -313.3210754394531, |
|
"logps/rejected": -351.4210205078125, |
|
"loss": 0.406, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 2.1306679248809814, |
|
"rewards/margins": 7.7195258140563965, |
|
"rewards/rejected": -5.588858127593994, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.654028436018957, |
|
"grad_norm": 9.959818332708023, |
|
"learning_rate": 3.447416646405632e-08, |
|
"logits/chosen": 0.5685544610023499, |
|
"logits/rejected": 0.5256290435791016, |
|
"logps/chosen": -287.7798156738281, |
|
"logps/rejected": -380.33685302734375, |
|
"loss": 0.4009, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 1.8459497690200806, |
|
"rewards/margins": 7.295513153076172, |
|
"rewards/rejected": -5.449563503265381, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.7014218009478674, |
|
"grad_norm": 8.593809820816018, |
|
"learning_rate": 2.575255571804391e-08, |
|
"logits/chosen": 0.41258078813552856, |
|
"logits/rejected": 0.4132450222969055, |
|
"logps/chosen": -287.94476318359375, |
|
"logps/rejected": -369.03656005859375, |
|
"loss": 0.4, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.5231783390045166, |
|
"rewards/margins": 7.392594814300537, |
|
"rewards/rejected": -5.8694167137146, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.748815165876777, |
|
"grad_norm": 9.646946039027634, |
|
"learning_rate": 1.8272234961725084e-08, |
|
"logits/chosen": 0.48128992319107056, |
|
"logits/rejected": 0.4887717366218567, |
|
"logps/chosen": -303.7729797363281, |
|
"logps/rejected": -359.5372314453125, |
|
"loss": 0.3912, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": 2.173060655593872, |
|
"rewards/margins": 8.012847900390625, |
|
"rewards/rejected": -5.839787006378174, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.7962085308056874, |
|
"grad_norm": 11.09612482230785, |
|
"learning_rate": 1.2052783103508102e-08, |
|
"logits/chosen": 0.5081132650375366, |
|
"logits/rejected": 0.5602059364318848, |
|
"logps/chosen": -270.61737060546875, |
|
"logps/rejected": -335.85577392578125, |
|
"loss": 0.3991, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 1.619431495666504, |
|
"rewards/margins": 6.8268561363220215, |
|
"rewards/rejected": -5.207424163818359, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.843601895734597, |
|
"grad_norm": 8.273064520857158, |
|
"learning_rate": 7.1104788675613315e-09, |
|
"logits/chosen": 0.32943224906921387, |
|
"logits/rejected": 0.4085375666618347, |
|
"logps/chosen": -288.88995361328125, |
|
"logps/rejected": -364.12860107421875, |
|
"loss": 0.4029, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 2.0637223720550537, |
|
"rewards/margins": 7.937726020812988, |
|
"rewards/rejected": -5.874002933502197, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.843601895734597, |
|
"eval_logits/chosen": 0.38198891282081604, |
|
"eval_logits/rejected": 0.45711585879325867, |
|
"eval_logps/chosen": -337.3143310546875, |
|
"eval_logps/rejected": -368.9125061035156, |
|
"eval_loss": 0.6283919215202332, |
|
"eval_rewards/accuracies": 0.7436708807945251, |
|
"eval_rewards/chosen": 0.8503568768501282, |
|
"eval_rewards/margins": 4.556199073791504, |
|
"eval_rewards/rejected": -3.7058422565460205, |
|
"eval_runtime": 73.7958, |
|
"eval_samples_per_second": 33.877, |
|
"eval_steps_per_second": 1.071, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.890995260663507, |
|
"grad_norm": 9.238913123295514, |
|
"learning_rate": 3.4582581860612137e-09, |
|
"logits/chosen": 0.43385523557662964, |
|
"logits/rejected": 0.43230634927749634, |
|
"logps/chosen": -292.0911865234375, |
|
"logps/rejected": -353.61590576171875, |
|
"loss": 0.3884, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": 1.989989995956421, |
|
"rewards/margins": 6.724064826965332, |
|
"rewards/rejected": -4.734074115753174, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.938388625592417, |
|
"grad_norm": 9.407237089972764, |
|
"learning_rate": 1.1056803408273085e-09, |
|
"logits/chosen": 0.48387131094932556, |
|
"logits/rejected": 0.4587581753730774, |
|
"logps/chosen": -282.6869201660156, |
|
"logps/rejected": -344.5205078125, |
|
"loss": 0.4089, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 1.806133508682251, |
|
"rewards/margins": 7.467283725738525, |
|
"rewards/rejected": -5.661149978637695, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.985781990521327, |
|
"grad_norm": 8.481488205996529, |
|
"learning_rate": 5.890294296428955e-11, |
|
"logits/chosen": 0.44664233922958374, |
|
"logits/rejected": 0.5504810810089111, |
|
"logps/chosen": -319.47119140625, |
|
"logps/rejected": -348.36090087890625, |
|
"loss": 0.3848, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 2.1828243732452393, |
|
"rewards/margins": 6.884246826171875, |
|
"rewards/rejected": -4.701422214508057, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 633, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5009220597491634, |
|
"train_runtime": 6227.6413, |
|
"train_samples_per_second": 13.002, |
|
"train_steps_per_second": 0.102 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 633, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|