{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 385,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 1.282051282051282e-07,
      "logits/chosen": -1.7278180122375488,
      "logits/rejected": -1.7377450466156006,
      "logps/chosen": -29.553977966308594,
      "logps/rejected": -42.813133239746094,
      "loss": 0.5,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.03,
      "learning_rate": 1.282051282051282e-06,
      "logits/chosen": -1.8668408393859863,
      "logits/rejected": -1.8711602687835693,
      "logps/chosen": -36.98978042602539,
      "logps/rejected": -33.66878890991211,
      "loss": 0.4962,
      "rewards/accuracies": 0.5416666865348816,
      "rewards/chosen": 0.006728413049131632,
      "rewards/margins": 0.022216208279132843,
      "rewards/rejected": -0.015487794764339924,
      "step": 10
    },
    {
      "epoch": 0.05,
      "learning_rate": 2.564102564102564e-06,
      "logits/chosen": -1.9970359802246094,
      "logits/rejected": -1.9996885061264038,
      "logps/chosen": -29.635208129882812,
      "logps/rejected": -29.063350677490234,
      "loss": 0.4994,
      "rewards/accuracies": 0.5249999761581421,
      "rewards/chosen": 0.002794977743178606,
      "rewards/margins": 0.002757500857114792,
      "rewards/rejected": 3.747665323317051e-05,
      "step": 20
    },
    {
      "epoch": 0.08,
      "learning_rate": 3.846153846153847e-06,
      "logits/chosen": -1.9208602905273438,
      "logits/rejected": -1.9181534051895142,
      "logps/chosen": -31.40317726135254,
      "logps/rejected": -33.23335647583008,
      "loss": 0.4982,
      "rewards/accuracies": 0.550000011920929,
      "rewards/chosen": 0.005168457515537739,
      "rewards/margins": 0.011126698926091194,
      "rewards/rejected": -0.005958239547908306,
      "step": 30
    },
    {
      "epoch": 0.1,
      "learning_rate": 4.999896948438434e-06,
      "logits/chosen": -2.017474889755249,
      "logits/rejected": -2.008759021759033,
      "logps/chosen": -32.54490661621094,
      "logps/rejected": -32.49110794067383,
      "loss": 0.4994,
      "rewards/accuracies": 0.4749999940395355,
      "rewards/chosen": 0.012759355828166008,
      "rewards/margins": 0.004133358132094145,
      "rewards/rejected": 0.008625999093055725,
      "step": 40
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.987541037542187e-06,
      "logits/chosen": -1.8631575107574463,
      "logits/rejected": -1.8523809909820557,
      "logps/chosen": -33.509056091308594,
      "logps/rejected": -35.39984893798828,
      "loss": 0.5001,
      "rewards/accuracies": 0.48750001192092896,
      "rewards/chosen": 0.01936880685389042,
      "rewards/margins": -0.001629653968848288,
      "rewards/rejected": 0.020998459309339523,
      "step": 50
    },
    {
      "epoch": 0.16,
      "learning_rate": 4.954691471941119e-06,
      "logits/chosen": -1.9419567584991455,
      "logits/rejected": -1.9438903331756592,
      "logps/chosen": -32.50743103027344,
      "logps/rejected": -33.188419342041016,
      "loss": 0.491,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.03710051625967026,
      "rewards/margins": 0.04391607269644737,
      "rewards/rejected": -0.006815555039793253,
      "step": 60
    },
    {
      "epoch": 0.18,
      "learning_rate": 4.901618883413549e-06,
      "logits/chosen": -2.0737738609313965,
      "logits/rejected": -2.0787465572357178,
      "logps/chosen": -33.928836822509766,
      "logps/rejected": -36.540794372558594,
      "loss": 0.4971,
      "rewards/accuracies": 0.512499988079071,
      "rewards/chosen": 0.023859737440943718,
      "rewards/margins": 0.01262708194553852,
      "rewards/rejected": 0.01123266015201807,
      "step": 70
    },
    {
      "epoch": 0.21,
      "learning_rate": 4.828760511501322e-06,
      "logits/chosen": -1.934011697769165,
      "logits/rejected": -1.9371341466903687,
      "logps/chosen": -34.23366928100586,
      "logps/rejected": -34.542274475097656,
      "loss": 0.4864,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.07765541225671768,
      "rewards/margins": 0.057602256536483765,
      "rewards/rejected": 0.020053153857588768,
      "step": 80
    },
    {
      "epoch": 0.23,
      "learning_rate": 4.7367166013034295e-06,
      "logits/chosen": -1.9421571493148804,
      "logits/rejected": -1.9466804265975952,
      "logps/chosen": -32.304595947265625,
      "logps/rejected": -32.284873962402344,
      "loss": 0.4909,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.0651295855641365,
      "rewards/margins": 0.035516757518053055,
      "rewards/rejected": 0.02961282804608345,
      "step": 90
    },
    {
      "epoch": 0.26,
      "learning_rate": 4.626245458345211e-06,
      "logits/chosen": -2.0406017303466797,
      "logits/rejected": -2.038613796234131,
      "logps/chosen": -32.027652740478516,
      "logps/rejected": -31.224151611328125,
      "loss": 0.4845,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.08527366816997528,
      "rewards/margins": 0.0684497207403183,
      "rewards/rejected": 0.01682395115494728,
      "step": 100
    },
    {
      "epoch": 0.26,
      "eval_logits/chosen": -2.234653949737549,
      "eval_logits/rejected": -2.229806423187256,
      "eval_logps/chosen": -33.91841125488281,
      "eval_logps/rejected": -37.42335891723633,
      "eval_loss": 0.4981803297996521,
      "eval_rewards/accuracies": 0.5510797500610352,
      "eval_rewards/chosen": 0.04645563289523125,
      "eval_rewards/margins": 0.009152057580649853,
      "eval_rewards/rejected": 0.03730357065796852,
      "eval_runtime": 145.8684,
      "eval_samples_per_second": 2.351,
      "eval_steps_per_second": 0.295,
      "step": 100
    },
    {
      "epoch": 0.29,
      "learning_rate": 4.498257201263691e-06,
      "logits/chosen": -1.9949238300323486,
      "logits/rejected": -1.9925572872161865,
      "logps/chosen": -32.99268341064453,
      "logps/rejected": -33.90182113647461,
      "loss": 0.4869,
      "rewards/accuracies": 0.5874999761581421,
      "rewards/chosen": 0.10045422613620758,
      "rewards/margins": 0.049433451145887375,
      "rewards/rejected": 0.051020748913288116,
      "step": 110
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.353806263777678e-06,
      "logits/chosen": -2.0055007934570312,
      "logits/rejected": -1.997157335281372,
      "logps/chosen": -32.192996978759766,
      "logps/rejected": -32.01173400878906,
      "loss": 0.491,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.10076460987329483,
      "rewards/margins": 0.04085635766386986,
      "rewards/rejected": 0.05990824103355408,
      "step": 120
    },
    {
      "epoch": 0.34,
      "learning_rate": 4.1940827077152755e-06,
      "logits/chosen": -2.034147262573242,
      "logits/rejected": -2.026184558868408,
      "logps/chosen": -30.194936752319336,
      "logps/rejected": -31.905689239501953,
      "loss": 0.4876,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.11231068521738052,
      "rewards/margins": 0.05338172987103462,
      "rewards/rejected": 0.0589289590716362,
      "step": 130
    },
    {
      "epoch": 0.36,
      "learning_rate": 4.0204024186666215e-06,
      "logits/chosen": -1.9643388986587524,
      "logits/rejected": -1.9745395183563232,
      "logps/chosen": -31.08599853515625,
      "logps/rejected": -32.422943115234375,
      "loss": 0.4795,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.13346607983112335,
      "rewards/margins": 0.08869143575429916,
      "rewards/rejected": 0.044774629175662994,
      "step": 140
    },
    {
      "epoch": 0.39,
      "learning_rate": 3.834196265035119e-06,
      "logits/chosen": -1.8776963949203491,
      "logits/rejected": -1.8788686990737915,
      "logps/chosen": -33.690345764160156,
      "logps/rejected": -34.572776794433594,
      "loss": 0.4699,
      "rewards/accuracies": 0.574999988079071,
      "rewards/chosen": 0.20093801617622375,
      "rewards/margins": 0.12714678049087524,
      "rewards/rejected": 0.07379122078418732,
      "step": 150
    },
    {
      "epoch": 0.42,
      "learning_rate": 3.636998309800573e-06,
      "logits/chosen": -1.9286582469940186,
      "logits/rejected": -1.9253017902374268,
      "logps/chosen": -35.773475646972656,
      "logps/rejected": -32.47566604614258,
      "loss": 0.4851,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.15928010642528534,
      "rewards/margins": 0.06044477969408035,
      "rewards/rejected": 0.09883531928062439,
      "step": 160
    },
    {
      "epoch": 0.44,
      "learning_rate": 3.4304331721118078e-06,
      "logits/chosen": -2.0298352241516113,
      "logits/rejected": -2.0225348472595215,
      "logps/chosen": -33.22509002685547,
      "logps/rejected": -31.19403648376465,
      "loss": 0.4603,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.22910937666893005,
      "rewards/margins": 0.165984109044075,
      "rewards/rejected": 0.06312531232833862,
      "step": 170
    },
    {
      "epoch": 0.47,
      "learning_rate": 3.2162026428305436e-06,
      "logits/chosen": -2.036612033843994,
      "logits/rejected": -2.041813373565674,
      "logps/chosen": -31.960119247436523,
      "logps/rejected": -32.171165466308594,
      "loss": 0.4734,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": 0.23386716842651367,
      "rewards/margins": 0.1082783117890358,
      "rewards/rejected": 0.12558886408805847,
      "step": 180
    },
    {
      "epoch": 0.49,
      "learning_rate": 2.996071664294641e-06,
      "logits/chosen": -2.037198066711426,
      "logits/rejected": -2.0344595909118652,
      "logps/chosen": -31.026615142822266,
      "logps/rejected": -31.082998275756836,
      "loss": 0.4781,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.184935063123703,
      "rewards/margins": 0.09084881842136383,
      "rewards/rejected": 0.09408621490001678,
      "step": 190
    },
    {
      "epoch": 0.52,
      "learning_rate": 2.7718537898066833e-06,
      "logits/chosen": -1.908818006515503,
      "logits/rejected": -1.913496732711792,
      "logps/chosen": -31.075199127197266,
      "logps/rejected": -32.616241455078125,
      "loss": 0.466,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.2124340832233429,
      "rewards/margins": 0.14255891740322113,
      "rewards/rejected": 0.06987515836954117,
      "step": 200
    },
    {
      "epoch": 0.52,
      "eval_logits/chosen": -2.23366379737854,
      "eval_logits/rejected": -2.2288384437561035,
      "eval_logps/chosen": -33.779903411865234,
      "eval_logps/rejected": -37.29892349243164,
      "eval_loss": 0.4965229630470276,
      "eval_rewards/accuracies": 0.545265793800354,
      "eval_rewards/chosen": 0.10185908526182175,
      "eval_rewards/margins": 0.014780867844820023,
      "eval_rewards/rejected": 0.08707821369171143,
      "eval_runtime": 145.8117,
      "eval_samples_per_second": 2.352,
      "eval_steps_per_second": 0.295,
      "step": 200
    },
    {
      "epoch": 0.55,
      "learning_rate": 2.5453962426402006e-06,
      "logits/chosen": -2.0208163261413574,
      "logits/rejected": -2.0314111709594727,
      "logps/chosen": -31.514019012451172,
      "logps/rejected": -33.690643310546875,
      "loss": 0.4728,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.18291443586349487,
      "rewards/margins": 0.114091657102108,
      "rewards/rejected": 0.06882277131080627,
      "step": 210
    },
    {
      "epoch": 0.57,
      "learning_rate": 2.3185646976551794e-06,
      "logits/chosen": -1.9141031503677368,
      "logits/rejected": -1.9288082122802734,
      "logps/chosen": -29.5712947845459,
      "logps/rejected": -31.429983139038086,
      "loss": 0.4615,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.2160976380109787,
      "rewards/margins": 0.16482076048851013,
      "rewards/rejected": 0.05127686262130737,
      "step": 220
    },
    {
      "epoch": 0.6,
      "learning_rate": 2.0932279108998323e-06,
      "logits/chosen": -1.9712814092636108,
      "logits/rejected": -1.975285291671753,
      "logps/chosen": -32.82429885864258,
      "logps/rejected": -31.416866302490234,
      "loss": 0.4561,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.23726816475391388,
      "rewards/margins": 0.18429425358772278,
      "rewards/rejected": 0.052973903715610504,
      "step": 230
    },
    {
      "epoch": 0.62,
      "learning_rate": 1.8712423238279358e-06,
      "logits/chosen": -1.969780683517456,
      "logits/rejected": -1.9480432271957397,
      "logps/chosen": -33.583518981933594,
      "logps/rejected": -34.8461799621582,
      "loss": 0.4567,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.22809162735939026,
      "rewards/margins": 0.18624703586101532,
      "rewards/rejected": 0.04184458404779434,
      "step": 240
    },
    {
      "epoch": 0.65,
      "learning_rate": 1.6544367689701824e-06,
      "logits/chosen": -2.0109124183654785,
      "logits/rejected": -2.0076212882995605,
      "logps/chosen": -32.46331024169922,
      "logps/rejected": -35.97381591796875,
      "loss": 0.4799,
      "rewards/accuracies": 0.5625,
      "rewards/chosen": 0.18470348417758942,
      "rewards/margins": 0.08426074683666229,
      "rewards/rejected": 0.10044274479150772,
      "step": 250
    },
    {
      "epoch": 0.68,
      "learning_rate": 1.4445974030621963e-06,
      "logits/chosen": -1.878321647644043,
      "logits/rejected": -1.8758872747421265,
      "logps/chosen": -33.721397399902344,
      "logps/rejected": -35.270362854003906,
      "loss": 0.4785,
      "rewards/accuracies": 0.6000000238418579,
      "rewards/chosen": 0.1868000328540802,
      "rewards/margins": 0.09309660643339157,
      "rewards/rejected": 0.09370341151952744,
      "step": 260
    },
    {
      "epoch": 0.7,
      "learning_rate": 1.243452991757889e-06,
      "logits/chosen": -1.8631956577301025,
      "logits/rejected": -1.8606828451156616,
      "logps/chosen": -33.90094757080078,
      "logps/rejected": -31.57466697692871,
      "loss": 0.4722,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.198659747838974,
      "rewards/margins": 0.11999478191137314,
      "rewards/rejected": 0.07866497337818146,
      "step": 270
    },
    {
      "epoch": 0.73,
      "learning_rate": 1.0526606671603523e-06,
      "logits/chosen": -1.9663877487182617,
      "logits/rejected": -1.9559704065322876,
      "logps/chosen": -34.72657775878906,
      "logps/rejected": -31.63601303100586,
      "loss": 0.4585,
      "rewards/accuracies": 0.6499999761581421,
      "rewards/chosen": 0.25202488899230957,
      "rewards/margins": 0.17003390192985535,
      "rewards/rejected": 0.08199100196361542,
      "step": 280
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.737922755071455e-07,
      "logits/chosen": -2.062107801437378,
      "logits/rejected": -2.0472733974456787,
      "logps/chosen": -30.40212059020996,
      "logps/rejected": -32.340721130371094,
      "loss": 0.4799,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.2088027447462082,
      "rewards/margins": 0.08435753732919693,
      "rewards/rejected": 0.12444518506526947,
      "step": 290
    },
    {
      "epoch": 0.78,
      "learning_rate": 7.08321427484816e-07,
      "logits/chosen": -1.9330482482910156,
      "logits/rejected": -1.9305979013442993,
      "logps/chosen": -32.06965255737305,
      "logps/rejected": -30.65035629272461,
      "loss": 0.4349,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": 0.3429831862449646,
      "rewards/margins": 0.27640262246131897,
      "rewards/rejected": 0.06658058613538742,
      "step": 300
    },
    {
      "epoch": 0.78,
      "eval_logits/chosen": -2.2318336963653564,
      "eval_logits/rejected": -2.2270052433013916,
      "eval_logps/chosen": -33.749427795410156,
      "eval_logps/rejected": -37.27041244506836,
      "eval_loss": 0.4961945414543152,
      "eval_rewards/accuracies": 0.5357142686843872,
      "eval_rewards/chosen": 0.11405016481876373,
      "eval_rewards/margins": 0.015567691065371037,
      "eval_rewards/rejected": 0.09848246723413467,
      "eval_runtime": 145.5479,
      "eval_samples_per_second": 2.357,
      "eval_steps_per_second": 0.295,
      "step": 300
    },
    {
      "epoch": 0.81,
      "learning_rate": 5.576113578589035e-07,
      "logits/chosen": -1.9179494380950928,
      "logits/rejected": -1.9148216247558594,
      "logps/chosen": -31.037755966186523,
      "logps/rejected": -33.56406021118164,
      "loss": 0.4639,
      "rewards/accuracies": 0.762499988079071,
      "rewards/chosen": 0.22623713314533234,
      "rewards/margins": 0.15500028431415558,
      "rewards/rejected": 0.07123686373233795,
      "step": 310
    },
    {
      "epoch": 0.83,
      "learning_rate": 4.229036944380913e-07,
      "logits/chosen": -1.9682966470718384,
      "logits/rejected": -1.9561439752578735,
      "logps/chosen": -34.023921966552734,
      "logps/rejected": -33.415985107421875,
      "loss": 0.4575,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": 0.22886168956756592,
      "rewards/margins": 0.17971986532211304,
      "rewards/rejected": 0.04914180561900139,
      "step": 320
    },
    {
      "epoch": 0.86,
      "learning_rate": 3.053082288996112e-07,
      "logits/chosen": -2.003788709640503,
      "logits/rejected": -2.002437114715576,
      "logps/chosen": -32.8862419128418,
      "logps/rejected": -32.21629333496094,
      "loss": 0.4679,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.23982055485248566,
      "rewards/margins": 0.13450448215007782,
      "rewards/rejected": 0.10531606525182724,
      "step": 330
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.0579377374915805e-07,
      "logits/chosen": -2.0905163288116455,
      "logits/rejected": -2.0749027729034424,
      "logps/chosen": -33.47309112548828,
      "logps/rejected": -32.81334686279297,
      "loss": 0.4618,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.2825208008289337,
      "rewards/margins": 0.16064420342445374,
      "rewards/rejected": 0.12187659740447998,
      "step": 340
    },
    {
      "epoch": 0.91,
      "learning_rate": 1.2518018074041684e-07,
      "logits/chosen": -1.9627025127410889,
      "logits/rejected": -1.9618685245513916,
      "logps/chosen": -32.528175354003906,
      "logps/rejected": -32.22235107421875,
      "loss": 0.4522,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": 0.30156710743904114,
      "rewards/margins": 0.20444798469543457,
      "rewards/rejected": 0.09711913019418716,
      "step": 350
    },
    {
      "epoch": 0.94,
      "learning_rate": 6.41315865106129e-08,
      "logits/chosen": -1.918320655822754,
      "logits/rejected": -1.9285932779312134,
      "logps/chosen": -31.606945037841797,
      "logps/rejected": -34.98893356323242,
      "loss": 0.4718,
      "rewards/accuracies": 0.625,
      "rewards/chosen": 0.2390974760055542,
      "rewards/margins": 0.11916828155517578,
      "rewards/rejected": 0.11992917954921722,
      "step": 360
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.3150941078050325e-08,
      "logits/chosen": -2.0581445693969727,
      "logits/rejected": -2.051628828048706,
      "logps/chosen": -33.017967224121094,
      "logps/rejected": -28.989696502685547,
      "loss": 0.4612,
      "rewards/accuracies": 0.7749999761581421,
      "rewards/chosen": 0.253601610660553,
      "rewards/margins": 0.15891048312187195,
      "rewards/rejected": 0.09469114243984222,
      "step": 370
    },
    {
      "epoch": 0.99,
      "learning_rate": 2.575864278703266e-09,
      "logits/chosen": -1.9176830053329468,
      "logits/rejected": -1.919847846031189,
      "logps/chosen": -33.614540100097656,
      "logps/rejected": -30.760555267333984,
      "loss": 0.4519,
      "rewards/accuracies": 0.7875000238418579,
      "rewards/chosen": 0.24980488419532776,
      "rewards/margins": 0.2049761563539505,
      "rewards/rejected": 0.04482869431376457,
      "step": 380
    },
    {
      "epoch": 1.0,
      "step": 385,
      "total_flos": 0.0,
      "train_loss": 0.4751699732495593,
      "train_runtime": 3253.0203,
      "train_samples_per_second": 0.947,
      "train_steps_per_second": 0.118
    }
  ],
  "logging_steps": 10,
  "max_steps": 385,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}