|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9997996125843297, |
|
"eval_steps": 100, |
|
"global_step": 3742, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.28125, |
|
"learning_rate": 1.3333333333333334e-08, |
|
"logits/chosen": -3.025045394897461, |
|
"logits/rejected": -2.9554684162139893, |
|
"logps/chosen": -203.51824951171875, |
|
"logps/rejected": -217.89437866210938, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.5234375, |
|
"learning_rate": 1.3333333333333336e-07, |
|
"logits/chosen": -2.852483034133911, |
|
"logits/rejected": -2.5924882888793945, |
|
"logps/chosen": -203.398681640625, |
|
"logps/rejected": -200.5269012451172, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5138888955116272, |
|
"rewards/chosen": 0.00020951780606992543, |
|
"rewards/margins": 0.0005117396358400583, |
|
"rewards/rejected": -0.0003022218297701329, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 2.666666666666667e-07, |
|
"logits/chosen": -2.891592025756836, |
|
"logits/rejected": -2.6086668968200684, |
|
"logps/chosen": -196.91787719726562, |
|
"logps/rejected": -190.93399047851562, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.00016576508642174304, |
|
"rewards/margins": 0.0010707227047532797, |
|
"rewards/rejected": -0.001236487878486514, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.25, |
|
"learning_rate": 4.0000000000000003e-07, |
|
"logits/chosen": -2.8247406482696533, |
|
"logits/rejected": -2.615675449371338, |
|
"logps/chosen": -184.14871215820312, |
|
"logps/rejected": -184.94290161132812, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.0007558080833405256, |
|
"rewards/margins": 0.0030627017840743065, |
|
"rewards/rejected": -0.0038185096345841885, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 5.333333333333335e-07, |
|
"logits/chosen": -2.7942700386047363, |
|
"logits/rejected": -2.5588576793670654, |
|
"logps/chosen": -190.18087768554688, |
|
"logps/rejected": -184.86978149414062, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.0013035403098911047, |
|
"rewards/margins": 0.006345006637275219, |
|
"rewards/rejected": -0.007648547179996967, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.5859375, |
|
"learning_rate": 6.666666666666667e-07, |
|
"logits/chosen": -2.7790920734405518, |
|
"logits/rejected": -2.594180107116699, |
|
"logps/chosen": -195.8500213623047, |
|
"logps/rejected": -197.25079345703125, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.001157461549155414, |
|
"rewards/margins": 0.01313072256743908, |
|
"rewards/rejected": -0.011973262764513493, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 8.000000000000001e-07, |
|
"logits/chosen": -2.7710585594177246, |
|
"logits/rejected": -2.5242323875427246, |
|
"logps/chosen": -185.81253051757812, |
|
"logps/rejected": -195.85052490234375, |
|
"loss": 0.6806, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": 0.009653949178755283, |
|
"rewards/margins": 0.025605550035834312, |
|
"rewards/rejected": -0.015951603651046753, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 9.333333333333334e-07, |
|
"logits/chosen": -2.845823049545288, |
|
"logits/rejected": -2.7732534408569336, |
|
"logps/chosen": -185.01431274414062, |
|
"logps/rejected": -185.13381958007812, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.025843212381005287, |
|
"rewards/margins": 0.025248954072594643, |
|
"rewards/rejected": 0.0005942584248259664, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 1.066666666666667e-06, |
|
"logits/chosen": -2.7801637649536133, |
|
"logits/rejected": -2.580289840698242, |
|
"logps/chosen": -207.74093627929688, |
|
"logps/rejected": -203.61135864257812, |
|
"loss": 0.6783, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.03248121589422226, |
|
"rewards/margins": 0.030628155916929245, |
|
"rewards/rejected": 0.0018530559027567506, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 1.421875, |
|
"learning_rate": 1.2000000000000002e-06, |
|
"logits/chosen": -2.8460533618927, |
|
"logits/rejected": -2.6127703189849854, |
|
"logps/chosen": -184.6515655517578, |
|
"logps/rejected": -189.5701446533203, |
|
"loss": 0.6742, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.015769537538290024, |
|
"rewards/margins": 0.03935455530881882, |
|
"rewards/rejected": -0.023585019633173943, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.6484375, |
|
"learning_rate": 1.3333333333333334e-06, |
|
"logits/chosen": -2.798549175262451, |
|
"logits/rejected": -2.63997220993042, |
|
"logps/chosen": -186.33349609375, |
|
"logps/rejected": -193.92739868164062, |
|
"loss": 0.6698, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": 0.00546322762966156, |
|
"rewards/margins": 0.048379648476839066, |
|
"rewards/rejected": -0.042916424572467804, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"eval_logits/chosen": -2.69581937789917, |
|
"eval_logits/rejected": -2.562171220779419, |
|
"eval_logps/chosen": -288.78289794921875, |
|
"eval_logps/rejected": -267.54718017578125, |
|
"eval_loss": 0.6901289224624634, |
|
"eval_rewards/accuracies": 0.5625, |
|
"eval_rewards/chosen": -0.03648632764816284, |
|
"eval_rewards/margins": 0.006536239292472601, |
|
"eval_rewards/rejected": -0.04302256554365158, |
|
"eval_runtime": 782.2962, |
|
"eval_samples_per_second": 2.557, |
|
"eval_steps_per_second": 0.32, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.3125, |
|
"learning_rate": 1.4666666666666669e-06, |
|
"logits/chosen": -2.820195198059082, |
|
"logits/rejected": -2.545072317123413, |
|
"logps/chosen": -190.32762145996094, |
|
"logps/rejected": -188.912353515625, |
|
"loss": 0.6649, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.00024031568318605423, |
|
"rewards/margins": 0.05883366987109184, |
|
"rewards/rejected": -0.05859335511922836, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.5625, |
|
"learning_rate": 1.6000000000000001e-06, |
|
"logits/chosen": -2.7726855278015137, |
|
"logits/rejected": -2.5571906566619873, |
|
"logps/chosen": -194.81582641601562, |
|
"logps/rejected": -202.47158813476562, |
|
"loss": 0.6592, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -6.427392509067431e-05, |
|
"rewards/margins": 0.07058823108673096, |
|
"rewards/rejected": -0.07065249979496002, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 1.7333333333333336e-06, |
|
"logits/chosen": -2.8007471561431885, |
|
"logits/rejected": -2.6735222339630127, |
|
"logps/chosen": -180.63816833496094, |
|
"logps/rejected": -196.3402099609375, |
|
"loss": 0.6591, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.001957343192771077, |
|
"rewards/margins": 0.07113702595233917, |
|
"rewards/rejected": -0.07309436798095703, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.5234375, |
|
"learning_rate": 1.8666666666666669e-06, |
|
"logits/chosen": -2.764997959136963, |
|
"logits/rejected": -2.5524096488952637, |
|
"logps/chosen": -184.10682678222656, |
|
"logps/rejected": -201.29014587402344, |
|
"loss": 0.6534, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.01610555127263069, |
|
"rewards/margins": 0.0830421894788742, |
|
"rewards/rejected": -0.06693664193153381, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.3359375, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"logits/chosen": -2.812412738800049, |
|
"logits/rejected": -2.6014838218688965, |
|
"logps/chosen": -193.41714477539062, |
|
"logps/rejected": -208.3372344970703, |
|
"loss": 0.6436, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.01172459963709116, |
|
"rewards/margins": 0.10551564395427704, |
|
"rewards/rejected": -0.11724023520946503, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 1.6640625, |
|
"learning_rate": 2.133333333333334e-06, |
|
"logits/chosen": -2.8185415267944336, |
|
"logits/rejected": -2.615097761154175, |
|
"logps/chosen": -195.69102478027344, |
|
"logps/rejected": -213.40408325195312, |
|
"loss": 0.6294, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.03036859631538391, |
|
"rewards/margins": 0.13596105575561523, |
|
"rewards/rejected": -0.16632965207099915, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.75, |
|
"learning_rate": 2.266666666666667e-06, |
|
"logits/chosen": -2.826056957244873, |
|
"logits/rejected": -2.6662731170654297, |
|
"logps/chosen": -205.1206512451172, |
|
"logps/rejected": -218.390869140625, |
|
"loss": 0.6173, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": 0.005031009670346975, |
|
"rewards/margins": 0.1644003987312317, |
|
"rewards/rejected": -0.15936937928199768, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 2.4000000000000003e-06, |
|
"logits/chosen": -2.845885753631592, |
|
"logits/rejected": -2.6642587184906006, |
|
"logps/chosen": -207.2623748779297, |
|
"logps/rejected": -219.3222198486328, |
|
"loss": 0.621, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.06885858625173569, |
|
"rewards/margins": 0.15684355795383453, |
|
"rewards/rejected": -0.22570213675498962, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.9921875, |
|
"learning_rate": 2.5333333333333338e-06, |
|
"logits/chosen": -2.8301947116851807, |
|
"logits/rejected": -2.6005446910858154, |
|
"logps/chosen": -194.04434204101562, |
|
"logps/rejected": -226.3549346923828, |
|
"loss": 0.6042, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.010565501637756824, |
|
"rewards/margins": 0.1950242817401886, |
|
"rewards/rejected": -0.20558981597423553, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.90625, |
|
"learning_rate": 2.666666666666667e-06, |
|
"logits/chosen": -2.886862277984619, |
|
"logits/rejected": -2.6888222694396973, |
|
"logps/chosen": -198.06605529785156, |
|
"logps/rejected": -230.2741241455078, |
|
"loss": 0.5864, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.08909536898136139, |
|
"rewards/margins": 0.23902097344398499, |
|
"rewards/rejected": -0.32811635732650757, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"eval_logits/chosen": -2.721036434173584, |
|
"eval_logits/rejected": -2.583653211593628, |
|
"eval_logps/chosen": -300.270263671875, |
|
"eval_logps/rejected": -283.8880615234375, |
|
"eval_loss": 0.6711774468421936, |
|
"eval_rewards/accuracies": 0.5914999842643738, |
|
"eval_rewards/chosen": -0.15135958790779114, |
|
"eval_rewards/margins": 0.055071912705898285, |
|
"eval_rewards/rejected": -0.20643149316310883, |
|
"eval_runtime": 781.0761, |
|
"eval_samples_per_second": 2.561, |
|
"eval_steps_per_second": 0.32, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 2.09375, |
|
"learning_rate": 2.8000000000000003e-06, |
|
"logits/chosen": -2.8395516872406006, |
|
"logits/rejected": -2.6661925315856934, |
|
"logps/chosen": -191.8214874267578, |
|
"logps/rejected": -225.88900756835938, |
|
"loss": 0.5772, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.06861170381307602, |
|
"rewards/margins": 0.2652565836906433, |
|
"rewards/rejected": -0.3338682949542999, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.9453125, |
|
"learning_rate": 2.9333333333333338e-06, |
|
"logits/chosen": -2.861643075942993, |
|
"logits/rejected": -2.617266893386841, |
|
"logps/chosen": -202.16476440429688, |
|
"logps/rejected": -233.1599578857422, |
|
"loss": 0.5759, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -0.09456236660480499, |
|
"rewards/margins": 0.27134519815444946, |
|
"rewards/rejected": -0.36590754985809326, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 3.171875, |
|
"learning_rate": 3.066666666666667e-06, |
|
"logits/chosen": -2.8064911365509033, |
|
"logits/rejected": -2.6874587535858154, |
|
"logps/chosen": -211.124267578125, |
|
"logps/rejected": -253.9136962890625, |
|
"loss": 0.5572, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.11212892830371857, |
|
"rewards/margins": 0.3169172704219818, |
|
"rewards/rejected": -0.4290461540222168, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 3.40625, |
|
"learning_rate": 3.2000000000000003e-06, |
|
"logits/chosen": -2.8507437705993652, |
|
"logits/rejected": -2.614583969116211, |
|
"logps/chosen": -207.95834350585938, |
|
"logps/rejected": -242.54525756835938, |
|
"loss": 0.5409, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.1064368262887001, |
|
"rewards/margins": 0.3595541715621948, |
|
"rewards/rejected": -0.46599096059799194, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 3.375, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"logits/chosen": -2.758396625518799, |
|
"logits/rejected": -2.6282601356506348, |
|
"logps/chosen": -205.42391967773438, |
|
"logps/rejected": -261.75799560546875, |
|
"loss": 0.5237, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.1666347235441208, |
|
"rewards/margins": 0.413015216588974, |
|
"rewards/rejected": -0.5796499252319336, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 4.34375, |
|
"learning_rate": 3.4666666666666672e-06, |
|
"logits/chosen": -2.8751864433288574, |
|
"logits/rejected": -2.75618052482605, |
|
"logps/chosen": -206.491943359375, |
|
"logps/rejected": -259.9626770019531, |
|
"loss": 0.5182, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.12468788772821426, |
|
"rewards/margins": 0.4317372739315033, |
|
"rewards/rejected": -0.556425154209137, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 2.21875, |
|
"learning_rate": 3.6000000000000003e-06, |
|
"logits/chosen": -2.9339048862457275, |
|
"logits/rejected": -2.759460210800171, |
|
"logps/chosen": -219.48843383789062, |
|
"logps/rejected": -272.0813293457031, |
|
"loss": 0.4921, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.20950379967689514, |
|
"rewards/margins": 0.4998226761817932, |
|
"rewards/rejected": -0.709326446056366, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 3.7333333333333337e-06, |
|
"logits/chosen": -2.836683511734009, |
|
"logits/rejected": -2.7271854877471924, |
|
"logps/chosen": -229.08169555664062, |
|
"logps/rejected": -287.5020446777344, |
|
"loss": 0.4868, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.2740945518016815, |
|
"rewards/margins": 0.520361065864563, |
|
"rewards/rejected": -0.7944557070732117, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.78125, |
|
"learning_rate": 3.866666666666667e-06, |
|
"logits/chosen": -2.8678526878356934, |
|
"logits/rejected": -2.572840690612793, |
|
"logps/chosen": -224.20431518554688, |
|
"logps/rejected": -272.62664794921875, |
|
"loss": 0.4813, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -0.2870492935180664, |
|
"rewards/margins": 0.539145827293396, |
|
"rewards/rejected": -0.8261950612068176, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 3.765625, |
|
"learning_rate": 4.000000000000001e-06, |
|
"logits/chosen": -2.9098057746887207, |
|
"logits/rejected": -2.6796698570251465, |
|
"logps/chosen": -236.15304565429688, |
|
"logps/rejected": -302.85540771484375, |
|
"loss": 0.4604, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -0.3245059847831726, |
|
"rewards/margins": 0.5988339185714722, |
|
"rewards/rejected": -0.9233399629592896, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"eval_logits/chosen": -2.7866616249084473, |
|
"eval_logits/rejected": -2.646491050720215, |
|
"eval_logps/chosen": -319.6733703613281, |
|
"eval_logps/rejected": -309.0577087402344, |
|
"eval_loss": 0.6634631752967834, |
|
"eval_rewards/accuracies": 0.5755000114440918, |
|
"eval_rewards/chosen": -0.34539124369621277, |
|
"eval_rewards/margins": 0.11273663491010666, |
|
"eval_rewards/rejected": -0.45812782645225525, |
|
"eval_runtime": 781.0865, |
|
"eval_samples_per_second": 2.561, |
|
"eval_steps_per_second": 0.32, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 4.71875, |
|
"learning_rate": 4.133333333333333e-06, |
|
"logits/chosen": -2.762162446975708, |
|
"logits/rejected": -2.5808608531951904, |
|
"logps/chosen": -220.88076782226562, |
|
"logps/rejected": -284.20111083984375, |
|
"loss": 0.4536, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.34411242604255676, |
|
"rewards/margins": 0.6413618922233582, |
|
"rewards/rejected": -0.9854742884635925, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 2.921875, |
|
"learning_rate": 4.266666666666668e-06, |
|
"logits/chosen": -2.8597700595855713, |
|
"logits/rejected": -2.6923470497131348, |
|
"logps/chosen": -216.56784057617188, |
|
"logps/rejected": -292.9040222167969, |
|
"loss": 0.4418, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.37289419770240784, |
|
"rewards/margins": 0.6711525917053223, |
|
"rewards/rejected": -1.0440467596054077, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.40625, |
|
"learning_rate": 4.4e-06, |
|
"logits/chosen": -2.923088312149048, |
|
"logits/rejected": -2.7437281608581543, |
|
"logps/chosen": -242.5861358642578, |
|
"logps/rejected": -329.2769775390625, |
|
"loss": 0.4273, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5358756184577942, |
|
"rewards/margins": 0.750207781791687, |
|
"rewards/rejected": -1.286083459854126, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 4.46875, |
|
"learning_rate": 4.533333333333334e-06, |
|
"logits/chosen": -2.840907096862793, |
|
"logits/rejected": -2.619574785232544, |
|
"logps/chosen": -250.60830688476562, |
|
"logps/rejected": -342.74652099609375, |
|
"loss": 0.417, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -0.5628878474235535, |
|
"rewards/margins": 0.7784547805786133, |
|
"rewards/rejected": -1.3413426876068115, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 3.875, |
|
"learning_rate": 4.666666666666667e-06, |
|
"logits/chosen": -2.8517518043518066, |
|
"logits/rejected": -2.663865089416504, |
|
"logps/chosen": -267.8255920410156, |
|
"logps/rejected": -348.36627197265625, |
|
"loss": 0.4245, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7652944922447205, |
|
"rewards/margins": 0.7990447282791138, |
|
"rewards/rejected": -1.5643391609191895, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 4.375, |
|
"learning_rate": 4.800000000000001e-06, |
|
"logits/chosen": -2.710493564605713, |
|
"logits/rejected": -2.574153184890747, |
|
"logps/chosen": -266.52459716796875, |
|
"logps/rejected": -371.8028259277344, |
|
"loss": 0.403, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.861789345741272, |
|
"rewards/margins": 0.9031310081481934, |
|
"rewards/rejected": -1.7649204730987549, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 5.34375, |
|
"learning_rate": 4.933333333333334e-06, |
|
"logits/chosen": -2.7110581398010254, |
|
"logits/rejected": -2.490288019180298, |
|
"logps/chosen": -316.9415283203125, |
|
"logps/rejected": -405.4663391113281, |
|
"loss": 0.4073, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -1.1048616170883179, |
|
"rewards/margins": 0.959460437297821, |
|
"rewards/rejected": -2.064321994781494, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 6.4375, |
|
"learning_rate": 4.999972794121976e-06, |
|
"logits/chosen": -2.7278122901916504, |
|
"logits/rejected": -2.473001003265381, |
|
"logps/chosen": -306.61614990234375, |
|
"logps/rejected": -403.7631530761719, |
|
"loss": 0.3723, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.060858964920044, |
|
"rewards/margins": 1.0833942890167236, |
|
"rewards/rejected": -2.1442532539367676, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 6.625, |
|
"learning_rate": 4.999755150650535e-06, |
|
"logits/chosen": -2.621072292327881, |
|
"logits/rejected": -2.448957920074463, |
|
"logps/chosen": -318.9244384765625, |
|
"logps/rejected": -465.15625, |
|
"loss": 0.317, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -1.2451636791229248, |
|
"rewards/margins": 1.3955293893814087, |
|
"rewards/rejected": -2.640693187713623, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 10.4375, |
|
"learning_rate": 4.99931988265533e-06, |
|
"logits/chosen": -2.5788636207580566, |
|
"logits/rejected": -2.3275580406188965, |
|
"logps/chosen": -312.1104431152344, |
|
"logps/rejected": -457.56646728515625, |
|
"loss": 0.3274, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -1.3098253011703491, |
|
"rewards/margins": 1.4334557056427002, |
|
"rewards/rejected": -2.743281126022339, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"eval_logits/chosen": -2.465298652648926, |
|
"eval_logits/rejected": -2.3317997455596924, |
|
"eval_logps/chosen": -425.34393310546875, |
|
"eval_logps/rejected": -439.54949951171875, |
|
"eval_loss": 0.6736099720001221, |
|
"eval_rewards/accuracies": 0.5864999890327454, |
|
"eval_rewards/chosen": -1.4020962715148926, |
|
"eval_rewards/margins": 0.36094897985458374, |
|
"eval_rewards/rejected": -1.7630454301834106, |
|
"eval_runtime": 780.8961, |
|
"eval_samples_per_second": 2.561, |
|
"eval_steps_per_second": 0.32, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 3.34375, |
|
"learning_rate": 4.998667028030071e-06, |
|
"logits/chosen": -2.462109327316284, |
|
"logits/rejected": -2.3724803924560547, |
|
"logps/chosen": -334.2771911621094, |
|
"logps/rejected": -479.7632751464844, |
|
"loss": 0.351, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -1.4377202987670898, |
|
"rewards/margins": 1.3972641229629517, |
|
"rewards/rejected": -2.834984540939331, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 11.875, |
|
"learning_rate": 4.997796643611192e-06, |
|
"logits/chosen": -2.428496837615967, |
|
"logits/rejected": -2.3284060955047607, |
|
"logps/chosen": -304.59942626953125, |
|
"logps/rejected": -476.5513610839844, |
|
"loss": 0.3337, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.3155182600021362, |
|
"rewards/margins": 1.556190013885498, |
|
"rewards/rejected": -2.871708393096924, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 5.53125, |
|
"learning_rate": 4.9967088051729154e-06, |
|
"logits/chosen": -2.4595181941986084, |
|
"logits/rejected": -2.2439799308776855, |
|
"logps/chosen": -356.4442443847656, |
|
"logps/rejected": -502.91497802734375, |
|
"loss": 0.3412, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -1.5988712310791016, |
|
"rewards/margins": 1.4717421531677246, |
|
"rewards/rejected": -3.070613384246826, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 8.25, |
|
"learning_rate": 4.995403607420644e-06, |
|
"logits/chosen": -2.4543726444244385, |
|
"logits/rejected": -2.2793850898742676, |
|
"logps/chosen": -318.9652404785156, |
|
"logps/rejected": -472.81903076171875, |
|
"loss": 0.3197, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.3662667274475098, |
|
"rewards/margins": 1.52040696144104, |
|
"rewards/rejected": -2.8866734504699707, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 3.609375, |
|
"learning_rate": 4.993881163982721e-06, |
|
"logits/chosen": -2.369863271713257, |
|
"logits/rejected": -2.2286698818206787, |
|
"logps/chosen": -356.9123840332031, |
|
"logps/rejected": -509.55908203125, |
|
"loss": 0.3459, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -1.6083195209503174, |
|
"rewards/margins": 1.4843461513519287, |
|
"rewards/rejected": -3.092665672302246, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 13.4375, |
|
"learning_rate": 4.992141607400541e-06, |
|
"logits/chosen": -2.3464982509613037, |
|
"logits/rejected": -2.1806864738464355, |
|
"logps/chosen": -380.472900390625, |
|
"logps/rejected": -534.5485229492188, |
|
"loss": 0.355, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -1.8383054733276367, |
|
"rewards/margins": 1.5852895975112915, |
|
"rewards/rejected": -3.4235949516296387, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 4.990185089117005e-06, |
|
"logits/chosen": -2.3217694759368896, |
|
"logits/rejected": -2.139650821685791, |
|
"logps/chosen": -352.61322021484375, |
|
"logps/rejected": -521.1868896484375, |
|
"loss": 0.315, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.4914997816085815, |
|
"rewards/margins": 1.6338955163955688, |
|
"rewards/rejected": -3.1253952980041504, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 6.28125, |
|
"learning_rate": 4.988011779463336e-06, |
|
"logits/chosen": -2.3029465675354004, |
|
"logits/rejected": -2.1867263317108154, |
|
"logps/chosen": -386.57373046875, |
|
"logps/rejected": -545.6253662109375, |
|
"loss": 0.3282, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.977741003036499, |
|
"rewards/margins": 1.5777519941329956, |
|
"rewards/rejected": -3.555493116378784, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 4.375, |
|
"learning_rate": 4.98562186764426e-06, |
|
"logits/chosen": -2.2591264247894287, |
|
"logits/rejected": -2.1315784454345703, |
|
"logps/chosen": -339.3611145019531, |
|
"logps/rejected": -513.1484375, |
|
"loss": 0.297, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -1.6218554973602295, |
|
"rewards/margins": 1.7187793254852295, |
|
"rewards/rejected": -3.340634822845459, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 5.25, |
|
"learning_rate": 4.983015561721522e-06, |
|
"logits/chosen": -2.2526965141296387, |
|
"logits/rejected": -2.069826602935791, |
|
"logps/chosen": -320.3377990722656, |
|
"logps/rejected": -532.88623046875, |
|
"loss": 0.2403, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -1.4934319257736206, |
|
"rewards/margins": 1.9852195978164673, |
|
"rewards/rejected": -3.478651523590088, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"eval_logits/chosen": -2.168368339538574, |
|
"eval_logits/rejected": -2.045610189437866, |
|
"eval_logps/chosen": -513.8699340820312, |
|
"eval_logps/rejected": -537.7081298828125, |
|
"eval_loss": 0.6997026205062866, |
|
"eval_rewards/accuracies": 0.5985000133514404, |
|
"eval_rewards/chosen": -2.2873566150665283, |
|
"eval_rewards/margins": 0.4572749435901642, |
|
"eval_rewards/rejected": -2.74463152885437, |
|
"eval_runtime": 781.5275, |
|
"eval_samples_per_second": 2.559, |
|
"eval_steps_per_second": 0.32, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 6.8125, |
|
"learning_rate": 4.980193088595777e-06, |
|
"logits/chosen": -2.1158764362335205, |
|
"logits/rejected": -2.001831531524658, |
|
"logps/chosen": -453.5465393066406, |
|
"logps/rejected": -606.9456787109375, |
|
"loss": 0.3288, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -2.537163257598877, |
|
"rewards/margins": 1.5891704559326172, |
|
"rewards/rejected": -4.126333713531494, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.640625, |
|
"learning_rate": 4.977154693986841e-06, |
|
"logits/chosen": -2.253612518310547, |
|
"logits/rejected": -2.078402042388916, |
|
"logps/chosen": -348.5957946777344, |
|
"logps/rejected": -533.9927978515625, |
|
"loss": 0.2757, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -1.4598500728607178, |
|
"rewards/margins": 1.7905546426773071, |
|
"rewards/rejected": -3.2504043579101562, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 3.90625, |
|
"learning_rate": 4.97390064241229e-06, |
|
"logits/chosen": -2.1055116653442383, |
|
"logits/rejected": -1.9794048070907593, |
|
"logps/chosen": -391.5548095703125, |
|
"logps/rejected": -594.6146240234375, |
|
"loss": 0.2911, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -2.0448694229125977, |
|
"rewards/margins": 1.8874633312225342, |
|
"rewards/rejected": -3.9323325157165527, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 5.96875, |
|
"learning_rate": 4.970431217164442e-06, |
|
"logits/chosen": -2.1032092571258545, |
|
"logits/rejected": -1.9064871072769165, |
|
"logps/chosen": -426.44989013671875, |
|
"logps/rejected": -619.9075317382812, |
|
"loss": 0.2672, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -2.313416004180908, |
|
"rewards/margins": 1.9491608142852783, |
|
"rewards/rejected": -4.262576580047607, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 4.71875, |
|
"learning_rate": 4.966746720285684e-06, |
|
"logits/chosen": -2.1194474697113037, |
|
"logits/rejected": -1.8741849660873413, |
|
"logps/chosen": -383.41705322265625, |
|
"logps/rejected": -557.76904296875, |
|
"loss": 0.3418, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -1.8038151264190674, |
|
"rewards/margins": 1.8368213176727295, |
|
"rewards/rejected": -3.640636444091797, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 6.46875, |
|
"learning_rate": 4.9628474725421845e-06, |
|
"logits/chosen": -2.0428099632263184, |
|
"logits/rejected": -1.881706953048706, |
|
"logps/chosen": -403.2717590332031, |
|
"logps/rejected": -580.6181030273438, |
|
"loss": 0.3346, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -2.0810344219207764, |
|
"rewards/margins": 1.7879607677459717, |
|
"rewards/rejected": -3.868995189666748, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 5.5625, |
|
"learning_rate": 4.958733813395963e-06, |
|
"logits/chosen": -2.0323638916015625, |
|
"logits/rejected": -1.907156229019165, |
|
"logps/chosen": -435.7867126464844, |
|
"logps/rejected": -609.8319091796875, |
|
"loss": 0.3164, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.3000435829162598, |
|
"rewards/margins": 1.7664451599121094, |
|
"rewards/rejected": -4.066488742828369, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 4.875, |
|
"learning_rate": 4.95440610097534e-06, |
|
"logits/chosen": -2.064138889312744, |
|
"logits/rejected": -1.9411983489990234, |
|
"logps/chosen": -364.3408508300781, |
|
"logps/rejected": -598.0901489257812, |
|
"loss": 0.2586, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.8342880010604858, |
|
"rewards/margins": 2.1939587593078613, |
|
"rewards/rejected": -4.028246879577637, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 5.09375, |
|
"learning_rate": 4.949864712043756e-06, |
|
"logits/chosen": -2.042320728302002, |
|
"logits/rejected": -1.8681459426879883, |
|
"logps/chosen": -400.0122985839844, |
|
"logps/rejected": -617.6143798828125, |
|
"loss": 0.2548, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -2.086648464202881, |
|
"rewards/margins": 2.196746349334717, |
|
"rewards/rejected": -4.283394813537598, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 4.945110041966975e-06, |
|
"logits/chosen": -1.9855597019195557, |
|
"logits/rejected": -1.8242261409759521, |
|
"logps/chosen": -444.31317138671875, |
|
"logps/rejected": -660.8572387695312, |
|
"loss": 0.2586, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -2.530787944793701, |
|
"rewards/margins": 2.156179904937744, |
|
"rewards/rejected": -4.686967372894287, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"eval_logits/chosen": -2.0491983890533447, |
|
"eval_logits/rejected": -1.9324215650558472, |
|
"eval_logps/chosen": -524.0146484375, |
|
"eval_logps/rejected": -548.6314697265625, |
|
"eval_loss": 0.7062023878097534, |
|
"eval_rewards/accuracies": 0.5839999914169312, |
|
"eval_rewards/chosen": -2.388803720474243, |
|
"eval_rewards/margins": 0.4650622308254242, |
|
"eval_rewards/rejected": -2.8538658618927, |
|
"eval_runtime": 781.5672, |
|
"eval_samples_per_second": 2.559, |
|
"eval_steps_per_second": 0.32, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 8.125, |
|
"learning_rate": 4.940142504678662e-06, |
|
"logits/chosen": -2.0201022624969482, |
|
"logits/rejected": -1.893396019935608, |
|
"logps/chosen": -410.92913818359375, |
|
"logps/rejected": -636.2786865234375, |
|
"loss": 0.2489, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.2299957275390625, |
|
"rewards/margins": 2.2826168537139893, |
|
"rewards/rejected": -4.512612819671631, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 7.875, |
|
"learning_rate": 4.934962532644348e-06, |
|
"logits/chosen": -2.003357410430908, |
|
"logits/rejected": -1.861501693725586, |
|
"logps/chosen": -430.82904052734375, |
|
"logps/rejected": -698.9578857421875, |
|
"loss": 0.2217, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -2.4477930068969727, |
|
"rewards/margins": 2.599547863006592, |
|
"rewards/rejected": -5.0473408699035645, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 12.875, |
|
"learning_rate": 4.929570576823779e-06, |
|
"logits/chosen": -2.044196605682373, |
|
"logits/rejected": -1.859323263168335, |
|
"logps/chosen": -417.61865234375, |
|
"logps/rejected": -642.5675048828125, |
|
"loss": 0.2632, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -2.268167018890381, |
|
"rewards/margins": 2.251687526702881, |
|
"rewards/rejected": -4.519854545593262, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 12.4375, |
|
"learning_rate": 4.923967106631655e-06, |
|
"logits/chosen": -2.0745182037353516, |
|
"logits/rejected": -1.884718894958496, |
|
"logps/chosen": -407.1151428222656, |
|
"logps/rejected": -658.7186889648438, |
|
"loss": 0.24, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -2.072746992111206, |
|
"rewards/margins": 2.464381456375122, |
|
"rewards/rejected": -4.537128448486328, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 13.0, |
|
"learning_rate": 4.918152609896768e-06, |
|
"logits/chosen": -2.031191110610962, |
|
"logits/rejected": -1.8076276779174805, |
|
"logps/chosen": -422.650390625, |
|
"logps/rejected": -628.3702392578125, |
|
"loss": 0.2986, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -2.175328493118286, |
|
"rewards/margins": 2.1270930767059326, |
|
"rewards/rejected": -4.3024210929870605, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 11.25, |
|
"learning_rate": 4.9121275928195265e-06, |
|
"logits/chosen": -1.9430030584335327, |
|
"logits/rejected": -1.8268959522247314, |
|
"logps/chosen": -450.4566345214844, |
|
"logps/rejected": -635.5111083984375, |
|
"loss": 0.382, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.5708768367767334, |
|
"rewards/margins": 1.800329566001892, |
|
"rewards/rejected": -4.371206283569336, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 4.9058925799278934e-06, |
|
"logits/chosen": -1.9249922037124634, |
|
"logits/rejected": -1.7653077840805054, |
|
"logps/chosen": -440.85693359375, |
|
"logps/rejected": -660.0525512695312, |
|
"loss": 0.2587, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.6279947757720947, |
|
"rewards/margins": 2.1633927822113037, |
|
"rewards/rejected": -4.791387557983398, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 9.9375, |
|
"learning_rate": 4.899448114031714e-06, |
|
"logits/chosen": -1.9734159708023071, |
|
"logits/rejected": -1.7737067937850952, |
|
"logps/chosen": -416.80517578125, |
|
"logps/rejected": -651.2550659179688, |
|
"loss": 0.2519, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -2.3417036533355713, |
|
"rewards/margins": 2.3331680297851562, |
|
"rewards/rejected": -4.674871921539307, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.984375, |
|
"learning_rate": 4.892794756175467e-06, |
|
"logits/chosen": -1.8331539630889893, |
|
"logits/rejected": -1.6363424062728882, |
|
"logps/chosen": -449.67218017578125, |
|
"logps/rejected": -700.6417236328125, |
|
"loss": 0.2215, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -2.48677134513855, |
|
"rewards/margins": 2.510742664337158, |
|
"rewards/rejected": -4.997514247894287, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 4.885933085589416e-06, |
|
"logits/chosen": -1.9560575485229492, |
|
"logits/rejected": -1.7940680980682373, |
|
"logps/chosen": -468.181640625, |
|
"logps/rejected": -718.8138427734375, |
|
"loss": 0.2338, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -2.665666103363037, |
|
"rewards/margins": 2.501105546951294, |
|
"rewards/rejected": -5.166770935058594, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"eval_logits/chosen": -2.0428850650787354, |
|
"eval_logits/rejected": -1.9258164167404175, |
|
"eval_logps/chosen": -527.9253540039062, |
|
"eval_logps/rejected": -551.26611328125, |
|
"eval_loss": 0.707597553730011, |
|
"eval_rewards/accuracies": 0.5864999890327454, |
|
"eval_rewards/chosen": -2.4279117584228516, |
|
"eval_rewards/margins": 0.45229974389076233, |
|
"eval_rewards/rejected": -2.880211353302002, |
|
"eval_runtime": 781.2446, |
|
"eval_samples_per_second": 2.56, |
|
"eval_steps_per_second": 0.32, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 4.878863699639183e-06, |
|
"logits/chosen": -2.0451254844665527, |
|
"logits/rejected": -1.7745431661605835, |
|
"logps/chosen": -405.2158508300781, |
|
"logps/rejected": -666.8660888671875, |
|
"loss": 0.2089, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -2.268383502960205, |
|
"rewards/margins": 2.578892230987549, |
|
"rewards/rejected": -4.847275733947754, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 7.875, |
|
"learning_rate": 4.871587213773745e-06, |
|
"logits/chosen": -1.8383365869522095, |
|
"logits/rejected": -1.6933542490005493, |
|
"logps/chosen": -471.1405334472656, |
|
"logps/rejected": -719.7000122070312, |
|
"loss": 0.254, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.016746997833252, |
|
"rewards/margins": 2.3775877952575684, |
|
"rewards/rejected": -5.39433479309082, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 8.1875, |
|
"learning_rate": 4.864104261471856e-06, |
|
"logits/chosen": -2.0093419551849365, |
|
"logits/rejected": -1.8601839542388916, |
|
"logps/chosen": -440.412353515625, |
|
"logps/rejected": -686.5137939453125, |
|
"loss": 0.2582, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -2.4893579483032227, |
|
"rewards/margins": 2.3431363105773926, |
|
"rewards/rejected": -4.832494258880615, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 5.09375, |
|
"learning_rate": 4.856415494186888e-06, |
|
"logits/chosen": -1.8051137924194336, |
|
"logits/rejected": -1.6217536926269531, |
|
"logps/chosen": -465.5696716308594, |
|
"logps/rejected": -726.9296264648438, |
|
"loss": 0.2112, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -2.7774364948272705, |
|
"rewards/margins": 2.651355266571045, |
|
"rewards/rejected": -5.4287919998168945, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 2.140625, |
|
"learning_rate": 4.848521581290129e-06, |
|
"logits/chosen": -1.7539660930633545, |
|
"logits/rejected": -1.5488591194152832, |
|
"logps/chosen": -537.1699829101562, |
|
"logps/rejected": -836.3923950195312, |
|
"loss": 0.1996, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -3.470181941986084, |
|
"rewards/margins": 2.852477550506592, |
|
"rewards/rejected": -6.322659492492676, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 6.1875, |
|
"learning_rate": 4.840423210012499e-06, |
|
"logits/chosen": -1.7398707866668701, |
|
"logits/rejected": -1.5924098491668701, |
|
"logps/chosen": -524.1094970703125, |
|
"logps/rejected": -829.4354248046875, |
|
"loss": 0.1781, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.3822684288024902, |
|
"rewards/margins": 3.0030548572540283, |
|
"rewards/rejected": -6.385323524475098, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 5.875, |
|
"learning_rate": 4.832121085384726e-06, |
|
"logits/chosen": -1.686767339706421, |
|
"logits/rejected": -1.5091755390167236, |
|
"logps/chosen": -591.5139770507812, |
|
"logps/rejected": -932.99365234375, |
|
"loss": 0.2262, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.011205196380615, |
|
"rewards/margins": 3.2888877391815186, |
|
"rewards/rejected": -7.300093650817871, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 15.3125, |
|
"learning_rate": 4.823615930175965e-06, |
|
"logits/chosen": -1.840598702430725, |
|
"logits/rejected": -1.5934467315673828, |
|
"logps/chosen": -480.89990234375, |
|
"logps/rejected": -787.3369140625, |
|
"loss": 0.1987, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.085526943206787, |
|
"rewards/margins": 3.0567963123321533, |
|
"rewards/rejected": -6.1423234939575195, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 8.25, |
|
"learning_rate": 4.814908484830876e-06, |
|
"logits/chosen": -1.8566583395004272, |
|
"logits/rejected": -1.7526509761810303, |
|
"logps/chosen": -491.6114196777344, |
|
"logps/rejected": -752.9496459960938, |
|
"loss": 0.3075, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -2.904597520828247, |
|
"rewards/margins": 2.5675301551818848, |
|
"rewards/rejected": -5.4721269607543945, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 9.375, |
|
"learning_rate": 4.80599950740516e-06, |
|
"logits/chosen": -1.920092225074768, |
|
"logits/rejected": -1.7277867794036865, |
|
"logps/chosen": -479.88238525390625, |
|
"logps/rejected": -748.7023315429688, |
|
"loss": 0.2163, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.6948907375335693, |
|
"rewards/margins": 2.6764466762542725, |
|
"rewards/rejected": -5.371337413787842, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_logits/chosen": -1.9782260656356812, |
|
"eval_logits/rejected": -1.8626127243041992, |
|
"eval_logps/chosen": -610.1845092773438, |
|
"eval_logps/rejected": -641.7468872070312, |
|
"eval_loss": 0.7138827443122864, |
|
"eval_rewards/accuracies": 0.6029999852180481, |
|
"eval_rewards/chosen": -3.250502109527588, |
|
"eval_rewards/margins": 0.5345167517662048, |
|
"eval_rewards/rejected": -3.785019636154175, |
|
"eval_runtime": 781.4822, |
|
"eval_samples_per_second": 2.559, |
|
"eval_steps_per_second": 0.32, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 5.75, |
|
"learning_rate": 4.796889773499569e-06, |
|
"logits/chosen": -1.8404449224472046, |
|
"logits/rejected": -1.6686649322509766, |
|
"logps/chosen": -595.162109375, |
|
"logps/rejected": -937.796875, |
|
"loss": 0.2058, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.065727710723877, |
|
"rewards/margins": 3.215528964996338, |
|
"rewards/rejected": -7.281256198883057, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 8.6875, |
|
"learning_rate": 4.787580076192377e-06, |
|
"logits/chosen": -1.7283341884613037, |
|
"logits/rejected": -1.5389282703399658, |
|
"logps/chosen": -565.5133056640625, |
|
"logps/rejected": -909.4611206054688, |
|
"loss": 0.1806, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.863267421722412, |
|
"rewards/margins": 3.289839506149292, |
|
"rewards/rejected": -7.153106689453125, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 4.65625, |
|
"learning_rate": 4.77807122597034e-06, |
|
"logits/chosen": -1.8481029272079468, |
|
"logits/rejected": -1.607322096824646, |
|
"logps/chosen": -589.0106811523438, |
|
"logps/rejected": -942.8314208984375, |
|
"loss": 0.2034, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.9117865562438965, |
|
"rewards/margins": 3.432347536087036, |
|
"rewards/rejected": -7.3441338539123535, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.46875, |
|
"learning_rate": 4.768364050658135e-06, |
|
"logits/chosen": -1.9006433486938477, |
|
"logits/rejected": -1.6561591625213623, |
|
"logps/chosen": -529.4610595703125, |
|
"logps/rejected": -857.2683715820312, |
|
"loss": 0.1928, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -3.359536647796631, |
|
"rewards/margins": 3.191103458404541, |
|
"rewards/rejected": -6.550640106201172, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 6.3125, |
|
"learning_rate": 4.758459395346292e-06, |
|
"logits/chosen": -1.8754297494888306, |
|
"logits/rejected": -1.6810500621795654, |
|
"logps/chosen": -516.5537109375, |
|
"logps/rejected": -870.4509887695312, |
|
"loss": 0.2042, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -3.3872780799865723, |
|
"rewards/margins": 3.3747024536132812, |
|
"rewards/rejected": -6.761981010437012, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 7.84375, |
|
"learning_rate": 4.748358122317621e-06, |
|
"logits/chosen": -1.7386939525604248, |
|
"logits/rejected": -1.560772180557251, |
|
"logps/chosen": -666.9801025390625, |
|
"logps/rejected": -998.2257080078125, |
|
"loss": 0.2537, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -4.832745552062988, |
|
"rewards/margins": 3.112260341644287, |
|
"rewards/rejected": -7.945005893707275, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 16.375, |
|
"learning_rate": 4.738061110972143e-06, |
|
"logits/chosen": -1.8336429595947266, |
|
"logits/rejected": -1.627294898033142, |
|
"logps/chosen": -668.6233520507812, |
|
"logps/rejected": -1004.8411865234375, |
|
"loss": 0.2447, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.7506890296936035, |
|
"rewards/margins": 3.425739288330078, |
|
"rewards/rejected": -8.176427841186523, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 11.875, |
|
"learning_rate": 4.727569257750531e-06, |
|
"logits/chosen": -1.9241821765899658, |
|
"logits/rejected": -1.7090564966201782, |
|
"logps/chosen": -580.5578002929688, |
|
"logps/rejected": -904.0485229492188, |
|
"loss": 0.2519, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.875545024871826, |
|
"rewards/margins": 3.183382511138916, |
|
"rewards/rejected": -7.0589280128479, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 6.40625, |
|
"learning_rate": 4.71688347605607e-06, |
|
"logits/chosen": -2.0419352054595947, |
|
"logits/rejected": -1.7710177898406982, |
|
"logps/chosen": -566.7761840820312, |
|
"logps/rejected": -831.5400390625, |
|
"loss": 0.1916, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.699720859527588, |
|
"rewards/margins": 2.6933109760284424, |
|
"rewards/rejected": -6.393032073974609, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 5.25, |
|
"learning_rate": 4.70600469617513e-06, |
|
"logits/chosen": -1.9878368377685547, |
|
"logits/rejected": -1.7731959819793701, |
|
"logps/chosen": -579.6895141601562, |
|
"logps/rejected": -862.3414306640625, |
|
"loss": 0.2297, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -3.830165386199951, |
|
"rewards/margins": 2.8225326538085938, |
|
"rewards/rejected": -6.652697563171387, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"eval_logits/chosen": -2.071340799331665, |
|
"eval_logits/rejected": -1.9512320756912231, |
|
"eval_logps/chosen": -649.6268920898438, |
|
"eval_logps/rejected": -682.4832153320312, |
|
"eval_loss": 0.7276944518089294, |
|
"eval_rewards/accuracies": 0.6014999747276306, |
|
"eval_rewards/chosen": -3.644925355911255, |
|
"eval_rewards/margins": 0.5474573969841003, |
|
"eval_rewards/rejected": -4.1923828125, |
|
"eval_runtime": 780.6188, |
|
"eval_samples_per_second": 2.562, |
|
"eval_steps_per_second": 0.32, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 4.28125, |
|
"learning_rate": 4.694933865196185e-06, |
|
"logits/chosen": -1.9098104238510132, |
|
"logits/rejected": -1.6890531778335571, |
|
"logps/chosen": -601.0123291015625, |
|
"logps/rejected": -1018.0811767578125, |
|
"loss": 0.1563, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -4.1650543212890625, |
|
"rewards/margins": 3.963716506958008, |
|
"rewards/rejected": -8.12877082824707, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 4.625, |
|
"learning_rate": 4.68367194692736e-06, |
|
"logits/chosen": -1.815717339515686, |
|
"logits/rejected": -1.5684750080108643, |
|
"logps/chosen": -765.4061279296875, |
|
"logps/rejected": -1324.0279541015625, |
|
"loss": 0.2069, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.8041791915893555, |
|
"rewards/margins": 5.39047384262085, |
|
"rewards/rejected": -11.194653511047363, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 6.75, |
|
"learning_rate": 4.672219921812517e-06, |
|
"logits/chosen": -1.815259337425232, |
|
"logits/rejected": -1.4773845672607422, |
|
"logps/chosen": -702.4598388671875, |
|
"logps/rejected": -1157.608642578125, |
|
"loss": 0.2329, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.984345436096191, |
|
"rewards/margins": 4.5979905128479, |
|
"rewards/rejected": -9.58233642578125, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 6.65625, |
|
"learning_rate": 4.660578786845907e-06, |
|
"logits/chosen": -1.9987341165542603, |
|
"logits/rejected": -1.7102264165878296, |
|
"logps/chosen": -521.5107421875, |
|
"logps/rejected": -883.5695190429688, |
|
"loss": 0.1534, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.3418431282043457, |
|
"rewards/margins": 3.685710906982422, |
|
"rewards/rejected": -7.027552604675293, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 4.6487495554853706e-06, |
|
"logits/chosen": -1.8766582012176514, |
|
"logits/rejected": -1.727230429649353, |
|
"logps/chosen": -566.2430419921875, |
|
"logps/rejected": -948.5830078125, |
|
"loss": 0.2355, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -3.5930278301239014, |
|
"rewards/margins": 3.7260138988494873, |
|
"rewards/rejected": -7.319043159484863, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 6.5, |
|
"learning_rate": 4.636733257564104e-06, |
|
"logits/chosen": -1.8848927021026611, |
|
"logits/rejected": -1.6707156896591187, |
|
"logps/chosen": -633.5640869140625, |
|
"logps/rejected": -1011.9500122070312, |
|
"loss": 0.1879, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.319493770599365, |
|
"rewards/margins": 3.7195515632629395, |
|
"rewards/rejected": -8.039045333862305, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 14.625, |
|
"learning_rate": 4.6245309392010094e-06, |
|
"logits/chosen": -1.8968805074691772, |
|
"logits/rejected": -1.5086712837219238, |
|
"logps/chosen": -695.408935546875, |
|
"logps/rejected": -1168.6949462890625, |
|
"loss": 0.1625, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.129621982574463, |
|
"rewards/margins": 4.753883361816406, |
|
"rewards/rejected": -9.883504867553711, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 11.0, |
|
"learning_rate": 4.612143662709619e-06, |
|
"logits/chosen": -1.941301703453064, |
|
"logits/rejected": -1.7558329105377197, |
|
"logps/chosen": -655.1188354492188, |
|
"logps/rejected": -1162.034912109375, |
|
"loss": 0.1826, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.672366142272949, |
|
"rewards/margins": 4.921967506408691, |
|
"rewards/rejected": -9.59433364868164, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 15.0, |
|
"learning_rate": 4.599572506505611e-06, |
|
"logits/chosen": -2.061692476272583, |
|
"logits/rejected": -1.7294963598251343, |
|
"logps/chosen": -608.3771362304688, |
|
"logps/rejected": -967.8352661132812, |
|
"loss": 0.2038, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -3.9572815895080566, |
|
"rewards/margins": 3.729525089263916, |
|
"rewards/rejected": -7.686806678771973, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 7.59375, |
|
"learning_rate": 4.586818565012925e-06, |
|
"logits/chosen": -1.9608147144317627, |
|
"logits/rejected": -1.8354988098144531, |
|
"logps/chosen": -583.6975708007812, |
|
"logps/rejected": -952.1292724609375, |
|
"loss": 0.1739, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -4.060886383056641, |
|
"rewards/margins": 3.516300678253174, |
|
"rewards/rejected": -7.577187538146973, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"eval_logits/chosen": -2.0702786445617676, |
|
"eval_logits/rejected": -1.9476977586746216, |
|
"eval_logps/chosen": -655.5535888671875, |
|
"eval_logps/rejected": -696.7918701171875, |
|
"eval_loss": 0.761256992816925, |
|
"eval_rewards/accuracies": 0.6010000109672546, |
|
"eval_rewards/chosen": -3.704192638397217, |
|
"eval_rewards/margins": 0.6312769055366516, |
|
"eval_rewards/rejected": -4.3354692459106445, |
|
"eval_runtime": 781.7978, |
|
"eval_samples_per_second": 2.558, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 19.125, |
|
"learning_rate": 4.573882948568487e-06, |
|
"logits/chosen": -1.999812126159668, |
|
"logits/rejected": -1.7216962575912476, |
|
"logps/chosen": -565.93505859375, |
|
"logps/rejected": -929.3225708007812, |
|
"loss": 0.2591, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -3.8413009643554688, |
|
"rewards/margins": 3.6320464611053467, |
|
"rewards/rejected": -7.4733476638793945, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 7.9375, |
|
"learning_rate": 4.560766783325536e-06, |
|
"logits/chosen": -1.9002296924591064, |
|
"logits/rejected": -1.7296489477157593, |
|
"logps/chosen": -585.817626953125, |
|
"logps/rejected": -975.8126831054688, |
|
"loss": 0.2053, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -3.8914363384246826, |
|
"rewards/margins": 3.8831589221954346, |
|
"rewards/rejected": -7.774595737457275, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 16.875, |
|
"learning_rate": 4.547471211155595e-06, |
|
"logits/chosen": -1.9321715831756592, |
|
"logits/rejected": -1.7310463190078735, |
|
"logps/chosen": -688.9970092773438, |
|
"logps/rejected": -1087.14306640625, |
|
"loss": 0.238, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.974576473236084, |
|
"rewards/margins": 3.8230443000793457, |
|
"rewards/rejected": -8.797619819641113, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 4.533997389549052e-06, |
|
"logits/chosen": -1.9757192134857178, |
|
"logits/rejected": -1.7459895610809326, |
|
"logps/chosen": -632.3358154296875, |
|
"logps/rejected": -1068.840576171875, |
|
"loss": 0.2295, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.492635250091553, |
|
"rewards/margins": 4.191953182220459, |
|
"rewards/rejected": -8.684588432312012, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 5.9375, |
|
"learning_rate": 4.520346491514395e-06, |
|
"logits/chosen": -1.8071882724761963, |
|
"logits/rejected": -1.4430408477783203, |
|
"logps/chosen": -718.0726318359375, |
|
"logps/rejected": -1229.7724609375, |
|
"loss": 0.1538, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.237442970275879, |
|
"rewards/margins": 5.075723171234131, |
|
"rewards/rejected": -10.313165664672852, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 4.506519705476092e-06, |
|
"logits/chosen": -1.7625377178192139, |
|
"logits/rejected": -1.508211374282837, |
|
"logps/chosen": -698.8258056640625, |
|
"logps/rejected": -1272.889892578125, |
|
"loss": 0.187, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.130419731140137, |
|
"rewards/margins": 5.599501609802246, |
|
"rewards/rejected": -10.729921340942383, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 3.265625, |
|
"learning_rate": 4.4925182351711286e-06, |
|
"logits/chosen": -1.927294135093689, |
|
"logits/rejected": -1.6415202617645264, |
|
"logps/chosen": -584.6781005859375, |
|
"logps/rejected": -968.0924072265625, |
|
"loss": 0.2194, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.002772331237793, |
|
"rewards/margins": 3.7211735248565674, |
|
"rewards/rejected": -7.723946571350098, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 14.375, |
|
"learning_rate": 4.478343299544208e-06, |
|
"logits/chosen": -1.805546522140503, |
|
"logits/rejected": -1.6112562417984009, |
|
"logps/chosen": -647.7420043945312, |
|
"logps/rejected": -1060.8192138671875, |
|
"loss": 0.2179, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.759741306304932, |
|
"rewards/margins": 4.042403221130371, |
|
"rewards/rejected": -8.802144050598145, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 5.03125, |
|
"learning_rate": 4.463996132641641e-06, |
|
"logits/chosen": -1.8722712993621826, |
|
"logits/rejected": -1.560368537902832, |
|
"logps/chosen": -722.08349609375, |
|
"logps/rejected": -1167.6322021484375, |
|
"loss": 0.162, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -5.170421600341797, |
|
"rewards/margins": 4.453159809112549, |
|
"rewards/rejected": -9.623581886291504, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 4.449477983503902e-06, |
|
"logits/chosen": -1.6484050750732422, |
|
"logits/rejected": -1.3613227605819702, |
|
"logps/chosen": -704.55322265625, |
|
"logps/rejected": -1220.0943603515625, |
|
"loss": 0.1868, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.208704948425293, |
|
"rewards/margins": 5.147915840148926, |
|
"rewards/rejected": -10.356620788574219, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/chosen": -1.8779360055923462, |
|
"eval_logits/rejected": -1.7606240510940552, |
|
"eval_logps/chosen": -797.6035766601562, |
|
"eval_logps/rejected": -851.0315551757812, |
|
"eval_loss": 0.9360729455947876, |
|
"eval_rewards/accuracies": 0.5724999904632568, |
|
"eval_rewards/chosen": -5.124693393707275, |
|
"eval_rewards/margins": 0.753172755241394, |
|
"eval_rewards/rejected": -5.877865791320801, |
|
"eval_runtime": 780.1782, |
|
"eval_samples_per_second": 2.564, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 6.03125, |
|
"learning_rate": 4.434790116056898e-06, |
|
"logits/chosen": -1.6855872869491577, |
|
"logits/rejected": -1.4614320993423462, |
|
"logps/chosen": -692.4788818359375, |
|
"logps/rejected": -1222.5704345703125, |
|
"loss": 0.2294, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.072499752044678, |
|
"rewards/margins": 5.230921745300293, |
|
"rewards/rejected": -10.303421974182129, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 2.625, |
|
"learning_rate": 4.419933809001929e-06, |
|
"logits/chosen": -1.776877760887146, |
|
"logits/rejected": -1.4783637523651123, |
|
"logps/chosen": -685.8599853515625, |
|
"logps/rejected": -1175.57958984375, |
|
"loss": 0.1559, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -4.9904985427856445, |
|
"rewards/margins": 4.894043922424316, |
|
"rewards/rejected": -9.884542465209961, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 13.125, |
|
"learning_rate": 4.404910355704362e-06, |
|
"logits/chosen": -1.676234483718872, |
|
"logits/rejected": -1.4960626363754272, |
|
"logps/chosen": -788.3824462890625, |
|
"logps/rejected": -1284.5242919921875, |
|
"loss": 0.1796, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -6.101700305938721, |
|
"rewards/margins": 4.996757507324219, |
|
"rewards/rejected": -11.098456382751465, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 9.125, |
|
"learning_rate": 4.389721064081045e-06, |
|
"logits/chosen": -1.748809814453125, |
|
"logits/rejected": -1.509686827659607, |
|
"logps/chosen": -748.4895629882812, |
|
"logps/rejected": -1260.5853271484375, |
|
"loss": 0.204, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.650696754455566, |
|
"rewards/margins": 5.0570387840271, |
|
"rewards/rejected": -10.707735061645508, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 2.3125, |
|
"learning_rate": 4.3743672564864305e-06, |
|
"logits/chosen": -1.7800718545913696, |
|
"logits/rejected": -1.5566697120666504, |
|
"logps/chosen": -759.5309448242188, |
|
"logps/rejected": -1315.963134765625, |
|
"loss": 0.1966, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.749941825866699, |
|
"rewards/margins": 5.49472713470459, |
|
"rewards/rejected": -11.244667053222656, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 13.75, |
|
"learning_rate": 4.358850269597458e-06, |
|
"logits/chosen": -1.9110691547393799, |
|
"logits/rejected": -1.7078921794891357, |
|
"logps/chosen": -694.5254516601562, |
|
"logps/rejected": -1189.4229736328125, |
|
"loss": 0.219, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.015244960784912, |
|
"rewards/margins": 5.033026695251465, |
|
"rewards/rejected": -10.048272132873535, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 12.6875, |
|
"learning_rate": 4.343171454297187e-06, |
|
"logits/chosen": -1.8295984268188477, |
|
"logits/rejected": -1.617525339126587, |
|
"logps/chosen": -707.7969970703125, |
|
"logps/rejected": -1195.290771484375, |
|
"loss": 0.1967, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.0687055587768555, |
|
"rewards/margins": 4.745570182800293, |
|
"rewards/rejected": -9.814275741577148, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 8.875, |
|
"learning_rate": 4.3273321755571855e-06, |
|
"logits/chosen": -1.7773466110229492, |
|
"logits/rejected": -1.505315899848938, |
|
"logps/chosen": -755.8875732421875, |
|
"logps/rejected": -1358.144287109375, |
|
"loss": 0.1713, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.826977252960205, |
|
"rewards/margins": 5.8857316970825195, |
|
"rewards/rejected": -11.712709426879883, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 4.65625, |
|
"learning_rate": 4.3113338123187046e-06, |
|
"logits/chosen": -1.7583072185516357, |
|
"logits/rejected": -1.4466537237167358, |
|
"logps/chosen": -786.8846435546875, |
|
"logps/rejected": -1387.0775146484375, |
|
"loss": 0.1455, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.128576755523682, |
|
"rewards/margins": 5.886734962463379, |
|
"rewards/rejected": -12.015311241149902, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 3.78125, |
|
"learning_rate": 4.295177757372627e-06, |
|
"logits/chosen": -1.9021990299224854, |
|
"logits/rejected": -1.6431891918182373, |
|
"logps/chosen": -681.1278076171875, |
|
"logps/rejected": -1244.9158935546875, |
|
"loss": 0.191, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.913496971130371, |
|
"rewards/margins": 5.573685646057129, |
|
"rewards/rejected": -10.487181663513184, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"eval_logits/chosen": -2.013472557067871, |
|
"eval_logits/rejected": -1.893463134765625, |
|
"eval_logps/chosen": -719.9283447265625, |
|
"eval_logps/rejected": -763.7403564453125, |
|
"eval_loss": 0.8649675250053406, |
|
"eval_rewards/accuracies": 0.578499972820282, |
|
"eval_rewards/chosen": -4.347940444946289, |
|
"eval_rewards/margins": 0.6570136547088623, |
|
"eval_rewards/rejected": -5.0049543380737305, |
|
"eval_runtime": 780.7054, |
|
"eval_samples_per_second": 2.562, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 2.6875, |
|
"learning_rate": 4.278865417238212e-06, |
|
"logits/chosen": -1.8427289724349976, |
|
"logits/rejected": -1.6436818838119507, |
|
"logps/chosen": -644.0859375, |
|
"logps/rejected": -1054.9425048828125, |
|
"loss": 0.188, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.5779314041137695, |
|
"rewards/margins": 4.159191608428955, |
|
"rewards/rejected": -8.737122535705566, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 2.359375, |
|
"learning_rate": 4.262398212040646e-06, |
|
"logits/chosen": -1.8668807744979858, |
|
"logits/rejected": -1.623335838317871, |
|
"logps/chosen": -723.8055419921875, |
|
"logps/rejected": -1369.9927978515625, |
|
"loss": 0.1109, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.331421375274658, |
|
"rewards/margins": 6.487430572509766, |
|
"rewards/rejected": -11.818851470947266, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 32.5, |
|
"learning_rate": 4.245777575387413e-06, |
|
"logits/chosen": -1.7328977584838867, |
|
"logits/rejected": -1.4244635105133057, |
|
"logps/chosen": -891.6882934570312, |
|
"logps/rejected": -1529.746337890625, |
|
"loss": 0.2673, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -7.042812347412109, |
|
"rewards/margins": 6.3449177742004395, |
|
"rewards/rejected": -13.387730598449707, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 5.5625, |
|
"learning_rate": 4.229004954243483e-06, |
|
"logits/chosen": -1.7528269290924072, |
|
"logits/rejected": -1.5056416988372803, |
|
"logps/chosen": -1028.7825927734375, |
|
"logps/rejected": -1734.1068115234375, |
|
"loss": 0.1938, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -8.365468978881836, |
|
"rewards/margins": 6.998432159423828, |
|
"rewards/rejected": -15.36390209197998, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 5.375, |
|
"learning_rate": 4.212081808805342e-06, |
|
"logits/chosen": -1.768991470336914, |
|
"logits/rejected": -1.4559440612792969, |
|
"logps/chosen": -1029.68505859375, |
|
"logps/rejected": -1800.233154296875, |
|
"loss": 0.158, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -8.380985260009766, |
|
"rewards/margins": 7.644388675689697, |
|
"rewards/rejected": -16.025373458862305, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 4.195009612373873e-06, |
|
"logits/chosen": -1.784402847290039, |
|
"logits/rejected": -1.4816844463348389, |
|
"logps/chosen": -856.8828125, |
|
"logps/rejected": -1528.76513671875, |
|
"loss": 0.2102, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -6.638609409332275, |
|
"rewards/margins": 6.666424751281738, |
|
"rewards/rejected": -13.305035591125488, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 11.25, |
|
"learning_rate": 4.177789851226086e-06, |
|
"logits/chosen": -1.8825385570526123, |
|
"logits/rejected": -1.5658035278320312, |
|
"logps/chosen": -762.8591918945312, |
|
"logps/rejected": -1407.0120849609375, |
|
"loss": 0.1716, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.705621719360352, |
|
"rewards/margins": 6.4548749923706055, |
|
"rewards/rejected": -12.160497665405273, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 10.25, |
|
"learning_rate": 4.160424024485734e-06, |
|
"logits/chosen": -1.794632911682129, |
|
"logits/rejected": -1.5640695095062256, |
|
"logps/chosen": -725.0952758789062, |
|
"logps/rejected": -1341.0850830078125, |
|
"loss": 0.1473, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.345667839050293, |
|
"rewards/margins": 6.119729995727539, |
|
"rewards/rejected": -11.465397834777832, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 4.28125, |
|
"learning_rate": 4.1429136439927965e-06, |
|
"logits/chosen": -1.8779207468032837, |
|
"logits/rejected": -1.570647120475769, |
|
"logps/chosen": -688.0980224609375, |
|
"logps/rejected": -1310.856689453125, |
|
"loss": 0.1614, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.92476749420166, |
|
"rewards/margins": 6.2549147605896, |
|
"rewards/rejected": -11.179682731628418, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 13.0625, |
|
"learning_rate": 4.125260234171861e-06, |
|
"logits/chosen": -1.7949470281600952, |
|
"logits/rejected": -1.541264295578003, |
|
"logps/chosen": -773.5762939453125, |
|
"logps/rejected": -1390.2864990234375, |
|
"loss": 0.1594, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.758688926696777, |
|
"rewards/margins": 6.116512298583984, |
|
"rewards/rejected": -11.875202178955078, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_logits/chosen": -1.9477717876434326, |
|
"eval_logits/rejected": -1.8285531997680664, |
|
"eval_logps/chosen": -788.9777221679688, |
|
"eval_logps/rejected": -848.410400390625, |
|
"eval_loss": 0.9262253642082214, |
|
"eval_rewards/accuracies": 0.593999981880188, |
|
"eval_rewards/chosen": -5.038434982299805, |
|
"eval_rewards/margins": 0.8132193684577942, |
|
"eval_rewards/rejected": -5.851653575897217, |
|
"eval_runtime": 780.3851, |
|
"eval_samples_per_second": 2.563, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 13.8125, |
|
"learning_rate": 4.107465331899411e-06, |
|
"logits/chosen": -1.7850643396377563, |
|
"logits/rejected": -1.4996929168701172, |
|
"logps/chosen": -707.7679443359375, |
|
"logps/rejected": -1261.5113525390625, |
|
"loss": 0.238, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.266989707946777, |
|
"rewards/margins": 5.456046104431152, |
|
"rewards/rejected": -10.72303581237793, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 27.25, |
|
"learning_rate": 4.089530486370025e-06, |
|
"logits/chosen": -1.7926385402679443, |
|
"logits/rejected": -1.560814619064331, |
|
"logps/chosen": -695.866455078125, |
|
"logps/rejected": -1322.89208984375, |
|
"loss": 0.1696, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.130670070648193, |
|
"rewards/margins": 6.105543613433838, |
|
"rewards/rejected": -11.236213684082031, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 4.625, |
|
"learning_rate": 4.071457258961514e-06, |
|
"logits/chosen": -1.8761751651763916, |
|
"logits/rejected": -1.5752075910568237, |
|
"logps/chosen": -634.4463500976562, |
|
"logps/rejected": -1144.76708984375, |
|
"loss": 0.182, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.44040060043335, |
|
"rewards/margins": 4.965670585632324, |
|
"rewards/rejected": -9.406070709228516, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 4.28125, |
|
"learning_rate": 4.05324722309898e-06, |
|
"logits/chosen": -1.9114227294921875, |
|
"logits/rejected": -1.6285560131072998, |
|
"logps/chosen": -579.0138549804688, |
|
"logps/rejected": -1113.2867431640625, |
|
"loss": 0.1373, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -3.8820624351501465, |
|
"rewards/margins": 5.322786808013916, |
|
"rewards/rejected": -9.204849243164062, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 5.5, |
|
"learning_rate": 4.034901964117844e-06, |
|
"logits/chosen": -1.9491599798202515, |
|
"logits/rejected": -1.6949039697647095, |
|
"logps/chosen": -544.5189819335938, |
|
"logps/rejected": -1022.8623046875, |
|
"loss": 0.2103, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.5138885974884033, |
|
"rewards/margins": 4.748744010925293, |
|
"rewards/rejected": -8.262632369995117, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 5.3125, |
|
"learning_rate": 4.0164230791258265e-06, |
|
"logits/chosen": -1.790183663368225, |
|
"logits/rejected": -1.5226788520812988, |
|
"logps/chosen": -645.8140869140625, |
|
"logps/rejected": -1219.503662109375, |
|
"loss": 0.1358, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.579228401184082, |
|
"rewards/margins": 5.6584672927856445, |
|
"rewards/rejected": -10.23769474029541, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 4.78125, |
|
"learning_rate": 3.9978121768639035e-06, |
|
"logits/chosen": -1.713234543800354, |
|
"logits/rejected": -1.463331699371338, |
|
"logps/chosen": -963.2076416015625, |
|
"logps/rejected": -1653.765625, |
|
"loss": 0.3174, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -7.8012800216674805, |
|
"rewards/margins": 6.784623146057129, |
|
"rewards/rejected": -14.585902214050293, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 5.21875, |
|
"learning_rate": 3.979070877566259e-06, |
|
"logits/chosen": -1.6942354440689087, |
|
"logits/rejected": -1.4278452396392822, |
|
"logps/chosen": -851.5432739257812, |
|
"logps/rejected": -1492.692138671875, |
|
"loss": 0.1636, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -6.686502933502197, |
|
"rewards/margins": 6.411787509918213, |
|
"rewards/rejected": -13.098289489746094, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 14.875, |
|
"learning_rate": 3.960200812819223e-06, |
|
"logits/chosen": -1.8888661861419678, |
|
"logits/rejected": -1.6964051723480225, |
|
"logps/chosen": -689.935791015625, |
|
"logps/rejected": -1223.9864501953125, |
|
"loss": 0.27, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.14157772064209, |
|
"rewards/margins": 5.335638523101807, |
|
"rewards/rejected": -10.477215766906738, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 2.46875, |
|
"learning_rate": 3.941203625419233e-06, |
|
"logits/chosen": -1.9211117029190063, |
|
"logits/rejected": -1.690458059310913, |
|
"logps/chosen": -591.420166015625, |
|
"logps/rejected": -1057.7073974609375, |
|
"loss": 0.1899, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -3.9465689659118652, |
|
"rewards/margins": 4.7175774574279785, |
|
"rewards/rejected": -8.664146423339844, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"eval_logits/chosen": -2.0371508598327637, |
|
"eval_logits/rejected": -1.9178376197814941, |
|
"eval_logps/chosen": -655.8643188476562, |
|
"eval_logps/rejected": -707.4119873046875, |
|
"eval_loss": 0.7746438384056091, |
|
"eval_rewards/accuracies": 0.6079999804496765, |
|
"eval_rewards/chosen": -3.7073001861572266, |
|
"eval_rewards/margins": 0.7343699336051941, |
|
"eval_rewards/rejected": -4.441669940948486, |
|
"eval_runtime": 780.6813, |
|
"eval_samples_per_second": 2.562, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 3.92208096922981e-06, |
|
"logits/chosen": -1.9045965671539307, |
|
"logits/rejected": -1.6174876689910889, |
|
"logps/chosen": -607.1253051757812, |
|
"logps/rejected": -1079.6739501953125, |
|
"loss": 0.1974, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.075843334197998, |
|
"rewards/margins": 4.8129777908325195, |
|
"rewards/rejected": -8.88882064819336, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 18.25, |
|
"learning_rate": 3.902834509037584e-06, |
|
"logits/chosen": -1.8484761714935303, |
|
"logits/rejected": -1.5948092937469482, |
|
"logps/chosen": -668.1317749023438, |
|
"logps/rejected": -1173.568359375, |
|
"loss": 0.1977, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.78977108001709, |
|
"rewards/margins": 5.134987831115723, |
|
"rewards/rejected": -9.924758911132812, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 6.90625, |
|
"learning_rate": 3.883465920407351e-06, |
|
"logits/chosen": -1.7886062860488892, |
|
"logits/rejected": -1.529721975326538, |
|
"logps/chosen": -671.2840576171875, |
|
"logps/rejected": -1196.9456787109375, |
|
"loss": 0.1944, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.772359848022461, |
|
"rewards/margins": 5.296358108520508, |
|
"rewards/rejected": -10.068717002868652, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 15.0, |
|
"learning_rate": 3.8639768895362075e-06, |
|
"logits/chosen": -1.8040317296981812, |
|
"logits/rejected": -1.5752843618392944, |
|
"logps/chosen": -648.8005981445312, |
|
"logps/rejected": -1176.04638671875, |
|
"loss": 0.2305, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.622233867645264, |
|
"rewards/margins": 5.306826114654541, |
|
"rewards/rejected": -9.929059982299805, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 6.4375, |
|
"learning_rate": 3.8443691131067525e-06, |
|
"logits/chosen": -1.8379939794540405, |
|
"logits/rejected": -1.543233036994934, |
|
"logps/chosen": -604.583740234375, |
|
"logps/rejected": -1111.179443359375, |
|
"loss": 0.1573, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.116429328918457, |
|
"rewards/margins": 5.1361494064331055, |
|
"rewards/rejected": -9.252578735351562, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 4.78125, |
|
"learning_rate": 3.824644298139372e-06, |
|
"logits/chosen": -1.7858953475952148, |
|
"logits/rejected": -1.6210616827011108, |
|
"logps/chosen": -655.4627685546875, |
|
"logps/rejected": -1142.6436767578125, |
|
"loss": 0.1781, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -4.720765113830566, |
|
"rewards/margins": 4.827541828155518, |
|
"rewards/rejected": -9.548307418823242, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 18.875, |
|
"learning_rate": 3.8048041618436365e-06, |
|
"logits/chosen": -1.8527462482452393, |
|
"logits/rejected": -1.6160118579864502, |
|
"logps/chosen": -651.9395751953125, |
|
"logps/rejected": -1180.6568603515625, |
|
"loss": 0.2885, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.603270530700684, |
|
"rewards/margins": 5.251911640167236, |
|
"rewards/rejected": -9.855181694030762, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 1.921875, |
|
"learning_rate": 3.784850431468795e-06, |
|
"logits/chosen": -1.9396499395370483, |
|
"logits/rejected": -1.6909902095794678, |
|
"logps/chosen": -533.6766357421875, |
|
"logps/rejected": -1005.97705078125, |
|
"loss": 0.17, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.5864109992980957, |
|
"rewards/margins": 4.638134479522705, |
|
"rewards/rejected": -8.224546432495117, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 4.5, |
|
"learning_rate": 3.764784844153413e-06, |
|
"logits/chosen": -1.8069417476654053, |
|
"logits/rejected": -1.5476117134094238, |
|
"logps/chosen": -604.2606811523438, |
|
"logps/rejected": -1145.07275390625, |
|
"loss": 0.1285, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -4.072413444519043, |
|
"rewards/margins": 5.356746673583984, |
|
"rewards/rejected": -9.429161071777344, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 7.25, |
|
"learning_rate": 3.7446091467741314e-06, |
|
"logits/chosen": -1.7064955234527588, |
|
"logits/rejected": -1.3745331764221191, |
|
"logps/chosen": -820.8228759765625, |
|
"logps/rejected": -1404.6578369140625, |
|
"loss": 0.1972, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -6.311244964599609, |
|
"rewards/margins": 5.807946681976318, |
|
"rewards/rejected": -12.119193077087402, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_logits/chosen": -1.8571399450302124, |
|
"eval_logits/rejected": -1.7422724962234497, |
|
"eval_logps/chosen": -874.4112548828125, |
|
"eval_logps/rejected": -957.8101806640625, |
|
"eval_loss": 0.9739969372749329, |
|
"eval_rewards/accuracies": 0.6129999756813049, |
|
"eval_rewards/chosen": -5.8927693367004395, |
|
"eval_rewards/margins": 1.0528827905654907, |
|
"eval_rewards/rejected": -6.945652961730957, |
|
"eval_runtime": 780.1629, |
|
"eval_samples_per_second": 2.564, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 8.875, |
|
"learning_rate": 3.7243250957935904e-06, |
|
"logits/chosen": -1.7506663799285889, |
|
"logits/rejected": -1.4978208541870117, |
|
"logps/chosen": -850.6290283203125, |
|
"logps/rejected": -1508.726318359375, |
|
"loss": 0.219, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -6.688626766204834, |
|
"rewards/margins": 6.346508979797363, |
|
"rewards/rejected": -13.035135269165039, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 7.65625, |
|
"learning_rate": 3.703934457107517e-06, |
|
"logits/chosen": -1.839238166809082, |
|
"logits/rejected": -1.5624536275863647, |
|
"logps/chosen": -738.9110717773438, |
|
"logps/rejected": -1317.7669677734375, |
|
"loss": 0.1813, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.391977787017822, |
|
"rewards/margins": 5.72281551361084, |
|
"rewards/rejected": -11.114792823791504, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 11.0625, |
|
"learning_rate": 3.683439005890983e-06, |
|
"logits/chosen": -1.7876466512680054, |
|
"logits/rejected": -1.5029432773590088, |
|
"logps/chosen": -661.0178833007812, |
|
"logps/rejected": -1189.1959228515625, |
|
"loss": 0.2421, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -4.857475280761719, |
|
"rewards/margins": 5.292153835296631, |
|
"rewards/rejected": -10.149629592895508, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 3.662840526443868e-06, |
|
"logits/chosen": -1.8758341073989868, |
|
"logits/rejected": -1.6289899349212646, |
|
"logps/chosen": -683.6453247070312, |
|
"logps/rejected": -1202.3697509765625, |
|
"loss": 0.1645, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.952923774719238, |
|
"rewards/margins": 5.2092509269714355, |
|
"rewards/rejected": -10.162174224853516, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 4.28125, |
|
"learning_rate": 3.6421408120355145e-06, |
|
"logits/chosen": -1.8700506687164307, |
|
"logits/rejected": -1.6114925146102905, |
|
"logps/chosen": -732.8409423828125, |
|
"logps/rejected": -1359.912109375, |
|
"loss": 0.1532, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.466494560241699, |
|
"rewards/margins": 6.277614593505859, |
|
"rewards/rejected": -11.744108200073242, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 3.6213416647486157e-06, |
|
"logits/chosen": -1.8383686542510986, |
|
"logits/rejected": -1.524852991104126, |
|
"logps/chosen": -737.9182739257812, |
|
"logps/rejected": -1409.013427734375, |
|
"loss": 0.2423, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.449790000915527, |
|
"rewards/margins": 6.735495567321777, |
|
"rewards/rejected": -12.185285568237305, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 7.84375, |
|
"learning_rate": 3.6004448953223225e-06, |
|
"logits/chosen": -1.757846474647522, |
|
"logits/rejected": -1.5626018047332764, |
|
"logps/chosen": -797.1041870117188, |
|
"logps/rejected": -1491.272216796875, |
|
"loss": 0.1457, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.0056328773498535, |
|
"rewards/margins": 6.717007637023926, |
|
"rewards/rejected": -12.722640037536621, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 3.5794523229946105e-06, |
|
"logits/chosen": -1.597733497619629, |
|
"logits/rejected": -1.324507474899292, |
|
"logps/chosen": -1041.21435546875, |
|
"logps/rejected": -1858.760498046875, |
|
"loss": 0.1777, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -8.502154350280762, |
|
"rewards/margins": 8.060552597045898, |
|
"rewards/rejected": -16.56270408630371, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 16.5, |
|
"learning_rate": 3.558365775343892e-06, |
|
"logits/chosen": -1.7607545852661133, |
|
"logits/rejected": -1.4137681722640991, |
|
"logps/chosen": -923.2019653320312, |
|
"logps/rejected": -1746.294189453125, |
|
"loss": 0.2314, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -7.116549491882324, |
|
"rewards/margins": 8.30859661102295, |
|
"rewards/rejected": -15.425145149230957, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 7.40625, |
|
"learning_rate": 3.537187088129919e-06, |
|
"logits/chosen": -1.9187705516815186, |
|
"logits/rejected": -1.618198037147522, |
|
"logps/chosen": -692.7543334960938, |
|
"logps/rejected": -1304.157470703125, |
|
"loss": 0.1712, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.782906532287598, |
|
"rewards/margins": 6.2092204093933105, |
|
"rewards/rejected": -10.99212646484375, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"eval_logits/chosen": -1.997864842414856, |
|
"eval_logits/rejected": -1.8792200088500977, |
|
"eval_logps/chosen": -680.6552124023438, |
|
"eval_logps/rejected": -728.7279663085938, |
|
"eval_loss": 0.8206447958946228, |
|
"eval_rewards/accuracies": 0.593500018119812, |
|
"eval_rewards/chosen": -3.9552090167999268, |
|
"eval_rewards/margins": 0.6996216177940369, |
|
"eval_rewards/rejected": -4.654830455780029, |
|
"eval_runtime": 780.7801, |
|
"eval_samples_per_second": 2.562, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 6.5625, |
|
"learning_rate": 3.5159181051339574e-06, |
|
"logits/chosen": -1.9240127801895142, |
|
"logits/rejected": -1.6421921253204346, |
|
"logps/chosen": -656.6437377929688, |
|
"logps/rejected": -1210.740234375, |
|
"loss": 0.1597, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.618703842163086, |
|
"rewards/margins": 5.614047050476074, |
|
"rewards/rejected": -10.232751846313477, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 1.9765625, |
|
"learning_rate": 3.494560677998275e-06, |
|
"logits/chosen": -1.7963730096817017, |
|
"logits/rejected": -1.4937909841537476, |
|
"logps/chosen": -813.3496704101562, |
|
"logps/rejected": -1511.3714599609375, |
|
"loss": 0.1662, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -6.14628267288208, |
|
"rewards/margins": 6.9289398193359375, |
|
"rewards/rejected": -13.075222969055176, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 15.25, |
|
"learning_rate": 3.473116666064939e-06, |
|
"logits/chosen": -1.7890408039093018, |
|
"logits/rejected": -1.4465700387954712, |
|
"logps/chosen": -864.6702880859375, |
|
"logps/rejected": -1585.6441650390625, |
|
"loss": 0.1791, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -6.7736711502075195, |
|
"rewards/margins": 7.236307621002197, |
|
"rewards/rejected": -14.009979248046875, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 11.0, |
|
"learning_rate": 3.4515879362139453e-06, |
|
"logits/chosen": -1.7052695751190186, |
|
"logits/rejected": -1.4470438957214355, |
|
"logps/chosen": -879.1795654296875, |
|
"logps/rejected": -1647.1077880859375, |
|
"loss": 0.2115, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -6.843623161315918, |
|
"rewards/margins": 7.675244331359863, |
|
"rewards/rejected": -14.518865585327148, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 9.875, |
|
"learning_rate": 3.42997636270069e-06, |
|
"logits/chosen": -1.8049097061157227, |
|
"logits/rejected": -1.564592719078064, |
|
"logps/chosen": -801.4486083984375, |
|
"logps/rejected": -1398.755126953125, |
|
"loss": 0.2083, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -6.123307704925537, |
|
"rewards/margins": 5.921048164367676, |
|
"rewards/rejected": -12.044357299804688, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 5.46875, |
|
"learning_rate": 3.408283826992801e-06, |
|
"logits/chosen": -1.8630778789520264, |
|
"logits/rejected": -1.5700795650482178, |
|
"logps/chosen": -832.0042724609375, |
|
"logps/rejected": -1476.0029296875, |
|
"loss": 0.2379, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -6.297715187072754, |
|
"rewards/margins": 6.487104892730713, |
|
"rewards/rejected": -12.784818649291992, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 10.0, |
|
"learning_rate": 3.386512217606339e-06, |
|
"logits/chosen": -1.7772754430770874, |
|
"logits/rejected": -1.5164556503295898, |
|
"logps/chosen": -838.1605224609375, |
|
"logps/rejected": -1455.7694091796875, |
|
"loss": 0.1919, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.386895179748535, |
|
"rewards/margins": 6.187010765075684, |
|
"rewards/rejected": -12.573904991149902, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 3.364663429941391e-06, |
|
"logits/chosen": -1.793426752090454, |
|
"logits/rejected": -1.4886395931243896, |
|
"logps/chosen": -872.0006103515625, |
|
"logps/rejected": -1503.099365234375, |
|
"loss": 0.3277, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -6.79733419418335, |
|
"rewards/margins": 6.3793768882751465, |
|
"rewards/rejected": -13.17671012878418, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 7.875, |
|
"learning_rate": 3.3427393661170532e-06, |
|
"logits/chosen": -1.8139142990112305, |
|
"logits/rejected": -1.59800386428833, |
|
"logps/chosen": -764.7420043945312, |
|
"logps/rejected": -1374.8375244140625, |
|
"loss": 0.1911, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.704885959625244, |
|
"rewards/margins": 6.062502384185791, |
|
"rewards/rejected": -11.767388343811035, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 3.3207419348058393e-06, |
|
"logits/chosen": -1.8668022155761719, |
|
"logits/rejected": -1.5570051670074463, |
|
"logps/chosen": -714.6455078125, |
|
"logps/rejected": -1290.4510498046875, |
|
"loss": 0.2211, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.201146125793457, |
|
"rewards/margins": 5.8124284744262695, |
|
"rewards/rejected": -11.013574600219727, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"eval_logits/chosen": -1.973204255104065, |
|
"eval_logits/rejected": -1.8550846576690674, |
|
"eval_logps/chosen": -717.7827758789062, |
|
"eval_logps/rejected": -775.1051025390625, |
|
"eval_loss": 0.8185168504714966, |
|
"eval_rewards/accuracies": 0.6119999885559082, |
|
"eval_rewards/chosen": -4.326484680175781, |
|
"eval_rewards/margins": 0.7921165823936462, |
|
"eval_rewards/rejected": -5.1186017990112305, |
|
"eval_runtime": 780.2406, |
|
"eval_samples_per_second": 2.563, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 8.625, |
|
"learning_rate": 3.2986730510675158e-06, |
|
"logits/chosen": -1.8806215524673462, |
|
"logits/rejected": -1.581820011138916, |
|
"logps/chosen": -694.9906005859375, |
|
"logps/rejected": -1254.445556640625, |
|
"loss": 0.1772, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.930051326751709, |
|
"rewards/margins": 5.521780490875244, |
|
"rewards/rejected": -10.451830863952637, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 4.34375, |
|
"learning_rate": 3.276534636182378e-06, |
|
"logits/chosen": -1.8050769567489624, |
|
"logits/rejected": -1.5063773393630981, |
|
"logps/chosen": -626.0925903320312, |
|
"logps/rejected": -1262.783447265625, |
|
"loss": 0.1264, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -4.493157386779785, |
|
"rewards/margins": 6.255930423736572, |
|
"rewards/rejected": -10.749089241027832, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 4.75, |
|
"learning_rate": 3.2543286174839856e-06, |
|
"logits/chosen": -1.8059381246566772, |
|
"logits/rejected": -1.4949989318847656, |
|
"logps/chosen": -771.2824096679688, |
|
"logps/rejected": -1471.7220458984375, |
|
"loss": 0.2386, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.776717662811279, |
|
"rewards/margins": 6.974827766418457, |
|
"rewards/rejected": -12.751544952392578, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 6.125, |
|
"learning_rate": 3.232056928191376e-06, |
|
"logits/chosen": -1.740046501159668, |
|
"logits/rejected": -1.4656656980514526, |
|
"logps/chosen": -784.59326171875, |
|
"logps/rejected": -1440.2933349609375, |
|
"loss": 0.1952, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.885767459869385, |
|
"rewards/margins": 6.415579319000244, |
|
"rewards/rejected": -12.301345825195312, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 7.5, |
|
"learning_rate": 3.2097215072407595e-06, |
|
"logits/chosen": -1.8645740747451782, |
|
"logits/rejected": -1.5591399669647217, |
|
"logps/chosen": -744.97216796875, |
|
"logps/rejected": -1370.182373046875, |
|
"loss": 0.3274, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.665326118469238, |
|
"rewards/margins": 6.191165447235107, |
|
"rewards/rejected": -11.856492042541504, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 3.4375, |
|
"learning_rate": 3.1873242991167153e-06, |
|
"logits/chosen": -1.7916425466537476, |
|
"logits/rejected": -1.556660771369934, |
|
"logps/chosen": -705.9693603515625, |
|
"logps/rejected": -1279.2801513671875, |
|
"loss": 0.21, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.239865303039551, |
|
"rewards/margins": 5.630426406860352, |
|
"rewards/rejected": -10.870291709899902, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 7.25, |
|
"learning_rate": 3.164867253682915e-06, |
|
"logits/chosen": -1.880875587463379, |
|
"logits/rejected": -1.615635871887207, |
|
"logps/chosen": -632.2330322265625, |
|
"logps/rejected": -1230.26318359375, |
|
"loss": 0.1901, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.562050819396973, |
|
"rewards/margins": 5.894750595092773, |
|
"rewards/rejected": -10.456802368164062, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 12.875, |
|
"learning_rate": 3.1423523260123627e-06, |
|
"logits/chosen": -1.873928427696228, |
|
"logits/rejected": -1.63522469997406, |
|
"logps/chosen": -672.1016845703125, |
|
"logps/rejected": -1353.87744140625, |
|
"loss": 0.1407, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.872590065002441, |
|
"rewards/margins": 6.644981384277344, |
|
"rewards/rejected": -11.517570495605469, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 4.4375, |
|
"learning_rate": 3.1197814762171986e-06, |
|
"logits/chosen": -1.8514392375946045, |
|
"logits/rejected": -1.6486479043960571, |
|
"logps/chosen": -698.458984375, |
|
"logps/rejected": -1315.129638671875, |
|
"loss": 0.1982, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.084946632385254, |
|
"rewards/margins": 6.099282741546631, |
|
"rewards/rejected": -11.184229850769043, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 18.125, |
|
"learning_rate": 3.097156669278046e-06, |
|
"logits/chosen": -1.7210429906845093, |
|
"logits/rejected": -1.4064064025878906, |
|
"logps/chosen": -816.0775756835938, |
|
"logps/rejected": -1553.640625, |
|
"loss": 0.1773, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -6.110322952270508, |
|
"rewards/margins": 7.464188575744629, |
|
"rewards/rejected": -13.574511528015137, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_logits/chosen": -1.9209892749786377, |
|
"eval_logits/rejected": -1.8040364980697632, |
|
"eval_logps/chosen": -810.9664916992188, |
|
"eval_logps/rejected": -883.3089599609375, |
|
"eval_loss": 0.9660559296607971, |
|
"eval_rewards/accuracies": 0.5950000286102295, |
|
"eval_rewards/chosen": -5.258322715759277, |
|
"eval_rewards/margins": 0.9423174858093262, |
|
"eval_rewards/rejected": -6.2006402015686035, |
|
"eval_runtime": 781.3981, |
|
"eval_samples_per_second": 2.56, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 24.625, |
|
"learning_rate": 3.074479874872949e-06, |
|
"logits/chosen": -1.7587913274765015, |
|
"logits/rejected": -1.476696252822876, |
|
"logps/chosen": -767.0065307617188, |
|
"logps/rejected": -1501.1015625, |
|
"loss": 0.1594, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.815308094024658, |
|
"rewards/margins": 7.267469882965088, |
|
"rewards/rejected": -13.082776069641113, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 21.5, |
|
"learning_rate": 3.051753067205895e-06, |
|
"logits/chosen": -1.7685956954956055, |
|
"logits/rejected": -1.5216073989868164, |
|
"logps/chosen": -767.9783935546875, |
|
"logps/rejected": -1409.71240234375, |
|
"loss": 0.2066, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.720013618469238, |
|
"rewards/margins": 6.2696709632873535, |
|
"rewards/rejected": -11.989683151245117, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 2.65625, |
|
"learning_rate": 3.0289782248349394e-06, |
|
"logits/chosen": -1.8148343563079834, |
|
"logits/rejected": -1.5846790075302124, |
|
"logps/chosen": -754.4901123046875, |
|
"logps/rejected": -1433.83203125, |
|
"loss": 0.2044, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.541926383972168, |
|
"rewards/margins": 6.822430610656738, |
|
"rewards/rejected": -12.364358901977539, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 7.0, |
|
"learning_rate": 3.0061573304999626e-06, |
|
"logits/chosen": -1.762790322303772, |
|
"logits/rejected": -1.459867238998413, |
|
"logps/chosen": -755.7040405273438, |
|
"logps/rejected": -1503.606689453125, |
|
"loss": 0.1869, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.6962385177612305, |
|
"rewards/margins": 7.409841060638428, |
|
"rewards/rejected": -13.1060791015625, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 2.9832923709500507e-06, |
|
"logits/chosen": -1.853634238243103, |
|
"logits/rejected": -1.6102838516235352, |
|
"logps/chosen": -765.724609375, |
|
"logps/rejected": -1428.977783203125, |
|
"loss": 0.2409, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.709242343902588, |
|
"rewards/margins": 6.602417945861816, |
|
"rewards/rejected": -12.311660766601562, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 2.9603853367705334e-06, |
|
"logits/chosen": -1.7447372674942017, |
|
"logits/rejected": -1.4683334827423096, |
|
"logps/chosen": -815.2188110351562, |
|
"logps/rejected": -1613.6304931640625, |
|
"loss": 0.1237, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -6.318219184875488, |
|
"rewards/margins": 7.942680358886719, |
|
"rewards/rejected": -14.260897636413574, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 4.3125, |
|
"learning_rate": 2.9374382222096885e-06, |
|
"logits/chosen": -1.728420615196228, |
|
"logits/rejected": -1.4341685771942139, |
|
"logps/chosen": -946.7561645507812, |
|
"logps/rejected": -1834.0970458984375, |
|
"loss": 0.2257, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -7.539031028747559, |
|
"rewards/margins": 8.83682632446289, |
|
"rewards/rejected": -16.375858306884766, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 19.875, |
|
"learning_rate": 2.9144530250051266e-06, |
|
"logits/chosen": -1.7010129690170288, |
|
"logits/rejected": -1.3637703657150269, |
|
"logps/chosen": -856.9533081054688, |
|
"logps/rejected": -1696.8665771484375, |
|
"loss": 0.1865, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -6.755218505859375, |
|
"rewards/margins": 8.420073509216309, |
|
"rewards/rejected": -15.175291061401367, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 13.875, |
|
"learning_rate": 2.891431746209868e-06, |
|
"logits/chosen": -1.7717593908309937, |
|
"logits/rejected": -1.5268971920013428, |
|
"logps/chosen": -791.3832397460938, |
|
"logps/rejected": -1521.106689453125, |
|
"loss": 0.1918, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -6.118847846984863, |
|
"rewards/margins": 7.1408281326293945, |
|
"rewards/rejected": -13.259675979614258, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 7.78125, |
|
"learning_rate": 2.868376390018136e-06, |
|
"logits/chosen": -1.8260672092437744, |
|
"logits/rejected": -1.6104164123535156, |
|
"logps/chosen": -674.1734008789062, |
|
"logps/rejected": -1275.109619140625, |
|
"loss": 0.2611, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -4.913368225097656, |
|
"rewards/margins": 5.8313703536987305, |
|
"rewards/rejected": -10.744738578796387, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"eval_logits/chosen": -1.984923243522644, |
|
"eval_logits/rejected": -1.8682689666748047, |
|
"eval_logps/chosen": -678.0472412109375, |
|
"eval_logps/rejected": -734.593505859375, |
|
"eval_loss": 0.8357905149459839, |
|
"eval_rewards/accuracies": 0.6035000085830688, |
|
"eval_rewards/chosen": -3.9291298389434814, |
|
"eval_rewards/margins": 0.7843554615974426, |
|
"eval_rewards/rejected": -4.713485240936279, |
|
"eval_runtime": 780.35, |
|
"eval_samples_per_second": 2.563, |
|
"eval_steps_per_second": 0.32, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 7.15625, |
|
"learning_rate": 2.8452889635908758e-06, |
|
"logits/chosen": -1.8920762538909912, |
|
"logits/rejected": -1.573889970779419, |
|
"logps/chosen": -592.5186767578125, |
|
"logps/rejected": -1212.495361328125, |
|
"loss": 0.1637, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -3.980005979537964, |
|
"rewards/margins": 6.280307292938232, |
|
"rewards/rejected": -10.260313987731934, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 2.8221714768810144e-06, |
|
"logits/chosen": -1.822471261024475, |
|
"logits/rejected": -1.5430892705917358, |
|
"logps/chosen": -584.103271484375, |
|
"logps/rejected": -1109.2965087890625, |
|
"loss": 0.198, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.9089488983154297, |
|
"rewards/margins": 5.281431198120117, |
|
"rewards/rejected": -9.19037914276123, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 5.6875, |
|
"learning_rate": 2.799025942458477e-06, |
|
"logits/chosen": -1.9587557315826416, |
|
"logits/rejected": -1.6500368118286133, |
|
"logps/chosen": -631.9678344726562, |
|
"logps/rejected": -1182.78466796875, |
|
"loss": 0.1954, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.307094573974609, |
|
"rewards/margins": 5.625722885131836, |
|
"rewards/rejected": -9.932817459106445, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 5.84375, |
|
"learning_rate": 2.775854375334976e-06, |
|
"logits/chosen": -1.908205270767212, |
|
"logits/rejected": -1.5645700693130493, |
|
"logps/chosen": -686.67529296875, |
|
"logps/rejected": -1329.472900390625, |
|
"loss": 0.1638, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -4.845526695251465, |
|
"rewards/margins": 6.522275447845459, |
|
"rewards/rejected": -11.367802619934082, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 10.125, |
|
"learning_rate": 2.7526587927885855e-06, |
|
"logits/chosen": -1.7340272665023804, |
|
"logits/rejected": -1.4183170795440674, |
|
"logps/chosen": -756.422607421875, |
|
"logps/rejected": -1329.1837158203125, |
|
"loss": 0.2525, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -5.652913570404053, |
|
"rewards/margins": 5.638332366943359, |
|
"rewards/rejected": -11.29124641418457, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 7.3125, |
|
"learning_rate": 2.729441214188127e-06, |
|
"logits/chosen": -1.7929729223251343, |
|
"logits/rejected": -1.5234935283660889, |
|
"logps/chosen": -770.7953491210938, |
|
"logps/rejected": -1437.832763671875, |
|
"loss": 0.156, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.866808891296387, |
|
"rewards/margins": 6.6957688331604, |
|
"rewards/rejected": -12.562577247619629, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 6.28125, |
|
"learning_rate": 2.706203660817355e-06, |
|
"logits/chosen": -1.7897508144378662, |
|
"logits/rejected": -1.490918755531311, |
|
"logps/chosen": -712.3719482421875, |
|
"logps/rejected": -1381.3006591796875, |
|
"loss": 0.1929, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.262055397033691, |
|
"rewards/margins": 6.684333801269531, |
|
"rewards/rejected": -11.946390151977539, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 4.0625, |
|
"learning_rate": 2.6829481556990017e-06, |
|
"logits/chosen": -1.8363555669784546, |
|
"logits/rejected": -1.5211414098739624, |
|
"logps/chosen": -658.4310302734375, |
|
"logps/rejected": -1290.535888671875, |
|
"loss": 0.1377, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.622659683227539, |
|
"rewards/margins": 6.354128360748291, |
|
"rewards/rejected": -10.976788520812988, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 3.75, |
|
"learning_rate": 2.6596767234186427e-06, |
|
"logits/chosen": -1.8283014297485352, |
|
"logits/rejected": -1.628281593322754, |
|
"logps/chosen": -666.2095947265625, |
|
"logps/rejected": -1244.331787109375, |
|
"loss": 0.1876, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.930241107940674, |
|
"rewards/margins": 5.740645408630371, |
|
"rewards/rejected": -10.670886039733887, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 2.859375, |
|
"learning_rate": 2.636391389948449e-06, |
|
"logits/chosen": -1.8633663654327393, |
|
"logits/rejected": -1.6405309438705444, |
|
"logps/chosen": -635.901611328125, |
|
"logps/rejected": -1253.4234619140625, |
|
"loss": 0.1584, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -4.540086269378662, |
|
"rewards/margins": 5.974652290344238, |
|
"rewards/rejected": -10.514738082885742, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"eval_logits/chosen": -1.9624484777450562, |
|
"eval_logits/rejected": -1.8456604480743408, |
|
"eval_logps/chosen": -742.7199096679688, |
|
"eval_logps/rejected": -804.4951171875, |
|
"eval_loss": 0.9011984467506409, |
|
"eval_rewards/accuracies": 0.6010000109672546, |
|
"eval_rewards/chosen": -4.575857162475586, |
|
"eval_rewards/margins": 0.8366447687149048, |
|
"eval_rewards/rejected": -5.412501335144043, |
|
"eval_runtime": 781.1923, |
|
"eval_samples_per_second": 2.56, |
|
"eval_steps_per_second": 0.32, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 11.9375, |
|
"learning_rate": 2.613094182470804e-06, |
|
"logits/chosen": -1.833001732826233, |
|
"logits/rejected": -1.6033201217651367, |
|
"logps/chosen": -710.7305908203125, |
|
"logps/rejected": -1363.15185546875, |
|
"loss": 0.2535, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -5.038983345031738, |
|
"rewards/margins": 6.4814558029174805, |
|
"rewards/rejected": -11.520438194274902, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 5.03125, |
|
"learning_rate": 2.5897871292018255e-06, |
|
"logits/chosen": -1.901118516921997, |
|
"logits/rejected": -1.5881661176681519, |
|
"logps/chosen": -653.0816650390625, |
|
"logps/rejected": -1297.2364501953125, |
|
"loss": 0.1509, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -4.45406436920166, |
|
"rewards/margins": 6.3539299964904785, |
|
"rewards/rejected": -10.80799388885498, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 4.53125, |
|
"learning_rate": 2.5664722592147866e-06, |
|
"logits/chosen": -1.866217851638794, |
|
"logits/rejected": -1.6465851068496704, |
|
"logps/chosen": -623.0480346679688, |
|
"logps/rejected": -1156.857666015625, |
|
"loss": 0.2831, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -4.450149059295654, |
|
"rewards/margins": 5.248156547546387, |
|
"rewards/rejected": -9.6983060836792, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 10.875, |
|
"learning_rate": 2.5431516022634718e-06, |
|
"logits/chosen": -1.925252914428711, |
|
"logits/rejected": -1.6734755039215088, |
|
"logps/chosen": -602.1317138671875, |
|
"logps/rejected": -1146.4388427734375, |
|
"loss": 0.177, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -4.30599308013916, |
|
"rewards/margins": 5.237140655517578, |
|
"rewards/rejected": -9.543133735656738, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 4.375, |
|
"learning_rate": 2.5198271886054693e-06, |
|
"logits/chosen": -1.8272100687026978, |
|
"logits/rejected": -1.5635576248168945, |
|
"logps/chosen": -690.4618530273438, |
|
"logps/rejected": -1262.934814453125, |
|
"loss": 0.2364, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.088870048522949, |
|
"rewards/margins": 5.745336055755615, |
|
"rewards/rejected": -10.834206581115723, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 6.34375, |
|
"learning_rate": 2.4965010488254198e-06, |
|
"logits/chosen": -1.8737865686416626, |
|
"logits/rejected": -1.5293992757797241, |
|
"logps/chosen": -696.8263549804688, |
|
"logps/rejected": -1301.195068359375, |
|
"loss": 0.1246, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -5.076106071472168, |
|
"rewards/margins": 5.952775955200195, |
|
"rewards/rejected": -11.028882026672363, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 5.03125, |
|
"learning_rate": 2.473175213658236e-06, |
|
"logits/chosen": -1.898840308189392, |
|
"logits/rejected": -1.673750877380371, |
|
"logps/chosen": -655.5372924804688, |
|
"logps/rejected": -1246.764892578125, |
|
"loss": 0.215, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.903532981872559, |
|
"rewards/margins": 5.727369785308838, |
|
"rewards/rejected": -10.630903244018555, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 12.9375, |
|
"learning_rate": 2.4498517138123153e-06, |
|
"logits/chosen": -1.8875300884246826, |
|
"logits/rejected": -1.6077525615692139, |
|
"logps/chosen": -679.5547485351562, |
|
"logps/rejected": -1191.851318359375, |
|
"loss": 0.2411, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -4.939997673034668, |
|
"rewards/margins": 5.089707374572754, |
|
"rewards/rejected": -10.029705047607422, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 6.375, |
|
"learning_rate": 2.426532579792742e-06, |
|
"logits/chosen": -1.903921127319336, |
|
"logits/rejected": -1.640777826309204, |
|
"logps/chosen": -701.1472778320312, |
|
"logps/rejected": -1259.781982421875, |
|
"loss": 0.1961, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.0907464027404785, |
|
"rewards/margins": 5.620265007019043, |
|
"rewards/rejected": -10.71101188659668, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 4.75, |
|
"learning_rate": 2.40321984172452e-06, |
|
"logits/chosen": -1.799851417541504, |
|
"logits/rejected": -1.5513416528701782, |
|
"logps/chosen": -734.9310302734375, |
|
"logps/rejected": -1328.170166015625, |
|
"loss": 0.173, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.326907157897949, |
|
"rewards/margins": 6.046074867248535, |
|
"rewards/rejected": -11.372981071472168, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_logits/chosen": -1.9477238655090332, |
|
"eval_logits/rejected": -1.831076741218567, |
|
"eval_logps/chosen": -817.8319091796875, |
|
"eval_logps/rejected": -883.6325073242188, |
|
"eval_loss": 0.9585374593734741, |
|
"eval_rewards/accuracies": 0.590499997138977, |
|
"eval_rewards/chosen": -5.326976299285889, |
|
"eval_rewards/margins": 0.8768988251686096, |
|
"eval_rewards/rejected": -6.2038750648498535, |
|
"eval_runtime": 781.5335, |
|
"eval_samples_per_second": 2.559, |
|
"eval_steps_per_second": 0.32, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 24.875, |
|
"learning_rate": 2.3799155291758332e-06, |
|
"logits/chosen": -1.7613136768341064, |
|
"logits/rejected": -1.5118930339813232, |
|
"logps/chosen": -802.9510498046875, |
|
"logps/rejected": -1477.132568359375, |
|
"loss": 0.1726, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -6.113633632659912, |
|
"rewards/margins": 6.739386558532715, |
|
"rewards/rejected": -12.853021621704102, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 24.875, |
|
"learning_rate": 2.356621670981353e-06, |
|
"logits/chosen": -1.7708122730255127, |
|
"logits/rejected": -1.4993810653686523, |
|
"logps/chosen": -834.2234497070312, |
|
"logps/rejected": -1653.456298828125, |
|
"loss": 0.2399, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -6.487744331359863, |
|
"rewards/margins": 7.984147548675537, |
|
"rewards/rejected": -14.471891403198242, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 2.3333402950656124e-06, |
|
"logits/chosen": -1.7092750072479248, |
|
"logits/rejected": -1.4365252256393433, |
|
"logps/chosen": -843.7501220703125, |
|
"logps/rejected": -1490.2340087890625, |
|
"loss": 0.1593, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.497511863708496, |
|
"rewards/margins": 6.372196197509766, |
|
"rewards/rejected": -12.869707107543945, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 4.21875, |
|
"learning_rate": 2.3100734282664565e-06, |
|
"logits/chosen": -1.743865728378296, |
|
"logits/rejected": -1.5017584562301636, |
|
"logps/chosen": -744.8902587890625, |
|
"logps/rejected": -1406.6005859375, |
|
"loss": 0.1513, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.662583351135254, |
|
"rewards/margins": 6.43302059173584, |
|
"rewards/rejected": -12.095603942871094, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 5.96875, |
|
"learning_rate": 2.286823096158595e-06, |
|
"logits/chosen": -1.7671382427215576, |
|
"logits/rejected": -1.5393540859222412, |
|
"logps/chosen": -764.4788208007812, |
|
"logps/rejected": -1386.416259765625, |
|
"loss": 0.148, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.647488594055176, |
|
"rewards/margins": 6.092167854309082, |
|
"rewards/rejected": -11.739656448364258, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 4.78125, |
|
"learning_rate": 2.2635913228772495e-06, |
|
"logits/chosen": -1.7948739528656006, |
|
"logits/rejected": -1.5345538854599, |
|
"logps/chosen": -731.1153564453125, |
|
"logps/rejected": -1379.321044921875, |
|
"loss": 0.1676, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.540210247039795, |
|
"rewards/margins": 6.384571552276611, |
|
"rewards/rejected": -11.924782752990723, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 6.03125, |
|
"learning_rate": 2.240380130941944e-06, |
|
"logits/chosen": -1.7852880954742432, |
|
"logits/rejected": -1.4755876064300537, |
|
"logps/chosen": -762.9698486328125, |
|
"logps/rejected": -1461.54296875, |
|
"loss": 0.1365, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -5.76432991027832, |
|
"rewards/margins": 6.822981834411621, |
|
"rewards/rejected": -12.587312698364258, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 2.217191541080426e-06, |
|
"logits/chosen": -1.7540229558944702, |
|
"logits/rejected": -1.5075386762619019, |
|
"logps/chosen": -811.2817993164062, |
|
"logps/rejected": -1491.155517578125, |
|
"loss": 0.2447, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -6.2686567306518555, |
|
"rewards/margins": 6.715971946716309, |
|
"rewards/rejected": -12.984628677368164, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 15.8125, |
|
"learning_rate": 2.194027572052741e-06, |
|
"logits/chosen": -1.780588150024414, |
|
"logits/rejected": -1.435884952545166, |
|
"logps/chosen": -880.83349609375, |
|
"logps/rejected": -1586.189697265625, |
|
"loss": 0.2163, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.738241672515869, |
|
"rewards/margins": 7.2081298828125, |
|
"rewards/rejected": -13.946372985839844, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 7.625, |
|
"learning_rate": 2.170890240475488e-06, |
|
"logits/chosen": -1.842301607131958, |
|
"logits/rejected": -1.4893786907196045, |
|
"logps/chosen": -848.5924072265625, |
|
"logps/rejected": -1563.589111328125, |
|
"loss": 0.1348, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.58083963394165, |
|
"rewards/margins": 7.14026403427124, |
|
"rewards/rejected": -13.721104621887207, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"eval_logits/chosen": -1.933610439300537, |
|
"eval_logits/rejected": -1.8167284727096558, |
|
"eval_logps/chosen": -850.277587890625, |
|
"eval_logps/rejected": -931.1090698242188, |
|
"eval_loss": 1.0276434421539307, |
|
"eval_rewards/accuracies": 0.6010000109672546, |
|
"eval_rewards/chosen": -5.651431560516357, |
|
"eval_rewards/margins": 1.0272092819213867, |
|
"eval_rewards/rejected": -6.6786417961120605, |
|
"eval_runtime": 780.3127, |
|
"eval_samples_per_second": 2.563, |
|
"eval_steps_per_second": 0.32, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 14.3125, |
|
"learning_rate": 2.147781560646252e-06, |
|
"logits/chosen": -1.7368297576904297, |
|
"logits/rejected": -1.452894926071167, |
|
"logps/chosen": -835.8079223632812, |
|
"logps/rejected": -1602.2353515625, |
|
"loss": 0.1927, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -6.314393997192383, |
|
"rewards/margins": 7.641695976257324, |
|
"rewards/rejected": -13.956090927124023, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 12.75, |
|
"learning_rate": 2.1247035443682466e-06, |
|
"logits/chosen": -1.7991231679916382, |
|
"logits/rejected": -1.4767308235168457, |
|
"logps/chosen": -748.2978515625, |
|
"logps/rejected": -1403.584716796875, |
|
"loss": 0.1813, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.546191215515137, |
|
"rewards/margins": 6.627876281738281, |
|
"rewards/rejected": -12.174067497253418, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 6.4375, |
|
"learning_rate": 2.101658200775166e-06, |
|
"logits/chosen": -1.8935410976409912, |
|
"logits/rejected": -1.6258153915405273, |
|
"logps/chosen": -756.5455932617188, |
|
"logps/rejected": -1427.7357177734375, |
|
"loss": 0.2026, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -5.5328755378723145, |
|
"rewards/margins": 6.666754722595215, |
|
"rewards/rejected": -12.199630737304688, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 2.0786475361562754e-06, |
|
"logits/chosen": -1.8521206378936768, |
|
"logits/rejected": -1.5795223712921143, |
|
"logps/chosen": -647.3450927734375, |
|
"logps/rejected": -1349.6434326171875, |
|
"loss": 0.1187, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.813652515411377, |
|
"rewards/margins": 6.857410430908203, |
|
"rewards/rejected": -11.671062469482422, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 7.9375, |
|
"learning_rate": 2.0556735537817464e-06, |
|
"logits/chosen": -1.8903312683105469, |
|
"logits/rejected": -1.6123740673065186, |
|
"logps/chosen": -781.2482299804688, |
|
"logps/rejected": -1511.971923828125, |
|
"loss": 0.1646, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.933591365814209, |
|
"rewards/margins": 7.245237827301025, |
|
"rewards/rejected": -13.17883014678955, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 5.21875, |
|
"learning_rate": 2.0327382537282563e-06, |
|
"logits/chosen": -1.8345636129379272, |
|
"logits/rejected": -1.5723216533660889, |
|
"logps/chosen": -747.3956298828125, |
|
"logps/rejected": -1473.4298095703125, |
|
"loss": 0.1579, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.571768760681152, |
|
"rewards/margins": 7.050948143005371, |
|
"rewards/rejected": -12.622716903686523, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 9.875, |
|
"learning_rate": 2.009843632704861e-06, |
|
"logits/chosen": -1.8921455144882202, |
|
"logits/rejected": -1.6603155136108398, |
|
"logps/chosen": -674.5091552734375, |
|
"logps/rejected": -1375.080810546875, |
|
"loss": 0.1659, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -4.866215705871582, |
|
"rewards/margins": 6.783639430999756, |
|
"rewards/rejected": -11.64985466003418, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 5.125, |
|
"learning_rate": 1.9869916838791704e-06, |
|
"logits/chosen": -1.9177888631820679, |
|
"logits/rejected": -1.6464917659759521, |
|
"logps/chosen": -677.2672729492188, |
|
"logps/rejected": -1329.63818359375, |
|
"loss": 0.1531, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -4.961805820465088, |
|
"rewards/margins": 6.457982063293457, |
|
"rewards/rejected": -11.419788360595703, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 6.09375, |
|
"learning_rate": 1.964184396703823e-06, |
|
"logits/chosen": -1.8470436334609985, |
|
"logits/rejected": -1.5801961421966553, |
|
"logps/chosen": -734.0846557617188, |
|
"logps/rejected": -1403.310791015625, |
|
"loss": 0.2223, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.368366241455078, |
|
"rewards/margins": 6.740887641906738, |
|
"rewards/rejected": -12.109254837036133, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 11.625, |
|
"learning_rate": 1.9414237567432887e-06, |
|
"logits/chosen": -1.8151607513427734, |
|
"logits/rejected": -1.5806912183761597, |
|
"logps/chosen": -775.1886596679688, |
|
"logps/rejected": -1465.5963134765625, |
|
"loss": 0.1724, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.8581719398498535, |
|
"rewards/margins": 6.805610656738281, |
|
"rewards/rejected": -12.663783073425293, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"eval_logits/chosen": -1.9729039669036865, |
|
"eval_logits/rejected": -1.8557853698730469, |
|
"eval_logps/chosen": -799.329345703125, |
|
"eval_logps/rejected": -868.793701171875, |
|
"eval_loss": 0.9419282078742981, |
|
"eval_rewards/accuracies": 0.6029999852180481, |
|
"eval_rewards/chosen": -5.141951560974121, |
|
"eval_rewards/margins": 0.9135352373123169, |
|
"eval_rewards/rejected": -6.055487155914307, |
|
"eval_runtime": 781.02, |
|
"eval_samples_per_second": 2.561, |
|
"eval_steps_per_second": 0.32, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 3.578125, |
|
"learning_rate": 1.9187117455010082e-06, |
|
"logits/chosen": -1.8257277011871338, |
|
"logits/rejected": -1.544679880142212, |
|
"logps/chosen": -741.0110473632812, |
|
"logps/rejected": -1510.415771484375, |
|
"loss": 0.1389, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.247300148010254, |
|
"rewards/margins": 7.593186378479004, |
|
"rewards/rejected": -12.840486526489258, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 3.46875, |
|
"learning_rate": 1.896050340246886e-06, |
|
"logits/chosen": -1.865552306175232, |
|
"logits/rejected": -1.5399208068847656, |
|
"logps/chosen": -729.8931884765625, |
|
"logps/rejected": -1392.20751953125, |
|
"loss": 0.1431, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.416408538818359, |
|
"rewards/margins": 6.736737251281738, |
|
"rewards/rejected": -12.153144836425781, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 16.875, |
|
"learning_rate": 1.8734415138451556e-06, |
|
"logits/chosen": -1.8815135955810547, |
|
"logits/rejected": -1.6109225749969482, |
|
"logps/chosen": -702.2775268554688, |
|
"logps/rejected": -1290.330322265625, |
|
"loss": 0.1526, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.315953254699707, |
|
"rewards/margins": 5.775982856750488, |
|
"rewards/rejected": -11.091935157775879, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 0.91015625, |
|
"learning_rate": 1.8508872345826217e-06, |
|
"logits/chosen": -1.7835582494735718, |
|
"logits/rejected": -1.5049933195114136, |
|
"logps/chosen": -776.7059326171875, |
|
"logps/rejected": -1574.740966796875, |
|
"loss": 0.1563, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.917466640472412, |
|
"rewards/margins": 7.796708583831787, |
|
"rewards/rejected": -13.714177131652832, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 11.6875, |
|
"learning_rate": 1.828389465997305e-06, |
|
"logits/chosen": -1.783422827720642, |
|
"logits/rejected": -1.5323078632354736, |
|
"logps/chosen": -845.9024658203125, |
|
"logps/rejected": -1603.354248046875, |
|
"loss": 0.1634, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -6.5129241943359375, |
|
"rewards/margins": 7.4951491355896, |
|
"rewards/rejected": -14.008073806762695, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 4.125, |
|
"learning_rate": 1.8059501667075033e-06, |
|
"logits/chosen": -1.762046456336975, |
|
"logits/rejected": -1.4177411794662476, |
|
"logps/chosen": -846.1433715820312, |
|
"logps/rejected": -1681.241455078125, |
|
"loss": 0.1306, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.4577531814575195, |
|
"rewards/margins": 8.345453262329102, |
|
"rewards/rejected": -14.803205490112305, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 3.96875, |
|
"learning_rate": 1.7835712902412727e-06, |
|
"logits/chosen": -1.7055094242095947, |
|
"logits/rejected": -1.4395256042480469, |
|
"logps/chosen": -878.4562377929688, |
|
"logps/rejected": -1672.9664306640625, |
|
"loss": 0.1229, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.844629764556885, |
|
"rewards/margins": 7.841983795166016, |
|
"rewards/rejected": -14.686613082885742, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 3.046875, |
|
"learning_rate": 1.7612547848663585e-06, |
|
"logits/chosen": -1.7910232543945312, |
|
"logits/rejected": -1.5396807193756104, |
|
"logps/chosen": -862.0275268554688, |
|
"logps/rejected": -1550.447998046875, |
|
"loss": 0.2311, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -6.806525230407715, |
|
"rewards/margins": 6.917562961578369, |
|
"rewards/rejected": -13.724087715148926, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 1.7390025934205836e-06, |
|
"logits/chosen": -1.8132579326629639, |
|
"logits/rejected": -1.5044097900390625, |
|
"logps/chosen": -865.0626220703125, |
|
"logps/rejected": -1650.018798828125, |
|
"loss": 0.2765, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -6.646265983581543, |
|
"rewards/margins": 7.8968071937561035, |
|
"rewards/rejected": -14.543073654174805, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 6.1875, |
|
"learning_rate": 1.7168166531427083e-06, |
|
"logits/chosen": -1.8180538415908813, |
|
"logits/rejected": -1.4960973262786865, |
|
"logps/chosen": -817.0841674804688, |
|
"logps/rejected": -1640.4371337890625, |
|
"loss": 0.1652, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -6.297974586486816, |
|
"rewards/margins": 8.122183799743652, |
|
"rewards/rejected": -14.420158386230469, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"eval_logits/chosen": -1.9630491733551025, |
|
"eval_logits/rejected": -1.8445292711257935, |
|
"eval_logps/chosen": -813.1928100585938, |
|
"eval_logps/rejected": -888.5103149414062, |
|
"eval_loss": 0.9893488883972168, |
|
"eval_rewards/accuracies": 0.5960000157356262, |
|
"eval_rewards/chosen": -5.280584812164307, |
|
"eval_rewards/margins": 0.9720681309700012, |
|
"eval_rewards/rejected": -6.252652645111084, |
|
"eval_runtime": 780.9929, |
|
"eval_samples_per_second": 2.561, |
|
"eval_steps_per_second": 0.32, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 2.9375, |
|
"learning_rate": 1.694698895503774e-06, |
|
"logits/chosen": -1.8146822452545166, |
|
"logits/rejected": -1.5159728527069092, |
|
"logps/chosen": -773.5902099609375, |
|
"logps/rejected": -1589.297607421875, |
|
"loss": 0.1422, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.86234188079834, |
|
"rewards/margins": 8.083044052124023, |
|
"rewards/rejected": -13.94538688659668, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.515625, |
|
"learning_rate": 1.6726512460389566e-06, |
|
"logits/chosen": -1.8080215454101562, |
|
"logits/rejected": -1.52683424949646, |
|
"logps/chosen": -794.5113525390625, |
|
"logps/rejected": -1582.714111328125, |
|
"loss": 0.1342, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -5.883685111999512, |
|
"rewards/margins": 7.861043453216553, |
|
"rewards/rejected": -13.744728088378906, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.515625, |
|
"learning_rate": 1.650675624179931e-06, |
|
"logits/chosen": -1.8216609954833984, |
|
"logits/rejected": -1.4835867881774902, |
|
"logps/chosen": -799.8715209960938, |
|
"logps/rejected": -1556.9541015625, |
|
"loss": 0.1083, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.9547319412231445, |
|
"rewards/margins": 7.664329528808594, |
|
"rewards/rejected": -13.619061470031738, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 2.984375, |
|
"learning_rate": 1.628773943087768e-06, |
|
"logits/chosen": -1.7566239833831787, |
|
"logits/rejected": -1.5169622898101807, |
|
"logps/chosen": -790.89453125, |
|
"logps/rejected": -1523.702392578125, |
|
"loss": 0.3056, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -6.1184186935424805, |
|
"rewards/margins": 7.254298210144043, |
|
"rewards/rejected": -13.372715950012207, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 3.734375, |
|
"learning_rate": 1.606948109486379e-06, |
|
"logits/chosen": -1.8213424682617188, |
|
"logits/rejected": -1.517643690109253, |
|
"logps/chosen": -821.3567504882812, |
|
"logps/rejected": -1711.3013916015625, |
|
"loss": 0.1673, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -6.3845534324646, |
|
"rewards/margins": 8.916479110717773, |
|
"rewards/rejected": -15.301034927368164, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 3.3125, |
|
"learning_rate": 1.5852000234965176e-06, |
|
"logits/chosen": -1.8380916118621826, |
|
"logits/rejected": -1.556247353553772, |
|
"logps/chosen": -863.4910888671875, |
|
"logps/rejected": -1631.0631103515625, |
|
"loss": 0.1745, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -6.359866619110107, |
|
"rewards/margins": 7.785064697265625, |
|
"rewards/rejected": -14.144930839538574, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 9.5, |
|
"learning_rate": 1.563531578470362e-06, |
|
"logits/chosen": -1.7966903448104858, |
|
"logits/rejected": -1.5243536233901978, |
|
"logps/chosen": -778.637939453125, |
|
"logps/rejected": -1521.4149169921875, |
|
"loss": 0.1896, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.939091682434082, |
|
"rewards/margins": 7.2415289878845215, |
|
"rewards/rejected": -13.180621147155762, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 1.5419446608266792e-06, |
|
"logits/chosen": -1.8752574920654297, |
|
"logits/rejected": -1.4560130834579468, |
|
"logps/chosen": -757.1990356445312, |
|
"logps/rejected": -1594.9390869140625, |
|
"loss": 0.1539, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -5.606993675231934, |
|
"rewards/margins": 8.46636962890625, |
|
"rewards/rejected": -14.073362350463867, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 6.84375, |
|
"learning_rate": 1.5204411498865961e-06, |
|
"logits/chosen": -1.7594566345214844, |
|
"logits/rejected": -1.507631540298462, |
|
"logps/chosen": -827.5431518554688, |
|
"logps/rejected": -1608.100830078125, |
|
"loss": 0.1661, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -6.1717095375061035, |
|
"rewards/margins": 7.750910758972168, |
|
"rewards/rejected": -13.922619819641113, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 12.0625, |
|
"learning_rate": 1.4990229177099957e-06, |
|
"logits/chosen": -1.9248380661010742, |
|
"logits/rejected": -1.610783576965332, |
|
"logps/chosen": -834.01904296875, |
|
"logps/rejected": -1623.152099609375, |
|
"loss": 0.1393, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.52529239654541, |
|
"rewards/margins": 7.815352439880371, |
|
"rewards/rejected": -14.340644836425781, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_logits/chosen": -1.9431767463684082, |
|
"eval_logits/rejected": -1.8254231214523315, |
|
"eval_logps/chosen": -853.2055053710938, |
|
"eval_logps/rejected": -937.9327392578125, |
|
"eval_loss": 1.0265454053878784, |
|
"eval_rewards/accuracies": 0.5985000133514404, |
|
"eval_rewards/chosen": -5.68071174621582, |
|
"eval_rewards/margins": 1.0661665201187134, |
|
"eval_rewards/rejected": -6.746878623962402, |
|
"eval_runtime": 781.5036, |
|
"eval_samples_per_second": 2.559, |
|
"eval_steps_per_second": 0.32, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 12.5625, |
|
"learning_rate": 1.4776918289325298e-06, |
|
"logits/chosen": -1.7308515310287476, |
|
"logits/rejected": -1.5036309957504272, |
|
"logps/chosen": -862.5445556640625, |
|
"logps/rejected": -1760.222412109375, |
|
"loss": 0.1141, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -6.750277042388916, |
|
"rewards/margins": 8.782461166381836, |
|
"rewards/rejected": -15.532739639282227, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 1.456449740603291e-06, |
|
"logits/chosen": -1.803753137588501, |
|
"logits/rejected": -1.5564358234405518, |
|
"logps/chosen": -908.7633056640625, |
|
"logps/rejected": -1765.212890625, |
|
"loss": 0.1407, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -7.143259525299072, |
|
"rewards/margins": 8.473600387573242, |
|
"rewards/rejected": -15.616859436035156, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 3.015625, |
|
"learning_rate": 1.4352985020231421e-06, |
|
"logits/chosen": -1.7121673822402954, |
|
"logits/rejected": -1.417112112045288, |
|
"logps/chosen": -905.90087890625, |
|
"logps/rejected": -1849.311279296875, |
|
"loss": 0.1203, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -7.201010704040527, |
|
"rewards/margins": 9.321398735046387, |
|
"rewards/rejected": -16.52240753173828, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 12.0, |
|
"learning_rate": 1.4142399545837182e-06, |
|
"logits/chosen": -1.7247213125228882, |
|
"logits/rejected": -1.4465917348861694, |
|
"logps/chosen": -866.37548828125, |
|
"logps/rejected": -1694.348388671875, |
|
"loss": 0.1955, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -6.842932224273682, |
|
"rewards/margins": 8.215066909790039, |
|
"rewards/rejected": -15.058000564575195, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 8.0625, |
|
"learning_rate": 1.3932759316071184e-06, |
|
"logits/chosen": -1.718605399131775, |
|
"logits/rejected": -1.4183330535888672, |
|
"logps/chosen": -816.7738647460938, |
|
"logps/rejected": -1566.388427734375, |
|
"loss": 0.1356, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -6.286736488342285, |
|
"rewards/margins": 7.438478946685791, |
|
"rewards/rejected": -13.725214958190918, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 12.5625, |
|
"learning_rate": 1.3724082581862963e-06, |
|
"logits/chosen": -1.7261683940887451, |
|
"logits/rejected": -1.461607575416565, |
|
"logps/chosen": -756.42431640625, |
|
"logps/rejected": -1514.209716796875, |
|
"loss": 0.228, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.900428771972656, |
|
"rewards/margins": 7.4936652183532715, |
|
"rewards/rejected": -13.394094467163086, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 20.5, |
|
"learning_rate": 1.351638751026178e-06, |
|
"logits/chosen": -1.86214280128479, |
|
"logits/rejected": -1.4936145544052124, |
|
"logps/chosen": -812.3070678710938, |
|
"logps/rejected": -1635.2750244140625, |
|
"loss": 0.2018, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -6.023752212524414, |
|
"rewards/margins": 8.384181022644043, |
|
"rewards/rejected": -14.407933235168457, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 24.0, |
|
"learning_rate": 1.3309692182854933e-06, |
|
"logits/chosen": -1.8188070058822632, |
|
"logits/rejected": -1.4605897665023804, |
|
"logps/chosen": -798.3870239257812, |
|
"logps/rejected": -1644.860595703125, |
|
"loss": 0.1926, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -6.045955181121826, |
|
"rewards/margins": 8.516426086425781, |
|
"rewards/rejected": -14.56238079071045, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 1.8671875, |
|
"learning_rate": 1.3104014594193703e-06, |
|
"logits/chosen": -1.8970272541046143, |
|
"logits/rejected": -1.6189887523651123, |
|
"logps/chosen": -709.9751586914062, |
|
"logps/rejected": -1469.901611328125, |
|
"loss": 0.1165, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -5.162797451019287, |
|
"rewards/margins": 7.589346408843994, |
|
"rewards/rejected": -12.752143859863281, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 25.875, |
|
"learning_rate": 1.2899372650226688e-06, |
|
"logits/chosen": -1.897132158279419, |
|
"logits/rejected": -1.589751124382019, |
|
"logps/chosen": -739.879638671875, |
|
"logps/rejected": -1452.0478515625, |
|
"loss": 0.2064, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.397900104522705, |
|
"rewards/margins": 7.155561923980713, |
|
"rewards/rejected": -12.553461074829102, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_logits/chosen": -1.9722185134887695, |
|
"eval_logits/rejected": -1.853868842124939, |
|
"eval_logps/chosen": -780.031494140625, |
|
"eval_logps/rejected": -850.7296752929688, |
|
"eval_loss": 0.9616246223449707, |
|
"eval_rewards/accuracies": 0.5960000157356262, |
|
"eval_rewards/chosen": -4.948971748352051, |
|
"eval_rewards/margins": 0.925875186920166, |
|
"eval_rewards/rejected": -5.874846935272217, |
|
"eval_runtime": 781.7509, |
|
"eval_samples_per_second": 2.558, |
|
"eval_steps_per_second": 0.32, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 2.65625, |
|
"learning_rate": 1.269578416674101e-06, |
|
"logits/chosen": -1.8619873523712158, |
|
"logits/rejected": -1.5740466117858887, |
|
"logps/chosen": -758.4598999023438, |
|
"logps/rejected": -1500.849853515625, |
|
"loss": 0.118, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -5.509732246398926, |
|
"rewards/margins": 7.478974342346191, |
|
"rewards/rejected": -12.98870849609375, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 2.703125, |
|
"learning_rate": 1.249326686781127e-06, |
|
"logits/chosen": -1.913484811782837, |
|
"logits/rejected": -1.5594704151153564, |
|
"logps/chosen": -777.2727661132812, |
|
"logps/rejected": -1519.972900390625, |
|
"loss": 0.1092, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.689510345458984, |
|
"rewards/margins": 7.493929386138916, |
|
"rewards/rejected": -13.183438301086426, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 11.75, |
|
"learning_rate": 1.2291838384256508e-06, |
|
"logits/chosen": -1.7811142206192017, |
|
"logits/rejected": -1.4538167715072632, |
|
"logps/chosen": -757.9189453125, |
|
"logps/rejected": -1458.7880859375, |
|
"loss": 0.2024, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.850201606750488, |
|
"rewards/margins": 6.9421892166137695, |
|
"rewards/rejected": -12.792390823364258, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 2.90625, |
|
"learning_rate": 1.2091516252105323e-06, |
|
"logits/chosen": -1.9148595333099365, |
|
"logits/rejected": -1.6675951480865479, |
|
"logps/chosen": -781.6110229492188, |
|
"logps/rejected": -1495.281982421875, |
|
"loss": 0.1428, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.9503889083862305, |
|
"rewards/margins": 7.140740394592285, |
|
"rewards/rejected": -13.091130256652832, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.7109375, |
|
"learning_rate": 1.1892317911069212e-06, |
|
"logits/chosen": -1.8337042331695557, |
|
"logits/rejected": -1.4769656658172607, |
|
"logps/chosen": -804.5179443359375, |
|
"logps/rejected": -1690.7152099609375, |
|
"loss": 0.1166, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.174851417541504, |
|
"rewards/margins": 8.737001419067383, |
|
"rewards/rejected": -14.911852836608887, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.546875, |
|
"learning_rate": 1.1694260703024266e-06, |
|
"logits/chosen": -1.903599739074707, |
|
"logits/rejected": -1.5903232097625732, |
|
"logps/chosen": -827.9391479492188, |
|
"logps/rejected": -1669.436767578125, |
|
"loss": 0.1213, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -6.192580699920654, |
|
"rewards/margins": 8.526972770690918, |
|
"rewards/rejected": -14.719552993774414, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 1.8203125, |
|
"learning_rate": 1.1497361870501425e-06, |
|
"logits/chosen": -1.8243070840835571, |
|
"logits/rejected": -1.5200848579406738, |
|
"logps/chosen": -765.5211181640625, |
|
"logps/rejected": -1657.3450927734375, |
|
"loss": 0.1269, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -5.842063903808594, |
|
"rewards/margins": 8.801877975463867, |
|
"rewards/rejected": -14.643940925598145, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 1.130163855518544e-06, |
|
"logits/chosen": -1.822270154953003, |
|
"logits/rejected": -1.5637589693069458, |
|
"logps/chosen": -796.2030029296875, |
|
"logps/rejected": -1690.8538818359375, |
|
"loss": 0.1248, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -6.09335470199585, |
|
"rewards/margins": 8.841546058654785, |
|
"rewards/rejected": -14.934901237487793, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 18.875, |
|
"learning_rate": 1.110710779642244e-06, |
|
"logits/chosen": -1.839624047279358, |
|
"logits/rejected": -1.576751708984375, |
|
"logps/chosen": -819.2501831054688, |
|
"logps/rejected": -1549.195556640625, |
|
"loss": 0.2897, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -6.27780818939209, |
|
"rewards/margins": 7.272715091705322, |
|
"rewards/rejected": -13.55052375793457, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 11.375, |
|
"learning_rate": 1.0913786529736558e-06, |
|
"logits/chosen": -1.802791953086853, |
|
"logits/rejected": -1.4809070825576782, |
|
"logps/chosen": -757.2066650390625, |
|
"logps/rejected": -1667.732421875, |
|
"loss": 0.1235, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.712438106536865, |
|
"rewards/margins": 9.024209976196289, |
|
"rewards/rejected": -14.736648559570312, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_logits/chosen": -1.9443422555923462, |
|
"eval_logits/rejected": -1.8262375593185425, |
|
"eval_logps/chosen": -845.3806762695312, |
|
"eval_logps/rejected": -925.70849609375, |
|
"eval_loss": 1.044278621673584, |
|
"eval_rewards/accuracies": 0.5924999713897705, |
|
"eval_rewards/chosen": -5.602463722229004, |
|
"eval_rewards/margins": 1.0221716165542603, |
|
"eval_rewards/rejected": -6.624634742736816, |
|
"eval_runtime": 780.7798, |
|
"eval_samples_per_second": 2.562, |
|
"eval_steps_per_second": 0.32, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 1.0721691585355618e-06, |
|
"logits/chosen": -1.7941747903823853, |
|
"logits/rejected": -1.5727746486663818, |
|
"logps/chosen": -802.1234130859375, |
|
"logps/rejected": -1506.57763671875, |
|
"loss": 0.1902, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -6.127458095550537, |
|
"rewards/margins": 6.992899417877197, |
|
"rewards/rejected": -13.12035846710205, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 10.875, |
|
"learning_rate": 1.0530839686745806e-06, |
|
"logits/chosen": -1.825495958328247, |
|
"logits/rejected": -1.555251121520996, |
|
"logps/chosen": -809.5203857421875, |
|
"logps/rejected": -1606.273193359375, |
|
"loss": 0.1539, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -6.172567367553711, |
|
"rewards/margins": 7.86652135848999, |
|
"rewards/rejected": -14.039088249206543, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 5.25, |
|
"learning_rate": 1.0341247449155824e-06, |
|
"logits/chosen": -1.8205457925796509, |
|
"logits/rejected": -1.512557864189148, |
|
"logps/chosen": -829.7819213867188, |
|
"logps/rejected": -1568.7506103515625, |
|
"loss": 0.1747, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.401388645172119, |
|
"rewards/margins": 7.3971991539001465, |
|
"rewards/rejected": -13.798588752746582, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 2.71875, |
|
"learning_rate": 1.0152931378170406e-06, |
|
"logits/chosen": -1.7705323696136475, |
|
"logits/rejected": -1.4258768558502197, |
|
"logps/chosen": -788.82373046875, |
|
"logps/rejected": -1534.7718505859375, |
|
"loss": 0.1828, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -6.10504674911499, |
|
"rewards/margins": 7.536930084228516, |
|
"rewards/rejected": -13.641977310180664, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 2.09375, |
|
"learning_rate": 9.96590786827333e-07, |
|
"logits/chosen": -1.8109601736068726, |
|
"logits/rejected": -1.5829339027404785, |
|
"logps/chosen": -827.0929565429688, |
|
"logps/rejected": -1609.902099609375, |
|
"loss": 0.1431, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.4153337478637695, |
|
"rewards/margins": 7.681593418121338, |
|
"rewards/rejected": -14.09692668914795, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 16.75, |
|
"learning_rate": 9.780193201420144e-07, |
|
"logits/chosen": -1.8415769338607788, |
|
"logits/rejected": -1.5490391254425049, |
|
"logps/chosen": -801.6826782226562, |
|
"logps/rejected": -1576.27587890625, |
|
"loss": 0.1532, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.982812404632568, |
|
"rewards/margins": 7.840212821960449, |
|
"rewards/rejected": -13.823025703430176, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 10.9375, |
|
"learning_rate": 9.59580354562072e-07, |
|
"logits/chosen": -1.8422110080718994, |
|
"logits/rejected": -1.512509822845459, |
|
"logps/chosen": -825.0670776367188, |
|
"logps/rejected": -1616.693115234375, |
|
"loss": 0.1553, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -6.282979965209961, |
|
"rewards/margins": 7.964785575866699, |
|
"rewards/rejected": -14.247766494750977, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 10.0, |
|
"learning_rate": 9.412754953531664e-07, |
|
"logits/chosen": -1.6988179683685303, |
|
"logits/rejected": -1.3882054090499878, |
|
"logps/chosen": -789.2442626953125, |
|
"logps/rejected": -1564.3502197265625, |
|
"loss": 0.1322, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -6.106675148010254, |
|
"rewards/margins": 7.6955246925354, |
|
"rewards/rejected": -13.802200317382812, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 11.9375, |
|
"learning_rate": 9.231063361058806e-07, |
|
"logits/chosen": -1.7611534595489502, |
|
"logits/rejected": -1.5275523662567139, |
|
"logps/chosen": -850.1500854492188, |
|
"logps/rejected": -1655.7884521484375, |
|
"loss": 0.1367, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -6.542205810546875, |
|
"rewards/margins": 7.965720176696777, |
|
"rewards/rejected": -14.507925033569336, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 2.265625, |
|
"learning_rate": 9.050744585969834e-07, |
|
"logits/chosen": -1.7793487310409546, |
|
"logits/rejected": -1.5123307704925537, |
|
"logps/chosen": -768.626953125, |
|
"logps/rejected": -1476.6981201171875, |
|
"loss": 0.1229, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -5.841226577758789, |
|
"rewards/margins": 7.072574615478516, |
|
"rewards/rejected": -12.913800239562988, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"eval_logits/chosen": -1.9388341903686523, |
|
"eval_logits/rejected": -1.821057915687561, |
|
"eval_logps/chosen": -857.1437377929688, |
|
"eval_logps/rejected": -939.6195678710938, |
|
"eval_loss": 1.0562974214553833, |
|
"eval_rewards/accuracies": 0.5914999842643738, |
|
"eval_rewards/chosen": -5.720094680786133, |
|
"eval_rewards/margins": 1.0436511039733887, |
|
"eval_rewards/rejected": -6.763746738433838, |
|
"eval_runtime": 781.6527, |
|
"eval_samples_per_second": 2.559, |
|
"eval_steps_per_second": 0.32, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 10.5, |
|
"learning_rate": 8.871814326517264e-07, |
|
"logits/chosen": -1.7912660837173462, |
|
"logits/rejected": -1.4652750492095947, |
|
"logps/chosen": -845.84033203125, |
|
"logps/rejected": -1647.9117431640625, |
|
"loss": 0.1651, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.425032615661621, |
|
"rewards/margins": 8.037796974182129, |
|
"rewards/rejected": -14.46282958984375, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 16.375, |
|
"learning_rate": 8.694288160071746e-07, |
|
"logits/chosen": -1.7436532974243164, |
|
"logits/rejected": -1.343569040298462, |
|
"logps/chosen": -901.9475708007812, |
|
"logps/rejected": -1744.094970703125, |
|
"loss": 0.1745, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -7.057692050933838, |
|
"rewards/margins": 8.397699356079102, |
|
"rewards/rejected": -15.455390930175781, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 19.875, |
|
"learning_rate": 8.518181541765904e-07, |
|
"logits/chosen": -1.8024213314056396, |
|
"logits/rejected": -1.5170562267303467, |
|
"logps/chosen": -840.4650268554688, |
|
"logps/rejected": -1586.270263671875, |
|
"loss": 0.1989, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -6.36107063293457, |
|
"rewards/margins": 7.343822479248047, |
|
"rewards/rejected": -13.7048921585083, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 1.8515625, |
|
"learning_rate": 8.343509803148898e-07, |
|
"logits/chosen": -1.760271668434143, |
|
"logits/rejected": -1.4487478733062744, |
|
"logps/chosen": -829.8733520507812, |
|
"logps/rejected": -1621.2315673828125, |
|
"loss": 0.2415, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -6.347391605377197, |
|
"rewards/margins": 7.86527156829834, |
|
"rewards/rejected": -14.212663650512695, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 11.125, |
|
"learning_rate": 8.170288150851635e-07, |
|
"logits/chosen": -1.7877166271209717, |
|
"logits/rejected": -1.4713761806488037, |
|
"logps/chosen": -742.2184448242188, |
|
"logps/rejected": -1476.7142333984375, |
|
"loss": 0.1751, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.588148593902588, |
|
"rewards/margins": 7.356741905212402, |
|
"rewards/rejected": -12.944890022277832, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 5.78125, |
|
"learning_rate": 7.998531665262907e-07, |
|
"logits/chosen": -1.813119888305664, |
|
"logits/rejected": -1.519991397857666, |
|
"logps/chosen": -784.0347900390625, |
|
"logps/rejected": -1572.8804931640625, |
|
"loss": 0.1347, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -5.94631290435791, |
|
"rewards/margins": 7.838288307189941, |
|
"rewards/rejected": -13.784601211547852, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 2.640625, |
|
"learning_rate": 7.828255299216553e-07, |
|
"logits/chosen": -1.8793045282363892, |
|
"logits/rejected": -1.6276319026947021, |
|
"logps/chosen": -803.3926391601562, |
|
"logps/rejected": -1561.659423828125, |
|
"loss": 0.117, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -6.136702537536621, |
|
"rewards/margins": 7.4938812255859375, |
|
"rewards/rejected": -13.630582809448242, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 7.659473876689649e-07, |
|
"logits/chosen": -1.7960598468780518, |
|
"logits/rejected": -1.5569416284561157, |
|
"logps/chosen": -826.9943237304688, |
|
"logps/rejected": -1568.2489013671875, |
|
"loss": 0.1817, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -6.515174865722656, |
|
"rewards/margins": 7.30636739730835, |
|
"rewards/rejected": -13.821542739868164, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 11.375, |
|
"learning_rate": 7.492202091511986e-07, |
|
"logits/chosen": -1.8298485279083252, |
|
"logits/rejected": -1.5398129224777222, |
|
"logps/chosen": -814.5342407226562, |
|
"logps/rejected": -1584.274169921875, |
|
"loss": 0.2433, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -6.155816078186035, |
|
"rewards/margins": 7.666536808013916, |
|
"rewards/rejected": -13.822354316711426, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 15.4375, |
|
"learning_rate": 7.326454506086844e-07, |
|
"logits/chosen": -1.836544394493103, |
|
"logits/rejected": -1.5596864223480225, |
|
"logps/chosen": -795.0130004882812, |
|
"logps/rejected": -1612.1021728515625, |
|
"loss": 0.1734, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.960467338562012, |
|
"rewards/margins": 8.215749740600586, |
|
"rewards/rejected": -14.176218032836914, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"eval_logits/chosen": -1.9442803859710693, |
|
"eval_logits/rejected": -1.8265658617019653, |
|
"eval_logps/chosen": -834.2926635742188, |
|
"eval_logps/rejected": -913.9391479492188, |
|
"eval_loss": 1.024413824081421, |
|
"eval_rewards/accuracies": 0.593999981880188, |
|
"eval_rewards/chosen": -5.491583824157715, |
|
"eval_rewards/margins": 1.015357494354248, |
|
"eval_rewards/rejected": -6.506941795349121, |
|
"eval_runtime": 781.3907, |
|
"eval_samples_per_second": 2.56, |
|
"eval_steps_per_second": 0.32, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 11.5625, |
|
"learning_rate": 7.162245550123193e-07, |
|
"logits/chosen": -1.8039443492889404, |
|
"logits/rejected": -1.4971197843551636, |
|
"logps/chosen": -814.34326171875, |
|
"logps/rejected": -1535.5086669921875, |
|
"loss": 0.1568, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -6.270487308502197, |
|
"rewards/margins": 7.268198490142822, |
|
"rewards/rejected": -13.53868579864502, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 12.0, |
|
"learning_rate": 6.999589519379494e-07, |
|
"logits/chosen": -1.7954915761947632, |
|
"logits/rejected": -1.5133321285247803, |
|
"logps/chosen": -779.1148071289062, |
|
"logps/rejected": -1520.70703125, |
|
"loss": 0.1384, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.82504415512085, |
|
"rewards/margins": 7.360219478607178, |
|
"rewards/rejected": -13.185264587402344, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 8.5, |
|
"learning_rate": 6.83850057441913e-07, |
|
"logits/chosen": -1.8146121501922607, |
|
"logits/rejected": -1.5364322662353516, |
|
"logps/chosen": -776.1300659179688, |
|
"logps/rejected": -1522.8975830078125, |
|
"loss": 0.1867, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.84547758102417, |
|
"rewards/margins": 7.345385551452637, |
|
"rewards/rejected": -13.190862655639648, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 18.875, |
|
"learning_rate": 6.678992739377582e-07, |
|
"logits/chosen": -1.8299884796142578, |
|
"logits/rejected": -1.5080091953277588, |
|
"logps/chosen": -812.95654296875, |
|
"logps/rejected": -1622.835205078125, |
|
"loss": 0.1807, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.02605676651001, |
|
"rewards/margins": 8.1326265335083, |
|
"rewards/rejected": -14.158681869506836, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 55.5, |
|
"learning_rate": 6.521079900741542e-07, |
|
"logits/chosen": -1.767059087753296, |
|
"logits/rejected": -1.5588467121124268, |
|
"logps/chosen": -786.4676513671875, |
|
"logps/rejected": -1427.7884521484375, |
|
"loss": 0.2542, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.954463005065918, |
|
"rewards/margins": 6.433099269866943, |
|
"rewards/rejected": -12.38756275177002, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 0.78515625, |
|
"learning_rate": 6.364775806139975e-07, |
|
"logits/chosen": -1.793999433517456, |
|
"logits/rejected": -1.5091029405593872, |
|
"logps/chosen": -839.9996337890625, |
|
"logps/rejected": -1650.9144287109375, |
|
"loss": 0.1235, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -6.430754661560059, |
|
"rewards/margins": 8.038093566894531, |
|
"rewards/rejected": -14.468847274780273, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 6.21009406314727e-07, |
|
"logits/chosen": -1.8470089435577393, |
|
"logits/rejected": -1.5226188898086548, |
|
"logps/chosen": -806.4625244140625, |
|
"logps/rejected": -1513.0128173828125, |
|
"loss": 0.2682, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.04176139831543, |
|
"rewards/margins": 7.0099334716796875, |
|
"rewards/rejected": -13.0516939163208, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 6.057048138098567e-07, |
|
"logits/chosen": -1.8832355737686157, |
|
"logits/rejected": -1.579980731010437, |
|
"logps/chosen": -732.5565795898438, |
|
"logps/rejected": -1567.3011474609375, |
|
"loss": 0.1713, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.4515252113342285, |
|
"rewards/margins": 8.183982849121094, |
|
"rewards/rejected": -13.635507583618164, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 4.78125, |
|
"learning_rate": 5.90565135491743e-07, |
|
"logits/chosen": -1.8270089626312256, |
|
"logits/rejected": -1.6248699426651, |
|
"logps/chosen": -783.6031494140625, |
|
"logps/rejected": -1493.7191162109375, |
|
"loss": 0.1782, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -6.045929431915283, |
|
"rewards/margins": 7.007973670959473, |
|
"rewards/rejected": -13.053903579711914, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 13.375, |
|
"learning_rate": 5.755916893955887e-07, |
|
"logits/chosen": -1.8112058639526367, |
|
"logits/rejected": -1.626491904258728, |
|
"logps/chosen": -764.4779052734375, |
|
"logps/rejected": -1465.9271240234375, |
|
"loss": 0.2791, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.7741007804870605, |
|
"rewards/margins": 6.939056396484375, |
|
"rewards/rejected": -12.713155746459961, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_logits/chosen": -1.9510712623596191, |
|
"eval_logits/rejected": -1.8336251974105835, |
|
"eval_logps/chosen": -809.6419677734375, |
|
"eval_logps/rejected": -884.2896118164062, |
|
"eval_loss": 0.9939026832580566, |
|
"eval_rewards/accuracies": 0.593999981880188, |
|
"eval_rewards/chosen": -5.2450761795043945, |
|
"eval_rewards/margins": 0.9653691649436951, |
|
"eval_rewards/rejected": -6.210445880889893, |
|
"eval_runtime": 781.2328, |
|
"eval_samples_per_second": 2.56, |
|
"eval_steps_per_second": 0.32, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 2.640625, |
|
"learning_rate": 5.607857790846932e-07, |
|
"logits/chosen": -1.7774875164031982, |
|
"logits/rejected": -1.4975415468215942, |
|
"logps/chosen": -744.4974365234375, |
|
"logps/rejected": -1493.6400146484375, |
|
"loss": 0.1508, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.606020450592041, |
|
"rewards/margins": 7.40170431137085, |
|
"rewards/rejected": -13.007725715637207, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 5.34375, |
|
"learning_rate": 5.461486935369714e-07, |
|
"logits/chosen": -1.8177086114883423, |
|
"logits/rejected": -1.5261489152908325, |
|
"logps/chosen": -819.9765625, |
|
"logps/rejected": -1483.857666015625, |
|
"loss": 0.2674, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -6.2067670822143555, |
|
"rewards/margins": 6.750323295593262, |
|
"rewards/rejected": -12.957090377807617, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 9.5, |
|
"learning_rate": 5.316817070327354e-07, |
|
"logits/chosen": -1.816895842552185, |
|
"logits/rejected": -1.521397352218628, |
|
"logps/chosen": -768.5750732421875, |
|
"logps/rejected": -1489.7919921875, |
|
"loss": 0.2322, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.702691078186035, |
|
"rewards/margins": 7.254155158996582, |
|
"rewards/rejected": -12.956846237182617, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 15.6875, |
|
"learning_rate": 5.173860790437563e-07, |
|
"logits/chosen": -1.870107650756836, |
|
"logits/rejected": -1.59227454662323, |
|
"logps/chosen": -747.5250854492188, |
|
"logps/rejected": -1507.5074462890625, |
|
"loss": 0.1589, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.517577171325684, |
|
"rewards/margins": 7.614779472351074, |
|
"rewards/rejected": -13.132356643676758, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 2.84375, |
|
"learning_rate": 5.032630541236167e-07, |
|
"logits/chosen": -1.7885679006576538, |
|
"logits/rejected": -1.5312520265579224, |
|
"logps/chosen": -728.3582153320312, |
|
"logps/rejected": -1373.1923828125, |
|
"loss": 0.2111, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.529536724090576, |
|
"rewards/margins": 6.345106601715088, |
|
"rewards/rejected": -11.874643325805664, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 11.8125, |
|
"learning_rate": 4.893138617993681e-07, |
|
"logits/chosen": -1.8757107257843018, |
|
"logits/rejected": -1.4865679740905762, |
|
"logps/chosen": -737.5164794921875, |
|
"logps/rejected": -1416.6087646484375, |
|
"loss": 0.1654, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.339116096496582, |
|
"rewards/margins": 6.8614068031311035, |
|
"rewards/rejected": -12.200522422790527, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 4.9375, |
|
"learning_rate": 4.755397164644812e-07, |
|
"logits/chosen": -1.8473145961761475, |
|
"logits/rejected": -1.6185519695281982, |
|
"logps/chosen": -758.03515625, |
|
"logps/rejected": -1406.828857421875, |
|
"loss": 0.1818, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.706026077270508, |
|
"rewards/margins": 6.3536272048950195, |
|
"rewards/rejected": -12.059653282165527, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 5.8125, |
|
"learning_rate": 4.619418172731277e-07, |
|
"logits/chosen": -1.8655025959014893, |
|
"logits/rejected": -1.5346721410751343, |
|
"logps/chosen": -714.9515380859375, |
|
"logps/rejected": -1431.1217041015625, |
|
"loss": 0.1951, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.192868232727051, |
|
"rewards/margins": 7.240492820739746, |
|
"rewards/rejected": -12.433362007141113, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 4.4852134803578446e-07, |
|
"logits/chosen": -1.8112146854400635, |
|
"logits/rejected": -1.541689157485962, |
|
"logps/chosen": -722.6507568359375, |
|
"logps/rejected": -1418.703857421875, |
|
"loss": 0.2012, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.355412483215332, |
|
"rewards/margins": 6.820165157318115, |
|
"rewards/rejected": -12.175577163696289, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 25.0, |
|
"learning_rate": 4.3527947711617197e-07, |
|
"logits/chosen": -1.8729737997055054, |
|
"logits/rejected": -1.5761798620224, |
|
"logps/chosen": -731.1620483398438, |
|
"logps/rejected": -1336.569091796875, |
|
"loss": 0.2041, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.474403381347656, |
|
"rewards/margins": 6.022130489349365, |
|
"rewards/rejected": -11.496532440185547, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_logits/chosen": -1.9610016345977783, |
|
"eval_logits/rejected": -1.8434377908706665, |
|
"eval_logps/chosen": -785.0302734375, |
|
"eval_logps/rejected": -855.6673583984375, |
|
"eval_loss": 0.9620700478553772, |
|
"eval_rewards/accuracies": 0.5954999923706055, |
|
"eval_rewards/chosen": -4.998960018157959, |
|
"eval_rewards/margins": 0.9252643585205078, |
|
"eval_rewards/rejected": -5.924223899841309, |
|
"eval_runtime": 781.2922, |
|
"eval_samples_per_second": 2.56, |
|
"eval_steps_per_second": 0.32, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 24.125, |
|
"learning_rate": 4.2221735732953655e-07, |
|
"logits/chosen": -1.8799352645874023, |
|
"logits/rejected": -1.5558679103851318, |
|
"logps/chosen": -739.7000732421875, |
|
"logps/rejected": -1477.8525390625, |
|
"loss": 0.1092, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -5.51525354385376, |
|
"rewards/margins": 7.406874179840088, |
|
"rewards/rejected": -12.922128677368164, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 4.1875, |
|
"learning_rate": 4.0933612584229174e-07, |
|
"logits/chosen": -1.8271852731704712, |
|
"logits/rejected": -1.4540526866912842, |
|
"logps/chosen": -776.713134765625, |
|
"logps/rejected": -1541.1552734375, |
|
"loss": 0.1872, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.6901164054870605, |
|
"rewards/margins": 7.6587700843811035, |
|
"rewards/rejected": -13.348886489868164, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 13.3125, |
|
"learning_rate": 3.9663690407301644e-07, |
|
"logits/chosen": -1.8121604919433594, |
|
"logits/rejected": -1.4967972040176392, |
|
"logps/chosen": -767.4049682617188, |
|
"logps/rejected": -1499.587646484375, |
|
"loss": 0.2297, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -5.7149529457092285, |
|
"rewards/margins": 7.2757568359375, |
|
"rewards/rejected": -12.990710258483887, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 4.21875, |
|
"learning_rate": 3.841207975948255e-07, |
|
"logits/chosen": -1.8146421909332275, |
|
"logits/rejected": -1.556372880935669, |
|
"logps/chosen": -736.1464233398438, |
|
"logps/rejected": -1403.8233642578125, |
|
"loss": 0.1693, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.403313636779785, |
|
"rewards/margins": 6.708578586578369, |
|
"rewards/rejected": -12.111891746520996, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 5.40625, |
|
"learning_rate": 3.717888960391222e-07, |
|
"logits/chosen": -1.8087650537490845, |
|
"logits/rejected": -1.6352970600128174, |
|
"logps/chosen": -742.0700073242188, |
|
"logps/rejected": -1389.765380859375, |
|
"loss": 0.132, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.522411346435547, |
|
"rewards/margins": 6.3892717361450195, |
|
"rewards/rejected": -11.911683082580566, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 18.5, |
|
"learning_rate": 3.5964227300073485e-07, |
|
"logits/chosen": -1.8568089008331299, |
|
"logits/rejected": -1.5052803754806519, |
|
"logps/chosen": -732.3570556640625, |
|
"logps/rejected": -1547.2830810546875, |
|
"loss": 0.1797, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -5.472035884857178, |
|
"rewards/margins": 8.027372360229492, |
|
"rewards/rejected": -13.499404907226562, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 22.25, |
|
"learning_rate": 3.4768198594445386e-07, |
|
"logits/chosen": -1.7955982685089111, |
|
"logits/rejected": -1.534495234489441, |
|
"logps/chosen": -768.7007446289062, |
|
"logps/rejected": -1410.9127197265625, |
|
"loss": 0.2282, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.612591743469238, |
|
"rewards/margins": 6.481339454650879, |
|
"rewards/rejected": -12.0939302444458, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 7.59375, |
|
"learning_rate": 3.359090761129671e-07, |
|
"logits/chosen": -1.8354012966156006, |
|
"logits/rejected": -1.6204370260238647, |
|
"logps/chosen": -778.430419921875, |
|
"logps/rejected": -1483.1397705078125, |
|
"loss": 0.1945, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.689809799194336, |
|
"rewards/margins": 6.9429473876953125, |
|
"rewards/rejected": -12.632757186889648, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 6.46875, |
|
"learning_rate": 3.2432456843621454e-07, |
|
"logits/chosen": -1.8581463098526, |
|
"logits/rejected": -1.5767614841461182, |
|
"logps/chosen": -744.0443115234375, |
|
"logps/rejected": -1415.0477294921875, |
|
"loss": 0.1642, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.575232982635498, |
|
"rewards/margins": 6.698835849761963, |
|
"rewards/rejected": -12.274068832397461, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 2.53125, |
|
"learning_rate": 3.1292947144215795e-07, |
|
"logits/chosen": -1.9127811193466187, |
|
"logits/rejected": -1.5582940578460693, |
|
"logps/chosen": -683.0210571289062, |
|
"logps/rejected": -1300.957763671875, |
|
"loss": 0.1699, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.052832126617432, |
|
"rewards/margins": 6.252196311950684, |
|
"rewards/rejected": -11.305027961730957, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_logits/chosen": -1.9592163562774658, |
|
"eval_logits/rejected": -1.8420151472091675, |
|
"eval_logps/chosen": -793.1118774414062, |
|
"eval_logps/rejected": -864.7924194335938, |
|
"eval_loss": 0.9697643518447876, |
|
"eval_rewards/accuracies": 0.5950000286102295, |
|
"eval_rewards/chosen": -5.079776287078857, |
|
"eval_rewards/margins": 0.9356989860534668, |
|
"eval_rewards/rejected": -6.015475749969482, |
|
"eval_runtime": 781.2877, |
|
"eval_samples_per_second": 2.56, |
|
"eval_steps_per_second": 0.32, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 11.75, |
|
"learning_rate": 3.0172477716897936e-07, |
|
"logits/chosen": -1.882510781288147, |
|
"logits/rejected": -1.5755535364151, |
|
"logps/chosen": -773.5219116210938, |
|
"logps/rejected": -1535.2706298828125, |
|
"loss": 0.1173, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.749590873718262, |
|
"rewards/margins": 7.536416530609131, |
|
"rewards/rejected": -13.28600788116455, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 24.5, |
|
"learning_rate": 2.907114610787179e-07, |
|
"logits/chosen": -1.8907438516616821, |
|
"logits/rejected": -1.600659728050232, |
|
"logps/chosen": -763.7069091796875, |
|
"logps/rejected": -1600.47412109375, |
|
"loss": 0.1238, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.60447883605957, |
|
"rewards/margins": 8.27302360534668, |
|
"rewards/rejected": -13.877504348754883, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 13.4375, |
|
"learning_rate": 2.798904819723453e-07, |
|
"logits/chosen": -1.8532946109771729, |
|
"logits/rejected": -1.6151609420776367, |
|
"logps/chosen": -724.9547119140625, |
|
"logps/rejected": -1497.78466796875, |
|
"loss": 0.1694, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.479042053222656, |
|
"rewards/margins": 7.565389156341553, |
|
"rewards/rejected": -13.044431686401367, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 3.171875, |
|
"learning_rate": 2.6926278190629624e-07, |
|
"logits/chosen": -1.8193203210830688, |
|
"logits/rejected": -1.5701786279678345, |
|
"logps/chosen": -761.4595947265625, |
|
"logps/rejected": -1493.7257080078125, |
|
"loss": 0.1443, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -5.704920768737793, |
|
"rewards/margins": 7.19381046295166, |
|
"rewards/rejected": -12.898730278015137, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 29.375, |
|
"learning_rate": 2.588292861104547e-07, |
|
"logits/chosen": -1.8176294565200806, |
|
"logits/rejected": -1.4913674592971802, |
|
"logps/chosen": -757.4988403320312, |
|
"logps/rejected": -1452.015869140625, |
|
"loss": 0.2572, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.766777038574219, |
|
"rewards/margins": 6.933912754058838, |
|
"rewards/rejected": -12.700689315795898, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 17.5, |
|
"learning_rate": 2.485909029076017e-07, |
|
"logits/chosen": -1.8745567798614502, |
|
"logits/rejected": -1.4693113565444946, |
|
"logps/chosen": -770.8389892578125, |
|
"logps/rejected": -1528.6536865234375, |
|
"loss": 0.1999, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.627139091491699, |
|
"rewards/margins": 7.68057107925415, |
|
"rewards/rejected": -13.307708740234375, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 7.71875, |
|
"learning_rate": 2.3854852363434294e-07, |
|
"logits/chosen": -1.790553092956543, |
|
"logits/rejected": -1.4430289268493652, |
|
"logps/chosen": -768.5995483398438, |
|
"logps/rejected": -1433.785888671875, |
|
"loss": 0.1821, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.625730991363525, |
|
"rewards/margins": 6.759538173675537, |
|
"rewards/rejected": -12.385269165039062, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 7.78125, |
|
"learning_rate": 2.2870302256350702e-07, |
|
"logits/chosen": -1.8332099914550781, |
|
"logits/rejected": -1.5230542421340942, |
|
"logps/chosen": -758.5438842773438, |
|
"logps/rejected": -1465.334716796875, |
|
"loss": 0.2688, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.736861228942871, |
|
"rewards/margins": 7.083024024963379, |
|
"rewards/rejected": -12.819883346557617, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 7.375, |
|
"learning_rate": 2.1905525682803408e-07, |
|
"logits/chosen": -1.7580779790878296, |
|
"logits/rejected": -1.3955549001693726, |
|
"logps/chosen": -742.316162109375, |
|
"logps/rejected": -1486.322021484375, |
|
"loss": 0.12, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -5.4525275230407715, |
|
"rewards/margins": 7.50750732421875, |
|
"rewards/rejected": -12.960034370422363, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 7.84375, |
|
"learning_rate": 2.0960606634635366e-07, |
|
"logits/chosen": -1.7858350276947021, |
|
"logits/rejected": -1.522297739982605, |
|
"logps/chosen": -778.438720703125, |
|
"logps/rejected": -1517.631591796875, |
|
"loss": 0.1894, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.899752616882324, |
|
"rewards/margins": 7.459959506988525, |
|
"rewards/rejected": -13.359713554382324, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_logits/chosen": -1.9565173387527466, |
|
"eval_logits/rejected": -1.8393272161483765, |
|
"eval_logps/chosen": -796.8450927734375, |
|
"eval_logps/rejected": -869.2388916015625, |
|
"eval_loss": 0.9730798602104187, |
|
"eval_rewards/accuracies": 0.5975000262260437, |
|
"eval_rewards/chosen": -5.11710786819458, |
|
"eval_rewards/margins": 0.9428322315216064, |
|
"eval_rewards/rejected": -6.059940338134766, |
|
"eval_runtime": 784.0287, |
|
"eval_samples_per_second": 2.551, |
|
"eval_steps_per_second": 0.319, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 8.3125, |
|
"learning_rate": 2.003562737492676e-07, |
|
"logits/chosen": -1.7809603214263916, |
|
"logits/rejected": -1.4814460277557373, |
|
"logps/chosen": -758.2651977539062, |
|
"logps/rejected": -1489.6539306640625, |
|
"loss": 0.201, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.66076135635376, |
|
"rewards/margins": 7.262644290924072, |
|
"rewards/rejected": -12.923406600952148, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 3.453125, |
|
"learning_rate": 1.913066843083264e-07, |
|
"logits/chosen": -1.8235909938812256, |
|
"logits/rejected": -1.6878944635391235, |
|
"logps/chosen": -703.0023193359375, |
|
"logps/rejected": -1377.5068359375, |
|
"loss": 0.1528, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.281097412109375, |
|
"rewards/margins": 6.587450981140137, |
|
"rewards/rejected": -11.868548393249512, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 1.8245808586572738e-07, |
|
"logits/chosen": -1.8138974905014038, |
|
"logits/rejected": -1.5821456909179688, |
|
"logps/chosen": -710.8995361328125, |
|
"logps/rejected": -1410.432373046875, |
|
"loss": 0.1257, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -5.360553741455078, |
|
"rewards/margins": 6.941224098205566, |
|
"rewards/rejected": -12.301777839660645, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 6.3125, |
|
"learning_rate": 1.7381124876572757e-07, |
|
"logits/chosen": -1.9142138957977295, |
|
"logits/rejected": -1.6659278869628906, |
|
"logps/chosen": -739.8534545898438, |
|
"logps/rejected": -1462.5361328125, |
|
"loss": 0.1216, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -5.500533103942871, |
|
"rewards/margins": 7.069828987121582, |
|
"rewards/rejected": -12.570362091064453, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 6.59375, |
|
"learning_rate": 1.6536692578757647e-07, |
|
"logits/chosen": -1.852924108505249, |
|
"logits/rejected": -1.5509072542190552, |
|
"logps/chosen": -708.359375, |
|
"logps/rejected": -1479.578857421875, |
|
"loss": 0.115, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.126400470733643, |
|
"rewards/margins": 7.5811262130737305, |
|
"rewards/rejected": -12.707525253295898, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 12.8125, |
|
"learning_rate": 1.571258520799804e-07, |
|
"logits/chosen": -1.8212575912475586, |
|
"logits/rejected": -1.5021746158599854, |
|
"logps/chosen": -801.4302978515625, |
|
"logps/rejected": -1474.905029296875, |
|
"loss": 0.1442, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -6.002583980560303, |
|
"rewards/margins": 6.724352836608887, |
|
"rewards/rejected": -12.726938247680664, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 3.046875, |
|
"learning_rate": 1.490887450971032e-07, |
|
"logits/chosen": -1.8178669214248657, |
|
"logits/rejected": -1.5735647678375244, |
|
"logps/chosen": -782.1468505859375, |
|
"logps/rejected": -1470.4990234375, |
|
"loss": 0.2163, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -6.029472351074219, |
|
"rewards/margins": 6.646093845367432, |
|
"rewards/rejected": -12.675565719604492, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 4.4375, |
|
"learning_rate": 1.4125630453610539e-07, |
|
"logits/chosen": -1.8485219478607178, |
|
"logits/rejected": -1.6230590343475342, |
|
"logps/chosen": -785.03857421875, |
|
"logps/rejected": -1493.5316162109375, |
|
"loss": 0.1828, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -5.861344814300537, |
|
"rewards/margins": 7.066938877105713, |
|
"rewards/rejected": -12.928281784057617, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 10.625, |
|
"learning_rate": 1.336292122762281e-07, |
|
"logits/chosen": -1.7747573852539062, |
|
"logits/rejected": -1.5330942869186401, |
|
"logps/chosen": -760.6348266601562, |
|
"logps/rejected": -1411.752685546875, |
|
"loss": 0.1911, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.722105979919434, |
|
"rewards/margins": 6.500117301940918, |
|
"rewards/rejected": -12.222223281860352, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 8.25, |
|
"learning_rate": 1.2620813231943197e-07, |
|
"logits/chosen": -1.842785120010376, |
|
"logits/rejected": -1.5948874950408936, |
|
"logps/chosen": -769.0888061523438, |
|
"logps/rejected": -1497.182861328125, |
|
"loss": 0.1929, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.66414737701416, |
|
"rewards/margins": 7.302771091461182, |
|
"rewards/rejected": -12.966917037963867, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"eval_logits/chosen": -1.9565365314483643, |
|
"eval_logits/rejected": -1.8393818140029907, |
|
"eval_logps/chosen": -797.0282592773438, |
|
"eval_logps/rejected": -869.4041748046875, |
|
"eval_loss": 0.9733775854110718, |
|
"eval_rewards/accuracies": 0.5945000052452087, |
|
"eval_rewards/chosen": -5.118940353393555, |
|
"eval_rewards/margins": 0.9426524639129639, |
|
"eval_rewards/rejected": -6.061592102050781, |
|
"eval_runtime": 781.5982, |
|
"eval_samples_per_second": 2.559, |
|
"eval_steps_per_second": 0.32, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 15.4375, |
|
"learning_rate": 1.1899371073258947e-07, |
|
"logits/chosen": -1.8293758630752563, |
|
"logits/rejected": -1.5503754615783691, |
|
"logps/chosen": -763.3578491210938, |
|
"logps/rejected": -1477.820556640625, |
|
"loss": 0.1756, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -5.740475654602051, |
|
"rewards/margins": 7.120656490325928, |
|
"rewards/rejected": -12.861132621765137, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 2.390625, |
|
"learning_rate": 1.1198657559123887e-07, |
|
"logits/chosen": -1.8027288913726807, |
|
"logits/rejected": -1.5142316818237305, |
|
"logps/chosen": -748.8643188476562, |
|
"logps/rejected": -1528.968017578125, |
|
"loss": 0.1251, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.598520755767822, |
|
"rewards/margins": 7.6766767501831055, |
|
"rewards/rejected": -13.275197982788086, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 14.4375, |
|
"learning_rate": 1.0518733692490512e-07, |
|
"logits/chosen": -1.809152603149414, |
|
"logits/rejected": -1.4840996265411377, |
|
"logps/chosen": -713.273681640625, |
|
"logps/rejected": -1412.4957275390625, |
|
"loss": 0.1484, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.270025253295898, |
|
"rewards/margins": 6.962747097015381, |
|
"rewards/rejected": -12.232772827148438, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 15.25, |
|
"learning_rate": 9.859658666399291e-08, |
|
"logits/chosen": -1.877356767654419, |
|
"logits/rejected": -1.6048234701156616, |
|
"logps/chosen": -761.9014892578125, |
|
"logps/rejected": -1552.405517578125, |
|
"loss": 0.1827, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.749403476715088, |
|
"rewards/margins": 7.860726356506348, |
|
"rewards/rejected": -13.610130310058594, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 3.46875, |
|
"learning_rate": 9.221489858825289e-08, |
|
"logits/chosen": -1.8329734802246094, |
|
"logits/rejected": -1.5475187301635742, |
|
"logps/chosen": -776.5346069335938, |
|
"logps/rejected": -1564.7760009765625, |
|
"loss": 0.1415, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.798548698425293, |
|
"rewards/margins": 7.909842014312744, |
|
"rewards/rejected": -13.708391189575195, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 9.0625, |
|
"learning_rate": 8.604282827682942e-08, |
|
"logits/chosen": -1.8030303716659546, |
|
"logits/rejected": -1.5392242670059204, |
|
"logps/chosen": -753.5026245117188, |
|
"logps/rejected": -1457.5640869140625, |
|
"loss": 0.1391, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -5.680572509765625, |
|
"rewards/margins": 6.985505104064941, |
|
"rewards/rejected": -12.666077613830566, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 7.90625, |
|
"learning_rate": 8.008091305989313e-08, |
|
"logits/chosen": -1.8074871301651, |
|
"logits/rejected": -1.5014359951019287, |
|
"logps/chosen": -736.6246337890625, |
|
"logps/rejected": -1578.62548828125, |
|
"loss": 0.112, |
|
"rewards/accuracies": 0.956250011920929, |
|
"rewards/chosen": -5.4359283447265625, |
|
"rewards/margins": 8.487021446228027, |
|
"rewards/rejected": -13.922948837280273, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 18.375, |
|
"learning_rate": 7.432967197186225e-08, |
|
"logits/chosen": -1.8415800333023071, |
|
"logits/rejected": -1.548337697982788, |
|
"logps/chosen": -760.2332763671875, |
|
"logps/rejected": -1478.7425537109375, |
|
"loss": 0.1622, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.600264549255371, |
|
"rewards/margins": 7.201352119445801, |
|
"rewards/rejected": -12.801614761352539, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 7.875, |
|
"learning_rate": 6.878960570621568e-08, |
|
"logits/chosen": -1.849948525428772, |
|
"logits/rejected": -1.6014125347137451, |
|
"logps/chosen": -792.1825561523438, |
|
"logps/rejected": -1483.4019775390625, |
|
"loss": 0.1436, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -5.94309139251709, |
|
"rewards/margins": 6.779400825500488, |
|
"rewards/rejected": -12.722491264343262, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 7.5625, |
|
"learning_rate": 6.346119657190396e-08, |
|
"logits/chosen": -1.8813422918319702, |
|
"logits/rejected": -1.5884922742843628, |
|
"logps/chosen": -810.8375854492188, |
|
"logps/rejected": -1503.427490234375, |
|
"loss": 0.1222, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.075570583343506, |
|
"rewards/margins": 6.977696418762207, |
|
"rewards/rejected": -13.053268432617188, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"eval_logits/chosen": -1.9550334215164185, |
|
"eval_logits/rejected": -1.837749719619751, |
|
"eval_logps/chosen": -797.5565185546875, |
|
"eval_logps/rejected": -869.9833984375, |
|
"eval_loss": 0.9741838574409485, |
|
"eval_rewards/accuracies": 0.5954999923706055, |
|
"eval_rewards/chosen": -5.124222755432129, |
|
"eval_rewards/margins": 0.9431625008583069, |
|
"eval_rewards/rejected": -6.067384719848633, |
|
"eval_runtime": 781.8275, |
|
"eval_samples_per_second": 2.558, |
|
"eval_steps_per_second": 0.32, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 11.6875, |
|
"learning_rate": 5.8344908451359315e-08, |
|
"logits/chosen": -1.798156976699829, |
|
"logits/rejected": -1.5139014720916748, |
|
"logps/chosen": -691.9673461914062, |
|
"logps/rejected": -1474.0509033203125, |
|
"loss": 0.1717, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.160942077636719, |
|
"rewards/margins": 7.799500942230225, |
|
"rewards/rejected": -12.960443496704102, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 18.0, |
|
"learning_rate": 5.344118676011173e-08, |
|
"logits/chosen": -1.8956835269927979, |
|
"logits/rejected": -1.5818126201629639, |
|
"logps/chosen": -761.4198608398438, |
|
"logps/rejected": -1536.6260986328125, |
|
"loss": 0.1966, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.5117411613464355, |
|
"rewards/margins": 7.931551933288574, |
|
"rewards/rejected": -13.443292617797852, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 20.25, |
|
"learning_rate": 4.875045840801257e-08, |
|
"logits/chosen": -1.9040454626083374, |
|
"logits/rejected": -1.6035842895507812, |
|
"logps/chosen": -798.6568603515625, |
|
"logps/rejected": -1530.2841796875, |
|
"loss": 0.2062, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.8380513191223145, |
|
"rewards/margins": 7.351933479309082, |
|
"rewards/rejected": -13.189984321594238, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 4.427313176206621e-08, |
|
"logits/chosen": -1.8618987798690796, |
|
"logits/rejected": -1.5344703197479248, |
|
"logps/chosen": -754.1892700195312, |
|
"logps/rejected": -1544.8671875, |
|
"loss": 0.1424, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.624054908752441, |
|
"rewards/margins": 7.836933135986328, |
|
"rewards/rejected": -13.46098804473877, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 11.1875, |
|
"learning_rate": 4.000959661087961e-08, |
|
"logits/chosen": -1.8265072107315063, |
|
"logits/rejected": -1.576949119567871, |
|
"logps/chosen": -750.4592895507812, |
|
"logps/rejected": -1366.3184814453125, |
|
"loss": 0.2174, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.603542804718018, |
|
"rewards/margins": 6.079636573791504, |
|
"rewards/rejected": -11.68317985534668, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 3.578125, |
|
"learning_rate": 3.596022413072886e-08, |
|
"logits/chosen": -1.8795325756072998, |
|
"logits/rejected": -1.6326515674591064, |
|
"logps/chosen": -711.9053955078125, |
|
"logps/rejected": -1451.708251953125, |
|
"loss": 0.1559, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.207059860229492, |
|
"rewards/margins": 7.259676456451416, |
|
"rewards/rejected": -12.46673583984375, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 2.953125, |
|
"learning_rate": 3.2125366853243964e-08, |
|
"logits/chosen": -1.8214938640594482, |
|
"logits/rejected": -1.5755712985992432, |
|
"logps/chosen": -723.0794067382812, |
|
"logps/rejected": -1359.31689453125, |
|
"loss": 0.2401, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -5.46134090423584, |
|
"rewards/margins": 6.4174017906188965, |
|
"rewards/rejected": -11.878742218017578, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 12.1875, |
|
"learning_rate": 2.8505358634718095e-08, |
|
"logits/chosen": -1.7878949642181396, |
|
"logits/rejected": -1.488386631011963, |
|
"logps/chosen": -745.4979858398438, |
|
"logps/rejected": -1443.983642578125, |
|
"loss": 0.2291, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -5.672155857086182, |
|
"rewards/margins": 6.9494218826293945, |
|
"rewards/rejected": -12.621576309204102, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 10.25, |
|
"learning_rate": 2.5100514627042773e-08, |
|
"logits/chosen": -1.8504598140716553, |
|
"logits/rejected": -1.5906519889831543, |
|
"logps/chosen": -742.5360107421875, |
|
"logps/rejected": -1424.46142578125, |
|
"loss": 0.137, |
|
"rewards/accuracies": 0.9437500238418579, |
|
"rewards/chosen": -5.449435234069824, |
|
"rewards/margins": 6.750896453857422, |
|
"rewards/rejected": -12.20033073425293, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 2.1911131250271778e-08, |
|
"logits/chosen": -1.8526302576065063, |
|
"logits/rejected": -1.6113744974136353, |
|
"logps/chosen": -719.0070190429688, |
|
"logps/rejected": -1461.58642578125, |
|
"loss": 0.1486, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.213129043579102, |
|
"rewards/margins": 7.295968532562256, |
|
"rewards/rejected": -12.5090970993042, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"eval_logits/chosen": -1.9550325870513916, |
|
"eval_logits/rejected": -1.8377885818481445, |
|
"eval_logps/chosen": -797.7341918945312, |
|
"eval_logps/rejected": -870.1893310546875, |
|
"eval_loss": 0.9740959405899048, |
|
"eval_rewards/accuracies": 0.5954999923706055, |
|
"eval_rewards/chosen": -5.125999450683594, |
|
"eval_rewards/margins": 0.9434443116188049, |
|
"eval_rewards/rejected": -6.0694427490234375, |
|
"eval_runtime": 780.393, |
|
"eval_samples_per_second": 2.563, |
|
"eval_steps_per_second": 0.32, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 1.8937486166814568e-08, |
|
"logits/chosen": -1.9209082126617432, |
|
"logits/rejected": -1.6933987140655518, |
|
"logps/chosen": -755.1649780273438, |
|
"logps/rejected": -1393.343994140625, |
|
"loss": 0.174, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.591456413269043, |
|
"rewards/margins": 6.302043437957764, |
|
"rewards/rejected": -11.893500328063965, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 8.6875, |
|
"learning_rate": 1.6179838257263935e-08, |
|
"logits/chosen": -1.8362079858779907, |
|
"logits/rejected": -1.5869410037994385, |
|
"logps/chosen": -773.01318359375, |
|
"logps/rejected": -1502.8729248046875, |
|
"loss": 0.2091, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.817551612854004, |
|
"rewards/margins": 7.409367561340332, |
|
"rewards/rejected": -13.22691822052002, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 5.0, |
|
"learning_rate": 1.363842759785794e-08, |
|
"logits/chosen": -1.849810004234314, |
|
"logits/rejected": -1.6076825857162476, |
|
"logps/chosen": -761.3151245117188, |
|
"logps/rejected": -1389.025146484375, |
|
"loss": 0.1852, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.615696907043457, |
|
"rewards/margins": 6.342384338378906, |
|
"rewards/rejected": -11.958080291748047, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 23.75, |
|
"learning_rate": 1.1313475439580225e-08, |
|
"logits/chosen": -1.8430496454238892, |
|
"logits/rejected": -1.4863206148147583, |
|
"logps/chosen": -747.9796752929688, |
|
"logps/rejected": -1544.6732177734375, |
|
"loss": 0.1458, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.6696696281433105, |
|
"rewards/margins": 7.953704833984375, |
|
"rewards/rejected": -13.623373031616211, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 16.75, |
|
"learning_rate": 9.205184188895988e-09, |
|
"logits/chosen": -1.8420413732528687, |
|
"logits/rejected": -1.5764353275299072, |
|
"logps/chosen": -797.4532470703125, |
|
"logps/rejected": -1453.536865234375, |
|
"loss": 0.1949, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.8908371925354, |
|
"rewards/margins": 6.618434906005859, |
|
"rewards/rejected": -12.509271621704102, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 4.8125, |
|
"learning_rate": 7.313737390133857e-09, |
|
"logits/chosen": -1.8078467845916748, |
|
"logits/rejected": -1.4992420673370361, |
|
"logps/chosen": -765.2530517578125, |
|
"logps/rejected": -1426.178955078125, |
|
"loss": 0.2101, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -5.691425323486328, |
|
"rewards/margins": 6.622584342956543, |
|
"rewards/rejected": -12.314008712768555, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 6.34375, |
|
"learning_rate": 5.6392997095047756e-09, |
|
"logits/chosen": -1.7872775793075562, |
|
"logits/rejected": -1.508326768875122, |
|
"logps/chosen": -758.366943359375, |
|
"logps/rejected": -1513.539794921875, |
|
"loss": 0.1763, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.68167781829834, |
|
"rewards/margins": 7.58060359954834, |
|
"rewards/rejected": -13.26228141784668, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 2.640625, |
|
"learning_rate": 4.182016920766529e-09, |
|
"logits/chosen": -1.8777058124542236, |
|
"logits/rejected": -1.6157386302947998, |
|
"logps/chosen": -753.9669799804688, |
|
"logps/rejected": -1563.1689453125, |
|
"loss": 0.1037, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.5306901931762695, |
|
"rewards/margins": 7.934181213378906, |
|
"rewards/rejected": -13.464871406555176, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 2.859375, |
|
"learning_rate": 2.942015892534178e-09, |
|
"logits/chosen": -1.7432610988616943, |
|
"logits/rejected": -1.4490609169006348, |
|
"logps/chosen": -728.8178100585938, |
|
"logps/rejected": -1425.911376953125, |
|
"loss": 0.1537, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.359938621520996, |
|
"rewards/margins": 7.0258660316467285, |
|
"rewards/rejected": -12.385805130004883, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 8.75, |
|
"learning_rate": 1.9194045772336077e-09, |
|
"logits/chosen": -1.8491967916488647, |
|
"logits/rejected": -1.5299230813980103, |
|
"logps/chosen": -698.197998046875, |
|
"logps/rejected": -1445.8939208984375, |
|
"loss": 0.1384, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.197235584259033, |
|
"rewards/margins": 7.472482204437256, |
|
"rewards/rejected": -12.669717788696289, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_logits/chosen": -1.9538819789886475, |
|
"eval_logits/rejected": -1.8366447687149048, |
|
"eval_logps/chosen": -797.7002563476562, |
|
"eval_logps/rejected": -870.1065063476562, |
|
"eval_loss": 0.974000096321106, |
|
"eval_rewards/accuracies": 0.5950000286102295, |
|
"eval_rewards/chosen": -5.125659465789795, |
|
"eval_rewards/margins": 0.9429554343223572, |
|
"eval_rewards/rejected": -6.068615436553955, |
|
"eval_runtime": 786.1332, |
|
"eval_samples_per_second": 2.544, |
|
"eval_steps_per_second": 0.318, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 7.625, |
|
"learning_rate": 1.1142720017040532e-09, |
|
"logits/chosen": -1.829294204711914, |
|
"logits/rejected": -1.6115293502807617, |
|
"logps/chosen": -751.1514892578125, |
|
"logps/rejected": -1485.13427734375, |
|
"loss": 0.2801, |
|
"rewards/accuracies": 0.918749988079071, |
|
"rewards/chosen": -5.69688081741333, |
|
"rewards/margins": 7.314256191253662, |
|
"rewards/rejected": -13.011137008666992, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 9.125, |
|
"learning_rate": 5.266882594481826e-10, |
|
"logits/chosen": -1.890520691871643, |
|
"logits/rejected": -1.6448471546173096, |
|
"logps/chosen": -768.3478393554688, |
|
"logps/rejected": -1508.867919921875, |
|
"loss": 0.1475, |
|
"rewards/accuracies": 0.9312499761581421, |
|
"rewards/chosen": -5.754606246948242, |
|
"rewards/margins": 7.364068031311035, |
|
"rewards/rejected": -13.118673324584961, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 7.9375, |
|
"learning_rate": 1.5670450452892617e-10, |
|
"logits/chosen": -1.957965612411499, |
|
"logits/rejected": -1.7082374095916748, |
|
"logps/chosen": -737.3411865234375, |
|
"logps/rejected": -1421.47607421875, |
|
"loss": 0.1594, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -5.499425411224365, |
|
"rewards/margins": 6.8082733154296875, |
|
"rewards/rejected": -12.307699203491211, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 7.09375, |
|
"learning_rate": 4.3529471158154646e-12, |
|
"logits/chosen": -1.8396613597869873, |
|
"logits/rejected": -1.5516573190689087, |
|
"logps/chosen": -753.6124267578125, |
|
"logps/rejected": -1473.0452880859375, |
|
"loss": 0.0999, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -5.577627658843994, |
|
"rewards/margins": 7.1511077880859375, |
|
"rewards/rejected": -12.728734016418457, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 3742, |
|
"total_flos": 0.0, |
|
"train_loss": 0.23287739991377154, |
|
"train_runtime": 68266.2327, |
|
"train_samples_per_second": 0.877, |
|
"train_steps_per_second": 0.055 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 3742, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|