|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 100, |
|
"global_step": 1065, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.601457471427555, |
|
"learning_rate": 4.672897196261682e-08, |
|
"logits/chosen": -2.861618995666504, |
|
"logits/rejected": -2.8205904960632324, |
|
"logps/chosen": -271.06011962890625, |
|
"logps/rejected": -211.1704559326172, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 9.383478018784075, |
|
"learning_rate": 4.6728971962616824e-07, |
|
"logits/chosen": -2.834562063217163, |
|
"logits/rejected": -2.7922489643096924, |
|
"logps/chosen": -325.0357360839844, |
|
"logps/rejected": -274.966796875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.00014581691357307136, |
|
"rewards/margins": 0.0001575500500621274, |
|
"rewards/margins_max": 0.0024408893659710884, |
|
"rewards/margins_min": -0.002742145210504532, |
|
"rewards/margins_std": 0.0023130779154598713, |
|
"rewards/rejected": -1.173312557511963e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.8412658637892019, |
|
"learning_rate": 9.345794392523365e-07, |
|
"logits/chosen": -2.7256200313568115, |
|
"logits/rejected": -2.707315444946289, |
|
"logps/chosen": -293.6407775878906, |
|
"logps/rejected": -215.7820281982422, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0018517475109547377, |
|
"rewards/margins": 0.0018822858110070229, |
|
"rewards/margins_max": 0.005471331533044577, |
|
"rewards/margins_min": -0.0010383042972534895, |
|
"rewards/margins_std": 0.002963448641821742, |
|
"rewards/rejected": -3.053832188015804e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 2.174968684179302, |
|
"learning_rate": 1.4018691588785047e-06, |
|
"logits/chosen": -2.8197181224823, |
|
"logits/rejected": -2.7506394386291504, |
|
"logps/chosen": -302.8995666503906, |
|
"logps/rejected": -232.47256469726562, |
|
"loss": 0.6888, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.008063090965151787, |
|
"rewards/margins": 0.007646501995623112, |
|
"rewards/margins_max": 0.015395646914839745, |
|
"rewards/margins_min": 0.0007923411321826279, |
|
"rewards/margins_std": 0.006716990377753973, |
|
"rewards/rejected": 0.0004165889695286751, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 1.7099389772513702, |
|
"learning_rate": 1.869158878504673e-06, |
|
"logits/chosen": -2.8403024673461914, |
|
"logits/rejected": -2.759880781173706, |
|
"logps/chosen": -275.9002380371094, |
|
"logps/rejected": -225.5954132080078, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.01584392786026001, |
|
"rewards/margins": 0.014450883492827415, |
|
"rewards/margins_max": 0.03173653036355972, |
|
"rewards/margins_min": -2.6600435376167297e-05, |
|
"rewards/margins_std": 0.014551711268723011, |
|
"rewards/rejected": 0.0013930455315858126, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 2.1338277224043574, |
|
"learning_rate": 2.3364485981308413e-06, |
|
"logits/chosen": -2.8058629035949707, |
|
"logits/rejected": -2.734032154083252, |
|
"logps/chosen": -271.67120361328125, |
|
"logps/rejected": -233.6707305908203, |
|
"loss": 0.6753, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.038989000022411346, |
|
"rewards/margins": 0.036923374980688095, |
|
"rewards/margins_max": 0.08067025989294052, |
|
"rewards/margins_min": 0.006618264131247997, |
|
"rewards/margins_std": 0.03399632126092911, |
|
"rewards/rejected": 0.0020656271371990442, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 2.3538977095192313, |
|
"learning_rate": 2.8037383177570094e-06, |
|
"logits/chosen": -2.739483594894409, |
|
"logits/rejected": -2.7014524936676025, |
|
"logps/chosen": -306.43206787109375, |
|
"logps/rejected": -262.4384460449219, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.0720754936337471, |
|
"rewards/margins": 0.06874484568834305, |
|
"rewards/margins_max": 0.12744362652301788, |
|
"rewards/margins_min": 0.017528068274259567, |
|
"rewards/margins_std": 0.04889371618628502, |
|
"rewards/rejected": 0.0033306567929685116, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 1.673361144474326, |
|
"learning_rate": 3.2710280373831774e-06, |
|
"logits/chosen": -2.761547565460205, |
|
"logits/rejected": -2.701035976409912, |
|
"logps/chosen": -312.3368225097656, |
|
"logps/rejected": -234.6005401611328, |
|
"loss": 0.6461, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.1052437424659729, |
|
"rewards/margins": 0.09483315050601959, |
|
"rewards/margins_max": 0.19849452376365662, |
|
"rewards/margins_min": 0.015507131814956665, |
|
"rewards/margins_std": 0.08316393196582794, |
|
"rewards/rejected": 0.010410590097308159, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.8350886553726478, |
|
"learning_rate": 3.738317757009346e-06, |
|
"logits/chosen": -2.7897353172302246, |
|
"logits/rejected": -2.7348127365112305, |
|
"logps/chosen": -310.0438537597656, |
|
"logps/rejected": -290.1259765625, |
|
"loss": 0.6264, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.11226633936166763, |
|
"rewards/margins": 0.14973895251750946, |
|
"rewards/margins_max": 0.30203038454055786, |
|
"rewards/margins_min": 0.01934988982975483, |
|
"rewards/margins_std": 0.13135038316249847, |
|
"rewards/rejected": -0.03747261315584183, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 2.370057132370328, |
|
"learning_rate": 4.205607476635514e-06, |
|
"logits/chosen": -2.6879115104675293, |
|
"logits/rejected": -2.650247812271118, |
|
"logps/chosen": -264.0439453125, |
|
"logps/rejected": -208.5765380859375, |
|
"loss": 0.5913, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.11950834840536118, |
|
"rewards/margins": 0.21540161967277527, |
|
"rewards/margins_max": 0.40502986311912537, |
|
"rewards/margins_min": 0.061323970556259155, |
|
"rewards/margins_std": 0.15978315472602844, |
|
"rewards/rejected": -0.09589327871799469, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 2.3715260848384814, |
|
"learning_rate": 4.6728971962616825e-06, |
|
"logits/chosen": -2.6909117698669434, |
|
"logits/rejected": -2.6588971614837646, |
|
"logps/chosen": -273.89483642578125, |
|
"logps/rejected": -280.07440185546875, |
|
"loss": 0.5649, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.07516907155513763, |
|
"rewards/margins": 0.25403863191604614, |
|
"rewards/margins_max": 0.5021854639053345, |
|
"rewards/margins_min": 0.0338195376098156, |
|
"rewards/margins_std": 0.20746219158172607, |
|
"rewards/rejected": -0.1788695752620697, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_logits/chosen": -2.6587636470794678, |
|
"eval_logits/rejected": -2.624938726425171, |
|
"eval_logps/chosen": -294.36553955078125, |
|
"eval_logps/rejected": -276.0350341796875, |
|
"eval_loss": 0.6725258231163025, |
|
"eval_rewards/accuracies": 0.6029999852180481, |
|
"eval_rewards/chosen": -0.09772102534770966, |
|
"eval_rewards/margins": 0.07684005051851273, |
|
"eval_rewards/margins_max": 0.4634929597377777, |
|
"eval_rewards/margins_min": -0.27960655093193054, |
|
"eval_rewards/margins_std": 0.25082939863204956, |
|
"eval_rewards/rejected": -0.17456106841564178, |
|
"eval_runtime": 429.6888, |
|
"eval_samples_per_second": 4.655, |
|
"eval_steps_per_second": 0.291, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 4.663865383973278, |
|
"learning_rate": 4.999879018839288e-06, |
|
"logits/chosen": -2.696274518966675, |
|
"logits/rejected": -2.6191954612731934, |
|
"logps/chosen": -361.00341796875, |
|
"logps/rejected": -324.7152404785156, |
|
"loss": 0.4866, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.1360231339931488, |
|
"rewards/margins": 0.5188378095626831, |
|
"rewards/margins_max": 0.8877674341201782, |
|
"rewards/margins_min": 0.15628832578659058, |
|
"rewards/margins_std": 0.33103400468826294, |
|
"rewards/rejected": -0.3828147053718567, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 2.659078012596696, |
|
"learning_rate": 4.99772856836941e-06, |
|
"logits/chosen": -2.6332004070281982, |
|
"logits/rejected": -2.58402681350708, |
|
"logps/chosen": -338.8200988769531, |
|
"logps/rejected": -314.74078369140625, |
|
"loss": 0.4569, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.09566140174865723, |
|
"rewards/margins": 0.5811273455619812, |
|
"rewards/margins_max": 1.0773193836212158, |
|
"rewards/margins_min": 0.19689173996448517, |
|
"rewards/margins_std": 0.4066368043422699, |
|
"rewards/rejected": -0.4854659140110016, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 5.638039796957378, |
|
"learning_rate": 4.992892309373227e-06, |
|
"logits/chosen": -2.5800509452819824, |
|
"logits/rejected": -2.5182909965515137, |
|
"logps/chosen": -377.07415771484375, |
|
"logps/rejected": -370.76007080078125, |
|
"loss": 0.4111, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.05290098860859871, |
|
"rewards/margins": 0.7561925649642944, |
|
"rewards/margins_max": 1.318340539932251, |
|
"rewards/margins_min": 0.10839029401540756, |
|
"rewards/margins_std": 0.5403656363487244, |
|
"rewards/rejected": -0.7032915949821472, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 3.185506159687688, |
|
"learning_rate": 4.985375442281969e-06, |
|
"logits/chosen": -2.529670476913452, |
|
"logits/rejected": -2.505495548248291, |
|
"logps/chosen": -311.046875, |
|
"logps/rejected": -341.42388916015625, |
|
"loss": 0.4278, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.07367168366909027, |
|
"rewards/margins": 0.7894155383110046, |
|
"rewards/margins_max": 1.4857099056243896, |
|
"rewards/margins_min": 0.17245283722877502, |
|
"rewards/margins_std": 0.6018984317779541, |
|
"rewards/rejected": -0.715743899345398, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 6.522701528001161, |
|
"learning_rate": 4.9751860499858175e-06, |
|
"logits/chosen": -2.501380443572998, |
|
"logits/rejected": -2.4765429496765137, |
|
"logps/chosen": -295.21844482421875, |
|
"logps/rejected": -294.5282897949219, |
|
"loss": 0.4, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.0653342604637146, |
|
"rewards/margins": 0.7491210699081421, |
|
"rewards/margins_max": 1.2866442203521729, |
|
"rewards/margins_min": 0.1819653958082199, |
|
"rewards/margins_std": 0.5079216957092285, |
|
"rewards/rejected": -0.8144553303718567, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 7.099952708342032, |
|
"learning_rate": 4.962335089142376e-06, |
|
"logits/chosen": -2.4243741035461426, |
|
"logits/rejected": -2.382873058319092, |
|
"logps/chosen": -311.75506591796875, |
|
"logps/rejected": -337.52227783203125, |
|
"loss": 0.357, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.11208178848028183, |
|
"rewards/margins": 0.919207751750946, |
|
"rewards/margins_max": 1.5249192714691162, |
|
"rewards/margins_min": 0.28068000078201294, |
|
"rewards/margins_std": 0.5570467710494995, |
|
"rewards/rejected": -1.0312894582748413, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 15.17640060673072, |
|
"learning_rate": 4.946836378394967e-06, |
|
"logits/chosen": -2.3504722118377686, |
|
"logits/rejected": -2.3078646659851074, |
|
"logps/chosen": -345.75726318359375, |
|
"logps/rejected": -430.4729919433594, |
|
"loss": 0.3207, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.16135653853416443, |
|
"rewards/margins": 1.1807162761688232, |
|
"rewards/margins_max": 1.7726972103118896, |
|
"rewards/margins_min": 0.30320629477500916, |
|
"rewards/margins_std": 0.6691843867301941, |
|
"rewards/rejected": -1.34207284450531, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 8.646835771533034, |
|
"learning_rate": 4.928706583513441e-06, |
|
"logits/chosen": -2.1459343433380127, |
|
"logits/rejected": -2.055025577545166, |
|
"logps/chosen": -378.0511779785156, |
|
"logps/rejected": -468.014404296875, |
|
"loss": 0.3002, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -0.46150344610214233, |
|
"rewards/margins": 1.2767913341522217, |
|
"rewards/margins_max": 2.0464911460876465, |
|
"rewards/margins_min": 0.511903703212738, |
|
"rewards/margins_std": 0.6761992573738098, |
|
"rewards/rejected": -1.7382948398590088, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 4.978015250452758, |
|
"learning_rate": 4.907965199473471e-06, |
|
"logits/chosen": -1.873817801475525, |
|
"logits/rejected": -1.7417463064193726, |
|
"logps/chosen": -362.2750549316406, |
|
"logps/rejected": -456.6219787597656, |
|
"loss": 0.2276, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.40314167737960815, |
|
"rewards/margins": 1.7882015705108643, |
|
"rewards/margins_max": 2.7738163471221924, |
|
"rewards/margins_min": 0.8575057983398438, |
|
"rewards/margins_std": 0.8512203097343445, |
|
"rewards/rejected": -2.191343069076538, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 9.452666973020474, |
|
"learning_rate": 4.884634529493591e-06, |
|
"logits/chosen": -1.8183701038360596, |
|
"logits/rejected": -1.7065311670303345, |
|
"logps/chosen": -416.6236877441406, |
|
"logps/rejected": -549.5675048828125, |
|
"loss": 0.2267, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.7709085941314697, |
|
"rewards/margins": 2.068791627883911, |
|
"rewards/margins_max": 3.4109108448028564, |
|
"rewards/margins_min": 0.585421621799469, |
|
"rewards/margins_std": 1.2988938093185425, |
|
"rewards/rejected": -2.839700222015381, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"eval_logits/chosen": -1.6714030504226685, |
|
"eval_logits/rejected": -1.6187551021575928, |
|
"eval_logps/chosen": -474.511962890625, |
|
"eval_logps/rejected": -497.81463623046875, |
|
"eval_loss": 0.7397594451904297, |
|
"eval_rewards/accuracies": 0.6439999938011169, |
|
"eval_rewards/chosen": -1.899185299873352, |
|
"eval_rewards/margins": 0.49317169189453125, |
|
"eval_rewards/margins_max": 2.671410083770752, |
|
"eval_rewards/margins_min": -1.8999947309494019, |
|
"eval_rewards/margins_std": 1.5475962162017822, |
|
"eval_rewards/rejected": -2.392357110977173, |
|
"eval_runtime": 429.7827, |
|
"eval_samples_per_second": 4.654, |
|
"eval_steps_per_second": 0.291, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 8.87270228770415, |
|
"learning_rate": 4.858739661052539e-06, |
|
"logits/chosen": -1.511608600616455, |
|
"logits/rejected": -1.4413245916366577, |
|
"logps/chosen": -427.55413818359375, |
|
"logps/rejected": -620.9583740234375, |
|
"loss": 0.1779, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.0736979246139526, |
|
"rewards/margins": 2.571638822555542, |
|
"rewards/margins_max": 4.121321678161621, |
|
"rewards/margins_min": 0.6724111437797546, |
|
"rewards/margins_std": 1.547525405883789, |
|
"rewards/rejected": -3.645336866378784, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 6.5430570772956, |
|
"learning_rate": 4.830308438912687e-06, |
|
"logits/chosen": -1.3631094694137573, |
|
"logits/rejected": -1.1896626949310303, |
|
"logps/chosen": -610.7598876953125, |
|
"logps/rejected": -881.2283325195312, |
|
"loss": 0.1472, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.5730366706848145, |
|
"rewards/margins": 3.4959709644317627, |
|
"rewards/margins_max": 5.009349822998047, |
|
"rewards/margins_min": 1.5561037063598633, |
|
"rewards/margins_std": 1.543906331062317, |
|
"rewards/rejected": -6.069007396697998, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 11.567738598963295, |
|
"learning_rate": 4.799371435178544e-06, |
|
"logits/chosen": -1.2935478687286377, |
|
"logits/rejected": -1.1057153940200806, |
|
"logps/chosen": -756.6351318359375, |
|
"logps/rejected": -983.3760986328125, |
|
"loss": 0.2065, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.8495945930480957, |
|
"rewards/margins": 3.363232135772705, |
|
"rewards/margins_max": 5.4596266746521, |
|
"rewards/margins_min": 0.4015835225582123, |
|
"rewards/margins_std": 2.3402669429779053, |
|
"rewards/rejected": -7.212827205657959, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 14.908052027638925, |
|
"learning_rate": 4.765961916422575e-06, |
|
"logits/chosen": -1.3409693241119385, |
|
"logits/rejected": -1.2054760456085205, |
|
"logps/chosen": -675.9885864257812, |
|
"logps/rejected": -992.09375, |
|
"loss": 0.1872, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -3.511915683746338, |
|
"rewards/margins": 3.5772738456726074, |
|
"rewards/margins_max": 5.6575751304626465, |
|
"rewards/margins_min": 1.1669104099273682, |
|
"rewards/margins_std": 2.040917158126831, |
|
"rewards/rejected": -7.089189052581787, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 9.045837659115827, |
|
"learning_rate": 4.730115807913627e-06, |
|
"logits/chosen": -1.4189417362213135, |
|
"logits/rejected": -1.2720701694488525, |
|
"logps/chosen": -674.1248779296875, |
|
"logps/rejected": -974.5089721679688, |
|
"loss": 0.1161, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -3.62843656539917, |
|
"rewards/margins": 3.6062092781066895, |
|
"rewards/margins_max": 5.835866451263428, |
|
"rewards/margins_min": 1.5058424472808838, |
|
"rewards/margins_std": 1.905207633972168, |
|
"rewards/rejected": -7.234647274017334, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 9.416665631409534, |
|
"learning_rate": 4.691871654986485e-06, |
|
"logits/chosen": -1.5399147272109985, |
|
"logits/rejected": -1.3777363300323486, |
|
"logps/chosen": -710.0699462890625, |
|
"logps/rejected": -1064.373779296875, |
|
"loss": 0.1185, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.1797637939453125, |
|
"rewards/margins": 3.8746650218963623, |
|
"rewards/margins_max": 5.889615058898926, |
|
"rewards/margins_min": 1.7422330379486084, |
|
"rewards/margins_std": 1.8929340839385986, |
|
"rewards/rejected": -8.054429054260254, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 56.620770226956026, |
|
"learning_rate": 4.651270581594054e-06, |
|
"logits/chosen": -1.5505702495574951, |
|
"logits/rejected": -1.439883828163147, |
|
"logps/chosen": -655.2439575195312, |
|
"logps/rejected": -985.9658203125, |
|
"loss": 0.2278, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -3.4612839221954346, |
|
"rewards/margins": 3.773378372192383, |
|
"rewards/margins_max": 5.983767509460449, |
|
"rewards/margins_min": 1.2523890733718872, |
|
"rewards/margins_std": 2.149752378463745, |
|
"rewards/rejected": -7.234662055969238, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 9.941259668614844, |
|
"learning_rate": 4.6083562460867545e-06, |
|
"logits/chosen": -1.4796500205993652, |
|
"logits/rejected": -1.3813179731369019, |
|
"logps/chosen": -780.708984375, |
|
"logps/rejected": -1187.9755859375, |
|
"loss": 0.1019, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.772520542144775, |
|
"rewards/margins": 4.3812642097473145, |
|
"rewards/margins_max": 6.6738691329956055, |
|
"rewards/margins_min": 1.5353296995162964, |
|
"rewards/margins_std": 2.349224805831909, |
|
"rewards/rejected": -9.153783798217773, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 15.917323127244398, |
|
"learning_rate": 4.563174794266684e-06, |
|
"logits/chosen": -1.5392366647720337, |
|
"logits/rejected": -1.4464019536972046, |
|
"logps/chosen": -692.5883178710938, |
|
"logps/rejected": -963.4357299804688, |
|
"loss": 0.2109, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -4.056720733642578, |
|
"rewards/margins": 2.9374544620513916, |
|
"rewards/margins_max": 5.395993232727051, |
|
"rewards/margins_min": 0.5851330161094666, |
|
"rewards/margins_std": 2.2416446208953857, |
|
"rewards/rejected": -6.994175910949707, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 11.476540562223757, |
|
"learning_rate": 4.5157748097670125e-06, |
|
"logits/chosen": -1.5950560569763184, |
|
"logits/rejected": -1.4536263942718506, |
|
"logps/chosen": -938.9279174804688, |
|
"logps/rejected": -1296.3175048828125, |
|
"loss": 0.1011, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.9241485595703125, |
|
"rewards/margins": 4.1446919441223145, |
|
"rewards/margins_max": 6.245351314544678, |
|
"rewards/margins_min": 1.5497524738311768, |
|
"rewards/margins_std": 2.1239330768585205, |
|
"rewards/rejected": -10.068840026855469, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"eval_logits/chosen": -1.527121663093567, |
|
"eval_logits/rejected": -1.4628735780715942, |
|
"eval_logps/chosen": -1076.8594970703125, |
|
"eval_logps/rejected": -1150.1253662109375, |
|
"eval_loss": 0.9229267835617065, |
|
"eval_rewards/accuracies": 0.6470000147819519, |
|
"eval_rewards/chosen": -7.9226603507995605, |
|
"eval_rewards/margins": 0.992804765701294, |
|
"eval_rewards/margins_max": 5.051580905914307, |
|
"eval_rewards/margins_min": -3.0808050632476807, |
|
"eval_rewards/margins_std": 2.7076425552368164, |
|
"eval_rewards/rejected": -8.915464401245117, |
|
"eval_runtime": 428.5869, |
|
"eval_samples_per_second": 4.666, |
|
"eval_steps_per_second": 0.292, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 5.622465452747041, |
|
"learning_rate": 4.466207261809989e-06, |
|
"logits/chosen": -1.625128149986267, |
|
"logits/rejected": -1.4389641284942627, |
|
"logps/chosen": -856.7615356445312, |
|
"logps/rejected": -1196.298583984375, |
|
"loss": 0.1046, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -5.589455604553223, |
|
"rewards/margins": 4.231289863586426, |
|
"rewards/margins_max": 6.631104946136475, |
|
"rewards/margins_min": 1.5690397024154663, |
|
"rewards/margins_std": 2.2617735862731934, |
|
"rewards/rejected": -9.820745468139648, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 37.31728926549998, |
|
"learning_rate": 4.414525450399713e-06, |
|
"logits/chosen": -1.6272573471069336, |
|
"logits/rejected": -1.5049296617507935, |
|
"logps/chosen": -816.5538330078125, |
|
"logps/rejected": -1220.7586669921875, |
|
"loss": 0.1477, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -5.082805633544922, |
|
"rewards/margins": 4.518080711364746, |
|
"rewards/margins_max": 6.932036399841309, |
|
"rewards/margins_min": 1.335532546043396, |
|
"rewards/margins_std": 2.588527202606201, |
|
"rewards/rejected": -9.600885391235352, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 5.37421997088044, |
|
"learning_rate": 4.360784949008615e-06, |
|
"logits/chosen": -1.8167043924331665, |
|
"logits/rejected": -1.645042061805725, |
|
"logps/chosen": -831.2081298828125, |
|
"logps/rejected": -1208.270263671875, |
|
"loss": 0.111, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.768882751464844, |
|
"rewards/margins": 4.644423007965088, |
|
"rewards/margins_max": 7.169321537017822, |
|
"rewards/margins_min": 1.9509897232055664, |
|
"rewards/margins_std": 2.3954663276672363, |
|
"rewards/rejected": -9.413305282592773, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 6.115341044262903, |
|
"learning_rate": 4.30504354481929e-06, |
|
"logits/chosen": -1.7410516738891602, |
|
"logits/rejected": -1.6124862432479858, |
|
"logps/chosen": -741.0687866210938, |
|
"logps/rejected": -1153.75390625, |
|
"loss": 0.1044, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.412232875823975, |
|
"rewards/margins": 4.613609790802002, |
|
"rewards/margins_max": 6.75095272064209, |
|
"rewards/margins_min": 1.8239591121673584, |
|
"rewards/margins_std": 2.2112793922424316, |
|
"rewards/rejected": -9.025842666625977, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 8.804373815951685, |
|
"learning_rate": 4.247361176585904e-06, |
|
"logits/chosen": -1.6892824172973633, |
|
"logits/rejected": -1.567959189414978, |
|
"logps/chosen": -782.8369140625, |
|
"logps/rejected": -1259.287353515625, |
|
"loss": 0.0817, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -4.678778648376465, |
|
"rewards/margins": 5.049575328826904, |
|
"rewards/margins_max": 6.799111366271973, |
|
"rewards/margins_min": 2.811235189437866, |
|
"rewards/margins_std": 1.8613466024398804, |
|
"rewards/rejected": -9.728352546691895, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 22.068799726915795, |
|
"learning_rate": 4.187799870182038e-06, |
|
"logits/chosen": -1.7105668783187866, |
|
"logits/rejected": -1.5694526433944702, |
|
"logps/chosen": -762.7816162109375, |
|
"logps/rejected": -1217.321044921875, |
|
"loss": 0.1032, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -4.626708507537842, |
|
"rewards/margins": 5.017427444458008, |
|
"rewards/margins_max": 7.252201080322266, |
|
"rewards/margins_min": 2.1052348613739014, |
|
"rewards/margins_std": 2.383836507797241, |
|
"rewards/rejected": -9.644137382507324, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 10.290993940063032, |
|
"learning_rate": 4.1264236719042365e-06, |
|
"logits/chosen": -1.7839868068695068, |
|
"logits/rejected": -1.6120306253433228, |
|
"logps/chosen": -801.9637451171875, |
|
"logps/rejected": -1164.2841796875, |
|
"loss": 0.1588, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -4.478141784667969, |
|
"rewards/margins": 4.724917411804199, |
|
"rewards/margins_max": 7.042010307312012, |
|
"rewards/margins_min": 1.6127008199691772, |
|
"rewards/margins_std": 2.5478250980377197, |
|
"rewards/rejected": -9.203059196472168, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 0.5210034728309734, |
|
"learning_rate": 4.063298579603001e-06, |
|
"logits/chosen": -1.6867786645889282, |
|
"logits/rejected": -1.4948246479034424, |
|
"logps/chosen": -782.1204223632812, |
|
"logps/rejected": -1320.6646728515625, |
|
"loss": 0.0414, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.968000888824463, |
|
"rewards/margins": 5.882228851318359, |
|
"rewards/margins_max": 7.5977654457092285, |
|
"rewards/margins_min": 3.866016387939453, |
|
"rewards/margins_std": 1.67121160030365, |
|
"rewards/rejected": -10.850229263305664, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"grad_norm": 13.427534231462952, |
|
"learning_rate": 3.998492471715272e-06, |
|
"logits/chosen": -1.6988388299942017, |
|
"logits/rejected": -1.5951545238494873, |
|
"logps/chosen": -877.1390380859375, |
|
"logps/rejected": -1402.83203125, |
|
"loss": 0.0685, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.7572479248046875, |
|
"rewards/margins": 5.6500396728515625, |
|
"rewards/margins_max": 7.7508039474487305, |
|
"rewards/margins_min": 3.0813615322113037, |
|
"rewards/margins_std": 2.1727612018585205, |
|
"rewards/rejected": -11.407288551330566, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"grad_norm": 2.4923200900882536, |
|
"learning_rate": 3.932075034274723e-06, |
|
"logits/chosen": -1.695990800857544, |
|
"logits/rejected": -1.5507137775421143, |
|
"logps/chosen": -851.5281372070312, |
|
"logps/rejected": -1309.4801025390625, |
|
"loss": 0.1396, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.59298038482666, |
|
"rewards/margins": 5.056563854217529, |
|
"rewards/margins_max": 7.312686920166016, |
|
"rewards/margins_min": 1.9958137273788452, |
|
"rewards/margins_std": 2.379727840423584, |
|
"rewards/rejected": -10.649542808532715, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"eval_logits/chosen": -1.667060375213623, |
|
"eval_logits/rejected": -1.5979340076446533, |
|
"eval_logps/chosen": -1116.99462890625, |
|
"eval_logps/rejected": -1209.6519775390625, |
|
"eval_loss": 0.9696508646011353, |
|
"eval_rewards/accuracies": 0.6779999732971191, |
|
"eval_rewards/chosen": -8.324010848999023, |
|
"eval_rewards/margins": 1.1867200136184692, |
|
"eval_rewards/margins_max": 5.737547397613525, |
|
"eval_rewards/margins_min": -3.3923180103302, |
|
"eval_rewards/margins_std": 3.034074544906616, |
|
"eval_rewards/rejected": -9.510730743408203, |
|
"eval_runtime": 428.9385, |
|
"eval_samples_per_second": 4.663, |
|
"eval_steps_per_second": 0.291, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"grad_norm": 6.537657300759786, |
|
"learning_rate": 3.864117685978339e-06, |
|
"logits/chosen": -1.705518126487732, |
|
"logits/rejected": -1.5725294351577759, |
|
"logps/chosen": -897.5511474609375, |
|
"logps/rejected": -1346.69091796875, |
|
"loss": 0.0939, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -6.0087456703186035, |
|
"rewards/margins": 4.9630446434021, |
|
"rewards/margins_max": 7.469670295715332, |
|
"rewards/margins_min": 1.8066009283065796, |
|
"rewards/margins_std": 2.5759172439575195, |
|
"rewards/rejected": -10.971790313720703, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"grad_norm": 8.302069752936143, |
|
"learning_rate": 3.794693501389861e-06, |
|
"logits/chosen": -1.6544630527496338, |
|
"logits/rejected": -1.5131093263626099, |
|
"logps/chosen": -929.0003051757812, |
|
"logps/rejected": -1400.305419921875, |
|
"loss": 0.0548, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -6.110236644744873, |
|
"rewards/margins": 5.219418525695801, |
|
"rewards/margins_max": 7.189891815185547, |
|
"rewards/margins_min": 2.5033233165740967, |
|
"rewards/margins_std": 2.120957374572754, |
|
"rewards/rejected": -11.329654693603516, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"grad_norm": 1.8619960615196327, |
|
"learning_rate": 3.7238771323626822e-06, |
|
"logits/chosen": -1.677835464477539, |
|
"logits/rejected": -1.5019906759262085, |
|
"logps/chosen": -999.4791259765625, |
|
"logps/rejected": -1461.9598388671875, |
|
"loss": 0.0742, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -6.562595367431641, |
|
"rewards/margins": 5.4552412033081055, |
|
"rewards/margins_max": 7.70766544342041, |
|
"rewards/margins_min": 2.421809434890747, |
|
"rewards/margins_std": 2.3860526084899902, |
|
"rewards/rejected": -12.01783561706543, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 5.868124977117504, |
|
"learning_rate": 3.651744727766676e-06, |
|
"logits/chosen": -1.6518735885620117, |
|
"logits/rejected": -1.497201681137085, |
|
"logps/chosen": -996.3165893554688, |
|
"logps/rejected": -1532.0673828125, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.051259517669678, |
|
"rewards/margins": 5.819365501403809, |
|
"rewards/margins_max": 7.876378536224365, |
|
"rewards/margins_min": 2.8851966857910156, |
|
"rewards/margins_std": 2.268291473388672, |
|
"rewards/rejected": -12.870625495910645, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 10.805483266746087, |
|
"learning_rate": 3.57837385160529e-06, |
|
"logits/chosen": -1.621983289718628, |
|
"logits/rejected": -1.479236364364624, |
|
"logps/chosen": -850.7548828125, |
|
"logps/rejected": -1321.7237548828125, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -5.389442443847656, |
|
"rewards/margins": 5.238432884216309, |
|
"rewards/margins_max": 7.7712531089782715, |
|
"rewards/margins_min": 2.671607255935669, |
|
"rewards/margins_std": 2.3198580741882324, |
|
"rewards/rejected": -10.627875328063965, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 2.3174254055425183, |
|
"learning_rate": 3.503843399610941e-06, |
|
"logits/chosen": -1.6503874063491821, |
|
"logits/rejected": -1.4967344999313354, |
|
"logps/chosen": -1084.4403076171875, |
|
"logps/rejected": -1629.10693359375, |
|
"loss": 0.0463, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.2910637855529785, |
|
"rewards/margins": 6.02940034866333, |
|
"rewards/margins_max": 8.215094566345215, |
|
"rewards/margins_min": 3.005946636199951, |
|
"rewards/margins_std": 2.3927676677703857, |
|
"rewards/rejected": -13.320462226867676, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 2.3666379183603676, |
|
"learning_rate": 3.4282335144083985e-06, |
|
"logits/chosen": -1.6708223819732666, |
|
"logits/rejected": -1.5695239305496216, |
|
"logps/chosen": -911.2108154296875, |
|
"logps/rejected": -1447.9605712890625, |
|
"loss": 0.046, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -5.949938774108887, |
|
"rewards/margins": 6.058177471160889, |
|
"rewards/margins_max": 8.24023723602295, |
|
"rewards/margins_min": 3.495572566986084, |
|
"rewards/margins_std": 2.158477783203125, |
|
"rewards/rejected": -12.008115768432617, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 5.998521622676278, |
|
"learning_rate": 3.351625499337395e-06, |
|
"logits/chosen": -1.7066646814346313, |
|
"logits/rejected": -1.5283164978027344, |
|
"logps/chosen": -988.6871337890625, |
|
"logps/rejected": -1536.398681640625, |
|
"loss": 0.0589, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -6.574495792388916, |
|
"rewards/margins": 6.3713812828063965, |
|
"rewards/margins_max": 8.416463851928711, |
|
"rewards/margins_min": 3.5055854320526123, |
|
"rewards/margins_std": 2.2722041606903076, |
|
"rewards/rejected": -12.945878982543945, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 2.3121304603384734, |
|
"learning_rate": 3.2741017310271056e-06, |
|
"logits/chosen": -1.6702913045883179, |
|
"logits/rejected": -1.5516611337661743, |
|
"logps/chosen": -985.5250244140625, |
|
"logps/rejected": -1516.626708984375, |
|
"loss": 0.0956, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -6.465473175048828, |
|
"rewards/margins": 5.861384868621826, |
|
"rewards/margins_max": 8.299718856811523, |
|
"rewards/margins_min": 2.847576141357422, |
|
"rewards/margins_std": 2.479989767074585, |
|
"rewards/rejected": -12.326857566833496, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 7.590092284353976, |
|
"learning_rate": 3.195745570816532e-06, |
|
"logits/chosen": -1.580214500427246, |
|
"logits/rejected": -1.4903004169464111, |
|
"logps/chosen": -1054.06103515625, |
|
"logps/rejected": -1565.05810546875, |
|
"loss": 0.078, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -7.562958717346191, |
|
"rewards/margins": 5.623807430267334, |
|
"rewards/margins_max": 7.8602399826049805, |
|
"rewards/margins_min": 2.7611820697784424, |
|
"rewards/margins_std": 2.2531216144561768, |
|
"rewards/rejected": -13.186765670776367, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"eval_logits/chosen": -1.625468134880066, |
|
"eval_logits/rejected": -1.5552992820739746, |
|
"eval_logps/chosen": -1304.2783203125, |
|
"eval_logps/rejected": -1404.43701171875, |
|
"eval_loss": 1.0425163507461548, |
|
"eval_rewards/accuracies": 0.6539999842643738, |
|
"eval_rewards/chosen": -10.196849822998047, |
|
"eval_rewards/margins": 1.2617301940917969, |
|
"eval_rewards/margins_max": 6.198861598968506, |
|
"eval_rewards/margins_min": -3.7952890396118164, |
|
"eval_rewards/margins_std": 3.3487019538879395, |
|
"eval_rewards/rejected": -11.45858097076416, |
|
"eval_runtime": 428.5936, |
|
"eval_samples_per_second": 4.666, |
|
"eval_steps_per_second": 0.292, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"grad_norm": 6.506789256384592, |
|
"learning_rate": 3.116641275116018e-06, |
|
"logits/chosen": -1.6757932901382446, |
|
"logits/rejected": -1.4905316829681396, |
|
"logps/chosen": -1033.5491943359375, |
|
"logps/rejected": -1559.284912109375, |
|
"loss": 0.0438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.929083824157715, |
|
"rewards/margins": 6.069881916046143, |
|
"rewards/margins_max": 8.297313690185547, |
|
"rewards/margins_min": 3.3508517742156982, |
|
"rewards/margins_std": 2.215510606765747, |
|
"rewards/rejected": -12.998964309692383, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 2.799331098085792, |
|
"learning_rate": 3.0368739048062956e-06, |
|
"logits/chosen": -1.759708046913147, |
|
"logits/rejected": -1.5871171951293945, |
|
"logps/chosen": -981.7990112304688, |
|
"logps/rejected": -1526.3701171875, |
|
"loss": 0.0613, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -6.813815116882324, |
|
"rewards/margins": 6.061458587646484, |
|
"rewards/margins_max": 8.386785507202148, |
|
"rewards/margins_min": 3.3189563751220703, |
|
"rewards/margins_std": 2.240609884262085, |
|
"rewards/rejected": -12.875274658203125, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"grad_norm": 5.0163934897293325, |
|
"learning_rate": 2.956529233772492e-06, |
|
"logits/chosen": -1.8143419027328491, |
|
"logits/rejected": -1.6911777257919312, |
|
"logps/chosen": -1105.9581298828125, |
|
"logps/rejected": -1680.5181884765625, |
|
"loss": 0.0611, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -7.72055721282959, |
|
"rewards/margins": 6.132667064666748, |
|
"rewards/margins_max": 8.71304702758789, |
|
"rewards/margins_min": 2.979393243789673, |
|
"rewards/margins_std": 2.5647242069244385, |
|
"rewards/rejected": -13.85322380065918, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 8.7260672105137, |
|
"learning_rate": 2.8756936566714317e-06, |
|
"logits/chosen": -1.8574295043945312, |
|
"logits/rejected": -1.6885216236114502, |
|
"logps/chosen": -1066.135009765625, |
|
"logps/rejected": -1536.2845458984375, |
|
"loss": 0.0701, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -7.2507524490356445, |
|
"rewards/margins": 5.671202659606934, |
|
"rewards/margins_max": 8.193965911865234, |
|
"rewards/margins_min": 2.9109997749328613, |
|
"rewards/margins_std": 2.3909668922424316, |
|
"rewards/rejected": -12.921956062316895, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"grad_norm": 1.3009208627187219, |
|
"learning_rate": 2.794454096031429e-06, |
|
"logits/chosen": -1.9122663736343384, |
|
"logits/rejected": -1.7744579315185547, |
|
"logps/chosen": -971.1412963867188, |
|
"logps/rejected": -1555.514404296875, |
|
"loss": 0.0719, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.982313632965088, |
|
"rewards/margins": 6.4228410720825195, |
|
"rewards/margins_max": 8.959406852722168, |
|
"rewards/margins_min": 3.101313352584839, |
|
"rewards/margins_std": 2.621415615081787, |
|
"rewards/rejected": -12.405153274536133, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 1.9328399730262527, |
|
"learning_rate": 2.71289790878446e-06, |
|
"logits/chosen": -1.8311843872070312, |
|
"logits/rejected": -1.6815801858901978, |
|
"logps/chosen": -1012.6105346679688, |
|
"logps/rejected": -1622.107666015625, |
|
"loss": 0.0697, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.073877811431885, |
|
"rewards/margins": 6.487514495849609, |
|
"rewards/margins_max": 8.834905624389648, |
|
"rewards/margins_min": 3.4811978340148926, |
|
"rewards/margins_std": 2.3898167610168457, |
|
"rewards/rejected": -13.561391830444336, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 0.17510978882217287, |
|
"learning_rate": 2.6311127923312156e-06, |
|
"logits/chosen": -1.8733352422714233, |
|
"logits/rejected": -1.731903314590454, |
|
"logps/chosen": -1004.5771484375, |
|
"logps/rejected": -1612.7529296875, |
|
"loss": 0.042, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -6.689506530761719, |
|
"rewards/margins": 6.522040367126465, |
|
"rewards/margins_max": 8.876073837280273, |
|
"rewards/margins_min": 3.256171464920044, |
|
"rewards/margins_std": 2.6161324977874756, |
|
"rewards/rejected": -13.211545944213867, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 12.817311644147658, |
|
"learning_rate": 2.549186690240057e-06, |
|
"logits/chosen": -1.7239491939544678, |
|
"logits/rejected": -1.6188468933105469, |
|
"logps/chosen": -1058.948486328125, |
|
"logps/rejected": -1677.268310546875, |
|
"loss": 0.0444, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.725058078765869, |
|
"rewards/margins": 6.387824058532715, |
|
"rewards/margins_max": 8.449275016784668, |
|
"rewards/margins_min": 3.585833787918091, |
|
"rewards/margins_std": 2.1896438598632812, |
|
"rewards/rejected": -14.112882614135742, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 2.200716474214234, |
|
"learning_rate": 2.4672076976812548e-06, |
|
"logits/chosen": -1.7416937351226807, |
|
"logits/rejected": -1.5824648141860962, |
|
"logps/chosen": -1067.9490966796875, |
|
"logps/rejected": -1658.8199462890625, |
|
"loss": 0.0499, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -7.612107753753662, |
|
"rewards/margins": 6.36210823059082, |
|
"rewards/margins_max": 8.805683135986328, |
|
"rewards/margins_min": 3.530320405960083, |
|
"rewards/margins_std": 2.4696502685546875, |
|
"rewards/rejected": -13.974217414855957, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 5.8486806702260115, |
|
"learning_rate": 2.3852639666982218e-06, |
|
"logits/chosen": -1.729406714439392, |
|
"logits/rejected": -1.5859451293945312, |
|
"logps/chosen": -1029.7244873046875, |
|
"logps/rejected": -1697.3372802734375, |
|
"loss": 0.0765, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.36349630355835, |
|
"rewards/margins": 6.701470851898193, |
|
"rewards/margins_max": 9.139188766479492, |
|
"rewards/margins_min": 3.311300754547119, |
|
"rewards/margins_std": 2.6797633171081543, |
|
"rewards/rejected": -14.064967155456543, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"eval_logits/chosen": -1.7166643142700195, |
|
"eval_logits/rejected": -1.6462373733520508, |
|
"eval_logps/chosen": -1312.563232421875, |
|
"eval_logps/rejected": -1434.9708251953125, |
|
"eval_loss": 1.171522855758667, |
|
"eval_rewards/accuracies": 0.6610000133514404, |
|
"eval_rewards/chosen": -10.279698371887207, |
|
"eval_rewards/margins": 1.4842207431793213, |
|
"eval_rewards/margins_max": 7.0606184005737305, |
|
"eval_rewards/margins_min": -4.507997989654541, |
|
"eval_rewards/margins_std": 3.902109384536743, |
|
"eval_rewards/rejected": -11.76391887664795, |
|
"eval_runtime": 428.7286, |
|
"eval_samples_per_second": 4.665, |
|
"eval_steps_per_second": 0.292, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"grad_norm": 2.9887908700456385, |
|
"learning_rate": 2.303443611417584e-06, |
|
"logits/chosen": -1.7610228061676025, |
|
"logits/rejected": -1.5708558559417725, |
|
"logps/chosen": -1019.3812255859375, |
|
"logps/rejected": -1596.500244140625, |
|
"loss": 0.0749, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -6.916808128356934, |
|
"rewards/margins": 6.629319190979004, |
|
"rewards/margins_max": 9.27853775024414, |
|
"rewards/margins_min": 3.639543056488037, |
|
"rewards/margins_std": 2.523704767227173, |
|
"rewards/rejected": -13.546127319335938, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 0.09345851725609673, |
|
"learning_rate": 2.2218346133000264e-06, |
|
"logits/chosen": -1.8310705423355103, |
|
"logits/rejected": -1.6571632623672485, |
|
"logps/chosen": -1089.475341796875, |
|
"logps/rejected": -1714.6595458984375, |
|
"loss": 0.0874, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -7.478503227233887, |
|
"rewards/margins": 6.835662841796875, |
|
"rewards/margins_max": 9.080436706542969, |
|
"rewards/margins_min": 3.885005235671997, |
|
"rewards/margins_std": 2.379390239715576, |
|
"rewards/rejected": -14.314167976379395, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 13.20707399800831, |
|
"learning_rate": 2.140524726533792e-06, |
|
"logits/chosen": -1.787641167640686, |
|
"logits/rejected": -1.661877989768982, |
|
"logps/chosen": -947.0399169921875, |
|
"logps/rejected": -1524.828369140625, |
|
"loss": 0.0539, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.729840278625488, |
|
"rewards/margins": 6.693819999694824, |
|
"rewards/margins_max": 9.224821090698242, |
|
"rewards/margins_min": 3.935499906539917, |
|
"rewards/margins_std": 2.420135021209717, |
|
"rewards/rejected": -12.423660278320312, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 3.4772116065816014, |
|
"learning_rate": 2.059601383672566e-06, |
|
"logits/chosen": -1.8164135217666626, |
|
"logits/rejected": -1.6359403133392334, |
|
"logps/chosen": -1021.05322265625, |
|
"logps/rejected": -1599.884033203125, |
|
"loss": 0.04, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.091916561126709, |
|
"rewards/margins": 6.576811790466309, |
|
"rewards/margins_max": 8.615550994873047, |
|
"rewards/margins_min": 4.3320631980896, |
|
"rewards/margins_std": 2.022761821746826, |
|
"rewards/rejected": -13.668729782104492, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"grad_norm": 1.910640538145904, |
|
"learning_rate": 1.9791516016192214e-06, |
|
"logits/chosen": -1.7743873596191406, |
|
"logits/rejected": -1.6393556594848633, |
|
"logps/chosen": -1051.207763671875, |
|
"logps/rejected": -1660.7542724609375, |
|
"loss": 0.0612, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.210175514221191, |
|
"rewards/margins": 6.484718322753906, |
|
"rewards/margins_max": 9.02783489227295, |
|
"rewards/margins_min": 3.8449549674987793, |
|
"rewards/margins_std": 2.3319091796875, |
|
"rewards/rejected": -13.694894790649414, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"grad_norm": 0.4156394296306771, |
|
"learning_rate": 1.8992618880565039e-06, |
|
"logits/chosen": -1.6157350540161133, |
|
"logits/rejected": -1.5133240222930908, |
|
"logps/chosen": -1027.439453125, |
|
"logps/rejected": -1595.850830078125, |
|
"loss": 0.0679, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.812713623046875, |
|
"rewards/margins": 5.9946794509887695, |
|
"rewards/margins_max": 8.839725494384766, |
|
"rewards/margins_min": 2.7734172344207764, |
|
"rewards/margins_std": 2.6815245151519775, |
|
"rewards/rejected": -13.807393074035645, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 1.3243616077705502, |
|
"learning_rate": 1.8200181484252888e-06, |
|
"logits/chosen": -1.809934377670288, |
|
"logits/rejected": -1.6905943155288696, |
|
"logps/chosen": -1084.2518310546875, |
|
"logps/rejected": -1680.405029296875, |
|
"loss": 0.0558, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -7.783478736877441, |
|
"rewards/margins": 6.605474948883057, |
|
"rewards/margins_max": 9.080102920532227, |
|
"rewards/margins_min": 3.5593819618225098, |
|
"rewards/margins_std": 2.538597822189331, |
|
"rewards/rejected": -14.388954162597656, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 4.935603103347596, |
|
"learning_rate": 1.7415055935504234e-06, |
|
"logits/chosen": -1.845766305923462, |
|
"logits/rejected": -1.6762946844100952, |
|
"logps/chosen": -1092.99609375, |
|
"logps/rejected": -1732.690185546875, |
|
"loss": 0.0317, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.822856903076172, |
|
"rewards/margins": 6.983065605163574, |
|
"rewards/margins_max": 9.305206298828125, |
|
"rewards/margins_min": 4.250351428985596, |
|
"rewards/margins_std": 2.260586738586426, |
|
"rewards/rejected": -14.80592155456543, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"grad_norm": 7.946766648058278, |
|
"learning_rate": 1.6638086480134954e-06, |
|
"logits/chosen": -1.7061771154403687, |
|
"logits/rejected": -1.5929887294769287, |
|
"logps/chosen": -1015.9044189453125, |
|
"logps/rejected": -1602.688232421875, |
|
"loss": 0.0565, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -7.775514125823975, |
|
"rewards/margins": 6.233893394470215, |
|
"rewards/margins_max": 9.065168380737305, |
|
"rewards/margins_min": 2.735471725463867, |
|
"rewards/margins_std": 2.843477725982666, |
|
"rewards/rejected": -14.009408950805664, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 14.357423867713438, |
|
"learning_rate": 1.5870108593710473e-06, |
|
"logits/chosen": -1.6323438882827759, |
|
"logits/rejected": -1.4323724508285522, |
|
"logps/chosen": -1116.0875244140625, |
|
"logps/rejected": -1646.796875, |
|
"loss": 0.0521, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -8.165563583374023, |
|
"rewards/margins": 6.203519821166992, |
|
"rewards/margins_max": 8.34221363067627, |
|
"rewards/margins_min": 3.4348888397216797, |
|
"rewards/margins_std": 2.265625476837158, |
|
"rewards/rejected": -14.369084358215332, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"eval_logits/chosen": -1.7082782983779907, |
|
"eval_logits/rejected": -1.6383651494979858, |
|
"eval_logps/chosen": -1494.51513671875, |
|
"eval_logps/rejected": -1592.3466796875, |
|
"eval_loss": 1.10393488407135, |
|
"eval_rewards/accuracies": 0.6510000228881836, |
|
"eval_rewards/chosen": -12.099217414855957, |
|
"eval_rewards/margins": 1.2384591102600098, |
|
"eval_rewards/margins_max": 6.618937015533447, |
|
"eval_rewards/margins_min": -4.080103874206543, |
|
"eval_rewards/margins_std": 3.540152072906494, |
|
"eval_rewards/rejected": -13.337677001953125, |
|
"eval_runtime": 428.89, |
|
"eval_samples_per_second": 4.663, |
|
"eval_steps_per_second": 0.291, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 1.9864414899165639, |
|
"learning_rate": 1.511194808315853e-06, |
|
"logits/chosen": -1.6388124227523804, |
|
"logits/rejected": -1.5256621837615967, |
|
"logps/chosen": -1023.98486328125, |
|
"logps/rejected": -1670.1683349609375, |
|
"loss": 0.0281, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -7.728632926940918, |
|
"rewards/margins": 6.720816135406494, |
|
"rewards/margins_max": 8.741449356079102, |
|
"rewards/margins_min": 3.9726672172546387, |
|
"rewards/margins_std": 2.161562204360962, |
|
"rewards/rejected": -14.44944953918457, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 2.4447392288346776, |
|
"learning_rate": 1.4364420198778662e-06, |
|
"logits/chosen": -1.9084421396255493, |
|
"logits/rejected": -1.7372974157333374, |
|
"logps/chosen": -1069.986572265625, |
|
"logps/rejected": -1748.271484375, |
|
"loss": 0.0222, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.059340000152588, |
|
"rewards/margins": 7.3868408203125, |
|
"rewards/margins_max": 9.624174118041992, |
|
"rewards/margins_min": 5.218744277954102, |
|
"rewards/margins_std": 2.0435428619384766, |
|
"rewards/rejected": -14.44618034362793, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 0.3283356036109342, |
|
"learning_rate": 1.3628328757603243e-06, |
|
"logits/chosen": -1.7824742794036865, |
|
"logits/rejected": -1.607553243637085, |
|
"logps/chosen": -1106.8240966796875, |
|
"logps/rejected": -1757.1396484375, |
|
"loss": 0.0279, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -7.68569278717041, |
|
"rewards/margins": 7.141098976135254, |
|
"rewards/margins_max": 9.206721305847168, |
|
"rewards/margins_min": 4.535717010498047, |
|
"rewards/margins_std": 2.135599374771118, |
|
"rewards/rejected": -14.826791763305664, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 0.466472720676363, |
|
"learning_rate": 1.2904465279052725e-06, |
|
"logits/chosen": -1.7631629705429077, |
|
"logits/rejected": -1.602264165878296, |
|
"logps/chosen": -1061.498291015625, |
|
"logps/rejected": -1701.393798828125, |
|
"loss": 0.0468, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.410794734954834, |
|
"rewards/margins": 6.905499458312988, |
|
"rewards/margins_max": 9.171496391296387, |
|
"rewards/margins_min": 3.907447099685669, |
|
"rewards/margins_std": 2.4243547916412354, |
|
"rewards/rejected": -14.316293716430664, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"grad_norm": 0.18287903072298267, |
|
"learning_rate": 1.219360813381446e-06, |
|
"logits/chosen": -1.707327127456665, |
|
"logits/rejected": -1.5934031009674072, |
|
"logps/chosen": -995.9183349609375, |
|
"logps/rejected": -1665.1839599609375, |
|
"loss": 0.0293, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -7.091825008392334, |
|
"rewards/margins": 7.211878776550293, |
|
"rewards/margins_max": 9.057371139526367, |
|
"rewards/margins_min": 5.072964668273926, |
|
"rewards/margins_std": 1.7897049188613892, |
|
"rewards/rejected": -14.303705215454102, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 3.972318831886565, |
|
"learning_rate": 1.1496521706860392e-06, |
|
"logits/chosen": -1.6829960346221924, |
|
"logits/rejected": -1.5544617176055908, |
|
"logps/chosen": -1081.756103515625, |
|
"logps/rejected": -1768.875732421875, |
|
"loss": 0.0206, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -7.939679145812988, |
|
"rewards/margins": 7.1435041427612305, |
|
"rewards/margins_max": 9.630821228027344, |
|
"rewards/margins_min": 4.051230430603027, |
|
"rewards/margins_std": 2.557648181915283, |
|
"rewards/rejected": -15.083181381225586, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"grad_norm": 0.15453005325463406, |
|
"learning_rate": 1.0813955575503588e-06, |
|
"logits/chosen": -1.7566072940826416, |
|
"logits/rejected": -1.5845129489898682, |
|
"logps/chosen": -1044.108154296875, |
|
"logps/rejected": -1700.744140625, |
|
"loss": 0.0287, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.392706871032715, |
|
"rewards/margins": 7.490866661071777, |
|
"rewards/margins_max": 9.396993637084961, |
|
"rewards/margins_min": 5.832265377044678, |
|
"rewards/margins_std": 1.631260871887207, |
|
"rewards/rejected": -14.883572578430176, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"grad_norm": 5.041769273622829, |
|
"learning_rate": 1.0146643703377488e-06, |
|
"logits/chosen": -1.817198395729065, |
|
"logits/rejected": -1.6213362216949463, |
|
"logps/chosen": -1110.951416015625, |
|
"logps/rejected": -1716.0474853515625, |
|
"loss": 0.0267, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.933794975280762, |
|
"rewards/margins": 6.956693172454834, |
|
"rewards/margins_max": 9.445747375488281, |
|
"rewards/margins_min": 4.286118984222412, |
|
"rewards/margins_std": 2.26704740524292, |
|
"rewards/rejected": -14.89048957824707, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 0.028319940482359873, |
|
"learning_rate": 9.495303651204496e-07, |
|
"logits/chosen": -1.7651485204696655, |
|
"logits/rejected": -1.5782719850540161, |
|
"logps/chosen": -1116.5997314453125, |
|
"logps/rejected": -1775.474853515625, |
|
"loss": 0.0151, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.942474365234375, |
|
"rewards/margins": 7.359992027282715, |
|
"rewards/margins_max": 9.237930297851562, |
|
"rewards/margins_min": 5.0483293533325195, |
|
"rewards/margins_std": 1.877681016921997, |
|
"rewards/rejected": -15.302465438842773, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"grad_norm": 5.560910630060733, |
|
"learning_rate": 8.860635805202616e-07, |
|
"logits/chosen": -1.7791054248809814, |
|
"logits/rejected": -1.6470226049423218, |
|
"logps/chosen": -1128.700439453125, |
|
"logps/rejected": -1887.7562255859375, |
|
"loss": 0.0325, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.071495056152344, |
|
"rewards/margins": 7.690678596496582, |
|
"rewards/margins_max": 10.314142227172852, |
|
"rewards/margins_min": 4.382508754730225, |
|
"rewards/margins_std": 2.6400108337402344, |
|
"rewards/rejected": -15.762173652648926, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"eval_logits/chosen": -1.7630056142807007, |
|
"eval_logits/rejected": -1.6934845447540283, |
|
"eval_logps/chosen": -1308.7979736328125, |
|
"eval_logps/rejected": -1442.1707763671875, |
|
"eval_loss": 1.2213647365570068, |
|
"eval_rewards/accuracies": 0.6600000262260437, |
|
"eval_rewards/chosen": -10.242044448852539, |
|
"eval_rewards/margins": 1.5938735008239746, |
|
"eval_rewards/margins_max": 7.453612327575684, |
|
"eval_rewards/margins_min": -4.738708972930908, |
|
"eval_rewards/margins_std": 4.117012023925781, |
|
"eval_rewards/rejected": -11.835918426513672, |
|
"eval_runtime": 428.6302, |
|
"eval_samples_per_second": 4.666, |
|
"eval_steps_per_second": 0.292, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"grad_norm": 2.130920241454253, |
|
"learning_rate": 8.24332262395994e-07, |
|
"logits/chosen": -1.8262383937835693, |
|
"logits/rejected": -1.701570749282837, |
|
"logps/chosen": -990.9318237304688, |
|
"logps/rejected": -1709.775390625, |
|
"loss": 0.0187, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.961573600769043, |
|
"rewards/margins": 7.437863826751709, |
|
"rewards/margins_max": 9.480931282043457, |
|
"rewards/margins_min": 4.916778087615967, |
|
"rewards/margins_std": 2.042966365814209, |
|
"rewards/rejected": -14.399436950683594, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 2.7824509845813816, |
|
"learning_rate": 7.644027904586587e-07, |
|
"logits/chosen": -1.7199032306671143, |
|
"logits/rejected": -1.584393858909607, |
|
"logps/chosen": -1131.29541015625, |
|
"logps/rejected": -1871.33984375, |
|
"loss": 0.0245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.013903617858887, |
|
"rewards/margins": 7.894224643707275, |
|
"rewards/margins_max": 10.34322738647461, |
|
"rewards/margins_min": 5.228058338165283, |
|
"rewards/margins_std": 2.269243001937866, |
|
"rewards/rejected": -15.908126831054688, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 2.942249921804053, |
|
"learning_rate": 7.06339606893347e-07, |
|
"logits/chosen": -1.7625993490219116, |
|
"logits/rejected": -1.552851915359497, |
|
"logps/chosen": -1175.3865966796875, |
|
"logps/rejected": -1861.589599609375, |
|
"loss": 0.0079, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.902280330657959, |
|
"rewards/margins": 8.019886016845703, |
|
"rewards/margins_max": 10.1281156539917, |
|
"rewards/margins_min": 6.0280256271362305, |
|
"rewards/margins_std": 1.8365955352783203, |
|
"rewards/rejected": -15.92216682434082, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"grad_norm": 0.9426802566028485, |
|
"learning_rate": 6.502051470645149e-07, |
|
"logits/chosen": -1.780339241027832, |
|
"logits/rejected": -1.6216917037963867, |
|
"logps/chosen": -1083.676513671875, |
|
"logps/rejected": -1733.9345703125, |
|
"loss": 0.0234, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -7.57614278793335, |
|
"rewards/margins": 7.293878078460693, |
|
"rewards/margins_max": 9.180914878845215, |
|
"rewards/margins_min": 4.970505714416504, |
|
"rewards/margins_std": 1.8781248331069946, |
|
"rewards/rejected": -14.870019912719727, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 0.2273620604649508, |
|
"learning_rate": 5.960597723792194e-07, |
|
"logits/chosen": -1.7474027872085571, |
|
"logits/rejected": -1.575292944908142, |
|
"logps/chosen": -1081.188232421875, |
|
"logps/rejected": -1787.9605712890625, |
|
"loss": 0.0229, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.850655555725098, |
|
"rewards/margins": 7.63167667388916, |
|
"rewards/margins_max": 10.038192749023438, |
|
"rewards/margins_min": 5.033900737762451, |
|
"rewards/margins_std": 2.2446444034576416, |
|
"rewards/rejected": -15.482332229614258, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 2.008660400899101, |
|
"learning_rate": 5.43961705380465e-07, |
|
"logits/chosen": -1.791469931602478, |
|
"logits/rejected": -1.6313838958740234, |
|
"logps/chosen": -1132.4666748046875, |
|
"logps/rejected": -1828.349609375, |
|
"loss": 0.0326, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -7.8505706787109375, |
|
"rewards/margins": 7.863633632659912, |
|
"rewards/margins_max": 10.446196556091309, |
|
"rewards/margins_min": 4.516094207763672, |
|
"rewards/margins_std": 2.653343915939331, |
|
"rewards/rejected": -15.714204788208008, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"grad_norm": 1.9443236752501327, |
|
"learning_rate": 4.939669671404871e-07, |
|
"logits/chosen": -1.708809494972229, |
|
"logits/rejected": -1.5626459121704102, |
|
"logps/chosen": -1073.6954345703125, |
|
"logps/rejected": -1811.253662109375, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.609139919281006, |
|
"rewards/margins": 7.719372749328613, |
|
"rewards/margins_max": 9.861176490783691, |
|
"rewards/margins_min": 5.364067077636719, |
|
"rewards/margins_std": 2.00132155418396, |
|
"rewards/rejected": -15.328509330749512, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"grad_norm": 2.06741221987676, |
|
"learning_rate": 4.461293170212644e-07, |
|
"logits/chosen": -1.8483781814575195, |
|
"logits/rejected": -1.6546274423599243, |
|
"logps/chosen": -1123.468017578125, |
|
"logps/rejected": -1798.621826171875, |
|
"loss": 0.0322, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.950200080871582, |
|
"rewards/margins": 7.411231994628906, |
|
"rewards/margins_max": 10.043291091918945, |
|
"rewards/margins_min": 4.137426853179932, |
|
"rewards/margins_std": 2.554241418838501, |
|
"rewards/rejected": -15.361432075500488, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"grad_norm": 0.8360988782034983, |
|
"learning_rate": 4.005001948670606e-07, |
|
"logits/chosen": -1.813595175743103, |
|
"logits/rejected": -1.6409099102020264, |
|
"logps/chosen": -1167.838623046875, |
|
"logps/rejected": -1849.715576171875, |
|
"loss": 0.0177, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.237103462219238, |
|
"rewards/margins": 7.6171464920043945, |
|
"rewards/margins_max": 10.030054092407227, |
|
"rewards/margins_min": 5.162562847137451, |
|
"rewards/margins_std": 2.175448417663574, |
|
"rewards/rejected": -15.854248046875, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"grad_norm": 0.28012086124588453, |
|
"learning_rate": 3.571286656911377e-07, |
|
"logits/chosen": -1.765481948852539, |
|
"logits/rejected": -1.5610095262527466, |
|
"logps/chosen": -1176.97509765625, |
|
"logps/rejected": -1906.4827880859375, |
|
"loss": 0.0256, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.486291885375977, |
|
"rewards/margins": 7.7398223876953125, |
|
"rewards/margins_max": 10.43345832824707, |
|
"rewards/margins_min": 4.932800769805908, |
|
"rewards/margins_std": 2.4372851848602295, |
|
"rewards/rejected": -16.226112365722656, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"eval_logits/chosen": -1.7013623714447021, |
|
"eval_logits/rejected": -1.6318581104278564, |
|
"eval_logps/chosen": -1451.88916015625, |
|
"eval_logps/rejected": -1581.395751953125, |
|
"eval_loss": 1.202013373374939, |
|
"eval_rewards/accuracies": 0.6620000004768372, |
|
"eval_rewards/chosen": -11.672956466674805, |
|
"eval_rewards/margins": 1.555212140083313, |
|
"eval_rewards/margins_max": 7.462009906768799, |
|
"eval_rewards/margins_min": -4.611362457275391, |
|
"eval_rewards/margins_std": 4.051472187042236, |
|
"eval_rewards/rejected": -13.228167533874512, |
|
"eval_runtime": 428.5009, |
|
"eval_samples_per_second": 4.667, |
|
"eval_steps_per_second": 0.292, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 0.6107279357125659, |
|
"learning_rate": 3.1606136691612555e-07, |
|
"logits/chosen": -1.7235673666000366, |
|
"logits/rejected": -1.5583069324493408, |
|
"logps/chosen": -1131.2056884765625, |
|
"logps/rejected": -1782.4332275390625, |
|
"loss": 0.0174, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -8.073932647705078, |
|
"rewards/margins": 7.4360671043396, |
|
"rewards/margins_max": 9.632651329040527, |
|
"rewards/margins_min": 5.327752113342285, |
|
"rewards/margins_std": 1.8935825824737549, |
|
"rewards/rejected": -15.50999927520752, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"grad_norm": 0.00966975682935343, |
|
"learning_rate": 2.773424582247844e-07, |
|
"logits/chosen": -1.6917803287506104, |
|
"logits/rejected": -1.4805718660354614, |
|
"logps/chosen": -1141.4068603515625, |
|
"logps/rejected": -1758.5318603515625, |
|
"loss": 0.0178, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.279281616210938, |
|
"rewards/margins": 7.198742866516113, |
|
"rewards/margins_max": 9.452996253967285, |
|
"rewards/margins_min": 4.647868633270264, |
|
"rewards/margins_std": 2.1528563499450684, |
|
"rewards/rejected": -15.478025436401367, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 3.0376153555107446, |
|
"learning_rate": 2.410135740750821e-07, |
|
"logits/chosen": -1.7053037881851196, |
|
"logits/rejected": -1.5509663820266724, |
|
"logps/chosen": -1090.0576171875, |
|
"logps/rejected": -1777.7945556640625, |
|
"loss": 0.043, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -8.083145141601562, |
|
"rewards/margins": 7.410282135009766, |
|
"rewards/margins_max": 9.764742851257324, |
|
"rewards/margins_min": 5.139273643493652, |
|
"rewards/margins_std": 2.1024787425994873, |
|
"rewards/rejected": -15.493428230285645, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 0.6859350599797326, |
|
"learning_rate": 2.0711377893064182e-07, |
|
"logits/chosen": -1.8094221353530884, |
|
"logits/rejected": -1.6414306163787842, |
|
"logps/chosen": -1164.137451171875, |
|
"logps/rejected": -1852.5625, |
|
"loss": 0.0298, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.158674240112305, |
|
"rewards/margins": 7.432664394378662, |
|
"rewards/margins_max": 10.111716270446777, |
|
"rewards/margins_min": 4.017355442047119, |
|
"rewards/margins_std": 2.706058979034424, |
|
"rewards/rejected": -15.591337203979492, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 6.314035361122387, |
|
"learning_rate": 1.756795252547111e-07, |
|
"logits/chosen": -1.665837287902832, |
|
"logits/rejected": -1.5277420282363892, |
|
"logps/chosen": -1078.7557373046875, |
|
"logps/rejected": -1684.4287109375, |
|
"loss": 0.0295, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -8.158330917358398, |
|
"rewards/margins": 6.608637809753418, |
|
"rewards/margins_max": 9.02342700958252, |
|
"rewards/margins_min": 3.459864854812622, |
|
"rewards/margins_std": 2.52087664604187, |
|
"rewards/rejected": -14.766969680786133, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 7.031354165895073, |
|
"learning_rate": 1.4674461431281013e-07, |
|
"logits/chosen": -1.7678325176239014, |
|
"logits/rejected": -1.6092376708984375, |
|
"logps/chosen": -1103.3350830078125, |
|
"logps/rejected": -1758.6500244140625, |
|
"loss": 0.0242, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.200953483581543, |
|
"rewards/margins": 7.219841957092285, |
|
"rewards/margins_max": 9.616026878356934, |
|
"rewards/margins_min": 4.519529819488525, |
|
"rewards/margins_std": 2.263463258743286, |
|
"rewards/rejected": -15.420794486999512, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 0.3134845483065753, |
|
"learning_rate": 1.2034015982622243e-07, |
|
"logits/chosen": -1.7572071552276611, |
|
"logits/rejected": -1.5487779378890991, |
|
"logps/chosen": -1225.2569580078125, |
|
"logps/rejected": -1896.434326171875, |
|
"loss": 0.0271, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.728785514831543, |
|
"rewards/margins": 7.427072048187256, |
|
"rewards/margins_max": 9.873006820678711, |
|
"rewards/margins_min": 4.541165351867676, |
|
"rewards/margins_std": 2.364122152328491, |
|
"rewards/rejected": -16.155858993530273, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"grad_norm": 0.3690247126654468, |
|
"learning_rate": 9.649455451539419e-08, |
|
"logits/chosen": -1.6380853652954102, |
|
"logits/rejected": -1.4841035604476929, |
|
"logps/chosen": -1118.8951416015625, |
|
"logps/rejected": -1833.6126708984375, |
|
"loss": 0.0234, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.304391860961914, |
|
"rewards/margins": 7.7766313552856445, |
|
"rewards/margins_max": 10.320856094360352, |
|
"rewards/margins_min": 5.046825885772705, |
|
"rewards/margins_std": 2.3247740268707275, |
|
"rewards/rejected": -16.081022262573242, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 0.035471082675790036, |
|
"learning_rate": 7.523343956923196e-08, |
|
"logits/chosen": -1.7599372863769531, |
|
"logits/rejected": -1.5641086101531982, |
|
"logps/chosen": -1154.5972900390625, |
|
"logps/rejected": -1892.8466796875, |
|
"loss": 0.0177, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.36001205444336, |
|
"rewards/margins": 7.979167938232422, |
|
"rewards/margins_max": 10.33701229095459, |
|
"rewards/margins_min": 5.4104814529418945, |
|
"rewards/margins_std": 2.1920626163482666, |
|
"rewards/rejected": -16.33917999267578, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"grad_norm": 3.7962060660896455, |
|
"learning_rate": 5.657967707312195e-08, |
|
"logits/chosen": -1.6692126989364624, |
|
"logits/rejected": -1.5857051610946655, |
|
"logps/chosen": -1184.306884765625, |
|
"logps/rejected": -1848.2109375, |
|
"loss": 0.0246, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.908744812011719, |
|
"rewards/margins": 6.874230861663818, |
|
"rewards/margins_max": 9.302106857299805, |
|
"rewards/margins_min": 3.831719160079956, |
|
"rewards/margins_std": 2.495060443878174, |
|
"rewards/rejected": -15.782976150512695, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"eval_logits/chosen": -1.6955701112747192, |
|
"eval_logits/rejected": -1.6262598037719727, |
|
"eval_logps/chosen": -1466.096923828125, |
|
"eval_logps/rejected": -1594.279541015625, |
|
"eval_loss": 1.2153818607330322, |
|
"eval_rewards/accuracies": 0.6570000052452087, |
|
"eval_rewards/chosen": -11.815034866333008, |
|
"eval_rewards/margins": 1.5419700145721436, |
|
"eval_rewards/margins_max": 7.536928653717041, |
|
"eval_rewards/margins_min": -4.68462610244751, |
|
"eval_rewards/margins_std": 4.09072208404541, |
|
"eval_rewards/rejected": -13.357006072998047, |
|
"eval_runtime": 428.679, |
|
"eval_samples_per_second": 4.665, |
|
"eval_steps_per_second": 0.292, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"grad_norm": 0.6729976013886217, |
|
"learning_rate": 4.055332542531959e-08, |
|
"logits/chosen": -1.7815234661102295, |
|
"logits/rejected": -1.622179627418518, |
|
"logps/chosen": -1156.6016845703125, |
|
"logps/rejected": -1884.806884765625, |
|
"loss": 0.036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.329094886779785, |
|
"rewards/margins": 7.462734222412109, |
|
"rewards/margins_max": 10.055309295654297, |
|
"rewards/margins_min": 4.672645568847656, |
|
"rewards/margins_std": 2.401289701461792, |
|
"rewards/rejected": -15.791829109191895, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 0.3931332359603542, |
|
"learning_rate": 2.7171617768147472e-08, |
|
"logits/chosen": -1.757817268371582, |
|
"logits/rejected": -1.6103594303131104, |
|
"logps/chosen": -1205.610107421875, |
|
"logps/rejected": -1883.1265869140625, |
|
"loss": 0.0152, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -8.91980266571045, |
|
"rewards/margins": 7.268828392028809, |
|
"rewards/margins_max": 9.548690795898438, |
|
"rewards/margins_min": 4.854549884796143, |
|
"rewards/margins_std": 2.135824203491211, |
|
"rewards/rejected": -16.18863296508789, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"grad_norm": 0.22753287376533807, |
|
"learning_rate": 1.6448943457189616e-08, |
|
"logits/chosen": -1.680837869644165, |
|
"logits/rejected": -1.540766716003418, |
|
"logps/chosen": -1161.3184814453125, |
|
"logps/rejected": -1843.625, |
|
"loss": 0.0264, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -8.517123222351074, |
|
"rewards/margins": 7.349859714508057, |
|
"rewards/margins_max": 10.050976753234863, |
|
"rewards/margins_min": 4.578632354736328, |
|
"rewards/margins_std": 2.4442994594573975, |
|
"rewards/rejected": -15.866983413696289, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"grad_norm": 0.6569270546900866, |
|
"learning_rate": 8.39683258841123e-09, |
|
"logits/chosen": -1.621664047241211, |
|
"logits/rejected": -1.4453307390213013, |
|
"logps/chosen": -1106.493896484375, |
|
"logps/rejected": -1756.8822021484375, |
|
"loss": 0.0222, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.116470336914062, |
|
"rewards/margins": 7.085239410400391, |
|
"rewards/margins_max": 9.558416366577148, |
|
"rewards/margins_min": 4.0455121994018555, |
|
"rewards/margins_std": 2.4827523231506348, |
|
"rewards/rejected": -15.20171070098877, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 1.8056138868772267, |
|
"learning_rate": 3.0239435998430376e-09, |
|
"logits/chosen": -1.7272727489471436, |
|
"logits/rejected": -1.5463558435440063, |
|
"logps/chosen": -1105.938720703125, |
|
"logps/rejected": -1762.3658447265625, |
|
"loss": 0.0288, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -8.043752670288086, |
|
"rewards/margins": 7.3192243576049805, |
|
"rewards/margins_max": 9.873791694641113, |
|
"rewards/margins_min": 4.222177028656006, |
|
"rewards/margins_std": 2.4955527782440186, |
|
"rewards/rejected": -15.36297607421875, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 0.37053192172842564, |
|
"learning_rate": 3.3605396115826695e-10, |
|
"logits/chosen": -1.6333061456680298, |
|
"logits/rejected": -1.5385651588439941, |
|
"logps/chosen": -1083.2177734375, |
|
"logps/rejected": -1845.8958740234375, |
|
"loss": 0.004, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.038263320922852, |
|
"rewards/margins": 7.885945796966553, |
|
"rewards/margins_max": 9.852472305297852, |
|
"rewards/margins_min": 5.348562717437744, |
|
"rewards/margins_std": 1.9733645915985107, |
|
"rewards/rejected": -15.924209594726562, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 1065, |
|
"total_flos": 0.0, |
|
"train_loss": 0.14573693349257882, |
|
"train_runtime": 13238.8899, |
|
"train_samples_per_second": 1.287, |
|
"train_steps_per_second": 0.08 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1065, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|