|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 684, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"dpo_losses": 0.6931471824645996, |
|
"epoch": 0.0, |
|
"grad_norm": 2.025402631880394, |
|
"learning_rate": 7.246376811594204e-08, |
|
"logits/chosen": -2.961127519607544, |
|
"logits/rejected": -2.9461119174957275, |
|
"logps/chosen": -261.90582275390625, |
|
"logps/rejected": -270.03265380859375, |
|
"loss": 0.6931, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"dpo_losses": 0.6932222843170166, |
|
"epoch": 0.01, |
|
"grad_norm": 1.892116368261662, |
|
"learning_rate": 7.246376811594204e-07, |
|
"logits/chosen": -2.875087022781372, |
|
"logits/rejected": -2.855910062789917, |
|
"logps/chosen": -217.50634765625, |
|
"logps/rejected": -222.0803985595703, |
|
"loss": 0.6974, |
|
"positive_losses": 0.04892720282077789, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": 0.00041001950739882886, |
|
"rewards/margins": -0.0001489536080043763, |
|
"rewards/margins_max": 0.0012003988958895206, |
|
"rewards/margins_min": -0.0014983059372752905, |
|
"rewards/margins_std": 0.0019082725048065186, |
|
"rewards/rejected": 0.0005589731154032052, |
|
"step": 10 |
|
}, |
|
{ |
|
"dpo_losses": 0.6928491592407227, |
|
"epoch": 0.03, |
|
"grad_norm": 10.889312446124245, |
|
"learning_rate": 1.4492753623188408e-06, |
|
"logits/chosen": -2.855677366256714, |
|
"logits/rejected": -2.8727664947509766, |
|
"logps/chosen": -228.65463256835938, |
|
"logps/rejected": -176.28146362304688, |
|
"loss": 0.695, |
|
"positive_losses": 0.026834487915039062, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.0038786642253398895, |
|
"rewards/margins": 0.0005991062498651445, |
|
"rewards/margins_max": 0.0024178135208785534, |
|
"rewards/margins_min": -0.001219600671902299, |
|
"rewards/margins_std": 0.0025720400735735893, |
|
"rewards/rejected": 0.0032795581500977278, |
|
"step": 20 |
|
}, |
|
{ |
|
"dpo_losses": 0.691431999206543, |
|
"epoch": 0.04, |
|
"grad_norm": 2.0735563380689697, |
|
"learning_rate": 2.173913043478261e-06, |
|
"logits/chosen": -2.932262420654297, |
|
"logits/rejected": -2.8772940635681152, |
|
"logps/chosen": -258.99334716796875, |
|
"logps/rejected": -237.83096313476562, |
|
"loss": 0.692, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.015487673692405224, |
|
"rewards/margins": 0.0034467983059585094, |
|
"rewards/margins_max": 0.008276171050965786, |
|
"rewards/margins_min": -0.001382575137540698, |
|
"rewards/margins_std": 0.006829765625298023, |
|
"rewards/rejected": 0.012040875852108002, |
|
"step": 30 |
|
}, |
|
{ |
|
"dpo_losses": 0.6876205205917358, |
|
"epoch": 0.06, |
|
"grad_norm": 1.9226295416634294, |
|
"learning_rate": 2.8985507246376816e-06, |
|
"logits/chosen": -2.8300986289978027, |
|
"logits/rejected": -2.7832179069519043, |
|
"logps/chosen": -325.06231689453125, |
|
"logps/rejected": -363.68426513671875, |
|
"loss": 0.6887, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.028436576947569847, |
|
"rewards/margins": 0.011200213804841042, |
|
"rewards/margins_max": 0.022198233753442764, |
|
"rewards/margins_min": 0.00020219237194396555, |
|
"rewards/margins_std": 0.015553551726043224, |
|
"rewards/rejected": 0.017236361280083656, |
|
"step": 40 |
|
}, |
|
{ |
|
"dpo_losses": 0.6896201968193054, |
|
"epoch": 0.07, |
|
"grad_norm": 9.118712466857515, |
|
"learning_rate": 3.6231884057971017e-06, |
|
"logits/chosen": -2.895482301712036, |
|
"logits/rejected": -2.8222224712371826, |
|
"logps/chosen": -247.339111328125, |
|
"logps/rejected": -244.00790405273438, |
|
"loss": 0.6891, |
|
"positive_losses": 0.0010955811012536287, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.03269472345709801, |
|
"rewards/margins": 0.007144673261791468, |
|
"rewards/margins_max": 0.017206599935889244, |
|
"rewards/margins_min": -0.002917253179475665, |
|
"rewards/margins_std": 0.014229713007807732, |
|
"rewards/rejected": 0.025550048798322678, |
|
"step": 50 |
|
}, |
|
{ |
|
"dpo_losses": 0.678604006767273, |
|
"epoch": 0.09, |
|
"grad_norm": 1.6865302230972352, |
|
"learning_rate": 4.347826086956522e-06, |
|
"logits/chosen": -3.02363920211792, |
|
"logits/rejected": -2.9497618675231934, |
|
"logps/chosen": -302.65142822265625, |
|
"logps/rejected": -242.8329620361328, |
|
"loss": 0.6829, |
|
"positive_losses": 0.0033214569557458162, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.06167557090520859, |
|
"rewards/margins": 0.02971900999546051, |
|
"rewards/margins_max": 0.05635114759206772, |
|
"rewards/margins_min": 0.003086873795837164, |
|
"rewards/margins_std": 0.0376635305583477, |
|
"rewards/rejected": 0.03195656090974808, |
|
"step": 60 |
|
}, |
|
{ |
|
"dpo_losses": 0.6720460653305054, |
|
"epoch": 0.1, |
|
"grad_norm": 7.877876119832288, |
|
"learning_rate": 4.999967381905813e-06, |
|
"logits/chosen": -3.0418922901153564, |
|
"logits/rejected": -2.9664931297302246, |
|
"logps/chosen": -266.40692138671875, |
|
"logps/rejected": -203.53610229492188, |
|
"loss": 0.6792, |
|
"positive_losses": 0.09538726508617401, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.05729568004608154, |
|
"rewards/margins": 0.04375966638326645, |
|
"rewards/margins_max": 0.05517454072833061, |
|
"rewards/margins_min": 0.032344792038202286, |
|
"rewards/margins_std": 0.0161430723965168, |
|
"rewards/rejected": 0.013536013662815094, |
|
"step": 70 |
|
}, |
|
{ |
|
"dpo_losses": 0.6691193580627441, |
|
"epoch": 0.12, |
|
"grad_norm": 9.317121958881922, |
|
"learning_rate": 4.9960542403925095e-06, |
|
"logits/chosen": -2.8223726749420166, |
|
"logits/rejected": -2.7410635948181152, |
|
"logps/chosen": -249.7649383544922, |
|
"logps/rejected": -231.88986206054688, |
|
"loss": 0.6716, |
|
"positive_losses": 0.03403167799115181, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.07761463522911072, |
|
"rewards/margins": 0.05084484815597534, |
|
"rewards/margins_max": 0.10426272451877594, |
|
"rewards/margins_min": -0.0025730193592607975, |
|
"rewards/margins_std": 0.07554427534341812, |
|
"rewards/rejected": 0.026769787073135376, |
|
"step": 80 |
|
}, |
|
{ |
|
"dpo_losses": 0.6618136167526245, |
|
"epoch": 0.13, |
|
"grad_norm": 4.628284010129237, |
|
"learning_rate": 4.98562917836165e-06, |
|
"logits/chosen": -2.881012201309204, |
|
"logits/rejected": -2.8446288108825684, |
|
"logps/chosen": -254.67495727539062, |
|
"logps/rejected": -195.2042694091797, |
|
"loss": 0.6694, |
|
"positive_losses": 0.05231323093175888, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.08632297068834305, |
|
"rewards/margins": 0.06475184857845306, |
|
"rewards/margins_max": 0.08643685281276703, |
|
"rewards/margins_min": 0.0430668406188488, |
|
"rewards/margins_std": 0.030667226761579514, |
|
"rewards/rejected": 0.021571118384599686, |
|
"step": 90 |
|
}, |
|
{ |
|
"dpo_losses": 0.6343039274215698, |
|
"epoch": 0.15, |
|
"grad_norm": 12.853627066309556, |
|
"learning_rate": 4.968719393609757e-06, |
|
"logits/chosen": -2.973792552947998, |
|
"logits/rejected": -2.9188525676727295, |
|
"logps/chosen": -364.15399169921875, |
|
"logps/rejected": -228.38418579101562, |
|
"loss": 0.6612, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.1419464498758316, |
|
"rewards/margins": 0.12534113228321075, |
|
"rewards/margins_max": 0.19193264842033386, |
|
"rewards/margins_min": 0.05874960869550705, |
|
"rewards/margins_std": 0.09417462348937988, |
|
"rewards/rejected": 0.016605319455266, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_dpo_losses": 0.6805257797241211, |
|
"eval_logits/chosen": -2.8549489974975586, |
|
"eval_logits/rejected": -2.8096561431884766, |
|
"eval_logps/chosen": -277.0838928222656, |
|
"eval_logps/rejected": -253.83172607421875, |
|
"eval_loss": 0.7167356014251709, |
|
"eval_positive_losses": 0.34227606654167175, |
|
"eval_rewards/accuracies": 0.591269850730896, |
|
"eval_rewards/chosen": 0.0813729390501976, |
|
"eval_rewards/margins": 0.027863360941410065, |
|
"eval_rewards/margins_max": 0.13411983847618103, |
|
"eval_rewards/margins_min": -0.06980105489492416, |
|
"eval_rewards/margins_std": 0.0907549038529396, |
|
"eval_rewards/rejected": 0.05350957810878754, |
|
"eval_runtime": 284.067, |
|
"eval_samples_per_second": 7.041, |
|
"eval_steps_per_second": 0.222, |
|
"step": 100 |
|
}, |
|
{ |
|
"dpo_losses": 0.646482527256012, |
|
"epoch": 0.16, |
|
"grad_norm": 7.886066230281033, |
|
"learning_rate": 4.9453690018345144e-06, |
|
"logits/chosen": -2.90840482711792, |
|
"logits/rejected": -2.870948553085327, |
|
"logps/chosen": -332.84967041015625, |
|
"logps/rejected": -298.0467529296875, |
|
"loss": 0.6608, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.11125056445598602, |
|
"rewards/margins": 0.09774880856275558, |
|
"rewards/margins_max": 0.12077156454324722, |
|
"rewards/margins_min": 0.07472606003284454, |
|
"rewards/margins_std": 0.03255908936262131, |
|
"rewards/rejected": 0.01350175030529499, |
|
"step": 110 |
|
}, |
|
{ |
|
"dpo_losses": 0.6566277742385864, |
|
"epoch": 0.18, |
|
"grad_norm": 1.913254174024933, |
|
"learning_rate": 4.915638921541952e-06, |
|
"logits/chosen": -2.8616955280303955, |
|
"logits/rejected": -2.8522956371307373, |
|
"logps/chosen": -277.81402587890625, |
|
"logps/rejected": -261.228759765625, |
|
"loss": 0.6473, |
|
"positive_losses": 0.04311790317296982, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.0988793820142746, |
|
"rewards/margins": 0.07747145742177963, |
|
"rewards/margins_max": 0.14120900630950928, |
|
"rewards/margins_min": 0.013733914121985435, |
|
"rewards/margins_std": 0.0901385098695755, |
|
"rewards/rejected": 0.02140791341662407, |
|
"step": 120 |
|
}, |
|
{ |
|
"dpo_losses": 0.6377557516098022, |
|
"epoch": 0.19, |
|
"grad_norm": 12.904038948771282, |
|
"learning_rate": 4.879606715117019e-06, |
|
"logits/chosen": -2.95271897315979, |
|
"logits/rejected": -2.8435444831848145, |
|
"logps/chosen": -294.14813232421875, |
|
"logps/rejected": -240.53207397460938, |
|
"loss": 0.6576, |
|
"positive_losses": 0.14576569199562073, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.12051650136709213, |
|
"rewards/margins": 0.12185319513082504, |
|
"rewards/margins_max": 0.16484162211418152, |
|
"rewards/margins_min": 0.07886476814746857, |
|
"rewards/margins_std": 0.06079481169581413, |
|
"rewards/rejected": -0.0013366841012611985, |
|
"step": 130 |
|
}, |
|
{ |
|
"dpo_losses": 0.640169620513916, |
|
"epoch": 0.2, |
|
"grad_norm": 16.18813154837036, |
|
"learning_rate": 4.837366386472175e-06, |
|
"logits/chosen": -3.0178513526916504, |
|
"logits/rejected": -2.9269156455993652, |
|
"logps/chosen": -279.7732238769531, |
|
"logps/rejected": -236.86874389648438, |
|
"loss": 0.6767, |
|
"positive_losses": 0.29906386137008667, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.11848002672195435, |
|
"rewards/margins": 0.11471493542194366, |
|
"rewards/margins_max": 0.2023201882839203, |
|
"rewards/margins_min": 0.02710966393351555, |
|
"rewards/margins_std": 0.12389256060123444, |
|
"rewards/rejected": 0.0037650964222848415, |
|
"step": 140 |
|
}, |
|
{ |
|
"dpo_losses": 0.6285568475723267, |
|
"epoch": 0.22, |
|
"grad_norm": 6.5324825602261045, |
|
"learning_rate": 4.789028135801919e-06, |
|
"logits/chosen": -2.973548650741577, |
|
"logits/rejected": -2.9277901649475098, |
|
"logps/chosen": -281.6429443359375, |
|
"logps/rejected": -295.3494873046875, |
|
"loss": 0.8701, |
|
"positive_losses": 0.2579152584075928, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.16488775610923767, |
|
"rewards/margins": 0.14175625145435333, |
|
"rewards/margins_max": 0.25869402289390564, |
|
"rewards/margins_min": 0.02481846883893013, |
|
"rewards/margins_std": 0.1653749793767929, |
|
"rewards/rejected": 0.023131517693400383, |
|
"step": 150 |
|
}, |
|
{ |
|
"dpo_losses": 0.6205035448074341, |
|
"epoch": 0.23, |
|
"grad_norm": 2.7066336555848176, |
|
"learning_rate": 4.7347180720830635e-06, |
|
"logits/chosen": -2.9675240516662598, |
|
"logits/rejected": -2.804438829421997, |
|
"logps/chosen": -316.6290283203125, |
|
"logps/rejected": -282.70538330078125, |
|
"loss": 0.6598, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.1802542507648468, |
|
"rewards/margins": 0.15980175137519836, |
|
"rewards/margins_max": 0.272554486989975, |
|
"rewards/margins_min": 0.047049008309841156, |
|
"rewards/margins_std": 0.15945644676685333, |
|
"rewards/rejected": 0.02045249193906784, |
|
"step": 160 |
|
}, |
|
{ |
|
"dpo_losses": 0.6271503567695618, |
|
"epoch": 0.25, |
|
"grad_norm": 1.8801745537623247, |
|
"learning_rate": 4.674577884070811e-06, |
|
"logits/chosen": -2.929814577102661, |
|
"logits/rejected": -2.8838062286376953, |
|
"logps/chosen": -308.89947509765625, |
|
"logps/rejected": -252.41775512695312, |
|
"loss": 0.6279, |
|
"positive_losses": 0.5797370672225952, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.1764679253101349, |
|
"rewards/margins": 0.14800240099430084, |
|
"rewards/margins_max": 0.237053781747818, |
|
"rewards/margins_min": 0.058951012790203094, |
|
"rewards/margins_std": 0.12593765556812286, |
|
"rewards/rejected": 0.02846553362905979, |
|
"step": 170 |
|
}, |
|
{ |
|
"dpo_losses": 0.6267830729484558, |
|
"epoch": 0.26, |
|
"grad_norm": 2.1466440934520645, |
|
"learning_rate": 4.608764470648971e-06, |
|
"logits/chosen": -2.9405906200408936, |
|
"logits/rejected": -2.8729405403137207, |
|
"logps/chosen": -290.3728942871094, |
|
"logps/rejected": -330.1247863769531, |
|
"loss": 0.6545, |
|
"positive_losses": 0.3962584435939789, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.17637397348880768, |
|
"rewards/margins": 0.14918141067028046, |
|
"rewards/margins_max": 0.28907179832458496, |
|
"rewards/margins_min": 0.00929100438952446, |
|
"rewards/margins_std": 0.19783492386341095, |
|
"rewards/rejected": 0.027192572131752968, |
|
"step": 180 |
|
}, |
|
{ |
|
"dpo_losses": 0.5917172431945801, |
|
"epoch": 0.28, |
|
"grad_norm": 2.287767271676791, |
|
"learning_rate": 4.5374495314986874e-06, |
|
"logits/chosen": -2.7434186935424805, |
|
"logits/rejected": -2.7498998641967773, |
|
"logps/chosen": -312.6622619628906, |
|
"logps/rejected": -238.92544555664062, |
|
"loss": 0.6419, |
|
"positive_losses": 0.6725692749023438, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.21900752186775208, |
|
"rewards/margins": 0.22816917300224304, |
|
"rewards/margins_max": 0.34974128007888794, |
|
"rewards/margins_min": 0.10659710317850113, |
|
"rewards/margins_std": 0.17192888259887695, |
|
"rewards/rejected": -0.00916165579110384, |
|
"step": 190 |
|
}, |
|
{ |
|
"dpo_losses": 0.6394025087356567, |
|
"epoch": 0.29, |
|
"grad_norm": 10.012997763234694, |
|
"learning_rate": 4.460819119153574e-06, |
|
"logits/chosen": -2.8694872856140137, |
|
"logits/rejected": -2.831519603729248, |
|
"logps/chosen": -254.72994995117188, |
|
"logps/rejected": -285.6749572753906, |
|
"loss": 0.6652, |
|
"positive_losses": 0.53265380859375, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.1096222847700119, |
|
"rewards/margins": 0.11550626903772354, |
|
"rewards/margins_max": 0.21246998012065887, |
|
"rewards/margins_min": 0.018542537465691566, |
|
"rewards/margins_std": 0.13712741434574127, |
|
"rewards/rejected": -0.005883966572582722, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_dpo_losses": 0.6683889627456665, |
|
"eval_logits/chosen": -2.804701566696167, |
|
"eval_logits/rejected": -2.761190176010132, |
|
"eval_logps/chosen": -270.78045654296875, |
|
"eval_logps/rejected": -250.50535583496094, |
|
"eval_loss": 0.7424377202987671, |
|
"eval_positive_losses": 0.6157510280609131, |
|
"eval_rewards/accuracies": 0.6071428656578064, |
|
"eval_rewards/chosen": 0.14440776407718658, |
|
"eval_rewards/margins": 0.05763502046465874, |
|
"eval_rewards/margins_max": 0.23871682584285736, |
|
"eval_rewards/margins_min": -0.10859239846467972, |
|
"eval_rewards/margins_std": 0.15644660592079163, |
|
"eval_rewards/rejected": 0.08677274733781815, |
|
"eval_runtime": 283.023, |
|
"eval_samples_per_second": 7.067, |
|
"eval_steps_per_second": 0.223, |
|
"step": 200 |
|
}, |
|
{ |
|
"dpo_losses": 0.6189439296722412, |
|
"epoch": 0.31, |
|
"grad_norm": 6.2964509201178664, |
|
"learning_rate": 4.379073153609896e-06, |
|
"logits/chosen": -2.980807304382324, |
|
"logits/rejected": -2.9285852909088135, |
|
"logps/chosen": -310.4910583496094, |
|
"logps/rejected": -283.2040100097656, |
|
"loss": 0.66, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.17465968430042267, |
|
"rewards/margins": 0.16720545291900635, |
|
"rewards/margins_max": 0.2678540349006653, |
|
"rewards/margins_min": 0.0665569081902504, |
|
"rewards/margins_std": 0.14233854413032532, |
|
"rewards/rejected": 0.007454232778400183, |
|
"step": 210 |
|
}, |
|
{ |
|
"dpo_losses": 0.6231792569160461, |
|
"epoch": 0.32, |
|
"grad_norm": 1.9190071231894708, |
|
"learning_rate": 4.292424900758129e-06, |
|
"logits/chosen": -2.8317534923553467, |
|
"logits/rejected": -2.745687961578369, |
|
"logps/chosen": -241.54098510742188, |
|
"logps/rejected": -261.16180419921875, |
|
"loss": 0.6416, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.17161966860294342, |
|
"rewards/margins": 0.15514414012432098, |
|
"rewards/margins_max": 0.2861190140247345, |
|
"rewards/margins_min": 0.024169281125068665, |
|
"rewards/margins_std": 0.1852264106273651, |
|
"rewards/rejected": 0.016475532203912735, |
|
"step": 220 |
|
}, |
|
{ |
|
"dpo_losses": 0.6433408260345459, |
|
"epoch": 0.34, |
|
"grad_norm": 2.0868151243656565, |
|
"learning_rate": 4.201100415996598e-06, |
|
"logits/chosen": -2.727468490600586, |
|
"logits/rejected": -2.6615824699401855, |
|
"logps/chosen": -234.8723602294922, |
|
"logps/rejected": -252.6638641357422, |
|
"loss": 0.6425, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.1352681964635849, |
|
"rewards/margins": 0.10586099326610565, |
|
"rewards/margins_max": 0.18672436475753784, |
|
"rewards/margins_min": 0.024997618049383163, |
|
"rewards/margins_std": 0.11435806751251221, |
|
"rewards/rejected": 0.029407206922769547, |
|
"step": 230 |
|
}, |
|
{ |
|
"dpo_losses": 0.5827513933181763, |
|
"epoch": 0.35, |
|
"grad_norm": 8.16859688297942, |
|
"learning_rate": 4.105337954478756e-06, |
|
"logits/chosen": -2.9261674880981445, |
|
"logits/rejected": -2.7930915355682373, |
|
"logps/chosen": -369.94781494140625, |
|
"logps/rejected": -238.9222869873047, |
|
"loss": 0.6532, |
|
"positive_losses": 0.4402889311313629, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.21584495902061462, |
|
"rewards/margins": 0.2518808841705322, |
|
"rewards/margins_max": 0.37274229526519775, |
|
"rewards/margins_min": 0.1310194730758667, |
|
"rewards/margins_std": 0.1709238588809967, |
|
"rewards/rejected": -0.0360359326004982, |
|
"step": 240 |
|
}, |
|
{ |
|
"dpo_losses": 0.6088570356369019, |
|
"epoch": 0.37, |
|
"grad_norm": 7.483775073000372, |
|
"learning_rate": 4.005387349532697e-06, |
|
"logits/chosen": -2.9306282997131348, |
|
"logits/rejected": -2.87394642829895, |
|
"logps/chosen": -293.2420654296875, |
|
"logps/rejected": -272.403076171875, |
|
"loss": 0.6241, |
|
"positive_losses": 0.6371370553970337, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.14793848991394043, |
|
"rewards/margins": 0.18766427040100098, |
|
"rewards/margins_max": 0.2927020192146301, |
|
"rewards/margins_min": 0.08262647688388824, |
|
"rewards/margins_std": 0.14854584634304047, |
|
"rewards/rejected": -0.039725758135318756, |
|
"step": 250 |
|
}, |
|
{ |
|
"dpo_losses": 0.630215048789978, |
|
"epoch": 0.38, |
|
"grad_norm": 13.905695323383071, |
|
"learning_rate": 3.901509360874515e-06, |
|
"logits/chosen": -2.832815170288086, |
|
"logits/rejected": -2.8140697479248047, |
|
"logps/chosen": -197.61141967773438, |
|
"logps/rejected": -194.48846435546875, |
|
"loss": 0.6319, |
|
"positive_losses": 0.17327670753002167, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.15451067686080933, |
|
"rewards/margins": 0.14662909507751465, |
|
"rewards/margins_max": 0.2917208671569824, |
|
"rewards/margins_min": 0.0015373497735708952, |
|
"rewards/margins_std": 0.2051907330751419, |
|
"rewards/rejected": 0.00788155198097229, |
|
"step": 260 |
|
}, |
|
{ |
|
"dpo_losses": 0.6374494433403015, |
|
"epoch": 0.39, |
|
"grad_norm": 10.795361999407069, |
|
"learning_rate": 3.793974994315991e-06, |
|
"logits/chosen": -2.788649797439575, |
|
"logits/rejected": -2.7856884002685547, |
|
"logps/chosen": -167.8430938720703, |
|
"logps/rejected": -188.2590789794922, |
|
"loss": 0.6446, |
|
"positive_losses": 0.739077091217041, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.13068287074565887, |
|
"rewards/margins": 0.11988089978694916, |
|
"rewards/margins_max": 0.2015564739704132, |
|
"rewards/margins_min": 0.0382053516805172, |
|
"rewards/margins_std": 0.11550667136907578, |
|
"rewards/rejected": 0.010801966302096844, |
|
"step": 270 |
|
}, |
|
{ |
|
"dpo_losses": 0.601833701133728, |
|
"epoch": 0.41, |
|
"grad_norm": 1.9286233539251623, |
|
"learning_rate": 3.68306479474137e-06, |
|
"logits/chosen": -3.023601531982422, |
|
"logits/rejected": -2.880030632019043, |
|
"logps/chosen": -352.0651550292969, |
|
"logps/rejected": -203.5298309326172, |
|
"loss": 0.6428, |
|
"positive_losses": 0.44579315185546875, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.21998050808906555, |
|
"rewards/margins": 0.2220487892627716, |
|
"rewards/margins_max": 0.3289792239665985, |
|
"rewards/margins_min": 0.11511830985546112, |
|
"rewards/margins_std": 0.15122249722480774, |
|
"rewards/rejected": -0.002068266272544861, |
|
"step": 280 |
|
}, |
|
{ |
|
"dpo_losses": 0.6319935321807861, |
|
"epoch": 0.42, |
|
"grad_norm": 2.944032588280805, |
|
"learning_rate": 3.569068114197784e-06, |
|
"logits/chosen": -2.928798198699951, |
|
"logits/rejected": -2.8589279651641846, |
|
"logps/chosen": -206.49636840820312, |
|
"logps/rejected": -176.32420349121094, |
|
"loss": 0.6206, |
|
"positive_losses": 0.1757659912109375, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.161897212266922, |
|
"rewards/margins": 0.13675031065940857, |
|
"rewards/margins_max": 0.2698245942592621, |
|
"rewards/margins_min": 0.003676059888675809, |
|
"rewards/margins_std": 0.18819543719291687, |
|
"rewards/rejected": 0.02514689229428768, |
|
"step": 290 |
|
}, |
|
{ |
|
"dpo_losses": 0.5872830748558044, |
|
"epoch": 0.44, |
|
"grad_norm": 8.216053799669432, |
|
"learning_rate": 3.4522823570088073e-06, |
|
"logits/chosen": -2.855262279510498, |
|
"logits/rejected": -2.8310704231262207, |
|
"logps/chosen": -240.1536407470703, |
|
"logps/rejected": -225.31997680664062, |
|
"loss": 0.6493, |
|
"positive_losses": 0.42629069089889526, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.14640358090400696, |
|
"rewards/margins": 0.24259230494499207, |
|
"rewards/margins_max": 0.3872792422771454, |
|
"rewards/margins_min": 0.09790538251399994, |
|
"rewards/margins_std": 0.20461821556091309, |
|
"rewards/rejected": -0.0961887389421463, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_dpo_losses": 0.6609499454498291, |
|
"eval_logits/chosen": -2.8105618953704834, |
|
"eval_logits/rejected": -2.7656409740448, |
|
"eval_logps/chosen": -268.21075439453125, |
|
"eval_logps/rejected": -249.68173217773438, |
|
"eval_loss": 0.7585510015487671, |
|
"eval_positive_losses": 0.735747218132019, |
|
"eval_rewards/accuracies": 0.6150793433189392, |
|
"eval_rewards/chosen": 0.17010442912578583, |
|
"eval_rewards/margins": 0.0750950425863266, |
|
"eval_rewards/margins_max": 0.2765759229660034, |
|
"eval_rewards/margins_min": -0.10647378861904144, |
|
"eval_rewards/margins_std": 0.17135697603225708, |
|
"eval_rewards/rejected": 0.09500937908887863, |
|
"eval_runtime": 283.2506, |
|
"eval_samples_per_second": 7.061, |
|
"eval_steps_per_second": 0.222, |
|
"step": 300 |
|
}, |
|
{ |
|
"dpo_losses": 0.6002415418624878, |
|
"epoch": 0.45, |
|
"grad_norm": 15.987450446439036, |
|
"learning_rate": 3.333012203880528e-06, |
|
"logits/chosen": -2.9334702491760254, |
|
"logits/rejected": -2.8857316970825195, |
|
"logps/chosen": -221.07736206054688, |
|
"logps/rejected": -165.5961456298828, |
|
"loss": 0.6259, |
|
"positive_losses": 0.29423028230667114, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.20694272220134735, |
|
"rewards/margins": 0.22076299786567688, |
|
"rewards/margins_max": 0.40377649664878845, |
|
"rewards/margins_min": 0.03774946928024292, |
|
"rewards/margins_std": 0.25882020592689514, |
|
"rewards/rejected": -0.013820228166878223, |
|
"step": 310 |
|
}, |
|
{ |
|
"dpo_losses": 0.5651403069496155, |
|
"epoch": 0.47, |
|
"grad_norm": 41.49020086210869, |
|
"learning_rate": 3.2115688170243735e-06, |
|
"logits/chosen": -2.916748523712158, |
|
"logits/rejected": -2.9150567054748535, |
|
"logps/chosen": -294.4731750488281, |
|
"logps/rejected": -308.61346435546875, |
|
"loss": 0.6257, |
|
"positive_losses": 0.09651489555835724, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.2138613760471344, |
|
"rewards/margins": 0.30642035603523254, |
|
"rewards/margins_max": 0.4777792990207672, |
|
"rewards/margins_min": 0.13506139814853668, |
|
"rewards/margins_std": 0.24233810603618622, |
|
"rewards/rejected": -0.09255897253751755, |
|
"step": 320 |
|
}, |
|
{ |
|
"dpo_losses": 0.6225263476371765, |
|
"epoch": 0.48, |
|
"grad_norm": 10.387168274872021, |
|
"learning_rate": 3.0882690283704355e-06, |
|
"logits/chosen": -2.800654649734497, |
|
"logits/rejected": -2.744640588760376, |
|
"logps/chosen": -230.0958709716797, |
|
"logps/rejected": -211.9070587158203, |
|
"loss": 0.6505, |
|
"positive_losses": 0.48747172951698303, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.16328363120555878, |
|
"rewards/margins": 0.16463473439216614, |
|
"rewards/margins_max": 0.3351263403892517, |
|
"rewards/margins_min": -0.005856870673596859, |
|
"rewards/margins_std": 0.24111154675483704, |
|
"rewards/rejected": -0.001351101673208177, |
|
"step": 330 |
|
}, |
|
{ |
|
"dpo_losses": 0.5763748288154602, |
|
"epoch": 0.5, |
|
"grad_norm": 11.15788842573243, |
|
"learning_rate": 2.9634345129891296e-06, |
|
"logits/chosen": -2.82387638092041, |
|
"logits/rejected": -2.72804594039917, |
|
"logps/chosen": -288.6628112792969, |
|
"logps/rejected": -272.2504577636719, |
|
"loss": 0.6348, |
|
"positive_losses": 0.106109619140625, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.21110180020332336, |
|
"rewards/margins": 0.2855163514614105, |
|
"rewards/margins_max": 0.5410599708557129, |
|
"rewards/margins_min": 0.029972758144140244, |
|
"rewards/margins_std": 0.36139318346977234, |
|
"rewards/rejected": -0.07441455870866776, |
|
"step": 340 |
|
}, |
|
{ |
|
"dpo_losses": 0.6027976870536804, |
|
"epoch": 0.51, |
|
"grad_norm": 15.405775577444263, |
|
"learning_rate": 2.8373909498776746e-06, |
|
"logits/chosen": -2.9628233909606934, |
|
"logits/rejected": -2.954535961151123, |
|
"logps/chosen": -264.20440673828125, |
|
"logps/rejected": -265.29058837890625, |
|
"loss": 0.6444, |
|
"positive_losses": 0.14543990790843964, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.18934306502342224, |
|
"rewards/margins": 0.20881037414073944, |
|
"rewards/margins_max": 0.3755717873573303, |
|
"rewards/margins_min": 0.04204897955060005, |
|
"rewards/margins_std": 0.2358362227678299, |
|
"rewards/rejected": -0.019467316567897797, |
|
"step": 350 |
|
}, |
|
{ |
|
"dpo_losses": 0.5799789428710938, |
|
"epoch": 0.53, |
|
"grad_norm": 2.0183316768741295, |
|
"learning_rate": 2.710467172300768e-06, |
|
"logits/chosen": -2.8956894874572754, |
|
"logits/rejected": -2.8347909450531006, |
|
"logps/chosen": -329.68585205078125, |
|
"logps/rejected": -321.13037109375, |
|
"loss": 0.6196, |
|
"positive_losses": 0.2545227110385895, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.23316507041454315, |
|
"rewards/margins": 0.2625313103199005, |
|
"rewards/margins_max": 0.398255318403244, |
|
"rewards/margins_min": 0.12680727243423462, |
|
"rewards/margins_std": 0.19194276630878448, |
|
"rewards/rejected": -0.029366234317421913, |
|
"step": 360 |
|
}, |
|
{ |
|
"dpo_losses": 0.5937001705169678, |
|
"epoch": 0.54, |
|
"grad_norm": 12.06051129169852, |
|
"learning_rate": 2.582994309902146e-06, |
|
"logits/chosen": -2.884967565536499, |
|
"logits/rejected": -2.743847370147705, |
|
"logps/chosen": -285.45123291015625, |
|
"logps/rejected": -247.1053466796875, |
|
"loss": 0.6401, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.1828571856021881, |
|
"rewards/margins": 0.22877776622772217, |
|
"rewards/margins_max": 0.3255845904350281, |
|
"rewards/margins_min": 0.13197092711925507, |
|
"rewards/margins_std": 0.13690553605556488, |
|
"rewards/rejected": -0.04592058062553406, |
|
"step": 370 |
|
}, |
|
{ |
|
"dpo_losses": 0.595887303352356, |
|
"epoch": 0.56, |
|
"grad_norm": 3.3756204670545618, |
|
"learning_rate": 2.4553049248251512e-06, |
|
"logits/chosen": -2.788328170776367, |
|
"logits/rejected": -2.836977958679199, |
|
"logps/chosen": -214.0673065185547, |
|
"logps/rejected": -235.258544921875, |
|
"loss": 0.5859, |
|
"positive_losses": 0.17899170517921448, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.1966702789068222, |
|
"rewards/margins": 0.2272748053073883, |
|
"rewards/margins_max": 0.33700865507125854, |
|
"rewards/margins_min": 0.11754089593887329, |
|
"rewards/margins_std": 0.15518715977668762, |
|
"rewards/rejected": -0.03060450591146946, |
|
"step": 380 |
|
}, |
|
{ |
|
"dpo_losses": 0.6092433333396912, |
|
"epoch": 0.57, |
|
"grad_norm": 2.5893672084668107, |
|
"learning_rate": 2.3277321440960733e-06, |
|
"logits/chosen": -2.9706382751464844, |
|
"logits/rejected": -2.9543769359588623, |
|
"logps/chosen": -256.9505615234375, |
|
"logps/rejected": -262.0420227050781, |
|
"loss": 0.6297, |
|
"positive_losses": 0.7152191400527954, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.16275997459888458, |
|
"rewards/margins": 0.20270125567913055, |
|
"rewards/margins_max": 0.401836633682251, |
|
"rewards/margins_min": 0.0035658925771713257, |
|
"rewards/margins_std": 0.28161993622779846, |
|
"rewards/rejected": -0.039941295981407166, |
|
"step": 390 |
|
}, |
|
{ |
|
"dpo_losses": 0.5526755452156067, |
|
"epoch": 0.58, |
|
"grad_norm": 21.63358202743805, |
|
"learning_rate": 2.20060879053377e-06, |
|
"logits/chosen": -2.8260955810546875, |
|
"logits/rejected": -2.7942147254943848, |
|
"logps/chosen": -184.72544860839844, |
|
"logps/rejected": -232.3735809326172, |
|
"loss": 0.6224, |
|
"positive_losses": 0.5248996019363403, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.16737225651741028, |
|
"rewards/margins": 0.3254551887512207, |
|
"rewards/margins_max": 0.506883442401886, |
|
"rewards/margins_min": 0.14402692019939423, |
|
"rewards/margins_std": 0.256578266620636, |
|
"rewards/rejected": -0.15808293223381042, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_dpo_losses": 0.6528598070144653, |
|
"eval_logits/chosen": -2.819880723953247, |
|
"eval_logits/rejected": -2.7766764163970947, |
|
"eval_logps/chosen": -274.035888671875, |
|
"eval_logps/rejected": -258.0920715332031, |
|
"eval_loss": 0.9942816495895386, |
|
"eval_positive_losses": 3.3746726512908936, |
|
"eval_rewards/accuracies": 0.6388888955116272, |
|
"eval_rewards/chosen": 0.11185282468795776, |
|
"eval_rewards/margins": 0.10094699263572693, |
|
"eval_rewards/margins_max": 0.38355645537376404, |
|
"eval_rewards/margins_min": -0.1620650738477707, |
|
"eval_rewards/margins_std": 0.24338804185390472, |
|
"eval_rewards/rejected": 0.01090583112090826, |
|
"eval_runtime": 283.2899, |
|
"eval_samples_per_second": 7.06, |
|
"eval_steps_per_second": 0.222, |
|
"step": 400 |
|
}, |
|
{ |
|
"dpo_losses": 0.531283438205719, |
|
"epoch": 0.6, |
|
"grad_norm": 11.192408672928963, |
|
"learning_rate": 2.0742665144529374e-06, |
|
"logits/chosen": -2.8772964477539062, |
|
"logits/rejected": -2.7996630668640137, |
|
"logps/chosen": -310.952392578125, |
|
"logps/rejected": -263.61370849609375, |
|
"loss": 0.6614, |
|
"positive_losses": 0.6912002563476562, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.3101710379123688, |
|
"rewards/margins": 0.4205913543701172, |
|
"rewards/margins_max": 0.6372691988945007, |
|
"rewards/margins_min": 0.20391342043876648, |
|
"rewards/margins_std": 0.30642884969711304, |
|
"rewards/rejected": -0.11042030900716782, |
|
"step": 410 |
|
}, |
|
{ |
|
"dpo_losses": 0.5354982614517212, |
|
"epoch": 0.61, |
|
"grad_norm": 15.45856376263706, |
|
"learning_rate": 1.9490349284263036e-06, |
|
"logits/chosen": -2.8157570362091064, |
|
"logits/rejected": -2.758150577545166, |
|
"logps/chosen": -286.8282775878906, |
|
"logps/rejected": -253.0894775390625, |
|
"loss": 0.6483, |
|
"positive_losses": 0.7530021667480469, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.2359578162431717, |
|
"rewards/margins": 0.3807125985622406, |
|
"rewards/margins_max": 0.5745470523834229, |
|
"rewards/margins_min": 0.18687808513641357, |
|
"rewards/margins_std": 0.27412334084510803, |
|
"rewards/rejected": -0.14475473761558533, |
|
"step": 420 |
|
}, |
|
{ |
|
"dpo_losses": 0.5478672385215759, |
|
"epoch": 0.63, |
|
"grad_norm": 9.397460886758937, |
|
"learning_rate": 1.8252407473630606e-06, |
|
"logits/chosen": -2.981616735458374, |
|
"logits/rejected": -2.9860920906066895, |
|
"logps/chosen": -268.0840148925781, |
|
"logps/rejected": -284.7814025878906, |
|
"loss": 0.6187, |
|
"positive_losses": 0.08653469383716583, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.19860908389091492, |
|
"rewards/margins": 0.35602104663848877, |
|
"rewards/margins_max": 0.5557164549827576, |
|
"rewards/margins_min": 0.15632562339305878, |
|
"rewards/margins_std": 0.28241199254989624, |
|
"rewards/rejected": -0.15741200745105743, |
|
"step": 430 |
|
}, |
|
{ |
|
"dpo_losses": 0.5760594010353088, |
|
"epoch": 0.64, |
|
"grad_norm": 23.342372051867663, |
|
"learning_rate": 1.7032069361469765e-06, |
|
"logits/chosen": -2.775434732437134, |
|
"logits/rejected": -2.77290678024292, |
|
"logps/chosen": -213.5185089111328, |
|
"logps/rejected": -278.1703186035156, |
|
"loss": 0.6391, |
|
"positive_losses": 0.018738174811005592, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.19827936589717865, |
|
"rewards/margins": 0.2895718812942505, |
|
"rewards/margins_max": 0.5261528491973877, |
|
"rewards/margins_min": 0.05299092084169388, |
|
"rewards/margins_std": 0.3345760405063629, |
|
"rewards/rejected": -0.09129253774881363, |
|
"step": 440 |
|
}, |
|
{ |
|
"dpo_losses": 0.5661223530769348, |
|
"epoch": 0.66, |
|
"grad_norm": 2.1591786937116098, |
|
"learning_rate": 1.5832518670578802e-06, |
|
"logits/chosen": -3.0003552436828613, |
|
"logits/rejected": -2.9453907012939453, |
|
"logps/chosen": -272.0791015625, |
|
"logps/rejected": -293.05645751953125, |
|
"loss": 0.6369, |
|
"positive_losses": 0.25814515352249146, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.2058447152376175, |
|
"rewards/margins": 0.2981029152870178, |
|
"rewards/margins_max": 0.49641966819763184, |
|
"rewards/margins_min": 0.099786177277565, |
|
"rewards/margins_std": 0.28046220541000366, |
|
"rewards/rejected": -0.09225818514823914, |
|
"step": 450 |
|
}, |
|
{ |
|
"dpo_losses": 0.5694680213928223, |
|
"epoch": 0.67, |
|
"grad_norm": 2.6035617787075602, |
|
"learning_rate": 1.4656884891747398e-06, |
|
"logits/chosen": -2.813758373260498, |
|
"logits/rejected": -2.838399887084961, |
|
"logps/chosen": -266.3056945800781, |
|
"logps/rejected": -263.6679382324219, |
|
"loss": 0.6275, |
|
"positive_losses": 0.17335128784179688, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.19134476780891418, |
|
"rewards/margins": 0.3021687865257263, |
|
"rewards/margins_max": 0.5000616312026978, |
|
"rewards/margins_min": 0.10427598655223846, |
|
"rewards/margins_std": 0.2798627018928528, |
|
"rewards/rejected": -0.11082406342029572, |
|
"step": 460 |
|
}, |
|
{ |
|
"dpo_losses": 0.6458374261856079, |
|
"epoch": 0.69, |
|
"grad_norm": 2.5506219741141543, |
|
"learning_rate": 1.3508235119272466e-06, |
|
"logits/chosen": -2.8581721782684326, |
|
"logits/rejected": -2.8341779708862305, |
|
"logps/chosen": -254.4308319091797, |
|
"logps/rejected": -253.99722290039062, |
|
"loss": 0.6175, |
|
"positive_losses": 2.3088302612304688, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.11066363006830215, |
|
"rewards/margins": 0.1266460120677948, |
|
"rewards/margins_max": 0.2916102409362793, |
|
"rewards/margins_min": -0.0383182056248188, |
|
"rewards/margins_std": 0.23329463601112366, |
|
"rewards/rejected": -0.01598239876329899, |
|
"step": 470 |
|
}, |
|
{ |
|
"dpo_losses": 0.5230454206466675, |
|
"epoch": 0.7, |
|
"grad_norm": 24.43956168373256, |
|
"learning_rate": 1.238956604925934e-06, |
|
"logits/chosen": -2.7936887741088867, |
|
"logits/rejected": -2.7815768718719482, |
|
"logps/chosen": -241.1090850830078, |
|
"logps/rejected": -284.8046569824219, |
|
"loss": 0.6176, |
|
"positive_losses": 0.7508819699287415, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.23765699565410614, |
|
"rewards/margins": 0.422058641910553, |
|
"rewards/margins_max": 0.6369160413742065, |
|
"rewards/margins_min": 0.20720121264457703, |
|
"rewards/margins_std": 0.3038543164730072, |
|
"rewards/rejected": -0.18440163135528564, |
|
"step": 480 |
|
}, |
|
{ |
|
"dpo_losses": 0.5921580195426941, |
|
"epoch": 0.72, |
|
"grad_norm": 9.14828905462399, |
|
"learning_rate": 1.1303796161583763e-06, |
|
"logits/chosen": -2.953059673309326, |
|
"logits/rejected": -2.935351848602295, |
|
"logps/chosen": -277.16998291015625, |
|
"logps/rejected": -322.24908447265625, |
|
"loss": 0.5856, |
|
"positive_losses": 0.24810238182544708, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.18392740190029144, |
|
"rewards/margins": 0.252055287361145, |
|
"rewards/margins_max": 0.42164430022239685, |
|
"rewards/margins_min": 0.08246620744466782, |
|
"rewards/margins_std": 0.23983514308929443, |
|
"rewards/rejected": -0.06812787055969238, |
|
"step": 490 |
|
}, |
|
{ |
|
"dpo_losses": 0.5570933222770691, |
|
"epoch": 0.73, |
|
"grad_norm": 26.824091044275153, |
|
"learning_rate": 1.0253758105911169e-06, |
|
"logits/chosen": -2.970041275024414, |
|
"logits/rejected": -2.90769362449646, |
|
"logps/chosen": -304.5309143066406, |
|
"logps/rejected": -356.88641357421875, |
|
"loss": 0.5674, |
|
"positive_losses": 0.19576720893383026, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.23450036346912384, |
|
"rewards/margins": 0.35680580139160156, |
|
"rewards/margins_max": 0.5582734942436218, |
|
"rewards/margins_min": 0.15533806383609772, |
|
"rewards/margins_std": 0.2849184274673462, |
|
"rewards/rejected": -0.12230543792247772, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_dpo_losses": 0.6564862728118896, |
|
"eval_logits/chosen": -2.8376405239105225, |
|
"eval_logits/rejected": -2.793374538421631, |
|
"eval_logps/chosen": -278.80975341796875, |
|
"eval_logps/rejected": -262.5242004394531, |
|
"eval_loss": 1.1831417083740234, |
|
"eval_positive_losses": 5.736485481262207, |
|
"eval_rewards/accuracies": 0.6269841194152832, |
|
"eval_rewards/chosen": 0.06411468237638474, |
|
"eval_rewards/margins": 0.09753014147281647, |
|
"eval_rewards/margins_max": 0.41429367661476135, |
|
"eval_rewards/margins_min": -0.18840637803077698, |
|
"eval_rewards/margins_std": 0.27024951577186584, |
|
"eval_rewards/rejected": -0.033415455371141434, |
|
"eval_runtime": 283.1365, |
|
"eval_samples_per_second": 7.064, |
|
"eval_steps_per_second": 0.223, |
|
"step": 500 |
|
}, |
|
{ |
|
"dpo_losses": 0.5341383218765259, |
|
"epoch": 0.75, |
|
"grad_norm": 2.630596915267566, |
|
"learning_rate": 9.24219131163705e-07, |
|
"logits/chosen": -2.8331634998321533, |
|
"logits/rejected": -2.8200929164886475, |
|
"logps/chosen": -288.0255126953125, |
|
"logps/rejected": -313.35601806640625, |
|
"loss": 0.6361, |
|
"positive_losses": 1.7603362798690796, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.21840150654315948, |
|
"rewards/margins": 0.4240838885307312, |
|
"rewards/margins_max": 0.7840636968612671, |
|
"rewards/margins_min": 0.06410404294729233, |
|
"rewards/margins_std": 0.509088397026062, |
|
"rewards/rejected": -0.2056823968887329, |
|
"step": 510 |
|
}, |
|
{ |
|
"dpo_losses": 0.49555259943008423, |
|
"epoch": 0.76, |
|
"grad_norm": 13.21991755241966, |
|
"learning_rate": 8.271734841028553e-07, |
|
"logits/chosen": -2.72208833694458, |
|
"logits/rejected": -2.6312568187713623, |
|
"logps/chosen": -286.3044128417969, |
|
"logps/rejected": -246.79788208007812, |
|
"loss": 0.6459, |
|
"positive_losses": 0.1013515442609787, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.28137513995170593, |
|
"rewards/margins": 0.4984091818332672, |
|
"rewards/margins_max": 0.7110401391983032, |
|
"rewards/margins_min": 0.28577831387519836, |
|
"rewards/margins_std": 0.3007054626941681, |
|
"rewards/rejected": -0.21703402698040009, |
|
"step": 520 |
|
}, |
|
{ |
|
"dpo_losses": 0.5471224188804626, |
|
"epoch": 0.77, |
|
"grad_norm": 7.226241019031729, |
|
"learning_rate": 7.344920504212244e-07, |
|
"logits/chosen": -2.9117705821990967, |
|
"logits/rejected": -2.8744723796844482, |
|
"logps/chosen": -206.4365997314453, |
|
"logps/rejected": -208.7721710205078, |
|
"loss": 0.6331, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20657558739185333, |
|
"rewards/margins": 0.34465348720550537, |
|
"rewards/margins_max": 0.5002694725990295, |
|
"rewards/margins_min": 0.1890375316143036, |
|
"rewards/margins_std": 0.22007422149181366, |
|
"rewards/rejected": -0.13807791471481323, |
|
"step": 530 |
|
}, |
|
{ |
|
"dpo_losses": 0.5800082087516785, |
|
"epoch": 0.79, |
|
"grad_norm": 48.796149530310025, |
|
"learning_rate": 6.464166253970672e-07, |
|
"logits/chosen": -2.8512115478515625, |
|
"logits/rejected": -2.8704025745391846, |
|
"logps/chosen": -307.69915771484375, |
|
"logps/rejected": -285.3216857910156, |
|
"loss": 0.5932, |
|
"positive_losses": 0.1961948424577713, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.1846921294927597, |
|
"rewards/margins": 0.2769491672515869, |
|
"rewards/margins_max": 0.47591620683670044, |
|
"rewards/margins_min": 0.077982097864151, |
|
"rewards/margins_std": 0.28138190507888794, |
|
"rewards/rejected": -0.09225703775882721, |
|
"step": 540 |
|
}, |
|
{ |
|
"dpo_losses": 0.5755653381347656, |
|
"epoch": 0.8, |
|
"grad_norm": 2.795078318527347, |
|
"learning_rate": 5.631769877579535e-07, |
|
"logits/chosen": -2.9338223934173584, |
|
"logits/rejected": -2.885927200317383, |
|
"logps/chosen": -237.5154266357422, |
|
"logps/rejected": -245.45254516601562, |
|
"loss": 0.6043, |
|
"positive_losses": 1.3403419256210327, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.13946720957756042, |
|
"rewards/margins": 0.28026267886161804, |
|
"rewards/margins_max": 0.4180312752723694, |
|
"rewards/margins_min": 0.1424940675497055, |
|
"rewards/margins_std": 0.19483418762683868, |
|
"rewards/rejected": -0.14079545438289642, |
|
"step": 550 |
|
}, |
|
{ |
|
"dpo_losses": 0.5164459943771362, |
|
"epoch": 0.82, |
|
"grad_norm": 3.090907013989327, |
|
"learning_rate": 4.849903002143114e-07, |
|
"logits/chosen": -3.0459659099578857, |
|
"logits/rejected": -2.9688525199890137, |
|
"logps/chosen": -348.83740234375, |
|
"logps/rejected": -346.1819763183594, |
|
"loss": 0.5621, |
|
"positive_losses": 0.1016082763671875, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.23445944488048553, |
|
"rewards/margins": 0.4368625283241272, |
|
"rewards/margins_max": 0.6422832012176514, |
|
"rewards/margins_min": 0.23144181072711945, |
|
"rewards/margins_std": 0.2905087471008301, |
|
"rewards/rejected": -0.20240306854248047, |
|
"step": 560 |
|
}, |
|
{ |
|
"dpo_losses": 0.5176469087600708, |
|
"epoch": 0.83, |
|
"grad_norm": 7.846420016024575, |
|
"learning_rate": 4.1206054290670537e-07, |
|
"logits/chosen": -2.894327163696289, |
|
"logits/rejected": -2.896099805831909, |
|
"logps/chosen": -234.593994140625, |
|
"logps/rejected": -323.6400146484375, |
|
"loss": 0.6136, |
|
"positive_losses": 1.2456893920898438, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.17635245621204376, |
|
"rewards/margins": 0.4464770257472992, |
|
"rewards/margins_max": 0.6318201422691345, |
|
"rewards/margins_min": 0.2611338794231415, |
|
"rewards/margins_std": 0.2621147930622101, |
|
"rewards/rejected": -0.27012452483177185, |
|
"step": 570 |
|
}, |
|
{ |
|
"dpo_losses": 0.619495689868927, |
|
"epoch": 0.85, |
|
"grad_norm": 47.00247250810323, |
|
"learning_rate": 3.44577981244944e-07, |
|
"logits/chosen": -2.9653825759887695, |
|
"logits/rejected": -2.980112314224243, |
|
"logps/chosen": -235.0575408935547, |
|
"logps/rejected": -250.17593383789062, |
|
"loss": 0.6852, |
|
"positive_losses": 1.9925556182861328, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.1124948263168335, |
|
"rewards/margins": 0.1802656650543213, |
|
"rewards/margins_max": 0.35439062118530273, |
|
"rewards/margins_min": 0.006140687968581915, |
|
"rewards/margins_std": 0.24624991416931152, |
|
"rewards/rejected": -0.06777085363864899, |
|
"step": 580 |
|
}, |
|
{ |
|
"dpo_losses": 0.5588719248771667, |
|
"epoch": 0.86, |
|
"grad_norm": 4.545382225154878, |
|
"learning_rate": 2.827186695273482e-07, |
|
"logits/chosen": -3.0688188076019287, |
|
"logits/rejected": -2.9839656352996826, |
|
"logps/chosen": -374.6413269042969, |
|
"logps/rejected": -343.8844909667969, |
|
"loss": 0.6152, |
|
"positive_losses": 0.27655029296875, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.2746116518974304, |
|
"rewards/margins": 0.3599032461643219, |
|
"rewards/margins_max": 0.609930157661438, |
|
"rewards/margins_min": 0.10987631976604462, |
|
"rewards/margins_std": 0.3535914719104767, |
|
"rewards/rejected": -0.08529156446456909, |
|
"step": 590 |
|
}, |
|
{ |
|
"dpo_losses": 0.5024896860122681, |
|
"epoch": 0.88, |
|
"grad_norm": 10.429875129983618, |
|
"learning_rate": 2.2664399163518786e-07, |
|
"logits/chosen": -2.891846179962158, |
|
"logits/rejected": -2.812058925628662, |
|
"logps/chosen": -292.8556213378906, |
|
"logps/rejected": -254.47628784179688, |
|
"loss": 0.5749, |
|
"positive_losses": 0.0, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.27775660157203674, |
|
"rewards/margins": 0.49465060234069824, |
|
"rewards/margins_max": 0.8027955293655396, |
|
"rewards/margins_min": 0.18650567531585693, |
|
"rewards/margins_std": 0.4357827305793762, |
|
"rewards/rejected": -0.2168940007686615, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_dpo_losses": 0.6511540412902832, |
|
"eval_logits/chosen": -2.8279476165771484, |
|
"eval_logits/rejected": -2.7839229106903076, |
|
"eval_logps/chosen": -274.8698425292969, |
|
"eval_logps/rejected": -259.9163513183594, |
|
"eval_loss": 1.0992192029953003, |
|
"eval_positive_losses": 4.5979323387146, |
|
"eval_rewards/accuracies": 0.6190476417541504, |
|
"eval_rewards/chosen": 0.1035134568810463, |
|
"eval_rewards/margins": 0.11085036396980286, |
|
"eval_rewards/margins_max": 0.4368113875389099, |
|
"eval_rewards/margins_min": -0.18844377994537354, |
|
"eval_rewards/margins_std": 0.2790098786354065, |
|
"eval_rewards/rejected": -0.0073369028978049755, |
|
"eval_runtime": 283.0506, |
|
"eval_samples_per_second": 7.066, |
|
"eval_steps_per_second": 0.223, |
|
"step": 600 |
|
}, |
|
{ |
|
"dpo_losses": 0.5908172726631165, |
|
"epoch": 0.89, |
|
"grad_norm": 8.899268268122263, |
|
"learning_rate": 1.7650024000056415e-07, |
|
"logits/chosen": -2.910013198852539, |
|
"logits/rejected": -2.8941874504089355, |
|
"logps/chosen": -202.37757873535156, |
|
"logps/rejected": -226.99508666992188, |
|
"loss": 0.6342, |
|
"positive_losses": 1.9391590356826782, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.1791270673274994, |
|
"rewards/margins": 0.25547030568122864, |
|
"rewards/margins_max": 0.4546757638454437, |
|
"rewards/margins_min": 0.05626480653882027, |
|
"rewards/margins_std": 0.2817191183567047, |
|
"rewards/rejected": -0.07634319365024567, |
|
"step": 610 |
|
}, |
|
{ |
|
"dpo_losses": 0.5537667870521545, |
|
"epoch": 0.91, |
|
"grad_norm": 27.10072020527372, |
|
"learning_rate": 1.324182339461544e-07, |
|
"logits/chosen": -2.87471342086792, |
|
"logits/rejected": -2.8323957920074463, |
|
"logps/chosen": -255.8001251220703, |
|
"logps/rejected": -216.3787841796875, |
|
"loss": 0.6711, |
|
"positive_losses": 0.1423923522233963, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.22555597126483917, |
|
"rewards/margins": 0.34654873609542847, |
|
"rewards/margins_max": 0.5156995058059692, |
|
"rewards/margins_min": 0.17739805579185486, |
|
"rewards/margins_std": 0.23921525478363037, |
|
"rewards/rejected": -0.12099279463291168, |
|
"step": 620 |
|
}, |
|
{ |
|
"dpo_losses": 0.48896676301956177, |
|
"epoch": 0.92, |
|
"grad_norm": 2.6767258362110957, |
|
"learning_rate": 9.451297839253915e-08, |
|
"logits/chosen": -2.836315393447876, |
|
"logits/rejected": -2.7731261253356934, |
|
"logps/chosen": -304.1548156738281, |
|
"logps/rejected": -336.2890319824219, |
|
"loss": 0.6356, |
|
"positive_losses": 0.17710499465465546, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.31015846133232117, |
|
"rewards/margins": 0.5357273817062378, |
|
"rewards/margins_max": 0.8079883456230164, |
|
"rewards/margins_min": 0.263466477394104, |
|
"rewards/margins_std": 0.38503509759902954, |
|
"rewards/rejected": -0.2255689650774002, |
|
"step": 630 |
|
}, |
|
{ |
|
"dpo_losses": 0.5176305174827576, |
|
"epoch": 0.94, |
|
"grad_norm": 18.280815132381182, |
|
"learning_rate": 6.288336382349463e-08, |
|
"logits/chosen": -2.8430378437042236, |
|
"logits/rejected": -2.7495901584625244, |
|
"logps/chosen": -359.60797119140625, |
|
"logps/rejected": -298.79827880859375, |
|
"loss": 0.5757, |
|
"positive_losses": 1.0459808111190796, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.24247094988822937, |
|
"rewards/margins": 0.44258061051368713, |
|
"rewards/margins_max": 0.6816617846488953, |
|
"rewards/margins_min": 0.20349940657615662, |
|
"rewards/margins_std": 0.33811187744140625, |
|
"rewards/rejected": -0.20010964572429657, |
|
"step": 640 |
|
}, |
|
{ |
|
"dpo_losses": 0.5474685430526733, |
|
"epoch": 0.95, |
|
"grad_norm": 2.7207730538280863, |
|
"learning_rate": 3.761190829201067e-08, |
|
"logits/chosen": -2.8519351482391357, |
|
"logits/rejected": -2.805759906768799, |
|
"logps/chosen": -365.9830627441406, |
|
"logps/rejected": -278.6522521972656, |
|
"loss": 0.5966, |
|
"positive_losses": 0.05963592603802681, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.2809067666530609, |
|
"rewards/margins": 0.4035716652870178, |
|
"rewards/margins_max": 0.7429118156433105, |
|
"rewards/margins_min": 0.06423152983188629, |
|
"rewards/margins_std": 0.47989946603775024, |
|
"rewards/rejected": -0.12266488373279572, |
|
"step": 650 |
|
}, |
|
{ |
|
"dpo_losses": 0.5853177309036255, |
|
"epoch": 0.96, |
|
"grad_norm": 40.690683079215134, |
|
"learning_rate": 1.876454214011253e-08, |
|
"logits/chosen": -2.9034006595611572, |
|
"logits/rejected": -2.8463714122772217, |
|
"logps/chosen": -243.8646697998047, |
|
"logps/rejected": -225.9236602783203, |
|
"loss": 0.6752, |
|
"positive_losses": 0.5306800603866577, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.21680143475532532, |
|
"rewards/margins": 0.2727014422416687, |
|
"rewards/margins_max": 0.4745156764984131, |
|
"rewards/margins_min": 0.07088717073202133, |
|
"rewards/margins_std": 0.2854084372520447, |
|
"rewards/rejected": -0.05589999631047249, |
|
"step": 660 |
|
}, |
|
{ |
|
"dpo_losses": 0.5261252522468567, |
|
"epoch": 0.98, |
|
"grad_norm": 8.821965322055776, |
|
"learning_rate": 6.390435994127753e-09, |
|
"logits/chosen": -2.8224921226501465, |
|
"logits/rejected": -2.8528945446014404, |
|
"logps/chosen": -284.4530944824219, |
|
"logps/rejected": -375.3456115722656, |
|
"loss": 0.6203, |
|
"positive_losses": 0.12337493896484375, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.2311427891254425, |
|
"rewards/margins": 0.4244818687438965, |
|
"rewards/margins_max": 0.6252471208572388, |
|
"rewards/margins_min": 0.2237166464328766, |
|
"rewards/margins_std": 0.2839249074459076, |
|
"rewards/rejected": -0.19333907961845398, |
|
"step": 670 |
|
}, |
|
{ |
|
"dpo_losses": 0.5133123397827148, |
|
"epoch": 0.99, |
|
"grad_norm": 23.801829753390283, |
|
"learning_rate": 5.218724841346556e-10, |
|
"logits/chosen": -2.689457416534424, |
|
"logits/rejected": -2.6667604446411133, |
|
"logps/chosen": -358.9125061035156, |
|
"logps/rejected": -298.8949890136719, |
|
"loss": 0.6128, |
|
"positive_losses": 1.849574327468872, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.30895963311195374, |
|
"rewards/margins": 0.4615735113620758, |
|
"rewards/margins_max": 0.7685288190841675, |
|
"rewards/margins_min": 0.15461814403533936, |
|
"rewards/margins_std": 0.43410032987594604, |
|
"rewards/rejected": -0.15261384844779968, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 684, |
|
"total_flos": 0.0, |
|
"train_loss": 0.641208895814349, |
|
"train_runtime": 6249.598, |
|
"train_samples_per_second": 1.751, |
|
"train_steps_per_second": 0.109 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 684, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|