|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": 88.18099975585938, |
|
"logits/rejected": 88.25153350830078, |
|
"logps/chosen": -29.073104858398438, |
|
"logps/rejected": -26.25731658935547, |
|
"loss": 1.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 1.09375, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": 81.09645080566406, |
|
"logits/rejected": 80.80389404296875, |
|
"logps/chosen": -34.27156066894531, |
|
"logps/rejected": -33.039093017578125, |
|
"loss": 0.9995, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": -0.00029834467568434775, |
|
"rewards/margins": 0.0005084889708086848, |
|
"rewards/rejected": -0.0008068337920121849, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 1.2734375, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": 80.66552734375, |
|
"logits/rejected": 80.5560073852539, |
|
"logps/chosen": -33.4774055480957, |
|
"logps/rejected": -30.691213607788086, |
|
"loss": 0.9987, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0021250424906611443, |
|
"rewards/margins": 0.0012622694484889507, |
|
"rewards/rejected": 0.000862772751133889, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": 82.51115417480469, |
|
"logits/rejected": 82.54508972167969, |
|
"logps/chosen": -33.80036926269531, |
|
"logps/rejected": -31.189748764038086, |
|
"loss": 1.0002, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.002124499063938856, |
|
"rewards/margins": -0.00021631647541653365, |
|
"rewards/rejected": 0.0023408152628690004, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.1796875, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": 81.10090637207031, |
|
"logits/rejected": 81.09576416015625, |
|
"logps/chosen": -32.7674560546875, |
|
"logps/rejected": -33.11550521850586, |
|
"loss": 0.9985, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.0040660640224814415, |
|
"rewards/margins": 0.0015358638484030962, |
|
"rewards/rejected": 0.0025301999412477016, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.1875, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": 78.85154724121094, |
|
"logits/rejected": 78.85734558105469, |
|
"logps/chosen": -30.360393524169922, |
|
"logps/rejected": -30.609283447265625, |
|
"loss": 0.9962, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.008448543958365917, |
|
"rewards/margins": 0.0038085163105279207, |
|
"rewards/rejected": 0.0046400283463299274, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 0.96484375, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": 83.49021911621094, |
|
"logits/rejected": 83.54866027832031, |
|
"logps/chosen": -30.763973236083984, |
|
"logps/rejected": -29.17538833618164, |
|
"loss": 1.0, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": 0.004111888352781534, |
|
"rewards/margins": -3.9446913433494046e-05, |
|
"rewards/rejected": 0.004151335451751947, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 1.2109375, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": 84.11228942871094, |
|
"logits/rejected": 84.1441650390625, |
|
"logps/chosen": -30.222454071044922, |
|
"logps/rejected": -32.666595458984375, |
|
"loss": 0.9996, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0040657008066773415, |
|
"rewards/margins": 0.0003545849467627704, |
|
"rewards/rejected": 0.003711115103214979, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": 81.86946868896484, |
|
"logits/rejected": 81.84814453125, |
|
"logps/chosen": -30.959096908569336, |
|
"logps/rejected": -30.652545928955078, |
|
"loss": 0.9956, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.005458436906337738, |
|
"rewards/margins": 0.0044073979370296, |
|
"rewards/rejected": 0.001051038852892816, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": 78.68418884277344, |
|
"logits/rejected": 78.65721893310547, |
|
"logps/chosen": -32.17829513549805, |
|
"logps/rejected": -30.884775161743164, |
|
"loss": 0.9961, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.00459885410964489, |
|
"rewards/margins": 0.00393189862370491, |
|
"rewards/rejected": 0.0006669552531093359, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 1.2421875, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": 83.70716857910156, |
|
"logits/rejected": 83.73751068115234, |
|
"logps/chosen": -33.73701477050781, |
|
"logps/rejected": -31.63702964782715, |
|
"loss": 0.9955, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.005679761990904808, |
|
"rewards/margins": 0.004544637631624937, |
|
"rewards/rejected": 0.0011351245921105146, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 98.73394012451172, |
|
"eval_logits/rejected": 98.7273941040039, |
|
"eval_logps/chosen": -32.38990783691406, |
|
"eval_logps/rejected": -35.92463684082031, |
|
"eval_loss": 0.999876856803894, |
|
"eval_rewards/accuracies": 0.5186877250671387, |
|
"eval_rewards/chosen": 0.0005326389218680561, |
|
"eval_rewards/margins": 0.00011375291069271043, |
|
"eval_rewards/rejected": 0.00041888616397045553, |
|
"eval_runtime": 104.2424, |
|
"eval_samples_per_second": 3.29, |
|
"eval_steps_per_second": 0.413, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 1.4140625, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": 83.85816955566406, |
|
"logits/rejected": 83.75128936767578, |
|
"logps/chosen": -32.19211959838867, |
|
"logps/rejected": -32.65901565551758, |
|
"loss": 0.9917, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.008306830190122128, |
|
"rewards/margins": 0.008338114246726036, |
|
"rewards/rejected": -3.12842421408277e-05, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 1.3046875, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": 83.82106018066406, |
|
"logits/rejected": 83.92265319824219, |
|
"logps/chosen": -28.150625228881836, |
|
"logps/rejected": -35.3939208984375, |
|
"loss": 0.9929, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.007789556868374348, |
|
"rewards/margins": 0.0071373311802744865, |
|
"rewards/rejected": 0.000652224407531321, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 0.9453125, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": 80.96563720703125, |
|
"logits/rejected": 80.99563598632812, |
|
"logps/chosen": -30.216140747070312, |
|
"logps/rejected": -31.844036102294922, |
|
"loss": 0.994, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.006912143435329199, |
|
"rewards/margins": 0.006036223843693733, |
|
"rewards/rejected": 0.0008759202319197357, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 1.1171875, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": 81.863525390625, |
|
"logits/rejected": 81.86921691894531, |
|
"logps/chosen": -26.845142364501953, |
|
"logps/rejected": -33.07027816772461, |
|
"loss": 0.9875, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.006021805107593536, |
|
"rewards/margins": 0.012490840628743172, |
|
"rewards/rejected": -0.006469034589827061, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": 80.13746643066406, |
|
"logits/rejected": 80.10902404785156, |
|
"logps/chosen": -28.976547241210938, |
|
"logps/rejected": -33.208518981933594, |
|
"loss": 0.9895, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.004125660751014948, |
|
"rewards/margins": 0.010545835830271244, |
|
"rewards/rejected": -0.006420175079256296, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": 81.72142028808594, |
|
"logits/rejected": 81.74298858642578, |
|
"logps/chosen": -33.8978157043457, |
|
"logps/rejected": -30.907711029052734, |
|
"loss": 0.9881, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.002389371395111084, |
|
"rewards/margins": 0.011941083706915379, |
|
"rewards/rejected": -0.009551710449159145, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 1.4765625, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": 82.35487365722656, |
|
"logits/rejected": 82.30474090576172, |
|
"logps/chosen": -30.870525360107422, |
|
"logps/rejected": -33.04078674316406, |
|
"loss": 0.9859, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.003887615632265806, |
|
"rewards/margins": 0.014094889163970947, |
|
"rewards/rejected": -0.010207273997366428, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 1.21875, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": 79.4852066040039, |
|
"logits/rejected": 79.46187591552734, |
|
"logps/chosen": -31.02083396911621, |
|
"logps/rejected": -32.165191650390625, |
|
"loss": 0.9882, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0017631975933909416, |
|
"rewards/margins": 0.011843027547001839, |
|
"rewards/rejected": -0.010079829022288322, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 0.9609375, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": 80.97419738769531, |
|
"logits/rejected": 80.94820404052734, |
|
"logps/chosen": -30.60634994506836, |
|
"logps/rejected": -31.083566665649414, |
|
"loss": 0.9922, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0016006485093384981, |
|
"rewards/margins": 0.007756076753139496, |
|
"rewards/rejected": -0.006155428942292929, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 1.453125, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": 76.12115478515625, |
|
"logits/rejected": 76.07009887695312, |
|
"logps/chosen": -34.18424606323242, |
|
"logps/rejected": -33.341392517089844, |
|
"loss": 0.9858, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.005684881471097469, |
|
"rewards/margins": 0.014164777472615242, |
|
"rewards/rejected": -0.008479896001517773, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 98.05126190185547, |
|
"eval_logits/rejected": 98.02639770507812, |
|
"eval_logps/chosen": -32.97175979614258, |
|
"eval_logps/rejected": -36.845333099365234, |
|
"eval_loss": 0.9964954853057861, |
|
"eval_rewards/accuracies": 0.5274086594581604, |
|
"eval_rewards/chosen": -0.005285844672471285, |
|
"eval_rewards/margins": 0.0035022026859223843, |
|
"eval_rewards/rejected": -0.008788047358393669, |
|
"eval_runtime": 104.1082, |
|
"eval_samples_per_second": 3.295, |
|
"eval_steps_per_second": 0.413, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 1.84375, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": 78.64119720458984, |
|
"logits/rejected": 78.55430603027344, |
|
"logps/chosen": -33.689414978027344, |
|
"logps/rejected": -36.20193862915039, |
|
"loss": 0.9853, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.002563029993325472, |
|
"rewards/margins": 0.014721485786139965, |
|
"rewards/rejected": -0.012158457189798355, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": 80.5840835571289, |
|
"logits/rejected": 80.67861938476562, |
|
"logps/chosen": -31.57720947265625, |
|
"logps/rejected": -31.91719627380371, |
|
"loss": 0.9844, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.003717451822012663, |
|
"rewards/margins": 0.015599893406033516, |
|
"rewards/rejected": -0.011882440187036991, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 1.3984375, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": 77.53582763671875, |
|
"logits/rejected": 77.5838851928711, |
|
"logps/chosen": -32.72165298461914, |
|
"logps/rejected": -35.34224319458008, |
|
"loss": 0.9859, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0008409392321482301, |
|
"rewards/margins": 0.014141863211989403, |
|
"rewards/rejected": -0.013300922699272633, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 1.671875, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": 79.64659118652344, |
|
"logits/rejected": 79.958984375, |
|
"logps/chosen": -31.332469940185547, |
|
"logps/rejected": -32.86049270629883, |
|
"loss": 0.9832, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.003555959090590477, |
|
"rewards/margins": 0.016840480268001556, |
|
"rewards/rejected": -0.01328451931476593, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 1.484375, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": 78.12522888183594, |
|
"logits/rejected": 78.17500305175781, |
|
"logps/chosen": -27.822484970092773, |
|
"logps/rejected": -31.34881019592285, |
|
"loss": 0.9878, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.002265883143991232, |
|
"rewards/margins": 0.012214846909046173, |
|
"rewards/rejected": -0.014480730518698692, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 1.6171875, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": 75.08821105957031, |
|
"logits/rejected": 75.22389221191406, |
|
"logps/chosen": -31.112863540649414, |
|
"logps/rejected": -38.40215301513672, |
|
"loss": 0.9725, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.004481295123696327, |
|
"rewards/margins": 0.027486557140946388, |
|
"rewards/rejected": -0.023005260154604912, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 1.3515625, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": 74.10564422607422, |
|
"logits/rejected": 74.13673400878906, |
|
"logps/chosen": -32.07135772705078, |
|
"logps/rejected": -33.231197357177734, |
|
"loss": 0.9857, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.002924318192526698, |
|
"rewards/margins": 0.01433342695236206, |
|
"rewards/rejected": -0.017257746309041977, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 2.03125, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": 77.0625991821289, |
|
"logits/rejected": 76.84493255615234, |
|
"logps/chosen": -32.438629150390625, |
|
"logps/rejected": -31.16558265686035, |
|
"loss": 0.9913, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.006287367548793554, |
|
"rewards/margins": 0.008698503486812115, |
|
"rewards/rejected": -0.014985869638621807, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 1.34375, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": 77.05482482910156, |
|
"logits/rejected": 76.97974395751953, |
|
"logps/chosen": -34.25292205810547, |
|
"logps/rejected": -34.649898529052734, |
|
"loss": 0.9752, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.0014867703430354595, |
|
"rewards/margins": 0.02484356239438057, |
|
"rewards/rejected": -0.026330333203077316, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 1.4921875, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": 72.6989974975586, |
|
"logits/rejected": 72.833984375, |
|
"logps/chosen": -33.264137268066406, |
|
"logps/rejected": -30.818592071533203, |
|
"loss": 0.9814, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0014156814431771636, |
|
"rewards/margins": 0.018581366166472435, |
|
"rewards/rejected": -0.017165686935186386, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": 97.67390441894531, |
|
"eval_logits/rejected": 97.64021301269531, |
|
"eval_logps/chosen": -33.30087661743164, |
|
"eval_logps/rejected": -37.35591125488281, |
|
"eval_loss": 0.99467533826828, |
|
"eval_rewards/accuracies": 0.5888704061508179, |
|
"eval_rewards/chosen": -0.008576988242566586, |
|
"eval_rewards/margins": 0.00531682837754488, |
|
"eval_rewards/rejected": -0.01389381755143404, |
|
"eval_runtime": 103.9426, |
|
"eval_samples_per_second": 3.3, |
|
"eval_steps_per_second": 0.414, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 1.609375, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": 80.20366668701172, |
|
"logits/rejected": 80.20387268066406, |
|
"logps/chosen": -30.933481216430664, |
|
"logps/rejected": -34.256614685058594, |
|
"loss": 0.9801, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.0006994610885158181, |
|
"rewards/margins": 0.019869104027748108, |
|
"rewards/rejected": -0.020568564534187317, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 1.5390625, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": 77.26924133300781, |
|
"logits/rejected": 77.28764343261719, |
|
"logps/chosen": -31.467296600341797, |
|
"logps/rejected": -30.592571258544922, |
|
"loss": 0.982, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0013126448029652238, |
|
"rewards/margins": 0.01804344728589058, |
|
"rewards/rejected": -0.01673080213367939, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 1.875, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": 74.20513916015625, |
|
"logits/rejected": 74.25221252441406, |
|
"logps/chosen": -29.909320831298828, |
|
"logps/rejected": -34.725521087646484, |
|
"loss": 0.9747, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.004432853776961565, |
|
"rewards/margins": 0.025339430198073387, |
|
"rewards/rejected": -0.02090657688677311, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 1.7578125, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": 78.9554672241211, |
|
"logits/rejected": 78.990478515625, |
|
"logps/chosen": -33.333351135253906, |
|
"logps/rejected": -35.866722106933594, |
|
"loss": 0.9762, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.0012120162136852741, |
|
"rewards/margins": 0.023768287152051926, |
|
"rewards/rejected": -0.024980302900075912, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 1.5859375, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": 77.8236083984375, |
|
"logits/rejected": 77.84061431884766, |
|
"logps/chosen": -33.3131103515625, |
|
"logps/rejected": -35.05299758911133, |
|
"loss": 0.9771, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0033062633592635393, |
|
"rewards/margins": 0.0229182131588459, |
|
"rewards/rejected": -0.01961195096373558, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 1.5234375, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": 79.53218078613281, |
|
"logits/rejected": 79.56050109863281, |
|
"logps/chosen": -29.001379013061523, |
|
"logps/rejected": -33.16984176635742, |
|
"loss": 0.9793, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.005508318077772856, |
|
"rewards/margins": 0.020708225667476654, |
|
"rewards/rejected": -0.015199905261397362, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 1.7890625, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": 78.969970703125, |
|
"logits/rejected": 78.9748306274414, |
|
"logps/chosen": -33.2999267578125, |
|
"logps/rejected": -37.32087326049805, |
|
"loss": 0.9821, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.004237635992467403, |
|
"rewards/margins": 0.017877381294965744, |
|
"rewards/rejected": -0.022115018218755722, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 1.359375, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": 72.49492645263672, |
|
"logits/rejected": 72.36249542236328, |
|
"logps/chosen": -30.828378677368164, |
|
"logps/rejected": -29.885875701904297, |
|
"loss": 0.9853, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.0025225188583135605, |
|
"rewards/margins": 0.014730495400726795, |
|
"rewards/rejected": -0.01725301705300808, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.9878765378679548, |
|
"train_runtime": 2559.7639, |
|
"train_samples_per_second": 1.203, |
|
"train_steps_per_second": 0.15 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|