|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"eval_steps": 100, |
|
"global_step": 1540, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": -1.7278180122375488, |
|
"logits/rejected": -1.7377450466156006, |
|
"logps/chosen": -29.553977966308594, |
|
"logps/rejected": -42.813133239746094, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": -1.8666962385177612, |
|
"logits/rejected": -1.8709977865219116, |
|
"logps/chosen": -36.98939514160156, |
|
"logps/rejected": -33.66963195800781, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5694444179534912, |
|
"rewards/chosen": 0.00017197892884723842, |
|
"rewards/margins": 0.0005675320862792432, |
|
"rewards/rejected": -0.0003955531574320048, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": -1.9978935718536377, |
|
"logits/rejected": -2.000532627105713, |
|
"logps/chosen": -29.66562843322754, |
|
"logps/rejected": -29.045883178710938, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.00023434234026353806, |
|
"rewards/margins": -0.0004099405778106302, |
|
"rewards/rejected": 0.0001755982666509226, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": -1.9211324453353882, |
|
"logits/rejected": -1.9184545278549194, |
|
"logps/chosen": -31.41294288635254, |
|
"logps/rejected": -33.23053741455078, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 3.152530553052202e-05, |
|
"rewards/margins": 0.000152341352077201, |
|
"rewards/rejected": -0.00012081606837455183, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": -2.017341375350952, |
|
"logits/rejected": -2.0086092948913574, |
|
"logps/chosen": -32.60146713256836, |
|
"logps/rejected": -32.49399185180664, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.0002466029836796224, |
|
"rewards/margins": -0.0004333632532507181, |
|
"rewards/rejected": 0.00018676018225960433, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": -1.862633466720581, |
|
"logits/rejected": -1.8518692255020142, |
|
"logps/chosen": -33.55931091308594, |
|
"logps/rejected": -35.44870376586914, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -1.831089502957184e-05, |
|
"rewards/margins": -5.47249146620743e-05, |
|
"rewards/rejected": 3.641402145149186e-05, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": -1.9409154653549194, |
|
"logits/rejected": -1.9428699016571045, |
|
"logps/chosen": -32.53916549682617, |
|
"logps/rejected": -33.24130630493164, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0006101715262047946, |
|
"rewards/margins": 0.0013094183523207903, |
|
"rewards/rejected": -0.0006992466514930129, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": -2.070591926574707, |
|
"logits/rejected": -2.075544834136963, |
|
"logps/chosen": -34.023067474365234, |
|
"logps/rejected": -36.647151947021484, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.00034581663203425705, |
|
"rewards/margins": 0.0004369783273432404, |
|
"rewards/rejected": -0.0007827949011698365, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": -1.9308092594146729, |
|
"logits/rejected": -1.933943748474121, |
|
"logps/chosen": -34.318023681640625, |
|
"logps/rejected": -34.67802429199219, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0010978971840813756, |
|
"rewards/margins": 0.0019540609791874886, |
|
"rewards/rejected": -0.000856163795106113, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": -1.9380912780761719, |
|
"logits/rejected": -1.9425855875015259, |
|
"logps/chosen": -32.38385009765625, |
|
"logps/rejected": -32.35346603393555, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0008357145707122982, |
|
"rewards/margins": 0.0007813175907358527, |
|
"rewards/rejected": 5.439693995867856e-05, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": -2.035137891769409, |
|
"logits/rejected": -2.0331528186798096, |
|
"logps/chosen": -32.112831115722656, |
|
"logps/rejected": -31.29166030883789, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0012800416443496943, |
|
"rewards/margins": 0.0015345367137342691, |
|
"rewards/rejected": -0.0002544948656577617, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.2300801277160645, |
|
"eval_logits/rejected": -2.225238084793091, |
|
"eval_logps/chosen": -34.04683303833008, |
|
"eval_logps/rejected": -37.53927230834961, |
|
"eval_loss": 0.6930972337722778, |
|
"eval_rewards/accuracies": 0.5186877250671387, |
|
"eval_rewards/chosen": -0.00012280470400583, |
|
"eval_rewards/margins": 0.00010372586984885857, |
|
"eval_rewards/rejected": -0.0002265305956825614, |
|
"eval_runtime": 145.7259, |
|
"eval_samples_per_second": 2.354, |
|
"eval_steps_per_second": 0.295, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": -1.989782691001892, |
|
"logits/rejected": -1.9873950481414795, |
|
"logps/chosen": -33.12385559082031, |
|
"logps/rejected": -34.011810302734375, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0011996207758784294, |
|
"rewards/margins": 0.001024017808958888, |
|
"rewards/rejected": 0.00017560287960805, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": -2.0008151531219482, |
|
"logits/rejected": -1.992500901222229, |
|
"logps/chosen": -32.320838928222656, |
|
"logps/rejected": -32.128170013427734, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.001240686746314168, |
|
"rewards/margins": 0.0009073130786418915, |
|
"rewards/rejected": 0.0003333735803607851, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": -2.0289230346679688, |
|
"logits/rejected": -2.020946502685547, |
|
"logps/chosen": -30.313907623291016, |
|
"logps/rejected": -32.086116790771484, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0016180993989109993, |
|
"rewards/margins": 0.0019491963321343064, |
|
"rewards/rejected": -0.00033109664218500257, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": -1.9592479467391968, |
|
"logits/rejected": -1.9694607257843018, |
|
"logps/chosen": -31.223953247070312, |
|
"logps/rejected": -32.547454833984375, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0019570994190871716, |
|
"rewards/margins": 0.002082846825942397, |
|
"rewards/rejected": -0.00012574761058203876, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": -1.8708124160766602, |
|
"logits/rejected": -1.8719879388809204, |
|
"logps/chosen": -33.877174377441406, |
|
"logps/rejected": -34.78774642944336, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.003155181184411049, |
|
"rewards/margins": 0.0034600873477756977, |
|
"rewards/rejected": -0.00030490627977997065, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": -1.9215673208236694, |
|
"logits/rejected": -1.9181665182113647, |
|
"logps/chosen": -36.011531829833984, |
|
"logps/rejected": -32.685707092285156, |
|
"loss": 0.6925, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.001601455733180046, |
|
"rewards/margins": 0.001231002388522029, |
|
"rewards/rejected": 0.00037045328645035625, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": -2.021604061126709, |
|
"logits/rejected": -2.014291524887085, |
|
"logps/chosen": -33.482086181640625, |
|
"logps/rejected": -31.404422760009766, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.0031577465124428272, |
|
"rewards/margins": 0.003683448536321521, |
|
"rewards/rejected": -0.0005257020820863545, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": -2.027444839477539, |
|
"logits/rejected": -2.032665729522705, |
|
"logps/chosen": -32.183101654052734, |
|
"logps/rejected": -32.39936065673828, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0036168727092444897, |
|
"rewards/margins": 0.0027590212412178516, |
|
"rewards/rejected": 0.0008578516426496208, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": -2.027879238128662, |
|
"logits/rejected": -2.025132656097412, |
|
"logps/chosen": -31.258464813232422, |
|
"logps/rejected": -31.348388671875, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0023048892617225647, |
|
"rewards/margins": 0.0026066480204463005, |
|
"rewards/rejected": -0.000301758642308414, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": -1.8983129262924194, |
|
"logits/rejected": -1.902967095375061, |
|
"logps/chosen": -31.276391983032227, |
|
"logps/rejected": -32.81935119628906, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.0032989257015287876, |
|
"rewards/margins": 0.00358308176510036, |
|
"rewards/rejected": -0.00028415597626008093, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.2249655723571777, |
|
"eval_logits/rejected": -2.220139503479004, |
|
"eval_logps/chosen": -34.04255294799805, |
|
"eval_logps/rejected": -37.55300521850586, |
|
"eval_loss": 0.6930080056190491, |
|
"eval_rewards/accuracies": 0.5245016813278198, |
|
"eval_rewards/chosen": -8.006239659152925e-05, |
|
"eval_rewards/margins": 0.00028380370349623263, |
|
"eval_rewards/rejected": -0.00036386612919159234, |
|
"eval_runtime": 145.5269, |
|
"eval_samples_per_second": 2.357, |
|
"eval_steps_per_second": 0.295, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": -2.010593891143799, |
|
"logits/rejected": -2.021207332611084, |
|
"logps/chosen": -31.7437801361084, |
|
"logps/rejected": -33.93886947631836, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0022752191871404648, |
|
"rewards/margins": 0.003036911366507411, |
|
"rewards/rejected": -0.000761692295782268, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": -1.9029136896133423, |
|
"logits/rejected": -1.9176632165908813, |
|
"logps/chosen": -29.78145408630371, |
|
"logps/rejected": -31.63638687133789, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.003300876123830676, |
|
"rewards/margins": 0.0040829661302268505, |
|
"rewards/rejected": -0.000782089657150209, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": -1.9591538906097412, |
|
"logits/rejected": -1.9631026983261108, |
|
"logps/chosen": -33.05189895629883, |
|
"logps/rejected": -31.594707489013672, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.003655704203993082, |
|
"rewards/margins": 0.004109731875360012, |
|
"rewards/rejected": -0.00045402703108265996, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": -1.9572566747665405, |
|
"logits/rejected": -1.9354870319366455, |
|
"logps/chosen": -33.83857727050781, |
|
"logps/rejected": -35.12303924560547, |
|
"loss": 0.6907, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0031517534516751766, |
|
"rewards/margins": 0.004874187987297773, |
|
"rewards/rejected": -0.0017224351176992059, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": -1.998875379562378, |
|
"logits/rejected": -1.9955555200576782, |
|
"logps/chosen": -32.72559356689453, |
|
"logps/rejected": -36.2435417175293, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0019947518594563007, |
|
"rewards/margins": 0.0021809376776218414, |
|
"rewards/rejected": -0.0001861859782366082, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": -1.8661177158355713, |
|
"logits/rejected": -1.8636993169784546, |
|
"logps/chosen": -33.959014892578125, |
|
"logps/rejected": -35.526344299316406, |
|
"loss": 0.6919, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0022938635665923357, |
|
"rewards/margins": 0.0025111136492341757, |
|
"rewards/rejected": -0.0002172500389860943, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": -1.8511241674423218, |
|
"logits/rejected": -1.8487510681152344, |
|
"logps/chosen": -34.16337585449219, |
|
"logps/rejected": -31.830408096313477, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.002342230873182416, |
|
"rewards/margins": 0.0029330006800591946, |
|
"rewards/rejected": -0.0005907699232921004, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": -1.9535648822784424, |
|
"logits/rejected": -1.943101167678833, |
|
"logps/chosen": -35.01304244995117, |
|
"logps/rejected": -31.87521743774414, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0034359837882220745, |
|
"rewards/margins": 0.0037782168947160244, |
|
"rewards/rejected": -0.0003422332229092717, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": -2.048783302307129, |
|
"logits/rejected": -2.0339112281799316, |
|
"logps/chosen": -30.716812133789062, |
|
"logps/rejected": -32.62614059448242, |
|
"loss": 0.6922, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0020731096155941486, |
|
"rewards/margins": 0.001816184027120471, |
|
"rewards/rejected": 0.00025692558847367764, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": -1.9198474884033203, |
|
"logits/rejected": -1.917340636253357, |
|
"logps/chosen": -32.29683303833008, |
|
"logps/rejected": -30.91409683227539, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.0063027567230165005, |
|
"rewards/margins": 0.007275627460330725, |
|
"rewards/rejected": -0.0009728703880682588, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -2.221111297607422, |
|
"eval_logits/rejected": -2.2162926197052, |
|
"eval_logps/chosen": -34.0648307800293, |
|
"eval_logps/rejected": -37.58684158325195, |
|
"eval_loss": 0.6929495930671692, |
|
"eval_rewards/accuracies": 0.5419435501098633, |
|
"eval_rewards/chosen": -0.0003027978236787021, |
|
"eval_rewards/margins": 0.0003993964346591383, |
|
"eval_rewards/rejected": -0.0007021942874416709, |
|
"eval_runtime": 145.7415, |
|
"eval_samples_per_second": 2.353, |
|
"eval_steps_per_second": 0.295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.84533120650964e-06, |
|
"logits/chosen": -1.9055675268173218, |
|
"logits/rejected": -1.902345895767212, |
|
"logps/chosen": -31.301956176757812, |
|
"logps/rejected": -33.823036193847656, |
|
"loss": 0.6912, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0030139132868498564, |
|
"rewards/margins": 0.0038227462209761143, |
|
"rewards/rejected": -0.0008088329923339188, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.825108134172131e-06, |
|
"logits/chosen": -1.951906442642212, |
|
"logits/rejected": -1.939772605895996, |
|
"logps/chosen": -34.27196502685547, |
|
"logps/rejected": -33.685001373291016, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.003241057973355055, |
|
"rewards/margins": 0.004702677950263023, |
|
"rewards/rejected": -0.0014616195112466812, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.80369052967602e-06, |
|
"logits/chosen": -1.9854780435562134, |
|
"logits/rejected": -1.9840580224990845, |
|
"logps/chosen": -33.0145378112793, |
|
"logps/rejected": -32.56486511230469, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.004712558351457119, |
|
"rewards/margins": 0.005565387196838856, |
|
"rewards/rejected": -0.0008528297767043114, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.781089396387968e-06, |
|
"logits/chosen": -2.070883274078369, |
|
"logits/rejected": -2.055272102355957, |
|
"logps/chosen": -33.69978713989258, |
|
"logps/rejected": -33.0802001953125, |
|
"loss": 0.6909, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.004796043038368225, |
|
"rewards/margins": 0.004417680203914642, |
|
"rewards/rejected": 0.0003783629508689046, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.757316345716554e-06, |
|
"logits/chosen": -1.944435477256775, |
|
"logits/rejected": -1.943645715713501, |
|
"logps/chosen": -32.76495361328125, |
|
"logps/rejected": -32.4921760559082, |
|
"loss": 0.6905, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0051714470610022545, |
|
"rewards/margins": 0.005441715009510517, |
|
"rewards/rejected": -0.0002702682395465672, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.73238359114687e-06, |
|
"logits/chosen": -1.8958152532577515, |
|
"logits/rejected": -1.9060084819793701, |
|
"logps/chosen": -31.695724487304688, |
|
"logps/rejected": -35.41404342651367, |
|
"loss": 0.69, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.005089647602289915, |
|
"rewards/margins": 0.0063424864783883095, |
|
"rewards/rejected": -0.0012528380611911416, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.706303941965804e-06, |
|
"logits/chosen": -2.029942035675049, |
|
"logits/rejected": -2.0236124992370605, |
|
"logps/chosen": -33.23334884643555, |
|
"logps/rejected": -29.281543731689453, |
|
"loss": 0.6908, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.004186153877526522, |
|
"rewards/margins": 0.004737343639135361, |
|
"rewards/rejected": -0.0005511896451935172, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.679090796681225e-06, |
|
"logits/chosen": -1.8858661651611328, |
|
"logits/rejected": -1.8880888223648071, |
|
"logps/chosen": -33.61238098144531, |
|
"logps/rejected": -30.986286163330078, |
|
"loss": 0.6895, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0062666991725564, |
|
"rewards/margins": 0.007403238210827112, |
|
"rewards/rejected": -0.001136539620347321, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.650758136138454e-06, |
|
"logits/chosen": -1.9126472473144531, |
|
"logits/rejected": -1.9113785028457642, |
|
"logps/chosen": -33.73168182373047, |
|
"logps/rejected": -36.05659484863281, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.7458333373069763, |
|
"rewards/chosen": 0.006531029939651489, |
|
"rewards/margins": 0.010536923073232174, |
|
"rewards/rejected": -0.004005893599241972, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.621320516337559e-06, |
|
"logits/chosen": -1.8457567691802979, |
|
"logits/rejected": -1.8373829126358032, |
|
"logps/chosen": -30.92877197265625, |
|
"logps/rejected": -36.478904724121094, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.008452029898762703, |
|
"rewards/margins": 0.013835062272846699, |
|
"rewards/rejected": -0.005383032839745283, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_logits/chosen": -2.193706512451172, |
|
"eval_logits/rejected": -2.1888742446899414, |
|
"eval_logps/chosen": -34.14311218261719, |
|
"eval_logps/rejected": -37.68904113769531, |
|
"eval_loss": 0.6928316950798035, |
|
"eval_rewards/accuracies": 0.5681062936782837, |
|
"eval_rewards/chosen": -0.0010856210719794035, |
|
"eval_rewards/margins": 0.0006385648157447577, |
|
"eval_rewards/rejected": -0.001724186004139483, |
|
"eval_runtime": 146.0208, |
|
"eval_samples_per_second": 2.349, |
|
"eval_steps_per_second": 0.294, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.590793060955158e-06, |
|
"logits/chosen": -2.0138370990753174, |
|
"logits/rejected": -2.0166878700256348, |
|
"logps/chosen": -32.178985595703125, |
|
"logps/rejected": -35.35575485229492, |
|
"loss": 0.686, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.008749553933739662, |
|
"rewards/margins": 0.014378642663359642, |
|
"rewards/rejected": -0.005629089195281267, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.559191453574582e-06, |
|
"logits/chosen": -1.8486782312393188, |
|
"logits/rejected": -1.8472837209701538, |
|
"logps/chosen": -28.309524536132812, |
|
"logps/rejected": -32.836753845214844, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.0075803459621965885, |
|
"rewards/margins": 0.012771248817443848, |
|
"rewards/rejected": -0.005190903786569834, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.52653192962838e-06, |
|
"logits/chosen": -1.802756905555725, |
|
"logits/rejected": -1.7958400249481201, |
|
"logps/chosen": -33.09931182861328, |
|
"logps/rejected": -34.53899002075195, |
|
"loss": 0.6868, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.009747742675244808, |
|
"rewards/margins": 0.012684956192970276, |
|
"rewards/rejected": -0.002937213983386755, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.492831268057307e-06, |
|
"logits/chosen": -1.9703264236450195, |
|
"logits/rejected": -1.9651902914047241, |
|
"logps/chosen": -30.736658096313477, |
|
"logps/rejected": -32.6190071105957, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.010466375388205051, |
|
"rewards/margins": 0.01698034629225731, |
|
"rewards/rejected": -0.006513969041407108, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.458106782690094e-06, |
|
"logits/chosen": -1.8493196964263916, |
|
"logits/rejected": -1.8536157608032227, |
|
"logps/chosen": -33.46088409423828, |
|
"logps/rejected": -33.30448532104492, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.010549607686698437, |
|
"rewards/margins": 0.01770811341702938, |
|
"rewards/rejected": -0.007158507592976093, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.422376313348405e-06, |
|
"logits/chosen": -1.8494908809661865, |
|
"logits/rejected": -1.843927025794983, |
|
"logps/chosen": -34.2591552734375, |
|
"logps/rejected": -35.904815673828125, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.011603695340454578, |
|
"rewards/margins": 0.02116088569164276, |
|
"rewards/rejected": -0.009557187557220459, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.3856582166815696e-06, |
|
"logits/chosen": -1.868131399154663, |
|
"logits/rejected": -1.8679981231689453, |
|
"logps/chosen": -33.08659362792969, |
|
"logps/rejected": -34.75391387939453, |
|
"loss": 0.685, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.01081737782806158, |
|
"rewards/margins": 0.0165016558021307, |
|
"rewards/rejected": -0.005684278905391693, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.347971356735789e-06, |
|
"logits/chosen": -1.9114658832550049, |
|
"logits/rejected": -1.8928560018539429, |
|
"logps/chosen": -32.96870040893555, |
|
"logps/rejected": -33.964908599853516, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.011920640245079994, |
|
"rewards/margins": 0.020869914442300797, |
|
"rewards/rejected": -0.008949270471930504, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.309335095262675e-06, |
|
"logits/chosen": -1.8733381032943726, |
|
"logits/rejected": -1.8726457357406616, |
|
"logps/chosen": -30.497507095336914, |
|
"logps/rejected": -31.803579330444336, |
|
"loss": 0.6849, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.011137938126921654, |
|
"rewards/margins": 0.016713283956050873, |
|
"rewards/rejected": -0.005575346294790506, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.269769281772082e-06, |
|
"logits/chosen": -1.8297357559204102, |
|
"logits/rejected": -1.8228442668914795, |
|
"logps/chosen": -31.466567993164062, |
|
"logps/rejected": -35.563499450683594, |
|
"loss": 0.6826, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.012035631574690342, |
|
"rewards/margins": 0.021403178572654724, |
|
"rewards/rejected": -0.009367546997964382, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_logits/chosen": -2.122058868408203, |
|
"eval_logits/rejected": -2.117284059524536, |
|
"eval_logps/chosen": -34.34208297729492, |
|
"eval_logps/rejected": -37.94715118408203, |
|
"eval_loss": 0.6925419569015503, |
|
"eval_rewards/accuracies": 0.5651993155479431, |
|
"eval_rewards/chosen": -0.003075304673984647, |
|
"eval_rewards/margins": 0.001229992602020502, |
|
"eval_rewards/rejected": -0.0043052975088357925, |
|
"eval_runtime": 145.8949, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.22929424333435e-06, |
|
"logits/chosen": -1.8198668956756592, |
|
"logits/rejected": -1.8234672546386719, |
|
"logps/chosen": -28.312463760375977, |
|
"logps/rejected": -33.89719772338867, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.008623605594038963, |
|
"rewards/margins": 0.019002709537744522, |
|
"rewards/rejected": -0.010379104875028133, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.1879307741372085e-06, |
|
"logits/chosen": -1.8108766078948975, |
|
"logits/rejected": -1.8216520547866821, |
|
"logps/chosen": -32.165672302246094, |
|
"logps/rejected": -31.733028411865234, |
|
"loss": 0.6824, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.010966637171804905, |
|
"rewards/margins": 0.021780062466859818, |
|
"rewards/rejected": -0.010813427157700062, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.145700124802693e-06, |
|
"logits/chosen": -1.74923837184906, |
|
"logits/rejected": -1.7469356060028076, |
|
"logps/chosen": -30.605663299560547, |
|
"logps/rejected": -31.276514053344727, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.010614162310957909, |
|
"rewards/margins": 0.021611668169498444, |
|
"rewards/rejected": -0.010997505858540535, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.102623991469562e-06, |
|
"logits/chosen": -1.816229224205017, |
|
"logits/rejected": -1.8094854354858398, |
|
"logps/chosen": -33.24816131591797, |
|
"logps/rejected": -34.189598083496094, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.009846633300185204, |
|
"rewards/margins": 0.021567735821008682, |
|
"rewards/rejected": -0.011721103452146053, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.058724504646834e-06, |
|
"logits/chosen": -1.7789214849472046, |
|
"logits/rejected": -1.7853628396987915, |
|
"logps/chosen": -30.978107452392578, |
|
"logps/rejected": -33.693607330322266, |
|
"loss": 0.6847, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.007750834338366985, |
|
"rewards/margins": 0.017099570482969284, |
|
"rewards/rejected": -0.009348735213279724, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.014024217844167e-06, |
|
"logits/chosen": -1.8461157083511353, |
|
"logits/rejected": -1.8232545852661133, |
|
"logps/chosen": -30.5151424407959, |
|
"logps/rejected": -33.84736633300781, |
|
"loss": 0.6844, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.009224263951182365, |
|
"rewards/margins": 0.01773734949529171, |
|
"rewards/rejected": -0.008513087406754494, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.968546095984911e-06, |
|
"logits/chosen": -1.7744262218475342, |
|
"logits/rejected": -1.769487738609314, |
|
"logps/chosen": -31.48854637145996, |
|
"logps/rejected": -33.041587829589844, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.009281843900680542, |
|
"rewards/margins": 0.018488582223653793, |
|
"rewards/rejected": -0.009206734597682953, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.922313503607806e-06, |
|
"logits/chosen": -1.8039462566375732, |
|
"logits/rejected": -1.8057708740234375, |
|
"logps/chosen": -33.585567474365234, |
|
"logps/rejected": -36.357948303222656, |
|
"loss": 0.6816, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.007902255281805992, |
|
"rewards/margins": 0.023472566157579422, |
|
"rewards/rejected": -0.015570309944450855, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 3.875350192863368e-06, |
|
"logits/chosen": -1.779675841331482, |
|
"logits/rejected": -1.7792049646377563, |
|
"logps/chosen": -29.52834129333496, |
|
"logps/rejected": -32.76404571533203, |
|
"loss": 0.6815, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.010637165978550911, |
|
"rewards/margins": 0.023661229759454727, |
|
"rewards/rejected": -0.013024063780903816, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 3.8276802913111436e-06, |
|
"logits/chosen": -1.7808748483657837, |
|
"logits/rejected": -1.778590440750122, |
|
"logps/chosen": -32.0461540222168, |
|
"logps/rejected": -33.55706024169922, |
|
"loss": 0.6823, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.009521286003291607, |
|
"rewards/margins": 0.02209232933819294, |
|
"rewards/rejected": -0.012571041472256184, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_logits/chosen": -2.0287230014801025, |
|
"eval_logits/rejected": -2.024071455001831, |
|
"eval_logps/chosen": -34.73115539550781, |
|
"eval_logps/rejected": -38.49046325683594, |
|
"eval_loss": 0.6917924880981445, |
|
"eval_rewards/accuracies": 0.5830564498901367, |
|
"eval_rewards/chosen": -0.006966045591980219, |
|
"eval_rewards/margins": 0.0027723864186555147, |
|
"eval_rewards/rejected": -0.009738431312143803, |
|
"eval_runtime": 145.7839, |
|
"eval_samples_per_second": 2.353, |
|
"eval_steps_per_second": 0.295, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 3.7793282895240927e-06, |
|
"logits/chosen": -1.8076483011245728, |
|
"logits/rejected": -1.8139461278915405, |
|
"logps/chosen": -31.64394187927246, |
|
"logps/rejected": -33.57398986816406, |
|
"loss": 0.6828, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.0059343790635466576, |
|
"rewards/margins": 0.02104238048195839, |
|
"rewards/rejected": -0.015108002349734306, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.730319028506478e-06, |
|
"logits/chosen": -1.753603219985962, |
|
"logits/rejected": -1.75141179561615, |
|
"logps/chosen": -33.820560455322266, |
|
"logps/rejected": -32.37050247192383, |
|
"loss": 0.6817, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.009353202767670155, |
|
"rewards/margins": 0.0232031662017107, |
|
"rewards/rejected": -0.013849964365363121, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.6806776869317074e-06, |
|
"logits/chosen": -1.6926358938217163, |
|
"logits/rejected": -1.686195731163025, |
|
"logps/chosen": -34.429847717285156, |
|
"logps/rejected": -33.97523880004883, |
|
"loss": 0.6801, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.01034373790025711, |
|
"rewards/margins": 0.026524048298597336, |
|
"rewards/rejected": -0.016180310398340225, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.6304297682067146e-06, |
|
"logits/chosen": -1.7083446979522705, |
|
"logits/rejected": -1.7146565914154053, |
|
"logps/chosen": -33.29853820800781, |
|
"logps/rejected": -34.668426513671875, |
|
"loss": 0.6827, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.006807624362409115, |
|
"rewards/margins": 0.021089300513267517, |
|
"rewards/rejected": -0.014281675219535828, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.579601087369492e-06, |
|
"logits/chosen": -1.7786967754364014, |
|
"logits/rejected": -1.792654037475586, |
|
"logps/chosen": -31.198848724365234, |
|
"logps/rejected": -33.51192855834961, |
|
"loss": 0.6825, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.006536136381328106, |
|
"rewards/margins": 0.021678542718291283, |
|
"rewards/rejected": -0.015142406336963177, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.5282177578265295e-06, |
|
"logits/chosen": -1.6418495178222656, |
|
"logits/rejected": -1.6386057138442993, |
|
"logps/chosen": -32.84505081176758, |
|
"logps/rejected": -36.883094787597656, |
|
"loss": 0.6772, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.010779094882309437, |
|
"rewards/margins": 0.03260749578475952, |
|
"rewards/rejected": -0.02182840369641781, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.476306177936961e-06, |
|
"logits/chosen": -1.7246978282928467, |
|
"logits/rejected": -1.7246736288070679, |
|
"logps/chosen": -30.864843368530273, |
|
"logps/rejected": -36.09869384765625, |
|
"loss": 0.6799, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.004207477904856205, |
|
"rewards/margins": 0.026930591091513634, |
|
"rewards/rejected": -0.022723112255334854, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.423893017450324e-06, |
|
"logits/chosen": -1.6627376079559326, |
|
"logits/rejected": -1.6593656539916992, |
|
"logps/chosen": -30.347408294677734, |
|
"logps/rejected": -34.78777313232422, |
|
"loss": 0.6809, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.005249389447271824, |
|
"rewards/margins": 0.0249490849673748, |
|
"rewards/rejected": -0.0196996983140707, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.3710052038048794e-06, |
|
"logits/chosen": -1.676200270652771, |
|
"logits/rejected": -1.6763780117034912, |
|
"logps/chosen": -29.362756729125977, |
|
"logps/rejected": -32.716041564941406, |
|
"loss": 0.6792, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.008003375492990017, |
|
"rewards/margins": 0.028458837419748306, |
|
"rewards/rejected": -0.020455462858080864, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.3176699082935546e-06, |
|
"logits/chosen": -1.5964815616607666, |
|
"logits/rejected": -1.599886417388916, |
|
"logps/chosen": -33.50843048095703, |
|
"logps/rejected": -33.53223419189453, |
|
"loss": 0.6784, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.009700378403067589, |
|
"rewards/margins": 0.03037584200501442, |
|
"rewards/rejected": -0.02067546173930168, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_logits/chosen": -1.9464259147644043, |
|
"eval_logits/rejected": -1.9419163465499878, |
|
"eval_logps/chosen": -35.35507583618164, |
|
"eval_logps/rejected": -39.15093994140625, |
|
"eval_loss": 0.6916440725326538, |
|
"eval_rewards/accuracies": 0.565614640712738, |
|
"eval_rewards/chosen": -0.013205258175730705, |
|
"eval_rewards/margins": 0.0031379179563373327, |
|
"eval_rewards/rejected": -0.01634317822754383, |
|
"eval_runtime": 145.8665, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.2639145321045933e-06, |
|
"logits/chosen": -1.667773962020874, |
|
"logits/rejected": -1.6596691608428955, |
|
"logps/chosen": -36.030296325683594, |
|
"logps/rejected": -33.893470764160156, |
|
"loss": 0.681, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.004817272536456585, |
|
"rewards/margins": 0.024693841114640236, |
|
"rewards/rejected": -0.019876569509506226, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.2097666922441107e-06, |
|
"logits/chosen": -1.6702191829681396, |
|
"logits/rejected": -1.6717958450317383, |
|
"logps/chosen": -36.07275390625, |
|
"logps/rejected": -35.63324737548828, |
|
"loss": 0.6788, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.0036263135261833668, |
|
"rewards/margins": 0.02919856831431389, |
|
"rewards/rejected": -0.025572258979082108, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.1552542073477554e-06, |
|
"logits/chosen": -1.6882798671722412, |
|
"logits/rejected": -1.6859245300292969, |
|
"logps/chosen": -31.580810546875, |
|
"logps/rejected": -34.97660446166992, |
|
"loss": 0.6787, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.009825185872614384, |
|
"rewards/margins": 0.02962224744260311, |
|
"rewards/rejected": -0.019797060638666153, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.100405083388799e-06, |
|
"logits/chosen": -1.6548511981964111, |
|
"logits/rejected": -1.6600011587142944, |
|
"logps/chosen": -30.910289764404297, |
|
"logps/rejected": -35.500179290771484, |
|
"loss": 0.6774, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.008754345588386059, |
|
"rewards/margins": 0.03220795840024948, |
|
"rewards/rejected": -0.023453611880540848, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.0452474992899645e-06, |
|
"logits/chosen": -1.609222412109375, |
|
"logits/rejected": -1.6077518463134766, |
|
"logps/chosen": -32.664878845214844, |
|
"logps/rejected": -37.466697692871094, |
|
"loss": 0.6774, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.004600999411195517, |
|
"rewards/margins": 0.032408393919467926, |
|
"rewards/rejected": -0.02780739590525627, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.989809792446417e-06, |
|
"logits/chosen": -1.4765026569366455, |
|
"logits/rejected": -1.472049355506897, |
|
"logps/chosen": -35.379676818847656, |
|
"logps/rejected": -38.33124542236328, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.006187664810568094, |
|
"rewards/margins": 0.03741595149040222, |
|
"rewards/rejected": -0.031228289008140564, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"logits/chosen": -1.5943939685821533, |
|
"logits/rejected": -1.5989573001861572, |
|
"logps/chosen": -34.98912811279297, |
|
"logps/rejected": -36.04502487182617, |
|
"loss": 0.6801, |
|
"rewards/accuracies": 0.7208333611488342, |
|
"rewards/chosen": 0.00224525248631835, |
|
"rewards/margins": 0.026747092604637146, |
|
"rewards/rejected": -0.024501841515302658, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 2.878208065043501e-06, |
|
"logits/chosen": -1.542252779006958, |
|
"logits/rejected": -1.5407251119613647, |
|
"logps/chosen": -32.663124084472656, |
|
"logps/rejected": -38.704864501953125, |
|
"loss": 0.6661, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.012384667061269283, |
|
"rewards/margins": 0.05538671463727951, |
|
"rewards/rejected": -0.04300205036997795, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.8221013802485974e-06, |
|
"logits/chosen": -1.5762343406677246, |
|
"logits/rejected": -1.5737056732177734, |
|
"logps/chosen": -32.12613296508789, |
|
"logps/rejected": -36.56070327758789, |
|
"loss": 0.6701, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.012496042996644974, |
|
"rewards/margins": 0.04723736643791199, |
|
"rewards/rejected": -0.03474132716655731, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.76582921478147e-06, |
|
"logits/chosen": -1.4931247234344482, |
|
"logits/rejected": -1.487870454788208, |
|
"logps/chosen": -33.71710968017578, |
|
"logps/rejected": -34.95537567138672, |
|
"loss": 0.6729, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.006659658160060644, |
|
"rewards/margins": 0.04157082363963127, |
|
"rewards/rejected": -0.034911174327135086, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_logits/chosen": -1.8692306280136108, |
|
"eval_logits/rejected": -1.8648308515548706, |
|
"eval_logps/chosen": -35.941200256347656, |
|
"eval_logps/rejected": -39.86221694946289, |
|
"eval_loss": 0.6910557746887207, |
|
"eval_rewards/accuracies": 0.5539867281913757, |
|
"eval_rewards/chosen": -0.019066473469138145, |
|
"eval_rewards/margins": 0.004389475099742413, |
|
"eval_rewards/rejected": -0.023455949500203133, |
|
"eval_runtime": 145.7021, |
|
"eval_samples_per_second": 2.354, |
|
"eval_steps_per_second": 0.295, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.7094204786572254e-06, |
|
"logits/chosen": -1.579530954360962, |
|
"logits/rejected": -1.58658766746521, |
|
"logps/chosen": -31.1917724609375, |
|
"logps/rejected": -38.586029052734375, |
|
"loss": 0.6677, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.009364848956465721, |
|
"rewards/margins": 0.05266670510172844, |
|
"rewards/rejected": -0.04330185800790787, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.6529041520546072e-06, |
|
"logits/chosen": -1.5474834442138672, |
|
"logits/rejected": -1.5493825674057007, |
|
"logps/chosen": -31.922176361083984, |
|
"logps/rejected": -36.21441650390625, |
|
"loss": 0.6755, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.006489004008471966, |
|
"rewards/margins": 0.03633515536785126, |
|
"rewards/rejected": -0.029846150428056717, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.5963092704273302e-06, |
|
"logits/chosen": -1.4332886934280396, |
|
"logits/rejected": -1.4374314546585083, |
|
"logps/chosen": -32.152000427246094, |
|
"logps/rejected": -39.53594207763672, |
|
"loss": 0.6685, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.004394562914967537, |
|
"rewards/margins": 0.05065219849348068, |
|
"rewards/rejected": -0.046257637441158295, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.53966490958702e-06, |
|
"logits/chosen": -1.4998013973236084, |
|
"logits/rejected": -1.496098518371582, |
|
"logps/chosen": -32.54491424560547, |
|
"logps/rejected": -36.80445098876953, |
|
"loss": 0.6725, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.004237356595695019, |
|
"rewards/margins": 0.04233536496758461, |
|
"rewards/rejected": -0.03809800371527672, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.4830001707654135e-06, |
|
"logits/chosen": -1.5474607944488525, |
|
"logits/rejected": -1.549788236618042, |
|
"logps/chosen": -31.952754974365234, |
|
"logps/rejected": -40.619407653808594, |
|
"loss": 0.6651, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.00822580885142088, |
|
"rewards/margins": 0.057800523936748505, |
|
"rewards/rejected": -0.04957471415400505, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.4263441656635054e-06, |
|
"logits/chosen": -1.3754708766937256, |
|
"logits/rejected": -1.3716084957122803, |
|
"logps/chosen": -36.57474136352539, |
|
"logps/rejected": -37.647613525390625, |
|
"loss": 0.6716, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.002930630696937442, |
|
"rewards/margins": 0.04462647810578346, |
|
"rewards/rejected": -0.04755710810422897, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.3697260014953107e-06, |
|
"logits/chosen": -1.4011175632476807, |
|
"logits/rejected": -1.4010181427001953, |
|
"logps/chosen": -35.5493049621582, |
|
"logps/rejected": -40.10515213012695, |
|
"loss": 0.6662, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.0029350135009735823, |
|
"rewards/margins": 0.055523864924907684, |
|
"rewards/rejected": -0.052588850259780884, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.3131747660339396e-06, |
|
"logits/chosen": -1.4290226697921753, |
|
"logits/rejected": -1.4171994924545288, |
|
"logps/chosen": -33.74538040161133, |
|
"logps/rejected": -38.27408981323242, |
|
"loss": 0.6654, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.0036120389122515917, |
|
"rewards/margins": 0.057132624089717865, |
|
"rewards/rejected": -0.05352058261632919, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.256719512667651e-06, |
|
"logits/chosen": -1.5206860303878784, |
|
"logits/rejected": -1.5256131887435913, |
|
"logps/chosen": -33.839393615722656, |
|
"logps/rejected": -38.63503646850586, |
|
"loss": 0.6659, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.005094523541629314, |
|
"rewards/margins": 0.056891001760959625, |
|
"rewards/rejected": -0.061985522508621216, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.2003892454735786e-06, |
|
"logits/chosen": -1.4437249898910522, |
|
"logits/rejected": -1.4366403818130493, |
|
"logps/chosen": -34.63188552856445, |
|
"logps/rejected": -38.323524475097656, |
|
"loss": 0.6635, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.0022219305392354727, |
|
"rewards/margins": 0.06143581122159958, |
|
"rewards/rejected": -0.05921388417482376, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_logits/chosen": -1.7487633228302002, |
|
"eval_logits/rejected": -1.7445435523986816, |
|
"eval_logps/chosen": -37.12141799926758, |
|
"eval_logps/rejected": -41.19174575805664, |
|
"eval_loss": 0.6904172301292419, |
|
"eval_rewards/accuracies": 0.5365448594093323, |
|
"eval_rewards/chosen": -0.03086867742240429, |
|
"eval_rewards/margins": 0.00588257284834981, |
|
"eval_rewards/rejected": -0.03675125539302826, |
|
"eval_runtime": 145.8716, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.1442129043167877e-06, |
|
"logits/chosen": -1.433040738105774, |
|
"logits/rejected": -1.4330635070800781, |
|
"logps/chosen": -31.19219970703125, |
|
"logps/rejected": -41.520694732666016, |
|
"loss": 0.6604, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.00025037964223884046, |
|
"rewards/margins": 0.06805966049432755, |
|
"rewards/rejected": -0.06831003725528717, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.088219349982323e-06, |
|
"logits/chosen": -1.3752285242080688, |
|
"logits/rejected": -1.3669588565826416, |
|
"logps/chosen": -32.70459747314453, |
|
"logps/rejected": -40.24443817138672, |
|
"loss": 0.6653, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.008656435646116734, |
|
"rewards/margins": 0.058074213564395905, |
|
"rewards/rejected": -0.06673064827919006, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.0324373493478803e-06, |
|
"logits/chosen": -1.5194597244262695, |
|
"logits/rejected": -1.517913818359375, |
|
"logps/chosen": -30.247411727905273, |
|
"logps/rejected": -39.21205520629883, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.002503753872588277, |
|
"rewards/margins": 0.06096818298101425, |
|
"rewards/rejected": -0.06347193568944931, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.976895560604729e-06, |
|
"logits/chosen": -1.4003164768218994, |
|
"logits/rejected": -1.4109015464782715, |
|
"logps/chosen": -35.36278533935547, |
|
"logps/rejected": -40.1971549987793, |
|
"loss": 0.6632, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.009377234615385532, |
|
"rewards/margins": 0.06269785016775131, |
|
"rewards/rejected": -0.07207508385181427, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.921622518534466e-06, |
|
"logits/chosen": -1.4363105297088623, |
|
"logits/rejected": -1.4391801357269287, |
|
"logps/chosen": -31.704153060913086, |
|
"logps/rejected": -38.39413070678711, |
|
"loss": 0.6663, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.010564305819571018, |
|
"rewards/margins": 0.056016188114881516, |
|
"rewards/rejected": -0.06658048927783966, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.8666466198491794e-06, |
|
"logits/chosen": -1.411747932434082, |
|
"logits/rejected": -1.4068377017974854, |
|
"logps/chosen": -34.955177307128906, |
|
"logps/rejected": -41.352115631103516, |
|
"loss": 0.6619, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.008799608796834946, |
|
"rewards/margins": 0.06565765291452408, |
|
"rewards/rejected": -0.07445726543664932, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.8119961086025376e-06, |
|
"logits/chosen": -1.3314030170440674, |
|
"logits/rejected": -1.3333518505096436, |
|
"logps/chosen": -33.6180534362793, |
|
"logps/rejected": -42.525047302246094, |
|
"loss": 0.6617, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.008053036406636238, |
|
"rewards/margins": 0.06538151204586029, |
|
"rewards/rejected": -0.07343455404043198, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.7576990616793139e-06, |
|
"logits/chosen": -1.3536522388458252, |
|
"logits/rejected": -1.3478691577911377, |
|
"logps/chosen": -37.943336486816406, |
|
"logps/rejected": -44.42793655395508, |
|
"loss": 0.6663, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.025382736697793007, |
|
"rewards/margins": 0.0562770739197731, |
|
"rewards/rejected": -0.08165980130434036, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.7037833743707892e-06, |
|
"logits/chosen": -1.3406635522842407, |
|
"logits/rejected": -1.3342511653900146, |
|
"logps/chosen": -32.205875396728516, |
|
"logps/rejected": -44.02067565917969, |
|
"loss": 0.6603, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.012029221281409264, |
|
"rewards/margins": 0.0682787075638771, |
|
"rewards/rejected": -0.08030791580677032, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.6502767460434588e-06, |
|
"logits/chosen": -1.3228440284729004, |
|
"logits/rejected": -1.3131605386734009, |
|
"logps/chosen": -33.538795471191406, |
|
"logps/rejected": -35.858123779296875, |
|
"loss": 0.6719, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.01790105737745762, |
|
"rewards/margins": 0.04468285292387009, |
|
"rewards/rejected": -0.06258390843868256, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_logits/chosen": -1.660041332244873, |
|
"eval_logits/rejected": -1.6560044288635254, |
|
"eval_logps/chosen": -38.48649597167969, |
|
"eval_logps/rejected": -42.736080169677734, |
|
"eval_loss": 0.6896607875823975, |
|
"eval_rewards/accuracies": 0.5485880374908447, |
|
"eval_rewards/chosen": -0.04451945051550865, |
|
"eval_rewards/margins": 0.007675125263631344, |
|
"eval_rewards/rejected": -0.05219458416104317, |
|
"eval_runtime": 145.8291, |
|
"eval_samples_per_second": 2.352, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.5972066659083796e-06, |
|
"logits/chosen": -1.3907979726791382, |
|
"logits/rejected": -1.3904699087142944, |
|
"logps/chosen": -33.401824951171875, |
|
"logps/rejected": -37.548805236816406, |
|
"loss": 0.6661, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.015776285901665688, |
|
"rewards/margins": 0.05712694674730301, |
|
"rewards/rejected": -0.07290322333574295, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.5446003988985041e-06, |
|
"logits/chosen": -1.4413875341415405, |
|
"logits/rejected": -1.4416849613189697, |
|
"logps/chosen": -33.36183547973633, |
|
"logps/rejected": -38.4256477355957, |
|
"loss": 0.665, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.014182562939822674, |
|
"rewards/margins": 0.05841582268476486, |
|
"rewards/rejected": -0.07259838283061981, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.4924849716612211e-06, |
|
"logits/chosen": -1.3972914218902588, |
|
"logits/rejected": -1.4021806716918945, |
|
"logps/chosen": -34.44452667236328, |
|
"logps/rejected": -34.45269775390625, |
|
"loss": 0.6727, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.021644581109285355, |
|
"rewards/margins": 0.043045226484537125, |
|
"rewards/rejected": -0.06468981504440308, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.440887158673332e-06, |
|
"logits/chosen": -1.3861340284347534, |
|
"logits/rejected": -1.377633810043335, |
|
"logps/chosen": -32.912872314453125, |
|
"logps/rejected": -42.408958435058594, |
|
"loss": 0.6588, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.01734977774322033, |
|
"rewards/margins": 0.07173751294612885, |
|
"rewards/rejected": -0.08908729255199432, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.3898334684855647e-06, |
|
"logits/chosen": -1.3528214693069458, |
|
"logits/rejected": -1.3631798028945923, |
|
"logps/chosen": -35.93256378173828, |
|
"logps/rejected": -40.24216079711914, |
|
"loss": 0.666, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.023794613778591156, |
|
"rewards/margins": 0.05617685988545418, |
|
"rewards/rejected": -0.07997147738933563, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.3393501301037245e-06, |
|
"logits/chosen": -1.4136943817138672, |
|
"logits/rejected": -1.405368447303772, |
|
"logps/chosen": -35.31805419921875, |
|
"logps/rejected": -45.923988342285156, |
|
"loss": 0.6566, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.018172740936279297, |
|
"rewards/margins": 0.07775326073169708, |
|
"rewards/rejected": -0.09592600166797638, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.2894630795134454e-06, |
|
"logits/chosen": -1.320759892463684, |
|
"logits/rejected": -1.3234620094299316, |
|
"logps/chosen": -37.43547439575195, |
|
"logps/rejected": -40.544471740722656, |
|
"loss": 0.6625, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.014461873099207878, |
|
"rewards/margins": 0.06400416046380997, |
|
"rewards/rejected": -0.078466035425663, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.2401979463554984e-06, |
|
"logits/chosen": -1.4195467233657837, |
|
"logits/rejected": -1.4183709621429443, |
|
"logps/chosen": -34.790035247802734, |
|
"logps/rejected": -43.464012145996094, |
|
"loss": 0.6569, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.017323976382613182, |
|
"rewards/margins": 0.07615931332111359, |
|
"rewards/rejected": -0.09348328411579132, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.1915800407584705e-06, |
|
"logits/chosen": -1.4136641025543213, |
|
"logits/rejected": -1.4168442487716675, |
|
"logps/chosen": -32.618568420410156, |
|
"logps/rejected": -41.96255111694336, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.017252514138817787, |
|
"rewards/margins": 0.06689468771219254, |
|
"rewards/rejected": -0.08414719998836517, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.1436343403356019e-06, |
|
"logits/chosen": -1.3993356227874756, |
|
"logits/rejected": -1.4037957191467285, |
|
"logps/chosen": -35.935604095458984, |
|
"logps/rejected": -36.58147430419922, |
|
"loss": 0.6749, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.023666027933359146, |
|
"rewards/margins": 0.038131967186927795, |
|
"rewards/rejected": -0.06179799512028694, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_logits/chosen": -1.6285730600357056, |
|
"eval_logits/rejected": -1.6245777606964111, |
|
"eval_logps/chosen": -38.960731506347656, |
|
"eval_logps/rejected": -43.267208099365234, |
|
"eval_loss": 0.6894330978393555, |
|
"eval_rewards/accuracies": 0.5365448594093323, |
|
"eval_rewards/chosen": -0.04926181212067604, |
|
"eval_rewards/margins": 0.008244064636528492, |
|
"eval_rewards/rejected": -0.057505879551172256, |
|
"eval_runtime": 145.7626, |
|
"eval_samples_per_second": 2.353, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.0963854773524548e-06, |
|
"logits/chosen": -1.3872336149215698, |
|
"logits/rejected": -1.3884273767471313, |
|
"logps/chosen": -34.366111755371094, |
|
"logps/rejected": -38.44298553466797, |
|
"loss": 0.6646, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.014259574934840202, |
|
"rewards/margins": 0.06001114100217819, |
|
"rewards/rejected": -0.07427072525024414, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.049857726072005e-06, |
|
"logits/chosen": -1.2578824758529663, |
|
"logits/rejected": -1.2607439756393433, |
|
"logps/chosen": -36.28580856323242, |
|
"logps/rejected": -40.985992431640625, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.017208317294716835, |
|
"rewards/margins": 0.06223895400762558, |
|
"rewards/rejected": -0.07944727689027786, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0040749902836508e-06, |
|
"logits/chosen": -1.2802751064300537, |
|
"logits/rejected": -1.2788641452789307, |
|
"logps/chosen": -33.49232864379883, |
|
"logps/rejected": -38.528602600097656, |
|
"loss": 0.67, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.024806050583720207, |
|
"rewards/margins": 0.04882260411977768, |
|
"rewards/rejected": -0.07362865656614304, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 9.59060791022566e-07, |
|
"logits/chosen": -1.3983075618743896, |
|
"logits/rejected": -1.3943830728530884, |
|
"logps/chosen": -34.20863723754883, |
|
"logps/rejected": -41.15024185180664, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.011688429862260818, |
|
"rewards/margins": 0.07027387619018555, |
|
"rewards/rejected": -0.08196230232715607, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 9.148382544856885e-07, |
|
"logits/chosen": -1.3060632944107056, |
|
"logits/rejected": -1.2978880405426025, |
|
"logps/chosen": -35.88400650024414, |
|
"logps/rejected": -39.35108184814453, |
|
"loss": 0.6663, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.024322666227817535, |
|
"rewards/margins": 0.05626412108540535, |
|
"rewards/rejected": -0.08058679848909378, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 8.714301001505568e-07, |
|
"logits/chosen": -1.332467794418335, |
|
"logits/rejected": -1.3338046073913574, |
|
"logps/chosen": -35.79069900512695, |
|
"logps/rejected": -38.749183654785156, |
|
"loss": 0.6671, |
|
"rewards/accuracies": 0.7541667222976685, |
|
"rewards/chosen": -0.021089451387524605, |
|
"rewards/margins": 0.05438787862658501, |
|
"rewards/rejected": -0.07547733187675476, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 8.288586291031025e-07, |
|
"logits/chosen": -1.4120080471038818, |
|
"logits/rejected": -1.4067761898040771, |
|
"logps/chosen": -35.47734069824219, |
|
"logps/rejected": -40.39026641845703, |
|
"loss": 0.667, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.019513698294758797, |
|
"rewards/margins": 0.05497678369283676, |
|
"rewards/rejected": -0.074490487575531, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 7.871457125803897e-07, |
|
"logits/chosen": -1.3105064630508423, |
|
"logits/rejected": -1.3196675777435303, |
|
"logps/chosen": -35.979042053222656, |
|
"logps/rejected": -40.29875946044922, |
|
"loss": 0.6678, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.026460427790880203, |
|
"rewards/margins": 0.05291280895471573, |
|
"rewards/rejected": -0.07937324047088623, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 7.463127807341966e-07, |
|
"logits/chosen": -1.3309608697891235, |
|
"logits/rejected": -1.325539231300354, |
|
"logps/chosen": -33.38233947753906, |
|
"logps/rejected": -41.38855743408203, |
|
"loss": 0.66, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.012087189592421055, |
|
"rewards/margins": 0.06896394491195679, |
|
"rewards/rejected": -0.08105112612247467, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 7.063808116212021e-07, |
|
"logits/chosen": -1.2911185026168823, |
|
"logits/rejected": -1.2924482822418213, |
|
"logps/chosen": -35.253963470458984, |
|
"logps/rejected": -42.57808303833008, |
|
"loss": 0.6551, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.017635192722082138, |
|
"rewards/margins": 0.08054514229297638, |
|
"rewards/rejected": -0.09818033874034882, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_logits/chosen": -1.6238328218460083, |
|
"eval_logits/rejected": -1.6198344230651855, |
|
"eval_logps/chosen": -39.01145935058594, |
|
"eval_logps/rejected": -43.33791732788086, |
|
"eval_loss": 0.6893402338027954, |
|
"eval_rewards/accuracies": 0.5365448594093323, |
|
"eval_rewards/chosen": -0.049769096076488495, |
|
"eval_rewards/margins": 0.008443917147815228, |
|
"eval_rewards/rejected": -0.058213010430336, |
|
"eval_runtime": 145.8737, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 6.673703204254348e-07, |
|
"logits/chosen": -1.2456345558166504, |
|
"logits/rejected": -1.2452775239944458, |
|
"logps/chosen": -37.221336364746094, |
|
"logps/rejected": -42.06071853637695, |
|
"loss": 0.6544, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.01168847642838955, |
|
"rewards/margins": 0.08182507008314133, |
|
"rewards/rejected": -0.09351354837417603, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 6.293013489185315e-07, |
|
"logits/chosen": -1.3760040998458862, |
|
"logits/rejected": -1.3689346313476562, |
|
"logps/chosen": -33.46622848510742, |
|
"logps/rejected": -42.4327392578125, |
|
"loss": 0.6562, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.017625439912080765, |
|
"rewards/margins": 0.07791656255722046, |
|
"rewards/rejected": -0.09554200619459152, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 5.921934551632086e-07, |
|
"logits/chosen": -1.2549601793289185, |
|
"logits/rejected": -1.2447240352630615, |
|
"logps/chosen": -35.567508697509766, |
|
"logps/rejected": -42.02611541748047, |
|
"loss": 0.6551, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.011818263679742813, |
|
"rewards/margins": 0.07950461655855179, |
|
"rewards/rejected": -0.0913228839635849, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 5.560657034652405e-07, |
|
"logits/chosen": -1.3439350128173828, |
|
"logits/rejected": -1.338648796081543, |
|
"logps/chosen": -33.191280364990234, |
|
"logps/rejected": -36.837867736816406, |
|
"loss": 0.6678, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.024910490959882736, |
|
"rewards/margins": 0.05479263514280319, |
|
"rewards/rejected": -0.07970312982797623, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 5.2093665457911e-07, |
|
"logits/chosen": -1.3508336544036865, |
|
"logits/rejected": -1.3587679862976074, |
|
"logps/chosen": -37.35521697998047, |
|
"logps/rejected": -39.65736770629883, |
|
"loss": 0.6635, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.019598282873630524, |
|
"rewards/margins": 0.06179226562380791, |
|
"rewards/rejected": -0.08139055222272873, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 4.868243561723535e-07, |
|
"logits/chosen": -1.3516252040863037, |
|
"logits/rejected": -1.3514872789382935, |
|
"logps/chosen": -35.94284439086914, |
|
"logps/rejected": -42.4056510925293, |
|
"loss": 0.6611, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.029142867773771286, |
|
"rewards/margins": 0.0667649507522583, |
|
"rewards/rejected": -0.09590782225131989, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 4.537463335535161e-07, |
|
"logits/chosen": -1.265148401260376, |
|
"logits/rejected": -1.2630140781402588, |
|
"logps/chosen": -34.26659393310547, |
|
"logps/rejected": -43.09412384033203, |
|
"loss": 0.6533, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.01175761315971613, |
|
"rewards/margins": 0.08397753536701202, |
|
"rewards/rejected": -0.09573514014482498, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 4.217195806684629e-07, |
|
"logits/chosen": -1.1799885034561157, |
|
"logits/rejected": -1.1763312816619873, |
|
"logps/chosen": -36.85099411010742, |
|
"logps/rejected": -38.88633346557617, |
|
"loss": 0.6628, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.016135532408952713, |
|
"rewards/margins": 0.06346292048692703, |
|
"rewards/rejected": -0.07959844172000885, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.907605513696808e-07, |
|
"logits/chosen": -1.353476881980896, |
|
"logits/rejected": -1.339864730834961, |
|
"logps/chosen": -36.990108489990234, |
|
"logps/rejected": -45.27104568481445, |
|
"loss": 0.6558, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.0264253169298172, |
|
"rewards/margins": 0.0786319449543953, |
|
"rewards/rejected": -0.1050572618842125, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 3.6088515096305675e-07, |
|
"logits/chosen": -1.304051160812378, |
|
"logits/rejected": -1.3077205419540405, |
|
"logps/chosen": -35.44999313354492, |
|
"logps/rejected": -47.28888702392578, |
|
"loss": 0.6489, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.019323688000440598, |
|
"rewards/margins": 0.09253297001123428, |
|
"rewards/rejected": -0.11185667663812637, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_logits/chosen": -1.6211615800857544, |
|
"eval_logits/rejected": -1.6171820163726807, |
|
"eval_logps/chosen": -39.072174072265625, |
|
"eval_logps/rejected": -43.41142654418945, |
|
"eval_loss": 0.6892833113670349, |
|
"eval_rewards/accuracies": 0.5394518375396729, |
|
"eval_rewards/chosen": -0.050376225262880325, |
|
"eval_rewards/margins": 0.008571851067245007, |
|
"eval_rewards/rejected": -0.058948077261447906, |
|
"eval_runtime": 145.8858, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3.321087280364757e-07, |
|
"logits/chosen": -1.2905550003051758, |
|
"logits/rejected": -1.2912893295288086, |
|
"logps/chosen": -38.21614074707031, |
|
"logps/rejected": -47.26013946533203, |
|
"loss": 0.6547, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.01972118392586708, |
|
"rewards/margins": 0.08203905820846558, |
|
"rewards/rejected": -0.10176024585962296, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 3.044460665744284e-07, |
|
"logits/chosen": -1.3596677780151367, |
|
"logits/rejected": -1.3584003448486328, |
|
"logps/chosen": -33.941978454589844, |
|
"logps/rejected": -39.85774230957031, |
|
"loss": 0.6596, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.01848122850060463, |
|
"rewards/margins": 0.07056263089179993, |
|
"rewards/rejected": -0.08904386311769485, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 2.779113783626916e-07, |
|
"logits/chosen": -1.2977750301361084, |
|
"logits/rejected": -1.2993113994598389, |
|
"logps/chosen": -35.68281936645508, |
|
"logps/rejected": -42.771202087402344, |
|
"loss": 0.6558, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.013085673563182354, |
|
"rewards/margins": 0.07813958078622818, |
|
"rewards/rejected": -0.09122525155544281, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 2.5251829568697204e-07, |
|
"logits/chosen": -1.3435966968536377, |
|
"logits/rejected": -1.3425482511520386, |
|
"logps/chosen": -32.46406555175781, |
|
"logps/rejected": -40.374244689941406, |
|
"loss": 0.659, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.012653985992074013, |
|
"rewards/margins": 0.07116873562335968, |
|
"rewards/rejected": -0.08382271975278854, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 2.2827986432927774e-07, |
|
"logits/chosen": -1.362319827079773, |
|
"logits/rejected": -1.3474690914154053, |
|
"logps/chosen": -36.95580291748047, |
|
"logps/rejected": -47.64240264892578, |
|
"loss": 0.6519, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.026391323655843735, |
|
"rewards/margins": 0.08738837391138077, |
|
"rewards/rejected": -0.11377968639135361, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 2.0520853686560177e-07, |
|
"logits/chosen": -1.3275715112686157, |
|
"logits/rejected": -1.3374977111816406, |
|
"logps/chosen": -33.35503387451172, |
|
"logps/rejected": -41.160377502441406, |
|
"loss": 0.6572, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.012865647673606873, |
|
"rewards/margins": 0.07634075731039047, |
|
"rewards/rejected": -0.08920640498399734, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.833161662683672e-07, |
|
"logits/chosen": -1.4463578462600708, |
|
"logits/rejected": -1.4461679458618164, |
|
"logps/chosen": -32.90170669555664, |
|
"logps/rejected": -47.23381423950195, |
|
"loss": 0.6432, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.008712759241461754, |
|
"rewards/margins": 0.10531653463840485, |
|
"rewards/rejected": -0.11402928829193115, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 1.626139998169246e-07, |
|
"logits/chosen": -1.3187510967254639, |
|
"logits/rejected": -1.3259624242782593, |
|
"logps/chosen": -35.57271957397461, |
|
"logps/rejected": -47.857994079589844, |
|
"loss": 0.6506, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.013922369107604027, |
|
"rewards/margins": 0.09016112238168716, |
|
"rewards/rejected": -0.10408350080251694, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 1.4311267331922535e-07, |
|
"logits/chosen": -1.2786071300506592, |
|
"logits/rejected": -1.2746905088424683, |
|
"logps/chosen": -35.84669876098633, |
|
"logps/rejected": -39.81802749633789, |
|
"loss": 0.659, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.010794862173497677, |
|
"rewards/margins": 0.07141149789094925, |
|
"rewards/rejected": -0.0822063684463501, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.2482220564763669e-07, |
|
"logits/chosen": -1.401760458946228, |
|
"logits/rejected": -1.4005050659179688, |
|
"logps/chosen": -32.612770080566406, |
|
"logps/rejected": -40.44251251220703, |
|
"loss": 0.6597, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.013162161223590374, |
|
"rewards/margins": 0.06981828063726425, |
|
"rewards/rejected": -0.0829804539680481, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_logits/chosen": -1.6204686164855957, |
|
"eval_logits/rejected": -1.6164851188659668, |
|
"eval_logps/chosen": -39.084259033203125, |
|
"eval_logps/rejected": -43.422786712646484, |
|
"eval_loss": 0.6892901659011841, |
|
"eval_rewards/accuracies": 0.5423588156700134, |
|
"eval_rewards/chosen": -0.050497058779001236, |
|
"eval_rewards/margins": 0.008564572781324387, |
|
"eval_rewards/rejected": -0.05906163901090622, |
|
"eval_runtime": 145.7418, |
|
"eval_samples_per_second": 2.353, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 1.0775199359171346e-07, |
|
"logits/chosen": -1.372238278388977, |
|
"logits/rejected": -1.3673722743988037, |
|
"logps/chosen": -35.539161682128906, |
|
"logps/rejected": -37.22252655029297, |
|
"loss": 0.6659, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.018966300413012505, |
|
"rewards/margins": 0.05771438404917717, |
|
"rewards/rejected": -0.07668069750070572, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 9.191080703056604e-08, |
|
"logits/chosen": -1.321447730064392, |
|
"logits/rejected": -1.3224408626556396, |
|
"logps/chosen": -34.87453079223633, |
|
"logps/rejected": -43.16680145263672, |
|
"loss": 0.6608, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.015096550807356834, |
|
"rewards/margins": 0.06733135879039764, |
|
"rewards/rejected": -0.08242791891098022, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 7.730678442730539e-08, |
|
"logits/chosen": -1.271436095237732, |
|
"logits/rejected": -1.265836477279663, |
|
"logps/chosen": -35.28139114379883, |
|
"logps/rejected": -47.02886199951172, |
|
"loss": 0.6503, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.01139620877802372, |
|
"rewards/margins": 0.09199832379817963, |
|
"rewards/rejected": -0.10339454561471939, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 6.394742864787806e-08, |
|
"logits/chosen": -1.285681962966919, |
|
"logits/rejected": -1.2799713611602783, |
|
"logps/chosen": -30.83676528930664, |
|
"logps/rejected": -40.77880096435547, |
|
"loss": 0.6574, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.015235202386975288, |
|
"rewards/margins": 0.07524626702070236, |
|
"rewards/rejected": -0.0904814749956131, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 5.183960310644748e-08, |
|
"logits/chosen": -1.337096929550171, |
|
"logits/rejected": -1.3268693685531616, |
|
"logps/chosen": -34.95880889892578, |
|
"logps/rejected": -44.96342086791992, |
|
"loss": 0.6587, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.02477274276316166, |
|
"rewards/margins": 0.07200786471366882, |
|
"rewards/rejected": -0.09678061306476593, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 4.098952823928693e-08, |
|
"logits/chosen": -1.2949423789978027, |
|
"logits/rejected": -1.2914998531341553, |
|
"logps/chosen": -35.32928466796875, |
|
"logps/rejected": -39.03660583496094, |
|
"loss": 0.664, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.02019507996737957, |
|
"rewards/margins": 0.06126277893781662, |
|
"rewards/rejected": -0.08145786076784134, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 3.1402778309014284e-08, |
|
"logits/chosen": -1.3512227535247803, |
|
"logits/rejected": -1.3558355569839478, |
|
"logps/chosen": -33.2025260925293, |
|
"logps/rejected": -41.91522979736328, |
|
"loss": 0.656, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.011333522386848927, |
|
"rewards/margins": 0.07819454371929169, |
|
"rewards/rejected": -0.08952806890010834, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 2.3084278540791427e-08, |
|
"logits/chosen": -1.3535398244857788, |
|
"logits/rejected": -1.3626043796539307, |
|
"logps/chosen": -32.97187042236328, |
|
"logps/rejected": -37.688446044921875, |
|
"loss": 0.6605, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.012766157276928425, |
|
"rewards/margins": 0.06783264130353928, |
|
"rewards/rejected": -0.08059880137443542, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 1.6038302591975807e-08, |
|
"logits/chosen": -1.2888884544372559, |
|
"logits/rejected": -1.2834962606430054, |
|
"logps/chosen": -35.54216003417969, |
|
"logps/rejected": -40.272682189941406, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.018856767565011978, |
|
"rewards/margins": 0.061765290796756744, |
|
"rewards/rejected": -0.08062206208705902, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 1.0268470356514237e-08, |
|
"logits/chosen": -1.3495625257492065, |
|
"logits/rejected": -1.346825122833252, |
|
"logps/chosen": -35.6667366027832, |
|
"logps/rejected": -43.17388153076172, |
|
"loss": 0.6557, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.020411662757396698, |
|
"rewards/margins": 0.0787249356508255, |
|
"rewards/rejected": -0.0991365909576416, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"eval_logits/chosen": -1.6205651760101318, |
|
"eval_logits/rejected": -1.616579294204712, |
|
"eval_logps/chosen": -39.0870246887207, |
|
"eval_logps/rejected": -43.4185791015625, |
|
"eval_loss": 0.6893215179443359, |
|
"eval_rewards/accuracies": 0.5423588156700134, |
|
"eval_rewards/chosen": -0.05052470788359642, |
|
"eval_rewards/margins": 0.008494864217936993, |
|
"eval_rewards/rejected": -0.05901956930756569, |
|
"eval_runtime": 145.7021, |
|
"eval_samples_per_second": 2.354, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 5.777746105209147e-09, |
|
"logits/chosen": -1.4113116264343262, |
|
"logits/rejected": -1.411259651184082, |
|
"logps/chosen": -30.929424285888672, |
|
"logps/rejected": -41.88774871826172, |
|
"loss": 0.6548, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.011131499893963337, |
|
"rewards/margins": 0.08034153282642365, |
|
"rewards/rejected": -0.09147302061319351, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 2.5684369628148352e-09, |
|
"logits/chosen": -1.276719331741333, |
|
"logits/rejected": -1.2754055261611938, |
|
"logps/chosen": -34.34500503540039, |
|
"logps/rejected": -42.410675048828125, |
|
"loss": 0.6587, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.013031329028308392, |
|
"rewards/margins": 0.07233807444572449, |
|
"rewards/rejected": -0.08536941558122635, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 6.421917227455999e-10, |
|
"logits/chosen": -1.4115439653396606, |
|
"logits/rejected": -1.4091360569000244, |
|
"logps/chosen": -33.317054748535156, |
|
"logps/rejected": -40.02583694458008, |
|
"loss": 0.6612, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.018429961055517197, |
|
"rewards/margins": 0.0669919028878212, |
|
"rewards/rejected": -0.0854218453168869, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.398667573928833, |
|
"logits/rejected": -1.3992483615875244, |
|
"logps/chosen": -33.088409423828125, |
|
"logps/rejected": -36.9452018737793, |
|
"loss": 0.6707, |
|
"rewards/accuracies": 0.7791666388511658, |
|
"rewards/chosen": -0.024684693664312363, |
|
"rewards/margins": 0.0468655489385128, |
|
"rewards/rejected": -0.07155025750398636, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 1540, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5396727961379212, |
|
"train_runtime": 10793.7948, |
|
"train_samples_per_second": 1.141, |
|
"train_steps_per_second": 0.143 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1540, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|