aftonposten-6b-align-scan / trainer_state.json
hugodk-sch's picture
Model save
83180b2 verified
raw
history blame
84.6 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 4.0,
"eval_steps": 100,
"global_step": 1540,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.282051282051282e-07,
"logits/chosen": -1.7278180122375488,
"logits/rejected": -1.7377450466156006,
"logps/chosen": -29.553977966308594,
"logps/rejected": -42.813133239746094,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"learning_rate": 1.282051282051282e-06,
"logits/chosen": -1.8666962385177612,
"logits/rejected": -1.8709977865219116,
"logps/chosen": -36.98939514160156,
"logps/rejected": -33.66963195800781,
"loss": 0.6929,
"rewards/accuracies": 0.5694444179534912,
"rewards/chosen": 0.00017197892884723842,
"rewards/margins": 0.0005675320862792432,
"rewards/rejected": -0.0003955531574320048,
"step": 10
},
{
"epoch": 0.05,
"learning_rate": 2.564102564102564e-06,
"logits/chosen": -1.9978935718536377,
"logits/rejected": -2.000532627105713,
"logps/chosen": -29.66562843322754,
"logps/rejected": -29.045883178710938,
"loss": 0.6934,
"rewards/accuracies": 0.36250001192092896,
"rewards/chosen": -0.00023434234026353806,
"rewards/margins": -0.0004099405778106302,
"rewards/rejected": 0.0001755982666509226,
"step": 20
},
{
"epoch": 0.08,
"learning_rate": 3.846153846153847e-06,
"logits/chosen": -1.9211324453353882,
"logits/rejected": -1.9184545278549194,
"logps/chosen": -31.41294288635254,
"logps/rejected": -33.23053741455078,
"loss": 0.6931,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 3.152530553052202e-05,
"rewards/margins": 0.000152341352077201,
"rewards/rejected": -0.00012081606837455183,
"step": 30
},
{
"epoch": 0.1,
"learning_rate": 4.999896948438434e-06,
"logits/chosen": -2.017341375350952,
"logits/rejected": -2.0086092948913574,
"logps/chosen": -32.60146713256836,
"logps/rejected": -32.49399185180664,
"loss": 0.6934,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -0.0002466029836796224,
"rewards/margins": -0.0004333632532507181,
"rewards/rejected": 0.00018676018225960433,
"step": 40
},
{
"epoch": 0.13,
"learning_rate": 4.987541037542187e-06,
"logits/chosen": -1.862633466720581,
"logits/rejected": -1.8518692255020142,
"logps/chosen": -33.55931091308594,
"logps/rejected": -35.44870376586914,
"loss": 0.6932,
"rewards/accuracies": 0.5,
"rewards/chosen": -1.831089502957184e-05,
"rewards/margins": -5.47249146620743e-05,
"rewards/rejected": 3.641402145149186e-05,
"step": 50
},
{
"epoch": 0.16,
"learning_rate": 4.954691471941119e-06,
"logits/chosen": -1.9409154653549194,
"logits/rejected": -1.9428699016571045,
"logps/chosen": -32.53916549682617,
"logps/rejected": -33.24130630493164,
"loss": 0.6925,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.0006101715262047946,
"rewards/margins": 0.0013094183523207903,
"rewards/rejected": -0.0006992466514930129,
"step": 60
},
{
"epoch": 0.18,
"learning_rate": 4.901618883413549e-06,
"logits/chosen": -2.070591926574707,
"logits/rejected": -2.075544834136963,
"logps/chosen": -34.023067474365234,
"logps/rejected": -36.647151947021484,
"loss": 0.6929,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": -0.00034581663203425705,
"rewards/margins": 0.0004369783273432404,
"rewards/rejected": -0.0007827949011698365,
"step": 70
},
{
"epoch": 0.21,
"learning_rate": 4.828760511501322e-06,
"logits/chosen": -1.9308092594146729,
"logits/rejected": -1.933943748474121,
"logps/chosen": -34.318023681640625,
"logps/rejected": -34.67802429199219,
"loss": 0.6922,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": 0.0010978971840813756,
"rewards/margins": 0.0019540609791874886,
"rewards/rejected": -0.000856163795106113,
"step": 80
},
{
"epoch": 0.23,
"learning_rate": 4.7367166013034295e-06,
"logits/chosen": -1.9380912780761719,
"logits/rejected": -1.9425855875015259,
"logps/chosen": -32.38385009765625,
"logps/rejected": -32.35346603393555,
"loss": 0.6928,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.0008357145707122982,
"rewards/margins": 0.0007813175907358527,
"rewards/rejected": 5.439693995867856e-05,
"step": 90
},
{
"epoch": 0.26,
"learning_rate": 4.626245458345211e-06,
"logits/chosen": -2.035137891769409,
"logits/rejected": -2.0331528186798096,
"logps/chosen": -32.112831115722656,
"logps/rejected": -31.29166030883789,
"loss": 0.6924,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.0012800416443496943,
"rewards/margins": 0.0015345367137342691,
"rewards/rejected": -0.0002544948656577617,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": -2.2300801277160645,
"eval_logits/rejected": -2.225238084793091,
"eval_logps/chosen": -34.04683303833008,
"eval_logps/rejected": -37.53927230834961,
"eval_loss": 0.6930972337722778,
"eval_rewards/accuracies": 0.5186877250671387,
"eval_rewards/chosen": -0.00012280470400583,
"eval_rewards/margins": 0.00010372586984885857,
"eval_rewards/rejected": -0.0002265305956825614,
"eval_runtime": 145.7259,
"eval_samples_per_second": 2.354,
"eval_steps_per_second": 0.295,
"step": 100
},
{
"epoch": 0.29,
"learning_rate": 4.498257201263691e-06,
"logits/chosen": -1.989782691001892,
"logits/rejected": -1.9873950481414795,
"logps/chosen": -33.12385559082031,
"logps/rejected": -34.011810302734375,
"loss": 0.6926,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.0011996207758784294,
"rewards/margins": 0.001024017808958888,
"rewards/rejected": 0.00017560287960805,
"step": 110
},
{
"epoch": 0.31,
"learning_rate": 4.353806263777678e-06,
"logits/chosen": -2.0008151531219482,
"logits/rejected": -1.992500901222229,
"logps/chosen": -32.320838928222656,
"logps/rejected": -32.128170013427734,
"loss": 0.6927,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": 0.001240686746314168,
"rewards/margins": 0.0009073130786418915,
"rewards/rejected": 0.0003333735803607851,
"step": 120
},
{
"epoch": 0.34,
"learning_rate": 4.1940827077152755e-06,
"logits/chosen": -2.0289230346679688,
"logits/rejected": -2.020946502685547,
"logps/chosen": -30.313907623291016,
"logps/rejected": -32.086116790771484,
"loss": 0.6922,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.0016180993989109993,
"rewards/margins": 0.0019491963321343064,
"rewards/rejected": -0.00033109664218500257,
"step": 130
},
{
"epoch": 0.36,
"learning_rate": 4.0204024186666215e-06,
"logits/chosen": -1.9592479467391968,
"logits/rejected": -1.9694607257843018,
"logps/chosen": -31.223953247070312,
"logps/rejected": -32.547454833984375,
"loss": 0.6921,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.0019570994190871716,
"rewards/margins": 0.002082846825942397,
"rewards/rejected": -0.00012574761058203876,
"step": 140
},
{
"epoch": 0.39,
"learning_rate": 3.834196265035119e-06,
"logits/chosen": -1.8708124160766602,
"logits/rejected": -1.8719879388809204,
"logps/chosen": -33.877174377441406,
"logps/rejected": -34.78774642944336,
"loss": 0.6914,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.003155181184411049,
"rewards/margins": 0.0034600873477756977,
"rewards/rejected": -0.00030490627977997065,
"step": 150
},
{
"epoch": 0.42,
"learning_rate": 3.636998309800573e-06,
"logits/chosen": -1.9215673208236694,
"logits/rejected": -1.9181665182113647,
"logps/chosen": -36.011531829833984,
"logps/rejected": -32.685707092285156,
"loss": 0.6925,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.001601455733180046,
"rewards/margins": 0.001231002388522029,
"rewards/rejected": 0.00037045328645035625,
"step": 160
},
{
"epoch": 0.44,
"learning_rate": 3.4304331721118078e-06,
"logits/chosen": -2.021604061126709,
"logits/rejected": -2.014291524887085,
"logps/chosen": -33.482086181640625,
"logps/rejected": -31.404422760009766,
"loss": 0.6913,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.0031577465124428272,
"rewards/margins": 0.003683448536321521,
"rewards/rejected": -0.0005257020820863545,
"step": 170
},
{
"epoch": 0.47,
"learning_rate": 3.2162026428305436e-06,
"logits/chosen": -2.027444839477539,
"logits/rejected": -2.032665729522705,
"logps/chosen": -32.183101654052734,
"logps/rejected": -32.39936065673828,
"loss": 0.6918,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.0036168727092444897,
"rewards/margins": 0.0027590212412178516,
"rewards/rejected": 0.0008578516426496208,
"step": 180
},
{
"epoch": 0.49,
"learning_rate": 2.996071664294641e-06,
"logits/chosen": -2.027879238128662,
"logits/rejected": -2.025132656097412,
"logps/chosen": -31.258464813232422,
"logps/rejected": -31.348388671875,
"loss": 0.6919,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.0023048892617225647,
"rewards/margins": 0.0026066480204463005,
"rewards/rejected": -0.000301758642308414,
"step": 190
},
{
"epoch": 0.52,
"learning_rate": 2.7718537898066833e-06,
"logits/chosen": -1.8983129262924194,
"logits/rejected": -1.902967095375061,
"logps/chosen": -31.276391983032227,
"logps/rejected": -32.81935119628906,
"loss": 0.6914,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.0032989257015287876,
"rewards/margins": 0.00358308176510036,
"rewards/rejected": -0.00028415597626008093,
"step": 200
},
{
"epoch": 0.52,
"eval_logits/chosen": -2.2249655723571777,
"eval_logits/rejected": -2.220139503479004,
"eval_logps/chosen": -34.04255294799805,
"eval_logps/rejected": -37.55300521850586,
"eval_loss": 0.6930080056190491,
"eval_rewards/accuracies": 0.5245016813278198,
"eval_rewards/chosen": -8.006239659152925e-05,
"eval_rewards/margins": 0.00028380370349623263,
"eval_rewards/rejected": -0.00036386612919159234,
"eval_runtime": 145.5269,
"eval_samples_per_second": 2.357,
"eval_steps_per_second": 0.295,
"step": 200
},
{
"epoch": 0.55,
"learning_rate": 2.5453962426402006e-06,
"logits/chosen": -2.010593891143799,
"logits/rejected": -2.021207332611084,
"logps/chosen": -31.7437801361084,
"logps/rejected": -33.93886947631836,
"loss": 0.6916,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.0022752191871404648,
"rewards/margins": 0.003036911366507411,
"rewards/rejected": -0.000761692295782268,
"step": 210
},
{
"epoch": 0.57,
"learning_rate": 2.3185646976551794e-06,
"logits/chosen": -1.9029136896133423,
"logits/rejected": -1.9176632165908813,
"logps/chosen": -29.78145408630371,
"logps/rejected": -31.63638687133789,
"loss": 0.6911,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.003300876123830676,
"rewards/margins": 0.0040829661302268505,
"rewards/rejected": -0.000782089657150209,
"step": 220
},
{
"epoch": 0.6,
"learning_rate": 2.0932279108998323e-06,
"logits/chosen": -1.9591538906097412,
"logits/rejected": -1.9631026983261108,
"logps/chosen": -33.05189895629883,
"logps/rejected": -31.594707489013672,
"loss": 0.6911,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.003655704203993082,
"rewards/margins": 0.004109731875360012,
"rewards/rejected": -0.00045402703108265996,
"step": 230
},
{
"epoch": 0.62,
"learning_rate": 1.8712423238279358e-06,
"logits/chosen": -1.9572566747665405,
"logits/rejected": -1.9354870319366455,
"logps/chosen": -33.83857727050781,
"logps/rejected": -35.12303924560547,
"loss": 0.6907,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.0031517534516751766,
"rewards/margins": 0.004874187987297773,
"rewards/rejected": -0.0017224351176992059,
"step": 240
},
{
"epoch": 0.65,
"learning_rate": 1.6544367689701824e-06,
"logits/chosen": -1.998875379562378,
"logits/rejected": -1.9955555200576782,
"logps/chosen": -32.72559356689453,
"logps/rejected": -36.2435417175293,
"loss": 0.6921,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.0019947518594563007,
"rewards/margins": 0.0021809376776218414,
"rewards/rejected": -0.0001861859782366082,
"step": 250
},
{
"epoch": 0.68,
"learning_rate": 1.4445974030621963e-06,
"logits/chosen": -1.8661177158355713,
"logits/rejected": -1.8636993169784546,
"logps/chosen": -33.959014892578125,
"logps/rejected": -35.526344299316406,
"loss": 0.6919,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.0022938635665923357,
"rewards/margins": 0.0025111136492341757,
"rewards/rejected": -0.0002172500389860943,
"step": 260
},
{
"epoch": 0.7,
"learning_rate": 1.243452991757889e-06,
"logits/chosen": -1.8511241674423218,
"logits/rejected": -1.8487510681152344,
"logps/chosen": -34.16337585449219,
"logps/rejected": -31.830408096313477,
"loss": 0.6917,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.002342230873182416,
"rewards/margins": 0.0029330006800591946,
"rewards/rejected": -0.0005907699232921004,
"step": 270
},
{
"epoch": 0.73,
"learning_rate": 1.0526606671603523e-06,
"logits/chosen": -1.9535648822784424,
"logits/rejected": -1.943101167678833,
"logps/chosen": -35.01304244995117,
"logps/rejected": -31.87521743774414,
"loss": 0.6913,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.0034359837882220745,
"rewards/margins": 0.0037782168947160244,
"rewards/rejected": -0.0003422332229092717,
"step": 280
},
{
"epoch": 0.75,
"learning_rate": 8.737922755071455e-07,
"logits/chosen": -2.048783302307129,
"logits/rejected": -2.0339112281799316,
"logps/chosen": -30.716812133789062,
"logps/rejected": -32.62614059448242,
"loss": 0.6922,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.0020731096155941486,
"rewards/margins": 0.001816184027120471,
"rewards/rejected": 0.00025692558847367764,
"step": 290
},
{
"epoch": 0.78,
"learning_rate": 7.08321427484816e-07,
"logits/chosen": -1.9198474884033203,
"logits/rejected": -1.917340636253357,
"logps/chosen": -32.29683303833008,
"logps/rejected": -30.91409683227539,
"loss": 0.6895,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.0063027567230165005,
"rewards/margins": 0.007275627460330725,
"rewards/rejected": -0.0009728703880682588,
"step": 300
},
{
"epoch": 0.78,
"eval_logits/chosen": -2.221111297607422,
"eval_logits/rejected": -2.2162926197052,
"eval_logps/chosen": -34.0648307800293,
"eval_logps/rejected": -37.58684158325195,
"eval_loss": 0.6929495930671692,
"eval_rewards/accuracies": 0.5419435501098633,
"eval_rewards/chosen": -0.0003027978236787021,
"eval_rewards/margins": 0.0003993964346591383,
"eval_rewards/rejected": -0.0007021942874416709,
"eval_runtime": 145.7415,
"eval_samples_per_second": 2.353,
"eval_steps_per_second": 0.295,
"step": 300
},
{
"epoch": 0.81,
"learning_rate": 4.84533120650964e-06,
"logits/chosen": -1.9055675268173218,
"logits/rejected": -1.902345895767212,
"logps/chosen": -31.301956176757812,
"logps/rejected": -33.823036193847656,
"loss": 0.6912,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.0030139132868498564,
"rewards/margins": 0.0038227462209761143,
"rewards/rejected": -0.0008088329923339188,
"step": 310
},
{
"epoch": 0.83,
"learning_rate": 4.825108134172131e-06,
"logits/chosen": -1.951906442642212,
"logits/rejected": -1.939772605895996,
"logps/chosen": -34.27196502685547,
"logps/rejected": -33.685001373291016,
"loss": 0.6908,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.003241057973355055,
"rewards/margins": 0.004702677950263023,
"rewards/rejected": -0.0014616195112466812,
"step": 320
},
{
"epoch": 0.86,
"learning_rate": 4.80369052967602e-06,
"logits/chosen": -1.9854780435562134,
"logits/rejected": -1.9840580224990845,
"logps/chosen": -33.0145378112793,
"logps/rejected": -32.56486511230469,
"loss": 0.6904,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.004712558351457119,
"rewards/margins": 0.005565387196838856,
"rewards/rejected": -0.0008528297767043114,
"step": 330
},
{
"epoch": 0.88,
"learning_rate": 4.781089396387968e-06,
"logits/chosen": -2.070883274078369,
"logits/rejected": -2.055272102355957,
"logps/chosen": -33.69978713989258,
"logps/rejected": -33.0802001953125,
"loss": 0.6909,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.004796043038368225,
"rewards/margins": 0.004417680203914642,
"rewards/rejected": 0.0003783629508689046,
"step": 340
},
{
"epoch": 0.91,
"learning_rate": 4.757316345716554e-06,
"logits/chosen": -1.944435477256775,
"logits/rejected": -1.943645715713501,
"logps/chosen": -32.76495361328125,
"logps/rejected": -32.4921760559082,
"loss": 0.6905,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.0051714470610022545,
"rewards/margins": 0.005441715009510517,
"rewards/rejected": -0.0002702682395465672,
"step": 350
},
{
"epoch": 0.94,
"learning_rate": 4.73238359114687e-06,
"logits/chosen": -1.8958152532577515,
"logits/rejected": -1.9060084819793701,
"logps/chosen": -31.695724487304688,
"logps/rejected": -35.41404342651367,
"loss": 0.69,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.005089647602289915,
"rewards/margins": 0.0063424864783883095,
"rewards/rejected": -0.0012528380611911416,
"step": 360
},
{
"epoch": 0.96,
"learning_rate": 4.706303941965804e-06,
"logits/chosen": -2.029942035675049,
"logits/rejected": -2.0236124992370605,
"logps/chosen": -33.23334884643555,
"logps/rejected": -29.281543731689453,
"loss": 0.6908,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.004186153877526522,
"rewards/margins": 0.004737343639135361,
"rewards/rejected": -0.0005511896451935172,
"step": 370
},
{
"epoch": 0.99,
"learning_rate": 4.679090796681225e-06,
"logits/chosen": -1.8858661651611328,
"logits/rejected": -1.8880888223648071,
"logps/chosen": -33.61238098144531,
"logps/rejected": -30.986286163330078,
"loss": 0.6895,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.0062666991725564,
"rewards/margins": 0.007403238210827112,
"rewards/rejected": -0.001136539620347321,
"step": 380
},
{
"epoch": 1.01,
"learning_rate": 4.650758136138454e-06,
"logits/chosen": -1.9126472473144531,
"logits/rejected": -1.9113785028457642,
"logps/chosen": -33.73168182373047,
"logps/rejected": -36.05659484863281,
"loss": 0.6879,
"rewards/accuracies": 0.7458333373069763,
"rewards/chosen": 0.006531029939651489,
"rewards/margins": 0.010536923073232174,
"rewards/rejected": -0.004005893599241972,
"step": 390
},
{
"epoch": 1.04,
"learning_rate": 4.621320516337559e-06,
"logits/chosen": -1.8457567691802979,
"logits/rejected": -1.8373829126358032,
"logps/chosen": -30.92877197265625,
"logps/rejected": -36.478904724121094,
"loss": 0.6863,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 0.008452029898762703,
"rewards/margins": 0.013835062272846699,
"rewards/rejected": -0.005383032839745283,
"step": 400
},
{
"epoch": 1.04,
"eval_logits/chosen": -2.193706512451172,
"eval_logits/rejected": -2.1888742446899414,
"eval_logps/chosen": -34.14311218261719,
"eval_logps/rejected": -37.68904113769531,
"eval_loss": 0.6928316950798035,
"eval_rewards/accuracies": 0.5681062936782837,
"eval_rewards/chosen": -0.0010856210719794035,
"eval_rewards/margins": 0.0006385648157447577,
"eval_rewards/rejected": -0.001724186004139483,
"eval_runtime": 146.0208,
"eval_samples_per_second": 2.349,
"eval_steps_per_second": 0.294,
"step": 400
},
{
"epoch": 1.06,
"learning_rate": 4.590793060955158e-06,
"logits/chosen": -2.0138370990753174,
"logits/rejected": -2.0166878700256348,
"logps/chosen": -32.178985595703125,
"logps/rejected": -35.35575485229492,
"loss": 0.686,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": 0.008749553933739662,
"rewards/margins": 0.014378642663359642,
"rewards/rejected": -0.005629089195281267,
"step": 410
},
{
"epoch": 1.09,
"learning_rate": 4.559191453574582e-06,
"logits/chosen": -1.8486782312393188,
"logits/rejected": -1.8472837209701538,
"logps/chosen": -28.309524536132812,
"logps/rejected": -32.836753845214844,
"loss": 0.6868,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": 0.0075803459621965885,
"rewards/margins": 0.012771248817443848,
"rewards/rejected": -0.005190903786569834,
"step": 420
},
{
"epoch": 1.12,
"learning_rate": 4.52653192962838e-06,
"logits/chosen": -1.802756905555725,
"logits/rejected": -1.7958400249481201,
"logps/chosen": -33.09931182861328,
"logps/rejected": -34.53899002075195,
"loss": 0.6868,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": 0.009747742675244808,
"rewards/margins": 0.012684956192970276,
"rewards/rejected": -0.002937213983386755,
"step": 430
},
{
"epoch": 1.14,
"learning_rate": 4.492831268057307e-06,
"logits/chosen": -1.9703264236450195,
"logits/rejected": -1.9651902914047241,
"logps/chosen": -30.736658096313477,
"logps/rejected": -32.6190071105957,
"loss": 0.6847,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 0.010466375388205051,
"rewards/margins": 0.01698034629225731,
"rewards/rejected": -0.006513969041407108,
"step": 440
},
{
"epoch": 1.17,
"learning_rate": 4.458106782690094e-06,
"logits/chosen": -1.8493196964263916,
"logits/rejected": -1.8536157608032227,
"logps/chosen": -33.46088409423828,
"logps/rejected": -33.30448532104492,
"loss": 0.6844,
"rewards/accuracies": 0.9125000238418579,
"rewards/chosen": 0.010549607686698437,
"rewards/margins": 0.01770811341702938,
"rewards/rejected": -0.007158507592976093,
"step": 450
},
{
"epoch": 1.19,
"learning_rate": 4.422376313348405e-06,
"logits/chosen": -1.8494908809661865,
"logits/rejected": -1.843927025794983,
"logps/chosen": -34.2591552734375,
"logps/rejected": -35.904815673828125,
"loss": 0.6827,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.011603695340454578,
"rewards/margins": 0.02116088569164276,
"rewards/rejected": -0.009557187557220459,
"step": 460
},
{
"epoch": 1.22,
"learning_rate": 4.3856582166815696e-06,
"logits/chosen": -1.868131399154663,
"logits/rejected": -1.8679981231689453,
"logps/chosen": -33.08659362792969,
"logps/rejected": -34.75391387939453,
"loss": 0.685,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 0.01081737782806158,
"rewards/margins": 0.0165016558021307,
"rewards/rejected": -0.005684278905391693,
"step": 470
},
{
"epoch": 1.25,
"learning_rate": 4.347971356735789e-06,
"logits/chosen": -1.9114658832550049,
"logits/rejected": -1.8928560018539429,
"logps/chosen": -32.96870040893555,
"logps/rejected": -33.964908599853516,
"loss": 0.6828,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": 0.011920640245079994,
"rewards/margins": 0.020869914442300797,
"rewards/rejected": -0.008949270471930504,
"step": 480
},
{
"epoch": 1.27,
"learning_rate": 4.309335095262675e-06,
"logits/chosen": -1.8733381032943726,
"logits/rejected": -1.8726457357406616,
"logps/chosen": -30.497507095336914,
"logps/rejected": -31.803579330444336,
"loss": 0.6849,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": 0.011137938126921654,
"rewards/margins": 0.016713283956050873,
"rewards/rejected": -0.005575346294790506,
"step": 490
},
{
"epoch": 1.3,
"learning_rate": 4.269769281772082e-06,
"logits/chosen": -1.8297357559204102,
"logits/rejected": -1.8228442668914795,
"logps/chosen": -31.466567993164062,
"logps/rejected": -35.563499450683594,
"loss": 0.6826,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": 0.012035631574690342,
"rewards/margins": 0.021403178572654724,
"rewards/rejected": -0.009367546997964382,
"step": 500
},
{
"epoch": 1.3,
"eval_logits/chosen": -2.122058868408203,
"eval_logits/rejected": -2.117284059524536,
"eval_logps/chosen": -34.34208297729492,
"eval_logps/rejected": -37.94715118408203,
"eval_loss": 0.6925419569015503,
"eval_rewards/accuracies": 0.5651993155479431,
"eval_rewards/chosen": -0.003075304673984647,
"eval_rewards/margins": 0.001229992602020502,
"eval_rewards/rejected": -0.0043052975088357925,
"eval_runtime": 145.8949,
"eval_samples_per_second": 2.351,
"eval_steps_per_second": 0.295,
"step": 500
},
{
"epoch": 1.32,
"learning_rate": 4.22929424333435e-06,
"logits/chosen": -1.8198668956756592,
"logits/rejected": -1.8234672546386719,
"logps/chosen": -28.312463760375977,
"logps/rejected": -33.89719772338867,
"loss": 0.6837,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 0.008623605594038963,
"rewards/margins": 0.019002709537744522,
"rewards/rejected": -0.010379104875028133,
"step": 510
},
{
"epoch": 1.35,
"learning_rate": 4.1879307741372085e-06,
"logits/chosen": -1.8108766078948975,
"logits/rejected": -1.8216520547866821,
"logps/chosen": -32.165672302246094,
"logps/rejected": -31.733028411865234,
"loss": 0.6824,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 0.010966637171804905,
"rewards/margins": 0.021780062466859818,
"rewards/rejected": -0.010813427157700062,
"step": 520
},
{
"epoch": 1.38,
"learning_rate": 4.145700124802693e-06,
"logits/chosen": -1.74923837184906,
"logits/rejected": -1.7469356060028076,
"logps/chosen": -30.605663299560547,
"logps/rejected": -31.276514053344727,
"loss": 0.6825,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": 0.010614162310957909,
"rewards/margins": 0.021611668169498444,
"rewards/rejected": -0.010997505858540535,
"step": 530
},
{
"epoch": 1.4,
"learning_rate": 4.102623991469562e-06,
"logits/chosen": -1.816229224205017,
"logits/rejected": -1.8094854354858398,
"logps/chosen": -33.24816131591797,
"logps/rejected": -34.189598083496094,
"loss": 0.6825,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.009846633300185204,
"rewards/margins": 0.021567735821008682,
"rewards/rejected": -0.011721103452146053,
"step": 540
},
{
"epoch": 1.43,
"learning_rate": 4.058724504646834e-06,
"logits/chosen": -1.7789214849472046,
"logits/rejected": -1.7853628396987915,
"logps/chosen": -30.978107452392578,
"logps/rejected": -33.693607330322266,
"loss": 0.6847,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.007750834338366985,
"rewards/margins": 0.017099570482969284,
"rewards/rejected": -0.009348735213279724,
"step": 550
},
{
"epoch": 1.45,
"learning_rate": 4.014024217844167e-06,
"logits/chosen": -1.8461157083511353,
"logits/rejected": -1.8232545852661133,
"logps/chosen": -30.5151424407959,
"logps/rejected": -33.84736633300781,
"loss": 0.6844,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": 0.009224263951182365,
"rewards/margins": 0.01773734949529171,
"rewards/rejected": -0.008513087406754494,
"step": 560
},
{
"epoch": 1.48,
"learning_rate": 3.968546095984911e-06,
"logits/chosen": -1.7744262218475342,
"logits/rejected": -1.769487738609314,
"logps/chosen": -31.48854637145996,
"logps/rejected": -33.041587829589844,
"loss": 0.684,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.009281843900680542,
"rewards/margins": 0.018488582223653793,
"rewards/rejected": -0.009206734597682953,
"step": 570
},
{
"epoch": 1.51,
"learning_rate": 3.922313503607806e-06,
"logits/chosen": -1.8039462566375732,
"logits/rejected": -1.8057708740234375,
"logps/chosen": -33.585567474365234,
"logps/rejected": -36.357948303222656,
"loss": 0.6816,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 0.007902255281805992,
"rewards/margins": 0.023472566157579422,
"rewards/rejected": -0.015570309944450855,
"step": 580
},
{
"epoch": 1.53,
"learning_rate": 3.875350192863368e-06,
"logits/chosen": -1.779675841331482,
"logits/rejected": -1.7792049646377563,
"logps/chosen": -29.52834129333496,
"logps/rejected": -32.76404571533203,
"loss": 0.6815,
"rewards/accuracies": 0.875,
"rewards/chosen": 0.010637165978550911,
"rewards/margins": 0.023661229759454727,
"rewards/rejected": -0.013024063780903816,
"step": 590
},
{
"epoch": 1.56,
"learning_rate": 3.8276802913111436e-06,
"logits/chosen": -1.7808748483657837,
"logits/rejected": -1.778590440750122,
"logps/chosen": -32.0461540222168,
"logps/rejected": -33.55706024169922,
"loss": 0.6823,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": 0.009521286003291607,
"rewards/margins": 0.02209232933819294,
"rewards/rejected": -0.012571041472256184,
"step": 600
},
{
"epoch": 1.56,
"eval_logits/chosen": -2.0287230014801025,
"eval_logits/rejected": -2.024071455001831,
"eval_logps/chosen": -34.73115539550781,
"eval_logps/rejected": -38.49046325683594,
"eval_loss": 0.6917924880981445,
"eval_rewards/accuracies": 0.5830564498901367,
"eval_rewards/chosen": -0.006966045591980219,
"eval_rewards/margins": 0.0027723864186555147,
"eval_rewards/rejected": -0.009738431312143803,
"eval_runtime": 145.7839,
"eval_samples_per_second": 2.353,
"eval_steps_per_second": 0.295,
"step": 600
},
{
"epoch": 1.58,
"learning_rate": 3.7793282895240927e-06,
"logits/chosen": -1.8076483011245728,
"logits/rejected": -1.8139461278915405,
"logps/chosen": -31.64394187927246,
"logps/rejected": -33.57398986816406,
"loss": 0.6828,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 0.0059343790635466576,
"rewards/margins": 0.02104238048195839,
"rewards/rejected": -0.015108002349734306,
"step": 610
},
{
"epoch": 1.61,
"learning_rate": 3.730319028506478e-06,
"logits/chosen": -1.753603219985962,
"logits/rejected": -1.75141179561615,
"logps/chosen": -33.820560455322266,
"logps/rejected": -32.37050247192383,
"loss": 0.6817,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.009353202767670155,
"rewards/margins": 0.0232031662017107,
"rewards/rejected": -0.013849964365363121,
"step": 620
},
{
"epoch": 1.64,
"learning_rate": 3.6806776869317074e-06,
"logits/chosen": -1.6926358938217163,
"logits/rejected": -1.686195731163025,
"logps/chosen": -34.429847717285156,
"logps/rejected": -33.97523880004883,
"loss": 0.6801,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": 0.01034373790025711,
"rewards/margins": 0.026524048298597336,
"rewards/rejected": -0.016180310398340225,
"step": 630
},
{
"epoch": 1.66,
"learning_rate": 3.6304297682067146e-06,
"logits/chosen": -1.7083446979522705,
"logits/rejected": -1.7146565914154053,
"logps/chosen": -33.29853820800781,
"logps/rejected": -34.668426513671875,
"loss": 0.6827,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.006807624362409115,
"rewards/margins": 0.021089300513267517,
"rewards/rejected": -0.014281675219535828,
"step": 640
},
{
"epoch": 1.69,
"learning_rate": 3.579601087369492e-06,
"logits/chosen": -1.7786967754364014,
"logits/rejected": -1.792654037475586,
"logps/chosen": -31.198848724365234,
"logps/rejected": -33.51192855834961,
"loss": 0.6825,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.006536136381328106,
"rewards/margins": 0.021678542718291283,
"rewards/rejected": -0.015142406336963177,
"step": 650
},
{
"epoch": 1.71,
"learning_rate": 3.5282177578265295e-06,
"logits/chosen": -1.6418495178222656,
"logits/rejected": -1.6386057138442993,
"logps/chosen": -32.84505081176758,
"logps/rejected": -36.883094787597656,
"loss": 0.6772,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": 0.010779094882309437,
"rewards/margins": 0.03260749578475952,
"rewards/rejected": -0.02182840369641781,
"step": 660
},
{
"epoch": 1.74,
"learning_rate": 3.476306177936961e-06,
"logits/chosen": -1.7246978282928467,
"logits/rejected": -1.7246736288070679,
"logps/chosen": -30.864843368530273,
"logps/rejected": -36.09869384765625,
"loss": 0.6799,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.004207477904856205,
"rewards/margins": 0.026930591091513634,
"rewards/rejected": -0.022723112255334854,
"step": 670
},
{
"epoch": 1.77,
"learning_rate": 3.423893017450324e-06,
"logits/chosen": -1.6627376079559326,
"logits/rejected": -1.6593656539916992,
"logps/chosen": -30.347408294677734,
"logps/rejected": -34.78777313232422,
"loss": 0.6809,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.005249389447271824,
"rewards/margins": 0.0249490849673748,
"rewards/rejected": -0.0196996983140707,
"step": 680
},
{
"epoch": 1.79,
"learning_rate": 3.3710052038048794e-06,
"logits/chosen": -1.676200270652771,
"logits/rejected": -1.6763780117034912,
"logps/chosen": -29.362756729125977,
"logps/rejected": -32.716041564941406,
"loss": 0.6792,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": 0.008003375492990017,
"rewards/margins": 0.028458837419748306,
"rewards/rejected": -0.020455462858080864,
"step": 690
},
{
"epoch": 1.82,
"learning_rate": 3.3176699082935546e-06,
"logits/chosen": -1.5964815616607666,
"logits/rejected": -1.599886417388916,
"logps/chosen": -33.50843048095703,
"logps/rejected": -33.53223419189453,
"loss": 0.6784,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.009700378403067589,
"rewards/margins": 0.03037584200501442,
"rewards/rejected": -0.02067546173930168,
"step": 700
},
{
"epoch": 1.82,
"eval_logits/chosen": -1.9464259147644043,
"eval_logits/rejected": -1.9419163465499878,
"eval_logps/chosen": -35.35507583618164,
"eval_logps/rejected": -39.15093994140625,
"eval_loss": 0.6916440725326538,
"eval_rewards/accuracies": 0.565614640712738,
"eval_rewards/chosen": -0.013205258175730705,
"eval_rewards/margins": 0.0031379179563373327,
"eval_rewards/rejected": -0.01634317822754383,
"eval_runtime": 145.8665,
"eval_samples_per_second": 2.351,
"eval_steps_per_second": 0.295,
"step": 700
},
{
"epoch": 1.84,
"learning_rate": 3.2639145321045933e-06,
"logits/chosen": -1.667773962020874,
"logits/rejected": -1.6596691608428955,
"logps/chosen": -36.030296325683594,
"logps/rejected": -33.893470764160156,
"loss": 0.681,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.004817272536456585,
"rewards/margins": 0.024693841114640236,
"rewards/rejected": -0.019876569509506226,
"step": 710
},
{
"epoch": 1.87,
"learning_rate": 3.2097666922441107e-06,
"logits/chosen": -1.6702191829681396,
"logits/rejected": -1.6717958450317383,
"logps/chosen": -36.07275390625,
"logps/rejected": -35.63324737548828,
"loss": 0.6788,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": 0.0036263135261833668,
"rewards/margins": 0.02919856831431389,
"rewards/rejected": -0.025572258979082108,
"step": 720
},
{
"epoch": 1.9,
"learning_rate": 3.1552542073477554e-06,
"logits/chosen": -1.6882798671722412,
"logits/rejected": -1.6859245300292969,
"logps/chosen": -31.580810546875,
"logps/rejected": -34.97660446166992,
"loss": 0.6787,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.009825185872614384,
"rewards/margins": 0.02962224744260311,
"rewards/rejected": -0.019797060638666153,
"step": 730
},
{
"epoch": 1.92,
"learning_rate": 3.100405083388799e-06,
"logits/chosen": -1.6548511981964111,
"logits/rejected": -1.6600011587142944,
"logps/chosen": -30.910289764404297,
"logps/rejected": -35.500179290771484,
"loss": 0.6774,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": 0.008754345588386059,
"rewards/margins": 0.03220795840024948,
"rewards/rejected": -0.023453611880540848,
"step": 740
},
{
"epoch": 1.95,
"learning_rate": 3.0452474992899645e-06,
"logits/chosen": -1.609222412109375,
"logits/rejected": -1.6077518463134766,
"logps/chosen": -32.664878845214844,
"logps/rejected": -37.466697692871094,
"loss": 0.6774,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.004600999411195517,
"rewards/margins": 0.032408393919467926,
"rewards/rejected": -0.02780739590525627,
"step": 750
},
{
"epoch": 1.97,
"learning_rate": 2.989809792446417e-06,
"logits/chosen": -1.4765026569366455,
"logits/rejected": -1.472049355506897,
"logps/chosen": -35.379676818847656,
"logps/rejected": -38.33124542236328,
"loss": 0.6749,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": 0.006187664810568094,
"rewards/margins": 0.03741595149040222,
"rewards/rejected": -0.031228289008140564,
"step": 760
},
{
"epoch": 2.0,
"learning_rate": 2.9341204441673267e-06,
"logits/chosen": -1.5943939685821533,
"logits/rejected": -1.5989573001861572,
"logps/chosen": -34.98912811279297,
"logps/rejected": -36.04502487182617,
"loss": 0.6801,
"rewards/accuracies": 0.7208333611488342,
"rewards/chosen": 0.00224525248631835,
"rewards/margins": 0.026747092604637146,
"rewards/rejected": -0.024501841515302658,
"step": 770
},
{
"epoch": 2.03,
"learning_rate": 2.878208065043501e-06,
"logits/chosen": -1.542252779006958,
"logits/rejected": -1.5407251119613647,
"logps/chosen": -32.663124084472656,
"logps/rejected": -38.704864501953125,
"loss": 0.6661,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": 0.012384667061269283,
"rewards/margins": 0.05538671463727951,
"rewards/rejected": -0.04300205036997795,
"step": 780
},
{
"epoch": 2.05,
"learning_rate": 2.8221013802485974e-06,
"logits/chosen": -1.5762343406677246,
"logits/rejected": -1.5737056732177734,
"logps/chosen": -32.12613296508789,
"logps/rejected": -36.56070327758789,
"loss": 0.6701,
"rewards/accuracies": 0.875,
"rewards/chosen": 0.012496042996644974,
"rewards/margins": 0.04723736643791199,
"rewards/rejected": -0.03474132716655731,
"step": 790
},
{
"epoch": 2.08,
"learning_rate": 2.76582921478147e-06,
"logits/chosen": -1.4931247234344482,
"logits/rejected": -1.487870454788208,
"logps/chosen": -33.71710968017578,
"logps/rejected": -34.95537567138672,
"loss": 0.6729,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 0.006659658160060644,
"rewards/margins": 0.04157082363963127,
"rewards/rejected": -0.034911174327135086,
"step": 800
},
{
"epoch": 2.08,
"eval_logits/chosen": -1.8692306280136108,
"eval_logits/rejected": -1.8648308515548706,
"eval_logps/chosen": -35.941200256347656,
"eval_logps/rejected": -39.86221694946289,
"eval_loss": 0.6910557746887207,
"eval_rewards/accuracies": 0.5539867281913757,
"eval_rewards/chosen": -0.019066473469138145,
"eval_rewards/margins": 0.004389475099742413,
"eval_rewards/rejected": -0.023455949500203133,
"eval_runtime": 145.7021,
"eval_samples_per_second": 2.354,
"eval_steps_per_second": 0.295,
"step": 800
},
{
"epoch": 2.1,
"learning_rate": 2.7094204786572254e-06,
"logits/chosen": -1.579530954360962,
"logits/rejected": -1.58658766746521,
"logps/chosen": -31.1917724609375,
"logps/rejected": -38.586029052734375,
"loss": 0.6677,
"rewards/accuracies": 0.875,
"rewards/chosen": 0.009364848956465721,
"rewards/margins": 0.05266670510172844,
"rewards/rejected": -0.04330185800790787,
"step": 810
},
{
"epoch": 2.13,
"learning_rate": 2.6529041520546072e-06,
"logits/chosen": -1.5474834442138672,
"logits/rejected": -1.5493825674057007,
"logps/chosen": -31.922176361083984,
"logps/rejected": -36.21441650390625,
"loss": 0.6755,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": 0.006489004008471966,
"rewards/margins": 0.03633515536785126,
"rewards/rejected": -0.029846150428056717,
"step": 820
},
{
"epoch": 2.16,
"learning_rate": 2.5963092704273302e-06,
"logits/chosen": -1.4332886934280396,
"logits/rejected": -1.4374314546585083,
"logps/chosen": -32.152000427246094,
"logps/rejected": -39.53594207763672,
"loss": 0.6685,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": 0.004394562914967537,
"rewards/margins": 0.05065219849348068,
"rewards/rejected": -0.046257637441158295,
"step": 830
},
{
"epoch": 2.18,
"learning_rate": 2.53966490958702e-06,
"logits/chosen": -1.4998013973236084,
"logits/rejected": -1.496098518371582,
"logps/chosen": -32.54491424560547,
"logps/rejected": -36.80445098876953,
"loss": 0.6725,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.004237356595695019,
"rewards/margins": 0.04233536496758461,
"rewards/rejected": -0.03809800371527672,
"step": 840
},
{
"epoch": 2.21,
"learning_rate": 2.4830001707654135e-06,
"logits/chosen": -1.5474607944488525,
"logits/rejected": -1.549788236618042,
"logps/chosen": -31.952754974365234,
"logps/rejected": -40.619407653808594,
"loss": 0.6651,
"rewards/accuracies": 0.9375,
"rewards/chosen": 0.00822580885142088,
"rewards/margins": 0.057800523936748505,
"rewards/rejected": -0.04957471415400505,
"step": 850
},
{
"epoch": 2.23,
"learning_rate": 2.4263441656635054e-06,
"logits/chosen": -1.3754708766937256,
"logits/rejected": -1.3716084957122803,
"logps/chosen": -36.57474136352539,
"logps/rejected": -37.647613525390625,
"loss": 0.6716,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.002930630696937442,
"rewards/margins": 0.04462647810578346,
"rewards/rejected": -0.04755710810422897,
"step": 860
},
{
"epoch": 2.26,
"learning_rate": 2.3697260014953107e-06,
"logits/chosen": -1.4011175632476807,
"logits/rejected": -1.4010181427001953,
"logps/chosen": -35.5493049621582,
"logps/rejected": -40.10515213012695,
"loss": 0.6662,
"rewards/accuracies": 0.8125,
"rewards/chosen": 0.0029350135009735823,
"rewards/margins": 0.055523864924907684,
"rewards/rejected": -0.052588850259780884,
"step": 870
},
{
"epoch": 2.29,
"learning_rate": 2.3131747660339396e-06,
"logits/chosen": -1.4290226697921753,
"logits/rejected": -1.4171994924545288,
"logps/chosen": -33.74538040161133,
"logps/rejected": -38.27408981323242,
"loss": 0.6654,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": 0.0036120389122515917,
"rewards/margins": 0.057132624089717865,
"rewards/rejected": -0.05352058261632919,
"step": 880
},
{
"epoch": 2.31,
"learning_rate": 2.256719512667651e-06,
"logits/chosen": -1.5206860303878784,
"logits/rejected": -1.5256131887435913,
"logps/chosen": -33.839393615722656,
"logps/rejected": -38.63503646850586,
"loss": 0.6659,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.005094523541629314,
"rewards/margins": 0.056891001760959625,
"rewards/rejected": -0.061985522508621216,
"step": 890
},
{
"epoch": 2.34,
"learning_rate": 2.2003892454735786e-06,
"logits/chosen": -1.4437249898910522,
"logits/rejected": -1.4366403818130493,
"logps/chosen": -34.63188552856445,
"logps/rejected": -38.323524475097656,
"loss": 0.6635,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": 0.0022219305392354727,
"rewards/margins": 0.06143581122159958,
"rewards/rejected": -0.05921388417482376,
"step": 900
},
{
"epoch": 2.34,
"eval_logits/chosen": -1.7487633228302002,
"eval_logits/rejected": -1.7445435523986816,
"eval_logps/chosen": -37.12141799926758,
"eval_logps/rejected": -41.19174575805664,
"eval_loss": 0.6904172301292419,
"eval_rewards/accuracies": 0.5365448594093323,
"eval_rewards/chosen": -0.03086867742240429,
"eval_rewards/margins": 0.00588257284834981,
"eval_rewards/rejected": -0.03675125539302826,
"eval_runtime": 145.8716,
"eval_samples_per_second": 2.351,
"eval_steps_per_second": 0.295,
"step": 900
},
{
"epoch": 2.36,
"learning_rate": 2.1442129043167877e-06,
"logits/chosen": -1.433040738105774,
"logits/rejected": -1.4330635070800781,
"logps/chosen": -31.19219970703125,
"logps/rejected": -41.520694732666016,
"loss": 0.6604,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.00025037964223884046,
"rewards/margins": 0.06805966049432755,
"rewards/rejected": -0.06831003725528717,
"step": 910
},
{
"epoch": 2.39,
"learning_rate": 2.088219349982323e-06,
"logits/chosen": -1.3752285242080688,
"logits/rejected": -1.3669588565826416,
"logps/chosen": -32.70459747314453,
"logps/rejected": -40.24443817138672,
"loss": 0.6653,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.008656435646116734,
"rewards/margins": 0.058074213564395905,
"rewards/rejected": -0.06673064827919006,
"step": 920
},
{
"epoch": 2.42,
"learning_rate": 2.0324373493478803e-06,
"logits/chosen": -1.5194597244262695,
"logits/rejected": -1.517913818359375,
"logps/chosen": -30.247411727905273,
"logps/rejected": -39.21205520629883,
"loss": 0.664,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.002503753872588277,
"rewards/margins": 0.06096818298101425,
"rewards/rejected": -0.06347193568944931,
"step": 930
},
{
"epoch": 2.44,
"learning_rate": 1.976895560604729e-06,
"logits/chosen": -1.4003164768218994,
"logits/rejected": -1.4109015464782715,
"logps/chosen": -35.36278533935547,
"logps/rejected": -40.1971549987793,
"loss": 0.6632,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.009377234615385532,
"rewards/margins": 0.06269785016775131,
"rewards/rejected": -0.07207508385181427,
"step": 940
},
{
"epoch": 2.47,
"learning_rate": 1.921622518534466e-06,
"logits/chosen": -1.4363105297088623,
"logits/rejected": -1.4391801357269287,
"logps/chosen": -31.704153060913086,
"logps/rejected": -38.39413070678711,
"loss": 0.6663,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.010564305819571018,
"rewards/margins": 0.056016188114881516,
"rewards/rejected": -0.06658048927783966,
"step": 950
},
{
"epoch": 2.49,
"learning_rate": 1.8666466198491794e-06,
"logits/chosen": -1.411747932434082,
"logits/rejected": -1.4068377017974854,
"logps/chosen": -34.955177307128906,
"logps/rejected": -41.352115631103516,
"loss": 0.6619,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.008799608796834946,
"rewards/margins": 0.06565765291452408,
"rewards/rejected": -0.07445726543664932,
"step": 960
},
{
"epoch": 2.52,
"learning_rate": 1.8119961086025376e-06,
"logits/chosen": -1.3314030170440674,
"logits/rejected": -1.3333518505096436,
"logps/chosen": -33.6180534362793,
"logps/rejected": -42.525047302246094,
"loss": 0.6617,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.008053036406636238,
"rewards/margins": 0.06538151204586029,
"rewards/rejected": -0.07343455404043198,
"step": 970
},
{
"epoch": 2.55,
"learning_rate": 1.7576990616793139e-06,
"logits/chosen": -1.3536522388458252,
"logits/rejected": -1.3478691577911377,
"logps/chosen": -37.943336486816406,
"logps/rejected": -44.42793655395508,
"loss": 0.6663,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.025382736697793007,
"rewards/margins": 0.0562770739197731,
"rewards/rejected": -0.08165980130434036,
"step": 980
},
{
"epoch": 2.57,
"learning_rate": 1.7037833743707892e-06,
"logits/chosen": -1.3406635522842407,
"logits/rejected": -1.3342511653900146,
"logps/chosen": -32.205875396728516,
"logps/rejected": -44.02067565917969,
"loss": 0.6603,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.012029221281409264,
"rewards/margins": 0.0682787075638771,
"rewards/rejected": -0.08030791580677032,
"step": 990
},
{
"epoch": 2.6,
"learning_rate": 1.6502767460434588e-06,
"logits/chosen": -1.3228440284729004,
"logits/rejected": -1.3131605386734009,
"logps/chosen": -33.538795471191406,
"logps/rejected": -35.858123779296875,
"loss": 0.6719,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.01790105737745762,
"rewards/margins": 0.04468285292387009,
"rewards/rejected": -0.06258390843868256,
"step": 1000
},
{
"epoch": 2.6,
"eval_logits/chosen": -1.660041332244873,
"eval_logits/rejected": -1.6560044288635254,
"eval_logps/chosen": -38.48649597167969,
"eval_logps/rejected": -42.736080169677734,
"eval_loss": 0.6896607875823975,
"eval_rewards/accuracies": 0.5485880374908447,
"eval_rewards/chosen": -0.04451945051550865,
"eval_rewards/margins": 0.007675125263631344,
"eval_rewards/rejected": -0.05219458416104317,
"eval_runtime": 145.8291,
"eval_samples_per_second": 2.352,
"eval_steps_per_second": 0.295,
"step": 1000
},
{
"epoch": 2.62,
"learning_rate": 1.5972066659083796e-06,
"logits/chosen": -1.3907979726791382,
"logits/rejected": -1.3904699087142944,
"logps/chosen": -33.401824951171875,
"logps/rejected": -37.548805236816406,
"loss": 0.6661,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.015776285901665688,
"rewards/margins": 0.05712694674730301,
"rewards/rejected": -0.07290322333574295,
"step": 1010
},
{
"epoch": 2.65,
"learning_rate": 1.5446003988985041e-06,
"logits/chosen": -1.4413875341415405,
"logits/rejected": -1.4416849613189697,
"logps/chosen": -33.36183547973633,
"logps/rejected": -38.4256477355957,
"loss": 0.665,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.014182562939822674,
"rewards/margins": 0.05841582268476486,
"rewards/rejected": -0.07259838283061981,
"step": 1020
},
{
"epoch": 2.68,
"learning_rate": 1.4924849716612211e-06,
"logits/chosen": -1.3972914218902588,
"logits/rejected": -1.4021806716918945,
"logps/chosen": -34.44452667236328,
"logps/rejected": -34.45269775390625,
"loss": 0.6727,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.021644581109285355,
"rewards/margins": 0.043045226484537125,
"rewards/rejected": -0.06468981504440308,
"step": 1030
},
{
"epoch": 2.7,
"learning_rate": 1.440887158673332e-06,
"logits/chosen": -1.3861340284347534,
"logits/rejected": -1.377633810043335,
"logps/chosen": -32.912872314453125,
"logps/rejected": -42.408958435058594,
"loss": 0.6588,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.01734977774322033,
"rewards/margins": 0.07173751294612885,
"rewards/rejected": -0.08908729255199432,
"step": 1040
},
{
"epoch": 2.73,
"learning_rate": 1.3898334684855647e-06,
"logits/chosen": -1.3528214693069458,
"logits/rejected": -1.3631798028945923,
"logps/chosen": -35.93256378173828,
"logps/rejected": -40.24216079711914,
"loss": 0.666,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.023794613778591156,
"rewards/margins": 0.05617685988545418,
"rewards/rejected": -0.07997147738933563,
"step": 1050
},
{
"epoch": 2.75,
"learning_rate": 1.3393501301037245e-06,
"logits/chosen": -1.4136943817138672,
"logits/rejected": -1.405368447303772,
"logps/chosen": -35.31805419921875,
"logps/rejected": -45.923988342285156,
"loss": 0.6566,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.018172740936279297,
"rewards/margins": 0.07775326073169708,
"rewards/rejected": -0.09592600166797638,
"step": 1060
},
{
"epoch": 2.78,
"learning_rate": 1.2894630795134454e-06,
"logits/chosen": -1.320759892463684,
"logits/rejected": -1.3234620094299316,
"logps/chosen": -37.43547439575195,
"logps/rejected": -40.544471740722656,
"loss": 0.6625,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.014461873099207878,
"rewards/margins": 0.06400416046380997,
"rewards/rejected": -0.078466035425663,
"step": 1070
},
{
"epoch": 2.81,
"learning_rate": 1.2401979463554984e-06,
"logits/chosen": -1.4195467233657837,
"logits/rejected": -1.4183709621429443,
"logps/chosen": -34.790035247802734,
"logps/rejected": -43.464012145996094,
"loss": 0.6569,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.017323976382613182,
"rewards/margins": 0.07615931332111359,
"rewards/rejected": -0.09348328411579132,
"step": 1080
},
{
"epoch": 2.83,
"learning_rate": 1.1915800407584705e-06,
"logits/chosen": -1.4136641025543213,
"logits/rejected": -1.4168442487716675,
"logps/chosen": -32.618568420410156,
"logps/rejected": -41.96255111694336,
"loss": 0.6611,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.017252514138817787,
"rewards/margins": 0.06689468771219254,
"rewards/rejected": -0.08414719998836517,
"step": 1090
},
{
"epoch": 2.86,
"learning_rate": 1.1436343403356019e-06,
"logits/chosen": -1.3993356227874756,
"logits/rejected": -1.4037957191467285,
"logps/chosen": -35.935604095458984,
"logps/rejected": -36.58147430419922,
"loss": 0.6749,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.023666027933359146,
"rewards/margins": 0.038131967186927795,
"rewards/rejected": -0.06179799512028694,
"step": 1100
},
{
"epoch": 2.86,
"eval_logits/chosen": -1.6285730600357056,
"eval_logits/rejected": -1.6245777606964111,
"eval_logps/chosen": -38.960731506347656,
"eval_logps/rejected": -43.267208099365234,
"eval_loss": 0.6894330978393555,
"eval_rewards/accuracies": 0.5365448594093323,
"eval_rewards/chosen": -0.04926181212067604,
"eval_rewards/margins": 0.008244064636528492,
"eval_rewards/rejected": -0.057505879551172256,
"eval_runtime": 145.7626,
"eval_samples_per_second": 2.353,
"eval_steps_per_second": 0.295,
"step": 1100
},
{
"epoch": 2.88,
"learning_rate": 1.0963854773524548e-06,
"logits/chosen": -1.3872336149215698,
"logits/rejected": -1.3884273767471313,
"logps/chosen": -34.366111755371094,
"logps/rejected": -38.44298553466797,
"loss": 0.6646,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.014259574934840202,
"rewards/margins": 0.06001114100217819,
"rewards/rejected": -0.07427072525024414,
"step": 1110
},
{
"epoch": 2.91,
"learning_rate": 1.049857726072005e-06,
"logits/chosen": -1.2578824758529663,
"logits/rejected": -1.2607439756393433,
"logps/chosen": -36.28580856323242,
"logps/rejected": -40.985992431640625,
"loss": 0.6636,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.017208317294716835,
"rewards/margins": 0.06223895400762558,
"rewards/rejected": -0.07944727689027786,
"step": 1120
},
{
"epoch": 2.94,
"learning_rate": 1.0040749902836508e-06,
"logits/chosen": -1.2802751064300537,
"logits/rejected": -1.2788641452789307,
"logps/chosen": -33.49232864379883,
"logps/rejected": -38.528602600097656,
"loss": 0.67,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.024806050583720207,
"rewards/margins": 0.04882260411977768,
"rewards/rejected": -0.07362865656614304,
"step": 1130
},
{
"epoch": 2.96,
"learning_rate": 9.59060791022566e-07,
"logits/chosen": -1.3983075618743896,
"logits/rejected": -1.3943830728530884,
"logps/chosen": -34.20863723754883,
"logps/rejected": -41.15024185180664,
"loss": 0.6594,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.011688429862260818,
"rewards/margins": 0.07027387619018555,
"rewards/rejected": -0.08196230232715607,
"step": 1140
},
{
"epoch": 2.99,
"learning_rate": 9.148382544856885e-07,
"logits/chosen": -1.3060632944107056,
"logits/rejected": -1.2978880405426025,
"logps/chosen": -35.88400650024414,
"logps/rejected": -39.35108184814453,
"loss": 0.6663,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.024322666227817535,
"rewards/margins": 0.05626412108540535,
"rewards/rejected": -0.08058679848909378,
"step": 1150
},
{
"epoch": 3.01,
"learning_rate": 8.714301001505568e-07,
"logits/chosen": -1.332467794418335,
"logits/rejected": -1.3338046073913574,
"logps/chosen": -35.79069900512695,
"logps/rejected": -38.749183654785156,
"loss": 0.6671,
"rewards/accuracies": 0.7541667222976685,
"rewards/chosen": -0.021089451387524605,
"rewards/margins": 0.05438787862658501,
"rewards/rejected": -0.07547733187675476,
"step": 1160
},
{
"epoch": 3.04,
"learning_rate": 8.288586291031025e-07,
"logits/chosen": -1.4120080471038818,
"logits/rejected": -1.4067761898040771,
"logps/chosen": -35.47734069824219,
"logps/rejected": -40.39026641845703,
"loss": 0.667,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.019513698294758797,
"rewards/margins": 0.05497678369283676,
"rewards/rejected": -0.074490487575531,
"step": 1170
},
{
"epoch": 3.06,
"learning_rate": 7.871457125803897e-07,
"logits/chosen": -1.3105064630508423,
"logits/rejected": -1.3196675777435303,
"logps/chosen": -35.979042053222656,
"logps/rejected": -40.29875946044922,
"loss": 0.6678,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.026460427790880203,
"rewards/margins": 0.05291280895471573,
"rewards/rejected": -0.07937324047088623,
"step": 1180
},
{
"epoch": 3.09,
"learning_rate": 7.463127807341966e-07,
"logits/chosen": -1.3309608697891235,
"logits/rejected": -1.325539231300354,
"logps/chosen": -33.38233947753906,
"logps/rejected": -41.38855743408203,
"loss": 0.66,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.012087189592421055,
"rewards/margins": 0.06896394491195679,
"rewards/rejected": -0.08105112612247467,
"step": 1190
},
{
"epoch": 3.12,
"learning_rate": 7.063808116212021e-07,
"logits/chosen": -1.2911185026168823,
"logits/rejected": -1.2924482822418213,
"logps/chosen": -35.253963470458984,
"logps/rejected": -42.57808303833008,
"loss": 0.6551,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.017635192722082138,
"rewards/margins": 0.08054514229297638,
"rewards/rejected": -0.09818033874034882,
"step": 1200
},
{
"epoch": 3.12,
"eval_logits/chosen": -1.6238328218460083,
"eval_logits/rejected": -1.6198344230651855,
"eval_logps/chosen": -39.01145935058594,
"eval_logps/rejected": -43.33791732788086,
"eval_loss": 0.6893402338027954,
"eval_rewards/accuracies": 0.5365448594093323,
"eval_rewards/chosen": -0.049769096076488495,
"eval_rewards/margins": 0.008443917147815228,
"eval_rewards/rejected": -0.058213010430336,
"eval_runtime": 145.8737,
"eval_samples_per_second": 2.351,
"eval_steps_per_second": 0.295,
"step": 1200
},
{
"epoch": 3.14,
"learning_rate": 6.673703204254348e-07,
"logits/chosen": -1.2456345558166504,
"logits/rejected": -1.2452775239944458,
"logps/chosen": -37.221336364746094,
"logps/rejected": -42.06071853637695,
"loss": 0.6544,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.01168847642838955,
"rewards/margins": 0.08182507008314133,
"rewards/rejected": -0.09351354837417603,
"step": 1210
},
{
"epoch": 3.17,
"learning_rate": 6.293013489185315e-07,
"logits/chosen": -1.3760040998458862,
"logits/rejected": -1.3689346313476562,
"logps/chosen": -33.46622848510742,
"logps/rejected": -42.4327392578125,
"loss": 0.6562,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.017625439912080765,
"rewards/margins": 0.07791656255722046,
"rewards/rejected": -0.09554200619459152,
"step": 1220
},
{
"epoch": 3.19,
"learning_rate": 5.921934551632086e-07,
"logits/chosen": -1.2549601793289185,
"logits/rejected": -1.2447240352630615,
"logps/chosen": -35.567508697509766,
"logps/rejected": -42.02611541748047,
"loss": 0.6551,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.011818263679742813,
"rewards/margins": 0.07950461655855179,
"rewards/rejected": -0.0913228839635849,
"step": 1230
},
{
"epoch": 3.22,
"learning_rate": 5.560657034652405e-07,
"logits/chosen": -1.3439350128173828,
"logits/rejected": -1.338648796081543,
"logps/chosen": -33.191280364990234,
"logps/rejected": -36.837867736816406,
"loss": 0.6678,
"rewards/accuracies": 0.6875,
"rewards/chosen": -0.024910490959882736,
"rewards/margins": 0.05479263514280319,
"rewards/rejected": -0.07970312982797623,
"step": 1240
},
{
"epoch": 3.25,
"learning_rate": 5.2093665457911e-07,
"logits/chosen": -1.3508336544036865,
"logits/rejected": -1.3587679862976074,
"logps/chosen": -37.35521697998047,
"logps/rejected": -39.65736770629883,
"loss": 0.6635,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.019598282873630524,
"rewards/margins": 0.06179226562380791,
"rewards/rejected": -0.08139055222272873,
"step": 1250
},
{
"epoch": 3.27,
"learning_rate": 4.868243561723535e-07,
"logits/chosen": -1.3516252040863037,
"logits/rejected": -1.3514872789382935,
"logps/chosen": -35.94284439086914,
"logps/rejected": -42.4056510925293,
"loss": 0.6611,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.029142867773771286,
"rewards/margins": 0.0667649507522583,
"rewards/rejected": -0.09590782225131989,
"step": 1260
},
{
"epoch": 3.3,
"learning_rate": 4.537463335535161e-07,
"logits/chosen": -1.265148401260376,
"logits/rejected": -1.2630140781402588,
"logps/chosen": -34.26659393310547,
"logps/rejected": -43.09412384033203,
"loss": 0.6533,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.01175761315971613,
"rewards/margins": 0.08397753536701202,
"rewards/rejected": -0.09573514014482498,
"step": 1270
},
{
"epoch": 3.32,
"learning_rate": 4.217195806684629e-07,
"logits/chosen": -1.1799885034561157,
"logits/rejected": -1.1763312816619873,
"logps/chosen": -36.85099411010742,
"logps/rejected": -38.88633346557617,
"loss": 0.6628,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.016135532408952713,
"rewards/margins": 0.06346292048692703,
"rewards/rejected": -0.07959844172000885,
"step": 1280
},
{
"epoch": 3.35,
"learning_rate": 3.907605513696808e-07,
"logits/chosen": -1.353476881980896,
"logits/rejected": -1.339864730834961,
"logps/chosen": -36.990108489990234,
"logps/rejected": -45.27104568481445,
"loss": 0.6558,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.0264253169298172,
"rewards/margins": 0.0786319449543953,
"rewards/rejected": -0.1050572618842125,
"step": 1290
},
{
"epoch": 3.38,
"learning_rate": 3.6088515096305675e-07,
"logits/chosen": -1.304051160812378,
"logits/rejected": -1.3077205419540405,
"logps/chosen": -35.44999313354492,
"logps/rejected": -47.28888702392578,
"loss": 0.6489,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.019323688000440598,
"rewards/margins": 0.09253297001123428,
"rewards/rejected": -0.11185667663812637,
"step": 1300
},
{
"epoch": 3.38,
"eval_logits/chosen": -1.6211615800857544,
"eval_logits/rejected": -1.6171820163726807,
"eval_logps/chosen": -39.072174072265625,
"eval_logps/rejected": -43.41142654418945,
"eval_loss": 0.6892833113670349,
"eval_rewards/accuracies": 0.5394518375396729,
"eval_rewards/chosen": -0.050376225262880325,
"eval_rewards/margins": 0.008571851067245007,
"eval_rewards/rejected": -0.058948077261447906,
"eval_runtime": 145.8858,
"eval_samples_per_second": 2.351,
"eval_steps_per_second": 0.295,
"step": 1300
},
{
"epoch": 3.4,
"learning_rate": 3.321087280364757e-07,
"logits/chosen": -1.2905550003051758,
"logits/rejected": -1.2912893295288086,
"logps/chosen": -38.21614074707031,
"logps/rejected": -47.26013946533203,
"loss": 0.6547,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.01972118392586708,
"rewards/margins": 0.08203905820846558,
"rewards/rejected": -0.10176024585962296,
"step": 1310
},
{
"epoch": 3.43,
"learning_rate": 3.044460665744284e-07,
"logits/chosen": -1.3596677780151367,
"logits/rejected": -1.3584003448486328,
"logps/chosen": -33.941978454589844,
"logps/rejected": -39.85774230957031,
"loss": 0.6596,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": -0.01848122850060463,
"rewards/margins": 0.07056263089179993,
"rewards/rejected": -0.08904386311769485,
"step": 1320
},
{
"epoch": 3.45,
"learning_rate": 2.779113783626916e-07,
"logits/chosen": -1.2977750301361084,
"logits/rejected": -1.2993113994598389,
"logps/chosen": -35.68281936645508,
"logps/rejected": -42.771202087402344,
"loss": 0.6558,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.013085673563182354,
"rewards/margins": 0.07813958078622818,
"rewards/rejected": -0.09122525155544281,
"step": 1330
},
{
"epoch": 3.48,
"learning_rate": 2.5251829568697204e-07,
"logits/chosen": -1.3435966968536377,
"logits/rejected": -1.3425482511520386,
"logps/chosen": -32.46406555175781,
"logps/rejected": -40.374244689941406,
"loss": 0.659,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.012653985992074013,
"rewards/margins": 0.07116873562335968,
"rewards/rejected": -0.08382271975278854,
"step": 1340
},
{
"epoch": 3.51,
"learning_rate": 2.2827986432927774e-07,
"logits/chosen": -1.362319827079773,
"logits/rejected": -1.3474690914154053,
"logps/chosen": -36.95580291748047,
"logps/rejected": -47.64240264892578,
"loss": 0.6519,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.026391323655843735,
"rewards/margins": 0.08738837391138077,
"rewards/rejected": -0.11377968639135361,
"step": 1350
},
{
"epoch": 3.53,
"learning_rate": 2.0520853686560177e-07,
"logits/chosen": -1.3275715112686157,
"logits/rejected": -1.3374977111816406,
"logps/chosen": -33.35503387451172,
"logps/rejected": -41.160377502441406,
"loss": 0.6572,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.012865647673606873,
"rewards/margins": 0.07634075731039047,
"rewards/rejected": -0.08920640498399734,
"step": 1360
},
{
"epoch": 3.56,
"learning_rate": 1.833161662683672e-07,
"logits/chosen": -1.4463578462600708,
"logits/rejected": -1.4461679458618164,
"logps/chosen": -32.90170669555664,
"logps/rejected": -47.23381423950195,
"loss": 0.6432,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.008712759241461754,
"rewards/margins": 0.10531653463840485,
"rewards/rejected": -0.11402928829193115,
"step": 1370
},
{
"epoch": 3.58,
"learning_rate": 1.626139998169246e-07,
"logits/chosen": -1.3187510967254639,
"logits/rejected": -1.3259624242782593,
"logps/chosen": -35.57271957397461,
"logps/rejected": -47.857994079589844,
"loss": 0.6506,
"rewards/accuracies": 0.887499988079071,
"rewards/chosen": -0.013922369107604027,
"rewards/margins": 0.09016112238168716,
"rewards/rejected": -0.10408350080251694,
"step": 1380
},
{
"epoch": 3.61,
"learning_rate": 1.4311267331922535e-07,
"logits/chosen": -1.2786071300506592,
"logits/rejected": -1.2746905088424683,
"logps/chosen": -35.84669876098633,
"logps/rejected": -39.81802749633789,
"loss": 0.659,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.010794862173497677,
"rewards/margins": 0.07141149789094925,
"rewards/rejected": -0.0822063684463501,
"step": 1390
},
{
"epoch": 3.64,
"learning_rate": 1.2482220564763669e-07,
"logits/chosen": -1.401760458946228,
"logits/rejected": -1.4005050659179688,
"logps/chosen": -32.612770080566406,
"logps/rejected": -40.44251251220703,
"loss": 0.6597,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.013162161223590374,
"rewards/margins": 0.06981828063726425,
"rewards/rejected": -0.0829804539680481,
"step": 1400
},
{
"epoch": 3.64,
"eval_logits/chosen": -1.6204686164855957,
"eval_logits/rejected": -1.6164851188659668,
"eval_logps/chosen": -39.084259033203125,
"eval_logps/rejected": -43.422786712646484,
"eval_loss": 0.6892901659011841,
"eval_rewards/accuracies": 0.5423588156700134,
"eval_rewards/chosen": -0.050497058779001236,
"eval_rewards/margins": 0.008564572781324387,
"eval_rewards/rejected": -0.05906163901090622,
"eval_runtime": 145.7418,
"eval_samples_per_second": 2.353,
"eval_steps_per_second": 0.295,
"step": 1400
},
{
"epoch": 3.66,
"learning_rate": 1.0775199359171346e-07,
"logits/chosen": -1.372238278388977,
"logits/rejected": -1.3673722743988037,
"logps/chosen": -35.539161682128906,
"logps/rejected": -37.22252655029297,
"loss": 0.6659,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": -0.018966300413012505,
"rewards/margins": 0.05771438404917717,
"rewards/rejected": -0.07668069750070572,
"step": 1410
},
{
"epoch": 3.69,
"learning_rate": 9.191080703056604e-08,
"logits/chosen": -1.321447730064392,
"logits/rejected": -1.3224408626556396,
"logps/chosen": -34.87453079223633,
"logps/rejected": -43.16680145263672,
"loss": 0.6608,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.015096550807356834,
"rewards/margins": 0.06733135879039764,
"rewards/rejected": -0.08242791891098022,
"step": 1420
},
{
"epoch": 3.71,
"learning_rate": 7.730678442730539e-08,
"logits/chosen": -1.271436095237732,
"logits/rejected": -1.265836477279663,
"logps/chosen": -35.28139114379883,
"logps/rejected": -47.02886199951172,
"loss": 0.6503,
"rewards/accuracies": 0.862500011920929,
"rewards/chosen": -0.01139620877802372,
"rewards/margins": 0.09199832379817963,
"rewards/rejected": -0.10339454561471939,
"step": 1430
},
{
"epoch": 3.74,
"learning_rate": 6.394742864787806e-08,
"logits/chosen": -1.285681962966919,
"logits/rejected": -1.2799713611602783,
"logps/chosen": -30.83676528930664,
"logps/rejected": -40.77880096435547,
"loss": 0.6574,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.015235202386975288,
"rewards/margins": 0.07524626702070236,
"rewards/rejected": -0.0904814749956131,
"step": 1440
},
{
"epoch": 3.77,
"learning_rate": 5.183960310644748e-08,
"logits/chosen": -1.337096929550171,
"logits/rejected": -1.3268693685531616,
"logps/chosen": -34.95880889892578,
"logps/rejected": -44.96342086791992,
"loss": 0.6587,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.02477274276316166,
"rewards/margins": 0.07200786471366882,
"rewards/rejected": -0.09678061306476593,
"step": 1450
},
{
"epoch": 3.79,
"learning_rate": 4.098952823928693e-08,
"logits/chosen": -1.2949423789978027,
"logits/rejected": -1.2914998531341553,
"logps/chosen": -35.32928466796875,
"logps/rejected": -39.03660583496094,
"loss": 0.664,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.02019507996737957,
"rewards/margins": 0.06126277893781662,
"rewards/rejected": -0.08145786076784134,
"step": 1460
},
{
"epoch": 3.82,
"learning_rate": 3.1402778309014284e-08,
"logits/chosen": -1.3512227535247803,
"logits/rejected": -1.3558355569839478,
"logps/chosen": -33.2025260925293,
"logps/rejected": -41.91522979736328,
"loss": 0.656,
"rewards/accuracies": 0.8125,
"rewards/chosen": -0.011333522386848927,
"rewards/margins": 0.07819454371929169,
"rewards/rejected": -0.08952806890010834,
"step": 1470
},
{
"epoch": 3.84,
"learning_rate": 2.3084278540791427e-08,
"logits/chosen": -1.3535398244857788,
"logits/rejected": -1.3626043796539307,
"logps/chosen": -32.97187042236328,
"logps/rejected": -37.688446044921875,
"loss": 0.6605,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.012766157276928425,
"rewards/margins": 0.06783264130353928,
"rewards/rejected": -0.08059880137443542,
"step": 1480
},
{
"epoch": 3.87,
"learning_rate": 1.6038302591975807e-08,
"logits/chosen": -1.2888884544372559,
"logits/rejected": -1.2834962606430054,
"logps/chosen": -35.54216003417969,
"logps/rejected": -40.272682189941406,
"loss": 0.6634,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": -0.018856767565011978,
"rewards/margins": 0.061765290796756744,
"rewards/rejected": -0.08062206208705902,
"step": 1490
},
{
"epoch": 3.9,
"learning_rate": 1.0268470356514237e-08,
"logits/chosen": -1.3495625257492065,
"logits/rejected": -1.346825122833252,
"logps/chosen": -35.6667366027832,
"logps/rejected": -43.17388153076172,
"loss": 0.6557,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.020411662757396698,
"rewards/margins": 0.0787249356508255,
"rewards/rejected": -0.0991365909576416,
"step": 1500
},
{
"epoch": 3.9,
"eval_logits/chosen": -1.6205651760101318,
"eval_logits/rejected": -1.616579294204712,
"eval_logps/chosen": -39.0870246887207,
"eval_logps/rejected": -43.4185791015625,
"eval_loss": 0.6893215179443359,
"eval_rewards/accuracies": 0.5423588156700134,
"eval_rewards/chosen": -0.05052470788359642,
"eval_rewards/margins": 0.008494864217936993,
"eval_rewards/rejected": -0.05901956930756569,
"eval_runtime": 145.7021,
"eval_samples_per_second": 2.354,
"eval_steps_per_second": 0.295,
"step": 1500
},
{
"epoch": 3.92,
"learning_rate": 5.777746105209147e-09,
"logits/chosen": -1.4113116264343262,
"logits/rejected": -1.411259651184082,
"logps/chosen": -30.929424285888672,
"logps/rejected": -41.88774871826172,
"loss": 0.6548,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.011131499893963337,
"rewards/margins": 0.08034153282642365,
"rewards/rejected": -0.09147302061319351,
"step": 1510
},
{
"epoch": 3.95,
"learning_rate": 2.5684369628148352e-09,
"logits/chosen": -1.276719331741333,
"logits/rejected": -1.2754055261611938,
"logps/chosen": -34.34500503540039,
"logps/rejected": -42.410675048828125,
"loss": 0.6587,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.013031329028308392,
"rewards/margins": 0.07233807444572449,
"rewards/rejected": -0.08536941558122635,
"step": 1520
},
{
"epoch": 3.97,
"learning_rate": 6.421917227455999e-10,
"logits/chosen": -1.4115439653396606,
"logits/rejected": -1.4091360569000244,
"logps/chosen": -33.317054748535156,
"logps/rejected": -40.02583694458008,
"loss": 0.6612,
"rewards/accuracies": 0.8374999761581421,
"rewards/chosen": -0.018429961055517197,
"rewards/margins": 0.0669919028878212,
"rewards/rejected": -0.0854218453168869,
"step": 1530
},
{
"epoch": 4.0,
"learning_rate": 0.0,
"logits/chosen": -1.398667573928833,
"logits/rejected": -1.3992483615875244,
"logps/chosen": -33.088409423828125,
"logps/rejected": -36.9452018737793,
"loss": 0.6707,
"rewards/accuracies": 0.7791666388511658,
"rewards/chosen": -0.024684693664312363,
"rewards/margins": 0.0468655489385128,
"rewards/rejected": -0.07155025750398636,
"step": 1540
},
{
"epoch": 4.0,
"step": 1540,
"total_flos": 0.0,
"train_loss": 0.5396727961379212,
"train_runtime": 10793.7948,
"train_samples_per_second": 1.141,
"train_steps_per_second": 0.143
}
],
"logging_steps": 10,
"max_steps": 1540,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}