diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,9 +1,9 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 4.0, + "epoch": 1.0, "eval_steps": 100, - "global_step": 1540, + "global_step": 385, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -15,7 +15,7 @@ "logits/rejected": -1.7377450466156006, "logps/chosen": -29.553977966308594, "logps/rejected": -42.813133239746094, - "loss": 2500.0, + "loss": 1.0, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, @@ -25,155 +25,155 @@ { "epoch": 0.03, "learning_rate": 1.282051282051282e-06, - "logits/chosen": -1.866841197013855, - "logits/rejected": -1.871166467666626, - "logps/chosen": -36.98617172241211, - "logps/rejected": -33.65531539916992, - "loss": 2495.4616, - "rewards/accuracies": 0.5, - "rewards/chosen": 0.00020427265553735197, - "rewards/margins": 0.00045667175436392426, - "rewards/rejected": -0.0002523990988265723, + "logits/chosen": -1.8664803504943848, + "logits/rejected": -1.8707994222640991, + "logps/chosen": -36.978511810302734, + "logps/rejected": -33.66939163208008, + "loss": 0.9993, + "rewards/accuracies": 0.5694444179534912, + "rewards/chosen": 0.00028087408281862736, + "rewards/margins": 0.0006740752141922712, + "rewards/rejected": -0.00039320107316598296, "step": 10 }, { "epoch": 0.05, "learning_rate": 2.564102564102564e-06, - "logits/chosen": -1.997936487197876, - "logits/rejected": -2.0005903244018555, - "logps/chosen": -29.64678382873535, - "logps/rejected": -29.045034408569336, - "loss": 2502.3262, - "rewards/accuracies": 0.36250001192092896, - "rewards/chosen": -4.586054274113849e-05, - "rewards/margins": -0.00022994528990238905, - "rewards/rejected": 0.0001840847689891234, + "logits/chosen": -1.9984451532363892, + "logits/rejected": -2.0010995864868164, + "logps/chosen": -29.63176918029785, + "logps/rejected": -29.05954933166504, + "loss": 0.9999, + "rewards/accuracies": 0.48750001192092896, + "rewards/chosen": 0.00010425634536659345, + "rewards/margins": 6.528960511786863e-05, + "rewards/rejected": 3.8966707506915554e-05, "step": 20 }, { "epoch": 0.08, "learning_rate": 3.846153846153847e-06, - "logits/chosen": -1.9207321405410767, - "logits/rejected": -1.9180399179458618, - "logps/chosen": -31.407222747802734, - "logps/rejected": -33.223663330078125, - "loss": 2498.6508, - "rewards/accuracies": 0.5625, - "rewards/chosen": 8.869935118127614e-05, - "rewards/margins": 0.00014070476754568517, - "rewards/rejected": -5.200541272643022e-05, + "logits/chosen": -1.9210799932479858, + "logits/rejected": -1.9183847904205322, + "logps/chosen": -31.414783477783203, + "logps/rejected": -33.19659423828125, + "loss": 1.0002, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 1.3138540452928282e-05, + "rewards/margins": -0.00020548875909298658, + "rewards/rejected": 0.00021862727589905262, "step": 30 }, { "epoch": 0.1, "learning_rate": 4.999896948438434e-06, - "logits/chosen": -2.0177226066589355, - "logits/rejected": -2.0089757442474365, - "logps/chosen": -32.58082962036133, - "logps/rejected": -32.527244567871094, - "loss": 2498.9926, - "rewards/accuracies": 0.5, - "rewards/chosen": -4.022592111141421e-05, - "rewards/margins": 0.00010554380423855036, - "rewards/rejected": -0.00014576970716007054, + "logits/chosen": -2.0177221298217773, + "logits/rejected": -2.008965492248535, + "logps/chosen": -32.57322311401367, + "logps/rejected": -32.500308990478516, + "loss": 1.0001, + "rewards/accuracies": 0.4749999940395355, + "rewards/chosen": 3.584356090868823e-05, + "rewards/margins": -8.777440234553069e-05, + "rewards/rejected": 0.00012361796689219773, "step": 40 }, { "epoch": 0.13, "learning_rate": 4.987541037542187e-06, - "logits/chosen": -1.8629518747329712, - "logits/rejected": -1.8521617650985718, - "logps/chosen": -33.5596923828125, - "logps/rejected": -35.45528793334961, - "loss": 2499.9863, - "rewards/accuracies": 0.4749999940395355, - "rewards/chosen": -2.213427796959877e-05, - "rewards/margins": 7.289124368980993e-06, - "rewards/rejected": -2.9423434170894325e-05, + "logits/chosen": -1.8622690439224243, + "logits/rejected": -1.851509690284729, + "logps/chosen": -33.547603607177734, + "logps/rejected": -35.463592529296875, + "loss": 0.9998, + "rewards/accuracies": 0.5, + "rewards/chosen": 9.876764670480043e-05, + "rewards/margins": 0.0002112251240760088, + "rewards/rejected": -0.00011245747737120837, "step": 50 }, { "epoch": 0.16, "learning_rate": 4.954691471941119e-06, - "logits/chosen": -1.9416849613189697, - "logits/rejected": -1.9436241388320923, - "logps/chosen": -32.546897888183594, - "logps/rejected": -33.21548843383789, - "loss": 2490.4672, - "rewards/accuracies": 0.5375000238418579, - "rewards/chosen": 0.0005328331026248634, - "rewards/margins": 0.00097393908072263, - "rewards/rejected": -0.00044110597809776664, + "logits/chosen": -1.9400131702423096, + "logits/rejected": -1.9419806003570557, + "logps/chosen": -32.52842330932617, + "logps/rejected": -33.22877883911133, + "loss": 0.9987, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.0007175664068199694, + "rewards/margins": 0.0012915965635329485, + "rewards/rejected": -0.00057403021492064, "step": 60 }, { "epoch": 0.18, "learning_rate": 4.901618883413549e-06, - "logits/chosen": -2.072330951690674, - "logits/rejected": -2.0772910118103027, - "logps/chosen": -34.00098419189453, - "logps/rejected": -36.63383102416992, - "loss": 2494.9414, - "rewards/accuracies": 0.550000011920929, - "rewards/chosen": -0.00012498130672611296, - "rewards/margins": 0.0005246406653895974, - "rewards/rejected": -0.0006496219430118799, + "logits/chosen": -2.070552349090576, + "logits/rejected": -2.0755274295806885, + "logps/chosen": -34.00461959838867, + "logps/rejected": -36.64922332763672, + "loss": 0.9994, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.00016131921438500285, + "rewards/margins": 0.0006422021542675793, + "rewards/rejected": -0.0008035213686525822, "step": 70 }, { "epoch": 0.21, "learning_rate": 4.828760511501322e-06, - "logits/chosen": -1.9325841665267944, - "logits/rejected": -1.9357010126113892, - "logps/chosen": -34.33161163330078, - "logps/rejected": -34.630489349365234, - "loss": 2486.8059, - "rewards/accuracies": 0.5249999761581421, - "rewards/chosen": 0.0009619827615097165, - "rewards/margins": 0.0013428140664473176, - "rewards/rejected": -0.0003808312467299402, + "logits/chosen": -1.9306777715682983, + "logits/rejected": -1.9338254928588867, + "logps/chosen": -34.32624816894531, + "logps/rejected": -34.661468505859375, + "loss": 0.9983, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.0010156143689528108, + "rewards/margins": 0.0017062196275219321, + "rewards/rejected": -0.0006906053749844432, "step": 80 }, { "epoch": 0.23, "learning_rate": 4.7367166013034295e-06, - "logits/chosen": -1.9400427341461182, - "logits/rejected": -1.9445598125457764, - "logps/chosen": -32.36492156982422, - "logps/rejected": -32.34357452392578, - "loss": 2491.4584, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": 0.001024983124807477, - "rewards/margins": 0.0008716614102013409, - "rewards/rejected": 0.00015332190378103405, + "logits/chosen": -1.9389193058013916, + "logits/rejected": -1.9434226751327515, + "logps/chosen": -32.38957214355469, + "logps/rejected": -32.348140716552734, + "loss": 0.9993, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": 0.0007785108755342662, + "rewards/margins": 0.0006708315922878683, + "rewards/rejected": 0.00010767912317533046, "step": 90 }, { "epoch": 0.26, "learning_rate": 4.626245458345211e-06, - "logits/chosen": -2.037466526031494, - "logits/rejected": -2.0354855060577393, - "logps/chosen": -32.11969757080078, - "logps/rejected": -31.30398178100586, - "loss": 2484.2775, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": 0.001211356371641159, - "rewards/margins": 0.0015890670474618673, - "rewards/rejected": -0.00037771055940538645, + "logits/chosen": -2.0358777046203613, + "logits/rejected": -2.0339014530181885, + "logps/chosen": -32.13254165649414, + "logps/rejected": -31.29019546508789, + "loss": 0.9987, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.0010829826351255178, + "rewards/margins": 0.001322855008766055, + "rewards/rejected": -0.0002398724900558591, "step": 100 }, { "epoch": 0.26, - "eval_logits/chosen": -2.232342481613159, - "eval_logits/rejected": -2.2275006771087646, - "eval_logps/chosen": -34.01866149902344, - "eval_logps/rejected": -37.52037811279297, - "eval_loss": 2498.15966796875, - "eval_rewards/accuracies": 0.5564784407615662, - "eval_rewards/chosen": 0.00015893821546342224, - "eval_rewards/margins": 0.0001965187693713233, - "eval_rewards/rejected": -3.7580521166091785e-05, - "eval_runtime": 146.0331, + "eval_logits/chosen": -2.2312774658203125, + "eval_logits/rejected": -2.226422071456909, + "eval_logps/chosen": -34.04991149902344, + "eval_logps/rejected": -37.55283737182617, + "eval_loss": 0.9997907280921936, + "eval_rewards/accuracies": 0.5336378812789917, + "eval_rewards/chosen": -0.0001535558985779062, + "eval_rewards/margins": 0.00020861340453848243, + "eval_rewards/rejected": -0.00036216925946064293, + "eval_runtime": 146.0254, "eval_samples_per_second": 2.349, "eval_steps_per_second": 0.294, "step": 100 @@ -181,2257 +181,441 @@ { "epoch": 0.29, "learning_rate": 4.498257201263691e-06, - "logits/chosen": -1.991633415222168, - "logits/rejected": -1.9892610311508179, - "logps/chosen": -33.10456085205078, - "logps/rejected": -34.01618194580078, - "loss": 2488.0367, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": 0.0013925316743552685, - "rewards/margins": 0.0012606054078787565, - "rewards/rejected": 0.00013192615006119013, + "logits/chosen": -1.9907060861587524, + "logits/rejected": -1.9883339405059814, + "logps/chosen": -33.13169860839844, + "logps/rejected": -34.033958435058594, + "loss": 0.9988, + "rewards/accuracies": 0.6875, + "rewards/chosen": 0.0011211589444428682, + "rewards/margins": 0.0011670273961499333, + "rewards/rejected": -4.586850991472602e-05, "step": 110 }, { "epoch": 0.31, "learning_rate": 4.353806263777678e-06, - "logits/chosen": -2.003302812576294, - "logits/rejected": -1.994974136352539, - "logps/chosen": -32.31616973876953, - "logps/rejected": -32.14063262939453, - "loss": 2489.4971, - "rewards/accuracies": 0.5874999761581421, - "rewards/chosen": 0.001287340302951634, - "rewards/margins": 0.0010786365019157529, - "rewards/rejected": 0.00020870394655503333, + "logits/chosen": -2.002023458480835, + "logits/rejected": -1.993699312210083, + "logps/chosen": -32.341697692871094, + "logps/rejected": -32.16511917114258, + "loss": 0.9989, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.0010320657165721059, + "rewards/margins": 0.001068194629624486, + "rewards/rejected": -3.612901855376549e-05, "step": 120 }, { "epoch": 0.34, "learning_rate": 4.1940827077152755e-06, - "logits/chosen": -2.0306484699249268, - "logits/rejected": -2.022704601287842, - "logps/chosen": -30.306324005126953, - "logps/rejected": -32.04903793334961, - "loss": 2483.8781, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": 0.0016939423512667418, - "rewards/margins": 0.0016542377416044474, - "rewards/rejected": 3.9704824303044006e-05, + "logits/chosen": -2.028505802154541, + "logits/rejected": -2.020526885986328, + "logps/chosen": -30.3519287109375, + "logps/rejected": -32.101314544677734, + "loss": 0.9983, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.0012378758983686566, + "rewards/margins": 0.001720982021652162, + "rewards/rejected": -0.0004831062688026577, "step": 130 }, { "epoch": 0.36, "learning_rate": 4.0204024186666215e-06, - "logits/chosen": -1.9617973566055298, - "logits/rejected": -1.9720312356948853, - "logps/chosen": -31.230310440063477, - "logps/rejected": -32.547096252441406, - "loss": 2480.1322, - "rewards/accuracies": 0.574999988079071, - "rewards/chosen": 0.0018935112748295069, - "rewards/margins": 0.0020156968384981155, - "rewards/rejected": -0.00012218570918776095, + "logits/chosen": -1.9588673114776611, + "logits/rejected": -1.9690834283828735, + "logps/chosen": -31.205490112304688, + "logps/rejected": -32.55961608886719, + "loss": 0.9976, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.0021417266689240932, + "rewards/margins": 0.0023890691809356213, + "rewards/rejected": -0.00024734257021918893, "step": 140 }, { "epoch": 0.39, "learning_rate": 3.834196265035119e-06, - "logits/chosen": -1.8725357055664062, - "logits/rejected": -1.8737138509750366, - "logps/chosen": -33.889976501464844, - "logps/rejected": -34.795631408691406, - "loss": 2466.4881, - "rewards/accuracies": 0.6499999761581421, - "rewards/chosen": 0.003027186496183276, - "rewards/margins": 0.0034109093248844147, - "rewards/rejected": -0.0003837232361547649, + "logits/chosen": -1.8695415258407593, + "logits/rejected": -1.8707062005996704, + "logps/chosen": -33.88127899169922, + "logps/rejected": -34.7686653137207, + "loss": 0.9968, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": 0.0031141345389187336, + "rewards/margins": 0.003228238318115473, + "rewards/rejected": -0.00011410393926780671, "step": 150 }, { "epoch": 0.42, "learning_rate": 3.636998309800573e-06, - "logits/chosen": -1.9241313934326172, - "logits/rejected": -1.9207313060760498, - "logps/chosen": -35.98552322387695, - "logps/rejected": -32.693538665771484, - "loss": 2484.5627, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": 0.0018615787848830223, - "rewards/margins": 0.0015694532776251435, - "rewards/rejected": 0.00029212533263489604, + "logits/chosen": -1.9212032556533813, + "logits/rejected": -1.9178003072738647, + "logps/chosen": -35.99773406982422, + "logps/rejected": -32.705848693847656, + "loss": 0.9984, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": 0.0017394202295690775, + "rewards/margins": 0.0015704210381954908, + "rewards/rejected": 0.00016899927868507802, "step": 160 }, { "epoch": 0.44, "learning_rate": 3.4304331721118078e-06, - "logits/chosen": -2.023880958557129, - "logits/rejected": -2.0165772438049316, - "logps/chosen": -33.457122802734375, - "logps/rejected": -31.414859771728516, - "loss": 2460.2227, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": 0.0034074243158102036, - "rewards/margins": 0.004037545528262854, - "rewards/rejected": -0.0006301216781139374, + "logits/chosen": -2.0206995010375977, + "logits/rejected": -2.0133931636810303, + "logps/chosen": -33.504085540771484, + "logps/rejected": -31.432220458984375, + "loss": 0.9963, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": 0.0029377774335443974, + "rewards/margins": 0.003741443855687976, + "rewards/rejected": -0.0008036663057282567, "step": 170 }, { "epoch": 0.47, "learning_rate": 3.2162026428305436e-06, - "logits/chosen": -2.030813455581665, - "logits/rejected": -2.0360608100891113, - "logps/chosen": -32.20356750488281, - "logps/rejected": -32.4092903137207, - "loss": 2473.8211, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": 0.0034122199285775423, - "rewards/margins": 0.0026536998338997364, - "rewards/rejected": 0.0007585205021314323, + "logits/chosen": -2.0269291400909424, + "logits/rejected": -2.032160997390747, + "logps/chosen": -32.24355697631836, + "logps/rejected": -32.431182861328125, + "loss": 0.9975, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.003012270200997591, + "rewards/margins": 0.0024727012496441603, + "rewards/rejected": 0.0005395688931457698, "step": 180 }, { "epoch": 0.49, "learning_rate": 2.996071664294641e-06, - "logits/chosen": -2.0314080715179443, - "logits/rejected": -2.0286362171173096, - "logps/chosen": -31.27242088317871, - "logps/rejected": -31.320995330810547, - "loss": 2478.5072, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": 0.0021653182338923216, - "rewards/margins": 0.0021931403316557407, - "rewards/rejected": -2.7821719413623214e-05, + "logits/chosen": -2.027367115020752, + "logits/rejected": -2.0246078968048096, + "logps/chosen": -31.290613174438477, + "logps/rejected": -31.361133575439453, + "loss": 0.9976, + "rewards/accuracies": 0.625, + "rewards/chosen": 0.0019833946134895086, + "rewards/margins": 0.002412599278613925, + "rewards/rejected": -0.00042920451960526407, "step": 190 }, { "epoch": 0.52, "learning_rate": 2.7718537898066833e-06, - "logits/chosen": -1.9025766849517822, - "logits/rejected": -1.9072151184082031, - "logps/chosen": -31.255901336669922, - "logps/rejected": -32.79901885986328, - "loss": 2464.7859, - "rewards/accuracies": 0.6625000238418579, - "rewards/chosen": 0.0035038013011217117, - "rewards/margins": 0.0035846265964210033, - "rewards/rejected": -8.082549902610481e-05, + "logits/chosen": -1.8985168933868408, + "logits/rejected": -1.903148889541626, + "logps/chosen": -31.30405616760254, + "logps/rejected": -32.838443756103516, + "loss": 0.9965, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": 0.0030222723726183176, + "rewards/margins": 0.0034973658621311188, + "rewards/rejected": -0.0004750936641357839, "step": 200 }, { "epoch": 0.52, - "eval_logits/chosen": -2.229053497314453, - "eval_logits/rejected": -2.224233865737915, - "eval_logps/chosen": -34.033775329589844, - "eval_logps/rejected": -37.55736541748047, - "eval_loss": 2496.0791015625, - "eval_rewards/accuracies": 0.5544019937515259, - "eval_rewards/chosen": 7.761791493976489e-06, - "eval_rewards/margins": 0.00041520988452248275, - "eval_rewards/rejected": -0.00040744812577031553, - "eval_runtime": 145.4716, - "eval_samples_per_second": 2.358, - "eval_steps_per_second": 0.296, + "eval_logits/chosen": -2.225177764892578, + "eval_logits/rejected": -2.2203547954559326, + "eval_logps/chosen": -34.06184387207031, + "eval_logps/rejected": -37.579010009765625, + "eval_loss": 0.9996482133865356, + "eval_rewards/accuracies": 0.5070598125457764, + "eval_rewards/chosen": -0.0002729461120907217, + "eval_rewards/margins": 0.00035095339990220964, + "eval_rewards/rejected": -0.0006238995119929314, + "eval_runtime": 145.704, + "eval_samples_per_second": 2.354, + "eval_steps_per_second": 0.295, "step": 200 }, { "epoch": 0.55, "learning_rate": 2.5453962426402006e-06, - "logits/chosen": -2.0153257846832275, - "logits/rejected": -2.0259604454040527, - "logps/chosen": -31.77630615234375, - "logps/rejected": -33.9268798828125, - "loss": 2474.5629, + "logits/chosen": -2.011120557785034, + "logits/rejected": -2.021751880645752, + "logps/chosen": -31.745685577392578, + "logps/rejected": -33.96772003173828, + "loss": 0.9967, "rewards/accuracies": 0.637499988079071, - "rewards/chosen": 0.001949988305568695, - "rewards/margins": 0.0025917969178408384, - "rewards/rejected": -0.0006418084958568215, + "rewards/chosen": 0.0022561827208846807, + "rewards/margins": 0.003306365106254816, + "rewards/rejected": -0.0010501822689548135, "step": 210 }, { "epoch": 0.57, "learning_rate": 2.3185646976551794e-06, - "logits/chosen": -1.907790184020996, - "logits/rejected": -1.9225451946258545, - "logps/chosen": -29.77730941772461, - "logps/rejected": -31.612323760986328, - "loss": 2461.7975, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.0033422994893044233, - "rewards/margins": 0.0038837480824440718, - "rewards/rejected": -0.0005414488259702921, + "logits/chosen": -1.903857946395874, + "logits/rejected": -1.918621301651001, + "logps/chosen": -29.797290802001953, + "logps/rejected": -31.628814697265625, + "loss": 0.9962, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": 0.0031425058841705322, + "rewards/margins": 0.0038488968275487423, + "rewards/rejected": -0.0007063907687552273, "step": 220 }, { "epoch": 0.6, "learning_rate": 2.0932279108998323e-06, - "logits/chosen": -1.9650691747665405, - "logits/rejected": -1.9690206050872803, - "logps/chosen": -33.07447052001953, - "logps/rejected": -31.645030975341797, - "loss": 2457.1672, - "rewards/accuracies": 0.6875, - "rewards/chosen": 0.003429980482906103, - "rewards/margins": 0.004387288354337215, - "rewards/rejected": -0.0009573075803928077, + "logits/chosen": -1.9593979120254517, + "logits/rejected": -1.9633464813232422, + "logps/chosen": -33.067623138427734, + "logps/rejected": -31.64206886291504, + "loss": 0.9956, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": 0.003498472273349762, + "rewards/margins": 0.004426136147230864, + "rewards/rejected": -0.0009276636992581189, "step": 230 }, { "epoch": 0.62, "learning_rate": 1.8712423238279358e-06, - "logits/chosen": -1.9625787734985352, - "logits/rejected": -1.9408048391342163, - "logps/chosen": -33.812347412109375, - "logps/rejected": -35.121795654296875, - "loss": 2449.7848, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": 0.0034140206407755613, - "rewards/margins": 0.005124006420373917, - "rewards/rejected": -0.0017099861288443208, + "logits/chosen": -1.9572632312774658, + "logits/rejected": -1.9354908466339111, + "logps/chosen": -33.843727111816406, + "logps/rejected": -35.1453742980957, + "loss": 0.995, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.0031002266332507133, + "rewards/margins": 0.0050460235215723515, + "rewards/rejected": -0.0019457967719063163, "step": 240 }, { "epoch": 0.65, "learning_rate": 1.6544367689701824e-06, - "logits/chosen": -2.003685712814331, - "logits/rejected": -2.000408172607422, - "logps/chosen": -32.71784210205078, - "logps/rejected": -36.25305938720703, - "loss": 2476.9369, - "rewards/accuracies": 0.612500011920929, - "rewards/chosen": 0.002072283299639821, - "rewards/margins": 0.0023536235094070435, - "rewards/rejected": -0.00028134050080552697, + "logits/chosen": -1.9997854232788086, + "logits/rejected": -1.9964803457260132, + "logps/chosen": -32.75019454956055, + "logps/rejected": -36.28661346435547, + "loss": 0.9976, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": 0.0017487213481217623, + "rewards/margins": 0.0023655896075069904, + "rewards/rejected": -0.0006168682011775672, "step": 250 }, { "epoch": 0.68, "learning_rate": 1.4445974030621963e-06, - "logits/chosen": -1.8708940744400024, - "logits/rejected": -1.8684980869293213, - "logps/chosen": -33.97399139404297, - "logps/rejected": -35.522247314453125, - "loss": 2477.3357, - "rewards/accuracies": 0.625, - "rewards/chosen": 0.0021441043354570866, - "rewards/margins": 0.002320351079106331, - "rewards/rejected": -0.00017624672909732908, + "logits/chosen": -1.8673791885375977, + "logits/rejected": -1.8649587631225586, + "logps/chosen": -34.018226623535156, + "logps/rejected": -35.539276123046875, + "loss": 0.998, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.001701725646853447, + "rewards/margins": 0.002048287307843566, + "rewards/rejected": -0.0003465614281594753, "step": 260 }, { "epoch": 0.7, "learning_rate": 1.243452991757889e-06, - "logits/chosen": -1.8561140298843384, - "logits/rejected": -1.8537418842315674, - "logps/chosen": -34.15688705444336, - "logps/rejected": -31.835697174072266, - "loss": 2470.1545, - "rewards/accuracies": 0.637499988079071, - "rewards/chosen": 0.0024070844519883394, - "rewards/margins": 0.003050738014280796, - "rewards/rejected": -0.0006436532130464911, + "logits/chosen": -1.8522275686264038, + "logits/rejected": -1.849872350692749, + "logps/chosen": -34.16339874267578, + "logps/rejected": -31.845317840576172, + "loss": 0.9969, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": 0.0023420110810548067, + "rewards/margins": 0.0030819105450063944, + "rewards/rejected": -0.0007398994639515877, "step": 270 }, { "epoch": 0.73, "learning_rate": 1.0526606671603523e-06, - "logits/chosen": -1.959524154663086, - "logits/rejected": -1.9490426778793335, - "logps/chosen": -34.99895477294922, - "logps/rejected": -31.8908634185791, - "loss": 2459.8076, - "rewards/accuracies": 0.6875, - "rewards/chosen": 0.003576862858608365, - "rewards/margins": 0.004075545351952314, - "rewards/rejected": -0.0004986823769286275, + "logits/chosen": -1.9549518823623657, + "logits/rejected": -1.94447922706604, + "logps/chosen": -35.027687072753906, + "logps/rejected": -31.895471572875977, + "loss": 0.9962, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.003289591521024704, + "rewards/margins": 0.003834384260699153, + "rewards/rejected": -0.0005447928560897708, "step": 280 }, { "epoch": 0.75, "learning_rate": 8.737922755071455e-07, - "logits/chosen": -2.0554285049438477, - "logits/rejected": -2.0405356884002686, - "logps/chosen": -30.697057723999023, - "logps/rejected": -32.610191345214844, - "loss": 2482.0641, + "logits/chosen": -2.0498766899108887, + "logits/rejected": -2.034980297088623, + "logps/chosen": -30.72440528869629, + "logps/rejected": -32.658695220947266, + "loss": 0.9979, "rewards/accuracies": 0.637499988079071, - "rewards/chosen": 0.0022706831805408, - "rewards/margins": 0.0018542330944910645, - "rewards/rejected": 0.0004164502606727183, + "rewards/chosen": 0.0019971781875938177, + "rewards/margins": 0.002065772656351328, + "rewards/rejected": -6.859400309622288e-05, "step": 290 }, { "epoch": 0.78, "learning_rate": 7.08321427484816e-07, - "logits/chosen": -1.9246467351913452, - "logits/rejected": -1.922141671180725, - "logps/chosen": -32.302886962890625, - "logps/rejected": -30.90523338317871, - "loss": 2430.5529, - "rewards/accuracies": 0.7875000238418579, - "rewards/chosen": 0.0062422603368759155, - "rewards/margins": 0.007126508746296167, - "rewards/rejected": -0.0008842485258355737, + "logits/chosen": -1.9201946258544922, + "logits/rejected": -1.9177051782608032, + "logps/chosen": -32.3183479309082, + "logps/rejected": -30.95510482788086, + "loss": 0.9925, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": 0.006087628658860922, + "rewards/margins": 0.007470599375665188, + "rewards/rejected": -0.0013829706003889441, "step": 300 }, { "epoch": 0.78, - "eval_logits/chosen": -2.22615647315979, - "eval_logits/rejected": -2.2213311195373535, - "eval_logps/chosen": -34.06288146972656, - "eval_logps/rejected": -37.59059524536133, - "eval_loss": 2495.66796875, - "eval_rewards/accuracies": 0.5390365719795227, - "eval_rewards/chosen": -0.0002833307080436498, - "eval_rewards/margins": 0.00045641581527888775, - "eval_rewards/rejected": -0.0007397464942187071, - "eval_runtime": 145.8977, + "eval_logits/chosen": -2.2213804721832275, + "eval_logits/rejected": -2.216555118560791, + "eval_logps/chosen": -34.083614349365234, + "eval_logps/rejected": -37.60634994506836, + "eval_loss": 0.999591052532196, + "eval_rewards/accuracies": 0.559385359287262, + "eval_rewards/chosen": -0.0004906260874122381, + "eval_rewards/margins": 0.00040669209556654096, + "eval_rewards/rejected": -0.0008973181829787791, + "eval_runtime": 145.8707, "eval_samples_per_second": 2.351, "eval_steps_per_second": 0.295, "step": 300 }, { "epoch": 0.81, - "learning_rate": 4.84533120650964e-06, - "logits/chosen": -1.9104417562484741, - "logits/rejected": -1.9072014093399048, - "logps/chosen": -31.30303955078125, - "logps/rejected": -33.819358825683594, - "loss": 2463.1744, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.0030030703637748957, - "rewards/margins": 0.003775153774768114, - "rewards/rejected": -0.0007720834692008793, + "learning_rate": 5.576113578589035e-07, + "logits/chosen": -1.9060642719268799, + "logits/rejected": -1.9028133153915405, + "logps/chosen": -31.319162368774414, + "logps/rejected": -33.85043716430664, + "loss": 0.9961, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": 0.002841859357431531, + "rewards/margins": 0.003924719989299774, + "rewards/rejected": -0.0010828599333763123, "step": 310 }, { "epoch": 0.83, - "learning_rate": 4.825108134172131e-06, - "logits/chosen": -1.9579178094863892, - "logits/rejected": -1.9457557201385498, - "logps/chosen": -34.26006317138672, - "logps/rejected": -33.66352462768555, - "loss": 2454.818, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": 0.0033601075410842896, - "rewards/margins": 0.004606915637850761, - "rewards/rejected": -0.0012468084460124373, + "learning_rate": 4.229036944380913e-07, + "logits/chosen": -1.9553836584091187, + "logits/rejected": -1.9432109594345093, + "logps/chosen": -34.27588653564453, + "logps/rejected": -33.672359466552734, + "loss": 0.9955, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.0032019000500440598, + "rewards/margins": 0.004537059459835291, + "rewards/rejected": -0.0013351596426218748, "step": 320 }, { "epoch": 0.86, - "learning_rate": 4.80369052967602e-06, - "logits/chosen": -1.991537094116211, - "logits/rejected": -1.9901418685913086, - "logps/chosen": -33.10230255126953, - "logps/rejected": -32.55553436279297, - "loss": 2455.166, - "rewards/accuracies": 0.675000011920929, - "rewards/chosen": 0.003834925591945648, - "rewards/margins": 0.004594448953866959, - "rewards/rejected": -0.0007595239439979196, + "learning_rate": 3.053082288996112e-07, + "logits/chosen": -1.9905990362167358, + "logits/rejected": -1.9891618490219116, + "logps/chosen": -33.116233825683594, + "logps/rejected": -32.55724334716797, + "loss": 0.9955, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": 0.0036955769173800945, + "rewards/margins": 0.004472161643207073, + "rewards/rejected": -0.0007765850750729442, "step": 330 }, { "epoch": 0.88, - "learning_rate": 4.781089396387968e-06, - "logits/chosen": -2.0774741172790527, - "logits/rejected": -2.0618669986724854, - "logps/chosen": -33.6904182434082, - "logps/rejected": -33.073814392089844, - "loss": 2456.2992, - "rewards/accuracies": 0.625, - "rewards/chosen": 0.004889755509793758, - "rewards/margins": 0.004447542130947113, - "rewards/rejected": 0.0004422132042236626, + "learning_rate": 2.0579377374915805e-07, + "logits/chosen": -2.0769362449645996, + "logits/rejected": -2.0613036155700684, + "logps/chosen": -33.791297912597656, + "logps/rejected": -33.12422180175781, + "loss": 0.9961, + "rewards/accuracies": 0.699999988079071, + "rewards/chosen": 0.003880967851728201, + "rewards/margins": 0.00394281093031168, + "rewards/rejected": -6.184288213262334e-05, "step": 340 }, { "epoch": 0.91, - "learning_rate": 4.757316345716554e-06, - "logits/chosen": -1.9498752355575562, - "logits/rejected": -1.9490633010864258, - "logps/chosen": -32.76622009277344, - "logps/rejected": -32.49995040893555, - "loss": 2446.7852, - "rewards/accuracies": 0.699999988079071, - "rewards/chosen": 0.00515871262177825, - "rewards/margins": 0.005506747402250767, - "rewards/rejected": -0.0003480348386801779, + "learning_rate": 1.2518018074041684e-07, + "logits/chosen": -1.950060248374939, + "logits/rejected": -1.9492241144180298, + "logps/chosen": -32.82404327392578, + "logps/rejected": -32.50709915161133, + "loss": 0.995, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": 0.004580510314553976, + "rewards/margins": 0.005000022705644369, + "rewards/rejected": -0.000419511750806123, "step": 350 }, { "epoch": 0.94, - "learning_rate": 4.73238359114687e-06, - "logits/chosen": -1.9010334014892578, - "logits/rejected": -1.91123366355896, - "logps/chosen": -31.694040298461914, - "logps/rejected": -35.382728576660156, - "loss": 2441.1234, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.005106499884277582, - "rewards/margins": 0.006046179216355085, - "rewards/rejected": -0.0009396795067004859, + "learning_rate": 6.41315865106129e-08, + "logits/chosen": -1.9050449132919312, + "logits/rejected": -1.915305733680725, + "logps/chosen": -31.87860679626465, + "logps/rejected": -35.34981155395508, + "loss": 0.9961, + "rewards/accuracies": 0.6875, + "rewards/chosen": 0.0032608420588076115, + "rewards/margins": 0.0038713677786290646, + "rewards/rejected": -0.0006105261854827404, "step": 360 }, { "epoch": 0.96, - "learning_rate": 4.706303941965804e-06, - "logits/chosen": -2.036052703857422, - "logits/rejected": -2.029733180999756, - "logps/chosen": -33.1943473815918, - "logps/rejected": -29.27004051208496, - "loss": 2450.7623, - "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": 0.004576197825372219, - "rewards/margins": 0.0050123645924031734, - "rewards/rejected": -0.00043616676703095436, + "learning_rate": 2.3150941078050325e-08, + "logits/chosen": -2.04546856880188, + "logits/rejected": -2.039043426513672, + "logps/chosen": -33.336219787597656, + "logps/rejected": -29.269311904907227, + "loss": 0.9964, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": 0.0031574335880577564, + "rewards/margins": 0.003586276201531291, + "rewards/rejected": -0.0004288425261620432, "step": 370 }, { "epoch": 0.99, - "learning_rate": 4.679090796681225e-06, - "logits/chosen": -1.8926509618759155, - "logits/rejected": -1.894890546798706, - "logps/chosen": -33.61520004272461, - "logps/rejected": -30.98312759399414, - "loss": 2428.4018, + "learning_rate": 2.575864278703266e-09, + "logits/chosen": -1.905160665512085, + "logits/rejected": -1.907360315322876, + "logps/chosen": -33.86741256713867, + "logps/rejected": -30.982807159423828, + "loss": 0.9952, "rewards/accuracies": 0.7250000238418579, - "rewards/chosen": 0.006238477770239115, - "rewards/margins": 0.007343468256294727, - "rewards/rejected": -0.0011049896711483598, + "rewards/chosen": 0.0037163912784308195, + "rewards/margins": 0.004818186163902283, + "rewards/rejected": -0.0011017953511327505, "step": 380 }, { - "epoch": 1.01, - "learning_rate": 4.650758136138454e-06, - "logits/chosen": -1.9188745021820068, - "logits/rejected": -1.9176126718521118, - "logps/chosen": -33.695579528808594, - "logps/rejected": -36.02911376953125, - "loss": 2397.0137, - "rewards/accuracies": 0.7291666865348816, - "rewards/chosen": 0.006892119534313679, - "rewards/margins": 0.010623215697705746, - "rewards/rejected": -0.003731096163392067, - "step": 390 - }, - { - "epoch": 1.04, - "learning_rate": 4.621320516337559e-06, - "logits/chosen": -1.8515088558197021, - "logits/rejected": -1.8431167602539062, - "logps/chosen": -30.941198348999023, - "logps/rejected": -36.45293426513672, - "loss": 2370.302, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.008327776566147804, - "rewards/margins": 0.013451090082526207, - "rewards/rejected": -0.005123314447700977, - "step": 400 - }, - { - "epoch": 1.04, - "eval_logits/chosen": -2.199411153793335, - "eval_logits/rejected": -2.194589376449585, - "eval_logps/chosen": -34.099369049072266, - "eval_logps/rejected": -37.661293029785156, - "eval_loss": 2492.46533203125, - "eval_rewards/accuracies": 0.5622923374176025, - "eval_rewards/chosen": -0.0006481813034042716, - "eval_rewards/margins": 0.0007985630072653294, - "eval_rewards/rejected": -0.001446744310669601, - "eval_runtime": 146.2529, - "eval_samples_per_second": 2.345, - "eval_steps_per_second": 0.294, - "step": 400 - }, - { - "epoch": 1.06, - "learning_rate": 4.590793060955158e-06, - "logits/chosen": -2.0204148292541504, - "logits/rejected": -2.023253917694092, - "logps/chosen": -32.13569259643555, - "logps/rejected": -35.30311584472656, - "loss": 2361.9771, - "rewards/accuracies": 0.875, - "rewards/chosen": 0.00918244756758213, - "rewards/margins": 0.014285160228610039, - "rewards/rejected": -0.0051027145236730576, - "step": 410 - }, - { - "epoch": 1.09, - "learning_rate": 4.559191453574582e-06, - "logits/chosen": -1.856715202331543, - "logits/rejected": -1.8553537130355835, - "logps/chosen": -28.340347290039062, - "logps/rejected": -32.772071838378906, - "loss": 2384.5215, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": 0.007272079586982727, - "rewards/margins": 0.01181616447865963, - "rewards/rejected": -0.004544084891676903, - "step": 420 - }, - { - "epoch": 1.12, - "learning_rate": 4.52653192962838e-06, - "logits/chosen": -1.8120425939559937, - "logits/rejected": -1.8051426410675049, - "logps/chosen": -33.048492431640625, - "logps/rejected": -34.51493453979492, - "loss": 2373.559, - "rewards/accuracies": 0.9375, - "rewards/chosen": 0.010255918838083744, - "rewards/margins": 0.012952560558915138, - "rewards/rejected": -0.0026966414880007505, - "step": 430 - }, - { - "epoch": 1.14, - "learning_rate": 4.492831268057307e-06, - "logits/chosen": -1.9794769287109375, - "logits/rejected": -1.9743585586547852, - "logps/chosen": -30.73288345336914, - "logps/rejected": -32.56402587890625, - "loss": 2341.4699, - "rewards/accuracies": 0.887499988079071, - "rewards/chosen": 0.010504107922315598, - "rewards/margins": 0.01646825671195984, - "rewards/rejected": -0.0059641506522893906, - "step": 440 - }, - { - "epoch": 1.17, - "learning_rate": 4.458106782690094e-06, - "logits/chosen": -1.8598779439926147, - "logits/rejected": -1.8641446828842163, - "logps/chosen": -33.39701461791992, - "logps/rejected": -33.232383728027344, - "loss": 2329.0439, - "rewards/accuracies": 0.9125000238418579, - "rewards/chosen": 0.011188305914402008, - "rewards/margins": 0.01762574352324009, - "rewards/rejected": -0.006437439471483231, - "step": 450 - }, - { - "epoch": 1.19, - "learning_rate": 4.422376313348405e-06, - "logits/chosen": -1.8614333868026733, - "logits/rejected": -1.8558467626571655, - "logps/chosen": -34.22340774536133, - "logps/rejected": -35.80681610107422, - "loss": 2302.8248, - "rewards/accuracies": 0.9375, - "rewards/chosen": 0.011961170472204685, - "rewards/margins": 0.020538393408060074, - "rewards/rejected": -0.008577222935855389, - "step": 460 - }, - { - "epoch": 1.22, - "learning_rate": 4.3856582166815696e-06, - "logits/chosen": -1.8815793991088867, - "logits/rejected": -1.8814213275909424, - "logps/chosen": -33.06370544433594, - "logps/rejected": -34.739097595214844, - "loss": 2340.4611, - "rewards/accuracies": 0.9125000238418579, - "rewards/chosen": 0.011046240106225014, - "rewards/margins": 0.01658240333199501, - "rewards/rejected": -0.005536160431802273, - "step": 470 - }, - { - "epoch": 1.25, - "learning_rate": 4.347971356735789e-06, - "logits/chosen": -1.9247829914093018, - "logits/rejected": -1.9061830043792725, - "logps/chosen": -32.92525863647461, - "logps/rejected": -33.87827682495117, - "loss": 2304.1588, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": 0.012355051003396511, - "rewards/margins": 0.020438065752387047, - "rewards/rejected": -0.008083016611635685, - "step": 480 - }, - { - "epoch": 1.27, - "learning_rate": 4.309335095262675e-06, - "logits/chosen": -1.8873250484466553, - "logits/rejected": -1.886690378189087, - "logps/chosen": -30.484582901000977, - "logps/rejected": -31.771377563476562, - "loss": 2340.8271, - "rewards/accuracies": 0.887499988079071, - "rewards/chosen": 0.011267202906310558, - "rewards/margins": 0.016520529985427856, - "rewards/rejected": -0.0052533275447785854, - "step": 490 - }, - { - "epoch": 1.3, - "learning_rate": 4.269769281772082e-06, - "logits/chosen": -1.8447484970092773, - "logits/rejected": -1.837871789932251, - "logps/chosen": -31.42559242248535, - "logps/rejected": -35.48058319091797, - "loss": 2298.9412, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.01244533620774746, - "rewards/margins": 0.02098379284143448, - "rewards/rejected": -0.00853845663368702, - "step": 500 - }, - { - "epoch": 1.3, - "eval_logits/chosen": -2.1343138217926025, - "eval_logits/rejected": -2.12954044342041, - "eval_logps/chosen": -34.28038787841797, - "eval_logps/rejected": -37.88829803466797, - "eval_loss": 2488.40625, - "eval_rewards/accuracies": 0.5772424936294556, - "eval_rewards/chosen": -0.002458348637446761, - "eval_rewards/margins": 0.0012584367068484426, - "eval_rewards/rejected": -0.0037167854607105255, - "eval_runtime": 145.9415, - "eval_samples_per_second": 2.35, - "eval_steps_per_second": 0.295, - "step": 500 - }, - { - "epoch": 1.32, - "learning_rate": 4.22929424333435e-06, - "logits/chosen": -1.8362356424331665, - "logits/rejected": -1.8398487567901611, - "logps/chosen": -28.270023345947266, - "logps/rejected": -33.78419876098633, - "loss": 2323.2496, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.009048042818903923, - "rewards/margins": 0.018297135829925537, - "rewards/rejected": -0.009249093011021614, - "step": 510 - }, - { - "epoch": 1.35, - "learning_rate": 4.1879307741372085e-06, - "logits/chosen": -1.8308753967285156, - "logits/rejected": -1.8415968418121338, - "logps/chosen": -32.14521408081055, - "logps/rejected": -31.652883529663086, - "loss": 2299.3625, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": 0.011171149089932442, - "rewards/margins": 0.021183136850595474, - "rewards/rejected": -0.010011989623308182, - "step": 520 - }, - { - "epoch": 1.38, - "learning_rate": 4.145700124802693e-06, - "logits/chosen": -1.7703996896743774, - "logits/rejected": -1.7680895328521729, - "logps/chosen": -30.59372329711914, - "logps/rejected": -31.122241973876953, - "loss": 2307.9037, - "rewards/accuracies": 0.8125, - "rewards/chosen": 0.010733595117926598, - "rewards/margins": 0.020188378170132637, - "rewards/rejected": -0.009454783983528614, - "step": 530 - }, - { - "epoch": 1.4, - "learning_rate": 4.102623991469562e-06, - "logits/chosen": -1.840515375137329, - "logits/rejected": -1.833764672279358, - "logps/chosen": -33.129478454589844, - "logps/rejected": -34.03999328613281, - "loss": 2296.7414, - "rewards/accuracies": 0.8374999761581421, - "rewards/chosen": 0.01103346236050129, - "rewards/margins": 0.021258534863591194, - "rewards/rejected": -0.010225074365735054, - "step": 540 - }, - { - "epoch": 1.43, - "learning_rate": 4.058724504646834e-06, - "logits/chosen": -1.8037725687026978, - "logits/rejected": -1.8101667165756226, - "logps/chosen": -30.930444717407227, - "logps/rejected": -33.56714630126953, - "loss": 2343.0631, - "rewards/accuracies": 0.7875000238418579, - "rewards/chosen": 0.008227399550378323, - "rewards/margins": 0.016311541199684143, - "rewards/rejected": -0.008084140717983246, - "step": 550 - }, - { - "epoch": 1.45, - "learning_rate": 4.014024217844167e-06, - "logits/chosen": -1.8711225986480713, - "logits/rejected": -1.8482850790023804, - "logps/chosen": -30.459259033203125, - "logps/rejected": -33.72909927368164, - "loss": 2335.1771, - "rewards/accuracies": 0.875, - "rewards/chosen": 0.009783074259757996, - "rewards/margins": 0.01711348444223404, - "rewards/rejected": -0.007330409251153469, - "step": 560 - }, - { - "epoch": 1.48, - "learning_rate": 3.968546095984911e-06, - "logits/chosen": -1.8007291555404663, - "logits/rejected": -1.7958128452301025, - "logps/chosen": -31.415090560913086, - "logps/rejected": -32.90663528442383, - "loss": 2330.2, - "rewards/accuracies": 0.8374999761581421, - "rewards/chosen": 0.010016413405537605, - "rewards/margins": 0.01787360943853855, - "rewards/rejected": -0.007857195101678371, - "step": 570 - }, - { - "epoch": 1.51, - "learning_rate": 3.922313503607806e-06, - "logits/chosen": -1.83207106590271, - "logits/rejected": -1.8339207172393799, - "logps/chosen": -33.55345153808594, - "logps/rejected": -36.1082763671875, - "loss": 2297.7742, - "rewards/accuracies": 0.887499988079071, - "rewards/chosen": 0.008223430253565311, - "rewards/margins": 0.02129700407385826, - "rewards/rejected": -0.013073575682938099, - "step": 580 - }, - { - "epoch": 1.53, - "learning_rate": 3.875350192863368e-06, - "logits/chosen": -1.812063217163086, - "logits/rejected": -1.811581015586853, - "logps/chosen": -29.506006240844727, - "logps/rejected": -32.62559127807617, - "loss": 2286.0877, - "rewards/accuracies": 0.875, - "rewards/chosen": 0.010860500857234001, - "rewards/margins": 0.022500045597553253, - "rewards/rejected": -0.011639544740319252, - "step": 590 - }, - { - "epoch": 1.56, - "learning_rate": 3.8276802913111436e-06, - "logits/chosen": -1.8164310455322266, - "logits/rejected": -1.8141977787017822, - "logps/chosen": -31.94429588317871, - "logps/rejected": -33.383872985839844, - "loss": 2298.7582, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": 0.010539887472987175, - "rewards/margins": 0.021379008889198303, - "rewards/rejected": -0.010839122347533703, - "step": 600 - }, - { - "epoch": 1.56, - "eval_logits/chosen": -2.063612222671509, - "eval_logits/rejected": -2.058927059173584, - "eval_logps/chosen": -34.559974670410156, - "eval_logps/rejected": -38.285404205322266, - "eval_loss": 2477.658935546875, - "eval_rewards/accuracies": 0.6121262311935425, - "eval_rewards/chosen": -0.005254245828837156, - "eval_rewards/margins": 0.0024335861671715975, - "eval_rewards/rejected": -0.007687832228839397, - "eval_runtime": 145.6811, - "eval_samples_per_second": 2.354, - "eval_steps_per_second": 0.295, - "step": 600 - }, - { - "epoch": 1.58, - "learning_rate": 3.7793282895240927e-06, - "logits/chosen": -1.847764015197754, - "logits/rejected": -1.8541465997695923, - "logps/chosen": -31.449474334716797, - "logps/rejected": -33.3134880065918, - "loss": 2305.692, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.007879074662923813, - "rewards/margins": 0.02038208767771721, - "rewards/rejected": -0.012503013014793396, - "step": 610 - }, - { - "epoch": 1.61, - "learning_rate": 3.730319028506478e-06, - "logits/chosen": -1.7965530157089233, - "logits/rejected": -1.7944053411483765, - "logps/chosen": -33.688297271728516, - "logps/rejected": -32.105934143066406, - "loss": 2292.9066, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": 0.010675834491848946, - "rewards/margins": 0.021880075335502625, - "rewards/rejected": -0.011204240843653679, - "step": 620 - }, - { - "epoch": 1.64, - "learning_rate": 3.6806776869317074e-06, - "logits/chosen": -1.7377105951309204, - "logits/rejected": -1.731245756149292, - "logps/chosen": -34.34708786010742, - "logps/rejected": -33.66561508178711, - "loss": 2271.1943, - "rewards/accuracies": 0.887499988079071, - "rewards/chosen": 0.011171405203640461, - "rewards/margins": 0.024255482479929924, - "rewards/rejected": -0.013084076344966888, - "step": 630 - }, - { - "epoch": 1.66, - "learning_rate": 3.6304297682067146e-06, - "logits/chosen": -1.7545562982559204, - "logits/rejected": -1.7608686685562134, - "logps/chosen": -33.110076904296875, - "logps/rejected": -34.38447189331055, - "loss": 2307.9031, - "rewards/accuracies": 0.8374999761581421, - "rewards/chosen": 0.008692274801433086, - "rewards/margins": 0.02013438567519188, - "rewards/rejected": -0.011442111805081367, - "step": 640 - }, - { - "epoch": 1.69, - "learning_rate": 3.579601087369492e-06, - "logits/chosen": -1.8283579349517822, - "logits/rejected": -1.8423779010772705, - "logps/chosen": -31.087310791015625, - "logps/rejected": -33.21766662597656, - "loss": 2311.0535, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": 0.007651531603187323, - "rewards/margins": 0.01985129900276661, - "rewards/rejected": -0.012199767865240574, - "step": 650 - }, - { - "epoch": 1.71, - "learning_rate": 3.5282177578265295e-06, - "logits/chosen": -1.6931848526000977, - "logits/rejected": -1.6900306940078735, - "logps/chosen": -32.68722152709961, - "logps/rejected": -36.44025421142578, - "loss": 2222.7857, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.012357336468994617, - "rewards/margins": 0.029757345095276833, - "rewards/rejected": -0.01740000769495964, - "step": 660 - }, - { - "epoch": 1.74, - "learning_rate": 3.476306177936961e-06, - "logits/chosen": -1.7785106897354126, - "logits/rejected": -1.7785335779190063, - "logps/chosen": -30.625701904296875, - "logps/rejected": -35.58719253540039, - "loss": 2272.5373, - "rewards/accuracies": 0.8125, - "rewards/chosen": 0.006598903331905603, - "rewards/margins": 0.024207040667533875, - "rewards/rejected": -0.01760813593864441, - "step": 670 - }, - { - "epoch": 1.77, - "learning_rate": 3.423893017450324e-06, - "logits/chosen": -1.7213503122329712, - "logits/rejected": -1.718073844909668, - "logps/chosen": -30.16400146484375, - "logps/rejected": -34.405784606933594, - "loss": 2284.7229, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.007083491422235966, - "rewards/margins": 0.02296331152319908, - "rewards/rejected": -0.01587982103228569, - "step": 680 - }, - { - "epoch": 1.79, - "learning_rate": 3.3710052038048794e-06, - "logits/chosen": -1.7414871454238892, - "logits/rejected": -1.7416623830795288, - "logps/chosen": -29.0936279296875, - "logps/rejected": -32.20520782470703, - "loss": 2256.3047, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.010694684460759163, - "rewards/margins": 0.026041794568300247, - "rewards/rejected": -0.015347110107541084, - "step": 690 - }, - { - "epoch": 1.82, - "learning_rate": 3.3176699082935546e-06, - "logits/chosen": -1.660274863243103, - "logits/rejected": -1.663644552230835, - "logps/chosen": -33.33858108520508, - "logps/rejected": -33.01979064941406, - "loss": 2254.2998, - "rewards/accuracies": 0.7124999761581421, - "rewards/chosen": 0.011398938484489918, - "rewards/margins": 0.026949990540742874, - "rewards/rejected": -0.015551051124930382, - "step": 700 - }, - { - "epoch": 1.82, - "eval_logits/chosen": -2.0085763931274414, - "eval_logits/rejected": -2.0039827823638916, - "eval_logps/chosen": -34.992069244384766, - "eval_logps/rejected": -38.73301696777344, - "eval_loss": 2477.662353515625, - "eval_rewards/accuracies": 0.5539867281913757, - "eval_rewards/chosen": -0.009575208649039268, - "eval_rewards/margins": 0.0025887340307235718, - "eval_rewards/rejected": -0.01216394267976284, - "eval_runtime": 145.5671, - "eval_samples_per_second": 2.356, - "eval_steps_per_second": 0.295, - "step": 700 - }, - { - "epoch": 1.84, - "learning_rate": 3.2639145321045933e-06, - "logits/chosen": -1.7369210720062256, - "logits/rejected": -1.7286014556884766, - "logps/chosen": -35.7460823059082, - "logps/rejected": -33.445213317871094, - "loss": 2282.4748, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.0076593635603785515, - "rewards/margins": 0.02305331453680992, - "rewards/rejected": -0.015393950045108795, - "step": 710 - }, - { - "epoch": 1.87, - "learning_rate": 3.2097666922441107e-06, - "logits/chosen": -1.7424733638763428, - "logits/rejected": -1.7439861297607422, - "logps/chosen": -35.72047424316406, - "logps/rejected": -34.96687698364258, - "loss": 2256.5887, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": 0.007149122655391693, - "rewards/margins": 0.026057641953229904, - "rewards/rejected": -0.01890851929783821, - "step": 720 - }, - { - "epoch": 1.9, - "learning_rate": 3.1552542073477554e-06, - "logits/chosen": -1.7609472274780273, - "logits/rejected": -1.7586179971694946, - "logps/chosen": -31.435550689697266, - "logps/rejected": -34.51602554321289, - "loss": 2254.2758, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.011277793906629086, - "rewards/margins": 0.026469092816114426, - "rewards/rejected": -0.015191297046840191, - "step": 730 - }, - { - "epoch": 1.92, - "learning_rate": 3.100405083388799e-06, - "logits/chosen": -1.733371376991272, - "logits/rejected": -1.7385714054107666, - "logps/chosen": -30.687509536743164, - "logps/rejected": -34.88238525390625, - "loss": 2235.6635, - "rewards/accuracies": 0.875, - "rewards/chosen": 0.010982171632349491, - "rewards/margins": 0.028257867321372032, - "rewards/rejected": -0.017275694757699966, - "step": 740 - }, - { - "epoch": 1.95, - "learning_rate": 3.0452474992899645e-06, - "logits/chosen": -1.686703085899353, - "logits/rejected": -1.68540358543396, - "logps/chosen": -32.28951644897461, - "logps/rejected": -36.70497512817383, - "loss": 2236.9605, - "rewards/accuracies": 0.8125, - "rewards/chosen": 0.008354658260941505, - "rewards/margins": 0.028544824570417404, - "rewards/rejected": -0.020190168172121048, - "step": 750 - }, - { - "epoch": 1.97, - "learning_rate": 2.989809792446417e-06, - "logits/chosen": -1.5596857070922852, - "logits/rejected": -1.5550428628921509, - "logps/chosen": -34.99411392211914, - "logps/rejected": -37.3930778503418, - "loss": 2205.4355, - "rewards/accuracies": 0.8125, - "rewards/chosen": 0.010043250396847725, - "rewards/margins": 0.0318898968398571, - "rewards/rejected": -0.021846650168299675, - "step": 760 - }, - { - "epoch": 2.0, - "learning_rate": 2.9341204441673267e-06, - "logits/chosen": -1.6859843730926514, - "logits/rejected": -1.690326452255249, - "logps/chosen": -34.527626037597656, - "logps/rejected": -35.3222541809082, - "loss": 2274.9828, - "rewards/accuracies": 0.7916666269302368, - "rewards/chosen": 0.006860324647277594, - "rewards/margins": 0.024134492501616478, - "rewards/rejected": -0.017274167388677597, - "step": 770 - }, - { - "epoch": 2.03, - "learning_rate": 2.878208065043501e-06, - "logits/chosen": -1.633776068687439, - "logits/rejected": -1.632145643234253, - "logps/chosen": -32.2863883972168, - "logps/rejected": -37.45145797729492, - "loss": 2069.3428, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": 0.01615205779671669, - "rewards/margins": 0.04662010073661804, - "rewards/rejected": -0.030468037351965904, - "step": 780 - }, - { - "epoch": 2.05, - "learning_rate": 2.8221013802485974e-06, - "logits/chosen": -1.6747426986694336, - "logits/rejected": -1.6725879907608032, - "logps/chosen": -31.845199584960938, - "logps/rejected": -35.52173614501953, - "loss": 2131.4416, - "rewards/accuracies": 0.887499988079071, - "rewards/chosen": 0.015305367298424244, - "rewards/margins": 0.039657000452280045, - "rewards/rejected": -0.024351635947823524, - "step": 790 - }, - { - "epoch": 2.08, - "learning_rate": 2.76582921478147e-06, - "logits/chosen": -1.597703218460083, - "logits/rejected": -1.592248797416687, - "logps/chosen": -33.2725830078125, - "logps/rejected": -33.86598205566406, - "loss": 2173.4393, - "rewards/accuracies": 0.875, - "rewards/chosen": 0.011104973964393139, - "rewards/margins": 0.03512220084667206, - "rewards/rejected": -0.024017225950956345, - "step": 800 - }, - { - "epoch": 2.08, - "eval_logits/chosen": -1.970989465713501, - "eval_logits/rejected": -1.9664607048034668, - "eval_logps/chosen": -35.21713638305664, - "eval_logps/rejected": -39.03861618041992, - "eval_loss": 2470.59033203125, - "eval_rewards/accuracies": 0.5568937063217163, - "eval_rewards/chosen": -0.011825831606984138, - "eval_rewards/margins": 0.003394143423065543, - "eval_rewards/rejected": -0.015219975262880325, - "eval_runtime": 145.776, - "eval_samples_per_second": 2.353, - "eval_steps_per_second": 0.295, - "step": 800 - }, - { - "epoch": 2.1, - "learning_rate": 2.7094204786572254e-06, - "logits/chosen": -1.6900947093963623, - "logits/rejected": -1.6973447799682617, - "logps/chosen": -30.748676300048828, - "logps/rejected": -37.139137268066406, - "loss": 2112.8461, - "rewards/accuracies": 0.887499988079071, - "rewards/chosen": 0.01379583589732647, - "rewards/margins": 0.04262876883149147, - "rewards/rejected": -0.02883293107151985, - "step": 810 - }, - { - "epoch": 2.13, - "learning_rate": 2.6529041520546072e-06, - "logits/chosen": -1.6644665002822876, - "logits/rejected": -1.666691541671753, - "logps/chosen": -31.436620712280273, - "logps/rejected": -35.15371322631836, - "loss": 2218.1766, - "rewards/accuracies": 0.8374999761581421, - "rewards/chosen": 0.011344591155648232, - "rewards/margins": 0.030583670362830162, - "rewards/rejected": -0.01923907920718193, - "step": 820 - }, - { - "epoch": 2.16, - "learning_rate": 2.5963092704273302e-06, - "logits/chosen": -1.5569541454315186, - "logits/rejected": -1.5611451864242554, - "logps/chosen": -31.492889404296875, - "logps/rejected": -37.86591339111328, - "loss": 2126.5043, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": 0.010985675267875195, - "rewards/margins": 0.04054299369454384, - "rewards/rejected": -0.02955731749534607, - "step": 830 - }, - { - "epoch": 2.18, - "learning_rate": 2.53966490958702e-06, - "logits/chosen": -1.6258825063705444, - "logits/rejected": -1.6220991611480713, - "logps/chosen": -31.927608489990234, - "logps/rejected": -35.32285690307617, - "loss": 2187.909, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": 0.010410415939986706, - "rewards/margins": 0.033692531287670135, - "rewards/rejected": -0.023282116279006004, - "step": 840 - }, - { - "epoch": 2.21, - "learning_rate": 2.4830001707654135e-06, - "logits/chosen": -1.6901594400405884, - "logits/rejected": -1.6924806833267212, - "logps/chosen": -31.357311248779297, - "logps/rejected": -38.720489501953125, - "loss": 2089.2604, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": 0.014180228114128113, - "rewards/margins": 0.04476577043533325, - "rewards/rejected": -0.03058554232120514, - "step": 850 - }, - { - "epoch": 2.23, - "learning_rate": 2.4263441656635054e-06, - "logits/chosen": -1.5084383487701416, - "logits/rejected": -1.5038378238677979, - "logps/chosen": -35.46610641479492, - "logps/rejected": -35.76911544799805, - "loss": 2161.0381, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": 0.00815567746758461, - "rewards/margins": 0.03692782670259476, - "rewards/rejected": -0.02877214550971985, - "step": 860 - }, - { - "epoch": 2.26, - "learning_rate": 2.3697260014953107e-06, - "logits/chosen": -1.5508906841278076, - "logits/rejected": -1.5508755445480347, - "logps/chosen": -34.66786575317383, - "logps/rejected": -37.92455291748047, - "loss": 2108.2408, - "rewards/accuracies": 0.875, - "rewards/chosen": 0.011749391444027424, - "rewards/margins": 0.042532261461019516, - "rewards/rejected": -0.03078286722302437, - "step": 870 - }, - { - "epoch": 2.29, - "learning_rate": 2.3131747660339396e-06, - "logits/chosen": -1.5863968133926392, - "logits/rejected": -1.5747534036636353, - "logps/chosen": -32.88094711303711, - "logps/rejected": -36.167869567871094, - "loss": 2086.3512, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.012256348505616188, - "rewards/margins": 0.04471471160650253, - "rewards/rejected": -0.032458364963531494, - "step": 880 - }, - { - "epoch": 2.31, - "learning_rate": 2.256719512667651e-06, - "logits/chosen": -1.6844234466552734, - "logits/rejected": -1.6891088485717773, - "logps/chosen": -32.565818786621094, - "logps/rejected": -36.00508499145508, - "loss": 2111.5299, - "rewards/accuracies": 0.8374999761581421, - "rewards/chosen": 0.00764118880033493, - "rewards/margins": 0.04332723096013069, - "rewards/rejected": -0.03568603843450546, - "step": 890 - }, - { - "epoch": 2.34, - "learning_rate": 2.2003892454735786e-06, - "logits/chosen": -1.6079037189483643, - "logits/rejected": -1.6007716655731201, - "logps/chosen": -33.532958984375, - "logps/rejected": -35.84773254394531, - "loss": 2065.36, - "rewards/accuracies": 0.9125000238418579, - "rewards/chosen": 0.013211173936724663, - "rewards/margins": 0.047667164355516434, - "rewards/rejected": -0.03445599228143692, - "step": 900 - }, - { - "epoch": 2.34, - "eval_logits/chosen": -1.9166266918182373, - "eval_logits/rejected": -1.9121689796447754, - "eval_logps/chosen": -35.597782135009766, - "eval_logps/rejected": -39.466796875, - "eval_loss": 2467.73291015625, - "eval_rewards/accuracies": 0.5598006844520569, - "eval_rewards/chosen": -0.01563231088221073, - "eval_rewards/margins": 0.003869474632665515, - "eval_rewards/rejected": -0.019501786679029465, - "eval_runtime": 145.8623, - "eval_samples_per_second": 2.352, - "eval_steps_per_second": 0.295, - "step": 900 - }, - { - "epoch": 2.36, - "learning_rate": 2.1442129043167877e-06, - "logits/chosen": -1.6010173559188843, - "logits/rejected": -1.6011197566986084, - "logps/chosen": -30.0789794921875, - "logps/rejected": -38.56249237060547, - "loss": 2050.1326, - "rewards/accuracies": 0.9375, - "rewards/chosen": 0.010881805792450905, - "rewards/margins": 0.04960983246564865, - "rewards/rejected": -0.0387280248105526, - "step": 910 - }, - { - "epoch": 2.39, - "learning_rate": 2.088219349982323e-06, - "logits/chosen": -1.5475326776504517, - "logits/rejected": -1.5394935607910156, - "logps/chosen": -31.198156356811523, - "logps/rejected": -37.26829147338867, - "loss": 2106.8049, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": 0.006407910026609898, - "rewards/margins": 0.04337712749838829, - "rewards/rejected": -0.03696921840310097, - "step": 920 - }, - { - "epoch": 2.42, - "learning_rate": 2.0324373493478803e-06, - "logits/chosen": -1.7085663080215454, - "logits/rejected": -1.7077171802520752, - "logps/chosen": -29.088947296142578, - "logps/rejected": -36.169918060302734, - "loss": 2121.3875, - "rewards/accuracies": 0.887499988079071, - "rewards/chosen": 0.009080884046852589, - "rewards/margins": 0.04213147610425949, - "rewards/rejected": -0.03305059298872948, - "step": 930 - }, - { - "epoch": 2.44, - "learning_rate": 1.976895560604729e-06, - "logits/chosen": -1.5875581502914429, - "logits/rejected": -1.597825527191162, - "logps/chosen": -33.56743621826172, - "logps/rejected": -36.87736511230469, - "loss": 2078.3281, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.008576255291700363, - "rewards/margins": 0.04745348542928696, - "rewards/rejected": -0.038877226412296295, - "step": 940 - }, - { - "epoch": 2.47, - "learning_rate": 1.921622518534466e-06, - "logits/chosen": -1.6309471130371094, - "logits/rejected": -1.6342302560806274, - "logps/chosen": -30.122472763061523, - "logps/rejected": -35.17578887939453, - "loss": 2139.8102, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.005252503324300051, - "rewards/margins": 0.03964962065219879, - "rewards/rejected": -0.03439711779356003, - "step": 950 - }, - { - "epoch": 2.49, - "learning_rate": 1.8666466198491794e-06, - "logits/chosen": -1.6128339767456055, - "logits/rejected": -1.6084800958633423, - "logps/chosen": -33.1923713684082, - "logps/rejected": -37.553993225097656, - "loss": 2098.0742, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.008828431367874146, - "rewards/margins": 0.04530448839068413, - "rewards/rejected": -0.03647606074810028, - "step": 960 - }, - { - "epoch": 2.52, - "learning_rate": 1.8119961086025376e-06, - "logits/chosen": -1.532700538635254, - "logits/rejected": -1.5348151922225952, - "logps/chosen": -31.818435668945312, - "logps/rejected": -38.84191131591797, - "loss": 2077.2633, - "rewards/accuracies": 0.9125000238418579, - "rewards/chosen": 0.009943163953721523, - "rewards/margins": 0.04654636234045029, - "rewards/rejected": -0.03660320118069649, - "step": 970 - }, - { - "epoch": 2.55, - "learning_rate": 1.7576990616793139e-06, - "logits/chosen": -1.5599863529205322, - "logits/rejected": -1.5538241863250732, - "logps/chosen": -35.342994689941406, - "logps/rejected": -40.327735900878906, - "loss": 2127.4625, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.0006207667174749076, - "rewards/margins": 0.041278596967458725, - "rewards/rejected": -0.040657833218574524, - "step": 980 - }, - { - "epoch": 2.57, - "learning_rate": 1.7037833743707892e-06, - "logits/chosen": -1.5475926399230957, - "logits/rejected": -1.5419275760650635, - "logps/chosen": -30.222143173217773, - "logps/rejected": -39.89192581176758, - "loss": 2074.9148, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": 0.007808062247931957, - "rewards/margins": 0.046828486025333405, - "rewards/rejected": -0.03902042657136917, - "step": 990 - }, - { - "epoch": 2.6, - "learning_rate": 1.6502767460434588e-06, - "logits/chosen": -1.5285046100616455, - "logits/rejected": -1.51847505569458, - "logps/chosen": -31.368820190429688, - "logps/rejected": -32.55973815917969, - "loss": 2196.3246, - "rewards/accuracies": 0.8374999761581421, - "rewards/chosen": 0.0037986873649060726, - "rewards/margins": 0.033398739993572235, - "rewards/rejected": -0.029600050300359726, - "step": 1000 - }, - { - "epoch": 2.6, - "eval_logits/chosen": -1.8825738430023193, - "eval_logits/rejected": -1.878185510635376, - "eval_logps/chosen": -35.89006423950195, - "eval_logps/rejected": -39.809085845947266, - "eval_loss": 2464.3681640625, - "eval_rewards/accuracies": 0.5510797500610352, - "eval_rewards/chosen": -0.01855510286986828, - "eval_rewards/margins": 0.004369591362774372, - "eval_rewards/rejected": -0.022924695163965225, - "eval_runtime": 145.9322, - "eval_samples_per_second": 2.35, - "eval_steps_per_second": 0.295, - "step": 1000 - }, - { - "epoch": 2.62, - "learning_rate": 1.5972066659083796e-06, - "logits/chosen": -1.6132911443710327, - "logits/rejected": -1.612853765487671, - "logps/chosen": -31.150531768798828, - "logps/rejected": -33.747276306152344, - "loss": 2132.9832, - "rewards/accuracies": 0.8374999761581421, - "rewards/chosen": 0.006736672017723322, - "rewards/margins": 0.04162462800741196, - "rewards/rejected": -0.03488795459270477, - "step": 1010 - }, - { - "epoch": 2.65, - "learning_rate": 1.5446003988985041e-06, - "logits/chosen": -1.6631847620010376, - "logits/rejected": -1.663709044456482, - "logps/chosen": -31.313705444335938, - "logps/rejected": -34.68809127807617, - "loss": 2119.0834, - "rewards/accuracies": 0.8125, - "rewards/chosen": 0.006298714783042669, - "rewards/margins": 0.04152151942253113, - "rewards/rejected": -0.03522280603647232, - "step": 1020 - }, - { - "epoch": 2.68, - "learning_rate": 1.4924849716612211e-06, - "logits/chosen": -1.6204774379730225, - "logits/rejected": -1.6248018741607666, - "logps/chosen": -31.891056060791016, - "logps/rejected": -30.86574363708496, - "loss": 2204.4455, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.003890159772709012, - "rewards/margins": 0.03271043300628662, - "rewards/rejected": -0.028820272535085678, - "step": 1030 - }, - { - "epoch": 2.7, - "learning_rate": 1.440887158673332e-06, - "logits/chosen": -1.6208614110946655, - "logits/rejected": -1.6127662658691406, - "logps/chosen": -30.53921127319336, - "logps/rejected": -37.73412322998047, - "loss": 2060.6334, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.006386814173310995, - "rewards/margins": 0.048725761473178864, - "rewards/rejected": -0.04233894869685173, - "step": 1040 - }, - { - "epoch": 2.73, - "learning_rate": 1.3898334684855647e-06, - "logits/chosen": -1.572749137878418, - "logits/rejected": -1.5833861827850342, - "logps/chosen": -33.21635437011719, - "logps/rejected": -35.97629928588867, - "loss": 2125.9623, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.003367505269125104, - "rewards/margins": 0.04068039730191231, - "rewards/rejected": -0.03731289133429527, - "step": 1050 - }, - { - "epoch": 2.75, - "learning_rate": 1.3393501301037245e-06, - "logits/chosen": -1.6456438302993774, - "logits/rejected": -1.6367809772491455, - "logps/chosen": -32.83705139160156, - "logps/rejected": -41.05707550048828, - "loss": 2036.1875, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": 0.00663726544007659, - "rewards/margins": 0.05389411002397537, - "rewards/rejected": -0.047256845980882645, - "step": 1060 - }, - { - "epoch": 2.78, - "learning_rate": 1.2894630795134454e-06, - "logits/chosen": -1.5514529943466187, - "logits/rejected": -1.5537471771240234, - "logps/chosen": -34.93277359008789, - "logps/rejected": -36.4191780090332, - "loss": 2069.123, - "rewards/accuracies": 0.8125, - "rewards/chosen": 0.010565127246081829, - "rewards/margins": 0.04777819663286209, - "rewards/rejected": -0.03721306473016739, - "step": 1070 - }, - { - "epoch": 2.81, - "learning_rate": 1.2401979463554984e-06, - "logits/chosen": -1.6648813486099243, - "logits/rejected": -1.6647241115570068, - "logps/chosen": -32.27571487426758, - "logps/rejected": -38.67870330810547, - "loss": 2022.6195, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": 0.007819265127182007, - "rewards/margins": 0.053449440747499466, - "rewards/rejected": -0.04563017934560776, - "step": 1080 - }, - { - "epoch": 2.83, - "learning_rate": 1.1915800407584705e-06, - "logits/chosen": -1.6493892669677734, - "logits/rejected": -1.6531116962432861, - "logps/chosen": -30.350088119506836, - "logps/rejected": -37.505104064941406, - "loss": 2092.1893, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": 0.005432260222733021, - "rewards/margins": 0.04500500112771988, - "rewards/rejected": -0.03957274183630943, - "step": 1090 - }, - { - "epoch": 2.86, - "learning_rate": 1.1436343403356019e-06, - "logits/chosen": -1.637351632118225, - "logits/rejected": -1.642260193824768, - "logps/chosen": -33.443363189697266, - "logps/rejected": -33.12295913696289, - "loss": 2237.6512, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.0012564079370349646, - "rewards/margins": 0.028469255194067955, - "rewards/rejected": -0.027212847024202347, - "step": 1100 - }, - { - "epoch": 2.86, - "eval_logits/chosen": -1.8689898252487183, - "eval_logits/rejected": -1.8646091222763062, - "eval_logps/chosen": -35.997581481933594, - "eval_logps/rejected": -39.92316436767578, - "eval_loss": 2464.299072265625, - "eval_rewards/accuracies": 0.545265793800354, - "eval_rewards/chosen": -0.01963029056787491, - "eval_rewards/margins": 0.004435177426785231, - "eval_rewards/rejected": -0.024065470322966576, - "eval_runtime": 145.934, - "eval_samples_per_second": 2.35, - "eval_steps_per_second": 0.295, - "step": 1100 - }, - { - "epoch": 2.88, - "learning_rate": 1.0963854773524548e-06, - "logits/chosen": -1.6270654201507568, - "logits/rejected": -1.6279323101043701, - "logps/chosen": -31.9213809967041, - "logps/rejected": -34.26952362060547, - "loss": 2115.159, - "rewards/accuracies": 0.887499988079071, - "rewards/chosen": 0.010187694802880287, - "rewards/margins": 0.042723797261714935, - "rewards/rejected": -0.032536108046770096, - "step": 1110 - }, - { - "epoch": 2.91, - "learning_rate": 1.049857726072005e-06, - "logits/chosen": -1.481483817100525, - "logits/rejected": -1.4840071201324463, - "logps/chosen": -33.81633377075195, - "logps/rejected": -36.6799201965332, - "loss": 2111.7611, - "rewards/accuracies": 0.7875000238418579, - "rewards/chosen": 0.007486463990062475, - "rewards/margins": 0.04387300834059715, - "rewards/rejected": -0.03638654574751854, - "step": 1120 - }, - { - "epoch": 2.94, - "learning_rate": 1.0040749902836508e-06, - "logits/chosen": -1.5083402395248413, - "logits/rejected": -1.5064888000488281, - "logps/chosen": -30.776952743530273, - "logps/rejected": -34.5029182434082, - "loss": 2184.3811, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": 0.002347666770219803, - "rewards/margins": 0.03571944683790207, - "rewards/rejected": -0.03337177634239197, - "step": 1130 - }, - { - "epoch": 2.96, - "learning_rate": 9.59060791022566e-07, - "logits/chosen": -1.641847848892212, - "logits/rejected": -1.637139916419983, - "logps/chosen": -31.925174713134766, - "logps/rejected": -36.643123626708984, - "loss": 2063.1439, - "rewards/accuracies": 0.9125000238418579, - "rewards/chosen": 0.011146209202706814, - "rewards/margins": 0.04803737998008728, - "rewards/rejected": -0.03689116612076759, - "step": 1140 - }, - { - "epoch": 2.99, - "learning_rate": 9.148382544856885e-07, - "logits/chosen": -1.5244739055633545, - "logits/rejected": -1.5154194831848145, - "logps/chosen": -33.11268615722656, - "logps/rejected": -34.98213195800781, - "loss": 2134.0221, - "rewards/accuracies": 0.8125, - "rewards/chosen": 0.0033905128948390484, - "rewards/margins": 0.0402878001332283, - "rewards/rejected": -0.03689728304743767, - "step": 1150 - }, - { - "epoch": 3.01, - "learning_rate": 8.714301001505568e-07, - "logits/chosen": -1.5689246654510498, - "logits/rejected": -1.569645643234253, - "logps/chosen": -33.039424896240234, - "logps/rejected": -34.57393264770508, - "loss": 2133.5518, - "rewards/accuracies": 0.8416666984558105, - "rewards/chosen": 0.0064233215525746346, - "rewards/margins": 0.04014817252755165, - "rewards/rejected": -0.03372485190629959, - "step": 1160 - }, - { - "epoch": 3.04, - "learning_rate": 8.288586291031025e-07, - "logits/chosen": -1.6524006128311157, - "logits/rejected": -1.6470394134521484, - "logps/chosen": -33.036277770996094, - "logps/rejected": -36.106807708740234, - "loss": 2169.9916, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.004896899685263634, - "rewards/margins": 0.03655281290411949, - "rewards/rejected": -0.03165591508150101, - "step": 1170 - }, - { - "epoch": 3.06, - "learning_rate": 7.871457125803897e-07, - "logits/chosen": -1.5274744033813477, - "logits/rejected": -1.5358660221099854, - "logps/chosen": -33.17569351196289, - "logps/rejected": -35.91423416137695, - "loss": 2159.4746, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.0015730734448879957, - "rewards/margins": 0.03710102289915085, - "rewards/rejected": -0.03552795201539993, - "step": 1180 - }, - { - "epoch": 3.09, - "learning_rate": 7.463127807341966e-07, - "logits/chosen": -1.5684562921524048, - "logits/rejected": -1.5627862215042114, - "logps/chosen": -31.11408042907715, - "logps/rejected": -37.03162384033203, - "loss": 2063.1006, - "rewards/accuracies": 0.887499988079071, - "rewards/chosen": 0.010595379397273064, - "rewards/margins": 0.04807712510228157, - "rewards/rejected": -0.03748174011707306, - "step": 1190 - }, - { - "epoch": 3.12, - "learning_rate": 7.063808116212021e-07, - "logits/chosen": -1.5203880071640015, - "logits/rejected": -1.522077202796936, - "logps/chosen": -32.758827209472656, - "logps/rejected": -37.34809112548828, - "loss": 2032.5133, - "rewards/accuracies": 0.8374999761581421, - "rewards/chosen": 0.007316121365875006, - "rewards/margins": 0.05319654941558838, - "rewards/rejected": -0.045880429446697235, - "step": 1200 - }, - { - "epoch": 3.12, - "eval_logits/chosen": -1.866162657737732, - "eval_logits/rejected": -1.861803650856018, - "eval_logps/chosen": -36.02009963989258, - "eval_logps/rejected": -39.944732666015625, - "eval_loss": 2464.46533203125, - "eval_rewards/accuracies": 0.5598006844520569, - "eval_rewards/chosen": -0.019855517894029617, - "eval_rewards/margins": 0.004425638820976019, - "eval_rewards/rejected": -0.024281155318021774, - "eval_runtime": 145.911, - "eval_samples_per_second": 2.351, - "eval_steps_per_second": 0.295, - "step": 1200 - }, - { - "epoch": 3.14, - "learning_rate": 6.673703204254348e-07, - "logits/chosen": -1.466104507446289, - "logits/rejected": -1.4655678272247314, - "logps/chosen": -34.974365234375, - "logps/rejected": -36.99479293823242, - "loss": 2027.0777, - "rewards/accuracies": 0.9125000238418579, - "rewards/chosen": 0.010781234130263329, - "rewards/margins": 0.05363558605313301, - "rewards/rejected": -0.04285435378551483, - "step": 1210 - }, - { - "epoch": 3.17, - "learning_rate": 6.293013489185315e-07, - "logits/chosen": -1.6160688400268555, - "logits/rejected": -1.6096382141113281, - "logps/chosen": -31.019649505615234, - "logps/rejected": -37.333335876464844, - "loss": 2040.7531, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": 0.006840378977358341, - "rewards/margins": 0.05138836055994034, - "rewards/rejected": -0.04454797878861427, - "step": 1220 - }, - { - "epoch": 3.19, - "learning_rate": 5.921934551632086e-07, - "logits/chosen": -1.4815315008163452, - "logits/rejected": -1.4708069562911987, - "logps/chosen": -33.21098327636719, - "logps/rejected": -37.002418518066406, - "loss": 2020.3844, - "rewards/accuracies": 0.875, - "rewards/chosen": 0.011746999807655811, - "rewards/margins": 0.05283288285136223, - "rewards/rejected": -0.041085876524448395, - "step": 1230 - }, - { - "epoch": 3.22, - "learning_rate": 5.560657034652405e-07, - "logits/chosen": -1.5710773468017578, - "logits/rejected": -1.565071702003479, - "logps/chosen": -30.515600204467773, - "logps/rejected": -32.57416534423828, - "loss": 2164.5504, - "rewards/accuracies": 0.75, - "rewards/chosen": 0.001846333616413176, - "rewards/margins": 0.03891240432858467, - "rewards/rejected": -0.03706606850028038, - "step": 1240 - }, - { - "epoch": 3.25, - "learning_rate": 5.2093665457911e-07, - "logits/chosen": -1.586578130722046, - "logits/rejected": -1.5945528745651245, - "logps/chosen": -34.664546966552734, - "logps/rejected": -34.95591354370117, - "loss": 2119.1492, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.00730844447389245, - "rewards/margins": 0.04168447107076645, - "rewards/rejected": -0.034376028925180435, - "step": 1250 - }, - { - "epoch": 3.27, - "learning_rate": 4.868243561723535e-07, - "logits/chosen": -1.5772063732147217, - "logits/rejected": -1.577383041381836, - "logps/chosen": -32.863037109375, - "logps/rejected": -37.241397857666016, - "loss": 2082.3426, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": 0.0016552206361666322, - "rewards/margins": 0.045920491218566895, - "rewards/rejected": -0.044265273958444595, - "step": 1260 - }, - { - "epoch": 3.3, - "learning_rate": 4.537463335535161e-07, - "logits/chosen": -1.5012638568878174, - "logits/rejected": -1.5000605583190918, - "logps/chosen": -32.05634689331055, - "logps/rejected": -37.78838348388672, - "loss": 2023.8406, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.010344896465539932, - "rewards/margins": 0.05302266404032707, - "rewards/rejected": -0.04267776757478714, - "step": 1270 - }, - { - "epoch": 3.32, - "learning_rate": 4.217195806684629e-07, - "logits/chosen": -1.4007158279418945, - "logits/rejected": -1.3967105150222778, - "logps/chosen": -34.35404586791992, - "logps/rejected": -34.51953887939453, - "loss": 2094.5336, - "rewards/accuracies": 0.7875000238418579, - "rewards/chosen": 0.008833990432322025, - "rewards/margins": 0.04476445913314819, - "rewards/rejected": -0.03593046963214874, - "step": 1280 - }, - { - "epoch": 3.35, - "learning_rate": 3.907605513696808e-07, - "logits/chosen": -1.5938528776168823, - "logits/rejected": -1.5794765949249268, - "logps/chosen": -34.033790588378906, - "logps/rejected": -39.640159606933594, - "loss": 2033.0867, - "rewards/accuracies": 0.875, - "rewards/chosen": 0.00313788210041821, - "rewards/margins": 0.05188627913594246, - "rewards/rejected": -0.04874839633703232, - "step": 1290 - }, - { - "epoch": 3.38, - "learning_rate": 3.6088515096305675e-07, - "logits/chosen": -1.5395238399505615, - "logits/rejected": -1.5438177585601807, - "logps/chosen": -32.82494354248047, - "logps/rejected": -41.31450653076172, - "loss": 1967.852, - "rewards/accuracies": 0.9624999761581421, - "rewards/chosen": 0.006926815025508404, - "rewards/margins": 0.059039629995822906, - "rewards/rejected": -0.05211281776428223, - "step": 1300 - }, - { - "epoch": 3.38, - "eval_logits/chosen": -1.8651809692382812, - "eval_logits/rejected": -1.8608282804489136, - "eval_logps/chosen": -36.00510025024414, - "eval_logps/rejected": -39.96323013305664, - "eval_loss": 2461.20361328125, - "eval_rewards/accuracies": 0.5539867281913757, - "eval_rewards/chosen": -0.01970548741519451, - "eval_rewards/margins": 0.004760634154081345, - "eval_rewards/rejected": -0.024466121569275856, - "eval_runtime": 145.7832, - "eval_samples_per_second": 2.353, - "eval_steps_per_second": 0.295, - "step": 1300 - }, - { - "epoch": 3.4, - "learning_rate": 3.321087280364757e-07, - "logits/chosen": -1.519902229309082, - "logits/rejected": -1.520318627357483, - "logps/chosen": -35.439937591552734, - "logps/rejected": -41.7154426574707, - "loss": 2029.7744, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": 0.008040779270231724, - "rewards/margins": 0.054354071617126465, - "rewards/rejected": -0.046313293278217316, - "step": 1310 - }, - { - "epoch": 3.43, - "learning_rate": 3.044460665744284e-07, - "logits/chosen": -1.601548194885254, - "logits/rejected": -1.6004295349121094, - "logps/chosen": -31.515766143798828, - "logps/rejected": -35.187618255615234, - "loss": 2068.9873, - "rewards/accuracies": 0.8500000238418579, - "rewards/chosen": 0.005780898500233889, - "rewards/margins": 0.04812353104352951, - "rewards/rejected": -0.04234262555837631, - "step": 1320 - }, - { - "epoch": 3.45, - "learning_rate": 2.779113783626916e-07, - "logits/chosen": -1.521244764328003, - "logits/rejected": -1.5227617025375366, - "logps/chosen": -33.48499298095703, - "logps/rejected": -37.73661804199219, - "loss": 2050.5445, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.008892608806490898, - "rewards/margins": 0.04977206513285637, - "rewards/rejected": -0.04087945073843002, - "step": 1330 - }, - { - "epoch": 3.48, - "learning_rate": 2.5251829568697204e-07, - "logits/chosen": -1.5785353183746338, - "logits/rejected": -1.577487587928772, - "logps/chosen": -30.377460479736328, - "logps/rejected": -35.70696258544922, - "loss": 2086.1504, - "rewards/accuracies": 0.875, - "rewards/chosen": 0.008212093263864517, - "rewards/margins": 0.04536201059818268, - "rewards/rejected": -0.03714991733431816, - "step": 1340 - }, - { - "epoch": 3.51, - "learning_rate": 2.2827986432927774e-07, - "logits/chosen": -1.5923887491226196, - "logits/rejected": -1.5776017904281616, - "logps/chosen": -34.099876403808594, - "logps/rejected": -41.499168395996094, - "loss": 2014.8814, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.002167941303923726, - "rewards/margins": 0.05451526492834091, - "rewards/rejected": -0.052347324788570404, - "step": 1350 - }, - { - "epoch": 3.53, - "learning_rate": 2.0520853686560177e-07, - "logits/chosen": -1.5695116519927979, - "logits/rejected": -1.5808777809143066, - "logps/chosen": -31.036209106445312, - "logps/rejected": -36.333526611328125, - "loss": 2047.0217, - "rewards/accuracies": 0.8374999761581421, - "rewards/chosen": 0.010322622954845428, - "rewards/margins": 0.05126044154167175, - "rewards/rejected": -0.04093782603740692, - "step": 1360 - }, - { - "epoch": 3.56, - "learning_rate": 1.833161662683672e-07, - "logits/chosen": -1.6847254037857056, - "logits/rejected": -1.6842361688613892, - "logps/chosen": -30.7957763671875, - "logps/rejected": -41.20909881591797, - "loss": 1917.1037, - "rewards/accuracies": 0.949999988079071, - "rewards/chosen": 0.01234652940183878, - "rewards/margins": 0.06612871587276459, - "rewards/rejected": -0.05378218740224838, - "step": 1370 - }, - { - "epoch": 3.58, - "learning_rate": 1.626139998169246e-07, - "logits/chosen": -1.5517163276672363, - "logits/rejected": -1.55906081199646, - "logps/chosen": -33.17098617553711, - "logps/rejected": -42.29136657714844, - "loss": 1991.1635, - "rewards/accuracies": 0.887499988079071, - "rewards/chosen": 0.010094953700900078, - "rewards/margins": 0.05851215124130249, - "rewards/rejected": -0.04841719567775726, - "step": 1380 - }, - { - "epoch": 3.61, - "learning_rate": 1.4311267331922535e-07, - "logits/chosen": -1.509541392326355, - "logits/rejected": -1.5056698322296143, - "logps/chosen": -33.6932258605957, - "logps/rejected": -35.26158905029297, - "loss": 2073.9217, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.010739867575466633, - "rewards/margins": 0.04738181084394455, - "rewards/rejected": -0.03664194419980049, - "step": 1390 - }, - { - "epoch": 3.64, - "learning_rate": 1.2482220564763669e-07, - "logits/chosen": -1.6483919620513916, - "logits/rejected": -1.6464850902557373, - "logps/chosen": -30.515304565429688, - "logps/rejected": -35.92851257324219, - "loss": 2084.0914, - "rewards/accuracies": 0.887499988079071, - "rewards/chosen": 0.007812450639903545, - "rewards/margins": 0.045652881264686584, - "rewards/rejected": -0.03784043341875076, - "step": 1400 - }, - { - "epoch": 3.64, - "eval_logits/chosen": -1.8656350374221802, - "eval_logits/rejected": -1.8612688779830933, - "eval_logps/chosen": -36.0242919921875, - "eval_logps/rejected": -39.967525482177734, - "eval_loss": 2462.612060546875, - "eval_rewards/accuracies": 0.5598006844520569, - "eval_rewards/chosen": -0.019897375255823135, - "eval_rewards/margins": 0.00461164116859436, - "eval_rewards/rejected": -0.024509014561772346, - "eval_runtime": 145.8827, - "eval_samples_per_second": 2.351, - "eval_steps_per_second": 0.295, - "step": 1400 - }, - { - "epoch": 3.66, - "learning_rate": 1.0775199359171346e-07, - "logits/chosen": -1.603907823562622, - "logits/rejected": -1.5997169017791748, - "logps/chosen": -32.91996765136719, - "logps/rejected": -33.04853820800781, - "loss": 2124.1023, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": 0.00722561776638031, - "rewards/margins": 0.042166419327259064, - "rewards/rejected": -0.034940801560878754, - "step": 1410 - }, - { - "epoch": 3.69, - "learning_rate": 9.191080703056604e-08, - "logits/chosen": -1.5563673973083496, - "logits/rejected": -1.5573166608810425, - "logps/chosen": -32.476478576660156, - "logps/rejected": -38.40611267089844, - "loss": 2100.1363, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.008883940055966377, - "rewards/margins": 0.04370499402284622, - "rewards/rejected": -0.034821052104234695, - "step": 1420 - }, - { - "epoch": 3.71, - "learning_rate": 7.730678442730539e-08, - "logits/chosen": -1.5083153247833252, - "logits/rejected": -1.5021404027938843, - "logps/chosen": -33.119529724121094, - "logps/rejected": -41.40352249145508, - "loss": 2006.5551, - "rewards/accuracies": 0.887499988079071, - "rewards/chosen": 0.010222419165074825, - "rewards/margins": 0.057363539934158325, - "rewards/rejected": -0.047141119837760925, - "step": 1430 - }, - { - "epoch": 3.74, - "learning_rate": 6.394742864787806e-08, - "logits/chosen": -1.5188112258911133, - "logits/rejected": -1.5131750106811523, - "logps/chosen": -28.496017456054688, - "logps/rejected": -35.698753356933594, - "loss": 2071.1072, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": 0.008172214962542057, - "rewards/margins": 0.047853223979473114, - "rewards/rejected": -0.03968100994825363, - "step": 1440 - }, - { - "epoch": 3.77, - "learning_rate": 5.183960310644748e-08, - "logits/chosen": -1.5658049583435059, - "logits/rejected": -1.5554416179656982, - "logps/chosen": -32.46862030029297, - "logps/rejected": -39.8042106628418, - "loss": 2087.5406, - "rewards/accuracies": 0.875, - "rewards/chosen": 0.00012906994379591197, - "rewards/margins": 0.04531756415963173, - "rewards/rejected": -0.04518849402666092, - "step": 1450 - }, - { - "epoch": 3.79, - "learning_rate": 4.098952823928693e-08, - "logits/chosen": -1.5264514684677124, - "logits/rejected": -1.5234899520874023, - "logps/chosen": -32.81951141357422, - "logps/rejected": -34.28097915649414, - "loss": 2147.4246, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.004902643617242575, - "rewards/margins": 0.03880416229367256, - "rewards/rejected": -0.03390152007341385, - "step": 1460 - }, - { - "epoch": 3.82, - "learning_rate": 3.1402778309014284e-08, - "logits/chosen": -1.5864768028259277, - "logits/rejected": -1.592008352279663, - "logps/chosen": -30.852685928344727, - "logps/rejected": -36.960899353027344, - "loss": 2034.7088, - "rewards/accuracies": 0.925000011920929, - "rewards/chosen": 0.012164896354079247, - "rewards/margins": 0.05214967206120491, - "rewards/rejected": -0.03998477756977081, - "step": 1470 - }, - { - "epoch": 3.84, - "learning_rate": 2.3084278540791427e-08, - "logits/chosen": -1.5875871181488037, - "logits/rejected": -1.5970607995986938, - "logps/chosen": -30.86104393005371, - "logps/rejected": -33.266883850097656, - "loss": 2088.916, - "rewards/accuracies": 0.887499988079071, - "rewards/chosen": 0.008342106826603413, - "rewards/margins": 0.044725269079208374, - "rewards/rejected": -0.03638315945863724, - "step": 1480 - }, - { - "epoch": 3.87, - "learning_rate": 1.6038302591975807e-08, - "logits/chosen": -1.5217053890228271, - "logits/rejected": -1.5153101682662964, - "logps/chosen": -33.224308013916016, - "logps/rejected": -36.06281661987305, - "loss": 2109.2895, - "rewards/accuracies": 0.862500011920929, - "rewards/chosen": 0.004321801941841841, - "rewards/margins": 0.042845211923122406, - "rewards/rejected": -0.03852340579032898, - "step": 1490 - }, - { - "epoch": 3.9, - "learning_rate": 1.0268470356514237e-08, - "logits/chosen": -1.5790612697601318, - "logits/rejected": -1.5761630535125732, - "logps/chosen": -33.064552307128906, - "logps/rejected": -37.95295333862305, - "loss": 2032.7156, - "rewards/accuracies": 0.875, - "rewards/chosen": 0.005610200576484203, - "rewards/margins": 0.05253750830888748, - "rewards/rejected": -0.046927306801080704, - "step": 1500 - }, - { - "epoch": 3.9, - "eval_logits/chosen": -1.8648215532302856, - "eval_logits/rejected": -1.8604679107666016, - "eval_logps/chosen": -36.019161224365234, - "eval_logps/rejected": -39.95817184448242, - "eval_loss": 2463.105712890625, - "eval_rewards/accuracies": 0.565614640712738, - "eval_rewards/chosen": -0.019846076145768166, - "eval_rewards/margins": 0.004569429438561201, - "eval_rewards/rejected": -0.024415504187345505, - "eval_runtime": 145.9094, - "eval_samples_per_second": 2.351, - "eval_steps_per_second": 0.295, - "step": 1500 - }, - { - "epoch": 3.92, - "learning_rate": 5.777746105209147e-09, - "logits/chosen": -1.645821213722229, - "logits/rejected": -1.6461843252182007, - "logps/chosen": -28.770349502563477, - "logps/rejected": -36.86784744262695, - "loss": 2038.2139, - "rewards/accuracies": 0.762499988079071, - "rewards/chosen": 0.010459242388606071, - "rewards/margins": 0.05173317715525627, - "rewards/rejected": -0.04127394035458565, - "step": 1510 - }, - { - "epoch": 3.95, - "learning_rate": 2.5684369628148352e-09, - "logits/chosen": -1.5073591470718384, - "logits/rejected": -1.5066778659820557, - "logps/chosen": -32.11809158325195, - "logps/rejected": -37.76689910888672, - "loss": 2069.6059, - "rewards/accuracies": 0.824999988079071, - "rewards/chosen": 0.00923779234290123, - "rewards/margins": 0.04816945642232895, - "rewards/rejected": -0.03893166407942772, - "step": 1520 - }, - { - "epoch": 3.97, - "learning_rate": 6.421917227455999e-10, - "logits/chosen": -1.6536405086517334, - "logits/rejected": -1.6510140895843506, - "logps/chosen": -30.886306762695312, - "logps/rejected": -35.38301467895508, - "loss": 2094.1572, - "rewards/accuracies": 0.8999999761581421, - "rewards/chosen": 0.0058775185607373714, - "rewards/margins": 0.04487111419439316, - "rewards/rejected": -0.03899358958005905, - "step": 1530 - }, - { - "epoch": 4.0, - "learning_rate": 0.0, - "logits/chosen": -1.638646125793457, - "logits/rejected": -1.6395971775054932, - "logps/chosen": -30.43972396850586, - "logps/rejected": -33.02666473388672, - "loss": 2186.526, - "rewards/accuracies": 0.82916659116745, - "rewards/chosen": 0.001802150160074234, - "rewards/margins": 0.034166958183050156, - "rewards/rejected": -0.03236480802297592, - "step": 1540 - }, - { - "epoch": 4.0, - "step": 1540, + "epoch": 1.0, + "step": 385, "total_flos": 0.0, - "train_loss": 1756.4536297686689, - "train_runtime": 10797.265, - "train_samples_per_second": 1.141, - "train_steps_per_second": 0.143 + "train_loss": 0.9973225085766284, + "train_runtime": 3253.1307, + "train_samples_per_second": 0.946, + "train_steps_per_second": 0.118 } ], "logging_steps": 10, - "max_steps": 1540, + "max_steps": 385, "num_input_tokens_seen": 0, - "num_train_epochs": 4, + "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 4,