|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"eval_steps": 100, |
|
"global_step": 1540, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": -1.7278180122375488, |
|
"logits/rejected": -1.7377450466156006, |
|
"logps/chosen": -29.553977966308594, |
|
"logps/rejected": -42.813133239746094, |
|
"loss": 2500.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": -1.866841197013855, |
|
"logits/rejected": -1.871166467666626, |
|
"logps/chosen": -36.98617172241211, |
|
"logps/rejected": -33.65531539916992, |
|
"loss": 2495.4616, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.00020427265553735197, |
|
"rewards/margins": 0.00045667175436392426, |
|
"rewards/rejected": -0.0002523990988265723, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": -1.997936487197876, |
|
"logits/rejected": -2.0005903244018555, |
|
"logps/chosen": -29.64678382873535, |
|
"logps/rejected": -29.045034408569336, |
|
"loss": 2502.3262, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -4.586054274113849e-05, |
|
"rewards/margins": -0.00022994528990238905, |
|
"rewards/rejected": 0.0001840847689891234, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": -1.9207321405410767, |
|
"logits/rejected": -1.9180399179458618, |
|
"logps/chosen": -31.407222747802734, |
|
"logps/rejected": -33.223663330078125, |
|
"loss": 2498.6508, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 8.869935118127614e-05, |
|
"rewards/margins": 0.00014070476754568517, |
|
"rewards/rejected": -5.200541272643022e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": -2.0177226066589355, |
|
"logits/rejected": -2.0089757442474365, |
|
"logps/chosen": -32.58082962036133, |
|
"logps/rejected": -32.527244567871094, |
|
"loss": 2498.9926, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -4.022592111141421e-05, |
|
"rewards/margins": 0.00010554380423855036, |
|
"rewards/rejected": -0.00014576970716007054, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": -1.8629518747329712, |
|
"logits/rejected": -1.8521617650985718, |
|
"logps/chosen": -33.5596923828125, |
|
"logps/rejected": -35.45528793334961, |
|
"loss": 2499.9863, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -2.213427796959877e-05, |
|
"rewards/margins": 7.289124368980993e-06, |
|
"rewards/rejected": -2.9423434170894325e-05, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": -1.9416849613189697, |
|
"logits/rejected": -1.9436241388320923, |
|
"logps/chosen": -32.546897888183594, |
|
"logps/rejected": -33.21548843383789, |
|
"loss": 2490.4672, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0005328331026248634, |
|
"rewards/margins": 0.00097393908072263, |
|
"rewards/rejected": -0.00044110597809776664, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": -2.072330951690674, |
|
"logits/rejected": -2.0772910118103027, |
|
"logps/chosen": -34.00098419189453, |
|
"logps/rejected": -36.63383102416992, |
|
"loss": 2494.9414, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.00012498130672611296, |
|
"rewards/margins": 0.0005246406653895974, |
|
"rewards/rejected": -0.0006496219430118799, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": -1.9325841665267944, |
|
"logits/rejected": -1.9357010126113892, |
|
"logps/chosen": -34.33161163330078, |
|
"logps/rejected": -34.630489349365234, |
|
"loss": 2486.8059, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0009619827615097165, |
|
"rewards/margins": 0.0013428140664473176, |
|
"rewards/rejected": -0.0003808312467299402, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": -1.9400427341461182, |
|
"logits/rejected": -1.9445598125457764, |
|
"logps/chosen": -32.36492156982422, |
|
"logps/rejected": -32.34357452392578, |
|
"loss": 2491.4584, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.001024983124807477, |
|
"rewards/margins": 0.0008716614102013409, |
|
"rewards/rejected": 0.00015332190378103405, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": -2.037466526031494, |
|
"logits/rejected": -2.0354855060577393, |
|
"logps/chosen": -32.11969757080078, |
|
"logps/rejected": -31.30398178100586, |
|
"loss": 2484.2775, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.001211356371641159, |
|
"rewards/margins": 0.0015890670474618673, |
|
"rewards/rejected": -0.00037771055940538645, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.232342481613159, |
|
"eval_logits/rejected": -2.2275006771087646, |
|
"eval_logps/chosen": -34.01866149902344, |
|
"eval_logps/rejected": -37.52037811279297, |
|
"eval_loss": 2498.15966796875, |
|
"eval_rewards/accuracies": 0.5564784407615662, |
|
"eval_rewards/chosen": 0.00015893821546342224, |
|
"eval_rewards/margins": 0.0001965187693713233, |
|
"eval_rewards/rejected": -3.7580521166091785e-05, |
|
"eval_runtime": 146.0331, |
|
"eval_samples_per_second": 2.349, |
|
"eval_steps_per_second": 0.294, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": -1.991633415222168, |
|
"logits/rejected": -1.9892610311508179, |
|
"logps/chosen": -33.10456085205078, |
|
"logps/rejected": -34.01618194580078, |
|
"loss": 2488.0367, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0013925316743552685, |
|
"rewards/margins": 0.0012606054078787565, |
|
"rewards/rejected": 0.00013192615006119013, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": -2.003302812576294, |
|
"logits/rejected": -1.994974136352539, |
|
"logps/chosen": -32.31616973876953, |
|
"logps/rejected": -32.14063262939453, |
|
"loss": 2489.4971, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.001287340302951634, |
|
"rewards/margins": 0.0010786365019157529, |
|
"rewards/rejected": 0.00020870394655503333, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": -2.0306484699249268, |
|
"logits/rejected": -2.022704601287842, |
|
"logps/chosen": -30.306324005126953, |
|
"logps/rejected": -32.04903793334961, |
|
"loss": 2483.8781, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.0016939423512667418, |
|
"rewards/margins": 0.0016542377416044474, |
|
"rewards/rejected": 3.9704824303044006e-05, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": -1.9617973566055298, |
|
"logits/rejected": -1.9720312356948853, |
|
"logps/chosen": -31.230310440063477, |
|
"logps/rejected": -32.547096252441406, |
|
"loss": 2480.1322, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0018935112748295069, |
|
"rewards/margins": 0.0020156968384981155, |
|
"rewards/rejected": -0.00012218570918776095, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": -1.8725357055664062, |
|
"logits/rejected": -1.8737138509750366, |
|
"logps/chosen": -33.889976501464844, |
|
"logps/rejected": -34.795631408691406, |
|
"loss": 2466.4881, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.003027186496183276, |
|
"rewards/margins": 0.0034109093248844147, |
|
"rewards/rejected": -0.0003837232361547649, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": -1.9241313934326172, |
|
"logits/rejected": -1.9207313060760498, |
|
"logps/chosen": -35.98552322387695, |
|
"logps/rejected": -32.693538665771484, |
|
"loss": 2484.5627, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0018615787848830223, |
|
"rewards/margins": 0.0015694532776251435, |
|
"rewards/rejected": 0.00029212533263489604, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": -2.023880958557129, |
|
"logits/rejected": -2.0165772438049316, |
|
"logps/chosen": -33.457122802734375, |
|
"logps/rejected": -31.414859771728516, |
|
"loss": 2460.2227, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.0034074243158102036, |
|
"rewards/margins": 0.004037545528262854, |
|
"rewards/rejected": -0.0006301216781139374, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": -2.030813455581665, |
|
"logits/rejected": -2.0360608100891113, |
|
"logps/chosen": -32.20356750488281, |
|
"logps/rejected": -32.4092903137207, |
|
"loss": 2473.8211, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0034122199285775423, |
|
"rewards/margins": 0.0026536998338997364, |
|
"rewards/rejected": 0.0007585205021314323, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": -2.0314080715179443, |
|
"logits/rejected": -2.0286362171173096, |
|
"logps/chosen": -31.27242088317871, |
|
"logps/rejected": -31.320995330810547, |
|
"loss": 2478.5072, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0021653182338923216, |
|
"rewards/margins": 0.0021931403316557407, |
|
"rewards/rejected": -2.7821719413623214e-05, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": -1.9025766849517822, |
|
"logits/rejected": -1.9072151184082031, |
|
"logps/chosen": -31.255901336669922, |
|
"logps/rejected": -32.79901885986328, |
|
"loss": 2464.7859, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0035038013011217117, |
|
"rewards/margins": 0.0035846265964210033, |
|
"rewards/rejected": -8.082549902610481e-05, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.229053497314453, |
|
"eval_logits/rejected": -2.224233865737915, |
|
"eval_logps/chosen": -34.033775329589844, |
|
"eval_logps/rejected": -37.55736541748047, |
|
"eval_loss": 2496.0791015625, |
|
"eval_rewards/accuracies": 0.5544019937515259, |
|
"eval_rewards/chosen": 7.761791493976489e-06, |
|
"eval_rewards/margins": 0.00041520988452248275, |
|
"eval_rewards/rejected": -0.00040744812577031553, |
|
"eval_runtime": 145.4716, |
|
"eval_samples_per_second": 2.358, |
|
"eval_steps_per_second": 0.296, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": -2.0153257846832275, |
|
"logits/rejected": -2.0259604454040527, |
|
"logps/chosen": -31.77630615234375, |
|
"logps/rejected": -33.9268798828125, |
|
"loss": 2474.5629, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.001949988305568695, |
|
"rewards/margins": 0.0025917969178408384, |
|
"rewards/rejected": -0.0006418084958568215, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": -1.907790184020996, |
|
"logits/rejected": -1.9225451946258545, |
|
"logps/chosen": -29.77730941772461, |
|
"logps/rejected": -31.612323760986328, |
|
"loss": 2461.7975, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0033422994893044233, |
|
"rewards/margins": 0.0038837480824440718, |
|
"rewards/rejected": -0.0005414488259702921, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": -1.9650691747665405, |
|
"logits/rejected": -1.9690206050872803, |
|
"logps/chosen": -33.07447052001953, |
|
"logps/rejected": -31.645030975341797, |
|
"loss": 2457.1672, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.003429980482906103, |
|
"rewards/margins": 0.004387288354337215, |
|
"rewards/rejected": -0.0009573075803928077, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": -1.9625787734985352, |
|
"logits/rejected": -1.9408048391342163, |
|
"logps/chosen": -33.812347412109375, |
|
"logps/rejected": -35.121795654296875, |
|
"loss": 2449.7848, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0034140206407755613, |
|
"rewards/margins": 0.005124006420373917, |
|
"rewards/rejected": -0.0017099861288443208, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": -2.003685712814331, |
|
"logits/rejected": -2.000408172607422, |
|
"logps/chosen": -32.71784210205078, |
|
"logps/rejected": -36.25305938720703, |
|
"loss": 2476.9369, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.002072283299639821, |
|
"rewards/margins": 0.0023536235094070435, |
|
"rewards/rejected": -0.00028134050080552697, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": -1.8708940744400024, |
|
"logits/rejected": -1.8684980869293213, |
|
"logps/chosen": -33.97399139404297, |
|
"logps/rejected": -35.522247314453125, |
|
"loss": 2477.3357, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0021441043354570866, |
|
"rewards/margins": 0.002320351079106331, |
|
"rewards/rejected": -0.00017624672909732908, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": -1.8561140298843384, |
|
"logits/rejected": -1.8537418842315674, |
|
"logps/chosen": -34.15688705444336, |
|
"logps/rejected": -31.835697174072266, |
|
"loss": 2470.1545, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0024070844519883394, |
|
"rewards/margins": 0.003050738014280796, |
|
"rewards/rejected": -0.0006436532130464911, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": -1.959524154663086, |
|
"logits/rejected": -1.9490426778793335, |
|
"logps/chosen": -34.99895477294922, |
|
"logps/rejected": -31.8908634185791, |
|
"loss": 2459.8076, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.003576862858608365, |
|
"rewards/margins": 0.004075545351952314, |
|
"rewards/rejected": -0.0004986823769286275, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": -2.0554285049438477, |
|
"logits/rejected": -2.0405356884002686, |
|
"logps/chosen": -30.697057723999023, |
|
"logps/rejected": -32.610191345214844, |
|
"loss": 2482.0641, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0022706831805408, |
|
"rewards/margins": 0.0018542330944910645, |
|
"rewards/rejected": 0.0004164502606727183, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": -1.9246467351913452, |
|
"logits/rejected": -1.922141671180725, |
|
"logps/chosen": -32.302886962890625, |
|
"logps/rejected": -30.90523338317871, |
|
"loss": 2430.5529, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.0062422603368759155, |
|
"rewards/margins": 0.007126508746296167, |
|
"rewards/rejected": -0.0008842485258355737, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -2.22615647315979, |
|
"eval_logits/rejected": -2.2213311195373535, |
|
"eval_logps/chosen": -34.06288146972656, |
|
"eval_logps/rejected": -37.59059524536133, |
|
"eval_loss": 2495.66796875, |
|
"eval_rewards/accuracies": 0.5390365719795227, |
|
"eval_rewards/chosen": -0.0002833307080436498, |
|
"eval_rewards/margins": 0.00045641581527888775, |
|
"eval_rewards/rejected": -0.0007397464942187071, |
|
"eval_runtime": 145.8977, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 4.84533120650964e-06, |
|
"logits/chosen": -1.9104417562484741, |
|
"logits/rejected": -1.9072014093399048, |
|
"logps/chosen": -31.30303955078125, |
|
"logps/rejected": -33.819358825683594, |
|
"loss": 2463.1744, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.0030030703637748957, |
|
"rewards/margins": 0.003775153774768114, |
|
"rewards/rejected": -0.0007720834692008793, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.825108134172131e-06, |
|
"logits/chosen": -1.9579178094863892, |
|
"logits/rejected": -1.9457557201385498, |
|
"logps/chosen": -34.26006317138672, |
|
"logps/rejected": -33.66352462768555, |
|
"loss": 2454.818, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.0033601075410842896, |
|
"rewards/margins": 0.004606915637850761, |
|
"rewards/rejected": -0.0012468084460124373, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 4.80369052967602e-06, |
|
"logits/chosen": -1.991537094116211, |
|
"logits/rejected": -1.9901418685913086, |
|
"logps/chosen": -33.10230255126953, |
|
"logps/rejected": -32.55553436279297, |
|
"loss": 2455.166, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.003834925591945648, |
|
"rewards/margins": 0.004594448953866959, |
|
"rewards/rejected": -0.0007595239439979196, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 4.781089396387968e-06, |
|
"logits/chosen": -2.0774741172790527, |
|
"logits/rejected": -2.0618669986724854, |
|
"logps/chosen": -33.6904182434082, |
|
"logps/rejected": -33.073814392089844, |
|
"loss": 2456.2992, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.004889755509793758, |
|
"rewards/margins": 0.004447542130947113, |
|
"rewards/rejected": 0.0004422132042236626, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 4.757316345716554e-06, |
|
"logits/chosen": -1.9498752355575562, |
|
"logits/rejected": -1.9490633010864258, |
|
"logps/chosen": -32.76622009277344, |
|
"logps/rejected": -32.49995040893555, |
|
"loss": 2446.7852, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.00515871262177825, |
|
"rewards/margins": 0.005506747402250767, |
|
"rewards/rejected": -0.0003480348386801779, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 4.73238359114687e-06, |
|
"logits/chosen": -1.9010334014892578, |
|
"logits/rejected": -1.91123366355896, |
|
"logps/chosen": -31.694040298461914, |
|
"logps/rejected": -35.382728576660156, |
|
"loss": 2441.1234, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.005106499884277582, |
|
"rewards/margins": 0.006046179216355085, |
|
"rewards/rejected": -0.0009396795067004859, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 4.706303941965804e-06, |
|
"logits/chosen": -2.036052703857422, |
|
"logits/rejected": -2.029733180999756, |
|
"logps/chosen": -33.1943473815918, |
|
"logps/rejected": -29.27004051208496, |
|
"loss": 2450.7623, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.004576197825372219, |
|
"rewards/margins": 0.0050123645924031734, |
|
"rewards/rejected": -0.00043616676703095436, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 4.679090796681225e-06, |
|
"logits/chosen": -1.8926509618759155, |
|
"logits/rejected": -1.894890546798706, |
|
"logps/chosen": -33.61520004272461, |
|
"logps/rejected": -30.98312759399414, |
|
"loss": 2428.4018, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.006238477770239115, |
|
"rewards/margins": 0.007343468256294727, |
|
"rewards/rejected": -0.0011049896711483598, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 4.650758136138454e-06, |
|
"logits/chosen": -1.9188745021820068, |
|
"logits/rejected": -1.9176126718521118, |
|
"logps/chosen": -33.695579528808594, |
|
"logps/rejected": -36.02911376953125, |
|
"loss": 2397.0137, |
|
"rewards/accuracies": 0.7291666865348816, |
|
"rewards/chosen": 0.006892119534313679, |
|
"rewards/margins": 0.010623215697705746, |
|
"rewards/rejected": -0.003731096163392067, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 4.621320516337559e-06, |
|
"logits/chosen": -1.8515088558197021, |
|
"logits/rejected": -1.8431167602539062, |
|
"logps/chosen": -30.941198348999023, |
|
"logps/rejected": -36.45293426513672, |
|
"loss": 2370.302, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.008327776566147804, |
|
"rewards/margins": 0.013451090082526207, |
|
"rewards/rejected": -0.005123314447700977, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_logits/chosen": -2.199411153793335, |
|
"eval_logits/rejected": -2.194589376449585, |
|
"eval_logps/chosen": -34.099369049072266, |
|
"eval_logps/rejected": -37.661293029785156, |
|
"eval_loss": 2492.46533203125, |
|
"eval_rewards/accuracies": 0.5622923374176025, |
|
"eval_rewards/chosen": -0.0006481813034042716, |
|
"eval_rewards/margins": 0.0007985630072653294, |
|
"eval_rewards/rejected": -0.001446744310669601, |
|
"eval_runtime": 146.2529, |
|
"eval_samples_per_second": 2.345, |
|
"eval_steps_per_second": 0.294, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 4.590793060955158e-06, |
|
"logits/chosen": -2.0204148292541504, |
|
"logits/rejected": -2.023253917694092, |
|
"logps/chosen": -32.13569259643555, |
|
"logps/rejected": -35.30311584472656, |
|
"loss": 2361.9771, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.00918244756758213, |
|
"rewards/margins": 0.014285160228610039, |
|
"rewards/rejected": -0.0051027145236730576, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 4.559191453574582e-06, |
|
"logits/chosen": -1.856715202331543, |
|
"logits/rejected": -1.8553537130355835, |
|
"logps/chosen": -28.340347290039062, |
|
"logps/rejected": -32.772071838378906, |
|
"loss": 2384.5215, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.007272079586982727, |
|
"rewards/margins": 0.01181616447865963, |
|
"rewards/rejected": -0.004544084891676903, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 4.52653192962838e-06, |
|
"logits/chosen": -1.8120425939559937, |
|
"logits/rejected": -1.8051426410675049, |
|
"logps/chosen": -33.048492431640625, |
|
"logps/rejected": -34.51493453979492, |
|
"loss": 2373.559, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.010255918838083744, |
|
"rewards/margins": 0.012952560558915138, |
|
"rewards/rejected": -0.0026966414880007505, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 4.492831268057307e-06, |
|
"logits/chosen": -1.9794769287109375, |
|
"logits/rejected": -1.9743585586547852, |
|
"logps/chosen": -30.73288345336914, |
|
"logps/rejected": -32.56402587890625, |
|
"loss": 2341.4699, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.010504107922315598, |
|
"rewards/margins": 0.01646825671195984, |
|
"rewards/rejected": -0.0059641506522893906, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 4.458106782690094e-06, |
|
"logits/chosen": -1.8598779439926147, |
|
"logits/rejected": -1.8641446828842163, |
|
"logps/chosen": -33.39701461791992, |
|
"logps/rejected": -33.232383728027344, |
|
"loss": 2329.0439, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.011188305914402008, |
|
"rewards/margins": 0.01762574352324009, |
|
"rewards/rejected": -0.006437439471483231, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 4.422376313348405e-06, |
|
"logits/chosen": -1.8614333868026733, |
|
"logits/rejected": -1.8558467626571655, |
|
"logps/chosen": -34.22340774536133, |
|
"logps/rejected": -35.80681610107422, |
|
"loss": 2302.8248, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.011961170472204685, |
|
"rewards/margins": 0.020538393408060074, |
|
"rewards/rejected": -0.008577222935855389, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 4.3856582166815696e-06, |
|
"logits/chosen": -1.8815793991088867, |
|
"logits/rejected": -1.8814213275909424, |
|
"logps/chosen": -33.06370544433594, |
|
"logps/rejected": -34.739097595214844, |
|
"loss": 2340.4611, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.011046240106225014, |
|
"rewards/margins": 0.01658240333199501, |
|
"rewards/rejected": -0.005536160431802273, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 4.347971356735789e-06, |
|
"logits/chosen": -1.9247829914093018, |
|
"logits/rejected": -1.9061830043792725, |
|
"logps/chosen": -32.92525863647461, |
|
"logps/rejected": -33.87827682495117, |
|
"loss": 2304.1588, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.012355051003396511, |
|
"rewards/margins": 0.020438065752387047, |
|
"rewards/rejected": -0.008083016611635685, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 4.309335095262675e-06, |
|
"logits/chosen": -1.8873250484466553, |
|
"logits/rejected": -1.886690378189087, |
|
"logps/chosen": -30.484582901000977, |
|
"logps/rejected": -31.771377563476562, |
|
"loss": 2340.8271, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.011267202906310558, |
|
"rewards/margins": 0.016520529985427856, |
|
"rewards/rejected": -0.0052533275447785854, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 4.269769281772082e-06, |
|
"logits/chosen": -1.8447484970092773, |
|
"logits/rejected": -1.837871789932251, |
|
"logps/chosen": -31.42559242248535, |
|
"logps/rejected": -35.48058319091797, |
|
"loss": 2298.9412, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.01244533620774746, |
|
"rewards/margins": 0.02098379284143448, |
|
"rewards/rejected": -0.00853845663368702, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_logits/chosen": -2.1343138217926025, |
|
"eval_logits/rejected": -2.12954044342041, |
|
"eval_logps/chosen": -34.28038787841797, |
|
"eval_logps/rejected": -37.88829803466797, |
|
"eval_loss": 2488.40625, |
|
"eval_rewards/accuracies": 0.5772424936294556, |
|
"eval_rewards/chosen": -0.002458348637446761, |
|
"eval_rewards/margins": 0.0012584367068484426, |
|
"eval_rewards/rejected": -0.0037167854607105255, |
|
"eval_runtime": 145.9415, |
|
"eval_samples_per_second": 2.35, |
|
"eval_steps_per_second": 0.295, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 4.22929424333435e-06, |
|
"logits/chosen": -1.8362356424331665, |
|
"logits/rejected": -1.8398487567901611, |
|
"logps/chosen": -28.270023345947266, |
|
"logps/rejected": -33.78419876098633, |
|
"loss": 2323.2496, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.009048042818903923, |
|
"rewards/margins": 0.018297135829925537, |
|
"rewards/rejected": -0.009249093011021614, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 4.1879307741372085e-06, |
|
"logits/chosen": -1.8308753967285156, |
|
"logits/rejected": -1.8415968418121338, |
|
"logps/chosen": -32.14521408081055, |
|
"logps/rejected": -31.652883529663086, |
|
"loss": 2299.3625, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.011171149089932442, |
|
"rewards/margins": 0.021183136850595474, |
|
"rewards/rejected": -0.010011989623308182, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 4.145700124802693e-06, |
|
"logits/chosen": -1.7703996896743774, |
|
"logits/rejected": -1.7680895328521729, |
|
"logps/chosen": -30.59372329711914, |
|
"logps/rejected": -31.122241973876953, |
|
"loss": 2307.9037, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.010733595117926598, |
|
"rewards/margins": 0.020188378170132637, |
|
"rewards/rejected": -0.009454783983528614, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 4.102623991469562e-06, |
|
"logits/chosen": -1.840515375137329, |
|
"logits/rejected": -1.833764672279358, |
|
"logps/chosen": -33.129478454589844, |
|
"logps/rejected": -34.03999328613281, |
|
"loss": 2296.7414, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.01103346236050129, |
|
"rewards/margins": 0.021258534863591194, |
|
"rewards/rejected": -0.010225074365735054, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 4.058724504646834e-06, |
|
"logits/chosen": -1.8037725687026978, |
|
"logits/rejected": -1.8101667165756226, |
|
"logps/chosen": -30.930444717407227, |
|
"logps/rejected": -33.56714630126953, |
|
"loss": 2343.0631, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.008227399550378323, |
|
"rewards/margins": 0.016311541199684143, |
|
"rewards/rejected": -0.008084140717983246, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 4.014024217844167e-06, |
|
"logits/chosen": -1.8711225986480713, |
|
"logits/rejected": -1.8482850790023804, |
|
"logps/chosen": -30.459259033203125, |
|
"logps/rejected": -33.72909927368164, |
|
"loss": 2335.1771, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.009783074259757996, |
|
"rewards/margins": 0.01711348444223404, |
|
"rewards/rejected": -0.007330409251153469, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 3.968546095984911e-06, |
|
"logits/chosen": -1.8007291555404663, |
|
"logits/rejected": -1.7958128452301025, |
|
"logps/chosen": -31.415090560913086, |
|
"logps/rejected": -32.90663528442383, |
|
"loss": 2330.2, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.010016413405537605, |
|
"rewards/margins": 0.01787360943853855, |
|
"rewards/rejected": -0.007857195101678371, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 3.922313503607806e-06, |
|
"logits/chosen": -1.83207106590271, |
|
"logits/rejected": -1.8339207172393799, |
|
"logps/chosen": -33.55345153808594, |
|
"logps/rejected": -36.1082763671875, |
|
"loss": 2297.7742, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.008223430253565311, |
|
"rewards/margins": 0.02129700407385826, |
|
"rewards/rejected": -0.013073575682938099, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 3.875350192863368e-06, |
|
"logits/chosen": -1.812063217163086, |
|
"logits/rejected": -1.811581015586853, |
|
"logps/chosen": -29.506006240844727, |
|
"logps/rejected": -32.62559127807617, |
|
"loss": 2286.0877, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.010860500857234001, |
|
"rewards/margins": 0.022500045597553253, |
|
"rewards/rejected": -0.011639544740319252, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 3.8276802913111436e-06, |
|
"logits/chosen": -1.8164310455322266, |
|
"logits/rejected": -1.8141977787017822, |
|
"logps/chosen": -31.94429588317871, |
|
"logps/rejected": -33.383872985839844, |
|
"loss": 2298.7582, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.010539887472987175, |
|
"rewards/margins": 0.021379008889198303, |
|
"rewards/rejected": -0.010839122347533703, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_logits/chosen": -2.063612222671509, |
|
"eval_logits/rejected": -2.058927059173584, |
|
"eval_logps/chosen": -34.559974670410156, |
|
"eval_logps/rejected": -38.285404205322266, |
|
"eval_loss": 2477.658935546875, |
|
"eval_rewards/accuracies": 0.6121262311935425, |
|
"eval_rewards/chosen": -0.005254245828837156, |
|
"eval_rewards/margins": 0.0024335861671715975, |
|
"eval_rewards/rejected": -0.007687832228839397, |
|
"eval_runtime": 145.6811, |
|
"eval_samples_per_second": 2.354, |
|
"eval_steps_per_second": 0.295, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 3.7793282895240927e-06, |
|
"logits/chosen": -1.847764015197754, |
|
"logits/rejected": -1.8541465997695923, |
|
"logps/chosen": -31.449474334716797, |
|
"logps/rejected": -33.3134880065918, |
|
"loss": 2305.692, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.007879074662923813, |
|
"rewards/margins": 0.02038208767771721, |
|
"rewards/rejected": -0.012503013014793396, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 3.730319028506478e-06, |
|
"logits/chosen": -1.7965530157089233, |
|
"logits/rejected": -1.7944053411483765, |
|
"logps/chosen": -33.688297271728516, |
|
"logps/rejected": -32.105934143066406, |
|
"loss": 2292.9066, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.010675834491848946, |
|
"rewards/margins": 0.021880075335502625, |
|
"rewards/rejected": -0.011204240843653679, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 3.6806776869317074e-06, |
|
"logits/chosen": -1.7377105951309204, |
|
"logits/rejected": -1.731245756149292, |
|
"logps/chosen": -34.34708786010742, |
|
"logps/rejected": -33.66561508178711, |
|
"loss": 2271.1943, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.011171405203640461, |
|
"rewards/margins": 0.024255482479929924, |
|
"rewards/rejected": -0.013084076344966888, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 3.6304297682067146e-06, |
|
"logits/chosen": -1.7545562982559204, |
|
"logits/rejected": -1.7608686685562134, |
|
"logps/chosen": -33.110076904296875, |
|
"logps/rejected": -34.38447189331055, |
|
"loss": 2307.9031, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.008692274801433086, |
|
"rewards/margins": 0.02013438567519188, |
|
"rewards/rejected": -0.011442111805081367, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 3.579601087369492e-06, |
|
"logits/chosen": -1.8283579349517822, |
|
"logits/rejected": -1.8423779010772705, |
|
"logps/chosen": -31.087310791015625, |
|
"logps/rejected": -33.21766662597656, |
|
"loss": 2311.0535, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.007651531603187323, |
|
"rewards/margins": 0.01985129900276661, |
|
"rewards/rejected": -0.012199767865240574, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 3.5282177578265295e-06, |
|
"logits/chosen": -1.6931848526000977, |
|
"logits/rejected": -1.6900306940078735, |
|
"logps/chosen": -32.68722152709961, |
|
"logps/rejected": -36.44025421142578, |
|
"loss": 2222.7857, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.012357336468994617, |
|
"rewards/margins": 0.029757345095276833, |
|
"rewards/rejected": -0.01740000769495964, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 3.476306177936961e-06, |
|
"logits/chosen": -1.7785106897354126, |
|
"logits/rejected": -1.7785335779190063, |
|
"logps/chosen": -30.625701904296875, |
|
"logps/rejected": -35.58719253540039, |
|
"loss": 2272.5373, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.006598903331905603, |
|
"rewards/margins": 0.024207040667533875, |
|
"rewards/rejected": -0.01760813593864441, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 3.423893017450324e-06, |
|
"logits/chosen": -1.7213503122329712, |
|
"logits/rejected": -1.718073844909668, |
|
"logps/chosen": -30.16400146484375, |
|
"logps/rejected": -34.405784606933594, |
|
"loss": 2284.7229, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.007083491422235966, |
|
"rewards/margins": 0.02296331152319908, |
|
"rewards/rejected": -0.01587982103228569, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 3.3710052038048794e-06, |
|
"logits/chosen": -1.7414871454238892, |
|
"logits/rejected": -1.7416623830795288, |
|
"logps/chosen": -29.0936279296875, |
|
"logps/rejected": -32.20520782470703, |
|
"loss": 2256.3047, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.010694684460759163, |
|
"rewards/margins": 0.026041794568300247, |
|
"rewards/rejected": -0.015347110107541084, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 3.3176699082935546e-06, |
|
"logits/chosen": -1.660274863243103, |
|
"logits/rejected": -1.663644552230835, |
|
"logps/chosen": -33.33858108520508, |
|
"logps/rejected": -33.01979064941406, |
|
"loss": 2254.2998, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.011398938484489918, |
|
"rewards/margins": 0.026949990540742874, |
|
"rewards/rejected": -0.015551051124930382, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_logits/chosen": -2.0085763931274414, |
|
"eval_logits/rejected": -2.0039827823638916, |
|
"eval_logps/chosen": -34.992069244384766, |
|
"eval_logps/rejected": -38.73301696777344, |
|
"eval_loss": 2477.662353515625, |
|
"eval_rewards/accuracies": 0.5539867281913757, |
|
"eval_rewards/chosen": -0.009575208649039268, |
|
"eval_rewards/margins": 0.0025887340307235718, |
|
"eval_rewards/rejected": -0.01216394267976284, |
|
"eval_runtime": 145.5671, |
|
"eval_samples_per_second": 2.356, |
|
"eval_steps_per_second": 0.295, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 3.2639145321045933e-06, |
|
"logits/chosen": -1.7369210720062256, |
|
"logits/rejected": -1.7286014556884766, |
|
"logps/chosen": -35.7460823059082, |
|
"logps/rejected": -33.445213317871094, |
|
"loss": 2282.4748, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.0076593635603785515, |
|
"rewards/margins": 0.02305331453680992, |
|
"rewards/rejected": -0.015393950045108795, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.2097666922441107e-06, |
|
"logits/chosen": -1.7424733638763428, |
|
"logits/rejected": -1.7439861297607422, |
|
"logps/chosen": -35.72047424316406, |
|
"logps/rejected": -34.96687698364258, |
|
"loss": 2256.5887, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.007149122655391693, |
|
"rewards/margins": 0.026057641953229904, |
|
"rewards/rejected": -0.01890851929783821, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 3.1552542073477554e-06, |
|
"logits/chosen": -1.7609472274780273, |
|
"logits/rejected": -1.7586179971694946, |
|
"logps/chosen": -31.435550689697266, |
|
"logps/rejected": -34.51602554321289, |
|
"loss": 2254.2758, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.011277793906629086, |
|
"rewards/margins": 0.026469092816114426, |
|
"rewards/rejected": -0.015191297046840191, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 3.100405083388799e-06, |
|
"logits/chosen": -1.733371376991272, |
|
"logits/rejected": -1.7385714054107666, |
|
"logps/chosen": -30.687509536743164, |
|
"logps/rejected": -34.88238525390625, |
|
"loss": 2235.6635, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.010982171632349491, |
|
"rewards/margins": 0.028257867321372032, |
|
"rewards/rejected": -0.017275694757699966, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 3.0452474992899645e-06, |
|
"logits/chosen": -1.686703085899353, |
|
"logits/rejected": -1.68540358543396, |
|
"logps/chosen": -32.28951644897461, |
|
"logps/rejected": -36.70497512817383, |
|
"loss": 2236.9605, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.008354658260941505, |
|
"rewards/margins": 0.028544824570417404, |
|
"rewards/rejected": -0.020190168172121048, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 2.989809792446417e-06, |
|
"logits/chosen": -1.5596857070922852, |
|
"logits/rejected": -1.5550428628921509, |
|
"logps/chosen": -34.99411392211914, |
|
"logps/rejected": -37.3930778503418, |
|
"loss": 2205.4355, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.010043250396847725, |
|
"rewards/margins": 0.0318898968398571, |
|
"rewards/rejected": -0.021846650168299675, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"logits/chosen": -1.6859843730926514, |
|
"logits/rejected": -1.690326452255249, |
|
"logps/chosen": -34.527626037597656, |
|
"logps/rejected": -35.3222541809082, |
|
"loss": 2274.9828, |
|
"rewards/accuracies": 0.7916666269302368, |
|
"rewards/chosen": 0.006860324647277594, |
|
"rewards/margins": 0.024134492501616478, |
|
"rewards/rejected": -0.017274167388677597, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 2.878208065043501e-06, |
|
"logits/chosen": -1.633776068687439, |
|
"logits/rejected": -1.632145643234253, |
|
"logps/chosen": -32.2863883972168, |
|
"logps/rejected": -37.45145797729492, |
|
"loss": 2069.3428, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.01615205779671669, |
|
"rewards/margins": 0.04662010073661804, |
|
"rewards/rejected": -0.030468037351965904, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 2.8221013802485974e-06, |
|
"logits/chosen": -1.6747426986694336, |
|
"logits/rejected": -1.6725879907608032, |
|
"logps/chosen": -31.845199584960938, |
|
"logps/rejected": -35.52173614501953, |
|
"loss": 2131.4416, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.015305367298424244, |
|
"rewards/margins": 0.039657000452280045, |
|
"rewards/rejected": -0.024351635947823524, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 2.76582921478147e-06, |
|
"logits/chosen": -1.597703218460083, |
|
"logits/rejected": -1.592248797416687, |
|
"logps/chosen": -33.2725830078125, |
|
"logps/rejected": -33.86598205566406, |
|
"loss": 2173.4393, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.011104973964393139, |
|
"rewards/margins": 0.03512220084667206, |
|
"rewards/rejected": -0.024017225950956345, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_logits/chosen": -1.970989465713501, |
|
"eval_logits/rejected": -1.9664607048034668, |
|
"eval_logps/chosen": -35.21713638305664, |
|
"eval_logps/rejected": -39.03861618041992, |
|
"eval_loss": 2470.59033203125, |
|
"eval_rewards/accuracies": 0.5568937063217163, |
|
"eval_rewards/chosen": -0.011825831606984138, |
|
"eval_rewards/margins": 0.003394143423065543, |
|
"eval_rewards/rejected": -0.015219975262880325, |
|
"eval_runtime": 145.776, |
|
"eval_samples_per_second": 2.353, |
|
"eval_steps_per_second": 0.295, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 2.7094204786572254e-06, |
|
"logits/chosen": -1.6900947093963623, |
|
"logits/rejected": -1.6973447799682617, |
|
"logps/chosen": -30.748676300048828, |
|
"logps/rejected": -37.139137268066406, |
|
"loss": 2112.8461, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.01379583589732647, |
|
"rewards/margins": 0.04262876883149147, |
|
"rewards/rejected": -0.02883293107151985, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 2.6529041520546072e-06, |
|
"logits/chosen": -1.6644665002822876, |
|
"logits/rejected": -1.666691541671753, |
|
"logps/chosen": -31.436620712280273, |
|
"logps/rejected": -35.15371322631836, |
|
"loss": 2218.1766, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.011344591155648232, |
|
"rewards/margins": 0.030583670362830162, |
|
"rewards/rejected": -0.01923907920718193, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 2.5963092704273302e-06, |
|
"logits/chosen": -1.5569541454315186, |
|
"logits/rejected": -1.5611451864242554, |
|
"logps/chosen": -31.492889404296875, |
|
"logps/rejected": -37.86591339111328, |
|
"loss": 2126.5043, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.010985675267875195, |
|
"rewards/margins": 0.04054299369454384, |
|
"rewards/rejected": -0.02955731749534607, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 2.53966490958702e-06, |
|
"logits/chosen": -1.6258825063705444, |
|
"logits/rejected": -1.6220991611480713, |
|
"logps/chosen": -31.927608489990234, |
|
"logps/rejected": -35.32285690307617, |
|
"loss": 2187.909, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.010410415939986706, |
|
"rewards/margins": 0.033692531287670135, |
|
"rewards/rejected": -0.023282116279006004, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 2.4830001707654135e-06, |
|
"logits/chosen": -1.6901594400405884, |
|
"logits/rejected": -1.6924806833267212, |
|
"logps/chosen": -31.357311248779297, |
|
"logps/rejected": -38.720489501953125, |
|
"loss": 2089.2604, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.014180228114128113, |
|
"rewards/margins": 0.04476577043533325, |
|
"rewards/rejected": -0.03058554232120514, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 2.4263441656635054e-06, |
|
"logits/chosen": -1.5084383487701416, |
|
"logits/rejected": -1.5038378238677979, |
|
"logps/chosen": -35.46610641479492, |
|
"logps/rejected": -35.76911544799805, |
|
"loss": 2161.0381, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.00815567746758461, |
|
"rewards/margins": 0.03692782670259476, |
|
"rewards/rejected": -0.02877214550971985, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 2.3697260014953107e-06, |
|
"logits/chosen": -1.5508906841278076, |
|
"logits/rejected": -1.5508755445480347, |
|
"logps/chosen": -34.66786575317383, |
|
"logps/rejected": -37.92455291748047, |
|
"loss": 2108.2408, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.011749391444027424, |
|
"rewards/margins": 0.042532261461019516, |
|
"rewards/rejected": -0.03078286722302437, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 2.3131747660339396e-06, |
|
"logits/chosen": -1.5863968133926392, |
|
"logits/rejected": -1.5747534036636353, |
|
"logps/chosen": -32.88094711303711, |
|
"logps/rejected": -36.167869567871094, |
|
"loss": 2086.3512, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.012256348505616188, |
|
"rewards/margins": 0.04471471160650253, |
|
"rewards/rejected": -0.032458364963531494, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 2.256719512667651e-06, |
|
"logits/chosen": -1.6844234466552734, |
|
"logits/rejected": -1.6891088485717773, |
|
"logps/chosen": -32.565818786621094, |
|
"logps/rejected": -36.00508499145508, |
|
"loss": 2111.5299, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.00764118880033493, |
|
"rewards/margins": 0.04332723096013069, |
|
"rewards/rejected": -0.03568603843450546, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 2.2003892454735786e-06, |
|
"logits/chosen": -1.6079037189483643, |
|
"logits/rejected": -1.6007716655731201, |
|
"logps/chosen": -33.532958984375, |
|
"logps/rejected": -35.84773254394531, |
|
"loss": 2065.36, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.013211173936724663, |
|
"rewards/margins": 0.047667164355516434, |
|
"rewards/rejected": -0.03445599228143692, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_logits/chosen": -1.9166266918182373, |
|
"eval_logits/rejected": -1.9121689796447754, |
|
"eval_logps/chosen": -35.597782135009766, |
|
"eval_logps/rejected": -39.466796875, |
|
"eval_loss": 2467.73291015625, |
|
"eval_rewards/accuracies": 0.5598006844520569, |
|
"eval_rewards/chosen": -0.01563231088221073, |
|
"eval_rewards/margins": 0.003869474632665515, |
|
"eval_rewards/rejected": -0.019501786679029465, |
|
"eval_runtime": 145.8623, |
|
"eval_samples_per_second": 2.352, |
|
"eval_steps_per_second": 0.295, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 2.1442129043167877e-06, |
|
"logits/chosen": -1.6010173559188843, |
|
"logits/rejected": -1.6011197566986084, |
|
"logps/chosen": -30.0789794921875, |
|
"logps/rejected": -38.56249237060547, |
|
"loss": 2050.1326, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.010881805792450905, |
|
"rewards/margins": 0.04960983246564865, |
|
"rewards/rejected": -0.0387280248105526, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 2.088219349982323e-06, |
|
"logits/chosen": -1.5475326776504517, |
|
"logits/rejected": -1.5394935607910156, |
|
"logps/chosen": -31.198156356811523, |
|
"logps/rejected": -37.26829147338867, |
|
"loss": 2106.8049, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.006407910026609898, |
|
"rewards/margins": 0.04337712749838829, |
|
"rewards/rejected": -0.03696921840310097, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 2.0324373493478803e-06, |
|
"logits/chosen": -1.7085663080215454, |
|
"logits/rejected": -1.7077171802520752, |
|
"logps/chosen": -29.088947296142578, |
|
"logps/rejected": -36.169918060302734, |
|
"loss": 2121.3875, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.009080884046852589, |
|
"rewards/margins": 0.04213147610425949, |
|
"rewards/rejected": -0.03305059298872948, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 1.976895560604729e-06, |
|
"logits/chosen": -1.5875581502914429, |
|
"logits/rejected": -1.597825527191162, |
|
"logps/chosen": -33.56743621826172, |
|
"logps/rejected": -36.87736511230469, |
|
"loss": 2078.3281, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.008576255291700363, |
|
"rewards/margins": 0.04745348542928696, |
|
"rewards/rejected": -0.038877226412296295, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 1.921622518534466e-06, |
|
"logits/chosen": -1.6309471130371094, |
|
"logits/rejected": -1.6342302560806274, |
|
"logps/chosen": -30.122472763061523, |
|
"logps/rejected": -35.17578887939453, |
|
"loss": 2139.8102, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.005252503324300051, |
|
"rewards/margins": 0.03964962065219879, |
|
"rewards/rejected": -0.03439711779356003, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 1.8666466198491794e-06, |
|
"logits/chosen": -1.6128339767456055, |
|
"logits/rejected": -1.6084800958633423, |
|
"logps/chosen": -33.1923713684082, |
|
"logps/rejected": -37.553993225097656, |
|
"loss": 2098.0742, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.008828431367874146, |
|
"rewards/margins": 0.04530448839068413, |
|
"rewards/rejected": -0.03647606074810028, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 1.8119961086025376e-06, |
|
"logits/chosen": -1.532700538635254, |
|
"logits/rejected": -1.5348151922225952, |
|
"logps/chosen": -31.818435668945312, |
|
"logps/rejected": -38.84191131591797, |
|
"loss": 2077.2633, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.009943163953721523, |
|
"rewards/margins": 0.04654636234045029, |
|
"rewards/rejected": -0.03660320118069649, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 1.7576990616793139e-06, |
|
"logits/chosen": -1.5599863529205322, |
|
"logits/rejected": -1.5538241863250732, |
|
"logps/chosen": -35.342994689941406, |
|
"logps/rejected": -40.327735900878906, |
|
"loss": 2127.4625, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.0006207667174749076, |
|
"rewards/margins": 0.041278596967458725, |
|
"rewards/rejected": -0.040657833218574524, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 1.7037833743707892e-06, |
|
"logits/chosen": -1.5475926399230957, |
|
"logits/rejected": -1.5419275760650635, |
|
"logps/chosen": -30.222143173217773, |
|
"logps/rejected": -39.89192581176758, |
|
"loss": 2074.9148, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.007808062247931957, |
|
"rewards/margins": 0.046828486025333405, |
|
"rewards/rejected": -0.03902042657136917, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 1.6502767460434588e-06, |
|
"logits/chosen": -1.5285046100616455, |
|
"logits/rejected": -1.51847505569458, |
|
"logps/chosen": -31.368820190429688, |
|
"logps/rejected": -32.55973815917969, |
|
"loss": 2196.3246, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.0037986873649060726, |
|
"rewards/margins": 0.033398739993572235, |
|
"rewards/rejected": -0.029600050300359726, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_logits/chosen": -1.8825738430023193, |
|
"eval_logits/rejected": -1.878185510635376, |
|
"eval_logps/chosen": -35.89006423950195, |
|
"eval_logps/rejected": -39.809085845947266, |
|
"eval_loss": 2464.3681640625, |
|
"eval_rewards/accuracies": 0.5510797500610352, |
|
"eval_rewards/chosen": -0.01855510286986828, |
|
"eval_rewards/margins": 0.004369591362774372, |
|
"eval_rewards/rejected": -0.022924695163965225, |
|
"eval_runtime": 145.9322, |
|
"eval_samples_per_second": 2.35, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 1.5972066659083796e-06, |
|
"logits/chosen": -1.6132911443710327, |
|
"logits/rejected": -1.612853765487671, |
|
"logps/chosen": -31.150531768798828, |
|
"logps/rejected": -33.747276306152344, |
|
"loss": 2132.9832, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.006736672017723322, |
|
"rewards/margins": 0.04162462800741196, |
|
"rewards/rejected": -0.03488795459270477, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 1.5446003988985041e-06, |
|
"logits/chosen": -1.6631847620010376, |
|
"logits/rejected": -1.663709044456482, |
|
"logps/chosen": -31.313705444335938, |
|
"logps/rejected": -34.68809127807617, |
|
"loss": 2119.0834, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.006298714783042669, |
|
"rewards/margins": 0.04152151942253113, |
|
"rewards/rejected": -0.03522280603647232, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 1.4924849716612211e-06, |
|
"logits/chosen": -1.6204774379730225, |
|
"logits/rejected": -1.6248018741607666, |
|
"logps/chosen": -31.891056060791016, |
|
"logps/rejected": -30.86574363708496, |
|
"loss": 2204.4455, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.003890159772709012, |
|
"rewards/margins": 0.03271043300628662, |
|
"rewards/rejected": -0.028820272535085678, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 1.440887158673332e-06, |
|
"logits/chosen": -1.6208614110946655, |
|
"logits/rejected": -1.6127662658691406, |
|
"logps/chosen": -30.53921127319336, |
|
"logps/rejected": -37.73412322998047, |
|
"loss": 2060.6334, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.006386814173310995, |
|
"rewards/margins": 0.048725761473178864, |
|
"rewards/rejected": -0.04233894869685173, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 1.3898334684855647e-06, |
|
"logits/chosen": -1.572749137878418, |
|
"logits/rejected": -1.5833861827850342, |
|
"logps/chosen": -33.21635437011719, |
|
"logps/rejected": -35.97629928588867, |
|
"loss": 2125.9623, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.003367505269125104, |
|
"rewards/margins": 0.04068039730191231, |
|
"rewards/rejected": -0.03731289133429527, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 1.3393501301037245e-06, |
|
"logits/chosen": -1.6456438302993774, |
|
"logits/rejected": -1.6367809772491455, |
|
"logps/chosen": -32.83705139160156, |
|
"logps/rejected": -41.05707550048828, |
|
"loss": 2036.1875, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.00663726544007659, |
|
"rewards/margins": 0.05389411002397537, |
|
"rewards/rejected": -0.047256845980882645, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 1.2894630795134454e-06, |
|
"logits/chosen": -1.5514529943466187, |
|
"logits/rejected": -1.5537471771240234, |
|
"logps/chosen": -34.93277359008789, |
|
"logps/rejected": -36.4191780090332, |
|
"loss": 2069.123, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.010565127246081829, |
|
"rewards/margins": 0.04777819663286209, |
|
"rewards/rejected": -0.03721306473016739, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 1.2401979463554984e-06, |
|
"logits/chosen": -1.6648813486099243, |
|
"logits/rejected": -1.6647241115570068, |
|
"logps/chosen": -32.27571487426758, |
|
"logps/rejected": -38.67870330810547, |
|
"loss": 2022.6195, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.007819265127182007, |
|
"rewards/margins": 0.053449440747499466, |
|
"rewards/rejected": -0.04563017934560776, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 1.1915800407584705e-06, |
|
"logits/chosen": -1.6493892669677734, |
|
"logits/rejected": -1.6531116962432861, |
|
"logps/chosen": -30.350088119506836, |
|
"logps/rejected": -37.505104064941406, |
|
"loss": 2092.1893, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.005432260222733021, |
|
"rewards/margins": 0.04500500112771988, |
|
"rewards/rejected": -0.03957274183630943, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 1.1436343403356019e-06, |
|
"logits/chosen": -1.637351632118225, |
|
"logits/rejected": -1.642260193824768, |
|
"logps/chosen": -33.443363189697266, |
|
"logps/rejected": -33.12295913696289, |
|
"loss": 2237.6512, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.0012564079370349646, |
|
"rewards/margins": 0.028469255194067955, |
|
"rewards/rejected": -0.027212847024202347, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_logits/chosen": -1.8689898252487183, |
|
"eval_logits/rejected": -1.8646091222763062, |
|
"eval_logps/chosen": -35.997581481933594, |
|
"eval_logps/rejected": -39.92316436767578, |
|
"eval_loss": 2464.299072265625, |
|
"eval_rewards/accuracies": 0.545265793800354, |
|
"eval_rewards/chosen": -0.01963029056787491, |
|
"eval_rewards/margins": 0.004435177426785231, |
|
"eval_rewards/rejected": -0.024065470322966576, |
|
"eval_runtime": 145.934, |
|
"eval_samples_per_second": 2.35, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 1.0963854773524548e-06, |
|
"logits/chosen": -1.6270654201507568, |
|
"logits/rejected": -1.6279323101043701, |
|
"logps/chosen": -31.9213809967041, |
|
"logps/rejected": -34.26952362060547, |
|
"loss": 2115.159, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.010187694802880287, |
|
"rewards/margins": 0.042723797261714935, |
|
"rewards/rejected": -0.032536108046770096, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 1.049857726072005e-06, |
|
"logits/chosen": -1.481483817100525, |
|
"logits/rejected": -1.4840071201324463, |
|
"logps/chosen": -33.81633377075195, |
|
"logps/rejected": -36.6799201965332, |
|
"loss": 2111.7611, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.007486463990062475, |
|
"rewards/margins": 0.04387300834059715, |
|
"rewards/rejected": -0.03638654574751854, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 1.0040749902836508e-06, |
|
"logits/chosen": -1.5083402395248413, |
|
"logits/rejected": -1.5064888000488281, |
|
"logps/chosen": -30.776952743530273, |
|
"logps/rejected": -34.5029182434082, |
|
"loss": 2184.3811, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.002347666770219803, |
|
"rewards/margins": 0.03571944683790207, |
|
"rewards/rejected": -0.03337177634239197, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 9.59060791022566e-07, |
|
"logits/chosen": -1.641847848892212, |
|
"logits/rejected": -1.637139916419983, |
|
"logps/chosen": -31.925174713134766, |
|
"logps/rejected": -36.643123626708984, |
|
"loss": 2063.1439, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.011146209202706814, |
|
"rewards/margins": 0.04803737998008728, |
|
"rewards/rejected": -0.03689116612076759, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 9.148382544856885e-07, |
|
"logits/chosen": -1.5244739055633545, |
|
"logits/rejected": -1.5154194831848145, |
|
"logps/chosen": -33.11268615722656, |
|
"logps/rejected": -34.98213195800781, |
|
"loss": 2134.0221, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": 0.0033905128948390484, |
|
"rewards/margins": 0.0402878001332283, |
|
"rewards/rejected": -0.03689728304743767, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"learning_rate": 8.714301001505568e-07, |
|
"logits/chosen": -1.5689246654510498, |
|
"logits/rejected": -1.569645643234253, |
|
"logps/chosen": -33.039424896240234, |
|
"logps/rejected": -34.57393264770508, |
|
"loss": 2133.5518, |
|
"rewards/accuracies": 0.8416666984558105, |
|
"rewards/chosen": 0.0064233215525746346, |
|
"rewards/margins": 0.04014817252755165, |
|
"rewards/rejected": -0.03372485190629959, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"learning_rate": 8.288586291031025e-07, |
|
"logits/chosen": -1.6524006128311157, |
|
"logits/rejected": -1.6470394134521484, |
|
"logps/chosen": -33.036277770996094, |
|
"logps/rejected": -36.106807708740234, |
|
"loss": 2169.9916, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.004896899685263634, |
|
"rewards/margins": 0.03655281290411949, |
|
"rewards/rejected": -0.03165591508150101, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"learning_rate": 7.871457125803897e-07, |
|
"logits/chosen": -1.5274744033813477, |
|
"logits/rejected": -1.5358660221099854, |
|
"logps/chosen": -33.17569351196289, |
|
"logps/rejected": -35.91423416137695, |
|
"loss": 2159.4746, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.0015730734448879957, |
|
"rewards/margins": 0.03710102289915085, |
|
"rewards/rejected": -0.03552795201539993, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"learning_rate": 7.463127807341966e-07, |
|
"logits/chosen": -1.5684562921524048, |
|
"logits/rejected": -1.5627862215042114, |
|
"logps/chosen": -31.11408042907715, |
|
"logps/rejected": -37.03162384033203, |
|
"loss": 2063.1006, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.010595379397273064, |
|
"rewards/margins": 0.04807712510228157, |
|
"rewards/rejected": -0.03748174011707306, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"learning_rate": 7.063808116212021e-07, |
|
"logits/chosen": -1.5203880071640015, |
|
"logits/rejected": -1.522077202796936, |
|
"logps/chosen": -32.758827209472656, |
|
"logps/rejected": -37.34809112548828, |
|
"loss": 2032.5133, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.007316121365875006, |
|
"rewards/margins": 0.05319654941558838, |
|
"rewards/rejected": -0.045880429446697235, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_logits/chosen": -1.866162657737732, |
|
"eval_logits/rejected": -1.861803650856018, |
|
"eval_logps/chosen": -36.02009963989258, |
|
"eval_logps/rejected": -39.944732666015625, |
|
"eval_loss": 2464.46533203125, |
|
"eval_rewards/accuracies": 0.5598006844520569, |
|
"eval_rewards/chosen": -0.019855517894029617, |
|
"eval_rewards/margins": 0.004425638820976019, |
|
"eval_rewards/rejected": -0.024281155318021774, |
|
"eval_runtime": 145.911, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"learning_rate": 6.673703204254348e-07, |
|
"logits/chosen": -1.466104507446289, |
|
"logits/rejected": -1.4655678272247314, |
|
"logps/chosen": -34.974365234375, |
|
"logps/rejected": -36.99479293823242, |
|
"loss": 2027.0777, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.010781234130263329, |
|
"rewards/margins": 0.05363558605313301, |
|
"rewards/rejected": -0.04285435378551483, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"learning_rate": 6.293013489185315e-07, |
|
"logits/chosen": -1.6160688400268555, |
|
"logits/rejected": -1.6096382141113281, |
|
"logps/chosen": -31.019649505615234, |
|
"logps/rejected": -37.333335876464844, |
|
"loss": 2040.7531, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.006840378977358341, |
|
"rewards/margins": 0.05138836055994034, |
|
"rewards/rejected": -0.04454797878861427, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"learning_rate": 5.921934551632086e-07, |
|
"logits/chosen": -1.4815315008163452, |
|
"logits/rejected": -1.4708069562911987, |
|
"logps/chosen": -33.21098327636719, |
|
"logps/rejected": -37.002418518066406, |
|
"loss": 2020.3844, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.011746999807655811, |
|
"rewards/margins": 0.05283288285136223, |
|
"rewards/rejected": -0.041085876524448395, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"learning_rate": 5.560657034652405e-07, |
|
"logits/chosen": -1.5710773468017578, |
|
"logits/rejected": -1.565071702003479, |
|
"logps/chosen": -30.515600204467773, |
|
"logps/rejected": -32.57416534423828, |
|
"loss": 2164.5504, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.001846333616413176, |
|
"rewards/margins": 0.03891240432858467, |
|
"rewards/rejected": -0.03706606850028038, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"learning_rate": 5.2093665457911e-07, |
|
"logits/chosen": -1.586578130722046, |
|
"logits/rejected": -1.5945528745651245, |
|
"logps/chosen": -34.664546966552734, |
|
"logps/rejected": -34.95591354370117, |
|
"loss": 2119.1492, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.00730844447389245, |
|
"rewards/margins": 0.04168447107076645, |
|
"rewards/rejected": -0.034376028925180435, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"learning_rate": 4.868243561723535e-07, |
|
"logits/chosen": -1.5772063732147217, |
|
"logits/rejected": -1.577383041381836, |
|
"logps/chosen": -32.863037109375, |
|
"logps/rejected": -37.241397857666016, |
|
"loss": 2082.3426, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.0016552206361666322, |
|
"rewards/margins": 0.045920491218566895, |
|
"rewards/rejected": -0.044265273958444595, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"learning_rate": 4.537463335535161e-07, |
|
"logits/chosen": -1.5012638568878174, |
|
"logits/rejected": -1.5000605583190918, |
|
"logps/chosen": -32.05634689331055, |
|
"logps/rejected": -37.78838348388672, |
|
"loss": 2023.8406, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.010344896465539932, |
|
"rewards/margins": 0.05302266404032707, |
|
"rewards/rejected": -0.04267776757478714, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"learning_rate": 4.217195806684629e-07, |
|
"logits/chosen": -1.4007158279418945, |
|
"logits/rejected": -1.3967105150222778, |
|
"logps/chosen": -34.35404586791992, |
|
"logps/rejected": -34.51953887939453, |
|
"loss": 2094.5336, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.008833990432322025, |
|
"rewards/margins": 0.04476445913314819, |
|
"rewards/rejected": -0.03593046963214874, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"learning_rate": 3.907605513696808e-07, |
|
"logits/chosen": -1.5938528776168823, |
|
"logits/rejected": -1.5794765949249268, |
|
"logps/chosen": -34.033790588378906, |
|
"logps/rejected": -39.640159606933594, |
|
"loss": 2033.0867, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.00313788210041821, |
|
"rewards/margins": 0.05188627913594246, |
|
"rewards/rejected": -0.04874839633703232, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"learning_rate": 3.6088515096305675e-07, |
|
"logits/chosen": -1.5395238399505615, |
|
"logits/rejected": -1.5438177585601807, |
|
"logps/chosen": -32.82494354248047, |
|
"logps/rejected": -41.31450653076172, |
|
"loss": 1967.852, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 0.006926815025508404, |
|
"rewards/margins": 0.059039629995822906, |
|
"rewards/rejected": -0.05211281776428223, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_logits/chosen": -1.8651809692382812, |
|
"eval_logits/rejected": -1.8608282804489136, |
|
"eval_logps/chosen": -36.00510025024414, |
|
"eval_logps/rejected": -39.96323013305664, |
|
"eval_loss": 2461.20361328125, |
|
"eval_rewards/accuracies": 0.5539867281913757, |
|
"eval_rewards/chosen": -0.01970548741519451, |
|
"eval_rewards/margins": 0.004760634154081345, |
|
"eval_rewards/rejected": -0.024466121569275856, |
|
"eval_runtime": 145.7832, |
|
"eval_samples_per_second": 2.353, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"learning_rate": 3.321087280364757e-07, |
|
"logits/chosen": -1.519902229309082, |
|
"logits/rejected": -1.520318627357483, |
|
"logps/chosen": -35.439937591552734, |
|
"logps/rejected": -41.7154426574707, |
|
"loss": 2029.7744, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.008040779270231724, |
|
"rewards/margins": 0.054354071617126465, |
|
"rewards/rejected": -0.046313293278217316, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"learning_rate": 3.044460665744284e-07, |
|
"logits/chosen": -1.601548194885254, |
|
"logits/rejected": -1.6004295349121094, |
|
"logps/chosen": -31.515766143798828, |
|
"logps/rejected": -35.187618255615234, |
|
"loss": 2068.9873, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.005780898500233889, |
|
"rewards/margins": 0.04812353104352951, |
|
"rewards/rejected": -0.04234262555837631, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"learning_rate": 2.779113783626916e-07, |
|
"logits/chosen": -1.521244764328003, |
|
"logits/rejected": -1.5227617025375366, |
|
"logps/chosen": -33.48499298095703, |
|
"logps/rejected": -37.73661804199219, |
|
"loss": 2050.5445, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.008892608806490898, |
|
"rewards/margins": 0.04977206513285637, |
|
"rewards/rejected": -0.04087945073843002, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"learning_rate": 2.5251829568697204e-07, |
|
"logits/chosen": -1.5785353183746338, |
|
"logits/rejected": -1.577487587928772, |
|
"logps/chosen": -30.377460479736328, |
|
"logps/rejected": -35.70696258544922, |
|
"loss": 2086.1504, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.008212093263864517, |
|
"rewards/margins": 0.04536201059818268, |
|
"rewards/rejected": -0.03714991733431816, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"learning_rate": 2.2827986432927774e-07, |
|
"logits/chosen": -1.5923887491226196, |
|
"logits/rejected": -1.5776017904281616, |
|
"logps/chosen": -34.099876403808594, |
|
"logps/rejected": -41.499168395996094, |
|
"loss": 2014.8814, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.002167941303923726, |
|
"rewards/margins": 0.05451526492834091, |
|
"rewards/rejected": -0.052347324788570404, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"learning_rate": 2.0520853686560177e-07, |
|
"logits/chosen": -1.5695116519927979, |
|
"logits/rejected": -1.5808777809143066, |
|
"logps/chosen": -31.036209106445312, |
|
"logps/rejected": -36.333526611328125, |
|
"loss": 2047.0217, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.010322622954845428, |
|
"rewards/margins": 0.05126044154167175, |
|
"rewards/rejected": -0.04093782603740692, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"learning_rate": 1.833161662683672e-07, |
|
"logits/chosen": -1.6847254037857056, |
|
"logits/rejected": -1.6842361688613892, |
|
"logps/chosen": -30.7957763671875, |
|
"logps/rejected": -41.20909881591797, |
|
"loss": 1917.1037, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.01234652940183878, |
|
"rewards/margins": 0.06612871587276459, |
|
"rewards/rejected": -0.05378218740224838, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"learning_rate": 1.626139998169246e-07, |
|
"logits/chosen": -1.5517163276672363, |
|
"logits/rejected": -1.55906081199646, |
|
"logps/chosen": -33.17098617553711, |
|
"logps/rejected": -42.29136657714844, |
|
"loss": 1991.1635, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.010094953700900078, |
|
"rewards/margins": 0.05851215124130249, |
|
"rewards/rejected": -0.04841719567775726, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"learning_rate": 1.4311267331922535e-07, |
|
"logits/chosen": -1.509541392326355, |
|
"logits/rejected": -1.5056698322296143, |
|
"logps/chosen": -33.6932258605957, |
|
"logps/rejected": -35.26158905029297, |
|
"loss": 2073.9217, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.010739867575466633, |
|
"rewards/margins": 0.04738181084394455, |
|
"rewards/rejected": -0.03664194419980049, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"learning_rate": 1.2482220564763669e-07, |
|
"logits/chosen": -1.6483919620513916, |
|
"logits/rejected": -1.6464850902557373, |
|
"logps/chosen": -30.515304565429688, |
|
"logps/rejected": -35.92851257324219, |
|
"loss": 2084.0914, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.007812450639903545, |
|
"rewards/margins": 0.045652881264686584, |
|
"rewards/rejected": -0.03784043341875076, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_logits/chosen": -1.8656350374221802, |
|
"eval_logits/rejected": -1.8612688779830933, |
|
"eval_logps/chosen": -36.0242919921875, |
|
"eval_logps/rejected": -39.967525482177734, |
|
"eval_loss": 2462.612060546875, |
|
"eval_rewards/accuracies": 0.5598006844520569, |
|
"eval_rewards/chosen": -0.019897375255823135, |
|
"eval_rewards/margins": 0.00461164116859436, |
|
"eval_rewards/rejected": -0.024509014561772346, |
|
"eval_runtime": 145.8827, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"learning_rate": 1.0775199359171346e-07, |
|
"logits/chosen": -1.603907823562622, |
|
"logits/rejected": -1.5997169017791748, |
|
"logps/chosen": -32.91996765136719, |
|
"logps/rejected": -33.04853820800781, |
|
"loss": 2124.1023, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.00722561776638031, |
|
"rewards/margins": 0.042166419327259064, |
|
"rewards/rejected": -0.034940801560878754, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"learning_rate": 9.191080703056604e-08, |
|
"logits/chosen": -1.5563673973083496, |
|
"logits/rejected": -1.5573166608810425, |
|
"logps/chosen": -32.476478576660156, |
|
"logps/rejected": -38.40611267089844, |
|
"loss": 2100.1363, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.008883940055966377, |
|
"rewards/margins": 0.04370499402284622, |
|
"rewards/rejected": -0.034821052104234695, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"learning_rate": 7.730678442730539e-08, |
|
"logits/chosen": -1.5083153247833252, |
|
"logits/rejected": -1.5021404027938843, |
|
"logps/chosen": -33.119529724121094, |
|
"logps/rejected": -41.40352249145508, |
|
"loss": 2006.5551, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.010222419165074825, |
|
"rewards/margins": 0.057363539934158325, |
|
"rewards/rejected": -0.047141119837760925, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"learning_rate": 6.394742864787806e-08, |
|
"logits/chosen": -1.5188112258911133, |
|
"logits/rejected": -1.5131750106811523, |
|
"logps/chosen": -28.496017456054688, |
|
"logps/rejected": -35.698753356933594, |
|
"loss": 2071.1072, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.008172214962542057, |
|
"rewards/margins": 0.047853223979473114, |
|
"rewards/rejected": -0.03968100994825363, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"learning_rate": 5.183960310644748e-08, |
|
"logits/chosen": -1.5658049583435059, |
|
"logits/rejected": -1.5554416179656982, |
|
"logps/chosen": -32.46862030029297, |
|
"logps/rejected": -39.8042106628418, |
|
"loss": 2087.5406, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.00012906994379591197, |
|
"rewards/margins": 0.04531756415963173, |
|
"rewards/rejected": -0.04518849402666092, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"learning_rate": 4.098952823928693e-08, |
|
"logits/chosen": -1.5264514684677124, |
|
"logits/rejected": -1.5234899520874023, |
|
"logps/chosen": -32.81951141357422, |
|
"logps/rejected": -34.28097915649414, |
|
"loss": 2147.4246, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.004902643617242575, |
|
"rewards/margins": 0.03880416229367256, |
|
"rewards/rejected": -0.03390152007341385, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"learning_rate": 3.1402778309014284e-08, |
|
"logits/chosen": -1.5864768028259277, |
|
"logits/rejected": -1.592008352279663, |
|
"logps/chosen": -30.852685928344727, |
|
"logps/rejected": -36.960899353027344, |
|
"loss": 2034.7088, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.012164896354079247, |
|
"rewards/margins": 0.05214967206120491, |
|
"rewards/rejected": -0.03998477756977081, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"learning_rate": 2.3084278540791427e-08, |
|
"logits/chosen": -1.5875871181488037, |
|
"logits/rejected": -1.5970607995986938, |
|
"logps/chosen": -30.86104393005371, |
|
"logps/rejected": -33.266883850097656, |
|
"loss": 2088.916, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.008342106826603413, |
|
"rewards/margins": 0.044725269079208374, |
|
"rewards/rejected": -0.03638315945863724, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"learning_rate": 1.6038302591975807e-08, |
|
"logits/chosen": -1.5217053890228271, |
|
"logits/rejected": -1.5153101682662964, |
|
"logps/chosen": -33.224308013916016, |
|
"logps/rejected": -36.06281661987305, |
|
"loss": 2109.2895, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.004321801941841841, |
|
"rewards/margins": 0.042845211923122406, |
|
"rewards/rejected": -0.03852340579032898, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"learning_rate": 1.0268470356514237e-08, |
|
"logits/chosen": -1.5790612697601318, |
|
"logits/rejected": -1.5761630535125732, |
|
"logps/chosen": -33.064552307128906, |
|
"logps/rejected": -37.95295333862305, |
|
"loss": 2032.7156, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.005610200576484203, |
|
"rewards/margins": 0.05253750830888748, |
|
"rewards/rejected": -0.046927306801080704, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"eval_logits/chosen": -1.8648215532302856, |
|
"eval_logits/rejected": -1.8604679107666016, |
|
"eval_logps/chosen": -36.019161224365234, |
|
"eval_logps/rejected": -39.95817184448242, |
|
"eval_loss": 2463.105712890625, |
|
"eval_rewards/accuracies": 0.565614640712738, |
|
"eval_rewards/chosen": -0.019846076145768166, |
|
"eval_rewards/margins": 0.004569429438561201, |
|
"eval_rewards/rejected": -0.024415504187345505, |
|
"eval_runtime": 145.9094, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"learning_rate": 5.777746105209147e-09, |
|
"logits/chosen": -1.645821213722229, |
|
"logits/rejected": -1.6461843252182007, |
|
"logps/chosen": -28.770349502563477, |
|
"logps/rejected": -36.86784744262695, |
|
"loss": 2038.2139, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.010459242388606071, |
|
"rewards/margins": 0.05173317715525627, |
|
"rewards/rejected": -0.04127394035458565, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"learning_rate": 2.5684369628148352e-09, |
|
"logits/chosen": -1.5073591470718384, |
|
"logits/rejected": -1.5066778659820557, |
|
"logps/chosen": -32.11809158325195, |
|
"logps/rejected": -37.76689910888672, |
|
"loss": 2069.6059, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.00923779234290123, |
|
"rewards/margins": 0.04816945642232895, |
|
"rewards/rejected": -0.03893166407942772, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"learning_rate": 6.421917227455999e-10, |
|
"logits/chosen": -1.6536405086517334, |
|
"logits/rejected": -1.6510140895843506, |
|
"logps/chosen": -30.886306762695312, |
|
"logps/rejected": -35.38301467895508, |
|
"loss": 2094.1572, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.0058775185607373714, |
|
"rewards/margins": 0.04487111419439316, |
|
"rewards/rejected": -0.03899358958005905, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.638646125793457, |
|
"logits/rejected": -1.6395971775054932, |
|
"logps/chosen": -30.43972396850586, |
|
"logps/rejected": -33.02666473388672, |
|
"loss": 2186.526, |
|
"rewards/accuracies": 0.82916659116745, |
|
"rewards/chosen": 0.001802150160074234, |
|
"rewards/margins": 0.034166958183050156, |
|
"rewards/rejected": -0.03236480802297592, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 1540, |
|
"total_flos": 0.0, |
|
"train_loss": 1756.4536297686689, |
|
"train_runtime": 10797.265, |
|
"train_samples_per_second": 1.141, |
|
"train_steps_per_second": 0.143 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1540, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|