|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": -1.7278180122375488, |
|
"logits/rejected": -1.7377450466156006, |
|
"logps/chosen": -29.553977966308594, |
|
"logps/rejected": -42.813133239746094, |
|
"loss": 1.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": -1.8664803504943848, |
|
"logits/rejected": -1.8707994222640991, |
|
"logps/chosen": -36.978511810302734, |
|
"logps/rejected": -33.66939163208008, |
|
"loss": 0.9993, |
|
"rewards/accuracies": 0.5694444179534912, |
|
"rewards/chosen": 0.00028087408281862736, |
|
"rewards/margins": 0.0006740752141922712, |
|
"rewards/rejected": -0.00039320107316598296, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": -1.9984451532363892, |
|
"logits/rejected": -2.0010995864868164, |
|
"logps/chosen": -29.63176918029785, |
|
"logps/rejected": -29.05954933166504, |
|
"loss": 0.9999, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.00010425634536659345, |
|
"rewards/margins": 6.528960511786863e-05, |
|
"rewards/rejected": 3.8966707506915554e-05, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": -1.9210799932479858, |
|
"logits/rejected": -1.9183847904205322, |
|
"logps/chosen": -31.414783477783203, |
|
"logps/rejected": -33.19659423828125, |
|
"loss": 1.0002, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 1.3138540452928282e-05, |
|
"rewards/margins": -0.00020548875909298658, |
|
"rewards/rejected": 0.00021862727589905262, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": -2.0177221298217773, |
|
"logits/rejected": -2.008965492248535, |
|
"logps/chosen": -32.57322311401367, |
|
"logps/rejected": -32.500308990478516, |
|
"loss": 1.0001, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 3.584356090868823e-05, |
|
"rewards/margins": -8.777440234553069e-05, |
|
"rewards/rejected": 0.00012361796689219773, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": -1.8622690439224243, |
|
"logits/rejected": -1.851509690284729, |
|
"logps/chosen": -33.547603607177734, |
|
"logps/rejected": -35.463592529296875, |
|
"loss": 0.9998, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 9.876764670480043e-05, |
|
"rewards/margins": 0.0002112251240760088, |
|
"rewards/rejected": -0.00011245747737120837, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": -1.9400131702423096, |
|
"logits/rejected": -1.9419806003570557, |
|
"logps/chosen": -32.52842330932617, |
|
"logps/rejected": -33.22877883911133, |
|
"loss": 0.9987, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0007175664068199694, |
|
"rewards/margins": 0.0012915965635329485, |
|
"rewards/rejected": -0.00057403021492064, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": -2.070552349090576, |
|
"logits/rejected": -2.0755274295806885, |
|
"logps/chosen": -34.00461959838867, |
|
"logps/rejected": -36.64922332763672, |
|
"loss": 0.9994, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.00016131921438500285, |
|
"rewards/margins": 0.0006422021542675793, |
|
"rewards/rejected": -0.0008035213686525822, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": -1.9306777715682983, |
|
"logits/rejected": -1.9338254928588867, |
|
"logps/chosen": -34.32624816894531, |
|
"logps/rejected": -34.661468505859375, |
|
"loss": 0.9983, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0010156143689528108, |
|
"rewards/margins": 0.0017062196275219321, |
|
"rewards/rejected": -0.0006906053749844432, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": -1.9389193058013916, |
|
"logits/rejected": -1.9434226751327515, |
|
"logps/chosen": -32.38957214355469, |
|
"logps/rejected": -32.348140716552734, |
|
"loss": 0.9993, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0007785108755342662, |
|
"rewards/margins": 0.0006708315922878683, |
|
"rewards/rejected": 0.00010767912317533046, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": -2.0358777046203613, |
|
"logits/rejected": -2.0339014530181885, |
|
"logps/chosen": -32.13254165649414, |
|
"logps/rejected": -31.29019546508789, |
|
"loss": 0.9987, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0010829826351255178, |
|
"rewards/margins": 0.001322855008766055, |
|
"rewards/rejected": -0.0002398724900558591, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.2312774658203125, |
|
"eval_logits/rejected": -2.226422071456909, |
|
"eval_logps/chosen": -34.04991149902344, |
|
"eval_logps/rejected": -37.55283737182617, |
|
"eval_loss": 0.9997907280921936, |
|
"eval_rewards/accuracies": 0.5336378812789917, |
|
"eval_rewards/chosen": -0.0001535558985779062, |
|
"eval_rewards/margins": 0.00020861340453848243, |
|
"eval_rewards/rejected": -0.00036216925946064293, |
|
"eval_runtime": 146.0254, |
|
"eval_samples_per_second": 2.349, |
|
"eval_steps_per_second": 0.294, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": -1.9907060861587524, |
|
"logits/rejected": -1.9883339405059814, |
|
"logps/chosen": -33.13169860839844, |
|
"logps/rejected": -34.033958435058594, |
|
"loss": 0.9988, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0011211589444428682, |
|
"rewards/margins": 0.0011670273961499333, |
|
"rewards/rejected": -4.586850991472602e-05, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": -2.002023458480835, |
|
"logits/rejected": -1.993699312210083, |
|
"logps/chosen": -32.341697692871094, |
|
"logps/rejected": -32.16511917114258, |
|
"loss": 0.9989, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0010320657165721059, |
|
"rewards/margins": 0.001068194629624486, |
|
"rewards/rejected": -3.612901855376549e-05, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": -2.028505802154541, |
|
"logits/rejected": -2.020526885986328, |
|
"logps/chosen": -30.3519287109375, |
|
"logps/rejected": -32.101314544677734, |
|
"loss": 0.9983, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0012378758983686566, |
|
"rewards/margins": 0.001720982021652162, |
|
"rewards/rejected": -0.0004831062688026577, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": -1.9588673114776611, |
|
"logits/rejected": -1.9690834283828735, |
|
"logps/chosen": -31.205490112304688, |
|
"logps/rejected": -32.55961608886719, |
|
"loss": 0.9976, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0021417266689240932, |
|
"rewards/margins": 0.0023890691809356213, |
|
"rewards/rejected": -0.00024734257021918893, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": -1.8695415258407593, |
|
"logits/rejected": -1.8707062005996704, |
|
"logps/chosen": -33.88127899169922, |
|
"logps/rejected": -34.7686653137207, |
|
"loss": 0.9968, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0031141345389187336, |
|
"rewards/margins": 0.003228238318115473, |
|
"rewards/rejected": -0.00011410393926780671, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": -1.9212032556533813, |
|
"logits/rejected": -1.9178003072738647, |
|
"logps/chosen": -35.99773406982422, |
|
"logps/rejected": -32.705848693847656, |
|
"loss": 0.9984, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0017394202295690775, |
|
"rewards/margins": 0.0015704210381954908, |
|
"rewards/rejected": 0.00016899927868507802, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": -2.0206995010375977, |
|
"logits/rejected": -2.0133931636810303, |
|
"logps/chosen": -33.504085540771484, |
|
"logps/rejected": -31.432220458984375, |
|
"loss": 0.9963, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0029377774335443974, |
|
"rewards/margins": 0.003741443855687976, |
|
"rewards/rejected": -0.0008036663057282567, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": -2.0269291400909424, |
|
"logits/rejected": -2.032160997390747, |
|
"logps/chosen": -32.24355697631836, |
|
"logps/rejected": -32.431182861328125, |
|
"loss": 0.9975, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.003012270200997591, |
|
"rewards/margins": 0.0024727012496441603, |
|
"rewards/rejected": 0.0005395688931457698, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": -2.027367115020752, |
|
"logits/rejected": -2.0246078968048096, |
|
"logps/chosen": -31.290613174438477, |
|
"logps/rejected": -31.361133575439453, |
|
"loss": 0.9976, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0019833946134895086, |
|
"rewards/margins": 0.002412599278613925, |
|
"rewards/rejected": -0.00042920451960526407, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": -1.8985168933868408, |
|
"logits/rejected": -1.903148889541626, |
|
"logps/chosen": -31.30405616760254, |
|
"logps/rejected": -32.838443756103516, |
|
"loss": 0.9965, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.0030222723726183176, |
|
"rewards/margins": 0.0034973658621311188, |
|
"rewards/rejected": -0.0004750936641357839, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.225177764892578, |
|
"eval_logits/rejected": -2.2203547954559326, |
|
"eval_logps/chosen": -34.06184387207031, |
|
"eval_logps/rejected": -37.579010009765625, |
|
"eval_loss": 0.9996482133865356, |
|
"eval_rewards/accuracies": 0.5070598125457764, |
|
"eval_rewards/chosen": -0.0002729461120907217, |
|
"eval_rewards/margins": 0.00035095339990220964, |
|
"eval_rewards/rejected": -0.0006238995119929314, |
|
"eval_runtime": 145.704, |
|
"eval_samples_per_second": 2.354, |
|
"eval_steps_per_second": 0.295, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": -2.011120557785034, |
|
"logits/rejected": -2.021751880645752, |
|
"logps/chosen": -31.745685577392578, |
|
"logps/rejected": -33.96772003173828, |
|
"loss": 0.9967, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0022561827208846807, |
|
"rewards/margins": 0.003306365106254816, |
|
"rewards/rejected": -0.0010501822689548135, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": -1.903857946395874, |
|
"logits/rejected": -1.918621301651001, |
|
"logps/chosen": -29.797290802001953, |
|
"logps/rejected": -31.628814697265625, |
|
"loss": 0.9962, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.0031425058841705322, |
|
"rewards/margins": 0.0038488968275487423, |
|
"rewards/rejected": -0.0007063907687552273, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": -1.9593979120254517, |
|
"logits/rejected": -1.9633464813232422, |
|
"logps/chosen": -33.067623138427734, |
|
"logps/rejected": -31.64206886291504, |
|
"loss": 0.9956, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.003498472273349762, |
|
"rewards/margins": 0.004426136147230864, |
|
"rewards/rejected": -0.0009276636992581189, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": -1.9572632312774658, |
|
"logits/rejected": -1.9354908466339111, |
|
"logps/chosen": -33.843727111816406, |
|
"logps/rejected": -35.1453742980957, |
|
"loss": 0.995, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0031002266332507133, |
|
"rewards/margins": 0.0050460235215723515, |
|
"rewards/rejected": -0.0019457967719063163, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": -1.9997854232788086, |
|
"logits/rejected": -1.9964803457260132, |
|
"logps/chosen": -32.75019454956055, |
|
"logps/rejected": -36.28661346435547, |
|
"loss": 0.9976, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0017487213481217623, |
|
"rewards/margins": 0.0023655896075069904, |
|
"rewards/rejected": -0.0006168682011775672, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": -1.8673791885375977, |
|
"logits/rejected": -1.8649587631225586, |
|
"logps/chosen": -34.018226623535156, |
|
"logps/rejected": -35.539276123046875, |
|
"loss": 0.998, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.001701725646853447, |
|
"rewards/margins": 0.002048287307843566, |
|
"rewards/rejected": -0.0003465614281594753, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": -1.8522275686264038, |
|
"logits/rejected": -1.849872350692749, |
|
"logps/chosen": -34.16339874267578, |
|
"logps/rejected": -31.845317840576172, |
|
"loss": 0.9969, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0023420110810548067, |
|
"rewards/margins": 0.0030819105450063944, |
|
"rewards/rejected": -0.0007398994639515877, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": -1.9549518823623657, |
|
"logits/rejected": -1.94447922706604, |
|
"logps/chosen": -35.027687072753906, |
|
"logps/rejected": -31.895471572875977, |
|
"loss": 0.9962, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.003289591521024704, |
|
"rewards/margins": 0.003834384260699153, |
|
"rewards/rejected": -0.0005447928560897708, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": -2.0498766899108887, |
|
"logits/rejected": -2.034980297088623, |
|
"logps/chosen": -30.72440528869629, |
|
"logps/rejected": -32.658695220947266, |
|
"loss": 0.9979, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.0019971781875938177, |
|
"rewards/margins": 0.002065772656351328, |
|
"rewards/rejected": -6.859400309622288e-05, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": -1.9201946258544922, |
|
"logits/rejected": -1.9177051782608032, |
|
"logps/chosen": -32.3183479309082, |
|
"logps/rejected": -30.95510482788086, |
|
"loss": 0.9925, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.006087628658860922, |
|
"rewards/margins": 0.007470599375665188, |
|
"rewards/rejected": -0.0013829706003889441, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -2.2213804721832275, |
|
"eval_logits/rejected": -2.216555118560791, |
|
"eval_logps/chosen": -34.083614349365234, |
|
"eval_logps/rejected": -37.60634994506836, |
|
"eval_loss": 0.999591052532196, |
|
"eval_rewards/accuracies": 0.559385359287262, |
|
"eval_rewards/chosen": -0.0004906260874122381, |
|
"eval_rewards/margins": 0.00040669209556654096, |
|
"eval_rewards/rejected": -0.0008973181829787791, |
|
"eval_runtime": 145.8707, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": -1.9060642719268799, |
|
"logits/rejected": -1.9028133153915405, |
|
"logps/chosen": -31.319162368774414, |
|
"logps/rejected": -33.85043716430664, |
|
"loss": 0.9961, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.002841859357431531, |
|
"rewards/margins": 0.003924719989299774, |
|
"rewards/rejected": -0.0010828599333763123, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": -1.9553836584091187, |
|
"logits/rejected": -1.9432109594345093, |
|
"logps/chosen": -34.27588653564453, |
|
"logps/rejected": -33.672359466552734, |
|
"loss": 0.9955, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0032019000500440598, |
|
"rewards/margins": 0.004537059459835291, |
|
"rewards/rejected": -0.0013351596426218748, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": -1.9905990362167358, |
|
"logits/rejected": -1.9891618490219116, |
|
"logps/chosen": -33.116233825683594, |
|
"logps/rejected": -32.55724334716797, |
|
"loss": 0.9955, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.0036955769173800945, |
|
"rewards/margins": 0.004472161643207073, |
|
"rewards/rejected": -0.0007765850750729442, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": -2.0769362449645996, |
|
"logits/rejected": -2.0613036155700684, |
|
"logps/chosen": -33.791297912597656, |
|
"logps/rejected": -33.12422180175781, |
|
"loss": 0.9961, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.003880967851728201, |
|
"rewards/margins": 0.00394281093031168, |
|
"rewards/rejected": -6.184288213262334e-05, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": -1.950060248374939, |
|
"logits/rejected": -1.9492241144180298, |
|
"logps/chosen": -32.82404327392578, |
|
"logps/rejected": -32.50709915161133, |
|
"loss": 0.995, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.004580510314553976, |
|
"rewards/margins": 0.005000022705644369, |
|
"rewards/rejected": -0.000419511750806123, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": -1.9050449132919312, |
|
"logits/rejected": -1.915305733680725, |
|
"logps/chosen": -31.87860679626465, |
|
"logps/rejected": -35.34981155395508, |
|
"loss": 0.9961, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.0032608420588076115, |
|
"rewards/margins": 0.0038713677786290646, |
|
"rewards/rejected": -0.0006105261854827404, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": -2.04546856880188, |
|
"logits/rejected": -2.039043426513672, |
|
"logps/chosen": -33.336219787597656, |
|
"logps/rejected": -29.269311904907227, |
|
"loss": 0.9964, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.0031574335880577564, |
|
"rewards/margins": 0.003586276201531291, |
|
"rewards/rejected": -0.0004288425261620432, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": -1.905160665512085, |
|
"logits/rejected": -1.907360315322876, |
|
"logps/chosen": -33.86741256713867, |
|
"logps/rejected": -30.982807159423828, |
|
"loss": 0.9952, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.0037163912784308195, |
|
"rewards/margins": 0.004818186163902283, |
|
"rewards/rejected": -0.0011017953511327505, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.9973225085766284, |
|
"train_runtime": 3253.1307, |
|
"train_samples_per_second": 0.946, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|