|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": -1.7278180122375488, |
|
"logits/rejected": -1.7377450466156006, |
|
"logps/chosen": -29.553977966308594, |
|
"logps/rejected": -42.813133239746094, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": -1.8668904304504395, |
|
"logits/rejected": -1.8712035417556763, |
|
"logps/chosen": -36.981239318847656, |
|
"logps/rejected": -33.63866424560547, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.5138888955116272, |
|
"rewards/chosen": 0.017751876264810562, |
|
"rewards/margins": 0.023763436824083328, |
|
"rewards/rejected": -0.0060115596279501915, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": -1.9975477457046509, |
|
"logits/rejected": -2.0001885890960693, |
|
"logps/chosen": -29.638402938842773, |
|
"logps/rejected": -29.045080184936523, |
|
"loss": 0.7005, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.002654150128364563, |
|
"rewards/margins": -0.010201702825725079, |
|
"rewards/rejected": 0.012855852022767067, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": -1.9209153652191162, |
|
"logits/rejected": -1.9182332754135132, |
|
"logps/chosen": -31.392269134521484, |
|
"logps/rejected": -33.214996337890625, |
|
"loss": 0.6893, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.016679534688591957, |
|
"rewards/margins": 0.014253495261073112, |
|
"rewards/rejected": 0.0024260382633656263, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": -2.017472505569458, |
|
"logits/rejected": -2.0087223052978516, |
|
"logps/chosen": -32.56648635864258, |
|
"logps/rejected": -32.53681564331055, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.007227012421935797, |
|
"rewards/margins": 0.02412785217165947, |
|
"rewards/rejected": -0.016900835558772087, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": -1.8619842529296875, |
|
"logits/rejected": -1.8512147665023804, |
|
"logps/chosen": -33.57755661010742, |
|
"logps/rejected": -35.45317840576172, |
|
"loss": 0.7035, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.014055396430194378, |
|
"rewards/margins": -0.013470378704369068, |
|
"rewards/rejected": -0.0005850124871358275, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": -1.940241813659668, |
|
"logits/rejected": -1.942185401916504, |
|
"logps/chosen": -32.55712127685547, |
|
"logps/rejected": -33.23926544189453, |
|
"loss": 0.6629, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.030143504962325096, |
|
"rewards/margins": 0.07766537368297577, |
|
"rewards/rejected": -0.047521863132715225, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": -2.070986270904541, |
|
"logits/rejected": -2.0759458541870117, |
|
"logps/chosen": -33.997291564941406, |
|
"logps/rejected": -36.65125274658203, |
|
"loss": 0.6775, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.006164415739476681, |
|
"rewards/margins": 0.051500022411346436, |
|
"rewards/rejected": -0.057664431631565094, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": -1.9328521490097046, |
|
"logits/rejected": -1.935974359512329, |
|
"logps/chosen": -34.317806243896484, |
|
"logps/rejected": -34.648555755615234, |
|
"loss": 0.6465, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.07700104266405106, |
|
"rewards/margins": 0.11630574613809586, |
|
"rewards/rejected": -0.0393047034740448, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": -1.941239356994629, |
|
"logits/rejected": -1.9457323551177979, |
|
"logps/chosen": -32.385948181152344, |
|
"logps/rejected": -32.34455108642578, |
|
"loss": 0.6805, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.057031381875276566, |
|
"rewards/margins": 0.046979233622550964, |
|
"rewards/rejected": 0.010052147321403027, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": -2.0390255451202393, |
|
"logits/rejected": -2.0370407104492188, |
|
"logps/chosen": -32.15161895751953, |
|
"logps/rejected": -31.304489135742188, |
|
"loss": 0.6577, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.06245182827115059, |
|
"rewards/margins": 0.08924683928489685, |
|
"rewards/rejected": -0.02679501473903656, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.233787775039673, |
|
"eval_logits/rejected": -2.228933572769165, |
|
"eval_logps/chosen": -34.04001235961914, |
|
"eval_logps/rejected": -37.535667419433594, |
|
"eval_loss": 0.6948937773704529, |
|
"eval_rewards/accuracies": 0.5315614938735962, |
|
"eval_rewards/chosen": -0.0038188453763723373, |
|
"eval_rewards/margins": 0.00951432902365923, |
|
"eval_rewards/rejected": -0.013333176262676716, |
|
"eval_runtime": 145.7707, |
|
"eval_samples_per_second": 2.353, |
|
"eval_steps_per_second": 0.295, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": -1.994390845298767, |
|
"logits/rejected": -1.991999864578247, |
|
"logps/chosen": -33.1099739074707, |
|
"logps/rejected": -34.02802276611328, |
|
"loss": 0.6747, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.09369214624166489, |
|
"rewards/margins": 0.0927465409040451, |
|
"rewards/rejected": 0.0009456165134906769, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": -2.0057625770568848, |
|
"logits/rejected": -1.9974448680877686, |
|
"logps/chosen": -32.31781768798828, |
|
"logps/rejected": -32.132328033447266, |
|
"loss": 0.6694, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0889604315161705, |
|
"rewards/margins": 0.06853620707988739, |
|
"rewards/rejected": 0.02042422816157341, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": -2.0336880683898926, |
|
"logits/rejected": -2.0257279872894287, |
|
"logps/chosen": -30.32443618774414, |
|
"logps/rejected": -32.07221221923828, |
|
"loss": 0.6522, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.10589826107025146, |
|
"rewards/margins": 0.11934350430965424, |
|
"rewards/rejected": -0.013445250689983368, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": -1.9642670154571533, |
|
"logits/rejected": -1.9745124578475952, |
|
"logps/chosen": -31.212024688720703, |
|
"logps/rejected": -32.565834045410156, |
|
"loss": 0.6272, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.14534947276115417, |
|
"rewards/margins": 0.16701875627040863, |
|
"rewards/rejected": -0.02166926860809326, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": -1.8759937286376953, |
|
"logits/rejected": -1.8771553039550781, |
|
"logps/chosen": -33.917335510253906, |
|
"logps/rejected": -34.8185920715332, |
|
"loss": 0.6064, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.19274887442588806, |
|
"rewards/margins": 0.235683411359787, |
|
"rewards/rejected": -0.042934536933898926, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": -1.927369475364685, |
|
"logits/rejected": -1.9239553213119507, |
|
"logps/chosen": -35.98323440551758, |
|
"logps/rejected": -32.705108642578125, |
|
"loss": 0.6464, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.13190819323062897, |
|
"rewards/margins": 0.11955627053976059, |
|
"rewards/rejected": 0.012351910583674908, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": -2.028454542160034, |
|
"logits/rejected": -2.0210976600646973, |
|
"logps/chosen": -33.50457763671875, |
|
"logps/rejected": -31.386072158813477, |
|
"loss": 0.6077, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.20529839396476746, |
|
"rewards/margins": 0.22925393283367157, |
|
"rewards/rejected": -0.02395555004477501, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": -2.0345230102539062, |
|
"logits/rejected": -2.0397608280181885, |
|
"logps/chosen": -32.19758224487305, |
|
"logps/rejected": -32.435523986816406, |
|
"loss": 0.6083, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.24304144084453583, |
|
"rewards/margins": 0.2083090990781784, |
|
"rewards/rejected": 0.03473237156867981, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": -2.035979747772217, |
|
"logits/rejected": -2.0332181453704834, |
|
"logps/chosen": -31.255151748657227, |
|
"logps/rejected": -31.321630477905273, |
|
"loss": 0.6326, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.163661926984787, |
|
"rewards/margins": 0.16605310142040253, |
|
"rewards/rejected": -0.0023911758325994015, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": -1.9058892726898193, |
|
"logits/rejected": -1.910540223121643, |
|
"logps/chosen": -31.33197021484375, |
|
"logps/rejected": -32.817604064941406, |
|
"loss": 0.6156, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.19201864302158356, |
|
"rewards/margins": 0.21068540215492249, |
|
"rewards/rejected": -0.01866675540804863, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.2316529750823975, |
|
"eval_logits/rejected": -2.2268118858337402, |
|
"eval_logps/chosen": -34.053470611572266, |
|
"eval_logps/rejected": -37.557804107666016, |
|
"eval_loss": 0.6943473219871521, |
|
"eval_rewards/accuracies": 0.5191029906272888, |
|
"eval_rewards/chosen": -0.013246187008917332, |
|
"eval_rewards/margins": 0.015584951266646385, |
|
"eval_rewards/rejected": -0.028831137344241142, |
|
"eval_runtime": 145.8114, |
|
"eval_samples_per_second": 2.352, |
|
"eval_steps_per_second": 0.295, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": -2.01808762550354, |
|
"logits/rejected": -2.0287296772003174, |
|
"logps/chosen": -31.763586044311523, |
|
"logps/rejected": -33.926063537597656, |
|
"loss": 0.622, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.14540112018585205, |
|
"rewards/margins": 0.1897541582584381, |
|
"rewards/rejected": -0.04435301572084427, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": -1.910658597946167, |
|
"logits/rejected": -1.9254140853881836, |
|
"logps/chosen": -29.832544326782227, |
|
"logps/rejected": -31.612533569335938, |
|
"loss": 0.6024, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.19529443979263306, |
|
"rewards/margins": 0.23334410786628723, |
|
"rewards/rejected": -0.038049641996622086, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": -1.9673402309417725, |
|
"logits/rejected": -1.9713077545166016, |
|
"logps/chosen": -33.062660217285156, |
|
"logps/rejected": -31.608203887939453, |
|
"loss": 0.5911, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.24836687743663788, |
|
"rewards/margins": 0.2895973324775696, |
|
"rewards/rejected": -0.0412304513156414, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": -1.9657704830169678, |
|
"logits/rejected": -1.9439115524291992, |
|
"logps/chosen": -33.82358932495117, |
|
"logps/rejected": -35.09131622314453, |
|
"loss": 0.5733, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.23111140727996826, |
|
"rewards/margins": 0.32947883009910583, |
|
"rewards/rejected": -0.09836738556623459, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": -2.007312536239624, |
|
"logits/rejected": -2.003988742828369, |
|
"logps/chosen": -32.68491744995117, |
|
"logps/rejected": -36.24928283691406, |
|
"loss": 0.6228, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.1681055724620819, |
|
"rewards/margins": 0.18515849113464355, |
|
"rewards/rejected": -0.01705293543636799, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": -1.8734614849090576, |
|
"logits/rejected": -1.8710591793060303, |
|
"logps/chosen": -33.96207809448242, |
|
"logps/rejected": -35.5266227722168, |
|
"loss": 0.6286, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.15842413902282715, |
|
"rewards/margins": 0.17382602393627167, |
|
"rewards/rejected": -0.015401872806251049, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": -1.8584483861923218, |
|
"logits/rejected": -1.8560174703598022, |
|
"logps/chosen": -34.1807975769043, |
|
"logps/rejected": -31.80449867248535, |
|
"loss": 0.6325, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.15176042914390564, |
|
"rewards/margins": 0.17497751116752625, |
|
"rewards/rejected": -0.02321707457304001, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": -1.9630708694458008, |
|
"logits/rejected": -1.9525701999664307, |
|
"logps/chosen": -35.00975036621094, |
|
"logps/rejected": -31.84867286682129, |
|
"loss": 0.5978, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.2428235560655594, |
|
"rewards/margins": 0.24820086359977722, |
|
"rewards/rejected": -0.005377279128879309, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": -2.0586793422698975, |
|
"logits/rejected": -2.043766498565674, |
|
"logps/chosen": -30.7253360748291, |
|
"logps/rejected": -32.66551971435547, |
|
"loss": 0.6462, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.13915367424488068, |
|
"rewards/margins": 0.14873233437538147, |
|
"rewards/rejected": -0.009578653611242771, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": -1.9287078380584717, |
|
"logits/rejected": -1.92616868019104, |
|
"logps/chosen": -32.39979553222656, |
|
"logps/rejected": -30.87893295288086, |
|
"loss": 0.5468, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.36911940574645996, |
|
"rewards/margins": 0.4126061797142029, |
|
"rewards/rejected": -0.04348675161600113, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -2.2277145385742188, |
|
"eval_logits/rejected": -2.222882032394409, |
|
"eval_logps/chosen": -34.06844711303711, |
|
"eval_logps/rejected": -37.586029052734375, |
|
"eval_loss": 0.6902604699134827, |
|
"eval_rewards/accuracies": 0.5191029906272888, |
|
"eval_rewards/chosen": -0.023726314306259155, |
|
"eval_rewards/margins": 0.024859989061951637, |
|
"eval_rewards/rejected": -0.04858630895614624, |
|
"eval_runtime": 145.7514, |
|
"eval_samples_per_second": 2.353, |
|
"eval_steps_per_second": 0.295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": -1.9126994609832764, |
|
"logits/rejected": -1.909444808959961, |
|
"logps/chosen": -31.307331085205078, |
|
"logps/rejected": -33.79678726196289, |
|
"loss": 0.6022, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.20721367001533508, |
|
"rewards/margins": 0.24546091258525848, |
|
"rewards/rejected": -0.038247235119342804, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": -1.9636253118515015, |
|
"logits/rejected": -1.9514182806015015, |
|
"logps/chosen": -34.32239532470703, |
|
"logps/rejected": -33.65345764160156, |
|
"loss": 0.5933, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.19157564640045166, |
|
"rewards/margins": 0.27180662751197815, |
|
"rewards/rejected": -0.08023098856210709, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": -1.9983371496200562, |
|
"logits/rejected": -1.9969165325164795, |
|
"logps/chosen": -33.176841735839844, |
|
"logps/rejected": -32.538509368896484, |
|
"loss": 0.5981, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.21626754105091095, |
|
"rewards/margins": 0.257517009973526, |
|
"rewards/rejected": -0.04124947637319565, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": -2.085096836090088, |
|
"logits/rejected": -2.069387435913086, |
|
"logps/chosen": -33.758583068847656, |
|
"logps/rejected": -33.068748474121094, |
|
"loss": 0.5952, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.29456597566604614, |
|
"rewards/margins": 0.26006320118904114, |
|
"rewards/rejected": 0.034502796828746796, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": -1.9578405618667603, |
|
"logits/rejected": -1.9569900035858154, |
|
"logps/chosen": -32.835628509521484, |
|
"logps/rejected": -32.520538330078125, |
|
"loss": 0.5707, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.3125234544277191, |
|
"rewards/margins": 0.3512992262840271, |
|
"rewards/rejected": -0.038775794208049774, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": -1.9134842157363892, |
|
"logits/rejected": -1.9237855672836304, |
|
"logps/chosen": -31.848628997802734, |
|
"logps/rejected": -35.309120178222656, |
|
"loss": 0.5923, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.24924305081367493, |
|
"rewards/margins": 0.2634957432746887, |
|
"rewards/rejected": -0.014252680353820324, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": -2.052818775177002, |
|
"logits/rejected": -2.046323776245117, |
|
"logps/chosen": -33.34727096557617, |
|
"logps/rejected": -29.256671905517578, |
|
"loss": 0.5993, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.21328690648078918, |
|
"rewards/margins": 0.23445896804332733, |
|
"rewards/rejected": -0.021172069013118744, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": -1.9116294384002686, |
|
"logits/rejected": -1.913830041885376, |
|
"logps/chosen": -33.83928680419922, |
|
"logps/rejected": -30.931400299072266, |
|
"loss": 0.5714, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.27983543276786804, |
|
"rewards/margins": 0.3209769129753113, |
|
"rewards/rejected": -0.04114149510860443, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6291831790626823, |
|
"train_runtime": 3251.1508, |
|
"train_samples_per_second": 0.947, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|