|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.282051282051282e-08, |
|
"logits/chosen": -1.7278180122375488, |
|
"logits/rejected": -1.7377450466156006, |
|
"logps/chosen": -29.553977966308594, |
|
"logps/rejected": -42.813133239746094, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2820512820512818e-07, |
|
"logits/chosen": -1.866280436515808, |
|
"logits/rejected": -1.8705918788909912, |
|
"logps/chosen": -37.00367736816406, |
|
"logps/rejected": -33.67123794555664, |
|
"loss": 0.4986, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": 0.000584296474698931, |
|
"rewards/margins": 0.008817334659397602, |
|
"rewards/rejected": -0.00823303870856762, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5641025641025636e-07, |
|
"logits/chosen": -1.9974257946014404, |
|
"logits/rejected": -2.0000691413879395, |
|
"logps/chosen": -29.649478912353516, |
|
"logps/rejected": -29.038330078125, |
|
"loss": 0.5011, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.0014564015436917543, |
|
"rewards/margins": -0.0064794206991791725, |
|
"rewards/rejected": 0.0050230189226567745, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"logits/chosen": -1.9199994802474976, |
|
"logits/rejected": -1.91730535030365, |
|
"logps/chosen": -31.410457611083984, |
|
"logps/rejected": -33.23019027709961, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0011272300034761429, |
|
"rewards/margins": 0.0034732469357550144, |
|
"rewards/rejected": -0.0023460157681256533, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999896948438433e-07, |
|
"logits/chosen": -2.017059326171875, |
|
"logits/rejected": -2.008305788040161, |
|
"logps/chosen": -32.582275390625, |
|
"logps/rejected": -32.48918914794922, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0010934959864243865, |
|
"rewards/margins": -0.005789449438452721, |
|
"rewards/rejected": 0.004695953335613012, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987541037542186e-07, |
|
"logits/chosen": -1.8646684885025024, |
|
"logits/rejected": -1.853882074356079, |
|
"logps/chosen": -33.567169189453125, |
|
"logps/rejected": -35.43851852416992, |
|
"loss": 0.5009, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.0019382357131689787, |
|
"rewards/margins": -0.00470335315912962, |
|
"rewards/rejected": 0.0027651176787912846, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.954691471941118e-07, |
|
"logits/chosen": -1.9455400705337524, |
|
"logits/rejected": -1.947479486465454, |
|
"logps/chosen": -32.57770919799805, |
|
"logps/rejected": -33.181541442871094, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.004494256805628538, |
|
"rewards/margins": 0.006526687648147345, |
|
"rewards/rejected": -0.00203243107534945, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901618883413548e-07, |
|
"logits/chosen": -2.079427719116211, |
|
"logits/rejected": -2.084414482116699, |
|
"logps/chosen": -33.99236297607422, |
|
"logps/rejected": -36.600887298583984, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.0007750942604616284, |
|
"rewards/margins": 0.005627653561532497, |
|
"rewards/rejected": -0.006402746774256229, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828760511501322e-07, |
|
"logits/chosen": -1.9420673847198486, |
|
"logits/rejected": -1.9452102184295654, |
|
"logps/chosen": -34.41423416137695, |
|
"logps/rejected": -34.5662727355957, |
|
"loss": 0.5004, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.0027157063595950603, |
|
"rewards/margins": -0.0025114950258284807, |
|
"rewards/rejected": 0.00522720068693161, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.736716601303429e-07, |
|
"logits/chosen": -1.951348066329956, |
|
"logits/rejected": -1.9558594226837158, |
|
"logps/chosen": -32.46399688720703, |
|
"logps/rejected": -32.344329833984375, |
|
"loss": 0.5004, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.0006849506171420217, |
|
"rewards/margins": -0.0022309008054435253, |
|
"rewards/rejected": 0.002915852004662156, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.62624545834521e-07, |
|
"logits/chosen": -2.0500051975250244, |
|
"logits/rejected": -2.0480096340179443, |
|
"logps/chosen": -32.25367736816406, |
|
"logps/rejected": -31.2783260345459, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.002567791845649481, |
|
"rewards/margins": -0.00014422755339182913, |
|
"rewards/rejected": -0.0024235642049461603, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.244023323059082, |
|
"eval_logits/rejected": -2.2391417026519775, |
|
"eval_logps/chosen": -34.02798080444336, |
|
"eval_logps/rejected": -37.506290435791016, |
|
"eval_loss": 0.500300407409668, |
|
"eval_rewards/accuracies": 0.49335551261901855, |
|
"eval_rewards/chosen": 0.0013138726353645325, |
|
"eval_rewards/margins": -0.0007514380267821252, |
|
"eval_rewards/rejected": 0.0020653100218623877, |
|
"eval_runtime": 146.2282, |
|
"eval_samples_per_second": 2.346, |
|
"eval_steps_per_second": 0.294, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4982572012636904e-07, |
|
"logits/chosen": -2.0061328411102295, |
|
"logits/rejected": -2.0037178993225098, |
|
"logps/chosen": -33.24314880371094, |
|
"logps/rejected": -34.023292541503906, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0001343002513749525, |
|
"rewards/margins": -0.001081160269677639, |
|
"rewards/rejected": 0.0012154604773968458, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.353806263777677e-07, |
|
"logits/chosen": -2.0173094272613525, |
|
"logits/rejected": -2.008920431137085, |
|
"logps/chosen": -32.45183563232422, |
|
"logps/rejected": -32.17412185668945, |
|
"loss": 0.5002, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.0013863157946616411, |
|
"rewards/margins": 0.0011368464911356568, |
|
"rewards/rejected": -0.002523162867873907, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.194082707715275e-07, |
|
"logits/chosen": -2.0478129386901855, |
|
"logits/rejected": -2.0397789478302, |
|
"logps/chosen": -30.514944076538086, |
|
"logps/rejected": -32.053070068359375, |
|
"loss": 0.5012, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.007845849730074406, |
|
"rewards/margins": -0.007833347655832767, |
|
"rewards/rejected": -1.2502726349339355e-05, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020402418666621e-07, |
|
"logits/chosen": -1.978299856185913, |
|
"logits/rejected": -1.9885809421539307, |
|
"logps/chosen": -31.413021087646484, |
|
"logps/rejected": -32.54629135131836, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.0013283130247145891, |
|
"rewards/margins": 0.0036107772029936314, |
|
"rewards/rejected": -0.002282463712617755, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8341962650351185e-07, |
|
"logits/chosen": -1.8922052383422852, |
|
"logits/rejected": -1.8932926654815674, |
|
"logps/chosen": -34.210052490234375, |
|
"logps/rejected": -34.78533935546875, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.003472150769084692, |
|
"rewards/margins": 0.0021447453182190657, |
|
"rewards/rejected": -0.005616897251456976, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.636998309800572e-07, |
|
"logits/chosen": -1.9435640573501587, |
|
"logits/rejected": -1.9400733709335327, |
|
"logps/chosen": -36.18402862548828, |
|
"logps/rejected": -32.75020217895508, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.0024705410469323397, |
|
"rewards/margins": 0.0030191238038241863, |
|
"rewards/rejected": -0.00548966508358717, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.430433172111807e-07, |
|
"logits/chosen": -2.043527603149414, |
|
"logits/rejected": -2.0361220836639404, |
|
"logps/chosen": -33.81038284301758, |
|
"logps/rejected": -31.369457244873047, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.002503907773643732, |
|
"rewards/margins": 0.0010178396478295326, |
|
"rewards/rejected": -0.0035217474214732647, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.216202642830543e-07, |
|
"logits/chosen": -2.0494205951690674, |
|
"logits/rejected": -2.0546982288360596, |
|
"logps/chosen": -32.51402282714844, |
|
"logps/rejected": -32.5015983581543, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.0061522433534264565, |
|
"rewards/margins": 0.009443378075957298, |
|
"rewards/rejected": -0.003291133791208267, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.9960716642946403e-07, |
|
"logits/chosen": -2.0493359565734863, |
|
"logits/rejected": -2.046536922454834, |
|
"logps/chosen": -31.47686195373535, |
|
"logps/rejected": -31.326452255249023, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.002418341813609004, |
|
"rewards/margins": 0.004066127352416515, |
|
"rewards/rejected": -0.0016477858880534768, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.771853789806683e-07, |
|
"logits/chosen": -1.9191436767578125, |
|
"logits/rejected": -1.9238201379776, |
|
"logps/chosen": -31.60672378540039, |
|
"logps/rejected": -32.80228805541992, |
|
"loss": 0.4994, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -8.77844140632078e-05, |
|
"rewards/margins": 0.002183270873501897, |
|
"rewards/rejected": -0.0022710547782480717, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.2449820041656494, |
|
"eval_logits/rejected": -2.2400975227355957, |
|
"eval_logps/chosen": -34.03920364379883, |
|
"eval_logps/rejected": -37.51057434082031, |
|
"eval_loss": 0.5004644989967346, |
|
"eval_rewards/accuracies": 0.5020764470100403, |
|
"eval_rewards/chosen": -0.0009311072644777596, |
|
"eval_rewards/margins": -0.002139872871339321, |
|
"eval_rewards/rejected": 0.0012087655486539006, |
|
"eval_runtime": 146.0512, |
|
"eval_samples_per_second": 2.348, |
|
"eval_steps_per_second": 0.294, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5453962426402e-07, |
|
"logits/chosen": -2.03255033493042, |
|
"logits/rejected": -2.0432262420654297, |
|
"logps/chosen": -31.951080322265625, |
|
"logps/rejected": -33.896484375, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.004044383764266968, |
|
"rewards/margins": 0.010801524855196476, |
|
"rewards/rejected": -0.006757141090929508, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.318564697655179e-07, |
|
"logits/chosen": -1.9258638620376587, |
|
"logits/rejected": -1.9407155513763428, |
|
"logps/chosen": -30.115543365478516, |
|
"logps/rejected": -31.575191497802734, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0008008191362023354, |
|
"rewards/margins": 0.002602284774184227, |
|
"rewards/rejected": -0.003403103444725275, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.093227910899832e-07, |
|
"logits/chosen": -1.983538269996643, |
|
"logits/rejected": -1.9875080585479736, |
|
"logps/chosen": -33.397071838378906, |
|
"logps/rejected": -31.56662940979004, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.004079356789588928, |
|
"rewards/margins": 0.007544734515249729, |
|
"rewards/rejected": -0.0034653779584914446, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8712423238279356e-07, |
|
"logits/chosen": -1.9831326007843018, |
|
"logits/rejected": -1.9611790180206299, |
|
"logps/chosen": -34.164363861083984, |
|
"logps/rejected": -34.951324462890625, |
|
"loss": 0.5004, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.0021236296743154526, |
|
"rewards/margins": -0.0020176086109131575, |
|
"rewards/rejected": -0.00010602096881484613, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.654436768970182e-07, |
|
"logits/chosen": -2.0248496532440186, |
|
"logits/rejected": -2.021538734436035, |
|
"logps/chosen": -32.92078399658203, |
|
"logps/rejected": -36.21001434326172, |
|
"loss": 0.5005, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.0008573724189773202, |
|
"rewards/margins": -0.002124571241438389, |
|
"rewards/rejected": 0.002981943776831031, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.444597403062196e-07, |
|
"logits/chosen": -1.8912862539291382, |
|
"logits/rejected": -1.8888485431671143, |
|
"logps/chosen": -34.20059585571289, |
|
"logps/rejected": -35.51679992675781, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.0024391734041273594, |
|
"rewards/margins": -3.7313438951969147e-06, |
|
"rewards/rejected": -0.002435441594570875, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2434529917578887e-07, |
|
"logits/chosen": -1.876117467880249, |
|
"logits/rejected": -1.8735599517822266, |
|
"logps/chosen": -34.39899444580078, |
|
"logps/rejected": -31.73566246032715, |
|
"loss": 0.5014, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.00027935029356740415, |
|
"rewards/margins": -0.007413160987198353, |
|
"rewards/rejected": 0.007133810315281153, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0526606671603521e-07, |
|
"logits/chosen": -1.9805666208267212, |
|
"logits/rejected": -1.9699329137802124, |
|
"logps/chosen": -35.328285217285156, |
|
"logps/rejected": -31.84103012084961, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.005670648999512196, |
|
"rewards/margins": 0.0056780558079481125, |
|
"rewards/rejected": -7.406389158859383e-06, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.737922755071453e-08, |
|
"logits/chosen": -2.0757369995117188, |
|
"logits/rejected": -2.06070613861084, |
|
"logps/chosen": -30.9174861907959, |
|
"logps/rejected": -32.63935470581055, |
|
"loss": 0.5004, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.001327360630966723, |
|
"rewards/margins": -0.0011682776967063546, |
|
"rewards/rejected": 0.0024956378620117903, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.08321427484816e-08, |
|
"logits/chosen": -1.947251319885254, |
|
"logits/rejected": -1.9447133541107178, |
|
"logps/chosen": -32.908634185791016, |
|
"logps/rejected": -30.82659912109375, |
|
"loss": 0.499, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0036955091636627913, |
|
"rewards/margins": 0.005653515458106995, |
|
"rewards/rejected": -0.001958005130290985, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -2.2445528507232666, |
|
"eval_logits/rejected": -2.23966383934021, |
|
"eval_logps/chosen": -34.04175567626953, |
|
"eval_logps/rejected": -37.499141693115234, |
|
"eval_loss": 0.5009724497795105, |
|
"eval_rewards/accuracies": 0.4431063234806061, |
|
"eval_rewards/chosen": -0.0014407250564545393, |
|
"eval_rewards/margins": -0.004937068559229374, |
|
"eval_rewards/rejected": 0.003496343968436122, |
|
"eval_runtime": 145.9859, |
|
"eval_samples_per_second": 2.35, |
|
"eval_steps_per_second": 0.295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.576113578589034e-08, |
|
"logits/chosen": -1.9292316436767578, |
|
"logits/rejected": -1.9259681701660156, |
|
"logps/chosen": -31.583276748657227, |
|
"logps/rejected": -33.727840423583984, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.004015020560473204, |
|
"rewards/margins": 0.001152882701717317, |
|
"rewards/rejected": 0.002862137509509921, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.229036944380912e-08, |
|
"logits/chosen": -1.9808372259140015, |
|
"logits/rejected": -1.9685356616973877, |
|
"logps/chosen": -34.57278823852539, |
|
"logps/rejected": -33.57910919189453, |
|
"loss": 0.498, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.004657471086829901, |
|
"rewards/margins": 0.01271037757396698, |
|
"rewards/rejected": -0.008052906021475792, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.053082288996112e-08, |
|
"logits/chosen": -2.0161705017089844, |
|
"logits/rejected": -2.0147035121917725, |
|
"logps/chosen": -33.47340393066406, |
|
"logps/rejected": -32.46953582763672, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.0024778605438768864, |
|
"rewards/margins": 0.00046821607975289226, |
|
"rewards/rejected": 0.0020096441730856895, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.05793773749158e-08, |
|
"logits/chosen": -2.1033377647399902, |
|
"logits/rejected": -2.0875496864318848, |
|
"logps/chosen": -34.152557373046875, |
|
"logps/rejected": -33.08795166015625, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.005367305129766464, |
|
"rewards/margins": -0.0006489218212664127, |
|
"rewards/rejected": 0.006016227416694164, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.251801807404168e-08, |
|
"logits/chosen": -1.9754228591918945, |
|
"logits/rejected": -1.974477767944336, |
|
"logps/chosen": -33.24272537231445, |
|
"logps/rejected": -32.46410369873047, |
|
"loss": 0.4986, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.007874632254242897, |
|
"rewards/margins": 0.007666703313589096, |
|
"rewards/rejected": 0.00020792819850612432, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.41315865106129e-09, |
|
"logits/chosen": -1.9320882558822632, |
|
"logits/rejected": -1.9424550533294678, |
|
"logps/chosen": -32.212371826171875, |
|
"logps/rejected": -35.286354064941406, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.0015360517427325249, |
|
"rewards/margins": -0.002017115242779255, |
|
"rewards/rejected": 0.00048106274334713817, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3150941078050324e-09, |
|
"logits/chosen": -2.070864200592041, |
|
"logits/rejected": -2.0643177032470703, |
|
"logps/chosen": -33.652870178222656, |
|
"logps/rejected": -29.220972061157227, |
|
"loss": 0.5004, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.00018037435074802488, |
|
"rewards/margins": -0.001271072425879538, |
|
"rewards/rejected": 0.001090698060579598, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.575864278703266e-10, |
|
"logits/chosen": -1.9306039810180664, |
|
"logits/rejected": -1.932790756225586, |
|
"logps/chosen": -34.26443862915039, |
|
"logps/rejected": -30.895788192749023, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.005078143440186977, |
|
"rewards/margins": -0.00044635325320996344, |
|
"rewards/rejected": -0.004631790332496166, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4997496530607149, |
|
"train_runtime": 3257.6696, |
|
"train_samples_per_second": 0.945, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|