|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.282051282051282e-08, |
|
"logits/chosen": -1.7278180122375488, |
|
"logits/rejected": -1.7377450466156006, |
|
"logps/chosen": -29.553977966308594, |
|
"logps/rejected": -42.813133239746094, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2820512820512818e-07, |
|
"logits/chosen": -1.8662465810775757, |
|
"logits/rejected": -1.8705615997314453, |
|
"logps/chosen": -36.9855842590332, |
|
"logps/rejected": -33.65031433105469, |
|
"loss": 0.4935, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": 0.016807515174150467, |
|
"rewards/margins": 0.03299880772829056, |
|
"rewards/rejected": -0.01619129255414009, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5641025641025636e-07, |
|
"logits/chosen": -1.9972314834594727, |
|
"logits/rejected": -1.999875783920288, |
|
"logps/chosen": -29.622329711914062, |
|
"logps/rejected": -29.04340171813965, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.01589435711503029, |
|
"rewards/margins": -0.00013793967082165182, |
|
"rewards/rejected": 0.016032297164201736, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"logits/chosen": -1.9198119640350342, |
|
"logits/rejected": -1.9171171188354492, |
|
"logps/chosen": -31.40401268005371, |
|
"logps/rejected": -33.211997985839844, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0096666868776083, |
|
"rewards/margins": 0.0044937655329704285, |
|
"rewards/rejected": 0.005172918550670147, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999896948438433e-07, |
|
"logits/chosen": -2.0162081718444824, |
|
"logits/rejected": -2.007472515106201, |
|
"logps/chosen": -32.587196350097656, |
|
"logps/rejected": -32.514732360839844, |
|
"loss": 0.5011, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.00831048097461462, |
|
"rewards/margins": -0.00666379788890481, |
|
"rewards/rejected": -0.0016466856468468904, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987541037542186e-07, |
|
"logits/chosen": -1.8641865253448486, |
|
"logits/rejected": -1.8533992767333984, |
|
"logps/chosen": -33.56541061401367, |
|
"logps/rejected": -35.421974182128906, |
|
"loss": 0.5049, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.006343575660139322, |
|
"rewards/margins": -0.030638951808214188, |
|
"rewards/rejected": 0.02429538033902645, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.954691471941118e-07, |
|
"logits/chosen": -1.945642113685608, |
|
"logits/rejected": -1.9475781917572021, |
|
"logps/chosen": -32.572914123535156, |
|
"logps/rejected": -33.161590576171875, |
|
"loss": 0.4972, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.021815191954374313, |
|
"rewards/margins": 0.01398499310016632, |
|
"rewards/rejected": 0.007830199785530567, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901618883413548e-07, |
|
"logits/chosen": -2.0798556804656982, |
|
"logits/rejected": -2.0848286151885986, |
|
"logps/chosen": -33.97870635986328, |
|
"logps/rejected": -36.580543518066406, |
|
"loss": 0.4965, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.007821302860975266, |
|
"rewards/margins": 0.017155062407255173, |
|
"rewards/rejected": -0.009333762340247631, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828760511501322e-07, |
|
"logits/chosen": -1.9424225091934204, |
|
"logits/rejected": -1.9455715417861938, |
|
"logps/chosen": -34.389671325683594, |
|
"logps/rejected": -34.575042724609375, |
|
"loss": 0.4968, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.030510548502206802, |
|
"rewards/margins": 0.016619805246591568, |
|
"rewards/rejected": 0.013890743255615234, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.736716601303429e-07, |
|
"logits/chosen": -1.9505561590194702, |
|
"logits/rejected": -1.9550676345825195, |
|
"logps/chosen": -32.456565856933594, |
|
"logps/rejected": -32.361209869384766, |
|
"loss": 0.4976, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.008682352490723133, |
|
"rewards/margins": 0.0105207534506917, |
|
"rewards/rejected": -0.0018384016584604979, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.62624545834521e-07, |
|
"logits/chosen": -2.048992156982422, |
|
"logits/rejected": -2.0470006465911865, |
|
"logps/chosen": -32.2261848449707, |
|
"logps/rejected": -31.262670516967773, |
|
"loss": 0.4981, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.01171906292438507, |
|
"rewards/margins": 0.008886159397661686, |
|
"rewards/rejected": 0.0028329025954008102, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.244107484817505, |
|
"eval_logits/rejected": -2.239222764968872, |
|
"eval_logps/chosen": -34.02288818359375, |
|
"eval_logps/rejected": -37.50926208496094, |
|
"eval_loss": 0.49965521693229675, |
|
"eval_rewards/accuracies": 0.5078904032707214, |
|
"eval_rewards/chosen": 0.009330343455076218, |
|
"eval_rewards/margins": 0.003444999223574996, |
|
"eval_rewards/rejected": 0.005885345861315727, |
|
"eval_runtime": 146.2405, |
|
"eval_samples_per_second": 2.345, |
|
"eval_steps_per_second": 0.294, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4982572012636904e-07, |
|
"logits/chosen": -2.005356788635254, |
|
"logits/rejected": -2.0029444694519043, |
|
"logps/chosen": -33.24960708618164, |
|
"logps/rejected": -34.023834228515625, |
|
"loss": 0.5009, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.004633937496691942, |
|
"rewards/margins": -0.009062351658940315, |
|
"rewards/rejected": 0.00442841649055481, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.353806263777677e-07, |
|
"logits/chosen": -2.017059564590454, |
|
"logits/rejected": -2.008686065673828, |
|
"logps/chosen": -32.44651412963867, |
|
"logps/rejected": -32.1725959777832, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0012888375204056501, |
|
"rewards/margins": 0.007584270089864731, |
|
"rewards/rejected": -0.008873110637068748, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.194082707715275e-07, |
|
"logits/chosen": -2.0462288856506348, |
|
"logits/rejected": -2.038172960281372, |
|
"logps/chosen": -30.503637313842773, |
|
"logps/rejected": -32.05141067504883, |
|
"loss": 0.5043, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -0.022338179871439934, |
|
"rewards/margins": -0.023616474121809006, |
|
"rewards/rejected": 0.0012782930862158537, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020402418666621e-07, |
|
"logits/chosen": -1.9769847393035889, |
|
"logits/rejected": -1.9872528314590454, |
|
"logps/chosen": -31.388320922851562, |
|
"logps/rejected": -32.554039001464844, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.02507348358631134, |
|
"rewards/margins": 0.04039759188890457, |
|
"rewards/rejected": -0.015324106439948082, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8341962650351185e-07, |
|
"logits/chosen": -1.8903350830078125, |
|
"logits/rejected": -1.8914152383804321, |
|
"logps/chosen": -34.154296875, |
|
"logps/rejected": -34.76646423339844, |
|
"loss": 0.4927, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.030716974288225174, |
|
"rewards/margins": 0.03808692842721939, |
|
"rewards/rejected": -0.007369957864284515, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.636998309800572e-07, |
|
"logits/chosen": -1.9426672458648682, |
|
"logits/rejected": -1.939186453819275, |
|
"logps/chosen": -36.157989501953125, |
|
"logps/rejected": -32.7253303527832, |
|
"loss": 0.4972, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.010952227748930454, |
|
"rewards/margins": 0.013016450218856335, |
|
"rewards/rejected": -0.0020642229355871677, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.430433172111807e-07, |
|
"logits/chosen": -2.0421078205108643, |
|
"logits/rejected": -2.0347187519073486, |
|
"logps/chosen": -33.78765106201172, |
|
"logps/rejected": -31.363611221313477, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.008164674043655396, |
|
"rewards/margins": 0.0175738874822855, |
|
"rewards/rejected": -0.009409213438630104, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.216202642830543e-07, |
|
"logits/chosen": -2.0476274490356445, |
|
"logits/rejected": -2.0528929233551025, |
|
"logps/chosen": -32.52192306518555, |
|
"logps/rejected": -32.50886917114258, |
|
"loss": 0.4919, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.01828894577920437, |
|
"rewards/margins": 0.037272557616233826, |
|
"rewards/rejected": -0.018983609974384308, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.9960716642946403e-07, |
|
"logits/chosen": -2.0479583740234375, |
|
"logits/rejected": -2.0451717376708984, |
|
"logps/chosen": -31.4959774017334, |
|
"logps/rejected": -31.333343505859375, |
|
"loss": 0.4986, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.005620955489575863, |
|
"rewards/margins": 0.006484903395175934, |
|
"rewards/rejected": -0.012105859816074371, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.771853789806683e-07, |
|
"logits/chosen": -1.9188188314437866, |
|
"logits/rejected": -1.9234987497329712, |
|
"logps/chosen": -31.607952117919922, |
|
"logps/rejected": -32.79724884033203, |
|
"loss": 0.4987, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -0.0013334359973669052, |
|
"rewards/margins": 0.0037218607030808926, |
|
"rewards/rejected": -0.005055299494415522, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.243473768234253, |
|
"eval_logits/rejected": -2.2385945320129395, |
|
"eval_logps/chosen": -34.013954162597656, |
|
"eval_logps/rejected": -37.49945068359375, |
|
"eval_loss": 0.49993959069252014, |
|
"eval_rewards/accuracies": 0.5074750781059265, |
|
"eval_rewards/chosen": 0.016473928466439247, |
|
"eval_rewards/margins": 0.0027353279292583466, |
|
"eval_rewards/rejected": 0.0137386005371809, |
|
"eval_runtime": 145.8931, |
|
"eval_samples_per_second": 2.351, |
|
"eval_steps_per_second": 0.295, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5453962426402e-07, |
|
"logits/chosen": -2.0315659046173096, |
|
"logits/rejected": -2.0422449111938477, |
|
"logps/chosen": -31.945932388305664, |
|
"logps/rejected": -33.85708999633789, |
|
"loss": 0.497, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.020296860486268997, |
|
"rewards/margins": 0.015811126679182053, |
|
"rewards/rejected": 0.0044857352040708065, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.318564697655179e-07, |
|
"logits/chosen": -1.9255174398422241, |
|
"logits/rejected": -1.9403587579727173, |
|
"logps/chosen": -30.07940101623535, |
|
"logps/rejected": -31.557880401611328, |
|
"loss": 0.4944, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.02570931240916252, |
|
"rewards/margins": 0.02547053061425686, |
|
"rewards/rejected": 0.00023878086358308792, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.093227910899832e-07, |
|
"logits/chosen": -1.9828882217407227, |
|
"logits/rejected": -1.986853003501892, |
|
"logps/chosen": -33.40575408935547, |
|
"logps/rejected": -31.552501678466797, |
|
"loss": 0.4976, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.00937105156481266, |
|
"rewards/margins": 0.011929656378924847, |
|
"rewards/rejected": -0.0025586034171283245, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8712423238279356e-07, |
|
"logits/chosen": -1.982568383216858, |
|
"logits/rejected": -1.9606094360351562, |
|
"logps/chosen": -34.157630920410156, |
|
"logps/rejected": -34.96028518676758, |
|
"loss": 0.4991, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.00310176657512784, |
|
"rewards/margins": 0.004490714054554701, |
|
"rewards/rejected": -0.007592480629682541, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.654436768970182e-07, |
|
"logits/chosen": -2.024078845977783, |
|
"logits/rejected": -2.0207676887512207, |
|
"logps/chosen": -32.89586639404297, |
|
"logps/rejected": -36.22296905517578, |
|
"loss": 0.4952, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.023358644917607307, |
|
"rewards/margins": 0.02179408073425293, |
|
"rewards/rejected": 0.0015645644161850214, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.444597403062196e-07, |
|
"logits/chosen": -1.8911396265029907, |
|
"logits/rejected": -1.8886913061141968, |
|
"logps/chosen": -34.1867790222168, |
|
"logps/rejected": -35.52009201049805, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0012989563401788473, |
|
"rewards/margins": 0.013674641028046608, |
|
"rewards/rejected": -0.012375684455037117, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2434529917578887e-07, |
|
"logits/chosen": -1.8761117458343506, |
|
"logits/rejected": -1.8735707998275757, |
|
"logps/chosen": -34.38152313232422, |
|
"logps/rejected": -31.744131088256836, |
|
"loss": 0.5019, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.012860517017543316, |
|
"rewards/margins": -0.008900386281311512, |
|
"rewards/rejected": 0.02176090143620968, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0526606671603521e-07, |
|
"logits/chosen": -1.979835867881775, |
|
"logits/rejected": -1.9692022800445557, |
|
"logps/chosen": -35.314170837402344, |
|
"logps/rejected": -31.835962295532227, |
|
"loss": 0.4935, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.033973388373851776, |
|
"rewards/margins": 0.029950443655252457, |
|
"rewards/rejected": 0.004022946115583181, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.737922755071453e-08, |
|
"logits/chosen": -2.075986385345459, |
|
"logits/rejected": -2.0609803199768066, |
|
"logps/chosen": -30.91201400756836, |
|
"logps/rejected": -32.63774871826172, |
|
"loss": 0.5005, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.009686904028058052, |
|
"rewards/margins": -0.0015827339375391603, |
|
"rewards/rejected": 0.011269642040133476, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.08321427484816e-08, |
|
"logits/chosen": -1.9475170373916626, |
|
"logits/rejected": -1.944972038269043, |
|
"logps/chosen": -32.88249969482422, |
|
"logps/rejected": -30.8377685546875, |
|
"loss": 0.4901, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.03568952530622482, |
|
"rewards/margins": 0.052456192672252655, |
|
"rewards/rejected": -0.01676667109131813, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -2.244415283203125, |
|
"eval_logits/rejected": -2.2395384311676025, |
|
"eval_logps/chosen": -34.01211166381836, |
|
"eval_logps/rejected": -37.495113372802734, |
|
"eval_loss": 0.5000770092010498, |
|
"eval_rewards/accuracies": 0.5045680999755859, |
|
"eval_rewards/chosen": 0.01795424334704876, |
|
"eval_rewards/margins": 0.0007461770437657833, |
|
"eval_rewards/rejected": 0.01720806397497654, |
|
"eval_runtime": 145.9415, |
|
"eval_samples_per_second": 2.35, |
|
"eval_steps_per_second": 0.295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.576113578589034e-08, |
|
"logits/chosen": -1.9293571710586548, |
|
"logits/rejected": -1.926099419593811, |
|
"logps/chosen": -31.5543212890625, |
|
"logps/rejected": -33.74175262451172, |
|
"loss": 0.4912, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.039222296327352524, |
|
"rewards/margins": 0.03890404850244522, |
|
"rewards/rejected": 0.00031825463520362973, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.229036944380912e-08, |
|
"logits/chosen": -1.9810116291046143, |
|
"logits/rejected": -1.9687116146087646, |
|
"logps/chosen": -34.56316375732422, |
|
"logps/rejected": -33.561100006103516, |
|
"loss": 0.4923, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.026327725499868393, |
|
"rewards/margins": 0.04413483291864395, |
|
"rewards/rejected": -0.01780710555613041, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.053082288996112e-08, |
|
"logits/chosen": -2.0162246227264404, |
|
"logits/rejected": -2.014770269393921, |
|
"logps/chosen": -33.477134704589844, |
|
"logps/rejected": -32.470088958740234, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.006926077418029308, |
|
"rewards/margins": -0.0006712455069646239, |
|
"rewards/rejected": 0.007597323507070541, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.05793773749158e-08, |
|
"logits/chosen": -2.103529214859009, |
|
"logits/rejected": -2.0877299308776855, |
|
"logps/chosen": -34.1456298828125, |
|
"logps/rejected": -33.083770751953125, |
|
"loss": 0.5012, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.027009794488549232, |
|
"rewards/margins": -0.0004009060503449291, |
|
"rewards/rejected": 0.02741069719195366, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.251801807404168e-08, |
|
"logits/chosen": -1.9753166437149048, |
|
"logits/rejected": -1.974352478981018, |
|
"logps/chosen": -33.25692367553711, |
|
"logps/rejected": -32.45876693725586, |
|
"loss": 0.4973, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.02013799361884594, |
|
"rewards/margins": 0.015033388510346413, |
|
"rewards/rejected": 0.005104603711515665, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.41315865106129e-09, |
|
"logits/chosen": -1.931349515914917, |
|
"logits/rejected": -1.941706895828247, |
|
"logps/chosen": -32.206031799316406, |
|
"logps/rejected": -35.306983947753906, |
|
"loss": 0.4989, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.001073227496817708, |
|
"rewards/margins": 0.013507463037967682, |
|
"rewards/rejected": -0.01458069123327732, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3150941078050324e-09, |
|
"logits/chosen": -2.070504665374756, |
|
"logits/rejected": -2.0639472007751465, |
|
"logps/chosen": -33.65058135986328, |
|
"logps/rejected": -29.202342987060547, |
|
"loss": 0.504, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": 0.0011108577018603683, |
|
"rewards/margins": -0.018153894692659378, |
|
"rewards/rejected": 0.01926475390791893, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.575864278703266e-10, |
|
"logits/chosen": -1.9301570653915405, |
|
"logits/rejected": -1.9323084354400635, |
|
"logps/chosen": -34.23206329345703, |
|
"logps/rejected": -30.903820037841797, |
|
"loss": 0.4945, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.005591380409896374, |
|
"rewards/margins": 0.03054152801632881, |
|
"rewards/rejected": -0.024950148537755013, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.10985468208015739, |
|
"train_runtime": 628.1848, |
|
"train_samples_per_second": 4.901, |
|
"train_steps_per_second": 0.613 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|