|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.282051282051282e-08, |
|
"logits/chosen": -1.7278180122375488, |
|
"logits/rejected": -1.7377450466156006, |
|
"logps/chosen": -29.553977966308594, |
|
"logps/rejected": -42.813133239746094, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.2820512820512818e-07, |
|
"logits/chosen": -1.8662705421447754, |
|
"logits/rejected": -1.870587706565857, |
|
"logps/chosen": -36.9873046875, |
|
"logps/rejected": -33.66048049926758, |
|
"loss": 0.4974, |
|
"rewards/accuracies": 0.5138888955116272, |
|
"rewards/chosen": 0.005787754897028208, |
|
"rewards/margins": 0.014910436235368252, |
|
"rewards/rejected": -0.00912268366664648, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.5641025641025636e-07, |
|
"logits/chosen": -1.997283935546875, |
|
"logits/rejected": -1.9999357461929321, |
|
"logps/chosen": -29.62722396850586, |
|
"logps/rejected": -29.066070556640625, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.004491320811212063, |
|
"rewards/margins": 0.005279188044369221, |
|
"rewards/rejected": -0.0007878671167418361, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.8461538461538463e-07, |
|
"logits/chosen": -1.9196703433990479, |
|
"logits/rejected": -1.9169772863388062, |
|
"logps/chosen": -31.424610137939453, |
|
"logps/rejected": -33.22700119018555, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.0025542874354869127, |
|
"rewards/margins": 8.203647666960023e-06, |
|
"rewards/rejected": -0.002562491921707988, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999896948438433e-07, |
|
"logits/chosen": -2.0166847705841064, |
|
"logits/rejected": -2.007936954498291, |
|
"logps/chosen": -32.587745666503906, |
|
"logps/rejected": -32.51628875732422, |
|
"loss": 0.5006, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0032799947075545788, |
|
"rewards/margins": -0.002195248380303383, |
|
"rewards/rejected": -0.0010847460944205523, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987541037542186e-07, |
|
"logits/chosen": -1.8643096685409546, |
|
"logits/rejected": -1.853522539138794, |
|
"logps/chosen": -33.55935287475586, |
|
"logps/rejected": -35.42003631591797, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.0005626566708087921, |
|
"rewards/margins": -0.010254684835672379, |
|
"rewards/rejected": 0.009692028164863586, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.954691471941118e-07, |
|
"logits/chosen": -1.9451143741607666, |
|
"logits/rejected": -1.9470453262329102, |
|
"logps/chosen": -32.59963607788086, |
|
"logps/rejected": -33.16991424560547, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.00016471892013214529, |
|
"rewards/margins": -0.0002745899255387485, |
|
"rewards/rejected": 0.00043930913670919836, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901618883413548e-07, |
|
"logits/chosen": -2.080049991607666, |
|
"logits/rejected": -2.085019588470459, |
|
"logps/chosen": -33.990013122558594, |
|
"logps/rejected": -36.58393478393555, |
|
"loss": 0.4992, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.00045763104571960866, |
|
"rewards/margins": 0.004060628358274698, |
|
"rewards/rejected": -0.004518259782344103, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828760511501322e-07, |
|
"logits/chosen": -1.943108320236206, |
|
"logits/rejected": -1.9462664127349854, |
|
"logps/chosen": -34.41640090942383, |
|
"logps/rejected": -34.587799072265625, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0034226563293486834, |
|
"rewards/margins": 0.002040152670815587, |
|
"rewards/rejected": 0.0013825036585330963, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.736716601303429e-07, |
|
"logits/chosen": -1.9505999088287354, |
|
"logits/rejected": -1.955100655555725, |
|
"logps/chosen": -32.46385955810547, |
|
"logps/rejected": -32.354026794433594, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.0010680232662707567, |
|
"rewards/margins": -0.00039678759640082717, |
|
"rewards/rejected": 0.0014648116193711758, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.62624545834521e-07, |
|
"logits/chosen": -2.0487732887268066, |
|
"logits/rejected": -2.0467641353607178, |
|
"logps/chosen": -32.22932052612305, |
|
"logps/rejected": -31.277706146240234, |
|
"loss": 0.4987, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.003454477759078145, |
|
"rewards/margins": 0.0069038658402860165, |
|
"rewards/rejected": -0.003449387848377228, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.2437498569488525, |
|
"eval_logits/rejected": -2.238868474960327, |
|
"eval_logps/chosen": -34.018192291259766, |
|
"eval_logps/rejected": -37.51003646850586, |
|
"eval_loss": 0.49943608045578003, |
|
"eval_rewards/accuracies": 0.5074750781059265, |
|
"eval_rewards/chosen": 0.0049087232910096645, |
|
"eval_rewards/margins": 0.00293393200263381, |
|
"eval_rewards/rejected": 0.001974791055545211, |
|
"eval_runtime": 146.1062, |
|
"eval_samples_per_second": 2.348, |
|
"eval_steps_per_second": 0.294, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.4982572012636904e-07, |
|
"logits/chosen": -2.005328893661499, |
|
"logits/rejected": -2.0029098987579346, |
|
"logps/chosen": -33.22368240356445, |
|
"logps/rejected": -34.016380310058594, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.006039897445589304, |
|
"rewards/margins": 0.0021425553131848574, |
|
"rewards/rejected": 0.0038973423652350903, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.353806263777677e-07, |
|
"logits/chosen": -2.0170254707336426, |
|
"logits/rejected": -2.0086662769317627, |
|
"logps/chosen": -32.444522857666016, |
|
"logps/rejected": -32.190677642822266, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.00011485694267321378, |
|
"rewards/margins": 0.008865321055054665, |
|
"rewards/rejected": -0.008750463835895061, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.194082707715275e-07, |
|
"logits/chosen": -2.0464835166931152, |
|
"logits/rejected": -2.0384185314178467, |
|
"logps/chosen": -30.49222183227539, |
|
"logps/rejected": -32.05754852294922, |
|
"loss": 0.5009, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -0.004951108247041702, |
|
"rewards/margins": -0.0035899754147976637, |
|
"rewards/rejected": -0.0013611322501674294, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020402418666621e-07, |
|
"logits/chosen": -1.9772565364837646, |
|
"logits/rejected": -1.987534761428833, |
|
"logps/chosen": -31.39870262145996, |
|
"logps/rejected": -32.54944610595703, |
|
"loss": 0.4984, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.0062883696518838406, |
|
"rewards/margins": 0.01065803598612547, |
|
"rewards/rejected": -0.004369667265564203, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.8341962650351185e-07, |
|
"logits/chosen": -1.8916442394256592, |
|
"logits/rejected": -1.8927291631698608, |
|
"logps/chosen": -34.21247863769531, |
|
"logps/rejected": -34.76598358154297, |
|
"loss": 0.501, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.005936207715421915, |
|
"rewards/margins": -0.0033174168784171343, |
|
"rewards/rejected": -0.0026187908370047808, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.636998309800572e-07, |
|
"logits/chosen": -1.9432052373886108, |
|
"logits/rejected": -1.9397189617156982, |
|
"logps/chosen": -36.16191864013672, |
|
"logps/rejected": -32.74675750732422, |
|
"loss": 0.4985, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.002928206929937005, |
|
"rewards/margins": 0.010129809379577637, |
|
"rewards/rejected": -0.007201602216809988, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.430433172111807e-07, |
|
"logits/chosen": -2.042198896408081, |
|
"logits/rejected": -2.034799098968506, |
|
"logps/chosen": -33.776123046875, |
|
"logps/rejected": -31.36188316345215, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.0065220496617257595, |
|
"rewards/margins": 0.009532475844025612, |
|
"rewards/rejected": -0.003010427113622427, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.216202642830543e-07, |
|
"logits/chosen": -2.0474331378936768, |
|
"logits/rejected": -2.0527079105377197, |
|
"logps/chosen": -32.522254943847656, |
|
"logps/rejected": -32.51903533935547, |
|
"loss": 0.497, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.006759388837963343, |
|
"rewards/margins": 0.016926825046539307, |
|
"rewards/rejected": -0.010167436674237251, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.9960716642946403e-07, |
|
"logits/chosen": -2.0483100414276123, |
|
"logits/rejected": -2.0455245971679688, |
|
"logps/chosen": -31.49411964416504, |
|
"logps/rejected": -31.317398071289062, |
|
"loss": 0.5003, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0015499559231102467, |
|
"rewards/margins": -0.0017946911975741386, |
|
"rewards/rejected": 0.00024473536177538335, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.771853789806683e-07, |
|
"logits/chosen": -1.9187097549438477, |
|
"logits/rejected": -1.9233875274658203, |
|
"logps/chosen": -31.572490692138672, |
|
"logps/rejected": -32.78968811035156, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.01013648696243763, |
|
"rewards/margins": 0.009763057343661785, |
|
"rewards/rejected": 0.0003734306083060801, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.243237257003784, |
|
"eval_logits/rejected": -2.238358736038208, |
|
"eval_logps/chosen": -34.021446228027344, |
|
"eval_logps/rejected": -37.5062255859375, |
|
"eval_loss": 0.499795526266098, |
|
"eval_rewards/accuracies": 0.4862956702709198, |
|
"eval_rewards/chosen": 0.003931538667529821, |
|
"eval_rewards/margins": 0.0008117269026115537, |
|
"eval_rewards/rejected": 0.003119812114164233, |
|
"eval_runtime": 145.9975, |
|
"eval_samples_per_second": 2.349, |
|
"eval_steps_per_second": 0.295, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5453962426402e-07, |
|
"logits/chosen": -2.0315887928009033, |
|
"logits/rejected": -2.042264461517334, |
|
"logps/chosen": -31.964473724365234, |
|
"logps/rejected": -33.852508544921875, |
|
"loss": 0.5002, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.0020490700844675303, |
|
"rewards/margins": -0.001008716062642634, |
|
"rewards/rejected": 0.003057786263525486, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.318564697655179e-07, |
|
"logits/chosen": -1.925210952758789, |
|
"logits/rejected": -1.9400602579116821, |
|
"logps/chosen": -30.0924129486084, |
|
"logps/rejected": -31.5645694732666, |
|
"loss": 0.4987, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0057382904924452305, |
|
"rewards/margins": 0.0076560890302062035, |
|
"rewards/rejected": -0.0019177987705916166, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.093227910899832e-07, |
|
"logits/chosen": -1.98291015625, |
|
"logits/rejected": -1.9868860244750977, |
|
"logps/chosen": -33.397972106933594, |
|
"logps/rejected": -31.54607582092285, |
|
"loss": 0.4988, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.005849479231983423, |
|
"rewards/margins": 0.004881345666944981, |
|
"rewards/rejected": 0.0009681343799456954, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8712423238279356e-07, |
|
"logits/chosen": -1.9833847284317017, |
|
"logits/rejected": -1.9614006280899048, |
|
"logps/chosen": -34.151329040527344, |
|
"logps/rejected": -34.96162414550781, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0007256423123180866, |
|
"rewards/margins": 0.0039748698472976685, |
|
"rewards/rejected": -0.003249228000640869, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.654436768970182e-07, |
|
"logits/chosen": -2.0242693424224854, |
|
"logits/rejected": -2.0209662914276123, |
|
"logps/chosen": -32.91448974609375, |
|
"logps/rejected": -36.216251373291016, |
|
"loss": 0.4995, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.003173952456563711, |
|
"rewards/margins": 0.0005722272908315063, |
|
"rewards/rejected": 0.0026017245836555958, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.444597403062196e-07, |
|
"logits/chosen": -1.890960931777954, |
|
"logits/rejected": -1.8885114192962646, |
|
"logps/chosen": -34.190608978271484, |
|
"logps/rejected": -35.51725769042969, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.0006630702991969883, |
|
"rewards/margins": 0.0031278349924832582, |
|
"rewards/rejected": -0.003790905699133873, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2434529917578887e-07, |
|
"logits/chosen": -1.875756025314331, |
|
"logits/rejected": -1.8732185363769531, |
|
"logps/chosen": -34.378639221191406, |
|
"logps/rejected": -31.751827239990234, |
|
"loss": 0.4999, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.005688496865332127, |
|
"rewards/margins": -0.00016186293214559555, |
|
"rewards/rejected": 0.005850359331816435, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0526606671603521e-07, |
|
"logits/chosen": -1.9793775081634521, |
|
"logits/rejected": -1.9687445163726807, |
|
"logps/chosen": -35.337181091308594, |
|
"logps/rejected": -31.856109619140625, |
|
"loss": 0.4979, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.005838591605424881, |
|
"rewards/margins": 0.010373086668550968, |
|
"rewards/rejected": -0.0045344955287873745, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.737922755071453e-08, |
|
"logits/chosen": -2.0756278038024902, |
|
"logits/rejected": -2.0606112480163574, |
|
"logps/chosen": -30.936153411865234, |
|
"logps/rejected": -32.660118103027344, |
|
"loss": 0.5002, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.003609871957451105, |
|
"rewards/margins": -0.001124321250244975, |
|
"rewards/rejected": -0.0024855495430529118, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.08321427484816e-08, |
|
"logits/chosen": -1.9465348720550537, |
|
"logits/rejected": -1.9439990520477295, |
|
"logps/chosen": -32.864601135253906, |
|
"logps/rejected": -30.782527923583984, |
|
"loss": 0.4978, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.01875343546271324, |
|
"rewards/margins": 0.008469512686133385, |
|
"rewards/rejected": 0.010283923707902431, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -2.243088722229004, |
|
"eval_logits/rejected": -2.2382218837738037, |
|
"eval_logps/chosen": -34.02065658569336, |
|
"eval_logps/rejected": -37.50462341308594, |
|
"eval_loss": 0.4998457729816437, |
|
"eval_rewards/accuracies": 0.5137043595314026, |
|
"eval_rewards/chosen": 0.004168376792222261, |
|
"eval_rewards/margins": 0.0005690783145837486, |
|
"eval_rewards/rejected": 0.0035992988850921392, |
|
"eval_runtime": 145.9968, |
|
"eval_samples_per_second": 2.349, |
|
"eval_steps_per_second": 0.295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.576113578589034e-08, |
|
"logits/chosen": -1.928755521774292, |
|
"logits/rejected": -1.9254906177520752, |
|
"logps/chosen": -31.5792236328125, |
|
"logps/rejected": -33.729896545410156, |
|
"loss": 0.4993, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0072378418408334255, |
|
"rewards/margins": 0.0035611852072179317, |
|
"rewards/rejected": 0.0036766561679542065, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.229036944380912e-08, |
|
"logits/chosen": -1.9806935787200928, |
|
"logits/rejected": -1.968379020690918, |
|
"logps/chosen": -34.56597900390625, |
|
"logps/rejected": -33.5899658203125, |
|
"loss": 0.4957, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.009028470143675804, |
|
"rewards/margins": 0.024366382509469986, |
|
"rewards/rejected": -0.015337912365794182, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.053082288996112e-08, |
|
"logits/chosen": -2.0154762268066406, |
|
"logits/rejected": -2.014007091522217, |
|
"logps/chosen": -33.468040466308594, |
|
"logps/rejected": -32.471534729003906, |
|
"loss": 0.4996, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.005325844045728445, |
|
"rewards/margins": 0.0029113669879734516, |
|
"rewards/rejected": 0.0024144775234162807, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.05793773749158e-08, |
|
"logits/chosen": -2.102926254272461, |
|
"logits/rejected": -2.0871331691741943, |
|
"logps/chosen": -34.16908645629883, |
|
"logps/rejected": -33.08391189575195, |
|
"loss": 0.5017, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.003090888261795044, |
|
"rewards/margins": -0.007146535906940699, |
|
"rewards/rejected": 0.010237423703074455, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.251801807404168e-08, |
|
"logits/chosen": -1.974313735961914, |
|
"logits/rejected": -1.9733721017837524, |
|
"logps/chosen": -33.25641632080078, |
|
"logps/rejected": -32.458290100097656, |
|
"loss": 0.4986, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.007705795578658581, |
|
"rewards/margins": 0.005649130791425705, |
|
"rewards/rejected": 0.0020566643215715885, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.41315865106129e-09, |
|
"logits/chosen": -1.9302194118499756, |
|
"logits/rejected": -1.940574288368225, |
|
"logps/chosen": -32.19397735595703, |
|
"logps/rejected": -35.28559875488281, |
|
"loss": 0.4996, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.003214403986930847, |
|
"rewards/margins": 0.0022677627857774496, |
|
"rewards/rejected": 0.0009466406190767884, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3150941078050324e-09, |
|
"logits/chosen": -2.0694031715393066, |
|
"logits/rejected": -2.0628397464752197, |
|
"logps/chosen": -33.639854431152344, |
|
"logps/rejected": -29.21550941467285, |
|
"loss": 0.4998, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0036338320933282375, |
|
"rewards/margins": 0.00035826730891130865, |
|
"rewards/rejected": 0.0032755639404058456, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.575864278703266e-10, |
|
"logits/chosen": -1.9289796352386475, |
|
"logits/rejected": -1.9311374425888062, |
|
"logps/chosen": -34.22175979614258, |
|
"logps/rejected": -30.88715171813965, |
|
"loss": 0.4984, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.005186386872082949, |
|
"rewards/margins": 0.009542430751025677, |
|
"rewards/rejected": -0.004356043878942728, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.49920677581390777, |
|
"train_runtime": 3256.0166, |
|
"train_samples_per_second": 0.946, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|