sfulay's picture
Model save
1cf4035 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 50,
"global_step": 476,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02100840336134454,
"grad_norm": 16.28906943780765,
"learning_rate": 1.0416666666666667e-07,
"logits/chosen": -1.109375,
"logits/rejected": -1.09375,
"logps/chosen": -3552.0,
"logps/rejected": -3360.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 10
},
{
"epoch": 0.04201680672268908,
"grad_norm": 17.42578535458299,
"learning_rate": 2.0833333333333333e-07,
"logits/chosen": -1.171875,
"logits/rejected": -1.109375,
"logps/chosen": -4256.0,
"logps/rejected": -3408.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 20
},
{
"epoch": 0.06302521008403361,
"grad_norm": 19.8731189533398,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -1.3203125,
"logits/rejected": -1.2890625,
"logps/chosen": -4288.0,
"logps/rejected": -3888.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 30
},
{
"epoch": 0.08403361344537816,
"grad_norm": 14.405736256208325,
"learning_rate": 4.1666666666666667e-07,
"logits/chosen": -1.7265625,
"logits/rejected": -1.6796875,
"logps/chosen": -4192.0,
"logps/rejected": -3312.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 40
},
{
"epoch": 0.10504201680672269,
"grad_norm": 9.363286794597972,
"learning_rate": 4.999730614378151e-07,
"logits/chosen": -2.375,
"logits/rejected": -2.34375,
"logps/chosen": -3728.0,
"logps/rejected": -3392.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 50
},
{
"epoch": 0.10504201680672269,
"eval_logits/chosen": -2.5,
"eval_logits/rejected": -2.484375,
"eval_logps/chosen": -3440.0,
"eval_logps/rejected": -3024.0,
"eval_loss": 0.69140625,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": 0.0,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": 0.0,
"eval_runtime": 102.4147,
"eval_samples_per_second": 19.392,
"eval_steps_per_second": 0.312,
"step": 50
},
{
"epoch": 0.12605042016806722,
"grad_norm": 11.711357348387072,
"learning_rate": 4.99030821197584e-07,
"logits/chosen": -2.5,
"logits/rejected": -2.5,
"logps/chosen": -3216.0,
"logps/rejected": -2848.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 60
},
{
"epoch": 0.14705882352941177,
"grad_norm": 10.992333167872541,
"learning_rate": 4.967474527158459e-07,
"logits/chosen": -2.53125,
"logits/rejected": -2.515625,
"logps/chosen": -3584.0,
"logps/rejected": -3344.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 70
},
{
"epoch": 0.16806722689075632,
"grad_norm": 10.760162307426238,
"learning_rate": 4.931352528237397e-07,
"logits/chosen": -2.421875,
"logits/rejected": -2.421875,
"logps/chosen": -3312.0,
"logps/rejected": -3024.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 80
},
{
"epoch": 0.18907563025210083,
"grad_norm": 12.926141811678383,
"learning_rate": 4.882136746285794e-07,
"logits/chosen": -2.28125,
"logits/rejected": -2.28125,
"logps/chosen": -3120.0,
"logps/rejected": -2784.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 90
},
{
"epoch": 0.21008403361344538,
"grad_norm": 7.053588811204541,
"learning_rate": 4.820092227512735e-07,
"logits/chosen": -2.171875,
"logits/rejected": -2.15625,
"logps/chosen": -3392.0,
"logps/rejected": -2928.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 100
},
{
"epoch": 0.21008403361344538,
"eval_logits/chosen": -2.046875,
"eval_logits/rejected": -2.03125,
"eval_logps/chosen": -2880.0,
"eval_logps/rejected": -2544.0,
"eval_loss": 0.69140625,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": 0.0,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": 0.0,
"eval_runtime": 102.9839,
"eval_samples_per_second": 19.285,
"eval_steps_per_second": 0.311,
"step": 100
},
{
"epoch": 0.23109243697478993,
"grad_norm": 9.09261170699122,
"learning_rate": 4.745553105885882e-07,
"logits/chosen": -1.953125,
"logits/rejected": -1.9453125,
"logps/chosen": -2832.0,
"logps/rejected": -2432.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 110
},
{
"epoch": 0.25210084033613445,
"grad_norm": 12.748620762742686,
"learning_rate": 4.658920803689553e-07,
"logits/chosen": -1.8671875,
"logits/rejected": -1.859375,
"logps/chosen": -3056.0,
"logps/rejected": -2544.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 120
},
{
"epoch": 0.27310924369747897,
"grad_norm": 7.022734911341551,
"learning_rate": 4.560661869708929e-07,
"logits/chosen": -1.8984375,
"logits/rejected": -1.890625,
"logps/chosen": -2928.0,
"logps/rejected": -2768.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 130
},
{
"epoch": 0.29411764705882354,
"grad_norm": 9.578363785870927,
"learning_rate": 4.4513054666826144e-07,
"logits/chosen": -1.7734375,
"logits/rejected": -1.765625,
"logps/chosen": -3136.0,
"logps/rejected": -2704.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 140
},
{
"epoch": 0.31512605042016806,
"grad_norm": 8.139451643424685,
"learning_rate": 4.3314405215546036e-07,
"logits/chosen": -1.6875,
"logits/rejected": -1.6953125,
"logps/chosen": -2608.0,
"logps/rejected": -2528.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 150
},
{
"epoch": 0.31512605042016806,
"eval_logits/chosen": -1.5859375,
"eval_logits/rejected": -1.59375,
"eval_logps/chosen": -2736.0,
"eval_logps/rejected": -2432.0,
"eval_loss": 0.69140625,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": 0.0,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": 0.0,
"eval_runtime": 103.2169,
"eval_samples_per_second": 19.241,
"eval_steps_per_second": 0.31,
"step": 150
},
{
"epoch": 0.33613445378151263,
"grad_norm": 17.892723758314286,
"learning_rate": 4.201712553872657e-07,
"logits/chosen": -1.6875,
"logits/rejected": -1.6953125,
"logps/chosen": -2688.0,
"logps/rejected": -2544.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 160
},
{
"epoch": 0.35714285714285715,
"grad_norm": 22.27823672845172,
"learning_rate": 4.062820199413401e-07,
"logits/chosen": -1.90625,
"logits/rejected": -1.8984375,
"logps/chosen": -2816.0,
"logps/rejected": -2336.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 170
},
{
"epoch": 0.37815126050420167,
"grad_norm": 14.06298479728037,
"learning_rate": 3.9155114477557926e-07,
"logits/chosen": -1.734375,
"logits/rejected": -1.7421875,
"logps/chosen": -2944.0,
"logps/rejected": -2512.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 180
},
{
"epoch": 0.39915966386554624,
"grad_norm": 7.672499104925795,
"learning_rate": 3.760579614065076e-07,
"logits/chosen": -1.5625,
"logits/rejected": -1.5625,
"logps/chosen": -3040.0,
"logps/rejected": -2512.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 190
},
{
"epoch": 0.42016806722689076,
"grad_norm": 6.6620635447942975,
"learning_rate": 3.598859066780754e-07,
"logits/chosen": -1.6796875,
"logits/rejected": -1.6796875,
"logps/chosen": -2832.0,
"logps/rejected": -2432.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 200
},
{
"epoch": 0.42016806722689076,
"eval_logits/chosen": -1.6875,
"eval_logits/rejected": -1.6875,
"eval_logps/chosen": -2688.0,
"eval_logps/rejected": -2384.0,
"eval_loss": 0.69140625,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": 0.0,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": 0.0,
"eval_runtime": 102.6512,
"eval_samples_per_second": 19.347,
"eval_steps_per_second": 0.312,
"step": 200
},
{
"epoch": 0.4411764705882353,
"grad_norm": 13.458763562501465,
"learning_rate": 3.4312207342166483e-07,
"logits/chosen": -1.7109375,
"logits/rejected": -1.71875,
"logps/chosen": -2464.0,
"logps/rejected": -2192.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 210
},
{
"epoch": 0.46218487394957986,
"grad_norm": 11.168375013873582,
"learning_rate": 3.2585674142717477e-07,
"logits/chosen": -1.7265625,
"logits/rejected": -1.7265625,
"logps/chosen": -3024.0,
"logps/rejected": -2480.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 220
},
{
"epoch": 0.4831932773109244,
"grad_norm": 14.465492326709429,
"learning_rate": 3.081828912510901e-07,
"logits/chosen": -1.6640625,
"logits/rejected": -1.671875,
"logps/chosen": -2880.0,
"logps/rejected": -2448.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 230
},
{
"epoch": 0.5042016806722689,
"grad_norm": 9.34309193516938,
"learning_rate": 2.9019570347986706e-07,
"logits/chosen": -1.6953125,
"logits/rejected": -1.6953125,
"logps/chosen": -2944.0,
"logps/rejected": -2416.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 240
},
{
"epoch": 0.5252100840336135,
"grad_norm": 8.777510895642173,
"learning_rate": 2.719920461452991e-07,
"logits/chosen": -1.6484375,
"logits/rejected": -1.6484375,
"logps/chosen": -2624.0,
"logps/rejected": -2256.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 250
},
{
"epoch": 0.5252100840336135,
"eval_logits/chosen": -1.703125,
"eval_logits/rejected": -1.703125,
"eval_logps/chosen": -2640.0,
"eval_logps/rejected": -2352.0,
"eval_loss": 0.69140625,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": 0.0,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": 0.0,
"eval_runtime": 102.4791,
"eval_samples_per_second": 19.38,
"eval_steps_per_second": 0.312,
"step": 250
},
{
"epoch": 0.5462184873949579,
"grad_norm": 10.81202800911272,
"learning_rate": 2.536699530523291e-07,
"logits/chosen": -1.71875,
"logits/rejected": -1.71875,
"logps/chosen": -3072.0,
"logps/rejected": -2480.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 260
},
{
"epoch": 0.5672268907563025,
"grad_norm": 7.1452276225234295,
"learning_rate": 2.353280958287195e-07,
"logits/chosen": -1.6953125,
"logits/rejected": -1.703125,
"logps/chosen": -2832.0,
"logps/rejected": -2592.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 270
},
{
"epoch": 0.5882352941176471,
"grad_norm": 16.243346282591897,
"learning_rate": 2.1706525253979534e-07,
"logits/chosen": -1.5859375,
"logits/rejected": -1.59375,
"logps/chosen": -2880.0,
"logps/rejected": -2464.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 280
},
{
"epoch": 0.6092436974789915,
"grad_norm": 12.28591647805525,
"learning_rate": 1.9897977572998152e-07,
"logits/chosen": -1.5234375,
"logits/rejected": -1.5234375,
"logps/chosen": -2960.0,
"logps/rejected": -2624.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 290
},
{
"epoch": 0.6302521008403361,
"grad_norm": 10.712221070111289,
"learning_rate": 1.8116906275593507e-07,
"logits/chosen": -1.65625,
"logits/rejected": -1.6484375,
"logps/chosen": -2976.0,
"logps/rejected": -2480.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 300
},
{
"epoch": 0.6302521008403361,
"eval_logits/chosen": -1.671875,
"eval_logits/rejected": -1.6796875,
"eval_logps/chosen": -2624.0,
"eval_logps/rejected": -2336.0,
"eval_loss": 0.69140625,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": 0.0,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": 0.0,
"eval_runtime": 103.0042,
"eval_samples_per_second": 19.281,
"eval_steps_per_second": 0.311,
"step": 300
},
{
"epoch": 0.6512605042016807,
"grad_norm": 11.873878441097949,
"learning_rate": 1.6372903126373684e-07,
"logits/chosen": -1.6953125,
"logits/rejected": -1.6875,
"logps/chosen": -2864.0,
"logps/rejected": -2784.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 310
},
{
"epoch": 0.6722689075630253,
"grad_norm": 16.36161277353074,
"learning_rate": 1.4675360263490295e-07,
"logits/chosen": -1.65625,
"logits/rejected": -1.65625,
"logps/chosen": -2608.0,
"logps/rejected": -2560.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 320
},
{
"epoch": 0.6932773109243697,
"grad_norm": 16.718448992720084,
"learning_rate": 1.3033419618306018e-07,
"logits/chosen": -1.578125,
"logits/rejected": -1.5703125,
"logps/chosen": -2768.0,
"logps/rejected": -2224.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 330
},
{
"epoch": 0.7142857142857143,
"grad_norm": 17.049047455790024,
"learning_rate": 1.1455923682523475e-07,
"logits/chosen": -1.59375,
"logits/rejected": -1.59375,
"logps/chosen": -2752.0,
"logps/rejected": -2256.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 340
},
{
"epoch": 0.7352941176470589,
"grad_norm": 8.628681230007127,
"learning_rate": 9.951367887913573e-08,
"logits/chosen": -1.6015625,
"logits/rejected": -1.609375,
"logps/chosen": -3328.0,
"logps/rejected": -2880.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 350
},
{
"epoch": 0.7352941176470589,
"eval_logits/chosen": -1.6015625,
"eval_logits/rejected": -1.6015625,
"eval_logps/chosen": -2640.0,
"eval_logps/rejected": -2352.0,
"eval_loss": 0.69140625,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": 0.0,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": 0.0,
"eval_runtime": 102.7271,
"eval_samples_per_second": 19.333,
"eval_steps_per_second": 0.312,
"step": 350
},
{
"epoch": 0.7563025210084033,
"grad_norm": 11.363934563365467,
"learning_rate": 8.527854855097224e-08,
"logits/chosen": -1.6171875,
"logits/rejected": -1.6015625,
"logps/chosen": -2688.0,
"logps/rejected": -2352.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 360
},
{
"epoch": 0.7773109243697479,
"grad_norm": 14.871357784427335,
"learning_rate": 7.193050757768431e-08,
"logits/chosen": -1.578125,
"logits/rejected": -1.5703125,
"logps/chosen": -2720.0,
"logps/rejected": -2208.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 370
},
{
"epoch": 0.7983193277310925,
"grad_norm": 12.038701774247873,
"learning_rate": 5.9541440373546445e-08,
"logits/chosen": -1.625,
"logits/rejected": -1.625,
"logps/chosen": -2832.0,
"logps/rejected": -2320.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 380
},
{
"epoch": 0.819327731092437,
"grad_norm": 14.978538565892313,
"learning_rate": 4.8178066904518894e-08,
"logits/chosen": -1.6015625,
"logits/rejected": -1.6015625,
"logps/chosen": -2592.0,
"logps/rejected": -2288.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 390
},
{
"epoch": 0.8403361344537815,
"grad_norm": 8.293782679846284,
"learning_rate": 3.790158337517127e-08,
"logits/chosen": -1.578125,
"logits/rejected": -1.578125,
"logps/chosen": -2944.0,
"logps/rejected": -2736.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 400
},
{
"epoch": 0.8403361344537815,
"eval_logits/chosen": -1.578125,
"eval_logits/rejected": -1.578125,
"eval_logps/chosen": -2640.0,
"eval_logps/rejected": -2368.0,
"eval_loss": 0.69140625,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": 0.0,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": 0.0,
"eval_runtime": 102.8311,
"eval_samples_per_second": 19.313,
"eval_steps_per_second": 0.311,
"step": 400
},
{
"epoch": 0.8613445378151261,
"grad_norm": 17.01214709788288,
"learning_rate": 2.876733266321765e-08,
"logits/chosen": -1.5625,
"logits/rejected": -1.5546875,
"logps/chosen": -3056.0,
"logps/rejected": -2560.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 410
},
{
"epoch": 0.8823529411764706,
"grad_norm": 13.078800555411027,
"learning_rate": 2.0824506276503894e-08,
"logits/chosen": -1.5703125,
"logits/rejected": -1.5625,
"logps/chosen": -2912.0,
"logps/rejected": -2592.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 420
},
{
"epoch": 0.9033613445378151,
"grad_norm": 9.301856974075692,
"learning_rate": 1.4115879437524043e-08,
"logits/chosen": -1.578125,
"logits/rejected": -1.578125,
"logps/chosen": -2736.0,
"logps/rejected": -2432.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 430
},
{
"epoch": 0.9243697478991597,
"grad_norm": 9.51276981883413,
"learning_rate": 8.677580722139671e-09,
"logits/chosen": -1.6015625,
"logits/rejected": -1.6015625,
"logps/chosen": -2752.0,
"logps/rejected": -2432.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 440
},
{
"epoch": 0.9453781512605042,
"grad_norm": 15.942237520125405,
"learning_rate": 4.538897493087112e-09,
"logits/chosen": -1.5625,
"logits/rejected": -1.5625,
"logps/chosen": -2816.0,
"logps/rejected": -2336.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 450
},
{
"epoch": 0.9453781512605042,
"eval_logits/chosen": -1.578125,
"eval_logits/rejected": -1.578125,
"eval_logps/chosen": -2624.0,
"eval_logps/rejected": -2352.0,
"eval_loss": 0.69140625,
"eval_rewards/accuracies": 0.0,
"eval_rewards/chosen": 0.0,
"eval_rewards/margins": 0.0,
"eval_rewards/rejected": 0.0,
"eval_runtime": 101.63,
"eval_samples_per_second": 19.541,
"eval_steps_per_second": 0.315,
"step": 450
},
{
"epoch": 0.9663865546218487,
"grad_norm": 13.30372792679492,
"learning_rate": 1.722118176089915e-09,
"logits/chosen": -1.59375,
"logits/rejected": -1.5859375,
"logps/chosen": -2720.0,
"logps/rejected": -2416.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 460
},
{
"epoch": 0.9873949579831933,
"grad_norm": 8.647302168816173,
"learning_rate": 2.424122279805485e-10,
"logits/chosen": -1.5703125,
"logits/rejected": -1.5625,
"logps/chosen": -2672.0,
"logps/rejected": -2320.0,
"loss": 0.6914,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 470
},
{
"epoch": 1.0,
"step": 476,
"total_flos": 0.0,
"train_loss": 0.69140625,
"train_runtime": 12796.3799,
"train_samples_per_second": 4.76,
"train_steps_per_second": 0.037
}
],
"logging_steps": 10,
"max_steps": 476,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}