|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 50, |
|
"global_step": 476, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02100840336134454, |
|
"grad_norm": 16.28906943780765, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -1.109375, |
|
"logits/rejected": -1.09375, |
|
"logps/chosen": -3552.0, |
|
"logps/rejected": -3360.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04201680672268908, |
|
"grad_norm": 17.42578535458299, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -1.171875, |
|
"logits/rejected": -1.109375, |
|
"logps/chosen": -4256.0, |
|
"logps/rejected": -3408.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06302521008403361, |
|
"grad_norm": 19.8731189533398, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -1.3203125, |
|
"logits/rejected": -1.2890625, |
|
"logps/chosen": -4288.0, |
|
"logps/rejected": -3888.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08403361344537816, |
|
"grad_norm": 14.405736256208325, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -1.7265625, |
|
"logits/rejected": -1.6796875, |
|
"logps/chosen": -4192.0, |
|
"logps/rejected": -3312.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.10504201680672269, |
|
"grad_norm": 9.363286794597972, |
|
"learning_rate": 4.999730614378151e-07, |
|
"logits/chosen": -2.375, |
|
"logits/rejected": -2.34375, |
|
"logps/chosen": -3728.0, |
|
"logps/rejected": -3392.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10504201680672269, |
|
"eval_logits/chosen": -2.5, |
|
"eval_logits/rejected": -2.484375, |
|
"eval_logps/chosen": -3440.0, |
|
"eval_logps/rejected": -3024.0, |
|
"eval_loss": 0.69140625, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": 0.0, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": 0.0, |
|
"eval_runtime": 102.4147, |
|
"eval_samples_per_second": 19.392, |
|
"eval_steps_per_second": 0.312, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.12605042016806722, |
|
"grad_norm": 11.711357348387072, |
|
"learning_rate": 4.99030821197584e-07, |
|
"logits/chosen": -2.5, |
|
"logits/rejected": -2.5, |
|
"logps/chosen": -3216.0, |
|
"logps/rejected": -2848.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.14705882352941177, |
|
"grad_norm": 10.992333167872541, |
|
"learning_rate": 4.967474527158459e-07, |
|
"logits/chosen": -2.53125, |
|
"logits/rejected": -2.515625, |
|
"logps/chosen": -3584.0, |
|
"logps/rejected": -3344.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16806722689075632, |
|
"grad_norm": 10.760162307426238, |
|
"learning_rate": 4.931352528237397e-07, |
|
"logits/chosen": -2.421875, |
|
"logits/rejected": -2.421875, |
|
"logps/chosen": -3312.0, |
|
"logps/rejected": -3024.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18907563025210083, |
|
"grad_norm": 12.926141811678383, |
|
"learning_rate": 4.882136746285794e-07, |
|
"logits/chosen": -2.28125, |
|
"logits/rejected": -2.28125, |
|
"logps/chosen": -3120.0, |
|
"logps/rejected": -2784.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.21008403361344538, |
|
"grad_norm": 7.053588811204541, |
|
"learning_rate": 4.820092227512735e-07, |
|
"logits/chosen": -2.171875, |
|
"logits/rejected": -2.15625, |
|
"logps/chosen": -3392.0, |
|
"logps/rejected": -2928.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.21008403361344538, |
|
"eval_logits/chosen": -2.046875, |
|
"eval_logits/rejected": -2.03125, |
|
"eval_logps/chosen": -2880.0, |
|
"eval_logps/rejected": -2544.0, |
|
"eval_loss": 0.69140625, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": 0.0, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": 0.0, |
|
"eval_runtime": 102.9839, |
|
"eval_samples_per_second": 19.285, |
|
"eval_steps_per_second": 0.311, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.23109243697478993, |
|
"grad_norm": 9.09261170699122, |
|
"learning_rate": 4.745553105885882e-07, |
|
"logits/chosen": -1.953125, |
|
"logits/rejected": -1.9453125, |
|
"logps/chosen": -2832.0, |
|
"logps/rejected": -2432.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.25210084033613445, |
|
"grad_norm": 12.748620762742686, |
|
"learning_rate": 4.658920803689553e-07, |
|
"logits/chosen": -1.8671875, |
|
"logits/rejected": -1.859375, |
|
"logps/chosen": -3056.0, |
|
"logps/rejected": -2544.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.27310924369747897, |
|
"grad_norm": 7.022734911341551, |
|
"learning_rate": 4.560661869708929e-07, |
|
"logits/chosen": -1.8984375, |
|
"logits/rejected": -1.890625, |
|
"logps/chosen": -2928.0, |
|
"logps/rejected": -2768.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.29411764705882354, |
|
"grad_norm": 9.578363785870927, |
|
"learning_rate": 4.4513054666826144e-07, |
|
"logits/chosen": -1.7734375, |
|
"logits/rejected": -1.765625, |
|
"logps/chosen": -3136.0, |
|
"logps/rejected": -2704.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.31512605042016806, |
|
"grad_norm": 8.139451643424685, |
|
"learning_rate": 4.3314405215546036e-07, |
|
"logits/chosen": -1.6875, |
|
"logits/rejected": -1.6953125, |
|
"logps/chosen": -2608.0, |
|
"logps/rejected": -2528.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.31512605042016806, |
|
"eval_logits/chosen": -1.5859375, |
|
"eval_logits/rejected": -1.59375, |
|
"eval_logps/chosen": -2736.0, |
|
"eval_logps/rejected": -2432.0, |
|
"eval_loss": 0.69140625, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": 0.0, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": 0.0, |
|
"eval_runtime": 103.2169, |
|
"eval_samples_per_second": 19.241, |
|
"eval_steps_per_second": 0.31, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33613445378151263, |
|
"grad_norm": 17.892723758314286, |
|
"learning_rate": 4.201712553872657e-07, |
|
"logits/chosen": -1.6875, |
|
"logits/rejected": -1.6953125, |
|
"logps/chosen": -2688.0, |
|
"logps/rejected": -2544.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 22.27823672845172, |
|
"learning_rate": 4.062820199413401e-07, |
|
"logits/chosen": -1.90625, |
|
"logits/rejected": -1.8984375, |
|
"logps/chosen": -2816.0, |
|
"logps/rejected": -2336.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.37815126050420167, |
|
"grad_norm": 14.06298479728037, |
|
"learning_rate": 3.9155114477557926e-07, |
|
"logits/chosen": -1.734375, |
|
"logits/rejected": -1.7421875, |
|
"logps/chosen": -2944.0, |
|
"logps/rejected": -2512.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.39915966386554624, |
|
"grad_norm": 7.672499104925795, |
|
"learning_rate": 3.760579614065076e-07, |
|
"logits/chosen": -1.5625, |
|
"logits/rejected": -1.5625, |
|
"logps/chosen": -3040.0, |
|
"logps/rejected": -2512.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.42016806722689076, |
|
"grad_norm": 6.6620635447942975, |
|
"learning_rate": 3.598859066780754e-07, |
|
"logits/chosen": -1.6796875, |
|
"logits/rejected": -1.6796875, |
|
"logps/chosen": -2832.0, |
|
"logps/rejected": -2432.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.42016806722689076, |
|
"eval_logits/chosen": -1.6875, |
|
"eval_logits/rejected": -1.6875, |
|
"eval_logps/chosen": -2688.0, |
|
"eval_logps/rejected": -2384.0, |
|
"eval_loss": 0.69140625, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": 0.0, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": 0.0, |
|
"eval_runtime": 102.6512, |
|
"eval_samples_per_second": 19.347, |
|
"eval_steps_per_second": 0.312, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4411764705882353, |
|
"grad_norm": 13.458763562501465, |
|
"learning_rate": 3.4312207342166483e-07, |
|
"logits/chosen": -1.7109375, |
|
"logits/rejected": -1.71875, |
|
"logps/chosen": -2464.0, |
|
"logps/rejected": -2192.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.46218487394957986, |
|
"grad_norm": 11.168375013873582, |
|
"learning_rate": 3.2585674142717477e-07, |
|
"logits/chosen": -1.7265625, |
|
"logits/rejected": -1.7265625, |
|
"logps/chosen": -3024.0, |
|
"logps/rejected": -2480.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4831932773109244, |
|
"grad_norm": 14.465492326709429, |
|
"learning_rate": 3.081828912510901e-07, |
|
"logits/chosen": -1.6640625, |
|
"logits/rejected": -1.671875, |
|
"logps/chosen": -2880.0, |
|
"logps/rejected": -2448.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5042016806722689, |
|
"grad_norm": 9.34309193516938, |
|
"learning_rate": 2.9019570347986706e-07, |
|
"logits/chosen": -1.6953125, |
|
"logits/rejected": -1.6953125, |
|
"logps/chosen": -2944.0, |
|
"logps/rejected": -2416.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5252100840336135, |
|
"grad_norm": 8.777510895642173, |
|
"learning_rate": 2.719920461452991e-07, |
|
"logits/chosen": -1.6484375, |
|
"logits/rejected": -1.6484375, |
|
"logps/chosen": -2624.0, |
|
"logps/rejected": -2256.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5252100840336135, |
|
"eval_logits/chosen": -1.703125, |
|
"eval_logits/rejected": -1.703125, |
|
"eval_logps/chosen": -2640.0, |
|
"eval_logps/rejected": -2352.0, |
|
"eval_loss": 0.69140625, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": 0.0, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": 0.0, |
|
"eval_runtime": 102.4791, |
|
"eval_samples_per_second": 19.38, |
|
"eval_steps_per_second": 0.312, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5462184873949579, |
|
"grad_norm": 10.81202800911272, |
|
"learning_rate": 2.536699530523291e-07, |
|
"logits/chosen": -1.71875, |
|
"logits/rejected": -1.71875, |
|
"logps/chosen": -3072.0, |
|
"logps/rejected": -2480.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5672268907563025, |
|
"grad_norm": 7.1452276225234295, |
|
"learning_rate": 2.353280958287195e-07, |
|
"logits/chosen": -1.6953125, |
|
"logits/rejected": -1.703125, |
|
"logps/chosen": -2832.0, |
|
"logps/rejected": -2592.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5882352941176471, |
|
"grad_norm": 16.243346282591897, |
|
"learning_rate": 2.1706525253979534e-07, |
|
"logits/chosen": -1.5859375, |
|
"logits/rejected": -1.59375, |
|
"logps/chosen": -2880.0, |
|
"logps/rejected": -2464.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6092436974789915, |
|
"grad_norm": 12.28591647805525, |
|
"learning_rate": 1.9897977572998152e-07, |
|
"logits/chosen": -1.5234375, |
|
"logits/rejected": -1.5234375, |
|
"logps/chosen": -2960.0, |
|
"logps/rejected": -2624.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6302521008403361, |
|
"grad_norm": 10.712221070111289, |
|
"learning_rate": 1.8116906275593507e-07, |
|
"logits/chosen": -1.65625, |
|
"logits/rejected": -1.6484375, |
|
"logps/chosen": -2976.0, |
|
"logps/rejected": -2480.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6302521008403361, |
|
"eval_logits/chosen": -1.671875, |
|
"eval_logits/rejected": -1.6796875, |
|
"eval_logps/chosen": -2624.0, |
|
"eval_logps/rejected": -2336.0, |
|
"eval_loss": 0.69140625, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": 0.0, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": 0.0, |
|
"eval_runtime": 103.0042, |
|
"eval_samples_per_second": 19.281, |
|
"eval_steps_per_second": 0.311, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6512605042016807, |
|
"grad_norm": 11.873878441097949, |
|
"learning_rate": 1.6372903126373684e-07, |
|
"logits/chosen": -1.6953125, |
|
"logits/rejected": -1.6875, |
|
"logps/chosen": -2864.0, |
|
"logps/rejected": -2784.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6722689075630253, |
|
"grad_norm": 16.36161277353074, |
|
"learning_rate": 1.4675360263490295e-07, |
|
"logits/chosen": -1.65625, |
|
"logits/rejected": -1.65625, |
|
"logps/chosen": -2608.0, |
|
"logps/rejected": -2560.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6932773109243697, |
|
"grad_norm": 16.718448992720084, |
|
"learning_rate": 1.3033419618306018e-07, |
|
"logits/chosen": -1.578125, |
|
"logits/rejected": -1.5703125, |
|
"logps/chosen": -2768.0, |
|
"logps/rejected": -2224.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 17.049047455790024, |
|
"learning_rate": 1.1455923682523475e-07, |
|
"logits/chosen": -1.59375, |
|
"logits/rejected": -1.59375, |
|
"logps/chosen": -2752.0, |
|
"logps/rejected": -2256.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7352941176470589, |
|
"grad_norm": 8.628681230007127, |
|
"learning_rate": 9.951367887913573e-08, |
|
"logits/chosen": -1.6015625, |
|
"logits/rejected": -1.609375, |
|
"logps/chosen": -3328.0, |
|
"logps/rejected": -2880.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7352941176470589, |
|
"eval_logits/chosen": -1.6015625, |
|
"eval_logits/rejected": -1.6015625, |
|
"eval_logps/chosen": -2640.0, |
|
"eval_logps/rejected": -2352.0, |
|
"eval_loss": 0.69140625, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": 0.0, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": 0.0, |
|
"eval_runtime": 102.7271, |
|
"eval_samples_per_second": 19.333, |
|
"eval_steps_per_second": 0.312, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7563025210084033, |
|
"grad_norm": 11.363934563365467, |
|
"learning_rate": 8.527854855097224e-08, |
|
"logits/chosen": -1.6171875, |
|
"logits/rejected": -1.6015625, |
|
"logps/chosen": -2688.0, |
|
"logps/rejected": -2352.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7773109243697479, |
|
"grad_norm": 14.871357784427335, |
|
"learning_rate": 7.193050757768431e-08, |
|
"logits/chosen": -1.578125, |
|
"logits/rejected": -1.5703125, |
|
"logps/chosen": -2720.0, |
|
"logps/rejected": -2208.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7983193277310925, |
|
"grad_norm": 12.038701774247873, |
|
"learning_rate": 5.9541440373546445e-08, |
|
"logits/chosen": -1.625, |
|
"logits/rejected": -1.625, |
|
"logps/chosen": -2832.0, |
|
"logps/rejected": -2320.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.819327731092437, |
|
"grad_norm": 14.978538565892313, |
|
"learning_rate": 4.8178066904518894e-08, |
|
"logits/chosen": -1.6015625, |
|
"logits/rejected": -1.6015625, |
|
"logps/chosen": -2592.0, |
|
"logps/rejected": -2288.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8403361344537815, |
|
"grad_norm": 8.293782679846284, |
|
"learning_rate": 3.790158337517127e-08, |
|
"logits/chosen": -1.578125, |
|
"logits/rejected": -1.578125, |
|
"logps/chosen": -2944.0, |
|
"logps/rejected": -2736.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8403361344537815, |
|
"eval_logits/chosen": -1.578125, |
|
"eval_logits/rejected": -1.578125, |
|
"eval_logps/chosen": -2640.0, |
|
"eval_logps/rejected": -2368.0, |
|
"eval_loss": 0.69140625, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": 0.0, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": 0.0, |
|
"eval_runtime": 102.8311, |
|
"eval_samples_per_second": 19.313, |
|
"eval_steps_per_second": 0.311, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8613445378151261, |
|
"grad_norm": 17.01214709788288, |
|
"learning_rate": 2.876733266321765e-08, |
|
"logits/chosen": -1.5625, |
|
"logits/rejected": -1.5546875, |
|
"logps/chosen": -3056.0, |
|
"logps/rejected": -2560.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8823529411764706, |
|
"grad_norm": 13.078800555411027, |
|
"learning_rate": 2.0824506276503894e-08, |
|
"logits/chosen": -1.5703125, |
|
"logits/rejected": -1.5625, |
|
"logps/chosen": -2912.0, |
|
"logps/rejected": -2592.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9033613445378151, |
|
"grad_norm": 9.301856974075692, |
|
"learning_rate": 1.4115879437524043e-08, |
|
"logits/chosen": -1.578125, |
|
"logits/rejected": -1.578125, |
|
"logps/chosen": -2736.0, |
|
"logps/rejected": -2432.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9243697478991597, |
|
"grad_norm": 9.51276981883413, |
|
"learning_rate": 8.677580722139671e-09, |
|
"logits/chosen": -1.6015625, |
|
"logits/rejected": -1.6015625, |
|
"logps/chosen": -2752.0, |
|
"logps/rejected": -2432.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9453781512605042, |
|
"grad_norm": 15.942237520125405, |
|
"learning_rate": 4.538897493087112e-09, |
|
"logits/chosen": -1.5625, |
|
"logits/rejected": -1.5625, |
|
"logps/chosen": -2816.0, |
|
"logps/rejected": -2336.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9453781512605042, |
|
"eval_logits/chosen": -1.578125, |
|
"eval_logits/rejected": -1.578125, |
|
"eval_logps/chosen": -2624.0, |
|
"eval_logps/rejected": -2352.0, |
|
"eval_loss": 0.69140625, |
|
"eval_rewards/accuracies": 0.0, |
|
"eval_rewards/chosen": 0.0, |
|
"eval_rewards/margins": 0.0, |
|
"eval_rewards/rejected": 0.0, |
|
"eval_runtime": 101.63, |
|
"eval_samples_per_second": 19.541, |
|
"eval_steps_per_second": 0.315, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9663865546218487, |
|
"grad_norm": 13.30372792679492, |
|
"learning_rate": 1.722118176089915e-09, |
|
"logits/chosen": -1.59375, |
|
"logits/rejected": -1.5859375, |
|
"logps/chosen": -2720.0, |
|
"logps/rejected": -2416.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9873949579831933, |
|
"grad_norm": 8.647302168816173, |
|
"learning_rate": 2.424122279805485e-10, |
|
"logits/chosen": -1.5703125, |
|
"logits/rejected": -1.5625, |
|
"logps/chosen": -2672.0, |
|
"logps/rejected": -2320.0, |
|
"loss": 0.6914, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 476, |
|
"total_flos": 0.0, |
|
"train_loss": 0.69140625, |
|
"train_runtime": 12796.3799, |
|
"train_samples_per_second": 4.76, |
|
"train_steps_per_second": 0.037 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 476, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|