|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 32.75, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": 88.18099975585938, |
|
"logits/rejected": 88.25153350830078, |
|
"logps/chosen": -29.073104858398438, |
|
"logps/rejected": -26.25731658935547, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 25.5, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": 81.08353424072266, |
|
"logits/rejected": 80.7865219116211, |
|
"logps/chosen": -34.20063781738281, |
|
"logps/rejected": -33.02577590942383, |
|
"loss": 0.684, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.020544879138469696, |
|
"rewards/margins": 0.05422975495457649, |
|
"rewards/rejected": -0.033684875816106796, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 25.0, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": 80.67228698730469, |
|
"logits/rejected": 80.55956268310547, |
|
"logps/chosen": -33.523399353027344, |
|
"logps/rejected": -30.766698837280273, |
|
"loss": 0.6867, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0832558423280716, |
|
"rewards/margins": 0.0778605192899704, |
|
"rewards/rejected": 0.00539533281698823, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 29.375, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": 82.52102661132812, |
|
"logits/rejected": 82.54798889160156, |
|
"logps/chosen": -33.94279861450195, |
|
"logps/rejected": -31.214736938476562, |
|
"loss": 0.7575, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.035010773688554764, |
|
"rewards/margins": -0.06953731179237366, |
|
"rewards/rejected": 0.10454808175563812, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 24.25, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": 81.06597137451172, |
|
"logits/rejected": 81.06147766113281, |
|
"logps/chosen": -32.76929473876953, |
|
"logps/rejected": -33.212310791015625, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.20238244533538818, |
|
"rewards/margins": 0.12427721172571182, |
|
"rewards/rejected": 0.07810524851083755, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 18.75, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": 78.68827819824219, |
|
"logits/rejected": 78.70021057128906, |
|
"logps/chosen": -30.579212188720703, |
|
"logps/rejected": -30.904521942138672, |
|
"loss": 0.6716, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.3130193054676056, |
|
"rewards/margins": 0.22863590717315674, |
|
"rewards/rejected": 0.08438339829444885, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 25.375, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": 83.18531799316406, |
|
"logits/rejected": 83.2416763305664, |
|
"logps/chosen": -30.874561309814453, |
|
"logps/rejected": -29.523483276367188, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.15030017495155334, |
|
"rewards/margins": 0.11678256839513779, |
|
"rewards/rejected": 0.03351757675409317, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 40.75, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": 83.8118896484375, |
|
"logits/rejected": 83.84835815429688, |
|
"logps/chosen": -30.521596908569336, |
|
"logps/rejected": -33.058387756347656, |
|
"loss": 0.7083, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.05371224880218506, |
|
"rewards/margins": 0.06404958665370941, |
|
"rewards/rejected": -0.010337340645492077, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 25.625, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": 81.4435043334961, |
|
"logits/rejected": 81.42585754394531, |
|
"logps/chosen": -31.408594131469727, |
|
"logps/rejected": -31.023513793945312, |
|
"loss": 0.6594, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.04817419499158859, |
|
"rewards/margins": 0.18110866844654083, |
|
"rewards/rejected": -0.13293446600437164, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 31.75, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": 78.11305236816406, |
|
"logits/rejected": 78.08197784423828, |
|
"logps/chosen": -32.596641540527344, |
|
"logps/rejected": -31.254547119140625, |
|
"loss": 0.677, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.02076822891831398, |
|
"rewards/margins": 0.17230577766895294, |
|
"rewards/rejected": -0.15153756737709045, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 24.25, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": 83.37003326416016, |
|
"logits/rejected": 83.38694763183594, |
|
"logps/chosen": -34.16094207763672, |
|
"logps/rejected": -31.8507137298584, |
|
"loss": 0.6859, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.0720243752002716, |
|
"rewards/margins": 0.12210811674594879, |
|
"rewards/rejected": -0.05008373409509659, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 98.68260955810547, |
|
"eval_logits/rejected": 98.67269134521484, |
|
"eval_logps/chosen": -32.555023193359375, |
|
"eval_logps/rejected": -36.136436462402344, |
|
"eval_loss": 0.7229137420654297, |
|
"eval_rewards/accuracies": 0.5282392501831055, |
|
"eval_rewards/chosen": -0.05592246353626251, |
|
"eval_rewards/margins": 0.029031021520495415, |
|
"eval_rewards/rejected": -0.08495348691940308, |
|
"eval_runtime": 104.2675, |
|
"eval_samples_per_second": 3.29, |
|
"eval_steps_per_second": 0.412, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 35.25, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": 83.4867172241211, |
|
"logits/rejected": 83.372802734375, |
|
"logps/chosen": -32.426979064941406, |
|
"logps/rejected": -32.882118225097656, |
|
"loss": 0.5808, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.2979138493537903, |
|
"rewards/margins": 0.4110265374183655, |
|
"rewards/rejected": -0.11311274766921997, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 38.5, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": 83.65151977539062, |
|
"logits/rejected": 83.7606201171875, |
|
"logps/chosen": -28.169925689697266, |
|
"logps/rejected": -35.475196838378906, |
|
"loss": 0.6054, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.37982815504074097, |
|
"rewards/margins": 0.3878515660762787, |
|
"rewards/rejected": -0.008023401722311974, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 24.0, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": 80.93061065673828, |
|
"logits/rejected": 80.95037841796875, |
|
"logps/chosen": -30.428030014038086, |
|
"logps/rejected": -32.12748336791992, |
|
"loss": 0.6058, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.23966185748577118, |
|
"rewards/margins": 0.33758825063705444, |
|
"rewards/rejected": -0.09792639315128326, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 17.75, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": 82.01722717285156, |
|
"logits/rejected": 82.02776336669922, |
|
"logps/chosen": -27.013805389404297, |
|
"logps/rejected": -33.108734130859375, |
|
"loss": 0.5348, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.2167574167251587, |
|
"rewards/margins": 0.5594369173049927, |
|
"rewards/rejected": -0.3426794409751892, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 26.125, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": 80.49574279785156, |
|
"logits/rejected": 80.46543884277344, |
|
"logps/chosen": -28.96371078491211, |
|
"logps/rejected": -33.28276443481445, |
|
"loss": 0.5393, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.21269837021827698, |
|
"rewards/margins": 0.5708296895027161, |
|
"rewards/rejected": -0.3581313192844391, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 36.75, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": 82.2945556640625, |
|
"logits/rejected": 82.3069839477539, |
|
"logps/chosen": -33.60367202758789, |
|
"logps/rejected": -30.597631454467773, |
|
"loss": 0.5835, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.266539990901947, |
|
"rewards/margins": 0.5890872478485107, |
|
"rewards/rejected": -0.3225473165512085, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 26.25, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": 83.09800720214844, |
|
"logits/rejected": 83.04072570800781, |
|
"logps/chosen": -30.741390228271484, |
|
"logps/rejected": -32.57554626464844, |
|
"loss": 0.5698, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.25894853472709656, |
|
"rewards/margins": 0.5366927981376648, |
|
"rewards/rejected": -0.27774426341056824, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 23.625, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": 80.62547302246094, |
|
"logits/rejected": 80.6036605834961, |
|
"logps/chosen": -30.658870697021484, |
|
"logps/rejected": -31.662927627563477, |
|
"loss": 0.55, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.2691388726234436, |
|
"rewards/margins": 0.5219991207122803, |
|
"rewards/rejected": -0.25286024808883667, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 12.4375, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": 82.23011779785156, |
|
"logits/rejected": 82.23200225830078, |
|
"logps/chosen": -30.246551513671875, |
|
"logps/rejected": -30.743694305419922, |
|
"loss": 0.6343, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.25993233919143677, |
|
"rewards/margins": 0.3977679908275604, |
|
"rewards/rejected": -0.13783565163612366, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 14.625, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": 77.68287658691406, |
|
"logits/rejected": 77.6307144165039, |
|
"logps/chosen": -33.85746383666992, |
|
"logps/rejected": -32.90498352050781, |
|
"loss": 0.5453, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.44763606786727905, |
|
"rewards/margins": 0.6534249186515808, |
|
"rewards/rejected": -0.20578882098197937, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 98.49555206298828, |
|
"eval_logits/rejected": 98.47319030761719, |
|
"eval_logps/chosen": -32.7014274597168, |
|
"eval_logps/rejected": -36.39837646484375, |
|
"eval_loss": 0.7067598700523376, |
|
"eval_rewards/accuracies": 0.5332226157188416, |
|
"eval_rewards/chosen": -0.12912562489509583, |
|
"eval_rewards/margins": 0.0867983028292656, |
|
"eval_rewards/rejected": -0.21592393517494202, |
|
"eval_runtime": 103.9124, |
|
"eval_samples_per_second": 3.301, |
|
"eval_steps_per_second": 0.414, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 46.5, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": 80.28144836425781, |
|
"logits/rejected": 80.18412780761719, |
|
"logps/chosen": -33.1484489440918, |
|
"logps/rejected": -35.34803009033203, |
|
"loss": 0.5401, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.39863476157188416, |
|
"rewards/margins": 0.5796052813529968, |
|
"rewards/rejected": -0.18097054958343506, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 18.875, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": 82.44231414794922, |
|
"logits/rejected": 82.5237808227539, |
|
"logps/chosen": -31.035247802734375, |
|
"logps/rejected": -31.218652725219727, |
|
"loss": 0.5206, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.45685210824012756, |
|
"rewards/margins": 0.7017010450363159, |
|
"rewards/rejected": -0.24484892189502716, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 27.5, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": 79.57551574707031, |
|
"logits/rejected": 79.63117218017578, |
|
"logps/chosen": -32.342308044433594, |
|
"logps/rejected": -34.377655029296875, |
|
"loss": 0.6337, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.23171910643577576, |
|
"rewards/margins": 0.4144717752933502, |
|
"rewards/rejected": -0.18275271356105804, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 29.375, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": 81.96827697753906, |
|
"logits/rejected": 82.25470733642578, |
|
"logps/chosen": -30.665319442749023, |
|
"logps/rejected": -31.84256362915039, |
|
"loss": 0.5035, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5113734006881714, |
|
"rewards/margins": 0.6666342616081238, |
|
"rewards/rejected": -0.1552608758211136, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 27.125, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": 80.69163513183594, |
|
"logits/rejected": 80.74885559082031, |
|
"logps/chosen": -26.948383331298828, |
|
"logps/rejected": -30.213348388671875, |
|
"loss": 0.6037, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.32375770807266235, |
|
"rewards/margins": 0.48006218671798706, |
|
"rewards/rejected": -0.1563045233488083, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 27.75, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": 77.91614532470703, |
|
"logits/rejected": 78.04708099365234, |
|
"logps/chosen": -30.49196434020996, |
|
"logps/rejected": -36.51909255981445, |
|
"loss": 0.4936, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5345155000686646, |
|
"rewards/margins": 0.7432474493980408, |
|
"rewards/rejected": -0.2087319791316986, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 16.75, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": 77.25732421875, |
|
"logits/rejected": 77.28456115722656, |
|
"logps/chosen": -30.877111434936523, |
|
"logps/rejected": -31.975589752197266, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.4509071707725525, |
|
"rewards/margins": 0.685990035533905, |
|
"rewards/rejected": -0.23508290946483612, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 25.125, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": 80.03535461425781, |
|
"logits/rejected": 79.8197250366211, |
|
"logps/chosen": -31.083843231201172, |
|
"logps/rejected": -29.844961166381836, |
|
"loss": 0.6198, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.3630252182483673, |
|
"rewards/margins": 0.45200806856155396, |
|
"rewards/rejected": -0.08898283541202545, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 16.625, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": 80.13519287109375, |
|
"logits/rejected": 80.05864715576172, |
|
"logps/chosen": -33.047523498535156, |
|
"logps/rejected": -32.68352127075195, |
|
"loss": 0.4787, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.5283600091934204, |
|
"rewards/margins": 0.8616873025894165, |
|
"rewards/rejected": -0.3333272337913513, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 26.875, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": 75.82341003417969, |
|
"logits/rejected": 75.91938781738281, |
|
"logps/chosen": -32.24137878417969, |
|
"logps/rejected": -29.33247947692871, |
|
"loss": 0.5511, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.5821641087532043, |
|
"rewards/margins": 0.6973918676376343, |
|
"rewards/rejected": -0.11522769927978516, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": 98.5571517944336, |
|
"eval_logits/rejected": 98.53510284423828, |
|
"eval_logps/chosen": -32.68053436279297, |
|
"eval_logps/rejected": -36.284751892089844, |
|
"eval_loss": 0.7289925813674927, |
|
"eval_rewards/accuracies": 0.545265793800354, |
|
"eval_rewards/chosen": -0.11868056654930115, |
|
"eval_rewards/margins": 0.04043150320649147, |
|
"eval_rewards/rejected": -0.1591120809316635, |
|
"eval_runtime": 104.072, |
|
"eval_samples_per_second": 3.296, |
|
"eval_steps_per_second": 0.413, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 21.625, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": 82.95035552978516, |
|
"logits/rejected": 82.97549438476562, |
|
"logps/chosen": -30.2395076751709, |
|
"logps/rejected": -32.58390808105469, |
|
"loss": 0.5895, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.31201133131980896, |
|
"rewards/margins": 0.5040835738182068, |
|
"rewards/rejected": -0.19207225739955902, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 18.0, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": 80.41062927246094, |
|
"logits/rejected": 80.41141510009766, |
|
"logps/chosen": -30.43392562866211, |
|
"logps/rejected": -29.273609161376953, |
|
"loss": 0.4816, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.5823162794113159, |
|
"rewards/margins": 0.7593742609024048, |
|
"rewards/rejected": -0.1770579218864441, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 16.625, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": 77.58735656738281, |
|
"logits/rejected": 77.6371841430664, |
|
"logps/chosen": -29.118694305419922, |
|
"logps/rejected": -33.023460388183594, |
|
"loss": 0.4851, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.6169556975364685, |
|
"rewards/margins": 0.8112555742263794, |
|
"rewards/rejected": -0.19429990649223328, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 33.5, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": 81.87162780761719, |
|
"logits/rejected": 81.90576171875, |
|
"logps/chosen": -32.27037048339844, |
|
"logps/rejected": -33.92435836791992, |
|
"loss": 0.5157, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.4708884358406067, |
|
"rewards/margins": 0.7487217783927917, |
|
"rewards/rejected": -0.27783337235450745, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 13.0, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": 80.89490509033203, |
|
"logits/rejected": 80.90391540527344, |
|
"logps/chosen": -32.372520446777344, |
|
"logps/rejected": -33.53419876098633, |
|
"loss": 0.4983, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.6356090307235718, |
|
"rewards/margins": 0.8568087816238403, |
|
"rewards/rejected": -0.22119958698749542, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 20.125, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": 82.41497802734375, |
|
"logits/rejected": 82.44314575195312, |
|
"logps/chosen": -28.537517547607422, |
|
"logps/rejected": -31.826416015625, |
|
"loss": 0.5313, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.5073467493057251, |
|
"rewards/margins": 0.5956294536590576, |
|
"rewards/rejected": -0.08828280866146088, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 21.75, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": 81.85018920898438, |
|
"logits/rejected": 81.87150573730469, |
|
"logps/chosen": -31.847997665405273, |
|
"logps/rejected": -35.508277893066406, |
|
"loss": 0.5486, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.5140814781188965, |
|
"rewards/margins": 0.7135335206985474, |
|
"rewards/rejected": -0.19945205748081207, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 25.5, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": 75.74685668945312, |
|
"logits/rejected": 75.61808013916016, |
|
"logps/chosen": -29.746694564819336, |
|
"logps/rejected": -28.432201385498047, |
|
"loss": 0.5715, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.4147160053253174, |
|
"rewards/margins": 0.5505291223526001, |
|
"rewards/rejected": -0.1358131319284439, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5860598966672823, |
|
"train_runtime": 2558.949, |
|
"train_samples_per_second": 1.203, |
|
"train_steps_per_second": 0.15 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|