{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 400, |
|
"global_step": 468, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010683760683760684, |
|
"grad_norm": 53.325706395174244, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -0.129314124584198, |
|
"logits/rejected": -0.1248931884765625, |
|
"logps/chosen": -135.08358764648438, |
|
"logps/rejected": -137.43325805664062, |
|
"loss": 1.7058, |
|
"nll_loss": 0.33312344551086426, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -13.508357048034668, |
|
"rewards/margins": 0.23496881127357483, |
|
"rewards/rejected": -13.743327140808105, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.021367521367521368, |
|
"grad_norm": 55.47829235394904, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -0.14523069560527802, |
|
"logits/rejected": -0.12604156136512756, |
|
"logps/chosen": -138.52426147460938, |
|
"logps/rejected": -138.5316619873047, |
|
"loss": 1.8327, |
|
"nll_loss": 0.3906691372394562, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -13.85242748260498, |
|
"rewards/margins": 0.0007393002742901444, |
|
"rewards/rejected": -13.853166580200195, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03205128205128205, |
|
"grad_norm": 49.43226333748803, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -0.11341211944818497, |
|
"logits/rejected": -0.049495745450258255, |
|
"logps/chosen": -137.65377807617188, |
|
"logps/rejected": -135.8640594482422, |
|
"loss": 1.8016, |
|
"nll_loss": 0.38431602716445923, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -13.765378952026367, |
|
"rewards/margins": -0.1789727509021759, |
|
"rewards/rejected": -13.586407661437988, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.042735042735042736, |
|
"grad_norm": 49.78305628254871, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -0.016971366479992867, |
|
"logits/rejected": 0.0019870593678206205, |
|
"logps/chosen": -114.83842468261719, |
|
"logps/rejected": -117.45811462402344, |
|
"loss": 1.9135, |
|
"nll_loss": 0.3634462356567383, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -11.483842849731445, |
|
"rewards/margins": 0.2619696259498596, |
|
"rewards/rejected": -11.74581241607666, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.053418803418803416, |
|
"grad_norm": 51.145326847505586, |
|
"learning_rate": 5.319148936170212e-07, |
|
"logits/chosen": -0.07376699149608612, |
|
"logits/rejected": -0.0683017149567604, |
|
"logps/chosen": -117.89664459228516, |
|
"logps/rejected": -117.4900894165039, |
|
"loss": 1.9047, |
|
"nll_loss": 0.36660850048065186, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -11.789665222167969, |
|
"rewards/margins": -0.040656279772520065, |
|
"rewards/rejected": -11.749008178710938, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0641025641025641, |
|
"grad_norm": 49.16493361461768, |
|
"learning_rate": 6.382978723404255e-07, |
|
"logits/chosen": -0.025973210111260414, |
|
"logits/rejected": -0.06585584580898285, |
|
"logps/chosen": -125.08711242675781, |
|
"logps/rejected": -131.66029357910156, |
|
"loss": 1.7073, |
|
"nll_loss": 0.32747945189476013, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -12.508711814880371, |
|
"rewards/margins": 0.6573159694671631, |
|
"rewards/rejected": -13.166028022766113, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07478632478632478, |
|
"grad_norm": 47.421405220624315, |
|
"learning_rate": 7.446808510638297e-07, |
|
"logits/chosen": -0.21625056862831116, |
|
"logits/rejected": -0.2136630266904831, |
|
"logps/chosen": -132.8556671142578, |
|
"logps/rejected": -141.15939331054688, |
|
"loss": 1.7525, |
|
"nll_loss": 0.34708237648010254, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -13.285565376281738, |
|
"rewards/margins": 0.8303732872009277, |
|
"rewards/rejected": -14.115941047668457, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08547008547008547, |
|
"grad_norm": 51.87443941562729, |
|
"learning_rate": 8.51063829787234e-07, |
|
"logits/chosen": -0.17511573433876038, |
|
"logits/rejected": -0.1541801393032074, |
|
"logps/chosen": -90.94963073730469, |
|
"logps/rejected": -96.69755554199219, |
|
"loss": 1.6671, |
|
"nll_loss": 0.3230934143066406, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -9.094963073730469, |
|
"rewards/margins": 0.5747929215431213, |
|
"rewards/rejected": -9.669755935668945, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09615384615384616, |
|
"grad_norm": 52.47331055200087, |
|
"learning_rate": 9.574468085106384e-07, |
|
"logits/chosen": -0.1329955905675888, |
|
"logits/rejected": -0.1524827778339386, |
|
"logps/chosen": -109.51045989990234, |
|
"logps/rejected": -113.86258697509766, |
|
"loss": 1.7698, |
|
"nll_loss": 0.35021230578422546, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -10.951045036315918, |
|
"rewards/margins": 0.43521156907081604, |
|
"rewards/rejected": -11.386259078979492, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10683760683760683, |
|
"grad_norm": 47.963761732132916, |
|
"learning_rate": 9.998747147528373e-07, |
|
"logits/chosen": -0.19674669206142426, |
|
"logits/rejected": -0.17990216612815857, |
|
"logps/chosen": -133.21511840820312, |
|
"logps/rejected": -130.31539916992188, |
|
"loss": 1.632, |
|
"nll_loss": 0.3093932569026947, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -13.321512222290039, |
|
"rewards/margins": -0.28997141122817993, |
|
"rewards/rejected": -13.031539916992188, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11752136752136752, |
|
"grad_norm": 51.94560167409897, |
|
"learning_rate": 9.991093100466482e-07, |
|
"logits/chosen": -0.21401865780353546, |
|
"logits/rejected": -0.19104179739952087, |
|
"logps/chosen": -110.11128234863281, |
|
"logps/rejected": -112.35163879394531, |
|
"loss": 1.5823, |
|
"nll_loss": 0.2831841707229614, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -11.011127471923828, |
|
"rewards/margins": 0.22403621673583984, |
|
"rewards/rejected": -11.235164642333984, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1282051282051282, |
|
"grad_norm": 58.49154963276172, |
|
"learning_rate": 9.976491676662678e-07, |
|
"logits/chosen": -0.13603931665420532, |
|
"logits/rejected": -0.18451443314552307, |
|
"logps/chosen": -119.95387268066406, |
|
"logps/rejected": -133.7266845703125, |
|
"loss": 1.6162, |
|
"nll_loss": 0.2651820778846741, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -11.995387077331543, |
|
"rewards/margins": 1.3772820234298706, |
|
"rewards/rejected": -13.37267017364502, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1388888888888889, |
|
"grad_norm": 54.3184236422879, |
|
"learning_rate": 9.95496320064109e-07, |
|
"logits/chosen": -0.2574421763420105, |
|
"logits/rejected": -0.1615368276834488, |
|
"logps/chosen": -102.09004211425781, |
|
"logps/rejected": -92.2038345336914, |
|
"loss": 1.5964, |
|
"nll_loss": 0.27036991715431213, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": -10.209004402160645, |
|
"rewards/margins": -0.9886210560798645, |
|
"rewards/rejected": -9.220383644104004, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14957264957264957, |
|
"grad_norm": 59.11629390354106, |
|
"learning_rate": 9.926537639070456e-07, |
|
"logits/chosen": -0.28000539541244507, |
|
"logits/rejected": -0.23213541507720947, |
|
"logps/chosen": -119.5620346069336, |
|
"logps/rejected": -123.65787506103516, |
|
"loss": 1.7745, |
|
"nll_loss": 0.36628904938697815, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -11.95620346069336, |
|
"rewards/margins": 0.4095849096775055, |
|
"rewards/rejected": -12.365787506103516, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16025641025641027, |
|
"grad_norm": 57.53497747164318, |
|
"learning_rate": 9.891254559051884e-07, |
|
"logits/chosen": -0.1931193619966507, |
|
"logits/rejected": -0.15742453932762146, |
|
"logps/chosen": -115.27913665771484, |
|
"logps/rejected": -126.52079772949219, |
|
"loss": 1.5256, |
|
"nll_loss": 0.3370510935783386, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -11.527913093566895, |
|
"rewards/margins": 1.1241672039031982, |
|
"rewards/rejected": -12.652081489562988, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17094017094017094, |
|
"grad_norm": 52.58145471881646, |
|
"learning_rate": 9.849163073043223e-07, |
|
"logits/chosen": -0.12034114450216293, |
|
"logits/rejected": -0.09974785149097443, |
|
"logps/chosen": -134.50930786132812, |
|
"logps/rejected": -125.11856842041016, |
|
"loss": 1.7211, |
|
"nll_loss": 0.3088015913963318, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -13.450933456420898, |
|
"rewards/margins": -0.939074695110321, |
|
"rewards/rejected": -12.511857986450195, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18162393162393162, |
|
"grad_norm": 52.38736187449637, |
|
"learning_rate": 9.800321770496724e-07, |
|
"logits/chosen": -0.0975460559129715, |
|
"logits/rejected": -0.0958404392004013, |
|
"logps/chosen": -89.18054962158203, |
|
"logps/rejected": -94.65280151367188, |
|
"loss": 1.4466, |
|
"nll_loss": 0.301828533411026, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -8.918054580688477, |
|
"rewards/margins": 0.5472255945205688, |
|
"rewards/rejected": -9.465280532836914, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19230769230769232, |
|
"grad_norm": 52.449979107103914, |
|
"learning_rate": 9.744798636305187e-07, |
|
"logits/chosen": -0.18882372975349426, |
|
"logits/rejected": -0.18104039132595062, |
|
"logps/chosen": -94.19184875488281, |
|
"logps/rejected": -104.16746520996094, |
|
"loss": 1.3906, |
|
"nll_loss": 0.26845496892929077, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -9.419185638427734, |
|
"rewards/margins": 0.9975622892379761, |
|
"rewards/rejected": -10.416748046875, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.202991452991453, |
|
"grad_norm": 56.906972668285476, |
|
"learning_rate": 9.68267095617003e-07, |
|
"logits/chosen": -0.17206290364265442, |
|
"logits/rejected": -0.12139072269201279, |
|
"logps/chosen": -85.0337142944336, |
|
"logps/rejected": -85.30671691894531, |
|
"loss": 1.627, |
|
"nll_loss": 0.3232787847518921, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -8.50337028503418, |
|
"rewards/margins": 0.027300655841827393, |
|
"rewards/rejected": -8.530672073364258, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21367521367521367, |
|
"grad_norm": 56.063471274711276, |
|
"learning_rate": 9.614025209023083e-07, |
|
"logits/chosen": -0.21710339188575745, |
|
"logits/rejected": -0.187973290681839, |
|
"logps/chosen": -129.84634399414062, |
|
"logps/rejected": -132.91452026367188, |
|
"loss": 1.4296, |
|
"nll_loss": 0.2875466048717499, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -12.984634399414062, |
|
"rewards/margins": 0.3068179488182068, |
|
"rewards/rejected": -13.291452407836914, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22435897435897437, |
|
"grad_norm": 54.253076126268894, |
|
"learning_rate": 9.538956946651815e-07, |
|
"logits/chosen": -0.05166339874267578, |
|
"logits/rejected": 0.029095903038978577, |
|
"logps/chosen": -104.00286865234375, |
|
"logps/rejected": -113.63838958740234, |
|
"loss": 1.4212, |
|
"nll_loss": 0.2850767970085144, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -10.400287628173828, |
|
"rewards/margins": 0.9635505676269531, |
|
"rewards/rejected": -11.363838195800781, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23504273504273504, |
|
"grad_norm": 65.81870203591372, |
|
"learning_rate": 9.457570660695539e-07, |
|
"logits/chosen": -0.07271315902471542, |
|
"logits/rejected": -0.11916762590408325, |
|
"logps/chosen": -127.29766845703125, |
|
"logps/rejected": -130.58584594726562, |
|
"loss": 1.5306, |
|
"nll_loss": 0.33747240900993347, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -12.729766845703125, |
|
"rewards/margins": 0.3288170099258423, |
|
"rewards/rejected": -13.05858325958252, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.24572649572649571, |
|
"grad_norm": 58.367624421766024, |
|
"learning_rate": 9.369979637197774e-07, |
|
"logits/chosen": -0.12270005792379379, |
|
"logits/rejected": -0.17853489518165588, |
|
"logps/chosen": -106.76663970947266, |
|
"logps/rejected": -109.66035461425781, |
|
"loss": 1.5831, |
|
"nll_loss": 0.2727692425251007, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -10.676663398742676, |
|
"rewards/margins": 0.2893708348274231, |
|
"rewards/rejected": -10.966034889221191, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 53.75246999549208, |
|
"learning_rate": 9.276305798917158e-07, |
|
"logits/chosen": -0.047394849359989166, |
|
"logits/rejected": -0.08673441410064697, |
|
"logps/chosen": -113.4908447265625, |
|
"logps/rejected": -123.5615234375, |
|
"loss": 1.551, |
|
"nll_loss": 0.28447219729423523, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -11.34908390045166, |
|
"rewards/margins": 1.0070677995681763, |
|
"rewards/rejected": -12.35615348815918, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2670940170940171, |
|
"grad_norm": 64.46832867529025, |
|
"learning_rate": 9.176679535616476e-07, |
|
"logits/chosen": 0.08038081228733063, |
|
"logits/rejected": 0.05699559301137924, |
|
"logps/chosen": -112.93634033203125, |
|
"logps/rejected": -125.99974060058594, |
|
"loss": 1.4662, |
|
"nll_loss": 0.35629984736442566, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -11.293634414672852, |
|
"rewards/margins": 1.3063404560089111, |
|
"rewards/rejected": -12.599973678588867, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2777777777777778, |
|
"grad_norm": 55.82818467114938, |
|
"learning_rate": 9.071239522565976e-07, |
|
"logits/chosen": -0.07054271548986435, |
|
"logits/rejected": -0.024266820400953293, |
|
"logps/chosen": -113.1104507446289, |
|
"logps/rejected": -116.25431060791016, |
|
"loss": 1.4477, |
|
"nll_loss": 0.33033448457717896, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -11.311044692993164, |
|
"rewards/margins": 0.3143869638442993, |
|
"rewards/rejected": -11.625432014465332, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.28846153846153844, |
|
"grad_norm": 59.467620258292605, |
|
"learning_rate": 8.960132527513642e-07, |
|
"logits/chosen": -0.06084425374865532, |
|
"logits/rejected": -0.062250006943941116, |
|
"logps/chosen": -127.11384582519531, |
|
"logps/rejected": -127.90572357177734, |
|
"loss": 1.5105, |
|
"nll_loss": 0.354878306388855, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -12.711384773254395, |
|
"rewards/margins": 0.07918860763311386, |
|
"rewards/rejected": -12.790571212768555, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29914529914529914, |
|
"grad_norm": 67.52329899130741, |
|
"learning_rate": 8.8435132063911e-07, |
|
"logits/chosen": -0.06606234610080719, |
|
"logits/rejected": -0.02088196575641632, |
|
"logps/chosen": -135.88082885742188, |
|
"logps/rejected": -141.05441284179688, |
|
"loss": 1.3854, |
|
"nll_loss": 0.35649529099464417, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -13.58808422088623, |
|
"rewards/margins": 0.5173591375350952, |
|
"rewards/rejected": -14.105443000793457, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.30982905982905984, |
|
"grad_norm": 66.6282576658114, |
|
"learning_rate": 8.721543888039532e-07, |
|
"logits/chosen": -0.14654412865638733, |
|
"logits/rejected": -0.15081673860549927, |
|
"logps/chosen": -135.72756958007812, |
|
"logps/rejected": -131.85450744628906, |
|
"loss": 1.4429, |
|
"nll_loss": 0.3281570076942444, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -13.57275676727295, |
|
"rewards/margins": -0.38730812072753906, |
|
"rewards/rejected": -13.185449600219727, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.32051282051282054, |
|
"grad_norm": 57.916364691190665, |
|
"learning_rate": 8.594394348255237e-07, |
|
"logits/chosen": -0.2932497560977936, |
|
"logits/rejected": -0.25581642985343933, |
|
"logps/chosen": -141.12599182128906, |
|
"logps/rejected": -141.9236602783203, |
|
"loss": 1.4486, |
|
"nll_loss": 0.329673707485199, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -14.11259937286377, |
|
"rewards/margins": 0.07976653426885605, |
|
"rewards/rejected": -14.192365646362305, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3311965811965812, |
|
"grad_norm": 54.1500700625341, |
|
"learning_rate": 8.462241573469377e-07, |
|
"logits/chosen": -0.19544358551502228, |
|
"logits/rejected": -0.1730412244796753, |
|
"logps/chosen": -145.17381286621094, |
|
"logps/rejected": -144.12461853027344, |
|
"loss": 1.4251, |
|
"nll_loss": 0.34274980425834656, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -14.51738166809082, |
|
"rewards/margins": -0.10491929203271866, |
|
"rewards/rejected": -14.412463188171387, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3418803418803419, |
|
"grad_norm": 62.757464156888936, |
|
"learning_rate": 8.325269514390834e-07, |
|
"logits/chosen": 0.013430899009108543, |
|
"logits/rejected": -0.037021975964307785, |
|
"logps/chosen": -111.84146881103516, |
|
"logps/rejected": -128.94786071777344, |
|
"loss": 1.4507, |
|
"nll_loss": 0.315662145614624, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -11.184146881103516, |
|
"rewards/margins": 1.7106406688690186, |
|
"rewards/rejected": -12.894787788391113, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3525641025641026, |
|
"grad_norm": 50.800038470428575, |
|
"learning_rate": 8.183668829955111e-07, |
|
"logits/chosen": -0.3039621412754059, |
|
"logits/rejected": -0.287003755569458, |
|
"logps/chosen": -139.82981872558594, |
|
"logps/rejected": -146.7205352783203, |
|
"loss": 1.37, |
|
"nll_loss": 0.3524821996688843, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -13.982983589172363, |
|
"rewards/margins": 0.6890703439712524, |
|
"rewards/rejected": -14.672053337097168, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.36324786324786323, |
|
"grad_norm": 59.841427525159546, |
|
"learning_rate": 8.037636621935684e-07, |
|
"logits/chosen": -0.31735068559646606, |
|
"logits/rejected": -0.23542420566082, |
|
"logps/chosen": -103.27725982666016, |
|
"logps/rejected": -106.25889587402344, |
|
"loss": 1.3626, |
|
"nll_loss": 0.3173479437828064, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -10.327725410461426, |
|
"rewards/margins": 0.2981634736061096, |
|
"rewards/rejected": -10.625889778137207, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.37393162393162394, |
|
"grad_norm": 52.9514862200353, |
|
"learning_rate": 7.887376160587213e-07, |
|
"logits/chosen": -0.20337620377540588, |
|
"logits/rejected": -0.17184031009674072, |
|
"logps/chosen": -116.390869140625, |
|
"logps/rejected": -118.56620788574219, |
|
"loss": 1.3521, |
|
"nll_loss": 0.3265441358089447, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -11.639086723327637, |
|
"rewards/margins": 0.21753445267677307, |
|
"rewards/rejected": -11.856620788574219, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.38461538461538464, |
|
"grad_norm": 50.63958304716666, |
|
"learning_rate": 7.733096601702507e-07, |
|
"logits/chosen": -0.012370765209197998, |
|
"logits/rejected": 0.05527879670262337, |
|
"logps/chosen": -113.68470764160156, |
|
"logps/rejected": -110.1121597290039, |
|
"loss": 1.3928, |
|
"nll_loss": 0.3376830518245697, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -11.368470191955566, |
|
"rewards/margins": -0.3572540283203125, |
|
"rewards/rejected": -11.01121711730957, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3952991452991453, |
|
"grad_norm": 53.743159898285576, |
|
"learning_rate": 7.575012695477076e-07, |
|
"logits/chosen": 0.06333889812231064, |
|
"logits/rejected": 0.09429727494716644, |
|
"logps/chosen": -110.7640609741211, |
|
"logps/rejected": -115.84847259521484, |
|
"loss": 1.5223, |
|
"nll_loss": 0.3274967670440674, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -11.076406478881836, |
|
"rewards/margins": 0.5084413290023804, |
|
"rewards/rejected": -11.584847450256348, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.405982905982906, |
|
"grad_norm": 56.40313393417172, |
|
"learning_rate": 7.413344487586542e-07, |
|
"logits/chosen": -0.08225846290588379, |
|
"logits/rejected": -0.055325210094451904, |
|
"logps/chosen": -116.2511978149414, |
|
"logps/rejected": -132.88140869140625, |
|
"loss": 1.4658, |
|
"nll_loss": 0.3596312403678894, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -11.62511920928955, |
|
"rewards/margins": 1.6630210876464844, |
|
"rewards/rejected": -13.288141250610352, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 54.645494345995765, |
|
"learning_rate": 7.248317012892968e-07, |
|
"logits/chosen": -0.1261519491672516, |
|
"logits/rejected": -0.16661445796489716, |
|
"logps/chosen": -125.1494140625, |
|
"logps/rejected": -137.20791625976562, |
|
"loss": 1.4354, |
|
"nll_loss": 0.3696037232875824, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -12.514939308166504, |
|
"rewards/margins": 1.2058517932891846, |
|
"rewards/rejected": -13.720791816711426, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.42735042735042733, |
|
"grad_norm": 61.519785155196736, |
|
"learning_rate": 7.08015998220647e-07, |
|
"logits/chosen": -0.13151851296424866, |
|
"logits/rejected": -0.08859863132238388, |
|
"logps/chosen": -165.24661254882812, |
|
"logps/rejected": -170.71624755859375, |
|
"loss": 1.4629, |
|
"nll_loss": 0.34175121784210205, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -16.524662017822266, |
|
"rewards/margins": 0.5469658374786377, |
|
"rewards/rejected": -17.07162857055664, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43803418803418803, |
|
"grad_norm": 55.57722114588639, |
|
"learning_rate": 6.909107462538111e-07, |
|
"logits/chosen": -0.19121451675891876, |
|
"logits/rejected": -0.20065097510814667, |
|
"logps/chosen": -142.35458374023438, |
|
"logps/rejected": -150.1237030029297, |
|
"loss": 1.4106, |
|
"nll_loss": 0.34670525789260864, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -14.235458374023438, |
|
"rewards/margins": 0.7769120335578918, |
|
"rewards/rejected": -15.012370109558105, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.44871794871794873, |
|
"grad_norm": 69.86075329234693, |
|
"learning_rate": 6.735397551289178e-07, |
|
"logits/chosen": -0.1635718047618866, |
|
"logits/rejected": -0.1015796884894371, |
|
"logps/chosen": -132.5345458984375, |
|
"logps/rejected": -135.8687286376953, |
|
"loss": 1.4962, |
|
"nll_loss": 0.32979699969291687, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -13.253456115722656, |
|
"rewards/margins": 0.33341652154922485, |
|
"rewards/rejected": -13.586873054504395, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4594017094017094, |
|
"grad_norm": 58.3273614395911, |
|
"learning_rate": 6.559272044830316e-07, |
|
"logits/chosen": -0.07360713183879852, |
|
"logits/rejected": -6.962567567825317e-05, |
|
"logps/chosen": -122.03056335449219, |
|
"logps/rejected": -129.28549194335938, |
|
"loss": 1.3599, |
|
"nll_loss": 0.3558313846588135, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -12.203059196472168, |
|
"rewards/margins": 0.7254913449287415, |
|
"rewards/rejected": -12.928548812866211, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.4700854700854701, |
|
"grad_norm": 50.10334120589398, |
|
"learning_rate": 6.380976101931879e-07, |
|
"logits/chosen": 0.01870536431670189, |
|
"logits/rejected": 0.0029756189323961735, |
|
"logps/chosen": -107.1243896484375, |
|
"logps/rejected": -112.00141906738281, |
|
"loss": 1.3138, |
|
"nll_loss": 0.3773984909057617, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -10.712437629699707, |
|
"rewards/margins": 0.48770326375961304, |
|
"rewards/rejected": -11.200141906738281, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4807692307692308, |
|
"grad_norm": 57.01257456037248, |
|
"learning_rate": 6.200757902513962e-07, |
|
"logits/chosen": -0.0710291862487793, |
|
"logits/rejected": -0.1122066006064415, |
|
"logps/chosen": -132.7135467529297, |
|
"logps/rejected": -147.7503204345703, |
|
"loss": 1.3819, |
|
"nll_loss": 0.3411773443222046, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -13.271354675292969, |
|
"rewards/margins": 1.5036779642105103, |
|
"rewards/rejected": -14.775032043457031, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.49145299145299143, |
|
"grad_norm": 56.10732954543998, |
|
"learning_rate": 6.018868302191139e-07, |
|
"logits/chosen": -0.02616579458117485, |
|
"logits/rejected": -0.08220602571964264, |
|
"logps/chosen": -109.6393051147461, |
|
"logps/rejected": -119.71272277832031, |
|
"loss": 1.4894, |
|
"nll_loss": 0.32466286420822144, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -10.963930130004883, |
|
"rewards/margins": 1.0073410272598267, |
|
"rewards/rejected": -11.971270561218262, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5021367521367521, |
|
"grad_norm": 57.150169644968244, |
|
"learning_rate": 5.835560483092742e-07, |
|
"logits/chosen": -0.08456510305404663, |
|
"logits/rejected": -0.052301835268735886, |
|
"logps/chosen": -110.45853424072266, |
|
"logps/rejected": -109.49703216552734, |
|
"loss": 1.5332, |
|
"nll_loss": 0.35862648487091064, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -11.045854568481445, |
|
"rewards/margins": -0.09615027904510498, |
|
"rewards/rejected": -10.949705123901367, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 56.92818984806646, |
|
"learning_rate": 5.651089601444752e-07, |
|
"logits/chosen": -0.25266486406326294, |
|
"logits/rejected": -0.27402007579803467, |
|
"logps/chosen": -163.11080932617188, |
|
"logps/rejected": -170.6371612548828, |
|
"loss": 1.3947, |
|
"nll_loss": 0.3526006042957306, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -16.311084747314453, |
|
"rewards/margins": 0.7526326775550842, |
|
"rewards/rejected": -17.063716888427734, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5235042735042735, |
|
"grad_norm": 55.824252362448014, |
|
"learning_rate": 5.465712432403811e-07, |
|
"logits/chosen": -0.1512390673160553, |
|
"logits/rejected": -0.15393702685832977, |
|
"logps/chosen": -140.1497039794922, |
|
"logps/rejected": -155.41702270507812, |
|
"loss": 1.2664, |
|
"nll_loss": 0.34822720289230347, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -14.014970779418945, |
|
"rewards/margins": 1.5267311334609985, |
|
"rewards/rejected": -15.541702270507812, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5341880341880342, |
|
"grad_norm": 61.259217624033624, |
|
"learning_rate": 5.279687012637798e-07, |
|
"logits/chosen": 0.047980885952711105, |
|
"logits/rejected": 0.0825057402253151, |
|
"logps/chosen": -131.10092163085938, |
|
"logps/rejected": -141.0630645751953, |
|
"loss": 1.3979, |
|
"nll_loss": 0.3648400902748108, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -13.110092163085938, |
|
"rewards/margins": 0.9962142109870911, |
|
"rewards/rejected": -14.106305122375488, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5448717948717948, |
|
"grad_norm": 54.05289475380886, |
|
"learning_rate": 5.093272281150382e-07, |
|
"logits/chosen": -0.019353587180376053, |
|
"logits/rejected": 0.09568696469068527, |
|
"logps/chosen": -133.12542724609375, |
|
"logps/rejected": -135.47377014160156, |
|
"loss": 1.4605, |
|
"nll_loss": 0.3375224173069, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -13.312542915344238, |
|
"rewards/margins": 0.2348347008228302, |
|
"rewards/rejected": -13.54737663269043, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5555555555555556, |
|
"grad_norm": 55.813930730207794, |
|
"learning_rate": 4.906727718849618e-07, |
|
"logits/chosen": 0.031209534034132957, |
|
"logits/rejected": 0.0828268900513649, |
|
"logps/chosen": -109.07014465332031, |
|
"logps/rejected": -123.6259994506836, |
|
"loss": 1.3422, |
|
"nll_loss": 0.2994317412376404, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -10.907014846801758, |
|
"rewards/margins": 1.4555847644805908, |
|
"rewards/rejected": -12.362600326538086, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5662393162393162, |
|
"grad_norm": 55.164171360826785, |
|
"learning_rate": 4.7203129873622036e-07, |
|
"logits/chosen": -0.12409428507089615, |
|
"logits/rejected": -0.08072350919246674, |
|
"logps/chosen": -142.841552734375, |
|
"logps/rejected": -144.4403533935547, |
|
"loss": 1.3871, |
|
"nll_loss": 0.36165937781333923, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -14.284154891967773, |
|
"rewards/margins": 0.15988163650035858, |
|
"rewards/rejected": -14.444036483764648, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5769230769230769, |
|
"grad_norm": 61.02385741830308, |
|
"learning_rate": 4.534287567596188e-07, |
|
"logits/chosen": -0.16588857769966125, |
|
"logits/rejected": -0.10881330817937851, |
|
"logps/chosen": -142.35317993164062, |
|
"logps/rejected": -145.8653106689453, |
|
"loss": 1.4096, |
|
"nll_loss": 0.3703126013278961, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -14.235316276550293, |
|
"rewards/margins": 0.35121291875839233, |
|
"rewards/rejected": -14.586529731750488, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5876068376068376, |
|
"grad_norm": 58.004085751417605, |
|
"learning_rate": 4.348910398555249e-07, |
|
"logits/chosen": 0.025946801528334618, |
|
"logits/rejected": 0.05849064514040947, |
|
"logps/chosen": -112.86296081542969, |
|
"logps/rejected": -121.48736572265625, |
|
"loss": 1.2885, |
|
"nll_loss": 0.3448982238769531, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -11.286294937133789, |
|
"rewards/margins": 0.8624418377876282, |
|
"rewards/rejected": -12.148736953735352, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5982905982905983, |
|
"grad_norm": 60.093543566265524, |
|
"learning_rate": 4.1644395169072575e-07, |
|
"logits/chosen": -0.1264003962278366, |
|
"logits/rejected": -0.11314131319522858, |
|
"logps/chosen": -158.03598022460938, |
|
"logps/rejected": -167.41009521484375, |
|
"loss": 1.3685, |
|
"nll_loss": 0.3446425497531891, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -15.803598403930664, |
|
"rewards/margins": 0.9374116063117981, |
|
"rewards/rejected": -16.741008758544922, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6089743589743589, |
|
"grad_norm": 63.57104854646875, |
|
"learning_rate": 3.9811316978088615e-07, |
|
"logits/chosen": -0.08768756687641144, |
|
"logits/rejected": -0.06864931434392929, |
|
"logps/chosen": -114.5308837890625, |
|
"logps/rejected": -113.0389633178711, |
|
"loss": 1.3041, |
|
"nll_loss": 0.3934231698513031, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -11.453088760375977, |
|
"rewards/margins": -0.14919374883174896, |
|
"rewards/rejected": -11.303895950317383, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6196581196581197, |
|
"grad_norm": 46.88181022689408, |
|
"learning_rate": 3.799242097486038e-07, |
|
"logits/chosen": -0.06880898773670197, |
|
"logits/rejected": -0.0963631272315979, |
|
"logps/chosen": -118.00128173828125, |
|
"logps/rejected": -126.088134765625, |
|
"loss": 1.3619, |
|
"nll_loss": 0.3445819616317749, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -11.800127983093262, |
|
"rewards/margins": 0.8086857795715332, |
|
"rewards/rejected": -12.60881519317627, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6303418803418803, |
|
"grad_norm": 52.868293692367715, |
|
"learning_rate": 3.619023898068123e-07, |
|
"logits/chosen": 0.04794033616781235, |
|
"logits/rejected": 0.02935163304209709, |
|
"logps/chosen": -108.1368408203125, |
|
"logps/rejected": -113.2763442993164, |
|
"loss": 1.3385, |
|
"nll_loss": 0.38119053840637207, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -10.813684463500977, |
|
"rewards/margins": 0.5139498710632324, |
|
"rewards/rejected": -11.327634811401367, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6410256410256411, |
|
"grad_norm": 51.20036019131344, |
|
"learning_rate": 3.4407279551696846e-07, |
|
"logits/chosen": 0.03451260179281235, |
|
"logits/rejected": 0.060915928333997726, |
|
"logps/chosen": -117.63232421875, |
|
"logps/rejected": -124.12806701660156, |
|
"loss": 1.3697, |
|
"nll_loss": 0.3462775647640228, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -11.763232231140137, |
|
"rewards/margins": 0.6495749354362488, |
|
"rewards/rejected": -12.412806510925293, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6517094017094017, |
|
"grad_norm": 58.582600175811734, |
|
"learning_rate": 3.2646024487108213e-07, |
|
"logits/chosen": -0.004241435322910547, |
|
"logits/rejected": -0.03947947174310684, |
|
"logps/chosen": -144.97406005859375, |
|
"logps/rejected": -152.7334442138672, |
|
"loss": 1.4605, |
|
"nll_loss": 0.34281834959983826, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -14.497406005859375, |
|
"rewards/margins": 0.7759408354759216, |
|
"rewards/rejected": -15.273347854614258, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6623931623931624, |
|
"grad_norm": 57.95314258124262, |
|
"learning_rate": 3.0908925374618887e-07, |
|
"logits/chosen": -0.12898002564907074, |
|
"logits/rejected": -0.06738940626382828, |
|
"logps/chosen": -147.439697265625, |
|
"logps/rejected": -149.19259643554688, |
|
"loss": 1.3813, |
|
"nll_loss": 0.3252050578594208, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -14.74397087097168, |
|
"rewards/margins": 0.1752903163433075, |
|
"rewards/rejected": -14.91926097869873, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6730769230769231, |
|
"grad_norm": 62.09918976711039, |
|
"learning_rate": 2.91984001779353e-07, |
|
"logits/chosen": -0.05374965816736221, |
|
"logits/rejected": 0.0732099637389183, |
|
"logps/chosen": -148.73020935058594, |
|
"logps/rejected": -150.61476135253906, |
|
"loss": 1.3364, |
|
"nll_loss": 0.31267058849334717, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -14.873019218444824, |
|
"rewards/margins": 0.18845567107200623, |
|
"rewards/rejected": -15.06147575378418, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6837606837606838, |
|
"grad_norm": 50.667972535787875, |
|
"learning_rate": 2.751682987107029e-07, |
|
"logits/chosen": 0.08584196865558624, |
|
"logits/rejected": 0.1222977414727211, |
|
"logps/chosen": -110.92390441894531, |
|
"logps/rejected": -119.34083557128906, |
|
"loss": 1.306, |
|
"nll_loss": 0.3358796238899231, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -11.092391967773438, |
|
"rewards/margins": 0.841692328453064, |
|
"rewards/rejected": -11.934083938598633, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6944444444444444, |
|
"grad_norm": 55.51527877131126, |
|
"learning_rate": 2.5866555124134577e-07, |
|
"logits/chosen": 0.03731069713830948, |
|
"logits/rejected": 0.012952113524079323, |
|
"logps/chosen": -145.85842895507812, |
|
"logps/rejected": -154.74974060058594, |
|
"loss": 1.3157, |
|
"nll_loss": 0.31585749983787537, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -14.585843086242676, |
|
"rewards/margins": 0.8891321420669556, |
|
"rewards/rejected": -15.474973678588867, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7051282051282052, |
|
"grad_norm": 52.22477831416299, |
|
"learning_rate": 2.424987304522924e-07, |
|
"logits/chosen": 0.07864940166473389, |
|
"logits/rejected": 0.1205131784081459, |
|
"logps/chosen": -111.40937805175781, |
|
"logps/rejected": -114.6185073852539, |
|
"loss": 1.3989, |
|
"nll_loss": 0.3170923590660095, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -11.140935897827148, |
|
"rewards/margins": 0.3209128677845001, |
|
"rewards/rejected": -11.4618501663208, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7158119658119658, |
|
"grad_norm": 59.70611600791039, |
|
"learning_rate": 2.2669033982974944e-07, |
|
"logits/chosen": -0.060904957354068756, |
|
"logits/rejected": 0.005220590624958277, |
|
"logps/chosen": -142.15638732910156, |
|
"logps/rejected": -146.87728881835938, |
|
"loss": 1.3442, |
|
"nll_loss": 0.3452945053577423, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -14.2156400680542, |
|
"rewards/margins": 0.472089946269989, |
|
"rewards/rejected": -14.687728881835938, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7264957264957265, |
|
"grad_norm": 71.85103597981242, |
|
"learning_rate": 2.1126238394127867e-07, |
|
"logits/chosen": -0.016889113932847977, |
|
"logits/rejected": -0.0013866141671314836, |
|
"logps/chosen": -133.07833862304688, |
|
"logps/rejected": -141.3354949951172, |
|
"loss": 1.3755, |
|
"nll_loss": 0.294593870639801, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -13.307835578918457, |
|
"rewards/margins": 0.8257135152816772, |
|
"rewards/rejected": -14.133550643920898, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7371794871794872, |
|
"grad_norm": 55.16644312498042, |
|
"learning_rate": 1.9623633780643155e-07, |
|
"logits/chosen": 0.07130751758813858, |
|
"logits/rejected": 0.010698718018829823, |
|
"logps/chosen": -111.66865539550781, |
|
"logps/rejected": -113.45048522949219, |
|
"loss": 1.3458, |
|
"nll_loss": 0.42604750394821167, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -11.166866302490234, |
|
"rewards/margins": 0.1781845986843109, |
|
"rewards/rejected": -11.345048904418945, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7478632478632479, |
|
"grad_norm": 56.371622200044676, |
|
"learning_rate": 1.8163311700448898e-07, |
|
"logits/chosen": -0.0578581877052784, |
|
"logits/rejected": -0.026563648134469986, |
|
"logps/chosen": -113.5242919921875, |
|
"logps/rejected": -123.50186920166016, |
|
"loss": 1.3004, |
|
"nll_loss": 0.35681912302970886, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -11.352429389953613, |
|
"rewards/margins": 0.9977572560310364, |
|
"rewards/rejected": -12.350186347961426, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7585470085470085, |
|
"grad_norm": 62.52480406001481, |
|
"learning_rate": 1.674730485609166e-07, |
|
"logits/chosen": -0.012843991629779339, |
|
"logits/rejected": -0.071006640791893, |
|
"logps/chosen": -118.711181640625, |
|
"logps/rejected": -130.95870971679688, |
|
"loss": 1.2971, |
|
"nll_loss": 0.3193722665309906, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -11.871118545532227, |
|
"rewards/margins": 1.22475266456604, |
|
"rewards/rejected": -13.095870971679688, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 55.56720356363359, |
|
"learning_rate": 1.537758426530622e-07, |
|
"logits/chosen": -0.01560185570269823, |
|
"logits/rejected": 0.07330085337162018, |
|
"logps/chosen": -118.5744400024414, |
|
"logps/rejected": -115.8942642211914, |
|
"loss": 1.4566, |
|
"nll_loss": 0.38159191608428955, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -11.857443809509277, |
|
"rewards/margins": -0.26801711320877075, |
|
"rewards/rejected": -11.589426040649414, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7799145299145299, |
|
"grad_norm": 53.63595812481311, |
|
"learning_rate": 1.4056056517447634e-07, |
|
"logits/chosen": 0.0017164063174277544, |
|
"logits/rejected": 0.017641058191657066, |
|
"logps/chosen": -107.86031341552734, |
|
"logps/rejected": -113.5797119140625, |
|
"loss": 1.3115, |
|
"nll_loss": 0.328066885471344, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -10.786030769348145, |
|
"rewards/margins": 0.5719406008720398, |
|
"rewards/rejected": -11.357972145080566, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7905982905982906, |
|
"grad_norm": 46.826432951695814, |
|
"learning_rate": 1.2784561119604682e-07, |
|
"logits/chosen": -0.038930945098400116, |
|
"logits/rejected": -0.10993669927120209, |
|
"logps/chosen": -128.37625122070312, |
|
"logps/rejected": -142.15855407714844, |
|
"loss": 1.3305, |
|
"nll_loss": 0.38407421112060547, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -12.837625503540039, |
|
"rewards/margins": 1.3782320022583008, |
|
"rewards/rejected": -14.215856552124023, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8012820512820513, |
|
"grad_norm": 54.285824433116986, |
|
"learning_rate": 1.156486793608899e-07, |
|
"logits/chosen": -0.0009039134019985795, |
|
"logits/rejected": -0.0710187703371048, |
|
"logps/chosen": -118.76985168457031, |
|
"logps/rejected": -133.30014038085938, |
|
"loss": 1.2896, |
|
"nll_loss": 0.3520449101924896, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -11.876985549926758, |
|
"rewards/margins": 1.4530264139175415, |
|
"rewards/rejected": -13.330012321472168, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.811965811965812, |
|
"grad_norm": 52.729935383003465, |
|
"learning_rate": 1.0398674724863581e-07, |
|
"logits/chosen": -0.009277289733290672, |
|
"logits/rejected": 0.07069944590330124, |
|
"logps/chosen": -118.21263122558594, |
|
"logps/rejected": -118.44798278808594, |
|
"loss": 1.3282, |
|
"nll_loss": 0.3493942320346832, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -11.821264266967773, |
|
"rewards/margins": 0.02353498339653015, |
|
"rewards/rejected": -11.844799041748047, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8226495726495726, |
|
"grad_norm": 64.0051849097744, |
|
"learning_rate": 9.287604774340235e-08, |
|
"logits/chosen": 0.08045725524425507, |
|
"logits/rejected": 0.1136372834444046, |
|
"logps/chosen": -118.67901611328125, |
|
"logps/rejected": -126.129638671875, |
|
"loss": 1.2627, |
|
"nll_loss": 0.3460482656955719, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -11.867900848388672, |
|
"rewards/margins": 0.745063304901123, |
|
"rewards/rejected": -12.612964630126953, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 57.70135234770639, |
|
"learning_rate": 8.233204643835234e-08, |
|
"logits/chosen": -0.02369830384850502, |
|
"logits/rejected": -0.13596853613853455, |
|
"logps/chosen": -144.19119262695312, |
|
"logps/rejected": -155.91761779785156, |
|
"loss": 1.2748, |
|
"nll_loss": 0.3630937933921814, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -14.419118881225586, |
|
"rewards/margins": 1.1726421117782593, |
|
"rewards/rejected": -15.591761589050293, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.844017094017094, |
|
"grad_norm": 58.46522179629448, |
|
"learning_rate": 7.236942010828429e-08, |
|
"logits/chosen": -0.08849911391735077, |
|
"logits/rejected": -0.05617945268750191, |
|
"logps/chosen": -123.7318344116211, |
|
"logps/rejected": -124.00958251953125, |
|
"loss": 1.3258, |
|
"nll_loss": 0.3757060170173645, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -12.373184204101562, |
|
"rewards/margins": 0.027775108814239502, |
|
"rewards/rejected": -12.400957107543945, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8547008547008547, |
|
"grad_norm": 57.911987107984345, |
|
"learning_rate": 6.300203628022271e-08, |
|
"logits/chosen": -0.08188799023628235, |
|
"logits/rejected": -0.1166069284081459, |
|
"logps/chosen": -125.78984069824219, |
|
"logps/rejected": -133.1624755859375, |
|
"loss": 1.2968, |
|
"nll_loss": 0.37085098028182983, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -12.578984260559082, |
|
"rewards/margins": 0.7372626066207886, |
|
"rewards/rejected": -13.316246032714844, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8547008547008547, |
|
"eval_logits/chosen": 0.1708301454782486, |
|
"eval_logits/rejected": 0.21349689364433289, |
|
"eval_logps/chosen": -131.8848114013672, |
|
"eval_logps/rejected": -143.98866271972656, |
|
"eval_loss": 1.3273688554763794, |
|
"eval_nll_loss": 0.3287227749824524, |
|
"eval_rewards/accuracies": 0.7016128897666931, |
|
"eval_rewards/chosen": -13.188480377197266, |
|
"eval_rewards/margins": 1.2103854417800903, |
|
"eval_rewards/rejected": -14.398866653442383, |
|
"eval_runtime": 102.9227, |
|
"eval_samples_per_second": 19.053, |
|
"eval_steps_per_second": 0.301, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8653846153846154, |
|
"grad_norm": 52.22311027332078, |
|
"learning_rate": 5.42429339304461e-08, |
|
"logits/chosen": 0.03789149597287178, |
|
"logits/rejected": -0.024609360843896866, |
|
"logps/chosen": -125.41561126708984, |
|
"logps/rejected": -138.07968139648438, |
|
"loss": 1.1837, |
|
"nll_loss": 0.32930082082748413, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -12.541561126708984, |
|
"rewards/margins": 1.2664083242416382, |
|
"rewards/rejected": -13.807971000671387, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8760683760683761, |
|
"grad_norm": 59.39997515872638, |
|
"learning_rate": 4.610430533481857e-08, |
|
"logits/chosen": -0.09911607950925827, |
|
"logits/rejected": -0.08993721008300781, |
|
"logps/chosen": -114.1660385131836, |
|
"logps/rejected": -119.9376449584961, |
|
"loss": 1.3091, |
|
"nll_loss": 0.3979756832122803, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -11.416604042053223, |
|
"rewards/margins": 0.5771591067314148, |
|
"rewards/rejected": -11.993764877319336, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8867521367521367, |
|
"grad_norm": 60.29868077187127, |
|
"learning_rate": 3.859747909769162e-08, |
|
"logits/chosen": -0.0670127421617508, |
|
"logits/rejected": -0.014563268050551414, |
|
"logps/chosen": -157.24630737304688, |
|
"logps/rejected": -162.0565948486328, |
|
"loss": 1.413, |
|
"nll_loss": 0.33768731355667114, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -15.724632263183594, |
|
"rewards/margins": 0.48102617263793945, |
|
"rewards/rejected": -16.205659866333008, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8974358974358975, |
|
"grad_norm": 60.41014318466645, |
|
"learning_rate": 3.173290438299697e-08, |
|
"logits/chosen": 0.08699943125247955, |
|
"logits/rejected": 0.08032406866550446, |
|
"logps/chosen": -125.44620513916016, |
|
"logps/rejected": -134.0313720703125, |
|
"loss": 1.3516, |
|
"nll_loss": 0.35607296228408813, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -12.5446195602417, |
|
"rewards/margins": 0.8585165739059448, |
|
"rewards/rejected": -13.40313720703125, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9081196581196581, |
|
"grad_norm": 55.3705487410181, |
|
"learning_rate": 2.5520136369481194e-08, |
|
"logits/chosen": -0.04224336892366409, |
|
"logits/rejected": -0.06296161562204361, |
|
"logps/chosen": -165.10733032226562, |
|
"logps/rejected": -172.8993682861328, |
|
"loss": 1.2141, |
|
"nll_loss": 0.3679961562156677, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -16.510732650756836, |
|
"rewards/margins": 0.7792031168937683, |
|
"rewards/rejected": -17.289936065673828, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9188034188034188, |
|
"grad_norm": 54.089208241870814, |
|
"learning_rate": 1.996782295032745e-08, |
|
"logits/chosen": -0.14895446598529816, |
|
"logits/rejected": -0.13183912634849548, |
|
"logps/chosen": -142.65121459960938, |
|
"logps/rejected": -148.99490356445312, |
|
"loss": 1.2328, |
|
"nll_loss": 0.31830939650535583, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -14.265121459960938, |
|
"rewards/margins": 0.6343703269958496, |
|
"rewards/rejected": -14.899490356445312, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9294871794871795, |
|
"grad_norm": 55.028012487261535, |
|
"learning_rate": 1.508369269567783e-08, |
|
"logits/chosen": 0.002587652299553156, |
|
"logits/rejected": -0.038656361401081085, |
|
"logps/chosen": -132.2308349609375, |
|
"logps/rejected": -143.6236114501953, |
|
"loss": 1.3378, |
|
"nll_loss": 0.35204577445983887, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -13.22308349609375, |
|
"rewards/margins": 1.1392773389816284, |
|
"rewards/rejected": -14.362360954284668, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9401709401709402, |
|
"grad_norm": 65.09993319908288, |
|
"learning_rate": 1.0874544094811422e-08, |
|
"logits/chosen": 0.02723981812596321, |
|
"logits/rejected": 0.026110615581274033, |
|
"logps/chosen": -121.16329193115234, |
|
"logps/rejected": -125.1588134765625, |
|
"loss": 1.3971, |
|
"nll_loss": 0.3366636037826538, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -12.116328239440918, |
|
"rewards/margins": 0.3995509743690491, |
|
"rewards/rejected": -12.515880584716797, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9508547008547008, |
|
"grad_norm": 58.673331587997495, |
|
"learning_rate": 7.346236092954316e-09, |
|
"logits/chosen": -0.07683765143156052, |
|
"logits/rejected": -0.08344952762126923, |
|
"logps/chosen": -159.8972625732422, |
|
"logps/rejected": -165.53884887695312, |
|
"loss": 1.3386, |
|
"nll_loss": 0.3503979444503784, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -15.989726066589355, |
|
"rewards/margins": 0.5641571879386902, |
|
"rewards/rejected": -16.553882598876953, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9615384615384616, |
|
"grad_norm": 54.28528572494338, |
|
"learning_rate": 4.50367993589107e-09, |
|
"logits/chosen": -0.16253122687339783, |
|
"logits/rejected": -0.07185138761997223, |
|
"logps/chosen": -142.61741638183594, |
|
"logps/rejected": -142.0267791748047, |
|
"loss": 1.3953, |
|
"nll_loss": 0.4227580428123474, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -14.261739730834961, |
|
"rewards/margins": -0.05906330421566963, |
|
"rewards/rejected": -14.202677726745605, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9722222222222222, |
|
"grad_norm": 57.751616603455034, |
|
"learning_rate": 2.3508323337321224e-09, |
|
"logits/chosen": 0.05629957839846611, |
|
"logits/rejected": 0.04388625547289848, |
|
"logps/chosen": -109.04362487792969, |
|
"logps/rejected": -116.85518646240234, |
|
"loss": 1.3543, |
|
"nll_loss": 0.4259931445121765, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -10.904363632202148, |
|
"rewards/margins": 0.7811557650566101, |
|
"rewards/rejected": -11.685519218444824, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9829059829059829, |
|
"grad_norm": 53.54066392797414, |
|
"learning_rate": 8.906899533517864e-10, |
|
"logits/chosen": 0.12563523650169373, |
|
"logits/rejected": 0.1722358763217926, |
|
"logps/chosen": -120.39036560058594, |
|
"logps/rejected": -125.317138671875, |
|
"loss": 1.31, |
|
"nll_loss": 0.3701631426811218, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -12.039037704467773, |
|
"rewards/margins": 0.49267855286598206, |
|
"rewards/rejected": -12.531715393066406, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9935897435897436, |
|
"grad_norm": 54.485812172434166, |
|
"learning_rate": 1.252852471625987e-10, |
|
"logits/chosen": 0.047066349536180496, |
|
"logits/rejected": 0.08690531551837921, |
|
"logps/chosen": -110.74609375, |
|
"logps/rejected": -122.6104965209961, |
|
"loss": 1.1667, |
|
"nll_loss": 0.3447542190551758, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -11.074609756469727, |
|
"rewards/margins": 1.1864404678344727, |
|
"rewards/rejected": -12.261051177978516, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 468, |
|
"total_flos": 0.0, |
|
"train_loss": 1.4382328207676227, |
|
"train_runtime": 9608.0854, |
|
"train_samples_per_second": 6.232, |
|
"train_steps_per_second": 0.049 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 468, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |