|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9765925925925925, |
|
"eval_steps": 500, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.047407407407407405, |
|
"grad_norm": 2865.851318359375, |
|
"learning_rate": 7.8125e-07, |
|
"log_odds_chosen": 5.047066688537598, |
|
"log_odds_ratio": -9.840336799621582, |
|
"logits/chosen": 138.34872436523438, |
|
"logits/rejected": 152.904296875, |
|
"logps/chosen": -20.9003849029541, |
|
"logps/rejected": -25.947729110717773, |
|
"loss": 459.6987, |
|
"nll_loss": 8.465580940246582, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -10.45019245147705, |
|
"rewards/margins": 2.523672580718994, |
|
"rewards/rejected": -12.973864555358887, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.09481481481481481, |
|
"grad_norm": 2037.79638671875, |
|
"learning_rate": 1.5625e-06, |
|
"log_odds_chosen": 2.4296860694885254, |
|
"log_odds_ratio": -7.415003299713135, |
|
"logits/chosen": 131.40785217285156, |
|
"logits/rejected": 155.49392700195312, |
|
"logps/chosen": -16.34769058227539, |
|
"logps/rejected": -18.776687622070312, |
|
"loss": 405.2566, |
|
"nll_loss": 7.926710605621338, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -8.173845291137695, |
|
"rewards/margins": 1.21449875831604, |
|
"rewards/rejected": -9.388343811035156, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14222222222222222, |
|
"grad_norm": 716.259521484375, |
|
"learning_rate": 2.3437500000000002e-06, |
|
"log_odds_chosen": 4.6704511642456055, |
|
"log_odds_ratio": -8.288759231567383, |
|
"logits/chosen": 103.96134948730469, |
|
"logits/rejected": 130.3046417236328, |
|
"logps/chosen": -21.693960189819336, |
|
"logps/rejected": -26.36324691772461, |
|
"loss": 396.9101, |
|
"nll_loss": 9.263435363769531, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -10.846980094909668, |
|
"rewards/margins": 2.334644079208374, |
|
"rewards/rejected": -13.181623458862305, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.18962962962962962, |
|
"grad_norm": 773.0562133789062, |
|
"learning_rate": 3.125e-06, |
|
"log_odds_chosen": -0.22766943275928497, |
|
"log_odds_ratio": -8.08221435546875, |
|
"logits/chosen": 127.30647277832031, |
|
"logits/rejected": 133.12646484375, |
|
"logps/chosen": -18.663068771362305, |
|
"logps/rejected": -18.436843872070312, |
|
"loss": 433.5117, |
|
"nll_loss": 8.696657180786133, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -9.331534385681152, |
|
"rewards/margins": -0.11311338096857071, |
|
"rewards/rejected": -9.218421936035156, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.23703703703703705, |
|
"grad_norm": 461.5646667480469, |
|
"learning_rate": 3.90625e-06, |
|
"log_odds_chosen": 4.552544593811035, |
|
"log_odds_ratio": -8.04813003540039, |
|
"logits/chosen": 129.98939514160156, |
|
"logits/rejected": 138.5162811279297, |
|
"logps/chosen": -19.01546287536621, |
|
"logps/rejected": -23.568456649780273, |
|
"loss": 361.2036, |
|
"nll_loss": 7.481656074523926, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -9.507731437683105, |
|
"rewards/margins": 2.276496648788452, |
|
"rewards/rejected": -11.784228324890137, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.28444444444444444, |
|
"grad_norm": 364.19580078125, |
|
"learning_rate": 4.6875000000000004e-06, |
|
"log_odds_chosen": 1.745486855506897, |
|
"log_odds_ratio": -7.753143310546875, |
|
"logits/chosen": 116.81257629394531, |
|
"logits/rejected": 154.61524963378906, |
|
"logps/chosen": -19.596527099609375, |
|
"logps/rejected": -21.339160919189453, |
|
"loss": 394.3339, |
|
"nll_loss": 8.240059852600098, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -9.798263549804688, |
|
"rewards/margins": 0.8713172674179077, |
|
"rewards/rejected": -10.669580459594727, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33185185185185184, |
|
"grad_norm": 526.892333984375, |
|
"learning_rate": 4.998613757348784e-06, |
|
"log_odds_chosen": -2.7288818359375, |
|
"log_odds_ratio": -8.869112968444824, |
|
"logits/chosen": 149.9911651611328, |
|
"logits/rejected": 133.53860473632812, |
|
"logps/chosen": -19.254487991333008, |
|
"logps/rejected": -16.524951934814453, |
|
"loss": 371.9777, |
|
"nll_loss": 7.2948455810546875, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -9.627243995666504, |
|
"rewards/margins": -1.3647680282592773, |
|
"rewards/rejected": -8.262475967407227, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.37925925925925924, |
|
"grad_norm": 638.0831909179688, |
|
"learning_rate": 4.990147841143462e-06, |
|
"log_odds_chosen": 2.0575106143951416, |
|
"log_odds_ratio": -4.877079963684082, |
|
"logits/chosen": 153.21884155273438, |
|
"logits/rejected": 167.37942504882812, |
|
"logps/chosen": -14.817087173461914, |
|
"logps/rejected": -16.873952865600586, |
|
"loss": 333.9902, |
|
"nll_loss": 7.247918128967285, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": -7.408543586730957, |
|
"rewards/margins": 1.0284324884414673, |
|
"rewards/rejected": -8.436976432800293, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 895.48095703125, |
|
"learning_rate": 4.97401218720448e-06, |
|
"log_odds_chosen": 1.4577323198318481, |
|
"log_odds_ratio": -8.850030899047852, |
|
"logits/chosen": 176.9611053466797, |
|
"logits/rejected": 164.4369354248047, |
|
"logps/chosen": -18.82620620727539, |
|
"logps/rejected": -20.282642364501953, |
|
"loss": 335.1132, |
|
"nll_loss": 6.7939629554748535, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -9.413103103637695, |
|
"rewards/margins": 0.7282184362411499, |
|
"rewards/rejected": -10.141321182250977, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4740740740740741, |
|
"grad_norm": 1942.3775634765625, |
|
"learning_rate": 4.950256493879795e-06, |
|
"log_odds_chosen": 2.4757702350616455, |
|
"log_odds_ratio": -5.5582990646362305, |
|
"logits/chosen": 197.7135772705078, |
|
"logits/rejected": 197.5384521484375, |
|
"logps/chosen": -13.839834213256836, |
|
"logps/rejected": -16.314041137695312, |
|
"loss": 313.9627, |
|
"nll_loss": 6.590758323669434, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -6.919917106628418, |
|
"rewards/margins": 1.237102746963501, |
|
"rewards/rejected": -8.157020568847656, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5214814814814814, |
|
"grad_norm": 1690.2572021484375, |
|
"learning_rate": 4.918953929490768e-06, |
|
"log_odds_chosen": 0.49450284242630005, |
|
"log_odds_ratio": -4.888993740081787, |
|
"logits/chosen": 212.30068969726562, |
|
"logits/rejected": 219.568115234375, |
|
"logps/chosen": -10.383580207824707, |
|
"logps/rejected": -10.86988353729248, |
|
"loss": 223.3722, |
|
"nll_loss": 5.05554723739624, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -5.1917901039123535, |
|
"rewards/margins": 0.24315185844898224, |
|
"rewards/rejected": -5.43494176864624, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5688888888888889, |
|
"grad_norm": 983.438720703125, |
|
"learning_rate": 4.880200906971321e-06, |
|
"log_odds_chosen": 0.5941994786262512, |
|
"log_odds_ratio": -1.8958613872528076, |
|
"logits/chosen": 207.96792602539062, |
|
"logits/rejected": 225.57461547851562, |
|
"logps/chosen": -5.315521717071533, |
|
"logps/rejected": -5.895586967468262, |
|
"loss": 129.5271, |
|
"nll_loss": 3.1816208362579346, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -2.6577608585357666, |
|
"rewards/margins": 0.2900325059890747, |
|
"rewards/rejected": -2.947793483734131, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6162962962962963, |
|
"grad_norm": 435.369384765625, |
|
"learning_rate": 4.834116786912897e-06, |
|
"log_odds_chosen": -0.35661205649375916, |
|
"log_odds_ratio": -1.3505533933639526, |
|
"logits/chosen": 233.6535186767578, |
|
"logits/rejected": 232.8253936767578, |
|
"logps/chosen": -2.915100574493408, |
|
"logps/rejected": -2.5467069149017334, |
|
"loss": 93.9753, |
|
"nll_loss": 2.4082727432250977, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -1.457550287246704, |
|
"rewards/margins": -0.18419687449932098, |
|
"rewards/rejected": -1.2733534574508667, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6637037037037037, |
|
"grad_norm": 266.7743835449219, |
|
"learning_rate": 4.780843509929905e-06, |
|
"log_odds_chosen": 0.5245118141174316, |
|
"log_odds_ratio": -0.6836276650428772, |
|
"logits/chosen": 223.84109497070312, |
|
"logits/rejected": 249.83383178710938, |
|
"logps/chosen": -1.803598165512085, |
|
"logps/rejected": -2.3056464195251465, |
|
"loss": 80.6457, |
|
"nll_loss": 2.1085317134857178, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9017990827560425, |
|
"rewards/margins": 0.25102415680885315, |
|
"rewards/rejected": -1.1528232097625732, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 146.09365844726562, |
|
"learning_rate": 4.720545159477921e-06, |
|
"log_odds_chosen": 0.19110290706157684, |
|
"log_odds_ratio": -0.896527886390686, |
|
"logits/chosen": 229.1388702392578, |
|
"logits/rejected": 218.0093536376953, |
|
"logps/chosen": -1.8344182968139648, |
|
"logps/rejected": -1.9851324558258057, |
|
"loss": 76.4303, |
|
"nll_loss": 2.0232486724853516, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9172091484069824, |
|
"rewards/margins": 0.0753571167588234, |
|
"rewards/rejected": -0.9925662279129028, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7585185185185185, |
|
"grad_norm": 188.69973754882812, |
|
"learning_rate": 4.653407456471222e-06, |
|
"log_odds_chosen": 0.3417088985443115, |
|
"log_odds_ratio": -0.7933382391929626, |
|
"logits/chosen": 206.59188842773438, |
|
"logits/rejected": 223.7476348876953, |
|
"logps/chosen": -1.6943140029907227, |
|
"logps/rejected": -1.9918110370635986, |
|
"loss": 73.3448, |
|
"nll_loss": 1.9511661529541016, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8471570014953613, |
|
"rewards/margins": 0.1487484872341156, |
|
"rewards/rejected": -0.9959055185317993, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.8059259259259259, |
|
"grad_norm": 137.05148315429688, |
|
"learning_rate": 4.579637187256222e-06, |
|
"log_odds_chosen": 0.39623409509658813, |
|
"log_odds_ratio": -0.7564057111740112, |
|
"logits/chosen": 211.7617950439453, |
|
"logits/rejected": 254.6263885498047, |
|
"logps/chosen": -1.5494643449783325, |
|
"logps/rejected": -1.891304612159729, |
|
"loss": 71.0347, |
|
"nll_loss": 1.7963597774505615, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7747321724891663, |
|
"rewards/margins": 0.17092011868953705, |
|
"rewards/rejected": -0.9456523060798645, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 158.83348083496094, |
|
"learning_rate": 4.499461566702685e-06, |
|
"log_odds_chosen": 0.28660133481025696, |
|
"log_odds_ratio": -0.7941745519638062, |
|
"logits/chosen": 222.36099243164062, |
|
"logits/rejected": 244.03665161132812, |
|
"logps/chosen": -1.6755253076553345, |
|
"logps/rejected": -1.9040807485580444, |
|
"loss": 73.0658, |
|
"nll_loss": 1.8526337146759033, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.8377626538276672, |
|
"rewards/margins": 0.11427769809961319, |
|
"rewards/rejected": -0.9520403742790222, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.9007407407407407, |
|
"grad_norm": 170.69134521484375, |
|
"learning_rate": 4.413127538374411e-06, |
|
"log_odds_chosen": 0.4414753317832947, |
|
"log_odds_ratio": -0.6728295087814331, |
|
"logits/chosen": 227.6732940673828, |
|
"logits/rejected": 249.4246063232422, |
|
"logps/chosen": -1.4834215641021729, |
|
"logps/rejected": -1.850646734237671, |
|
"loss": 70.3617, |
|
"nll_loss": 1.7307571172714233, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7417107820510864, |
|
"rewards/margins": 0.18361257016658783, |
|
"rewards/rejected": -0.9253233671188354, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.9481481481481482, |
|
"grad_norm": 195.4022216796875, |
|
"learning_rate": 4.320901013934887e-06, |
|
"log_odds_chosen": 0.26656374335289, |
|
"log_odds_ratio": -0.7893905639648438, |
|
"logits/chosen": 230.3835906982422, |
|
"logits/rejected": 250.1712188720703, |
|
"logps/chosen": -1.6341091394424438, |
|
"logps/rejected": -1.8852962255477905, |
|
"loss": 70.7829, |
|
"nll_loss": 1.838405966758728, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.8170545697212219, |
|
"rewards/margins": 0.12559358775615692, |
|
"rewards/rejected": -0.9426481127738953, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9955555555555555, |
|
"grad_norm": 120.9432144165039, |
|
"learning_rate": 4.223066054130568e-06, |
|
"log_odds_chosen": 0.40909630060195923, |
|
"log_odds_ratio": -0.6454111337661743, |
|
"logits/chosen": 230.5096435546875, |
|
"logits/rejected": 254.70449829101562, |
|
"logps/chosen": -1.4842649698257446, |
|
"logps/rejected": -1.8271305561065674, |
|
"loss": 65.2115, |
|
"nll_loss": 1.6993058919906616, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.7421324849128723, |
|
"rewards/margins": 0.17143282294273376, |
|
"rewards/rejected": -0.9135652780532837, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.037925925925926, |
|
"grad_norm": 135.30653381347656, |
|
"learning_rate": 4.11992399387438e-06, |
|
"log_odds_chosen": 0.27806416153907776, |
|
"log_odds_ratio": -0.739980936050415, |
|
"logits/chosen": 248.7511749267578, |
|
"logits/rejected": 262.7025146484375, |
|
"logps/chosen": -1.4985274076461792, |
|
"logps/rejected": -1.7603181600570679, |
|
"loss": 58.8669, |
|
"nll_loss": 1.6877117156982422, |
|
"rewards/accuracies": 0.5874125957489014, |
|
"rewards/chosen": -0.7492637038230896, |
|
"rewards/margins": 0.1308954954147339, |
|
"rewards/rejected": -0.8801590800285339, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0853333333333333, |
|
"grad_norm": 111.60261535644531, |
|
"learning_rate": 4.011792514124217e-06, |
|
"log_odds_chosen": 0.49480900168418884, |
|
"log_odds_ratio": -0.6541386842727661, |
|
"logits/chosen": 237.9220733642578, |
|
"logits/rejected": 269.5302429199219, |
|
"logps/chosen": -1.4647657871246338, |
|
"logps/rejected": -1.8502311706542969, |
|
"loss": 64.0187, |
|
"nll_loss": 1.6229032278060913, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.7323828935623169, |
|
"rewards/margins": 0.19273266196250916, |
|
"rewards/rejected": -0.9251155853271484, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.1327407407407408, |
|
"grad_norm": 136.4354248046875, |
|
"learning_rate": 3.899004663415083e-06, |
|
"log_odds_chosen": 0.5176212191581726, |
|
"log_odds_ratio": -0.6403207778930664, |
|
"logits/chosen": 233.19796752929688, |
|
"logits/rejected": 246.4919891357422, |
|
"logps/chosen": -1.4554225206375122, |
|
"logps/rejected": -1.8822691440582275, |
|
"loss": 66.2991, |
|
"nll_loss": 1.7435401678085327, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.7277112603187561, |
|
"rewards/margins": 0.21342334151268005, |
|
"rewards/rejected": -0.9411345720291138, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.1801481481481482, |
|
"grad_norm": 115.97614288330078, |
|
"learning_rate": 3.7819078320585865e-06, |
|
"log_odds_chosen": 0.27887487411499023, |
|
"log_odds_ratio": -0.7117995023727417, |
|
"logits/chosen": 243.6487274169922, |
|
"logits/rejected": 249.3553924560547, |
|
"logps/chosen": -1.5128828287124634, |
|
"logps/rejected": -1.7247974872589111, |
|
"loss": 63.7844, |
|
"nll_loss": 1.6903963088989258, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.7564414143562317, |
|
"rewards/margins": 0.10595734417438507, |
|
"rewards/rejected": -0.8623987436294556, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.2275555555555555, |
|
"grad_norm": 105.05059051513672, |
|
"learning_rate": 3.660862682169283e-06, |
|
"log_odds_chosen": 0.2080799639225006, |
|
"log_odds_ratio": -0.7350869178771973, |
|
"logits/chosen": 235.3159942626953, |
|
"logits/rejected": 248.73782348632812, |
|
"logps/chosen": -1.3867459297180176, |
|
"logps/rejected": -1.5404752492904663, |
|
"loss": 61.9593, |
|
"nll_loss": 1.6034740209579468, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.6933729648590088, |
|
"rewards/margins": 0.07686467468738556, |
|
"rewards/rejected": -0.7702376246452332, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.274962962962963, |
|
"grad_norm": 129.73594665527344, |
|
"learning_rate": 3.536242036813436e-06, |
|
"log_odds_chosen": 0.2643691599369049, |
|
"log_odds_ratio": -0.7503995895385742, |
|
"logits/chosen": 247.5218048095703, |
|
"logits/rejected": 255.9250030517578, |
|
"logps/chosen": -1.4334195852279663, |
|
"logps/rejected": -1.6370153427124023, |
|
"loss": 62.8912, |
|
"nll_loss": 1.6255853176116943, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.7167097926139832, |
|
"rewards/margins": 0.10179783403873444, |
|
"rewards/rejected": -0.8185076713562012, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.3223703703703704, |
|
"grad_norm": 156.6639404296875, |
|
"learning_rate": 3.4084297317016353e-06, |
|
"log_odds_chosen": 0.2968347668647766, |
|
"log_odds_ratio": -0.7262933254241943, |
|
"logits/chosen": 256.20166015625, |
|
"logits/rejected": 252.2810516357422, |
|
"logps/chosen": -1.4891420602798462, |
|
"logps/rejected": -1.745237946510315, |
|
"loss": 65.243, |
|
"nll_loss": 1.665055274963379, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.7445710301399231, |
|
"rewards/margins": 0.12804797291755676, |
|
"rewards/rejected": -0.8726189732551575, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.3697777777777778, |
|
"grad_norm": 133.37393188476562, |
|
"learning_rate": 3.2778194329621104e-06, |
|
"log_odds_chosen": 0.48826122283935547, |
|
"log_odds_ratio": -0.6439377665519714, |
|
"logits/chosen": 242.45767211914062, |
|
"logits/rejected": 267.6114501953125, |
|
"logps/chosen": -1.3750956058502197, |
|
"logps/rejected": -1.8073225021362305, |
|
"loss": 62.7163, |
|
"nll_loss": 1.6385784149169922, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6875478029251099, |
|
"rewards/margins": 0.21611353754997253, |
|
"rewards/rejected": -0.9036612510681152, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.417185185185185, |
|
"grad_norm": 145.82296752929688, |
|
"learning_rate": 3.144813424636031e-06, |
|
"log_odds_chosen": 0.10933913290500641, |
|
"log_odds_ratio": -0.7888091802597046, |
|
"logits/chosen": 251.4317169189453, |
|
"logits/rejected": 244.14462280273438, |
|
"logps/chosen": -1.416845679283142, |
|
"logps/rejected": -1.5029141902923584, |
|
"loss": 62.2472, |
|
"nll_loss": 1.6043399572372437, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.708422839641571, |
|
"rewards/margins": 0.043034255504608154, |
|
"rewards/rejected": -0.7514570951461792, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.4645925925925927, |
|
"grad_norm": 124.13430786132812, |
|
"learning_rate": 3.0098213696293542e-06, |
|
"log_odds_chosen": 0.3432820439338684, |
|
"log_odds_ratio": -0.6613715887069702, |
|
"logits/chosen": 228.41439819335938, |
|
"logits/rejected": 260.245849609375, |
|
"logps/chosen": -1.31131911277771, |
|
"logps/rejected": -1.5758918523788452, |
|
"loss": 61.9618, |
|
"nll_loss": 1.592642903327942, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.655659556388855, |
|
"rewards/margins": 0.13228638470172882, |
|
"rewards/rejected": -0.7879459261894226, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.512, |
|
"grad_norm": 169.77215576171875, |
|
"learning_rate": 2.8732590479375167e-06, |
|
"log_odds_chosen": 0.3919784128665924, |
|
"log_odds_ratio": -0.6399356126785278, |
|
"logits/chosen": 238.72976684570312, |
|
"logits/rejected": 242.7633514404297, |
|
"logps/chosen": -1.339658498764038, |
|
"logps/rejected": -1.6707757711410522, |
|
"loss": 62.1449, |
|
"nll_loss": 1.5825494527816772, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.669829249382019, |
|
"rewards/margins": 0.1655586212873459, |
|
"rewards/rejected": -0.8353878855705261, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.5594074074074074, |
|
"grad_norm": 136.43710327148438, |
|
"learning_rate": 2.735547076029296e-06, |
|
"log_odds_chosen": 0.28347623348236084, |
|
"log_odds_ratio": -0.6901515126228333, |
|
"logits/chosen": 249.5279541015625, |
|
"logits/rejected": 233.10983276367188, |
|
"logps/chosen": -1.348504900932312, |
|
"logps/rejected": -1.563481092453003, |
|
"loss": 62.4916, |
|
"nll_loss": 1.5731277465820312, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.674252450466156, |
|
"rewards/margins": 0.10748811066150665, |
|
"rewards/rejected": -0.7817405462265015, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.6068148148148147, |
|
"grad_norm": 162.6702880859375, |
|
"learning_rate": 2.5971096113341692e-06, |
|
"log_odds_chosen": 0.3690604269504547, |
|
"log_odds_ratio": -0.6491698622703552, |
|
"logits/chosen": 239.92678833007812, |
|
"logits/rejected": 247.5673828125, |
|
"logps/chosen": -1.3685719966888428, |
|
"logps/rejected": -1.6624857187271118, |
|
"loss": 61.4999, |
|
"nll_loss": 1.5894715785980225, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.6842859983444214, |
|
"rewards/margins": 0.14695683121681213, |
|
"rewards/rejected": -0.8312428593635559, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.6542222222222223, |
|
"grad_norm": 121.98951721191406, |
|
"learning_rate": 2.458373045823404e-06, |
|
"log_odds_chosen": 0.37640315294265747, |
|
"log_odds_ratio": -0.6424993872642517, |
|
"logits/chosen": 243.83737182617188, |
|
"logits/rejected": 263.46844482421875, |
|
"logps/chosen": -1.3080052137374878, |
|
"logps/rejected": -1.6126611232757568, |
|
"loss": 61.3657, |
|
"nll_loss": 1.595855712890625, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.6540026068687439, |
|
"rewards/margins": 0.15232795476913452, |
|
"rewards/rejected": -0.8063305616378784, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.7016296296296296, |
|
"grad_norm": 126.68740844726562, |
|
"learning_rate": 2.3197646927086697e-06, |
|
"log_odds_chosen": 0.05865911766886711, |
|
"log_odds_ratio": -0.7959011793136597, |
|
"logits/chosen": 255.73562622070312, |
|
"logits/rejected": 250.8922882080078, |
|
"logps/chosen": -1.4331071376800537, |
|
"logps/rejected": -1.4851996898651123, |
|
"loss": 62.2336, |
|
"nll_loss": 1.6543314456939697, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.7165535688400269, |
|
"rewards/margins": 0.026046359911561012, |
|
"rewards/rejected": -0.7425998449325562, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.749037037037037, |
|
"grad_norm": 126.31131744384766, |
|
"learning_rate": 2.1817114703032176e-06, |
|
"log_odds_chosen": 0.3499384820461273, |
|
"log_odds_ratio": -0.6687244176864624, |
|
"logits/chosen": 250.3800048828125, |
|
"logits/rejected": 253.47647094726562, |
|
"logps/chosen": -1.3136674165725708, |
|
"logps/rejected": -1.603899598121643, |
|
"loss": 61.6932, |
|
"nll_loss": 1.5699495077133179, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.6568337082862854, |
|
"rewards/margins": 0.14511603116989136, |
|
"rewards/rejected": -0.8019497990608215, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.7964444444444445, |
|
"grad_norm": 117.38888549804688, |
|
"learning_rate": 2.044638587099347e-06, |
|
"log_odds_chosen": 0.3149377703666687, |
|
"log_odds_ratio": -0.661319375038147, |
|
"logits/chosen": 235.9986572265625, |
|
"logits/rejected": 258.65423583984375, |
|
"logps/chosen": -1.3008835315704346, |
|
"logps/rejected": -1.5506656169891357, |
|
"loss": 61.813, |
|
"nll_loss": 1.5823218822479248, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.6504417657852173, |
|
"rewards/margins": 0.12489104270935059, |
|
"rewards/rejected": -0.7753328084945679, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.8438518518518519, |
|
"grad_norm": 142.8717498779297, |
|
"learning_rate": 1.9089682321121834e-06, |
|
"log_odds_chosen": 0.4244133532047272, |
|
"log_odds_ratio": -0.6273762583732605, |
|
"logits/chosen": 248.58889770507812, |
|
"logits/rejected": 268.8494567871094, |
|
"logps/chosen": -1.3011976480484009, |
|
"logps/rejected": -1.6522626876831055, |
|
"loss": 60.3822, |
|
"nll_loss": 1.586854100227356, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.6505988240242004, |
|
"rewards/margins": 0.17553254961967468, |
|
"rewards/rejected": -0.8261313438415527, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.8912592592592592, |
|
"grad_norm": 140.85545349121094, |
|
"learning_rate": 1.775118274523545e-06, |
|
"log_odds_chosen": 0.1862161010503769, |
|
"log_odds_ratio": -0.7570394277572632, |
|
"logits/chosen": 239.0512237548828, |
|
"logits/rejected": 254.9342041015625, |
|
"logps/chosen": -1.307045578956604, |
|
"logps/rejected": -1.4443647861480713, |
|
"loss": 61.6621, |
|
"nll_loss": 1.504206895828247, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.653522789478302, |
|
"rewards/margins": 0.06865964829921722, |
|
"rewards/rejected": -0.7221823930740356, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.9386666666666668, |
|
"grad_norm": 124.06290435791016, |
|
"learning_rate": 1.6435009766310372e-06, |
|
"log_odds_chosen": 0.3265306353569031, |
|
"log_odds_ratio": -0.6406243443489075, |
|
"logits/chosen": 236.4462890625, |
|
"logits/rejected": 262.8599548339844, |
|
"logps/chosen": -1.22454035282135, |
|
"logps/rejected": -1.4862773418426514, |
|
"loss": 58.7041, |
|
"nll_loss": 1.498868703842163, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.612270176410675, |
|
"rewards/margins": 0.13086840510368347, |
|
"rewards/rejected": -0.7431386709213257, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.986074074074074, |
|
"grad_norm": 139.6620330810547, |
|
"learning_rate": 1.5145217240665373e-06, |
|
"log_odds_chosen": 0.2167925387620926, |
|
"log_odds_ratio": -0.6764565110206604, |
|
"logits/chosen": 254.82162475585938, |
|
"logits/rejected": 242.75906372070312, |
|
"logps/chosen": -1.2110469341278076, |
|
"logps/rejected": -1.3860079050064087, |
|
"loss": 57.7464, |
|
"nll_loss": 1.4964487552642822, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.6055234670639038, |
|
"rewards/margins": 0.08748046308755875, |
|
"rewards/rejected": -0.6930039525032043, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.0284444444444443, |
|
"grad_norm": 164.2100372314453, |
|
"learning_rate": 1.388577777195035e-06, |
|
"log_odds_chosen": 0.36885133385658264, |
|
"log_odds_ratio": -0.6371248960494995, |
|
"logits/chosen": 246.92779541015625, |
|
"logits/rejected": 258.91119384765625, |
|
"logps/chosen": -1.2711718082427979, |
|
"logps/rejected": -1.5534406900405884, |
|
"loss": 53.7807, |
|
"nll_loss": 1.5334032773971558, |
|
"rewards/accuracies": 0.6643356680870056, |
|
"rewards/chosen": -0.6355859041213989, |
|
"rewards/margins": 0.14113430678844452, |
|
"rewards/rejected": -0.7767203450202942, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.075851851851852, |
|
"grad_norm": 153.154052734375, |
|
"learning_rate": 1.2660570475395684e-06, |
|
"log_odds_chosen": 0.4296096861362457, |
|
"log_odds_ratio": -0.6230024695396423, |
|
"logits/chosen": 241.6239013671875, |
|
"logits/rejected": 247.5516815185547, |
|
"logps/chosen": -1.179203748703003, |
|
"logps/rejected": -1.5177547931671143, |
|
"loss": 57.7354, |
|
"nll_loss": 1.486169457435608, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.5896018743515015, |
|
"rewards/margins": 0.16927553713321686, |
|
"rewards/rejected": -0.7588773965835571, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.1232592592592594, |
|
"grad_norm": 106.61084747314453, |
|
"learning_rate": 1.1473369030008974e-06, |
|
"log_odds_chosen": 0.32008111476898193, |
|
"log_odds_ratio": -0.6721247434616089, |
|
"logits/chosen": 247.1652069091797, |
|
"logits/rejected": 241.549072265625, |
|
"logps/chosen": -1.2260723114013672, |
|
"logps/rejected": -1.4864741563796997, |
|
"loss": 58.5367, |
|
"nll_loss": 1.5331952571868896, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.6130361557006836, |
|
"rewards/margins": 0.13020093739032745, |
|
"rewards/rejected": -0.7432370781898499, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.1706666666666665, |
|
"grad_norm": 148.289794921875, |
|
"learning_rate": 1.0327830055518843e-06, |
|
"log_odds_chosen": 0.43876928091049194, |
|
"log_odds_ratio": -0.6304255127906799, |
|
"logits/chosen": 246.761962890625, |
|
"logits/rejected": 262.81085205078125, |
|
"logps/chosen": -1.3328922986984253, |
|
"logps/rejected": -1.6859073638916016, |
|
"loss": 58.6867, |
|
"nll_loss": 1.554168462753296, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.6664461493492126, |
|
"rewards/margins": 0.17650756239891052, |
|
"rewards/rejected": -0.8429536819458008, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.218074074074074, |
|
"grad_norm": 181.2135467529297, |
|
"learning_rate": 9.227481849865236e-07, |
|
"log_odds_chosen": 0.3924606442451477, |
|
"log_odds_ratio": -0.6264532208442688, |
|
"logits/chosen": 233.6820831298828, |
|
"logits/rejected": 240.0151824951172, |
|
"logps/chosen": -1.2045910358428955, |
|
"logps/rejected": -1.5118606090545654, |
|
"loss": 58.7122, |
|
"nll_loss": 1.4953967332839966, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.6022955179214478, |
|
"rewards/margins": 0.15363483130931854, |
|
"rewards/rejected": -0.7559303045272827, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.2654814814814817, |
|
"grad_norm": 110.6124496459961, |
|
"learning_rate": 8.175713521924977e-07, |
|
"log_odds_chosen": 0.31769445538520813, |
|
"log_odds_ratio": -0.6737982630729675, |
|
"logits/chosen": 243.349853515625, |
|
"logits/rejected": 242.6227264404297, |
|
"logps/chosen": -1.2512071132659912, |
|
"logps/rejected": -1.5016454458236694, |
|
"loss": 58.2297, |
|
"nll_loss": 1.538447618484497, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.6256035566329956, |
|
"rewards/margins": 0.12521910667419434, |
|
"rewards/rejected": -0.7508227229118347, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.3128888888888888, |
|
"grad_norm": 142.4850616455078, |
|
"learning_rate": 7.175764552944368e-07, |
|
"log_odds_chosen": 0.4037380814552307, |
|
"log_odds_ratio": -0.6004605293273926, |
|
"logits/chosen": 235.74459838867188, |
|
"logits/rejected": 261.41607666015625, |
|
"logps/chosen": -1.1916182041168213, |
|
"logps/rejected": -1.5017354488372803, |
|
"loss": 56.862, |
|
"nll_loss": 1.520082712173462, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.5958091020584106, |
|
"rewards/margins": 0.1550586223602295, |
|
"rewards/rejected": -0.7508677244186401, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.3602962962962963, |
|
"grad_norm": 135.36468505859375, |
|
"learning_rate": 6.230714818829733e-07, |
|
"log_odds_chosen": 0.402596652507782, |
|
"log_odds_ratio": -0.6101157069206238, |
|
"logits/chosen": 243.6725311279297, |
|
"logits/rejected": 250.9963836669922, |
|
"logps/chosen": -1.2181828022003174, |
|
"logps/rejected": -1.5303680896759033, |
|
"loss": 56.9306, |
|
"nll_loss": 1.48748779296875, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.6090914011001587, |
|
"rewards/margins": 0.1560925841331482, |
|
"rewards/rejected": -0.7651840448379517, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.407703703703704, |
|
"grad_norm": 140.75233459472656, |
|
"learning_rate": 5.343475104027743e-07, |
|
"log_odds_chosen": 0.4917730391025543, |
|
"log_odds_ratio": -0.6072665452957153, |
|
"logits/chosen": 241.0843505859375, |
|
"logits/rejected": 245.1985626220703, |
|
"logps/chosen": -1.2005137205123901, |
|
"logps/rejected": -1.5952293872833252, |
|
"loss": 56.6815, |
|
"nll_loss": 1.4808170795440674, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6002568602561951, |
|
"rewards/margins": 0.1973578780889511, |
|
"rewards/rejected": -0.7976146936416626, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.455111111111111, |
|
"grad_norm": 158.60487365722656, |
|
"learning_rate": 4.5167781362130374e-07, |
|
"log_odds_chosen": 0.5367686152458191, |
|
"log_odds_ratio": -0.5729767084121704, |
|
"logits/chosen": 228.5010528564453, |
|
"logits/rejected": 252.38162231445312, |
|
"logps/chosen": -1.2318106889724731, |
|
"logps/rejected": -1.6468757390975952, |
|
"loss": 57.0996, |
|
"nll_loss": 1.498032808303833, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.6159053444862366, |
|
"rewards/margins": 0.20753255486488342, |
|
"rewards/rejected": -0.8234378695487976, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.5025185185185186, |
|
"grad_norm": 137.73834228515625, |
|
"learning_rate": 3.7531701693965555e-07, |
|
"log_odds_chosen": 0.32621318101882935, |
|
"log_odds_ratio": -0.6674071550369263, |
|
"logits/chosen": 242.7371063232422, |
|
"logits/rejected": 238.8314666748047, |
|
"logps/chosen": -1.201547622680664, |
|
"logps/rejected": -1.4522160291671753, |
|
"loss": 56.4953, |
|
"nll_loss": 1.4682562351226807, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.600773811340332, |
|
"rewards/margins": 0.125334233045578, |
|
"rewards/rejected": -0.7261080145835876, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.549925925925926, |
|
"grad_norm": 177.38958740234375, |
|
"learning_rate": 3.0550031413789485e-07, |
|
"log_odds_chosen": 0.5363808870315552, |
|
"log_odds_ratio": -0.5923758745193481, |
|
"logits/chosen": 230.21829223632812, |
|
"logits/rejected": 238.24636840820312, |
|
"logps/chosen": -1.2215114831924438, |
|
"logps/rejected": -1.6396996974945068, |
|
"loss": 55.9249, |
|
"nll_loss": 1.4734989404678345, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.6107557415962219, |
|
"rewards/margins": 0.20909413695335388, |
|
"rewards/rejected": -0.8198498487472534, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.5973333333333333, |
|
"grad_norm": 157.81935119628906, |
|
"learning_rate": 2.4244274297043653e-07, |
|
"log_odds_chosen": 0.5345597267150879, |
|
"log_odds_ratio": -0.5606809258460999, |
|
"logits/chosen": 236.24282836914062, |
|
"logits/rejected": 264.61431884765625, |
|
"logps/chosen": -1.1804494857788086, |
|
"logps/rejected": -1.58183753490448, |
|
"loss": 57.3731, |
|
"nll_loss": 1.4926464557647705, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.5902247428894043, |
|
"rewards/margins": 0.2006940096616745, |
|
"rewards/rejected": -0.79091876745224, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.644740740740741, |
|
"grad_norm": 163.67861938476562, |
|
"learning_rate": 1.8633852284264508e-07, |
|
"log_odds_chosen": 0.37513643503189087, |
|
"log_odds_ratio": -0.6290008425712585, |
|
"logits/chosen": 242.14004516601562, |
|
"logits/rejected": 243.6206817626953, |
|
"logps/chosen": -1.1577939987182617, |
|
"logps/rejected": -1.4585858583450317, |
|
"loss": 56.5557, |
|
"nll_loss": 1.4161715507507324, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5788969993591309, |
|
"rewards/margins": 0.15039584040641785, |
|
"rewards/rejected": -0.7292929291725159, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.6921481481481484, |
|
"grad_norm": 131.90818786621094, |
|
"learning_rate": 1.3736045660864034e-07, |
|
"log_odds_chosen": 0.39001405239105225, |
|
"log_odds_ratio": -0.6308606863021851, |
|
"logits/chosen": 236.4365234375, |
|
"logits/rejected": 251.0836181640625, |
|
"logps/chosen": -1.2004508972167969, |
|
"logps/rejected": -1.521728754043579, |
|
"loss": 58.3533, |
|
"nll_loss": 1.5017693042755127, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -0.6002254486083984, |
|
"rewards/margins": 0.1606389582157135, |
|
"rewards/rejected": -0.7608643770217896, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.7395555555555555, |
|
"grad_norm": 123.62786865234375, |
|
"learning_rate": 9.565939833279192e-08, |
|
"log_odds_chosen": 0.4970114827156067, |
|
"log_odds_ratio": -0.5855778455734253, |
|
"logits/chosen": 245.94058227539062, |
|
"logits/rejected": 250.78225708007812, |
|
"logps/chosen": -1.196718454360962, |
|
"logps/rejected": -1.5853480100631714, |
|
"loss": 57.6325, |
|
"nll_loss": 1.5269972085952759, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.598359227180481, |
|
"rewards/margins": 0.19431476294994354, |
|
"rewards/rejected": -0.7926740050315857, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.786962962962963, |
|
"grad_norm": 156.63650512695312, |
|
"learning_rate": 6.136378865420872e-08, |
|
"log_odds_chosen": 0.3856307566165924, |
|
"log_odds_ratio": -0.6614529490470886, |
|
"logits/chosen": 238.5367889404297, |
|
"logits/rejected": 231.7664031982422, |
|
"logps/chosen": -1.2782148122787476, |
|
"logps/rejected": -1.5829877853393555, |
|
"loss": 56.9746, |
|
"nll_loss": 1.4864990711212158, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6391074061393738, |
|
"rewards/margins": 0.15238651633262634, |
|
"rewards/rejected": -0.7914938926696777, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.83437037037037, |
|
"grad_norm": 150.2822723388672, |
|
"learning_rate": 3.45792591853214e-08, |
|
"log_odds_chosen": 0.5988565683364868, |
|
"log_odds_ratio": -0.5730681419372559, |
|
"logits/chosen": 223.4080810546875, |
|
"logits/rejected": 250.70947265625, |
|
"logps/chosen": -1.1857376098632812, |
|
"logps/rejected": -1.6649852991104126, |
|
"loss": 57.5247, |
|
"nll_loss": 1.4728622436523438, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.5928688049316406, |
|
"rewards/margins": 0.2396237850189209, |
|
"rewards/rejected": -0.8324926495552063, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.8817777777777778, |
|
"grad_norm": 124.27201080322266, |
|
"learning_rate": 1.538830716302092e-08, |
|
"log_odds_chosen": 0.4612645208835602, |
|
"log_odds_ratio": -0.6060934662818909, |
|
"logits/chosen": 231.5652618408203, |
|
"logits/rejected": 249.997802734375, |
|
"logps/chosen": -1.2019062042236328, |
|
"logps/rejected": -1.5464013814926147, |
|
"loss": 55.1052, |
|
"nll_loss": 1.423748254776001, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6009531021118164, |
|
"rewards/margins": 0.17224761843681335, |
|
"rewards/rejected": -0.7732006907463074, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.9291851851851853, |
|
"grad_norm": 174.55288696289062, |
|
"learning_rate": 3.850041354441503e-09, |
|
"log_odds_chosen": 0.38001030683517456, |
|
"log_odds_ratio": -0.6417083144187927, |
|
"logits/chosen": 238.8291473388672, |
|
"logits/rejected": 261.79193115234375, |
|
"logps/chosen": -1.2446626424789429, |
|
"logps/rejected": -1.5207360982894897, |
|
"loss": 57.5668, |
|
"nll_loss": 1.507879376411438, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6223313212394714, |
|
"rewards/margins": 0.13803674280643463, |
|
"rewards/rejected": -0.7603680491447449, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.9765925925925925, |
|
"grad_norm": 151.5247039794922, |
|
"learning_rate": 0.0, |
|
"log_odds_chosen": 0.4647287428379059, |
|
"log_odds_ratio": -0.5936863422393799, |
|
"logits/chosen": 231.6604461669922, |
|
"logits/rejected": 250.37442016601562, |
|
"logps/chosen": -1.172564148902893, |
|
"logps/rejected": -1.534595251083374, |
|
"loss": 55.8487, |
|
"nll_loss": 1.4678246974945068, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": -0.5862820744514465, |
|
"rewards/margins": 0.18101553618907928, |
|
"rewards/rejected": -0.767297625541687, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.9765925925925925, |
|
"step": 315, |
|
"total_flos": 0.0, |
|
"train_loss": 116.41342008076016, |
|
"train_runtime": 8101.4659, |
|
"train_samples_per_second": 2.5, |
|
"train_steps_per_second": 0.039 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|