|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9765925925925925, |
|
"eval_steps": 500, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.047407407407407405, |
|
"grad_norm": 875.1458129882812, |
|
"learning_rate": 7.8125e-06, |
|
"log_odds_chosen": 4.998103618621826, |
|
"log_odds_ratio": -9.803796768188477, |
|
"logits/chosen": 138.401611328125, |
|
"logits/rejected": 153.0216064453125, |
|
"logps/chosen": -20.76495361328125, |
|
"logps/rejected": -25.763286590576172, |
|
"loss": 171.0682, |
|
"nll_loss": 8.368230819702148, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -10.382476806640625, |
|
"rewards/margins": 2.4991683959960938, |
|
"rewards/rejected": -12.881643295288086, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.09481481481481481, |
|
"grad_norm": 1327.8896484375, |
|
"learning_rate": 1.5625e-05, |
|
"log_odds_chosen": 2.35148024559021, |
|
"log_odds_ratio": -7.3204779624938965, |
|
"logits/chosen": 132.05416870117188, |
|
"logits/rejected": 156.443603515625, |
|
"logps/chosen": -15.675480842590332, |
|
"logps/rejected": -18.02592658996582, |
|
"loss": 245.0754, |
|
"nll_loss": 7.408373832702637, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -7.837740421295166, |
|
"rewards/margins": 1.175222635269165, |
|
"rewards/rejected": -9.01296329498291, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14222222222222222, |
|
"grad_norm": 697.5897827148438, |
|
"learning_rate": 2.34375e-05, |
|
"log_odds_chosen": 4.538907051086426, |
|
"log_odds_ratio": -8.128061294555664, |
|
"logits/chosen": 115.15523529052734, |
|
"logits/rejected": 142.7751922607422, |
|
"logps/chosen": -20.854244232177734, |
|
"logps/rejected": -25.39171028137207, |
|
"loss": 260.4323, |
|
"nll_loss": 8.780919075012207, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -10.427122116088867, |
|
"rewards/margins": 2.268732786178589, |
|
"rewards/rejected": -12.695855140686035, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.18962962962962962, |
|
"grad_norm": 1123.942626953125, |
|
"learning_rate": 3.125e-05, |
|
"log_odds_chosen": -0.19573011994361877, |
|
"log_odds_ratio": -7.7527055740356445, |
|
"logits/chosen": 145.3936309814453, |
|
"logits/rejected": 150.71353149414062, |
|
"logps/chosen": -17.822494506835938, |
|
"logps/rejected": -17.628124237060547, |
|
"loss": 155.0807, |
|
"nll_loss": 8.136808395385742, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -8.911247253417969, |
|
"rewards/margins": -0.09718628227710724, |
|
"rewards/rejected": -8.814062118530273, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.23703703703703705, |
|
"grad_norm": 1145.396484375, |
|
"learning_rate": 3.90625e-05, |
|
"log_odds_chosen": 4.295346260070801, |
|
"log_odds_ratio": -7.432453155517578, |
|
"logits/chosen": 151.52081298828125, |
|
"logits/rejected": 160.0590362548828, |
|
"logps/chosen": -17.441072463989258, |
|
"logps/rejected": -21.736679077148438, |
|
"loss": 370.5377, |
|
"nll_loss": 6.786558628082275, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -8.720536231994629, |
|
"rewards/margins": 2.147803783416748, |
|
"rewards/rejected": -10.868339538574219, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.28444444444444444, |
|
"grad_norm": 2596.38818359375, |
|
"learning_rate": 4.6875e-05, |
|
"log_odds_chosen": 0.009631728753447533, |
|
"log_odds_ratio": -5.7961225509643555, |
|
"logits/chosen": 152.17312622070312, |
|
"logits/rejected": 188.4014129638672, |
|
"logps/chosen": -13.26770305633545, |
|
"logps/rejected": -13.267865180969238, |
|
"loss": 140.9719, |
|
"nll_loss": 5.588016510009766, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -6.633851528167725, |
|
"rewards/margins": 8.130073547363281e-05, |
|
"rewards/rejected": -6.633932590484619, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33185185185185184, |
|
"grad_norm": 2872.860107421875, |
|
"learning_rate": 4.998613757348784e-05, |
|
"log_odds_chosen": -0.7113814353942871, |
|
"log_odds_ratio": -2.4185237884521484, |
|
"logits/chosen": 196.71951293945312, |
|
"logits/rejected": 181.12623596191406, |
|
"logps/chosen": -5.303072929382324, |
|
"logps/rejected": -4.604456901550293, |
|
"loss": 104.958, |
|
"nll_loss": 2.8738203048706055, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.651536464691162, |
|
"rewards/margins": -0.3493083119392395, |
|
"rewards/rejected": -2.3022284507751465, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.37925925925925924, |
|
"grad_norm": 25351.75, |
|
"learning_rate": 4.990147841143462e-05, |
|
"log_odds_chosen": 1.3610541820526123, |
|
"log_odds_ratio": -2.1230006217956543, |
|
"logits/chosen": 193.37913513183594, |
|
"logits/rejected": 204.16732788085938, |
|
"logps/chosen": -7.698246002197266, |
|
"logps/rejected": -9.058138847351074, |
|
"loss": 204.6441, |
|
"nll_loss": 5.2039594650268555, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -3.849123001098633, |
|
"rewards/margins": 0.6799466013908386, |
|
"rewards/rejected": -4.529069423675537, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 7550.92822265625, |
|
"learning_rate": 4.97401218720448e-05, |
|
"log_odds_chosen": -0.7872304916381836, |
|
"log_odds_ratio": -2.196615695953369, |
|
"logits/chosen": 213.85107421875, |
|
"logits/rejected": 207.1276092529297, |
|
"logps/chosen": -5.1990180015563965, |
|
"logps/rejected": -4.413318157196045, |
|
"loss": 74.1465, |
|
"nll_loss": 3.3599143028259277, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": -2.5995090007781982, |
|
"rewards/margins": -0.39284998178482056, |
|
"rewards/rejected": -2.2066590785980225, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4740740740740741, |
|
"grad_norm": 19450.84375, |
|
"learning_rate": 4.9502564938797946e-05, |
|
"log_odds_chosen": 0.3536006510257721, |
|
"log_odds_ratio": -0.9575241208076477, |
|
"logits/chosen": 228.44052124023438, |
|
"logits/rejected": 228.40786743164062, |
|
"logps/chosen": -2.896968126296997, |
|
"logps/rejected": -3.246020555496216, |
|
"loss": 153.1728, |
|
"nll_loss": 2.854421615600586, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.4484840631484985, |
|
"rewards/margins": 0.17452631890773773, |
|
"rewards/rejected": -1.623010277748108, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5214814814814814, |
|
"grad_norm": 1182.28564453125, |
|
"learning_rate": 4.918953929490768e-05, |
|
"log_odds_chosen": 0.05445149540901184, |
|
"log_odds_ratio": -1.0194499492645264, |
|
"logits/chosen": 261.8747863769531, |
|
"logits/rejected": 269.243896484375, |
|
"logps/chosen": -2.4033851623535156, |
|
"logps/rejected": -2.4503068923950195, |
|
"loss": 106.4384, |
|
"nll_loss": 2.4143242835998535, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -1.2016925811767578, |
|
"rewards/margins": 0.02346091903746128, |
|
"rewards/rejected": -1.2251534461975098, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5688888888888889, |
|
"grad_norm": 11634.9443359375, |
|
"learning_rate": 4.88020090697132e-05, |
|
"log_odds_chosen": 0.2782652974128723, |
|
"log_odds_ratio": -0.8532482981681824, |
|
"logits/chosen": 250.95071411132812, |
|
"logits/rejected": 266.6951599121094, |
|
"logps/chosen": -2.0876777172088623, |
|
"logps/rejected": -2.3408493995666504, |
|
"loss": 109.2007, |
|
"nll_loss": 2.205104351043701, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -1.0438388586044312, |
|
"rewards/margins": 0.1265856921672821, |
|
"rewards/rejected": -1.1704246997833252, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6162962962962963, |
|
"grad_norm": 2758.363525390625, |
|
"learning_rate": 4.834116786912897e-05, |
|
"log_odds_chosen": 0.15672044456005096, |
|
"log_odds_ratio": -1.0671449899673462, |
|
"logits/chosen": 279.3172607421875, |
|
"logits/rejected": 281.0866394042969, |
|
"logps/chosen": -2.3741352558135986, |
|
"logps/rejected": -2.5186164379119873, |
|
"loss": 131.6787, |
|
"nll_loss": 2.280827760696411, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -1.1870676279067993, |
|
"rewards/margins": 0.07224057614803314, |
|
"rewards/rejected": -1.2593082189559937, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6637037037037037, |
|
"grad_norm": 2126.38232421875, |
|
"learning_rate": 4.7808435099299045e-05, |
|
"log_odds_chosen": 0.7884730100631714, |
|
"log_odds_ratio": -0.7412351369857788, |
|
"logits/chosen": 259.4008483886719, |
|
"logits/rejected": 279.88018798828125, |
|
"logps/chosen": -2.333249092102051, |
|
"logps/rejected": -3.116851329803467, |
|
"loss": 105.9595, |
|
"nll_loss": 2.3083605766296387, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -1.1666245460510254, |
|
"rewards/margins": 0.39180102944374084, |
|
"rewards/rejected": -1.5584256649017334, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 1613.800048828125, |
|
"learning_rate": 4.720545159477922e-05, |
|
"log_odds_chosen": 0.15139682590961456, |
|
"log_odds_ratio": -2.676135301589966, |
|
"logits/chosen": 253.4937286376953, |
|
"logits/rejected": 245.2483673095703, |
|
"logps/chosen": -5.529114723205566, |
|
"logps/rejected": -5.663898468017578, |
|
"loss": 205.7061, |
|
"nll_loss": 3.4207730293273926, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -2.764557361602783, |
|
"rewards/margins": 0.06739232689142227, |
|
"rewards/rejected": -2.831949234008789, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7585185185185185, |
|
"grad_norm": 1384.3203125, |
|
"learning_rate": 4.653407456471222e-05, |
|
"log_odds_chosen": 1.3567793369293213, |
|
"log_odds_ratio": -1.5762678384780884, |
|
"logits/chosen": 249.103759765625, |
|
"logits/rejected": 261.83154296875, |
|
"logps/chosen": -4.300660610198975, |
|
"logps/rejected": -5.641573905944824, |
|
"loss": 96.7149, |
|
"nll_loss": 2.916186809539795, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -2.1503303050994873, |
|
"rewards/margins": 0.6704565286636353, |
|
"rewards/rejected": -2.820786952972412, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.8059259259259259, |
|
"grad_norm": 6067.19921875, |
|
"learning_rate": 4.579637187256222e-05, |
|
"log_odds_chosen": 0.5072377920150757, |
|
"log_odds_ratio": -1.1216537952423096, |
|
"logits/chosen": 266.18145751953125, |
|
"logits/rejected": 295.95086669921875, |
|
"logps/chosen": -2.4558284282684326, |
|
"logps/rejected": -2.9313106536865234, |
|
"loss": 114.5914, |
|
"nll_loss": 2.423276901245117, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.2279142141342163, |
|
"rewards/margins": 0.23774108290672302, |
|
"rewards/rejected": -1.4656553268432617, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 3012.88671875, |
|
"learning_rate": 4.499461566702685e-05, |
|
"log_odds_chosen": 0.59010249376297, |
|
"log_odds_ratio": -1.1751501560211182, |
|
"logits/chosen": 277.5395812988281, |
|
"logits/rejected": 293.25543212890625, |
|
"logps/chosen": -2.758129835128784, |
|
"logps/rejected": -3.3149051666259766, |
|
"loss": 100.0177, |
|
"nll_loss": 2.5514206886291504, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -1.379064917564392, |
|
"rewards/margins": 0.27838796377182007, |
|
"rewards/rejected": -1.6574525833129883, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.9007407407407407, |
|
"grad_norm": 2200.108154296875, |
|
"learning_rate": 4.413127538374411e-05, |
|
"log_odds_chosen": 0.4700884222984314, |
|
"log_odds_ratio": -0.7967466711997986, |
|
"logits/chosen": 276.75836181640625, |
|
"logits/rejected": 292.6000671386719, |
|
"logps/chosen": -1.8207991123199463, |
|
"logps/rejected": -2.2410740852355957, |
|
"loss": 66.5128, |
|
"nll_loss": 2.0072360038757324, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9103995561599731, |
|
"rewards/margins": 0.2101375162601471, |
|
"rewards/rejected": -1.1205370426177979, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.9481481481481482, |
|
"grad_norm": 3112.162353515625, |
|
"learning_rate": 4.320901013934887e-05, |
|
"log_odds_chosen": 0.36486712098121643, |
|
"log_odds_ratio": -0.9160555005073547, |
|
"logits/chosen": 289.7991027832031, |
|
"logits/rejected": 304.82513427734375, |
|
"logps/chosen": -2.173243284225464, |
|
"logps/rejected": -2.5329384803771973, |
|
"loss": 62.2007, |
|
"nll_loss": 2.255070209503174, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -1.086621642112732, |
|
"rewards/margins": 0.17984741926193237, |
|
"rewards/rejected": -1.2664692401885986, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9955555555555555, |
|
"grad_norm": 1157.5224609375, |
|
"learning_rate": 4.223066054130568e-05, |
|
"log_odds_chosen": 0.3608945906162262, |
|
"log_odds_ratio": -0.7305700182914734, |
|
"logits/chosen": 281.0811462402344, |
|
"logits/rejected": 300.54522705078125, |
|
"logps/chosen": -1.7265421152114868, |
|
"logps/rejected": -2.0366110801696777, |
|
"loss": 50.5091, |
|
"nll_loss": 1.927724838256836, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8632710576057434, |
|
"rewards/margins": 0.15503451228141785, |
|
"rewards/rejected": -1.0183055400848389, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.037925925925926, |
|
"grad_norm": 1815.9822998046875, |
|
"learning_rate": 4.1199239938743797e-05, |
|
"log_odds_chosen": 0.05243406444787979, |
|
"log_odds_ratio": -0.8580684065818787, |
|
"logits/chosen": 291.1260070800781, |
|
"logits/rejected": 301.667236328125, |
|
"logps/chosen": -1.6703624725341797, |
|
"logps/rejected": -1.733909010887146, |
|
"loss": 55.2378, |
|
"nll_loss": 1.869752049446106, |
|
"rewards/accuracies": 0.5384615659713745, |
|
"rewards/chosen": -0.8351812362670898, |
|
"rewards/margins": 0.03177327290177345, |
|
"rewards/rejected": -0.866954505443573, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0853333333333333, |
|
"grad_norm": 473.7134094238281, |
|
"learning_rate": 4.0117925141242174e-05, |
|
"log_odds_chosen": 0.2678685784339905, |
|
"log_odds_ratio": -0.8520981073379517, |
|
"logits/chosen": 275.2568359375, |
|
"logits/rejected": 300.43701171875, |
|
"logps/chosen": -1.8224836587905884, |
|
"logps/rejected": -2.0139191150665283, |
|
"loss": 73.8236, |
|
"nll_loss": 1.8877710103988647, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.9112418293952942, |
|
"rewards/margins": 0.09571774303913116, |
|
"rewards/rejected": -1.0069595575332642, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.1327407407407408, |
|
"grad_norm": 652.1748046875, |
|
"learning_rate": 3.899004663415084e-05, |
|
"log_odds_chosen": 0.3419615924358368, |
|
"log_odds_ratio": -0.8070743680000305, |
|
"logits/chosen": 255.7921142578125, |
|
"logits/rejected": 267.5791931152344, |
|
"logps/chosen": -1.7789255380630493, |
|
"logps/rejected": -2.0702507495880127, |
|
"loss": 72.9068, |
|
"nll_loss": 1.9794025421142578, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8894627690315247, |
|
"rewards/margins": 0.14566253125667572, |
|
"rewards/rejected": -1.0351253747940063, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.1801481481481482, |
|
"grad_norm": 1498.9239501953125, |
|
"learning_rate": 3.781907832058587e-05, |
|
"log_odds_chosen": 0.06549356132745743, |
|
"log_odds_ratio": -0.8704937100410461, |
|
"logits/chosen": 256.5135803222656, |
|
"logits/rejected": 259.24957275390625, |
|
"logps/chosen": -1.743499755859375, |
|
"logps/rejected": -1.771620750427246, |
|
"loss": 67.5531, |
|
"nll_loss": 1.8592092990875244, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -0.8717498779296875, |
|
"rewards/margins": 0.014060517773032188, |
|
"rewards/rejected": -0.885810375213623, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.2275555555555555, |
|
"grad_norm": 785.5113525390625, |
|
"learning_rate": 3.660862682169282e-05, |
|
"log_odds_chosen": 0.061852775514125824, |
|
"log_odds_ratio": -0.8480948209762573, |
|
"logits/chosen": 251.6976318359375, |
|
"logits/rejected": 261.1220397949219, |
|
"logps/chosen": -1.5321893692016602, |
|
"logps/rejected": -1.576385498046875, |
|
"loss": 61.6414, |
|
"nll_loss": 1.7627137899398804, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.7660946846008301, |
|
"rewards/margins": 0.022097986191511154, |
|
"rewards/rejected": -0.7881927490234375, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.274962962962963, |
|
"grad_norm": 1566.5196533203125, |
|
"learning_rate": 3.5362420368134356e-05, |
|
"log_odds_chosen": -0.06569625437259674, |
|
"log_odds_ratio": -1.0464322566986084, |
|
"logits/chosen": 257.01080322265625, |
|
"logits/rejected": 264.09246826171875, |
|
"logps/chosen": -1.8015447854995728, |
|
"logps/rejected": -1.727919578552246, |
|
"loss": 49.7802, |
|
"nll_loss": 1.8332347869873047, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.9007723927497864, |
|
"rewards/margins": -0.036812592297792435, |
|
"rewards/rejected": -0.863959789276123, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.3223703703703704, |
|
"grad_norm": 3892.252685546875, |
|
"learning_rate": 3.408429731701635e-05, |
|
"log_odds_chosen": 0.12928451597690582, |
|
"log_odds_ratio": -1.1143265962600708, |
|
"logits/chosen": 249.24832153320312, |
|
"logits/rejected": 242.64840698242188, |
|
"logps/chosen": -2.36354923248291, |
|
"logps/rejected": -2.504887342453003, |
|
"loss": 68.5205, |
|
"nll_loss": 2.1543807983398438, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -1.181774616241455, |
|
"rewards/margins": 0.07066915184259415, |
|
"rewards/rejected": -1.2524436712265015, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.3697777777777778, |
|
"grad_norm": 1954.7860107421875, |
|
"learning_rate": 3.2778194329621104e-05, |
|
"log_odds_chosen": 0.6030800342559814, |
|
"log_odds_ratio": -0.880083441734314, |
|
"logits/chosen": 238.984619140625, |
|
"logits/rejected": 257.8157653808594, |
|
"logps/chosen": -2.005361318588257, |
|
"logps/rejected": -2.592895984649658, |
|
"loss": 106.2088, |
|
"nll_loss": 1.994653344154358, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": -1.0026806592941284, |
|
"rewards/margins": 0.29376715421676636, |
|
"rewards/rejected": -1.296447992324829, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.417185185185185, |
|
"grad_norm": 4879.1201171875, |
|
"learning_rate": 3.144813424636031e-05, |
|
"log_odds_chosen": -0.14236605167388916, |
|
"log_odds_ratio": -1.0077848434448242, |
|
"logits/chosen": 254.04440307617188, |
|
"logits/rejected": 244.98947143554688, |
|
"logps/chosen": -1.827314019203186, |
|
"logps/rejected": -1.7160383462905884, |
|
"loss": 76.4689, |
|
"nll_loss": 1.9274555444717407, |
|
"rewards/accuracies": 0.48124998807907104, |
|
"rewards/chosen": -0.913657009601593, |
|
"rewards/margins": -0.055637799203395844, |
|
"rewards/rejected": -0.8580191731452942, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.4645925925925927, |
|
"grad_norm": 3663.07275390625, |
|
"learning_rate": 3.0098213696293542e-05, |
|
"log_odds_chosen": 0.31877607107162476, |
|
"log_odds_ratio": -0.8575546145439148, |
|
"logits/chosen": 244.96865844726562, |
|
"logits/rejected": 271.86456298828125, |
|
"logps/chosen": -1.880878210067749, |
|
"logps/rejected": -2.1720147132873535, |
|
"loss": 81.7164, |
|
"nll_loss": 2.0283596515655518, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9404391050338745, |
|
"rewards/margins": 0.14556822180747986, |
|
"rewards/rejected": -1.0860073566436768, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.512, |
|
"grad_norm": 2128.646240234375, |
|
"learning_rate": 2.8732590479375165e-05, |
|
"log_odds_chosen": 0.2958715856075287, |
|
"log_odds_ratio": -0.7991440892219543, |
|
"logits/chosen": 261.80657958984375, |
|
"logits/rejected": 264.6015930175781, |
|
"logps/chosen": -1.747931718826294, |
|
"logps/rejected": -2.023869037628174, |
|
"loss": 64.2607, |
|
"nll_loss": 1.854034423828125, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.873965859413147, |
|
"rewards/margins": 0.13796871900558472, |
|
"rewards/rejected": -1.011934518814087, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.5594074074074074, |
|
"grad_norm": 1701.7451171875, |
|
"learning_rate": 2.7355470760292956e-05, |
|
"log_odds_chosen": 0.026621544733643532, |
|
"log_odds_ratio": -0.913711428642273, |
|
"logits/chosen": 275.054931640625, |
|
"logits/rejected": 260.64874267578125, |
|
"logps/chosen": -1.8813059329986572, |
|
"logps/rejected": -1.8814128637313843, |
|
"loss": 60.1293, |
|
"nll_loss": 1.8933929204940796, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.9406529664993286, |
|
"rewards/margins": 5.351304935174994e-05, |
|
"rewards/rejected": -0.9407064318656921, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.6068148148148147, |
|
"grad_norm": 892.0233154296875, |
|
"learning_rate": 2.597109611334169e-05, |
|
"log_odds_chosen": 0.10228142887353897, |
|
"log_odds_ratio": -0.8465485572814941, |
|
"logits/chosen": 270.9327087402344, |
|
"logits/rejected": 275.8879699707031, |
|
"logps/chosen": -1.8762061595916748, |
|
"logps/rejected": -1.9606273174285889, |
|
"loss": 59.1188, |
|
"nll_loss": 1.9105665683746338, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.9381030797958374, |
|
"rewards/margins": 0.042210519313812256, |
|
"rewards/rejected": -0.9803136587142944, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.6542222222222223, |
|
"grad_norm": 363.839111328125, |
|
"learning_rate": 2.458373045823404e-05, |
|
"log_odds_chosen": 0.25977933406829834, |
|
"log_odds_ratio": -0.8015204668045044, |
|
"logits/chosen": 272.14178466796875, |
|
"logits/rejected": 288.24725341796875, |
|
"logps/chosen": -1.7361786365509033, |
|
"logps/rejected": -1.966091513633728, |
|
"loss": 63.3352, |
|
"nll_loss": 1.8658736944198608, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.8680893182754517, |
|
"rewards/margins": 0.11495651304721832, |
|
"rewards/rejected": -0.983045756816864, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.7016296296296296, |
|
"grad_norm": 1070.2236328125, |
|
"learning_rate": 2.3197646927086697e-05, |
|
"log_odds_chosen": -0.2573428750038147, |
|
"log_odds_ratio": -1.1848556995391846, |
|
"logits/chosen": 278.9873352050781, |
|
"logits/rejected": 272.8277587890625, |
|
"logps/chosen": -2.0303432941436768, |
|
"logps/rejected": -1.8005205392837524, |
|
"loss": 60.8183, |
|
"nll_loss": 1.9714701175689697, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -1.0151716470718384, |
|
"rewards/margins": -0.11491117626428604, |
|
"rewards/rejected": -0.9002602696418762, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.749037037037037, |
|
"grad_norm": 344.8516540527344, |
|
"learning_rate": 2.1817114703032176e-05, |
|
"log_odds_chosen": 0.2049332559108734, |
|
"log_odds_ratio": -0.8525213003158569, |
|
"logits/chosen": 267.901123046875, |
|
"logits/rejected": 269.7948303222656, |
|
"logps/chosen": -1.668211579322815, |
|
"logps/rejected": -1.8583217859268188, |
|
"loss": 65.7532, |
|
"nll_loss": 1.8099273443222046, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.8341057896614075, |
|
"rewards/margins": 0.09505517780780792, |
|
"rewards/rejected": -0.9291608929634094, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.7964444444444445, |
|
"grad_norm": 1921.921630859375, |
|
"learning_rate": 2.0446385870993467e-05, |
|
"log_odds_chosen": -0.010730123147368431, |
|
"log_odds_ratio": -1.041703462600708, |
|
"logits/chosen": 247.10665893554688, |
|
"logits/rejected": 266.3430480957031, |
|
"logps/chosen": -2.0603463649749756, |
|
"logps/rejected": -2.0158324241638184, |
|
"loss": 4.8149, |
|
"nll_loss": 2.0877997875213623, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.0301731824874878, |
|
"rewards/margins": -0.022256921976804733, |
|
"rewards/rejected": -1.0079162120819092, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.8438518518518519, |
|
"grad_norm": 1207.0048828125, |
|
"learning_rate": 1.9089682321121834e-05, |
|
"log_odds_chosen": 0.04649388790130615, |
|
"log_odds_ratio": -0.9928682446479797, |
|
"logits/chosen": 263.4989929199219, |
|
"logits/rejected": 280.4521789550781, |
|
"logps/chosen": -1.9523179531097412, |
|
"logps/rejected": -1.9815547466278076, |
|
"loss": 37.5282, |
|
"nll_loss": 2.034578800201416, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9761589765548706, |
|
"rewards/margins": 0.014618346467614174, |
|
"rewards/rejected": -0.9907773733139038, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.8912592592592592, |
|
"grad_norm": 1364.85009765625, |
|
"learning_rate": 1.775118274523545e-05, |
|
"log_odds_chosen": -0.1880253702402115, |
|
"log_odds_ratio": -1.0025999546051025, |
|
"logits/chosen": 259.7791748046875, |
|
"logits/rejected": 271.92303466796875, |
|
"logps/chosen": -1.6487674713134766, |
|
"logps/rejected": -1.4585245847702026, |
|
"loss": 2.3249, |
|
"nll_loss": 1.7344061136245728, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.8243837356567383, |
|
"rewards/margins": -0.09512142091989517, |
|
"rewards/rejected": -0.7292622923851013, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.9386666666666668, |
|
"grad_norm": 423.42108154296875, |
|
"learning_rate": 1.643500976631037e-05, |
|
"log_odds_chosen": 0.18183811008930206, |
|
"log_odds_ratio": -1.011243462562561, |
|
"logits/chosen": 255.23471069335938, |
|
"logits/rejected": 276.84356689453125, |
|
"logps/chosen": -1.7864488363265991, |
|
"logps/rejected": -1.945235252380371, |
|
"loss": 53.8824, |
|
"nll_loss": 1.8585844039916992, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.8932244181632996, |
|
"rewards/margins": 0.07939319312572479, |
|
"rewards/rejected": -0.9726176261901855, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.986074074074074, |
|
"grad_norm": 217.96726989746094, |
|
"learning_rate": 1.514521724066537e-05, |
|
"log_odds_chosen": -0.03663633018732071, |
|
"log_odds_ratio": -0.8743799924850464, |
|
"logits/chosen": 272.13055419921875, |
|
"logits/rejected": 262.0260925292969, |
|
"logps/chosen": -1.5016670227050781, |
|
"logps/rejected": -1.47659170627594, |
|
"loss": 53.2491, |
|
"nll_loss": 1.7205402851104736, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.7508335113525391, |
|
"rewards/margins": -0.0125376982614398, |
|
"rewards/rejected": -0.73829585313797, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.0284444444444443, |
|
"grad_norm": 1592.707275390625, |
|
"learning_rate": 1.3885777771950348e-05, |
|
"log_odds_chosen": -0.2431742548942566, |
|
"log_odds_ratio": -1.065582275390625, |
|
"logits/chosen": 264.0666809082031, |
|
"logits/rejected": 270.9798278808594, |
|
"logps/chosen": -1.8248804807662964, |
|
"logps/rejected": -1.5742560625076294, |
|
"loss": 23.2293, |
|
"nll_loss": 1.8352609872817993, |
|
"rewards/accuracies": 0.5524475574493408, |
|
"rewards/chosen": -0.9124402403831482, |
|
"rewards/margins": -0.1253122240304947, |
|
"rewards/rejected": -0.7871280312538147, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.075851851851852, |
|
"grad_norm": 2064.20458984375, |
|
"learning_rate": 1.2660570475395683e-05, |
|
"log_odds_chosen": 0.07270346581935883, |
|
"log_odds_ratio": -0.8794776201248169, |
|
"logits/chosen": 257.7349548339844, |
|
"logits/rejected": 261.220703125, |
|
"logps/chosen": -1.5441248416900635, |
|
"logps/rejected": -1.5986078977584839, |
|
"loss": 46.8863, |
|
"nll_loss": 1.7352149486541748, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.7720624208450317, |
|
"rewards/margins": 0.027241546660661697, |
|
"rewards/rejected": -0.7993039488792419, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.1232592592592594, |
|
"grad_norm": 718.3240966796875, |
|
"learning_rate": 1.1473369030008974e-05, |
|
"log_odds_chosen": -0.3160693943500519, |
|
"log_odds_ratio": -1.118138074874878, |
|
"logits/chosen": 255.883056640625, |
|
"logits/rejected": 249.1800537109375, |
|
"logps/chosen": -1.7974827289581299, |
|
"logps/rejected": -1.5016992092132568, |
|
"loss": -6.6175, |
|
"nll_loss": 1.8773807287216187, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -0.8987413644790649, |
|
"rewards/margins": -0.14789175987243652, |
|
"rewards/rejected": -0.7508496046066284, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.1706666666666665, |
|
"grad_norm": 2849.17138671875, |
|
"learning_rate": 1.0327830055518842e-05, |
|
"log_odds_chosen": 0.05444896221160889, |
|
"log_odds_ratio": -1.0051008462905884, |
|
"logits/chosen": 249.77755737304688, |
|
"logits/rejected": 262.1796569824219, |
|
"logps/chosen": -1.957166314125061, |
|
"logps/rejected": -1.9977023601531982, |
|
"loss": 32.8461, |
|
"nll_loss": 1.9164683818817139, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.9785831570625305, |
|
"rewards/margins": 0.020268145948648453, |
|
"rewards/rejected": -0.9988511800765991, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.218074074074074, |
|
"grad_norm": 772.2219848632812, |
|
"learning_rate": 9.227481849865235e-06, |
|
"log_odds_chosen": -0.5832428932189941, |
|
"log_odds_ratio": -1.5142714977264404, |
|
"logits/chosen": 238.1352996826172, |
|
"logits/rejected": 243.27197265625, |
|
"logps/chosen": -2.4328596591949463, |
|
"logps/rejected": -1.8463695049285889, |
|
"loss": -10.1247, |
|
"nll_loss": 2.1351406574249268, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -1.2164298295974731, |
|
"rewards/margins": -0.2932450473308563, |
|
"rewards/rejected": -0.9231847524642944, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.2654814814814817, |
|
"grad_norm": 206.4999237060547, |
|
"learning_rate": 8.175713521924978e-06, |
|
"log_odds_chosen": -0.37373510003089905, |
|
"log_odds_ratio": -1.2141059637069702, |
|
"logits/chosen": 247.3380126953125, |
|
"logits/rejected": 246.8715057373047, |
|
"logps/chosen": -2.0405170917510986, |
|
"logps/rejected": -1.6850570440292358, |
|
"loss": -7.9574, |
|
"nll_loss": 1.9059865474700928, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -1.0202585458755493, |
|
"rewards/margins": -0.17772991955280304, |
|
"rewards/rejected": -0.8425285220146179, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.3128888888888888, |
|
"grad_norm": 5002.2255859375, |
|
"learning_rate": 7.1757645529443665e-06, |
|
"log_odds_chosen": -0.3410179018974304, |
|
"log_odds_ratio": -1.1818878650665283, |
|
"logits/chosen": 239.45654296875, |
|
"logits/rejected": 262.42694091796875, |
|
"logps/chosen": -2.027069568634033, |
|
"logps/rejected": -1.6753078699111938, |
|
"loss": -1.4536, |
|
"nll_loss": 2.0037341117858887, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -1.0135347843170166, |
|
"rewards/margins": -0.17588095366954803, |
|
"rewards/rejected": -0.8376539349555969, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.3602962962962963, |
|
"grad_norm": 3950.781982421875, |
|
"learning_rate": 6.230714818829733e-06, |
|
"log_odds_chosen": 0.018829286098480225, |
|
"log_odds_ratio": -0.8809181451797485, |
|
"logits/chosen": 248.4897918701172, |
|
"logits/rejected": 255.39193725585938, |
|
"logps/chosen": -1.5735387802124023, |
|
"logps/rejected": -1.5727895498275757, |
|
"loss": 29.9793, |
|
"nll_loss": 1.7044947147369385, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.7867693901062012, |
|
"rewards/margins": -0.00037460625753737986, |
|
"rewards/rejected": -0.7863947749137878, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.407703703703704, |
|
"grad_norm": 896.8823852539062, |
|
"learning_rate": 5.343475104027743e-06, |
|
"log_odds_chosen": -0.20623116195201874, |
|
"log_odds_ratio": -1.1634149551391602, |
|
"logits/chosen": 247.5481414794922, |
|
"logits/rejected": 250.3152618408203, |
|
"logps/chosen": -1.9348275661468506, |
|
"logps/rejected": -1.7173500061035156, |
|
"loss": 10.0473, |
|
"nll_loss": 1.8512252569198608, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.9674137830734253, |
|
"rewards/margins": -0.10873879492282867, |
|
"rewards/rejected": -0.8586750030517578, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.455111111111111, |
|
"grad_norm": 3805.9501953125, |
|
"learning_rate": 4.516778136213037e-06, |
|
"log_odds_chosen": -0.09736748039722443, |
|
"log_odds_ratio": -1.0045772790908813, |
|
"logits/chosen": 240.8005828857422, |
|
"logits/rejected": 259.3901062011719, |
|
"logps/chosen": -1.9838569164276123, |
|
"logps/rejected": -1.8580623865127563, |
|
"loss": 28.1764, |
|
"nll_loss": 1.8320090770721436, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.9919284582138062, |
|
"rewards/margins": -0.0628972202539444, |
|
"rewards/rejected": -0.9290311932563782, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.5025185185185186, |
|
"grad_norm": 839.794677734375, |
|
"learning_rate": 3.7531701693965554e-06, |
|
"log_odds_chosen": -0.46628624200820923, |
|
"log_odds_ratio": -1.336814522743225, |
|
"logits/chosen": 253.2608184814453, |
|
"logits/rejected": 247.44625854492188, |
|
"logps/chosen": -1.995568037033081, |
|
"logps/rejected": -1.50832998752594, |
|
"loss": -22.3793, |
|
"nll_loss": 1.8523412942886353, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.9977840185165405, |
|
"rewards/margins": -0.24361905455589294, |
|
"rewards/rejected": -0.75416499376297, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.549925925925926, |
|
"grad_norm": 415.7859802246094, |
|
"learning_rate": 3.055003141378948e-06, |
|
"log_odds_chosen": 0.025583285838365555, |
|
"log_odds_ratio": -1.0144554376602173, |
|
"logits/chosen": 242.5679168701172, |
|
"logits/rejected": 247.4667205810547, |
|
"logps/chosen": -2.184016466140747, |
|
"logps/rejected": -2.171065092086792, |
|
"loss": 31.7258, |
|
"nll_loss": 1.8770112991333008, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -1.0920082330703735, |
|
"rewards/margins": -0.006475582718849182, |
|
"rewards/rejected": -1.085532546043396, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.5973333333333333, |
|
"grad_norm": 871.19482421875, |
|
"learning_rate": 2.424427429704365e-06, |
|
"log_odds_chosen": -0.06566087901592255, |
|
"log_odds_ratio": -1.05461847782135, |
|
"logits/chosen": 243.1831512451172, |
|
"logits/rejected": 265.54376220703125, |
|
"logps/chosen": -1.7891054153442383, |
|
"logps/rejected": -1.672284483909607, |
|
"loss": 12.4619, |
|
"nll_loss": 1.8154491186141968, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.8945527076721191, |
|
"rewards/margins": -0.05841045826673508, |
|
"rewards/rejected": -0.8361422419548035, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.644740740740741, |
|
"grad_norm": 3279.919921875, |
|
"learning_rate": 1.8633852284264508e-06, |
|
"log_odds_chosen": -0.36494511365890503, |
|
"log_odds_ratio": -1.200988531112671, |
|
"logits/chosen": 246.86233520507812, |
|
"logits/rejected": 247.6204071044922, |
|
"logps/chosen": -1.8717445135116577, |
|
"logps/rejected": -1.5146552324295044, |
|
"loss": 14.1496, |
|
"nll_loss": 1.7873508930206299, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": -0.9358722567558289, |
|
"rewards/margins": -0.17854467034339905, |
|
"rewards/rejected": -0.7573276162147522, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.6921481481481484, |
|
"grad_norm": 591.1192626953125, |
|
"learning_rate": 1.3736045660864034e-06, |
|
"log_odds_chosen": -0.10209669917821884, |
|
"log_odds_ratio": -1.0389295816421509, |
|
"logits/chosen": 244.7912139892578, |
|
"logits/rejected": 257.46722412109375, |
|
"logps/chosen": -1.8741945028305054, |
|
"logps/rejected": -1.7793128490447998, |
|
"loss": 13.954, |
|
"nll_loss": 1.839137315750122, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.9370972514152527, |
|
"rewards/margins": -0.04744075611233711, |
|
"rewards/rejected": -0.8896564245223999, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.7395555555555555, |
|
"grad_norm": 3014.538818359375, |
|
"learning_rate": 9.565939833279192e-07, |
|
"log_odds_chosen": -0.32551804184913635, |
|
"log_odds_ratio": -1.3097738027572632, |
|
"logits/chosen": 253.1048126220703, |
|
"logits/rejected": 256.01220703125, |
|
"logps/chosen": -2.233328104019165, |
|
"logps/rejected": -1.8837789297103882, |
|
"loss": -51.2977, |
|
"nll_loss": 2.0668323040008545, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -1.1166640520095825, |
|
"rewards/margins": -0.1747743934392929, |
|
"rewards/rejected": -0.9418894648551941, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.786962962962963, |
|
"grad_norm": 1583.771728515625, |
|
"learning_rate": 6.136378865420872e-07, |
|
"log_odds_chosen": -0.180761456489563, |
|
"log_odds_ratio": -1.1848331689834595, |
|
"logits/chosen": 246.4217071533203, |
|
"logits/rejected": 238.6134796142578, |
|
"logps/chosen": -2.0827651023864746, |
|
"logps/rejected": -1.8982934951782227, |
|
"loss": -4.1222, |
|
"nll_loss": 1.8400661945343018, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.0413825511932373, |
|
"rewards/margins": -0.09223591536283493, |
|
"rewards/rejected": -0.9491467475891113, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.83437037037037, |
|
"grad_norm": 370.90655517578125, |
|
"learning_rate": 3.45792591853214e-07, |
|
"log_odds_chosen": 0.15687787532806396, |
|
"log_odds_ratio": -0.8642757534980774, |
|
"logits/chosen": 231.45712280273438, |
|
"logits/rejected": 254.4613494873047, |
|
"logps/chosen": -1.6515134572982788, |
|
"logps/rejected": -1.7699718475341797, |
|
"loss": 36.0028, |
|
"nll_loss": 1.6890325546264648, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.8257567286491394, |
|
"rewards/margins": 0.05922911688685417, |
|
"rewards/rejected": -0.8849859237670898, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.8817777777777778, |
|
"grad_norm": 1475.66357421875, |
|
"learning_rate": 1.538830716302092e-07, |
|
"log_odds_chosen": 0.0742514431476593, |
|
"log_odds_ratio": -0.9600369334220886, |
|
"logits/chosen": 239.23818969726562, |
|
"logits/rejected": 255.12649536132812, |
|
"logps/chosen": -1.6762956380844116, |
|
"logps/rejected": -1.7098366022109985, |
|
"loss": 16.3917, |
|
"nll_loss": 1.6855227947235107, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8381478190422058, |
|
"rewards/margins": 0.01677042618393898, |
|
"rewards/rejected": -0.8549183011054993, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.9291851851851853, |
|
"grad_norm": 4500.62158203125, |
|
"learning_rate": 3.8500413544415025e-08, |
|
"log_odds_chosen": 0.004546451382339001, |
|
"log_odds_ratio": -1.0055034160614014, |
|
"logits/chosen": 246.24240112304688, |
|
"logits/rejected": 267.38336181640625, |
|
"logps/chosen": -1.752664566040039, |
|
"logps/rejected": -1.7212101221084595, |
|
"loss": 12.0693, |
|
"nll_loss": 1.76467764377594, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -0.8763322830200195, |
|
"rewards/margins": -0.015727439895272255, |
|
"rewards/rejected": -0.8606050610542297, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.9765925925925925, |
|
"grad_norm": 349.34295654296875, |
|
"learning_rate": 0.0, |
|
"log_odds_chosen": -0.17005635797977448, |
|
"log_odds_ratio": -1.134798526763916, |
|
"logits/chosen": 242.64279174804688, |
|
"logits/rejected": 257.0933532714844, |
|
"logps/chosen": -1.8541597127914429, |
|
"logps/rejected": -1.6562083959579468, |
|
"loss": -6.7425, |
|
"nll_loss": 1.7939285039901733, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.9270798563957214, |
|
"rewards/margins": -0.09897565841674805, |
|
"rewards/rejected": -0.8281041979789734, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.9765925925925925, |
|
"step": 315, |
|
"total_flos": 0.0, |
|
"train_loss": 70.88754168313648, |
|
"train_runtime": 8139.5297, |
|
"train_samples_per_second": 2.488, |
|
"train_steps_per_second": 0.039 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|