{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9928514694201747, "eval_steps": 500, "global_step": 471, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03177124702144559, "grad_norm": 1690.3359375, "learning_rate": 2.5000000000000004e-07, "log_odds_chosen": 0.23363462090492249, "log_odds_ratio": -1.1611130237579346, "logits/chosen": 247.689697265625, "logits/rejected": 248.79653930664062, "logps/chosen": -15.079050064086914, "logps/rejected": -15.312664985656738, "loss": 14.8338, "nll_loss": 14.632547378540039, "rewards/accuracies": 0.4625000059604645, "rewards/chosen": -0.7539524435997009, "rewards/margins": 0.01168082281947136, "rewards/rejected": -0.7656332850456238, "step": 5 }, { "epoch": 0.06354249404289118, "grad_norm": 1072.1336669921875, "learning_rate": 5.000000000000001e-07, "log_odds_chosen": -0.00788118876516819, "log_odds_ratio": -1.0648075342178345, "logits/chosen": 250.407958984375, "logits/rejected": 238.48251342773438, "logps/chosen": -12.971229553222656, "logps/rejected": -12.963252067565918, "loss": 12.9311, "nll_loss": 12.861352920532227, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.6485615372657776, "rewards/margins": -0.0003988705575466156, "rewards/rejected": -0.6481626033782959, "step": 10 }, { "epoch": 0.09531374106433678, "grad_norm": 567.5018310546875, "learning_rate": 7.5e-07, "log_odds_chosen": 0.23998384177684784, "log_odds_ratio": -1.0259145498275757, "logits/chosen": 272.71136474609375, "logits/rejected": 306.32379150390625, "logps/chosen": -8.60844612121582, "logps/rejected": -8.848184585571289, "loss": 8.2022, "nll_loss": 8.230855941772461, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.430422306060791, "rewards/margins": 0.011986932717263699, "rewards/rejected": -0.4424092769622803, "step": 15 }, { "epoch": 0.12708498808578236, "grad_norm": 203.41554260253906, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": -0.189157634973526, "log_odds_ratio": -0.9602483510971069, "logits/chosen": 267.5614318847656, "logits/rejected": 265.63116455078125, "logps/chosen": -5.070517063140869, "logps/rejected": -4.881190299987793, "loss": 5.3347, "nll_loss": 5.30587911605835, "rewards/accuracies": 0.4375, "rewards/chosen": -0.25352585315704346, "rewards/margins": -0.009466320276260376, "rewards/rejected": -0.2440594881772995, "step": 20 }, { "epoch": 0.15885623510722796, "grad_norm": 114.50188446044922, "learning_rate": 1.25e-06, "log_odds_chosen": -0.28530603647232056, "log_odds_ratio": -0.9948366284370422, "logits/chosen": 293.87176513671875, "logits/rejected": 281.34368896484375, "logps/chosen": -3.002964973449707, "logps/rejected": -2.7326130867004395, "loss": 3.2523, "nll_loss": 3.1672582626342773, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.15014824271202087, "rewards/margins": -0.01351759023964405, "rewards/rejected": -0.13663065433502197, "step": 25 }, { "epoch": 0.19062748212867356, "grad_norm": 115.87854766845703, "learning_rate": 1.5e-06, "log_odds_chosen": 0.011725234799087048, "log_odds_ratio": -0.9292875528335571, "logits/chosen": 348.6666259765625, "logits/rejected": 345.4848327636719, "logps/chosen": -2.375824451446533, "logps/rejected": -2.386888027191162, "loss": 2.5635, "nll_loss": 2.6432275772094727, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.11879122257232666, "rewards/margins": 0.0005531776696443558, "rewards/rejected": -0.11934439837932587, "step": 30 }, { "epoch": 0.22239872915011913, "grad_norm": 62.12422561645508, "learning_rate": 1.75e-06, "log_odds_chosen": 0.27093321084976196, "log_odds_ratio": -0.6616618037223816, "logits/chosen": 379.88690185546875, "logits/rejected": 405.73138427734375, "logps/chosen": -1.729331612586975, "logps/rejected": -1.973097562789917, "loss": 2.0724, "nll_loss": 2.065566062927246, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.08646658807992935, "rewards/margins": 0.01218829583376646, "rewards/rejected": -0.09865488857030869, "step": 35 }, { "epoch": 0.2541699761715647, "grad_norm": 82.40242004394531, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.08508733659982681, "log_odds_ratio": -0.7854072451591492, "logits/chosen": 400.20355224609375, "logits/rejected": 401.4512023925781, "logps/chosen": -1.6863765716552734, "logps/rejected": -1.738532304763794, "loss": 2.1651, "nll_loss": 2.0580391883850098, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.08431883156299591, "rewards/margins": 0.0026077807415276766, "rewards/rejected": -0.08692661672830582, "step": 40 }, { "epoch": 0.28594122319301035, "grad_norm": 36.50082778930664, "learning_rate": 2.25e-06, "log_odds_chosen": 0.13595962524414062, "log_odds_ratio": -0.729759693145752, "logits/chosen": 399.974365234375, "logits/rejected": 397.14892578125, "logps/chosen": -1.4609622955322266, "logps/rejected": -1.5716912746429443, "loss": 1.889, "nll_loss": 1.9739797115325928, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.07304811477661133, "rewards/margins": 0.005536452867090702, "rewards/rejected": -0.07858456671237946, "step": 45 }, { "epoch": 0.3177124702144559, "grad_norm": 41.53988265991211, "learning_rate": 2.5e-06, "log_odds_chosen": -0.2444552630186081, "log_odds_ratio": -0.9091756939888, "logits/chosen": 404.7247009277344, "logits/rejected": 382.5657653808594, "logps/chosen": -1.5261789560317993, "logps/rejected": -1.345625877380371, "loss": 1.8269, "nll_loss": 1.8267475366592407, "rewards/accuracies": 0.4124999940395355, "rewards/chosen": -0.07630894333124161, "rewards/margins": -0.009027653373777866, "rewards/rejected": -0.06728129088878632, "step": 50 }, { "epoch": 0.3494837172359015, "grad_norm": 33.210506439208984, "learning_rate": 2.7500000000000004e-06, "log_odds_chosen": 0.0011972666252404451, "log_odds_ratio": -0.801056981086731, "logits/chosen": 393.1558837890625, "logits/rejected": 376.5151672363281, "logps/chosen": -1.3886432647705078, "logps/rejected": -1.3649101257324219, "loss": 1.6794, "nll_loss": 1.606018304824829, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": -0.06943216919898987, "rewards/margins": -0.0011866561835631728, "rewards/rejected": -0.06824551522731781, "step": 55 }, { "epoch": 0.3812549642573471, "grad_norm": 70.0084228515625, "learning_rate": 3e-06, "log_odds_chosen": 0.1671205312013626, "log_odds_ratio": -0.6716384887695312, "logits/chosen": 373.5995178222656, "logits/rejected": 394.85931396484375, "logps/chosen": -1.235502004623413, "logps/rejected": -1.3615134954452515, "loss": 1.7286, "nll_loss": 1.637976884841919, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.06177510693669319, "rewards/margins": 0.00630057230591774, "rewards/rejected": -0.06807567924261093, "step": 60 }, { "epoch": 0.4130262112787927, "grad_norm": 51.036556243896484, "learning_rate": 3.2500000000000002e-06, "log_odds_chosen": -0.011178660206496716, "log_odds_ratio": -0.7863475680351257, "logits/chosen": 371.5272521972656, "logits/rejected": 370.0439758300781, "logps/chosen": -1.2801154851913452, "logps/rejected": -1.2939107418060303, "loss": 1.6974, "nll_loss": 1.6114212274551392, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0640057772397995, "rewards/margins": 0.0006897579878568649, "rewards/rejected": -0.06469553709030151, "step": 65 }, { "epoch": 0.44479745830023826, "grad_norm": 51.9803352355957, "learning_rate": 3.5e-06, "log_odds_chosen": 0.30882030725479126, "log_odds_ratio": -0.6189635992050171, "logits/chosen": 390.2756652832031, "logits/rejected": 397.1745910644531, "logps/chosen": -1.1560722589492798, "logps/rejected": -1.3726001977920532, "loss": 1.6066, "nll_loss": 1.5191279649734497, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.05780360847711563, "rewards/margins": 0.010826398618519306, "rewards/rejected": -0.06863000988960266, "step": 70 }, { "epoch": 0.4765687053216839, "grad_norm": 37.97161102294922, "learning_rate": 3.7500000000000005e-06, "log_odds_chosen": 0.20173080265522003, "log_odds_ratio": -0.6899309754371643, "logits/chosen": 401.42218017578125, "logits/rejected": 395.65472412109375, "logps/chosen": -1.2323527336120605, "logps/rejected": -1.3662245273590088, "loss": 1.696, "nll_loss": 1.6926769018173218, "rewards/accuracies": 0.5625, "rewards/chosen": -0.06161763519048691, "rewards/margins": 0.00669359415769577, "rewards/rejected": -0.06831122934818268, "step": 75 }, { "epoch": 0.5083399523431295, "grad_norm": 27.679792404174805, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 0.3312566876411438, "log_odds_ratio": -0.6321982145309448, "logits/chosen": 394.644775390625, "logits/rejected": 373.4120788574219, "logps/chosen": -1.1556816101074219, "logps/rejected": -1.391071081161499, "loss": 1.6105, "nll_loss": 1.6470855474472046, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.057784080505371094, "rewards/margins": 0.011769475415349007, "rewards/rejected": -0.06955355405807495, "step": 80 }, { "epoch": 0.5401111993645751, "grad_norm": 32.451473236083984, "learning_rate": 4.25e-06, "log_odds_chosen": 0.20187363028526306, "log_odds_ratio": -0.6924890279769897, "logits/chosen": 396.11480712890625, "logits/rejected": 401.170654296875, "logps/chosen": -1.189259648323059, "logps/rejected": -1.3549880981445312, "loss": 1.6372, "nll_loss": 1.544048547744751, "rewards/accuracies": 0.5625, "rewards/chosen": -0.05946297571063042, "rewards/margins": 0.008286429569125175, "rewards/rejected": -0.06774941086769104, "step": 85 }, { "epoch": 0.5718824463860207, "grad_norm": 39.86790084838867, "learning_rate": 4.5e-06, "log_odds_chosen": 0.030326470732688904, "log_odds_ratio": -0.7561807036399841, "logits/chosen": 386.7585754394531, "logits/rejected": 383.3739013671875, "logps/chosen": -1.1144773960113525, "logps/rejected": -1.1401522159576416, "loss": 1.4773, "nll_loss": 1.4328895807266235, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.05572386458516121, "rewards/margins": 0.001283742836676538, "rewards/rejected": -0.05700760334730148, "step": 90 }, { "epoch": 0.6036536934074662, "grad_norm": 47.055973052978516, "learning_rate": 4.75e-06, "log_odds_chosen": -0.09162646532058716, "log_odds_ratio": -0.8472088575363159, "logits/chosen": 389.9097595214844, "logits/rejected": 398.4985656738281, "logps/chosen": -1.3111821413040161, "logps/rejected": -1.2368316650390625, "loss": 1.5514, "nll_loss": 1.5718324184417725, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06555911153554916, "rewards/margins": -0.0037175267934799194, "rewards/rejected": -0.061841584742069244, "step": 95 }, { "epoch": 0.6354249404289118, "grad_norm": 37.49895477294922, "learning_rate": 5e-06, "log_odds_chosen": 0.08311296999454498, "log_odds_ratio": -0.8223379254341125, "logits/chosen": 384.28240966796875, "logits/rejected": 397.09820556640625, "logps/chosen": -1.2262612581253052, "logps/rejected": -1.2850096225738525, "loss": 1.5995, "nll_loss": 1.5745359659194946, "rewards/accuracies": 0.5, "rewards/chosen": -0.06131306290626526, "rewards/margins": 0.0029374232981354, "rewards/rejected": -0.06425048410892487, "step": 100 }, { "epoch": 0.6671961874503575, "grad_norm": 41.63264846801758, "learning_rate": 4.8795003647426654e-06, "log_odds_chosen": 0.24810612201690674, "log_odds_ratio": -0.6684719324111938, "logits/chosen": 397.1859436035156, "logits/rejected": 402.24468994140625, "logps/chosen": -1.1126227378845215, "logps/rejected": -1.2968095541000366, "loss": 1.4715, "nll_loss": 1.4355812072753906, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.0556311309337616, "rewards/margins": 0.009209347888827324, "rewards/rejected": -0.06484048068523407, "step": 105 }, { "epoch": 0.698967434471803, "grad_norm": 496.76666259765625, "learning_rate": 4.767312946227961e-06, "log_odds_chosen": 0.3310456871986389, "log_odds_ratio": -0.6511000394821167, "logits/chosen": 380.10076904296875, "logits/rejected": 382.27056884765625, "logps/chosen": -1.0597606897354126, "logps/rejected": -1.2940515279769897, "loss": 1.5519, "nll_loss": 1.5879844427108765, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.05298804119229317, "rewards/margins": 0.011714537627995014, "rewards/rejected": -0.0647025778889656, "step": 110 }, { "epoch": 0.7307386814932486, "grad_norm": 23.994157791137695, "learning_rate": 4.662524041201569e-06, "log_odds_chosen": 0.26323679089546204, "log_odds_ratio": -0.6691566705703735, "logits/chosen": 380.04034423828125, "logits/rejected": 367.25006103515625, "logps/chosen": -1.07338285446167, "logps/rejected": -1.2431564331054688, "loss": 1.5679, "nll_loss": 1.458599328994751, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.053669143468141556, "rewards/margins": 0.008488676510751247, "rewards/rejected": -0.06215781718492508, "step": 115 }, { "epoch": 0.7625099285146942, "grad_norm": 20.148061752319336, "learning_rate": 4.564354645876385e-06, "log_odds_chosen": 0.2933538556098938, "log_odds_ratio": -0.6565826535224915, "logits/chosen": 406.1932067871094, "logits/rejected": 407.4499816894531, "logps/chosen": -1.0362493991851807, "logps/rejected": -1.2259390354156494, "loss": 1.4528, "nll_loss": 1.3933870792388916, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.05181247740983963, "rewards/margins": 0.009484483860433102, "rewards/rejected": -0.06129695847630501, "step": 120 }, { "epoch": 0.7942811755361397, "grad_norm": 35.251365661621094, "learning_rate": 4.47213595499958e-06, "log_odds_chosen": 0.2278643101453781, "log_odds_ratio": -0.6969554424285889, "logits/chosen": 411.54229736328125, "logits/rejected": 392.1700744628906, "logps/chosen": -1.151000738143921, "logps/rejected": -1.2925688028335571, "loss": 1.4981, "nll_loss": 1.506788969039917, "rewards/accuracies": 0.5625, "rewards/chosen": -0.057550035417079926, "rewards/margins": 0.00707840034738183, "rewards/rejected": -0.06462844461202621, "step": 125 }, { "epoch": 0.8260524225575854, "grad_norm": 33.39112091064453, "learning_rate": 4.385290096535147e-06, "log_odds_chosen": 0.1466347724199295, "log_odds_ratio": -0.7478917837142944, "logits/chosen": 414.90576171875, "logits/rejected": 358.86920166015625, "logps/chosen": -1.0797452926635742, "logps/rejected": -1.1873576641082764, "loss": 1.5469, "nll_loss": 1.5067487955093384, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05398727208375931, "rewards/margins": 0.005380617920309305, "rewards/rejected": -0.059367887675762177, "step": 130 }, { "epoch": 0.857823669579031, "grad_norm": 19.592321395874023, "learning_rate": 4.303314829119352e-06, "log_odds_chosen": 0.38857999444007874, "log_odds_ratio": -0.6473441123962402, "logits/chosen": 396.6593017578125, "logits/rejected": 397.8945617675781, "logps/chosen": -0.9662467837333679, "logps/rejected": -1.2553133964538574, "loss": 1.4694, "nll_loss": 1.4183107614517212, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.04831233620643616, "rewards/margins": 0.014453329145908356, "rewards/rejected": -0.06276567280292511, "step": 135 }, { "epoch": 0.8895949166004765, "grad_norm": 21.185075759887695, "learning_rate": 4.2257712736425835e-06, "log_odds_chosen": 0.4796748161315918, "log_odds_ratio": -0.6582080125808716, "logits/chosen": 397.1627197265625, "logits/rejected": 431.449951171875, "logps/chosen": -1.0234482288360596, "logps/rejected": -1.401524305343628, "loss": 1.4705, "nll_loss": 1.3904411792755127, "rewards/accuracies": 0.625, "rewards/chosen": -0.051172398030757904, "rewards/margins": 0.018903804942965508, "rewards/rejected": -0.07007621228694916, "step": 140 }, { "epoch": 0.9213661636219221, "grad_norm": 43.18369674682617, "learning_rate": 4.1522739926869985e-06, "log_odds_chosen": 0.20573845505714417, "log_odds_ratio": -0.7000477910041809, "logits/chosen": 411.5990295410156, "logits/rejected": 409.30670166015625, "logps/chosen": -1.0646284818649292, "logps/rejected": -1.2127861976623535, "loss": 1.4419, "nll_loss": 1.4223747253417969, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.05323142558336258, "rewards/margins": 0.007407893426716328, "rewards/rejected": -0.060639314353466034, "step": 145 }, { "epoch": 0.9531374106433678, "grad_norm": 32.28419876098633, "learning_rate": 4.082482904638631e-06, "log_odds_chosen": 0.16876378655433655, "log_odds_ratio": -0.7580657601356506, "logits/chosen": 422.814208984375, "logits/rejected": 394.9488220214844, "logps/chosen": -1.0943793058395386, "logps/rejected": -1.2127244472503662, "loss": 1.4868, "nll_loss": 1.514913558959961, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.054718971252441406, "rewards/margins": 0.005917248781770468, "rewards/rejected": -0.06063622236251831, "step": 150 }, { "epoch": 0.9849086576648134, "grad_norm": 29.756258010864258, "learning_rate": 4.016096644512495e-06, "log_odds_chosen": 0.24277754127979279, "log_odds_ratio": -0.6817291378974915, "logits/chosen": 382.361572265625, "logits/rejected": 412.5480041503906, "logps/chosen": -1.031072735786438, "logps/rejected": -1.2335267066955566, "loss": 1.4933, "nll_loss": 1.3717305660247803, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.0515536367893219, "rewards/margins": 0.010122699663043022, "rewards/rejected": -0.06167633458971977, "step": 155 }, { "epoch": 0.9976171564733916, "eval_log_odds_chosen": 0.32397615909576416, "eval_log_odds_ratio": -0.6557961106300354, "eval_logits/chosen": 318.2524108886719, "eval_logits/rejected": 307.1602478027344, "eval_logps/chosen": -1.0023059844970703, "eval_logps/rejected": -1.216626763343811, "eval_loss": 1.4686377048492432, "eval_nll_loss": 1.412723422050476, "eval_rewards/accuracies": 0.577617347240448, "eval_rewards/chosen": -0.050115302205085754, "eval_rewards/margins": 0.010716039687395096, "eval_rewards/rejected": -0.06083134561777115, "eval_runtime": 278.9943, "eval_samples_per_second": 1.982, "eval_steps_per_second": 0.993, "step": 157 }, { "epoch": 1.016679904686259, "grad_norm": 23.072566986083984, "learning_rate": 3.952847075210474e-06, "log_odds_chosen": 0.27298638224601746, "log_odds_ratio": -0.656934380531311, "logits/chosen": 396.47760009765625, "logits/rejected": 389.45477294921875, "logps/chosen": -0.8926975131034851, "logps/rejected": -1.0694555044174194, "loss": 1.2664, "nll_loss": 1.2796380519866943, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.0446348711848259, "rewards/margins": 0.008837896399199963, "rewards/rejected": -0.053472768515348434, "step": 160 }, { "epoch": 1.0484511517077044, "grad_norm": 37.10022735595703, "learning_rate": 3.892494720807615e-06, "log_odds_chosen": 1.2404422760009766, "log_odds_ratio": -0.39491257071495056, "logits/chosen": 401.09716796875, "logits/rejected": 423.9908142089844, "logps/chosen": -0.6766742467880249, "logps/rejected": -1.383744478225708, "loss": 1.082, "nll_loss": 1.041725754737854, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.033833712339401245, "rewards/margins": 0.03535350412130356, "rewards/rejected": -0.0691872239112854, "step": 165 }, { "epoch": 1.0802223987291502, "grad_norm": 19.41335105895996, "learning_rate": 3.834824944236852e-06, "log_odds_chosen": 1.1259419918060303, "log_odds_ratio": -0.42683523893356323, "logits/chosen": 380.9306335449219, "logits/rejected": 400.4065856933594, "logps/chosen": -0.7301725149154663, "logps/rejected": -1.4242193698883057, "loss": 1.0335, "nll_loss": 1.010096788406372, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.036508627235889435, "rewards/margins": 0.03470234200358391, "rewards/rejected": -0.07121096551418304, "step": 170 }, { "epoch": 1.1119936457505957, "grad_norm": 33.67777633666992, "learning_rate": 3.7796447300922724e-06, "log_odds_chosen": 1.1387039422988892, "log_odds_ratio": -0.389271080493927, "logits/chosen": 397.97271728515625, "logits/rejected": 384.86895751953125, "logps/chosen": -0.7504047751426697, "logps/rejected": -1.4629099369049072, "loss": 1.1499, "nll_loss": 1.1535775661468506, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.037520237267017365, "rewards/margins": 0.03562526777386665, "rewards/rejected": -0.07314550131559372, "step": 175 }, { "epoch": 1.1437648927720412, "grad_norm": 28.816181182861328, "learning_rate": 3.72677996249965e-06, "log_odds_chosen": 0.9216247797012329, "log_odds_ratio": -0.4554959833621979, "logits/chosen": 383.74090576171875, "logits/rejected": 390.2010498046875, "logps/chosen": -0.7649926543235779, "logps/rejected": -1.3056840896606445, "loss": 1.1342, "nll_loss": 1.0860129594802856, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.03824963420629501, "rewards/margins": 0.027034565806388855, "rewards/rejected": -0.06528420001268387, "step": 180 }, { "epoch": 1.175536139793487, "grad_norm": 22.407983779907227, "learning_rate": 3.6760731104690393e-06, "log_odds_chosen": 1.1218883991241455, "log_odds_ratio": -0.39646559953689575, "logits/chosen": 377.808349609375, "logits/rejected": 365.0235595703125, "logps/chosen": -0.6293179988861084, "logps/rejected": -1.2226511240005493, "loss": 1.1139, "nll_loss": 1.145390510559082, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.03146589919924736, "rewards/margins": 0.029666652902960777, "rewards/rejected": -0.061132557690143585, "step": 185 }, { "epoch": 1.2073073868149324, "grad_norm": 21.72103500366211, "learning_rate": 3.6273812505500587e-06, "log_odds_chosen": 1.041015863418579, "log_odds_ratio": -0.4681476950645447, "logits/chosen": 411.81097412109375, "logits/rejected": 402.80029296875, "logps/chosen": -0.7096566557884216, "logps/rejected": -1.3321136236190796, "loss": 1.0785, "nll_loss": 1.052976369857788, "rewards/accuracies": 0.75, "rewards/chosen": -0.03548283129930496, "rewards/margins": 0.031122848391532898, "rewards/rejected": -0.06660567224025726, "step": 190 }, { "epoch": 1.2390786338363782, "grad_norm": 30.01280403137207, "learning_rate": 3.5805743701971648e-06, "log_odds_chosen": 1.0631742477416992, "log_odds_ratio": -0.3908316493034363, "logits/chosen": 401.2222595214844, "logits/rejected": 386.70068359375, "logps/chosen": -0.6636060476303101, "logps/rejected": -1.2677198648452759, "loss": 1.0619, "nll_loss": 1.0240118503570557, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.03318030759692192, "rewards/margins": 0.030205685645341873, "rewards/rejected": -0.0633859932422638, "step": 195 }, { "epoch": 1.2708498808578237, "grad_norm": 27.974498748779297, "learning_rate": 3.5355339059327378e-06, "log_odds_chosen": 0.97685307264328, "log_odds_ratio": -0.43242964148521423, "logits/chosen": 381.5447692871094, "logits/rejected": 378.9277038574219, "logps/chosen": -0.6805351972579956, "logps/rejected": -1.2395280599594116, "loss": 0.9719, "nll_loss": 0.9232224225997925, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03402676433324814, "rewards/margins": 0.027949640527367592, "rewards/rejected": -0.06197641044855118, "step": 200 }, { "epoch": 1.3026211278792692, "grad_norm": 20.539955139160156, "learning_rate": 3.4921514788478916e-06, "log_odds_chosen": 1.0363399982452393, "log_odds_ratio": -0.4161090850830078, "logits/chosen": 394.122802734375, "logits/rejected": 394.5590515136719, "logps/chosen": -0.7591055631637573, "logps/rejected": -1.369094967842102, "loss": 1.1178, "nll_loss": 1.1349411010742188, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.037955284118652344, "rewards/margins": 0.03049946390092373, "rewards/rejected": -0.06845474243164062, "step": 205 }, { "epoch": 1.3343923749007147, "grad_norm": 17.045740127563477, "learning_rate": 3.450327796711771e-06, "log_odds_chosen": 1.0268789529800415, "log_odds_ratio": -0.4292878210544586, "logits/chosen": 388.66632080078125, "logits/rejected": 399.6094970703125, "logps/chosen": -0.6952003240585327, "logps/rejected": -1.2499881982803345, "loss": 1.0443, "nll_loss": 0.9765409231185913, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.034760020673274994, "rewards/margins": 0.02773938700556755, "rewards/rejected": -0.062499403953552246, "step": 210 }, { "epoch": 1.3661636219221605, "grad_norm": 18.652254104614258, "learning_rate": 3.409971697352368e-06, "log_odds_chosen": 1.1054929494857788, "log_odds_ratio": -0.39855724573135376, "logits/chosen": 404.5065002441406, "logits/rejected": 406.90728759765625, "logps/chosen": -0.615829348564148, "logps/rejected": -1.1530225276947021, "loss": 0.966, "nll_loss": 0.9155328869819641, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.030791467055678368, "rewards/margins": 0.02685965970158577, "rewards/rejected": -0.057651132345199585, "step": 215 }, { "epoch": 1.397934868943606, "grad_norm": 22.75200080871582, "learning_rate": 3.3709993123162106e-06, "log_odds_chosen": 1.1173226833343506, "log_odds_ratio": -0.39363011717796326, "logits/chosen": 395.583251953125, "logits/rejected": 389.8007507324219, "logps/chosen": -0.6299307942390442, "logps/rejected": -1.1920979022979736, "loss": 1.0238, "nll_loss": 1.0118134021759033, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.03149653971195221, "rewards/margins": 0.02810835838317871, "rewards/rejected": -0.05960489436984062, "step": 220 }, { "epoch": 1.4297061159650517, "grad_norm": 35.366146087646484, "learning_rate": 3.3333333333333333e-06, "log_odds_chosen": 1.0686665773391724, "log_odds_ratio": -0.42429256439208984, "logits/chosen": 380.085693359375, "logits/rejected": 382.55035400390625, "logps/chosen": -0.703887403011322, "logps/rejected": -1.3395355939865112, "loss": 1.0705, "nll_loss": 0.9919706583023071, "rewards/accuracies": 0.75, "rewards/chosen": -0.03519437089562416, "rewards/margins": 0.03178241103887558, "rewards/rejected": -0.06697677820920944, "step": 225 }, { "epoch": 1.4614773629864972, "grad_norm": 15.779678344726562, "learning_rate": 3.296902366978936e-06, "log_odds_chosen": 0.9577558636665344, "log_odds_ratio": -0.4354848265647888, "logits/chosen": 390.24658203125, "logits/rejected": 427.69805908203125, "logps/chosen": -0.6856449842453003, "logps/rejected": -1.2423439025878906, "loss": 1.0161, "nll_loss": 1.042870283126831, "rewards/accuracies": 0.8125, "rewards/chosen": -0.034282244741916656, "rewards/margins": 0.027834951877593994, "rewards/rejected": -0.06211719661951065, "step": 230 }, { "epoch": 1.4932486100079427, "grad_norm": 15.136199951171875, "learning_rate": 3.2616403652672114e-06, "log_odds_chosen": 0.9250560998916626, "log_odds_ratio": -0.4900631904602051, "logits/chosen": 376.62750244140625, "logits/rejected": 379.6344299316406, "logps/chosen": -0.7374454140663147, "logps/rejected": -1.2676187753677368, "loss": 1.1053, "nll_loss": 1.0843344926834106, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.036872267723083496, "rewards/margins": 0.026508668437600136, "rewards/rejected": -0.06338094174861908, "step": 235 }, { "epoch": 1.5250198570293882, "grad_norm": 18.867752075195312, "learning_rate": 3.2274861218395142e-06, "log_odds_chosen": 1.0353556871414185, "log_odds_ratio": -0.4396829605102539, "logits/chosen": 395.94573974609375, "logits/rejected": 393.00555419921875, "logps/chosen": -0.7767394185066223, "logps/rejected": -1.3642452955245972, "loss": 1.1411, "nll_loss": 1.2226094007492065, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.03883696720004082, "rewards/margins": 0.02937529981136322, "rewards/rejected": -0.06821225583553314, "step": 240 }, { "epoch": 1.556791104050834, "grad_norm": 17.112279891967773, "learning_rate": 3.1943828249997e-06, "log_odds_chosen": 0.8108429908752441, "log_odds_ratio": -0.5097193717956543, "logits/chosen": 373.17376708984375, "logits/rejected": 381.7115783691406, "logps/chosen": -0.7546018362045288, "logps/rejected": -1.2042465209960938, "loss": 1.0514, "nll_loss": 1.0946916341781616, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.03773009032011032, "rewards/margins": 0.022482234984636307, "rewards/rejected": -0.06021232530474663, "step": 245 }, { "epoch": 1.5885623510722797, "grad_norm": 16.28799819946289, "learning_rate": 3.1622776601683796e-06, "log_odds_chosen": 1.0346342325210571, "log_odds_ratio": -0.4391708970069885, "logits/chosen": 413.6966247558594, "logits/rejected": 385.60546875, "logps/chosen": -0.6579457521438599, "logps/rejected": -1.2464921474456787, "loss": 1.0729, "nll_loss": 1.0166854858398438, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.032897286117076874, "rewards/margins": 0.02942732349038124, "rewards/rejected": -0.06232461333274841, "step": 250 }, { "epoch": 1.6203335980937252, "grad_norm": 16.664676666259766, "learning_rate": 3.131121455425748e-06, "log_odds_chosen": 0.9674752950668335, "log_odds_ratio": -0.45131349563598633, "logits/chosen": 403.8053894042969, "logits/rejected": 375.2102966308594, "logps/chosen": -0.7002569437026978, "logps/rejected": -1.2120827436447144, "loss": 1.0503, "nll_loss": 1.0049232244491577, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.03501284867525101, "rewards/margins": 0.0255912933498621, "rewards/rejected": -0.060604143887758255, "step": 255 }, { "epoch": 1.6521048451151708, "grad_norm": 18.00334358215332, "learning_rate": 3.1008683647302113e-06, "log_odds_chosen": 1.1998499631881714, "log_odds_ratio": -0.38401293754577637, "logits/chosen": 387.16314697265625, "logits/rejected": 405.91461181640625, "logps/chosen": -0.7084919810295105, "logps/rejected": -1.3958265781402588, "loss": 1.0121, "nll_loss": 0.9756923913955688, "rewards/accuracies": 0.8125, "rewards/chosen": -0.035424597561359406, "rewards/margins": 0.034366730600595474, "rewards/rejected": -0.06979133188724518, "step": 260 }, { "epoch": 1.6838760921366163, "grad_norm": 16.293354034423828, "learning_rate": 3.0714755841697565e-06, "log_odds_chosen": 0.8822715878486633, "log_odds_ratio": -0.44727301597595215, "logits/chosen": 388.2609558105469, "logits/rejected": 401.40472412109375, "logps/chosen": -0.6763411164283752, "logps/rejected": -1.164041519165039, "loss": 1.0175, "nll_loss": 1.0139930248260498, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.03381705284118652, "rewards/margins": 0.02438502386212349, "rewards/rejected": -0.05820208042860031, "step": 265 }, { "epoch": 1.715647339158062, "grad_norm": 16.327428817749023, "learning_rate": 3.0429030972509227e-06, "log_odds_chosen": 0.8710346221923828, "log_odds_ratio": -0.47604647278785706, "logits/chosen": 409.24114990234375, "logits/rejected": 395.74285888671875, "logps/chosen": -0.7325721383094788, "logps/rejected": -1.2142250537872314, "loss": 1.0611, "nll_loss": 1.0560299158096313, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.036628607660532, "rewards/margins": 0.024082642048597336, "rewards/rejected": -0.06071125343441963, "step": 270 }, { "epoch": 1.7474185861795075, "grad_norm": 18.973796844482422, "learning_rate": 3.0151134457776365e-06, "log_odds_chosen": 1.0033023357391357, "log_odds_ratio": -0.441061407327652, "logits/chosen": 389.3612365722656, "logits/rejected": 402.40740966796875, "logps/chosen": -0.809950053691864, "logps/rejected": -1.4129191637039185, "loss": 1.0708, "nll_loss": 1.0950496196746826, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04049750417470932, "rewards/margins": 0.030148455873131752, "rewards/rejected": -0.07064596563577652, "step": 275 }, { "epoch": 1.7791898332009533, "grad_norm": 16.511871337890625, "learning_rate": 2.988071523335984e-06, "log_odds_chosen": 1.0175421237945557, "log_odds_ratio": -0.42562809586524963, "logits/chosen": 379.0143737792969, "logits/rejected": 409.40411376953125, "logps/chosen": -0.750076413154602, "logps/rejected": -1.3597370386123657, "loss": 1.1496, "nll_loss": 1.0727407932281494, "rewards/accuracies": 0.8125, "rewards/chosen": -0.03750381991267204, "rewards/margins": 0.030483026057481766, "rewards/rejected": -0.0679868534207344, "step": 280 }, { "epoch": 1.8109610802223988, "grad_norm": 16.760597229003906, "learning_rate": 2.961744388795462e-06, "log_odds_chosen": 0.8824595212936401, "log_odds_ratio": -0.5069230198860168, "logits/chosen": 411.53924560546875, "logits/rejected": 398.7159118652344, "logps/chosen": -0.8067032098770142, "logps/rejected": -1.3228529691696167, "loss": 1.0232, "nll_loss": 1.0439965724945068, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.04033515974879265, "rewards/margins": 0.025807490572333336, "rewards/rejected": -0.06614264845848083, "step": 285 }, { "epoch": 1.8427323272438443, "grad_norm": 17.98776626586914, "learning_rate": 2.9361010975735177e-06, "log_odds_chosen": 1.003203272819519, "log_odds_ratio": -0.4108172357082367, "logits/chosen": 396.21820068359375, "logits/rejected": 378.0450134277344, "logps/chosen": -0.7365471720695496, "logps/rejected": -1.3615410327911377, "loss": 1.1144, "nll_loss": 1.0785415172576904, "rewards/accuracies": 0.875, "rewards/chosen": -0.03682735934853554, "rewards/margins": 0.031249692663550377, "rewards/rejected": -0.06807705014944077, "step": 290 }, { "epoch": 1.8745035742652898, "grad_norm": 15.520238876342773, "learning_rate": 2.9111125486979104e-06, "log_odds_chosen": 0.9161802530288696, "log_odds_ratio": -0.459242582321167, "logits/chosen": 410.2061462402344, "logits/rejected": 410.3428649902344, "logps/chosen": -0.7132889628410339, "logps/rejected": -1.2278392314910889, "loss": 1.0323, "nll_loss": 1.0033968687057495, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.03566444665193558, "rewards/margins": 0.025727516040205956, "rewards/rejected": -0.061391960829496384, "step": 295 }, { "epoch": 1.9062748212867355, "grad_norm": 16.573244094848633, "learning_rate": 2.8867513459481293e-06, "log_odds_chosen": 0.9094133377075195, "log_odds_ratio": -0.45279788970947266, "logits/chosen": 386.76165771484375, "logits/rejected": 400.73443603515625, "logps/chosen": -0.7042160034179688, "logps/rejected": -1.1693499088287354, "loss": 1.0304, "nll_loss": 1.048201560974121, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.03521079570055008, "rewards/margins": 0.0232566986232996, "rewards/rejected": -0.05846749618649483, "step": 300 }, { "epoch": 1.938046068308181, "grad_norm": 22.685237884521484, "learning_rate": 2.862991671569341e-06, "log_odds_chosen": 0.9926900863647461, "log_odds_ratio": -0.41787558794021606, "logits/chosen": 413.96343994140625, "logits/rejected": 387.29730224609375, "logps/chosen": -0.6416295170783997, "logps/rejected": -1.1413004398345947, "loss": 0.988, "nll_loss": 0.9365374445915222, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.032081473618745804, "rewards/margins": 0.024983543902635574, "rewards/rejected": -0.05706502124667168, "step": 305 }, { "epoch": 1.9698173153296268, "grad_norm": 17.650312423706055, "learning_rate": 2.839809171235324e-06, "log_odds_chosen": 0.936631977558136, "log_odds_ratio": -0.4232844412326813, "logits/chosen": 384.41680908203125, "logits/rejected": 393.4200439453125, "logps/chosen": -0.7465909719467163, "logps/rejected": -1.3220902681350708, "loss": 1.036, "nll_loss": 1.0644495487213135, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -0.037329547107219696, "rewards/margins": 0.028774961829185486, "rewards/rejected": -0.06610451638698578, "step": 310 }, { "epoch": 1.995234312946783, "eval_log_odds_chosen": 0.34870076179504395, "eval_log_odds_ratio": -0.6608841419219971, "eval_logits/chosen": 312.93048095703125, "eval_logits/rejected": 302.5973815917969, "eval_logps/chosen": -0.9867467880249023, "eval_logps/rejected": -1.2230703830718994, "eval_loss": 1.419357180595398, "eval_nll_loss": 1.3670318126678467, "eval_rewards/accuracies": 0.5667870044708252, "eval_rewards/chosen": -0.04933733493089676, "eval_rewards/margins": 0.011816184036433697, "eval_rewards/rejected": -0.06115352362394333, "eval_runtime": 278.7523, "eval_samples_per_second": 1.984, "eval_steps_per_second": 0.994, "step": 314 }, { "epoch": 2.0015885623510723, "grad_norm": 16.02027702331543, "learning_rate": 2.817180849095055e-06, "log_odds_chosen": 1.0939061641693115, "log_odds_ratio": -0.39633578062057495, "logits/chosen": 402.9229431152344, "logits/rejected": 395.6060791015625, "logps/chosen": -0.7204245924949646, "logps/rejected": -1.3800289630889893, "loss": 1.0066, "nll_loss": 0.9838098287582397, "rewards/accuracies": 0.862500011920929, "rewards/chosen": -0.03602122515439987, "rewards/margins": 0.032980211079120636, "rewards/rejected": -0.0690014436841011, "step": 315 }, { "epoch": 2.033359809372518, "grad_norm": 18.79454803466797, "learning_rate": 2.7950849718747376e-06, "log_odds_chosen": 2.4993255138397217, "log_odds_ratio": -0.16073401272296906, "logits/chosen": 368.0367736816406, "logits/rejected": 383.66949462890625, "logps/chosen": -0.36704158782958984, "logps/rejected": -1.6748905181884766, "loss": 0.5746, "nll_loss": 0.5446859002113342, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.018352080136537552, "rewards/margins": 0.06539243459701538, "rewards/rejected": -0.08374451845884323, "step": 320 }, { "epoch": 2.0651310563939633, "grad_norm": 15.766267776489258, "learning_rate": 2.773500981126146e-06, "log_odds_chosen": 2.2665276527404785, "log_odds_ratio": -0.1551036536693573, "logits/chosen": 406.095703125, "logits/rejected": 392.67034912109375, "logps/chosen": -0.319181352853775, "logps/rejected": -1.3770430088043213, "loss": 0.6228, "nll_loss": 0.5448837280273438, "rewards/accuracies": 1.0, "rewards/chosen": -0.01595906727015972, "rewards/margins": 0.05289308354258537, "rewards/rejected": -0.06885214149951935, "step": 325 }, { "epoch": 2.096902303415409, "grad_norm": 14.977751731872559, "learning_rate": 2.752409412815902e-06, "log_odds_chosen": 2.5944995880126953, "log_odds_ratio": -0.12087088823318481, "logits/chosen": 390.13653564453125, "logits/rejected": 381.3854064941406, "logps/chosen": -0.35282859206199646, "logps/rejected": -1.6678215265274048, "loss": 0.5863, "nll_loss": 0.6006873846054077, "rewards/accuracies": 1.0, "rewards/chosen": -0.017641428858041763, "rewards/margins": 0.0657496452331543, "rewards/rejected": -0.08339107036590576, "step": 330 }, { "epoch": 2.128673550436855, "grad_norm": 11.052816390991211, "learning_rate": 2.7317918235407652e-06, "log_odds_chosen": 2.7388923168182373, "log_odds_ratio": -0.11470385640859604, "logits/chosen": 373.2349548339844, "logits/rejected": 372.8748474121094, "logps/chosen": -0.34725895524024963, "logps/rejected": -1.7965190410614014, "loss": 0.5343, "nll_loss": 0.5674458146095276, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.01736294850707054, "rewards/margins": 0.07246299088001251, "rewards/rejected": -0.08982594311237335, "step": 335 }, { "epoch": 2.1604447974583003, "grad_norm": 13.367264747619629, "learning_rate": 2.711630722733202e-06, "log_odds_chosen": 2.605971336364746, "log_odds_ratio": -0.1160891056060791, "logits/chosen": 375.6283874511719, "logits/rejected": 366.3837890625, "logps/chosen": -0.3731013238430023, "logps/rejected": -1.7575727701187134, "loss": 0.5037, "nll_loss": 0.5094397664070129, "rewards/accuracies": 1.0, "rewards/chosen": -0.018655067309737206, "rewards/margins": 0.0692235678434372, "rewards/rejected": -0.08787862956523895, "step": 340 }, { "epoch": 2.192216044479746, "grad_norm": 15.93548583984375, "learning_rate": 2.691909510290828e-06, "log_odds_chosen": 2.464402675628662, "log_odds_ratio": -0.13156327605247498, "logits/chosen": 368.25299072265625, "logits/rejected": 391.8240966796875, "logps/chosen": -0.35556745529174805, "logps/rejected": -1.6176868677139282, "loss": 0.5562, "nll_loss": 0.5545670986175537, "rewards/accuracies": 1.0, "rewards/chosen": -0.017778372392058372, "rewards/margins": 0.0631059780716896, "rewards/rejected": -0.08088434487581253, "step": 345 }, { "epoch": 2.2239872915011913, "grad_norm": 18.228683471679688, "learning_rate": 2.6726124191242444e-06, "log_odds_chosen": 2.6247172355651855, "log_odds_ratio": -0.12060055881738663, "logits/chosen": 378.7240295410156, "logits/rejected": 385.0981750488281, "logps/chosen": -0.3274136483669281, "logps/rejected": -1.6788215637207031, "loss": 0.568, "nll_loss": 0.5665196180343628, "rewards/accuracies": 1.0, "rewards/chosen": -0.016370682045817375, "rewards/margins": 0.06757040321826935, "rewards/rejected": -0.08394108712673187, "step": 350 }, { "epoch": 2.255758538522637, "grad_norm": 10.90892219543457, "learning_rate": 2.6537244621713765e-06, "log_odds_chosen": 2.860001802444458, "log_odds_ratio": -0.10223189741373062, "logits/chosen": 389.40496826171875, "logits/rejected": 375.42901611328125, "logps/chosen": -0.2831525504589081, "logps/rejected": -1.7654014825820923, "loss": 0.5138, "nll_loss": 0.5121490359306335, "rewards/accuracies": 1.0, "rewards/chosen": -0.014157627709209919, "rewards/margins": 0.07411245256662369, "rewards/rejected": -0.08827006816864014, "step": 355 }, { "epoch": 2.2875297855440824, "grad_norm": 19.659900665283203, "learning_rate": 2.6352313834736496e-06, "log_odds_chosen": 2.4854187965393066, "log_odds_ratio": -0.1478622853755951, "logits/chosen": 372.8605041503906, "logits/rejected": 345.42425537109375, "logps/chosen": -0.35203009843826294, "logps/rejected": -1.6439100503921509, "loss": 0.6183, "nll_loss": 0.6371638774871826, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.017601503059267998, "rewards/margins": 0.06459399312734604, "rewards/rejected": -0.08219550549983978, "step": 360 }, { "epoch": 2.3193010325655283, "grad_norm": 13.868301391601562, "learning_rate": 2.6171196129510684e-06, "log_odds_chosen": 2.2805027961730957, "log_odds_ratio": -0.15696506202220917, "logits/chosen": 368.7457580566406, "logits/rejected": 381.3095703125, "logps/chosen": -0.36261260509490967, "logps/rejected": -1.5051987171173096, "loss": 0.5374, "nll_loss": 0.5559507012367249, "rewards/accuracies": 1.0, "rewards/chosen": -0.018130630254745483, "rewards/margins": 0.057129304856061935, "rewards/rejected": -0.07525994628667831, "step": 365 }, { "epoch": 2.351072279586974, "grad_norm": 11.472841262817383, "learning_rate": 2.599376224550182e-06, "log_odds_chosen": 2.569275379180908, "log_odds_ratio": -0.1351589858531952, "logits/chosen": 341.255615234375, "logits/rejected": 375.5405578613281, "logps/chosen": -0.3256201148033142, "logps/rejected": -1.5576858520507812, "loss": 0.5394, "nll_loss": 0.5407100915908813, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.01628100499510765, "rewards/margins": 0.06160329654812813, "rewards/rejected": -0.07788430154323578, "step": 370 }, { "epoch": 2.3828435266084194, "grad_norm": 15.338998794555664, "learning_rate": 2.5819888974716113e-06, "log_odds_chosen": 2.368185520172119, "log_odds_ratio": -0.16596445441246033, "logits/chosen": 367.3100891113281, "logits/rejected": 379.1470642089844, "logps/chosen": -0.34503039717674255, "logps/rejected": -1.4970388412475586, "loss": 0.5385, "nll_loss": 0.5925895571708679, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.017251521348953247, "rewards/margins": 0.057600416243076324, "rewards/rejected": -0.07485193014144897, "step": 375 }, { "epoch": 2.414614773629865, "grad_norm": 11.992910385131836, "learning_rate": 2.564945880212886e-06, "log_odds_chosen": 2.668848991394043, "log_odds_ratio": -0.11368580907583237, "logits/chosen": 339.14764404296875, "logits/rejected": 374.33258056640625, "logps/chosen": -0.29525333642959595, "logps/rejected": -1.6697683334350586, "loss": 0.5286, "nll_loss": 0.530687689781189, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.014762667007744312, "rewards/margins": 0.06872574985027313, "rewards/rejected": -0.08348841965198517, "step": 380 }, { "epoch": 2.4463860206513104, "grad_norm": 14.318426132202148, "learning_rate": 2.5482359571881276e-06, "log_odds_chosen": 2.5295252799987793, "log_odds_ratio": -0.13907964527606964, "logits/chosen": 355.3772888183594, "logits/rejected": 369.38250732421875, "logps/chosen": -0.32363104820251465, "logps/rejected": -1.5537617206573486, "loss": 0.5316, "nll_loss": 0.5073675513267517, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.016181552782654762, "rewards/margins": 0.061506547033786774, "rewards/rejected": -0.07768810540437698, "step": 385 }, { "epoch": 2.4781572676727563, "grad_norm": 12.480424880981445, "learning_rate": 2.5318484177091667e-06, "log_odds_chosen": 2.844482898712158, "log_odds_ratio": -0.10580587387084961, "logits/chosen": 375.2264709472656, "logits/rejected": 378.9322509765625, "logps/chosen": -0.31661707162857056, "logps/rejected": -1.8109395503997803, "loss": 0.5889, "nll_loss": 0.5691739320755005, "rewards/accuracies": 1.0, "rewards/chosen": -0.015830855816602707, "rewards/margins": 0.07471612840890884, "rewards/rejected": -0.09054698050022125, "step": 390 }, { "epoch": 2.509928514694202, "grad_norm": 12.692961692810059, "learning_rate": 2.515773027133138e-06, "log_odds_chosen": 2.711256742477417, "log_odds_ratio": -0.11297377198934555, "logits/chosen": 366.85809326171875, "logits/rejected": 381.2143859863281, "logps/chosen": -0.3398984372615814, "logps/rejected": -1.6760709285736084, "loss": 0.5906, "nll_loss": 0.5562863349914551, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.01699492149055004, "rewards/margins": 0.06680862605571747, "rewards/rejected": -0.08380354940891266, "step": 395 }, { "epoch": 2.5416997617156474, "grad_norm": 14.05392837524414, "learning_rate": 2.5e-06, "log_odds_chosen": 2.806440830230713, "log_odds_ratio": -0.1206049919128418, "logits/chosen": 385.48016357421875, "logits/rejected": 387.5498046875, "logps/chosen": -0.33043619990348816, "logps/rejected": -1.7994086742401123, "loss": 0.536, "nll_loss": 0.5314000844955444, "rewards/accuracies": 1.0, "rewards/chosen": -0.016521811485290527, "rewards/margins": 0.07344862073659897, "rewards/rejected": -0.0899704322218895, "step": 400 }, { "epoch": 2.573471008737093, "grad_norm": 13.364317893981934, "learning_rate": 2.484519974999767e-06, "log_odds_chosen": 2.552408218383789, "log_odds_ratio": -0.15109024941921234, "logits/chosen": 402.896728515625, "logits/rejected": 358.8814697265625, "logps/chosen": -0.35223886370658875, "logps/rejected": -1.5426979064941406, "loss": 0.5462, "nll_loss": 0.5571905374526978, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.017611945047974586, "rewards/margins": 0.05952295660972595, "rewards/rejected": -0.07713489979505539, "step": 405 }, { "epoch": 2.6052422557585384, "grad_norm": 13.045223236083984, "learning_rate": 2.4693239916239746e-06, "log_odds_chosen": 2.6123578548431396, "log_odds_ratio": -0.13238325715065002, "logits/chosen": 398.14410400390625, "logits/rejected": 372.25506591796875, "logps/chosen": -0.32812008261680603, "logps/rejected": -1.6102240085601807, "loss": 0.5432, "nll_loss": 0.5351387858390808, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.016406003385782242, "rewards/margins": 0.06410519778728485, "rewards/rejected": -0.0805111974477768, "step": 410 }, { "epoch": 2.6370135027799844, "grad_norm": 12.631697654724121, "learning_rate": 2.4544034683690802e-06, "log_odds_chosen": 2.6268792152404785, "log_odds_ratio": -0.1338309347629547, "logits/chosen": 357.38592529296875, "logits/rejected": 389.5252380371094, "logps/chosen": -0.31788235902786255, "logps/rejected": -1.6665666103363037, "loss": 0.5004, "nll_loss": 0.5015383958816528, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.015894118696451187, "rewards/margins": 0.06743422150611877, "rewards/rejected": -0.08332833647727966, "step": 415 }, { "epoch": 2.6687847498014294, "grad_norm": 15.014891624450684, "learning_rate": 2.4397501823713327e-06, "log_odds_chosen": 2.591372013092041, "log_odds_ratio": -0.1354280561208725, "logits/chosen": 327.8882751464844, "logits/rejected": 381.07318115234375, "logps/chosen": -0.3104853630065918, "logps/rejected": -1.6048510074615479, "loss": 0.5341, "nll_loss": 0.5047799348831177, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.01552426815032959, "rewards/margins": 0.06471828371286392, "rewards/rejected": -0.08024255931377411, "step": 420 }, { "epoch": 2.7005559968228754, "grad_norm": 16.257761001586914, "learning_rate": 2.4253562503633297e-06, "log_odds_chosen": 2.5250930786132812, "log_odds_ratio": -0.1304975003004074, "logits/chosen": 358.3866271972656, "logits/rejected": 365.5904235839844, "logps/chosen": -0.37705713510513306, "logps/rejected": -1.6503069400787354, "loss": 0.5585, "nll_loss": 0.5689770579338074, "rewards/accuracies": 1.0, "rewards/chosen": -0.018852856010198593, "rewards/margins": 0.06366249173879623, "rewards/rejected": -0.08251535147428513, "step": 425 }, { "epoch": 2.732327243844321, "grad_norm": 13.000679969787598, "learning_rate": 2.411214110852061e-06, "log_odds_chosen": 2.5947813987731934, "log_odds_ratio": -0.1432962715625763, "logits/chosen": 405.462158203125, "logits/rejected": 397.40185546875, "logps/chosen": -0.31710636615753174, "logps/rejected": -1.525342583656311, "loss": 0.5913, "nll_loss": 0.5503649115562439, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.015855319797992706, "rewards/margins": 0.060411810874938965, "rewards/rejected": -0.07626713067293167, "step": 430 }, { "epoch": 2.7640984908657664, "grad_norm": 12.692647933959961, "learning_rate": 2.3973165074269213e-06, "log_odds_chosen": 2.4075734615325928, "log_odds_ratio": -0.12895731627941132, "logits/chosen": 380.5270080566406, "logits/rejected": 378.729736328125, "logps/chosen": -0.35148996114730835, "logps/rejected": -1.6106551885604858, "loss": 0.5662, "nll_loss": 0.552409291267395, "rewards/accuracies": 1.0, "rewards/chosen": -0.017574498429894447, "rewards/margins": 0.062958262860775, "rewards/rejected": -0.08053276687860489, "step": 435 }, { "epoch": 2.795869737887212, "grad_norm": 14.066009521484375, "learning_rate": 2.3836564731139807e-06, "log_odds_chosen": 2.4299874305725098, "log_odds_ratio": -0.13510316610336304, "logits/chosen": 371.70501708984375, "logits/rejected": 391.11016845703125, "logps/chosen": -0.346214234828949, "logps/rejected": -1.5722700357437134, "loss": 0.5331, "nll_loss": 0.6044758558273315, "rewards/accuracies": 1.0, "rewards/chosen": -0.01731071248650551, "rewards/margins": 0.0613027922809124, "rewards/rejected": -0.07861350476741791, "step": 440 }, { "epoch": 2.8276409849086575, "grad_norm": 16.693822860717773, "learning_rate": 2.3702273156998867e-06, "log_odds_chosen": 2.664226770401001, "log_odds_ratio": -0.1495479792356491, "logits/chosen": 375.3480224609375, "logits/rejected": 390.2784729003906, "logps/chosen": -0.30481138825416565, "logps/rejected": -1.5847148895263672, "loss": 0.5774, "nll_loss": 0.5591589212417603, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.015240569598972797, "rewards/margins": 0.06399518251419067, "rewards/rejected": -0.0792357474565506, "step": 445 }, { "epoch": 2.8594122319301034, "grad_norm": 11.772316932678223, "learning_rate": 2.357022603955159e-06, "log_odds_chosen": 2.4105961322784424, "log_odds_ratio": -0.14894258975982666, "logits/chosen": 369.1900329589844, "logits/rejected": 388.2864074707031, "logps/chosen": -0.32753241062164307, "logps/rejected": -1.5716395378112793, "loss": 0.5385, "nll_loss": 0.5137172937393188, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.016376618295907974, "rewards/margins": 0.06220535561442375, "rewards/rejected": -0.07858197391033173, "step": 450 }, { "epoch": 2.891183478951549, "grad_norm": 11.692276000976562, "learning_rate": 2.3440361546924774e-06, "log_odds_chosen": 2.5121138095855713, "log_odds_ratio": -0.13669057190418243, "logits/chosen": 368.95159912109375, "logits/rejected": 374.1610412597656, "logps/chosen": -0.30898115038871765, "logps/rejected": -1.5191621780395508, "loss": 0.5456, "nll_loss": 0.5575789213180542, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.015449057333171368, "rewards/margins": 0.06050904467701912, "rewards/rejected": -0.07595811039209366, "step": 455 }, { "epoch": 2.9229547259729944, "grad_norm": 10.796197891235352, "learning_rate": 2.3312620206007847e-06, "log_odds_chosen": 2.5624585151672363, "log_odds_ratio": -0.11981997638940811, "logits/chosen": 385.6537170410156, "logits/rejected": 393.69268798828125, "logps/chosen": -0.292979896068573, "logps/rejected": -1.5465893745422363, "loss": 0.5032, "nll_loss": 0.514795184135437, "rewards/accuracies": 1.0, "rewards/chosen": -0.01464899629354477, "rewards/margins": 0.06268046796321869, "rewards/rejected": -0.07732947170734406, "step": 460 }, { "epoch": 2.95472597299444, "grad_norm": 15.433439254760742, "learning_rate": 2.3186944788008413e-06, "log_odds_chosen": 2.7584941387176514, "log_odds_ratio": -0.10928479582071304, "logits/chosen": 379.510498046875, "logits/rejected": 370.8335876464844, "logps/chosen": -0.3042075037956238, "logps/rejected": -1.7168937921524048, "loss": 0.5496, "nll_loss": 0.5133975744247437, "rewards/accuracies": 1.0, "rewards/chosen": -0.01521037332713604, "rewards/margins": 0.07063432037830353, "rewards/rejected": -0.08584468811750412, "step": 465 }, { "epoch": 2.9864972200158855, "grad_norm": 14.072263717651367, "learning_rate": 2.3063280200722128e-06, "log_odds_chosen": 2.5119848251342773, "log_odds_ratio": -0.11926700919866562, "logits/chosen": 385.35107421875, "logits/rejected": 383.25164794921875, "logps/chosen": -0.3074565529823303, "logps/rejected": -1.477611780166626, "loss": 0.56, "nll_loss": 0.5478503704071045, "rewards/accuracies": 1.0, "rewards/chosen": -0.015372827649116516, "rewards/margins": 0.0585077628493309, "rewards/rejected": -0.07388059794902802, "step": 470 }, { "epoch": 2.9928514694201747, "eval_log_odds_chosen": 0.44380733370780945, "eval_log_odds_ratio": -0.6702221632003784, "eval_logits/chosen": 286.3763122558594, "eval_logits/rejected": 275.9735412597656, "eval_logps/chosen": -1.2025552988052368, "eval_logps/rejected": -1.5090675354003906, "eval_loss": 1.639459252357483, "eval_nll_loss": 1.5846672058105469, "eval_rewards/accuracies": 0.6028881072998047, "eval_rewards/chosen": -0.060127776116132736, "eval_rewards/margins": 0.015325604937970638, "eval_rewards/rejected": -0.07545337826013565, "eval_runtime": 278.3004, "eval_samples_per_second": 1.987, "eval_steps_per_second": 0.995, "step": 471 }, { "epoch": 2.9928514694201747, "step": 471, "total_flos": 0.0, "train_loss": 1.4771008792703066, "train_runtime": 40012.5124, "train_samples_per_second": 0.377, "train_steps_per_second": 0.012 } ], "logging_steps": 5, "max_steps": 471, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }