diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,14554 +1,1774 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 3.0, + "epoch": 2.9928514694201747, "eval_steps": 500, - "global_step": 4023, + "global_step": 471, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.0037285607755406414, - "grad_norm": 1640.8118896484375, + "epoch": 0.03177124702144559, + "grad_norm": 1690.3359375, "learning_rate": 2.5000000000000004e-07, - "log_odds_chosen": -1.2131872177124023, - "log_odds_ratio": -1.5312442779541016, - "logits/chosen": 95.62350463867188, - "logits/rejected": 53.78803634643555, - "logps/chosen": -17.15298080444336, - "logps/rejected": -15.939793586730957, - "loss": 14.8587, - "nll_loss": 15.809553146362305, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.8576489686965942, - "rewards/margins": -0.06065933778882027, - "rewards/rejected": -0.7969896197319031, + "log_odds_chosen": 0.23363462090492249, + "log_odds_ratio": -1.1611130237579346, + "logits/chosen": 247.689697265625, + "logits/rejected": 248.79653930664062, + "logps/chosen": -15.079050064086914, + "logps/rejected": -15.312664985656738, + "loss": 14.8338, + "nll_loss": 14.632547378540039, + "rewards/accuracies": 0.4625000059604645, + "rewards/chosen": -0.7539524435997009, + "rewards/margins": 0.01168082281947136, + "rewards/rejected": -0.7656332850456238, "step": 5 }, { - "epoch": 0.007457121551081283, - "grad_norm": 1322.96875, + "epoch": 0.06354249404289118, + "grad_norm": 1072.1336669921875, "learning_rate": 5.000000000000001e-07, - "log_odds_chosen": 1.0361322164535522, - "log_odds_ratio": -0.6402125358581543, - "logits/chosen": 26.042469024658203, - "logits/rejected": 303.7039794921875, - "logps/chosen": -12.449737548828125, - "logps/rejected": -13.485891342163086, - "loss": 13.3348, - "nll_loss": 12.565601348876953, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.6224868893623352, - "rewards/margins": 0.05180773138999939, - "rewards/rejected": -0.6742945909500122, + "log_odds_chosen": -0.00788118876516819, + "log_odds_ratio": -1.0648075342178345, + "logits/chosen": 250.407958984375, + "logits/rejected": 238.48251342773438, + "logps/chosen": -12.971229553222656, + "logps/rejected": -12.963252067565918, + "loss": 12.9311, + "nll_loss": 12.861352920532227, + "rewards/accuracies": 0.4000000059604645, + "rewards/chosen": -0.6485615372657776, + "rewards/margins": -0.0003988705575466156, + "rewards/rejected": -0.6481626033782959, "step": 10 }, { - "epoch": 0.011185682326621925, - "grad_norm": 636.3076782226562, + "epoch": 0.09531374106433678, + "grad_norm": 567.5018310546875, "learning_rate": 7.5e-07, - "log_odds_chosen": -0.026751559227705002, - "log_odds_ratio": -0.7491689920425415, - "logits/chosen": 189.0467987060547, - "logits/rejected": 92.66807556152344, - "logps/chosen": -8.59510326385498, - "logps/rejected": -8.568397521972656, - "loss": 9.2566, - "nll_loss": 9.054757118225098, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.42975515127182007, - "rewards/margins": -0.0013352930545806885, - "rewards/rejected": -0.428419828414917, + "log_odds_chosen": 0.23998384177684784, + "log_odds_ratio": -1.0259145498275757, + "logits/chosen": 272.71136474609375, + "logits/rejected": 306.32379150390625, + "logps/chosen": -8.60844612121582, + "logps/rejected": -8.848184585571289, + "loss": 8.2022, + "nll_loss": 8.230855941772461, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.430422306060791, + "rewards/margins": 0.011986932717263699, + "rewards/rejected": -0.4424092769622803, "step": 15 }, { - "epoch": 0.014914243102162566, - "grad_norm": 276.0909423828125, + "epoch": 0.12708498808578236, + "grad_norm": 203.41554260253906, "learning_rate": 1.0000000000000002e-06, - "log_odds_chosen": -0.0895579382777214, - "log_odds_ratio": -0.7453578114509583, - "logits/chosen": 227.78182983398438, - "logits/rejected": 100.99510192871094, - "logps/chosen": -5.509385108947754, - "logps/rejected": -5.420079231262207, - "loss": 6.076, - "nll_loss": 5.97393798828125, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.27546927332878113, - "rewards/margins": -0.004465299658477306, - "rewards/rejected": -0.27100396156311035, + "log_odds_chosen": -0.189157634973526, + "log_odds_ratio": -0.9602483510971069, + "logits/chosen": 267.5614318847656, + "logits/rejected": 265.63116455078125, + "logps/chosen": -5.070517063140869, + "logps/rejected": -4.881190299987793, + "loss": 5.3347, + "nll_loss": 5.30587911605835, + "rewards/accuracies": 0.4375, + "rewards/chosen": -0.25352585315704346, + "rewards/margins": -0.009466320276260376, + "rewards/rejected": -0.2440594881772995, "step": 20 }, { - "epoch": 0.018642803877703208, - "grad_norm": 124.50040435791016, + "epoch": 0.15885623510722796, + "grad_norm": 114.50188446044922, "learning_rate": 1.25e-06, - "log_odds_chosen": -0.008144287392497063, - "log_odds_ratio": -0.7349498867988586, - "logits/chosen": 143.55819702148438, - "logits/rejected": 63.01588821411133, - "logps/chosen": -3.8675436973571777, - "logps/rejected": -3.8630337715148926, - "loss": 4.2777, - "nll_loss": 4.286264896392822, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.19337718188762665, - "rewards/margins": -0.0002255052386317402, - "rewards/rejected": -0.19315168261528015, + "log_odds_chosen": -0.28530603647232056, + "log_odds_ratio": -0.9948366284370422, + "logits/chosen": 293.87176513671875, + "logits/rejected": 281.34368896484375, + "logps/chosen": -3.002964973449707, + "logps/rejected": -2.7326130867004395, + "loss": 3.2523, + "nll_loss": 3.1672582626342773, + "rewards/accuracies": 0.42500001192092896, + "rewards/chosen": -0.15014824271202087, + "rewards/margins": -0.01351759023964405, + "rewards/rejected": -0.13663065433502197, "step": 25 }, { - "epoch": 0.02237136465324385, - "grad_norm": 205.4697723388672, + "epoch": 0.19062748212867356, + "grad_norm": 115.87854766845703, "learning_rate": 1.5e-06, - "log_odds_chosen": 0.8379373550415039, - "log_odds_ratio": -0.5807265043258667, - "logits/chosen": 198.85906982421875, - "logits/rejected": 203.31541442871094, - "logps/chosen": -1.8878374099731445, - "logps/rejected": -2.694627046585083, - "loss": 3.2355, - "nll_loss": 2.5267791748046875, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.09439187496900558, - "rewards/margins": 0.04033948853611946, - "rewards/rejected": -0.13473136723041534, + "log_odds_chosen": 0.011725234799087048, + "log_odds_ratio": -0.9292875528335571, + "logits/chosen": 348.6666259765625, + "logits/rejected": 345.4848327636719, + "logps/chosen": -2.375824451446533, + "logps/rejected": -2.386888027191162, + "loss": 2.5635, + "nll_loss": 2.6432275772094727, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.11879122257232666, + "rewards/margins": 0.0005531776696443558, + "rewards/rejected": -0.11934439837932587, "step": 30 }, { - "epoch": 0.02609992542878449, - "grad_norm": 73.313720703125, + "epoch": 0.22239872915011913, + "grad_norm": 62.12422561645508, "learning_rate": 1.75e-06, - "log_odds_chosen": 0.3058754503726959, - "log_odds_ratio": -0.5941787958145142, - "logits/chosen": 203.56773376464844, - "logits/rejected": 263.705322265625, - "logps/chosen": -1.6541156768798828, - "logps/rejected": -1.84432053565979, - "loss": 2.4003, - "nll_loss": 1.9200798273086548, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0827057808637619, - "rewards/margins": 0.009510248899459839, - "rewards/rejected": -0.09221602976322174, + "log_odds_chosen": 0.27093321084976196, + "log_odds_ratio": -0.6616618037223816, + "logits/chosen": 379.88690185546875, + "logits/rejected": 405.73138427734375, + "logps/chosen": -1.729331612586975, + "logps/rejected": -1.973097562789917, + "loss": 2.0724, + "nll_loss": 2.065566062927246, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.08646658807992935, + "rewards/margins": 0.01218829583376646, + "rewards/rejected": -0.09865488857030869, "step": 35 }, { - "epoch": 0.02982848620432513, - "grad_norm": 466.95208740234375, + "epoch": 0.2541699761715647, + "grad_norm": 82.40242004394531, "learning_rate": 2.0000000000000003e-06, - "log_odds_chosen": 0.28428730368614197, - "log_odds_ratio": -0.5803871154785156, - "logits/chosen": 282.6171875, - "logits/rejected": 269.4759826660156, - "logps/chosen": -1.4066884517669678, - "logps/rejected": -1.6653988361358643, - "loss": 2.4981, - "nll_loss": 1.9554020166397095, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.07033442705869675, - "rewards/margins": 0.012935513630509377, - "rewards/rejected": -0.08326993882656097, + "log_odds_chosen": 0.08508733659982681, + "log_odds_ratio": -0.7854072451591492, + "logits/chosen": 400.20355224609375, + "logits/rejected": 401.4512023925781, + "logps/chosen": -1.6863765716552734, + "logps/rejected": -1.738532304763794, + "loss": 2.1651, + "nll_loss": 2.0580391883850098, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.08431883156299591, + "rewards/margins": 0.0026077807415276766, + "rewards/rejected": -0.08692661672830582, "step": 40 }, { - "epoch": 0.03355704697986577, - "grad_norm": 94.82681274414062, + "epoch": 0.28594122319301035, + "grad_norm": 36.50082778930664, "learning_rate": 2.25e-06, - "log_odds_chosen": -0.21197958290576935, - "log_odds_ratio": -0.8181799054145813, - "logits/chosen": 224.0678253173828, - "logits/rejected": 217.28475952148438, - "logps/chosen": -1.5141856670379639, - "logps/rejected": -1.357521653175354, - "loss": 2.2177, - "nll_loss": 1.9071203470230103, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07570929080247879, - "rewards/margins": -0.007833211682736874, - "rewards/rejected": -0.06787607818841934, + "log_odds_chosen": 0.13595962524414062, + "log_odds_ratio": -0.729759693145752, + "logits/chosen": 399.974365234375, + "logits/rejected": 397.14892578125, + "logps/chosen": -1.4609622955322266, + "logps/rejected": -1.5716912746429443, + "loss": 1.889, + "nll_loss": 1.9739797115325928, + "rewards/accuracies": 0.5249999761581421, + "rewards/chosen": -0.07304811477661133, + "rewards/margins": 0.005536452867090702, + "rewards/rejected": -0.07858456671237946, "step": 45 }, { - "epoch": 0.037285607755406416, - "grad_norm": 119.91692352294922, + "epoch": 0.3177124702144559, + "grad_norm": 41.53988265991211, "learning_rate": 2.5e-06, - "log_odds_chosen": -0.12046583741903305, - "log_odds_ratio": -0.758738100528717, - "logits/chosen": 223.9627685546875, - "logits/rejected": 263.611083984375, - "logps/chosen": -1.7306101322174072, - "logps/rejected": -1.6333354711532593, - "loss": 2.2005, - "nll_loss": 2.081503391265869, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.08653049916028976, - "rewards/margins": -0.004863731563091278, - "rewards/rejected": -0.08166676759719849, + "log_odds_chosen": -0.2444552630186081, + "log_odds_ratio": -0.9091756939888, + "logits/chosen": 404.7247009277344, + "logits/rejected": 382.5657653808594, + "logps/chosen": -1.5261789560317993, + "logps/rejected": -1.345625877380371, + "loss": 1.8269, + "nll_loss": 1.8267475366592407, + "rewards/accuracies": 0.4124999940395355, + "rewards/chosen": -0.07630894333124161, + "rewards/margins": -0.009027653373777866, + "rewards/rejected": -0.06728129088878632, "step": 50 }, { - "epoch": 0.041014168530947054, - "grad_norm": 103.0398178100586, + "epoch": 0.3494837172359015, + "grad_norm": 33.210506439208984, "learning_rate": 2.7500000000000004e-06, - "log_odds_chosen": -0.06655925512313843, - "log_odds_ratio": -0.7688400149345398, - "logits/chosen": 350.70721435546875, - "logits/rejected": 262.0745849609375, - "logps/chosen": -1.8242229223251343, - "logps/rejected": -1.8010963201522827, - "loss": 2.1083, - "nll_loss": 2.0412707328796387, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.09121114015579224, - "rewards/margins": -0.0011563145089894533, - "rewards/rejected": -0.09005482494831085, + "log_odds_chosen": 0.0011972666252404451, + "log_odds_ratio": -0.801056981086731, + "logits/chosen": 393.1558837890625, + "logits/rejected": 376.5151672363281, + "logps/chosen": -1.3886432647705078, + "logps/rejected": -1.3649101257324219, + "loss": 1.6794, + "nll_loss": 1.606018304824829, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.06943216919898987, + "rewards/margins": -0.0011866561835631728, + "rewards/rejected": -0.06824551522731781, "step": 55 }, { - "epoch": 0.0447427293064877, - "grad_norm": 455.7726745605469, + "epoch": 0.3812549642573471, + "grad_norm": 70.0084228515625, "learning_rate": 3e-06, - "log_odds_chosen": 1.407046914100647, - "log_odds_ratio": -0.30637988448143005, - "logits/chosen": 223.0300750732422, - "logits/rejected": 330.1884765625, - "logps/chosen": -1.148965835571289, - "logps/rejected": -2.2877042293548584, - "loss": 2.3268, - "nll_loss": 1.547377586364746, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.057448290288448334, - "rewards/margins": 0.05693693086504936, - "rewards/rejected": -0.114385224878788, + "log_odds_chosen": 0.1671205312013626, + "log_odds_ratio": -0.6716384887695312, + "logits/chosen": 373.5995178222656, + "logits/rejected": 394.85931396484375, + "logps/chosen": -1.235502004623413, + "logps/rejected": -1.3615134954452515, + "loss": 1.7286, + "nll_loss": 1.637976884841919, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.06177510693669319, + "rewards/margins": 0.00630057230591774, + "rewards/rejected": -0.06807567924261093, "step": 60 }, { - "epoch": 0.048471290082028336, - "grad_norm": 66.61519622802734, + "epoch": 0.4130262112787927, + "grad_norm": 51.036556243896484, "learning_rate": 3.2500000000000002e-06, - "log_odds_chosen": 0.4561333656311035, - "log_odds_ratio": -0.5122390985488892, - "logits/chosen": 313.30706787109375, - "logits/rejected": 269.1872253417969, - "logps/chosen": -1.4225890636444092, - "logps/rejected": -1.7800216674804688, - "loss": 1.8738, - "nll_loss": 1.5836204290390015, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.0711294561624527, - "rewards/margins": 0.017871633172035217, - "rewards/rejected": -0.08900108188390732, + "log_odds_chosen": -0.011178660206496716, + "log_odds_ratio": -0.7863475680351257, + "logits/chosen": 371.5272521972656, + "logits/rejected": 370.0439758300781, + "logps/chosen": -1.2801154851913452, + "logps/rejected": -1.2939107418060303, + "loss": 1.6974, + "nll_loss": 1.6114212274551392, + "rewards/accuracies": 0.4375, + "rewards/chosen": -0.0640057772397995, + "rewards/margins": 0.0006897579878568649, + "rewards/rejected": -0.06469553709030151, "step": 65 }, { - "epoch": 0.05219985085756898, - "grad_norm": 764.498046875, + "epoch": 0.44479745830023826, + "grad_norm": 51.9803352355957, "learning_rate": 3.5e-06, - "log_odds_chosen": 0.32774752378463745, - "log_odds_ratio": -0.5594722628593445, - "logits/chosen": 243.994873046875, - "logits/rejected": 290.7823791503906, - "logps/chosen": -1.3039686679840088, - "logps/rejected": -1.5470539331436157, - "loss": 2.1688, - "nll_loss": 1.7477737665176392, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06519843637943268, - "rewards/margins": 0.012154261581599712, - "rewards/rejected": -0.07735269516706467, + "log_odds_chosen": 0.30882030725479126, + "log_odds_ratio": -0.6189635992050171, + "logits/chosen": 390.2756652832031, + "logits/rejected": 397.1745910644531, + "logps/chosen": -1.1560722589492798, + "logps/rejected": -1.3726001977920532, + "loss": 1.6066, + "nll_loss": 1.5191279649734497, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.05780360847711563, + "rewards/margins": 0.010826398618519306, + "rewards/rejected": -0.06863000988960266, "step": 70 }, { - "epoch": 0.05592841163310962, - "grad_norm": 104.47029113769531, + "epoch": 0.4765687053216839, + "grad_norm": 37.97161102294922, "learning_rate": 3.7500000000000005e-06, - "log_odds_chosen": 0.4321501851081848, - "log_odds_ratio": -0.5143711566925049, - "logits/chosen": 227.9460906982422, - "logits/rejected": 218.782958984375, - "logps/chosen": -1.1221461296081543, - "logps/rejected": -1.377166509628296, - "loss": 2.1158, - "nll_loss": 2.030961036682129, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05610731244087219, - "rewards/margins": 0.012751013040542603, - "rewards/rejected": -0.0688583254814148, + "log_odds_chosen": 0.20173080265522003, + "log_odds_ratio": -0.6899309754371643, + "logits/chosen": 401.42218017578125, + "logits/rejected": 395.65472412109375, + "logps/chosen": -1.2323527336120605, + "logps/rejected": -1.3662245273590088, + "loss": 1.696, + "nll_loss": 1.6926769018173218, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.06161763519048691, + "rewards/margins": 0.00669359415769577, + "rewards/rejected": -0.06831122934818268, "step": 75 }, { - "epoch": 0.05965697240865026, - "grad_norm": 114.0363998413086, + "epoch": 0.5083399523431295, + "grad_norm": 27.679792404174805, "learning_rate": 4.000000000000001e-06, - "log_odds_chosen": 0.11052701622247696, - "log_odds_ratio": -0.6484851241111755, - "logits/chosen": 242.1932373046875, - "logits/rejected": 247.5641632080078, - "logps/chosen": -1.564343810081482, - "logps/rejected": -1.6490647792816162, - "loss": 2.0079, - "nll_loss": 1.964008092880249, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07821719348430634, - "rewards/margins": 0.004236044827848673, - "rewards/rejected": -0.08245324343442917, + "log_odds_chosen": 0.3312566876411438, + "log_odds_ratio": -0.6321982145309448, + "logits/chosen": 394.644775390625, + "logits/rejected": 373.4120788574219, + "logps/chosen": -1.1556816101074219, + "logps/rejected": -1.391071081161499, + "loss": 1.6105, + "nll_loss": 1.6470855474472046, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.057784080505371094, + "rewards/margins": 0.011769475415349007, + "rewards/rejected": -0.06955355405807495, "step": 80 }, { - "epoch": 0.0633855331841909, - "grad_norm": 224.0396728515625, + "epoch": 0.5401111993645751, + "grad_norm": 32.451473236083984, "learning_rate": 4.25e-06, - "log_odds_chosen": 0.5332654714584351, - "log_odds_ratio": -0.4968414902687073, - "logits/chosen": 335.633056640625, - "logits/rejected": 220.3801727294922, - "logps/chosen": -1.5125176906585693, - "logps/rejected": -1.9514904022216797, - "loss": 2.5581, - "nll_loss": 2.124267101287842, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.07562588900327682, - "rewards/margins": 0.02194863185286522, - "rewards/rejected": -0.09757451713085175, + "log_odds_chosen": 0.20187363028526306, + "log_odds_ratio": -0.6924890279769897, + "logits/chosen": 396.11480712890625, + "logits/rejected": 401.170654296875, + "logps/chosen": -1.189259648323059, + "logps/rejected": -1.3549880981445312, + "loss": 1.6372, + "nll_loss": 1.544048547744751, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.05946297571063042, + "rewards/margins": 0.008286429569125175, + "rewards/rejected": -0.06774941086769104, "step": 85 }, { - "epoch": 0.06711409395973154, - "grad_norm": 65.78761291503906, + "epoch": 0.5718824463860207, + "grad_norm": 39.86790084838867, "learning_rate": 4.5e-06, - "log_odds_chosen": 0.10792939364910126, - "log_odds_ratio": -0.6557523012161255, - "logits/chosen": 270.37286376953125, - "logits/rejected": 204.8466796875, - "logps/chosen": -1.491753101348877, - "logps/rejected": -1.5502411127090454, - "loss": 2.2332, - "nll_loss": 2.3828036785125732, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.07458765804767609, - "rewards/margins": 0.0029244013130664825, - "rewards/rejected": -0.07751204818487167, + "log_odds_chosen": 0.030326470732688904, + "log_odds_ratio": -0.7561807036399841, + "logits/chosen": 386.7585754394531, + "logits/rejected": 383.3739013671875, + "logps/chosen": -1.1144773960113525, + "logps/rejected": -1.1401522159576416, + "loss": 1.4773, + "nll_loss": 1.4328895807266235, + "rewards/accuracies": 0.44999998807907104, + "rewards/chosen": -0.05572386458516121, + "rewards/margins": 0.001283742836676538, + "rewards/rejected": -0.05700760334730148, "step": 90 }, { - "epoch": 0.07084265473527218, - "grad_norm": 167.13412475585938, + "epoch": 0.6036536934074662, + "grad_norm": 47.055973052978516, "learning_rate": 4.75e-06, - "log_odds_chosen": -0.003233870957046747, - "log_odds_ratio": -0.6979271173477173, - "logits/chosen": 217.70120239257812, - "logits/rejected": 259.27044677734375, - "logps/chosen": -1.3917222023010254, - "logps/rejected": -1.3951622247695923, - "loss": 1.7876, - "nll_loss": 1.734521508216858, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06958611309528351, - "rewards/margins": 0.00017200336151290685, - "rewards/rejected": -0.0697581097483635, + "log_odds_chosen": -0.09162646532058716, + "log_odds_ratio": -0.8472088575363159, + "logits/chosen": 389.9097595214844, + "logits/rejected": 398.4985656738281, + "logps/chosen": -1.3111821413040161, + "logps/rejected": -1.2368316650390625, + "loss": 1.5514, + "nll_loss": 1.5718324184417725, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.06555911153554916, + "rewards/margins": -0.0037175267934799194, + "rewards/rejected": -0.061841584742069244, "step": 95 }, { - "epoch": 0.07457121551081283, - "grad_norm": 51.20694351196289, + "epoch": 0.6354249404289118, + "grad_norm": 37.49895477294922, "learning_rate": 5e-06, - "log_odds_chosen": 0.33344805240631104, - "log_odds_ratio": -0.5784560441970825, - "logits/chosen": 350.18475341796875, - "logits/rejected": 261.2807312011719, - "logps/chosen": -1.2394134998321533, - "logps/rejected": -1.4791924953460693, - "loss": 1.8536, - "nll_loss": 1.7119747400283813, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06197067350149155, - "rewards/margins": 0.011988953687250614, - "rewards/rejected": -0.07395963370800018, + "log_odds_chosen": 0.08311296999454498, + "log_odds_ratio": -0.8223379254341125, + "logits/chosen": 384.28240966796875, + "logits/rejected": 397.09820556640625, + "logps/chosen": -1.2262612581253052, + "logps/rejected": -1.2850096225738525, + "loss": 1.5995, + "nll_loss": 1.5745359659194946, + "rewards/accuracies": 0.5, + "rewards/chosen": -0.06131306290626526, + "rewards/margins": 0.0029374232981354, + "rewards/rejected": -0.06425048410892487, "step": 100 }, { - "epoch": 0.07829977628635347, - "grad_norm": 89.20001220703125, + "epoch": 0.6671961874503575, + "grad_norm": 41.63264846801758, "learning_rate": 4.8795003647426654e-06, - "log_odds_chosen": -0.2172829806804657, - "log_odds_ratio": -0.8197504281997681, - "logits/chosen": 289.9922180175781, - "logits/rejected": 210.4510040283203, - "logps/chosen": -1.2777154445648193, - "logps/rejected": -1.126091718673706, - "loss": 1.7674, - "nll_loss": 1.5219529867172241, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.06388577073812485, - "rewards/margins": -0.007581187877804041, - "rewards/rejected": -0.05630458518862724, + "log_odds_chosen": 0.24810612201690674, + "log_odds_ratio": -0.6684719324111938, + "logits/chosen": 397.1859436035156, + "logits/rejected": 402.24468994140625, + "logps/chosen": -1.1126227378845215, + "logps/rejected": -1.2968095541000366, + "loss": 1.4715, + "nll_loss": 1.4355812072753906, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.0556311309337616, + "rewards/margins": 0.009209347888827324, + "rewards/rejected": -0.06484048068523407, "step": 105 }, { - "epoch": 0.08202833706189411, - "grad_norm": 66.20305633544922, + "epoch": 0.698967434471803, + "grad_norm": 496.76666259765625, "learning_rate": 4.767312946227961e-06, - "log_odds_chosen": 0.36778706312179565, - "log_odds_ratio": -0.5662060379981995, - "logits/chosen": 263.53118896484375, - "logits/rejected": 276.343994140625, - "logps/chosen": -1.6583328247070312, - "logps/rejected": -1.9768173694610596, - "loss": 1.7919, - "nll_loss": 2.0997543334960938, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.08291663974523544, - "rewards/margins": 0.01592421904206276, - "rewards/rejected": -0.0988408699631691, + "log_odds_chosen": 0.3310456871986389, + "log_odds_ratio": -0.6511000394821167, + "logits/chosen": 380.10076904296875, + "logits/rejected": 382.27056884765625, + "logps/chosen": -1.0597606897354126, + "logps/rejected": -1.2940515279769897, + "loss": 1.5519, + "nll_loss": 1.5879844427108765, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.05298804119229317, + "rewards/margins": 0.011714537627995014, + "rewards/rejected": -0.0647025778889656, "step": 110 }, { - "epoch": 0.08575689783743475, - "grad_norm": 38.97867202758789, + "epoch": 0.7307386814932486, + "grad_norm": 23.994157791137695, "learning_rate": 4.662524041201569e-06, - "log_odds_chosen": -0.5233656167984009, - "log_odds_ratio": -1.0624151229858398, - "logits/chosen": 257.26715087890625, - "logits/rejected": 346.6606140136719, - "logps/chosen": -1.4641997814178467, - "logps/rejected": -1.1092920303344727, - "loss": 1.7582, - "nll_loss": 2.0867035388946533, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07320999354124069, - "rewards/margins": -0.01774539239704609, - "rewards/rejected": -0.05546460300683975, + "log_odds_chosen": 0.26323679089546204, + "log_odds_ratio": -0.6691566705703735, + "logits/chosen": 380.04034423828125, + "logits/rejected": 367.25006103515625, + "logps/chosen": -1.07338285446167, + "logps/rejected": -1.2431564331054688, + "loss": 1.5679, + "nll_loss": 1.458599328994751, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.053669143468141556, + "rewards/margins": 0.008488676510751247, + "rewards/rejected": -0.06215781718492508, "step": 115 }, { - "epoch": 0.0894854586129754, - "grad_norm": 72.83590698242188, + "epoch": 0.7625099285146942, + "grad_norm": 20.148061752319336, "learning_rate": 4.564354645876385e-06, - "log_odds_chosen": 0.1867630034685135, - "log_odds_ratio": -0.635473906993866, - "logits/chosen": 257.12939453125, - "logits/rejected": 273.4212646484375, - "logps/chosen": -1.0779876708984375, - "logps/rejected": -1.214703917503357, - "loss": 1.6817, - "nll_loss": 1.9282290935516357, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.053899385035037994, - "rewards/margins": 0.006835813634097576, - "rewards/rejected": -0.060735203325748444, + "log_odds_chosen": 0.2933538556098938, + "log_odds_ratio": -0.6565826535224915, + "logits/chosen": 406.1932067871094, + "logits/rejected": 407.4499816894531, + "logps/chosen": -1.0362493991851807, + "logps/rejected": -1.2259390354156494, + "loss": 1.4528, + "nll_loss": 1.3933870792388916, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.05181247740983963, + "rewards/margins": 0.009484483860433102, + "rewards/rejected": -0.06129695847630501, "step": 120 }, { - "epoch": 0.09321401938851603, - "grad_norm": 53.919158935546875, + "epoch": 0.7942811755361397, + "grad_norm": 35.251365661621094, "learning_rate": 4.47213595499958e-06, - "log_odds_chosen": -0.6173880696296692, - "log_odds_ratio": -1.0799881219863892, - "logits/chosen": 354.52215576171875, - "logits/rejected": 317.95318603515625, - "logps/chosen": -1.6679084300994873, - "logps/rejected": -1.2116508483886719, - "loss": 2.0518, - "nll_loss": 1.7800493240356445, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.08339542150497437, - "rewards/margins": -0.022812869399785995, - "rewards/rejected": -0.06058254837989807, + "log_odds_chosen": 0.2278643101453781, + "log_odds_ratio": -0.6969554424285889, + "logits/chosen": 411.54229736328125, + "logits/rejected": 392.1700744628906, + "logps/chosen": -1.151000738143921, + "logps/rejected": -1.2925688028335571, + "loss": 1.4981, + "nll_loss": 1.506788969039917, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.057550035417079926, + "rewards/margins": 0.00707840034738183, + "rewards/rejected": -0.06462844461202621, "step": 125 }, { - "epoch": 0.09694258016405667, - "grad_norm": 148.11685180664062, + "epoch": 0.8260524225575854, + "grad_norm": 33.39112091064453, "learning_rate": 4.385290096535147e-06, - "log_odds_chosen": -0.9434579610824585, - "log_odds_ratio": -1.3099195957183838, - "logits/chosen": 304.92437744140625, - "logits/rejected": 323.55010986328125, - "logps/chosen": -2.212212562561035, - "logps/rejected": -1.413895845413208, - "loss": 2.086, - "nll_loss": 2.250018358230591, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.11061062663793564, - "rewards/margins": -0.039915841072797775, - "rewards/rejected": -0.07069478929042816, + "log_odds_chosen": 0.1466347724199295, + "log_odds_ratio": -0.7478917837142944, + "logits/chosen": 414.90576171875, + "logits/rejected": 358.86920166015625, + "logps/chosen": -1.0797452926635742, + "logps/rejected": -1.1873576641082764, + "loss": 1.5469, + "nll_loss": 1.5067487955093384, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.05398727208375931, + "rewards/margins": 0.005380617920309305, + "rewards/rejected": -0.059367887675762177, "step": 130 }, { - "epoch": 0.10067114093959731, - "grad_norm": 48.90298843383789, + "epoch": 0.857823669579031, + "grad_norm": 19.592321395874023, "learning_rate": 4.303314829119352e-06, - "log_odds_chosen": 0.061207592487335205, - "log_odds_ratio": -0.6816572546958923, - "logits/chosen": 249.1715545654297, - "logits/rejected": 434.4105529785156, - "logps/chosen": -1.5163899660110474, - "logps/rejected": -1.553469181060791, - "loss": 1.7243, - "nll_loss": 1.705309271812439, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07581949979066849, - "rewards/margins": 0.001853963709436357, - "rewards/rejected": -0.07767346501350403, + "log_odds_chosen": 0.38857999444007874, + "log_odds_ratio": -0.6473441123962402, + "logits/chosen": 396.6593017578125, + "logits/rejected": 397.8945617675781, + "logps/chosen": -0.9662467837333679, + "logps/rejected": -1.2553133964538574, + "loss": 1.4694, + "nll_loss": 1.4183107614517212, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.04831233620643616, + "rewards/margins": 0.014453329145908356, + "rewards/rejected": -0.06276567280292511, "step": 135 }, { - "epoch": 0.10439970171513796, - "grad_norm": 64.9086685180664, + "epoch": 0.8895949166004765, + "grad_norm": 21.185075759887695, "learning_rate": 4.2257712736425835e-06, - "log_odds_chosen": -0.13451829552650452, - "log_odds_ratio": -0.8019693493843079, - "logits/chosen": 319.10736083984375, - "logits/rejected": 289.54827880859375, - "logps/chosen": -0.8691197633743286, - "logps/rejected": -0.82783442735672, - "loss": 1.7754, - "nll_loss": 1.4918982982635498, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.04345599189400673, - "rewards/margins": -0.0020642676390707493, - "rewards/rejected": -0.04139172285795212, + "log_odds_chosen": 0.4796748161315918, + "log_odds_ratio": -0.6582080125808716, + "logits/chosen": 397.1627197265625, + "logits/rejected": 431.449951171875, + "logps/chosen": -1.0234482288360596, + "logps/rejected": -1.401524305343628, + "loss": 1.4705, + "nll_loss": 1.3904411792755127, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.051172398030757904, + "rewards/margins": 0.018903804942965508, + "rewards/rejected": -0.07007621228694916, "step": 140 }, { - "epoch": 0.1081282624906786, - "grad_norm": 53.79122543334961, + "epoch": 0.9213661636219221, + "grad_norm": 43.18369674682617, "learning_rate": 4.1522739926869985e-06, - "log_odds_chosen": -0.6043481826782227, - "log_odds_ratio": -1.0626015663146973, - "logits/chosen": 314.5655517578125, - "logits/rejected": 355.81402587890625, - "logps/chosen": -1.7926788330078125, - "logps/rejected": -1.2945716381072998, - "loss": 1.7818, - "nll_loss": 1.7832496166229248, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.08963393419981003, - "rewards/margins": -0.024905351921916008, - "rewards/rejected": -0.06472858786582947, + "log_odds_chosen": 0.20573845505714417, + "log_odds_ratio": -0.7000477910041809, + "logits/chosen": 411.5990295410156, + "logits/rejected": 409.30670166015625, + "logps/chosen": -1.0646284818649292, + "logps/rejected": -1.2127861976623535, + "loss": 1.4419, + "nll_loss": 1.4223747253417969, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.05323142558336258, + "rewards/margins": 0.007407893426716328, + "rewards/rejected": -0.060639314353466034, "step": 145 }, { - "epoch": 0.11185682326621924, - "grad_norm": 103.34313201904297, + "epoch": 0.9531374106433678, + "grad_norm": 32.28419876098633, "learning_rate": 4.082482904638631e-06, - "log_odds_chosen": -0.4338023066520691, - "log_odds_ratio": -1.0923845767974854, - "logits/chosen": 296.53668212890625, - "logits/rejected": 270.31048583984375, - "logps/chosen": -1.7469406127929688, - "logps/rejected": -1.4009050130844116, - "loss": 1.8981, - "nll_loss": 2.0452022552490234, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.08734703063964844, - "rewards/margins": -0.0173017755150795, - "rewards/rejected": -0.07004524767398834, + "log_odds_chosen": 0.16876378655433655, + "log_odds_ratio": -0.7580657601356506, + "logits/chosen": 422.814208984375, + "logits/rejected": 394.9488220214844, + "logps/chosen": -1.0943793058395386, + "logps/rejected": -1.2127244472503662, + "loss": 1.4868, + "nll_loss": 1.514913558959961, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.054718971252441406, + "rewards/margins": 0.005917248781770468, + "rewards/rejected": -0.06063622236251831, "step": 150 }, { - "epoch": 0.11558538404175989, - "grad_norm": 139.6165771484375, + "epoch": 0.9849086576648134, + "grad_norm": 29.756258010864258, "learning_rate": 4.016096644512495e-06, - "log_odds_chosen": -0.22967422008514404, - "log_odds_ratio": -0.8180699348449707, - "logits/chosen": 308.9629211425781, - "logits/rejected": 294.52862548828125, - "logps/chosen": -1.7505199909210205, - "logps/rejected": -1.5576521158218384, - "loss": 1.7895, - "nll_loss": 1.9372638463974, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.08752600103616714, - "rewards/margins": -0.009643396362662315, - "rewards/rejected": -0.07788260281085968, + "log_odds_chosen": 0.24277754127979279, + "log_odds_ratio": -0.6817291378974915, + "logits/chosen": 382.361572265625, + "logits/rejected": 412.5480041503906, + "logps/chosen": -1.031072735786438, + "logps/rejected": -1.2335267066955566, + "loss": 1.4933, + "nll_loss": 1.3717305660247803, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.0515536367893219, + "rewards/margins": 0.010122699663043022, + "rewards/rejected": -0.06167633458971977, "step": 155 }, { - "epoch": 0.11931394481730052, - "grad_norm": 116.25865936279297, + "epoch": 0.9976171564733916, + "eval_log_odds_chosen": 0.32397615909576416, + "eval_log_odds_ratio": -0.6557961106300354, + "eval_logits/chosen": 318.2524108886719, + "eval_logits/rejected": 307.1602478027344, + "eval_logps/chosen": -1.0023059844970703, + "eval_logps/rejected": -1.216626763343811, + "eval_loss": 1.4686377048492432, + "eval_nll_loss": 1.412723422050476, + "eval_rewards/accuracies": 0.577617347240448, + "eval_rewards/chosen": -0.050115302205085754, + "eval_rewards/margins": 0.010716039687395096, + "eval_rewards/rejected": -0.06083134561777115, + "eval_runtime": 278.9943, + "eval_samples_per_second": 1.982, + "eval_steps_per_second": 0.993, + "step": 157 + }, + { + "epoch": 1.016679904686259, + "grad_norm": 23.072566986083984, "learning_rate": 3.952847075210474e-06, - "log_odds_chosen": 0.03539223596453667, - "log_odds_ratio": -0.7288457155227661, - "logits/chosen": 262.11688232421875, - "logits/rejected": 344.2138977050781, - "logps/chosen": -1.3145208358764648, - "logps/rejected": -1.3181880712509155, - "loss": 1.8174, - "nll_loss": 1.916478157043457, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06572604179382324, - "rewards/margins": 0.00018336325592827052, - "rewards/rejected": -0.06590940058231354, + "log_odds_chosen": 0.27298638224601746, + "log_odds_ratio": -0.656934380531311, + "logits/chosen": 396.47760009765625, + "logits/rejected": 389.45477294921875, + "logps/chosen": -0.8926975131034851, + "logps/rejected": -1.0694555044174194, + "loss": 1.2664, + "nll_loss": 1.2796380519866943, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.0446348711848259, + "rewards/margins": 0.008837896399199963, + "rewards/rejected": -0.053472768515348434, "step": 160 }, { - "epoch": 0.12304250559284116, - "grad_norm": 62.18147277832031, + "epoch": 1.0484511517077044, + "grad_norm": 37.10022735595703, "learning_rate": 3.892494720807615e-06, - "log_odds_chosen": 0.2947339415550232, - "log_odds_ratio": -0.558483898639679, - "logits/chosen": 296.03948974609375, - "logits/rejected": 271.2762756347656, - "logps/chosen": -1.169194221496582, - "logps/rejected": -1.3821598291397095, - "loss": 1.56, - "nll_loss": 1.6544113159179688, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05845970660448074, - "rewards/margins": 0.010648288764059544, - "rewards/rejected": -0.06910799443721771, + "log_odds_chosen": 1.2404422760009766, + "log_odds_ratio": -0.39491257071495056, + "logits/chosen": 401.09716796875, + "logits/rejected": 423.9908142089844, + "logps/chosen": -0.6766742467880249, + "logps/rejected": -1.383744478225708, + "loss": 1.082, + "nll_loss": 1.041725754737854, + "rewards/accuracies": 0.862500011920929, + "rewards/chosen": -0.033833712339401245, + "rewards/margins": 0.03535350412130356, + "rewards/rejected": -0.0691872239112854, "step": 165 }, { - "epoch": 0.1267710663683818, - "grad_norm": 58.30064010620117, + "epoch": 1.0802223987291502, + "grad_norm": 19.41335105895996, "learning_rate": 3.834824944236852e-06, - "log_odds_chosen": 0.11295785754919052, - "log_odds_ratio": -0.7195954322814941, - "logits/chosen": 265.0535583496094, - "logits/rejected": 339.28814697265625, - "logps/chosen": -1.49436354637146, - "logps/rejected": -1.5810743570327759, - "loss": 1.8568, - "nll_loss": 1.840406060218811, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0747181847691536, - "rewards/margins": 0.004335532430559397, - "rewards/rejected": -0.07905371487140656, + "log_odds_chosen": 1.1259419918060303, + "log_odds_ratio": -0.42683523893356323, + "logits/chosen": 380.9306335449219, + "logits/rejected": 400.4065856933594, + "logps/chosen": -0.7301725149154663, + "logps/rejected": -1.4242193698883057, + "loss": 1.0335, + "nll_loss": 1.010096788406372, + "rewards/accuracies": 0.7875000238418579, + "rewards/chosen": -0.036508627235889435, + "rewards/margins": 0.03470234200358391, + "rewards/rejected": -0.07121096551418304, "step": 170 }, { - "epoch": 0.13049962714392244, - "grad_norm": 77.85432434082031, + "epoch": 1.1119936457505957, + "grad_norm": 33.67777633666992, "learning_rate": 3.7796447300922724e-06, - "log_odds_chosen": -0.2555588185787201, - "log_odds_ratio": -0.8658846616744995, - "logits/chosen": 255.42410278320312, - "logits/rejected": 375.9440002441406, - "logps/chosen": -1.324678897857666, - "logps/rejected": -1.1620999574661255, - "loss": 1.6704, - "nll_loss": 1.6739990711212158, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06623394787311554, - "rewards/margins": -0.00812895130366087, - "rewards/rejected": -0.058104999363422394, + "log_odds_chosen": 1.1387039422988892, + "log_odds_ratio": -0.389271080493927, + "logits/chosen": 397.97271728515625, + "logits/rejected": 384.86895751953125, + "logps/chosen": -0.7504047751426697, + "logps/rejected": -1.4629099369049072, + "loss": 1.1499, + "nll_loss": 1.1535775661468506, + "rewards/accuracies": 0.862500011920929, + "rewards/chosen": -0.037520237267017365, + "rewards/margins": 0.03562526777386665, + "rewards/rejected": -0.07314550131559372, "step": 175 }, { - "epoch": 0.1342281879194631, - "grad_norm": 62.58670425415039, + "epoch": 1.1437648927720412, + "grad_norm": 28.816181182861328, "learning_rate": 3.72677996249965e-06, - "log_odds_chosen": 0.5823972225189209, - "log_odds_ratio": -0.4817780554294586, - "logits/chosen": 331.03826904296875, - "logits/rejected": 247.48464965820312, - "logps/chosen": -1.2510488033294678, - "logps/rejected": -1.6883567571640015, - "loss": 1.5885, - "nll_loss": 1.53799307346344, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06255243718624115, - "rewards/margins": 0.021865393966436386, - "rewards/rejected": -0.08441783487796783, + "log_odds_chosen": 0.9216247797012329, + "log_odds_ratio": -0.4554959833621979, + "logits/chosen": 383.74090576171875, + "logits/rejected": 390.2010498046875, + "logps/chosen": -0.7649926543235779, + "logps/rejected": -1.3056840896606445, + "loss": 1.1342, + "nll_loss": 1.0860129594802856, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.03824963420629501, + "rewards/margins": 0.027034565806388855, + "rewards/rejected": -0.06528420001268387, "step": 180 }, { - "epoch": 0.13795674869500374, - "grad_norm": 54.748451232910156, + "epoch": 1.175536139793487, + "grad_norm": 22.407983779907227, "learning_rate": 3.6760731104690393e-06, - "log_odds_chosen": 0.5397362112998962, - "log_odds_ratio": -0.5600816607475281, - "logits/chosen": 351.63714599609375, - "logits/rejected": 283.5084228515625, - "logps/chosen": -1.2479544878005981, - "logps/rejected": -1.5669915676116943, - "loss": 1.7736, - "nll_loss": 1.910321831703186, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06239772588014603, - "rewards/margins": 0.015951860696077347, - "rewards/rejected": -0.07834958285093307, + "log_odds_chosen": 1.1218883991241455, + "log_odds_ratio": -0.39646559953689575, + "logits/chosen": 377.808349609375, + "logits/rejected": 365.0235595703125, + "logps/chosen": -0.6293179988861084, + "logps/rejected": -1.2226511240005493, + "loss": 1.1139, + "nll_loss": 1.145390510559082, + "rewards/accuracies": 0.8500000238418579, + "rewards/chosen": -0.03146589919924736, + "rewards/margins": 0.029666652902960777, + "rewards/rejected": -0.061132557690143585, "step": 185 }, { - "epoch": 0.14168530947054436, - "grad_norm": 248.18423461914062, + "epoch": 1.2073073868149324, + "grad_norm": 21.72103500366211, "learning_rate": 3.6273812505500587e-06, - "log_odds_chosen": 0.25448402762413025, - "log_odds_ratio": -0.5890822410583496, - "logits/chosen": 259.63690185546875, - "logits/rejected": 348.0275573730469, - "logps/chosen": -1.3718206882476807, - "logps/rejected": -1.5867301225662231, - "loss": 1.9085, - "nll_loss": 2.138718605041504, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06859103590250015, - "rewards/margins": 0.010745471343398094, - "rewards/rejected": -0.0793365091085434, + "log_odds_chosen": 1.041015863418579, + "log_odds_ratio": -0.4681476950645447, + "logits/chosen": 411.81097412109375, + "logits/rejected": 402.80029296875, + "logps/chosen": -0.7096566557884216, + "logps/rejected": -1.3321136236190796, + "loss": 1.0785, + "nll_loss": 1.052976369857788, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.03548283129930496, + "rewards/margins": 0.031122848391532898, + "rewards/rejected": -0.06660567224025726, "step": 190 }, { - "epoch": 0.14541387024608501, - "grad_norm": 163.2615203857422, + "epoch": 1.2390786338363782, + "grad_norm": 30.01280403137207, "learning_rate": 3.5805743701971648e-06, - "log_odds_chosen": 0.5804846882820129, - "log_odds_ratio": -0.5761188268661499, - "logits/chosen": 362.5606384277344, - "logits/rejected": 323.415283203125, - "logps/chosen": -1.30750572681427, - "logps/rejected": -1.7822011709213257, - "loss": 2.1105, - "nll_loss": 1.4836498498916626, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06537528336048126, - "rewards/margins": 0.023734772577881813, - "rewards/rejected": -0.08911006152629852, + "log_odds_chosen": 1.0631742477416992, + "log_odds_ratio": -0.3908316493034363, + "logits/chosen": 401.2222595214844, + "logits/rejected": 386.70068359375, + "logps/chosen": -0.6636060476303101, + "logps/rejected": -1.2677198648452759, + "loss": 1.0619, + "nll_loss": 1.0240118503570557, + "rewards/accuracies": 0.862500011920929, + "rewards/chosen": -0.03318030759692192, + "rewards/margins": 0.030205685645341873, + "rewards/rejected": -0.0633859932422638, "step": 195 }, { - "epoch": 0.14914243102162567, - "grad_norm": 67.66429901123047, + "epoch": 1.2708498808578237, + "grad_norm": 27.974498748779297, "learning_rate": 3.5355339059327378e-06, - "log_odds_chosen": -0.09840559959411621, - "log_odds_ratio": -0.7696808576583862, - "logits/chosen": 292.17132568359375, - "logits/rejected": 350.8939514160156, - "logps/chosen": -1.4228382110595703, - "logps/rejected": -1.3467998504638672, - "loss": 1.6893, - "nll_loss": 2.027844190597534, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07114190608263016, - "rewards/margins": -0.0038019120693206787, - "rewards/rejected": -0.06733999401330948, + "log_odds_chosen": 0.97685307264328, + "log_odds_ratio": -0.43242964148521423, + "logits/chosen": 381.5447692871094, + "logits/rejected": 378.9277038574219, + "logps/chosen": -0.6805351972579956, + "logps/rejected": -1.2395280599594116, + "loss": 0.9719, + "nll_loss": 0.9232224225997925, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03402676433324814, + "rewards/margins": 0.027949640527367592, + "rewards/rejected": -0.06197641044855118, "step": 200 }, { - "epoch": 0.1528709917971663, - "grad_norm": 49.09300231933594, + "epoch": 1.3026211278792692, + "grad_norm": 20.539955139160156, "learning_rate": 3.4921514788478916e-06, - "log_odds_chosen": -0.05064551904797554, - "log_odds_ratio": -0.7766270637512207, - "logits/chosen": 283.8548278808594, - "logits/rejected": 396.5144958496094, - "logps/chosen": -1.3490222692489624, - "logps/rejected": -1.2980540990829468, - "loss": 1.6375, - "nll_loss": 1.6541990041732788, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.067451111972332, - "rewards/margins": -0.002548404736444354, - "rewards/rejected": -0.06490271538496017, + "log_odds_chosen": 1.0363399982452393, + "log_odds_ratio": -0.4161090850830078, + "logits/chosen": 394.122802734375, + "logits/rejected": 394.5590515136719, + "logps/chosen": -0.7591055631637573, + "logps/rejected": -1.369094967842102, + "loss": 1.1178, + "nll_loss": 1.1349411010742188, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.037955284118652344, + "rewards/margins": 0.03049946390092373, + "rewards/rejected": -0.06845474243164062, "step": 205 }, { - "epoch": 0.15659955257270694, - "grad_norm": 33.90250015258789, + "epoch": 1.3343923749007147, + "grad_norm": 17.045740127563477, "learning_rate": 3.450327796711771e-06, - "log_odds_chosen": -0.09521440416574478, - "log_odds_ratio": -0.797051191329956, - "logits/chosen": 353.33123779296875, - "logits/rejected": 341.5868835449219, - "logps/chosen": -1.3612937927246094, - "logps/rejected": -1.2704167366027832, - "loss": 1.7825, - "nll_loss": 1.6414436101913452, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06806468963623047, - "rewards/margins": -0.004543852061033249, - "rewards/rejected": -0.06352083384990692, + "log_odds_chosen": 1.0268789529800415, + "log_odds_ratio": -0.4292878210544586, + "logits/chosen": 388.66632080078125, + "logits/rejected": 399.6094970703125, + "logps/chosen": -0.6952003240585327, + "logps/rejected": -1.2499881982803345, + "loss": 1.0443, + "nll_loss": 0.9765409231185913, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.034760020673274994, + "rewards/margins": 0.02773938700556755, + "rewards/rejected": -0.062499403953552246, "step": 210 }, { - "epoch": 0.16032811334824756, - "grad_norm": 90.1456527709961, + "epoch": 1.3661636219221605, + "grad_norm": 18.652254104614258, "learning_rate": 3.409971697352368e-06, - "log_odds_chosen": -0.1627979427576065, - "log_odds_ratio": -0.814998984336853, - "logits/chosen": 330.3114318847656, - "logits/rejected": 322.73486328125, - "logps/chosen": -1.5525935888290405, - "logps/rejected": -1.429889440536499, - "loss": 1.7222, - "nll_loss": 1.991061806678772, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.0776296854019165, - "rewards/margins": -0.006135205272585154, - "rewards/rejected": -0.07149447500705719, + "log_odds_chosen": 1.1054929494857788, + "log_odds_ratio": -0.39855724573135376, + "logits/chosen": 404.5065002441406, + "logits/rejected": 406.90728759765625, + "logps/chosen": -0.615829348564148, + "logps/rejected": -1.1530225276947021, + "loss": 0.966, + "nll_loss": 0.9155328869819641, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.030791467055678368, + "rewards/margins": 0.02685965970158577, + "rewards/rejected": -0.057651132345199585, "step": 215 }, { - "epoch": 0.16405667412378822, - "grad_norm": 59.1326904296875, + "epoch": 1.397934868943606, + "grad_norm": 22.75200080871582, "learning_rate": 3.3709993123162106e-06, - "log_odds_chosen": 0.1278153955936432, - "log_odds_ratio": -0.6686308979988098, - "logits/chosen": 377.4311218261719, - "logits/rejected": 256.1131896972656, - "logps/chosen": -1.5023193359375, - "logps/rejected": -1.5602844953536987, - "loss": 1.7563, - "nll_loss": 1.657243013381958, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07511596381664276, - "rewards/margins": 0.002898257225751877, - "rewards/rejected": -0.07801422476768494, + "log_odds_chosen": 1.1173226833343506, + "log_odds_ratio": -0.39363011717796326, + "logits/chosen": 395.583251953125, + "logits/rejected": 389.8007507324219, + "logps/chosen": -0.6299307942390442, + "logps/rejected": -1.1920979022979736, + "loss": 1.0238, + "nll_loss": 1.0118134021759033, + "rewards/accuracies": 0.8374999761581421, + "rewards/chosen": -0.03149653971195221, + "rewards/margins": 0.02810835838317871, + "rewards/rejected": -0.05960489436984062, "step": 220 }, { - "epoch": 0.16778523489932887, - "grad_norm": 37.11774444580078, + "epoch": 1.4297061159650517, + "grad_norm": 35.366146087646484, "learning_rate": 3.3333333333333333e-06, - "log_odds_chosen": 0.1853155642747879, - "log_odds_ratio": -0.6285178065299988, - "logits/chosen": 260.8616027832031, - "logits/rejected": 330.13543701171875, - "logps/chosen": -1.6330864429473877, - "logps/rejected": -1.7959448099136353, - "loss": 1.7641, - "nll_loss": 1.6786426305770874, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.08165432512760162, - "rewards/margins": 0.008142916485667229, - "rewards/rejected": -0.089797243475914, + "log_odds_chosen": 1.0686665773391724, + "log_odds_ratio": -0.42429256439208984, + "logits/chosen": 380.085693359375, + "logits/rejected": 382.55035400390625, + "logps/chosen": -0.703887403011322, + "logps/rejected": -1.3395355939865112, + "loss": 1.0705, + "nll_loss": 0.9919706583023071, + "rewards/accuracies": 0.75, + "rewards/chosen": -0.03519437089562416, + "rewards/margins": 0.03178241103887558, + "rewards/rejected": -0.06697677820920944, "step": 225 }, { - "epoch": 0.1715137956748695, - "grad_norm": 61.218482971191406, + "epoch": 1.4614773629864972, + "grad_norm": 15.779678344726562, "learning_rate": 3.296902366978936e-06, - "log_odds_chosen": -0.07680871337652206, - "log_odds_ratio": -0.7396112680435181, - "logits/chosen": 344.4859924316406, - "logits/rejected": 308.43035888671875, - "logps/chosen": -1.4502063989639282, - "logps/rejected": -1.3879715204238892, - "loss": 1.7551, - "nll_loss": 1.920217752456665, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07251031696796417, - "rewards/margins": -0.0031117431353777647, - "rewards/rejected": -0.06939857453107834, + "log_odds_chosen": 0.9577558636665344, + "log_odds_ratio": -0.4354848265647888, + "logits/chosen": 390.24658203125, + "logits/rejected": 427.69805908203125, + "logps/chosen": -0.6856449842453003, + "logps/rejected": -1.2423439025878906, + "loss": 1.0161, + "nll_loss": 1.042870283126831, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.034282244741916656, + "rewards/margins": 0.027834951877593994, + "rewards/rejected": -0.06211719661951065, "step": 230 }, { - "epoch": 0.17524235645041014, - "grad_norm": 77.78043365478516, + "epoch": 1.4932486100079427, + "grad_norm": 15.136199951171875, "learning_rate": 3.2616403652672114e-06, - "log_odds_chosen": -0.5430160164833069, - "log_odds_ratio": -1.0225481986999512, - "logits/chosen": 266.37896728515625, - "logits/rejected": 334.75946044921875, - "logps/chosen": -1.344204306602478, - "logps/rejected": -0.987316906452179, - "loss": 1.8505, - "nll_loss": 1.8181025981903076, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.06721021980047226, - "rewards/margins": -0.017844373360276222, - "rewards/rejected": -0.049365848302841187, + "log_odds_chosen": 0.9250560998916626, + "log_odds_ratio": -0.4900631904602051, + "logits/chosen": 376.62750244140625, + "logits/rejected": 379.6344299316406, + "logps/chosen": -0.7374454140663147, + "logps/rejected": -1.2676187753677368, + "loss": 1.1053, + "nll_loss": 1.0843344926834106, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.036872267723083496, + "rewards/margins": 0.026508668437600136, + "rewards/rejected": -0.06338094174861908, "step": 235 }, { - "epoch": 0.1789709172259508, - "grad_norm": 110.16991424560547, + "epoch": 1.5250198570293882, + "grad_norm": 18.867752075195312, "learning_rate": 3.2274861218395142e-06, - "log_odds_chosen": 0.2322903424501419, - "log_odds_ratio": -0.6068936586380005, - "logits/chosen": 311.11181640625, - "logits/rejected": 343.5069580078125, - "logps/chosen": -1.0900825262069702, - "logps/rejected": -1.2500512599945068, - "loss": 1.6734, - "nll_loss": 1.8383229970932007, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05450413376092911, - "rewards/margins": 0.007998432032763958, - "rewards/rejected": -0.06250257045030594, + "log_odds_chosen": 1.0353556871414185, + "log_odds_ratio": -0.4396829605102539, + "logits/chosen": 395.94573974609375, + "logits/rejected": 393.00555419921875, + "logps/chosen": -0.7767394185066223, + "logps/rejected": -1.3642452955245972, + "loss": 1.1411, + "nll_loss": 1.2226094007492065, + "rewards/accuracies": 0.7250000238418579, + "rewards/chosen": -0.03883696720004082, + "rewards/margins": 0.02937529981136322, + "rewards/rejected": -0.06821225583553314, "step": 240 }, { - "epoch": 0.18269947800149142, - "grad_norm": 40.92695617675781, + "epoch": 1.556791104050834, + "grad_norm": 17.112279891967773, "learning_rate": 3.1943828249997e-06, - "log_odds_chosen": 0.06262041628360748, - "log_odds_ratio": -0.6643093824386597, - "logits/chosen": 274.20391845703125, - "logits/rejected": 372.626220703125, - "logps/chosen": -1.2779630422592163, - "logps/rejected": -1.327146291732788, - "loss": 1.685, - "nll_loss": 1.5228652954101562, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06389816105365753, - "rewards/margins": 0.0024591602850705385, - "rewards/rejected": -0.0663573145866394, + "log_odds_chosen": 0.8108429908752441, + "log_odds_ratio": -0.5097193717956543, + "logits/chosen": 373.17376708984375, + "logits/rejected": 381.7115783691406, + "logps/chosen": -0.7546018362045288, + "logps/rejected": -1.2042465209960938, + "loss": 1.0514, + "nll_loss": 1.0946916341781616, + "rewards/accuracies": 0.737500011920929, + "rewards/chosen": -0.03773009032011032, + "rewards/margins": 0.022482234984636307, + "rewards/rejected": -0.06021232530474663, "step": 245 }, { - "epoch": 0.18642803877703207, - "grad_norm": 62.30152893066406, + "epoch": 1.5885623510722797, + "grad_norm": 16.28799819946289, "learning_rate": 3.1622776601683796e-06, - "log_odds_chosen": 0.16150760650634766, - "log_odds_ratio": -0.7118579149246216, - "logits/chosen": 259.01483154296875, - "logits/rejected": 345.0381774902344, - "logps/chosen": -0.9796780347824097, - "logps/rejected": -1.1620023250579834, - "loss": 1.7592, - "nll_loss": 1.6112806797027588, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04898390546441078, - "rewards/margins": 0.009116211906075478, - "rewards/rejected": -0.05810011550784111, + "log_odds_chosen": 1.0346342325210571, + "log_odds_ratio": -0.4391708970069885, + "logits/chosen": 413.6966247558594, + "logits/rejected": 385.60546875, + "logps/chosen": -0.6579457521438599, + "logps/rejected": -1.2464921474456787, + "loss": 1.0729, + "nll_loss": 1.0166854858398438, + "rewards/accuracies": 0.7749999761581421, + "rewards/chosen": -0.032897286117076874, + "rewards/margins": 0.02942732349038124, + "rewards/rejected": -0.06232461333274841, "step": 250 }, { - "epoch": 0.19015659955257272, - "grad_norm": 71.03755950927734, + "epoch": 1.6203335980937252, + "grad_norm": 16.664676666259766, "learning_rate": 3.131121455425748e-06, - "log_odds_chosen": -0.0898306593298912, - "log_odds_ratio": -0.7973640561103821, - "logits/chosen": 300.42340087890625, - "logits/rejected": 282.3985290527344, - "logps/chosen": -1.7017714977264404, - "logps/rejected": -1.5805952548980713, - "loss": 1.6074, - "nll_loss": 1.9174150228500366, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.0850885882973671, - "rewards/margins": -0.006058814935386181, - "rewards/rejected": -0.07902976870536804, + "log_odds_chosen": 0.9674752950668335, + "log_odds_ratio": -0.45131349563598633, + "logits/chosen": 403.8053894042969, + "logits/rejected": 375.2102966308594, + "logps/chosen": -0.7002569437026978, + "logps/rejected": -1.2120827436447144, + "loss": 1.0503, + "nll_loss": 1.0049232244491577, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.03501284867525101, + "rewards/margins": 0.0255912933498621, + "rewards/rejected": -0.060604143887758255, "step": 255 }, { - "epoch": 0.19388516032811334, - "grad_norm": 61.416507720947266, + "epoch": 1.6521048451151708, + "grad_norm": 18.00334358215332, "learning_rate": 3.1008683647302113e-06, - "log_odds_chosen": 0.26187822222709656, - "log_odds_ratio": -0.5910924673080444, - "logits/chosen": 287.07330322265625, - "logits/rejected": 318.9379577636719, - "logps/chosen": -0.9035602807998657, - "logps/rejected": -1.054553747177124, - "loss": 1.7412, - "nll_loss": 1.5014116764068604, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04517801105976105, - "rewards/margins": 0.00754967238754034, - "rewards/rejected": -0.05272768810391426, + "log_odds_chosen": 1.1998499631881714, + "log_odds_ratio": -0.38401293754577637, + "logits/chosen": 387.16314697265625, + "logits/rejected": 405.91461181640625, + "logps/chosen": -0.7084919810295105, + "logps/rejected": -1.3958265781402588, + "loss": 1.0121, + "nll_loss": 0.9756923913955688, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.035424597561359406, + "rewards/margins": 0.034366730600595474, + "rewards/rejected": -0.06979133188724518, "step": 260 }, { - "epoch": 0.197613721103654, - "grad_norm": 36.60934066772461, + "epoch": 1.6838760921366163, + "grad_norm": 16.293354034423828, "learning_rate": 3.0714755841697565e-06, - "log_odds_chosen": 0.4029797613620758, - "log_odds_ratio": -0.5314388871192932, - "logits/chosen": 288.8212585449219, - "logits/rejected": 330.1380920410156, - "logps/chosen": -1.100858449935913, - "logps/rejected": -1.3479360342025757, - "loss": 1.6105, - "nll_loss": 1.2334349155426025, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05504292994737625, - "rewards/margins": 0.012353872880339622, - "rewards/rejected": -0.06739680469036102, + "log_odds_chosen": 0.8822715878486633, + "log_odds_ratio": -0.44727301597595215, + "logits/chosen": 388.2609558105469, + "logits/rejected": 401.40472412109375, + "logps/chosen": -0.6763411164283752, + "logps/rejected": -1.164041519165039, + "loss": 1.0175, + "nll_loss": 1.0139930248260498, + "rewards/accuracies": 0.824999988079071, + "rewards/chosen": -0.03381705284118652, + "rewards/margins": 0.02438502386212349, + "rewards/rejected": -0.05820208042860031, "step": 265 }, { - "epoch": 0.20134228187919462, - "grad_norm": 85.45577239990234, + "epoch": 1.715647339158062, + "grad_norm": 16.327428817749023, "learning_rate": 3.0429030972509227e-06, - "log_odds_chosen": 0.39814701676368713, - "log_odds_ratio": -0.5613337159156799, - "logits/chosen": 316.4441833496094, - "logits/rejected": 332.3796691894531, - "logps/chosen": -1.053895354270935, - "logps/rejected": -1.2830650806427002, - "loss": 1.9288, - "nll_loss": 1.5248372554779053, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05269476771354675, - "rewards/margins": 0.011458488181233406, - "rewards/rejected": -0.06415325403213501, + "log_odds_chosen": 0.8710346221923828, + "log_odds_ratio": -0.47604647278785706, + "logits/chosen": 409.24114990234375, + "logits/rejected": 395.74285888671875, + "logps/chosen": -0.7325721383094788, + "logps/rejected": -1.2142250537872314, + "loss": 1.0611, + "nll_loss": 1.0560299158096313, + "rewards/accuracies": 0.762499988079071, + "rewards/chosen": -0.036628607660532, + "rewards/margins": 0.024082642048597336, + "rewards/rejected": -0.06071125343441963, "step": 270 }, { - "epoch": 0.20507084265473527, - "grad_norm": 140.5638885498047, + "epoch": 1.7474185861795075, + "grad_norm": 18.973796844482422, "learning_rate": 3.0151134457776365e-06, - "log_odds_chosen": 1.2600421541719697e-05, - "log_odds_ratio": -0.6980763673782349, - "logits/chosen": 310.30523681640625, - "logits/rejected": 371.6546630859375, - "logps/chosen": -1.4998719692230225, - "logps/rejected": -1.4954092502593994, - "loss": 1.8764, - "nll_loss": 1.6765997409820557, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07499360293149948, - "rewards/margins": -0.0002231396792922169, - "rewards/rejected": -0.07477046549320221, + "log_odds_chosen": 1.0033023357391357, + "log_odds_ratio": -0.441061407327652, + "logits/chosen": 389.3612365722656, + "logits/rejected": 402.40740966796875, + "logps/chosen": -0.809950053691864, + "logps/rejected": -1.4129191637039185, + "loss": 1.0708, + "nll_loss": 1.0950496196746826, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.04049750417470932, + "rewards/margins": 0.030148455873131752, + "rewards/rejected": -0.07064596563577652, "step": 275 }, { - "epoch": 0.20879940343027592, - "grad_norm": 44.8569221496582, + "epoch": 1.7791898332009533, + "grad_norm": 16.511871337890625, "learning_rate": 2.988071523335984e-06, - "log_odds_chosen": 0.17957556247711182, - "log_odds_ratio": -0.6337924003601074, - "logits/chosen": 345.49163818359375, - "logits/rejected": 298.91046142578125, - "logps/chosen": -1.1268608570098877, - "logps/rejected": -1.2587931156158447, - "loss": 1.4873, - "nll_loss": 1.4705688953399658, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.056343041360378265, - "rewards/margins": 0.006596612278372049, - "rewards/rejected": -0.06293965876102448, + "log_odds_chosen": 1.0175421237945557, + "log_odds_ratio": -0.42562809586524963, + "logits/chosen": 379.0143737792969, + "logits/rejected": 409.40411376953125, + "logps/chosen": -0.750076413154602, + "logps/rejected": -1.3597370386123657, + "loss": 1.1496, + "nll_loss": 1.0727407932281494, + "rewards/accuracies": 0.8125, + "rewards/chosen": -0.03750381991267204, + "rewards/margins": 0.030483026057481766, + "rewards/rejected": -0.0679868534207344, "step": 280 }, { - "epoch": 0.21252796420581654, - "grad_norm": 44.751277923583984, + "epoch": 1.8109610802223988, + "grad_norm": 16.760597229003906, "learning_rate": 2.961744388795462e-06, - "log_odds_chosen": 0.5632280111312866, - "log_odds_ratio": -0.4795703887939453, - "logits/chosen": 319.538330078125, - "logits/rejected": 337.0677795410156, - "logps/chosen": -1.4938560724258423, - "logps/rejected": -1.9443782567977905, - "loss": 1.7018, - "nll_loss": 1.8579524755477905, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.07469280064105988, - "rewards/margins": 0.022526118904352188, - "rewards/rejected": -0.09721891582012177, + "log_odds_chosen": 0.8824595212936401, + "log_odds_ratio": -0.5069230198860168, + "logits/chosen": 411.53924560546875, + "logits/rejected": 398.7159118652344, + "logps/chosen": -0.8067032098770142, + "logps/rejected": -1.3228529691696167, + "loss": 1.0232, + "nll_loss": 1.0439965724945068, + "rewards/accuracies": 0.7124999761581421, + "rewards/chosen": -0.04033515974879265, + "rewards/margins": 0.025807490572333336, + "rewards/rejected": -0.06614264845848083, "step": 285 }, { - "epoch": 0.2162565249813572, - "grad_norm": 77.22563934326172, + "epoch": 1.8427323272438443, + "grad_norm": 17.98776626586914, "learning_rate": 2.9361010975735177e-06, - "log_odds_chosen": 0.13184069097042084, - "log_odds_ratio": -0.725172221660614, - "logits/chosen": 297.5882263183594, - "logits/rejected": 260.326416015625, - "logps/chosen": -0.8922001123428345, - "logps/rejected": -0.9390857815742493, - "loss": 1.7045, - "nll_loss": 1.6197850704193115, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.044610004872083664, - "rewards/margins": 0.002344283740967512, - "rewards/rejected": -0.04695428907871246, + "log_odds_chosen": 1.003203272819519, + "log_odds_ratio": -0.4108172357082367, + "logits/chosen": 396.21820068359375, + "logits/rejected": 378.0450134277344, + "logps/chosen": -0.7365471720695496, + "logps/rejected": -1.3615410327911377, + "loss": 1.1144, + "nll_loss": 1.0785415172576904, + "rewards/accuracies": 0.875, + "rewards/chosen": -0.03682735934853554, + "rewards/margins": 0.031249692663550377, + "rewards/rejected": -0.06807705014944077, "step": 290 }, { - "epoch": 0.21998508575689785, - "grad_norm": 63.429115295410156, + "epoch": 1.8745035742652898, + "grad_norm": 15.520238876342773, "learning_rate": 2.9111125486979104e-06, - "log_odds_chosen": 0.1062239408493042, - "log_odds_ratio": -0.6641025543212891, - "logits/chosen": 442.32476806640625, - "logits/rejected": 293.81756591796875, - "logps/chosen": -1.2926907539367676, - "logps/rejected": -1.3611558675765991, - "loss": 1.6662, - "nll_loss": 1.4464445114135742, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0646345466375351, - "rewards/margins": 0.0034232556354254484, - "rewards/rejected": -0.06805779784917831, + "log_odds_chosen": 0.9161802530288696, + "log_odds_ratio": -0.459242582321167, + "logits/chosen": 410.2061462402344, + "logits/rejected": 410.3428649902344, + "logps/chosen": -0.7132889628410339, + "logps/rejected": -1.2278392314910889, + "loss": 1.0323, + "nll_loss": 1.0033968687057495, + "rewards/accuracies": 0.8374999761581421, + "rewards/chosen": -0.03566444665193558, + "rewards/margins": 0.025727516040205956, + "rewards/rejected": -0.061391960829496384, "step": 295 }, { - "epoch": 0.22371364653243847, - "grad_norm": 36.564453125, + "epoch": 1.9062748212867355, + "grad_norm": 16.573244094848633, "learning_rate": 2.8867513459481293e-06, - "log_odds_chosen": 0.423289954662323, - "log_odds_ratio": -0.5702893137931824, - "logits/chosen": 316.58404541015625, - "logits/rejected": 328.70166015625, - "logps/chosen": -1.165248155593872, - "logps/rejected": -1.3282543420791626, - "loss": 1.7803, - "nll_loss": 1.9016978740692139, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0582624152302742, - "rewards/margins": 0.008150309324264526, - "rewards/rejected": -0.06641272455453873, + "log_odds_chosen": 0.9094133377075195, + "log_odds_ratio": -0.45279788970947266, + "logits/chosen": 386.76165771484375, + "logits/rejected": 400.73443603515625, + "logps/chosen": -0.7042160034179688, + "logps/rejected": -1.1693499088287354, + "loss": 1.0304, + "nll_loss": 1.048201560974121, + "rewards/accuracies": 0.8374999761581421, + "rewards/chosen": -0.03521079570055008, + "rewards/margins": 0.0232566986232996, + "rewards/rejected": -0.05846749618649483, "step": 300 }, { - "epoch": 0.22744220730797912, - "grad_norm": 36.413822174072266, + "epoch": 1.938046068308181, + "grad_norm": 22.685237884521484, "learning_rate": 2.862991671569341e-06, - "log_odds_chosen": 0.13049376010894775, - "log_odds_ratio": -0.670722484588623, - "logits/chosen": 269.01959228515625, - "logits/rejected": 280.21246337890625, - "logps/chosen": -1.2154343128204346, - "logps/rejected": -1.2640982866287231, - "loss": 1.5373, - "nll_loss": 1.3931777477264404, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06077171489596367, - "rewards/margins": 0.0024332008324563503, - "rewards/rejected": -0.06320490688085556, + "log_odds_chosen": 0.9926900863647461, + "log_odds_ratio": -0.41787558794021606, + "logits/chosen": 413.96343994140625, + "logits/rejected": 387.29730224609375, + "logps/chosen": -0.6416295170783997, + "logps/rejected": -1.1413004398345947, + "loss": 0.988, + "nll_loss": 0.9365374445915222, + "rewards/accuracies": 0.800000011920929, + "rewards/chosen": -0.032081473618745804, + "rewards/margins": 0.024983543902635574, + "rewards/rejected": -0.05706502124667168, "step": 305 }, { - "epoch": 0.23117076808351977, - "grad_norm": 48.64401626586914, + "epoch": 1.9698173153296268, + "grad_norm": 17.650312423706055, "learning_rate": 2.839809171235324e-06, - "log_odds_chosen": 0.2653113603591919, - "log_odds_ratio": -0.6074713468551636, - "logits/chosen": 334.58282470703125, - "logits/rejected": 329.324462890625, - "logps/chosen": -1.1074600219726562, - "logps/rejected": -1.2625815868377686, - "loss": 1.5326, - "nll_loss": 1.534833550453186, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.055372994393110275, - "rewards/margins": 0.007756076753139496, - "rewards/rejected": -0.06312907487154007, + "log_odds_chosen": 0.936631977558136, + "log_odds_ratio": -0.4232844412326813, + "logits/chosen": 384.41680908203125, + "logits/rejected": 393.4200439453125, + "logps/chosen": -0.7465909719467163, + "logps/rejected": -1.3220902681350708, + "loss": 1.036, + "nll_loss": 1.0644495487213135, + "rewards/accuracies": 0.8374999761581421, + "rewards/chosen": -0.037329547107219696, + "rewards/margins": 0.028774961829185486, + "rewards/rejected": -0.06610451638698578, "step": 310 }, { - "epoch": 0.2348993288590604, - "grad_norm": 41.79473876953125, + "epoch": 1.995234312946783, + "eval_log_odds_chosen": 0.34870076179504395, + "eval_log_odds_ratio": -0.6608841419219971, + "eval_logits/chosen": 312.93048095703125, + "eval_logits/rejected": 302.5973815917969, + "eval_logps/chosen": -0.9867467880249023, + "eval_logps/rejected": -1.2230703830718994, + "eval_loss": 1.419357180595398, + "eval_nll_loss": 1.3670318126678467, + "eval_rewards/accuracies": 0.5667870044708252, + "eval_rewards/chosen": -0.04933733493089676, + "eval_rewards/margins": 0.011816184036433697, + "eval_rewards/rejected": -0.06115352362394333, + "eval_runtime": 278.7523, + "eval_samples_per_second": 1.984, + "eval_steps_per_second": 0.994, + "step": 314 + }, + { + "epoch": 2.0015885623510723, + "grad_norm": 16.02027702331543, "learning_rate": 2.817180849095055e-06, - "log_odds_chosen": 0.915366530418396, - "log_odds_ratio": -0.3970385491847992, - "logits/chosen": 249.0862579345703, - "logits/rejected": 339.94757080078125, - "logps/chosen": -1.102460503578186, - "logps/rejected": -1.793702483177185, - "loss": 1.5845, - "nll_loss": 1.5702228546142578, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05512302368879318, - "rewards/margins": 0.03456209599971771, - "rewards/rejected": -0.0896851196885109, + "log_odds_chosen": 1.0939061641693115, + "log_odds_ratio": -0.39633578062057495, + "logits/chosen": 402.9229431152344, + "logits/rejected": 395.6060791015625, + "logps/chosen": -0.7204245924949646, + "logps/rejected": -1.3800289630889893, + "loss": 1.0066, + "nll_loss": 0.9838098287582397, + "rewards/accuracies": 0.862500011920929, + "rewards/chosen": -0.03602122515439987, + "rewards/margins": 0.032980211079120636, + "rewards/rejected": -0.0690014436841011, "step": 315 }, { - "epoch": 0.23862788963460105, - "grad_norm": 55.401554107666016, + "epoch": 2.033359809372518, + "grad_norm": 18.79454803466797, "learning_rate": 2.7950849718747376e-06, - "log_odds_chosen": 0.3789331912994385, - "log_odds_ratio": -0.539412796497345, - "logits/chosen": 298.9725646972656, - "logits/rejected": 265.35821533203125, - "logps/chosen": -0.9812465906143188, - "logps/rejected": -1.1424061059951782, - "loss": 1.757, - "nll_loss": 1.7272613048553467, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.0490623340010643, - "rewards/margins": 0.008057966828346252, - "rewards/rejected": -0.05712030082941055, + "log_odds_chosen": 2.4993255138397217, + "log_odds_ratio": -0.16073401272296906, + "logits/chosen": 368.0367736816406, + "logits/rejected": 383.66949462890625, + "logps/chosen": -0.36704158782958984, + "logps/rejected": -1.6748905181884766, + "loss": 0.5746, + "nll_loss": 0.5446859002113342, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.018352080136537552, + "rewards/margins": 0.06539243459701538, + "rewards/rejected": -0.08374451845884323, "step": 320 }, { - "epoch": 0.24235645041014167, - "grad_norm": 110.39041900634766, + "epoch": 2.0651310563939633, + "grad_norm": 15.766267776489258, "learning_rate": 2.773500981126146e-06, - "log_odds_chosen": -0.051452331244945526, - "log_odds_ratio": -0.7377279996871948, - "logits/chosen": 354.8937072753906, - "logits/rejected": 272.44512939453125, - "logps/chosen": -1.2038339376449585, - "logps/rejected": -1.13150155544281, - "loss": 1.757, - "nll_loss": 1.9262282848358154, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.060191698372364044, - "rewards/margins": -0.0036166186910122633, - "rewards/rejected": -0.05657507851719856, + "log_odds_chosen": 2.2665276527404785, + "log_odds_ratio": -0.1551036536693573, + "logits/chosen": 406.095703125, + "logits/rejected": 392.67034912109375, + "logps/chosen": -0.319181352853775, + "logps/rejected": -1.3770430088043213, + "loss": 0.6228, + "nll_loss": 0.5448837280273438, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01595906727015972, + "rewards/margins": 0.05289308354258537, + "rewards/rejected": -0.06885214149951935, "step": 325 }, { - "epoch": 0.24608501118568232, - "grad_norm": 281.7074890136719, + "epoch": 2.096902303415409, + "grad_norm": 14.977751731872559, "learning_rate": 2.752409412815902e-06, - "log_odds_chosen": 0.015334022231400013, - "log_odds_ratio": -0.778980016708374, - "logits/chosen": 367.615234375, - "logits/rejected": 328.98260498046875, - "logps/chosen": -1.055513620376587, - "logps/rejected": -1.0729763507843018, - "loss": 1.6971, - "nll_loss": 1.1969518661499023, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.052775681018829346, - "rewards/margins": 0.0008731361594982445, - "rewards/rejected": -0.05364881828427315, + "log_odds_chosen": 2.5944995880126953, + "log_odds_ratio": -0.12087088823318481, + "logits/chosen": 390.13653564453125, + "logits/rejected": 381.3854064941406, + "logps/chosen": -0.35282859206199646, + "logps/rejected": -1.6678215265274048, + "loss": 0.5863, + "nll_loss": 0.6006873846054077, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017641428858041763, + "rewards/margins": 0.0657496452331543, + "rewards/rejected": -0.08339107036590576, "step": 330 }, { - "epoch": 0.24981357196122297, - "grad_norm": 47.77407455444336, + "epoch": 2.128673550436855, + "grad_norm": 11.052816390991211, "learning_rate": 2.7317918235407652e-06, - "log_odds_chosen": 0.538671612739563, - "log_odds_ratio": -0.6098898649215698, - "logits/chosen": 325.3160095214844, - "logits/rejected": 305.4865417480469, - "logps/chosen": -1.131415605545044, - "logps/rejected": -1.36110520362854, - "loss": 1.6656, - "nll_loss": 1.8968321084976196, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05657077580690384, - "rewards/margins": 0.011484487913548946, - "rewards/rejected": -0.06805525720119476, + "log_odds_chosen": 2.7388923168182373, + "log_odds_ratio": -0.11470385640859604, + "logits/chosen": 373.2349548339844, + "logits/rejected": 372.8748474121094, + "logps/chosen": -0.34725895524024963, + "logps/rejected": -1.7965190410614014, + "loss": 0.5343, + "nll_loss": 0.5674458146095276, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.01736294850707054, + "rewards/margins": 0.07246299088001251, + "rewards/rejected": -0.08982594311237335, "step": 335 }, { - "epoch": 0.2535421327367636, - "grad_norm": 35.23573684692383, + "epoch": 2.1604447974583003, + "grad_norm": 13.367264747619629, "learning_rate": 2.711630722733202e-06, - "log_odds_chosen": 0.32363080978393555, - "log_odds_ratio": -0.5865469574928284, - "logits/chosen": 341.9852600097656, - "logits/rejected": 267.70391845703125, - "logps/chosen": -1.0992166996002197, - "logps/rejected": -1.2794673442840576, - "loss": 1.5265, - "nll_loss": 1.3950005769729614, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.054960835725069046, - "rewards/margins": 0.009012536145746708, - "rewards/rejected": -0.06397336721420288, + "log_odds_chosen": 2.605971336364746, + "log_odds_ratio": -0.1160891056060791, + "logits/chosen": 375.6283874511719, + "logits/rejected": 366.3837890625, + "logps/chosen": -0.3731013238430023, + "logps/rejected": -1.7575727701187134, + "loss": 0.5037, + "nll_loss": 0.5094397664070129, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018655067309737206, + "rewards/margins": 0.0692235678434372, + "rewards/rejected": -0.08787862956523895, "step": 340 }, { - "epoch": 0.25727069351230425, - "grad_norm": 66.05382537841797, + "epoch": 2.192216044479746, + "grad_norm": 15.93548583984375, "learning_rate": 2.691909510290828e-06, - "log_odds_chosen": -0.027811408042907715, - "log_odds_ratio": -0.7312370538711548, - "logits/chosen": 292.13934326171875, - "logits/rejected": 275.47589111328125, - "logps/chosen": -1.0181611776351929, - "logps/rejected": -1.0466936826705933, - "loss": 1.699, - "nll_loss": 1.7770719528198242, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.050908058881759644, - "rewards/margins": 0.0014266285579651594, - "rewards/rejected": -0.05233468860387802, + "log_odds_chosen": 2.464402675628662, + "log_odds_ratio": -0.13156327605247498, + "logits/chosen": 368.25299072265625, + "logits/rejected": 391.8240966796875, + "logps/chosen": -0.35556745529174805, + "logps/rejected": -1.6176868677139282, + "loss": 0.5562, + "nll_loss": 0.5545670986175537, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017778372392058372, + "rewards/margins": 0.0631059780716896, + "rewards/rejected": -0.08088434487581253, "step": 345 }, { - "epoch": 0.2609992542878449, - "grad_norm": 52.86804962158203, + "epoch": 2.2239872915011913, + "grad_norm": 18.228683471679688, "learning_rate": 2.6726124191242444e-06, - "log_odds_chosen": -0.19208799302577972, - "log_odds_ratio": -0.798243522644043, - "logits/chosen": 247.29525756835938, - "logits/rejected": 324.7398986816406, - "logps/chosen": -1.2389428615570068, - "logps/rejected": -1.1306030750274658, - "loss": 1.6042, - "nll_loss": 1.7317432165145874, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.06194714829325676, - "rewards/margins": -0.005416995845735073, - "rewards/rejected": -0.056530147790908813, + "log_odds_chosen": 2.6247172355651855, + "log_odds_ratio": -0.12060055881738663, + "logits/chosen": 378.7240295410156, + "logits/rejected": 385.0981750488281, + "logps/chosen": -0.3274136483669281, + "logps/rejected": -1.6788215637207031, + "loss": 0.568, + "nll_loss": 0.5665196180343628, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016370682045817375, + "rewards/margins": 0.06757040321826935, + "rewards/rejected": -0.08394108712673187, "step": 350 }, { - "epoch": 0.26472781506338555, - "grad_norm": 120.67245483398438, + "epoch": 2.255758538522637, + "grad_norm": 10.90892219543457, "learning_rate": 2.6537244621713765e-06, - "log_odds_chosen": 0.3479830324649811, - "log_odds_ratio": -0.5401866436004639, - "logits/chosen": 296.92706298828125, - "logits/rejected": 351.3667907714844, - "logps/chosen": -1.060681700706482, - "logps/rejected": -1.2779345512390137, - "loss": 1.5781, - "nll_loss": 1.5104572772979736, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.053034089505672455, - "rewards/margins": 0.010862639173865318, - "rewards/rejected": -0.06389673054218292, + "log_odds_chosen": 2.860001802444458, + "log_odds_ratio": -0.10223189741373062, + "logits/chosen": 389.40496826171875, + "logits/rejected": 375.42901611328125, + "logps/chosen": -0.2831525504589081, + "logps/rejected": -1.7654014825820923, + "loss": 0.5138, + "nll_loss": 0.5121490359306335, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.014157627709209919, + "rewards/margins": 0.07411245256662369, + "rewards/rejected": -0.08827006816864014, "step": 355 }, { - "epoch": 0.2684563758389262, - "grad_norm": 82.6792984008789, + "epoch": 2.2875297855440824, + "grad_norm": 19.659900665283203, "learning_rate": 2.6352313834736496e-06, - "log_odds_chosen": 0.24271515011787415, - "log_odds_ratio": -0.8010608553886414, - "logits/chosen": 321.4865417480469, - "logits/rejected": 336.76251220703125, - "logps/chosen": -1.0023505687713623, - "logps/rejected": -1.1415283679962158, - "loss": 1.425, - "nll_loss": 1.5412629842758179, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.050117529928684235, - "rewards/margins": 0.006958886981010437, - "rewards/rejected": -0.05707641690969467, + "log_odds_chosen": 2.4854187965393066, + "log_odds_ratio": -0.1478622853755951, + "logits/chosen": 372.8605041503906, + "logits/rejected": 345.42425537109375, + "logps/chosen": -0.35203009843826294, + "logps/rejected": -1.6439100503921509, + "loss": 0.6183, + "nll_loss": 0.6371638774871826, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.017601503059267998, + "rewards/margins": 0.06459399312734604, + "rewards/rejected": -0.08219550549983978, "step": 360 }, { - "epoch": 0.2721849366144668, - "grad_norm": 50.61399459838867, + "epoch": 2.3193010325655283, + "grad_norm": 13.868301391601562, "learning_rate": 2.6171196129510684e-06, - "log_odds_chosen": 0.11651144176721573, - "log_odds_ratio": -0.7204474806785583, - "logits/chosen": 272.99993896484375, - "logits/rejected": 302.78436279296875, - "logps/chosen": -1.0196096897125244, - "logps/rejected": -1.1078453063964844, - "loss": 1.7633, - "nll_loss": 1.478210687637329, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05098047852516174, - "rewards/margins": 0.004411784000694752, - "rewards/rejected": -0.05539226531982422, + "log_odds_chosen": 2.2805027961730957, + "log_odds_ratio": -0.15696506202220917, + "logits/chosen": 368.7457580566406, + "logits/rejected": 381.3095703125, + "logps/chosen": -0.36261260509490967, + "logps/rejected": -1.5051987171173096, + "loss": 0.5374, + "nll_loss": 0.5559507012367249, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018130630254745483, + "rewards/margins": 0.057129304856061935, + "rewards/rejected": -0.07525994628667831, "step": 365 }, { - "epoch": 0.2759134973900075, - "grad_norm": 34.96334457397461, + "epoch": 2.351072279586974, + "grad_norm": 11.472841262817383, "learning_rate": 2.599376224550182e-06, - "log_odds_chosen": 0.1464993804693222, - "log_odds_ratio": -0.6580086946487427, - "logits/chosen": 250.0161590576172, - "logits/rejected": 255.68252563476562, - "logps/chosen": -0.9106311798095703, - "logps/rejected": -0.9577559232711792, - "loss": 1.7259, - "nll_loss": 1.9213666915893555, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.045531563460826874, - "rewards/margins": 0.002356233075261116, - "rewards/rejected": -0.04788779839873314, + "log_odds_chosen": 2.569275379180908, + "log_odds_ratio": -0.1351589858531952, + "logits/chosen": 341.255615234375, + "logits/rejected": 375.5405578613281, + "logps/chosen": -0.3256201148033142, + "logps/rejected": -1.5576858520507812, + "loss": 0.5394, + "nll_loss": 0.5407100915908813, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.01628100499510765, + "rewards/margins": 0.06160329654812813, + "rewards/rejected": -0.07788430154323578, "step": 370 }, { - "epoch": 0.2796420581655481, - "grad_norm": 45.18181228637695, + "epoch": 2.3828435266084194, + "grad_norm": 15.338998794555664, "learning_rate": 2.5819888974716113e-06, - "log_odds_chosen": 0.26931139826774597, - "log_odds_ratio": -0.58686363697052, - "logits/chosen": 262.8601989746094, - "logits/rejected": 340.15826416015625, - "logps/chosen": -0.9156142473220825, - "logps/rejected": -1.0498270988464355, - "loss": 1.4395, - "nll_loss": 1.4553390741348267, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04578071087598801, - "rewards/margins": 0.0067106448113918304, - "rewards/rejected": -0.05249135568737984, + "log_odds_chosen": 2.368185520172119, + "log_odds_ratio": -0.16596445441246033, + "logits/chosen": 367.3100891113281, + "logits/rejected": 379.1470642089844, + "logps/chosen": -0.34503039717674255, + "logps/rejected": -1.4970388412475586, + "loss": 0.5385, + "nll_loss": 0.5925895571708679, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.017251521348953247, + "rewards/margins": 0.057600416243076324, + "rewards/rejected": -0.07485193014144897, "step": 375 }, { - "epoch": 0.2833706189410887, - "grad_norm": 42.87923812866211, + "epoch": 2.414614773629865, + "grad_norm": 11.992910385131836, "learning_rate": 2.564945880212886e-06, - "log_odds_chosen": 0.2824031114578247, - "log_odds_ratio": -0.6069055795669556, - "logits/chosen": 248.80227661132812, - "logits/rejected": 321.45172119140625, - "logps/chosen": -1.295594573020935, - "logps/rejected": -1.4929348230361938, - "loss": 1.5799, - "nll_loss": 1.7149022817611694, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06477973610162735, - "rewards/margins": 0.009867008775472641, - "rewards/rejected": -0.07464674860239029, + "log_odds_chosen": 2.668848991394043, + "log_odds_ratio": -0.11368580907583237, + "logits/chosen": 339.14764404296875, + "logits/rejected": 374.33258056640625, + "logps/chosen": -0.29525333642959595, + "logps/rejected": -1.6697683334350586, + "loss": 0.5286, + "nll_loss": 0.530687689781189, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.014762667007744312, + "rewards/margins": 0.06872574985027313, + "rewards/rejected": -0.08348841965198517, "step": 380 }, { - "epoch": 0.2870991797166294, - "grad_norm": 56.1366081237793, + "epoch": 2.4463860206513104, + "grad_norm": 14.318426132202148, "learning_rate": 2.5482359571881276e-06, - "log_odds_chosen": 0.2957385182380676, - "log_odds_ratio": -0.5782660841941833, - "logits/chosen": 352.4662170410156, - "logits/rejected": 253.92819213867188, - "logps/chosen": -0.9833849668502808, - "logps/rejected": -1.1791446208953857, - "loss": 1.5972, - "nll_loss": 1.813367486000061, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04916924983263016, - "rewards/margins": 0.009787973016500473, - "rewards/rejected": -0.05895722657442093, + "log_odds_chosen": 2.5295252799987793, + "log_odds_ratio": -0.13907964527606964, + "logits/chosen": 355.3772888183594, + "logits/rejected": 369.38250732421875, + "logps/chosen": -0.32363104820251465, + "logps/rejected": -1.5537617206573486, + "loss": 0.5316, + "nll_loss": 0.5073675513267517, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.016181552782654762, + "rewards/margins": 0.061506547033786774, + "rewards/rejected": -0.07768810540437698, "step": 385 }, { - "epoch": 0.29082774049217003, - "grad_norm": 58.69853210449219, + "epoch": 2.4781572676727563, + "grad_norm": 12.480424880981445, "learning_rate": 2.5318484177091667e-06, - "log_odds_chosen": -0.20551224052906036, - "log_odds_ratio": -0.8410626649856567, - "logits/chosen": 371.9249267578125, - "logits/rejected": 373.3065490722656, - "logps/chosen": -1.2597036361694336, - "logps/rejected": -1.091282844543457, - "loss": 1.8968, - "nll_loss": 1.7972428798675537, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06298518180847168, - "rewards/margins": -0.0084210354834795, - "rewards/rejected": -0.05456414073705673, + "log_odds_chosen": 2.844482898712158, + "log_odds_ratio": -0.10580587387084961, + "logits/chosen": 375.2264709472656, + "logits/rejected": 378.9322509765625, + "logps/chosen": -0.31661707162857056, + "logps/rejected": -1.8109395503997803, + "loss": 0.5889, + "nll_loss": 0.5691739320755005, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015830855816602707, + "rewards/margins": 0.07471612840890884, + "rewards/rejected": -0.09054698050022125, "step": 390 }, { - "epoch": 0.29455630126771065, - "grad_norm": 30.802459716796875, + "epoch": 2.509928514694202, + "grad_norm": 12.692961692810059, "learning_rate": 2.515773027133138e-06, - "log_odds_chosen": 0.2683835029602051, - "log_odds_ratio": -0.6589955687522888, - "logits/chosen": 287.94647216796875, - "logits/rejected": 356.33929443359375, - "logps/chosen": -1.028784990310669, - "logps/rejected": -1.221069097518921, - "loss": 1.7537, - "nll_loss": 1.4103275537490845, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05143924802541733, - "rewards/margins": 0.009614207781851292, - "rewards/rejected": -0.061053454875946045, + "log_odds_chosen": 2.711256742477417, + "log_odds_ratio": -0.11297377198934555, + "logits/chosen": 366.85809326171875, + "logits/rejected": 381.2143859863281, + "logps/chosen": -0.3398984372615814, + "logps/rejected": -1.6760709285736084, + "loss": 0.5906, + "nll_loss": 0.5562863349914551, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.01699492149055004, + "rewards/margins": 0.06680862605571747, + "rewards/rejected": -0.08380354940891266, "step": 395 }, { - "epoch": 0.29828486204325133, - "grad_norm": 70.3304443359375, + "epoch": 2.5416997617156474, + "grad_norm": 14.05392837524414, "learning_rate": 2.5e-06, - "log_odds_chosen": -0.10383357852697372, - "log_odds_ratio": -0.7529563903808594, - "logits/chosen": 340.66265869140625, - "logits/rejected": 249.51904296875, - "logps/chosen": -1.2885087728500366, - "logps/rejected": -1.1990267038345337, - "loss": 1.4771, - "nll_loss": 1.4057632684707642, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06442544609308243, - "rewards/margins": -0.004474103916436434, - "rewards/rejected": -0.059951335191726685, + "log_odds_chosen": 2.806440830230713, + "log_odds_ratio": -0.1206049919128418, + "logits/chosen": 385.48016357421875, + "logits/rejected": 387.5498046875, + "logps/chosen": -0.33043619990348816, + "logps/rejected": -1.7994086742401123, + "loss": 0.536, + "nll_loss": 0.5314000844955444, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.016521811485290527, + "rewards/margins": 0.07344862073659897, + "rewards/rejected": -0.0899704322218895, "step": 400 }, { - "epoch": 0.30201342281879195, - "grad_norm": 70.40211486816406, + "epoch": 2.573471008737093, + "grad_norm": 13.364317893981934, "learning_rate": 2.484519974999767e-06, - "log_odds_chosen": 0.013537973165512085, - "log_odds_ratio": -0.8023442029953003, - "logits/chosen": 327.9271545410156, - "logits/rejected": 299.4677429199219, - "logps/chosen": -1.3635926246643066, - "logps/rejected": -1.3437942266464233, - "loss": 1.6932, - "nll_loss": 1.8231136798858643, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06817962974309921, - "rewards/margins": -0.0009899236029013991, - "rewards/rejected": -0.06718970835208893, + "log_odds_chosen": 2.552408218383789, + "log_odds_ratio": -0.15109024941921234, + "logits/chosen": 402.896728515625, + "logits/rejected": 358.8814697265625, + "logps/chosen": -0.35223886370658875, + "logps/rejected": -1.5426979064941406, + "loss": 0.5462, + "nll_loss": 0.5571905374526978, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.017611945047974586, + "rewards/margins": 0.05952295660972595, + "rewards/rejected": -0.07713489979505539, "step": 405 }, { - "epoch": 0.3057419835943326, - "grad_norm": 44.82709884643555, + "epoch": 2.6052422557585384, + "grad_norm": 13.045223236083984, "learning_rate": 2.4693239916239746e-06, - "log_odds_chosen": 0.19524511694908142, - "log_odds_ratio": -0.6434694528579712, - "logits/chosen": 255.536376953125, - "logits/rejected": 369.4391174316406, - "logps/chosen": -0.9387086629867554, - "logps/rejected": -1.0535556077957153, - "loss": 1.5825, - "nll_loss": 1.339302659034729, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04693543538451195, - "rewards/margins": 0.005742342211306095, - "rewards/rejected": -0.05267778038978577, + "log_odds_chosen": 2.6123578548431396, + "log_odds_ratio": -0.13238325715065002, + "logits/chosen": 398.14410400390625, + "logits/rejected": 372.25506591796875, + "logps/chosen": -0.32812008261680603, + "logps/rejected": -1.6102240085601807, + "loss": 0.5432, + "nll_loss": 0.5351387858390808, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.016406003385782242, + "rewards/margins": 0.06410519778728485, + "rewards/rejected": -0.0805111974477768, "step": 410 }, { - "epoch": 0.3094705443698732, - "grad_norm": 44.1151123046875, + "epoch": 2.6370135027799844, + "grad_norm": 12.631697654724121, "learning_rate": 2.4544034683690802e-06, - "log_odds_chosen": -0.14562582969665527, - "log_odds_ratio": -0.7805188298225403, - "logits/chosen": 277.15936279296875, - "logits/rejected": 269.63623046875, - "logps/chosen": -1.0123965740203857, - "logps/rejected": -0.9123673439025879, - "loss": 1.708, - "nll_loss": 1.9185701608657837, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05061982944607735, - "rewards/margins": -0.00500146159902215, - "rewards/rejected": -0.045618366450071335, + "log_odds_chosen": 2.6268792152404785, + "log_odds_ratio": -0.1338309347629547, + "logits/chosen": 357.38592529296875, + "logits/rejected": 389.5252380371094, + "logps/chosen": -0.31788235902786255, + "logps/rejected": -1.6665666103363037, + "loss": 0.5004, + "nll_loss": 0.5015383958816528, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.015894118696451187, + "rewards/margins": 0.06743422150611877, + "rewards/rejected": -0.08332833647727966, "step": 415 }, { - "epoch": 0.3131991051454139, - "grad_norm": 44.975223541259766, + "epoch": 2.6687847498014294, + "grad_norm": 15.014891624450684, "learning_rate": 2.4397501823713327e-06, - "log_odds_chosen": 0.07992539554834366, - "log_odds_ratio": -0.6781617999076843, - "logits/chosen": 328.70452880859375, - "logits/rejected": 274.3320007324219, - "logps/chosen": -1.3227722644805908, - "logps/rejected": -1.410556435585022, - "loss": 1.7033, - "nll_loss": 1.7576982975006104, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.0661386102437973, - "rewards/margins": 0.004389205016195774, - "rewards/rejected": -0.0705278143286705, + "log_odds_chosen": 2.591372013092041, + "log_odds_ratio": -0.1354280561208725, + "logits/chosen": 327.8882751464844, + "logits/rejected": 381.07318115234375, + "logps/chosen": -0.3104853630065918, + "logps/rejected": -1.6048510074615479, + "loss": 0.5341, + "nll_loss": 0.5047799348831177, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.01552426815032959, + "rewards/margins": 0.06471828371286392, + "rewards/rejected": -0.08024255931377411, "step": 420 }, { - "epoch": 0.3169276659209545, - "grad_norm": 99.2433853149414, + "epoch": 2.7005559968228754, + "grad_norm": 16.257761001586914, "learning_rate": 2.4253562503633297e-06, - "log_odds_chosen": 0.5625776052474976, - "log_odds_ratio": -0.519619345664978, - "logits/chosen": 332.6693420410156, - "logits/rejected": 323.271728515625, - "logps/chosen": -1.0857082605361938, - "logps/rejected": -1.4357129335403442, - "loss": 1.6215, - "nll_loss": 1.4172000885009766, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05428541451692581, - "rewards/margins": 0.01750023290514946, - "rewards/rejected": -0.07178565114736557, + "log_odds_chosen": 2.5250930786132812, + "log_odds_ratio": -0.1304975003004074, + "logits/chosen": 358.3866271972656, + "logits/rejected": 365.5904235839844, + "logps/chosen": -0.37705713510513306, + "logps/rejected": -1.6503069400787354, + "loss": 0.5585, + "nll_loss": 0.5689770579338074, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.018852856010198593, + "rewards/margins": 0.06366249173879623, + "rewards/rejected": -0.08251535147428513, "step": 425 }, { - "epoch": 0.32065622669649513, - "grad_norm": 55.38136672973633, + "epoch": 2.732327243844321, + "grad_norm": 13.000679969787598, "learning_rate": 2.411214110852061e-06, - "log_odds_chosen": 0.00560649624094367, - "log_odds_ratio": -0.735174298286438, - "logits/chosen": 255.99331665039062, - "logits/rejected": 261.8822937011719, - "logps/chosen": -1.0162136554718018, - "logps/rejected": -0.9721221923828125, - "loss": 1.8587, - "nll_loss": 1.5896416902542114, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05081068351864815, - "rewards/margins": -0.002204576041549444, - "rewards/rejected": -0.04860610514879227, + "log_odds_chosen": 2.5947813987731934, + "log_odds_ratio": -0.1432962715625763, + "logits/chosen": 405.462158203125, + "logits/rejected": 397.40185546875, + "logps/chosen": -0.31710636615753174, + "logps/rejected": -1.525342583656311, + "loss": 0.5913, + "nll_loss": 0.5503649115562439, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.015855319797992706, + "rewards/margins": 0.060411810874938965, + "rewards/rejected": -0.07626713067293167, "step": 430 }, { - "epoch": 0.3243847874720358, - "grad_norm": 55.785892486572266, + "epoch": 2.7640984908657664, + "grad_norm": 12.692647933959961, "learning_rate": 2.3973165074269213e-06, - "log_odds_chosen": 0.43692225217819214, - "log_odds_ratio": -0.51969313621521, - "logits/chosen": 391.9912414550781, - "logits/rejected": 282.2325744628906, - "logps/chosen": -0.8839141130447388, - "logps/rejected": -1.1037542819976807, - "loss": 1.4134, - "nll_loss": 1.334592580795288, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04419570416212082, - "rewards/margins": 0.010992004536092281, - "rewards/rejected": -0.055187709629535675, + "log_odds_chosen": 2.4075734615325928, + "log_odds_ratio": -0.12895731627941132, + "logits/chosen": 380.5270080566406, + "logits/rejected": 378.729736328125, + "logps/chosen": -0.35148996114730835, + "logps/rejected": -1.6106551885604858, + "loss": 0.5662, + "nll_loss": 0.552409291267395, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.017574498429894447, + "rewards/margins": 0.062958262860775, + "rewards/rejected": -0.08053276687860489, "step": 435 }, { - "epoch": 0.32811334824757643, - "grad_norm": 35.8914794921875, + "epoch": 2.795869737887212, + "grad_norm": 14.066009521484375, "learning_rate": 2.3836564731139807e-06, - "log_odds_chosen": 0.29982471466064453, - "log_odds_ratio": -0.740268349647522, - "logits/chosen": 292.290771484375, - "logits/rejected": 312.8147277832031, - "logps/chosen": -1.4986966848373413, - "logps/rejected": -1.7851412296295166, - "loss": 1.7771, - "nll_loss": 1.9529426097869873, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07493484020233154, - "rewards/margins": 0.014322230592370033, - "rewards/rejected": -0.08925706893205643, + "log_odds_chosen": 2.4299874305725098, + "log_odds_ratio": -0.13510316610336304, + "logits/chosen": 371.70501708984375, + "logits/rejected": 391.11016845703125, + "logps/chosen": -0.346214234828949, + "logps/rejected": -1.5722700357437134, + "loss": 0.5331, + "nll_loss": 0.6044758558273315, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01731071248650551, + "rewards/margins": 0.0613027922809124, + "rewards/rejected": -0.07861350476741791, "step": 440 }, { - "epoch": 0.33184190902311705, - "grad_norm": 68.631591796875, + "epoch": 2.8276409849086575, + "grad_norm": 16.693822860717773, "learning_rate": 2.3702273156998867e-06, - "log_odds_chosen": -0.416208416223526, - "log_odds_ratio": -0.9583233594894409, - "logits/chosen": 260.71075439453125, - "logits/rejected": 314.3780517578125, - "logps/chosen": -1.1581990718841553, - "logps/rejected": -0.9223856925964355, - "loss": 1.7183, - "nll_loss": 2.0336034297943115, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.05790995433926582, - "rewards/margins": -0.011790660209953785, - "rewards/rejected": -0.046119287610054016, + "log_odds_chosen": 2.664226770401001, + "log_odds_ratio": -0.1495479792356491, + "logits/chosen": 375.3480224609375, + "logits/rejected": 390.2784729003906, + "logps/chosen": -0.30481138825416565, + "logps/rejected": -1.5847148895263672, + "loss": 0.5774, + "nll_loss": 0.5591589212417603, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.015240569598972797, + "rewards/margins": 0.06399518251419067, + "rewards/rejected": -0.0792357474565506, "step": 445 }, { - "epoch": 0.33557046979865773, - "grad_norm": 52.649349212646484, + "epoch": 2.8594122319301034, + "grad_norm": 11.772316932678223, "learning_rate": 2.357022603955159e-06, - "log_odds_chosen": 0.2694109380245209, - "log_odds_ratio": -0.598590075969696, - "logits/chosen": 332.5001525878906, - "logits/rejected": 267.6921081542969, - "logps/chosen": -1.1636695861816406, - "logps/rejected": -1.2612754106521606, - "loss": 1.5237, - "nll_loss": 1.697426438331604, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05818348377943039, - "rewards/margins": 0.004880286753177643, - "rewards/rejected": -0.06306376308202744, + "log_odds_chosen": 2.4105961322784424, + "log_odds_ratio": -0.14894258975982666, + "logits/chosen": 369.1900329589844, + "logits/rejected": 388.2864074707031, + "logps/chosen": -0.32753241062164307, + "logps/rejected": -1.5716395378112793, + "loss": 0.5385, + "nll_loss": 0.5137172937393188, + "rewards/accuracies": 0.987500011920929, + "rewards/chosen": -0.016376618295907974, + "rewards/margins": 0.06220535561442375, + "rewards/rejected": -0.07858197391033173, "step": 450 }, { - "epoch": 0.33929903057419836, - "grad_norm": 38.54649353027344, + "epoch": 2.891183478951549, + "grad_norm": 11.692276000976562, "learning_rate": 2.3440361546924774e-06, - "log_odds_chosen": 0.5386314392089844, - "log_odds_ratio": -0.4845854341983795, - "logits/chosen": 311.5939636230469, - "logits/rejected": 280.4867858886719, - "logps/chosen": -1.2091641426086426, - "logps/rejected": -1.6010057926177979, - "loss": 1.4632, - "nll_loss": 1.2646825313568115, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06045820564031601, - "rewards/margins": 0.019592078402638435, - "rewards/rejected": -0.08005028963088989, + "log_odds_chosen": 2.5121138095855713, + "log_odds_ratio": -0.13669057190418243, + "logits/chosen": 368.95159912109375, + "logits/rejected": 374.1610412597656, + "logps/chosen": -0.30898115038871765, + "logps/rejected": -1.5191621780395508, + "loss": 0.5456, + "nll_loss": 0.5575789213180542, + "rewards/accuracies": 0.9750000238418579, + "rewards/chosen": -0.015449057333171368, + "rewards/margins": 0.06050904467701912, + "rewards/rejected": -0.07595811039209366, "step": 455 }, { - "epoch": 0.343027591349739, - "grad_norm": 65.10990905761719, + "epoch": 2.9229547259729944, + "grad_norm": 10.796197891235352, "learning_rate": 2.3312620206007847e-06, - "log_odds_chosen": -0.24770459532737732, - "log_odds_ratio": -0.870587170124054, - "logits/chosen": 280.466796875, - "logits/rejected": 324.1703186035156, - "logps/chosen": -1.0976872444152832, - "logps/rejected": -0.9688448905944824, - "loss": 1.5405, - "nll_loss": 1.3995617628097534, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.05488436296582222, - "rewards/margins": -0.006442120764404535, - "rewards/rejected": -0.04844224452972412, + "log_odds_chosen": 2.5624585151672363, + "log_odds_ratio": -0.11981997638940811, + "logits/chosen": 385.6537170410156, + "logits/rejected": 393.69268798828125, + "logps/chosen": -0.292979896068573, + "logps/rejected": -1.5465893745422363, + "loss": 0.5032, + "nll_loss": 0.514795184135437, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01464899629354477, + "rewards/margins": 0.06268046796321869, + "rewards/rejected": -0.07732947170734406, "step": 460 }, { - "epoch": 0.34675615212527966, - "grad_norm": 111.66654205322266, + "epoch": 2.95472597299444, + "grad_norm": 15.433439254760742, "learning_rate": 2.3186944788008413e-06, - "log_odds_chosen": 0.18531930446624756, - "log_odds_ratio": -0.6512736082077026, - "logits/chosen": 299.8744201660156, - "logits/rejected": 295.8683166503906, - "logps/chosen": -1.2978415489196777, - "logps/rejected": -1.4664907455444336, - "loss": 1.6974, - "nll_loss": 1.870469331741333, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06489207595586777, - "rewards/margins": 0.008432460948824883, - "rewards/rejected": -0.0733245387673378, + "log_odds_chosen": 2.7584941387176514, + "log_odds_ratio": -0.10928479582071304, + "logits/chosen": 379.510498046875, + "logits/rejected": 370.8335876464844, + "logps/chosen": -0.3042075037956238, + "logps/rejected": -1.7168937921524048, + "loss": 0.5496, + "nll_loss": 0.5133975744247437, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.01521037332713604, + "rewards/margins": 0.07063432037830353, + "rewards/rejected": -0.08584468811750412, "step": 465 }, { - "epoch": 0.3504847129008203, - "grad_norm": 73.24522399902344, + "epoch": 2.9864972200158855, + "grad_norm": 14.072263717651367, "learning_rate": 2.3063280200722128e-06, - "log_odds_chosen": -0.32817691564559937, - "log_odds_ratio": -0.885696530342102, - "logits/chosen": 278.877197265625, - "logits/rejected": 345.8519592285156, - "logps/chosen": -1.5019890069961548, - "logps/rejected": -1.275105595588684, - "loss": 1.6116, - "nll_loss": 1.7504619359970093, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07509945333003998, - "rewards/margins": -0.011344168335199356, - "rewards/rejected": -0.06375528126955032, + "log_odds_chosen": 2.5119848251342773, + "log_odds_ratio": -0.11926700919866562, + "logits/chosen": 385.35107421875, + "logits/rejected": 383.25164794921875, + "logps/chosen": -0.3074565529823303, + "logps/rejected": -1.477611780166626, + "loss": 0.56, + "nll_loss": 0.5478503704071045, + "rewards/accuracies": 1.0, + "rewards/chosen": -0.015372827649116516, + "rewards/margins": 0.0585077628493309, + "rewards/rejected": -0.07388059794902802, "step": 470 }, { - "epoch": 0.3542132736763609, - "grad_norm": 57.83740997314453, - "learning_rate": 2.2941573387056174e-06, - "log_odds_chosen": -0.0840623676776886, - "log_odds_ratio": -0.7635816335678101, - "logits/chosen": 273.50653076171875, - "logits/rejected": 302.1976318359375, - "logps/chosen": -1.0302342176437378, - "logps/rejected": -0.9838453531265259, - "loss": 1.6115, - "nll_loss": 1.3236974477767944, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05151171609759331, - "rewards/margins": -0.002319442806765437, - "rewards/rejected": -0.04919227212667465, - "step": 475 - }, - { - "epoch": 0.3579418344519016, - "grad_norm": 45.96052169799805, - "learning_rate": 2.2821773229381924e-06, - "log_odds_chosen": 0.06460478156805038, - "log_odds_ratio": -0.7020595073699951, - "logits/chosen": 253.98086547851562, - "logits/rejected": 381.29498291015625, - "logps/chosen": -1.0283799171447754, - "logps/rejected": -1.0781841278076172, - "loss": 1.4119, - "nll_loss": 1.3641859292984009, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05141900107264519, - "rewards/margins": 0.002490200102329254, - "rewards/rejected": -0.05390920117497444, - "step": 480 - }, - { - "epoch": 0.3616703952274422, - "grad_norm": 30.543123245239258, - "learning_rate": 2.270383045932499e-06, - "log_odds_chosen": -0.3691864013671875, - "log_odds_ratio": -0.9283899068832397, - "logits/chosen": 352.7226257324219, - "logits/rejected": 337.7225036621094, - "logps/chosen": -1.8711189031600952, - "logps/rejected": -1.548882246017456, - "loss": 1.7202, - "nll_loss": 1.8925946950912476, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.09355594962835312, - "rewards/margins": -0.016111837700009346, - "rewards/rejected": -0.07744411379098892, - "step": 485 - }, - { - "epoch": 0.36539895600298283, - "grad_norm": 48.37338638305664, - "learning_rate": 2.2587697572631284e-06, - "log_odds_chosen": 0.314678430557251, - "log_odds_ratio": -0.5791245698928833, - "logits/chosen": 271.5398254394531, - "logits/rejected": 383.7361755371094, - "logps/chosen": -1.0169918537139893, - "logps/rejected": -1.193527102470398, - "loss": 1.4824, - "nll_loss": 1.5833736658096313, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05084959417581558, - "rewards/margins": 0.00882676150649786, - "rewards/rejected": -0.05967635661363602, - "step": 490 - }, - { - "epoch": 0.3691275167785235, - "grad_norm": 43.09674072265625, - "learning_rate": 2.2473328748774737e-06, - "log_odds_chosen": -0.3310704529285431, - "log_odds_ratio": -0.9055169820785522, - "logits/chosen": 316.2137451171875, - "logits/rejected": 317.1672668457031, - "logps/chosen": -0.9479414224624634, - "logps/rejected": -0.7993487119674683, - "loss": 1.3794, - "nll_loss": 1.12400221824646, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.04739706963300705, - "rewards/margins": -0.007429635617882013, - "rewards/rejected": -0.03996743634343147, - "step": 495 - }, - { - "epoch": 0.37285607755406414, - "grad_norm": 25.89630889892578, - "learning_rate": 2.23606797749979e-06, - "log_odds_chosen": -0.3393707275390625, - "log_odds_ratio": -0.8984289169311523, - "logits/chosen": 314.33160400390625, - "logits/rejected": 359.8158264160156, - "logps/chosen": -1.4752811193466187, - "logps/rejected": -1.2486653327941895, - "loss": 1.4169, - "nll_loss": 1.8315353393554688, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07376404851675034, - "rewards/margins": -0.011330785229802132, - "rewards/rejected": -0.06243326514959335, - "step": 500 - }, - { - "epoch": 0.37658463832960476, - "grad_norm": 107.64212799072266, - "learning_rate": 2.224970797449924e-06, - "log_odds_chosen": -0.3466617465019226, - "log_odds_ratio": -0.9759294390678406, - "logits/chosen": 337.16802978515625, - "logits/rejected": 263.6734924316406, - "logps/chosen": -1.3896830081939697, - "logps/rejected": -1.212558388710022, - "loss": 1.6898, - "nll_loss": 1.4566766023635864, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0694841518998146, - "rewards/margins": -0.008856229484081268, - "rewards/rejected": -0.060627926141023636, - "step": 505 - }, - { - "epoch": 0.38031319910514544, - "grad_norm": 38.60774230957031, - "learning_rate": 2.2140372138502386e-06, - "log_odds_chosen": 0.20238986611366272, - "log_odds_ratio": -0.6242908835411072, - "logits/chosen": 331.1517028808594, - "logits/rejected": 350.15509033203125, - "logps/chosen": -1.2198134660720825, - "logps/rejected": -1.3732125759124756, - "loss": 1.3657, - "nll_loss": 1.242480993270874, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.060990672558546066, - "rewards/margins": 0.00766995782032609, - "rewards/rejected": -0.06866063177585602, - "step": 510 - }, - { - "epoch": 0.38404175988068606, - "grad_norm": 74.76829528808594, - "learning_rate": 2.203263246196159e-06, - "log_odds_chosen": 0.09643487632274628, - "log_odds_ratio": -0.6862896680831909, - "logits/chosen": 342.02642822265625, - "logits/rejected": 295.3052673339844, - "logps/chosen": -1.0735890865325928, - "logps/rejected": -1.1096007823944092, - "loss": 1.4682, - "nll_loss": 1.314708948135376, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.053679458796978, - "rewards/margins": 0.001800581463612616, - "rewards/rejected": -0.05548004060983658, - "step": 515 - }, - { - "epoch": 0.3877703206562267, - "grad_norm": 51.815650939941406, - "learning_rate": 2.1926450482675734e-06, - "log_odds_chosen": 0.14587938785552979, - "log_odds_ratio": -0.6839359998703003, - "logits/chosen": 307.8727111816406, - "logits/rejected": 315.9523620605469, - "logps/chosen": -0.8382126688957214, - "logps/rejected": -0.8969368934631348, - "loss": 1.4935, - "nll_loss": 1.0695974826812744, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.04191063717007637, - "rewards/margins": 0.002936213742941618, - "rewards/rejected": -0.04484684765338898, - "step": 520 - }, - { - "epoch": 0.39149888143176736, - "grad_norm": 39.482357025146484, - "learning_rate": 2.182178902359924e-06, - "log_odds_chosen": 0.1520734578371048, - "log_odds_ratio": -0.6565192341804504, - "logits/chosen": 281.9866027832031, - "logits/rejected": 368.71795654296875, - "logps/chosen": -1.3588690757751465, - "logps/rejected": -1.5189663171768188, - "loss": 1.4687, - "nll_loss": 1.3546841144561768, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06794346123933792, - "rewards/margins": 0.008004861883819103, - "rewards/rejected": -0.0759483203291893, - "step": 525 - }, - { - "epoch": 0.395227442207308, - "grad_norm": 35.176082611083984, - "learning_rate": 2.1718612138153473e-06, - "log_odds_chosen": 0.284365177154541, - "log_odds_ratio": -0.5949299335479736, - "logits/chosen": 288.9742736816406, - "logits/rejected": 331.4393310546875, - "logps/chosen": -0.9501428604125977, - "logps/rejected": -1.1652799844741821, - "loss": 1.5816, - "nll_loss": 1.463678240776062, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.047507144510746, - "rewards/margins": 0.01075685489922762, - "rewards/rejected": -0.05826399847865105, - "step": 530 - }, - { - "epoch": 0.3989560029828486, - "grad_norm": 61.95853042602539, - "learning_rate": 2.161688505835585e-06, - "log_odds_chosen": -0.06088218837976456, - "log_odds_ratio": -0.7299826145172119, - "logits/chosen": 279.20355224609375, - "logits/rejected": 282.70269775390625, - "logps/chosen": -1.0809414386749268, - "logps/rejected": -1.0638335943222046, - "loss": 1.6266, - "nll_loss": 1.7451934814453125, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.054047077894210815, - "rewards/margins": -0.000855399644933641, - "rewards/rejected": -0.05319168046116829, - "step": 535 - }, - { - "epoch": 0.40268456375838924, - "grad_norm": 72.43805694580078, - "learning_rate": 2.151657414559676e-06, - "log_odds_chosen": 0.7745304107666016, - "log_odds_ratio": -0.4688674807548523, - "logits/chosen": 346.61077880859375, - "logits/rejected": 298.93206787109375, - "logps/chosen": -0.8585512042045593, - "logps/rejected": -1.2471251487731934, - "loss": 1.3943, - "nll_loss": 1.282961368560791, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04292755946516991, - "rewards/margins": 0.019428696483373642, - "rewards/rejected": -0.06235625594854355, - "step": 540 - }, - { - "epoch": 0.4064131245339299, - "grad_norm": 46.46628189086914, - "learning_rate": 2.1417646843905967e-06, - "log_odds_chosen": 0.4291912019252777, - "log_odds_ratio": -0.5761655569076538, - "logits/chosen": 286.1690368652344, - "logits/rejected": 319.75164794921875, - "logps/chosen": -1.1731668710708618, - "logps/rejected": -1.476829171180725, - "loss": 1.7608, - "nll_loss": 1.622045874595642, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05865834280848503, - "rewards/margins": 0.015183120965957642, - "rewards/rejected": -0.07384146749973297, - "step": 545 - }, - { - "epoch": 0.41014168530947054, - "grad_norm": 51.46520233154297, - "learning_rate": 2.132007163556104e-06, - "log_odds_chosen": 0.434037983417511, - "log_odds_ratio": -0.5396202206611633, - "logits/chosen": 247.6271514892578, - "logits/rejected": 364.06866455078125, - "logps/chosen": -1.0448224544525146, - "logps/rejected": -1.3365886211395264, - "loss": 1.6014, - "nll_loss": 1.6536359786987305, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05224112421274185, - "rewards/margins": 0.0145883085206151, - "rewards/rejected": -0.06682942807674408, - "step": 550 - }, - { - "epoch": 0.41387024608501116, - "grad_norm": 60.97431564331055, - "learning_rate": 2.122381799890045e-06, - "log_odds_chosen": 0.12737317383289337, - "log_odds_ratio": -0.6503344774246216, - "logits/chosen": 327.8324890136719, - "logits/rejected": 369.25628662109375, - "logps/chosen": -0.9760522842407227, - "logps/rejected": -1.0689146518707275, - "loss": 1.4635, - "nll_loss": 1.0822558403015137, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04880261421203613, - "rewards/margins": 0.004643112421035767, - "rewards/rejected": -0.0534457266330719, - "step": 555 - }, - { - "epoch": 0.41759880686055184, - "grad_norm": 55.75449752807617, - "learning_rate": 2.1128856368212917e-06, - "log_odds_chosen": 0.07938291132450104, - "log_odds_ratio": -0.6940451860427856, - "logits/chosen": 268.52459716796875, - "logits/rejected": 313.84796142578125, - "logps/chosen": -1.273742914199829, - "logps/rejected": -1.377414345741272, - "loss": 1.6964, - "nll_loss": 1.6442970037460327, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06368714570999146, - "rewards/margins": 0.005183566827327013, - "rewards/rejected": -0.06887070834636688, - "step": 560 - }, - { - "epoch": 0.42132736763609246, - "grad_norm": 47.75613784790039, - "learning_rate": 2.1035158095583564e-06, - "log_odds_chosen": 0.6850577592849731, - "log_odds_ratio": -0.44497498869895935, - "logits/chosen": 316.3045654296875, - "logits/rejected": 279.0616760253906, - "logps/chosen": -1.0636051893234253, - "logps/rejected": -1.4853187799453735, - "loss": 1.4632, - "nll_loss": 1.6317638158798218, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.053180258721113205, - "rewards/margins": 0.021085679531097412, - "rewards/rejected": -0.07426594197750092, - "step": 565 - }, - { - "epoch": 0.4250559284116331, - "grad_norm": 45.31257629394531, - "learning_rate": 2.0942695414584777e-06, - "log_odds_chosen": 0.0364144966006279, - "log_odds_ratio": -0.6805545091629028, - "logits/chosen": 296.89178466796875, - "logits/rejected": 286.87103271484375, - "logps/chosen": -1.0835291147232056, - "logps/rejected": -1.0858829021453857, - "loss": 1.3878, - "nll_loss": 1.3450695276260376, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.0541764572262764, - "rewards/margins": 0.00011768974218284711, - "rewards/rejected": -0.054294146597385406, - "step": 570 - }, - { - "epoch": 0.42878448918717377, - "grad_norm": 36.731266021728516, - "learning_rate": 2.085144140570748e-06, - "log_odds_chosen": -0.07109051197767258, - "log_odds_ratio": -0.7384371757507324, - "logits/chosen": 296.91326904296875, - "logits/rejected": 282.1145935058594, - "logps/chosen": -1.3243277072906494, - "logps/rejected": -1.2724934816360474, - "loss": 1.4543, - "nll_loss": 1.7479616403579712, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06621638685464859, - "rewards/margins": -0.0025917149614542723, - "rewards/rejected": -0.06362467259168625, - "step": 575 - }, - { - "epoch": 0.4325130499627144, - "grad_norm": 73.74729919433594, - "learning_rate": 2.0761369963434992e-06, - "log_odds_chosen": -0.27058273553848267, - "log_odds_ratio": -0.8615729212760925, - "logits/chosen": 377.09161376953125, - "logits/rejected": 290.49139404296875, - "logps/chosen": -1.483351469039917, - "logps/rejected": -1.2866142988204956, - "loss": 1.616, - "nll_loss": 1.6317939758300781, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.07416756451129913, - "rewards/margins": -0.009836850687861443, - "rewards/rejected": -0.06433071941137314, - "step": 580 - }, - { - "epoch": 0.436241610738255, - "grad_norm": 73.45361328125, - "learning_rate": 2.067245576486808e-06, - "log_odds_chosen": 0.042069900780916214, - "log_odds_ratio": -0.7026094198226929, - "logits/chosen": 367.9713439941406, - "logits/rejected": 303.32000732421875, - "logps/chosen": -1.2307021617889404, - "logps/rejected": -1.267197847366333, - "loss": 1.5845, - "nll_loss": 1.6542478799819946, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06153510883450508, - "rewards/margins": 0.0018247828120365739, - "rewards/rejected": -0.06335989385843277, - "step": 585 - }, - { - "epoch": 0.4399701715137957, - "grad_norm": 34.30842971801758, - "learning_rate": 2.058467423981546e-06, - "log_odds_chosen": -0.4121587872505188, - "log_odds_ratio": -0.9469677209854126, - "logits/chosen": 381.0136413574219, - "logits/rejected": 263.22039794921875, - "logps/chosen": -1.1629843711853027, - "logps/rejected": -0.8845303654670715, - "loss": 1.4643, - "nll_loss": 1.3448362350463867, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05814921855926514, - "rewards/margins": -0.013922693207859993, - "rewards/rejected": -0.044226523488759995, - "step": 590 - }, - { - "epoch": 0.4436987322893363, - "grad_norm": 84.87673950195312, - "learning_rate": 2.0498001542269694e-06, - "log_odds_chosen": 0.41603097319602966, - "log_odds_ratio": -0.5475112199783325, - "logits/chosen": 405.03143310546875, - "logits/rejected": 270.9808654785156, - "logps/chosen": -1.0688560009002686, - "logps/rejected": -1.3580535650253296, - "loss": 1.4987, - "nll_loss": 1.4497476816177368, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05344279855489731, - "rewards/margins": 0.014459880068898201, - "rewards/rejected": -0.06790267676115036, - "step": 595 - }, - { - "epoch": 0.44742729306487694, - "grad_norm": 45.61537551879883, - "learning_rate": 2.0412414523193154e-06, - "log_odds_chosen": 0.3689327836036682, - "log_odds_ratio": -0.5495279431343079, - "logits/chosen": 372.7005920410156, - "logits/rejected": 336.993896484375, - "logps/chosen": -0.805282711982727, - "logps/rejected": -0.9973527789115906, - "loss": 1.4417, - "nll_loss": 1.0664907693862915, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04026413708925247, - "rewards/margins": 0.009603505954146385, - "rewards/rejected": -0.04986763745546341, - "step": 600 - }, - { - "epoch": 0.4511558538404176, - "grad_norm": 69.08150482177734, - "learning_rate": 2.0327890704543546e-06, - "log_odds_chosen": 0.44087108969688416, - "log_odds_ratio": -0.5167280435562134, - "logits/chosen": 265.6076965332031, - "logits/rejected": 342.70281982421875, - "logps/chosen": -1.2730252742767334, - "logps/rejected": -1.579519271850586, - "loss": 1.4826, - "nll_loss": 1.6149978637695312, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06365126371383667, - "rewards/margins": 0.015324696898460388, - "rewards/rejected": -0.07897596061229706, - "step": 605 - }, - { - "epoch": 0.45488441461595824, - "grad_norm": 31.625337600708008, - "learning_rate": 2.0244408254472904e-06, - "log_odds_chosen": 0.20098280906677246, - "log_odds_ratio": -0.6163533926010132, - "logits/chosen": 327.77655029296875, - "logits/rejected": 366.5870056152344, - "logps/chosen": -1.4981778860092163, - "logps/rejected": -1.61972177028656, - "loss": 1.4176, - "nll_loss": 1.5821648836135864, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.07490889728069305, - "rewards/margins": 0.006077195517718792, - "rewards/rejected": -0.08098609745502472, - "step": 610 - }, - { - "epoch": 0.45861297539149887, - "grad_norm": 34.91094970703125, - "learning_rate": 2.0161945963637796e-06, - "log_odds_chosen": -0.013902264647185802, - "log_odds_ratio": -0.7140836715698242, - "logits/chosen": 320.482666015625, - "logits/rejected": 308.71875, - "logps/chosen": -1.1631667613983154, - "logps/rejected": -1.1471284627914429, - "loss": 1.5636, - "nll_loss": 1.747257947921753, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05815834552049637, - "rewards/margins": -0.0008019238593988121, - "rewards/rejected": -0.05735642835497856, - "step": 615 - }, - { - "epoch": 0.46234153616703955, - "grad_norm": 90.02574157714844, - "learning_rate": 2.0080483222562476e-06, - "log_odds_chosen": 0.3175165355205536, - "log_odds_ratio": -0.6062830090522766, - "logits/chosen": 267.30389404296875, - "logits/rejected": 293.7818908691406, - "logps/chosen": -1.235954999923706, - "logps/rejected": -1.501263976097107, - "loss": 1.6207, - "nll_loss": 1.9376163482666016, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06179774925112724, - "rewards/margins": 0.013265450485050678, - "rewards/rejected": -0.07506320625543594, - "step": 620 - }, - { - "epoch": 0.46607009694258017, - "grad_norm": 63.74228286743164, - "learning_rate": 2.0000000000000003e-06, - "log_odds_chosen": -0.01827806606888771, - "log_odds_ratio": -0.7216897010803223, - "logits/chosen": 316.70721435546875, - "logits/rejected": 275.85076904296875, - "logps/chosen": -1.3453762531280518, - "logps/rejected": -1.323955774307251, - "loss": 1.701, - "nll_loss": 1.8425439596176147, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.06726881116628647, - "rewards/margins": -0.0010710202623158693, - "rewards/rejected": -0.06619779020547867, - "step": 625 - }, - { - "epoch": 0.4697986577181208, - "grad_norm": 66.6411361694336, - "learning_rate": 1.9920476822239895e-06, - "log_odds_chosen": -0.20506183803081512, - "log_odds_ratio": -0.89848792552948, - "logits/chosen": 323.22021484375, - "logits/rejected": 383.5016784667969, - "logps/chosen": -1.262315273284912, - "logps/rejected": -1.1673616170883179, - "loss": 1.5234, - "nll_loss": 1.445989727973938, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06311576813459396, - "rewards/margins": -0.004747686441987753, - "rewards/rejected": -0.058368079364299774, - "step": 630 - }, - { - "epoch": 0.4735272184936615, - "grad_norm": 98.79306030273438, - "learning_rate": 1.9841894753313627e-06, - "log_odds_chosen": 0.10681043565273285, - "log_odds_ratio": -0.6550741791725159, - "logits/chosen": 307.7252197265625, - "logits/rejected": 288.76483154296875, - "logps/chosen": -1.0565249919891357, - "logps/rejected": -1.1405563354492188, - "loss": 1.5384, - "nll_loss": 1.6263669729232788, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.052826255559921265, - "rewards/margins": 0.004201564937829971, - "rewards/rejected": -0.05702781677246094, - "step": 635 - }, - { - "epoch": 0.4772557792692021, - "grad_norm": 76.47772216796875, - "learning_rate": 1.976423537605237e-06, - "log_odds_chosen": 0.5704589486122131, - "log_odds_ratio": -0.48903316259384155, - "logits/chosen": 300.20831298828125, - "logits/rejected": 289.21014404296875, - "logps/chosen": -0.8931530714035034, - "logps/rejected": -1.209450125694275, - "loss": 1.6178, - "nll_loss": 1.946489691734314, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04465765878558159, - "rewards/margins": 0.015814850106835365, - "rewards/rejected": -0.060472507029771805, - "step": 640 - }, - { - "epoch": 0.4809843400447427, - "grad_norm": 36.52433395385742, - "learning_rate": 1.9687480773953947e-06, - "log_odds_chosen": 0.12118474394083023, - "log_odds_ratio": -0.7020589709281921, - "logits/chosen": 299.3000793457031, - "logits/rejected": 318.505126953125, - "logps/chosen": -1.267259120941162, - "logps/rejected": -1.3607771396636963, - "loss": 1.5683, - "nll_loss": 1.5003077983856201, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.0633629634976387, - "rewards/margins": 0.0046758996322751045, - "rewards/rejected": -0.06803885847330093, - "step": 645 - }, - { - "epoch": 0.48471290082028334, - "grad_norm": 52.15703201293945, - "learning_rate": 1.961161351381841e-06, - "log_odds_chosen": 0.21166205406188965, - "log_odds_ratio": -0.6087368726730347, - "logits/chosen": 282.2923889160156, - "logits/rejected": 295.5262756347656, - "logps/chosen": -0.9324533343315125, - "logps/rejected": -1.0711817741394043, - "loss": 1.6972, - "nll_loss": 1.9642865657806396, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.046622663736343384, - "rewards/margins": 0.0069364262744784355, - "rewards/rejected": -0.053559087216854095, - "step": 650 - }, - { - "epoch": 0.488441461595824, - "grad_norm": 45.506004333496094, - "learning_rate": 1.953661662911409e-06, - "log_odds_chosen": 0.5246542096138, - "log_odds_ratio": -0.5277723670005798, - "logits/chosen": 277.5283203125, - "logits/rejected": 263.9239807128906, - "logps/chosen": -1.1885125637054443, - "logps/rejected": -1.629077672958374, - "loss": 1.7338, - "nll_loss": 2.5156092643737793, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.059425629675388336, - "rewards/margins": 0.022028258070349693, - "rewards/rejected": -0.08145389705896378, - "step": 655 - }, - { - "epoch": 0.49217002237136465, - "grad_norm": 58.75932312011719, - "learning_rate": 1.9462473604038077e-06, - "log_odds_chosen": 0.05632822960615158, - "log_odds_ratio": -0.6731787323951721, - "logits/chosen": 313.13519287109375, - "logits/rejected": 259.0019226074219, - "logps/chosen": -1.3810701370239258, - "logps/rejected": -1.410433053970337, - "loss": 1.6778, - "nll_loss": 1.8444935083389282, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06905350834131241, - "rewards/margins": 0.0014681480824947357, - "rewards/rejected": -0.07052166014909744, - "step": 660 - }, - { - "epoch": 0.49589858314690527, - "grad_norm": 26.21990394592285, - "learning_rate": 1.938916835823703e-06, - "log_odds_chosen": 0.24045352637767792, - "log_odds_ratio": -0.6123930215835571, - "logits/chosen": 327.16070556640625, - "logits/rejected": 287.02850341796875, - "logps/chosen": -1.2068796157836914, - "logps/rejected": -1.3557159900665283, - "loss": 1.3913, - "nll_loss": 1.5790693759918213, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06034398823976517, - "rewards/margins": 0.007441818714141846, - "rewards/rejected": -0.06778579950332642, - "step": 665 - }, - { - "epoch": 0.49962714392244595, - "grad_norm": 81.00346374511719, - "learning_rate": 1.9316685232156397e-06, - "log_odds_chosen": 0.5014033317565918, - "log_odds_ratio": -0.5646325945854187, - "logits/chosen": 263.29791259765625, - "logits/rejected": 319.8410339355469, - "logps/chosen": -1.0634019374847412, - "logps/rejected": -1.3695071935653687, - "loss": 1.4767, - "nll_loss": 1.2057592868804932, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.053170096129179, - "rewards/margins": 0.015305262990295887, - "rewards/rejected": -0.06847536563873291, - "step": 670 - }, - { - "epoch": 0.5033557046979866, - "grad_norm": 93.53106689453125, - "learning_rate": 1.924500897298753e-06, - "log_odds_chosen": 0.4352389872074127, - "log_odds_ratio": -0.5134842991828918, - "logits/chosen": 331.6639404296875, - "logits/rejected": 294.3586730957031, - "logps/chosen": -1.1794989109039307, - "logps/rejected": -1.476166009902954, - "loss": 1.4359, - "nll_loss": 1.374155879020691, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.058974944055080414, - "rewards/margins": 0.014833351597189903, - "rewards/rejected": -0.07380829751491547, - "step": 675 - }, - { - "epoch": 0.5070842654735273, - "grad_norm": 75.60554504394531, - "learning_rate": 1.917412472118426e-06, - "log_odds_chosen": 0.6222040057182312, - "log_odds_ratio": -0.5033690333366394, - "logits/chosen": 363.5578308105469, - "logits/rejected": 377.66357421875, - "logps/chosen": -0.8245936632156372, - "logps/rejected": -1.1339231729507446, - "loss": 1.3619, - "nll_loss": 1.1601089239120483, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04122968390583992, - "rewards/margins": 0.015466478653252125, - "rewards/rejected": -0.05669615790247917, - "step": 680 - }, - { - "epoch": 0.5108128262490679, - "grad_norm": 73.79634094238281, - "learning_rate": 1.9104017997521752e-06, - "log_odds_chosen": -0.06551072746515274, - "log_odds_ratio": -0.7596521973609924, - "logits/chosen": 277.48712158203125, - "logits/rejected": 287.5028991699219, - "logps/chosen": -1.3580987453460693, - "logps/rejected": -1.3178906440734863, - "loss": 1.333, - "nll_loss": 1.3939554691314697, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06790493428707123, - "rewards/margins": -0.0020104036666452885, - "rewards/rejected": -0.06589453667402267, - "step": 685 - }, - { - "epoch": 0.5145413870246085, - "grad_norm": 35.4735107421875, - "learning_rate": 1.9034674690672024e-06, - "log_odds_chosen": 0.826036810874939, - "log_odds_ratio": -0.5864019393920898, - "logits/chosen": 404.75860595703125, - "logits/rejected": 265.9111022949219, - "logps/chosen": -1.1555328369140625, - "logps/rejected": -1.720414400100708, - "loss": 1.4246, - "nll_loss": 1.5269211530685425, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05777664855122566, - "rewards/margins": 0.028244078159332275, - "rewards/rejected": -0.08602072298526764, - "step": 690 - }, - { - "epoch": 0.5182699478001491, - "grad_norm": 66.41799926757812, - "learning_rate": 1.8966081045272043e-06, - "log_odds_chosen": 0.6990260481834412, - "log_odds_ratio": -0.4625016152858734, - "logits/chosen": 255.0375518798828, - "logits/rejected": 315.13494873046875, - "logps/chosen": -1.0141682624816895, - "logps/rejected": -1.3992934226989746, - "loss": 1.3664, - "nll_loss": 1.0842437744140625, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05070841312408447, - "rewards/margins": 0.01925625652074814, - "rewards/rejected": -0.06996466964483261, - "step": 695 - }, - { - "epoch": 0.5219985085756897, - "grad_norm": 37.180965423583984, - "learning_rate": 1.8898223650461362e-06, - "log_odds_chosen": 0.5215853452682495, - "log_odds_ratio": -0.5324159264564514, - "logits/chosen": 356.8740539550781, - "logits/rejected": 266.0075988769531, - "logps/chosen": -0.6164702773094177, - "logps/rejected": -0.7757848501205444, - "loss": 1.4405, - "nll_loss": 1.3255054950714111, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.030823510140180588, - "rewards/margins": 0.00796573143452406, - "rewards/rejected": -0.03878924995660782, - "step": 700 - }, - { - "epoch": 0.5257270693512305, - "grad_norm": 36.878074645996094, - "learning_rate": 1.8831089428867739e-06, - "log_odds_chosen": 0.4977583885192871, - "log_odds_ratio": -0.6600570678710938, - "logits/chosen": 279.70587158203125, - "logits/rejected": 428.71612548828125, - "logps/chosen": -1.31363844871521, - "logps/rejected": -1.8152077198028564, - "loss": 1.3612, - "nll_loss": 1.3942934274673462, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06568192690610886, - "rewards/margins": 0.025078460574150085, - "rewards/rejected": -0.09076038748025894, - "step": 705 - }, - { - "epoch": 0.5294556301267711, - "grad_norm": 42.2510986328125, - "learning_rate": 1.876466562602004e-06, - "log_odds_chosen": 1.1870137453079224, - "log_odds_ratio": -0.4820845127105713, - "logits/chosen": 363.2850036621094, - "logits/rejected": 270.81231689453125, - "logps/chosen": -1.0144596099853516, - "logps/rejected": -2.0093092918395996, - "loss": 1.4128, - "nll_loss": 1.5129729509353638, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05072297900915146, - "rewards/margins": 0.04974249377846718, - "rewards/rejected": -0.10046547651290894, - "step": 710 - }, - { - "epoch": 0.5331841909023117, - "grad_norm": 87.8765869140625, - "learning_rate": 1.8698939800169145e-06, - "log_odds_chosen": 0.08628182858228683, - "log_odds_ratio": -0.6681681871414185, - "logits/chosen": 300.84539794921875, - "logits/rejected": 297.59576416015625, - "logps/chosen": -1.0929430723190308, - "logps/rejected": -1.140328288078308, - "loss": 1.4071, - "nll_loss": 1.3067317008972168, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05464714765548706, - "rewards/margins": 0.002369265304878354, - "rewards/rejected": -0.057016413658857346, - "step": 715 - }, - { - "epoch": 0.5369127516778524, - "grad_norm": 44.11328887939453, - "learning_rate": 1.863389981249825e-06, - "log_odds_chosen": -0.17118872702121735, - "log_odds_ratio": -0.8053637742996216, - "logits/chosen": 326.38372802734375, - "logits/rejected": 331.0024108886719, - "logps/chosen": -1.4682586193084717, - "logps/rejected": -1.3386746644973755, - "loss": 1.5342, - "nll_loss": 1.6988483667373657, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.0734129324555397, - "rewards/margins": -0.006479197181761265, - "rewards/rejected": -0.06693373620510101, - "step": 720 - }, - { - "epoch": 0.540641312453393, - "grad_norm": 44.91902160644531, - "learning_rate": 1.8569533817705187e-06, - "log_odds_chosen": 0.2976382076740265, - "log_odds_ratio": -0.6181797385215759, - "logits/chosen": 294.4140625, - "logits/rejected": 302.21771240234375, - "logps/chosen": -0.9882805943489075, - "logps/rejected": -1.1811177730560303, - "loss": 1.5729, - "nll_loss": 1.4958490133285522, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.049414027482271194, - "rewards/margins": 0.009641863405704498, - "rewards/rejected": -0.059055887162685394, - "step": 725 - }, - { - "epoch": 0.5443698732289336, - "grad_norm": 29.04867172241211, - "learning_rate": 1.8505830254940132e-06, - "log_odds_chosen": -0.19224703311920166, - "log_odds_ratio": -0.807803750038147, - "logits/chosen": 273.5694274902344, - "logits/rejected": 393.3309020996094, - "logps/chosen": -1.3325914144515991, - "logps/rejected": -1.2068517208099365, - "loss": 1.4367, - "nll_loss": 1.4846899509429932, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.0666295737028122, - "rewards/margins": -0.006286990828812122, - "rewards/rejected": -0.060342587530612946, - "step": 730 - }, - { - "epoch": 0.5480984340044742, - "grad_norm": 39.93384552001953, - "learning_rate": 1.8442777839082938e-06, - "log_odds_chosen": 0.4540717601776123, - "log_odds_ratio": -0.5996807813644409, - "logits/chosen": 266.80950927734375, - "logits/rejected": 307.60614013671875, - "logps/chosen": -1.0521700382232666, - "logps/rejected": -1.2652854919433594, - "loss": 1.5859, - "nll_loss": 1.328599452972412, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05260850116610527, - "rewards/margins": 0.010655772872269154, - "rewards/rejected": -0.06326427310705185, - "step": 735 - }, - { - "epoch": 0.551826994780015, - "grad_norm": 44.01530075073242, - "learning_rate": 1.8380365552345197e-06, - "log_odds_chosen": -0.20413148403167725, - "log_odds_ratio": -0.8123435974121094, - "logits/chosen": 359.1883544921875, - "logits/rejected": 277.4364013671875, - "logps/chosen": -1.334236741065979, - "logps/rejected": -1.1840208768844604, - "loss": 1.6695, - "nll_loss": 1.6883920431137085, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06671183556318283, - "rewards/margins": -0.007510791532695293, - "rewards/rejected": -0.05920104309916496, - "step": 740 - }, - { - "epoch": 0.5555555555555556, - "grad_norm": 35.47201156616211, - "learning_rate": 1.8318582636182793e-06, - "log_odds_chosen": -0.3666035234928131, - "log_odds_ratio": -0.934908390045166, - "logits/chosen": 348.7666931152344, - "logits/rejected": 264.0419921875, - "logps/chosen": -1.0095113515853882, - "logps/rejected": -0.8479156494140625, - "loss": 1.5792, - "nll_loss": 1.3474349975585938, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.050475575029850006, - "rewards/margins": -0.008079791441559792, - "rewards/rejected": -0.04239577800035477, - "step": 745 - }, - { - "epoch": 0.5592841163310962, - "grad_norm": 28.068103790283203, - "learning_rate": 1.8257418583505536e-06, - "log_odds_chosen": 0.24572262167930603, - "log_odds_ratio": -0.6043493151664734, - "logits/chosen": 305.16845703125, - "logits/rejected": 377.0613708496094, - "logps/chosen": -1.0514767169952393, - "logps/rejected": -1.208012342453003, - "loss": 1.5125, - "nll_loss": 1.3279523849487305, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05257384106516838, - "rewards/margins": 0.00782677810639143, - "rewards/rejected": -0.06040061637759209, - "step": 750 - }, - { - "epoch": 0.5630126771066368, - "grad_norm": 41.3109130859375, - "learning_rate": 1.8196863131170976e-06, - "log_odds_chosen": 0.33467787504196167, - "log_odds_ratio": -0.602263331413269, - "logits/chosen": 303.75750732421875, - "logits/rejected": 408.30877685546875, - "logps/chosen": -1.1425364017486572, - "logps/rejected": -1.3638980388641357, - "loss": 1.322, - "nll_loss": 1.3299028873443604, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05712682008743286, - "rewards/margins": 0.01106808241456747, - "rewards/rejected": -0.0681949108839035, - "step": 755 - }, - { - "epoch": 0.5667412378821775, - "grad_norm": 40.294212341308594, - "learning_rate": 1.8136906252750293e-06, - "log_odds_chosen": 0.04786134511232376, - "log_odds_ratio": -0.6870894432067871, - "logits/chosen": 322.7200622558594, - "logits/rejected": 284.36895751953125, - "logps/chosen": -0.9226962327957153, - "logps/rejected": -0.9492494463920593, - "loss": 1.2692, - "nll_loss": 1.3214387893676758, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.04613481089472771, - "rewards/margins": 0.001327660633251071, - "rewards/rejected": -0.047462474554777145, - "step": 760 - }, - { - "epoch": 0.5704697986577181, - "grad_norm": 44.73657989501953, - "learning_rate": 1.807753815155468e-06, - "log_odds_chosen": 0.29072660207748413, - "log_odds_ratio": -0.567733883857727, - "logits/chosen": 270.7079772949219, - "logits/rejected": 340.41693115234375, - "logps/chosen": -0.9397411346435547, - "logps/rejected": -1.1076202392578125, - "loss": 1.3163, - "nll_loss": 1.4030935764312744, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04698706045746803, - "rewards/margins": 0.008393961004912853, - "rewards/rejected": -0.05538101866841316, - "step": 765 - }, - { - "epoch": 0.5741983594332588, - "grad_norm": 44.78424835205078, - "learning_rate": 1.801874925391118e-06, - "log_odds_chosen": 0.42616671323776245, - "log_odds_ratio": -0.5204517245292664, - "logits/chosen": 343.5841064453125, - "logits/rejected": 296.48834228515625, - "logps/chosen": -1.2563741207122803, - "logps/rejected": -1.562455177307129, - "loss": 1.5542, - "nll_loss": 1.8364181518554688, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06281870603561401, - "rewards/margins": 0.01530405879020691, - "rewards/rejected": -0.07812275737524033, - "step": 770 - }, - { - "epoch": 0.5779269202087994, - "grad_norm": 43.74664306640625, - "learning_rate": 1.7960530202677493e-06, - "log_odds_chosen": 0.22721421718597412, - "log_odds_ratio": -0.6121419072151184, - "logits/chosen": 303.1134948730469, - "logits/rejected": 344.8284912109375, - "logps/chosen": -1.2034282684326172, - "logps/rejected": -1.3533918857574463, - "loss": 1.5502, - "nll_loss": 1.7862331867218018, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06017140671610832, - "rewards/margins": 0.007498185150325298, - "rewards/rejected": -0.06766960024833679, - "step": 775 - }, - { - "epoch": 0.5816554809843401, - "grad_norm": 57.60182189941406, - "learning_rate": 1.7902871850985824e-06, - "log_odds_chosen": -0.2535324990749359, - "log_odds_ratio": -0.8664606213569641, - "logits/chosen": 326.0122985839844, - "logits/rejected": 293.62823486328125, - "logps/chosen": -1.120538353919983, - "logps/rejected": -0.9833032488822937, - "loss": 1.471, - "nll_loss": 1.5057554244995117, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.056026916950941086, - "rewards/margins": -0.006861755158752203, - "rewards/rejected": -0.04916516691446304, - "step": 780 - }, - { - "epoch": 0.5853840417598807, - "grad_norm": 36.422813415527344, - "learning_rate": 1.7845765256206243e-06, - "log_odds_chosen": 0.36582398414611816, - "log_odds_ratio": -0.5669955015182495, - "logits/chosen": 331.7763671875, - "logits/rejected": 257.35052490234375, - "logps/chosen": -1.0402021408081055, - "logps/rejected": -1.2888065576553345, - "loss": 1.493, - "nll_loss": 1.4259154796600342, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05201010778546333, - "rewards/margins": 0.01243022084236145, - "rewards/rejected": -0.06444032490253448, - "step": 785 - }, - { - "epoch": 0.5891126025354213, - "grad_norm": 38.56682205200195, - "learning_rate": 1.7789201674120502e-06, - "log_odds_chosen": -0.07571077346801758, - "log_odds_ratio": -0.7648593187332153, - "logits/chosen": 313.2991943359375, - "logits/rejected": 279.7563781738281, - "logps/chosen": -1.0626533031463623, - "logps/rejected": -0.9896480441093445, - "loss": 1.6427, - "nll_loss": 1.4641739130020142, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.053132664412260056, - "rewards/margins": -0.0036502599250525236, - "rewards/rejected": -0.04948240518569946, - "step": 790 - }, - { - "epoch": 0.5928411633109619, - "grad_norm": 61.707801818847656, - "learning_rate": 1.7733172553297718e-06, - "log_odds_chosen": 0.38354557752609253, - "log_odds_ratio": -0.6057776212692261, - "logits/chosen": 283.5621032714844, - "logits/rejected": 362.52764892578125, - "logps/chosen": -1.2894601821899414, - "logps/rejected": -1.623434066772461, - "loss": 1.5378, - "nll_loss": 1.5090787410736084, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06447301059961319, - "rewards/margins": 0.016698691993951797, - "rewards/rejected": -0.08117170631885529, - "step": 795 - }, - { - "epoch": 0.5965697240865027, - "grad_norm": 53.67112731933594, - "learning_rate": 1.7677669529663689e-06, - "log_odds_chosen": 0.5591620206832886, - "log_odds_ratio": -0.5266520977020264, - "logits/chosen": 249.25369262695312, - "logits/rejected": 347.70196533203125, - "logps/chosen": -0.740845263004303, - "logps/rejected": -1.0669397115707397, - "loss": 1.5624, - "nll_loss": 1.4592483043670654, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.03704226389527321, - "rewards/margins": 0.016304723918437958, - "rewards/rejected": -0.053346991539001465, - "step": 800 - }, - { - "epoch": 0.6002982848620433, - "grad_norm": 60.729400634765625, - "learning_rate": 1.7622684421256037e-06, - "log_odds_chosen": 0.20960378646850586, - "log_odds_ratio": -0.6377035975456238, - "logits/chosen": 382.6636657714844, - "logits/rejected": 279.068603515625, - "logps/chosen": -0.9764991998672485, - "logps/rejected": -1.0392272472381592, - "loss": 1.6298, - "nll_loss": 1.3610070943832397, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04882495850324631, - "rewards/margins": 0.003136401530355215, - "rewards/rejected": -0.05196135491132736, - "step": 805 - }, - { - "epoch": 0.6040268456375839, - "grad_norm": 56.520965576171875, - "learning_rate": 1.7568209223157664e-06, - "log_odds_chosen": 0.10302247852087021, - "log_odds_ratio": -0.7827788591384888, - "logits/chosen": 240.3090362548828, - "logits/rejected": 403.95013427734375, - "logps/chosen": -0.967578113079071, - "logps/rejected": -0.9905353784561157, - "loss": 1.3787, - "nll_loss": 1.5501364469528198, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04837890714406967, - "rewards/margins": 0.0011478618253022432, - "rewards/rejected": -0.049526769667863846, - "step": 810 - }, - { - "epoch": 0.6077554064131245, - "grad_norm": 58.121559143066406, - "learning_rate": 1.751423610260147e-06, - "log_odds_chosen": 0.703899085521698, - "log_odds_ratio": -0.4190893769264221, - "logits/chosen": 265.3694763183594, - "logits/rejected": 315.39862060546875, - "logps/chosen": -1.056476354598999, - "logps/rejected": -1.5370362997055054, - "loss": 1.5646, - "nll_loss": 1.6132590770721436, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05282381922006607, - "rewards/margins": 0.024028003215789795, - "rewards/rejected": -0.07685182243585587, - "step": 815 - }, - { - "epoch": 0.6114839671886652, - "grad_norm": 76.88304138183594, - "learning_rate": 1.7460757394239458e-06, - "log_odds_chosen": 0.942089855670929, - "log_odds_ratio": -0.34855127334594727, - "logits/chosen": 318.4146423339844, - "logits/rejected": 276.064453125, - "logps/chosen": -0.6604598760604858, - "logps/rejected": -1.1219940185546875, - "loss": 1.3757, - "nll_loss": 1.1699841022491455, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03302299603819847, - "rewards/margins": 0.023076709359884262, - "rewards/rejected": -0.05609970539808273, - "step": 820 - }, - { - "epoch": 0.6152125279642058, - "grad_norm": 52.92924118041992, - "learning_rate": 1.7407765595569787e-06, - "log_odds_chosen": 0.3512675166130066, - "log_odds_ratio": -0.5422166585922241, - "logits/chosen": 390.3765563964844, - "logits/rejected": 271.755615234375, - "logps/chosen": -1.0632984638214111, - "logps/rejected": -1.299971580505371, - "loss": 1.4656, - "nll_loss": 1.2909324169158936, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05316491797566414, - "rewards/margins": 0.011833662167191505, - "rewards/rejected": -0.0649985820055008, - "step": 825 - }, - { - "epoch": 0.6189410887397464, - "grad_norm": 36.87918472290039, - "learning_rate": 1.7355253362515584e-06, - "log_odds_chosen": 0.12026530504226685, - "log_odds_ratio": -0.6763606071472168, - "logits/chosen": 260.8428955078125, - "logits/rejected": 404.7275390625, - "logps/chosen": -1.0512555837631226, - "logps/rejected": -1.1124379634857178, - "loss": 1.5174, - "nll_loss": 1.3253700733184814, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05256278067827225, - "rewards/margins": 0.0030591185204684734, - "rewards/rejected": -0.05562189966440201, - "step": 830 - }, - { - "epoch": 0.6226696495152871, - "grad_norm": 33.301212310791016, - "learning_rate": 1.7303213505149572e-06, - "log_odds_chosen": -0.16119511425495148, - "log_odds_ratio": -0.7973025441169739, - "logits/chosen": 320.09918212890625, - "logits/rejected": 457.6670837402344, - "logps/chosen": -1.1876848936080933, - "logps/rejected": -1.0549150705337524, - "loss": 1.5318, - "nll_loss": 1.6052442789077759, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.05938424542546272, - "rewards/margins": -0.00663848826661706, - "rewards/rejected": -0.0527457594871521, - "step": 835 - }, - { - "epoch": 0.6263982102908278, - "grad_norm": 45.2703742980957, - "learning_rate": 1.7251638983558855e-06, - "log_odds_chosen": 0.07661239802837372, - "log_odds_ratio": -0.670253574848175, - "logits/chosen": 285.00244140625, - "logits/rejected": 322.8631286621094, - "logps/chosen": -1.1225502490997314, - "logps/rejected": -1.1541229486465454, - "loss": 1.4992, - "nll_loss": 1.5679997205734253, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05612751096487045, - "rewards/margins": 0.0015786353033035994, - "rewards/rejected": -0.05770614743232727, - "step": 840 - }, - { - "epoch": 0.6301267710663684, - "grad_norm": 25.487768173217773, - "learning_rate": 1.7200522903844539e-06, - "log_odds_chosen": -0.07646378129720688, - "log_odds_ratio": -0.8076605796813965, - "logits/chosen": 305.77423095703125, - "logits/rejected": 266.5284423828125, - "logps/chosen": -1.1652753353118896, - "logps/rejected": -1.1281827688217163, - "loss": 1.3441, - "nll_loss": 1.441038727760315, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.058263760060071945, - "rewards/margins": -0.0018546171486377716, - "rewards/rejected": -0.056409142911434174, - "step": 845 - }, - { - "epoch": 0.633855331841909, - "grad_norm": 56.31645965576172, - "learning_rate": 1.7149858514250883e-06, - "log_odds_chosen": 1.665338158607483, - "log_odds_ratio": -0.28545942902565, - "logits/chosen": 301.8177490234375, - "logits/rejected": 302.3929443359375, - "logps/chosen": -1.045939326286316, - "logps/rejected": -2.4172801971435547, - "loss": 1.3983, - "nll_loss": 1.2774924039840698, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.052296966314315796, - "rewards/margins": 0.06856702268123627, - "rewards/rejected": -0.12086399644613266, - "step": 850 - }, - { - "epoch": 0.6375838926174496, - "grad_norm": 54.15258026123047, - "learning_rate": 1.7099639201419239e-06, - "log_odds_chosen": 0.3978537917137146, - "log_odds_ratio": -0.5233870148658752, - "logits/chosen": 348.6756286621094, - "logits/rejected": 299.1526184082031, - "logps/chosen": -1.1191155910491943, - "logps/rejected": -1.398201584815979, - "loss": 1.4381, - "nll_loss": 1.2286999225616455, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.055955786257982254, - "rewards/margins": 0.013954291120171547, - "rewards/rejected": -0.06991007924079895, - "step": 855 - }, - { - "epoch": 0.6413124533929903, - "grad_norm": 33.70398712158203, - "learning_rate": 1.704985848676184e-06, - "log_odds_chosen": 0.06520811468362808, - "log_odds_ratio": -0.6703125834465027, - "logits/chosen": 285.54254150390625, - "logits/rejected": 374.1261901855469, - "logps/chosen": -1.068627119064331, - "logps/rejected": -1.1003258228302002, - "loss": 1.5435, - "nll_loss": 1.349015474319458, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05343135446310043, - "rewards/margins": 0.0015849340707063675, - "rewards/rejected": -0.05501629039645195, - "step": 860 - }, - { - "epoch": 0.645041014168531, - "grad_norm": 46.971187591552734, - "learning_rate": 1.700051002295115e-06, - "log_odds_chosen": 0.3869665861129761, - "log_odds_ratio": -0.5489532947540283, - "logits/chosen": 280.66497802734375, - "logits/rejected": 277.3331604003906, - "logps/chosen": -1.2994598150253296, - "logps/rejected": -1.5973399877548218, - "loss": 1.6026, - "nll_loss": 1.5630300045013428, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06497299671173096, - "rewards/margins": 0.01489400863647461, - "rewards/rejected": -0.07986699789762497, - "step": 865 - }, - { - "epoch": 0.6487695749440716, - "grad_norm": 42.03346633911133, - "learning_rate": 1.6951587590520263e-06, - "log_odds_chosen": 0.4487427771091461, - "log_odds_ratio": -0.5255690217018127, - "logits/chosen": 393.89581298828125, - "logits/rejected": 318.9569396972656, - "logps/chosen": -0.7929225564002991, - "logps/rejected": -1.055435061454773, - "loss": 1.3457, - "nll_loss": 1.1510719060897827, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.039646126329898834, - "rewards/margins": 0.013125626370310783, - "rewards/rejected": -0.05277175456285477, - "step": 870 - }, - { - "epoch": 0.6524981357196122, - "grad_norm": 45.03654861450195, - "learning_rate": 1.6903085094570331e-06, - "log_odds_chosen": 0.06261751800775528, - "log_odds_ratio": -0.7041198015213013, - "logits/chosen": 304.47100830078125, - "logits/rejected": 368.55584716796875, - "logps/chosen": -1.4198213815689087, - "logps/rejected": -1.4638946056365967, - "loss": 1.4738, - "nll_loss": 1.5588788986206055, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.07099107652902603, - "rewards/margins": 0.0022036649752408266, - "rewards/rejected": -0.07319473475217819, - "step": 875 - }, - { - "epoch": 0.6562266964951529, - "grad_norm": 52.14153289794922, - "learning_rate": 1.6854996561581053e-06, - "log_odds_chosen": 0.7895190119743347, - "log_odds_ratio": -0.4148394465446472, - "logits/chosen": 359.88897705078125, - "logits/rejected": 320.61981201171875, - "logps/chosen": -1.1623995304107666, - "logps/rejected": -1.6637179851531982, - "loss": 1.6622, - "nll_loss": 2.17972993850708, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05811998248100281, - "rewards/margins": 0.025065923109650612, - "rewards/rejected": -0.08318589627742767, - "step": 880 - }, - { - "epoch": 0.6599552572706935, - "grad_norm": 78.20367431640625, - "learning_rate": 1.680731613632036e-06, - "log_odds_chosen": -0.02199208177626133, - "log_odds_ratio": -0.7950271368026733, - "logits/chosen": 303.8291320800781, - "logits/rejected": 326.6224060058594, - "logps/chosen": -1.3236578702926636, - "logps/rejected": -1.2975791692733765, - "loss": 1.5618, - "nll_loss": 1.8742249011993408, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06618289649486542, - "rewards/margins": -0.0013039376353845, - "rewards/rejected": -0.06487895548343658, - "step": 885 - }, - { - "epoch": 0.6636838180462341, - "grad_norm": 35.67908477783203, - "learning_rate": 1.6760038078849776e-06, - "log_odds_chosen": 0.7553747892379761, - "log_odds_ratio": -0.48680806159973145, - "logits/chosen": 320.37579345703125, - "logits/rejected": 313.5995178222656, - "logps/chosen": -0.8530148267745972, - "logps/rejected": -1.1114470958709717, - "loss": 1.3765, - "nll_loss": 1.3643596172332764, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0426507368683815, - "rewards/margins": 0.012921608984470367, - "rewards/rejected": -0.055572349578142166, - "step": 890 - }, - { - "epoch": 0.6674123788217748, - "grad_norm": 50.42169189453125, - "learning_rate": 1.6713156761621891e-06, - "log_odds_chosen": 0.2919197678565979, - "log_odds_ratio": -0.5754855275154114, - "logits/chosen": 285.0351257324219, - "logits/rejected": 268.43182373046875, - "logps/chosen": -0.8414399027824402, - "logps/rejected": -1.0067099332809448, - "loss": 1.3879, - "nll_loss": 1.085858941078186, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04207199439406395, - "rewards/margins": 0.008263500407338142, - "rewards/rejected": -0.05033549666404724, - "step": 895 - }, - { - "epoch": 0.6711409395973155, - "grad_norm": 52.282875061035156, - "learning_rate": 1.6666666666666667e-06, - "log_odds_chosen": 0.3528556227684021, - "log_odds_ratio": -0.564329981803894, - "logits/chosen": 338.33135986328125, - "logits/rejected": 290.14764404296875, - "logps/chosen": -1.236096978187561, - "logps/rejected": -1.4558098316192627, - "loss": 1.4841, - "nll_loss": 1.5852864980697632, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06180485337972641, - "rewards/margins": 0.01098563987761736, - "rewards/rejected": -0.07279049605131149, - "step": 900 - }, - { - "epoch": 0.6748695003728561, - "grad_norm": 42.157962799072266, - "learning_rate": 1.6620562382863342e-06, - "log_odds_chosen": 0.5405559539794922, - "log_odds_ratio": -0.4779587388038635, - "logits/chosen": 242.44210815429688, - "logits/rejected": 298.4053955078125, - "logps/chosen": -0.9968141317367554, - "logps/rejected": -1.3804486989974976, - "loss": 1.5401, - "nll_loss": 1.620736837387085, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04984070733189583, - "rewards/margins": 0.019181735813617706, - "rewards/rejected": -0.06902243942022324, - "step": 905 - }, - { - "epoch": 0.6785980611483967, - "grad_norm": 41.22214126586914, - "learning_rate": 1.6574838603294898e-06, - "log_odds_chosen": 0.4839659631252289, - "log_odds_ratio": -0.5105474591255188, - "logits/chosen": 322.82781982421875, - "logits/rejected": 272.53094482421875, - "logps/chosen": -1.091977834701538, - "logps/rejected": -1.3571361303329468, - "loss": 1.4944, - "nll_loss": 1.678820252418518, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.054598886519670486, - "rewards/margins": 0.013257920742034912, - "rewards/rejected": -0.0678568109869957, - "step": 910 - }, - { - "epoch": 0.6823266219239373, - "grad_norm": 49.313926696777344, - "learning_rate": 1.6529490122682157e-06, - "log_odds_chosen": -0.11443217843770981, - "log_odds_ratio": -0.7687914371490479, - "logits/chosen": 253.67044067382812, - "logits/rejected": 294.2286376953125, - "logps/chosen": -1.0927972793579102, - "logps/rejected": -0.9589353799819946, - "loss": 1.5325, - "nll_loss": 1.8832004070281982, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.05463986471295357, - "rewards/margins": -0.0066930977627635, - "rewards/rejected": -0.04794676974415779, - "step": 915 - }, - { - "epoch": 0.686055182699478, - "grad_norm": 59.40565490722656, - "learning_rate": 1.648451183489468e-06, - "log_odds_chosen": -0.588416576385498, - "log_odds_ratio": -1.1371772289276123, - "logits/chosen": 270.09844970703125, - "logits/rejected": 304.44219970703125, - "logps/chosen": -1.1386425495147705, - "logps/rejected": -0.8333494067192078, - "loss": 1.3983, - "nll_loss": 1.591930627822876, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.05693213269114494, - "rewards/margins": -0.015264660120010376, - "rewards/rejected": -0.04166747257113457, - "step": 920 - }, - { - "epoch": 0.6897837434750187, - "grad_norm": 54.09523391723633, - "learning_rate": 1.643989873053573e-06, - "log_odds_chosen": 0.3963429927825928, - "log_odds_ratio": -0.6805258989334106, - "logits/chosen": 298.8218688964844, - "logits/rejected": 268.6084289550781, - "logps/chosen": -0.8265215754508972, - "logps/rejected": -0.8585597276687622, - "loss": 1.6125, - "nll_loss": 1.461158037185669, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04132607951760292, - "rewards/margins": 0.0016019083559513092, - "rewards/rejected": -0.04292798787355423, - "step": 925 - }, - { - "epoch": 0.6935123042505593, - "grad_norm": 61.90615463256836, - "learning_rate": 1.6395645894598825e-06, - "log_odds_chosen": 0.22568130493164062, - "log_odds_ratio": -0.6447573900222778, - "logits/chosen": 300.03546142578125, - "logits/rejected": 299.1715087890625, - "logps/chosen": -1.2759240865707397, - "logps/rejected": -1.4430197477340698, - "loss": 1.4714, - "nll_loss": 1.6073062419891357, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06379620730876923, - "rewards/margins": 0.008354783058166504, - "rewards/rejected": -0.07215099036693573, - "step": 930 - }, - { - "epoch": 0.6972408650260999, - "grad_norm": 56.264984130859375, - "learning_rate": 1.6351748504193218e-06, - "log_odds_chosen": 0.1340426206588745, - "log_odds_ratio": -0.6875895857810974, - "logits/chosen": 349.20184326171875, - "logits/rejected": 286.81524658203125, - "logps/chosen": -0.9498621225357056, - "logps/rejected": -1.0197668075561523, - "loss": 1.3205, - "nll_loss": 1.2776739597320557, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.0474931076169014, - "rewards/margins": 0.0034952356945723295, - "rewards/rejected": -0.0509883388876915, - "step": 935 - }, - { - "epoch": 0.7009694258016406, - "grad_norm": 46.958274841308594, - "learning_rate": 1.6308201826336057e-06, - "log_odds_chosen": 0.2847537696361542, - "log_odds_ratio": -0.5769249796867371, - "logits/chosen": 321.4469299316406, - "logits/rejected": 264.03448486328125, - "logps/chosen": -1.0103200674057007, - "logps/rejected": -1.2143007516860962, - "loss": 1.6778, - "nll_loss": 1.7304160594940186, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05051600933074951, - "rewards/margins": 0.010199028067290783, - "rewards/rejected": -0.06071503087878227, - "step": 940 - }, - { - "epoch": 0.7046979865771812, - "grad_norm": 42.902793884277344, - "learning_rate": 1.6265001215808888e-06, - "log_odds_chosen": 0.827401340007782, - "log_odds_ratio": -0.4402908384799957, - "logits/chosen": 285.54595947265625, - "logits/rejected": 351.05450439453125, - "logps/chosen": -1.2171316146850586, - "logps/rejected": -1.760729193687439, - "loss": 1.4834, - "nll_loss": 1.3298847675323486, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06085658073425293, - "rewards/margins": 0.027179885655641556, - "rewards/rejected": -0.08803646266460419, - "step": 945 - }, - { - "epoch": 0.7084265473527218, - "grad_norm": 55.1160774230957, - "learning_rate": 1.6222142113076255e-06, - "log_odds_chosen": 0.4405880868434906, - "log_odds_ratio": -0.508270800113678, - "logits/chosen": 287.17529296875, - "logits/rejected": 368.376708984375, - "logps/chosen": -1.1966034173965454, - "logps/rejected": -1.4957139492034912, - "loss": 1.7038, - "nll_loss": 1.6129255294799805, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05983016639947891, - "rewards/margins": 0.014955529943108559, - "rewards/rejected": -0.07478569447994232, - "step": 950 - }, - { - "epoch": 0.7121551081282624, - "grad_norm": 53.026771545410156, - "learning_rate": 1.617962004226434e-06, - "log_odds_chosen": 0.4412068724632263, - "log_odds_ratio": -0.5039135217666626, - "logits/chosen": 346.5382080078125, - "logits/rejected": 321.6978454589844, - "logps/chosen": -0.9980529546737671, - "logps/rejected": -1.2747349739074707, - "loss": 1.5233, - "nll_loss": 1.7366981506347656, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04990265145897865, - "rewards/margins": 0.013834101147949696, - "rewards/rejected": -0.06373675167560577, - "step": 955 - }, - { - "epoch": 0.7158836689038032, - "grad_norm": 66.82451629638672, - "learning_rate": 1.6137430609197571e-06, - "log_odds_chosen": -0.491224467754364, - "log_odds_ratio": -1.00169038772583, - "logits/chosen": 261.90771484375, - "logits/rejected": 277.78155517578125, - "logps/chosen": -1.3195855617523193, - "logps/rejected": -0.9671088457107544, - "loss": 1.3866, - "nll_loss": 1.5242819786071777, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.06597927957773209, - "rewards/margins": -0.017623836174607277, - "rewards/rejected": -0.04835544154047966, - "step": 960 - }, - { - "epoch": 0.7196122296793438, - "grad_norm": 41.259483337402344, - "learning_rate": 1.6095569499491263e-06, - "log_odds_chosen": -0.757089376449585, - "log_odds_ratio": -1.2757459878921509, - "logits/chosen": 302.40789794921875, - "logits/rejected": 278.52838134765625, - "logps/chosen": -1.2257049083709717, - "logps/rejected": -0.8643633127212524, - "loss": 1.6155, - "nll_loss": 1.7133779525756836, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06128524988889694, - "rewards/margins": -0.01806708797812462, - "rewards/rejected": -0.04321816563606262, - "step": 965 - }, - { - "epoch": 0.7233407904548844, - "grad_norm": 44.51772689819336, - "learning_rate": 1.605403247669839e-06, - "log_odds_chosen": 0.7628762722015381, - "log_odds_ratio": -0.5448654294013977, - "logits/chosen": 301.1988830566406, - "logits/rejected": 338.5502014160156, - "logps/chosen": -0.9106384515762329, - "logps/rejected": -1.422669768333435, - "loss": 1.3305, - "nll_loss": 1.5777578353881836, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.045531924813985825, - "rewards/margins": 0.025601565837860107, - "rewards/rejected": -0.07113349437713623, - "step": 970 - }, - { - "epoch": 0.727069351230425, - "grad_norm": 30.294321060180664, - "learning_rate": 1.6012815380508715e-06, - "log_odds_chosen": 0.4624800682067871, - "log_odds_ratio": -0.5589762330055237, - "logits/chosen": 282.449951171875, - "logits/rejected": 274.6116027832031, - "logps/chosen": -0.8997858166694641, - "logps/rejected": -1.2427423000335693, - "loss": 1.6616, - "nll_loss": 1.8807052373886108, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.044989295303821564, - "rewards/margins": 0.01714782603085041, - "rewards/rejected": -0.062137115746736526, - "step": 975 - }, - { - "epoch": 0.7307979120059657, - "grad_norm": 34.38119888305664, - "learning_rate": 1.59719141249985e-06, - "log_odds_chosen": -0.4235716462135315, - "log_odds_ratio": -0.9345256090164185, - "logits/chosen": 306.9768981933594, - "logits/rejected": 280.8311767578125, - "logps/chosen": -1.0978089570999146, - "logps/rejected": -0.8761543035507202, - "loss": 1.497, - "nll_loss": 1.8366124629974365, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.05489044636487961, - "rewards/margins": -0.011082729324698448, - "rewards/rejected": -0.04380771517753601, - "step": 980 - }, - { - "epoch": 0.7345264727815063, - "grad_norm": 35.161888122558594, - "learning_rate": 1.5931324696929157e-06, - "log_odds_chosen": 0.2983691692352295, - "log_odds_ratio": -0.5613266229629517, - "logits/chosen": 278.8953857421875, - "logits/rejected": 265.04840087890625, - "logps/chosen": -1.1326143741607666, - "logps/rejected": -1.3249703645706177, - "loss": 1.4658, - "nll_loss": 1.419944405555725, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05663071945309639, - "rewards/margins": 0.009617794305086136, - "rewards/rejected": -0.06624852120876312, - "step": 985 - }, - { - "epoch": 0.738255033557047, - "grad_norm": 37.08230972290039, - "learning_rate": 1.5891043154093205e-06, - "log_odds_chosen": -0.06146463006734848, - "log_odds_ratio": -0.7437344789505005, - "logits/chosen": 266.5087890625, - "logits/rejected": 298.4480895996094, - "logps/chosen": -1.1592657566070557, - "logps/rejected": -1.1337125301361084, - "loss": 1.5438, - "nll_loss": 1.2325575351715088, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.057963281869888306, - "rewards/margins": -0.0012776597868651152, - "rewards/rejected": -0.05668563395738602, - "step": 990 - }, - { - "epoch": 0.7419835943325876, - "grad_norm": 38.726905822753906, - "learning_rate": 1.5851065623706038e-06, - "log_odds_chosen": 0.4816429615020752, - "log_odds_ratio": -0.5086499452590942, - "logits/chosen": 244.3563690185547, - "logits/rejected": 281.7378234863281, - "logps/chosen": -0.7211805582046509, - "logps/rejected": -1.0412851572036743, - "loss": 1.3907, - "nll_loss": 1.1479618549346924, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.03605902940034866, - "rewards/margins": 0.016005229204893112, - "rewards/rejected": -0.052064258605241776, - "step": 995 - }, - { - "epoch": 0.7457121551081283, - "grad_norm": 33.784637451171875, - "learning_rate": 1.5811388300841898e-06, - "log_odds_chosen": 0.4328362047672272, - "log_odds_ratio": -0.5592437982559204, - "logits/chosen": 276.10150146484375, - "logits/rejected": 346.55487060546875, - "logps/chosen": -1.157006025314331, - "logps/rejected": -1.4446651935577393, - "loss": 1.3176, - "nll_loss": 1.5155082941055298, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05785030871629715, - "rewards/margins": 0.014382955618202686, - "rewards/rejected": -0.07223325967788696, - "step": 1000 - }, - { - "epoch": 0.7494407158836689, - "grad_norm": 69.28606414794922, - "learning_rate": 1.5772007446912793e-06, - "log_odds_chosen": -0.6843798160552979, - "log_odds_ratio": -1.1197946071624756, - "logits/chosen": 307.5853576660156, - "logits/rejected": 245.27847290039062, - "logps/chosen": -1.3067998886108398, - "logps/rejected": -0.884222149848938, - "loss": 1.5347, - "nll_loss": 1.5978238582611084, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.06533998996019363, - "rewards/margins": -0.021128883585333824, - "rewards/rejected": -0.04421110451221466, - "step": 1005 - }, - { - "epoch": 0.7531692766592095, - "grad_norm": 52.32899475097656, - "learning_rate": 1.5732919388188816e-06, - "log_odds_chosen": -0.012659728527069092, - "log_odds_ratio": -0.7548945546150208, - "logits/chosen": 338.98590087890625, - "logits/rejected": 309.00677490234375, - "logps/chosen": -1.2037309408187866, - "logps/rejected": -1.1854820251464844, - "loss": 1.4881, - "nll_loss": 1.628455400466919, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.06018654629588127, - "rewards/margins": -0.0009124472853727639, - "rewards/rejected": -0.0592740997672081, - "step": 1010 - }, - { - "epoch": 0.7568978374347501, - "grad_norm": 40.6379508972168, - "learning_rate": 1.5694120514358613e-06, - "log_odds_chosen": -0.006588673684746027, - "log_odds_ratio": -0.7242330312728882, - "logits/chosen": 273.86895751953125, - "logits/rejected": 272.7266540527344, - "logps/chosen": -1.2961971759796143, - "logps/rejected": -1.3301607370376587, - "loss": 1.5657, - "nll_loss": 1.6446641683578491, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06480986624956131, - "rewards/margins": 0.0016981728840619326, - "rewards/rejected": -0.06650803983211517, - "step": 1015 - }, - { - "epoch": 0.7606263982102909, - "grad_norm": 41.96585464477539, - "learning_rate": 1.565560727712874e-06, - "log_odds_chosen": 1.1196025609970093, - "log_odds_ratio": -0.44418764114379883, - "logits/chosen": 333.15814208984375, - "logits/rejected": 312.60565185546875, - "logps/chosen": -1.3886038064956665, - "logps/rejected": -2.351370096206665, - "loss": 1.342, - "nll_loss": 1.4324030876159668, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06943019479513168, - "rewards/margins": 0.04813830181956291, - "rewards/rejected": -0.11756850779056549, - "step": 1020 - }, - { - "epoch": 0.7643549589858315, - "grad_norm": 63.37541961669922, - "learning_rate": 1.561737618886061e-06, - "log_odds_chosen": 0.2972729802131653, - "log_odds_ratio": -0.6164146661758423, - "logits/chosen": 376.42303466796875, - "logits/rejected": 304.6986999511719, - "logps/chosen": -0.9759232401847839, - "logps/rejected": -1.1585075855255127, - "loss": 1.6394, - "nll_loss": 1.5050007104873657, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.048796165734529495, - "rewards/margins": 0.009129209443926811, - "rewards/rejected": -0.05792537331581116, - "step": 1025 - }, - { - "epoch": 0.7680835197613721, - "grad_norm": 56.96404266357422, - "learning_rate": 1.5579423821243897e-06, - "log_odds_chosen": 0.11918880045413971, - "log_odds_ratio": -0.6956043243408203, - "logits/chosen": 353.91241455078125, - "logits/rejected": 333.98687744140625, - "logps/chosen": -1.0623157024383545, - "logps/rejected": -1.044559121131897, - "loss": 1.4902, - "nll_loss": 1.4832748174667358, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.053115785121917725, - "rewards/margins": -0.0008878243970684707, - "rewards/rejected": -0.052227962762117386, - "step": 1030 - }, - { - "epoch": 0.7718120805369127, - "grad_norm": 45.98897933959961, - "learning_rate": 1.554174680400523e-06, - "log_odds_chosen": 0.01295551098883152, - "log_odds_ratio": -0.6930908560752869, - "logits/chosen": 275.81512451171875, - "logits/rejected": 285.530517578125, - "logps/chosen": -1.1258041858673096, - "logps/rejected": -1.117311716079712, - "loss": 1.5803, - "nll_loss": 1.3986682891845703, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05629020929336548, - "rewards/margins": -0.0004246201424393803, - "rewards/rejected": -0.055865585803985596, - "step": 1035 - }, - { - "epoch": 0.7755406413124534, - "grad_norm": 30.511985778808594, - "learning_rate": 1.5504341823651056e-06, - "log_odds_chosen": 0.5323464870452881, - "log_odds_ratio": -0.5798054337501526, - "logits/chosen": 271.96026611328125, - "logits/rejected": 338.9732971191406, - "logps/chosen": -1.0563971996307373, - "logps/rejected": -1.4582374095916748, - "loss": 1.3906, - "nll_loss": 1.427484154701233, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.052819859236478806, - "rewards/margins": 0.020092012360692024, - "rewards/rejected": -0.07291187345981598, - "step": 1040 - }, - { - "epoch": 0.779269202087994, - "grad_norm": 40.224727630615234, - "learning_rate": 1.546720562224365e-06, - "log_odds_chosen": 0.24499432742595673, - "log_odds_ratio": -0.6468842029571533, - "logits/chosen": 272.1739196777344, - "logits/rejected": 302.99169921875, - "logps/chosen": -0.9424095153808594, - "logps/rejected": -1.1509238481521606, - "loss": 1.3696, - "nll_loss": 1.123616337776184, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04712047800421715, - "rewards/margins": 0.010425711050629616, - "rewards/rejected": -0.05754619091749191, - "step": 1045 - }, - { - "epoch": 0.7829977628635347, - "grad_norm": 70.58052062988281, - "learning_rate": 1.5430334996209192e-06, - "log_odds_chosen": 0.30246010422706604, - "log_odds_ratio": -0.7540239691734314, - "logits/chosen": 326.69598388671875, - "logits/rejected": 298.440185546875, - "logps/chosen": -0.732296347618103, - "logps/rejected": -0.9012425541877747, - "loss": 1.456, - "nll_loss": 1.3398606777191162, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.03661481663584709, - "rewards/margins": 0.008447309955954552, - "rewards/rejected": -0.045062124729156494, - "step": 1050 - }, - { - "epoch": 0.7867263236390754, - "grad_norm": 39.13371658325195, - "learning_rate": 1.539372679517698e-06, - "log_odds_chosen": -0.20822691917419434, - "log_odds_ratio": -0.9284356832504272, - "logits/chosen": 385.7300109863281, - "logits/rejected": 252.3714141845703, - "logps/chosen": -1.0195338726043701, - "logps/rejected": -0.94868004322052, - "loss": 1.42, - "nll_loss": 1.3612464666366577, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0509767010807991, - "rewards/margins": -0.0035426937974989414, - "rewards/rejected": -0.047433994710445404, - "step": 1055 - }, - { - "epoch": 0.790454884414616, - "grad_norm": 48.92668533325195, - "learning_rate": 1.5357377920848783e-06, - "log_odds_chosen": -0.8790766596794128, - "log_odds_ratio": -1.3677198886871338, - "logits/chosen": 298.668701171875, - "logits/rejected": 257.3388977050781, - "logps/chosen": -1.9107418060302734, - "logps/rejected": -1.1696285009384155, - "loss": 1.5434, - "nll_loss": 1.5949474573135376, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.09553708881139755, - "rewards/margins": -0.03705566003918648, - "rewards/rejected": -0.05848143249750137, - "step": 1060 - }, - { - "epoch": 0.7941834451901566, - "grad_norm": 42.43621826171875, - "learning_rate": 1.532128532589739e-06, - "log_odds_chosen": 0.49294376373291016, - "log_odds_ratio": -0.5396267175674438, - "logits/chosen": 295.39984130859375, - "logits/rejected": 306.10968017578125, - "logps/chosen": -0.6909875869750977, - "logps/rejected": -0.9469335675239563, - "loss": 1.348, - "nll_loss": 1.300163984298706, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.03454938158392906, - "rewards/margins": 0.012797298841178417, - "rewards/rejected": -0.047346677631139755, - "step": 1065 - }, - { - "epoch": 0.7979120059656972, - "grad_norm": 38.49888229370117, - "learning_rate": 1.5285446012893579e-06, - "log_odds_chosen": 0.2575642168521881, - "log_odds_ratio": -0.7339036464691162, - "logits/chosen": 258.6667785644531, - "logits/rejected": 355.6258239746094, - "logps/chosen": -1.1392613649368286, - "logps/rejected": -1.367376446723938, - "loss": 1.6348, - "nll_loss": 1.3361315727233887, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05696306750178337, - "rewards/margins": 0.011405754834413528, - "rewards/rejected": -0.0683688223361969, - "step": 1070 - }, - { - "epoch": 0.8016405667412378, - "grad_norm": 68.9556655883789, - "learning_rate": 1.5249857033260468e-06, - "log_odds_chosen": 0.042417120188474655, - "log_odds_ratio": -0.7042632102966309, - "logits/chosen": 281.87432861328125, - "logits/rejected": 349.04766845703125, - "logps/chosen": -1.3799294233322144, - "logps/rejected": -1.4671729803085327, - "loss": 1.6201, - "nll_loss": 1.6090809106826782, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06899647414684296, - "rewards/margins": 0.004362170584499836, - "rewards/rejected": -0.07335864752531052, - "step": 1075 - }, - { - "epoch": 0.8053691275167785, - "grad_norm": 48.29362487792969, - "learning_rate": 1.5214515486254614e-06, - "log_odds_chosen": -0.0832916721701622, - "log_odds_ratio": -0.7588063478469849, - "logits/chosen": 247.9734649658203, - "logits/rejected": 289.4851379394531, - "logps/chosen": -1.2415721416473389, - "logps/rejected": -1.2067878246307373, - "loss": 1.4512, - "nll_loss": 1.564984679222107, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06207861378788948, - "rewards/margins": -0.0017392225563526154, - "rewards/rejected": -0.060339391231536865, - "step": 1080 - }, - { - "epoch": 0.8090976882923192, - "grad_norm": 51.412437438964844, - "learning_rate": 1.517941851797291e-06, - "log_odds_chosen": 0.4743901193141937, - "log_odds_ratio": -0.5469616055488586, - "logits/chosen": 356.06988525390625, - "logits/rejected": 255.2526092529297, - "logps/chosen": -0.7988730072975159, - "logps/rejected": -1.0890055894851685, - "loss": 1.4737, - "nll_loss": 1.5287182331085205, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.039943646639585495, - "rewards/margins": 0.014506635256111622, - "rewards/rejected": -0.05445028468966484, - "step": 1085 - }, - { - "epoch": 0.8128262490678598, - "grad_norm": 35.49870681762695, - "learning_rate": 1.5144563320384566e-06, - "log_odds_chosen": -0.060308314859867096, - "log_odds_ratio": -0.7548069357872009, - "logits/chosen": 379.72113037109375, - "logits/rejected": 283.58807373046875, - "logps/chosen": -1.1512119770050049, - "logps/rejected": -1.1127034425735474, - "loss": 1.2694, - "nll_loss": 1.36198091506958, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05756059288978577, - "rewards/margins": -0.0019254222279414535, - "rewards/rejected": -0.05563517287373543, - "step": 1090 - }, - { - "epoch": 0.8165548098434005, - "grad_norm": 95.50517272949219, - "learning_rate": 1.5109947130387486e-06, - "log_odds_chosen": -0.26178619265556335, - "log_odds_ratio": -0.9343220591545105, - "logits/chosen": 310.42437744140625, - "logits/rejected": 318.2205810546875, - "logps/chosen": -1.193917155265808, - "logps/rejected": -0.9704820513725281, - "loss": 1.6804, - "nll_loss": 1.4915359020233154, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05969586223363876, - "rewards/margins": -0.011171762831509113, - "rewards/rejected": -0.048524100333452225, - "step": 1095 - }, - { - "epoch": 0.8202833706189411, - "grad_norm": 37.66985321044922, - "learning_rate": 1.5075567228888182e-06, - "log_odds_chosen": 0.8683313131332397, - "log_odds_ratio": -0.38970455527305603, - "logits/chosen": 309.0008239746094, - "logits/rejected": 268.5347595214844, - "logps/chosen": -0.9652371406555176, - "logps/rejected": -1.3671009540557861, - "loss": 1.443, - "nll_loss": 1.320831060409546, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.048261858522892, - "rewards/margins": 0.020093193277716637, - "rewards/rejected": -0.06835505366325378, - "step": 1100 - }, - { - "epoch": 0.8240119313944817, - "grad_norm": 36.23188400268555, - "learning_rate": 1.5041420939904672e-06, - "log_odds_chosen": -0.2738190293312073, - "log_odds_ratio": -0.8961642980575562, - "logits/chosen": 418.0433654785156, - "logits/rejected": 254.0409698486328, - "logps/chosen": -1.226500153541565, - "logps/rejected": -1.0759589672088623, - "loss": 1.6096, - "nll_loss": 1.7704360485076904, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.06132500618696213, - "rewards/margins": -0.007527062203735113, - "rewards/rejected": -0.053797949105501175, - "step": 1105 - }, - { - "epoch": 0.8277404921700223, - "grad_norm": 33.85231399536133, - "learning_rate": 1.5007505629691608e-06, - "log_odds_chosen": -0.15626640617847443, - "log_odds_ratio": -0.8521029353141785, - "logits/chosen": 319.37530517578125, - "logits/rejected": 282.8531494140625, - "logps/chosen": -1.2550684213638306, - "logps/rejected": -1.111943006515503, - "loss": 1.4466, - "nll_loss": 1.3546196222305298, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06275341659784317, - "rewards/margins": -0.0071562668308615685, - "rewards/rejected": -0.05559714883565903, - "step": 1110 - }, - { - "epoch": 0.8314690529455631, - "grad_norm": 36.85125732421875, - "learning_rate": 1.4973818705886997e-06, - "log_odds_chosen": -0.014731859788298607, - "log_odds_ratio": -0.7488952875137329, - "logits/chosen": 245.2974090576172, - "logits/rejected": 291.6301574707031, - "logps/chosen": -1.2714779376983643, - "logps/rejected": -1.2771275043487549, - "loss": 1.4907, - "nll_loss": 1.7302742004394531, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06357388943433762, - "rewards/margins": 0.0002824820694513619, - "rewards/rejected": -0.06385637819766998, - "step": 1115 - }, - { - "epoch": 0.8351976137211037, - "grad_norm": 36.33005905151367, - "learning_rate": 1.494035761667992e-06, - "log_odds_chosen": -0.09489155560731888, - "log_odds_ratio": -0.7711436152458191, - "logits/chosen": 265.71282958984375, - "logits/rejected": 339.16046142578125, - "logps/chosen": -0.9258662462234497, - "logps/rejected": -0.8838087320327759, - "loss": 1.3712, - "nll_loss": 1.2217100858688354, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.046293314546346664, - "rewards/margins": -0.0021028779447078705, - "rewards/rejected": -0.044190436601638794, - "step": 1120 - }, - { - "epoch": 0.8389261744966443, - "grad_norm": 81.46223449707031, - "learning_rate": 1.49071198499986e-06, - "log_odds_chosen": 0.29209503531455994, - "log_odds_ratio": -0.6241984367370605, - "logits/chosen": 372.13775634765625, - "logits/rejected": 277.0876770019531, - "logps/chosen": -1.1362206935882568, - "logps/rejected": -1.3141521215438843, - "loss": 1.6122, - "nll_loss": 1.6898285150527954, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05681103467941284, - "rewards/margins": 0.00889657437801361, - "rewards/rejected": -0.06570760905742645, - "step": 1125 - }, - { - "epoch": 0.8426547352721849, - "grad_norm": 26.17449188232422, - "learning_rate": 1.487410293271824e-06, - "log_odds_chosen": 0.022488439455628395, - "log_odds_ratio": -0.7313835620880127, - "logits/chosen": 264.5323791503906, - "logits/rejected": 315.54742431640625, - "logps/chosen": -1.0775610208511353, - "logps/rejected": -1.0274304151535034, - "loss": 1.4202, - "nll_loss": 1.5788538455963135, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0538780502974987, - "rewards/margins": -0.002506525721400976, - "rewards/rejected": -0.05137152597308159, - "step": 1130 - }, - { - "epoch": 0.8463832960477256, - "grad_norm": 42.4337158203125, - "learning_rate": 1.484130442988812e-06, - "log_odds_chosen": 0.10694253444671631, - "log_odds_ratio": -0.6453635692596436, - "logits/chosen": 327.66241455078125, - "logits/rejected": 267.99005126953125, - "logps/chosen": -1.1753156185150146, - "logps/rejected": -1.2637088298797607, - "loss": 1.5396, - "nll_loss": 1.5597258806228638, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05876578018069267, - "rewards/margins": 0.004419657401740551, - "rewards/rejected": -0.0631854385137558, - "step": 1135 - }, - { - "epoch": 0.8501118568232662, - "grad_norm": 41.627845764160156, - "learning_rate": 1.480872194397731e-06, - "log_odds_chosen": -0.03932800143957138, - "log_odds_ratio": -0.7172124981880188, - "logits/chosen": 282.0204772949219, - "logits/rejected": 327.0501403808594, - "logps/chosen": -1.2394376993179321, - "logps/rejected": -1.2088899612426758, - "loss": 1.585, - "nll_loss": 1.2343940734863281, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.06197188422083855, - "rewards/margins": -0.001527387648820877, - "rewards/rejected": -0.06044449657201767, - "step": 1140 - }, - { - "epoch": 0.8538404175988069, - "grad_norm": 33.353004455566406, - "learning_rate": 1.4776353114138545e-06, - "log_odds_chosen": 1.6136095523834229, - "log_odds_ratio": -0.4471051096916199, - "logits/chosen": 274.882080078125, - "logits/rejected": 391.7216796875, - "logps/chosen": -1.1087409257888794, - "logps/rejected": -2.5487961769104004, - "loss": 1.2646, - "nll_loss": 1.0889408588409424, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05543703958392143, - "rewards/margins": 0.07200276106595993, - "rewards/rejected": -0.12743981182575226, - "step": 1145 - }, - { - "epoch": 0.8575689783743475, - "grad_norm": 35.73836898803711, - "learning_rate": 1.4744195615489715e-06, - "log_odds_chosen": 0.11635205894708633, - "log_odds_ratio": -0.6417000889778137, - "logits/chosen": 331.94451904296875, - "logits/rejected": 267.5284729003906, - "logps/chosen": -0.9152507781982422, - "logps/rejected": -1.0024302005767822, - "loss": 1.4611, - "nll_loss": 1.216604471206665, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.045762546360492706, - "rewards/margins": 0.004358966834843159, - "rewards/rejected": -0.05012150853872299, - "step": 1150 - }, - { - "epoch": 0.8612975391498882, - "grad_norm": 35.827842712402344, - "learning_rate": 1.4712247158412494e-06, - "log_odds_chosen": -0.47101902961730957, - "log_odds_ratio": -0.9811735153198242, - "logits/chosen": 295.36578369140625, - "logits/rejected": 266.8309020996094, - "logps/chosen": -1.040876030921936, - "logps/rejected": -0.8466850519180298, - "loss": 1.628, - "nll_loss": 1.424377679824829, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.05204380303621292, - "rewards/margins": -0.009709550067782402, - "rewards/rejected": -0.04233425110578537, - "step": 1155 - }, - { - "epoch": 0.8650260999254288, - "grad_norm": 50.965755462646484, - "learning_rate": 1.4680505487867589e-06, - "log_odds_chosen": 0.18784022331237793, - "log_odds_ratio": -0.6155949831008911, - "logits/chosen": 339.56378173828125, - "logits/rejected": 351.09967041015625, - "logps/chosen": -1.0030454397201538, - "logps/rejected": -1.1232281923294067, - "loss": 1.4583, - "nll_loss": 1.3366968631744385, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05015227943658829, - "rewards/margins": 0.006009135395288467, - "rewards/rejected": -0.056161414831876755, - "step": 1160 - }, - { - "epoch": 0.8687546607009694, - "grad_norm": 45.82514572143555, - "learning_rate": 1.4648968382726192e-06, - "log_odds_chosen": -0.14122900366783142, - "log_odds_ratio": -0.781011700630188, - "logits/chosen": 272.3401184082031, - "logits/rejected": 332.6485595703125, - "logps/chosen": -1.0699069499969482, - "logps/rejected": -0.9682256579399109, - "loss": 1.2731, - "nll_loss": 1.2015315294265747, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05349534749984741, - "rewards/margins": -0.005084061063826084, - "rewards/rejected": -0.0484112873673439, - "step": 1165 - }, - { - "epoch": 0.87248322147651, - "grad_norm": 67.77047729492188, - "learning_rate": 1.4617633655117156e-06, - "log_odds_chosen": 0.2261693924665451, - "log_odds_ratio": -0.6521937847137451, - "logits/chosen": 306.14080810546875, - "logits/rejected": 256.6727600097656, - "logps/chosen": -1.196653127670288, - "logps/rejected": -1.2829883098602295, - "loss": 1.5578, - "nll_loss": 1.2986233234405518, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.059832654893398285, - "rewards/margins": 0.004316768608987331, - "rewards/rejected": -0.06414942443370819, - "step": 1170 - }, - { - "epoch": 0.8762117822520508, - "grad_norm": 57.44667053222656, - "learning_rate": 1.4586499149789457e-06, - "log_odds_chosen": 0.5633170008659363, - "log_odds_ratio": -0.49517783522605896, - "logits/chosen": 321.69940185546875, - "logits/rejected": 354.8461608886719, - "logps/chosen": -1.0312330722808838, - "logps/rejected": -1.3799893856048584, - "loss": 1.4296, - "nll_loss": 1.2891279458999634, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.05156165361404419, - "rewards/margins": 0.017437811940908432, - "rewards/rejected": -0.06899946928024292, - "step": 1175 - }, - { - "epoch": 0.8799403430275914, - "grad_norm": 31.770212173461914, - "learning_rate": 1.4555562743489552e-06, - "log_odds_chosen": 0.043411023914813995, - "log_odds_ratio": -0.7184240221977234, - "logits/chosen": 415.14923095703125, - "logits/rejected": 285.048828125, - "logps/chosen": -1.0929487943649292, - "logps/rejected": -1.1261825561523438, - "loss": 1.2701, - "nll_loss": 1.3833487033843994, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05464743822813034, - "rewards/margins": 0.0016616880893707275, - "rewards/rejected": -0.05630912631750107, - "step": 1180 - }, - { - "epoch": 0.883668903803132, - "grad_norm": 26.006622314453125, - "learning_rate": 1.4524822344353171e-06, - "log_odds_chosen": 0.5986038446426392, - "log_odds_ratio": -0.49350160360336304, - "logits/chosen": 268.81109619140625, - "logits/rejected": 401.85888671875, - "logps/chosen": -0.9730545282363892, - "logps/rejected": -1.4126075506210327, - "loss": 1.4522, - "nll_loss": 1.3450353145599365, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04865272715687752, - "rewards/margins": 0.021977653726935387, - "rewards/rejected": -0.07063037902116776, - "step": 1185 - }, - { - "epoch": 0.8873974645786726, - "grad_norm": 45.36833190917969, - "learning_rate": 1.4494275891311214e-06, - "log_odds_chosen": 0.43729180097579956, - "log_odds_ratio": -0.6717116832733154, - "logits/chosen": 248.72802734375, - "logits/rejected": 381.82550048828125, - "logps/chosen": -1.332322120666504, - "logps/rejected": -1.8414218425750732, - "loss": 1.4014, - "nll_loss": 1.3842036724090576, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06661610305309296, - "rewards/margins": 0.025454992428421974, - "rewards/rejected": -0.09207110106945038, - "step": 1190 - }, - { - "epoch": 0.8911260253542133, - "grad_norm": 114.89964294433594, - "learning_rate": 1.4463921353509293e-06, - "log_odds_chosen": 0.1801021844148636, - "log_odds_ratio": -0.7485548853874207, - "logits/chosen": 316.08062744140625, - "logits/rejected": 406.1883239746094, - "logps/chosen": -1.0725009441375732, - "logps/rejected": -1.1782128810882568, - "loss": 1.4839, - "nll_loss": 1.3668285608291626, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.05362504720687866, - "rewards/margins": 0.005285605788230896, - "rewards/rejected": -0.05891064926981926, - "step": 1195 - }, - { - "epoch": 0.8948545861297539, - "grad_norm": 33.020179748535156, - "learning_rate": 1.4433756729740647e-06, - "log_odds_chosen": 0.45132356882095337, - "log_odds_ratio": -0.5557500720024109, - "logits/chosen": 320.20703125, - "logits/rejected": 271.36865234375, - "logps/chosen": -1.0171873569488525, - "logps/rejected": -1.2769209146499634, - "loss": 1.4753, - "nll_loss": 1.6078990697860718, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.050859373062849045, - "rewards/margins": 0.012986676767468452, - "rewards/rejected": -0.06384604424238205, - "step": 1200 - }, - { - "epoch": 0.8985831469052945, - "grad_norm": 34.85787582397461, - "learning_rate": 1.4403780047891936e-06, - "log_odds_chosen": 0.03624759241938591, - "log_odds_ratio": -0.7684409022331238, - "logits/chosen": 320.3868103027344, - "logits/rejected": 289.04974365234375, - "logps/chosen": -1.0589901208877563, - "logps/rejected": -1.129260540008545, - "loss": 1.4644, - "nll_loss": 1.6791317462921143, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.052949510514736176, - "rewards/margins": 0.00351352128200233, - "rewards/rejected": -0.05646302551031113, - "step": 1205 - }, - { - "epoch": 0.9023117076808352, - "grad_norm": 25.57440185546875, - "learning_rate": 1.4373989364401727e-06, - "log_odds_chosen": -0.08426200598478317, - "log_odds_ratio": -0.7511380910873413, - "logits/chosen": 298.7584228515625, - "logits/rejected": 353.18817138671875, - "logps/chosen": -1.2927508354187012, - "logps/rejected": -1.1978588104248047, - "loss": 1.495, - "nll_loss": 1.3517141342163086, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06463755667209625, - "rewards/margins": -0.004744607489556074, - "rewards/rejected": -0.059892941266298294, - "step": 1210 - }, - { - "epoch": 0.9060402684563759, - "grad_norm": 39.81608200073242, - "learning_rate": 1.4344382763731173e-06, - "log_odds_chosen": 0.37703046202659607, - "log_odds_ratio": -0.5227408409118652, - "logits/chosen": 315.92864990234375, - "logits/rejected": 344.01202392578125, - "logps/chosen": -0.7612079381942749, - "logps/rejected": -0.9687854647636414, - "loss": 1.4165, - "nll_loss": 1.0292868614196777, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.038060400635004044, - "rewards/margins": 0.010378877632319927, - "rewards/rejected": -0.04843927547335625, - "step": 1215 - }, - { - "epoch": 0.9097688292319165, - "grad_norm": 33.86534881591797, - "learning_rate": 1.4314958357846706e-06, - "log_odds_chosen": -0.18463517725467682, - "log_odds_ratio": -0.8304967880249023, - "logits/chosen": 308.48895263671875, - "logits/rejected": 314.8437194824219, - "logps/chosen": -1.0531213283538818, - "logps/rejected": -0.9781627655029297, - "loss": 1.3589, - "nll_loss": 1.3626785278320312, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05265607312321663, - "rewards/margins": -0.003747930284589529, - "rewards/rejected": -0.048908136785030365, - "step": 1220 - }, - { - "epoch": 0.9134973900074571, - "grad_norm": 34.17255401611328, - "learning_rate": 1.4285714285714286e-06, - "log_odds_chosen": 0.49513500928878784, - "log_odds_ratio": -0.5504399538040161, - "logits/chosen": 309.555419921875, - "logits/rejected": 345.54730224609375, - "logps/chosen": -0.9425355195999146, - "logps/rejected": -1.2455843687057495, - "loss": 1.358, - "nll_loss": 1.2254512310028076, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04712677747011185, - "rewards/margins": 0.015152444131672382, - "rewards/rejected": -0.062279216945171356, - "step": 1225 - }, - { - "epoch": 0.9172259507829977, - "grad_norm": 38.84283447265625, - "learning_rate": 1.4256648712805027e-06, - "log_odds_chosen": 0.0819883868098259, - "log_odds_ratio": -0.6703386306762695, - "logits/chosen": 263.2631530761719, - "logits/rejected": 347.1291198730469, - "logps/chosen": -1.2756901979446411, - "logps/rejected": -1.3151495456695557, - "loss": 1.5388, - "nll_loss": 1.4819085597991943, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.06378450989723206, - "rewards/margins": 0.00197296729311347, - "rewards/rejected": -0.06575748324394226, - "step": 1230 - }, - { - "epoch": 0.9209545115585384, - "grad_norm": 40.17573928833008, - "learning_rate": 1.4227759830611807e-06, - "log_odds_chosen": 0.44204649329185486, - "log_odds_ratio": -0.5211524963378906, - "logits/chosen": 284.2911071777344, - "logits/rejected": 358.6346130371094, - "logps/chosen": -1.0773476362228394, - "logps/rejected": -1.3961951732635498, - "loss": 1.3846, - "nll_loss": 1.4720135927200317, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05386738106608391, - "rewards/margins": 0.015942372381687164, - "rewards/rejected": -0.06980974972248077, - "step": 1235 - }, - { - "epoch": 0.9246830723340791, - "grad_norm": 53.95439147949219, - "learning_rate": 1.419904585617662e-06, - "log_odds_chosen": 0.5053843259811401, - "log_odds_ratio": -0.5039618611335754, - "logits/chosen": 268.06683349609375, - "logits/rejected": 363.40423583984375, - "logps/chosen": -0.8085358738899231, - "logps/rejected": -1.1201403141021729, - "loss": 1.3089, - "nll_loss": 1.1503429412841797, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04042679816484451, - "rewards/margins": 0.01558021642267704, - "rewards/rejected": -0.056007008999586105, - "step": 1240 - }, - { - "epoch": 0.9284116331096197, - "grad_norm": 43.93706512451172, - "learning_rate": 1.4170505031628396e-06, - "log_odds_chosen": -0.44611892104148865, - "log_odds_ratio": -1.0919944047927856, - "logits/chosen": 257.5703125, - "logits/rejected": 327.42755126953125, - "logps/chosen": -1.2226102352142334, - "logps/rejected": -1.0486891269683838, - "loss": 1.4475, - "nll_loss": 1.6010900735855103, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.06113051623106003, - "rewards/margins": -0.008696057833731174, - "rewards/rejected": -0.05243445560336113, - "step": 1245 - }, - { - "epoch": 0.9321401938851603, - "grad_norm": 78.16047668457031, - "learning_rate": 1.4142135623730952e-06, - "log_odds_chosen": 0.36011695861816406, - "log_odds_ratio": -0.6252118945121765, - "logits/chosen": 328.4795837402344, - "logits/rejected": 306.9427185058594, - "logps/chosen": -0.7521727681159973, - "logps/rejected": -1.002949833869934, - "loss": 1.2342, - "nll_loss": 1.1043870449066162, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.037608638405799866, - "rewards/margins": 0.012538852170109749, - "rewards/rejected": -0.050147492438554764, - "step": 1250 - }, - { - "epoch": 0.935868754660701, - "grad_norm": 75.88804626464844, - "learning_rate": 1.4113935923440917e-06, - "log_odds_chosen": 0.2792273461818695, - "log_odds_ratio": -0.5761785507202148, - "logits/chosen": 274.94476318359375, - "logits/rejected": 350.17791748046875, - "logps/chosen": -1.1624740362167358, - "logps/rejected": -1.3483537435531616, - "loss": 1.1774, - "nll_loss": 1.2427549362182617, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05812370777130127, - "rewards/margins": 0.009293981827795506, - "rewards/rejected": -0.0674176886677742, - "step": 1255 - }, - { - "epoch": 0.9395973154362416, - "grad_norm": 44.85163116455078, - "learning_rate": 1.4085904245475275e-06, - "log_odds_chosen": 0.18905280530452728, - "log_odds_ratio": -0.615064799785614, - "logits/chosen": 297.97979736328125, - "logits/rejected": 339.6133728027344, - "logps/chosen": -1.0464216470718384, - "logps/rejected": -1.175970196723938, - "loss": 1.4769, - "nll_loss": 1.3575314283370972, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05232108384370804, - "rewards/margins": 0.006477426737546921, - "rewards/rejected": -0.05879851058125496, - "step": 1260 - }, - { - "epoch": 0.9433258762117822, - "grad_norm": 45.406856536865234, - "learning_rate": 1.4058038927888332e-06, - "log_odds_chosen": -0.2725242078304291, - "log_odds_ratio": -0.8532897233963013, - "logits/chosen": 295.85614013671875, - "logits/rejected": 353.1405029296875, - "logps/chosen": -0.9865337610244751, - "logps/rejected": -0.7998149394989014, - "loss": 1.3973, - "nll_loss": 1.2730759382247925, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.049326688051223755, - "rewards/margins": -0.009335937909781933, - "rewards/rejected": -0.03999074548482895, - "step": 1265 - }, - { - "epoch": 0.947054436987323, - "grad_norm": 34.781436920166016, - "learning_rate": 1.4030338331657844e-06, - "log_odds_chosen": 0.5721542239189148, - "log_odds_ratio": -0.473560631275177, - "logits/chosen": 274.3713684082031, - "logits/rejected": 282.1281433105469, - "logps/chosen": -0.7824453711509705, - "logps/rejected": -1.1291372776031494, - "loss": 1.2393, - "nll_loss": 1.0282164812088013, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.039122264832258224, - "rewards/margins": 0.01733459159731865, - "rewards/rejected": -0.05645686388015747, - "step": 1270 - }, - { - "epoch": 0.9507829977628636, - "grad_norm": 95.3061294555664, - "learning_rate": 1.4002800840280098e-06, - "log_odds_chosen": -0.5678247213363647, - "log_odds_ratio": -1.0276777744293213, - "logits/chosen": 285.3589782714844, - "logits/rejected": 278.0288391113281, - "logps/chosen": -1.137468934059143, - "logps/rejected": -0.8117654919624329, - "loss": 1.4808, - "nll_loss": 1.4416086673736572, - "rewards/accuracies": 0.0, - "rewards/chosen": -0.05687344819307327, - "rewards/margins": -0.01628517173230648, - "rewards/rejected": -0.04058827459812164, - "step": 1275 - }, - { - "epoch": 0.9545115585384042, - "grad_norm": 40.8712158203125, - "learning_rate": 1.3975424859373688e-06, - "log_odds_chosen": 0.9494668245315552, - "log_odds_ratio": -0.9042787551879883, - "logits/chosen": 358.9302062988281, - "logits/rejected": 370.36260986328125, - "logps/chosen": -1.0091290473937988, - "logps/rejected": -2.130140542984009, - "loss": 1.5989, - "nll_loss": 1.3916475772857666, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.050456453114748, - "rewards/margins": 0.05605057626962662, - "rewards/rejected": -0.10650704056024551, - "step": 1280 - }, - { - "epoch": 0.9582401193139448, - "grad_norm": 42.09870529174805, - "learning_rate": 1.3948208816291767e-06, - "log_odds_chosen": -0.19945785403251648, - "log_odds_ratio": -0.8010165095329285, - "logits/chosen": 258.85986328125, - "logits/rejected": 368.784423828125, - "logps/chosen": -1.2303708791732788, - "logps/rejected": -1.0975288152694702, - "loss": 1.4585, - "nll_loss": 1.5195953845977783, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.06151854246854782, - "rewards/margins": -0.00664210831746459, - "rewards/rejected": -0.05487643927335739, - "step": 1285 - }, - { - "epoch": 0.9619686800894854, - "grad_norm": 34.81499481201172, - "learning_rate": 1.3921151159742616e-06, - "log_odds_chosen": 0.11453668773174286, - "log_odds_ratio": -0.706844687461853, - "logits/chosen": 299.8536376953125, - "logits/rejected": 376.01336669921875, - "logps/chosen": -0.8942103385925293, - "logps/rejected": -1.0105385780334473, - "loss": 1.405, - "nll_loss": 1.0705254077911377, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04471052065491676, - "rewards/margins": 0.005816408433020115, - "rewards/rejected": -0.050526928156614304, - "step": 1290 - }, - { - "epoch": 0.9656972408650261, - "grad_norm": 45.12525939941406, - "learning_rate": 1.3894250359418213e-06, - "log_odds_chosen": -0.051154546439647675, - "log_odds_ratio": -0.7208074331283569, - "logits/chosen": 318.514404296875, - "logits/rejected": 299.3011779785156, - "logps/chosen": -1.0884395837783813, - "logps/rejected": -1.0437039136886597, - "loss": 1.4123, - "nll_loss": 1.3551928997039795, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.054421983659267426, - "rewards/margins": -0.002236785367131233, - "rewards/rejected": -0.05218519642949104, - "step": 1295 - }, - { - "epoch": 0.9694258016405667, - "grad_norm": 55.40985870361328, - "learning_rate": 1.386750490563073e-06, - "log_odds_chosen": -0.20424051582813263, - "log_odds_ratio": -0.8131963610649109, - "logits/chosen": 270.74822998046875, - "logits/rejected": 380.3003845214844, - "logps/chosen": -0.7625535130500793, - "logps/rejected": -0.6911493539810181, - "loss": 1.2708, - "nll_loss": 1.0809181928634644, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.03812767565250397, - "rewards/margins": -0.003570209490135312, - "rewards/rejected": -0.03455746918916702, - "step": 1300 - }, - { - "epoch": 0.9731543624161074, - "grad_norm": 68.29988861083984, - "learning_rate": 1.3840913308956663e-06, - "log_odds_chosen": 0.2694585919380188, - "log_odds_ratio": -0.6267434358596802, - "logits/chosen": 332.8673095703125, - "logits/rejected": 301.2939453125, - "logps/chosen": -0.9253619313240051, - "logps/rejected": -1.0249741077423096, - "loss": 1.4691, - "nll_loss": 1.3886725902557373, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.046268098056316376, - "rewards/margins": 0.004980606492608786, - "rewards/rejected": -0.0512487068772316, - "step": 1305 - }, - { - "epoch": 0.976882923191648, - "grad_norm": 41.01861572265625, - "learning_rate": 1.3814474099888442e-06, - "log_odds_chosen": 0.25864332914352417, - "log_odds_ratio": -0.5978071689605713, - "logits/chosen": 253.733642578125, - "logits/rejected": 342.8139343261719, - "logps/chosen": -0.9144594073295593, - "logps/rejected": -1.0654830932617188, - "loss": 1.4299, - "nll_loss": 1.4983320236206055, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.045722972601652145, - "rewards/margins": 0.007551182992756367, - "rewards/rejected": -0.05327415466308594, - "step": 1310 - }, - { - "epoch": 0.9806114839671887, - "grad_norm": 32.862091064453125, - "learning_rate": 1.3788185828493344e-06, - "log_odds_chosen": 0.27600333094596863, - "log_odds_ratio": -0.6295011639595032, - "logits/chosen": 270.9205322265625, - "logits/rejected": 314.25372314453125, - "logps/chosen": -1.02725350856781, - "logps/rejected": -1.2229536771774292, - "loss": 1.4897, - "nll_loss": 1.4280732870101929, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.05136267468333244, - "rewards/margins": 0.009785009548068047, - "rewards/rejected": -0.06114768981933594, - "step": 1315 - }, - { - "epoch": 0.9843400447427293, - "grad_norm": 48.22211837768555, - "learning_rate": 1.376204706407951e-06, - "log_odds_chosen": 0.405795156955719, - "log_odds_ratio": -0.5589355230331421, - "logits/chosen": 297.77642822265625, - "logits/rejected": 281.0946350097656, - "logps/chosen": -0.9366251826286316, - "logps/rejected": -1.215766191482544, - "loss": 1.3897, - "nll_loss": 1.6763694286346436, - "rewards/accuracies": 0.4000000059604645, - "rewards/chosen": -0.04683126136660576, - "rewards/margins": 0.013957047834992409, - "rewards/rejected": -0.060788314789533615, - "step": 1320 - }, - { - "epoch": 0.9880686055182699, - "grad_norm": 51.247982025146484, - "learning_rate": 1.3736056394868905e-06, - "log_odds_chosen": 0.06021606922149658, - "log_odds_ratio": -0.6967827081680298, - "logits/chosen": 314.7353210449219, - "logits/rejected": 257.969970703125, - "logps/chosen": -1.0589067935943604, - "logps/rejected": -1.1166903972625732, - "loss": 1.2068, - "nll_loss": 1.180021047592163, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.0529453344643116, - "rewards/margins": 0.002889187540858984, - "rewards/rejected": -0.05583452433347702, - "step": 1325 - }, - { - "epoch": 0.9917971662938105, - "grad_norm": 38.282752990722656, - "learning_rate": 1.3710212427677044e-06, - "log_odds_chosen": 0.22698381543159485, - "log_odds_ratio": -0.691421389579773, - "logits/chosen": 255.14047241210938, - "logits/rejected": 319.9523010253906, - "logps/chosen": -0.9662703275680542, - "logps/rejected": -1.1458715200424194, - "loss": 1.5185, - "nll_loss": 1.3388429880142212, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04831352084875107, - "rewards/margins": 0.008980056270956993, - "rewards/rejected": -0.05729357525706291, - "step": 1330 - }, - { - "epoch": 0.9955257270693513, - "grad_norm": 28.76251220703125, - "learning_rate": 1.3684513787599335e-06, - "log_odds_chosen": 0.06983167678117752, - "log_odds_ratio": -0.6774301528930664, - "logits/chosen": 303.6507263183594, - "logits/rejected": 329.9453430175781, - "logps/chosen": -0.962182343006134, - "logps/rejected": -0.985345184803009, - "loss": 1.4732, - "nll_loss": 1.3620043992996216, - "rewards/accuracies": 0.20000000298023224, - "rewards/chosen": -0.04810912162065506, - "rewards/margins": 0.001158140948973596, - "rewards/rejected": -0.049267254769802094, - "step": 1335 - }, - { - "epoch": 0.9992542878448919, - "grad_norm": 36.6693229675293, - "learning_rate": 1.3658959117703826e-06, - "log_odds_chosen": 0.387843519449234, - "log_odds_ratio": -0.5281216502189636, - "logits/chosen": 319.11541748046875, - "logits/rejected": 332.63311767578125, - "logps/chosen": -0.8915846943855286, - "logps/rejected": -1.158684492111206, - "loss": 1.4383, - "nll_loss": 1.19465172290802, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.044579241424798965, - "rewards/margins": 0.013354992493987083, - "rewards/rejected": -0.0579342320561409, - "step": 1340 - }, - { - "epoch": 1.0, - "eval_log_odds_chosen": 0.2118016630411148, - "eval_log_odds_ratio": -0.7026273608207703, - "eval_logits/chosen": 332.3973693847656, - "eval_logits/rejected": 305.7246398925781, - "eval_logps/chosen": -1.0203664302825928, - "eval_logps/rejected": -1.1481515169143677, - "eval_loss": 1.4376684427261353, - "eval_nll_loss": 1.3887299299240112, - "eval_rewards/accuracies": 0.5395683646202087, - "eval_rewards/chosen": -0.05101832374930382, - "eval_rewards/margins": 0.00638925563544035, - "eval_rewards/rejected": -0.05740758031606674, - "eval_runtime": 26.1272, - "eval_samples_per_second": 21.166, - "eval_steps_per_second": 5.32, - "step": 1341 - }, - { - "epoch": 1.0029828486204324, - "grad_norm": 42.127445220947266, - "learning_rate": 1.3633547078730297e-06, - "log_odds_chosen": 0.931357741355896, - "log_odds_ratio": -0.4411783814430237, - "logits/chosen": 324.14312744140625, - "logits/rejected": 323.72650146484375, - "logps/chosen": -0.7990046143531799, - "logps/rejected": -1.2728458642959595, - "loss": 0.9362, - "nll_loss": 1.073913335800171, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.03995022922754288, - "rewards/margins": 0.023692062124609947, - "rewards/rejected": -0.06364228576421738, - "step": 1345 - }, - { - "epoch": 1.0067114093959733, - "grad_norm": 86.97195434570312, - "learning_rate": 1.3608276348795436e-06, - "log_odds_chosen": 1.6209344863891602, - "log_odds_ratio": -0.24978908896446228, - "logits/chosen": 248.6111297607422, - "logits/rejected": 288.0247497558594, - "logps/chosen": -0.61744225025177, - "logps/rejected": -1.6561813354492188, - "loss": 0.9097, - "nll_loss": 0.7072396874427795, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.030872110277414322, - "rewards/margins": 0.05193696171045303, - "rewards/rejected": -0.08280906826257706, - "step": 1350 - }, - { - "epoch": 1.0104399701715139, - "grad_norm": 49.73358154296875, - "learning_rate": 1.3583145623104033e-06, - "log_odds_chosen": 1.1276583671569824, - "log_odds_ratio": -0.3153877854347229, - "logits/chosen": 318.6707458496094, - "logits/rejected": 241.65402221679688, - "logps/chosen": -0.7124746441841125, - "logps/rejected": -1.3722548484802246, - "loss": 0.8046, - "nll_loss": 0.8056241273880005, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03562372922897339, - "rewards/margins": 0.0329890139400959, - "rewards/rejected": -0.06861273944377899, - "step": 1355 - }, - { - "epoch": 1.0141685309470545, - "grad_norm": 40.59423828125, - "learning_rate": 1.355815361366601e-06, - "log_odds_chosen": 2.1742801666259766, - "log_odds_ratio": -0.1411195993423462, - "logits/chosen": 290.86981201171875, - "logits/rejected": 266.17889404296875, - "logps/chosen": -0.6057870388031006, - "logps/rejected": -1.9972374439239502, - "loss": 0.8828, - "nll_loss": 0.7294996380805969, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03028935194015503, - "rewards/margins": 0.06957252323627472, - "rewards/rejected": -0.09986187517642975, - "step": 1360 - }, - { - "epoch": 1.0178970917225951, - "grad_norm": 24.69209861755371, - "learning_rate": 1.353329904901917e-06, - "log_odds_chosen": 0.6982772946357727, - "log_odds_ratio": -0.4903028607368469, - "logits/chosen": 262.68035888671875, - "logits/rejected": 251.77328491210938, - "logps/chosen": -0.6246521472930908, - "logps/rejected": -1.0297539234161377, - "loss": 0.8573, - "nll_loss": 0.9015310406684875, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.03123260661959648, - "rewards/margins": 0.020255092531442642, - "rewards/rejected": -0.051487695425748825, - "step": 1365 - }, - { - "epoch": 1.0216256524981358, - "grad_norm": 28.550477981567383, - "learning_rate": 1.350858067395748e-06, - "log_odds_chosen": 1.1646158695220947, - "log_odds_ratio": -0.3022575378417969, - "logits/chosen": 293.45758056640625, - "logits/rejected": 242.8733673095703, - "logps/chosen": -0.4156245291233063, - "logps/rejected": -0.8739725351333618, - "loss": 0.8221, - "nll_loss": 0.7391732931137085, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.020781226456165314, - "rewards/margins": 0.022917401045560837, - "rewards/rejected": -0.04369863122701645, - "step": 1370 - }, - { - "epoch": 1.0253542132736764, - "grad_norm": 34.62664031982422, - "learning_rate": 1.3483997249264844e-06, - "log_odds_chosen": 1.5364539623260498, - "log_odds_ratio": -0.23362648487091064, - "logits/chosen": 245.3376007080078, - "logits/rejected": 265.64154052734375, - "logps/chosen": -0.5934082865715027, - "logps/rejected": -1.6038013696670532, - "loss": 0.7428, - "nll_loss": 0.8404462933540344, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02967042103409767, - "rewards/margins": 0.05051964521408081, - "rewards/rejected": -0.08019006252288818, - "step": 1375 - }, - { - "epoch": 1.029082774049217, - "grad_norm": 39.9339485168457, - "learning_rate": 1.345954755145414e-06, - "log_odds_chosen": 1.3495441675186157, - "log_odds_ratio": -0.23969492316246033, - "logits/chosen": 252.93112182617188, - "logits/rejected": 271.5606689453125, - "logps/chosen": -0.5909208655357361, - "logps/rejected": -1.400402307510376, - "loss": 0.7487, - "nll_loss": 0.8053001165390015, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.029546046629548073, - "rewards/margins": 0.040474072098731995, - "rewards/rejected": -0.07002012431621552, - "step": 1380 - }, - { - "epoch": 1.0328113348247576, - "grad_norm": 41.644493103027344, - "learning_rate": 1.3435230372511476e-06, - "log_odds_chosen": 1.8577067852020264, - "log_odds_ratio": -0.17572352290153503, - "logits/chosen": 287.23529052734375, - "logits/rejected": 273.3094787597656, - "logps/chosen": -0.3364667296409607, - "logps/rejected": -1.255177617073059, - "loss": 0.7656, - "nll_loss": 0.663829505443573, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.016823336482048035, - "rewards/margins": 0.04593554511666298, - "rewards/rejected": -0.06275888532400131, - "step": 1385 - }, - { - "epoch": 1.0365398956002982, - "grad_norm": 36.130985260009766, - "learning_rate": 1.3411044519645502e-06, - "log_odds_chosen": 0.7852060794830322, - "log_odds_ratio": -0.5423336029052734, - "logits/chosen": 210.0573272705078, - "logits/rejected": 377.8829345703125, - "logps/chosen": -0.5309531092643738, - "logps/rejected": -1.0065953731536865, - "loss": 0.8119, - "nll_loss": 0.8519891500473022, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.026547659188508987, - "rewards/margins": 0.02378210797905922, - "rewards/rejected": -0.05032976716756821, - "step": 1390 - }, - { - "epoch": 1.0402684563758389, - "grad_norm": 30.67536735534668, - "learning_rate": 1.3386988815041649e-06, - "log_odds_chosen": 1.412861704826355, - "log_odds_ratio": -0.2887376546859741, - "logits/chosen": 235.3671417236328, - "logits/rejected": 269.898681640625, - "logps/chosen": -0.49546951055526733, - "logps/rejected": -1.208715558052063, - "loss": 0.9294, - "nll_loss": 1.1340452432632446, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.024773476645350456, - "rewards/margins": 0.03566230088472366, - "rewards/rejected": -0.06043577194213867, - "step": 1395 - }, - { - "epoch": 1.0439970171513795, - "grad_norm": 33.715152740478516, - "learning_rate": 1.3363062095621222e-06, - "log_odds_chosen": 1.3442243337631226, - "log_odds_ratio": -0.32211077213287354, - "logits/chosen": 251.59548950195312, - "logits/rejected": 290.841552734375, - "logps/chosen": -0.5850163102149963, - "logps/rejected": -1.3363313674926758, - "loss": 0.7488, - "nll_loss": 0.7832403182983398, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.02925081178545952, - "rewards/margins": 0.03756575286388397, - "rewards/rejected": -0.06681656837463379, - "step": 1400 - }, - { - "epoch": 1.0477255779269201, - "grad_norm": 61.818443298339844, - "learning_rate": 1.3339263212805207e-06, - "log_odds_chosen": 2.728949785232544, - "log_odds_ratio": -0.10403885692358017, - "logits/chosen": 241.85452270507812, - "logits/rejected": 304.59259033203125, - "logps/chosen": -0.4084985852241516, - "logps/rejected": -1.910279631614685, - "loss": 0.8627, - "nll_loss": 0.8647967576980591, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02042493224143982, - "rewards/margins": 0.07508905231952667, - "rewards/rejected": -0.09551398456096649, - "step": 1405 - }, - { - "epoch": 1.0514541387024607, - "grad_norm": 61.780860900878906, - "learning_rate": 1.3315591032282687e-06, - "log_odds_chosen": 0.9714192152023315, - "log_odds_ratio": -0.3451424837112427, - "logits/chosen": 257.13714599609375, - "logits/rejected": 264.44970703125, - "logps/chosen": -0.6640916466712952, - "logps/rejected": -1.2582862377166748, - "loss": 0.8034, - "nll_loss": 0.746362030506134, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0332045815885067, - "rewards/margins": 0.02970973215997219, - "rewards/rejected": -0.06291431188583374, - "step": 1410 - }, - { - "epoch": 1.0551826994780016, - "grad_norm": 29.366186141967773, - "learning_rate": 1.3292044433783766e-06, - "log_odds_chosen": 0.9695445895195007, - "log_odds_ratio": -0.3591395616531372, - "logits/chosen": 267.0685729980469, - "logits/rejected": 254.139404296875, - "logps/chosen": -0.4821487367153168, - "logps/rejected": -1.0064637660980225, - "loss": 0.7063, - "nll_loss": 0.5813789963722229, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0241074375808239, - "rewards/margins": 0.026215756312012672, - "rewards/rejected": -0.05032319575548172, - "step": 1415 - }, - { - "epoch": 1.0589112602535422, - "grad_norm": 29.216754913330078, - "learning_rate": 1.3268622310856882e-06, - "log_odds_chosen": 1.1751203536987305, - "log_odds_ratio": -0.2938051223754883, - "logits/chosen": 280.4278259277344, - "logits/rejected": 250.75369262695312, - "logps/chosen": -0.607087254524231, - "logps/rejected": -1.254347562789917, - "loss": 1.0413, - "nll_loss": 1.1454931497573853, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.030354365706443787, - "rewards/margins": 0.03236301243305206, - "rewards/rejected": -0.06271737813949585, - "step": 1420 - }, - { - "epoch": 1.0626398210290828, - "grad_norm": 59.47529602050781, - "learning_rate": 1.324532357065044e-06, - "log_odds_chosen": 1.712083101272583, - "log_odds_ratio": -0.1772737354040146, - "logits/chosen": 218.81857299804688, - "logits/rejected": 230.7904052734375, - "logps/chosen": -0.37490472197532654, - "logps/rejected": -1.0677826404571533, - "loss": 0.8254, - "nll_loss": 0.6617105007171631, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.018745236098766327, - "rewards/margins": 0.03464389592409134, - "rewards/rejected": -0.053389132022857666, - "step": 1425 - }, - { - "epoch": 1.0663683818046235, - "grad_norm": 29.332382202148438, - "learning_rate": 1.3222147133698626e-06, - "log_odds_chosen": 1.749395728111267, - "log_odds_ratio": -0.1830216348171234, - "logits/chosen": 222.63412475585938, - "logits/rejected": 226.44961547851562, - "logps/chosen": -0.5369793176651001, - "logps/rejected": -1.589705228805542, - "loss": 0.8386, - "nll_loss": 0.9466124773025513, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.026848966255784035, - "rewards/margins": 0.0526362881064415, - "rewards/rejected": -0.07948525995016098, - "step": 1430 - }, - { - "epoch": 1.070096942580164, - "grad_norm": 36.807777404785156, - "learning_rate": 1.3199091933711366e-06, - "log_odds_chosen": 1.6891876459121704, - "log_odds_ratio": -0.18139585852622986, - "logits/chosen": 198.74314880371094, - "logits/rejected": 284.0096130371094, - "logps/chosen": -0.39725184440612793, - "logps/rejected": -1.2075073719024658, - "loss": 0.6991, - "nll_loss": 0.756847083568573, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.019862590357661247, - "rewards/margins": 0.040512774139642715, - "rewards/rejected": -0.06037537008523941, - "step": 1435 - }, - { - "epoch": 1.0738255033557047, - "grad_norm": 36.03850173950195, - "learning_rate": 1.3176156917368248e-06, - "log_odds_chosen": 1.0461509227752686, - "log_odds_ratio": -0.539143979549408, - "logits/chosen": 300.151123046875, - "logits/rejected": 267.266845703125, - "logps/chosen": -0.9649711847305298, - "logps/rejected": -1.4759559631347656, - "loss": 0.8546, - "nll_loss": 0.9150388836860657, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04824856296181679, - "rewards/margins": 0.025549232959747314, - "rewards/rejected": -0.0737977921962738, - "step": 1440 - }, - { - "epoch": 1.0775540641312453, - "grad_norm": 36.03170394897461, - "learning_rate": 1.315334104411641e-06, - "log_odds_chosen": 1.1955091953277588, - "log_odds_ratio": -0.3687121570110321, - "logits/chosen": 364.7061462402344, - "logits/rejected": 202.8603057861328, - "logps/chosen": -0.27282923460006714, - "logps/rejected": -0.8297010660171509, - "loss": 0.874, - "nll_loss": 0.7306213974952698, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.013641463592648506, - "rewards/margins": 0.02784358523786068, - "rewards/rejected": -0.041485048830509186, - "step": 1445 - }, - { - "epoch": 1.081282624906786, - "grad_norm": 28.141921997070312, - "learning_rate": 1.3130643285972255e-06, - "log_odds_chosen": 1.3189327716827393, - "log_odds_ratio": -0.3237442076206207, - "logits/chosen": 226.96218872070312, - "logits/rejected": 338.35626220703125, - "logps/chosen": -0.7876234650611877, - "logps/rejected": -1.6797752380371094, - "loss": 0.7297, - "nll_loss": 0.8421438336372375, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03938117250800133, - "rewards/margins": 0.04460758715867996, - "rewards/rejected": -0.08398876339197159, - "step": 1450 - }, - { - "epoch": 1.0850111856823266, - "grad_norm": 34.8005256652832, - "learning_rate": 1.310806262732691e-06, - "log_odds_chosen": 2.2563014030456543, - "log_odds_ratio": -0.1132306307554245, - "logits/chosen": 261.6311950683594, - "logits/rejected": 213.78720092773438, - "logps/chosen": -0.3489655554294586, - "logps/rejected": -1.3778300285339355, - "loss": 0.7487, - "nll_loss": 0.5842990875244141, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01744827814400196, - "rewards/margins": 0.05144321918487549, - "rewards/rejected": -0.0688915029168129, - "step": 1455 - }, - { - "epoch": 1.0887397464578672, - "grad_norm": 33.33745574951172, - "learning_rate": 1.3085598064755342e-06, - "log_odds_chosen": 1.2866771221160889, - "log_odds_ratio": -0.31613412499427795, - "logits/chosen": 227.5561065673828, - "logits/rejected": 205.23220825195312, - "logps/chosen": -0.34335339069366455, - "logps/rejected": -0.9550223350524902, - "loss": 0.7178, - "nll_loss": 0.5635708570480347, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.017167668789625168, - "rewards/margins": 0.030583446845412254, - "rewards/rejected": -0.04775111377239227, - "step": 1460 - }, - { - "epoch": 1.0924683072334078, - "grad_norm": 31.573570251464844, - "learning_rate": 1.3063248606829104e-06, - "log_odds_chosen": 1.6186602115631104, - "log_odds_ratio": -0.20021554827690125, - "logits/chosen": 215.5218505859375, - "logits/rejected": 211.7047576904297, - "logps/chosen": -0.43391066789627075, - "logps/rejected": -1.2186429500579834, - "loss": 0.6523, - "nll_loss": 0.553041934967041, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02169553004205227, - "rewards/margins": 0.03923662006855011, - "rewards/rejected": -0.06093214824795723, - "step": 1465 - }, - { - "epoch": 1.0961968680089484, - "grad_norm": 30.1257381439209, - "learning_rate": 1.3041013273932528e-06, - "log_odds_chosen": 1.3792362213134766, - "log_odds_ratio": -0.30702710151672363, - "logits/chosen": 283.5937194824219, - "logits/rejected": 228.5118865966797, - "logps/chosen": -0.6238886713981628, - "logps/rejected": -1.3976141214370728, - "loss": 0.7672, - "nll_loss": 1.2060695886611938, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03119443729519844, - "rewards/margins": 0.03868626803159714, - "rewards/rejected": -0.06988070905208588, - "step": 1470 - }, - { - "epoch": 1.0999254287844893, - "grad_norm": 37.44821548461914, - "learning_rate": 1.301889109808239e-06, - "log_odds_chosen": 3.469080686569214, - "log_odds_ratio": -0.14077824354171753, - "logits/chosen": 315.31524658203125, - "logits/rejected": 282.1437072753906, - "logps/chosen": -0.42284083366394043, - "logps/rejected": -2.3912270069122314, - "loss": 0.8238, - "nll_loss": 0.5950416326522827, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02114204131066799, - "rewards/margins": 0.09841931611299515, - "rewards/rejected": -0.11956135183572769, - "step": 1475 - }, - { - "epoch": 1.10365398956003, - "grad_norm": 33.69953155517578, - "learning_rate": 1.299688112275091e-06, - "log_odds_chosen": 1.8417549133300781, - "log_odds_ratio": -0.23399467766284943, - "logits/chosen": 285.3125, - "logits/rejected": 255.7438201904297, - "logps/chosen": -0.6845270991325378, - "logps/rejected": -1.7951700687408447, - "loss": 0.8799, - "nll_loss": 0.915195643901825, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03422635421156883, - "rewards/margins": 0.055532146245241165, - "rewards/rejected": -0.08975850045681, - "step": 1480 - }, - { - "epoch": 1.1073825503355705, - "grad_norm": 36.922096252441406, - "learning_rate": 1.2974982402692051e-06, - "log_odds_chosen": 1.3473453521728516, - "log_odds_ratio": -0.29881125688552856, - "logits/chosen": 209.1744384765625, - "logits/rejected": 285.01251220703125, - "logps/chosen": -1.2574278116226196, - "logps/rejected": -2.363554000854492, - "loss": 0.87, - "nll_loss": 1.2245771884918213, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.06287139654159546, - "rewards/margins": 0.055306315422058105, - "rewards/rejected": -0.11817769706249237, - "step": 1485 - }, - { - "epoch": 1.1111111111111112, - "grad_norm": 37.297576904296875, - "learning_rate": 1.2953194003770995e-06, - "log_odds_chosen": 1.8192977905273438, - "log_odds_ratio": -0.15911433100700378, - "logits/chosen": 225.7894287109375, - "logits/rejected": 289.05450439453125, - "logps/chosen": -0.5457912683486938, - "logps/rejected": -1.6060962677001953, - "loss": 0.6365, - "nll_loss": 0.7116705179214478, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02728956565260887, - "rewards/margins": 0.05301525443792343, - "rewards/rejected": -0.080304816365242, - "step": 1490 - }, - { - "epoch": 1.1148396718866518, - "grad_norm": 26.715105056762695, - "learning_rate": 1.2931515002796793e-06, - "log_odds_chosen": 1.3558660745620728, - "log_odds_ratio": -0.27653899788856506, - "logits/chosen": 264.5703125, - "logits/rejected": 239.7965087890625, - "logps/chosen": -0.48938003182411194, - "logps/rejected": -1.1548678874969482, - "loss": 0.8922, - "nll_loss": 0.9948796033859253, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.024469001218676567, - "rewards/margins": 0.033274389803409576, - "rewards/rejected": -0.057743389159440994, - "step": 1495 - }, - { - "epoch": 1.1185682326621924, - "grad_norm": 38.29195785522461, - "learning_rate": 1.2909944487358056e-06, - "log_odds_chosen": 1.2164850234985352, - "log_odds_ratio": -0.41655978560447693, - "logits/chosen": 209.8057098388672, - "logits/rejected": 475.67364501953125, - "logps/chosen": -0.8595746159553528, - "logps/rejected": -1.7970449924468994, - "loss": 0.8178, - "nll_loss": 0.9244582056999207, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04297873377799988, - "rewards/margins": 0.046873513609170914, - "rewards/rejected": -0.0898522436618805, - "step": 1500 - }, - { - "epoch": 1.122296793437733, - "grad_norm": 26.110774993896484, - "learning_rate": 1.2888481555661678e-06, - "log_odds_chosen": 1.3765900135040283, - "log_odds_ratio": -0.249309703707695, - "logits/chosen": 251.5668487548828, - "logits/rejected": 235.69802856445312, - "logps/chosen": -0.3903087079524994, - "logps/rejected": -1.098527431488037, - "loss": 0.8169, - "nll_loss": 0.5044815540313721, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01951543614268303, - "rewards/margins": 0.035410936921834946, - "rewards/rejected": -0.054926373064517975, - "step": 1505 - }, - { - "epoch": 1.1260253542132737, - "grad_norm": 35.78208923339844, - "learning_rate": 1.286712531637447e-06, - "log_odds_chosen": 0.7910630106925964, - "log_odds_ratio": -0.41341787576675415, - "logits/chosen": 299.37396240234375, - "logits/rejected": 215.2410430908203, - "logps/chosen": -0.521392822265625, - "logps/rejected": -0.9545801281929016, - "loss": 0.786, - "nll_loss": 0.713547945022583, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.02606964111328125, - "rewards/margins": 0.0216593686491251, - "rewards/rejected": -0.0477290078997612, - "step": 1510 - }, - { - "epoch": 1.1297539149888143, - "grad_norm": 28.358707427978516, - "learning_rate": 1.2845874888467698e-06, - "log_odds_chosen": 1.5931919813156128, - "log_odds_ratio": -0.285575807094574, - "logits/chosen": 203.96324157714844, - "logits/rejected": 255.17227172851562, - "logps/chosen": -0.7659878134727478, - "logps/rejected": -1.563223123550415, - "loss": 0.8418, - "nll_loss": 1.0326498746871948, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03829938918352127, - "rewards/margins": 0.039861761033535004, - "rewards/rejected": -0.07816115766763687, - "step": 1515 - }, - { - "epoch": 1.133482475764355, - "grad_norm": 30.9222469329834, - "learning_rate": 1.282472940106443e-06, - "log_odds_chosen": 2.3044137954711914, - "log_odds_ratio": -0.12457819283008575, - "logits/chosen": 229.6519317626953, - "logits/rejected": 215.55520629882812, - "logps/chosen": -0.2814995348453522, - "logps/rejected": -1.3448331356048584, - "loss": 0.7358, - "nll_loss": 0.717734694480896, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.014074976556003094, - "rewards/margins": 0.05316668003797531, - "rewards/rejected": -0.06724165380001068, - "step": 1520 - }, - { - "epoch": 1.1372110365398955, - "grad_norm": 34.52315902709961, - "learning_rate": 1.28036879932896e-06, - "log_odds_chosen": 0.8263221979141235, - "log_odds_ratio": -0.4929322302341461, - "logits/chosen": 262.72564697265625, - "logits/rejected": 224.12979125976562, - "logps/chosen": -0.6459177732467651, - "logps/rejected": -1.0510401725769043, - "loss": 0.8031, - "nll_loss": 0.9292188882827759, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.032295890152454376, - "rewards/margins": 0.02025611698627472, - "rewards/rejected": -0.052552007138729095, - "step": 1525 - }, - { - "epoch": 1.1409395973154361, - "grad_norm": 40.8486442565918, - "learning_rate": 1.278274981412284e-06, - "log_odds_chosen": 1.8425086736679077, - "log_odds_ratio": -0.19337260723114014, - "logits/chosen": 211.13925170898438, - "logits/rejected": 291.943115234375, - "logps/chosen": -0.4659954607486725, - "logps/rejected": -1.4438883066177368, - "loss": 0.7642, - "nll_loss": 0.7568862438201904, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.023299772292375565, - "rewards/margins": 0.048894643783569336, - "rewards/rejected": -0.0721944123506546, - "step": 1530 - }, - { - "epoch": 1.144668158090977, - "grad_norm": 27.538938522338867, - "learning_rate": 1.2761914022253899e-06, - "log_odds_chosen": 1.6126947402954102, - "log_odds_ratio": -0.2636811435222626, - "logits/chosen": 359.1217346191406, - "logits/rejected": 243.5980987548828, - "logps/chosen": -0.6101836562156677, - "logps/rejected": -1.351800799369812, - "loss": 0.9562, - "nll_loss": 1.0421425104141235, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.030509188771247864, - "rewards/margins": 0.037080854177474976, - "rewards/rejected": -0.06759003549814224, - "step": 1535 - }, - { - "epoch": 1.1483967188665176, - "grad_norm": 25.517148971557617, - "learning_rate": 1.2741179785940638e-06, - "log_odds_chosen": 0.9355126619338989, - "log_odds_ratio": -0.39303717017173767, - "logits/chosen": 300.48785400390625, - "logits/rejected": 254.3318328857422, - "logps/chosen": -0.613010823726654, - "logps/rejected": -1.0216959714889526, - "loss": 0.8346, - "nll_loss": 0.7891796231269836, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.030650544911623, - "rewards/margins": 0.02043425664305687, - "rewards/rejected": -0.05108479782938957, - "step": 1540 - }, - { - "epoch": 1.1521252796420582, - "grad_norm": 29.571413040161133, - "learning_rate": 1.2720546282869612e-06, - "log_odds_chosen": 2.1517910957336426, - "log_odds_ratio": -0.1594831496477127, - "logits/chosen": 268.0123291015625, - "logits/rejected": 257.90045166015625, - "logps/chosen": -0.34652072191238403, - "logps/rejected": -1.3042864799499512, - "loss": 0.8392, - "nll_loss": 0.919192910194397, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01732603646814823, - "rewards/margins": 0.04788828641176224, - "rewards/rejected": -0.06521432101726532, - "step": 1545 - }, - { - "epoch": 1.1558538404175989, - "grad_norm": 35.24119186401367, - "learning_rate": 1.270001270001905e-06, - "log_odds_chosen": 1.9776132106781006, - "log_odds_ratio": -0.13588789105415344, - "logits/chosen": 263.4161071777344, - "logits/rejected": 252.72653198242188, - "logps/chosen": -0.47479143738746643, - "logps/rejected": -1.6614391803741455, - "loss": 0.7425, - "nll_loss": 0.7486006617546082, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02373957261443138, - "rewards/margins": 0.05933239310979843, - "rewards/rejected": -0.08307196199893951, - "step": 1550 - }, - { - "epoch": 1.1595824011931395, - "grad_norm": 30.836347579956055, - "learning_rate": 1.2679578233524345e-06, - "log_odds_chosen": 1.4588923454284668, - "log_odds_ratio": -0.24270901083946228, - "logits/chosen": 270.8523864746094, - "logits/rejected": 238.3240203857422, - "logps/chosen": -0.6290242075920105, - "logps/rejected": -1.5541672706604004, - "loss": 0.9035, - "nll_loss": 0.855979323387146, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.031451210379600525, - "rewards/margins": 0.046257149428129196, - "rewards/rejected": -0.07770836353302002, - "step": 1555 - }, - { - "epoch": 1.1633109619686801, - "grad_norm": 28.928503036499023, - "learning_rate": 1.2659242088545834e-06, - "log_odds_chosen": 1.7064393758773804, - "log_odds_ratio": -0.21430940926074982, - "logits/chosen": 310.9888000488281, - "logits/rejected": 261.7429504394531, - "logps/chosen": -0.6238566040992737, - "logps/rejected": -1.506088376045227, - "loss": 0.8357, - "nll_loss": 0.9611517786979675, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.031192829832434654, - "rewards/margins": 0.044111594557762146, - "rewards/rejected": -0.07530441880226135, - "step": 1560 - }, - { - "epoch": 1.1670395227442207, - "grad_norm": 42.93767547607422, - "learning_rate": 1.2639003479138966e-06, - "log_odds_chosen": 1.6967474222183228, - "log_odds_ratio": -0.22097241878509521, - "logits/chosen": 254.5096435546875, - "logits/rejected": 262.5145263671875, - "logps/chosen": -0.4231342375278473, - "logps/rejected": -1.2029664516448975, - "loss": 0.7368, - "nll_loss": 0.7754932641983032, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.021156711503863335, - "rewards/margins": 0.03899160772562027, - "rewards/rejected": -0.060148321092128754, - "step": 1565 - }, - { - "epoch": 1.1707680835197614, - "grad_norm": 41.588382720947266, - "learning_rate": 1.261886162812672e-06, - "log_odds_chosen": 2.907371997833252, - "log_odds_ratio": -0.10257838666439056, - "logits/chosen": 280.92327880859375, - "logits/rejected": 249.91845703125, - "logps/chosen": -0.42274871468544006, - "logps/rejected": -2.295062780380249, - "loss": 0.8345, - "nll_loss": 0.8451513051986694, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.021137436851859093, - "rewards/margins": 0.09361569583415985, - "rewards/rejected": -0.11475314199924469, - "step": 1570 - }, - { - "epoch": 1.174496644295302, - "grad_norm": 34.014163970947266, - "learning_rate": 1.259881576697424e-06, - "log_odds_chosen": 2.1329522132873535, - "log_odds_ratio": -0.19243749976158142, - "logits/chosen": 242.2947998046875, - "logits/rejected": 276.654541015625, - "logps/chosen": -0.47482815384864807, - "logps/rejected": -1.7851810455322266, - "loss": 0.726, - "nll_loss": 0.5306302905082703, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.023741407319903374, - "rewards/margins": 0.06551764905452728, - "rewards/rejected": -0.0892590582370758, - "step": 1575 - }, - { - "epoch": 1.1782252050708426, - "grad_norm": 33.79102325439453, - "learning_rate": 1.257886513566569e-06, - "log_odds_chosen": 1.9249389171600342, - "log_odds_ratio": -0.17345082759857178, - "logits/chosen": 338.67083740234375, - "logits/rejected": 260.0727233886719, - "logps/chosen": -0.4788905084133148, - "logps/rejected": -1.5149332284927368, - "loss": 0.7488, - "nll_loss": 0.8948395848274231, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02394452504813671, - "rewards/margins": 0.0518021285533905, - "rewards/rejected": -0.07574665546417236, - "step": 1580 - }, - { - "epoch": 1.1819537658463832, - "grad_norm": 52.3241081237793, - "learning_rate": 1.255900898258321e-06, - "log_odds_chosen": 1.3362575769424438, - "log_odds_ratio": -0.3876381814479828, - "logits/chosen": 227.29220581054688, - "logits/rejected": 246.7780303955078, - "logps/chosen": -0.7423983812332153, - "logps/rejected": -1.4572218656539917, - "loss": 0.9325, - "nll_loss": 0.9110592603683472, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.037119921296834946, - "rewards/margins": 0.0357411727309227, - "rewards/rejected": -0.07286109775304794, - "step": 1585 - }, - { - "epoch": 1.1856823266219239, - "grad_norm": 28.505565643310547, - "learning_rate": 1.253924656438798e-06, - "log_odds_chosen": 1.8215230703353882, - "log_odds_ratio": -0.17563818395137787, - "logits/chosen": 268.17529296875, - "logits/rejected": 295.59832763671875, - "logps/chosen": -0.4704774022102356, - "logps/rejected": -1.4684885740280151, - "loss": 0.7584, - "nll_loss": 0.4969747066497803, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02352387085556984, - "rewards/margins": 0.049900565296411514, - "rewards/rejected": -0.07342443615198135, - "step": 1590 - }, - { - "epoch": 1.1894108873974645, - "grad_norm": 28.067148208618164, - "learning_rate": 1.2519577145903362e-06, - "log_odds_chosen": 1.5984976291656494, - "log_odds_ratio": -0.21598438918590546, - "logits/chosen": 243.81716918945312, - "logits/rejected": 229.6437530517578, - "logps/chosen": -0.3341395854949951, - "logps/rejected": -1.0134623050689697, - "loss": 0.726, - "nll_loss": 0.5546959042549133, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.016706980764865875, - "rewards/margins": 0.03396613150835037, - "rewards/rejected": -0.050673115998506546, - "step": 1595 - }, - { - "epoch": 1.1931394481730053, - "grad_norm": 52.50226974487305, - "learning_rate": 1.25e-06, - "log_odds_chosen": 2.411717414855957, - "log_odds_ratio": -0.1301671862602234, - "logits/chosen": 281.8387145996094, - "logits/rejected": 209.48873901367188, - "logps/chosen": -0.2915397584438324, - "logps/rejected": -1.2820192575454712, - "loss": 0.7986, - "nll_loss": 0.8204711675643921, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.014576989226043224, - "rewards/margins": 0.0495239719748497, - "rewards/rejected": -0.0641009658575058, - "step": 1600 - }, - { - "epoch": 1.196868008948546, - "grad_norm": 24.118669509887695, - "learning_rate": 1.2480514407482947e-06, - "log_odds_chosen": 1.2159076929092407, - "log_odds_ratio": -0.33064576983451843, - "logits/chosen": 272.00518798828125, - "logits/rejected": 302.4422912597656, - "logps/chosen": -0.6316283345222473, - "logps/rejected": -1.283645749092102, - "loss": 0.7377, - "nll_loss": 0.7513858079910278, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.031581416726112366, - "rewards/margins": 0.032600872218608856, - "rewards/rejected": -0.06418228894472122, - "step": 1605 - }, - { - "epoch": 1.2005965697240866, - "grad_norm": 55.094844818115234, - "learning_rate": 1.246111965698067e-06, - "log_odds_chosen": 1.0671539306640625, - "log_odds_ratio": -0.32440799474716187, - "logits/chosen": 263.2196350097656, - "logits/rejected": 323.56622314453125, - "logps/chosen": -0.4955400824546814, - "logps/rejected": -1.045562982559204, - "loss": 0.672, - "nll_loss": 0.94267737865448, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0247770044952631, - "rewards/margins": 0.027501141652464867, - "rewards/rejected": -0.052278149873018265, - "step": 1610 - }, - { - "epoch": 1.2043251304996272, - "grad_norm": 31.53770637512207, - "learning_rate": 1.244181504483599e-06, - "log_odds_chosen": 2.1033573150634766, - "log_odds_ratio": -0.17666944861412048, - "logits/chosen": 259.40087890625, - "logits/rejected": 282.67230224609375, - "logps/chosen": -0.5153762698173523, - "logps/rejected": -1.5106281042099, - "loss": 0.8264, - "nll_loss": 0.6682819128036499, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.025768812745809555, - "rewards/margins": 0.04976258799433708, - "rewards/rejected": -0.07553140819072723, - "step": 1615 - }, - { - "epoch": 1.2080536912751678, - "grad_norm": 50.90135192871094, - "learning_rate": 1.2422599874998834e-06, - "log_odds_chosen": 1.3928890228271484, - "log_odds_ratio": -0.359669029712677, - "logits/chosen": 349.53912353515625, - "logits/rejected": 224.6630096435547, - "logps/chosen": -0.9010102152824402, - "logps/rejected": -1.8240118026733398, - "loss": 0.8451, - "nll_loss": 1.0206258296966553, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04505051299929619, - "rewards/margins": 0.0461500808596611, - "rewards/rejected": -0.09120059758424759, - "step": 1620 - }, - { - "epoch": 1.2117822520507084, - "grad_norm": 40.575401306152344, - "learning_rate": 1.2403473458920848e-06, - "log_odds_chosen": 2.4568161964416504, - "log_odds_ratio": -0.10361161082983017, - "logits/chosen": 232.0025177001953, - "logits/rejected": 292.8970031738281, - "logps/chosen": -0.3771646022796631, - "logps/rejected": -1.844866156578064, - "loss": 0.909, - "nll_loss": 0.8563951253890991, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.018858233466744423, - "rewards/margins": 0.0733850747346878, - "rewards/rejected": -0.09224330633878708, - "step": 1625 - }, - { - "epoch": 1.215510812826249, - "grad_norm": 27.004657745361328, - "learning_rate": 1.238443511545175e-06, - "log_odds_chosen": 2.2845654487609863, - "log_odds_ratio": -0.198068767786026, - "logits/chosen": 288.418212890625, - "logits/rejected": 226.0413055419922, - "logps/chosen": -0.5669429898262024, - "logps/rejected": -2.097766876220703, - "loss": 0.6911, - "nll_loss": 0.7302448153495789, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.028347153216600418, - "rewards/margins": 0.07654118537902832, - "rewards/rejected": -0.10488833487033844, - "step": 1630 - }, - { - "epoch": 1.2192393736017897, - "grad_norm": 23.61784553527832, - "learning_rate": 1.236548417073745e-06, - "log_odds_chosen": 0.9183662533760071, - "log_odds_ratio": -0.544979453086853, - "logits/chosen": 383.1451110839844, - "logits/rejected": 228.16470336914062, - "logps/chosen": -0.717126190662384, - "logps/rejected": -1.1680939197540283, - "loss": 0.7961, - "nll_loss": 1.0090316534042358, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.03585631027817726, - "rewards/margins": 0.022548388689756393, - "rewards/rejected": -0.058404695242643356, - "step": 1635 - }, - { - "epoch": 1.2229679343773303, - "grad_norm": 28.29683494567871, - "learning_rate": 1.2346619958119873e-06, - "log_odds_chosen": 1.5689729452133179, - "log_odds_ratio": -0.2774243950843811, - "logits/chosen": 285.69384765625, - "logits/rejected": 276.6675720214844, - "logps/chosen": -0.6132264733314514, - "logps/rejected": -1.4173119068145752, - "loss": 0.791, - "nll_loss": 0.7708568572998047, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0306613240391016, - "rewards/margins": 0.04020426794886589, - "rewards/rejected": -0.07086558640003204, - "step": 1640 - }, - { - "epoch": 1.226696495152871, - "grad_norm": 36.816749572753906, - "learning_rate": 1.2327841818038448e-06, - "log_odds_chosen": 0.9588233828544617, - "log_odds_ratio": -0.3440731465816498, - "logits/chosen": 323.4227294921875, - "logits/rejected": 287.90386962890625, - "logps/chosen": -0.7170368432998657, - "logps/rejected": -1.2901442050933838, - "loss": 0.9647, - "nll_loss": 1.0675601959228516, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.035851843655109406, - "rewards/margins": 0.02865537628531456, - "rewards/rejected": -0.06450722366571426, - "step": 1645 - }, - { - "epoch": 1.2304250559284116, - "grad_norm": 28.033294677734375, - "learning_rate": 1.2309149097933274e-06, - "log_odds_chosen": 1.6305782794952393, - "log_odds_ratio": -0.21618719398975372, - "logits/chosen": 244.4318389892578, - "logits/rejected": 292.76116943359375, - "logps/chosen": -0.5877609252929688, - "logps/rejected": -1.6084457635879517, - "loss": 0.8325, - "nll_loss": 0.9270159006118774, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.029388045892119408, - "rewards/margins": 0.051034241914749146, - "rewards/rejected": -0.0804222822189331, - "step": 1650 - }, - { - "epoch": 1.2341536167039522, - "grad_norm": 47.491031646728516, - "learning_rate": 1.2290541152149845e-06, - "log_odds_chosen": 0.6853604912757874, - "log_odds_ratio": -0.47329577803611755, - "logits/chosen": 235.9998016357422, - "logits/rejected": 373.9860534667969, - "logps/chosen": -0.6469461917877197, - "logps/rejected": -1.060056447982788, - "loss": 0.7647, - "nll_loss": 0.8005412817001343, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.032347310334444046, - "rewards/margins": 0.020655518397688866, - "rewards/rejected": -0.053002823144197464, - "step": 1655 - }, - { - "epoch": 1.2378821774794928, - "grad_norm": 35.13934326171875, - "learning_rate": 1.2272017341845401e-06, - "log_odds_chosen": 1.3708057403564453, - "log_odds_ratio": -0.2337532490491867, - "logits/chosen": 244.91397094726562, - "logits/rejected": 261.30194091796875, - "logps/chosen": -0.5349195599555969, - "logps/rejected": -1.3096550703048706, - "loss": 0.8911, - "nll_loss": 0.6685469746589661, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.026745978742837906, - "rewards/margins": 0.038736775517463684, - "rewards/rejected": -0.06548275053501129, - "step": 1660 - }, - { - "epoch": 1.2416107382550337, - "grad_norm": 29.529573440551758, - "learning_rate": 1.2253577034896796e-06, - "log_odds_chosen": 1.3065885305404663, - "log_odds_ratio": -0.2823933959007263, - "logits/chosen": 311.48028564453125, - "logits/rejected": 279.7447814941406, - "logps/chosen": -0.3979186415672302, - "logps/rejected": -1.0789427757263184, - "loss": 0.6934, - "nll_loss": 0.6430209875106812, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.01989593356847763, - "rewards/margins": 0.034051209688186646, - "rewards/rejected": -0.053947143256664276, - "step": 1665 - }, - { - "epoch": 1.2453392990305743, - "grad_norm": 26.273887634277344, - "learning_rate": 1.223521960580991e-06, - "log_odds_chosen": 1.0781400203704834, - "log_odds_ratio": -0.3933262526988983, - "logits/chosen": 276.5855712890625, - "logits/rejected": 319.51629638671875, - "logps/chosen": -0.6645928621292114, - "logps/rejected": -1.2394425868988037, - "loss": 0.8725, - "nll_loss": 0.7688542008399963, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.03322964161634445, - "rewards/margins": 0.028742486611008644, - "rewards/rejected": -0.061972130089998245, - "step": 1670 - }, - { - "epoch": 1.249067859806115, - "grad_norm": 33.42168045043945, - "learning_rate": 1.2216944435630524e-06, - "log_odds_chosen": 1.136856198310852, - "log_odds_ratio": -0.3230389654636383, - "logits/chosen": 230.0050811767578, - "logits/rejected": 315.88787841796875, - "logps/chosen": -0.515019416809082, - "logps/rejected": -1.0131967067718506, - "loss": 0.7148, - "nll_loss": 0.7646769881248474, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02575097046792507, - "rewards/margins": 0.024908866733312607, - "rewards/rejected": -0.05065983533859253, - "step": 1675 - }, - { - "epoch": 1.2527964205816555, - "grad_norm": 29.746746063232422, - "learning_rate": 1.2198750911856664e-06, - "log_odds_chosen": 0.6999019384384155, - "log_odds_ratio": -0.4931694567203522, - "logits/chosen": 290.02777099609375, - "logits/rejected": 305.4759216308594, - "logps/chosen": -0.4515116214752197, - "logps/rejected": -0.8048914074897766, - "loss": 0.773, - "nll_loss": 0.7426081895828247, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.022575581446290016, - "rewards/margins": 0.017668992280960083, - "rewards/rejected": -0.04024457186460495, - "step": 1680 - }, - { - "epoch": 1.2565249813571961, - "grad_norm": 46.15593338012695, - "learning_rate": 1.2180638428352399e-06, - "log_odds_chosen": 0.9378479719161987, - "log_odds_ratio": -0.39460936188697815, - "logits/chosen": 225.0652618408203, - "logits/rejected": 314.2108459472656, - "logps/chosen": -0.7810439467430115, - "logps/rejected": -1.3490484952926636, - "loss": 0.8299, - "nll_loss": 0.9336512684822083, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.03905219957232475, - "rewards/margins": 0.028400231152772903, - "rewards/rejected": -0.06745243072509766, - "step": 1685 - }, - { - "epoch": 1.2602535421327368, - "grad_norm": 42.316795349121094, - "learning_rate": 1.2162606385262997e-06, - "log_odds_chosen": 2.453591823577881, - "log_odds_ratio": -0.10359473526477814, - "logits/chosen": 237.62557983398438, - "logits/rejected": 249.3263397216797, - "logps/chosen": -0.4512161612510681, - "logps/rejected": -1.9939167499542236, - "loss": 0.8264, - "nll_loss": 0.8594792485237122, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.022560808807611465, - "rewards/margins": 0.07713502645492554, - "rewards/rejected": -0.0996958389878273, - "step": 1690 - }, - { - "epoch": 1.2639821029082774, - "grad_norm": 27.914365768432617, - "learning_rate": 1.2144654188931508e-06, - "log_odds_chosen": 1.5173064470291138, - "log_odds_ratio": -0.26737576723098755, - "logits/chosen": 289.6295471191406, - "logits/rejected": 322.1310119628906, - "logps/chosen": -0.5274262428283691, - "logps/rejected": -1.274106502532959, - "loss": 0.8197, - "nll_loss": 0.7847896814346313, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.026371311396360397, - "rewards/margins": 0.03733401373028755, - "rewards/rejected": -0.06370531767606735, - "step": 1695 - }, - { - "epoch": 1.267710663683818, - "grad_norm": 35.09907531738281, - "learning_rate": 1.2126781251816649e-06, - "log_odds_chosen": 1.6821256875991821, - "log_odds_ratio": -0.2519722282886505, - "logits/chosen": 280.53155517578125, - "logits/rejected": 228.77114868164062, - "logps/chosen": -0.48582687973976135, - "logps/rejected": -1.0553961992263794, - "loss": 0.8488, - "nll_loss": 1.1800885200500488, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02429134212434292, - "rewards/margins": 0.02847846783697605, - "rewards/rejected": -0.05276980996131897, - "step": 1700 - }, - { - "epoch": 1.2714392244593586, - "grad_norm": 19.334335327148438, - "learning_rate": 1.210898699241207e-06, - "log_odds_chosen": 1.526319146156311, - "log_odds_ratio": -0.23670418560504913, - "logits/chosen": 229.595703125, - "logits/rejected": 283.2119445800781, - "logps/chosen": -0.5015705823898315, - "logps/rejected": -1.3897631168365479, - "loss": 0.6819, - "nll_loss": 0.5764104723930359, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.025078529492020607, - "rewards/margins": 0.044409625232219696, - "rewards/rejected": -0.06948815286159515, - "step": 1705 - }, - { - "epoch": 1.2751677852348993, - "grad_norm": 35.654541015625, - "learning_rate": 1.2091270835166862e-06, - "log_odds_chosen": 2.6932640075683594, - "log_odds_ratio": -0.10858403146266937, - "logits/chosen": 240.3206787109375, - "logits/rejected": 268.1875, - "logps/chosen": -0.3493538498878479, - "logps/rejected": -1.7736425399780273, - "loss": 0.8655, - "nll_loss": 0.7818929553031921, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.017467692494392395, - "rewards/margins": 0.07121443003416061, - "rewards/rejected": -0.0886821299791336, - "step": 1710 - }, - { - "epoch": 1.2788963460104399, - "grad_norm": 27.984630584716797, - "learning_rate": 1.207363221040738e-06, - "log_odds_chosen": 1.6145522594451904, - "log_odds_ratio": -0.21557354927062988, - "logits/chosen": 324.60479736328125, - "logits/rejected": 292.8335266113281, - "logps/chosen": -0.6127289533615112, - "logps/rejected": -1.577701210975647, - "loss": 0.7918, - "nll_loss": 0.8143022656440735, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.030636444687843323, - "rewards/margins": 0.048248618841171265, - "rewards/rejected": -0.07888506352901459, - "step": 1715 - }, - { - "epoch": 1.2826249067859807, - "grad_norm": 26.091625213623047, - "learning_rate": 1.2056070554260305e-06, - "log_odds_chosen": 1.5955994129180908, - "log_odds_ratio": -0.209956556558609, - "logits/chosen": 328.44110107421875, - "logits/rejected": 257.0823669433594, - "logps/chosen": -0.5373284220695496, - "logps/rejected": -1.4606409072875977, - "loss": 0.7035, - "nll_loss": 0.6356666088104248, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.026866421103477478, - "rewards/margins": 0.046165622770786285, - "rewards/rejected": -0.07303204387426376, - "step": 1720 - }, - { - "epoch": 1.2863534675615211, - "grad_norm": 28.2515926361084, - "learning_rate": 1.2038585308576922e-06, - "log_odds_chosen": 0.8119839429855347, - "log_odds_ratio": -0.4090527594089508, - "logits/chosen": 284.7693176269531, - "logits/rejected": 271.500732421875, - "logps/chosen": -0.34758928418159485, - "logps/rejected": -0.6810197830200195, - "loss": 0.6848, - "nll_loss": 0.6190077662467957, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.017379464581608772, - "rewards/margins": 0.016671527177095413, - "rewards/rejected": -0.034050993621349335, - "step": 1725 - }, - { - "epoch": 1.290082028337062, - "grad_norm": 65.89623260498047, - "learning_rate": 1.2021175920858626e-06, - "log_odds_chosen": 1.5590450763702393, - "log_odds_ratio": -0.22129759192466736, - "logits/chosen": 247.5896453857422, - "logits/rejected": 256.21563720703125, - "logps/chosen": -0.4467514455318451, - "logps/rejected": -1.3464257717132568, - "loss": 0.8458, - "nll_loss": 0.6450456380844116, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.022337574511766434, - "rewards/margins": 0.04498371481895447, - "rewards/rejected": -0.0673212856054306, - "step": 1730 - }, - { - "epoch": 1.2938105891126026, - "grad_norm": 32.24693298339844, - "learning_rate": 1.200384184418359e-06, - "log_odds_chosen": 1.4001598358154297, - "log_odds_ratio": -0.2528952956199646, - "logits/chosen": 291.37957763671875, - "logits/rejected": 328.18280029296875, - "logps/chosen": -0.7378822565078735, - "logps/rejected": -1.6081041097640991, - "loss": 0.8889, - "nll_loss": 0.8359991908073425, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03689411282539368, - "rewards/margins": 0.04351109638810158, - "rewards/rejected": -0.08040520548820496, - "step": 1735 - }, - { - "epoch": 1.2975391498881432, - "grad_norm": 24.03868293762207, - "learning_rate": 1.1986582537134606e-06, - "log_odds_chosen": 2.4519152641296387, - "log_odds_ratio": -0.16151727735996246, - "logits/chosen": 236.7007598876953, - "logits/rejected": 280.5276184082031, - "logps/chosen": -0.4499146044254303, - "logps/rejected": -1.6271413564682007, - "loss": 0.7675, - "nll_loss": 0.7429088354110718, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.022495727986097336, - "rewards/margins": 0.05886133760213852, - "rewards/rejected": -0.08135706931352615, - "step": 1740 - }, - { - "epoch": 1.3012677106636839, - "grad_norm": 36.047813415527344, - "learning_rate": 1.1969397463728037e-06, - "log_odds_chosen": 1.228736162185669, - "log_odds_ratio": -0.32878780364990234, - "logits/chosen": 286.739501953125, - "logits/rejected": 316.8846435546875, - "logps/chosen": -0.6312834024429321, - "logps/rejected": -1.2932671308517456, - "loss": 0.723, - "nll_loss": 0.7154420018196106, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.031564172357320786, - "rewards/margins": 0.033099185675382614, - "rewards/rejected": -0.0646633580327034, - "step": 1745 - }, - { - "epoch": 1.3049962714392245, - "grad_norm": 51.89238357543945, - "learning_rate": 1.1952286093343937e-06, - "log_odds_chosen": 0.8661966323852539, - "log_odds_ratio": -0.36659854650497437, - "logits/chosen": 344.4444885253906, - "logits/rejected": 256.57574462890625, - "logps/chosen": -0.6909769773483276, - "logps/rejected": -1.2050575017929077, - "loss": 0.7458, - "nll_loss": 0.7730010151863098, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03454885259270668, - "rewards/margins": 0.025704020634293556, - "rewards/rejected": -0.06025286763906479, - "step": 1750 - }, - { - "epoch": 1.308724832214765, - "grad_norm": 42.2951545715332, - "learning_rate": 1.1935247900657217e-06, - "log_odds_chosen": 1.1772104501724243, - "log_odds_ratio": -0.31540489196777344, - "logits/chosen": 235.73464965820312, - "logits/rejected": 327.29559326171875, - "logps/chosen": -0.9950253367424011, - "logps/rejected": -1.8061898946762085, - "loss": 0.7634, - "nll_loss": 0.9182122945785522, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04975127428770065, - "rewards/margins": 0.04055823013186455, - "rewards/rejected": -0.0903095081448555, - "step": 1755 - }, - { - "epoch": 1.3124533929903057, - "grad_norm": 40.70754623413086, - "learning_rate": 1.1918282365569903e-06, - "log_odds_chosen": 1.6234588623046875, - "log_odds_ratio": -0.22873333096504211, - "logits/chosen": 223.6549835205078, - "logits/rejected": 334.7487487792969, - "logps/chosen": -0.5522328615188599, - "logps/rejected": -1.3688626289367676, - "loss": 0.9339, - "nll_loss": 0.7713028192520142, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.027611643075942993, - "rewards/margins": 0.040831487625837326, - "rewards/rejected": -0.06844313442707062, - "step": 1760 - }, - { - "epoch": 1.3161819537658463, - "grad_norm": 32.01611328125, - "learning_rate": 1.1901388973144479e-06, - "log_odds_chosen": 1.2664210796356201, - "log_odds_ratio": -0.2614901065826416, - "logits/chosen": 282.37823486328125, - "logits/rejected": 248.17489624023438, - "logps/chosen": -0.49149322509765625, - "logps/rejected": -1.1985297203063965, - "loss": 0.6964, - "nll_loss": 0.634373128414154, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.024574657902121544, - "rewards/margins": 0.03535183519124985, - "rewards/rejected": -0.059926487505435944, - "step": 1765 - }, - { - "epoch": 1.319910514541387, - "grad_norm": 32.484718322753906, - "learning_rate": 1.1884567213538209e-06, - "log_odds_chosen": 1.5237720012664795, - "log_odds_ratio": -0.23329560458660126, - "logits/chosen": 236.4955291748047, - "logits/rejected": 349.19146728515625, - "logps/chosen": -0.7394946217536926, - "logps/rejected": -1.825143814086914, - "loss": 0.9117, - "nll_loss": 0.911469578742981, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03697473555803299, - "rewards/margins": 0.05428246408700943, - "rewards/rejected": -0.09125719219446182, - "step": 1770 - }, - { - "epoch": 1.3236390753169276, - "grad_norm": 42.93055725097656, - "learning_rate": 1.1867816581938534e-06, - "log_odds_chosen": 1.1945196390151978, - "log_odds_ratio": -0.3477817177772522, - "logits/chosen": 275.17559814453125, - "logits/rejected": 248.1326141357422, - "logps/chosen": -0.5295892357826233, - "logps/rejected": -1.1125218868255615, - "loss": 0.7876, - "nll_loss": 0.7738931179046631, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.026479462161660194, - "rewards/margins": 0.02914663590490818, - "rewards/rejected": -0.05562610179185867, - "step": 1775 - }, - { - "epoch": 1.3273676360924682, - "grad_norm": 23.083099365234375, - "learning_rate": 1.1851136578499433e-06, - "log_odds_chosen": 1.6921672821044922, - "log_odds_ratio": -0.1929929107427597, - "logits/chosen": 242.41769409179688, - "logits/rejected": 277.5686950683594, - "logps/chosen": -0.4227360188961029, - "logps/rejected": -1.1833235025405884, - "loss": 0.7616, - "nll_loss": 0.662434995174408, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.021136801689863205, - "rewards/margins": 0.03802937641739845, - "rewards/rejected": -0.059166181832551956, - "step": 1780 - }, - { - "epoch": 1.331096196868009, - "grad_norm": 41.834842681884766, - "learning_rate": 1.1834526708278771e-06, - "log_odds_chosen": 1.4685407876968384, - "log_odds_ratio": -0.30585941672325134, - "logits/chosen": 234.87960815429688, - "logits/rejected": 249.0299835205078, - "logps/chosen": -0.35979416966438293, - "logps/rejected": -1.0891560316085815, - "loss": 0.7338, - "nll_loss": 0.7741786241531372, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.017989708110690117, - "rewards/margins": 0.03646809607744217, - "rewards/rejected": -0.05445780232548714, - "step": 1785 - }, - { - "epoch": 1.3348247576435495, - "grad_norm": 30.992603302001953, - "learning_rate": 1.181798648117664e-06, - "log_odds_chosen": 1.8716472387313843, - "log_odds_ratio": -0.14769162237644196, - "logits/chosen": 274.3726806640625, - "logits/rejected": 251.3052520751953, - "logps/chosen": -0.5990955233573914, - "logps/rejected": -1.7883827686309814, - "loss": 0.6462, - "nll_loss": 0.6897188425064087, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.029954776167869568, - "rewards/margins": 0.059464357793331146, - "rewards/rejected": -0.08941914141178131, - "step": 1790 - }, - { - "epoch": 1.3385533184190903, - "grad_norm": 59.50674057006836, - "learning_rate": 1.1801515411874575e-06, - "log_odds_chosen": 1.2521061897277832, - "log_odds_ratio": -0.41858357191085815, - "logits/chosen": 230.3242950439453, - "logits/rejected": 305.7582702636719, - "logps/chosen": -0.99830162525177, - "logps/rejected": -1.876997947692871, - "loss": 0.8334, - "nll_loss": 1.0483170747756958, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04991507530212402, - "rewards/margins": 0.04393482208251953, - "rewards/rejected": -0.09384991228580475, - "step": 1795 - }, - { - "epoch": 1.342281879194631, - "grad_norm": 22.09417724609375, - "learning_rate": 1.1785113019775794e-06, - "log_odds_chosen": 0.9526540637016296, - "log_odds_ratio": -0.3622133135795593, - "logits/chosen": 261.32220458984375, - "logits/rejected": 320.20648193359375, - "logps/chosen": -0.5776183009147644, - "logps/rejected": -1.0919945240020752, - "loss": 0.7096, - "nll_loss": 0.6318497657775879, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02888091839849949, - "rewards/margins": 0.0257188081741333, - "rewards/rejected": -0.05459972098469734, - "step": 1800 - }, - { - "epoch": 1.3460104399701716, - "grad_norm": 37.82291793823242, - "learning_rate": 1.1768778828946262e-06, - "log_odds_chosen": 1.649553894996643, - "log_odds_ratio": -0.22860905528068542, - "logits/chosen": 255.7153778076172, - "logits/rejected": 267.5897216796875, - "logps/chosen": -0.48092183470726013, - "logps/rejected": -1.2795555591583252, - "loss": 0.7563, - "nll_loss": 0.9221190214157104, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.024046093225479126, - "rewards/margins": 0.03993168845772743, - "rewards/rejected": -0.06397777795791626, - "step": 1805 - }, - { - "epoch": 1.3497390007457122, - "grad_norm": 42.69437789916992, - "learning_rate": 1.1752512368056712e-06, - "log_odds_chosen": 1.6391420364379883, - "log_odds_ratio": -0.29302749037742615, - "logits/chosen": 239.2467041015625, - "logits/rejected": 310.4886169433594, - "logps/chosen": -0.7727411389350891, - "logps/rejected": -1.9211339950561523, - "loss": 0.8046, - "nll_loss": 0.8246277570724487, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.038637056946754456, - "rewards/margins": 0.05741964653134346, - "rewards/rejected": -0.09605670720338821, - "step": 1810 - }, - { - "epoch": 1.3534675615212528, - "grad_norm": 34.790775299072266, - "learning_rate": 1.1736313170325507e-06, - "log_odds_chosen": 1.506897211074829, - "log_odds_ratio": -0.2511078715324402, - "logits/chosen": 295.8504333496094, - "logits/rejected": 319.1558532714844, - "logps/chosen": -0.352271169424057, - "logps/rejected": -1.0489745140075684, - "loss": 0.6859, - "nll_loss": 0.6947525143623352, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01761355809867382, - "rewards/margins": 0.034835170954465866, - "rewards/rejected": -0.05244872719049454, - "step": 1815 - }, - { - "epoch": 1.3571961222967934, - "grad_norm": 29.00199317932129, - "learning_rate": 1.1720180773462387e-06, - "log_odds_chosen": 0.5732920169830322, - "log_odds_ratio": -0.47108468413352966, - "logits/chosen": 236.2578887939453, - "logits/rejected": 323.9944763183594, - "logps/chosen": -0.4152335226535797, - "logps/rejected": -0.6816787719726562, - "loss": 0.7091, - "nll_loss": 0.8077095150947571, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.020761676132678986, - "rewards/margins": 0.013322263956069946, - "rewards/rejected": -0.03408394008874893, - "step": 1820 - }, - { - "epoch": 1.360924683072334, - "grad_norm": 41.04883575439453, - "learning_rate": 1.1704114719613058e-06, - "log_odds_chosen": 1.2468080520629883, - "log_odds_ratio": -0.3521755039691925, - "logits/chosen": 263.618896484375, - "logits/rejected": 273.8648376464844, - "logps/chosen": -0.5479956865310669, - "logps/rejected": -1.3121824264526367, - "loss": 0.8056, - "nll_loss": 0.7536294460296631, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.027399783954024315, - "rewards/margins": 0.03820933401584625, - "rewards/rejected": -0.06560911983251572, - "step": 1825 - }, - { - "epoch": 1.3646532438478747, - "grad_norm": 30.926172256469727, - "learning_rate": 1.168811455530461e-06, - "log_odds_chosen": 1.5768635272979736, - "log_odds_ratio": -0.21282634139060974, - "logits/chosen": 289.5645446777344, - "logits/rejected": 269.3079528808594, - "logps/chosen": -0.5356389284133911, - "logps/rejected": -1.4580446481704712, - "loss": 0.7041, - "nll_loss": 0.7809799909591675, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.026781946420669556, - "rewards/margins": 0.0461202934384346, - "rewards/rejected": -0.07290224730968475, - "step": 1830 - }, - { - "epoch": 1.3683818046234153, - "grad_norm": 28.547027587890625, - "learning_rate": 1.1672179831391772e-06, - "log_odds_chosen": 1.848193883895874, - "log_odds_ratio": -0.20210571587085724, - "logits/chosen": 262.8218688964844, - "logits/rejected": 269.7100830078125, - "logps/chosen": -0.6101763844490051, - "logps/rejected": -1.5339915752410889, - "loss": 0.8429, - "nll_loss": 0.6229022741317749, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.030508821830153465, - "rewards/margins": 0.046190761029720306, - "rewards/rejected": -0.07669957727193832, - "step": 1835 - }, - { - "epoch": 1.372110365398956, - "grad_norm": 31.988309860229492, - "learning_rate": 1.1656310103003923e-06, - "log_odds_chosen": 2.5327706336975098, - "log_odds_ratio": -0.14646422863006592, - "logits/chosen": 244.23129272460938, - "logits/rejected": 336.3633728027344, - "logps/chosen": -0.5229008197784424, - "logps/rejected": -2.138254165649414, - "loss": 0.8167, - "nll_loss": 0.7541581392288208, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02614504098892212, - "rewards/margins": 0.0807676687836647, - "rewards/rejected": -0.10691270977258682, - "step": 1840 - }, - { - "epoch": 1.3758389261744965, - "grad_norm": 28.367507934570312, - "learning_rate": 1.164050492949297e-06, - "log_odds_chosen": 0.6681721806526184, - "log_odds_ratio": -0.5429176092147827, - "logits/chosen": 257.82489013671875, - "logits/rejected": 345.48443603515625, - "logps/chosen": -0.6489526629447937, - "logps/rejected": -1.1076571941375732, - "loss": 0.9131, - "nll_loss": 0.8919655084609985, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.032447636127471924, - "rewards/margins": 0.02293522097170353, - "rewards/rejected": -0.055382855236530304, - "step": 1845 - }, - { - "epoch": 1.3795674869500374, - "grad_norm": 26.4628963470459, - "learning_rate": 1.162476387438193e-06, - "log_odds_chosen": 2.561962604522705, - "log_odds_ratio": -0.19219723343849182, - "logits/chosen": 390.63800048828125, - "logits/rejected": 281.09991455078125, - "logps/chosen": -0.282963365316391, - "logps/rejected": -1.1010653972625732, - "loss": 0.8241, - "nll_loss": 0.6437990069389343, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01414816826581955, - "rewards/margins": 0.04090510308742523, - "rewards/rejected": -0.05505327135324478, - "step": 1850 - }, - { - "epoch": 1.3832960477255778, - "grad_norm": 27.059646606445312, - "learning_rate": 1.1609086505314302e-06, - "log_odds_chosen": 2.261500835418701, - "log_odds_ratio": -0.10023969411849976, - "logits/chosen": 306.4846496582031, - "logits/rejected": 259.9902648925781, - "logps/chosen": -0.31595319509506226, - "logps/rejected": -1.4672874212265015, - "loss": 0.6105, - "nll_loss": 0.5324149131774902, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.015797659754753113, - "rewards/margins": 0.057566720992326736, - "rewards/rejected": -0.07336438447237015, - "step": 1855 - }, - { - "epoch": 1.3870246085011186, - "grad_norm": 29.62752914428711, - "learning_rate": 1.1593472394004206e-06, - "log_odds_chosen": 2.192032814025879, - "log_odds_ratio": -0.18115124106407166, - "logits/chosen": 274.9383239746094, - "logits/rejected": 263.09637451171875, - "logps/chosen": -0.39317160844802856, - "logps/rejected": -1.5096148252487183, - "loss": 0.789, - "nll_loss": 0.7113686800003052, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.019658580422401428, - "rewards/margins": 0.055822163820266724, - "rewards/rejected": -0.07548074424266815, - "step": 1860 - }, - { - "epoch": 1.3907531692766593, - "grad_norm": 33.17584228515625, - "learning_rate": 1.1577921116187233e-06, - "log_odds_chosen": 1.3187170028686523, - "log_odds_ratio": -0.35434672236442566, - "logits/chosen": 232.39486694335938, - "logits/rejected": 368.14044189453125, - "logps/chosen": -0.6627458333969116, - "logps/rejected": -1.4551833868026733, - "loss": 0.7265, - "nll_loss": 0.8224549293518066, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03313729539513588, - "rewards/margins": 0.039621878415346146, - "rewards/rejected": -0.07275917381048203, - "step": 1865 - }, - { - "epoch": 1.3944817300521999, - "grad_norm": 43.29597091674805, - "learning_rate": 1.1562432251572007e-06, - "log_odds_chosen": 1.8163318634033203, - "log_odds_ratio": -0.2991154193878174, - "logits/chosen": 245.46804809570312, - "logits/rejected": 326.0414733886719, - "logps/chosen": -0.9972810745239258, - "logps/rejected": -2.4638006687164307, - "loss": 0.9639, - "nll_loss": 1.276718020439148, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04986405372619629, - "rewards/margins": 0.073325976729393, - "rewards/rejected": -0.1231900304555893, - "step": 1870 - }, - { - "epoch": 1.3982102908277405, - "grad_norm": 28.351953506469727, - "learning_rate": 1.1547005383792514e-06, - "log_odds_chosen": 2.1960978507995605, - "log_odds_ratio": -0.16799786686897278, - "logits/chosen": 258.14849853515625, - "logits/rejected": 244.8041229248047, - "logps/chosen": -0.25543713569641113, - "logps/rejected": -1.3532871007919312, - "loss": 0.7053, - "nll_loss": 0.43617963790893555, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.012771856971085072, - "rewards/margins": 0.05489249899983406, - "rewards/rejected": -0.06766435503959656, - "step": 1875 - }, - { - "epoch": 1.4019388516032811, - "grad_norm": 30.167278289794922, - "learning_rate": 1.1531640100361064e-06, - "log_odds_chosen": 1.5650221109390259, - "log_odds_ratio": -0.26448458433151245, - "logits/chosen": 271.53167724609375, - "logits/rejected": 326.66357421875, - "logps/chosen": -0.5006614923477173, - "logps/rejected": -1.2380117177963257, - "loss": 0.7511, - "nll_loss": 0.7399893403053284, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.025033075362443924, - "rewards/margins": 0.03686751052737236, - "rewards/rejected": -0.06190057843923569, - "step": 1880 - }, - { - "epoch": 1.4056674123788218, - "grad_norm": 43.179931640625, - "learning_rate": 1.1516335992621969e-06, - "log_odds_chosen": 1.53621506690979, - "log_odds_ratio": -0.29905521869659424, - "logits/chosen": 248.8231658935547, - "logits/rejected": 303.7766418457031, - "logps/chosen": -0.37832051515579224, - "logps/rejected": -1.0599489212036133, - "loss": 0.7689, - "nll_loss": 0.7458083629608154, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.01891602762043476, - "rewards/margins": 0.03408142179250717, - "rewards/rejected": -0.05299744755029678, - "step": 1885 - }, - { - "epoch": 1.4093959731543624, - "grad_norm": 30.22862434387207, - "learning_rate": 1.1501092655705905e-06, - "log_odds_chosen": 0.9890273809432983, - "log_odds_ratio": -0.39196303486824036, - "logits/chosen": 330.25323486328125, - "logits/rejected": 298.7342224121094, - "logps/chosen": -0.7178888916969299, - "logps/rejected": -1.2377245426177979, - "loss": 0.792, - "nll_loss": 0.7899801135063171, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.035894446074962616, - "rewards/margins": 0.025991782546043396, - "rewards/rejected": -0.06188622862100601, - "step": 1890 - }, - { - "epoch": 1.413124533929903, - "grad_norm": 50.4702262878418, - "learning_rate": 1.1485909688484915e-06, - "log_odds_chosen": 1.8111203908920288, - "log_odds_ratio": -0.1968632936477661, - "logits/chosen": 310.6489562988281, - "logits/rejected": 330.21759033203125, - "logps/chosen": -0.5172513723373413, - "logps/rejected": -1.5524265766143799, - "loss": 0.8291, - "nll_loss": 0.8387260437011719, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.025862565264105797, - "rewards/margins": 0.051758766174316406, - "rewards/rejected": -0.07762133330106735, - "step": 1895 - }, - { - "epoch": 1.4168530947054436, - "grad_norm": 48.173824310302734, - "learning_rate": 1.1470786693528087e-06, - "log_odds_chosen": 0.675690770149231, - "log_odds_ratio": -0.5433510541915894, - "logits/chosen": 296.62615966796875, - "logits/rejected": 270.0245056152344, - "logps/chosen": -0.8586667776107788, - "logps/rejected": -1.1494834423065186, - "loss": 0.9162, - "nll_loss": 1.093515157699585, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04293334111571312, - "rewards/margins": 0.01454083900898695, - "rewards/rejected": -0.057474177330732346, - "step": 1900 - }, - { - "epoch": 1.4205816554809845, - "grad_norm": 32.232261657714844, - "learning_rate": 1.1455723277057847e-06, - "log_odds_chosen": 1.1263326406478882, - "log_odds_ratio": -0.32713255286216736, - "logits/chosen": 243.72891235351562, - "logits/rejected": 307.7520446777344, - "logps/chosen": -0.7179991006851196, - "logps/rejected": -1.3988850116729736, - "loss": 0.8692, - "nll_loss": 0.9882783889770508, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03589995577931404, - "rewards/margins": 0.0340442955493927, - "rewards/rejected": -0.06994424760341644, - "step": 1905 - }, - { - "epoch": 1.4243102162565249, - "grad_norm": 46.35112380981445, - "learning_rate": 1.144071904890689e-06, - "log_odds_chosen": 1.3164043426513672, - "log_odds_ratio": -0.3788314461708069, - "logits/chosen": 300.08758544921875, - "logits/rejected": 249.1552276611328, - "logps/chosen": -0.6237635612487793, - "logps/rejected": -1.3742353916168213, - "loss": 0.9866, - "nll_loss": 1.235195517539978, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.031188178807497025, - "rewards/margins": 0.03752359002828598, - "rewards/rejected": -0.0687117725610733, - "step": 1910 - }, - { - "epoch": 1.4280387770320657, - "grad_norm": 59.10334777832031, - "learning_rate": 1.1425773622475754e-06, - "log_odds_chosen": 1.813826322555542, - "log_odds_ratio": -0.21033641695976257, - "logits/chosen": 254.73233032226562, - "logits/rejected": 361.1746520996094, - "logps/chosen": -0.418854296207428, - "logps/rejected": -1.3537778854370117, - "loss": 0.7433, - "nll_loss": 0.6197955012321472, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02094271406531334, - "rewards/margins": 0.04674617573618889, - "rewards/rejected": -0.06768889725208282, - "step": 1915 - }, - { - "epoch": 1.4317673378076063, - "grad_norm": 29.183979034423828, - "learning_rate": 1.1410886614690962e-06, - "log_odds_chosen": 1.6801363229751587, - "log_odds_ratio": -0.19390347599983215, - "logits/chosen": 408.24853515625, - "logits/rejected": 232.81881713867188, - "logps/chosen": -0.30109384655952454, - "logps/rejected": -1.091995358467102, - "loss": 0.7381, - "nll_loss": 0.7416561841964722, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.015054690651595592, - "rewards/margins": 0.03954508155584335, - "rewards/rejected": -0.05459976941347122, - "step": 1920 - }, - { - "epoch": 1.435495898583147, - "grad_norm": 59.23794174194336, - "learning_rate": 1.1396057645963796e-06, - "log_odds_chosen": 2.149590253829956, - "log_odds_ratio": -0.1566992700099945, - "logits/chosen": 320.3479919433594, - "logits/rejected": 290.57147216796875, - "logps/chosen": -0.3963201344013214, - "logps/rejected": -1.5540482997894287, - "loss": 0.8951, - "nll_loss": 0.8463636636734009, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01981600746512413, - "rewards/margins": 0.057886410504579544, - "rewards/rejected": -0.07770241796970367, - "step": 1925 - }, - { - "epoch": 1.4392244593586876, - "grad_norm": 25.69407844543457, - "learning_rate": 1.1381286340149635e-06, - "log_odds_chosen": 0.7239212393760681, - "log_odds_ratio": -0.7000375986099243, - "logits/chosen": 281.64404296875, - "logits/rejected": 349.39373779296875, - "logps/chosen": -0.6092414855957031, - "logps/rejected": -1.2273468971252441, - "loss": 0.8293, - "nll_loss": 0.8291558027267456, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.030462075024843216, - "rewards/margins": 0.03090527094900608, - "rewards/rejected": -0.06136734411120415, - "step": 1930 - }, - { - "epoch": 1.4429530201342282, - "grad_norm": 46.23264694213867, - "learning_rate": 1.1366572324507892e-06, - "log_odds_chosen": 1.0710437297821045, - "log_odds_ratio": -0.6793659329414368, - "logits/chosen": 362.2379150390625, - "logits/rejected": 229.1209716796875, - "logps/chosen": -0.3832743763923645, - "logps/rejected": -0.9978560209274292, - "loss": 0.857, - "nll_loss": 0.7869169116020203, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.019163718447089195, - "rewards/margins": 0.030729085206985474, - "rewards/rejected": -0.04989280551671982, - "step": 1935 - }, - { - "epoch": 1.4466815809097688, - "grad_norm": 27.47172737121582, - "learning_rate": 1.1351915229662496e-06, - "log_odds_chosen": 1.058064579963684, - "log_odds_ratio": -0.3427963852882385, - "logits/chosen": 338.1394348144531, - "logits/rejected": 232.3114776611328, - "logps/chosen": -0.5034564733505249, - "logps/rejected": -1.0323948860168457, - "loss": 0.812, - "nll_loss": 0.9182608723640442, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.025172820314764977, - "rewards/margins": 0.026446929201483727, - "rewards/rejected": -0.0516197495162487, - "step": 1940 - }, - { - "epoch": 1.4504101416853095, - "grad_norm": 31.107166290283203, - "learning_rate": 1.1337314689562956e-06, - "log_odds_chosen": 1.7948204278945923, - "log_odds_ratio": -0.1776677966117859, - "logits/chosen": 307.9141540527344, - "logits/rejected": 281.5089111328125, - "logps/chosen": -0.39940565824508667, - "logps/rejected": -1.3574388027191162, - "loss": 0.6474, - "nll_loss": 0.5864914655685425, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.019970281049609184, - "rewards/margins": 0.04790165647864342, - "rewards/rejected": -0.06787194311618805, - "step": 1945 - }, - { - "epoch": 1.45413870246085, - "grad_norm": 38.70953369140625, - "learning_rate": 1.1322770341445958e-06, - "log_odds_chosen": 1.323652982711792, - "log_odds_ratio": -0.2568439245223999, - "logits/chosen": 268.8689880371094, - "logits/rejected": 325.8135681152344, - "logps/chosen": -0.5023938417434692, - "logps/rejected": -1.1213634014129639, - "loss": 0.8374, - "nll_loss": 0.7907627820968628, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.025119692087173462, - "rewards/margins": 0.03094847872853279, - "rewards/rejected": -0.05606817081570625, - "step": 1950 - }, - { - "epoch": 1.4578672632363907, - "grad_norm": 30.42875862121582, - "learning_rate": 1.1308281825797517e-06, - "log_odds_chosen": 0.6185647249221802, - "log_odds_ratio": -0.47689881920814514, - "logits/chosen": 235.9018096923828, - "logits/rejected": 260.88653564453125, - "logps/chosen": -0.7720372080802917, - "logps/rejected": -1.1144832372665405, - "loss": 0.7717, - "nll_loss": 0.9720867276191711, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.03860186040401459, - "rewards/margins": 0.017122305929660797, - "rewards/rejected": -0.055724166333675385, - "step": 1955 - }, - { - "epoch": 1.4615958240119313, - "grad_norm": 36.399417877197266, - "learning_rate": 1.1293848786315642e-06, - "log_odds_chosen": 0.7905017137527466, - "log_odds_ratio": -0.4467952251434326, - "logits/chosen": 245.4092254638672, - "logits/rejected": 257.3954162597656, - "logps/chosen": -0.8914252519607544, - "logps/rejected": -1.2062699794769287, - "loss": 0.9233, - "nll_loss": 1.1203893423080444, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.04457126557826996, - "rewards/margins": 0.015742242336273193, - "rewards/rejected": -0.06031350418925285, - "step": 1960 - }, - { - "epoch": 1.465324384787472, - "grad_norm": 41.00490951538086, - "learning_rate": 1.1279470869873539e-06, - "log_odds_chosen": 0.9932208061218262, - "log_odds_ratio": -0.37757548689842224, - "logits/chosen": 292.61138916015625, - "logits/rejected": 380.5915222167969, - "logps/chosen": -0.5649142861366272, - "logps/rejected": -1.0759313106536865, - "loss": 0.8739, - "nll_loss": 0.8407251238822937, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.0282457172870636, - "rewards/margins": 0.025550847873091698, - "rewards/rejected": -0.053796567022800446, - "step": 1965 - }, - { - "epoch": 1.4690529455630128, - "grad_norm": 32.022056579589844, - "learning_rate": 1.1265147726483323e-06, - "log_odds_chosen": 0.8087921142578125, - "log_odds_ratio": -0.3946351706981659, - "logits/chosen": 322.15045166015625, - "logits/rejected": 275.70947265625, - "logps/chosen": -0.8515889048576355, - "logps/rejected": -1.3052632808685303, - "loss": 0.848, - "nll_loss": 1.064632773399353, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04257944971323013, - "rewards/margins": 0.02268371917307377, - "rewards/rejected": -0.06526316702365875, - "step": 1970 - }, - { - "epoch": 1.4727815063385532, - "grad_norm": 54.58523941040039, - "learning_rate": 1.125087900926024e-06, - "log_odds_chosen": 1.2596776485443115, - "log_odds_ratio": -0.26161423325538635, - "logits/chosen": 287.2241516113281, - "logits/rejected": 312.9441223144531, - "logps/chosen": -0.35484570264816284, - "logps/rejected": -0.8538276553153992, - "loss": 0.7674, - "nll_loss": 0.5733861923217773, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.017742285504937172, - "rewards/margins": 0.024949096143245697, - "rewards/rejected": -0.04269138723611832, - "step": 1975 - }, - { - "epoch": 1.476510067114094, - "grad_norm": 33.71122360229492, - "learning_rate": 1.1236664374387369e-06, - "log_odds_chosen": 1.6564041376113892, - "log_odds_ratio": -0.22768302261829376, - "logits/chosen": 285.9105224609375, - "logits/rejected": 322.6634216308594, - "logps/chosen": -0.5396953821182251, - "logps/rejected": -1.482778787612915, - "loss": 0.8176, - "nll_loss": 0.6871273517608643, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.026984769850969315, - "rewards/margins": 0.04715416952967644, - "rewards/rejected": -0.07413893193006516, - "step": 1980 - }, - { - "epoch": 1.4802386278896347, - "grad_norm": 28.664356231689453, - "learning_rate": 1.1222503481080839e-06, - "log_odds_chosen": 1.7492344379425049, - "log_odds_ratio": -0.27775874733924866, - "logits/chosen": 227.9129638671875, - "logits/rejected": 385.39202880859375, - "logps/chosen": -0.5473363995552063, - "logps/rejected": -1.3526593446731567, - "loss": 0.7442, - "nll_loss": 0.8101294636726379, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.027366820722818375, - "rewards/margins": 0.04026614874601364, - "rewards/rejected": -0.06763297319412231, - "step": 1985 - }, - { - "epoch": 1.4839671886651753, - "grad_norm": 33.91633224487305, - "learning_rate": 1.120839599155551e-06, - "log_odds_chosen": 1.0849989652633667, - "log_odds_ratio": -0.3185434341430664, - "logits/chosen": 338.0903015136719, - "logits/rejected": 316.83123779296875, - "logps/chosen": -0.7546719908714294, - "logps/rejected": -1.4624735116958618, - "loss": 0.7482, - "nll_loss": 0.8821079134941101, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03773360326886177, - "rewards/margins": 0.03539007529616356, - "rewards/rejected": -0.07312367856502533, - "step": 1990 - }, - { - "epoch": 1.487695749440716, - "grad_norm": 25.492862701416016, - "learning_rate": 1.1194341570991125e-06, - "log_odds_chosen": 1.5896761417388916, - "log_odds_ratio": -0.22764191031455994, - "logits/chosen": 313.38189697265625, - "logits/rejected": 250.6473846435547, - "logps/chosen": -0.4717642664909363, - "logps/rejected": -1.2518455982208252, - "loss": 0.7685, - "nll_loss": 0.7693471908569336, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.023588214069604874, - "rewards/margins": 0.039004068821668625, - "rewards/rejected": -0.0625922828912735, - "step": 1995 - }, - { - "epoch": 1.4914243102162565, - "grad_norm": 35.26829528808594, - "learning_rate": 1.118033988749895e-06, - "log_odds_chosen": 1.8179967403411865, - "log_odds_ratio": -0.2562178075313568, - "logits/chosen": 234.9374237060547, - "logits/rejected": 295.0100402832031, - "logps/chosen": -0.1950119137763977, - "logps/rejected": -0.9596652984619141, - "loss": 0.7534, - "nll_loss": 0.6103488206863403, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.009750595316290855, - "rewards/margins": 0.03823266178369522, - "rewards/rejected": -0.047983258962631226, - "step": 2000 - }, - { - "epoch": 1.4951528709917972, - "grad_norm": 36.95999526977539, - "learning_rate": 1.1166390612088834e-06, - "log_odds_chosen": 1.5793342590332031, - "log_odds_ratio": -0.2435876876115799, - "logits/chosen": 320.1354675292969, - "logits/rejected": 235.7510528564453, - "logps/chosen": -0.5459139943122864, - "logps/rejected": -1.367599368095398, - "loss": 0.809, - "nll_loss": 0.8568132519721985, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02729569934308529, - "rewards/margins": 0.04108427092432976, - "rewards/rejected": -0.0683799684047699, - "step": 2005 - }, - { - "epoch": 1.4988814317673378, - "grad_norm": 37.972896575927734, - "learning_rate": 1.1152493418636764e-06, - "log_odds_chosen": 2.0125808715820312, - "log_odds_ratio": -0.16030338406562805, - "logits/chosen": 288.9795227050781, - "logits/rejected": 307.9769592285156, - "logps/chosen": -0.3095664381980896, - "logps/rejected": -1.2621102333068848, - "loss": 0.8006, - "nll_loss": 0.6460586190223694, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.015478323213756084, - "rewards/margins": 0.04762718826532364, - "rewards/rejected": -0.063105508685112, - "step": 2010 - }, - { - "epoch": 1.5026099925428784, - "grad_norm": 65.89371490478516, - "learning_rate": 1.1138647983852827e-06, - "log_odds_chosen": 1.1287071704864502, - "log_odds_ratio": -0.39613598585128784, - "logits/chosen": 285.7611389160156, - "logits/rejected": 274.4689636230469, - "logps/chosen": -0.5075243711471558, - "logps/rejected": -1.108507513999939, - "loss": 0.9712, - "nll_loss": 1.0209357738494873, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.025376219302415848, - "rewards/margins": 0.030049163848161697, - "rewards/rejected": -0.055425383150577545, - "step": 2015 - }, - { - "epoch": 1.506338553318419, - "grad_norm": 68.79418182373047, - "learning_rate": 1.112485398724962e-06, - "log_odds_chosen": 0.8733970522880554, - "log_odds_ratio": -0.3815053105354309, - "logits/chosen": 258.07861328125, - "logits/rejected": 240.7571563720703, - "logps/chosen": -0.546515703201294, - "logps/rejected": -1.0057778358459473, - "loss": 0.7184, - "nll_loss": 0.9173129200935364, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.027325784787535667, - "rewards/margins": 0.022963106632232666, - "rewards/rejected": -0.05028889328241348, - "step": 2020 - }, - { - "epoch": 1.5100671140939599, - "grad_norm": 32.41799545288086, - "learning_rate": 1.111111111111111e-06, - "log_odds_chosen": 1.286834716796875, - "log_odds_ratio": -0.2619245648384094, - "logits/chosen": 230.1335906982422, - "logits/rejected": 287.27337646484375, - "logps/chosen": -0.6369329690933228, - "logps/rejected": -1.3609791994094849, - "loss": 0.8077, - "nll_loss": 0.7672790288925171, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03184664994478226, - "rewards/margins": 0.036202311515808105, - "rewards/rejected": -0.06804896146059036, - "step": 2025 - }, - { - "epoch": 1.5137956748695003, - "grad_norm": 29.79248046875, - "learning_rate": 1.1097419040461884e-06, - "log_odds_chosen": 1.6876888275146484, - "log_odds_ratio": -0.18691857159137726, - "logits/chosen": 258.54901123046875, - "logits/rejected": 256.8870544433594, - "logps/chosen": -0.30914106965065, - "logps/rejected": -0.9687968492507935, - "loss": 0.7907, - "nll_loss": 0.516217052936554, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.015457053668797016, - "rewards/margins": 0.03298278898000717, - "rewards/rejected": -0.04843984171748161, - "step": 2030 - }, - { - "epoch": 1.5175242356450411, - "grad_norm": 31.602066040039062, - "learning_rate": 1.1083777463036816e-06, - "log_odds_chosen": 1.1109898090362549, - "log_odds_ratio": -0.291120707988739, - "logits/chosen": 335.81060791015625, - "logits/rejected": 278.8406982421875, - "logps/chosen": -0.4814034402370453, - "logps/rejected": -1.0182723999023438, - "loss": 0.7328, - "nll_loss": 0.8696242570877075, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.024070175364613533, - "rewards/margins": 0.026843447238206863, - "rewards/rejected": -0.05091362074017525, - "step": 2035 - }, - { - "epoch": 1.5212527964205815, - "grad_norm": 43.18628692626953, - "learning_rate": 1.1070186069251193e-06, - "log_odds_chosen": 2.0655016899108887, - "log_odds_ratio": -0.16812577843666077, - "logits/chosen": 218.53622436523438, - "logits/rejected": 323.7992248535156, - "logps/chosen": -0.39612141251564026, - "logps/rejected": -1.3310635089874268, - "loss": 0.8751, - "nll_loss": 0.5830885171890259, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.019806072115898132, - "rewards/margins": 0.046747103333473206, - "rewards/rejected": -0.06655317544937134, - "step": 2040 - }, - { - "epoch": 1.5249813571961224, - "grad_norm": 31.315994262695312, - "learning_rate": 1.1056644552171163e-06, - "log_odds_chosen": 1.4219014644622803, - "log_odds_ratio": -0.23915810883045197, - "logits/chosen": 351.3125915527344, - "logits/rejected": 230.1565399169922, - "logps/chosen": -0.6768624186515808, - "logps/rejected": -1.5607593059539795, - "loss": 0.7794, - "nll_loss": 0.7453454732894897, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03384312242269516, - "rewards/margins": 0.044194839894771576, - "rewards/rejected": -0.07803796231746674, - "step": 2045 - }, - { - "epoch": 1.5287099179716628, - "grad_norm": 18.903549194335938, - "learning_rate": 1.1043152607484655e-06, - "log_odds_chosen": 0.8176400065422058, - "log_odds_ratio": -0.44728603959083557, - "logits/chosen": 216.34323120117188, - "logits/rejected": 294.29400634765625, - "logps/chosen": -0.974872887134552, - "logps/rejected": -1.4821202754974365, - "loss": 0.7152, - "nll_loss": 1.073530912399292, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04874364286661148, - "rewards/margins": 0.025362377986311913, - "rewards/rejected": -0.07410602271556854, - "step": 2050 - }, - { - "epoch": 1.5324384787472036, - "grad_norm": 35.3013801574707, - "learning_rate": 1.1029709933472638e-06, - "log_odds_chosen": 0.6240479350090027, - "log_odds_ratio": -0.5656148791313171, - "logits/chosen": 282.98883056640625, - "logits/rejected": 297.6065979003906, - "logps/chosen": -0.7927047610282898, - "logps/rejected": -1.162345290184021, - "loss": 0.7203, - "nll_loss": 0.9205354452133179, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.03963523358106613, - "rewards/margins": 0.018482033163309097, - "rewards/rejected": -0.05811727046966553, - "step": 2055 - }, - { - "epoch": 1.5361670395227442, - "grad_norm": 33.50325393676758, - "learning_rate": 1.1016316230980794e-06, - "log_odds_chosen": 1.0286537408828735, - "log_odds_ratio": -0.4473956227302551, - "logits/chosen": 259.4228515625, - "logits/rejected": 329.91473388671875, - "logps/chosen": -0.3793571889400482, - "logps/rejected": -0.9909393191337585, - "loss": 0.7435, - "nll_loss": 0.7537007331848145, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.01896785944700241, - "rewards/margins": 0.030579105019569397, - "rewards/rejected": -0.04954696446657181, - "step": 2060 - }, - { - "epoch": 1.5398956002982849, - "grad_norm": 51.72113037109375, - "learning_rate": 1.100297120339154e-06, - "log_odds_chosen": 1.8136399984359741, - "log_odds_ratio": -0.28766146302223206, - "logits/chosen": 231.6405792236328, - "logits/rejected": 295.9111633300781, - "logps/chosen": -0.6584376096725464, - "logps/rejected": -1.8166630268096924, - "loss": 0.7734, - "nll_loss": 0.831320583820343, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03292187675833702, - "rewards/margins": 0.05791127681732178, - "rewards/rejected": -0.0908331573009491, - "step": 2065 - }, - { - "epoch": 1.5436241610738255, - "grad_norm": 35.22842788696289, - "learning_rate": 1.098967455659645e-06, - "log_odds_chosen": 0.7052236795425415, - "log_odds_ratio": -0.46038952469825745, - "logits/chosen": 267.4461364746094, - "logits/rejected": 253.51708984375, - "logps/chosen": -0.8432542681694031, - "logps/rejected": -1.2414722442626953, - "loss": 0.8752, - "nll_loss": 1.2147767543792725, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.04216271638870239, - "rewards/margins": 0.019910890609025955, - "rewards/rejected": -0.062073610723018646, - "step": 2070 - }, - { - "epoch": 1.5473527218493661, - "grad_norm": 37.081695556640625, - "learning_rate": 1.0976425998969036e-06, - "log_odds_chosen": 2.2167558670043945, - "log_odds_ratio": -0.16859343647956848, - "logits/chosen": 225.94784545898438, - "logits/rejected": 330.6673278808594, - "logps/chosen": -0.4822764992713928, - "logps/rejected": -1.6261937618255615, - "loss": 0.6839, - "nll_loss": 0.7189350128173828, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02411382645368576, - "rewards/margins": 0.057195864617824554, - "rewards/rejected": -0.08130969107151031, - "step": 2075 - }, - { - "epoch": 1.5510812826249067, - "grad_norm": 31.778491973876953, - "learning_rate": 1.0963225241337867e-06, - "log_odds_chosen": 1.104088544845581, - "log_odds_ratio": -0.29859620332717896, - "logits/chosen": 262.4356689453125, - "logits/rejected": 342.31982421875, - "logps/chosen": -0.758231520652771, - "logps/rejected": -1.469195008277893, - "loss": 0.7981, - "nll_loss": 0.876001238822937, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03791157528758049, - "rewards/margins": 0.03554817661643028, - "rewards/rejected": -0.07345975935459137, - "step": 2080 - }, - { - "epoch": 1.5548098434004474, - "grad_norm": 35.34234619140625, - "learning_rate": 1.0950071996960073e-06, - "log_odds_chosen": 2.7725419998168945, - "log_odds_ratio": -0.2070862054824829, - "logits/chosen": 305.04058837890625, - "logits/rejected": 270.51055908203125, - "logps/chosen": -0.33819884061813354, - "logps/rejected": -2.0783004760742188, - "loss": 0.7546, - "nll_loss": 0.677337110042572, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.016909942030906677, - "rewards/margins": 0.08700508624315262, - "rewards/rejected": -0.1039150208234787, - "step": 2085 - }, - { - "epoch": 1.5585384041759882, - "grad_norm": 29.645185470581055, - "learning_rate": 1.093696598149518e-06, - "log_odds_chosen": 1.5234193801879883, - "log_odds_ratio": -0.203911691904068, - "logits/chosen": 241.8101043701172, - "logits/rejected": 383.9946594238281, - "logps/chosen": -0.5147231817245483, - "logps/rejected": -1.3972744941711426, - "loss": 0.8013, - "nll_loss": 0.6940908432006836, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.025736162438988686, - "rewards/margins": 0.04412756487727165, - "rewards/rejected": -0.06986372917890549, - "step": 2090 - }, - { - "epoch": 1.5622669649515286, - "grad_norm": 24.560203552246094, - "learning_rate": 1.0923906912979294e-06, - "log_odds_chosen": 1.2990704774856567, - "log_odds_ratio": -0.34273821115493774, - "logits/chosen": 244.2469940185547, - "logits/rejected": 354.0511474609375, - "logps/chosen": -0.585131049156189, - "logps/rejected": -1.241407036781311, - "loss": 0.8062, - "nll_loss": 0.6809495091438293, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.029256552457809448, - "rewards/margins": 0.032813798636198044, - "rewards/rejected": -0.06207035854458809, - "step": 2095 - }, - { - "epoch": 1.5659955257270695, - "grad_norm": 44.75265884399414, - "learning_rate": 1.091089451179962e-06, - "log_odds_chosen": 1.2979768514633179, - "log_odds_ratio": -0.24608448147773743, - "logits/chosen": 257.48663330078125, - "logits/rejected": 282.4682312011719, - "logps/chosen": -0.6524869203567505, - "logps/rejected": -1.4456067085266113, - "loss": 0.7177, - "nll_loss": 0.7115803956985474, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03262434899806976, - "rewards/margins": 0.03965599089860916, - "rewards/rejected": -0.07228033989667892, - "step": 2100 - }, - { - "epoch": 1.5697240865026099, - "grad_norm": 29.779512405395508, - "learning_rate": 1.0897928500669322e-06, - "log_odds_chosen": 1.904710054397583, - "log_odds_ratio": -0.26888972520828247, - "logits/chosen": 271.5237121582031, - "logits/rejected": 230.94589233398438, - "logps/chosen": -0.4675252437591553, - "logps/rejected": -1.6127923727035522, - "loss": 0.7141, - "nll_loss": 0.7437263131141663, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.023376263678073883, - "rewards/margins": 0.05726335197687149, - "rewards/rejected": -0.08063961565494537, - "step": 2105 - }, - { - "epoch": 1.5734526472781507, - "grad_norm": 37.24168395996094, - "learning_rate": 1.0885008604602703e-06, - "log_odds_chosen": 1.9846556186676025, - "log_odds_ratio": -0.19076094031333923, - "logits/chosen": 337.10516357421875, - "logits/rejected": 232.5842742919922, - "logps/chosen": -0.2807057201862335, - "logps/rejected": -0.9922233819961548, - "loss": 0.9029, - "nll_loss": 0.8385713696479797, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.014035286381840706, - "rewards/margins": 0.035575877875089645, - "rewards/rejected": -0.0496111698448658, - "step": 2110 - }, - { - "epoch": 1.5771812080536913, - "grad_norm": 20.96354866027832, - "learning_rate": 1.0872134550890703e-06, - "log_odds_chosen": 2.2109742164611816, - "log_odds_ratio": -0.17065270245075226, - "logits/chosen": 359.87103271484375, - "logits/rejected": 263.1868896484375, - "logps/chosen": -0.37477824091911316, - "logps/rejected": -1.4987995624542236, - "loss": 0.7587, - "nll_loss": 0.6580853462219238, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.018738912418484688, - "rewards/margins": 0.056201063096523285, - "rewards/rejected": -0.07493996620178223, - "step": 2115 - }, - { - "epoch": 1.580909768829232, - "grad_norm": 29.629714965820312, - "learning_rate": 1.0859306069076736e-06, - "log_odds_chosen": 1.4747037887573242, - "log_odds_ratio": -0.24402527511119843, - "logits/chosen": 252.5889434814453, - "logits/rejected": 270.71044921875, - "logps/chosen": -0.4646952152252197, - "logps/rejected": -1.2191599607467651, - "loss": 0.7438, - "nll_loss": 0.5707162022590637, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.023234760388731956, - "rewards/margins": 0.03772324323654175, - "rewards/rejected": -0.060958005487918854, - "step": 2120 - }, - { - "epoch": 1.5846383296047726, - "grad_norm": 39.636173248291016, - "learning_rate": 1.084652289093281e-06, - "log_odds_chosen": 1.207501769065857, - "log_odds_ratio": -0.3173540532588959, - "logits/chosen": 273.82684326171875, - "logits/rejected": 323.8694152832031, - "logps/chosen": -0.6688281893730164, - "logps/rejected": -1.4689304828643799, - "loss": 0.7937, - "nll_loss": 0.7868322134017944, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.03344140946865082, - "rewards/margins": 0.04000511020421982, - "rewards/rejected": -0.07344651967287064, - "step": 2125 - }, - { - "epoch": 1.5883668903803132, - "grad_norm": 32.797786712646484, - "learning_rate": 1.083378475043599e-06, - "log_odds_chosen": 1.4022068977355957, - "log_odds_ratio": -0.31950122117996216, - "logits/chosen": 296.51611328125, - "logits/rejected": 315.21453857421875, - "logps/chosen": -0.47831106185913086, - "logps/rejected": -1.1761341094970703, - "loss": 0.7969, - "nll_loss": 0.8280470967292786, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.023915555328130722, - "rewards/margins": 0.03489115834236145, - "rewards/rejected": -0.05880671739578247, - "step": 2130 - }, - { - "epoch": 1.5920954511558538, - "grad_norm": 41.72588348388672, - "learning_rate": 1.0821091383745125e-06, - "log_odds_chosen": 1.1766207218170166, - "log_odds_ratio": -0.29251858592033386, - "logits/chosen": 325.98712158203125, - "logits/rejected": 295.76727294921875, - "logps/chosen": -0.5880266427993774, - "logps/rejected": -1.1857539415359497, - "loss": 0.7138, - "nll_loss": 0.736579179763794, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.029401332139968872, - "rewards/margins": 0.029886364936828613, - "rewards/rejected": -0.059287697076797485, - "step": 2135 - }, - { - "epoch": 1.5958240119313944, - "grad_norm": 31.581787109375, - "learning_rate": 1.0808442529177925e-06, - "log_odds_chosen": 1.7339025735855103, - "log_odds_ratio": -0.22807173430919647, - "logits/chosen": 235.99472045898438, - "logits/rejected": 245.6271514892578, - "logps/chosen": -0.4243757128715515, - "logps/rejected": -1.3414266109466553, - "loss": 0.6748, - "nll_loss": 0.8026620745658875, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.021218786016106606, - "rewards/margins": 0.04585254192352295, - "rewards/rejected": -0.067071333527565, - "step": 2140 - }, - { - "epoch": 1.599552572706935, - "grad_norm": 36.91671371459961, - "learning_rate": 1.0795837927188263e-06, - "log_odds_chosen": 2.544846296310425, - "log_odds_ratio": -0.12393288314342499, - "logits/chosen": 262.7583312988281, - "logits/rejected": 227.25296020507812, - "logps/chosen": -0.38296079635620117, - "logps/rejected": -1.7017844915390015, - "loss": 0.847, - "nll_loss": 0.8456487655639648, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01914804056286812, - "rewards/margins": 0.06594119220972061, - "rewards/rejected": -0.08508922904729843, - "step": 2145 - }, - { - "epoch": 1.6032811334824757, - "grad_norm": 37.5830078125, - "learning_rate": 1.0783277320343842e-06, - "log_odds_chosen": 1.7595809698104858, - "log_odds_ratio": -0.20227794349193573, - "logits/chosen": 280.80010986328125, - "logits/rejected": 277.4466857910156, - "logps/chosen": -0.53038489818573, - "logps/rejected": -1.6522629261016846, - "loss": 0.7063, - "nll_loss": 0.653715193271637, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02651924453675747, - "rewards/margins": 0.056093912571668625, - "rewards/rejected": -0.08261314779520035, - "step": 2150 - }, - { - "epoch": 1.6070096942580165, - "grad_norm": 29.117944717407227, - "learning_rate": 1.0770760453304094e-06, - "log_odds_chosen": 1.5606470108032227, - "log_odds_ratio": -0.2243514508008957, - "logits/chosen": 235.1315460205078, - "logits/rejected": 249.7449188232422, - "logps/chosen": -0.5522037744522095, - "logps/rejected": -1.3416163921356201, - "loss": 0.9441, - "nll_loss": 1.3872045278549194, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.027610192075371742, - "rewards/margins": 0.039470624178647995, - "rewards/rejected": -0.06708081811666489, - "step": 2155 - }, - { - "epoch": 1.610738255033557, - "grad_norm": 32.56808090209961, - "learning_rate": 1.075828707279838e-06, - "log_odds_chosen": 1.9911972284317017, - "log_odds_ratio": -0.15757043659687042, - "logits/chosen": 309.86517333984375, - "logits/rejected": 254.0618896484375, - "logps/chosen": -0.3107526898384094, - "logps/rejected": -1.300750970840454, - "loss": 0.9245, - "nll_loss": 0.7740663886070251, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.015537634491920471, - "rewards/margins": 0.04949990659952164, - "rewards/rejected": -0.06503754109144211, - "step": 2160 - }, - { - "epoch": 1.6144668158090978, - "grad_norm": 36.27479934692383, - "learning_rate": 1.0745856927604474e-06, - "log_odds_chosen": 1.4359018802642822, - "log_odds_ratio": -0.23699648678302765, - "logits/chosen": 244.4740753173828, - "logits/rejected": 239.9732208251953, - "logps/chosen": -0.4890363812446594, - "logps/rejected": -1.3122421503067017, - "loss": 0.8576, - "nll_loss": 0.6015297174453735, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.024451816454529762, - "rewards/margins": 0.04116028919816017, - "rewards/rejected": -0.06561211496591568, - "step": 2165 - }, - { - "epoch": 1.6181953765846382, - "grad_norm": 23.645828247070312, - "learning_rate": 1.0733469768527298e-06, - "log_odds_chosen": 0.8346256017684937, - "log_odds_ratio": -0.4226621687412262, - "logits/chosen": 264.03125, - "logits/rejected": 271.36175537109375, - "logps/chosen": -0.641783595085144, - "logps/rejected": -1.1595213413238525, - "loss": 0.8842, - "nll_loss": 0.8658092617988586, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.03208918124437332, - "rewards/margins": 0.025886883959174156, - "rewards/rejected": -0.05797606706619263, - "step": 2170 - }, - { - "epoch": 1.621923937360179, - "grad_norm": 31.07721710205078, - "learning_rate": 1.0721125348377948e-06, - "log_odds_chosen": 1.6434698104858398, - "log_odds_ratio": -0.22117725014686584, - "logits/chosen": 349.50567626953125, - "logits/rejected": 235.7578125, - "logps/chosen": -0.5138940811157227, - "logps/rejected": -1.2052644491195679, - "loss": 0.7943, - "nll_loss": 0.7714478373527527, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.025694703683257103, - "rewards/margins": 0.03456852585077286, - "rewards/rejected": -0.06026322767138481, - "step": 2175 - }, - { - "epoch": 1.6256524981357197, - "grad_norm": 33.44121551513672, - "learning_rate": 1.0708823421952984e-06, - "log_odds_chosen": 1.2164733409881592, - "log_odds_ratio": -0.306210994720459, - "logits/chosen": 224.77566528320312, - "logits/rejected": 281.5746765136719, - "logps/chosen": -0.4834056794643402, - "logps/rejected": -1.1493103504180908, - "loss": 0.7838, - "nll_loss": 0.5859417915344238, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02417028322815895, - "rewards/margins": 0.03329523652791977, - "rewards/rejected": -0.05746551603078842, - "step": 2180 - }, - { - "epoch": 1.6293810589112603, - "grad_norm": 28.834686279296875, - "learning_rate": 1.0696563746013951e-06, - "log_odds_chosen": 1.1053838729858398, - "log_odds_ratio": -0.3232475817203522, - "logits/chosen": 284.88372802734375, - "logits/rejected": 268.83868408203125, - "logps/chosen": -0.3062482476234436, - "logps/rejected": -0.824844241142273, - "loss": 0.7404, - "nll_loss": 0.6026932001113892, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.01531241275370121, - "rewards/margins": 0.025929799303412437, - "rewards/rejected": -0.041242215782403946, - "step": 2185 - }, - { - "epoch": 1.633109619686801, - "grad_norm": 31.15171241760254, - "learning_rate": 1.0684346079267208e-06, - "log_odds_chosen": 1.9964755773544312, - "log_odds_ratio": -0.1456063687801361, - "logits/chosen": 246.9029541015625, - "logits/rejected": 365.8131408691406, - "logps/chosen": -0.3451932370662689, - "logps/rejected": -1.362628698348999, - "loss": 0.7229, - "nll_loss": 0.5294897556304932, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.017259661108255386, - "rewards/margins": 0.05087178200483322, - "rewards/rejected": -0.0681314468383789, - "step": 2190 - }, - { - "epoch": 1.6368381804623415, - "grad_norm": 29.215320587158203, - "learning_rate": 1.0672170182343944e-06, - "log_odds_chosen": 1.2299872636795044, - "log_odds_ratio": -0.34041067957878113, - "logits/chosen": 311.1549987792969, - "logits/rejected": 251.7224884033203, - "logps/chosen": -0.45255494117736816, - "logps/rejected": -1.0221174955368042, - "loss": 0.8578, - "nll_loss": 1.1535117626190186, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.022627748548984528, - "rewards/margins": 0.028478126972913742, - "rewards/rejected": -0.05110587552189827, - "step": 2195 - }, - { - "epoch": 1.6405667412378822, - "grad_norm": 102.62886047363281, - "learning_rate": 1.066003581778052e-06, - "log_odds_chosen": 2.2655184268951416, - "log_odds_ratio": -0.20223557949066162, - "logits/chosen": 308.0247802734375, - "logits/rejected": 289.7030944824219, - "logps/chosen": -0.36854150891304016, - "logps/rejected": -1.5121017694473267, - "loss": 0.729, - "nll_loss": 0.5330934524536133, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.018427077680826187, - "rewards/margins": 0.057178013026714325, - "rewards/rejected": -0.07560509443283081, - "step": 2200 - }, - { - "epoch": 1.6442953020134228, - "grad_norm": 53.24579620361328, - "learning_rate": 1.0647942749998999e-06, - "log_odds_chosen": 1.2364635467529297, - "log_odds_ratio": -0.29979556798934937, - "logits/chosen": 271.5903625488281, - "logits/rejected": 243.54150390625, - "logps/chosen": -0.3408764600753784, - "logps/rejected": -0.9431129693984985, - "loss": 0.8529, - "nll_loss": 0.8009014129638672, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0170438252389431, - "rewards/margins": 0.030111823230981827, - "rewards/rejected": -0.04715564846992493, - "step": 2205 - }, - { - "epoch": 1.6480238627889636, - "grad_norm": 24.219669342041016, - "learning_rate": 1.0635890745287928e-06, - "log_odds_chosen": 1.7963542938232422, - "log_odds_ratio": -0.21773004531860352, - "logits/chosen": 319.0221252441406, - "logits/rejected": 287.6115417480469, - "logps/chosen": -0.33851131796836853, - "logps/rejected": -1.1391186714172363, - "loss": 0.6733, - "nll_loss": 0.6551445126533508, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.016925565898418427, - "rewards/margins": 0.04003036394715309, - "rewards/rejected": -0.056955933570861816, - "step": 2210 - }, - { - "epoch": 1.651752423564504, - "grad_norm": 71.611328125, - "learning_rate": 1.0623879571783382e-06, - "log_odds_chosen": 0.6888846158981323, - "log_odds_ratio": -0.4963061809539795, - "logits/chosen": 288.99658203125, - "logits/rejected": 229.61874389648438, - "logps/chosen": -1.0426151752471924, - "logps/rejected": -1.4094139337539673, - "loss": 0.8587, - "nll_loss": 1.005955457687378, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05213075876235962, - "rewards/margins": 0.018339937552809715, - "rewards/rejected": -0.07047069072723389, - "step": 2215 - }, - { - "epoch": 1.6554809843400449, - "grad_norm": 45.586490631103516, - "learning_rate": 1.0611908999450224e-06, - "log_odds_chosen": 2.0382678508758545, - "log_odds_ratio": -0.2408449947834015, - "logits/chosen": 347.50482177734375, - "logits/rejected": 337.23687744140625, - "logps/chosen": -0.40741243958473206, - "logps/rejected": -1.6915562152862549, - "loss": 0.8573, - "nll_loss": 0.9340305328369141, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.020370621234178543, - "rewards/margins": 0.06420718133449554, - "rewards/rejected": -0.08457780629396439, - "step": 2220 - }, - { - "epoch": 1.6592095451155853, - "grad_norm": 22.511913299560547, - "learning_rate": 1.0599978800063601e-06, - "log_odds_chosen": 1.3330711126327515, - "log_odds_ratio": -0.2973523736000061, - "logits/chosen": 282.751708984375, - "logits/rejected": 317.66302490234375, - "logps/chosen": -0.8652299642562866, - "logps/rejected": -1.6499191522598267, - "loss": 0.8947, - "nll_loss": 1.1454050540924072, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04326149821281433, - "rewards/margins": 0.0392344631254673, - "rewards/rejected": -0.08249595761299133, - "step": 2225 - }, - { - "epoch": 1.6629381058911261, - "grad_norm": 29.339988708496094, - "learning_rate": 1.058808874719067e-06, - "log_odds_chosen": 1.5107152462005615, - "log_odds_ratio": -0.25294479727745056, - "logits/chosen": 339.2851257324219, - "logits/rejected": 234.14334106445312, - "logps/chosen": -0.6838433146476746, - "logps/rejected": -1.6488145589828491, - "loss": 0.7138, - "nll_loss": 0.7522317171096802, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03419216722249985, - "rewards/margins": 0.048248566687107086, - "rewards/rejected": -0.08244072645902634, - "step": 2230 - }, - { - "epoch": 1.6666666666666665, - "grad_norm": 29.70168685913086, - "learning_rate": 1.057623861617254e-06, - "log_odds_chosen": 1.1427756547927856, - "log_odds_ratio": -0.5265284776687622, - "logits/chosen": 227.7393035888672, - "logits/rejected": 364.26092529296875, - "logps/chosen": -0.31302934885025024, - "logps/rejected": -0.8783591985702515, - "loss": 0.7218, - "nll_loss": 0.5387293696403503, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.01565147005021572, - "rewards/margins": 0.02826649323105812, - "rewards/rejected": -0.04391796141862869, - "step": 2235 - }, - { - "epoch": 1.6703952274422074, - "grad_norm": 27.187803268432617, - "learning_rate": 1.0564428184106459e-06, - "log_odds_chosen": 2.14603853225708, - "log_odds_ratio": -0.14045679569244385, - "logits/chosen": 243.7932891845703, - "logits/rejected": 317.6813049316406, - "logps/chosen": -0.27092471718788147, - "logps/rejected": -1.323062539100647, - "loss": 0.7533, - "nll_loss": 0.5632174611091614, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.013546235859394073, - "rewards/margins": 0.052606888115406036, - "rewards/rejected": -0.06615312397480011, - "step": 2240 - }, - { - "epoch": 1.674123788217748, - "grad_norm": 21.136409759521484, - "learning_rate": 1.0552657229828183e-06, - "log_odds_chosen": 1.5660834312438965, - "log_odds_ratio": -0.20219247043132782, - "logits/chosen": 274.67572021484375, - "logits/rejected": 394.61566162109375, - "logps/chosen": -0.524872899055481, - "logps/rejected": -1.4118573665618896, - "loss": 0.749, - "nll_loss": 0.7359970808029175, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.026243645697832108, - "rewards/margins": 0.044349219650030136, - "rewards/rejected": -0.07059286534786224, - "step": 2245 - }, - { - "epoch": 1.6778523489932886, - "grad_norm": 35.24284362792969, - "learning_rate": 1.0540925533894598e-06, - "log_odds_chosen": 1.991207480430603, - "log_odds_ratio": -0.17093271017074585, - "logits/chosen": 265.2437438964844, - "logits/rejected": 267.25506591796875, - "logps/chosen": -0.22901038825511932, - "logps/rejected": -1.0250616073608398, - "loss": 0.725, - "nll_loss": 0.5921927094459534, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.011450518853962421, - "rewards/margins": 0.039802562445402145, - "rewards/rejected": -0.051253072917461395, - "step": 2250 - }, - { - "epoch": 1.6815809097688292, - "grad_norm": 44.20802307128906, - "learning_rate": 1.0529232878566533e-06, - "log_odds_chosen": 1.456525444984436, - "log_odds_ratio": -0.21252572536468506, - "logits/chosen": 269.14532470703125, - "logits/rejected": 386.76373291015625, - "logps/chosen": -0.48179349303245544, - "logps/rejected": -1.2953091859817505, - "loss": 0.7782, - "nll_loss": 0.7095552086830139, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.024089675396680832, - "rewards/margins": 0.04067578166723251, - "rewards/rejected": -0.06476546078920364, - "step": 2255 - }, - { - "epoch": 1.6853094705443699, - "grad_norm": 27.44753074645996, - "learning_rate": 1.0517579047791782e-06, - "log_odds_chosen": 1.8649475574493408, - "log_odds_ratio": -0.17305569350719452, - "logits/chosen": 290.28131103515625, - "logits/rejected": 323.7326965332031, - "logps/chosen": -0.5407212972640991, - "logps/rejected": -1.6266025304794312, - "loss": 0.7938, - "nll_loss": 0.6335040330886841, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.027036065235733986, - "rewards/margins": 0.05429406091570854, - "rewards/rejected": -0.08133012801408768, - "step": 2260 - }, - { - "epoch": 1.6890380313199105, - "grad_norm": 26.230438232421875, - "learning_rate": 1.050596382718834e-06, - "log_odds_chosen": 0.5695899128913879, - "log_odds_ratio": -0.4868980348110199, - "logits/chosen": 296.47265625, - "logits/rejected": 342.98675537109375, - "logps/chosen": -0.44589918851852417, - "logps/rejected": -0.7740705609321594, - "loss": 0.8626, - "nll_loss": 0.6755470037460327, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.022294962778687477, - "rewards/margins": 0.016408566385507584, - "rewards/rejected": -0.03870353102684021, - "step": 2265 - }, - { - "epoch": 1.692766592095451, - "grad_norm": 32.49907302856445, - "learning_rate": 1.049438700402784e-06, - "log_odds_chosen": 1.0417789220809937, - "log_odds_ratio": -0.37776580452919006, - "logits/chosen": 316.171142578125, - "logits/rejected": 282.19830322265625, - "logps/chosen": -0.7012656927108765, - "logps/rejected": -1.42291259765625, - "loss": 0.757, - "nll_loss": 0.858772873878479, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.03506328910589218, - "rewards/margins": 0.03608234226703644, - "rewards/rejected": -0.07114563137292862, - "step": 2270 - }, - { - "epoch": 1.696495152870992, - "grad_norm": 30.055395126342773, - "learning_rate": 1.0482848367219184e-06, - "log_odds_chosen": 2.239901065826416, - "log_odds_ratio": -0.13538427650928497, - "logits/chosen": 270.7428283691406, - "logits/rejected": 245.0014190673828, - "logps/chosen": -0.46736520528793335, - "logps/rejected": -1.7247527837753296, - "loss": 0.8141, - "nll_loss": 0.8050268292427063, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.023368259891867638, - "rewards/margins": 0.06286938488483429, - "rewards/rejected": -0.08623764663934708, - "step": 2275 - }, - { - "epoch": 1.7002237136465324, - "grad_norm": 30.22320556640625, - "learning_rate": 1.0471347707292389e-06, - "log_odds_chosen": 2.084455966949463, - "log_odds_ratio": -0.16915003955364227, - "logits/chosen": 283.88116455078125, - "logits/rejected": 276.312255859375, - "logps/chosen": -0.407015323638916, - "logps/rejected": -1.264013648033142, - "loss": 0.7253, - "nll_loss": 0.5246295928955078, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02035076916217804, - "rewards/margins": 0.042849913239479065, - "rewards/rejected": -0.0632006824016571, - "step": 2280 - }, - { - "epoch": 1.7039522744220732, - "grad_norm": 31.662309646606445, - "learning_rate": 1.04598848163826e-06, - "log_odds_chosen": 2.118842601776123, - "log_odds_ratio": -0.12763576209545135, - "logits/chosen": 322.88543701171875, - "logits/rejected": 236.43722534179688, - "logps/chosen": -0.3121485114097595, - "logps/rejected": -1.358607530593872, - "loss": 0.6759, - "nll_loss": 0.6424211263656616, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01560742687433958, - "rewards/margins": 0.05232294648885727, - "rewards/rejected": -0.06793037801980972, - "step": 2285 - }, - { - "epoch": 1.7076808351976136, - "grad_norm": 35.000606536865234, - "learning_rate": 1.0448459488214322e-06, - "log_odds_chosen": 1.656010627746582, - "log_odds_ratio": -0.2323346883058548, - "logits/chosen": 226.5392608642578, - "logits/rejected": 343.56005859375, - "logps/chosen": -0.6595849990844727, - "logps/rejected": -1.7632564306259155, - "loss": 0.8784, - "nll_loss": 0.7814360857009888, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03297924995422363, - "rewards/margins": 0.055183570832014084, - "rewards/rejected": -0.08816282451152802, - "step": 2290 - }, - { - "epoch": 1.7114093959731544, - "grad_norm": 42.3331298828125, - "learning_rate": 1.0437071518085826e-06, - "log_odds_chosen": 2.278735399246216, - "log_odds_ratio": -0.2363802194595337, - "logits/chosen": 219.30514526367188, - "logits/rejected": 307.7556457519531, - "logps/chosen": -0.8794175982475281, - "logps/rejected": -2.7193691730499268, - "loss": 0.8859, - "nll_loss": 0.8573682904243469, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.043970879167318344, - "rewards/margins": 0.09199757874011993, - "rewards/rejected": -0.13596846163272858, - "step": 2295 - }, - { - "epoch": 1.7151379567486948, - "grad_norm": 29.715654373168945, - "learning_rate": 1.042572070285374e-06, - "log_odds_chosen": 1.6445808410644531, - "log_odds_ratio": -0.20917654037475586, - "logits/chosen": 373.67364501953125, - "logits/rejected": 262.20782470703125, - "logps/chosen": -0.616637647151947, - "logps/rejected": -1.6696659326553345, - "loss": 0.7245, - "nll_loss": 0.6646779775619507, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03083188459277153, - "rewards/margins": 0.05265141651034355, - "rewards/rejected": -0.08348329365253448, - "step": 2300 - }, - { - "epoch": 1.7188665175242357, - "grad_norm": 43.40738296508789, - "learning_rate": 1.0414406840917835e-06, - "log_odds_chosen": 0.7470336556434631, - "log_odds_ratio": -0.4539114832878113, - "logits/chosen": 259.4789733886719, - "logits/rejected": 320.74505615234375, - "logps/chosen": -0.6084359288215637, - "logps/rejected": -1.0427262783050537, - "loss": 0.7254, - "nll_loss": 0.7061823010444641, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.030421797186136246, - "rewards/margins": 0.02171451970934868, - "rewards/rejected": -0.052136313170194626, - "step": 2305 - }, - { - "epoch": 1.7225950782997763, - "grad_norm": 43.514102935791016, - "learning_rate": 1.0403129732205989e-06, - "log_odds_chosen": 1.3236333131790161, - "log_odds_ratio": -0.34251320362091064, - "logits/chosen": 297.2799377441406, - "logits/rejected": 303.672119140625, - "logps/chosen": -0.6428049206733704, - "logps/rejected": -1.3327915668487549, - "loss": 0.8979, - "nll_loss": 1.0040816068649292, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.03214024752378464, - "rewards/margins": 0.03449932485818863, - "rewards/rejected": -0.06663957983255386, - "step": 2310 - }, - { - "epoch": 1.726323639075317, - "grad_norm": 20.143667221069336, - "learning_rate": 1.0391889178159317e-06, - "log_odds_chosen": 1.8761428594589233, - "log_odds_ratio": -0.16749688982963562, - "logits/chosen": 253.75912475585938, - "logits/rejected": 414.29931640625, - "logps/chosen": -0.5082489252090454, - "logps/rejected": -1.6653759479522705, - "loss": 0.7028, - "nll_loss": 0.6099356412887573, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02541244961321354, - "rewards/margins": 0.05785634368658066, - "rewards/rejected": -0.08326879143714905, - "step": 2315 - }, - { - "epoch": 1.7300521998508576, - "grad_norm": 72.17495727539062, - "learning_rate": 1.0380684981717496e-06, - "log_odds_chosen": 1.4154114723205566, - "log_odds_ratio": -0.24966521561145782, - "logits/chosen": 302.89337158203125, - "logits/rejected": 232.43661499023438, - "logps/chosen": -0.5495892763137817, - "logps/rejected": -1.3912850618362427, - "loss": 0.8476, - "nll_loss": 0.6898863911628723, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.027479464188218117, - "rewards/margins": 0.042084790766239166, - "rewards/rejected": -0.06956425309181213, - "step": 2320 - }, - { - "epoch": 1.7337807606263982, - "grad_norm": 27.25650978088379, - "learning_rate": 1.0369516947304254e-06, - "log_odds_chosen": 1.4768553972244263, - "log_odds_ratio": -0.2644811272621155, - "logits/chosen": 235.1833953857422, - "logits/rejected": 389.1472473144531, - "logps/chosen": -0.4813452363014221, - "logps/rejected": -1.1004750728607178, - "loss": 0.7095, - "nll_loss": 0.6610730886459351, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.024067262187600136, - "rewards/margins": 0.030956488102674484, - "rewards/rejected": -0.05502375215291977, - "step": 2325 - }, - { - "epoch": 1.7375093214019388, - "grad_norm": 25.948041915893555, - "learning_rate": 1.0358384880813022e-06, - "log_odds_chosen": 0.9532462358474731, - "log_odds_ratio": -0.35905149579048157, - "logits/chosen": 261.0428466796875, - "logits/rejected": 301.4358825683594, - "logps/chosen": -0.6915465593338013, - "logps/rejected": -1.262245535850525, - "loss": 0.7547, - "nll_loss": 0.7650719285011292, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.03457732871174812, - "rewards/margins": 0.02853494882583618, - "rewards/rejected": -0.063112273812294, - "step": 2330 - }, - { - "epoch": 1.7412378821774794, - "grad_norm": 25.59049415588379, - "learning_rate": 1.0347288589592778e-06, - "log_odds_chosen": 1.745337724685669, - "log_odds_ratio": -0.21074166893959045, - "logits/chosen": 245.003662109375, - "logits/rejected": 316.6591491699219, - "logps/chosen": -0.42334070801734924, - "logps/rejected": -1.4008102416992188, - "loss": 0.8021, - "nll_loss": 0.5939529538154602, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.021167034283280373, - "rewards/margins": 0.048873476684093475, - "rewards/rejected": -0.0700405091047287, - "step": 2335 - }, - { - "epoch": 1.7449664429530203, - "grad_norm": 42.20834732055664, - "learning_rate": 1.033622788243404e-06, - "log_odds_chosen": 1.9389203786849976, - "log_odds_ratio": -0.1936565339565277, - "logits/chosen": 248.25650024414062, - "logits/rejected": 272.44964599609375, - "logps/chosen": -0.3364408314228058, - "logps/rejected": -1.396413803100586, - "loss": 0.7628, - "nll_loss": 0.7906616926193237, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01682204194366932, - "rewards/margins": 0.05299865081906319, - "rewards/rejected": -0.06982068717479706, - "step": 2340 - }, - { - "epoch": 1.7486950037285607, - "grad_norm": 39.957332611083984, - "learning_rate": 1.0325202569555013e-06, - "log_odds_chosen": 1.3037928342819214, - "log_odds_ratio": -0.24151363968849182, - "logits/chosen": 227.22555541992188, - "logits/rejected": 306.24200439453125, - "logps/chosen": -0.6468255519866943, - "logps/rejected": -1.4430795907974243, - "loss": 0.7788, - "nll_loss": 0.9571031332015991, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.032341279089450836, - "rewards/margins": 0.03981270268559456, - "rewards/rejected": -0.0721539855003357, - "step": 2345 - }, - { - "epoch": 1.7524235645041015, - "grad_norm": 28.40102195739746, - "learning_rate": 1.0314212462587935e-06, - "log_odds_chosen": 1.1444731950759888, - "log_odds_ratio": -0.278439462184906, - "logits/chosen": 261.3208312988281, - "logits/rejected": 334.6600341796875, - "logps/chosen": -0.5706318616867065, - "logps/rejected": -1.231285810470581, - "loss": 0.8961, - "nll_loss": 0.9068748354911804, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.028531592339277267, - "rewards/margins": 0.033032696694135666, - "rewards/rejected": -0.06156428903341293, - "step": 2350 - }, - { - "epoch": 1.756152125279642, - "grad_norm": 35.329689025878906, - "learning_rate": 1.0303257374565546e-06, - "log_odds_chosen": 2.055258274078369, - "log_odds_ratio": -0.2668341100215912, - "logits/chosen": 271.6670227050781, - "logits/rejected": 276.3509216308594, - "logps/chosen": -0.6088863015174866, - "logps/rejected": -1.7825111150741577, - "loss": 1.0489, - "nll_loss": 1.1396795511245728, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.0304443147033453, - "rewards/margins": 0.05868123844265938, - "rewards/rejected": -0.08912555128335953, - "step": 2355 - }, - { - "epoch": 1.7598806860551828, - "grad_norm": 29.990650177001953, - "learning_rate": 1.029233711990773e-06, - "log_odds_chosen": 1.9555641412734985, - "log_odds_ratio": -0.21977956593036652, - "logits/chosen": 241.07321166992188, - "logits/rejected": 291.32513427734375, - "logps/chosen": -0.587958574295044, - "logps/rejected": -1.4706251621246338, - "loss": 0.882, - "nll_loss": 0.8919598460197449, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.029397929087281227, - "rewards/margins": 0.04413332790136337, - "rewards/rejected": -0.07353125512599945, - "step": 2360 - }, - { - "epoch": 1.7636092468307232, - "grad_norm": 33.849735260009766, - "learning_rate": 1.0281451514408315e-06, - "log_odds_chosen": 1.1824842691421509, - "log_odds_ratio": -0.3136942982673645, - "logits/chosen": 288.04937744140625, - "logits/rejected": 276.51513671875, - "logps/chosen": -0.4714314043521881, - "logps/rejected": -1.0039992332458496, - "loss": 0.7739, - "nll_loss": 0.5511269569396973, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.023571569472551346, - "rewards/margins": 0.026628395542502403, - "rewards/rejected": -0.0501999631524086, - "step": 2365 - }, - { - "epoch": 1.767337807606264, - "grad_norm": 25.090824127197266, - "learning_rate": 1.0270600375222014e-06, - "log_odds_chosen": 1.1645184755325317, - "log_odds_ratio": -0.3167712688446045, - "logits/chosen": 266.3980712890625, - "logits/rejected": 263.8811340332031, - "logps/chosen": -0.6657355427742004, - "logps/rejected": -1.2382527589797974, - "loss": 0.8538, - "nll_loss": 0.8998764753341675, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03328678011894226, - "rewards/margins": 0.028625864535570145, - "rewards/rejected": -0.061912644654512405, - "step": 2370 - }, - { - "epoch": 1.7710663683818046, - "grad_norm": 25.454782485961914, - "learning_rate": 1.0259783520851542e-06, - "log_odds_chosen": 1.7824980020523071, - "log_odds_ratio": -0.16765980422496796, - "logits/chosen": 228.30282592773438, - "logits/rejected": 250.9532012939453, - "logps/chosen": -0.4539088308811188, - "logps/rejected": -1.367902159690857, - "loss": 0.692, - "nll_loss": 0.6580927968025208, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.022695442661643028, - "rewards/margins": 0.045699670910835266, - "rewards/rejected": -0.06839511543512344, - "step": 2375 - }, - { - "epoch": 1.7747949291573453, - "grad_norm": 34.68765640258789, - "learning_rate": 1.0249000771134847e-06, - "log_odds_chosen": 2.4849298000335693, - "log_odds_ratio": -0.10201974958181381, - "logits/chosen": 367.751220703125, - "logits/rejected": 233.81063842773438, - "logps/chosen": -0.35851073265075684, - "logps/rejected": -1.7195594310760498, - "loss": 0.8122, - "nll_loss": 0.6891540884971619, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.017925534397363663, - "rewards/margins": 0.06805243343114853, - "rewards/rejected": -0.0859779566526413, - "step": 2380 - }, - { - "epoch": 1.778523489932886, - "grad_norm": 35.67028045654297, - "learning_rate": 1.023825194723252e-06, - "log_odds_chosen": 2.297666311264038, - "log_odds_ratio": -0.27964428067207336, - "logits/chosen": 338.1654968261719, - "logits/rejected": 251.7433319091797, - "logps/chosen": -0.3990646302700043, - "logps/rejected": -1.853380560874939, - "loss": 0.749, - "nll_loss": 0.6812400817871094, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.019953232258558273, - "rewards/margins": 0.07271579653024673, - "rewards/rejected": -0.09266902506351471, - "step": 2385 - }, - { - "epoch": 1.7822520507084265, - "grad_norm": 25.810199737548828, - "learning_rate": 1.022753687161533e-06, - "log_odds_chosen": 1.9922657012939453, - "log_odds_ratio": -0.13860228657722473, - "logits/chosen": 325.4273376464844, - "logits/rejected": 305.3950500488281, - "logps/chosen": -0.2967264950275421, - "logps/rejected": -1.1574678421020508, - "loss": 0.679, - "nll_loss": 0.7255173921585083, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.014836324378848076, - "rewards/margins": 0.04303706809878349, - "rewards/rejected": -0.05787339061498642, - "step": 2390 - }, - { - "epoch": 1.7859806114839671, - "grad_norm": 39.687591552734375, - "learning_rate": 1.0216855368051905e-06, - "log_odds_chosen": 1.339708924293518, - "log_odds_ratio": -0.2671332061290741, - "logits/chosen": 252.9008331298828, - "logits/rejected": 413.5098571777344, - "logps/chosen": -0.615842342376709, - "logps/rejected": -1.4581892490386963, - "loss": 0.8396, - "nll_loss": 0.6729527711868286, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03079211339354515, - "rewards/margins": 0.042117346078157425, - "rewards/rejected": -0.07290945947170258, - "step": 2395 - }, - { - "epoch": 1.7897091722595078, - "grad_norm": 37.045509338378906, - "learning_rate": 1.0206207261596577e-06, - "log_odds_chosen": 1.2392650842666626, - "log_odds_ratio": -0.34518322348594666, - "logits/chosen": 268.81793212890625, - "logits/rejected": 271.18133544921875, - "logps/chosen": -1.042672872543335, - "logps/rejected": -1.9756275415420532, - "loss": 0.7901, - "nll_loss": 1.259760856628418, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.052133649587631226, - "rewards/margins": 0.04664773866534233, - "rewards/rejected": -0.09878138452768326, - "step": 2400 - }, - { - "epoch": 1.7934377330350486, - "grad_norm": 27.853675842285156, - "learning_rate": 1.019559237857732e-06, - "log_odds_chosen": 1.2642799615859985, - "log_odds_ratio": -0.28585925698280334, - "logits/chosen": 281.12274169921875, - "logits/rejected": 342.85113525390625, - "logps/chosen": -0.5235310196876526, - "logps/rejected": -1.238629937171936, - "loss": 0.7731, - "nll_loss": 0.8640685081481934, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02617655135691166, - "rewards/margins": 0.035754941403865814, - "rewards/rejected": -0.061931490898132324, - "step": 2405 - }, - { - "epoch": 1.797166293810589, - "grad_norm": 35.89484405517578, - "learning_rate": 1.0185010546583882e-06, - "log_odds_chosen": 1.206677794456482, - "log_odds_ratio": -0.3049725294113159, - "logits/chosen": 306.3790588378906, - "logits/rejected": 318.6260070800781, - "logps/chosen": -0.5708610415458679, - "logps/rejected": -1.2023152112960815, - "loss": 0.7478, - "nll_loss": 0.6578435897827148, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.028543049469590187, - "rewards/margins": 0.03157271072268486, - "rewards/rejected": -0.060115765780210495, - "step": 2410 - }, - { - "epoch": 1.8008948545861299, - "grad_norm": 29.01224708557129, - "learning_rate": 1.0174461594455997e-06, - "log_odds_chosen": 1.6448085308074951, - "log_odds_ratio": -0.2175542414188385, - "logits/chosen": 313.6014099121094, - "logits/rejected": 275.26690673828125, - "logps/chosen": -0.43500185012817383, - "logps/rejected": -1.2673090696334839, - "loss": 0.756, - "nll_loss": 0.7077773213386536, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02175009250640869, - "rewards/margins": 0.041615359485149384, - "rewards/rejected": -0.06336545199155807, - "step": 2415 - }, - { - "epoch": 1.8046234153616703, - "grad_norm": 26.288108825683594, - "learning_rate": 1.0163945352271773e-06, - "log_odds_chosen": 1.7816665172576904, - "log_odds_ratio": -0.17078550159931183, - "logits/chosen": 330.3316345214844, - "logits/rejected": 262.7288513183594, - "logps/chosen": -0.32259467244148254, - "logps/rejected": -1.186099886894226, - "loss": 0.685, - "nll_loss": 0.6159916520118713, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.016129733994603157, - "rewards/margins": 0.04317526891827583, - "rewards/rejected": -0.05930500105023384, - "step": 2420 - }, - { - "epoch": 1.808351976137211, - "grad_norm": 25.02924346923828, - "learning_rate": 1.0153461651336193e-06, - "log_odds_chosen": 1.3476650714874268, - "log_odds_ratio": -0.24627909064292908, - "logits/chosen": 265.19744873046875, - "logits/rejected": 225.3936767578125, - "logps/chosen": -0.6194410920143127, - "logps/rejected": -1.441693663597107, - "loss": 0.7142, - "nll_loss": 0.7793861627578735, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.030972054228186607, - "rewards/margins": 0.04111263528466225, - "rewards/rejected": -0.0720846876502037, - "step": 2425 - }, - { - "epoch": 1.8120805369127517, - "grad_norm": 30.509244918823242, - "learning_rate": 1.0143010324169743e-06, - "log_odds_chosen": 0.6861900091171265, - "log_odds_ratio": -0.463733971118927, - "logits/chosen": 286.60577392578125, - "logits/rejected": 390.94952392578125, - "logps/chosen": -0.5763664841651917, - "logps/rejected": -0.9059860110282898, - "loss": 0.7244, - "nll_loss": 0.7213357090950012, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.02881832793354988, - "rewards/margins": 0.016480976715683937, - "rewards/rejected": -0.04529929906129837, - "step": 2430 - }, - { - "epoch": 1.8158090976882924, - "grad_norm": 26.72182273864746, - "learning_rate": 1.013259120449719e-06, - "log_odds_chosen": 1.7015457153320312, - "log_odds_ratio": -0.24762597680091858, - "logits/chosen": 255.9739990234375, - "logits/rejected": 267.1307067871094, - "logps/chosen": -0.6648536920547485, - "logps/rejected": -1.7552505731582642, - "loss": 0.8165, - "nll_loss": 0.730551540851593, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03324268385767937, - "rewards/margins": 0.05451985076069832, - "rewards/rejected": -0.08776253461837769, - "step": 2435 - }, - { - "epoch": 1.819537658463833, - "grad_norm": 73.3648681640625, - "learning_rate": 1.0122204127236452e-06, - "log_odds_chosen": 1.8935325145721436, - "log_odds_ratio": -0.16154910624027252, - "logits/chosen": 319.4790954589844, - "logits/rejected": 236.6995849609375, - "logps/chosen": -0.3262883126735687, - "logps/rejected": -1.1091066598892212, - "loss": 0.9358, - "nll_loss": 0.7140798568725586, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.016314415261149406, - "rewards/margins": 0.03914091736078262, - "rewards/rejected": -0.05545533448457718, - "step": 2440 - }, - { - "epoch": 1.8232662192393736, - "grad_norm": 28.924421310424805, - "learning_rate": 1.0111848928487622e-06, - "log_odds_chosen": 1.9721031188964844, - "log_odds_ratio": -0.4474707245826721, - "logits/chosen": 270.16143798828125, - "logits/rejected": 225.2101287841797, - "logps/chosen": -0.5820976495742798, - "logps/rejected": -1.4346909523010254, - "loss": 0.8202, - "nll_loss": 0.9000033140182495, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.02910488285124302, - "rewards/margins": 0.0426296666264534, - "rewards/rejected": -0.07173454761505127, - "step": 2445 - }, - { - "epoch": 1.8269947800149142, - "grad_norm": 29.997285842895508, - "learning_rate": 1.0101525445522107e-06, - "log_odds_chosen": 0.9762939214706421, - "log_odds_ratio": -0.3316425383090973, - "logits/chosen": 282.7019348144531, - "logits/rejected": 300.033935546875, - "logps/chosen": -0.7237281799316406, - "logps/rejected": -1.3366034030914307, - "loss": 0.7892, - "nll_loss": 0.7984665632247925, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03618640825152397, - "rewards/margins": 0.030643757432699203, - "rewards/rejected": -0.06683017313480377, - "step": 2450 - }, - { - "epoch": 1.8307233407904548, - "grad_norm": 31.554685592651367, - "learning_rate": 1.0091233516771892e-06, - "log_odds_chosen": 1.5938708782196045, - "log_odds_ratio": -0.2146611213684082, - "logits/chosen": 313.39581298828125, - "logits/rejected": 330.4501953125, - "logps/chosen": -0.600803017616272, - "logps/rejected": -1.5189390182495117, - "loss": 0.8393, - "nll_loss": 0.9431303143501282, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03004015050828457, - "rewards/margins": 0.04590680077672005, - "rewards/rejected": -0.07594694942235947, - "step": 2455 - }, - { - "epoch": 1.8344519015659957, - "grad_norm": 41.26987838745117, - "learning_rate": 1.0080972981818898e-06, - "log_odds_chosen": 1.9265596866607666, - "log_odds_ratio": -0.23007619380950928, - "logits/chosen": 223.29745483398438, - "logits/rejected": 328.76385498046875, - "logps/chosen": -0.3530147969722748, - "logps/rejected": -1.274556040763855, - "loss": 0.701, - "nll_loss": 0.9676543474197388, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.01765073835849762, - "rewards/margins": 0.04607706516981125, - "rewards/rejected": -0.06372780352830887, - "step": 2460 - }, - { - "epoch": 1.838180462341536, - "grad_norm": 31.087038040161133, - "learning_rate": 1.0070743681384512e-06, - "log_odds_chosen": 1.477882981300354, - "log_odds_ratio": -0.23331475257873535, - "logits/chosen": 335.3787536621094, - "logits/rejected": 304.765625, - "logps/chosen": -0.40864768624305725, - "logps/rejected": -1.1895875930786133, - "loss": 0.9157, - "nll_loss": 0.741824746131897, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.020432384684681892, - "rewards/margins": 0.0390470027923584, - "rewards/rejected": -0.05947938561439514, - "step": 2465 - }, - { - "epoch": 1.841909023117077, - "grad_norm": 21.843830108642578, - "learning_rate": 1.0060545457319173e-06, - "log_odds_chosen": 1.878055214881897, - "log_odds_ratio": -0.25134095549583435, - "logits/chosen": 271.4361877441406, - "logits/rejected": 328.1044006347656, - "logps/chosen": -0.5826598405838013, - "logps/rejected": -1.872991919517517, - "loss": 0.763, - "nll_loss": 0.7137173414230347, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.029132992029190063, - "rewards/margins": 0.06451661884784698, - "rewards/rejected": -0.09364960342645645, - "step": 2470 - }, - { - "epoch": 1.8456375838926173, - "grad_norm": 32.691131591796875, - "learning_rate": 1.0050378152592122e-06, - "log_odds_chosen": 1.0653629302978516, - "log_odds_ratio": -0.3164008557796478, - "logits/chosen": 238.12063598632812, - "logits/rejected": 243.83193969726562, - "logps/chosen": -0.777825653553009, - "logps/rejected": -1.4619934558868408, - "loss": 0.8665, - "nll_loss": 1.1560099124908447, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03889128565788269, - "rewards/margins": 0.03420838713645935, - "rewards/rejected": -0.07309967279434204, - "step": 2475 - }, - { - "epoch": 1.8493661446681582, - "grad_norm": 24.326616287231445, - "learning_rate": 1.0040241611281238e-06, - "log_odds_chosen": 2.5951430797576904, - "log_odds_ratio": -0.17930595576763153, - "logits/chosen": 259.07196044921875, - "logits/rejected": 331.31390380859375, - "logps/chosen": -0.5444868803024292, - "logps/rejected": -2.438835620880127, - "loss": 0.9071, - "nll_loss": 1.0242030620574951, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0272243469953537, - "rewards/margins": 0.09471743553876877, - "rewards/rejected": -0.12194176763296127, - "step": 2480 - }, - { - "epoch": 1.8530947054436986, - "grad_norm": 25.011207580566406, - "learning_rate": 1.0030135678562994e-06, - "log_odds_chosen": 2.789658308029175, - "log_odds_ratio": -0.18728065490722656, - "logits/chosen": 269.24456787109375, - "logits/rejected": 292.9788513183594, - "logps/chosen": -0.4512200951576233, - "logps/rejected": -2.3800320625305176, - "loss": 0.8317, - "nll_loss": 0.6017205715179443, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.022561004385352135, - "rewards/margins": 0.09644060581922531, - "rewards/rejected": -0.1190016120672226, - "step": 2485 - }, - { - "epoch": 1.8568232662192394, - "grad_norm": 73.51561737060547, - "learning_rate": 1.002006020070253e-06, - "log_odds_chosen": 0.9777796864509583, - "log_odds_ratio": -0.36118194460868835, - "logits/chosen": 321.1767883300781, - "logits/rejected": 240.0565643310547, - "logps/chosen": -0.9465814828872681, - "logps/rejected": -1.4799751043319702, - "loss": 1.0271, - "nll_loss": 1.507942795753479, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.047329071909189224, - "rewards/margins": 0.026669690385460854, - "rewards/rejected": -0.07399876415729523, - "step": 2490 - }, - { - "epoch": 1.86055182699478, - "grad_norm": 26.286090850830078, - "learning_rate": 1.0010015025043829e-06, - "log_odds_chosen": 1.2896175384521484, - "log_odds_ratio": -0.4019300937652588, - "logits/chosen": 271.53936767578125, - "logits/rejected": 366.7110900878906, - "logps/chosen": -1.0210630893707275, - "logps/rejected": -2.0704128742218018, - "loss": 0.819, - "nll_loss": 0.9950692057609558, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.05105315521359444, - "rewards/margins": 0.05246748775243759, - "rewards/rejected": -0.10352064669132233, - "step": 2495 - }, - { - "epoch": 1.8642803877703207, - "grad_norm": 20.104961395263672, - "learning_rate": 1.0000000000000002e-06, - "log_odds_chosen": 1.461150050163269, - "log_odds_ratio": -0.3153151571750641, - "logits/chosen": 328.796630859375, - "logits/rejected": 246.5860595703125, - "logps/chosen": -0.4590969979763031, - "logps/rejected": -1.1847676038742065, - "loss": 0.7353, - "nll_loss": 0.6384435296058655, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.022954851388931274, - "rewards/margins": 0.03628353029489517, - "rewards/rejected": -0.059238385409116745, - "step": 2500 - }, - { - "epoch": 1.8680089485458613, - "grad_norm": 35.04132080078125, - "learning_rate": 9.990014975043674e-07, - "log_odds_chosen": 1.5580610036849976, - "log_odds_ratio": -0.25468143820762634, - "logits/chosen": 243.8667449951172, - "logits/rejected": 281.18438720703125, - "logps/chosen": -0.40737462043762207, - "logps/rejected": -1.277301549911499, - "loss": 0.7427, - "nll_loss": 0.7110114097595215, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.020368730649352074, - "rewards/margins": 0.043496355414390564, - "rewards/rejected": -0.06386508047580719, - "step": 2505 - }, - { - "epoch": 1.871737509321402, - "grad_norm": 24.962072372436523, - "learning_rate": 9.98005980069749e-07, - "log_odds_chosen": 1.254436731338501, - "log_odds_ratio": -0.27541384100914, - "logits/chosen": 229.4415283203125, - "logits/rejected": 393.24273681640625, - "logps/chosen": -0.6574285626411438, - "logps/rejected": -1.3385016918182373, - "loss": 0.8482, - "nll_loss": 0.8830909729003906, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03287142515182495, - "rewards/margins": 0.03405366092920303, - "rewards/rejected": -0.06692508608102798, - "step": 2510 - }, - { - "epoch": 1.8754660700969426, - "grad_norm": 30.453201293945312, - "learning_rate": 9.97013432852472e-07, - "log_odds_chosen": 1.2166677713394165, - "log_odds_ratio": -0.2671045958995819, - "logits/chosen": 272.3184509277344, - "logits/rejected": 280.6361389160156, - "logps/chosen": -0.6184095144271851, - "logps/rejected": -1.3369323015213013, - "loss": 0.75, - "nll_loss": 0.8069745898246765, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.030920475721359253, - "rewards/margins": 0.03592614084482193, - "rewards/rejected": -0.06684662401676178, - "step": 2515 - }, - { - "epoch": 1.8791946308724832, - "grad_norm": 33.327064514160156, - "learning_rate": 9.960238411119948e-07, - "log_odds_chosen": 0.5758689641952515, - "log_odds_ratio": -0.5110028982162476, - "logits/chosen": 287.698974609375, - "logits/rejected": 291.1016845703125, - "logps/chosen": -0.6559032201766968, - "logps/rejected": -0.974493145942688, - "loss": 0.837, - "nll_loss": 0.858150839805603, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.03279516100883484, - "rewards/margins": 0.01592949591577053, - "rewards/rejected": -0.04872466251254082, - "step": 2520 - }, - { - "epoch": 1.882923191648024, - "grad_norm": 25.30487632751465, - "learning_rate": 9.950371902099892e-07, - "log_odds_chosen": 1.8327430486679077, - "log_odds_ratio": -0.18470773100852966, - "logits/chosen": 249.9955291748047, - "logits/rejected": 334.0716552734375, - "logps/chosen": -0.38553938269615173, - "logps/rejected": -1.351219654083252, - "loss": 0.7777, - "nll_loss": 0.7313138842582703, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.019276970997452736, - "rewards/margins": 0.04828401282429695, - "rewards/rejected": -0.06756098568439484, - "step": 2525 - }, - { - "epoch": 1.8866517524235644, - "grad_norm": 26.441835403442383, - "learning_rate": 9.9405346560943e-07, - "log_odds_chosen": 2.2847981452941895, - "log_odds_ratio": -0.1951819658279419, - "logits/chosen": 337.1388244628906, - "logits/rejected": 250.0087127685547, - "logps/chosen": -0.4149288535118103, - "logps/rejected": -1.8644115924835205, - "loss": 0.7074, - "nll_loss": 0.609083890914917, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.020746443420648575, - "rewards/margins": 0.07247413694858551, - "rewards/rejected": -0.09322059154510498, - "step": 2530 - }, - { - "epoch": 1.8903803131991053, - "grad_norm": 32.91602325439453, - "learning_rate": 9.930726528736969e-07, - "log_odds_chosen": 1.8453359603881836, - "log_odds_ratio": -0.2705146372318268, - "logits/chosen": 282.07501220703125, - "logits/rejected": 294.9857482910156, - "logps/chosen": -0.4658958315849304, - "logps/rejected": -1.3793002367019653, - "loss": 0.7207, - "nll_loss": 0.7077971696853638, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.02329479157924652, - "rewards/margins": 0.045670218765735626, - "rewards/rejected": -0.06896501034498215, - "step": 2535 - }, - { - "epoch": 1.8941088739746457, - "grad_norm": 40.36330032348633, - "learning_rate": 9.920947376656814e-07, - "log_odds_chosen": 1.6241308450698853, - "log_odds_ratio": -0.19381117820739746, - "logits/chosen": 298.33380126953125, - "logits/rejected": 315.4779052734375, - "logps/chosen": -0.40699100494384766, - "logps/rejected": -1.2395427227020264, - "loss": 0.715, - "nll_loss": 0.543134868144989, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.020349550992250443, - "rewards/margins": 0.04162757843732834, - "rewards/rejected": -0.06197713688015938, - "step": 2540 - }, - { - "epoch": 1.8978374347501865, - "grad_norm": 30.227581024169922, - "learning_rate": 9.911197057469108e-07, - "log_odds_chosen": 1.1281182765960693, - "log_odds_ratio": -0.5002743601799011, - "logits/chosen": 231.5388946533203, - "logits/rejected": 360.74151611328125, - "logps/chosen": -0.6425597071647644, - "logps/rejected": -1.2096552848815918, - "loss": 0.8401, - "nll_loss": 0.8094128370285034, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.0321279838681221, - "rewards/margins": 0.02835477516055107, - "rewards/rejected": -0.06048276275396347, - "step": 2545 - }, - { - "epoch": 1.901565995525727, - "grad_norm": 32.18780517578125, - "learning_rate": 9.901475429766744e-07, - "log_odds_chosen": 1.5464965105056763, - "log_odds_ratio": -0.19671565294265747, - "logits/chosen": 230.8481903076172, - "logits/rejected": 282.4951477050781, - "logps/chosen": -0.5520056486129761, - "logps/rejected": -1.4748151302337646, - "loss": 0.7832, - "nll_loss": 0.6697397232055664, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.027600282803177834, - "rewards/margins": 0.04614047333598137, - "rewards/rejected": -0.07374076545238495, - "step": 2550 - }, - { - "epoch": 1.9052945563012678, - "grad_norm": 30.209247589111328, - "learning_rate": 9.891782353111634e-07, - "log_odds_chosen": 1.0939127206802368, - "log_odds_ratio": -0.32452109456062317, - "logits/chosen": 251.38613891601562, - "logits/rejected": 283.2451477050781, - "logps/chosen": -0.6361740827560425, - "logps/rejected": -1.2634795904159546, - "loss": 0.7993, - "nll_loss": 0.8160279989242554, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.031808704137802124, - "rewards/margins": 0.031365279108285904, - "rewards/rejected": -0.06317397952079773, - "step": 2555 - }, - { - "epoch": 1.9090231170768084, - "grad_norm": 31.539539337158203, - "learning_rate": 9.882117688026186e-07, - "log_odds_chosen": 1.4402602910995483, - "log_odds_ratio": -0.27180877327919006, - "logits/chosen": 270.4451904296875, - "logits/rejected": 353.8406677246094, - "logps/chosen": -0.4802216589450836, - "logps/rejected": -1.1314728260040283, - "loss": 0.732, - "nll_loss": 0.7607029676437378, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02401108108460903, - "rewards/margins": 0.03256256505846977, - "rewards/rejected": -0.05657364800572395, - "step": 2560 - }, - { - "epoch": 1.912751677852349, - "grad_norm": 23.579139709472656, - "learning_rate": 9.872481295984873e-07, - "log_odds_chosen": 1.7532860040664673, - "log_odds_ratio": -0.25976070761680603, - "logits/chosen": 280.870849609375, - "logits/rejected": 333.12335205078125, - "logps/chosen": -0.564735472202301, - "logps/rejected": -1.3669008016586304, - "loss": 0.7341, - "nll_loss": 0.9143686294555664, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02823677286505699, - "rewards/margins": 0.04010826721787453, - "rewards/rejected": -0.06834504753351212, - "step": 2565 - }, - { - "epoch": 1.9164802386278896, - "grad_norm": 28.962295532226562, - "learning_rate": 9.862873039405896e-07, - "log_odds_chosen": 2.081594944000244, - "log_odds_ratio": -0.14410951733589172, - "logits/chosen": 326.0066833496094, - "logits/rejected": 308.6968078613281, - "logps/chosen": -0.24978885054588318, - "logps/rejected": -0.9480496644973755, - "loss": 0.7149, - "nll_loss": 0.750888466835022, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.012489442713558674, - "rewards/margins": 0.034913040697574615, - "rewards/rejected": -0.047402482479810715, - "step": 2570 - }, - { - "epoch": 1.9202087994034303, - "grad_norm": 28.942617416381836, - "learning_rate": 9.853292781642933e-07, - "log_odds_chosen": 1.2491536140441895, - "log_odds_ratio": -0.3238029181957245, - "logits/chosen": 314.510009765625, - "logits/rejected": 303.9324951171875, - "logps/chosen": -0.46288585662841797, - "logps/rejected": -1.1104158163070679, - "loss": 0.7426, - "nll_loss": 0.665791928768158, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.023144293576478958, - "rewards/margins": 0.03237650543451309, - "rewards/rejected": -0.05552079528570175, - "step": 2575 - }, - { - "epoch": 1.9239373601789709, - "grad_norm": 27.705230712890625, - "learning_rate": 9.843740386976973e-07, - "log_odds_chosen": 1.1103363037109375, - "log_odds_ratio": -0.3132198452949524, - "logits/chosen": 272.8616027832031, - "logits/rejected": 301.89996337890625, - "logps/chosen": -0.5277503728866577, - "logps/rejected": -1.1141116619110107, - "loss": 0.7313, - "nll_loss": 0.8016616702079773, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.026387521997094154, - "rewards/margins": 0.02931806445121765, - "rewards/rejected": -0.055705584585666656, - "step": 2580 - }, - { - "epoch": 1.9276659209545115, - "grad_norm": 35.12834167480469, - "learning_rate": 9.834215720608247e-07, - "log_odds_chosen": 1.2302391529083252, - "log_odds_ratio": -0.2673874795436859, - "logits/chosen": 237.78921508789062, - "logits/rejected": 335.37872314453125, - "logps/chosen": -0.5789225697517395, - "logps/rejected": -1.2856149673461914, - "loss": 0.6312, - "nll_loss": 0.6525896787643433, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.028946127742528915, - "rewards/margins": 0.035334620624780655, - "rewards/rejected": -0.06428074836730957, - "step": 2585 - }, - { - "epoch": 1.9313944817300523, - "grad_norm": 40.43360137939453, - "learning_rate": 9.824718648648244e-07, - "log_odds_chosen": 1.8699783086776733, - "log_odds_ratio": -0.2383967638015747, - "logits/chosen": 290.26654052734375, - "logits/rejected": 243.76693725585938, - "logps/chosen": -0.6230908036231995, - "logps/rejected": -1.5327818393707275, - "loss": 0.8715, - "nll_loss": 0.955919086933136, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.031154539436101913, - "rewards/margins": 0.04548455402255058, - "rewards/rejected": -0.0766391009092331, - "step": 2590 - }, - { - "epoch": 1.9351230425055927, - "grad_norm": 31.101892471313477, - "learning_rate": 9.81524903811178e-07, - "log_odds_chosen": 2.9978115558624268, - "log_odds_ratio": -0.10844038426876068, - "logits/chosen": 327.2557067871094, - "logits/rejected": 237.67526245117188, - "logps/chosen": -0.28935375809669495, - "logps/rejected": -1.5677084922790527, - "loss": 0.7936, - "nll_loss": 0.7093963623046875, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.014467688277363777, - "rewards/margins": 0.06391773372888565, - "rewards/rejected": -0.07838542759418488, - "step": 2595 - }, - { - "epoch": 1.9388516032811336, - "grad_norm": 25.807065963745117, - "learning_rate": 9.805806756909204e-07, - "log_odds_chosen": 2.795417070388794, - "log_odds_ratio": -0.2436780035495758, - "logits/chosen": 229.7408447265625, - "logits/rejected": 375.4586486816406, - "logps/chosen": -0.5901502370834351, - "logps/rejected": -2.777219295501709, - "loss": 0.7896, - "nll_loss": 0.6403519511222839, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.029507508501410484, - "rewards/margins": 0.10935346782207489, - "rewards/rejected": -0.13886097073554993, - "step": 2600 - }, - { - "epoch": 1.942580164056674, - "grad_norm": 33.04572296142578, - "learning_rate": 9.796391673838654e-07, - "log_odds_chosen": 1.8519318103790283, - "log_odds_ratio": -0.20386524498462677, - "logits/chosen": 247.8744659423828, - "logits/rejected": 232.85092163085938, - "logps/chosen": -0.47160711884498596, - "logps/rejected": -1.1968075037002563, - "loss": 0.8105, - "nll_loss": 0.6423767805099487, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.023580357432365417, - "rewards/margins": 0.03626001626253128, - "rewards/rejected": -0.0598403736948967, - "step": 2605 - }, - { - "epoch": 1.9463087248322148, - "grad_norm": 28.35093879699707, - "learning_rate": 9.787003658578392e-07, - "log_odds_chosen": 1.6584842205047607, - "log_odds_ratio": -0.18459442257881165, - "logits/chosen": 285.55230712890625, - "logits/rejected": 297.67340087890625, - "logps/chosen": -0.4700896143913269, - "logps/rejected": -1.3569244146347046, - "loss": 0.7064, - "nll_loss": 0.6562037467956543, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.023504480719566345, - "rewards/margins": 0.044341739267110825, - "rewards/rejected": -0.06784622371196747, - "step": 2610 - }, - { - "epoch": 1.9500372856077552, - "grad_norm": 29.420454025268555, - "learning_rate": 9.777642581679234e-07, - "log_odds_chosen": 1.7738653421401978, - "log_odds_ratio": -0.2129414975643158, - "logits/chosen": 301.41656494140625, - "logits/rejected": 303.32891845703125, - "logps/chosen": -0.5826541781425476, - "logps/rejected": -1.7005112171173096, - "loss": 0.726, - "nll_loss": 0.6999033689498901, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02913270890712738, - "rewards/margins": 0.055892862379550934, - "rewards/rejected": -0.08502557128667831, - "step": 2615 - }, - { - "epoch": 1.953765846383296, - "grad_norm": 27.041465759277344, - "learning_rate": 9.768308314557044e-07, - "log_odds_chosen": 1.692763090133667, - "log_odds_ratio": -0.2312278300523758, - "logits/chosen": 370.321533203125, - "logits/rejected": 266.9137268066406, - "logps/chosen": -0.29972967505455017, - "logps/rejected": -1.1084052324295044, - "loss": 0.9807, - "nll_loss": 0.5641023516654968, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.014986485242843628, - "rewards/margins": 0.040433771908283234, - "rewards/rejected": -0.05542025715112686, - "step": 2620 - }, - { - "epoch": 1.9574944071588367, - "grad_norm": 34.10032653808594, - "learning_rate": 9.759000729485334e-07, - "log_odds_chosen": 1.7682090997695923, - "log_odds_ratio": -0.16426576673984528, - "logits/chosen": 252.1747589111328, - "logits/rejected": 321.4801940917969, - "logps/chosen": -0.4632466733455658, - "logps/rejected": -1.394919991493225, - "loss": 0.6636, - "nll_loss": 0.6804040670394897, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02316233143210411, - "rewards/margins": 0.04658365994691849, - "rewards/rejected": -0.0697460025548935, - "step": 2625 - }, - { - "epoch": 1.9612229679343773, - "grad_norm": 30.609376907348633, - "learning_rate": 9.749719699587899e-07, - "log_odds_chosen": 0.6991826295852661, - "log_odds_ratio": -0.5681883096694946, - "logits/chosen": 317.21319580078125, - "logits/rejected": 308.8871154785156, - "logps/chosen": -0.5081771612167358, - "logps/rejected": -0.8716028332710266, - "loss": 0.6752, - "nll_loss": 0.6625097990036011, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.025408858433365822, - "rewards/margins": 0.0181712806224823, - "rewards/rejected": -0.04358014091849327, - "step": 2630 - }, - { - "epoch": 1.964951528709918, - "grad_norm": 42.76338577270508, - "learning_rate": 9.740465098831574e-07, - "log_odds_chosen": 2.969538450241089, - "log_odds_ratio": -0.11367060989141464, - "logits/chosen": 224.41714477539062, - "logits/rejected": 402.7392272949219, - "logps/chosen": -0.47400563955307007, - "logps/rejected": -2.5928235054016113, - "loss": 0.8997, - "nll_loss": 0.6203058958053589, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.023700281977653503, - "rewards/margins": 0.10594089329242706, - "rewards/rejected": -0.12964117527008057, - "step": 2635 - }, - { - "epoch": 1.9686800894854586, - "grad_norm": 58.974937438964844, - "learning_rate": 9.731236802019038e-07, - "log_odds_chosen": 0.8806959986686707, - "log_odds_ratio": -0.34731438755989075, - "logits/chosen": 282.51702880859375, - "logits/rejected": 280.09625244140625, - "logps/chosen": -0.7066971659660339, - "logps/rejected": -1.2326793670654297, - "loss": 0.7537, - "nll_loss": 0.8324522972106934, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.035334862768650055, - "rewards/margins": 0.02629910036921501, - "rewards/rejected": -0.061633966863155365, - "step": 2640 - }, - { - "epoch": 1.9724086502609992, - "grad_norm": 28.9498348236084, - "learning_rate": 9.722034684781694e-07, - "log_odds_chosen": 2.4756176471710205, - "log_odds_ratio": -0.24778422713279724, - "logits/chosen": 265.3336181640625, - "logits/rejected": 320.37030029296875, - "logps/chosen": -0.8845283389091492, - "logps/rejected": -2.9828286170959473, - "loss": 0.8349, - "nll_loss": 0.917392909526825, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.04422641545534134, - "rewards/margins": 0.10491502285003662, - "rewards/rejected": -0.14914144575595856, - "step": 2645 - }, - { - "epoch": 1.9761372110365398, - "grad_norm": 34.63941192626953, - "learning_rate": 9.712858623572642e-07, - "log_odds_chosen": 1.0056865215301514, - "log_odds_ratio": -0.3419247269630432, - "logits/chosen": 393.86212158203125, - "logits/rejected": 310.47967529296875, - "logps/chosen": -0.6558185815811157, - "logps/rejected": -1.21368408203125, - "loss": 0.8285, - "nll_loss": 0.74144446849823, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03279092535376549, - "rewards/margins": 0.027893275022506714, - "rewards/rejected": -0.0606842041015625, - "step": 2650 - }, - { - "epoch": 1.9798657718120807, - "grad_norm": 44.08583450317383, - "learning_rate": 9.7037084956597e-07, - "log_odds_chosen": 1.453360676765442, - "log_odds_ratio": -0.2626263499259949, - "logits/chosen": 355.21466064453125, - "logits/rejected": 316.456298828125, - "logps/chosen": -0.6984671354293823, - "logps/rejected": -1.637078046798706, - "loss": 0.7335, - "nll_loss": 0.8530977964401245, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.034923359751701355, - "rewards/margins": 0.04693054407835007, - "rewards/rejected": -0.08185391128063202, - "step": 2655 - }, - { - "epoch": 1.983594332587621, - "grad_norm": 25.64059829711914, - "learning_rate": 9.694584179118515e-07, - "log_odds_chosen": 1.5778143405914307, - "log_odds_ratio": -0.2293388843536377, - "logits/chosen": 275.12872314453125, - "logits/rejected": 276.2782287597656, - "logps/chosen": -0.6455305814743042, - "logps/rejected": -1.6236755847930908, - "loss": 0.8939, - "nll_loss": 0.7800266146659851, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03227652981877327, - "rewards/margins": 0.04890725016593933, - "rewards/rejected": -0.0811837837100029, - "step": 2660 - }, - { - "epoch": 1.987322893363162, - "grad_norm": 25.963117599487305, - "learning_rate": 9.685485552825746e-07, - "log_odds_chosen": 1.682382583618164, - "log_odds_ratio": -0.21915654838085175, - "logits/chosen": 309.519287109375, - "logits/rejected": 300.22711181640625, - "logps/chosen": -0.49202483892440796, - "logps/rejected": -1.5092017650604248, - "loss": 0.8021, - "nll_loss": 0.619569718837738, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.024601245298981667, - "rewards/margins": 0.05085884407162666, - "rewards/rejected": -0.07546009123325348, - "step": 2665 - }, - { - "epoch": 1.9910514541387023, - "grad_norm": 22.286958694458008, - "learning_rate": 9.676412496452296e-07, - "log_odds_chosen": 2.5880298614501953, - "log_odds_ratio": -0.12576687335968018, - "logits/chosen": 329.0981140136719, - "logits/rejected": 218.645263671875, - "logps/chosen": -0.20810845494270325, - "logps/rejected": -1.2437965869903564, - "loss": 0.7871, - "nll_loss": 0.7125676274299622, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010405421257019043, - "rewards/margins": 0.05178440734744072, - "rewards/rejected": -0.06218982860445976, - "step": 2670 - }, - { - "epoch": 1.9947800149142432, - "grad_norm": 58.64865493774414, - "learning_rate": 9.667364890456637e-07, - "log_odds_chosen": 1.8333499431610107, - "log_odds_ratio": -0.2274298220872879, - "logits/chosen": 233.14700317382812, - "logits/rejected": 343.3663330078125, - "logps/chosen": -0.4733358323574066, - "logps/rejected": -1.595900058746338, - "loss": 0.7804, - "nll_loss": 0.7983943223953247, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02366679348051548, - "rewards/margins": 0.05612821504473686, - "rewards/rejected": -0.07979501783847809, - "step": 2675 - }, - { - "epoch": 1.9985085756897838, - "grad_norm": 23.050626754760742, - "learning_rate": 9.658342616078198e-07, - "log_odds_chosen": 1.8468844890594482, - "log_odds_ratio": -0.17167513072490692, - "logits/chosen": 348.5022888183594, - "logits/rejected": 312.6022033691406, - "logps/chosen": -0.39397111535072327, - "logps/rejected": -1.3462388515472412, - "loss": 0.7446, - "nll_loss": 0.6003307104110718, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.019698558375239372, - "rewards/margins": 0.04761338606476784, - "rewards/rejected": -0.06731195002794266, - "step": 2680 - }, - { - "epoch": 2.0, - "eval_log_odds_chosen": 0.2745331823825836, - "eval_log_odds_ratio": -0.6995564103126526, - "eval_logits/chosen": 318.00775146484375, - "eval_logits/rejected": 290.7071228027344, - "eval_logps/chosen": -1.0406585931777954, - "eval_logps/rejected": -1.2083474397659302, - "eval_loss": 1.4665355682373047, - "eval_nll_loss": 1.4116891622543335, - "eval_rewards/accuracies": 0.5179855823516846, - "eval_rewards/chosen": -0.05203293636441231, - "eval_rewards/margins": 0.008384437300264835, - "eval_rewards/rejected": -0.06041736900806427, - "eval_runtime": 26.0986, - "eval_samples_per_second": 21.189, - "eval_steps_per_second": 5.326, - "step": 2682 - }, - { - "epoch": 2.0022371364653244, - "grad_norm": 26.684385299682617, - "learning_rate": 9.649345555330812e-07, - "log_odds_chosen": 1.777043342590332, - "log_odds_ratio": -0.1763921082019806, - "logits/chosen": 382.5489807128906, - "logits/rejected": 272.52667236328125, - "logps/chosen": -0.36947494745254517, - "logps/rejected": -1.2000513076782227, - "loss": 0.5907, - "nll_loss": 0.6470656991004944, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.018473748117685318, - "rewards/margins": 0.041528817266225815, - "rewards/rejected": -0.06000257283449173, - "step": 2685 - }, - { - "epoch": 2.005965697240865, - "grad_norm": 22.610498428344727, - "learning_rate": 9.640373590996239e-07, - "log_odds_chosen": 3.2965126037597656, - "log_odds_ratio": -0.07868436723947525, - "logits/chosen": 309.4055480957031, - "logits/rejected": 214.39370727539062, - "logps/chosen": -0.2039017677307129, - "logps/rejected": -1.4057035446166992, - "loss": 0.3432, - "nll_loss": 0.2935466170310974, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01019508857280016, - "rewards/margins": 0.0600900836288929, - "rewards/rejected": -0.07028517127037048, - "step": 2690 - }, - { - "epoch": 2.0096942580164057, - "grad_norm": 27.6590633392334, - "learning_rate": 9.631426606617747e-07, - "log_odds_chosen": 2.855597972869873, - "log_odds_ratio": -0.06953402608633041, - "logits/chosen": 250.19332885742188, - "logits/rejected": 296.03790283203125, - "logps/chosen": -0.30304738879203796, - "logps/rejected": -1.937173843383789, - "loss": 0.3477, - "nll_loss": 0.4589840769767761, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.015152370557188988, - "rewards/margins": 0.08170632272958755, - "rewards/rejected": -0.0968586876988411, - "step": 2695 - }, - { - "epoch": 2.0134228187919465, - "grad_norm": 29.16385841369629, - "learning_rate": 9.622504486493764e-07, - "log_odds_chosen": 3.579169511795044, - "log_odds_ratio": -0.03667069226503372, - "logits/chosen": 312.031982421875, - "logits/rejected": 232.13540649414062, - "logps/chosen": -0.1550385057926178, - "logps/rejected": -1.860772728919983, - "loss": 0.3852, - "nll_loss": 0.27334973216056824, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007751925382763147, - "rewards/margins": 0.0852867141366005, - "rewards/rejected": -0.0930386334657669, - "step": 2700 - }, - { - "epoch": 2.017151379567487, - "grad_norm": 27.79994773864746, - "learning_rate": 9.613607115671605e-07, - "log_odds_chosen": 2.4147298336029053, - "log_odds_ratio": -0.14087031781673431, - "logits/chosen": 217.4571990966797, - "logits/rejected": 313.4581298828125, - "logps/chosen": -0.2927229702472687, - "logps/rejected": -1.3519266843795776, - "loss": 0.4388, - "nll_loss": 0.4571770131587982, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.014636148698627949, - "rewards/margins": 0.05296017974615097, - "rewards/rejected": -0.06759633123874664, - "step": 2705 - }, - { - "epoch": 2.0208799403430278, - "grad_norm": 50.74065017700195, - "learning_rate": 9.604734379941232e-07, - "log_odds_chosen": 3.0993762016296387, - "log_odds_ratio": -0.07755591720342636, - "logits/chosen": 222.40615844726562, - "logits/rejected": 364.3323669433594, - "logps/chosen": -0.26645874977111816, - "logps/rejected": -1.9035640954971313, - "loss": 0.3164, - "nll_loss": 0.3632132411003113, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.013322939164936543, - "rewards/margins": 0.08185525983572006, - "rewards/rejected": -0.09517820179462433, - "step": 2710 - }, - { - "epoch": 2.024608501118568, - "grad_norm": 26.640838623046875, - "learning_rate": 9.595886165829119e-07, - "log_odds_chosen": 3.6835155487060547, - "log_odds_ratio": -0.046007800847291946, - "logits/chosen": 215.11251831054688, - "logits/rejected": 341.93853759765625, - "logps/chosen": -0.15601372718811035, - "logps/rejected": -2.008997678756714, - "loss": 0.402, - "nll_loss": 0.4539243280887604, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007800686173141003, - "rewards/margins": 0.0926491916179657, - "rewards/rejected": -0.10044988244771957, - "step": 2715 - }, - { - "epoch": 2.028337061894109, - "grad_norm": 25.49390983581543, - "learning_rate": 9.58706236059213e-07, - "log_odds_chosen": 2.7125561237335205, - "log_odds_ratio": -0.1688893735408783, - "logits/chosen": 235.95077514648438, - "logits/rejected": 300.44744873046875, - "logps/chosen": -0.11619459092617035, - "logps/rejected": -1.2292636632919312, - "loss": 0.3524, - "nll_loss": 0.33161500096321106, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.005809730384498835, - "rewards/margins": 0.05565345287322998, - "rewards/rejected": -0.061463188380002975, - "step": 2720 - }, - { - "epoch": 2.0320656226696494, - "grad_norm": 22.085163116455078, - "learning_rate": 9.578262852211515e-07, - "log_odds_chosen": 4.49515438079834, - "log_odds_ratio": -0.017055442556738853, - "logits/chosen": 296.13812255859375, - "logits/rejected": 215.68603515625, - "logps/chosen": -0.08282952010631561, - "logps/rejected": -1.8654909133911133, - "loss": 0.335, - "nll_loss": 0.2726573348045349, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.004141476005315781, - "rewards/margins": 0.08913306891918182, - "rewards/rejected": -0.0932745486497879, - "step": 2725 - }, - { - "epoch": 2.0357941834451903, - "grad_norm": 34.273563385009766, - "learning_rate": 9.56948752938691e-07, - "log_odds_chosen": 2.5770914554595947, - "log_odds_ratio": -0.09351523965597153, - "logits/chosen": 216.06442260742188, - "logits/rejected": 237.89999389648438, - "logps/chosen": -0.3800535500049591, - "logps/rejected": -1.7915513515472412, - "loss": 0.5003, - "nll_loss": 0.48338446021080017, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.019002676010131836, - "rewards/margins": 0.07057489454746246, - "rewards/rejected": -0.0895775705575943, - "step": 2730 - }, - { - "epoch": 2.0395227442207307, - "grad_norm": 21.684743881225586, - "learning_rate": 9.560736281530443e-07, - "log_odds_chosen": 2.4269745349884033, - "log_odds_ratio": -0.11397049576044083, - "logits/chosen": 255.40737915039062, - "logits/rejected": 226.7773895263672, - "logps/chosen": -0.32254600524902344, - "logps/rejected": -1.4981099367141724, - "loss": 0.4108, - "nll_loss": 0.4266533851623535, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.016127299517393112, - "rewards/margins": 0.05877820402383804, - "rewards/rejected": -0.07490549236536026, - "step": 2735 - }, - { - "epoch": 2.0432513049962715, - "grad_norm": 26.505800247192383, - "learning_rate": 9.552008998760876e-07, - "log_odds_chosen": 2.976888656616211, - "log_odds_ratio": -0.1592601090669632, - "logits/chosen": 187.9979248046875, - "logits/rejected": 275.2182922363281, - "logps/chosen": -0.2908754348754883, - "logps/rejected": -1.6026607751846313, - "loss": 0.2875, - "nll_loss": 0.35587406158447266, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.014543771743774414, - "rewards/margins": 0.06558927148580551, - "rewards/rejected": -0.08013304322957993, - "step": 2740 - }, - { - "epoch": 2.046979865771812, - "grad_norm": 28.83789825439453, - "learning_rate": 9.543305571897804e-07, - "log_odds_chosen": 3.333977222442627, - "log_odds_ratio": -0.04819099232554436, - "logits/chosen": 220.34219360351562, - "logits/rejected": 305.9772033691406, - "logps/chosen": -0.3683032691478729, - "logps/rejected": -2.5844974517822266, - "loss": 0.361, - "nll_loss": 0.41283002495765686, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.018415164202451706, - "rewards/margins": 0.11080970615148544, - "rewards/rejected": -0.12922486662864685, - "step": 2745 - }, - { - "epoch": 2.0507084265473527, - "grad_norm": 39.23418426513672, - "learning_rate": 9.534625892455924e-07, - "log_odds_chosen": 4.413471221923828, - "log_odds_ratio": -0.022918984293937683, - "logits/chosen": 275.4587097167969, - "logits/rejected": 287.4325866699219, - "logps/chosen": -0.12055452913045883, - "logps/rejected": -1.5707428455352783, - "loss": 0.4768, - "nll_loss": 0.33015745878219604, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006027726456522942, - "rewards/margins": 0.07250942289829254, - "rewards/rejected": -0.07853714376688004, - "step": 2750 - }, - { - "epoch": 2.054436987322893, - "grad_norm": 17.342527389526367, - "learning_rate": 9.525969852639353e-07, - "log_odds_chosen": 4.0966644287109375, - "log_odds_ratio": -0.021411772817373276, - "logits/chosen": 278.2864074707031, - "logits/rejected": 279.3855285644531, - "logps/chosen": -0.17763462662696838, - "logps/rejected": -2.4085562229156494, - "loss": 0.33, - "nll_loss": 0.30884307622909546, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.00888173095881939, - "rewards/margins": 0.11154607683420181, - "rewards/rejected": -0.12042780220508575, - "step": 2755 - }, - { - "epoch": 2.058165548098434, - "grad_norm": 23.607702255249023, - "learning_rate": 9.517337345336012e-07, - "log_odds_chosen": 4.872763156890869, - "log_odds_ratio": -0.033237017691135406, - "logits/chosen": 272.10198974609375, - "logits/rejected": 268.1596374511719, - "logps/chosen": -0.3528057634830475, - "logps/rejected": -3.5775978565216064, - "loss": 0.3943, - "nll_loss": 0.34420034289360046, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.017640287056565285, - "rewards/margins": 0.1612395942211151, - "rewards/rejected": -0.17887988686561584, - "step": 2760 - }, - { - "epoch": 2.061894108873975, - "grad_norm": 27.1068172454834, - "learning_rate": 9.508728264112049e-07, - "log_odds_chosen": 3.387683868408203, - "log_odds_ratio": -0.06977426260709763, - "logits/chosen": 185.00680541992188, - "logits/rejected": 304.59295654296875, - "logps/chosen": -0.22868840396404266, - "logps/rejected": -1.8237133026123047, - "loss": 0.3803, - "nll_loss": 0.4933244585990906, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.011434420011937618, - "rewards/margins": 0.07975125312805176, - "rewards/rejected": -0.09118566662073135, - "step": 2765 - }, - { - "epoch": 2.0656226696495152, - "grad_norm": 26.8938045501709, - "learning_rate": 9.50014250320633e-07, - "log_odds_chosen": 4.59998083114624, - "log_odds_ratio": -0.04335717111825943, - "logits/chosen": 206.05343627929688, - "logits/rejected": 303.5867004394531, - "logps/chosen": -0.23467274010181427, - "logps/rejected": -3.108351230621338, - "loss": 0.3739, - "nll_loss": 0.32495003938674927, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.011733637191355228, - "rewards/margins": 0.14368394017219543, - "rewards/rejected": -0.1554175764322281, - "step": 2770 - }, - { - "epoch": 2.069351230425056, - "grad_norm": 32.615386962890625, - "learning_rate": 9.49157995752499e-07, - "log_odds_chosen": 2.7522850036621094, - "log_odds_ratio": -0.1006212830543518, - "logits/chosen": 257.30401611328125, - "logits/rejected": 234.23684692382812, - "logps/chosen": -0.36796340346336365, - "logps/rejected": -1.6814361810684204, - "loss": 0.3497, - "nll_loss": 0.4328286051750183, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.018398171290755272, - "rewards/margins": 0.06567363440990448, - "rewards/rejected": -0.0840718075633049, - "step": 2775 - }, - { - "epoch": 2.0730797912005965, - "grad_norm": 36.59761428833008, - "learning_rate": 9.483040522636021e-07, - "log_odds_chosen": 3.7382054328918457, - "log_odds_ratio": -0.10035743564367294, - "logits/chosen": 185.81143188476562, - "logits/rejected": 275.2794189453125, - "logps/chosen": -0.15834525227546692, - "logps/rejected": -1.829339623451233, - "loss": 0.4084, - "nll_loss": 0.3559674322605133, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.00791726354509592, - "rewards/margins": 0.08354972302913666, - "rewards/rejected": -0.0914669781923294, - "step": 2780 - }, - { - "epoch": 2.0768083519761373, - "grad_norm": 19.883764266967773, - "learning_rate": 9.474524094763924e-07, - "log_odds_chosen": 2.5012927055358887, - "log_odds_ratio": -0.10720161348581314, - "logits/chosen": 370.319091796875, - "logits/rejected": 214.487060546875, - "logps/chosen": -0.23329898715019226, - "logps/rejected": -1.4398752450942993, - "loss": 0.3025, - "nll_loss": 0.2886502146720886, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.011664949357509613, - "rewards/margins": 0.06032881140708923, - "rewards/rejected": -0.07199375331401825, - "step": 2785 - }, - { - "epoch": 2.0805369127516777, - "grad_norm": 20.796123504638672, - "learning_rate": 9.466030570784414e-07, - "log_odds_chosen": 3.5899271965026855, - "log_odds_ratio": -0.047377217561006546, - "logits/chosen": 171.82266235351562, - "logits/rejected": 259.2945861816406, - "logps/chosen": -0.3174481987953186, - "logps/rejected": -2.149423837661743, - "loss": 0.3126, - "nll_loss": 0.31615149974823, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01587240770459175, - "rewards/margins": 0.09159878641366959, - "rewards/rejected": -0.10747119039297104, - "step": 2790 - }, - { - "epoch": 2.0842654735272186, - "grad_norm": 26.912216186523438, - "learning_rate": 9.45755984821918e-07, - "log_odds_chosen": 2.9841904640197754, - "log_odds_ratio": -0.061539847403764725, - "logits/chosen": 222.7058868408203, - "logits/rejected": 280.86431884765625, - "logps/chosen": -0.20084086060523987, - "logps/rejected": -1.4955880641937256, - "loss": 0.3627, - "nll_loss": 0.3103131353855133, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010042043402791023, - "rewards/margins": 0.06473736464977264, - "rewards/rejected": -0.07477940618991852, - "step": 2795 - }, - { - "epoch": 2.087994034302759, - "grad_norm": 26.032835006713867, - "learning_rate": 9.449111825230681e-07, - "log_odds_chosen": 2.010430335998535, - "log_odds_ratio": -0.3241332471370697, - "logits/chosen": 211.7117919921875, - "logits/rejected": 221.9749298095703, - "logps/chosen": -0.43613171577453613, - "logps/rejected": -1.1552706956863403, - "loss": 0.3732, - "nll_loss": 0.4796213209629059, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.021806586533784866, - "rewards/margins": 0.03595694899559021, - "rewards/rejected": -0.057763535529375076, - "step": 2800 - }, - { - "epoch": 2.0917225950783, - "grad_norm": 19.38669776916504, - "learning_rate": 9.440686400617012e-07, - "log_odds_chosen": 2.8303778171539307, - "log_odds_ratio": -0.06135711818933487, - "logits/chosen": 200.68824768066406, - "logits/rejected": 242.7178192138672, - "logps/chosen": -0.2095930278301239, - "logps/rejected": -1.5981568098068237, - "loss": 0.339, - "nll_loss": 0.3382203280925751, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01047965232282877, - "rewards/margins": 0.06942819058895111, - "rewards/rejected": -0.0799078494310379, - "step": 2805 - }, - { - "epoch": 2.0954511558538402, - "grad_norm": 43.2166748046875, - "learning_rate": 9.432283473806812e-07, - "log_odds_chosen": 4.2448225021362305, - "log_odds_ratio": -0.03579006344079971, - "logits/chosen": 204.7224884033203, - "logits/rejected": 289.32489013671875, - "logps/chosen": -0.08612949401140213, - "logps/rejected": -1.4989492893218994, - "loss": 0.2764, - "nll_loss": 0.27059516310691833, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.004306475166231394, - "rewards/margins": 0.07064099609851837, - "rewards/rejected": -0.07494746893644333, - "step": 2810 - }, - { - "epoch": 2.099179716629381, - "grad_norm": 21.76558494567871, - "learning_rate": 9.423902944854219e-07, - "log_odds_chosen": 3.5233218669891357, - "log_odds_ratio": -0.034064881503582, - "logits/chosen": 252.42172241210938, - "logits/rejected": 209.1394805908203, - "logps/chosen": -0.150877445936203, - "logps/rejected": -1.795823335647583, - "loss": 0.325, - "nll_loss": 0.27992868423461914, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007543871644884348, - "rewards/margins": 0.082247294485569, - "rewards/rejected": -0.08979116380214691, - "step": 2815 - }, - { - "epoch": 2.1029082774049215, - "grad_norm": 21.168607711791992, - "learning_rate": 9.415544714433869e-07, - "log_odds_chosen": 2.6694111824035645, - "log_odds_ratio": -0.08343606442213058, - "logits/chosen": 189.706787109375, - "logits/rejected": 247.787841796875, - "logps/chosen": -0.1889968365430832, - "logps/rejected": -1.406496524810791, - "loss": 0.3144, - "nll_loss": 0.2638685405254364, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.00944984145462513, - "rewards/margins": 0.06087498739361763, - "rewards/rejected": -0.07032482326030731, - "step": 2820 - }, - { - "epoch": 2.1066368381804623, - "grad_norm": 35.46297836303711, - "learning_rate": 9.407208683835973e-07, - "log_odds_chosen": 4.897724151611328, - "log_odds_ratio": -0.05642179772257805, - "logits/chosen": 183.9779815673828, - "logits/rejected": 219.1405792236328, - "logps/chosen": -0.22928734123706818, - "logps/rejected": -2.0388245582580566, - "loss": 0.3789, - "nll_loss": 0.4107973575592041, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.011464366689324379, - "rewards/margins": 0.0904768630862236, - "rewards/rejected": -0.10194122791290283, - "step": 2825 - }, - { - "epoch": 2.110365398956003, - "grad_norm": 26.470779418945312, - "learning_rate": 9.398894754961406e-07, - "log_odds_chosen": 4.264645576477051, - "log_odds_ratio": -0.02500019408762455, - "logits/chosen": 249.0233917236328, - "logits/rejected": 222.98123168945312, - "logps/chosen": -0.14569704234600067, - "logps/rejected": -2.4342267513275146, - "loss": 0.4098, - "nll_loss": 0.2476125955581665, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007284852210432291, - "rewards/margins": 0.11442649364471436, - "rewards/rejected": -0.1217113509774208, - "step": 2830 - }, - { - "epoch": 2.1140939597315436, - "grad_norm": 26.662687301635742, - "learning_rate": 9.390602830316851e-07, - "log_odds_chosen": 2.894552707672119, - "log_odds_ratio": -0.07001407444477081, - "logits/chosen": 190.96865844726562, - "logits/rejected": 321.7547912597656, - "logps/chosen": -0.29056453704833984, - "logps/rejected": -1.7669216394424438, - "loss": 0.4015, - "nll_loss": 0.47293147444725037, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.014528227038681507, - "rewards/margins": 0.07381786406040192, - "rewards/rejected": -0.08834608644247055, - "step": 2835 - }, - { - "epoch": 2.1178225205070844, - "grad_norm": 33.98153305053711, - "learning_rate": 9.38233281301002e-07, - "log_odds_chosen": 4.183767795562744, - "log_odds_ratio": -0.02983788773417473, - "logits/chosen": 193.29376220703125, - "logits/rejected": 208.7139129638672, - "logps/chosen": -0.18377898633480072, - "logps/rejected": -2.428433895111084, - "loss": 0.3177, - "nll_loss": 0.3299584984779358, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009188949130475521, - "rewards/margins": 0.1122327595949173, - "rewards/rejected": -0.1214216947555542, - "step": 2840 - }, - { - "epoch": 2.121551081282625, - "grad_norm": 26.25261116027832, - "learning_rate": 9.374084606744878e-07, - "log_odds_chosen": 2.0691139698028564, - "log_odds_ratio": -0.12764985859394073, - "logits/chosen": 240.1799774169922, - "logits/rejected": 253.2252655029297, - "logps/chosen": -0.2771347463130951, - "logps/rejected": -1.1501766443252563, - "loss": 0.3508, - "nll_loss": 0.4521103501319885, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.013856736943125725, - "rewards/margins": 0.04365209490060806, - "rewards/rejected": -0.05750883370637894, - "step": 2845 - }, - { - "epoch": 2.1252796420581657, - "grad_norm": 32.19364929199219, - "learning_rate": 9.365858115816941e-07, - "log_odds_chosen": 3.480781078338623, - "log_odds_ratio": -0.05425524711608887, - "logits/chosen": 181.74063110351562, - "logits/rejected": 249.45803833007812, - "logps/chosen": -0.1660839170217514, - "logps/rejected": -1.5913642644882202, - "loss": 0.3542, - "nll_loss": 0.3055691123008728, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.00830419547855854, - "rewards/margins": 0.07126401364803314, - "rewards/rejected": -0.07956821471452713, - "step": 2850 - }, - { - "epoch": 2.129008202833706, - "grad_norm": 17.335956573486328, - "learning_rate": 9.357653245108616e-07, - "log_odds_chosen": 3.678466796875, - "log_odds_ratio": -0.03851805999875069, - "logits/chosen": 170.20901489257812, - "logits/rejected": 299.9815673828125, - "logps/chosen": -0.16496941447257996, - "logps/rejected": -1.7308250665664673, - "loss": 0.3919, - "nll_loss": 0.40823453664779663, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008248471654951572, - "rewards/margins": 0.07829278707504272, - "rewards/rejected": -0.08654125034809113, - "step": 2855 - }, - { - "epoch": 2.132736763609247, - "grad_norm": 30.10215187072754, - "learning_rate": 9.349469900084572e-07, - "log_odds_chosen": 3.880126953125, - "log_odds_ratio": -0.037005677819252014, - "logits/chosen": 201.2039031982422, - "logits/rejected": 269.84136962890625, - "logps/chosen": -0.17618824541568756, - "logps/rejected": -2.21281099319458, - "loss": 0.3709, - "nll_loss": 0.3356180787086487, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008809411898255348, - "rewards/margins": 0.10183112323284149, - "rewards/rejected": -0.11064054071903229, - "step": 2860 - }, - { - "epoch": 2.1364653243847873, - "grad_norm": 20.219209671020508, - "learning_rate": 9.341307986787181e-07, - "log_odds_chosen": 2.2867441177368164, - "log_odds_ratio": -0.16432449221611023, - "logits/chosen": 197.47686767578125, - "logits/rejected": 301.5180969238281, - "logps/chosen": -0.2658995985984802, - "logps/rejected": -1.386378526687622, - "loss": 0.3809, - "nll_loss": 0.4640219807624817, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01329498179256916, - "rewards/margins": 0.05602394789457321, - "rewards/rejected": -0.06931892782449722, - "step": 2865 - }, - { - "epoch": 2.140193885160328, - "grad_norm": 22.819562911987305, - "learning_rate": 9.333167411831968e-07, - "log_odds_chosen": 2.8513801097869873, - "log_odds_ratio": -0.06089717149734497, - "logits/chosen": 248.53884887695312, - "logits/rejected": 275.63641357421875, - "logps/chosen": -0.17872999608516693, - "logps/rejected": -1.4388277530670166, - "loss": 0.4338, - "nll_loss": 0.26669564843177795, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008936500176787376, - "rewards/margins": 0.06300488114356995, - "rewards/rejected": -0.07194138318300247, - "step": 2870 - }, - { - "epoch": 2.1439224459358686, - "grad_norm": 20.076927185058594, - "learning_rate": 9.325048082403139e-07, - "log_odds_chosen": 4.272433280944824, - "log_odds_ratio": -0.04112180322408676, - "logits/chosen": 198.3495330810547, - "logits/rejected": 197.24716186523438, - "logps/chosen": -0.12334243953227997, - "logps/rejected": -1.4984716176986694, - "loss": 0.4105, - "nll_loss": 0.34403038024902344, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006167122162878513, - "rewards/margins": 0.06875646859407425, - "rewards/rejected": -0.07492358982563019, - "step": 2875 - }, - { - "epoch": 2.1476510067114094, - "grad_norm": 42.78199768066406, - "learning_rate": 9.316949906249125e-07, - "log_odds_chosen": 6.3218994140625, - "log_odds_ratio": -0.012074669823050499, - "logits/chosen": 186.21102905273438, - "logits/rejected": 332.6407470703125, - "logps/chosen": -0.1397634744644165, - "logps/rejected": -4.321441173553467, - "loss": 0.3068, - "nll_loss": 0.20485524833202362, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006988172885030508, - "rewards/margins": 0.20908388495445251, - "rewards/rejected": -0.21607205271720886, - "step": 2880 - }, - { - "epoch": 2.1513795674869503, - "grad_norm": 23.701658248901367, - "learning_rate": 9.308872791678188e-07, - "log_odds_chosen": 2.084463596343994, - "log_odds_ratio": -0.1370479315519333, - "logits/chosen": 291.7060852050781, - "logits/rejected": 217.25357055664062, - "logps/chosen": -0.387390673160553, - "logps/rejected": -1.460844874382019, - "loss": 0.3973, - "nll_loss": 0.48597535490989685, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01936953328549862, - "rewards/margins": 0.05367271229624748, - "rewards/rejected": -0.07304225116968155, - "step": 2885 - }, - { - "epoch": 2.1551081282624907, - "grad_norm": 24.885009765625, - "learning_rate": 9.300816647554058e-07, - "log_odds_chosen": 2.939619541168213, - "log_odds_ratio": -0.10120894014835358, - "logits/chosen": 250.43948364257812, - "logits/rejected": 207.31747436523438, - "logps/chosen": -0.23114211857318878, - "logps/rejected": -1.271043062210083, - "loss": 0.4014, - "nll_loss": 0.4772767424583435, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.011557105928659439, - "rewards/margins": 0.05199504643678665, - "rewards/rejected": -0.06355215609073639, - "step": 2890 - }, - { - "epoch": 2.1588366890380315, - "grad_norm": 25.912256240844727, - "learning_rate": 9.292781383291611e-07, - "log_odds_chosen": 3.4613661766052246, - "log_odds_ratio": -0.03605017811059952, - "logits/chosen": 273.59906005859375, - "logits/rejected": 188.4845733642578, - "logps/chosen": -0.165787473320961, - "logps/rejected": -1.7480428218841553, - "loss": 0.3949, - "nll_loss": 0.498370498418808, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008289373479783535, - "rewards/margins": 0.07911276817321777, - "rewards/rejected": -0.08740214258432388, - "step": 2895 - }, - { - "epoch": 2.162565249813572, - "grad_norm": 26.755226135253906, - "learning_rate": 9.284766908852594e-07, - "log_odds_chosen": 2.900146007537842, - "log_odds_ratio": -0.06023859977722168, - "logits/chosen": 176.53884887695312, - "logits/rejected": 251.8926239013672, - "logps/chosen": -0.2304307520389557, - "logps/rejected": -1.6596097946166992, - "loss": 0.3476, - "nll_loss": 0.38775497674942017, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0115215377882123, - "rewards/margins": 0.07145895063877106, - "rewards/rejected": -0.08298048377037048, - "step": 2900 - }, - { - "epoch": 2.1662938105891127, - "grad_norm": 26.624910354614258, - "learning_rate": 9.276773134741389e-07, - "log_odds_chosen": 2.690764904022217, - "log_odds_ratio": -0.0929180234670639, - "logits/chosen": 183.284423828125, - "logits/rejected": 339.5240173339844, - "logps/chosen": -0.32581397891044617, - "logps/rejected": -1.8230215311050415, - "loss": 0.394, - "nll_loss": 0.5038581490516663, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01629069820046425, - "rewards/margins": 0.07486037164926529, - "rewards/rejected": -0.09115107357501984, - "step": 2905 - }, - { - "epoch": 2.170022371364653, - "grad_norm": 33.39311599731445, - "learning_rate": 9.26879997200081e-07, - "log_odds_chosen": 3.1535556316375732, - "log_odds_ratio": -0.07849089801311493, - "logits/chosen": 329.34185791015625, - "logits/rejected": 245.7323760986328, - "logps/chosen": -0.13053788244724274, - "logps/rejected": -1.3061907291412354, - "loss": 0.3711, - "nll_loss": 0.28260380029678345, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006526893936097622, - "rewards/margins": 0.05878264829516411, - "rewards/rejected": -0.065309539437294, - "step": 2910 - }, - { - "epoch": 2.173750932140194, - "grad_norm": 18.616939544677734, - "learning_rate": 9.260847332207952e-07, - "log_odds_chosen": 3.3698582649230957, - "log_odds_ratio": -0.07309354841709137, - "logits/chosen": 231.24392700195312, - "logits/rejected": 257.86053466796875, - "logps/chosen": -0.21127252280712128, - "logps/rejected": -1.7263911962509155, - "loss": 0.3842, - "nll_loss": 0.4029383659362793, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010563625954091549, - "rewards/margins": 0.07575593888759613, - "rewards/rejected": -0.08631956577301025, - "step": 2915 - }, - { - "epoch": 2.1774794929157344, - "grad_norm": 21.6580810546875, - "learning_rate": 9.252915127470066e-07, - "log_odds_chosen": 2.4054617881774902, - "log_odds_ratio": -0.09747191518545151, - "logits/chosen": 198.46932983398438, - "logits/rejected": 336.788330078125, - "logps/chosen": -0.4552164077758789, - "logps/rejected": -1.889810562133789, - "loss": 0.416, - "nll_loss": 0.47373443841934204, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.022760821506381035, - "rewards/margins": 0.07172970473766327, - "rewards/rejected": -0.09449052810668945, - "step": 2920 - }, - { - "epoch": 2.1812080536912752, - "grad_norm": 27.541921615600586, - "learning_rate": 9.245003270420485e-07, - "log_odds_chosen": 2.8480119705200195, - "log_odds_ratio": -0.08231332898139954, - "logits/chosen": 223.306884765625, - "logits/rejected": 274.29095458984375, - "logps/chosen": -0.2800825238227844, - "logps/rejected": -1.6859405040740967, - "loss": 0.4308, - "nll_loss": 0.39594218134880066, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.014004126191139221, - "rewards/margins": 0.07029290497303009, - "rewards/rejected": -0.08429703116416931, - "step": 2925 - }, - { - "epoch": 2.1849366144668156, - "grad_norm": 91.42779541015625, - "learning_rate": 9.23711167421458e-07, - "log_odds_chosen": 3.5117180347442627, - "log_odds_ratio": -0.045738641172647476, - "logits/chosen": 277.21539306640625, - "logits/rejected": 187.24478149414062, - "logps/chosen": -0.20571565628051758, - "logps/rejected": -1.9185712337493896, - "loss": 0.5153, - "nll_loss": 0.3553805351257324, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010285782627761364, - "rewards/margins": 0.08564278483390808, - "rewards/rejected": -0.09592857211828232, - "step": 2930 - }, - { - "epoch": 2.1886651752423565, - "grad_norm": 21.446821212768555, - "learning_rate": 9.229240252525751e-07, - "log_odds_chosen": 3.135779619216919, - "log_odds_ratio": -0.12353216111660004, - "logits/chosen": 175.35745239257812, - "logits/rejected": 243.8357391357422, - "logps/chosen": -0.21148009598255157, - "logps/rejected": -1.6302716732025146, - "loss": 0.3708, - "nll_loss": 0.4025413990020752, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010574006475508213, - "rewards/margins": 0.07093958556652069, - "rewards/rejected": -0.08151358366012573, - "step": 2935 - }, - { - "epoch": 2.192393736017897, - "grad_norm": 21.751096725463867, - "learning_rate": 9.221388919541469e-07, - "log_odds_chosen": 2.6003096103668213, - "log_odds_ratio": -0.09116996824741364, - "logits/chosen": 246.878662109375, - "logits/rejected": 236.09841918945312, - "logps/chosen": -0.18940414488315582, - "logps/rejected": -1.3009185791015625, - "loss": 0.3296, - "nll_loss": 0.26096317172050476, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009470207616686821, - "rewards/margins": 0.05557572841644287, - "rewards/rejected": -0.06504593789577484, - "step": 2940 - }, - { - "epoch": 2.1961222967934377, - "grad_norm": 21.005126953125, - "learning_rate": 9.213557589959346e-07, - "log_odds_chosen": 3.6964645385742188, - "log_odds_ratio": -0.026794373989105225, - "logits/chosen": 222.74612426757812, - "logits/rejected": 189.8030242919922, - "logps/chosen": -0.13917608559131622, - "logps/rejected": -1.8499408960342407, - "loss": 0.3037, - "nll_loss": 0.26311570405960083, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006958804093301296, - "rewards/margins": 0.08553823828697205, - "rewards/rejected": -0.09249705076217651, - "step": 2945 - }, - { - "epoch": 2.1998508575689786, - "grad_norm": 29.459732055664062, - "learning_rate": 9.205746178983235e-07, - "log_odds_chosen": 2.2893059253692627, - "log_odds_ratio": -0.17236411571502686, - "logits/chosen": 262.13262939453125, - "logits/rejected": 263.3992614746094, - "logps/chosen": -0.2650839686393738, - "logps/rejected": -1.2175147533416748, - "loss": 0.3832, - "nll_loss": 0.42878979444503784, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.013254198245704174, - "rewards/margins": 0.04762154072523117, - "rewards/rejected": -0.06087573617696762, - "step": 2950 - }, - { - "epoch": 2.203579418344519, - "grad_norm": 19.37701988220215, - "learning_rate": 9.19795460231938e-07, - "log_odds_chosen": 2.8154711723327637, - "log_odds_ratio": -0.17206686735153198, - "logits/chosen": 235.2103729248047, - "logits/rejected": 313.9200439453125, - "logps/chosen": -0.13152365386486053, - "logps/rejected": -1.5220458507537842, - "loss": 0.3308, - "nll_loss": 0.254878431558609, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.006576182786375284, - "rewards/margins": 0.06952611356973648, - "rewards/rejected": -0.07610230147838593, - "step": 2955 - }, - { - "epoch": 2.20730797912006, - "grad_norm": 22.586166381835938, - "learning_rate": 9.190182776172598e-07, - "log_odds_chosen": 3.0794425010681152, - "log_odds_ratio": -0.05862278863787651, - "logits/chosen": 189.49612426757812, - "logits/rejected": 282.5874328613281, - "logps/chosen": -0.16365104913711548, - "logps/rejected": -1.6076217889785767, - "loss": 0.4238, - "nll_loss": 0.32098349928855896, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008182552643120289, - "rewards/margins": 0.0721985325217247, - "rewards/rejected": -0.08038108795881271, - "step": 2960 - }, - { - "epoch": 2.2110365398956002, - "grad_norm": 24.561452865600586, - "learning_rate": 9.182430617242484e-07, - "log_odds_chosen": 2.844458818435669, - "log_odds_ratio": -0.07341152429580688, - "logits/chosen": 183.42471313476562, - "logits/rejected": 255.7192840576172, - "logps/chosen": -0.3470374643802643, - "logps/rejected": -1.8707621097564697, - "loss": 0.481, - "nll_loss": 0.3997482359409332, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.017351875081658363, - "rewards/margins": 0.07618623226881027, - "rewards/rejected": -0.09353810548782349, - "step": 2965 - }, - { - "epoch": 2.214765100671141, - "grad_norm": 25.198476791381836, - "learning_rate": 9.174698042719672e-07, - "log_odds_chosen": 3.0866386890411377, - "log_odds_ratio": -0.09157673269510269, - "logits/chosen": 244.6724090576172, - "logits/rejected": 342.2402648925781, - "logps/chosen": -0.35004281997680664, - "logps/rejected": -1.937229871749878, - "loss": 0.399, - "nll_loss": 0.4715425968170166, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.017502140253782272, - "rewards/margins": 0.07935936003923416, - "rewards/rejected": -0.09686149656772614, - "step": 2970 - }, - { - "epoch": 2.2184936614466815, - "grad_norm": 17.36049461364746, - "learning_rate": 9.166984970282114e-07, - "log_odds_chosen": 2.841966152191162, - "log_odds_ratio": -0.07786120474338531, - "logits/chosen": 187.41488647460938, - "logits/rejected": 410.0281677246094, - "logps/chosen": -0.2384624481201172, - "logps/rejected": -1.7561447620391846, - "loss": 0.3272, - "nll_loss": 0.3847481608390808, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.011923124082386494, - "rewards/margins": 0.07588411122560501, - "rewards/rejected": -0.08780723810195923, - "step": 2975 - }, - { - "epoch": 2.2222222222222223, - "grad_norm": 32.37944030761719, - "learning_rate": 9.159291318091397e-07, - "log_odds_chosen": 3.806011915206909, - "log_odds_ratio": -0.030134279280900955, - "logits/chosen": 185.5968780517578, - "logits/rejected": 303.651123046875, - "logps/chosen": -0.15715068578720093, - "logps/rejected": -2.0221409797668457, - "loss": 0.3241, - "nll_loss": 0.23318199813365936, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007857535034418106, - "rewards/margins": 0.09324951469898224, - "rewards/rejected": -0.10110704600811005, - "step": 2980 - }, - { - "epoch": 2.2259507829977627, - "grad_norm": 27.743755340576172, - "learning_rate": 9.151617004789102e-07, - "log_odds_chosen": 2.549931049346924, - "log_odds_ratio": -0.15616634488105774, - "logits/chosen": 243.7384796142578, - "logits/rejected": 233.5831298828125, - "logps/chosen": -0.19271358847618103, - "logps/rejected": -1.3550409078598022, - "loss": 0.389, - "nll_loss": 0.44030871987342834, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009635680355131626, - "rewards/margins": 0.0581163689494133, - "rewards/rejected": -0.06775204837322235, - "step": 2985 - }, - { - "epoch": 2.2296793437733036, - "grad_norm": 25.109668731689453, - "learning_rate": 9.143961949493189e-07, - "log_odds_chosen": 3.3004798889160156, - "log_odds_ratio": -0.04096206650137901, - "logits/chosen": 269.15374755859375, - "logits/rejected": 240.02664184570312, - "logps/chosen": -0.25250792503356934, - "logps/rejected": -2.0747182369232178, - "loss": 0.425, - "nll_loss": 0.4935689866542816, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.012625398114323616, - "rewards/margins": 0.09111051261425018, - "rewards/rejected": -0.10373590886592865, - "step": 2990 - }, - { - "epoch": 2.233407904548844, - "grad_norm": 24.080591201782227, - "learning_rate": 9.136326071794409e-07, - "log_odds_chosen": 3.477325439453125, - "log_odds_ratio": -0.06609585136175156, - "logits/chosen": 186.8747100830078, - "logits/rejected": 222.64111328125, - "logps/chosen": -0.17290890216827393, - "logps/rejected": -1.9812809228897095, - "loss": 0.3655, - "nll_loss": 0.23396065831184387, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008645446039736271, - "rewards/margins": 0.09041859954595566, - "rewards/rejected": -0.09906404465436935, - "step": 2995 - }, - { - "epoch": 2.237136465324385, - "grad_norm": 25.39710807800293, - "learning_rate": 9.128709291752768e-07, - "log_odds_chosen": 3.0382652282714844, - "log_odds_ratio": -0.05770363286137581, - "logits/chosen": 232.21469116210938, - "logits/rejected": 236.98861694335938, - "logps/chosen": -0.2541826367378235, - "logps/rejected": -1.7025249004364014, - "loss": 0.3194, - "nll_loss": 0.33626654744148254, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.012709131464362144, - "rewards/margins": 0.07241711765527725, - "rewards/rejected": -0.08512624353170395, - "step": 3000 - }, - { - "epoch": 2.240865026099925, - "grad_norm": 20.628110885620117, - "learning_rate": 9.121111529894007e-07, - "log_odds_chosen": 3.223207950592041, - "log_odds_ratio": -0.050525911152362823, - "logits/chosen": 265.72039794921875, - "logits/rejected": 183.3358917236328, - "logps/chosen": -0.13323859870433807, - "logps/rejected": -1.3987983465194702, - "loss": 0.3814, - "nll_loss": 0.31617099046707153, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006661930587142706, - "rewards/margins": 0.06327798962593079, - "rewards/rejected": -0.06993992626667023, - "step": 3005 - }, - { - "epoch": 2.244593586875466, - "grad_norm": 20.803874969482422, - "learning_rate": 9.113532707206116e-07, - "log_odds_chosen": 3.0128166675567627, - "log_odds_ratio": -0.05278666689991951, - "logits/chosen": 250.66635131835938, - "logits/rejected": 268.3409423828125, - "logps/chosen": -0.24167463183403015, - "logps/rejected": -1.7390873432159424, - "loss": 0.3884, - "nll_loss": 0.37091052532196045, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.012083732523024082, - "rewards/margins": 0.07487063109874725, - "rewards/rejected": -0.08695436269044876, - "step": 3010 - }, - { - "epoch": 2.248322147651007, - "grad_norm": 16.228391647338867, - "learning_rate": 9.105972745135884e-07, - "log_odds_chosen": 2.8597218990325928, - "log_odds_ratio": -0.08842327445745468, - "logits/chosen": 277.2126770019531, - "logits/rejected": 297.9247131347656, - "logps/chosen": -0.1328926980495453, - "logps/rejected": -0.8132016062736511, - "loss": 0.3282, - "nll_loss": 0.343722939491272, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006644635461270809, - "rewards/margins": 0.03401544317603111, - "rewards/rejected": -0.0406600758433342, - "step": 3015 - }, - { - "epoch": 2.2520507084265473, - "grad_norm": 41.3417854309082, - "learning_rate": 9.098431565585488e-07, - "log_odds_chosen": 4.174860954284668, - "log_odds_ratio": -0.023507926613092422, - "logits/chosen": 291.0510559082031, - "logits/rejected": 209.86569213867188, - "logps/chosen": -0.10609869658946991, - "logps/rejected": -1.7237930297851562, - "loss": 0.3241, - "nll_loss": 0.195898175239563, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.005304934922605753, - "rewards/margins": 0.08088470995426178, - "rewards/rejected": -0.0861896425485611, - "step": 3020 - }, - { - "epoch": 2.255779269202088, - "grad_norm": 19.89918327331543, - "learning_rate": 9.090909090909091e-07, - "log_odds_chosen": 3.217498302459717, - "log_odds_ratio": -0.05090005323290825, - "logits/chosen": 388.0883483886719, - "logits/rejected": 296.2265319824219, - "logps/chosen": -0.20557694137096405, - "logps/rejected": -1.3272684812545776, - "loss": 0.4085, - "nll_loss": 0.5254836082458496, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010278847999870777, - "rewards/margins": 0.056084584444761276, - "rewards/rejected": -0.06636343151330948, - "step": 3025 - }, - { - "epoch": 2.2595078299776286, - "grad_norm": 19.28213119506836, - "learning_rate": 9.083405243909494e-07, - "log_odds_chosen": 3.234055280685425, - "log_odds_ratio": -0.05568747967481613, - "logits/chosen": 249.65371704101562, - "logits/rejected": 220.8927764892578, - "logps/chosen": -0.2724187970161438, - "logps/rejected": -1.8960065841674805, - "loss": 0.3444, - "nll_loss": 0.3943935036659241, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01362094096839428, - "rewards/margins": 0.08117939531803131, - "rewards/rejected": -0.09480033069849014, - "step": 3030 - }, - { - "epoch": 2.2632363907531694, - "grad_norm": 28.03415298461914, - "learning_rate": 9.075919947834808e-07, - "log_odds_chosen": 3.039599895477295, - "log_odds_ratio": -0.04998341202735901, - "logits/chosen": 233.24331665039062, - "logits/rejected": 338.47845458984375, - "logps/chosen": -0.1669793576002121, - "logps/rejected": -1.574297308921814, - "loss": 0.4301, - "nll_loss": 0.35762864351272583, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008348967880010605, - "rewards/margins": 0.07036590576171875, - "rewards/rejected": -0.07871486991643906, - "step": 3035 - }, - { - "epoch": 2.26696495152871, - "grad_norm": 26.147140502929688, - "learning_rate": 9.068453126375147e-07, - "log_odds_chosen": 4.552127838134766, - "log_odds_ratio": -0.0199168361723423, - "logits/chosen": 255.64407348632812, - "logits/rejected": 315.29901123046875, - "logps/chosen": -0.25607097148895264, - "logps/rejected": -2.7542614936828613, - "loss": 0.3529, - "nll_loss": 0.296487957239151, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.012803549878299236, - "rewards/margins": 0.12490952014923096, - "rewards/rejected": -0.13771307468414307, - "step": 3040 - }, - { - "epoch": 2.2706935123042506, - "grad_norm": 35.77022171020508, - "learning_rate": 9.061004703659373e-07, - "log_odds_chosen": 2.9147801399230957, - "log_odds_ratio": -0.17095854878425598, - "logits/chosen": 330.08587646484375, - "logits/rejected": 185.89369201660156, - "logps/chosen": -0.07138949632644653, - "logps/rejected": -1.1190823316574097, - "loss": 0.3576, - "nll_loss": 0.1775108277797699, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.003569474909454584, - "rewards/margins": 0.052384644746780396, - "rewards/rejected": -0.05595412105321884, - "step": 3045 - }, - { - "epoch": 2.274422073079791, - "grad_norm": 28.74247932434082, - "learning_rate": 9.053574604251853e-07, - "log_odds_chosen": 3.641831636428833, - "log_odds_ratio": -0.044065140187740326, - "logits/chosen": 222.0181121826172, - "logits/rejected": 216.7024688720703, - "logps/chosen": -0.15740802884101868, - "logps/rejected": -1.8290914297103882, - "loss": 0.4368, - "nll_loss": 0.2943684756755829, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007870402187108994, - "rewards/margins": 0.08358417451381683, - "rewards/rejected": -0.09145458042621613, - "step": 3050 - }, - { - "epoch": 2.278150633855332, - "grad_norm": 25.92182159423828, - "learning_rate": 9.04616275314925e-07, - "log_odds_chosen": 3.5391902923583984, - "log_odds_ratio": -0.053752351552248, - "logits/chosen": 245.0206298828125, - "logits/rejected": 279.80462646484375, - "logps/chosen": -0.20430922508239746, - "logps/rejected": -2.0469372272491455, - "loss": 0.3449, - "nll_loss": 0.2913849353790283, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010215463116765022, - "rewards/margins": 0.09213139861822128, - "rewards/rejected": -0.10234685242176056, - "step": 3055 - }, - { - "epoch": 2.2818791946308723, - "grad_norm": 23.088884353637695, - "learning_rate": 9.03876907577734e-07, - "log_odds_chosen": 3.4226107597351074, - "log_odds_ratio": -0.05105402320623398, - "logits/chosen": 277.6727600097656, - "logits/rejected": 200.53073120117188, - "logps/chosen": -0.23951955139636993, - "logps/rejected": -2.0887532234191895, - "loss": 0.3021, - "nll_loss": 0.3429560661315918, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.011975977569818497, - "rewards/margins": 0.09246169030666351, - "rewards/rejected": -0.10443766415119171, - "step": 3060 - }, - { - "epoch": 2.285607755406413, - "grad_norm": 22.03512191772461, - "learning_rate": 9.03139349798787e-07, - "log_odds_chosen": 3.3976242542266846, - "log_odds_ratio": -0.10177646577358246, - "logits/chosen": 289.64178466796875, - "logits/rejected": 270.4364013671875, - "logps/chosen": -0.18663987517356873, - "logps/rejected": -1.4311660528182983, - "loss": 0.3319, - "nll_loss": 0.26277774572372437, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009331993758678436, - "rewards/margins": 0.0622263066470623, - "rewards/rejected": -0.07155830413103104, - "step": 3065 - }, - { - "epoch": 2.289336316181954, - "grad_norm": 25.488845825195312, - "learning_rate": 9.024035946055421e-07, - "log_odds_chosen": 2.5378024578094482, - "log_odds_ratio": -0.1763961762189865, - "logits/chosen": 227.9327850341797, - "logits/rejected": 192.4888458251953, - "logps/chosen": -0.17879600822925568, - "logps/rejected": -1.243435263633728, - "loss": 0.4398, - "nll_loss": 0.39238256216049194, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.008939800783991814, - "rewards/margins": 0.053231965750455856, - "rewards/rejected": -0.06217176839709282, - "step": 3070 - }, - { - "epoch": 2.2930648769574944, - "grad_norm": 23.925722122192383, - "learning_rate": 9.016696346674324e-07, - "log_odds_chosen": 4.112010955810547, - "log_odds_ratio": -0.019568413496017456, - "logits/chosen": 189.310546875, - "logits/rejected": 272.7610778808594, - "logps/chosen": -0.09574665874242783, - "logps/rejected": -1.8901046514511108, - "loss": 0.3813, - "nll_loss": 0.28564220666885376, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.004787332843989134, - "rewards/margins": 0.08971790969371796, - "rewards/rejected": -0.09450524300336838, - "step": 3075 - }, - { - "epoch": 2.2967934377330352, - "grad_norm": 24.62265968322754, - "learning_rate": 9.00937462695559e-07, - "log_odds_chosen": 3.1334023475646973, - "log_odds_ratio": -0.054406385868787766, - "logits/chosen": 213.5046844482422, - "logits/rejected": 202.1427459716797, - "logps/chosen": -0.30616599321365356, - "logps/rejected": -1.7166244983673096, - "loss": 0.3535, - "nll_loss": 0.36636242270469666, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.015308300033211708, - "rewards/margins": 0.07052291929721832, - "rewards/rejected": -0.08583121746778488, - "step": 3080 - }, - { - "epoch": 2.3005219985085756, - "grad_norm": 20.2014217376709, - "learning_rate": 9.002070714423869e-07, - "log_odds_chosen": 3.4609081745147705, - "log_odds_ratio": -0.03678922727704048, - "logits/chosen": 202.04580688476562, - "logits/rejected": 295.3804626464844, - "logps/chosen": -0.1886526644229889, - "logps/rejected": -1.9671223163604736, - "loss": 0.364, - "nll_loss": 0.3095439076423645, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.00943263340741396, - "rewards/margins": 0.08892347663640976, - "rewards/rejected": -0.09835611283779144, - "step": 3085 - }, - { - "epoch": 2.3042505592841165, - "grad_norm": 23.474075317382812, - "learning_rate": 8.994784537014432e-07, - "log_odds_chosen": 3.374971389770508, - "log_odds_ratio": -0.043559085577726364, - "logits/chosen": 252.35092163085938, - "logits/rejected": 209.103759765625, - "logps/chosen": -0.14632463455200195, - "logps/rejected": -1.6318790912628174, - "loss": 0.4649, - "nll_loss": 0.2478252351284027, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007316230796277523, - "rewards/margins": 0.07427772134542465, - "rewards/rejected": -0.08159395307302475, - "step": 3090 - }, - { - "epoch": 2.307979120059657, - "grad_norm": 23.789630889892578, - "learning_rate": 8.987516023070194e-07, - "log_odds_chosen": 3.111093521118164, - "log_odds_ratio": -0.0460456982254982, - "logits/chosen": 204.40284729003906, - "logits/rejected": 229.0106201171875, - "logps/chosen": -0.24734143912792206, - "logps/rejected": -1.9249556064605713, - "loss": 0.4263, - "nll_loss": 0.4004860520362854, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.012367071583867073, - "rewards/margins": 0.0838807076215744, - "rewards/rejected": -0.09624778479337692, - "step": 3095 - }, - { - "epoch": 2.3117076808351977, - "grad_norm": 25.98644256591797, - "learning_rate": 8.980265101338747e-07, - "log_odds_chosen": 3.611438751220703, - "log_odds_ratio": -0.05932469293475151, - "logits/chosen": 302.47064208984375, - "logits/rejected": 197.15582275390625, - "logps/chosen": -0.15363460779190063, - "logps/rejected": -1.432770848274231, - "loss": 0.3604, - "nll_loss": 0.334001362323761, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007681730203330517, - "rewards/margins": 0.06395681202411652, - "rewards/rejected": -0.07163853943347931, - "step": 3100 - }, - { - "epoch": 2.315436241610738, - "grad_norm": 21.49013328552246, - "learning_rate": 8.973031700969425e-07, - "log_odds_chosen": 2.849700450897217, - "log_odds_ratio": -0.08350645005702972, - "logits/chosen": 206.13290405273438, - "logits/rejected": 296.19195556640625, - "logps/chosen": -0.16249021887779236, - "logps/rejected": -1.3865817785263062, - "loss": 0.3707, - "nll_loss": 0.32248854637145996, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008124511688947678, - "rewards/margins": 0.06120457500219345, - "rewards/rejected": -0.06932909041643143, - "step": 3105 - }, - { - "epoch": 2.319164802386279, - "grad_norm": 26.05202293395996, - "learning_rate": 8.965815751510408e-07, - "log_odds_chosen": 2.6532676219940186, - "log_odds_ratio": -0.09285443276166916, - "logits/chosen": 199.4306640625, - "logits/rejected": 254.14248657226562, - "logps/chosen": -0.33819395303726196, - "logps/rejected": -1.7446281909942627, - "loss": 0.4261, - "nll_loss": 0.5338677763938904, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.016909698024392128, - "rewards/margins": 0.0703217163681984, - "rewards/rejected": -0.08723141252994537, - "step": 3110 - }, - { - "epoch": 2.3228933631618194, - "grad_norm": 28.039813995361328, - "learning_rate": 8.958617182905828e-07, - "log_odds_chosen": 2.90474271774292, - "log_odds_ratio": -0.06860895454883575, - "logits/chosen": 187.49288940429688, - "logits/rejected": 294.84454345703125, - "logps/chosen": -0.3190458118915558, - "logps/rejected": -1.9482667446136475, - "loss": 0.3817, - "nll_loss": 0.38409245014190674, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01595229096710682, - "rewards/margins": 0.08146104216575623, - "rewards/rejected": -0.09741333872079849, - "step": 3115 - }, - { - "epoch": 2.3266219239373602, - "grad_norm": 28.030277252197266, - "learning_rate": 8.951435925492912e-07, - "log_odds_chosen": 3.5165183544158936, - "log_odds_ratio": -0.0887220948934555, - "logits/chosen": 218.1449737548828, - "logits/rejected": 328.18170166015625, - "logps/chosen": -0.3540094494819641, - "logps/rejected": -2.6845059394836426, - "loss": 0.4006, - "nll_loss": 0.48919677734375, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.017700472846627235, - "rewards/margins": 0.11652481555938721, - "rewards/rejected": -0.1342252939939499, - "step": 3120 - }, - { - "epoch": 2.3303504847129006, - "grad_norm": 24.0564022064209, - "learning_rate": 8.94427190999916e-07, - "log_odds_chosen": 3.0546951293945312, - "log_odds_ratio": -0.05255626514554024, - "logits/chosen": 194.0518341064453, - "logits/rejected": 291.6320495605469, - "logps/chosen": -0.22541113197803497, - "logps/rejected": -1.8217729330062866, - "loss": 0.4164, - "nll_loss": 0.33331018686294556, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.011270557530224323, - "rewards/margins": 0.07981809228658676, - "rewards/rejected": -0.09108865261077881, - "step": 3125 - }, - { - "epoch": 2.3340790454884415, - "grad_norm": 17.967647552490234, - "learning_rate": 8.93712506753953e-07, - "log_odds_chosen": 3.677588701248169, - "log_odds_ratio": -0.05873774737119675, - "logits/chosen": 212.62863159179688, - "logits/rejected": 271.1366882324219, - "logps/chosen": -0.12497290223836899, - "logps/rejected": -1.5208524465560913, - "loss": 0.2958, - "nll_loss": 0.2927534282207489, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006248645484447479, - "rewards/margins": 0.06979397684335709, - "rewards/rejected": -0.07604262232780457, - "step": 3130 - }, - { - "epoch": 2.337807606263982, - "grad_norm": 27.979167938232422, - "learning_rate": 8.929995329613664e-07, - "log_odds_chosen": 1.452033281326294, - "log_odds_ratio": -0.33690154552459717, - "logits/chosen": 199.06521606445312, - "logits/rejected": 231.95681762695312, - "logps/chosen": -0.15720805525779724, - "logps/rejected": -0.735842227935791, - "loss": 0.3869, - "nll_loss": 0.29670625925064087, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.007860402576625347, - "rewards/margins": 0.02893170714378357, - "rewards/rejected": -0.03679211065173149, - "step": 3135 - }, - { - "epoch": 2.3415361670395227, - "grad_norm": 25.046390533447266, - "learning_rate": 8.922882628103122e-07, - "log_odds_chosen": 4.741447925567627, - "log_odds_ratio": -0.03474348783493042, - "logits/chosen": 258.4231872558594, - "logits/rejected": 338.64752197265625, - "logps/chosen": -0.1112012267112732, - "logps/rejected": -1.6443853378295898, - "loss": 0.2895, - "nll_loss": 0.22934076189994812, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.005560061428695917, - "rewards/margins": 0.0766592025756836, - "rewards/rejected": -0.08221925795078278, - "step": 3140 - }, - { - "epoch": 2.3452647278150636, - "grad_norm": 42.30475616455078, - "learning_rate": 8.91578689526865e-07, - "log_odds_chosen": 3.4610390663146973, - "log_odds_ratio": -0.03449210897088051, - "logits/chosen": 247.50634765625, - "logits/rejected": 188.04965209960938, - "logps/chosen": -0.14706860482692719, - "logps/rejected": -1.7225325107574463, - "loss": 0.342, - "nll_loss": 0.4909144937992096, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007353430148214102, - "rewards/margins": 0.07877318561077118, - "rewards/rejected": -0.08612662553787231, - "step": 3145 - }, - { - "epoch": 2.348993288590604, - "grad_norm": 21.564380645751953, - "learning_rate": 8.90870806374748e-07, - "log_odds_chosen": 3.8102104663848877, - "log_odds_ratio": -0.02614673413336277, - "logits/chosen": 165.22802734375, - "logits/rejected": 242.3638153076172, - "logps/chosen": -0.07407871633768082, - "logps/rejected": -1.30933678150177, - "loss": 0.3288, - "nll_loss": 0.21453356742858887, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.003703935770317912, - "rewards/margins": 0.06176290661096573, - "rewards/rejected": -0.06546684354543686, - "step": 3150 - }, - { - "epoch": 2.352721849366145, - "grad_norm": 54.574100494384766, - "learning_rate": 8.90164606655063e-07, - "log_odds_chosen": 3.3142688274383545, - "log_odds_ratio": -0.0963517278432846, - "logits/chosen": 249.2423858642578, - "logits/rejected": 206.1385955810547, - "logps/chosen": -0.1641363501548767, - "logps/rejected": -1.1076143980026245, - "loss": 0.3768, - "nll_loss": 0.2787497639656067, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008206818252801895, - "rewards/margins": 0.04717390611767769, - "rewards/rejected": -0.055380724370479584, - "step": 3155 - }, - { - "epoch": 2.356450410141685, - "grad_norm": 35.601959228515625, - "learning_rate": 8.894600837060251e-07, - "log_odds_chosen": 4.003078460693359, - "log_odds_ratio": -0.052100300788879395, - "logits/chosen": 289.2799987792969, - "logits/rejected": 198.52682495117188, - "logps/chosen": -0.23782184720039368, - "logps/rejected": -1.7506332397460938, - "loss": 0.4187, - "nll_loss": 0.48409757018089294, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.011891092173755169, - "rewards/margins": 0.07564057409763336, - "rewards/rejected": -0.0875316634774208, - "step": 3160 - }, - { - "epoch": 2.360178970917226, - "grad_norm": 39.460689544677734, - "learning_rate": 8.887572309026986e-07, - "log_odds_chosen": 3.5976366996765137, - "log_odds_ratio": -0.04731101542711258, - "logits/chosen": 187.0335235595703, - "logits/rejected": 327.74237060546875, - "logps/chosen": -0.2827337384223938, - "logps/rejected": -2.452864408493042, - "loss": 0.3912, - "nll_loss": 0.3275352120399475, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01413668692111969, - "rewards/margins": 0.10850652307271957, - "rewards/rejected": -0.12264321744441986, - "step": 3165 - }, - { - "epoch": 2.3639075316927665, - "grad_norm": 24.1800594329834, - "learning_rate": 8.880560416567349e-07, - "log_odds_chosen": 3.441998243331909, - "log_odds_ratio": -0.07032667100429535, - "logits/chosen": 295.73883056640625, - "logits/rejected": 239.9330291748047, - "logps/chosen": -0.1345086693763733, - "logps/rejected": -1.406505823135376, - "loss": 0.3961, - "nll_loss": 0.3835065960884094, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006725432816892862, - "rewards/margins": 0.06359986215829849, - "rewards/rejected": -0.07032529264688492, - "step": 3170 - }, - { - "epoch": 2.3676360924683073, - "grad_norm": 21.01292610168457, - "learning_rate": 8.873565094161139e-07, - "log_odds_chosen": 2.981893539428711, - "log_odds_ratio": -0.16652074456214905, - "logits/chosen": 216.11776733398438, - "logits/rejected": 260.877197265625, - "logps/chosen": -0.187842458486557, - "logps/rejected": -1.5531365871429443, - "loss": 0.3253, - "nll_loss": 0.2918941080570221, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.00939212366938591, - "rewards/margins": 0.06826470792293549, - "rewards/rejected": -0.0776568278670311, - "step": 3175 - }, - { - "epoch": 2.3713646532438477, - "grad_norm": 20.27556800842285, - "learning_rate": 8.866586276648859e-07, - "log_odds_chosen": 2.6825194358825684, - "log_odds_ratio": -0.17770548164844513, - "logits/chosen": 239.4822998046875, - "logits/rejected": 249.1979522705078, - "logps/chosen": -0.13705670833587646, - "logps/rejected": -1.1349351406097412, - "loss": 0.4127, - "nll_loss": 0.33577582240104675, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.006852835416793823, - "rewards/margins": 0.04989392310380936, - "rewards/rejected": -0.05674675852060318, - "step": 3180 - }, - { - "epoch": 2.3750932140193886, - "grad_norm": 22.407161712646484, - "learning_rate": 8.859623899229175e-07, - "log_odds_chosen": 2.5132246017456055, - "log_odds_ratio": -0.09370501339435577, - "logits/chosen": 220.55636596679688, - "logits/rejected": 220.04769897460938, - "logps/chosen": -0.3033062517642975, - "logps/rejected": -1.4851950407028198, - "loss": 0.3598, - "nll_loss": 0.5275130271911621, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.015165314078330994, - "rewards/margins": 0.05909444019198418, - "rewards/rejected": -0.07425975054502487, - "step": 3185 - }, - { - "epoch": 2.378821774794929, - "grad_norm": 20.698726654052734, - "learning_rate": 8.852677897456389e-07, - "log_odds_chosen": 3.1541950702667236, - "log_odds_ratio": -0.05892621725797653, - "logits/chosen": 303.84051513671875, - "logits/rejected": 236.5983123779297, - "logps/chosen": -0.12892486155033112, - "logps/rejected": -1.2891523838043213, - "loss": 0.3427, - "nll_loss": 0.2928742468357086, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006446243263781071, - "rewards/margins": 0.058011364191770554, - "rewards/rejected": -0.06445761024951935, - "step": 3190 - }, - { - "epoch": 2.38255033557047, - "grad_norm": 25.07916259765625, - "learning_rate": 8.845748207237923e-07, - "log_odds_chosen": 3.2879798412323, - "log_odds_ratio": -0.05045236274600029, - "logits/chosen": 213.40621948242188, - "logits/rejected": 325.6201171875, - "logps/chosen": -0.26954224705696106, - "logps/rejected": -2.176666259765625, - "loss": 0.3062, - "nll_loss": 0.3159855306148529, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.013477112166583538, - "rewards/margins": 0.09535620361566544, - "rewards/rejected": -0.10883331298828125, - "step": 3195 - }, - { - "epoch": 2.3862788963460106, - "grad_norm": 22.622098922729492, - "learning_rate": 8.838834764831844e-07, - "log_odds_chosen": 4.0835442543029785, - "log_odds_ratio": -0.059737492352724075, - "logits/chosen": 187.30560302734375, - "logits/rejected": 252.91091918945312, - "logps/chosen": -0.12815776467323303, - "logps/rejected": -1.8101398944854736, - "loss": 0.3335, - "nll_loss": 0.28799089789390564, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006407888140529394, - "rewards/margins": 0.08409909904003143, - "rewards/rejected": -0.09050698578357697, - "step": 3200 - }, - { - "epoch": 2.390007457121551, - "grad_norm": 24.71088981628418, - "learning_rate": 8.831937506844408e-07, - "log_odds_chosen": 3.703902006149292, - "log_odds_ratio": -0.04225796088576317, - "logits/chosen": 222.52645874023438, - "logits/rejected": 308.5802001953125, - "logps/chosen": -0.13523906469345093, - "logps/rejected": -1.4791028499603271, - "loss": 0.358, - "nll_loss": 0.24500031769275665, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006761953234672546, - "rewards/margins": 0.06719319522380829, - "rewards/rejected": -0.07395514845848083, - "step": 3205 - }, - { - "epoch": 2.393736017897092, - "grad_norm": 27.222352981567383, - "learning_rate": 8.825056370227597e-07, - "log_odds_chosen": 2.813965320587158, - "log_odds_ratio": -0.11460931599140167, - "logits/chosen": 259.38751220703125, - "logits/rejected": 203.1404266357422, - "logps/chosen": -0.33725303411483765, - "logps/rejected": -2.0699026584625244, - "loss": 0.3987, - "nll_loss": 0.5295095443725586, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.016862653195858, - "rewards/margins": 0.08663247525691986, - "rewards/rejected": -0.10349513590335846, - "step": 3210 - }, - { - "epoch": 2.3974645786726323, - "grad_norm": 47.82686996459961, - "learning_rate": 8.818191292276726e-07, - "log_odds_chosen": 2.813429355621338, - "log_odds_ratio": -0.1074957400560379, - "logits/chosen": 263.68658447265625, - "logits/rejected": 302.1453552246094, - "logps/chosen": -0.1995731145143509, - "logps/rejected": -1.396727204322815, - "loss": 0.4615, - "nll_loss": 0.410992294549942, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009978655725717545, - "rewards/margins": 0.05985770374536514, - "rewards/rejected": -0.06983635574579239, - "step": 3215 - }, - { - "epoch": 2.401193139448173, - "grad_norm": 41.59674835205078, - "learning_rate": 8.811342210628018e-07, - "log_odds_chosen": 4.754201412200928, - "log_odds_ratio": -0.012565049342811108, - "logits/chosen": 167.2486572265625, - "logits/rejected": 396.1829833984375, - "logps/chosen": -0.10020842403173447, - "logps/rejected": -2.0742385387420654, - "loss": 0.3644, - "nll_loss": 0.2846466600894928, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0050104218535125256, - "rewards/margins": 0.09870151430368423, - "rewards/rejected": -0.10371194034814835, - "step": 3220 - }, - { - "epoch": 2.4049217002237135, - "grad_norm": 25.215171813964844, - "learning_rate": 8.804509063256239e-07, - "log_odds_chosen": 3.9853973388671875, - "log_odds_ratio": -0.02131734788417816, - "logits/chosen": 279.9205017089844, - "logits/rejected": 240.15597534179688, - "logps/chosen": -0.1116662248969078, - "logps/rejected": -1.9046733379364014, - "loss": 0.4023, - "nll_loss": 0.33616915345191956, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.005583311431109905, - "rewards/margins": 0.08965037018060684, - "rewards/rejected": -0.09523367881774902, - "step": 3225 - }, - { - "epoch": 2.4086502609992544, - "grad_norm": 33.22465896606445, - "learning_rate": 8.797691788472336e-07, - "log_odds_chosen": 3.7225570678710938, - "log_odds_ratio": -0.03898332267999649, - "logits/chosen": 298.5728759765625, - "logits/rejected": 229.9907684326172, - "logps/chosen": -0.15716874599456787, - "logps/rejected": -1.9204161167144775, - "loss": 0.3677, - "nll_loss": 0.3152533173561096, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007858437485992908, - "rewards/margins": 0.088162362575531, - "rewards/rejected": -0.09602080285549164, - "step": 3230 - }, - { - "epoch": 2.412378821774795, - "grad_norm": 16.002124786376953, - "learning_rate": 8.790890324921097e-07, - "log_odds_chosen": 3.855363368988037, - "log_odds_ratio": -0.03006485477089882, - "logits/chosen": 276.25152587890625, - "logits/rejected": 220.700927734375, - "logps/chosen": -0.0922941341996193, - "logps/rejected": -1.619110345840454, - "loss": 0.3005, - "nll_loss": 0.25174954533576965, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.004614706616848707, - "rewards/margins": 0.07634081691503525, - "rewards/rejected": -0.08095552027225494, - "step": 3235 - }, - { - "epoch": 2.4161073825503356, - "grad_norm": 22.815568923950195, - "learning_rate": 8.784104611578832e-07, - "log_odds_chosen": 2.4842352867126465, - "log_odds_ratio": -0.20176219940185547, - "logits/chosen": 219.6878204345703, - "logits/rejected": 206.3599090576172, - "logps/chosen": -0.40314269065856934, - "logps/rejected": -1.4633859395980835, - "loss": 0.4375, - "nll_loss": 0.5626382231712341, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.020157136023044586, - "rewards/margins": 0.05301216244697571, - "rewards/rejected": -0.0731692984700203, - "step": 3240 - }, - { - "epoch": 2.419835943325876, - "grad_norm": 28.056129455566406, - "learning_rate": 8.777334587751073e-07, - "log_odds_chosen": 2.187136650085449, - "log_odds_ratio": -0.2719046473503113, - "logits/chosen": 243.44229125976562, - "logits/rejected": 321.42486572265625, - "logps/chosen": -0.39295244216918945, - "logps/rejected": -1.7392044067382812, - "loss": 0.445, - "nll_loss": 0.5302949547767639, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.019647620618343353, - "rewards/margins": 0.06731259822845459, - "rewards/rejected": -0.08696021884679794, - "step": 3245 - }, - { - "epoch": 2.423564504101417, - "grad_norm": 26.904155731201172, - "learning_rate": 8.770580193070293e-07, - "log_odds_chosen": 2.8679299354553223, - "log_odds_ratio": -0.07394924014806747, - "logits/chosen": 208.91134643554688, - "logits/rejected": 287.72528076171875, - "logps/chosen": -0.20884692668914795, - "logps/rejected": -1.3615516424179077, - "loss": 0.362, - "nll_loss": 0.49853745102882385, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010442345403134823, - "rewards/margins": 0.05763523653149605, - "rewards/rejected": -0.06807757914066315, - "step": 3250 - }, - { - "epoch": 2.4272930648769577, - "grad_norm": 26.319473266601562, - "learning_rate": 8.763841367493649e-07, - "log_odds_chosen": 3.446805477142334, - "log_odds_ratio": -0.053635694086551666, - "logits/chosen": 247.33773803710938, - "logits/rejected": 274.0096130371094, - "logps/chosen": -0.15113897621631622, - "logps/rejected": -1.5945465564727783, - "loss": 0.3508, - "nll_loss": 0.2718965709209442, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007556949742138386, - "rewards/margins": 0.07217037677764893, - "rewards/rejected": -0.07972732931375504, - "step": 3255 - }, - { - "epoch": 2.431021625652498, - "grad_norm": 24.102439880371094, - "learning_rate": 8.757118051300735e-07, - "log_odds_chosen": 2.659773826599121, - "log_odds_ratio": -0.10224181413650513, - "logits/chosen": 280.1613464355469, - "logits/rejected": 332.77886962890625, - "logps/chosen": -0.3363017141819, - "logps/rejected": -1.7767471075057983, - "loss": 0.3752, - "nll_loss": 0.4292429983615875, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01681508496403694, - "rewards/margins": 0.07202227413654327, - "rewards/rejected": -0.08883735537528992, - "step": 3260 - }, - { - "epoch": 2.4347501864280385, - "grad_norm": 26.981050491333008, - "learning_rate": 8.750410185091365e-07, - "log_odds_chosen": 3.5141959190368652, - "log_odds_ratio": -0.05270819738507271, - "logits/chosen": 220.00552368164062, - "logits/rejected": 195.8616485595703, - "logps/chosen": -0.15375831723213196, - "logps/rejected": -1.5585917234420776, - "loss": 0.2774, - "nll_loss": 0.2667309641838074, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0076879165135324, - "rewards/margins": 0.07024167478084564, - "rewards/rejected": -0.07792958617210388, - "step": 3265 - }, - { - "epoch": 2.4384787472035794, - "grad_norm": 21.20194435119629, - "learning_rate": 8.743717709783363e-07, - "log_odds_chosen": 3.0302891731262207, - "log_odds_ratio": -0.06932055950164795, - "logits/chosen": 190.8003692626953, - "logits/rejected": 369.5704345703125, - "logps/chosen": -0.46225491166114807, - "logps/rejected": -2.490722179412842, - "loss": 0.3344, - "nll_loss": 0.5384808778762817, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.023112744092941284, - "rewards/margins": 0.10142336785793304, - "rewards/rejected": -0.12453611195087433, - "step": 3270 - }, - { - "epoch": 2.4422073079791202, - "grad_norm": 23.56300163269043, - "learning_rate": 8.737040566610381e-07, - "log_odds_chosen": 2.8867886066436768, - "log_odds_ratio": -0.09977389872074127, - "logits/chosen": 232.6405029296875, - "logits/rejected": 293.3111877441406, - "logps/chosen": -0.3134891390800476, - "logps/rejected": -1.5029948949813843, - "loss": 0.369, - "nll_loss": 0.4337400794029236, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01567445881664753, - "rewards/margins": 0.059475284069776535, - "rewards/rejected": -0.07514973729848862, - "step": 3275 - }, - { - "epoch": 2.4459358687546606, - "grad_norm": 26.228349685668945, - "learning_rate": 8.730378697119729e-07, - "log_odds_chosen": 4.876449108123779, - "log_odds_ratio": -0.014519898220896721, - "logits/chosen": 355.37969970703125, - "logits/rejected": 185.3600311279297, - "logps/chosen": -0.08467607200145721, - "logps/rejected": -1.899425745010376, - "loss": 0.3862, - "nll_loss": 0.3006165027618408, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.004233804065734148, - "rewards/margins": 0.090737484395504, - "rewards/rejected": -0.09497128427028656, - "step": 3280 - }, - { - "epoch": 2.4496644295302015, - "grad_norm": 23.03717803955078, - "learning_rate": 8.723732043170228e-07, - "log_odds_chosen": 3.1034984588623047, - "log_odds_ratio": -0.04640461876988411, - "logits/chosen": 168.95260620117188, - "logits/rejected": 365.43829345703125, - "logps/chosen": -0.21119841933250427, - "logps/rejected": -1.5615283250808716, - "loss": 0.3437, - "nll_loss": 0.2939775288105011, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010559922084212303, - "rewards/margins": 0.0675164982676506, - "rewards/rejected": -0.07807642221450806, - "step": 3285 - }, - { - "epoch": 2.453392990305742, - "grad_norm": 22.791898727416992, - "learning_rate": 8.717100546930084e-07, - "log_odds_chosen": 1.6493499279022217, - "log_odds_ratio": -0.5213514566421509, - "logits/chosen": 220.5907440185547, - "logits/rejected": 189.372802734375, - "logps/chosen": -0.42620429396629333, - "logps/rejected": -1.2130225896835327, - "loss": 0.4222, - "nll_loss": 0.3548702597618103, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.021310215815901756, - "rewards/margins": 0.03934090957045555, - "rewards/rejected": -0.060651130974292755, - "step": 3290 - }, - { - "epoch": 2.4571215510812827, - "grad_norm": 33.43918991088867, - "learning_rate": 8.710484150874759e-07, - "log_odds_chosen": 3.0471272468566895, - "log_odds_ratio": -0.06540016829967499, - "logits/chosen": 223.7743377685547, - "logits/rejected": 323.17120361328125, - "logps/chosen": -0.18532948195934296, - "logps/rejected": -1.6211522817611694, - "loss": 0.3353, - "nll_loss": 0.39657890796661377, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009266474284231663, - "rewards/margins": 0.0717911496758461, - "rewards/rejected": -0.08105762302875519, - "step": 3295 - }, - { - "epoch": 2.460850111856823, - "grad_norm": 22.554458618164062, - "learning_rate": 8.703882797784894e-07, - "log_odds_chosen": 2.9466617107391357, - "log_odds_ratio": -0.149387925863266, - "logits/chosen": 237.77651977539062, - "logits/rejected": 299.3728332519531, - "logps/chosen": -0.28819936513900757, - "logps/rejected": -1.655093789100647, - "loss": 0.4312, - "nll_loss": 0.5199244618415833, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.014409971423447132, - "rewards/margins": 0.06834472715854645, - "rewards/rejected": -0.0827546939253807, - "step": 3300 - }, - { - "epoch": 2.464578672632364, - "grad_norm": 17.090274810791016, - "learning_rate": 8.697296430744212e-07, - "log_odds_chosen": 3.244169235229492, - "log_odds_ratio": -0.04630319029092789, - "logits/chosen": 314.24420166015625, - "logits/rejected": 266.47369384765625, - "logps/chosen": -0.1365170180797577, - "logps/rejected": -1.500445008277893, - "loss": 0.332, - "nll_loss": 0.23831257224082947, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006825850810855627, - "rewards/margins": 0.06819640100002289, - "rewards/rejected": -0.07502225041389465, - "step": 3305 - }, - { - "epoch": 2.4683072334079044, - "grad_norm": 29.4904842376709, - "learning_rate": 8.690724993137478e-07, - "log_odds_chosen": 2.929094076156616, - "log_odds_ratio": -0.08214916288852692, - "logits/chosen": 261.0281066894531, - "logits/rejected": 215.9178924560547, - "logps/chosen": -0.22467467188835144, - "logps/rejected": -1.700722098350525, - "loss": 0.3699, - "nll_loss": 0.34997671842575073, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.011233733966946602, - "rewards/margins": 0.07380236685276031, - "rewards/rejected": -0.08503611385822296, - "step": 3310 - }, - { - "epoch": 2.472035794183445, - "grad_norm": 30.013029098510742, - "learning_rate": 8.684168428648437e-07, - "log_odds_chosen": 4.467310428619385, - "log_odds_ratio": -0.040078453719615936, - "logits/chosen": 188.6583709716797, - "logits/rejected": 218.77182006835938, - "logps/chosen": -0.14014068245887756, - "logps/rejected": -2.075855255126953, - "loss": 0.3906, - "nll_loss": 0.32838839292526245, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007007033564150333, - "rewards/margins": 0.09678573906421661, - "rewards/rejected": -0.10379277169704437, - "step": 3315 - }, - { - "epoch": 2.4757643549589856, - "grad_norm": 18.70427131652832, - "learning_rate": 8.677626681257792e-07, - "log_odds_chosen": 2.22550630569458, - "log_odds_ratio": -0.23649457097053528, - "logits/chosen": 215.7761993408203, - "logits/rejected": 275.9485778808594, - "logps/chosen": -0.1792014092206955, - "logps/rejected": -1.182572603225708, - "loss": 0.3279, - "nll_loss": 0.42108216881752014, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.008960070088505745, - "rewards/margins": 0.05016856640577316, - "rewards/rejected": -0.05912863463163376, - "step": 3320 - }, - { - "epoch": 2.4794929157345265, - "grad_norm": 23.467830657958984, - "learning_rate": 8.6710996952412e-07, - "log_odds_chosen": 3.4866690635681152, - "log_odds_ratio": -0.06865102797746658, - "logits/chosen": 306.5169982910156, - "logits/rejected": 272.4681396484375, - "logps/chosen": -0.2295217514038086, - "logps/rejected": -1.8289682865142822, - "loss": 0.3368, - "nll_loss": 0.31515055894851685, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01147608831524849, - "rewards/margins": 0.07997232675552368, - "rewards/rejected": -0.09144841134548187, - "step": 3325 - }, - { - "epoch": 2.4832214765100673, - "grad_norm": 25.622039794921875, - "learning_rate": 8.664587415167274e-07, - "log_odds_chosen": 3.762678623199463, - "log_odds_ratio": -0.04754524305462837, - "logits/chosen": 303.6141662597656, - "logits/rejected": 249.1661834716797, - "logps/chosen": -0.07824189960956573, - "logps/rejected": -1.3127644062042236, - "loss": 0.3712, - "nll_loss": 0.30336469411849976, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.003912094980478287, - "rewards/margins": 0.06172613054513931, - "rewards/rejected": -0.0656382292509079, - "step": 3330 - }, - { - "epoch": 2.4869500372856077, - "grad_norm": 22.303119659423828, - "learning_rate": 8.658089785895599e-07, - "log_odds_chosen": 3.4118170738220215, - "log_odds_ratio": -0.0451614186167717, - "logits/chosen": 216.1046142578125, - "logits/rejected": 265.036865234375, - "logps/chosen": -0.11607503890991211, - "logps/rejected": -1.4673961400985718, - "loss": 0.3712, - "nll_loss": 0.28450506925582886, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.005803752224892378, - "rewards/margins": 0.06756605207920074, - "rewards/rejected": -0.07336980849504471, - "step": 3335 - }, - { - "epoch": 2.4906785980611486, - "grad_norm": 25.897743225097656, - "learning_rate": 8.651606752574786e-07, - "log_odds_chosen": 3.109811305999756, - "log_odds_ratio": -0.06896861642599106, - "logits/chosen": 334.9599914550781, - "logits/rejected": 343.8421936035156, - "logps/chosen": -0.27030253410339355, - "logps/rejected": -1.8720884323120117, - "loss": 0.4446, - "nll_loss": 0.38637691736221313, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.013515127822756767, - "rewards/margins": 0.08008929342031479, - "rewards/rejected": -0.0936044231057167, - "step": 3340 - }, - { - "epoch": 2.494407158836689, - "grad_norm": 28.768238067626953, - "learning_rate": 8.645138260640511e-07, - "log_odds_chosen": 2.9986720085144043, - "log_odds_ratio": -0.052542924880981445, - "logits/chosen": 232.95248413085938, - "logits/rejected": 218.3754425048828, - "logps/chosen": -0.259349524974823, - "logps/rejected": -1.9149940013885498, - "loss": 0.3366, - "nll_loss": 0.32538244128227234, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.012967477552592754, - "rewards/margins": 0.08278223872184753, - "rewards/rejected": -0.09574972093105316, - "step": 3345 - }, - { - "epoch": 2.49813571961223, - "grad_norm": 31.861831665039062, - "learning_rate": 8.638684255813602e-07, - "log_odds_chosen": 4.439074516296387, - "log_odds_ratio": -0.0627383440732956, - "logits/chosen": 293.13616943359375, - "logits/rejected": 274.375732421875, - "logps/chosen": -0.18516728281974792, - "logps/rejected": -2.7764060497283936, - "loss": 0.3586, - "nll_loss": 0.4012930989265442, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009258365258574486, - "rewards/margins": 0.12956194579601288, - "rewards/rejected": -0.13882030546665192, - "step": 3350 - }, - { - "epoch": 2.50186428038777, - "grad_norm": 19.366683959960938, - "learning_rate": 8.63224468409811e-07, - "log_odds_chosen": 2.9175047874450684, - "log_odds_ratio": -0.055683039128780365, - "logits/chosen": 222.6089324951172, - "logits/rejected": 274.3295593261719, - "logps/chosen": -0.2559543251991272, - "logps/rejected": -1.835566520690918, - "loss": 0.3911, - "nll_loss": 0.427818238735199, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.012797717936336994, - "rewards/margins": 0.07898060977458954, - "rewards/rejected": -0.09177832305431366, - "step": 3355 - }, - { - "epoch": 2.505592841163311, - "grad_norm": 22.10226821899414, - "learning_rate": 8.625819491779427e-07, - "log_odds_chosen": 3.1126132011413574, - "log_odds_ratio": -0.07509218156337738, - "logits/chosen": 258.5221252441406, - "logits/rejected": 201.2003631591797, - "logps/chosen": -0.24890294671058655, - "logps/rejected": -1.7552366256713867, - "loss": 0.3824, - "nll_loss": 0.3786585330963135, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.012445147149264812, - "rewards/margins": 0.07531668245792389, - "rewards/rejected": -0.08776183426380157, - "step": 3360 - }, - { - "epoch": 2.5093214019388514, - "grad_norm": 27.110326766967773, - "learning_rate": 8.619408625422394e-07, - "log_odds_chosen": 3.2041473388671875, - "log_odds_ratio": -0.046427853405475616, - "logits/chosen": 221.7185516357422, - "logits/rejected": 288.5023498535156, - "logps/chosen": -0.31378188729286194, - "logps/rejected": -2.087047815322876, - "loss": 0.3164, - "nll_loss": 0.3671293258666992, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.015689093619585037, - "rewards/margins": 0.08866329491138458, - "rewards/rejected": -0.10435239225625992, - "step": 3365 - }, - { - "epoch": 2.5130499627143923, - "grad_norm": 31.875808715820312, - "learning_rate": 8.613012031869432e-07, - "log_odds_chosen": 2.7066802978515625, - "log_odds_ratio": -0.09096715599298477, - "logits/chosen": 223.2712860107422, - "logits/rejected": 245.04055786132812, - "logps/chosen": -0.20985202491283417, - "logps/rejected": -1.534759759902954, - "loss": 0.4235, - "nll_loss": 0.34466052055358887, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010492602363228798, - "rewards/margins": 0.06624539196491241, - "rewards/rejected": -0.07673799246549606, - "step": 3370 - }, - { - "epoch": 2.5167785234899327, - "grad_norm": 34.39300537109375, - "learning_rate": 8.606629658238705e-07, - "log_odds_chosen": 4.067626953125, - "log_odds_ratio": -0.04854317754507065, - "logits/chosen": 255.28060913085938, - "logits/rejected": 245.05538940429688, - "logps/chosen": -0.15639469027519226, - "logps/rejected": -2.0476791858673096, - "loss": 0.3815, - "nll_loss": 0.24313855171203613, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007819734513759613, - "rewards/margins": 0.09456422179937363, - "rewards/rejected": -0.10238395631313324, - "step": 3375 - }, - { - "epoch": 2.5205070842654735, - "grad_norm": 28.54142189025879, - "learning_rate": 8.600261451922269e-07, - "log_odds_chosen": 3.1579582691192627, - "log_odds_ratio": -0.06485016644001007, - "logits/chosen": 225.8318328857422, - "logits/rejected": 230.7448272705078, - "logps/chosen": -0.1623847335577011, - "logps/rejected": -1.2928515672683716, - "loss": 0.3208, - "nll_loss": 0.38194534182548523, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.00811923760920763, - "rewards/margins": 0.056523341685533524, - "rewards/rejected": -0.06464257836341858, - "step": 3380 - }, - { - "epoch": 2.5242356450410144, - "grad_norm": 21.664270401000977, - "learning_rate": 8.593907360584258e-07, - "log_odds_chosen": 4.133074760437012, - "log_odds_ratio": -0.03024151548743248, - "logits/chosen": 268.2148132324219, - "logits/rejected": 208.6782989501953, - "logps/chosen": -0.18578381836414337, - "logps/rejected": -2.4341278076171875, - "loss": 0.3856, - "nll_loss": 0.35872989892959595, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009289190173149109, - "rewards/margins": 0.11241719871759415, - "rewards/rejected": -0.12170638889074326, - "step": 3385 - }, - { - "epoch": 2.527964205816555, - "grad_norm": 34.11537551879883, - "learning_rate": 8.587567332159079e-07, - "log_odds_chosen": 3.747706651687622, - "log_odds_ratio": -0.03232043981552124, - "logits/chosen": 272.1806640625, - "logits/rejected": 189.5420379638672, - "logps/chosen": -0.19480302929878235, - "logps/rejected": -2.007012367248535, - "loss": 0.4352, - "nll_loss": 0.41640573740005493, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009740151464939117, - "rewards/margins": 0.09061046689748764, - "rewards/rejected": -0.10035061836242676, - "step": 3390 - }, - { - "epoch": 2.531692766592095, - "grad_norm": 24.21099090576172, - "learning_rate": 8.581241314849612e-07, - "log_odds_chosen": 2.9961347579956055, - "log_odds_ratio": -0.08958049863576889, - "logits/chosen": 241.6681671142578, - "logits/rejected": 203.62557983398438, - "logps/chosen": -0.2778182625770569, - "logps/rejected": -1.6593968868255615, - "loss": 0.447, - "nll_loss": 0.3765244781970978, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.013890912756323814, - "rewards/margins": 0.06907892972230911, - "rewards/rejected": -0.08296984434127808, - "step": 3395 - }, - { - "epoch": 2.535421327367636, - "grad_norm": 27.20567512512207, - "learning_rate": 8.574929257125441e-07, - "log_odds_chosen": 4.406573295593262, - "log_odds_ratio": -0.028279537335038185, - "logits/chosen": 307.21966552734375, - "logits/rejected": 254.71914672851562, - "logps/chosen": -0.21347777545452118, - "logps/rejected": -2.999666929244995, - "loss": 0.2776, - "nll_loss": 0.3386712968349457, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010673889890313148, - "rewards/margins": 0.13930946588516235, - "rewards/rejected": -0.14998336136341095, - "step": 3400 - }, - { - "epoch": 2.539149888143177, - "grad_norm": 19.883176803588867, - "learning_rate": 8.568631107721093e-07, - "log_odds_chosen": 3.482332706451416, - "log_odds_ratio": -0.05594583600759506, - "logits/chosen": 335.6827697753906, - "logits/rejected": 235.6362762451172, - "logps/chosen": -0.10303723812103271, - "logps/rejected": -1.4193079471588135, - "loss": 0.2822, - "nll_loss": 0.22481127083301544, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.005151862744241953, - "rewards/margins": 0.06581352651119232, - "rewards/rejected": -0.07096540182828903, - "step": 3405 - }, - { - "epoch": 2.5428784489187173, - "grad_norm": 29.344640731811523, - "learning_rate": 8.562346815634272e-07, - "log_odds_chosen": 2.850252151489258, - "log_odds_ratio": -0.06540392339229584, - "logits/chosen": 304.6623229980469, - "logits/rejected": 204.7835693359375, - "logps/chosen": -0.1768922656774521, - "logps/rejected": -1.3843194246292114, - "loss": 0.5028, - "nll_loss": 0.7159074544906616, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008844614960253239, - "rewards/margins": 0.060371361672878265, - "rewards/rejected": -0.06921597570180893, - "step": 3410 - }, - { - "epoch": 2.546607009694258, - "grad_norm": 35.43874740600586, - "learning_rate": 8.556076330124148e-07, - "log_odds_chosen": 3.7215237617492676, - "log_odds_ratio": -0.029679080471396446, - "logits/chosen": 262.3177185058594, - "logits/rejected": 237.70474243164062, - "logps/chosen": -0.11956904828548431, - "logps/rejected": -1.6788661479949951, - "loss": 0.2951, - "nll_loss": 0.2557131350040436, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0059784529730677605, - "rewards/margins": 0.07796485722064972, - "rewards/rejected": -0.08394331485033035, - "step": 3415 - }, - { - "epoch": 2.5503355704697985, - "grad_norm": 19.143217086791992, - "learning_rate": 8.549819600709619e-07, - "log_odds_chosen": 4.059719085693359, - "log_odds_ratio": -0.03937052562832832, - "logits/chosen": 261.39813232421875, - "logits/rejected": 195.63211059570312, - "logps/chosen": -0.19042818248271942, - "logps/rejected": -1.7979545593261719, - "loss": 0.3395, - "nll_loss": 0.31909438967704773, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009521408937871456, - "rewards/margins": 0.08037631213665009, - "rewards/rejected": -0.08989772945642471, - "step": 3420 - }, - { - "epoch": 2.5540641312453394, - "grad_norm": 24.35759162902832, - "learning_rate": 8.54357657716761e-07, - "log_odds_chosen": 4.026578903198242, - "log_odds_ratio": -0.03750822693109512, - "logits/chosen": 235.5579071044922, - "logits/rejected": 192.8203887939453, - "logps/chosen": -0.14845004677772522, - "logps/rejected": -2.0114269256591797, - "loss": 0.3131, - "nll_loss": 0.29279786348342896, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007422502152621746, - "rewards/margins": 0.0931488424539566, - "rewards/rejected": -0.10057134926319122, - "step": 3425 - }, - { - "epoch": 2.5577926920208798, - "grad_norm": 29.606592178344727, - "learning_rate": 8.537347209531384e-07, - "log_odds_chosen": 2.943511962890625, - "log_odds_ratio": -0.09034137427806854, - "logits/chosen": 230.5531768798828, - "logits/rejected": 228.1986541748047, - "logps/chosen": -0.4281812310218811, - "logps/rejected": -2.163428544998169, - "loss": 0.3819, - "nll_loss": 0.5352867841720581, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.021409060806035995, - "rewards/margins": 0.08676236122846603, - "rewards/rejected": -0.10817142575979233, - "step": 3430 - }, - { - "epoch": 2.5615212527964206, - "grad_norm": 21.437780380249023, - "learning_rate": 8.531131448088853e-07, - "log_odds_chosen": 4.461366176605225, - "log_odds_ratio": -0.02399863675236702, - "logits/chosen": 234.2354278564453, - "logits/rejected": 395.1438903808594, - "logps/chosen": -0.11106938123703003, - "logps/rejected": -2.3105406761169434, - "loss": 0.3041, - "nll_loss": 0.20216476917266846, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0055534690618515015, - "rewards/margins": 0.10997356474399567, - "rewards/rejected": -0.11552703380584717, - "step": 3435 - }, - { - "epoch": 2.5652498135719615, - "grad_norm": 22.55005645751953, - "learning_rate": 8.52492924338092e-07, - "log_odds_chosen": 3.384809970855713, - "log_odds_ratio": -0.06382622569799423, - "logits/chosen": 195.168701171875, - "logits/rejected": 212.17636108398438, - "logps/chosen": -0.12939482927322388, - "logps/rejected": -1.1936962604522705, - "loss": 0.388, - "nll_loss": 0.44702714681625366, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006469741463661194, - "rewards/margins": 0.05321506783366203, - "rewards/rejected": -0.059684813022613525, - "step": 3440 - }, - { - "epoch": 2.568978374347502, - "grad_norm": 22.80831527709961, - "learning_rate": 8.51874054619982e-07, - "log_odds_chosen": 2.7899010181427, - "log_odds_ratio": -0.08144643157720566, - "logits/chosen": 278.8482971191406, - "logits/rejected": 245.94601440429688, - "logps/chosen": -0.24511155486106873, - "logps/rejected": -1.5015298128128052, - "loss": 0.381, - "nll_loss": 0.3800579607486725, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.012255579233169556, - "rewards/margins": 0.0628209114074707, - "rewards/rejected": -0.07507649064064026, - "step": 3445 - }, - { - "epoch": 2.5727069351230423, - "grad_norm": 18.969877243041992, - "learning_rate": 8.512565307587487e-07, - "log_odds_chosen": 3.7616448402404785, - "log_odds_ratio": -0.03677895665168762, - "logits/chosen": 216.28768920898438, - "logits/rejected": 191.0701141357422, - "logps/chosen": -0.18672287464141846, - "logps/rejected": -1.9407155513763428, - "loss": 0.3522, - "nll_loss": 0.3852882385253906, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009336143732070923, - "rewards/margins": 0.08769964426755905, - "rewards/rejected": -0.09703578054904938, - "step": 3450 - }, - { - "epoch": 2.576435495898583, - "grad_norm": 24.30983543395996, - "learning_rate": 8.50640347883392e-07, - "log_odds_chosen": 4.326138973236084, - "log_odds_ratio": -0.04884684085845947, - "logits/chosen": 200.25784301757812, - "logits/rejected": 321.848876953125, - "logps/chosen": -0.2349204570055008, - "logps/rejected": -2.9066038131713867, - "loss": 0.3505, - "nll_loss": 0.4424287676811218, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.011746021918952465, - "rewards/margins": 0.1335841715335846, - "rewards/rejected": -0.14533020555973053, - "step": 3455 - }, - { - "epoch": 2.580164056674124, - "grad_norm": 22.55730438232422, - "learning_rate": 8.500255011475575e-07, - "log_odds_chosen": 3.202910900115967, - "log_odds_ratio": -0.05261116102337837, - "logits/chosen": 258.2731628417969, - "logits/rejected": 285.93316650390625, - "logps/chosen": -0.26982393860816956, - "logps/rejected": -2.028684616088867, - "loss": 0.3686, - "nll_loss": 0.34157276153564453, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.013491196557879448, - "rewards/margins": 0.08794303238391876, - "rewards/rejected": -0.10143423080444336, - "step": 3460 - }, - { - "epoch": 2.5838926174496644, - "grad_norm": 21.089845657348633, - "learning_rate": 8.49411985729376e-07, - "log_odds_chosen": 3.011669158935547, - "log_odds_ratio": -0.05198876932263374, - "logits/chosen": 281.50933837890625, - "logits/rejected": 234.55923461914062, - "logps/chosen": -0.24523603916168213, - "logps/rejected": -1.7498829364776611, - "loss": 0.3849, - "nll_loss": 0.39999955892562866, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.012261802330613136, - "rewards/margins": 0.07523234188556671, - "rewards/rejected": -0.0874941498041153, - "step": 3465 - }, - { - "epoch": 2.587621178225205, - "grad_norm": 29.023996353149414, - "learning_rate": 8.48799796831305e-07, - "log_odds_chosen": 2.235910415649414, - "log_odds_ratio": -0.18901348114013672, - "logits/chosen": 254.9993438720703, - "logits/rejected": 237.3662109375, - "logps/chosen": -0.2459825575351715, - "logps/rejected": -1.1712698936462402, - "loss": 0.4419, - "nll_loss": 0.3448745310306549, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.012299127876758575, - "rewards/margins": 0.046264372766017914, - "rewards/rejected": -0.05856349319219589, - "step": 3470 - }, - { - "epoch": 2.5913497390007456, - "grad_norm": 25.667327880859375, - "learning_rate": 8.48188929679971e-07, - "log_odds_chosen": 4.726805686950684, - "log_odds_ratio": -0.02778707444667816, - "logits/chosen": 281.2161865234375, - "logits/rejected": 211.43716430664062, - "logps/chosen": -0.10811427980661392, - "logps/rejected": -1.6011244058609009, - "loss": 0.3464, - "nll_loss": 0.2792232930660248, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.005405713804066181, - "rewards/margins": 0.07465051114559174, - "rewards/rejected": -0.08005622029304504, - "step": 3475 - }, - { - "epoch": 2.5950782997762865, - "grad_norm": 24.140127182006836, - "learning_rate": 8.475793795260132e-07, - "log_odds_chosen": 2.855919361114502, - "log_odds_ratio": -0.10854963213205338, - "logits/chosen": 198.3558349609375, - "logits/rejected": 286.4383850097656, - "logps/chosen": -0.34436410665512085, - "logps/rejected": -1.7222133874893188, - "loss": 0.3593, - "nll_loss": 0.43605518341064453, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.017218206077814102, - "rewards/margins": 0.0688924640417099, - "rewards/rejected": -0.0861106738448143, - "step": 3480 - }, - { - "epoch": 2.598806860551827, - "grad_norm": 28.64480972290039, - "learning_rate": 8.469711416439277e-07, - "log_odds_chosen": 3.2688395977020264, - "log_odds_ratio": -0.06745140254497528, - "logits/chosen": 277.61297607421875, - "logits/rejected": 318.14202880859375, - "logps/chosen": -0.3153282701969147, - "logps/rejected": -2.1100354194641113, - "loss": 0.4564, - "nll_loss": 0.3681524395942688, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.015766413882374763, - "rewards/margins": 0.08973535150289536, - "rewards/rejected": -0.10550175607204437, - "step": 3485 - }, - { - "epoch": 2.6025354213273677, - "grad_norm": 34.81784439086914, - "learning_rate": 8.463642113319158e-07, - "log_odds_chosen": 3.732060670852661, - "log_odds_ratio": -0.025378528982400894, - "logits/chosen": 252.0109100341797, - "logits/rejected": 253.6973419189453, - "logps/chosen": -0.13953495025634766, - "logps/rejected": -1.921634316444397, - "loss": 0.3097, - "nll_loss": 0.4067252278327942, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0069767483510077, - "rewards/margins": 0.08910496532917023, - "rewards/rejected": -0.09608171135187149, - "step": 3490 - }, - { - "epoch": 2.6062639821029085, - "grad_norm": 22.556472778320312, - "learning_rate": 8.457585839117283e-07, - "log_odds_chosen": 2.1134438514709473, - "log_odds_ratio": -0.16046151518821716, - "logits/chosen": 241.6819610595703, - "logits/rejected": 292.23614501953125, - "logps/chosen": -0.44532400369644165, - "logps/rejected": -1.590132713317871, - "loss": 0.4372, - "nll_loss": 0.5639801621437073, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.022266196087002754, - "rewards/margins": 0.05724043771624565, - "rewards/rejected": -0.07950663566589355, - "step": 3495 - }, - { - "epoch": 2.609992542878449, - "grad_norm": 31.99468994140625, - "learning_rate": 8.451542547285166e-07, - "log_odds_chosen": 3.7036445140838623, - "log_odds_ratio": -0.04753658175468445, - "logits/chosen": 199.26708984375, - "logits/rejected": 243.8401641845703, - "logps/chosen": -0.2293929159641266, - "logps/rejected": -2.165403127670288, - "loss": 0.4672, - "nll_loss": 0.39499402046203613, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0114696454256773, - "rewards/margins": 0.09680050611495972, - "rewards/rejected": -0.10827015340328217, - "step": 3500 - }, - { - "epoch": 2.6137211036539894, - "grad_norm": 50.14274978637695, - "learning_rate": 8.44551219150681e-07, - "log_odds_chosen": 3.3654162883758545, - "log_odds_ratio": -0.05252077057957649, - "logits/chosen": 224.43270874023438, - "logits/rejected": 270.50445556640625, - "logps/chosen": -0.1102999895811081, - "logps/rejected": -1.4939371347427368, - "loss": 0.4246, - "nll_loss": 0.25239789485931396, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0055149998515844345, - "rewards/margins": 0.06918185949325562, - "rewards/rejected": -0.0746968537569046, - "step": 3505 - }, - { - "epoch": 2.61744966442953, - "grad_norm": 24.60614013671875, - "learning_rate": 8.439494725697223e-07, - "log_odds_chosen": 3.9941859245300293, - "log_odds_ratio": -0.045932233333587646, - "logits/chosen": 275.020751953125, - "logits/rejected": 341.7713623046875, - "logps/chosen": -0.11682406812906265, - "logps/rejected": -1.679038405418396, - "loss": 0.3545, - "nll_loss": 0.23870594799518585, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.00584120349958539, - "rewards/margins": 0.0781107246875763, - "rewards/rejected": -0.0839519277215004, - "step": 3510 - }, - { - "epoch": 2.621178225205071, - "grad_norm": 26.7822322845459, - "learning_rate": 8.433490104000933e-07, - "log_odds_chosen": 3.4371628761291504, - "log_odds_ratio": -0.05683919042348862, - "logits/chosen": 224.7911376953125, - "logits/rejected": 234.86001586914062, - "logps/chosen": -0.12482891976833344, - "logps/rejected": -1.6017459630966187, - "loss": 0.3941, - "nll_loss": 0.38037413358688354, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006241446360945702, - "rewards/margins": 0.07384584844112396, - "rewards/rejected": -0.08008730411529541, - "step": 3515 - }, - { - "epoch": 2.6249067859806114, - "grad_norm": 24.623247146606445, - "learning_rate": 8.427498280790526e-07, - "log_odds_chosen": 2.9267916679382324, - "log_odds_ratio": -0.059761375188827515, - "logits/chosen": 246.8142547607422, - "logits/rejected": 219.6398162841797, - "logps/chosen": -0.22476792335510254, - "logps/rejected": -1.6892038583755493, - "loss": 0.4353, - "nll_loss": 0.32818129658699036, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.011238396167755127, - "rewards/margins": 0.07322179526090622, - "rewards/rejected": -0.08446018397808075, - "step": 3520 - }, - { - "epoch": 2.6286353467561523, - "grad_norm": 24.20062255859375, - "learning_rate": 8.421519210665191e-07, - "log_odds_chosen": 4.120138168334961, - "log_odds_ratio": -0.019826605916023254, - "logits/chosen": 262.1089172363281, - "logits/rejected": 229.13174438476562, - "logps/chosen": -0.11071513593196869, - "logps/rejected": -1.9938390254974365, - "loss": 0.3523, - "nll_loss": 0.2782740592956543, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.005535756703466177, - "rewards/margins": 0.0941561907529831, - "rewards/rejected": -0.0996919497847557, - "step": 3525 - }, - { - "epoch": 2.6323639075316927, - "grad_norm": 30.033100128173828, - "learning_rate": 8.415552848449264e-07, - "log_odds_chosen": 4.03501033782959, - "log_odds_ratio": -0.08456217497587204, - "logits/chosen": 170.8258819580078, - "logits/rejected": 301.8872375488281, - "logps/chosen": -0.4833672046661377, - "logps/rejected": -3.520188093185425, - "loss": 0.3741, - "nll_loss": 0.5458129644393921, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.024168362841010094, - "rewards/margins": 0.15184104442596436, - "rewards/rejected": -0.176009401679039, - "step": 3530 - }, - { - "epoch": 2.6360924683072335, - "grad_norm": 23.656639099121094, - "learning_rate": 8.409599149190806e-07, - "log_odds_chosen": 4.0739359855651855, - "log_odds_ratio": -0.018000543117523193, - "logits/chosen": 178.20623779296875, - "logits/rejected": 297.9803466796875, - "logps/chosen": -0.1453782320022583, - "logps/rejected": -2.2222495079040527, - "loss": 0.4157, - "nll_loss": 0.2565167546272278, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0072689116932451725, - "rewards/margins": 0.10384354740381241, - "rewards/rejected": -0.11111246049404144, - "step": 3535 - }, - { - "epoch": 2.639821029082774, - "grad_norm": 33.101436614990234, - "learning_rate": 8.40365806816018e-07, - "log_odds_chosen": 3.6905791759490967, - "log_odds_ratio": -0.037542395293712616, - "logits/chosen": 232.94461059570312, - "logits/rejected": 261.0420227050781, - "logps/chosen": -0.1352473795413971, - "logps/rejected": -1.6915199756622314, - "loss": 0.3819, - "nll_loss": 0.38559797406196594, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006762369070202112, - "rewards/margins": 0.07781364023685455, - "rewards/rejected": -0.08457599580287933, - "step": 3540 - }, - { - "epoch": 2.643549589858315, - "grad_norm": 19.557544708251953, - "learning_rate": 8.397729560848629e-07, - "log_odds_chosen": 4.864506721496582, - "log_odds_ratio": -0.019148264080286026, - "logits/chosen": 200.23193359375, - "logits/rejected": 226.42202758789062, - "logps/chosen": -0.09870155155658722, - "logps/rejected": -1.8861507177352905, - "loss": 0.3367, - "nll_loss": 0.3340339958667755, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.004935077391564846, - "rewards/margins": 0.08937247097492218, - "rewards/rejected": -0.0943075492978096, - "step": 3545 - }, - { - "epoch": 2.647278150633855, - "grad_norm": 25.434242248535156, - "learning_rate": 8.391813582966891e-07, - "log_odds_chosen": 3.195012331008911, - "log_odds_ratio": -0.05195324495434761, - "logits/chosen": 275.9564208984375, - "logits/rejected": 202.47509765625, - "logps/chosen": -0.14133699238300323, - "logps/rejected": -1.5931345224380493, - "loss": 0.3145, - "nll_loss": 0.22019222378730774, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007066849619150162, - "rewards/margins": 0.07258988171815872, - "rewards/rejected": -0.07965672761201859, - "step": 3550 - }, - { - "epoch": 2.651006711409396, - "grad_norm": 24.041955947875977, - "learning_rate": 8.385910090443796e-07, - "log_odds_chosen": 3.778942823410034, - "log_odds_ratio": -0.051530640572309494, - "logits/chosen": 192.87069702148438, - "logits/rejected": 294.49188232421875, - "logps/chosen": -0.12399828433990479, - "logps/rejected": -1.6466388702392578, - "loss": 0.343, - "nll_loss": 0.45332974195480347, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006199914030730724, - "rewards/margins": 0.07613202929496765, - "rewards/rejected": -0.08233194053173065, - "step": 3555 - }, - { - "epoch": 2.6547352721849364, - "grad_norm": 36.316131591796875, - "learning_rate": 8.380019039424888e-07, - "log_odds_chosen": 4.4577956199646, - "log_odds_ratio": -0.11426451057195663, - "logits/chosen": 183.76168823242188, - "logits/rejected": 218.5436248779297, - "logps/chosen": -0.46208611130714417, - "logps/rejected": -2.7939414978027344, - "loss": 0.4933, - "nll_loss": 0.5632838010787964, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02310430444777012, - "rewards/margins": 0.11659276485443115, - "rewards/rejected": -0.13969707489013672, - "step": 3560 - }, - { - "epoch": 2.6584638329604773, - "grad_norm": 22.187427520751953, - "learning_rate": 8.374140386271069e-07, - "log_odds_chosen": 3.6356072425842285, - "log_odds_ratio": -0.057021550834178925, - "logits/chosen": 259.8240051269531, - "logits/rejected": 248.33767700195312, - "logps/chosen": -0.21968546509742737, - "logps/rejected": -1.899325966835022, - "loss": 0.38, - "nll_loss": 0.46102815866470337, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010984273627400398, - "rewards/margins": 0.08398203551769257, - "rewards/rejected": -0.09496630728244781, - "step": 3565 - }, - { - "epoch": 2.662192393736018, - "grad_norm": 20.289764404296875, - "learning_rate": 8.368274087557231e-07, - "log_odds_chosen": 3.7229652404785156, - "log_odds_ratio": -0.03536496311426163, - "logits/chosen": 270.90643310546875, - "logits/rejected": 195.0957489013672, - "logps/chosen": -0.1633531153202057, - "logps/rejected": -2.014380693435669, - "loss": 0.4044, - "nll_loss": 0.30635353922843933, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.00816765520721674, - "rewards/margins": 0.09255137294530869, - "rewards/rejected": -0.10071901977062225, - "step": 3570 - }, - { - "epoch": 2.6659209545115585, - "grad_norm": 36.84571838378906, - "learning_rate": 8.362420100070909e-07, - "log_odds_chosen": 2.9206936359405518, - "log_odds_ratio": -0.09592375159263611, - "logits/chosen": 213.1687774658203, - "logits/rejected": 270.33648681640625, - "logps/chosen": -0.6997449994087219, - "logps/rejected": -2.915762424468994, - "loss": 0.4392, - "nll_loss": 0.6174715161323547, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.03498724848031998, - "rewards/margins": 0.11080087721347809, - "rewards/rejected": -0.14578810334205627, - "step": 3575 - }, - { - "epoch": 2.669649515287099, - "grad_norm": 20.243959426879883, - "learning_rate": 8.356578380810946e-07, - "log_odds_chosen": 3.0224769115448, - "log_odds_ratio": -0.06639382988214493, - "logits/chosen": 177.9281768798828, - "logits/rejected": 269.61083984375, - "logps/chosen": -0.20471401512622833, - "logps/rejected": -1.7721363306045532, - "loss": 0.3419, - "nll_loss": 0.2827315330505371, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010235701687633991, - "rewards/margins": 0.07837111502885818, - "rewards/rejected": -0.0886068195104599, - "step": 3580 - }, - { - "epoch": 2.6733780760626398, - "grad_norm": 24.311582565307617, - "learning_rate": 8.350748886986167e-07, - "log_odds_chosen": 3.7268970012664795, - "log_odds_ratio": -0.03247169777750969, - "logits/chosen": 230.70193481445312, - "logits/rejected": 189.20452880859375, - "logps/chosen": -0.1702023446559906, - "logps/rejected": -2.086784839630127, - "loss": 0.4163, - "nll_loss": 0.4142511785030365, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.00851011835038662, - "rewards/margins": 0.09582912176847458, - "rewards/rejected": -0.10433924198150635, - "step": 3585 - }, - { - "epoch": 2.6771066368381806, - "grad_norm": 35.897396087646484, - "learning_rate": 8.344931576014064e-07, - "log_odds_chosen": 3.671043872833252, - "log_odds_ratio": -0.04192491993308067, - "logits/chosen": 250.66943359375, - "logits/rejected": 274.79095458984375, - "logps/chosen": -0.13498903810977936, - "logps/rejected": -1.7046053409576416, - "loss": 0.3683, - "nll_loss": 0.25612497329711914, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006749452091753483, - "rewards/margins": 0.0784808099269867, - "rewards/rejected": -0.0852302610874176, - "step": 3590 - }, - { - "epoch": 2.680835197613721, - "grad_norm": 23.45178985595703, - "learning_rate": 8.339126405519482e-07, - "log_odds_chosen": 2.5658178329467773, - "log_odds_ratio": -0.10515771061182022, - "logits/chosen": 279.10302734375, - "logits/rejected": 212.430908203125, - "logps/chosen": -0.4818514287471771, - "logps/rejected": -2.073087453842163, - "loss": 0.3524, - "nll_loss": 0.5456601977348328, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.024092573672533035, - "rewards/margins": 0.07956179976463318, - "rewards/rejected": -0.10365436971187592, - "step": 3595 - }, - { - "epoch": 2.684563758389262, - "grad_norm": 41.78459930419922, - "learning_rate": 8.333333333333333e-07, - "log_odds_chosen": 2.7806715965270996, - "log_odds_ratio": -0.07684355974197388, - "logits/chosen": 363.69439697265625, - "logits/rejected": 208.3416748046875, - "logps/chosen": -0.2943492829799652, - "logps/rejected": -1.6328697204589844, - "loss": 0.5408, - "nll_loss": 0.547825038433075, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.014717464335262775, - "rewards/margins": 0.0669260174036026, - "rewards/rejected": -0.0816434845328331, - "step": 3600 - }, - { - "epoch": 2.6882923191648023, - "grad_norm": 20.528282165527344, - "learning_rate": 8.327552317491304e-07, - "log_odds_chosen": 2.940488338470459, - "log_odds_ratio": -0.0631902813911438, - "logits/chosen": 275.425537109375, - "logits/rejected": 247.97781372070312, - "logps/chosen": -0.23654666543006897, - "logps/rejected": -1.679456114768982, - "loss": 0.3792, - "nll_loss": 0.34635448455810547, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.011827333830296993, - "rewards/margins": 0.072145476937294, - "rewards/rejected": -0.08397281169891357, - "step": 3605 - }, - { - "epoch": 2.692020879940343, - "grad_norm": 21.20456886291504, - "learning_rate": 8.321783316232578e-07, - "log_odds_chosen": 2.356825590133667, - "log_odds_ratio": -0.09495440870523453, - "logits/chosen": 209.32296752929688, - "logits/rejected": 321.0606994628906, - "logps/chosen": -0.31225308775901794, - "logps/rejected": -1.5217249393463135, - "loss": 0.5122, - "nll_loss": 0.46532493829727173, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.015612654387950897, - "rewards/margins": 0.060473598539829254, - "rewards/rejected": -0.07608624547719955, - "step": 3610 - }, - { - "epoch": 2.6957494407158835, - "grad_norm": 20.72730255126953, - "learning_rate": 8.31602628799857e-07, - "log_odds_chosen": 2.7220089435577393, - "log_odds_ratio": -0.10054464638233185, - "logits/chosen": 202.56076049804688, - "logits/rejected": 321.4881591796875, - "logps/chosen": -0.31978338956832886, - "logps/rejected": -1.6097456216812134, - "loss": 0.3692, - "nll_loss": 0.5176464319229126, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.015989169478416443, - "rewards/margins": 0.06449811160564423, - "rewards/rejected": -0.08048728108406067, - "step": 3615 - }, - { - "epoch": 2.6994780014914244, - "grad_norm": 24.637414932250977, - "learning_rate": 8.310281191431671e-07, - "log_odds_chosen": 3.0891425609588623, - "log_odds_ratio": -0.055608153343200684, - "logits/chosen": 173.14376831054688, - "logits/rejected": 263.6024169921875, - "logps/chosen": -0.2228788584470749, - "logps/rejected": -1.8058996200561523, - "loss": 0.3047, - "nll_loss": 0.30154404044151306, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.011143943294882774, - "rewards/margins": 0.07915104180574417, - "rewards/rejected": -0.0902949795126915, - "step": 3620 - }, - { - "epoch": 2.703206562266965, - "grad_norm": 23.90869903564453, - "learning_rate": 8.304547985373998e-07, - "log_odds_chosen": 3.703763484954834, - "log_odds_ratio": -0.027110164985060692, - "logits/chosen": 230.4307861328125, - "logits/rejected": 250.58963012695312, - "logps/chosen": -0.1388990432024002, - "logps/rejected": -1.857972502708435, - "loss": 0.3782, - "nll_loss": 0.2704942226409912, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006944952066987753, - "rewards/margins": 0.08595367521047592, - "rewards/rejected": -0.09289862960577011, - "step": 3625 - }, - { - "epoch": 2.7069351230425056, - "grad_norm": 35.69987106323242, - "learning_rate": 8.298826628866154e-07, - "log_odds_chosen": 3.000265121459961, - "log_odds_ratio": -0.1312166452407837, - "logits/chosen": 282.2646484375, - "logits/rejected": 282.4350280761719, - "logps/chosen": -0.2558921277523041, - "logps/rejected": -1.425595998764038, - "loss": 0.4764, - "nll_loss": 0.5287753343582153, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.012794608250260353, - "rewards/margins": 0.05848519131541252, - "rewards/rejected": -0.07127979397773743, - "step": 3630 - }, - { - "epoch": 2.710663683818046, - "grad_norm": 19.14484977722168, - "learning_rate": 8.293117081146003e-07, - "log_odds_chosen": 2.8897202014923096, - "log_odds_ratio": -0.163778617978096, - "logits/chosen": 197.95120239257812, - "logits/rejected": 284.4172668457031, - "logps/chosen": -0.34805765748023987, - "logps/rejected": -1.8098453283309937, - "loss": 0.4215, - "nll_loss": 0.4680655598640442, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.017402883619070053, - "rewards/margins": 0.07308937609195709, - "rewards/rejected": -0.09049226343631744, - "step": 3635 - }, - { - "epoch": 2.714392244593587, - "grad_norm": 24.24571990966797, - "learning_rate": 8.287419301647449e-07, - "log_odds_chosen": 6.092555046081543, - "log_odds_ratio": -0.01605106331408024, - "logits/chosen": 302.321044921875, - "logits/rejected": 205.52749633789062, - "logps/chosen": -0.10491335391998291, - "logps/rejected": -2.118638277053833, - "loss": 0.3255, - "nll_loss": 0.25814369320869446, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.00524566788226366, - "rewards/margins": 0.10068623721599579, - "rewards/rejected": -0.10593191534280777, - "step": 3640 - }, - { - "epoch": 2.7181208053691277, - "grad_norm": 20.861774444580078, - "learning_rate": 8.281733249999222e-07, - "log_odds_chosen": 4.426974296569824, - "log_odds_ratio": -0.0407777763903141, - "logits/chosen": 202.90304565429688, - "logits/rejected": 254.79833984375, - "logps/chosen": -0.14992693066596985, - "logps/rejected": -2.1112332344055176, - "loss": 0.3055, - "nll_loss": 0.2973848283290863, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007496347185224295, - "rewards/margins": 0.0980653166770935, - "rewards/rejected": -0.10556165874004364, - "step": 3645 - }, - { - "epoch": 2.721849366144668, - "grad_norm": 25.626060485839844, - "learning_rate": 8.27605888602368e-07, - "log_odds_chosen": 4.572673797607422, - "log_odds_ratio": -0.03450591862201691, - "logits/chosen": 243.92333984375, - "logits/rejected": 237.47476196289062, - "logps/chosen": -0.21535567939281464, - "logps/rejected": -2.8436686992645264, - "loss": 0.386, - "nll_loss": 0.3188445568084717, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010767784900963306, - "rewards/margins": 0.13141566514968872, - "rewards/rejected": -0.14218345284461975, - "step": 3650 - }, - { - "epoch": 2.725577926920209, - "grad_norm": 27.9399471282959, - "learning_rate": 8.270396169735619e-07, - "log_odds_chosen": 3.459022045135498, - "log_odds_ratio": -0.08003798127174377, - "logits/chosen": 185.19126892089844, - "logits/rejected": 258.51617431640625, - "logps/chosen": -0.219814732670784, - "logps/rejected": -1.836755394935608, - "loss": 0.3616, - "nll_loss": 0.3978520929813385, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010990736074745655, - "rewards/margins": 0.08084703236818314, - "rewards/rejected": -0.09183777123689651, - "step": 3655 - }, - { - "epoch": 2.7293064876957494, - "grad_norm": 24.328432083129883, - "learning_rate": 8.264745061341079e-07, - "log_odds_chosen": 4.252660274505615, - "log_odds_ratio": -0.02448631450533867, - "logits/chosen": 225.21572875976562, - "logits/rejected": 191.4467315673828, - "logps/chosen": -0.21632730960845947, - "logps/rejected": -1.9983962774276733, - "loss": 0.4133, - "nll_loss": 0.2686801850795746, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010816364549100399, - "rewards/margins": 0.08910344541072845, - "rewards/rejected": -0.09991981089115143, - "step": 3660 - }, - { - "epoch": 2.73303504847129, - "grad_norm": 20.892026901245117, - "learning_rate": 8.259105521236187e-07, - "log_odds_chosen": 2.3961756229400635, - "log_odds_ratio": -0.2451595813035965, - "logits/chosen": 335.8856506347656, - "logits/rejected": 224.4939727783203, - "logps/chosen": -0.3332614302635193, - "logps/rejected": -1.460614800453186, - "loss": 0.4726, - "nll_loss": 0.6175149083137512, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.016663074493408203, - "rewards/margins": 0.0563676580786705, - "rewards/rejected": -0.0730307325720787, - "step": 3665 - }, - { - "epoch": 2.7367636092468306, - "grad_norm": 24.32080841064453, - "learning_rate": 8.253477510005973e-07, - "log_odds_chosen": 3.4233994483947754, - "log_odds_ratio": -0.04628372937440872, - "logits/chosen": 244.2325439453125, - "logits/rejected": 238.85287475585938, - "logps/chosen": -0.19442136585712433, - "logps/rejected": -1.7870839834213257, - "loss": 0.3514, - "nll_loss": 0.3266316056251526, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009721068665385246, - "rewards/margins": 0.07963313162326813, - "rewards/rejected": -0.08935420215129852, - "step": 3670 - }, - { - "epoch": 2.7404921700223714, - "grad_norm": 28.305879592895508, - "learning_rate": 8.247860988423226e-07, - "log_odds_chosen": 3.5401275157928467, - "log_odds_ratio": -0.043169040232896805, - "logits/chosen": 184.01461791992188, - "logits/rejected": 262.1426086425781, - "logps/chosen": -0.2112112045288086, - "logps/rejected": -2.016544818878174, - "loss": 0.3267, - "nll_loss": 0.3849395215511322, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010560560040175915, - "rewards/margins": 0.09026667475700378, - "rewards/rejected": -0.10082723945379257, - "step": 3675 - }, - { - "epoch": 2.744220730797912, - "grad_norm": 59.937801361083984, - "learning_rate": 8.24225591744734e-07, - "log_odds_chosen": 1.670037031173706, - "log_odds_ratio": -0.38111382722854614, - "logits/chosen": 182.10000610351562, - "logits/rejected": 297.2353515625, - "logps/chosen": -0.2284696102142334, - "logps/rejected": -1.0256197452545166, - "loss": 0.5483, - "nll_loss": 0.441206157207489, - "rewards/accuracies": 0.6000000238418579, - "rewards/chosen": -0.011423480696976185, - "rewards/margins": 0.03985750675201416, - "rewards/rejected": -0.05128098651766777, - "step": 3680 - }, - { - "epoch": 2.7479492915734527, - "grad_norm": 18.135560989379883, - "learning_rate": 8.23666225822317e-07, - "log_odds_chosen": 3.5306365489959717, - "log_odds_ratio": -0.060637474060058594, - "logits/chosen": 280.31463623046875, - "logits/rejected": 209.8810272216797, - "logps/chosen": -0.2127242535352707, - "logps/rejected": -1.9608150720596313, - "loss": 0.4167, - "nll_loss": 0.3063371181488037, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01063621323555708, - "rewards/margins": 0.0874045342206955, - "rewards/rejected": -0.09804075211286545, - "step": 3685 - }, - { - "epoch": 2.751677852348993, - "grad_norm": 30.07253646850586, - "learning_rate": 8.231079972079914e-07, - "log_odds_chosen": 3.353513717651367, - "log_odds_ratio": -0.05463365465402603, - "logits/chosen": 240.03585815429688, - "logits/rejected": 236.6366729736328, - "logps/chosen": -0.3309222161769867, - "logps/rejected": -2.169532537460327, - "loss": 0.4219, - "nll_loss": 0.54301518201828, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.016546111553907394, - "rewards/margins": 0.09193052351474762, - "rewards/rejected": -0.10847663879394531, - "step": 3690 - }, - { - "epoch": 2.755406413124534, - "grad_norm": 28.438556671142578, - "learning_rate": 8.225509020529979e-07, - "log_odds_chosen": 2.3317646980285645, - "log_odds_ratio": -0.109208844602108, - "logits/chosen": 315.05889892578125, - "logits/rejected": 187.8129119873047, - "logps/chosen": -0.3859233856201172, - "logps/rejected": -1.6747890710830688, - "loss": 0.545, - "nll_loss": 0.623500645160675, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01929617114365101, - "rewards/margins": 0.06444328278303146, - "rewards/rejected": -0.08373944461345673, - "step": 3695 - }, - { - "epoch": 2.759134973900075, - "grad_norm": 21.070463180541992, - "learning_rate": 8.219949365267865e-07, - "log_odds_chosen": 4.191210746765137, - "log_odds_ratio": -0.02215607278048992, - "logits/chosen": 262.09832763671875, - "logits/rejected": 270.13165283203125, - "logps/chosen": -0.11166485399007797, - "logps/rejected": -1.986440896987915, - "loss": 0.3592, - "nll_loss": 0.24886159598827362, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.005583242978900671, - "rewards/margins": 0.09373880177736282, - "rewards/rejected": -0.09932203590869904, - "step": 3700 - }, - { - "epoch": 2.762863534675615, - "grad_norm": 19.74126434326172, - "learning_rate": 8.214400968169071e-07, - "log_odds_chosen": 2.3822221755981445, - "log_odds_ratio": -0.14275172352790833, - "logits/chosen": 237.77420043945312, - "logits/rejected": 226.2701873779297, - "logps/chosen": -0.4008992314338684, - "logps/rejected": -1.5109293460845947, - "loss": 0.3273, - "nll_loss": 0.5047513246536255, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0200449600815773, - "rewards/margins": 0.055501509457826614, - "rewards/rejected": -0.07554646581411362, - "step": 3705 - }, - { - "epoch": 2.7665920954511556, - "grad_norm": 31.13445472717285, - "learning_rate": 8.208863791288982e-07, - "log_odds_chosen": 2.7487847805023193, - "log_odds_ratio": -0.1507304161787033, - "logits/chosen": 299.6902770996094, - "logits/rejected": 269.3072204589844, - "logps/chosen": -0.2914508581161499, - "logps/rejected": -1.5799973011016846, - "loss": 0.4678, - "nll_loss": 0.5867143869400024, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01457254309207201, - "rewards/margins": 0.06442733108997345, - "rewards/rejected": -0.07899986952543259, - "step": 3710 - }, - { - "epoch": 2.7703206562266964, - "grad_norm": 23.984777450561523, - "learning_rate": 8.203337796861792e-07, - "log_odds_chosen": 3.8152217864990234, - "log_odds_ratio": -0.04265158623456955, - "logits/chosen": 226.2320556640625, - "logits/rejected": 173.759521484375, - "logps/chosen": -0.1983690708875656, - "logps/rejected": -2.1790318489074707, - "loss": 0.3776, - "nll_loss": 0.33237704634666443, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009918454103171825, - "rewards/margins": 0.09903313219547272, - "rewards/rejected": -0.10895159095525742, - "step": 3715 - }, - { - "epoch": 2.7740492170022373, - "grad_norm": 21.937856674194336, - "learning_rate": 8.197822947299412e-07, - "log_odds_chosen": 2.602508068084717, - "log_odds_ratio": -0.22581584751605988, - "logits/chosen": 219.2142791748047, - "logits/rejected": 344.10894775390625, - "logps/chosen": -0.30004191398620605, - "logps/rejected": -1.4368116855621338, - "loss": 0.3391, - "nll_loss": 0.495917946100235, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.015002096071839333, - "rewards/margins": 0.0568385012447834, - "rewards/rejected": -0.07184059917926788, - "step": 3720 - }, - { - "epoch": 2.7777777777777777, - "grad_norm": 46.53112030029297, - "learning_rate": 8.192319205190406e-07, - "log_odds_chosen": 2.4947352409362793, - "log_odds_ratio": -0.1802413910627365, - "logits/chosen": 207.9983673095703, - "logits/rejected": 238.39810180664062, - "logps/chosen": -0.35410842299461365, - "logps/rejected": -1.8120296001434326, - "loss": 0.37, - "nll_loss": 0.38283708691596985, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.017705421894788742, - "rewards/margins": 0.0728960633277893, - "rewards/rejected": -0.09060148894786835, - "step": 3725 - }, - { - "epoch": 2.7815063385533185, - "grad_norm": 27.74759864807129, - "learning_rate": 8.186826533298912e-07, - "log_odds_chosen": 4.303781986236572, - "log_odds_ratio": -0.04765907675027847, - "logits/chosen": 258.99029541015625, - "logits/rejected": 279.06878662109375, - "logps/chosen": -0.1702066957950592, - "logps/rejected": -2.827690839767456, - "loss": 0.3691, - "nll_loss": 0.41369810700416565, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.00851033441722393, - "rewards/margins": 0.13287422060966492, - "rewards/rejected": -0.1413845419883728, - "step": 3730 - }, - { - "epoch": 2.785234899328859, - "grad_norm": 28.765104293823242, - "learning_rate": 8.181344894563601e-07, - "log_odds_chosen": 3.072659730911255, - "log_odds_ratio": -0.09961138665676117, - "logits/chosen": 201.01402282714844, - "logits/rejected": 270.0599670410156, - "logps/chosen": -0.32074347138404846, - "logps/rejected": -1.6774356365203857, - "loss": 0.3292, - "nll_loss": 0.4645712375640869, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.016037171706557274, - "rewards/margins": 0.06783461570739746, - "rewards/rejected": -0.08387179672718048, - "step": 3735 - }, - { - "epoch": 2.7889634601043998, - "grad_norm": 30.218875885009766, - "learning_rate": 8.175874252096609e-07, - "log_odds_chosen": 3.684021472930908, - "log_odds_ratio": -0.034453969448804855, - "logits/chosen": 227.8923797607422, - "logits/rejected": 215.57009887695312, - "logps/chosen": -0.1304035484790802, - "logps/rejected": -1.7823638916015625, - "loss": 0.3466, - "nll_loss": 0.30709904432296753, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006520177237689495, - "rewards/margins": 0.08259802311658859, - "rewards/rejected": -0.08911819756031036, - "step": 3740 - }, - { - "epoch": 2.79269202087994, - "grad_norm": 23.94392967224121, - "learning_rate": 8.170414569182505e-07, - "log_odds_chosen": 1.9806907176971436, - "log_odds_ratio": -0.17016878724098206, - "logits/chosen": 223.428955078125, - "logits/rejected": 233.9658203125, - "logps/chosen": -0.35646766424179077, - "logps/rejected": -1.4031479358673096, - "loss": 0.4077, - "nll_loss": 0.47835659980773926, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01782338321208954, - "rewards/margins": 0.0523340106010437, - "rewards/rejected": -0.07015739381313324, - "step": 3745 - }, - { - "epoch": 2.796420581655481, - "grad_norm": 21.528024673461914, - "learning_rate": 8.164965809277262e-07, - "log_odds_chosen": 3.173128128051758, - "log_odds_ratio": -0.0792618840932846, - "logits/chosen": 293.5087890625, - "logits/rejected": 200.2118377685547, - "logps/chosen": -0.21373924612998962, - "logps/rejected": -1.6647818088531494, - "loss": 0.3334, - "nll_loss": 0.3730601668357849, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010686961933970451, - "rewards/margins": 0.07255212962627411, - "rewards/rejected": -0.08323909342288971, - "step": 3750 - }, - { - "epoch": 2.800149142431022, - "grad_norm": 27.74806785583496, - "learning_rate": 8.159527936007208e-07, - "log_odds_chosen": 3.2939693927764893, - "log_odds_ratio": -0.04636671394109726, - "logits/chosen": 214.23812866210938, - "logits/rejected": 209.56912231445312, - "logps/chosen": -0.17082057893276215, - "logps/rejected": -1.7360773086547852, - "loss": 0.3487, - "nll_loss": 0.2404172420501709, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008541028946638107, - "rewards/margins": 0.07826283574104309, - "rewards/rejected": -0.0868038684129715, - "step": 3755 - }, - { - "epoch": 2.8038777032065623, - "grad_norm": 22.26565933227539, - "learning_rate": 8.154100913168028e-07, - "log_odds_chosen": 3.1806015968322754, - "log_odds_ratio": -0.05625191330909729, - "logits/chosen": 211.39645385742188, - "logits/rejected": 266.1663818359375, - "logps/chosen": -0.14057624340057373, - "logps/rejected": -1.4899152517318726, - "loss": 0.4028, - "nll_loss": 0.38031357526779175, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007028813008219004, - "rewards/margins": 0.06746695190668106, - "rewards/rejected": -0.07449576258659363, - "step": 3760 - }, - { - "epoch": 2.8076062639821027, - "grad_norm": 18.796110153198242, - "learning_rate": 8.148684704723743e-07, - "log_odds_chosen": 2.6773242950439453, - "log_odds_ratio": -0.18141116201877594, - "logits/chosen": 274.4396057128906, - "logits/rejected": 239.39248657226562, - "logps/chosen": -0.1337202787399292, - "logps/rejected": -1.039009928703308, - "loss": 0.3603, - "nll_loss": 0.26456403732299805, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.006686014123260975, - "rewards/margins": 0.045264486223459244, - "rewards/rejected": -0.051950495690107346, - "step": 3765 - }, - { - "epoch": 2.8113348247576435, - "grad_norm": 25.96636962890625, - "learning_rate": 8.143279274805705e-07, - "log_odds_chosen": 2.4946742057800293, - "log_odds_ratio": -0.08876989781856537, - "logits/chosen": 244.54763793945312, - "logits/rejected": 236.2200469970703, - "logps/chosen": -0.3392036557197571, - "logps/rejected": -1.7499818801879883, - "loss": 0.3936, - "nll_loss": 0.4407782554626465, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.016960183158516884, - "rewards/margins": 0.07053892314434052, - "rewards/rejected": -0.08749910444021225, - "step": 3770 - }, - { - "epoch": 2.8150633855331844, - "grad_norm": 30.864294052124023, - "learning_rate": 8.137884587711594e-07, - "log_odds_chosen": 3.4979453086853027, - "log_odds_ratio": -0.04925019294023514, - "logits/chosen": 183.46243286132812, - "logits/rejected": 318.6687927246094, - "logps/chosen": -0.21238403022289276, - "logps/rejected": -1.9723964929580688, - "loss": 0.3778, - "nll_loss": 0.31173357367515564, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010619202628731728, - "rewards/margins": 0.08800062537193298, - "rewards/rejected": -0.09861983358860016, - "step": 3775 - }, - { - "epoch": 2.8187919463087248, - "grad_norm": 21.09513282775879, - "learning_rate": 8.132500607904444e-07, - "log_odds_chosen": 3.158417224884033, - "log_odds_ratio": -0.09778966009616852, - "logits/chosen": 293.9020080566406, - "logits/rejected": 218.22354125976562, - "logps/chosen": -0.21986660361289978, - "logps/rejected": -1.5319629907608032, - "loss": 0.3358, - "nll_loss": 0.35751470923423767, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01099332980811596, - "rewards/margins": 0.06560482084751129, - "rewards/rejected": -0.0765981525182724, - "step": 3780 - }, - { - "epoch": 2.8225205070842656, - "grad_norm": 27.18902587890625, - "learning_rate": 8.127127300011638e-07, - "log_odds_chosen": 2.9097113609313965, - "log_odds_ratio": -0.060733288526535034, - "logits/chosen": 196.9976348876953, - "logits/rejected": 342.5469055175781, - "logps/chosen": -0.1952126920223236, - "logps/rejected": -1.4931366443634033, - "loss": 0.3283, - "nll_loss": 0.32954567670822144, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.00976063497364521, - "rewards/margins": 0.06489618867635727, - "rewards/rejected": -0.07465682923793793, - "step": 3785 - }, - { - "epoch": 2.826249067859806, - "grad_norm": 23.29341697692871, - "learning_rate": 8.12176462882395e-07, - "log_odds_chosen": 3.897740125656128, - "log_odds_ratio": -0.06832066923379898, - "logits/chosen": 191.66793823242188, - "logits/rejected": 272.09710693359375, - "logps/chosen": -0.20540109276771545, - "logps/rejected": -1.7500526905059814, - "loss": 0.357, - "nll_loss": 0.31912702322006226, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010270054452121258, - "rewards/margins": 0.07723258435726166, - "rewards/rejected": -0.08750263601541519, - "step": 3790 - }, - { - "epoch": 2.829977628635347, - "grad_norm": 16.75524139404297, - "learning_rate": 8.116412559294567e-07, - "log_odds_chosen": 3.324321746826172, - "log_odds_ratio": -0.08418653905391693, - "logits/chosen": 237.0968780517578, - "logits/rejected": 315.49639892578125, - "logps/chosen": -0.4185584485530853, - "logps/rejected": -1.9376417398452759, - "loss": 0.3797, - "nll_loss": 0.49749669432640076, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.020927922800183296, - "rewards/margins": 0.07595416158437729, - "rewards/rejected": -0.09688208997249603, - "step": 3795 - }, - { - "epoch": 2.8337061894108873, - "grad_norm": 32.6544189453125, - "learning_rate": 8.111071056538128e-07, - "log_odds_chosen": 2.038804292678833, - "log_odds_ratio": -0.1433335542678833, - "logits/chosen": 296.8525390625, - "logits/rejected": 213.3878631591797, - "logps/chosen": -0.41061919927597046, - "logps/rejected": -1.555185317993164, - "loss": 0.346, - "nll_loss": 0.5514751672744751, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.020530959591269493, - "rewards/margins": 0.05722830817103386, - "rewards/rejected": -0.0777592658996582, - "step": 3800 - }, - { - "epoch": 2.837434750186428, - "grad_norm": 25.49529457092285, - "learning_rate": 8.105740085829771e-07, - "log_odds_chosen": 3.524195432662964, - "log_odds_ratio": -0.0358196422457695, - "logits/chosen": 197.9256591796875, - "logits/rejected": 315.8657531738281, - "logps/chosen": -0.10687222331762314, - "logps/rejected": -1.4010565280914307, - "loss": 0.2875, - "nll_loss": 0.38622957468032837, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0053436108864843845, - "rewards/margins": 0.06470920890569687, - "rewards/rejected": -0.07005281746387482, - "step": 3805 - }, - { - "epoch": 2.841163310961969, - "grad_norm": 32.325401306152344, - "learning_rate": 8.100419612604182e-07, - "log_odds_chosen": 3.8135390281677246, - "log_odds_ratio": -0.0479932501912117, - "logits/chosen": 210.16250610351562, - "logits/rejected": 251.05654907226562, - "logps/chosen": -0.20431527495384216, - "logps/rejected": -2.080791711807251, - "loss": 0.426, - "nll_loss": 0.5627084374427795, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010215764865279198, - "rewards/margins": 0.09382381290197372, - "rewards/rejected": -0.10403958708047867, - "step": 3810 - }, - { - "epoch": 2.8448918717375093, - "grad_norm": 24.20859718322754, - "learning_rate": 8.095109602454658e-07, - "log_odds_chosen": 3.546051025390625, - "log_odds_ratio": -0.06928587704896927, - "logits/chosen": 300.4501037597656, - "logits/rejected": 228.1247100830078, - "logps/chosen": -0.3153231739997864, - "logps/rejected": -2.191760778427124, - "loss": 0.3867, - "nll_loss": 0.3087129592895508, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01576615869998932, - "rewards/margins": 0.09382189065217972, - "rewards/rejected": -0.10958804935216904, - "step": 3815 - }, - { - "epoch": 2.8486204325130497, - "grad_norm": 24.32912254333496, - "learning_rate": 8.08981002113217e-07, - "log_odds_chosen": 4.526251792907715, - "log_odds_ratio": -0.04737117141485214, - "logits/chosen": 175.68753051757812, - "logits/rejected": 337.61895751953125, - "logps/chosen": -0.2012404501438141, - "logps/rejected": -2.805131673812866, - "loss": 0.3376, - "nll_loss": 0.2573019862174988, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010062022134661674, - "rewards/margins": 0.13019457459449768, - "rewards/rejected": -0.1402565985918045, - "step": 3820 - }, - { - "epoch": 2.8523489932885906, - "grad_norm": 21.30034065246582, - "learning_rate": 8.084520834544433e-07, - "log_odds_chosen": 2.602710247039795, - "log_odds_ratio": -0.12020216137170792, - "logits/chosen": 212.7501983642578, - "logits/rejected": 277.9372253417969, - "logps/chosen": -0.34366852045059204, - "logps/rejected": -1.6059083938598633, - "loss": 0.4448, - "nll_loss": 0.45532941818237305, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.017183426767587662, - "rewards/margins": 0.06311199814081192, - "rewards/rejected": -0.08029542118310928, - "step": 3825 - }, - { - "epoch": 2.8560775540641314, - "grad_norm": 15.153382301330566, - "learning_rate": 8.079242008754989e-07, - "log_odds_chosen": 3.660566806793213, - "log_odds_ratio": -0.034583959728479385, - "logits/chosen": 196.50750732421875, - "logits/rejected": 244.08737182617188, - "logps/chosen": -0.11477003991603851, - "logps/rejected": -1.386000633239746, - "loss": 0.342, - "nll_loss": 0.2467333823442459, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.005738501902669668, - "rewards/margins": 0.06356153637170792, - "rewards/rejected": -0.06930003315210342, - "step": 3830 - }, - { - "epoch": 2.859806114839672, - "grad_norm": 25.31479263305664, - "learning_rate": 8.073973509982289e-07, - "log_odds_chosen": 3.1076226234436035, - "log_odds_ratio": -0.05301403999328613, - "logits/chosen": 253.2061767578125, - "logits/rejected": 253.92202758789062, - "logps/chosen": -0.18532077968120575, - "logps/rejected": -1.5528008937835693, - "loss": 0.3915, - "nll_loss": 0.4327433705329895, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009266039356589317, - "rewards/margins": 0.06837401539087296, - "rewards/rejected": -0.07764004915952682, - "step": 3835 - }, - { - "epoch": 2.8635346756152127, - "grad_norm": 19.035112380981445, - "learning_rate": 8.068715304598786e-07, - "log_odds_chosen": 3.4401144981384277, - "log_odds_ratio": -0.03753223270177841, - "logits/chosen": 194.92636108398438, - "logits/rejected": 267.55108642578125, - "logps/chosen": -0.08823622763156891, - "logps/rejected": -1.091083288192749, - "loss": 0.2886, - "nll_loss": 0.23893025517463684, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.004411811474710703, - "rewards/margins": 0.05014234781265259, - "rewards/rejected": -0.05455416440963745, - "step": 3840 - }, - { - "epoch": 2.867263236390753, - "grad_norm": 24.459787368774414, - "learning_rate": 8.063467359130037e-07, - "log_odds_chosen": 3.6022324562072754, - "log_odds_ratio": -0.050869546830654144, - "logits/chosen": 237.14688110351562, - "logits/rejected": 283.30780029296875, - "logps/chosen": -0.17496509850025177, - "logps/rejected": -1.7731231451034546, - "loss": 0.3785, - "nll_loss": 0.4072284698486328, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008748253807425499, - "rewards/margins": 0.07990790158510208, - "rewards/rejected": -0.08865615725517273, - "step": 3845 - }, - { - "epoch": 2.870991797166294, - "grad_norm": 31.28860092163086, - "learning_rate": 8.058229640253803e-07, - "log_odds_chosen": 3.3452060222625732, - "log_odds_ratio": -0.04601866006851196, - "logits/chosen": 269.595703125, - "logits/rejected": 235.57382202148438, - "logps/chosen": -0.1683049201965332, - "logps/rejected": -1.7391974925994873, - "loss": 0.3207, - "nll_loss": 0.2898971438407898, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.00841524638235569, - "rewards/margins": 0.07854463160037994, - "rewards/rejected": -0.08695988357067108, - "step": 3850 - }, - { - "epoch": 2.8747203579418343, - "grad_norm": 29.04055404663086, - "learning_rate": 8.053002114799164e-07, - "log_odds_chosen": 2.744844913482666, - "log_odds_ratio": -0.07824674248695374, - "logits/chosen": 372.739990234375, - "logits/rejected": 275.91058349609375, - "logps/chosen": -0.2801540791988373, - "logps/rejected": -1.6844806671142578, - "loss": 0.3779, - "nll_loss": 0.329964816570282, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.014007705263793468, - "rewards/margins": 0.07021632790565491, - "rewards/rejected": -0.08422403782606125, - "step": 3855 - }, - { - "epoch": 2.878448918717375, - "grad_norm": 27.001197814941406, - "learning_rate": 8.047784749745631e-07, - "log_odds_chosen": 4.442195892333984, - "log_odds_ratio": -0.014437940903007984, - "logits/chosen": 301.9033508300781, - "logits/rejected": 243.8429412841797, - "logps/chosen": -0.09553482383489609, - "logps/rejected": -1.8481247425079346, - "loss": 0.3884, - "nll_loss": 0.323634535074234, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.004776740912348032, - "rewards/margins": 0.08762948960065842, - "rewards/rejected": -0.09240622818470001, - "step": 3860 - }, - { - "epoch": 2.8821774794929156, - "grad_norm": 18.882722854614258, - "learning_rate": 8.04257751222228e-07, - "log_odds_chosen": 3.1793572902679443, - "log_odds_ratio": -0.05364120006561279, - "logits/chosen": 297.974365234375, - "logits/rejected": 289.8678283691406, - "logps/chosen": -0.16941623389720917, - "logps/rejected": -1.5830553770065308, - "loss": 0.2889, - "nll_loss": 0.2607272267341614, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008470811881124973, - "rewards/margins": 0.07068196684122086, - "rewards/rejected": -0.07915277779102325, - "step": 3865 - }, - { - "epoch": 2.8859060402684564, - "grad_norm": 23.952634811401367, - "learning_rate": 8.03738036950687e-07, - "log_odds_chosen": 2.2256085872650146, - "log_odds_ratio": -0.1883433759212494, - "logits/chosen": 222.33578491210938, - "logits/rejected": 213.20761108398438, - "logps/chosen": -0.5484896898269653, - "logps/rejected": -1.5137542486190796, - "loss": 0.4121, - "nll_loss": 0.5739758610725403, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.027424484491348267, - "rewards/margins": 0.04826323315501213, - "rewards/rejected": -0.0756877213716507, - "step": 3870 - }, - { - "epoch": 2.889634601043997, - "grad_norm": 25.29210090637207, - "learning_rate": 8.032193289024989e-07, - "log_odds_chosen": 2.9866015911102295, - "log_odds_ratio": -0.08011031150817871, - "logits/chosen": 203.48941040039062, - "logits/rejected": 211.5205078125, - "logps/chosen": -0.3119891583919525, - "logps/rejected": -1.7056903839111328, - "loss": 0.3846, - "nll_loss": 0.4115133285522461, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.015599459409713745, - "rewards/margins": 0.06968505680561066, - "rewards/rejected": -0.085284523665905, - "step": 3875 - }, - { - "epoch": 2.8933631618195377, - "grad_norm": 33.57221984863281, - "learning_rate": 8.027016238349195e-07, - "log_odds_chosen": 3.8543941974639893, - "log_odds_ratio": -0.04395081475377083, - "logits/chosen": 209.9810333251953, - "logits/rejected": 273.63421630859375, - "logps/chosen": -0.14670626819133759, - "logps/rejected": -1.571472406387329, - "loss": 0.347, - "nll_loss": 0.3198845684528351, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.0073353140614926815, - "rewards/margins": 0.07123831659555435, - "rewards/rejected": -0.07857362926006317, - "step": 3880 - }, - { - "epoch": 2.8970917225950785, - "grad_norm": 35.15097427368164, - "learning_rate": 8.021849185198158e-07, - "log_odds_chosen": 3.2628731727600098, - "log_odds_ratio": -0.048248518258333206, - "logits/chosen": 285.5743103027344, - "logits/rejected": 270.71014404296875, - "logps/chosen": -0.2119155377149582, - "logps/rejected": -1.6817209720611572, - "loss": 0.5128, - "nll_loss": 0.27424556016921997, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010595777072012424, - "rewards/margins": 0.07349027693271637, - "rewards/rejected": -0.08408604562282562, - "step": 3885 - }, - { - "epoch": 2.900820283370619, - "grad_norm": 24.42717742919922, - "learning_rate": 8.016692097435824e-07, - "log_odds_chosen": 3.6029605865478516, - "log_odds_ratio": -0.029012978076934814, - "logits/chosen": 221.4920654296875, - "logits/rejected": 285.62042236328125, - "logps/chosen": -0.19252316653728485, - "logps/rejected": -2.0865533351898193, - "loss": 0.4253, - "nll_loss": 0.44975581765174866, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.009626159444451332, - "rewards/margins": 0.09470151364803314, - "rewards/rejected": -0.10432766377925873, - "step": 3890 - }, - { - "epoch": 2.9045488441461593, - "grad_norm": 20.79186248779297, - "learning_rate": 8.011544943070565e-07, - "log_odds_chosen": 2.905656099319458, - "log_odds_ratio": -0.11686693131923676, - "logits/chosen": 232.24545288085938, - "logits/rejected": 197.63827514648438, - "logps/chosen": -0.34481728076934814, - "logps/rejected": -1.8507856130599976, - "loss": 0.398, - "nll_loss": 0.41692763566970825, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.017240863293409348, - "rewards/margins": 0.07529841363430023, - "rewards/rejected": -0.09253928810358047, - "step": 3895 - }, - { - "epoch": 2.9082774049217, - "grad_norm": 28.6600284576416, - "learning_rate": 8.006407690254357e-07, - "log_odds_chosen": 2.8692445755004883, - "log_odds_ratio": -0.07150912284851074, - "logits/chosen": 263.09820556640625, - "logits/rejected": 338.467529296875, - "logps/chosen": -0.32683637738227844, - "logps/rejected": -1.911630630493164, - "loss": 0.4447, - "nll_loss": 0.2720980942249298, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.016341818496584892, - "rewards/margins": 0.07923971116542816, - "rewards/rejected": -0.0955815315246582, - "step": 3900 - }, - { - "epoch": 2.912005965697241, - "grad_norm": 22.87518310546875, - "learning_rate": 8.001280307281944e-07, - "log_odds_chosen": 2.7045392990112305, - "log_odds_ratio": -0.0724663957953453, - "logits/chosen": 242.6070556640625, - "logits/rejected": 239.5297393798828, - "logps/chosen": -0.28118696808815, - "logps/rejected": -1.7274434566497803, - "loss": 0.4638, - "nll_loss": 0.6614148616790771, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.014059348031878471, - "rewards/margins": 0.07231283187866211, - "rewards/rejected": -0.08637218177318573, - "step": 3905 - }, - { - "epoch": 2.9157345264727814, - "grad_norm": 30.325584411621094, - "learning_rate": 7.996162762590016e-07, - "log_odds_chosen": 4.0316667556762695, - "log_odds_ratio": -0.03233776241540909, - "logits/chosen": 261.71795654296875, - "logits/rejected": 215.3047332763672, - "logps/chosen": -0.1761138141155243, - "logps/rejected": -2.04801344871521, - "loss": 0.3571, - "nll_loss": 0.28671687841415405, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008805690333247185, - "rewards/margins": 0.0935949832201004, - "rewards/rejected": -0.10240067541599274, - "step": 3910 - }, - { - "epoch": 2.9194630872483223, - "grad_norm": 24.88538932800293, - "learning_rate": 7.991055024756403e-07, - "log_odds_chosen": 3.9035377502441406, - "log_odds_ratio": -0.05246854946017265, - "logits/chosen": 182.58572387695312, - "logits/rejected": 271.51715087890625, - "logps/chosen": -0.16539780795574188, - "logps/rejected": -2.052513599395752, - "loss": 0.3483, - "nll_loss": 0.2589992582798004, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008269891142845154, - "rewards/margins": 0.09435578435659409, - "rewards/rejected": -0.10262566804885864, - "step": 3915 - }, - { - "epoch": 2.9231916480238627, - "grad_norm": 30.739715576171875, - "learning_rate": 7.98595706249925e-07, - "log_odds_chosen": 2.9888224601745605, - "log_odds_ratio": -0.0610596239566803, - "logits/chosen": 191.4695587158203, - "logits/rejected": 278.05804443359375, - "logps/chosen": -0.2988676428794861, - "logps/rejected": -1.8655011653900146, - "loss": 0.4431, - "nll_loss": 0.5340582132339478, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.014943381771445274, - "rewards/margins": 0.07833168655633926, - "rewards/rejected": -0.09327506273984909, - "step": 3920 - }, - { - "epoch": 2.9269202087994035, - "grad_norm": 19.176481246948242, - "learning_rate": 7.980868844676222e-07, - "log_odds_chosen": 3.0732309818267822, - "log_odds_ratio": -0.05321504920721054, - "logits/chosen": 210.36654663085938, - "logits/rejected": 307.58648681640625, - "logps/chosen": -0.2359515130519867, - "logps/rejected": -1.9004583358764648, - "loss": 0.3266, - "nll_loss": 0.31325894594192505, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.011797577142715454, - "rewards/margins": 0.08322534710168839, - "rewards/rejected": -0.09502293169498444, - "step": 3925 - }, - { - "epoch": 2.930648769574944, - "grad_norm": 33.64284896850586, - "learning_rate": 7.975790340283705e-07, - "log_odds_chosen": 2.779667377471924, - "log_odds_ratio": -0.0833745002746582, - "logits/chosen": 198.58154296875, - "logits/rejected": 287.80145263671875, - "logps/chosen": -0.26572567224502563, - "logps/rejected": -1.7494840621948242, - "loss": 0.3783, - "nll_loss": 0.43532299995422363, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.013286283239722252, - "rewards/margins": 0.07418791949748993, - "rewards/rejected": -0.08747420459985733, - "step": 3930 - }, - { - "epoch": 2.9343773303504848, - "grad_norm": 21.99195671081543, - "learning_rate": 7.970721518456008e-07, - "log_odds_chosen": 2.725975513458252, - "log_odds_ratio": -0.08639051020145416, - "logits/chosen": 228.67782592773438, - "logits/rejected": 306.00067138671875, - "logps/chosen": -0.255102276802063, - "logps/rejected": -1.5678352117538452, - "loss": 0.4215, - "nll_loss": 0.4231089949607849, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.012755113653838634, - "rewards/margins": 0.06563664972782135, - "rewards/rejected": -0.07839175313711166, - "step": 3935 - }, - { - "epoch": 2.9381058911260256, - "grad_norm": 27.103235244750977, - "learning_rate": 7.965662348464579e-07, - "log_odds_chosen": 2.9690985679626465, - "log_odds_ratio": -0.06273848563432693, - "logits/chosen": 242.9127655029297, - "logits/rejected": 212.59738159179688, - "logps/chosen": -0.21675646305084229, - "logps/rejected": -1.6340042352676392, - "loss": 0.3203, - "nll_loss": 0.35348156094551086, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010837824083864689, - "rewards/margins": 0.07086239010095596, - "rewards/rejected": -0.08170022070407867, - "step": 3940 - }, - { - "epoch": 2.941834451901566, - "grad_norm": 26.859952926635742, - "learning_rate": 7.960612799717214e-07, - "log_odds_chosen": 2.987098217010498, - "log_odds_ratio": -0.07614287734031677, - "logits/chosen": 203.6111297607422, - "logits/rejected": 198.26031494140625, - "logps/chosen": -0.1704777181148529, - "logps/rejected": -1.456441879272461, - "loss": 0.3499, - "nll_loss": 0.25423508882522583, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008523887023329735, - "rewards/margins": 0.06429821252822876, - "rewards/rejected": -0.07282210141420364, - "step": 3945 - }, - { - "epoch": 2.9455630126771064, - "grad_norm": 26.026161193847656, - "learning_rate": 7.9555728417573e-07, - "log_odds_chosen": 2.797490358352661, - "log_odds_ratio": -0.11651773750782013, - "logits/chosen": 292.34857177734375, - "logits/rejected": 261.5030822753906, - "logps/chosen": -0.16755664348602295, - "logps/rejected": -1.0318130254745483, - "loss": 0.3745, - "nll_loss": 0.36315926909446716, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.008377832360565662, - "rewards/margins": 0.04321282356977463, - "rewards/rejected": -0.05159065127372742, - "step": 3950 - }, - { - "epoch": 2.9492915734526473, - "grad_norm": 30.100515365600586, - "learning_rate": 7.950542444263022e-07, - "log_odds_chosen": 4.335784435272217, - "log_odds_ratio": -0.02678128518164158, - "logits/chosen": 306.726318359375, - "logits/rejected": 227.2478485107422, - "logps/chosen": -0.11286120116710663, - "logps/rejected": -2.193723201751709, - "loss": 0.303, - "nll_loss": 0.2681526839733124, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.005643059965223074, - "rewards/margins": 0.10404310375452042, - "rewards/rejected": -0.10968615859746933, - "step": 3955 - }, - { - "epoch": 2.953020134228188, - "grad_norm": 26.88707160949707, - "learning_rate": 7.945521577046602e-07, - "log_odds_chosen": 3.216456651687622, - "log_odds_ratio": -0.0652243047952652, - "logits/chosen": 278.48443603515625, - "logits/rejected": 214.1090545654297, - "logps/chosen": -0.2462995946407318, - "logps/rejected": -1.766803503036499, - "loss": 0.43, - "nll_loss": 0.37069714069366455, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01231498084962368, - "rewards/margins": 0.07602520287036896, - "rewards/rejected": -0.08834017813205719, - "step": 3960 - }, - { - "epoch": 2.9567486950037285, - "grad_norm": 37.01509094238281, - "learning_rate": 7.940510210053552e-07, - "log_odds_chosen": 3.218850612640381, - "log_odds_ratio": -0.06121605634689331, - "logits/chosen": 268.87835693359375, - "logits/rejected": 215.515625, - "logps/chosen": -0.27633899450302124, - "logps/rejected": -2.103768825531006, - "loss": 0.4245, - "nll_loss": 0.31839513778686523, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.013816950842738152, - "rewards/margins": 0.09137148410081863, - "rewards/rejected": -0.10518844425678253, - "step": 3965 - }, - { - "epoch": 2.9604772557792693, - "grad_norm": 23.802541732788086, - "learning_rate": 7.935508313361897e-07, - "log_odds_chosen": 3.1365904808044434, - "log_odds_ratio": -0.10641616582870483, - "logits/chosen": 265.4117431640625, - "logits/rejected": 216.66024780273438, - "logps/chosen": -0.3051195442676544, - "logps/rejected": -1.7106691598892212, - "loss": 0.4711, - "nll_loss": 0.4385095238685608, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.01525597833096981, - "rewards/margins": 0.07027747482061386, - "rewards/rejected": -0.08553345501422882, - "step": 3970 - }, - { - "epoch": 2.9642058165548097, - "grad_norm": 28.718042373657227, - "learning_rate": 7.930515857181442e-07, - "log_odds_chosen": 3.5454916954040527, - "log_odds_ratio": -0.03936084359884262, - "logits/chosen": 244.72705078125, - "logits/rejected": 231.2872772216797, - "logps/chosen": -0.15465228259563446, - "logps/rejected": -1.706702470779419, - "loss": 0.4288, - "nll_loss": 0.36403995752334595, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007732613477855921, - "rewards/margins": 0.07760250568389893, - "rewards/rejected": -0.08533512055873871, - "step": 3975 - }, - { - "epoch": 2.9679343773303506, - "grad_norm": 20.38542938232422, - "learning_rate": 7.925532811853019e-07, - "log_odds_chosen": 2.760596752166748, - "log_odds_ratio": -0.08602370321750641, - "logits/chosen": 275.7841796875, - "logits/rejected": 226.4866180419922, - "logps/chosen": -0.1591126173734665, - "logps/rejected": -1.2986723184585571, - "loss": 0.3281, - "nll_loss": 0.2552988827228546, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.007955631241202354, - "rewards/margins": 0.056977979838848114, - "rewards/rejected": -0.06493361294269562, - "step": 3980 - }, - { - "epoch": 2.971662938105891, - "grad_norm": 22.067012786865234, - "learning_rate": 7.920559147847744e-07, - "log_odds_chosen": 3.1721627712249756, - "log_odds_ratio": -0.06317927688360214, - "logits/chosen": 222.24447631835938, - "logits/rejected": 262.6744384765625, - "logps/chosen": -0.29022416472435, - "logps/rejected": -1.9268391132354736, - "loss": 0.4359, - "nll_loss": 0.5417429208755493, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.014511209912598133, - "rewards/margins": 0.08183075487613678, - "rewards/rejected": -0.09634196013212204, - "step": 3985 - }, - { - "epoch": 2.975391498881432, - "grad_norm": 19.60210609436035, - "learning_rate": 7.915594835766295e-07, - "log_odds_chosen": 3.7546000480651855, - "log_odds_ratio": -0.039507631212472916, - "logits/chosen": 237.0302734375, - "logits/rejected": 239.23446655273438, - "logps/chosen": -0.13352450728416443, - "logps/rejected": -1.192893385887146, - "loss": 0.4349, - "nll_loss": 0.393111914396286, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.006676225923001766, - "rewards/margins": 0.052968449890613556, - "rewards/rejected": -0.0596446767449379, - "step": 3990 - }, - { - "epoch": 2.9791200596569727, - "grad_norm": 25.271940231323242, - "learning_rate": 7.910639846338164e-07, - "log_odds_chosen": 2.9505350589752197, - "log_odds_ratio": -0.060760747641325, - "logits/chosen": 250.4705352783203, - "logits/rejected": 213.78756713867188, - "logps/chosen": -0.22807908058166504, - "logps/rejected": -1.6011359691619873, - "loss": 0.2885, - "nll_loss": 0.34552353620529175, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.011403953656554222, - "rewards/margins": 0.06865284591913223, - "rewards/rejected": -0.0800568014383316, - "step": 3995 - }, - { - "epoch": 2.982848620432513, - "grad_norm": 32.470664978027344, - "learning_rate": 7.905694150420949e-07, - "log_odds_chosen": 2.979290723800659, - "log_odds_ratio": -0.07630395889282227, - "logits/chosen": 283.91229248046875, - "logits/rejected": 209.39126586914062, - "logps/chosen": -0.21665747463703156, - "logps/rejected": -1.5964548587799072, - "loss": 0.3388, - "nll_loss": 0.4789574146270752, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010832873173058033, - "rewards/margins": 0.06898987293243408, - "rewards/rejected": -0.07982274144887924, - "step": 4000 - }, - { - "epoch": 2.9865771812080535, - "grad_norm": 24.507509231567383, - "learning_rate": 7.900757718999622e-07, - "log_odds_chosen": 2.357649326324463, - "log_odds_ratio": -0.20979559421539307, - "logits/chosen": 176.9601287841797, - "logits/rejected": 267.44464111328125, - "logps/chosen": -0.1762644201517105, - "logps/rejected": -1.3118493556976318, - "loss": 0.3467, - "nll_loss": 0.40073174238204956, - "rewards/accuracies": 0.800000011920929, - "rewards/chosen": -0.008813221007585526, - "rewards/margins": 0.05677925422787666, - "rewards/rejected": -0.06559247523546219, - "step": 4005 - }, - { - "epoch": 2.9903057419835943, - "grad_norm": 27.570331573486328, - "learning_rate": 7.895830523185819e-07, - "log_odds_chosen": 3.586550235748291, - "log_odds_ratio": -0.05765519291162491, - "logits/chosen": 237.9789276123047, - "logits/rejected": 309.8821716308594, - "logps/chosen": -0.45855027437210083, - "logps/rejected": -2.7853572368621826, - "loss": 0.4177, - "nll_loss": 0.5794026255607605, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.02292751334607601, - "rewards/margins": 0.11634035408496857, - "rewards/rejected": -0.13926787674427032, - "step": 4010 - }, - { - "epoch": 2.994034302759135, - "grad_norm": 30.953182220458984, - "learning_rate": 7.890912534217131e-07, - "log_odds_chosen": 2.7769341468811035, - "log_odds_ratio": -0.06907717883586884, - "logits/chosen": 266.03271484375, - "logits/rejected": 254.7345733642578, - "logps/chosen": -0.2119748592376709, - "logps/rejected": -1.4165858030319214, - "loss": 0.4322, - "nll_loss": 0.5442076921463013, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.010598743334412575, - "rewards/margins": 0.060230545699596405, - "rewards/rejected": -0.07082929462194443, - "step": 4015 - }, - { - "epoch": 2.9977628635346756, - "grad_norm": 31.174964904785156, - "learning_rate": 7.886003723456397e-07, - "log_odds_chosen": 3.4881625175476074, - "log_odds_ratio": -0.033604566007852554, - "logits/chosen": 264.588134765625, - "logits/rejected": 245.90945434570312, - "logps/chosen": -0.1696120798587799, - "logps/rejected": -1.9454491138458252, - "loss": 0.3228, - "nll_loss": 0.23898892104625702, - "rewards/accuracies": 1.0, - "rewards/chosen": -0.00848060380667448, - "rewards/margins": 0.0887918546795845, - "rewards/rejected": -0.09727245569229126, - "step": 4020 - }, - { - "epoch": 3.0, - "eval_log_odds_chosen": 0.26997339725494385, - "eval_log_odds_ratio": -0.7197813391685486, - "eval_logits/chosen": 284.18389892578125, - "eval_logits/rejected": 254.83587646484375, - "eval_logps/chosen": -1.2543175220489502, - "eval_logps/rejected": -1.4255971908569336, - "eval_loss": 1.7139935493469238, - "eval_nll_loss": 1.6444751024246216, - "eval_rewards/accuracies": 0.5467625856399536, - "eval_rewards/chosen": -0.06271587312221527, - "eval_rewards/margins": 0.008563979528844357, - "eval_rewards/rejected": -0.0712798535823822, - "eval_runtime": 26.0873, - "eval_samples_per_second": 21.198, - "eval_steps_per_second": 5.328, - "step": 4023 - }, - { - "epoch": 3.0, - "step": 4023, + "epoch": 2.9928514694201747, + "eval_log_odds_chosen": 0.44380733370780945, + "eval_log_odds_ratio": -0.6702221632003784, + "eval_logits/chosen": 286.3763122558594, + "eval_logits/rejected": 275.9735412597656, + "eval_logps/chosen": -1.2025552988052368, + "eval_logps/rejected": -1.5090675354003906, + "eval_loss": 1.639459252357483, + "eval_nll_loss": 1.5846672058105469, + "eval_rewards/accuracies": 0.6028881072998047, + "eval_rewards/chosen": -0.060127776116132736, + "eval_rewards/margins": 0.015325604937970638, + "eval_rewards/rejected": -0.07545337826013565, + "eval_runtime": 278.3004, + "eval_samples_per_second": 1.987, + "eval_steps_per_second": 0.995, + "step": 471 + }, + { + "epoch": 2.9928514694201747, + "step": 471, "total_flos": 0.0, - "train_loss": 0.9688140684296591, - "train_runtime": 3597.9931, - "train_samples_per_second": 4.472, - "train_steps_per_second": 1.118 + "train_loss": 1.4771008792703066, + "train_runtime": 40012.5124, + "train_samples_per_second": 0.377, + "train_steps_per_second": 0.012 } ], "logging_steps": 5, - "max_steps": 4023, + "max_steps": 471, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, @@ -14565,7 +1785,7 @@ } }, "total_flos": 0.0, - "train_batch_size": 1, + "train_batch_size": 2, "trial_name": null, "trial_params": null }