{ "best_metric": null, "best_model_checkpoint": null, "epoch": 15.886524822695035, "eval_steps": 80, "global_step": 840, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.6052009456264775, "grad_norm": 9.684629440307617, "learning_rate": 1.9047619047619045e-07, "log_odds_chosen": 0.08431098610162735, "log_odds_ratio": -0.7315660715103149, "logits/chosen": -2.4399943351745605, "logits/rejected": -2.418248414993286, "logps/chosen": -1.316224455833435, "logps/rejected": -1.3803966045379639, "loss": 1.6469, "nll_loss": 1.5446076393127441, "rewards/accuracies": 0.54296875, "rewards/chosen": -0.1974336802959442, "rewards/margins": 0.009625822305679321, "rewards/rejected": -0.20705950260162354, "step": 32 }, { "epoch": 1.210401891252955, "grad_norm": 6.682727813720703, "learning_rate": 3.809523809523809e-07, "log_odds_chosen": 0.1358582228422165, "log_odds_ratio": -0.6844438314437866, "logits/chosen": -2.4866111278533936, "logits/rejected": -2.473512649536133, "logps/chosen": -1.2416539192199707, "logps/rejected": -1.338365912437439, "loss": 1.5513, "nll_loss": 1.43682861328125, "rewards/accuracies": 0.55859375, "rewards/chosen": -0.1862480789422989, "rewards/margins": 0.01450679823756218, "rewards/rejected": -0.20075488090515137, "step": 64 }, { "epoch": 1.5130023640661938, "eval_log_odds_chosen": 1.1365413665771484, "eval_log_odds_ratio": -0.2909667193889618, "eval_logits/chosen": -2.8555617332458496, "eval_logits/rejected": -2.7511401176452637, "eval_logps/chosen": -1.1086950302124023, "eval_logps/rejected": -1.990875244140625, "eval_loss": 1.1764631271362305, "eval_nll_loss": 1.1883823871612549, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.16630426049232483, "eval_rewards/margins": 0.13232707977294922, "eval_rewards/rejected": -0.29863131046295166, "eval_runtime": 0.7889, "eval_samples_per_second": 173.657, "eval_steps_per_second": 6.338, "step": 80 }, { "epoch": 1.8156028368794326, "grad_norm": 5.326328754425049, "learning_rate": 4.996892303047305e-07, "log_odds_chosen": 0.18785640597343445, "log_odds_ratio": -0.6766043901443481, "logits/chosen": -2.4521989822387695, "logits/rejected": -2.457139492034912, "logps/chosen": -1.1742055416107178, "logps/rejected": -1.3268922567367554, "loss": 1.4307, "nll_loss": 1.3236442804336548, "rewards/accuracies": 0.5390625, "rewards/chosen": -0.17613083124160767, "rewards/margins": 0.022903023287653923, "rewards/rejected": -0.19903387129306793, "step": 96 }, { "epoch": 2.42080378250591, "grad_norm": 6.345442295074463, "learning_rate": 4.958326378681848e-07, "log_odds_chosen": 0.20125526189804077, "log_odds_ratio": -0.6606975793838501, "logits/chosen": -2.4338855743408203, "logits/rejected": -2.4032998085021973, "logps/chosen": -1.2052891254425049, "logps/rejected": -1.3293428421020508, "loss": 1.3822, "nll_loss": 1.3110582828521729, "rewards/accuracies": 0.609375, "rewards/chosen": -0.18079334497451782, "rewards/margins": 0.018608052283525467, "rewards/rejected": -0.19940140843391418, "step": 128 }, { "epoch": 3.0260047281323876, "grad_norm": 6.9922590255737305, "learning_rate": 4.876353872369572e-07, "log_odds_chosen": 0.2849215567111969, "log_odds_ratio": -0.6226438283920288, "logits/chosen": -2.28694748878479, "logits/rejected": -2.2813074588775635, "logps/chosen": -1.177689552307129, "logps/rejected": -1.3626967668533325, "loss": 1.3457, "nll_loss": 1.2851402759552002, "rewards/accuracies": 0.68359375, "rewards/chosen": -0.1766534298658371, "rewards/margins": 0.027751106768846512, "rewards/rejected": -0.20440451800823212, "step": 160 }, { "epoch": 3.0260047281323876, "eval_log_odds_chosen": 1.289251685142517, "eval_log_odds_ratio": -0.2533319592475891, "eval_logits/chosen": -2.6994073390960693, "eval_logits/rejected": -2.5944201946258545, "eval_logps/chosen": -1.1015231609344482, "eval_logps/rejected": -2.1139886379241943, "eval_loss": 1.1738542318344116, "eval_nll_loss": 1.1927688121795654, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.1652284860610962, "eval_rewards/margins": 0.15186984837055206, "eval_rewards/rejected": -0.31709831953048706, "eval_runtime": 0.7972, "eval_samples_per_second": 171.841, "eval_steps_per_second": 6.272, "step": 160 }, { "epoch": 3.631205673758865, "grad_norm": 6.0752034187316895, "learning_rate": 4.752422169756047e-07, "log_odds_chosen": 0.29861366748809814, "log_odds_ratio": -0.6187925338745117, "logits/chosen": -2.278367280960083, "logits/rejected": -2.197380781173706, "logps/chosen": -1.1402652263641357, "logps/rejected": -1.323896884918213, "loss": 1.3117, "nll_loss": 1.1778795719146729, "rewards/accuracies": 0.69140625, "rewards/chosen": -0.17103978991508484, "rewards/margins": 0.0275447778403759, "rewards/rejected": -0.19858455657958984, "step": 192 }, { "epoch": 4.236406619385343, "grad_norm": 6.758876800537109, "learning_rate": 4.588719528532341e-07, "log_odds_chosen": 0.32968199253082275, "log_odds_ratio": -0.6064258813858032, "logits/chosen": -2.2086422443389893, "logits/rejected": -2.213918447494507, "logps/chosen": -1.2367851734161377, "logps/rejected": -1.4456892013549805, "loss": 1.2914, "nll_loss": 1.220529317855835, "rewards/accuracies": 0.703125, "rewards/chosen": -0.1855177879333496, "rewards/margins": 0.03133557736873627, "rewards/rejected": -0.21685336530208588, "step": 224 }, { "epoch": 4.539007092198582, "eval_log_odds_chosen": 1.4739799499511719, "eval_log_odds_ratio": -0.21903792023658752, "eval_logits/chosen": -2.6473140716552734, "eval_logits/rejected": -2.5277247428894043, "eval_logps/chosen": -1.2001134157180786, "eval_logps/rejected": -2.410470485687256, "eval_loss": 1.1219241619110107, "eval_nll_loss": 1.137636423110962, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.18001702427864075, "eval_rewards/margins": 0.18155357241630554, "eval_rewards/rejected": -0.3615706264972687, "eval_runtime": 0.7853, "eval_samples_per_second": 174.46, "eval_steps_per_second": 6.367, "step": 240 }, { "epoch": 4.84160756501182, "grad_norm": 6.2288618087768555, "learning_rate": 4.3881364404463375e-07, "log_odds_chosen": 0.46286657452583313, "log_odds_ratio": -0.5630860328674316, "logits/chosen": -2.216555595397949, "logits/rejected": -2.1122565269470215, "logps/chosen": -1.1853437423706055, "logps/rejected": -1.4803636074066162, "loss": 1.2727, "nll_loss": 1.1509523391723633, "rewards/accuracies": 0.77734375, "rewards/chosen": -0.17780157923698425, "rewards/margins": 0.04425298422574997, "rewards/rejected": -0.222054585814476, "step": 256 }, { "epoch": 5.446808510638298, "grad_norm": 8.837624549865723, "learning_rate": 4.154214593992149e-07, "log_odds_chosen": 0.571550726890564, "log_odds_ratio": -0.5269472002983093, "logits/chosen": -2.189985990524292, "logits/rejected": -2.085646152496338, "logps/chosen": -1.2091223001480103, "logps/rejected": -1.5827255249023438, "loss": 1.2582, "nll_loss": 1.1602920293807983, "rewards/accuracies": 0.78515625, "rewards/chosen": -0.18136833608150482, "rewards/margins": 0.05604049190878868, "rewards/rejected": -0.237408846616745, "step": 288 }, { "epoch": 6.052009456264775, "grad_norm": 10.696549415588379, "learning_rate": 3.891084338941603e-07, "log_odds_chosen": 0.5793906450271606, "log_odds_ratio": -0.5278146862983704, "logits/chosen": -2.0601658821105957, "logits/rejected": -2.04327130317688, "logps/chosen": -1.2197258472442627, "logps/rejected": -1.6007359027862549, "loss": 1.261, "nll_loss": 1.1714200973510742, "rewards/accuracies": 0.76953125, "rewards/chosen": -0.18295888602733612, "rewards/margins": 0.057151518762111664, "rewards/rejected": -0.2401103973388672, "step": 320 }, { "epoch": 6.052009456264775, "eval_log_odds_chosen": 1.5137661695480347, "eval_log_odds_ratio": -0.21324041485786438, "eval_logits/chosen": -2.5153615474700928, "eval_logits/rejected": -2.389270067214966, "eval_logps/chosen": -1.2208881378173828, "eval_logps/rejected": -2.475594997406006, "eval_loss": 1.0919252634048462, "eval_nll_loss": 1.1019929647445679, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.18313322961330414, "eval_rewards/margins": 0.1882060021162033, "eval_rewards/rejected": -0.37133923172950745, "eval_runtime": 0.8032, "eval_samples_per_second": 170.562, "eval_steps_per_second": 6.225, "step": 320 }, { "epoch": 6.657210401891253, "grad_norm": 14.943062782287598, "learning_rate": 3.6033917569043597e-07, "log_odds_chosen": 0.6752089858055115, "log_odds_ratio": -0.5140572190284729, "logits/chosen": -2.0157763957977295, "logits/rejected": -2.004314422607422, "logps/chosen": -1.2377139329910278, "logps/rejected": -1.692950963973999, "loss": 1.252, "nll_loss": 1.1812970638275146, "rewards/accuracies": 0.75390625, "rewards/chosen": -0.1856570839881897, "rewards/margins": 0.06828554719686508, "rewards/rejected": -0.25394266843795776, "step": 352 }, { "epoch": 7.26241134751773, "grad_norm": 24.735410690307617, "learning_rate": 3.296216625629211e-07, "log_odds_chosen": 0.8964105844497681, "log_odds_ratio": -0.46805524826049805, "logits/chosen": -1.9912079572677612, "logits/rejected": -1.955162525177002, "logps/chosen": -1.2576524019241333, "logps/rejected": -1.9145023822784424, "loss": 1.2436, "nll_loss": 1.170907974243164, "rewards/accuracies": 0.7578125, "rewards/chosen": -0.18864786624908447, "rewards/margins": 0.09852751344442368, "rewards/rejected": -0.28717538714408875, "step": 384 }, { "epoch": 7.5650118203309695, "eval_log_odds_chosen": 1.5138293504714966, "eval_log_odds_ratio": -0.21318714320659637, "eval_logits/chosen": -2.423281669616699, "eval_logits/rejected": -2.29191255569458, "eval_logps/chosen": -1.25368332862854, "eval_logps/rejected": -2.5206007957458496, "eval_loss": 1.0984269380569458, "eval_nll_loss": 1.1060694456100464, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.18805250525474548, "eval_rewards/margins": 0.19003766775131226, "eval_rewards/rejected": -0.37809017300605774, "eval_runtime": 0.7836, "eval_samples_per_second": 174.83, "eval_steps_per_second": 6.381, "step": 400 }, { "epoch": 7.867612293144208, "grad_norm": 17.922605514526367, "learning_rate": 2.974982725547975e-07, "log_odds_chosen": 1.3920270204544067, "log_odds_ratio": -0.4031583368778229, "logits/chosen": -1.9781438112258911, "logits/rejected": -1.9409700632095337, "logps/chosen": -1.2178527116775513, "logps/rejected": -2.3021371364593506, "loss": 1.2447, "nll_loss": 1.1541370153427124, "rewards/accuracies": 0.7734375, "rewards/chosen": -0.18267790973186493, "rewards/margins": 0.16264265775680542, "rewards/rejected": -0.34532058238983154, "step": 416 }, { "epoch": 8.472813238770685, "grad_norm": 18.74088478088379, "learning_rate": 2.6453620722761895e-07, "log_odds_chosen": 1.4127886295318604, "log_odds_ratio": -0.45162278413772583, "logits/chosen": -1.9333115816116333, "logits/rejected": -1.9409185647964478, "logps/chosen": -1.3663212060928345, "logps/rejected": -2.5033111572265625, "loss": 1.2511, "nll_loss": 1.2117295265197754, "rewards/accuracies": 0.76953125, "rewards/chosen": -0.20494820177555084, "rewards/margins": 0.17054852843284607, "rewards/rejected": -0.37549668550491333, "step": 448 }, { "epoch": 9.078014184397164, "grad_norm": 22.932889938354492, "learning_rate": 2.3131747660339394e-07, "log_odds_chosen": 1.6455353498458862, "log_odds_ratio": -0.44734472036361694, "logits/chosen": -1.923959493637085, "logits/rejected": -1.9963738918304443, "logps/chosen": -1.3357137441635132, "logps/rejected": -2.676542043685913, "loss": 1.2336, "nll_loss": 1.197737455368042, "rewards/accuracies": 0.75, "rewards/chosen": -0.2003570795059204, "rewards/margins": 0.2011241912841797, "rewards/rejected": -0.4014812707901001, "step": 480 }, { "epoch": 9.078014184397164, "eval_log_odds_chosen": 1.4782460927963257, "eval_log_odds_ratio": -0.21918949484825134, "eval_logits/chosen": -2.3784029483795166, "eval_logits/rejected": -2.244081497192383, "eval_logps/chosen": -1.296876072883606, "eval_logps/rejected": -2.5429465770721436, "eval_loss": 1.1173924207687378, "eval_nll_loss": 1.114406704902649, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.1945313960313797, "eval_rewards/margins": 0.18691061437129974, "eval_rewards/rejected": -0.38144201040267944, "eval_runtime": 0.8027, "eval_samples_per_second": 170.681, "eval_steps_per_second": 6.229, "step": 480 }, { "epoch": 9.68321513002364, "grad_norm": 27.342180252075195, "learning_rate": 1.984286226342056e-07, "log_odds_chosen": 2.1517550945281982, "log_odds_ratio": -0.3539085388183594, "logits/chosen": -2.0003914833068848, "logits/rejected": -1.958829641342163, "logps/chosen": -1.2932095527648926, "logps/rejected": -3.1245839595794678, "loss": 1.2519, "nll_loss": 1.177643060684204, "rewards/accuracies": 0.796875, "rewards/chosen": -0.19398145377635956, "rewards/margins": 0.2747061550617218, "rewards/rejected": -0.46868759393692017, "step": 512 }, { "epoch": 10.288416075650119, "grad_norm": 21.41707992553711, "learning_rate": 1.6645036265170313e-07, "log_odds_chosen": 2.206300973892212, "log_odds_ratio": -0.41246891021728516, "logits/chosen": -1.9226995706558228, "logits/rejected": -1.9452672004699707, "logps/chosen": -1.3459149599075317, "logps/rejected": -3.243032455444336, "loss": 1.2377, "nll_loss": 1.1743229627609253, "rewards/accuracies": 0.73828125, "rewards/chosen": -0.20188726484775543, "rewards/margins": 0.28456762433052063, "rewards/rejected": -0.48645487427711487, "step": 544 }, { "epoch": 10.591016548463356, "eval_log_odds_chosen": 1.4928518533706665, "eval_log_odds_ratio": -0.21719364821910858, "eval_logits/chosen": -2.406038999557495, "eval_logits/rejected": -2.2726240158081055, "eval_logps/chosen": -1.2588163614273071, "eval_logps/rejected": -2.5091373920440674, "eval_loss": 1.0936493873596191, "eval_nll_loss": 1.0926154851913452, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.18882247805595398, "eval_rewards/margins": 0.18754813075065613, "eval_rewards/rejected": -0.3763706088066101, "eval_runtime": 0.7924, "eval_samples_per_second": 172.882, "eval_steps_per_second": 6.31, "step": 560 }, { "epoch": 10.893617021276595, "grad_norm": 28.251604080200195, "learning_rate": 1.3594733566170925e-07, "log_odds_chosen": 1.9746395349502563, "log_odds_ratio": -0.39856040477752686, "logits/chosen": -1.9557344913482666, "logits/rejected": -1.9873769283294678, "logps/chosen": -1.3181676864624023, "logps/rejected": -2.9867465496063232, "loss": 1.2325, "nll_loss": 1.2073771953582764, "rewards/accuracies": 0.796875, "rewards/chosen": -0.19772517681121826, "rewards/margins": 0.2502868175506592, "rewards/rejected": -0.44801196455955505, "step": 576 }, { "epoch": 11.498817966903074, "grad_norm": 30.6031551361084, "learning_rate": 1.0745813253325956e-07, "log_odds_chosen": 2.6722493171691895, "log_odds_ratio": -0.35422736406326294, "logits/chosen": -1.9136030673980713, "logits/rejected": -1.8901042938232422, "logps/chosen": -1.233724594116211, "logps/rejected": -3.5643980503082275, "loss": 1.2295, "nll_loss": 1.1232733726501465, "rewards/accuracies": 0.79296875, "rewards/chosen": -0.18505869805812836, "rewards/margins": 0.3496010899543762, "rewards/rejected": -0.534659743309021, "step": 608 }, { "epoch": 12.10401891252955, "grad_norm": 24.5542049407959, "learning_rate": 8.148578611867113e-08, "log_odds_chosen": 2.4431307315826416, "log_odds_ratio": -0.3909703195095062, "logits/chosen": -1.8700110912322998, "logits/rejected": -1.9457833766937256, "logps/chosen": -1.2895874977111816, "logps/rejected": -3.4220337867736816, "loss": 1.2212, "nll_loss": 1.1831854581832886, "rewards/accuracies": 0.75, "rewards/chosen": -0.19343814253807068, "rewards/margins": 0.31986698508262634, "rewards/rejected": -0.5133051872253418, "step": 640 }, { "epoch": 12.10401891252955, "eval_log_odds_chosen": 1.515697956085205, "eval_log_odds_ratio": -0.213613823056221, "eval_logits/chosen": -2.373903274536133, "eval_logits/rejected": -2.2387218475341797, "eval_logps/chosen": -1.2586114406585693, "eval_logps/rejected": -2.530747175216675, "eval_loss": 1.0882254838943481, "eval_nll_loss": 1.085294485092163, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.18879172205924988, "eval_rewards/margins": 0.19082039594650269, "eval_rewards/rejected": -0.37961211800575256, "eval_runtime": 0.8037, "eval_samples_per_second": 170.464, "eval_steps_per_second": 6.221, "step": 640 }, { "epoch": 12.709219858156029, "grad_norm": 19.59035301208496, "learning_rate": 5.848888922025552e-08, "log_odds_chosen": 2.298971176147461, "log_odds_ratio": -0.4026568830013275, "logits/chosen": -1.9471426010131836, "logits/rejected": -1.9182159900665283, "logps/chosen": -1.3048053979873657, "logps/rejected": -3.2884950637817383, "loss": 1.2173, "nll_loss": 1.1782861948013306, "rewards/accuracies": 0.76171875, "rewards/chosen": -0.19572080671787262, "rewards/margins": 0.2975533902645111, "rewards/rejected": -0.4932742416858673, "step": 672 }, { "epoch": 13.314420803782506, "grad_norm": 29.107627868652344, "learning_rate": 3.887349723342303e-08, "log_odds_chosen": 2.654096841812134, "log_odds_ratio": -0.38686317205429077, "logits/chosen": -1.9159326553344727, "logits/rejected": -1.9410839080810547, "logps/chosen": -1.2585792541503906, "logps/rejected": -3.5659923553466797, "loss": 1.2261, "nll_loss": 1.1586594581604004, "rewards/accuracies": 0.76953125, "rewards/chosen": -0.18878689408302307, "rewards/margins": 0.34611204266548157, "rewards/rejected": -0.5348988771438599, "step": 704 }, { "epoch": 13.617021276595745, "eval_log_odds_chosen": 1.5199410915374756, "eval_log_odds_ratio": -0.21233825385570526, "eval_logits/chosen": -2.366751194000244, "eval_logits/rejected": -2.2316524982452393, "eval_logps/chosen": -1.2421499490737915, "eval_logps/rejected": -2.5126953125, "eval_loss": 1.0794531106948853, "eval_nll_loss": 1.0783252716064453, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.18632249534130096, "eval_rewards/margins": 0.190581813454628, "eval_rewards/rejected": -0.37690430879592896, "eval_runtime": 0.7829, "eval_samples_per_second": 175.001, "eval_steps_per_second": 6.387, "step": 720 }, { "epoch": 13.919621749408984, "grad_norm": 22.09730339050293, "learning_rate": 2.298595844092377e-08, "log_odds_chosen": 2.671109199523926, "log_odds_ratio": -0.3479268252849579, "logits/chosen": -2.0034313201904297, "logits/rejected": -1.870398998260498, "logps/chosen": -1.2474991083145142, "logps/rejected": -3.5732622146606445, "loss": 1.2194, "nll_loss": 1.1217308044433594, "rewards/accuracies": 0.80859375, "rewards/chosen": -0.18712489306926727, "rewards/margins": 0.3488644063472748, "rewards/rejected": -0.5359892845153809, "step": 736 }, { "epoch": 14.52482269503546, "grad_norm": 24.665891647338867, "learning_rate": 1.1106798553464802e-08, "log_odds_chosen": 2.7288260459899902, "log_odds_ratio": -0.34282395243644714, "logits/chosen": -1.9847919940948486, "logits/rejected": -1.9164719581604004, "logps/chosen": -1.250791311264038, "logps/rejected": -3.6267054080963135, "loss": 1.2016, "nll_loss": 1.1445385217666626, "rewards/accuracies": 0.84375, "rewards/chosen": -0.18761873245239258, "rewards/margins": 0.3563871383666992, "rewards/rejected": -0.5440058708190918, "step": 768 }, { "epoch": 15.130023640661939, "grad_norm": 47.552574157714844, "learning_rate": 3.4457674771554422e-09, "log_odds_chosen": 2.26895809173584, "log_odds_ratio": -0.3884022831916809, "logits/chosen": -1.9943946599960327, "logits/rejected": -1.961279034614563, "logps/chosen": -1.3015713691711426, "logps/rejected": -3.266200304031372, "loss": 1.2176, "nll_loss": 1.1944975852966309, "rewards/accuracies": 0.76953125, "rewards/chosen": -0.1952357143163681, "rewards/margins": 0.29469433426856995, "rewards/rejected": -0.48993009328842163, "step": 800 }, { "epoch": 15.130023640661939, "eval_log_odds_chosen": 1.5204813480377197, "eval_log_odds_ratio": -0.21171817183494568, "eval_logits/chosen": -2.370277166366577, "eval_logits/rejected": -2.233962059020996, "eval_logps/chosen": -1.2457667589187622, "eval_logps/rejected": -2.517129898071289, "eval_loss": 1.081107497215271, "eval_nll_loss": 1.0765924453735352, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.18686501681804657, "eval_rewards/margins": 0.19070449471473694, "eval_rewards/rejected": -0.3775694966316223, "eval_runtime": 0.8025, "eval_samples_per_second": 170.724, "eval_steps_per_second": 6.231, "step": 800 }, { "epoch": 15.735224586288416, "grad_norm": 17.138763427734375, "learning_rate": 1.3813576683111006e-10, "log_odds_chosen": 2.620955228805542, "log_odds_ratio": -0.34706899523735046, "logits/chosen": -1.9596831798553467, "logits/rejected": -1.9431588649749756, "logps/chosen": -1.2250535488128662, "logps/rejected": -3.4995248317718506, "loss": 1.2191, "nll_loss": 1.129616379737854, "rewards/accuracies": 0.78515625, "rewards/chosen": -0.18375803530216217, "rewards/margins": 0.34117066860198975, "rewards/rejected": -0.5249287486076355, "step": 832 }, { "epoch": 15.886524822695035, "grad_norm": 26.069650650024414, "learning_rate": 0.0, "log_odds_chosen": 2.719402551651001, "log_odds_ratio": -0.3425367772579193, "logits/chosen": -2.0392448902130127, "logits/rejected": -1.9386732578277588, "logps/chosen": -1.2458713054656982, "logps/rejected": -3.6082699298858643, "loss": 1.2157, "nll_loss": 1.1475220918655396, "rewards/accuracies": 0.828125, "rewards/chosen": -0.1868807077407837, "rewards/margins": 0.35435977578163147, "rewards/rejected": -0.5412405133247375, "step": 840 }, { "epoch": 15.886524822695035, "eval_log_odds_chosen": 1.5112136602401733, "eval_log_odds_ratio": -0.2135576754808426, "eval_logits/chosen": -2.3702893257141113, "eval_logits/rejected": -2.2342278957366943, "eval_logps/chosen": -1.250806450843811, "eval_logps/rejected": -2.515653133392334, "eval_loss": 1.0810010433197021, "eval_nll_loss": 1.0763533115386963, "eval_rewards/accuracies": 1.0, "eval_rewards/chosen": -0.18762096762657166, "eval_rewards/margins": 0.18972699344158173, "eval_rewards/rejected": -0.37734800577163696, "eval_runtime": 0.7924, "eval_samples_per_second": 172.894, "eval_steps_per_second": 6.31, "step": 840 } ], "logging_steps": 32, "max_steps": 840, "num_input_tokens_seen": 0, "num_train_epochs": 17, "save_steps": 80, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }