{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100, "global_step": 684, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 2.024759928283202, "learning_rate": 7.246376811594204e-08, "logits/chosen": -2.961127519607544, "logits/rejected": -2.9461119174957275, "logps/chosen": -261.90582275390625, "logps/rejected": -270.03265380859375, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/margins_max": 0.0, "rewards/margins_min": 0.0, "rewards/margins_std": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "grad_norm": 1.8980625865574121, "learning_rate": 7.246376811594204e-07, "logits/chosen": -2.873429775238037, "logits/rejected": -2.8538858890533447, "logps/chosen": -217.4855194091797, "logps/rejected": -222.1319580078125, "loss": 0.6928, "rewards/accuracies": 0.4444444477558136, "rewards/chosen": 0.000618334801401943, "rewards/margins": 0.0005746870301663876, "rewards/margins_max": 0.0019774516113102436, "rewards/margins_min": -0.0008280774345621467, "rewards/margins_std": 0.001983808586373925, "rewards/rejected": 4.364784399513155e-05, "step": 10 }, { "epoch": 0.03, "grad_norm": 2.110946747867592, "learning_rate": 1.4492753623188408e-06, "logits/chosen": -2.856001377105713, "logits/rejected": -2.873141050338745, "logps/chosen": -228.9456787109375, "logps/rejected": -176.6509246826172, "loss": 0.6924, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.000968199223279953, "rewards/margins": 0.0013832334661856294, "rewards/margins_max": 0.002613522345200181, "rewards/margins_min": 0.0001529444707557559, "rewards/margins_std": 0.0017398912459611893, "rewards/rejected": -0.00041503418469801545, "step": 20 }, { "epoch": 0.04, "grad_norm": 2.0573110330048388, "learning_rate": 2.173913043478261e-06, "logits/chosen": -2.929853677749634, "logits/rejected": -2.875521183013916, "logps/chosen": -260.0462951660156, "logps/rejected": -239.0731658935547, "loss": 0.6904, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.004958462901413441, "rewards/margins": 0.005339525174349546, "rewards/margins_max": 0.011697771959006786, "rewards/margins_min": -0.001018722541630268, "rewards/margins_std": 0.00899192038923502, "rewards/rejected": -0.00038106151623651385, "step": 30 }, { "epoch": 0.06, "grad_norm": 1.9291759229840313, "learning_rate": 2.8985507246376816e-06, "logits/chosen": -2.8282618522644043, "logits/rejected": -2.7805304527282715, "logps/chosen": -326.31719970703125, "logps/rejected": -365.41064453125, "loss": 0.6861, "rewards/accuracies": 0.75, "rewards/chosen": 0.01588786579668522, "rewards/margins": 0.01591557264328003, "rewards/margins_max": 0.029321899637579918, "rewards/margins_min": 0.0025092470459640026, "rewards/margins_std": 0.0189594067633152, "rewards/rejected": -2.7706240871339105e-05, "step": 40 }, { "epoch": 0.07, "grad_norm": 1.679205040063332, "learning_rate": 3.6231884057971017e-06, "logits/chosen": -2.890886068344116, "logits/rejected": -2.8183233737945557, "logps/chosen": -249.2432098388672, "logps/rejected": -246.5466766357422, "loss": 0.6811, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.013654066249728203, "rewards/margins": 0.013491788879036903, "rewards/margins_max": 0.030749738216400146, "rewards/margins_min": -0.003766159061342478, "rewards/margins_std": 0.024406425654888153, "rewards/rejected": 0.00016227728337980807, "step": 50 }, { "epoch": 0.09, "grad_norm": 1.7034528224314027, "learning_rate": 4.347826086956522e-06, "logits/chosen": -3.011662006378174, "logits/rejected": -2.9398560523986816, "logps/chosen": -304.8922119140625, "logps/rejected": -246.6385498046875, "loss": 0.6705, "rewards/accuracies": 0.75, "rewards/chosen": 0.03926776722073555, "rewards/margins": 0.04536719247698784, "rewards/margins_max": 0.0852198451757431, "rewards/margins_min": 0.005514549091458321, "rewards/margins_std": 0.056360144168138504, "rewards/rejected": -0.006099428050220013, "step": 60 }, { "epoch": 0.1, "grad_norm": 1.9031177489294464, "learning_rate": 4.999967381905813e-06, "logits/chosen": -3.010206460952759, "logits/rejected": -2.93892240524292, "logps/chosen": -268.6554870605469, "logps/rejected": -208.88131713867188, "loss": 0.6535, "rewards/accuracies": 0.75, "rewards/chosen": 0.034810397773981094, "rewards/margins": 0.07472650706768036, "rewards/margins_max": 0.11295346170663834, "rewards/margins_min": 0.03649955615401268, "rewards/margins_std": 0.054061077535152435, "rewards/rejected": -0.039916109293699265, "step": 70 }, { "epoch": 0.12, "grad_norm": 2.0564287009461806, "learning_rate": 4.9960542403925095e-06, "logits/chosen": -2.7611324787139893, "logits/rejected": -2.683593273162842, "logps/chosen": -256.10906982421875, "logps/rejected": -241.2465057373047, "loss": 0.6403, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.01417328417301178, "rewards/margins": 0.08097021281719208, "rewards/margins_max": 0.19939911365509033, "rewards/margins_min": -0.03745868057012558, "rewards/margins_std": 0.16748374700546265, "rewards/rejected": -0.0667969286441803, "step": 80 }, { "epoch": 0.13, "grad_norm": 1.9620079371246095, "learning_rate": 4.98562917836165e-06, "logits/chosen": -2.7862794399261475, "logits/rejected": -2.755009651184082, "logps/chosen": -260.46527099609375, "logps/rejected": -209.8500213623047, "loss": 0.6216, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.02841970883309841, "rewards/margins": 0.1533060073852539, "rewards/margins_max": 0.2169797718524933, "rewards/margins_min": 0.08963226526975632, "rewards/margins_std": 0.09004827588796616, "rewards/rejected": -0.12488631159067154, "step": 90 }, { "epoch": 0.15, "grad_norm": 2.682732545242212, "learning_rate": 4.968719393609757e-06, "logits/chosen": -2.8458776473999023, "logits/rejected": -2.793788194656372, "logps/chosen": -373.66241455078125, "logps/rejected": -257.68841552734375, "loss": 0.5821, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.04686256870627403, "rewards/margins": 0.3232998847961426, "rewards/margins_max": 0.5046892166137695, "rewards/margins_min": 0.14191053807735443, "rewards/margins_std": 0.2565232217311859, "rewards/rejected": -0.27643734216690063, "step": 100 }, { "epoch": 0.15, "eval_logits/chosen": -2.7354917526245117, "eval_logits/rejected": -2.6965763568878174, "eval_logps/chosen": -303.8941650390625, "eval_logps/rejected": -286.7320861816406, "eval_loss": 0.6622087359428406, "eval_rewards/accuracies": 0.6150793433189392, "eval_rewards/chosen": -0.186729297041893, "eval_rewards/margins": 0.08876504004001617, "eval_rewards/margins_max": 0.39532026648521423, "eval_rewards/margins_min": -0.1795283555984497, "eval_rewards/margins_std": 0.25878801941871643, "eval_rewards/rejected": -0.2754943370819092, "eval_runtime": 284.1012, "eval_samples_per_second": 7.04, "eval_steps_per_second": 0.222, "step": 100 }, { "epoch": 0.16, "grad_norm": 3.178965793594782, "learning_rate": 4.9453690018345144e-06, "logits/chosen": -2.7822461128234863, "logits/rejected": -2.7551050186157227, "logps/chosen": -346.56549072265625, "logps/rejected": -336.0809020996094, "loss": 0.5771, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.02590801753103733, "rewards/margins": 0.3409319818019867, "rewards/margins_max": 0.4643324017524719, "rewards/margins_min": 0.21753165125846863, "rewards/margins_std": 0.17451441287994385, "rewards/rejected": -0.3668400049209595, "step": 110 }, { "epoch": 0.18, "grad_norm": 3.521510373240137, "learning_rate": 4.915638921541952e-06, "logits/chosen": -2.7236685752868652, "logits/rejected": -2.7280526161193848, "logps/chosen": -310.40826416015625, "logps/rejected": -319.103515625, "loss": 0.5407, "rewards/accuracies": 0.75, "rewards/chosen": -0.22706270217895508, "rewards/margins": 0.33027681708335876, "rewards/margins_max": 0.5542212724685669, "rewards/margins_min": 0.10633233934640884, "rewards/margins_std": 0.3167053163051605, "rewards/rejected": -0.5573395490646362, "step": 120 }, { "epoch": 0.19, "grad_norm": 2.110244119813805, "learning_rate": 4.879606715117019e-06, "logits/chosen": -2.8192946910858154, "logits/rejected": -2.731480836868286, "logps/chosen": -343.879150390625, "logps/rejected": -308.55780029296875, "loss": 0.5251, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.37679314613342285, "rewards/margins": 0.30480116605758667, "rewards/margins_max": 0.5037060379981995, "rewards/margins_min": 0.10589637607336044, "rewards/margins_std": 0.2812938690185547, "rewards/rejected": -0.6815943121910095, "step": 130 }, { "epoch": 0.2, "grad_norm": 5.313237756103227, "learning_rate": 4.837366386472175e-06, "logits/chosen": -2.8442323207855225, "logits/rejected": -2.757608413696289, "logps/chosen": -338.12030029296875, "logps/rejected": -321.89703369140625, "loss": 0.5564, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4649910032749176, "rewards/margins": 0.3815266191959381, "rewards/margins_max": 0.6749385595321655, "rewards/margins_min": 0.08811453729867935, "rewards/margins_std": 0.4149473309516907, "rewards/rejected": -0.8465176820755005, "step": 140 }, { "epoch": 0.22, "grad_norm": 3.739167908499058, "learning_rate": 4.789028135801919e-06, "logits/chosen": -2.8220436573028564, "logits/rejected": -2.8031575679779053, "logps/chosen": -304.0033874511719, "logps/rejected": -348.67108154296875, "loss": 0.5705, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.05871645733714104, "rewards/margins": 0.4513682425022125, "rewards/margins_max": 0.7341547012329102, "rewards/margins_min": 0.16858164966106415, "rewards/margins_std": 0.39992058277130127, "rewards/rejected": -0.5100846290588379, "step": 150 }, { "epoch": 0.23, "grad_norm": 4.005524634813479, "learning_rate": 4.7347180720830635e-06, "logits/chosen": -2.8278419971466064, "logits/rejected": -2.6905112266540527, "logps/chosen": -371.9062805175781, "logps/rejected": -375.65728759765625, "loss": 0.5226, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3725179135799408, "rewards/margins": 0.5365481972694397, "rewards/margins_max": 0.8424277305603027, "rewards/margins_min": 0.2306685745716095, "rewards/margins_std": 0.4325791001319885, "rewards/rejected": -0.9090660810470581, "step": 160 }, { "epoch": 0.25, "grad_norm": 8.89880059550579, "learning_rate": 4.674577884070811e-06, "logits/chosen": -2.7482759952545166, "logits/rejected": -2.7097363471984863, "logps/chosen": -368.9304504394531, "logps/rejected": -368.5115661621094, "loss": 0.4421, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.42384210228919983, "rewards/margins": 0.7086302638053894, "rewards/margins_max": 1.0815365314483643, "rewards/margins_min": 0.33572402596473694, "rewards/margins_std": 0.5273691415786743, "rewards/rejected": -1.1324723958969116, "step": 170 }, { "epoch": 0.26, "grad_norm": 12.030137047752005, "learning_rate": 4.608764470648971e-06, "logits/chosen": -2.782106399536133, "logits/rejected": -2.7207627296447754, "logps/chosen": -344.0601501464844, "logps/rejected": -446.43328857421875, "loss": 0.4886, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.3604986369609833, "rewards/margins": 0.7753941416740417, "rewards/margins_max": 1.1650350093841553, "rewards/margins_min": 0.38575348258018494, "rewards/margins_std": 0.5510352253913879, "rewards/rejected": -1.1358928680419922, "step": 180 }, { "epoch": 0.28, "grad_norm": 4.3464674099690255, "learning_rate": 4.5374495314986874e-06, "logits/chosen": -2.5000321865081787, "logits/rejected": -2.5499188899993896, "logps/chosen": -381.9058532714844, "logps/rejected": -390.3780212402344, "loss": 0.4735, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.4734285771846771, "rewards/margins": 1.050258994102478, "rewards/margins_max": 1.5981611013412476, "rewards/margins_min": 0.5023568868637085, "rewards/margins_std": 0.7748504877090454, "rewards/rejected": -1.523687481880188, "step": 190 }, { "epoch": 0.29, "grad_norm": 9.11783684491386, "learning_rate": 4.460819119153574e-06, "logits/chosen": -2.5870699882507324, "logits/rejected": -2.5736021995544434, "logps/chosen": -338.88360595703125, "logps/rejected": -435.57855224609375, "loss": 0.481, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.7319141030311584, "rewards/margins": 0.7730057835578918, "rewards/margins_max": 1.262407660484314, "rewards/margins_min": 0.2836039066314697, "rewards/margins_std": 0.6921188235282898, "rewards/rejected": -1.5049200057983398, "step": 200 }, { "epoch": 0.29, "eval_logits/chosen": -2.5660696029663086, "eval_logits/rejected": -2.540174722671509, "eval_logps/chosen": -410.9715881347656, "eval_logps/rejected": -423.9108581542969, "eval_loss": 0.6257268190383911, "eval_rewards/accuracies": 0.6746031641960144, "eval_rewards/chosen": -1.2575041055679321, "eval_rewards/margins": 0.389777809381485, "eval_rewards/margins_max": 1.3411911725997925, "eval_rewards/margins_min": -0.5259115695953369, "eval_rewards/margins_std": 0.8282801508903503, "eval_rewards/rejected": -1.6472818851470947, "eval_runtime": 283.175, "eval_samples_per_second": 7.063, "eval_steps_per_second": 0.222, "step": 200 }, { "epoch": 0.31, "grad_norm": 8.587152119215933, "learning_rate": 4.379073153609896e-06, "logits/chosen": -2.6241440773010254, "logits/rejected": -2.5915939807891846, "logps/chosen": -408.4817199707031, "logps/rejected": -439.4585876464844, "loss": 0.4772, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.8052468299865723, "rewards/margins": 0.7498449087142944, "rewards/margins_max": 1.3446953296661377, "rewards/margins_min": 0.15499453246593475, "rewards/margins_std": 0.8412453532218933, "rewards/rejected": -1.5550918579101562, "step": 210 }, { "epoch": 0.32, "grad_norm": 12.858094013695773, "learning_rate": 4.292424900758129e-06, "logits/chosen": -2.3552744388580322, "logits/rejected": -2.266150712966919, "logps/chosen": -326.7586669921875, "logps/rejected": -419.92950439453125, "loss": 0.4407, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6805569529533386, "rewards/margins": 0.8906445503234863, "rewards/margins_max": 1.5019675493240356, "rewards/margins_min": 0.27932122349739075, "rewards/margins_std": 0.8645416498184204, "rewards/rejected": -1.5712013244628906, "step": 220 }, { "epoch": 0.34, "grad_norm": 10.547994008924384, "learning_rate": 4.201100415996598e-06, "logits/chosen": -2.2731940746307373, "logits/rejected": -2.208517551422119, "logps/chosen": -396.6240234375, "logps/rejected": -439.95367431640625, "loss": 0.4373, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -1.4822484254837036, "rewards/margins": 0.3612428605556488, "rewards/margins_max": 0.8377896547317505, "rewards/margins_min": -0.11530391871929169, "rewards/margins_std": 0.6739388704299927, "rewards/rejected": -1.8434913158416748, "step": 230 }, { "epoch": 0.35, "grad_norm": 7.682347998116211, "learning_rate": 4.105337954478756e-06, "logits/chosen": -2.2987964153289795, "logits/rejected": -2.232954263687134, "logps/chosen": -488.194580078125, "logps/rejected": -482.121826171875, "loss": 0.497, "rewards/accuracies": 1.0, "rewards/chosen": -0.9666223526000977, "rewards/margins": 1.5014089345932007, "rewards/margins_max": 2.3409581184387207, "rewards/margins_min": 0.6618598103523254, "rewards/margins_std": 1.1873016357421875, "rewards/rejected": -2.468031167984009, "step": 240 }, { "epoch": 0.37, "grad_norm": 6.522613509898599, "learning_rate": 4.005387349532697e-06, "logits/chosen": -2.3422703742980957, "logits/rejected": -2.3057916164398193, "logps/chosen": -449.12469482421875, "logps/rejected": -528.1080322265625, "loss": 0.3759, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.4108879566192627, "rewards/margins": 1.1858874559402466, "rewards/margins_max": 2.1756746768951416, "rewards/margins_min": 0.19610002636909485, "rewards/margins_std": 1.3997704982757568, "rewards/rejected": -2.596775531768799, "step": 250 }, { "epoch": 0.38, "grad_norm": 9.556919040050822, "learning_rate": 3.901509360874515e-06, "logits/chosen": -2.1438629627227783, "logits/rejected": -2.12241792678833, "logps/chosen": -336.4192199707031, "logps/rejected": -396.7298889160156, "loss": 0.3976, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.233567237854004, "rewards/margins": 0.7809652090072632, "rewards/margins_max": 1.5155531167984009, "rewards/margins_min": 0.04637749865651131, "rewards/margins_std": 1.0388638973236084, "rewards/rejected": -2.0145325660705566, "step": 260 }, { "epoch": 0.39, "grad_norm": 8.191966927640436, "learning_rate": 3.793974994315991e-06, "logits/chosen": -1.8757396936416626, "logits/rejected": -1.9094167947769165, "logps/chosen": -297.0498352050781, "logps/rejected": -410.0951232910156, "loss": 0.4022, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.1613843441009521, "rewards/margins": 1.04617440700531, "rewards/margins_max": 1.7157318592071533, "rewards/margins_min": 0.3766169548034668, "rewards/margins_std": 0.9468971490859985, "rewards/rejected": -2.2075586318969727, "step": 270 }, { "epoch": 0.41, "grad_norm": 9.284581469509805, "learning_rate": 3.68306479474137e-06, "logits/chosen": -2.3123269081115723, "logits/rejected": -2.223254680633545, "logps/chosen": -509.96392822265625, "logps/rejected": -465.5381774902344, "loss": 0.3547, "rewards/accuracies": 0.75, "rewards/chosen": -1.3590073585510254, "rewards/margins": 1.2631444931030273, "rewards/margins_max": 1.895696997642517, "rewards/margins_min": 0.6305915713310242, "rewards/margins_std": 0.8945645093917847, "rewards/rejected": -2.6221518516540527, "step": 280 }, { "epoch": 0.42, "grad_norm": 23.380435739964835, "learning_rate": 3.569068114197784e-06, "logits/chosen": -2.013559103012085, "logits/rejected": -1.945844054222107, "logps/chosen": -327.3985290527344, "logps/rejected": -448.2659606933594, "loss": 0.3626, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0471247434616089, "rewards/margins": 1.6471458673477173, "rewards/margins_max": 2.4206135272979736, "rewards/margins_min": 0.8736783266067505, "rewards/margins_std": 1.0938485860824585, "rewards/rejected": -2.694270610809326, "step": 290 }, { "epoch": 0.44, "grad_norm": 14.305787223907084, "learning_rate": 3.4522823570088073e-06, "logits/chosen": -1.9044002294540405, "logits/rejected": -1.8846553564071655, "logps/chosen": -411.216552734375, "logps/rejected": -525.3153686523438, "loss": 0.4017, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.564225196838379, "rewards/margins": 1.5319175720214844, "rewards/margins_max": 2.567899703979492, "rewards/margins_min": 0.49593567848205566, "rewards/margins_std": 1.4650996923446655, "rewards/rejected": -3.0961427688598633, "step": 300 }, { "epoch": 0.44, "eval_logits/chosen": -2.0223634243011475, "eval_logits/rejected": -1.9880024194717407, "eval_logps/chosen": -462.021728515625, "eval_logps/rejected": -509.34765625, "eval_loss": 0.6111792922019958, "eval_rewards/accuracies": 0.6944444179534912, "eval_rewards/chosen": -1.7680050134658813, "eval_rewards/margins": 0.7336447238922119, "eval_rewards/margins_max": 2.3329057693481445, "eval_rewards/margins_min": -0.8123146891593933, "eval_rewards/margins_std": 1.4011034965515137, "eval_rewards/rejected": -2.501649856567383, "eval_runtime": 283.1994, "eval_samples_per_second": 7.062, "eval_steps_per_second": 0.222, "step": 300 }, { "epoch": 0.45, "grad_norm": 13.373839711281382, "learning_rate": 3.333012203880528e-06, "logits/chosen": -2.034450054168701, "logits/rejected": -1.9856176376342773, "logps/chosen": -361.3979187011719, "logps/rejected": -426.4706115722656, "loss": 0.379, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.1962629556655884, "rewards/margins": 1.4263014793395996, "rewards/margins_max": 2.3072102069854736, "rewards/margins_min": 0.5453929901123047, "rewards/margins_std": 1.2457928657531738, "rewards/rejected": -2.6225647926330566, "step": 310 }, { "epoch": 0.47, "grad_norm": 12.791871374689537, "learning_rate": 3.2115688170243735e-06, "logits/chosen": -2.104572057723999, "logits/rejected": -2.1095337867736816, "logps/chosen": -430.00299072265625, "logps/rejected": -597.9464721679688, "loss": 0.357, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.14143705368042, "rewards/margins": 1.844452142715454, "rewards/margins_max": 2.6857852935791016, "rewards/margins_min": 1.0031189918518066, "rewards/margins_std": 1.1898245811462402, "rewards/rejected": -2.985888957977295, "step": 320 }, { "epoch": 0.48, "grad_norm": 7.114464717414236, "learning_rate": 3.0882690283704355e-06, "logits/chosen": -1.9746555089950562, "logits/rejected": -1.915001630783081, "logps/chosen": -350.51678466796875, "logps/rejected": -462.4185485839844, "loss": 0.3842, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0409258604049683, "rewards/margins": 1.465539813041687, "rewards/margins_max": 2.2879223823547363, "rewards/margins_min": 0.6431571841239929, "rewards/margins_std": 1.1630247831344604, "rewards/rejected": -2.506465435028076, "step": 330 }, { "epoch": 0.5, "grad_norm": 11.717977839144085, "learning_rate": 2.9634345129891296e-06, "logits/chosen": -1.998160719871521, "logits/rejected": -1.8835424184799194, "logps/chosen": -419.52362060546875, "logps/rejected": -527.9315185546875, "loss": 0.3414, "rewards/accuracies": 0.75, "rewards/chosen": -1.097505807876587, "rewards/margins": 1.5337189435958862, "rewards/margins_max": 2.7524514198303223, "rewards/margins_min": 0.31498652696609497, "rewards/margins_std": 1.723548173904419, "rewards/rejected": -2.631225109100342, "step": 340 }, { "epoch": 0.51, "grad_norm": 14.724171411488078, "learning_rate": 2.8373909498776746e-06, "logits/chosen": -2.16463565826416, "logits/rejected": -2.167154550552368, "logps/chosen": -366.3368225097656, "logps/rejected": -522.6026611328125, "loss": 0.4421, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.8319808840751648, "rewards/margins": 1.7606074810028076, "rewards/margins_max": 2.4449410438537598, "rewards/margins_min": 1.076274037361145, "rewards/margins_std": 0.9677937626838684, "rewards/rejected": -2.5925886631011963, "step": 350 }, { "epoch": 0.53, "grad_norm": 9.998195485379709, "learning_rate": 2.710467172300768e-06, "logits/chosen": -2.146489143371582, "logits/rejected": -2.0911166667938232, "logps/chosen": -431.58660888671875, "logps/rejected": -554.4029541015625, "loss": 0.3448, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.7858428955078125, "rewards/margins": 1.576249361038208, "rewards/margins_max": 2.419480562210083, "rewards/margins_min": 0.7330182194709778, "rewards/margins_std": 1.1925089359283447, "rewards/rejected": -2.3620922565460205, "step": 360 }, { "epoch": 0.54, "grad_norm": 16.428444808566788, "learning_rate": 2.582994309902146e-06, "logits/chosen": -2.021066665649414, "logits/rejected": -1.8753414154052734, "logps/chosen": -438.49176025390625, "logps/rejected": -512.5082397460938, "loss": 0.4111, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.347548007965088, "rewards/margins": 1.3524014949798584, "rewards/margins_max": 2.1463735103607178, "rewards/margins_min": 0.5584291815757751, "rewards/margins_std": 1.122846245765686, "rewards/rejected": -2.699949264526367, "step": 370 }, { "epoch": 0.56, "grad_norm": 9.34369488447896, "learning_rate": 2.4553049248251512e-06, "logits/chosen": -1.9435670375823975, "logits/rejected": -1.9931520223617554, "logps/chosen": -381.89532470703125, "logps/rejected": -504.81341552734375, "loss": 0.3435, "rewards/accuracies": 0.75, "rewards/chosen": -1.481609582901001, "rewards/margins": 1.2445435523986816, "rewards/margins_max": 1.940123200416565, "rewards/margins_min": 0.5489639043807983, "rewards/margins_std": 0.9836981892585754, "rewards/rejected": -2.7261533737182617, "step": 380 }, { "epoch": 0.57, "grad_norm": 12.062558188816904, "learning_rate": 2.3277321440960733e-06, "logits/chosen": -2.1283860206604004, "logits/rejected": -2.116414785385132, "logps/chosen": -395.5448913574219, "logps/rejected": -539.8890380859375, "loss": 0.3457, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.2231833934783936, "rewards/margins": 1.5952281951904297, "rewards/margins_max": 2.423370838165283, "rewards/margins_min": 0.7670857906341553, "rewards/margins_std": 1.1711702346801758, "rewards/rejected": -2.8184115886688232, "step": 390 }, { "epoch": 0.58, "grad_norm": 10.060650957709997, "learning_rate": 2.20060879053377e-06, "logits/chosen": -1.8268073797225952, "logits/rejected": -1.7773048877716064, "logps/chosen": -360.2474670410156, "logps/rejected": -529.291259765625, "loss": 0.3427, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.5878477096557617, "rewards/margins": 1.5394123792648315, "rewards/margins_max": 2.434696674346924, "rewards/margins_min": 0.6441282033920288, "rewards/margins_std": 1.2661231756210327, "rewards/rejected": -3.127260208129883, "step": 400 }, { "epoch": 0.58, "eval_logits/chosen": -1.986289143562317, "eval_logits/rejected": -1.9447433948516846, "eval_logps/chosen": -476.6219177246094, "eval_logps/rejected": -527.7764892578125, "eval_loss": 0.5955031514167786, "eval_rewards/accuracies": 0.7023809552192688, "eval_rewards/chosen": -1.9140070676803589, "eval_rewards/margins": 0.7719313502311707, "eval_rewards/margins_max": 2.272120952606201, "eval_rewards/margins_min": -0.7218120098114014, "eval_rewards/margins_std": 1.3400975465774536, "eval_rewards/rejected": -2.685938596725464, "eval_runtime": 283.4614, "eval_samples_per_second": 7.056, "eval_steps_per_second": 0.222, "step": 400 }, { "epoch": 0.6, "grad_norm": 11.99849119237726, "learning_rate": 2.0742665144529374e-06, "logits/chosen": -1.9736402034759521, "logits/rejected": -1.901617407798767, "logps/chosen": -446.02227783203125, "logps/rejected": -546.3521728515625, "loss": 0.3666, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.0405278205871582, "rewards/margins": 1.8972772359848022, "rewards/margins_max": 2.6515231132507324, "rewards/margins_min": 1.1430312395095825, "rewards/margins_std": 1.066664695739746, "rewards/rejected": -2.937804937362671, "step": 410 }, { "epoch": 0.61, "grad_norm": 6.458783566768089, "learning_rate": 1.9490349284263036e-06, "logits/chosen": -1.8606504201889038, "logits/rejected": -1.7730525732040405, "logps/chosen": -470.9712829589844, "logps/rejected": -612.8939208984375, "loss": 0.3331, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.6054725646972656, "rewards/margins": 2.13732647895813, "rewards/margins_max": 2.9402005672454834, "rewards/margins_min": 1.334452509880066, "rewards/margins_std": 1.1354353427886963, "rewards/rejected": -3.7427992820739746, "step": 420 }, { "epoch": 0.63, "grad_norm": 9.837849328941022, "learning_rate": 1.8252407473630606e-06, "logits/chosen": -1.983541488647461, "logits/rejected": -1.9613971710205078, "logps/chosen": -458.0785217285156, "logps/rejected": -566.628662109375, "loss": 0.3559, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.701336145401001, "rewards/margins": 1.274548053741455, "rewards/margins_max": 1.9377739429473877, "rewards/margins_min": 0.6113225221633911, "rewards/margins_std": 0.9379426836967468, "rewards/rejected": -2.975884437561035, "step": 430 }, { "epoch": 0.64, "grad_norm": 10.432841283470852, "learning_rate": 1.7032069361469765e-06, "logits/chosen": -1.858236312866211, "logits/rejected": -1.8411200046539307, "logps/chosen": -359.7177429199219, "logps/rejected": -587.9736938476562, "loss": 0.3632, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2637131214141846, "rewards/margins": 1.9256131649017334, "rewards/margins_max": 2.9136805534362793, "rewards/margins_min": 0.9375454783439636, "rewards/margins_std": 1.397338628768921, "rewards/rejected": -3.189326047897339, "step": 440 }, { "epoch": 0.66, "grad_norm": 18.395508823434238, "learning_rate": 1.5832518670578802e-06, "logits/chosen": -2.010892391204834, "logits/rejected": -1.9377777576446533, "logps/chosen": -429.5704040527344, "logps/rejected": -617.7625732421875, "loss": 0.4001, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.3690681457519531, "rewards/margins": 1.9702503681182861, "rewards/margins_max": 3.1194424629211426, "rewards/margins_min": 0.8210585713386536, "rewards/margins_std": 1.6252025365829468, "rewards/rejected": -3.3393185138702393, "step": 450 }, { "epoch": 0.67, "grad_norm": 11.402368506953254, "learning_rate": 1.4656884891747398e-06, "logits/chosen": -1.8819122314453125, "logits/rejected": -1.9186322689056396, "logps/chosen": -424.16986083984375, "logps/rejected": -586.4650268554688, "loss": 0.3423, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.3872971534729004, "rewards/margins": 1.951498031616211, "rewards/margins_max": 3.129544496536255, "rewards/margins_min": 0.7734516263008118, "rewards/margins_std": 1.6660093069076538, "rewards/rejected": -3.3387951850891113, "step": 460 }, { "epoch": 0.69, "grad_norm": 10.853275743311396, "learning_rate": 1.3508235119272466e-06, "logits/chosen": -1.8404920101165771, "logits/rejected": -1.80814528465271, "logps/chosen": -476.41473388671875, "logps/rejected": -532.2838745117188, "loss": 0.3542, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.109175205230713, "rewards/margins": 0.6896736025810242, "rewards/margins_max": 1.9764328002929688, "rewards/margins_min": -0.5970857739448547, "rewards/margins_std": 1.8197526931762695, "rewards/rejected": -2.7988486289978027, "step": 470 }, { "epoch": 0.7, "grad_norm": 13.491064434657412, "learning_rate": 1.238956604925934e-06, "logits/chosen": -1.7815355062484741, "logits/rejected": -1.760663628578186, "logps/chosen": -379.5592346191406, "logps/rejected": -617.5568237304688, "loss": 0.377, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.146844506263733, "rewards/margins": 2.3650784492492676, "rewards/margins_max": 3.120236396789551, "rewards/margins_min": 1.6099202632904053, "rewards/margins_std": 1.0679547786712646, "rewards/rejected": -3.5119223594665527, "step": 480 }, { "epoch": 0.72, "grad_norm": 5.424010985619327, "learning_rate": 1.1303796161583763e-06, "logits/chosen": -2.0357632637023926, "logits/rejected": -2.0330684185028076, "logps/chosen": -418.2298278808594, "logps/rejected": -598.3395385742188, "loss": 0.3425, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.2266708612442017, "rewards/margins": 1.6023613214492798, "rewards/margins_max": 2.5102345943450928, "rewards/margins_min": 0.6944878697395325, "rewards/margins_std": 1.2839267253875732, "rewards/rejected": -2.8290319442749023, "step": 490 }, { "epoch": 0.73, "grad_norm": 8.453582130806156, "learning_rate": 1.0253758105911169e-06, "logits/chosen": -2.0878758430480957, "logits/rejected": -1.991579294204712, "logps/chosen": -448.9693298339844, "logps/rejected": -671.1817626953125, "loss": 0.3246, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.2098839282989502, "rewards/margins": 2.05537486076355, "rewards/margins_max": 2.98439359664917, "rewards/margins_min": 1.1263563632965088, "rewards/margins_std": 1.313830852508545, "rewards/rejected": -3.2652587890625, "step": 500 }, { "epoch": 0.73, "eval_logits/chosen": -1.886383295059204, "eval_logits/rejected": -1.84441077709198, "eval_logps/chosen": -513.3748168945312, "eval_logps/rejected": -561.1234130859375, "eval_loss": 0.6025983691215515, "eval_rewards/accuracies": 0.6626983880996704, "eval_rewards/chosen": -2.281536102294922, "eval_rewards/margins": 0.7378710508346558, "eval_rewards/margins_max": 2.2879276275634766, "eval_rewards/margins_min": -0.7821336388587952, "eval_rewards/margins_std": 1.3716031312942505, "eval_rewards/rejected": -3.019407033920288, "eval_runtime": 283.5562, "eval_samples_per_second": 7.053, "eval_steps_per_second": 0.222, "step": 500 }, { "epoch": 0.75, "grad_norm": 17.08831198492527, "learning_rate": 9.24219131163705e-07, "logits/chosen": -1.7034380435943604, "logits/rejected": -1.677706003189087, "logps/chosen": -501.74334716796875, "logps/rejected": -674.4254150390625, "loss": 0.3481, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.918776273727417, "rewards/margins": 1.897600531578064, "rewards/margins_max": 3.511080265045166, "rewards/margins_min": 0.284121036529541, "rewards/margins_std": 2.2818045616149902, "rewards/rejected": -3.8163769245147705, "step": 510 }, { "epoch": 0.76, "grad_norm": 3.7704796107061735, "learning_rate": 8.271734841028553e-07, "logits/chosen": -1.5852091312408447, "logits/rejected": -1.471635103225708, "logps/chosen": -462.9111328125, "logps/rejected": -592.2215576171875, "loss": 0.3213, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.484692096710205, "rewards/margins": 2.1865782737731934, "rewards/margins_max": 2.6932501792907715, "rewards/margins_min": 1.6799061298370361, "rewards/margins_std": 0.7165425419807434, "rewards/rejected": -3.6712703704833984, "step": 520 }, { "epoch": 0.77, "grad_norm": 5.607658272294071, "learning_rate": 7.344920504212244e-07, "logits/chosen": -1.838727593421936, "logits/rejected": -1.8044246435165405, "logps/chosen": -373.3072204589844, "logps/rejected": -488.6890563964844, "loss": 0.36, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.4621307849884033, "rewards/margins": 1.4751158952713013, "rewards/margins_max": 2.411809206008911, "rewards/margins_min": 0.5384225845336914, "rewards/margins_std": 1.3246843814849854, "rewards/rejected": -2.937246799468994, "step": 530 }, { "epoch": 0.79, "grad_norm": 10.180442565411226, "learning_rate": 6.464166253970672e-07, "logits/chosen": -1.888649344444275, "logits/rejected": -1.9107511043548584, "logps/chosen": -483.1812438964844, "logps/rejected": -572.9668579101562, "loss": 0.3483, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.5701286792755127, "rewards/margins": 1.3985799551010132, "rewards/margins_max": 2.0270140171051025, "rewards/margins_min": 0.7701458930969238, "rewards/margins_std": 0.8887398838996887, "rewards/rejected": -2.9687085151672363, "step": 540 }, { "epoch": 0.8, "grad_norm": 7.067320132481652, "learning_rate": 5.631769877579535e-07, "logits/chosen": -1.9241430759429932, "logits/rejected": -1.8503801822662354, "logps/chosen": -391.8656311035156, "logps/rejected": -515.9195556640625, "loss": 0.3546, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.404034972190857, "rewards/margins": 1.4414308071136475, "rewards/margins_max": 2.243879556655884, "rewards/margins_min": 0.6389821171760559, "rewards/margins_std": 1.1348340511322021, "rewards/rejected": -2.845465898513794, "step": 550 }, { "epoch": 0.82, "grad_norm": 23.109650997760248, "learning_rate": 4.849903002143114e-07, "logits/chosen": -2.134927988052368, "logits/rejected": -2.0764718055725098, "logps/chosen": -540.7327880859375, "logps/rejected": -669.7936401367188, "loss": 0.3152, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.6844943761825562, "rewards/margins": 1.754024863243103, "rewards/margins_max": 2.7484121322631836, "rewards/margins_min": 0.7596377730369568, "rewards/margins_std": 1.4062758684158325, "rewards/rejected": -3.438519239425659, "step": 560 }, { "epoch": 0.83, "grad_norm": 14.65033008607176, "learning_rate": 4.1206054290670537e-07, "logits/chosen": -1.8871490955352783, "logits/rejected": -1.8968530893325806, "logps/chosen": -413.68682861328125, "logps/rejected": -668.4096069335938, "loss": 0.3224, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.614575743675232, "rewards/margins": 2.1032443046569824, "rewards/margins_max": 3.0623726844787598, "rewards/margins_min": 1.144116759300232, "rewards/margins_std": 1.3564116954803467, "rewards/rejected": -3.717820405960083, "step": 570 }, { "epoch": 0.85, "grad_norm": 7.593895176885149, "learning_rate": 3.44577981244944e-07, "logits/chosen": -1.9477647542953491, "logits/rejected": -1.9660476446151733, "logps/chosen": -438.8433532714844, "logps/rejected": -546.8043212890625, "loss": 0.4149, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9253628253936768, "rewards/margins": 1.1086914539337158, "rewards/margins_max": 2.1370372772216797, "rewards/margins_min": 0.08034573495388031, "rewards/margins_std": 1.4543002843856812, "rewards/rejected": -3.0340542793273926, "step": 580 }, { "epoch": 0.86, "grad_norm": 5.844449740859913, "learning_rate": 2.827186695273482e-07, "logits/chosen": -2.1650023460388184, "logits/rejected": -2.0401604175567627, "logps/chosen": -522.0162963867188, "logps/rejected": -637.875732421875, "loss": 0.3966, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.199137806892395, "rewards/margins": 1.8260653018951416, "rewards/margins_max": 3.141328811645508, "rewards/margins_min": 0.5108016729354858, "rewards/margins_std": 1.8600635528564453, "rewards/rejected": -3.025203227996826, "step": 590 }, { "epoch": 0.88, "grad_norm": 8.653551252508137, "learning_rate": 2.2664399163518786e-07, "logits/chosen": -1.9303522109985352, "logits/rejected": -1.865822434425354, "logps/chosen": -474.86810302734375, "logps/rejected": -598.7958374023438, "loss": 0.2747, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.5423686504364014, "rewards/margins": 2.11772084236145, "rewards/margins_max": 3.12973690032959, "rewards/margins_min": 1.1057052612304688, "rewards/margins_std": 1.431206464767456, "rewards/rejected": -3.6600890159606934, "step": 600 }, { "epoch": 0.88, "eval_logits/chosen": -1.8380076885223389, "eval_logits/rejected": -1.7933586835861206, "eval_logps/chosen": -502.5606994628906, "eval_logps/rejected": -556.8073120117188, "eval_loss": 0.5972898602485657, "eval_rewards/accuracies": 0.6785714030265808, "eval_rewards/chosen": -2.1733951568603516, "eval_rewards/margins": 0.8028514385223389, "eval_rewards/margins_max": 2.427276134490967, "eval_rewards/margins_min": -0.751455545425415, "eval_rewards/margins_std": 1.4232866764068604, "eval_rewards/rejected": -2.9762465953826904, "eval_runtime": 283.2826, "eval_samples_per_second": 7.06, "eval_steps_per_second": 0.222, "step": 600 }, { "epoch": 0.89, "grad_norm": 9.369824417732737, "learning_rate": 1.7650024000056415e-07, "logits/chosen": -1.8018262386322021, "logits/rejected": -1.7738994359970093, "logps/chosen": -374.61505126953125, "logps/rejected": -558.9880981445312, "loss": 0.3286, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.5432480573654175, "rewards/margins": 1.8530250787734985, "rewards/margins_max": 2.7572684288024902, "rewards/margins_min": 0.9487819671630859, "rewards/margins_std": 1.2787930965423584, "rewards/rejected": -3.396273374557495, "step": 610 }, { "epoch": 0.91, "grad_norm": 6.630649676820421, "learning_rate": 1.324182339461544e-07, "logits/chosen": -1.8650553226470947, "logits/rejected": -1.8343786001205444, "logps/chosen": -429.2254333496094, "logps/rejected": -518.1988525390625, "loss": 0.3508, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.508697271347046, "rewards/margins": 1.630495309829712, "rewards/margins_max": 2.3903756141662598, "rewards/margins_min": 0.8706151247024536, "rewards/margins_std": 1.0746327638626099, "rewards/rejected": -3.139192581176758, "step": 620 }, { "epoch": 0.92, "grad_norm": 16.0557186788671, "learning_rate": 9.451297839253915e-08, "logits/chosen": -1.8724334239959717, "logits/rejected": -1.7563972473144531, "logps/chosen": -476.4231872558594, "logps/rejected": -727.5858154296875, "loss": 0.3078, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.4125255346298218, "rewards/margins": 2.7260119915008545, "rewards/margins_max": 3.8035919666290283, "rewards/margins_min": 1.6484321355819702, "rewards/margins_std": 1.5239282846450806, "rewards/rejected": -4.138537406921387, "step": 630 }, { "epoch": 0.94, "grad_norm": 10.546642069187087, "learning_rate": 6.288336382349463e-08, "logits/chosen": -1.8520616292953491, "logits/rejected": -1.7307716608047485, "logps/chosen": -558.7571411132812, "logps/rejected": -656.2450561523438, "loss": 0.2733, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.7490208148956299, "rewards/margins": 2.0255560874938965, "rewards/margins_max": 2.8944671154022217, "rewards/margins_min": 1.1566449403762817, "rewards/margins_std": 1.2288259267807007, "rewards/rejected": -3.7745769023895264, "step": 640 }, { "epoch": 0.95, "grad_norm": 14.096656651836478, "learning_rate": 3.761190829201067e-08, "logits/chosen": -1.8188579082489014, "logits/rejected": -1.7514938116073608, "logps/chosen": -534.6187133789062, "logps/rejected": -567.9749755859375, "loss": 0.3434, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.4054492712020874, "rewards/margins": 1.610443115234375, "rewards/margins_max": 2.8384835720062256, "rewards/margins_min": 0.38240256905555725, "rewards/margins_std": 1.7367115020751953, "rewards/rejected": -3.015892505645752, "step": 650 }, { "epoch": 0.96, "grad_norm": 8.340110871014865, "learning_rate": 1.876454214011253e-08, "logits/chosen": -1.8232501745224, "logits/rejected": -1.779158592224121, "logps/chosen": -416.4646911621094, "logps/rejected": -530.9276123046875, "loss": 0.3517, "rewards/accuracies": 0.75, "rewards/chosen": -1.5091989040374756, "rewards/margins": 1.5967410802841187, "rewards/margins_max": 2.6342692375183105, "rewards/margins_min": 0.5592130422592163, "rewards/margins_std": 1.4672863483428955, "rewards/rejected": -3.105940103530884, "step": 660 }, { "epoch": 0.98, "grad_norm": 11.832885590441766, "learning_rate": 6.390435994127753e-09, "logits/chosen": -1.7567815780639648, "logits/rejected": -1.7844308614730835, "logps/chosen": -505.89019775390625, "logps/rejected": -757.8609619140625, "loss": 0.4029, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.9832277297973633, "rewards/margins": 2.035264492034912, "rewards/margins_max": 3.0966219902038574, "rewards/margins_min": 0.9739071130752563, "rewards/margins_std": 1.500985860824585, "rewards/rejected": -4.018492221832275, "step": 670 }, { "epoch": 0.99, "grad_norm": 7.732345900580283, "learning_rate": 5.218724841346556e-10, "logits/chosen": -1.5676209926605225, "logits/rejected": -1.594948172569275, "logps/chosen": -497.2765197753906, "logps/rejected": -658.2857666015625, "loss": 0.3008, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.0746806859970093, "rewards/margins": 2.671840190887451, "rewards/margins_max": 3.8470091819763184, "rewards/margins_min": 1.4966704845428467, "rewards/margins_std": 1.661940336227417, "rewards/rejected": -3.74652099609375, "step": 680 }, { "epoch": 1.0, "step": 684, "total_flos": 0.0, "train_loss": 0.42957196057888497, "train_runtime": 6346.2002, "train_samples_per_second": 1.724, "train_steps_per_second": 0.108 } ], "logging_steps": 10, "max_steps": 684, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }