{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.989399293286219, "eval_steps": 500, "global_step": 741, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.020191822311963654, "grad_norm": 30.7789249420166, "learning_rate": 5e-06, "logits/chosen": -1.0492041110992432, "logits/rejected": -1.0134608745574951, "logps/chosen": -127.44047546386719, "logps/rejected": -147.18359375, "loss": 0.6931, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": 0.009235936217010021, "rewards/margins": 0.0008875288185663521, "rewards/rejected": 0.008348408155143261, "step": 5 }, { "epoch": 0.04038364462392731, "grad_norm": 37.41344451904297, "learning_rate": 5e-06, "logits/chosen": -1.002793312072754, "logits/rejected": -0.9411100149154663, "logps/chosen": -126.66816711425781, "logps/rejected": -159.05479431152344, "loss": 0.6969, "rewards/accuracies": 0.375, "rewards/chosen": 0.006721611134707928, "rewards/margins": -0.006841087248176336, "rewards/rejected": 0.013562696985900402, "step": 10 }, { "epoch": 0.06057546693589096, "grad_norm": 42.148170471191406, "learning_rate": 5e-06, "logits/chosen": -1.0353584289550781, "logits/rejected": -1.0312868356704712, "logps/chosen": -145.601806640625, "logps/rejected": -159.65481567382812, "loss": 0.6928, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": 0.02560463175177574, "rewards/margins": 0.0021010604687035084, "rewards/rejected": 0.023503568023443222, "step": 15 }, { "epoch": 0.08076728924785462, "grad_norm": 58.297794342041016, "learning_rate": 5e-06, "logits/chosen": -1.053727388381958, "logits/rejected": -1.0229905843734741, "logps/chosen": -121.37943267822266, "logps/rejected": -130.5764923095703, "loss": 0.6857, "rewards/accuracies": 0.574999988079071, "rewards/chosen": 0.06899537146091461, "rewards/margins": 0.015979710966348648, "rewards/rejected": 0.053015656769275665, "step": 20 }, { "epoch": 0.10095911155981828, "grad_norm": 158.1420440673828, "learning_rate": 5e-06, "logits/chosen": -1.097388744354248, "logits/rejected": -1.0624990463256836, "logps/chosen": -126.20062255859375, "logps/rejected": -140.00369262695312, "loss": 0.6968, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": 0.040822066366672516, "rewards/margins": -0.004479425959289074, "rewards/rejected": 0.04530149698257446, "step": 25 }, { "epoch": 0.12115093387178193, "grad_norm": 111.64271545410156, "learning_rate": 5e-06, "logits/chosen": -0.9921103715896606, "logits/rejected": -0.9481407999992371, "logps/chosen": -133.66769409179688, "logps/rejected": -149.52188110351562, "loss": 0.7066, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.17871007323265076, "rewards/margins": -0.001442834734916687, "rewards/rejected": -0.17726723849773407, "step": 30 }, { "epoch": 0.1413427561837456, "grad_norm": 35.10946273803711, "learning_rate": 5e-06, "logits/chosen": -0.9996519088745117, "logits/rejected": -1.0022809505462646, "logps/chosen": -142.79452514648438, "logps/rejected": -142.8091583251953, "loss": 0.6933, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.1559412181377411, "rewards/margins": 0.018214434385299683, "rewards/rejected": -0.17415565252304077, "step": 35 }, { "epoch": 0.16153457849570924, "grad_norm": 154.33160400390625, "learning_rate": 5e-06, "logits/chosen": -1.0090668201446533, "logits/rejected": -0.9780625104904175, "logps/chosen": -124.8656234741211, "logps/rejected": -151.53759765625, "loss": 0.6991, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.0610191710293293, "rewards/margins": 0.001720429165288806, "rewards/rejected": -0.06273959577083588, "step": 40 }, { "epoch": 0.18172640080767288, "grad_norm": 66.54533386230469, "learning_rate": 5e-06, "logits/chosen": -1.0511558055877686, "logits/rejected": -0.9915176630020142, "logps/chosen": -125.61368560791016, "logps/rejected": -140.61505126953125, "loss": 0.6762, "rewards/accuracies": 0.5, "rewards/chosen": -0.11788681894540787, "rewards/margins": 0.049356214702129364, "rewards/rejected": -0.16724303364753723, "step": 45 }, { "epoch": 0.20191822311963656, "grad_norm": 133.6022491455078, "learning_rate": 5e-06, "logits/chosen": -1.0434271097183228, "logits/rejected": -1.0047496557235718, "logps/chosen": -126.78190612792969, "logps/rejected": -138.11805725097656, "loss": 0.7026, "rewards/accuracies": 0.375, "rewards/chosen": -0.2226967066526413, "rewards/margins": 0.006922038737684488, "rewards/rejected": -0.22961874306201935, "step": 50 }, { "epoch": 0.2221100454316002, "grad_norm": 61.76374053955078, "learning_rate": 5e-06, "logits/chosen": -1.026590347290039, "logits/rejected": -1.0285896062850952, "logps/chosen": -126.12849426269531, "logps/rejected": -142.56619262695312, "loss": 0.6729, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.26916319131851196, "rewards/margins": 0.06655414402484894, "rewards/rejected": -0.3357173502445221, "step": 55 }, { "epoch": 0.24230186774356385, "grad_norm": 61.80793380737305, "learning_rate": 5e-06, "logits/chosen": -1.0803353786468506, "logits/rejected": -1.0326142311096191, "logps/chosen": -126.38145446777344, "logps/rejected": -143.6183319091797, "loss": 0.6796, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.36116331815719604, "rewards/margins": 0.07047157734632492, "rewards/rejected": -0.43163490295410156, "step": 60 }, { "epoch": 0.26249369005552753, "grad_norm": 63.28056716918945, "learning_rate": 5e-06, "logits/chosen": -0.9822731018066406, "logits/rejected": -0.9656317830085754, "logps/chosen": -127.26124572753906, "logps/rejected": -142.61508178710938, "loss": 0.6992, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.36886388063430786, "rewards/margins": 0.059492867439985275, "rewards/rejected": -0.42835673689842224, "step": 65 }, { "epoch": 0.2826855123674912, "grad_norm": 40.66404724121094, "learning_rate": 5e-06, "logits/chosen": -1.0611032247543335, "logits/rejected": -0.9986160397529602, "logps/chosen": -119.45987701416016, "logps/rejected": -137.6642303466797, "loss": 0.635, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.24067869782447815, "rewards/margins": 0.19307085871696472, "rewards/rejected": -0.4337495267391205, "step": 70 }, { "epoch": 0.3028773346794548, "grad_norm": 141.08375549316406, "learning_rate": 5e-06, "logits/chosen": -1.0468595027923584, "logits/rejected": -1.0398485660552979, "logps/chosen": -123.22066497802734, "logps/rejected": -137.7001190185547, "loss": 0.6822, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.32747992873191833, "rewards/margins": 0.04724354296922684, "rewards/rejected": -0.37472349405288696, "step": 75 }, { "epoch": 0.32306915699141847, "grad_norm": 76.24655151367188, "learning_rate": 5e-06, "logits/chosen": -1.0108659267425537, "logits/rejected": -0.9696664810180664, "logps/chosen": -132.04534912109375, "logps/rejected": -134.4063262939453, "loss": 0.7582, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.3479716181755066, "rewards/margins": -0.06882990151643753, "rewards/rejected": -0.27914175391197205, "step": 80 }, { "epoch": 0.3432609793033821, "grad_norm": 294.6365661621094, "learning_rate": 5e-06, "logits/chosen": -1.0133106708526611, "logits/rejected": -1.0042521953582764, "logps/chosen": -134.86880493164062, "logps/rejected": -145.12660217285156, "loss": 0.7659, "rewards/accuracies": 0.5, "rewards/chosen": -0.3877476453781128, "rewards/margins": -0.0979350134730339, "rewards/rejected": -0.2898126542568207, "step": 85 }, { "epoch": 0.36345280161534577, "grad_norm": 48.36005783081055, "learning_rate": 5e-06, "logits/chosen": -1.0785777568817139, "logits/rejected": -1.0150744915008545, "logps/chosen": -125.0882339477539, "logps/rejected": -134.69866943359375, "loss": 0.7113, "rewards/accuracies": 0.5, "rewards/chosen": -0.36325889825820923, "rewards/margins": 0.047263432294130325, "rewards/rejected": -0.41052237153053284, "step": 90 }, { "epoch": 0.3836446239273094, "grad_norm": 134.56393432617188, "learning_rate": 5e-06, "logits/chosen": -1.0220086574554443, "logits/rejected": -1.0256017446517944, "logps/chosen": -136.06663513183594, "logps/rejected": -141.03758239746094, "loss": 0.7582, "rewards/accuracies": 0.5, "rewards/chosen": -0.5725785493850708, "rewards/margins": -0.02296329475939274, "rewards/rejected": -0.549615204334259, "step": 95 }, { "epoch": 0.4038364462392731, "grad_norm": 192.60671997070312, "learning_rate": 5e-06, "logits/chosen": -1.029658555984497, "logits/rejected": -1.015587568283081, "logps/chosen": -135.1102752685547, "logps/rejected": -132.73965454101562, "loss": 0.7576, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.6215102076530457, "rewards/margins": -0.022445138543844223, "rewards/rejected": -0.5990650653839111, "step": 100 }, { "epoch": 0.42402826855123676, "grad_norm": 42.97038269042969, "learning_rate": 5e-06, "logits/chosen": -1.01835036277771, "logits/rejected": -0.9691454768180847, "logps/chosen": -127.7636489868164, "logps/rejected": -131.01290893554688, "loss": 0.8087, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.43060892820358276, "rewards/margins": -0.1305285394191742, "rewards/rejected": -0.30008038878440857, "step": 105 }, { "epoch": 0.4442200908632004, "grad_norm": 73.89049530029297, "learning_rate": 5e-06, "logits/chosen": -1.0535072088241577, "logits/rejected": -1.0358234643936157, "logps/chosen": -131.6114959716797, "logps/rejected": -148.74351501464844, "loss": 0.6739, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.319467693567276, "rewards/margins": 0.11042511463165283, "rewards/rejected": -0.4298928380012512, "step": 110 }, { "epoch": 0.46441191317516406, "grad_norm": 261.6590881347656, "learning_rate": 5e-06, "logits/chosen": -1.039872407913208, "logits/rejected": -1.061729073524475, "logps/chosen": -133.2412567138672, "logps/rejected": -141.62680053710938, "loss": 0.8194, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.5433087348937988, "rewards/margins": -0.15157613158226013, "rewards/rejected": -0.3917326033115387, "step": 115 }, { "epoch": 0.4846037354871277, "grad_norm": 153.96530151367188, "learning_rate": 5e-06, "logits/chosen": -1.0134645700454712, "logits/rejected": -1.0050514936447144, "logps/chosen": -140.56578063964844, "logps/rejected": -146.6461944580078, "loss": 0.7222, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.4193994104862213, "rewards/margins": -0.014027280732989311, "rewards/rejected": -0.40537214279174805, "step": 120 }, { "epoch": 0.5047955577990914, "grad_norm": 92.5348129272461, "learning_rate": 5e-06, "logits/chosen": -1.0472946166992188, "logits/rejected": -1.0149815082550049, "logps/chosen": -126.81514739990234, "logps/rejected": -138.25100708007812, "loss": 0.641, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.2987712025642395, "rewards/margins": 0.14457064867019653, "rewards/rejected": -0.44334182143211365, "step": 125 }, { "epoch": 0.5249873801110551, "grad_norm": 69.95176696777344, "learning_rate": 5e-06, "logits/chosen": -0.9726712107658386, "logits/rejected": -0.9396511316299438, "logps/chosen": -134.06784057617188, "logps/rejected": -149.26393127441406, "loss": 0.6874, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.4227423071861267, "rewards/margins": 0.07707767188549042, "rewards/rejected": -0.4998199939727783, "step": 130 }, { "epoch": 0.5451792024230186, "grad_norm": 59.02754592895508, "learning_rate": 5e-06, "logits/chosen": -1.0336484909057617, "logits/rejected": -1.0117156505584717, "logps/chosen": -123.12812805175781, "logps/rejected": -138.68785095214844, "loss": 0.6972, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.2850368916988373, "rewards/margins": 0.08022015541791916, "rewards/rejected": -0.3652570843696594, "step": 135 }, { "epoch": 0.5653710247349824, "grad_norm": 164.89817810058594, "learning_rate": 5e-06, "logits/chosen": -0.9582400321960449, "logits/rejected": -0.9342845678329468, "logps/chosen": -136.7896270751953, "logps/rejected": -144.94981384277344, "loss": 0.8658, "rewards/accuracies": 0.5, "rewards/chosen": -0.5358542799949646, "rewards/margins": -0.13312572240829468, "rewards/rejected": -0.40272849798202515, "step": 140 }, { "epoch": 0.585562847046946, "grad_norm": 273.0749816894531, "learning_rate": 5e-06, "logits/chosen": -1.0799394845962524, "logits/rejected": -1.0522867441177368, "logps/chosen": -133.341796875, "logps/rejected": -137.64601135253906, "loss": 0.7, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.5455681681632996, "rewards/margins": 0.1389351338148117, "rewards/rejected": -0.6845033168792725, "step": 145 }, { "epoch": 0.6057546693589096, "grad_norm": 58.67772674560547, "learning_rate": 5e-06, "logits/chosen": -1.0716097354888916, "logits/rejected": -1.0067239999771118, "logps/chosen": -131.79644775390625, "logps/rejected": -145.97897338867188, "loss": 0.6711, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.48813509941101074, "rewards/margins": 0.15445764362812042, "rewards/rejected": -0.64259272813797, "step": 150 }, { "epoch": 0.6259464916708734, "grad_norm": 153.71763610839844, "learning_rate": 5e-06, "logits/chosen": -0.9819652438163757, "logits/rejected": -0.975143551826477, "logps/chosen": -131.63906860351562, "logps/rejected": -136.08070373535156, "loss": 0.6992, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.511472761631012, "rewards/margins": 0.09871827065944672, "rewards/rejected": -0.6101909875869751, "step": 155 }, { "epoch": 0.6461383139828369, "grad_norm": 140.51803588867188, "learning_rate": 5e-06, "logits/chosen": -0.9384062886238098, "logits/rejected": -0.9456030130386353, "logps/chosen": -137.96107482910156, "logps/rejected": -148.41708374023438, "loss": 0.6837, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.5771997570991516, "rewards/margins": 0.1888493001461029, "rewards/rejected": -0.7660490274429321, "step": 160 }, { "epoch": 0.6663301362948006, "grad_norm": 115.60352325439453, "learning_rate": 5e-06, "logits/chosen": -1.032671332359314, "logits/rejected": -1.032019853591919, "logps/chosen": -136.2175750732422, "logps/rejected": -139.32192993164062, "loss": 0.7872, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.6546095609664917, "rewards/margins": -0.08018063008785248, "rewards/rejected": -0.5744289755821228, "step": 165 }, { "epoch": 0.6865219586067642, "grad_norm": 79.54869079589844, "learning_rate": 5e-06, "logits/chosen": -1.0461210012435913, "logits/rejected": -1.001206398010254, "logps/chosen": -133.0095672607422, "logps/rejected": -142.75643920898438, "loss": 0.6265, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.38921013474464417, "rewards/margins": 0.21428516507148743, "rewards/rejected": -0.6034952998161316, "step": 170 }, { "epoch": 0.7067137809187279, "grad_norm": 124.25992584228516, "learning_rate": 5e-06, "logits/chosen": -1.0024333000183105, "logits/rejected": -1.0128402709960938, "logps/chosen": -134.9090576171875, "logps/rejected": -141.35768127441406, "loss": 0.6573, "rewards/accuracies": 0.625, "rewards/chosen": -0.43251633644104004, "rewards/margins": 0.22649070620536804, "rewards/rejected": -0.6590070128440857, "step": 175 }, { "epoch": 0.7269056032306915, "grad_norm": 90.11141204833984, "learning_rate": 5e-06, "logits/chosen": -1.0318266153335571, "logits/rejected": -0.9781469106674194, "logps/chosen": -128.78518676757812, "logps/rejected": -147.6506805419922, "loss": 0.7165, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.4871068596839905, "rewards/margins": 0.07786975055932999, "rewards/rejected": -0.5649765729904175, "step": 180 }, { "epoch": 0.7470974255426552, "grad_norm": 199.0548858642578, "learning_rate": 5e-06, "logits/chosen": -0.984387993812561, "logits/rejected": -0.9830384254455566, "logps/chosen": -132.85769653320312, "logps/rejected": -133.11199951171875, "loss": 0.8698, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.5072764158248901, "rewards/margins": -0.2219184935092926, "rewards/rejected": -0.28535789251327515, "step": 185 }, { "epoch": 0.7672892478546188, "grad_norm": 260.7405090332031, "learning_rate": 5e-06, "logits/chosen": -1.0293523073196411, "logits/rejected": -1.0172580480575562, "logps/chosen": -126.0751724243164, "logps/rejected": -141.263671875, "loss": 0.6471, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.25228002667427063, "rewards/margins": 0.1569499969482422, "rewards/rejected": -0.4092300534248352, "step": 190 }, { "epoch": 0.7874810701665825, "grad_norm": 41.43046569824219, "learning_rate": 5e-06, "logits/chosen": -1.071702480316162, "logits/rejected": -1.0639045238494873, "logps/chosen": -124.64006042480469, "logps/rejected": -129.1591796875, "loss": 0.6881, "rewards/accuracies": 0.5, "rewards/chosen": -0.2144135981798172, "rewards/margins": 0.07804916799068451, "rewards/rejected": -0.2924627363681793, "step": 195 }, { "epoch": 0.8076728924785462, "grad_norm": 35.1358528137207, "learning_rate": 5e-06, "logits/chosen": -0.9687834978103638, "logits/rejected": -0.954856276512146, "logps/chosen": -120.9579849243164, "logps/rejected": -130.809814453125, "loss": 0.7126, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.27170878648757935, "rewards/margins": 0.005672874394804239, "rewards/rejected": -0.27738165855407715, "step": 200 }, { "epoch": 0.8278647147905098, "grad_norm": 27.78827667236328, "learning_rate": 5e-06, "logits/chosen": -1.048870325088501, "logits/rejected": -1.0687897205352783, "logps/chosen": -128.82345581054688, "logps/rejected": -132.6963348388672, "loss": 0.7058, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.2189714014530182, "rewards/margins": 0.05474821850657463, "rewards/rejected": -0.2737196087837219, "step": 205 }, { "epoch": 0.8480565371024735, "grad_norm": 101.58830261230469, "learning_rate": 5e-06, "logits/chosen": -1.0325753688812256, "logits/rejected": -1.0432530641555786, "logps/chosen": -126.06434631347656, "logps/rejected": -136.6858673095703, "loss": 0.7424, "rewards/accuracies": 0.42500001192092896, "rewards/chosen": -0.38726571202278137, "rewards/margins": -0.062184691429138184, "rewards/rejected": -0.3250810503959656, "step": 210 }, { "epoch": 0.8682483594144371, "grad_norm": 145.8494110107422, "learning_rate": 5e-06, "logits/chosen": -1.0436766147613525, "logits/rejected": -1.001501441001892, "logps/chosen": -128.81761169433594, "logps/rejected": -146.7082977294922, "loss": 0.649, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.27072566747665405, "rewards/margins": 0.1860719621181488, "rewards/rejected": -0.45679759979248047, "step": 215 }, { "epoch": 0.8884401817264008, "grad_norm": 122.89201354980469, "learning_rate": 5e-06, "logits/chosen": -0.9502097964286804, "logits/rejected": -0.9250322580337524, "logps/chosen": -136.70542907714844, "logps/rejected": -145.50021362304688, "loss": 0.7171, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4370450973510742, "rewards/margins": 0.014479175209999084, "rewards/rejected": -0.4515243172645569, "step": 220 }, { "epoch": 0.9086320040383644, "grad_norm": 136.3062286376953, "learning_rate": 5e-06, "logits/chosen": -1.0122795104980469, "logits/rejected": -0.976104736328125, "logps/chosen": -130.60975646972656, "logps/rejected": -140.47096252441406, "loss": 0.7608, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.3417353332042694, "rewards/margins": -0.07229948043823242, "rewards/rejected": -0.269435852766037, "step": 225 }, { "epoch": 0.9288238263503281, "grad_norm": 274.8150329589844, "learning_rate": 5e-06, "logits/chosen": -0.9934455156326294, "logits/rejected": -0.9960210919380188, "logps/chosen": -138.07879638671875, "logps/rejected": -148.25611877441406, "loss": 0.7518, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -0.3778955340385437, "rewards/margins": -0.06377332657575607, "rewards/rejected": -0.3141222298145294, "step": 230 }, { "epoch": 0.9490156486622918, "grad_norm": 362.4608154296875, "learning_rate": 5e-06, "logits/chosen": -1.028414249420166, "logits/rejected": -1.0202635526657104, "logps/chosen": -126.71832275390625, "logps/rejected": -131.26434326171875, "loss": 0.7528, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.10429117828607559, "rewards/margins": -0.09285052120685577, "rewards/rejected": -0.011440658941864967, "step": 235 }, { "epoch": 0.9692074709742554, "grad_norm": 200.3191375732422, "learning_rate": 5e-06, "logits/chosen": -1.0229647159576416, "logits/rejected": -0.9428025484085083, "logps/chosen": -126.55399322509766, "logps/rejected": -151.26348876953125, "loss": 0.6815, "rewards/accuracies": 0.550000011920929, "rewards/chosen": 0.17346206307411194, "rewards/margins": 0.028347322717308998, "rewards/rejected": 0.1451147496700287, "step": 240 }, { "epoch": 0.9893992932862191, "grad_norm": 34.3711051940918, "learning_rate": 5e-06, "logits/chosen": -1.0116575956344604, "logits/rejected": -1.0087472200393677, "logps/chosen": -122.85481262207031, "logps/rejected": -133.4599609375, "loss": 0.6786, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.08724311739206314, "rewards/margins": 0.0308841522783041, "rewards/rejected": 0.05635897070169449, "step": 245 }, { "epoch": 1.0080767289247854, "grad_norm": 27.31838607788086, "learning_rate": 5e-06, "logits/chosen": -1.0463143587112427, "logits/rejected": -0.9973466396331787, "logps/chosen": -124.2419204711914, "logps/rejected": -139.740966796875, "loss": 0.6114, "rewards/accuracies": 0.5945945978164673, "rewards/chosen": 0.005640762392431498, "rewards/margins": 0.07190810143947601, "rewards/rejected": -0.06626733392477036, "step": 250 }, { "epoch": 1.028268551236749, "grad_norm": 36.600242614746094, "learning_rate": 5e-06, "logits/chosen": -0.9850190877914429, "logits/rejected": -0.9895715713500977, "logps/chosen": -117.77237701416016, "logps/rejected": -139.15866088867188, "loss": 0.623, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.06336753815412521, "rewards/margins": 0.15559127926826477, "rewards/rejected": -0.09222372621297836, "step": 255 }, { "epoch": 1.0484603735487128, "grad_norm": 25.334484100341797, "learning_rate": 5e-06, "logits/chosen": -0.9737665057182312, "logits/rejected": -0.9691070318222046, "logps/chosen": -125.85533142089844, "logps/rejected": -142.1818084716797, "loss": 0.6441, "rewards/accuracies": 0.75, "rewards/chosen": 0.0201385710388422, "rewards/margins": 0.10802127420902252, "rewards/rejected": -0.08788268268108368, "step": 260 }, { "epoch": 1.0686521958606765, "grad_norm": 170.07984924316406, "learning_rate": 5e-06, "logits/chosen": -0.9972125887870789, "logits/rejected": -0.9916419982910156, "logps/chosen": -128.32261657714844, "logps/rejected": -154.72247314453125, "loss": 0.6211, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.003512249793857336, "rewards/margins": 0.1796935498714447, "rewards/rejected": -0.17618130147457123, "step": 265 }, { "epoch": 1.08884401817264, "grad_norm": 82.89457702636719, "learning_rate": 5e-06, "logits/chosen": -0.9838020205497742, "logits/rejected": -0.9610651135444641, "logps/chosen": -133.44190979003906, "logps/rejected": -135.03680419921875, "loss": 0.6226, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.2014409303665161, "rewards/margins": 0.15444937348365784, "rewards/rejected": 0.04699156433343887, "step": 270 }, { "epoch": 1.1090358404846037, "grad_norm": 256.55047607421875, "learning_rate": 5e-06, "logits/chosen": -1.0722827911376953, "logits/rejected": -1.0403627157211304, "logps/chosen": -129.58506774902344, "logps/rejected": -135.889892578125, "loss": 0.6594, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.1928495317697525, "rewards/margins": 0.09002382308244705, "rewards/rejected": 0.10282570123672485, "step": 275 }, { "epoch": 1.1292276627965674, "grad_norm": 157.06471252441406, "learning_rate": 5e-06, "logits/chosen": -1.0328292846679688, "logits/rejected": -1.0051934719085693, "logps/chosen": -126.65974426269531, "logps/rejected": -137.65618896484375, "loss": 0.6243, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.24083271622657776, "rewards/margins": 0.17111970484256744, "rewards/rejected": 0.06971298158168793, "step": 280 }, { "epoch": 1.149419485108531, "grad_norm": 318.4048767089844, "learning_rate": 5e-06, "logits/chosen": -1.0038608312606812, "logits/rejected": -0.9795931577682495, "logps/chosen": -119.45623779296875, "logps/rejected": -147.02752685546875, "loss": 0.5933, "rewards/accuracies": 0.75, "rewards/chosen": 0.10432057082653046, "rewards/margins": 0.235098198056221, "rewards/rejected": -0.13077762722969055, "step": 285 }, { "epoch": 1.1696113074204948, "grad_norm": 111.46747589111328, "learning_rate": 5e-06, "logits/chosen": -1.0481410026550293, "logits/rejected": -1.0197895765304565, "logps/chosen": -132.78433227539062, "logps/rejected": -139.17845153808594, "loss": 0.615, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.008489435538649559, "rewards/margins": 0.21404604613780975, "rewards/rejected": -0.22253546118736267, "step": 290 }, { "epoch": 1.1898031297324583, "grad_norm": 118.42349243164062, "learning_rate": 5e-06, "logits/chosen": -0.9705491065979004, "logits/rejected": -0.969982922077179, "logps/chosen": -127.90840911865234, "logps/rejected": -132.5115509033203, "loss": 0.6599, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07771094888448715, "rewards/margins": 0.09617092460393906, "rewards/rejected": -0.1738818734884262, "step": 295 }, { "epoch": 1.209994952044422, "grad_norm": 28.792146682739258, "learning_rate": 5e-06, "logits/chosen": -1.0062731504440308, "logits/rejected": -1.0046422481536865, "logps/chosen": -122.89021301269531, "logps/rejected": -128.922119140625, "loss": 0.6536, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.21470877528190613, "rewards/margins": 0.10167400538921356, "rewards/rejected": -0.31638282537460327, "step": 300 }, { "epoch": 1.2301867743563857, "grad_norm": 30.531999588012695, "learning_rate": 5e-06, "logits/chosen": -1.0686832666397095, "logits/rejected": -0.9891031384468079, "logps/chosen": -131.0304718017578, "logps/rejected": -155.66464233398438, "loss": 0.6276, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.22880509495735168, "rewards/margins": 0.17476414144039154, "rewards/rejected": -0.40356922149658203, "step": 305 }, { "epoch": 1.2503785966683494, "grad_norm": 30.640419006347656, "learning_rate": 5e-06, "logits/chosen": -1.0535386800765991, "logits/rejected": -1.0263631343841553, "logps/chosen": -131.8798828125, "logps/rejected": -134.03146362304688, "loss": 0.6779, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.29857125878334045, "rewards/margins": 0.08670302480459213, "rewards/rejected": -0.3852742612361908, "step": 310 }, { "epoch": 1.2705704189803129, "grad_norm": 66.49352264404297, "learning_rate": 5e-06, "logits/chosen": -1.0025132894515991, "logits/rejected": -1.0053356885910034, "logps/chosen": -141.34634399414062, "logps/rejected": -155.73513793945312, "loss": 0.5846, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.12694263458251953, "rewards/margins": 0.32664644718170166, "rewards/rejected": -0.4535890519618988, "step": 315 }, { "epoch": 1.2907622412922766, "grad_norm": 426.6810302734375, "learning_rate": 5e-06, "logits/chosen": -0.9757963418960571, "logits/rejected": -0.971258819103241, "logps/chosen": -133.66746520996094, "logps/rejected": -142.61044311523438, "loss": 0.6566, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.2488606721162796, "rewards/margins": 0.13673368096351624, "rewards/rejected": -0.38559430837631226, "step": 320 }, { "epoch": 1.3109540636042403, "grad_norm": 24.37104034423828, "learning_rate": 5e-06, "logits/chosen": -1.0103740692138672, "logits/rejected": -0.9810827374458313, "logps/chosen": -125.60025787353516, "logps/rejected": -135.81178283691406, "loss": 0.6212, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.18190403282642365, "rewards/margins": 0.23106291890144348, "rewards/rejected": -0.4129669666290283, "step": 325 }, { "epoch": 1.331145885916204, "grad_norm": 25.101844787597656, "learning_rate": 5e-06, "logits/chosen": -0.9837746620178223, "logits/rejected": -0.9348167181015015, "logps/chosen": -123.2785415649414, "logps/rejected": -138.2392578125, "loss": 0.608, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.1881856620311737, "rewards/margins": 0.23057310283184052, "rewards/rejected": -0.4187587797641754, "step": 330 }, { "epoch": 1.3513377082281677, "grad_norm": 33.728919982910156, "learning_rate": 5e-06, "logits/chosen": -0.9812191724777222, "logits/rejected": -0.960713267326355, "logps/chosen": -135.54893493652344, "logps/rejected": -147.19290161132812, "loss": 0.6661, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.39554575085639954, "rewards/margins": 0.18837305903434753, "rewards/rejected": -0.5839187502861023, "step": 335 }, { "epoch": 1.3715295305401312, "grad_norm": 47.88369369506836, "learning_rate": 5e-06, "logits/chosen": -0.9874971508979797, "logits/rejected": -0.9519085884094238, "logps/chosen": -128.935791015625, "logps/rejected": -149.05130004882812, "loss": 0.5798, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3125215470790863, "rewards/margins": 0.3500240445137024, "rewards/rejected": -0.6625455617904663, "step": 340 }, { "epoch": 1.3917213528520949, "grad_norm": 51.914127349853516, "learning_rate": 5e-06, "logits/chosen": -1.0254395008087158, "logits/rejected": -0.9828994870185852, "logps/chosen": -131.8434295654297, "logps/rejected": -147.67422485351562, "loss": 0.5951, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.3478095829486847, "rewards/margins": 0.28336477279663086, "rewards/rejected": -0.6311743855476379, "step": 345 }, { "epoch": 1.4119131751640586, "grad_norm": 202.00729370117188, "learning_rate": 5e-06, "logits/chosen": -1.0301780700683594, "logits/rejected": -0.9973276257514954, "logps/chosen": -134.11044311523438, "logps/rejected": -146.69715881347656, "loss": 0.5718, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.36468935012817383, "rewards/margins": 0.3158627152442932, "rewards/rejected": -0.680552065372467, "step": 350 }, { "epoch": 1.4321049974760223, "grad_norm": 59.660091400146484, "learning_rate": 5e-06, "logits/chosen": -0.9530469179153442, "logits/rejected": -0.9394823312759399, "logps/chosen": -129.4771270751953, "logps/rejected": -139.2011260986328, "loss": 0.6579, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.3907158672809601, "rewards/margins": 0.22507838904857635, "rewards/rejected": -0.61579430103302, "step": 355 }, { "epoch": 1.452296819787986, "grad_norm": 37.25257110595703, "learning_rate": 5e-06, "logits/chosen": -0.9922634363174438, "logits/rejected": -0.960089385509491, "logps/chosen": -132.1666717529297, "logps/rejected": -146.88284301757812, "loss": 0.5824, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.30794504284858704, "rewards/margins": 0.31921353936195374, "rewards/rejected": -0.627158522605896, "step": 360 }, { "epoch": 1.4724886420999495, "grad_norm": 32.722537994384766, "learning_rate": 5e-06, "logits/chosen": -0.9966095685958862, "logits/rejected": -0.990027904510498, "logps/chosen": -130.92294311523438, "logps/rejected": -150.19712829589844, "loss": 0.6163, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.5703646540641785, "rewards/margins": 0.3038683533668518, "rewards/rejected": -0.874233067035675, "step": 365 }, { "epoch": 1.4926804644119132, "grad_norm": 25.522695541381836, "learning_rate": 5e-06, "logits/chosen": -0.9670052528381348, "logits/rejected": -0.9508792757987976, "logps/chosen": -126.89973449707031, "logps/rejected": -139.86825561523438, "loss": 0.6495, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.5382200479507446, "rewards/margins": 0.1835038661956787, "rewards/rejected": -0.7217239141464233, "step": 370 }, { "epoch": 1.5128722867238769, "grad_norm": 90.71711730957031, "learning_rate": 5e-06, "logits/chosen": -0.9002586603164673, "logits/rejected": -0.8389655947685242, "logps/chosen": -139.4822235107422, "logps/rejected": -149.20494079589844, "loss": 0.6875, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.6395633816719055, "rewards/margins": 0.2545303702354431, "rewards/rejected": -0.8940938115119934, "step": 375 }, { "epoch": 1.5330641090358403, "grad_norm": 63.06035232543945, "learning_rate": 5e-06, "logits/chosen": -0.969630241394043, "logits/rejected": -0.9403184056282043, "logps/chosen": -128.59902954101562, "logps/rejected": -152.47572326660156, "loss": 0.568, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4838544726371765, "rewards/margins": 0.41895875334739685, "rewards/rejected": -0.9028133153915405, "step": 380 }, { "epoch": 1.5532559313478043, "grad_norm": 31.95730972290039, "learning_rate": 5e-06, "logits/chosen": -0.9574828147888184, "logits/rejected": -0.9362820386886597, "logps/chosen": -133.1338348388672, "logps/rejected": -137.119873046875, "loss": 0.6919, "rewards/accuracies": 0.625, "rewards/chosen": -0.6652856469154358, "rewards/margins": 0.1158902496099472, "rewards/rejected": -0.7811757922172546, "step": 385 }, { "epoch": 1.5734477536597677, "grad_norm": 82.7573471069336, "learning_rate": 5e-06, "logits/chosen": -0.9827107191085815, "logits/rejected": -0.9468755722045898, "logps/chosen": -133.9182891845703, "logps/rejected": -147.95758056640625, "loss": 0.6258, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.508975088596344, "rewards/margins": 0.27125072479248047, "rewards/rejected": -0.7802258133888245, "step": 390 }, { "epoch": 1.5936395759717314, "grad_norm": 44.66698455810547, "learning_rate": 5e-06, "logits/chosen": -0.9529491662979126, "logits/rejected": -0.9102163314819336, "logps/chosen": -127.85603332519531, "logps/rejected": -165.6941375732422, "loss": 0.5216, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.4891042709350586, "rewards/margins": 0.5509933233261108, "rewards/rejected": -1.0400975942611694, "step": 395 }, { "epoch": 1.6138313982836952, "grad_norm": 28.743886947631836, "learning_rate": 5e-06, "logits/chosen": -0.9382497072219849, "logits/rejected": -0.9725173115730286, "logps/chosen": -125.57157135009766, "logps/rejected": -139.066650390625, "loss": 0.6589, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.4889567792415619, "rewards/margins": 0.13421431183815002, "rewards/rejected": -0.6231711506843567, "step": 400 }, { "epoch": 1.6340232205956586, "grad_norm": 26.614904403686523, "learning_rate": 5e-06, "logits/chosen": -0.9904226064682007, "logits/rejected": -0.9899235963821411, "logps/chosen": -130.1407470703125, "logps/rejected": -148.65634155273438, "loss": 0.568, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.3976411819458008, "rewards/margins": 0.3590780794620514, "rewards/rejected": -0.7567192912101746, "step": 405 }, { "epoch": 1.6542150429076226, "grad_norm": 63.328731536865234, "learning_rate": 5e-06, "logits/chosen": -0.9512359499931335, "logits/rejected": -0.9244702458381653, "logps/chosen": -142.51605224609375, "logps/rejected": -146.30654907226562, "loss": 0.6478, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.611981213092804, "rewards/margins": 0.2888861298561096, "rewards/rejected": -0.9008673429489136, "step": 410 }, { "epoch": 1.674406865219586, "grad_norm": 77.01255798339844, "learning_rate": 5e-06, "logits/chosen": -0.957229495048523, "logits/rejected": -0.9442984461784363, "logps/chosen": -129.83509826660156, "logps/rejected": -140.24246215820312, "loss": 0.6499, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.3882976770401001, "rewards/margins": 0.2001279890537262, "rewards/rejected": -0.5884256958961487, "step": 415 }, { "epoch": 1.6945986875315497, "grad_norm": 64.73177337646484, "learning_rate": 5e-06, "logits/chosen": -0.9330608248710632, "logits/rejected": -0.9302207827568054, "logps/chosen": -136.36305236816406, "logps/rejected": -156.6847381591797, "loss": 0.5401, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5075548887252808, "rewards/margins": 0.4187861382961273, "rewards/rejected": -0.9263409376144409, "step": 420 }, { "epoch": 1.7147905098435134, "grad_norm": 33.53404235839844, "learning_rate": 5e-06, "logits/chosen": -0.9966014623641968, "logits/rejected": -0.9909483790397644, "logps/chosen": -131.6799774169922, "logps/rejected": -132.3755340576172, "loss": 0.613, "rewards/accuracies": 0.75, "rewards/chosen": -0.41457399725914, "rewards/margins": 0.2104615867137909, "rewards/rejected": -0.6250355839729309, "step": 425 }, { "epoch": 1.734982332155477, "grad_norm": 22.384197235107422, "learning_rate": 5e-06, "logits/chosen": -0.9789875149726868, "logits/rejected": -0.9263578653335571, "logps/chosen": -135.25543212890625, "logps/rejected": -143.0823211669922, "loss": 0.6018, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4446907639503479, "rewards/margins": 0.33062392473220825, "rewards/rejected": -0.7753147482872009, "step": 430 }, { "epoch": 1.7551741544674409, "grad_norm": 37.48057174682617, "learning_rate": 5e-06, "logits/chosen": -1.0088233947753906, "logits/rejected": -1.007187843322754, "logps/chosen": -129.21136474609375, "logps/rejected": -147.24827575683594, "loss": 0.6201, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.4158124029636383, "rewards/margins": 0.2677377462387085, "rewards/rejected": -0.683550238609314, "step": 435 }, { "epoch": 1.7753659767794043, "grad_norm": 82.85381317138672, "learning_rate": 5e-06, "logits/chosen": -0.9587488174438477, "logits/rejected": -0.9475493431091309, "logps/chosen": -129.45645141601562, "logps/rejected": -136.0018310546875, "loss": 0.6111, "rewards/accuracies": 0.625, "rewards/chosen": -0.42559027671813965, "rewards/margins": 0.2766748368740082, "rewards/rejected": -0.7022650837898254, "step": 440 }, { "epoch": 1.795557799091368, "grad_norm": 386.5701904296875, "learning_rate": 5e-06, "logits/chosen": -0.9646188616752625, "logits/rejected": -0.922524631023407, "logps/chosen": -126.2032699584961, "logps/rejected": -161.66268920898438, "loss": 0.5059, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.5310975313186646, "rewards/margins": 0.5927624702453613, "rewards/rejected": -1.1238601207733154, "step": 445 }, { "epoch": 1.8157496214033317, "grad_norm": 33.02116012573242, "learning_rate": 5e-06, "logits/chosen": -0.9980605840682983, "logits/rejected": -0.9731265902519226, "logps/chosen": -124.5853500366211, "logps/rejected": -143.31353759765625, "loss": 0.5554, "rewards/accuracies": 0.75, "rewards/chosen": -0.6029568314552307, "rewards/margins": 0.38439229130744934, "rewards/rejected": -0.9873491525650024, "step": 450 }, { "epoch": 1.8359414437152952, "grad_norm": 72.07835388183594, "learning_rate": 5e-06, "logits/chosen": -0.9654960632324219, "logits/rejected": -0.9319403767585754, "logps/chosen": -139.59352111816406, "logps/rejected": -148.42514038085938, "loss": 0.598, "rewards/accuracies": 0.75, "rewards/chosen": -0.7650180459022522, "rewards/margins": 0.3671188950538635, "rewards/rejected": -1.1321367025375366, "step": 455 }, { "epoch": 1.856133266027259, "grad_norm": 37.11824417114258, "learning_rate": 5e-06, "logits/chosen": -0.9613730311393738, "logits/rejected": -0.9452563524246216, "logps/chosen": -131.92416381835938, "logps/rejected": -146.32144165039062, "loss": 0.684, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.8234866857528687, "rewards/margins": 0.22403354942798615, "rewards/rejected": -1.0475202798843384, "step": 460 }, { "epoch": 1.8763250883392226, "grad_norm": 102.86495208740234, "learning_rate": 5e-06, "logits/chosen": -0.9701520800590515, "logits/rejected": -0.9547005891799927, "logps/chosen": -140.27639770507812, "logps/rejected": -140.81581115722656, "loss": 0.6644, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.8090511560440063, "rewards/margins": 0.21369917690753937, "rewards/rejected": -1.0227503776550293, "step": 465 }, { "epoch": 1.896516910651186, "grad_norm": 38.73020553588867, "learning_rate": 5e-06, "logits/chosen": -0.9636425971984863, "logits/rejected": -0.8940737843513489, "logps/chosen": -128.97019958496094, "logps/rejected": -144.57125854492188, "loss": 0.5636, "rewards/accuracies": 0.75, "rewards/chosen": -0.7400966882705688, "rewards/margins": 0.3483065962791443, "rewards/rejected": -1.0884032249450684, "step": 470 }, { "epoch": 1.91670873296315, "grad_norm": 50.96113204956055, "learning_rate": 5e-06, "logits/chosen": -1.0122615098953247, "logits/rejected": -0.9771004915237427, "logps/chosen": -130.7502899169922, "logps/rejected": -147.36337280273438, "loss": 0.55, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.5859622359275818, "rewards/margins": 0.4816177785396576, "rewards/rejected": -1.0675798654556274, "step": 475 }, { "epoch": 1.9369005552751135, "grad_norm": 50.336578369140625, "learning_rate": 5e-06, "logits/chosen": -1.009649395942688, "logits/rejected": -0.9977648854255676, "logps/chosen": -138.2186279296875, "logps/rejected": -146.3845977783203, "loss": 0.6772, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.772314190864563, "rewards/margins": 0.2373802661895752, "rewards/rejected": -1.0096943378448486, "step": 480 }, { "epoch": 1.9570923775870772, "grad_norm": 68.63349151611328, "learning_rate": 5e-06, "logits/chosen": -0.9773464202880859, "logits/rejected": -0.9308035969734192, "logps/chosen": -139.3035888671875, "logps/rejected": -161.8266143798828, "loss": 0.5776, "rewards/accuracies": 0.75, "rewards/chosen": -0.8935576677322388, "rewards/margins": 0.4999828338623047, "rewards/rejected": -1.393540382385254, "step": 485 }, { "epoch": 1.977284199899041, "grad_norm": 88.56826782226562, "learning_rate": 5e-06, "logits/chosen": -1.0616611242294312, "logits/rejected": -1.0203070640563965, "logps/chosen": -130.81297302246094, "logps/rejected": -141.33740234375, "loss": 0.6037, "rewards/accuracies": 0.75, "rewards/chosen": -0.8136008977890015, "rewards/margins": 0.2671222984790802, "rewards/rejected": -1.0807230472564697, "step": 490 }, { "epoch": 1.9974760222110044, "grad_norm": 126.10334777832031, "learning_rate": 5e-06, "logits/chosen": -1.0034191608428955, "logits/rejected": -1.010565161705017, "logps/chosen": -135.72381591796875, "logps/rejected": -133.8328399658203, "loss": 0.6832, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.7619636654853821, "rewards/margins": 0.17222894728183746, "rewards/rejected": -0.9341924786567688, "step": 495 }, { "epoch": 2.016153457849571, "grad_norm": 48.9210205078125, "learning_rate": 5e-06, "logits/chosen": -1.023393988609314, "logits/rejected": -0.9911313652992249, "logps/chosen": -122.88961029052734, "logps/rejected": -149.01161193847656, "loss": 0.3935, "rewards/accuracies": 0.8648648858070374, "rewards/chosen": -0.35930752754211426, "rewards/margins": 0.8153496980667114, "rewards/rejected": -1.1746571063995361, "step": 500 }, { "epoch": 2.0363452801615347, "grad_norm": 39.74098587036133, "learning_rate": 5e-06, "logits/chosen": -1.0313756465911865, "logits/rejected": -0.9942135810852051, "logps/chosen": -135.91714477539062, "logps/rejected": -147.81710815429688, "loss": 0.4991, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5435918569564819, "rewards/margins": 0.6506127715110779, "rewards/rejected": -1.194204568862915, "step": 505 }, { "epoch": 2.056537102473498, "grad_norm": 49.587120056152344, "learning_rate": 5e-06, "logits/chosen": -1.0160324573516846, "logits/rejected": -1.0125465393066406, "logps/chosen": -131.9548797607422, "logps/rejected": -152.05862426757812, "loss": 0.5047, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6925584673881531, "rewards/margins": 0.5985544919967651, "rewards/rejected": -1.2911128997802734, "step": 510 }, { "epoch": 2.0767289247854617, "grad_norm": 17.234088897705078, "learning_rate": 5e-06, "logits/chosen": -0.9918926358222961, "logits/rejected": -0.9893625378608704, "logps/chosen": -132.64453125, "logps/rejected": -158.79873657226562, "loss": 0.42, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.5537530779838562, "rewards/margins": 0.938607394695282, "rewards/rejected": -1.4923605918884277, "step": 515 }, { "epoch": 2.0969207470974256, "grad_norm": 162.84471130371094, "learning_rate": 5e-06, "logits/chosen": -1.008296012878418, "logits/rejected": -1.0098919868469238, "logps/chosen": -139.8673095703125, "logps/rejected": -160.15660095214844, "loss": 0.4181, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6410013437271118, "rewards/margins": 0.9079731106758118, "rewards/rejected": -1.5489743947982788, "step": 520 }, { "epoch": 2.117112569409389, "grad_norm": 29.040531158447266, "learning_rate": 5e-06, "logits/chosen": -1.0176864862442017, "logits/rejected": -1.007263422012329, "logps/chosen": -131.76019287109375, "logps/rejected": -149.74111938476562, "loss": 0.4928, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6737687587738037, "rewards/margins": 0.6350966691970825, "rewards/rejected": -1.3088653087615967, "step": 525 }, { "epoch": 2.137304391721353, "grad_norm": 97.38697052001953, "learning_rate": 5e-06, "logits/chosen": -1.0380053520202637, "logits/rejected": -0.9718503952026367, "logps/chosen": -132.33323669433594, "logps/rejected": -152.97665405273438, "loss": 0.4573, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.6338863372802734, "rewards/margins": 0.8563046455383301, "rewards/rejected": -1.4901909828186035, "step": 530 }, { "epoch": 2.1574962140333165, "grad_norm": 77.62568664550781, "learning_rate": 5e-06, "logits/chosen": -1.0812318325042725, "logits/rejected": -1.0583606958389282, "logps/chosen": -135.34068298339844, "logps/rejected": -151.8748016357422, "loss": 0.4322, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.6975581049919128, "rewards/margins": 0.8094134330749512, "rewards/rejected": -1.5069715976715088, "step": 535 }, { "epoch": 2.17768803634528, "grad_norm": 67.6436767578125, "learning_rate": 5e-06, "logits/chosen": -1.0246284008026123, "logits/rejected": -1.014534831047058, "logps/chosen": -137.48171997070312, "logps/rejected": -149.2544708251953, "loss": 0.4063, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.7033039331436157, "rewards/margins": 0.945264458656311, "rewards/rejected": -1.6485683917999268, "step": 540 }, { "epoch": 2.197879858657244, "grad_norm": 63.45933532714844, "learning_rate": 5e-06, "logits/chosen": -1.0668962001800537, "logits/rejected": -1.0191361904144287, "logps/chosen": -136.38905334472656, "logps/rejected": -156.759521484375, "loss": 0.4042, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.7140289545059204, "rewards/margins": 0.8608022928237915, "rewards/rejected": -1.574831247329712, "step": 545 }, { "epoch": 2.2180716809692074, "grad_norm": 24.154354095458984, "learning_rate": 5e-06, "logits/chosen": -1.034576654434204, "logits/rejected": -1.0220065116882324, "logps/chosen": -126.9083251953125, "logps/rejected": -147.03082275390625, "loss": 0.4812, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6011126041412354, "rewards/margins": 0.7494941353797913, "rewards/rejected": -1.3506066799163818, "step": 550 }, { "epoch": 2.2382635032811713, "grad_norm": 51.32468032836914, "learning_rate": 5e-06, "logits/chosen": -1.0417560338974, "logits/rejected": -0.995162844657898, "logps/chosen": -134.18875122070312, "logps/rejected": -160.58987426757812, "loss": 0.4263, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7680782079696655, "rewards/margins": 0.9993975758552551, "rewards/rejected": -1.7674758434295654, "step": 555 }, { "epoch": 2.2584553255931348, "grad_norm": 31.85598373413086, "learning_rate": 5e-06, "logits/chosen": -1.091623306274414, "logits/rejected": -1.065324068069458, "logps/chosen": -147.77536010742188, "logps/rejected": -161.46456909179688, "loss": 0.4047, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.9492090940475464, "rewards/margins": 0.9160175323486328, "rewards/rejected": -1.8652265071868896, "step": 560 }, { "epoch": 2.2786471479050983, "grad_norm": 40.57142639160156, "learning_rate": 5e-06, "logits/chosen": -1.0318306684494019, "logits/rejected": -1.0202347040176392, "logps/chosen": -133.6524658203125, "logps/rejected": -154.2473602294922, "loss": 0.4398, "rewards/accuracies": 0.875, "rewards/chosen": -0.774714469909668, "rewards/margins": 0.8600656390190125, "rewards/rejected": -1.6347801685333252, "step": 565 }, { "epoch": 2.298838970217062, "grad_norm": 310.98028564453125, "learning_rate": 5e-06, "logits/chosen": -1.0679352283477783, "logits/rejected": -1.0488876104354858, "logps/chosen": -143.3402557373047, "logps/rejected": -157.1344757080078, "loss": 0.4709, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.8914315104484558, "rewards/margins": 0.8738172650337219, "rewards/rejected": -1.7652488946914673, "step": 570 }, { "epoch": 2.3190307925290257, "grad_norm": 32.55216979980469, "learning_rate": 5e-06, "logits/chosen": -1.0195152759552002, "logits/rejected": -1.0584851503372192, "logps/chosen": -140.6759796142578, "logps/rejected": -161.9089813232422, "loss": 0.5658, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.0393126010894775, "rewards/margins": 0.8234487771987915, "rewards/rejected": -1.8627614974975586, "step": 575 }, { "epoch": 2.3392226148409896, "grad_norm": 68.31248474121094, "learning_rate": 5e-06, "logits/chosen": -1.0342488288879395, "logits/rejected": -1.0230834484100342, "logps/chosen": -129.74427795410156, "logps/rejected": -151.37411499023438, "loss": 0.434, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.707876443862915, "rewards/margins": 1.0140485763549805, "rewards/rejected": -1.721925139427185, "step": 580 }, { "epoch": 2.359414437152953, "grad_norm": 89.6629638671875, "learning_rate": 5e-06, "logits/chosen": -1.1206090450286865, "logits/rejected": -1.0528461933135986, "logps/chosen": -128.90150451660156, "logps/rejected": -145.35079956054688, "loss": 0.4469, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.7581853270530701, "rewards/margins": 0.7499567270278931, "rewards/rejected": -1.5081422328948975, "step": 585 }, { "epoch": 2.3796062594649166, "grad_norm": 43.05556869506836, "learning_rate": 5e-06, "logits/chosen": -1.065771460533142, "logits/rejected": -1.0271015167236328, "logps/chosen": -130.0537872314453, "logps/rejected": -149.29409790039062, "loss": 0.4797, "rewards/accuracies": 0.875, "rewards/chosen": -0.8601150512695312, "rewards/margins": 0.7315414547920227, "rewards/rejected": -1.5916566848754883, "step": 590 }, { "epoch": 2.3997980817768805, "grad_norm": 66.487060546875, "learning_rate": 5e-06, "logits/chosen": -1.1071510314941406, "logits/rejected": -1.0782781839370728, "logps/chosen": -138.64755249023438, "logps/rejected": -162.64129638671875, "loss": 0.4263, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.9284160733222961, "rewards/margins": 1.0049647092819214, "rewards/rejected": -1.9333808422088623, "step": 595 }, { "epoch": 2.419989904088844, "grad_norm": 70.14442443847656, "learning_rate": 5e-06, "logits/chosen": -1.0711498260498047, "logits/rejected": -1.0283135175704956, "logps/chosen": -139.4008026123047, "logps/rejected": -173.4601287841797, "loss": 0.4815, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.1457339525222778, "rewards/margins": 0.9765189290046692, "rewards/rejected": -2.122252941131592, "step": 600 }, { "epoch": 2.440181726400808, "grad_norm": 39.94354248046875, "learning_rate": 5e-06, "logits/chosen": -1.0897289514541626, "logits/rejected": -1.0542991161346436, "logps/chosen": -140.98178100585938, "logps/rejected": -154.21890258789062, "loss": 0.3809, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.9919537305831909, "rewards/margins": 1.0165085792541504, "rewards/rejected": -2.008462429046631, "step": 605 }, { "epoch": 2.4603735487127714, "grad_norm": 45.401206970214844, "learning_rate": 5e-06, "logits/chosen": -1.088404893875122, "logits/rejected": -1.0834258794784546, "logps/chosen": -132.83590698242188, "logps/rejected": -148.06394958496094, "loss": 0.456, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.9679088592529297, "rewards/margins": 0.8238266110420227, "rewards/rejected": -1.7917354106903076, "step": 610 }, { "epoch": 2.480565371024735, "grad_norm": 70.00696563720703, "learning_rate": 5e-06, "logits/chosen": -1.0881659984588623, "logits/rejected": -1.0491617918014526, "logps/chosen": -126.14036560058594, "logps/rejected": -139.42196655273438, "loss": 0.4363, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.7842862010002136, "rewards/margins": 0.8065754771232605, "rewards/rejected": -1.5908616781234741, "step": 615 }, { "epoch": 2.5007571933366988, "grad_norm": 93.69404602050781, "learning_rate": 5e-06, "logits/chosen": -1.088850975036621, "logits/rejected": -1.061447262763977, "logps/chosen": -144.85092163085938, "logps/rejected": -187.40280151367188, "loss": 0.334, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.2291361093521118, "rewards/margins": 1.6782220602035522, "rewards/rejected": -2.907358169555664, "step": 620 }, { "epoch": 2.5209490156486623, "grad_norm": 24.65225601196289, "learning_rate": 5e-06, "logits/chosen": -1.1317861080169678, "logits/rejected": -1.0619679689407349, "logps/chosen": -150.44094848632812, "logps/rejected": -178.79635620117188, "loss": 0.4855, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.2844429016113281, "rewards/margins": 1.1455484628677368, "rewards/rejected": -2.4299912452697754, "step": 625 }, { "epoch": 2.5411408379606257, "grad_norm": 81.89047241210938, "learning_rate": 5e-06, "logits/chosen": -1.108019471168518, "logits/rejected": -1.1051324605941772, "logps/chosen": -144.0532989501953, "logps/rejected": -162.6993865966797, "loss": 0.4032, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.3340096473693848, "rewards/margins": 1.0980908870697021, "rewards/rejected": -2.432100772857666, "step": 630 }, { "epoch": 2.5613326602725897, "grad_norm": 143.68875122070312, "learning_rate": 5e-06, "logits/chosen": -1.0815242528915405, "logits/rejected": -1.0449540615081787, "logps/chosen": -138.04147338867188, "logps/rejected": -156.7818603515625, "loss": 0.4695, "rewards/accuracies": 0.75, "rewards/chosen": -1.1757011413574219, "rewards/margins": 1.038955807685852, "rewards/rejected": -2.2146568298339844, "step": 635 }, { "epoch": 2.581524482584553, "grad_norm": 118.63373565673828, "learning_rate": 5e-06, "logits/chosen": -1.1306525468826294, "logits/rejected": -1.1150437593460083, "logps/chosen": -152.9892578125, "logps/rejected": -168.2422332763672, "loss": 0.5398, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.6195358037948608, "rewards/margins": 1.2459442615509033, "rewards/rejected": -2.8654799461364746, "step": 640 }, { "epoch": 2.601716304896517, "grad_norm": 85.33989715576172, "learning_rate": 5e-06, "logits/chosen": -1.1470824480056763, "logits/rejected": -1.1052191257476807, "logps/chosen": -144.22109985351562, "logps/rejected": -171.24172973632812, "loss": 0.3907, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.3878965377807617, "rewards/margins": 1.330134630203247, "rewards/rejected": -2.718031167984009, "step": 645 }, { "epoch": 2.6219081272084805, "grad_norm": 28.526046752929688, "learning_rate": 5e-06, "logits/chosen": -1.1499074697494507, "logits/rejected": -1.1181514263153076, "logps/chosen": -141.45933532714844, "logps/rejected": -165.86814880371094, "loss": 0.3265, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -1.2811788320541382, "rewards/margins": 1.2787883281707764, "rewards/rejected": -2.559967279434204, "step": 650 }, { "epoch": 2.6420999495204445, "grad_norm": 50.67562484741211, "learning_rate": 5e-06, "logits/chosen": -1.0879682302474976, "logits/rejected": -1.0264991521835327, "logps/chosen": -140.26417541503906, "logps/rejected": -162.7800750732422, "loss": 0.3963, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.3028833866119385, "rewards/margins": 1.1044894456863403, "rewards/rejected": -2.4073727130889893, "step": 655 }, { "epoch": 2.662291771832408, "grad_norm": 71.36746215820312, "learning_rate": 5e-06, "logits/chosen": -1.12251877784729, "logits/rejected": -1.0848548412322998, "logps/chosen": -137.73040771484375, "logps/rejected": -154.519287109375, "loss": 0.45, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.2700433731079102, "rewards/margins": 0.8259071111679077, "rewards/rejected": -2.095950126647949, "step": 660 }, { "epoch": 2.6824835941443714, "grad_norm": 145.13929748535156, "learning_rate": 5e-06, "logits/chosen": -1.1168526411056519, "logits/rejected": -1.0883421897888184, "logps/chosen": -138.43850708007812, "logps/rejected": -161.28207397460938, "loss": 0.4334, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.4023481607437134, "rewards/margins": 1.0314326286315918, "rewards/rejected": -2.4337806701660156, "step": 665 }, { "epoch": 2.7026754164563354, "grad_norm": 101.96822357177734, "learning_rate": 5e-06, "logits/chosen": -1.1315155029296875, "logits/rejected": -1.1191498041152954, "logps/chosen": -142.5680694580078, "logps/rejected": -147.30868530273438, "loss": 0.4604, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.2466862201690674, "rewards/margins": 0.8339215517044067, "rewards/rejected": -2.0806076526641846, "step": 670 }, { "epoch": 2.722867238768299, "grad_norm": 35.541744232177734, "learning_rate": 5e-06, "logits/chosen": -1.1304982900619507, "logits/rejected": -1.096704363822937, "logps/chosen": -140.98458862304688, "logps/rejected": -153.99526977539062, "loss": 0.3437, "rewards/accuracies": 0.875, "rewards/chosen": -1.1470431089401245, "rewards/margins": 1.193063735961914, "rewards/rejected": -2.340106964111328, "step": 675 }, { "epoch": 2.7430590610802623, "grad_norm": 119.16114807128906, "learning_rate": 5e-06, "logits/chosen": -1.1810123920440674, "logits/rejected": -1.1393203735351562, "logps/chosen": -143.31198120117188, "logps/rejected": -155.2026824951172, "loss": 0.4346, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.3686244487762451, "rewards/margins": 0.9831773638725281, "rewards/rejected": -2.351801633834839, "step": 680 }, { "epoch": 2.7632508833922262, "grad_norm": 112.19788360595703, "learning_rate": 5e-06, "logits/chosen": -1.1026071310043335, "logits/rejected": -1.115681767463684, "logps/chosen": -145.30560302734375, "logps/rejected": -155.7900848388672, "loss": 0.5291, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.4804359674453735, "rewards/margins": 0.9093244671821594, "rewards/rejected": -2.3897604942321777, "step": 685 }, { "epoch": 2.7834427057041897, "grad_norm": 33.10359191894531, "learning_rate": 5e-06, "logits/chosen": -1.1413536071777344, "logits/rejected": -1.1235370635986328, "logps/chosen": -137.90969848632812, "logps/rejected": -170.1824188232422, "loss": 0.3372, "rewards/accuracies": 0.875, "rewards/chosen": -1.2158892154693604, "rewards/margins": 1.584923505783081, "rewards/rejected": -2.8008127212524414, "step": 690 }, { "epoch": 2.803634528016153, "grad_norm": 150.4509735107422, "learning_rate": 5e-06, "logits/chosen": -1.1553657054901123, "logits/rejected": -1.1448795795440674, "logps/chosen": -137.4596405029297, "logps/rejected": -151.9920196533203, "loss": 0.4594, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.3479053974151611, "rewards/margins": 0.890475869178772, "rewards/rejected": -2.2383813858032227, "step": 695 }, { "epoch": 2.823826350328117, "grad_norm": 287.857666015625, "learning_rate": 5e-06, "logits/chosen": -1.1810104846954346, "logits/rejected": -1.1226354837417603, "logps/chosen": -143.69639587402344, "logps/rejected": -163.6051483154297, "loss": 0.4457, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.513098955154419, "rewards/margins": 1.0407955646514893, "rewards/rejected": -2.5538947582244873, "step": 700 }, { "epoch": 2.8440181726400806, "grad_norm": 50.71148681640625, "learning_rate": 5e-06, "logits/chosen": -1.1496613025665283, "logits/rejected": -1.12812340259552, "logps/chosen": -140.67123413085938, "logps/rejected": -169.8626708984375, "loss": 0.3212, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -1.3559614419937134, "rewards/margins": 1.4378584623336792, "rewards/rejected": -2.7938199043273926, "step": 705 }, { "epoch": 2.8642099949520445, "grad_norm": 26.175968170166016, "learning_rate": 5e-06, "logits/chosen": -1.2208508253097534, "logits/rejected": -1.1588385105133057, "logps/chosen": -129.68093872070312, "logps/rejected": -161.8412322998047, "loss": 0.3767, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.222773790359497, "rewards/margins": 1.361515998840332, "rewards/rejected": -2.58428955078125, "step": 710 }, { "epoch": 2.884401817264008, "grad_norm": 51.06369400024414, "learning_rate": 5e-06, "logits/chosen": -1.1848516464233398, "logits/rejected": -1.1376698017120361, "logps/chosen": -131.61441040039062, "logps/rejected": -154.9497528076172, "loss": 0.3265, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -1.088381290435791, "rewards/margins": 1.4161834716796875, "rewards/rejected": -2.5045647621154785, "step": 715 }, { "epoch": 2.904593639575972, "grad_norm": 35.54250717163086, "learning_rate": 5e-06, "logits/chosen": -1.1926292181015015, "logits/rejected": -1.157402753829956, "logps/chosen": -136.55596923828125, "logps/rejected": -163.8975372314453, "loss": 0.2961, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -1.1022846698760986, "rewards/margins": 1.461648941040039, "rewards/rejected": -2.563933849334717, "step": 720 }, { "epoch": 2.9247854618879354, "grad_norm": 155.65060424804688, "learning_rate": 5e-06, "logits/chosen": -1.1305577754974365, "logits/rejected": -1.085231065750122, "logps/chosen": -148.7545928955078, "logps/rejected": -176.36215209960938, "loss": 0.426, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -1.75044846534729, "rewards/margins": 1.306915521621704, "rewards/rejected": -3.057363986968994, "step": 725 }, { "epoch": 2.944977284199899, "grad_norm": 83.46701049804688, "learning_rate": 5e-06, "logits/chosen": -1.1913352012634277, "logits/rejected": -1.1349766254425049, "logps/chosen": -132.3568115234375, "logps/rejected": -161.4365234375, "loss": 0.4037, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -1.3909555673599243, "rewards/margins": 1.264525055885315, "rewards/rejected": -2.6554808616638184, "step": 730 }, { "epoch": 2.965169106511863, "grad_norm": 29.703113555908203, "learning_rate": 5e-06, "logits/chosen": -1.1975009441375732, "logits/rejected": -1.1907942295074463, "logps/chosen": -126.33477783203125, "logps/rejected": -158.81204223632812, "loss": 0.4499, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.3245980739593506, "rewards/margins": 1.180716872215271, "rewards/rejected": -2.5053153038024902, "step": 735 }, { "epoch": 2.9853609288238263, "grad_norm": 41.88002014160156, "learning_rate": 5e-06, "logits/chosen": -1.211860179901123, "logits/rejected": -1.1881868839263916, "logps/chosen": -141.8226776123047, "logps/rejected": -175.9602508544922, "loss": 0.3985, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -1.6403604745864868, "rewards/margins": 1.4641687870025635, "rewards/rejected": -3.1045291423797607, "step": 740 }, { "epoch": 2.989399293286219, "step": 741, "total_flos": 0.0, "train_loss": 0.586526518971653, "train_runtime": 26068.2965, "train_samples_per_second": 0.228, "train_steps_per_second": 0.028 } ], "logging_steps": 5, "max_steps": 741, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }