{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9982851866508377, "eval_steps": 400, "global_step": 473, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00211053950666139, "grad_norm": 5.643460436957748, "learning_rate": 1.0416666666666666e-08, "logits/chosen": -1.5622574090957642, "logits/rejected": -2.016603946685791, "logps/chosen": -279.929443359375, "logps/rejected": -249.6509552001953, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.010552697533306952, "grad_norm": 4.760670706167096, "learning_rate": 5.208333333333333e-08, "logits/chosen": -1.6410560607910156, "logits/rejected": -1.8854162693023682, "logps/chosen": -306.70123291015625, "logps/rejected": -286.2392883300781, "loss": 0.6934, "rewards/accuracies": 0.3125, "rewards/chosen": -0.0017719048773869872, "rewards/margins": -0.0009851222857832909, "rewards/rejected": -0.0007867825916036963, "step": 5 }, { "epoch": 0.021105395066613904, "grad_norm": 4.237628799563217, "learning_rate": 1.0416666666666667e-07, "logits/chosen": -1.4761555194854736, "logits/rejected": -1.7796385288238525, "logps/chosen": -290.88739013671875, "logps/rejected": -265.3614196777344, "loss": 0.6931, "rewards/accuracies": 0.512499988079071, "rewards/chosen": -0.001178879290819168, "rewards/margins": 0.0011062298435717821, "rewards/rejected": -0.00228510913439095, "step": 10 }, { "epoch": 0.031658092599920855, "grad_norm": 5.575235759782868, "learning_rate": 1.5624999999999999e-07, "logits/chosen": -1.5923292636871338, "logits/rejected": -1.9355911016464233, "logps/chosen": -293.08807373046875, "logps/rejected": -261.4955139160156, "loss": 0.6929, "rewards/accuracies": 0.44999998807907104, "rewards/chosen": -8.010577585082501e-05, "rewards/margins": -0.0003216963086742908, "rewards/rejected": 0.00024159046006388962, "step": 15 }, { "epoch": 0.04221079013322781, "grad_norm": 5.446047742742675, "learning_rate": 2.0833333333333333e-07, "logits/chosen": -1.5667310953140259, "logits/rejected": -2.014115810394287, "logps/chosen": -273.8595275878906, "logps/rejected": -235.01364135742188, "loss": 0.6923, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.0028162619564682245, "rewards/margins": 0.0022979697678238153, "rewards/rejected": 0.0005182920140214264, "step": 20 }, { "epoch": 0.052763487666534756, "grad_norm": 5.373521803666822, "learning_rate": 2.604166666666667e-07, "logits/chosen": -1.6274404525756836, "logits/rejected": -1.875451683998108, "logps/chosen": -279.4980163574219, "logps/rejected": -255.500244140625, "loss": 0.691, "rewards/accuracies": 0.65625, "rewards/chosen": 0.011563817039132118, "rewards/margins": 0.003255133982747793, "rewards/rejected": 0.008308682590723038, "step": 25 }, { "epoch": 0.06331618519984171, "grad_norm": 6.225478720213553, "learning_rate": 3.1249999999999997e-07, "logits/chosen": -1.3949791193008423, "logits/rejected": -1.7053276300430298, "logps/chosen": -295.1358337402344, "logps/rejected": -266.3870849609375, "loss": 0.6882, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.02798023819923401, "rewards/margins": 0.00888301245868206, "rewards/rejected": 0.0190972238779068, "step": 30 }, { "epoch": 0.07386888273314866, "grad_norm": 5.237638647535282, "learning_rate": 3.645833333333333e-07, "logits/chosen": -1.6196448802947998, "logits/rejected": -1.9479618072509766, "logps/chosen": -296.2655029296875, "logps/rejected": -268.84454345703125, "loss": 0.684, "rewards/accuracies": 0.78125, "rewards/chosen": 0.05069383978843689, "rewards/margins": 0.019313272088766098, "rewards/rejected": 0.03138056769967079, "step": 35 }, { "epoch": 0.08442158026645562, "grad_norm": 4.876669460762773, "learning_rate": 4.1666666666666667e-07, "logits/chosen": -1.5888957977294922, "logits/rejected": -1.862489938735962, "logps/chosen": -298.8040466308594, "logps/rejected": -281.5601501464844, "loss": 0.681, "rewards/accuracies": 0.71875, "rewards/chosen": 0.07323700189590454, "rewards/margins": 0.02310621738433838, "rewards/rejected": 0.05013079196214676, "step": 40 }, { "epoch": 0.09497427779976256, "grad_norm": 4.155679871417378, "learning_rate": 4.6874999999999996e-07, "logits/chosen": -1.6918067932128906, "logits/rejected": -2.00124454498291, "logps/chosen": -278.1552734375, "logps/rejected": -257.6329345703125, "loss": 0.6733, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.06014139577746391, "rewards/margins": 0.035790883004665375, "rewards/rejected": 0.024350514635443687, "step": 45 }, { "epoch": 0.10552697533306951, "grad_norm": 4.856245709560566, "learning_rate": 4.999726797933858e-07, "logits/chosen": -1.7646106481552124, "logits/rejected": -1.9858261346817017, "logps/chosen": -278.0591125488281, "logps/rejected": -259.8578186035156, "loss": 0.6681, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.03362422436475754, "rewards/margins": 0.0627993568778038, "rewards/rejected": -0.029175132513046265, "step": 50 }, { "epoch": 0.11607967286637647, "grad_norm": 5.091156338266151, "learning_rate": 4.99665396039775e-07, "logits/chosen": -1.7584812641143799, "logits/rejected": -2.0758414268493652, "logps/chosen": -275.55548095703125, "logps/rejected": -267.1745300292969, "loss": 0.6557, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.016241051256656647, "rewards/margins": 0.09382958710193634, "rewards/rejected": -0.11007064580917358, "step": 55 }, { "epoch": 0.12663237039968342, "grad_norm": 7.261447388148616, "learning_rate": 4.99017099386437e-07, "logits/chosen": -1.8382816314697266, "logits/rejected": -2.1405653953552246, "logps/chosen": -281.54827880859375, "logps/rejected": -263.27294921875, "loss": 0.6545, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.11135590076446533, "rewards/margins": 0.09165789932012558, "rewards/rejected": -0.20301377773284912, "step": 60 }, { "epoch": 0.13718506793299037, "grad_norm": 5.4212641641464705, "learning_rate": 4.980286753286194e-07, "logits/chosen": -1.8312028646469116, "logits/rejected": -2.171030282974243, "logps/chosen": -287.714599609375, "logps/rejected": -269.2692565917969, "loss": 0.6518, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.1507767140865326, "rewards/margins": 0.07915514707565308, "rewards/rejected": -0.22993186116218567, "step": 65 }, { "epoch": 0.14773776546629733, "grad_norm": 5.914411454935479, "learning_rate": 4.967014739346915e-07, "logits/chosen": -1.7997725009918213, "logits/rejected": -2.1724910736083984, "logps/chosen": -314.2709045410156, "logps/rejected": -288.6246337890625, "loss": 0.6402, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.12115994840860367, "rewards/margins": 0.11401550471782684, "rewards/rejected": -0.23517544567584991, "step": 70 }, { "epoch": 0.15829046299960428, "grad_norm": 6.712529318578845, "learning_rate": 4.950373080021136e-07, "logits/chosen": -1.7687238454818726, "logits/rejected": -2.1885459423065186, "logps/chosen": -325.1200256347656, "logps/rejected": -298.8721008300781, "loss": 0.6297, "rewards/accuracies": 0.75, "rewards/chosen": -0.15646745264530182, "rewards/margins": 0.1352117955684662, "rewards/rejected": -0.2916792631149292, "step": 75 }, { "epoch": 0.16884316053291124, "grad_norm": 11.136062496010506, "learning_rate": 4.930384505813737e-07, "logits/chosen": -1.8805389404296875, "logits/rejected": -2.243736982345581, "logps/chosen": -311.088134765625, "logps/rejected": -294.6890563964844, "loss": 0.6359, "rewards/accuracies": 0.71875, "rewards/chosen": -0.28947150707244873, "rewards/margins": 0.14183922111988068, "rewards/rejected": -0.4313107430934906, "step": 80 }, { "epoch": 0.1793958580662182, "grad_norm": 9.114819712911686, "learning_rate": 4.907076318712738e-07, "logits/chosen": -1.823948621749878, "logits/rejected": -2.289140462875366, "logps/chosen": -310.9908752441406, "logps/rejected": -296.40277099609375, "loss": 0.6303, "rewards/accuracies": 0.78125, "rewards/chosen": -0.3205372095108032, "rewards/margins": 0.17761529982089996, "rewards/rejected": -0.49815255403518677, "step": 85 }, { "epoch": 0.18994855559952512, "grad_norm": 6.643890919509507, "learning_rate": 4.88048035489807e-07, "logits/chosen": -1.9663282632827759, "logits/rejected": -2.3473124504089355, "logps/chosen": -305.6136779785156, "logps/rejected": -289.89471435546875, "loss": 0.6202, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.31997618079185486, "rewards/margins": 0.13845226168632507, "rewards/rejected": -0.4584284722805023, "step": 90 }, { "epoch": 0.20050125313283207, "grad_norm": 8.184472249085363, "learning_rate": 4.85063294125718e-07, "logits/chosen": -1.9098714590072632, "logits/rejected": -2.2084691524505615, "logps/chosen": -316.2163391113281, "logps/rejected": -309.3311462402344, "loss": 0.6176, "rewards/accuracies": 0.71875, "rewards/chosen": -0.3992615342140198, "rewards/margins": 0.18614216148853302, "rewards/rejected": -0.5854036211967468, "step": 95 }, { "epoch": 0.21105395066613902, "grad_norm": 8.142734346752789, "learning_rate": 4.817574845766874e-07, "logits/chosen": -2.0933427810668945, "logits/rejected": -2.4379730224609375, "logps/chosen": -331.0671691894531, "logps/rejected": -329.8666076660156, "loss": 0.6142, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.5073726773262024, "rewards/margins": 0.2520057260990143, "rewards/rejected": -0.7593784332275391, "step": 100 }, { "epoch": 0.22160664819944598, "grad_norm": 7.833427916953237, "learning_rate": 4.781351221809166e-07, "logits/chosen": -2.1106457710266113, "logits/rejected": -2.3915274143218994, "logps/chosen": -346.46337890625, "logps/rejected": -337.7541198730469, "loss": 0.6124, "rewards/accuracies": 0.668749988079071, "rewards/chosen": -0.5495078563690186, "rewards/margins": 0.17171132564544678, "rewards/rejected": -0.7212191820144653, "step": 105 }, { "epoch": 0.23215934573275293, "grad_norm": 7.733643763374119, "learning_rate": 4.742011546497182e-07, "logits/chosen": -1.9331356287002563, "logits/rejected": -2.2653117179870605, "logps/chosen": -344.3125, "logps/rejected": -331.16632080078125, "loss": 0.6061, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.4401538372039795, "rewards/margins": 0.23562327027320862, "rewards/rejected": -0.6757770776748657, "step": 110 }, { "epoch": 0.24271204326605988, "grad_norm": 9.878124133877995, "learning_rate": 4.6996095530953875e-07, "logits/chosen": -2.1782004833221436, "logits/rejected": -2.4763283729553223, "logps/chosen": -324.13983154296875, "logps/rejected": -319.3317565917969, "loss": 0.5985, "rewards/accuracies": 0.75, "rewards/chosen": -0.5510643720626831, "rewards/margins": 0.20593421161174774, "rewards/rejected": -0.7569986581802368, "step": 115 }, { "epoch": 0.25326474079936684, "grad_norm": 10.19310620269605, "learning_rate": 4.654203157626399e-07, "logits/chosen": -1.9924976825714111, "logits/rejected": -2.3102524280548096, "logps/chosen": -377.19183349609375, "logps/rejected": -375.12823486328125, "loss": 0.5964, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -0.7560365796089172, "rewards/margins": 0.3029988408088684, "rewards/rejected": -1.0590355396270752, "step": 120 }, { "epoch": 0.2638174383326738, "grad_norm": 11.001033426114137, "learning_rate": 4.605854379764673e-07, "logits/chosen": -2.199047565460205, "logits/rejected": -2.5438296794891357, "logps/chosen": -374.6500549316406, "logps/rejected": -363.5611572265625, "loss": 0.5867, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -0.9937089681625366, "rewards/margins": 0.28307586908340454, "rewards/rejected": -1.2767850160598755, "step": 125 }, { "epoch": 0.27437013586598075, "grad_norm": 12.37698546731958, "learning_rate": 4.5546292581250857e-07, "logits/chosen": -2.1308746337890625, "logits/rejected": -2.4864110946655273, "logps/chosen": -400.5669860839844, "logps/rejected": -397.34454345703125, "loss": 0.5933, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -1.2083604335784912, "rewards/margins": 0.26188138127326965, "rewards/rejected": -1.4702417850494385, "step": 130 }, { "epoch": 0.2849228333992877, "grad_norm": 11.055505495866356, "learning_rate": 4.5005977600621275e-07, "logits/chosen": -2.0843119621276855, "logits/rejected": -2.539513111114502, "logps/chosen": -385.6966857910156, "logps/rejected": -379.7543029785156, "loss": 0.5772, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.9928609728813171, "rewards/margins": 0.3268323540687561, "rewards/rejected": -1.3196933269500732, "step": 135 }, { "epoch": 0.29547553093259465, "grad_norm": 15.017423788636298, "learning_rate": 4.443833686102919e-07, "logits/chosen": -2.218951940536499, "logits/rejected": -2.4617791175842285, "logps/chosen": -422.04803466796875, "logps/rejected": -423.21685791015625, "loss": 0.5756, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.2504339218139648, "rewards/margins": 0.3247791528701782, "rewards/rejected": -1.575213074684143, "step": 140 }, { "epoch": 0.3060282284659016, "grad_norm": 13.104101042453381, "learning_rate": 4.384414569144561e-07, "logits/chosen": -2.239192485809326, "logits/rejected": -2.4994874000549316, "logps/chosen": -423.623046875, "logps/rejected": -425.60546875, "loss": 0.5866, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.5029816627502441, "rewards/margins": 0.3298465609550476, "rewards/rejected": -1.832828164100647, "step": 145 }, { "epoch": 0.31658092599920856, "grad_norm": 9.51654888826691, "learning_rate": 4.3224215685535287e-07, "logits/chosen": -2.0407261848449707, "logits/rejected": -2.337188720703125, "logps/chosen": -426.2940979003906, "logps/rejected": -424.5220642089844, "loss": 0.5817, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.3242921829223633, "rewards/margins": 0.31421297788619995, "rewards/rejected": -1.638505220413208, "step": 150 }, { "epoch": 0.3271336235325155, "grad_norm": 11.744189119101899, "learning_rate": 4.2579393593117364e-07, "logits/chosen": -2.0881667137145996, "logits/rejected": -2.4598240852355957, "logps/chosen": -373.4230041503906, "logps/rejected": -372.4430847167969, "loss": 0.5648, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.0188493728637695, "rewards/margins": 0.29292336106300354, "rewards/rejected": -1.3117727041244507, "step": 155 }, { "epoch": 0.33768632106582247, "grad_norm": 11.411495536339306, "learning_rate": 4.191056016360699e-07, "logits/chosen": -2.1164355278015137, "logits/rejected": -2.3749523162841797, "logps/chosen": -452.0877380371094, "logps/rejected": -475.936767578125, "loss": 0.5657, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.5110199451446533, "rewards/margins": 0.45130714774131775, "rewards/rejected": -1.962327241897583, "step": 160 }, { "epoch": 0.3482390185991294, "grad_norm": 13.015188201271227, "learning_rate": 4.121862894301754e-07, "logits/chosen": -2.0862815380096436, "logits/rejected": -2.4722859859466553, "logps/chosen": -415.59368896484375, "logps/rejected": -414.7337951660156, "loss": 0.5574, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -1.2593889236450195, "rewards/margins": 0.3909255266189575, "rewards/rejected": -1.6503145694732666, "step": 165 }, { "epoch": 0.3587917161324364, "grad_norm": 15.106508196254897, "learning_rate": 4.050454502616667e-07, "logits/chosen": -2.120917797088623, "logits/rejected": -2.3543829917907715, "logps/chosen": -464.19622802734375, "logps/rejected": -488.60675048828125, "loss": 0.5484, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -1.7536499500274658, "rewards/margins": 0.46063175797462463, "rewards/rejected": -2.2142815589904785, "step": 170 }, { "epoch": 0.36934441366574333, "grad_norm": 17.660956835556952, "learning_rate": 3.976928376579047e-07, "logits/chosen": -2.117267608642578, "logits/rejected": -2.336695432662964, "logps/chosen": -491.78216552734375, "logps/rejected": -518.7801513671875, "loss": 0.5229, "rewards/accuracies": 0.78125, "rewards/chosen": -1.9079921245574951, "rewards/margins": 0.5516217350959778, "rewards/rejected": -2.459613800048828, "step": 175 }, { "epoch": 0.37989711119905023, "grad_norm": 14.241710823955074, "learning_rate": 3.9013849440328945e-07, "logits/chosen": -2.169321060180664, "logits/rejected": -2.405425786972046, "logps/chosen": -436.4549865722656, "logps/rejected": -458.5728454589844, "loss": 0.5505, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -1.5903228521347046, "rewards/margins": 0.4581179618835449, "rewards/rejected": -2.048440933227539, "step": 180 }, { "epoch": 0.3904498087323572, "grad_norm": 14.232999562557966, "learning_rate": 3.8239273882202473e-07, "logits/chosen": -2.1749088764190674, "logits/rejected": -2.4840614795684814, "logps/chosen": -479.7809143066406, "logps/rejected": -491.2457580566406, "loss": 0.5578, "rewards/accuracies": 0.6812499761581421, "rewards/chosen": -1.8379102945327759, "rewards/margins": 0.42885318398475647, "rewards/rejected": -2.266763210296631, "step": 185 }, { "epoch": 0.40100250626566414, "grad_norm": 16.05324813627352, "learning_rate": 3.7446615068452804e-07, "logits/chosen": -2.2167088985443115, "logits/rejected": -2.5488333702087402, "logps/chosen": -488.9418029785156, "logps/rejected": -518.4141845703125, "loss": 0.5337, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -1.9092267751693726, "rewards/margins": 0.5299785137176514, "rewards/rejected": -2.4392056465148926, "step": 190 }, { "epoch": 0.4115552037989711, "grad_norm": 14.147768615324013, "learning_rate": 3.6636955675673743e-07, "logits/chosen": -2.1767070293426514, "logits/rejected": -2.61075496673584, "logps/chosen": -479.6851501464844, "logps/rejected": -483.32501220703125, "loss": 0.5389, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -1.7908236980438232, "rewards/margins": 0.46493005752563477, "rewards/rejected": -2.255753517150879, "step": 195 }, { "epoch": 0.42210790133227805, "grad_norm": 13.882861374739983, "learning_rate": 3.5811401601205093e-07, "logits/chosen": -2.219057321548462, "logits/rejected": -2.5431442260742188, "logps/chosen": -500.2259826660156, "logps/rejected": -521.2952270507812, "loss": 0.5154, "rewards/accuracies": 0.8125, "rewards/chosen": -1.941454529762268, "rewards/margins": 0.5581260919570923, "rewards/rejected": -2.4995803833007812, "step": 200 }, { "epoch": 0.432660598865585, "grad_norm": 13.10437188597977, "learning_rate": 3.497108045260995e-07, "logits/chosen": -2.3422179222106934, "logits/rejected": -2.6200077533721924, "logps/chosen": -444.88433837890625, "logps/rejected": -463.40582275390625, "loss": 0.5479, "rewards/accuracies": 0.71875, "rewards/chosen": -1.721801996231079, "rewards/margins": 0.46254196763038635, "rewards/rejected": -2.1843440532684326, "step": 205 }, { "epoch": 0.44321329639889195, "grad_norm": 17.090042659489537, "learning_rate": 3.411714000749838e-07, "logits/chosen": -2.2252583503723145, "logits/rejected": -2.598954916000366, "logps/chosen": -467.2124938964844, "logps/rejected": -482.96136474609375, "loss": 0.5295, "rewards/accuracies": 0.75, "rewards/chosen": -1.7041940689086914, "rewards/margins": 0.48477378487586975, "rewards/rejected": -2.1889679431915283, "step": 210 }, { "epoch": 0.4537659939321989, "grad_norm": 27.344254292887783, "learning_rate": 3.3250746645801287e-07, "logits/chosen": -2.346909523010254, "logits/rejected": -2.5004947185516357, "logps/chosen": -492.0323791503906, "logps/rejected": -510.87847900390625, "loss": 0.5491, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -2.0776052474975586, "rewards/margins": 0.3693740963935852, "rewards/rejected": -2.446979284286499, "step": 215 }, { "epoch": 0.46431869146550586, "grad_norm": 19.808222569935815, "learning_rate": 3.237308375663571e-07, "logits/chosen": -2.291229486465454, "logits/rejected": -2.6437947750091553, "logps/chosen": -470.88909912109375, "logps/rejected": -506.59393310546875, "loss": 0.5365, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -1.9697215557098389, "rewards/margins": 0.6469660997390747, "rewards/rejected": -2.616687774658203, "step": 220 }, { "epoch": 0.4748713889988128, "grad_norm": 18.32709921824934, "learning_rate": 3.148535012193767e-07, "logits/chosen": -2.1904757022857666, "logits/rejected": -2.5518805980682373, "logps/chosen": -459.55987548828125, "logps/rejected": -499.54840087890625, "loss": 0.5194, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -1.6650127172470093, "rewards/margins": 0.6420146226882935, "rewards/rejected": -2.3070271015167236, "step": 225 }, { "epoch": 0.48542408653211977, "grad_norm": 14.548363920921867, "learning_rate": 3.0588758279070183e-07, "logits/chosen": -2.1270744800567627, "logits/rejected": -2.476382255554199, "logps/chosen": -442.04400634765625, "logps/rejected": -464.97735595703125, "loss": 0.5326, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.7559592723846436, "rewards/margins": 0.4754490852355957, "rewards/rejected": -2.2314083576202393, "step": 230 }, { "epoch": 0.4959767840654267, "grad_norm": 16.709600354788574, "learning_rate": 2.968453286464312e-07, "logits/chosen": -2.354429244995117, "logits/rejected": -2.5410735607147217, "logps/chosen": -514.9906005859375, "logps/rejected": -569.000732421875, "loss": 0.5457, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.1175270080566406, "rewards/margins": 0.6777707934379578, "rewards/rejected": -2.795297861099243, "step": 235 }, { "epoch": 0.5065294815987337, "grad_norm": 14.595717161808087, "learning_rate": 2.8773908941806877e-07, "logits/chosen": -2.191709280014038, "logits/rejected": -2.4795994758605957, "logps/chosen": -513.7542724609375, "logps/rejected": -539.9058837890625, "loss": 0.5283, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -2.063544273376465, "rewards/margins": 0.5612505674362183, "rewards/rejected": -2.6247947216033936, "step": 240 }, { "epoch": 0.5170821791320406, "grad_norm": 27.782416609672772, "learning_rate": 2.785813031330473e-07, "logits/chosen": -2.316455602645874, "logits/rejected": -2.5953054428100586, "logps/chosen": -480.8763122558594, "logps/rejected": -517.4465942382812, "loss": 0.5183, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -1.9154212474822998, "rewards/margins": 0.5959927439689636, "rewards/rejected": -2.5114142894744873, "step": 245 }, { "epoch": 0.5276348766653476, "grad_norm": 19.504383146510033, "learning_rate": 2.693844782258779e-07, "logits/chosen": -2.288198947906494, "logits/rejected": -2.663243293762207, "logps/chosen": -504.8866271972656, "logps/rejected": -565.0271606445312, "loss": 0.4927, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -2.188603639602661, "rewards/margins": 0.93329256772995, "rewards/rejected": -3.121896266937256, "step": 250 }, { "epoch": 0.5381875741986545, "grad_norm": 16.174904226348485, "learning_rate": 2.601611764531342e-07, "logits/chosen": -2.3196043968200684, "logits/rejected": -2.615384578704834, "logps/chosen": -520.6978759765625, "logps/rejected": -568.9720458984375, "loss": 0.5158, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -2.302415370941162, "rewards/margins": 0.6759995222091675, "rewards/rejected": -2.978415012359619, "step": 255 }, { "epoch": 0.5487402717319615, "grad_norm": 13.877722437423808, "learning_rate": 2.5092399573560323e-07, "logits/chosen": -2.2904419898986816, "logits/rejected": -2.6239161491394043, "logps/chosen": -469.6702575683594, "logps/rejected": -495.83917236328125, "loss": 0.5271, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -1.8616193532943726, "rewards/margins": 0.5212319493293762, "rewards/rejected": -2.3828511238098145, "step": 260 }, { "epoch": 0.5592929692652684, "grad_norm": 25.387018559215, "learning_rate": 2.4168555295104124e-07, "logits/chosen": -2.3710436820983887, "logits/rejected": -2.6667098999023438, "logps/chosen": -551.4783325195312, "logps/rejected": -584.6627197265625, "loss": 0.5207, "rewards/accuracies": 0.75, "rewards/chosen": -2.6731348037719727, "rewards/margins": 0.6126972436904907, "rewards/rejected": -3.285832166671753, "step": 265 }, { "epoch": 0.5698456667985754, "grad_norm": 21.194954670641433, "learning_rate": 2.3245846670103626e-07, "logits/chosen": -2.339695692062378, "logits/rejected": -2.728651285171509, "logps/chosen": -566.4747314453125, "logps/rejected": -614.0745849609375, "loss": 0.4911, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.8559041023254395, "rewards/margins": 0.8008524179458618, "rewards/rejected": -3.656756639480591, "step": 270 }, { "epoch": 0.5803983643318823, "grad_norm": 17.02245510623681, "learning_rate": 2.232553400755159e-07, "logits/chosen": -2.462646007537842, "logits/rejected": -2.7295315265655518, "logps/chosen": -520.3306884765625, "logps/rejected": -550.9982299804688, "loss": 0.515, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.492687940597534, "rewards/margins": 0.6085286140441895, "rewards/rejected": -3.1012163162231445, "step": 275 }, { "epoch": 0.5909510618651893, "grad_norm": 18.57311563682423, "learning_rate": 2.1408874343844294e-07, "logits/chosen": -2.46991229057312, "logits/rejected": -2.839108943939209, "logps/chosen": -564.0499877929688, "logps/rejected": -614.0209350585938, "loss": 0.5184, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -2.8387348651885986, "rewards/margins": 0.748325765132904, "rewards/rejected": -3.5870604515075684, "step": 280 }, { "epoch": 0.6015037593984962, "grad_norm": 18.112658192267443, "learning_rate": 2.049711972582101e-07, "logits/chosen": -2.4610495567321777, "logits/rejected": -2.7717814445495605, "logps/chosen": -595.5016479492188, "logps/rejected": -650.4605712890625, "loss": 0.4974, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -3.076230764389038, "rewards/margins": 0.7762446999549866, "rewards/rejected": -3.852475643157959, "step": 285 }, { "epoch": 0.6120564569318032, "grad_norm": 25.977212495196564, "learning_rate": 1.9591515500618588e-07, "logits/chosen": -2.5490634441375732, "logits/rejected": -2.773324728012085, "logps/chosen": -515.6527099609375, "logps/rejected": -566.9156494140625, "loss": 0.5189, "rewards/accuracies": 0.7437499761581421, "rewards/chosen": -2.3954033851623535, "rewards/margins": 0.6492033004760742, "rewards/rejected": -3.0446066856384277, "step": 290 }, { "epoch": 0.6226091544651101, "grad_norm": 16.798443756074835, "learning_rate": 1.8693298614677112e-07, "logits/chosen": -2.400968074798584, "logits/rejected": -2.6868553161621094, "logps/chosen": -556.1025390625, "logps/rejected": -596.8348388671875, "loss": 0.4854, "rewards/accuracies": 0.75, "rewards/chosen": -2.474238872528076, "rewards/margins": 0.6959460377693176, "rewards/rejected": -3.17018461227417, "step": 295 }, { "epoch": 0.6331618519984171, "grad_norm": 23.81678431486218, "learning_rate": 1.7803695924219814e-07, "logits/chosen": -2.479430675506592, "logits/rejected": -2.7782349586486816, "logps/chosen": -578.2171020507812, "logps/rejected": -651.0504150390625, "loss": 0.4662, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.9857161045074463, "rewards/margins": 0.9989351034164429, "rewards/rejected": -3.9846510887145996, "step": 300 }, { "epoch": 0.643714549531724, "grad_norm": 17.09054147088968, "learning_rate": 1.6923922519515067e-07, "logits/chosen": -2.4572558403015137, "logits/rejected": -2.866284132003784, "logps/chosen": -598.88037109375, "logps/rejected": -641.3062744140625, "loss": 0.5055, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -3.1678709983825684, "rewards/margins": 0.8001095056533813, "rewards/rejected": -3.9679806232452393, "step": 305 }, { "epoch": 0.654267247065031, "grad_norm": 14.492251564068122, "learning_rate": 1.605518006520924e-07, "logits/chosen": -2.3932666778564453, "logits/rejected": -2.6719508171081543, "logps/chosen": -501.3484802246094, "logps/rejected": -544.0150146484375, "loss": 0.5221, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -2.2441365718841553, "rewards/margins": 0.6640299558639526, "rewards/rejected": -2.9081664085388184, "step": 310 }, { "epoch": 0.6648199445983379, "grad_norm": 19.243991902749503, "learning_rate": 1.519865515899731e-07, "logits/chosen": -2.444279432296753, "logits/rejected": -2.6949431896209717, "logps/chosen": -506.01519775390625, "logps/rejected": -542.7237548828125, "loss": 0.5115, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.281829357147217, "rewards/margins": 0.6259506940841675, "rewards/rejected": -2.907780170440674, "step": 315 }, { "epoch": 0.6753726421316449, "grad_norm": 27.99539224141589, "learning_rate": 1.4355517710873182e-07, "logits/chosen": -2.5953707695007324, "logits/rejected": -2.877714157104492, "logps/chosen": -571.291015625, "logps/rejected": -615.3301391601562, "loss": 0.5011, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -2.8195748329162598, "rewards/margins": 0.6808874607086182, "rewards/rejected": -3.500462293624878, "step": 320 }, { "epoch": 0.6859253396649518, "grad_norm": 23.88018530349238, "learning_rate": 1.3526919345173318e-07, "logits/chosen": -2.5718350410461426, "logits/rejected": -2.88576078414917, "logps/chosen": -595.6961669921875, "logps/rejected": -665.6595458984375, "loss": 0.4992, "rewards/accuracies": 0.8187500238418579, "rewards/chosen": -3.05544376373291, "rewards/margins": 0.9698305130004883, "rewards/rejected": -4.025274753570557, "step": 325 }, { "epoch": 0.6964780371982588, "grad_norm": 23.60330153062859, "learning_rate": 1.2713991827596443e-07, "logits/chosen": -2.614315986633301, "logits/rejected": -2.894726276397705, "logps/chosen": -562.1041259765625, "logps/rejected": -629.8328857421875, "loss": 0.4933, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -2.73535418510437, "rewards/margins": 0.9043378829956055, "rewards/rejected": -3.6396923065185547, "step": 330 }, { "epoch": 0.7070307347315657, "grad_norm": 18.43994758901315, "learning_rate": 1.191784551934773e-07, "logits/chosen": -2.494032144546509, "logits/rejected": -2.8370561599731445, "logps/chosen": -512.5650024414062, "logps/rejected": -558.193115234375, "loss": 0.4919, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -2.298001766204834, "rewards/margins": 0.7504197955131531, "rewards/rejected": -3.048421859741211, "step": 335 }, { "epoch": 0.7175834322648728, "grad_norm": 19.769138328586244, "learning_rate": 1.1139567860518953e-07, "logits/chosen": -2.399077892303467, "logits/rejected": -2.8016879558563232, "logps/chosen": -532.8413696289062, "logps/rejected": -595.0988159179688, "loss": 0.4698, "rewards/accuracies": 0.84375, "rewards/chosen": -2.4740264415740967, "rewards/margins": 0.9756819009780884, "rewards/rejected": -3.4497084617614746, "step": 340 }, { "epoch": 0.7281361297981797, "grad_norm": 29.121484778204135, "learning_rate": 1.0380221884776128e-07, "logits/chosen": -2.504153251647949, "logits/rejected": -2.826664447784424, "logps/chosen": -588.2379150390625, "logps/rejected": -649.8221435546875, "loss": 0.4541, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -2.9968106746673584, "rewards/margins": 0.9327837824821472, "rewards/rejected": -3.9295945167541504, "step": 345 }, { "epoch": 0.7386888273314867, "grad_norm": 20.917558525767543, "learning_rate": 9.640844767383405e-08, "logits/chosen": -2.4767587184906006, "logits/rejected": -2.815369129180908, "logps/chosen": -636.3276977539062, "logps/rejected": -681.0283203125, "loss": 0.5234, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -3.6004860401153564, "rewards/margins": 0.719234824180603, "rewards/rejected": -4.31972074508667, "step": 350 }, { "epoch": 0.7492415248647936, "grad_norm": 20.761748981239933, "learning_rate": 8.922446408546378e-08, "logits/chosen": -2.4393577575683594, "logits/rejected": -2.7462494373321533, "logps/chosen": -593.6701049804688, "logps/rejected": -662.8970947265625, "loss": 0.4559, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -3.010227918624878, "rewards/margins": 0.9989708662033081, "rewards/rejected": -4.0091986656188965, "step": 355 }, { "epoch": 0.7597942223981005, "grad_norm": 18.501565997520643, "learning_rate": 8.22600805400994e-08, "logits/chosen": -2.382094144821167, "logits/rejected": -2.714757204055786, "logps/chosen": -528.34033203125, "logps/rejected": -588.7109985351562, "loss": 0.4723, "rewards/accuracies": 0.8062499761581421, "rewards/chosen": -2.4561707973480225, "rewards/margins": 0.8588768243789673, "rewards/rejected": -3.3150477409362793, "step": 360 }, { "epoch": 0.7703469199314075, "grad_norm": 22.596420829881406, "learning_rate": 7.552480954794558e-08, "logits/chosen": -2.496333599090576, "logits/rejected": -2.8438127040863037, "logps/chosen": -587.7208862304688, "logps/rejected": -652.3656005859375, "loss": 0.4838, "rewards/accuracies": 0.768750011920929, "rewards/chosen": -3.0826942920684814, "rewards/margins": 0.9047689437866211, "rewards/rejected": -3.9874634742736816, "step": 365 }, { "epoch": 0.7808996174647144, "grad_norm": 17.83641444640056, "learning_rate": 6.902785067901854e-08, "logits/chosen": -2.5392613410949707, "logits/rejected": -2.8968329429626465, "logps/chosen": -596.1561889648438, "logps/rejected": -664.4248046875, "loss": 0.4774, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -3.162071943283081, "rewards/margins": 0.8737271428108215, "rewards/rejected": -4.035799026489258, "step": 370 }, { "epoch": 0.7914523149980214, "grad_norm": 22.105262436574318, "learning_rate": 6.277807799763973e-08, "logits/chosen": -2.464101552963257, "logits/rejected": -2.823216199874878, "logps/chosen": -605.5325317382812, "logps/rejected": -688.7382202148438, "loss": 0.4821, "rewards/accuracies": 0.8125, "rewards/chosen": -3.242568254470825, "rewards/margins": 1.0074737071990967, "rewards/rejected": -4.250041961669922, "step": 375 }, { "epoch": 0.8020050125313283, "grad_norm": 25.73803952489229, "learning_rate": 5.678402794153145e-08, "logits/chosen": -2.5645461082458496, "logits/rejected": -2.8685081005096436, "logps/chosen": -624.9561767578125, "logps/rejected": -682.5247802734375, "loss": 0.4853, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.3231148719787598, "rewards/margins": 0.8197879791259766, "rewards/rejected": -4.142902374267578, "step": 380 }, { "epoch": 0.8125577100646353, "grad_norm": 22.783957458664876, "learning_rate": 5.105388766206969e-08, "logits/chosen": -2.611253261566162, "logits/rejected": -2.8708913326263428, "logps/chosen": -601.1683349609375, "logps/rejected": -657.6758422851562, "loss": 0.4961, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -3.23264741897583, "rewards/margins": 0.7812051773071289, "rewards/rejected": -4.013852119445801, "step": 385 }, { "epoch": 0.8231104075979422, "grad_norm": 17.937740756320142, "learning_rate": 4.5595483841620484e-08, "logits/chosen": -2.585615396499634, "logits/rejected": -2.860517978668213, "logps/chosen": -610.9042358398438, "logps/rejected": -673.6950073242188, "loss": 0.5005, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -3.3800010681152344, "rewards/margins": 0.7753348350524902, "rewards/rejected": -4.155335426330566, "step": 390 }, { "epoch": 0.8336631051312492, "grad_norm": 25.048225079070804, "learning_rate": 4.0416272003232526e-08, "logits/chosen": -2.5495500564575195, "logits/rejected": -2.783395290374756, "logps/chosen": -589.7579956054688, "logps/rejected": -651.603515625, "loss": 0.4634, "rewards/accuracies": 0.793749988079071, "rewards/chosen": -3.1502811908721924, "rewards/margins": 0.8453443646430969, "rewards/rejected": -3.9956252574920654, "step": 395 }, { "epoch": 0.8442158026645561, "grad_norm": 27.936816284901383, "learning_rate": 3.552332632729041e-08, "logits/chosen": -2.5146939754486084, "logits/rejected": -2.804884195327759, "logps/chosen": -594.3411865234375, "logps/rejected": -653.3115844726562, "loss": 0.4978, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -3.1166281700134277, "rewards/margins": 0.8442124128341675, "rewards/rejected": -3.9608407020568848, "step": 400 }, { "epoch": 0.8442158026645561, "eval_logits/chosen": -3.206465244293213, "eval_logits/rejected": -3.0895018577575684, "eval_logps/chosen": -606.6761474609375, "eval_logps/rejected": -664.0686645507812, "eval_loss": 0.6230235695838928, "eval_rewards/accuracies": 0.6370967626571655, "eval_rewards/chosen": -3.440429449081421, "eval_rewards/margins": 0.46332982182502747, "eval_rewards/rejected": -3.903759717941284, "eval_runtime": 145.9837, "eval_samples_per_second": 13.536, "eval_steps_per_second": 0.849, "step": 400 }, { "epoch": 0.8547685001978631, "grad_norm": 19.235680562568444, "learning_rate": 3.092332998903416e-08, "logits/chosen": -2.4855546951293945, "logits/rejected": -2.8127689361572266, "logps/chosen": -608.1304931640625, "logps/rejected": -668.380859375, "loss": 0.44, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -3.3122177124023438, "rewards/margins": 0.8978776931762695, "rewards/rejected": -4.210095405578613, "step": 405 }, { "epoch": 0.86532119773117, "grad_norm": 20.802575196574956, "learning_rate": 2.6622566030146455e-08, "logits/chosen": -2.571362018585205, "logits/rejected": -2.8206756114959717, "logps/chosen": -649.1343994140625, "logps/rejected": -712.3201293945312, "loss": 0.4668, "rewards/accuracies": 0.7562500238418579, "rewards/chosen": -3.6075432300567627, "rewards/margins": 0.9334003329277039, "rewards/rejected": -4.5409440994262695, "step": 410 }, { "epoch": 0.875873895264477, "grad_norm": 20.49229366313954, "learning_rate": 2.26269087768734e-08, "logits/chosen": -2.5383522510528564, "logits/rejected": -2.862185478210449, "logps/chosen": -622.9601440429688, "logps/rejected": -697.26904296875, "loss": 0.4639, "rewards/accuracies": 0.78125, "rewards/chosen": -3.579232692718506, "rewards/margins": 0.9947258234024048, "rewards/rejected": -4.573958396911621, "step": 415 }, { "epoch": 0.8864265927977839, "grad_norm": 18.435121377291708, "learning_rate": 1.894181581640106e-08, "logits/chosen": -2.4851062297821045, "logits/rejected": -2.818612813949585, "logps/chosen": -691.8073120117188, "logps/rejected": -776.2567138671875, "loss": 0.428, "rewards/accuracies": 0.8374999761581421, "rewards/chosen": -3.87129545211792, "rewards/margins": 1.1842930316925049, "rewards/rejected": -5.055588722229004, "step": 420 }, { "epoch": 0.8969792903310909, "grad_norm": 29.475224627532654, "learning_rate": 1.5572320542448143e-08, "logits/chosen": -2.510409355163574, "logits/rejected": -2.80336594581604, "logps/chosen": -651.34326171875, "logps/rejected": -712.4142456054688, "loss": 0.494, "rewards/accuracies": 0.75, "rewards/chosen": -3.6103858947753906, "rewards/margins": 0.8933914303779602, "rewards/rejected": -4.503777027130127, "step": 425 }, { "epoch": 0.9075319878643978, "grad_norm": 25.052350185477973, "learning_rate": 1.2523025280255729e-08, "logits/chosen": -2.5651907920837402, "logits/rejected": -2.870457172393799, "logps/chosen": -678.6126708984375, "logps/rejected": -742.5474853515625, "loss": 0.4623, "rewards/accuracies": 0.75, "rewards/chosen": -3.7764217853546143, "rewards/margins": 1.0084255933761597, "rewards/rejected": -4.784847259521484, "step": 430 }, { "epoch": 0.9180846853977048, "grad_norm": 20.51875255327418, "learning_rate": 9.798095000364214e-09, "logits/chosen": -2.5898213386535645, "logits/rejected": -2.949827194213867, "logps/chosen": -640.5285034179688, "logps/rejected": -698.768798828125, "loss": 0.5011, "rewards/accuracies": 0.78125, "rewards/chosen": -3.686187744140625, "rewards/margins": 0.8774013519287109, "rewards/rejected": -4.563588619232178, "step": 435 }, { "epoch": 0.9286373829310117, "grad_norm": 20.399922495391955, "learning_rate": 7.401251629764876e-09, "logits/chosen": -2.594036817550659, "logits/rejected": -2.882014274597168, "logps/chosen": -671.9681396484375, "logps/rejected": -730.8321533203125, "loss": 0.4861, "rewards/accuracies": 0.8125, "rewards/chosen": -3.8397529125213623, "rewards/margins": 0.9077512621879578, "rewards/rejected": -4.747504234313965, "step": 440 }, { "epoch": 0.9391900804643187, "grad_norm": 22.999851244619595, "learning_rate": 5.335768968195098e-09, "logits/chosen": -2.593620538711548, "logits/rejected": -2.895954132080078, "logps/chosen": -661.6905517578125, "logps/rejected": -731.1881713867188, "loss": 0.4489, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -3.763947010040283, "rewards/margins": 0.9344717264175415, "rewards/rejected": -4.698418617248535, "step": 445 }, { "epoch": 0.9497427779976256, "grad_norm": 25.929731545060648, "learning_rate": 3.604468216521883e-09, "logits/chosen": -2.6423192024230957, "logits/rejected": -2.9191346168518066, "logps/chosen": -610.2369995117188, "logps/rejected": -673.9677124023438, "loss": 0.4538, "rewards/accuracies": 0.75, "rewards/chosen": -3.2649269104003906, "rewards/margins": 0.8473002314567566, "rewards/rejected": -4.112226963043213, "step": 450 }, { "epoch": 0.9602954755309326, "grad_norm": 30.30149224906545, "learning_rate": 2.2097141233206884e-09, "logits/chosen": -2.479203462600708, "logits/rejected": -2.7860312461853027, "logps/chosen": -680.89453125, "logps/rejected": -742.8502807617188, "loss": 0.4953, "rewards/accuracies": 0.731249988079071, "rewards/chosen": -3.7775790691375732, "rewards/margins": 0.8955792188644409, "rewards/rejected": -4.673158645629883, "step": 455 }, { "epoch": 0.9708481730642395, "grad_norm": 17.27399919597619, "learning_rate": 1.1534117549133472e-09, "logits/chosen": -2.5051302909851074, "logits/rejected": -2.7565226554870605, "logps/chosen": -642.7161254882812, "logps/rejected": -727.14599609375, "loss": 0.4686, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.4852137565612793, "rewards/margins": 1.0831372737884521, "rewards/rejected": -4.5683512687683105, "step": 460 }, { "epoch": 0.9814008705975465, "grad_norm": 22.26013302983421, "learning_rate": 4.3700389327672173e-10, "logits/chosen": -2.3914403915405273, "logits/rejected": -2.711667537689209, "logps/chosen": -663.2670288085938, "logps/rejected": -728.1150512695312, "loss": 0.4779, "rewards/accuracies": 0.831250011920929, "rewards/chosen": -3.666525363922119, "rewards/margins": 0.9401981234550476, "rewards/rejected": -4.606723308563232, "step": 465 }, { "epoch": 0.9919535681308534, "grad_norm": 39.07814619267544, "learning_rate": 6.146906537587982e-11, "logits/chosen": -2.5620739459991455, "logits/rejected": -2.8582262992858887, "logps/chosen": -628.55322265625, "logps/rejected": -687.5040283203125, "loss": 0.4882, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -3.4171409606933594, "rewards/margins": 0.8897517919540405, "rewards/rejected": -4.3068928718566895, "step": 470 }, { "epoch": 0.9982851866508377, "step": 473, "total_flos": 0.0, "train_loss": 0.545083115015171, "train_runtime": 9073.2474, "train_samples_per_second": 6.684, "train_steps_per_second": 0.052 } ], "logging_steps": 5, "max_steps": 473, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }