|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.999630314232902, |
|
"eval_steps": 400, |
|
"global_step": 507, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.001971657424522489, |
|
"grad_norm": 4.641391669893979, |
|
"learning_rate": 9.803921568627451e-09, |
|
"logits/chosen": -1.8306132555007935, |
|
"logits/rejected": -1.2712628841400146, |
|
"logps/chosen": -217.9743194580078, |
|
"logps/rejected": -312.2440185546875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.009858287122612447, |
|
"grad_norm": 3.881553151172807, |
|
"learning_rate": 4.901960784313725e-08, |
|
"logits/chosen": -1.3956289291381836, |
|
"logits/rejected": -1.324476718902588, |
|
"logps/chosen": -213.20277404785156, |
|
"logps/rejected": -243.072509765625, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": 9.495137783233076e-05, |
|
"rewards/margins": -0.00030715527827851474, |
|
"rewards/rejected": 0.0004021066124550998, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.019716574245224893, |
|
"grad_norm": 4.055647051577517, |
|
"learning_rate": 9.80392156862745e-08, |
|
"logits/chosen": -1.464820146560669, |
|
"logits/rejected": -1.329075813293457, |
|
"logps/chosen": -216.189697265625, |
|
"logps/rejected": -249.85464477539062, |
|
"loss": 0.6933, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": 0.0014236138667911291, |
|
"rewards/margins": -0.0008448967710137367, |
|
"rewards/rejected": 0.002268511103466153, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.029574861367837338, |
|
"grad_norm": 3.8004259300545313, |
|
"learning_rate": 1.4705882352941175e-07, |
|
"logits/chosen": -1.590954065322876, |
|
"logits/rejected": -1.3920761346817017, |
|
"logps/chosen": -227.84024047851562, |
|
"logps/rejected": -267.3565368652344, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.518750011920929, |
|
"rewards/chosen": 0.0014651073142886162, |
|
"rewards/margins": 0.0005061920965090394, |
|
"rewards/rejected": 0.0009589152177795768, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.039433148490449786, |
|
"grad_norm": 4.362970881343374, |
|
"learning_rate": 1.96078431372549e-07, |
|
"logits/chosen": -1.4077281951904297, |
|
"logits/rejected": -1.438763976097107, |
|
"logps/chosen": -216.7683563232422, |
|
"logps/rejected": -241.71524047851562, |
|
"loss": 0.6921, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -0.0071268146857619286, |
|
"rewards/margins": 0.002329364651814103, |
|
"rewards/rejected": -0.009456178173422813, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04929143561306223, |
|
"grad_norm": 3.68850001761437, |
|
"learning_rate": 2.4509803921568627e-07, |
|
"logits/chosen": -1.368187427520752, |
|
"logits/rejected": -1.3394204378128052, |
|
"logps/chosen": -225.8297119140625, |
|
"logps/rejected": -254.41439819335938, |
|
"loss": 0.6899, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.015500446781516075, |
|
"rewards/margins": 0.005922852084040642, |
|
"rewards/rejected": -0.021423298865556717, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.059149722735674676, |
|
"grad_norm": 4.847654340669893, |
|
"learning_rate": 2.941176470588235e-07, |
|
"logits/chosen": -1.4356650114059448, |
|
"logits/rejected": -1.2754924297332764, |
|
"logps/chosen": -221.5808563232422, |
|
"logps/rejected": -255.44918823242188, |
|
"loss": 0.6856, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.02648136578500271, |
|
"rewards/margins": 0.015200227499008179, |
|
"rewards/rejected": -0.04168159142136574, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06900800985828712, |
|
"grad_norm": 6.653348898638824, |
|
"learning_rate": 3.431372549019608e-07, |
|
"logits/chosen": -1.347893476486206, |
|
"logits/rejected": -1.2126632928848267, |
|
"logps/chosen": -217.4748992919922, |
|
"logps/rejected": -253.11001586914062, |
|
"loss": 0.6716, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.04387308284640312, |
|
"rewards/margins": 0.04525812342762947, |
|
"rewards/rejected": -0.08913120627403259, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.07886629698089957, |
|
"grad_norm": 6.964114197906881, |
|
"learning_rate": 3.92156862745098e-07, |
|
"logits/chosen": -1.4753751754760742, |
|
"logits/rejected": -1.3836042881011963, |
|
"logps/chosen": -233.50979614257812, |
|
"logps/rejected": -270.6595458984375, |
|
"loss": 0.6487, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -0.09956349432468414, |
|
"rewards/margins": 0.10375545918941498, |
|
"rewards/rejected": -0.2033189833164215, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.08872458410351201, |
|
"grad_norm": 15.546171706823465, |
|
"learning_rate": 4.4117647058823526e-07, |
|
"logits/chosen": -1.4908992052078247, |
|
"logits/rejected": -1.4922513961791992, |
|
"logps/chosen": -260.85107421875, |
|
"logps/rejected": -310.8064270019531, |
|
"loss": 0.6302, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.39361271262168884, |
|
"rewards/margins": 0.24461090564727783, |
|
"rewards/rejected": -0.6382235884666443, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.09858287122612445, |
|
"grad_norm": 7.346421533742723, |
|
"learning_rate": 4.901960784313725e-07, |
|
"logits/chosen": -1.8035519123077393, |
|
"logits/rejected": -1.7488648891448975, |
|
"logps/chosen": -280.26544189453125, |
|
"logps/rejected": -384.37969970703125, |
|
"loss": 0.6188, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": -0.6396178007125854, |
|
"rewards/margins": 0.6693423986434937, |
|
"rewards/rejected": -1.308960199356079, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.10844115834873691, |
|
"grad_norm": 6.928842609814235, |
|
"learning_rate": 4.999050767562379e-07, |
|
"logits/chosen": -1.500614881515503, |
|
"logits/rejected": -1.514692783355713, |
|
"logps/chosen": -259.22607421875, |
|
"logps/rejected": -324.70147705078125, |
|
"loss": 0.5905, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.4987005591392517, |
|
"rewards/margins": 0.39352884888648987, |
|
"rewards/rejected": -0.8922293782234192, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.11829944547134935, |
|
"grad_norm": 5.183419407454259, |
|
"learning_rate": 4.99519574616467e-07, |
|
"logits/chosen": -1.6389617919921875, |
|
"logits/rejected": -1.5824358463287354, |
|
"logps/chosen": -283.13287353515625, |
|
"logps/rejected": -382.1869201660156, |
|
"loss": 0.6371, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7059253454208374, |
|
"rewards/margins": 0.6277474164962769, |
|
"rewards/rejected": -1.3336727619171143, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1281577325939618, |
|
"grad_norm": 9.230406347476531, |
|
"learning_rate": 4.988380179235842e-07, |
|
"logits/chosen": -1.6305882930755615, |
|
"logits/rejected": -1.6462520360946655, |
|
"logps/chosen": -256.4553527832031, |
|
"logps/rejected": -347.4143371582031, |
|
"loss": 0.5805, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.49059420824050903, |
|
"rewards/margins": 0.5759122371673584, |
|
"rewards/rejected": -1.0665065050125122, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.13801601971657423, |
|
"grad_norm": 9.206165908014777, |
|
"learning_rate": 4.978612153434526e-07, |
|
"logits/chosen": -1.7708934545516968, |
|
"logits/rejected": -1.7579914331436157, |
|
"logps/chosen": -285.9685974121094, |
|
"logps/rejected": -370.2804260253906, |
|
"loss": 0.5983, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7101233601570129, |
|
"rewards/margins": 0.5043641328811646, |
|
"rewards/rejected": -1.2144873142242432, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1478743068391867, |
|
"grad_norm": 7.723809446488398, |
|
"learning_rate": 4.965903258506806e-07, |
|
"logits/chosen": -1.8401196002960205, |
|
"logits/rejected": -1.7219253778457642, |
|
"logps/chosen": -289.95068359375, |
|
"logps/rejected": -389.30889892578125, |
|
"loss": 0.5573, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.7532116174697876, |
|
"rewards/margins": 0.6191812753677368, |
|
"rewards/rejected": -1.372393012046814, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.15773259396179914, |
|
"grad_norm": 10.916878987391435, |
|
"learning_rate": 4.950268573535011e-07, |
|
"logits/chosen": -2.015733480453491, |
|
"logits/rejected": -1.8680551052093506, |
|
"logps/chosen": -325.2226257324219, |
|
"logps/rejected": -428.49066162109375, |
|
"loss": 0.5476, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -1.0503952503204346, |
|
"rewards/margins": 0.6346156597137451, |
|
"rewards/rejected": -1.6850106716156006, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.16759088108441159, |
|
"grad_norm": 13.425184009136764, |
|
"learning_rate": 4.93172664904641e-07, |
|
"logits/chosen": -1.8795242309570312, |
|
"logits/rejected": -1.913556694984436, |
|
"logps/chosen": -317.763916015625, |
|
"logps/rejected": -424.55450439453125, |
|
"loss": 0.5141, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.1219675540924072, |
|
"rewards/margins": 0.7599529027938843, |
|
"rewards/rejected": -1.881920576095581, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.17744916820702403, |
|
"grad_norm": 15.954473082571113, |
|
"learning_rate": 4.910299485003033e-07, |
|
"logits/chosen": -2.1529054641723633, |
|
"logits/rejected": -2.0844523906707764, |
|
"logps/chosen": -424.82891845703125, |
|
"logps/rejected": -543.7278442382812, |
|
"loss": 0.5199, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.0633959770202637, |
|
"rewards/margins": 0.886804461479187, |
|
"rewards/rejected": -2.950200319290161, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.18730745532963647, |
|
"grad_norm": 17.544754679380226, |
|
"learning_rate": 4.886012504698769e-07, |
|
"logits/chosen": -1.882367730140686, |
|
"logits/rejected": -1.9553489685058594, |
|
"logps/chosen": -406.643310546875, |
|
"logps/rejected": -471.86553955078125, |
|
"loss": 0.5386, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -1.862217664718628, |
|
"rewards/margins": 0.4628971219062805, |
|
"rewards/rejected": -2.3251149654388428, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.1971657424522489, |
|
"grad_norm": 13.476927825101471, |
|
"learning_rate": 4.858894524594652e-07, |
|
"logits/chosen": -2.1455252170562744, |
|
"logits/rejected": -2.0651824474334717, |
|
"logps/chosen": -392.774169921875, |
|
"logps/rejected": -530.4494018554688, |
|
"loss": 0.4917, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8280452489852905, |
|
"rewards/margins": 0.902090847492218, |
|
"rewards/rejected": -2.7301361560821533, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.20702402957486138, |
|
"grad_norm": 23.287769508042025, |
|
"learning_rate": 4.828977720128198e-07, |
|
"logits/chosen": -1.9681150913238525, |
|
"logits/rejected": -1.9559170007705688, |
|
"logps/chosen": -431.6632385253906, |
|
"logps/rejected": -570.6896362304688, |
|
"loss": 0.4834, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -2.2001523971557617, |
|
"rewards/margins": 1.0563952922821045, |
|
"rewards/rejected": -3.2565484046936035, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.21688231669747382, |
|
"grad_norm": 23.293354005808915, |
|
"learning_rate": 4.796297587537285e-07, |
|
"logits/chosen": -2.096468448638916, |
|
"logits/rejected": -1.9595563411712646, |
|
"logps/chosen": -497.79400634765625, |
|
"logps/rejected": -643.2481689453125, |
|
"loss": 0.483, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -2.711547374725342, |
|
"rewards/margins": 1.0744675397872925, |
|
"rewards/rejected": -3.7860145568847656, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.22674060382008626, |
|
"grad_norm": 16.815054431474035, |
|
"learning_rate": 4.760892901743944e-07, |
|
"logits/chosen": -2.1025643348693848, |
|
"logits/rejected": -2.091360092163086, |
|
"logps/chosen": -450.98028564453125, |
|
"logps/rejected": -575.75439453125, |
|
"loss": 0.4619, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -2.345163106918335, |
|
"rewards/margins": 1.00288987159729, |
|
"rewards/rejected": -3.348052978515625, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2365988909426987, |
|
"grad_norm": 18.736725526597898, |
|
"learning_rate": 4.7228056703479626e-07, |
|
"logits/chosen": -1.9844331741333008, |
|
"logits/rejected": -2.1090264320373535, |
|
"logps/chosen": -505.28509521484375, |
|
"logps/rejected": -649.0353393554688, |
|
"loss": 0.4298, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.8585612773895264, |
|
"rewards/margins": 1.334934949874878, |
|
"rewards/rejected": -4.193496227264404, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.24645717806531114, |
|
"grad_norm": 29.41530429769772, |
|
"learning_rate": 4.6820810837849535e-07, |
|
"logits/chosen": -1.9075158834457397, |
|
"logits/rejected": -1.952182412147522, |
|
"logps/chosen": -443.2312927246094, |
|
"logps/rejected": -584.6851196289062, |
|
"loss": 0.4647, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.3362534046173096, |
|
"rewards/margins": 1.1353000402450562, |
|
"rewards/rejected": -3.471553087234497, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.2563154651879236, |
|
"grad_norm": 28.047847807749136, |
|
"learning_rate": 4.63876746170797e-07, |
|
"logits/chosen": -1.9407484531402588, |
|
"logits/rejected": -1.9303442239761353, |
|
"logps/chosen": -533.4217529296875, |
|
"logps/rejected": -713.83740234375, |
|
"loss": 0.4145, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.180513858795166, |
|
"rewards/margins": 1.4735915660858154, |
|
"rewards/rejected": -4.6541056632995605, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.266173752310536, |
|
"grad_norm": 18.58702447039976, |
|
"learning_rate": 4.592916195656321e-07, |
|
"logits/chosen": -2.0613300800323486, |
|
"logits/rejected": -1.971636414527893, |
|
"logps/chosen": -469.5445251464844, |
|
"logps/rejected": -650.7494506835938, |
|
"loss": 0.4332, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -2.464566946029663, |
|
"rewards/margins": 1.3873087167739868, |
|
"rewards/rejected": -3.8518757820129395, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.27603203943314847, |
|
"grad_norm": 43.43885248557689, |
|
"learning_rate": 4.544581688079602e-07, |
|
"logits/chosen": -1.8543685674667358, |
|
"logits/rejected": -1.960680365562439, |
|
"logps/chosen": -499.29150390625, |
|
"logps/rejected": -682.0525512695312, |
|
"loss": 0.4126, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -2.7792274951934814, |
|
"rewards/margins": 1.672224998474121, |
|
"rewards/rejected": -4.45145320892334, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.2858903265557609, |
|
"grad_norm": 25.06136332684734, |
|
"learning_rate": 4.493821287789272e-07, |
|
"logits/chosen": -2.0097248554229736, |
|
"logits/rejected": -2.05975604057312, |
|
"logps/chosen": -622.1812744140625, |
|
"logps/rejected": -857.2575073242188, |
|
"loss": 0.4115, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -4.092832565307617, |
|
"rewards/margins": 1.9847408533096313, |
|
"rewards/rejected": -6.077573299407959, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.2957486136783734, |
|
"grad_norm": 23.62970192824471, |
|
"learning_rate": 4.4406952219143934e-07, |
|
"logits/chosen": -1.9738140106201172, |
|
"logits/rejected": -1.8969192504882812, |
|
"logps/chosen": -505.8863220214844, |
|
"logps/rejected": -674.2682495117188, |
|
"loss": 0.4551, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -2.9365577697753906, |
|
"rewards/margins": 1.3609775304794312, |
|
"rewards/rejected": -4.297535419464111, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.30560690080098585, |
|
"grad_norm": 36.928411112871835, |
|
"learning_rate": 4.38526652444224e-07, |
|
"logits/chosen": -1.9676620960235596, |
|
"logits/rejected": -1.9335002899169922, |
|
"logps/chosen": -526.3443603515625, |
|
"logps/rejected": -675.4140625, |
|
"loss": 0.4316, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -3.1301074028015137, |
|
"rewards/margins": 1.176997423171997, |
|
"rewards/rejected": -4.30710506439209, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3154651879235983, |
|
"grad_norm": 35.737507476172006, |
|
"learning_rate": 4.3276009614285824e-07, |
|
"logits/chosen": -2.08416748046875, |
|
"logits/rejected": -2.0275375843048096, |
|
"logps/chosen": -547.2161254882812, |
|
"logps/rejected": -734.8326416015625, |
|
"loss": 0.4361, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.296079635620117, |
|
"rewards/margins": 1.489527940750122, |
|
"rewards/rejected": -4.78560733795166, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.32532347504621073, |
|
"grad_norm": 25.388193696092944, |
|
"learning_rate": 4.2677669529663686e-07, |
|
"logits/chosen": -1.8684972524642944, |
|
"logits/rejected": -1.98639714717865, |
|
"logps/chosen": -492.37518310546875, |
|
"logps/rejected": -663.5337524414062, |
|
"loss": 0.3908, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -2.736380100250244, |
|
"rewards/margins": 1.5142922401428223, |
|
"rewards/rejected": -4.250672340393066, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.33518176216882317, |
|
"grad_norm": 26.163756341836816, |
|
"learning_rate": 4.2058354920054043e-07, |
|
"logits/chosen": -2.0008151531219482, |
|
"logits/rejected": -2.1545004844665527, |
|
"logps/chosen": -558.0103759765625, |
|
"logps/rejected": -783.8531494140625, |
|
"loss": 0.3635, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.2563652992248535, |
|
"rewards/margins": 2.108079433441162, |
|
"rewards/rejected": -5.364445209503174, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3450400492914356, |
|
"grad_norm": 20.146161792615796, |
|
"learning_rate": 4.141880060119336e-07, |
|
"logits/chosen": -2.138545036315918, |
|
"logits/rejected": -2.1449027061462402, |
|
"logps/chosen": -580.8723754882812, |
|
"logps/rejected": -799.7882690429688, |
|
"loss": 0.4178, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.6856274604797363, |
|
"rewards/margins": 1.9810088872909546, |
|
"rewards/rejected": -5.6666364669799805, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.35489833641404805, |
|
"grad_norm": 20.25459576341684, |
|
"learning_rate": 4.0759765403198877e-07, |
|
"logits/chosen": -1.9771722555160522, |
|
"logits/rejected": -1.9267823696136475, |
|
"logps/chosen": -448.6309509277344, |
|
"logps/rejected": -687.6984252929688, |
|
"loss": 0.3941, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -2.361887216567993, |
|
"rewards/margins": 1.849793791770935, |
|
"rewards/rejected": -4.211681365966797, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3647566235366605, |
|
"grad_norm": 23.732608340062967, |
|
"learning_rate": 4.008203127021797e-07, |
|
"logits/chosen": -2.0232439041137695, |
|
"logits/rejected": -2.0282373428344727, |
|
"logps/chosen": -536.0543212890625, |
|
"logps/rejected": -753.0247802734375, |
|
"loss": 0.3758, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.216007947921753, |
|
"rewards/margins": 1.8679723739624023, |
|
"rewards/rejected": -5.083980560302734, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.37461491065927294, |
|
"grad_norm": 33.821388543016646, |
|
"learning_rate": 3.9386402332652754e-07, |
|
"logits/chosen": -2.0202414989471436, |
|
"logits/rejected": -1.956538200378418, |
|
"logps/chosen": -628.9379272460938, |
|
"logps/rejected": -831.4833984375, |
|
"loss": 0.4385, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -4.222132205963135, |
|
"rewards/margins": 1.7108278274536133, |
|
"rewards/rejected": -5.93295955657959, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3844731977818854, |
|
"grad_norm": 17.956228351745885, |
|
"learning_rate": 3.867370395306068e-07, |
|
"logits/chosen": -1.974908471107483, |
|
"logits/rejected": -1.9330415725708008, |
|
"logps/chosen": -509.0133361816406, |
|
"logps/rejected": -720.5633544921875, |
|
"loss": 0.3801, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.9117178916931152, |
|
"rewards/margins": 1.6813218593597412, |
|
"rewards/rejected": -4.593040466308594, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.3943314849044978, |
|
"grad_norm": 24.48103397679138, |
|
"learning_rate": 3.794478174686328e-07, |
|
"logits/chosen": -1.9475266933441162, |
|
"logits/rejected": -1.9687010049819946, |
|
"logps/chosen": -549.758544921875, |
|
"logps/rejected": -740.8396606445312, |
|
"loss": 0.4111, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.3982017040252686, |
|
"rewards/margins": 1.5769809484481812, |
|
"rewards/rejected": -4.97518253326416, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.4041897720271103, |
|
"grad_norm": 19.929793517914295, |
|
"learning_rate": 3.720050057902495e-07, |
|
"logits/chosen": -2.11773419380188, |
|
"logits/rejected": -2.0510640144348145, |
|
"logps/chosen": -678.2037353515625, |
|
"logps/rejected": -897.92822265625, |
|
"loss": 0.3989, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -4.697990894317627, |
|
"rewards/margins": 1.6948550939559937, |
|
"rewards/rejected": -6.39284610748291, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.41404805914972276, |
|
"grad_norm": 19.138382009358025, |
|
"learning_rate": 3.644174353789204e-07, |
|
"logits/chosen": -1.96860671043396, |
|
"logits/rejected": -1.9445680379867554, |
|
"logps/chosen": -541.2803955078125, |
|
"logps/rejected": -714.5045776367188, |
|
"loss": 0.3758, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.2117228507995605, |
|
"rewards/margins": 1.4313344955444336, |
|
"rewards/rejected": -4.643057346343994, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.4239063462723352, |
|
"grad_norm": 22.61062071667254, |
|
"learning_rate": 3.566941088741009e-07, |
|
"logits/chosen": -1.9290311336517334, |
|
"logits/rejected": -1.9250952005386353, |
|
"logps/chosen": -502.6095275878906, |
|
"logps/rejected": -698.4926147460938, |
|
"loss": 0.3967, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.9526402950286865, |
|
"rewards/margins": 1.6423494815826416, |
|
"rewards/rejected": -4.594989776611328, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.43376463339494764, |
|
"grad_norm": 28.506261562704676, |
|
"learning_rate": 3.488441899896217e-07, |
|
"logits/chosen": -2.1637561321258545, |
|
"logits/rejected": -1.9638168811798096, |
|
"logps/chosen": -579.2008056640625, |
|
"logps/rejected": -836.2589111328125, |
|
"loss": 0.3974, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.5565972328186035, |
|
"rewards/margins": 2.140427589416504, |
|
"rewards/rejected": -5.697024345397949, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.4436229205175601, |
|
"grad_norm": 22.104238159035294, |
|
"learning_rate": 3.408769926409574e-07, |
|
"logits/chosen": -1.9999799728393555, |
|
"logits/rejected": -1.9067310094833374, |
|
"logps/chosen": -533.4635009765625, |
|
"logps/rejected": -767.3900146484375, |
|
"loss": 0.3601, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.1343350410461426, |
|
"rewards/margins": 1.9703528881072998, |
|
"rewards/rejected": -5.104687690734863, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4534812076401725, |
|
"grad_norm": 21.86054071865173, |
|
"learning_rate": 3.3280196989428263e-07, |
|
"logits/chosen": -2.0549824237823486, |
|
"logits/rejected": -2.079737424850464, |
|
"logps/chosen": -571.4501342773438, |
|
"logps/rejected": -805.6971435546875, |
|
"loss": 0.3644, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -3.492208480834961, |
|
"rewards/margins": 2.0622007846832275, |
|
"rewards/rejected": -5.554409027099609, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.46333949476278496, |
|
"grad_norm": 28.670025336805338, |
|
"learning_rate": 3.2462870275042367e-07, |
|
"logits/chosen": -2.086364269256592, |
|
"logits/rejected": -2.082109212875366, |
|
"logps/chosen": -627.2444458007812, |
|
"logps/rejected": -857.6990356445312, |
|
"loss": 0.3692, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.9953174591064453, |
|
"rewards/margins": 2.085484266281128, |
|
"rewards/rejected": -6.080801963806152, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.4731977818853974, |
|
"grad_norm": 23.098928119258375, |
|
"learning_rate": 3.1636688877701806e-07, |
|
"logits/chosen": -1.9278815984725952, |
|
"logits/rejected": -2.008877992630005, |
|
"logps/chosen": -536.9634399414062, |
|
"logps/rejected": -782.7907104492188, |
|
"loss": 0.3307, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.128661632537842, |
|
"rewards/margins": 2.186957836151123, |
|
"rewards/rejected": -5.315619468688965, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.48305606900800985, |
|
"grad_norm": 18.256316767301172, |
|
"learning_rate": 3.080263306023669e-07, |
|
"logits/chosen": -1.9272663593292236, |
|
"logits/rejected": -1.9132862091064453, |
|
"logps/chosen": -510.236328125, |
|
"logps/rejected": -714.0992431640625, |
|
"loss": 0.3866, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.079207420349121, |
|
"rewards/margins": 1.7918453216552734, |
|
"rewards/rejected": -4.8710527420043945, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.4929143561306223, |
|
"grad_norm": 21.751680260746046, |
|
"learning_rate": 2.996169242846328e-07, |
|
"logits/chosen": -1.8919010162353516, |
|
"logits/rejected": -1.9492820501327515, |
|
"logps/chosen": -575.5780029296875, |
|
"logps/rejected": -819.9085693359375, |
|
"loss": 0.3276, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.592189311981201, |
|
"rewards/margins": 2.1720731258392334, |
|
"rewards/rejected": -5.764262676239014, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5027726432532348, |
|
"grad_norm": 22.663811321818965, |
|
"learning_rate": 2.911486475701835e-07, |
|
"logits/chosen": -1.8436260223388672, |
|
"logits/rejected": -1.8624000549316406, |
|
"logps/chosen": -532.0939331054688, |
|
"logps/rejected": -772.1865234375, |
|
"loss": 0.3646, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.234412670135498, |
|
"rewards/margins": 2.037332773208618, |
|
"rewards/rejected": -5.271745681762695, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5126309303758472, |
|
"grad_norm": 45.86539600331869, |
|
"learning_rate": 2.826315480550129e-07, |
|
"logits/chosen": -1.8276054859161377, |
|
"logits/rejected": -1.944835901260376, |
|
"logps/chosen": -522.5578002929688, |
|
"logps/rejected": -729.6175537109375, |
|
"loss": 0.3653, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -3.1025471687316895, |
|
"rewards/margins": 1.926390290260315, |
|
"rewards/rejected": -5.028937339782715, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5224892174984597, |
|
"grad_norm": 33.790931231853406, |
|
"learning_rate": 2.740757312632854e-07, |
|
"logits/chosen": -1.9260978698730469, |
|
"logits/rejected": -1.8717044591903687, |
|
"logps/chosen": -576.6935424804688, |
|
"logps/rejected": -834.5701904296875, |
|
"loss": 0.3316, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.7069427967071533, |
|
"rewards/margins": 2.2525296211242676, |
|
"rewards/rejected": -5.959472179412842, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.532347504621072, |
|
"grad_norm": 32.72135751726444, |
|
"learning_rate": 2.654913486571487e-07, |
|
"logits/chosen": -1.928877830505371, |
|
"logits/rejected": -1.9832346439361572, |
|
"logps/chosen": -580.7061767578125, |
|
"logps/rejected": -821.77734375, |
|
"loss": 0.3773, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.5770630836486816, |
|
"rewards/margins": 2.1589841842651367, |
|
"rewards/rejected": -5.73604679107666, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5422057917436846, |
|
"grad_norm": 18.44880000765859, |
|
"learning_rate": 2.5688858559204053e-07, |
|
"logits/chosen": -1.8500230312347412, |
|
"logits/rejected": -1.8931682109832764, |
|
"logps/chosen": -484.74420166015625, |
|
"logps/rejected": -701.5289916992188, |
|
"loss": 0.3747, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -2.6471669673919678, |
|
"rewards/margins": 1.9642257690429688, |
|
"rewards/rejected": -4.611392974853516, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5520640788662969, |
|
"grad_norm": 23.335141498824942, |
|
"learning_rate": 2.4827764923178246e-07, |
|
"logits/chosen": -1.8331562280654907, |
|
"logits/rejected": -1.9513938426971436, |
|
"logps/chosen": -470.59405517578125, |
|
"logps/rejected": -660.6781005859375, |
|
"loss": 0.3683, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -2.504983901977539, |
|
"rewards/margins": 1.7337911128997803, |
|
"rewards/rejected": -4.23877477645874, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.5619223659889094, |
|
"grad_norm": 26.351304197321983, |
|
"learning_rate": 2.3966875643779667e-07, |
|
"logits/chosen": -2.0291342735290527, |
|
"logits/rejected": -1.9187507629394531, |
|
"logps/chosen": -495.74639892578125, |
|
"logps/rejected": -760.9273681640625, |
|
"loss": 0.3506, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -2.894580125808716, |
|
"rewards/margins": 2.167811632156372, |
|
"rewards/rejected": -5.062391757965088, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.5717806531115218, |
|
"grad_norm": 21.633547530781627, |
|
"learning_rate": 2.3107212164681774e-07, |
|
"logits/chosen": -1.871260643005371, |
|
"logits/rejected": -1.916135549545288, |
|
"logps/chosen": -529.8262939453125, |
|
"logps/rejected": -772.5482177734375, |
|
"loss": 0.36, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -3.0958144664764404, |
|
"rewards/margins": 2.1699347496032715, |
|
"rewards/rejected": -5.265749454498291, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.5816389402341343, |
|
"grad_norm": 28.65131510288306, |
|
"learning_rate": 2.2249794475148019e-07, |
|
"logits/chosen": -2.063917636871338, |
|
"logits/rejected": -2.049710750579834, |
|
"logps/chosen": -510.1465759277344, |
|
"logps/rejected": -759.2296752929688, |
|
"loss": 0.3827, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -2.9606268405914307, |
|
"rewards/margins": 2.110110282897949, |
|
"rewards/rejected": -5.070736885070801, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.5914972273567468, |
|
"grad_norm": 23.359544656067033, |
|
"learning_rate": 2.1395639899816332e-07, |
|
"logits/chosen": -2.2645859718322754, |
|
"logits/rejected": -1.9906375408172607, |
|
"logps/chosen": -541.4847412109375, |
|
"logps/rejected": -846.5947265625, |
|
"loss": 0.3488, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.3356003761291504, |
|
"rewards/margins": 2.4959442615509033, |
|
"rewards/rejected": -5.831544399261475, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6013555144793592, |
|
"grad_norm": 24.944829150573064, |
|
"learning_rate": 2.0545761891645177e-07, |
|
"logits/chosen": -2.0867130756378174, |
|
"logits/rejected": -2.074833393096924, |
|
"logps/chosen": -642.1096801757812, |
|
"logps/rejected": -906.7780151367188, |
|
"loss": 0.3502, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -4.175184726715088, |
|
"rewards/margins": 2.322237253189087, |
|
"rewards/rejected": -6.497422218322754, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6112138016019717, |
|
"grad_norm": 24.979816541182146, |
|
"learning_rate": 1.9701168829453305e-07, |
|
"logits/chosen": -1.932847023010254, |
|
"logits/rejected": -1.9259026050567627, |
|
"logps/chosen": -570.7978515625, |
|
"logps/rejected": -823.3259887695312, |
|
"loss": 0.3411, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.579385757446289, |
|
"rewards/margins": 2.1752305030822754, |
|
"rewards/rejected": -5.7546162605285645, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6210720887245841, |
|
"grad_norm": 24.585502500513254, |
|
"learning_rate": 1.886286282148002e-07, |
|
"logits/chosen": -2.069624662399292, |
|
"logits/rejected": -1.978257179260254, |
|
"logps/chosen": -547.39794921875, |
|
"logps/rejected": -787.85302734375, |
|
"loss": 0.331, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -3.470412015914917, |
|
"rewards/margins": 2.049595594406128, |
|
"rewards/rejected": -5.520008087158203, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6309303758471966, |
|
"grad_norm": 29.24520617120494, |
|
"learning_rate": 1.8031838516385422e-07, |
|
"logits/chosen": -2.089122772216797, |
|
"logits/rejected": -2.0376973152160645, |
|
"logps/chosen": -622.2824096679688, |
|
"logps/rejected": -920.2009887695312, |
|
"loss": 0.3733, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.0230793952941895, |
|
"rewards/margins": 2.4440813064575195, |
|
"rewards/rejected": -6.467160701751709, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.640788662969809, |
|
"grad_norm": 20.742877534346576, |
|
"learning_rate": 1.7209081923101472e-07, |
|
"logits/chosen": -2.0211918354034424, |
|
"logits/rejected": -2.014601230621338, |
|
"logps/chosen": -589.6067504882812, |
|
"logps/rejected": -773.7950439453125, |
|
"loss": 0.3436, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.677738904953003, |
|
"rewards/margins": 1.7034008502960205, |
|
"rewards/rejected": -5.381140232086182, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.6506469500924215, |
|
"grad_norm": 16.903001935618324, |
|
"learning_rate": 1.639556924093404e-07, |
|
"logits/chosen": -1.8897491693496704, |
|
"logits/rejected": -1.88128662109375, |
|
"logps/chosen": -517.2490844726562, |
|
"logps/rejected": -746.6140747070312, |
|
"loss": 0.3561, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -3.064331531524658, |
|
"rewards/margins": 1.9852135181427002, |
|
"rewards/rejected": -5.0495452880859375, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.6605052372150338, |
|
"grad_norm": 19.809662336676986, |
|
"learning_rate": 1.5592265701304114e-07, |
|
"logits/chosen": -2.0255661010742188, |
|
"logits/rejected": -1.944502592086792, |
|
"logps/chosen": -566.5452270507812, |
|
"logps/rejected": -803.533203125, |
|
"loss": 0.3705, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.5436272621154785, |
|
"rewards/margins": 2.0192878246307373, |
|
"rewards/rejected": -5.562914848327637, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.6703635243376463, |
|
"grad_norm": 26.202979422607854, |
|
"learning_rate": 1.4800124422502334e-07, |
|
"logits/chosen": -1.918569803237915, |
|
"logits/rejected": -2.0119967460632324, |
|
"logps/chosen": -601.0817260742188, |
|
"logps/rejected": -845.7435302734375, |
|
"loss": 0.3597, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -3.8363289833068848, |
|
"rewards/margins": 2.0620241165161133, |
|
"rewards/rejected": -5.89835262298584, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.6802218114602587, |
|
"grad_norm": 26.077309548266044, |
|
"learning_rate": 1.4020085278815743e-07, |
|
"logits/chosen": -2.0037617683410645, |
|
"logits/rejected": -1.8837954998016357, |
|
"logps/chosen": -645.288818359375, |
|
"logps/rejected": -909.7770385742188, |
|
"loss": 0.3434, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -4.325263500213623, |
|
"rewards/margins": 2.183290719985962, |
|
"rewards/rejected": -6.508553981781006, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.6900800985828712, |
|
"grad_norm": 20.206758195915803, |
|
"learning_rate": 1.3253073785368545e-07, |
|
"logits/chosen": -1.97844660282135, |
|
"logits/rejected": -1.9779163599014282, |
|
"logps/chosen": -656.6150512695312, |
|
"logps/rejected": -917.7893676757812, |
|
"loss": 0.3432, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -4.431666374206543, |
|
"rewards/margins": 2.2607076168060303, |
|
"rewards/rejected": -6.692374229431152, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.6999383857054837, |
|
"grad_norm": 24.24143829005782, |
|
"learning_rate": 1.2500000000000005e-07, |
|
"logits/chosen": -2.066188335418701, |
|
"logits/rejected": -2.054232120513916, |
|
"logps/chosen": -643.3806762695312, |
|
"logps/rejected": -863.3739013671875, |
|
"loss": 0.3583, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -4.284465789794922, |
|
"rewards/margins": 1.9454777240753174, |
|
"rewards/rejected": -6.22994327545166, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7097966728280961, |
|
"grad_norm": 17.76728293699117, |
|
"learning_rate": 1.1761757443482285e-07, |
|
"logits/chosen": -1.8952592611312866, |
|
"logits/rejected": -1.8270065784454346, |
|
"logps/chosen": -567.1143798828125, |
|
"logps/rejected": -791.7889404296875, |
|
"loss": 0.3722, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.542332172393799, |
|
"rewards/margins": 1.8870967626571655, |
|
"rewards/rejected": -5.429428577423096, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7196549599507086, |
|
"grad_norm": 25.546590661527123, |
|
"learning_rate": 1.1039222039359644e-07, |
|
"logits/chosen": -1.9491792917251587, |
|
"logits/rejected": -1.8340580463409424, |
|
"logps/chosen": -522.3615112304688, |
|
"logps/rejected": -782.1358032226562, |
|
"loss": 0.3194, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -3.073195219039917, |
|
"rewards/margins": 2.195949077606201, |
|
"rewards/rejected": -5.269144535064697, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.729513247073321, |
|
"grad_norm": 25.390221264918292, |
|
"learning_rate": 1.0333251074666608e-07, |
|
"logits/chosen": -1.8948665857315063, |
|
"logits/rejected": -1.8821592330932617, |
|
"logps/chosen": -578.3306884765625, |
|
"logps/rejected": -830.9544677734375, |
|
"loss": 0.3285, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.6443309783935547, |
|
"rewards/margins": 2.2878963947296143, |
|
"rewards/rejected": -5.932227611541748, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.7393715341959335, |
|
"grad_norm": 23.32295380693496, |
|
"learning_rate": 9.644682182758304e-08, |
|
"logits/chosen": -1.8538382053375244, |
|
"logits/rejected": -1.8016763925552368, |
|
"logps/chosen": -604.1889038085938, |
|
"logps/rejected": -873.8849487304688, |
|
"loss": 0.3055, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.8995869159698486, |
|
"rewards/margins": 2.3217251300811768, |
|
"rewards/rejected": -6.221312046051025, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.7492298213185459, |
|
"grad_norm": 23.34487045577994, |
|
"learning_rate": 8.974332349459992e-08, |
|
"logits/chosen": -1.913751244544983, |
|
"logits/rejected": -1.8759132623672485, |
|
"logps/chosen": -620.8341064453125, |
|
"logps/rejected": -879.5367431640625, |
|
"loss": 0.3497, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -3.9944968223571777, |
|
"rewards/margins": 2.277583360671997, |
|
"rewards/rejected": -6.272080421447754, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.7590881084411584, |
|
"grad_norm": 32.80860271044305, |
|
"learning_rate": 8.322996943714672e-08, |
|
"logits/chosen": -1.9127395153045654, |
|
"logits/rejected": -1.716653823852539, |
|
"logps/chosen": -559.0478515625, |
|
"logps/rejected": -869.8646240234375, |
|
"loss": 0.3553, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.513237714767456, |
|
"rewards/margins": 2.5553982257843018, |
|
"rewards/rejected": -6.068636894226074, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.7689463955637708, |
|
"grad_norm": 22.70407770691601, |
|
"learning_rate": 7.691448773879256e-08, |
|
"logits/chosen": -1.8521419763565063, |
|
"logits/rejected": -1.7435353994369507, |
|
"logps/chosen": -503.28369140625, |
|
"logps/rejected": -769.7913818359375, |
|
"loss": 0.3582, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -2.978849172592163, |
|
"rewards/margins": 2.2009291648864746, |
|
"rewards/rejected": -5.179778575897217, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.7788046826863833, |
|
"grad_norm": 26.31642324943315, |
|
"learning_rate": 7.080437170788722e-08, |
|
"logits/chosen": -1.9601354598999023, |
|
"logits/rejected": -1.8902816772460938, |
|
"logps/chosen": -511.15478515625, |
|
"logps/rejected": -765.9757690429688, |
|
"loss": 0.3126, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.040039539337158, |
|
"rewards/margins": 2.2116754055023193, |
|
"rewards/rejected": -5.251715183258057, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.7886629698089956, |
|
"grad_norm": 73.40724109949657, |
|
"learning_rate": 6.490687098676332e-08, |
|
"logits/chosen": -1.776098608970642, |
|
"logits/rejected": -1.7230415344238281, |
|
"logps/chosen": -568.650634765625, |
|
"logps/rejected": -813.4503173828125, |
|
"loss": 0.347, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -3.520684003829956, |
|
"rewards/margins": 2.105372190475464, |
|
"rewards/rejected": -5.626055717468262, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7886629698089956, |
|
"eval_logits/chosen": -2.5592944622039795, |
|
"eval_logits/rejected": -2.4283623695373535, |
|
"eval_logps/chosen": -432.9762878417969, |
|
"eval_logps/rejected": -481.0541076660156, |
|
"eval_loss": 0.5772423148155212, |
|
"eval_rewards/accuracies": 0.6794354915618896, |
|
"eval_rewards/chosen": -1.7008415460586548, |
|
"eval_rewards/margins": 0.37072598934173584, |
|
"eval_rewards/rejected": -2.0715677738189697, |
|
"eval_runtime": 324.936, |
|
"eval_samples_per_second": 6.081, |
|
"eval_steps_per_second": 0.382, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.7985212569316081, |
|
"grad_norm": 33.809746730746596, |
|
"learning_rate": 5.9228982950048414e-08, |
|
"logits/chosen": -1.7156673669815063, |
|
"logits/rejected": -1.7448875904083252, |
|
"logps/chosen": -582.68603515625, |
|
"logps/rejected": -882.1572265625, |
|
"loss": 0.3578, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -3.6797871589660645, |
|
"rewards/margins": 2.6076254844665527, |
|
"rewards/rejected": -6.287413120269775, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8083795440542206, |
|
"grad_norm": 23.982039805708112, |
|
"learning_rate": 5.3777444402291345e-08, |
|
"logits/chosen": -1.9656894207000732, |
|
"logits/rejected": -1.7757899761199951, |
|
"logps/chosen": -602.1336059570312, |
|
"logps/rejected": -914.3304443359375, |
|
"loss": 0.2749, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.811291456222534, |
|
"rewards/margins": 2.5836830139160156, |
|
"rewards/rejected": -6.394974708557129, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.818237831176833, |
|
"grad_norm": 21.999889032487328, |
|
"learning_rate": 4.855872358475546e-08, |
|
"logits/chosen": -1.883536696434021, |
|
"logits/rejected": -1.8990424871444702, |
|
"logps/chosen": -593.3975219726562, |
|
"logps/rejected": -852.3743896484375, |
|
"loss": 0.3421, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.749640941619873, |
|
"rewards/margins": 2.2763512134552, |
|
"rewards/rejected": -6.025992393493652, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8280961182994455, |
|
"grad_norm": 22.43509931864549, |
|
"learning_rate": 4.357901250086107e-08, |
|
"logits/chosen": -1.9897289276123047, |
|
"logits/rejected": -1.8019778728485107, |
|
"logps/chosen": -604.5925903320312, |
|
"logps/rejected": -907.0695190429688, |
|
"loss": 0.34, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.7366394996643066, |
|
"rewards/margins": 2.591937303543091, |
|
"rewards/rejected": -6.328576564788818, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.8379544054220579, |
|
"grad_norm": 24.272876807226076, |
|
"learning_rate": 3.884421956938377e-08, |
|
"logits/chosen": -1.7035375833511353, |
|
"logits/rejected": -1.8067095279693604, |
|
"logps/chosen": -621.3763427734375, |
|
"logps/rejected": -809.0113525390625, |
|
"loss": 0.3274, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.943162441253662, |
|
"rewards/margins": 1.8734540939331055, |
|
"rewards/rejected": -5.816616535186768, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.8478126925446704, |
|
"grad_norm": 20.673588966056126, |
|
"learning_rate": 3.435996261412591e-08, |
|
"logits/chosen": -1.7106269598007202, |
|
"logits/rejected": -1.7173693180084229, |
|
"logps/chosen": -582.3190307617188, |
|
"logps/rejected": -837.8707275390625, |
|
"loss": 0.3204, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.7369446754455566, |
|
"rewards/margins": 2.223895311355591, |
|
"rewards/rejected": -5.960839748382568, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.8576709796672828, |
|
"grad_norm": 21.70614636700232, |
|
"learning_rate": 3.013156219837776e-08, |
|
"logits/chosen": -2.0358176231384277, |
|
"logits/rejected": -1.7434278726577759, |
|
"logps/chosen": -567.6253662109375, |
|
"logps/rejected": -890.8966674804688, |
|
"loss": 0.3264, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.516098737716675, |
|
"rewards/margins": 2.7037405967712402, |
|
"rewards/rejected": -6.219839096069336, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.8675292667898953, |
|
"grad_norm": 32.0982872650184, |
|
"learning_rate": 2.6164035312078447e-08, |
|
"logits/chosen": -1.87311589717865, |
|
"logits/rejected": -1.8581056594848633, |
|
"logps/chosen": -588.0389404296875, |
|
"logps/rejected": -895.1696166992188, |
|
"loss": 0.3188, |
|
"rewards/accuracies": 0.8812500238418579, |
|
"rewards/chosen": -3.7721753120422363, |
|
"rewards/margins": 2.676305055618286, |
|
"rewards/rejected": -6.448480129241943, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.8773875539125077, |
|
"grad_norm": 21.51066896519883, |
|
"learning_rate": 2.2462089419165776e-08, |
|
"logits/chosen": -1.8648655414581299, |
|
"logits/rejected": -1.7761850357055664, |
|
"logps/chosen": -582.1537475585938, |
|
"logps/rejected": -876.07080078125, |
|
"loss": 0.3584, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.821885347366333, |
|
"rewards/margins": 2.4609155654907227, |
|
"rewards/rejected": -6.282800197601318, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.8872458410351202, |
|
"grad_norm": 23.61458187816769, |
|
"learning_rate": 1.9030116872178314e-08, |
|
"logits/chosen": -1.8204158544540405, |
|
"logits/rejected": -1.798825979232788, |
|
"logps/chosen": -608.7778930664062, |
|
"logps/rejected": -842.8968505859375, |
|
"loss": 0.355, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -3.7794837951660156, |
|
"rewards/margins": 2.1480660438537598, |
|
"rewards/rejected": -5.927549839019775, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.8971041281577325, |
|
"grad_norm": 19.975596086165712, |
|
"learning_rate": 1.5872189700736337e-08, |
|
"logits/chosen": -1.7636759281158447, |
|
"logits/rejected": -1.8992855548858643, |
|
"logps/chosen": -585.3933715820312, |
|
"logps/rejected": -801.01025390625, |
|
"loss": 0.3525, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -3.633349657058716, |
|
"rewards/margins": 1.9284839630126953, |
|
"rewards/rejected": -5.561833381652832, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.906962415280345, |
|
"grad_norm": 24.116575473235745, |
|
"learning_rate": 1.2992054780085692e-08, |
|
"logits/chosen": -1.6149314641952515, |
|
"logits/rejected": -1.6830947399139404, |
|
"logps/chosen": -552.21728515625, |
|
"logps/rejected": -793.0897216796875, |
|
"loss": 0.3263, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.4062886238098145, |
|
"rewards/margins": 2.0678482055664062, |
|
"rewards/rejected": -5.474137306213379, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9168207024029574, |
|
"grad_norm": 20.57194341940523, |
|
"learning_rate": 1.0393129385436823e-08, |
|
"logits/chosen": -1.9199676513671875, |
|
"logits/rejected": -1.8623239994049072, |
|
"logps/chosen": -570.3748779296875, |
|
"logps/rejected": -817.0477294921875, |
|
"loss": 0.3323, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.562505006790161, |
|
"rewards/margins": 2.1840949058532715, |
|
"rewards/rejected": -5.7465996742248535, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9266789895255699, |
|
"grad_norm": 23.30110640610616, |
|
"learning_rate": 8.078497137373242e-09, |
|
"logits/chosen": -1.7810325622558594, |
|
"logits/rejected": -1.7818634510040283, |
|
"logps/chosen": -555.9640502929688, |
|
"logps/rejected": -835.23876953125, |
|
"loss": 0.3237, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.4883415699005127, |
|
"rewards/margins": 2.179206609725952, |
|
"rewards/rejected": -5.667548179626465, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.9365372766481824, |
|
"grad_norm": 21.422635902068766, |
|
"learning_rate": 6.0509043431410945e-09, |
|
"logits/chosen": -1.7087141275405884, |
|
"logits/rejected": -1.772657036781311, |
|
"logps/chosen": -568.8113403320312, |
|
"logps/rejected": -804.4452514648438, |
|
"loss": 0.3425, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -3.5404930114746094, |
|
"rewards/margins": 2.15417742729187, |
|
"rewards/rejected": -5.694670677185059, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.9463955637707948, |
|
"grad_norm": 22.24576845817703, |
|
"learning_rate": 4.312756738160145e-09, |
|
"logits/chosen": -1.8130733966827393, |
|
"logits/rejected": -1.7939121723175049, |
|
"logps/chosen": -561.7185668945312, |
|
"logps/rejected": -826.4733276367188, |
|
"loss": 0.3187, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.5638492107391357, |
|
"rewards/margins": 2.2962565422058105, |
|
"rewards/rejected": -5.860105991363525, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.9562538508934073, |
|
"grad_norm": 20.818504977861426, |
|
"learning_rate": 2.8661166316229223e-09, |
|
"logits/chosen": -1.7990143299102783, |
|
"logits/rejected": -1.7799808979034424, |
|
"logps/chosen": -545.7501220703125, |
|
"logps/rejected": -777.5648193359375, |
|
"loss": 0.338, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -3.4501044750213623, |
|
"rewards/margins": 1.9422149658203125, |
|
"rewards/rejected": -5.392319202423096, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.9661121380160197, |
|
"grad_norm": 21.367843020001658, |
|
"learning_rate": 1.7127004595681727e-09, |
|
"logits/chosen": -1.8907989263534546, |
|
"logits/rejected": -1.803995132446289, |
|
"logps/chosen": -572.9863891601562, |
|
"logps/rejected": -869.6575317382812, |
|
"loss": 0.3514, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -3.549314498901367, |
|
"rewards/margins": 2.517167568206787, |
|
"rewards/rejected": -6.066482067108154, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.9759704251386322, |
|
"grad_norm": 26.301841729679015, |
|
"learning_rate": 8.538767483325383e-10, |
|
"logits/chosen": -1.6898645162582397, |
|
"logits/rejected": -1.872666597366333, |
|
"logps/chosen": -564.5504760742188, |
|
"logps/rejected": -813.0301513671875, |
|
"loss": 0.3249, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.5329060554504395, |
|
"rewards/margins": 2.330728054046631, |
|
"rewards/rejected": -5.863633632659912, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.9858287122612446, |
|
"grad_norm": 28.33067138539654, |
|
"learning_rate": 2.9066449079634404e-10, |
|
"logits/chosen": -1.81964910030365, |
|
"logits/rejected": -1.7677667140960693, |
|
"logps/chosen": -553.2039184570312, |
|
"logps/rejected": -806.8800048828125, |
|
"loss": 0.3026, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -3.421668291091919, |
|
"rewards/margins": 2.2354369163513184, |
|
"rewards/rejected": -5.657104969024658, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.9956869993838571, |
|
"grad_norm": 23.713797105940532, |
|
"learning_rate": 2.3731937350224273e-11, |
|
"logits/chosen": -1.9265756607055664, |
|
"logits/rejected": -1.8447071313858032, |
|
"logps/chosen": -565.0730590820312, |
|
"logps/rejected": -841.3292236328125, |
|
"loss": 0.3122, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -3.6411995887756348, |
|
"rewards/margins": 2.3857717514038086, |
|
"rewards/rejected": -6.026970863342285, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.999630314232902, |
|
"step": 507, |
|
"total_flos": 0.0, |
|
"train_loss": 0.41502543125867375, |
|
"train_runtime": 18234.8908, |
|
"train_samples_per_second": 3.56, |
|
"train_steps_per_second": 0.028 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 507, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|