|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9978094194961664, |
|
"eval_steps": 50000, |
|
"global_step": 1216, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008214676889375685, |
|
"grad_norm": 47.48785366410319, |
|
"learning_rate": 4.0983606557377046e-08, |
|
"logits/chosen": 26.403932571411133, |
|
"logits/rejected": 25.755094528198242, |
|
"logps/chosen": -185.5782928466797, |
|
"logps/rejected": -79.66442108154297, |
|
"loss": 1.7879, |
|
"rewards/accuracies": 0.30666670203208923, |
|
"rewards/chosen": 0.008285612799227238, |
|
"rewards/margins": 0.017053820192813873, |
|
"rewards/rejected": -0.008768204599618912, |
|
"sft_loss": 0.6387583017349243, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01642935377875137, |
|
"grad_norm": 36.134481992571715, |
|
"learning_rate": 8.196721311475409e-08, |
|
"logits/chosen": 25.775484085083008, |
|
"logits/rejected": 25.31159210205078, |
|
"logps/chosen": -152.4672088623047, |
|
"logps/rejected": -72.757080078125, |
|
"loss": 1.6789, |
|
"rewards/accuracies": 0.7333334684371948, |
|
"rewards/chosen": -0.026889141649007797, |
|
"rewards/margins": 0.14848218858242035, |
|
"rewards/rejected": -0.17537136375904083, |
|
"sft_loss": 0.6469724774360657, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.024644030668127054, |
|
"grad_norm": 19.978551205164187, |
|
"learning_rate": 1.2295081967213113e-07, |
|
"logits/chosen": 26.670787811279297, |
|
"logits/rejected": 26.257781982421875, |
|
"logps/chosen": -176.73304748535156, |
|
"logps/rejected": -84.2028579711914, |
|
"loss": 1.4459, |
|
"rewards/accuracies": 0.8666666746139526, |
|
"rewards/chosen": -0.1812039315700531, |
|
"rewards/margins": 0.5640282034873962, |
|
"rewards/rejected": -0.7452322244644165, |
|
"sft_loss": 0.6364741921424866, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03285870755750274, |
|
"grad_norm": 20.48799482343835, |
|
"learning_rate": 1.6393442622950818e-07, |
|
"logits/chosen": 26.263166427612305, |
|
"logits/rejected": 26.03022003173828, |
|
"logps/chosen": -214.57823181152344, |
|
"logps/rejected": -111.45527648925781, |
|
"loss": 1.316, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": -0.642541766166687, |
|
"rewards/margins": 1.2724699974060059, |
|
"rewards/rejected": -1.9150116443634033, |
|
"sft_loss": 0.7241686582565308, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04107338444687842, |
|
"grad_norm": 24.43893120773317, |
|
"learning_rate": 2.0491803278688524e-07, |
|
"logits/chosen": 25.63840103149414, |
|
"logits/rejected": 25.88968849182129, |
|
"logps/chosen": -180.67430114746094, |
|
"logps/rejected": -108.99486541748047, |
|
"loss": 1.26, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -0.9794896245002747, |
|
"rewards/margins": 1.706125020980835, |
|
"rewards/rejected": -2.6856143474578857, |
|
"sft_loss": 0.7140628695487976, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.04928806133625411, |
|
"grad_norm": 15.575922163206743, |
|
"learning_rate": 2.4590163934426226e-07, |
|
"logits/chosen": 25.174482345581055, |
|
"logits/rejected": 25.23969841003418, |
|
"logps/chosen": -213.48123168945312, |
|
"logps/rejected": -114.4116439819336, |
|
"loss": 1.1511, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -0.9717932343482971, |
|
"rewards/margins": 2.3237061500549316, |
|
"rewards/rejected": -3.295499563217163, |
|
"sft_loss": 0.6879211664199829, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05750273822562979, |
|
"grad_norm": 12.317269413176323, |
|
"learning_rate": 2.868852459016393e-07, |
|
"logits/chosen": 24.615764617919922, |
|
"logits/rejected": 24.808069229125977, |
|
"logps/chosen": -202.15489196777344, |
|
"logps/rejected": -124.00420379638672, |
|
"loss": 1.0435, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -1.0643211603164673, |
|
"rewards/margins": 2.588770627975464, |
|
"rewards/rejected": -3.6530916690826416, |
|
"sft_loss": 0.7430208325386047, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.06571741511500548, |
|
"grad_norm": 11.98913328054039, |
|
"learning_rate": 3.2786885245901637e-07, |
|
"logits/chosen": 24.245140075683594, |
|
"logits/rejected": 24.268098831176758, |
|
"logps/chosen": -207.348876953125, |
|
"logps/rejected": -116.2168960571289, |
|
"loss": 0.9343, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": -1.0519558191299438, |
|
"rewards/margins": 2.5677475929260254, |
|
"rewards/rejected": -3.619703531265259, |
|
"sft_loss": 0.7124413251876831, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07393209200438117, |
|
"grad_norm": 11.849547311712948, |
|
"learning_rate": 3.6885245901639347e-07, |
|
"logits/chosen": 22.61182403564453, |
|
"logits/rejected": 22.616382598876953, |
|
"logps/chosen": -222.93838500976562, |
|
"logps/rejected": -123.43074798583984, |
|
"loss": 0.8683, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -1.442903995513916, |
|
"rewards/margins": 2.7517101764678955, |
|
"rewards/rejected": -4.194613456726074, |
|
"sft_loss": 0.702341616153717, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.08214676889375684, |
|
"grad_norm": 11.859772629239353, |
|
"learning_rate": 4.0983606557377047e-07, |
|
"logits/chosen": 20.62839126586914, |
|
"logits/rejected": 20.336801528930664, |
|
"logps/chosen": -241.59852600097656, |
|
"logps/rejected": -132.82681274414062, |
|
"loss": 0.7963, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -2.2198777198791504, |
|
"rewards/margins": 3.0024592876434326, |
|
"rewards/rejected": -5.2223358154296875, |
|
"sft_loss": 0.7061720490455627, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09036144578313253, |
|
"grad_norm": 9.406942779681957, |
|
"learning_rate": 4.508196721311475e-07, |
|
"logits/chosen": 19.715351104736328, |
|
"logits/rejected": 20.35331153869629, |
|
"logps/chosen": -208.7209930419922, |
|
"logps/rejected": -150.72914123535156, |
|
"loss": 0.8148, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -2.6436386108398438, |
|
"rewards/margins": 3.832695722579956, |
|
"rewards/rejected": -6.476334571838379, |
|
"sft_loss": 0.7786983251571655, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.09857612267250822, |
|
"grad_norm": 10.934185099116656, |
|
"learning_rate": 4.918032786885245e-07, |
|
"logits/chosen": 20.9300537109375, |
|
"logits/rejected": 21.388505935668945, |
|
"logps/chosen": -192.5828399658203, |
|
"logps/rejected": -125.1326904296875, |
|
"loss": 0.8114, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": -2.3978421688079834, |
|
"rewards/margins": 3.1992502212524414, |
|
"rewards/rejected": -5.597092628479004, |
|
"sft_loss": 0.698898434638977, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.10679079956188389, |
|
"grad_norm": 12.361759850691927, |
|
"learning_rate": 4.999852034151641e-07, |
|
"logits/chosen": 19.11568832397461, |
|
"logits/rejected": 19.857196807861328, |
|
"logps/chosen": -242.90460205078125, |
|
"logps/rejected": -149.67938232421875, |
|
"loss": 0.7666, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -2.5540611743927, |
|
"rewards/margins": 3.71470308303833, |
|
"rewards/rejected": -6.268764019012451, |
|
"sft_loss": 0.7993389368057251, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.11500547645125958, |
|
"grad_norm": 14.492049698010485, |
|
"learning_rate": 4.999250952911133e-07, |
|
"logits/chosen": 20.96298599243164, |
|
"logits/rejected": 20.906280517578125, |
|
"logps/chosen": -236.47763061523438, |
|
"logps/rejected": -142.59445190429688, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -2.4225642681121826, |
|
"rewards/margins": 4.021193981170654, |
|
"rewards/rejected": -6.4437575340271, |
|
"sft_loss": 0.8038942217826843, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12322015334063527, |
|
"grad_norm": 17.551745284718073, |
|
"learning_rate": 4.998187619501184e-07, |
|
"logits/chosen": 20.637529373168945, |
|
"logits/rejected": 21.148029327392578, |
|
"logps/chosen": -266.9391784667969, |
|
"logps/rejected": -173.1654510498047, |
|
"loss": 0.6651, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -3.129103660583496, |
|
"rewards/margins": 5.091865062713623, |
|
"rewards/rejected": -8.220968246459961, |
|
"sft_loss": 0.8789225816726685, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.13143483023001096, |
|
"grad_norm": 21.266109357356587, |
|
"learning_rate": 4.996662230591989e-07, |
|
"logits/chosen": 18.540781021118164, |
|
"logits/rejected": 19.185565948486328, |
|
"logps/chosen": -252.1251983642578, |
|
"logps/rejected": -169.13851928710938, |
|
"loss": 0.706, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": -3.4190313816070557, |
|
"rewards/margins": 4.7408881187438965, |
|
"rewards/rejected": -8.159918785095215, |
|
"sft_loss": 0.8200284242630005, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.13964950711938665, |
|
"grad_norm": 14.68921798619268, |
|
"learning_rate": 4.994675068313813e-07, |
|
"logits/chosen": 17.844524383544922, |
|
"logits/rejected": 19.307209014892578, |
|
"logps/chosen": -235.93295288085938, |
|
"logps/rejected": -164.65467834472656, |
|
"loss": 0.6425, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -3.202953577041626, |
|
"rewards/margins": 4.453563213348389, |
|
"rewards/rejected": -7.656517028808594, |
|
"sft_loss": 0.8084096908569336, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.14786418400876233, |
|
"grad_norm": 9.391954380287526, |
|
"learning_rate": 4.992226500204806e-07, |
|
"logits/chosen": 18.810604095458984, |
|
"logits/rejected": 19.509326934814453, |
|
"logps/chosen": -239.79638671875, |
|
"logps/rejected": -149.21372985839844, |
|
"loss": 0.6741, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -2.8668696880340576, |
|
"rewards/margins": 4.099938869476318, |
|
"rewards/rejected": -6.966808795928955, |
|
"sft_loss": 0.8505186438560486, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.156078860898138, |
|
"grad_norm": 8.292078325061183, |
|
"learning_rate": 4.989316979143029e-07, |
|
"logits/chosen": 19.036439895629883, |
|
"logits/rejected": 18.50504493713379, |
|
"logps/chosen": -243.55430603027344, |
|
"logps/rejected": -141.56640625, |
|
"loss": 0.7786, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -2.8055150508880615, |
|
"rewards/margins": 4.023467540740967, |
|
"rewards/rejected": -6.828982830047607, |
|
"sft_loss": 0.8537193536758423, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.16429353778751368, |
|
"grad_norm": 11.759496127210191, |
|
"learning_rate": 4.985947043262686e-07, |
|
"logits/chosen": 18.438268661499023, |
|
"logits/rejected": 18.92384147644043, |
|
"logps/chosen": -256.82135009765625, |
|
"logps/rejected": -162.3760223388672, |
|
"loss": 0.656, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": -3.251594066619873, |
|
"rewards/margins": 4.7661452293396, |
|
"rewards/rejected": -8.017740249633789, |
|
"sft_loss": 0.8523219227790833, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17250821467688937, |
|
"grad_norm": 13.225983298656475, |
|
"learning_rate": 4.982117315854593e-07, |
|
"logits/chosen": 19.018491744995117, |
|
"logits/rejected": 19.4432373046875, |
|
"logps/chosen": -242.88742065429688, |
|
"logps/rejected": -160.6437225341797, |
|
"loss": 0.6173, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -3.371706962585449, |
|
"rewards/margins": 4.9150261878967285, |
|
"rewards/rejected": -8.286733627319336, |
|
"sft_loss": 0.8633176684379578, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.18072289156626506, |
|
"grad_norm": 33.60275949690272, |
|
"learning_rate": 4.977828505250903e-07, |
|
"logits/chosen": 18.26275062561035, |
|
"logits/rejected": 18.561012268066406, |
|
"logps/chosen": -232.76333618164062, |
|
"logps/rejected": -153.5156707763672, |
|
"loss": 0.6725, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -3.7783102989196777, |
|
"rewards/margins": 4.282144069671631, |
|
"rewards/rejected": -8.060454368591309, |
|
"sft_loss": 0.8514001369476318, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.18893756845564075, |
|
"grad_norm": 29.7322754671122, |
|
"learning_rate": 4.973081404694087e-07, |
|
"logits/chosen": 17.40985679626465, |
|
"logits/rejected": 18.532135009765625, |
|
"logps/chosen": -263.5098571777344, |
|
"logps/rejected": -179.07461547851562, |
|
"loss": 0.6416, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -4.028925895690918, |
|
"rewards/margins": 5.305994033813477, |
|
"rewards/rejected": -9.334918975830078, |
|
"sft_loss": 0.9138454794883728, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.19715224534501644, |
|
"grad_norm": 11.64226677208211, |
|
"learning_rate": 4.967876892190227e-07, |
|
"logits/chosen": 18.535491943359375, |
|
"logits/rejected": 18.528560638427734, |
|
"logps/chosen": -261.1396484375, |
|
"logps/rejected": -164.66261291503906, |
|
"loss": 0.6327, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -3.7333872318267822, |
|
"rewards/margins": 4.9736409187316895, |
|
"rewards/rejected": -8.707027435302734, |
|
"sft_loss": 0.8873167634010315, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.20536692223439212, |
|
"grad_norm": 10.419489404925384, |
|
"learning_rate": 4.962215930346614e-07, |
|
"logits/chosen": 18.076738357543945, |
|
"logits/rejected": 18.797412872314453, |
|
"logps/chosen": -240.43885803222656, |
|
"logps/rejected": -170.57994079589844, |
|
"loss": 0.6021, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -3.8788936138153076, |
|
"rewards/margins": 5.037622928619385, |
|
"rewards/rejected": -8.91651725769043, |
|
"sft_loss": 0.8787587285041809, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.21358159912376778, |
|
"grad_norm": 13.538147865005195, |
|
"learning_rate": 4.956099566193716e-07, |
|
"logits/chosen": 17.794748306274414, |
|
"logits/rejected": 18.117393493652344, |
|
"logps/chosen": -263.0421447753906, |
|
"logps/rejected": -180.68548583984375, |
|
"loss": 0.5662, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.157100677490234, |
|
"rewards/margins": 5.220449924468994, |
|
"rewards/rejected": -9.377551078796387, |
|
"sft_loss": 0.8972741961479187, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.22179627601314347, |
|
"grad_norm": 14.579358533691158, |
|
"learning_rate": 4.949528930991521e-07, |
|
"logits/chosen": 17.554058074951172, |
|
"logits/rejected": 18.180675506591797, |
|
"logps/chosen": -265.0473327636719, |
|
"logps/rejected": -177.85751342773438, |
|
"loss": 0.6399, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -3.8493740558624268, |
|
"rewards/margins": 5.235028266906738, |
|
"rewards/rejected": -9.084402084350586, |
|
"sft_loss": 0.8204969167709351, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.23001095290251916, |
|
"grad_norm": 12.455186291161809, |
|
"learning_rate": 4.9425052400203e-07, |
|
"logits/chosen": 17.611921310424805, |
|
"logits/rejected": 17.878339767456055, |
|
"logps/chosen": -265.25787353515625, |
|
"logps/rejected": -185.50375366210938, |
|
"loss": 0.6103, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -4.896492958068848, |
|
"rewards/margins": 4.968528747558594, |
|
"rewards/rejected": -9.865021705627441, |
|
"sft_loss": 0.8832098245620728, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.23822562979189485, |
|
"grad_norm": 12.455826945227212, |
|
"learning_rate": 4.935029792355834e-07, |
|
"logits/chosen": 17.996692657470703, |
|
"logits/rejected": 18.594377517700195, |
|
"logps/chosen": -286.6059875488281, |
|
"logps/rejected": -200.05149841308594, |
|
"loss": 0.543, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -5.263542175292969, |
|
"rewards/margins": 5.605571269989014, |
|
"rewards/rejected": -10.86911392211914, |
|
"sft_loss": 0.8996745944023132, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.24644030668127054, |
|
"grad_norm": 15.362573644388778, |
|
"learning_rate": 4.927103970629147e-07, |
|
"logits/chosen": 18.072965621948242, |
|
"logits/rejected": 18.25052261352539, |
|
"logps/chosen": -269.8097839355469, |
|
"logps/rejected": -185.32431030273438, |
|
"loss": 0.6219, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": -4.9397172927856445, |
|
"rewards/margins": 5.206496715545654, |
|
"rewards/rejected": -10.14621353149414, |
|
"sft_loss": 0.7995728254318237, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2546549835706462, |
|
"grad_norm": 11.01295684823168, |
|
"learning_rate": 4.918729240770775e-07, |
|
"logits/chosen": 17.353046417236328, |
|
"logits/rejected": 18.587129592895508, |
|
"logps/chosen": -240.89488220214844, |
|
"logps/rejected": -173.4665069580078, |
|
"loss": 0.5702, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -4.678676128387451, |
|
"rewards/margins": 5.200152397155762, |
|
"rewards/rejected": -9.878829002380371, |
|
"sft_loss": 0.9399448037147522, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.2628696604600219, |
|
"grad_norm": 19.41550454155631, |
|
"learning_rate": 4.909907151739633e-07, |
|
"logits/chosen": 18.130189895629883, |
|
"logits/rejected": 18.379247665405273, |
|
"logps/chosen": -292.39990234375, |
|
"logps/rejected": -188.62220764160156, |
|
"loss": 0.6561, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -4.744537353515625, |
|
"rewards/margins": 5.8243794441223145, |
|
"rewards/rejected": -10.568917274475098, |
|
"sft_loss": 0.8947219848632812, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2710843373493976, |
|
"grad_norm": 11.498941704903908, |
|
"learning_rate": 4.900639335236526e-07, |
|
"logits/chosen": 18.79334259033203, |
|
"logits/rejected": 19.24587059020996, |
|
"logps/chosen": -271.9427185058594, |
|
"logps/rejected": -179.41293334960938, |
|
"loss": 0.607, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -4.425381183624268, |
|
"rewards/margins": 5.343040943145752, |
|
"rewards/rejected": -9.76842212677002, |
|
"sft_loss": 0.9064626097679138, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.2792990142387733, |
|
"grad_norm": 10.84500529690262, |
|
"learning_rate": 4.890927505402359e-07, |
|
"logits/chosen": 16.892650604248047, |
|
"logits/rejected": 17.597482681274414, |
|
"logps/chosen": -238.55162048339844, |
|
"logps/rejected": -170.00466918945312, |
|
"loss": 0.5889, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -4.459685802459717, |
|
"rewards/margins": 4.842031002044678, |
|
"rewards/rejected": -9.301715850830078, |
|
"sft_loss": 0.8489271402359009, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.28751369112814895, |
|
"grad_norm": 16.18327307978759, |
|
"learning_rate": 4.880773458501089e-07, |
|
"logits/chosen": 19.4614315032959, |
|
"logits/rejected": 19.801300048828125, |
|
"logps/chosen": -232.73573303222656, |
|
"logps/rejected": -165.04103088378906, |
|
"loss": 0.5662, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -4.241008758544922, |
|
"rewards/margins": 4.859863758087158, |
|
"rewards/rejected": -9.100872993469238, |
|
"sft_loss": 0.8601513504981995, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.29572836801752467, |
|
"grad_norm": 10.178119222467004, |
|
"learning_rate": 4.870179072587498e-07, |
|
"logits/chosen": 17.228599548339844, |
|
"logits/rejected": 17.30803871154785, |
|
"logps/chosen": -250.42587280273438, |
|
"logps/rejected": -171.54635620117188, |
|
"loss": 0.6129, |
|
"rewards/accuracies": 0.9333333373069763, |
|
"rewards/chosen": -5.068057537078857, |
|
"rewards/margins": 5.1040239334106445, |
|
"rewards/rejected": -10.172082901000977, |
|
"sft_loss": 0.9672516584396362, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.30394304490690033, |
|
"grad_norm": 8.317024863573055, |
|
"learning_rate": 4.859146307159841e-07, |
|
"logits/chosen": 18.039478302001953, |
|
"logits/rejected": 18.52968406677246, |
|
"logps/chosen": -248.23155212402344, |
|
"logps/rejected": -179.2881317138672, |
|
"loss": 0.5417, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -5.143929481506348, |
|
"rewards/margins": 5.1267170906066895, |
|
"rewards/rejected": -10.270648002624512, |
|
"sft_loss": 0.8881379961967468, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.312157721796276, |
|
"grad_norm": 11.89542898588366, |
|
"learning_rate": 4.847677202797414e-07, |
|
"logits/chosen": 18.8001708984375, |
|
"logits/rejected": 19.126699447631836, |
|
"logps/chosen": -263.02789306640625, |
|
"logps/rejected": -183.99911499023438, |
|
"loss": 0.5551, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -5.0858845710754395, |
|
"rewards/margins": 5.6055006980896, |
|
"rewards/rejected": -10.691385269165039, |
|
"sft_loss": 0.8070122599601746, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3203723986856517, |
|
"grad_norm": 12.1886798786308, |
|
"learning_rate": 4.835773880783144e-07, |
|
"logits/chosen": 16.390464782714844, |
|
"logits/rejected": 17.854284286499023, |
|
"logps/chosen": -269.9723815917969, |
|
"logps/rejected": -200.60789489746094, |
|
"loss": 0.5446, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -5.6444902420043945, |
|
"rewards/margins": 6.389484882354736, |
|
"rewards/rejected": -12.033974647521973, |
|
"sft_loss": 0.8605390191078186, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.32858707557502737, |
|
"grad_norm": 11.13911341274398, |
|
"learning_rate": 4.823438542711238e-07, |
|
"logits/chosen": 17.828205108642578, |
|
"logits/rejected": 18.60173797607422, |
|
"logps/chosen": -277.97259521484375, |
|
"logps/rejected": -203.9155731201172, |
|
"loss": 0.5444, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -5.445572853088379, |
|
"rewards/margins": 6.231374740600586, |
|
"rewards/rejected": -11.676946640014648, |
|
"sft_loss": 0.9524543881416321, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3368017524644031, |
|
"grad_norm": 59.69351759377879, |
|
"learning_rate": 4.81067347007999e-07, |
|
"logits/chosen": 18.93602752685547, |
|
"logits/rejected": 19.728424072265625, |
|
"logps/chosen": -247.34567260742188, |
|
"logps/rejected": -173.0783233642578, |
|
"loss": 0.6075, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -4.630053997039795, |
|
"rewards/margins": 4.911181449890137, |
|
"rewards/rejected": -9.54123592376709, |
|
"sft_loss": 0.9002848863601685, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.34501642935377874, |
|
"grad_norm": 8.657351709118194, |
|
"learning_rate": 4.797481023869801e-07, |
|
"logits/chosen": 18.50823974609375, |
|
"logits/rejected": 18.78363037109375, |
|
"logps/chosen": -245.55979919433594, |
|
"logps/rejected": -182.1737518310547, |
|
"loss": 0.5425, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.3583760261535645, |
|
"rewards/margins": 5.406437397003174, |
|
"rewards/rejected": -10.764813423156738, |
|
"sft_loss": 0.9510916471481323, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.35323110624315446, |
|
"grad_norm": 19.28978220217397, |
|
"learning_rate": 4.783863644106502e-07, |
|
"logits/chosen": 17.9003849029541, |
|
"logits/rejected": 19.15799903869629, |
|
"logps/chosen": -240.30958557128906, |
|
"logps/rejected": -187.32284545898438, |
|
"loss": 0.546, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -5.292669296264648, |
|
"rewards/margins": 5.617033004760742, |
|
"rewards/rejected": -10.909701347351074, |
|
"sft_loss": 0.9965067505836487, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.3614457831325301, |
|
"grad_norm": 8.529326956491959, |
|
"learning_rate": 4.769823849410053e-07, |
|
"logits/chosen": 15.990920066833496, |
|
"logits/rejected": 17.267040252685547, |
|
"logps/chosen": -283.7446594238281, |
|
"logps/rejected": -209.57525634765625, |
|
"loss": 0.5062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.516228675842285, |
|
"rewards/margins": 6.538068771362305, |
|
"rewards/rejected": -12.054296493530273, |
|
"sft_loss": 0.9376140832901001, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3696604600219058, |
|
"grad_norm": 14.616903394027977, |
|
"learning_rate": 4.7553642365287127e-07, |
|
"logits/chosen": 16.816274642944336, |
|
"logits/rejected": 17.819963455200195, |
|
"logps/chosen": -245.84878540039062, |
|
"logps/rejected": -188.35284423828125, |
|
"loss": 0.5832, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -5.176213264465332, |
|
"rewards/margins": 5.302474498748779, |
|
"rewards/rejected": -10.478687286376953, |
|
"sft_loss": 1.0134352445602417, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.3778751369112815, |
|
"grad_norm": 15.650965079934865, |
|
"learning_rate": 4.7404874798587493e-07, |
|
"logits/chosen": 18.04664421081543, |
|
"logits/rejected": 19.232574462890625, |
|
"logps/chosen": -268.1763610839844, |
|
"logps/rejected": -193.1671600341797, |
|
"loss": 0.5248, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -5.173998832702637, |
|
"rewards/margins": 5.888847827911377, |
|
"rewards/rejected": -11.062848091125488, |
|
"sft_loss": 0.9188562035560608, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.38608981380065716, |
|
"grad_norm": 9.155968536476317, |
|
"learning_rate": 4.7251963309497965e-07, |
|
"logits/chosen": 17.16444206237793, |
|
"logits/rejected": 18.188404083251953, |
|
"logps/chosen": -281.6944580078125, |
|
"logps/rejected": -214.91883850097656, |
|
"loss": 0.5831, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -6.146281719207764, |
|
"rewards/margins": 6.598486423492432, |
|
"rewards/rejected": -12.744769096374512, |
|
"sft_loss": 1.0649549961090088, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.39430449069003287, |
|
"grad_norm": 13.480064050397614, |
|
"learning_rate": 4.709493617995938e-07, |
|
"logits/chosen": 18.09016227722168, |
|
"logits/rejected": 18.207592010498047, |
|
"logps/chosen": -278.3957214355469, |
|
"logps/rejected": -195.16822814941406, |
|
"loss": 0.4846, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -5.534964084625244, |
|
"rewards/margins": 6.029869079589844, |
|
"rewards/rejected": -11.564833641052246, |
|
"sft_loss": 0.9166081547737122, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.40251916757940853, |
|
"grad_norm": 8.853519364453792, |
|
"learning_rate": 4.6933822453126114e-07, |
|
"logits/chosen": 17.334672927856445, |
|
"logits/rejected": 18.275968551635742, |
|
"logps/chosen": -229.73594665527344, |
|
"logps/rejected": -182.89251708984375, |
|
"loss": 0.5795, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -5.739324569702148, |
|
"rewards/margins": 5.62018346786499, |
|
"rewards/rejected": -11.35950756072998, |
|
"sft_loss": 1.0507081747055054, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.41073384446878425, |
|
"grad_norm": 23.105340732527253, |
|
"learning_rate": 4.676865192799443e-07, |
|
"logits/chosen": 18.659299850463867, |
|
"logits/rejected": 19.426942825317383, |
|
"logps/chosen": -310.3028869628906, |
|
"logps/rejected": -233.80967712402344, |
|
"loss": 0.5041, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -7.381309509277344, |
|
"rewards/margins": 6.830047607421875, |
|
"rewards/rejected": -14.211358070373535, |
|
"sft_loss": 0.9847605228424072, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4189485213581599, |
|
"grad_norm": 12.714869052495171, |
|
"learning_rate": 4.65994551538909e-07, |
|
"logits/chosen": 17.69913101196289, |
|
"logits/rejected": 17.626365661621094, |
|
"logps/chosen": -286.1001892089844, |
|
"logps/rejected": -213.40573120117188, |
|
"loss": 0.5671, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -6.464048385620117, |
|
"rewards/margins": 6.595151424407959, |
|
"rewards/rejected": -13.059199333190918, |
|
"sft_loss": 1.0706934928894043, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.42716319824753557, |
|
"grad_norm": 18.758504329716533, |
|
"learning_rate": 4.642626342482215e-07, |
|
"logits/chosen": 17.131309509277344, |
|
"logits/rejected": 17.48920440673828, |
|
"logps/chosen": -231.87130737304688, |
|
"logps/rejected": -174.91970825195312, |
|
"loss": 0.5728, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -4.785408973693848, |
|
"rewards/margins": 5.311648368835449, |
|
"rewards/rejected": -10.097058296203613, |
|
"sft_loss": 0.9056914448738098, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.4353778751369113, |
|
"grad_norm": 16.34948060980786, |
|
"learning_rate": 4.624910877368684e-07, |
|
"logits/chosen": 17.2136287689209, |
|
"logits/rejected": 18.958431243896484, |
|
"logps/chosen": -265.6873474121094, |
|
"logps/rejected": -200.37913513183594, |
|
"loss": 0.5359, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -5.0484795570373535, |
|
"rewards/margins": 6.235151767730713, |
|
"rewards/rejected": -11.283629417419434, |
|
"sft_loss": 0.897827684879303, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.44359255202628695, |
|
"grad_norm": 8.468377967197654, |
|
"learning_rate": 4.606802396635098e-07, |
|
"logits/chosen": 18.035551071166992, |
|
"logits/rejected": 19.360517501831055, |
|
"logps/chosen": -279.75555419921875, |
|
"logps/rejected": -217.06832885742188, |
|
"loss": 0.4866, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -6.324933052062988, |
|
"rewards/margins": 6.823997974395752, |
|
"rewards/rejected": -13.148929595947266, |
|
"sft_loss": 0.9062218070030212, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.45180722891566266, |
|
"grad_norm": 8.67268227774844, |
|
"learning_rate": 4.588304249558763e-07, |
|
"logits/chosen": 17.523601531982422, |
|
"logits/rejected": 17.99420166015625, |
|
"logps/chosen": -290.8741760253906, |
|
"logps/rejected": -215.23924255371094, |
|
"loss": 0.5245, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -6.641848087310791, |
|
"rewards/margins": 6.469297409057617, |
|
"rewards/rejected": -13.11114501953125, |
|
"sft_loss": 0.9921270608901978, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.4600219058050383, |
|
"grad_norm": 12.040362030861928, |
|
"learning_rate": 4.569419857488228e-07, |
|
"logits/chosen": 17.7161808013916, |
|
"logits/rejected": 17.987571716308594, |
|
"logps/chosen": -297.76318359375, |
|
"logps/rejected": -205.46383666992188, |
|
"loss": 0.5407, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -5.587214469909668, |
|
"rewards/margins": 6.473574638366699, |
|
"rewards/rejected": -12.060790061950684, |
|
"sft_loss": 0.9882974028587341, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.46823658269441404, |
|
"grad_norm": 18.26018008195662, |
|
"learning_rate": 4.550152713210478e-07, |
|
"logits/chosen": 17.55337905883789, |
|
"logits/rejected": 18.636327743530273, |
|
"logps/chosen": -247.40650939941406, |
|
"logps/rejected": -190.1718292236328, |
|
"loss": 0.5136, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -5.608924865722656, |
|
"rewards/margins": 5.688971996307373, |
|
"rewards/rejected": -11.297897338867188, |
|
"sft_loss": 0.9460915327072144, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.4764512595837897, |
|
"grad_norm": 12.950141797441761, |
|
"learning_rate": 4.530506380304925e-07, |
|
"logits/chosen": 16.12598419189453, |
|
"logits/rejected": 16.963117599487305, |
|
"logps/chosen": -315.90838623046875, |
|
"logps/rejected": -234.9226837158203, |
|
"loss": 0.5254, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -7.075807094573975, |
|
"rewards/margins": 7.303346157073975, |
|
"rewards/rejected": -14.37915325164795, |
|
"sft_loss": 1.0791391134262085, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4846659364731654, |
|
"grad_norm": 8.510727267783134, |
|
"learning_rate": 4.510484492484301e-07, |
|
"logits/chosen": 16.052139282226562, |
|
"logits/rejected": 18.621992111206055, |
|
"logps/chosen": -293.8525695800781, |
|
"logps/rejected": -249.84762573242188, |
|
"loss": 0.502, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.701572895050049, |
|
"rewards/margins": 7.963890552520752, |
|
"rewards/rejected": -15.665464401245117, |
|
"sft_loss": 1.0291332006454468, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.4928806133625411, |
|
"grad_norm": 11.486611534499634, |
|
"learning_rate": 4.4900907529225797e-07, |
|
"logits/chosen": 15.679919242858887, |
|
"logits/rejected": 16.096633911132812, |
|
"logps/chosen": -295.52557373046875, |
|
"logps/rejected": -208.35264587402344, |
|
"loss": 0.5684, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -5.8986592292785645, |
|
"rewards/margins": 6.808130264282227, |
|
"rewards/rejected": -12.70678997039795, |
|
"sft_loss": 0.9438207149505615, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5010952902519168, |
|
"grad_norm": 11.462228668252441, |
|
"learning_rate": 4.46932893357005e-07, |
|
"logits/chosen": 17.438947677612305, |
|
"logits/rejected": 18.582027435302734, |
|
"logps/chosen": -282.1226501464844, |
|
"logps/rejected": -213.70437622070312, |
|
"loss": 0.4316, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -5.921773433685303, |
|
"rewards/margins": 6.760589599609375, |
|
"rewards/rejected": -12.68236255645752, |
|
"sft_loss": 0.9546439051628113, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.5093099671412924, |
|
"grad_norm": 25.401018773660823, |
|
"learning_rate": 4.448202874455672e-07, |
|
"logits/chosen": 16.973630905151367, |
|
"logits/rejected": 17.916053771972656, |
|
"logps/chosen": -303.2902526855469, |
|
"logps/rejected": -214.27862548828125, |
|
"loss": 0.5904, |
|
"rewards/accuracies": 0.9200000762939453, |
|
"rewards/chosen": -6.163903713226318, |
|
"rewards/margins": 6.488450050354004, |
|
"rewards/rejected": -12.65235424041748, |
|
"sft_loss": 1.065365195274353, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5175246440306681, |
|
"grad_norm": 9.834689967221987, |
|
"learning_rate": 4.426716482976838e-07, |
|
"logits/chosen": 18.023340225219727, |
|
"logits/rejected": 19.0910587310791, |
|
"logps/chosen": -296.31610107421875, |
|
"logps/rejected": -209.1268768310547, |
|
"loss": 0.5109, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -5.737242221832275, |
|
"rewards/margins": 6.57784366607666, |
|
"rewards/rejected": -12.315085411071777, |
|
"sft_loss": 0.966189444065094, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.5257393209200438, |
|
"grad_norm": 12.349530912080413, |
|
"learning_rate": 4.4048737331766774e-07, |
|
"logits/chosen": 19.084957122802734, |
|
"logits/rejected": 19.039499282836914, |
|
"logps/chosen": -273.5611877441406, |
|
"logps/rejected": -193.39707946777344, |
|
"loss": 0.5342, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -5.792872428894043, |
|
"rewards/margins": 5.747686386108398, |
|
"rewards/rejected": -11.540557861328125, |
|
"sft_loss": 0.8884872198104858, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5339539978094195, |
|
"grad_norm": 13.644842027024733, |
|
"learning_rate": 4.3826786650090273e-07, |
|
"logits/chosen": 15.30917739868164, |
|
"logits/rejected": 16.686445236206055, |
|
"logps/chosen": -261.4600524902344, |
|
"logps/rejected": -197.12661743164062, |
|
"loss": 0.5439, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -6.051290988922119, |
|
"rewards/margins": 5.911606311798096, |
|
"rewards/rejected": -11.962896347045898, |
|
"sft_loss": 1.0287508964538574, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.5421686746987951, |
|
"grad_norm": 14.919840859492023, |
|
"learning_rate": 4.3601353835912235e-07, |
|
"logits/chosen": 17.14605712890625, |
|
"logits/rejected": 18.71445655822754, |
|
"logps/chosen": -240.4210968017578, |
|
"logps/rejected": -191.06373596191406, |
|
"loss": 0.5566, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -6.086501121520996, |
|
"rewards/margins": 5.632846355438232, |
|
"rewards/rejected": -11.719347953796387, |
|
"sft_loss": 0.9403523206710815, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5503833515881709, |
|
"grad_norm": 34.32717833778974, |
|
"learning_rate": 4.337248058444831e-07, |
|
"logits/chosen": 15.827594757080078, |
|
"logits/rejected": 16.74897575378418, |
|
"logps/chosen": -327.0185852050781, |
|
"logps/rejected": -250.9954376220703, |
|
"loss": 0.5323, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -8.161806106567383, |
|
"rewards/margins": 7.682919979095459, |
|
"rewards/rejected": -15.844725608825684, |
|
"sft_loss": 1.1408532857894897, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.5585980284775466, |
|
"grad_norm": 13.092331667096667, |
|
"learning_rate": 4.3140209227244617e-07, |
|
"logits/chosen": 17.278669357299805, |
|
"logits/rejected": 18.425344467163086, |
|
"logps/chosen": -254.86746215820312, |
|
"logps/rejected": -201.89547729492188, |
|
"loss": 0.5321, |
|
"rewards/accuracies": 0.9200000762939453, |
|
"rewards/chosen": -6.691502094268799, |
|
"rewards/margins": 6.277873516082764, |
|
"rewards/rejected": -12.969375610351562, |
|
"sft_loss": 1.0744267702102661, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5668127053669222, |
|
"grad_norm": 12.663182255141733, |
|
"learning_rate": 4.2904582724348316e-07, |
|
"logits/chosen": 16.910207748413086, |
|
"logits/rejected": 17.029691696166992, |
|
"logps/chosen": -287.6109313964844, |
|
"logps/rejected": -202.47837829589844, |
|
"loss": 0.4913, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -5.874098777770996, |
|
"rewards/margins": 6.561362266540527, |
|
"rewards/rejected": -12.43545913696289, |
|
"sft_loss": 1.1810688972473145, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.5750273822562979, |
|
"grad_norm": 18.101790487079715, |
|
"learning_rate": 4.266564465636182e-07, |
|
"logits/chosen": 17.891399383544922, |
|
"logits/rejected": 19.3447208404541, |
|
"logps/chosen": -306.7535705566406, |
|
"logps/rejected": -237.83753967285156, |
|
"loss": 0.482, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -7.050681114196777, |
|
"rewards/margins": 7.2094807624816895, |
|
"rewards/rejected": -14.260162353515625, |
|
"sft_loss": 0.964589536190033, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5832420591456736, |
|
"grad_norm": 9.764394722665243, |
|
"learning_rate": 4.242343921638234e-07, |
|
"logits/chosen": 17.71145248413086, |
|
"logits/rejected": 18.48440170288086, |
|
"logps/chosen": -317.6193542480469, |
|
"logps/rejected": -230.48606872558594, |
|
"loss": 0.45, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -6.803144931793213, |
|
"rewards/margins": 7.825214862823486, |
|
"rewards/rejected": -14.6283597946167, |
|
"sft_loss": 1.0540062189102173, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.5914567360350493, |
|
"grad_norm": 10.821777272670147, |
|
"learning_rate": 4.2178011201828044e-07, |
|
"logits/chosen": 17.3190975189209, |
|
"logits/rejected": 17.47244644165039, |
|
"logps/chosen": -288.40374755859375, |
|
"logps/rejected": -211.689453125, |
|
"loss": 0.5051, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -6.330109596252441, |
|
"rewards/margins": 6.8288254737854, |
|
"rewards/rejected": -13.158934593200684, |
|
"sft_loss": 1.0400768518447876, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5996714129244249, |
|
"grad_norm": 17.857419277834527, |
|
"learning_rate": 4.1929406006152546e-07, |
|
"logits/chosen": 18.516992568969727, |
|
"logits/rejected": 19.116985321044922, |
|
"logps/chosen": -281.31695556640625, |
|
"logps/rejected": -213.61634826660156, |
|
"loss": 0.5566, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -6.528250217437744, |
|
"rewards/margins": 6.995584487915039, |
|
"rewards/rejected": -13.523836135864258, |
|
"sft_loss": 1.0151982307434082, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.6078860898138007, |
|
"grad_norm": 14.30267906956284, |
|
"learning_rate": 4.167766961044906e-07, |
|
"logits/chosen": 18.10727882385254, |
|
"logits/rejected": 18.658222198486328, |
|
"logps/chosen": -276.7471923828125, |
|
"logps/rejected": -210.30068969726562, |
|
"loss": 0.4918, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -5.930126667022705, |
|
"rewards/margins": 6.7833099365234375, |
|
"rewards/rejected": -12.7134370803833, |
|
"sft_loss": 0.8878603577613831, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6161007667031764, |
|
"grad_norm": 13.544123616662414, |
|
"learning_rate": 4.1422848574945923e-07, |
|
"logits/chosen": 18.04473876953125, |
|
"logits/rejected": 18.60536003112793, |
|
"logps/chosen": -297.9788513183594, |
|
"logps/rejected": -217.53721618652344, |
|
"loss": 0.486, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -5.85421085357666, |
|
"rewards/margins": 7.5605010986328125, |
|
"rewards/rejected": -13.414711952209473, |
|
"sft_loss": 1.005669355392456, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.624315443592552, |
|
"grad_norm": 21.50695855504068, |
|
"learning_rate": 4.1164990030394985e-07, |
|
"logits/chosen": 17.071107864379883, |
|
"logits/rejected": 18.0479679107666, |
|
"logps/chosen": -287.5808410644531, |
|
"logps/rejected": -229.66249084472656, |
|
"loss": 0.5873, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -7.32340145111084, |
|
"rewards/margins": 7.0957746505737305, |
|
"rewards/rejected": -14.419175148010254, |
|
"sft_loss": 0.9805389046669006, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6325301204819277, |
|
"grad_norm": 8.192201815991636, |
|
"learning_rate": 4.09041416693545e-07, |
|
"logits/chosen": 17.63469886779785, |
|
"logits/rejected": 18.505117416381836, |
|
"logps/chosen": -279.4613342285156, |
|
"logps/rejected": -218.5486297607422, |
|
"loss": 0.5224, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.260526657104492, |
|
"rewards/margins": 6.764527320861816, |
|
"rewards/rejected": -14.025053024291992, |
|
"sft_loss": 1.06680166721344, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.6407447973713034, |
|
"grad_norm": 47.97635903653397, |
|
"learning_rate": 4.064035173736804e-07, |
|
"logits/chosen": 15.768574714660645, |
|
"logits/rejected": 16.24512481689453, |
|
"logps/chosen": -303.8434753417969, |
|
"logps/rejected": -227.7042999267578, |
|
"loss": 0.5142, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -6.973790168762207, |
|
"rewards/margins": 7.414584159851074, |
|
"rewards/rejected": -14.388375282287598, |
|
"sft_loss": 1.1620056629180908, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6489594742606791, |
|
"grad_norm": 22.56750049467428, |
|
"learning_rate": 4.0373669024041225e-07, |
|
"logits/chosen": 17.480152130126953, |
|
"logits/rejected": 19.36970329284668, |
|
"logps/chosen": -268.9180908203125, |
|
"logps/rejected": -223.1499786376953, |
|
"loss": 0.48, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -7.060788154602051, |
|
"rewards/margins": 7.270442485809326, |
|
"rewards/rejected": -14.331231117248535, |
|
"sft_loss": 1.0084587335586548, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.6571741511500547, |
|
"grad_norm": 14.852542656702267, |
|
"learning_rate": 4.010414285401776e-07, |
|
"logits/chosen": 19.486713409423828, |
|
"logits/rejected": 19.6448917388916, |
|
"logps/chosen": -278.3014831542969, |
|
"logps/rejected": -204.4377899169922, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.484006404876709, |
|
"rewards/margins": 6.507460594177246, |
|
"rewards/rejected": -12.991467475891113, |
|
"sft_loss": 1.0000892877578735, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6653888280394304, |
|
"grad_norm": 10.619044380244409, |
|
"learning_rate": 3.9831823077856565e-07, |
|
"logits/chosen": 16.79458236694336, |
|
"logits/rejected": 17.91153907775879, |
|
"logps/chosen": -281.0224304199219, |
|
"logps/rejected": -210.1667022705078, |
|
"loss": 0.5159, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -6.140867710113525, |
|
"rewards/margins": 6.682094097137451, |
|
"rewards/rejected": -12.822961807250977, |
|
"sft_loss": 1.0717185735702515, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.6736035049288062, |
|
"grad_norm": 47.72028643830778, |
|
"learning_rate": 3.95567600628115e-07, |
|
"logits/chosen": 17.3284912109375, |
|
"logits/rejected": 17.72430419921875, |
|
"logps/chosen": -275.4824523925781, |
|
"logps/rejected": -210.34494018554688, |
|
"loss": 0.4746, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -6.135570049285889, |
|
"rewards/margins": 6.734328746795654, |
|
"rewards/rejected": -12.86989974975586, |
|
"sft_loss": 0.9496582746505737, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6818181818181818, |
|
"grad_norm": 13.659449625348234, |
|
"learning_rate": 3.9279004683515783e-07, |
|
"logits/chosen": 17.051794052124023, |
|
"logits/rejected": 18.201574325561523, |
|
"logps/chosen": -283.5098876953125, |
|
"logps/rejected": -217.13720703125, |
|
"loss": 0.4834, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.5031657218933105, |
|
"rewards/margins": 6.981623649597168, |
|
"rewards/rejected": -13.484789848327637, |
|
"sft_loss": 1.008143663406372, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.6900328587075575, |
|
"grad_norm": 10.688936022811054, |
|
"learning_rate": 3.8998608312572234e-07, |
|
"logits/chosen": 18.112707138061523, |
|
"logits/rejected": 18.169342041015625, |
|
"logps/chosen": -316.6014709472656, |
|
"logps/rejected": -224.16824340820312, |
|
"loss": 0.4278, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.8555474281311035, |
|
"rewards/margins": 7.1582255363464355, |
|
"rewards/rejected": -14.013773918151855, |
|
"sft_loss": 0.9130622148513794, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6982475355969332, |
|
"grad_norm": 13.038331606353893, |
|
"learning_rate": 3.8715622811051753e-07, |
|
"logits/chosen": 17.96015739440918, |
|
"logits/rejected": 18.90926742553711, |
|
"logps/chosen": -330.01348876953125, |
|
"logps/rejected": -245.21107482910156, |
|
"loss": 0.4744, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -7.965524196624756, |
|
"rewards/margins": 7.482056617736816, |
|
"rewards/rejected": -15.44758129119873, |
|
"sft_loss": 0.9873117208480835, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.7064622124863089, |
|
"grad_norm": 14.058645020755897, |
|
"learning_rate": 3.843010051890114e-07, |
|
"logits/chosen": 16.319496154785156, |
|
"logits/rejected": 16.970029830932617, |
|
"logps/chosen": -317.0173645019531, |
|
"logps/rejected": -243.7187042236328, |
|
"loss": 0.5166, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -7.986619472503662, |
|
"rewards/margins": 7.993711471557617, |
|
"rewards/rejected": -15.980331420898438, |
|
"sft_loss": 1.082601547241211, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7146768893756845, |
|
"grad_norm": 19.82457118000122, |
|
"learning_rate": 3.8142094245262615e-07, |
|
"logits/chosen": 17.59951400756836, |
|
"logits/rejected": 17.434412002563477, |
|
"logps/chosen": -294.1492919921875, |
|
"logps/rejected": -218.65521240234375, |
|
"loss": 0.5787, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -7.534255504608154, |
|
"rewards/margins": 6.933282852172852, |
|
"rewards/rejected": -14.467540740966797, |
|
"sft_loss": 1.660515308380127, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.7228915662650602, |
|
"grad_norm": 11.251537042250078, |
|
"learning_rate": 3.785165725870637e-07, |
|
"logits/chosen": 17.26852798461914, |
|
"logits/rejected": 17.4658203125, |
|
"logps/chosen": -318.1449279785156, |
|
"logps/rejected": -243.87478637695312, |
|
"loss": 0.4501, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -7.651638507843018, |
|
"rewards/margins": 7.704569339752197, |
|
"rewards/rejected": -15.356207847595215, |
|
"sft_loss": 1.0064337253570557, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.731106243154436, |
|
"grad_norm": 13.388962596712888, |
|
"learning_rate": 3.7558843277378203e-07, |
|
"logits/chosen": 17.070295333862305, |
|
"logits/rejected": 17.869474411010742, |
|
"logps/chosen": -280.3146057128906, |
|
"logps/rejected": -216.09710693359375, |
|
"loss": 0.4821, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -6.6985931396484375, |
|
"rewards/margins": 7.212075233459473, |
|
"rewards/rejected": -13.91066837310791, |
|
"sft_loss": 0.9864783883094788, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.7393209200438116, |
|
"grad_norm": 14.813083085351893, |
|
"learning_rate": 3.726370645906407e-07, |
|
"logits/chosen": 16.521230697631836, |
|
"logits/rejected": 17.734365463256836, |
|
"logps/chosen": -294.2370300292969, |
|
"logps/rejected": -221.69178771972656, |
|
"loss": 0.4907, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -7.402077674865723, |
|
"rewards/margins": 6.9355292320251465, |
|
"rewards/rejected": -14.337605476379395, |
|
"sft_loss": 1.1839743852615356, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7475355969331873, |
|
"grad_norm": 12.059854940698536, |
|
"learning_rate": 3.6966301391173204e-07, |
|
"logits/chosen": 17.135530471801758, |
|
"logits/rejected": 19.162967681884766, |
|
"logps/chosen": -284.18438720703125, |
|
"logps/rejected": -233.11984252929688, |
|
"loss": 0.5102, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -7.429366111755371, |
|
"rewards/margins": 7.891510009765625, |
|
"rewards/rejected": -15.320878028869629, |
|
"sft_loss": 1.079641580581665, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.755750273822563, |
|
"grad_norm": 22.260550575758643, |
|
"learning_rate": 3.6666683080641843e-07, |
|
"logits/chosen": 15.536272048950195, |
|
"logits/rejected": 16.60968780517578, |
|
"logps/chosen": -310.630859375, |
|
"logps/rejected": -241.0422821044922, |
|
"loss": 0.4597, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.208867073059082, |
|
"rewards/margins": 7.248252868652344, |
|
"rewards/rejected": -15.457121849060059, |
|
"sft_loss": 1.0596128702163696, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7639649507119387, |
|
"grad_norm": 12.19610863222244, |
|
"learning_rate": 3.636490694375937e-07, |
|
"logits/chosen": 17.03879165649414, |
|
"logits/rejected": 17.748197555541992, |
|
"logps/chosen": -308.9512023925781, |
|
"logps/rejected": -236.08970642089844, |
|
"loss": 0.4273, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -7.292715549468994, |
|
"rewards/margins": 8.177362442016602, |
|
"rewards/rejected": -15.470076560974121, |
|
"sft_loss": 1.0068012475967407, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.7721796276013143, |
|
"grad_norm": 13.22565269945024, |
|
"learning_rate": 3.6061028795918734e-07, |
|
"logits/chosen": 17.87092399597168, |
|
"logits/rejected": 18.572694778442383, |
|
"logps/chosen": -314.8690490722656, |
|
"logps/rejected": -240.42343139648438, |
|
"loss": 0.5971, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.011045455932617, |
|
"rewards/margins": 7.702009677886963, |
|
"rewards/rejected": -15.713056564331055, |
|
"sft_loss": 1.0346639156341553, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.78039430449069, |
|
"grad_norm": 23.36877627131626, |
|
"learning_rate": 3.5755104841292974e-07, |
|
"logits/chosen": 16.52726936340332, |
|
"logits/rejected": 18.124269485473633, |
|
"logps/chosen": -261.4451599121094, |
|
"logps/rejected": -216.3064727783203, |
|
"loss": 0.5188, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -7.079422950744629, |
|
"rewards/margins": 6.967349052429199, |
|
"rewards/rejected": -14.046771049499512, |
|
"sft_loss": 1.0945566892623901, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.7886089813800657, |
|
"grad_norm": 12.346922738371601, |
|
"learning_rate": 3.544719166243998e-07, |
|
"logits/chosen": 17.161659240722656, |
|
"logits/rejected": 18.612253189086914, |
|
"logps/chosen": -295.6679992675781, |
|
"logps/rejected": -228.33984375, |
|
"loss": 0.4422, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -6.870236873626709, |
|
"rewards/margins": 7.495952129364014, |
|
"rewards/rejected": -14.36618709564209, |
|
"sft_loss": 0.9808112382888794, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.7968236582694413, |
|
"grad_norm": 14.120403338012792, |
|
"learning_rate": 3.513734620983716e-07, |
|
"logits/chosen": 17.235340118408203, |
|
"logits/rejected": 18.787269592285156, |
|
"logps/chosen": -289.2434997558594, |
|
"logps/rejected": -240.0524444580078, |
|
"loss": 0.4205, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -7.261631965637207, |
|
"rewards/margins": 8.297459602355957, |
|
"rewards/rejected": -15.55909252166748, |
|
"sft_loss": 0.9492250084877014, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.8050383351588171, |
|
"grad_norm": 14.978501832234636, |
|
"learning_rate": 3.482562579134809e-07, |
|
"logits/chosen": 15.85843276977539, |
|
"logits/rejected": 17.14594268798828, |
|
"logps/chosen": -256.8265380859375, |
|
"logps/rejected": -214.51412963867188, |
|
"loss": 0.466, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -7.612859725952148, |
|
"rewards/margins": 6.866227626800537, |
|
"rewards/rejected": -14.479085922241211, |
|
"sft_loss": 1.0439454317092896, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8132530120481928, |
|
"grad_norm": 13.645938681155632, |
|
"learning_rate": 3.4512088061623073e-07, |
|
"logits/chosen": 17.91840171813965, |
|
"logits/rejected": 18.105796813964844, |
|
"logps/chosen": -344.9450378417969, |
|
"logps/rejected": -257.0929870605469, |
|
"loss": 0.434, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -8.392577171325684, |
|
"rewards/margins": 8.40063762664795, |
|
"rewards/rejected": -16.793216705322266, |
|
"sft_loss": 1.052524447441101, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.8214676889375685, |
|
"grad_norm": 11.793731298003246, |
|
"learning_rate": 3.419679101143555e-07, |
|
"logits/chosen": 16.95572280883789, |
|
"logits/rejected": 18.109580993652344, |
|
"logps/chosen": -257.8283996582031, |
|
"logps/rejected": -217.70062255859375, |
|
"loss": 0.4059, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -7.232075214385986, |
|
"rewards/margins": 7.084912300109863, |
|
"rewards/rejected": -14.316986083984375, |
|
"sft_loss": 1.070483684539795, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8296823658269441, |
|
"grad_norm": 18.160358009772516, |
|
"learning_rate": 3.387979295695632e-07, |
|
"logits/chosen": 17.402151107788086, |
|
"logits/rejected": 17.819072723388672, |
|
"logps/chosen": -284.08599853515625, |
|
"logps/rejected": -228.4375, |
|
"loss": 0.4832, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": -7.799540042877197, |
|
"rewards/margins": 7.30112886428833, |
|
"rewards/rejected": -15.100667953491211, |
|
"sft_loss": 1.0201059579849243, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.8378970427163198, |
|
"grad_norm": 24.681797914609845, |
|
"learning_rate": 3.356115252896764e-07, |
|
"logits/chosen": 16.481372833251953, |
|
"logits/rejected": 17.393707275390625, |
|
"logps/chosen": -318.48956298828125, |
|
"logps/rejected": -238.67076110839844, |
|
"loss": 0.4569, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -7.8467302322387695, |
|
"rewards/margins": 7.6555867195129395, |
|
"rewards/rejected": -15.502315521240234, |
|
"sft_loss": 1.1412904262542725, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8461117196056955, |
|
"grad_norm": 11.780066809990752, |
|
"learning_rate": 3.3240928662019043e-07, |
|
"logits/chosen": 14.776932716369629, |
|
"logits/rejected": 16.346778869628906, |
|
"logps/chosen": -313.47589111328125, |
|
"logps/rejected": -242.91506958007812, |
|
"loss": 0.4196, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -7.860676288604736, |
|
"rewards/margins": 8.015517234802246, |
|
"rewards/rejected": -15.876194953918457, |
|
"sft_loss": 1.059720516204834, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.8543263964950711, |
|
"grad_norm": 14.114725277489077, |
|
"learning_rate": 3.291918058352706e-07, |
|
"logits/chosen": 16.27129554748535, |
|
"logits/rejected": 17.153289794921875, |
|
"logps/chosen": -306.25506591796875, |
|
"logps/rejected": -249.3704071044922, |
|
"loss": 0.5092, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -9.551309585571289, |
|
"rewards/margins": 7.208839416503906, |
|
"rewards/rejected": -16.760149002075195, |
|
"sft_loss": 1.1138993501663208, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8625410733844469, |
|
"grad_norm": 27.760604608400726, |
|
"learning_rate": 3.259596780282074e-07, |
|
"logits/chosen": 18.246183395385742, |
|
"logits/rejected": 18.89859390258789, |
|
"logps/chosen": -346.7146301269531, |
|
"logps/rejected": -260.1651916503906, |
|
"loss": 0.4395, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.043220520019531, |
|
"rewards/margins": 8.856348991394043, |
|
"rewards/rejected": -16.899568557739258, |
|
"sft_loss": 1.1765520572662354, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.8707557502738226, |
|
"grad_norm": 15.402410015157315, |
|
"learning_rate": 3.2271350100134975e-07, |
|
"logits/chosen": 17.567943572998047, |
|
"logits/rejected": 17.768869400024414, |
|
"logps/chosen": -298.6788024902344, |
|
"logps/rejected": -236.3932647705078, |
|
"loss": 0.4193, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -7.671787738800049, |
|
"rewards/margins": 7.834874629974365, |
|
"rewards/rejected": -15.506662368774414, |
|
"sft_loss": 1.071178913116455, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.8789704271631983, |
|
"grad_norm": 18.947114003342495, |
|
"learning_rate": 3.1945387515553843e-07, |
|
"logits/chosen": 17.647369384765625, |
|
"logits/rejected": 18.73533821105957, |
|
"logps/chosen": -310.0240478515625, |
|
"logps/rejected": -251.67193603515625, |
|
"loss": 0.441, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -7.7946672439575195, |
|
"rewards/margins": 9.0897798538208, |
|
"rewards/rejected": -16.88444709777832, |
|
"sft_loss": 1.0311574935913086, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.8871851040525739, |
|
"grad_norm": 11.041389503496823, |
|
"learning_rate": 3.1618140337905764e-07, |
|
"logits/chosen": 17.451311111450195, |
|
"logits/rejected": 18.353700637817383, |
|
"logps/chosen": -297.8014831542969, |
|
"logps/rejected": -240.24606323242188, |
|
"loss": 0.4126, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.106889724731445, |
|
"rewards/margins": 7.919802188873291, |
|
"rewards/rejected": -16.02669334411621, |
|
"sft_loss": 1.1384520530700684, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.8953997809419496, |
|
"grad_norm": 9.858801498232785, |
|
"learning_rate": 3.128966909361271e-07, |
|
"logits/chosen": 16.695926666259766, |
|
"logits/rejected": 18.67499351501465, |
|
"logps/chosen": -320.1283874511719, |
|
"logps/rejected": -254.82162475585938, |
|
"loss": 0.3699, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -8.027070999145508, |
|
"rewards/margins": 8.428789138793945, |
|
"rewards/rejected": -16.455860137939453, |
|
"sft_loss": 1.0505129098892212, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.9036144578313253, |
|
"grad_norm": 16.103503361054337, |
|
"learning_rate": 3.096003453549549e-07, |
|
"logits/chosen": 17.31558609008789, |
|
"logits/rejected": 17.725223541259766, |
|
"logps/chosen": -345.3844299316406, |
|
"logps/rejected": -261.2863464355469, |
|
"loss": 0.4497, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.239363670349121, |
|
"rewards/margins": 9.474674224853516, |
|
"rewards/rejected": -17.714040756225586, |
|
"sft_loss": 1.020671010017395, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.911829134720701, |
|
"grad_norm": 12.01821136380653, |
|
"learning_rate": 3.06292976315371e-07, |
|
"logits/chosen": 16.277523040771484, |
|
"logits/rejected": 17.34755516052246, |
|
"logps/chosen": -304.7778625488281, |
|
"logps/rejected": -241.48277282714844, |
|
"loss": 0.4126, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -7.762810230255127, |
|
"rewards/margins": 8.414238929748535, |
|
"rewards/rejected": -16.17704963684082, |
|
"sft_loss": 1.1222290992736816, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.9200438116100766, |
|
"grad_norm": 11.41112495788056, |
|
"learning_rate": 3.0297519553606324e-07, |
|
"logits/chosen": 17.731529235839844, |
|
"logits/rejected": 18.088359832763672, |
|
"logps/chosen": -305.7876281738281, |
|
"logps/rejected": -246.57879638671875, |
|
"loss": 0.4401, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -7.948428630828857, |
|
"rewards/margins": 8.658875465393066, |
|
"rewards/rejected": -16.60730743408203, |
|
"sft_loss": 1.067797064781189, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9282584884994524, |
|
"grad_norm": 21.985722962679343, |
|
"learning_rate": 2.996476166614363e-07, |
|
"logits/chosen": 15.972024917602539, |
|
"logits/rejected": 16.38096809387207, |
|
"logps/chosen": -330.54388427734375, |
|
"logps/rejected": -267.4414367675781, |
|
"loss": 0.5027, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.933554649353027, |
|
"rewards/margins": 9.095858573913574, |
|
"rewards/rejected": -18.0294132232666, |
|
"sft_loss": 1.1063634157180786, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.9364731653888281, |
|
"grad_norm": 10.308382930028264, |
|
"learning_rate": 2.963108551481142e-07, |
|
"logits/chosen": 17.77937889099121, |
|
"logits/rejected": 18.134130477905273, |
|
"logps/chosen": -339.63079833984375, |
|
"logps/rejected": -260.2466735839844, |
|
"loss": 0.4519, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.120518684387207, |
|
"rewards/margins": 9.045032501220703, |
|
"rewards/rejected": -17.165552139282227, |
|
"sft_loss": 1.072819471359253, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.9446878422782037, |
|
"grad_norm": 15.634526225587425, |
|
"learning_rate": 2.929655281511075e-07, |
|
"logits/chosen": 16.544097900390625, |
|
"logits/rejected": 17.375316619873047, |
|
"logps/chosen": -319.2738037109375, |
|
"logps/rejected": -257.0357971191406, |
|
"loss": 0.4126, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.096101760864258, |
|
"rewards/margins": 8.786704063415527, |
|
"rewards/rejected": -16.8828067779541, |
|
"sft_loss": 1.0927081108093262, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.9529025191675794, |
|
"grad_norm": 8.788361215925173, |
|
"learning_rate": 2.896122544096667e-07, |
|
"logits/chosen": 16.77577018737793, |
|
"logits/rejected": 17.813331604003906, |
|
"logps/chosen": -297.43548583984375, |
|
"logps/rejected": -240.00099182128906, |
|
"loss": 0.4592, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -7.802213191986084, |
|
"rewards/margins": 8.326114654541016, |
|
"rewards/rejected": -16.12833023071289, |
|
"sft_loss": 1.088619589805603, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9611171960569551, |
|
"grad_norm": 20.34248392425272, |
|
"learning_rate": 2.8625165413284307e-07, |
|
"logits/chosen": 16.004566192626953, |
|
"logits/rejected": 17.70891761779785, |
|
"logps/chosen": -328.6180725097656, |
|
"logps/rejected": -263.9577941894531, |
|
"loss": 0.5055, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.101932525634766, |
|
"rewards/margins": 9.129469871520996, |
|
"rewards/rejected": -17.23140525817871, |
|
"sft_loss": 1.0326135158538818, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.9693318729463308, |
|
"grad_norm": 13.09030046415886, |
|
"learning_rate": 2.8288434888477626e-07, |
|
"logits/chosen": 18.028348922729492, |
|
"logits/rejected": 17.76748275756836, |
|
"logps/chosen": -287.28692626953125, |
|
"logps/rejected": -231.44729614257812, |
|
"loss": 0.3908, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -7.940645217895508, |
|
"rewards/margins": 7.91243839263916, |
|
"rewards/rejected": -15.853084564208984, |
|
"sft_loss": 1.0779129266738892, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.9775465498357064, |
|
"grad_norm": 20.95262748964158, |
|
"learning_rate": 2.795109614697326e-07, |
|
"logits/chosen": 17.00741195678711, |
|
"logits/rejected": 18.209590911865234, |
|
"logps/chosen": -275.52880859375, |
|
"logps/rejected": -232.07052612304688, |
|
"loss": 0.4225, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -7.112081050872803, |
|
"rewards/margins": 8.281967163085938, |
|
"rewards/rejected": -15.394047737121582, |
|
"sft_loss": 1.0076452493667603, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.9857612267250822, |
|
"grad_norm": 13.158949539443237, |
|
"learning_rate": 2.761321158169134e-07, |
|
"logits/chosen": 18.07162094116211, |
|
"logits/rejected": 19.637807846069336, |
|
"logps/chosen": -307.5865478515625, |
|
"logps/rejected": -249.9253387451172, |
|
"loss": 0.4339, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.806564807891846, |
|
"rewards/margins": 8.727023124694824, |
|
"rewards/rejected": -16.53359031677246, |
|
"sft_loss": 1.06932532787323, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9939759036144579, |
|
"grad_norm": 13.610109275739992, |
|
"learning_rate": 2.727484368650553e-07, |
|
"logits/chosen": 15.262972831726074, |
|
"logits/rejected": 16.486412048339844, |
|
"logps/chosen": -305.6347351074219, |
|
"logps/rejected": -252.50546264648438, |
|
"loss": 0.4625, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.769743919372559, |
|
"rewards/margins": 8.321878433227539, |
|
"rewards/rejected": -17.091623306274414, |
|
"sft_loss": 1.1903793811798096, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.0021905805038336, |
|
"grad_norm": 9.988555947945434, |
|
"learning_rate": 2.6936055044684425e-07, |
|
"logits/chosen": 17.130857467651367, |
|
"logits/rejected": 17.868497848510742, |
|
"logps/chosen": -278.2147216796875, |
|
"logps/rejected": -229.0367889404297, |
|
"loss": 0.4205, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.578054428100586, |
|
"rewards/margins": 7.36480188369751, |
|
"rewards/rejected": -15.942855834960938, |
|
"sft_loss": 1.0933631658554077, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.0104052573932092, |
|
"grad_norm": 11.824094414048218, |
|
"learning_rate": 2.659690831731631e-07, |
|
"logits/chosen": 17.553348541259766, |
|
"logits/rejected": 18.92648696899414, |
|
"logps/chosen": -317.8105163574219, |
|
"logps/rejected": -263.2023620605469, |
|
"loss": 0.3385, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.87080192565918, |
|
"rewards/margins": 9.268915176391602, |
|
"rewards/rejected": -18.13971710205078, |
|
"sft_loss": 1.0447877645492554, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.0186199342825848, |
|
"grad_norm": 15.737059861781074, |
|
"learning_rate": 2.6257466231719676e-07, |
|
"logits/chosen": 15.165780067443848, |
|
"logits/rejected": 16.453243255615234, |
|
"logps/chosen": -338.23773193359375, |
|
"logps/rejected": -283.7428283691406, |
|
"loss": 0.3123, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.231574058532715, |
|
"rewards/margins": 10.158638954162598, |
|
"rewards/rejected": -19.390214920043945, |
|
"sft_loss": 1.2299811840057373, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.0268346111719606, |
|
"grad_norm": 11.900623330243908, |
|
"learning_rate": 2.591779156984137e-07, |
|
"logits/chosen": 16.764328002929688, |
|
"logits/rejected": 16.837923049926758, |
|
"logps/chosen": -322.6804504394531, |
|
"logps/rejected": -269.0111999511719, |
|
"loss": 0.3671, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -9.284835815429688, |
|
"rewards/margins": 9.551142692565918, |
|
"rewards/rejected": -18.83597755432129, |
|
"sft_loss": 1.0855733156204224, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.0350492880613362, |
|
"grad_norm": 18.88025576733879, |
|
"learning_rate": 2.557794715664465e-07, |
|
"logits/chosen": 15.582106590270996, |
|
"logits/rejected": 16.574077606201172, |
|
"logps/chosen": -330.9181213378906, |
|
"logps/rejected": -281.83709716796875, |
|
"loss": 0.4083, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.849162101745605, |
|
"rewards/margins": 10.020377159118652, |
|
"rewards/rejected": -19.86954116821289, |
|
"sft_loss": 1.11058509349823, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.0432639649507118, |
|
"grad_norm": 22.56812145625195, |
|
"learning_rate": 2.5237995848489417e-07, |
|
"logits/chosen": 16.257413864135742, |
|
"logits/rejected": 16.71412467956543, |
|
"logps/chosen": -332.62506103515625, |
|
"logps/rejected": -271.0566101074219, |
|
"loss": 0.4569, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.809398651123047, |
|
"rewards/margins": 10.076090812683105, |
|
"rewards/rejected": -18.88549041748047, |
|
"sft_loss": 1.1897673606872559, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.0514786418400877, |
|
"grad_norm": 10.647617140402389, |
|
"learning_rate": 2.48980005215064e-07, |
|
"logits/chosen": 16.611183166503906, |
|
"logits/rejected": 17.89920425415039, |
|
"logps/chosen": -271.6616516113281, |
|
"logps/rejected": -231.13978576660156, |
|
"loss": 0.4444, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -8.08376407623291, |
|
"rewards/margins": 8.19190502166748, |
|
"rewards/rejected": -16.27566909790039, |
|
"sft_loss": 1.3704915046691895, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.0596933187294633, |
|
"grad_norm": 19.247491047471033, |
|
"learning_rate": 2.45580240599679e-07, |
|
"logits/chosen": 16.49073028564453, |
|
"logits/rejected": 17.990306854248047, |
|
"logps/chosen": -358.3551025390625, |
|
"logps/rejected": -288.8968505859375, |
|
"loss": 0.3691, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.500346183776855, |
|
"rewards/margins": 10.489169120788574, |
|
"rewards/rejected": -18.98951530456543, |
|
"sft_loss": 1.2408881187438965, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.067907995618839, |
|
"grad_norm": 13.44526599292449, |
|
"learning_rate": 2.421812934465696e-07, |
|
"logits/chosen": 17.065837860107422, |
|
"logits/rejected": 17.75263214111328, |
|
"logps/chosen": -308.9762878417969, |
|
"logps/rejected": -256.1690979003906, |
|
"loss": 0.3945, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -8.130703926086426, |
|
"rewards/margins": 9.368220329284668, |
|
"rewards/rejected": -17.49892234802246, |
|
"sft_loss": 1.1205145120620728, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.0761226725082147, |
|
"grad_norm": 10.753673167776007, |
|
"learning_rate": 2.3878379241237134e-07, |
|
"logits/chosen": 16.457183837890625, |
|
"logits/rejected": 17.42021942138672, |
|
"logps/chosen": -312.5380554199219, |
|
"logps/rejected": -251.23977661132812, |
|
"loss": 0.3696, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.389382362365723, |
|
"rewards/margins": 8.924761772155762, |
|
"rewards/rejected": -17.314144134521484, |
|
"sft_loss": 1.2173506021499634, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.0843373493975903, |
|
"grad_norm": 23.82423804722956, |
|
"learning_rate": 2.3538836588625077e-07, |
|
"logits/chosen": 15.20209789276123, |
|
"logits/rejected": 15.774395942687988, |
|
"logps/chosen": -297.73260498046875, |
|
"logps/rejected": -246.4073944091797, |
|
"loss": 0.4032, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -8.610387802124023, |
|
"rewards/margins": 8.70635986328125, |
|
"rewards/rejected": -17.31674575805664, |
|
"sft_loss": 1.3788336515426636, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.0925520262869661, |
|
"grad_norm": 7.166962184073318, |
|
"learning_rate": 2.3199564187368153e-07, |
|
"logits/chosen": 15.194981575012207, |
|
"logits/rejected": 17.136018753051758, |
|
"logps/chosen": -328.6063537597656, |
|
"logps/rejected": -288.6786804199219, |
|
"loss": 0.366, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.775790214538574, |
|
"rewards/margins": 10.185883522033691, |
|
"rewards/rejected": -19.9616756439209, |
|
"sft_loss": 1.1107780933380127, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.1007667031763417, |
|
"grad_norm": 13.216204703949911, |
|
"learning_rate": 2.2860624788029013e-07, |
|
"logits/chosen": 16.70530891418457, |
|
"logits/rejected": 17.76304817199707, |
|
"logps/chosen": -289.44476318359375, |
|
"logps/rejected": -245.6142120361328, |
|
"loss": 0.4321, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -8.357013702392578, |
|
"rewards/margins": 8.433321952819824, |
|
"rewards/rejected": -16.790334701538086, |
|
"sft_loss": 1.1908717155456543, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.1089813800657173, |
|
"grad_norm": 26.032896310058877, |
|
"learning_rate": 2.2522081079579497e-07, |
|
"logits/chosen": 15.079482078552246, |
|
"logits/rejected": 16.43825340270996, |
|
"logps/chosen": -327.8377380371094, |
|
"logps/rejected": -283.44158935546875, |
|
"loss": 0.389, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -9.565984725952148, |
|
"rewards/margins": 10.288603782653809, |
|
"rewards/rejected": -19.854589462280273, |
|
"sft_loss": 1.4105526208877563, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.1171960569550932, |
|
"grad_norm": 7.35341298145847, |
|
"learning_rate": 2.2183995677805967e-07, |
|
"logits/chosen": 15.347798347473145, |
|
"logits/rejected": 16.887144088745117, |
|
"logps/chosen": -343.8727722167969, |
|
"logps/rejected": -289.7627258300781, |
|
"loss": 0.3343, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.85545539855957, |
|
"rewards/margins": 10.418365478515625, |
|
"rewards/rejected": -20.273822784423828, |
|
"sft_loss": 1.2016042470932007, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.1254107338444688, |
|
"grad_norm": 13.095979555911432, |
|
"learning_rate": 2.1846431113728062e-07, |
|
"logits/chosen": 15.633400917053223, |
|
"logits/rejected": 17.45536994934082, |
|
"logps/chosen": -328.1496887207031, |
|
"logps/rejected": -281.7301025390625, |
|
"loss": 0.3718, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.103165626525879, |
|
"rewards/margins": 10.700647354125977, |
|
"rewards/rejected": -19.80381202697754, |
|
"sft_loss": 1.198488473892212, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.1336254107338444, |
|
"grad_norm": 17.038758672339643, |
|
"learning_rate": 2.1509449822033205e-07, |
|
"logits/chosen": 16.633058547973633, |
|
"logits/rejected": 17.105684280395508, |
|
"logps/chosen": -340.9743957519531, |
|
"logps/rejected": -273.4366455078125, |
|
"loss": 0.3328, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.657751083374023, |
|
"rewards/margins": 9.9077787399292, |
|
"rewards/rejected": -18.565532684326172, |
|
"sft_loss": 1.1622406244277954, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.1418400876232202, |
|
"grad_norm": 13.181289081121232, |
|
"learning_rate": 2.1173114129528957e-07, |
|
"logits/chosen": 16.235170364379883, |
|
"logits/rejected": 17.971439361572266, |
|
"logps/chosen": -289.8466491699219, |
|
"logps/rejected": -249.1376495361328, |
|
"loss": 0.3625, |
|
"rewards/accuracies": 0.9333333373069763, |
|
"rewards/chosen": -8.312536239624023, |
|
"rewards/margins": 9.367281913757324, |
|
"rewards/rejected": -17.679819107055664, |
|
"sft_loss": 1.2810382843017578, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.1500547645125958, |
|
"grad_norm": 13.226133090678903, |
|
"learning_rate": 2.0837486243615226e-07, |
|
"logits/chosen": 16.742103576660156, |
|
"logits/rejected": 17.46257781982422, |
|
"logps/chosen": -364.11041259765625, |
|
"logps/rejected": -300.90618896484375, |
|
"loss": 0.3981, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.691442489624023, |
|
"rewards/margins": 11.124072074890137, |
|
"rewards/rejected": -20.81551742553711, |
|
"sft_loss": 1.0426690578460693, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1582694414019716, |
|
"grad_norm": 16.747134822775763, |
|
"learning_rate": 2.0502628240778653e-07, |
|
"logits/chosen": 17.3011474609375, |
|
"logits/rejected": 19.28099822998047, |
|
"logps/chosen": -329.4310607910156, |
|
"logps/rejected": -291.73443603515625, |
|
"loss": 0.3664, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.230022430419922, |
|
"rewards/margins": 11.087077140808105, |
|
"rewards/rejected": -20.31709861755371, |
|
"sft_loss": 1.0452929735183716, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.1664841182913472, |
|
"grad_norm": 11.712195080946406, |
|
"learning_rate": 2.0168602055111173e-07, |
|
"logits/chosen": 16.063915252685547, |
|
"logits/rejected": 17.033220291137695, |
|
"logps/chosen": -324.21099853515625, |
|
"logps/rejected": -281.9880065917969, |
|
"loss": 0.3326, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.243894577026367, |
|
"rewards/margins": 10.908008575439453, |
|
"rewards/rejected": -20.15190315246582, |
|
"sft_loss": 1.1959102153778076, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.1746987951807228, |
|
"grad_norm": 19.52291295321317, |
|
"learning_rate": 1.9835469466854887e-07, |
|
"logits/chosen": 14.572199821472168, |
|
"logits/rejected": 16.15847396850586, |
|
"logps/chosen": -322.0695495605469, |
|
"logps/rejected": -283.8585205078125, |
|
"loss": 0.3275, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.329268455505371, |
|
"rewards/margins": 10.466280937194824, |
|
"rewards/rejected": -19.795551300048828, |
|
"sft_loss": 1.1618155241012573, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.1829134720700987, |
|
"grad_norm": 14.04137372253548, |
|
"learning_rate": 1.9503292090975454e-07, |
|
"logits/chosen": 16.88302993774414, |
|
"logits/rejected": 17.57504653930664, |
|
"logps/chosen": -292.8112487792969, |
|
"logps/rejected": -249.99221801757812, |
|
"loss": 0.3841, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.16586685180664, |
|
"rewards/margins": 9.12321662902832, |
|
"rewards/rejected": -18.28908348083496, |
|
"sft_loss": 1.2042182683944702, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.1911281489594743, |
|
"grad_norm": 12.34681171872866, |
|
"learning_rate": 1.917213136576602e-07, |
|
"logits/chosen": 16.656551361083984, |
|
"logits/rejected": 17.51203155517578, |
|
"logps/chosen": -327.6507568359375, |
|
"logps/rejected": -284.38262939453125, |
|
"loss": 0.3207, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.919057846069336, |
|
"rewards/margins": 10.376687049865723, |
|
"rewards/rejected": -20.295743942260742, |
|
"sft_loss": 1.18035089969635, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.1993428258488499, |
|
"grad_norm": 10.050794300712155, |
|
"learning_rate": 1.8842048541483756e-07, |
|
"logits/chosen": 18.090221405029297, |
|
"logits/rejected": 18.187620162963867, |
|
"logps/chosen": -322.1310119628906, |
|
"logps/rejected": -253.3239288330078, |
|
"loss": 0.3945, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -8.852034568786621, |
|
"rewards/margins": 9.027352333068848, |
|
"rewards/rejected": -17.879384994506836, |
|
"sft_loss": 1.199164628982544, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.2075575027382257, |
|
"grad_norm": 11.698463225887238, |
|
"learning_rate": 1.8513104669021314e-07, |
|
"logits/chosen": 15.768450736999512, |
|
"logits/rejected": 17.4649715423584, |
|
"logps/chosen": -315.5854797363281, |
|
"logps/rejected": -270.3199462890625, |
|
"loss": 0.3727, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -9.129364967346191, |
|
"rewards/margins": 9.496920585632324, |
|
"rewards/rejected": -18.626283645629883, |
|
"sft_loss": 1.1171187162399292, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.2157721796276013, |
|
"grad_norm": 15.670433550127342, |
|
"learning_rate": 1.8185360588615057e-07, |
|
"logits/chosen": 17.373594284057617, |
|
"logits/rejected": 18.17388916015625, |
|
"logps/chosen": -349.6602478027344, |
|
"logps/rejected": -286.2644958496094, |
|
"loss": 0.3583, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.793112754821777, |
|
"rewards/margins": 10.82769775390625, |
|
"rewards/rejected": -19.620811462402344, |
|
"sft_loss": 1.1327273845672607, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.223986856516977, |
|
"grad_norm": 17.513419996090132, |
|
"learning_rate": 1.7858876918592232e-07, |
|
"logits/chosen": 15.862748146057129, |
|
"logits/rejected": 17.21187400817871, |
|
"logps/chosen": -301.255859375, |
|
"logps/rejected": -256.63555908203125, |
|
"loss": 0.3533, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -8.769881248474121, |
|
"rewards/margins": 9.70648193359375, |
|
"rewards/rejected": -18.476362228393555, |
|
"sft_loss": 1.1204417943954468, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.2322015334063527, |
|
"grad_norm": 19.125724690968823, |
|
"learning_rate": 1.7533714044159299e-07, |
|
"logits/chosen": 15.58492374420166, |
|
"logits/rejected": 16.52800941467285, |
|
"logps/chosen": -298.8733215332031, |
|
"logps/rejected": -268.4566650390625, |
|
"loss": 0.4265, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.785615921020508, |
|
"rewards/margins": 9.117348670959473, |
|
"rewards/rejected": -18.902963638305664, |
|
"sft_loss": 1.6064594984054565, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.2404162102957283, |
|
"grad_norm": 17.968784609019274, |
|
"learning_rate": 1.7209932106233264e-07, |
|
"logits/chosen": 15.145374298095703, |
|
"logits/rejected": 17.433292388916016, |
|
"logps/chosen": -342.9417724609375, |
|
"logps/rejected": -296.39654541015625, |
|
"loss": 0.3766, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.673720359802246, |
|
"rewards/margins": 10.815841674804688, |
|
"rewards/rejected": -20.489561080932617, |
|
"sft_loss": 1.145885944366455, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.248630887185104, |
|
"grad_norm": 13.684786311914898, |
|
"learning_rate": 1.688759099031824e-07, |
|
"logits/chosen": 15.70371150970459, |
|
"logits/rejected": 16.69938087463379, |
|
"logps/chosen": -361.2178955078125, |
|
"logps/rejected": -309.79150390625, |
|
"loss": 0.3508, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -10.506484031677246, |
|
"rewards/margins": 11.59350872039795, |
|
"rewards/rejected": -22.099994659423828, |
|
"sft_loss": 1.1850322484970093, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.2568455640744798, |
|
"grad_norm": 14.244960468313039, |
|
"learning_rate": 1.656675031542925e-07, |
|
"logits/chosen": 17.195899963378906, |
|
"logits/rejected": 18.426219940185547, |
|
"logps/chosen": -363.3425598144531, |
|
"logps/rejected": -301.96063232421875, |
|
"loss": 0.3397, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.729473114013672, |
|
"rewards/margins": 11.475974082946777, |
|
"rewards/rejected": -21.205448150634766, |
|
"sft_loss": 1.1794105768203735, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.2650602409638554, |
|
"grad_norm": 8.622276211404598, |
|
"learning_rate": 1.6247469423065343e-07, |
|
"logits/chosen": 16.508113861083984, |
|
"logits/rejected": 17.097890853881836, |
|
"logps/chosen": -305.1572570800781, |
|
"logps/rejected": -249.677001953125, |
|
"loss": 0.3759, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.539449691772461, |
|
"rewards/margins": 8.976499557495117, |
|
"rewards/rejected": -17.515949249267578, |
|
"sft_loss": 1.196576714515686, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.273274917853231, |
|
"grad_norm": 12.358403358119775, |
|
"learning_rate": 1.5929807366233977e-07, |
|
"logits/chosen": 16.241657257080078, |
|
"logits/rejected": 17.03815269470215, |
|
"logps/chosen": -369.39556884765625, |
|
"logps/rejected": -303.64337158203125, |
|
"loss": 0.3163, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.143651008605957, |
|
"rewards/margins": 11.741597175598145, |
|
"rewards/rejected": -20.885250091552734, |
|
"sft_loss": 1.1366469860076904, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.2814895947426068, |
|
"grad_norm": 16.14061914284979, |
|
"learning_rate": 1.5613822898528794e-07, |
|
"logits/chosen": 16.795856475830078, |
|
"logits/rejected": 17.53175163269043, |
|
"logps/chosen": -345.46929931640625, |
|
"logps/rejected": -292.4604187011719, |
|
"loss": 0.3369, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.835062026977539, |
|
"rewards/margins": 11.170233726501465, |
|
"rewards/rejected": -21.005298614501953, |
|
"sft_loss": 1.3101640939712524, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.2897042716319824, |
|
"grad_norm": 12.538981244658086, |
|
"learning_rate": 1.5299574463262794e-07, |
|
"logits/chosen": 15.523879051208496, |
|
"logits/rejected": 16.796798706054688, |
|
"logps/chosen": -377.0471496582031, |
|
"logps/rejected": -319.5939025878906, |
|
"loss": 0.4028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.438889503479004, |
|
"rewards/margins": 12.12649917602539, |
|
"rewards/rejected": -22.565387725830078, |
|
"sft_loss": 1.1697484254837036, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.297918948521358, |
|
"grad_norm": 13.959611183566771, |
|
"learning_rate": 1.4987120182658877e-07, |
|
"logits/chosen": 15.972567558288574, |
|
"logits/rejected": 18.35633659362793, |
|
"logps/chosen": -330.76104736328125, |
|
"logps/rejected": -282.9498291015625, |
|
"loss": 0.3757, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.672747611999512, |
|
"rewards/margins": 10.330121994018555, |
|
"rewards/rejected": -20.002866744995117, |
|
"sft_loss": 1.1246702671051025, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.3061336254107339, |
|
"grad_norm": 12.65020419545928, |
|
"learning_rate": 1.4676517847099745e-07, |
|
"logits/chosen": 16.62309455871582, |
|
"logits/rejected": 17.682994842529297, |
|
"logps/chosen": -309.1587829589844, |
|
"logps/rejected": -255.12290954589844, |
|
"loss": 0.3603, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.413141250610352, |
|
"rewards/margins": 9.33283805847168, |
|
"rewards/rejected": -17.7459774017334, |
|
"sft_loss": 1.1139575242996216, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.3143483023001095, |
|
"grad_norm": 11.10720994563204, |
|
"learning_rate": 1.4367824904439242e-07, |
|
"logits/chosen": 17.087141036987305, |
|
"logits/rejected": 17.25540542602539, |
|
"logps/chosen": -336.4616394042969, |
|
"logps/rejected": -273.6061096191406, |
|
"loss": 0.371, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.36069393157959, |
|
"rewards/margins": 10.413783073425293, |
|
"rewards/rejected": -18.774477005004883, |
|
"sft_loss": 1.0689451694488525, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.3225629791894853, |
|
"grad_norm": 12.783247596774917, |
|
"learning_rate": 1.4061098449376985e-07, |
|
"logits/chosen": 15.60853099822998, |
|
"logits/rejected": 17.57704734802246, |
|
"logps/chosen": -362.2177734375, |
|
"logps/rejected": -308.759765625, |
|
"loss": 0.3288, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.05817699432373, |
|
"rewards/margins": 11.833967208862305, |
|
"rewards/rejected": -20.89214324951172, |
|
"sft_loss": 1.2039010524749756, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.330777656078861, |
|
"grad_norm": 8.359077848319595, |
|
"learning_rate": 1.375639521289836e-07, |
|
"logits/chosen": 15.683825492858887, |
|
"logits/rejected": 16.602642059326172, |
|
"logps/chosen": -332.6221008300781, |
|
"logps/rejected": -278.2598571777344, |
|
"loss": 0.3387, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.19861888885498, |
|
"rewards/margins": 10.37482738494873, |
|
"rewards/rejected": -19.57344627380371, |
|
"sft_loss": 1.17559015750885, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.3389923329682367, |
|
"grad_norm": 13.496245877040751, |
|
"learning_rate": 1.3453771551781756e-07, |
|
"logits/chosen": 16.44358253479004, |
|
"logits/rejected": 17.437644958496094, |
|
"logps/chosen": -307.6462707519531, |
|
"logps/rejected": -271.81683349609375, |
|
"loss": 0.3318, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.48726749420166, |
|
"rewards/margins": 10.40507698059082, |
|
"rewards/rejected": -18.892345428466797, |
|
"sft_loss": 1.1855844259262085, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.3472070098576123, |
|
"grad_norm": 14.433148985359804, |
|
"learning_rate": 1.3153283438175034e-07, |
|
"logits/chosen": 15.872283935546875, |
|
"logits/rejected": 16.650604248046875, |
|
"logps/chosen": -324.4306945800781, |
|
"logps/rejected": -276.83929443359375, |
|
"loss": 0.3743, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.549914360046387, |
|
"rewards/margins": 10.544774055480957, |
|
"rewards/rejected": -20.094688415527344, |
|
"sft_loss": 1.1671338081359863, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.355421686746988, |
|
"grad_norm": 22.22401225520154, |
|
"learning_rate": 1.2854986449243124e-07, |
|
"logits/chosen": 16.34712028503418, |
|
"logits/rejected": 16.94756317138672, |
|
"logps/chosen": -331.7503662109375, |
|
"logps/rejected": -286.41705322265625, |
|
"loss": 0.3285, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.810836791992188, |
|
"rewards/margins": 10.913623809814453, |
|
"rewards/rejected": -20.724462509155273, |
|
"sft_loss": 1.0781916379928589, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.3636363636363638, |
|
"grad_norm": 11.973621147714551, |
|
"learning_rate": 1.2558935756888675e-07, |
|
"logits/chosen": 15.828746795654297, |
|
"logits/rejected": 16.91975212097168, |
|
"logps/chosen": -322.3880310058594, |
|
"logps/rejected": -279.2362365722656, |
|
"loss": 0.3542, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.630596160888672, |
|
"rewards/margins": 10.590496063232422, |
|
"rewards/rejected": -20.221094131469727, |
|
"sft_loss": 1.1420843601226807, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.3718510405257394, |
|
"grad_norm": 12.897955971332296, |
|
"learning_rate": 1.226518611754767e-07, |
|
"logits/chosen": 17.223234176635742, |
|
"logits/rejected": 18.44441795349121, |
|
"logps/chosen": -314.6831970214844, |
|
"logps/rejected": -273.42083740234375, |
|
"loss": 0.3494, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -8.791934967041016, |
|
"rewards/margins": 10.404925346374512, |
|
"rewards/rejected": -19.196863174438477, |
|
"sft_loss": 1.11257803440094, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.380065717415115, |
|
"grad_norm": 14.822041663775748, |
|
"learning_rate": 1.1973791862061871e-07, |
|
"logits/chosen": 15.981986045837402, |
|
"logits/rejected": 16.508832931518555, |
|
"logps/chosen": -357.4217529296875, |
|
"logps/rejected": -279.4723815917969, |
|
"loss": 0.4071, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -8.533044815063477, |
|
"rewards/margins": 10.845856666564941, |
|
"rewards/rejected": -19.378902435302734, |
|
"sft_loss": 1.071024775505066, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.3882803943044908, |
|
"grad_norm": 12.166531109745293, |
|
"learning_rate": 1.1684806885630003e-07, |
|
"logits/chosen": 17.19085693359375, |
|
"logits/rejected": 18.22423553466797, |
|
"logps/chosen": -336.6310729980469, |
|
"logps/rejected": -288.2579040527344, |
|
"loss": 0.3543, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.727254867553711, |
|
"rewards/margins": 11.067011833190918, |
|
"rewards/rejected": -19.794267654418945, |
|
"sft_loss": 1.0941708087921143, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.3964950711938664, |
|
"grad_norm": 19.61480809183216, |
|
"learning_rate": 1.1398284637839486e-07, |
|
"logits/chosen": 17.393543243408203, |
|
"logits/rejected": 17.97818946838379, |
|
"logps/chosen": -290.88043212890625, |
|
"logps/rejected": -248.78334045410156, |
|
"loss": 0.3532, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.74341869354248, |
|
"rewards/margins": 8.96402359008789, |
|
"rewards/rejected": -17.707439422607422, |
|
"sft_loss": 1.3463881015777588, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.404709748083242, |
|
"grad_norm": 13.04687226615894, |
|
"learning_rate": 1.1114278112780601e-07, |
|
"logits/chosen": 16.697458267211914, |
|
"logits/rejected": 17.817760467529297, |
|
"logps/chosen": -376.94256591796875, |
|
"logps/rejected": -319.7321472167969, |
|
"loss": 0.308, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -10.05405044555664, |
|
"rewards/margins": 12.681156158447266, |
|
"rewards/rejected": -22.735204696655273, |
|
"sft_loss": 1.1224801540374756, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.4129244249726178, |
|
"grad_norm": 13.443707852848624, |
|
"learning_rate": 1.08328398392449e-07, |
|
"logits/chosen": 17.408639907836914, |
|
"logits/rejected": 17.620332717895508, |
|
"logps/chosen": -365.28131103515625, |
|
"logps/rejected": -308.3528137207031, |
|
"loss": 0.3755, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -10.822293281555176, |
|
"rewards/margins": 11.438949584960938, |
|
"rewards/rejected": -22.261241912841797, |
|
"sft_loss": 1.178871750831604, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.4211391018619934, |
|
"grad_norm": 21.58859732751979, |
|
"learning_rate": 1.0554021871009677e-07, |
|
"logits/chosen": 16.947927474975586, |
|
"logits/rejected": 17.420812606811523, |
|
"logps/chosen": -340.0753479003906, |
|
"logps/rejected": -297.9937438964844, |
|
"loss": 0.3588, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.554227828979492, |
|
"rewards/margins": 12.149679183959961, |
|
"rewards/rejected": -21.70391082763672, |
|
"sft_loss": 1.3246734142303467, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.429353778751369, |
|
"grad_norm": 13.8734601142875, |
|
"learning_rate": 1.0277875777210299e-07, |
|
"logits/chosen": 14.887709617614746, |
|
"logits/rejected": 15.843902587890625, |
|
"logps/chosen": -324.3350830078125, |
|
"logps/rejected": -275.6741943359375, |
|
"loss": 0.3712, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.113089561462402, |
|
"rewards/margins": 10.9337158203125, |
|
"rewards/rejected": -20.046804428100586, |
|
"sft_loss": 1.2739402055740356, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.4375684556407449, |
|
"grad_norm": 13.714339626163223, |
|
"learning_rate": 1.0004452632802158e-07, |
|
"logits/chosen": 17.476552963256836, |
|
"logits/rejected": 17.923315048217773, |
|
"logps/chosen": -338.1813049316406, |
|
"logps/rejected": -277.5501403808594, |
|
"loss": 0.3129, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.576931953430176, |
|
"rewards/margins": 10.827016830444336, |
|
"rewards/rejected": -19.403947830200195, |
|
"sft_loss": 1.1658498048782349, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.4457831325301205, |
|
"grad_norm": 13.805309365020234, |
|
"learning_rate": 9.733803009114044e-08, |
|
"logits/chosen": 16.891300201416016, |
|
"logits/rejected": 17.32049560546875, |
|
"logps/chosen": -322.0257263183594, |
|
"logps/rejected": -274.27691650390625, |
|
"loss": 0.316, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.144042015075684, |
|
"rewards/margins": 10.516057014465332, |
|
"rewards/rejected": -18.660099029541016, |
|
"sft_loss": 1.110759973526001, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.453997809419496, |
|
"grad_norm": 29.77032111690104, |
|
"learning_rate": 9.465976964494682e-08, |
|
"logits/chosen": 16.620283126831055, |
|
"logits/rejected": 17.72939682006836, |
|
"logps/chosen": -300.1767578125, |
|
"logps/rejected": -261.1438903808594, |
|
"loss": 0.361, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.963627815246582, |
|
"rewards/margins": 9.891008377075195, |
|
"rewards/rejected": -18.854639053344727, |
|
"sft_loss": 1.2920080423355103, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.462212486308872, |
|
"grad_norm": 9.782780560332286, |
|
"learning_rate": 9.201024035054053e-08, |
|
"logits/chosen": 17.15985107421875, |
|
"logits/rejected": 17.535512924194336, |
|
"logps/chosen": -286.6101379394531, |
|
"logps/rejected": -247.57127380371094, |
|
"loss": 0.3835, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -9.331561088562012, |
|
"rewards/margins": 9.115301132202148, |
|
"rewards/rejected": -18.446863174438477, |
|
"sft_loss": 1.3567354679107666, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.4704271631982475, |
|
"grad_norm": 8.768405187815805, |
|
"learning_rate": 8.938993225501495e-08, |
|
"logits/chosen": 17.89764976501465, |
|
"logits/rejected": 18.452497482299805, |
|
"logps/chosen": -351.6549987792969, |
|
"logps/rejected": -302.9189453125, |
|
"loss": 0.3592, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.618634223937988, |
|
"rewards/margins": 11.819962501525879, |
|
"rewards/rejected": -21.4385986328125, |
|
"sft_loss": 1.0768134593963623, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.4786418400876231, |
|
"grad_norm": 21.82788195022886, |
|
"learning_rate": 8.679933000081879e-08, |
|
"logits/chosen": 15.745450019836426, |
|
"logits/rejected": 17.15949249267578, |
|
"logps/chosen": -307.5598449707031, |
|
"logps/rejected": -271.531494140625, |
|
"loss": 0.3801, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.928607940673828, |
|
"rewards/margins": 10.275431632995605, |
|
"rewards/rejected": -19.204038619995117, |
|
"sft_loss": 1.1987248659133911, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.486856516976999, |
|
"grad_norm": 12.077209434939249, |
|
"learning_rate": 8.423891273611855e-08, |
|
"logits/chosen": 16.016569137573242, |
|
"logits/rejected": 16.249284744262695, |
|
"logps/chosen": -311.76934814453125, |
|
"logps/rejected": -261.8121643066406, |
|
"loss": 0.3799, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.412449836730957, |
|
"rewards/margins": 10.322086334228516, |
|
"rewards/rejected": -18.734539031982422, |
|
"sft_loss": 1.2180228233337402, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.4950711938663745, |
|
"grad_norm": 20.15671717033895, |
|
"learning_rate": 8.170915402617739e-08, |
|
"logits/chosen": 15.889266014099121, |
|
"logits/rejected": 17.218164443969727, |
|
"logps/chosen": -335.0419921875, |
|
"logps/rejected": -293.2705078125, |
|
"loss": 0.4051, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.390657424926758, |
|
"rewards/margins": 11.274243354797363, |
|
"rewards/rejected": -20.664899826049805, |
|
"sft_loss": 1.1832726001739502, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.5032858707557502, |
|
"grad_norm": 11.069682914043863, |
|
"learning_rate": 7.921052176576643e-08, |
|
"logits/chosen": 17.052453994750977, |
|
"logits/rejected": 17.67256736755371, |
|
"logps/chosen": -305.6400146484375, |
|
"logps/rejected": -266.4335632324219, |
|
"loss": 0.3165, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.719382286071777, |
|
"rewards/margins": 10.110651016235352, |
|
"rewards/rejected": -18.830034255981445, |
|
"sft_loss": 1.0706188678741455, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.511500547645126, |
|
"grad_norm": 27.258481926608287, |
|
"learning_rate": 7.674347809262377e-08, |
|
"logits/chosen": 16.615238189697266, |
|
"logits/rejected": 17.932260513305664, |
|
"logps/chosen": -288.8174743652344, |
|
"logps/rejected": -250.63177490234375, |
|
"loss": 0.3758, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -7.821852684020996, |
|
"rewards/margins": 9.426899909973145, |
|
"rewards/rejected": -17.24875259399414, |
|
"sft_loss": 1.12588369846344, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.5197152245345018, |
|
"grad_norm": 8.415837096456798, |
|
"learning_rate": 7.430847930198009e-08, |
|
"logits/chosen": 16.921852111816406, |
|
"logits/rejected": 17.39198875427246, |
|
"logps/chosen": -329.8725891113281, |
|
"logps/rejected": -274.1763000488281, |
|
"loss": 0.3708, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.946272850036621, |
|
"rewards/margins": 11.179486274719238, |
|
"rewards/rejected": -19.12575912475586, |
|
"sft_loss": 1.286713719367981, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.5279299014238772, |
|
"grad_norm": 7.8387923698583295, |
|
"learning_rate": 7.190597576216384e-08, |
|
"logits/chosen": 15.69840145111084, |
|
"logits/rejected": 17.983213424682617, |
|
"logps/chosen": -329.1253967285156, |
|
"logps/rejected": -290.71051025390625, |
|
"loss": 0.3144, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -8.661620140075684, |
|
"rewards/margins": 11.160269737243652, |
|
"rewards/rejected": -19.821889877319336, |
|
"sft_loss": 1.1312789916992188, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.536144578313253, |
|
"grad_norm": 14.005325625629936, |
|
"learning_rate": 6.953641183130224e-08, |
|
"logits/chosen": 16.529827117919922, |
|
"logits/rejected": 16.534809112548828, |
|
"logps/chosen": -333.02813720703125, |
|
"logps/rejected": -275.6182556152344, |
|
"loss": 0.3675, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.715841293334961, |
|
"rewards/margins": 9.975454330444336, |
|
"rewards/rejected": -19.691295623779297, |
|
"sft_loss": 1.2341707944869995, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.5443592552026288, |
|
"grad_norm": 11.238181780972436, |
|
"learning_rate": 6.720022577513507e-08, |
|
"logits/chosen": 15.408208847045898, |
|
"logits/rejected": 16.01373291015625, |
|
"logps/chosen": -350.6366882324219, |
|
"logps/rejected": -291.2669677734375, |
|
"loss": 0.3381, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -10.044300079345703, |
|
"rewards/margins": 10.780247688293457, |
|
"rewards/rejected": -20.82455062866211, |
|
"sft_loss": 1.26254141330719, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.5525739320920042, |
|
"grad_norm": 11.413642178268471, |
|
"learning_rate": 6.489784968595444e-08, |
|
"logits/chosen": 15.467609405517578, |
|
"logits/rejected": 16.952180862426758, |
|
"logps/chosen": -346.5306091308594, |
|
"logps/rejected": -312.6312561035156, |
|
"loss": 0.3402, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -9.993128776550293, |
|
"rewards/margins": 12.808844566345215, |
|
"rewards/rejected": -22.801973342895508, |
|
"sft_loss": 1.1826088428497314, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.56078860898138, |
|
"grad_norm": 22.79199458890795, |
|
"learning_rate": 6.262970940268652e-08, |
|
"logits/chosen": 16.051044464111328, |
|
"logits/rejected": 17.10271453857422, |
|
"logps/chosen": -313.6996765136719, |
|
"logps/rejected": -278.2881774902344, |
|
"loss": 0.333, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -9.396943092346191, |
|
"rewards/margins": 10.23829460144043, |
|
"rewards/rejected": -19.635236740112305, |
|
"sft_loss": 1.1279245615005493, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.5690032858707559, |
|
"grad_norm": 11.668850401054987, |
|
"learning_rate": 6.039622443213008e-08, |
|
"logits/chosen": 16.13634490966797, |
|
"logits/rejected": 17.919300079345703, |
|
"logps/chosen": -325.7288513183594, |
|
"logps/rejected": -289.1236267089844, |
|
"loss": 0.3346, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.695369720458984, |
|
"rewards/margins": 11.087947845458984, |
|
"rewards/rejected": -20.78331756591797, |
|
"sft_loss": 1.1951278448104858, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.5772179627601315, |
|
"grad_norm": 13.415709297062323, |
|
"learning_rate": 5.8197807871366e-08, |
|
"logits/chosen": 15.244779586791992, |
|
"logits/rejected": 16.526262283325195, |
|
"logps/chosen": -370.6669616699219, |
|
"logps/rejected": -322.87847900390625, |
|
"loss": 0.3428, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.789223670959473, |
|
"rewards/margins": 12.587509155273438, |
|
"rewards/rejected": -22.376733779907227, |
|
"sft_loss": 2.1045873165130615, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.585432639649507, |
|
"grad_norm": 13.58873079620651, |
|
"learning_rate": 5.6034866331352376e-08, |
|
"logits/chosen": 15.409506797790527, |
|
"logits/rejected": 16.128753662109375, |
|
"logps/chosen": -322.9807434082031, |
|
"logps/rejected": -271.06378173828125, |
|
"loss": 0.347, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.693291664123535, |
|
"rewards/margins": 10.159637451171875, |
|
"rewards/rejected": -19.852930068969727, |
|
"sft_loss": 1.1238617897033691, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.593647316538883, |
|
"grad_norm": 16.504268173121613, |
|
"learning_rate": 5.390779986171934e-08, |
|
"logits/chosen": 15.72015380859375, |
|
"logits/rejected": 17.518657684326172, |
|
"logps/chosen": -337.39349365234375, |
|
"logps/rejected": -302.06109619140625, |
|
"loss": 0.3214, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.937000274658203, |
|
"rewards/margins": 11.78524112701416, |
|
"rewards/rejected": -20.72224235534668, |
|
"sft_loss": 1.129492998123169, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.6018619934282585, |
|
"grad_norm": 14.941336561605484, |
|
"learning_rate": 5.1817001876777314e-08, |
|
"logits/chosen": 15.710195541381836, |
|
"logits/rejected": 16.9680233001709, |
|
"logps/chosen": -324.51251220703125, |
|
"logps/rejected": -286.7372741699219, |
|
"loss": 0.3363, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.67973518371582, |
|
"rewards/margins": 11.069150924682617, |
|
"rewards/rejected": -19.74888801574707, |
|
"sft_loss": 1.168811559677124, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.6100766703176341, |
|
"grad_norm": 11.368129107493246, |
|
"learning_rate": 4.9762859082752464e-08, |
|
"logits/chosen": 17.196496963500977, |
|
"logits/rejected": 18.05078125, |
|
"logps/chosen": -340.8441162109375, |
|
"logps/rejected": -291.5513610839844, |
|
"loss": 0.332, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.99682903289795, |
|
"rewards/margins": 11.247660636901855, |
|
"rewards/rejected": -20.244489669799805, |
|
"sft_loss": 1.040310025215149, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.61829134720701, |
|
"grad_norm": 17.375398637805176, |
|
"learning_rate": 4.774575140626316e-08, |
|
"logits/chosen": 15.612386703491211, |
|
"logits/rejected": 17.049909591674805, |
|
"logps/chosen": -315.4412841796875, |
|
"logps/rejected": -273.022216796875, |
|
"loss": 0.2981, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.78695011138916, |
|
"rewards/margins": 10.45934772491455, |
|
"rewards/rejected": -19.24629783630371, |
|
"sft_loss": 1.122090458869934, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.6265060240963856, |
|
"grad_norm": 18.391059447329464, |
|
"learning_rate": 4.5766051924049975e-08, |
|
"logits/chosen": 19.033084869384766, |
|
"logits/rejected": 19.09506607055664, |
|
"logps/chosen": -344.99224853515625, |
|
"logps/rejected": -281.4374084472656, |
|
"loss": 0.4023, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.408563613891602, |
|
"rewards/margins": 11.344144821166992, |
|
"rewards/rejected": -19.752708435058594, |
|
"sft_loss": 1.2188175916671753, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.6347207009857612, |
|
"grad_norm": 4.7857387318547255, |
|
"learning_rate": 4.3824126793972934e-08, |
|
"logits/chosen": 15.44153118133545, |
|
"logits/rejected": 16.74248504638672, |
|
"logps/chosen": -348.91326904296875, |
|
"logps/rejected": -291.33905029296875, |
|
"loss": 0.3604, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.16443157196045, |
|
"rewards/margins": 12.226564407348633, |
|
"rewards/rejected": -20.390995025634766, |
|
"sft_loss": 1.1215661764144897, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.642935377875137, |
|
"grad_norm": 8.425137005894317, |
|
"learning_rate": 4.192033518728819e-08, |
|
"logits/chosen": 16.596193313598633, |
|
"logits/rejected": 16.706600189208984, |
|
"logps/chosen": -337.87109375, |
|
"logps/rejected": -279.28277587890625, |
|
"loss": 0.3546, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.45007038116455, |
|
"rewards/margins": 11.166516304016113, |
|
"rewards/rejected": -19.616586685180664, |
|
"sft_loss": 1.3097057342529297, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6511500547645126, |
|
"grad_norm": 10.216638281397124, |
|
"learning_rate": 4.0055029222217125e-08, |
|
"logits/chosen": 16.447404861450195, |
|
"logits/rejected": 16.960412979125977, |
|
"logps/chosen": -313.47698974609375, |
|
"logps/rejected": -269.1077880859375, |
|
"loss": 0.3193, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.434925079345703, |
|
"rewards/margins": 10.487255096435547, |
|
"rewards/rejected": -19.92218017578125, |
|
"sft_loss": 1.099938988685608, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.6593647316538882, |
|
"grad_norm": 10.435302161258754, |
|
"learning_rate": 3.8228553898819904e-08, |
|
"logits/chosen": 17.95560073852539, |
|
"logits/rejected": 19.009355545043945, |
|
"logps/chosen": -340.97222900390625, |
|
"logps/rejected": -298.7887268066406, |
|
"loss": 0.3949, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -9.733698844909668, |
|
"rewards/margins": 11.381470680236816, |
|
"rewards/rejected": -21.115171432495117, |
|
"sft_loss": 1.1103211641311646, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.667579408543264, |
|
"grad_norm": 11.907900026431262, |
|
"learning_rate": 3.6441247035185416e-08, |
|
"logits/chosen": 16.81635284423828, |
|
"logits/rejected": 17.959022521972656, |
|
"logps/chosen": -361.63812255859375, |
|
"logps/rejected": -303.6453552246094, |
|
"loss": 0.3353, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.310194969177246, |
|
"rewards/margins": 11.503978729248047, |
|
"rewards/rejected": -20.814172744750977, |
|
"sft_loss": 1.1227651834487915, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 1.6757940854326396, |
|
"grad_norm": 10.027831680647658, |
|
"learning_rate": 3.4693439204949855e-08, |
|
"logits/chosen": 15.768338203430176, |
|
"logits/rejected": 17.33998680114746, |
|
"logps/chosen": -292.4506530761719, |
|
"logps/rejected": -263.465087890625, |
|
"loss": 0.3701, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.007841110229492, |
|
"rewards/margins": 10.145407676696777, |
|
"rewards/rejected": -19.153249740600586, |
|
"sft_loss": 1.1951355934143066, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.6840087623220152, |
|
"grad_norm": 19.083513373797096, |
|
"learning_rate": 3.298545367615493e-08, |
|
"logits/chosen": 17.174707412719727, |
|
"logits/rejected": 17.86057472229004, |
|
"logps/chosen": -288.18280029296875, |
|
"logps/rejected": -254.59439086914062, |
|
"loss": 0.4406, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": -8.990920066833496, |
|
"rewards/margins": 9.506522178649902, |
|
"rewards/rejected": -18.4974422454834, |
|
"sft_loss": 1.2072545289993286, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.692223439211391, |
|
"grad_norm": 12.02229671131509, |
|
"learning_rate": 3.13176063514575e-08, |
|
"logits/chosen": 17.051944732666016, |
|
"logits/rejected": 17.904996871948242, |
|
"logps/chosen": -359.4859619140625, |
|
"logps/rejected": -295.76361083984375, |
|
"loss": 0.3592, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -8.758131980895996, |
|
"rewards/margins": 11.989044189453125, |
|
"rewards/rejected": -20.747175216674805, |
|
"sft_loss": 1.2417008876800537, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.7004381161007667, |
|
"grad_norm": 14.595666831687033, |
|
"learning_rate": 2.96902057097011e-08, |
|
"logits/chosen": 16.427305221557617, |
|
"logits/rejected": 17.641498565673828, |
|
"logps/chosen": -320.2253723144531, |
|
"logps/rejected": -269.6889953613281, |
|
"loss": 0.3571, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.658390998840332, |
|
"rewards/margins": 10.258994102478027, |
|
"rewards/rejected": -18.91738510131836, |
|
"sft_loss": 1.332204818725586, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 1.7086527929901423, |
|
"grad_norm": 13.068829943035729, |
|
"learning_rate": 2.8103552748861475e-08, |
|
"logits/chosen": 15.954511642456055, |
|
"logits/rejected": 16.74055290222168, |
|
"logps/chosen": -331.81707763671875, |
|
"logps/rejected": -280.3811950683594, |
|
"loss": 0.335, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.056927680969238, |
|
"rewards/margins": 10.4561767578125, |
|
"rewards/rejected": -19.51310157775879, |
|
"sft_loss": 1.1305441856384277, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.716867469879518, |
|
"grad_norm": 14.364271003384296, |
|
"learning_rate": 2.65579409303745e-08, |
|
"logits/chosen": 17.06740951538086, |
|
"logits/rejected": 17.10344886779785, |
|
"logps/chosen": -364.3813171386719, |
|
"logps/rejected": -293.8392333984375, |
|
"loss": 0.3632, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.905304908752441, |
|
"rewards/margins": 11.902792930603027, |
|
"rewards/rejected": -20.808101654052734, |
|
"sft_loss": 1.149087905883789, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 1.7250821467688937, |
|
"grad_norm": 21.44861485257077, |
|
"learning_rate": 2.505365612485874e-08, |
|
"logits/chosen": 14.690909385681152, |
|
"logits/rejected": 15.39016056060791, |
|
"logps/chosen": -310.1071472167969, |
|
"logps/rejected": -257.1431884765625, |
|
"loss": 0.3935, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -9.03943157196045, |
|
"rewards/margins": 9.36133098602295, |
|
"rewards/rejected": -18.4007625579834, |
|
"sft_loss": 1.492135763168335, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.7332968236582693, |
|
"grad_norm": 10.165639822250112, |
|
"learning_rate": 2.3590976559242275e-08, |
|
"logits/chosen": 16.5327091217041, |
|
"logits/rejected": 17.50569725036621, |
|
"logps/chosen": -327.5498962402344, |
|
"logps/rejected": -288.2828674316406, |
|
"loss": 0.3287, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.587510108947754, |
|
"rewards/margins": 10.448949813842773, |
|
"rewards/rejected": -20.036460876464844, |
|
"sft_loss": 1.2338570356369019, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 1.7415115005476451, |
|
"grad_norm": 9.87040734328389, |
|
"learning_rate": 2.21701727653025e-08, |
|
"logits/chosen": 15.633200645446777, |
|
"logits/rejected": 16.086591720581055, |
|
"logps/chosen": -352.7239990234375, |
|
"logps/rejected": -294.7661437988281, |
|
"loss": 0.3506, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.915904998779297, |
|
"rewards/margins": 11.233012199401855, |
|
"rewards/rejected": -21.14891815185547, |
|
"sft_loss": 1.2049648761749268, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.749726177437021, |
|
"grad_norm": 12.834737803326664, |
|
"learning_rate": 2.0791507529629522e-08, |
|
"logits/chosen": 16.351898193359375, |
|
"logits/rejected": 17.47950553894043, |
|
"logps/chosen": -281.7489318847656, |
|
"logps/rejected": -243.97483825683594, |
|
"loss": 0.3882, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.137645721435547, |
|
"rewards/margins": 8.15616226196289, |
|
"rewards/rejected": -17.29380989074707, |
|
"sft_loss": 1.157172679901123, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 1.7579408543263964, |
|
"grad_norm": 17.205116768747743, |
|
"learning_rate": 1.945523584502262e-08, |
|
"logits/chosen": 17.508634567260742, |
|
"logits/rejected": 17.94008445739746, |
|
"logps/chosen": -381.6427917480469, |
|
"logps/rejected": -311.2584228515625, |
|
"loss": 0.277, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.13695240020752, |
|
"rewards/margins": 12.883686065673828, |
|
"rewards/rejected": -22.02063751220703, |
|
"sft_loss": 1.055487036705017, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.7661555312157722, |
|
"grad_norm": 20.851515512896743, |
|
"learning_rate": 1.8161604863327072e-08, |
|
"logits/chosen": 15.488776206970215, |
|
"logits/rejected": 16.223703384399414, |
|
"logps/chosen": -325.0180358886719, |
|
"logps/rejected": -262.5523376464844, |
|
"loss": 0.3441, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -8.707998275756836, |
|
"rewards/margins": 9.961923599243164, |
|
"rewards/rejected": -18.669921875, |
|
"sft_loss": 1.1589832305908203, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.774370208105148, |
|
"grad_norm": 17.972861201786518, |
|
"learning_rate": 1.691085384972235e-08, |
|
"logits/chosen": 14.909817695617676, |
|
"logits/rejected": 15.637177467346191, |
|
"logps/chosen": -278.62322998046875, |
|
"logps/rejected": -248.10516357421875, |
|
"loss": 0.3273, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.496481895446777, |
|
"rewards/margins": 9.37110424041748, |
|
"rewards/rejected": -17.867582321166992, |
|
"sft_loss": 1.2477223873138428, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.7825848849945234, |
|
"grad_norm": 14.287110123465489, |
|
"learning_rate": 1.570321413846845e-08, |
|
"logits/chosen": 15.394953727722168, |
|
"logits/rejected": 17.261220932006836, |
|
"logps/chosen": -303.1915588378906, |
|
"logps/rejected": -277.51458740234375, |
|
"loss": 0.2832, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.813228607177734, |
|
"rewards/margins": 10.778543472290039, |
|
"rewards/rejected": -19.59177017211914, |
|
"sft_loss": 1.2371479272842407, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 1.7907995618838992, |
|
"grad_norm": 25.07441398024989, |
|
"learning_rate": 1.4538909090118846e-08, |
|
"logits/chosen": 16.854040145874023, |
|
"logits/rejected": 16.584880828857422, |
|
"logps/chosen": -322.2169494628906, |
|
"logps/rejected": -270.48895263671875, |
|
"loss": 0.3503, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.387935638427734, |
|
"rewards/margins": 10.15947437286377, |
|
"rewards/rejected": -19.547407150268555, |
|
"sft_loss": 1.2250884771347046, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.799014238773275, |
|
"grad_norm": 12.397083886048673, |
|
"learning_rate": 1.3418154050208936e-08, |
|
"logits/chosen": 15.345029830932617, |
|
"logits/rejected": 16.834665298461914, |
|
"logps/chosen": -297.9521484375, |
|
"logps/rejected": -269.69659423828125, |
|
"loss": 0.3526, |
|
"rewards/accuracies": 0.9066667556762695, |
|
"rewards/chosen": -8.90621280670166, |
|
"rewards/margins": 10.480603218078613, |
|
"rewards/rejected": -19.386816024780273, |
|
"sft_loss": 1.1300204992294312, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 1.8072289156626506, |
|
"grad_norm": 11.032455088728524, |
|
"learning_rate": 1.2341156309426447e-08, |
|
"logits/chosen": 14.950087547302246, |
|
"logits/rejected": 16.54684829711914, |
|
"logps/chosen": -332.92596435546875, |
|
"logps/rejected": -291.2406005859375, |
|
"loss": 0.289, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.32970905303955, |
|
"rewards/margins": 11.054911613464355, |
|
"rewards/rejected": -20.384618759155273, |
|
"sft_loss": 1.0616583824157715, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.8154435925520263, |
|
"grad_norm": 12.222936203639813, |
|
"learning_rate": 1.130811506527149e-08, |
|
"logits/chosen": 16.257431030273438, |
|
"logits/rejected": 17.80784034729004, |
|
"logps/chosen": -374.90716552734375, |
|
"logps/rejected": -309.1212158203125, |
|
"loss": 0.2761, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.983473777770996, |
|
"rewards/margins": 11.696609497070312, |
|
"rewards/rejected": -21.680082321166992, |
|
"sft_loss": 1.1142687797546387, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 1.823658269441402, |
|
"grad_norm": 8.603199609340459, |
|
"learning_rate": 1.0319221385213934e-08, |
|
"logits/chosen": 15.376051902770996, |
|
"logits/rejected": 16.714609146118164, |
|
"logps/chosen": -314.27996826171875, |
|
"logps/rejected": -280.79901123046875, |
|
"loss": 0.3201, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.61314868927002, |
|
"rewards/margins": 9.986039161682129, |
|
"rewards/rejected": -19.59918785095215, |
|
"sft_loss": 1.2583483457565308, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.8318729463307777, |
|
"grad_norm": 19.393082549696974, |
|
"learning_rate": 9.374658171354411e-09, |
|
"logits/chosen": 16.10991859436035, |
|
"logits/rejected": 17.19182586669922, |
|
"logps/chosen": -335.8138122558594, |
|
"logps/rejected": -285.86859130859375, |
|
"loss": 0.3573, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -9.598699569702148, |
|
"rewards/margins": 11.110600471496582, |
|
"rewards/rejected": -20.709299087524414, |
|
"sft_loss": 1.2626595497131348, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 1.8400876232201533, |
|
"grad_norm": 11.87556668069316, |
|
"learning_rate": 8.474600126594983e-09, |
|
"logits/chosen": 16.182172775268555, |
|
"logits/rejected": 17.73249053955078, |
|
"logps/chosen": -327.0877685546875, |
|
"logps/rejected": -281.38848876953125, |
|
"loss": 0.3247, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.166089057922363, |
|
"rewards/margins": 11.149109840393066, |
|
"rewards/rejected": -20.315196990966797, |
|
"sft_loss": 1.3075504302978516, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.8483023001095291, |
|
"grad_norm": 14.892979384936938, |
|
"learning_rate": 7.619213722327184e-09, |
|
"logits/chosen": 16.07329750061035, |
|
"logits/rejected": 16.353158950805664, |
|
"logps/chosen": -328.3527526855469, |
|
"logps/rejected": -281.48565673828125, |
|
"loss": 0.3187, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.420902252197266, |
|
"rewards/margins": 10.730939865112305, |
|
"rewards/rejected": -20.151844024658203, |
|
"sft_loss": 1.2091686725616455, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.8565169769989047, |
|
"grad_norm": 12.380319924456133, |
|
"learning_rate": 6.808657167641896e-09, |
|
"logits/chosen": 15.801959037780762, |
|
"logits/rejected": 16.7104434967041, |
|
"logps/chosen": -357.0127258300781, |
|
"logps/rejected": -303.44989013671875, |
|
"loss": 0.3863, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.63782024383545, |
|
"rewards/margins": 12.076054573059082, |
|
"rewards/rejected": -21.71387481689453, |
|
"sft_loss": 1.1681187152862549, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.8647316538882803, |
|
"grad_norm": 15.785691804360567, |
|
"learning_rate": 6.043080380067539e-09, |
|
"logits/chosen": 15.678844451904297, |
|
"logits/rejected": 16.41909408569336, |
|
"logps/chosen": -383.7453918457031, |
|
"logps/rejected": -308.8125915527344, |
|
"loss": 0.3156, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.041726112365723, |
|
"rewards/margins": 12.560318946838379, |
|
"rewards/rejected": -21.602046966552734, |
|
"sft_loss": 1.186676263809204, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 1.8729463307776562, |
|
"grad_norm": 19.758267623181617, |
|
"learning_rate": 5.322624957841998e-09, |
|
"logits/chosen": 16.686138153076172, |
|
"logits/rejected": 17.78066062927246, |
|
"logps/chosen": -342.8313293457031, |
|
"logps/rejected": -297.6686096191406, |
|
"loss": 0.38, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.914877891540527, |
|
"rewards/margins": 11.297541618347168, |
|
"rewards/rejected": -21.21242332458496, |
|
"sft_loss": 1.1149108409881592, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.8811610076670318, |
|
"grad_norm": 14.356418798551582, |
|
"learning_rate": 4.647424153723101e-09, |
|
"logits/chosen": 16.441852569580078, |
|
"logits/rejected": 16.586217880249023, |
|
"logps/chosen": -318.8826599121094, |
|
"logps/rejected": -271.4314880371094, |
|
"loss": 0.367, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -9.366458892822266, |
|
"rewards/margins": 10.299457550048828, |
|
"rewards/rejected": -19.665918350219727, |
|
"sft_loss": 1.2187005281448364, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 1.8893756845564074, |
|
"grad_norm": 18.826459574147577, |
|
"learning_rate": 4.0176028503425826e-09, |
|
"logits/chosen": 15.749044418334961, |
|
"logits/rejected": 16.83735466003418, |
|
"logps/chosen": -308.5406188964844, |
|
"logps/rejected": -271.7100830078125, |
|
"loss": 0.3801, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -9.177282333374023, |
|
"rewards/margins": 10.317461967468262, |
|
"rewards/rejected": -19.4947452545166, |
|
"sft_loss": 1.252463698387146, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.8975903614457832, |
|
"grad_norm": 16.226959543929514, |
|
"learning_rate": 3.433277537108481e-09, |
|
"logits/chosen": 15.832767486572266, |
|
"logits/rejected": 17.746004104614258, |
|
"logps/chosen": -343.33447265625, |
|
"logps/rejected": -305.2869873046875, |
|
"loss": 0.335, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.902791976928711, |
|
"rewards/margins": 11.3204345703125, |
|
"rewards/rejected": -21.223228454589844, |
|
"sft_loss": 1.2560192346572876, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 1.9058050383351588, |
|
"grad_norm": 14.860604119401957, |
|
"learning_rate": 2.8945562886593944e-09, |
|
"logits/chosen": 14.95615005493164, |
|
"logits/rejected": 16.35462760925293, |
|
"logps/chosen": -287.0328369140625, |
|
"logps/rejected": -257.26080322265625, |
|
"loss": 0.3677, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.02625560760498, |
|
"rewards/margins": 9.515448570251465, |
|
"rewards/rejected": -18.541706085205078, |
|
"sft_loss": 1.1147348880767822, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.9140197152245344, |
|
"grad_norm": 12.84326688048793, |
|
"learning_rate": 2.4015387448756976e-09, |
|
"logits/chosen": 15.258326530456543, |
|
"logits/rejected": 16.413604736328125, |
|
"logps/chosen": -337.6728820800781, |
|
"logps/rejected": -276.5948181152344, |
|
"loss": 0.333, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -8.658801078796387, |
|
"rewards/margins": 11.077197074890137, |
|
"rewards/rejected": -19.736000061035156, |
|
"sft_loss": 1.313868761062622, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 1.9222343921139102, |
|
"grad_norm": 9.16948186267524, |
|
"learning_rate": 1.954316092450281e-09, |
|
"logits/chosen": 16.7126522064209, |
|
"logits/rejected": 16.963319778442383, |
|
"logps/chosen": -349.0697326660156, |
|
"logps/rejected": -294.1761169433594, |
|
"loss": 0.299, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.689640998840332, |
|
"rewards/margins": 11.0868558883667, |
|
"rewards/rejected": -20.77649688720703, |
|
"sft_loss": 1.2454497814178467, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.9304490690032858, |
|
"grad_norm": 19.811301652971856, |
|
"learning_rate": 1.5529710480231272e-09, |
|
"logits/chosen": 17.24116325378418, |
|
"logits/rejected": 16.968626022338867, |
|
"logps/chosen": -310.8689270019531, |
|
"logps/rejected": -274.0096740722656, |
|
"loss": 0.3, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.42411994934082, |
|
"rewards/margins": 10.301589012145996, |
|
"rewards/rejected": -19.725709915161133, |
|
"sft_loss": 1.0850669145584106, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.9386637458926614, |
|
"grad_norm": 12.455631194759059, |
|
"learning_rate": 1.1975778428823524e-09, |
|
"logits/chosen": 15.130066871643066, |
|
"logits/rejected": 16.740190505981445, |
|
"logps/chosen": -351.4178466796875, |
|
"logps/rejected": -299.97235107421875, |
|
"loss": 0.3093, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.884756088256836, |
|
"rewards/margins": 11.209836959838867, |
|
"rewards/rejected": -21.094594955444336, |
|
"sft_loss": 1.0997297763824463, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.9468784227820373, |
|
"grad_norm": 12.90904121827512, |
|
"learning_rate": 8.882022092346064e-10, |
|
"logits/chosen": 16.643354415893555, |
|
"logits/rejected": 16.99618148803711, |
|
"logps/chosen": -355.08087158203125, |
|
"logps/rejected": -291.8462219238281, |
|
"loss": 0.3245, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.204696655273438, |
|
"rewards/margins": 11.567381858825684, |
|
"rewards/rejected": -20.77208137512207, |
|
"sft_loss": 1.2387458086013794, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 1.9550930996714129, |
|
"grad_norm": 10.02772673186922, |
|
"learning_rate": 6.249013680474368e-10, |
|
"logits/chosen": 16.724010467529297, |
|
"logits/rejected": 16.2373104095459, |
|
"logps/chosen": -319.0643310546875, |
|
"logps/rejected": -268.2914123535156, |
|
"loss": 0.3367, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.252176284790039, |
|
"rewards/margins": 9.910921096801758, |
|
"rewards/rejected": -19.163097381591797, |
|
"sft_loss": 1.1700729131698608, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.9633077765607885, |
|
"grad_norm": 16.734218614778506, |
|
"learning_rate": 4.0772401846608794e-10, |
|
"logits/chosen": 17.680179595947266, |
|
"logits/rejected": 17.80653190612793, |
|
"logps/chosen": -305.4862060546875, |
|
"logps/rejected": -267.6892395019531, |
|
"loss": 0.4133, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -9.576127052307129, |
|
"rewards/margins": 9.75358772277832, |
|
"rewards/rejected": -19.329715728759766, |
|
"sft_loss": 1.1736282110214233, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 1.9715224534501643, |
|
"grad_norm": 19.267511912968715, |
|
"learning_rate": 2.367103288061223e-10, |
|
"logits/chosen": 16.904399871826172, |
|
"logits/rejected": 16.482337951660156, |
|
"logps/chosen": -316.0256652832031, |
|
"logps/rejected": -265.80157470703125, |
|
"loss": 0.3574, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.74451732635498, |
|
"rewards/margins": 9.55905532836914, |
|
"rewards/rejected": -19.303569793701172, |
|
"sft_loss": 1.2237191200256348, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.9797371303395401, |
|
"grad_norm": 11.51131736701793, |
|
"learning_rate": 1.1189192912416933e-10, |
|
"logits/chosen": 15.607586860656738, |
|
"logits/rejected": 16.690214157104492, |
|
"logps/chosen": -370.86328125, |
|
"logps/rejected": -313.1533508300781, |
|
"loss": 0.2989, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.272278785705566, |
|
"rewards/margins": 12.769195556640625, |
|
"rewards/rejected": -22.041475296020508, |
|
"sft_loss": 1.1835730075836182, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 1.9879518072289155, |
|
"grad_norm": 11.903861482033115, |
|
"learning_rate": 3.329190536757731e-11, |
|
"logits/chosen": 17.456689834594727, |
|
"logits/rejected": 18.812978744506836, |
|
"logps/chosen": -314.75823974609375, |
|
"logps/rejected": -271.9325256347656, |
|
"loss": 0.3344, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.277753829956055, |
|
"rewards/margins": 10.492895126342773, |
|
"rewards/rejected": -19.770648956298828, |
|
"sft_loss": 1.1376186609268188, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.9961664841182913, |
|
"grad_norm": 14.744908012876596, |
|
"learning_rate": 9.247951046897906e-13, |
|
"logits/chosen": 16.54582977294922, |
|
"logits/rejected": 18.33929443359375, |
|
"logps/chosen": -319.89813232421875, |
|
"logps/rejected": -279.7975769042969, |
|
"loss": 0.352, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.205850601196289, |
|
"rewards/margins": 10.397418975830078, |
|
"rewards/rejected": -19.603271484375, |
|
"sft_loss": 1.1400221586227417, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 1.9978094194961664, |
|
"step": 1216, |
|
"total_flos": 200111899688960.0, |
|
"train_loss": 0.4716386401069988, |
|
"train_runtime": 41653.1021, |
|
"train_samples_per_second": 1.753, |
|
"train_steps_per_second": 0.029 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1216, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 200111899688960.0, |
|
"train_batch_size": 5, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|