|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 980, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0010204081632653062, |
|
"grad_norm": 2.5858418941497803, |
|
"learning_rate": 1.020408163265306e-08, |
|
"logits/chosen": -0.8166377544403076, |
|
"logits/rejected": -0.6783266663551331, |
|
"logps/chosen": -295.1116943359375, |
|
"logps/rejected": -327.4919128417969, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01020408163265306, |
|
"grad_norm": 2.2085180282592773, |
|
"learning_rate": 1.0204081632653061e-07, |
|
"logits/chosen": -0.7580730319023132, |
|
"logits/rejected": -0.7665800452232361, |
|
"logps/chosen": -232.833984375, |
|
"logps/rejected": -262.5196533203125, |
|
"loss": 0.6943, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": 0.008211496286094189, |
|
"rewards/margins": 0.006794700864702463, |
|
"rewards/rejected": 0.0014167949557304382, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02040816326530612, |
|
"grad_norm": 2.0046942234039307, |
|
"learning_rate": 2.0408163265306121e-07, |
|
"logits/chosen": -0.7919167280197144, |
|
"logits/rejected": -0.7875319719314575, |
|
"logps/chosen": -208.1246795654297, |
|
"logps/rejected": -255.45565795898438, |
|
"loss": 0.6917, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.0008393492316827178, |
|
"rewards/margins": 0.014187255874276161, |
|
"rewards/rejected": -0.015026603825390339, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.030612244897959183, |
|
"grad_norm": 1.6893500089645386, |
|
"learning_rate": 3.0612244897959183e-07, |
|
"logits/chosen": -0.653283953666687, |
|
"logits/rejected": -0.7346900105476379, |
|
"logps/chosen": -205.4357147216797, |
|
"logps/rejected": -266.03204345703125, |
|
"loss": 0.6916, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.015907617285847664, |
|
"rewards/margins": -0.014567399397492409, |
|
"rewards/rejected": -0.0013402182376012206, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04081632653061224, |
|
"grad_norm": 1.8859857320785522, |
|
"learning_rate": 4.0816326530612243e-07, |
|
"logits/chosen": -0.7067805528640747, |
|
"logits/rejected": -0.7997003793716431, |
|
"logps/chosen": -158.7813720703125, |
|
"logps/rejected": -174.23060607910156, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.0005168341449461877, |
|
"rewards/margins": 0.005659103859215975, |
|
"rewards/rejected": -0.006175938528031111, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05102040816326531, |
|
"grad_norm": 1.9115869998931885, |
|
"learning_rate": 5.10204081632653e-07, |
|
"logits/chosen": -0.714581310749054, |
|
"logits/rejected": -0.7048059701919556, |
|
"logps/chosen": -247.39810180664062, |
|
"logps/rejected": -303.0687255859375, |
|
"loss": 0.6939, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.016289008781313896, |
|
"rewards/margins": 0.003998421598225832, |
|
"rewards/rejected": 0.0122905895113945, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.061224489795918366, |
|
"grad_norm": 1.7422277927398682, |
|
"learning_rate": 6.122448979591837e-07, |
|
"logits/chosen": -0.6196914315223694, |
|
"logits/rejected": -0.5781084895133972, |
|
"logps/chosen": -144.5482940673828, |
|
"logps/rejected": -166.89369201660156, |
|
"loss": 0.6864, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.03231963887810707, |
|
"rewards/margins": 0.028604138642549515, |
|
"rewards/rejected": 0.003715501632541418, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07142857142857142, |
|
"grad_norm": 1.6072555780410767, |
|
"learning_rate": 7.142857142857143e-07, |
|
"logits/chosen": -0.8848626017570496, |
|
"logits/rejected": -0.8458296656608582, |
|
"logps/chosen": -295.02587890625, |
|
"logps/rejected": -262.872802734375, |
|
"loss": 0.6872, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.042309124022722244, |
|
"rewards/margins": 0.015812452882528305, |
|
"rewards/rejected": 0.02649666927754879, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08163265306122448, |
|
"grad_norm": 2.0791561603546143, |
|
"learning_rate": 8.163265306122449e-07, |
|
"logits/chosen": -0.6937960386276245, |
|
"logits/rejected": -0.7234374284744263, |
|
"logps/chosen": -224.0985565185547, |
|
"logps/rejected": -269.337158203125, |
|
"loss": 0.6732, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.07420104742050171, |
|
"rewards/margins": 0.04235782474279404, |
|
"rewards/rejected": 0.03184322267770767, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09183673469387756, |
|
"grad_norm": 1.962824821472168, |
|
"learning_rate": 9.183673469387755e-07, |
|
"logits/chosen": -0.8258784413337708, |
|
"logits/rejected": -0.8503522872924805, |
|
"logps/chosen": -144.6414337158203, |
|
"logps/rejected": -175.49205017089844, |
|
"loss": 0.6669, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.11745353788137436, |
|
"rewards/margins": 0.07300996780395508, |
|
"rewards/rejected": 0.04444356635212898, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10204081632653061, |
|
"grad_norm": 2.2074368000030518, |
|
"learning_rate": 9.999873129474573e-07, |
|
"logits/chosen": -0.6715458035469055, |
|
"logits/rejected": -0.738847553730011, |
|
"logps/chosen": -164.20828247070312, |
|
"logps/rejected": -208.1692352294922, |
|
"loss": 0.6534, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.24040062725543976, |
|
"rewards/margins": 0.17234429717063904, |
|
"rewards/rejected": 0.06805632263422012, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.10204081632653061, |
|
"eval_logits/chosen": -0.6843910217285156, |
|
"eval_logits/rejected": -0.6762082576751709, |
|
"eval_logps/chosen": -236.70948791503906, |
|
"eval_logps/rejected": -269.5726623535156, |
|
"eval_loss": 0.6139070391654968, |
|
"eval_rewards/accuracies": 0.7572254538536072, |
|
"eval_rewards/chosen": 0.2871367931365967, |
|
"eval_rewards/margins": 0.19106332957744598, |
|
"eval_rewards/rejected": 0.09607347846031189, |
|
"eval_runtime": 255.7234, |
|
"eval_samples_per_second": 10.805, |
|
"eval_steps_per_second": 1.353, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11224489795918367, |
|
"grad_norm": 2.093169689178467, |
|
"learning_rate": 9.99543333708549e-07, |
|
"logits/chosen": -0.6496793031692505, |
|
"logits/rejected": -0.5985936522483826, |
|
"logps/chosen": -185.32540893554688, |
|
"logps/rejected": -167.50845336914062, |
|
"loss": 0.636, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.29740238189697266, |
|
"rewards/margins": 0.16500218212604523, |
|
"rewards/rejected": 0.13240019977092743, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.12244897959183673, |
|
"grad_norm": 1.4166107177734375, |
|
"learning_rate": 9.98465645540859e-07, |
|
"logits/chosen": -0.7910449504852295, |
|
"logits/rejected": -0.8481178283691406, |
|
"logps/chosen": -147.35057067871094, |
|
"logps/rejected": -244.04727172851562, |
|
"loss": 0.6246, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.2833811640739441, |
|
"rewards/margins": 0.13916133344173431, |
|
"rewards/rejected": 0.14421981573104858, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.1326530612244898, |
|
"grad_norm": 1.4679032564163208, |
|
"learning_rate": 9.9675561557426e-07, |
|
"logits/chosen": -0.6362483501434326, |
|
"logits/rejected": -0.6534683704376221, |
|
"logps/chosen": -150.24880981445312, |
|
"logps/rejected": -175.07742309570312, |
|
"loss": 0.5876, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.458209753036499, |
|
"rewards/margins": 0.2481471598148346, |
|
"rewards/rejected": 0.21006262302398682, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.14285714285714285, |
|
"grad_norm": 1.6511420011520386, |
|
"learning_rate": 9.944154131125642e-07, |
|
"logits/chosen": -0.6063439846038818, |
|
"logits/rejected": -0.6045389771461487, |
|
"logps/chosen": -191.49533081054688, |
|
"logps/rejected": -255.36972045898438, |
|
"loss": 0.5851, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.5490959882736206, |
|
"rewards/margins": 0.31212958693504333, |
|
"rewards/rejected": 0.2369663417339325, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.15306122448979592, |
|
"grad_norm": 1.6853809356689453, |
|
"learning_rate": 9.914480068815961e-07, |
|
"logits/chosen": -0.7999967336654663, |
|
"logits/rejected": -0.8724571466445923, |
|
"logps/chosen": -184.25137329101562, |
|
"logps/rejected": -204.84927368164062, |
|
"loss": 0.5554, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.7743161916732788, |
|
"rewards/margins": 0.4069131910800934, |
|
"rewards/rejected": 0.3674030303955078, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.16326530612244897, |
|
"grad_norm": 1.6386051177978516, |
|
"learning_rate": 9.878571612631363e-07, |
|
"logits/chosen": -0.7798652052879333, |
|
"logits/rejected": -0.7520347833633423, |
|
"logps/chosen": -186.4380645751953, |
|
"logps/rejected": -206.5469207763672, |
|
"loss": 0.5345, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.9827211499214172, |
|
"rewards/margins": 0.4804193377494812, |
|
"rewards/rejected": 0.502301812171936, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.17346938775510204, |
|
"grad_norm": 1.3212541341781616, |
|
"learning_rate": 9.836474315195147e-07, |
|
"logits/chosen": -0.7808311581611633, |
|
"logits/rejected": -0.8207923769950867, |
|
"logps/chosen": -186.6453094482422, |
|
"logps/rejected": -277.3121032714844, |
|
"loss": 0.5479, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.9796573519706726, |
|
"rewards/margins": 0.272901713848114, |
|
"rewards/rejected": 0.7067556381225586, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.1836734693877551, |
|
"grad_norm": 1.7019201517105103, |
|
"learning_rate": 9.788241580149122e-07, |
|
"logits/chosen": -0.7383798360824585, |
|
"logits/rejected": -0.7045127153396606, |
|
"logps/chosen": -183.7281036376953, |
|
"logps/rejected": -172.53787231445312, |
|
"loss": 0.5175, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 1.2867637872695923, |
|
"rewards/margins": 0.5766666531562805, |
|
"rewards/rejected": 0.7100971341133118, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.19387755102040816, |
|
"grad_norm": 1.4821258783340454, |
|
"learning_rate": 9.73393459440701e-07, |
|
"logits/chosen": -0.7343258261680603, |
|
"logits/rejected": -0.7688428163528442, |
|
"logps/chosen": -228.34228515625, |
|
"logps/rejected": -278.99908447265625, |
|
"loss": 0.4814, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.2823985815048218, |
|
"rewards/margins": 0.6514226794242859, |
|
"rewards/rejected": 0.6309759616851807, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 1.6141135692596436, |
|
"learning_rate": 9.673622250534155e-07, |
|
"logits/chosen": -0.6541659235954285, |
|
"logits/rejected": -0.6301986575126648, |
|
"logps/chosen": -157.97817993164062, |
|
"logps/rejected": -167.7241973876953, |
|
"loss": 0.4902, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 1.514700174331665, |
|
"rewards/margins": 0.7473451495170593, |
|
"rewards/rejected": 0.7673550844192505, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"eval_logits/chosen": -0.650209903717041, |
|
"eval_logits/rejected": -0.6512798070907593, |
|
"eval_logps/chosen": -213.61082458496094, |
|
"eval_logps/rejected": -260.0234375, |
|
"eval_loss": 0.452963650226593, |
|
"eval_rewards/accuracies": 0.8063583970069885, |
|
"eval_rewards/chosen": 1.4420698881149292, |
|
"eval_rewards/margins": 0.8685339689254761, |
|
"eval_rewards/rejected": 0.5735359191894531, |
|
"eval_runtime": 252.7043, |
|
"eval_samples_per_second": 10.934, |
|
"eval_steps_per_second": 1.369, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.21428571428571427, |
|
"grad_norm": 1.1991658210754395, |
|
"learning_rate": 9.607381059352038e-07, |
|
"logits/chosen": -0.6899908781051636, |
|
"logits/rejected": -0.6769914627075195, |
|
"logps/chosen": -170.54293823242188, |
|
"logps/rejected": -229.1009521484375, |
|
"loss": 0.4374, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 1.6567569971084595, |
|
"rewards/margins": 0.7052143812179565, |
|
"rewards/rejected": 0.9515425562858582, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.22448979591836735, |
|
"grad_norm": 1.43000328540802, |
|
"learning_rate": 9.535295052878449e-07, |
|
"logits/chosen": -0.6404227614402771, |
|
"logits/rejected": -0.6296104192733765, |
|
"logps/chosen": -118.60369873046875, |
|
"logps/rejected": -171.08534240722656, |
|
"loss": 0.4418, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.7140804529190063, |
|
"rewards/margins": 0.9742295145988464, |
|
"rewards/rejected": 0.7398509979248047, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.23469387755102042, |
|
"grad_norm": 1.6586986780166626, |
|
"learning_rate": 9.457455677726447e-07, |
|
"logits/chosen": -0.7370392084121704, |
|
"logits/rejected": -0.7159712910652161, |
|
"logps/chosen": -156.8678436279297, |
|
"logps/rejected": -178.65988159179688, |
|
"loss": 0.4, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.953412413597107, |
|
"rewards/margins": 1.0005583763122559, |
|
"rewards/rejected": 0.9528541564941406, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.24489795918367346, |
|
"grad_norm": 1.3758224248886108, |
|
"learning_rate": 9.37396167909733e-07, |
|
"logits/chosen": -0.70029217004776, |
|
"logits/rejected": -0.6873424053192139, |
|
"logps/chosen": -127.82401275634766, |
|
"logps/rejected": -177.5338592529297, |
|
"loss": 0.4333, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.9316447973251343, |
|
"rewards/margins": 0.856708824634552, |
|
"rewards/rejected": 1.0749361515045166, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.25510204081632654, |
|
"grad_norm": 1.4639925956726074, |
|
"learning_rate": 9.284918975514797e-07, |
|
"logits/chosen": -0.6979633569717407, |
|
"logits/rejected": -0.7350119352340698, |
|
"logps/chosen": -141.00392150878906, |
|
"logps/rejected": -214.472412109375, |
|
"loss": 0.4482, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.902488350868225, |
|
"rewards/margins": 0.7867880463600159, |
|
"rewards/rejected": 1.1157002449035645, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.2653061224489796, |
|
"grad_norm": 1.1242108345031738, |
|
"learning_rate": 9.190440524459202e-07, |
|
"logits/chosen": -0.5260006785392761, |
|
"logits/rejected": -0.6740385293960571, |
|
"logps/chosen": -176.22897338867188, |
|
"logps/rejected": -269.9410400390625, |
|
"loss": 0.4658, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 2.040607213973999, |
|
"rewards/margins": 1.309356689453125, |
|
"rewards/rejected": 0.7312506437301636, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.2755102040816326, |
|
"grad_norm": 1.5435305833816528, |
|
"learning_rate": 9.09064617907235e-07, |
|
"logits/chosen": -0.7274152040481567, |
|
"logits/rejected": -0.7272646427154541, |
|
"logps/chosen": -203.15975952148438, |
|
"logps/rejected": -226.68399047851562, |
|
"loss": 0.4237, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.141923666000366, |
|
"rewards/margins": 1.0537580251693726, |
|
"rewards/rejected": 1.0881658792495728, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.2857142857142857, |
|
"grad_norm": 1.1201239824295044, |
|
"learning_rate": 8.985662536114612e-07, |
|
"logits/chosen": -0.6508474349975586, |
|
"logits/rejected": -0.659797191619873, |
|
"logps/chosen": -146.76266479492188, |
|
"logps/rejected": -182.32579040527344, |
|
"loss": 0.4144, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 2.2803127765655518, |
|
"rewards/margins": 1.108865737915039, |
|
"rewards/rejected": 1.1714469194412231, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.29591836734693877, |
|
"grad_norm": 0.9688098430633545, |
|
"learning_rate": 8.875622775367259e-07, |
|
"logits/chosen": -0.6407650709152222, |
|
"logits/rejected": -0.6345282793045044, |
|
"logps/chosen": -178.30838012695312, |
|
"logps/rejected": -213.2600860595703, |
|
"loss": 0.4007, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 2.2921342849731445, |
|
"rewards/margins": 0.9978988766670227, |
|
"rewards/rejected": 1.2942354679107666, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.30612244897959184, |
|
"grad_norm": 1.3581178188323975, |
|
"learning_rate": 8.760666490683719e-07, |
|
"logits/chosen": -0.643558144569397, |
|
"logits/rejected": -0.6327681541442871, |
|
"logps/chosen": -128.29537963867188, |
|
"logps/rejected": -162.97642517089844, |
|
"loss": 0.391, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 2.3287272453308105, |
|
"rewards/margins": 1.0853456258773804, |
|
"rewards/rejected": 1.2433817386627197, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.30612244897959184, |
|
"eval_logits/chosen": -0.6297730803489685, |
|
"eval_logits/rejected": -0.6321488618850708, |
|
"eval_logps/chosen": -204.23443603515625, |
|
"eval_logps/rejected": -257.63165283203125, |
|
"eval_loss": 0.39348161220550537, |
|
"eval_rewards/accuracies": 0.8381502628326416, |
|
"eval_rewards/chosen": 1.910889744758606, |
|
"eval_rewards/margins": 1.217763066291809, |
|
"eval_rewards/rejected": 0.6931266188621521, |
|
"eval_runtime": 252.9021, |
|
"eval_samples_per_second": 10.925, |
|
"eval_steps_per_second": 1.368, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.3163265306122449, |
|
"grad_norm": 1.3367196321487427, |
|
"learning_rate": 8.640939512904095e-07, |
|
"logits/chosen": -0.6319596767425537, |
|
"logits/rejected": -0.6244379281997681, |
|
"logps/chosen": -146.67283630371094, |
|
"logps/rejected": -179.8704833984375, |
|
"loss": 0.386, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.33925199508667, |
|
"rewards/margins": 1.3749719858169556, |
|
"rewards/rejected": 0.9642800092697144, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.32653061224489793, |
|
"grad_norm": 0.9586181044578552, |
|
"learning_rate": 8.516593724857597e-07, |
|
"logits/chosen": -0.6093655824661255, |
|
"logits/rejected": -0.580748438835144, |
|
"logps/chosen": -180.91659545898438, |
|
"logps/rejected": -233.35824584960938, |
|
"loss": 0.4029, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 2.2102105617523193, |
|
"rewards/margins": 1.3727915287017822, |
|
"rewards/rejected": 0.8374192118644714, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.336734693877551, |
|
"grad_norm": 0.8618925213813782, |
|
"learning_rate": 8.387786868687548e-07, |
|
"logits/chosen": -0.5689299702644348, |
|
"logits/rejected": -0.5300137400627136, |
|
"logps/chosen": -109.52386474609375, |
|
"logps/rejected": -144.4683837890625, |
|
"loss": 0.3223, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 2.3346762657165527, |
|
"rewards/margins": 1.4390984773635864, |
|
"rewards/rejected": 0.8955775499343872, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.3469387755102041, |
|
"grad_norm": 1.1246938705444336, |
|
"learning_rate": 8.254682345743405e-07, |
|
"logits/chosen": -0.769761323928833, |
|
"logits/rejected": -0.7216005921363831, |
|
"logps/chosen": -199.35218811035156, |
|
"logps/rejected": -197.91156005859375, |
|
"loss": 0.4085, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 2.1999125480651855, |
|
"rewards/margins": 1.3574109077453613, |
|
"rewards/rejected": 0.8425019383430481, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.35714285714285715, |
|
"grad_norm": 1.2478692531585693, |
|
"learning_rate": 8.117449009293668e-07, |
|
"logits/chosen": -0.7673205733299255, |
|
"logits/rejected": -0.7887976765632629, |
|
"logps/chosen": -165.0550537109375, |
|
"logps/rejected": -216.4749298095703, |
|
"loss": 0.3823, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 2.4004361629486084, |
|
"rewards/margins": 1.426731824874878, |
|
"rewards/rejected": 0.9737041592597961, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.3673469387755102, |
|
"grad_norm": 1.0376545190811157, |
|
"learning_rate": 7.976260950322571e-07, |
|
"logits/chosen": -0.6736082434654236, |
|
"logits/rejected": -0.6928958892822266, |
|
"logps/chosen": -181.90908813476562, |
|
"logps/rejected": -217.79177856445312, |
|
"loss": 0.3887, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 1.8513981103897095, |
|
"rewards/margins": 1.058131217956543, |
|
"rewards/rejected": 0.7932666540145874, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.37755102040816324, |
|
"grad_norm": 1.8091073036193848, |
|
"learning_rate": 7.831297276682368e-07, |
|
"logits/chosen": -0.6461857557296753, |
|
"logits/rejected": -0.7057845592498779, |
|
"logps/chosen": -89.2549057006836, |
|
"logps/rejected": -165.2856903076172, |
|
"loss": 0.3589, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 2.603538990020752, |
|
"rewards/margins": 1.7810055017471313, |
|
"rewards/rejected": 0.8225336074829102, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.3877551020408163, |
|
"grad_norm": 1.5131062269210815, |
|
"learning_rate": 7.682741885881314e-07, |
|
"logits/chosen": -0.6561241149902344, |
|
"logits/rejected": -0.6394567489624023, |
|
"logps/chosen": -191.556640625, |
|
"logps/rejected": -244.42593383789062, |
|
"loss": 0.3447, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 2.0958144664764404, |
|
"rewards/margins": 1.2701359987258911, |
|
"rewards/rejected": 0.8256783485412598, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.3979591836734694, |
|
"grad_norm": 1.401531457901001, |
|
"learning_rate": 7.530783231795614e-07, |
|
"logits/chosen": -0.5236614942550659, |
|
"logits/rejected": -0.6208306550979614, |
|
"logps/chosen": -173.1316680908203, |
|
"logps/rejected": -263.10003662109375, |
|
"loss": 0.3731, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.393113613128662, |
|
"rewards/margins": 1.6618480682373047, |
|
"rewards/rejected": 0.7312653660774231, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 1.200061559677124, |
|
"learning_rate": 7.375614085601264e-07, |
|
"logits/chosen": -0.6140845417976379, |
|
"logits/rejected": -0.575400173664093, |
|
"logps/chosen": -180.64183044433594, |
|
"logps/rejected": -241.3333282470703, |
|
"loss": 0.3497, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 2.477724075317383, |
|
"rewards/margins": 1.4085882902145386, |
|
"rewards/rejected": 1.0691356658935547, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"eval_logits/chosen": -0.6313372850418091, |
|
"eval_logits/rejected": -0.6323339343070984, |
|
"eval_logps/chosen": -203.02207946777344, |
|
"eval_logps/rejected": -260.01409912109375, |
|
"eval_loss": 0.36327043175697327, |
|
"eval_rewards/accuracies": 0.8468208312988281, |
|
"eval_rewards/chosen": 1.9715064764022827, |
|
"eval_rewards/margins": 1.3975027799606323, |
|
"eval_rewards/rejected": 0.5740035772323608, |
|
"eval_runtime": 252.8433, |
|
"eval_samples_per_second": 10.928, |
|
"eval_steps_per_second": 1.368, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.41836734693877553, |
|
"grad_norm": 1.6836450099945068, |
|
"learning_rate": 7.217431291229067e-07, |
|
"logits/chosen": -0.7939841747283936, |
|
"logits/rejected": -0.613411545753479, |
|
"logps/chosen": -217.92153930664062, |
|
"logps/rejected": -224.36270141601562, |
|
"loss": 0.3867, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 2.1043503284454346, |
|
"rewards/margins": 1.5646950006484985, |
|
"rewards/rejected": 0.5396553874015808, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.42857142857142855, |
|
"grad_norm": 1.3317594528198242, |
|
"learning_rate": 7.056435515653058e-07, |
|
"logits/chosen": -0.6276999115943909, |
|
"logits/rejected": -0.5372880697250366, |
|
"logps/chosen": -198.774658203125, |
|
"logps/rejected": -198.1266632080078, |
|
"loss": 0.3866, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 2.228754997253418, |
|
"rewards/margins": 1.542759895324707, |
|
"rewards/rejected": 0.6859949231147766, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.4387755102040816, |
|
"grad_norm": 1.6838312149047852, |
|
"learning_rate": 6.892830994329088e-07, |
|
"logits/chosen": -0.5538614392280579, |
|
"logits/rejected": -0.5668340921401978, |
|
"logps/chosen": -115.6775131225586, |
|
"logps/rejected": -192.1859588623047, |
|
"loss": 0.3508, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 2.350921630859375, |
|
"rewards/margins": 1.5608246326446533, |
|
"rewards/rejected": 0.7900969982147217, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.4489795918367347, |
|
"grad_norm": 1.517739176750183, |
|
"learning_rate": 6.726825272106538e-07, |
|
"logits/chosen": -0.7243350744247437, |
|
"logits/rejected": -0.6703056693077087, |
|
"logps/chosen": -159.9098663330078, |
|
"logps/rejected": -242.1475067138672, |
|
"loss": 0.3489, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 2.179281711578369, |
|
"rewards/margins": 1.4223108291625977, |
|
"rewards/rejected": 0.7569707632064819, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.45918367346938777, |
|
"grad_norm": 1.332316517829895, |
|
"learning_rate": 6.558628939941791e-07, |
|
"logits/chosen": -0.6250364184379578, |
|
"logits/rejected": -0.6584871411323547, |
|
"logps/chosen": -141.18350219726562, |
|
"logps/rejected": -210.0460205078125, |
|
"loss": 0.3277, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 2.4289634227752686, |
|
"rewards/margins": 1.7670814990997314, |
|
"rewards/rejected": 0.6618821024894714, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.46938775510204084, |
|
"grad_norm": 1.1579140424728394, |
|
"learning_rate": 6.388455367747502e-07, |
|
"logits/chosen": -0.6399953365325928, |
|
"logits/rejected": -0.6188939809799194, |
|
"logps/chosen": -122.9395751953125, |
|
"logps/rejected": -187.66537475585938, |
|
"loss": 0.3203, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.4994819164276123, |
|
"rewards/margins": 1.6673539876937866, |
|
"rewards/rejected": 0.8321278691291809, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.47959183673469385, |
|
"grad_norm": 1.3472042083740234, |
|
"learning_rate": 6.216520433716544e-07, |
|
"logits/chosen": -0.5729564428329468, |
|
"logits/rejected": -0.5995985865592957, |
|
"logps/chosen": -162.9604034423828, |
|
"logps/rejected": -261.8568420410156, |
|
"loss": 0.3378, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.0735116004943848, |
|
"rewards/margins": 1.7422335147857666, |
|
"rewards/rejected": 0.3312779664993286, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.4897959183673469, |
|
"grad_norm": 1.2735215425491333, |
|
"learning_rate": 6.043042250464004e-07, |
|
"logits/chosen": -0.6866289377212524, |
|
"logits/rejected": -0.6188154220581055, |
|
"logps/chosen": -123.27571868896484, |
|
"logps/rejected": -148.08094787597656, |
|
"loss": 0.2965, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.6011900901794434, |
|
"rewards/margins": 1.8508793115615845, |
|
"rewards/rejected": 0.7503107190132141, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 1.0802596807479858, |
|
"learning_rate": 5.868240888334652e-07, |
|
"logits/chosen": -0.803338885307312, |
|
"logits/rejected": -0.7212686538696289, |
|
"logps/chosen": -173.48219299316406, |
|
"logps/rejected": -273.45672607421875, |
|
"loss": 0.3035, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 2.5986487865448, |
|
"rewards/margins": 2.1381328105926514, |
|
"rewards/rejected": 0.4605160653591156, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.5102040816326531, |
|
"grad_norm": 0.8763641715049744, |
|
"learning_rate": 5.69233809622687e-07, |
|
"logits/chosen": -0.49485841393470764, |
|
"logits/rejected": -0.5676048398017883, |
|
"logps/chosen": -129.4870147705078, |
|
"logps/rejected": -191.2211151123047, |
|
"loss": 0.3378, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 2.351245403289795, |
|
"rewards/margins": 1.694441556930542, |
|
"rewards/rejected": 0.6568037271499634, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5102040816326531, |
|
"eval_logits/chosen": -0.6102895736694336, |
|
"eval_logits/rejected": -0.6196657419204712, |
|
"eval_logps/chosen": -201.76104736328125, |
|
"eval_logps/rejected": -262.2907409667969, |
|
"eval_loss": 0.34211036562919617, |
|
"eval_rewards/accuracies": 0.8699421882629395, |
|
"eval_rewards/chosen": 2.034559726715088, |
|
"eval_rewards/margins": 1.5743900537490845, |
|
"eval_rewards/rejected": 0.46016958355903625, |
|
"eval_runtime": 252.9052, |
|
"eval_samples_per_second": 10.925, |
|
"eval_steps_per_second": 1.368, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.5204081632653061, |
|
"grad_norm": 1.9054065942764282, |
|
"learning_rate": 5.515557020287218e-07, |
|
"logits/chosen": -0.6607390642166138, |
|
"logits/rejected": -0.6723104119300842, |
|
"logps/chosen": -144.58413696289062, |
|
"logps/rejected": -223.1163330078125, |
|
"loss": 0.3585, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.4164395332336426, |
|
"rewards/margins": 1.9343305826187134, |
|
"rewards/rejected": 0.4821089804172516, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.5306122448979592, |
|
"grad_norm": 1.4127832651138306, |
|
"learning_rate": 5.338121920832475e-07, |
|
"logits/chosen": -0.5705611109733582, |
|
"logits/rejected": -0.6025998592376709, |
|
"logps/chosen": -219.2863311767578, |
|
"logps/rejected": -222.2490692138672, |
|
"loss": 0.3387, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.2865653038024902, |
|
"rewards/margins": 1.6282856464385986, |
|
"rewards/rejected": 0.6582795977592468, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.5408163265306123, |
|
"grad_norm": 1.9716460704803467, |
|
"learning_rate": 5.160257887858277e-07, |
|
"logits/chosen": -0.7576996088027954, |
|
"logits/rejected": -0.7345749139785767, |
|
"logps/chosen": -144.2480926513672, |
|
"logps/rejected": -193.87539672851562, |
|
"loss": 0.3335, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 2.3815081119537354, |
|
"rewards/margins": 1.888880968093872, |
|
"rewards/rejected": 0.49262747168540955, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.5510204081632653, |
|
"grad_norm": 1.2313483953475952, |
|
"learning_rate": 4.982190555495235e-07, |
|
"logits/chosen": -0.6418560147285461, |
|
"logits/rejected": -0.6474324464797974, |
|
"logps/chosen": -136.88446044921875, |
|
"logps/rejected": -218.2066192626953, |
|
"loss": 0.3287, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 2.419318675994873, |
|
"rewards/margins": 1.6617262363433838, |
|
"rewards/rejected": 0.7575927376747131, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.5612244897959183, |
|
"grad_norm": 1.5645098686218262, |
|
"learning_rate": 4.804145815774786e-07, |
|
"logits/chosen": -0.6569366455078125, |
|
"logits/rejected": -0.669120192527771, |
|
"logps/chosen": -142.89028930664062, |
|
"logps/rejected": -210.3660888671875, |
|
"loss": 0.3265, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 2.5318100452423096, |
|
"rewards/margins": 1.5718120336532593, |
|
"rewards/rejected": 0.9599977731704712, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.5714285714285714, |
|
"grad_norm": 1.0555964708328247, |
|
"learning_rate": 4.626349532067879e-07, |
|
"logits/chosen": -0.7187200784683228, |
|
"logits/rejected": -0.7504357099533081, |
|
"logps/chosen": -145.81968688964844, |
|
"logps/rejected": -256.0328063964844, |
|
"loss": 0.321, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.87051522731781, |
|
"rewards/margins": 1.6857688426971436, |
|
"rewards/rejected": 0.18474629521369934, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.5816326530612245, |
|
"grad_norm": 3.175672769546509, |
|
"learning_rate": 4.4490272525599936e-07, |
|
"logits/chosen": -0.7243942618370056, |
|
"logits/rejected": -0.6909176707267761, |
|
"logps/chosen": -112.03414154052734, |
|
"logps/rejected": -205.05517578125, |
|
"loss": 0.3165, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.3773770332336426, |
|
"rewards/margins": 2.077335834503174, |
|
"rewards/rejected": 0.3000412583351135, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.5918367346938775, |
|
"grad_norm": 0.9734162092208862, |
|
"learning_rate": 4.272403924126035e-07, |
|
"logits/chosen": -0.6561388373374939, |
|
"logits/rejected": -0.5848960280418396, |
|
"logps/chosen": -148.91261291503906, |
|
"logps/rejected": -197.70758056640625, |
|
"loss": 0.2936, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.421445369720459, |
|
"rewards/margins": 2.3003313541412354, |
|
"rewards/rejected": 0.12111417204141617, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.6020408163265306, |
|
"grad_norm": 1.2602763175964355, |
|
"learning_rate": 4.096703606968006e-07, |
|
"logits/chosen": -0.5611236095428467, |
|
"logits/rejected": -0.5989875793457031, |
|
"logps/chosen": -181.88597106933594, |
|
"logps/rejected": -338.6321105957031, |
|
"loss": 0.3525, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.1114940643310547, |
|
"rewards/margins": 1.8167270421981812, |
|
"rewards/rejected": 0.294766902923584, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.6122448979591837, |
|
"grad_norm": 1.2026368379592896, |
|
"learning_rate": 3.9221491903775013e-07, |
|
"logits/chosen": -0.6353614926338196, |
|
"logits/rejected": -0.6344829797744751, |
|
"logps/chosen": -225.2716064453125, |
|
"logps/rejected": -285.7774963378906, |
|
"loss": 0.2904, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.963526725769043, |
|
"rewards/margins": 1.5393855571746826, |
|
"rewards/rejected": 0.4241412281990051, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6122448979591837, |
|
"eval_logits/chosen": -0.6159467697143555, |
|
"eval_logits/rejected": -0.6221225261688232, |
|
"eval_logps/chosen": -203.55426025390625, |
|
"eval_logps/rejected": -265.3277587890625, |
|
"eval_loss": 0.32874658703804016, |
|
"eval_rewards/accuracies": 0.8757225275039673, |
|
"eval_rewards/chosen": 1.9448989629745483, |
|
"eval_rewards/margins": 1.636578917503357, |
|
"eval_rewards/rejected": 0.3083205819129944, |
|
"eval_runtime": 252.8477, |
|
"eval_samples_per_second": 10.928, |
|
"eval_steps_per_second": 1.368, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.6224489795918368, |
|
"grad_norm": 0.9654647707939148, |
|
"learning_rate": 3.7489621099836043e-07, |
|
"logits/chosen": -0.6111562848091125, |
|
"logits/rejected": -0.5714690089225769, |
|
"logps/chosen": -193.060302734375, |
|
"logps/rejected": -250.1613311767578, |
|
"loss": 0.3157, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 2.0583136081695557, |
|
"rewards/margins": 1.5960966348648071, |
|
"rewards/rejected": 0.4622170329093933, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.6326530612244898, |
|
"grad_norm": 0.9556177854537964, |
|
"learning_rate": 3.577362066844838e-07, |
|
"logits/chosen": -0.6297867894172668, |
|
"logits/rejected": -0.6922434568405151, |
|
"logps/chosen": -128.23373413085938, |
|
"logps/rejected": -200.610595703125, |
|
"loss": 0.3082, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 2.2646005153656006, |
|
"rewards/margins": 1.6751960515975952, |
|
"rewards/rejected": 0.5894044041633606, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.6428571428571429, |
|
"grad_norm": 1.8962053060531616, |
|
"learning_rate": 3.4075667487415785e-07, |
|
"logits/chosen": -0.5675973892211914, |
|
"logits/rejected": -0.6235415935516357, |
|
"logps/chosen": -201.10992431640625, |
|
"logps/rejected": -309.8710632324219, |
|
"loss": 0.3583, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 2.077495813369751, |
|
"rewards/margins": 2.106235980987549, |
|
"rewards/rejected": -0.028740186244249344, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.6530612244897959, |
|
"grad_norm": 2.3064181804656982, |
|
"learning_rate": 3.239791554022449e-07, |
|
"logits/chosen": -0.647456705570221, |
|
"logits/rejected": -0.595936119556427, |
|
"logps/chosen": -185.43714904785156, |
|
"logps/rejected": -198.25482177734375, |
|
"loss": 0.3458, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 2.3314576148986816, |
|
"rewards/margins": 1.9370521306991577, |
|
"rewards/rejected": 0.3944053649902344, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.6632653061224489, |
|
"grad_norm": 1.1449445486068726, |
|
"learning_rate": 3.0742493183550454e-07, |
|
"logits/chosen": -0.6164785623550415, |
|
"logits/rejected": -0.5928055047988892, |
|
"logps/chosen": -167.90647888183594, |
|
"logps/rejected": -219.0677490234375, |
|
"loss": 0.2919, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 2.3085310459136963, |
|
"rewards/margins": 1.8304294347763062, |
|
"rewards/rejected": 0.47810110449790955, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.673469387755102, |
|
"grad_norm": 1.494554877281189, |
|
"learning_rate": 2.911150044727605e-07, |
|
"logits/chosen": -0.6391149163246155, |
|
"logits/rejected": -0.6734142303466797, |
|
"logps/chosen": -145.32362365722656, |
|
"logps/rejected": -197.6822967529297, |
|
"loss": 0.326, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 2.4695799350738525, |
|
"rewards/margins": 1.6128828525543213, |
|
"rewards/rejected": 0.8566972017288208, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.6836734693877551, |
|
"grad_norm": 0.9126285314559937, |
|
"learning_rate": 2.750700637044155e-07, |
|
"logits/chosen": -0.6191089749336243, |
|
"logits/rejected": -0.7010880708694458, |
|
"logps/chosen": -159.2322998046875, |
|
"logps/rejected": -246.2435760498047, |
|
"loss": 0.2968, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 2.389880895614624, |
|
"rewards/margins": 2.214604377746582, |
|
"rewards/rejected": 0.175276517868042, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.6938775510204082, |
|
"grad_norm": 1.308219075202942, |
|
"learning_rate": 2.593104637651087e-07, |
|
"logits/chosen": -0.5017037987709045, |
|
"logits/rejected": -0.5034186244010925, |
|
"logps/chosen": -121.1073226928711, |
|
"logps/rejected": -187.53866577148438, |
|
"loss": 0.3082, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 2.530785083770752, |
|
"rewards/margins": 2.0506749153137207, |
|
"rewards/rejected": 0.4801098704338074, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.7040816326530612, |
|
"grad_norm": 1.0809426307678223, |
|
"learning_rate": 2.438561969128114e-07, |
|
"logits/chosen": -0.590795636177063, |
|
"logits/rejected": -0.6325095891952515, |
|
"logps/chosen": -134.36793518066406, |
|
"logps/rejected": -201.61770629882812, |
|
"loss": 0.3408, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 2.304894208908081, |
|
"rewards/margins": 1.7218987941741943, |
|
"rewards/rejected": 0.5829951763153076, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"grad_norm": 1.6444560289382935, |
|
"learning_rate": 2.2872686806712032e-07, |
|
"logits/chosen": -0.6764811277389526, |
|
"logits/rejected": -0.6604726910591125, |
|
"logps/chosen": -178.37112426757812, |
|
"logps/rejected": -279.1824645996094, |
|
"loss": 0.3053, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 2.4546444416046143, |
|
"rewards/margins": 1.9984443187713623, |
|
"rewards/rejected": 0.4562003016471863, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7142857142857143, |
|
"eval_logits/chosen": -0.6110620498657227, |
|
"eval_logits/rejected": -0.616197943687439, |
|
"eval_logps/chosen": -202.58566284179688, |
|
"eval_logps/rejected": -266.28179931640625, |
|
"eval_loss": 0.3206620216369629, |
|
"eval_rewards/accuracies": 0.8901734352111816, |
|
"eval_rewards/chosen": 1.9933290481567383, |
|
"eval_rewards/margins": 1.7327111959457397, |
|
"eval_rewards/rejected": 0.2606178820133209, |
|
"eval_runtime": 253.1776, |
|
"eval_samples_per_second": 10.913, |
|
"eval_steps_per_second": 1.367, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.7244897959183674, |
|
"grad_norm": 2.030115842819214, |
|
"learning_rate": 2.1394166993891526e-07, |
|
"logits/chosen": -0.5332853198051453, |
|
"logits/rejected": -0.6465424299240112, |
|
"logps/chosen": -162.91802978515625, |
|
"logps/rejected": -236.94656372070312, |
|
"loss": 0.3133, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 2.1907992362976074, |
|
"rewards/margins": 2.2500343322753906, |
|
"rewards/rejected": -0.05923491716384888, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.7346938775510204, |
|
"grad_norm": 1.1324609518051147, |
|
"learning_rate": 1.995193586829387e-07, |
|
"logits/chosen": -0.658591091632843, |
|
"logits/rejected": -0.6164897084236145, |
|
"logps/chosen": -170.56906127929688, |
|
"logps/rejected": -209.83834838867188, |
|
"loss": 0.2705, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 2.4906888008117676, |
|
"rewards/margins": 1.8935827016830444, |
|
"rewards/rejected": 0.597105860710144, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.7448979591836735, |
|
"grad_norm": 1.2381037473678589, |
|
"learning_rate": 1.8547823010417873e-07, |
|
"logits/chosen": -0.5904741883277893, |
|
"logits/rejected": -0.571013331413269, |
|
"logps/chosen": -144.27389526367188, |
|
"logps/rejected": -181.3529510498047, |
|
"loss": 0.3058, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.409674882888794, |
|
"rewards/margins": 1.6706383228302002, |
|
"rewards/rejected": 0.739036500453949, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.7551020408163265, |
|
"grad_norm": 1.4913420677185059, |
|
"learning_rate": 1.7183609644824092e-07, |
|
"logits/chosen": -0.7272003293037415, |
|
"logits/rejected": -0.7424389123916626, |
|
"logps/chosen": -169.88607788085938, |
|
"logps/rejected": -232.5887908935547, |
|
"loss": 0.3306, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 2.0715317726135254, |
|
"rewards/margins": 1.9093472957611084, |
|
"rewards/rejected": 0.16218456625938416, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.7653061224489796, |
|
"grad_norm": 1.1602520942687988, |
|
"learning_rate": 1.5861026380515163e-07, |
|
"logits/chosen": -0.7100823521614075, |
|
"logits/rejected": -0.6146202683448792, |
|
"logps/chosen": -166.2672576904297, |
|
"logps/rejected": -280.4190673828125, |
|
"loss": 0.2727, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.526275157928467, |
|
"rewards/margins": 2.7400615215301514, |
|
"rewards/rejected": -0.21378597617149353, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.7755102040816326, |
|
"grad_norm": 1.6637645959854126, |
|
"learning_rate": 1.4581751015526033e-07, |
|
"logits/chosen": -0.6776250600814819, |
|
"logits/rejected": -0.6692344546318054, |
|
"logps/chosen": -113.79902648925781, |
|
"logps/rejected": -179.08889770507812, |
|
"loss": 0.3184, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.358813762664795, |
|
"rewards/margins": 2.0016608238220215, |
|
"rewards/rejected": 0.3571527600288391, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.7857142857142857, |
|
"grad_norm": 0.9753669500350952, |
|
"learning_rate": 1.3347406408508694e-07, |
|
"logits/chosen": -0.5141528844833374, |
|
"logits/rejected": -0.5624244809150696, |
|
"logps/chosen": -95.5937728881836, |
|
"logps/rejected": -185.74070739746094, |
|
"loss": 0.2773, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 3.0672779083251953, |
|
"rewards/margins": 2.953110694885254, |
|
"rewards/rejected": 0.11416707187891006, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.7959183673469388, |
|
"grad_norm": 2.153414249420166, |
|
"learning_rate": 1.2159558420011905e-07, |
|
"logits/chosen": -0.7019663453102112, |
|
"logits/rejected": -0.6160884499549866, |
|
"logps/chosen": -162.27789306640625, |
|
"logps/rejected": -200.33497619628906, |
|
"loss": 0.3196, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 2.516826629638672, |
|
"rewards/margins": 1.9273223876953125, |
|
"rewards/rejected": 0.5895041823387146, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.8061224489795918, |
|
"grad_norm": 1.181531548500061, |
|
"learning_rate": 1.1019713926067392e-07, |
|
"logits/chosen": -0.6071494817733765, |
|
"logits/rejected": -0.6017253398895264, |
|
"logps/chosen": -134.8675537109375, |
|
"logps/rejected": -199.9590301513672, |
|
"loss": 0.3118, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 2.1532888412475586, |
|
"rewards/margins": 1.8393771648406982, |
|
"rewards/rejected": 0.31391164660453796, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 1.002733826637268, |
|
"learning_rate": 9.929318906602174e-08, |
|
"logits/chosen": -0.6381187438964844, |
|
"logits/rejected": -0.6446717977523804, |
|
"logps/chosen": -127.46337890625, |
|
"logps/rejected": -191.8046112060547, |
|
"loss": 0.2655, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 2.273101568222046, |
|
"rewards/margins": 2.162346363067627, |
|
"rewards/rejected": 0.11075510829687119, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"eval_logits/chosen": -0.602638304233551, |
|
"eval_logits/rejected": -0.6127411723136902, |
|
"eval_logps/chosen": -202.7614288330078, |
|
"eval_logps/rejected": -266.9698486328125, |
|
"eval_loss": 0.315766304731369, |
|
"eval_rewards/accuracies": 0.8815028667449951, |
|
"eval_rewards/chosen": 1.9845408201217651, |
|
"eval_rewards/margins": 1.7583247423171997, |
|
"eval_rewards/rejected": 0.22621627151966095, |
|
"eval_runtime": 252.9772, |
|
"eval_samples_per_second": 10.922, |
|
"eval_steps_per_second": 1.368, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.826530612244898, |
|
"grad_norm": 1.031960129737854, |
|
"learning_rate": 8.889756611102539e-08, |
|
"logits/chosen": -0.6104857921600342, |
|
"logits/rejected": -0.6152311563491821, |
|
"logps/chosen": -177.21951293945312, |
|
"logps/rejected": -195.98184204101562, |
|
"loss": 0.2861, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 2.403092622756958, |
|
"rewards/margins": 2.128187656402588, |
|
"rewards/rejected": 0.27490508556365967, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.8367346938775511, |
|
"grad_norm": 1.2540313005447388, |
|
"learning_rate": 7.902345803856264e-08, |
|
"logits/chosen": -0.5539565682411194, |
|
"logits/rejected": -0.6319509744644165, |
|
"logps/chosen": -136.90011596679688, |
|
"logps/rejected": -277.3617248535156, |
|
"loss": 0.2758, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 2.5164365768432617, |
|
"rewards/margins": 2.1890451908111572, |
|
"rewards/rejected": 0.32739144563674927, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.8469387755102041, |
|
"grad_norm": 1.9569461345672607, |
|
"learning_rate": 6.968339090999186e-08, |
|
"logits/chosen": -0.7001439332962036, |
|
"logits/rejected": -0.7415611743927002, |
|
"logps/chosen": -111.2337646484375, |
|
"logps/rejected": -191.3740692138672, |
|
"loss": 0.2879, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 2.318671703338623, |
|
"rewards/margins": 2.1912620067596436, |
|
"rewards/rejected": 0.12740962207317352, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.8571428571428571, |
|
"grad_norm": 1.5039212703704834, |
|
"learning_rate": 6.088921331488566e-08, |
|
"logits/chosen": -0.5268384218215942, |
|
"logits/rejected": -0.5624841451644897, |
|
"logps/chosen": -117.46417236328125, |
|
"logps/rejected": -222.89755249023438, |
|
"loss": 0.3116, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.4481053352355957, |
|
"rewards/margins": 2.348428249359131, |
|
"rewards/rejected": 0.09967675060033798, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.8673469387755102, |
|
"grad_norm": 0.9287813305854797, |
|
"learning_rate": 5.2652081340188506e-08, |
|
"logits/chosen": -0.5797746777534485, |
|
"logits/rejected": -0.5913048386573792, |
|
"logps/chosen": -94.5796890258789, |
|
"logps/rejected": -174.916748046875, |
|
"loss": 0.2915, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 2.9385693073272705, |
|
"rewards/margins": 2.669956684112549, |
|
"rewards/rejected": 0.2686125636100769, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.8775510204081632, |
|
"grad_norm": 1.252423644065857, |
|
"learning_rate": 4.498244441786675e-08, |
|
"logits/chosen": -0.604209303855896, |
|
"logits/rejected": -0.6816591620445251, |
|
"logps/chosen": -102.5703125, |
|
"logps/rejected": -223.8843994140625, |
|
"loss": 0.2842, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 2.3505682945251465, |
|
"rewards/margins": 2.114715814590454, |
|
"rewards/rejected": 0.23585255444049835, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.8877551020408163, |
|
"grad_norm": 1.17081880569458, |
|
"learning_rate": 3.789003206900537e-08, |
|
"logits/chosen": -0.8669666051864624, |
|
"logits/rejected": -0.807847797870636, |
|
"logps/chosen": -248.0942840576172, |
|
"logps/rejected": -313.04302978515625, |
|
"loss": 0.317, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 1.950645089149475, |
|
"rewards/margins": 1.5294300317764282, |
|
"rewards/rejected": 0.4212152063846588, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.8979591836734694, |
|
"grad_norm": 0.9833048582077026, |
|
"learning_rate": 3.1383841561166134e-08, |
|
"logits/chosen": -0.6270785331726074, |
|
"logits/rejected": -0.6980186104774475, |
|
"logps/chosen": -178.78965759277344, |
|
"logps/rejected": -230.45925903320312, |
|
"loss": 0.3263, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.228593349456787, |
|
"rewards/margins": 2.083462953567505, |
|
"rewards/rejected": 0.14513027667999268, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.9081632653061225, |
|
"grad_norm": 0.9479549527168274, |
|
"learning_rate": 2.547212649466568e-08, |
|
"logits/chosen": -0.7561019062995911, |
|
"logits/rejected": -0.7451134324073792, |
|
"logps/chosen": -134.2133331298828, |
|
"logps/rejected": -225.8154754638672, |
|
"loss": 0.2947, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 2.320394992828369, |
|
"rewards/margins": 1.929671049118042, |
|
"rewards/rejected": 0.39072394371032715, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.9183673469387755, |
|
"grad_norm": 1.0661786794662476, |
|
"learning_rate": 2.0162386332251648e-08, |
|
"logits/chosen": -0.6181257963180542, |
|
"logits/rejected": -0.6277596354484558, |
|
"logps/chosen": -109.55928039550781, |
|
"logps/rejected": -188.4981689453125, |
|
"loss": 0.2943, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.673180103302002, |
|
"rewards/margins": 2.270214319229126, |
|
"rewards/rejected": 0.40296584367752075, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9183673469387755, |
|
"eval_logits/chosen": -0.6051714420318604, |
|
"eval_logits/rejected": -0.6136297583580017, |
|
"eval_logps/chosen": -202.5171356201172, |
|
"eval_logps/rejected": -267.1376647949219, |
|
"eval_loss": 0.3143753111362457, |
|
"eval_rewards/accuracies": 0.884393036365509, |
|
"eval_rewards/chosen": 1.9967551231384277, |
|
"eval_rewards/margins": 1.7789306640625, |
|
"eval_rewards/rejected": 0.21782423555850983, |
|
"eval_runtime": 252.9824, |
|
"eval_samples_per_second": 10.922, |
|
"eval_steps_per_second": 1.368, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.9285714285714286, |
|
"grad_norm": 1.4716278314590454, |
|
"learning_rate": 1.5461356885461075e-08, |
|
"logits/chosen": -0.639533519744873, |
|
"logits/rejected": -0.6445420980453491, |
|
"logps/chosen": -115.40907287597656, |
|
"logps/rejected": -226.23721313476562, |
|
"loss": 0.288, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.6810295581817627, |
|
"rewards/margins": 2.130437135696411, |
|
"rewards/rejected": 0.5505925416946411, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.9387755102040817, |
|
"grad_norm": 1.3117539882659912, |
|
"learning_rate": 1.1375001769727999e-08, |
|
"logits/chosen": -0.6442250609397888, |
|
"logits/rejected": -0.6030440926551819, |
|
"logps/chosen": -154.71214294433594, |
|
"logps/rejected": -220.8012237548828, |
|
"loss": 0.2717, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 2.1535449028015137, |
|
"rewards/margins": 1.923103928565979, |
|
"rewards/rejected": 0.23044133186340332, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.9489795918367347, |
|
"grad_norm": 1.646316409111023, |
|
"learning_rate": 7.908504839081342e-09, |
|
"logits/chosen": -0.7338714599609375, |
|
"logits/rejected": -0.7258167266845703, |
|
"logps/chosen": -154.7112274169922, |
|
"logps/rejected": -180.6020965576172, |
|
"loss": 0.2866, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 2.1198439598083496, |
|
"rewards/margins": 1.7403348684310913, |
|
"rewards/rejected": 0.3795092701911926, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.9591836734693877, |
|
"grad_norm": 1.2458878755569458, |
|
"learning_rate": 5.0662636100292086e-09, |
|
"logits/chosen": -0.6468678712844849, |
|
"logits/rejected": -0.5771836042404175, |
|
"logps/chosen": -185.34632873535156, |
|
"logps/rejected": -211.8957061767578, |
|
"loss": 0.2936, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 2.3122048377990723, |
|
"rewards/margins": 2.1624603271484375, |
|
"rewards/rejected": 0.14974427223205566, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.9693877551020408, |
|
"grad_norm": 1.061425805091858, |
|
"learning_rate": 2.851883682973233e-09, |
|
"logits/chosen": -0.6436801552772522, |
|
"logits/rejected": -0.6932533979415894, |
|
"logps/chosen": -126.62858581542969, |
|
"logps/rejected": -224.2174072265625, |
|
"loss": 0.299, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 2.1702253818511963, |
|
"rewards/margins": 2.06597638130188, |
|
"rewards/rejected": 0.1042490229010582, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.9795918367346939, |
|
"grad_norm": 1.376145362854004, |
|
"learning_rate": 1.2681741682282754e-09, |
|
"logits/chosen": -0.6445289850234985, |
|
"logits/rejected": -0.5357323884963989, |
|
"logps/chosen": -160.71707153320312, |
|
"logps/rejected": -172.3175811767578, |
|
"loss": 0.3125, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 2.2926859855651855, |
|
"rewards/margins": 1.9077179431915283, |
|
"rewards/rejected": 0.384968101978302, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.9897959183673469, |
|
"grad_norm": 1.1405550241470337, |
|
"learning_rate": 3.171441224514848e-10, |
|
"logits/chosen": -0.7027498483657837, |
|
"logits/rejected": -0.6814337968826294, |
|
"logps/chosen": -189.13955688476562, |
|
"logps/rejected": -220.84585571289062, |
|
"loss": 0.2744, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 2.1381711959838867, |
|
"rewards/margins": 1.7079559564590454, |
|
"rewards/rejected": 0.4302152693271637, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 1.1128407716751099, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.6814506649971008, |
|
"logits/rejected": -0.6116394996643066, |
|
"logps/chosen": -214.19149780273438, |
|
"logps/rejected": -261.8013916015625, |
|
"loss": 0.276, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 1.9612834453582764, |
|
"rewards/margins": 1.9090359210968018, |
|
"rewards/rejected": 0.05224757641553879, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 980, |
|
"total_flos": 0.0, |
|
"train_loss": 0.39260031933687173, |
|
"train_runtime": 7916.79, |
|
"train_samples_per_second": 3.96, |
|
"train_steps_per_second": 0.124 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 980, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|