|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9978094194961664, |
|
"eval_steps": 50000, |
|
"global_step": 1216, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.008214676889375685, |
|
"grad_norm": 28.31659169550249, |
|
"learning_rate": 4.0983606557377046e-08, |
|
"logits/chosen": 27.184185028076172, |
|
"logits/rejected": 25.856258392333984, |
|
"logps/chosen": -244.33399963378906, |
|
"logps/rejected": -79.7464828491211, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.015291815623641014, |
|
"rewards/margins": 0.03226657956838608, |
|
"rewards/rejected": -0.016974765807390213, |
|
"sft_loss": 0.6384800672531128, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01642935377875137, |
|
"grad_norm": 23.954861556073595, |
|
"learning_rate": 8.196721311475409e-08, |
|
"logits/chosen": 26.661327362060547, |
|
"logits/rejected": 25.39139175415039, |
|
"logps/chosen": -207.91012573242188, |
|
"logps/rejected": -72.70105743408203, |
|
"loss": 0.6278, |
|
"rewards/accuracies": 0.786666750907898, |
|
"rewards/chosen": -0.03712935373187065, |
|
"rewards/margins": 0.1326407492160797, |
|
"rewards/rejected": -0.16977010667324066, |
|
"sft_loss": 0.6469722986221313, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.024644030668127054, |
|
"grad_norm": 12.243904208611285, |
|
"learning_rate": 1.2295081967213113e-07, |
|
"logits/chosen": 27.410442352294922, |
|
"logits/rejected": 26.34275245666504, |
|
"logps/chosen": -211.9379119873047, |
|
"logps/rejected": -84.06497192382812, |
|
"loss": 0.4689, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -0.15980161726474762, |
|
"rewards/margins": 0.5716416239738464, |
|
"rewards/rejected": -0.7314431667327881, |
|
"sft_loss": 0.6353262662887573, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.03285870755750274, |
|
"grad_norm": 9.287884891913714, |
|
"learning_rate": 1.6393442622950818e-07, |
|
"logits/chosen": 27.55664825439453, |
|
"logits/rejected": 26.192903518676758, |
|
"logps/chosen": -280.5326843261719, |
|
"logps/rejected": -112.344970703125, |
|
"loss": 0.3581, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -0.6804031729698181, |
|
"rewards/margins": 1.3235772848129272, |
|
"rewards/rejected": -2.003980875015259, |
|
"sft_loss": 0.7250083684921265, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04107338444687842, |
|
"grad_norm": 7.405267430642346, |
|
"learning_rate": 2.0491803278688524e-07, |
|
"logits/chosen": 26.403255462646484, |
|
"logits/rejected": 25.545373916625977, |
|
"logps/chosen": -249.10903930664062, |
|
"logps/rejected": -114.06135559082031, |
|
"loss": 0.3047, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": -1.3252760171890259, |
|
"rewards/margins": 1.8669867515563965, |
|
"rewards/rejected": -3.1922624111175537, |
|
"sft_loss": 0.7239670157432556, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.04928806133625411, |
|
"grad_norm": 4.80122081598555, |
|
"learning_rate": 2.4590163934426226e-07, |
|
"logits/chosen": 25.064653396606445, |
|
"logits/rejected": 24.138179779052734, |
|
"logps/chosen": -273.2266540527344, |
|
"logps/rejected": -129.16700744628906, |
|
"loss": 0.2507, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": -1.8779884576797485, |
|
"rewards/margins": 2.8930463790893555, |
|
"rewards/rejected": -4.7710347175598145, |
|
"sft_loss": 0.7124671936035156, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.05750273822562979, |
|
"grad_norm": 4.89445256463419, |
|
"learning_rate": 2.868852459016393e-07, |
|
"logits/chosen": 23.180265426635742, |
|
"logits/rejected": 22.45435905456543, |
|
"logps/chosen": -281.69757080078125, |
|
"logps/rejected": -149.479248046875, |
|
"loss": 0.2291, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -2.5409016609191895, |
|
"rewards/margins": 3.659693956375122, |
|
"rewards/rejected": -6.200596809387207, |
|
"sft_loss": 0.78533536195755, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.06571741511500548, |
|
"grad_norm": 5.60278997158879, |
|
"learning_rate": 3.2786885245901637e-07, |
|
"logits/chosen": 21.637163162231445, |
|
"logits/rejected": 21.079978942871094, |
|
"logps/chosen": -259.0185546875, |
|
"logps/rejected": -145.42489624023438, |
|
"loss": 0.2061, |
|
"rewards/accuracies": 0.9333333373069763, |
|
"rewards/chosen": -2.746673583984375, |
|
"rewards/margins": 3.7938296794891357, |
|
"rewards/rejected": -6.54050350189209, |
|
"sft_loss": 0.7671460509300232, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07393209200438117, |
|
"grad_norm": 4.3242655896865045, |
|
"learning_rate": 3.6885245901639347e-07, |
|
"logits/chosen": 22.72602653503418, |
|
"logits/rejected": 21.949237823486328, |
|
"logps/chosen": -265.2069396972656, |
|
"logps/rejected": -151.65060424804688, |
|
"loss": 0.1803, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -3.016242265701294, |
|
"rewards/margins": 4.000359058380127, |
|
"rewards/rejected": -7.016600608825684, |
|
"sft_loss": 0.7528119683265686, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.08214676889375684, |
|
"grad_norm": 4.333490981221177, |
|
"learning_rate": 4.0983606557377047e-07, |
|
"logits/chosen": 22.819128036499023, |
|
"logits/rejected": 21.5169620513916, |
|
"logps/chosen": -287.1695251464844, |
|
"logps/rejected": -152.8419647216797, |
|
"loss": 0.1609, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -3.535120725631714, |
|
"rewards/margins": 3.6887311935424805, |
|
"rewards/rejected": -7.223852157592773, |
|
"sft_loss": 0.7424061894416809, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09036144578313253, |
|
"grad_norm": 3.8031629734590995, |
|
"learning_rate": 4.508196721311475e-07, |
|
"logits/chosen": 22.994489669799805, |
|
"logits/rejected": 21.521961212158203, |
|
"logps/chosen": -276.05242919921875, |
|
"logps/rejected": -162.29678344726562, |
|
"loss": 0.1759, |
|
"rewards/accuracies": 0.9333333373069763, |
|
"rewards/chosen": -3.299473762512207, |
|
"rewards/margins": 4.333623886108398, |
|
"rewards/rejected": -7.6330976486206055, |
|
"sft_loss": 0.7973353266716003, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.09857612267250822, |
|
"grad_norm": 2.9463104781172405, |
|
"learning_rate": 4.918032786885245e-07, |
|
"logits/chosen": 23.39748764038086, |
|
"logits/rejected": 22.529293060302734, |
|
"logps/chosen": -226.25714111328125, |
|
"logps/rejected": -133.46775817871094, |
|
"loss": 0.166, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": -2.8591995239257812, |
|
"rewards/margins": 3.5714006423950195, |
|
"rewards/rejected": -6.430600166320801, |
|
"sft_loss": 0.7205591797828674, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.10679079956188389, |
|
"grad_norm": 5.472434832813438, |
|
"learning_rate": 4.999852034151641e-07, |
|
"logits/chosen": 21.749300003051758, |
|
"logits/rejected": 21.059572219848633, |
|
"logps/chosen": -288.58892822265625, |
|
"logps/rejected": -163.43212890625, |
|
"loss": 0.1564, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -3.1936755180358887, |
|
"rewards/margins": 4.4503631591796875, |
|
"rewards/rejected": -7.644038200378418, |
|
"sft_loss": 0.8240499496459961, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.11500547645125958, |
|
"grad_norm": 2.7329123191165547, |
|
"learning_rate": 4.999250952911133e-07, |
|
"logits/chosen": 23.737056732177734, |
|
"logits/rejected": 22.234569549560547, |
|
"logps/chosen": -282.01953125, |
|
"logps/rejected": -153.94252014160156, |
|
"loss": 0.1336, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -3.0082314014434814, |
|
"rewards/margins": 4.5703325271606445, |
|
"rewards/rejected": -7.5785627365112305, |
|
"sft_loss": 0.8242512345314026, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12322015334063527, |
|
"grad_norm": 48.59604948359565, |
|
"learning_rate": 4.998187619501184e-07, |
|
"logits/chosen": 23.999074935913086, |
|
"logits/rejected": 22.998958587646484, |
|
"logps/chosen": -320.11505126953125, |
|
"logps/rejected": -178.56219482421875, |
|
"loss": 0.1269, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -3.2927637100219727, |
|
"rewards/margins": 5.467880725860596, |
|
"rewards/rejected": -8.760644912719727, |
|
"sft_loss": 0.8822128772735596, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.13143483023001096, |
|
"grad_norm": 34.18732885169035, |
|
"learning_rate": 4.996662230591989e-07, |
|
"logits/chosen": 21.339969635009766, |
|
"logits/rejected": 20.508026123046875, |
|
"logps/chosen": -297.17633056640625, |
|
"logps/rejected": -178.69137573242188, |
|
"loss": 0.1336, |
|
"rewards/accuracies": 0.9200000762939453, |
|
"rewards/chosen": -3.8860669136047363, |
|
"rewards/margins": 5.22913932800293, |
|
"rewards/rejected": -9.115203857421875, |
|
"sft_loss": 0.8318250179290771, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.13964950711938665, |
|
"grad_norm": 3.158882002153525, |
|
"learning_rate": 4.994675068313813e-07, |
|
"logits/chosen": 21.070241928100586, |
|
"logits/rejected": 20.90212631225586, |
|
"logps/chosen": -286.97998046875, |
|
"logps/rejected": -186.49452209472656, |
|
"loss": 0.1122, |
|
"rewards/accuracies": 0.9200000762939453, |
|
"rewards/chosen": -4.223217010498047, |
|
"rewards/margins": 5.617285251617432, |
|
"rewards/rejected": -9.84050178527832, |
|
"sft_loss": 0.8361734747886658, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.14786418400876233, |
|
"grad_norm": 3.2825430282007164, |
|
"learning_rate": 4.992226500204806e-07, |
|
"logits/chosen": 22.27889060974121, |
|
"logits/rejected": 21.50484275817871, |
|
"logps/chosen": -287.25439453125, |
|
"logps/rejected": -167.19528198242188, |
|
"loss": 0.1182, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -3.6078903675079346, |
|
"rewards/margins": 5.157074451446533, |
|
"rewards/rejected": -8.764965057373047, |
|
"sft_loss": 0.865871787071228, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.156078860898138, |
|
"grad_norm": 4.571628146800866, |
|
"learning_rate": 4.989316979143029e-07, |
|
"logits/chosen": 21.98550796508789, |
|
"logits/rejected": 20.47240447998047, |
|
"logps/chosen": -276.35479736328125, |
|
"logps/rejected": -154.84384155273438, |
|
"loss": 0.1667, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -3.3969411849975586, |
|
"rewards/margins": 4.759787559509277, |
|
"rewards/rejected": -8.156728744506836, |
|
"sft_loss": 0.8606770038604736, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.16429353778751368, |
|
"grad_norm": 5.767989035541414, |
|
"learning_rate": 4.985947043262686e-07, |
|
"logits/chosen": 20.512102127075195, |
|
"logits/rejected": 19.77199935913086, |
|
"logps/chosen": -282.6247863769531, |
|
"logps/rejected": -166.79330444335938, |
|
"loss": 0.1454, |
|
"rewards/accuracies": 0.9333333373069763, |
|
"rewards/chosen": -3.124844551086426, |
|
"rewards/margins": 5.334622383117676, |
|
"rewards/rejected": -8.459466934204102, |
|
"sft_loss": 0.8327052593231201, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.17250821467688937, |
|
"grad_norm": 2.592654441763786, |
|
"learning_rate": 4.982117315854593e-07, |
|
"logits/chosen": 20.459001541137695, |
|
"logits/rejected": 20.174640655517578, |
|
"logps/chosen": -248.44091796875, |
|
"logps/rejected": -155.70086669921875, |
|
"loss": 0.1293, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -2.7995762825012207, |
|
"rewards/margins": 4.992871284484863, |
|
"rewards/rejected": -7.792448043823242, |
|
"sft_loss": 0.8232018351554871, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.18072289156626506, |
|
"grad_norm": 3.061900848647029, |
|
"learning_rate": 4.977828505250903e-07, |
|
"logits/chosen": 20.415132522583008, |
|
"logits/rejected": 19.290271759033203, |
|
"logps/chosen": -247.19740295410156, |
|
"logps/rejected": -146.08677673339844, |
|
"loss": 0.1359, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -3.0541529655456543, |
|
"rewards/margins": 4.263410568237305, |
|
"rewards/rejected": -7.317563056945801, |
|
"sft_loss": 0.8086569309234619, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.18893756845564075, |
|
"grad_norm": 3.8428826764124495, |
|
"learning_rate": 4.973081404694087e-07, |
|
"logits/chosen": 19.876977920532227, |
|
"logits/rejected": 19.5655574798584, |
|
"logps/chosen": -273.8357849121094, |
|
"logps/rejected": -173.55661010742188, |
|
"loss": 0.1152, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -3.349548578262329, |
|
"rewards/margins": 5.433572292327881, |
|
"rewards/rejected": -8.783121109008789, |
|
"sft_loss": 0.8695060610771179, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.19715224534501644, |
|
"grad_norm": 3.5510462679770107, |
|
"learning_rate": 4.967876892190227e-07, |
|
"logits/chosen": 21.53512191772461, |
|
"logits/rejected": 19.82799530029297, |
|
"logps/chosen": -300.6681823730469, |
|
"logps/rejected": -162.72463989257812, |
|
"loss": 0.1214, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -3.324470043182373, |
|
"rewards/margins": 5.188758850097656, |
|
"rewards/rejected": -8.513228416442871, |
|
"sft_loss": 0.8628395199775696, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.20536692223439212, |
|
"grad_norm": 2.7722703582939263, |
|
"learning_rate": 4.962215930346614e-07, |
|
"logits/chosen": 20.72852897644043, |
|
"logits/rejected": 19.19593048095703, |
|
"logps/chosen": -277.3075256347656, |
|
"logps/rejected": -168.2571258544922, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -3.5354294776916504, |
|
"rewards/margins": 5.148807048797607, |
|
"rewards/rejected": -8.684236526489258, |
|
"sft_loss": 0.8580695986747742, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.21358159912376778, |
|
"grad_norm": 2.509695712976081, |
|
"learning_rate": 4.956099566193716e-07, |
|
"logits/chosen": 19.78179931640625, |
|
"logits/rejected": 18.386613845825195, |
|
"logps/chosen": -292.8381042480469, |
|
"logps/rejected": -179.89065551757812, |
|
"loss": 0.0878, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.8737540245056152, |
|
"rewards/margins": 5.42431116104126, |
|
"rewards/rejected": -9.298065185546875, |
|
"sft_loss": 0.8839088678359985, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.22179627601314347, |
|
"grad_norm": 3.4066444126067936, |
|
"learning_rate": 4.949528930991521e-07, |
|
"logits/chosen": 20.118431091308594, |
|
"logits/rejected": 18.660707473754883, |
|
"logps/chosen": -300.8720703125, |
|
"logps/rejected": -190.8058319091797, |
|
"loss": 0.1162, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -4.312110900878906, |
|
"rewards/margins": 6.067122936248779, |
|
"rewards/rejected": -10.379232406616211, |
|
"sft_loss": 0.8384607434272766, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.23001095290251916, |
|
"grad_norm": 2.863527702169043, |
|
"learning_rate": 4.9425052400203e-07, |
|
"logits/chosen": 19.4266300201416, |
|
"logits/rejected": 18.600717544555664, |
|
"logps/chosen": -289.1473693847656, |
|
"logps/rejected": -183.9163360595703, |
|
"loss": 0.1138, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -4.603145122528076, |
|
"rewards/margins": 5.1031365394592285, |
|
"rewards/rejected": -9.706281661987305, |
|
"sft_loss": 0.8902355432510376, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.23822562979189485, |
|
"grad_norm": 4.705307692419688, |
|
"learning_rate": 4.935029792355834e-07, |
|
"logits/chosen": 19.58378791809082, |
|
"logits/rejected": 18.723949432373047, |
|
"logps/chosen": -288.09527587890625, |
|
"logps/rejected": -178.56491088867188, |
|
"loss": 0.0997, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -3.634652853012085, |
|
"rewards/margins": 5.0857977867126465, |
|
"rewards/rejected": -8.720452308654785, |
|
"sft_loss": 0.8318749666213989, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.24644030668127054, |
|
"grad_norm": 4.443077031203506, |
|
"learning_rate": 4.927103970629147e-07, |
|
"logits/chosen": 19.900896072387695, |
|
"logits/rejected": 18.748838424682617, |
|
"logps/chosen": -296.9352111816406, |
|
"logps/rejected": -197.29566955566406, |
|
"loss": 0.1249, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -5.586284160614014, |
|
"rewards/margins": 5.75706672668457, |
|
"rewards/rejected": -11.343351364135742, |
|
"sft_loss": 0.8322177529335022, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.2546549835706462, |
|
"grad_norm": 3.0620385223375557, |
|
"learning_rate": 4.918729240770775e-07, |
|
"logits/chosen": 19.462993621826172, |
|
"logits/rejected": 19.144250869750977, |
|
"logps/chosen": -282.6866455078125, |
|
"logps/rejected": -189.68820190429688, |
|
"loss": 0.1166, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -5.4352288246154785, |
|
"rewards/margins": 6.065767288208008, |
|
"rewards/rejected": -11.500997543334961, |
|
"sft_loss": 0.9704034328460693, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.2628696604600219, |
|
"grad_norm": 3.5256501257886717, |
|
"learning_rate": 4.909907151739633e-07, |
|
"logits/chosen": 20.455379486083984, |
|
"logits/rejected": 19.202545166015625, |
|
"logps/chosen": -305.9945373535156, |
|
"logps/rejected": -189.74273681640625, |
|
"loss": 0.1238, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -4.548050403594971, |
|
"rewards/margins": 6.132920265197754, |
|
"rewards/rejected": -10.680971145629883, |
|
"sft_loss": 0.8781507015228271, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.2710843373493976, |
|
"grad_norm": 5.636178726208259, |
|
"learning_rate": 4.900639335236526e-07, |
|
"logits/chosen": 20.459400177001953, |
|
"logits/rejected": 19.524314880371094, |
|
"logps/chosen": -278.4658508300781, |
|
"logps/rejected": -170.0297088623047, |
|
"loss": 0.1276, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -3.6279122829437256, |
|
"rewards/margins": 5.202185153961182, |
|
"rewards/rejected": -8.830097198486328, |
|
"sft_loss": 0.8619104623794556, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.2792990142387733, |
|
"grad_norm": 3.839926119567715, |
|
"learning_rate": 4.890927505402359e-07, |
|
"logits/chosen": 18.842321395874023, |
|
"logits/rejected": 18.251901626586914, |
|
"logps/chosen": -251.1257781982422, |
|
"logps/rejected": -161.70802307128906, |
|
"loss": 0.1045, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -3.4545812606811523, |
|
"rewards/margins": 5.017471790313721, |
|
"rewards/rejected": -8.472052574157715, |
|
"sft_loss": 0.7917510271072388, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.28751369112814895, |
|
"grad_norm": 3.2492164364318836, |
|
"learning_rate": 4.880773458501089e-07, |
|
"logits/chosen": 21.866779327392578, |
|
"logits/rejected": 20.14942169189453, |
|
"logps/chosen": -265.2092590332031, |
|
"logps/rejected": -160.5437469482422, |
|
"loss": 0.0838, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -3.7316997051239014, |
|
"rewards/margins": 4.919445514678955, |
|
"rewards/rejected": -8.651144981384277, |
|
"sft_loss": 0.8359836339950562, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.29572836801752467, |
|
"grad_norm": 2.829467767903507, |
|
"learning_rate": 4.870179072587498e-07, |
|
"logits/chosen": 19.415096282958984, |
|
"logits/rejected": 17.55588722229004, |
|
"logps/chosen": -264.83953857421875, |
|
"logps/rejected": -170.27407836914062, |
|
"loss": 0.1141, |
|
"rewards/accuracies": 0.9333333373069763, |
|
"rewards/chosen": -4.813459396362305, |
|
"rewards/margins": 5.231393337249756, |
|
"rewards/rejected": -10.044852256774902, |
|
"sft_loss": 0.9549927115440369, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.30394304490690033, |
|
"grad_norm": 10.34631367942223, |
|
"learning_rate": 4.859146307159841e-07, |
|
"logits/chosen": 20.497596740722656, |
|
"logits/rejected": 18.766536712646484, |
|
"logps/chosen": -261.2963562011719, |
|
"logps/rejected": -182.26380920410156, |
|
"loss": 0.0915, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -5.357536792755127, |
|
"rewards/margins": 5.210677623748779, |
|
"rewards/rejected": -10.568212509155273, |
|
"sft_loss": 0.9084681868553162, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.312157721796276, |
|
"grad_norm": 5.783975007709455, |
|
"learning_rate": 4.847677202797414e-07, |
|
"logits/chosen": 21.4503116607666, |
|
"logits/rejected": 20.343997955322266, |
|
"logps/chosen": -277.43756103515625, |
|
"logps/rejected": -190.31895446777344, |
|
"loss": 0.1261, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -5.206136226654053, |
|
"rewards/margins": 6.117234230041504, |
|
"rewards/rejected": -11.323369979858398, |
|
"sft_loss": 0.8104835748672485, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.3203723986856517, |
|
"grad_norm": 5.842032329450428, |
|
"learning_rate": 4.835773880783144e-07, |
|
"logits/chosen": 18.739864349365234, |
|
"logits/rejected": 18.180030822753906, |
|
"logps/chosen": -279.97637939453125, |
|
"logps/rejected": -194.57496643066406, |
|
"loss": 0.0996, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -4.8305253982543945, |
|
"rewards/margins": 6.600159168243408, |
|
"rewards/rejected": -11.430684089660645, |
|
"sft_loss": 0.8295060396194458, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.32858707557502737, |
|
"grad_norm": 3.2670595180505755, |
|
"learning_rate": 4.823438542711238e-07, |
|
"logits/chosen": 19.910261154174805, |
|
"logits/rejected": 19.292858123779297, |
|
"logps/chosen": -298.5541076660156, |
|
"logps/rejected": -194.70803833007812, |
|
"loss": 0.0875, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -4.582242488861084, |
|
"rewards/margins": 6.173951625823975, |
|
"rewards/rejected": -10.756195068359375, |
|
"sft_loss": 0.917156994342804, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3368017524644031, |
|
"grad_norm": 3.1103237361524463, |
|
"learning_rate": 4.81067347007999e-07, |
|
"logits/chosen": 21.32513999938965, |
|
"logits/rejected": 20.00907325744629, |
|
"logps/chosen": -282.4539489746094, |
|
"logps/rejected": -180.13723754882812, |
|
"loss": 0.1094, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": -5.038515090942383, |
|
"rewards/margins": 5.208615303039551, |
|
"rewards/rejected": -10.247130393981934, |
|
"sft_loss": 0.9259530305862427, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.34501642935377874, |
|
"grad_norm": 2.739223034780536, |
|
"learning_rate": 4.797481023869801e-07, |
|
"logits/chosen": 20.315624237060547, |
|
"logits/rejected": 19.15978240966797, |
|
"logps/chosen": -251.40025329589844, |
|
"logps/rejected": -175.2682647705078, |
|
"loss": 0.0868, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -4.576803684234619, |
|
"rewards/margins": 5.497461795806885, |
|
"rewards/rejected": -10.074265480041504, |
|
"sft_loss": 0.9133593440055847, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.35323110624315446, |
|
"grad_norm": 3.832286663529339, |
|
"learning_rate": 4.783863644106502e-07, |
|
"logits/chosen": 20.822975158691406, |
|
"logits/rejected": 19.61457633972168, |
|
"logps/chosen": -272.2485656738281, |
|
"logps/rejected": -176.64450073242188, |
|
"loss": 0.084, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -4.567282676696777, |
|
"rewards/margins": 5.274582862854004, |
|
"rewards/rejected": -9.841866493225098, |
|
"sft_loss": 0.9593473672866821, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.3614457831325301, |
|
"grad_norm": 2.3582346070622857, |
|
"learning_rate": 4.769823849410053e-07, |
|
"logits/chosen": 18.23250961303711, |
|
"logits/rejected": 17.80348777770996, |
|
"logps/chosen": -308.80230712890625, |
|
"logps/rejected": -218.04078674316406, |
|
"loss": 0.0717, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.822837829589844, |
|
"rewards/margins": 7.0780110359191895, |
|
"rewards/rejected": -12.900848388671875, |
|
"sft_loss": 0.9446174502372742, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.3696604600219058, |
|
"grad_norm": 2.4553351337767424, |
|
"learning_rate": 4.7553642365287127e-07, |
|
"logits/chosen": 18.841230392456055, |
|
"logits/rejected": 18.06731414794922, |
|
"logps/chosen": -290.5154724121094, |
|
"logps/rejected": -198.66468811035156, |
|
"loss": 0.1065, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -5.4337263107299805, |
|
"rewards/margins": 6.076140880584717, |
|
"rewards/rejected": -11.509869575500488, |
|
"sft_loss": 0.9967135787010193, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.3778751369112815, |
|
"grad_norm": 5.091180856379443, |
|
"learning_rate": 4.7404874798587493e-07, |
|
"logits/chosen": 20.33222770690918, |
|
"logits/rejected": 19.57872772216797, |
|
"logps/chosen": -296.5139465332031, |
|
"logps/rejected": -187.86419677734375, |
|
"loss": 0.0947, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -4.502869129180908, |
|
"rewards/margins": 6.029680252075195, |
|
"rewards/rejected": -10.532547950744629, |
|
"sft_loss": 0.8854550719261169, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.38608981380065716, |
|
"grad_norm": 3.7252781041155845, |
|
"learning_rate": 4.7251963309497965e-07, |
|
"logits/chosen": 19.174985885620117, |
|
"logits/rejected": 18.191041946411133, |
|
"logps/chosen": -294.4977111816406, |
|
"logps/rejected": -195.45391845703125, |
|
"loss": 0.1189, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -4.810143947601318, |
|
"rewards/margins": 5.988135814666748, |
|
"rewards/rejected": -10.79827880859375, |
|
"sft_loss": 0.9932563304901123, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.39430449069003287, |
|
"grad_norm": 2.8445489541878457, |
|
"learning_rate": 4.709493617995938e-07, |
|
"logits/chosen": 19.772836685180664, |
|
"logits/rejected": 18.601234436035156, |
|
"logps/chosen": -281.09808349609375, |
|
"logps/rejected": -186.28834533691406, |
|
"loss": 0.0903, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -4.58309268951416, |
|
"rewards/margins": 6.093752384185791, |
|
"rewards/rejected": -10.67684555053711, |
|
"sft_loss": 0.8651002049446106, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.40251916757940853, |
|
"grad_norm": 3.5938197609476856, |
|
"learning_rate": 4.6933822453126114e-07, |
|
"logits/chosen": 18.83224105834961, |
|
"logits/rejected": 18.597658157348633, |
|
"logps/chosen": -230.34445190429688, |
|
"logps/rejected": -173.43992614746094, |
|
"loss": 0.1183, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -4.852273464202881, |
|
"rewards/margins": 5.561976432800293, |
|
"rewards/rejected": -10.414249420166016, |
|
"sft_loss": 0.9817420840263367, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.41073384446878425, |
|
"grad_norm": 5.781394523018812, |
|
"learning_rate": 4.676865192799443e-07, |
|
"logits/chosen": 20.996742248535156, |
|
"logits/rejected": 20.808259963989258, |
|
"logps/chosen": -305.62493896484375, |
|
"logps/rejected": -224.93048095703125, |
|
"loss": 0.0704, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -6.482853889465332, |
|
"rewards/margins": 6.840579509735107, |
|
"rewards/rejected": -13.323433876037598, |
|
"sft_loss": 0.9305270910263062, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.4189485213581599, |
|
"grad_norm": 2.956667548991593, |
|
"learning_rate": 4.65994551538909e-07, |
|
"logits/chosen": 20.87143898010254, |
|
"logits/rejected": 19.100732803344727, |
|
"logps/chosen": -339.39825439453125, |
|
"logps/rejected": -216.97386169433594, |
|
"loss": 0.1042, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -6.5454792976379395, |
|
"rewards/margins": 6.870533466339111, |
|
"rewards/rejected": -13.416014671325684, |
|
"sft_loss": 1.041126012802124, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.42716319824753557, |
|
"grad_norm": 4.392320655716504, |
|
"learning_rate": 4.642626342482215e-07, |
|
"logits/chosen": 19.316911697387695, |
|
"logits/rejected": 18.243391036987305, |
|
"logps/chosen": -256.42987060546875, |
|
"logps/rejected": -179.37509155273438, |
|
"loss": 0.1123, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -5.184771537780762, |
|
"rewards/margins": 5.357823848724365, |
|
"rewards/rejected": -10.542596817016602, |
|
"sft_loss": 0.9005042314529419, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.4353778751369113, |
|
"grad_norm": 3.4877310264508186, |
|
"learning_rate": 4.624910877368684e-07, |
|
"logits/chosen": 18.9818115234375, |
|
"logits/rejected": 18.550493240356445, |
|
"logps/chosen": -290.06829833984375, |
|
"logps/rejected": -182.69097900390625, |
|
"loss": 0.0943, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -4.028794765472412, |
|
"rewards/margins": 5.486021041870117, |
|
"rewards/rejected": -9.514815330505371, |
|
"sft_loss": 0.8677726984024048, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.44359255202628695, |
|
"grad_norm": 3.8262254077954485, |
|
"learning_rate": 4.606802396635098e-07, |
|
"logits/chosen": 19.717529296875, |
|
"logits/rejected": 19.07860565185547, |
|
"logps/chosen": -295.7071838378906, |
|
"logps/rejected": -187.46609497070312, |
|
"loss": 0.0909, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -4.471216678619385, |
|
"rewards/margins": 5.717488765716553, |
|
"rewards/rejected": -10.188706398010254, |
|
"sft_loss": 0.8504019379615784, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.45180722891566266, |
|
"grad_norm": 5.060345147215167, |
|
"learning_rate": 4.588304249558763e-07, |
|
"logits/chosen": 18.937856674194336, |
|
"logits/rejected": 17.51219940185547, |
|
"logps/chosen": -296.1573486328125, |
|
"logps/rejected": -193.85498046875, |
|
"loss": 0.0999, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -5.353262424468994, |
|
"rewards/margins": 5.619455814361572, |
|
"rewards/rejected": -10.97271728515625, |
|
"sft_loss": 0.9665505886077881, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.4600219058050383, |
|
"grad_norm": 4.00534202244361, |
|
"learning_rate": 4.569419857488228e-07, |
|
"logits/chosen": 19.323719024658203, |
|
"logits/rejected": 18.016199111938477, |
|
"logps/chosen": -321.9727783203125, |
|
"logps/rejected": -199.45265197753906, |
|
"loss": 0.1019, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -5.450780868530273, |
|
"rewards/margins": 6.008893013000488, |
|
"rewards/rejected": -11.459673881530762, |
|
"sft_loss": 0.99959397315979, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.46823658269441404, |
|
"grad_norm": 3.285371678607192, |
|
"learning_rate": 4.550152713210478e-07, |
|
"logits/chosen": 20.368547439575195, |
|
"logits/rejected": 18.82790756225586, |
|
"logps/chosen": -274.7012634277344, |
|
"logps/rejected": -187.615966796875, |
|
"loss": 0.0864, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -5.436644554138184, |
|
"rewards/margins": 5.6056671142578125, |
|
"rewards/rejected": -11.042311668395996, |
|
"sft_loss": 0.9444936513900757, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.4764512595837897, |
|
"grad_norm": 4.461231980649141, |
|
"learning_rate": 4.530506380304925e-07, |
|
"logits/chosen": 19.512760162353516, |
|
"logits/rejected": 17.607872009277344, |
|
"logps/chosen": -353.92181396484375, |
|
"logps/rejected": -215.2699432373047, |
|
"loss": 0.0889, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -5.762898921966553, |
|
"rewards/margins": 6.650979042053223, |
|
"rewards/rejected": -12.413877487182617, |
|
"sft_loss": 1.0174636840820312, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.4846659364731654, |
|
"grad_norm": 6.886972650518142, |
|
"learning_rate": 4.510484492484301e-07, |
|
"logits/chosen": 18.66490936279297, |
|
"logits/rejected": 19.016637802124023, |
|
"logps/chosen": -309.6719055175781, |
|
"logps/rejected": -233.22732543945312, |
|
"loss": 0.091, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.444864749908447, |
|
"rewards/margins": 7.558566093444824, |
|
"rewards/rejected": -14.003432273864746, |
|
"sft_loss": 0.9799606800079346, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.4928806133625411, |
|
"grad_norm": 7.0952145291800806, |
|
"learning_rate": 4.4900907529225797e-07, |
|
"logits/chosen": 18.357818603515625, |
|
"logits/rejected": 17.093488693237305, |
|
"logps/chosen": -321.1383972167969, |
|
"logps/rejected": -212.69412231445312, |
|
"loss": 0.1166, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -6.2735819816589355, |
|
"rewards/margins": 6.8673529624938965, |
|
"rewards/rejected": -13.140933990478516, |
|
"sft_loss": 0.9731053709983826, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.5010952902519168, |
|
"grad_norm": 3.035306319401663, |
|
"learning_rate": 4.46932893357005e-07, |
|
"logits/chosen": 19.986753463745117, |
|
"logits/rejected": 19.238985061645508, |
|
"logps/chosen": -317.82354736328125, |
|
"logps/rejected": -216.91896057128906, |
|
"loss": 0.0689, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.982201099395752, |
|
"rewards/margins": 7.021622657775879, |
|
"rewards/rejected": -13.003824234008789, |
|
"sft_loss": 0.9540520310401917, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.5093099671412924, |
|
"grad_norm": 2.8463069418400826, |
|
"learning_rate": 4.448202874455672e-07, |
|
"logits/chosen": 18.925642013549805, |
|
"logits/rejected": 18.447513580322266, |
|
"logps/chosen": -310.6861267089844, |
|
"logps/rejected": -205.47084045410156, |
|
"loss": 0.1225, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": -5.411799430847168, |
|
"rewards/margins": 6.359774112701416, |
|
"rewards/rejected": -11.771574020385742, |
|
"sft_loss": 1.0545023679733276, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.5175246440306681, |
|
"grad_norm": 3.9649140060367407, |
|
"learning_rate": 4.426716482976838e-07, |
|
"logits/chosen": 19.93076515197754, |
|
"logits/rejected": 19.637243270874023, |
|
"logps/chosen": -303.6711730957031, |
|
"logps/rejected": -199.40115356445312, |
|
"loss": 0.0913, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -4.821557521820068, |
|
"rewards/margins": 6.5209574699401855, |
|
"rewards/rejected": -11.342514991760254, |
|
"sft_loss": 0.9295309782028198, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.5257393209200438, |
|
"grad_norm": 3.5060241985803304, |
|
"learning_rate": 4.4048737331766774e-07, |
|
"logits/chosen": 21.6712646484375, |
|
"logits/rejected": 19.89933204650879, |
|
"logps/chosen": -271.3023681640625, |
|
"logps/rejected": -179.4087371826172, |
|
"loss": 0.1113, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -4.785543918609619, |
|
"rewards/margins": 5.356179714202881, |
|
"rewards/rejected": -10.1417236328125, |
|
"sft_loss": 0.8473352193832397, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.5339539978094195, |
|
"grad_norm": 9.641524189799103, |
|
"learning_rate": 4.3826786650090273e-07, |
|
"logits/chosen": 17.178016662597656, |
|
"logits/rejected": 17.272985458374023, |
|
"logps/chosen": -279.9751892089844, |
|
"logps/rejected": -187.63014221191406, |
|
"loss": 0.0909, |
|
"rewards/accuracies": 0.9333333373069763, |
|
"rewards/chosen": -5.042219161987305, |
|
"rewards/margins": 5.971033096313477, |
|
"rewards/rejected": -11.013254165649414, |
|
"sft_loss": 0.9807875156402588, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.5421686746987951, |
|
"grad_norm": 4.069079011849237, |
|
"learning_rate": 4.3601353835912235e-07, |
|
"logits/chosen": 18.978944778442383, |
|
"logits/rejected": 18.711896896362305, |
|
"logps/chosen": -257.9627990722656, |
|
"logps/rejected": -189.10731506347656, |
|
"loss": 0.1393, |
|
"rewards/accuracies": 0.9066666960716248, |
|
"rewards/chosen": -5.991618633270264, |
|
"rewards/margins": 5.532088279724121, |
|
"rewards/rejected": -11.523706436157227, |
|
"sft_loss": 0.9458999633789062, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5503833515881709, |
|
"grad_norm": 11.459818241023113, |
|
"learning_rate": 4.337248058444831e-07, |
|
"logits/chosen": 18.476844787597656, |
|
"logits/rejected": 16.893495559692383, |
|
"logps/chosen": -352.3243408203125, |
|
"logps/rejected": -229.020751953125, |
|
"loss": 0.0884, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -6.682969093322754, |
|
"rewards/margins": 6.964285373687744, |
|
"rewards/rejected": -13.64725399017334, |
|
"sft_loss": 1.09258234500885, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.5585980284775466, |
|
"grad_norm": 4.451183947920293, |
|
"learning_rate": 4.3140209227244617e-07, |
|
"logits/chosen": 19.733028411865234, |
|
"logits/rejected": 19.02443504333496, |
|
"logps/chosen": -255.39503479003906, |
|
"logps/rejected": -174.6426544189453, |
|
"loss": 0.1089, |
|
"rewards/accuracies": 0.9066667556762695, |
|
"rewards/chosen": -4.919735908508301, |
|
"rewards/margins": 5.3243584632873535, |
|
"rewards/rejected": -10.24409294128418, |
|
"sft_loss": 0.974420428276062, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5668127053669222, |
|
"grad_norm": 4.806631914628035, |
|
"learning_rate": 4.2904582724348316e-07, |
|
"logits/chosen": 18.731412887573242, |
|
"logits/rejected": 17.712743759155273, |
|
"logps/chosen": -292.1322937011719, |
|
"logps/rejected": -186.6304473876953, |
|
"loss": 0.093, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -4.8624372482299805, |
|
"rewards/margins": 5.988229751586914, |
|
"rewards/rejected": -10.850666999816895, |
|
"sft_loss": 1.1051782369613647, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.5750273822562979, |
|
"grad_norm": 9.642655055260906, |
|
"learning_rate": 4.266564465636182e-07, |
|
"logits/chosen": 20.78282928466797, |
|
"logits/rejected": 19.54219627380371, |
|
"logps/chosen": -336.76849365234375, |
|
"logps/rejected": -231.12326049804688, |
|
"loss": 0.0859, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -6.264214515686035, |
|
"rewards/margins": 7.32451868057251, |
|
"rewards/rejected": -13.58873462677002, |
|
"sft_loss": 0.9469544291496277, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.5832420591456736, |
|
"grad_norm": 2.9057809929137535, |
|
"learning_rate": 4.242343921638234e-07, |
|
"logits/chosen": 20.12310028076172, |
|
"logits/rejected": 18.395959854125977, |
|
"logps/chosen": -332.9981994628906, |
|
"logps/rejected": -202.02816772460938, |
|
"loss": 0.0601, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.237385272979736, |
|
"rewards/margins": 6.5451836585998535, |
|
"rewards/rejected": -11.782567977905273, |
|
"sft_loss": 1.00226628780365, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.5914567360350493, |
|
"grad_norm": 3.5441590536933605, |
|
"learning_rate": 4.2178011201828044e-07, |
|
"logits/chosen": 19.193899154663086, |
|
"logits/rejected": 17.946836471557617, |
|
"logps/chosen": -299.01483154296875, |
|
"logps/rejected": -192.25360107421875, |
|
"loss": 0.0879, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -5.201097011566162, |
|
"rewards/margins": 6.014251708984375, |
|
"rewards/rejected": -11.215349197387695, |
|
"sft_loss": 0.9916761517524719, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.5996714129244249, |
|
"grad_norm": 15.329178085855654, |
|
"learning_rate": 4.1929406006152546e-07, |
|
"logits/chosen": 19.373323440551758, |
|
"logits/rejected": 19.08150291442871, |
|
"logps/chosen": -284.7733154296875, |
|
"logps/rejected": -214.26292419433594, |
|
"loss": 0.104, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -6.026785373687744, |
|
"rewards/margins": 7.561707496643066, |
|
"rewards/rejected": -13.588491439819336, |
|
"sft_loss": 1.0006964206695557, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.6078860898138007, |
|
"grad_norm": 3.928331003690106, |
|
"learning_rate": 4.167766961044906e-07, |
|
"logits/chosen": 20.291215896606445, |
|
"logits/rejected": 18.595857620239258, |
|
"logps/chosen": -307.8617858886719, |
|
"logps/rejected": -212.20635986328125, |
|
"loss": 0.0842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.978768348693848, |
|
"rewards/margins": 6.925237655639648, |
|
"rewards/rejected": -12.904006004333496, |
|
"sft_loss": 0.898669958114624, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.6161007667031764, |
|
"grad_norm": 7.9374465786346295, |
|
"learning_rate": 4.1422848574945923e-07, |
|
"logits/chosen": 19.617891311645508, |
|
"logits/rejected": 18.786251068115234, |
|
"logps/chosen": -309.9544982910156, |
|
"logps/rejected": -208.55972290039062, |
|
"loss": 0.086, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -5.407596111297607, |
|
"rewards/margins": 7.109365940093994, |
|
"rewards/rejected": -12.516963005065918, |
|
"sft_loss": 0.9856653809547424, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.624315443592552, |
|
"grad_norm": 21.17776800888567, |
|
"learning_rate": 4.1164990030394985e-07, |
|
"logits/chosen": 19.750553131103516, |
|
"logits/rejected": 18.34299659729004, |
|
"logps/chosen": -315.951171875, |
|
"logps/rejected": -218.6781768798828, |
|
"loss": 0.1158, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -6.80844783782959, |
|
"rewards/margins": 6.5122971534729, |
|
"rewards/rejected": -13.320744514465332, |
|
"sft_loss": 0.9718233942985535, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.6325301204819277, |
|
"grad_norm": 4.237566233767258, |
|
"learning_rate": 4.09041416693545e-07, |
|
"logits/chosen": 19.702919006347656, |
|
"logits/rejected": 18.19818115234375, |
|
"logps/chosen": -315.5003662109375, |
|
"logps/rejected": -213.6582794189453, |
|
"loss": 0.1037, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -6.986787796020508, |
|
"rewards/margins": 6.549233913421631, |
|
"rewards/rejected": -13.536023139953613, |
|
"sft_loss": 1.0769668817520142, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.6407447973713034, |
|
"grad_norm": 8.046716778811138, |
|
"learning_rate": 4.064035173736804e-07, |
|
"logits/chosen": 16.724149703979492, |
|
"logits/rejected": 16.12737464904785, |
|
"logps/chosen": -316.17547607421875, |
|
"logps/rejected": -242.3610076904297, |
|
"loss": 0.098, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -7.848334312438965, |
|
"rewards/margins": 8.00571060180664, |
|
"rewards/rejected": -15.854043960571289, |
|
"sft_loss": 1.207060694694519, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.6489594742606791, |
|
"grad_norm": 2.9537654489100564, |
|
"learning_rate": 4.0373669024041225e-07, |
|
"logits/chosen": 21.014055252075195, |
|
"logits/rejected": 19.712244033813477, |
|
"logps/chosen": -325.1595153808594, |
|
"logps/rejected": -232.09849548339844, |
|
"loss": 0.076, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -7.954678058624268, |
|
"rewards/margins": 7.2714033126831055, |
|
"rewards/rejected": -15.226082801818848, |
|
"sft_loss": 1.0326578617095947, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.6571741511500547, |
|
"grad_norm": 7.381518685772535, |
|
"learning_rate": 4.010414285401776e-07, |
|
"logits/chosen": 20.833526611328125, |
|
"logits/rejected": 20.059972763061523, |
|
"logps/chosen": -301.8648681640625, |
|
"logps/rejected": -219.5161895751953, |
|
"loss": 0.0779, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -7.38788366317749, |
|
"rewards/margins": 7.111426830291748, |
|
"rewards/rejected": -14.499311447143555, |
|
"sft_loss": 1.039659023284912, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6653888280394304, |
|
"grad_norm": 4.428141508810293, |
|
"learning_rate": 3.9831823077856565e-07, |
|
"logits/chosen": 18.648929595947266, |
|
"logits/rejected": 18.11013412475586, |
|
"logps/chosen": -301.7897033691406, |
|
"logps/rejected": -215.20339965820312, |
|
"loss": 0.1026, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -6.600786209106445, |
|
"rewards/margins": 6.725845813751221, |
|
"rewards/rejected": -13.326631546020508, |
|
"sft_loss": 1.1041791439056396, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.6736035049288062, |
|
"grad_norm": 3.720237574886871, |
|
"learning_rate": 3.95567600628115e-07, |
|
"logits/chosen": 18.711490631103516, |
|
"logits/rejected": 17.353120803833008, |
|
"logps/chosen": -288.9316711425781, |
|
"logps/rejected": -199.5088348388672, |
|
"loss": 0.0833, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -5.722890377044678, |
|
"rewards/margins": 6.0634002685546875, |
|
"rewards/rejected": -11.786290168762207, |
|
"sft_loss": 0.9382212162017822, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.6818181818181818, |
|
"grad_norm": 3.5502904501789456, |
|
"learning_rate": 3.9279004683515783e-07, |
|
"logits/chosen": 19.41602897644043, |
|
"logits/rejected": 18.82366180419922, |
|
"logps/chosen": -300.6460876464844, |
|
"logps/rejected": -198.58851623535156, |
|
"loss": 0.0921, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.369536876678467, |
|
"rewards/margins": 6.260385036468506, |
|
"rewards/rejected": -11.629922866821289, |
|
"sft_loss": 0.947778046131134, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.6900328587075575, |
|
"grad_norm": 3.775437774667387, |
|
"learning_rate": 3.8998608312572234e-07, |
|
"logits/chosen": 20.035261154174805, |
|
"logits/rejected": 18.32322883605957, |
|
"logps/chosen": -321.1794738769531, |
|
"logps/rejected": -206.26197814941406, |
|
"loss": 0.0666, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -6.206702709197998, |
|
"rewards/margins": 6.016441822052002, |
|
"rewards/rejected": -12.223145484924316, |
|
"sft_loss": 0.8840410709381104, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.6982475355969332, |
|
"grad_norm": 4.6471153709105355, |
|
"learning_rate": 3.8715622811051753e-07, |
|
"logits/chosen": 20.395259857177734, |
|
"logits/rejected": 18.99897003173828, |
|
"logps/chosen": -340.93096923828125, |
|
"logps/rejected": -233.05569458007812, |
|
"loss": 0.0824, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -7.279754638671875, |
|
"rewards/margins": 6.952295303344727, |
|
"rewards/rejected": -14.232048988342285, |
|
"sft_loss": 0.9561217427253723, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.7064622124863089, |
|
"grad_norm": 5.222781909367126, |
|
"learning_rate": 3.843010051890114e-07, |
|
"logits/chosen": 17.722442626953125, |
|
"logits/rejected": 17.12670135498047, |
|
"logps/chosen": -305.9290466308594, |
|
"logps/rejected": -227.79209899902344, |
|
"loss": 0.0968, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -7.020711898803711, |
|
"rewards/margins": 7.3669562339782715, |
|
"rewards/rejected": -14.38766860961914, |
|
"sft_loss": 1.0332236289978027, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.7146768893756845, |
|
"grad_norm": 8.793537356204498, |
|
"learning_rate": 3.8142094245262615e-07, |
|
"logits/chosen": 18.769723892211914, |
|
"logits/rejected": 17.301259994506836, |
|
"logps/chosen": -281.7428283691406, |
|
"logps/rejected": -198.91912841796875, |
|
"loss": 0.0907, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -6.324660301208496, |
|
"rewards/margins": 6.169273376464844, |
|
"rewards/rejected": -12.493931770324707, |
|
"sft_loss": 1.6358309984207153, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.7228915662650602, |
|
"grad_norm": 3.225698996369955, |
|
"learning_rate": 3.785165725870637e-07, |
|
"logits/chosen": 18.968225479125977, |
|
"logits/rejected": 18.159814834594727, |
|
"logps/chosen": -303.6697082519531, |
|
"logps/rejected": -215.95223999023438, |
|
"loss": 0.0896, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -5.786445140838623, |
|
"rewards/margins": 6.777508735656738, |
|
"rewards/rejected": -12.563952445983887, |
|
"sft_loss": 0.9366723299026489, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.731106243154436, |
|
"grad_norm": 5.69116038376307, |
|
"learning_rate": 3.7558843277378203e-07, |
|
"logits/chosen": 19.500343322753906, |
|
"logits/rejected": 18.194765090942383, |
|
"logps/chosen": -280.2967834472656, |
|
"logps/rejected": -190.11224365234375, |
|
"loss": 0.089, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -5.179433345794678, |
|
"rewards/margins": 6.132752418518066, |
|
"rewards/rejected": -11.31218433380127, |
|
"sft_loss": 0.9057294130325317, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.7393209200438116, |
|
"grad_norm": 2.910912446985568, |
|
"learning_rate": 3.726370645906407e-07, |
|
"logits/chosen": 18.821012496948242, |
|
"logits/rejected": 17.809709548950195, |
|
"logps/chosen": -301.2443542480469, |
|
"logps/rejected": -197.73570251464844, |
|
"loss": 0.072, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -5.806431293487549, |
|
"rewards/margins": 6.135568141937256, |
|
"rewards/rejected": -11.942000389099121, |
|
"sft_loss": 1.1080551147460938, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.7475355969331873, |
|
"grad_norm": 3.4976563659123787, |
|
"learning_rate": 3.6966301391173204e-07, |
|
"logits/chosen": 18.104902267456055, |
|
"logits/rejected": 19.11966896057129, |
|
"logps/chosen": -283.95404052734375, |
|
"logps/rejected": -216.9072265625, |
|
"loss": 0.0961, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -6.108445167541504, |
|
"rewards/margins": 7.591168403625488, |
|
"rewards/rejected": -13.699614524841309, |
|
"sft_loss": 1.0220645666122437, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.755750273822563, |
|
"grad_norm": 3.376381123051291, |
|
"learning_rate": 3.6666683080641843e-07, |
|
"logits/chosen": 17.50767707824707, |
|
"logits/rejected": 16.899118423461914, |
|
"logps/chosen": -325.40252685546875, |
|
"logps/rejected": -233.790283203125, |
|
"loss": 0.0663, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -7.453089237213135, |
|
"rewards/margins": 7.27883243560791, |
|
"rewards/rejected": -14.73192310333252, |
|
"sft_loss": 1.0263330936431885, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.7639649507119387, |
|
"grad_norm": 4.787229066445367, |
|
"learning_rate": 3.636490694375937e-07, |
|
"logits/chosen": 19.532527923583984, |
|
"logits/rejected": 18.083965301513672, |
|
"logps/chosen": -335.1581115722656, |
|
"logps/rejected": -236.63861083984375, |
|
"loss": 0.0698, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -7.509254455566406, |
|
"rewards/margins": 8.015713691711426, |
|
"rewards/rejected": -15.5249662399292, |
|
"sft_loss": 1.013353943824768, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.7721796276013143, |
|
"grad_norm": 1.6525330438642514, |
|
"learning_rate": 3.6061028795918734e-07, |
|
"logits/chosen": 19.80712127685547, |
|
"logits/rejected": 18.532358169555664, |
|
"logps/chosen": -338.9714660644531, |
|
"logps/rejected": -240.235595703125, |
|
"loss": 0.0643, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -8.31692886352539, |
|
"rewards/margins": 7.37734317779541, |
|
"rewards/rejected": -15.694271087646484, |
|
"sft_loss": 1.046373963356018, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.78039430449069, |
|
"grad_norm": 4.380627729663156, |
|
"learning_rate": 3.5755104841292974e-07, |
|
"logits/chosen": 18.083696365356445, |
|
"logits/rejected": 17.68763542175293, |
|
"logps/chosen": -296.51568603515625, |
|
"logps/rejected": -220.79627990722656, |
|
"loss": 0.0753, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -7.48270845413208, |
|
"rewards/margins": 7.01304817199707, |
|
"rewards/rejected": -14.495758056640625, |
|
"sft_loss": 1.1213568449020386, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.7886089813800657, |
|
"grad_norm": 3.5838233904248202, |
|
"learning_rate": 3.544719166243998e-07, |
|
"logits/chosen": 18.444488525390625, |
|
"logits/rejected": 17.91800880432129, |
|
"logps/chosen": -323.7398681640625, |
|
"logps/rejected": -239.03846740722656, |
|
"loss": 0.0675, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -7.762217998504639, |
|
"rewards/margins": 7.673830032348633, |
|
"rewards/rejected": -15.43604850769043, |
|
"sft_loss": 1.0297211408615112, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.7968236582694413, |
|
"grad_norm": 5.338329799579653, |
|
"learning_rate": 3.513734620983716e-07, |
|
"logits/chosen": 18.91893768310547, |
|
"logits/rejected": 17.848176956176758, |
|
"logps/chosen": -331.2665100097656, |
|
"logps/rejected": -252.34434509277344, |
|
"loss": 0.0585, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.252163887023926, |
|
"rewards/margins": 8.53612232208252, |
|
"rewards/rejected": -16.788286209106445, |
|
"sft_loss": 0.9914504289627075, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.8050383351588171, |
|
"grad_norm": 10.778725390322887, |
|
"learning_rate": 3.482562579134809e-07, |
|
"logits/chosen": 17.087730407714844, |
|
"logits/rejected": 16.604755401611328, |
|
"logps/chosen": -269.3510437011719, |
|
"logps/rejected": -222.06834411621094, |
|
"loss": 0.0883, |
|
"rewards/accuracies": 0.9200000166893005, |
|
"rewards/chosen": -8.483560562133789, |
|
"rewards/margins": 6.750950813293457, |
|
"rewards/rejected": -15.23451042175293, |
|
"sft_loss": 1.1086839437484741, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.8132530120481928, |
|
"grad_norm": 3.589498412964668, |
|
"learning_rate": 3.4512088061623073e-07, |
|
"logits/chosen": 20.214731216430664, |
|
"logits/rejected": 18.510662078857422, |
|
"logps/chosen": -364.5818176269531, |
|
"logps/rejected": -248.37899780273438, |
|
"loss": 0.0795, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -8.396151542663574, |
|
"rewards/margins": 7.525665283203125, |
|
"rewards/rejected": -15.9218168258667, |
|
"sft_loss": 1.0693514347076416, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.8214676889375685, |
|
"grad_norm": 3.668001282748929, |
|
"learning_rate": 3.419679101143555e-07, |
|
"logits/chosen": 19.246572494506836, |
|
"logits/rejected": 18.104793548583984, |
|
"logps/chosen": -286.7825012207031, |
|
"logps/rejected": -219.62115478515625, |
|
"loss": 0.0646, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.4586005210876465, |
|
"rewards/margins": 7.050439357757568, |
|
"rewards/rejected": -14.509037971496582, |
|
"sft_loss": 1.0848183631896973, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.8296823658269441, |
|
"grad_norm": 4.09686690298713, |
|
"learning_rate": 3.387979295695632e-07, |
|
"logits/chosen": 19.468666076660156, |
|
"logits/rejected": 17.898151397705078, |
|
"logps/chosen": -306.32623291015625, |
|
"logps/rejected": -223.23086547851562, |
|
"loss": 0.0854, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -7.60283899307251, |
|
"rewards/margins": 6.977162837982178, |
|
"rewards/rejected": -14.580002784729004, |
|
"sft_loss": 1.017836093902588, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.8378970427163198, |
|
"grad_norm": 5.03743167428466, |
|
"learning_rate": 3.356115252896764e-07, |
|
"logits/chosen": 18.379446029663086, |
|
"logits/rejected": 17.511045455932617, |
|
"logps/chosen": -338.8892517089844, |
|
"logps/rejected": -236.15655517578125, |
|
"loss": 0.084, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -7.8325958251953125, |
|
"rewards/margins": 7.418299674987793, |
|
"rewards/rejected": -15.250896453857422, |
|
"sft_loss": 1.1587491035461426, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.8461117196056955, |
|
"grad_norm": 1.3483241705386844, |
|
"learning_rate": 3.3240928662019043e-07, |
|
"logits/chosen": 17.142412185668945, |
|
"logits/rejected": 16.88658905029297, |
|
"logps/chosen": -323.4809265136719, |
|
"logps/rejected": -228.69459533691406, |
|
"loss": 0.076, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -7.122060298919678, |
|
"rewards/margins": 7.332087516784668, |
|
"rewards/rejected": -14.454146385192871, |
|
"sft_loss": 1.033144235610962, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.8543263964950711, |
|
"grad_norm": 5.6768934408505665, |
|
"learning_rate": 3.291918058352706e-07, |
|
"logits/chosen": 18.301881790161133, |
|
"logits/rejected": 17.546480178833008, |
|
"logps/chosen": -286.16534423828125, |
|
"logps/rejected": -223.24356079101562, |
|
"loss": 0.1012, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -8.11279010772705, |
|
"rewards/margins": 6.034675121307373, |
|
"rewards/rejected": -14.147465705871582, |
|
"sft_loss": 1.0691564083099365, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.8625410733844469, |
|
"grad_norm": 2.664959243016448, |
|
"learning_rate": 3.259596780282074e-07, |
|
"logits/chosen": 19.877466201782227, |
|
"logits/rejected": 18.95973014831543, |
|
"logps/chosen": -365.1341552734375, |
|
"logps/rejected": -256.1839294433594, |
|
"loss": 0.0634, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.447802543640137, |
|
"rewards/margins": 8.053640365600586, |
|
"rewards/rejected": -16.501441955566406, |
|
"sft_loss": 1.2106726169586182, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.8707557502738226, |
|
"grad_norm": 13.037778283402458, |
|
"learning_rate": 3.2271350100134975e-07, |
|
"logits/chosen": 19.298856735229492, |
|
"logits/rejected": 17.636470794677734, |
|
"logps/chosen": -322.1096496582031, |
|
"logps/rejected": -239.7886505126953, |
|
"loss": 0.0671, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.631354331970215, |
|
"rewards/margins": 7.214845657348633, |
|
"rewards/rejected": -15.846202850341797, |
|
"sft_loss": 1.1396478414535522, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.8789704271631983, |
|
"grad_norm": 3.29678671312156, |
|
"learning_rate": 3.1945387515553843e-07, |
|
"logits/chosen": 20.68004608154297, |
|
"logits/rejected": 18.87333106994629, |
|
"logps/chosen": -342.8370056152344, |
|
"logps/rejected": -231.9772491455078, |
|
"loss": 0.0816, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.539201259613037, |
|
"rewards/margins": 7.375776290893555, |
|
"rewards/rejected": -14.914976119995117, |
|
"sft_loss": 1.073767066001892, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.8871851040525739, |
|
"grad_norm": 5.524616451485275, |
|
"learning_rate": 3.1618140337905764e-07, |
|
"logits/chosen": 19.65703582763672, |
|
"logits/rejected": 18.759868621826172, |
|
"logps/chosen": -293.4176940917969, |
|
"logps/rejected": -222.76730346679688, |
|
"loss": 0.0785, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -7.326995372772217, |
|
"rewards/margins": 6.951819896697998, |
|
"rewards/rejected": -14.278814315795898, |
|
"sft_loss": 1.1393436193466187, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.8953997809419496, |
|
"grad_norm": 3.710469983456331, |
|
"learning_rate": 3.128966909361271e-07, |
|
"logits/chosen": 19.455686569213867, |
|
"logits/rejected": 18.928129196166992, |
|
"logps/chosen": -346.8277893066406, |
|
"logps/rejected": -249.02398681640625, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.179071426391602, |
|
"rewards/margins": 7.697024345397949, |
|
"rewards/rejected": -15.87609577178955, |
|
"sft_loss": 1.0804804563522339, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.9036144578313253, |
|
"grad_norm": 11.048677395421713, |
|
"learning_rate": 3.096003453549549e-07, |
|
"logits/chosen": 19.185258865356445, |
|
"logits/rejected": 17.547964096069336, |
|
"logps/chosen": -356.4489440917969, |
|
"logps/rejected": -257.8424987792969, |
|
"loss": 0.0805, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.774456977844238, |
|
"rewards/margins": 8.595198631286621, |
|
"rewards/rejected": -17.36965560913086, |
|
"sft_loss": 1.0598615407943726, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.911829134720701, |
|
"grad_norm": 2.5715819705074447, |
|
"learning_rate": 3.06292976315371e-07, |
|
"logits/chosen": 18.52203369140625, |
|
"logits/rejected": 17.04202651977539, |
|
"logps/chosen": -332.1985168457031, |
|
"logps/rejected": -236.9717254638672, |
|
"loss": 0.0696, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.198724746704102, |
|
"rewards/margins": 7.527222156524658, |
|
"rewards/rejected": -15.725946426391602, |
|
"sft_loss": 1.1585354804992676, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.9200438116100766, |
|
"grad_norm": 5.125385527021205, |
|
"learning_rate": 3.0297519553606324e-07, |
|
"logits/chosen": 19.936016082763672, |
|
"logits/rejected": 18.45525360107422, |
|
"logps/chosen": -308.65936279296875, |
|
"logps/rejected": -223.3343048095703, |
|
"loss": 0.0879, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -7.173832893371582, |
|
"rewards/margins": 7.109025001525879, |
|
"rewards/rejected": -14.282858848571777, |
|
"sft_loss": 1.038684368133545, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.9282584884994524, |
|
"grad_norm": 3.7082409068486073, |
|
"learning_rate": 2.996476166614363e-07, |
|
"logits/chosen": 19.476381301879883, |
|
"logits/rejected": 17.608320236206055, |
|
"logps/chosen": -329.3957214355469, |
|
"logps/rejected": -218.0595703125, |
|
"loss": 0.0885, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -6.555365085601807, |
|
"rewards/margins": 6.53586483001709, |
|
"rewards/rejected": -13.091230392456055, |
|
"sft_loss": 1.0041550397872925, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.9364731653888281, |
|
"grad_norm": 4.785307756807971, |
|
"learning_rate": 2.963108551481142e-07, |
|
"logits/chosen": 20.289274215698242, |
|
"logits/rejected": 18.269121170043945, |
|
"logps/chosen": -354.8280029296875, |
|
"logps/rejected": -232.3466796875, |
|
"loss": 0.0877, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -6.978261470794678, |
|
"rewards/margins": 7.397289752960205, |
|
"rewards/rejected": -14.3755521774292, |
|
"sft_loss": 1.049277424812317, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.9446878422782037, |
|
"grad_norm": 2.70866282954725, |
|
"learning_rate": 2.929655281511075e-07, |
|
"logits/chosen": 18.807231903076172, |
|
"logits/rejected": 17.12908363342285, |
|
"logps/chosen": -354.8686828613281, |
|
"logps/rejected": -238.53443908691406, |
|
"loss": 0.0648, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -7.602488994598389, |
|
"rewards/margins": 7.430182933807373, |
|
"rewards/rejected": -15.032671928405762, |
|
"sft_loss": 1.1179447174072266, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.9529025191675794, |
|
"grad_norm": 1.9988385632818255, |
|
"learning_rate": 2.896122544096667e-07, |
|
"logits/chosen": 18.314443588256836, |
|
"logits/rejected": 17.118688583374023, |
|
"logps/chosen": -302.5119934082031, |
|
"logps/rejected": -227.22274780273438, |
|
"loss": 0.0808, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -7.768359661102295, |
|
"rewards/margins": 7.082144737243652, |
|
"rewards/rejected": -14.850504875183105, |
|
"sft_loss": 1.1123638153076172, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.9611171960569551, |
|
"grad_norm": 3.2519592276620872, |
|
"learning_rate": 2.8625165413284307e-07, |
|
"logits/chosen": 18.069074630737305, |
|
"logits/rejected": 17.50326919555664, |
|
"logps/chosen": -342.6742248535156, |
|
"logps/rejected": -235.6639862060547, |
|
"loss": 0.0951, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -7.134052753448486, |
|
"rewards/margins": 7.267971515655518, |
|
"rewards/rejected": -14.402023315429688, |
|
"sft_loss": 1.0139853954315186, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.9693318729463308, |
|
"grad_norm": 3.802310527872131, |
|
"learning_rate": 2.8288434888477626e-07, |
|
"logits/chosen": 20.131250381469727, |
|
"logits/rejected": 18.442293167114258, |
|
"logps/chosen": -264.7261047363281, |
|
"logps/rejected": -194.17242431640625, |
|
"loss": 0.0728, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -6.163069248199463, |
|
"rewards/margins": 5.962526798248291, |
|
"rewards/rejected": -12.125596046447754, |
|
"sft_loss": 0.9994211196899414, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.9775465498357064, |
|
"grad_norm": 4.623566930411208, |
|
"learning_rate": 2.795109614697326e-07, |
|
"logits/chosen": 19.592060089111328, |
|
"logits/rejected": 18.126672744750977, |
|
"logps/chosen": -295.9714050292969, |
|
"logps/rejected": -204.66427612304688, |
|
"loss": 0.0873, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -6.2958526611328125, |
|
"rewards/margins": 6.357571601867676, |
|
"rewards/rejected": -12.653424263000488, |
|
"sft_loss": 0.9684253931045532, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.9857612267250822, |
|
"grad_norm": 6.368570274437731, |
|
"learning_rate": 2.761321158169134e-07, |
|
"logits/chosen": 20.56801414489746, |
|
"logits/rejected": 19.605268478393555, |
|
"logps/chosen": -325.0837097167969, |
|
"logps/rejected": -226.78863525390625, |
|
"loss": 0.0896, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -6.953873634338379, |
|
"rewards/margins": 7.266047954559326, |
|
"rewards/rejected": -14.219923973083496, |
|
"sft_loss": 1.0414397716522217, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.9939759036144579, |
|
"grad_norm": 3.750987661763106, |
|
"learning_rate": 2.727484368650553e-07, |
|
"logits/chosen": 17.04819107055664, |
|
"logits/rejected": 16.657819747924805, |
|
"logps/chosen": -309.3585205078125, |
|
"logps/rejected": -228.43280029296875, |
|
"loss": 0.0826, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -7.683638572692871, |
|
"rewards/margins": 7.000718116760254, |
|
"rewards/rejected": -14.684355735778809, |
|
"sft_loss": 1.13958740234375, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.0021905805038336, |
|
"grad_norm": 2.4122584075980966, |
|
"learning_rate": 2.6936055044684425e-07, |
|
"logits/chosen": 18.877971649169922, |
|
"logits/rejected": 17.84745979309082, |
|
"logps/chosen": -272.0718078613281, |
|
"logps/rejected": -214.1260223388672, |
|
"loss": 0.0613, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.77817440032959, |
|
"rewards/margins": 6.673606872558594, |
|
"rewards/rejected": -14.4517822265625, |
|
"sft_loss": 1.05906343460083, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.0104052573932092, |
|
"grad_norm": 4.202536280729459, |
|
"learning_rate": 2.659690831731631e-07, |
|
"logits/chosen": 18.819133758544922, |
|
"logits/rejected": 18.853425979614258, |
|
"logps/chosen": -313.6888427734375, |
|
"logps/rejected": -247.36912536621094, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.692021369934082, |
|
"rewards/margins": 7.864367961883545, |
|
"rewards/rejected": -16.5563907623291, |
|
"sft_loss": 1.0393388271331787, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.0186199342825848, |
|
"grad_norm": 3.9279717638704224, |
|
"learning_rate": 2.6257466231719676e-07, |
|
"logits/chosen": 16.448410034179688, |
|
"logits/rejected": 15.970653533935547, |
|
"logps/chosen": -357.7733154296875, |
|
"logps/rejected": -276.7339782714844, |
|
"loss": 0.0375, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.677971839904785, |
|
"rewards/margins": 9.011359214782715, |
|
"rewards/rejected": -18.6893310546875, |
|
"sft_loss": 1.3019551038742065, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.0268346111719606, |
|
"grad_norm": 2.4098074473851, |
|
"learning_rate": 2.591779156984137e-07, |
|
"logits/chosen": 18.19355010986328, |
|
"logits/rejected": 16.887168884277344, |
|
"logps/chosen": -331.2437744140625, |
|
"logps/rejected": -270.1482849121094, |
|
"loss": 0.0655, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -10.20653247833252, |
|
"rewards/margins": 8.743158340454102, |
|
"rewards/rejected": -18.949687957763672, |
|
"sft_loss": 1.1369999647140503, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.0350492880613362, |
|
"grad_norm": 2.015176030113295, |
|
"learning_rate": 2.557794715664465e-07, |
|
"logits/chosen": 18.09113311767578, |
|
"logits/rejected": 16.872072219848633, |
|
"logps/chosen": -343.4937438964844, |
|
"logps/rejected": -252.0245819091797, |
|
"loss": 0.0574, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.15774154663086, |
|
"rewards/margins": 7.730542182922363, |
|
"rewards/rejected": -16.888282775878906, |
|
"sft_loss": 1.1037859916687012, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.0432639649507118, |
|
"grad_norm": 4.817911475238817, |
|
"learning_rate": 2.5237995848489417e-07, |
|
"logits/chosen": 19.07139778137207, |
|
"logits/rejected": 17.200511932373047, |
|
"logps/chosen": -335.5242614746094, |
|
"logps/rejected": -225.45147705078125, |
|
"loss": 0.0736, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -6.962551593780518, |
|
"rewards/margins": 7.362425804138184, |
|
"rewards/rejected": -14.32497787475586, |
|
"sft_loss": 1.0912320613861084, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.0514786418400877, |
|
"grad_norm": 5.399497912094965, |
|
"learning_rate": 2.48980005215064e-07, |
|
"logits/chosen": 19.333988189697266, |
|
"logits/rejected": 18.970035552978516, |
|
"logps/chosen": -263.4971008300781, |
|
"logps/rejected": -204.3727569580078, |
|
"loss": 0.0747, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -7.033030986785889, |
|
"rewards/margins": 6.5659332275390625, |
|
"rewards/rejected": -13.598965644836426, |
|
"sft_loss": 1.2809529304504395, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.0596933187294633, |
|
"grad_norm": 5.192926508245981, |
|
"learning_rate": 2.45580240599679e-07, |
|
"logits/chosen": 18.814851760864258, |
|
"logits/rejected": 19.112171173095703, |
|
"logps/chosen": -368.88470458984375, |
|
"logps/rejected": -262.5347900390625, |
|
"loss": 0.073, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -7.645462512969971, |
|
"rewards/margins": 8.707850456237793, |
|
"rewards/rejected": -16.353313446044922, |
|
"sft_loss": 1.1692014932632446, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.067907995618839, |
|
"grad_norm": 2.533086454702698, |
|
"learning_rate": 2.421812934465696e-07, |
|
"logits/chosen": 20.8750057220459, |
|
"logits/rejected": 18.880239486694336, |
|
"logps/chosen": -341.56805419921875, |
|
"logps/rejected": -238.24147033691406, |
|
"loss": 0.0526, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -7.804737567901611, |
|
"rewards/margins": 7.901425838470459, |
|
"rewards/rejected": -15.706161499023438, |
|
"sft_loss": 1.0994611978530884, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.0761226725082147, |
|
"grad_norm": 1.8972892397596355, |
|
"learning_rate": 2.3878379241237134e-07, |
|
"logits/chosen": 18.171382904052734, |
|
"logits/rejected": 17.349416732788086, |
|
"logps/chosen": -333.02862548828125, |
|
"logps/rejected": -257.3088073730469, |
|
"loss": 0.0702, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.708560943603516, |
|
"rewards/margins": 8.212488174438477, |
|
"rewards/rejected": -17.92104721069336, |
|
"sft_loss": 1.3080743551254272, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.0843373493975903, |
|
"grad_norm": 7.296587489211492, |
|
"learning_rate": 2.3538836588625077e-07, |
|
"logits/chosen": 16.028032302856445, |
|
"logits/rejected": 15.767348289489746, |
|
"logps/chosen": -302.0737609863281, |
|
"logps/rejected": -251.35279846191406, |
|
"loss": 0.0709, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -9.596309661865234, |
|
"rewards/margins": 8.214980125427246, |
|
"rewards/rejected": -17.811288833618164, |
|
"sft_loss": 1.4402241706848145, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.0925520262869661, |
|
"grad_norm": 2.7162575555609547, |
|
"learning_rate": 2.3199564187368153e-07, |
|
"logits/chosen": 18.975399017333984, |
|
"logits/rejected": 17.070423126220703, |
|
"logps/chosen": -378.0216369628906, |
|
"logps/rejected": -273.23687744140625, |
|
"loss": 0.0699, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.879487037658691, |
|
"rewards/margins": 8.538008689880371, |
|
"rewards/rejected": -18.41749382019043, |
|
"sft_loss": 1.127976655960083, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.1007667031763417, |
|
"grad_norm": 10.07225580885619, |
|
"learning_rate": 2.2860624788029013e-07, |
|
"logits/chosen": 17.706756591796875, |
|
"logits/rejected": 17.792144775390625, |
|
"logps/chosen": -291.8415832519531, |
|
"logps/rejected": -239.1862030029297, |
|
"loss": 0.0756, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -8.226842880249023, |
|
"rewards/margins": 7.920691013336182, |
|
"rewards/rejected": -16.147533416748047, |
|
"sft_loss": 1.1893837451934814, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.1089813800657173, |
|
"grad_norm": 4.017695151821307, |
|
"learning_rate": 2.2522081079579497e-07, |
|
"logits/chosen": 17.455995559692383, |
|
"logits/rejected": 17.200504302978516, |
|
"logps/chosen": -316.4137878417969, |
|
"logps/rejected": -253.0958709716797, |
|
"loss": 0.0701, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -8.296252250671387, |
|
"rewards/margins": 8.523763656616211, |
|
"rewards/rejected": -16.82001495361328, |
|
"sft_loss": 1.2573199272155762, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.1171960569550932, |
|
"grad_norm": 2.9229430403917416, |
|
"learning_rate": 2.2183995677805967e-07, |
|
"logits/chosen": 17.359153747558594, |
|
"logits/rejected": 17.15728759765625, |
|
"logps/chosen": -337.8786315917969, |
|
"logps/rejected": -256.97259521484375, |
|
"loss": 0.0499, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.227910041809082, |
|
"rewards/margins": 8.76689624786377, |
|
"rewards/rejected": -16.99480628967285, |
|
"sft_loss": 1.1089237928390503, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.1254107338444688, |
|
"grad_norm": 3.6239751013897306, |
|
"learning_rate": 2.1846431113728062e-07, |
|
"logits/chosen": 17.55232048034668, |
|
"logits/rejected": 17.560558319091797, |
|
"logps/chosen": -321.5279846191406, |
|
"logps/rejected": -261.1816711425781, |
|
"loss": 0.0621, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.55235767364502, |
|
"rewards/margins": 9.196606636047363, |
|
"rewards/rejected": -17.748966217041016, |
|
"sft_loss": 1.1366941928863525, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.1336254107338444, |
|
"grad_norm": 6.419284533750765, |
|
"learning_rate": 2.1509449822033205e-07, |
|
"logits/chosen": 18.428213119506836, |
|
"logits/rejected": 17.30073356628418, |
|
"logps/chosen": -371.9090576171875, |
|
"logps/rejected": -266.3111572265625, |
|
"loss": 0.0415, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.018980026245117, |
|
"rewards/margins": 8.834003448486328, |
|
"rewards/rejected": -17.852983474731445, |
|
"sft_loss": 1.149424433708191, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.1418400876232202, |
|
"grad_norm": 6.07442729979061, |
|
"learning_rate": 2.1173114129528957e-07, |
|
"logits/chosen": 17.701704025268555, |
|
"logits/rejected": 17.770456314086914, |
|
"logps/chosen": -307.99798583984375, |
|
"logps/rejected": -250.03758239746094, |
|
"loss": 0.0795, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -9.118383407592773, |
|
"rewards/margins": 8.651429176330566, |
|
"rewards/rejected": -17.769811630249023, |
|
"sft_loss": 1.3337371349334717, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.1500547645125958, |
|
"grad_norm": 3.7073147975104885, |
|
"learning_rate": 2.0837486243615226e-07, |
|
"logits/chosen": 19.352502822875977, |
|
"logits/rejected": 17.753198623657227, |
|
"logps/chosen": -390.12890625, |
|
"logps/rejected": -298.8519592285156, |
|
"loss": 0.0802, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -10.813799858093262, |
|
"rewards/margins": 9.796292304992676, |
|
"rewards/rejected": -20.610090255737305, |
|
"sft_loss": 1.080621600151062, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.1582694414019716, |
|
"grad_norm": 3.173868712663281, |
|
"learning_rate": 2.0502628240778653e-07, |
|
"logits/chosen": 18.959392547607422, |
|
"logits/rejected": 19.250455856323242, |
|
"logps/chosen": -345.37005615234375, |
|
"logps/rejected": -278.861328125, |
|
"loss": 0.073, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.555724143981934, |
|
"rewards/margins": 9.474061012268066, |
|
"rewards/rejected": -19.02978515625, |
|
"sft_loss": 1.0641828775405884, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.1664841182913472, |
|
"grad_norm": 5.00787760201323, |
|
"learning_rate": 2.0168602055111173e-07, |
|
"logits/chosen": 18.32485008239746, |
|
"logits/rejected": 17.315608978271484, |
|
"logps/chosen": -337.5779724121094, |
|
"logps/rejected": -283.7364196777344, |
|
"loss": 0.043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.335200309753418, |
|
"rewards/margins": 10.991543769836426, |
|
"rewards/rejected": -20.32674217224121, |
|
"sft_loss": 1.1922962665557861, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.1746987951807228, |
|
"grad_norm": 4.0968602022146845, |
|
"learning_rate": 1.9835469466854887e-07, |
|
"logits/chosen": 18.45452880859375, |
|
"logits/rejected": 16.768016815185547, |
|
"logps/chosen": -344.7873840332031, |
|
"logps/rejected": -259.2659912109375, |
|
"loss": 0.0523, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.04322624206543, |
|
"rewards/margins": 8.293071746826172, |
|
"rewards/rejected": -17.336299896240234, |
|
"sft_loss": 1.140812635421753, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.1829134720700987, |
|
"grad_norm": 4.912796552705337, |
|
"learning_rate": 1.9503292090975454e-07, |
|
"logits/chosen": 19.511587142944336, |
|
"logits/rejected": 18.789897918701172, |
|
"logps/chosen": -267.87982177734375, |
|
"logps/rejected": -212.76730346679688, |
|
"loss": 0.069, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -7.6522440910339355, |
|
"rewards/margins": 6.9143452644348145, |
|
"rewards/rejected": -14.56658935546875, |
|
"sft_loss": 1.1043713092803955, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.1911281489594743, |
|
"grad_norm": 3.7513682612693287, |
|
"learning_rate": 1.917213136576602e-07, |
|
"logits/chosen": 19.222810745239258, |
|
"logits/rejected": 18.952795028686523, |
|
"logps/chosen": -317.2191162109375, |
|
"logps/rejected": -242.29966735839844, |
|
"loss": 0.0429, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -7.920867919921875, |
|
"rewards/margins": 8.166583061218262, |
|
"rewards/rejected": -16.08745002746582, |
|
"sft_loss": 1.0877418518066406, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.1993428258488499, |
|
"grad_norm": 5.446402649753336, |
|
"learning_rate": 1.8842048541483756e-07, |
|
"logits/chosen": 19.713359832763672, |
|
"logits/rejected": 18.361555099487305, |
|
"logps/chosen": -319.3419189453125, |
|
"logps/rejected": -238.31849670410156, |
|
"loss": 0.0688, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.05375862121582, |
|
"rewards/margins": 7.3250813484191895, |
|
"rewards/rejected": -16.378841400146484, |
|
"sft_loss": 1.220839023590088, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.2075575027382257, |
|
"grad_norm": 3.597263962070461, |
|
"learning_rate": 1.8513104669021314e-07, |
|
"logits/chosen": 18.05857276916504, |
|
"logits/rejected": 17.213830947875977, |
|
"logps/chosen": -345.5421142578125, |
|
"logps/rejected": -268.8918151855469, |
|
"loss": 0.069, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.973356246948242, |
|
"rewards/margins": 8.510116577148438, |
|
"rewards/rejected": -18.48347282409668, |
|
"sft_loss": 1.1453214883804321, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.2157721796276013, |
|
"grad_norm": 9.406473544966678, |
|
"learning_rate": 1.8185360588615057e-07, |
|
"logits/chosen": 19.28816795349121, |
|
"logits/rejected": 17.978492736816406, |
|
"logps/chosen": -365.7281799316406, |
|
"logps/rejected": -270.6371154785156, |
|
"loss": 0.0678, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.327936172485352, |
|
"rewards/margins": 8.730137825012207, |
|
"rewards/rejected": -18.058074951171875, |
|
"sft_loss": 1.1658827066421509, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.223986856516977, |
|
"grad_norm": 3.7662686205808584, |
|
"learning_rate": 1.7858876918592232e-07, |
|
"logits/chosen": 17.326171875, |
|
"logits/rejected": 17.195329666137695, |
|
"logps/chosen": -301.6247863769531, |
|
"logps/rejected": -246.91964721679688, |
|
"loss": 0.0644, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -8.8589506149292, |
|
"rewards/margins": 8.645825386047363, |
|
"rewards/rejected": -17.50477409362793, |
|
"sft_loss": 1.1279815435409546, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.2322015334063527, |
|
"grad_norm": 3.8887147488856986, |
|
"learning_rate": 1.7533714044159299e-07, |
|
"logits/chosen": 18.013948440551758, |
|
"logits/rejected": 16.35959243774414, |
|
"logps/chosen": -331.99609375, |
|
"logps/rejected": -249.0239715576172, |
|
"loss": 0.0704, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -9.633424758911133, |
|
"rewards/margins": 7.326272487640381, |
|
"rewards/rejected": -16.95969581604004, |
|
"sft_loss": 1.6165919303894043, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.2404162102957283, |
|
"grad_norm": 2.5663067455515747, |
|
"learning_rate": 1.7209932106233264e-07, |
|
"logits/chosen": 16.976768493652344, |
|
"logits/rejected": 17.75543785095215, |
|
"logps/chosen": -336.4115905761719, |
|
"logps/rejected": -277.32012939453125, |
|
"loss": 0.0531, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.428933143615723, |
|
"rewards/margins": 9.152985572814941, |
|
"rewards/rejected": -18.58191680908203, |
|
"sft_loss": 1.1206488609313965, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.248630887185104, |
|
"grad_norm": 7.904031033651109, |
|
"learning_rate": 1.688759099031824e-07, |
|
"logits/chosen": 17.9715633392334, |
|
"logits/rejected": 16.856943130493164, |
|
"logps/chosen": -358.84051513671875, |
|
"logps/rejected": -278.71514892578125, |
|
"loss": 0.0624, |
|
"rewards/accuracies": 0.9599999785423279, |
|
"rewards/chosen": -9.98255729675293, |
|
"rewards/margins": 9.00979995727539, |
|
"rewards/rejected": -18.99235725402832, |
|
"sft_loss": 1.155042290687561, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.2568455640744798, |
|
"grad_norm": 4.102166977987274, |
|
"learning_rate": 1.656675031542925e-07, |
|
"logits/chosen": 19.007150650024414, |
|
"logits/rejected": 18.558509826660156, |
|
"logps/chosen": -375.6373291015625, |
|
"logps/rejected": -275.5815734863281, |
|
"loss": 0.0407, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.460150718688965, |
|
"rewards/margins": 9.107392311096191, |
|
"rewards/rejected": -18.567543029785156, |
|
"sft_loss": 1.1842076778411865, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.2650602409638554, |
|
"grad_norm": 6.075552965079597, |
|
"learning_rate": 1.6247469423065343e-07, |
|
"logits/chosen": 18.225561141967773, |
|
"logits/rejected": 16.852903366088867, |
|
"logps/chosen": -316.5226135253906, |
|
"logps/rejected": -239.98927307128906, |
|
"loss": 0.0567, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.122289657592773, |
|
"rewards/margins": 7.42488431930542, |
|
"rewards/rejected": -16.54717445373535, |
|
"sft_loss": 1.2285444736480713, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.273274917853231, |
|
"grad_norm": 1.6786737917411083, |
|
"learning_rate": 1.5929807366233977e-07, |
|
"logits/chosen": 18.135303497314453, |
|
"logits/rejected": 16.776866912841797, |
|
"logps/chosen": -388.01239013671875, |
|
"logps/rejected": -284.525390625, |
|
"loss": 0.0526, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.297353744506836, |
|
"rewards/margins": 9.676100730895996, |
|
"rewards/rejected": -18.97345542907715, |
|
"sft_loss": 1.1469649076461792, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.2814895947426068, |
|
"grad_norm": 2.6753220341135213, |
|
"learning_rate": 1.5613822898528794e-07, |
|
"logits/chosen": 17.79352378845215, |
|
"logits/rejected": 17.512025833129883, |
|
"logps/chosen": -331.31829833984375, |
|
"logps/rejected": -265.259765625, |
|
"loss": 0.0508, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.44349479675293, |
|
"rewards/margins": 8.841734886169434, |
|
"rewards/rejected": -18.285228729248047, |
|
"sft_loss": 1.3215175867080688, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.2897042716319824, |
|
"grad_norm": 8.768934877423389, |
|
"learning_rate": 1.5299574463262794e-07, |
|
"logits/chosen": 17.9199275970459, |
|
"logits/rejected": 17.427675247192383, |
|
"logps/chosen": -371.37432861328125, |
|
"logps/rejected": -277.9888916015625, |
|
"loss": 0.0713, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.927423477172852, |
|
"rewards/margins": 9.477466583251953, |
|
"rewards/rejected": -18.404890060424805, |
|
"sft_loss": 1.1020786762237549, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.297918948521358, |
|
"grad_norm": 3.060484827435086, |
|
"learning_rate": 1.4987120182658877e-07, |
|
"logits/chosen": 18.74033546447754, |
|
"logits/rejected": 19.323453903198242, |
|
"logps/chosen": -322.2007141113281, |
|
"logps/rejected": -249.55172729492188, |
|
"loss": 0.0521, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -7.6707940101623535, |
|
"rewards/margins": 8.992262840270996, |
|
"rewards/rejected": -16.663057327270508, |
|
"sft_loss": 1.0123049020767212, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.3061336254107339, |
|
"grad_norm": 2.7470931382147814, |
|
"learning_rate": 1.4676517847099745e-07, |
|
"logits/chosen": 19.513704299926758, |
|
"logits/rejected": 19.02146339416504, |
|
"logps/chosen": -285.7082214355469, |
|
"logps/rejected": -215.02732849121094, |
|
"loss": 0.0717, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -6.395638465881348, |
|
"rewards/margins": 7.340782642364502, |
|
"rewards/rejected": -13.736421585083008, |
|
"sft_loss": 0.9653832316398621, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.3143483023001095, |
|
"grad_norm": 6.310958305446118, |
|
"learning_rate": 1.4367824904439242e-07, |
|
"logits/chosen": 20.603116989135742, |
|
"logits/rejected": 19.180021286010742, |
|
"logps/chosen": -326.3525085449219, |
|
"logps/rejected": -239.44825744628906, |
|
"loss": 0.0686, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.47385311126709, |
|
"rewards/margins": 7.884840488433838, |
|
"rewards/rejected": -15.358694076538086, |
|
"sft_loss": 0.995606005191803, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.3225629791894853, |
|
"grad_norm": 5.691699841334073, |
|
"learning_rate": 1.4061098449376985e-07, |
|
"logits/chosen": 19.0198974609375, |
|
"logits/rejected": 17.974164962768555, |
|
"logps/chosen": -380.7705383300781, |
|
"logps/rejected": -275.5369873046875, |
|
"loss": 0.0558, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -8.497329711914062, |
|
"rewards/margins": 9.07253646850586, |
|
"rewards/rejected": -17.569866180419922, |
|
"sft_loss": 1.1454724073410034, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.330777656078861, |
|
"grad_norm": 3.2559120936424177, |
|
"learning_rate": 1.375639521289836e-07, |
|
"logits/chosen": 18.49973487854004, |
|
"logits/rejected": 16.317462921142578, |
|
"logps/chosen": -342.45751953125, |
|
"logps/rejected": -251.50741577148438, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.949553489685059, |
|
"rewards/margins": 7.948643684387207, |
|
"rewards/rejected": -16.8981990814209, |
|
"sft_loss": 1.1290977001190186, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.3389923329682367, |
|
"grad_norm": 5.544530645757365, |
|
"learning_rate": 1.3453771551781756e-07, |
|
"logits/chosen": 16.78171157836914, |
|
"logits/rejected": 16.938737869262695, |
|
"logps/chosen": -313.18890380859375, |
|
"logps/rejected": -255.20042419433594, |
|
"loss": 0.0633, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.961526870727539, |
|
"rewards/margins": 8.269176483154297, |
|
"rewards/rejected": -17.23070526123047, |
|
"sft_loss": 1.2107350826263428, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 1.3472070098576123, |
|
"grad_norm": 6.724733185846292, |
|
"learning_rate": 1.3153283438175034e-07, |
|
"logits/chosen": 16.646345138549805, |
|
"logits/rejected": 15.98381519317627, |
|
"logps/chosen": -331.96099853515625, |
|
"logps/rejected": -259.468017578125, |
|
"loss": 0.0652, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.80189037322998, |
|
"rewards/margins": 8.555667877197266, |
|
"rewards/rejected": -18.357561111450195, |
|
"sft_loss": 1.1712498664855957, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.355421686746988, |
|
"grad_norm": 3.2889655668134083, |
|
"learning_rate": 1.2854986449243124e-07, |
|
"logits/chosen": 17.65423011779785, |
|
"logits/rejected": 17.50803565979004, |
|
"logps/chosen": -319.4832458496094, |
|
"logps/rejected": -263.18182373046875, |
|
"loss": 0.0445, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.653801918029785, |
|
"rewards/margins": 8.747140884399414, |
|
"rewards/rejected": -18.400943756103516, |
|
"sft_loss": 1.077059268951416, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 1.3636363636363638, |
|
"grad_norm": 3.0357613310647613, |
|
"learning_rate": 1.2558935756888675e-07, |
|
"logits/chosen": 16.95936393737793, |
|
"logits/rejected": 16.559120178222656, |
|
"logps/chosen": -322.41357421875, |
|
"logps/rejected": -263.3858947753906, |
|
"loss": 0.0624, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.595723152160645, |
|
"rewards/margins": 9.040340423583984, |
|
"rewards/rejected": -18.636064529418945, |
|
"sft_loss": 1.1398062705993652, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.3718510405257394, |
|
"grad_norm": 2.8650653830144184, |
|
"learning_rate": 1.226518611754767e-07, |
|
"logits/chosen": 19.48563003540039, |
|
"logits/rejected": 18.318378448486328, |
|
"logps/chosen": -321.63446044921875, |
|
"logps/rejected": -262.0445861816406, |
|
"loss": 0.0482, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.1325101852417, |
|
"rewards/margins": 8.926721572875977, |
|
"rewards/rejected": -18.05923080444336, |
|
"sft_loss": 1.1335971355438232, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 1.380065717415115, |
|
"grad_norm": 6.425860846095936, |
|
"learning_rate": 1.1973791862061871e-07, |
|
"logits/chosen": 17.781869888305664, |
|
"logits/rejected": 16.827882766723633, |
|
"logps/chosen": -339.0770568847656, |
|
"logps/rejected": -266.6395568847656, |
|
"loss": 0.0703, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.701363563537598, |
|
"rewards/margins": 9.394256591796875, |
|
"rewards/rejected": -18.095619201660156, |
|
"sft_loss": 1.0754181146621704, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.3882803943044908, |
|
"grad_norm": 2.9739559158072484, |
|
"learning_rate": 1.1684806885630003e-07, |
|
"logits/chosen": 18.772544860839844, |
|
"logits/rejected": 18.669767379760742, |
|
"logps/chosen": -341.22998046875, |
|
"logps/rejected": -270.3790283203125, |
|
"loss": 0.0471, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.742754936218262, |
|
"rewards/margins": 9.263623237609863, |
|
"rewards/rejected": -18.006380081176758, |
|
"sft_loss": 1.1030330657958984, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 1.3964950711938664, |
|
"grad_norm": 3.70632539902967, |
|
"learning_rate": 1.1398284637839486e-07, |
|
"logits/chosen": 19.672170639038086, |
|
"logits/rejected": 18.065275192260742, |
|
"logps/chosen": -303.6903076171875, |
|
"logps/rejected": -230.23797607421875, |
|
"loss": 0.0593, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.658843040466309, |
|
"rewards/margins": 7.194061756134033, |
|
"rewards/rejected": -15.85290241241455, |
|
"sft_loss": 1.3414223194122314, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.404709748083242, |
|
"grad_norm": 3.8454291780017145, |
|
"learning_rate": 1.1114278112780601e-07, |
|
"logits/chosen": 19.258955001831055, |
|
"logits/rejected": 17.727506637573242, |
|
"logps/chosen": -387.6003112792969, |
|
"logps/rejected": -285.1304016113281, |
|
"loss": 0.0441, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.63217830657959, |
|
"rewards/margins": 9.642850875854492, |
|
"rewards/rejected": -19.27503204345703, |
|
"sft_loss": 1.0949971675872803, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 1.4129244249726178, |
|
"grad_norm": 81.05899714514051, |
|
"learning_rate": 1.08328398392449e-07, |
|
"logits/chosen": 18.748620986938477, |
|
"logits/rejected": 17.344341278076172, |
|
"logps/chosen": -355.6446533203125, |
|
"logps/rejected": -269.2040710449219, |
|
"loss": 0.0629, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.797952651977539, |
|
"rewards/margins": 8.548418045043945, |
|
"rewards/rejected": -18.346372604370117, |
|
"sft_loss": 1.1425796747207642, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.4211391018619934, |
|
"grad_norm": 4.719466773161234, |
|
"learning_rate": 1.0554021871009677e-07, |
|
"logits/chosen": 18.971662521362305, |
|
"logits/rejected": 16.849485397338867, |
|
"logps/chosen": -342.5481872558594, |
|
"logps/rejected": -268.31475830078125, |
|
"loss": 0.0464, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.3140230178833, |
|
"rewards/margins": 9.42198657989502, |
|
"rewards/rejected": -18.73600959777832, |
|
"sft_loss": 1.3192195892333984, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 1.429353778751369, |
|
"grad_norm": 3.6243101382632688, |
|
"learning_rate": 1.0277875777210299e-07, |
|
"logits/chosen": 16.66229820251465, |
|
"logits/rejected": 15.501757621765137, |
|
"logps/chosen": -332.5435791015625, |
|
"logps/rejected": -261.5600280761719, |
|
"loss": 0.0537, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.856383323669434, |
|
"rewards/margins": 8.77900218963623, |
|
"rewards/rejected": -18.635387420654297, |
|
"sft_loss": 1.3038902282714844, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.4375684556407449, |
|
"grad_norm": 2.7537435903936074, |
|
"learning_rate": 1.0004452632802158e-07, |
|
"logits/chosen": 18.529787063598633, |
|
"logits/rejected": 17.4042911529541, |
|
"logps/chosen": -337.29791259765625, |
|
"logps/rejected": -277.3015441894531, |
|
"loss": 0.0514, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.470120429992676, |
|
"rewards/margins": 9.908967971801758, |
|
"rewards/rejected": -19.379091262817383, |
|
"sft_loss": 1.2056940793991089, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 1.4457831325301205, |
|
"grad_norm": 5.683629925320703, |
|
"learning_rate": 9.733803009114044e-08, |
|
"logits/chosen": 18.083925247192383, |
|
"logits/rejected": 16.712495803833008, |
|
"logps/chosen": -335.4129943847656, |
|
"logps/rejected": -262.9490966796875, |
|
"loss": 0.0474, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.186826705932617, |
|
"rewards/margins": 8.34049129486084, |
|
"rewards/rejected": -17.527315139770508, |
|
"sft_loss": 1.1717486381530762, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.453997809419496, |
|
"grad_norm": 3.283485850050913, |
|
"learning_rate": 9.465976964494682e-08, |
|
"logits/chosen": 17.851030349731445, |
|
"logits/rejected": 17.46946144104004, |
|
"logps/chosen": -280.1820373535156, |
|
"logps/rejected": -242.58209228515625, |
|
"loss": 0.0529, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -8.64423656463623, |
|
"rewards/margins": 8.354222297668457, |
|
"rewards/rejected": -16.998458862304688, |
|
"sft_loss": 1.281132459640503, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 1.462212486308872, |
|
"grad_norm": 2.4606306801838347, |
|
"learning_rate": 9.201024035054053e-08, |
|
"logits/chosen": 18.96923065185547, |
|
"logits/rejected": 17.189115524291992, |
|
"logps/chosen": -295.1808166503906, |
|
"logps/rejected": -223.66261291503906, |
|
"loss": 0.0605, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -8.9287748336792, |
|
"rewards/margins": 7.127224445343018, |
|
"rewards/rejected": -16.055997848510742, |
|
"sft_loss": 1.340820074081421, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.4704271631982475, |
|
"grad_norm": 2.471728869379881, |
|
"learning_rate": 8.938993225501495e-08, |
|
"logits/chosen": 19.450706481933594, |
|
"logits/rejected": 18.156089782714844, |
|
"logps/chosen": -343.1815490722656, |
|
"logps/rejected": -268.55767822265625, |
|
"loss": 0.0583, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.863726615905762, |
|
"rewards/margins": 9.138747215270996, |
|
"rewards/rejected": -18.002471923828125, |
|
"sft_loss": 1.042392611503601, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 1.4786418400876231, |
|
"grad_norm": 2.702831742745651, |
|
"learning_rate": 8.679933000081879e-08, |
|
"logits/chosen": 18.009992599487305, |
|
"logits/rejected": 17.068376541137695, |
|
"logps/chosen": -340.9268798828125, |
|
"logps/rejected": -244.87017822265625, |
|
"loss": 0.0603, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -8.484542846679688, |
|
"rewards/margins": 8.053364753723145, |
|
"rewards/rejected": -16.53791046142578, |
|
"sft_loss": 1.1751140356063843, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.486856516976999, |
|
"grad_norm": 4.0381849618513135, |
|
"learning_rate": 8.423891273611855e-08, |
|
"logits/chosen": 16.985027313232422, |
|
"logits/rejected": 16.37629508972168, |
|
"logps/chosen": -300.3452453613281, |
|
"logps/rejected": -240.13729858398438, |
|
"loss": 0.0587, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.195282936096191, |
|
"rewards/margins": 8.371771812438965, |
|
"rewards/rejected": -16.567054748535156, |
|
"sft_loss": 1.1615040302276611, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 1.4950711938663745, |
|
"grad_norm": 3.373858379077247, |
|
"learning_rate": 8.170915402617739e-08, |
|
"logits/chosen": 17.579252243041992, |
|
"logits/rejected": 16.743297576904297, |
|
"logps/chosen": -347.9422912597656, |
|
"logps/rejected": -262.06610107421875, |
|
"loss": 0.0669, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.031808853149414, |
|
"rewards/margins": 8.512653350830078, |
|
"rewards/rejected": -17.544462203979492, |
|
"sft_loss": 1.1611533164978027, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.5032858707557502, |
|
"grad_norm": 1.8601145872189842, |
|
"learning_rate": 7.921052176576643e-08, |
|
"logits/chosen": 17.42083740234375, |
|
"logits/rejected": 17.060815811157227, |
|
"logps/chosen": -306.4146728515625, |
|
"logps/rejected": -253.76126098632812, |
|
"loss": 0.0412, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.15539264678955, |
|
"rewards/margins": 8.407408714294434, |
|
"rewards/rejected": -17.562801361083984, |
|
"sft_loss": 1.0932406187057495, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 1.511500547645126, |
|
"grad_norm": 4.848816782812418, |
|
"learning_rate": 7.674347809262377e-08, |
|
"logits/chosen": 17.446378707885742, |
|
"logits/rejected": 17.68989372253418, |
|
"logps/chosen": -295.2680969238281, |
|
"logps/rejected": -246.88856506347656, |
|
"loss": 0.0616, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.591629028320312, |
|
"rewards/margins": 8.282797813415527, |
|
"rewards/rejected": -16.874427795410156, |
|
"sft_loss": 1.1592586040496826, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.5197152245345018, |
|
"grad_norm": 3.1171315520626317, |
|
"learning_rate": 7.430847930198009e-08, |
|
"logits/chosen": 18.438880920410156, |
|
"logits/rejected": 16.9648494720459, |
|
"logps/chosen": -359.6697082519531, |
|
"logps/rejected": -265.92462158203125, |
|
"loss": 0.0599, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.22867202758789, |
|
"rewards/margins": 9.071920394897461, |
|
"rewards/rejected": -18.300594329833984, |
|
"sft_loss": 1.3760299682617188, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 1.5279299014238772, |
|
"grad_norm": 2.946496194046417, |
|
"learning_rate": 7.190597576216384e-08, |
|
"logits/chosen": 17.264368057250977, |
|
"logits/rejected": 17.493934631347656, |
|
"logps/chosen": -341.4744873046875, |
|
"logps/rejected": -280.3497619628906, |
|
"loss": 0.0357, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.569901466369629, |
|
"rewards/margins": 9.21591854095459, |
|
"rewards/rejected": -18.785818099975586, |
|
"sft_loss": 1.1669524908065796, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.536144578313253, |
|
"grad_norm": 4.598015503663298, |
|
"learning_rate": 6.953641183130224e-08, |
|
"logits/chosen": 18.845075607299805, |
|
"logits/rejected": 16.610898971557617, |
|
"logps/chosen": -364.0211181640625, |
|
"logps/rejected": -261.7635803222656, |
|
"loss": 0.055, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.992565155029297, |
|
"rewards/margins": 8.313262939453125, |
|
"rewards/rejected": -18.305830001831055, |
|
"sft_loss": 1.251989722251892, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 1.5443592552026288, |
|
"grad_norm": 3.1309871505463733, |
|
"learning_rate": 6.720022577513507e-08, |
|
"logits/chosen": 17.22759246826172, |
|
"logits/rejected": 15.910391807556152, |
|
"logps/chosen": -371.6169738769531, |
|
"logps/rejected": -277.5339050292969, |
|
"loss": 0.0495, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -10.53276538848877, |
|
"rewards/margins": 8.91848087310791, |
|
"rewards/rejected": -19.451250076293945, |
|
"sft_loss": 1.2916030883789062, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.5525739320920042, |
|
"grad_norm": 6.107763356196428, |
|
"learning_rate": 6.489784968595444e-08, |
|
"logits/chosen": 18.367130279541016, |
|
"logits/rejected": 16.745647430419922, |
|
"logps/chosen": -372.9920654296875, |
|
"logps/rejected": -294.3865966796875, |
|
"loss": 0.0797, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -10.943706512451172, |
|
"rewards/margins": 10.03380298614502, |
|
"rewards/rejected": -20.977510452270508, |
|
"sft_loss": 1.2319529056549072, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 1.56078860898138, |
|
"grad_norm": 5.019635803171075, |
|
"learning_rate": 6.262970940268652e-08, |
|
"logits/chosen": 17.80394172668457, |
|
"logits/rejected": 17.284276962280273, |
|
"logps/chosen": -319.9245910644531, |
|
"logps/rejected": -272.44287109375, |
|
"loss": 0.0502, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -10.027565956115723, |
|
"rewards/margins": 9.023143768310547, |
|
"rewards/rejected": -19.05070686340332, |
|
"sft_loss": 1.1501847505569458, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.5690032858707559, |
|
"grad_norm": 2.5718093284663017, |
|
"learning_rate": 6.039622443213008e-08, |
|
"logits/chosen": 17.769981384277344, |
|
"logits/rejected": 17.192481994628906, |
|
"logps/chosen": -338.2123718261719, |
|
"logps/rejected": -270.8006286621094, |
|
"loss": 0.0455, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -10.453187942504883, |
|
"rewards/margins": 8.49782943725586, |
|
"rewards/rejected": -18.95101547241211, |
|
"sft_loss": 1.23969304561615, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 1.5772179627601315, |
|
"grad_norm": 2.7871237377104685, |
|
"learning_rate": 5.8197807871366e-08, |
|
"logits/chosen": 16.671855926513672, |
|
"logits/rejected": 15.88489818572998, |
|
"logps/chosen": -383.57757568359375, |
|
"logps/rejected": -302.80078125, |
|
"loss": 0.0443, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.86944580078125, |
|
"rewards/margins": 9.499520301818848, |
|
"rewards/rejected": -20.368961334228516, |
|
"sft_loss": 2.283703565597534, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.585432639649507, |
|
"grad_norm": 8.539385336051494, |
|
"learning_rate": 5.6034866331352376e-08, |
|
"logits/chosen": 17.10059928894043, |
|
"logits/rejected": 15.988658905029297, |
|
"logps/chosen": -328.95880126953125, |
|
"logps/rejected": -276.1900939941406, |
|
"loss": 0.0789, |
|
"rewards/accuracies": 0.9333333969116211, |
|
"rewards/chosen": -11.029694557189941, |
|
"rewards/margins": 9.335869789123535, |
|
"rewards/rejected": -20.365564346313477, |
|
"sft_loss": 1.1840242147445679, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 1.593647316538883, |
|
"grad_norm": 4.957273671741837, |
|
"learning_rate": 5.390779986171934e-08, |
|
"logits/chosen": 18.002132415771484, |
|
"logits/rejected": 16.781938552856445, |
|
"logps/chosen": -365.2828674316406, |
|
"logps/rejected": -290.2236328125, |
|
"loss": 0.0487, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -10.259432792663574, |
|
"rewards/margins": 9.27906322479248, |
|
"rewards/rejected": -19.538496017456055, |
|
"sft_loss": 1.1914342641830444, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.6018619934282585, |
|
"grad_norm": 4.9428875327513655, |
|
"learning_rate": 5.1817001876777314e-08, |
|
"logits/chosen": 16.411977767944336, |
|
"logits/rejected": 16.428050994873047, |
|
"logps/chosen": -325.99688720703125, |
|
"logps/rejected": -282.6103515625, |
|
"loss": 0.0437, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.880454063415527, |
|
"rewards/margins": 9.455738067626953, |
|
"rewards/rejected": -19.336191177368164, |
|
"sft_loss": 1.2319772243499756, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 1.6100766703176341, |
|
"grad_norm": 2.957939197451581, |
|
"learning_rate": 4.9762859082752464e-08, |
|
"logits/chosen": 18.82546043395996, |
|
"logits/rejected": 17.623178482055664, |
|
"logps/chosen": -363.7794494628906, |
|
"logps/rejected": -286.4556884765625, |
|
"loss": 0.0679, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -10.052456855773926, |
|
"rewards/margins": 9.68246841430664, |
|
"rewards/rejected": -19.73492431640625, |
|
"sft_loss": 1.0845558643341064, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.61829134720701, |
|
"grad_norm": 4.380929225854475, |
|
"learning_rate": 4.774575140626316e-08, |
|
"logits/chosen": 16.805063247680664, |
|
"logits/rejected": 16.495389938354492, |
|
"logps/chosen": -326.19549560546875, |
|
"logps/rejected": -266.20477294921875, |
|
"loss": 0.0416, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.56114673614502, |
|
"rewards/margins": 9.003406524658203, |
|
"rewards/rejected": -18.56455421447754, |
|
"sft_loss": 1.1449401378631592, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 1.6265060240963856, |
|
"grad_norm": 3.1682867458550255, |
|
"learning_rate": 4.5766051924049975e-08, |
|
"logits/chosen": 19.578977584838867, |
|
"logits/rejected": 18.58966064453125, |
|
"logps/chosen": -329.7391357421875, |
|
"logps/rejected": -268.65899658203125, |
|
"loss": 0.078, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.10643482208252, |
|
"rewards/margins": 9.36843204498291, |
|
"rewards/rejected": -18.474864959716797, |
|
"sft_loss": 1.255157470703125, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.6347207009857612, |
|
"grad_norm": 0.6942420370445791, |
|
"learning_rate": 4.3824126793972934e-08, |
|
"logits/chosen": 17.492353439331055, |
|
"logits/rejected": 16.303081512451172, |
|
"logps/chosen": -363.3464050292969, |
|
"logps/rejected": -277.27960205078125, |
|
"loss": 0.0653, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.666760444641113, |
|
"rewards/margins": 10.318286895751953, |
|
"rewards/rejected": -18.98504638671875, |
|
"sft_loss": 1.1402015686035156, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 1.642935377875137, |
|
"grad_norm": 3.2190001250289964, |
|
"learning_rate": 4.192033518728819e-08, |
|
"logits/chosen": 17.813274383544922, |
|
"logits/rejected": 16.33589744567871, |
|
"logps/chosen": -336.6969909667969, |
|
"logps/rejected": -259.0225830078125, |
|
"loss": 0.0455, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -8.958456993103027, |
|
"rewards/margins": 8.632108688354492, |
|
"rewards/rejected": -17.590564727783203, |
|
"sft_loss": 1.3451780080795288, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.6511500547645126, |
|
"grad_norm": 7.393954413499783, |
|
"learning_rate": 4.0055029222217125e-08, |
|
"logits/chosen": 17.960172653198242, |
|
"logits/rejected": 16.416893005371094, |
|
"logps/chosen": -315.2778015136719, |
|
"logps/rejected": -254.21824645996094, |
|
"loss": 0.0528, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.643136978149414, |
|
"rewards/margins": 8.79008960723877, |
|
"rewards/rejected": -18.4332275390625, |
|
"sft_loss": 1.1033631563186646, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 1.6593647316538882, |
|
"grad_norm": 4.50975910962959, |
|
"learning_rate": 3.8228553898819904e-08, |
|
"logits/chosen": 19.406719207763672, |
|
"logits/rejected": 18.56427001953125, |
|
"logps/chosen": -345.2118835449219, |
|
"logps/rejected": -286.46697998046875, |
|
"loss": 0.0704, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -10.453201293945312, |
|
"rewards/margins": 9.429792404174805, |
|
"rewards/rejected": -19.882991790771484, |
|
"sft_loss": 1.1480904817581177, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.667579408543264, |
|
"grad_norm": 2.421328969282485, |
|
"learning_rate": 3.6441247035185416e-08, |
|
"logits/chosen": 18.4009952545166, |
|
"logits/rejected": 17.309907913208008, |
|
"logps/chosen": -386.3546447753906, |
|
"logps/rejected": -297.7645568847656, |
|
"loss": 0.0365, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.208230972290039, |
|
"rewards/margins": 10.017862319946289, |
|
"rewards/rejected": -20.22609519958496, |
|
"sft_loss": 1.1494494676589966, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 1.6757940854326396, |
|
"grad_norm": 3.7694600773057583, |
|
"learning_rate": 3.4693439204949855e-08, |
|
"logits/chosen": 17.161108016967773, |
|
"logits/rejected": 16.785768508911133, |
|
"logps/chosen": -296.5499267578125, |
|
"logps/rejected": -256.6122741699219, |
|
"loss": 0.0555, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.63855266571045, |
|
"rewards/margins": 8.829416275024414, |
|
"rewards/rejected": -18.467967987060547, |
|
"sft_loss": 1.2135652303695679, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.6840087623220152, |
|
"grad_norm": 7.949500193131256, |
|
"learning_rate": 3.298545367615493e-08, |
|
"logits/chosen": 18.552576065063477, |
|
"logits/rejected": 17.31821060180664, |
|
"logps/chosen": -295.35540771484375, |
|
"logps/rejected": -242.86293029785156, |
|
"loss": 0.0854, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -9.394362449645996, |
|
"rewards/margins": 7.929934501647949, |
|
"rewards/rejected": -17.324296951293945, |
|
"sft_loss": 1.2392216920852661, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 1.692223439211391, |
|
"grad_norm": 5.638532817532561, |
|
"learning_rate": 3.13176063514575e-08, |
|
"logits/chosen": 18.253822326660156, |
|
"logits/rejected": 17.51996421813965, |
|
"logps/chosen": -363.8645935058594, |
|
"logps/rejected": -290.9195556640625, |
|
"loss": 0.0719, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.809922218322754, |
|
"rewards/margins": 10.452852249145508, |
|
"rewards/rejected": -20.26277732849121, |
|
"sft_loss": 1.302940845489502, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.7004381161007667, |
|
"grad_norm": 4.692786337225903, |
|
"learning_rate": 2.96902057097011e-08, |
|
"logits/chosen": 18.16409683227539, |
|
"logits/rejected": 17.41961669921875, |
|
"logps/chosen": -346.486328125, |
|
"logps/rejected": -269.6810607910156, |
|
"loss": 0.0603, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.241110801696777, |
|
"rewards/margins": 9.675481796264648, |
|
"rewards/rejected": -18.916593551635742, |
|
"sft_loss": 1.3590866327285767, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 1.7086527929901423, |
|
"grad_norm": 5.302948433169166, |
|
"learning_rate": 2.8103552748861475e-08, |
|
"logits/chosen": 17.655193328857422, |
|
"logits/rejected": 16.845975875854492, |
|
"logps/chosen": -338.5050354003906, |
|
"logps/rejected": -274.38592529296875, |
|
"loss": 0.0456, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.701102256774902, |
|
"rewards/margins": 9.212477684020996, |
|
"rewards/rejected": -18.9135799407959, |
|
"sft_loss": 1.1541944742202759, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.716867469879518, |
|
"grad_norm": 7.958361212469992, |
|
"learning_rate": 2.65579409303745e-08, |
|
"logits/chosen": 18.777873992919922, |
|
"logits/rejected": 17.120763778686523, |
|
"logps/chosen": -387.65240478515625, |
|
"logps/rejected": -283.70098876953125, |
|
"loss": 0.0673, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.420891761779785, |
|
"rewards/margins": 10.373385429382324, |
|
"rewards/rejected": -19.79427719116211, |
|
"sft_loss": 1.1774132251739502, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 1.7250821467688937, |
|
"grad_norm": 9.531573914482877, |
|
"learning_rate": 2.505365612485874e-08, |
|
"logits/chosen": 17.264955520629883, |
|
"logits/rejected": 14.986218452453613, |
|
"logps/chosen": -337.1243896484375, |
|
"logps/rejected": -248.45452880859375, |
|
"loss": 0.0714, |
|
"rewards/accuracies": 0.9333333373069763, |
|
"rewards/chosen": -9.543818473815918, |
|
"rewards/margins": 7.988076686859131, |
|
"rewards/rejected": -17.53189468383789, |
|
"sft_loss": 1.5333364009857178, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.7332968236582693, |
|
"grad_norm": 4.137336908358683, |
|
"learning_rate": 2.3590976559242275e-08, |
|
"logits/chosen": 17.53969383239746, |
|
"logits/rejected": 17.553749084472656, |
|
"logps/chosen": -310.6015930175781, |
|
"logps/rejected": -270.8495178222656, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.537567138671875, |
|
"rewards/margins": 8.755559921264648, |
|
"rewards/rejected": -18.29313087463379, |
|
"sft_loss": 1.243970513343811, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 1.7415115005476451, |
|
"grad_norm": 4.1742977406410295, |
|
"learning_rate": 2.21701727653025e-08, |
|
"logits/chosen": 17.685638427734375, |
|
"logits/rejected": 15.837899208068848, |
|
"logps/chosen": -373.8312683105469, |
|
"logps/rejected": -280.65020751953125, |
|
"loss": 0.0635, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -10.465250968933105, |
|
"rewards/margins": 9.272075653076172, |
|
"rewards/rejected": -19.73732566833496, |
|
"sft_loss": 1.230289101600647, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.749726177437021, |
|
"grad_norm": 3.5905659893317434, |
|
"learning_rate": 2.0791507529629522e-08, |
|
"logits/chosen": 17.48339080810547, |
|
"logits/rejected": 17.49197006225586, |
|
"logps/chosen": -282.5596008300781, |
|
"logps/rejected": -230.78216552734375, |
|
"loss": 0.0617, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -8.903531074523926, |
|
"rewards/margins": 7.071009635925293, |
|
"rewards/rejected": -15.974542617797852, |
|
"sft_loss": 1.1227587461471558, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 1.7579408543263964, |
|
"grad_norm": 13.383809225376258, |
|
"learning_rate": 1.945523584502262e-08, |
|
"logits/chosen": 19.799388885498047, |
|
"logits/rejected": 17.97065544128418, |
|
"logps/chosen": -394.8106384277344, |
|
"logps/rejected": -280.33184814453125, |
|
"loss": 0.0572, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.54643726348877, |
|
"rewards/margins": 9.38154125213623, |
|
"rewards/rejected": -18.927978515625, |
|
"sft_loss": 1.0600664615631104, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.7661555312157722, |
|
"grad_norm": 3.85892797302041, |
|
"learning_rate": 1.8161604863327072e-08, |
|
"logits/chosen": 16.801225662231445, |
|
"logits/rejected": 16.272432327270508, |
|
"logps/chosen": -315.7366943359375, |
|
"logps/rejected": -254.6013946533203, |
|
"loss": 0.0406, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.108306884765625, |
|
"rewards/margins": 8.766523361206055, |
|
"rewards/rejected": -17.874826431274414, |
|
"sft_loss": 1.1722328662872314, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 1.774370208105148, |
|
"grad_norm": 18.187119238883547, |
|
"learning_rate": 1.691085384972235e-08, |
|
"logits/chosen": 16.329721450805664, |
|
"logits/rejected": 15.469901084899902, |
|
"logps/chosen": -283.27142333984375, |
|
"logps/rejected": -235.70811462402344, |
|
"loss": 0.0575, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.012235641479492, |
|
"rewards/margins": 7.615647792816162, |
|
"rewards/rejected": -16.62788200378418, |
|
"sft_loss": 1.2651445865631104, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.7825848849945234, |
|
"grad_norm": 3.4104772768917426, |
|
"learning_rate": 1.570321413846845e-08, |
|
"logits/chosen": 16.62438201904297, |
|
"logits/rejected": 17.102264404296875, |
|
"logps/chosen": -312.1449279785156, |
|
"logps/rejected": -271.4256591796875, |
|
"loss": 0.0299, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.285161018371582, |
|
"rewards/margins": 9.69771957397461, |
|
"rewards/rejected": -18.982881546020508, |
|
"sft_loss": 1.2524992227554321, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 1.7907995618838992, |
|
"grad_norm": 4.409471745750223, |
|
"learning_rate": 1.4538909090118846e-08, |
|
"logits/chosen": 18.32159423828125, |
|
"logits/rejected": 16.56727409362793, |
|
"logps/chosen": -338.2820129394531, |
|
"logps/rejected": -250.31729125976562, |
|
"loss": 0.0607, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.47223949432373, |
|
"rewards/margins": 8.058004379272461, |
|
"rewards/rejected": -17.530242919921875, |
|
"sft_loss": 1.2121272087097168, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.799014238773275, |
|
"grad_norm": 3.827920678689443, |
|
"learning_rate": 1.3418154050208936e-08, |
|
"logits/chosen": 17.697154998779297, |
|
"logits/rejected": 16.722187042236328, |
|
"logps/chosen": -303.7814636230469, |
|
"logps/rejected": -251.51583862304688, |
|
"loss": 0.0563, |
|
"rewards/accuracies": 0.9466667175292969, |
|
"rewards/chosen": -9.34262466430664, |
|
"rewards/margins": 8.22611141204834, |
|
"rewards/rejected": -17.568735122680664, |
|
"sft_loss": 1.133971095085144, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 1.8072289156626506, |
|
"grad_norm": 2.3604810623566084, |
|
"learning_rate": 1.2341156309426447e-08, |
|
"logits/chosen": 16.974082946777344, |
|
"logits/rejected": 16.85063362121582, |
|
"logps/chosen": -346.72998046875, |
|
"logps/rejected": -278.26165771484375, |
|
"loss": 0.036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.343656539916992, |
|
"rewards/margins": 9.743067741394043, |
|
"rewards/rejected": -19.086727142333984, |
|
"sft_loss": 1.044119954109192, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.8154435925520263, |
|
"grad_norm": 3.1591505712032237, |
|
"learning_rate": 1.130811506527149e-08, |
|
"logits/chosen": 18.949861526489258, |
|
"logits/rejected": 17.834264755249023, |
|
"logps/chosen": -389.62042236328125, |
|
"logps/rejected": -280.9618835449219, |
|
"loss": 0.0328, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.740713119506836, |
|
"rewards/margins": 9.123427391052246, |
|
"rewards/rejected": -18.8641414642334, |
|
"sft_loss": 1.1095327138900757, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 1.823658269441402, |
|
"grad_norm": 1.034828289458216, |
|
"learning_rate": 1.0319221385213934e-08, |
|
"logits/chosen": 17.111549377441406, |
|
"logits/rejected": 16.737529754638672, |
|
"logps/chosen": -317.83343505859375, |
|
"logps/rejected": -262.7276916503906, |
|
"loss": 0.0392, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.57455825805664, |
|
"rewards/margins": 8.217500686645508, |
|
"rewards/rejected": -17.792057037353516, |
|
"sft_loss": 1.276735782623291, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.8318729463307777, |
|
"grad_norm": 5.969957340964575, |
|
"learning_rate": 9.374658171354411e-09, |
|
"logits/chosen": 18.033985137939453, |
|
"logits/rejected": 16.812503814697266, |
|
"logps/chosen": -338.47796630859375, |
|
"logps/rejected": -267.0741271972656, |
|
"loss": 0.066, |
|
"rewards/accuracies": 0.9466666579246521, |
|
"rewards/chosen": -9.4961519241333, |
|
"rewards/margins": 9.333700180053711, |
|
"rewards/rejected": -18.829853057861328, |
|
"sft_loss": 1.2590656280517578, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 1.8400876232201533, |
|
"grad_norm": 3.650713168539723, |
|
"learning_rate": 8.474600126594983e-09, |
|
"logits/chosen": 18.25022315979004, |
|
"logits/rejected": 17.499792098999023, |
|
"logps/chosen": -332.2896728515625, |
|
"logps/rejected": -259.7283935546875, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.764142990112305, |
|
"rewards/margins": 9.385050773620605, |
|
"rewards/rejected": -18.149192810058594, |
|
"sft_loss": 1.2872370481491089, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.8483023001095291, |
|
"grad_norm": 3.519511198290722, |
|
"learning_rate": 7.619213722327184e-09, |
|
"logits/chosen": 17.54939842224121, |
|
"logits/rejected": 16.605783462524414, |
|
"logps/chosen": -330.8531799316406, |
|
"logps/rejected": -265.7262878417969, |
|
"loss": 0.0435, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.460249900817871, |
|
"rewards/margins": 9.11566162109375, |
|
"rewards/rejected": -18.575910568237305, |
|
"sft_loss": 1.2160968780517578, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 1.8565169769989047, |
|
"grad_norm": 4.185481328436663, |
|
"learning_rate": 6.808657167641896e-09, |
|
"logits/chosen": 17.643688201904297, |
|
"logits/rejected": 16.674888610839844, |
|
"logps/chosen": -361.64715576171875, |
|
"logps/rejected": -284.84881591796875, |
|
"loss": 0.0669, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.835926055908203, |
|
"rewards/margins": 10.017841339111328, |
|
"rewards/rejected": -19.8537654876709, |
|
"sft_loss": 1.1817443370819092, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.8647316538882803, |
|
"grad_norm": 5.8031765508458095, |
|
"learning_rate": 6.043080380067539e-09, |
|
"logits/chosen": 16.346317291259766, |
|
"logits/rejected": 16.2381649017334, |
|
"logps/chosen": -378.3958435058594, |
|
"logps/rejected": -303.12091064453125, |
|
"loss": 0.0523, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.526629447937012, |
|
"rewards/margins": 11.506245613098145, |
|
"rewards/rejected": -21.03287696838379, |
|
"sft_loss": 1.1959699392318726, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 1.8729463307776562, |
|
"grad_norm": 8.21221020965199, |
|
"learning_rate": 5.322624957841998e-09, |
|
"logits/chosen": 18.25189971923828, |
|
"logits/rejected": 17.553903579711914, |
|
"logps/chosen": -341.5302734375, |
|
"logps/rejected": -272.32012939453125, |
|
"loss": 0.0601, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.903505325317383, |
|
"rewards/margins": 8.774069786071777, |
|
"rewards/rejected": -18.677574157714844, |
|
"sft_loss": 1.110214114189148, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.8811610076670318, |
|
"grad_norm": 1.2597947012426969, |
|
"learning_rate": 4.647424153723101e-09, |
|
"logits/chosen": 18.352624893188477, |
|
"logits/rejected": 16.199268341064453, |
|
"logps/chosen": -322.0838928222656, |
|
"logps/rejected": -241.83558654785156, |
|
"loss": 0.0757, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.200777053833008, |
|
"rewards/margins": 7.505552768707275, |
|
"rewards/rejected": -16.706331253051758, |
|
"sft_loss": 1.2215784788131714, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 1.8893756845564074, |
|
"grad_norm": 4.292246600774085, |
|
"learning_rate": 4.0176028503425826e-09, |
|
"logits/chosen": 17.103918075561523, |
|
"logits/rejected": 16.931406021118164, |
|
"logps/chosen": -311.3345642089844, |
|
"logps/rejected": -262.2460021972656, |
|
"loss": 0.049, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.341358184814453, |
|
"rewards/margins": 9.20698070526123, |
|
"rewards/rejected": -18.548337936401367, |
|
"sft_loss": 1.2602629661560059, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.8975903614457832, |
|
"grad_norm": 3.9316412520602233, |
|
"learning_rate": 3.433277537108481e-09, |
|
"logits/chosen": 18.07435417175293, |
|
"logits/rejected": 17.416156768798828, |
|
"logps/chosen": -373.20477294921875, |
|
"logps/rejected": -284.2578430175781, |
|
"loss": 0.0481, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -10.183159828186035, |
|
"rewards/margins": 8.937155723571777, |
|
"rewards/rejected": -19.120319366455078, |
|
"sft_loss": 1.2882879972457886, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 1.9058050383351588, |
|
"grad_norm": 4.002247931370384, |
|
"learning_rate": 2.8945562886593944e-09, |
|
"logits/chosen": 16.535991668701172, |
|
"logits/rejected": 15.887761116027832, |
|
"logps/chosen": -281.4147033691406, |
|
"logps/rejected": -248.97198486328125, |
|
"loss": 0.0572, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.560811996459961, |
|
"rewards/margins": 8.152007102966309, |
|
"rewards/rejected": -17.712818145751953, |
|
"sft_loss": 1.1380819082260132, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.9140197152245344, |
|
"grad_norm": 6.09926805017728, |
|
"learning_rate": 2.4015387448756976e-09, |
|
"logits/chosen": 16.622642517089844, |
|
"logits/rejected": 15.969447135925293, |
|
"logps/chosen": -354.6962890625, |
|
"logps/rejected": -264.1971435546875, |
|
"loss": 0.0698, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.276373863220215, |
|
"rewards/margins": 9.219854354858398, |
|
"rewards/rejected": -18.496227264404297, |
|
"sft_loss": 1.35190749168396, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 1.9222343921139102, |
|
"grad_norm": 2.2008981068000257, |
|
"learning_rate": 1.954316092450281e-09, |
|
"logits/chosen": 17.52834129333496, |
|
"logits/rejected": 16.535673141479492, |
|
"logps/chosen": -338.9072570800781, |
|
"logps/rejected": -272.974853515625, |
|
"loss": 0.0483, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -10.078722953796387, |
|
"rewards/margins": 8.57765007019043, |
|
"rewards/rejected": -18.6563720703125, |
|
"sft_loss": 1.2665674686431885, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.9304490690032858, |
|
"grad_norm": 6.860255096147663, |
|
"learning_rate": 1.5529710480231272e-09, |
|
"logits/chosen": 18.16209602355957, |
|
"logits/rejected": 17.17742347717285, |
|
"logps/chosen": -305.7500305175781, |
|
"logps/rejected": -253.6704864501953, |
|
"loss": 0.0453, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.595235824584961, |
|
"rewards/margins": 8.096555709838867, |
|
"rewards/rejected": -17.69179344177246, |
|
"sft_loss": 1.0956637859344482, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 1.9386637458926614, |
|
"grad_norm": 5.583742228828171, |
|
"learning_rate": 1.1975778428823524e-09, |
|
"logits/chosen": 17.371732711791992, |
|
"logits/rejected": 16.7208309173584, |
|
"logps/chosen": -353.3210754394531, |
|
"logps/rejected": -281.03302001953125, |
|
"loss": 0.0718, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -10.14670467376709, |
|
"rewards/margins": 9.053946495056152, |
|
"rewards/rejected": -19.200651168823242, |
|
"sft_loss": 1.1084918975830078, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.9468784227820373, |
|
"grad_norm": 7.482058530142954, |
|
"learning_rate": 8.882022092346064e-10, |
|
"logits/chosen": 17.550270080566406, |
|
"logits/rejected": 16.700857162475586, |
|
"logps/chosen": -347.03363037109375, |
|
"logps/rejected": -273.7410583496094, |
|
"loss": 0.0371, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.479987144470215, |
|
"rewards/margins": 9.481571197509766, |
|
"rewards/rejected": -18.961559295654297, |
|
"sft_loss": 1.2440842390060425, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 1.9550930996714129, |
|
"grad_norm": 2.060017516908208, |
|
"learning_rate": 6.249013680474368e-10, |
|
"logits/chosen": 17.62407875061035, |
|
"logits/rejected": 16.294307708740234, |
|
"logps/chosen": -312.6640625, |
|
"logps/rejected": -252.1957550048828, |
|
"loss": 0.042, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.584585189819336, |
|
"rewards/margins": 7.968944072723389, |
|
"rewards/rejected": -17.553531646728516, |
|
"sft_loss": 1.1957759857177734, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.9633077765607885, |
|
"grad_norm": 8.70865898012454, |
|
"learning_rate": 4.0772401846608794e-10, |
|
"logits/chosen": 18.594797134399414, |
|
"logits/rejected": 17.494707107543945, |
|
"logps/chosen": -298.91571044921875, |
|
"logps/rejected": -247.17645263671875, |
|
"loss": 0.0791, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.819672584533691, |
|
"rewards/margins": 7.458762168884277, |
|
"rewards/rejected": -17.27843475341797, |
|
"sft_loss": 1.1721436977386475, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 1.9715224534501643, |
|
"grad_norm": 8.143555814452077, |
|
"learning_rate": 2.367103288061223e-10, |
|
"logits/chosen": 17.42876625061035, |
|
"logits/rejected": 16.18610191345215, |
|
"logps/chosen": -311.3353271484375, |
|
"logps/rejected": -255.815673828125, |
|
"loss": 0.057, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.853960990905762, |
|
"rewards/margins": 8.45102310180664, |
|
"rewards/rejected": -18.304983139038086, |
|
"sft_loss": 1.2459405660629272, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.9797371303395401, |
|
"grad_norm": 3.1264960747238333, |
|
"learning_rate": 1.1189192912416933e-10, |
|
"logits/chosen": 17.570209503173828, |
|
"logits/rejected": 16.410261154174805, |
|
"logps/chosen": -398.1352233886719, |
|
"logps/rejected": -293.1958923339844, |
|
"loss": 0.0365, |
|
"rewards/accuracies": 0.9866666793823242, |
|
"rewards/chosen": -9.722556114196777, |
|
"rewards/margins": 10.323168754577637, |
|
"rewards/rejected": -20.045726776123047, |
|
"sft_loss": 1.2017709016799927, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 1.9879518072289155, |
|
"grad_norm": 4.393395171762388, |
|
"learning_rate": 3.329190536757731e-11, |
|
"logits/chosen": 18.872753143310547, |
|
"logits/rejected": 18.477998733520508, |
|
"logps/chosen": -312.2131042480469, |
|
"logps/rejected": -260.5821228027344, |
|
"loss": 0.0535, |
|
"rewards/accuracies": 0.9733333587646484, |
|
"rewards/chosen": -9.505008697509766, |
|
"rewards/margins": 9.130599021911621, |
|
"rewards/rejected": -18.635608673095703, |
|
"sft_loss": 1.1339497566223145, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.9961664841182913, |
|
"grad_norm": 5.10635428256011, |
|
"learning_rate": 9.247951046897906e-13, |
|
"logits/chosen": 18.22831916809082, |
|
"logits/rejected": 17.80943489074707, |
|
"logps/chosen": -330.88934326171875, |
|
"logps/rejected": -262.41461181640625, |
|
"loss": 0.0471, |
|
"rewards/accuracies": 0.9600000381469727, |
|
"rewards/chosen": -9.283769607543945, |
|
"rewards/margins": 8.581202507019043, |
|
"rewards/rejected": -17.864973068237305, |
|
"sft_loss": 1.1466354131698608, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 1.9978094194961664, |
|
"step": 1216, |
|
"total_flos": 131893560147968.0, |
|
"train_loss": 0.08742370063708604, |
|
"train_runtime": 41055.7102, |
|
"train_samples_per_second": 1.778, |
|
"train_steps_per_second": 0.03 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1216, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 131893560147968.0, |
|
"train_batch_size": 5, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|