|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9994666666666666, |
|
"eval_steps": 500, |
|
"global_step": 937, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 5.319148936170213e-08, |
|
"logits/chosen": 0.34782999753952026, |
|
"logits/rejected": 0.3427616059780121, |
|
"logps/chosen": -325.28106689453125, |
|
"logps/rejected": -307.72515869140625, |
|
"loss": 0.1853, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.319148936170213e-07, |
|
"logits/chosen": 0.30850738286972046, |
|
"logits/rejected": 0.3003820478916168, |
|
"logps/chosen": -350.7467956542969, |
|
"logps/rejected": -376.2046203613281, |
|
"loss": 0.2147, |
|
"rewards/accuracies": 0.2291666716337204, |
|
"rewards/chosen": -4.5030174078419805e-05, |
|
"rewards/margins": 1.3379417396208737e-05, |
|
"rewards/rejected": -5.840959056513384e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 1.0638297872340427e-06, |
|
"logits/chosen": 0.2997807562351227, |
|
"logits/rejected": 0.3171563148498535, |
|
"logps/chosen": -282.0256652832031, |
|
"logps/rejected": -303.3882751464844, |
|
"loss": 0.2119, |
|
"rewards/accuracies": 0.21875, |
|
"rewards/chosen": -0.0002205806813435629, |
|
"rewards/margins": -0.00011417151836212724, |
|
"rewards/rejected": -0.00010640917753335088, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.595744680851064e-06, |
|
"logits/chosen": 0.31989020109176636, |
|
"logits/rejected": 0.3349720537662506, |
|
"logps/chosen": -293.8145751953125, |
|
"logps/rejected": -287.182373046875, |
|
"loss": 0.2235, |
|
"rewards/accuracies": 0.22499999403953552, |
|
"rewards/chosen": -0.00011079121759394184, |
|
"rewards/margins": -4.205655932310037e-05, |
|
"rewards/rejected": -6.873465463286266e-05, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 2.1276595744680853e-06, |
|
"logits/chosen": 0.2753044068813324, |
|
"logits/rejected": 0.28743210434913635, |
|
"logps/chosen": -245.92648315429688, |
|
"logps/rejected": -302.4917907714844, |
|
"loss": 0.2106, |
|
"rewards/accuracies": 0.24375000596046448, |
|
"rewards/chosen": 4.66261881229002e-05, |
|
"rewards/margins": 5.961294664302841e-05, |
|
"rewards/rejected": -1.2986754882149398e-05, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.6595744680851065e-06, |
|
"logits/chosen": 0.2745297849178314, |
|
"logits/rejected": 0.2960417866706848, |
|
"logps/chosen": -269.48114013671875, |
|
"logps/rejected": -282.2041015625, |
|
"loss": 0.2124, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.00018958283180836588, |
|
"rewards/margins": 0.00012250976578798145, |
|
"rewards/rejected": -0.0003120926267001778, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 3.191489361702128e-06, |
|
"logits/chosen": 0.3166781961917877, |
|
"logits/rejected": 0.28668132424354553, |
|
"logps/chosen": -276.03466796875, |
|
"logps/rejected": -298.3278503417969, |
|
"loss": 0.2073, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.0009398458641953766, |
|
"rewards/margins": 0.0002089624322252348, |
|
"rewards/rejected": -0.001148808398284018, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 3.723404255319149e-06, |
|
"logits/chosen": 0.32457390427589417, |
|
"logits/rejected": 0.3343047499656677, |
|
"logps/chosen": -228.984130859375, |
|
"logps/rejected": -267.0279235839844, |
|
"loss": 0.2098, |
|
"rewards/accuracies": 0.26875001192092896, |
|
"rewards/chosen": -0.0025071091949939728, |
|
"rewards/margins": 0.00024417496751993895, |
|
"rewards/rejected": -0.0027512842789292336, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 4.255319148936171e-06, |
|
"logits/chosen": 0.3037349581718445, |
|
"logits/rejected": 0.288893461227417, |
|
"logps/chosen": -264.49493408203125, |
|
"logps/rejected": -277.01397705078125, |
|
"loss": 0.2049, |
|
"rewards/accuracies": 0.26875001192092896, |
|
"rewards/chosen": -0.0028123059310019016, |
|
"rewards/margins": 3.8383899664040655e-05, |
|
"rewards/rejected": -0.0028506899252533913, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.787234042553192e-06, |
|
"logits/chosen": 0.27338820695877075, |
|
"logits/rejected": 0.2850671410560608, |
|
"logps/chosen": -274.2656555175781, |
|
"logps/rejected": -273.5088806152344, |
|
"loss": 0.2097, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": -0.004053288139402866, |
|
"rewards/margins": 0.00020987665629945695, |
|
"rewards/rejected": -0.004263165406882763, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 4.999375059004058e-06, |
|
"logits/chosen": 0.2746526896953583, |
|
"logits/rejected": 0.3108225464820862, |
|
"logps/chosen": -305.6490478515625, |
|
"logps/rejected": -320.5104675292969, |
|
"loss": 0.2048, |
|
"rewards/accuracies": 0.2874999940395355, |
|
"rewards/chosen": -0.004907802678644657, |
|
"rewards/margins": 0.00013219797983765602, |
|
"rewards/rejected": -0.005040000192821026, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 4.9955571065548795e-06, |
|
"logits/chosen": 0.25255969166755676, |
|
"logits/rejected": 0.23279385268688202, |
|
"logps/chosen": -310.1192932128906, |
|
"logps/rejected": -333.32049560546875, |
|
"loss": 0.21, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.00747376773506403, |
|
"rewards/margins": 0.0008040575194172561, |
|
"rewards/rejected": -0.008277825079858303, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.9882736864879e-06, |
|
"logits/chosen": 0.22498245537281036, |
|
"logits/rejected": 0.2073470801115036, |
|
"logps/chosen": -278.15087890625, |
|
"logps/rejected": -290.3549499511719, |
|
"loss": 0.2104, |
|
"rewards/accuracies": 0.2562499940395355, |
|
"rewards/chosen": -0.013078084215521812, |
|
"rewards/margins": 0.0007778271683491766, |
|
"rewards/rejected": -0.013855909928679466, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 4.977534912960124e-06, |
|
"logits/chosen": 0.20476844906806946, |
|
"logits/rejected": 0.2006731778383255, |
|
"logps/chosen": -339.86907958984375, |
|
"logps/rejected": -366.39935302734375, |
|
"loss": 0.2098, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.029517754912376404, |
|
"rewards/margins": 0.0024804342538118362, |
|
"rewards/rejected": -0.03199819102883339, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 4.963355698422092e-06, |
|
"logits/chosen": 0.16123342514038086, |
|
"logits/rejected": 0.1650470793247223, |
|
"logps/chosen": -336.85479736328125, |
|
"logps/rejected": -380.90106201171875, |
|
"loss": 0.2096, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.046556852757930756, |
|
"rewards/margins": 0.00344092957675457, |
|
"rewards/rejected": -0.049997784197330475, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.945755732909625e-06, |
|
"logits/chosen": 0.127132385969162, |
|
"logits/rejected": 0.11233675479888916, |
|
"logps/chosen": -363.3497009277344, |
|
"logps/rejected": -395.849609375, |
|
"loss": 0.2161, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.06019137054681778, |
|
"rewards/margins": 0.002497343812137842, |
|
"rewards/rejected": -0.06268872320652008, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.924759456701167e-06, |
|
"logits/chosen": 0.05813015252351761, |
|
"logits/rejected": 0.10442493855953217, |
|
"logps/chosen": -364.0877990722656, |
|
"logps/rejected": -407.98272705078125, |
|
"loss": 0.2065, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.052812881767749786, |
|
"rewards/margins": 0.005263908766210079, |
|
"rewards/rejected": -0.05807679891586304, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.900396026378671e-06, |
|
"logits/chosen": -0.020289132371544838, |
|
"logits/rejected": 0.03128629922866821, |
|
"logps/chosen": -377.3454895019531, |
|
"logps/rejected": -375.7449951171875, |
|
"loss": 0.2054, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.061011601239442825, |
|
"rewards/margins": 0.0011604861356317997, |
|
"rewards/rejected": -0.06217208504676819, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.872699274339169e-06, |
|
"logits/chosen": 0.07980453968048096, |
|
"logits/rejected": 0.026858001947402954, |
|
"logps/chosen": -365.4123229980469, |
|
"logps/rejected": -389.89556884765625, |
|
"loss": 0.212, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.05129029601812363, |
|
"rewards/margins": 0.0013983547687530518, |
|
"rewards/rejected": -0.05268865078687668, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.8417076618132434e-06, |
|
"logits/chosen": 0.06391559541225433, |
|
"logits/rejected": 0.08089544624090195, |
|
"logps/chosen": -294.32977294921875, |
|
"logps/rejected": -301.67242431640625, |
|
"loss": 0.2034, |
|
"rewards/accuracies": 0.2874999940395355, |
|
"rewards/chosen": -0.028584271669387817, |
|
"rewards/margins": 0.0021569356322288513, |
|
"rewards/rejected": -0.03074120543897152, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.807464225455655e-06, |
|
"logits/chosen": -0.007983528077602386, |
|
"logits/rejected": -0.02395419403910637, |
|
"logps/chosen": -364.5057373046875, |
|
"logps/rejected": -398.78265380859375, |
|
"loss": 0.2026, |
|
"rewards/accuracies": 0.39375001192092896, |
|
"rewards/chosen": -0.03608248755335808, |
|
"rewards/margins": 0.007917111739516258, |
|
"rewards/rejected": -0.043999604880809784, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.770016517582283e-06, |
|
"logits/chosen": 0.052110157907009125, |
|
"logits/rejected": 0.04244590550661087, |
|
"logps/chosen": -333.72998046875, |
|
"logps/rejected": -362.7451171875, |
|
"loss": 0.212, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": -0.05566523224115372, |
|
"rewards/margins": 0.008566088043153286, |
|
"rewards/rejected": -0.06423132121562958, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7294165401363616e-06, |
|
"logits/chosen": 0.014940446242690086, |
|
"logits/rejected": -0.00022823139443062246, |
|
"logps/chosen": -349.548095703125, |
|
"logps/rejected": -389.1466369628906, |
|
"loss": 0.2003, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.06377661973237991, |
|
"rewards/margins": 0.007529625203460455, |
|
"rewards/rejected": -0.0713062435388565, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 4.68572067247573e-06, |
|
"logits/chosen": -0.05766149237751961, |
|
"logits/rejected": -0.04082841798663139, |
|
"logps/chosen": -325.00189208984375, |
|
"logps/rejected": -329.35302734375, |
|
"loss": 0.2027, |
|
"rewards/accuracies": 0.23125000298023224, |
|
"rewards/chosen": -0.04092506319284439, |
|
"rewards/margins": -0.0009445661562494934, |
|
"rewards/rejected": -0.03998050093650818, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.638989593081364e-06, |
|
"logits/chosen": -0.05024053901433945, |
|
"logits/rejected": 0.0004725128528662026, |
|
"logps/chosen": -322.26275634765625, |
|
"logps/rejected": -347.1831970214844, |
|
"loss": 0.207, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.019810866564512253, |
|
"rewards/margins": 0.003949201200157404, |
|
"rewards/rejected": -0.02376006543636322, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.5892881952959015e-06, |
|
"logits/chosen": -0.025025557726621628, |
|
"logits/rejected": -0.011019307188689709, |
|
"logps/chosen": -288.26739501953125, |
|
"logps/rejected": -308.3834533691406, |
|
"loss": 0.1995, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.008453729562461376, |
|
"rewards/margins": 0.004683175124228001, |
|
"rewards/rejected": -0.013136905618011951, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 4.536685497209182e-06, |
|
"logits/chosen": -0.044585295021533966, |
|
"logits/rejected": -0.011349612846970558, |
|
"logps/chosen": -275.3843078613281, |
|
"logps/rejected": -290.2745666503906, |
|
"loss": 0.2048, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.005228747613728046, |
|
"rewards/margins": 0.004847136326134205, |
|
"rewards/rejected": -0.010075883939862251, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.481254545815943e-06, |
|
"logits/chosen": -0.010054832324385643, |
|
"logits/rejected": 0.022083023563027382, |
|
"logps/chosen": -286.2877502441406, |
|
"logps/rejected": -314.843017578125, |
|
"loss": 0.2042, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.009463262744247913, |
|
"rewards/margins": 0.0016764893662184477, |
|
"rewards/rejected": -0.011139752343297005, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.42307231557875e-06, |
|
"logits/chosen": -0.06939663738012314, |
|
"logits/rejected": -0.06833665817975998, |
|
"logps/chosen": -292.0230407714844, |
|
"logps/rejected": -319.52374267578125, |
|
"loss": 0.2046, |
|
"rewards/accuracies": 0.2874999940395355, |
|
"rewards/chosen": -0.01107553206384182, |
|
"rewards/margins": 0.002820921130478382, |
|
"rewards/rejected": -0.013896455056965351, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.3622196015370305e-06, |
|
"logits/chosen": -0.09846196323633194, |
|
"logits/rejected": -0.05906381085515022, |
|
"logps/chosen": -302.30340576171875, |
|
"logps/rejected": -341.5763244628906, |
|
"loss": 0.2079, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.01659044623374939, |
|
"rewards/margins": 0.010817909613251686, |
|
"rewards/rejected": -0.027408352121710777, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.298780907110648e-06, |
|
"logits/chosen": -0.15265251696109772, |
|
"logits/rejected": -0.09610708057880402, |
|
"logps/chosen": -327.8166198730469, |
|
"logps/rejected": -369.8201904296875, |
|
"loss": 0.209, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.02352728322148323, |
|
"rewards/margins": 0.007946287281811237, |
|
"rewards/rejected": -0.03147356957197189, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.23284432675381e-06, |
|
"logits/chosen": -0.1901048719882965, |
|
"logits/rejected": -0.16242368519306183, |
|
"logps/chosen": -332.37945556640625, |
|
"logps/rejected": -352.0182800292969, |
|
"loss": 0.2023, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.03192076459527016, |
|
"rewards/margins": 0.005780586041510105, |
|
"rewards/rejected": -0.03770134598016739, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.164501423622277e-06, |
|
"logits/chosen": -0.14182882010936737, |
|
"logits/rejected": -0.18550843000411987, |
|
"logps/chosen": -321.2982177734375, |
|
"logps/rejected": -333.14837646484375, |
|
"loss": 0.2061, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.020713280886411667, |
|
"rewards/margins": 0.004291003569960594, |
|
"rewards/rejected": -0.025004282593727112, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.0938471024237355e-06, |
|
"logits/chosen": -0.2318871021270752, |
|
"logits/rejected": -0.12970159947872162, |
|
"logps/chosen": -304.32928466796875, |
|
"logps/rejected": -300.49517822265625, |
|
"loss": 0.2098, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.016532720997929573, |
|
"rewards/margins": 0.005365257151424885, |
|
"rewards/rejected": -0.021897977218031883, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.020979477627907e-06, |
|
"logits/chosen": -0.10503053665161133, |
|
"logits/rejected": -0.14621496200561523, |
|
"logps/chosen": -288.16839599609375, |
|
"logps/rejected": -330.89935302734375, |
|
"loss": 0.2076, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.011596577242016792, |
|
"rewards/margins": 0.006266799755394459, |
|
"rewards/rejected": -0.017863376066088676, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 3.9459997372194105e-06, |
|
"logits/chosen": -0.16021689772605896, |
|
"logits/rejected": -0.14831289649009705, |
|
"logps/chosen": -317.23468017578125, |
|
"logps/rejected": -378.10650634765625, |
|
"loss": 0.199, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0146575216203928, |
|
"rewards/margins": 0.006193594075739384, |
|
"rewards/rejected": -0.020851116627454758, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 3.869012002182573e-06, |
|
"logits/chosen": -0.21353450417518616, |
|
"logits/rejected": -0.2229534387588501, |
|
"logps/chosen": -297.26434326171875, |
|
"logps/rejected": -286.7055358886719, |
|
"loss": 0.2086, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.013432545587420464, |
|
"rewards/margins": 0.001908454461954534, |
|
"rewards/rejected": -0.015341001562774181, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.7901231819133104e-06, |
|
"logits/chosen": -0.23490211367607117, |
|
"logits/rejected": -0.19254347681999207, |
|
"logps/chosen": -299.3580627441406, |
|
"logps/rejected": -300.2660827636719, |
|
"loss": 0.2118, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.01381734199821949, |
|
"rewards/margins": 0.004050114192068577, |
|
"rewards/rejected": -0.017867455258965492, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 3.709442825758875e-06, |
|
"logits/chosen": -0.20214135944843292, |
|
"logits/rejected": -0.26244884729385376, |
|
"logps/chosen": -289.81390380859375, |
|
"logps/rejected": -331.31158447265625, |
|
"loss": 0.2078, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.009037318639457226, |
|
"rewards/margins": 0.0066641224548220634, |
|
"rewards/rejected": -0.01570144109427929, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.6270829708916113e-06, |
|
"logits/chosen": -0.2581767439842224, |
|
"logits/rejected": -0.26762059330940247, |
|
"logps/chosen": -286.0437316894531, |
|
"logps/rejected": -320.72772216796875, |
|
"loss": 0.2007, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.012503271922469139, |
|
"rewards/margins": 0.00960660632699728, |
|
"rewards/rejected": -0.022109879180788994, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 3.543157986727991e-06, |
|
"logits/chosen": -0.3141801357269287, |
|
"logits/rejected": -0.3110749125480652, |
|
"logps/chosen": -358.1673278808594, |
|
"logps/rejected": -395.6116943359375, |
|
"loss": 0.2151, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.02111610770225525, |
|
"rewards/margins": 0.008691903203725815, |
|
"rewards/rejected": -0.029808010905981064, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4577844161089614e-06, |
|
"logits/chosen": -0.3446267545223236, |
|
"logits/rejected": -0.33043938875198364, |
|
"logps/chosen": -289.3715515136719, |
|
"logps/rejected": -327.91790771484375, |
|
"loss": 0.2074, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": -0.015166997909545898, |
|
"rewards/margins": 0.009188723750412464, |
|
"rewards/rejected": -0.024355720728635788, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 3.3710808134621577e-06, |
|
"logits/chosen": -0.36505457758903503, |
|
"logits/rejected": -0.29573512077331543, |
|
"logps/chosen": -290.6937255859375, |
|
"logps/rejected": -299.7286071777344, |
|
"loss": 0.209, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.02466105856001377, |
|
"rewards/margins": 0.00368274818174541, |
|
"rewards/rejected": -0.02834380604326725, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 3.2831675801707126e-06, |
|
"logits/chosen": -0.37998563051223755, |
|
"logits/rejected": -0.3379359841346741, |
|
"logps/chosen": -385.1395568847656, |
|
"logps/rejected": -406.07147216796875, |
|
"loss": 0.2147, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.032053716480731964, |
|
"rewards/margins": 0.007807403802871704, |
|
"rewards/rejected": -0.03986112028360367, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.194166797377289e-06, |
|
"logits/chosen": -0.36350712180137634, |
|
"logits/rejected": -0.35324662923812866, |
|
"logps/chosen": -289.75543212890625, |
|
"logps/rejected": -322.47930908203125, |
|
"loss": 0.1993, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.0282241590321064, |
|
"rewards/margins": 0.012596110813319683, |
|
"rewards/rejected": -0.04082026332616806, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 3.104202056455501e-06, |
|
"logits/chosen": -0.3876686990261078, |
|
"logits/rejected": -0.3610993027687073, |
|
"logps/chosen": -339.6667175292969, |
|
"logps/rejected": -349.5023193359375, |
|
"loss": 0.2046, |
|
"rewards/accuracies": 0.2874999940395355, |
|
"rewards/chosen": -0.035824716091156006, |
|
"rewards/margins": 0.0051012164913117886, |
|
"rewards/rejected": -0.04092593118548393, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 3.013398287384144e-06, |
|
"logits/chosen": -0.42761000990867615, |
|
"logits/rejected": -0.4186409115791321, |
|
"logps/chosen": -332.4256896972656, |
|
"logps/rejected": -350.052490234375, |
|
"loss": 0.2216, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.03896978124976158, |
|
"rewards/margins": 0.004214274697005749, |
|
"rewards/rejected": -0.0431840606033802, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 2.9218815852625717e-06, |
|
"logits/chosen": -0.36252036690711975, |
|
"logits/rejected": -0.36035069823265076, |
|
"logps/chosen": -379.29962158203125, |
|
"logps/rejected": -386.194091796875, |
|
"loss": 0.2114, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.03572685644030571, |
|
"rewards/margins": 0.0034367397893220186, |
|
"rewards/rejected": -0.03916360065340996, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 2.829779035208113e-06, |
|
"logits/chosen": -0.3759670555591583, |
|
"logits/rejected": -0.35854417085647583, |
|
"logps/chosen": -289.8544006347656, |
|
"logps/rejected": -321.8282775878906, |
|
"loss": 0.2106, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": -0.02995840646326542, |
|
"rewards/margins": 0.007169515825808048, |
|
"rewards/rejected": -0.03712791949510574, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.737218535878705e-06, |
|
"logits/chosen": -0.3935397267341614, |
|
"logits/rejected": -0.41043296456336975, |
|
"logps/chosen": -355.903076171875, |
|
"logps/rejected": -344.1985778808594, |
|
"loss": 0.2127, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.0340442880988121, |
|
"rewards/margins": -0.001989628653973341, |
|
"rewards/rejected": -0.032054655253887177, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 2.64432862186579e-06, |
|
"logits/chosen": -0.30559736490249634, |
|
"logits/rejected": -0.33492517471313477, |
|
"logps/chosen": -292.39227294921875, |
|
"logps/rejected": -308.4276123046875, |
|
"loss": 0.2084, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.028410837054252625, |
|
"rewards/margins": 0.004543141461908817, |
|
"rewards/rejected": -0.03295397758483887, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 2.551238285204126e-06, |
|
"logits/chosen": -0.35250839591026306, |
|
"logits/rejected": -0.34054213762283325, |
|
"logps/chosen": -332.04315185546875, |
|
"logps/rejected": -336.251220703125, |
|
"loss": 0.2055, |
|
"rewards/accuracies": 0.28125, |
|
"rewards/chosen": -0.024514295160770416, |
|
"rewards/margins": 0.002826205687597394, |
|
"rewards/rejected": -0.02734050154685974, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.4580767962463688e-06, |
|
"logits/chosen": -0.38282984495162964, |
|
"logits/rejected": -0.30817848443984985, |
|
"logps/chosen": -332.67364501953125, |
|
"logps/rejected": -353.9303894042969, |
|
"loss": 0.2045, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": -0.02591048739850521, |
|
"rewards/margins": 0.006938849575817585, |
|
"rewards/rejected": -0.03284933418035507, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3649735241511546e-06, |
|
"logits/chosen": -0.40525302290916443, |
|
"logits/rejected": -0.36453166604042053, |
|
"logps/chosen": -304.2394714355469, |
|
"logps/rejected": -326.7181701660156, |
|
"loss": 0.203, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.0233243890106678, |
|
"rewards/margins": 0.00685765128582716, |
|
"rewards/rejected": -0.030182043090462685, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 2.2720577572339914e-06, |
|
"logits/chosen": -0.3453354239463806, |
|
"logits/rejected": -0.4035136103630066, |
|
"logps/chosen": -356.69140625, |
|
"logps/rejected": -386.82281494140625, |
|
"loss": 0.2075, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.025404874235391617, |
|
"rewards/margins": 0.011574333533644676, |
|
"rewards/rejected": -0.036979205906391144, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 2.1794585234303995e-06, |
|
"logits/chosen": -0.37287113070487976, |
|
"logits/rejected": -0.38295817375183105, |
|
"logps/chosen": -306.1138610839844, |
|
"logps/rejected": -333.82958984375, |
|
"loss": 0.2105, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.028054391965270042, |
|
"rewards/margins": 0.006188055966049433, |
|
"rewards/rejected": -0.034242451190948486, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0873044111206407e-06, |
|
"logits/chosen": -0.36476778984069824, |
|
"logits/rejected": -0.34431588649749756, |
|
"logps/chosen": -295.88592529296875, |
|
"logps/rejected": -300.2727355957031, |
|
"loss": 0.2048, |
|
"rewards/accuracies": 0.23125000298023224, |
|
"rewards/chosen": -0.03209972754120827, |
|
"rewards/margins": -0.0022686964366585016, |
|
"rewards/rejected": -0.02983103133738041, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 1.9957233905648293e-06, |
|
"logits/chosen": -0.3742281496524811, |
|
"logits/rejected": -0.37335073947906494, |
|
"logps/chosen": -328.6231994628906, |
|
"logps/rejected": -348.46990966796875, |
|
"loss": 0.2049, |
|
"rewards/accuracies": 0.3187499940395355, |
|
"rewards/chosen": -0.028273243457078934, |
|
"rewards/margins": 0.00650573056191206, |
|
"rewards/rejected": -0.03477897495031357, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.904842636196402e-06, |
|
"logits/chosen": -0.36765938997268677, |
|
"logits/rejected": -0.36060625314712524, |
|
"logps/chosen": -323.7262268066406, |
|
"logps/rejected": -357.27813720703125, |
|
"loss": 0.2111, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.036755647510290146, |
|
"rewards/margins": 0.00827108509838581, |
|
"rewards/rejected": -0.04502673074603081, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 1.814788350020726e-06, |
|
"logits/chosen": -0.3832349181175232, |
|
"logits/rejected": -0.40613269805908203, |
|
"logps/chosen": -313.4564208984375, |
|
"logps/rejected": -355.4290771484375, |
|
"loss": 0.2075, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.03660092502832413, |
|
"rewards/margins": 0.008231913670897484, |
|
"rewards/rejected": -0.04483283683657646, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 1.725685586364051e-06, |
|
"logits/chosen": -0.3985538184642792, |
|
"logits/rejected": -0.44028186798095703, |
|
"logps/chosen": -342.6407470703125, |
|
"logps/rejected": -393.0326843261719, |
|
"loss": 0.2026, |
|
"rewards/accuracies": 0.36250001192092896, |
|
"rewards/chosen": -0.03912167251110077, |
|
"rewards/margins": 0.012444810010492802, |
|
"rewards/rejected": -0.051566481590270996, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6376580782162172e-06, |
|
"logits/chosen": -0.3522663712501526, |
|
"logits/rejected": -0.3656110167503357, |
|
"logps/chosen": -300.55059814453125, |
|
"logps/rejected": -309.12005615234375, |
|
"loss": 0.1985, |
|
"rewards/accuracies": 0.25, |
|
"rewards/chosen": -0.043480511754751205, |
|
"rewards/margins": 0.0020387214608490467, |
|
"rewards/rejected": -0.045519232749938965, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 1.550828065408227e-06, |
|
"logits/chosen": -0.3727906346321106, |
|
"logits/rejected": -0.42088228464126587, |
|
"logps/chosen": -303.08184814453125, |
|
"logps/rejected": -355.48114013671875, |
|
"loss": 0.2027, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.041418589651584625, |
|
"rewards/margins": 0.010809944942593575, |
|
"rewards/rejected": -0.05222853273153305, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 1.4653161248633053e-06, |
|
"logits/chosen": -0.4294905662536621, |
|
"logits/rejected": -0.35463112592697144, |
|
"logps/chosen": -374.58221435546875, |
|
"logps/rejected": -413.10626220703125, |
|
"loss": 0.1983, |
|
"rewards/accuracies": 0.3812499940395355, |
|
"rewards/chosen": -0.04309406504034996, |
|
"rewards/margins": 0.013471859507262707, |
|
"rewards/rejected": -0.05656592175364494, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.381241003157162e-06, |
|
"logits/chosen": -0.38254573941230774, |
|
"logits/rejected": -0.3926312029361725, |
|
"logps/chosen": -307.92071533203125, |
|
"logps/rejected": -331.7400817871094, |
|
"loss": 0.1995, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": -0.03662495315074921, |
|
"rewards/margins": 0.004892958328127861, |
|
"rewards/rejected": -0.04151790589094162, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 1.298719451619979e-06, |
|
"logits/chosen": -0.41799673438072205, |
|
"logits/rejected": -0.423776239156723, |
|
"logps/chosen": -352.4769592285156, |
|
"logps/rejected": -392.0356750488281, |
|
"loss": 0.2013, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.039123691618442535, |
|
"rewards/margins": 0.012257387861609459, |
|
"rewards/rejected": -0.05138107389211655, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.2178660642091036e-06, |
|
"logits/chosen": -0.4221061170101166, |
|
"logits/rejected": -0.3426254093647003, |
|
"logps/chosen": -371.6142883300781, |
|
"logps/rejected": -368.6783752441406, |
|
"loss": 0.2131, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": -0.03723513334989548, |
|
"rewards/margins": 0.001372279948554933, |
|
"rewards/rejected": -0.038607411086559296, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 1.1387931183775821e-06, |
|
"logits/chosen": -0.43726977705955505, |
|
"logits/rejected": -0.433055579662323, |
|
"logps/chosen": -349.0133056640625, |
|
"logps/rejected": -424.606689453125, |
|
"loss": 0.197, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.02950415015220642, |
|
"rewards/margins": 0.016581665724515915, |
|
"rewards/rejected": -0.04608581215143204, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.061610419159532e-06, |
|
"logits/chosen": -0.41025829315185547, |
|
"logits/rejected": -0.39352136850357056, |
|
"logps/chosen": -351.42724609375, |
|
"logps/rejected": -362.4097900390625, |
|
"loss": 0.2084, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": -0.04146607965230942, |
|
"rewards/margins": 0.003974739462137222, |
|
"rewards/rejected": -0.04544081538915634, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 9.864251466888364e-07, |
|
"logits/chosen": -0.3846993148326874, |
|
"logits/rejected": -0.36020296812057495, |
|
"logps/chosen": -345.7566833496094, |
|
"logps/rejected": -369.3031311035156, |
|
"loss": 0.2145, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": -0.0327707901597023, |
|
"rewards/margins": 0.007174340076744556, |
|
"rewards/rejected": -0.03994513303041458, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 9.133417073629288e-07, |
|
"logits/chosen": -0.4323287010192871, |
|
"logits/rejected": -0.46247753500938416, |
|
"logps/chosen": -310.33221435546875, |
|
"logps/rejected": -337.3047790527344, |
|
"loss": 0.196, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": -0.02941690944135189, |
|
"rewards/margins": 0.008200698532164097, |
|
"rewards/rejected": -0.03761760890483856, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 8.424615888583332e-07, |
|
"logits/chosen": -0.44240742921829224, |
|
"logits/rejected": -0.4352414011955261, |
|
"logps/chosen": -350.82220458984375, |
|
"logps/rejected": -371.5367431640625, |
|
"loss": 0.2113, |
|
"rewards/accuracies": 0.34375, |
|
"rewards/chosen": -0.03298790752887726, |
|
"rewards/margins": 0.009050843305885792, |
|
"rewards/rejected": -0.042038749903440475, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 7.738832191993092e-07, |
|
"logits/chosen": -0.4660700857639313, |
|
"logits/rejected": -0.40507060289382935, |
|
"logps/chosen": -363.92041015625, |
|
"logps/rejected": -369.3705139160156, |
|
"loss": 0.2011, |
|
"rewards/accuracies": 0.33125001192092896, |
|
"rewards/chosen": -0.03945212811231613, |
|
"rewards/margins": 0.004098129458725452, |
|
"rewards/rejected": -0.04355026036500931, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.077018300752917e-07, |
|
"logits/chosen": -0.44686412811279297, |
|
"logits/rejected": -0.4067825376987457, |
|
"logps/chosen": -303.0172424316406, |
|
"logps/rejected": -316.5196838378906, |
|
"loss": 0.2008, |
|
"rewards/accuracies": 0.26875001192092896, |
|
"rewards/chosen": -0.028305992484092712, |
|
"rewards/margins": 0.0041877999901771545, |
|
"rewards/rejected": -0.03249379247426987, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 6.440093245969342e-07, |
|
"logits/chosen": -0.4332183003425598, |
|
"logits/rejected": -0.4446489214897156, |
|
"logps/chosen": -340.51263427734375, |
|
"logps/rejected": -362.9837341308594, |
|
"loss": 0.2079, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.034973934292793274, |
|
"rewards/margins": 0.0031310406047850847, |
|
"rewards/rejected": -0.03810497373342514, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 5.828941496744075e-07, |
|
"logits/chosen": -0.3977935314178467, |
|
"logits/rejected": -0.36564216017723083, |
|
"logps/chosen": -354.89202880859375, |
|
"logps/rejected": -352.7674560546875, |
|
"loss": 0.2085, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": -0.03232111781835556, |
|
"rewards/margins": 0.001277880510315299, |
|
"rewards/rejected": -0.033598996698856354, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.244411731951671e-07, |
|
"logits/chosen": -0.4228762090206146, |
|
"logits/rejected": -0.4223594665527344, |
|
"logps/chosen": -291.3377990722656, |
|
"logps/rejected": -305.91143798828125, |
|
"loss": 0.2033, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.026238340884447098, |
|
"rewards/margins": 0.004590832162648439, |
|
"rewards/rejected": -0.03082917258143425, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 4.6873156617173594e-07, |
|
"logits/chosen": -0.37430620193481445, |
|
"logits/rejected": -0.37224718928337097, |
|
"logps/chosen": -271.09088134765625, |
|
"logps/rejected": -290.9538879394531, |
|
"loss": 0.2111, |
|
"rewards/accuracies": 0.2562499940395355, |
|
"rewards/chosen": -0.02569451928138733, |
|
"rewards/margins": 0.008396224118769169, |
|
"rewards/rejected": -0.03409074246883392, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.1584269002318653e-07, |
|
"logits/chosen": -0.44756025075912476, |
|
"logits/rejected": -0.3608166575431824, |
|
"logps/chosen": -325.45709228515625, |
|
"logps/rejected": -369.91351318359375, |
|
"loss": 0.1955, |
|
"rewards/accuracies": 0.3062500059604645, |
|
"rewards/chosen": -0.03604112192988396, |
|
"rewards/margins": 0.004559466149657965, |
|
"rewards/rejected": -0.04060059040784836, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.658479891468258e-07, |
|
"logits/chosen": -0.3722971975803375, |
|
"logits/rejected": -0.34547311067581177, |
|
"logps/chosen": -340.4040832519531, |
|
"logps/rejected": -355.20404052734375, |
|
"loss": 0.2012, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.03479091823101044, |
|
"rewards/margins": 0.0040581924840807915, |
|
"rewards/rejected": -0.038849107921123505, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.18816888929272e-07, |
|
"logits/chosen": -0.4394063949584961, |
|
"logits/rejected": -0.44897159934043884, |
|
"logps/chosen": -325.93255615234375, |
|
"logps/rejected": -360.37713623046875, |
|
"loss": 0.2055, |
|
"rewards/accuracies": 0.32499998807907104, |
|
"rewards/chosen": -0.034493155777454376, |
|
"rewards/margins": 0.005803712643682957, |
|
"rewards/rejected": -0.04029686748981476, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 2.748146993385484e-07, |
|
"logits/chosen": -0.3761179447174072, |
|
"logits/rejected": -0.4133715033531189, |
|
"logps/chosen": -341.07574462890625, |
|
"logps/rejected": -353.9838562011719, |
|
"loss": 0.2004, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.037453822791576385, |
|
"rewards/margins": 0.0007335458067245781, |
|
"rewards/rejected": -0.03818737342953682, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 2.3390252423108077e-07, |
|
"logits/chosen": -0.40443721413612366, |
|
"logits/rejected": -0.40503472089767456, |
|
"logps/chosen": -257.16357421875, |
|
"logps/rejected": -296.4949035644531, |
|
"loss": 0.2101, |
|
"rewards/accuracies": 0.2874999940395355, |
|
"rewards/chosen": -0.025397296994924545, |
|
"rewards/margins": 0.011320685967803001, |
|
"rewards/rejected": -0.036717988550662994, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 1.961371764995243e-07, |
|
"logits/chosen": -0.42295295000076294, |
|
"logits/rejected": -0.4149630069732666, |
|
"logps/chosen": -346.1056823730469, |
|
"logps/rejected": -367.1171569824219, |
|
"loss": 0.2066, |
|
"rewards/accuracies": 0.2750000059604645, |
|
"rewards/chosen": -0.03263362497091293, |
|
"rewards/margins": 0.007565206382423639, |
|
"rewards/rejected": -0.04019883647561073, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 1.61571099179261e-07, |
|
"logits/chosen": -0.4583490490913391, |
|
"logits/rejected": -0.432847797870636, |
|
"logps/chosen": -328.78582763671875, |
|
"logps/rejected": -337.87445068359375, |
|
"loss": 0.2024, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.031004447489976883, |
|
"rewards/margins": 0.005912850610911846, |
|
"rewards/rejected": -0.03691729158163071, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.3025229262312367e-07, |
|
"logits/chosen": -0.4011611044406891, |
|
"logits/rejected": -0.4167350232601166, |
|
"logps/chosen": -345.6464538574219, |
|
"logps/rejected": -370.5746154785156, |
|
"loss": 0.205, |
|
"rewards/accuracies": 0.35624998807907104, |
|
"rewards/chosen": -0.03289477154612541, |
|
"rewards/margins": 0.008914651349186897, |
|
"rewards/rejected": -0.041809432208538055, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 1.0222424784546853e-07, |
|
"logits/chosen": -0.4343733787536621, |
|
"logits/rejected": -0.43256425857543945, |
|
"logps/chosen": -310.9982604980469, |
|
"logps/rejected": -327.97802734375, |
|
"loss": 0.199, |
|
"rewards/accuracies": 0.2562499940395355, |
|
"rewards/chosen": -0.025628242641687393, |
|
"rewards/margins": 0.006485571153461933, |
|
"rewards/rejected": -0.03211381286382675, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 7.752588612816553e-08, |
|
"logits/chosen": -0.4601428508758545, |
|
"logits/rejected": -0.4454170763492584, |
|
"logps/chosen": -326.32415771484375, |
|
"logps/rejected": -367.8747253417969, |
|
"loss": 0.21, |
|
"rewards/accuracies": 0.3375000059604645, |
|
"rewards/chosen": -0.02655145525932312, |
|
"rewards/margins": 0.01180974580347538, |
|
"rewards/rejected": -0.03836119920015335, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 5.619150497236991e-08, |
|
"logits/chosen": -0.41596898436546326, |
|
"logits/rejected": -0.4292394518852234, |
|
"logps/chosen": -312.1533203125, |
|
"logps/rejected": -325.45599365234375, |
|
"loss": 0.1936, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.03066674806177616, |
|
"rewards/margins": 0.004992074333131313, |
|
"rewards/rejected": -0.0356588289141655, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 3.825073047112743e-08, |
|
"logits/chosen": -0.41111326217651367, |
|
"logits/rejected": -0.4396681785583496, |
|
"logps/chosen": -300.7364807128906, |
|
"logps/rejected": -301.65289306640625, |
|
"loss": 0.207, |
|
"rewards/accuracies": 0.26875001192092896, |
|
"rewards/chosen": -0.026606258004903793, |
|
"rewards/margins": 0.0029051213059574366, |
|
"rewards/rejected": -0.029511380940675735, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.372847616895685e-08, |
|
"logits/chosen": -0.4996066689491272, |
|
"logits/rejected": -0.43205633759498596, |
|
"logps/chosen": -359.67010498046875, |
|
"logps/rejected": -420.83026123046875, |
|
"loss": 0.199, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": -0.030006730929017067, |
|
"rewards/margins": 0.015778595581650734, |
|
"rewards/rejected": -0.0457853302359581, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 1.264490846553279e-08, |
|
"logits/chosen": -0.42298024892807007, |
|
"logits/rejected": -0.40388956665992737, |
|
"logps/chosen": -319.5880432128906, |
|
"logps/rejected": -326.4891662597656, |
|
"loss": 0.201, |
|
"rewards/accuracies": 0.29374998807907104, |
|
"rewards/chosen": -0.0287660863250494, |
|
"rewards/margins": 0.004919327795505524, |
|
"rewards/rejected": -0.033685412257909775, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 5.015418611516165e-09, |
|
"logits/chosen": -0.38126152753829956, |
|
"logits/rejected": -0.43538981676101685, |
|
"logps/chosen": -315.6664123535156, |
|
"logps/rejected": -353.7925720214844, |
|
"loss": 0.2054, |
|
"rewards/accuracies": 0.3125, |
|
"rewards/chosen": -0.028091344982385635, |
|
"rewards/margins": 0.011006112210452557, |
|
"rewards/rejected": -0.03909745439887047, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 8.506013354186993e-10, |
|
"logits/chosen": -0.403546005487442, |
|
"logits/rejected": -0.42747077345848083, |
|
"logps/chosen": -312.3913879394531, |
|
"logps/rejected": -342.631103515625, |
|
"loss": 0.2149, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.029346201568841934, |
|
"rewards/margins": 0.008023385889828205, |
|
"rewards/rejected": -0.037369586527347565, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 937, |
|
"total_flos": 0.0, |
|
"train_loss": 0.051660444591444865, |
|
"train_runtime": 2852.4153, |
|
"train_samples_per_second": 10.517, |
|
"train_steps_per_second": 0.328 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 937, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|