ap-normistral-7b-align-scan / trainer_state.json
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 385,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 1.3046875,
"learning_rate": 1.282051282051282e-07,
"logits/chosen": 88.18099975585938,
"logits/rejected": 88.25153350830078,
"logps/chosen": -29.073104858398438,
"logps/rejected": -26.25731658935547,
"loss": 1.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"grad_norm": 1.09375,
"learning_rate": 1.282051282051282e-06,
"logits/chosen": 81.09645080566406,
"logits/rejected": 80.80389404296875,
"logps/chosen": -34.27156066894531,
"logps/rejected": -33.039093017578125,
"loss": 0.9995,
"rewards/accuracies": 0.4722222089767456,
"rewards/chosen": -0.00029834467568434775,
"rewards/margins": 0.0005084889708086848,
"rewards/rejected": -0.0008068337920121849,
"step": 10
},
{
"epoch": 0.05,
"grad_norm": 1.2734375,
"learning_rate": 2.564102564102564e-06,
"logits/chosen": 80.66552734375,
"logits/rejected": 80.5560073852539,
"logps/chosen": -33.4774055480957,
"logps/rejected": -30.691213607788086,
"loss": 0.9987,
"rewards/accuracies": 0.4375,
"rewards/chosen": 0.0021250424906611443,
"rewards/margins": 0.0012622694484889507,
"rewards/rejected": 0.000862772751133889,
"step": 20
},
{
"epoch": 0.08,
"grad_norm": 1.2109375,
"learning_rate": 3.846153846153847e-06,
"logits/chosen": 82.51115417480469,
"logits/rejected": 82.54508972167969,
"logps/chosen": -33.80036926269531,
"logps/rejected": -31.189748764038086,
"loss": 1.0002,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": 0.002124499063938856,
"rewards/margins": -0.00021631647541653365,
"rewards/rejected": 0.0023408152628690004,
"step": 30
},
{
"epoch": 0.1,
"grad_norm": 1.1796875,
"learning_rate": 4.999896948438434e-06,
"logits/chosen": 81.10090637207031,
"logits/rejected": 81.09576416015625,
"logps/chosen": -32.7674560546875,
"logps/rejected": -33.11550521850586,
"loss": 0.9985,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": 0.0040660640224814415,
"rewards/margins": 0.0015358638484030962,
"rewards/rejected": 0.0025301999412477016,
"step": 40
},
{
"epoch": 0.13,
"grad_norm": 1.1875,
"learning_rate": 4.987541037542187e-06,
"logits/chosen": 78.85154724121094,
"logits/rejected": 78.85734558105469,
"logps/chosen": -30.360393524169922,
"logps/rejected": -30.609283447265625,
"loss": 0.9962,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.008448543958365917,
"rewards/margins": 0.0038085163105279207,
"rewards/rejected": 0.0046400283463299274,
"step": 50
},
{
"epoch": 0.16,
"grad_norm": 0.96484375,
"learning_rate": 4.954691471941119e-06,
"logits/chosen": 83.49021911621094,
"logits/rejected": 83.54866027832031,
"logps/chosen": -30.763973236083984,
"logps/rejected": -29.17538833618164,
"loss": 1.0,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": 0.004111888352781534,
"rewards/margins": -3.9446913433494046e-05,
"rewards/rejected": 0.004151335451751947,
"step": 60
},
{
"epoch": 0.18,
"grad_norm": 1.2109375,
"learning_rate": 4.901618883413549e-06,
"logits/chosen": 84.11228942871094,
"logits/rejected": 84.1441650390625,
"logps/chosen": -30.222454071044922,
"logps/rejected": -32.666595458984375,
"loss": 0.9996,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.0040657008066773415,
"rewards/margins": 0.0003545849467627704,
"rewards/rejected": 0.003711115103214979,
"step": 70
},
{
"epoch": 0.21,
"grad_norm": 1.2421875,
"learning_rate": 4.828760511501322e-06,
"logits/chosen": 81.86946868896484,
"logits/rejected": 81.84814453125,
"logps/chosen": -30.959096908569336,
"logps/rejected": -30.652545928955078,
"loss": 0.9956,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.005458436906337738,
"rewards/margins": 0.0044073979370296,
"rewards/rejected": 0.001051038852892816,
"step": 80
},
{
"epoch": 0.23,
"grad_norm": 1.484375,
"learning_rate": 4.7367166013034295e-06,
"logits/chosen": 78.68418884277344,
"logits/rejected": 78.65721893310547,
"logps/chosen": -32.17829513549805,
"logps/rejected": -30.884775161743164,
"loss": 0.9961,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.00459885410964489,
"rewards/margins": 0.00393189862370491,
"rewards/rejected": 0.0006669552531093359,
"step": 90
},
{
"epoch": 0.26,
"grad_norm": 1.2421875,
"learning_rate": 4.626245458345211e-06,
"logits/chosen": 83.70716857910156,
"logits/rejected": 83.73751068115234,
"logps/chosen": -33.73701477050781,
"logps/rejected": -31.63702964782715,
"loss": 0.9955,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.005679761990904808,
"rewards/margins": 0.004544637631624937,
"rewards/rejected": 0.0011351245921105146,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": 98.73394012451172,
"eval_logits/rejected": 98.7273941040039,
"eval_logps/chosen": -32.38990783691406,
"eval_logps/rejected": -35.92463684082031,
"eval_loss": 0.999876856803894,
"eval_rewards/accuracies": 0.5186877250671387,
"eval_rewards/chosen": 0.0005326389218680561,
"eval_rewards/margins": 0.00011375291069271043,
"eval_rewards/rejected": 0.00041888616397045553,
"eval_runtime": 104.2424,
"eval_samples_per_second": 3.29,
"eval_steps_per_second": 0.413,
"step": 100
},
{
"epoch": 0.29,
"grad_norm": 1.4140625,
"learning_rate": 4.498257201263691e-06,
"logits/chosen": 83.85816955566406,
"logits/rejected": 83.75128936767578,
"logps/chosen": -32.19211959838867,
"logps/rejected": -32.65901565551758,
"loss": 0.9917,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.008306830190122128,
"rewards/margins": 0.008338114246726036,
"rewards/rejected": -3.12842421408277e-05,
"step": 110
},
{
"epoch": 0.31,
"grad_norm": 1.3046875,
"learning_rate": 4.353806263777678e-06,
"logits/chosen": 83.82106018066406,
"logits/rejected": 83.92265319824219,
"logps/chosen": -28.150625228881836,
"logps/rejected": -35.3939208984375,
"loss": 0.9929,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.007789556868374348,
"rewards/margins": 0.0071373311802744865,
"rewards/rejected": 0.000652224407531321,
"step": 120
},
{
"epoch": 0.34,
"grad_norm": 0.9453125,
"learning_rate": 4.1940827077152755e-06,
"logits/chosen": 80.96563720703125,
"logits/rejected": 80.99563598632812,
"logps/chosen": -30.216140747070312,
"logps/rejected": -31.844036102294922,
"loss": 0.994,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.006912143435329199,
"rewards/margins": 0.006036223843693733,
"rewards/rejected": 0.0008759202319197357,
"step": 130
},
{
"epoch": 0.36,
"grad_norm": 1.1171875,
"learning_rate": 4.0204024186666215e-06,
"logits/chosen": 81.863525390625,
"logits/rejected": 81.86921691894531,
"logps/chosen": -26.845142364501953,
"logps/rejected": -33.07027816772461,
"loss": 0.9875,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.006021805107593536,
"rewards/margins": 0.012490840628743172,
"rewards/rejected": -0.006469034589827061,
"step": 140
},
{
"epoch": 0.39,
"grad_norm": 1.21875,
"learning_rate": 3.834196265035119e-06,
"logits/chosen": 80.13746643066406,
"logits/rejected": 80.10902404785156,
"logps/chosen": -28.976547241210938,
"logps/rejected": -33.208518981933594,
"loss": 0.9895,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.004125660751014948,
"rewards/margins": 0.010545835830271244,
"rewards/rejected": -0.006420175079256296,
"step": 150
},
{
"epoch": 0.42,
"grad_norm": 1.3515625,
"learning_rate": 3.636998309800573e-06,
"logits/chosen": 81.72142028808594,
"logits/rejected": 81.74298858642578,
"logps/chosen": -33.8978157043457,
"logps/rejected": -30.907711029052734,
"loss": 0.9881,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.002389371395111084,
"rewards/margins": 0.011941083706915379,
"rewards/rejected": -0.009551710449159145,
"step": 160
},
{
"epoch": 0.44,
"grad_norm": 1.4765625,
"learning_rate": 3.4304331721118078e-06,
"logits/chosen": 82.35487365722656,
"logits/rejected": 82.30474090576172,
"logps/chosen": -30.870525360107422,
"logps/rejected": -33.04078674316406,
"loss": 0.9859,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.003887615632265806,
"rewards/margins": 0.014094889163970947,
"rewards/rejected": -0.010207273997366428,
"step": 170
},
{
"epoch": 0.47,
"grad_norm": 1.21875,
"learning_rate": 3.2162026428305436e-06,
"logits/chosen": 79.4852066040039,
"logits/rejected": 79.46187591552734,
"logps/chosen": -31.02083396911621,
"logps/rejected": -32.165191650390625,
"loss": 0.9882,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.0017631975933909416,
"rewards/margins": 0.011843027547001839,
"rewards/rejected": -0.010079829022288322,
"step": 180
},
{
"epoch": 0.49,
"grad_norm": 0.9609375,
"learning_rate": 2.996071664294641e-06,
"logits/chosen": 80.97419738769531,
"logits/rejected": 80.94820404052734,
"logps/chosen": -30.60634994506836,
"logps/rejected": -31.083566665649414,
"loss": 0.9922,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.0016006485093384981,
"rewards/margins": 0.007756076753139496,
"rewards/rejected": -0.006155428942292929,
"step": 190
},
{
"epoch": 0.52,
"grad_norm": 1.453125,
"learning_rate": 2.7718537898066833e-06,
"logits/chosen": 76.12115478515625,
"logits/rejected": 76.07009887695312,
"logps/chosen": -34.18424606323242,
"logps/rejected": -33.341392517089844,
"loss": 0.9858,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.005684881471097469,
"rewards/margins": 0.014164777472615242,
"rewards/rejected": -0.008479896001517773,
"step": 200
},
{
"epoch": 0.52,
"eval_logits/chosen": 98.05126190185547,
"eval_logits/rejected": 98.02639770507812,
"eval_logps/chosen": -32.97175979614258,
"eval_logps/rejected": -36.845333099365234,
"eval_loss": 0.9964954853057861,
"eval_rewards/accuracies": 0.5274086594581604,
"eval_rewards/chosen": -0.005285844672471285,
"eval_rewards/margins": 0.0035022026859223843,
"eval_rewards/rejected": -0.008788047358393669,
"eval_runtime": 104.1082,
"eval_samples_per_second": 3.295,
"eval_steps_per_second": 0.413,
"step": 200
},
{
"epoch": 0.55,
"grad_norm": 1.84375,
"learning_rate": 2.5453962426402006e-06,
"logits/chosen": 78.64119720458984,
"logits/rejected": 78.55430603027344,
"logps/chosen": -33.689414978027344,
"logps/rejected": -36.20193862915039,
"loss": 0.9853,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.002563029993325472,
"rewards/margins": 0.014721485786139965,
"rewards/rejected": -0.012158457189798355,
"step": 210
},
{
"epoch": 0.57,
"grad_norm": 1.484375,
"learning_rate": 2.3185646976551794e-06,
"logits/chosen": 80.5840835571289,
"logits/rejected": 80.67861938476562,
"logps/chosen": -31.57720947265625,
"logps/rejected": -31.91719627380371,
"loss": 0.9844,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.003717451822012663,
"rewards/margins": 0.015599893406033516,
"rewards/rejected": -0.011882440187036991,
"step": 220
},
{
"epoch": 0.6,
"grad_norm": 1.3984375,
"learning_rate": 2.0932279108998323e-06,
"logits/chosen": 77.53582763671875,
"logits/rejected": 77.5838851928711,
"logps/chosen": -32.72165298461914,
"logps/rejected": -35.34224319458008,
"loss": 0.9859,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.0008409392321482301,
"rewards/margins": 0.014141863211989403,
"rewards/rejected": -0.013300922699272633,
"step": 230
},
{
"epoch": 0.62,
"grad_norm": 1.671875,
"learning_rate": 1.8712423238279358e-06,
"logits/chosen": 79.64659118652344,
"logits/rejected": 79.958984375,
"logps/chosen": -31.332469940185547,
"logps/rejected": -32.86049270629883,
"loss": 0.9832,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.003555959090590477,
"rewards/margins": 0.016840480268001556,
"rewards/rejected": -0.01328451931476593,
"step": 240
},
{
"epoch": 0.65,
"grad_norm": 1.484375,
"learning_rate": 1.6544367689701824e-06,
"logits/chosen": 78.12522888183594,
"logits/rejected": 78.17500305175781,
"logps/chosen": -27.822484970092773,
"logps/rejected": -31.34881019592285,
"loss": 0.9878,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.002265883143991232,
"rewards/margins": 0.012214846909046173,
"rewards/rejected": -0.014480730518698692,
"step": 250
},
{
"epoch": 0.68,
"grad_norm": 1.6171875,
"learning_rate": 1.4445974030621963e-06,
"logits/chosen": 75.08821105957031,
"logits/rejected": 75.22389221191406,
"logps/chosen": -31.112863540649414,
"logps/rejected": -38.40215301513672,
"loss": 0.9725,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.004481295123696327,
"rewards/margins": 0.027486557140946388,
"rewards/rejected": -0.023005260154604912,
"step": 260
},
{
"epoch": 0.7,
"grad_norm": 1.3515625,
"learning_rate": 1.243452991757889e-06,
"logits/chosen": 74.10564422607422,
"logits/rejected": 74.13673400878906,
"logps/chosen": -32.07135772705078,
"logps/rejected": -33.231197357177734,
"loss": 0.9857,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.002924318192526698,
"rewards/margins": 0.01433342695236206,
"rewards/rejected": -0.017257746309041977,
"step": 270
},
{
"epoch": 0.73,
"grad_norm": 2.03125,
"learning_rate": 1.0526606671603523e-06,
"logits/chosen": 77.0625991821289,
"logits/rejected": 76.84493255615234,
"logps/chosen": -32.438629150390625,
"logps/rejected": -31.16558265686035,
"loss": 0.9913,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.006287367548793554,
"rewards/margins": 0.008698503486812115,
"rewards/rejected": -0.014985869638621807,
"step": 280
},
{
"epoch": 0.75,
"grad_norm": 1.34375,
"learning_rate": 8.737922755071455e-07,
"logits/chosen": 77.05482482910156,
"logits/rejected": 76.97974395751953,
"logps/chosen": -34.25292205810547,
"logps/rejected": -34.649898529052734,
"loss": 0.9752,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.0014867703430354595,
"rewards/margins": 0.02484356239438057,
"rewards/rejected": -0.026330333203077316,
"step": 290
},
{
"epoch": 0.78,
"grad_norm": 1.4921875,
"learning_rate": 7.08321427484816e-07,
"logits/chosen": 72.6989974975586,
"logits/rejected": 72.833984375,
"logps/chosen": -33.264137268066406,
"logps/rejected": -30.818592071533203,
"loss": 0.9814,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.0014156814431771636,
"rewards/margins": 0.018581366166472435,
"rewards/rejected": -0.017165686935186386,
"step": 300
},
{
"epoch": 0.78,
"eval_logits/chosen": 97.67390441894531,
"eval_logits/rejected": 97.64021301269531,
"eval_logps/chosen": -33.30087661743164,
"eval_logps/rejected": -37.35591125488281,
"eval_loss": 0.99467533826828,
"eval_rewards/accuracies": 0.5888704061508179,
"eval_rewards/chosen": -0.008576988242566586,
"eval_rewards/margins": 0.00531682837754488,
"eval_rewards/rejected": -0.01389381755143404,
"eval_runtime": 103.9426,
"eval_samples_per_second": 3.3,
"eval_steps_per_second": 0.414,
"step": 300
},
{
"epoch": 0.81,
"grad_norm": 1.609375,
"learning_rate": 5.576113578589035e-07,
"logits/chosen": 80.20366668701172,
"logits/rejected": 80.20387268066406,
"logps/chosen": -30.933481216430664,
"logps/rejected": -34.256614685058594,
"loss": 0.9801,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.0006994610885158181,
"rewards/margins": 0.019869104027748108,
"rewards/rejected": -0.020568564534187317,
"step": 310
},
{
"epoch": 0.83,
"grad_norm": 1.5390625,
"learning_rate": 4.229036944380913e-07,
"logits/chosen": 77.26924133300781,
"logits/rejected": 77.28764343261719,
"logps/chosen": -31.467296600341797,
"logps/rejected": -30.592571258544922,
"loss": 0.982,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.0013126448029652238,
"rewards/margins": 0.01804344728589058,
"rewards/rejected": -0.01673080213367939,
"step": 320
},
{
"epoch": 0.86,
"grad_norm": 1.875,
"learning_rate": 3.053082288996112e-07,
"logits/chosen": 74.20513916015625,
"logits/rejected": 74.25221252441406,
"logps/chosen": -29.909320831298828,
"logps/rejected": -34.725521087646484,
"loss": 0.9747,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.004432853776961565,
"rewards/margins": 0.025339430198073387,
"rewards/rejected": -0.02090657688677311,
"step": 330
},
{
"epoch": 0.88,
"grad_norm": 1.7578125,
"learning_rate": 2.0579377374915805e-07,
"logits/chosen": 78.9554672241211,
"logits/rejected": 78.990478515625,
"logps/chosen": -33.333351135253906,
"logps/rejected": -35.866722106933594,
"loss": 0.9762,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": -0.0012120162136852741,
"rewards/margins": 0.023768287152051926,
"rewards/rejected": -0.024980302900075912,
"step": 340
},
{
"epoch": 0.91,
"grad_norm": 1.5859375,
"learning_rate": 1.2518018074041684e-07,
"logits/chosen": 77.8236083984375,
"logits/rejected": 77.84061431884766,
"logps/chosen": -33.3131103515625,
"logps/rejected": -35.05299758911133,
"loss": 0.9771,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.0033062633592635393,
"rewards/margins": 0.0229182131588459,
"rewards/rejected": -0.01961195096373558,
"step": 350
},
{
"epoch": 0.94,
"grad_norm": 1.5234375,
"learning_rate": 6.41315865106129e-08,
"logits/chosen": 79.53218078613281,
"logits/rejected": 79.56050109863281,
"logps/chosen": -29.001379013061523,
"logps/rejected": -33.16984176635742,
"loss": 0.9793,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.005508318077772856,
"rewards/margins": 0.020708225667476654,
"rewards/rejected": -0.015199905261397362,
"step": 360
},
{
"epoch": 0.96,
"grad_norm": 1.7890625,
"learning_rate": 2.3150941078050325e-08,
"logits/chosen": 78.969970703125,
"logits/rejected": 78.9748306274414,
"logps/chosen": -33.2999267578125,
"logps/rejected": -37.32087326049805,
"loss": 0.9821,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": -0.004237635992467403,
"rewards/margins": 0.017877381294965744,
"rewards/rejected": -0.022115018218755722,
"step": 370
},
{
"epoch": 0.99,
"grad_norm": 1.359375,
"learning_rate": 2.575864278703266e-09,
"logits/chosen": 72.49492645263672,
"logits/rejected": 72.36249542236328,
"logps/chosen": -30.828378677368164,
"logps/rejected": -29.885875701904297,
"loss": 0.9853,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": -0.0025225188583135605,
"rewards/margins": 0.014730495400726795,
"rewards/rejected": -0.01725301705300808,
"step": 380
},
{
"epoch": 1.0,
"step": 385,
"total_flos": 0.0,
"train_loss": 0.9878765378679548,
"train_runtime": 2559.7639,
"train_samples_per_second": 1.203,
"train_steps_per_second": 0.15
}
],
"logging_steps": 10,
"max_steps": 385,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}