ap-normistral-7b-align-scan / trainer_state.json
hugodk-sch's picture
Model save
75f5528 verified
raw
history blame
21.9 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 385,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 91.5,
"learning_rate": 1.282051282051282e-07,
"logits/chosen": 88.18099975585938,
"logits/rejected": 88.25153350830078,
"logps/chosen": -29.073104858398438,
"logps/rejected": -26.25731658935547,
"loss": 1.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"grad_norm": 76.5,
"learning_rate": 1.282051282051282e-06,
"logits/chosen": 81.08255004882812,
"logits/rejected": 80.78926086425781,
"logps/chosen": -34.20470428466797,
"logps/rejected": -33.038047790527344,
"loss": 0.9368,
"rewards/accuracies": 0.5,
"rewards/chosen": 0.02591484785079956,
"rewards/margins": 0.0816662609577179,
"rewards/rejected": -0.055751409381628036,
"step": 10
},
{
"epoch": 0.05,
"grad_norm": 60.25,
"learning_rate": 2.564102564102564e-06,
"logits/chosen": 80.67174530029297,
"logits/rejected": 80.55998229980469,
"logps/chosen": -33.60923767089844,
"logps/rejected": -30.828128814697266,
"loss": 0.9439,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": 0.05647152662277222,
"rewards/margins": 0.09191782772541046,
"rewards/rejected": -0.035446297377347946,
"step": 20
},
{
"epoch": 0.08,
"grad_norm": 67.5,
"learning_rate": 3.846153846153847e-06,
"logits/chosen": 82.49557495117188,
"logits/rejected": 82.5277099609375,
"logps/chosen": -33.90664291381836,
"logps/rejected": -31.188213348388672,
"loss": 1.0985,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": 0.07432325184345245,
"rewards/margins": -0.0906088799238205,
"rewards/rejected": 0.16493213176727295,
"step": 30
},
{
"epoch": 0.1,
"grad_norm": 75.5,
"learning_rate": 4.999896948438434e-06,
"logits/chosen": 81.06272888183594,
"logits/rejected": 81.05645751953125,
"logps/chosen": -32.722740173339844,
"logps/rejected": -33.15789031982422,
"loss": 0.8862,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.3159271776676178,
"rewards/margins": 0.1684812754392624,
"rewards/rejected": 0.1474459171295166,
"step": 40
},
{
"epoch": 0.13,
"grad_norm": 48.75,
"learning_rate": 4.987541037542187e-06,
"logits/chosen": 78.66886901855469,
"logits/rejected": 78.68830871582031,
"logps/chosen": -30.57431411743164,
"logps/rejected": -30.798864364624023,
"loss": 0.9351,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.44165611267089844,
"rewards/margins": 0.24956238269805908,
"rewards/rejected": 0.19209368526935577,
"step": 50
},
{
"epoch": 0.16,
"grad_norm": 72.5,
"learning_rate": 4.954691471941119e-06,
"logits/chosen": 83.24879455566406,
"logits/rejected": 83.29969787597656,
"logps/chosen": -30.918521881103516,
"logps/rejected": -29.482006072998047,
"loss": 1.0359,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": 0.17964980006217957,
"rewards/margins": 0.1036890521645546,
"rewards/rejected": 0.07596075534820557,
"step": 60
},
{
"epoch": 0.18,
"grad_norm": 87.0,
"learning_rate": 4.901618883413549e-06,
"logits/chosen": 83.91014099121094,
"logits/rejected": 83.93647766113281,
"logps/chosen": -30.502422332763672,
"logps/rejected": -33.12609100341797,
"loss": 0.9567,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.08862228691577911,
"rewards/margins": 0.15049318969249725,
"rewards/rejected": -0.06187089532613754,
"step": 70
},
{
"epoch": 0.21,
"grad_norm": 74.0,
"learning_rate": 4.828760511501322e-06,
"logits/chosen": 81.4886245727539,
"logits/rejected": 81.47552490234375,
"logps/chosen": -31.330123901367188,
"logps/rejected": -31.121978759765625,
"loss": 0.814,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.12237221002578735,
"rewards/margins": 0.3774043917655945,
"rewards/rejected": -0.25503218173980713,
"step": 80
},
{
"epoch": 0.23,
"grad_norm": 83.0,
"learning_rate": 4.7367166013034295e-06,
"logits/chosen": 78.25160217285156,
"logits/rejected": 78.21989440917969,
"logps/chosen": -32.49519729614258,
"logps/rejected": -31.26288414001465,
"loss": 0.8564,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.10008607059717178,
"rewards/margins": 0.31807559728622437,
"rewards/rejected": -0.21798951923847198,
"step": 90
},
{
"epoch": 0.26,
"grad_norm": 82.5,
"learning_rate": 4.626245458345211e-06,
"logits/chosen": 83.49636840820312,
"logits/rejected": 83.53155517578125,
"logps/chosen": -34.187034606933594,
"logps/rejected": -31.907695770263672,
"loss": 0.9161,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": 0.0825684443116188,
"rewards/margins": 0.19257497787475586,
"rewards/rejected": -0.11000655591487885,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": 98.7728500366211,
"eval_logits/rejected": 98.76228332519531,
"eval_logps/chosen": -32.4925651550293,
"eval_logps/rejected": -36.00117492675781,
"eval_loss": 1.0662287473678589,
"eval_rewards/accuracies": 0.5128737688064575,
"eval_rewards/chosen": -0.03457321599125862,
"eval_rewards/margins": -0.010320436209440231,
"eval_rewards/rejected": -0.02425277978181839,
"eval_runtime": 104.2438,
"eval_samples_per_second": 3.29,
"eval_steps_per_second": 0.412,
"step": 100
},
{
"epoch": 0.29,
"grad_norm": 91.5,
"learning_rate": 4.498257201263691e-06,
"logits/chosen": 83.7912368774414,
"logits/rejected": 83.67459106445312,
"logps/chosen": -32.45995330810547,
"logps/rejected": -32.786006927490234,
"loss": 0.8008,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.39399290084838867,
"rewards/margins": 0.4850761294364929,
"rewards/rejected": -0.09108323603868484,
"step": 110
},
{
"epoch": 0.31,
"grad_norm": 95.0,
"learning_rate": 4.353806263777678e-06,
"logits/chosen": 84.00267028808594,
"logits/rejected": 84.11933898925781,
"logps/chosen": -28.27166175842285,
"logps/rejected": -35.5056037902832,
"loss": 0.7011,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.4605434536933899,
"rewards/margins": 0.4930640757083893,
"rewards/rejected": -0.03252064064145088,
"step": 120
},
{
"epoch": 0.34,
"grad_norm": 64.5,
"learning_rate": 4.1940827077152755e-06,
"logits/chosen": 81.23250579833984,
"logits/rejected": 81.24813842773438,
"logps/chosen": -30.38728904724121,
"logps/rejected": -32.12664031982422,
"loss": 0.7344,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.3640448749065399,
"rewards/margins": 0.5005542039871216,
"rewards/rejected": -0.13650932908058167,
"step": 130
},
{
"epoch": 0.36,
"grad_norm": 59.75,
"learning_rate": 4.0204024186666215e-06,
"logits/chosen": 82.44822692871094,
"logits/rejected": 82.46932220458984,
"logps/chosen": -27.172740936279297,
"logps/rejected": -33.0168571472168,
"loss": 0.7509,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.19220882654190063,
"rewards/margins": 0.6076450347900391,
"rewards/rejected": -0.4154362082481384,
"step": 140
},
{
"epoch": 0.39,
"grad_norm": 54.0,
"learning_rate": 3.834196265035119e-06,
"logits/chosen": 80.97503662109375,
"logits/rejected": 80.94606018066406,
"logps/chosen": -29.037616729736328,
"logps/rejected": -33.28493118286133,
"loss": 0.6493,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.2460467517375946,
"rewards/margins": 0.7489484548568726,
"rewards/rejected": -0.5029016733169556,
"step": 150
},
{
"epoch": 0.42,
"grad_norm": 63.0,
"learning_rate": 3.636998309800573e-06,
"logits/chosen": 82.87214660644531,
"logits/rejected": 82.87894439697266,
"logps/chosen": -33.497344970703125,
"logps/rejected": -30.375295639038086,
"loss": 0.7389,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.4475820064544678,
"rewards/margins": 0.7435113191604614,
"rewards/rejected": -0.2959292531013489,
"step": 160
},
{
"epoch": 0.44,
"grad_norm": 63.25,
"learning_rate": 3.4304331721118078e-06,
"logits/chosen": 83.60514831542969,
"logits/rejected": 83.55717468261719,
"logps/chosen": -30.812519073486328,
"logps/rejected": -32.62251281738281,
"loss": 0.7275,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.31273749470710754,
"rewards/margins": 0.7344537377357483,
"rewards/rejected": -0.42171627283096313,
"step": 170
},
{
"epoch": 0.47,
"grad_norm": 54.25,
"learning_rate": 3.2162026428305436e-06,
"logits/chosen": 81.30427551269531,
"logits/rejected": 81.28076171875,
"logps/chosen": -30.593530654907227,
"logps/rejected": -31.66329574584961,
"loss": 0.58,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.4225357472896576,
"rewards/margins": 0.7767966985702515,
"rewards/rejected": -0.3542609214782715,
"step": 180
},
{
"epoch": 0.49,
"grad_norm": 37.5,
"learning_rate": 2.996071664294641e-06,
"logits/chosen": 82.88330078125,
"logits/rejected": 82.88325500488281,
"logps/chosen": -30.3848934173584,
"logps/rejected": -30.6480770111084,
"loss": 0.8789,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.2670655846595764,
"rewards/margins": 0.39310184121131897,
"rewards/rejected": -0.12603625655174255,
"step": 190
},
{
"epoch": 0.52,
"grad_norm": 56.25,
"learning_rate": 2.7718537898066833e-06,
"logits/chosen": 78.49826049804688,
"logits/rejected": 78.43463134765625,
"logps/chosen": -33.972293853759766,
"logps/rejected": -32.59092330932617,
"loss": 0.8398,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.5463098287582397,
"rewards/margins": 0.6145724058151245,
"rewards/rejected": -0.06826266646385193,
"step": 200
},
{
"epoch": 0.52,
"eval_logits/chosen": 98.71674346923828,
"eval_logits/rejected": 98.69157409667969,
"eval_logps/chosen": -32.63681411743164,
"eval_logps/rejected": -36.29216003417969,
"eval_loss": 1.0089657306671143,
"eval_rewards/accuracies": 0.5141196250915527,
"eval_rewards/chosen": -0.13554596900939941,
"eval_rewards/margins": 0.09239647537469864,
"eval_rewards/rejected": -0.22794245183467865,
"eval_runtime": 104.0982,
"eval_samples_per_second": 3.295,
"eval_steps_per_second": 0.413,
"step": 200
},
{
"epoch": 0.55,
"grad_norm": 73.5,
"learning_rate": 2.5453962426402006e-06,
"logits/chosen": 81.18994140625,
"logits/rejected": 81.10676574707031,
"logps/chosen": -33.214935302734375,
"logps/rejected": -35.187870025634766,
"loss": 0.6733,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.511551022529602,
"rewards/margins": 0.6527955532073975,
"rewards/rejected": -0.14124450087547302,
"step": 210
},
{
"epoch": 0.57,
"grad_norm": 53.5,
"learning_rate": 2.3185646976551794e-06,
"logits/chosen": 83.3000259399414,
"logits/rejected": 83.38983917236328,
"logps/chosen": -31.05356216430664,
"logps/rejected": -31.100086212158203,
"loss": 0.5573,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.6267757415771484,
"rewards/margins": 0.8865677714347839,
"rewards/rejected": -0.2597920000553131,
"step": 220
},
{
"epoch": 0.6,
"grad_norm": 59.5,
"learning_rate": 2.0932279108998323e-06,
"logits/chosen": 80.46002197265625,
"logits/rejected": 80.5117416381836,
"logps/chosen": -32.2435417175293,
"logps/rejected": -34.2344970703125,
"loss": 0.7635,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.393540620803833,
"rewards/margins": 0.5491863489151001,
"rewards/rejected": -0.15564575791358948,
"step": 230
},
{
"epoch": 0.62,
"grad_norm": 72.0,
"learning_rate": 1.8712423238279358e-06,
"logits/chosen": 82.81127166748047,
"logits/rejected": 83.10184478759766,
"logps/chosen": -30.6541748046875,
"logps/rejected": -31.797021865844727,
"loss": 0.5215,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.7237205505371094,
"rewards/margins": 0.9092057347297668,
"rewards/rejected": -0.18548506498336792,
"step": 240
},
{
"epoch": 0.65,
"grad_norm": 76.5,
"learning_rate": 1.6544367689701824e-06,
"logits/chosen": 81.55049133300781,
"logits/rejected": 81.6218032836914,
"logps/chosen": -26.869796752929688,
"logps/rejected": -30.16421890258789,
"loss": 0.7649,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.5082693696022034,
"rewards/margins": 0.6927058696746826,
"rewards/rejected": -0.18443644046783447,
"step": 250
},
{
"epoch": 0.68,
"grad_norm": 52.5,
"learning_rate": 1.4445974030621963e-06,
"logits/chosen": 78.84456634521484,
"logits/rejected": 78.9933090209961,
"logps/chosen": -30.304040908813477,
"logps/rejected": -36.30915069580078,
"loss": 0.4765,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": 0.8798693418502808,
"rewards/margins": 1.0251331329345703,
"rewards/rejected": -0.1452637016773224,
"step": 260
},
{
"epoch": 0.7,
"grad_norm": 57.0,
"learning_rate": 1.243452991757889e-06,
"logits/chosen": 78.18994903564453,
"logits/rejected": 78.21639251708984,
"logps/chosen": -30.825641632080078,
"logps/rejected": -31.800342559814453,
"loss": 0.6623,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.6672974228858948,
"rewards/margins": 0.8737428784370422,
"rewards/rejected": -0.20644548535346985,
"step": 270
},
{
"epoch": 0.73,
"grad_norm": 76.0,
"learning_rate": 1.0526606671603523e-06,
"logits/chosen": 80.78196716308594,
"logits/rejected": 80.56169128417969,
"logps/chosen": -30.946773529052734,
"logps/rejected": -29.854522705078125,
"loss": 0.6812,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.6041832566261292,
"rewards/margins": 0.7354522943496704,
"rewards/rejected": -0.13126906752586365,
"step": 280
},
{
"epoch": 0.75,
"grad_norm": 60.25,
"learning_rate": 8.737922755071455e-07,
"logits/chosen": 80.99944305419922,
"logits/rejected": 80.91734313964844,
"logps/chosen": -32.94629669189453,
"logps/rejected": -32.49176025390625,
"loss": 0.5405,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.8105659484863281,
"rewards/margins": 1.1429924964904785,
"rewards/rejected": -0.3324265480041504,
"step": 290
},
{
"epoch": 0.78,
"grad_norm": 72.5,
"learning_rate": 7.08321427484816e-07,
"logits/chosen": 76.61840057373047,
"logits/rejected": 76.69322204589844,
"logps/chosen": -32.09418487548828,
"logps/rejected": -29.156299591064453,
"loss": 0.733,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.9180667996406555,
"rewards/margins": 0.9560586214065552,
"rewards/rejected": -0.03799174353480339,
"step": 300
},
{
"epoch": 0.78,
"eval_logits/chosen": 98.73394012451172,
"eval_logits/rejected": 98.70979309082031,
"eval_logps/chosen": -32.53404235839844,
"eval_logps/rejected": -36.16787338256836,
"eval_loss": 1.0123846530914307,
"eval_rewards/accuracies": 0.5398671627044678,
"eval_rewards/chosen": -0.06360965222120285,
"eval_rewards/margins": 0.07733342051506042,
"eval_rewards/rejected": -0.14094306528568268,
"eval_runtime": 104.0616,
"eval_samples_per_second": 3.296,
"eval_steps_per_second": 0.413,
"step": 300
},
{
"epoch": 0.81,
"grad_norm": 63.0,
"learning_rate": 5.576113578589035e-07,
"logits/chosen": 83.6187515258789,
"logits/rejected": 83.65609741210938,
"logps/chosen": -30.124670028686523,
"logps/rejected": -32.455326080322266,
"loss": 0.6913,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.5172038078308105,
"rewards/margins": 0.6960991024971008,
"rewards/rejected": -0.1788952797651291,
"step": 310
},
{
"epoch": 0.83,
"grad_norm": 53.5,
"learning_rate": 4.229036944380913e-07,
"logits/chosen": 81.21861267089844,
"logits/rejected": 81.21974182128906,
"logps/chosen": -30.413522720336914,
"logps/rejected": -29.050277709960938,
"loss": 0.5746,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.8295270204544067,
"rewards/margins": 0.921076774597168,
"rewards/rejected": -0.09154972434043884,
"step": 320
},
{
"epoch": 0.86,
"grad_norm": 42.5,
"learning_rate": 3.053082288996112e-07,
"logits/chosen": 78.42388153076172,
"logits/rejected": 78.47276306152344,
"logps/chosen": -28.912973403930664,
"logps/rejected": -32.85163116455078,
"loss": 0.5118,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 1.007744550704956,
"rewards/margins": 1.1594822406768799,
"rewards/rejected": -0.151737779378891,
"step": 330
},
{
"epoch": 0.88,
"grad_norm": 78.0,
"learning_rate": 2.0579377374915805e-07,
"logits/chosen": 82.58137512207031,
"logits/rejected": 82.63166046142578,
"logps/chosen": -32.287044525146484,
"logps/rejected": -33.664268493652344,
"loss": 0.7276,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.6475721597671509,
"rewards/margins": 0.8544757962226868,
"rewards/rejected": -0.2069036066532135,
"step": 340
},
{
"epoch": 0.91,
"grad_norm": 56.75,
"learning_rate": 1.2518018074041684e-07,
"logits/chosen": 81.68583679199219,
"logits/rejected": 81.69776916503906,
"logps/chosen": -32.62708282470703,
"logps/rejected": -33.35443115234375,
"loss": 0.626,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.7116604447364807,
"rewards/margins": 0.8955025672912598,
"rewards/rejected": -0.18384216725826263,
"step": 350
},
{
"epoch": 0.94,
"grad_norm": 45.5,
"learning_rate": 6.41315865106129e-08,
"logits/chosen": 83.15383911132812,
"logits/rejected": 83.18663024902344,
"logps/chosen": -28.314571380615234,
"logps/rejected": -31.6760196685791,
"loss": 0.6231,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.8663473129272461,
"rewards/margins": 0.8846683502197266,
"rewards/rejected": -0.01832098886370659,
"step": 360
},
{
"epoch": 0.96,
"grad_norm": 76.0,
"learning_rate": 2.3150941078050325e-08,
"logits/chosen": 82.54973602294922,
"logits/rejected": 82.57438659667969,
"logps/chosen": -31.776432037353516,
"logps/rejected": -35.274253845214844,
"loss": 0.6836,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.7698130011558533,
"rewards/margins": 0.8852267265319824,
"rewards/rejected": -0.11541371047496796,
"step": 370
},
{
"epoch": 0.99,
"grad_norm": 72.0,
"learning_rate": 2.575864278703266e-09,
"logits/chosen": 76.53568267822266,
"logits/rejected": 76.40200805664062,
"logps/chosen": -29.737590789794922,
"logps/rejected": -28.21639060974121,
"loss": 0.7558,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.5869752168655396,
"rewards/margins": 0.6260467767715454,
"rewards/rejected": -0.03907149285078049,
"step": 380
},
{
"epoch": 1.0,
"step": 385,
"total_flos": 0.0,
"train_loss": 0.7429099231571347,
"train_runtime": 2555.3187,
"train_samples_per_second": 1.205,
"train_steps_per_second": 0.151
}
],
"logging_steps": 10,
"max_steps": 385,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}