aftonposten-6b-align-scan / trainer_state.json
hugodk-sch's picture
Model save
87672b8 verified
raw
history blame
21.3 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 385,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.282051282051282e-08,
"logits/chosen": -1.7278180122375488,
"logits/rejected": -1.7377450466156006,
"logps/chosen": -29.553977966308594,
"logps/rejected": -42.813133239746094,
"loss": 0.5,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"learning_rate": 1.2820512820512818e-07,
"logits/chosen": -1.8662465810775757,
"logits/rejected": -1.8705615997314453,
"logps/chosen": -36.9855842590332,
"logps/rejected": -33.65031433105469,
"loss": 0.4935,
"rewards/accuracies": 0.4861111044883728,
"rewards/chosen": 0.016807515174150467,
"rewards/margins": 0.03299880772829056,
"rewards/rejected": -0.01619129255414009,
"step": 10
},
{
"epoch": 0.05,
"learning_rate": 2.5641025641025636e-07,
"logits/chosen": -1.9972314834594727,
"logits/rejected": -1.999875783920288,
"logps/chosen": -29.622329711914062,
"logps/rejected": -29.04340171813965,
"loss": 0.4989,
"rewards/accuracies": 0.44999998807907104,
"rewards/chosen": 0.01589435711503029,
"rewards/margins": -0.00013793967082165182,
"rewards/rejected": 0.016032297164201736,
"step": 20
},
{
"epoch": 0.08,
"learning_rate": 3.8461538461538463e-07,
"logits/chosen": -1.9198119640350342,
"logits/rejected": -1.9171171188354492,
"logps/chosen": -31.40401268005371,
"logps/rejected": -33.211997985839844,
"loss": 0.4999,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.0096666868776083,
"rewards/margins": 0.0044937655329704285,
"rewards/rejected": 0.005172918550670147,
"step": 30
},
{
"epoch": 0.1,
"learning_rate": 4.999896948438433e-07,
"logits/chosen": -2.0162081718444824,
"logits/rejected": -2.007472515106201,
"logps/chosen": -32.587196350097656,
"logps/rejected": -32.514732360839844,
"loss": 0.5011,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.00831048097461462,
"rewards/margins": -0.00666379788890481,
"rewards/rejected": -0.0016466856468468904,
"step": 40
},
{
"epoch": 0.13,
"learning_rate": 4.987541037542186e-07,
"logits/chosen": -1.8641865253448486,
"logits/rejected": -1.8533992767333984,
"logps/chosen": -33.56541061401367,
"logps/rejected": -35.421974182128906,
"loss": 0.5049,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": -0.006343575660139322,
"rewards/margins": -0.030638951808214188,
"rewards/rejected": 0.02429538033902645,
"step": 50
},
{
"epoch": 0.16,
"learning_rate": 4.954691471941118e-07,
"logits/chosen": -1.945642113685608,
"logits/rejected": -1.9475781917572021,
"logps/chosen": -32.572914123535156,
"logps/rejected": -33.161590576171875,
"loss": 0.4972,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.021815191954374313,
"rewards/margins": 0.01398499310016632,
"rewards/rejected": 0.007830199785530567,
"step": 60
},
{
"epoch": 0.18,
"learning_rate": 4.901618883413548e-07,
"logits/chosen": -2.0798556804656982,
"logits/rejected": -2.0848286151885986,
"logps/chosen": -33.97870635986328,
"logps/rejected": -36.580543518066406,
"loss": 0.4965,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.007821302860975266,
"rewards/margins": 0.017155062407255173,
"rewards/rejected": -0.009333762340247631,
"step": 70
},
{
"epoch": 0.21,
"learning_rate": 4.828760511501322e-07,
"logits/chosen": -1.9424225091934204,
"logits/rejected": -1.9455715417861938,
"logps/chosen": -34.389671325683594,
"logps/rejected": -34.575042724609375,
"loss": 0.4968,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.030510548502206802,
"rewards/margins": 0.016619805246591568,
"rewards/rejected": 0.013890743255615234,
"step": 80
},
{
"epoch": 0.23,
"learning_rate": 4.736716601303429e-07,
"logits/chosen": -1.9505561590194702,
"logits/rejected": -1.9550676345825195,
"logps/chosen": -32.456565856933594,
"logps/rejected": -32.361209869384766,
"loss": 0.4976,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.008682352490723133,
"rewards/margins": 0.0105207534506917,
"rewards/rejected": -0.0018384016584604979,
"step": 90
},
{
"epoch": 0.26,
"learning_rate": 4.62624545834521e-07,
"logits/chosen": -2.048992156982422,
"logits/rejected": -2.0470006465911865,
"logps/chosen": -32.2261848449707,
"logps/rejected": -31.262670516967773,
"loss": 0.4981,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.01171906292438507,
"rewards/margins": 0.008886159397661686,
"rewards/rejected": 0.0028329025954008102,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": -2.244107484817505,
"eval_logits/rejected": -2.239222764968872,
"eval_logps/chosen": -34.02288818359375,
"eval_logps/rejected": -37.50926208496094,
"eval_loss": 0.49965521693229675,
"eval_rewards/accuracies": 0.5078904032707214,
"eval_rewards/chosen": 0.009330343455076218,
"eval_rewards/margins": 0.003444999223574996,
"eval_rewards/rejected": 0.005885345861315727,
"eval_runtime": 146.2405,
"eval_samples_per_second": 2.345,
"eval_steps_per_second": 0.294,
"step": 100
},
{
"epoch": 0.29,
"learning_rate": 4.4982572012636904e-07,
"logits/chosen": -2.005356788635254,
"logits/rejected": -2.0029444694519043,
"logps/chosen": -33.24960708618164,
"logps/rejected": -34.023834228515625,
"loss": 0.5009,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.004633937496691942,
"rewards/margins": -0.009062351658940315,
"rewards/rejected": 0.00442841649055481,
"step": 110
},
{
"epoch": 0.31,
"learning_rate": 4.353806263777677e-07,
"logits/chosen": -2.017059564590454,
"logits/rejected": -2.008686065673828,
"logps/chosen": -32.44651412963867,
"logps/rejected": -32.1725959777832,
"loss": 0.4978,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.0012888375204056501,
"rewards/margins": 0.007584270089864731,
"rewards/rejected": -0.008873110637068748,
"step": 120
},
{
"epoch": 0.34,
"learning_rate": 4.194082707715275e-07,
"logits/chosen": -2.0462288856506348,
"logits/rejected": -2.038172960281372,
"logps/chosen": -30.503637313842773,
"logps/rejected": -32.05141067504883,
"loss": 0.5043,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": -0.022338179871439934,
"rewards/margins": -0.023616474121809006,
"rewards/rejected": 0.0012782930862158537,
"step": 130
},
{
"epoch": 0.36,
"learning_rate": 4.020402418666621e-07,
"logits/chosen": -1.9769847393035889,
"logits/rejected": -1.9872528314590454,
"logps/chosen": -31.388320922851562,
"logps/rejected": -32.554039001464844,
"loss": 0.4928,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.02507348358631134,
"rewards/margins": 0.04039759188890457,
"rewards/rejected": -0.015324106439948082,
"step": 140
},
{
"epoch": 0.39,
"learning_rate": 3.8341962650351185e-07,
"logits/chosen": -1.8903350830078125,
"logits/rejected": -1.8914152383804321,
"logps/chosen": -34.154296875,
"logps/rejected": -34.76646423339844,
"loss": 0.4927,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.030716974288225174,
"rewards/margins": 0.03808692842721939,
"rewards/rejected": -0.007369957864284515,
"step": 150
},
{
"epoch": 0.42,
"learning_rate": 3.636998309800572e-07,
"logits/chosen": -1.9426672458648682,
"logits/rejected": -1.939186453819275,
"logps/chosen": -36.157989501953125,
"logps/rejected": -32.7253303527832,
"loss": 0.4972,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": 0.010952227748930454,
"rewards/margins": 0.013016450218856335,
"rewards/rejected": -0.0020642229355871677,
"step": 160
},
{
"epoch": 0.44,
"learning_rate": 3.430433172111807e-07,
"logits/chosen": -2.0421078205108643,
"logits/rejected": -2.0347187519073486,
"logps/chosen": -33.78765106201172,
"logps/rejected": -31.363611221313477,
"loss": 0.496,
"rewards/accuracies": 0.4625000059604645,
"rewards/chosen": 0.008164674043655396,
"rewards/margins": 0.0175738874822855,
"rewards/rejected": -0.009409213438630104,
"step": 170
},
{
"epoch": 0.47,
"learning_rate": 3.216202642830543e-07,
"logits/chosen": -2.0476274490356445,
"logits/rejected": -2.0528929233551025,
"logps/chosen": -32.52192306518555,
"logps/rejected": -32.50886917114258,
"loss": 0.4919,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": 0.01828894577920437,
"rewards/margins": 0.037272557616233826,
"rewards/rejected": -0.018983609974384308,
"step": 180
},
{
"epoch": 0.49,
"learning_rate": 2.9960716642946403e-07,
"logits/chosen": -2.0479583740234375,
"logits/rejected": -2.0451717376708984,
"logps/chosen": -31.4959774017334,
"logps/rejected": -31.333343505859375,
"loss": 0.4986,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.005620955489575863,
"rewards/margins": 0.006484903395175934,
"rewards/rejected": -0.012105859816074371,
"step": 190
},
{
"epoch": 0.52,
"learning_rate": 2.771853789806683e-07,
"logits/chosen": -1.9188188314437866,
"logits/rejected": -1.9234987497329712,
"logps/chosen": -31.607952117919922,
"logps/rejected": -32.79724884033203,
"loss": 0.4987,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.0013334359973669052,
"rewards/margins": 0.0037218607030808926,
"rewards/rejected": -0.005055299494415522,
"step": 200
},
{
"epoch": 0.52,
"eval_logits/chosen": -2.243473768234253,
"eval_logits/rejected": -2.2385945320129395,
"eval_logps/chosen": -34.013954162597656,
"eval_logps/rejected": -37.49945068359375,
"eval_loss": 0.49993959069252014,
"eval_rewards/accuracies": 0.5074750781059265,
"eval_rewards/chosen": 0.016473928466439247,
"eval_rewards/margins": 0.0027353279292583466,
"eval_rewards/rejected": 0.0137386005371809,
"eval_runtime": 145.8931,
"eval_samples_per_second": 2.351,
"eval_steps_per_second": 0.295,
"step": 200
},
{
"epoch": 0.55,
"learning_rate": 2.5453962426402e-07,
"logits/chosen": -2.0315659046173096,
"logits/rejected": -2.0422449111938477,
"logps/chosen": -31.945932388305664,
"logps/rejected": -33.85708999633789,
"loss": 0.497,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.020296860486268997,
"rewards/margins": 0.015811126679182053,
"rewards/rejected": 0.0044857352040708065,
"step": 210
},
{
"epoch": 0.57,
"learning_rate": 2.318564697655179e-07,
"logits/chosen": -1.9255174398422241,
"logits/rejected": -1.9403587579727173,
"logps/chosen": -30.07940101623535,
"logps/rejected": -31.557880401611328,
"loss": 0.4944,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.02570931240916252,
"rewards/margins": 0.02547053061425686,
"rewards/rejected": 0.00023878086358308792,
"step": 220
},
{
"epoch": 0.6,
"learning_rate": 2.093227910899832e-07,
"logits/chosen": -1.9828882217407227,
"logits/rejected": -1.986853003501892,
"logps/chosen": -33.40575408935547,
"logps/rejected": -31.552501678466797,
"loss": 0.4976,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": 0.00937105156481266,
"rewards/margins": 0.011929656378924847,
"rewards/rejected": -0.0025586034171283245,
"step": 230
},
{
"epoch": 0.62,
"learning_rate": 1.8712423238279356e-07,
"logits/chosen": -1.982568383216858,
"logits/rejected": -1.9606094360351562,
"logps/chosen": -34.157630920410156,
"logps/rejected": -34.96028518676758,
"loss": 0.4991,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": -0.00310176657512784,
"rewards/margins": 0.004490714054554701,
"rewards/rejected": -0.007592480629682541,
"step": 240
},
{
"epoch": 0.65,
"learning_rate": 1.654436768970182e-07,
"logits/chosen": -2.024078845977783,
"logits/rejected": -2.0207676887512207,
"logps/chosen": -32.89586639404297,
"logps/rejected": -36.22296905517578,
"loss": 0.4952,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.023358644917607307,
"rewards/margins": 0.02179408073425293,
"rewards/rejected": 0.0015645644161850214,
"step": 250
},
{
"epoch": 0.68,
"learning_rate": 1.444597403062196e-07,
"logits/chosen": -1.8911396265029907,
"logits/rejected": -1.8886913061141968,
"logps/chosen": -34.1867790222168,
"logps/rejected": -35.52009201049805,
"loss": 0.4978,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": 0.0012989563401788473,
"rewards/margins": 0.013674641028046608,
"rewards/rejected": -0.012375684455037117,
"step": 260
},
{
"epoch": 0.7,
"learning_rate": 1.2434529917578887e-07,
"logits/chosen": -1.8761117458343506,
"logits/rejected": -1.8735707998275757,
"logps/chosen": -34.38152313232422,
"logps/rejected": -31.744131088256836,
"loss": 0.5019,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": 0.012860517017543316,
"rewards/margins": -0.008900386281311512,
"rewards/rejected": 0.02176090143620968,
"step": 270
},
{
"epoch": 0.73,
"learning_rate": 1.0526606671603521e-07,
"logits/chosen": -1.979835867881775,
"logits/rejected": -1.9692022800445557,
"logps/chosen": -35.314170837402344,
"logps/rejected": -31.835962295532227,
"loss": 0.4935,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.033973388373851776,
"rewards/margins": 0.029950443655252457,
"rewards/rejected": 0.004022946115583181,
"step": 280
},
{
"epoch": 0.75,
"learning_rate": 8.737922755071453e-08,
"logits/chosen": -2.075986385345459,
"logits/rejected": -2.0609803199768066,
"logps/chosen": -30.91201400756836,
"logps/rejected": -32.63774871826172,
"loss": 0.5005,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": 0.009686904028058052,
"rewards/margins": -0.0015827339375391603,
"rewards/rejected": 0.011269642040133476,
"step": 290
},
{
"epoch": 0.78,
"learning_rate": 7.08321427484816e-08,
"logits/chosen": -1.9475170373916626,
"logits/rejected": -1.944972038269043,
"logps/chosen": -32.88249969482422,
"logps/rejected": -30.8377685546875,
"loss": 0.4901,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.03568952530622482,
"rewards/margins": 0.052456192672252655,
"rewards/rejected": -0.01676667109131813,
"step": 300
},
{
"epoch": 0.78,
"eval_logits/chosen": -2.244415283203125,
"eval_logits/rejected": -2.2395384311676025,
"eval_logps/chosen": -34.01211166381836,
"eval_logps/rejected": -37.495113372802734,
"eval_loss": 0.5000770092010498,
"eval_rewards/accuracies": 0.5045680999755859,
"eval_rewards/chosen": 0.01795424334704876,
"eval_rewards/margins": 0.0007461770437657833,
"eval_rewards/rejected": 0.01720806397497654,
"eval_runtime": 145.9415,
"eval_samples_per_second": 2.35,
"eval_steps_per_second": 0.295,
"step": 300
},
{
"epoch": 0.81,
"learning_rate": 5.576113578589034e-08,
"logits/chosen": -1.9293571710586548,
"logits/rejected": -1.926099419593811,
"logps/chosen": -31.5543212890625,
"logps/rejected": -33.74175262451172,
"loss": 0.4912,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.039222296327352524,
"rewards/margins": 0.03890404850244522,
"rewards/rejected": 0.00031825463520362973,
"step": 310
},
{
"epoch": 0.83,
"learning_rate": 4.229036944380912e-08,
"logits/chosen": -1.9810116291046143,
"logits/rejected": -1.9687116146087646,
"logps/chosen": -34.56316375732422,
"logps/rejected": -33.561100006103516,
"loss": 0.4923,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.026327725499868393,
"rewards/margins": 0.04413483291864395,
"rewards/rejected": -0.01780710555613041,
"step": 320
},
{
"epoch": 0.86,
"learning_rate": 3.053082288996112e-08,
"logits/chosen": -2.0162246227264404,
"logits/rejected": -2.014770269393921,
"logps/chosen": -33.477134704589844,
"logps/rejected": -32.470088958740234,
"loss": 0.4997,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": 0.006926077418029308,
"rewards/margins": -0.0006712455069646239,
"rewards/rejected": 0.007597323507070541,
"step": 330
},
{
"epoch": 0.88,
"learning_rate": 2.05793773749158e-08,
"logits/chosen": -2.103529214859009,
"logits/rejected": -2.0877299308776855,
"logps/chosen": -34.1456298828125,
"logps/rejected": -33.083770751953125,
"loss": 0.5012,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.027009794488549232,
"rewards/margins": -0.0004009060503449291,
"rewards/rejected": 0.02741069719195366,
"step": 340
},
{
"epoch": 0.91,
"learning_rate": 1.251801807404168e-08,
"logits/chosen": -1.9753166437149048,
"logits/rejected": -1.974352478981018,
"logps/chosen": -33.25692367553711,
"logps/rejected": -32.45876693725586,
"loss": 0.4973,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": 0.02013799361884594,
"rewards/margins": 0.015033388510346413,
"rewards/rejected": 0.005104603711515665,
"step": 350
},
{
"epoch": 0.94,
"learning_rate": 6.41315865106129e-09,
"logits/chosen": -1.931349515914917,
"logits/rejected": -1.941706895828247,
"logps/chosen": -32.206031799316406,
"logps/rejected": -35.306983947753906,
"loss": 0.4989,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.001073227496817708,
"rewards/margins": 0.013507463037967682,
"rewards/rejected": -0.01458069123327732,
"step": 360
},
{
"epoch": 0.96,
"learning_rate": 2.3150941078050324e-09,
"logits/chosen": -2.070504665374756,
"logits/rejected": -2.0639472007751465,
"logps/chosen": -33.65058135986328,
"logps/rejected": -29.202342987060547,
"loss": 0.504,
"rewards/accuracies": 0.4124999940395355,
"rewards/chosen": 0.0011108577018603683,
"rewards/margins": -0.018153894692659378,
"rewards/rejected": 0.01926475390791893,
"step": 370
},
{
"epoch": 0.99,
"learning_rate": 2.575864278703266e-10,
"logits/chosen": -1.9301570653915405,
"logits/rejected": -1.9323084354400635,
"logps/chosen": -34.23206329345703,
"logps/rejected": -30.903820037841797,
"loss": 0.4945,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.005591380409896374,
"rewards/margins": 0.03054152801632881,
"rewards/rejected": -0.024950148537755013,
"step": 380
},
{
"epoch": 1.0,
"step": 385,
"total_flos": 0.0,
"train_loss": 0.10985468208015739,
"train_runtime": 628.1848,
"train_samples_per_second": 4.901,
"train_steps_per_second": 0.613
}
],
"logging_steps": 10,
"max_steps": 385,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}