aftonposten-6b-align-scan / trainer_state.json
hugodk-sch's picture
Training in progress, step 1300
b70143b verified
raw
history blame
21.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 385,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.282051282051282e-07,
"logits/chosen": -1.7278180122375488,
"logits/rejected": -1.7377450466156006,
"logps/chosen": -29.553977966308594,
"logps/rejected": -42.813133239746094,
"loss": 1.0,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"learning_rate": 1.282051282051282e-06,
"logits/chosen": -1.866829752922058,
"logits/rejected": -1.8711390495300293,
"logps/chosen": -36.97040557861328,
"logps/rejected": -33.66280746459961,
"loss": 0.9586,
"rewards/accuracies": 0.5972222089767456,
"rewards/chosen": 0.021715592592954636,
"rewards/margins": 0.041353289037942886,
"rewards/rejected": -0.01963769644498825,
"step": 10
},
{
"epoch": 0.05,
"learning_rate": 2.564102564102564e-06,
"logits/chosen": -1.9976301193237305,
"logits/rejected": -2.0002708435058594,
"logps/chosen": -29.64252281188965,
"logps/rejected": -29.05857276916504,
"loss": 1.0031,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.00019791116937994957,
"rewards/margins": -0.003119796048849821,
"rewards/rejected": 0.0029218837153166533,
"step": 20
},
{
"epoch": 0.08,
"learning_rate": 3.846153846153847e-06,
"logits/chosen": -1.9210872650146484,
"logits/rejected": -1.918402910232544,
"logps/chosen": -31.397838592529297,
"logps/rejected": -33.22719192504883,
"loss": 0.9838,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.010952453128993511,
"rewards/margins": 0.01619068905711174,
"rewards/rejected": -0.005238235928118229,
"step": 30
},
{
"epoch": 0.1,
"learning_rate": 4.999896948438434e-06,
"logits/chosen": -2.017606258392334,
"logits/rejected": -2.0088634490966797,
"logps/chosen": -32.566612243652344,
"logps/rejected": -32.52539825439453,
"loss": 0.9862,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": 0.006120447069406509,
"rewards/margins": 0.013754432089626789,
"rewards/rejected": -0.007633985485881567,
"step": 40
},
{
"epoch": 0.13,
"learning_rate": 4.987541037542187e-06,
"logits/chosen": -1.8625805377960205,
"logits/rejected": -1.8518139123916626,
"logps/chosen": -33.56818771362305,
"logps/rejected": -35.4713020324707,
"loss": 0.9951,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.0064262161031365395,
"rewards/margins": 0.004948456306010485,
"rewards/rejected": -0.011374671943485737,
"step": 50
},
{
"epoch": 0.16,
"learning_rate": 4.954691471941119e-06,
"logits/chosen": -1.9413652420043945,
"logits/rejected": -1.9432977437973022,
"logps/chosen": -32.551727294921875,
"logps/rejected": -33.218013763427734,
"loss": 0.9525,
"rewards/accuracies": 0.5375000238418579,
"rewards/chosen": 0.02907358668744564,
"rewards/margins": 0.05705299973487854,
"rewards/rejected": -0.02797941491007805,
"step": 60
},
{
"epoch": 0.18,
"learning_rate": 4.901618883413549e-06,
"logits/chosen": -2.0722384452819824,
"logits/rejected": -2.0772085189819336,
"logps/chosen": -33.995296478271484,
"logps/rejected": -36.63286590576172,
"loss": 0.9657,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.004085049033164978,
"rewards/margins": 0.034312374889850616,
"rewards/rejected": -0.038397423923015594,
"step": 70
},
{
"epoch": 0.21,
"learning_rate": 4.828760511501322e-06,
"logits/chosen": -1.9325292110443115,
"logits/rejected": -1.93563973903656,
"logps/chosen": -34.35693359375,
"logps/rejected": -34.67926788330078,
"loss": 0.9069,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.042526502162218094,
"rewards/margins": 0.09463920444250107,
"rewards/rejected": -0.05211270600557327,
"step": 80
},
{
"epoch": 0.23,
"learning_rate": 4.7367166013034295e-06,
"logits/chosen": -1.9406629800796509,
"logits/rejected": -1.9451707601547241,
"logps/chosen": -32.38554000854492,
"logps/rejected": -32.36783218383789,
"loss": 0.9455,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.04912734776735306,
"rewards/margins": 0.05448007583618164,
"rewards/rejected": -0.005352728068828583,
"step": 90
},
{
"epoch": 0.26,
"learning_rate": 4.626245458345211e-06,
"logits/chosen": -2.037841320037842,
"logits/rejected": -2.0358526706695557,
"logps/chosen": -32.14287185668945,
"logps/rejected": -31.300832748413086,
"loss": 0.9204,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.058778904378414154,
"rewards/margins": 0.07955379039049149,
"rewards/rejected": -0.02077488601207733,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": -2.2316644191741943,
"eval_logits/rejected": -2.2268166542053223,
"eval_logps/chosen": -34.03582000732422,
"eval_logps/rejected": -37.548709869384766,
"eval_loss": 0.9816102385520935,
"eval_rewards/accuracies": 0.5394518375396729,
"eval_rewards/chosen": -0.0007606110884808004,
"eval_rewards/margins": 0.018493397161364555,
"eval_rewards/rejected": -0.019254004582762718,
"eval_runtime": 145.9929,
"eval_samples_per_second": 2.349,
"eval_steps_per_second": 0.295,
"step": 100
},
{
"epoch": 0.29,
"learning_rate": 4.498257201263691e-06,
"logits/chosen": -1.9917171001434326,
"logits/rejected": -1.9893367290496826,
"logps/chosen": -33.120052337646484,
"logps/rejected": -34.02777099609375,
"loss": 0.9329,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.0742575079202652,
"rewards/margins": 0.0732978954911232,
"rewards/rejected": 0.0009596304735168815,
"step": 110
},
{
"epoch": 0.31,
"learning_rate": 4.353806263777678e-06,
"logits/chosen": -2.0032849311828613,
"logits/rejected": -1.9949623346328735,
"logps/chosen": -32.30955123901367,
"logps/rejected": -32.144927978515625,
"loss": 0.9308,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.0812125876545906,
"rewards/margins": 0.07126720249652863,
"rewards/rejected": 0.009945395402610302,
"step": 120
},
{
"epoch": 0.34,
"learning_rate": 4.1940827077152755e-06,
"logits/chosen": -2.031322956085205,
"logits/rejected": -2.023355007171631,
"logps/chosen": -30.32772445678711,
"logps/rejected": -32.05691909790039,
"loss": 0.9282,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.08879294246435165,
"rewards/margins": 0.09113965928554535,
"rewards/rejected": -0.002346712863072753,
"step": 130
},
{
"epoch": 0.36,
"learning_rate": 4.0204024186666215e-06,
"logits/chosen": -1.9619598388671875,
"logits/rejected": -1.9721866846084595,
"logps/chosen": -31.2375431060791,
"logps/rejected": -32.572418212890625,
"loss": 0.8702,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.1092713475227356,
"rewards/margins": 0.13179358839988708,
"rewards/rejected": -0.02252225950360298,
"step": 140
},
{
"epoch": 0.39,
"learning_rate": 3.834196265035119e-06,
"logits/chosen": -1.8724457025527954,
"logits/rejected": -1.8736213445663452,
"logps/chosen": -33.905731201171875,
"logps/rejected": -34.80728530883789,
"loss": 0.818,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.17217601835727692,
"rewards/margins": 0.20219452679157257,
"rewards/rejected": -0.03001854196190834,
"step": 150
},
{
"epoch": 0.42,
"learning_rate": 3.636998309800573e-06,
"logits/chosen": -1.923604965209961,
"logits/rejected": -1.9202001094818115,
"logps/chosen": -36.00878143310547,
"logps/rejected": -32.71224594116211,
"loss": 0.9086,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.09773740917444229,
"rewards/margins": 0.09143415838479996,
"rewards/rejected": 0.0063032531179487705,
"step": 160
},
{
"epoch": 0.44,
"learning_rate": 3.4304331721118078e-06,
"logits/chosen": -2.024691343307495,
"logits/rejected": -2.0173799991607666,
"logps/chosen": -33.495445251464844,
"logps/rejected": -31.415653228759766,
"loss": 0.7882,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.18144862353801727,
"rewards/margins": 0.21973037719726562,
"rewards/rejected": -0.03828175365924835,
"step": 170
},
{
"epoch": 0.47,
"learning_rate": 3.2162026428305436e-06,
"logits/chosen": -2.031024694442749,
"logits/rejected": -2.036294937133789,
"logps/chosen": -32.22355270385742,
"logps/rejected": -32.47669219970703,
"loss": 0.8225,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.19274193048477173,
"rewards/margins": 0.1876741349697113,
"rewards/rejected": 0.00506778946146369,
"step": 180
},
{
"epoch": 0.49,
"learning_rate": 2.996071664294641e-06,
"logits/chosen": -2.0325100421905518,
"logits/rejected": -2.029745578765869,
"logps/chosen": -31.281234741210938,
"logps/rejected": -31.360708236694336,
"loss": 0.8564,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.12463297694921494,
"rewards/margins": 0.15012845396995544,
"rewards/rejected": -0.025495493784546852,
"step": 190
},
{
"epoch": 0.52,
"learning_rate": 2.7718537898066833e-06,
"logits/chosen": -1.902600646018982,
"logits/rejected": -1.9072542190551758,
"logps/chosen": -31.33133316040039,
"logps/rejected": -32.85138702392578,
"loss": 0.8045,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.16497036814689636,
"rewards/margins": 0.2012440711259842,
"rewards/rejected": -0.03627369925379753,
"step": 200
},
{
"epoch": 0.52,
"eval_logits/chosen": -2.2286624908447266,
"eval_logits/rejected": -2.2238309383392334,
"eval_logps/chosen": -34.062259674072266,
"eval_logps/rejected": -37.57956314086914,
"eval_loss": 0.9791061282157898,
"eval_rewards/accuracies": 0.5361295938491821,
"eval_rewards/chosen": -0.016622914001345634,
"eval_rewards/margins": 0.021141981706023216,
"eval_rewards/rejected": -0.03776489570736885,
"eval_runtime": 145.8001,
"eval_samples_per_second": 2.353,
"eval_steps_per_second": 0.295,
"step": 200
},
{
"epoch": 0.55,
"learning_rate": 2.5453962426402006e-06,
"logits/chosen": -2.0155138969421387,
"logits/rejected": -2.0261716842651367,
"logps/chosen": -31.7918758392334,
"logps/rejected": -33.9483528137207,
"loss": 0.8508,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.10765652358531952,
"rewards/margins": 0.15904827415943146,
"rewards/rejected": -0.051391761749982834,
"step": 210
},
{
"epoch": 0.57,
"learning_rate": 2.3185646976551794e-06,
"logits/chosen": -1.908071756362915,
"logits/rejected": -1.9228509664535522,
"logps/chosen": -29.81874656677246,
"logps/rejected": -31.65224266052246,
"loss": 0.7758,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.175676628947258,
"rewards/margins": 0.23211655020713806,
"rewards/rejected": -0.05643991753458977,
"step": 220
},
{
"epoch": 0.6,
"learning_rate": 2.0932279108998323e-06,
"logits/chosen": -1.9642655849456787,
"logits/rejected": -1.9682365655899048,
"logps/chosen": -33.13427734375,
"logps/rejected": -31.658504486083984,
"loss": 0.7961,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.16991479694843292,
"rewards/margins": 0.23543640971183777,
"rewards/rejected": -0.06552163511514664,
"step": 230
},
{
"epoch": 0.62,
"learning_rate": 1.8712423238279358e-06,
"logits/chosen": -1.9623470306396484,
"logits/rejected": -1.9405139684677124,
"logps/chosen": -33.870643615722656,
"logps/rejected": -35.13296127319336,
"loss": 0.7551,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.16986075043678284,
"rewards/margins": 0.279163658618927,
"rewards/rejected": -0.10930290073156357,
"step": 240
},
{
"epoch": 0.65,
"learning_rate": 1.6544367689701824e-06,
"logits/chosen": -2.003516674041748,
"logits/rejected": -2.0002071857452393,
"logps/chosen": -32.743778228759766,
"logps/rejected": -36.293540954589844,
"loss": 0.867,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.10877454280853271,
"rewards/margins": 0.14994443953037262,
"rewards/rejected": -0.041169874370098114,
"step": 250
},
{
"epoch": 0.68,
"learning_rate": 1.4445974030621963e-06,
"logits/chosen": -1.8702592849731445,
"logits/rejected": -1.867851972579956,
"logps/chosen": -33.99380111694336,
"logps/rejected": -35.564971923828125,
"loss": 0.85,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.11675956100225449,
"rewards/margins": 0.15297001600265503,
"rewards/rejected": -0.03621045500040054,
"step": 260
},
{
"epoch": 0.7,
"learning_rate": 1.243452991757889e-06,
"logits/chosen": -1.8547359704971313,
"logits/rejected": -1.8523473739624023,
"logps/chosen": -34.247703552246094,
"logps/rejected": -31.8588809967041,
"loss": 0.8606,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.0899367406964302,
"rewards/margins": 0.14246629178524017,
"rewards/rejected": -0.05252955108880997,
"step": 270
},
{
"epoch": 0.73,
"learning_rate": 1.0526606671603523e-06,
"logits/chosen": -1.9580624103546143,
"logits/rejected": -1.9475984573364258,
"logps/chosen": -35.032501220703125,
"logps/rejected": -31.907541275024414,
"loss": 0.7731,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.19448420405387878,
"rewards/margins": 0.23441271483898163,
"rewards/rejected": -0.039928533136844635,
"step": 280
},
{
"epoch": 0.75,
"learning_rate": 8.737922755071455e-07,
"logits/chosen": -2.0538430213928223,
"logits/rejected": -2.038924217224121,
"logps/chosen": -30.76409912109375,
"logps/rejected": -32.64435958862305,
"loss": 0.9184,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.09601466357707977,
"rewards/margins": 0.09152902662754059,
"rewards/rejected": 0.004485635552555323,
"step": 290
},
{
"epoch": 0.78,
"learning_rate": 7.08321427484816e-07,
"logits/chosen": -1.9241282939910889,
"logits/rejected": -1.9215872287750244,
"logps/chosen": -32.469078063964844,
"logps/rejected": -30.92861557006836,
"loss": 0.7186,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.2748206555843353,
"rewards/margins": 0.34190455079078674,
"rewards/rejected": -0.06708388030529022,
"step": 300
},
{
"epoch": 0.78,
"eval_logits/chosen": -2.224278211593628,
"eval_logits/rejected": -2.219449996948242,
"eval_logps/chosen": -34.111026763916016,
"eval_logps/rejected": -37.63855743408203,
"eval_loss": 0.9731553196907043,
"eval_rewards/accuracies": 0.5070598125457764,
"eval_rewards/chosen": -0.045885536819696426,
"eval_rewards/margins": 0.02727569453418255,
"eval_rewards/rejected": -0.07316123694181442,
"eval_runtime": 145.8155,
"eval_samples_per_second": 2.352,
"eval_steps_per_second": 0.295,
"step": 300
},
{
"epoch": 0.81,
"learning_rate": 5.576113578589035e-07,
"logits/chosen": -1.9088821411132812,
"logits/rejected": -1.9056317806243896,
"logps/chosen": -31.343881607055664,
"logps/rejected": -33.82904052734375,
"loss": 0.8131,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.15568041801452637,
"rewards/margins": 0.20781342685222626,
"rewards/rejected": -0.05213301628828049,
"step": 310
},
{
"epoch": 0.83,
"learning_rate": 4.229036944380913e-07,
"logits/chosen": -1.9583854675292969,
"logits/rejected": -1.9461708068847656,
"logps/chosen": -34.326744079589844,
"logps/rejected": -33.68773651123047,
"loss": 0.763,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.1615992784500122,
"rewards/margins": 0.250935435295105,
"rewards/rejected": -0.08933614194393158,
"step": 320
},
{
"epoch": 0.86,
"learning_rate": 3.053082288996112e-07,
"logits/chosen": -1.9934914112091064,
"logits/rejected": -1.9920612573623657,
"logps/chosen": -33.20575714111328,
"logps/rejected": -32.5851936340332,
"loss": 0.7771,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.16801992058753967,
"rewards/margins": 0.23138853907585144,
"rewards/rejected": -0.06336863338947296,
"step": 330
},
{
"epoch": 0.88,
"learning_rate": 2.0579377374915805e-07,
"logits/chosen": -2.080026149749756,
"logits/rejected": -2.064359188079834,
"logps/chosen": -33.830657958984375,
"logps/rejected": -33.15457534790039,
"loss": 0.7811,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.20923766493797302,
"rewards/margins": 0.23116116225719452,
"rewards/rejected": -0.021923482418060303,
"step": 340
},
{
"epoch": 0.91,
"learning_rate": 1.2518018074041684e-07,
"logits/chosen": -1.952505111694336,
"logits/rejected": -1.95168936252594,
"logps/chosen": -32.850311279296875,
"logps/rejected": -32.56792449951172,
"loss": 0.7213,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.25907084345817566,
"rewards/margins": 0.3207371234893799,
"rewards/rejected": -0.061666231602430344,
"step": 350
},
{
"epoch": 0.94,
"learning_rate": 6.41315865106129e-08,
"logits/chosen": -1.9081252813339233,
"logits/rejected": -1.918378472328186,
"logps/chosen": -31.902362823486328,
"logps/rejected": -35.33353805541992,
"loss": 0.7965,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.1813959777355194,
"rewards/margins": 0.20826482772827148,
"rewards/rejected": -0.026868879795074463,
"step": 360
},
{
"epoch": 0.96,
"learning_rate": 2.3150941078050325e-08,
"logits/chosen": -2.0479800701141357,
"logits/rejected": -2.0415284633636475,
"logps/chosen": -33.33736038208008,
"logps/rejected": -29.26900863647461,
"loss": 0.8,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.18876628577709198,
"rewards/margins": 0.21431729197502136,
"rewards/rejected": -0.025551024824380875,
"step": 370
},
{
"epoch": 0.99,
"learning_rate": 2.575864278703266e-09,
"logits/chosen": -1.9078292846679688,
"logits/rejected": -1.9100587368011475,
"logps/chosen": -33.90921401977539,
"logps/rejected": -30.994491577148438,
"loss": 0.7663,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.19790422916412354,
"rewards/margins": 0.271023154258728,
"rewards/rejected": -0.07311896234750748,
"step": 380
},
{
"epoch": 1.0,
"step": 385,
"total_flos": 0.0,
"train_loss": 0.859264765157328,
"train_runtime": 3251.7628,
"train_samples_per_second": 0.947,
"train_steps_per_second": 0.118
}
],
"logging_steps": 10,
"max_steps": 385,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}