aftonposten-6b-align-scan / trainer_state.json
hugodk-sch's picture
Model save
bf3851f verified
raw
history blame
21.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 385,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.282051282051282e-07,
"logits/chosen": -1.7278180122375488,
"logits/rejected": -1.7377450466156006,
"logps/chosen": -29.553977966308594,
"logps/rejected": -42.813133239746094,
"loss": 0.5,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"learning_rate": 1.282051282051282e-06,
"logits/chosen": -1.8668408393859863,
"logits/rejected": -1.8711602687835693,
"logps/chosen": -36.98978042602539,
"logps/rejected": -33.66878890991211,
"loss": 0.4962,
"rewards/accuracies": 0.5416666865348816,
"rewards/chosen": 0.006728413049131632,
"rewards/margins": 0.022216208279132843,
"rewards/rejected": -0.015487794764339924,
"step": 10
},
{
"epoch": 0.05,
"learning_rate": 2.564102564102564e-06,
"logits/chosen": -1.9970359802246094,
"logits/rejected": -1.9996885061264038,
"logps/chosen": -29.635208129882812,
"logps/rejected": -29.063350677490234,
"loss": 0.4994,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.002794977743178606,
"rewards/margins": 0.002757500857114792,
"rewards/rejected": 3.747665323317051e-05,
"step": 20
},
{
"epoch": 0.08,
"learning_rate": 3.846153846153847e-06,
"logits/chosen": -1.9208602905273438,
"logits/rejected": -1.9181534051895142,
"logps/chosen": -31.40317726135254,
"logps/rejected": -33.23335647583008,
"loss": 0.4982,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.005168457515537739,
"rewards/margins": 0.011126698926091194,
"rewards/rejected": -0.005958239547908306,
"step": 30
},
{
"epoch": 0.1,
"learning_rate": 4.999896948438434e-06,
"logits/chosen": -2.017474889755249,
"logits/rejected": -2.008759021759033,
"logps/chosen": -32.54490661621094,
"logps/rejected": -32.49110794067383,
"loss": 0.4994,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": 0.012759355828166008,
"rewards/margins": 0.004133358132094145,
"rewards/rejected": 0.008625999093055725,
"step": 40
},
{
"epoch": 0.13,
"learning_rate": 4.987541037542187e-06,
"logits/chosen": -1.8631575107574463,
"logits/rejected": -1.8523809909820557,
"logps/chosen": -33.509056091308594,
"logps/rejected": -35.39984893798828,
"loss": 0.5001,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": 0.01936880685389042,
"rewards/margins": -0.001629653968848288,
"rewards/rejected": 0.020998459309339523,
"step": 50
},
{
"epoch": 0.16,
"learning_rate": 4.954691471941119e-06,
"logits/chosen": -1.9419567584991455,
"logits/rejected": -1.9438903331756592,
"logps/chosen": -32.50743103027344,
"logps/rejected": -33.188419342041016,
"loss": 0.491,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.03710051625967026,
"rewards/margins": 0.04391607269644737,
"rewards/rejected": -0.006815555039793253,
"step": 60
},
{
"epoch": 0.18,
"learning_rate": 4.901618883413549e-06,
"logits/chosen": -2.0737738609313965,
"logits/rejected": -2.0787465572357178,
"logps/chosen": -33.928836822509766,
"logps/rejected": -36.540794372558594,
"loss": 0.4971,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": 0.023859737440943718,
"rewards/margins": 0.01262708194553852,
"rewards/rejected": 0.01123266015201807,
"step": 70
},
{
"epoch": 0.21,
"learning_rate": 4.828760511501322e-06,
"logits/chosen": -1.934011697769165,
"logits/rejected": -1.9371341466903687,
"logps/chosen": -34.23366928100586,
"logps/rejected": -34.542274475097656,
"loss": 0.4864,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.07765541225671768,
"rewards/margins": 0.057602256536483765,
"rewards/rejected": 0.020053153857588768,
"step": 80
},
{
"epoch": 0.23,
"learning_rate": 4.7367166013034295e-06,
"logits/chosen": -1.9421571493148804,
"logits/rejected": -1.9466804265975952,
"logps/chosen": -32.304595947265625,
"logps/rejected": -32.284873962402344,
"loss": 0.4909,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.0651295855641365,
"rewards/margins": 0.035516757518053055,
"rewards/rejected": 0.02961282804608345,
"step": 90
},
{
"epoch": 0.26,
"learning_rate": 4.626245458345211e-06,
"logits/chosen": -2.0406017303466797,
"logits/rejected": -2.038613796234131,
"logps/chosen": -32.027652740478516,
"logps/rejected": -31.224151611328125,
"loss": 0.4845,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.08527366816997528,
"rewards/margins": 0.0684497207403183,
"rewards/rejected": 0.01682395115494728,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": -2.234653949737549,
"eval_logits/rejected": -2.229806423187256,
"eval_logps/chosen": -33.91841125488281,
"eval_logps/rejected": -37.42335891723633,
"eval_loss": 0.4981803297996521,
"eval_rewards/accuracies": 0.5510797500610352,
"eval_rewards/chosen": 0.04645563289523125,
"eval_rewards/margins": 0.009152057580649853,
"eval_rewards/rejected": 0.03730357065796852,
"eval_runtime": 145.8684,
"eval_samples_per_second": 2.351,
"eval_steps_per_second": 0.295,
"step": 100
},
{
"epoch": 0.29,
"learning_rate": 4.498257201263691e-06,
"logits/chosen": -1.9949238300323486,
"logits/rejected": -1.9925572872161865,
"logps/chosen": -32.99268341064453,
"logps/rejected": -33.90182113647461,
"loss": 0.4869,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.10045422613620758,
"rewards/margins": 0.049433451145887375,
"rewards/rejected": 0.051020748913288116,
"step": 110
},
{
"epoch": 0.31,
"learning_rate": 4.353806263777678e-06,
"logits/chosen": -2.0055007934570312,
"logits/rejected": -1.997157335281372,
"logps/chosen": -32.192996978759766,
"logps/rejected": -32.01173400878906,
"loss": 0.491,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.10076460987329483,
"rewards/margins": 0.04085635766386986,
"rewards/rejected": 0.05990824103355408,
"step": 120
},
{
"epoch": 0.34,
"learning_rate": 4.1940827077152755e-06,
"logits/chosen": -2.034147262573242,
"logits/rejected": -2.026184558868408,
"logps/chosen": -30.194936752319336,
"logps/rejected": -31.905689239501953,
"loss": 0.4876,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.11231068521738052,
"rewards/margins": 0.05338172987103462,
"rewards/rejected": 0.0589289590716362,
"step": 130
},
{
"epoch": 0.36,
"learning_rate": 4.0204024186666215e-06,
"logits/chosen": -1.9643388986587524,
"logits/rejected": -1.9745395183563232,
"logps/chosen": -31.08599853515625,
"logps/rejected": -32.422943115234375,
"loss": 0.4795,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.13346607983112335,
"rewards/margins": 0.08869143575429916,
"rewards/rejected": 0.044774629175662994,
"step": 140
},
{
"epoch": 0.39,
"learning_rate": 3.834196265035119e-06,
"logits/chosen": -1.8776963949203491,
"logits/rejected": -1.8788686990737915,
"logps/chosen": -33.690345764160156,
"logps/rejected": -34.572776794433594,
"loss": 0.4699,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.20093801617622375,
"rewards/margins": 0.12714678049087524,
"rewards/rejected": 0.07379122078418732,
"step": 150
},
{
"epoch": 0.42,
"learning_rate": 3.636998309800573e-06,
"logits/chosen": -1.9286582469940186,
"logits/rejected": -1.9253017902374268,
"logps/chosen": -35.773475646972656,
"logps/rejected": -32.47566604614258,
"loss": 0.4851,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.15928010642528534,
"rewards/margins": 0.06044477969408035,
"rewards/rejected": 0.09883531928062439,
"step": 160
},
{
"epoch": 0.44,
"learning_rate": 3.4304331721118078e-06,
"logits/chosen": -2.0298352241516113,
"logits/rejected": -2.0225348472595215,
"logps/chosen": -33.22509002685547,
"logps/rejected": -31.19403648376465,
"loss": 0.4603,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.22910937666893005,
"rewards/margins": 0.165984109044075,
"rewards/rejected": 0.06312531232833862,
"step": 170
},
{
"epoch": 0.47,
"learning_rate": 3.2162026428305436e-06,
"logits/chosen": -2.036612033843994,
"logits/rejected": -2.041813373565674,
"logps/chosen": -31.960119247436523,
"logps/rejected": -32.171165466308594,
"loss": 0.4734,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.23386716842651367,
"rewards/margins": 0.1082783117890358,
"rewards/rejected": 0.12558886408805847,
"step": 180
},
{
"epoch": 0.49,
"learning_rate": 2.996071664294641e-06,
"logits/chosen": -2.037198066711426,
"logits/rejected": -2.0344595909118652,
"logps/chosen": -31.026615142822266,
"logps/rejected": -31.082998275756836,
"loss": 0.4781,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.184935063123703,
"rewards/margins": 0.09084881842136383,
"rewards/rejected": 0.09408621490001678,
"step": 190
},
{
"epoch": 0.52,
"learning_rate": 2.7718537898066833e-06,
"logits/chosen": -1.908818006515503,
"logits/rejected": -1.913496732711792,
"logps/chosen": -31.075199127197266,
"logps/rejected": -32.616241455078125,
"loss": 0.466,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.2124340832233429,
"rewards/margins": 0.14255891740322113,
"rewards/rejected": 0.06987515836954117,
"step": 200
},
{
"epoch": 0.52,
"eval_logits/chosen": -2.23366379737854,
"eval_logits/rejected": -2.2288384437561035,
"eval_logps/chosen": -33.779903411865234,
"eval_logps/rejected": -37.29892349243164,
"eval_loss": 0.4965229630470276,
"eval_rewards/accuracies": 0.545265793800354,
"eval_rewards/chosen": 0.10185908526182175,
"eval_rewards/margins": 0.014780867844820023,
"eval_rewards/rejected": 0.08707821369171143,
"eval_runtime": 145.8117,
"eval_samples_per_second": 2.352,
"eval_steps_per_second": 0.295,
"step": 200
},
{
"epoch": 0.55,
"learning_rate": 2.5453962426402006e-06,
"logits/chosen": -2.0208163261413574,
"logits/rejected": -2.0314111709594727,
"logps/chosen": -31.514019012451172,
"logps/rejected": -33.690643310546875,
"loss": 0.4728,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.18291443586349487,
"rewards/margins": 0.114091657102108,
"rewards/rejected": 0.06882277131080627,
"step": 210
},
{
"epoch": 0.57,
"learning_rate": 2.3185646976551794e-06,
"logits/chosen": -1.9141031503677368,
"logits/rejected": -1.9288082122802734,
"logps/chosen": -29.5712947845459,
"logps/rejected": -31.429983139038086,
"loss": 0.4615,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.2160976380109787,
"rewards/margins": 0.16482076048851013,
"rewards/rejected": 0.05127686262130737,
"step": 220
},
{
"epoch": 0.6,
"learning_rate": 2.0932279108998323e-06,
"logits/chosen": -1.9712814092636108,
"logits/rejected": -1.975285291671753,
"logps/chosen": -32.82429885864258,
"logps/rejected": -31.416866302490234,
"loss": 0.4561,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.23726816475391388,
"rewards/margins": 0.18429425358772278,
"rewards/rejected": 0.052973903715610504,
"step": 230
},
{
"epoch": 0.62,
"learning_rate": 1.8712423238279358e-06,
"logits/chosen": -1.969780683517456,
"logits/rejected": -1.9480432271957397,
"logps/chosen": -33.583518981933594,
"logps/rejected": -34.8461799621582,
"loss": 0.4567,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.22809162735939026,
"rewards/margins": 0.18624703586101532,
"rewards/rejected": 0.04184458404779434,
"step": 240
},
{
"epoch": 0.65,
"learning_rate": 1.6544367689701824e-06,
"logits/chosen": -2.0109124183654785,
"logits/rejected": -2.0076212882995605,
"logps/chosen": -32.46331024169922,
"logps/rejected": -35.97381591796875,
"loss": 0.4799,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.18470348417758942,
"rewards/margins": 0.08426074683666229,
"rewards/rejected": 0.10044274479150772,
"step": 250
},
{
"epoch": 0.68,
"learning_rate": 1.4445974030621963e-06,
"logits/chosen": -1.878321647644043,
"logits/rejected": -1.8758872747421265,
"logps/chosen": -33.721397399902344,
"logps/rejected": -35.270362854003906,
"loss": 0.4785,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.1868000328540802,
"rewards/margins": 0.09309660643339157,
"rewards/rejected": 0.09370341151952744,
"step": 260
},
{
"epoch": 0.7,
"learning_rate": 1.243452991757889e-06,
"logits/chosen": -1.8631956577301025,
"logits/rejected": -1.8606828451156616,
"logps/chosen": -33.90094757080078,
"logps/rejected": -31.57466697692871,
"loss": 0.4722,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.198659747838974,
"rewards/margins": 0.11999478191137314,
"rewards/rejected": 0.07866497337818146,
"step": 270
},
{
"epoch": 0.73,
"learning_rate": 1.0526606671603523e-06,
"logits/chosen": -1.9663877487182617,
"logits/rejected": -1.9559704065322876,
"logps/chosen": -34.72657775878906,
"logps/rejected": -31.63601303100586,
"loss": 0.4585,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.25202488899230957,
"rewards/margins": 0.17003390192985535,
"rewards/rejected": 0.08199100196361542,
"step": 280
},
{
"epoch": 0.75,
"learning_rate": 8.737922755071455e-07,
"logits/chosen": -2.062107801437378,
"logits/rejected": -2.0472733974456787,
"logps/chosen": -30.40212059020996,
"logps/rejected": -32.340721130371094,
"loss": 0.4799,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.2088027447462082,
"rewards/margins": 0.08435753732919693,
"rewards/rejected": 0.12444518506526947,
"step": 290
},
{
"epoch": 0.78,
"learning_rate": 7.08321427484816e-07,
"logits/chosen": -1.9330482482910156,
"logits/rejected": -1.9305979013442993,
"logps/chosen": -32.06965255737305,
"logps/rejected": -30.65035629272461,
"loss": 0.4349,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.3429831862449646,
"rewards/margins": 0.27640262246131897,
"rewards/rejected": 0.06658058613538742,
"step": 300
},
{
"epoch": 0.78,
"eval_logits/chosen": -2.2318336963653564,
"eval_logits/rejected": -2.2270052433013916,
"eval_logps/chosen": -33.749427795410156,
"eval_logps/rejected": -37.27041244506836,
"eval_loss": 0.4961945414543152,
"eval_rewards/accuracies": 0.5357142686843872,
"eval_rewards/chosen": 0.11405016481876373,
"eval_rewards/margins": 0.015567691065371037,
"eval_rewards/rejected": 0.09848246723413467,
"eval_runtime": 145.5479,
"eval_samples_per_second": 2.357,
"eval_steps_per_second": 0.295,
"step": 300
},
{
"epoch": 0.81,
"learning_rate": 5.576113578589035e-07,
"logits/chosen": -1.9179494380950928,
"logits/rejected": -1.9148216247558594,
"logps/chosen": -31.037755966186523,
"logps/rejected": -33.56406021118164,
"loss": 0.4639,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.22623713314533234,
"rewards/margins": 0.15500028431415558,
"rewards/rejected": 0.07123686373233795,
"step": 310
},
{
"epoch": 0.83,
"learning_rate": 4.229036944380913e-07,
"logits/chosen": -1.9682966470718384,
"logits/rejected": -1.9561439752578735,
"logps/chosen": -34.023921966552734,
"logps/rejected": -33.415985107421875,
"loss": 0.4575,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.22886168956756592,
"rewards/margins": 0.17971986532211304,
"rewards/rejected": 0.04914180561900139,
"step": 320
},
{
"epoch": 0.86,
"learning_rate": 3.053082288996112e-07,
"logits/chosen": -2.003788709640503,
"logits/rejected": -2.002437114715576,
"logps/chosen": -32.8862419128418,
"logps/rejected": -32.21629333496094,
"loss": 0.4679,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.23982055485248566,
"rewards/margins": 0.13450448215007782,
"rewards/rejected": 0.10531606525182724,
"step": 330
},
{
"epoch": 0.88,
"learning_rate": 2.0579377374915805e-07,
"logits/chosen": -2.0905163288116455,
"logits/rejected": -2.0749027729034424,
"logps/chosen": -33.47309112548828,
"logps/rejected": -32.81334686279297,
"loss": 0.4618,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.2825208008289337,
"rewards/margins": 0.16064420342445374,
"rewards/rejected": 0.12187659740447998,
"step": 340
},
{
"epoch": 0.91,
"learning_rate": 1.2518018074041684e-07,
"logits/chosen": -1.9627025127410889,
"logits/rejected": -1.9618685245513916,
"logps/chosen": -32.528175354003906,
"logps/rejected": -32.22235107421875,
"loss": 0.4522,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.30156710743904114,
"rewards/margins": 0.20444798469543457,
"rewards/rejected": 0.09711913019418716,
"step": 350
},
{
"epoch": 0.94,
"learning_rate": 6.41315865106129e-08,
"logits/chosen": -1.918320655822754,
"logits/rejected": -1.9285932779312134,
"logps/chosen": -31.606945037841797,
"logps/rejected": -34.98893356323242,
"loss": 0.4718,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.2390974760055542,
"rewards/margins": 0.11916828155517578,
"rewards/rejected": 0.11992917954921722,
"step": 360
},
{
"epoch": 0.96,
"learning_rate": 2.3150941078050325e-08,
"logits/chosen": -2.0581445693969727,
"logits/rejected": -2.051628828048706,
"logps/chosen": -33.017967224121094,
"logps/rejected": -28.989696502685547,
"loss": 0.4612,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.253601610660553,
"rewards/margins": 0.15891048312187195,
"rewards/rejected": 0.09469114243984222,
"step": 370
},
{
"epoch": 0.99,
"learning_rate": 2.575864278703266e-09,
"logits/chosen": -1.9176830053329468,
"logits/rejected": -1.919847846031189,
"logps/chosen": -33.614540100097656,
"logps/rejected": -30.760555267333984,
"loss": 0.4519,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": 0.24980488419532776,
"rewards/margins": 0.2049761563539505,
"rewards/rejected": 0.04482869431376457,
"step": 380
},
{
"epoch": 1.0,
"step": 385,
"total_flos": 0.0,
"train_loss": 0.4751699732495593,
"train_runtime": 3253.0203,
"train_samples_per_second": 0.947,
"train_steps_per_second": 0.118
}
],
"logging_steps": 10,
"max_steps": 385,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}