aftonposten-6b-align-scan / trainer_state.json
hugodk-sch's picture
Model save
d1c03c1 verified
raw
history blame
21.2 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 385,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"learning_rate": 1.282051282051282e-07,
"logits/chosen": -1.7278180122375488,
"logits/rejected": -1.7377450466156006,
"logps/chosen": -29.553977966308594,
"logps/rejected": -42.813133239746094,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"learning_rate": 1.282051282051282e-06,
"logits/chosen": -1.8668782711029053,
"logits/rejected": -1.8712005615234375,
"logps/chosen": -36.98646545410156,
"logps/rejected": -33.67870330810547,
"loss": 0.6747,
"rewards/accuracies": 0.5833333134651184,
"rewards/chosen": 0.012081857770681381,
"rewards/margins": 0.04126200079917908,
"rewards/rejected": -0.029180139303207397,
"step": 10
},
{
"epoch": 0.05,
"learning_rate": 2.564102564102564e-06,
"logits/chosen": -1.9979515075683594,
"logits/rejected": -2.0005881786346436,
"logps/chosen": -29.662744522094727,
"logps/rejected": -29.051654815673828,
"loss": 0.7042,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.01232814695686102,
"rewards/margins": -0.019403135403990746,
"rewards/rejected": 0.007074988447129726,
"step": 20
},
{
"epoch": 0.08,
"learning_rate": 3.846153846153847e-06,
"logits/chosen": -1.921088457107544,
"logits/rejected": -1.9184081554412842,
"logps/chosen": -31.383258819580078,
"logps/rejected": -33.23828887939453,
"loss": 0.6794,
"rewards/accuracies": 0.5874999761581421,
"rewards/chosen": 0.019701367244124413,
"rewards/margins": 0.03159697726368904,
"rewards/rejected": -0.011895612813532352,
"step": 30
},
{
"epoch": 0.1,
"learning_rate": 4.999896948438434e-06,
"logits/chosen": -2.017509937286377,
"logits/rejected": -2.0087647438049316,
"logps/chosen": -32.577518463134766,
"logps/rejected": -32.509830474853516,
"loss": 0.6964,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": -0.00042539089918136597,
"rewards/margins": -0.0021285698749125004,
"rewards/rejected": 0.001703177229501307,
"step": 40
},
{
"epoch": 0.13,
"learning_rate": 4.987541037542187e-06,
"logits/chosen": -1.8623021841049194,
"logits/rejected": -1.8515303134918213,
"logps/chosen": -33.56303787231445,
"logps/rejected": -35.47795867919922,
"loss": 0.6896,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.0033326249103993177,
"rewards/margins": 0.012034483253955841,
"rewards/rejected": -0.015367108397185802,
"step": 50
},
{
"epoch": 0.16,
"learning_rate": 4.954691471941119e-06,
"logits/chosen": -1.9411529302597046,
"logits/rejected": -1.943098783493042,
"logps/chosen": -32.549232482910156,
"logps/rejected": -33.20621109008789,
"loss": 0.6747,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.030568569898605347,
"rewards/margins": 0.051466990262269974,
"rewards/rejected": -0.02089841663837433,
"step": 60
},
{
"epoch": 0.18,
"learning_rate": 4.901618883413549e-06,
"logits/chosen": -2.0727076530456543,
"logits/rejected": -2.07767391204834,
"logps/chosen": -33.96394729614258,
"logps/rejected": -36.61058807373047,
"loss": 0.6813,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": 0.014724211767315865,
"rewards/margins": 0.03975607082247734,
"rewards/rejected": -0.025031859055161476,
"step": 70
},
{
"epoch": 0.21,
"learning_rate": 4.828760511501322e-06,
"logits/chosen": -1.9330183267593384,
"logits/rejected": -1.936171293258667,
"logps/chosen": -34.318397521972656,
"logps/rejected": -34.61701583862305,
"loss": 0.6634,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.06564848870038986,
"rewards/margins": 0.08041460067033768,
"rewards/rejected": -0.014766111969947815,
"step": 80
},
{
"epoch": 0.23,
"learning_rate": 4.7367166013034295e-06,
"logits/chosen": -1.9412405490875244,
"logits/rejected": -1.945755958557129,
"logps/chosen": -32.374977111816406,
"logps/rejected": -32.33773422241211,
"loss": 0.6787,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.05546582490205765,
"rewards/margins": 0.04276125878095627,
"rewards/rejected": 0.012704563327133656,
"step": 90
},
{
"epoch": 0.26,
"learning_rate": 4.626245458345211e-06,
"logits/chosen": -2.0381903648376465,
"logits/rejected": -2.0362119674682617,
"logps/chosen": -32.13701629638672,
"logps/rejected": -31.295801162719727,
"loss": 0.6595,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.06229018047451973,
"rewards/margins": 0.08004496991634369,
"rewards/rejected": -0.017754793167114258,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": -2.2335634231567383,
"eval_logits/rejected": -2.228721857070923,
"eval_logps/chosen": -34.00627899169922,
"eval_logps/rejected": -37.514984130859375,
"eval_loss": 0.6900185346603394,
"eval_rewards/accuracies": 0.5253322720527649,
"eval_rewards/chosen": 0.016964510083198547,
"eval_rewards/margins": 0.015982570126652718,
"eval_rewards/rejected": 0.0009819410042837262,
"eval_runtime": 145.7969,
"eval_samples_per_second": 2.353,
"eval_steps_per_second": 0.295,
"step": 100
},
{
"epoch": 0.29,
"learning_rate": 4.498257201263691e-06,
"logits/chosen": -1.9929395914077759,
"logits/rejected": -1.99057936668396,
"logps/chosen": -33.06591796875,
"logps/rejected": -34.01008605957031,
"loss": 0.6678,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.10673947632312775,
"rewards/margins": 0.09516827017068863,
"rewards/rejected": 0.011571208015084267,
"step": 110
},
{
"epoch": 0.31,
"learning_rate": 4.353806263777678e-06,
"logits/chosen": -2.0045437812805176,
"logits/rejected": -1.9962146282196045,
"logps/chosen": -32.33503341674805,
"logps/rejected": -32.12450408935547,
"loss": 0.68,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.06592197716236115,
"rewards/margins": 0.043721526861190796,
"rewards/rejected": 0.02220045030117035,
"step": 120
},
{
"epoch": 0.34,
"learning_rate": 4.1940827077152755e-06,
"logits/chosen": -2.032952308654785,
"logits/rejected": -2.024994134902954,
"logps/chosen": -30.302433013916016,
"logps/rejected": -32.04313278198242,
"loss": 0.6609,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.10397056490182877,
"rewards/margins": 0.09804753214120865,
"rewards/rejected": 0.0059230271726846695,
"step": 130
},
{
"epoch": 0.36,
"learning_rate": 4.0204024186666215e-06,
"logits/chosen": -1.9635902643203735,
"logits/rejected": -1.9738080501556396,
"logps/chosen": -31.201534271240234,
"logps/rejected": -32.556739807128906,
"loss": 0.6343,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.13087674975395203,
"rewards/margins": 0.1439923495054245,
"rewards/rejected": -0.013115609996020794,
"step": 140
},
{
"epoch": 0.39,
"learning_rate": 3.834196265035119e-06,
"logits/chosen": -1.8755298852920532,
"logits/rejected": -1.876691222190857,
"logps/chosen": -33.89242172241211,
"logps/rejected": -34.753570556640625,
"loss": 0.6272,
"rewards/accuracies": 0.612500011920929,
"rewards/chosen": 0.1801605522632599,
"rewards/margins": 0.17795029282569885,
"rewards/rejected": 0.002210266888141632,
"step": 150
},
{
"epoch": 0.42,
"learning_rate": 3.636998309800573e-06,
"logits/chosen": -1.9265025854110718,
"logits/rejected": -1.9230976104736328,
"logps/chosen": -35.98413848876953,
"logps/rejected": -32.69154357910156,
"loss": 0.6574,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": 0.11252401769161224,
"rewards/margins": 0.09379850327968597,
"rewards/rejected": 0.01872551441192627,
"step": 160
},
{
"epoch": 0.44,
"learning_rate": 3.4304331721118078e-06,
"logits/chosen": -2.0262577533721924,
"logits/rejected": -2.0189335346221924,
"logps/chosen": -33.45969772338867,
"logps/rejected": -31.372516632080078,
"loss": 0.6076,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.20289841294288635,
"rewards/margins": 0.21529710292816162,
"rewards/rejected": -0.012398697435855865,
"step": 170
},
{
"epoch": 0.47,
"learning_rate": 3.2162026428305436e-06,
"logits/chosen": -2.0338973999023438,
"logits/rejected": -2.0391509532928467,
"logps/chosen": -32.194793701171875,
"logps/rejected": -32.42069625854492,
"loss": 0.6221,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.20999665558338165,
"rewards/margins": 0.17132976651191711,
"rewards/rejected": 0.03866690397262573,
"step": 180
},
{
"epoch": 0.49,
"learning_rate": 2.996071664294641e-06,
"logits/chosen": -2.035045862197876,
"logits/rejected": -2.032278060913086,
"logps/chosen": -31.230976104736328,
"logps/rejected": -31.29391860961914,
"loss": 0.6403,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.1547853648662567,
"rewards/margins": 0.14020755887031555,
"rewards/rejected": 0.014577840454876423,
"step": 190
},
{
"epoch": 0.52,
"learning_rate": 2.7718537898066833e-06,
"logits/chosen": -1.9042125940322876,
"logits/rejected": -1.9088417291641235,
"logps/chosen": -31.285167694091797,
"logps/rejected": -32.79944610595703,
"loss": 0.6189,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.192669078707695,
"rewards/margins": 0.1977730393409729,
"rewards/rejected": -0.0051039643585681915,
"step": 200
},
{
"epoch": 0.52,
"eval_logits/chosen": -2.230701446533203,
"eval_logits/rejected": -2.2258734703063965,
"eval_logps/chosen": -34.0167350769043,
"eval_logps/rejected": -37.53435134887695,
"eval_loss": 0.6894960403442383,
"eval_rewards/accuracies": 0.5340532064437866,
"eval_rewards/chosen": 0.010691030882298946,
"eval_rewards/margins": 0.02133062295615673,
"eval_rewards/rejected": -0.010639593005180359,
"eval_runtime": 145.5559,
"eval_samples_per_second": 2.356,
"eval_steps_per_second": 0.295,
"step": 200
},
{
"epoch": 0.55,
"learning_rate": 2.5453962426402006e-06,
"logits/chosen": -2.018108367919922,
"logits/rejected": -2.0287561416625977,
"logps/chosen": -31.745798110961914,
"logps/rejected": -33.90629577636719,
"loss": 0.6305,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.13530384004116058,
"rewards/margins": 0.16146349906921387,
"rewards/rejected": -0.026159662753343582,
"step": 210
},
{
"epoch": 0.57,
"learning_rate": 2.3185646976551794e-06,
"logits/chosen": -1.9105422496795654,
"logits/rejected": -1.9253056049346924,
"logps/chosen": -29.849069595336914,
"logps/rejected": -31.58382797241211,
"loss": 0.6251,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.15748274326324463,
"rewards/margins": 0.17287404835224152,
"rewards/rejected": -0.015391310676932335,
"step": 220
},
{
"epoch": 0.6,
"learning_rate": 2.0932279108998323e-06,
"logits/chosen": -1.966581106185913,
"logits/rejected": -1.9705555438995361,
"logps/chosen": -33.059837341308594,
"logps/rejected": -31.605152130126953,
"loss": 0.6,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.21457910537719727,
"rewards/margins": 0.2480895221233368,
"rewards/rejected": -0.033510446548461914,
"step": 230
},
{
"epoch": 0.62,
"learning_rate": 1.8712423238279358e-06,
"logits/chosen": -1.9644801616668701,
"logits/rejected": -1.9426641464233398,
"logps/chosen": -33.819881439208984,
"logps/rejected": -35.105430603027344,
"loss": 0.5826,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.20031993091106415,
"rewards/margins": 0.2930986285209656,
"rewards/rejected": -0.09277870506048203,
"step": 240
},
{
"epoch": 0.65,
"learning_rate": 1.6544367689701824e-06,
"logits/chosen": -2.005814552307129,
"logits/rejected": -2.002516746520996,
"logps/chosen": -32.663055419921875,
"logps/rejected": -36.233436584472656,
"loss": 0.632,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.15720662474632263,
"rewards/margins": 0.16231317818164825,
"rewards/rejected": -0.0051065413281321526,
"step": 250
},
{
"epoch": 0.68,
"learning_rate": 1.4445974030621963e-06,
"logits/chosen": -1.8735284805297852,
"logits/rejected": -1.8710968494415283,
"logps/chosen": -33.964805603027344,
"logps/rejected": -35.50111389160156,
"loss": 0.6428,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.13415411114692688,
"rewards/margins": 0.13204893469810486,
"rewards/rejected": 0.0021051731891930103,
"step": 260
},
{
"epoch": 0.7,
"learning_rate": 1.243452991757889e-06,
"logits/chosen": -1.8585532903671265,
"logits/rejected": -1.8561296463012695,
"logps/chosen": -34.17288589477539,
"logps/rejected": -31.792491912841797,
"loss": 0.6391,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.13482868671417236,
"rewards/margins": 0.14752644300460815,
"rewards/rejected": -0.012697766534984112,
"step": 270
},
{
"epoch": 0.73,
"learning_rate": 1.0526606671603523e-06,
"logits/chosen": -1.9624073505401611,
"logits/rejected": -1.9519150257110596,
"logps/chosen": -34.95975875854492,
"logps/rejected": -31.84967041015625,
"loss": 0.5947,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.2381301373243332,
"rewards/margins": 0.2433377206325531,
"rewards/rejected": -0.005207589361816645,
"step": 280
},
{
"epoch": 0.75,
"learning_rate": 8.737922755071455e-07,
"logits/chosen": -2.056440830230713,
"logits/rejected": -2.0415446758270264,
"logps/chosen": -30.704137802124023,
"logps/rejected": -32.5970458984375,
"loss": 0.6654,
"rewards/accuracies": 0.5625,
"rewards/chosen": 0.13198992609977722,
"rewards/margins": 0.09911760687828064,
"rewards/rejected": 0.03287229686975479,
"step": 290
},
{
"epoch": 0.78,
"learning_rate": 7.08321427484816e-07,
"logits/chosen": -1.927122712135315,
"logits/rejected": -1.924602746963501,
"logps/chosen": -32.37379455566406,
"logps/rejected": -30.879268646240234,
"loss": 0.5582,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.3319894075393677,
"rewards/margins": 0.3694665729999542,
"rewards/rejected": -0.03747714310884476,
"step": 300
},
{
"epoch": 0.78,
"eval_logits/chosen": -2.226900815963745,
"eval_logits/rejected": -2.2220816612243652,
"eval_logps/chosen": -34.03526306152344,
"eval_logps/rejected": -37.56266403198242,
"eval_loss": 0.6872054934501648,
"eval_rewards/accuracies": 0.5598006844520569,
"eval_rewards/chosen": -0.0004257837135810405,
"eval_rewards/margins": 0.027201363816857338,
"eval_rewards/rejected": -0.027627145871520042,
"eval_runtime": 145.6667,
"eval_samples_per_second": 2.355,
"eval_steps_per_second": 0.295,
"step": 300
},
{
"epoch": 0.81,
"learning_rate": 5.576113578589035e-07,
"logits/chosen": -1.9124987125396729,
"logits/rejected": -1.9092620611190796,
"logps/chosen": -31.285348892211914,
"logps/rejected": -33.75069046020508,
"loss": 0.6188,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.19080111384391785,
"rewards/margins": 0.1959228217601776,
"rewards/rejected": -0.0051217032596468925,
"step": 310
},
{
"epoch": 0.83,
"learning_rate": 4.229036944380913e-07,
"logits/chosen": -1.9616212844848633,
"logits/rejected": -1.9494349956512451,
"logps/chosen": -34.30357360839844,
"logps/rejected": -33.64542770385742,
"loss": 0.5999,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.17549821734428406,
"rewards/margins": 0.23945149779319763,
"rewards/rejected": -0.06395327299833298,
"step": 320
},
{
"epoch": 0.86,
"learning_rate": 3.053082288996112e-07,
"logits/chosen": -1.997859239578247,
"logits/rejected": -1.9964158535003662,
"logps/chosen": -33.130615234375,
"logps/rejected": -32.511531829833984,
"loss": 0.6073,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.21310487389564514,
"rewards/margins": 0.23227325081825256,
"rewards/rejected": -0.01916835829615593,
"step": 330
},
{
"epoch": 0.88,
"learning_rate": 2.0579377374915805e-07,
"logits/chosen": -2.083962917327881,
"logits/rejected": -2.06827449798584,
"logps/chosen": -33.732425689697266,
"logps/rejected": -33.07551193237305,
"loss": 0.5985,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.2681804299354553,
"rewards/margins": 0.24266552925109863,
"rewards/rejected": 0.025514895096421242,
"step": 340
},
{
"epoch": 0.91,
"learning_rate": 1.2518018074041684e-07,
"logits/chosen": -1.956364631652832,
"logits/rejected": -1.9555118083953857,
"logps/chosen": -32.79610824584961,
"logps/rejected": -32.512969970703125,
"loss": 0.5758,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.2915918231010437,
"rewards/margins": 0.3202829957008362,
"rewards/rejected": -0.028691178187727928,
"step": 350
},
{
"epoch": 0.94,
"learning_rate": 6.41315865106129e-08,
"logits/chosen": -1.9118818044662476,
"logits/rejected": -1.9221813678741455,
"logps/chosen": -31.855304718017578,
"logps/rejected": -35.32331848144531,
"loss": 0.6036,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.2096320390701294,
"rewards/margins": 0.23036828637123108,
"rewards/rejected": -0.02073623239994049,
"step": 360
},
{
"epoch": 0.96,
"learning_rate": 2.3150941078050325e-08,
"logits/chosen": -2.0520732402801514,
"logits/rejected": -2.0456154346466064,
"logps/chosen": -33.327049255371094,
"logps/rejected": -29.233760833740234,
"loss": 0.6124,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.19495204091072083,
"rewards/margins": 0.19935496151447296,
"rewards/rejected": -0.004402949940413237,
"step": 370
},
{
"epoch": 0.99,
"learning_rate": 2.575864278703266e-09,
"logits/chosen": -1.911370038986206,
"logits/rejected": -1.9135433435440063,
"logps/chosen": -33.83781814575195,
"logps/rejected": -30.931133270263672,
"loss": 0.5854,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.2407412976026535,
"rewards/margins": 0.27584755420684814,
"rewards/rejected": -0.035106249153614044,
"step": 380
},
{
"epoch": 1.0,
"step": 385,
"total_flos": 0.0,
"train_loss": 0.6364745784115482,
"train_runtime": 3251.6479,
"train_samples_per_second": 0.947,
"train_steps_per_second": 0.118
}
],
"logging_steps": 10,
"max_steps": 385,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}