ap-normistral-7b-align-scan / trainer_state.json
hugodk-sch's picture
Model save
141056b verified
raw
history blame
22 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 100,
"global_step": 385,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0,
"grad_norm": 45.75,
"learning_rate": 1.282051282051282e-07,
"logits/chosen": 88.18099975585938,
"logits/rejected": 88.25153350830078,
"logps/chosen": -29.073104858398438,
"logps/rejected": -26.25731658935547,
"loss": 0.6931,
"rewards/accuracies": 0.0,
"rewards/chosen": 0.0,
"rewards/margins": 0.0,
"rewards/rejected": 0.0,
"step": 1
},
{
"epoch": 0.03,
"grad_norm": 38.75,
"learning_rate": 1.282051282051282e-06,
"logits/chosen": 81.08214569091797,
"logits/rejected": 80.78972625732422,
"logps/chosen": -34.26863098144531,
"logps/rejected": -33.00303649902344,
"loss": 0.7238,
"rewards/accuracies": 0.4861111044883728,
"rewards/chosen": -0.018833572044968605,
"rewards/margins": 0.012407698668539524,
"rewards/rejected": -0.03124127723276615,
"step": 10
},
{
"epoch": 0.05,
"grad_norm": 37.25,
"learning_rate": 2.564102564102564e-06,
"logits/chosen": 80.68824005126953,
"logits/rejected": 80.57817840576172,
"logps/chosen": -33.58771514892578,
"logps/rejected": -30.75152015686035,
"loss": 0.7285,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": 0.0715368390083313,
"rewards/margins": 0.05335939675569534,
"rewards/rejected": 0.01817743293941021,
"step": 20
},
{
"epoch": 0.08,
"grad_norm": 39.0,
"learning_rate": 3.846153846153847e-06,
"logits/chosen": 82.5134506225586,
"logits/rejected": 82.5453872680664,
"logps/chosen": -33.79930877685547,
"logps/rejected": -31.215984344482422,
"loss": 0.7655,
"rewards/accuracies": 0.42500001192092896,
"rewards/chosen": 0.14945873618125916,
"rewards/margins": 0.00396394869312644,
"rewards/rejected": 0.14549477398395538,
"step": 30
},
{
"epoch": 0.1,
"grad_norm": 44.5,
"learning_rate": 4.999896948438434e-06,
"logits/chosen": 81.0338363647461,
"logits/rejected": 81.03011322021484,
"logps/chosen": -32.87316131591797,
"logps/rejected": -33.17707061767578,
"loss": 0.7622,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": 0.21063438057899475,
"rewards/margins": 0.07661890983581543,
"rewards/rejected": 0.13401541113853455,
"step": 40
},
{
"epoch": 0.13,
"grad_norm": 29.125,
"learning_rate": 4.987541037542187e-06,
"logits/chosen": 78.6342544555664,
"logits/rejected": 78.64932250976562,
"logps/chosen": -30.660537719726562,
"logps/rejected": -30.76174545288086,
"loss": 0.7328,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": 0.38129910826683044,
"rewards/margins": 0.16322235763072968,
"rewards/rejected": 0.21807675063610077,
"step": 50
},
{
"epoch": 0.16,
"grad_norm": 38.0,
"learning_rate": 4.954691471941119e-06,
"logits/chosen": 83.13832092285156,
"logits/rejected": 83.19276428222656,
"logps/chosen": -30.93692970275879,
"logps/rejected": -29.44403648376465,
"loss": 0.7486,
"rewards/accuracies": 0.48750001192092896,
"rewards/chosen": 0.16676156222820282,
"rewards/margins": 0.0642227977514267,
"rewards/rejected": 0.10253874957561493,
"step": 60
},
{
"epoch": 0.18,
"grad_norm": 64.5,
"learning_rate": 4.901618883413549e-06,
"logits/chosen": 83.7562255859375,
"logits/rejected": 83.78288269042969,
"logps/chosen": -30.605281829833984,
"logps/rejected": -33.032676696777344,
"loss": 0.7785,
"rewards/accuracies": 0.512499988079071,
"rewards/chosen": 0.01661914773285389,
"rewards/margins": 0.013097524642944336,
"rewards/rejected": 0.003521624254062772,
"step": 70
},
{
"epoch": 0.21,
"grad_norm": 42.5,
"learning_rate": 4.828760511501322e-06,
"logits/chosen": 81.37464904785156,
"logits/rejected": 81.36463165283203,
"logps/chosen": -31.443639755249023,
"logps/rejected": -30.998950958251953,
"loss": 0.6854,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": 0.04291192442178726,
"rewards/margins": 0.211822509765625,
"rewards/rejected": -0.16891059279441833,
"step": 80
},
{
"epoch": 0.23,
"grad_norm": 38.5,
"learning_rate": 4.7367166013034295e-06,
"logits/chosen": 78.1283187866211,
"logits/rejected": 78.10060119628906,
"logps/chosen": -32.54193878173828,
"logps/rejected": -31.2618408203125,
"loss": 0.6597,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": 0.06736886501312256,
"rewards/margins": 0.2846258580684662,
"rewards/rejected": -0.21725702285766602,
"step": 90
},
{
"epoch": 0.26,
"grad_norm": 33.0,
"learning_rate": 4.626245458345211e-06,
"logits/chosen": 83.37786865234375,
"logits/rejected": 83.40235900878906,
"logps/chosen": -34.06679153442383,
"logps/rejected": -31.954029083251953,
"loss": 0.6331,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.166738823056221,
"rewards/margins": 0.30917495489120483,
"rewards/rejected": -0.14243611693382263,
"step": 100
},
{
"epoch": 0.26,
"eval_logits/chosen": 98.75102233886719,
"eval_logits/rejected": 98.73809814453125,
"eval_logps/chosen": -32.44057846069336,
"eval_logps/rejected": -36.09393310546875,
"eval_loss": 0.7266324758529663,
"eval_rewards/accuracies": 0.545265793800354,
"eval_rewards/chosen": 0.0018192834686487913,
"eval_rewards/margins": 0.09100572764873505,
"eval_rewards/rejected": -0.08918644487857819,
"eval_runtime": 104.1233,
"eval_samples_per_second": 3.294,
"eval_steps_per_second": 0.413,
"step": 100
},
{
"epoch": 0.29,
"grad_norm": 53.25,
"learning_rate": 4.498257201263691e-06,
"logits/chosen": 83.52274322509766,
"logits/rejected": 83.4115219116211,
"logps/chosen": -32.51097869873047,
"logps/rejected": -32.80630874633789,
"loss": 0.6201,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.35827895998954773,
"rewards/margins": 0.46357136964797974,
"rewards/rejected": -0.10529237985610962,
"step": 110
},
{
"epoch": 0.31,
"grad_norm": 50.25,
"learning_rate": 4.353806263777678e-06,
"logits/chosen": 83.7201919555664,
"logits/rejected": 83.82737731933594,
"logps/chosen": -28.233470916748047,
"logps/rejected": -35.50123977661133,
"loss": 0.5892,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.4872798025608063,
"rewards/margins": 0.5167454481124878,
"rewards/rejected": -0.029465626925230026,
"step": 120
},
{
"epoch": 0.34,
"grad_norm": 26.0,
"learning_rate": 4.1940827077152755e-06,
"logits/chosen": 80.89537048339844,
"logits/rejected": 80.91288757324219,
"logps/chosen": -30.439437866210938,
"logps/rejected": -32.11792755126953,
"loss": 0.6342,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.32754257321357727,
"rewards/margins": 0.457952082157135,
"rewards/rejected": -0.13040950894355774,
"step": 130
},
{
"epoch": 0.36,
"grad_norm": 30.5,
"learning_rate": 4.0204024186666215e-06,
"logits/chosen": 82.11260223388672,
"logits/rejected": 82.12245178222656,
"logps/chosen": -27.101327896118164,
"logps/rejected": -33.005577087402344,
"loss": 0.5503,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.24219810962677002,
"rewards/margins": 0.6497381329536438,
"rewards/rejected": -0.4075400233268738,
"step": 140
},
{
"epoch": 0.39,
"grad_norm": 28.25,
"learning_rate": 3.834196265035119e-06,
"logits/chosen": 80.61543273925781,
"logits/rejected": 80.58251953125,
"logps/chosen": -28.909435272216797,
"logps/rejected": -33.041297912597656,
"loss": 0.5489,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.3357718586921692,
"rewards/margins": 0.6681298613548279,
"rewards/rejected": -0.3323580324649811,
"step": 150
},
{
"epoch": 0.42,
"grad_norm": 55.5,
"learning_rate": 3.636998309800573e-06,
"logits/chosen": 82.49334716796875,
"logits/rejected": 82.49332427978516,
"logps/chosen": -33.531585693359375,
"logps/rejected": -30.385196685791016,
"loss": 0.6226,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.4236120283603668,
"rewards/margins": 0.7264719009399414,
"rewards/rejected": -0.3028598725795746,
"step": 160
},
{
"epoch": 0.44,
"grad_norm": 42.25,
"learning_rate": 3.4304331721118078e-06,
"logits/chosen": 83.25149536132812,
"logits/rejected": 83.19024658203125,
"logps/chosen": -30.89450454711914,
"logps/rejected": -32.51388931274414,
"loss": 0.5987,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.2553495168685913,
"rewards/margins": 0.601028323173523,
"rewards/rejected": -0.3456788957118988,
"step": 170
},
{
"epoch": 0.47,
"grad_norm": 34.25,
"learning_rate": 3.2162026428305436e-06,
"logits/chosen": 80.78834533691406,
"logits/rejected": 80.77064514160156,
"logps/chosen": -30.47861671447754,
"logps/rejected": -31.64987564086914,
"loss": 0.5119,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.5029744505882263,
"rewards/margins": 0.8478416204452515,
"rewards/rejected": -0.34486719965934753,
"step": 180
},
{
"epoch": 0.49,
"grad_norm": 21.125,
"learning_rate": 2.996071664294641e-06,
"logits/chosen": 82.49182891845703,
"logits/rejected": 82.4795150756836,
"logps/chosen": -30.340301513671875,
"logps/rejected": -30.779190063476562,
"loss": 0.6399,
"rewards/accuracies": 0.6625000238418579,
"rewards/chosen": 0.2982791066169739,
"rewards/margins": 0.5160930752754211,
"rewards/rejected": -0.21781396865844727,
"step": 190
},
{
"epoch": 0.52,
"grad_norm": 17.375,
"learning_rate": 2.7718537898066833e-06,
"logits/chosen": 77.9924545288086,
"logits/rejected": 77.93614196777344,
"logps/chosen": -33.81483459472656,
"logps/rejected": -32.65379333496094,
"loss": 0.6048,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": 0.6565331816673279,
"rewards/margins": 0.7688080072402954,
"rewards/rejected": -0.1122748851776123,
"step": 200
},
{
"epoch": 0.52,
"eval_logits/chosen": 98.63562774658203,
"eval_logits/rejected": 98.61270141601562,
"eval_logps/chosen": -32.66818618774414,
"eval_logps/rejected": -36.3549919128418,
"eval_loss": 0.7483024001121521,
"eval_rewards/accuracies": 0.5282392501831055,
"eval_rewards/chosen": -0.15750552713871002,
"eval_rewards/margins": 0.1144195944070816,
"eval_rewards/rejected": -0.27192509174346924,
"eval_runtime": 104.0056,
"eval_samples_per_second": 3.298,
"eval_steps_per_second": 0.413,
"step": 200
},
{
"epoch": 0.55,
"grad_norm": 67.5,
"learning_rate": 2.5453962426402006e-06,
"logits/chosen": 80.62068176269531,
"logits/rejected": 80.52841186523438,
"logps/chosen": -33.23737716674805,
"logps/rejected": -35.3394889831543,
"loss": 0.5657,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.49584078788757324,
"rewards/margins": 0.7432178258895874,
"rewards/rejected": -0.2473769634962082,
"step": 210
},
{
"epoch": 0.57,
"grad_norm": 22.875,
"learning_rate": 2.3185646976551794e-06,
"logits/chosen": 82.79103088378906,
"logits/rejected": 82.86891174316406,
"logps/chosen": -31.00775718688965,
"logps/rejected": -31.1812801361084,
"loss": 0.4699,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.6588366627693176,
"rewards/margins": 0.9754641652107239,
"rewards/rejected": -0.31662750244140625,
"step": 220
},
{
"epoch": 0.6,
"grad_norm": 39.75,
"learning_rate": 2.0932279108998323e-06,
"logits/chosen": 79.89860534667969,
"logits/rejected": 79.95353698730469,
"logps/chosen": -32.31645965576172,
"logps/rejected": -34.39720153808594,
"loss": 0.6045,
"rewards/accuracies": 0.625,
"rewards/chosen": 0.3425000309944153,
"rewards/margins": 0.612037718296051,
"rewards/rejected": -0.26953771710395813,
"step": 230
},
{
"epoch": 0.62,
"grad_norm": 34.0,
"learning_rate": 1.8712423238279358e-06,
"logits/chosen": 82.30177307128906,
"logits/rejected": 82.58096313476562,
"logps/chosen": -30.619409561157227,
"logps/rejected": -31.930099487304688,
"loss": 0.4503,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.7480591535568237,
"rewards/margins": 1.0266973972320557,
"rewards/rejected": -0.2786383032798767,
"step": 240
},
{
"epoch": 0.65,
"grad_norm": 37.25,
"learning_rate": 1.6544367689701824e-06,
"logits/chosen": 81.00114440917969,
"logits/rejected": 81.05775451660156,
"logps/chosen": -26.927043914794922,
"logps/rejected": -30.175378799438477,
"loss": 0.5818,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.46819525957107544,
"rewards/margins": 0.6604421138763428,
"rewards/rejected": -0.19224683940410614,
"step": 250
},
{
"epoch": 0.68,
"grad_norm": 28.125,
"learning_rate": 1.4445974030621963e-06,
"logits/chosen": 78.21713256835938,
"logits/rejected": 78.345458984375,
"logps/chosen": -30.480411529541016,
"logps/rejected": -36.508689880371094,
"loss": 0.4701,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.7564077377319336,
"rewards/margins": 1.0413528680801392,
"rewards/rejected": -0.28494516015052795,
"step": 260
},
{
"epoch": 0.7,
"grad_norm": 24.875,
"learning_rate": 1.243452991757889e-06,
"logits/chosen": 77.48748779296875,
"logits/rejected": 77.51399230957031,
"logps/chosen": -30.899953842163086,
"logps/rejected": -31.809417724609375,
"loss": 0.5373,
"rewards/accuracies": 0.7250000238418579,
"rewards/chosen": 0.6152798533439636,
"rewards/margins": 0.828079104423523,
"rewards/rejected": -0.2127993404865265,
"step": 270
},
{
"epoch": 0.73,
"grad_norm": 40.5,
"learning_rate": 1.0526606671603523e-06,
"logits/chosen": 80.2722396850586,
"logits/rejected": 80.06110382080078,
"logps/chosen": -31.229726791381836,
"logps/rejected": -29.85305404663086,
"loss": 0.6573,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.406115859746933,
"rewards/margins": 0.5363569259643555,
"rewards/rejected": -0.1302410513162613,
"step": 280
},
{
"epoch": 0.75,
"grad_norm": 23.5,
"learning_rate": 8.737922755071455e-07,
"logits/chosen": 80.33818054199219,
"logits/rejected": 80.25775146484375,
"logps/chosen": -33.049842834472656,
"logps/rejected": -32.65058135986328,
"loss": 0.4554,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": 0.7380836606025696,
"rewards/margins": 1.1816825866699219,
"rewards/rejected": -0.44359898567199707,
"step": 290
},
{
"epoch": 0.78,
"grad_norm": 40.0,
"learning_rate": 7.08321427484816e-07,
"logits/chosen": 76.02481079101562,
"logits/rejected": 76.12067413330078,
"logps/chosen": -32.21509552001953,
"logps/rejected": -29.180316925048828,
"loss": 0.5829,
"rewards/accuracies": 0.6875,
"rewards/chosen": 0.8334287405014038,
"rewards/margins": 0.8882354497909546,
"rewards/rejected": -0.054806679487228394,
"step": 300
},
{
"epoch": 0.78,
"eval_logits/chosen": 98.6636962890625,
"eval_logits/rejected": 98.63994598388672,
"eval_logps/chosen": -32.58477020263672,
"eval_logps/rejected": -36.28050231933594,
"eval_loss": 0.7390850782394409,
"eval_rewards/accuracies": 0.5485880374908447,
"eval_rewards/chosen": -0.09911961853504181,
"eval_rewards/margins": 0.12066645920276642,
"eval_rewards/rejected": -0.21978609263896942,
"eval_runtime": 103.9194,
"eval_samples_per_second": 3.301,
"eval_steps_per_second": 0.414,
"step": 300
},
{
"epoch": 0.81,
"grad_norm": 31.5,
"learning_rate": 5.576113578589035e-07,
"logits/chosen": 83.141357421875,
"logits/rejected": 83.17015075683594,
"logps/chosen": -30.017724990844727,
"logps/rejected": -32.537620544433594,
"loss": 0.5115,
"rewards/accuracies": 0.7875000238418579,
"rewards/chosen": 0.5920647382736206,
"rewards/margins": 0.8285657167434692,
"rewards/rejected": -0.23650094866752625,
"step": 310
},
{
"epoch": 0.83,
"grad_norm": 25.5,
"learning_rate": 4.229036944380913e-07,
"logits/chosen": 80.59849548339844,
"logits/rejected": 80.60069274902344,
"logps/chosen": -30.53042221069336,
"logps/rejected": -29.161365509033203,
"loss": 0.4928,
"rewards/accuracies": 0.762499988079071,
"rewards/chosen": 0.7476966977119446,
"rewards/margins": 0.9170078039169312,
"rewards/rejected": -0.16931119561195374,
"step": 320
},
{
"epoch": 0.86,
"grad_norm": 22.25,
"learning_rate": 3.053082288996112e-07,
"logits/chosen": 77.74131774902344,
"logits/rejected": 77.79161071777344,
"logps/chosen": -29.038299560546875,
"logps/rejected": -32.908966064453125,
"loss": 0.458,
"rewards/accuracies": 0.75,
"rewards/chosen": 0.9200155138969421,
"rewards/margins": 1.111884355545044,
"rewards/rejected": -0.19186890125274658,
"step": 330
},
{
"epoch": 0.88,
"grad_norm": 57.25,
"learning_rate": 2.0579377374915805e-07,
"logits/chosen": 82.08992767333984,
"logits/rejected": 82.12026977539062,
"logps/chosen": -32.29141616821289,
"logps/rejected": -33.880916595458984,
"loss": 0.5523,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": 0.6445139050483704,
"rewards/margins": 1.003070592880249,
"rewards/rejected": -0.3585566580295563,
"step": 340
},
{
"epoch": 0.91,
"grad_norm": 14.125,
"learning_rate": 1.2518018074041684e-07,
"logits/chosen": 81.13373565673828,
"logits/rejected": 81.14064025878906,
"logps/chosen": -32.35675048828125,
"logps/rejected": -33.414161682128906,
"loss": 0.4904,
"rewards/accuracies": 0.737500011920929,
"rewards/chosen": 0.9008921384811401,
"rewards/margins": 1.1265466213226318,
"rewards/rejected": -0.22565443813800812,
"step": 350
},
{
"epoch": 0.94,
"grad_norm": 27.625,
"learning_rate": 6.41315865106129e-08,
"logits/chosen": 82.62144470214844,
"logits/rejected": 82.64656066894531,
"logps/chosen": -28.411449432373047,
"logps/rejected": -31.78824806213379,
"loss": 0.5051,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": 0.7985339760780334,
"rewards/margins": 0.8954124450683594,
"rewards/rejected": -0.09687861800193787,
"step": 360
},
{
"epoch": 0.96,
"grad_norm": 43.0,
"learning_rate": 2.3150941078050325e-08,
"logits/chosen": 82.0575942993164,
"logits/rejected": 82.07881164550781,
"logps/chosen": -31.82853126525879,
"logps/rejected": -35.34919357299805,
"loss": 0.6002,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.7333434820175171,
"rewards/margins": 0.9012172818183899,
"rewards/rejected": -0.1678738296031952,
"step": 370
},
{
"epoch": 0.99,
"grad_norm": 34.75,
"learning_rate": 2.575864278703266e-09,
"logits/chosen": 75.95097351074219,
"logits/rejected": 75.82870483398438,
"logps/chosen": -29.8321475982666,
"logps/rejected": -28.438806533813477,
"loss": 0.5739,
"rewards/accuracies": 0.7124999761581421,
"rewards/chosen": 0.5207866430282593,
"rewards/margins": 0.7155483365058899,
"rewards/rejected": -0.19476178288459778,
"step": 380
},
{
"epoch": 1.0,
"step": 385,
"total_flos": 0.0,
"train_loss": 0.5948986065852178,
"train_runtime": 2557.7017,
"train_samples_per_second": 1.204,
"train_steps_per_second": 0.151
}
],
"logging_steps": 10,
"max_steps": 385,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"total_flos": 0.0,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}