|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 145.0, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": 88.18099975585938, |
|
"logits/rejected": 88.25153350830078, |
|
"logps/chosen": -29.073104858398438, |
|
"logps/rejected": -26.25731658935547, |
|
"loss": 0.3086, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 288.0, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": 81.08262634277344, |
|
"logits/rejected": 80.7869873046875, |
|
"logps/chosen": -34.28562545776367, |
|
"logps/rejected": -33.03427505493164, |
|
"loss": 0.9415, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": -0.039508190006017685, |
|
"rewards/margins": 0.02877185121178627, |
|
"rewards/rejected": -0.06828003376722336, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 149.0, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": 80.65168762207031, |
|
"logits/rejected": 80.53875732421875, |
|
"logps/chosen": -33.57862091064453, |
|
"logps/rejected": -30.82345199584961, |
|
"loss": 1.1289, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.1001572236418724, |
|
"rewards/margins": 0.14152315258979797, |
|
"rewards/rejected": -0.041365914046764374, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 234.0, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": 82.32148742675781, |
|
"logits/rejected": 82.35160827636719, |
|
"logps/chosen": -33.95701599121094, |
|
"logps/rejected": -31.341405868530273, |
|
"loss": 1.3887, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.05022481083869934, |
|
"rewards/margins": -0.023957695811986923, |
|
"rewards/rejected": 0.07418251037597656, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 636.0, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": 80.53231811523438, |
|
"logits/rejected": 80.530029296875, |
|
"logps/chosen": -33.09763717651367, |
|
"logps/rejected": -33.38296890258789, |
|
"loss": 2.1646, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.06877875328063965, |
|
"rewards/margins": 0.0817752406001091, |
|
"rewards/rejected": -0.0129964929074049, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 386.0, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": 78.10733032226562, |
|
"logits/rejected": 78.12706756591797, |
|
"logps/chosen": -31.247085571289062, |
|
"logps/rejected": -31.239765167236328, |
|
"loss": 1.232, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.03765222057700157, |
|
"rewards/margins": 0.11217772960662842, |
|
"rewards/rejected": -0.14982998371124268, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 200.0, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": 82.77879333496094, |
|
"logits/rejected": 82.82901763916016, |
|
"logps/chosen": -31.19879150390625, |
|
"logps/rejected": -29.63169288635254, |
|
"loss": 1.1982, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.021271925419569016, |
|
"rewards/margins": 0.015784021466970444, |
|
"rewards/rejected": -0.03705594688653946, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 832.0, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": 83.74064636230469, |
|
"logits/rejected": 83.76899719238281, |
|
"logps/chosen": -30.796112060546875, |
|
"logps/rejected": -33.087310791015625, |
|
"loss": 2.0455, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": -0.15038001537322998, |
|
"rewards/margins": -0.10573381185531616, |
|
"rewards/rejected": -0.04464619606733322, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 432.0, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": 81.53680419921875, |
|
"logits/rejected": 81.53197479248047, |
|
"logps/chosen": -31.50222396850586, |
|
"logps/rejected": -30.938213348388672, |
|
"loss": 2.2722, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0024462162982672453, |
|
"rewards/margins": 0.16495725512504578, |
|
"rewards/rejected": -0.16251103579998016, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 378.0, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": 78.6462173461914, |
|
"logits/rejected": 78.61238098144531, |
|
"logps/chosen": -32.54735565185547, |
|
"logps/rejected": -30.98733139038086, |
|
"loss": 1.7909, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.0817408338189125, |
|
"rewards/margins": 0.11401765048503876, |
|
"rewards/rejected": -0.03227682039141655, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 210.0, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": 84.08260345458984, |
|
"logits/rejected": 84.1148910522461, |
|
"logps/chosen": -34.308895111083984, |
|
"logps/rejected": -31.754663467407227, |
|
"loss": 2.0775, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.003516471479088068, |
|
"rewards/margins": 0.00018945932970382273, |
|
"rewards/rejected": -0.0037059492897242308, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 98.9691390991211, |
|
"eval_logits/rejected": 98.96244812011719, |
|
"eval_logps/chosen": -32.53602981567383, |
|
"eval_logps/rejected": -35.98977279663086, |
|
"eval_loss": 1.7336534261703491, |
|
"eval_rewards/accuracies": 0.4746677577495575, |
|
"eval_rewards/chosen": -0.08356913179159164, |
|
"eval_rewards/margins": -0.06264925748109818, |
|
"eval_rewards/rejected": -0.02091986872255802, |
|
"eval_runtime": 104.1909, |
|
"eval_samples_per_second": 3.292, |
|
"eval_steps_per_second": 0.413, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 256.0, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": 84.40672302246094, |
|
"logits/rejected": 84.30145263671875, |
|
"logps/chosen": -32.60901641845703, |
|
"logps/rejected": -32.70885467529297, |
|
"loss": 3.1161, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.37241029739379883, |
|
"rewards/margins": 0.4200804829597473, |
|
"rewards/rejected": -0.04767021909356117, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 446.0, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": 84.58103942871094, |
|
"logits/rejected": 84.6691665649414, |
|
"logps/chosen": -28.995365142822266, |
|
"logps/rejected": -35.473243713378906, |
|
"loss": 2.9665, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.05920146033167839, |
|
"rewards/margins": -0.0465129017829895, |
|
"rewards/rejected": -0.012688541784882545, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 208.0, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": 82.01698303222656, |
|
"logits/rejected": 82.04023742675781, |
|
"logps/chosen": -30.6518497467041, |
|
"logps/rejected": -32.01683807373047, |
|
"loss": 1.858, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.22995714843273163, |
|
"rewards/margins": 0.3066490590572357, |
|
"rewards/rejected": -0.0766918882727623, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 392.0, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": 83.59390258789062, |
|
"logits/rejected": 83.58688354492188, |
|
"logps/chosen": -27.2325439453125, |
|
"logps/rejected": -32.482357025146484, |
|
"loss": 2.0491, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.19329962134361267, |
|
"rewards/margins": 0.24638020992279053, |
|
"rewards/rejected": -0.05308058112859726, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 209.0, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": 82.43211364746094, |
|
"logits/rejected": 82.41563415527344, |
|
"logps/chosen": -28.861217498779297, |
|
"logps/rejected": -32.51410675048828, |
|
"loss": 1.804, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.4751041829586029, |
|
"rewards/margins": 0.42794743180274963, |
|
"rewards/rejected": 0.04715672880411148, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 450.0, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": 84.4583969116211, |
|
"logits/rejected": 84.46671295166016, |
|
"logps/chosen": -33.707855224609375, |
|
"logps/rejected": -29.813602447509766, |
|
"loss": 3.5567, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.38600218296051025, |
|
"rewards/margins": 0.2609595060348511, |
|
"rewards/rejected": 0.12504267692565918, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 278.0, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": 85.22765350341797, |
|
"logits/rejected": 85.16728973388672, |
|
"logps/chosen": -31.05304527282715, |
|
"logps/rejected": -32.11688995361328, |
|
"loss": 1.9364, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.1856154203414917, |
|
"rewards/margins": 0.2727610170841217, |
|
"rewards/rejected": -0.08714555948972702, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 196.0, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": 82.93448638916016, |
|
"logits/rejected": 82.9182357788086, |
|
"logps/chosen": -30.893056869506836, |
|
"logps/rejected": -31.30733299255371, |
|
"loss": 1.6619, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.2736847400665283, |
|
"rewards/margins": 0.40879687666893005, |
|
"rewards/rejected": -0.13511209189891815, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 130.0, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": 84.62263488769531, |
|
"logits/rejected": 84.59950256347656, |
|
"logps/chosen": -30.79315757751465, |
|
"logps/rejected": -30.666366577148438, |
|
"loss": 2.2923, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.02406691387295723, |
|
"rewards/margins": 0.15444278717041016, |
|
"rewards/rejected": -0.17850971221923828, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 536.0, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": 79.89015197753906, |
|
"logits/rejected": 79.83697509765625, |
|
"logps/chosen": -34.30753707885742, |
|
"logps/rejected": -32.36396026611328, |
|
"loss": 3.1221, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.40067988634109497, |
|
"rewards/margins": 0.2841777205467224, |
|
"rewards/rejected": 0.11650214344263077, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 99.16912841796875, |
|
"eval_logits/rejected": 99.15798950195312, |
|
"eval_logps/chosen": -32.52165603637695, |
|
"eval_logps/rejected": -36.10483932495117, |
|
"eval_loss": 1.716450572013855, |
|
"eval_rewards/accuracies": 0.5278239250183105, |
|
"eval_rewards/chosen": -0.0706300213932991, |
|
"eval_rewards/margins": 0.05385042726993561, |
|
"eval_rewards/rejected": -0.12448045611381531, |
|
"eval_runtime": 103.9441, |
|
"eval_samples_per_second": 3.3, |
|
"eval_steps_per_second": 0.414, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 392.0, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": 82.42179870605469, |
|
"logits/rejected": 82.31734466552734, |
|
"logps/chosen": -33.38166427612305, |
|
"logps/rejected": -35.02565383911133, |
|
"loss": 2.7032, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.507649302482605, |
|
"rewards/margins": 0.5432528257369995, |
|
"rewards/rejected": -0.03560344874858856, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 474.0, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": 84.43553161621094, |
|
"logits/rejected": 84.52201080322266, |
|
"logps/chosen": -31.842187881469727, |
|
"logps/rejected": -30.83688735961914, |
|
"loss": 3.6089, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.09608902782201767, |
|
"rewards/margins": 0.1932254582643509, |
|
"rewards/rejected": -0.09713643789291382, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 324.0, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": 81.6541976928711, |
|
"logits/rejected": 81.72249603271484, |
|
"logps/chosen": -32.656639099121094, |
|
"logps/rejected": -33.97739791870117, |
|
"loss": 1.7997, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.1341962367296219, |
|
"rewards/margins": 0.10292205959558487, |
|
"rewards/rejected": 0.03127415105700493, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 328.0, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": 83.9974365234375, |
|
"logits/rejected": 84.2688217163086, |
|
"logps/chosen": -31.19537353515625, |
|
"logps/rejected": -31.57509994506836, |
|
"loss": 2.2222, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.443420946598053, |
|
"rewards/margins": 0.4821735918521881, |
|
"rewards/rejected": -0.038752567023038864, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 404.0, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": 82.76817321777344, |
|
"logits/rejected": 82.85389709472656, |
|
"logps/chosen": -27.292465209960938, |
|
"logps/rejected": -29.717370986938477, |
|
"loss": 2.0726, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.27308744192123413, |
|
"rewards/margins": 0.10805711895227432, |
|
"rewards/rejected": 0.1650303304195404, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 672.0, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": 79.98811340332031, |
|
"logits/rejected": 80.15504455566406, |
|
"logps/chosen": -31.048583984375, |
|
"logps/rejected": -35.97705841064453, |
|
"loss": 3.1081, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.4611719250679016, |
|
"rewards/margins": 0.3490581512451172, |
|
"rewards/rejected": 0.112113818526268, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 211.0, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": 79.3856201171875, |
|
"logits/rejected": 79.41062927246094, |
|
"logps/chosen": -31.38250732421875, |
|
"logps/rejected": -31.722015380859375, |
|
"loss": 2.0801, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.3567788600921631, |
|
"rewards/margins": 0.5517110228538513, |
|
"rewards/rejected": -0.194932222366333, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 498.0, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": 81.8171157836914, |
|
"logits/rejected": 81.62063598632812, |
|
"logps/chosen": -31.32455062866211, |
|
"logps/rejected": -29.448989868164062, |
|
"loss": 2.1832, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.43680915236473083, |
|
"rewards/margins": 0.2406042516231537, |
|
"rewards/rejected": 0.19620487093925476, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 262.0, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": 82.0579833984375, |
|
"logits/rejected": 81.96476745605469, |
|
"logps/chosen": -33.8171272277832, |
|
"logps/rejected": -31.962472915649414, |
|
"loss": 3.4215, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.25841274857521057, |
|
"rewards/margins": 0.2094600647687912, |
|
"rewards/rejected": 0.04895265772938728, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 169.0, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": 77.75779724121094, |
|
"logits/rejected": 77.80491638183594, |
|
"logps/chosen": -32.91708755493164, |
|
"logps/rejected": -29.0297908782959, |
|
"loss": 2.4404, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.4397595524787903, |
|
"rewards/margins": 0.37474992871284485, |
|
"rewards/rejected": 0.06500961631536484, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": 99.06168365478516, |
|
"eval_logits/rejected": 99.05675506591797, |
|
"eval_logps/chosen": -32.30947494506836, |
|
"eval_logps/rejected": -35.82986068725586, |
|
"eval_loss": 1.5173850059509277, |
|
"eval_rewards/accuracies": 0.510797381401062, |
|
"eval_rewards/chosen": 0.12033051997423172, |
|
"eval_rewards/margins": -0.0026713553816080093, |
|
"eval_rewards/rejected": 0.12300187349319458, |
|
"eval_runtime": 103.9278, |
|
"eval_samples_per_second": 3.3, |
|
"eval_steps_per_second": 0.414, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 928.0, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": 84.6921157836914, |
|
"logits/rejected": 84.73957824707031, |
|
"logps/chosen": -30.482025146484375, |
|
"logps/rejected": -32.063751220703125, |
|
"loss": 2.7405, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.3433563709259033, |
|
"rewards/margins": 0.22095146775245667, |
|
"rewards/rejected": 0.12240489572286606, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 406.0, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": 82.20662689208984, |
|
"logits/rejected": 82.20085906982422, |
|
"logps/chosen": -31.305639266967773, |
|
"logps/rejected": -28.786062240600586, |
|
"loss": 2.2147, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.2636275887489319, |
|
"rewards/margins": 0.14354394376277924, |
|
"rewards/rejected": 0.12008367478847504, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 366.0, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": 79.59921264648438, |
|
"logits/rejected": 79.63972473144531, |
|
"logps/chosen": -29.793676376342773, |
|
"logps/rejected": -32.53992462158203, |
|
"loss": 2.2293, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.5030375719070435, |
|
"rewards/margins": 0.41758814454078674, |
|
"rewards/rejected": 0.08544941991567612, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 300.0, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": 83.62577056884766, |
|
"logits/rejected": 83.62586975097656, |
|
"logps/chosen": -32.759605407714844, |
|
"logps/rejected": -33.222259521484375, |
|
"loss": 2.4595, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.40729111433029175, |
|
"rewards/margins": 0.2755037844181061, |
|
"rewards/rejected": 0.13178732991218567, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 302.0, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": 82.8216781616211, |
|
"logits/rejected": 82.81562805175781, |
|
"logps/chosen": -33.33207702636719, |
|
"logps/rejected": -32.93452835083008, |
|
"loss": 1.9575, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.2804979979991913, |
|
"rewards/margins": 0.13895203173160553, |
|
"rewards/rejected": 0.14154598116874695, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 218.0, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": 84.19699096679688, |
|
"logits/rejected": 84.23506164550781, |
|
"logps/chosen": -28.99674415588379, |
|
"logps/rejected": -31.437408447265625, |
|
"loss": 2.09, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.4999212324619293, |
|
"rewards/margins": 0.3087221086025238, |
|
"rewards/rejected": 0.19119907915592194, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 508.0, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": 83.60005187988281, |
|
"logits/rejected": 83.62088012695312, |
|
"logps/chosen": -32.41377258300781, |
|
"logps/rejected": -34.801570892333984, |
|
"loss": 3.3754, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.41615089774131775, |
|
"rewards/margins": 0.13912765681743622, |
|
"rewards/rejected": 0.2770232558250427, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 194.0, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": 77.62187194824219, |
|
"logits/rejected": 77.48942565917969, |
|
"logps/chosen": -30.352733612060547, |
|
"logps/rejected": -28.094280242919922, |
|
"loss": 2.3864, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.20105357468128204, |
|
"rewards/margins": 0.14138910174369812, |
|
"rewards/rejected": 0.059664465487003326, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 2.2499145250041765, |
|
"train_runtime": 2558.7273, |
|
"train_samples_per_second": 1.203, |
|
"train_steps_per_second": 0.15 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|