|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 45.75, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": 88.18099975585938, |
|
"logits/rejected": 88.25153350830078, |
|
"logps/chosen": -29.073104858398438, |
|
"logps/rejected": -26.25731658935547, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 38.75, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": 81.08214569091797, |
|
"logits/rejected": 80.78972625732422, |
|
"logps/chosen": -34.26863098144531, |
|
"logps/rejected": -33.00303649902344, |
|
"loss": 0.7238, |
|
"rewards/accuracies": 0.4861111044883728, |
|
"rewards/chosen": -0.018833572044968605, |
|
"rewards/margins": 0.012407698668539524, |
|
"rewards/rejected": -0.03124127723276615, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 37.25, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": 80.68824005126953, |
|
"logits/rejected": 80.57817840576172, |
|
"logps/chosen": -33.58771514892578, |
|
"logps/rejected": -30.75152015686035, |
|
"loss": 0.7285, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0715368390083313, |
|
"rewards/margins": 0.05335939675569534, |
|
"rewards/rejected": 0.01817743293941021, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 39.0, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": 82.5134506225586, |
|
"logits/rejected": 82.5453872680664, |
|
"logps/chosen": -33.79930877685547, |
|
"logps/rejected": -31.215984344482422, |
|
"loss": 0.7655, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": 0.14945873618125916, |
|
"rewards/margins": 0.00396394869312644, |
|
"rewards/rejected": 0.14549477398395538, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 44.5, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": 81.0338363647461, |
|
"logits/rejected": 81.03011322021484, |
|
"logps/chosen": -32.87316131591797, |
|
"logps/rejected": -33.17707061767578, |
|
"loss": 0.7622, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.21063438057899475, |
|
"rewards/margins": 0.07661890983581543, |
|
"rewards/rejected": 0.13401541113853455, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 29.125, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": 78.6342544555664, |
|
"logits/rejected": 78.64932250976562, |
|
"logps/chosen": -30.660537719726562, |
|
"logps/rejected": -30.76174545288086, |
|
"loss": 0.7328, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.38129910826683044, |
|
"rewards/margins": 0.16322235763072968, |
|
"rewards/rejected": 0.21807675063610077, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 38.0, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": 83.13832092285156, |
|
"logits/rejected": 83.19276428222656, |
|
"logps/chosen": -30.93692970275879, |
|
"logps/rejected": -29.44403648376465, |
|
"loss": 0.7486, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.16676156222820282, |
|
"rewards/margins": 0.0642227977514267, |
|
"rewards/rejected": 0.10253874957561493, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 64.5, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": 83.7562255859375, |
|
"logits/rejected": 83.78288269042969, |
|
"logps/chosen": -30.605281829833984, |
|
"logps/rejected": -33.032676696777344, |
|
"loss": 0.7785, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.01661914773285389, |
|
"rewards/margins": 0.013097524642944336, |
|
"rewards/rejected": 0.003521624254062772, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 42.5, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": 81.37464904785156, |
|
"logits/rejected": 81.36463165283203, |
|
"logps/chosen": -31.443639755249023, |
|
"logps/rejected": -30.998950958251953, |
|
"loss": 0.6854, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.04291192442178726, |
|
"rewards/margins": 0.211822509765625, |
|
"rewards/rejected": -0.16891059279441833, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 38.5, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": 78.1283187866211, |
|
"logits/rejected": 78.10060119628906, |
|
"logps/chosen": -32.54193878173828, |
|
"logps/rejected": -31.2618408203125, |
|
"loss": 0.6597, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.06736886501312256, |
|
"rewards/margins": 0.2846258580684662, |
|
"rewards/rejected": -0.21725702285766602, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 33.0, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": 83.37786865234375, |
|
"logits/rejected": 83.40235900878906, |
|
"logps/chosen": -34.06679153442383, |
|
"logps/rejected": -31.954029083251953, |
|
"loss": 0.6331, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.166738823056221, |
|
"rewards/margins": 0.30917495489120483, |
|
"rewards/rejected": -0.14243611693382263, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 98.75102233886719, |
|
"eval_logits/rejected": 98.73809814453125, |
|
"eval_logps/chosen": -32.44057846069336, |
|
"eval_logps/rejected": -36.09393310546875, |
|
"eval_loss": 0.7266324758529663, |
|
"eval_rewards/accuracies": 0.545265793800354, |
|
"eval_rewards/chosen": 0.0018192834686487913, |
|
"eval_rewards/margins": 0.09100572764873505, |
|
"eval_rewards/rejected": -0.08918644487857819, |
|
"eval_runtime": 104.1233, |
|
"eval_samples_per_second": 3.294, |
|
"eval_steps_per_second": 0.413, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 53.25, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": 83.52274322509766, |
|
"logits/rejected": 83.4115219116211, |
|
"logps/chosen": -32.51097869873047, |
|
"logps/rejected": -32.80630874633789, |
|
"loss": 0.6201, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.35827895998954773, |
|
"rewards/margins": 0.46357136964797974, |
|
"rewards/rejected": -0.10529237985610962, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 50.25, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": 83.7201919555664, |
|
"logits/rejected": 83.82737731933594, |
|
"logps/chosen": -28.233470916748047, |
|
"logps/rejected": -35.50123977661133, |
|
"loss": 0.5892, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.4872798025608063, |
|
"rewards/margins": 0.5167454481124878, |
|
"rewards/rejected": -0.029465626925230026, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 26.0, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": 80.89537048339844, |
|
"logits/rejected": 80.91288757324219, |
|
"logps/chosen": -30.439437866210938, |
|
"logps/rejected": -32.11792755126953, |
|
"loss": 0.6342, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.32754257321357727, |
|
"rewards/margins": 0.457952082157135, |
|
"rewards/rejected": -0.13040950894355774, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 30.5, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": 82.11260223388672, |
|
"logits/rejected": 82.12245178222656, |
|
"logps/chosen": -27.101327896118164, |
|
"logps/rejected": -33.005577087402344, |
|
"loss": 0.5503, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.24219810962677002, |
|
"rewards/margins": 0.6497381329536438, |
|
"rewards/rejected": -0.4075400233268738, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 28.25, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": 80.61543273925781, |
|
"logits/rejected": 80.58251953125, |
|
"logps/chosen": -28.909435272216797, |
|
"logps/rejected": -33.041297912597656, |
|
"loss": 0.5489, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.3357718586921692, |
|
"rewards/margins": 0.6681298613548279, |
|
"rewards/rejected": -0.3323580324649811, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 55.5, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": 82.49334716796875, |
|
"logits/rejected": 82.49332427978516, |
|
"logps/chosen": -33.531585693359375, |
|
"logps/rejected": -30.385196685791016, |
|
"loss": 0.6226, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.4236120283603668, |
|
"rewards/margins": 0.7264719009399414, |
|
"rewards/rejected": -0.3028598725795746, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 42.25, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": 83.25149536132812, |
|
"logits/rejected": 83.19024658203125, |
|
"logps/chosen": -30.89450454711914, |
|
"logps/rejected": -32.51388931274414, |
|
"loss": 0.5987, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.2553495168685913, |
|
"rewards/margins": 0.601028323173523, |
|
"rewards/rejected": -0.3456788957118988, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 34.25, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": 80.78834533691406, |
|
"logits/rejected": 80.77064514160156, |
|
"logps/chosen": -30.47861671447754, |
|
"logps/rejected": -31.64987564086914, |
|
"loss": 0.5119, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5029744505882263, |
|
"rewards/margins": 0.8478416204452515, |
|
"rewards/rejected": -0.34486719965934753, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 21.125, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": 82.49182891845703, |
|
"logits/rejected": 82.4795150756836, |
|
"logps/chosen": -30.340301513671875, |
|
"logps/rejected": -30.779190063476562, |
|
"loss": 0.6399, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.2982791066169739, |
|
"rewards/margins": 0.5160930752754211, |
|
"rewards/rejected": -0.21781396865844727, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 17.375, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": 77.9924545288086, |
|
"logits/rejected": 77.93614196777344, |
|
"logps/chosen": -33.81483459472656, |
|
"logps/rejected": -32.65379333496094, |
|
"loss": 0.6048, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.6565331816673279, |
|
"rewards/margins": 0.7688080072402954, |
|
"rewards/rejected": -0.1122748851776123, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 98.63562774658203, |
|
"eval_logits/rejected": 98.61270141601562, |
|
"eval_logps/chosen": -32.66818618774414, |
|
"eval_logps/rejected": -36.3549919128418, |
|
"eval_loss": 0.7483024001121521, |
|
"eval_rewards/accuracies": 0.5282392501831055, |
|
"eval_rewards/chosen": -0.15750552713871002, |
|
"eval_rewards/margins": 0.1144195944070816, |
|
"eval_rewards/rejected": -0.27192509174346924, |
|
"eval_runtime": 104.0056, |
|
"eval_samples_per_second": 3.298, |
|
"eval_steps_per_second": 0.413, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 67.5, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": 80.62068176269531, |
|
"logits/rejected": 80.52841186523438, |
|
"logps/chosen": -33.23737716674805, |
|
"logps/rejected": -35.3394889831543, |
|
"loss": 0.5657, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.49584078788757324, |
|
"rewards/margins": 0.7432178258895874, |
|
"rewards/rejected": -0.2473769634962082, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 22.875, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": 82.79103088378906, |
|
"logits/rejected": 82.86891174316406, |
|
"logps/chosen": -31.00775718688965, |
|
"logps/rejected": -31.1812801361084, |
|
"loss": 0.4699, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.6588366627693176, |
|
"rewards/margins": 0.9754641652107239, |
|
"rewards/rejected": -0.31662750244140625, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 39.75, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": 79.89860534667969, |
|
"logits/rejected": 79.95353698730469, |
|
"logps/chosen": -32.31645965576172, |
|
"logps/rejected": -34.39720153808594, |
|
"loss": 0.6045, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.3425000309944153, |
|
"rewards/margins": 0.612037718296051, |
|
"rewards/rejected": -0.26953771710395813, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 34.0, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": 82.30177307128906, |
|
"logits/rejected": 82.58096313476562, |
|
"logps/chosen": -30.619409561157227, |
|
"logps/rejected": -31.930099487304688, |
|
"loss": 0.4503, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.7480591535568237, |
|
"rewards/margins": 1.0266973972320557, |
|
"rewards/rejected": -0.2786383032798767, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 37.25, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": 81.00114440917969, |
|
"logits/rejected": 81.05775451660156, |
|
"logps/chosen": -26.927043914794922, |
|
"logps/rejected": -30.175378799438477, |
|
"loss": 0.5818, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.46819525957107544, |
|
"rewards/margins": 0.6604421138763428, |
|
"rewards/rejected": -0.19224683940410614, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 28.125, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": 78.21713256835938, |
|
"logits/rejected": 78.345458984375, |
|
"logps/chosen": -30.480411529541016, |
|
"logps/rejected": -36.508689880371094, |
|
"loss": 0.4701, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.7564077377319336, |
|
"rewards/margins": 1.0413528680801392, |
|
"rewards/rejected": -0.28494516015052795, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 24.875, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": 77.48748779296875, |
|
"logits/rejected": 77.51399230957031, |
|
"logps/chosen": -30.899953842163086, |
|
"logps/rejected": -31.809417724609375, |
|
"loss": 0.5373, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.6152798533439636, |
|
"rewards/margins": 0.828079104423523, |
|
"rewards/rejected": -0.2127993404865265, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 40.5, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": 80.2722396850586, |
|
"logits/rejected": 80.06110382080078, |
|
"logps/chosen": -31.229726791381836, |
|
"logps/rejected": -29.85305404663086, |
|
"loss": 0.6573, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.406115859746933, |
|
"rewards/margins": 0.5363569259643555, |
|
"rewards/rejected": -0.1302410513162613, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 23.5, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": 80.33818054199219, |
|
"logits/rejected": 80.25775146484375, |
|
"logps/chosen": -33.049842834472656, |
|
"logps/rejected": -32.65058135986328, |
|
"loss": 0.4554, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.7380836606025696, |
|
"rewards/margins": 1.1816825866699219, |
|
"rewards/rejected": -0.44359898567199707, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 40.0, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": 76.02481079101562, |
|
"logits/rejected": 76.12067413330078, |
|
"logps/chosen": -32.21509552001953, |
|
"logps/rejected": -29.180316925048828, |
|
"loss": 0.5829, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.8334287405014038, |
|
"rewards/margins": 0.8882354497909546, |
|
"rewards/rejected": -0.054806679487228394, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": 98.6636962890625, |
|
"eval_logits/rejected": 98.63994598388672, |
|
"eval_logps/chosen": -32.58477020263672, |
|
"eval_logps/rejected": -36.28050231933594, |
|
"eval_loss": 0.7390850782394409, |
|
"eval_rewards/accuracies": 0.5485880374908447, |
|
"eval_rewards/chosen": -0.09911961853504181, |
|
"eval_rewards/margins": 0.12066645920276642, |
|
"eval_rewards/rejected": -0.21978609263896942, |
|
"eval_runtime": 103.9194, |
|
"eval_samples_per_second": 3.301, |
|
"eval_steps_per_second": 0.414, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 31.5, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": 83.141357421875, |
|
"logits/rejected": 83.17015075683594, |
|
"logps/chosen": -30.017724990844727, |
|
"logps/rejected": -32.537620544433594, |
|
"loss": 0.5115, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.5920647382736206, |
|
"rewards/margins": 0.8285657167434692, |
|
"rewards/rejected": -0.23650094866752625, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 25.5, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": 80.59849548339844, |
|
"logits/rejected": 80.60069274902344, |
|
"logps/chosen": -30.53042221069336, |
|
"logps/rejected": -29.161365509033203, |
|
"loss": 0.4928, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.7476966977119446, |
|
"rewards/margins": 0.9170078039169312, |
|
"rewards/rejected": -0.16931119561195374, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 22.25, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": 77.74131774902344, |
|
"logits/rejected": 77.79161071777344, |
|
"logps/chosen": -29.038299560546875, |
|
"logps/rejected": -32.908966064453125, |
|
"loss": 0.458, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.9200155138969421, |
|
"rewards/margins": 1.111884355545044, |
|
"rewards/rejected": -0.19186890125274658, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 57.25, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": 82.08992767333984, |
|
"logits/rejected": 82.12026977539062, |
|
"logps/chosen": -32.29141616821289, |
|
"logps/rejected": -33.880916595458984, |
|
"loss": 0.5523, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.6445139050483704, |
|
"rewards/margins": 1.003070592880249, |
|
"rewards/rejected": -0.3585566580295563, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 14.125, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": 81.13373565673828, |
|
"logits/rejected": 81.14064025878906, |
|
"logps/chosen": -32.35675048828125, |
|
"logps/rejected": -33.414161682128906, |
|
"loss": 0.4904, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.9008921384811401, |
|
"rewards/margins": 1.1265466213226318, |
|
"rewards/rejected": -0.22565443813800812, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 27.625, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": 82.62144470214844, |
|
"logits/rejected": 82.64656066894531, |
|
"logps/chosen": -28.411449432373047, |
|
"logps/rejected": -31.78824806213379, |
|
"loss": 0.5051, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.7985339760780334, |
|
"rewards/margins": 0.8954124450683594, |
|
"rewards/rejected": -0.09687861800193787, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 43.0, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": 82.0575942993164, |
|
"logits/rejected": 82.07881164550781, |
|
"logps/chosen": -31.82853126525879, |
|
"logps/rejected": -35.34919357299805, |
|
"loss": 0.6002, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.7333434820175171, |
|
"rewards/margins": 0.9012172818183899, |
|
"rewards/rejected": -0.1678738296031952, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 34.75, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": 75.95097351074219, |
|
"logits/rejected": 75.82870483398438, |
|
"logps/chosen": -29.8321475982666, |
|
"logps/rejected": -28.438806533813477, |
|
"loss": 0.5739, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.5207866430282593, |
|
"rewards/margins": 0.7155483365058899, |
|
"rewards/rejected": -0.19476178288459778, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5948986065852178, |
|
"train_runtime": 2557.7017, |
|
"train_samples_per_second": 1.204, |
|
"train_steps_per_second": 0.151 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|