|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 13.0625, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": 88.18099975585938, |
|
"logits/rejected": 88.25153350830078, |
|
"logps/chosen": -29.073104858398438, |
|
"logps/rejected": -26.25731658935547, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 10.6875, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": 81.09551239013672, |
|
"logits/rejected": 80.80068969726562, |
|
"logps/chosen": -34.28557586669922, |
|
"logps/rejected": -33.081260681152344, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": -0.008770154789090157, |
|
"rewards/margins": 0.01580061763525009, |
|
"rewards/rejected": -0.0245707705616951, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 11.8125, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": 80.68476867675781, |
|
"logits/rejected": 80.57356262207031, |
|
"logps/chosen": -33.56308364868164, |
|
"logps/rejected": -30.81569480895996, |
|
"loss": 0.6814, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.02536492981016636, |
|
"rewards/margins": 0.03300638496875763, |
|
"rewards/rejected": -0.007641455624252558, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 12.4375, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": 82.52334594726562, |
|
"logits/rejected": 82.55731964111328, |
|
"logps/chosen": -33.843528747558594, |
|
"logps/rejected": -31.111019134521484, |
|
"loss": 0.7142, |
|
"rewards/accuracies": 0.4124999940395355, |
|
"rewards/chosen": 0.03385842964053154, |
|
"rewards/margins": -0.028703877702355385, |
|
"rewards/rejected": 0.06256230920553207, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 11.0, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": 81.0788803100586, |
|
"logits/rejected": 81.07527923583984, |
|
"logps/chosen": -32.68939971923828, |
|
"logps/rejected": -33.106483459472656, |
|
"loss": 0.6804, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0969332829117775, |
|
"rewards/margins": 0.044524990022182465, |
|
"rewards/rejected": 0.05240829661488533, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 9.25, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": 78.8182373046875, |
|
"logits/rejected": 78.82775115966797, |
|
"logps/chosen": -30.46297836303711, |
|
"logps/rejected": -30.7069091796875, |
|
"loss": 0.6746, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.14845404028892517, |
|
"rewards/margins": 0.07517841458320618, |
|
"rewards/rejected": 0.073275625705719, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 10.0, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": 83.39994812011719, |
|
"logits/rejected": 83.45416259765625, |
|
"logps/chosen": -30.7622013092041, |
|
"logps/rejected": -29.376235961914062, |
|
"loss": 0.6858, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.08259173482656479, |
|
"rewards/margins": 0.03973449766635895, |
|
"rewards/rejected": 0.04285724088549614, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 14.3125, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": 84.03162384033203, |
|
"logits/rejected": 84.06212615966797, |
|
"logps/chosen": -30.389270782470703, |
|
"logps/rejected": -32.93138885498047, |
|
"loss": 0.6894, |
|
"rewards/accuracies": 0.4625000059604645, |
|
"rewards/chosen": 0.04795076698064804, |
|
"rewards/margins": 0.02668655477464199, |
|
"rewards/rejected": 0.02126421593129635, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 12.0, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": 81.65167236328125, |
|
"logits/rejected": 81.63506317138672, |
|
"logps/chosen": -31.229572296142578, |
|
"logps/rejected": -30.818950653076172, |
|
"loss": 0.6715, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.05507441610097885, |
|
"rewards/margins": 0.06733567267656326, |
|
"rewards/rejected": -0.012261250987648964, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 13.875, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": 78.45601654052734, |
|
"logits/rejected": 78.42174530029297, |
|
"logps/chosen": -32.425289154052734, |
|
"logps/rejected": -31.132177352905273, |
|
"loss": 0.6671, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.04257836565375328, |
|
"rewards/margins": 0.07871954143047333, |
|
"rewards/rejected": -0.03614116832613945, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 11.4375, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": 83.59445190429688, |
|
"logits/rejected": 83.61743927001953, |
|
"logps/chosen": -33.955238342285156, |
|
"logps/rejected": -31.89117431640625, |
|
"loss": 0.6551, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.06995107233524323, |
|
"rewards/margins": 0.09807688742876053, |
|
"rewards/rejected": -0.028125811368227005, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 98.74779510498047, |
|
"eval_logits/rejected": 98.73747253417969, |
|
"eval_logps/chosen": -32.46078872680664, |
|
"eval_logps/rejected": -36.02348327636719, |
|
"eval_loss": 0.6956869959831238, |
|
"eval_rewards/accuracies": 0.5224252939224243, |
|
"eval_rewards/chosen": -0.003522348590195179, |
|
"eval_rewards/margins": 0.00786901917308569, |
|
"eval_rewards/rejected": -0.011391367763280869, |
|
"eval_runtime": 104.3728, |
|
"eval_samples_per_second": 3.286, |
|
"eval_steps_per_second": 0.412, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 14.0, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": 83.79670715332031, |
|
"logits/rejected": 83.66975402832031, |
|
"logps/chosen": -32.42261505126953, |
|
"logps/rejected": -32.766204833984375, |
|
"loss": 0.6376, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.12003795802593231, |
|
"rewards/margins": 0.1421007364988327, |
|
"rewards/rejected": -0.02206278033554554, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 13.4375, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": 83.85148620605469, |
|
"logits/rejected": 83.95706939697266, |
|
"logps/chosen": -28.113927841186523, |
|
"logps/rejected": -35.665374755859375, |
|
"loss": 0.6109, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.1631302535533905, |
|
"rewards/margins": 0.20437581837177277, |
|
"rewards/rejected": -0.04124556854367256, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 9.5, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": 80.99261474609375, |
|
"logits/rejected": 81.01200866699219, |
|
"logps/chosen": -30.389972686767578, |
|
"logps/rejected": -32.014381408691406, |
|
"loss": 0.6511, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.10347676277160645, |
|
"rewards/margins": 0.120028056204319, |
|
"rewards/rejected": -0.0165513064712286, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": 82.00664520263672, |
|
"logits/rejected": 82.01942443847656, |
|
"logps/chosen": -27.06869125366211, |
|
"logps/rejected": -33.10210418701172, |
|
"loss": 0.611, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.07572650164365768, |
|
"rewards/margins": 0.21147167682647705, |
|
"rewards/rejected": -0.13574519753456116, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": 80.3797378540039, |
|
"logits/rejected": 80.35098266601562, |
|
"logps/chosen": -28.888397216796875, |
|
"logps/rejected": -33.30742645263672, |
|
"loss": 0.596, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.10014281421899796, |
|
"rewards/margins": 0.24832835793495178, |
|
"rewards/rejected": -0.14818556606769562, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 13.9375, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": 82.10334014892578, |
|
"logits/rejected": 82.10926818847656, |
|
"logps/chosen": -33.88823699951172, |
|
"logps/rejected": -30.74798011779785, |
|
"loss": 0.6209, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.04970277473330498, |
|
"rewards/margins": 0.2087908238172531, |
|
"rewards/rejected": -0.15908804535865784, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 12.4375, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": 82.82877349853516, |
|
"logits/rejected": 82.77672576904297, |
|
"logps/chosen": -30.899311065673828, |
|
"logps/rejected": -32.958866119384766, |
|
"loss": 0.598, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0719953253865242, |
|
"rewards/margins": 0.2597564160823822, |
|
"rewards/rejected": -0.1877611130475998, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 8.75, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": 80.19332885742188, |
|
"logits/rejected": 80.17335510253906, |
|
"logps/chosen": -30.60544204711914, |
|
"logps/rejected": -31.953319549560547, |
|
"loss": 0.5849, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.1183418408036232, |
|
"rewards/margins": 0.2775646448135376, |
|
"rewards/rejected": -0.159222811460495, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 6.59375, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": 81.75331115722656, |
|
"logits/rejected": 81.74398040771484, |
|
"logps/chosen": -30.514236450195312, |
|
"logps/rejected": -30.920120239257812, |
|
"loss": 0.6564, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.050435226410627365, |
|
"rewards/margins": 0.14085446298122406, |
|
"rewards/rejected": -0.090419240295887, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 8.9375, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": 77.08003997802734, |
|
"logits/rejected": 77.01986694335938, |
|
"logps/chosen": -33.93625259399414, |
|
"logps/rejected": -33.12162780761719, |
|
"loss": 0.5948, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.16329774260520935, |
|
"rewards/margins": 0.28894227743148804, |
|
"rewards/rejected": -0.1256445348262787, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 98.33805847167969, |
|
"eval_logits/rejected": 98.30875396728516, |
|
"eval_logps/chosen": -32.75787353515625, |
|
"eval_logps/rejected": -36.611534118652344, |
|
"eval_loss": 0.672012209892273, |
|
"eval_rewards/accuracies": 0.5514950156211853, |
|
"eval_rewards/chosen": -0.06293957680463791, |
|
"eval_rewards/margins": 0.06606128066778183, |
|
"eval_rewards/rejected": -0.12900085747241974, |
|
"eval_runtime": 104.1536, |
|
"eval_samples_per_second": 3.293, |
|
"eval_steps_per_second": 0.413, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 15.0625, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": 79.71280670166016, |
|
"logits/rejected": 79.62548065185547, |
|
"logps/chosen": -33.420021057128906, |
|
"logps/rejected": -35.76679229736328, |
|
"loss": 0.6026, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.10514093935489655, |
|
"rewards/margins": 0.2612799406051636, |
|
"rewards/rejected": -0.15613897144794464, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 10.375, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": 81.73656463623047, |
|
"logits/rejected": 81.82989501953125, |
|
"logps/chosen": -31.172740936279297, |
|
"logps/rejected": -31.53964614868164, |
|
"loss": 0.5726, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.15524254739284515, |
|
"rewards/margins": 0.3173803985118866, |
|
"rewards/rejected": -0.16213785111904144, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 11.5625, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": 78.81214904785156, |
|
"logits/rejected": 78.8668212890625, |
|
"logps/chosen": -32.4487190246582, |
|
"logps/rejected": -34.767303466796875, |
|
"loss": 0.6172, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.07140493392944336, |
|
"rewards/margins": 0.22243526577949524, |
|
"rewards/rejected": -0.15103033185005188, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 14.25, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": 81.07857513427734, |
|
"logits/rejected": 81.38665771484375, |
|
"logps/chosen": -31.030254364013672, |
|
"logps/rejected": -32.145328521728516, |
|
"loss": 0.5977, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.13156220316886902, |
|
"rewards/margins": 0.254219114780426, |
|
"rewards/rejected": -0.12265688180923462, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 12.75, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": 79.71233367919922, |
|
"logits/rejected": 79.77317810058594, |
|
"logps/chosen": -27.3262939453125, |
|
"logps/rejected": -30.594249725341797, |
|
"loss": 0.6333, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.053920842707157135, |
|
"rewards/margins": 0.19262339174747467, |
|
"rewards/rejected": -0.13870255649089813, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 10.5, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": 76.8447036743164, |
|
"logits/rejected": 76.98011779785156, |
|
"logps/chosen": -30.586124420166016, |
|
"logps/rejected": -37.12653350830078, |
|
"loss": 0.5519, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.19497378170490265, |
|
"rewards/margins": 0.3999541699886322, |
|
"rewards/rejected": -0.20498037338256836, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": 76.1262435913086, |
|
"logits/rejected": 76.15739440917969, |
|
"logps/chosen": -31.1810245513916, |
|
"logps/rejected": -32.34056854248047, |
|
"loss": 0.5874, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.11957981437444687, |
|
"rewards/margins": 0.28660956025123596, |
|
"rewards/rejected": -0.1670297235250473, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 15.75, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": 78.95848846435547, |
|
"logits/rejected": 78.7259750366211, |
|
"logps/chosen": -31.400470733642578, |
|
"logps/rejected": -30.376489639282227, |
|
"loss": 0.6251, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.08188416808843613, |
|
"rewards/margins": 0.22378277778625488, |
|
"rewards/rejected": -0.14189860224723816, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 9.3125, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": 79.04127502441406, |
|
"logits/rejected": 78.95845031738281, |
|
"logps/chosen": -33.34519577026367, |
|
"logps/rejected": -33.35126876831055, |
|
"loss": 0.5388, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.15181000530719757, |
|
"rewards/margins": 0.4186907410621643, |
|
"rewards/rejected": -0.26688069105148315, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 12.8125, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": 74.71063995361328, |
|
"logits/rejected": 74.8211441040039, |
|
"logps/chosen": -32.499046325683594, |
|
"logps/rejected": -29.784500122070312, |
|
"loss": 0.5903, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.18133236467838287, |
|
"rewards/margins": 0.3178277611732483, |
|
"rewards/rejected": -0.13649536669254303, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": 98.2787857055664, |
|
"eval_logits/rejected": 98.24710083007812, |
|
"eval_logps/chosen": -32.80708312988281, |
|
"eval_logps/rejected": -36.62175750732422, |
|
"eval_loss": 0.6774110198020935, |
|
"eval_rewards/accuracies": 0.5340532064437866, |
|
"eval_rewards/chosen": -0.07278140634298325, |
|
"eval_rewards/margins": 0.05826449766755104, |
|
"eval_rewards/rejected": -0.13104592263698578, |
|
"eval_runtime": 104.1558, |
|
"eval_samples_per_second": 3.293, |
|
"eval_steps_per_second": 0.413, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 10.0, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": 81.97877502441406, |
|
"logits/rejected": 82.00166320800781, |
|
"logps/chosen": -30.284442901611328, |
|
"logps/rejected": -33.162872314453125, |
|
"loss": 0.579, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.11581814289093018, |
|
"rewards/margins": 0.30844008922576904, |
|
"rewards/rejected": -0.19262196123600006, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 9.8125, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": 79.28789520263672, |
|
"logits/rejected": 79.29541778564453, |
|
"logps/chosen": -30.684707641601562, |
|
"logps/rejected": -29.662694931030273, |
|
"loss": 0.5734, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.1827705204486847, |
|
"rewards/margins": 0.3314109444618225, |
|
"rewards/rejected": -0.14864039421081543, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": 76.40345764160156, |
|
"logits/rejected": 76.45513916015625, |
|
"logps/chosen": -29.180185317993164, |
|
"logps/rejected": -33.6518440246582, |
|
"loss": 0.5234, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.2344847470521927, |
|
"rewards/margins": 0.43788036704063416, |
|
"rewards/rejected": -0.20339563488960266, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 16.875, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": 80.8781509399414, |
|
"logits/rejected": 80.92503356933594, |
|
"logps/chosen": -32.53740310668945, |
|
"logps/rejected": -34.52736282348633, |
|
"loss": 0.5705, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.13494810461997986, |
|
"rewards/margins": 0.3666823208332062, |
|
"rewards/rejected": -0.23173420131206512, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 9.375, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": 79.81520080566406, |
|
"logits/rejected": 79.83287048339844, |
|
"logps/chosen": -32.53202819824219, |
|
"logps/rejected": -33.971153259277344, |
|
"loss": 0.561, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.22234222292900085, |
|
"rewards/margins": 0.39821282029151917, |
|
"rewards/rejected": -0.1758706122636795, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 9.25, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": 81.41835021972656, |
|
"logits/rejected": 81.45225524902344, |
|
"logps/chosen": -28.548419952392578, |
|
"logps/rejected": -32.2237434387207, |
|
"loss": 0.5819, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.20075836777687073, |
|
"rewards/margins": 0.3155379891395569, |
|
"rewards/rejected": -0.11477959156036377, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 12.4375, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": 80.87324523925781, |
|
"logits/rejected": 80.88945007324219, |
|
"logps/chosen": -32.29994583129883, |
|
"logps/rejected": -36.0130615234375, |
|
"loss": 0.6003, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.1152433529496193, |
|
"rewards/margins": 0.295980840921402, |
|
"rewards/rejected": -0.1807374656200409, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 11.1875, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": 74.64203643798828, |
|
"logits/rejected": 74.50636291503906, |
|
"logps/chosen": -29.965755462646484, |
|
"logps/rejected": -28.94148826599121, |
|
"loss": 0.5896, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.12207402288913727, |
|
"rewards/margins": 0.2782566249370575, |
|
"rewards/rejected": -0.15618260204792023, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6168378699909557, |
|
"train_runtime": 2560.1818, |
|
"train_samples_per_second": 1.203, |
|
"train_steps_per_second": 0.15 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|