|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": -1.7278180122375488, |
|
"logits/rejected": -1.7377450466156006, |
|
"logps/chosen": -29.553977966308594, |
|
"logps/rejected": -42.813133239746094, |
|
"loss": 0.5, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": -1.8663586378097534, |
|
"logits/rejected": -1.8706679344177246, |
|
"logps/chosen": -36.9964485168457, |
|
"logps/rejected": -33.65947723388672, |
|
"loss": 0.4966, |
|
"rewards/accuracies": 0.5277777910232544, |
|
"rewards/chosen": 0.005075507797300816, |
|
"rewards/margins": 0.019778331741690636, |
|
"rewards/rejected": -0.014702823013067245, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": -1.9970680475234985, |
|
"logits/rejected": -1.9997154474258423, |
|
"logps/chosen": -29.64749526977539, |
|
"logps/rejected": -29.048025131225586, |
|
"loss": 0.5018, |
|
"rewards/accuracies": 0.42500001192092896, |
|
"rewards/chosen": -0.0026510744355618954, |
|
"rewards/margins": -0.010360640473663807, |
|
"rewards/rejected": 0.007709565572440624, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": -1.9203827381134033, |
|
"logits/rejected": -1.9176925420761108, |
|
"logps/chosen": -31.42234230041504, |
|
"logps/rejected": -33.24127960205078, |
|
"loss": 0.4984, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": -0.003123724367469549, |
|
"rewards/margins": 0.008287688717246056, |
|
"rewards/rejected": -0.011411413550376892, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": -2.018051862716675, |
|
"logits/rejected": -2.009334087371826, |
|
"logps/chosen": -32.55129623413086, |
|
"logps/rejected": -32.50330352783203, |
|
"loss": 0.4982, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.012754792347550392, |
|
"rewards/margins": 0.008071592077612877, |
|
"rewards/rejected": 0.004683199338614941, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": -1.8634856939315796, |
|
"logits/rejected": -1.8527206182479858, |
|
"logps/chosen": -33.50724411010742, |
|
"logps/rejected": -35.390602111816406, |
|
"loss": 0.5016, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.02511655166745186, |
|
"rewards/margins": -0.005753959529101849, |
|
"rewards/rejected": 0.030870508402585983, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": -1.9438356161117554, |
|
"logits/rejected": -1.94576096534729, |
|
"logps/chosen": -32.481632232666016, |
|
"logps/rejected": -33.15100860595703, |
|
"loss": 0.4894, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.05927763134241104, |
|
"rewards/margins": 0.049092620611190796, |
|
"rewards/rejected": 0.010185008868575096, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": -2.0757923126220703, |
|
"logits/rejected": -2.080766439437866, |
|
"logps/chosen": -33.89708709716797, |
|
"logps/rejected": -36.524818420410156, |
|
"loss": 0.4939, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.04569761082530022, |
|
"rewards/margins": 0.023670893162488937, |
|
"rewards/rejected": 0.02202671766281128, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": -1.936668038368225, |
|
"logits/rejected": -1.9397681951522827, |
|
"logps/chosen": -34.20936965942383, |
|
"logps/rejected": -34.525596618652344, |
|
"loss": 0.4817, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.10922203958034515, |
|
"rewards/margins": 0.07581819593906403, |
|
"rewards/rejected": 0.033403851091861725, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": -1.9462896585464478, |
|
"logits/rejected": -1.9508006572723389, |
|
"logps/chosen": -32.27099609375, |
|
"logps/rejected": -32.275699615478516, |
|
"loss": 0.4865, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.0982138067483902, |
|
"rewards/margins": 0.05660901591181755, |
|
"rewards/rejected": 0.04160478338599205, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": -2.043682336807251, |
|
"logits/rejected": -2.04168438911438, |
|
"logps/chosen": -31.95809555053711, |
|
"logps/rejected": -31.16133689880371, |
|
"loss": 0.4794, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.14136961102485657, |
|
"rewards/margins": 0.08893296122550964, |
|
"rewards/rejected": 0.05243664234876633, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.2374911308288574, |
|
"eval_logits/rejected": -2.232652425765991, |
|
"eval_logps/chosen": -33.869850158691406, |
|
"eval_logps/rejected": -37.37673568725586, |
|
"eval_loss": 0.49711015820503235, |
|
"eval_rewards/accuracies": 0.5573089718818665, |
|
"eval_rewards/chosen": 0.08235026895999908, |
|
"eval_rewards/margins": 0.012408134527504444, |
|
"eval_rewards/rejected": 0.06994213908910751, |
|
"eval_runtime": 145.9739, |
|
"eval_samples_per_second": 2.35, |
|
"eval_steps_per_second": 0.295, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": -1.9995167255401611, |
|
"logits/rejected": -1.9971492290496826, |
|
"logps/chosen": -32.9360237121582, |
|
"logps/rejected": -33.857337951660156, |
|
"loss": 0.4819, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.15389741957187653, |
|
"rewards/margins": 0.06788130104541779, |
|
"rewards/rejected": 0.08601613342761993, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": -2.0095458030700684, |
|
"logits/rejected": -2.001213788986206, |
|
"logps/chosen": -32.16087341308594, |
|
"logps/rejected": -31.98464584350586, |
|
"loss": 0.4876, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.142018124461174, |
|
"rewards/margins": 0.05358927324414253, |
|
"rewards/rejected": 0.08842884749174118, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": -2.0384786128997803, |
|
"logits/rejected": -2.030527114868164, |
|
"logps/chosen": -30.18499755859375, |
|
"logps/rejected": -31.905630111694336, |
|
"loss": 0.4841, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.14535793662071228, |
|
"rewards/margins": 0.07167014479637146, |
|
"rewards/rejected": 0.07368779182434082, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": -1.9675172567367554, |
|
"logits/rejected": -1.97771418094635, |
|
"logps/chosen": -31.068078994750977, |
|
"logps/rejected": -32.39047622680664, |
|
"loss": 0.4757, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.17579205334186554, |
|
"rewards/margins": 0.10358880460262299, |
|
"rewards/rejected": 0.07220325618982315, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": -1.8799912929534912, |
|
"logits/rejected": -1.8811372518539429, |
|
"logps/chosen": -33.688819885253906, |
|
"logps/rejected": -34.5561637878418, |
|
"loss": 0.4651, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.2519363760948181, |
|
"rewards/margins": 0.15139077603816986, |
|
"rewards/rejected": 0.10054560005664825, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": -1.9306682348251343, |
|
"logits/rejected": -1.9272987842559814, |
|
"logps/chosen": -35.7833251953125, |
|
"logps/rejected": -32.48335266113281, |
|
"loss": 0.4818, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.19417627155780792, |
|
"rewards/margins": 0.07447630167007446, |
|
"rewards/rejected": 0.11969996988773346, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": -2.0319628715515137, |
|
"logits/rejected": -2.0246424674987793, |
|
"logps/chosen": -33.24143981933594, |
|
"logps/rejected": -31.190576553344727, |
|
"loss": 0.4536, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.2782108783721924, |
|
"rewards/margins": 0.19757375121116638, |
|
"rewards/rejected": 0.0806371346116066, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": -2.039504289627075, |
|
"logits/rejected": -2.044722080230713, |
|
"logps/chosen": -31.97749900817871, |
|
"logps/rejected": -32.195125579833984, |
|
"loss": 0.4662, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.28364285826683044, |
|
"rewards/margins": 0.1386357843875885, |
|
"rewards/rejected": 0.14500707387924194, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": -2.0397489070892334, |
|
"logits/rejected": -2.037031412124634, |
|
"logps/chosen": -31.067768096923828, |
|
"logps/rejected": -31.097219467163086, |
|
"loss": 0.4756, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.21059219539165497, |
|
"rewards/margins": 0.10009355843067169, |
|
"rewards/rejected": 0.11049864441156387, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": -1.910517930984497, |
|
"logits/rejected": -1.915186882019043, |
|
"logps/chosen": -31.059677124023438, |
|
"logps/rejected": -32.61454391479492, |
|
"loss": 0.4561, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.27330341935157776, |
|
"rewards/margins": 0.1851106435060501, |
|
"rewards/rejected": 0.08819273114204407, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.234231948852539, |
|
"eval_logits/rejected": -2.2294156551361084, |
|
"eval_logps/chosen": -33.776145935058594, |
|
"eval_logps/rejected": -37.29295349121094, |
|
"eval_loss": 0.49589911103248596, |
|
"eval_rewards/accuracies": 0.5423588156700134, |
|
"eval_rewards/chosen": 0.12920260429382324, |
|
"eval_rewards/margins": 0.017370687797665596, |
|
"eval_rewards/rejected": 0.1118319109082222, |
|
"eval_runtime": 145.6148, |
|
"eval_samples_per_second": 2.356, |
|
"eval_steps_per_second": 0.295, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": -2.0229763984680176, |
|
"logits/rejected": -2.0335872173309326, |
|
"logps/chosen": -31.537296295166016, |
|
"logps/rejected": -33.72160339355469, |
|
"loss": 0.466, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.21700111031532288, |
|
"rewards/margins": 0.14645527303218842, |
|
"rewards/rejected": 0.07054580748081207, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": -1.9160172939300537, |
|
"logits/rejected": -1.9307291507720947, |
|
"logps/chosen": -29.558719635009766, |
|
"logps/rejected": -31.404027938842773, |
|
"loss": 0.4532, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.27640971541404724, |
|
"rewards/margins": 0.19933710992336273, |
|
"rewards/rejected": 0.07707259804010391, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": -1.9730017185211182, |
|
"logits/rejected": -1.9770148992538452, |
|
"logps/chosen": -32.84386444091797, |
|
"logps/rejected": -31.42836570739746, |
|
"loss": 0.4471, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.2868002951145172, |
|
"rewards/margins": 0.22633206844329834, |
|
"rewards/rejected": 0.06046823784708977, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": -1.9717906713485718, |
|
"logits/rejected": -1.950059175491333, |
|
"logps/chosen": -33.563629150390625, |
|
"logps/rejected": -34.86870193481445, |
|
"loss": 0.4421, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.2950619161128998, |
|
"rewards/margins": 0.25401392579078674, |
|
"rewards/rejected": 0.041047997772693634, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": -2.0124945640563965, |
|
"logits/rejected": -2.0092015266418457, |
|
"logps/chosen": -32.43610382080078, |
|
"logps/rejected": -35.959327697753906, |
|
"loss": 0.4732, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.24448053538799286, |
|
"rewards/margins": 0.11168196052312851, |
|
"rewards/rejected": 0.13279855251312256, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": -1.8797550201416016, |
|
"logits/rejected": -1.8773235082626343, |
|
"logps/chosen": -33.68698501586914, |
|
"logps/rejected": -35.27508544921875, |
|
"loss": 0.4682, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.25070956349372864, |
|
"rewards/margins": 0.13594172894954681, |
|
"rewards/rejected": 0.11476783454418182, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": -1.8655035495758057, |
|
"logits/rejected": -1.8629907369613647, |
|
"logps/chosen": -33.91318893432617, |
|
"logps/rejected": -31.576608657836914, |
|
"loss": 0.4665, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.24220648407936096, |
|
"rewards/margins": 0.1448442041873932, |
|
"rewards/rejected": 0.09736229479312897, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": -1.9690582752227783, |
|
"logits/rejected": -1.9586395025253296, |
|
"logps/chosen": -34.75696563720703, |
|
"logps/rejected": -31.642765045166016, |
|
"loss": 0.4519, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.2998362183570862, |
|
"rewards/margins": 0.20072226226329803, |
|
"rewards/rejected": 0.09911395609378815, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": -2.0643129348754883, |
|
"logits/rejected": -2.049489736557007, |
|
"logps/chosen": -30.391122817993164, |
|
"logps/rejected": -32.35709762573242, |
|
"loss": 0.4722, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.2665007710456848, |
|
"rewards/margins": 0.11913253366947174, |
|
"rewards/rejected": 0.14736825227737427, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": -1.9364421367645264, |
|
"logits/rejected": -1.933985710144043, |
|
"logps/chosen": -32.0788688659668, |
|
"logps/rejected": -30.654926300048828, |
|
"loss": 0.4202, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.42412155866622925, |
|
"rewards/margins": 0.34317898750305176, |
|
"rewards/rejected": 0.08094261586666107, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -2.233308792114258, |
|
"eval_logits/rejected": -2.2285048961639404, |
|
"eval_logps/chosen": -33.75082015991211, |
|
"eval_logps/rejected": -37.27885055541992, |
|
"eval_loss": 0.4944371283054352, |
|
"eval_rewards/accuracies": 0.5307309031486511, |
|
"eval_rewards/chosen": 0.14186599850654602, |
|
"eval_rewards/margins": 0.022981125861406326, |
|
"eval_rewards/rejected": 0.1188848614692688, |
|
"eval_runtime": 145.5333, |
|
"eval_samples_per_second": 2.357, |
|
"eval_steps_per_second": 0.295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": -1.9192613363265991, |
|
"logits/rejected": -1.9161159992218018, |
|
"logps/chosen": -31.030216217041016, |
|
"logps/rejected": -33.573768615722656, |
|
"loss": 0.4538, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.2865663468837738, |
|
"rewards/margins": 0.20237243175506592, |
|
"rewards/rejected": 0.0841938853263855, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": -1.9703991413116455, |
|
"logits/rejected": -1.9582570791244507, |
|
"logps/chosen": -34.00434112548828, |
|
"logps/rejected": -33.427330017089844, |
|
"loss": 0.4439, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.2958681583404541, |
|
"rewards/margins": 0.24011309444904327, |
|
"rewards/rejected": 0.05575507879257202, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": -2.0051374435424805, |
|
"logits/rejected": -2.003786563873291, |
|
"logps/chosen": -32.876285552978516, |
|
"logps/rejected": -32.25849914550781, |
|
"loss": 0.4543, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.30475252866744995, |
|
"rewards/margins": 0.1942104697227478, |
|
"rewards/rejected": 0.11054208129644394, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": -2.092106580734253, |
|
"logits/rejected": -2.0764684677124023, |
|
"logps/chosen": -33.44976043701172, |
|
"logps/rejected": -32.806304931640625, |
|
"loss": 0.4513, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.3648151457309723, |
|
"rewards/margins": 0.2089495211839676, |
|
"rewards/rejected": 0.1558656245470047, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": -1.9645261764526367, |
|
"logits/rejected": -1.963702917098999, |
|
"logps/chosen": -32.543540954589844, |
|
"logps/rejected": -32.22251510620117, |
|
"loss": 0.4427, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.36927860975265503, |
|
"rewards/margins": 0.24796243011951447, |
|
"rewards/rejected": 0.12131617963314056, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": -1.921724557876587, |
|
"logits/rejected": -1.9320251941680908, |
|
"logps/chosen": -31.573944091796875, |
|
"logps/rejected": -35.00251007080078, |
|
"loss": 0.4596, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.3153747618198395, |
|
"rewards/margins": 0.17225053906440735, |
|
"rewards/rejected": 0.14312422275543213, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": -2.059705972671509, |
|
"logits/rejected": -2.0532097816467285, |
|
"logps/chosen": -33.03301239013672, |
|
"logps/rejected": -28.99460220336914, |
|
"loss": 0.4533, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.3094797134399414, |
|
"rewards/margins": 0.19356802105903625, |
|
"rewards/rejected": 0.11591170728206635, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": -1.919736623764038, |
|
"logits/rejected": -1.9219011068344116, |
|
"logps/chosen": -33.58899688720703, |
|
"logps/rejected": -30.707019805908203, |
|
"loss": 0.4436, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.3250266909599304, |
|
"rewards/margins": 0.24222226440906525, |
|
"rewards/rejected": 0.08280440419912338, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.4687722819192069, |
|
"train_runtime": 3250.758, |
|
"train_samples_per_second": 0.947, |
|
"train_steps_per_second": 0.118 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|