|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 50, |
|
"global_step": 436, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.022935779816513763, |
|
"grad_norm": 1.642045632140662, |
|
"learning_rate": 1.1363636363636363e-07, |
|
"logits/chosen": -2.6192917823791504, |
|
"logits/rejected": -2.5524227619171143, |
|
"logps/chosen": -265.41119384765625, |
|
"logps/rejected": -236.11862182617188, |
|
"loss": 0.0154, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 0.00034645519917830825, |
|
"rewards/margins": 0.00023277592845261097, |
|
"rewards/rejected": 0.00011367930710548535, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.045871559633027525, |
|
"grad_norm": 1.5167637902207143, |
|
"learning_rate": 2.2727272727272726e-07, |
|
"logits/chosen": -2.657719135284424, |
|
"logits/rejected": -2.5759785175323486, |
|
"logps/chosen": -298.7945556640625, |
|
"logps/rejected": -274.304443359375, |
|
"loss": 0.0155, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -9.255408076569438e-05, |
|
"rewards/margins": 0.0018523468170315027, |
|
"rewards/rejected": -0.0019449004903435707, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06880733944954129, |
|
"grad_norm": 1.3745597654790793, |
|
"learning_rate": 3.4090909090909085e-07, |
|
"logits/chosen": -2.6762423515319824, |
|
"logits/rejected": -2.6026246547698975, |
|
"logps/chosen": -290.37896728515625, |
|
"logps/rejected": -234.3507080078125, |
|
"loss": 0.0146, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.009538007900118828, |
|
"rewards/margins": 0.013226142153143883, |
|
"rewards/rejected": -0.0036881337873637676, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09174311926605505, |
|
"grad_norm": 1.272192316985563, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -2.660547971725464, |
|
"logits/rejected": -2.6108529567718506, |
|
"logps/chosen": -280.96484375, |
|
"logps/rejected": -267.6105041503906, |
|
"loss": 0.013, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.04204345494508743, |
|
"rewards/margins": 0.04021826013922691, |
|
"rewards/rejected": 0.0018251972505822778, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.11467889908256881, |
|
"grad_norm": 1.4079236984792325, |
|
"learning_rate": 4.997110275491701e-07, |
|
"logits/chosen": -2.6261672973632812, |
|
"logits/rejected": -2.6208655834198, |
|
"logps/chosen": -289.76519775390625, |
|
"logps/rejected": -299.06353759765625, |
|
"loss": 0.012, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.06486029922962189, |
|
"rewards/margins": 0.06365373730659485, |
|
"rewards/rejected": 0.0012065758928656578, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11467889908256881, |
|
"eval_logits/chosen": -2.5786819458007812, |
|
"eval_logits/rejected": -2.502084732055664, |
|
"eval_logps/chosen": -277.3097839355469, |
|
"eval_logps/rejected": -247.28094482421875, |
|
"eval_loss": 0.011314952746033669, |
|
"eval_rewards/accuracies": 0.7025862336158752, |
|
"eval_rewards/chosen": 0.07780314981937408, |
|
"eval_rewards/margins": 0.08538833260536194, |
|
"eval_rewards/rejected": -0.007585177198052406, |
|
"eval_runtime": 94.9097, |
|
"eval_samples_per_second": 19.155, |
|
"eval_steps_per_second": 0.306, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13761467889908258, |
|
"grad_norm": 1.418614516322105, |
|
"learning_rate": 4.979475034558115e-07, |
|
"logits/chosen": -2.5812747478485107, |
|
"logits/rejected": -2.5265355110168457, |
|
"logps/chosen": -284.41473388671875, |
|
"logps/rejected": -259.66094970703125, |
|
"loss": 0.0112, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.05069952458143234, |
|
"rewards/margins": 0.0717436671257019, |
|
"rewards/rejected": -0.021044140681624413, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.16055045871559634, |
|
"grad_norm": 1.4648752438020702, |
|
"learning_rate": 4.945923025551788e-07, |
|
"logits/chosen": -2.5097055435180664, |
|
"logits/rejected": -2.4729437828063965, |
|
"logps/chosen": -319.59161376953125, |
|
"logps/rejected": -265.26019287109375, |
|
"loss": 0.0115, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.07979384809732437, |
|
"rewards/margins": 0.1183195561170578, |
|
"rewards/rejected": -0.03852573037147522, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.1834862385321101, |
|
"grad_norm": 1.379325651626498, |
|
"learning_rate": 4.896669632591651e-07, |
|
"logits/chosen": -2.4763333797454834, |
|
"logits/rejected": -2.385080099105835, |
|
"logps/chosen": -278.93988037109375, |
|
"logps/rejected": -255.78781127929688, |
|
"loss": 0.011, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": 0.06909944862127304, |
|
"rewards/margins": 0.10041693598031998, |
|
"rewards/rejected": -0.031317487359046936, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.20642201834862386, |
|
"grad_norm": 1.3303176283339873, |
|
"learning_rate": 4.832031033425662e-07, |
|
"logits/chosen": -2.423882246017456, |
|
"logits/rejected": -2.376399278640747, |
|
"logps/chosen": -280.73907470703125, |
|
"logps/rejected": -252.6663360595703, |
|
"loss": 0.0103, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.06854326277971268, |
|
"rewards/margins": 0.12719407677650452, |
|
"rewards/rejected": -0.058650821447372437, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22935779816513763, |
|
"grad_norm": 1.4785994754129348, |
|
"learning_rate": 4.752422169756047e-07, |
|
"logits/chosen": -2.3845717906951904, |
|
"logits/rejected": -2.3276596069335938, |
|
"logps/chosen": -268.87396240234375, |
|
"logps/rejected": -280.0634460449219, |
|
"loss": 0.011, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": 0.035507336258888245, |
|
"rewards/margins": 0.11685723066329956, |
|
"rewards/rejected": -0.08134988695383072, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22935779816513763, |
|
"eval_logits/chosen": -2.4392149448394775, |
|
"eval_logits/rejected": -2.3370442390441895, |
|
"eval_logps/chosen": -280.90240478515625, |
|
"eval_logps/rejected": -254.33267211914062, |
|
"eval_loss": 0.009970244951546192, |
|
"eval_rewards/accuracies": 0.7068965435028076, |
|
"eval_rewards/chosen": 0.04187687486410141, |
|
"eval_rewards/margins": 0.1199791207909584, |
|
"eval_rewards/rejected": -0.078102245926857, |
|
"eval_runtime": 95.3397, |
|
"eval_samples_per_second": 19.069, |
|
"eval_steps_per_second": 0.304, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25229357798165136, |
|
"grad_norm": 2.0224189698687702, |
|
"learning_rate": 4.658354083558188e-07, |
|
"logits/chosen": -2.4632654190063477, |
|
"logits/rejected": -2.354820966720581, |
|
"logps/chosen": -263.564453125, |
|
"logps/rejected": -241.26791381835938, |
|
"loss": 0.0113, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.01623677834868431, |
|
"rewards/margins": 0.09158362448215485, |
|
"rewards/rejected": -0.07534684240818024, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27522935779816515, |
|
"grad_norm": 1.4135396422800603, |
|
"learning_rate": 4.550430636492389e-07, |
|
"logits/chosen": -2.382753610610962, |
|
"logits/rejected": -2.350785970687866, |
|
"logps/chosen": -275.2984619140625, |
|
"logps/rejected": -260.0333557128906, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.005208671558648348, |
|
"rewards/margins": 0.0909029170870781, |
|
"rewards/rejected": -0.08569424599409103, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.2981651376146789, |
|
"grad_norm": 2.470336886918338, |
|
"learning_rate": 4.429344633468004e-07, |
|
"logits/chosen": -2.404059648513794, |
|
"logits/rejected": -2.3569350242614746, |
|
"logps/chosen": -254.55068969726562, |
|
"logps/rejected": -252.3956756591797, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": 0.016294140368700027, |
|
"rewards/margins": 0.11412493139505386, |
|
"rewards/rejected": -0.09783079475164413, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3211009174311927, |
|
"grad_norm": 3.815944054063513, |
|
"learning_rate": 4.2958733752443187e-07, |
|
"logits/chosen": -2.4113519191741943, |
|
"logits/rejected": -2.340986728668213, |
|
"logps/chosen": -272.99420166015625, |
|
"logps/rejected": -231.37026977539062, |
|
"loss": 0.0123, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.0319095216691494, |
|
"rewards/margins": 0.10264714062213898, |
|
"rewards/rejected": -0.07073761522769928, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3440366972477064, |
|
"grad_norm": 1.3170543399714232, |
|
"learning_rate": 4.150873668617898e-07, |
|
"logits/chosen": -2.454432249069214, |
|
"logits/rejected": -2.370666265487671, |
|
"logps/chosen": -268.79541015625, |
|
"logps/rejected": -247.0717315673828, |
|
"loss": 0.0104, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.022518452256917953, |
|
"rewards/margins": 0.11632315069437027, |
|
"rewards/rejected": -0.13884159922599792, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3440366972477064, |
|
"eval_logits/chosen": -2.494401454925537, |
|
"eval_logits/rejected": -2.39561128616333, |
|
"eval_logps/chosen": -285.85308837890625, |
|
"eval_logps/rejected": -260.5493469238281, |
|
"eval_loss": 0.009796149097383022, |
|
"eval_rewards/accuracies": 0.7198275923728943, |
|
"eval_rewards/chosen": -0.007629875559359789, |
|
"eval_rewards/margins": 0.13263897597789764, |
|
"eval_rewards/rejected": -0.14026884734630585, |
|
"eval_runtime": 95.8939, |
|
"eval_samples_per_second": 18.958, |
|
"eval_steps_per_second": 0.302, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3669724770642202, |
|
"grad_norm": 2.6585100320508737, |
|
"learning_rate": 3.9952763262280397e-07, |
|
"logits/chosen": -2.4578630924224854, |
|
"logits/rejected": -2.3927180767059326, |
|
"logps/chosen": -293.46124267578125, |
|
"logps/rejected": -295.8601379394531, |
|
"loss": 0.01, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.030898962169885635, |
|
"rewards/margins": 0.12981417775154114, |
|
"rewards/rejected": -0.0989152044057846, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.38990825688073394, |
|
"grad_norm": 1.3118055868117948, |
|
"learning_rate": 3.8300801912883414e-07, |
|
"logits/chosen": -2.504678249359131, |
|
"logits/rejected": -2.4544835090637207, |
|
"logps/chosen": -267.3302307128906, |
|
"logps/rejected": -279.33062744140625, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.04270657151937485, |
|
"rewards/margins": 0.13532397150993347, |
|
"rewards/rejected": -0.09261739999055862, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.41284403669724773, |
|
"grad_norm": 1.4248945431420277, |
|
"learning_rate": 3.6563457256020884e-07, |
|
"logits/chosen": -2.509859561920166, |
|
"logits/rejected": -2.4090194702148438, |
|
"logps/chosen": -306.2326965332031, |
|
"logps/rejected": -253.7351531982422, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.028747806325554848, |
|
"rewards/margins": 0.123654805123806, |
|
"rewards/rejected": -0.1524026244878769, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.43577981651376146, |
|
"grad_norm": 4.61575651504461, |
|
"learning_rate": 3.475188202022617e-07, |
|
"logits/chosen": -2.4155473709106445, |
|
"logits/rejected": -2.3843960762023926, |
|
"logps/chosen": -256.64117431640625, |
|
"logps/rejected": -276.67425537109375, |
|
"loss": 0.0105, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.014733311720192432, |
|
"rewards/margins": 0.13803140819072723, |
|
"rewards/rejected": -0.12329809367656708, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.45871559633027525, |
|
"grad_norm": 1.2837860780603194, |
|
"learning_rate": 3.287770545059052e-07, |
|
"logits/chosen": -2.529806613922119, |
|
"logits/rejected": -2.4307055473327637, |
|
"logps/chosen": -277.78741455078125, |
|
"logps/rejected": -256.05511474609375, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.004942134954035282, |
|
"rewards/margins": 0.12693332135677338, |
|
"rewards/rejected": -0.12199117988348007, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.45871559633027525, |
|
"eval_logits/chosen": -2.514023542404175, |
|
"eval_logits/rejected": -2.4208788871765137, |
|
"eval_logps/chosen": -282.1954345703125, |
|
"eval_logps/rejected": -257.61944580078125, |
|
"eval_loss": 0.009310290217399597, |
|
"eval_rewards/accuracies": 0.7931034564971924, |
|
"eval_rewards/chosen": 0.028946416452527046, |
|
"eval_rewards/margins": 0.13991650938987732, |
|
"eval_rewards/rejected": -0.11097008734941483, |
|
"eval_runtime": 96.1764, |
|
"eval_samples_per_second": 18.903, |
|
"eval_steps_per_second": 0.302, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.481651376146789, |
|
"grad_norm": 1.090616369349917, |
|
"learning_rate": 3.0952958655864954e-07, |
|
"logits/chosen": -2.4676451683044434, |
|
"logits/rejected": -2.4505865573883057, |
|
"logps/chosen": -270.9748840332031, |
|
"logps/rejected": -265.1389465332031, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.023295128718018532, |
|
"rewards/margins": 0.13126251101493835, |
|
"rewards/rejected": -0.10796739161014557, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5045871559633027, |
|
"grad_norm": 1.493461344772632, |
|
"learning_rate": 2.898999737583448e-07, |
|
"logits/chosen": -2.504462957382202, |
|
"logits/rejected": -2.404648542404175, |
|
"logps/chosen": -322.62689208984375, |
|
"logps/rejected": -300.34686279296875, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.00053420226322487, |
|
"rewards/margins": 0.15430037677288055, |
|
"rewards/rejected": -0.1537661850452423, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5275229357798165, |
|
"grad_norm": 1.2922195153842637, |
|
"learning_rate": 2.7001422664752333e-07, |
|
"logits/chosen": -2.3982276916503906, |
|
"logits/rejected": -2.3454272747039795, |
|
"logps/chosen": -269.4321594238281, |
|
"logps/rejected": -279.17926025390625, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.014886477962136269, |
|
"rewards/margins": 0.12070702016353607, |
|
"rewards/rejected": -0.1355935037136078, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5504587155963303, |
|
"grad_norm": 1.3338425925379636, |
|
"learning_rate": 2.5e-07, |
|
"logits/chosen": -2.4747557640075684, |
|
"logits/rejected": -2.3822951316833496, |
|
"logps/chosen": -288.63037109375, |
|
"logps/rejected": -274.5633850097656, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.010984973981976509, |
|
"rewards/margins": 0.12114904075860977, |
|
"rewards/rejected": -0.13213400542736053, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.573394495412844, |
|
"grad_norm": 1.161806057366623, |
|
"learning_rate": 2.2998577335247667e-07, |
|
"logits/chosen": -2.513209819793701, |
|
"logits/rejected": -2.424811363220215, |
|
"logps/chosen": -303.5930480957031, |
|
"logps/rejected": -273.9664611816406, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": 0.002007619244977832, |
|
"rewards/margins": 0.13723386824131012, |
|
"rewards/rejected": -0.1352262645959854, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.573394495412844, |
|
"eval_logits/chosen": -2.4590651988983154, |
|
"eval_logits/rejected": -2.362999200820923, |
|
"eval_logps/chosen": -286.21722412109375, |
|
"eval_logps/rejected": -263.2693786621094, |
|
"eval_loss": 0.008912510238587856, |
|
"eval_rewards/accuracies": 0.7801724076271057, |
|
"eval_rewards/chosen": -0.011271164752542973, |
|
"eval_rewards/margins": 0.15619821846485138, |
|
"eval_rewards/rejected": -0.16746938228607178, |
|
"eval_runtime": 94.78, |
|
"eval_samples_per_second": 19.181, |
|
"eval_steps_per_second": 0.306, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5963302752293578, |
|
"grad_norm": 1.1995883831745127, |
|
"learning_rate": 2.1010002624165524e-07, |
|
"logits/chosen": -2.4454376697540283, |
|
"logits/rejected": -2.407090902328491, |
|
"logps/chosen": -273.8803405761719, |
|
"logps/rejected": -292.88421630859375, |
|
"loss": 0.0097, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.03165815398097038, |
|
"rewards/margins": 0.15324734151363373, |
|
"rewards/rejected": -0.1849054992198944, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6192660550458715, |
|
"grad_norm": 1.236047360327654, |
|
"learning_rate": 1.9047041344135043e-07, |
|
"logits/chosen": -2.4361116886138916, |
|
"logits/rejected": -2.4080004692077637, |
|
"logps/chosen": -270.04998779296875, |
|
"logps/rejected": -270.3052978515625, |
|
"loss": 0.0091, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.03987707942724228, |
|
"rewards/margins": 0.14470525085926056, |
|
"rewards/rejected": -0.18458232283592224, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.6422018348623854, |
|
"grad_norm": 1.3000373431490932, |
|
"learning_rate": 1.7122294549409482e-07, |
|
"logits/chosen": -2.512075424194336, |
|
"logits/rejected": -2.448713779449463, |
|
"logps/chosen": -273.0007019042969, |
|
"logps/rejected": -280.35565185546875, |
|
"loss": 0.0098, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.8634367734193802e-05, |
|
"rewards/margins": 0.16557954251766205, |
|
"rewards/rejected": -0.16559818387031555, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6651376146788991, |
|
"grad_norm": 1.354856941159808, |
|
"learning_rate": 1.524811797977383e-07, |
|
"logits/chosen": -2.493873119354248, |
|
"logits/rejected": -2.416597843170166, |
|
"logps/chosen": -287.20404052734375, |
|
"logps/rejected": -267.6150817871094, |
|
"loss": 0.009, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": 0.005267986096441746, |
|
"rewards/margins": 0.1422511339187622, |
|
"rewards/rejected": -0.13698314130306244, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6880733944954128, |
|
"grad_norm": 1.3081620144699164, |
|
"learning_rate": 1.3436542743979125e-07, |
|
"logits/chosen": -2.4964632987976074, |
|
"logits/rejected": -2.472830057144165, |
|
"logps/chosen": -307.45733642578125, |
|
"logps/rejected": -271.1383361816406, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.015080097131431103, |
|
"rewards/margins": 0.10770467668771744, |
|
"rewards/rejected": -0.12278477847576141, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6880733944954128, |
|
"eval_logits/chosen": -2.477182388305664, |
|
"eval_logits/rejected": -2.382119655609131, |
|
"eval_logps/chosen": -286.4156188964844, |
|
"eval_logps/rejected": -262.90252685546875, |
|
"eval_loss": 0.008760624565184116, |
|
"eval_rewards/accuracies": 0.7844827771186829, |
|
"eval_rewards/chosen": -0.013255205936729908, |
|
"eval_rewards/margins": 0.15054550766944885, |
|
"eval_rewards/rejected": -0.16380071640014648, |
|
"eval_runtime": 95.1377, |
|
"eval_samples_per_second": 19.109, |
|
"eval_steps_per_second": 0.305, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7110091743119266, |
|
"grad_norm": 1.3153487126066306, |
|
"learning_rate": 1.1699198087116588e-07, |
|
"logits/chosen": -2.5111746788024902, |
|
"logits/rejected": -2.422899007797241, |
|
"logps/chosen": -280.66143798828125, |
|
"logps/rejected": -280.439697265625, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.026962900534272194, |
|
"rewards/margins": 0.11293704807758331, |
|
"rewards/rejected": -0.13989993929862976, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.7339449541284404, |
|
"grad_norm": 1.337124035482138, |
|
"learning_rate": 1.00472367377196e-07, |
|
"logits/chosen": -2.4476630687713623, |
|
"logits/rejected": -2.397982120513916, |
|
"logps/chosen": -275.42584228515625, |
|
"logps/rejected": -251.29122924804688, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.011938780546188354, |
|
"rewards/margins": 0.15607169270515442, |
|
"rewards/rejected": -0.16801045835018158, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.7568807339449541, |
|
"grad_norm": 1.4113093599011106, |
|
"learning_rate": 8.49126331382102e-08, |
|
"logits/chosen": -2.435225009918213, |
|
"logits/rejected": -2.386702537536621, |
|
"logps/chosen": -275.8332214355469, |
|
"logps/rejected": -260.52587890625, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.013110649771988392, |
|
"rewards/margins": 0.12114731222391129, |
|
"rewards/rejected": -0.1342579573392868, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7798165137614679, |
|
"grad_norm": 1.366187901878708, |
|
"learning_rate": 7.041266247556812e-08, |
|
"logits/chosen": -2.521238327026367, |
|
"logits/rejected": -2.4764904975891113, |
|
"logps/chosen": -289.8477478027344, |
|
"logps/rejected": -272.86456298828125, |
|
"loss": 0.009, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.01855117455124855, |
|
"rewards/margins": 0.11025450378656387, |
|
"rewards/rejected": -0.12880566716194153, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8027522935779816, |
|
"grad_norm": 1.3145992203531836, |
|
"learning_rate": 5.706553665319955e-08, |
|
"logits/chosen": -2.4982964992523193, |
|
"logits/rejected": -2.3964176177978516, |
|
"logps/chosen": -277.12042236328125, |
|
"logps/rejected": -253.37203979492188, |
|
"loss": 0.0096, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.026293223723769188, |
|
"rewards/margins": 0.13081298768520355, |
|
"rewards/rejected": -0.1571062207221985, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8027522935779816, |
|
"eval_logits/chosen": -2.481409788131714, |
|
"eval_logits/rejected": -2.3850882053375244, |
|
"eval_logps/chosen": -285.64593505859375, |
|
"eval_logps/rejected": -262.6905517578125, |
|
"eval_loss": 0.008698553778231144, |
|
"eval_rewards/accuracies": 0.7801724076271057, |
|
"eval_rewards/chosen": -0.005558254197239876, |
|
"eval_rewards/margins": 0.15612287819385529, |
|
"eval_rewards/rejected": -0.1616811454296112, |
|
"eval_runtime": 94.1358, |
|
"eval_samples_per_second": 19.313, |
|
"eval_steps_per_second": 0.308, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.8256880733944955, |
|
"grad_norm": 1.1160006320842197, |
|
"learning_rate": 4.4956936350761005e-08, |
|
"logits/chosen": -2.4785516262054443, |
|
"logits/rejected": -2.427774429321289, |
|
"logps/chosen": -252.49581909179688, |
|
"logps/rejected": -271.5592041015625, |
|
"loss": 0.0088, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.0017578303813934326, |
|
"rewards/margins": 0.12846335768699646, |
|
"rewards/rejected": -0.1302211880683899, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.8486238532110092, |
|
"grad_norm": 1.2680559242352918, |
|
"learning_rate": 3.416459164418123e-08, |
|
"logits/chosen": -2.514042854309082, |
|
"logits/rejected": -2.4509449005126953, |
|
"logps/chosen": -300.03057861328125, |
|
"logps/rejected": -276.8949279785156, |
|
"loss": 0.0087, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.004460807889699936, |
|
"rewards/margins": 0.13874222338199615, |
|
"rewards/rejected": -0.1432030349969864, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8715596330275229, |
|
"grad_norm": 1.5408590452374413, |
|
"learning_rate": 2.475778302439524e-08, |
|
"logits/chosen": -2.4833552837371826, |
|
"logits/rejected": -2.4306118488311768, |
|
"logps/chosen": -296.1810607910156, |
|
"logps/rejected": -272.9661560058594, |
|
"loss": 0.009, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": 0.001969636185094714, |
|
"rewards/margins": 0.15506890416145325, |
|
"rewards/rejected": -0.15309928357601166, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8944954128440367, |
|
"grad_norm": 1.4629042335763474, |
|
"learning_rate": 1.6796896657433805e-08, |
|
"logits/chosen": -2.4795479774475098, |
|
"logits/rejected": -2.3772130012512207, |
|
"logps/chosen": -253.0847930908203, |
|
"logps/rejected": -242.50405883789062, |
|
"loss": 0.0095, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.011273050680756569, |
|
"rewards/margins": 0.12070544064044952, |
|
"rewards/rejected": -0.13197848200798035, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"grad_norm": 1.3631415527821928, |
|
"learning_rate": 1.0333036740834855e-08, |
|
"logits/chosen": -2.4071569442749023, |
|
"logits/rejected": -2.371851682662964, |
|
"logps/chosen": -224.0278778076172, |
|
"logps/rejected": -241.8699188232422, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": 0.0053174905478954315, |
|
"rewards/margins": 0.1318611204624176, |
|
"rewards/rejected": -0.12654362618923187, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9174311926605505, |
|
"eval_logits/chosen": -2.4800400733947754, |
|
"eval_logits/rejected": -2.383610486984253, |
|
"eval_logps/chosen": -285.21075439453125, |
|
"eval_logps/rejected": -261.9634704589844, |
|
"eval_loss": 0.008683313615620136, |
|
"eval_rewards/accuracies": 0.767241358757019, |
|
"eval_rewards/chosen": -0.0012068306095898151, |
|
"eval_rewards/margins": 0.15320327877998352, |
|
"eval_rewards/rejected": -0.15441007912158966, |
|
"eval_runtime": 95.2651, |
|
"eval_samples_per_second": 19.084, |
|
"eval_steps_per_second": 0.304, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.9403669724770642, |
|
"grad_norm": 1.2959245163133988, |
|
"learning_rate": 5.4076974448211685e-09, |
|
"logits/chosen": -2.414137363433838, |
|
"logits/rejected": -2.364811420440674, |
|
"logps/chosen": -271.1864318847656, |
|
"logps/rejected": -253.48568725585938, |
|
"loss": 0.009, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.0023452930618077517, |
|
"rewards/margins": 0.1520446538925171, |
|
"rewards/rejected": -0.15438991785049438, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.963302752293578, |
|
"grad_norm": 2.8287990040902504, |
|
"learning_rate": 2.052496544188487e-09, |
|
"logits/chosen": -2.4582016468048096, |
|
"logits/rejected": -2.3712384700775146, |
|
"logps/chosen": -260.9129638671875, |
|
"logps/rejected": -261.7887878417969, |
|
"loss": 0.0093, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.014739753678441048, |
|
"rewards/margins": 0.14437474310398102, |
|
"rewards/rejected": -0.1591145098209381, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9862385321100917, |
|
"grad_norm": 1.3866473112375508, |
|
"learning_rate": 2.889724508297886e-10, |
|
"logits/chosen": -2.474024772644043, |
|
"logits/rejected": -2.357807159423828, |
|
"logps/chosen": -303.5400695800781, |
|
"logps/rejected": -256.8912658691406, |
|
"loss": 0.0094, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -0.003646157681941986, |
|
"rewards/margins": 0.12490139901638031, |
|
"rewards/rejected": -0.1285475790500641, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 436, |
|
"total_flos": 0.0, |
|
"train_loss": 0.010265434613673512, |
|
"train_runtime": 11699.9785, |
|
"train_samples_per_second": 4.766, |
|
"train_steps_per_second": 0.037 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 436, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|