diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,3229 @@ +{ + "best_metric": 0.8434417247772217, + "best_model_checkpoint": "saves/Mistral-7B-Instruct-v0.3/lora/orpo-salt/checkpoint-1500", + "epoch": 2.9969690846635686, + "eval_steps": 500, + "global_step": 1854, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.01616488179430188, + "grad_norm": 8.316998481750488, + "learning_rate": 4.999648198770648e-06, + "logits/chosen": -2.9437620639801025, + "logits/rejected": -2.991391658782959, + "logps/chosen": -1.0850014686584473, + "logps/rejected": -1.700299620628357, + "loss": 1.1424, + "odds_ratio_loss": 0.5736632347106934, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.10850014537572861, + "rewards/margins": 0.06152981519699097, + "rewards/rejected": -0.17002995312213898, + "sft_loss": 1.0850014686584473, + "step": 10 + }, + { + "epoch": 0.03232976358860376, + "grad_norm": 11.040986061096191, + "learning_rate": 4.998578646361359e-06, + "logits/chosen": -2.942950963973999, + "logits/rejected": -2.972404956817627, + "logps/chosen": -1.057308316230774, + "logps/rejected": -1.3991749286651611, + "loss": 1.1175, + "odds_ratio_loss": 0.6021074056625366, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.10573084652423859, + "rewards/margins": 0.03418666869401932, + "rewards/rejected": -0.1399175077676773, + "sft_loss": 1.057308316230774, + "step": 20 + }, + { + "epoch": 0.04849464538290564, + "grad_norm": 6.759947776794434, + "learning_rate": 4.996791614004449e-06, + "logits/chosen": -2.967661142349243, + "logits/rejected": -2.988191604614258, + "logps/chosen": -0.9923480749130249, + "logps/rejected": -1.4220160245895386, + "loss": 1.0537, + "odds_ratio_loss": 0.6132165789604187, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.09923480451107025, + "rewards/margins": 0.042966801673173904, + "rewards/rejected": -0.14220160245895386, + "sft_loss": 0.9923480749130249, + "step": 30 + }, + { + "epoch": 0.06465952717720752, + "grad_norm": 7.137161731719971, + "learning_rate": 4.994287614855618e-06, + "logits/chosen": -2.920475721359253, + "logits/rejected": -2.9866600036621094, + "logps/chosen": -1.0413509607315063, + "logps/rejected": -1.3548152446746826, + "loss": 1.1081, + "odds_ratio_loss": 0.6675835847854614, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.10413509607315063, + "rewards/margins": 0.03134642913937569, + "rewards/rejected": -0.13548150658607483, + "sft_loss": 1.0413509607315063, + "step": 40 + }, + { + "epoch": 0.0808244089715094, + "grad_norm": 5.560673236846924, + "learning_rate": 4.991067367951343e-06, + "logits/chosen": -3.0365490913391113, + "logits/rejected": -3.02650785446167, + "logps/chosen": -1.0141496658325195, + "logps/rejected": -1.311621904373169, + "loss": 1.0767, + "odds_ratio_loss": 0.6250823140144348, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.10141497850418091, + "rewards/margins": 0.02974722720682621, + "rewards/rejected": -0.13116219639778137, + "sft_loss": 1.0141496658325195, + "step": 50 + }, + { + "epoch": 0.09698929076581128, + "grad_norm": 2.7108840942382812, + "learning_rate": 4.987131798002389e-06, + "logits/chosen": -2.9634835720062256, + "logits/rejected": -2.984647274017334, + "logps/chosen": -0.9159129858016968, + "logps/rejected": -1.1770719289779663, + "loss": 0.9847, + "odds_ratio_loss": 0.6881905198097229, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.09159130603075027, + "rewards/margins": 0.026115888729691505, + "rewards/rejected": -0.11770719289779663, + "sft_loss": 0.9159129858016968, + "step": 60 + }, + { + "epoch": 0.11315417256011315, + "grad_norm": 10.522608757019043, + "learning_rate": 4.982482035128285e-06, + "logits/chosen": -2.909318208694458, + "logits/rejected": -2.938032388687134, + "logps/chosen": -0.9783811569213867, + "logps/rejected": -1.2935713529586792, + "loss": 1.0458, + "odds_ratio_loss": 0.6742582321166992, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.09783812612295151, + "rewards/margins": 0.03151901811361313, + "rewards/rejected": -0.12935714423656464, + "sft_loss": 0.9783811569213867, + "step": 70 + }, + { + "epoch": 0.12931905435441504, + "grad_norm": 5.515076160430908, + "learning_rate": 4.9771194145328e-06, + "logits/chosen": -2.9371273517608643, + "logits/rejected": -2.9489827156066895, + "logps/chosen": -0.8305400013923645, + "logps/rejected": -1.074385643005371, + "loss": 0.8918, + "odds_ratio_loss": 0.612562358379364, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.08305400609970093, + "rewards/margins": 0.024384554475545883, + "rewards/rejected": -0.10743856430053711, + "sft_loss": 0.8305400013923645, + "step": 80 + }, + { + "epoch": 0.1454839361487169, + "grad_norm": 4.674871444702148, + "learning_rate": 4.971045476120532e-06, + "logits/chosen": -2.9331607818603516, + "logits/rejected": -2.950925350189209, + "logps/chosen": -0.828619122505188, + "logps/rejected": -1.0910576581954956, + "loss": 0.892, + "odds_ratio_loss": 0.6336237192153931, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.08286191523075104, + "rewards/margins": 0.02624385617673397, + "rewards/rejected": -0.10910576581954956, + "sft_loss": 0.828619122505188, + "step": 90 + }, + { + "epoch": 0.1616488179430188, + "grad_norm": 1.4944851398468018, + "learning_rate": 4.964261964054713e-06, + "logits/chosen": -2.902466297149658, + "logits/rejected": -2.9229366779327393, + "logps/chosen": -0.8629521131515503, + "logps/rejected": -1.1251227855682373, + "loss": 0.9278, + "odds_ratio_loss": 0.6480029821395874, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.0862952247262001, + "rewards/margins": 0.02621707320213318, + "rewards/rejected": -0.11251229047775269, + "sft_loss": 0.8629521131515503, + "step": 100 + }, + { + "epoch": 0.17781369973732067, + "grad_norm": 2.9030275344848633, + "learning_rate": 4.956770826256372e-06, + "logits/chosen": -2.957075595855713, + "logits/rejected": -2.9636242389678955, + "logps/chosen": -0.8815444707870483, + "logps/rejected": -1.0787910223007202, + "loss": 0.9485, + "odds_ratio_loss": 0.6699932813644409, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.08815445005893707, + "rewards/margins": 0.019724659621715546, + "rewards/rejected": -0.10787911713123322, + "sft_loss": 0.8815444707870483, + "step": 110 + }, + { + "epoch": 0.19397858153162256, + "grad_norm": 2.7190232276916504, + "learning_rate": 4.94857421384497e-06, + "logits/chosen": -2.9349093437194824, + "logits/rejected": -2.958707332611084, + "logps/chosen": -0.8882864117622375, + "logps/rejected": -1.1156352758407593, + "loss": 0.9568, + "odds_ratio_loss": 0.6850704550743103, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.08882863819599152, + "rewards/margins": 0.022734878584742546, + "rewards/rejected": -0.11156351864337921, + "sft_loss": 0.8882864117622375, + "step": 120 + }, + { + "epoch": 0.21014346332592443, + "grad_norm": 2.794067859649658, + "learning_rate": 4.939674480520701e-06, + "logits/chosen": -2.9200732707977295, + "logits/rejected": -2.971010446548462, + "logps/chosen": -0.9047578573226929, + "logps/rejected": -1.1047561168670654, + "loss": 0.971, + "odds_ratio_loss": 0.6627197861671448, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.09047579020261765, + "rewards/margins": 0.019999820739030838, + "rewards/rejected": -0.11047561466693878, + "sft_loss": 0.9047578573226929, + "step": 130 + }, + { + "epoch": 0.2263083451202263, + "grad_norm": 2.3947913646698, + "learning_rate": 4.930074181888613e-06, + "logits/chosen": -2.9679551124572754, + "logits/rejected": -3.003051280975342, + "logps/chosen": -0.8538404703140259, + "logps/rejected": -1.0635920763015747, + "loss": 0.9144, + "odds_ratio_loss": 0.6060706377029419, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.0853840559720993, + "rewards/margins": 0.020975153893232346, + "rewards/rejected": -0.10635919868946075, + "sft_loss": 0.8538404703140259, + "step": 140 + }, + { + "epoch": 0.2424732269145282, + "grad_norm": 1.2783300876617432, + "learning_rate": 4.91977607472475e-06, + "logits/chosen": -2.9910409450531006, + "logits/rejected": -3.0015668869018555, + "logps/chosen": -0.8571484684944153, + "logps/rejected": -1.0444114208221436, + "loss": 0.9202, + "odds_ratio_loss": 0.6310030221939087, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.08571484684944153, + "rewards/margins": 0.018726304173469543, + "rewards/rejected": -0.10444115102291107, + "sft_loss": 0.8571484684944153, + "step": 150 + }, + { + "epoch": 0.2586381087088301, + "grad_norm": 1.8696929216384888, + "learning_rate": 4.908783116184534e-06, + "logits/chosen": -2.924990177154541, + "logits/rejected": -2.9346060752868652, + "logps/chosen": -0.8140287399291992, + "logps/rejected": -1.0593068599700928, + "loss": 0.8729, + "odds_ratio_loss": 0.5890231132507324, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.08140286803245544, + "rewards/margins": 0.024527812376618385, + "rewards/rejected": -0.10593068599700928, + "sft_loss": 0.8140287399291992, + "step": 160 + }, + { + "epoch": 0.27480299050313195, + "grad_norm": 1.7881205081939697, + "learning_rate": 4.897098462953598e-06, + "logits/chosen": -3.010953426361084, + "logits/rejected": -3.0108227729797363, + "logps/chosen": -0.8341928720474243, + "logps/rejected": -1.1377969980239868, + "loss": 0.8988, + "odds_ratio_loss": 0.6464797258377075, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.08341928571462631, + "rewards/margins": 0.030360404402017593, + "rewards/rejected": -0.1137797012925148, + "sft_loss": 0.8341928720474243, + "step": 170 + }, + { + "epoch": 0.2909678722974338, + "grad_norm": 1.495703935623169, + "learning_rate": 4.884725470341331e-06, + "logits/chosen": -2.979276180267334, + "logits/rejected": -3.003962755203247, + "logps/chosen": -0.8357053995132446, + "logps/rejected": -1.0890777111053467, + "loss": 0.894, + "odds_ratio_loss": 0.5833606123924255, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.08357055485248566, + "rewards/margins": 0.025337230414152145, + "rewards/rejected": -0.1089077740907669, + "sft_loss": 0.8357053995132446, + "step": 180 + }, + { + "epoch": 0.3071327540917357, + "grad_norm": 12.648097038269043, + "learning_rate": 4.871667691317377e-06, + "logits/chosen": -3.0223159790039062, + "logits/rejected": -3.021965742111206, + "logps/chosen": -0.9438085556030273, + "logps/rejected": -0.9988954663276672, + "loss": 1.0229, + "odds_ratio_loss": 0.7914139628410339, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.09438085556030273, + "rewards/margins": 0.005508692469447851, + "rewards/rejected": -0.09988953918218613, + "sft_loss": 0.9438085556030273, + "step": 190 + }, + { + "epoch": 0.3232976358860376, + "grad_norm": 10.996000289916992, + "learning_rate": 4.857928875491392e-06, + "logits/chosen": -2.9932854175567627, + "logits/rejected": -2.9929189682006836, + "logps/chosen": -0.7825512886047363, + "logps/rejected": -0.9704290628433228, + "loss": 0.8476, + "odds_ratio_loss": 0.65040123462677, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.07825513184070587, + "rewards/margins": 0.018787771463394165, + "rewards/rejected": -0.09704291075468063, + "sft_loss": 0.7825512886047363, + "step": 200 + }, + { + "epoch": 0.33946251768033947, + "grad_norm": 2.8774170875549316, + "learning_rate": 4.843512968036314e-06, + "logits/chosen": -2.942992925643921, + "logits/rejected": -2.9653749465942383, + "logps/chosen": -0.7782715559005737, + "logps/rejected": -0.9667309522628784, + "loss": 0.8401, + "odds_ratio_loss": 0.6183562874794006, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07782715559005737, + "rewards/margins": 0.018845947459340096, + "rewards/rejected": -0.09667309373617172, + "sft_loss": 0.7782715559005737, + "step": 210 + }, + { + "epoch": 0.35562739947464134, + "grad_norm": 1.0692508220672607, + "learning_rate": 4.828424108555486e-06, + "logits/chosen": -3.0436058044433594, + "logits/rejected": -3.0385215282440186, + "logps/chosen": -0.9878608584403992, + "logps/rejected": -1.2349365949630737, + "loss": 1.0531, + "odds_ratio_loss": 0.6525717377662659, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.09878608584403992, + "rewards/margins": 0.024707583710551262, + "rewards/rejected": -0.12349365651607513, + "sft_loss": 0.9878608584403992, + "step": 220 + }, + { + "epoch": 0.3717922812689432, + "grad_norm": 0.9835062026977539, + "learning_rate": 4.812666629893957e-06, + "logits/chosen": -3.0219979286193848, + "logits/rejected": -3.0635628700256348, + "logps/chosen": -0.8264273405075073, + "logps/rejected": -0.9492254257202148, + "loss": 0.8963, + "odds_ratio_loss": 0.6982277631759644, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.08264274150133133, + "rewards/margins": 0.012279799208045006, + "rewards/rejected": -0.09492253512144089, + "sft_loss": 0.8264273405075073, + "step": 230 + }, + { + "epoch": 0.3879571630632451, + "grad_norm": 2.8942973613739014, + "learning_rate": 4.796245056894273e-06, + "logits/chosen": -2.9647586345672607, + "logits/rejected": -3.0110132694244385, + "logps/chosen": -0.8473427891731262, + "logps/rejected": -1.0194810628890991, + "loss": 0.9194, + "odds_ratio_loss": 0.7206528782844543, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.08473427593708038, + "rewards/margins": 0.01721382327377796, + "rewards/rejected": -0.10194810479879379, + "sft_loss": 0.8473427891731262, + "step": 240 + }, + { + "epoch": 0.404122044857547, + "grad_norm": 1.4256747961044312, + "learning_rate": 4.779164105097148e-06, + "logits/chosen": -3.019832134246826, + "logits/rejected": -3.0398221015930176, + "logps/chosen": -0.7970795631408691, + "logps/rejected": -1.0736204385757446, + "loss": 0.859, + "odds_ratio_loss": 0.6194061040878296, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.07970795035362244, + "rewards/margins": 0.02765408717095852, + "rewards/rejected": -0.1073620468378067, + "sft_loss": 0.7970795631408691, + "step": 250 + }, + { + "epoch": 0.42028692665184886, + "grad_norm": 2.344681739807129, + "learning_rate": 4.761428679387373e-06, + "logits/chosen": -3.003972291946411, + "logits/rejected": -3.0536952018737793, + "logps/chosen": -0.8031284213066101, + "logps/rejected": -0.9748827815055847, + "loss": 0.8698, + "odds_ratio_loss": 0.666685163974762, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.0803128331899643, + "rewards/margins": 0.017175443470478058, + "rewards/rejected": -0.09748829156160355, + "sft_loss": 0.8031284213066101, + "step": 260 + }, + { + "epoch": 0.4364518084461507, + "grad_norm": 3.5396459102630615, + "learning_rate": 4.7430438725853515e-06, + "logits/chosen": -2.9764037132263184, + "logits/rejected": -3.0066990852355957, + "logps/chosen": -0.8188526034355164, + "logps/rejected": -1.1920969486236572, + "loss": 0.8795, + "odds_ratio_loss": 0.6065649390220642, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.08188526332378387, + "rewards/margins": 0.03732443228363991, + "rewards/rejected": -0.11920969188213348, + "sft_loss": 0.8188526034355164, + "step": 270 + }, + { + "epoch": 0.4526166902404526, + "grad_norm": 2.1748006343841553, + "learning_rate": 4.724014963984669e-06, + "logits/chosen": -3.0459182262420654, + "logits/rejected": -3.0646400451660156, + "logps/chosen": -0.8359659910202026, + "logps/rejected": -1.090301513671875, + "loss": 0.9006, + "odds_ratio_loss": 0.6466442942619324, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.0835966020822525, + "rewards/margins": 0.02543354593217373, + "rewards/rejected": -0.10903014987707138, + "sft_loss": 0.8359659910202026, + "step": 280 + }, + { + "epoch": 0.4687815720347545, + "grad_norm": 4.898100852966309, + "learning_rate": 4.704347417836116e-06, + "logits/chosen": -3.0006985664367676, + "logits/rejected": -3.0678975582122803, + "logps/chosen": -0.7437621355056763, + "logps/rejected": -0.9980353116989136, + "loss": 0.8091, + "odds_ratio_loss": 0.6530498266220093, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.07437621057033539, + "rewards/margins": 0.025427332147955894, + "rewards/rejected": -0.09980354458093643, + "sft_loss": 0.7437621355056763, + "step": 290 + }, + { + "epoch": 0.4849464538290564, + "grad_norm": 1.6706643104553223, + "learning_rate": 4.684046881778603e-06, + "logits/chosen": -3.0016372203826904, + "logits/rejected": -3.0139174461364746, + "logps/chosen": -0.7875638008117676, + "logps/rejected": -0.9269906282424927, + "loss": 0.8522, + "odds_ratio_loss": 0.6463567614555359, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.07875639200210571, + "rewards/margins": 0.013942673802375793, + "rewards/rejected": -0.0926990658044815, + "sft_loss": 0.7875638008117676, + "step": 300 + }, + { + "epoch": 0.5011113356233583, + "grad_norm": 2.1470396518707275, + "learning_rate": 4.663119185217409e-06, + "logits/chosen": -3.0117690563201904, + "logits/rejected": -3.0633795261383057, + "logps/chosen": -0.779082715511322, + "logps/rejected": -1.0186303853988647, + "loss": 0.8398, + "odds_ratio_loss": 0.6070552468299866, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.07790827751159668, + "rewards/margins": 0.023954764008522034, + "rewards/rejected": -0.10186304897069931, + "sft_loss": 0.779082715511322, + "step": 310 + }, + { + "epoch": 0.5172762174176602, + "grad_norm": 1.40939199924469, + "learning_rate": 4.641570337650232e-06, + "logits/chosen": -3.0548150539398193, + "logits/rejected": -3.0848946571350098, + "logps/chosen": -0.7415497303009033, + "logps/rejected": -0.982310950756073, + "loss": 0.8005, + "odds_ratio_loss": 0.589560866355896, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.07415496557950974, + "rewards/margins": 0.024076132103800774, + "rewards/rejected": -0.09823110699653625, + "sft_loss": 0.7415497303009033, + "step": 320 + }, + { + "epoch": 0.533441099211962, + "grad_norm": 1.4589684009552002, + "learning_rate": 4.61940652694154e-06, + "logits/chosen": -2.9786767959594727, + "logits/rejected": -3.0444042682647705, + "logps/chosen": -0.8274497985839844, + "logps/rejected": -1.0422160625457764, + "loss": 0.8954, + "odds_ratio_loss": 0.6792970895767212, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.08274497836828232, + "rewards/margins": 0.021476630121469498, + "rewards/rejected": -0.10422160476446152, + "sft_loss": 0.8274497985839844, + "step": 330 + }, + { + "epoch": 0.5496059810062639, + "grad_norm": 1.7346688508987427, + "learning_rate": 4.596634117545689e-06, + "logits/chosen": -3.0816709995269775, + "logits/rejected": -3.0875000953674316, + "logps/chosen": -0.8030570149421692, + "logps/rejected": -1.0277900695800781, + "loss": 0.8675, + "odds_ratio_loss": 0.6442909836769104, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.08030570298433304, + "rewards/margins": 0.022473318502306938, + "rewards/rejected": -0.10277901589870453, + "sft_loss": 0.8030570149421692, + "step": 340 + }, + { + "epoch": 0.5657708628005658, + "grad_norm": 1.8591201305389404, + "learning_rate": 4.573259648679335e-06, + "logits/chosen": -3.0649001598358154, + "logits/rejected": -3.029323101043701, + "logps/chosen": -0.7972906231880188, + "logps/rejected": -1.030458688735962, + "loss": 0.8615, + "odds_ratio_loss": 0.6421998739242554, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.07972906529903412, + "rewards/margins": 0.02331680618226528, + "rewards/rejected": -0.10304586589336395, + "sft_loss": 0.7972906231880188, + "step": 350 + }, + { + "epoch": 0.5819357445948676, + "grad_norm": 6.8644022941589355, + "learning_rate": 4.549289832443663e-06, + "logits/chosen": -3.0592093467712402, + "logits/rejected": -3.0790326595306396, + "logps/chosen": -0.7847403287887573, + "logps/rejected": -1.0331987142562866, + "loss": 0.8515, + "odds_ratio_loss": 0.6680801510810852, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.0784740298986435, + "rewards/margins": 0.024845842272043228, + "rewards/rejected": -0.10331986844539642, + "sft_loss": 0.7847403287887573, + "step": 360 + }, + { + "epoch": 0.5981006263891695, + "grad_norm": 2.649265766143799, + "learning_rate": 4.524731551896978e-06, + "logits/chosen": -3.0282368659973145, + "logits/rejected": -3.0504040718078613, + "logps/chosen": -0.7369459271430969, + "logps/rejected": -0.8965708017349243, + "loss": 0.801, + "odds_ratio_loss": 0.6401507258415222, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.07369460165500641, + "rewards/margins": 0.015962477773427963, + "rewards/rejected": -0.08965708315372467, + "sft_loss": 0.7369459271430969, + "step": 370 + }, + { + "epoch": 0.6142655081834714, + "grad_norm": 3.4245967864990234, + "learning_rate": 4.4995918590781925e-06, + "logits/chosen": -3.061760425567627, + "logits/rejected": -3.0728158950805664, + "logps/chosen": -0.7757102251052856, + "logps/rejected": -0.9465000033378601, + "loss": 0.8422, + "odds_ratio_loss": 0.6651015281677246, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.07757101953029633, + "rewards/margins": 0.017078977078199387, + "rewards/rejected": -0.09464999288320541, + "sft_loss": 0.7757102251052856, + "step": 380 + }, + { + "epoch": 0.6304303899777733, + "grad_norm": 1.3795065879821777, + "learning_rate": 4.473877972981797e-06, + "logits/chosen": -3.0188069343566895, + "logits/rejected": -3.024099588394165, + "logps/chosen": -0.7883812189102173, + "logps/rejected": -1.0172218084335327, + "loss": 0.8495, + "odds_ratio_loss": 0.6112133860588074, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.07883813232183456, + "rewards/margins": 0.02288406528532505, + "rewards/rejected": -0.10172219574451447, + "sft_loss": 0.7883812189102173, + "step": 390 + }, + { + "epoch": 0.6465952717720752, + "grad_norm": 1.795720100402832, + "learning_rate": 4.447597277484894e-06, + "logits/chosen": -2.9778666496276855, + "logits/rejected": -3.0176868438720703, + "logps/chosen": -0.743531346321106, + "logps/rejected": -0.9195922017097473, + "loss": 0.8054, + "odds_ratio_loss": 0.6191025972366333, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.07435314357280731, + "rewards/margins": 0.017606090754270554, + "rewards/rejected": -0.09195923060178757, + "sft_loss": 0.743531346321106, + "step": 400 + }, + { + "epoch": 0.6627601535663771, + "grad_norm": 1.847349762916565, + "learning_rate": 4.42075731922687e-06, + "logits/chosen": -3.0738515853881836, + "logits/rejected": -3.080390453338623, + "logps/chosen": -0.8688371777534485, + "logps/rejected": -1.0517194271087646, + "loss": 0.9327, + "odds_ratio_loss": 0.6381778120994568, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.08688371628522873, + "rewards/margins": 0.01828821375966072, + "rewards/rejected": -0.10517191886901855, + "sft_loss": 0.8688371777534485, + "step": 410 + }, + { + "epoch": 0.6789250353606789, + "grad_norm": 5.425457954406738, + "learning_rate": 4.3933658054423465e-06, + "logits/chosen": -3.0373263359069824, + "logits/rejected": -3.0470852851867676, + "logps/chosen": -0.7733573913574219, + "logps/rejected": -1.0374972820281982, + "loss": 0.8332, + "odds_ratio_loss": 0.5986987352371216, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.07733573764562607, + "rewards/margins": 0.026413992047309875, + "rewards/rejected": -0.10374973714351654, + "sft_loss": 0.7733573913574219, + "step": 420 + }, + { + "epoch": 0.6950899171549808, + "grad_norm": 2.5281944274902344, + "learning_rate": 4.365430601748003e-06, + "logits/chosen": -3.036982536315918, + "logits/rejected": -3.0820653438568115, + "logps/chosen": -0.8373786807060242, + "logps/rejected": -0.9543370008468628, + "loss": 0.9048, + "odds_ratio_loss": 0.673850953578949, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.08373787254095078, + "rewards/margins": 0.011695821769535542, + "rewards/rejected": -0.09543369710445404, + "sft_loss": 0.8373786807060242, + "step": 430 + }, + { + "epoch": 0.7112547989492827, + "grad_norm": 6.225044250488281, + "learning_rate": 4.336959729883925e-06, + "logits/chosen": -3.0365397930145264, + "logits/rejected": -3.063159942626953, + "logps/chosen": -0.7677688598632812, + "logps/rejected": -0.8747943043708801, + "loss": 0.8394, + "odds_ratio_loss": 0.7163954973220825, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.0767768919467926, + "rewards/margins": 0.010702535510063171, + "rewards/rejected": -0.08747942745685577, + "sft_loss": 0.7677688598632812, + "step": 440 + }, + { + "epoch": 0.7274196807435845, + "grad_norm": 4.238840103149414, + "learning_rate": 4.307961365410118e-06, + "logits/chosen": -3.031554698944092, + "logits/rejected": -3.0537819862365723, + "logps/chosen": -0.7852329015731812, + "logps/rejected": -0.9492766261100769, + "loss": 0.8479, + "odds_ratio_loss": 0.6268799901008606, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.07852329313755035, + "rewards/margins": 0.01640438288450241, + "rewards/rejected": -0.09492767602205276, + "sft_loss": 0.7852329015731812, + "step": 450 + }, + { + "epoch": 0.7435845625378864, + "grad_norm": 3.3165500164031982, + "learning_rate": 4.278443835358854e-06, + "logits/chosen": -3.0518264770507812, + "logits/rejected": -3.045757293701172, + "logps/chosen": -0.7719421982765198, + "logps/rejected": -1.0236573219299316, + "loss": 0.831, + "odds_ratio_loss": 0.5902360081672668, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0771942138671875, + "rewards/margins": 0.025171533226966858, + "rewards/rejected": -0.10236574709415436, + "sft_loss": 0.7719421982765198, + "step": 460 + }, + { + "epoch": 0.7597494443321883, + "grad_norm": 2.330195426940918, + "learning_rate": 4.248415615843523e-06, + "logits/chosen": -3.079732656478882, + "logits/rejected": -3.0858983993530273, + "logps/chosen": -0.7835872769355774, + "logps/rejected": -0.9195672273635864, + "loss": 0.8504, + "odds_ratio_loss": 0.6679321527481079, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.0783587247133255, + "rewards/margins": 0.01359798014163971, + "rewards/rejected": -0.0919567197561264, + "sft_loss": 0.7835872769355774, + "step": 470 + }, + { + "epoch": 0.7759143261264903, + "grad_norm": 6.524634838104248, + "learning_rate": 4.217885329624666e-06, + "logits/chosen": -3.0687060356140137, + "logits/rejected": -3.066584348678589, + "logps/chosen": -0.7517032623291016, + "logps/rejected": -0.9625965356826782, + "loss": 0.8127, + "odds_ratio_loss": 0.6098276376724243, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.07517032325267792, + "rewards/margins": 0.021089335903525352, + "rewards/rejected": -0.09625966101884842, + "sft_loss": 0.7517032623291016, + "step": 480 + }, + { + "epoch": 0.7920792079207921, + "grad_norm": 3.629946231842041, + "learning_rate": 4.186861743633911e-06, + "logits/chosen": -3.0519471168518066, + "logits/rejected": -3.0878560543060303, + "logps/chosen": -0.7691044807434082, + "logps/rejected": -1.0053441524505615, + "loss": 0.8348, + "odds_ratio_loss": 0.6568228006362915, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.07691045105457306, + "rewards/margins": 0.023623958230018616, + "rewards/rejected": -0.10053440183401108, + "sft_loss": 0.7691044807434082, + "step": 490 + }, + { + "epoch": 0.808244089715094, + "grad_norm": 1.9105418920516968, + "learning_rate": 4.155353766456497e-06, + "logits/chosen": -3.1221230030059814, + "logits/rejected": -3.1099045276641846, + "logps/chosen": -0.8151519894599915, + "logps/rejected": -0.9320052862167358, + "loss": 0.8803, + "odds_ratio_loss": 0.6510958671569824, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.08151519298553467, + "rewards/margins": 0.011685335077345371, + "rewards/rejected": -0.09320052713155746, + "sft_loss": 0.8151519894599915, + "step": 500 + }, + { + "epoch": 0.808244089715094, + "eval_logits/chosen": -3.052025556564331, + "eval_logits/rejected": -3.0746355056762695, + "eval_logps/chosen": -0.7961810827255249, + "eval_logps/rejected": -0.9834145307540894, + "eval_loss": 0.8619003891944885, + "eval_odds_ratio_loss": 0.6571925282478333, + "eval_rewards/accuracies": 0.5654545426368713, + "eval_rewards/chosen": -0.07961811125278473, + "eval_rewards/margins": 0.018723346292972565, + "eval_rewards/rejected": -0.0983414575457573, + "eval_runtime": 369.3504, + "eval_samples_per_second": 2.978, + "eval_sft_loss": 0.7961810827255249, + "eval_steps_per_second": 1.489, + "step": 500 + }, + { + "epoch": 0.8244089715093958, + "grad_norm": 2.1485707759857178, + "learning_rate": 4.123370445773134e-06, + "logits/chosen": -3.0945208072662354, + "logits/rejected": -3.1061959266662598, + "logps/chosen": -0.788953423500061, + "logps/rejected": -0.8694869875907898, + "loss": 0.8606, + "odds_ratio_loss": 0.7168292999267578, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.07889534533023834, + "rewards/margins": 0.008053350262343884, + "rewards/rejected": -0.0869487002491951, + "sft_loss": 0.788953423500061, + "step": 510 + }, + { + "epoch": 0.8405738533036977, + "grad_norm": 5.462285041809082, + "learning_rate": 4.090920965761906e-06, + "logits/chosen": -3.0212631225585938, + "logits/rejected": -3.031066417694092, + "logps/chosen": -0.8095367550849915, + "logps/rejected": -0.968643844127655, + "loss": 0.8752, + "odds_ratio_loss": 0.656153678894043, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.0809536725282669, + "rewards/margins": 0.015910711139440536, + "rewards/rejected": -0.09686438739299774, + "sft_loss": 0.8095367550849915, + "step": 520 + }, + { + "epoch": 0.8567387350979996, + "grad_norm": 4.947661399841309, + "learning_rate": 4.058014644460991e-06, + "logits/chosen": -3.0334737300872803, + "logits/rejected": -3.049567937850952, + "logps/chosen": -0.756322979927063, + "logps/rejected": -0.8999664187431335, + "loss": 0.8179, + "odds_ratio_loss": 0.6152733564376831, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07563230395317078, + "rewards/margins": 0.014364344999194145, + "rewards/rejected": -0.08999665081501007, + "sft_loss": 0.756322979927063, + "step": 530 + }, + { + "epoch": 0.8729036168923014, + "grad_norm": 2.2244179248809814, + "learning_rate": 4.024660931092939e-06, + "logits/chosen": -3.0092921257019043, + "logits/rejected": -3.0213623046875, + "logps/chosen": -0.7882963418960571, + "logps/rejected": -0.9846014976501465, + "loss": 0.8497, + "odds_ratio_loss": 0.6144701838493347, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.07882963120937347, + "rewards/margins": 0.019630510360002518, + "rewards/rejected": -0.0984601378440857, + "sft_loss": 0.7882963418960571, + "step": 540 + }, + { + "epoch": 0.8890684986866033, + "grad_norm": 1.6228642463684082, + "learning_rate": 3.990869403351272e-06, + "logits/chosen": -3.051035165786743, + "logits/rejected": -3.073690176010132, + "logps/chosen": -0.794242262840271, + "logps/rejected": -1.0482288599014282, + "loss": 0.8507, + "odds_ratio_loss": 0.564966082572937, + "rewards/accuracies": 0.675000011920929, + "rewards/chosen": -0.07942423224449158, + "rewards/margins": 0.025398656725883484, + "rewards/rejected": -0.10482288897037506, + "sft_loss": 0.794242262840271, + "step": 550 + }, + { + "epoch": 0.9052333804809052, + "grad_norm": 3.156888008117676, + "learning_rate": 3.956649764650206e-06, + "logits/chosen": -3.1107125282287598, + "logits/rejected": -3.121273994445801, + "logps/chosen": -0.7970541715621948, + "logps/rejected": -0.9795141220092773, + "loss": 0.864, + "odds_ratio_loss": 0.6697722673416138, + "rewards/accuracies": 0.4625000059604645, + "rewards/chosen": -0.07970540225505829, + "rewards/margins": 0.018245995044708252, + "rewards/rejected": -0.09795141220092773, + "sft_loss": 0.7970541715621948, + "step": 560 + }, + { + "epoch": 0.9213982622752072, + "grad_norm": 4.841604232788086, + "learning_rate": 3.92201184133826e-06, + "logits/chosen": -3.082477331161499, + "logits/rejected": -3.0972495079040527, + "logps/chosen": -0.7547809481620789, + "logps/rejected": -0.9688172340393066, + "loss": 0.8159, + "odds_ratio_loss": 0.6109867095947266, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.07547809928655624, + "rewards/margins": 0.021403620019555092, + "rewards/rejected": -0.09688171744346619, + "sft_loss": 0.7547809481620789, + "step": 570 + }, + { + "epoch": 0.937563144069509, + "grad_norm": 2.3844194412231445, + "learning_rate": 3.886965579876572e-06, + "logits/chosen": -3.114271640777588, + "logits/rejected": -3.1176934242248535, + "logps/chosen": -0.7549653053283691, + "logps/rejected": -0.858431339263916, + "loss": 0.8214, + "odds_ratio_loss": 0.664365828037262, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.07549652457237244, + "rewards/margins": 0.010346608236432076, + "rewards/rejected": -0.08584313094615936, + "sft_loss": 0.7549653053283691, + "step": 580 + }, + { + "epoch": 0.9537280258638109, + "grad_norm": 1.683606743812561, + "learning_rate": 3.851521043982716e-06, + "logits/chosen": -3.1139140129089355, + "logits/rejected": -3.134669780731201, + "logps/chosen": -0.793258011341095, + "logps/rejected": -0.9314570426940918, + "loss": 0.8603, + "odds_ratio_loss": 0.670341432094574, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.07932581007480621, + "rewards/margins": 0.013819903135299683, + "rewards/rejected": -0.0931456983089447, + "sft_loss": 0.793258011341095, + "step": 590 + }, + { + "epoch": 0.9698929076581128, + "grad_norm": 6.090215682983398, + "learning_rate": 3.81568841174086e-06, + "logits/chosen": -3.0772290229797363, + "logits/rejected": -3.1182470321655273, + "logps/chosen": -0.7526463270187378, + "logps/rejected": -0.9828107953071594, + "loss": 0.8172, + "odds_ratio_loss": 0.6451513171195984, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.07526463270187378, + "rewards/margins": 0.023016449064016342, + "rewards/rejected": -0.09828107804059982, + "sft_loss": 0.7526463270187378, + "step": 600 + }, + { + "epoch": 0.9860577894524146, + "grad_norm": 1.9422472715377808, + "learning_rate": 3.7794779726790664e-06, + "logits/chosen": -3.064382314682007, + "logits/rejected": -3.09804105758667, + "logps/chosen": -0.736395537853241, + "logps/rejected": -0.9441590309143066, + "loss": 0.798, + "odds_ratio_loss": 0.6159018278121948, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.07363955676555634, + "rewards/margins": 0.02077634632587433, + "rewards/rejected": -0.09441590309143066, + "sft_loss": 0.736395537853241, + "step": 610 + }, + { + "epoch": 1.0022226712467166, + "grad_norm": 3.0195484161376953, + "learning_rate": 3.7429001248146096e-06, + "logits/chosen": -3.0517828464508057, + "logits/rejected": -3.0764060020446777, + "logps/chosen": -0.7535146474838257, + "logps/rejected": -0.9733519554138184, + "loss": 0.8118, + "odds_ratio_loss": 0.582946240901947, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.07535146921873093, + "rewards/margins": 0.02198372781276703, + "rewards/rejected": -0.09733519703149796, + "sft_loss": 0.7535146474838257, + "step": 620 + }, + { + "epoch": 1.0183875530410185, + "grad_norm": 1.4307211637496948, + "learning_rate": 3.7059653716681227e-06, + "logits/chosen": -3.0821685791015625, + "logits/rejected": -3.0729098320007324, + "logps/chosen": -0.8158019185066223, + "logps/rejected": -1.0031102895736694, + "loss": 0.8821, + "odds_ratio_loss": 0.6627525091171265, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.08158019185066223, + "rewards/margins": 0.01873084530234337, + "rewards/rejected": -0.1003110408782959, + "sft_loss": 0.8158019185066223, + "step": 630 + }, + { + "epoch": 1.0345524348353203, + "grad_norm": 1.925041675567627, + "learning_rate": 3.668684319247463e-06, + "logits/chosen": -3.11029052734375, + "logits/rejected": -3.133237600326538, + "logps/chosen": -0.7127649188041687, + "logps/rejected": -0.9635750651359558, + "loss": 0.7692, + "odds_ratio_loss": 0.5642341375350952, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.07127650082111359, + "rewards/margins": 0.02508101798593998, + "rewards/rejected": -0.09635750949382782, + "sft_loss": 0.7127649188041687, + "step": 640 + }, + { + "epoch": 1.0507173166296222, + "grad_norm": 1.3039898872375488, + "learning_rate": 3.6310676730021373e-06, + "logits/chosen": -3.1486330032348633, + "logits/rejected": -3.1563305854797363, + "logps/chosen": -0.7767339944839478, + "logps/rejected": -0.9257136583328247, + "loss": 0.8402, + "odds_ratio_loss": 0.6346315145492554, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.07767340540885925, + "rewards/margins": 0.014897963032126427, + "rewards/rejected": -0.09257137030363083, + "sft_loss": 0.7767339944839478, + "step": 650 + }, + { + "epoch": 1.066882198423924, + "grad_norm": 8.102341651916504, + "learning_rate": 3.593126234749178e-06, + "logits/chosen": -3.1005399227142334, + "logits/rejected": -3.1532058715820312, + "logps/chosen": -0.8182880282402039, + "logps/rejected": -0.9324063062667847, + "loss": 0.8843, + "odds_ratio_loss": 0.6604392528533936, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.08182881772518158, + "rewards/margins": 0.011411817744374275, + "rewards/rejected": -0.0932406336069107, + "sft_loss": 0.8182880282402039, + "step": 660 + }, + { + "epoch": 1.083047080218226, + "grad_norm": 2.537015438079834, + "learning_rate": 3.554870899571343e-06, + "logits/chosen": -3.116610050201416, + "logits/rejected": -3.1441800594329834, + "logps/chosen": -0.7676142454147339, + "logps/rejected": -0.9440106153488159, + "loss": 0.8313, + "odds_ratio_loss": 0.6365249156951904, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.07676141709089279, + "rewards/margins": 0.0176396407186985, + "rewards/rejected": -0.09440106153488159, + "sft_loss": 0.7676142454147339, + "step": 670 + }, + { + "epoch": 1.0992119620125278, + "grad_norm": 6.732462406158447, + "learning_rate": 3.5163126526885373e-06, + "logits/chosen": -3.077462673187256, + "logits/rejected": -3.1246635913848877, + "logps/chosen": -0.7416545152664185, + "logps/rejected": -0.9575145840644836, + "loss": 0.8024, + "odds_ratio_loss": 0.607850193977356, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.0741654485464096, + "rewards/margins": 0.02158600650727749, + "rewards/rejected": -0.09575144946575165, + "sft_loss": 0.7416545152664185, + "step": 680 + }, + { + "epoch": 1.1153768438068297, + "grad_norm": 1.995318055152893, + "learning_rate": 3.4774625663033484e-06, + "logits/chosen": -3.088719367980957, + "logits/rejected": -3.1156630516052246, + "logps/chosen": -0.7460827231407166, + "logps/rejected": -0.913016140460968, + "loss": 0.8092, + "odds_ratio_loss": 0.6309585571289062, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.07460827380418777, + "rewards/margins": 0.01669333688914776, + "rewards/rejected": -0.09130160510540009, + "sft_loss": 0.7460827231407166, + "step": 690 + }, + { + "epoch": 1.1315417256011315, + "grad_norm": 1.8286596536636353, + "learning_rate": 3.4383317964216067e-06, + "logits/chosen": -3.0861454010009766, + "logits/rejected": -3.1318280696868896, + "logps/chosen": -0.7198506593704224, + "logps/rejected": -0.8674876093864441, + "loss": 0.7881, + "odds_ratio_loss": 0.6827356219291687, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.07198506593704224, + "rewards/margins": 0.014763685874640942, + "rewards/rejected": -0.08674876391887665, + "sft_loss": 0.7198506593704224, + "step": 700 + }, + { + "epoch": 1.1477066073954334, + "grad_norm": 3.1164824962615967, + "learning_rate": 3.398931579648877e-06, + "logits/chosen": -3.1039557456970215, + "logits/rejected": -3.141571521759033, + "logps/chosen": -0.7915211915969849, + "logps/rejected": -1.108933687210083, + "loss": 0.8534, + "odds_ratio_loss": 0.6187322735786438, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.07915211468935013, + "rewards/margins": 0.031741250306367874, + "rewards/rejected": -0.1108933687210083, + "sft_loss": 0.7915211915969849, + "step": 710 + }, + { + "epoch": 1.1638714891897353, + "grad_norm": 1.4664254188537598, + "learning_rate": 3.359273229963813e-06, + "logits/chosen": -3.1003873348236084, + "logits/rejected": -3.103529214859009, + "logps/chosen": -0.730610191822052, + "logps/rejected": -0.8717561960220337, + "loss": 0.7957, + "odds_ratio_loss": 0.6507007479667664, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.0730610117316246, + "rewards/margins": 0.014114594087004662, + "rewards/rejected": -0.08717561513185501, + "sft_loss": 0.730610191822052, + "step": 720 + }, + { + "epoch": 1.1800363709840371, + "grad_norm": 1.3891639709472656, + "learning_rate": 3.319368135469285e-06, + "logits/chosen": -3.1091885566711426, + "logits/rejected": -3.150235652923584, + "logps/chosen": -0.7716542482376099, + "logps/rejected": -1.0244488716125488, + "loss": 0.837, + "odds_ratio_loss": 0.6535542011260986, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.07716542482376099, + "rewards/margins": 0.025279458612203598, + "rewards/rejected": -0.10244487226009369, + "sft_loss": 0.7716542482376099, + "step": 730 + }, + { + "epoch": 1.196201252778339, + "grad_norm": 3.498404026031494, + "learning_rate": 3.279227755122228e-06, + "logits/chosen": -3.0913896560668945, + "logits/rejected": -3.114501476287842, + "logps/chosen": -0.7233365178108215, + "logps/rejected": -1.0984933376312256, + "loss": 0.778, + "odds_ratio_loss": 0.5463576912879944, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.07233365625143051, + "rewards/margins": 0.037515684962272644, + "rewards/rejected": -0.10984933376312256, + "sft_loss": 0.7233365178108215, + "step": 740 + }, + { + "epoch": 1.2123661345726409, + "grad_norm": 1.6691545248031616, + "learning_rate": 3.2388636154431417e-06, + "logits/chosen": -3.1425840854644775, + "logits/rejected": -3.175088405609131, + "logps/chosen": -0.7860497236251831, + "logps/rejected": -1.0689184665679932, + "loss": 0.846, + "odds_ratio_loss": 0.5996078252792358, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.07860498130321503, + "rewards/margins": 0.028286874294281006, + "rewards/rejected": -0.10689185559749603, + "sft_loss": 0.7860497236251831, + "step": 750 + }, + { + "epoch": 1.2285310163669427, + "grad_norm": 1.6291383504867554, + "learning_rate": 3.198287307206192e-06, + "logits/chosen": -3.0734639167785645, + "logits/rejected": -3.0943312644958496, + "logps/chosen": -0.7356687784194946, + "logps/rejected": -0.9488394856452942, + "loss": 0.7952, + "odds_ratio_loss": 0.5951117277145386, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.07356687635183334, + "rewards/margins": 0.021317074075341225, + "rewards/rejected": -0.09488394856452942, + "sft_loss": 0.7356687784194946, + "step": 760 + }, + { + "epoch": 1.2446958981612446, + "grad_norm": 1.6178934574127197, + "learning_rate": 3.157510482110856e-06, + "logits/chosen": -3.143188238143921, + "logits/rejected": -3.1431174278259277, + "logps/chosen": -0.7597110271453857, + "logps/rejected": -0.9578113555908203, + "loss": 0.8251, + "odds_ratio_loss": 0.6543140411376953, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.0759711042046547, + "rewards/margins": 0.019810039550065994, + "rewards/rejected": -0.09578114002943039, + "sft_loss": 0.7597110271453857, + "step": 770 + }, + { + "epoch": 1.2608607799555465, + "grad_norm": 1.6571840047836304, + "learning_rate": 3.116544849436077e-06, + "logits/chosen": -3.084550142288208, + "logits/rejected": -3.089481830596924, + "logps/chosen": -0.804486095905304, + "logps/rejected": -1.1236674785614014, + "loss": 0.8661, + "odds_ratio_loss": 0.6160328388214111, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.08044860512018204, + "rewards/margins": 0.03191814199090004, + "rewards/rejected": -0.11236675083637238, + "sft_loss": 0.804486095905304, + "step": 780 + }, + { + "epoch": 1.2770256617498483, + "grad_norm": 2.3182828426361084, + "learning_rate": 3.0754021726778848e-06, + "logits/chosen": -3.086887836456299, + "logits/rejected": -3.093517780303955, + "logps/chosen": -0.7223183512687683, + "logps/rejected": -1.0067367553710938, + "loss": 0.7785, + "odds_ratio_loss": 0.5618979334831238, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.07223184406757355, + "rewards/margins": 0.028441840782761574, + "rewards/rejected": -0.10067367553710938, + "sft_loss": 0.7223183512687683, + "step": 790 + }, + { + "epoch": 1.2931905435441502, + "grad_norm": 1.3017044067382812, + "learning_rate": 3.0340942661714463e-06, + "logits/chosen": -3.1351680755615234, + "logits/rejected": -3.134371280670166, + "logps/chosen": -0.796164333820343, + "logps/rejected": -0.9728119969367981, + "loss": 0.8595, + "odds_ratio_loss": 0.6329259276390076, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.07961643487215042, + "rewards/margins": 0.017664765939116478, + "rewards/rejected": -0.09728120267391205, + "sft_loss": 0.796164333820343, + "step": 800 + }, + { + "epoch": 1.3093554253384523, + "grad_norm": 2.427229166030884, + "learning_rate": 2.992632991698512e-06, + "logits/chosen": -3.082369327545166, + "logits/rejected": -3.110403060913086, + "logps/chosen": -0.7753532528877258, + "logps/rejected": -0.9790387153625488, + "loss": 0.8377, + "odds_ratio_loss": 0.6236809492111206, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.07753531634807587, + "rewards/margins": 0.0203685499727726, + "rewards/rejected": -0.09790387749671936, + "sft_loss": 0.7753532528877258, + "step": 810 + }, + { + "epoch": 1.3255203071327541, + "grad_norm": 1.9767731428146362, + "learning_rate": 2.9510302550812537e-06, + "logits/chosen": -3.089890480041504, + "logits/rejected": -3.138611078262329, + "logps/chosen": -0.6810489892959595, + "logps/rejected": -1.002256155014038, + "loss": 0.7382, + "odds_ratio_loss": 0.571026623249054, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.06810488551855087, + "rewards/margins": 0.032120734453201294, + "rewards/rejected": -0.10022562742233276, + "sft_loss": 0.6810489892959595, + "step": 820 + }, + { + "epoch": 1.341685188927056, + "grad_norm": 4.014182090759277, + "learning_rate": 2.9092980027634325e-06, + "logits/chosen": -3.095803737640381, + "logits/rejected": -3.1307053565979004, + "logps/chosen": -0.7113646268844604, + "logps/rejected": -0.9739853143692017, + "loss": 0.773, + "odds_ratio_loss": 0.6160944700241089, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.0711364597082138, + "rewards/margins": 0.02626206912100315, + "rewards/rejected": -0.0973985344171524, + "sft_loss": 0.7113646268844604, + "step": 830 + }, + { + "epoch": 1.3578500707213579, + "grad_norm": 2.0923309326171875, + "learning_rate": 2.867448218379927e-06, + "logits/chosen": -3.092233419418335, + "logits/rejected": -3.1379733085632324, + "logps/chosen": -0.8104713559150696, + "logps/rejected": -0.9551480412483215, + "loss": 0.8783, + "odds_ratio_loss": 0.6786799430847168, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.08104713261127472, + "rewards/margins": 0.014467673376202583, + "rewards/rejected": -0.09551481157541275, + "sft_loss": 0.8104713559150696, + "step": 840 + }, + { + "epoch": 1.3740149525156597, + "grad_norm": 5.772988319396973, + "learning_rate": 2.825492919315559e-06, + "logits/chosen": -3.1081454753875732, + "logits/rejected": -3.1527466773986816, + "logps/chosen": -0.8419367671012878, + "logps/rejected": -0.9567692875862122, + "loss": 0.9094, + "odds_ratio_loss": 0.6746194958686829, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.08419367671012878, + "rewards/margins": 0.011483250185847282, + "rewards/rejected": -0.09567694365978241, + "sft_loss": 0.8419367671012878, + "step": 850 + }, + { + "epoch": 1.3901798343099616, + "grad_norm": 3.8201489448547363, + "learning_rate": 2.7834441532542482e-06, + "logits/chosen": -3.1463775634765625, + "logits/rejected": -3.1600661277770996, + "logps/chosen": -0.7513538599014282, + "logps/rejected": -0.964932918548584, + "loss": 0.8093, + "odds_ratio_loss": 0.579144299030304, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.07513538748025894, + "rewards/margins": 0.021357912570238113, + "rewards/rejected": -0.09649328887462616, + "sft_loss": 0.7513538599014282, + "step": 860 + }, + { + "epoch": 1.4063447161042635, + "grad_norm": 2.0883562564849854, + "learning_rate": 2.74131399471945e-06, + "logits/chosen": -3.1237385272979736, + "logits/rejected": -3.1525537967681885, + "logps/chosen": -0.7669543027877808, + "logps/rejected": -0.9222270250320435, + "loss": 0.8335, + "odds_ratio_loss": 0.6657834053039551, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.07669542729854584, + "rewards/margins": 0.0155272725969553, + "rewards/rejected": -0.09222270548343658, + "sft_loss": 0.7669543027877808, + "step": 870 + }, + { + "epoch": 1.4225095978985653, + "grad_norm": 2.7775161266326904, + "learning_rate": 2.6991145416068947e-06, + "logits/chosen": -3.078185796737671, + "logits/rejected": -3.1287853717803955, + "logps/chosen": -0.7748882174491882, + "logps/rejected": -0.8965023159980774, + "loss": 0.8403, + "odds_ratio_loss": 0.6543062925338745, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.07748880982398987, + "rewards/margins": 0.012161416001617908, + "rewards/rejected": -0.0896502360701561, + "sft_loss": 0.7748882174491882, + "step": 880 + }, + { + "epoch": 1.4386744796928672, + "grad_norm": 1.0541467666625977, + "learning_rate": 2.6568579117106143e-06, + "logits/chosen": -3.077782392501831, + "logits/rejected": -3.0923101902008057, + "logps/chosen": -0.7235719561576843, + "logps/rejected": -0.9604678153991699, + "loss": 0.7832, + "odds_ratio_loss": 0.5962098240852356, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.07235720008611679, + "rewards/margins": 0.02368958666920662, + "rewards/rejected": -0.09604678303003311, + "sft_loss": 0.7235719561576843, + "step": 890 + }, + { + "epoch": 1.454839361487169, + "grad_norm": 1.1944150924682617, + "learning_rate": 2.6145562392432544e-06, + "logits/chosen": -3.139462947845459, + "logits/rejected": -3.1433637142181396, + "logps/chosen": -0.7596802115440369, + "logps/rejected": -0.9312537312507629, + "loss": 0.8259, + "odds_ratio_loss": 0.6623841524124146, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.07596802711486816, + "rewards/margins": 0.01715734973549843, + "rewards/rejected": -0.09312538057565689, + "sft_loss": 0.7596802115440369, + "step": 900 + }, + { + "epoch": 1.471004243281471, + "grad_norm": 3.4694266319274902, + "learning_rate": 2.5722216713516682e-06, + "logits/chosen": -3.0813755989074707, + "logits/rejected": -3.128629207611084, + "logps/chosen": -0.7107754945755005, + "logps/rejected": -0.9305357933044434, + "loss": 0.7723, + "odds_ratio_loss": 0.6156551837921143, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.07107754051685333, + "rewards/margins": 0.021976038813591003, + "rewards/rejected": -0.09305357933044434, + "sft_loss": 0.7107754945755005, + "step": 910 + }, + { + "epoch": 1.4871691250757728, + "grad_norm": 4.237662315368652, + "learning_rate": 2.5298663646288064e-06, + "logits/chosen": -3.1253597736358643, + "logits/rejected": -3.1492881774902344, + "logps/chosen": -0.7484380602836609, + "logps/rejected": -0.9847918748855591, + "loss": 0.8097, + "odds_ratio_loss": 0.6130812168121338, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.07484380900859833, + "rewards/margins": 0.023635383695364, + "rewards/rejected": -0.09847918897867203, + "sft_loss": 0.7484380602836609, + "step": 920 + }, + { + "epoch": 1.503334006870075, + "grad_norm": 2.1491661071777344, + "learning_rate": 2.487502481622879e-06, + "logits/chosen": -3.0927679538726807, + "logits/rejected": -3.110395908355713, + "logps/chosen": -0.7894285321235657, + "logps/rejected": -0.9456027150154114, + "loss": 0.8507, + "odds_ratio_loss": 0.6129187345504761, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.07894285023212433, + "rewards/margins": 0.01561742089688778, + "rewards/rejected": -0.09456028044223785, + "sft_loss": 0.7894285321235657, + "step": 930 + }, + { + "epoch": 1.5194988886643768, + "grad_norm": 1.948228359222412, + "learning_rate": 2.4451421873448253e-06, + "logits/chosen": -3.0993218421936035, + "logits/rejected": -3.1382908821105957, + "logps/chosen": -0.7644907832145691, + "logps/rejected": -0.9289671778678894, + "loss": 0.8309, + "odds_ratio_loss": 0.6645576357841492, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.07644907385110855, + "rewards/margins": 0.016447637230157852, + "rewards/rejected": -0.0928967148065567, + "sft_loss": 0.7644907832145691, + "step": 940 + }, + { + "epoch": 1.5356637704586786, + "grad_norm": 5.180652141571045, + "learning_rate": 2.40279764577506e-06, + "logits/chosen": -3.1537530422210693, + "logits/rejected": -3.1876587867736816, + "logps/chosen": -0.7837322354316711, + "logps/rejected": -0.9118620157241821, + "loss": 0.8488, + "odds_ratio_loss": 0.6507243514060974, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.07837323099374771, + "rewards/margins": 0.012812974862754345, + "rewards/rejected": -0.09118620306253433, + "sft_loss": 0.7837322354316711, + "step": 950 + }, + { + "epoch": 1.5518286522529805, + "grad_norm": 3.1721439361572266, + "learning_rate": 2.3604810163705242e-06, + "logits/chosen": -3.117772102355957, + "logits/rejected": -3.1582953929901123, + "logps/chosen": -0.6980777978897095, + "logps/rejected": -0.9280182719230652, + "loss": 0.756, + "odds_ratio_loss": 0.5792473554611206, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.06980777531862259, + "rewards/margins": 0.02299405261874199, + "rewards/rejected": -0.09280182421207428, + "sft_loss": 0.6980777978897095, + "step": 960 + }, + { + "epoch": 1.5679935340472824, + "grad_norm": 2.930034637451172, + "learning_rate": 2.3182044505730364e-06, + "logits/chosen": -3.102292537689209, + "logits/rejected": -3.1203739643096924, + "logps/chosen": -0.6645774245262146, + "logps/rejected": -0.8846859931945801, + "loss": 0.7224, + "odds_ratio_loss": 0.5782453417778015, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.06645774096250534, + "rewards/margins": 0.02201084978878498, + "rewards/rejected": -0.08846859633922577, + "sft_loss": 0.6645774245262146, + "step": 970 + }, + { + "epoch": 1.5841584158415842, + "grad_norm": 4.4542083740234375, + "learning_rate": 2.275980088319941e-06, + "logits/chosen": -3.1215856075286865, + "logits/rejected": -3.1170594692230225, + "logps/chosen": -0.718813955783844, + "logps/rejected": -0.8550910949707031, + "loss": 0.7869, + "odds_ratio_loss": 0.6806875467300415, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.07188138365745544, + "rewards/margins": 0.013627724722027779, + "rewards/rejected": -0.08550911396741867, + "sft_loss": 0.718813955783844, + "step": 980 + }, + { + "epoch": 1.600323297635886, + "grad_norm": 3.0541982650756836, + "learning_rate": 2.2338200545580577e-06, + "logits/chosen": -3.0836963653564453, + "logits/rejected": -3.1206369400024414, + "logps/chosen": -0.710333526134491, + "logps/rejected": -0.9507579803466797, + "loss": 0.7727, + "odds_ratio_loss": 0.623470664024353, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.07103335857391357, + "rewards/margins": 0.02404244802892208, + "rewards/rejected": -0.09507580101490021, + "sft_loss": 0.710333526134491, + "step": 990 + }, + { + "epoch": 1.616488179430188, + "grad_norm": 1.4722503423690796, + "learning_rate": 2.191736455761947e-06, + "logits/chosen": -3.127290964126587, + "logits/rejected": -3.1501638889312744, + "logps/chosen": -0.6768070459365845, + "logps/rejected": -0.8489478826522827, + "loss": 0.7341, + "odds_ratio_loss": 0.5731813311576843, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.06768069416284561, + "rewards/margins": 0.017214089632034302, + "rewards/rejected": -0.08489479124546051, + "sft_loss": 0.6768070459365845, + "step": 1000 + }, + { + "epoch": 1.616488179430188, + "eval_logits/chosen": -3.095952272415161, + "eval_logits/rejected": -3.1193559169769287, + "eval_logps/chosen": -0.7794692516326904, + "eval_logps/rejected": -0.9804279208183289, + "eval_loss": 0.8449718356132507, + "eval_odds_ratio_loss": 0.655025839805603, + "eval_rewards/accuracies": 0.5672727227210999, + "eval_rewards/chosen": -0.07794692367315292, + "eval_rewards/margins": 0.020095879212021828, + "eval_rewards/rejected": -0.098042793571949, + "eval_runtime": 367.195, + "eval_samples_per_second": 2.996, + "eval_sft_loss": 0.7794692516326904, + "eval_steps_per_second": 1.498, + "step": 1000 + }, + { + "epoch": 1.6326530612244898, + "grad_norm": 2.0673484802246094, + "learning_rate": 2.1497413764574673e-06, + "logits/chosen": -3.119420051574707, + "logits/rejected": -3.1228458881378174, + "logps/chosen": -0.7603039145469666, + "logps/rejected": -1.017571210861206, + "loss": 0.8184, + "odds_ratio_loss": 0.5807704329490662, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.07603039592504501, + "rewards/margins": 0.025726735591888428, + "rewards/rejected": -0.10175712406635284, + "sft_loss": 0.7603039145469666, + "step": 1010 + }, + { + "epoch": 1.6488179430187917, + "grad_norm": 2.212749719619751, + "learning_rate": 2.1078468757516395e-06, + "logits/chosen": -3.0987765789031982, + "logits/rejected": -3.1337482929229736, + "logps/chosen": -0.7031766772270203, + "logps/rejected": -0.9084986448287964, + "loss": 0.762, + "odds_ratio_loss": 0.5880716443061829, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.07031767070293427, + "rewards/margins": 0.020532192662358284, + "rewards/rejected": -0.0908498615026474, + "sft_loss": 0.7031766772270203, + "step": 1020 + }, + { + "epoch": 1.6649828248130936, + "grad_norm": 1.7040510177612305, + "learning_rate": 2.0660649838698145e-06, + "logits/chosen": -3.1251206398010254, + "logits/rejected": -3.140993595123291, + "logps/chosen": -0.7490108013153076, + "logps/rejected": -0.9883764982223511, + "loss": 0.8102, + "odds_ratio_loss": 0.6117704510688782, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.07490108907222748, + "rewards/margins": 0.023936569690704346, + "rewards/rejected": -0.09883765131235123, + "sft_loss": 0.7490108013153076, + "step": 1030 + }, + { + "epoch": 1.6811477066073954, + "grad_norm": 2.5308990478515625, + "learning_rate": 2.0244076987011284e-06, + "logits/chosen": -3.156313419342041, + "logits/rejected": -3.149254560470581, + "logps/chosen": -0.7889419794082642, + "logps/rejected": -0.9795150756835938, + "loss": 0.8507, + "odds_ratio_loss": 0.6177965998649597, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.07889419794082642, + "rewards/margins": 0.019057301804423332, + "rewards/rejected": -0.0979515090584755, + "sft_loss": 0.7889419794082642, + "step": 1040 + }, + { + "epoch": 1.6973125884016973, + "grad_norm": 1.713526725769043, + "learning_rate": 1.982886982353251e-06, + "logits/chosen": -3.1534323692321777, + "logits/rejected": -3.143846035003662, + "logps/chosen": -0.7670282125473022, + "logps/rejected": -1.0469207763671875, + "loss": 0.8306, + "odds_ratio_loss": 0.635661244392395, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.07670283317565918, + "rewards/margins": 0.027989249676465988, + "rewards/rejected": -0.10469207912683487, + "sft_loss": 0.7670282125473022, + "step": 1050 + }, + { + "epoch": 1.7134774701959992, + "grad_norm": 2.0344197750091553, + "learning_rate": 1.941514757717392e-06, + "logits/chosen": -3.1048598289489746, + "logits/rejected": -3.1271190643310547, + "logps/chosen": -0.7640289068222046, + "logps/rejected": -1.0232574939727783, + "loss": 0.8194, + "odds_ratio_loss": 0.5533130764961243, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.07640289515256882, + "rewards/margins": 0.025922849774360657, + "rewards/rejected": -0.10232573747634888, + "sft_loss": 0.7640289068222046, + "step": 1060 + }, + { + "epoch": 1.729642351990301, + "grad_norm": 4.370427131652832, + "learning_rate": 1.9003029050445953e-06, + "logits/chosen": -3.0902037620544434, + "logits/rejected": -3.1260688304901123, + "logps/chosen": -0.7717964053153992, + "logps/rejected": -0.9319893717765808, + "loss": 0.8345, + "odds_ratio_loss": 0.6267774701118469, + "rewards/accuracies": 0.5562499761581421, + "rewards/chosen": -0.07717963308095932, + "rewards/margins": 0.016019299626350403, + "rewards/rejected": -0.09319894015789032, + "sft_loss": 0.7717964053153992, + "step": 1070 + }, + { + "epoch": 1.745807233784603, + "grad_norm": 2.206437110900879, + "learning_rate": 1.8592632585342523e-06, + "logits/chosen": -3.108377456665039, + "logits/rejected": -3.137510299682617, + "logps/chosen": -0.730410635471344, + "logps/rejected": -0.9709598422050476, + "loss": 0.7903, + "odds_ratio_loss": 0.599312424659729, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.07304105907678604, + "rewards/margins": 0.02405492775142193, + "rewards/rejected": -0.09709598869085312, + "sft_loss": 0.730410635471344, + "step": 1080 + }, + { + "epoch": 1.7619721155789048, + "grad_norm": 3.54589581489563, + "learning_rate": 1.8184076029358527e-06, + "logits/chosen": -3.0873734951019287, + "logits/rejected": -3.0916082859039307, + "logps/chosen": -0.7298410534858704, + "logps/rejected": -0.8651703000068665, + "loss": 0.797, + "odds_ratio_loss": 0.6711704134941101, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.07298411428928375, + "rewards/margins": 0.01353292353451252, + "rewards/rejected": -0.08651703596115112, + "sft_loss": 0.7298410534858704, + "step": 1090 + }, + { + "epoch": 1.7781369973732066, + "grad_norm": 1.9253672361373901, + "learning_rate": 1.7777476701649318e-06, + "logits/chosen": -3.0890607833862305, + "logits/rejected": -3.1127114295959473, + "logps/chosen": -0.7377376556396484, + "logps/rejected": -0.9456714391708374, + "loss": 0.7993, + "odds_ratio_loss": 0.6157868504524231, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.07377376407384872, + "rewards/margins": 0.02079339325428009, + "rewards/rejected": -0.09456716477870941, + "sft_loss": 0.7377376556396484, + "step": 1100 + }, + { + "epoch": 1.7943018791675085, + "grad_norm": 2.634758949279785, + "learning_rate": 1.7372951359341925e-06, + "logits/chosen": -3.1294898986816406, + "logits/rejected": -3.1275429725646973, + "logps/chosen": -0.7182799577713013, + "logps/rejected": -0.897103488445282, + "loss": 0.7819, + "odds_ratio_loss": 0.6362147331237793, + "rewards/accuracies": 0.53125, + "rewards/chosen": -0.07182799279689789, + "rewards/margins": 0.01788235269486904, + "rewards/rejected": -0.08971034735441208, + "sft_loss": 0.7182799577713013, + "step": 1110 + }, + { + "epoch": 1.8104667609618104, + "grad_norm": 4.11432409286499, + "learning_rate": 1.6970616164007547e-06, + "logits/chosen": -3.109255075454712, + "logits/rejected": -3.1157066822052, + "logps/chosen": -0.7095004320144653, + "logps/rejected": -0.9157026410102844, + "loss": 0.7723, + "odds_ratio_loss": 0.6284032464027405, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.07095004618167877, + "rewards/margins": 0.020620223134756088, + "rewards/rejected": -0.09157026559114456, + "sft_loss": 0.7095004320144653, + "step": 1120 + }, + { + "epoch": 1.8266316427561122, + "grad_norm": 3.5484671592712402, + "learning_rate": 1.6570586648305276e-06, + "logits/chosen": -3.1144962310791016, + "logits/rejected": -3.162351608276367, + "logps/chosen": -0.7635061144828796, + "logps/rejected": -0.9983874559402466, + "loss": 0.8279, + "odds_ratio_loss": 0.6435292959213257, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.0763506144285202, + "rewards/margins": 0.023488130420446396, + "rewards/rejected": -0.0998387485742569, + "sft_loss": 0.7635061144828796, + "step": 1130 + }, + { + "epoch": 1.842796524550414, + "grad_norm": 2.315484046936035, + "learning_rate": 1.6172977682806151e-06, + "logits/chosen": -3.1250674724578857, + "logits/rejected": -3.169023036956787, + "logps/chosen": -0.7526008486747742, + "logps/rejected": -0.9915586709976196, + "loss": 0.8118, + "odds_ratio_loss": 0.5918548703193665, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.07526008039712906, + "rewards/margins": 0.023895783349871635, + "rewards/rejected": -0.09915586560964584, + "sft_loss": 0.7526008486747742, + "step": 1140 + }, + { + "epoch": 1.858961406344716, + "grad_norm": 1.8489255905151367, + "learning_rate": 1.5777903443007586e-06, + "logits/chosen": -3.0853919982910156, + "logits/rejected": -3.067697286605835, + "logps/chosen": -0.761858344078064, + "logps/rejected": -1.018090844154358, + "loss": 0.8238, + "odds_ratio_loss": 0.6194515824317932, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.07618583738803864, + "rewards/margins": 0.02562323771417141, + "rewards/rejected": -0.1018090695142746, + "sft_loss": 0.761858344078064, + "step": 1150 + }, + { + "epoch": 1.8751262881390178, + "grad_norm": 3.776698589324951, + "learning_rate": 1.5385477376547226e-06, + "logits/chosen": -3.117295742034912, + "logits/rejected": -3.1292173862457275, + "logps/chosen": -0.7485045194625854, + "logps/rejected": -0.9388057589530945, + "loss": 0.8087, + "odds_ratio_loss": 0.6023274064064026, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.07485045492649078, + "rewards/margins": 0.019030118361115456, + "rewards/rejected": -0.09388057142496109, + "sft_loss": 0.7485045194625854, + "step": 1160 + }, + { + "epoch": 1.89129116993332, + "grad_norm": 2.622969627380371, + "learning_rate": 1.4995812170625845e-06, + "logits/chosen": -3.100135564804077, + "logits/rejected": -3.120415210723877, + "logps/chosen": -0.7464675307273865, + "logps/rejected": -1.0535166263580322, + "loss": 0.8049, + "odds_ratio_loss": 0.5842532515525818, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.07464675605297089, + "rewards/margins": 0.030704926699399948, + "rewards/rejected": -0.10535167157649994, + "sft_loss": 0.7464675307273865, + "step": 1170 + }, + { + "epoch": 1.9074560517276218, + "grad_norm": 2.950418710708618, + "learning_rate": 1.4609019719648666e-06, + "logits/chosen": -3.1377501487731934, + "logits/rejected": -3.1489570140838623, + "logps/chosen": -0.7555149793624878, + "logps/rejected": -0.9784708023071289, + "loss": 0.8149, + "odds_ratio_loss": 0.5935994386672974, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.07555149495601654, + "rewards/margins": 0.022295573726296425, + "rewards/rejected": -0.09784707427024841, + "sft_loss": 0.7555149793624878, + "step": 1180 + }, + { + "epoch": 1.9236209335219236, + "grad_norm": 3.5587480068206787, + "learning_rate": 1.42252110930943e-06, + "logits/chosen": -3.077787399291992, + "logits/rejected": -3.097930908203125, + "logps/chosen": -0.6736657023429871, + "logps/rejected": -0.9208394289016724, + "loss": 0.7314, + "odds_ratio_loss": 0.5776128768920898, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.0673665776848793, + "rewards/margins": 0.024717364460229874, + "rewards/rejected": -0.09208394587039948, + "sft_loss": 0.6736657023429871, + "step": 1190 + }, + { + "epoch": 1.9397858153162255, + "grad_norm": 4.154015064239502, + "learning_rate": 1.3844496503620493e-06, + "logits/chosen": -3.0993504524230957, + "logits/rejected": -3.152251720428467, + "logps/chosen": -0.781159520149231, + "logps/rejected": -0.947624683380127, + "loss": 0.8436, + "odds_ratio_loss": 0.6244069933891296, + "rewards/accuracies": 0.512499988079071, + "rewards/chosen": -0.07811595499515533, + "rewards/margins": 0.01664651185274124, + "rewards/rejected": -0.09476246684789658, + "sft_loss": 0.781159520149231, + "step": 1200 + }, + { + "epoch": 1.9559506971105274, + "grad_norm": 3.2580513954162598, + "learning_rate": 1.3466985275416081e-06, + "logits/chosen": -3.074446439743042, + "logits/rejected": -3.0826351642608643, + "logps/chosen": -0.8331464529037476, + "logps/rejected": -0.9720133543014526, + "loss": 0.9006, + "odds_ratio_loss": 0.6748316287994385, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.08331465721130371, + "rewards/margins": 0.013886680826544762, + "rewards/rejected": -0.09720133244991302, + "sft_loss": 0.8331464529037476, + "step": 1210 + }, + { + "epoch": 1.9721155789048292, + "grad_norm": 3.6283507347106934, + "learning_rate": 1.309278581280791e-06, + "logits/chosen": -3.1137681007385254, + "logits/rejected": -3.1031088829040527, + "logps/chosen": -0.6959558129310608, + "logps/rejected": -0.9509153366088867, + "loss": 0.7562, + "odds_ratio_loss": 0.602826714515686, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.0695955827832222, + "rewards/margins": 0.025495965033769608, + "rewards/rejected": -0.09509153664112091, + "sft_loss": 0.6959558129310608, + "step": 1220 + }, + { + "epoch": 1.9882804606991311, + "grad_norm": 1.468146800994873, + "learning_rate": 1.272200556913199e-06, + "logits/chosen": -3.116267204284668, + "logits/rejected": -3.1380016803741455, + "logps/chosen": -0.7585796117782593, + "logps/rejected": -0.9659263491630554, + "loss": 0.827, + "odds_ratio_loss": 0.6845245361328125, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.07585795968770981, + "rewards/margins": 0.020734664052724838, + "rewards/rejected": -0.09659262746572495, + "sft_loss": 0.7585796117782593, + "step": 1230 + }, + { + "epoch": 2.004445342493433, + "grad_norm": 2.9131782054901123, + "learning_rate": 1.2354751015877698e-06, + "logits/chosen": -3.0867080688476562, + "logits/rejected": -3.120702028274536, + "logps/chosen": -0.7115014791488647, + "logps/rejected": -0.9811463356018066, + "loss": 0.7717, + "odds_ratio_loss": 0.6017391681671143, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.07115015387535095, + "rewards/margins": 0.02696448564529419, + "rewards/rejected": -0.09811463207006454, + "sft_loss": 0.7115014791488647, + "step": 1240 + }, + { + "epoch": 2.020610224287735, + "grad_norm": 5.614047527313232, + "learning_rate": 1.1991127612113945e-06, + "logits/chosen": -3.103969097137451, + "logits/rejected": -3.1521923542022705, + "logps/chosen": -0.748454749584198, + "logps/rejected": -0.9711889028549194, + "loss": 0.8062, + "odds_ratio_loss": 0.5778619050979614, + "rewards/accuracies": 0.59375, + "rewards/chosen": -0.07484547793865204, + "rewards/margins": 0.022273404523730278, + "rewards/rejected": -0.09711887687444687, + "sft_loss": 0.748454749584198, + "step": 1250 + }, + { + "epoch": 2.036775106082037, + "grad_norm": 3.2708330154418945, + "learning_rate": 1.1631239774206035e-06, + "logits/chosen": -3.087298631668091, + "logits/rejected": -3.0905699729919434, + "logps/chosen": -0.7345898747444153, + "logps/rejected": -0.9632331132888794, + "loss": 0.7998, + "odds_ratio_loss": 0.6524921655654907, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.07345898449420929, + "rewards/margins": 0.022864321246743202, + "rewards/rejected": -0.09632330387830734, + "sft_loss": 0.7345898747444153, + "step": 1260 + }, + { + "epoch": 2.052939987876339, + "grad_norm": 5.292441368103027, + "learning_rate": 1.1275190845831978e-06, + "logits/chosen": -3.094177007675171, + "logits/rejected": -3.105118989944458, + "logps/chosen": -0.6928293704986572, + "logps/rejected": -0.9615745544433594, + "loss": 0.7462, + "odds_ratio_loss": 0.5332867503166199, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.06928294897079468, + "rewards/margins": 0.026874512434005737, + "rewards/rejected": -0.09615744650363922, + "sft_loss": 0.6928293704986572, + "step": 1270 + }, + { + "epoch": 2.0691048696706407, + "grad_norm": 2.8853020668029785, + "learning_rate": 1.0923083068306778e-06, + "logits/chosen": -3.1118180751800537, + "logits/rejected": -3.0961227416992188, + "logps/chosen": -0.739953339099884, + "logps/rejected": -1.0457074642181396, + "loss": 0.7958, + "odds_ratio_loss": 0.5585684776306152, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.07399533689022064, + "rewards/margins": 0.03057541325688362, + "rewards/rejected": -0.10457074642181396, + "sft_loss": 0.739953339099884, + "step": 1280 + }, + { + "epoch": 2.0852697514649425, + "grad_norm": 2.046693801879883, + "learning_rate": 1.0575017551223348e-06, + "logits/chosen": -3.0827836990356445, + "logits/rejected": -3.099609851837158, + "logps/chosen": -0.6882905960083008, + "logps/rejected": -0.9119550585746765, + "loss": 0.7477, + "odds_ratio_loss": 0.594234824180603, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.06882907450199127, + "rewards/margins": 0.02236645109951496, + "rewards/rejected": -0.09119551628828049, + "sft_loss": 0.6882905960083008, + "step": 1290 + }, + { + "epoch": 2.1014346332592444, + "grad_norm": 3.569880723953247, + "learning_rate": 1.023109424341833e-06, + "logits/chosen": -3.091522693634033, + "logits/rejected": -3.1377675533294678, + "logps/chosen": -0.7397576570510864, + "logps/rejected": -0.9756298065185547, + "loss": 0.8002, + "odds_ratio_loss": 0.6045941114425659, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.07397577166557312, + "rewards/margins": 0.023587211966514587, + "rewards/rejected": -0.09756298363208771, + "sft_loss": 0.7397576570510864, + "step": 1300 + }, + { + "epoch": 2.1175995150535463, + "grad_norm": 2.92287278175354, + "learning_rate": 9.891411904271273e-07, + "logits/chosen": -3.091432571411133, + "logits/rejected": -3.097942352294922, + "logps/chosen": -0.7274054884910583, + "logps/rejected": -0.9433088302612305, + "loss": 0.7877, + "odds_ratio_loss": 0.602813720703125, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07274055480957031, + "rewards/margins": 0.021590325981378555, + "rewards/rejected": -0.09433087706565857, + "sft_loss": 0.7274054884910583, + "step": 1310 + }, + { + "epoch": 2.133764396847848, + "grad_norm": 1.6639999151229858, + "learning_rate": 9.556068075345363e-07, + "logits/chosen": -3.1349058151245117, + "logits/rejected": -3.1192307472229004, + "logps/chosen": -0.7446939945220947, + "logps/rejected": -0.9326783418655396, + "loss": 0.8047, + "odds_ratio_loss": 0.5995923280715942, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07446939498186111, + "rewards/margins": 0.018798431381583214, + "rewards/rejected": -0.09326782822608948, + "sft_loss": 0.7446939945220947, + "step": 1320 + }, + { + "epoch": 2.14992927864215, + "grad_norm": 1.6199049949645996, + "learning_rate": 9.225159052377838e-07, + "logits/chosen": -3.085836887359619, + "logits/rejected": -3.1293978691101074, + "logps/chosen": -0.7635077238082886, + "logps/rejected": -1.0566623210906982, + "loss": 0.8227, + "odds_ratio_loss": 0.591996431350708, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.07635077089071274, + "rewards/margins": 0.029315466061234474, + "rewards/rejected": -0.10566625744104385, + "sft_loss": 0.7635077238082886, + "step": 1330 + }, + { + "epoch": 2.166094160436452, + "grad_norm": 14.56561279296875, + "learning_rate": 8.898779857628184e-07, + "logits/chosen": -3.0713887214660645, + "logits/rejected": -3.125135898590088, + "logps/chosen": -0.6671522259712219, + "logps/rejected": -0.8694232702255249, + "loss": 0.7265, + "odds_ratio_loss": 0.5937641859054565, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.06671522557735443, + "rewards/margins": 0.02022710070014, + "rewards/rejected": -0.08694233000278473, + "sft_loss": 0.6671522259712219, + "step": 1340 + }, + { + "epoch": 2.1822590422307537, + "grad_norm": 2.1917502880096436, + "learning_rate": 8.577024212591975e-07, + "logits/chosen": -3.152409791946411, + "logits/rejected": -3.142133951187134, + "logps/chosen": -0.7823044657707214, + "logps/rejected": -0.9282905459403992, + "loss": 0.8458, + "odds_ratio_loss": 0.6353241205215454, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07823045551776886, + "rewards/margins": 0.0145986033603549, + "rewards/rejected": -0.09282905608415604, + "sft_loss": 0.7823044657707214, + "step": 1350 + }, + { + "epoch": 2.1984239240250556, + "grad_norm": 2.377164363861084, + "learning_rate": 8.259984511088276e-07, + "logits/chosen": -3.0746350288391113, + "logits/rejected": -3.107931613922119, + "logps/chosen": -0.7670485973358154, + "logps/rejected": -0.9791353344917297, + "loss": 0.8321, + "odds_ratio_loss": 0.6501604914665222, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.07670485973358154, + "rewards/margins": 0.02120867185294628, + "rewards/rejected": -0.09791352599859238, + "sft_loss": 0.7670485973358154, + "step": 1360 + }, + { + "epoch": 2.2145888058193575, + "grad_norm": 1.4493186473846436, + "learning_rate": 7.947751792728237e-07, + "logits/chosen": -3.1149420738220215, + "logits/rejected": -3.106590986251831, + "logps/chosen": -0.736428439617157, + "logps/rejected": -1.0205104351043701, + "loss": 0.7973, + "odds_ratio_loss": 0.6086055040359497, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07364284247159958, + "rewards/margins": 0.02840820886194706, + "rewards/rejected": -0.10205105692148209, + "sft_loss": 0.736428439617157, + "step": 1370 + }, + { + "epoch": 2.2307536876136593, + "grad_norm": 2.596027374267578, + "learning_rate": 7.640415716772626e-07, + "logits/chosen": -3.1113359928131104, + "logits/rejected": -3.1498453617095947, + "logps/chosen": -0.7384335398674011, + "logps/rejected": -1.0052040815353394, + "loss": 0.7991, + "odds_ratio_loss": 0.606296718120575, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.07384335994720459, + "rewards/margins": 0.026677042245864868, + "rewards/rejected": -0.10052040964365005, + "sft_loss": 0.7384335398674011, + "step": 1380 + }, + { + "epoch": 2.246918569407961, + "grad_norm": 1.8183890581130981, + "learning_rate": 7.338064536385722e-07, + "logits/chosen": -3.1014134883880615, + "logits/rejected": -3.1173253059387207, + "logps/chosen": -0.7216871380805969, + "logps/rejected": -1.0171915292739868, + "loss": 0.7805, + "odds_ratio_loss": 0.5884080529212952, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.07216870784759521, + "rewards/margins": 0.029550448060035706, + "rewards/rejected": -0.10171915590763092, + "sft_loss": 0.7216871380805969, + "step": 1390 + }, + { + "epoch": 2.263083451202263, + "grad_norm": 2.7193105220794678, + "learning_rate": 7.040785073292883e-07, + "logits/chosen": -3.052278995513916, + "logits/rejected": -3.0792136192321777, + "logps/chosen": -0.8029114603996277, + "logps/rejected": -1.0175930261611938, + "loss": 0.8702, + "odds_ratio_loss": 0.6730188727378845, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.08029113709926605, + "rewards/margins": 0.02146816998720169, + "rewards/rejected": -0.10175931453704834, + "sft_loss": 0.8029114603996277, + "step": 1400 + }, + { + "epoch": 2.279248332996565, + "grad_norm": 1.6374540328979492, + "learning_rate": 6.748662692849297e-07, + "logits/chosen": -3.0864005088806152, + "logits/rejected": -3.1004090309143066, + "logps/chosen": -0.6923743486404419, + "logps/rejected": -1.0528560876846313, + "loss": 0.747, + "odds_ratio_loss": 0.5459417104721069, + "rewards/accuracies": 0.6499999761581421, + "rewards/chosen": -0.06923744082450867, + "rewards/margins": 0.03604819253087044, + "rewards/rejected": -0.10528562217950821, + "sft_loss": 0.6923743486404419, + "step": 1410 + }, + { + "epoch": 2.295413214790867, + "grad_norm": 5.552002429962158, + "learning_rate": 6.46178127952686e-07, + "logits/chosen": -3.1168243885040283, + "logits/rejected": -3.132902145385742, + "logps/chosen": -0.7120259404182434, + "logps/rejected": -0.9511427879333496, + "loss": 0.7669, + "odds_ratio_loss": 0.5487939119338989, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.0712025836110115, + "rewards/margins": 0.023911695927381516, + "rewards/rejected": -0.09511429071426392, + "sft_loss": 0.7120259404182434, + "step": 1420 + }, + { + "epoch": 2.3115780965851687, + "grad_norm": 6.720409870147705, + "learning_rate": 6.180223212826289e-07, + "logits/chosen": -3.1055123805999756, + "logits/rejected": -3.0998032093048096, + "logps/chosen": -0.7326000928878784, + "logps/rejected": -0.947357177734375, + "loss": 0.7933, + "odds_ratio_loss": 0.6070013046264648, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.07326001673936844, + "rewards/margins": 0.021475713700056076, + "rewards/rejected": -0.09473572671413422, + "sft_loss": 0.7326000928878784, + "step": 1430 + }, + { + "epoch": 2.3277429783794705, + "grad_norm": 1.7139315605163574, + "learning_rate": 5.904069343621443e-07, + "logits/chosen": -3.117607593536377, + "logits/rejected": -3.1124439239501953, + "logps/chosen": -0.7263907194137573, + "logps/rejected": -0.9684870839118958, + "loss": 0.7844, + "odds_ratio_loss": 0.5803811550140381, + "rewards/accuracies": 0.6312500238418579, + "rewards/chosen": -0.07263907045125961, + "rewards/margins": 0.024209631606936455, + "rewards/rejected": -0.09684871137142181, + "sft_loss": 0.7263907194137573, + "step": 1440 + }, + { + "epoch": 2.3439078601737724, + "grad_norm": 4.18460750579834, + "learning_rate": 5.633398970942544e-07, + "logits/chosen": -3.0699353218078613, + "logits/rejected": -3.1113662719726562, + "logps/chosen": -0.71888267993927, + "logps/rejected": -0.9213592410087585, + "loss": 0.7809, + "odds_ratio_loss": 0.6200512647628784, + "rewards/accuracies": 0.5874999761581421, + "rewards/chosen": -0.0718882754445076, + "rewards/margins": 0.020247649401426315, + "rewards/rejected": -0.09213592857122421, + "sft_loss": 0.71888267993927, + "step": 1450 + }, + { + "epoch": 2.3600727419680743, + "grad_norm": 2.875581741333008, + "learning_rate": 5.368289819205069e-07, + "logits/chosen": -3.1074984073638916, + "logits/rejected": -3.116929054260254, + "logps/chosen": -0.675830602645874, + "logps/rejected": -0.9225249290466309, + "loss": 0.7373, + "odds_ratio_loss": 0.6144708395004272, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.06758306175470352, + "rewards/margins": 0.02466944232583046, + "rewards/rejected": -0.09225250035524368, + "sft_loss": 0.675830602645874, + "step": 1460 + }, + { + "epoch": 2.376237623762376, + "grad_norm": 3.366608142852783, + "learning_rate": 5.108818015890785e-07, + "logits/chosen": -3.1295876502990723, + "logits/rejected": -3.1546216011047363, + "logps/chosen": -0.8030446171760559, + "logps/rejected": -0.9700073003768921, + "loss": 0.8661, + "odds_ratio_loss": 0.6306995153427124, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.08030445873737335, + "rewards/margins": 0.016696274280548096, + "rewards/rejected": -0.09700073301792145, + "sft_loss": 0.8030446171760559, + "step": 1470 + }, + { + "epoch": 2.392402505556678, + "grad_norm": 2.7780675888061523, + "learning_rate": 4.855058069687291e-07, + "logits/chosen": -3.067026376724243, + "logits/rejected": -3.0960917472839355, + "logps/chosen": -0.7043627500534058, + "logps/rejected": -0.9909790754318237, + "loss": 0.7594, + "odds_ratio_loss": 0.5507315397262573, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.07043628394603729, + "rewards/margins": 0.028661629185080528, + "rewards/rejected": -0.09909790754318237, + "sft_loss": 0.7043627500534058, + "step": 1480 + }, + { + "epoch": 2.40856738735098, + "grad_norm": 3.2751307487487793, + "learning_rate": 4.607082849092523e-07, + "logits/chosen": -3.099853277206421, + "logits/rejected": -3.1020543575286865, + "logps/chosen": -0.7886861562728882, + "logps/rejected": -0.976923942565918, + "loss": 0.8511, + "odds_ratio_loss": 0.6243327856063843, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.07886861264705658, + "rewards/margins": 0.018823768943548203, + "rewards/rejected": -0.09769239276647568, + "sft_loss": 0.7886861562728882, + "step": 1490 + }, + { + "epoch": 2.4247322691452817, + "grad_norm": 3.08627986907959, + "learning_rate": 4.3649635614901405e-07, + "logits/chosen": -3.0676140785217285, + "logits/rejected": -3.144824504852295, + "logps/chosen": -0.704692006111145, + "logps/rejected": -0.8520969152450562, + "loss": 0.7691, + "odds_ratio_loss": 0.6437775492668152, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.0704691931605339, + "rewards/margins": 0.014740494079887867, + "rewards/rejected": -0.0852096900343895, + "sft_loss": 0.704692006111145, + "step": 1500 + }, + { + "epoch": 2.4247322691452817, + "eval_logits/chosen": -3.083430051803589, + "eval_logits/rejected": -3.107269287109375, + "eval_logps/chosen": -0.777300238609314, + "eval_logps/rejected": -0.9876723289489746, + "eval_loss": 0.8434417247772217, + "eval_odds_ratio_loss": 0.6614136099815369, + "eval_rewards/accuracies": 0.5690909028053284, + "eval_rewards/chosen": -0.07773003727197647, + "eval_rewards/margins": 0.021037202328443527, + "eval_rewards/rejected": -0.0987672358751297, + "eval_runtime": 371.5479, + "eval_samples_per_second": 2.961, + "eval_sft_loss": 0.777300238609314, + "eval_steps_per_second": 1.48, + "step": 1500 + }, + { + "epoch": 2.4408971509395836, + "grad_norm": 2.1558029651641846, + "learning_rate": 4.128769732701973e-07, + "logits/chosen": -3.0588672161102295, + "logits/rejected": -3.053375720977783, + "logps/chosen": -0.7354205250740051, + "logps/rejected": -0.9381022453308105, + "loss": 0.7959, + "odds_ratio_loss": 0.6045327186584473, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.0735420510172844, + "rewards/margins": 0.02026817761361599, + "rewards/rejected": -0.09381023049354553, + "sft_loss": 0.7354205250740051, + "step": 1510 + }, + { + "epoch": 2.4570620327338855, + "grad_norm": 3.434962749481201, + "learning_rate": 3.8985691870233046e-07, + "logits/chosen": -3.1096482276916504, + "logits/rejected": -3.1095433235168457, + "logps/chosen": -0.7352281212806702, + "logps/rejected": -0.9943790435791016, + "loss": 0.7949, + "odds_ratio_loss": 0.596770703792572, + "rewards/accuracies": 0.543749988079071, + "rewards/chosen": -0.07352282106876373, + "rewards/margins": 0.025915095582604408, + "rewards/rejected": -0.0994379073381424, + "sft_loss": 0.7352281212806702, + "step": 1520 + }, + { + "epoch": 2.4732269145281873, + "grad_norm": 2.586085557937622, + "learning_rate": 3.6744280277467904e-07, + "logits/chosen": -3.1208150386810303, + "logits/rejected": -3.1456665992736816, + "logps/chosen": -0.7365155816078186, + "logps/rejected": -0.9590933918952942, + "loss": 0.7993, + "odds_ratio_loss": 0.6281741857528687, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.07365155220031738, + "rewards/margins": 0.022257793694734573, + "rewards/rejected": -0.09590934216976166, + "sft_loss": 0.7365155816078186, + "step": 1530 + }, + { + "epoch": 2.489391796322489, + "grad_norm": 6.254690170288086, + "learning_rate": 3.456410618180503e-07, + "logits/chosen": -3.0379281044006348, + "logits/rejected": -3.0891363620758057, + "logps/chosen": -0.6726978421211243, + "logps/rejected": -1.0303423404693604, + "loss": 0.7295, + "odds_ratio_loss": 0.5683592557907104, + "rewards/accuracies": 0.6625000238418579, + "rewards/chosen": -0.06726977974176407, + "rewards/margins": 0.03576444461941719, + "rewards/rejected": -0.10303423553705215, + "sft_loss": 0.6726978421211243, + "step": 1540 + }, + { + "epoch": 2.5055566781167915, + "grad_norm": 2.056490898132324, + "learning_rate": 3.244579563165753e-07, + "logits/chosen": -3.0974183082580566, + "logits/rejected": -3.1228187084198, + "logps/chosen": -0.711591362953186, + "logps/rejected": -1.0496152639389038, + "loss": 0.7689, + "odds_ratio_loss": 0.5726686716079712, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.07115913927555084, + "rewards/margins": 0.033802393823862076, + "rewards/rejected": -0.10496153682470322, + "sft_loss": 0.711591362953186, + "step": 1550 + }, + { + "epoch": 2.521721559911093, + "grad_norm": 1.438101053237915, + "learning_rate": 3.038995691099697e-07, + "logits/chosen": -3.08099365234375, + "logits/rejected": -3.1116271018981934, + "logps/chosen": -0.7405564188957214, + "logps/rejected": -1.075670838356018, + "loss": 0.7985, + "odds_ratio_loss": 0.5797412991523743, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.07405563443899155, + "rewards/margins": 0.0335114449262619, + "rewards/rejected": -0.10756708681583405, + "sft_loss": 0.7405564188957214, + "step": 1560 + }, + { + "epoch": 2.5378864417053952, + "grad_norm": 9.284449577331543, + "learning_rate": 2.839718036468192e-07, + "logits/chosen": -3.127392530441284, + "logits/rejected": -3.1654505729675293, + "logps/chosen": -0.841861367225647, + "logps/rejected": -0.9995994567871094, + "loss": 0.9096, + "odds_ratio_loss": 0.6777136921882629, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.0841861441731453, + "rewards/margins": 0.015773817896842957, + "rewards/rejected": -0.09995995461940765, + "sft_loss": 0.841861367225647, + "step": 1570 + }, + { + "epoch": 2.5540513234996967, + "grad_norm": 2.9508066177368164, + "learning_rate": 2.646803822893723e-07, + "logits/chosen": -3.1416640281677246, + "logits/rejected": -3.1590213775634766, + "logps/chosen": -0.7539080381393433, + "logps/rejected": -0.9754577875137329, + "loss": 0.8158, + "odds_ratio_loss": 0.618767499923706, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.07539081573486328, + "rewards/margins": 0.02215495891869068, + "rewards/rejected": -0.09754578024148941, + "sft_loss": 0.7539080381393433, + "step": 1580 + }, + { + "epoch": 2.570216205293999, + "grad_norm": 2.8406593799591064, + "learning_rate": 2.460308446703341e-07, + "logits/chosen": -3.1490259170532227, + "logits/rejected": -3.129563808441162, + "logps/chosen": -0.7597383260726929, + "logps/rejected": -0.9107489585876465, + "loss": 0.8268, + "odds_ratio_loss": 0.671094536781311, + "rewards/accuracies": 0.5625, + "rewards/chosen": -0.07597382366657257, + "rewards/margins": 0.01510106772184372, + "rewards/rejected": -0.09107489883899689, + "sft_loss": 0.7597383260726929, + "step": 1590 + }, + { + "epoch": 2.5863810870883004, + "grad_norm": 3.932293653488159, + "learning_rate": 2.2802854610213143e-07, + "logits/chosen": -3.0843563079833984, + "logits/rejected": -3.080658197402954, + "logps/chosen": -0.6698503494262695, + "logps/rejected": -1.0470666885375977, + "loss": 0.7238, + "odds_ratio_loss": 0.5398669838905334, + "rewards/accuracies": 0.668749988079071, + "rewards/chosen": -0.06698504090309143, + "rewards/margins": 0.03772163391113281, + "rewards/rejected": -0.10470668226480484, + "sft_loss": 0.6698503494262695, + "step": 1600 + }, + { + "epoch": 2.6025459688826027, + "grad_norm": 3.3689260482788086, + "learning_rate": 2.106786560391072e-07, + "logits/chosen": -3.08052134513855, + "logits/rejected": -3.1363070011138916, + "logps/chosen": -0.7746056914329529, + "logps/rejected": -0.9704595804214478, + "loss": 0.8356, + "odds_ratio_loss": 0.6099545359611511, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.07746056467294693, + "rewards/margins": 0.019585389643907547, + "rewards/rejected": -0.09704595804214478, + "sft_loss": 0.7746056914329529, + "step": 1610 + }, + { + "epoch": 2.6187108506769046, + "grad_norm": 1.7709842920303345, + "learning_rate": 1.9398615659308255e-07, + "logits/chosen": -3.1098244190216064, + "logits/rejected": -3.1587026119232178, + "logps/chosen": -0.7326648831367493, + "logps/rejected": -0.9062278866767883, + "loss": 0.7951, + "odds_ratio_loss": 0.624650776386261, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.07326649129390717, + "rewards/margins": 0.017356308177113533, + "rewards/rejected": -0.09062279760837555, + "sft_loss": 0.7326648831367493, + "step": 1620 + }, + { + "epoch": 2.6348757324712064, + "grad_norm": 2.0778872966766357, + "learning_rate": 1.7795584110272184e-07, + "logits/chosen": -3.1534359455108643, + "logits/rejected": -3.1423211097717285, + "logps/chosen": -0.7454973459243774, + "logps/rejected": -0.936767578125, + "loss": 0.8102, + "odds_ratio_loss": 0.6473931670188904, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.07454973459243774, + "rewards/margins": 0.019127020612359047, + "rewards/rejected": -0.09367676079273224, + "sft_loss": 0.7454973459243774, + "step": 1630 + }, + { + "epoch": 2.6510406142655083, + "grad_norm": 3.814206838607788, + "learning_rate": 1.6259231275709636e-07, + "logits/chosen": -3.149001121520996, + "logits/rejected": -3.1615355014801025, + "logps/chosen": -0.716022789478302, + "logps/rejected": -0.8857117891311646, + "loss": 0.7825, + "odds_ratio_loss": 0.6644307971000671, + "rewards/accuracies": 0.5687500238418579, + "rewards/chosen": -0.07160228490829468, + "rewards/margins": 0.016968905925750732, + "rewards/rejected": -0.08857118338346481, + "sft_loss": 0.716022789478302, + "step": 1640 + }, + { + "epoch": 2.66720549605981, + "grad_norm": 1.4002131223678589, + "learning_rate": 1.478999832738548e-07, + "logits/chosen": -3.1335608959198, + "logits/rejected": -3.1366758346557617, + "logps/chosen": -0.7220789194107056, + "logps/rejected": -1.0130765438079834, + "loss": 0.7818, + "odds_ratio_loss": 0.597649335861206, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.07220789790153503, + "rewards/margins": 0.02909977361559868, + "rewards/rejected": -0.10130767524242401, + "sft_loss": 0.7220789194107056, + "step": 1650 + }, + { + "epoch": 2.683370377854112, + "grad_norm": 2.5022165775299072, + "learning_rate": 1.338830716323769e-07, + "logits/chosen": -3.0961499214172363, + "logits/rejected": -3.1152243614196777, + "logps/chosen": -0.7230523228645325, + "logps/rejected": -0.8930460810661316, + "loss": 0.7852, + "odds_ratio_loss": 0.6216701865196228, + "rewards/accuracies": 0.581250011920929, + "rewards/chosen": -0.07230523973703384, + "rewards/margins": 0.016999371349811554, + "rewards/rejected": -0.0893046110868454, + "sft_loss": 0.7230523228645325, + "step": 1660 + }, + { + "epoch": 2.699535259648414, + "grad_norm": 3.8939476013183594, + "learning_rate": 1.205456028622723e-07, + "logits/chosen": -3.1416923999786377, + "logits/rejected": -3.130157232284546, + "logps/chosen": -0.6890848875045776, + "logps/rejected": -0.9714914560317993, + "loss": 0.7441, + "odds_ratio_loss": 0.550174355506897, + "rewards/accuracies": 0.6187499761581421, + "rewards/chosen": -0.06890848278999329, + "rewards/margins": 0.028240656480193138, + "rewards/rejected": -0.09714914858341217, + "sft_loss": 0.6890848875045776, + "step": 1670 + }, + { + "epoch": 2.7157001414427158, + "grad_norm": 1.6698890924453735, + "learning_rate": 1.0789140688756805e-07, + "logits/chosen": -3.1566967964172363, + "logits/rejected": -3.149425745010376, + "logps/chosen": -0.7115526795387268, + "logps/rejected": -0.9621270298957825, + "loss": 0.7691, + "odds_ratio_loss": 0.5751715898513794, + "rewards/accuracies": 0.625, + "rewards/chosen": -0.07115526497364044, + "rewards/margins": 0.025057435035705566, + "rewards/rejected": -0.09621270000934601, + "sft_loss": 0.7115526795387268, + "step": 1680 + }, + { + "epoch": 2.7318650232370176, + "grad_norm": 5.264458179473877, + "learning_rate": 9.592411742693098e-08, + "logits/chosen": -3.078084945678711, + "logits/rejected": -3.0921549797058105, + "logps/chosen": -0.7386698722839355, + "logps/rejected": -0.9247487783432007, + "loss": 0.8064, + "odds_ratio_loss": 0.6777487397193909, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.07386698573827744, + "rewards/margins": 0.018607888370752335, + "rewards/rejected": -0.09247487038373947, + "sft_loss": 0.7386698722839355, + "step": 1690 + }, + { + "epoch": 2.7480299050313195, + "grad_norm": 2.103006362915039, + "learning_rate": 8.464717095022168e-08, + "logits/chosen": -3.064146041870117, + "logits/rejected": -3.0713071823120117, + "logps/chosen": -0.7025493383407593, + "logps/rejected": -0.955985426902771, + "loss": 0.7608, + "odds_ratio_loss": 0.5827249884605408, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.07025493681430817, + "rewards/margins": 0.025343608111143112, + "rewards/rejected": -0.09559854120016098, + "sft_loss": 0.7025493383407593, + "step": 1700 + }, + { + "epoch": 2.7641947868256214, + "grad_norm": 2.800687789916992, + "learning_rate": 7.406380569169841e-08, + "logits/chosen": -3.0971622467041016, + "logits/rejected": -3.142702341079712, + "logps/chosen": -0.752859890460968, + "logps/rejected": -0.8985753059387207, + "loss": 0.8177, + "odds_ratio_loss": 0.6488373875617981, + "rewards/accuracies": 0.518750011920929, + "rewards/chosen": -0.07528599351644516, + "rewards/margins": 0.014571529813110828, + "rewards/rejected": -0.08985751867294312, + "sft_loss": 0.752859890460968, + "step": 1710 + }, + { + "epoch": 2.7803596686199232, + "grad_norm": 11.731331825256348, + "learning_rate": 6.417706072013808e-08, + "logits/chosen": -3.1208655834198, + "logits/rejected": -3.1548678874969482, + "logps/chosen": -0.7304657101631165, + "logps/rejected": -0.9190757870674133, + "loss": 0.7931, + "odds_ratio_loss": 0.6268162727355957, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.07304656505584717, + "rewards/margins": 0.01886100508272648, + "rewards/rejected": -0.0919075757265091, + "sft_loss": 0.7304657101631165, + "step": 1720 + }, + { + "epoch": 2.796524550414225, + "grad_norm": 4.022006034851074, + "learning_rate": 5.498977506615294e-08, + "logits/chosen": -3.1070895195007324, + "logits/rejected": -3.145142078399658, + "logps/chosen": -0.7585607767105103, + "logps/rejected": -0.914089560508728, + "loss": 0.8244, + "odds_ratio_loss": 0.6582176089286804, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.07585608214139938, + "rewards/margins": 0.015552881173789501, + "rewards/rejected": -0.09140896052122116, + "sft_loss": 0.7585607767105103, + "step": 1730 + }, + { + "epoch": 2.812689432208527, + "grad_norm": 2.081886053085327, + "learning_rate": 4.6504586906947756e-08, + "logits/chosen": -3.1400046348571777, + "logits/rejected": -3.159885883331299, + "logps/chosen": -0.7848642468452454, + "logps/rejected": -0.9642190933227539, + "loss": 0.8438, + "odds_ratio_loss": 0.5897284746170044, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.07848642766475677, + "rewards/margins": 0.01793549209833145, + "rewards/rejected": -0.09642191231250763, + "sft_loss": 0.7848642468452454, + "step": 1740 + }, + { + "epoch": 2.828854314002829, + "grad_norm": 9.670836448669434, + "learning_rate": 3.8723932808754914e-08, + "logits/chosen": -3.1591954231262207, + "logits/rejected": -3.179676055908203, + "logps/chosen": -0.832345187664032, + "logps/rejected": -0.9517787098884583, + "loss": 0.8997, + "odds_ratio_loss": 0.6738894581794739, + "rewards/accuracies": 0.5375000238418579, + "rewards/chosen": -0.0832345113158226, + "rewards/margins": 0.01194334588944912, + "rewards/rejected": -0.09517785906791687, + "sft_loss": 0.832345187664032, + "step": 1750 + }, + { + "epoch": 2.8450191957971307, + "grad_norm": 4.27302360534668, + "learning_rate": 3.1650047027158014e-08, + "logits/chosen": -3.132617473602295, + "logits/rejected": -3.1612956523895264, + "logps/chosen": -0.7341474294662476, + "logps/rejected": -0.9550244212150574, + "loss": 0.7929, + "odds_ratio_loss": 0.5877509117126465, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.07341475039720535, + "rewards/margins": 0.0220877043902874, + "rewards/rejected": -0.09550245106220245, + "sft_loss": 0.7341474294662476, + "step": 1760 + }, + { + "epoch": 2.8611840775914326, + "grad_norm": 3.392895460128784, + "learning_rate": 2.5284960865517848e-08, + "logits/chosen": -3.093036413192749, + "logits/rejected": -3.1320202350616455, + "logps/chosen": -0.6883664727210999, + "logps/rejected": -0.9603809118270874, + "loss": 0.7445, + "odds_ratio_loss": 0.5616418123245239, + "rewards/accuracies": 0.606249988079071, + "rewards/chosen": -0.06883664429187775, + "rewards/margins": 0.027201462537050247, + "rewards/rejected": -0.0960381031036377, + "sft_loss": 0.6883664727210999, + "step": 1770 + }, + { + "epoch": 2.8773489593857344, + "grad_norm": 2.937486410140991, + "learning_rate": 1.9630502091670388e-08, + "logits/chosen": -3.09773325920105, + "logits/rejected": -3.1201109886169434, + "logps/chosen": -0.7190070152282715, + "logps/rejected": -0.9666651487350464, + "loss": 0.7761, + "odds_ratio_loss": 0.5710408687591553, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.07190070301294327, + "rewards/margins": 0.02476580999791622, + "rewards/rejected": -0.09666652232408524, + "sft_loss": 0.7190070152282715, + "step": 1780 + }, + { + "epoch": 2.8935138411800363, + "grad_norm": 3.6368203163146973, + "learning_rate": 1.4688294413074677e-08, + "logits/chosen": -3.076406717300415, + "logits/rejected": -3.1111807823181152, + "logps/chosen": -0.6571230888366699, + "logps/rejected": -0.9525207281112671, + "loss": 0.7146, + "odds_ratio_loss": 0.5746604204177856, + "rewards/accuracies": 0.6000000238418579, + "rewards/chosen": -0.06571231037378311, + "rewards/margins": 0.02953975833952427, + "rewards/rejected": -0.09525207430124283, + "sft_loss": 0.6571230888366699, + "step": 1790 + }, + { + "epoch": 2.909678722974338, + "grad_norm": 2.2510814666748047, + "learning_rate": 1.0459757010556626e-08, + "logits/chosen": -3.1042840480804443, + "logits/rejected": -3.1341700553894043, + "logps/chosen": -0.7357559204101562, + "logps/rejected": -0.8860123753547668, + "loss": 0.7989, + "odds_ratio_loss": 0.6314257383346558, + "rewards/accuracies": 0.550000011920929, + "rewards/chosen": -0.07357560098171234, + "rewards/margins": 0.015025639906525612, + "rewards/rejected": -0.0886012390255928, + "sft_loss": 0.7357559204101562, + "step": 1800 + }, + { + "epoch": 2.92584360476864, + "grad_norm": 2.13761043548584, + "learning_rate": 6.94610413078306e-09, + "logits/chosen": -3.028646945953369, + "logits/rejected": -3.105076789855957, + "logps/chosen": -0.7331860661506653, + "logps/rejected": -1.0410078763961792, + "loss": 0.7943, + "odds_ratio_loss": 0.6107637882232666, + "rewards/accuracies": 0.574999988079071, + "rewards/chosen": -0.07331861555576324, + "rewards/margins": 0.03078218176960945, + "rewards/rejected": -0.104100801050663, + "sft_loss": 0.7331860661506653, + "step": 1810 + }, + { + "epoch": 2.942008486562942, + "grad_norm": 2.224161148071289, + "learning_rate": 4.14834473758563e-09, + "logits/chosen": -3.0697426795959473, + "logits/rejected": -3.098311185836792, + "logps/chosen": -0.6795364618301392, + "logps/rejected": -0.9198546409606934, + "loss": 0.7354, + "odds_ratio_loss": 0.5586915016174316, + "rewards/accuracies": 0.65625, + "rewards/chosen": -0.06795365363359451, + "rewards/margins": 0.02403181977570057, + "rewards/rejected": -0.09198546409606934, + "sft_loss": 0.6795364618301392, + "step": 1820 + }, + { + "epoch": 2.9581733683572438, + "grad_norm": 1.7413092851638794, + "learning_rate": 2.067282222230349e-09, + "logits/chosen": -3.101163864135742, + "logits/rejected": -3.1490769386291504, + "logps/chosen": -0.69708651304245, + "logps/rejected": -0.9820737838745117, + "loss": 0.753, + "odds_ratio_loss": 0.5595835447311401, + "rewards/accuracies": 0.643750011920929, + "rewards/chosen": -0.06970865279436111, + "rewards/margins": 0.028498733416199684, + "rewards/rejected": -0.09820737689733505, + "sft_loss": 0.69708651304245, + "step": 1830 + }, + { + "epoch": 2.9743382501515456, + "grad_norm": 9.5276517868042, + "learning_rate": 7.035141727212979e-10, + "logits/chosen": -3.111255168914795, + "logits/rejected": -3.1476521492004395, + "logps/chosen": -0.6813724040985107, + "logps/rejected": -0.9502272605895996, + "loss": 0.7369, + "odds_ratio_loss": 0.555463969707489, + "rewards/accuracies": 0.637499988079071, + "rewards/chosen": -0.06813724339008331, + "rewards/margins": 0.026885494589805603, + "rewards/rejected": -0.09502272307872772, + "sft_loss": 0.6813724040985107, + "step": 1840 + }, + { + "epoch": 2.9905031319458475, + "grad_norm": 10.846762657165527, + "learning_rate": 5.743220219761592e-11, + "logits/chosen": -3.105236530303955, + "logits/rejected": -3.1389710903167725, + "logps/chosen": -0.8284826278686523, + "logps/rejected": -1.0016568899154663, + "loss": 0.8952, + "odds_ratio_loss": 0.6669169068336487, + "rewards/accuracies": 0.612500011920929, + "rewards/chosen": -0.08284827321767807, + "rewards/margins": 0.017317429184913635, + "rewards/rejected": -0.1001657024025917, + "sft_loss": 0.8284826278686523, + "step": 1850 + }, + { + "epoch": 2.9969690846635686, + "step": 1854, + "total_flos": 2.0970902870084813e+18, + "train_loss": 0.8330582246554065, + "train_runtime": 34111.2463, + "train_samples_per_second": 0.871, + "train_steps_per_second": 0.054 + } + ], + "logging_steps": 10, + "max_steps": 1854, + "num_input_tokens_seen": 0, + "num_train_epochs": 3, + "save_steps": 500, + "total_flos": 2.0970902870084813e+18, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}