{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9865871833084947, "eval_steps": 500, "global_step": 501, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.029806259314456036, "grad_norm": 1765.45556640625, "learning_rate": 2.5000000000000004e-07, "log_odds_chosen": -0.21943321824073792, "log_odds_ratio": -1.0067085027694702, "logits/chosen": 204.28456115722656, "logits/rejected": 202.977294921875, "logps/chosen": -14.824699401855469, "logps/rejected": -14.605265617370605, "loss": 14.9632, "nll_loss": 14.546000480651855, "rewards/accuracies": 0.3499999940395355, "rewards/chosen": -0.7412349581718445, "rewards/margins": -0.01097165048122406, "rewards/rejected": -0.7302632927894592, "step": 5 }, { "epoch": 0.05961251862891207, "grad_norm": 1195.8741455078125, "learning_rate": 5.000000000000001e-07, "log_odds_chosen": 0.24401184916496277, "log_odds_ratio": -0.7723467946052551, "logits/chosen": 219.5009307861328, "logits/rejected": 223.572021484375, "logps/chosen": -12.244219779968262, "logps/rejected": -12.487574577331543, "loss": 12.6127, "nll_loss": 12.338577270507812, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.6122109293937683, "rewards/margins": 0.012167713604867458, "rewards/rejected": -0.6243786215782166, "step": 10 }, { "epoch": 0.08941877794336811, "grad_norm": 722.9285278320312, "learning_rate": 7.5e-07, "log_odds_chosen": 0.0473303496837616, "log_odds_ratio": -0.7741748690605164, "logits/chosen": 282.27947998046875, "logits/rejected": 261.2786865234375, "logps/chosen": -7.970606803894043, "logps/rejected": -8.0178804397583, "loss": 8.2789, "nll_loss": 7.956001281738281, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.39853033423423767, "rewards/margins": 0.002363653387874365, "rewards/rejected": -0.40089401602745056, "step": 15 }, { "epoch": 0.11922503725782414, "grad_norm": 212.62242126464844, "learning_rate": 1.0000000000000002e-06, "log_odds_chosen": -0.15251407027244568, "log_odds_ratio": -0.9524042010307312, "logits/chosen": 281.0796813964844, "logits/rejected": 275.33013916015625, "logps/chosen": -5.375563621520996, "logps/rejected": -5.224381446838379, "loss": 5.4453, "nll_loss": 5.453672885894775, "rewards/accuracies": 0.4000000059604645, "rewards/chosen": -0.2687782049179077, "rewards/margins": -0.007559105753898621, "rewards/rejected": -0.2612191140651703, "step": 20 }, { "epoch": 0.14903129657228018, "grad_norm": 166.2330322265625, "learning_rate": 1.25e-06, "log_odds_chosen": -0.04391743987798691, "log_odds_ratio": -0.8879317045211792, "logits/chosen": 299.25030517578125, "logits/rejected": 308.5736389160156, "logps/chosen": -3.281724452972412, "logps/rejected": -3.2199606895446777, "loss": 3.5013, "nll_loss": 3.3902111053466797, "rewards/accuracies": 0.5, "rewards/chosen": -0.1640862375497818, "rewards/margins": -0.0030881778802722692, "rewards/rejected": -0.16099804639816284, "step": 25 }, { "epoch": 0.17883755588673622, "grad_norm": 83.01959228515625, "learning_rate": 1.5e-06, "log_odds_chosen": -0.07733707875013351, "log_odds_ratio": -0.8942793607711792, "logits/chosen": 347.654052734375, "logits/rejected": 376.1275329589844, "logps/chosen": -2.622657537460327, "logps/rejected": -2.5195186138153076, "loss": 2.5561, "nll_loss": 2.6379752159118652, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.13113287091255188, "rewards/margins": -0.005156923085451126, "rewards/rejected": -0.12597593665122986, "step": 30 }, { "epoch": 0.20864381520119224, "grad_norm": 40.82948684692383, "learning_rate": 1.75e-06, "log_odds_chosen": 0.16575101017951965, "log_odds_ratio": -0.7404494285583496, "logits/chosen": 382.174072265625, "logits/rejected": 370.3721008300781, "logps/chosen": -1.8132009506225586, "logps/rejected": -1.9216792583465576, "loss": 2.1303, "nll_loss": 2.0061001777648926, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.09066005051136017, "rewards/margins": 0.005423928610980511, "rewards/rejected": -0.09608397632837296, "step": 35 }, { "epoch": 0.23845007451564829, "grad_norm": 373.0379333496094, "learning_rate": 2.0000000000000003e-06, "log_odds_chosen": 0.017796561121940613, "log_odds_ratio": -0.7689038515090942, "logits/chosen": 372.22100830078125, "logits/rejected": 370.50439453125, "logps/chosen": -1.6518943309783936, "logps/rejected": -1.6649363040924072, "loss": 1.9486, "nll_loss": 2.0397918224334717, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.08259471505880356, "rewards/margins": 0.0006520989118143916, "rewards/rejected": -0.08324681222438812, "step": 40 }, { "epoch": 0.26825633383010433, "grad_norm": 45.907875061035156, "learning_rate": 2.25e-06, "log_odds_chosen": 0.027211258187890053, "log_odds_ratio": -0.7474765777587891, "logits/chosen": 388.0882873535156, "logits/rejected": 397.65460205078125, "logps/chosen": -1.570575475692749, "logps/rejected": -1.5880815982818604, "loss": 1.8867, "nll_loss": 1.7669483423233032, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.07852877676486969, "rewards/margins": 0.0008753080619499087, "rewards/rejected": -0.0794040784239769, "step": 45 }, { "epoch": 0.29806259314456035, "grad_norm": 45.138648986816406, "learning_rate": 2.5e-06, "log_odds_chosen": 0.19176200032234192, "log_odds_ratio": -0.6679073572158813, "logits/chosen": 396.5473327636719, "logits/rejected": 418.2545471191406, "logps/chosen": -1.404476523399353, "logps/rejected": -1.5453894138336182, "loss": 1.8521, "nll_loss": 1.8635737895965576, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.07022383064031601, "rewards/margins": 0.00704564293846488, "rewards/rejected": -0.07726947963237762, "step": 50 }, { "epoch": 0.32786885245901637, "grad_norm": 146.7917938232422, "learning_rate": 2.7500000000000004e-06, "log_odds_chosen": 0.06669901311397552, "log_odds_ratio": -0.7251878380775452, "logits/chosen": 385.10101318359375, "logits/rejected": 378.09368896484375, "logps/chosen": -1.4211018085479736, "logps/rejected": -1.4656105041503906, "loss": 1.8795, "nll_loss": 1.921286940574646, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.0710550919175148, "rewards/margins": 0.0022254353389143944, "rewards/rejected": -0.07328052818775177, "step": 55 }, { "epoch": 0.35767511177347244, "grad_norm": 36.712623596191406, "learning_rate": 3e-06, "log_odds_chosen": 0.1147073283791542, "log_odds_ratio": -0.6886881589889526, "logits/chosen": 391.64190673828125, "logits/rejected": 383.321044921875, "logps/chosen": -1.381176471710205, "logps/rejected": -1.4568852186203003, "loss": 1.7236, "nll_loss": 1.7853686809539795, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.06905882805585861, "rewards/margins": 0.0037854425609111786, "rewards/rejected": -0.07284426689147949, "step": 60 }, { "epoch": 0.38748137108792846, "grad_norm": 27.392560958862305, "learning_rate": 3.2500000000000002e-06, "log_odds_chosen": 0.0811905488371849, "log_odds_ratio": -0.705346941947937, "logits/chosen": 390.33514404296875, "logits/rejected": 391.02215576171875, "logps/chosen": -1.2655917406082153, "logps/rejected": -1.3007347583770752, "loss": 1.6207, "nll_loss": 1.5275566577911377, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06327958405017853, "rewards/margins": 0.0017571467906236649, "rewards/rejected": -0.06503672897815704, "step": 65 }, { "epoch": 0.4172876304023845, "grad_norm": 108.5710678100586, "learning_rate": 3.5e-06, "log_odds_chosen": 0.030709872022271156, "log_odds_ratio": -0.7311884760856628, "logits/chosen": 374.34515380859375, "logits/rejected": 382.85137939453125, "logps/chosen": -1.3965779542922974, "logps/rejected": -1.4153110980987549, "loss": 1.6444, "nll_loss": 1.6620601415634155, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.06982889771461487, "rewards/margins": 0.0009366500889882445, "rewards/rejected": -0.07076555490493774, "step": 70 }, { "epoch": 0.44709388971684055, "grad_norm": 42.35745620727539, "learning_rate": 3.7500000000000005e-06, "log_odds_chosen": 0.17296305298805237, "log_odds_ratio": -0.6624878644943237, "logits/chosen": 394.97998046875, "logits/rejected": 382.9609069824219, "logps/chosen": -1.2325050830841064, "logps/rejected": -1.3494950532913208, "loss": 1.593, "nll_loss": 1.53190016746521, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.06162526085972786, "rewards/margins": 0.005849492736160755, "rewards/rejected": -0.06747475266456604, "step": 75 }, { "epoch": 0.47690014903129657, "grad_norm": 36.82132339477539, "learning_rate": 4.000000000000001e-06, "log_odds_chosen": 0.17391765117645264, "log_odds_ratio": -0.6597349643707275, "logits/chosen": 381.92547607421875, "logits/rejected": 404.1871643066406, "logps/chosen": -1.219416856765747, "logps/rejected": -1.3357045650482178, "loss": 1.6131, "nll_loss": 1.6360372304916382, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06097083538770676, "rewards/margins": 0.005814389791339636, "rewards/rejected": -0.06678523123264313, "step": 80 }, { "epoch": 0.5067064083457526, "grad_norm": 29.89981460571289, "learning_rate": 4.25e-06, "log_odds_chosen": 0.18893679976463318, "log_odds_ratio": -0.6906715631484985, "logits/chosen": 408.48101806640625, "logits/rejected": 392.5835266113281, "logps/chosen": -1.2594187259674072, "logps/rejected": -1.3885504007339478, "loss": 1.5956, "nll_loss": 1.603137731552124, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.06297092139720917, "rewards/margins": 0.006456589791923761, "rewards/rejected": -0.06942752748727798, "step": 85 }, { "epoch": 0.5365126676602087, "grad_norm": 32.92945861816406, "learning_rate": 4.5e-06, "log_odds_chosen": 0.555855393409729, "log_odds_ratio": -0.5900682806968689, "logits/chosen": 401.2886657714844, "logits/rejected": 416.2565002441406, "logps/chosen": -1.208212971687317, "logps/rejected": -1.6518011093139648, "loss": 1.4631, "nll_loss": 1.474485158920288, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06041065603494644, "rewards/margins": 0.022179413586854935, "rewards/rejected": -0.08259007334709167, "step": 90 }, { "epoch": 0.5663189269746647, "grad_norm": 34.76884460449219, "learning_rate": 4.75e-06, "log_odds_chosen": 0.19581779837608337, "log_odds_ratio": -0.6574069261550903, "logits/chosen": 371.4412536621094, "logits/rejected": 383.65155029296875, "logps/chosen": -1.1392086744308472, "logps/rejected": -1.2306644916534424, "loss": 1.5584, "nll_loss": 1.438720941543579, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05696043372154236, "rewards/margins": 0.0045727952383458614, "rewards/rejected": -0.06153322383761406, "step": 95 }, { "epoch": 0.5961251862891207, "grad_norm": 677.6953125, "learning_rate": 5e-06, "log_odds_chosen": 0.08993122726678848, "log_odds_ratio": -0.7155017256736755, "logits/chosen": 406.64996337890625, "logits/rejected": 442.7906188964844, "logps/chosen": -1.3057138919830322, "logps/rejected": -1.3538284301757812, "loss": 1.6646, "nll_loss": 1.6470537185668945, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.06528569757938385, "rewards/margins": 0.0024057202972471714, "rewards/rejected": -0.06769142299890518, "step": 100 }, { "epoch": 0.6259314456035767, "grad_norm": 81.34811401367188, "learning_rate": 4.8795003647426654e-06, "log_odds_chosen": 0.3003528416156769, "log_odds_ratio": -0.6239514946937561, "logits/chosen": 391.1552734375, "logits/rejected": 403.55609130859375, "logps/chosen": -1.2369372844696045, "logps/rejected": -1.4132254123687744, "loss": 1.6764, "nll_loss": 1.68059504032135, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.061846863478422165, "rewards/margins": 0.008814404718577862, "rewards/rejected": -0.0706612691283226, "step": 105 }, { "epoch": 0.6557377049180327, "grad_norm": 98.60116577148438, "learning_rate": 4.767312946227961e-06, "log_odds_chosen": 0.21533890068531036, "log_odds_ratio": -0.6346350312232971, "logits/chosen": 378.2474670410156, "logits/rejected": 376.3426818847656, "logps/chosen": -1.1033828258514404, "logps/rejected": -1.25198233127594, "loss": 1.5681, "nll_loss": 1.557680368423462, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.055169135332107544, "rewards/margins": 0.007429977413266897, "rewards/rejected": -0.06259911507368088, "step": 110 }, { "epoch": 0.6855439642324889, "grad_norm": 22.872821807861328, "learning_rate": 4.662524041201569e-06, "log_odds_chosen": 0.2698196470737457, "log_odds_ratio": -0.6147719621658325, "logits/chosen": 400.8478698730469, "logits/rejected": 407.18634033203125, "logps/chosen": -0.9827004671096802, "logps/rejected": -1.127701997756958, "loss": 1.5249, "nll_loss": 1.42640221118927, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.04913502186536789, "rewards/margins": 0.007250082679092884, "rewards/rejected": -0.0563850998878479, "step": 115 }, { "epoch": 0.7153502235469449, "grad_norm": 27.9619083404541, "learning_rate": 4.564354645876385e-06, "log_odds_chosen": 0.3403358459472656, "log_odds_ratio": -0.6000555753707886, "logits/chosen": 381.107421875, "logits/rejected": 381.04864501953125, "logps/chosen": -1.048896074295044, "logps/rejected": -1.2232722043991089, "loss": 1.5554, "nll_loss": 1.5394407510757446, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.05244480445981026, "rewards/margins": 0.00871881190687418, "rewards/rejected": -0.06116361543536186, "step": 120 }, { "epoch": 0.7451564828614009, "grad_norm": 23.146177291870117, "learning_rate": 4.47213595499958e-06, "log_odds_chosen": 0.08713512122631073, "log_odds_ratio": -0.7354093790054321, "logits/chosen": 378.9410400390625, "logits/rejected": 391.9457702636719, "logps/chosen": -1.1668498516082764, "logps/rejected": -1.1973512172698975, "loss": 1.4862, "nll_loss": 1.4849971532821655, "rewards/accuracies": 0.550000011920929, "rewards/chosen": -0.05834249407052994, "rewards/margins": 0.0015250641154125333, "rewards/rejected": -0.05986756086349487, "step": 125 }, { "epoch": 0.7749627421758569, "grad_norm": 37.57433319091797, "learning_rate": 4.385290096535147e-06, "log_odds_chosen": 0.13211670517921448, "log_odds_ratio": -0.7139819860458374, "logits/chosen": 401.40985107421875, "logits/rejected": 389.37921142578125, "logps/chosen": -1.1555781364440918, "logps/rejected": -1.2059427499771118, "loss": 1.5256, "nll_loss": 1.4828169345855713, "rewards/accuracies": 0.5, "rewards/chosen": -0.05777891352772713, "rewards/margins": 0.002518222201615572, "rewards/rejected": -0.06029713153839111, "step": 130 }, { "epoch": 0.8047690014903129, "grad_norm": 37.914310455322266, "learning_rate": 4.303314829119352e-06, "log_odds_chosen": 0.10099569708108902, "log_odds_ratio": -0.7038587331771851, "logits/chosen": 414.90655517578125, "logits/rejected": 416.6064453125, "logps/chosen": -1.1292693614959717, "logps/rejected": -1.2150599956512451, "loss": 1.5378, "nll_loss": 1.5873870849609375, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.056463468819856644, "rewards/margins": 0.004289527423679829, "rewards/rejected": -0.060752999037504196, "step": 135 }, { "epoch": 0.834575260804769, "grad_norm": 21.97135353088379, "learning_rate": 4.2257712736425835e-06, "log_odds_chosen": -0.07928862422704697, "log_odds_ratio": -0.8006687164306641, "logits/chosen": 397.2544250488281, "logits/rejected": 403.22857666015625, "logps/chosen": -1.11940598487854, "logps/rejected": -1.0619796514511108, "loss": 1.5228, "nll_loss": 1.6315351724624634, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": -0.05597030371427536, "rewards/margins": -0.002871322212740779, "rewards/rejected": -0.05309898406267166, "step": 140 }, { "epoch": 0.8643815201192251, "grad_norm": 34.809200286865234, "learning_rate": 4.1522739926869985e-06, "log_odds_chosen": -0.004179268144071102, "log_odds_ratio": -0.7272334694862366, "logits/chosen": 394.76995849609375, "logits/rejected": 397.96514892578125, "logps/chosen": -1.2000293731689453, "logps/rejected": -1.1946508884429932, "loss": 1.5155, "nll_loss": 1.5167872905731201, "rewards/accuracies": 0.574999988079071, "rewards/chosen": -0.060001470148563385, "rewards/margins": -0.0002689236425794661, "rewards/rejected": -0.05973255634307861, "step": 145 }, { "epoch": 0.8941877794336811, "grad_norm": 30.051952362060547, "learning_rate": 4.082482904638631e-06, "log_odds_chosen": 0.36712345480918884, "log_odds_ratio": -0.5663259625434875, "logits/chosen": 400.39495849609375, "logits/rejected": 418.39678955078125, "logps/chosen": -1.0868648290634155, "logps/rejected": -1.3322699069976807, "loss": 1.477, "nll_loss": 1.3918894529342651, "rewards/accuracies": 0.75, "rewards/chosen": -0.054343242198228836, "rewards/margins": 0.01227025780826807, "rewards/rejected": -0.06661349534988403, "step": 150 }, { "epoch": 0.9239940387481371, "grad_norm": 16.999670028686523, "learning_rate": 4.016096644512495e-06, "log_odds_chosen": 0.13306589424610138, "log_odds_ratio": -0.6789790391921997, "logits/chosen": 380.4939880371094, "logits/rejected": 395.53143310546875, "logps/chosen": -1.1204369068145752, "logps/rejected": -1.2021987438201904, "loss": 1.436, "nll_loss": 1.3288953304290771, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": -0.05602184683084488, "rewards/margins": 0.004088088870048523, "rewards/rejected": -0.060109943151474, "step": 155 }, { "epoch": 0.9538002980625931, "grad_norm": 34.52124786376953, "learning_rate": 3.952847075210474e-06, "log_odds_chosen": 0.08932497352361679, "log_odds_ratio": -0.7400273084640503, "logits/chosen": 386.62786865234375, "logits/rejected": 432.2003479003906, "logps/chosen": -1.0199127197265625, "logps/rejected": -1.1069036722183228, "loss": 1.425, "nll_loss": 1.3653684854507446, "rewards/accuracies": 0.5, "rewards/chosen": -0.05099564045667648, "rewards/margins": 0.00434954185038805, "rewards/rejected": -0.05534517765045166, "step": 160 }, { "epoch": 0.9836065573770492, "grad_norm": 17.771682739257812, "learning_rate": 3.892494720807615e-06, "log_odds_chosen": 0.02889970876276493, "log_odds_ratio": -0.7212048768997192, "logits/chosen": 396.8811950683594, "logits/rejected": 409.22821044921875, "logps/chosen": -1.091715693473816, "logps/rejected": -1.1267164945602417, "loss": 1.441, "nll_loss": 1.3998154401779175, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": -0.05458579212427139, "rewards/margins": 0.0017500361427664757, "rewards/rejected": -0.056335825473070145, "step": 165 }, { "epoch": 0.9955290611028316, "eval_log_odds_chosen": 0.19335530698299408, "eval_log_odds_ratio": -0.6989776492118835, "eval_logits/chosen": 318.99652099609375, "eval_logits/rejected": 290.1581115722656, "eval_logps/chosen": -1.0203651189804077, "eval_logps/rejected": -1.1485036611557007, "eval_loss": 1.4761662483215332, "eval_nll_loss": 1.4310433864593506, "eval_rewards/accuracies": 0.5323740839958191, "eval_rewards/chosen": -0.051018260419368744, "eval_rewards/margins": 0.006406927481293678, "eval_rewards/rejected": -0.057425182312726974, "eval_runtime": 112.3238, "eval_samples_per_second": 4.923, "eval_steps_per_second": 1.237, "step": 167 }, { "epoch": 1.0134128166915053, "grad_norm": 17.029600143432617, "learning_rate": 3.834824944236852e-06, "log_odds_chosen": 0.46681445837020874, "log_odds_ratio": -0.5670086741447449, "logits/chosen": 377.62884521484375, "logits/rejected": 402.346435546875, "logps/chosen": -0.9154840707778931, "logps/rejected": -1.1631513833999634, "loss": 1.3055, "nll_loss": 1.1554943323135376, "rewards/accuracies": 0.625, "rewards/chosen": -0.045774202793836594, "rewards/margins": 0.012383360415697098, "rewards/rejected": -0.05815757066011429, "step": 170 }, { "epoch": 1.0432190760059612, "grad_norm": 20.676471710205078, "learning_rate": 3.7796447300922724e-06, "log_odds_chosen": 0.8411234021186829, "log_odds_ratio": -0.4436827600002289, "logits/chosen": 360.05242919921875, "logits/rejected": 400.02374267578125, "logps/chosen": -0.6783148646354675, "logps/rejected": -1.1674000024795532, "loss": 1.0898, "nll_loss": 1.1356347799301147, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.033915746957063675, "rewards/margins": 0.024454256519675255, "rewards/rejected": -0.058369994163513184, "step": 175 }, { "epoch": 1.0730253353204173, "grad_norm": 18.295047760009766, "learning_rate": 3.72677996249965e-06, "log_odds_chosen": 0.8419575691223145, "log_odds_ratio": -0.43040966987609863, "logits/chosen": 360.3951110839844, "logits/rejected": 335.01239013671875, "logps/chosen": -0.7921234965324402, "logps/rejected": -1.2925331592559814, "loss": 1.1448, "nll_loss": 1.2160179615020752, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.03960617631673813, "rewards/margins": 0.025020483881235123, "rewards/rejected": -0.06462665647268295, "step": 180 }, { "epoch": 1.1028315946348732, "grad_norm": 20.26190757751465, "learning_rate": 3.6760731104690393e-06, "log_odds_chosen": 1.0061752796173096, "log_odds_ratio": -0.3863833546638489, "logits/chosen": 388.26934814453125, "logits/rejected": 379.1220703125, "logps/chosen": -0.6712931990623474, "logps/rejected": -1.1969959735870361, "loss": 1.0422, "nll_loss": 0.9980667233467102, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.03356466069817543, "rewards/margins": 0.026285137981176376, "rewards/rejected": -0.05984979867935181, "step": 185 }, { "epoch": 1.1326378539493294, "grad_norm": 16.21082305908203, "learning_rate": 3.6273812505500587e-06, "log_odds_chosen": 0.6967722177505493, "log_odds_ratio": -0.49137839674949646, "logits/chosen": 353.41705322265625, "logits/rejected": 400.4765930175781, "logps/chosen": -0.75420081615448, "logps/rejected": -1.1797516345977783, "loss": 1.1136, "nll_loss": 1.0225417613983154, "rewards/accuracies": 0.75, "rewards/chosen": -0.0377100370824337, "rewards/margins": 0.021277543157339096, "rewards/rejected": -0.0589875802397728, "step": 190 }, { "epoch": 1.1624441132637853, "grad_norm": 18.45132827758789, "learning_rate": 3.5805743701971648e-06, "log_odds_chosen": 0.8713854551315308, "log_odds_ratio": -0.4125959873199463, "logits/chosen": 383.83868408203125, "logits/rejected": 397.7996520996094, "logps/chosen": -0.7979816198348999, "logps/rejected": -1.2784751653671265, "loss": 1.1249, "nll_loss": 1.1307828426361084, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.039899080991744995, "rewards/margins": 0.02402467653155327, "rewards/rejected": -0.06392376124858856, "step": 195 }, { "epoch": 1.1922503725782414, "grad_norm": 29.213319778442383, "learning_rate": 3.5355339059327378e-06, "log_odds_chosen": 0.9310399889945984, "log_odds_ratio": -0.43441715836524963, "logits/chosen": 408.2393798828125, "logits/rejected": 392.23309326171875, "logps/chosen": -0.65810626745224, "logps/rejected": -1.2119154930114746, "loss": 1.0925, "nll_loss": 1.0188348293304443, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.03290531411767006, "rewards/margins": 0.0276904609054327, "rewards/rejected": -0.06059577316045761, "step": 200 }, { "epoch": 1.2220566318926975, "grad_norm": 20.968154907226562, "learning_rate": 3.4921514788478916e-06, "log_odds_chosen": 1.0998015403747559, "log_odds_ratio": -0.39691638946533203, "logits/chosen": 365.73724365234375, "logits/rejected": 359.8885803222656, "logps/chosen": -0.6815972924232483, "logps/rejected": -1.2400376796722412, "loss": 1.0466, "nll_loss": 1.0264532566070557, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.03407986834645271, "rewards/margins": 0.027922023087739944, "rewards/rejected": -0.06200189143419266, "step": 205 }, { "epoch": 1.2518628912071534, "grad_norm": 17.646827697753906, "learning_rate": 3.450327796711771e-06, "log_odds_chosen": 1.2030134201049805, "log_odds_ratio": -0.3409472107887268, "logits/chosen": 371.56903076171875, "logits/rejected": 400.691162109375, "logps/chosen": -0.6153351664543152, "logps/rejected": -1.2756757736206055, "loss": 1.0517, "nll_loss": 0.9517441987991333, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.03076675906777382, "rewards/margins": 0.03301702067255974, "rewards/rejected": -0.06378378719091415, "step": 210 }, { "epoch": 1.2816691505216096, "grad_norm": 25.96933364868164, "learning_rate": 3.409971697352368e-06, "log_odds_chosen": 1.0242887735366821, "log_odds_ratio": -0.3722797930240631, "logits/chosen": 393.1634826660156, "logits/rejected": 376.97198486328125, "logps/chosen": -0.7517871856689453, "logps/rejected": -1.3418635129928589, "loss": 1.0677, "nll_loss": 1.063118577003479, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.037589360028505325, "rewards/margins": 0.02950381301343441, "rewards/rejected": -0.06709317117929459, "step": 215 }, { "epoch": 1.3114754098360657, "grad_norm": 14.424154281616211, "learning_rate": 3.3709993123162106e-06, "log_odds_chosen": 0.6680114269256592, "log_odds_ratio": -0.5037292242050171, "logits/chosen": 385.2915344238281, "logits/rejected": 379.8268127441406, "logps/chosen": -0.8324653506278992, "logps/rejected": -1.1821435689926147, "loss": 1.071, "nll_loss": 1.0840386152267456, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.04162326827645302, "rewards/margins": 0.0174839086830616, "rewards/rejected": -0.05910717695951462, "step": 220 }, { "epoch": 1.3412816691505216, "grad_norm": 24.496023178100586, "learning_rate": 3.3333333333333333e-06, "log_odds_chosen": 0.5463358759880066, "log_odds_ratio": -0.5178000926971436, "logits/chosen": 381.60198974609375, "logits/rejected": 374.200439453125, "logps/chosen": -0.8569077253341675, "logps/rejected": -1.1593918800354004, "loss": 1.0304, "nll_loss": 1.1032346487045288, "rewards/accuracies": 0.75, "rewards/chosen": -0.042845387011766434, "rewards/margins": 0.015124207362532616, "rewards/rejected": -0.0579695925116539, "step": 225 }, { "epoch": 1.3710879284649775, "grad_norm": 31.743003845214844, "learning_rate": 3.296902366978936e-06, "log_odds_chosen": 1.1322697401046753, "log_odds_ratio": -0.3533535599708557, "logits/chosen": 353.97186279296875, "logits/rejected": 374.7437438964844, "logps/chosen": -0.5964599251747131, "logps/rejected": -1.2119852304458618, "loss": 1.0402, "nll_loss": 0.9073736071586609, "rewards/accuracies": 0.875, "rewards/chosen": -0.029822995886206627, "rewards/margins": 0.030776266008615494, "rewards/rejected": -0.06059925630688667, "step": 230 }, { "epoch": 1.4008941877794336, "grad_norm": 23.891324996948242, "learning_rate": 3.2616403652672114e-06, "log_odds_chosen": 1.1859080791473389, "log_odds_ratio": -0.37409111857414246, "logits/chosen": 381.7622985839844, "logits/rejected": 395.0599365234375, "logps/chosen": -0.6458351016044617, "logps/rejected": -1.346355676651001, "loss": 1.0587, "nll_loss": 0.9488533735275269, "rewards/accuracies": 0.875, "rewards/chosen": -0.032291755080223083, "rewards/margins": 0.035026032477617264, "rewards/rejected": -0.06731779128313065, "step": 235 }, { "epoch": 1.4307004470938898, "grad_norm": 16.38582992553711, "learning_rate": 3.2274861218395142e-06, "log_odds_chosen": 0.7762764692306519, "log_odds_ratio": -0.43844375014305115, "logits/chosen": 407.67388916015625, "logits/rejected": 413.35260009765625, "logps/chosen": -0.7236464619636536, "logps/rejected": -1.1575326919555664, "loss": 1.0752, "nll_loss": 1.0268566608428955, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.03618232533335686, "rewards/margins": 0.02169431373476982, "rewards/rejected": -0.05787663906812668, "step": 240 }, { "epoch": 1.4605067064083457, "grad_norm": 16.295490264892578, "learning_rate": 3.1943828249997e-06, "log_odds_chosen": 0.9785711169242859, "log_odds_ratio": -0.4029998779296875, "logits/chosen": 400.16632080078125, "logits/rejected": 388.1484069824219, "logps/chosen": -0.6374613642692566, "logps/rejected": -1.146707534790039, "loss": 1.0837, "nll_loss": 1.123439073562622, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.03187306597828865, "rewards/margins": 0.025462310761213303, "rewards/rejected": -0.05733537673950195, "step": 245 }, { "epoch": 1.4903129657228018, "grad_norm": 22.652774810791016, "learning_rate": 3.1622776601683796e-06, "log_odds_chosen": 1.0432734489440918, "log_odds_ratio": -0.4298950135707855, "logits/chosen": 374.0715637207031, "logits/rejected": 381.5113830566406, "logps/chosen": -0.6628987193107605, "logps/rejected": -1.2346137762069702, "loss": 0.9864, "nll_loss": 0.9000906944274902, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": -0.03314493969082832, "rewards/margins": 0.028585752472281456, "rewards/rejected": -0.06173068284988403, "step": 250 }, { "epoch": 1.520119225037258, "grad_norm": 15.01534652709961, "learning_rate": 3.131121455425748e-06, "log_odds_chosen": 1.0844942331314087, "log_odds_ratio": -0.34810084104537964, "logits/chosen": 391.9859924316406, "logits/rejected": 394.78021240234375, "logps/chosen": -0.5884779095649719, "logps/rejected": -1.1623605489730835, "loss": 1.0497, "nll_loss": 0.9377425312995911, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.029423898085951805, "rewards/margins": 0.028694134205579758, "rewards/rejected": -0.05811803415417671, "step": 255 }, { "epoch": 1.5499254843517138, "grad_norm": 22.26698112487793, "learning_rate": 3.1008683647302113e-06, "log_odds_chosen": 0.9070035815238953, "log_odds_ratio": -0.43072786927223206, "logits/chosen": 372.50006103515625, "logits/rejected": 414.58331298828125, "logps/chosen": -0.763823926448822, "logps/rejected": -1.341328740119934, "loss": 1.043, "nll_loss": 1.0060240030288696, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.03819119185209274, "rewards/margins": 0.028875242918729782, "rewards/rejected": -0.06706643104553223, "step": 260 }, { "epoch": 1.5797317436661698, "grad_norm": 14.599881172180176, "learning_rate": 3.0714755841697565e-06, "log_odds_chosen": 1.0877039432525635, "log_odds_ratio": -0.43615055084228516, "logits/chosen": 384.4775390625, "logits/rejected": 406.6970520019531, "logps/chosen": -0.6974985003471375, "logps/rejected": -1.3204139471054077, "loss": 1.098, "nll_loss": 1.024665117263794, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.03487492725253105, "rewards/margins": 0.031145762652158737, "rewards/rejected": -0.06602068990468979, "step": 265 }, { "epoch": 1.6095380029806259, "grad_norm": 17.716583251953125, "learning_rate": 3.0429030972509227e-06, "log_odds_chosen": 0.9025327563285828, "log_odds_ratio": -0.4233691692352295, "logits/chosen": 367.71807861328125, "logits/rejected": 379.2008361816406, "logps/chosen": -0.777201771736145, "logps/rejected": -1.2777531147003174, "loss": 1.0837, "nll_loss": 1.1377698183059692, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.03886008635163307, "rewards/margins": 0.025027573108673096, "rewards/rejected": -0.06388765573501587, "step": 270 }, { "epoch": 1.639344262295082, "grad_norm": 14.134200096130371, "learning_rate": 3.0151134457776365e-06, "log_odds_chosen": 0.8205320239067078, "log_odds_ratio": -0.44056087732315063, "logits/chosen": 360.33575439453125, "logits/rejected": 350.024169921875, "logps/chosen": -0.6577683687210083, "logps/rejected": -1.066030740737915, "loss": 1.0837, "nll_loss": 1.0101639032363892, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.032888419926166534, "rewards/margins": 0.020413123071193695, "rewards/rejected": -0.05330154299736023, "step": 275 }, { "epoch": 1.669150521609538, "grad_norm": 15.517395973205566, "learning_rate": 2.988071523335984e-06, "log_odds_chosen": 0.7949902415275574, "log_odds_ratio": -0.5562250018119812, "logits/chosen": 404.2984313964844, "logits/rejected": 391.6941833496094, "logps/chosen": -0.7360959649085999, "logps/rejected": -1.1831490993499756, "loss": 1.0486, "nll_loss": 1.0734833478927612, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.03680479899048805, "rewards/margins": 0.022352661937475204, "rewards/rejected": -0.05915746092796326, "step": 280 }, { "epoch": 1.698956780923994, "grad_norm": 14.143935203552246, "learning_rate": 2.961744388795462e-06, "log_odds_chosen": 0.9420916438102722, "log_odds_ratio": -0.42187291383743286, "logits/chosen": 367.45843505859375, "logits/rejected": 374.1835632324219, "logps/chosen": -0.6173609495162964, "logps/rejected": -1.1151915788650513, "loss": 0.996, "nll_loss": 0.9254717826843262, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.03086804784834385, "rewards/margins": 0.024891531094908714, "rewards/rejected": -0.05575958639383316, "step": 285 }, { "epoch": 1.7287630402384502, "grad_norm": 13.992819786071777, "learning_rate": 2.9361010975735177e-06, "log_odds_chosen": 0.9273719787597656, "log_odds_ratio": -0.39941272139549255, "logits/chosen": 386.17742919921875, "logits/rejected": 424.8526306152344, "logps/chosen": -0.7709314227104187, "logps/rejected": -1.294065237045288, "loss": 1.0527, "nll_loss": 0.9949714541435242, "rewards/accuracies": 0.875, "rewards/chosen": -0.038546573370695114, "rewards/margins": 0.026156682521104813, "rewards/rejected": -0.06470325589179993, "step": 290 }, { "epoch": 1.758569299552906, "grad_norm": 15.243948936462402, "learning_rate": 2.9111125486979104e-06, "log_odds_chosen": 0.7636137008666992, "log_odds_ratio": -0.4647112786769867, "logits/chosen": 361.9948425292969, "logits/rejected": 406.70654296875, "logps/chosen": -0.7253848314285278, "logps/rejected": -1.145918607711792, "loss": 1.0847, "nll_loss": 1.016174554824829, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.03626924008131027, "rewards/margins": 0.021026695147156715, "rewards/rejected": -0.05729593709111214, "step": 295 }, { "epoch": 1.788375558867362, "grad_norm": 23.890466690063477, "learning_rate": 2.8867513459481293e-06, "log_odds_chosen": 1.2909433841705322, "log_odds_ratio": -0.3190842270851135, "logits/chosen": 403.19427490234375, "logits/rejected": 380.4273986816406, "logps/chosen": -0.6161251068115234, "logps/rejected": -1.2782180309295654, "loss": 0.9952, "nll_loss": 0.9254310727119446, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": -0.030806254595518112, "rewards/margins": 0.03310465067625046, "rewards/rejected": -0.06391090154647827, "step": 300 }, { "epoch": 1.8181818181818183, "grad_norm": 16.844104766845703, "learning_rate": 2.862991671569341e-06, "log_odds_chosen": 0.5357767939567566, "log_odds_ratio": -0.5353686213493347, "logits/chosen": 395.70831298828125, "logits/rejected": 405.61749267578125, "logps/chosen": -0.9245316386222839, "logps/rejected": -1.2031428813934326, "loss": 1.0432, "nll_loss": 1.1699957847595215, "rewards/accuracies": 0.824999988079071, "rewards/chosen": -0.046226583421230316, "rewards/margins": 0.013930551707744598, "rewards/rejected": -0.060157131403684616, "step": 305 }, { "epoch": 1.8479880774962743, "grad_norm": 14.692316055297852, "learning_rate": 2.839809171235324e-06, "log_odds_chosen": 1.0770504474639893, "log_odds_ratio": -0.42079129815101624, "logits/chosen": 377.4819030761719, "logits/rejected": 387.6199645996094, "logps/chosen": -0.7239227294921875, "logps/rejected": -1.3824554681777954, "loss": 1.0884, "nll_loss": 1.0769283771514893, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.036196134984493256, "rewards/margins": 0.032926641404628754, "rewards/rejected": -0.06912277638912201, "step": 310 }, { "epoch": 1.8777943368107302, "grad_norm": 15.1817045211792, "learning_rate": 2.817180849095055e-06, "log_odds_chosen": 0.5459250807762146, "log_odds_ratio": -0.5598369240760803, "logits/chosen": 352.6174621582031, "logits/rejected": 371.89764404296875, "logps/chosen": -0.9762029647827148, "logps/rejected": -1.3525390625, "loss": 1.0938, "nll_loss": 1.238140344619751, "rewards/accuracies": 0.75, "rewards/chosen": -0.04881014674901962, "rewards/margins": 0.018816810101270676, "rewards/rejected": -0.0676269605755806, "step": 315 }, { "epoch": 1.9076005961251863, "grad_norm": 17.332054138183594, "learning_rate": 2.7950849718747376e-06, "log_odds_chosen": 1.1397926807403564, "log_odds_ratio": -0.36622655391693115, "logits/chosen": 373.9564514160156, "logits/rejected": 395.34271240234375, "logps/chosen": -0.6329408884048462, "logps/rejected": -1.2445515394210815, "loss": 0.9928, "nll_loss": 0.9283340573310852, "rewards/accuracies": 0.875, "rewards/chosen": -0.03164704144001007, "rewards/margins": 0.030580539256334305, "rewards/rejected": -0.062227584421634674, "step": 320 }, { "epoch": 1.9374068554396424, "grad_norm": 20.475017547607422, "learning_rate": 2.773500981126146e-06, "log_odds_chosen": 1.1559429168701172, "log_odds_ratio": -0.3606329560279846, "logits/chosen": 372.6563720703125, "logits/rejected": 405.1517333984375, "logps/chosen": -0.6990076303482056, "logps/rejected": -1.3749182224273682, "loss": 1.0121, "nll_loss": 0.9322077631950378, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.0349503830075264, "rewards/margins": 0.03379552438855171, "rewards/rejected": -0.06874591112136841, "step": 325 }, { "epoch": 1.9672131147540983, "grad_norm": 20.384191513061523, "learning_rate": 2.752409412815902e-06, "log_odds_chosen": 0.8144651651382446, "log_odds_ratio": -0.4188029170036316, "logits/chosen": 367.2298889160156, "logits/rejected": 376.0736083984375, "logps/chosen": -0.7355102896690369, "logps/rejected": -1.211102843284607, "loss": 1.0378, "nll_loss": 0.8931636810302734, "rewards/accuracies": 0.875, "rewards/chosen": -0.036775510758161545, "rewards/margins": 0.023779626935720444, "rewards/rejected": -0.06055514141917229, "step": 330 }, { "epoch": 1.9970193740685542, "grad_norm": 15.695927619934082, "learning_rate": 2.7317918235407652e-06, "log_odds_chosen": 0.5675193071365356, "log_odds_ratio": -0.5574907660484314, "logits/chosen": 395.9285888671875, "logits/rejected": 387.2447204589844, "logps/chosen": -0.9066513776779175, "logps/rejected": -1.2281653881072998, "loss": 1.0908, "nll_loss": 1.2198901176452637, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.045332565903663635, "rewards/margins": 0.016075702384114265, "rewards/rejected": -0.06140827015042305, "step": 335 }, { "epoch": 1.9970193740685542, "eval_log_odds_chosen": 0.2359991818666458, "eval_log_odds_ratio": -0.6970126628875732, "eval_logits/chosen": 314.6778564453125, "eval_logits/rejected": 285.82061767578125, "eval_logps/chosen": -0.9949654936790466, "eval_logps/rejected": -1.1527600288391113, "eval_loss": 1.4250013828277588, "eval_nll_loss": 1.3697166442871094, "eval_rewards/accuracies": 0.5323740839958191, "eval_rewards/chosen": -0.04974827170372009, "eval_rewards/margins": 0.00788972433656454, "eval_rewards/rejected": -0.05763799697160721, "eval_runtime": 112.2726, "eval_samples_per_second": 4.926, "eval_steps_per_second": 1.238, "step": 335 }, { "epoch": 2.0268256333830106, "grad_norm": 21.729570388793945, "learning_rate": 2.711630722733202e-06, "log_odds_chosen": 2.0113790035247803, "log_odds_ratio": -0.19709806144237518, "logits/chosen": 389.3846435546875, "logits/rejected": 366.3945617675781, "logps/chosen": -0.38005977869033813, "logps/rejected": -1.4012727737426758, "loss": 0.6433, "nll_loss": 0.6980705261230469, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.019002988934516907, "rewards/margins": 0.051060646772384644, "rewards/rejected": -0.07006363570690155, "step": 340 }, { "epoch": 2.0566318926974665, "grad_norm": 11.89656925201416, "learning_rate": 2.691909510290828e-06, "log_odds_chosen": 2.5525763034820557, "log_odds_ratio": -0.12284793704748154, "logits/chosen": 351.57080078125, "logits/rejected": 357.44329833984375, "logps/chosen": -0.3399081528186798, "logps/rejected": -1.6293659210205078, "loss": 0.5495, "nll_loss": 0.5662155151367188, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.01699540950357914, "rewards/margins": 0.06447288393974304, "rewards/rejected": -0.08146829158067703, "step": 345 }, { "epoch": 2.0864381520119224, "grad_norm": 13.419454574584961, "learning_rate": 2.6726124191242444e-06, "log_odds_chosen": 2.548877716064453, "log_odds_ratio": -0.11839280277490616, "logits/chosen": 350.29986572265625, "logits/rejected": 386.45709228515625, "logps/chosen": -0.382639080286026, "logps/rejected": -1.8921934366226196, "loss": 0.5743, "nll_loss": 0.5715562105178833, "rewards/accuracies": 1.0, "rewards/chosen": -0.01913195475935936, "rewards/margins": 0.0754777267575264, "rewards/rejected": -0.09460968524217606, "step": 350 }, { "epoch": 2.1162444113263787, "grad_norm": 13.355463027954102, "learning_rate": 2.6537244621713765e-06, "log_odds_chosen": 2.2259714603424072, "log_odds_ratio": -0.15891632437705994, "logits/chosen": 352.84619140625, "logits/rejected": 371.22576904296875, "logps/chosen": -0.37806540727615356, "logps/rejected": -1.5315955877304077, "loss": 0.5507, "nll_loss": 0.6317521333694458, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.018903274089097977, "rewards/margins": 0.05767650529742241, "rewards/rejected": -0.07657978683710098, "step": 355 }, { "epoch": 2.1460506706408347, "grad_norm": 10.8477201461792, "learning_rate": 2.6352313834736496e-06, "log_odds_chosen": 2.581636428833008, "log_odds_ratio": -0.1250651776790619, "logits/chosen": 353.0003356933594, "logits/rejected": 398.9602355957031, "logps/chosen": -0.3573206067085266, "logps/rejected": -1.6087188720703125, "loss": 0.5407, "nll_loss": 0.5504949688911438, "rewards/accuracies": 1.0, "rewards/chosen": -0.01786603033542633, "rewards/margins": 0.06256992369890213, "rewards/rejected": -0.08043594658374786, "step": 360 }, { "epoch": 2.1758569299552906, "grad_norm": 12.167034149169922, "learning_rate": 2.6171196129510684e-06, "log_odds_chosen": 1.9800822734832764, "log_odds_ratio": -0.16938333213329315, "logits/chosen": 341.21527099609375, "logits/rejected": 329.54119873046875, "logps/chosen": -0.348991334438324, "logps/rejected": -1.3196141719818115, "loss": 0.5516, "nll_loss": 0.5312565565109253, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.017449568957090378, "rewards/margins": 0.04853113740682602, "rewards/rejected": -0.0659807100892067, "step": 365 }, { "epoch": 2.2056631892697465, "grad_norm": 15.195405960083008, "learning_rate": 2.599376224550182e-06, "log_odds_chosen": 2.0713467597961426, "log_odds_ratio": -0.19306516647338867, "logits/chosen": 316.6725769042969, "logits/rejected": 339.6087646484375, "logps/chosen": -0.36510804295539856, "logps/rejected": -1.4302679300308228, "loss": 0.5732, "nll_loss": 0.5869459509849548, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.01825539954006672, "rewards/margins": 0.05325800180435181, "rewards/rejected": -0.07151339948177338, "step": 370 }, { "epoch": 2.235469448584203, "grad_norm": 12.842897415161133, "learning_rate": 2.5819888974716113e-06, "log_odds_chosen": 1.9603370428085327, "log_odds_ratio": -0.18798741698265076, "logits/chosen": 368.00836181640625, "logits/rejected": 389.7608337402344, "logps/chosen": -0.4214121699333191, "logps/rejected": -1.4475972652435303, "loss": 0.5831, "nll_loss": 0.6068717241287231, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.021070610731840134, "rewards/margins": 0.051309265196323395, "rewards/rejected": -0.07237987220287323, "step": 375 }, { "epoch": 2.2652757078986587, "grad_norm": 15.618853569030762, "learning_rate": 2.564945880212886e-06, "log_odds_chosen": 2.2622876167297363, "log_odds_ratio": -0.1320658028125763, "logits/chosen": 366.3780517578125, "logits/rejected": 351.96820068359375, "logps/chosen": -0.3000200688838959, "logps/rejected": -1.3632047176361084, "loss": 0.5571, "nll_loss": 0.525825560092926, "rewards/accuracies": 1.0, "rewards/chosen": -0.015001003630459309, "rewards/margins": 0.05315924435853958, "rewards/rejected": -0.06816024333238602, "step": 380 }, { "epoch": 2.2950819672131146, "grad_norm": 13.373687744140625, "learning_rate": 2.5482359571881276e-06, "log_odds_chosen": 2.5866951942443848, "log_odds_ratio": -0.11987988650798798, "logits/chosen": 358.47344970703125, "logits/rejected": 352.4609375, "logps/chosen": -0.283217191696167, "logps/rejected": -1.4752601385116577, "loss": 0.5301, "nll_loss": 0.49565237760543823, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.014160861261188984, "rewards/margins": 0.059602152556180954, "rewards/rejected": -0.07376301288604736, "step": 385 }, { "epoch": 2.3248882265275705, "grad_norm": 11.959152221679688, "learning_rate": 2.5318484177091667e-06, "log_odds_chosen": 2.3983092308044434, "log_odds_ratio": -0.11385631561279297, "logits/chosen": 370.3407287597656, "logits/rejected": 393.58978271484375, "logps/chosen": -0.36266201734542847, "logps/rejected": -1.6288502216339111, "loss": 0.578, "nll_loss": 0.5790597200393677, "rewards/accuracies": 1.0, "rewards/chosen": -0.018133098259568214, "rewards/margins": 0.06330940872430801, "rewards/rejected": -0.08144249767065048, "step": 390 }, { "epoch": 2.354694485842027, "grad_norm": 11.902227401733398, "learning_rate": 2.515773027133138e-06, "log_odds_chosen": 2.4830586910247803, "log_odds_ratio": -0.13829158246517181, "logits/chosen": 369.2203063964844, "logits/rejected": 362.56298828125, "logps/chosen": -0.2860831320285797, "logps/rejected": -1.3531745672225952, "loss": 0.5233, "nll_loss": 0.48577412962913513, "rewards/accuracies": 1.0, "rewards/chosen": -0.01430415641516447, "rewards/margins": 0.053354568779468536, "rewards/rejected": -0.06765872985124588, "step": 395 }, { "epoch": 2.384500745156483, "grad_norm": 18.2595157623291, "learning_rate": 2.5e-06, "log_odds_chosen": 2.4875540733337402, "log_odds_ratio": -0.13931448757648468, "logits/chosen": 366.81646728515625, "logits/rejected": 388.4540710449219, "logps/chosen": -0.3392675817012787, "logps/rejected": -1.6781524419784546, "loss": 0.5707, "nll_loss": 0.5266181826591492, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.016963381320238113, "rewards/margins": 0.06694425642490387, "rewards/rejected": -0.08390761911869049, "step": 400 }, { "epoch": 2.4143070044709387, "grad_norm": 10.78487777709961, "learning_rate": 2.484519974999767e-06, "log_odds_chosen": 2.3641769886016846, "log_odds_ratio": -0.18085625767707825, "logits/chosen": 417.9383850097656, "logits/rejected": 384.9745178222656, "logps/chosen": -0.36932411789894104, "logps/rejected": -1.5650533437728882, "loss": 0.5707, "nll_loss": 0.5322312712669373, "rewards/accuracies": 0.925000011920929, "rewards/chosen": -0.018466206267476082, "rewards/margins": 0.05978646129369736, "rewards/rejected": -0.07825267314910889, "step": 405 }, { "epoch": 2.444113263785395, "grad_norm": 12.914924621582031, "learning_rate": 2.4693239916239746e-06, "log_odds_chosen": 2.4095664024353027, "log_odds_ratio": -0.17002181708812714, "logits/chosen": 363.0850830078125, "logits/rejected": 378.43634033203125, "logps/chosen": -0.3721050024032593, "logps/rejected": -1.5407812595367432, "loss": 0.5689, "nll_loss": 0.5765537619590759, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.018605249002575874, "rewards/margins": 0.058433812111616135, "rewards/rejected": -0.07703907042741776, "step": 410 }, { "epoch": 2.473919523099851, "grad_norm": 11.604476928710938, "learning_rate": 2.4544034683690802e-06, "log_odds_chosen": 2.4141106605529785, "log_odds_ratio": -0.13905009627342224, "logits/chosen": 363.8720703125, "logits/rejected": 393.9859924316406, "logps/chosen": -0.32817938923835754, "logps/rejected": -1.5454423427581787, "loss": 0.5702, "nll_loss": 0.5272970795631409, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.016408968716859818, "rewards/margins": 0.06086314842104912, "rewards/rejected": -0.07727211713790894, "step": 415 }, { "epoch": 2.503725782414307, "grad_norm": 11.285563468933105, "learning_rate": 2.4397501823713327e-06, "log_odds_chosen": 2.0902717113494873, "log_odds_ratio": -0.18547013401985168, "logits/chosen": 364.81866455078125, "logits/rejected": 342.7242736816406, "logps/chosen": -0.3733817934989929, "logps/rejected": -1.4410852193832397, "loss": 0.56, "nll_loss": 0.6532183885574341, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.018669091165065765, "rewards/margins": 0.0533851683139801, "rewards/rejected": -0.07205425947904587, "step": 420 }, { "epoch": 2.533532041728763, "grad_norm": 12.517095565795898, "learning_rate": 2.4253562503633297e-06, "log_odds_chosen": 2.795741081237793, "log_odds_ratio": -0.08831789344549179, "logits/chosen": 362.86871337890625, "logits/rejected": 359.4671630859375, "logps/chosen": -0.32691091299057007, "logps/rejected": -1.8046060800552368, "loss": 0.5335, "nll_loss": 0.5374017357826233, "rewards/accuracies": 1.0, "rewards/chosen": -0.016345545649528503, "rewards/margins": 0.0738847479224205, "rewards/rejected": -0.0902303010225296, "step": 425 }, { "epoch": 2.563338301043219, "grad_norm": 13.33828353881836, "learning_rate": 2.411214110852061e-06, "log_odds_chosen": 2.7160139083862305, "log_odds_ratio": -0.10933760553598404, "logits/chosen": 362.9604187011719, "logits/rejected": 374.8692626953125, "logps/chosen": -0.27513235807418823, "logps/rejected": -1.615644097328186, "loss": 0.5522, "nll_loss": 0.48540863394737244, "rewards/accuracies": 1.0, "rewards/chosen": -0.01375661976635456, "rewards/margins": 0.06702558696269989, "rewards/rejected": -0.0807822048664093, "step": 430 }, { "epoch": 2.593144560357675, "grad_norm": 15.951871871948242, "learning_rate": 2.3973165074269213e-06, "log_odds_chosen": 2.399064779281616, "log_odds_ratio": -0.150381401181221, "logits/chosen": 368.9129943847656, "logits/rejected": 337.7628173828125, "logps/chosen": -0.3689618408679962, "logps/rejected": -1.5598814487457275, "loss": 0.5514, "nll_loss": 0.5270097255706787, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.01844809204339981, "rewards/margins": 0.059545982629060745, "rewards/rejected": -0.07799407094717026, "step": 435 }, { "epoch": 2.6229508196721314, "grad_norm": 12.57322883605957, "learning_rate": 2.3836564731139807e-06, "log_odds_chosen": 2.7293245792388916, "log_odds_ratio": -0.10528914630413055, "logits/chosen": 354.0178527832031, "logits/rejected": 365.93829345703125, "logps/chosen": -0.2712039351463318, "logps/rejected": -1.582219123840332, "loss": 0.5703, "nll_loss": 0.5448659062385559, "rewards/accuracies": 1.0, "rewards/chosen": -0.013560195453464985, "rewards/margins": 0.06555076688528061, "rewards/rejected": -0.07911095768213272, "step": 440 }, { "epoch": 2.6527570789865873, "grad_norm": 13.282082557678223, "learning_rate": 2.3702273156998867e-06, "log_odds_chosen": 2.619792938232422, "log_odds_ratio": -0.10272769629955292, "logits/chosen": 335.6366271972656, "logits/rejected": 372.410400390625, "logps/chosen": -0.36089158058166504, "logps/rejected": -1.8113043308258057, "loss": 0.5563, "nll_loss": 0.5579748749732971, "rewards/accuracies": 1.0, "rewards/chosen": -0.018044577911496162, "rewards/margins": 0.07252063602209091, "rewards/rejected": -0.09056521207094193, "step": 445 }, { "epoch": 2.682563338301043, "grad_norm": 10.777383804321289, "learning_rate": 2.357022603955159e-06, "log_odds_chosen": 2.4564261436462402, "log_odds_ratio": -0.11157449334859848, "logits/chosen": 362.14312744140625, "logits/rejected": 363.303466796875, "logps/chosen": -0.39076924324035645, "logps/rejected": -1.7145166397094727, "loss": 0.5754, "nll_loss": 0.5376263856887817, "rewards/accuracies": 1.0, "rewards/chosen": -0.019538460299372673, "rewards/margins": 0.06618736684322357, "rewards/rejected": -0.0857258215546608, "step": 450 }, { "epoch": 2.712369597615499, "grad_norm": 12.512327194213867, "learning_rate": 2.3440361546924774e-06, "log_odds_chosen": 2.614637613296509, "log_odds_ratio": -0.11486033350229263, "logits/chosen": 395.16949462890625, "logits/rejected": 374.3088684082031, "logps/chosen": -0.3622822165489197, "logps/rejected": -1.6618531942367554, "loss": 0.6153, "nll_loss": 0.568195641040802, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.018114114180207253, "rewards/margins": 0.06497855484485626, "rewards/rejected": -0.08309266716241837, "step": 455 }, { "epoch": 2.742175856929955, "grad_norm": 12.090532302856445, "learning_rate": 2.3312620206007847e-06, "log_odds_chosen": 2.508338451385498, "log_odds_ratio": -0.1204490214586258, "logits/chosen": 382.52630615234375, "logits/rejected": 401.80841064453125, "logps/chosen": -0.3474404215812683, "logps/rejected": -1.7473865747451782, "loss": 0.5838, "nll_loss": 0.6167483925819397, "rewards/accuracies": 1.0, "rewards/chosen": -0.017372019588947296, "rewards/margins": 0.06999730318784714, "rewards/rejected": -0.08736933022737503, "step": 460 }, { "epoch": 2.7719821162444114, "grad_norm": 13.27834701538086, "learning_rate": 2.3186944788008413e-06, "log_odds_chosen": 2.5867724418640137, "log_odds_ratio": -0.14203417301177979, "logits/chosen": 376.5874328613281, "logits/rejected": 381.06341552734375, "logps/chosen": -0.2869132459163666, "logps/rejected": -1.5630210638046265, "loss": 0.5778, "nll_loss": 0.55084627866745, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.014345663599669933, "rewards/margins": 0.06380538642406464, "rewards/rejected": -0.07815105468034744, "step": 465 }, { "epoch": 2.8017883755588673, "grad_norm": 11.784134864807129, "learning_rate": 2.3063280200722128e-06, "log_odds_chosen": 2.1283843517303467, "log_odds_ratio": -0.20095142722129822, "logits/chosen": 383.31781005859375, "logits/rejected": 354.9120788574219, "logps/chosen": -0.40080317854881287, "logps/rejected": -1.5093116760253906, "loss": 0.5644, "nll_loss": 0.575947642326355, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.020040160045027733, "rewards/margins": 0.05542542785406113, "rewards/rejected": -0.07546558976173401, "step": 470 }, { "epoch": 2.8315946348733236, "grad_norm": 13.008294105529785, "learning_rate": 2.2941573387056174e-06, "log_odds_chosen": 2.6808362007141113, "log_odds_ratio": -0.10760221630334854, "logits/chosen": 350.5984802246094, "logits/rejected": 374.9319152832031, "logps/chosen": -0.34488445520401, "logps/rejected": -1.7149194478988647, "loss": 0.5386, "nll_loss": 0.491553395986557, "rewards/accuracies": 1.0, "rewards/chosen": -0.01724422350525856, "rewards/margins": 0.06850175559520721, "rewards/rejected": -0.08574597537517548, "step": 475 }, { "epoch": 2.8614008941877795, "grad_norm": 10.424010276794434, "learning_rate": 2.2821773229381924e-06, "log_odds_chosen": 2.2412309646606445, "log_odds_ratio": -0.1566620171070099, "logits/chosen": 362.31378173828125, "logits/rejected": 402.6854248046875, "logps/chosen": -0.3766781687736511, "logps/rejected": -1.4856204986572266, "loss": 0.5052, "nll_loss": 0.48381978273391724, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.018833911046385765, "rewards/margins": 0.055447112768888474, "rewards/rejected": -0.07428102195262909, "step": 480 }, { "epoch": 2.8912071535022354, "grad_norm": 12.359146118164062, "learning_rate": 2.270383045932499e-06, "log_odds_chosen": 2.6057076454162598, "log_odds_ratio": -0.12701039016246796, "logits/chosen": 357.164306640625, "logits/rejected": 380.32073974609375, "logps/chosen": -0.37163636088371277, "logps/rejected": -1.8207753896713257, "loss": 0.5419, "nll_loss": 0.5325015187263489, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.01858181692659855, "rewards/margins": 0.07245694845914841, "rewards/rejected": -0.0910387635231018, "step": 485 }, { "epoch": 2.9210134128166914, "grad_norm": 11.646001815795898, "learning_rate": 2.2587697572631284e-06, "log_odds_chosen": 2.3249075412750244, "log_odds_ratio": -0.19486014544963837, "logits/chosen": 372.1253967285156, "logits/rejected": 338.1502380371094, "logps/chosen": -0.4259433150291443, "logps/rejected": -1.5797032117843628, "loss": 0.6087, "nll_loss": 0.5371652245521545, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.021297167986631393, "rewards/margins": 0.05768799036741257, "rewards/rejected": -0.07898515462875366, "step": 490 }, { "epoch": 2.9508196721311473, "grad_norm": 11.838138580322266, "learning_rate": 2.2473328748774737e-06, "log_odds_chosen": 2.3507559299468994, "log_odds_ratio": -0.1578751504421234, "logits/chosen": 366.9432373046875, "logits/rejected": 394.8822326660156, "logps/chosen": -0.3771159052848816, "logps/rejected": -1.533601999282837, "loss": 0.5442, "nll_loss": 0.5532703399658203, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.018855798989534378, "rewards/margins": 0.05782430246472359, "rewards/rejected": -0.07668010145425797, "step": 495 }, { "epoch": 2.9806259314456036, "grad_norm": 13.802445411682129, "learning_rate": 2.23606797749979e-06, "log_odds_chosen": 2.5029566287994385, "log_odds_ratio": -0.12695619463920593, "logits/chosen": 374.8814697265625, "logits/rejected": 372.7264099121094, "logps/chosen": -0.32484811544418335, "logps/rejected": -1.5648537874221802, "loss": 0.5724, "nll_loss": 0.47206535935401917, "rewards/accuracies": 1.0, "rewards/chosen": -0.016242407262325287, "rewards/margins": 0.062000274658203125, "rewards/rejected": -0.07824268192052841, "step": 500 }, { "epoch": 2.9865871833084947, "eval_log_odds_chosen": 0.2937372922897339, "eval_log_odds_ratio": -0.6945178508758545, "eval_logits/chosen": 300.6891174316406, "eval_logits/rejected": 271.8756103515625, "eval_logps/chosen": -1.0802680253982544, "eval_logps/rejected": -1.2502641677856445, "eval_loss": 1.539820671081543, "eval_nll_loss": 1.4724125862121582, "eval_rewards/accuracies": 0.5395683646202087, "eval_rewards/chosen": -0.05401340499520302, "eval_rewards/margins": 0.00849980115890503, "eval_rewards/rejected": -0.06251321732997894, "eval_runtime": 112.3165, "eval_samples_per_second": 4.924, "eval_steps_per_second": 1.238, "step": 501 }, { "epoch": 2.9865871833084947, "step": 501, "total_flos": 0.0, "train_loss": 1.4594077459590402, "train_runtime": 13816.0738, "train_samples_per_second": 1.165, "train_steps_per_second": 0.036 } ], "logging_steps": 5, "max_steps": 501, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }