|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9865871833084947, |
|
"eval_steps": 500, |
|
"global_step": 501, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.029806259314456036, |
|
"grad_norm": 1765.45556640625, |
|
"learning_rate": 2.5000000000000004e-07, |
|
"log_odds_chosen": -0.21943321824073792, |
|
"log_odds_ratio": -1.0067085027694702, |
|
"logits/chosen": 204.28456115722656, |
|
"logits/rejected": 202.977294921875, |
|
"logps/chosen": -14.824699401855469, |
|
"logps/rejected": -14.605265617370605, |
|
"loss": 14.9632, |
|
"nll_loss": 14.546000480651855, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": -0.7412349581718445, |
|
"rewards/margins": -0.01097165048122406, |
|
"rewards/rejected": -0.7302632927894592, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.05961251862891207, |
|
"grad_norm": 1195.8741455078125, |
|
"learning_rate": 5.000000000000001e-07, |
|
"log_odds_chosen": 0.24401184916496277, |
|
"log_odds_ratio": -0.7723467946052551, |
|
"logits/chosen": 219.5009307861328, |
|
"logits/rejected": 223.572021484375, |
|
"logps/chosen": -12.244219779968262, |
|
"logps/rejected": -12.487574577331543, |
|
"loss": 12.6127, |
|
"nll_loss": 12.338577270507812, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.6122109293937683, |
|
"rewards/margins": 0.012167713604867458, |
|
"rewards/rejected": -0.6243786215782166, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.08941877794336811, |
|
"grad_norm": 722.9285278320312, |
|
"learning_rate": 7.5e-07, |
|
"log_odds_chosen": 0.0473303496837616, |
|
"log_odds_ratio": -0.7741748690605164, |
|
"logits/chosen": 282.27947998046875, |
|
"logits/rejected": 261.2786865234375, |
|
"logps/chosen": -7.970606803894043, |
|
"logps/rejected": -8.0178804397583, |
|
"loss": 8.2789, |
|
"nll_loss": 7.956001281738281, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.39853033423423767, |
|
"rewards/margins": 0.002363653387874365, |
|
"rewards/rejected": -0.40089401602745056, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.11922503725782414, |
|
"grad_norm": 212.62242126464844, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"log_odds_chosen": -0.15251407027244568, |
|
"log_odds_ratio": -0.9524042010307312, |
|
"logits/chosen": 281.0796813964844, |
|
"logits/rejected": 275.33013916015625, |
|
"logps/chosen": -5.375563621520996, |
|
"logps/rejected": -5.224381446838379, |
|
"loss": 5.4453, |
|
"nll_loss": 5.453672885894775, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.2687782049179077, |
|
"rewards/margins": -0.007559105753898621, |
|
"rewards/rejected": -0.2612191140651703, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.14903129657228018, |
|
"grad_norm": 166.2330322265625, |
|
"learning_rate": 1.25e-06, |
|
"log_odds_chosen": -0.04391743987798691, |
|
"log_odds_ratio": -0.8879317045211792, |
|
"logits/chosen": 299.25030517578125, |
|
"logits/rejected": 308.5736389160156, |
|
"logps/chosen": -3.281724452972412, |
|
"logps/rejected": -3.2199606895446777, |
|
"loss": 3.5013, |
|
"nll_loss": 3.3902111053466797, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.1640862375497818, |
|
"rewards/margins": -0.0030881778802722692, |
|
"rewards/rejected": -0.16099804639816284, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.17883755588673622, |
|
"grad_norm": 83.01959228515625, |
|
"learning_rate": 1.5e-06, |
|
"log_odds_chosen": -0.07733707875013351, |
|
"log_odds_ratio": -0.8942793607711792, |
|
"logits/chosen": 347.654052734375, |
|
"logits/rejected": 376.1275329589844, |
|
"logps/chosen": -2.622657537460327, |
|
"logps/rejected": -2.5195186138153076, |
|
"loss": 2.5561, |
|
"nll_loss": 2.6379752159118652, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.13113287091255188, |
|
"rewards/margins": -0.005156923085451126, |
|
"rewards/rejected": -0.12597593665122986, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.20864381520119224, |
|
"grad_norm": 40.82948684692383, |
|
"learning_rate": 1.75e-06, |
|
"log_odds_chosen": 0.16575101017951965, |
|
"log_odds_ratio": -0.7404494285583496, |
|
"logits/chosen": 382.174072265625, |
|
"logits/rejected": 370.3721008300781, |
|
"logps/chosen": -1.8132009506225586, |
|
"logps/rejected": -1.9216792583465576, |
|
"loss": 2.1303, |
|
"nll_loss": 2.0061001777648926, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.09066005051136017, |
|
"rewards/margins": 0.005423928610980511, |
|
"rewards/rejected": -0.09608397632837296, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.23845007451564829, |
|
"grad_norm": 373.0379333496094, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"log_odds_chosen": 0.017796561121940613, |
|
"log_odds_ratio": -0.7689038515090942, |
|
"logits/chosen": 372.22100830078125, |
|
"logits/rejected": 370.50439453125, |
|
"logps/chosen": -1.6518943309783936, |
|
"logps/rejected": -1.6649363040924072, |
|
"loss": 1.9486, |
|
"nll_loss": 2.0397918224334717, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.08259471505880356, |
|
"rewards/margins": 0.0006520989118143916, |
|
"rewards/rejected": -0.08324681222438812, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.26825633383010433, |
|
"grad_norm": 45.907875061035156, |
|
"learning_rate": 2.25e-06, |
|
"log_odds_chosen": 0.027211258187890053, |
|
"log_odds_ratio": -0.7474765777587891, |
|
"logits/chosen": 388.0882873535156, |
|
"logits/rejected": 397.65460205078125, |
|
"logps/chosen": -1.570575475692749, |
|
"logps/rejected": -1.5880815982818604, |
|
"loss": 1.8867, |
|
"nll_loss": 1.7669483423233032, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.07852877676486969, |
|
"rewards/margins": 0.0008753080619499087, |
|
"rewards/rejected": -0.0794040784239769, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.29806259314456035, |
|
"grad_norm": 45.138648986816406, |
|
"learning_rate": 2.5e-06, |
|
"log_odds_chosen": 0.19176200032234192, |
|
"log_odds_ratio": -0.6679073572158813, |
|
"logits/chosen": 396.5473327636719, |
|
"logits/rejected": 418.2545471191406, |
|
"logps/chosen": -1.404476523399353, |
|
"logps/rejected": -1.5453894138336182, |
|
"loss": 1.8521, |
|
"nll_loss": 1.8635737895965576, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.07022383064031601, |
|
"rewards/margins": 0.00704564293846488, |
|
"rewards/rejected": -0.07726947963237762, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.32786885245901637, |
|
"grad_norm": 146.7917938232422, |
|
"learning_rate": 2.7500000000000004e-06, |
|
"log_odds_chosen": 0.06669901311397552, |
|
"log_odds_ratio": -0.7251878380775452, |
|
"logits/chosen": 385.10101318359375, |
|
"logits/rejected": 378.09368896484375, |
|
"logps/chosen": -1.4211018085479736, |
|
"logps/rejected": -1.4656105041503906, |
|
"loss": 1.8795, |
|
"nll_loss": 1.921286940574646, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.0710550919175148, |
|
"rewards/margins": 0.0022254353389143944, |
|
"rewards/rejected": -0.07328052818775177, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.35767511177347244, |
|
"grad_norm": 36.712623596191406, |
|
"learning_rate": 3e-06, |
|
"log_odds_chosen": 0.1147073283791542, |
|
"log_odds_ratio": -0.6886881589889526, |
|
"logits/chosen": 391.64190673828125, |
|
"logits/rejected": 383.321044921875, |
|
"logps/chosen": -1.381176471710205, |
|
"logps/rejected": -1.4568852186203003, |
|
"loss": 1.7236, |
|
"nll_loss": 1.7853686809539795, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.06905882805585861, |
|
"rewards/margins": 0.0037854425609111786, |
|
"rewards/rejected": -0.07284426689147949, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.38748137108792846, |
|
"grad_norm": 27.392560958862305, |
|
"learning_rate": 3.2500000000000002e-06, |
|
"log_odds_chosen": 0.0811905488371849, |
|
"log_odds_ratio": -0.705346941947937, |
|
"logits/chosen": 390.33514404296875, |
|
"logits/rejected": 391.02215576171875, |
|
"logps/chosen": -1.2655917406082153, |
|
"logps/rejected": -1.3007347583770752, |
|
"loss": 1.6207, |
|
"nll_loss": 1.5275566577911377, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.06327958405017853, |
|
"rewards/margins": 0.0017571467906236649, |
|
"rewards/rejected": -0.06503672897815704, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.4172876304023845, |
|
"grad_norm": 108.5710678100586, |
|
"learning_rate": 3.5e-06, |
|
"log_odds_chosen": 0.030709872022271156, |
|
"log_odds_ratio": -0.7311884760856628, |
|
"logits/chosen": 374.34515380859375, |
|
"logits/rejected": 382.85137939453125, |
|
"logps/chosen": -1.3965779542922974, |
|
"logps/rejected": -1.4153110980987549, |
|
"loss": 1.6444, |
|
"nll_loss": 1.6620601415634155, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.06982889771461487, |
|
"rewards/margins": 0.0009366500889882445, |
|
"rewards/rejected": -0.07076555490493774, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.44709388971684055, |
|
"grad_norm": 42.35745620727539, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"log_odds_chosen": 0.17296305298805237, |
|
"log_odds_ratio": -0.6624878644943237, |
|
"logits/chosen": 394.97998046875, |
|
"logits/rejected": 382.9609069824219, |
|
"logps/chosen": -1.2325050830841064, |
|
"logps/rejected": -1.3494950532913208, |
|
"loss": 1.593, |
|
"nll_loss": 1.53190016746521, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.06162526085972786, |
|
"rewards/margins": 0.005849492736160755, |
|
"rewards/rejected": -0.06747475266456604, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.47690014903129657, |
|
"grad_norm": 36.82132339477539, |
|
"learning_rate": 4.000000000000001e-06, |
|
"log_odds_chosen": 0.17391765117645264, |
|
"log_odds_ratio": -0.6597349643707275, |
|
"logits/chosen": 381.92547607421875, |
|
"logits/rejected": 404.1871643066406, |
|
"logps/chosen": -1.219416856765747, |
|
"logps/rejected": -1.3357045650482178, |
|
"loss": 1.6131, |
|
"nll_loss": 1.6360372304916382, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.06097083538770676, |
|
"rewards/margins": 0.005814389791339636, |
|
"rewards/rejected": -0.06678523123264313, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5067064083457526, |
|
"grad_norm": 29.89981460571289, |
|
"learning_rate": 4.25e-06, |
|
"log_odds_chosen": 0.18893679976463318, |
|
"log_odds_ratio": -0.6906715631484985, |
|
"logits/chosen": 408.48101806640625, |
|
"logits/rejected": 392.5835266113281, |
|
"logps/chosen": -1.2594187259674072, |
|
"logps/rejected": -1.3885504007339478, |
|
"loss": 1.5956, |
|
"nll_loss": 1.603137731552124, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.06297092139720917, |
|
"rewards/margins": 0.006456589791923761, |
|
"rewards/rejected": -0.06942752748727798, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.5365126676602087, |
|
"grad_norm": 32.92945861816406, |
|
"learning_rate": 4.5e-06, |
|
"log_odds_chosen": 0.555855393409729, |
|
"log_odds_ratio": -0.5900682806968689, |
|
"logits/chosen": 401.2886657714844, |
|
"logits/rejected": 416.2565002441406, |
|
"logps/chosen": -1.208212971687317, |
|
"logps/rejected": -1.6518011093139648, |
|
"loss": 1.4631, |
|
"nll_loss": 1.474485158920288, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.06041065603494644, |
|
"rewards/margins": 0.022179413586854935, |
|
"rewards/rejected": -0.08259007334709167, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.5663189269746647, |
|
"grad_norm": 34.76884460449219, |
|
"learning_rate": 4.75e-06, |
|
"log_odds_chosen": 0.19581779837608337, |
|
"log_odds_ratio": -0.6574069261550903, |
|
"logits/chosen": 371.4412536621094, |
|
"logits/rejected": 383.65155029296875, |
|
"logps/chosen": -1.1392086744308472, |
|
"logps/rejected": -1.2306644916534424, |
|
"loss": 1.5584, |
|
"nll_loss": 1.438720941543579, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.05696043372154236, |
|
"rewards/margins": 0.0045727952383458614, |
|
"rewards/rejected": -0.06153322383761406, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.5961251862891207, |
|
"grad_norm": 677.6953125, |
|
"learning_rate": 5e-06, |
|
"log_odds_chosen": 0.08993122726678848, |
|
"log_odds_ratio": -0.7155017256736755, |
|
"logits/chosen": 406.64996337890625, |
|
"logits/rejected": 442.7906188964844, |
|
"logps/chosen": -1.3057138919830322, |
|
"logps/rejected": -1.3538284301757812, |
|
"loss": 1.6646, |
|
"nll_loss": 1.6470537185668945, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.06528569757938385, |
|
"rewards/margins": 0.0024057202972471714, |
|
"rewards/rejected": -0.06769142299890518, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.6259314456035767, |
|
"grad_norm": 81.34811401367188, |
|
"learning_rate": 4.8795003647426654e-06, |
|
"log_odds_chosen": 0.3003528416156769, |
|
"log_odds_ratio": -0.6239514946937561, |
|
"logits/chosen": 391.1552734375, |
|
"logits/rejected": 403.55609130859375, |
|
"logps/chosen": -1.2369372844696045, |
|
"logps/rejected": -1.4132254123687744, |
|
"loss": 1.6764, |
|
"nll_loss": 1.68059504032135, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.061846863478422165, |
|
"rewards/margins": 0.008814404718577862, |
|
"rewards/rejected": -0.0706612691283226, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.6557377049180327, |
|
"grad_norm": 98.60116577148438, |
|
"learning_rate": 4.767312946227961e-06, |
|
"log_odds_chosen": 0.21533890068531036, |
|
"log_odds_ratio": -0.6346350312232971, |
|
"logits/chosen": 378.2474670410156, |
|
"logits/rejected": 376.3426818847656, |
|
"logps/chosen": -1.1033828258514404, |
|
"logps/rejected": -1.25198233127594, |
|
"loss": 1.5681, |
|
"nll_loss": 1.557680368423462, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.055169135332107544, |
|
"rewards/margins": 0.007429977413266897, |
|
"rewards/rejected": -0.06259911507368088, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.6855439642324889, |
|
"grad_norm": 22.872821807861328, |
|
"learning_rate": 4.662524041201569e-06, |
|
"log_odds_chosen": 0.2698196470737457, |
|
"log_odds_ratio": -0.6147719621658325, |
|
"logits/chosen": 400.8478698730469, |
|
"logits/rejected": 407.18634033203125, |
|
"logps/chosen": -0.9827004671096802, |
|
"logps/rejected": -1.127701997756958, |
|
"loss": 1.5249, |
|
"nll_loss": 1.42640221118927, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.04913502186536789, |
|
"rewards/margins": 0.007250082679092884, |
|
"rewards/rejected": -0.0563850998878479, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.7153502235469449, |
|
"grad_norm": 27.9619083404541, |
|
"learning_rate": 4.564354645876385e-06, |
|
"log_odds_chosen": 0.3403358459472656, |
|
"log_odds_ratio": -0.6000555753707886, |
|
"logits/chosen": 381.107421875, |
|
"logits/rejected": 381.04864501953125, |
|
"logps/chosen": -1.048896074295044, |
|
"logps/rejected": -1.2232722043991089, |
|
"loss": 1.5554, |
|
"nll_loss": 1.5394407510757446, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.05244480445981026, |
|
"rewards/margins": 0.00871881190687418, |
|
"rewards/rejected": -0.06116361543536186, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.7451564828614009, |
|
"grad_norm": 23.146177291870117, |
|
"learning_rate": 4.47213595499958e-06, |
|
"log_odds_chosen": 0.08713512122631073, |
|
"log_odds_ratio": -0.7354093790054321, |
|
"logits/chosen": 378.9410400390625, |
|
"logits/rejected": 391.9457702636719, |
|
"logps/chosen": -1.1668498516082764, |
|
"logps/rejected": -1.1973512172698975, |
|
"loss": 1.4862, |
|
"nll_loss": 1.4849971532821655, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.05834249407052994, |
|
"rewards/margins": 0.0015250641154125333, |
|
"rewards/rejected": -0.05986756086349487, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.7749627421758569, |
|
"grad_norm": 37.57433319091797, |
|
"learning_rate": 4.385290096535147e-06, |
|
"log_odds_chosen": 0.13211670517921448, |
|
"log_odds_ratio": -0.7139819860458374, |
|
"logits/chosen": 401.40985107421875, |
|
"logits/rejected": 389.37921142578125, |
|
"logps/chosen": -1.1555781364440918, |
|
"logps/rejected": -1.2059427499771118, |
|
"loss": 1.5256, |
|
"nll_loss": 1.4828169345855713, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.05777891352772713, |
|
"rewards/margins": 0.002518222201615572, |
|
"rewards/rejected": -0.06029713153839111, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8047690014903129, |
|
"grad_norm": 37.914310455322266, |
|
"learning_rate": 4.303314829119352e-06, |
|
"log_odds_chosen": 0.10099569708108902, |
|
"log_odds_ratio": -0.7038587331771851, |
|
"logits/chosen": 414.90655517578125, |
|
"logits/rejected": 416.6064453125, |
|
"logps/chosen": -1.1292693614959717, |
|
"logps/rejected": -1.2150599956512451, |
|
"loss": 1.5378, |
|
"nll_loss": 1.5873870849609375, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.056463468819856644, |
|
"rewards/margins": 0.004289527423679829, |
|
"rewards/rejected": -0.060752999037504196, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.834575260804769, |
|
"grad_norm": 21.97135353088379, |
|
"learning_rate": 4.2257712736425835e-06, |
|
"log_odds_chosen": -0.07928862422704697, |
|
"log_odds_ratio": -0.8006687164306641, |
|
"logits/chosen": 397.2544250488281, |
|
"logits/rejected": 403.22857666015625, |
|
"logps/chosen": -1.11940598487854, |
|
"logps/rejected": -1.0619796514511108, |
|
"loss": 1.5228, |
|
"nll_loss": 1.6315351724624634, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -0.05597030371427536, |
|
"rewards/margins": -0.002871322212740779, |
|
"rewards/rejected": -0.05309898406267166, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.8643815201192251, |
|
"grad_norm": 34.809200286865234, |
|
"learning_rate": 4.1522739926869985e-06, |
|
"log_odds_chosen": -0.004179268144071102, |
|
"log_odds_ratio": -0.7272334694862366, |
|
"logits/chosen": 394.76995849609375, |
|
"logits/rejected": 397.96514892578125, |
|
"logps/chosen": -1.2000293731689453, |
|
"logps/rejected": -1.1946508884429932, |
|
"loss": 1.5155, |
|
"nll_loss": 1.5167872905731201, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.060001470148563385, |
|
"rewards/margins": -0.0002689236425794661, |
|
"rewards/rejected": -0.05973255634307861, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.8941877794336811, |
|
"grad_norm": 30.051952362060547, |
|
"learning_rate": 4.082482904638631e-06, |
|
"log_odds_chosen": 0.36712345480918884, |
|
"log_odds_ratio": -0.5663259625434875, |
|
"logits/chosen": 400.39495849609375, |
|
"logits/rejected": 418.39678955078125, |
|
"logps/chosen": -1.0868648290634155, |
|
"logps/rejected": -1.3322699069976807, |
|
"loss": 1.477, |
|
"nll_loss": 1.3918894529342651, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.054343242198228836, |
|
"rewards/margins": 0.01227025780826807, |
|
"rewards/rejected": -0.06661349534988403, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.9239940387481371, |
|
"grad_norm": 16.999670028686523, |
|
"learning_rate": 4.016096644512495e-06, |
|
"log_odds_chosen": 0.13306589424610138, |
|
"log_odds_ratio": -0.6789790391921997, |
|
"logits/chosen": 380.4939880371094, |
|
"logits/rejected": 395.53143310546875, |
|
"logps/chosen": -1.1204369068145752, |
|
"logps/rejected": -1.2021987438201904, |
|
"loss": 1.436, |
|
"nll_loss": 1.3288953304290771, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.05602184683084488, |
|
"rewards/margins": 0.004088088870048523, |
|
"rewards/rejected": -0.060109943151474, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.9538002980625931, |
|
"grad_norm": 34.52124786376953, |
|
"learning_rate": 3.952847075210474e-06, |
|
"log_odds_chosen": 0.08932497352361679, |
|
"log_odds_ratio": -0.7400273084640503, |
|
"logits/chosen": 386.62786865234375, |
|
"logits/rejected": 432.2003479003906, |
|
"logps/chosen": -1.0199127197265625, |
|
"logps/rejected": -1.1069036722183228, |
|
"loss": 1.425, |
|
"nll_loss": 1.3653684854507446, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.05099564045667648, |
|
"rewards/margins": 0.00434954185038805, |
|
"rewards/rejected": -0.05534517765045166, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.9836065573770492, |
|
"grad_norm": 17.771682739257812, |
|
"learning_rate": 3.892494720807615e-06, |
|
"log_odds_chosen": 0.02889970876276493, |
|
"log_odds_ratio": -0.7212048768997192, |
|
"logits/chosen": 396.8811950683594, |
|
"logits/rejected": 409.22821044921875, |
|
"logps/chosen": -1.091715693473816, |
|
"logps/rejected": -1.1267164945602417, |
|
"loss": 1.441, |
|
"nll_loss": 1.3998154401779175, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.05458579212427139, |
|
"rewards/margins": 0.0017500361427664757, |
|
"rewards/rejected": -0.056335825473070145, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.9955290611028316, |
|
"eval_log_odds_chosen": 0.19335530698299408, |
|
"eval_log_odds_ratio": -0.6989776492118835, |
|
"eval_logits/chosen": 318.99652099609375, |
|
"eval_logits/rejected": 290.1581115722656, |
|
"eval_logps/chosen": -1.0203651189804077, |
|
"eval_logps/rejected": -1.1485036611557007, |
|
"eval_loss": 1.4761662483215332, |
|
"eval_nll_loss": 1.4310433864593506, |
|
"eval_rewards/accuracies": 0.5323740839958191, |
|
"eval_rewards/chosen": -0.051018260419368744, |
|
"eval_rewards/margins": 0.006406927481293678, |
|
"eval_rewards/rejected": -0.057425182312726974, |
|
"eval_runtime": 112.3238, |
|
"eval_samples_per_second": 4.923, |
|
"eval_steps_per_second": 1.237, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 1.0134128166915053, |
|
"grad_norm": 17.029600143432617, |
|
"learning_rate": 3.834824944236852e-06, |
|
"log_odds_chosen": 0.46681445837020874, |
|
"log_odds_ratio": -0.5670086741447449, |
|
"logits/chosen": 377.62884521484375, |
|
"logits/rejected": 402.346435546875, |
|
"logps/chosen": -0.9154840707778931, |
|
"logps/rejected": -1.1631513833999634, |
|
"loss": 1.3055, |
|
"nll_loss": 1.1554943323135376, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.045774202793836594, |
|
"rewards/margins": 0.012383360415697098, |
|
"rewards/rejected": -0.05815757066011429, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.0432190760059612, |
|
"grad_norm": 20.676471710205078, |
|
"learning_rate": 3.7796447300922724e-06, |
|
"log_odds_chosen": 0.8411234021186829, |
|
"log_odds_ratio": -0.4436827600002289, |
|
"logits/chosen": 360.05242919921875, |
|
"logits/rejected": 400.02374267578125, |
|
"logps/chosen": -0.6783148646354675, |
|
"logps/rejected": -1.1674000024795532, |
|
"loss": 1.0898, |
|
"nll_loss": 1.1356347799301147, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.033915746957063675, |
|
"rewards/margins": 0.024454256519675255, |
|
"rewards/rejected": -0.058369994163513184, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.0730253353204173, |
|
"grad_norm": 18.295047760009766, |
|
"learning_rate": 3.72677996249965e-06, |
|
"log_odds_chosen": 0.8419575691223145, |
|
"log_odds_ratio": -0.43040966987609863, |
|
"logits/chosen": 360.3951110839844, |
|
"logits/rejected": 335.01239013671875, |
|
"logps/chosen": -0.7921234965324402, |
|
"logps/rejected": -1.2925331592559814, |
|
"loss": 1.1448, |
|
"nll_loss": 1.2160179615020752, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.03960617631673813, |
|
"rewards/margins": 0.025020483881235123, |
|
"rewards/rejected": -0.06462665647268295, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.1028315946348732, |
|
"grad_norm": 20.26190757751465, |
|
"learning_rate": 3.6760731104690393e-06, |
|
"log_odds_chosen": 1.0061752796173096, |
|
"log_odds_ratio": -0.3863833546638489, |
|
"logits/chosen": 388.26934814453125, |
|
"logits/rejected": 379.1220703125, |
|
"logps/chosen": -0.6712931990623474, |
|
"logps/rejected": -1.1969959735870361, |
|
"loss": 1.0422, |
|
"nll_loss": 0.9980667233467102, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.03356466069817543, |
|
"rewards/margins": 0.026285137981176376, |
|
"rewards/rejected": -0.05984979867935181, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.1326378539493294, |
|
"grad_norm": 16.21082305908203, |
|
"learning_rate": 3.6273812505500587e-06, |
|
"log_odds_chosen": 0.6967722177505493, |
|
"log_odds_ratio": -0.49137839674949646, |
|
"logits/chosen": 353.41705322265625, |
|
"logits/rejected": 400.4765930175781, |
|
"logps/chosen": -0.75420081615448, |
|
"logps/rejected": -1.1797516345977783, |
|
"loss": 1.1136, |
|
"nll_loss": 1.0225417613983154, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.0377100370824337, |
|
"rewards/margins": 0.021277543157339096, |
|
"rewards/rejected": -0.0589875802397728, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.1624441132637853, |
|
"grad_norm": 18.45132827758789, |
|
"learning_rate": 3.5805743701971648e-06, |
|
"log_odds_chosen": 0.8713854551315308, |
|
"log_odds_ratio": -0.4125959873199463, |
|
"logits/chosen": 383.83868408203125, |
|
"logits/rejected": 397.7996520996094, |
|
"logps/chosen": -0.7979816198348999, |
|
"logps/rejected": -1.2784751653671265, |
|
"loss": 1.1249, |
|
"nll_loss": 1.1307828426361084, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.039899080991744995, |
|
"rewards/margins": 0.02402467653155327, |
|
"rewards/rejected": -0.06392376124858856, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.1922503725782414, |
|
"grad_norm": 29.213319778442383, |
|
"learning_rate": 3.5355339059327378e-06, |
|
"log_odds_chosen": 0.9310399889945984, |
|
"log_odds_ratio": -0.43441715836524963, |
|
"logits/chosen": 408.2393798828125, |
|
"logits/rejected": 392.23309326171875, |
|
"logps/chosen": -0.65810626745224, |
|
"logps/rejected": -1.2119154930114746, |
|
"loss": 1.0925, |
|
"nll_loss": 1.0188348293304443, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.03290531411767006, |
|
"rewards/margins": 0.0276904609054327, |
|
"rewards/rejected": -0.06059577316045761, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.2220566318926975, |
|
"grad_norm": 20.968154907226562, |
|
"learning_rate": 3.4921514788478916e-06, |
|
"log_odds_chosen": 1.0998015403747559, |
|
"log_odds_ratio": -0.39691638946533203, |
|
"logits/chosen": 365.73724365234375, |
|
"logits/rejected": 359.8885803222656, |
|
"logps/chosen": -0.6815972924232483, |
|
"logps/rejected": -1.2400376796722412, |
|
"loss": 1.0466, |
|
"nll_loss": 1.0264532566070557, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.03407986834645271, |
|
"rewards/margins": 0.027922023087739944, |
|
"rewards/rejected": -0.06200189143419266, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.2518628912071534, |
|
"grad_norm": 17.646827697753906, |
|
"learning_rate": 3.450327796711771e-06, |
|
"log_odds_chosen": 1.2030134201049805, |
|
"log_odds_ratio": -0.3409472107887268, |
|
"logits/chosen": 371.56903076171875, |
|
"logits/rejected": 400.691162109375, |
|
"logps/chosen": -0.6153351664543152, |
|
"logps/rejected": -1.2756757736206055, |
|
"loss": 1.0517, |
|
"nll_loss": 0.9517441987991333, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.03076675906777382, |
|
"rewards/margins": 0.03301702067255974, |
|
"rewards/rejected": -0.06378378719091415, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.2816691505216096, |
|
"grad_norm": 25.96933364868164, |
|
"learning_rate": 3.409971697352368e-06, |
|
"log_odds_chosen": 1.0242887735366821, |
|
"log_odds_ratio": -0.3722797930240631, |
|
"logits/chosen": 393.1634826660156, |
|
"logits/rejected": 376.97198486328125, |
|
"logps/chosen": -0.7517871856689453, |
|
"logps/rejected": -1.3418635129928589, |
|
"loss": 1.0677, |
|
"nll_loss": 1.063118577003479, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.037589360028505325, |
|
"rewards/margins": 0.02950381301343441, |
|
"rewards/rejected": -0.06709317117929459, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 1.3114754098360657, |
|
"grad_norm": 14.424154281616211, |
|
"learning_rate": 3.3709993123162106e-06, |
|
"log_odds_chosen": 0.6680114269256592, |
|
"log_odds_ratio": -0.5037292242050171, |
|
"logits/chosen": 385.2915344238281, |
|
"logits/rejected": 379.8268127441406, |
|
"logps/chosen": -0.8324653506278992, |
|
"logps/rejected": -1.1821435689926147, |
|
"loss": 1.071, |
|
"nll_loss": 1.0840386152267456, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.04162326827645302, |
|
"rewards/margins": 0.0174839086830616, |
|
"rewards/rejected": -0.05910717695951462, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.3412816691505216, |
|
"grad_norm": 24.496023178100586, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"log_odds_chosen": 0.5463358759880066, |
|
"log_odds_ratio": -0.5178000926971436, |
|
"logits/chosen": 381.60198974609375, |
|
"logits/rejected": 374.200439453125, |
|
"logps/chosen": -0.8569077253341675, |
|
"logps/rejected": -1.1593918800354004, |
|
"loss": 1.0304, |
|
"nll_loss": 1.1032346487045288, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.042845387011766434, |
|
"rewards/margins": 0.015124207362532616, |
|
"rewards/rejected": -0.0579695925116539, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 1.3710879284649775, |
|
"grad_norm": 31.743003845214844, |
|
"learning_rate": 3.296902366978936e-06, |
|
"log_odds_chosen": 1.1322697401046753, |
|
"log_odds_ratio": -0.3533535599708557, |
|
"logits/chosen": 353.97186279296875, |
|
"logits/rejected": 374.7437438964844, |
|
"logps/chosen": -0.5964599251747131, |
|
"logps/rejected": -1.2119852304458618, |
|
"loss": 1.0402, |
|
"nll_loss": 0.9073736071586609, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.029822995886206627, |
|
"rewards/margins": 0.030776266008615494, |
|
"rewards/rejected": -0.06059925630688667, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.4008941877794336, |
|
"grad_norm": 23.891324996948242, |
|
"learning_rate": 3.2616403652672114e-06, |
|
"log_odds_chosen": 1.1859080791473389, |
|
"log_odds_ratio": -0.37409111857414246, |
|
"logits/chosen": 381.7622985839844, |
|
"logits/rejected": 395.0599365234375, |
|
"logps/chosen": -0.6458351016044617, |
|
"logps/rejected": -1.346355676651001, |
|
"loss": 1.0587, |
|
"nll_loss": 0.9488533735275269, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.032291755080223083, |
|
"rewards/margins": 0.035026032477617264, |
|
"rewards/rejected": -0.06731779128313065, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 1.4307004470938898, |
|
"grad_norm": 16.38582992553711, |
|
"learning_rate": 3.2274861218395142e-06, |
|
"log_odds_chosen": 0.7762764692306519, |
|
"log_odds_ratio": -0.43844375014305115, |
|
"logits/chosen": 407.67388916015625, |
|
"logits/rejected": 413.35260009765625, |
|
"logps/chosen": -0.7236464619636536, |
|
"logps/rejected": -1.1575326919555664, |
|
"loss": 1.0752, |
|
"nll_loss": 1.0268566608428955, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.03618232533335686, |
|
"rewards/margins": 0.02169431373476982, |
|
"rewards/rejected": -0.05787663906812668, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.4605067064083457, |
|
"grad_norm": 16.295490264892578, |
|
"learning_rate": 3.1943828249997e-06, |
|
"log_odds_chosen": 0.9785711169242859, |
|
"log_odds_ratio": -0.4029998779296875, |
|
"logits/chosen": 400.16632080078125, |
|
"logits/rejected": 388.1484069824219, |
|
"logps/chosen": -0.6374613642692566, |
|
"logps/rejected": -1.146707534790039, |
|
"loss": 1.0837, |
|
"nll_loss": 1.123439073562622, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.03187306597828865, |
|
"rewards/margins": 0.025462310761213303, |
|
"rewards/rejected": -0.05733537673950195, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 1.4903129657228018, |
|
"grad_norm": 22.652774810791016, |
|
"learning_rate": 3.1622776601683796e-06, |
|
"log_odds_chosen": 1.0432734489440918, |
|
"log_odds_ratio": -0.4298950135707855, |
|
"logits/chosen": 374.0715637207031, |
|
"logits/rejected": 381.5113830566406, |
|
"logps/chosen": -0.6628987193107605, |
|
"logps/rejected": -1.2346137762069702, |
|
"loss": 0.9864, |
|
"nll_loss": 0.9000906944274902, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.03314493969082832, |
|
"rewards/margins": 0.028585752472281456, |
|
"rewards/rejected": -0.06173068284988403, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.520119225037258, |
|
"grad_norm": 15.01534652709961, |
|
"learning_rate": 3.131121455425748e-06, |
|
"log_odds_chosen": 1.0844942331314087, |
|
"log_odds_ratio": -0.34810084104537964, |
|
"logits/chosen": 391.9859924316406, |
|
"logits/rejected": 394.78021240234375, |
|
"logps/chosen": -0.5884779095649719, |
|
"logps/rejected": -1.1623605489730835, |
|
"loss": 1.0497, |
|
"nll_loss": 0.9377425312995911, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.029423898085951805, |
|
"rewards/margins": 0.028694134205579758, |
|
"rewards/rejected": -0.05811803415417671, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 1.5499254843517138, |
|
"grad_norm": 22.26698112487793, |
|
"learning_rate": 3.1008683647302113e-06, |
|
"log_odds_chosen": 0.9070035815238953, |
|
"log_odds_ratio": -0.43072786927223206, |
|
"logits/chosen": 372.50006103515625, |
|
"logits/rejected": 414.58331298828125, |
|
"logps/chosen": -0.763823926448822, |
|
"logps/rejected": -1.341328740119934, |
|
"loss": 1.043, |
|
"nll_loss": 1.0060240030288696, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.03819119185209274, |
|
"rewards/margins": 0.028875242918729782, |
|
"rewards/rejected": -0.06706643104553223, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.5797317436661698, |
|
"grad_norm": 14.599881172180176, |
|
"learning_rate": 3.0714755841697565e-06, |
|
"log_odds_chosen": 1.0877039432525635, |
|
"log_odds_ratio": -0.43615055084228516, |
|
"logits/chosen": 384.4775390625, |
|
"logits/rejected": 406.6970520019531, |
|
"logps/chosen": -0.6974985003471375, |
|
"logps/rejected": -1.3204139471054077, |
|
"loss": 1.098, |
|
"nll_loss": 1.024665117263794, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.03487492725253105, |
|
"rewards/margins": 0.031145762652158737, |
|
"rewards/rejected": -0.06602068990468979, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 1.6095380029806259, |
|
"grad_norm": 17.716583251953125, |
|
"learning_rate": 3.0429030972509227e-06, |
|
"log_odds_chosen": 0.9025327563285828, |
|
"log_odds_ratio": -0.4233691692352295, |
|
"logits/chosen": 367.71807861328125, |
|
"logits/rejected": 379.2008361816406, |
|
"logps/chosen": -0.777201771736145, |
|
"logps/rejected": -1.2777531147003174, |
|
"loss": 1.0837, |
|
"nll_loss": 1.1377698183059692, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.03886008635163307, |
|
"rewards/margins": 0.025027573108673096, |
|
"rewards/rejected": -0.06388765573501587, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.639344262295082, |
|
"grad_norm": 14.134200096130371, |
|
"learning_rate": 3.0151134457776365e-06, |
|
"log_odds_chosen": 0.8205320239067078, |
|
"log_odds_ratio": -0.44056087732315063, |
|
"logits/chosen": 360.33575439453125, |
|
"logits/rejected": 350.024169921875, |
|
"logps/chosen": -0.6577683687210083, |
|
"logps/rejected": -1.066030740737915, |
|
"loss": 1.0837, |
|
"nll_loss": 1.0101639032363892, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.032888419926166534, |
|
"rewards/margins": 0.020413123071193695, |
|
"rewards/rejected": -0.05330154299736023, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 1.669150521609538, |
|
"grad_norm": 15.517395973205566, |
|
"learning_rate": 2.988071523335984e-06, |
|
"log_odds_chosen": 0.7949902415275574, |
|
"log_odds_ratio": -0.5562250018119812, |
|
"logits/chosen": 404.2984313964844, |
|
"logits/rejected": 391.6941833496094, |
|
"logps/chosen": -0.7360959649085999, |
|
"logps/rejected": -1.1831490993499756, |
|
"loss": 1.0486, |
|
"nll_loss": 1.0734833478927612, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.03680479899048805, |
|
"rewards/margins": 0.022352661937475204, |
|
"rewards/rejected": -0.05915746092796326, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.698956780923994, |
|
"grad_norm": 14.143935203552246, |
|
"learning_rate": 2.961744388795462e-06, |
|
"log_odds_chosen": 0.9420916438102722, |
|
"log_odds_ratio": -0.42187291383743286, |
|
"logits/chosen": 367.45843505859375, |
|
"logits/rejected": 374.1835632324219, |
|
"logps/chosen": -0.6173609495162964, |
|
"logps/rejected": -1.1151915788650513, |
|
"loss": 0.996, |
|
"nll_loss": 0.9254717826843262, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.03086804784834385, |
|
"rewards/margins": 0.024891531094908714, |
|
"rewards/rejected": -0.05575958639383316, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 1.7287630402384502, |
|
"grad_norm": 13.992819786071777, |
|
"learning_rate": 2.9361010975735177e-06, |
|
"log_odds_chosen": 0.9273719787597656, |
|
"log_odds_ratio": -0.39941272139549255, |
|
"logits/chosen": 386.17742919921875, |
|
"logits/rejected": 424.8526306152344, |
|
"logps/chosen": -0.7709314227104187, |
|
"logps/rejected": -1.294065237045288, |
|
"loss": 1.0527, |
|
"nll_loss": 0.9949714541435242, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.038546573370695114, |
|
"rewards/margins": 0.026156682521104813, |
|
"rewards/rejected": -0.06470325589179993, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.758569299552906, |
|
"grad_norm": 15.243948936462402, |
|
"learning_rate": 2.9111125486979104e-06, |
|
"log_odds_chosen": 0.7636137008666992, |
|
"log_odds_ratio": -0.4647112786769867, |
|
"logits/chosen": 361.9948425292969, |
|
"logits/rejected": 406.70654296875, |
|
"logps/chosen": -0.7253848314285278, |
|
"logps/rejected": -1.145918607711792, |
|
"loss": 1.0847, |
|
"nll_loss": 1.016174554824829, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.03626924008131027, |
|
"rewards/margins": 0.021026695147156715, |
|
"rewards/rejected": -0.05729593709111214, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 1.788375558867362, |
|
"grad_norm": 23.890466690063477, |
|
"learning_rate": 2.8867513459481293e-06, |
|
"log_odds_chosen": 1.2909433841705322, |
|
"log_odds_ratio": -0.3190842270851135, |
|
"logits/chosen": 403.19427490234375, |
|
"logits/rejected": 380.4273986816406, |
|
"logps/chosen": -0.6161251068115234, |
|
"logps/rejected": -1.2782180309295654, |
|
"loss": 0.9952, |
|
"nll_loss": 0.9254310727119446, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.030806254595518112, |
|
"rewards/margins": 0.03310465067625046, |
|
"rewards/rejected": -0.06391090154647827, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"grad_norm": 16.844104766845703, |
|
"learning_rate": 2.862991671569341e-06, |
|
"log_odds_chosen": 0.5357767939567566, |
|
"log_odds_ratio": -0.5353686213493347, |
|
"logits/chosen": 395.70831298828125, |
|
"logits/rejected": 405.61749267578125, |
|
"logps/chosen": -0.9245316386222839, |
|
"logps/rejected": -1.2031428813934326, |
|
"loss": 1.0432, |
|
"nll_loss": 1.1699957847595215, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.046226583421230316, |
|
"rewards/margins": 0.013930551707744598, |
|
"rewards/rejected": -0.060157131403684616, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 1.8479880774962743, |
|
"grad_norm": 14.692316055297852, |
|
"learning_rate": 2.839809171235324e-06, |
|
"log_odds_chosen": 1.0770504474639893, |
|
"log_odds_ratio": -0.42079129815101624, |
|
"logits/chosen": 377.4819030761719, |
|
"logits/rejected": 387.6199645996094, |
|
"logps/chosen": -0.7239227294921875, |
|
"logps/rejected": -1.3824554681777954, |
|
"loss": 1.0884, |
|
"nll_loss": 1.0769283771514893, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.036196134984493256, |
|
"rewards/margins": 0.032926641404628754, |
|
"rewards/rejected": -0.06912277638912201, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.8777943368107302, |
|
"grad_norm": 15.1817045211792, |
|
"learning_rate": 2.817180849095055e-06, |
|
"log_odds_chosen": 0.5459250807762146, |
|
"log_odds_ratio": -0.5598369240760803, |
|
"logits/chosen": 352.6174621582031, |
|
"logits/rejected": 371.89764404296875, |
|
"logps/chosen": -0.9762029647827148, |
|
"logps/rejected": -1.3525390625, |
|
"loss": 1.0938, |
|
"nll_loss": 1.238140344619751, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.04881014674901962, |
|
"rewards/margins": 0.018816810101270676, |
|
"rewards/rejected": -0.0676269605755806, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 1.9076005961251863, |
|
"grad_norm": 17.332054138183594, |
|
"learning_rate": 2.7950849718747376e-06, |
|
"log_odds_chosen": 1.1397926807403564, |
|
"log_odds_ratio": -0.36622655391693115, |
|
"logits/chosen": 373.9564514160156, |
|
"logits/rejected": 395.34271240234375, |
|
"logps/chosen": -0.6329408884048462, |
|
"logps/rejected": -1.2445515394210815, |
|
"loss": 0.9928, |
|
"nll_loss": 0.9283340573310852, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.03164704144001007, |
|
"rewards/margins": 0.030580539256334305, |
|
"rewards/rejected": -0.062227584421634674, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.9374068554396424, |
|
"grad_norm": 20.475017547607422, |
|
"learning_rate": 2.773500981126146e-06, |
|
"log_odds_chosen": 1.1559429168701172, |
|
"log_odds_ratio": -0.3606329560279846, |
|
"logits/chosen": 372.6563720703125, |
|
"logits/rejected": 405.1517333984375, |
|
"logps/chosen": -0.6990076303482056, |
|
"logps/rejected": -1.3749182224273682, |
|
"loss": 1.0121, |
|
"nll_loss": 0.9322077631950378, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.0349503830075264, |
|
"rewards/margins": 0.03379552438855171, |
|
"rewards/rejected": -0.06874591112136841, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 1.9672131147540983, |
|
"grad_norm": 20.384191513061523, |
|
"learning_rate": 2.752409412815902e-06, |
|
"log_odds_chosen": 0.8144651651382446, |
|
"log_odds_ratio": -0.4188029170036316, |
|
"logits/chosen": 367.2298889160156, |
|
"logits/rejected": 376.0736083984375, |
|
"logps/chosen": -0.7355102896690369, |
|
"logps/rejected": -1.211102843284607, |
|
"loss": 1.0378, |
|
"nll_loss": 0.8931636810302734, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.036775510758161545, |
|
"rewards/margins": 0.023779626935720444, |
|
"rewards/rejected": -0.06055514141917229, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.9970193740685542, |
|
"grad_norm": 15.695927619934082, |
|
"learning_rate": 2.7317918235407652e-06, |
|
"log_odds_chosen": 0.5675193071365356, |
|
"log_odds_ratio": -0.5574907660484314, |
|
"logits/chosen": 395.9285888671875, |
|
"logits/rejected": 387.2447204589844, |
|
"logps/chosen": -0.9066513776779175, |
|
"logps/rejected": -1.2281653881072998, |
|
"loss": 1.0908, |
|
"nll_loss": 1.2198901176452637, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.045332565903663635, |
|
"rewards/margins": 0.016075702384114265, |
|
"rewards/rejected": -0.06140827015042305, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 1.9970193740685542, |
|
"eval_log_odds_chosen": 0.2359991818666458, |
|
"eval_log_odds_ratio": -0.6970126628875732, |
|
"eval_logits/chosen": 314.6778564453125, |
|
"eval_logits/rejected": 285.82061767578125, |
|
"eval_logps/chosen": -0.9949654936790466, |
|
"eval_logps/rejected": -1.1527600288391113, |
|
"eval_loss": 1.4250013828277588, |
|
"eval_nll_loss": 1.3697166442871094, |
|
"eval_rewards/accuracies": 0.5323740839958191, |
|
"eval_rewards/chosen": -0.04974827170372009, |
|
"eval_rewards/margins": 0.00788972433656454, |
|
"eval_rewards/rejected": -0.05763799697160721, |
|
"eval_runtime": 112.2726, |
|
"eval_samples_per_second": 4.926, |
|
"eval_steps_per_second": 1.238, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 2.0268256333830106, |
|
"grad_norm": 21.729570388793945, |
|
"learning_rate": 2.711630722733202e-06, |
|
"log_odds_chosen": 2.0113790035247803, |
|
"log_odds_ratio": -0.19709806144237518, |
|
"logits/chosen": 389.3846435546875, |
|
"logits/rejected": 366.3945617675781, |
|
"logps/chosen": -0.38005977869033813, |
|
"logps/rejected": -1.4012727737426758, |
|
"loss": 0.6433, |
|
"nll_loss": 0.6980705261230469, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.019002988934516907, |
|
"rewards/margins": 0.051060646772384644, |
|
"rewards/rejected": -0.07006363570690155, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 2.0566318926974665, |
|
"grad_norm": 11.89656925201416, |
|
"learning_rate": 2.691909510290828e-06, |
|
"log_odds_chosen": 2.5525763034820557, |
|
"log_odds_ratio": -0.12284793704748154, |
|
"logits/chosen": 351.57080078125, |
|
"logits/rejected": 357.44329833984375, |
|
"logps/chosen": -0.3399081528186798, |
|
"logps/rejected": -1.6293659210205078, |
|
"loss": 0.5495, |
|
"nll_loss": 0.5662155151367188, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.01699540950357914, |
|
"rewards/margins": 0.06447288393974304, |
|
"rewards/rejected": -0.08146829158067703, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 2.0864381520119224, |
|
"grad_norm": 13.419454574584961, |
|
"learning_rate": 2.6726124191242444e-06, |
|
"log_odds_chosen": 2.548877716064453, |
|
"log_odds_ratio": -0.11839280277490616, |
|
"logits/chosen": 350.29986572265625, |
|
"logits/rejected": 386.45709228515625, |
|
"logps/chosen": -0.382639080286026, |
|
"logps/rejected": -1.8921934366226196, |
|
"loss": 0.5743, |
|
"nll_loss": 0.5715562105178833, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01913195475935936, |
|
"rewards/margins": 0.0754777267575264, |
|
"rewards/rejected": -0.09460968524217606, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 2.1162444113263787, |
|
"grad_norm": 13.355463027954102, |
|
"learning_rate": 2.6537244621713765e-06, |
|
"log_odds_chosen": 2.2259714603424072, |
|
"log_odds_ratio": -0.15891632437705994, |
|
"logits/chosen": 352.84619140625, |
|
"logits/rejected": 371.22576904296875, |
|
"logps/chosen": -0.37806540727615356, |
|
"logps/rejected": -1.5315955877304077, |
|
"loss": 0.5507, |
|
"nll_loss": 0.6317521333694458, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.018903274089097977, |
|
"rewards/margins": 0.05767650529742241, |
|
"rewards/rejected": -0.07657978683710098, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 2.1460506706408347, |
|
"grad_norm": 10.8477201461792, |
|
"learning_rate": 2.6352313834736496e-06, |
|
"log_odds_chosen": 2.581636428833008, |
|
"log_odds_ratio": -0.1250651776790619, |
|
"logits/chosen": 353.0003356933594, |
|
"logits/rejected": 398.9602355957031, |
|
"logps/chosen": -0.3573206067085266, |
|
"logps/rejected": -1.6087188720703125, |
|
"loss": 0.5407, |
|
"nll_loss": 0.5504949688911438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01786603033542633, |
|
"rewards/margins": 0.06256992369890213, |
|
"rewards/rejected": -0.08043594658374786, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 2.1758569299552906, |
|
"grad_norm": 12.167034149169922, |
|
"learning_rate": 2.6171196129510684e-06, |
|
"log_odds_chosen": 1.9800822734832764, |
|
"log_odds_ratio": -0.16938333213329315, |
|
"logits/chosen": 341.21527099609375, |
|
"logits/rejected": 329.54119873046875, |
|
"logps/chosen": -0.348991334438324, |
|
"logps/rejected": -1.3196141719818115, |
|
"loss": 0.5516, |
|
"nll_loss": 0.5312565565109253, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.017449568957090378, |
|
"rewards/margins": 0.04853113740682602, |
|
"rewards/rejected": -0.0659807100892067, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 2.2056631892697465, |
|
"grad_norm": 15.195405960083008, |
|
"learning_rate": 2.599376224550182e-06, |
|
"log_odds_chosen": 2.0713467597961426, |
|
"log_odds_ratio": -0.19306516647338867, |
|
"logits/chosen": 316.6725769042969, |
|
"logits/rejected": 339.6087646484375, |
|
"logps/chosen": -0.36510804295539856, |
|
"logps/rejected": -1.4302679300308228, |
|
"loss": 0.5732, |
|
"nll_loss": 0.5869459509849548, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.01825539954006672, |
|
"rewards/margins": 0.05325800180435181, |
|
"rewards/rejected": -0.07151339948177338, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 2.235469448584203, |
|
"grad_norm": 12.842897415161133, |
|
"learning_rate": 2.5819888974716113e-06, |
|
"log_odds_chosen": 1.9603370428085327, |
|
"log_odds_ratio": -0.18798741698265076, |
|
"logits/chosen": 368.00836181640625, |
|
"logits/rejected": 389.7608337402344, |
|
"logps/chosen": -0.4214121699333191, |
|
"logps/rejected": -1.4475972652435303, |
|
"loss": 0.5831, |
|
"nll_loss": 0.6068717241287231, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.021070610731840134, |
|
"rewards/margins": 0.051309265196323395, |
|
"rewards/rejected": -0.07237987220287323, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 2.2652757078986587, |
|
"grad_norm": 15.618853569030762, |
|
"learning_rate": 2.564945880212886e-06, |
|
"log_odds_chosen": 2.2622876167297363, |
|
"log_odds_ratio": -0.1320658028125763, |
|
"logits/chosen": 366.3780517578125, |
|
"logits/rejected": 351.96820068359375, |
|
"logps/chosen": -0.3000200688838959, |
|
"logps/rejected": -1.3632047176361084, |
|
"loss": 0.5571, |
|
"nll_loss": 0.525825560092926, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.015001003630459309, |
|
"rewards/margins": 0.05315924435853958, |
|
"rewards/rejected": -0.06816024333238602, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 2.2950819672131146, |
|
"grad_norm": 13.373687744140625, |
|
"learning_rate": 2.5482359571881276e-06, |
|
"log_odds_chosen": 2.5866951942443848, |
|
"log_odds_ratio": -0.11987988650798798, |
|
"logits/chosen": 358.47344970703125, |
|
"logits/rejected": 352.4609375, |
|
"logps/chosen": -0.283217191696167, |
|
"logps/rejected": -1.4752601385116577, |
|
"loss": 0.5301, |
|
"nll_loss": 0.49565237760543823, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.014160861261188984, |
|
"rewards/margins": 0.059602152556180954, |
|
"rewards/rejected": -0.07376301288604736, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 2.3248882265275705, |
|
"grad_norm": 11.959152221679688, |
|
"learning_rate": 2.5318484177091667e-06, |
|
"log_odds_chosen": 2.3983092308044434, |
|
"log_odds_ratio": -0.11385631561279297, |
|
"logits/chosen": 370.3407287597656, |
|
"logits/rejected": 393.58978271484375, |
|
"logps/chosen": -0.36266201734542847, |
|
"logps/rejected": -1.6288502216339111, |
|
"loss": 0.578, |
|
"nll_loss": 0.5790597200393677, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.018133098259568214, |
|
"rewards/margins": 0.06330940872430801, |
|
"rewards/rejected": -0.08144249767065048, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 2.354694485842027, |
|
"grad_norm": 11.902227401733398, |
|
"learning_rate": 2.515773027133138e-06, |
|
"log_odds_chosen": 2.4830586910247803, |
|
"log_odds_ratio": -0.13829158246517181, |
|
"logits/chosen": 369.2203063964844, |
|
"logits/rejected": 362.56298828125, |
|
"logps/chosen": -0.2860831320285797, |
|
"logps/rejected": -1.3531745672225952, |
|
"loss": 0.5233, |
|
"nll_loss": 0.48577412962913513, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01430415641516447, |
|
"rewards/margins": 0.053354568779468536, |
|
"rewards/rejected": -0.06765872985124588, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 2.384500745156483, |
|
"grad_norm": 18.2595157623291, |
|
"learning_rate": 2.5e-06, |
|
"log_odds_chosen": 2.4875540733337402, |
|
"log_odds_ratio": -0.13931448757648468, |
|
"logits/chosen": 366.81646728515625, |
|
"logits/rejected": 388.4540710449219, |
|
"logps/chosen": -0.3392675817012787, |
|
"logps/rejected": -1.6781524419784546, |
|
"loss": 0.5707, |
|
"nll_loss": 0.5266181826591492, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.016963381320238113, |
|
"rewards/margins": 0.06694425642490387, |
|
"rewards/rejected": -0.08390761911869049, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 2.4143070044709387, |
|
"grad_norm": 10.78487777709961, |
|
"learning_rate": 2.484519974999767e-06, |
|
"log_odds_chosen": 2.3641769886016846, |
|
"log_odds_ratio": -0.18085625767707825, |
|
"logits/chosen": 417.9383850097656, |
|
"logits/rejected": 384.9745178222656, |
|
"logps/chosen": -0.36932411789894104, |
|
"logps/rejected": -1.5650533437728882, |
|
"loss": 0.5707, |
|
"nll_loss": 0.5322312712669373, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.018466206267476082, |
|
"rewards/margins": 0.05978646129369736, |
|
"rewards/rejected": -0.07825267314910889, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 2.444113263785395, |
|
"grad_norm": 12.914924621582031, |
|
"learning_rate": 2.4693239916239746e-06, |
|
"log_odds_chosen": 2.4095664024353027, |
|
"log_odds_ratio": -0.17002181708812714, |
|
"logits/chosen": 363.0850830078125, |
|
"logits/rejected": 378.43634033203125, |
|
"logps/chosen": -0.3721050024032593, |
|
"logps/rejected": -1.5407812595367432, |
|
"loss": 0.5689, |
|
"nll_loss": 0.5765537619590759, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.018605249002575874, |
|
"rewards/margins": 0.058433812111616135, |
|
"rewards/rejected": -0.07703907042741776, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 2.473919523099851, |
|
"grad_norm": 11.604476928710938, |
|
"learning_rate": 2.4544034683690802e-06, |
|
"log_odds_chosen": 2.4141106605529785, |
|
"log_odds_ratio": -0.13905009627342224, |
|
"logits/chosen": 363.8720703125, |
|
"logits/rejected": 393.9859924316406, |
|
"logps/chosen": -0.32817938923835754, |
|
"logps/rejected": -1.5454423427581787, |
|
"loss": 0.5702, |
|
"nll_loss": 0.5272970795631409, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.016408968716859818, |
|
"rewards/margins": 0.06086314842104912, |
|
"rewards/rejected": -0.07727211713790894, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 2.503725782414307, |
|
"grad_norm": 11.285563468933105, |
|
"learning_rate": 2.4397501823713327e-06, |
|
"log_odds_chosen": 2.0902717113494873, |
|
"log_odds_ratio": -0.18547013401985168, |
|
"logits/chosen": 364.81866455078125, |
|
"logits/rejected": 342.7242736816406, |
|
"logps/chosen": -0.3733817934989929, |
|
"logps/rejected": -1.4410852193832397, |
|
"loss": 0.56, |
|
"nll_loss": 0.6532183885574341, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.018669091165065765, |
|
"rewards/margins": 0.0533851683139801, |
|
"rewards/rejected": -0.07205425947904587, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 2.533532041728763, |
|
"grad_norm": 12.517095565795898, |
|
"learning_rate": 2.4253562503633297e-06, |
|
"log_odds_chosen": 2.795741081237793, |
|
"log_odds_ratio": -0.08831789344549179, |
|
"logits/chosen": 362.86871337890625, |
|
"logits/rejected": 359.4671630859375, |
|
"logps/chosen": -0.32691091299057007, |
|
"logps/rejected": -1.8046060800552368, |
|
"loss": 0.5335, |
|
"nll_loss": 0.5374017357826233, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.016345545649528503, |
|
"rewards/margins": 0.0738847479224205, |
|
"rewards/rejected": -0.0902303010225296, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 2.563338301043219, |
|
"grad_norm": 13.33828353881836, |
|
"learning_rate": 2.411214110852061e-06, |
|
"log_odds_chosen": 2.7160139083862305, |
|
"log_odds_ratio": -0.10933760553598404, |
|
"logits/chosen": 362.9604187011719, |
|
"logits/rejected": 374.8692626953125, |
|
"logps/chosen": -0.27513235807418823, |
|
"logps/rejected": -1.615644097328186, |
|
"loss": 0.5522, |
|
"nll_loss": 0.48540863394737244, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01375661976635456, |
|
"rewards/margins": 0.06702558696269989, |
|
"rewards/rejected": -0.0807822048664093, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 2.593144560357675, |
|
"grad_norm": 15.951871871948242, |
|
"learning_rate": 2.3973165074269213e-06, |
|
"log_odds_chosen": 2.399064779281616, |
|
"log_odds_ratio": -0.150381401181221, |
|
"logits/chosen": 368.9129943847656, |
|
"logits/rejected": 337.7628173828125, |
|
"logps/chosen": -0.3689618408679962, |
|
"logps/rejected": -1.5598814487457275, |
|
"loss": 0.5514, |
|
"nll_loss": 0.5270097255706787, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.01844809204339981, |
|
"rewards/margins": 0.059545982629060745, |
|
"rewards/rejected": -0.07799407094717026, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 2.6229508196721314, |
|
"grad_norm": 12.57322883605957, |
|
"learning_rate": 2.3836564731139807e-06, |
|
"log_odds_chosen": 2.7293245792388916, |
|
"log_odds_ratio": -0.10528914630413055, |
|
"logits/chosen": 354.0178527832031, |
|
"logits/rejected": 365.93829345703125, |
|
"logps/chosen": -0.2712039351463318, |
|
"logps/rejected": -1.582219123840332, |
|
"loss": 0.5703, |
|
"nll_loss": 0.5448659062385559, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.013560195453464985, |
|
"rewards/margins": 0.06555076688528061, |
|
"rewards/rejected": -0.07911095768213272, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 2.6527570789865873, |
|
"grad_norm": 13.282082557678223, |
|
"learning_rate": 2.3702273156998867e-06, |
|
"log_odds_chosen": 2.619792938232422, |
|
"log_odds_ratio": -0.10272769629955292, |
|
"logits/chosen": 335.6366271972656, |
|
"logits/rejected": 372.410400390625, |
|
"logps/chosen": -0.36089158058166504, |
|
"logps/rejected": -1.8113043308258057, |
|
"loss": 0.5563, |
|
"nll_loss": 0.5579748749732971, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.018044577911496162, |
|
"rewards/margins": 0.07252063602209091, |
|
"rewards/rejected": -0.09056521207094193, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 2.682563338301043, |
|
"grad_norm": 10.777383804321289, |
|
"learning_rate": 2.357022603955159e-06, |
|
"log_odds_chosen": 2.4564261436462402, |
|
"log_odds_ratio": -0.11157449334859848, |
|
"logits/chosen": 362.14312744140625, |
|
"logits/rejected": 363.303466796875, |
|
"logps/chosen": -0.39076924324035645, |
|
"logps/rejected": -1.7145166397094727, |
|
"loss": 0.5754, |
|
"nll_loss": 0.5376263856887817, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.019538460299372673, |
|
"rewards/margins": 0.06618736684322357, |
|
"rewards/rejected": -0.0857258215546608, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 2.712369597615499, |
|
"grad_norm": 12.512327194213867, |
|
"learning_rate": 2.3440361546924774e-06, |
|
"log_odds_chosen": 2.614637613296509, |
|
"log_odds_ratio": -0.11486033350229263, |
|
"logits/chosen": 395.16949462890625, |
|
"logits/rejected": 374.3088684082031, |
|
"logps/chosen": -0.3622822165489197, |
|
"logps/rejected": -1.6618531942367554, |
|
"loss": 0.6153, |
|
"nll_loss": 0.568195641040802, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.018114114180207253, |
|
"rewards/margins": 0.06497855484485626, |
|
"rewards/rejected": -0.08309266716241837, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 2.742175856929955, |
|
"grad_norm": 12.090532302856445, |
|
"learning_rate": 2.3312620206007847e-06, |
|
"log_odds_chosen": 2.508338451385498, |
|
"log_odds_ratio": -0.1204490214586258, |
|
"logits/chosen": 382.52630615234375, |
|
"logits/rejected": 401.80841064453125, |
|
"logps/chosen": -0.3474404215812683, |
|
"logps/rejected": -1.7473865747451782, |
|
"loss": 0.5838, |
|
"nll_loss": 0.6167483925819397, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.017372019588947296, |
|
"rewards/margins": 0.06999730318784714, |
|
"rewards/rejected": -0.08736933022737503, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.7719821162444114, |
|
"grad_norm": 13.27834701538086, |
|
"learning_rate": 2.3186944788008413e-06, |
|
"log_odds_chosen": 2.5867724418640137, |
|
"log_odds_ratio": -0.14203417301177979, |
|
"logits/chosen": 376.5874328613281, |
|
"logits/rejected": 381.06341552734375, |
|
"logps/chosen": -0.2869132459163666, |
|
"logps/rejected": -1.5630210638046265, |
|
"loss": 0.5778, |
|
"nll_loss": 0.55084627866745, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.014345663599669933, |
|
"rewards/margins": 0.06380538642406464, |
|
"rewards/rejected": -0.07815105468034744, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 2.8017883755588673, |
|
"grad_norm": 11.784134864807129, |
|
"learning_rate": 2.3063280200722128e-06, |
|
"log_odds_chosen": 2.1283843517303467, |
|
"log_odds_ratio": -0.20095142722129822, |
|
"logits/chosen": 383.31781005859375, |
|
"logits/rejected": 354.9120788574219, |
|
"logps/chosen": -0.40080317854881287, |
|
"logps/rejected": -1.5093116760253906, |
|
"loss": 0.5644, |
|
"nll_loss": 0.575947642326355, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.020040160045027733, |
|
"rewards/margins": 0.05542542785406113, |
|
"rewards/rejected": -0.07546558976173401, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.8315946348733236, |
|
"grad_norm": 13.008294105529785, |
|
"learning_rate": 2.2941573387056174e-06, |
|
"log_odds_chosen": 2.6808362007141113, |
|
"log_odds_ratio": -0.10760221630334854, |
|
"logits/chosen": 350.5984802246094, |
|
"logits/rejected": 374.9319152832031, |
|
"logps/chosen": -0.34488445520401, |
|
"logps/rejected": -1.7149194478988647, |
|
"loss": 0.5386, |
|
"nll_loss": 0.491553395986557, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.01724422350525856, |
|
"rewards/margins": 0.06850175559520721, |
|
"rewards/rejected": -0.08574597537517548, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 2.8614008941877795, |
|
"grad_norm": 10.424010276794434, |
|
"learning_rate": 2.2821773229381924e-06, |
|
"log_odds_chosen": 2.2412309646606445, |
|
"log_odds_ratio": -0.1566620171070099, |
|
"logits/chosen": 362.31378173828125, |
|
"logits/rejected": 402.6854248046875, |
|
"logps/chosen": -0.3766781687736511, |
|
"logps/rejected": -1.4856204986572266, |
|
"loss": 0.5052, |
|
"nll_loss": 0.48381978273391724, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.018833911046385765, |
|
"rewards/margins": 0.055447112768888474, |
|
"rewards/rejected": -0.07428102195262909, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.8912071535022354, |
|
"grad_norm": 12.359146118164062, |
|
"learning_rate": 2.270383045932499e-06, |
|
"log_odds_chosen": 2.6057076454162598, |
|
"log_odds_ratio": -0.12701039016246796, |
|
"logits/chosen": 357.164306640625, |
|
"logits/rejected": 380.32073974609375, |
|
"logps/chosen": -0.37163636088371277, |
|
"logps/rejected": -1.8207753896713257, |
|
"loss": 0.5419, |
|
"nll_loss": 0.5325015187263489, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.01858181692659855, |
|
"rewards/margins": 0.07245694845914841, |
|
"rewards/rejected": -0.0910387635231018, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 2.9210134128166914, |
|
"grad_norm": 11.646001815795898, |
|
"learning_rate": 2.2587697572631284e-06, |
|
"log_odds_chosen": 2.3249075412750244, |
|
"log_odds_ratio": -0.19486014544963837, |
|
"logits/chosen": 372.1253967285156, |
|
"logits/rejected": 338.1502380371094, |
|
"logps/chosen": -0.4259433150291443, |
|
"logps/rejected": -1.5797032117843628, |
|
"loss": 0.6087, |
|
"nll_loss": 0.5371652245521545, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.021297167986631393, |
|
"rewards/margins": 0.05768799036741257, |
|
"rewards/rejected": -0.07898515462875366, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.9508196721311473, |
|
"grad_norm": 11.838138580322266, |
|
"learning_rate": 2.2473328748774737e-06, |
|
"log_odds_chosen": 2.3507559299468994, |
|
"log_odds_ratio": -0.1578751504421234, |
|
"logits/chosen": 366.9432373046875, |
|
"logits/rejected": 394.8822326660156, |
|
"logps/chosen": -0.3771159052848816, |
|
"logps/rejected": -1.533601999282837, |
|
"loss": 0.5442, |
|
"nll_loss": 0.5532703399658203, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.018855798989534378, |
|
"rewards/margins": 0.05782430246472359, |
|
"rewards/rejected": -0.07668010145425797, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 2.9806259314456036, |
|
"grad_norm": 13.802445411682129, |
|
"learning_rate": 2.23606797749979e-06, |
|
"log_odds_chosen": 2.5029566287994385, |
|
"log_odds_ratio": -0.12695619463920593, |
|
"logits/chosen": 374.8814697265625, |
|
"logits/rejected": 372.7264099121094, |
|
"logps/chosen": -0.32484811544418335, |
|
"logps/rejected": -1.5648537874221802, |
|
"loss": 0.5724, |
|
"nll_loss": 0.47206535935401917, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.016242407262325287, |
|
"rewards/margins": 0.062000274658203125, |
|
"rewards/rejected": -0.07824268192052841, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.9865871833084947, |
|
"eval_log_odds_chosen": 0.2937372922897339, |
|
"eval_log_odds_ratio": -0.6945178508758545, |
|
"eval_logits/chosen": 300.6891174316406, |
|
"eval_logits/rejected": 271.8756103515625, |
|
"eval_logps/chosen": -1.0802680253982544, |
|
"eval_logps/rejected": -1.2502641677856445, |
|
"eval_loss": 1.539820671081543, |
|
"eval_nll_loss": 1.4724125862121582, |
|
"eval_rewards/accuracies": 0.5395683646202087, |
|
"eval_rewards/chosen": -0.05401340499520302, |
|
"eval_rewards/margins": 0.00849980115890503, |
|
"eval_rewards/rejected": -0.06251321732997894, |
|
"eval_runtime": 112.3165, |
|
"eval_samples_per_second": 4.924, |
|
"eval_steps_per_second": 1.238, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 2.9865871833084947, |
|
"step": 501, |
|
"total_flos": 0.0, |
|
"train_loss": 1.4594077459590402, |
|
"train_runtime": 13816.0738, |
|
"train_samples_per_second": 1.165, |
|
"train_steps_per_second": 0.036 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 501, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|