gemma-7b-orpo-low-quality / trainer_state.json
silviasapora's picture
Model save
9dc97f6 verified
raw
history blame
69.1 kB
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.9865871833084947,
"eval_steps": 500,
"global_step": 501,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.029806259314456036,
"grad_norm": 1765.45556640625,
"learning_rate": 2.5000000000000004e-07,
"log_odds_chosen": -0.21943321824073792,
"log_odds_ratio": -1.0067085027694702,
"logits/chosen": 204.28456115722656,
"logits/rejected": 202.977294921875,
"logps/chosen": -14.824699401855469,
"logps/rejected": -14.605265617370605,
"loss": 14.9632,
"nll_loss": 14.546000480651855,
"rewards/accuracies": 0.3499999940395355,
"rewards/chosen": -0.7412349581718445,
"rewards/margins": -0.01097165048122406,
"rewards/rejected": -0.7302632927894592,
"step": 5
},
{
"epoch": 0.05961251862891207,
"grad_norm": 1195.8741455078125,
"learning_rate": 5.000000000000001e-07,
"log_odds_chosen": 0.24401184916496277,
"log_odds_ratio": -0.7723467946052551,
"logits/chosen": 219.5009307861328,
"logits/rejected": 223.572021484375,
"logps/chosen": -12.244219779968262,
"logps/rejected": -12.487574577331543,
"loss": 12.6127,
"nll_loss": 12.338577270507812,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.6122109293937683,
"rewards/margins": 0.012167713604867458,
"rewards/rejected": -0.6243786215782166,
"step": 10
},
{
"epoch": 0.08941877794336811,
"grad_norm": 722.9285278320312,
"learning_rate": 7.5e-07,
"log_odds_chosen": 0.0473303496837616,
"log_odds_ratio": -0.7741748690605164,
"logits/chosen": 282.27947998046875,
"logits/rejected": 261.2786865234375,
"logps/chosen": -7.970606803894043,
"logps/rejected": -8.0178804397583,
"loss": 8.2789,
"nll_loss": 7.956001281738281,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.39853033423423767,
"rewards/margins": 0.002363653387874365,
"rewards/rejected": -0.40089401602745056,
"step": 15
},
{
"epoch": 0.11922503725782414,
"grad_norm": 212.62242126464844,
"learning_rate": 1.0000000000000002e-06,
"log_odds_chosen": -0.15251407027244568,
"log_odds_ratio": -0.9524042010307312,
"logits/chosen": 281.0796813964844,
"logits/rejected": 275.33013916015625,
"logps/chosen": -5.375563621520996,
"logps/rejected": -5.224381446838379,
"loss": 5.4453,
"nll_loss": 5.453672885894775,
"rewards/accuracies": 0.4000000059604645,
"rewards/chosen": -0.2687782049179077,
"rewards/margins": -0.007559105753898621,
"rewards/rejected": -0.2612191140651703,
"step": 20
},
{
"epoch": 0.14903129657228018,
"grad_norm": 166.2330322265625,
"learning_rate": 1.25e-06,
"log_odds_chosen": -0.04391743987798691,
"log_odds_ratio": -0.8879317045211792,
"logits/chosen": 299.25030517578125,
"logits/rejected": 308.5736389160156,
"logps/chosen": -3.281724452972412,
"logps/rejected": -3.2199606895446777,
"loss": 3.5013,
"nll_loss": 3.3902111053466797,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.1640862375497818,
"rewards/margins": -0.0030881778802722692,
"rewards/rejected": -0.16099804639816284,
"step": 25
},
{
"epoch": 0.17883755588673622,
"grad_norm": 83.01959228515625,
"learning_rate": 1.5e-06,
"log_odds_chosen": -0.07733707875013351,
"log_odds_ratio": -0.8942793607711792,
"logits/chosen": 347.654052734375,
"logits/rejected": 376.1275329589844,
"logps/chosen": -2.622657537460327,
"logps/rejected": -2.5195186138153076,
"loss": 2.5561,
"nll_loss": 2.6379752159118652,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.13113287091255188,
"rewards/margins": -0.005156923085451126,
"rewards/rejected": -0.12597593665122986,
"step": 30
},
{
"epoch": 0.20864381520119224,
"grad_norm": 40.82948684692383,
"learning_rate": 1.75e-06,
"log_odds_chosen": 0.16575101017951965,
"log_odds_ratio": -0.7404494285583496,
"logits/chosen": 382.174072265625,
"logits/rejected": 370.3721008300781,
"logps/chosen": -1.8132009506225586,
"logps/rejected": -1.9216792583465576,
"loss": 2.1303,
"nll_loss": 2.0061001777648926,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.09066005051136017,
"rewards/margins": 0.005423928610980511,
"rewards/rejected": -0.09608397632837296,
"step": 35
},
{
"epoch": 0.23845007451564829,
"grad_norm": 373.0379333496094,
"learning_rate": 2.0000000000000003e-06,
"log_odds_chosen": 0.017796561121940613,
"log_odds_ratio": -0.7689038515090942,
"logits/chosen": 372.22100830078125,
"logits/rejected": 370.50439453125,
"logps/chosen": -1.6518943309783936,
"logps/rejected": -1.6649363040924072,
"loss": 1.9486,
"nll_loss": 2.0397918224334717,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.08259471505880356,
"rewards/margins": 0.0006520989118143916,
"rewards/rejected": -0.08324681222438812,
"step": 40
},
{
"epoch": 0.26825633383010433,
"grad_norm": 45.907875061035156,
"learning_rate": 2.25e-06,
"log_odds_chosen": 0.027211258187890053,
"log_odds_ratio": -0.7474765777587891,
"logits/chosen": 388.0882873535156,
"logits/rejected": 397.65460205078125,
"logps/chosen": -1.570575475692749,
"logps/rejected": -1.5880815982818604,
"loss": 1.8867,
"nll_loss": 1.7669483423233032,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.07852877676486969,
"rewards/margins": 0.0008753080619499087,
"rewards/rejected": -0.0794040784239769,
"step": 45
},
{
"epoch": 0.29806259314456035,
"grad_norm": 45.138648986816406,
"learning_rate": 2.5e-06,
"log_odds_chosen": 0.19176200032234192,
"log_odds_ratio": -0.6679073572158813,
"logits/chosen": 396.5473327636719,
"logits/rejected": 418.2545471191406,
"logps/chosen": -1.404476523399353,
"logps/rejected": -1.5453894138336182,
"loss": 1.8521,
"nll_loss": 1.8635737895965576,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.07022383064031601,
"rewards/margins": 0.00704564293846488,
"rewards/rejected": -0.07726947963237762,
"step": 50
},
{
"epoch": 0.32786885245901637,
"grad_norm": 146.7917938232422,
"learning_rate": 2.7500000000000004e-06,
"log_odds_chosen": 0.06669901311397552,
"log_odds_ratio": -0.7251878380775452,
"logits/chosen": 385.10101318359375,
"logits/rejected": 378.09368896484375,
"logps/chosen": -1.4211018085479736,
"logps/rejected": -1.4656105041503906,
"loss": 1.8795,
"nll_loss": 1.921286940574646,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.0710550919175148,
"rewards/margins": 0.0022254353389143944,
"rewards/rejected": -0.07328052818775177,
"step": 55
},
{
"epoch": 0.35767511177347244,
"grad_norm": 36.712623596191406,
"learning_rate": 3e-06,
"log_odds_chosen": 0.1147073283791542,
"log_odds_ratio": -0.6886881589889526,
"logits/chosen": 391.64190673828125,
"logits/rejected": 383.321044921875,
"logps/chosen": -1.381176471710205,
"logps/rejected": -1.4568852186203003,
"loss": 1.7236,
"nll_loss": 1.7853686809539795,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.06905882805585861,
"rewards/margins": 0.0037854425609111786,
"rewards/rejected": -0.07284426689147949,
"step": 60
},
{
"epoch": 0.38748137108792846,
"grad_norm": 27.392560958862305,
"learning_rate": 3.2500000000000002e-06,
"log_odds_chosen": 0.0811905488371849,
"log_odds_ratio": -0.705346941947937,
"logits/chosen": 390.33514404296875,
"logits/rejected": 391.02215576171875,
"logps/chosen": -1.2655917406082153,
"logps/rejected": -1.3007347583770752,
"loss": 1.6207,
"nll_loss": 1.5275566577911377,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.06327958405017853,
"rewards/margins": 0.0017571467906236649,
"rewards/rejected": -0.06503672897815704,
"step": 65
},
{
"epoch": 0.4172876304023845,
"grad_norm": 108.5710678100586,
"learning_rate": 3.5e-06,
"log_odds_chosen": 0.030709872022271156,
"log_odds_ratio": -0.7311884760856628,
"logits/chosen": 374.34515380859375,
"logits/rejected": 382.85137939453125,
"logps/chosen": -1.3965779542922974,
"logps/rejected": -1.4153110980987549,
"loss": 1.6444,
"nll_loss": 1.6620601415634155,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.06982889771461487,
"rewards/margins": 0.0009366500889882445,
"rewards/rejected": -0.07076555490493774,
"step": 70
},
{
"epoch": 0.44709388971684055,
"grad_norm": 42.35745620727539,
"learning_rate": 3.7500000000000005e-06,
"log_odds_chosen": 0.17296305298805237,
"log_odds_ratio": -0.6624878644943237,
"logits/chosen": 394.97998046875,
"logits/rejected": 382.9609069824219,
"logps/chosen": -1.2325050830841064,
"logps/rejected": -1.3494950532913208,
"loss": 1.593,
"nll_loss": 1.53190016746521,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.06162526085972786,
"rewards/margins": 0.005849492736160755,
"rewards/rejected": -0.06747475266456604,
"step": 75
},
{
"epoch": 0.47690014903129657,
"grad_norm": 36.82132339477539,
"learning_rate": 4.000000000000001e-06,
"log_odds_chosen": 0.17391765117645264,
"log_odds_ratio": -0.6597349643707275,
"logits/chosen": 381.92547607421875,
"logits/rejected": 404.1871643066406,
"logps/chosen": -1.219416856765747,
"logps/rejected": -1.3357045650482178,
"loss": 1.6131,
"nll_loss": 1.6360372304916382,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.06097083538770676,
"rewards/margins": 0.005814389791339636,
"rewards/rejected": -0.06678523123264313,
"step": 80
},
{
"epoch": 0.5067064083457526,
"grad_norm": 29.89981460571289,
"learning_rate": 4.25e-06,
"log_odds_chosen": 0.18893679976463318,
"log_odds_ratio": -0.6906715631484985,
"logits/chosen": 408.48101806640625,
"logits/rejected": 392.5835266113281,
"logps/chosen": -1.2594187259674072,
"logps/rejected": -1.3885504007339478,
"loss": 1.5956,
"nll_loss": 1.603137731552124,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.06297092139720917,
"rewards/margins": 0.006456589791923761,
"rewards/rejected": -0.06942752748727798,
"step": 85
},
{
"epoch": 0.5365126676602087,
"grad_norm": 32.92945861816406,
"learning_rate": 4.5e-06,
"log_odds_chosen": 0.555855393409729,
"log_odds_ratio": -0.5900682806968689,
"logits/chosen": 401.2886657714844,
"logits/rejected": 416.2565002441406,
"logps/chosen": -1.208212971687317,
"logps/rejected": -1.6518011093139648,
"loss": 1.4631,
"nll_loss": 1.474485158920288,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.06041065603494644,
"rewards/margins": 0.022179413586854935,
"rewards/rejected": -0.08259007334709167,
"step": 90
},
{
"epoch": 0.5663189269746647,
"grad_norm": 34.76884460449219,
"learning_rate": 4.75e-06,
"log_odds_chosen": 0.19581779837608337,
"log_odds_ratio": -0.6574069261550903,
"logits/chosen": 371.4412536621094,
"logits/rejected": 383.65155029296875,
"logps/chosen": -1.1392086744308472,
"logps/rejected": -1.2306644916534424,
"loss": 1.5584,
"nll_loss": 1.438720941543579,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.05696043372154236,
"rewards/margins": 0.0045727952383458614,
"rewards/rejected": -0.06153322383761406,
"step": 95
},
{
"epoch": 0.5961251862891207,
"grad_norm": 677.6953125,
"learning_rate": 5e-06,
"log_odds_chosen": 0.08993122726678848,
"log_odds_ratio": -0.7155017256736755,
"logits/chosen": 406.64996337890625,
"logits/rejected": 442.7906188964844,
"logps/chosen": -1.3057138919830322,
"logps/rejected": -1.3538284301757812,
"loss": 1.6646,
"nll_loss": 1.6470537185668945,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.06528569757938385,
"rewards/margins": 0.0024057202972471714,
"rewards/rejected": -0.06769142299890518,
"step": 100
},
{
"epoch": 0.6259314456035767,
"grad_norm": 81.34811401367188,
"learning_rate": 4.8795003647426654e-06,
"log_odds_chosen": 0.3003528416156769,
"log_odds_ratio": -0.6239514946937561,
"logits/chosen": 391.1552734375,
"logits/rejected": 403.55609130859375,
"logps/chosen": -1.2369372844696045,
"logps/rejected": -1.4132254123687744,
"loss": 1.6764,
"nll_loss": 1.68059504032135,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.061846863478422165,
"rewards/margins": 0.008814404718577862,
"rewards/rejected": -0.0706612691283226,
"step": 105
},
{
"epoch": 0.6557377049180327,
"grad_norm": 98.60116577148438,
"learning_rate": 4.767312946227961e-06,
"log_odds_chosen": 0.21533890068531036,
"log_odds_ratio": -0.6346350312232971,
"logits/chosen": 378.2474670410156,
"logits/rejected": 376.3426818847656,
"logps/chosen": -1.1033828258514404,
"logps/rejected": -1.25198233127594,
"loss": 1.5681,
"nll_loss": 1.557680368423462,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.055169135332107544,
"rewards/margins": 0.007429977413266897,
"rewards/rejected": -0.06259911507368088,
"step": 110
},
{
"epoch": 0.6855439642324889,
"grad_norm": 22.872821807861328,
"learning_rate": 4.662524041201569e-06,
"log_odds_chosen": 0.2698196470737457,
"log_odds_ratio": -0.6147719621658325,
"logits/chosen": 400.8478698730469,
"logits/rejected": 407.18634033203125,
"logps/chosen": -0.9827004671096802,
"logps/rejected": -1.127701997756958,
"loss": 1.5249,
"nll_loss": 1.42640221118927,
"rewards/accuracies": 0.6499999761581421,
"rewards/chosen": -0.04913502186536789,
"rewards/margins": 0.007250082679092884,
"rewards/rejected": -0.0563850998878479,
"step": 115
},
{
"epoch": 0.7153502235469449,
"grad_norm": 27.9619083404541,
"learning_rate": 4.564354645876385e-06,
"log_odds_chosen": 0.3403358459472656,
"log_odds_ratio": -0.6000555753707886,
"logits/chosen": 381.107421875,
"logits/rejected": 381.04864501953125,
"logps/chosen": -1.048896074295044,
"logps/rejected": -1.2232722043991089,
"loss": 1.5554,
"nll_loss": 1.5394407510757446,
"rewards/accuracies": 0.675000011920929,
"rewards/chosen": -0.05244480445981026,
"rewards/margins": 0.00871881190687418,
"rewards/rejected": -0.06116361543536186,
"step": 120
},
{
"epoch": 0.7451564828614009,
"grad_norm": 23.146177291870117,
"learning_rate": 4.47213595499958e-06,
"log_odds_chosen": 0.08713512122631073,
"log_odds_ratio": -0.7354093790054321,
"logits/chosen": 378.9410400390625,
"logits/rejected": 391.9457702636719,
"logps/chosen": -1.1668498516082764,
"logps/rejected": -1.1973512172698975,
"loss": 1.4862,
"nll_loss": 1.4849971532821655,
"rewards/accuracies": 0.550000011920929,
"rewards/chosen": -0.05834249407052994,
"rewards/margins": 0.0015250641154125333,
"rewards/rejected": -0.05986756086349487,
"step": 125
},
{
"epoch": 0.7749627421758569,
"grad_norm": 37.57433319091797,
"learning_rate": 4.385290096535147e-06,
"log_odds_chosen": 0.13211670517921448,
"log_odds_ratio": -0.7139819860458374,
"logits/chosen": 401.40985107421875,
"logits/rejected": 389.37921142578125,
"logps/chosen": -1.1555781364440918,
"logps/rejected": -1.2059427499771118,
"loss": 1.5256,
"nll_loss": 1.4828169345855713,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.05777891352772713,
"rewards/margins": 0.002518222201615572,
"rewards/rejected": -0.06029713153839111,
"step": 130
},
{
"epoch": 0.8047690014903129,
"grad_norm": 37.914310455322266,
"learning_rate": 4.303314829119352e-06,
"log_odds_chosen": 0.10099569708108902,
"log_odds_ratio": -0.7038587331771851,
"logits/chosen": 414.90655517578125,
"logits/rejected": 416.6064453125,
"logps/chosen": -1.1292693614959717,
"logps/rejected": -1.2150599956512451,
"loss": 1.5378,
"nll_loss": 1.5873870849609375,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.056463468819856644,
"rewards/margins": 0.004289527423679829,
"rewards/rejected": -0.060752999037504196,
"step": 135
},
{
"epoch": 0.834575260804769,
"grad_norm": 21.97135353088379,
"learning_rate": 4.2257712736425835e-06,
"log_odds_chosen": -0.07928862422704697,
"log_odds_ratio": -0.8006687164306641,
"logits/chosen": 397.2544250488281,
"logits/rejected": 403.22857666015625,
"logps/chosen": -1.11940598487854,
"logps/rejected": -1.0619796514511108,
"loss": 1.5228,
"nll_loss": 1.6315351724624634,
"rewards/accuracies": 0.4749999940395355,
"rewards/chosen": -0.05597030371427536,
"rewards/margins": -0.002871322212740779,
"rewards/rejected": -0.05309898406267166,
"step": 140
},
{
"epoch": 0.8643815201192251,
"grad_norm": 34.809200286865234,
"learning_rate": 4.1522739926869985e-06,
"log_odds_chosen": -0.004179268144071102,
"log_odds_ratio": -0.7272334694862366,
"logits/chosen": 394.76995849609375,
"logits/rejected": 397.96514892578125,
"logps/chosen": -1.2000293731689453,
"logps/rejected": -1.1946508884429932,
"loss": 1.5155,
"nll_loss": 1.5167872905731201,
"rewards/accuracies": 0.574999988079071,
"rewards/chosen": -0.060001470148563385,
"rewards/margins": -0.0002689236425794661,
"rewards/rejected": -0.05973255634307861,
"step": 145
},
{
"epoch": 0.8941877794336811,
"grad_norm": 30.051952362060547,
"learning_rate": 4.082482904638631e-06,
"log_odds_chosen": 0.36712345480918884,
"log_odds_ratio": -0.5663259625434875,
"logits/chosen": 400.39495849609375,
"logits/rejected": 418.39678955078125,
"logps/chosen": -1.0868648290634155,
"logps/rejected": -1.3322699069976807,
"loss": 1.477,
"nll_loss": 1.3918894529342651,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.054343242198228836,
"rewards/margins": 0.01227025780826807,
"rewards/rejected": -0.06661349534988403,
"step": 150
},
{
"epoch": 0.9239940387481371,
"grad_norm": 16.999670028686523,
"learning_rate": 4.016096644512495e-06,
"log_odds_chosen": 0.13306589424610138,
"log_odds_ratio": -0.6789790391921997,
"logits/chosen": 380.4939880371094,
"logits/rejected": 395.53143310546875,
"logps/chosen": -1.1204369068145752,
"logps/rejected": -1.2021987438201904,
"loss": 1.436,
"nll_loss": 1.3288953304290771,
"rewards/accuracies": 0.6000000238418579,
"rewards/chosen": -0.05602184683084488,
"rewards/margins": 0.004088088870048523,
"rewards/rejected": -0.060109943151474,
"step": 155
},
{
"epoch": 0.9538002980625931,
"grad_norm": 34.52124786376953,
"learning_rate": 3.952847075210474e-06,
"log_odds_chosen": 0.08932497352361679,
"log_odds_ratio": -0.7400273084640503,
"logits/chosen": 386.62786865234375,
"logits/rejected": 432.2003479003906,
"logps/chosen": -1.0199127197265625,
"logps/rejected": -1.1069036722183228,
"loss": 1.425,
"nll_loss": 1.3653684854507446,
"rewards/accuracies": 0.5,
"rewards/chosen": -0.05099564045667648,
"rewards/margins": 0.00434954185038805,
"rewards/rejected": -0.05534517765045166,
"step": 160
},
{
"epoch": 0.9836065573770492,
"grad_norm": 17.771682739257812,
"learning_rate": 3.892494720807615e-06,
"log_odds_chosen": 0.02889970876276493,
"log_odds_ratio": -0.7212048768997192,
"logits/chosen": 396.8811950683594,
"logits/rejected": 409.22821044921875,
"logps/chosen": -1.091715693473816,
"logps/rejected": -1.1267164945602417,
"loss": 1.441,
"nll_loss": 1.3998154401779175,
"rewards/accuracies": 0.5249999761581421,
"rewards/chosen": -0.05458579212427139,
"rewards/margins": 0.0017500361427664757,
"rewards/rejected": -0.056335825473070145,
"step": 165
},
{
"epoch": 0.9955290611028316,
"eval_log_odds_chosen": 0.19335530698299408,
"eval_log_odds_ratio": -0.6989776492118835,
"eval_logits/chosen": 318.99652099609375,
"eval_logits/rejected": 290.1581115722656,
"eval_logps/chosen": -1.0203651189804077,
"eval_logps/rejected": -1.1485036611557007,
"eval_loss": 1.4761662483215332,
"eval_nll_loss": 1.4310433864593506,
"eval_rewards/accuracies": 0.5323740839958191,
"eval_rewards/chosen": -0.051018260419368744,
"eval_rewards/margins": 0.006406927481293678,
"eval_rewards/rejected": -0.057425182312726974,
"eval_runtime": 112.3238,
"eval_samples_per_second": 4.923,
"eval_steps_per_second": 1.237,
"step": 167
},
{
"epoch": 1.0134128166915053,
"grad_norm": 17.029600143432617,
"learning_rate": 3.834824944236852e-06,
"log_odds_chosen": 0.46681445837020874,
"log_odds_ratio": -0.5670086741447449,
"logits/chosen": 377.62884521484375,
"logits/rejected": 402.346435546875,
"logps/chosen": -0.9154840707778931,
"logps/rejected": -1.1631513833999634,
"loss": 1.3055,
"nll_loss": 1.1554943323135376,
"rewards/accuracies": 0.625,
"rewards/chosen": -0.045774202793836594,
"rewards/margins": 0.012383360415697098,
"rewards/rejected": -0.05815757066011429,
"step": 170
},
{
"epoch": 1.0432190760059612,
"grad_norm": 20.676471710205078,
"learning_rate": 3.7796447300922724e-06,
"log_odds_chosen": 0.8411234021186829,
"log_odds_ratio": -0.4436827600002289,
"logits/chosen": 360.05242919921875,
"logits/rejected": 400.02374267578125,
"logps/chosen": -0.6783148646354675,
"logps/rejected": -1.1674000024795532,
"loss": 1.0898,
"nll_loss": 1.1356347799301147,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.033915746957063675,
"rewards/margins": 0.024454256519675255,
"rewards/rejected": -0.058369994163513184,
"step": 175
},
{
"epoch": 1.0730253353204173,
"grad_norm": 18.295047760009766,
"learning_rate": 3.72677996249965e-06,
"log_odds_chosen": 0.8419575691223145,
"log_odds_ratio": -0.43040966987609863,
"logits/chosen": 360.3951110839844,
"logits/rejected": 335.01239013671875,
"logps/chosen": -0.7921234965324402,
"logps/rejected": -1.2925331592559814,
"loss": 1.1448,
"nll_loss": 1.2160179615020752,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.03960617631673813,
"rewards/margins": 0.025020483881235123,
"rewards/rejected": -0.06462665647268295,
"step": 180
},
{
"epoch": 1.1028315946348732,
"grad_norm": 20.26190757751465,
"learning_rate": 3.6760731104690393e-06,
"log_odds_chosen": 1.0061752796173096,
"log_odds_ratio": -0.3863833546638489,
"logits/chosen": 388.26934814453125,
"logits/rejected": 379.1220703125,
"logps/chosen": -0.6712931990623474,
"logps/rejected": -1.1969959735870361,
"loss": 1.0422,
"nll_loss": 0.9980667233467102,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.03356466069817543,
"rewards/margins": 0.026285137981176376,
"rewards/rejected": -0.05984979867935181,
"step": 185
},
{
"epoch": 1.1326378539493294,
"grad_norm": 16.21082305908203,
"learning_rate": 3.6273812505500587e-06,
"log_odds_chosen": 0.6967722177505493,
"log_odds_ratio": -0.49137839674949646,
"logits/chosen": 353.41705322265625,
"logits/rejected": 400.4765930175781,
"logps/chosen": -0.75420081615448,
"logps/rejected": -1.1797516345977783,
"loss": 1.1136,
"nll_loss": 1.0225417613983154,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.0377100370824337,
"rewards/margins": 0.021277543157339096,
"rewards/rejected": -0.0589875802397728,
"step": 190
},
{
"epoch": 1.1624441132637853,
"grad_norm": 18.45132827758789,
"learning_rate": 3.5805743701971648e-06,
"log_odds_chosen": 0.8713854551315308,
"log_odds_ratio": -0.4125959873199463,
"logits/chosen": 383.83868408203125,
"logits/rejected": 397.7996520996094,
"logps/chosen": -0.7979816198348999,
"logps/rejected": -1.2784751653671265,
"loss": 1.1249,
"nll_loss": 1.1307828426361084,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.039899080991744995,
"rewards/margins": 0.02402467653155327,
"rewards/rejected": -0.06392376124858856,
"step": 195
},
{
"epoch": 1.1922503725782414,
"grad_norm": 29.213319778442383,
"learning_rate": 3.5355339059327378e-06,
"log_odds_chosen": 0.9310399889945984,
"log_odds_ratio": -0.43441715836524963,
"logits/chosen": 408.2393798828125,
"logits/rejected": 392.23309326171875,
"logps/chosen": -0.65810626745224,
"logps/rejected": -1.2119154930114746,
"loss": 1.0925,
"nll_loss": 1.0188348293304443,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.03290531411767006,
"rewards/margins": 0.0276904609054327,
"rewards/rejected": -0.06059577316045761,
"step": 200
},
{
"epoch": 1.2220566318926975,
"grad_norm": 20.968154907226562,
"learning_rate": 3.4921514788478916e-06,
"log_odds_chosen": 1.0998015403747559,
"log_odds_ratio": -0.39691638946533203,
"logits/chosen": 365.73724365234375,
"logits/rejected": 359.8885803222656,
"logps/chosen": -0.6815972924232483,
"logps/rejected": -1.2400376796722412,
"loss": 1.0466,
"nll_loss": 1.0264532566070557,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.03407986834645271,
"rewards/margins": 0.027922023087739944,
"rewards/rejected": -0.06200189143419266,
"step": 205
},
{
"epoch": 1.2518628912071534,
"grad_norm": 17.646827697753906,
"learning_rate": 3.450327796711771e-06,
"log_odds_chosen": 1.2030134201049805,
"log_odds_ratio": -0.3409472107887268,
"logits/chosen": 371.56903076171875,
"logits/rejected": 400.691162109375,
"logps/chosen": -0.6153351664543152,
"logps/rejected": -1.2756757736206055,
"loss": 1.0517,
"nll_loss": 0.9517441987991333,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.03076675906777382,
"rewards/margins": 0.03301702067255974,
"rewards/rejected": -0.06378378719091415,
"step": 210
},
{
"epoch": 1.2816691505216096,
"grad_norm": 25.96933364868164,
"learning_rate": 3.409971697352368e-06,
"log_odds_chosen": 1.0242887735366821,
"log_odds_ratio": -0.3722797930240631,
"logits/chosen": 393.1634826660156,
"logits/rejected": 376.97198486328125,
"logps/chosen": -0.7517871856689453,
"logps/rejected": -1.3418635129928589,
"loss": 1.0677,
"nll_loss": 1.063118577003479,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.037589360028505325,
"rewards/margins": 0.02950381301343441,
"rewards/rejected": -0.06709317117929459,
"step": 215
},
{
"epoch": 1.3114754098360657,
"grad_norm": 14.424154281616211,
"learning_rate": 3.3709993123162106e-06,
"log_odds_chosen": 0.6680114269256592,
"log_odds_ratio": -0.5037292242050171,
"logits/chosen": 385.2915344238281,
"logits/rejected": 379.8268127441406,
"logps/chosen": -0.8324653506278992,
"logps/rejected": -1.1821435689926147,
"loss": 1.071,
"nll_loss": 1.0840386152267456,
"rewards/accuracies": 0.800000011920929,
"rewards/chosen": -0.04162326827645302,
"rewards/margins": 0.0174839086830616,
"rewards/rejected": -0.05910717695951462,
"step": 220
},
{
"epoch": 1.3412816691505216,
"grad_norm": 24.496023178100586,
"learning_rate": 3.3333333333333333e-06,
"log_odds_chosen": 0.5463358759880066,
"log_odds_ratio": -0.5178000926971436,
"logits/chosen": 381.60198974609375,
"logits/rejected": 374.200439453125,
"logps/chosen": -0.8569077253341675,
"logps/rejected": -1.1593918800354004,
"loss": 1.0304,
"nll_loss": 1.1032346487045288,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.042845387011766434,
"rewards/margins": 0.015124207362532616,
"rewards/rejected": -0.0579695925116539,
"step": 225
},
{
"epoch": 1.3710879284649775,
"grad_norm": 31.743003845214844,
"learning_rate": 3.296902366978936e-06,
"log_odds_chosen": 1.1322697401046753,
"log_odds_ratio": -0.3533535599708557,
"logits/chosen": 353.97186279296875,
"logits/rejected": 374.7437438964844,
"logps/chosen": -0.5964599251747131,
"logps/rejected": -1.2119852304458618,
"loss": 1.0402,
"nll_loss": 0.9073736071586609,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.029822995886206627,
"rewards/margins": 0.030776266008615494,
"rewards/rejected": -0.06059925630688667,
"step": 230
},
{
"epoch": 1.4008941877794336,
"grad_norm": 23.891324996948242,
"learning_rate": 3.2616403652672114e-06,
"log_odds_chosen": 1.1859080791473389,
"log_odds_ratio": -0.37409111857414246,
"logits/chosen": 381.7622985839844,
"logits/rejected": 395.0599365234375,
"logps/chosen": -0.6458351016044617,
"logps/rejected": -1.346355676651001,
"loss": 1.0587,
"nll_loss": 0.9488533735275269,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.032291755080223083,
"rewards/margins": 0.035026032477617264,
"rewards/rejected": -0.06731779128313065,
"step": 235
},
{
"epoch": 1.4307004470938898,
"grad_norm": 16.38582992553711,
"learning_rate": 3.2274861218395142e-06,
"log_odds_chosen": 0.7762764692306519,
"log_odds_ratio": -0.43844375014305115,
"logits/chosen": 407.67388916015625,
"logits/rejected": 413.35260009765625,
"logps/chosen": -0.7236464619636536,
"logps/rejected": -1.1575326919555664,
"loss": 1.0752,
"nll_loss": 1.0268566608428955,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.03618232533335686,
"rewards/margins": 0.02169431373476982,
"rewards/rejected": -0.05787663906812668,
"step": 240
},
{
"epoch": 1.4605067064083457,
"grad_norm": 16.295490264892578,
"learning_rate": 3.1943828249997e-06,
"log_odds_chosen": 0.9785711169242859,
"log_odds_ratio": -0.4029998779296875,
"logits/chosen": 400.16632080078125,
"logits/rejected": 388.1484069824219,
"logps/chosen": -0.6374613642692566,
"logps/rejected": -1.146707534790039,
"loss": 1.0837,
"nll_loss": 1.123439073562622,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.03187306597828865,
"rewards/margins": 0.025462310761213303,
"rewards/rejected": -0.05733537673950195,
"step": 245
},
{
"epoch": 1.4903129657228018,
"grad_norm": 22.652774810791016,
"learning_rate": 3.1622776601683796e-06,
"log_odds_chosen": 1.0432734489440918,
"log_odds_ratio": -0.4298950135707855,
"logits/chosen": 374.0715637207031,
"logits/rejected": 381.5113830566406,
"logps/chosen": -0.6628987193107605,
"logps/rejected": -1.2346137762069702,
"loss": 0.9864,
"nll_loss": 0.9000906944274902,
"rewards/accuracies": 0.8500000238418579,
"rewards/chosen": -0.03314493969082832,
"rewards/margins": 0.028585752472281456,
"rewards/rejected": -0.06173068284988403,
"step": 250
},
{
"epoch": 1.520119225037258,
"grad_norm": 15.01534652709961,
"learning_rate": 3.131121455425748e-06,
"log_odds_chosen": 1.0844942331314087,
"log_odds_ratio": -0.34810084104537964,
"logits/chosen": 391.9859924316406,
"logits/rejected": 394.78021240234375,
"logps/chosen": -0.5884779095649719,
"logps/rejected": -1.1623605489730835,
"loss": 1.0497,
"nll_loss": 0.9377425312995911,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.029423898085951805,
"rewards/margins": 0.028694134205579758,
"rewards/rejected": -0.05811803415417671,
"step": 255
},
{
"epoch": 1.5499254843517138,
"grad_norm": 22.26698112487793,
"learning_rate": 3.1008683647302113e-06,
"log_odds_chosen": 0.9070035815238953,
"log_odds_ratio": -0.43072786927223206,
"logits/chosen": 372.50006103515625,
"logits/rejected": 414.58331298828125,
"logps/chosen": -0.763823926448822,
"logps/rejected": -1.341328740119934,
"loss": 1.043,
"nll_loss": 1.0060240030288696,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.03819119185209274,
"rewards/margins": 0.028875242918729782,
"rewards/rejected": -0.06706643104553223,
"step": 260
},
{
"epoch": 1.5797317436661698,
"grad_norm": 14.599881172180176,
"learning_rate": 3.0714755841697565e-06,
"log_odds_chosen": 1.0877039432525635,
"log_odds_ratio": -0.43615055084228516,
"logits/chosen": 384.4775390625,
"logits/rejected": 406.6970520019531,
"logps/chosen": -0.6974985003471375,
"logps/rejected": -1.3204139471054077,
"loss": 1.098,
"nll_loss": 1.024665117263794,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.03487492725253105,
"rewards/margins": 0.031145762652158737,
"rewards/rejected": -0.06602068990468979,
"step": 265
},
{
"epoch": 1.6095380029806259,
"grad_norm": 17.716583251953125,
"learning_rate": 3.0429030972509227e-06,
"log_odds_chosen": 0.9025327563285828,
"log_odds_ratio": -0.4233691692352295,
"logits/chosen": 367.71807861328125,
"logits/rejected": 379.2008361816406,
"logps/chosen": -0.777201771736145,
"logps/rejected": -1.2777531147003174,
"loss": 1.0837,
"nll_loss": 1.1377698183059692,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.03886008635163307,
"rewards/margins": 0.025027573108673096,
"rewards/rejected": -0.06388765573501587,
"step": 270
},
{
"epoch": 1.639344262295082,
"grad_norm": 14.134200096130371,
"learning_rate": 3.0151134457776365e-06,
"log_odds_chosen": 0.8205320239067078,
"log_odds_ratio": -0.44056087732315063,
"logits/chosen": 360.33575439453125,
"logits/rejected": 350.024169921875,
"logps/chosen": -0.6577683687210083,
"logps/rejected": -1.066030740737915,
"loss": 1.0837,
"nll_loss": 1.0101639032363892,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.032888419926166534,
"rewards/margins": 0.020413123071193695,
"rewards/rejected": -0.05330154299736023,
"step": 275
},
{
"epoch": 1.669150521609538,
"grad_norm": 15.517395973205566,
"learning_rate": 2.988071523335984e-06,
"log_odds_chosen": 0.7949902415275574,
"log_odds_ratio": -0.5562250018119812,
"logits/chosen": 404.2984313964844,
"logits/rejected": 391.6941833496094,
"logps/chosen": -0.7360959649085999,
"logps/rejected": -1.1831490993499756,
"loss": 1.0486,
"nll_loss": 1.0734833478927612,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.03680479899048805,
"rewards/margins": 0.022352661937475204,
"rewards/rejected": -0.05915746092796326,
"step": 280
},
{
"epoch": 1.698956780923994,
"grad_norm": 14.143935203552246,
"learning_rate": 2.961744388795462e-06,
"log_odds_chosen": 0.9420916438102722,
"log_odds_ratio": -0.42187291383743286,
"logits/chosen": 367.45843505859375,
"logits/rejected": 374.1835632324219,
"logps/chosen": -0.6173609495162964,
"logps/rejected": -1.1151915788650513,
"loss": 0.996,
"nll_loss": 0.9254717826843262,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.03086804784834385,
"rewards/margins": 0.024891531094908714,
"rewards/rejected": -0.05575958639383316,
"step": 285
},
{
"epoch": 1.7287630402384502,
"grad_norm": 13.992819786071777,
"learning_rate": 2.9361010975735177e-06,
"log_odds_chosen": 0.9273719787597656,
"log_odds_ratio": -0.39941272139549255,
"logits/chosen": 386.17742919921875,
"logits/rejected": 424.8526306152344,
"logps/chosen": -0.7709314227104187,
"logps/rejected": -1.294065237045288,
"loss": 1.0527,
"nll_loss": 0.9949714541435242,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.038546573370695114,
"rewards/margins": 0.026156682521104813,
"rewards/rejected": -0.06470325589179993,
"step": 290
},
{
"epoch": 1.758569299552906,
"grad_norm": 15.243948936462402,
"learning_rate": 2.9111125486979104e-06,
"log_odds_chosen": 0.7636137008666992,
"log_odds_ratio": -0.4647112786769867,
"logits/chosen": 361.9948425292969,
"logits/rejected": 406.70654296875,
"logps/chosen": -0.7253848314285278,
"logps/rejected": -1.145918607711792,
"loss": 1.0847,
"nll_loss": 1.016174554824829,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.03626924008131027,
"rewards/margins": 0.021026695147156715,
"rewards/rejected": -0.05729593709111214,
"step": 295
},
{
"epoch": 1.788375558867362,
"grad_norm": 23.890466690063477,
"learning_rate": 2.8867513459481293e-06,
"log_odds_chosen": 1.2909433841705322,
"log_odds_ratio": -0.3190842270851135,
"logits/chosen": 403.19427490234375,
"logits/rejected": 380.4273986816406,
"logps/chosen": -0.6161251068115234,
"logps/rejected": -1.2782180309295654,
"loss": 0.9952,
"nll_loss": 0.9254310727119446,
"rewards/accuracies": 0.8999999761581421,
"rewards/chosen": -0.030806254595518112,
"rewards/margins": 0.03310465067625046,
"rewards/rejected": -0.06391090154647827,
"step": 300
},
{
"epoch": 1.8181818181818183,
"grad_norm": 16.844104766845703,
"learning_rate": 2.862991671569341e-06,
"log_odds_chosen": 0.5357767939567566,
"log_odds_ratio": -0.5353686213493347,
"logits/chosen": 395.70831298828125,
"logits/rejected": 405.61749267578125,
"logps/chosen": -0.9245316386222839,
"logps/rejected": -1.2031428813934326,
"loss": 1.0432,
"nll_loss": 1.1699957847595215,
"rewards/accuracies": 0.824999988079071,
"rewards/chosen": -0.046226583421230316,
"rewards/margins": 0.013930551707744598,
"rewards/rejected": -0.060157131403684616,
"step": 305
},
{
"epoch": 1.8479880774962743,
"grad_norm": 14.692316055297852,
"learning_rate": 2.839809171235324e-06,
"log_odds_chosen": 1.0770504474639893,
"log_odds_ratio": -0.42079129815101624,
"logits/chosen": 377.4819030761719,
"logits/rejected": 387.6199645996094,
"logps/chosen": -0.7239227294921875,
"logps/rejected": -1.3824554681777954,
"loss": 1.0884,
"nll_loss": 1.0769283771514893,
"rewards/accuracies": 0.7749999761581421,
"rewards/chosen": -0.036196134984493256,
"rewards/margins": 0.032926641404628754,
"rewards/rejected": -0.06912277638912201,
"step": 310
},
{
"epoch": 1.8777943368107302,
"grad_norm": 15.1817045211792,
"learning_rate": 2.817180849095055e-06,
"log_odds_chosen": 0.5459250807762146,
"log_odds_ratio": -0.5598369240760803,
"logits/chosen": 352.6174621582031,
"logits/rejected": 371.89764404296875,
"logps/chosen": -0.9762029647827148,
"logps/rejected": -1.3525390625,
"loss": 1.0938,
"nll_loss": 1.238140344619751,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.04881014674901962,
"rewards/margins": 0.018816810101270676,
"rewards/rejected": -0.0676269605755806,
"step": 315
},
{
"epoch": 1.9076005961251863,
"grad_norm": 17.332054138183594,
"learning_rate": 2.7950849718747376e-06,
"log_odds_chosen": 1.1397926807403564,
"log_odds_ratio": -0.36622655391693115,
"logits/chosen": 373.9564514160156,
"logits/rejected": 395.34271240234375,
"logps/chosen": -0.6329408884048462,
"logps/rejected": -1.2445515394210815,
"loss": 0.9928,
"nll_loss": 0.9283340573310852,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.03164704144001007,
"rewards/margins": 0.030580539256334305,
"rewards/rejected": -0.062227584421634674,
"step": 320
},
{
"epoch": 1.9374068554396424,
"grad_norm": 20.475017547607422,
"learning_rate": 2.773500981126146e-06,
"log_odds_chosen": 1.1559429168701172,
"log_odds_ratio": -0.3606329560279846,
"logits/chosen": 372.6563720703125,
"logits/rejected": 405.1517333984375,
"logps/chosen": -0.6990076303482056,
"logps/rejected": -1.3749182224273682,
"loss": 1.0121,
"nll_loss": 0.9322077631950378,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.0349503830075264,
"rewards/margins": 0.03379552438855171,
"rewards/rejected": -0.06874591112136841,
"step": 325
},
{
"epoch": 1.9672131147540983,
"grad_norm": 20.384191513061523,
"learning_rate": 2.752409412815902e-06,
"log_odds_chosen": 0.8144651651382446,
"log_odds_ratio": -0.4188029170036316,
"logits/chosen": 367.2298889160156,
"logits/rejected": 376.0736083984375,
"logps/chosen": -0.7355102896690369,
"logps/rejected": -1.211102843284607,
"loss": 1.0378,
"nll_loss": 0.8931636810302734,
"rewards/accuracies": 0.875,
"rewards/chosen": -0.036775510758161545,
"rewards/margins": 0.023779626935720444,
"rewards/rejected": -0.06055514141917229,
"step": 330
},
{
"epoch": 1.9970193740685542,
"grad_norm": 15.695927619934082,
"learning_rate": 2.7317918235407652e-06,
"log_odds_chosen": 0.5675193071365356,
"log_odds_ratio": -0.5574907660484314,
"logits/chosen": 395.9285888671875,
"logits/rejected": 387.2447204589844,
"logps/chosen": -0.9066513776779175,
"logps/rejected": -1.2281653881072998,
"loss": 1.0908,
"nll_loss": 1.2198901176452637,
"rewards/accuracies": 0.699999988079071,
"rewards/chosen": -0.045332565903663635,
"rewards/margins": 0.016075702384114265,
"rewards/rejected": -0.06140827015042305,
"step": 335
},
{
"epoch": 1.9970193740685542,
"eval_log_odds_chosen": 0.2359991818666458,
"eval_log_odds_ratio": -0.6970126628875732,
"eval_logits/chosen": 314.6778564453125,
"eval_logits/rejected": 285.82061767578125,
"eval_logps/chosen": -0.9949654936790466,
"eval_logps/rejected": -1.1527600288391113,
"eval_loss": 1.4250013828277588,
"eval_nll_loss": 1.3697166442871094,
"eval_rewards/accuracies": 0.5323740839958191,
"eval_rewards/chosen": -0.04974827170372009,
"eval_rewards/margins": 0.00788972433656454,
"eval_rewards/rejected": -0.05763799697160721,
"eval_runtime": 112.2726,
"eval_samples_per_second": 4.926,
"eval_steps_per_second": 1.238,
"step": 335
},
{
"epoch": 2.0268256333830106,
"grad_norm": 21.729570388793945,
"learning_rate": 2.711630722733202e-06,
"log_odds_chosen": 2.0113790035247803,
"log_odds_ratio": -0.19709806144237518,
"logits/chosen": 389.3846435546875,
"logits/rejected": 366.3945617675781,
"logps/chosen": -0.38005977869033813,
"logps/rejected": -1.4012727737426758,
"loss": 0.6433,
"nll_loss": 0.6980705261230469,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.019002988934516907,
"rewards/margins": 0.051060646772384644,
"rewards/rejected": -0.07006363570690155,
"step": 340
},
{
"epoch": 2.0566318926974665,
"grad_norm": 11.89656925201416,
"learning_rate": 2.691909510290828e-06,
"log_odds_chosen": 2.5525763034820557,
"log_odds_ratio": -0.12284793704748154,
"logits/chosen": 351.57080078125,
"logits/rejected": 357.44329833984375,
"logps/chosen": -0.3399081528186798,
"logps/rejected": -1.6293659210205078,
"loss": 0.5495,
"nll_loss": 0.5662155151367188,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.01699540950357914,
"rewards/margins": 0.06447288393974304,
"rewards/rejected": -0.08146829158067703,
"step": 345
},
{
"epoch": 2.0864381520119224,
"grad_norm": 13.419454574584961,
"learning_rate": 2.6726124191242444e-06,
"log_odds_chosen": 2.548877716064453,
"log_odds_ratio": -0.11839280277490616,
"logits/chosen": 350.29986572265625,
"logits/rejected": 386.45709228515625,
"logps/chosen": -0.382639080286026,
"logps/rejected": -1.8921934366226196,
"loss": 0.5743,
"nll_loss": 0.5715562105178833,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.01913195475935936,
"rewards/margins": 0.0754777267575264,
"rewards/rejected": -0.09460968524217606,
"step": 350
},
{
"epoch": 2.1162444113263787,
"grad_norm": 13.355463027954102,
"learning_rate": 2.6537244621713765e-06,
"log_odds_chosen": 2.2259714603424072,
"log_odds_ratio": -0.15891632437705994,
"logits/chosen": 352.84619140625,
"logits/rejected": 371.22576904296875,
"logps/chosen": -0.37806540727615356,
"logps/rejected": -1.5315955877304077,
"loss": 0.5507,
"nll_loss": 0.6317521333694458,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.018903274089097977,
"rewards/margins": 0.05767650529742241,
"rewards/rejected": -0.07657978683710098,
"step": 355
},
{
"epoch": 2.1460506706408347,
"grad_norm": 10.8477201461792,
"learning_rate": 2.6352313834736496e-06,
"log_odds_chosen": 2.581636428833008,
"log_odds_ratio": -0.1250651776790619,
"logits/chosen": 353.0003356933594,
"logits/rejected": 398.9602355957031,
"logps/chosen": -0.3573206067085266,
"logps/rejected": -1.6087188720703125,
"loss": 0.5407,
"nll_loss": 0.5504949688911438,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.01786603033542633,
"rewards/margins": 0.06256992369890213,
"rewards/rejected": -0.08043594658374786,
"step": 360
},
{
"epoch": 2.1758569299552906,
"grad_norm": 12.167034149169922,
"learning_rate": 2.6171196129510684e-06,
"log_odds_chosen": 1.9800822734832764,
"log_odds_ratio": -0.16938333213329315,
"logits/chosen": 341.21527099609375,
"logits/rejected": 329.54119873046875,
"logps/chosen": -0.348991334438324,
"logps/rejected": -1.3196141719818115,
"loss": 0.5516,
"nll_loss": 0.5312565565109253,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.017449568957090378,
"rewards/margins": 0.04853113740682602,
"rewards/rejected": -0.0659807100892067,
"step": 365
},
{
"epoch": 2.2056631892697465,
"grad_norm": 15.195405960083008,
"learning_rate": 2.599376224550182e-06,
"log_odds_chosen": 2.0713467597961426,
"log_odds_ratio": -0.19306516647338867,
"logits/chosen": 316.6725769042969,
"logits/rejected": 339.6087646484375,
"logps/chosen": -0.36510804295539856,
"logps/rejected": -1.4302679300308228,
"loss": 0.5732,
"nll_loss": 0.5869459509849548,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.01825539954006672,
"rewards/margins": 0.05325800180435181,
"rewards/rejected": -0.07151339948177338,
"step": 370
},
{
"epoch": 2.235469448584203,
"grad_norm": 12.842897415161133,
"learning_rate": 2.5819888974716113e-06,
"log_odds_chosen": 1.9603370428085327,
"log_odds_ratio": -0.18798741698265076,
"logits/chosen": 368.00836181640625,
"logits/rejected": 389.7608337402344,
"logps/chosen": -0.4214121699333191,
"logps/rejected": -1.4475972652435303,
"loss": 0.5831,
"nll_loss": 0.6068717241287231,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.021070610731840134,
"rewards/margins": 0.051309265196323395,
"rewards/rejected": -0.07237987220287323,
"step": 375
},
{
"epoch": 2.2652757078986587,
"grad_norm": 15.618853569030762,
"learning_rate": 2.564945880212886e-06,
"log_odds_chosen": 2.2622876167297363,
"log_odds_ratio": -0.1320658028125763,
"logits/chosen": 366.3780517578125,
"logits/rejected": 351.96820068359375,
"logps/chosen": -0.3000200688838959,
"logps/rejected": -1.3632047176361084,
"loss": 0.5571,
"nll_loss": 0.525825560092926,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.015001003630459309,
"rewards/margins": 0.05315924435853958,
"rewards/rejected": -0.06816024333238602,
"step": 380
},
{
"epoch": 2.2950819672131146,
"grad_norm": 13.373687744140625,
"learning_rate": 2.5482359571881276e-06,
"log_odds_chosen": 2.5866951942443848,
"log_odds_ratio": -0.11987988650798798,
"logits/chosen": 358.47344970703125,
"logits/rejected": 352.4609375,
"logps/chosen": -0.283217191696167,
"logps/rejected": -1.4752601385116577,
"loss": 0.5301,
"nll_loss": 0.49565237760543823,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.014160861261188984,
"rewards/margins": 0.059602152556180954,
"rewards/rejected": -0.07376301288604736,
"step": 385
},
{
"epoch": 2.3248882265275705,
"grad_norm": 11.959152221679688,
"learning_rate": 2.5318484177091667e-06,
"log_odds_chosen": 2.3983092308044434,
"log_odds_ratio": -0.11385631561279297,
"logits/chosen": 370.3407287597656,
"logits/rejected": 393.58978271484375,
"logps/chosen": -0.36266201734542847,
"logps/rejected": -1.6288502216339111,
"loss": 0.578,
"nll_loss": 0.5790597200393677,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.018133098259568214,
"rewards/margins": 0.06330940872430801,
"rewards/rejected": -0.08144249767065048,
"step": 390
},
{
"epoch": 2.354694485842027,
"grad_norm": 11.902227401733398,
"learning_rate": 2.515773027133138e-06,
"log_odds_chosen": 2.4830586910247803,
"log_odds_ratio": -0.13829158246517181,
"logits/chosen": 369.2203063964844,
"logits/rejected": 362.56298828125,
"logps/chosen": -0.2860831320285797,
"logps/rejected": -1.3531745672225952,
"loss": 0.5233,
"nll_loss": 0.48577412962913513,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.01430415641516447,
"rewards/margins": 0.053354568779468536,
"rewards/rejected": -0.06765872985124588,
"step": 395
},
{
"epoch": 2.384500745156483,
"grad_norm": 18.2595157623291,
"learning_rate": 2.5e-06,
"log_odds_chosen": 2.4875540733337402,
"log_odds_ratio": -0.13931448757648468,
"logits/chosen": 366.81646728515625,
"logits/rejected": 388.4540710449219,
"logps/chosen": -0.3392675817012787,
"logps/rejected": -1.6781524419784546,
"loss": 0.5707,
"nll_loss": 0.5266181826591492,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.016963381320238113,
"rewards/margins": 0.06694425642490387,
"rewards/rejected": -0.08390761911869049,
"step": 400
},
{
"epoch": 2.4143070044709387,
"grad_norm": 10.78487777709961,
"learning_rate": 2.484519974999767e-06,
"log_odds_chosen": 2.3641769886016846,
"log_odds_ratio": -0.18085625767707825,
"logits/chosen": 417.9383850097656,
"logits/rejected": 384.9745178222656,
"logps/chosen": -0.36932411789894104,
"logps/rejected": -1.5650533437728882,
"loss": 0.5707,
"nll_loss": 0.5322312712669373,
"rewards/accuracies": 0.925000011920929,
"rewards/chosen": -0.018466206267476082,
"rewards/margins": 0.05978646129369736,
"rewards/rejected": -0.07825267314910889,
"step": 405
},
{
"epoch": 2.444113263785395,
"grad_norm": 12.914924621582031,
"learning_rate": 2.4693239916239746e-06,
"log_odds_chosen": 2.4095664024353027,
"log_odds_ratio": -0.17002181708812714,
"logits/chosen": 363.0850830078125,
"logits/rejected": 378.43634033203125,
"logps/chosen": -0.3721050024032593,
"logps/rejected": -1.5407812595367432,
"loss": 0.5689,
"nll_loss": 0.5765537619590759,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.018605249002575874,
"rewards/margins": 0.058433812111616135,
"rewards/rejected": -0.07703907042741776,
"step": 410
},
{
"epoch": 2.473919523099851,
"grad_norm": 11.604476928710938,
"learning_rate": 2.4544034683690802e-06,
"log_odds_chosen": 2.4141106605529785,
"log_odds_ratio": -0.13905009627342224,
"logits/chosen": 363.8720703125,
"logits/rejected": 393.9859924316406,
"logps/chosen": -0.32817938923835754,
"logps/rejected": -1.5454423427581787,
"loss": 0.5702,
"nll_loss": 0.5272970795631409,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.016408968716859818,
"rewards/margins": 0.06086314842104912,
"rewards/rejected": -0.07727211713790894,
"step": 415
},
{
"epoch": 2.503725782414307,
"grad_norm": 11.285563468933105,
"learning_rate": 2.4397501823713327e-06,
"log_odds_chosen": 2.0902717113494873,
"log_odds_ratio": -0.18547013401985168,
"logits/chosen": 364.81866455078125,
"logits/rejected": 342.7242736816406,
"logps/chosen": -0.3733817934989929,
"logps/rejected": -1.4410852193832397,
"loss": 0.56,
"nll_loss": 0.6532183885574341,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.018669091165065765,
"rewards/margins": 0.0533851683139801,
"rewards/rejected": -0.07205425947904587,
"step": 420
},
{
"epoch": 2.533532041728763,
"grad_norm": 12.517095565795898,
"learning_rate": 2.4253562503633297e-06,
"log_odds_chosen": 2.795741081237793,
"log_odds_ratio": -0.08831789344549179,
"logits/chosen": 362.86871337890625,
"logits/rejected": 359.4671630859375,
"logps/chosen": -0.32691091299057007,
"logps/rejected": -1.8046060800552368,
"loss": 0.5335,
"nll_loss": 0.5374017357826233,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.016345545649528503,
"rewards/margins": 0.0738847479224205,
"rewards/rejected": -0.0902303010225296,
"step": 425
},
{
"epoch": 2.563338301043219,
"grad_norm": 13.33828353881836,
"learning_rate": 2.411214110852061e-06,
"log_odds_chosen": 2.7160139083862305,
"log_odds_ratio": -0.10933760553598404,
"logits/chosen": 362.9604187011719,
"logits/rejected": 374.8692626953125,
"logps/chosen": -0.27513235807418823,
"logps/rejected": -1.615644097328186,
"loss": 0.5522,
"nll_loss": 0.48540863394737244,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.01375661976635456,
"rewards/margins": 0.06702558696269989,
"rewards/rejected": -0.0807822048664093,
"step": 430
},
{
"epoch": 2.593144560357675,
"grad_norm": 15.951871871948242,
"learning_rate": 2.3973165074269213e-06,
"log_odds_chosen": 2.399064779281616,
"log_odds_ratio": -0.150381401181221,
"logits/chosen": 368.9129943847656,
"logits/rejected": 337.7628173828125,
"logps/chosen": -0.3689618408679962,
"logps/rejected": -1.5598814487457275,
"loss": 0.5514,
"nll_loss": 0.5270097255706787,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.01844809204339981,
"rewards/margins": 0.059545982629060745,
"rewards/rejected": -0.07799407094717026,
"step": 435
},
{
"epoch": 2.6229508196721314,
"grad_norm": 12.57322883605957,
"learning_rate": 2.3836564731139807e-06,
"log_odds_chosen": 2.7293245792388916,
"log_odds_ratio": -0.10528914630413055,
"logits/chosen": 354.0178527832031,
"logits/rejected": 365.93829345703125,
"logps/chosen": -0.2712039351463318,
"logps/rejected": -1.582219123840332,
"loss": 0.5703,
"nll_loss": 0.5448659062385559,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.013560195453464985,
"rewards/margins": 0.06555076688528061,
"rewards/rejected": -0.07911095768213272,
"step": 440
},
{
"epoch": 2.6527570789865873,
"grad_norm": 13.282082557678223,
"learning_rate": 2.3702273156998867e-06,
"log_odds_chosen": 2.619792938232422,
"log_odds_ratio": -0.10272769629955292,
"logits/chosen": 335.6366271972656,
"logits/rejected": 372.410400390625,
"logps/chosen": -0.36089158058166504,
"logps/rejected": -1.8113043308258057,
"loss": 0.5563,
"nll_loss": 0.5579748749732971,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.018044577911496162,
"rewards/margins": 0.07252063602209091,
"rewards/rejected": -0.09056521207094193,
"step": 445
},
{
"epoch": 2.682563338301043,
"grad_norm": 10.777383804321289,
"learning_rate": 2.357022603955159e-06,
"log_odds_chosen": 2.4564261436462402,
"log_odds_ratio": -0.11157449334859848,
"logits/chosen": 362.14312744140625,
"logits/rejected": 363.303466796875,
"logps/chosen": -0.39076924324035645,
"logps/rejected": -1.7145166397094727,
"loss": 0.5754,
"nll_loss": 0.5376263856887817,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.019538460299372673,
"rewards/margins": 0.06618736684322357,
"rewards/rejected": -0.0857258215546608,
"step": 450
},
{
"epoch": 2.712369597615499,
"grad_norm": 12.512327194213867,
"learning_rate": 2.3440361546924774e-06,
"log_odds_chosen": 2.614637613296509,
"log_odds_ratio": -0.11486033350229263,
"logits/chosen": 395.16949462890625,
"logits/rejected": 374.3088684082031,
"logps/chosen": -0.3622822165489197,
"logps/rejected": -1.6618531942367554,
"loss": 0.6153,
"nll_loss": 0.568195641040802,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.018114114180207253,
"rewards/margins": 0.06497855484485626,
"rewards/rejected": -0.08309266716241837,
"step": 455
},
{
"epoch": 2.742175856929955,
"grad_norm": 12.090532302856445,
"learning_rate": 2.3312620206007847e-06,
"log_odds_chosen": 2.508338451385498,
"log_odds_ratio": -0.1204490214586258,
"logits/chosen": 382.52630615234375,
"logits/rejected": 401.80841064453125,
"logps/chosen": -0.3474404215812683,
"logps/rejected": -1.7473865747451782,
"loss": 0.5838,
"nll_loss": 0.6167483925819397,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.017372019588947296,
"rewards/margins": 0.06999730318784714,
"rewards/rejected": -0.08736933022737503,
"step": 460
},
{
"epoch": 2.7719821162444114,
"grad_norm": 13.27834701538086,
"learning_rate": 2.3186944788008413e-06,
"log_odds_chosen": 2.5867724418640137,
"log_odds_ratio": -0.14203417301177979,
"logits/chosen": 376.5874328613281,
"logits/rejected": 381.06341552734375,
"logps/chosen": -0.2869132459163666,
"logps/rejected": -1.5630210638046265,
"loss": 0.5778,
"nll_loss": 0.55084627866745,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.014345663599669933,
"rewards/margins": 0.06380538642406464,
"rewards/rejected": -0.07815105468034744,
"step": 465
},
{
"epoch": 2.8017883755588673,
"grad_norm": 11.784134864807129,
"learning_rate": 2.3063280200722128e-06,
"log_odds_chosen": 2.1283843517303467,
"log_odds_ratio": -0.20095142722129822,
"logits/chosen": 383.31781005859375,
"logits/rejected": 354.9120788574219,
"logps/chosen": -0.40080317854881287,
"logps/rejected": -1.5093116760253906,
"loss": 0.5644,
"nll_loss": 0.575947642326355,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.020040160045027733,
"rewards/margins": 0.05542542785406113,
"rewards/rejected": -0.07546558976173401,
"step": 470
},
{
"epoch": 2.8315946348733236,
"grad_norm": 13.008294105529785,
"learning_rate": 2.2941573387056174e-06,
"log_odds_chosen": 2.6808362007141113,
"log_odds_ratio": -0.10760221630334854,
"logits/chosen": 350.5984802246094,
"logits/rejected": 374.9319152832031,
"logps/chosen": -0.34488445520401,
"logps/rejected": -1.7149194478988647,
"loss": 0.5386,
"nll_loss": 0.491553395986557,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.01724422350525856,
"rewards/margins": 0.06850175559520721,
"rewards/rejected": -0.08574597537517548,
"step": 475
},
{
"epoch": 2.8614008941877795,
"grad_norm": 10.424010276794434,
"learning_rate": 2.2821773229381924e-06,
"log_odds_chosen": 2.2412309646606445,
"log_odds_ratio": -0.1566620171070099,
"logits/chosen": 362.31378173828125,
"logits/rejected": 402.6854248046875,
"logps/chosen": -0.3766781687736511,
"logps/rejected": -1.4856204986572266,
"loss": 0.5052,
"nll_loss": 0.48381978273391724,
"rewards/accuracies": 0.949999988079071,
"rewards/chosen": -0.018833911046385765,
"rewards/margins": 0.055447112768888474,
"rewards/rejected": -0.07428102195262909,
"step": 480
},
{
"epoch": 2.8912071535022354,
"grad_norm": 12.359146118164062,
"learning_rate": 2.270383045932499e-06,
"log_odds_chosen": 2.6057076454162598,
"log_odds_ratio": -0.12701039016246796,
"logits/chosen": 357.164306640625,
"logits/rejected": 380.32073974609375,
"logps/chosen": -0.37163636088371277,
"logps/rejected": -1.8207753896713257,
"loss": 0.5419,
"nll_loss": 0.5325015187263489,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.01858181692659855,
"rewards/margins": 0.07245694845914841,
"rewards/rejected": -0.0910387635231018,
"step": 485
},
{
"epoch": 2.9210134128166914,
"grad_norm": 11.646001815795898,
"learning_rate": 2.2587697572631284e-06,
"log_odds_chosen": 2.3249075412750244,
"log_odds_ratio": -0.19486014544963837,
"logits/chosen": 372.1253967285156,
"logits/rejected": 338.1502380371094,
"logps/chosen": -0.4259433150291443,
"logps/rejected": -1.5797032117843628,
"loss": 0.6087,
"nll_loss": 0.5371652245521545,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.021297167986631393,
"rewards/margins": 0.05768799036741257,
"rewards/rejected": -0.07898515462875366,
"step": 490
},
{
"epoch": 2.9508196721311473,
"grad_norm": 11.838138580322266,
"learning_rate": 2.2473328748774737e-06,
"log_odds_chosen": 2.3507559299468994,
"log_odds_ratio": -0.1578751504421234,
"logits/chosen": 366.9432373046875,
"logits/rejected": 394.8822326660156,
"logps/chosen": -0.3771159052848816,
"logps/rejected": -1.533601999282837,
"loss": 0.5442,
"nll_loss": 0.5532703399658203,
"rewards/accuracies": 0.9750000238418579,
"rewards/chosen": -0.018855798989534378,
"rewards/margins": 0.05782430246472359,
"rewards/rejected": -0.07668010145425797,
"step": 495
},
{
"epoch": 2.9806259314456036,
"grad_norm": 13.802445411682129,
"learning_rate": 2.23606797749979e-06,
"log_odds_chosen": 2.5029566287994385,
"log_odds_ratio": -0.12695619463920593,
"logits/chosen": 374.8814697265625,
"logits/rejected": 372.7264099121094,
"logps/chosen": -0.32484811544418335,
"logps/rejected": -1.5648537874221802,
"loss": 0.5724,
"nll_loss": 0.47206535935401917,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.016242407262325287,
"rewards/margins": 0.062000274658203125,
"rewards/rejected": -0.07824268192052841,
"step": 500
},
{
"epoch": 2.9865871833084947,
"eval_log_odds_chosen": 0.2937372922897339,
"eval_log_odds_ratio": -0.6945178508758545,
"eval_logits/chosen": 300.6891174316406,
"eval_logits/rejected": 271.8756103515625,
"eval_logps/chosen": -1.0802680253982544,
"eval_logps/rejected": -1.2502641677856445,
"eval_loss": 1.539820671081543,
"eval_nll_loss": 1.4724125862121582,
"eval_rewards/accuracies": 0.5395683646202087,
"eval_rewards/chosen": -0.05401340499520302,
"eval_rewards/margins": 0.00849980115890503,
"eval_rewards/rejected": -0.06251321732997894,
"eval_runtime": 112.3165,
"eval_samples_per_second": 4.924,
"eval_steps_per_second": 1.238,
"step": 501
},
{
"epoch": 2.9865871833084947,
"step": 501,
"total_flos": 0.0,
"train_loss": 1.4594077459590402,
"train_runtime": 13816.0738,
"train_samples_per_second": 1.165,
"train_steps_per_second": 0.036
}
],
"logging_steps": 5,
"max_steps": 501,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": false,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}