avemio-digital's picture
Upload 13 files
10682b3 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 15.886524822695035,
"eval_steps": 80,
"global_step": 840,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.6052009456264775,
"grad_norm": 9.684629440307617,
"learning_rate": 1.9047619047619045e-07,
"log_odds_chosen": 0.08431098610162735,
"log_odds_ratio": -0.7315660715103149,
"logits/chosen": -2.4399943351745605,
"logits/rejected": -2.418248414993286,
"logps/chosen": -1.316224455833435,
"logps/rejected": -1.3803966045379639,
"loss": 1.6469,
"nll_loss": 1.5446076393127441,
"rewards/accuracies": 0.54296875,
"rewards/chosen": -0.1974336802959442,
"rewards/margins": 0.009625822305679321,
"rewards/rejected": -0.20705950260162354,
"step": 32
},
{
"epoch": 1.210401891252955,
"grad_norm": 6.682727813720703,
"learning_rate": 3.809523809523809e-07,
"log_odds_chosen": 0.1358582228422165,
"log_odds_ratio": -0.6844438314437866,
"logits/chosen": -2.4866111278533936,
"logits/rejected": -2.473512649536133,
"logps/chosen": -1.2416539192199707,
"logps/rejected": -1.338365912437439,
"loss": 1.5513,
"nll_loss": 1.43682861328125,
"rewards/accuracies": 0.55859375,
"rewards/chosen": -0.1862480789422989,
"rewards/margins": 0.01450679823756218,
"rewards/rejected": -0.20075488090515137,
"step": 64
},
{
"epoch": 1.5130023640661938,
"eval_log_odds_chosen": 1.1365413665771484,
"eval_log_odds_ratio": -0.2909667193889618,
"eval_logits/chosen": -2.8555617332458496,
"eval_logits/rejected": -2.7511401176452637,
"eval_logps/chosen": -1.1086950302124023,
"eval_logps/rejected": -1.990875244140625,
"eval_loss": 1.1764631271362305,
"eval_nll_loss": 1.1883823871612549,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.16630426049232483,
"eval_rewards/margins": 0.13232707977294922,
"eval_rewards/rejected": -0.29863131046295166,
"eval_runtime": 0.7889,
"eval_samples_per_second": 173.657,
"eval_steps_per_second": 6.338,
"step": 80
},
{
"epoch": 1.8156028368794326,
"grad_norm": 5.326328754425049,
"learning_rate": 4.996892303047305e-07,
"log_odds_chosen": 0.18785640597343445,
"log_odds_ratio": -0.6766043901443481,
"logits/chosen": -2.4521989822387695,
"logits/rejected": -2.457139492034912,
"logps/chosen": -1.1742055416107178,
"logps/rejected": -1.3268922567367554,
"loss": 1.4307,
"nll_loss": 1.3236442804336548,
"rewards/accuracies": 0.5390625,
"rewards/chosen": -0.17613083124160767,
"rewards/margins": 0.022903023287653923,
"rewards/rejected": -0.19903387129306793,
"step": 96
},
{
"epoch": 2.42080378250591,
"grad_norm": 6.345442295074463,
"learning_rate": 4.958326378681848e-07,
"log_odds_chosen": 0.20125526189804077,
"log_odds_ratio": -0.6606975793838501,
"logits/chosen": -2.4338855743408203,
"logits/rejected": -2.4032998085021973,
"logps/chosen": -1.2052891254425049,
"logps/rejected": -1.3293428421020508,
"loss": 1.3822,
"nll_loss": 1.3110582828521729,
"rewards/accuracies": 0.609375,
"rewards/chosen": -0.18079334497451782,
"rewards/margins": 0.018608052283525467,
"rewards/rejected": -0.19940140843391418,
"step": 128
},
{
"epoch": 3.0260047281323876,
"grad_norm": 6.9922590255737305,
"learning_rate": 4.876353872369572e-07,
"log_odds_chosen": 0.2849215567111969,
"log_odds_ratio": -0.6226438283920288,
"logits/chosen": -2.28694748878479,
"logits/rejected": -2.2813074588775635,
"logps/chosen": -1.177689552307129,
"logps/rejected": -1.3626967668533325,
"loss": 1.3457,
"nll_loss": 1.2851402759552002,
"rewards/accuracies": 0.68359375,
"rewards/chosen": -0.1766534298658371,
"rewards/margins": 0.027751106768846512,
"rewards/rejected": -0.20440451800823212,
"step": 160
},
{
"epoch": 3.0260047281323876,
"eval_log_odds_chosen": 1.289251685142517,
"eval_log_odds_ratio": -0.2533319592475891,
"eval_logits/chosen": -2.6994073390960693,
"eval_logits/rejected": -2.5944201946258545,
"eval_logps/chosen": -1.1015231609344482,
"eval_logps/rejected": -2.1139886379241943,
"eval_loss": 1.1738542318344116,
"eval_nll_loss": 1.1927688121795654,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.1652284860610962,
"eval_rewards/margins": 0.15186984837055206,
"eval_rewards/rejected": -0.31709831953048706,
"eval_runtime": 0.7972,
"eval_samples_per_second": 171.841,
"eval_steps_per_second": 6.272,
"step": 160
},
{
"epoch": 3.631205673758865,
"grad_norm": 6.0752034187316895,
"learning_rate": 4.752422169756047e-07,
"log_odds_chosen": 0.29861366748809814,
"log_odds_ratio": -0.6187925338745117,
"logits/chosen": -2.278367280960083,
"logits/rejected": -2.197380781173706,
"logps/chosen": -1.1402652263641357,
"logps/rejected": -1.323896884918213,
"loss": 1.3117,
"nll_loss": 1.1778795719146729,
"rewards/accuracies": 0.69140625,
"rewards/chosen": -0.17103978991508484,
"rewards/margins": 0.0275447778403759,
"rewards/rejected": -0.19858455657958984,
"step": 192
},
{
"epoch": 4.236406619385343,
"grad_norm": 6.758876800537109,
"learning_rate": 4.588719528532341e-07,
"log_odds_chosen": 0.32968199253082275,
"log_odds_ratio": -0.6064258813858032,
"logits/chosen": -2.2086422443389893,
"logits/rejected": -2.213918447494507,
"logps/chosen": -1.2367851734161377,
"logps/rejected": -1.4456892013549805,
"loss": 1.2914,
"nll_loss": 1.220529317855835,
"rewards/accuracies": 0.703125,
"rewards/chosen": -0.1855177879333496,
"rewards/margins": 0.03133557736873627,
"rewards/rejected": -0.21685336530208588,
"step": 224
},
{
"epoch": 4.539007092198582,
"eval_log_odds_chosen": 1.4739799499511719,
"eval_log_odds_ratio": -0.21903792023658752,
"eval_logits/chosen": -2.6473140716552734,
"eval_logits/rejected": -2.5277247428894043,
"eval_logps/chosen": -1.2001134157180786,
"eval_logps/rejected": -2.410470485687256,
"eval_loss": 1.1219241619110107,
"eval_nll_loss": 1.137636423110962,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.18001702427864075,
"eval_rewards/margins": 0.18155357241630554,
"eval_rewards/rejected": -0.3615706264972687,
"eval_runtime": 0.7853,
"eval_samples_per_second": 174.46,
"eval_steps_per_second": 6.367,
"step": 240
},
{
"epoch": 4.84160756501182,
"grad_norm": 6.2288618087768555,
"learning_rate": 4.3881364404463375e-07,
"log_odds_chosen": 0.46286657452583313,
"log_odds_ratio": -0.5630860328674316,
"logits/chosen": -2.216555595397949,
"logits/rejected": -2.1122565269470215,
"logps/chosen": -1.1853437423706055,
"logps/rejected": -1.4803636074066162,
"loss": 1.2727,
"nll_loss": 1.1509523391723633,
"rewards/accuracies": 0.77734375,
"rewards/chosen": -0.17780157923698425,
"rewards/margins": 0.04425298422574997,
"rewards/rejected": -0.222054585814476,
"step": 256
},
{
"epoch": 5.446808510638298,
"grad_norm": 8.837624549865723,
"learning_rate": 4.154214593992149e-07,
"log_odds_chosen": 0.571550726890564,
"log_odds_ratio": -0.5269472002983093,
"logits/chosen": -2.189985990524292,
"logits/rejected": -2.085646152496338,
"logps/chosen": -1.2091223001480103,
"logps/rejected": -1.5827255249023438,
"loss": 1.2582,
"nll_loss": 1.1602920293807983,
"rewards/accuracies": 0.78515625,
"rewards/chosen": -0.18136833608150482,
"rewards/margins": 0.05604049190878868,
"rewards/rejected": -0.237408846616745,
"step": 288
},
{
"epoch": 6.052009456264775,
"grad_norm": 10.696549415588379,
"learning_rate": 3.891084338941603e-07,
"log_odds_chosen": 0.5793906450271606,
"log_odds_ratio": -0.5278146862983704,
"logits/chosen": -2.0601658821105957,
"logits/rejected": -2.04327130317688,
"logps/chosen": -1.2197258472442627,
"logps/rejected": -1.6007359027862549,
"loss": 1.261,
"nll_loss": 1.1714200973510742,
"rewards/accuracies": 0.76953125,
"rewards/chosen": -0.18295888602733612,
"rewards/margins": 0.057151518762111664,
"rewards/rejected": -0.2401103973388672,
"step": 320
},
{
"epoch": 6.052009456264775,
"eval_log_odds_chosen": 1.5137661695480347,
"eval_log_odds_ratio": -0.21324041485786438,
"eval_logits/chosen": -2.5153615474700928,
"eval_logits/rejected": -2.389270067214966,
"eval_logps/chosen": -1.2208881378173828,
"eval_logps/rejected": -2.475594997406006,
"eval_loss": 1.0919252634048462,
"eval_nll_loss": 1.1019929647445679,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.18313322961330414,
"eval_rewards/margins": 0.1882060021162033,
"eval_rewards/rejected": -0.37133923172950745,
"eval_runtime": 0.8032,
"eval_samples_per_second": 170.562,
"eval_steps_per_second": 6.225,
"step": 320
},
{
"epoch": 6.657210401891253,
"grad_norm": 14.943062782287598,
"learning_rate": 3.6033917569043597e-07,
"log_odds_chosen": 0.6752089858055115,
"log_odds_ratio": -0.5140572190284729,
"logits/chosen": -2.0157763957977295,
"logits/rejected": -2.004314422607422,
"logps/chosen": -1.2377139329910278,
"logps/rejected": -1.692950963973999,
"loss": 1.252,
"nll_loss": 1.1812970638275146,
"rewards/accuracies": 0.75390625,
"rewards/chosen": -0.1856570839881897,
"rewards/margins": 0.06828554719686508,
"rewards/rejected": -0.25394266843795776,
"step": 352
},
{
"epoch": 7.26241134751773,
"grad_norm": 24.735410690307617,
"learning_rate": 3.296216625629211e-07,
"log_odds_chosen": 0.8964105844497681,
"log_odds_ratio": -0.46805524826049805,
"logits/chosen": -1.9912079572677612,
"logits/rejected": -1.955162525177002,
"logps/chosen": -1.2576524019241333,
"logps/rejected": -1.9145023822784424,
"loss": 1.2436,
"nll_loss": 1.170907974243164,
"rewards/accuracies": 0.7578125,
"rewards/chosen": -0.18864786624908447,
"rewards/margins": 0.09852751344442368,
"rewards/rejected": -0.28717538714408875,
"step": 384
},
{
"epoch": 7.5650118203309695,
"eval_log_odds_chosen": 1.5138293504714966,
"eval_log_odds_ratio": -0.21318714320659637,
"eval_logits/chosen": -2.423281669616699,
"eval_logits/rejected": -2.29191255569458,
"eval_logps/chosen": -1.25368332862854,
"eval_logps/rejected": -2.5206007957458496,
"eval_loss": 1.0984269380569458,
"eval_nll_loss": 1.1060694456100464,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.18805250525474548,
"eval_rewards/margins": 0.19003766775131226,
"eval_rewards/rejected": -0.37809017300605774,
"eval_runtime": 0.7836,
"eval_samples_per_second": 174.83,
"eval_steps_per_second": 6.381,
"step": 400
},
{
"epoch": 7.867612293144208,
"grad_norm": 17.922605514526367,
"learning_rate": 2.974982725547975e-07,
"log_odds_chosen": 1.3920270204544067,
"log_odds_ratio": -0.4031583368778229,
"logits/chosen": -1.9781438112258911,
"logits/rejected": -1.9409700632095337,
"logps/chosen": -1.2178527116775513,
"logps/rejected": -2.3021371364593506,
"loss": 1.2447,
"nll_loss": 1.1541370153427124,
"rewards/accuracies": 0.7734375,
"rewards/chosen": -0.18267790973186493,
"rewards/margins": 0.16264265775680542,
"rewards/rejected": -0.34532058238983154,
"step": 416
},
{
"epoch": 8.472813238770685,
"grad_norm": 18.74088478088379,
"learning_rate": 2.6453620722761895e-07,
"log_odds_chosen": 1.4127886295318604,
"log_odds_ratio": -0.45162278413772583,
"logits/chosen": -1.9333115816116333,
"logits/rejected": -1.9409185647964478,
"logps/chosen": -1.3663212060928345,
"logps/rejected": -2.5033111572265625,
"loss": 1.2511,
"nll_loss": 1.2117295265197754,
"rewards/accuracies": 0.76953125,
"rewards/chosen": -0.20494820177555084,
"rewards/margins": 0.17054852843284607,
"rewards/rejected": -0.37549668550491333,
"step": 448
},
{
"epoch": 9.078014184397164,
"grad_norm": 22.932889938354492,
"learning_rate": 2.3131747660339394e-07,
"log_odds_chosen": 1.6455353498458862,
"log_odds_ratio": -0.44734472036361694,
"logits/chosen": -1.923959493637085,
"logits/rejected": -1.9963738918304443,
"logps/chosen": -1.3357137441635132,
"logps/rejected": -2.676542043685913,
"loss": 1.2336,
"nll_loss": 1.197737455368042,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.2003570795059204,
"rewards/margins": 0.2011241912841797,
"rewards/rejected": -0.4014812707901001,
"step": 480
},
{
"epoch": 9.078014184397164,
"eval_log_odds_chosen": 1.4782460927963257,
"eval_log_odds_ratio": -0.21918949484825134,
"eval_logits/chosen": -2.3784029483795166,
"eval_logits/rejected": -2.244081497192383,
"eval_logps/chosen": -1.296876072883606,
"eval_logps/rejected": -2.5429465770721436,
"eval_loss": 1.1173924207687378,
"eval_nll_loss": 1.114406704902649,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.1945313960313797,
"eval_rewards/margins": 0.18691061437129974,
"eval_rewards/rejected": -0.38144201040267944,
"eval_runtime": 0.8027,
"eval_samples_per_second": 170.681,
"eval_steps_per_second": 6.229,
"step": 480
},
{
"epoch": 9.68321513002364,
"grad_norm": 27.342180252075195,
"learning_rate": 1.984286226342056e-07,
"log_odds_chosen": 2.1517550945281982,
"log_odds_ratio": -0.3539085388183594,
"logits/chosen": -2.0003914833068848,
"logits/rejected": -1.958829641342163,
"logps/chosen": -1.2932095527648926,
"logps/rejected": -3.1245839595794678,
"loss": 1.2519,
"nll_loss": 1.177643060684204,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.19398145377635956,
"rewards/margins": 0.2747061550617218,
"rewards/rejected": -0.46868759393692017,
"step": 512
},
{
"epoch": 10.288416075650119,
"grad_norm": 21.41707992553711,
"learning_rate": 1.6645036265170313e-07,
"log_odds_chosen": 2.206300973892212,
"log_odds_ratio": -0.41246891021728516,
"logits/chosen": -1.9226995706558228,
"logits/rejected": -1.9452672004699707,
"logps/chosen": -1.3459149599075317,
"logps/rejected": -3.243032455444336,
"loss": 1.2377,
"nll_loss": 1.1743229627609253,
"rewards/accuracies": 0.73828125,
"rewards/chosen": -0.20188726484775543,
"rewards/margins": 0.28456762433052063,
"rewards/rejected": -0.48645487427711487,
"step": 544
},
{
"epoch": 10.591016548463356,
"eval_log_odds_chosen": 1.4928518533706665,
"eval_log_odds_ratio": -0.21719364821910858,
"eval_logits/chosen": -2.406038999557495,
"eval_logits/rejected": -2.2726240158081055,
"eval_logps/chosen": -1.2588163614273071,
"eval_logps/rejected": -2.5091373920440674,
"eval_loss": 1.0936493873596191,
"eval_nll_loss": 1.0926154851913452,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.18882247805595398,
"eval_rewards/margins": 0.18754813075065613,
"eval_rewards/rejected": -0.3763706088066101,
"eval_runtime": 0.7924,
"eval_samples_per_second": 172.882,
"eval_steps_per_second": 6.31,
"step": 560
},
{
"epoch": 10.893617021276595,
"grad_norm": 28.251604080200195,
"learning_rate": 1.3594733566170925e-07,
"log_odds_chosen": 1.9746395349502563,
"log_odds_ratio": -0.39856040477752686,
"logits/chosen": -1.9557344913482666,
"logits/rejected": -1.9873769283294678,
"logps/chosen": -1.3181676864624023,
"logps/rejected": -2.9867465496063232,
"loss": 1.2325,
"nll_loss": 1.2073771953582764,
"rewards/accuracies": 0.796875,
"rewards/chosen": -0.19772517681121826,
"rewards/margins": 0.2502868175506592,
"rewards/rejected": -0.44801196455955505,
"step": 576
},
{
"epoch": 11.498817966903074,
"grad_norm": 30.6031551361084,
"learning_rate": 1.0745813253325956e-07,
"log_odds_chosen": 2.6722493171691895,
"log_odds_ratio": -0.35422736406326294,
"logits/chosen": -1.9136030673980713,
"logits/rejected": -1.8901042938232422,
"logps/chosen": -1.233724594116211,
"logps/rejected": -3.5643980503082275,
"loss": 1.2295,
"nll_loss": 1.1232733726501465,
"rewards/accuracies": 0.79296875,
"rewards/chosen": -0.18505869805812836,
"rewards/margins": 0.3496010899543762,
"rewards/rejected": -0.534659743309021,
"step": 608
},
{
"epoch": 12.10401891252955,
"grad_norm": 24.5542049407959,
"learning_rate": 8.148578611867113e-08,
"log_odds_chosen": 2.4431307315826416,
"log_odds_ratio": -0.3909703195095062,
"logits/chosen": -1.8700110912322998,
"logits/rejected": -1.9457833766937256,
"logps/chosen": -1.2895874977111816,
"logps/rejected": -3.4220337867736816,
"loss": 1.2212,
"nll_loss": 1.1831854581832886,
"rewards/accuracies": 0.75,
"rewards/chosen": -0.19343814253807068,
"rewards/margins": 0.31986698508262634,
"rewards/rejected": -0.5133051872253418,
"step": 640
},
{
"epoch": 12.10401891252955,
"eval_log_odds_chosen": 1.515697956085205,
"eval_log_odds_ratio": -0.213613823056221,
"eval_logits/chosen": -2.373903274536133,
"eval_logits/rejected": -2.2387218475341797,
"eval_logps/chosen": -1.2586114406585693,
"eval_logps/rejected": -2.530747175216675,
"eval_loss": 1.0882254838943481,
"eval_nll_loss": 1.085294485092163,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.18879172205924988,
"eval_rewards/margins": 0.19082039594650269,
"eval_rewards/rejected": -0.37961211800575256,
"eval_runtime": 0.8037,
"eval_samples_per_second": 170.464,
"eval_steps_per_second": 6.221,
"step": 640
},
{
"epoch": 12.709219858156029,
"grad_norm": 19.59035301208496,
"learning_rate": 5.848888922025552e-08,
"log_odds_chosen": 2.298971176147461,
"log_odds_ratio": -0.4026568830013275,
"logits/chosen": -1.9471426010131836,
"logits/rejected": -1.9182159900665283,
"logps/chosen": -1.3048053979873657,
"logps/rejected": -3.2884950637817383,
"loss": 1.2173,
"nll_loss": 1.1782861948013306,
"rewards/accuracies": 0.76171875,
"rewards/chosen": -0.19572080671787262,
"rewards/margins": 0.2975533902645111,
"rewards/rejected": -0.4932742416858673,
"step": 672
},
{
"epoch": 13.314420803782506,
"grad_norm": 29.107627868652344,
"learning_rate": 3.887349723342303e-08,
"log_odds_chosen": 2.654096841812134,
"log_odds_ratio": -0.38686317205429077,
"logits/chosen": -1.9159326553344727,
"logits/rejected": -1.9410839080810547,
"logps/chosen": -1.2585792541503906,
"logps/rejected": -3.5659923553466797,
"loss": 1.2261,
"nll_loss": 1.1586594581604004,
"rewards/accuracies": 0.76953125,
"rewards/chosen": -0.18878689408302307,
"rewards/margins": 0.34611204266548157,
"rewards/rejected": -0.5348988771438599,
"step": 704
},
{
"epoch": 13.617021276595745,
"eval_log_odds_chosen": 1.5199410915374756,
"eval_log_odds_ratio": -0.21233825385570526,
"eval_logits/chosen": -2.366751194000244,
"eval_logits/rejected": -2.2316524982452393,
"eval_logps/chosen": -1.2421499490737915,
"eval_logps/rejected": -2.5126953125,
"eval_loss": 1.0794531106948853,
"eval_nll_loss": 1.0783252716064453,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.18632249534130096,
"eval_rewards/margins": 0.190581813454628,
"eval_rewards/rejected": -0.37690430879592896,
"eval_runtime": 0.7829,
"eval_samples_per_second": 175.001,
"eval_steps_per_second": 6.387,
"step": 720
},
{
"epoch": 13.919621749408984,
"grad_norm": 22.09730339050293,
"learning_rate": 2.298595844092377e-08,
"log_odds_chosen": 2.671109199523926,
"log_odds_ratio": -0.3479268252849579,
"logits/chosen": -2.0034313201904297,
"logits/rejected": -1.870398998260498,
"logps/chosen": -1.2474991083145142,
"logps/rejected": -3.5732622146606445,
"loss": 1.2194,
"nll_loss": 1.1217308044433594,
"rewards/accuracies": 0.80859375,
"rewards/chosen": -0.18712489306926727,
"rewards/margins": 0.3488644063472748,
"rewards/rejected": -0.5359892845153809,
"step": 736
},
{
"epoch": 14.52482269503546,
"grad_norm": 24.665891647338867,
"learning_rate": 1.1106798553464802e-08,
"log_odds_chosen": 2.7288260459899902,
"log_odds_ratio": -0.34282395243644714,
"logits/chosen": -1.9847919940948486,
"logits/rejected": -1.9164719581604004,
"logps/chosen": -1.250791311264038,
"logps/rejected": -3.6267054080963135,
"loss": 1.2016,
"nll_loss": 1.1445385217666626,
"rewards/accuracies": 0.84375,
"rewards/chosen": -0.18761873245239258,
"rewards/margins": 0.3563871383666992,
"rewards/rejected": -0.5440058708190918,
"step": 768
},
{
"epoch": 15.130023640661939,
"grad_norm": 47.552574157714844,
"learning_rate": 3.4457674771554422e-09,
"log_odds_chosen": 2.26895809173584,
"log_odds_ratio": -0.3884022831916809,
"logits/chosen": -1.9943946599960327,
"logits/rejected": -1.961279034614563,
"logps/chosen": -1.3015713691711426,
"logps/rejected": -3.266200304031372,
"loss": 1.2176,
"nll_loss": 1.1944975852966309,
"rewards/accuracies": 0.76953125,
"rewards/chosen": -0.1952357143163681,
"rewards/margins": 0.29469433426856995,
"rewards/rejected": -0.48993009328842163,
"step": 800
},
{
"epoch": 15.130023640661939,
"eval_log_odds_chosen": 1.5204813480377197,
"eval_log_odds_ratio": -0.21171817183494568,
"eval_logits/chosen": -2.370277166366577,
"eval_logits/rejected": -2.233962059020996,
"eval_logps/chosen": -1.2457667589187622,
"eval_logps/rejected": -2.517129898071289,
"eval_loss": 1.081107497215271,
"eval_nll_loss": 1.0765924453735352,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.18686501681804657,
"eval_rewards/margins": 0.19070449471473694,
"eval_rewards/rejected": -0.3775694966316223,
"eval_runtime": 0.8025,
"eval_samples_per_second": 170.724,
"eval_steps_per_second": 6.231,
"step": 800
},
{
"epoch": 15.735224586288416,
"grad_norm": 17.138763427734375,
"learning_rate": 1.3813576683111006e-10,
"log_odds_chosen": 2.620955228805542,
"log_odds_ratio": -0.34706899523735046,
"logits/chosen": -1.9596831798553467,
"logits/rejected": -1.9431588649749756,
"logps/chosen": -1.2250535488128662,
"logps/rejected": -3.4995248317718506,
"loss": 1.2191,
"nll_loss": 1.129616379737854,
"rewards/accuracies": 0.78515625,
"rewards/chosen": -0.18375803530216217,
"rewards/margins": 0.34117066860198975,
"rewards/rejected": -0.5249287486076355,
"step": 832
},
{
"epoch": 15.886524822695035,
"grad_norm": 26.069650650024414,
"learning_rate": 0.0,
"log_odds_chosen": 2.719402551651001,
"log_odds_ratio": -0.3425367772579193,
"logits/chosen": -2.0392448902130127,
"logits/rejected": -1.9386732578277588,
"logps/chosen": -1.2458713054656982,
"logps/rejected": -3.6082699298858643,
"loss": 1.2157,
"nll_loss": 1.1475220918655396,
"rewards/accuracies": 0.828125,
"rewards/chosen": -0.1868807077407837,
"rewards/margins": 0.35435977578163147,
"rewards/rejected": -0.5412405133247375,
"step": 840
},
{
"epoch": 15.886524822695035,
"eval_log_odds_chosen": 1.5112136602401733,
"eval_log_odds_ratio": -0.2135576754808426,
"eval_logits/chosen": -2.3702893257141113,
"eval_logits/rejected": -2.2342278957366943,
"eval_logps/chosen": -1.250806450843811,
"eval_logps/rejected": -2.515653133392334,
"eval_loss": 1.0810010433197021,
"eval_nll_loss": 1.0763533115386963,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.18762096762657166,
"eval_rewards/margins": 0.18972699344158173,
"eval_rewards/rejected": -0.37734800577163696,
"eval_runtime": 0.7924,
"eval_samples_per_second": 172.894,
"eval_steps_per_second": 6.31,
"step": 840
}
],
"logging_steps": 32,
"max_steps": 840,
"num_input_tokens_seen": 0,
"num_train_epochs": 17,
"save_steps": 80,
"total_flos": 0.0,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}