|
{
|
|
"best_metric": null,
|
|
"best_model_checkpoint": null,
|
|
"epoch": 15.886524822695035,
|
|
"eval_steps": 80,
|
|
"global_step": 840,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.6052009456264775,
|
|
"grad_norm": 9.684629440307617,
|
|
"learning_rate": 1.9047619047619045e-07,
|
|
"log_odds_chosen": 0.08431098610162735,
|
|
"log_odds_ratio": -0.7315660715103149,
|
|
"logits/chosen": -2.4399943351745605,
|
|
"logits/rejected": -2.418248414993286,
|
|
"logps/chosen": -1.316224455833435,
|
|
"logps/rejected": -1.3803966045379639,
|
|
"loss": 1.6469,
|
|
"nll_loss": 1.5446076393127441,
|
|
"rewards/accuracies": 0.54296875,
|
|
"rewards/chosen": -0.1974336802959442,
|
|
"rewards/margins": 0.009625822305679321,
|
|
"rewards/rejected": -0.20705950260162354,
|
|
"step": 32
|
|
},
|
|
{
|
|
"epoch": 1.210401891252955,
|
|
"grad_norm": 6.682727813720703,
|
|
"learning_rate": 3.809523809523809e-07,
|
|
"log_odds_chosen": 0.1358582228422165,
|
|
"log_odds_ratio": -0.6844438314437866,
|
|
"logits/chosen": -2.4866111278533936,
|
|
"logits/rejected": -2.473512649536133,
|
|
"logps/chosen": -1.2416539192199707,
|
|
"logps/rejected": -1.338365912437439,
|
|
"loss": 1.5513,
|
|
"nll_loss": 1.43682861328125,
|
|
"rewards/accuracies": 0.55859375,
|
|
"rewards/chosen": -0.1862480789422989,
|
|
"rewards/margins": 0.01450679823756218,
|
|
"rewards/rejected": -0.20075488090515137,
|
|
"step": 64
|
|
},
|
|
{
|
|
"epoch": 1.5130023640661938,
|
|
"eval_log_odds_chosen": 1.1365413665771484,
|
|
"eval_log_odds_ratio": -0.2909667193889618,
|
|
"eval_logits/chosen": -2.8555617332458496,
|
|
"eval_logits/rejected": -2.7511401176452637,
|
|
"eval_logps/chosen": -1.1086950302124023,
|
|
"eval_logps/rejected": -1.990875244140625,
|
|
"eval_loss": 1.1764631271362305,
|
|
"eval_nll_loss": 1.1883823871612549,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.16630426049232483,
|
|
"eval_rewards/margins": 0.13232707977294922,
|
|
"eval_rewards/rejected": -0.29863131046295166,
|
|
"eval_runtime": 0.7889,
|
|
"eval_samples_per_second": 173.657,
|
|
"eval_steps_per_second": 6.338,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 1.8156028368794326,
|
|
"grad_norm": 5.326328754425049,
|
|
"learning_rate": 4.996892303047305e-07,
|
|
"log_odds_chosen": 0.18785640597343445,
|
|
"log_odds_ratio": -0.6766043901443481,
|
|
"logits/chosen": -2.4521989822387695,
|
|
"logits/rejected": -2.457139492034912,
|
|
"logps/chosen": -1.1742055416107178,
|
|
"logps/rejected": -1.3268922567367554,
|
|
"loss": 1.4307,
|
|
"nll_loss": 1.3236442804336548,
|
|
"rewards/accuracies": 0.5390625,
|
|
"rewards/chosen": -0.17613083124160767,
|
|
"rewards/margins": 0.022903023287653923,
|
|
"rewards/rejected": -0.19903387129306793,
|
|
"step": 96
|
|
},
|
|
{
|
|
"epoch": 2.42080378250591,
|
|
"grad_norm": 6.345442295074463,
|
|
"learning_rate": 4.958326378681848e-07,
|
|
"log_odds_chosen": 0.20125526189804077,
|
|
"log_odds_ratio": -0.6606975793838501,
|
|
"logits/chosen": -2.4338855743408203,
|
|
"logits/rejected": -2.4032998085021973,
|
|
"logps/chosen": -1.2052891254425049,
|
|
"logps/rejected": -1.3293428421020508,
|
|
"loss": 1.3822,
|
|
"nll_loss": 1.3110582828521729,
|
|
"rewards/accuracies": 0.609375,
|
|
"rewards/chosen": -0.18079334497451782,
|
|
"rewards/margins": 0.018608052283525467,
|
|
"rewards/rejected": -0.19940140843391418,
|
|
"step": 128
|
|
},
|
|
{
|
|
"epoch": 3.0260047281323876,
|
|
"grad_norm": 6.9922590255737305,
|
|
"learning_rate": 4.876353872369572e-07,
|
|
"log_odds_chosen": 0.2849215567111969,
|
|
"log_odds_ratio": -0.6226438283920288,
|
|
"logits/chosen": -2.28694748878479,
|
|
"logits/rejected": -2.2813074588775635,
|
|
"logps/chosen": -1.177689552307129,
|
|
"logps/rejected": -1.3626967668533325,
|
|
"loss": 1.3457,
|
|
"nll_loss": 1.2851402759552002,
|
|
"rewards/accuracies": 0.68359375,
|
|
"rewards/chosen": -0.1766534298658371,
|
|
"rewards/margins": 0.027751106768846512,
|
|
"rewards/rejected": -0.20440451800823212,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 3.0260047281323876,
|
|
"eval_log_odds_chosen": 1.289251685142517,
|
|
"eval_log_odds_ratio": -0.2533319592475891,
|
|
"eval_logits/chosen": -2.6994073390960693,
|
|
"eval_logits/rejected": -2.5944201946258545,
|
|
"eval_logps/chosen": -1.1015231609344482,
|
|
"eval_logps/rejected": -2.1139886379241943,
|
|
"eval_loss": 1.1738542318344116,
|
|
"eval_nll_loss": 1.1927688121795654,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.1652284860610962,
|
|
"eval_rewards/margins": 0.15186984837055206,
|
|
"eval_rewards/rejected": -0.31709831953048706,
|
|
"eval_runtime": 0.7972,
|
|
"eval_samples_per_second": 171.841,
|
|
"eval_steps_per_second": 6.272,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 3.631205673758865,
|
|
"grad_norm": 6.0752034187316895,
|
|
"learning_rate": 4.752422169756047e-07,
|
|
"log_odds_chosen": 0.29861366748809814,
|
|
"log_odds_ratio": -0.6187925338745117,
|
|
"logits/chosen": -2.278367280960083,
|
|
"logits/rejected": -2.197380781173706,
|
|
"logps/chosen": -1.1402652263641357,
|
|
"logps/rejected": -1.323896884918213,
|
|
"loss": 1.3117,
|
|
"nll_loss": 1.1778795719146729,
|
|
"rewards/accuracies": 0.69140625,
|
|
"rewards/chosen": -0.17103978991508484,
|
|
"rewards/margins": 0.0275447778403759,
|
|
"rewards/rejected": -0.19858455657958984,
|
|
"step": 192
|
|
},
|
|
{
|
|
"epoch": 4.236406619385343,
|
|
"grad_norm": 6.758876800537109,
|
|
"learning_rate": 4.588719528532341e-07,
|
|
"log_odds_chosen": 0.32968199253082275,
|
|
"log_odds_ratio": -0.6064258813858032,
|
|
"logits/chosen": -2.2086422443389893,
|
|
"logits/rejected": -2.213918447494507,
|
|
"logps/chosen": -1.2367851734161377,
|
|
"logps/rejected": -1.4456892013549805,
|
|
"loss": 1.2914,
|
|
"nll_loss": 1.220529317855835,
|
|
"rewards/accuracies": 0.703125,
|
|
"rewards/chosen": -0.1855177879333496,
|
|
"rewards/margins": 0.03133557736873627,
|
|
"rewards/rejected": -0.21685336530208588,
|
|
"step": 224
|
|
},
|
|
{
|
|
"epoch": 4.539007092198582,
|
|
"eval_log_odds_chosen": 1.4739799499511719,
|
|
"eval_log_odds_ratio": -0.21903792023658752,
|
|
"eval_logits/chosen": -2.6473140716552734,
|
|
"eval_logits/rejected": -2.5277247428894043,
|
|
"eval_logps/chosen": -1.2001134157180786,
|
|
"eval_logps/rejected": -2.410470485687256,
|
|
"eval_loss": 1.1219241619110107,
|
|
"eval_nll_loss": 1.137636423110962,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.18001702427864075,
|
|
"eval_rewards/margins": 0.18155357241630554,
|
|
"eval_rewards/rejected": -0.3615706264972687,
|
|
"eval_runtime": 0.7853,
|
|
"eval_samples_per_second": 174.46,
|
|
"eval_steps_per_second": 6.367,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 4.84160756501182,
|
|
"grad_norm": 6.2288618087768555,
|
|
"learning_rate": 4.3881364404463375e-07,
|
|
"log_odds_chosen": 0.46286657452583313,
|
|
"log_odds_ratio": -0.5630860328674316,
|
|
"logits/chosen": -2.216555595397949,
|
|
"logits/rejected": -2.1122565269470215,
|
|
"logps/chosen": -1.1853437423706055,
|
|
"logps/rejected": -1.4803636074066162,
|
|
"loss": 1.2727,
|
|
"nll_loss": 1.1509523391723633,
|
|
"rewards/accuracies": 0.77734375,
|
|
"rewards/chosen": -0.17780157923698425,
|
|
"rewards/margins": 0.04425298422574997,
|
|
"rewards/rejected": -0.222054585814476,
|
|
"step": 256
|
|
},
|
|
{
|
|
"epoch": 5.446808510638298,
|
|
"grad_norm": 8.837624549865723,
|
|
"learning_rate": 4.154214593992149e-07,
|
|
"log_odds_chosen": 0.571550726890564,
|
|
"log_odds_ratio": -0.5269472002983093,
|
|
"logits/chosen": -2.189985990524292,
|
|
"logits/rejected": -2.085646152496338,
|
|
"logps/chosen": -1.2091223001480103,
|
|
"logps/rejected": -1.5827255249023438,
|
|
"loss": 1.2582,
|
|
"nll_loss": 1.1602920293807983,
|
|
"rewards/accuracies": 0.78515625,
|
|
"rewards/chosen": -0.18136833608150482,
|
|
"rewards/margins": 0.05604049190878868,
|
|
"rewards/rejected": -0.237408846616745,
|
|
"step": 288
|
|
},
|
|
{
|
|
"epoch": 6.052009456264775,
|
|
"grad_norm": 10.696549415588379,
|
|
"learning_rate": 3.891084338941603e-07,
|
|
"log_odds_chosen": 0.5793906450271606,
|
|
"log_odds_ratio": -0.5278146862983704,
|
|
"logits/chosen": -2.0601658821105957,
|
|
"logits/rejected": -2.04327130317688,
|
|
"logps/chosen": -1.2197258472442627,
|
|
"logps/rejected": -1.6007359027862549,
|
|
"loss": 1.261,
|
|
"nll_loss": 1.1714200973510742,
|
|
"rewards/accuracies": 0.76953125,
|
|
"rewards/chosen": -0.18295888602733612,
|
|
"rewards/margins": 0.057151518762111664,
|
|
"rewards/rejected": -0.2401103973388672,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 6.052009456264775,
|
|
"eval_log_odds_chosen": 1.5137661695480347,
|
|
"eval_log_odds_ratio": -0.21324041485786438,
|
|
"eval_logits/chosen": -2.5153615474700928,
|
|
"eval_logits/rejected": -2.389270067214966,
|
|
"eval_logps/chosen": -1.2208881378173828,
|
|
"eval_logps/rejected": -2.475594997406006,
|
|
"eval_loss": 1.0919252634048462,
|
|
"eval_nll_loss": 1.1019929647445679,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.18313322961330414,
|
|
"eval_rewards/margins": 0.1882060021162033,
|
|
"eval_rewards/rejected": -0.37133923172950745,
|
|
"eval_runtime": 0.8032,
|
|
"eval_samples_per_second": 170.562,
|
|
"eval_steps_per_second": 6.225,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 6.657210401891253,
|
|
"grad_norm": 14.943062782287598,
|
|
"learning_rate": 3.6033917569043597e-07,
|
|
"log_odds_chosen": 0.6752089858055115,
|
|
"log_odds_ratio": -0.5140572190284729,
|
|
"logits/chosen": -2.0157763957977295,
|
|
"logits/rejected": -2.004314422607422,
|
|
"logps/chosen": -1.2377139329910278,
|
|
"logps/rejected": -1.692950963973999,
|
|
"loss": 1.252,
|
|
"nll_loss": 1.1812970638275146,
|
|
"rewards/accuracies": 0.75390625,
|
|
"rewards/chosen": -0.1856570839881897,
|
|
"rewards/margins": 0.06828554719686508,
|
|
"rewards/rejected": -0.25394266843795776,
|
|
"step": 352
|
|
},
|
|
{
|
|
"epoch": 7.26241134751773,
|
|
"grad_norm": 24.735410690307617,
|
|
"learning_rate": 3.296216625629211e-07,
|
|
"log_odds_chosen": 0.8964105844497681,
|
|
"log_odds_ratio": -0.46805524826049805,
|
|
"logits/chosen": -1.9912079572677612,
|
|
"logits/rejected": -1.955162525177002,
|
|
"logps/chosen": -1.2576524019241333,
|
|
"logps/rejected": -1.9145023822784424,
|
|
"loss": 1.2436,
|
|
"nll_loss": 1.170907974243164,
|
|
"rewards/accuracies": 0.7578125,
|
|
"rewards/chosen": -0.18864786624908447,
|
|
"rewards/margins": 0.09852751344442368,
|
|
"rewards/rejected": -0.28717538714408875,
|
|
"step": 384
|
|
},
|
|
{
|
|
"epoch": 7.5650118203309695,
|
|
"eval_log_odds_chosen": 1.5138293504714966,
|
|
"eval_log_odds_ratio": -0.21318714320659637,
|
|
"eval_logits/chosen": -2.423281669616699,
|
|
"eval_logits/rejected": -2.29191255569458,
|
|
"eval_logps/chosen": -1.25368332862854,
|
|
"eval_logps/rejected": -2.5206007957458496,
|
|
"eval_loss": 1.0984269380569458,
|
|
"eval_nll_loss": 1.1060694456100464,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.18805250525474548,
|
|
"eval_rewards/margins": 0.19003766775131226,
|
|
"eval_rewards/rejected": -0.37809017300605774,
|
|
"eval_runtime": 0.7836,
|
|
"eval_samples_per_second": 174.83,
|
|
"eval_steps_per_second": 6.381,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 7.867612293144208,
|
|
"grad_norm": 17.922605514526367,
|
|
"learning_rate": 2.974982725547975e-07,
|
|
"log_odds_chosen": 1.3920270204544067,
|
|
"log_odds_ratio": -0.4031583368778229,
|
|
"logits/chosen": -1.9781438112258911,
|
|
"logits/rejected": -1.9409700632095337,
|
|
"logps/chosen": -1.2178527116775513,
|
|
"logps/rejected": -2.3021371364593506,
|
|
"loss": 1.2447,
|
|
"nll_loss": 1.1541370153427124,
|
|
"rewards/accuracies": 0.7734375,
|
|
"rewards/chosen": -0.18267790973186493,
|
|
"rewards/margins": 0.16264265775680542,
|
|
"rewards/rejected": -0.34532058238983154,
|
|
"step": 416
|
|
},
|
|
{
|
|
"epoch": 8.472813238770685,
|
|
"grad_norm": 18.74088478088379,
|
|
"learning_rate": 2.6453620722761895e-07,
|
|
"log_odds_chosen": 1.4127886295318604,
|
|
"log_odds_ratio": -0.45162278413772583,
|
|
"logits/chosen": -1.9333115816116333,
|
|
"logits/rejected": -1.9409185647964478,
|
|
"logps/chosen": -1.3663212060928345,
|
|
"logps/rejected": -2.5033111572265625,
|
|
"loss": 1.2511,
|
|
"nll_loss": 1.2117295265197754,
|
|
"rewards/accuracies": 0.76953125,
|
|
"rewards/chosen": -0.20494820177555084,
|
|
"rewards/margins": 0.17054852843284607,
|
|
"rewards/rejected": -0.37549668550491333,
|
|
"step": 448
|
|
},
|
|
{
|
|
"epoch": 9.078014184397164,
|
|
"grad_norm": 22.932889938354492,
|
|
"learning_rate": 2.3131747660339394e-07,
|
|
"log_odds_chosen": 1.6455353498458862,
|
|
"log_odds_ratio": -0.44734472036361694,
|
|
"logits/chosen": -1.923959493637085,
|
|
"logits/rejected": -1.9963738918304443,
|
|
"logps/chosen": -1.3357137441635132,
|
|
"logps/rejected": -2.676542043685913,
|
|
"loss": 1.2336,
|
|
"nll_loss": 1.197737455368042,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.2003570795059204,
|
|
"rewards/margins": 0.2011241912841797,
|
|
"rewards/rejected": -0.4014812707901001,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 9.078014184397164,
|
|
"eval_log_odds_chosen": 1.4782460927963257,
|
|
"eval_log_odds_ratio": -0.21918949484825134,
|
|
"eval_logits/chosen": -2.3784029483795166,
|
|
"eval_logits/rejected": -2.244081497192383,
|
|
"eval_logps/chosen": -1.296876072883606,
|
|
"eval_logps/rejected": -2.5429465770721436,
|
|
"eval_loss": 1.1173924207687378,
|
|
"eval_nll_loss": 1.114406704902649,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.1945313960313797,
|
|
"eval_rewards/margins": 0.18691061437129974,
|
|
"eval_rewards/rejected": -0.38144201040267944,
|
|
"eval_runtime": 0.8027,
|
|
"eval_samples_per_second": 170.681,
|
|
"eval_steps_per_second": 6.229,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 9.68321513002364,
|
|
"grad_norm": 27.342180252075195,
|
|
"learning_rate": 1.984286226342056e-07,
|
|
"log_odds_chosen": 2.1517550945281982,
|
|
"log_odds_ratio": -0.3539085388183594,
|
|
"logits/chosen": -2.0003914833068848,
|
|
"logits/rejected": -1.958829641342163,
|
|
"logps/chosen": -1.2932095527648926,
|
|
"logps/rejected": -3.1245839595794678,
|
|
"loss": 1.2519,
|
|
"nll_loss": 1.177643060684204,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": -0.19398145377635956,
|
|
"rewards/margins": 0.2747061550617218,
|
|
"rewards/rejected": -0.46868759393692017,
|
|
"step": 512
|
|
},
|
|
{
|
|
"epoch": 10.288416075650119,
|
|
"grad_norm": 21.41707992553711,
|
|
"learning_rate": 1.6645036265170313e-07,
|
|
"log_odds_chosen": 2.206300973892212,
|
|
"log_odds_ratio": -0.41246891021728516,
|
|
"logits/chosen": -1.9226995706558228,
|
|
"logits/rejected": -1.9452672004699707,
|
|
"logps/chosen": -1.3459149599075317,
|
|
"logps/rejected": -3.243032455444336,
|
|
"loss": 1.2377,
|
|
"nll_loss": 1.1743229627609253,
|
|
"rewards/accuracies": 0.73828125,
|
|
"rewards/chosen": -0.20188726484775543,
|
|
"rewards/margins": 0.28456762433052063,
|
|
"rewards/rejected": -0.48645487427711487,
|
|
"step": 544
|
|
},
|
|
{
|
|
"epoch": 10.591016548463356,
|
|
"eval_log_odds_chosen": 1.4928518533706665,
|
|
"eval_log_odds_ratio": -0.21719364821910858,
|
|
"eval_logits/chosen": -2.406038999557495,
|
|
"eval_logits/rejected": -2.2726240158081055,
|
|
"eval_logps/chosen": -1.2588163614273071,
|
|
"eval_logps/rejected": -2.5091373920440674,
|
|
"eval_loss": 1.0936493873596191,
|
|
"eval_nll_loss": 1.0926154851913452,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.18882247805595398,
|
|
"eval_rewards/margins": 0.18754813075065613,
|
|
"eval_rewards/rejected": -0.3763706088066101,
|
|
"eval_runtime": 0.7924,
|
|
"eval_samples_per_second": 172.882,
|
|
"eval_steps_per_second": 6.31,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 10.893617021276595,
|
|
"grad_norm": 28.251604080200195,
|
|
"learning_rate": 1.3594733566170925e-07,
|
|
"log_odds_chosen": 1.9746395349502563,
|
|
"log_odds_ratio": -0.39856040477752686,
|
|
"logits/chosen": -1.9557344913482666,
|
|
"logits/rejected": -1.9873769283294678,
|
|
"logps/chosen": -1.3181676864624023,
|
|
"logps/rejected": -2.9867465496063232,
|
|
"loss": 1.2325,
|
|
"nll_loss": 1.2073771953582764,
|
|
"rewards/accuracies": 0.796875,
|
|
"rewards/chosen": -0.19772517681121826,
|
|
"rewards/margins": 0.2502868175506592,
|
|
"rewards/rejected": -0.44801196455955505,
|
|
"step": 576
|
|
},
|
|
{
|
|
"epoch": 11.498817966903074,
|
|
"grad_norm": 30.6031551361084,
|
|
"learning_rate": 1.0745813253325956e-07,
|
|
"log_odds_chosen": 2.6722493171691895,
|
|
"log_odds_ratio": -0.35422736406326294,
|
|
"logits/chosen": -1.9136030673980713,
|
|
"logits/rejected": -1.8901042938232422,
|
|
"logps/chosen": -1.233724594116211,
|
|
"logps/rejected": -3.5643980503082275,
|
|
"loss": 1.2295,
|
|
"nll_loss": 1.1232733726501465,
|
|
"rewards/accuracies": 0.79296875,
|
|
"rewards/chosen": -0.18505869805812836,
|
|
"rewards/margins": 0.3496010899543762,
|
|
"rewards/rejected": -0.534659743309021,
|
|
"step": 608
|
|
},
|
|
{
|
|
"epoch": 12.10401891252955,
|
|
"grad_norm": 24.5542049407959,
|
|
"learning_rate": 8.148578611867113e-08,
|
|
"log_odds_chosen": 2.4431307315826416,
|
|
"log_odds_ratio": -0.3909703195095062,
|
|
"logits/chosen": -1.8700110912322998,
|
|
"logits/rejected": -1.9457833766937256,
|
|
"logps/chosen": -1.2895874977111816,
|
|
"logps/rejected": -3.4220337867736816,
|
|
"loss": 1.2212,
|
|
"nll_loss": 1.1831854581832886,
|
|
"rewards/accuracies": 0.75,
|
|
"rewards/chosen": -0.19343814253807068,
|
|
"rewards/margins": 0.31986698508262634,
|
|
"rewards/rejected": -0.5133051872253418,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 12.10401891252955,
|
|
"eval_log_odds_chosen": 1.515697956085205,
|
|
"eval_log_odds_ratio": -0.213613823056221,
|
|
"eval_logits/chosen": -2.373903274536133,
|
|
"eval_logits/rejected": -2.2387218475341797,
|
|
"eval_logps/chosen": -1.2586114406585693,
|
|
"eval_logps/rejected": -2.530747175216675,
|
|
"eval_loss": 1.0882254838943481,
|
|
"eval_nll_loss": 1.085294485092163,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.18879172205924988,
|
|
"eval_rewards/margins": 0.19082039594650269,
|
|
"eval_rewards/rejected": -0.37961211800575256,
|
|
"eval_runtime": 0.8037,
|
|
"eval_samples_per_second": 170.464,
|
|
"eval_steps_per_second": 6.221,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 12.709219858156029,
|
|
"grad_norm": 19.59035301208496,
|
|
"learning_rate": 5.848888922025552e-08,
|
|
"log_odds_chosen": 2.298971176147461,
|
|
"log_odds_ratio": -0.4026568830013275,
|
|
"logits/chosen": -1.9471426010131836,
|
|
"logits/rejected": -1.9182159900665283,
|
|
"logps/chosen": -1.3048053979873657,
|
|
"logps/rejected": -3.2884950637817383,
|
|
"loss": 1.2173,
|
|
"nll_loss": 1.1782861948013306,
|
|
"rewards/accuracies": 0.76171875,
|
|
"rewards/chosen": -0.19572080671787262,
|
|
"rewards/margins": 0.2975533902645111,
|
|
"rewards/rejected": -0.4932742416858673,
|
|
"step": 672
|
|
},
|
|
{
|
|
"epoch": 13.314420803782506,
|
|
"grad_norm": 29.107627868652344,
|
|
"learning_rate": 3.887349723342303e-08,
|
|
"log_odds_chosen": 2.654096841812134,
|
|
"log_odds_ratio": -0.38686317205429077,
|
|
"logits/chosen": -1.9159326553344727,
|
|
"logits/rejected": -1.9410839080810547,
|
|
"logps/chosen": -1.2585792541503906,
|
|
"logps/rejected": -3.5659923553466797,
|
|
"loss": 1.2261,
|
|
"nll_loss": 1.1586594581604004,
|
|
"rewards/accuracies": 0.76953125,
|
|
"rewards/chosen": -0.18878689408302307,
|
|
"rewards/margins": 0.34611204266548157,
|
|
"rewards/rejected": -0.5348988771438599,
|
|
"step": 704
|
|
},
|
|
{
|
|
"epoch": 13.617021276595745,
|
|
"eval_log_odds_chosen": 1.5199410915374756,
|
|
"eval_log_odds_ratio": -0.21233825385570526,
|
|
"eval_logits/chosen": -2.366751194000244,
|
|
"eval_logits/rejected": -2.2316524982452393,
|
|
"eval_logps/chosen": -1.2421499490737915,
|
|
"eval_logps/rejected": -2.5126953125,
|
|
"eval_loss": 1.0794531106948853,
|
|
"eval_nll_loss": 1.0783252716064453,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.18632249534130096,
|
|
"eval_rewards/margins": 0.190581813454628,
|
|
"eval_rewards/rejected": -0.37690430879592896,
|
|
"eval_runtime": 0.7829,
|
|
"eval_samples_per_second": 175.001,
|
|
"eval_steps_per_second": 6.387,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 13.919621749408984,
|
|
"grad_norm": 22.09730339050293,
|
|
"learning_rate": 2.298595844092377e-08,
|
|
"log_odds_chosen": 2.671109199523926,
|
|
"log_odds_ratio": -0.3479268252849579,
|
|
"logits/chosen": -2.0034313201904297,
|
|
"logits/rejected": -1.870398998260498,
|
|
"logps/chosen": -1.2474991083145142,
|
|
"logps/rejected": -3.5732622146606445,
|
|
"loss": 1.2194,
|
|
"nll_loss": 1.1217308044433594,
|
|
"rewards/accuracies": 0.80859375,
|
|
"rewards/chosen": -0.18712489306926727,
|
|
"rewards/margins": 0.3488644063472748,
|
|
"rewards/rejected": -0.5359892845153809,
|
|
"step": 736
|
|
},
|
|
{
|
|
"epoch": 14.52482269503546,
|
|
"grad_norm": 24.665891647338867,
|
|
"learning_rate": 1.1106798553464802e-08,
|
|
"log_odds_chosen": 2.7288260459899902,
|
|
"log_odds_ratio": -0.34282395243644714,
|
|
"logits/chosen": -1.9847919940948486,
|
|
"logits/rejected": -1.9164719581604004,
|
|
"logps/chosen": -1.250791311264038,
|
|
"logps/rejected": -3.6267054080963135,
|
|
"loss": 1.2016,
|
|
"nll_loss": 1.1445385217666626,
|
|
"rewards/accuracies": 0.84375,
|
|
"rewards/chosen": -0.18761873245239258,
|
|
"rewards/margins": 0.3563871383666992,
|
|
"rewards/rejected": -0.5440058708190918,
|
|
"step": 768
|
|
},
|
|
{
|
|
"epoch": 15.130023640661939,
|
|
"grad_norm": 47.552574157714844,
|
|
"learning_rate": 3.4457674771554422e-09,
|
|
"log_odds_chosen": 2.26895809173584,
|
|
"log_odds_ratio": -0.3884022831916809,
|
|
"logits/chosen": -1.9943946599960327,
|
|
"logits/rejected": -1.961279034614563,
|
|
"logps/chosen": -1.3015713691711426,
|
|
"logps/rejected": -3.266200304031372,
|
|
"loss": 1.2176,
|
|
"nll_loss": 1.1944975852966309,
|
|
"rewards/accuracies": 0.76953125,
|
|
"rewards/chosen": -0.1952357143163681,
|
|
"rewards/margins": 0.29469433426856995,
|
|
"rewards/rejected": -0.48993009328842163,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 15.130023640661939,
|
|
"eval_log_odds_chosen": 1.5204813480377197,
|
|
"eval_log_odds_ratio": -0.21171817183494568,
|
|
"eval_logits/chosen": -2.370277166366577,
|
|
"eval_logits/rejected": -2.233962059020996,
|
|
"eval_logps/chosen": -1.2457667589187622,
|
|
"eval_logps/rejected": -2.517129898071289,
|
|
"eval_loss": 1.081107497215271,
|
|
"eval_nll_loss": 1.0765924453735352,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.18686501681804657,
|
|
"eval_rewards/margins": 0.19070449471473694,
|
|
"eval_rewards/rejected": -0.3775694966316223,
|
|
"eval_runtime": 0.8025,
|
|
"eval_samples_per_second": 170.724,
|
|
"eval_steps_per_second": 6.231,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 15.735224586288416,
|
|
"grad_norm": 17.138763427734375,
|
|
"learning_rate": 1.3813576683111006e-10,
|
|
"log_odds_chosen": 2.620955228805542,
|
|
"log_odds_ratio": -0.34706899523735046,
|
|
"logits/chosen": -1.9596831798553467,
|
|
"logits/rejected": -1.9431588649749756,
|
|
"logps/chosen": -1.2250535488128662,
|
|
"logps/rejected": -3.4995248317718506,
|
|
"loss": 1.2191,
|
|
"nll_loss": 1.129616379737854,
|
|
"rewards/accuracies": 0.78515625,
|
|
"rewards/chosen": -0.18375803530216217,
|
|
"rewards/margins": 0.34117066860198975,
|
|
"rewards/rejected": -0.5249287486076355,
|
|
"step": 832
|
|
},
|
|
{
|
|
"epoch": 15.886524822695035,
|
|
"grad_norm": 26.069650650024414,
|
|
"learning_rate": 0.0,
|
|
"log_odds_chosen": 2.719402551651001,
|
|
"log_odds_ratio": -0.3425367772579193,
|
|
"logits/chosen": -2.0392448902130127,
|
|
"logits/rejected": -1.9386732578277588,
|
|
"logps/chosen": -1.2458713054656982,
|
|
"logps/rejected": -3.6082699298858643,
|
|
"loss": 1.2157,
|
|
"nll_loss": 1.1475220918655396,
|
|
"rewards/accuracies": 0.828125,
|
|
"rewards/chosen": -0.1868807077407837,
|
|
"rewards/margins": 0.35435977578163147,
|
|
"rewards/rejected": -0.5412405133247375,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 15.886524822695035,
|
|
"eval_log_odds_chosen": 1.5112136602401733,
|
|
"eval_log_odds_ratio": -0.2135576754808426,
|
|
"eval_logits/chosen": -2.3702893257141113,
|
|
"eval_logits/rejected": -2.2342278957366943,
|
|
"eval_logps/chosen": -1.250806450843811,
|
|
"eval_logps/rejected": -2.515653133392334,
|
|
"eval_loss": 1.0810010433197021,
|
|
"eval_nll_loss": 1.0763533115386963,
|
|
"eval_rewards/accuracies": 1.0,
|
|
"eval_rewards/chosen": -0.18762096762657166,
|
|
"eval_rewards/margins": 0.18972699344158173,
|
|
"eval_rewards/rejected": -0.37734800577163696,
|
|
"eval_runtime": 0.7924,
|
|
"eval_samples_per_second": 172.894,
|
|
"eval_steps_per_second": 6.31,
|
|
"step": 840
|
|
}
|
|
],
|
|
"logging_steps": 32,
|
|
"max_steps": 840,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 17,
|
|
"save_steps": 80,
|
|
"total_flos": 0.0,
|
|
"train_batch_size": 1,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|
|
|