Echo-IE-3B-v0.1 / trainer_state.json
Rakuto's picture
End of training
5d604f7 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 8.0,
"eval_steps": 1,
"global_step": 20,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.4,
"grad_norm": 18.625,
"learning_rate": 7.599999999999999e-06,
"log_odds_chosen": 0.3293280005455017,
"log_odds_ratio": -0.545608401298523,
"logits/chosen": -0.22181883454322815,
"logits/rejected": -0.2948111891746521,
"logps/chosen": -1.9494528770446777,
"logps/rejected": -2.2380290031433105,
"loss": 1.8976,
"nll_loss": 1.8430625200271606,
"rewards/accuracies": 0.984375,
"rewards/chosen": -0.1949452906847,
"rewards/margins": 0.028857626020908356,
"rewards/rejected": -0.22380293905735016,
"step": 1
},
{
"epoch": 0.4,
"eval_log_odds_chosen": 0.3650469183921814,
"eval_log_odds_ratio": -0.5312943458557129,
"eval_logits/chosen": -0.11938808858394623,
"eval_logits/rejected": -0.15210816264152527,
"eval_logps/chosen": -1.7196087837219238,
"eval_logps/rejected": -2.026923179626465,
"eval_loss": 1.6382209062576294,
"eval_nll_loss": 1.585091471672058,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.17196084558963776,
"eval_rewards/margins": 0.030731473118066788,
"eval_rewards/rejected": -0.20269232988357544,
"eval_runtime": 0.9093,
"eval_samples_per_second": 19.795,
"eval_steps_per_second": 9.898,
"step": 1
},
{
"epoch": 0.8,
"grad_norm": 8.125,
"learning_rate": 7.2e-06,
"log_odds_chosen": 0.3380122482776642,
"log_odds_ratio": -0.5430496335029602,
"logits/chosen": -0.07896450906991959,
"logits/rejected": -0.11844252794981003,
"logps/chosen": -1.65217125415802,
"logps/rejected": -1.9326075315475464,
"loss": 1.5573,
"nll_loss": 1.5030204057693481,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.16521713137626648,
"rewards/margins": 0.02804364264011383,
"rewards/rejected": -0.19326075911521912,
"step": 2
},
{
"epoch": 0.8,
"eval_log_odds_chosen": 0.39069631695747375,
"eval_log_odds_ratio": -0.5206953883171082,
"eval_logits/chosen": -0.10498537868261337,
"eval_logits/rejected": -0.136393204331398,
"eval_logps/chosen": -1.5854017734527588,
"eval_logps/rejected": -1.9058791399002075,
"eval_loss": 1.5333527326583862,
"eval_nll_loss": 1.4812833070755005,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.15854017436504364,
"eval_rewards/margins": 0.032047729939222336,
"eval_rewards/rejected": -0.19058789312839508,
"eval_runtime": 0.9117,
"eval_samples_per_second": 19.744,
"eval_steps_per_second": 9.872,
"step": 2
},
{
"epoch": 1.2,
"grad_norm": 7.875,
"learning_rate": 6.799999999999999e-06,
"log_odds_chosen": 0.36618566513061523,
"log_odds_ratio": -0.5316208004951477,
"logits/chosen": -0.045309849083423615,
"logits/rejected": -0.09037788212299347,
"logps/chosen": -1.4718542098999023,
"logps/rejected": -1.7647374868392944,
"loss": 1.4427,
"nll_loss": 1.3894941806793213,
"rewards/accuracies": 0.984375,
"rewards/chosen": -0.14718542993068695,
"rewards/margins": 0.029288342222571373,
"rewards/rejected": -0.17647376656532288,
"step": 3
},
{
"epoch": 1.2,
"eval_log_odds_chosen": 0.41840454936027527,
"eval_log_odds_ratio": -0.5097466111183167,
"eval_logits/chosen": -0.10623180121183395,
"eval_logits/rejected": -0.13603489100933075,
"eval_logps/chosen": -1.4396543502807617,
"eval_logps/rejected": -1.7711676359176636,
"eval_loss": 1.4337514638900757,
"eval_nll_loss": 1.3827767372131348,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.1439654380083084,
"eval_rewards/margins": 0.03315134346485138,
"eval_rewards/rejected": -0.1771167814731598,
"eval_runtime": 0.9101,
"eval_samples_per_second": 19.778,
"eval_steps_per_second": 9.889,
"step": 3
},
{
"epoch": 1.6,
"grad_norm": 7.78125,
"learning_rate": 6.4e-06,
"log_odds_chosen": 0.4042999744415283,
"log_odds_ratio": -0.5167055726051331,
"logits/chosen": -0.0431833378970623,
"logits/rejected": -0.07951641082763672,
"logps/chosen": -1.3549946546554565,
"logps/rejected": -1.6684811115264893,
"loss": 1.3493,
"nll_loss": 1.2975877523422241,
"rewards/accuracies": 0.984375,
"rewards/chosen": -0.13549946248531342,
"rewards/margins": 0.03134865313768387,
"rewards/rejected": -0.1668480932712555,
"step": 4
},
{
"epoch": 1.6,
"eval_log_odds_chosen": 0.44134366512298584,
"eval_log_odds_ratio": -0.5008935332298279,
"eval_logits/chosen": -0.104909747838974,
"eval_logits/rejected": -0.13491111993789673,
"eval_logps/chosen": -1.340553641319275,
"eval_logps/rejected": -1.6810719966888428,
"eval_loss": 1.3429497480392456,
"eval_nll_loss": 1.292860507965088,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.13405534625053406,
"eval_rewards/margins": 0.03405185043811798,
"eval_rewards/rejected": -0.16810721158981323,
"eval_runtime": 0.9119,
"eval_samples_per_second": 19.738,
"eval_steps_per_second": 9.869,
"step": 4
},
{
"epoch": 2.0,
"grad_norm": 6.96875,
"learning_rate": 6e-06,
"log_odds_chosen": 0.4279482960700989,
"log_odds_ratio": -0.5070147514343262,
"logits/chosen": -0.034176260232925415,
"logits/rejected": -0.07206660509109497,
"logps/chosen": -1.2959173917770386,
"logps/rejected": -1.6209981441497803,
"loss": 1.2683,
"nll_loss": 1.2175886631011963,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.12959173321723938,
"rewards/margins": 0.03250807896256447,
"rewards/rejected": -0.16209980845451355,
"step": 5
},
{
"epoch": 2.0,
"eval_log_odds_chosen": 0.46249422430992126,
"eval_log_odds_ratio": -0.49292659759521484,
"eval_logits/chosen": -0.11007735878229141,
"eval_logits/rejected": -0.14086602628231049,
"eval_logps/chosen": -1.2711644172668457,
"eval_logps/rejected": -1.6204769611358643,
"eval_loss": 1.2642947435379028,
"eval_nll_loss": 1.215002179145813,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.12711645662784576,
"eval_rewards/margins": 0.034931257367134094,
"eval_rewards/rejected": -0.16204769909381866,
"eval_runtime": 0.9131,
"eval_samples_per_second": 19.712,
"eval_steps_per_second": 9.856,
"step": 5
},
{
"epoch": 2.4,
"grad_norm": 6.65625,
"learning_rate": 5.6e-06,
"log_odds_chosen": 0.4413740336894989,
"log_odds_ratio": -0.5025829672813416,
"logits/chosen": -0.026908639818429947,
"logits/rejected": -0.06323603540658951,
"logps/chosen": -1.2035048007965088,
"logps/rejected": -1.5302585363388062,
"loss": 1.1736,
"nll_loss": 1.1233787536621094,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.12035048753023148,
"rewards/margins": 0.03267538174986839,
"rewards/rejected": -0.15302586555480957,
"step": 6
},
{
"epoch": 2.4,
"eval_log_odds_chosen": 0.4787115454673767,
"eval_log_odds_ratio": -0.4869447946548462,
"eval_logits/chosen": -0.11934076249599457,
"eval_logits/rejected": -0.15017718076705933,
"eval_logps/chosen": -1.2190965414047241,
"eval_logps/rejected": -1.5743210315704346,
"eval_loss": 1.1919617652893066,
"eval_nll_loss": 1.1432671546936035,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.12190967053174973,
"eval_rewards/margins": 0.03552243858575821,
"eval_rewards/rejected": -0.15743210911750793,
"eval_runtime": 0.9096,
"eval_samples_per_second": 19.79,
"eval_steps_per_second": 9.895,
"step": 6
},
{
"epoch": 2.8,
"grad_norm": 6.96875,
"learning_rate": 5.2e-06,
"log_odds_chosen": 0.46652132272720337,
"log_odds_ratio": -0.4945680797100067,
"logits/chosen": -0.0492391437292099,
"logits/rejected": -0.0838259607553482,
"logps/chosen": -1.1840012073516846,
"logps/rejected": -1.525024652481079,
"loss": 1.1212,
"nll_loss": 1.0717414617538452,
"rewards/accuracies": 0.984375,
"rewards/chosen": -0.11840011179447174,
"rewards/margins": 0.03410235792398453,
"rewards/rejected": -0.15250247716903687,
"step": 7
},
{
"epoch": 2.8,
"eval_log_odds_chosen": 0.4977823495864868,
"eval_log_odds_ratio": -0.4801054000854492,
"eval_logits/chosen": -0.12862297892570496,
"eval_logits/rejected": -0.16006678342819214,
"eval_logps/chosen": -1.1753649711608887,
"eval_logps/rejected": -1.5391558408737183,
"eval_loss": 1.1233677864074707,
"eval_nll_loss": 1.0753573179244995,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.1175365075469017,
"eval_rewards/margins": 0.03637908399105072,
"eval_rewards/rejected": -0.15391558408737183,
"eval_runtime": 0.9109,
"eval_samples_per_second": 19.76,
"eval_steps_per_second": 9.88,
"step": 7
},
{
"epoch": 3.2,
"grad_norm": 6.84375,
"learning_rate": 4.8e-06,
"log_odds_chosen": 0.4919642210006714,
"log_odds_ratio": -0.48277872800827026,
"logits/chosen": -0.05812246352434158,
"logits/rejected": -0.0962948203086853,
"logps/chosen": -1.1183116436004639,
"logps/rejected": -1.4710171222686768,
"loss": 1.0518,
"nll_loss": 1.0035254955291748,
"rewards/accuracies": 0.984375,
"rewards/chosen": -0.11183115839958191,
"rewards/margins": 0.03527054935693741,
"rewards/rejected": -0.14710170030593872,
"step": 8
},
{
"epoch": 3.2,
"eval_log_odds_chosen": 0.5149489045143127,
"eval_log_odds_ratio": -0.47412246465682983,
"eval_logits/chosen": -0.14218762516975403,
"eval_logits/rejected": -0.17368356883525848,
"eval_logps/chosen": -1.1381663084030151,
"eval_logps/rejected": -1.5092874765396118,
"eval_loss": 1.0610299110412598,
"eval_nll_loss": 1.013617753982544,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.11381663382053375,
"eval_rewards/margins": 0.03711211308836937,
"eval_rewards/rejected": -0.15092875063419342,
"eval_runtime": 0.9136,
"eval_samples_per_second": 19.703,
"eval_steps_per_second": 9.852,
"step": 8
},
{
"epoch": 3.6,
"grad_norm": 6.71875,
"learning_rate": 4.4e-06,
"log_odds_chosen": 0.5135414004325867,
"log_odds_ratio": -0.47593823075294495,
"logits/chosen": -0.06834974884986877,
"logits/rejected": -0.11046632379293442,
"logps/chosen": -1.0532824993133545,
"logps/rejected": -1.4117248058319092,
"loss": 0.9805,
"nll_loss": 0.9329336285591125,
"rewards/accuracies": 0.984375,
"rewards/chosen": -0.1053282618522644,
"rewards/margins": 0.035844214260578156,
"rewards/rejected": -0.14117246866226196,
"step": 9
},
{
"epoch": 3.6,
"eval_log_odds_chosen": 0.5320238471031189,
"eval_log_odds_ratio": -0.4681590497493744,
"eval_logits/chosen": -0.1644686907529831,
"eval_logits/rejected": -0.19686466455459595,
"eval_logps/chosen": -1.104932427406311,
"eval_logps/rejected": -1.4836636781692505,
"eval_loss": 1.0012433528900146,
"eval_nll_loss": 0.9544275999069214,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.1104932650923729,
"eval_rewards/margins": 0.03787311539053917,
"eval_rewards/rejected": -0.14836637675762177,
"eval_runtime": 0.9099,
"eval_samples_per_second": 19.783,
"eval_steps_per_second": 9.892,
"step": 9
},
{
"epoch": 4.0,
"grad_norm": 6.75,
"learning_rate": 4e-06,
"log_odds_chosen": 0.5470705032348633,
"log_odds_ratio": -0.4658868908882141,
"logits/chosen": -0.09963471442461014,
"logits/rejected": -0.13952209055423737,
"logps/chosen": -1.0679322481155396,
"logps/rejected": -1.4557496309280396,
"loss": 0.9299,
"nll_loss": 0.8832955956459045,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.10679321736097336,
"rewards/margins": 0.038781747221946716,
"rewards/rejected": -0.14557495713233948,
"step": 10
},
{
"epoch": 4.0,
"eval_log_odds_chosen": 0.547681450843811,
"eval_log_odds_ratio": -0.46276018023490906,
"eval_logits/chosen": -0.1875133067369461,
"eval_logits/rejected": -0.22008682787418365,
"eval_logps/chosen": -1.07937490940094,
"eval_logps/rejected": -1.4652737379074097,
"eval_loss": 0.9495540261268616,
"eval_nll_loss": 0.9032779335975647,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.10793750733137131,
"eval_rewards/margins": 0.03858988359570503,
"eval_rewards/rejected": -0.14652739465236664,
"eval_runtime": 0.9118,
"eval_samples_per_second": 19.742,
"eval_steps_per_second": 9.871,
"step": 10
},
{
"epoch": 4.4,
"grad_norm": 6.5625,
"learning_rate": 3.6e-06,
"log_odds_chosen": 0.5115205645561218,
"log_odds_ratio": -0.4781632423400879,
"logits/chosen": -0.11988667398691177,
"logits/rejected": -0.15675179660320282,
"logps/chosen": -1.0269582271575928,
"logps/rejected": -1.3790360689163208,
"loss": 0.8761,
"nll_loss": 0.8282526135444641,
"rewards/accuracies": 0.984375,
"rewards/chosen": -0.10269583761692047,
"rewards/margins": 0.03520777449011803,
"rewards/rejected": -0.1379036009311676,
"step": 11
},
{
"epoch": 4.4,
"eval_log_odds_chosen": 0.560771107673645,
"eval_log_odds_ratio": -0.45837968587875366,
"eval_logits/chosen": -0.21046772599220276,
"eval_logits/rejected": -0.2431277632713318,
"eval_logps/chosen": -1.0591222047805786,
"eval_logps/rejected": -1.4509668350219727,
"eval_loss": 0.9070050120353699,
"eval_nll_loss": 0.8611669540405273,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.10591220110654831,
"eval_rewards/margins": 0.03918447345495224,
"eval_rewards/rejected": -0.14509668946266174,
"eval_runtime": 0.9119,
"eval_samples_per_second": 19.739,
"eval_steps_per_second": 9.869,
"step": 11
},
{
"epoch": 4.8,
"grad_norm": 6.5625,
"learning_rate": 3.2e-06,
"log_odds_chosen": 0.6133227944374084,
"log_odds_ratio": -0.4412252902984619,
"logits/chosen": -0.14904728531837463,
"logits/rejected": -0.1963028907775879,
"logps/chosen": -0.9898001551628113,
"logps/rejected": -1.4105660915374756,
"loss": 0.8337,
"nll_loss": 0.7895629405975342,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.09898000955581665,
"rewards/margins": 0.04207659140229225,
"rewards/rejected": -0.1410566121339798,
"step": 12
},
{
"epoch": 4.8,
"eval_log_odds_chosen": 0.566861093044281,
"eval_log_odds_ratio": -0.45656245946884155,
"eval_logits/chosen": -0.22315236926078796,
"eval_logits/rejected": -0.2561546862125397,
"eval_logps/chosen": -1.0492280721664429,
"eval_logps/rejected": -1.443569302558899,
"eval_loss": 0.8863641023635864,
"eval_nll_loss": 0.8407078981399536,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.1049228087067604,
"eval_rewards/margins": 0.03943413123488426,
"eval_rewards/rejected": -0.14435693621635437,
"eval_runtime": 0.9164,
"eval_samples_per_second": 19.642,
"eval_steps_per_second": 9.821,
"step": 12
},
{
"epoch": 5.2,
"grad_norm": 6.5,
"learning_rate": 2.8e-06,
"log_odds_chosen": 0.49919161200523376,
"log_odds_ratio": -0.4824044108390808,
"logits/chosen": -0.15154853463172913,
"logits/rejected": -0.18894340097904205,
"logps/chosen": -1.0274688005447388,
"logps/rejected": -1.3725156784057617,
"loss": 0.7975,
"nll_loss": 0.7492961883544922,
"rewards/accuracies": 0.953125,
"rewards/chosen": -0.1027468740940094,
"rewards/margins": 0.034504685550928116,
"rewards/rejected": -0.13725155591964722,
"step": 13
},
{
"epoch": 5.2,
"eval_log_odds_chosen": 0.57403564453125,
"eval_log_odds_ratio": -0.45407184958457947,
"eval_logits/chosen": -0.23968791961669922,
"eval_logits/rejected": -0.2726818025112152,
"eval_logps/chosen": -1.0405869483947754,
"eval_logps/rejected": -1.4385521411895752,
"eval_loss": 0.8664105534553528,
"eval_nll_loss": 0.8210033774375916,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.10405868291854858,
"eval_rewards/margins": 0.03979651629924774,
"eval_rewards/rejected": -0.14385519921779633,
"eval_runtime": 0.9125,
"eval_samples_per_second": 19.726,
"eval_steps_per_second": 9.863,
"step": 13
},
{
"epoch": 5.6,
"grad_norm": 6.40625,
"learning_rate": 2.4e-06,
"log_odds_chosen": 0.6314184665679932,
"log_odds_ratio": -0.4354270100593567,
"logits/chosen": -0.1857105791568756,
"logits/rejected": -0.22656874358654022,
"logps/chosen": -0.9448862075805664,
"logps/rejected": -1.3670051097869873,
"loss": 0.788,
"nll_loss": 0.7444556951522827,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.09448862820863724,
"rewards/margins": 0.04221189767122269,
"rewards/rejected": -0.13670052587985992,
"step": 14
},
{
"epoch": 5.6,
"eval_log_odds_chosen": 0.5794126987457275,
"eval_log_odds_ratio": -0.4523561894893646,
"eval_logits/chosen": -0.2507224678993225,
"eval_logits/rejected": -0.2837482690811157,
"eval_logps/chosen": -1.0325850248336792,
"eval_logps/rejected": -1.4329302310943604,
"eval_loss": 0.8492475152015686,
"eval_nll_loss": 0.8040118217468262,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.10325851291418076,
"eval_rewards/margins": 0.04003452509641647,
"eval_rewards/rejected": -0.14329302310943604,
"eval_runtime": 0.9113,
"eval_samples_per_second": 19.752,
"eval_steps_per_second": 9.876,
"step": 14
},
{
"epoch": 6.0,
"grad_norm": 6.25,
"learning_rate": 2e-06,
"log_odds_chosen": 0.5837588906288147,
"log_odds_ratio": -0.45432496070861816,
"logits/chosen": -0.18676218390464783,
"logits/rejected": -0.23436766862869263,
"logps/chosen": -1.0190256834030151,
"logps/rejected": -1.4252986907958984,
"loss": 0.78,
"nll_loss": 0.7345477342605591,
"rewards/accuracies": 0.984375,
"rewards/chosen": -0.1019025593996048,
"rewards/margins": 0.040627315640449524,
"rewards/rejected": -0.14252987504005432,
"step": 15
},
{
"epoch": 6.0,
"eval_log_odds_chosen": 0.5839331150054932,
"eval_log_odds_ratio": -0.45080792903900146,
"eval_logits/chosen": -0.26143890619277954,
"eval_logits/rejected": -0.29441285133361816,
"eval_logps/chosen": -1.0263959169387817,
"eval_logps/rejected": -1.4286550283432007,
"eval_loss": 0.8333800435066223,
"eval_nll_loss": 0.7882992625236511,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.10263960063457489,
"eval_rewards/margins": 0.040225885808467865,
"eval_rewards/rejected": -0.14286547899246216,
"eval_runtime": 0.9128,
"eval_samples_per_second": 19.72,
"eval_steps_per_second": 9.86,
"step": 15
},
{
"epoch": 6.4,
"grad_norm": 6.25,
"learning_rate": 1.6e-06,
"log_odds_chosen": 0.6216727495193481,
"log_odds_ratio": -0.4399999976158142,
"logits/chosen": -0.19661211967468262,
"logits/rejected": -0.24193710088729858,
"logps/chosen": -0.949080228805542,
"logps/rejected": -1.3682817220687866,
"loss": 0.7395,
"nll_loss": 0.6955283880233765,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.09490802884101868,
"rewards/margins": 0.04192016273736954,
"rewards/rejected": -0.13682818412780762,
"step": 16
},
{
"epoch": 6.4,
"eval_log_odds_chosen": 0.5865243673324585,
"eval_log_odds_ratio": -0.44995343685150146,
"eval_logits/chosen": -0.2722480595111847,
"eval_logits/rejected": -0.30540305376052856,
"eval_logps/chosen": -1.0213567018508911,
"eval_logps/rejected": -1.4244111776351929,
"eval_loss": 0.821089506149292,
"eval_nll_loss": 0.7760941386222839,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.10213566571474075,
"eval_rewards/margins": 0.040305450558662415,
"eval_rewards/rejected": -0.14244110882282257,
"eval_runtime": 0.9109,
"eval_samples_per_second": 19.76,
"eval_steps_per_second": 9.88,
"step": 16
},
{
"epoch": 6.8,
"grad_norm": 6.15625,
"learning_rate": 1.2e-06,
"log_odds_chosen": 0.5441011786460876,
"log_odds_ratio": -0.4661071002483368,
"logits/chosen": -0.20644214749336243,
"logits/rejected": -0.2489599585533142,
"logps/chosen": -1.002626657485962,
"logps/rejected": -1.3740018606185913,
"loss": 0.7446,
"nll_loss": 0.6980130076408386,
"rewards/accuracies": 0.96875,
"rewards/chosen": -0.10026266425848007,
"rewards/margins": 0.037137530744075775,
"rewards/rejected": -0.13740019500255585,
"step": 17
},
{
"epoch": 6.8,
"eval_log_odds_chosen": 0.5888115763664246,
"eval_log_odds_ratio": -0.44922754168510437,
"eval_logits/chosen": -0.2722025513648987,
"eval_logits/rejected": -0.30542224645614624,
"eval_logps/chosen": -1.0186693668365479,
"eval_logps/rejected": -1.4229329824447632,
"eval_loss": 0.8164036870002747,
"eval_nll_loss": 0.7714808583259583,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.10186693072319031,
"eval_rewards/margins": 0.040426358580589294,
"eval_rewards/rejected": -0.1422932893037796,
"eval_runtime": 0.9099,
"eval_samples_per_second": 19.782,
"eval_steps_per_second": 9.891,
"step": 17
},
{
"epoch": 7.2,
"grad_norm": 6.125,
"learning_rate": 8e-07,
"log_odds_chosen": 0.603852391242981,
"log_odds_ratio": -0.44559258222579956,
"logits/chosen": -0.21281519532203674,
"logits/rejected": -0.2561994791030884,
"logps/chosen": -0.9362643957138062,
"logps/rejected": -1.3377454280853271,
"loss": 0.7518,
"nll_loss": 0.7072104811668396,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.09362644702196121,
"rewards/margins": 0.040148116648197174,
"rewards/rejected": -0.1337745636701584,
"step": 18
},
{
"epoch": 7.2,
"eval_log_odds_chosen": 0.5903448462486267,
"eval_log_odds_ratio": -0.4487246870994568,
"eval_logits/chosen": -0.2774750292301178,
"eval_logits/rejected": -0.3105829358100891,
"eval_logps/chosen": -1.0175344944000244,
"eval_logps/rejected": -1.4226274490356445,
"eval_loss": 0.812524139881134,
"eval_nll_loss": 0.7676517963409424,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.10175344347953796,
"eval_rewards/margins": 0.040509287267923355,
"eval_rewards/rejected": -0.1422627568244934,
"eval_runtime": 0.9145,
"eval_samples_per_second": 19.683,
"eval_steps_per_second": 9.841,
"step": 18
},
{
"epoch": 7.6,
"grad_norm": 6.21875,
"learning_rate": 4e-07,
"log_odds_chosen": 0.6179953217506409,
"log_odds_ratio": -0.4413047730922699,
"logits/chosen": -0.22949087619781494,
"logits/rejected": -0.27671945095062256,
"logps/chosen": -0.9877333045005798,
"logps/rejected": -1.4103630781173706,
"loss": 0.7431,
"nll_loss": 0.6990159749984741,
"rewards/accuracies": 0.984375,
"rewards/chosen": -0.09877333790063858,
"rewards/margins": 0.0422629676759243,
"rewards/rejected": -0.14103631675243378,
"step": 19
},
{
"epoch": 7.6,
"eval_log_odds_chosen": 0.5911502838134766,
"eval_log_odds_ratio": -0.44839149713516235,
"eval_logits/chosen": -0.276792049407959,
"eval_logits/rejected": -0.31038832664489746,
"eval_logps/chosen": -1.0161813497543335,
"eval_logps/rejected": -1.4216675758361816,
"eval_loss": 0.8106683492660522,
"eval_nll_loss": 0.7658291459083557,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.10161812603473663,
"eval_rewards/margins": 0.04054862633347511,
"eval_rewards/rejected": -0.14216677844524384,
"eval_runtime": 0.9137,
"eval_samples_per_second": 19.701,
"eval_steps_per_second": 9.85,
"step": 19
},
{
"epoch": 8.0,
"grad_norm": 6.15625,
"learning_rate": 0.0,
"log_odds_chosen": 0.578275740146637,
"log_odds_ratio": -0.45558279752731323,
"logits/chosen": -0.210488423705101,
"logits/rejected": -0.24888469278812408,
"logps/chosen": -0.9616943001747131,
"logps/rejected": -1.3508220911026,
"loss": 0.726,
"nll_loss": 0.6804530620574951,
"rewards/accuracies": 0.984375,
"rewards/chosen": -0.09616944193840027,
"rewards/margins": 0.038912780582904816,
"rewards/rejected": -0.1350822150707245,
"step": 20
},
{
"epoch": 8.0,
"eval_log_odds_chosen": 0.5904660224914551,
"eval_log_odds_ratio": -0.44868627190589905,
"eval_logits/chosen": -0.27975308895111084,
"eval_logits/rejected": -0.3132215440273285,
"eval_logps/chosen": -1.0163097381591797,
"eval_logps/rejected": -1.4213618040084839,
"eval_loss": 0.8110137581825256,
"eval_nll_loss": 0.7661450505256653,
"eval_rewards/accuracies": 1.0,
"eval_rewards/chosen": -0.10163097083568573,
"eval_rewards/margins": 0.04050520807504654,
"eval_rewards/rejected": -0.14213618636131287,
"eval_runtime": 0.9114,
"eval_samples_per_second": 19.75,
"eval_steps_per_second": 9.875,
"step": 20
},
{
"epoch": 8.0,
"step": 20,
"total_flos": 0.0,
"train_loss": 1.0276277005672454,
"train_runtime": 261.791,
"train_samples_per_second": 6.074,
"train_steps_per_second": 0.076
}
],
"logging_steps": 1,
"max_steps": 20,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}