|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 8.0, |
|
"eval_steps": 1, |
|
"global_step": 20, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 18.625, |
|
"learning_rate": 7.599999999999999e-06, |
|
"log_odds_chosen": 0.3293280005455017, |
|
"log_odds_ratio": -0.545608401298523, |
|
"logits/chosen": -0.22181883454322815, |
|
"logits/rejected": -0.2948111891746521, |
|
"logps/chosen": -1.9494528770446777, |
|
"logps/rejected": -2.2380290031433105, |
|
"loss": 1.8976, |
|
"nll_loss": 1.8430625200271606, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.1949452906847, |
|
"rewards/margins": 0.028857626020908356, |
|
"rewards/rejected": -0.22380293905735016, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"eval_log_odds_chosen": 0.3650469183921814, |
|
"eval_log_odds_ratio": -0.5312943458557129, |
|
"eval_logits/chosen": -0.11938808858394623, |
|
"eval_logits/rejected": -0.15210816264152527, |
|
"eval_logps/chosen": -1.7196087837219238, |
|
"eval_logps/rejected": -2.026923179626465, |
|
"eval_loss": 1.6382209062576294, |
|
"eval_nll_loss": 1.585091471672058, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.17196084558963776, |
|
"eval_rewards/margins": 0.030731473118066788, |
|
"eval_rewards/rejected": -0.20269232988357544, |
|
"eval_runtime": 0.9093, |
|
"eval_samples_per_second": 19.795, |
|
"eval_steps_per_second": 9.898, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 8.125, |
|
"learning_rate": 7.2e-06, |
|
"log_odds_chosen": 0.3380122482776642, |
|
"log_odds_ratio": -0.5430496335029602, |
|
"logits/chosen": -0.07896450906991959, |
|
"logits/rejected": -0.11844252794981003, |
|
"logps/chosen": -1.65217125415802, |
|
"logps/rejected": -1.9326075315475464, |
|
"loss": 1.5573, |
|
"nll_loss": 1.5030204057693481, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.16521713137626648, |
|
"rewards/margins": 0.02804364264011383, |
|
"rewards/rejected": -0.19326075911521912, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"eval_log_odds_chosen": 0.39069631695747375, |
|
"eval_log_odds_ratio": -0.5206953883171082, |
|
"eval_logits/chosen": -0.10498537868261337, |
|
"eval_logits/rejected": -0.136393204331398, |
|
"eval_logps/chosen": -1.5854017734527588, |
|
"eval_logps/rejected": -1.9058791399002075, |
|
"eval_loss": 1.5333527326583862, |
|
"eval_nll_loss": 1.4812833070755005, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.15854017436504364, |
|
"eval_rewards/margins": 0.032047729939222336, |
|
"eval_rewards/rejected": -0.19058789312839508, |
|
"eval_runtime": 0.9117, |
|
"eval_samples_per_second": 19.744, |
|
"eval_steps_per_second": 9.872, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 7.875, |
|
"learning_rate": 6.799999999999999e-06, |
|
"log_odds_chosen": 0.36618566513061523, |
|
"log_odds_ratio": -0.5316208004951477, |
|
"logits/chosen": -0.045309849083423615, |
|
"logits/rejected": -0.09037788212299347, |
|
"logps/chosen": -1.4718542098999023, |
|
"logps/rejected": -1.7647374868392944, |
|
"loss": 1.4427, |
|
"nll_loss": 1.3894941806793213, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.14718542993068695, |
|
"rewards/margins": 0.029288342222571373, |
|
"rewards/rejected": -0.17647376656532288, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"eval_log_odds_chosen": 0.41840454936027527, |
|
"eval_log_odds_ratio": -0.5097466111183167, |
|
"eval_logits/chosen": -0.10623180121183395, |
|
"eval_logits/rejected": -0.13603489100933075, |
|
"eval_logps/chosen": -1.4396543502807617, |
|
"eval_logps/rejected": -1.7711676359176636, |
|
"eval_loss": 1.4337514638900757, |
|
"eval_nll_loss": 1.3827767372131348, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.1439654380083084, |
|
"eval_rewards/margins": 0.03315134346485138, |
|
"eval_rewards/rejected": -0.1771167814731598, |
|
"eval_runtime": 0.9101, |
|
"eval_samples_per_second": 19.778, |
|
"eval_steps_per_second": 9.889, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"grad_norm": 7.78125, |
|
"learning_rate": 6.4e-06, |
|
"log_odds_chosen": 0.4042999744415283, |
|
"log_odds_ratio": -0.5167055726051331, |
|
"logits/chosen": -0.0431833378970623, |
|
"logits/rejected": -0.07951641082763672, |
|
"logps/chosen": -1.3549946546554565, |
|
"logps/rejected": -1.6684811115264893, |
|
"loss": 1.3493, |
|
"nll_loss": 1.2975877523422241, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.13549946248531342, |
|
"rewards/margins": 0.03134865313768387, |
|
"rewards/rejected": -0.1668480932712555, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"eval_log_odds_chosen": 0.44134366512298584, |
|
"eval_log_odds_ratio": -0.5008935332298279, |
|
"eval_logits/chosen": -0.104909747838974, |
|
"eval_logits/rejected": -0.13491111993789673, |
|
"eval_logps/chosen": -1.340553641319275, |
|
"eval_logps/rejected": -1.6810719966888428, |
|
"eval_loss": 1.3429497480392456, |
|
"eval_nll_loss": 1.292860507965088, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.13405534625053406, |
|
"eval_rewards/margins": 0.03405185043811798, |
|
"eval_rewards/rejected": -0.16810721158981323, |
|
"eval_runtime": 0.9119, |
|
"eval_samples_per_second": 19.738, |
|
"eval_steps_per_second": 9.869, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 6.96875, |
|
"learning_rate": 6e-06, |
|
"log_odds_chosen": 0.4279482960700989, |
|
"log_odds_ratio": -0.5070147514343262, |
|
"logits/chosen": -0.034176260232925415, |
|
"logits/rejected": -0.07206660509109497, |
|
"logps/chosen": -1.2959173917770386, |
|
"logps/rejected": -1.6209981441497803, |
|
"loss": 1.2683, |
|
"nll_loss": 1.2175886631011963, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.12959173321723938, |
|
"rewards/margins": 0.03250807896256447, |
|
"rewards/rejected": -0.16209980845451355, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"eval_log_odds_chosen": 0.46249422430992126, |
|
"eval_log_odds_ratio": -0.49292659759521484, |
|
"eval_logits/chosen": -0.11007735878229141, |
|
"eval_logits/rejected": -0.14086602628231049, |
|
"eval_logps/chosen": -1.2711644172668457, |
|
"eval_logps/rejected": -1.6204769611358643, |
|
"eval_loss": 1.2642947435379028, |
|
"eval_nll_loss": 1.215002179145813, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.12711645662784576, |
|
"eval_rewards/margins": 0.034931257367134094, |
|
"eval_rewards/rejected": -0.16204769909381866, |
|
"eval_runtime": 0.9131, |
|
"eval_samples_per_second": 19.712, |
|
"eval_steps_per_second": 9.856, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 6.65625, |
|
"learning_rate": 5.6e-06, |
|
"log_odds_chosen": 0.4413740336894989, |
|
"log_odds_ratio": -0.5025829672813416, |
|
"logits/chosen": -0.026908639818429947, |
|
"logits/rejected": -0.06323603540658951, |
|
"logps/chosen": -1.2035048007965088, |
|
"logps/rejected": -1.5302585363388062, |
|
"loss": 1.1736, |
|
"nll_loss": 1.1233787536621094, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.12035048753023148, |
|
"rewards/margins": 0.03267538174986839, |
|
"rewards/rejected": -0.15302586555480957, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"eval_log_odds_chosen": 0.4787115454673767, |
|
"eval_log_odds_ratio": -0.4869447946548462, |
|
"eval_logits/chosen": -0.11934076249599457, |
|
"eval_logits/rejected": -0.15017718076705933, |
|
"eval_logps/chosen": -1.2190965414047241, |
|
"eval_logps/rejected": -1.5743210315704346, |
|
"eval_loss": 1.1919617652893066, |
|
"eval_nll_loss": 1.1432671546936035, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.12190967053174973, |
|
"eval_rewards/margins": 0.03552243858575821, |
|
"eval_rewards/rejected": -0.15743210911750793, |
|
"eval_runtime": 0.9096, |
|
"eval_samples_per_second": 19.79, |
|
"eval_steps_per_second": 9.895, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"grad_norm": 6.96875, |
|
"learning_rate": 5.2e-06, |
|
"log_odds_chosen": 0.46652132272720337, |
|
"log_odds_ratio": -0.4945680797100067, |
|
"logits/chosen": -0.0492391437292099, |
|
"logits/rejected": -0.0838259607553482, |
|
"logps/chosen": -1.1840012073516846, |
|
"logps/rejected": -1.525024652481079, |
|
"loss": 1.1212, |
|
"nll_loss": 1.0717414617538452, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.11840011179447174, |
|
"rewards/margins": 0.03410235792398453, |
|
"rewards/rejected": -0.15250247716903687, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"eval_log_odds_chosen": 0.4977823495864868, |
|
"eval_log_odds_ratio": -0.4801054000854492, |
|
"eval_logits/chosen": -0.12862297892570496, |
|
"eval_logits/rejected": -0.16006678342819214, |
|
"eval_logps/chosen": -1.1753649711608887, |
|
"eval_logps/rejected": -1.5391558408737183, |
|
"eval_loss": 1.1233677864074707, |
|
"eval_nll_loss": 1.0753573179244995, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.1175365075469017, |
|
"eval_rewards/margins": 0.03637908399105072, |
|
"eval_rewards/rejected": -0.15391558408737183, |
|
"eval_runtime": 0.9109, |
|
"eval_samples_per_second": 19.76, |
|
"eval_steps_per_second": 9.88, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"grad_norm": 6.84375, |
|
"learning_rate": 4.8e-06, |
|
"log_odds_chosen": 0.4919642210006714, |
|
"log_odds_ratio": -0.48277872800827026, |
|
"logits/chosen": -0.05812246352434158, |
|
"logits/rejected": -0.0962948203086853, |
|
"logps/chosen": -1.1183116436004639, |
|
"logps/rejected": -1.4710171222686768, |
|
"loss": 1.0518, |
|
"nll_loss": 1.0035254955291748, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.11183115839958191, |
|
"rewards/margins": 0.03527054935693741, |
|
"rewards/rejected": -0.14710170030593872, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 3.2, |
|
"eval_log_odds_chosen": 0.5149489045143127, |
|
"eval_log_odds_ratio": -0.47412246465682983, |
|
"eval_logits/chosen": -0.14218762516975403, |
|
"eval_logits/rejected": -0.17368356883525848, |
|
"eval_logps/chosen": -1.1381663084030151, |
|
"eval_logps/rejected": -1.5092874765396118, |
|
"eval_loss": 1.0610299110412598, |
|
"eval_nll_loss": 1.013617753982544, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.11381663382053375, |
|
"eval_rewards/margins": 0.03711211308836937, |
|
"eval_rewards/rejected": -0.15092875063419342, |
|
"eval_runtime": 0.9136, |
|
"eval_samples_per_second": 19.703, |
|
"eval_steps_per_second": 9.852, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"grad_norm": 6.71875, |
|
"learning_rate": 4.4e-06, |
|
"log_odds_chosen": 0.5135414004325867, |
|
"log_odds_ratio": -0.47593823075294495, |
|
"logits/chosen": -0.06834974884986877, |
|
"logits/rejected": -0.11046632379293442, |
|
"logps/chosen": -1.0532824993133545, |
|
"logps/rejected": -1.4117248058319092, |
|
"loss": 0.9805, |
|
"nll_loss": 0.9329336285591125, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.1053282618522644, |
|
"rewards/margins": 0.035844214260578156, |
|
"rewards/rejected": -0.14117246866226196, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 3.6, |
|
"eval_log_odds_chosen": 0.5320238471031189, |
|
"eval_log_odds_ratio": -0.4681590497493744, |
|
"eval_logits/chosen": -0.1644686907529831, |
|
"eval_logits/rejected": -0.19686466455459595, |
|
"eval_logps/chosen": -1.104932427406311, |
|
"eval_logps/rejected": -1.4836636781692505, |
|
"eval_loss": 1.0012433528900146, |
|
"eval_nll_loss": 0.9544275999069214, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.1104932650923729, |
|
"eval_rewards/margins": 0.03787311539053917, |
|
"eval_rewards/rejected": -0.14836637675762177, |
|
"eval_runtime": 0.9099, |
|
"eval_samples_per_second": 19.783, |
|
"eval_steps_per_second": 9.892, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 6.75, |
|
"learning_rate": 4e-06, |
|
"log_odds_chosen": 0.5470705032348633, |
|
"log_odds_ratio": -0.4658868908882141, |
|
"logits/chosen": -0.09963471442461014, |
|
"logits/rejected": -0.13952209055423737, |
|
"logps/chosen": -1.0679322481155396, |
|
"logps/rejected": -1.4557496309280396, |
|
"loss": 0.9299, |
|
"nll_loss": 0.8832955956459045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.10679321736097336, |
|
"rewards/margins": 0.038781747221946716, |
|
"rewards/rejected": -0.14557495713233948, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_log_odds_chosen": 0.547681450843811, |
|
"eval_log_odds_ratio": -0.46276018023490906, |
|
"eval_logits/chosen": -0.1875133067369461, |
|
"eval_logits/rejected": -0.22008682787418365, |
|
"eval_logps/chosen": -1.07937490940094, |
|
"eval_logps/rejected": -1.4652737379074097, |
|
"eval_loss": 0.9495540261268616, |
|
"eval_nll_loss": 0.9032779335975647, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.10793750733137131, |
|
"eval_rewards/margins": 0.03858988359570503, |
|
"eval_rewards/rejected": -0.14652739465236664, |
|
"eval_runtime": 0.9118, |
|
"eval_samples_per_second": 19.742, |
|
"eval_steps_per_second": 9.871, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"grad_norm": 6.5625, |
|
"learning_rate": 3.6e-06, |
|
"log_odds_chosen": 0.5115205645561218, |
|
"log_odds_ratio": -0.4781632423400879, |
|
"logits/chosen": -0.11988667398691177, |
|
"logits/rejected": -0.15675179660320282, |
|
"logps/chosen": -1.0269582271575928, |
|
"logps/rejected": -1.3790360689163208, |
|
"loss": 0.8761, |
|
"nll_loss": 0.8282526135444641, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.10269583761692047, |
|
"rewards/margins": 0.03520777449011803, |
|
"rewards/rejected": -0.1379036009311676, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 4.4, |
|
"eval_log_odds_chosen": 0.560771107673645, |
|
"eval_log_odds_ratio": -0.45837968587875366, |
|
"eval_logits/chosen": -0.21046772599220276, |
|
"eval_logits/rejected": -0.2431277632713318, |
|
"eval_logps/chosen": -1.0591222047805786, |
|
"eval_logps/rejected": -1.4509668350219727, |
|
"eval_loss": 0.9070050120353699, |
|
"eval_nll_loss": 0.8611669540405273, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.10591220110654831, |
|
"eval_rewards/margins": 0.03918447345495224, |
|
"eval_rewards/rejected": -0.14509668946266174, |
|
"eval_runtime": 0.9119, |
|
"eval_samples_per_second": 19.739, |
|
"eval_steps_per_second": 9.869, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"grad_norm": 6.5625, |
|
"learning_rate": 3.2e-06, |
|
"log_odds_chosen": 0.6133227944374084, |
|
"log_odds_ratio": -0.4412252902984619, |
|
"logits/chosen": -0.14904728531837463, |
|
"logits/rejected": -0.1963028907775879, |
|
"logps/chosen": -0.9898001551628113, |
|
"logps/rejected": -1.4105660915374756, |
|
"loss": 0.8337, |
|
"nll_loss": 0.7895629405975342, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.09898000955581665, |
|
"rewards/margins": 0.04207659140229225, |
|
"rewards/rejected": -0.1410566121339798, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 4.8, |
|
"eval_log_odds_chosen": 0.566861093044281, |
|
"eval_log_odds_ratio": -0.45656245946884155, |
|
"eval_logits/chosen": -0.22315236926078796, |
|
"eval_logits/rejected": -0.2561546862125397, |
|
"eval_logps/chosen": -1.0492280721664429, |
|
"eval_logps/rejected": -1.443569302558899, |
|
"eval_loss": 0.8863641023635864, |
|
"eval_nll_loss": 0.8407078981399536, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.1049228087067604, |
|
"eval_rewards/margins": 0.03943413123488426, |
|
"eval_rewards/rejected": -0.14435693621635437, |
|
"eval_runtime": 0.9164, |
|
"eval_samples_per_second": 19.642, |
|
"eval_steps_per_second": 9.821, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"grad_norm": 6.5, |
|
"learning_rate": 2.8e-06, |
|
"log_odds_chosen": 0.49919161200523376, |
|
"log_odds_ratio": -0.4824044108390808, |
|
"logits/chosen": -0.15154853463172913, |
|
"logits/rejected": -0.18894340097904205, |
|
"logps/chosen": -1.0274688005447388, |
|
"logps/rejected": -1.3725156784057617, |
|
"loss": 0.7975, |
|
"nll_loss": 0.7492961883544922, |
|
"rewards/accuracies": 0.953125, |
|
"rewards/chosen": -0.1027468740940094, |
|
"rewards/margins": 0.034504685550928116, |
|
"rewards/rejected": -0.13725155591964722, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 5.2, |
|
"eval_log_odds_chosen": 0.57403564453125, |
|
"eval_log_odds_ratio": -0.45407184958457947, |
|
"eval_logits/chosen": -0.23968791961669922, |
|
"eval_logits/rejected": -0.2726818025112152, |
|
"eval_logps/chosen": -1.0405869483947754, |
|
"eval_logps/rejected": -1.4385521411895752, |
|
"eval_loss": 0.8664105534553528, |
|
"eval_nll_loss": 0.8210033774375916, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.10405868291854858, |
|
"eval_rewards/margins": 0.03979651629924774, |
|
"eval_rewards/rejected": -0.14385519921779633, |
|
"eval_runtime": 0.9125, |
|
"eval_samples_per_second": 19.726, |
|
"eval_steps_per_second": 9.863, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"grad_norm": 6.40625, |
|
"learning_rate": 2.4e-06, |
|
"log_odds_chosen": 0.6314184665679932, |
|
"log_odds_ratio": -0.4354270100593567, |
|
"logits/chosen": -0.1857105791568756, |
|
"logits/rejected": -0.22656874358654022, |
|
"logps/chosen": -0.9448862075805664, |
|
"logps/rejected": -1.3670051097869873, |
|
"loss": 0.788, |
|
"nll_loss": 0.7444556951522827, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.09448862820863724, |
|
"rewards/margins": 0.04221189767122269, |
|
"rewards/rejected": -0.13670052587985992, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 5.6, |
|
"eval_log_odds_chosen": 0.5794126987457275, |
|
"eval_log_odds_ratio": -0.4523561894893646, |
|
"eval_logits/chosen": -0.2507224678993225, |
|
"eval_logits/rejected": -0.2837482690811157, |
|
"eval_logps/chosen": -1.0325850248336792, |
|
"eval_logps/rejected": -1.4329302310943604, |
|
"eval_loss": 0.8492475152015686, |
|
"eval_nll_loss": 0.8040118217468262, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.10325851291418076, |
|
"eval_rewards/margins": 0.04003452509641647, |
|
"eval_rewards/rejected": -0.14329302310943604, |
|
"eval_runtime": 0.9113, |
|
"eval_samples_per_second": 19.752, |
|
"eval_steps_per_second": 9.876, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 6.25, |
|
"learning_rate": 2e-06, |
|
"log_odds_chosen": 0.5837588906288147, |
|
"log_odds_ratio": -0.45432496070861816, |
|
"logits/chosen": -0.18676218390464783, |
|
"logits/rejected": -0.23436766862869263, |
|
"logps/chosen": -1.0190256834030151, |
|
"logps/rejected": -1.4252986907958984, |
|
"loss": 0.78, |
|
"nll_loss": 0.7345477342605591, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.1019025593996048, |
|
"rewards/margins": 0.040627315640449524, |
|
"rewards/rejected": -0.14252987504005432, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"eval_log_odds_chosen": 0.5839331150054932, |
|
"eval_log_odds_ratio": -0.45080792903900146, |
|
"eval_logits/chosen": -0.26143890619277954, |
|
"eval_logits/rejected": -0.29441285133361816, |
|
"eval_logps/chosen": -1.0263959169387817, |
|
"eval_logps/rejected": -1.4286550283432007, |
|
"eval_loss": 0.8333800435066223, |
|
"eval_nll_loss": 0.7882992625236511, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.10263960063457489, |
|
"eval_rewards/margins": 0.040225885808467865, |
|
"eval_rewards/rejected": -0.14286547899246216, |
|
"eval_runtime": 0.9128, |
|
"eval_samples_per_second": 19.72, |
|
"eval_steps_per_second": 9.86, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"grad_norm": 6.25, |
|
"learning_rate": 1.6e-06, |
|
"log_odds_chosen": 0.6216727495193481, |
|
"log_odds_ratio": -0.4399999976158142, |
|
"logits/chosen": -0.19661211967468262, |
|
"logits/rejected": -0.24193710088729858, |
|
"logps/chosen": -0.949080228805542, |
|
"logps/rejected": -1.3682817220687866, |
|
"loss": 0.7395, |
|
"nll_loss": 0.6955283880233765, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.09490802884101868, |
|
"rewards/margins": 0.04192016273736954, |
|
"rewards/rejected": -0.13682818412780762, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 6.4, |
|
"eval_log_odds_chosen": 0.5865243673324585, |
|
"eval_log_odds_ratio": -0.44995343685150146, |
|
"eval_logits/chosen": -0.2722480595111847, |
|
"eval_logits/rejected": -0.30540305376052856, |
|
"eval_logps/chosen": -1.0213567018508911, |
|
"eval_logps/rejected": -1.4244111776351929, |
|
"eval_loss": 0.821089506149292, |
|
"eval_nll_loss": 0.7760941386222839, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.10213566571474075, |
|
"eval_rewards/margins": 0.040305450558662415, |
|
"eval_rewards/rejected": -0.14244110882282257, |
|
"eval_runtime": 0.9109, |
|
"eval_samples_per_second": 19.76, |
|
"eval_steps_per_second": 9.88, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"grad_norm": 6.15625, |
|
"learning_rate": 1.2e-06, |
|
"log_odds_chosen": 0.5441011786460876, |
|
"log_odds_ratio": -0.4661071002483368, |
|
"logits/chosen": -0.20644214749336243, |
|
"logits/rejected": -0.2489599585533142, |
|
"logps/chosen": -1.002626657485962, |
|
"logps/rejected": -1.3740018606185913, |
|
"loss": 0.7446, |
|
"nll_loss": 0.6980130076408386, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -0.10026266425848007, |
|
"rewards/margins": 0.037137530744075775, |
|
"rewards/rejected": -0.13740019500255585, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 6.8, |
|
"eval_log_odds_chosen": 0.5888115763664246, |
|
"eval_log_odds_ratio": -0.44922754168510437, |
|
"eval_logits/chosen": -0.2722025513648987, |
|
"eval_logits/rejected": -0.30542224645614624, |
|
"eval_logps/chosen": -1.0186693668365479, |
|
"eval_logps/rejected": -1.4229329824447632, |
|
"eval_loss": 0.8164036870002747, |
|
"eval_nll_loss": 0.7714808583259583, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.10186693072319031, |
|
"eval_rewards/margins": 0.040426358580589294, |
|
"eval_rewards/rejected": -0.1422932893037796, |
|
"eval_runtime": 0.9099, |
|
"eval_samples_per_second": 19.782, |
|
"eval_steps_per_second": 9.891, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"grad_norm": 6.125, |
|
"learning_rate": 8e-07, |
|
"log_odds_chosen": 0.603852391242981, |
|
"log_odds_ratio": -0.44559258222579956, |
|
"logits/chosen": -0.21281519532203674, |
|
"logits/rejected": -0.2561994791030884, |
|
"logps/chosen": -0.9362643957138062, |
|
"logps/rejected": -1.3377454280853271, |
|
"loss": 0.7518, |
|
"nll_loss": 0.7072104811668396, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.09362644702196121, |
|
"rewards/margins": 0.040148116648197174, |
|
"rewards/rejected": -0.1337745636701584, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 7.2, |
|
"eval_log_odds_chosen": 0.5903448462486267, |
|
"eval_log_odds_ratio": -0.4487246870994568, |
|
"eval_logits/chosen": -0.2774750292301178, |
|
"eval_logits/rejected": -0.3105829358100891, |
|
"eval_logps/chosen": -1.0175344944000244, |
|
"eval_logps/rejected": -1.4226274490356445, |
|
"eval_loss": 0.812524139881134, |
|
"eval_nll_loss": 0.7676517963409424, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.10175344347953796, |
|
"eval_rewards/margins": 0.040509287267923355, |
|
"eval_rewards/rejected": -0.1422627568244934, |
|
"eval_runtime": 0.9145, |
|
"eval_samples_per_second": 19.683, |
|
"eval_steps_per_second": 9.841, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"grad_norm": 6.21875, |
|
"learning_rate": 4e-07, |
|
"log_odds_chosen": 0.6179953217506409, |
|
"log_odds_ratio": -0.4413047730922699, |
|
"logits/chosen": -0.22949087619781494, |
|
"logits/rejected": -0.27671945095062256, |
|
"logps/chosen": -0.9877333045005798, |
|
"logps/rejected": -1.4103630781173706, |
|
"loss": 0.7431, |
|
"nll_loss": 0.6990159749984741, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.09877333790063858, |
|
"rewards/margins": 0.0422629676759243, |
|
"rewards/rejected": -0.14103631675243378, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 7.6, |
|
"eval_log_odds_chosen": 0.5911502838134766, |
|
"eval_log_odds_ratio": -0.44839149713516235, |
|
"eval_logits/chosen": -0.276792049407959, |
|
"eval_logits/rejected": -0.31038832664489746, |
|
"eval_logps/chosen": -1.0161813497543335, |
|
"eval_logps/rejected": -1.4216675758361816, |
|
"eval_loss": 0.8106683492660522, |
|
"eval_nll_loss": 0.7658291459083557, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.10161812603473663, |
|
"eval_rewards/margins": 0.04054862633347511, |
|
"eval_rewards/rejected": -0.14216677844524384, |
|
"eval_runtime": 0.9137, |
|
"eval_samples_per_second": 19.701, |
|
"eval_steps_per_second": 9.85, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 6.15625, |
|
"learning_rate": 0.0, |
|
"log_odds_chosen": 0.578275740146637, |
|
"log_odds_ratio": -0.45558279752731323, |
|
"logits/chosen": -0.210488423705101, |
|
"logits/rejected": -0.24888469278812408, |
|
"logps/chosen": -0.9616943001747131, |
|
"logps/rejected": -1.3508220911026, |
|
"loss": 0.726, |
|
"nll_loss": 0.6804530620574951, |
|
"rewards/accuracies": 0.984375, |
|
"rewards/chosen": -0.09616944193840027, |
|
"rewards/margins": 0.038912780582904816, |
|
"rewards/rejected": -0.1350822150707245, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_log_odds_chosen": 0.5904660224914551, |
|
"eval_log_odds_ratio": -0.44868627190589905, |
|
"eval_logits/chosen": -0.27975308895111084, |
|
"eval_logits/rejected": -0.3132215440273285, |
|
"eval_logps/chosen": -1.0163097381591797, |
|
"eval_logps/rejected": -1.4213618040084839, |
|
"eval_loss": 0.8110137581825256, |
|
"eval_nll_loss": 0.7661450505256653, |
|
"eval_rewards/accuracies": 1.0, |
|
"eval_rewards/chosen": -0.10163097083568573, |
|
"eval_rewards/margins": 0.04050520807504654, |
|
"eval_rewards/rejected": -0.14213618636131287, |
|
"eval_runtime": 0.9114, |
|
"eval_samples_per_second": 19.75, |
|
"eval_steps_per_second": 9.875, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"step": 20, |
|
"total_flos": 0.0, |
|
"train_loss": 1.0276277005672454, |
|
"train_runtime": 261.791, |
|
"train_samples_per_second": 6.074, |
|
"train_steps_per_second": 0.076 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 20, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|