|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.986666666666667, |
|
"eval_steps": 500, |
|
"global_step": 315, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.047407407407407405, |
|
"grad_norm": 736.0, |
|
"learning_rate": 7.8125e-06, |
|
"log_odds_chosen": 4.987946510314941, |
|
"log_odds_ratio": -9.761848449707031, |
|
"logits/chosen": 138.64175415039062, |
|
"logits/rejected": 152.19424438476562, |
|
"logps/chosen": -20.546340942382812, |
|
"logps/rejected": -25.53423309326172, |
|
"loss": 392.2693, |
|
"nll_loss": 8.064610481262207, |
|
"rewards/accuracies": 0.543749988079071, |
|
"rewards/chosen": -10.273170471191406, |
|
"rewards/margins": 2.493946075439453, |
|
"rewards/rejected": -12.76711654663086, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.09481481481481481, |
|
"grad_norm": 478.0, |
|
"learning_rate": 1.5625e-05, |
|
"log_odds_chosen": 2.351091146469116, |
|
"log_odds_ratio": -7.1593170166015625, |
|
"logits/chosen": 130.83258056640625, |
|
"logits/rejected": 159.64907836914062, |
|
"logps/chosen": -15.241386413574219, |
|
"logps/rejected": -17.591449737548828, |
|
"loss": 350.4958, |
|
"nll_loss": 6.879847526550293, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -7.620693206787109, |
|
"rewards/margins": 1.175031065940857, |
|
"rewards/rejected": -8.795724868774414, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.14222222222222222, |
|
"grad_norm": 498.0, |
|
"learning_rate": 2.34375e-05, |
|
"log_odds_chosen": 4.327846527099609, |
|
"log_odds_ratio": -7.479238986968994, |
|
"logits/chosen": 120.69163513183594, |
|
"logits/rejected": 148.0057373046875, |
|
"logps/chosen": -19.320337295532227, |
|
"logps/rejected": -23.6463623046875, |
|
"loss": 346.1611, |
|
"nll_loss": 7.978721618652344, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -9.660168647766113, |
|
"rewards/margins": 2.163012981414795, |
|
"rewards/rejected": -11.82318115234375, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.18962962962962962, |
|
"grad_norm": 1896.0, |
|
"learning_rate": 3.125e-05, |
|
"log_odds_chosen": -0.3415478467941284, |
|
"log_odds_ratio": -5.298593997955322, |
|
"logits/chosen": 143.34091186523438, |
|
"logits/rejected": 147.2091064453125, |
|
"logps/chosen": -12.515907287597656, |
|
"logps/rejected": -12.175848007202148, |
|
"loss": 266.0229, |
|
"nll_loss": 5.948061943054199, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -6.257953643798828, |
|
"rewards/margins": -0.1700296252965927, |
|
"rewards/rejected": -6.087924003601074, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.23703703703703705, |
|
"grad_norm": 324.0, |
|
"learning_rate": 3.90625e-05, |
|
"log_odds_chosen": 0.98066645860672, |
|
"log_odds_ratio": -1.3506591320037842, |
|
"logits/chosen": 161.93701171875, |
|
"logits/rejected": 170.9370880126953, |
|
"logps/chosen": -3.294827938079834, |
|
"logps/rejected": -4.26317024230957, |
|
"loss": 92.5719, |
|
"nll_loss": 2.4116456508636475, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -1.647413969039917, |
|
"rewards/margins": 0.4841710925102234, |
|
"rewards/rejected": -2.131585121154785, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.28444444444444444, |
|
"grad_norm": 532.0, |
|
"learning_rate": 4.6875e-05, |
|
"log_odds_chosen": 0.14513510465621948, |
|
"log_odds_ratio": -0.7996392250061035, |
|
"logits/chosen": 191.52481079101562, |
|
"logits/rejected": 213.43954467773438, |
|
"logps/chosen": -1.6295182704925537, |
|
"logps/rejected": -1.75119149684906, |
|
"loss": 71.4959, |
|
"nll_loss": 1.912672758102417, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.8147591352462769, |
|
"rewards/margins": 0.06083657220005989, |
|
"rewards/rejected": -0.87559574842453, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.33185185185185184, |
|
"grad_norm": 149.0, |
|
"learning_rate": 4.998613757348784e-05, |
|
"log_odds_chosen": -0.0024008960463106632, |
|
"log_odds_ratio": -0.854382336139679, |
|
"logits/chosen": 231.1159210205078, |
|
"logits/rejected": 226.5718994140625, |
|
"logps/chosen": -1.568902850151062, |
|
"logps/rejected": -1.5733020305633545, |
|
"loss": 65.1785, |
|
"nll_loss": 1.7701244354248047, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.784451425075531, |
|
"rewards/margins": 0.002199609996750951, |
|
"rewards/rejected": -0.7866510152816772, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.37925925925925924, |
|
"grad_norm": 110.0, |
|
"learning_rate": 4.990147841143462e-05, |
|
"log_odds_chosen": 0.23734720051288605, |
|
"log_odds_ratio": -0.6953937411308289, |
|
"logits/chosen": 233.0690460205078, |
|
"logits/rejected": 235.18359375, |
|
"logps/chosen": -1.3615589141845703, |
|
"logps/rejected": -1.551184892654419, |
|
"loss": 61.3129, |
|
"nll_loss": 1.6592628955841064, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -0.6807794570922852, |
|
"rewards/margins": 0.09481293708086014, |
|
"rewards/rejected": -0.7755924463272095, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4266666666666667, |
|
"grad_norm": 168.0, |
|
"learning_rate": 4.97401218720448e-05, |
|
"log_odds_chosen": 0.17986582219600677, |
|
"log_odds_ratio": -0.7720650434494019, |
|
"logits/chosen": 214.67892456054688, |
|
"logits/rejected": 212.75643920898438, |
|
"logps/chosen": -1.3822438716888428, |
|
"logps/rejected": -1.5297141075134277, |
|
"loss": 59.3438, |
|
"nll_loss": 1.6401255130767822, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.6911219358444214, |
|
"rewards/margins": 0.07373511791229248, |
|
"rewards/rejected": -0.7648570537567139, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4740740740740741, |
|
"grad_norm": 102.5, |
|
"learning_rate": 4.9502564938797946e-05, |
|
"log_odds_chosen": 0.18993845582008362, |
|
"log_odds_ratio": -0.7323363423347473, |
|
"logits/chosen": 208.16702270507812, |
|
"logits/rejected": 208.33804321289062, |
|
"logps/chosen": -1.3402252197265625, |
|
"logps/rejected": -1.4932218790054321, |
|
"loss": 60.8339, |
|
"nll_loss": 1.674740195274353, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.6701126098632812, |
|
"rewards/margins": 0.07649824768304825, |
|
"rewards/rejected": -0.7466109395027161, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.5214814814814814, |
|
"grad_norm": 69.5, |
|
"learning_rate": 4.918953929490768e-05, |
|
"log_odds_chosen": 0.1412346512079239, |
|
"log_odds_ratio": -0.7170370221138, |
|
"logits/chosen": 207.9961395263672, |
|
"logits/rejected": 205.17996215820312, |
|
"logps/chosen": -1.2495604753494263, |
|
"logps/rejected": -1.3624210357666016, |
|
"loss": 55.5208, |
|
"nll_loss": 1.5490686893463135, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.6247802376747131, |
|
"rewards/margins": 0.05643026903271675, |
|
"rewards/rejected": -0.6812105178833008, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5688888888888889, |
|
"grad_norm": 126.0, |
|
"learning_rate": 4.88020090697132e-05, |
|
"log_odds_chosen": 0.21612174808979034, |
|
"log_odds_ratio": -0.6790373921394348, |
|
"logits/chosen": 208.6972198486328, |
|
"logits/rejected": 204.21548461914062, |
|
"logps/chosen": -1.1511867046356201, |
|
"logps/rejected": -1.3068695068359375, |
|
"loss": 53.3904, |
|
"nll_loss": 1.4371321201324463, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -0.5755933523178101, |
|
"rewards/margins": 0.07784143090248108, |
|
"rewards/rejected": -0.6534347534179688, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6162962962962963, |
|
"grad_norm": 116.5, |
|
"learning_rate": 4.834116786912897e-05, |
|
"log_odds_chosen": 0.20034953951835632, |
|
"log_odds_ratio": -0.7185416221618652, |
|
"logits/chosen": 211.181884765625, |
|
"logits/rejected": 212.2796173095703, |
|
"logps/chosen": -1.2082265615463257, |
|
"logps/rejected": -1.3656466007232666, |
|
"loss": 52.9814, |
|
"nll_loss": 1.4586594104766846, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": -0.6041132807731628, |
|
"rewards/margins": 0.07870997488498688, |
|
"rewards/rejected": -0.6828233003616333, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6637037037037037, |
|
"grad_norm": 102.5, |
|
"learning_rate": 4.7808435099299045e-05, |
|
"log_odds_chosen": 0.2931309938430786, |
|
"log_odds_ratio": -0.6710721254348755, |
|
"logits/chosen": 210.8258514404297, |
|
"logits/rejected": 204.9437713623047, |
|
"logps/chosen": -1.1364883184432983, |
|
"logps/rejected": -1.3803962469100952, |
|
"loss": 51.9763, |
|
"nll_loss": 1.429614543914795, |
|
"rewards/accuracies": 0.581250011920929, |
|
"rewards/chosen": -0.5682441592216492, |
|
"rewards/margins": 0.12195394933223724, |
|
"rewards/rejected": -0.6901981234550476, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.7111111111111111, |
|
"grad_norm": 73.5, |
|
"learning_rate": 4.720545159477922e-05, |
|
"log_odds_chosen": 0.30529800057411194, |
|
"log_odds_ratio": -0.6410656571388245, |
|
"logits/chosen": 210.42739868164062, |
|
"logits/rejected": 212.0102081298828, |
|
"logps/chosen": -1.080447793006897, |
|
"logps/rejected": -1.3063446283340454, |
|
"loss": 51.2694, |
|
"nll_loss": 1.4210776090621948, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5402238965034485, |
|
"rewards/margins": 0.11294851452112198, |
|
"rewards/rejected": -0.6531723141670227, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7585185185185185, |
|
"grad_norm": 109.5, |
|
"learning_rate": 4.653407456471222e-05, |
|
"log_odds_chosen": 0.2542671263217926, |
|
"log_odds_ratio": -0.6951149106025696, |
|
"logits/chosen": 214.9987030029297, |
|
"logits/rejected": 210.99972534179688, |
|
"logps/chosen": -1.1126171350479126, |
|
"logps/rejected": -1.2992708683013916, |
|
"loss": 49.9945, |
|
"nll_loss": 1.4084731340408325, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5563085675239563, |
|
"rewards/margins": 0.09332697093486786, |
|
"rewards/rejected": -0.6496354341506958, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.8059259259259259, |
|
"grad_norm": 92.5, |
|
"learning_rate": 4.579637187256222e-05, |
|
"log_odds_chosen": 0.2188234031200409, |
|
"log_odds_ratio": -0.6811344027519226, |
|
"logits/chosen": 210.42398071289062, |
|
"logits/rejected": 202.4885711669922, |
|
"logps/chosen": -1.0539259910583496, |
|
"logps/rejected": -1.2093901634216309, |
|
"loss": 49.2791, |
|
"nll_loss": 1.3479670286178589, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.5269629955291748, |
|
"rewards/margins": 0.07773206382989883, |
|
"rewards/rejected": -0.6046950817108154, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8533333333333334, |
|
"grad_norm": 68.5, |
|
"learning_rate": 4.499461566702685e-05, |
|
"log_odds_chosen": 0.2878134846687317, |
|
"log_odds_ratio": -0.6503546833992004, |
|
"logits/chosen": 206.28036499023438, |
|
"logits/rejected": 204.7410125732422, |
|
"logps/chosen": -1.037368893623352, |
|
"logps/rejected": -1.231547474861145, |
|
"loss": 52.0954, |
|
"nll_loss": 1.4560641050338745, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -0.518684446811676, |
|
"rewards/margins": 0.09708929806947708, |
|
"rewards/rejected": -0.6157737374305725, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.9007407407407407, |
|
"grad_norm": 79.0, |
|
"learning_rate": 4.413127538374411e-05, |
|
"log_odds_chosen": 0.3599183261394501, |
|
"log_odds_ratio": -0.6029065847396851, |
|
"logits/chosen": 201.10952758789062, |
|
"logits/rejected": 197.03341674804688, |
|
"logps/chosen": -0.9574621319770813, |
|
"logps/rejected": -1.1941089630126953, |
|
"loss": 48.7124, |
|
"nll_loss": 1.2800534963607788, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.47873106598854065, |
|
"rewards/margins": 0.1183234453201294, |
|
"rewards/rejected": -0.5970544815063477, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.9481481481481482, |
|
"grad_norm": 107.5, |
|
"learning_rate": 4.320901013934887e-05, |
|
"log_odds_chosen": 0.1424800455570221, |
|
"log_odds_ratio": -0.7256409525871277, |
|
"logits/chosen": 203.09567260742188, |
|
"logits/rejected": 196.48220825195312, |
|
"logps/chosen": -1.0806185007095337, |
|
"logps/rejected": -1.2011009454727173, |
|
"loss": 49.3262, |
|
"nll_loss": 1.3956550359725952, |
|
"rewards/accuracies": 0.5562499761581421, |
|
"rewards/chosen": -0.5403092503547668, |
|
"rewards/margins": 0.0602412223815918, |
|
"rewards/rejected": -0.6005504727363586, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.9955555555555555, |
|
"grad_norm": 101.0, |
|
"learning_rate": 4.223066054130568e-05, |
|
"log_odds_chosen": 0.29610657691955566, |
|
"log_odds_ratio": -0.6316549181938171, |
|
"logits/chosen": 204.3717041015625, |
|
"logits/rejected": 200.5919189453125, |
|
"logps/chosen": -1.006296992301941, |
|
"logps/rejected": -1.2125999927520752, |
|
"loss": 48.1369, |
|
"nll_loss": 1.312403678894043, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.5031484961509705, |
|
"rewards/margins": 0.10315157473087311, |
|
"rewards/rejected": -0.6062999963760376, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.0429629629629629, |
|
"grad_norm": 93.0, |
|
"learning_rate": 4.1199239938743797e-05, |
|
"log_odds_chosen": 0.5429099798202515, |
|
"log_odds_ratio": -0.5789119005203247, |
|
"logits/chosen": 202.58053588867188, |
|
"logits/rejected": 195.58265686035156, |
|
"logps/chosen": -0.8625534772872925, |
|
"logps/rejected": -1.2194641828536987, |
|
"loss": 42.5959, |
|
"nll_loss": 1.151334285736084, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -0.43127673864364624, |
|
"rewards/margins": 0.17845533788204193, |
|
"rewards/rejected": -0.6097320914268494, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0903703703703704, |
|
"grad_norm": 67.5, |
|
"learning_rate": 4.0117925141242174e-05, |
|
"log_odds_chosen": 0.7351988554000854, |
|
"log_odds_ratio": -0.494173139333725, |
|
"logits/chosen": 198.6049346923828, |
|
"logits/rejected": 193.7996063232422, |
|
"logps/chosen": -0.8116765022277832, |
|
"logps/rejected": -1.2775243520736694, |
|
"loss": 40.7484, |
|
"nll_loss": 1.101711392402649, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.4058382511138916, |
|
"rewards/margins": 0.2329239547252655, |
|
"rewards/rejected": -0.6387621760368347, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.1377777777777778, |
|
"grad_norm": 49.75, |
|
"learning_rate": 3.899004663415084e-05, |
|
"log_odds_chosen": 0.7911133170127869, |
|
"log_odds_ratio": -0.4592220187187195, |
|
"logits/chosen": 190.59217834472656, |
|
"logits/rejected": 190.78323364257812, |
|
"logps/chosen": -0.7666565775871277, |
|
"logps/rejected": -1.2360306978225708, |
|
"loss": 40.7742, |
|
"nll_loss": 1.0949238538742065, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.38332828879356384, |
|
"rewards/margins": 0.23468704521656036, |
|
"rewards/rejected": -0.6180153489112854, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.1851851851851851, |
|
"grad_norm": 46.25, |
|
"learning_rate": 3.781907832058587e-05, |
|
"log_odds_chosen": 0.6842738389968872, |
|
"log_odds_ratio": -0.5187292695045471, |
|
"logits/chosen": 190.21575927734375, |
|
"logits/rejected": 186.94387817382812, |
|
"logps/chosen": -0.8428317904472351, |
|
"logps/rejected": -1.2817766666412354, |
|
"loss": 38.4508, |
|
"nll_loss": 1.0788408517837524, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.42141589522361755, |
|
"rewards/margins": 0.21947243809700012, |
|
"rewards/rejected": -0.6408883333206177, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.2325925925925927, |
|
"grad_norm": 95.0, |
|
"learning_rate": 3.660862682169282e-05, |
|
"log_odds_chosen": 0.5702214241027832, |
|
"log_odds_ratio": -0.5559878349304199, |
|
"logits/chosen": 193.6043701171875, |
|
"logits/rejected": 190.05230712890625, |
|
"logps/chosen": -0.834156334400177, |
|
"logps/rejected": -1.1642882823944092, |
|
"loss": 40.1159, |
|
"nll_loss": 1.142547607421875, |
|
"rewards/accuracies": 0.6937500238418579, |
|
"rewards/chosen": -0.4170781672000885, |
|
"rewards/margins": 0.16506603360176086, |
|
"rewards/rejected": -0.5821441411972046, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"grad_norm": 97.5, |
|
"learning_rate": 3.5362420368134356e-05, |
|
"log_odds_chosen": 0.677783191204071, |
|
"log_odds_ratio": -0.507438063621521, |
|
"logits/chosen": 187.4692840576172, |
|
"logits/rejected": 190.07284545898438, |
|
"logps/chosen": -0.7681523561477661, |
|
"logps/rejected": -1.1711633205413818, |
|
"loss": 40.1397, |
|
"nll_loss": 1.0826233625411987, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.38407617807388306, |
|
"rewards/margins": 0.20150542259216309, |
|
"rewards/rejected": -0.5855816602706909, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.3274074074074074, |
|
"grad_norm": 63.25, |
|
"learning_rate": 3.408429731701635e-05, |
|
"log_odds_chosen": 0.6302875280380249, |
|
"log_odds_ratio": -0.5435723066329956, |
|
"logits/chosen": 185.64149475097656, |
|
"logits/rejected": 188.64393615722656, |
|
"logps/chosen": -0.8573511242866516, |
|
"logps/rejected": -1.247619390487671, |
|
"loss": 41.5044, |
|
"nll_loss": 1.1718343496322632, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4286755621433258, |
|
"rewards/margins": 0.19513416290283203, |
|
"rewards/rejected": -0.6238096952438354, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.374814814814815, |
|
"grad_norm": 52.0, |
|
"learning_rate": 3.2778194329621104e-05, |
|
"log_odds_chosen": 0.6812846064567566, |
|
"log_odds_ratio": -0.49873781204223633, |
|
"logits/chosen": 186.69728088378906, |
|
"logits/rejected": 182.98965454101562, |
|
"logps/chosen": -0.8324145078659058, |
|
"logps/rejected": -1.271837592124939, |
|
"loss": 40.1295, |
|
"nll_loss": 1.1053495407104492, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -0.4162072539329529, |
|
"rewards/margins": 0.21971149742603302, |
|
"rewards/rejected": -0.6359187960624695, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.4222222222222223, |
|
"grad_norm": 59.0, |
|
"learning_rate": 3.144813424636031e-05, |
|
"log_odds_chosen": 0.6132742762565613, |
|
"log_odds_ratio": -0.5470070838928223, |
|
"logits/chosen": 181.83352661132812, |
|
"logits/rejected": 184.06472778320312, |
|
"logps/chosen": -0.7881155610084534, |
|
"logps/rejected": -1.150750994682312, |
|
"loss": 39.5806, |
|
"nll_loss": 1.0920333862304688, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.3940577805042267, |
|
"rewards/margins": 0.18131770193576813, |
|
"rewards/rejected": -0.575375497341156, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.4696296296296296, |
|
"grad_norm": 51.5, |
|
"learning_rate": 3.0098213696293542e-05, |
|
"log_odds_chosen": 0.5554690361022949, |
|
"log_odds_ratio": -0.5514319539070129, |
|
"logits/chosen": 189.21676635742188, |
|
"logits/rejected": 184.8980255126953, |
|
"logps/chosen": -0.8024199604988098, |
|
"logps/rejected": -1.117629051208496, |
|
"loss": 40.469, |
|
"nll_loss": 1.1150840520858765, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4012099802494049, |
|
"rewards/margins": 0.15760457515716553, |
|
"rewards/rejected": -0.558814525604248, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.5170370370370372, |
|
"grad_norm": 51.25, |
|
"learning_rate": 2.8732590479375165e-05, |
|
"log_odds_chosen": 0.6340516805648804, |
|
"log_odds_ratio": -0.5135641694068909, |
|
"logits/chosen": 191.99452209472656, |
|
"logits/rejected": 192.84353637695312, |
|
"logps/chosen": -0.8782358169555664, |
|
"logps/rejected": -1.275033950805664, |
|
"loss": 41.499, |
|
"nll_loss": 1.1827033758163452, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.4391179084777832, |
|
"rewards/margins": 0.19839909672737122, |
|
"rewards/rejected": -0.637516975402832, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.5644444444444443, |
|
"grad_norm": 45.75, |
|
"learning_rate": 2.7355470760292956e-05, |
|
"log_odds_chosen": 0.7264343500137329, |
|
"log_odds_ratio": -0.49429789185523987, |
|
"logits/chosen": 190.41761779785156, |
|
"logits/rejected": 196.09129333496094, |
|
"logps/chosen": -0.8138422966003418, |
|
"logps/rejected": -1.2546621561050415, |
|
"loss": 40.024, |
|
"nll_loss": 1.0924456119537354, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.4069211483001709, |
|
"rewards/margins": 0.22040989995002747, |
|
"rewards/rejected": -0.6273310780525208, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.6118518518518519, |
|
"grad_norm": 79.0, |
|
"learning_rate": 2.597109611334169e-05, |
|
"log_odds_chosen": 0.7704585790634155, |
|
"log_odds_ratio": -0.4867461621761322, |
|
"logits/chosen": 189.745361328125, |
|
"logits/rejected": 187.7951202392578, |
|
"logps/chosen": -0.7925730347633362, |
|
"logps/rejected": -1.269953966140747, |
|
"loss": 39.7815, |
|
"nll_loss": 1.0841796398162842, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.3962865173816681, |
|
"rewards/margins": 0.23869049549102783, |
|
"rewards/rejected": -0.6349769830703735, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.6592592592592592, |
|
"grad_norm": 92.5, |
|
"learning_rate": 2.458373045823404e-05, |
|
"log_odds_chosen": 0.6176282167434692, |
|
"log_odds_ratio": -0.521507203578949, |
|
"logits/chosen": 191.61280822753906, |
|
"logits/rejected": 187.45404052734375, |
|
"logps/chosen": -0.7949572801589966, |
|
"logps/rejected": -1.1731722354888916, |
|
"loss": 39.3331, |
|
"nll_loss": 1.114823341369629, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -0.3974786400794983, |
|
"rewards/margins": 0.18910741806030273, |
|
"rewards/rejected": -0.5865861177444458, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.7066666666666666, |
|
"grad_norm": 65.5, |
|
"learning_rate": 2.3197646927086697e-05, |
|
"log_odds_chosen": 0.4565175473690033, |
|
"log_odds_ratio": -0.5909174680709839, |
|
"logits/chosen": 188.08010864257812, |
|
"logits/rejected": 186.85438537597656, |
|
"logps/chosen": -0.8470600247383118, |
|
"logps/rejected": -1.1192262172698975, |
|
"loss": 40.9749, |
|
"nll_loss": 1.1627644300460815, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -0.4235300123691559, |
|
"rewards/margins": 0.13608308136463165, |
|
"rewards/rejected": -0.5596131086349487, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.7540740740740741, |
|
"grad_norm": 43.0, |
|
"learning_rate": 2.1817114703032176e-05, |
|
"log_odds_chosen": 0.6112794876098633, |
|
"log_odds_ratio": -0.511337399482727, |
|
"logits/chosen": 188.8147735595703, |
|
"logits/rejected": 188.24966430664062, |
|
"logps/chosen": -0.8174026608467102, |
|
"logps/rejected": -1.194439172744751, |
|
"loss": 39.6962, |
|
"nll_loss": 1.0865495204925537, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.4087013304233551, |
|
"rewards/margins": 0.18851831555366516, |
|
"rewards/rejected": -0.5972195863723755, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.8014814814814815, |
|
"grad_norm": 49.0, |
|
"learning_rate": 2.0446385870993467e-05, |
|
"log_odds_chosen": 0.7244794964790344, |
|
"log_odds_ratio": -0.4969249665737152, |
|
"logits/chosen": 192.61624145507812, |
|
"logits/rejected": 189.07421875, |
|
"logps/chosen": -0.8428407907485962, |
|
"logps/rejected": -1.2970322370529175, |
|
"loss": 41.4332, |
|
"nll_loss": 1.1237175464630127, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.4214203953742981, |
|
"rewards/margins": 0.22709576785564423, |
|
"rewards/rejected": -0.6485161185264587, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.8488888888888888, |
|
"grad_norm": 48.25, |
|
"learning_rate": 1.9089682321121834e-05, |
|
"log_odds_chosen": 0.7443265914916992, |
|
"log_odds_ratio": -0.4911392629146576, |
|
"logits/chosen": 192.0789794921875, |
|
"logits/rejected": 185.12327575683594, |
|
"logps/chosen": -0.7979795932769775, |
|
"logps/rejected": -1.2549121379852295, |
|
"loss": 39.3639, |
|
"nll_loss": 1.074517846107483, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.39898979663848877, |
|
"rewards/margins": 0.2284662276506424, |
|
"rewards/rejected": -0.6274560689926147, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 1.8962962962962964, |
|
"grad_norm": 42.5, |
|
"learning_rate": 1.775118274523545e-05, |
|
"log_odds_chosen": 0.5745521187782288, |
|
"log_odds_ratio": -0.5652587413787842, |
|
"logits/chosen": 193.84976196289062, |
|
"logits/rejected": 192.41378784179688, |
|
"logps/chosen": -0.8293254971504211, |
|
"logps/rejected": -1.1743533611297607, |
|
"loss": 41.819, |
|
"nll_loss": 1.1135209798812866, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.41466274857521057, |
|
"rewards/margins": 0.17251388728618622, |
|
"rewards/rejected": -0.5871766805648804, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.9437037037037037, |
|
"grad_norm": 40.25, |
|
"learning_rate": 1.643500976631037e-05, |
|
"log_odds_chosen": 0.6025683879852295, |
|
"log_odds_ratio": -0.5356005430221558, |
|
"logits/chosen": 196.73837280273438, |
|
"logits/rejected": 188.22824096679688, |
|
"logps/chosen": -0.8113398551940918, |
|
"logps/rejected": -1.197618007659912, |
|
"loss": 37.6443, |
|
"nll_loss": 1.048313856124878, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.4056699275970459, |
|
"rewards/margins": 0.19313909113407135, |
|
"rewards/rejected": -0.598809003829956, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 1.991111111111111, |
|
"grad_norm": 57.0, |
|
"learning_rate": 1.514521724066537e-05, |
|
"log_odds_chosen": 0.5694688558578491, |
|
"log_odds_ratio": -0.5482410788536072, |
|
"logits/chosen": 187.6742401123047, |
|
"logits/rejected": 190.1154022216797, |
|
"logps/chosen": -0.7763108015060425, |
|
"logps/rejected": -1.1007602214813232, |
|
"loss": 37.9486, |
|
"nll_loss": 1.0500026941299438, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -0.38815540075302124, |
|
"rewards/margins": 0.16222473978996277, |
|
"rewards/rejected": -0.5503801107406616, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.0385185185185186, |
|
"grad_norm": 40.0, |
|
"learning_rate": 1.3885777771950348e-05, |
|
"log_odds_chosen": 0.9869217872619629, |
|
"log_odds_ratio": -0.41741856932640076, |
|
"logits/chosen": 183.7540740966797, |
|
"logits/rejected": 182.7440185546875, |
|
"logps/chosen": -0.681525707244873, |
|
"logps/rejected": -1.2115617990493774, |
|
"loss": 34.0815, |
|
"nll_loss": 0.9338465929031372, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.3407628536224365, |
|
"rewards/margins": 0.26501795649528503, |
|
"rewards/rejected": -0.6057808995246887, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 2.0859259259259257, |
|
"grad_norm": 86.0, |
|
"learning_rate": 1.2660570475395683e-05, |
|
"log_odds_chosen": 1.3432199954986572, |
|
"log_odds_ratio": -0.3477163016796112, |
|
"logits/chosen": 171.4412078857422, |
|
"logits/rejected": 176.1414337158203, |
|
"logps/chosen": -0.5886165499687195, |
|
"logps/rejected": -1.315865397453308, |
|
"loss": 31.0776, |
|
"nll_loss": 0.883051872253418, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.29430827498435974, |
|
"rewards/margins": 0.3636243939399719, |
|
"rewards/rejected": -0.657932698726654, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.1333333333333333, |
|
"grad_norm": 47.25, |
|
"learning_rate": 1.1473369030008974e-05, |
|
"log_odds_chosen": 1.0965574979782104, |
|
"log_odds_ratio": -0.3985925316810608, |
|
"logits/chosen": 178.91383361816406, |
|
"logits/rejected": 177.06851196289062, |
|
"logps/chosen": -0.6075170040130615, |
|
"logps/rejected": -1.1777050495147705, |
|
"loss": 32.3742, |
|
"nll_loss": 0.8941748738288879, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.30375850200653076, |
|
"rewards/margins": 0.2850940525531769, |
|
"rewards/rejected": -0.5888525247573853, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 2.180740740740741, |
|
"grad_norm": 51.0, |
|
"learning_rate": 1.0327830055518842e-05, |
|
"log_odds_chosen": 1.2348709106445312, |
|
"log_odds_ratio": -0.33719533681869507, |
|
"logits/chosen": 176.9068145751953, |
|
"logits/rejected": 177.44601440429688, |
|
"logps/chosen": -0.5915892720222473, |
|
"logps/rejected": -1.2602530717849731, |
|
"loss": 31.0203, |
|
"nll_loss": 0.8626457452774048, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.29579463601112366, |
|
"rewards/margins": 0.3343318998813629, |
|
"rewards/rejected": -0.6301265358924866, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.228148148148148, |
|
"grad_norm": 44.0, |
|
"learning_rate": 9.227481849865235e-06, |
|
"log_odds_chosen": 1.0602965354919434, |
|
"log_odds_ratio": -0.41205477714538574, |
|
"logits/chosen": 174.06771850585938, |
|
"logits/rejected": 178.76084899902344, |
|
"logps/chosen": -0.6463479995727539, |
|
"logps/rejected": -1.174586534500122, |
|
"loss": 32.0119, |
|
"nll_loss": 0.8945677876472473, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.32317399978637695, |
|
"rewards/margins": 0.2641192674636841, |
|
"rewards/rejected": -0.587293267250061, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 2.2755555555555556, |
|
"grad_norm": 47.25, |
|
"learning_rate": 8.175713521924978e-06, |
|
"log_odds_chosen": 1.286481499671936, |
|
"log_odds_ratio": -0.3498726785182953, |
|
"logits/chosen": 171.71487426757812, |
|
"logits/rejected": 176.8938751220703, |
|
"logps/chosen": -0.5816351771354675, |
|
"logps/rejected": -1.2401165962219238, |
|
"loss": 31.3421, |
|
"nll_loss": 0.8873510360717773, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.29081758856773376, |
|
"rewards/margins": 0.32924067974090576, |
|
"rewards/rejected": -0.6200582981109619, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.322962962962963, |
|
"grad_norm": 40.0, |
|
"learning_rate": 7.1757645529443665e-06, |
|
"log_odds_chosen": 1.2826303243637085, |
|
"log_odds_ratio": -0.35086172819137573, |
|
"logits/chosen": 172.84579467773438, |
|
"logits/rejected": 174.81533813476562, |
|
"logps/chosen": -0.6082225441932678, |
|
"logps/rejected": -1.2833037376403809, |
|
"loss": 31.0705, |
|
"nll_loss": 0.8847354054450989, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.3041112720966339, |
|
"rewards/margins": 0.3375406265258789, |
|
"rewards/rejected": -0.6416518688201904, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 2.3703703703703702, |
|
"grad_norm": 39.5, |
|
"learning_rate": 6.230714818829733e-06, |
|
"log_odds_chosen": 1.3057904243469238, |
|
"log_odds_ratio": -0.3325490355491638, |
|
"logits/chosen": 168.0796661376953, |
|
"logits/rejected": 173.2673797607422, |
|
"logps/chosen": -0.5869969129562378, |
|
"logps/rejected": -1.2978041172027588, |
|
"loss": 30.7508, |
|
"nll_loss": 0.8432788848876953, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2934984564781189, |
|
"rewards/margins": 0.3554036617279053, |
|
"rewards/rejected": -0.6489020586013794, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 2.417777777777778, |
|
"grad_norm": 41.5, |
|
"learning_rate": 5.343475104027743e-06, |
|
"log_odds_chosen": 1.4420109987258911, |
|
"log_odds_ratio": -0.32847458124160767, |
|
"logits/chosen": 165.7822265625, |
|
"logits/rejected": 168.45877075195312, |
|
"logps/chosen": -0.5281413793563843, |
|
"logps/rejected": -1.296473741531372, |
|
"loss": 30.1395, |
|
"nll_loss": 0.8313242793083191, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.26407068967819214, |
|
"rewards/margins": 0.3841661512851715, |
|
"rewards/rejected": -0.648236870765686, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 2.4651851851851854, |
|
"grad_norm": 52.0, |
|
"learning_rate": 4.516778136213037e-06, |
|
"log_odds_chosen": 1.3616795539855957, |
|
"log_odds_ratio": -0.3403770327568054, |
|
"logits/chosen": 166.247802734375, |
|
"logits/rejected": 171.7600555419922, |
|
"logps/chosen": -0.5720769166946411, |
|
"logps/rejected": -1.2837584018707275, |
|
"loss": 30.4011, |
|
"nll_loss": 0.8514798283576965, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.28603845834732056, |
|
"rewards/margins": 0.3558407723903656, |
|
"rewards/rejected": -0.6418792009353638, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 2.5125925925925925, |
|
"grad_norm": 44.25, |
|
"learning_rate": 3.7531701693965554e-06, |
|
"log_odds_chosen": 1.1984084844589233, |
|
"log_odds_ratio": -0.39315086603164673, |
|
"logits/chosen": 167.62319946289062, |
|
"logits/rejected": 171.07821655273438, |
|
"logps/chosen": -0.6130216717720032, |
|
"logps/rejected": -1.2671737670898438, |
|
"loss": 30.4602, |
|
"nll_loss": 0.8455079197883606, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.3065108358860016, |
|
"rewards/margins": 0.32707610726356506, |
|
"rewards/rejected": -0.6335868835449219, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"grad_norm": 42.75, |
|
"learning_rate": 3.055003141378948e-06, |
|
"log_odds_chosen": 1.36872136592865, |
|
"log_odds_ratio": -0.3242368698120117, |
|
"logits/chosen": 169.27310180664062, |
|
"logits/rejected": 174.55384826660156, |
|
"logps/chosen": -0.6026479005813599, |
|
"logps/rejected": -1.332188367843628, |
|
"loss": 31.0676, |
|
"nll_loss": 0.8819044828414917, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.30132395029067993, |
|
"rewards/margins": 0.3647702634334564, |
|
"rewards/rejected": -0.666094183921814, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 2.6074074074074076, |
|
"grad_norm": 45.75, |
|
"learning_rate": 2.424427429704365e-06, |
|
"log_odds_chosen": 1.3102140426635742, |
|
"log_odds_ratio": -0.3487251400947571, |
|
"logits/chosen": 168.80856323242188, |
|
"logits/rejected": 168.40838623046875, |
|
"logps/chosen": -0.6217229962348938, |
|
"logps/rejected": -1.3165475130081177, |
|
"loss": 31.6148, |
|
"nll_loss": 0.8825966119766235, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.3108614981174469, |
|
"rewards/margins": 0.3474121689796448, |
|
"rewards/rejected": -0.6582737565040588, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 2.6548148148148147, |
|
"grad_norm": 47.0, |
|
"learning_rate": 1.8633852284264508e-06, |
|
"log_odds_chosen": 1.2873857021331787, |
|
"log_odds_ratio": -0.3503342866897583, |
|
"logits/chosen": 168.55545043945312, |
|
"logits/rejected": 170.8942413330078, |
|
"logps/chosen": -0.5770654678344727, |
|
"logps/rejected": -1.2647120952606201, |
|
"loss": 29.521, |
|
"nll_loss": 0.8119584321975708, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.28853273391723633, |
|
"rewards/margins": 0.3438234031200409, |
|
"rewards/rejected": -0.6323560476303101, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 2.7022222222222223, |
|
"grad_norm": 50.25, |
|
"learning_rate": 1.3736045660864034e-06, |
|
"log_odds_chosen": 1.2470929622650146, |
|
"log_odds_ratio": -0.35600870847702026, |
|
"logits/chosen": 171.45352172851562, |
|
"logits/rejected": 169.7559051513672, |
|
"logps/chosen": -0.5817210674285889, |
|
"logps/rejected": -1.2366844415664673, |
|
"loss": 31.0448, |
|
"nll_loss": 0.8478399515151978, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.29086053371429443, |
|
"rewards/margins": 0.3274817168712616, |
|
"rewards/rejected": -0.6183422207832336, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 2.74962962962963, |
|
"grad_norm": 44.75, |
|
"learning_rate": 9.565939833279192e-07, |
|
"log_odds_chosen": 1.2974636554718018, |
|
"log_odds_ratio": -0.34885287284851074, |
|
"logits/chosen": 169.34690856933594, |
|
"logits/rejected": 175.30169677734375, |
|
"logps/chosen": -0.6116211414337158, |
|
"logps/rejected": -1.27151620388031, |
|
"loss": 31.3539, |
|
"nll_loss": 0.8839661478996277, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3058105707168579, |
|
"rewards/margins": 0.3299475312232971, |
|
"rewards/rejected": -0.635758101940155, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 2.797037037037037, |
|
"grad_norm": 73.5, |
|
"learning_rate": 6.136378865420872e-07, |
|
"log_odds_chosen": 1.340787649154663, |
|
"log_odds_ratio": -0.37529805302619934, |
|
"logits/chosen": 169.1226043701172, |
|
"logits/rejected": 172.68368530273438, |
|
"logps/chosen": -0.6207782030105591, |
|
"logps/rejected": -1.3479269742965698, |
|
"loss": 31.3718, |
|
"nll_loss": 0.8938194513320923, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.31038910150527954, |
|
"rewards/margins": 0.36357441544532776, |
|
"rewards/rejected": -0.6739634871482849, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 2.8444444444444446, |
|
"grad_norm": 47.5, |
|
"learning_rate": 3.45792591853214e-07, |
|
"log_odds_chosen": 1.1846152544021606, |
|
"log_odds_ratio": -0.38126683235168457, |
|
"logits/chosen": 177.59153747558594, |
|
"logits/rejected": 175.08778381347656, |
|
"logps/chosen": -0.6378912925720215, |
|
"logps/rejected": -1.279050350189209, |
|
"loss": 31.709, |
|
"nll_loss": 0.9202780723571777, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.31894564628601074, |
|
"rewards/margins": 0.32057955861091614, |
|
"rewards/rejected": -0.6395251750946045, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 2.891851851851852, |
|
"grad_norm": 45.0, |
|
"learning_rate": 1.538830716302092e-07, |
|
"log_odds_chosen": 1.2477754354476929, |
|
"log_odds_ratio": -0.35705170035362244, |
|
"logits/chosen": 169.60562133789062, |
|
"logits/rejected": 170.3755645751953, |
|
"logps/chosen": -0.6306732892990112, |
|
"logps/rejected": -1.272749900817871, |
|
"loss": 29.668, |
|
"nll_loss": 0.8301697969436646, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.3153366446495056, |
|
"rewards/margins": 0.3210383355617523, |
|
"rewards/rejected": -0.6363749504089355, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 2.9392592592592592, |
|
"grad_norm": 57.75, |
|
"learning_rate": 3.8500413544415025e-08, |
|
"log_odds_chosen": 1.2991540431976318, |
|
"log_odds_ratio": -0.34246373176574707, |
|
"logits/chosen": 171.08616638183594, |
|
"logits/rejected": 171.2054443359375, |
|
"logps/chosen": -0.6051537990570068, |
|
"logps/rejected": -1.2699061632156372, |
|
"loss": 30.8065, |
|
"nll_loss": 0.8361706733703613, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.3025768995285034, |
|
"rewards/margins": 0.3323762118816376, |
|
"rewards/rejected": -0.6349530816078186, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 2.986666666666667, |
|
"grad_norm": 55.0, |
|
"learning_rate": 0.0, |
|
"log_odds_chosen": 1.4246046543121338, |
|
"log_odds_ratio": -0.31691890954971313, |
|
"logits/chosen": 167.8661346435547, |
|
"logits/rejected": 168.34278869628906, |
|
"logps/chosen": -0.5480004549026489, |
|
"logps/rejected": -1.2894717454910278, |
|
"loss": 30.1036, |
|
"nll_loss": 0.8535217046737671, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.27400022745132446, |
|
"rewards/margins": 0.37073561549186707, |
|
"rewards/rejected": -0.6447358727455139, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 2.986666666666667, |
|
"step": 315, |
|
"total_flos": 0.0, |
|
"train_loss": 60.72040763733879, |
|
"train_runtime": 6827.5776, |
|
"train_samples_per_second": 2.966, |
|
"train_steps_per_second": 0.046 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 315, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|