|
{ |
|
"best_metric": 0.1529119908809662, |
|
"best_model_checkpoint": "saves/Llama-3.1-8B-Instruct/lora/saa-800/checkpoint-450", |
|
"epoch": 10.0, |
|
"eval_steps": 50, |
|
"global_step": 450, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.2222222222222222, |
|
"grad_norm": 6.713047504425049, |
|
"learning_rate": 1.111111111111111e-06, |
|
"logits/chosen": -0.4419662356376648, |
|
"logits/rejected": -0.5169282555580139, |
|
"logps/chosen": -1.7502187490463257, |
|
"logps/rejected": -2.110954523086548, |
|
"loss": 1.8058, |
|
"odds_ratio_loss": 15.742452621459961, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.17502185702323914, |
|
"rewards/margins": 0.03607357665896416, |
|
"rewards/rejected": -0.21109545230865479, |
|
"sft_loss": 0.23150739073753357, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.4444444444444444, |
|
"grad_norm": 7.350757122039795, |
|
"learning_rate": 2.222222222222222e-06, |
|
"logits/chosen": -0.390705406665802, |
|
"logits/rejected": -0.4706195294857025, |
|
"logps/chosen": -1.6888424158096313, |
|
"logps/rejected": -2.1693763732910156, |
|
"loss": 1.7416, |
|
"odds_ratio_loss": 15.1643705368042, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.16888423264026642, |
|
"rewards/margins": 0.048053398728370667, |
|
"rewards/rejected": -0.21693763136863708, |
|
"sft_loss": 0.22513218224048615, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 8.413761138916016, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"logits/chosen": -0.42878788709640503, |
|
"logits/rejected": -0.5077282190322876, |
|
"logps/chosen": -1.698947548866272, |
|
"logps/rejected": -2.1924192905426025, |
|
"loss": 1.7505, |
|
"odds_ratio_loss": 15.268714904785156, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.1698947548866272, |
|
"rewards/margins": 0.04934718459844589, |
|
"rewards/rejected": -0.2192419320344925, |
|
"sft_loss": 0.22367195785045624, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.8888888888888888, |
|
"grad_norm": 6.453053951263428, |
|
"learning_rate": 4.444444444444444e-06, |
|
"logits/chosen": -0.40729817748069763, |
|
"logits/rejected": -0.480067640542984, |
|
"logps/chosen": -1.677170753479004, |
|
"logps/rejected": -1.9914417266845703, |
|
"loss": 1.736, |
|
"odds_ratio_loss": 15.256231307983398, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -0.1677170693874359, |
|
"rewards/margins": 0.03142711520195007, |
|
"rewards/rejected": -0.1991441696882248, |
|
"sft_loss": 0.21038159728050232, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"grad_norm": 5.584766864776611, |
|
"learning_rate": 4.998119881260576e-06, |
|
"logits/chosen": -0.4123212695121765, |
|
"logits/rejected": -0.49063047766685486, |
|
"logps/chosen": -1.4437320232391357, |
|
"logps/rejected": -1.951922059059143, |
|
"loss": 1.4945, |
|
"odds_ratio_loss": 13.130969047546387, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.14437320828437805, |
|
"rewards/margins": 0.050819020718336105, |
|
"rewards/rejected": -0.19519223272800446, |
|
"sft_loss": 0.18137237429618835, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.1111111111111112, |
|
"eval_logits/chosen": -0.3793938159942627, |
|
"eval_logits/rejected": -0.45482414960861206, |
|
"eval_logps/chosen": -1.228076696395874, |
|
"eval_logps/rejected": -1.7094627618789673, |
|
"eval_loss": 1.276250958442688, |
|
"eval_odds_ratio_loss": 11.260908126831055, |
|
"eval_rewards/accuracies": 0.7875000238418579, |
|
"eval_rewards/chosen": -0.12280768156051636, |
|
"eval_rewards/margins": 0.04813859239220619, |
|
"eval_rewards/rejected": -0.17094627022743225, |
|
"eval_runtime": 3.3465, |
|
"eval_samples_per_second": 23.905, |
|
"eval_sft_loss": 0.15016020834445953, |
|
"eval_steps_per_second": 11.953, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 3.5215771198272705, |
|
"learning_rate": 4.983095894354858e-06, |
|
"logits/chosen": -0.4166947901248932, |
|
"logits/rejected": -0.49852150678634644, |
|
"logps/chosen": -1.0756200551986694, |
|
"logps/rejected": -1.5570634603500366, |
|
"loss": 1.1242, |
|
"odds_ratio_loss": 9.97706127166748, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.10756200551986694, |
|
"rewards/margins": 0.04814432933926582, |
|
"rewards/rejected": -0.15570633113384247, |
|
"sft_loss": 0.1265355795621872, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.5555555555555556, |
|
"grad_norm": 3.005864381790161, |
|
"learning_rate": 4.953138276568462e-06, |
|
"logits/chosen": -0.45257633924484253, |
|
"logits/rejected": -0.5196300148963928, |
|
"logps/chosen": -0.6806862950325012, |
|
"logps/rejected": -1.1254621744155884, |
|
"loss": 0.7274, |
|
"odds_ratio_loss": 6.491485595703125, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.06806863099336624, |
|
"rewards/margins": 0.044477589428424835, |
|
"rewards/rejected": -0.11254620552062988, |
|
"sft_loss": 0.07826091349124908, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.7777777777777777, |
|
"grad_norm": 3.241006851196289, |
|
"learning_rate": 4.908427196539701e-06, |
|
"logits/chosen": -0.4250833988189697, |
|
"logits/rejected": -0.49341732263565063, |
|
"logps/chosen": -0.586665153503418, |
|
"logps/rejected": -0.9428589940071106, |
|
"loss": 0.6374, |
|
"odds_ratio_loss": 5.749706268310547, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -0.05866651609539986, |
|
"rewards/margins": 0.03561938554048538, |
|
"rewards/rejected": -0.09428589791059494, |
|
"sft_loss": 0.06239296868443489, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 3.3662965297698975, |
|
"learning_rate": 4.849231551964771e-06, |
|
"logits/chosen": -0.39320772886276245, |
|
"logits/rejected": -0.48544201254844666, |
|
"logps/chosen": -0.3357451558113098, |
|
"logps/rejected": -0.9199529886245728, |
|
"loss": 0.3774, |
|
"odds_ratio_loss": 3.414803981781006, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.03357451409101486, |
|
"rewards/margins": 0.05842079594731331, |
|
"rewards/rejected": -0.09199531376361847, |
|
"sft_loss": 0.03593815118074417, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"grad_norm": 1.9536067247390747, |
|
"learning_rate": 4.775907352415367e-06, |
|
"logits/chosen": -0.3746485710144043, |
|
"logits/rejected": -0.46515822410583496, |
|
"logps/chosen": -0.22609436511993408, |
|
"logps/rejected": -0.7698042392730713, |
|
"loss": 0.2666, |
|
"odds_ratio_loss": 2.4158308506011963, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.022609438747167587, |
|
"rewards/margins": 0.05437099188566208, |
|
"rewards/rejected": -0.07698042690753937, |
|
"sft_loss": 0.02498885616660118, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.2222222222222223, |
|
"eval_logits/chosen": -0.36658304929733276, |
|
"eval_logits/rejected": -0.4319043755531311, |
|
"eval_logps/chosen": -0.2119404524564743, |
|
"eval_logps/rejected": -0.7287805676460266, |
|
"eval_loss": 0.24907442927360535, |
|
"eval_odds_ratio_loss": 2.2483649253845215, |
|
"eval_rewards/accuracies": 0.824999988079071, |
|
"eval_rewards/chosen": -0.02119404636323452, |
|
"eval_rewards/margins": 0.05168401449918747, |
|
"eval_rewards/rejected": -0.07287804782390594, |
|
"eval_runtime": 3.3336, |
|
"eval_samples_per_second": 23.998, |
|
"eval_sft_loss": 0.02423796057701111, |
|
"eval_steps_per_second": 11.999, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.4444444444444446, |
|
"grad_norm": 1.7341715097427368, |
|
"learning_rate": 4.688895578255228e-06, |
|
"logits/chosen": -0.38399261236190796, |
|
"logits/rejected": -0.45104750990867615, |
|
"logps/chosen": -0.1595337688922882, |
|
"logps/rejected": -0.6476808190345764, |
|
"loss": 0.1984, |
|
"odds_ratio_loss": 1.7913240194320679, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.01595337688922882, |
|
"rewards/margins": 0.048814695328474045, |
|
"rewards/rejected": -0.06476806849241257, |
|
"sft_loss": 0.0192393995821476, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.6666666666666665, |
|
"grad_norm": 1.6577801704406738, |
|
"learning_rate": 4.588719528532342e-06, |
|
"logits/chosen": -0.42996135354042053, |
|
"logits/rejected": -0.5020943880081177, |
|
"logps/chosen": -0.11305193603038788, |
|
"logps/rejected": -0.6292703747749329, |
|
"loss": 0.1528, |
|
"odds_ratio_loss": 1.378528356552124, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.011305193416774273, |
|
"rewards/margins": 0.05162184312939644, |
|
"rewards/rejected": -0.06292703002691269, |
|
"sft_loss": 0.014913685619831085, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.888888888888889, |
|
"grad_norm": 0.804415762424469, |
|
"learning_rate": 4.475981673796899e-06, |
|
"logits/chosen": -0.4010156989097595, |
|
"logits/rejected": -0.4532565474510193, |
|
"logps/chosen": -0.11344721168279648, |
|
"logps/rejected": -0.5929852724075317, |
|
"loss": 0.1557, |
|
"odds_ratio_loss": 1.430558204650879, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.011344721540808678, |
|
"rewards/margins": 0.047953806817531586, |
|
"rewards/rejected": -0.059298526495695114, |
|
"sft_loss": 0.012685336172580719, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.111111111111111, |
|
"grad_norm": 1.188767910003662, |
|
"learning_rate": 4.351360032772512e-06, |
|
"logits/chosen": -0.37222054600715637, |
|
"logits/rejected": -0.4317198395729065, |
|
"logps/chosen": -0.10829365253448486, |
|
"logps/rejected": -0.6532029509544373, |
|
"loss": 0.1518, |
|
"odds_ratio_loss": 1.385451078414917, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.010829364880919456, |
|
"rewards/margins": 0.05449093505740166, |
|
"rewards/rejected": -0.06532029807567596, |
|
"sft_loss": 0.013293206691741943, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"grad_norm": 1.1802352666854858, |
|
"learning_rate": 4.215604094671835e-06, |
|
"logits/chosen": -0.3585512936115265, |
|
"logits/rejected": -0.4357479512691498, |
|
"logps/chosen": -0.07403306663036346, |
|
"logps/rejected": -0.7158663868904114, |
|
"loss": 0.1014, |
|
"odds_ratio_loss": 0.9041417837142944, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.007403307594358921, |
|
"rewards/margins": 0.06418333202600479, |
|
"rewards/rejected": -0.07158664613962173, |
|
"sft_loss": 0.010936584323644638, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.3333333333333335, |
|
"eval_logits/chosen": -0.3283754289150238, |
|
"eval_logits/rejected": -0.3820287585258484, |
|
"eval_logps/chosen": -0.1292387694120407, |
|
"eval_logps/rejected": -0.6057685017585754, |
|
"eval_loss": 0.16315264999866486, |
|
"eval_odds_ratio_loss": 1.4634513854980469, |
|
"eval_rewards/accuracies": 0.8125, |
|
"eval_rewards/chosen": -0.012923876754939556, |
|
"eval_rewards/margins": 0.047652970999479294, |
|
"eval_rewards/rejected": -0.060576848685741425, |
|
"eval_runtime": 3.3355, |
|
"eval_samples_per_second": 23.984, |
|
"eval_sft_loss": 0.016807515174150467, |
|
"eval_steps_per_second": 11.992, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.5555555555555554, |
|
"grad_norm": 2.5364763736724854, |
|
"learning_rate": 4.069530311680247e-06, |
|
"logits/chosen": -0.37899985909461975, |
|
"logits/rejected": -0.4429406225681305, |
|
"logps/chosen": -0.09646956622600555, |
|
"logps/rejected": -0.5016245245933533, |
|
"loss": 0.1441, |
|
"odds_ratio_loss": 1.3272716999053955, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -0.00964695680886507, |
|
"rewards/margins": 0.04051550105214119, |
|
"rewards/rejected": -0.050162456929683685, |
|
"sft_loss": 0.011343193240463734, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.7777777777777777, |
|
"grad_norm": 1.1225122213363647, |
|
"learning_rate": 3.914017188716347e-06, |
|
"logits/chosen": -0.32853808999061584, |
|
"logits/rejected": -0.3947625160217285, |
|
"logps/chosen": -0.0880068689584732, |
|
"logps/rejected": -0.5245143175125122, |
|
"loss": 0.1251, |
|
"odds_ratio_loss": 1.1624243259429932, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.00880068726837635, |
|
"rewards/margins": 0.04365074634552002, |
|
"rewards/rejected": -0.05245143175125122, |
|
"sft_loss": 0.00884676817804575, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 2.3566975593566895, |
|
"learning_rate": 3.7500000000000005e-06, |
|
"logits/chosen": -0.394605427980423, |
|
"logits/rejected": -0.4463408589363098, |
|
"logps/chosen": -0.09484803676605225, |
|
"logps/rejected": -0.5903218984603882, |
|
"loss": 0.1306, |
|
"odds_ratio_loss": 1.1676770448684692, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -0.00948480237275362, |
|
"rewards/margins": 0.049547381699085236, |
|
"rewards/rejected": -0.05903219059109688, |
|
"sft_loss": 0.013791071251034737, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 4.222222222222222, |
|
"grad_norm": 1.111855387687683, |
|
"learning_rate": 3.578465164203134e-06, |
|
"logits/chosen": -0.3551129698753357, |
|
"logits/rejected": -0.43426722288131714, |
|
"logps/chosen": -0.06811969727277756, |
|
"logps/rejected": -0.6758677959442139, |
|
"loss": 0.0949, |
|
"odds_ratio_loss": 0.8656018376350403, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.006811970379203558, |
|
"rewards/margins": 0.06077481061220169, |
|
"rewards/rejected": -0.06758677959442139, |
|
"sft_loss": 0.008338207378983498, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"grad_norm": 1.3708534240722656, |
|
"learning_rate": 3.400444312011776e-06, |
|
"logits/chosen": -0.345875084400177, |
|
"logits/rejected": -0.3952234089374542, |
|
"logps/chosen": -0.10109534114599228, |
|
"logps/rejected": -0.5074071288108826, |
|
"loss": 0.1429, |
|
"odds_ratio_loss": 1.325073003768921, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.010109534487128258, |
|
"rewards/margins": 0.04063117876648903, |
|
"rewards/rejected": -0.05074070766568184, |
|
"sft_loss": 0.010358814150094986, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.444444444444445, |
|
"eval_logits/chosen": -0.32978400588035583, |
|
"eval_logits/rejected": -0.38182884454727173, |
|
"eval_logps/chosen": -0.121058389544487, |
|
"eval_logps/rejected": -0.5841361284255981, |
|
"eval_loss": 0.1533508002758026, |
|
"eval_odds_ratio_loss": 1.3751581907272339, |
|
"eval_rewards/accuracies": 0.8125, |
|
"eval_rewards/chosen": -0.01210583932697773, |
|
"eval_rewards/margins": 0.04630777984857559, |
|
"eval_rewards/rejected": -0.058413613587617874, |
|
"eval_runtime": 3.3378, |
|
"eval_samples_per_second": 23.968, |
|
"eval_sft_loss": 0.015834979712963104, |
|
"eval_steps_per_second": 11.984, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 4.666666666666667, |
|
"grad_norm": 1.4620966911315918, |
|
"learning_rate": 3.217008081777726e-06, |
|
"logits/chosen": -0.3843366503715515, |
|
"logits/rejected": -0.4375368058681488, |
|
"logps/chosen": -0.08334760367870331, |
|
"logps/rejected": -0.5660797357559204, |
|
"loss": 0.1171, |
|
"odds_ratio_loss": 1.0661273002624512, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.00833476148545742, |
|
"rewards/margins": 0.04827320575714111, |
|
"rewards/rejected": -0.05660796910524368, |
|
"sft_loss": 0.010473220609128475, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 4.888888888888889, |
|
"grad_norm": 1.2518880367279053, |
|
"learning_rate": 3.0292596805735275e-06, |
|
"logits/chosen": -0.3598805367946625, |
|
"logits/rejected": -0.4199501872062683, |
|
"logps/chosen": -0.07187429815530777, |
|
"logps/rejected": -0.5510466694831848, |
|
"loss": 0.1029, |
|
"odds_ratio_loss": 0.9251865148544312, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -0.007187430746853352, |
|
"rewards/margins": 0.047917239367961884, |
|
"rewards/rejected": -0.05510466545820236, |
|
"sft_loss": 0.010374334640800953, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 5.111111111111111, |
|
"grad_norm": 1.3117250204086304, |
|
"learning_rate": 2.8383282493753282e-06, |
|
"logits/chosen": -0.3635903596878052, |
|
"logits/rejected": -0.43442487716674805, |
|
"logps/chosen": -0.06123008579015732, |
|
"logps/rejected": -0.7030965089797974, |
|
"loss": 0.0878, |
|
"odds_ratio_loss": 0.7977678179740906, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.006123009137809277, |
|
"rewards/margins": 0.06418663263320923, |
|
"rewards/rejected": -0.07030965387821198, |
|
"sft_loss": 0.007975312881171703, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 5.333333333333333, |
|
"grad_norm": 2.640739679336548, |
|
"learning_rate": 2.6453620722761897e-06, |
|
"logits/chosen": -0.350188285112381, |
|
"logits/rejected": -0.4023573398590088, |
|
"logps/chosen": -0.08658941090106964, |
|
"logps/rejected": -0.6677496433258057, |
|
"loss": 0.1183, |
|
"odds_ratio_loss": 1.068228006362915, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.00865894090384245, |
|
"rewards/margins": 0.05811602994799614, |
|
"rewards/rejected": -0.06677497178316116, |
|
"sft_loss": 0.011527000926434994, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"grad_norm": 1.3426445722579956, |
|
"learning_rate": 2.4515216705704396e-06, |
|
"logits/chosen": -0.3609291911125183, |
|
"logits/rejected": -0.4190797209739685, |
|
"logps/chosen": -0.0702720507979393, |
|
"logps/rejected": -0.596028745174408, |
|
"loss": 0.1007, |
|
"odds_ratio_loss": 0.925524115562439, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.00702720507979393, |
|
"rewards/margins": 0.052575670182704926, |
|
"rewards/rejected": -0.059602878987789154, |
|
"sft_loss": 0.008168576285243034, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.555555555555555, |
|
"eval_logits/chosen": -0.32348352670669556, |
|
"eval_logits/rejected": -0.37402915954589844, |
|
"eval_logps/chosen": -0.12062982469797134, |
|
"eval_logps/rejected": -0.6407424807548523, |
|
"eval_loss": 0.15295907855033875, |
|
"eval_odds_ratio_loss": 1.3704458475112915, |
|
"eval_rewards/accuracies": 0.8125, |
|
"eval_rewards/chosen": -0.012062981724739075, |
|
"eval_rewards/margins": 0.05201127007603645, |
|
"eval_rewards/rejected": -0.06407425552606583, |
|
"eval_runtime": 3.3317, |
|
"eval_samples_per_second": 24.012, |
|
"eval_sft_loss": 0.01591448113322258, |
|
"eval_steps_per_second": 12.006, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 5.777777777777778, |
|
"grad_norm": 1.4032063484191895, |
|
"learning_rate": 2.2579728232420524e-06, |
|
"logits/chosen": -0.40077710151672363, |
|
"logits/rejected": -0.4527947008609772, |
|
"logps/chosen": -0.0916273444890976, |
|
"logps/rejected": -0.5904611349105835, |
|
"loss": 0.127, |
|
"odds_ratio_loss": 1.1558631658554077, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.009162734262645245, |
|
"rewards/margins": 0.049883387982845306, |
|
"rewards/rejected": -0.05904611945152283, |
|
"sft_loss": 0.011430850252509117, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 6.0, |
|
"grad_norm": 1.6447800397872925, |
|
"learning_rate": 2.0658795558326745e-06, |
|
"logits/chosen": -0.36904090642929077, |
|
"logits/rejected": -0.43377822637557983, |
|
"logps/chosen": -0.05662701651453972, |
|
"logps/rejected": -0.6641786098480225, |
|
"loss": 0.0778, |
|
"odds_ratio_loss": 0.7118644714355469, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -0.005662702023983002, |
|
"rewards/margins": 0.060755155980587006, |
|
"rewards/rejected": -0.06641785800457001, |
|
"sft_loss": 0.006575153209269047, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 6.222222222222222, |
|
"grad_norm": 2.223752021789551, |
|
"learning_rate": 1.876397139855047e-06, |
|
"logits/chosen": -0.3556548058986664, |
|
"logits/rejected": -0.4135330319404602, |
|
"logps/chosen": -0.0746704488992691, |
|
"logps/rejected": -0.6784394979476929, |
|
"loss": 0.1018, |
|
"odds_ratio_loss": 0.9240021705627441, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.007467044983059168, |
|
"rewards/margins": 0.06037690117955208, |
|
"rewards/rejected": -0.0678439512848854, |
|
"sft_loss": 0.009404648095369339, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 6.444444444444445, |
|
"grad_norm": 1.75334894657135, |
|
"learning_rate": 1.6906651448541977e-06, |
|
"logits/chosen": -0.3690106272697449, |
|
"logits/rejected": -0.4269256591796875, |
|
"logps/chosen": -0.05620593950152397, |
|
"logps/rejected": -0.6589001417160034, |
|
"loss": 0.0805, |
|
"odds_ratio_loss": 0.7411076426506042, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.005620594136416912, |
|
"rewards/margins": 0.060269422829151154, |
|
"rewards/rejected": -0.06589001417160034, |
|
"sft_loss": 0.006437716074287891, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"grad_norm": 4.5506911277771, |
|
"learning_rate": 1.509800584902108e-06, |
|
"logits/chosen": -0.3519323468208313, |
|
"logits/rejected": -0.41471752524375916, |
|
"logps/chosen": -0.10020919144153595, |
|
"logps/rejected": -0.6135331392288208, |
|
"loss": 0.1385, |
|
"odds_ratio_loss": 1.263484001159668, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.010020919144153595, |
|
"rewards/margins": 0.051332395523786545, |
|
"rewards/rejected": -0.06135330721735954, |
|
"sft_loss": 0.012116280384361744, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.666666666666667, |
|
"eval_logits/chosen": -0.32135990262031555, |
|
"eval_logits/rejected": -0.3725373148918152, |
|
"eval_logps/chosen": -0.12169794738292694, |
|
"eval_logps/rejected": -0.6881048083305359, |
|
"eval_loss": 0.15343782305717468, |
|
"eval_odds_ratio_loss": 1.3728852272033691, |
|
"eval_rewards/accuracies": 0.800000011920929, |
|
"eval_rewards/chosen": -0.012169795110821724, |
|
"eval_rewards/margins": 0.05664069205522537, |
|
"eval_rewards/rejected": -0.06881048530340195, |
|
"eval_runtime": 3.3342, |
|
"eval_samples_per_second": 23.994, |
|
"eval_sft_loss": 0.016149301081895828, |
|
"eval_steps_per_second": 11.997, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 6.888888888888889, |
|
"grad_norm": 1.4514155387878418, |
|
"learning_rate": 1.3348912007436538e-06, |
|
"logits/chosen": -0.33361396193504333, |
|
"logits/rejected": -0.3964681923389435, |
|
"logps/chosen": -0.05151065066456795, |
|
"logps/rejected": -0.6935869455337524, |
|
"loss": 0.075, |
|
"odds_ratio_loss": 0.6938644051551819, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -0.005151065066456795, |
|
"rewards/margins": 0.06420762091875076, |
|
"rewards/rejected": -0.069358691573143, |
|
"sft_loss": 0.005607123486697674, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 7.111111111111111, |
|
"grad_norm": 1.1120833158493042, |
|
"learning_rate": 1.1669889179957725e-06, |
|
"logits/chosen": -0.34897491335868835, |
|
"logits/rejected": -0.4280971884727478, |
|
"logps/chosen": -0.06502507627010345, |
|
"logps/rejected": -0.724215030670166, |
|
"loss": 0.0902, |
|
"odds_ratio_loss": 0.8162932395935059, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.006502507720142603, |
|
"rewards/margins": 0.06591899693012238, |
|
"rewards/rejected": -0.07242151349782944, |
|
"sft_loss": 0.008545474149286747, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 7.333333333333333, |
|
"grad_norm": 0.9275766611099243, |
|
"learning_rate": 1.0071035207430352e-06, |
|
"logits/chosen": -0.34552061557769775, |
|
"logits/rejected": -0.4186931550502777, |
|
"logps/chosen": -0.057514738291502, |
|
"logps/rejected": -0.7304258942604065, |
|
"loss": 0.0812, |
|
"odds_ratio_loss": 0.7468503713607788, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.005751474294811487, |
|
"rewards/margins": 0.06729111820459366, |
|
"rewards/rejected": -0.07304258644580841, |
|
"sft_loss": 0.006484520621597767, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 7.555555555555555, |
|
"grad_norm": 1.7526174783706665, |
|
"learning_rate": 8.561965785773413e-07, |
|
"logits/chosen": -0.35446950793266296, |
|
"logits/rejected": -0.41398343443870544, |
|
"logps/chosen": -0.04472974315285683, |
|
"logps/rejected": -0.6881515979766846, |
|
"loss": 0.0655, |
|
"odds_ratio_loss": 0.5992408394813538, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": -0.004472973756492138, |
|
"rewards/margins": 0.0643421858549118, |
|
"rewards/rejected": -0.06881515681743622, |
|
"sft_loss": 0.0055578285828232765, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 7.777777777777778, |
|
"grad_norm": 1.4603219032287598, |
|
"learning_rate": 7.151756636052529e-07, |
|
"logits/chosen": -0.35244011878967285, |
|
"logits/rejected": -0.4057006239891052, |
|
"logps/chosen": -0.06629346311092377, |
|
"logps/rejected": -0.6709402203559875, |
|
"loss": 0.0918, |
|
"odds_ratio_loss": 0.841974139213562, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.0066293468698859215, |
|
"rewards/margins": 0.060464680194854736, |
|
"rewards/rejected": -0.06709402799606323, |
|
"sft_loss": 0.0076185790821909904, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 7.777777777777778, |
|
"eval_logits/chosen": -0.31914329528808594, |
|
"eval_logits/rejected": -0.3697816729545593, |
|
"eval_logps/chosen": -0.12174425274133682, |
|
"eval_logps/rejected": -0.6888757944107056, |
|
"eval_loss": 0.1536635458469391, |
|
"eval_odds_ratio_loss": 1.3741592168807983, |
|
"eval_rewards/accuracies": 0.8125, |
|
"eval_rewards/chosen": -0.012174425646662712, |
|
"eval_rewards/margins": 0.05671315640211105, |
|
"eval_rewards/rejected": -0.06888757646083832, |
|
"eval_runtime": 3.3381, |
|
"eval_samples_per_second": 23.966, |
|
"eval_sft_loss": 0.016247618943452835, |
|
"eval_steps_per_second": 11.983, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"grad_norm": 0.9194740056991577, |
|
"learning_rate": 5.848888922025553e-07, |
|
"logits/chosen": -0.3591064214706421, |
|
"logits/rejected": -0.4297390878200531, |
|
"logps/chosen": -0.08391134440898895, |
|
"logps/rejected": -0.7468418478965759, |
|
"loss": 0.118, |
|
"odds_ratio_loss": 1.0900821685791016, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.00839113537222147, |
|
"rewards/margins": 0.06629304587841034, |
|
"rewards/rejected": -0.07468418776988983, |
|
"sft_loss": 0.008979488164186478, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 8.222222222222221, |
|
"grad_norm": 1.1211438179016113, |
|
"learning_rate": 4.661198243425813e-07, |
|
"logits/chosen": -0.3469446301460266, |
|
"logits/rejected": -0.416546493768692, |
|
"logps/chosen": -0.06295167654752731, |
|
"logps/rejected": -0.6733905076980591, |
|
"loss": 0.0877, |
|
"odds_ratio_loss": 0.7965422868728638, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.006295167841017246, |
|
"rewards/margins": 0.0610438771545887, |
|
"rewards/rejected": -0.06733904778957367, |
|
"sft_loss": 0.008044925518333912, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 8.444444444444445, |
|
"grad_norm": 2.9023804664611816, |
|
"learning_rate": 3.595827511743341e-07, |
|
"logits/chosen": -0.36501026153564453, |
|
"logits/rejected": -0.4163384437561035, |
|
"logps/chosen": -0.0608031265437603, |
|
"logps/rejected": -0.7410011291503906, |
|
"loss": 0.0857, |
|
"odds_ratio_loss": 0.7958894371986389, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.006080311723053455, |
|
"rewards/margins": 0.068019799888134, |
|
"rewards/rejected": -0.07410011440515518, |
|
"sft_loss": 0.006128143519163132, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 8.666666666666666, |
|
"grad_norm": 1.2382421493530273, |
|
"learning_rate": 2.6591839919146963e-07, |
|
"logits/chosen": -0.3580993711948395, |
|
"logits/rejected": -0.4169589579105377, |
|
"logps/chosen": -0.07410109043121338, |
|
"logps/rejected": -0.7789124250411987, |
|
"loss": 0.1054, |
|
"odds_ratio_loss": 0.9693442583084106, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -0.0074101099744439125, |
|
"rewards/margins": 0.07048113644123077, |
|
"rewards/rejected": -0.07789124548435211, |
|
"sft_loss": 0.008436056785285473, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"grad_norm": 1.7495219707489014, |
|
"learning_rate": 1.8569007682777417e-07, |
|
"logits/chosen": -0.37370580434799194, |
|
"logits/rejected": -0.43429917097091675, |
|
"logps/chosen": -0.05244039371609688, |
|
"logps/rejected": -0.7023984789848328, |
|
"loss": 0.0752, |
|
"odds_ratio_loss": 0.6945605874061584, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -0.005244039464741945, |
|
"rewards/margins": 0.06499581038951874, |
|
"rewards/rejected": -0.0702398419380188, |
|
"sft_loss": 0.005755658261477947, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 8.88888888888889, |
|
"eval_logits/chosen": -0.3194546103477478, |
|
"eval_logits/rejected": -0.3705773651599884, |
|
"eval_logps/chosen": -0.1213468462228775, |
|
"eval_logps/rejected": -0.689638614654541, |
|
"eval_loss": 0.15339744091033936, |
|
"eval_odds_ratio_loss": 1.3723235130310059, |
|
"eval_rewards/accuracies": 0.800000011920929, |
|
"eval_rewards/chosen": -0.01213468424975872, |
|
"eval_rewards/margins": 0.056829191744327545, |
|
"eval_rewards/rejected": -0.06896387040615082, |
|
"eval_runtime": 3.3351, |
|
"eval_samples_per_second": 23.987, |
|
"eval_sft_loss": 0.016165101900696754, |
|
"eval_steps_per_second": 11.994, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 9.11111111111111, |
|
"grad_norm": 2.9914894104003906, |
|
"learning_rate": 1.1938028665396172e-07, |
|
"logits/chosen": -0.3595274090766907, |
|
"logits/rejected": -0.41625022888183594, |
|
"logps/chosen": -0.07939668744802475, |
|
"logps/rejected": -0.6182006597518921, |
|
"loss": 0.1064, |
|
"odds_ratio_loss": 0.9655207395553589, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -0.007939668372273445, |
|
"rewards/margins": 0.05388040468096733, |
|
"rewards/rejected": -0.06182006746530533, |
|
"sft_loss": 0.009880236349999905, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 9.333333333333334, |
|
"grad_norm": 1.7583844661712646, |
|
"learning_rate": 6.738782355044048e-08, |
|
"logits/chosen": -0.35592731833457947, |
|
"logits/rejected": -0.41578736901283264, |
|
"logps/chosen": -0.06436184048652649, |
|
"logps/rejected": -0.6822584867477417, |
|
"loss": 0.0919, |
|
"odds_ratio_loss": 0.8448025584220886, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.006436183117330074, |
|
"rewards/margins": 0.06178967282176018, |
|
"rewards/rejected": -0.06822585314512253, |
|
"sft_loss": 0.007466461509466171, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 9.555555555555555, |
|
"grad_norm": 0.7222520112991333, |
|
"learning_rate": 3.0025376307977474e-08, |
|
"logits/chosen": -0.3527710437774658, |
|
"logits/rejected": -0.41858869791030884, |
|
"logps/chosen": -0.042861782014369965, |
|
"logps/rejected": -0.7103925943374634, |
|
"loss": 0.0601, |
|
"odds_ratio_loss": 0.5496730804443359, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -0.004286178387701511, |
|
"rewards/margins": 0.0667530745267868, |
|
"rewards/rejected": -0.07103925198316574, |
|
"sft_loss": 0.005130757577717304, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 9.777777777777779, |
|
"grad_norm": 1.2489662170410156, |
|
"learning_rate": 7.517647080519941e-09, |
|
"logits/chosen": -0.34115296602249146, |
|
"logits/rejected": -0.39803385734558105, |
|
"logps/chosen": -0.05217872932553291, |
|
"logps/rejected": -0.8210271000862122, |
|
"loss": 0.0779, |
|
"odds_ratio_loss": 0.7251914739608765, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -0.0052178725600242615, |
|
"rewards/margins": 0.07688483595848083, |
|
"rewards/rejected": -0.0821027159690857, |
|
"sft_loss": 0.0053517939522862434, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"grad_norm": 1.9128329753875732, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.3783785402774811, |
|
"logits/rejected": -0.4389713406562805, |
|
"logps/chosen": -0.07910243421792984, |
|
"logps/rejected": -0.6643815040588379, |
|
"loss": 0.1052, |
|
"odds_ratio_loss": 0.9564045667648315, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.007910243235528469, |
|
"rewards/margins": 0.05852789804339409, |
|
"rewards/rejected": -0.06643815338611603, |
|
"sft_loss": 0.009510872885584831, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_logits/chosen": -0.3184443712234497, |
|
"eval_logits/rejected": -0.36903008818626404, |
|
"eval_logps/chosen": -0.1208958849310875, |
|
"eval_logps/rejected": -0.6922038793563843, |
|
"eval_loss": 0.1529119908809662, |
|
"eval_odds_ratio_loss": 1.3675727844238281, |
|
"eval_rewards/accuracies": 0.8125, |
|
"eval_rewards/chosen": -0.012089588679373264, |
|
"eval_rewards/margins": 0.05713079497218132, |
|
"eval_rewards/rejected": -0.06922038644552231, |
|
"eval_runtime": 3.3378, |
|
"eval_samples_per_second": 23.968, |
|
"eval_sft_loss": 0.016154715791344643, |
|
"eval_steps_per_second": 11.984, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"step": 450, |
|
"total_flos": 8.074648418018918e+16, |
|
"train_loss": 0.3428536836306254, |
|
"train_runtime": 994.0751, |
|
"train_samples_per_second": 7.243, |
|
"train_steps_per_second": 0.453 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 450, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.074648418018918e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|