|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 684, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 2.024759928283202, |
|
"learning_rate": 7.246376811594204e-08, |
|
"logits/chosen": -2.961127519607544, |
|
"logits/rejected": -2.9461119174957275, |
|
"logps/chosen": -261.90582275390625, |
|
"logps/rejected": -270.03265380859375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/margins_max": 0.0, |
|
"rewards/margins_min": 0.0, |
|
"rewards/margins_std": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 1.8980625865574121, |
|
"learning_rate": 7.246376811594204e-07, |
|
"logits/chosen": -2.873429775238037, |
|
"logits/rejected": -2.8538858890533447, |
|
"logps/chosen": -217.4855194091797, |
|
"logps/rejected": -222.1319580078125, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.4444444477558136, |
|
"rewards/chosen": 0.000618334801401943, |
|
"rewards/margins": 0.0005746870301663876, |
|
"rewards/margins_max": 0.0019774516113102436, |
|
"rewards/margins_min": -0.0008280774345621467, |
|
"rewards/margins_std": 0.001983808586373925, |
|
"rewards/rejected": 4.364784399513155e-05, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 2.110946747867592, |
|
"learning_rate": 1.4492753623188408e-06, |
|
"logits/chosen": -2.856001377105713, |
|
"logits/rejected": -2.873141050338745, |
|
"logps/chosen": -228.9456787109375, |
|
"logps/rejected": -176.6509246826172, |
|
"loss": 0.6924, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.000968199223279953, |
|
"rewards/margins": 0.0013832334661856294, |
|
"rewards/margins_max": 0.002613522345200181, |
|
"rewards/margins_min": 0.0001529444707557559, |
|
"rewards/margins_std": 0.0017398912459611893, |
|
"rewards/rejected": -0.00041503418469801545, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 2.0573110330048388, |
|
"learning_rate": 2.173913043478261e-06, |
|
"logits/chosen": -2.929853677749634, |
|
"logits/rejected": -2.875521183013916, |
|
"logps/chosen": -260.0462951660156, |
|
"logps/rejected": -239.0731658935547, |
|
"loss": 0.6904, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.004958462901413441, |
|
"rewards/margins": 0.005339525174349546, |
|
"rewards/margins_max": 0.011697771959006786, |
|
"rewards/margins_min": -0.001018722541630268, |
|
"rewards/margins_std": 0.00899192038923502, |
|
"rewards/rejected": -0.00038106151623651385, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 1.9291759229840313, |
|
"learning_rate": 2.8985507246376816e-06, |
|
"logits/chosen": -2.8282618522644043, |
|
"logits/rejected": -2.7805304527282715, |
|
"logps/chosen": -326.31719970703125, |
|
"logps/rejected": -365.41064453125, |
|
"loss": 0.6861, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.01588786579668522, |
|
"rewards/margins": 0.01591557264328003, |
|
"rewards/margins_max": 0.029321899637579918, |
|
"rewards/margins_min": 0.0025092470459640026, |
|
"rewards/margins_std": 0.0189594067633152, |
|
"rewards/rejected": -2.7706240871339105e-05, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 1.679205040063332, |
|
"learning_rate": 3.6231884057971017e-06, |
|
"logits/chosen": -2.890886068344116, |
|
"logits/rejected": -2.8183233737945557, |
|
"logps/chosen": -249.2432098388672, |
|
"logps/rejected": -246.5466766357422, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.013654066249728203, |
|
"rewards/margins": 0.013491788879036903, |
|
"rewards/margins_max": 0.030749738216400146, |
|
"rewards/margins_min": -0.003766159061342478, |
|
"rewards/margins_std": 0.024406425654888153, |
|
"rewards/rejected": 0.00016227728337980807, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 1.7034528224314027, |
|
"learning_rate": 4.347826086956522e-06, |
|
"logits/chosen": -3.011662006378174, |
|
"logits/rejected": -2.9398560523986816, |
|
"logps/chosen": -304.8922119140625, |
|
"logps/rejected": -246.6385498046875, |
|
"loss": 0.6705, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.03926776722073555, |
|
"rewards/margins": 0.04536719247698784, |
|
"rewards/margins_max": 0.0852198451757431, |
|
"rewards/margins_min": 0.005514549091458321, |
|
"rewards/margins_std": 0.056360144168138504, |
|
"rewards/rejected": -0.006099428050220013, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 1.9031177489294464, |
|
"learning_rate": 4.999967381905813e-06, |
|
"logits/chosen": -3.010206460952759, |
|
"logits/rejected": -2.93892240524292, |
|
"logps/chosen": -268.6554870605469, |
|
"logps/rejected": -208.88131713867188, |
|
"loss": 0.6535, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.034810397773981094, |
|
"rewards/margins": 0.07472650706768036, |
|
"rewards/margins_max": 0.11295346170663834, |
|
"rewards/margins_min": 0.03649955615401268, |
|
"rewards/margins_std": 0.054061077535152435, |
|
"rewards/rejected": -0.039916109293699265, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 2.0564287009461806, |
|
"learning_rate": 4.9960542403925095e-06, |
|
"logits/chosen": -2.7611324787139893, |
|
"logits/rejected": -2.683593273162842, |
|
"logps/chosen": -256.10906982421875, |
|
"logps/rejected": -241.2465057373047, |
|
"loss": 0.6403, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.01417328417301178, |
|
"rewards/margins": 0.08097021281719208, |
|
"rewards/margins_max": 0.19939911365509033, |
|
"rewards/margins_min": -0.03745868057012558, |
|
"rewards/margins_std": 0.16748374700546265, |
|
"rewards/rejected": -0.0667969286441803, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 1.9620079371246095, |
|
"learning_rate": 4.98562917836165e-06, |
|
"logits/chosen": -2.7862794399261475, |
|
"logits/rejected": -2.755009651184082, |
|
"logps/chosen": -260.46527099609375, |
|
"logps/rejected": -209.8500213623047, |
|
"loss": 0.6216, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.02841970883309841, |
|
"rewards/margins": 0.1533060073852539, |
|
"rewards/margins_max": 0.2169797718524933, |
|
"rewards/margins_min": 0.08963226526975632, |
|
"rewards/margins_std": 0.09004827588796616, |
|
"rewards/rejected": -0.12488631159067154, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 2.682732545242212, |
|
"learning_rate": 4.968719393609757e-06, |
|
"logits/chosen": -2.8458776473999023, |
|
"logits/rejected": -2.793788194656372, |
|
"logps/chosen": -373.66241455078125, |
|
"logps/rejected": -257.68841552734375, |
|
"loss": 0.5821, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.04686256870627403, |
|
"rewards/margins": 0.3232998847961426, |
|
"rewards/margins_max": 0.5046892166137695, |
|
"rewards/margins_min": 0.14191053807735443, |
|
"rewards/margins_std": 0.2565232217311859, |
|
"rewards/rejected": -0.27643734216690063, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"eval_logits/chosen": -2.7354917526245117, |
|
"eval_logits/rejected": -2.6965763568878174, |
|
"eval_logps/chosen": -303.8941650390625, |
|
"eval_logps/rejected": -286.7320861816406, |
|
"eval_loss": 0.6622087359428406, |
|
"eval_rewards/accuracies": 0.6150793433189392, |
|
"eval_rewards/chosen": -0.186729297041893, |
|
"eval_rewards/margins": 0.08876504004001617, |
|
"eval_rewards/margins_max": 0.39532026648521423, |
|
"eval_rewards/margins_min": -0.1795283555984497, |
|
"eval_rewards/margins_std": 0.25878801941871643, |
|
"eval_rewards/rejected": -0.2754943370819092, |
|
"eval_runtime": 284.1012, |
|
"eval_samples_per_second": 7.04, |
|
"eval_steps_per_second": 0.222, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 3.178965793594782, |
|
"learning_rate": 4.9453690018345144e-06, |
|
"logits/chosen": -2.7822461128234863, |
|
"logits/rejected": -2.7551050186157227, |
|
"logps/chosen": -346.56549072265625, |
|
"logps/rejected": -336.0809020996094, |
|
"loss": 0.5771, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.02590801753103733, |
|
"rewards/margins": 0.3409319818019867, |
|
"rewards/margins_max": 0.4643324017524719, |
|
"rewards/margins_min": 0.21753165125846863, |
|
"rewards/margins_std": 0.17451441287994385, |
|
"rewards/rejected": -0.3668400049209595, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 3.521510373240137, |
|
"learning_rate": 4.915638921541952e-06, |
|
"logits/chosen": -2.7236685752868652, |
|
"logits/rejected": -2.7280526161193848, |
|
"logps/chosen": -310.40826416015625, |
|
"logps/rejected": -319.103515625, |
|
"loss": 0.5407, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.22706270217895508, |
|
"rewards/margins": 0.33027681708335876, |
|
"rewards/margins_max": 0.5542212724685669, |
|
"rewards/margins_min": 0.10633233934640884, |
|
"rewards/margins_std": 0.3167053163051605, |
|
"rewards/rejected": -0.5573395490646362, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 2.110244119813805, |
|
"learning_rate": 4.879606715117019e-06, |
|
"logits/chosen": -2.8192946910858154, |
|
"logits/rejected": -2.731480836868286, |
|
"logps/chosen": -343.879150390625, |
|
"logps/rejected": -308.55780029296875, |
|
"loss": 0.5251, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.37679314613342285, |
|
"rewards/margins": 0.30480116605758667, |
|
"rewards/margins_max": 0.5037060379981995, |
|
"rewards/margins_min": 0.10589637607336044, |
|
"rewards/margins_std": 0.2812938690185547, |
|
"rewards/rejected": -0.6815943121910095, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 5.313237756103227, |
|
"learning_rate": 4.837366386472175e-06, |
|
"logits/chosen": -2.8442323207855225, |
|
"logits/rejected": -2.757608413696289, |
|
"logps/chosen": -338.12030029296875, |
|
"logps/rejected": -321.89703369140625, |
|
"loss": 0.5564, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.4649910032749176, |
|
"rewards/margins": 0.3815266191959381, |
|
"rewards/margins_max": 0.6749385595321655, |
|
"rewards/margins_min": 0.08811453729867935, |
|
"rewards/margins_std": 0.4149473309516907, |
|
"rewards/rejected": -0.8465176820755005, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 3.739167908499058, |
|
"learning_rate": 4.789028135801919e-06, |
|
"logits/chosen": -2.8220436573028564, |
|
"logits/rejected": -2.8031575679779053, |
|
"logps/chosen": -304.0033874511719, |
|
"logps/rejected": -348.67108154296875, |
|
"loss": 0.5705, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.05871645733714104, |
|
"rewards/margins": 0.4513682425022125, |
|
"rewards/margins_max": 0.7341547012329102, |
|
"rewards/margins_min": 0.16858164966106415, |
|
"rewards/margins_std": 0.39992058277130127, |
|
"rewards/rejected": -0.5100846290588379, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 4.005524634813479, |
|
"learning_rate": 4.7347180720830635e-06, |
|
"logits/chosen": -2.8278419971466064, |
|
"logits/rejected": -2.6905112266540527, |
|
"logps/chosen": -371.9062805175781, |
|
"logps/rejected": -375.65728759765625, |
|
"loss": 0.5226, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.3725179135799408, |
|
"rewards/margins": 0.5365481972694397, |
|
"rewards/margins_max": 0.8424277305603027, |
|
"rewards/margins_min": 0.2306685745716095, |
|
"rewards/margins_std": 0.4325791001319885, |
|
"rewards/rejected": -0.9090660810470581, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 8.89880059550579, |
|
"learning_rate": 4.674577884070811e-06, |
|
"logits/chosen": -2.7482759952545166, |
|
"logits/rejected": -2.7097363471984863, |
|
"logps/chosen": -368.9304504394531, |
|
"logps/rejected": -368.5115661621094, |
|
"loss": 0.4421, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.42384210228919983, |
|
"rewards/margins": 0.7086302638053894, |
|
"rewards/margins_max": 1.0815365314483643, |
|
"rewards/margins_min": 0.33572402596473694, |
|
"rewards/margins_std": 0.5273691415786743, |
|
"rewards/rejected": -1.1324723958969116, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 12.030137047752005, |
|
"learning_rate": 4.608764470648971e-06, |
|
"logits/chosen": -2.782106399536133, |
|
"logits/rejected": -2.7207627296447754, |
|
"logps/chosen": -344.0601501464844, |
|
"logps/rejected": -446.43328857421875, |
|
"loss": 0.4886, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3604986369609833, |
|
"rewards/margins": 0.7753941416740417, |
|
"rewards/margins_max": 1.1650350093841553, |
|
"rewards/margins_min": 0.38575348258018494, |
|
"rewards/margins_std": 0.5510352253913879, |
|
"rewards/rejected": -1.1358928680419922, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 4.3464674099690255, |
|
"learning_rate": 4.5374495314986874e-06, |
|
"logits/chosen": -2.5000321865081787, |
|
"logits/rejected": -2.5499188899993896, |
|
"logps/chosen": -381.9058532714844, |
|
"logps/rejected": -390.3780212402344, |
|
"loss": 0.4735, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.4734285771846771, |
|
"rewards/margins": 1.050258994102478, |
|
"rewards/margins_max": 1.5981611013412476, |
|
"rewards/margins_min": 0.5023568868637085, |
|
"rewards/margins_std": 0.7748504877090454, |
|
"rewards/rejected": -1.523687481880188, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 9.11783684491386, |
|
"learning_rate": 4.460819119153574e-06, |
|
"logits/chosen": -2.5870699882507324, |
|
"logits/rejected": -2.5736021995544434, |
|
"logps/chosen": -338.88360595703125, |
|
"logps/rejected": -435.57855224609375, |
|
"loss": 0.481, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.7319141030311584, |
|
"rewards/margins": 0.7730057835578918, |
|
"rewards/margins_max": 1.262407660484314, |
|
"rewards/margins_min": 0.2836039066314697, |
|
"rewards/margins_std": 0.6921188235282898, |
|
"rewards/rejected": -1.5049200057983398, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"eval_logits/chosen": -2.5660696029663086, |
|
"eval_logits/rejected": -2.540174722671509, |
|
"eval_logps/chosen": -410.9715881347656, |
|
"eval_logps/rejected": -423.9108581542969, |
|
"eval_loss": 0.6257268190383911, |
|
"eval_rewards/accuracies": 0.6746031641960144, |
|
"eval_rewards/chosen": -1.2575041055679321, |
|
"eval_rewards/margins": 0.389777809381485, |
|
"eval_rewards/margins_max": 1.3411911725997925, |
|
"eval_rewards/margins_min": -0.5259115695953369, |
|
"eval_rewards/margins_std": 0.8282801508903503, |
|
"eval_rewards/rejected": -1.6472818851470947, |
|
"eval_runtime": 283.175, |
|
"eval_samples_per_second": 7.063, |
|
"eval_steps_per_second": 0.222, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 8.587152119215933, |
|
"learning_rate": 4.379073153609896e-06, |
|
"logits/chosen": -2.6241440773010254, |
|
"logits/rejected": -2.5915939807891846, |
|
"logps/chosen": -408.4817199707031, |
|
"logps/rejected": -439.4585876464844, |
|
"loss": 0.4772, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.8052468299865723, |
|
"rewards/margins": 0.7498449087142944, |
|
"rewards/margins_max": 1.3446953296661377, |
|
"rewards/margins_min": 0.15499453246593475, |
|
"rewards/margins_std": 0.8412453532218933, |
|
"rewards/rejected": -1.5550918579101562, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 12.858094013695773, |
|
"learning_rate": 4.292424900758129e-06, |
|
"logits/chosen": -2.3552744388580322, |
|
"logits/rejected": -2.266150712966919, |
|
"logps/chosen": -326.7586669921875, |
|
"logps/rejected": -419.92950439453125, |
|
"loss": 0.4407, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.6805569529533386, |
|
"rewards/margins": 0.8906445503234863, |
|
"rewards/margins_max": 1.5019675493240356, |
|
"rewards/margins_min": 0.27932122349739075, |
|
"rewards/margins_std": 0.8645416498184204, |
|
"rewards/rejected": -1.5712013244628906, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 10.547994008924384, |
|
"learning_rate": 4.201100415996598e-06, |
|
"logits/chosen": -2.2731940746307373, |
|
"logits/rejected": -2.208517551422119, |
|
"logps/chosen": -396.6240234375, |
|
"logps/rejected": -439.95367431640625, |
|
"loss": 0.4373, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -1.4822484254837036, |
|
"rewards/margins": 0.3612428605556488, |
|
"rewards/margins_max": 0.8377896547317505, |
|
"rewards/margins_min": -0.11530391871929169, |
|
"rewards/margins_std": 0.6739388704299927, |
|
"rewards/rejected": -1.8434913158416748, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 7.682347998116211, |
|
"learning_rate": 4.105337954478756e-06, |
|
"logits/chosen": -2.2987964153289795, |
|
"logits/rejected": -2.232954263687134, |
|
"logps/chosen": -488.194580078125, |
|
"logps/rejected": -482.121826171875, |
|
"loss": 0.497, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9666223526000977, |
|
"rewards/margins": 1.5014089345932007, |
|
"rewards/margins_max": 2.3409581184387207, |
|
"rewards/margins_min": 0.6618598103523254, |
|
"rewards/margins_std": 1.1873016357421875, |
|
"rewards/rejected": -2.468031167984009, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 6.522613509898599, |
|
"learning_rate": 4.005387349532697e-06, |
|
"logits/chosen": -2.3422703742980957, |
|
"logits/rejected": -2.3057916164398193, |
|
"logps/chosen": -449.12469482421875, |
|
"logps/rejected": -528.1080322265625, |
|
"loss": 0.3759, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4108879566192627, |
|
"rewards/margins": 1.1858874559402466, |
|
"rewards/margins_max": 2.1756746768951416, |
|
"rewards/margins_min": 0.19610002636909485, |
|
"rewards/margins_std": 1.3997704982757568, |
|
"rewards/rejected": -2.596775531768799, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 9.556919040050822, |
|
"learning_rate": 3.901509360874515e-06, |
|
"logits/chosen": -2.1438629627227783, |
|
"logits/rejected": -2.12241792678833, |
|
"logps/chosen": -336.4192199707031, |
|
"logps/rejected": -396.7298889160156, |
|
"loss": 0.3976, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.233567237854004, |
|
"rewards/margins": 0.7809652090072632, |
|
"rewards/margins_max": 1.5155531167984009, |
|
"rewards/margins_min": 0.04637749865651131, |
|
"rewards/margins_std": 1.0388638973236084, |
|
"rewards/rejected": -2.0145325660705566, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 8.191966927640436, |
|
"learning_rate": 3.793974994315991e-06, |
|
"logits/chosen": -1.8757396936416626, |
|
"logits/rejected": -1.9094167947769165, |
|
"logps/chosen": -297.0498352050781, |
|
"logps/rejected": -410.0951232910156, |
|
"loss": 0.4022, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.1613843441009521, |
|
"rewards/margins": 1.04617440700531, |
|
"rewards/margins_max": 1.7157318592071533, |
|
"rewards/margins_min": 0.3766169548034668, |
|
"rewards/margins_std": 0.9468971490859985, |
|
"rewards/rejected": -2.2075586318969727, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 9.284581469509805, |
|
"learning_rate": 3.68306479474137e-06, |
|
"logits/chosen": -2.3123269081115723, |
|
"logits/rejected": -2.223254680633545, |
|
"logps/chosen": -509.96392822265625, |
|
"logps/rejected": -465.5381774902344, |
|
"loss": 0.3547, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.3590073585510254, |
|
"rewards/margins": 1.2631444931030273, |
|
"rewards/margins_max": 1.895696997642517, |
|
"rewards/margins_min": 0.6305915713310242, |
|
"rewards/margins_std": 0.8945645093917847, |
|
"rewards/rejected": -2.6221518516540527, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 23.380435739964835, |
|
"learning_rate": 3.569068114197784e-06, |
|
"logits/chosen": -2.013559103012085, |
|
"logits/rejected": -1.945844054222107, |
|
"logps/chosen": -327.3985290527344, |
|
"logps/rejected": -448.2659606933594, |
|
"loss": 0.3626, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.0471247434616089, |
|
"rewards/margins": 1.6471458673477173, |
|
"rewards/margins_max": 2.4206135272979736, |
|
"rewards/margins_min": 0.8736783266067505, |
|
"rewards/margins_std": 1.0938485860824585, |
|
"rewards/rejected": -2.694270610809326, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 14.305787223907084, |
|
"learning_rate": 3.4522823570088073e-06, |
|
"logits/chosen": -1.9044002294540405, |
|
"logits/rejected": -1.8846553564071655, |
|
"logps/chosen": -411.216552734375, |
|
"logps/rejected": -525.3153686523438, |
|
"loss": 0.4017, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.564225196838379, |
|
"rewards/margins": 1.5319175720214844, |
|
"rewards/margins_max": 2.567899703979492, |
|
"rewards/margins_min": 0.49593567848205566, |
|
"rewards/margins_std": 1.4650996923446655, |
|
"rewards/rejected": -3.0961427688598633, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"eval_logits/chosen": -2.0223634243011475, |
|
"eval_logits/rejected": -1.9880024194717407, |
|
"eval_logps/chosen": -462.021728515625, |
|
"eval_logps/rejected": -509.34765625, |
|
"eval_loss": 0.6111792922019958, |
|
"eval_rewards/accuracies": 0.6944444179534912, |
|
"eval_rewards/chosen": -1.7680050134658813, |
|
"eval_rewards/margins": 0.7336447238922119, |
|
"eval_rewards/margins_max": 2.3329057693481445, |
|
"eval_rewards/margins_min": -0.8123146891593933, |
|
"eval_rewards/margins_std": 1.4011034965515137, |
|
"eval_rewards/rejected": -2.501649856567383, |
|
"eval_runtime": 283.1994, |
|
"eval_samples_per_second": 7.062, |
|
"eval_steps_per_second": 0.222, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 13.373839711281382, |
|
"learning_rate": 3.333012203880528e-06, |
|
"logits/chosen": -2.034450054168701, |
|
"logits/rejected": -1.9856176376342773, |
|
"logps/chosen": -361.3979187011719, |
|
"logps/rejected": -426.4706115722656, |
|
"loss": 0.379, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.1962629556655884, |
|
"rewards/margins": 1.4263014793395996, |
|
"rewards/margins_max": 2.3072102069854736, |
|
"rewards/margins_min": 0.5453929901123047, |
|
"rewards/margins_std": 1.2457928657531738, |
|
"rewards/rejected": -2.6225647926330566, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 12.791871374689537, |
|
"learning_rate": 3.2115688170243735e-06, |
|
"logits/chosen": -2.104572057723999, |
|
"logits/rejected": -2.1095337867736816, |
|
"logps/chosen": -430.00299072265625, |
|
"logps/rejected": -597.9464721679688, |
|
"loss": 0.357, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.14143705368042, |
|
"rewards/margins": 1.844452142715454, |
|
"rewards/margins_max": 2.6857852935791016, |
|
"rewards/margins_min": 1.0031189918518066, |
|
"rewards/margins_std": 1.1898245811462402, |
|
"rewards/rejected": -2.985888957977295, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 7.114464717414236, |
|
"learning_rate": 3.0882690283704355e-06, |
|
"logits/chosen": -1.9746555089950562, |
|
"logits/rejected": -1.915001630783081, |
|
"logps/chosen": -350.51678466796875, |
|
"logps/rejected": -462.4185485839844, |
|
"loss": 0.3842, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.0409258604049683, |
|
"rewards/margins": 1.465539813041687, |
|
"rewards/margins_max": 2.2879223823547363, |
|
"rewards/margins_min": 0.6431571841239929, |
|
"rewards/margins_std": 1.1630247831344604, |
|
"rewards/rejected": -2.506465435028076, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 11.717977839144085, |
|
"learning_rate": 2.9634345129891296e-06, |
|
"logits/chosen": -1.998160719871521, |
|
"logits/rejected": -1.8835424184799194, |
|
"logps/chosen": -419.52362060546875, |
|
"logps/rejected": -527.9315185546875, |
|
"loss": 0.3414, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.097505807876587, |
|
"rewards/margins": 1.5337189435958862, |
|
"rewards/margins_max": 2.7524514198303223, |
|
"rewards/margins_min": 0.31498652696609497, |
|
"rewards/margins_std": 1.723548173904419, |
|
"rewards/rejected": -2.631225109100342, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 14.724171411488078, |
|
"learning_rate": 2.8373909498776746e-06, |
|
"logits/chosen": -2.16463565826416, |
|
"logits/rejected": -2.167154550552368, |
|
"logps/chosen": -366.3368225097656, |
|
"logps/rejected": -522.6026611328125, |
|
"loss": 0.4421, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.8319808840751648, |
|
"rewards/margins": 1.7606074810028076, |
|
"rewards/margins_max": 2.4449410438537598, |
|
"rewards/margins_min": 1.076274037361145, |
|
"rewards/margins_std": 0.9677937626838684, |
|
"rewards/rejected": -2.5925886631011963, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 9.998195485379709, |
|
"learning_rate": 2.710467172300768e-06, |
|
"logits/chosen": -2.146489143371582, |
|
"logits/rejected": -2.0911166667938232, |
|
"logps/chosen": -431.58660888671875, |
|
"logps/rejected": -554.4029541015625, |
|
"loss": 0.3448, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.7858428955078125, |
|
"rewards/margins": 1.576249361038208, |
|
"rewards/margins_max": 2.419480562210083, |
|
"rewards/margins_min": 0.7330182194709778, |
|
"rewards/margins_std": 1.1925089359283447, |
|
"rewards/rejected": -2.3620922565460205, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 16.428444808566788, |
|
"learning_rate": 2.582994309902146e-06, |
|
"logits/chosen": -2.021066665649414, |
|
"logits/rejected": -1.8753414154052734, |
|
"logps/chosen": -438.49176025390625, |
|
"logps/rejected": -512.5082397460938, |
|
"loss": 0.4111, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.347548007965088, |
|
"rewards/margins": 1.3524014949798584, |
|
"rewards/margins_max": 2.1463735103607178, |
|
"rewards/margins_min": 0.5584291815757751, |
|
"rewards/margins_std": 1.122846245765686, |
|
"rewards/rejected": -2.699949264526367, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 9.34369488447896, |
|
"learning_rate": 2.4553049248251512e-06, |
|
"logits/chosen": -1.9435670375823975, |
|
"logits/rejected": -1.9931520223617554, |
|
"logps/chosen": -381.89532470703125, |
|
"logps/rejected": -504.81341552734375, |
|
"loss": 0.3435, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.481609582901001, |
|
"rewards/margins": 1.2445435523986816, |
|
"rewards/margins_max": 1.940123200416565, |
|
"rewards/margins_min": 0.5489639043807983, |
|
"rewards/margins_std": 0.9836981892585754, |
|
"rewards/rejected": -2.7261533737182617, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 12.062558188816904, |
|
"learning_rate": 2.3277321440960733e-06, |
|
"logits/chosen": -2.1283860206604004, |
|
"logits/rejected": -2.116414785385132, |
|
"logps/chosen": -395.5448913574219, |
|
"logps/rejected": -539.8890380859375, |
|
"loss": 0.3457, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2231833934783936, |
|
"rewards/margins": 1.5952281951904297, |
|
"rewards/margins_max": 2.423370838165283, |
|
"rewards/margins_min": 0.7670857906341553, |
|
"rewards/margins_std": 1.1711702346801758, |
|
"rewards/rejected": -2.8184115886688232, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 10.060650957709997, |
|
"learning_rate": 2.20060879053377e-06, |
|
"logits/chosen": -1.8268073797225952, |
|
"logits/rejected": -1.7773048877716064, |
|
"logps/chosen": -360.2474670410156, |
|
"logps/rejected": -529.291259765625, |
|
"loss": 0.3427, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.5878477096557617, |
|
"rewards/margins": 1.5394123792648315, |
|
"rewards/margins_max": 2.434696674346924, |
|
"rewards/margins_min": 0.6441282033920288, |
|
"rewards/margins_std": 1.2661231756210327, |
|
"rewards/rejected": -3.127260208129883, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"eval_logits/chosen": -1.986289143562317, |
|
"eval_logits/rejected": -1.9447433948516846, |
|
"eval_logps/chosen": -476.6219177246094, |
|
"eval_logps/rejected": -527.7764892578125, |
|
"eval_loss": 0.5955031514167786, |
|
"eval_rewards/accuracies": 0.7023809552192688, |
|
"eval_rewards/chosen": -1.9140070676803589, |
|
"eval_rewards/margins": 0.7719313502311707, |
|
"eval_rewards/margins_max": 2.272120952606201, |
|
"eval_rewards/margins_min": -0.7218120098114014, |
|
"eval_rewards/margins_std": 1.3400975465774536, |
|
"eval_rewards/rejected": -2.685938596725464, |
|
"eval_runtime": 283.4614, |
|
"eval_samples_per_second": 7.056, |
|
"eval_steps_per_second": 0.222, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 11.99849119237726, |
|
"learning_rate": 2.0742665144529374e-06, |
|
"logits/chosen": -1.9736402034759521, |
|
"logits/rejected": -1.901617407798767, |
|
"logps/chosen": -446.02227783203125, |
|
"logps/rejected": -546.3521728515625, |
|
"loss": 0.3666, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.0405278205871582, |
|
"rewards/margins": 1.8972772359848022, |
|
"rewards/margins_max": 2.6515231132507324, |
|
"rewards/margins_min": 1.1430312395095825, |
|
"rewards/margins_std": 1.066664695739746, |
|
"rewards/rejected": -2.937804937362671, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 6.458783566768089, |
|
"learning_rate": 1.9490349284263036e-06, |
|
"logits/chosen": -1.8606504201889038, |
|
"logits/rejected": -1.7730525732040405, |
|
"logps/chosen": -470.9712829589844, |
|
"logps/rejected": -612.8939208984375, |
|
"loss": 0.3331, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.6054725646972656, |
|
"rewards/margins": 2.13732647895813, |
|
"rewards/margins_max": 2.9402005672454834, |
|
"rewards/margins_min": 1.334452509880066, |
|
"rewards/margins_std": 1.1354353427886963, |
|
"rewards/rejected": -3.7427992820739746, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 9.837849328941022, |
|
"learning_rate": 1.8252407473630606e-06, |
|
"logits/chosen": -1.983541488647461, |
|
"logits/rejected": -1.9613971710205078, |
|
"logps/chosen": -458.0785217285156, |
|
"logps/rejected": -566.628662109375, |
|
"loss": 0.3559, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.701336145401001, |
|
"rewards/margins": 1.274548053741455, |
|
"rewards/margins_max": 1.9377739429473877, |
|
"rewards/margins_min": 0.6113225221633911, |
|
"rewards/margins_std": 0.9379426836967468, |
|
"rewards/rejected": -2.975884437561035, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 10.432841283470852, |
|
"learning_rate": 1.7032069361469765e-06, |
|
"logits/chosen": -1.858236312866211, |
|
"logits/rejected": -1.8411200046539307, |
|
"logps/chosen": -359.7177429199219, |
|
"logps/rejected": -587.9736938476562, |
|
"loss": 0.3632, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.2637131214141846, |
|
"rewards/margins": 1.9256131649017334, |
|
"rewards/margins_max": 2.9136805534362793, |
|
"rewards/margins_min": 0.9375454783439636, |
|
"rewards/margins_std": 1.397338628768921, |
|
"rewards/rejected": -3.189326047897339, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 18.395508823434238, |
|
"learning_rate": 1.5832518670578802e-06, |
|
"logits/chosen": -2.010892391204834, |
|
"logits/rejected": -1.9377777576446533, |
|
"logps/chosen": -429.5704040527344, |
|
"logps/rejected": -617.7625732421875, |
|
"loss": 0.4001, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.3690681457519531, |
|
"rewards/margins": 1.9702503681182861, |
|
"rewards/margins_max": 3.1194424629211426, |
|
"rewards/margins_min": 0.8210585713386536, |
|
"rewards/margins_std": 1.6252025365829468, |
|
"rewards/rejected": -3.3393185138702393, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 11.402368506953254, |
|
"learning_rate": 1.4656884891747398e-06, |
|
"logits/chosen": -1.8819122314453125, |
|
"logits/rejected": -1.9186322689056396, |
|
"logps/chosen": -424.16986083984375, |
|
"logps/rejected": -586.4650268554688, |
|
"loss": 0.3423, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.3872971534729004, |
|
"rewards/margins": 1.951498031616211, |
|
"rewards/margins_max": 3.129544496536255, |
|
"rewards/margins_min": 0.7734516263008118, |
|
"rewards/margins_std": 1.6660093069076538, |
|
"rewards/rejected": -3.3387951850891113, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 10.853275743311396, |
|
"learning_rate": 1.3508235119272466e-06, |
|
"logits/chosen": -1.8404920101165771, |
|
"logits/rejected": -1.80814528465271, |
|
"logps/chosen": -476.41473388671875, |
|
"logps/rejected": -532.2838745117188, |
|
"loss": 0.3542, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -2.109175205230713, |
|
"rewards/margins": 0.6896736025810242, |
|
"rewards/margins_max": 1.9764328002929688, |
|
"rewards/margins_min": -0.5970857739448547, |
|
"rewards/margins_std": 1.8197526931762695, |
|
"rewards/rejected": -2.7988486289978027, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 13.491064434657412, |
|
"learning_rate": 1.238956604925934e-06, |
|
"logits/chosen": -1.7815355062484741, |
|
"logits/rejected": -1.760663628578186, |
|
"logps/chosen": -379.5592346191406, |
|
"logps/rejected": -617.5568237304688, |
|
"loss": 0.377, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.146844506263733, |
|
"rewards/margins": 2.3650784492492676, |
|
"rewards/margins_max": 3.120236396789551, |
|
"rewards/margins_min": 1.6099202632904053, |
|
"rewards/margins_std": 1.0679547786712646, |
|
"rewards/rejected": -3.5119223594665527, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 5.424010985619327, |
|
"learning_rate": 1.1303796161583763e-06, |
|
"logits/chosen": -2.0357632637023926, |
|
"logits/rejected": -2.0330684185028076, |
|
"logps/chosen": -418.2298278808594, |
|
"logps/rejected": -598.3395385742188, |
|
"loss": 0.3425, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.2266708612442017, |
|
"rewards/margins": 1.6023613214492798, |
|
"rewards/margins_max": 2.5102345943450928, |
|
"rewards/margins_min": 0.6944878697395325, |
|
"rewards/margins_std": 1.2839267253875732, |
|
"rewards/rejected": -2.8290319442749023, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 8.453582130806156, |
|
"learning_rate": 1.0253758105911169e-06, |
|
"logits/chosen": -2.0878758430480957, |
|
"logits/rejected": -1.991579294204712, |
|
"logps/chosen": -448.9693298339844, |
|
"logps/rejected": -671.1817626953125, |
|
"loss": 0.3246, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.2098839282989502, |
|
"rewards/margins": 2.05537486076355, |
|
"rewards/margins_max": 2.98439359664917, |
|
"rewards/margins_min": 1.1263563632965088, |
|
"rewards/margins_std": 1.313830852508545, |
|
"rewards/rejected": -3.2652587890625, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"eval_logits/chosen": -1.886383295059204, |
|
"eval_logits/rejected": -1.84441077709198, |
|
"eval_logps/chosen": -513.3748168945312, |
|
"eval_logps/rejected": -561.1234130859375, |
|
"eval_loss": 0.6025983691215515, |
|
"eval_rewards/accuracies": 0.6626983880996704, |
|
"eval_rewards/chosen": -2.281536102294922, |
|
"eval_rewards/margins": 0.7378710508346558, |
|
"eval_rewards/margins_max": 2.2879276275634766, |
|
"eval_rewards/margins_min": -0.7821336388587952, |
|
"eval_rewards/margins_std": 1.3716031312942505, |
|
"eval_rewards/rejected": -3.019407033920288, |
|
"eval_runtime": 283.5562, |
|
"eval_samples_per_second": 7.053, |
|
"eval_steps_per_second": 0.222, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 17.08831198492527, |
|
"learning_rate": 9.24219131163705e-07, |
|
"logits/chosen": -1.7034380435943604, |
|
"logits/rejected": -1.677706003189087, |
|
"logps/chosen": -501.74334716796875, |
|
"logps/rejected": -674.4254150390625, |
|
"loss": 0.3481, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.918776273727417, |
|
"rewards/margins": 1.897600531578064, |
|
"rewards/margins_max": 3.511080265045166, |
|
"rewards/margins_min": 0.284121036529541, |
|
"rewards/margins_std": 2.2818045616149902, |
|
"rewards/rejected": -3.8163769245147705, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 3.7704796107061735, |
|
"learning_rate": 8.271734841028553e-07, |
|
"logits/chosen": -1.5852091312408447, |
|
"logits/rejected": -1.471635103225708, |
|
"logps/chosen": -462.9111328125, |
|
"logps/rejected": -592.2215576171875, |
|
"loss": 0.3213, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.484692096710205, |
|
"rewards/margins": 2.1865782737731934, |
|
"rewards/margins_max": 2.6932501792907715, |
|
"rewards/margins_min": 1.6799061298370361, |
|
"rewards/margins_std": 0.7165425419807434, |
|
"rewards/rejected": -3.6712703704833984, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 5.607658272294071, |
|
"learning_rate": 7.344920504212244e-07, |
|
"logits/chosen": -1.838727593421936, |
|
"logits/rejected": -1.8044246435165405, |
|
"logps/chosen": -373.3072204589844, |
|
"logps/rejected": -488.6890563964844, |
|
"loss": 0.36, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.4621307849884033, |
|
"rewards/margins": 1.4751158952713013, |
|
"rewards/margins_max": 2.411809206008911, |
|
"rewards/margins_min": 0.5384225845336914, |
|
"rewards/margins_std": 1.3246843814849854, |
|
"rewards/rejected": -2.937246799468994, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 10.180442565411226, |
|
"learning_rate": 6.464166253970672e-07, |
|
"logits/chosen": -1.888649344444275, |
|
"logits/rejected": -1.9107511043548584, |
|
"logps/chosen": -483.1812438964844, |
|
"logps/rejected": -572.9668579101562, |
|
"loss": 0.3483, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.5701286792755127, |
|
"rewards/margins": 1.3985799551010132, |
|
"rewards/margins_max": 2.0270140171051025, |
|
"rewards/margins_min": 0.7701458930969238, |
|
"rewards/margins_std": 0.8887398838996887, |
|
"rewards/rejected": -2.9687085151672363, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 7.067320132481652, |
|
"learning_rate": 5.631769877579535e-07, |
|
"logits/chosen": -1.9241430759429932, |
|
"logits/rejected": -1.8503801822662354, |
|
"logps/chosen": -391.8656311035156, |
|
"logps/rejected": -515.9195556640625, |
|
"loss": 0.3546, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.404034972190857, |
|
"rewards/margins": 1.4414308071136475, |
|
"rewards/margins_max": 2.243879556655884, |
|
"rewards/margins_min": 0.6389821171760559, |
|
"rewards/margins_std": 1.1348340511322021, |
|
"rewards/rejected": -2.845465898513794, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 23.109650997760248, |
|
"learning_rate": 4.849903002143114e-07, |
|
"logits/chosen": -2.134927988052368, |
|
"logits/rejected": -2.0764718055725098, |
|
"logps/chosen": -540.7327880859375, |
|
"logps/rejected": -669.7936401367188, |
|
"loss": 0.3152, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.6844943761825562, |
|
"rewards/margins": 1.754024863243103, |
|
"rewards/margins_max": 2.7484121322631836, |
|
"rewards/margins_min": 0.7596377730369568, |
|
"rewards/margins_std": 1.4062758684158325, |
|
"rewards/rejected": -3.438519239425659, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 14.65033008607176, |
|
"learning_rate": 4.1206054290670537e-07, |
|
"logits/chosen": -1.8871490955352783, |
|
"logits/rejected": -1.8968530893325806, |
|
"logps/chosen": -413.68682861328125, |
|
"logps/rejected": -668.4096069335938, |
|
"loss": 0.3224, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -1.614575743675232, |
|
"rewards/margins": 2.1032443046569824, |
|
"rewards/margins_max": 3.0623726844787598, |
|
"rewards/margins_min": 1.144116759300232, |
|
"rewards/margins_std": 1.3564116954803467, |
|
"rewards/rejected": -3.717820405960083, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 7.593895176885149, |
|
"learning_rate": 3.44577981244944e-07, |
|
"logits/chosen": -1.9477647542953491, |
|
"logits/rejected": -1.9660476446151733, |
|
"logps/chosen": -438.8433532714844, |
|
"logps/rejected": -546.8043212890625, |
|
"loss": 0.4149, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9253628253936768, |
|
"rewards/margins": 1.1086914539337158, |
|
"rewards/margins_max": 2.1370372772216797, |
|
"rewards/margins_min": 0.08034573495388031, |
|
"rewards/margins_std": 1.4543002843856812, |
|
"rewards/rejected": -3.0340542793273926, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 5.844449740859913, |
|
"learning_rate": 2.827186695273482e-07, |
|
"logits/chosen": -2.1650023460388184, |
|
"logits/rejected": -2.0401604175567627, |
|
"logps/chosen": -522.0162963867188, |
|
"logps/rejected": -637.875732421875, |
|
"loss": 0.3966, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.199137806892395, |
|
"rewards/margins": 1.8260653018951416, |
|
"rewards/margins_max": 3.141328811645508, |
|
"rewards/margins_min": 0.5108016729354858, |
|
"rewards/margins_std": 1.8600635528564453, |
|
"rewards/rejected": -3.025203227996826, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 8.653551252508137, |
|
"learning_rate": 2.2664399163518786e-07, |
|
"logits/chosen": -1.9303522109985352, |
|
"logits/rejected": -1.865822434425354, |
|
"logps/chosen": -474.86810302734375, |
|
"logps/rejected": -598.7958374023438, |
|
"loss": 0.2747, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.5423686504364014, |
|
"rewards/margins": 2.11772084236145, |
|
"rewards/margins_max": 3.12973690032959, |
|
"rewards/margins_min": 1.1057052612304688, |
|
"rewards/margins_std": 1.431206464767456, |
|
"rewards/rejected": -3.6600890159606934, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"eval_logits/chosen": -1.8380076885223389, |
|
"eval_logits/rejected": -1.7933586835861206, |
|
"eval_logps/chosen": -502.5606994628906, |
|
"eval_logps/rejected": -556.8073120117188, |
|
"eval_loss": 0.5972898602485657, |
|
"eval_rewards/accuracies": 0.6785714030265808, |
|
"eval_rewards/chosen": -2.1733951568603516, |
|
"eval_rewards/margins": 0.8028514385223389, |
|
"eval_rewards/margins_max": 2.427276134490967, |
|
"eval_rewards/margins_min": -0.751455545425415, |
|
"eval_rewards/margins_std": 1.4232866764068604, |
|
"eval_rewards/rejected": -2.9762465953826904, |
|
"eval_runtime": 283.2826, |
|
"eval_samples_per_second": 7.06, |
|
"eval_steps_per_second": 0.222, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 9.369824417732737, |
|
"learning_rate": 1.7650024000056415e-07, |
|
"logits/chosen": -1.8018262386322021, |
|
"logits/rejected": -1.7738994359970093, |
|
"logps/chosen": -374.61505126953125, |
|
"logps/rejected": -558.9880981445312, |
|
"loss": 0.3286, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.5432480573654175, |
|
"rewards/margins": 1.8530250787734985, |
|
"rewards/margins_max": 2.7572684288024902, |
|
"rewards/margins_min": 0.9487819671630859, |
|
"rewards/margins_std": 1.2787930965423584, |
|
"rewards/rejected": -3.396273374557495, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 6.630649676820421, |
|
"learning_rate": 1.324182339461544e-07, |
|
"logits/chosen": -1.8650553226470947, |
|
"logits/rejected": -1.8343786001205444, |
|
"logps/chosen": -429.2254333496094, |
|
"logps/rejected": -518.1988525390625, |
|
"loss": 0.3508, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.508697271347046, |
|
"rewards/margins": 1.630495309829712, |
|
"rewards/margins_max": 2.3903756141662598, |
|
"rewards/margins_min": 0.8706151247024536, |
|
"rewards/margins_std": 1.0746327638626099, |
|
"rewards/rejected": -3.139192581176758, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 16.0557186788671, |
|
"learning_rate": 9.451297839253915e-08, |
|
"logits/chosen": -1.8724334239959717, |
|
"logits/rejected": -1.7563972473144531, |
|
"logps/chosen": -476.4231872558594, |
|
"logps/rejected": -727.5858154296875, |
|
"loss": 0.3078, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -1.4125255346298218, |
|
"rewards/margins": 2.7260119915008545, |
|
"rewards/margins_max": 3.8035919666290283, |
|
"rewards/margins_min": 1.6484321355819702, |
|
"rewards/margins_std": 1.5239282846450806, |
|
"rewards/rejected": -4.138537406921387, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 10.546642069187087, |
|
"learning_rate": 6.288336382349463e-08, |
|
"logits/chosen": -1.8520616292953491, |
|
"logits/rejected": -1.7307716608047485, |
|
"logps/chosen": -558.7571411132812, |
|
"logps/rejected": -656.2450561523438, |
|
"loss": 0.2733, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.7490208148956299, |
|
"rewards/margins": 2.0255560874938965, |
|
"rewards/margins_max": 2.8944671154022217, |
|
"rewards/margins_min": 1.1566449403762817, |
|
"rewards/margins_std": 1.2288259267807007, |
|
"rewards/rejected": -3.7745769023895264, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 14.096656651836478, |
|
"learning_rate": 3.761190829201067e-08, |
|
"logits/chosen": -1.8188579082489014, |
|
"logits/rejected": -1.7514938116073608, |
|
"logps/chosen": -534.6187133789062, |
|
"logps/rejected": -567.9749755859375, |
|
"loss": 0.3434, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.4054492712020874, |
|
"rewards/margins": 1.610443115234375, |
|
"rewards/margins_max": 2.8384835720062256, |
|
"rewards/margins_min": 0.38240256905555725, |
|
"rewards/margins_std": 1.7367115020751953, |
|
"rewards/rejected": -3.015892505645752, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 8.340110871014865, |
|
"learning_rate": 1.876454214011253e-08, |
|
"logits/chosen": -1.8232501745224, |
|
"logits/rejected": -1.779158592224121, |
|
"logps/chosen": -416.4646911621094, |
|
"logps/rejected": -530.9276123046875, |
|
"loss": 0.3517, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.5091989040374756, |
|
"rewards/margins": 1.5967410802841187, |
|
"rewards/margins_max": 2.6342692375183105, |
|
"rewards/margins_min": 0.5592130422592163, |
|
"rewards/margins_std": 1.4672863483428955, |
|
"rewards/rejected": -3.105940103530884, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 11.832885590441766, |
|
"learning_rate": 6.390435994127753e-09, |
|
"logits/chosen": -1.7567815780639648, |
|
"logits/rejected": -1.7844308614730835, |
|
"logps/chosen": -505.89019775390625, |
|
"logps/rejected": -757.8609619140625, |
|
"loss": 0.4029, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.9832277297973633, |
|
"rewards/margins": 2.035264492034912, |
|
"rewards/margins_max": 3.0966219902038574, |
|
"rewards/margins_min": 0.9739071130752563, |
|
"rewards/margins_std": 1.500985860824585, |
|
"rewards/rejected": -4.018492221832275, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 7.732345900580283, |
|
"learning_rate": 5.218724841346556e-10, |
|
"logits/chosen": -1.5676209926605225, |
|
"logits/rejected": -1.594948172569275, |
|
"logps/chosen": -497.2765197753906, |
|
"logps/rejected": -658.2857666015625, |
|
"loss": 0.3008, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.0746806859970093, |
|
"rewards/margins": 2.671840190887451, |
|
"rewards/margins_max": 3.8470091819763184, |
|
"rewards/margins_min": 1.4966704845428467, |
|
"rewards/margins_std": 1.661940336227417, |
|
"rewards/rejected": -3.74652099609375, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 684, |
|
"total_flos": 0.0, |
|
"train_loss": 0.42957196057888497, |
|
"train_runtime": 6346.2002, |
|
"train_samples_per_second": 1.724, |
|
"train_steps_per_second": 0.108 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 684, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|