|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.996784565916399, |
|
"eval_steps": 500, |
|
"global_step": 699, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 26.875, |
|
"learning_rate": 5.000000000000001e-07, |
|
"log_odds_chosen": 0.06212496757507324, |
|
"log_odds_ratio": -0.7014996409416199, |
|
"logits/chosen": -2.1857399940490723, |
|
"logits/rejected": -2.1817708015441895, |
|
"logps/chosen": -0.9498230814933777, |
|
"logps/rejected": -0.9784062504768372, |
|
"loss": 1.3118, |
|
"nll_loss": 1.3238964080810547, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 25.375, |
|
"learning_rate": 1.0000000000000002e-06, |
|
"log_odds_chosen": 0.16793885827064514, |
|
"log_odds_ratio": -0.6694984436035156, |
|
"logits/chosen": -2.263418674468994, |
|
"logits/rejected": -2.186417818069458, |
|
"logps/chosen": -0.8150558471679688, |
|
"logps/rejected": -0.9238722920417786, |
|
"loss": 1.205, |
|
"nll_loss": 1.1971018314361572, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 7.125, |
|
"learning_rate": 1.5e-06, |
|
"log_odds_chosen": 0.00997834000736475, |
|
"log_odds_ratio": -0.7417184114456177, |
|
"logits/chosen": -2.3210864067077637, |
|
"logits/rejected": -2.2824535369873047, |
|
"logps/chosen": -0.7758495807647705, |
|
"logps/rejected": -0.7887269258499146, |
|
"loss": 1.0752, |
|
"nll_loss": 1.1008635759353638, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 6.46875, |
|
"learning_rate": 2.0000000000000003e-06, |
|
"log_odds_chosen": 0.12381462007761002, |
|
"log_odds_ratio": -0.6990920305252075, |
|
"logits/chosen": -2.387354612350464, |
|
"logits/rejected": -2.278778553009033, |
|
"logps/chosen": -0.6582767963409424, |
|
"logps/rejected": -0.7136567831039429, |
|
"loss": 0.9839, |
|
"nll_loss": 0.9438871145248413, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 5.8125, |
|
"learning_rate": 2.5e-06, |
|
"log_odds_chosen": 0.10633399337530136, |
|
"log_odds_ratio": -0.702051043510437, |
|
"logits/chosen": -2.3278417587280273, |
|
"logits/rejected": -2.264376163482666, |
|
"logps/chosen": -0.6687366366386414, |
|
"logps/rejected": -0.7335516214370728, |
|
"loss": 0.9634, |
|
"nll_loss": 0.9985629320144653, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 6.09375, |
|
"learning_rate": 3e-06, |
|
"log_odds_chosen": 0.03238314390182495, |
|
"log_odds_ratio": -0.7480851411819458, |
|
"logits/chosen": -2.3533706665039062, |
|
"logits/rejected": -2.2710163593292236, |
|
"logps/chosen": -0.7110857963562012, |
|
"logps/rejected": -0.7359805107116699, |
|
"loss": 0.9691, |
|
"nll_loss": 0.9716413617134094, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 5.96875, |
|
"learning_rate": 3.5e-06, |
|
"log_odds_chosen": 0.03486456722021103, |
|
"log_odds_ratio": -0.7344475984573364, |
|
"logits/chosen": -2.3200392723083496, |
|
"logits/rejected": -2.2653918266296387, |
|
"logps/chosen": -0.6566824316978455, |
|
"logps/rejected": -0.6836172938346863, |
|
"loss": 0.9322, |
|
"nll_loss": 0.9105528593063354, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 6.375, |
|
"learning_rate": 4.000000000000001e-06, |
|
"log_odds_chosen": 0.06068553403019905, |
|
"log_odds_ratio": -0.7511457204818726, |
|
"logits/chosen": -2.2898621559143066, |
|
"logits/rejected": -2.282437324523926, |
|
"logps/chosen": -0.6991879343986511, |
|
"logps/rejected": -0.7374504208564758, |
|
"loss": 0.9628, |
|
"nll_loss": 0.9841943979263306, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 4.5e-06, |
|
"log_odds_chosen": 0.18408647179603577, |
|
"log_odds_ratio": -0.6827269792556763, |
|
"logits/chosen": -2.3683393001556396, |
|
"logits/rejected": -2.2830727100372314, |
|
"logps/chosen": -0.6308820843696594, |
|
"logps/rejected": -0.7424929141998291, |
|
"loss": 0.8944, |
|
"nll_loss": 0.9193886518478394, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 5.9375, |
|
"learning_rate": 5e-06, |
|
"log_odds_chosen": 0.035386841744184494, |
|
"log_odds_ratio": -0.7439508438110352, |
|
"logits/chosen": -2.35438871383667, |
|
"logits/rejected": -2.2569315433502197, |
|
"logps/chosen": -0.6244274973869324, |
|
"logps/rejected": -0.6488394141197205, |
|
"loss": 0.8782, |
|
"nll_loss": 0.8835960626602173, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 5.875, |
|
"learning_rate": 4.996562390352354e-06, |
|
"log_odds_chosen": 0.13479042053222656, |
|
"log_odds_ratio": -0.7032457590103149, |
|
"logits/chosen": -2.338320255279541, |
|
"logits/rejected": -2.259917974472046, |
|
"logps/chosen": -0.6392095685005188, |
|
"logps/rejected": -0.7337976098060608, |
|
"loss": 0.904, |
|
"nll_loss": 0.9141031503677368, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 5.125, |
|
"learning_rate": 4.986259015137485e-06, |
|
"log_odds_chosen": 0.1813308447599411, |
|
"log_odds_ratio": -0.6845273971557617, |
|
"logits/chosen": -2.3334765434265137, |
|
"logits/rejected": -2.208571434020996, |
|
"logps/chosen": -0.6667813062667847, |
|
"logps/rejected": -0.7594255208969116, |
|
"loss": 0.8882, |
|
"nll_loss": 0.9459765553474426, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 5.15625, |
|
"learning_rate": 4.96911820954103e-06, |
|
"log_odds_chosen": 0.11342030763626099, |
|
"log_odds_ratio": -0.7043333649635315, |
|
"logits/chosen": -2.245288372039795, |
|
"logits/rejected": -2.161513328552246, |
|
"logps/chosen": -0.6394560933113098, |
|
"logps/rejected": -0.7009039521217346, |
|
"loss": 0.8905, |
|
"nll_loss": 0.8281471133232117, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 5.75, |
|
"learning_rate": 4.945187112281936e-06, |
|
"log_odds_chosen": 0.0401572659611702, |
|
"log_odds_ratio": -0.7479963302612305, |
|
"logits/chosen": -2.3580965995788574, |
|
"logits/rejected": -2.3082127571105957, |
|
"logps/chosen": -0.655213475227356, |
|
"logps/rejected": -0.6817822456359863, |
|
"loss": 0.9124, |
|
"nll_loss": 0.9263037443161011, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 5.59375, |
|
"learning_rate": 4.9145315359768575e-06, |
|
"log_odds_chosen": 0.08960182219743729, |
|
"log_odds_ratio": -0.7190832495689392, |
|
"logits/chosen": -2.3376193046569824, |
|
"logits/rejected": -2.2543139457702637, |
|
"logps/chosen": -0.6171378493309021, |
|
"logps/rejected": -0.6685695648193359, |
|
"loss": 0.8818, |
|
"nll_loss": 0.8352767825126648, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 4.877235786149681e-06, |
|
"log_odds_chosen": 0.001317462301813066, |
|
"log_odds_ratio": -0.7520455121994019, |
|
"logits/chosen": -2.294182300567627, |
|
"logits/rejected": -2.249718189239502, |
|
"logps/chosen": -0.5758674144744873, |
|
"logps/rejected": -0.5995661020278931, |
|
"loss": 0.8668, |
|
"nll_loss": 0.8348628282546997, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 4.833402429383947e-06, |
|
"log_odds_chosen": 0.007824589498341084, |
|
"log_odds_ratio": -0.7456755638122559, |
|
"logits/chosen": -2.394925832748413, |
|
"logits/rejected": -2.3120040893554688, |
|
"logps/chosen": -0.6642740368843079, |
|
"logps/rejected": -0.6849616169929504, |
|
"loss": 0.8936, |
|
"nll_loss": 0.8833368420600891, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 4.783152011255739e-06, |
|
"log_odds_chosen": 0.06460268795490265, |
|
"log_odds_ratio": -0.7266359925270081, |
|
"logits/chosen": -2.2752573490142822, |
|
"logits/rejected": -2.2238731384277344, |
|
"logps/chosen": -0.6234402656555176, |
|
"logps/rejected": -0.6678077578544617, |
|
"loss": 0.8939, |
|
"nll_loss": 0.8869989514350891, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 5.46875, |
|
"learning_rate": 4.726622724822781e-06, |
|
"log_odds_chosen": 0.07030437141656876, |
|
"log_odds_ratio": -0.7147783041000366, |
|
"logits/chosen": -2.2865307331085205, |
|
"logits/rejected": -2.2341747283935547, |
|
"logps/chosen": -0.6607599854469299, |
|
"logps/rejected": -0.7076841592788696, |
|
"loss": 0.8779, |
|
"nll_loss": 0.8933170437812805, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 5.125, |
|
"learning_rate": 4.663970030581408e-06, |
|
"log_odds_chosen": 0.11411430686712265, |
|
"log_odds_ratio": -0.7219884395599365, |
|
"logits/chosen": -2.235715389251709, |
|
"logits/rejected": -2.195624828338623, |
|
"logps/chosen": -0.5864480137825012, |
|
"logps/rejected": -0.6477882266044617, |
|
"loss": 0.8489, |
|
"nll_loss": 0.8421682119369507, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 5.28125, |
|
"learning_rate": 4.59536622893656e-06, |
|
"log_odds_chosen": -0.019357014447450638, |
|
"log_odds_ratio": -0.7726019024848938, |
|
"logits/chosen": -2.271784782409668, |
|
"logits/rejected": -2.188135862350464, |
|
"logps/chosen": -0.6931017637252808, |
|
"logps/rejected": -0.7073479890823364, |
|
"loss": 0.8628, |
|
"nll_loss": 0.8622463345527649, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 4.520999986360555e-06, |
|
"log_odds_chosen": 0.07649385929107666, |
|
"log_odds_ratio": -0.7158172726631165, |
|
"logits/chosen": -2.250640392303467, |
|
"logits/rejected": -2.2138304710388184, |
|
"logps/chosen": -0.6532458662986755, |
|
"logps/rejected": -0.6902228593826294, |
|
"loss": 0.8572, |
|
"nll_loss": 0.868952751159668, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 5.65625, |
|
"learning_rate": 4.441075816543745e-06, |
|
"log_odds_chosen": 0.030604243278503418, |
|
"log_odds_ratio": -0.7345600128173828, |
|
"logits/chosen": -2.3321053981781006, |
|
"logits/rejected": -2.2295913696289062, |
|
"logps/chosen": -0.6273137331008911, |
|
"logps/rejected": -0.6507210731506348, |
|
"loss": 0.8789, |
|
"nll_loss": 0.8177094459533691, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"grad_norm": 5.84375, |
|
"learning_rate": 4.355813517963924e-06, |
|
"log_odds_chosen": 0.40961089730262756, |
|
"log_odds_ratio": -0.5910171270370483, |
|
"logits/chosen": -2.3318493366241455, |
|
"logits/rejected": -2.2643091678619385, |
|
"logps/chosen": -0.4916546940803528, |
|
"logps/rejected": -0.6644417643547058, |
|
"loss": 0.758, |
|
"nll_loss": 0.740388810634613, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"grad_norm": 6.25, |
|
"learning_rate": 4.265447569421234e-06, |
|
"log_odds_chosen": 0.3989773392677307, |
|
"log_odds_ratio": -0.590238630771637, |
|
"logits/chosen": -2.3867685794830322, |
|
"logits/rejected": -2.3056893348693848, |
|
"logps/chosen": -0.5096999406814575, |
|
"logps/rejected": -0.6727738976478577, |
|
"loss": 0.6881, |
|
"nll_loss": 0.7256556153297424, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"grad_norm": 6.0625, |
|
"learning_rate": 4.170226485200899e-06, |
|
"log_odds_chosen": 0.48682594299316406, |
|
"log_odds_ratio": -0.5600502490997314, |
|
"logits/chosen": -2.3787410259246826, |
|
"logits/rejected": -2.30033016204834, |
|
"logps/chosen": -0.47443705797195435, |
|
"logps/rejected": -0.6933677792549133, |
|
"loss": 0.725, |
|
"nll_loss": 0.6927866339683533, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"grad_norm": 5.875, |
|
"learning_rate": 4.070412131637139e-06, |
|
"log_odds_chosen": 0.38443347811698914, |
|
"log_odds_ratio": -0.6165519952774048, |
|
"logits/chosen": -2.3166797161102295, |
|
"logits/rejected": -2.23835825920105, |
|
"logps/chosen": -0.4729565978050232, |
|
"logps/rejected": -0.623404324054718, |
|
"loss": 0.719, |
|
"nll_loss": 0.689362645149231, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"grad_norm": 6.09375, |
|
"learning_rate": 3.966279006957781e-06, |
|
"log_odds_chosen": 0.4762851297855377, |
|
"log_odds_ratio": -0.5657437443733215, |
|
"logits/chosen": -2.3141160011291504, |
|
"logits/rejected": -2.2335238456726074, |
|
"logps/chosen": -0.5018330812454224, |
|
"logps/rejected": -0.7071572542190552, |
|
"loss": 0.7292, |
|
"nll_loss": 0.7584232687950134, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"grad_norm": 5.625, |
|
"learning_rate": 3.858113486390056e-06, |
|
"log_odds_chosen": 0.461935818195343, |
|
"log_odds_ratio": -0.5941855907440186, |
|
"logits/chosen": -2.339001178741455, |
|
"logits/rejected": -2.2689056396484375, |
|
"logps/chosen": -0.48000845313072205, |
|
"logps/rejected": -0.7079610228538513, |
|
"loss": 0.7325, |
|
"nll_loss": 0.7081630229949951, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"grad_norm": 6.375, |
|
"learning_rate": 3.7462130346036e-06, |
|
"log_odds_chosen": 0.4929097592830658, |
|
"log_odds_ratio": -0.563489556312561, |
|
"logits/chosen": -2.3332343101501465, |
|
"logits/rejected": -2.2668535709381104, |
|
"logps/chosen": -0.4893871247768402, |
|
"logps/rejected": -0.7047632336616516, |
|
"loss": 0.6818, |
|
"nll_loss": 0.7156692147254944, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 5.625, |
|
"learning_rate": 3.6308853876565232e-06, |
|
"log_odds_chosen": 0.4562360346317291, |
|
"log_odds_ratio": -0.5689755082130432, |
|
"logits/chosen": -2.3754940032958984, |
|
"logits/rejected": -2.27487850189209, |
|
"logps/chosen": -0.4727197289466858, |
|
"logps/rejected": -0.6664949655532837, |
|
"loss": 0.7119, |
|
"nll_loss": 0.6855541467666626, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"grad_norm": 5.375, |
|
"learning_rate": 3.512447706694254e-06, |
|
"log_odds_chosen": 0.3677065372467041, |
|
"log_odds_ratio": -0.6210616230964661, |
|
"logits/chosen": -2.3326685428619385, |
|
"logits/rejected": -2.284008502960205, |
|
"logps/chosen": -0.5006684064865112, |
|
"logps/rejected": -0.6665970087051392, |
|
"loss": 0.7148, |
|
"nll_loss": 0.7629369497299194, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"grad_norm": 5.34375, |
|
"learning_rate": 3.3912257057285684e-06, |
|
"log_odds_chosen": 0.5201979279518127, |
|
"log_odds_ratio": -0.5558315515518188, |
|
"logits/chosen": -2.35974383354187, |
|
"logits/rejected": -2.2589011192321777, |
|
"logps/chosen": -0.44807687401771545, |
|
"logps/rejected": -0.6775830984115601, |
|
"loss": 0.7127, |
|
"nll_loss": 0.6746495962142944, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"grad_norm": 5.46875, |
|
"learning_rate": 3.2675527558954897e-06, |
|
"log_odds_chosen": 0.4437088966369629, |
|
"log_odds_ratio": -0.5744566321372986, |
|
"logits/chosen": -2.3658008575439453, |
|
"logits/rejected": -2.302891969680786, |
|
"logps/chosen": -0.5020125508308411, |
|
"logps/rejected": -0.703687310218811, |
|
"loss": 0.7293, |
|
"nll_loss": 0.7448792457580566, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 6.21875, |
|
"learning_rate": 3.1417689686554144e-06, |
|
"log_odds_chosen": 0.4869805872440338, |
|
"log_odds_ratio": -0.5718838572502136, |
|
"logits/chosen": -2.3735198974609375, |
|
"logits/rejected": -2.3357536792755127, |
|
"logps/chosen": -0.4905489385128021, |
|
"logps/rejected": -0.7079612612724304, |
|
"loss": 0.7228, |
|
"nll_loss": 0.7263522148132324, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"grad_norm": 5.25, |
|
"learning_rate": 3.0142202604567724e-06, |
|
"log_odds_chosen": 0.4622114300727844, |
|
"log_odds_ratio": -0.580187201499939, |
|
"logits/chosen": -2.4162421226501465, |
|
"logits/rejected": -2.32672119140625, |
|
"logps/chosen": -0.4755636155605316, |
|
"logps/rejected": -0.6753752827644348, |
|
"loss": 0.6993, |
|
"nll_loss": 0.7006024122238159, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"grad_norm": 5.90625, |
|
"learning_rate": 2.8852574014354394e-06, |
|
"log_odds_chosen": 0.4685123562812805, |
|
"log_odds_ratio": -0.5639849901199341, |
|
"logits/chosen": -2.3573315143585205, |
|
"logits/rejected": -2.2810652256011963, |
|
"logps/chosen": -0.45586276054382324, |
|
"logps/rejected": -0.650667667388916, |
|
"loss": 0.6937, |
|
"nll_loss": 0.6792919039726257, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"grad_norm": 5.46875, |
|
"learning_rate": 2.7552350507661063e-06, |
|
"log_odds_chosen": 0.32628968358039856, |
|
"log_odds_ratio": -0.6266478300094604, |
|
"logits/chosen": -2.3865950107574463, |
|
"logits/rejected": -2.2999796867370605, |
|
"logps/chosen": -0.5150719285011292, |
|
"logps/rejected": -0.6419684290885925, |
|
"loss": 0.7454, |
|
"nll_loss": 0.7746638059616089, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"grad_norm": 5.65625, |
|
"learning_rate": 2.6245107813184286e-06, |
|
"log_odds_chosen": 0.4124643802642822, |
|
"log_odds_ratio": -0.6084017157554626, |
|
"logits/chosen": -2.300006151199341, |
|
"logits/rejected": -2.2486844062805176, |
|
"logps/chosen": -0.500686526298523, |
|
"logps/rejected": -0.7045504450798035, |
|
"loss": 0.7104, |
|
"nll_loss": 0.7621539235115051, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 6.3125, |
|
"learning_rate": 2.493444096300273e-06, |
|
"log_odds_chosen": 0.3271673321723938, |
|
"log_odds_ratio": -0.6363809704780579, |
|
"logits/chosen": -2.332576274871826, |
|
"logits/rejected": -2.2507784366607666, |
|
"logps/chosen": -0.5043616890907288, |
|
"logps/rejected": -0.6402055621147156, |
|
"loss": 0.7316, |
|
"nll_loss": 0.7292400598526001, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"grad_norm": 5.40625, |
|
"learning_rate": 2.3623954405923636e-06, |
|
"log_odds_chosen": 0.35860806703567505, |
|
"log_odds_ratio": -0.6040534973144531, |
|
"logits/chosen": -2.3241639137268066, |
|
"logits/rejected": -2.2687346935272217, |
|
"logps/chosen": -0.4667055010795593, |
|
"logps/rejected": -0.6222271919250488, |
|
"loss": 0.7055, |
|
"nll_loss": 0.6745852828025818, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 2.2317252094932383e-06, |
|
"log_odds_chosen": 0.4658966064453125, |
|
"log_odds_ratio": -0.5793284177780151, |
|
"logits/chosen": -2.3523142337799072, |
|
"logits/rejected": -2.2792649269104004, |
|
"logps/chosen": -0.4811071753501892, |
|
"logps/rejected": -0.6847606897354126, |
|
"loss": 0.6927, |
|
"nll_loss": 0.7262173295021057, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 5.75, |
|
"learning_rate": 2.1017927576005657e-06, |
|
"log_odds_chosen": 0.4586619436740875, |
|
"log_odds_ratio": -0.5749837756156921, |
|
"logits/chosen": -2.309678554534912, |
|
"logits/rejected": -2.2428524494171143, |
|
"logps/chosen": -0.4688965380191803, |
|
"logps/rejected": -0.6654346585273743, |
|
"loss": 0.6911, |
|
"nll_loss": 0.7090088129043579, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"grad_norm": 5.875, |
|
"learning_rate": 1.9729554105544816e-06, |
|
"log_odds_chosen": 0.39126816391944885, |
|
"log_odds_ratio": -0.5965334177017212, |
|
"logits/chosen": -2.246513605117798, |
|
"logits/rejected": -2.1855998039245605, |
|
"logps/chosen": -0.5077515840530396, |
|
"logps/rejected": -0.685882031917572, |
|
"loss": 0.73, |
|
"nll_loss": 0.696818470954895, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"grad_norm": 5.78125, |
|
"learning_rate": 1.8455674823607312e-06, |
|
"log_odds_chosen": 0.2512452006340027, |
|
"log_odds_ratio": -0.6640199422836304, |
|
"logits/chosen": -2.342719078063965, |
|
"logits/rejected": -2.274512529373169, |
|
"logps/chosen": -0.521196722984314, |
|
"logps/rejected": -0.6386003494262695, |
|
"loss": 0.7091, |
|
"nll_loss": 0.7769466638565063, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 5.46875, |
|
"learning_rate": 1.7199793009960766e-06, |
|
"log_odds_chosen": 0.4366021156311035, |
|
"log_odds_ratio": -0.5971043705940247, |
|
"logits/chosen": -2.2552707195281982, |
|
"logits/rejected": -2.2170627117156982, |
|
"logps/chosen": -0.5236691832542419, |
|
"logps/rejected": -0.7121491432189941, |
|
"loss": 0.7011, |
|
"nll_loss": 0.7103704214096069, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"grad_norm": 5.9375, |
|
"learning_rate": 1.5965362449756317e-06, |
|
"log_odds_chosen": 0.5843161344528198, |
|
"log_odds_ratio": -0.5287169218063354, |
|
"logits/chosen": -2.3433125019073486, |
|
"logits/rejected": -2.2500529289245605, |
|
"logps/chosen": -0.43613916635513306, |
|
"logps/rejected": -0.6702120304107666, |
|
"loss": 0.6759, |
|
"nll_loss": 0.6880279183387756, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"grad_norm": 6.5, |
|
"learning_rate": 1.4755777935316412e-06, |
|
"log_odds_chosen": 0.8127249479293823, |
|
"log_odds_ratio": -0.46383658051490784, |
|
"logits/chosen": -2.3992159366607666, |
|
"logits/rejected": -2.2700352668762207, |
|
"logps/chosen": -0.4094364047050476, |
|
"logps/rejected": -0.7208075523376465, |
|
"loss": 0.6001, |
|
"nll_loss": 0.5964864492416382, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 6.03125, |
|
"learning_rate": 1.3574365930158272e-06, |
|
"log_odds_chosen": 0.7765734791755676, |
|
"log_odds_ratio": -0.4649999141693115, |
|
"logits/chosen": -2.294707775115967, |
|
"logits/rejected": -2.244535446166992, |
|
"logps/chosen": -0.3731532692909241, |
|
"logps/rejected": -0.6663237810134888, |
|
"loss": 0.5958, |
|
"nll_loss": 0.5779340863227844, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"grad_norm": 5.34375, |
|
"learning_rate": 1.242437542092731e-06, |
|
"log_odds_chosen": 0.8575220108032227, |
|
"log_odds_ratio": -0.42505908012390137, |
|
"logits/chosen": -2.2913150787353516, |
|
"logits/rejected": -2.2416951656341553, |
|
"logps/chosen": -0.37527984380722046, |
|
"logps/rejected": -0.6905524730682373, |
|
"loss": 0.6326, |
|
"nll_loss": 0.623663604259491, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"grad_norm": 6.09375, |
|
"learning_rate": 1.1308968982398893e-06, |
|
"log_odds_chosen": 0.912135899066925, |
|
"log_odds_ratio": -0.41568368673324585, |
|
"logits/chosen": -2.313683271408081, |
|
"logits/rejected": -2.226602554321289, |
|
"logps/chosen": -0.3589690625667572, |
|
"logps/rejected": -0.7188132405281067, |
|
"loss": 0.5746, |
|
"nll_loss": 0.5657839179039001, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 6.4375, |
|
"learning_rate": 1.0231214080120354e-06, |
|
"log_odds_chosen": 0.8424990773200989, |
|
"log_odds_ratio": -0.4574614465236664, |
|
"logits/chosen": -2.2810280323028564, |
|
"logits/rejected": -2.2405149936676025, |
|
"logps/chosen": -0.39062148332595825, |
|
"logps/rejected": -0.705338716506958, |
|
"loss": 0.616, |
|
"nll_loss": 0.6053126454353333, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 5.46875, |
|
"learning_rate": 9.194074634611577e-07, |
|
"log_odds_chosen": 0.8043780326843262, |
|
"log_odds_ratio": -0.4463415741920471, |
|
"logits/chosen": -2.3421316146850586, |
|
"logits/rejected": -2.27047061920166, |
|
"logps/chosen": -0.3879227340221405, |
|
"logps/rejected": -0.681617259979248, |
|
"loss": 0.5906, |
|
"nll_loss": 0.6051921844482422, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 8.200402870323634e-07, |
|
"log_odds_chosen": 0.8168398141860962, |
|
"log_odds_ratio": -0.4651332497596741, |
|
"logits/chosen": -2.2850687503814697, |
|
"logits/rejected": -2.203339099884033, |
|
"logps/chosen": -0.4002053141593933, |
|
"logps/rejected": -0.7145139575004578, |
|
"loss": 0.6095, |
|
"nll_loss": 0.5440846681594849, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 5.53125, |
|
"learning_rate": 7.252931471771322e-07, |
|
"log_odds_chosen": 0.957280158996582, |
|
"log_odds_ratio": -0.4218501150608063, |
|
"logits/chosen": -2.3870935440063477, |
|
"logits/rejected": -2.3037381172180176, |
|
"logps/chosen": -0.3641536235809326, |
|
"logps/rejected": -0.7135647535324097, |
|
"loss": 0.5756, |
|
"nll_loss": 0.5520345568656921, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"grad_norm": 6.0, |
|
"learning_rate": 6.354266068411078e-07, |
|
"log_odds_chosen": 0.8184865713119507, |
|
"log_odds_ratio": -0.4773890972137451, |
|
"logits/chosen": -2.2534213066101074, |
|
"logits/rejected": -2.22419810295105, |
|
"logps/chosen": -0.41363048553466797, |
|
"logps/rejected": -0.7267066240310669, |
|
"loss": 0.6069, |
|
"nll_loss": 0.6007322072982788, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 5.50687806893139e-07, |
|
"log_odds_chosen": 0.8616418838500977, |
|
"log_odds_ratio": -0.44923096895217896, |
|
"logits/chosen": -2.276895761489868, |
|
"logits/rejected": -2.2783918380737305, |
|
"logps/chosen": -0.37030458450317383, |
|
"logps/rejected": -0.666776180267334, |
|
"loss": 0.614, |
|
"nll_loss": 0.6462346911430359, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 6.1875, |
|
"learning_rate": 4.7130978646620807e-07, |
|
"log_odds_chosen": 0.9556438326835632, |
|
"log_odds_ratio": -0.4390975534915924, |
|
"logits/chosen": -2.3128373622894287, |
|
"logits/rejected": -2.2027499675750732, |
|
"logps/chosen": -0.3874739706516266, |
|
"logps/rejected": -0.7368007898330688, |
|
"loss": 0.5845, |
|
"nll_loss": 0.6006309986114502, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"grad_norm": 5.5, |
|
"learning_rate": 3.975108420793819e-07, |
|
"log_odds_chosen": 0.8426518440246582, |
|
"log_odds_ratio": -0.44841188192367554, |
|
"logits/chosen": -2.2656946182250977, |
|
"logits/rejected": -2.2316298484802246, |
|
"logps/chosen": -0.3601939082145691, |
|
"logps/rejected": -0.6829022765159607, |
|
"loss": 0.6035, |
|
"nll_loss": 0.5817385911941528, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 6.09375, |
|
"learning_rate": 3.294939273032272e-07, |
|
"log_odds_chosen": 0.9172550439834595, |
|
"log_odds_ratio": -0.41558733582496643, |
|
"logits/chosen": -2.251704692840576, |
|
"logits/rejected": -2.228672742843628, |
|
"logps/chosen": -0.3531130254268646, |
|
"logps/rejected": -0.703464925289154, |
|
"loss": 0.5694, |
|
"nll_loss": 0.554794430732727, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 5.96875, |
|
"learning_rate": 2.6744609461969523e-07, |
|
"log_odds_chosen": 0.877754807472229, |
|
"log_odds_ratio": -0.4350369870662689, |
|
"logits/chosen": -2.3534913063049316, |
|
"logits/rejected": -2.2918014526367188, |
|
"logps/chosen": -0.37197771668434143, |
|
"logps/rejected": -0.7188286781311035, |
|
"loss": 0.594, |
|
"nll_loss": 0.5847772359848022, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"grad_norm": 5.84375, |
|
"learning_rate": 2.1153798101138405e-07, |
|
"log_odds_chosen": 0.9580931663513184, |
|
"log_odds_ratio": -0.4062252938747406, |
|
"logits/chosen": -2.2987186908721924, |
|
"logits/rejected": -2.2561445236206055, |
|
"logps/chosen": -0.37130284309387207, |
|
"logps/rejected": -0.7186514735221863, |
|
"loss": 0.6113, |
|
"nll_loss": 0.6211617588996887, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 11.5625, |
|
"learning_rate": 1.61923338694871e-07, |
|
"log_odds_chosen": 0.8980989456176758, |
|
"log_odds_ratio": -0.44537553191185, |
|
"logits/chosen": -2.3041529655456543, |
|
"logits/rejected": -2.246042490005493, |
|
"logps/chosen": -0.3840414583683014, |
|
"logps/rejected": -0.7412872910499573, |
|
"loss": 0.6259, |
|
"nll_loss": 0.6146137714385986, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"grad_norm": 5.65625, |
|
"learning_rate": 1.1873861228862998e-07, |
|
"log_odds_chosen": 0.9331363439559937, |
|
"log_odds_ratio": -0.4274202883243561, |
|
"logits/chosen": -2.2929160594940186, |
|
"logits/rejected": -2.2195255756378174, |
|
"logps/chosen": -0.3945469558238983, |
|
"logps/rejected": -0.7453508377075195, |
|
"loss": 0.6133, |
|
"nll_loss": 0.621927797794342, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"grad_norm": 6.21875, |
|
"learning_rate": 8.210256357836065e-08, |
|
"log_odds_chosen": 1.0095536708831787, |
|
"log_odds_ratio": -0.3956521451473236, |
|
"logits/chosen": -2.263673782348633, |
|
"logits/rejected": -2.20683217048645, |
|
"logps/chosen": -0.3427670896053314, |
|
"logps/rejected": -0.7156583070755005, |
|
"loss": 0.574, |
|
"nll_loss": 0.5451396703720093, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 5.8125, |
|
"learning_rate": 5.21159449116615e-08, |
|
"log_odds_chosen": 0.770971417427063, |
|
"log_odds_ratio": -0.4887896180152893, |
|
"logits/chosen": -2.284029006958008, |
|
"logits/rejected": -2.214066982269287, |
|
"logps/chosen": -0.4013218879699707, |
|
"logps/rejected": -0.6818682551383972, |
|
"loss": 0.5932, |
|
"nll_loss": 0.5923025012016296, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"grad_norm": 5.4375, |
|
"learning_rate": 2.8861222120235845e-08, |
|
"log_odds_chosen": 0.8828635215759277, |
|
"log_odds_ratio": -0.4312991201877594, |
|
"logits/chosen": -2.24355411529541, |
|
"logits/rejected": -2.219613552093506, |
|
"logps/chosen": -0.34176358580589294, |
|
"logps/rejected": -0.6538265943527222, |
|
"loss": 0.5864, |
|
"nll_loss": 0.58399498462677, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"grad_norm": 5.53125, |
|
"learning_rate": 1.2402347731620412e-08, |
|
"log_odds_chosen": 0.7922724485397339, |
|
"log_odds_ratio": -0.4506749212741852, |
|
"logits/chosen": -2.275207996368408, |
|
"logits/rejected": -2.178473949432373, |
|
"logps/chosen": -0.40824776887893677, |
|
"logps/rejected": -0.7051072120666504, |
|
"loss": 0.5958, |
|
"nll_loss": 0.5631005167961121, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 6.15625, |
|
"learning_rate": 2.7845850941254914e-09, |
|
"log_odds_chosen": 0.8703590631484985, |
|
"log_odds_ratio": -0.4304262101650238, |
|
"logits/chosen": -2.343256711959839, |
|
"logits/rejected": -2.2737724781036377, |
|
"logps/chosen": -0.366679310798645, |
|
"logps/rejected": -0.6560879349708557, |
|
"loss": 0.6025, |
|
"nll_loss": 0.5677012205123901, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"step": 699, |
|
"total_flos": 0.0, |
|
"train_loss": 0.7502862380468454, |
|
"train_runtime": 21621.4849, |
|
"train_samples_per_second": 2.071, |
|
"train_steps_per_second": 0.032 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 699, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|