|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.9936102236421727, |
|
"eval_steps": 10000, |
|
"global_step": 312, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.06389776357827476, |
|
"grad_norm": 0.4242139016276009, |
|
"learning_rate": 3.125e-05, |
|
"log_odds_chosen": 0.05670202523469925, |
|
"log_odds_ratio": -0.6963189840316772, |
|
"logits/chosen": 33.65892791748047, |
|
"logits/rejected": 33.56386947631836, |
|
"logps/chosen": -0.9726333618164062, |
|
"logps/rejected": -1.0167349576950073, |
|
"loss": 1.6695, |
|
"nll_loss": 1.5828170776367188, |
|
"rewards/accuracies": 0.5406249761581421, |
|
"rewards/chosen": -0.09726335108280182, |
|
"rewards/margins": 0.004410145338624716, |
|
"rewards/rejected": -0.10167349874973297, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.12779552715654952, |
|
"grad_norm": 0.527757575829653, |
|
"learning_rate": 6.25e-05, |
|
"log_odds_chosen": 0.060249220579862595, |
|
"log_odds_ratio": -0.6938394904136658, |
|
"logits/chosen": 34.7052001953125, |
|
"logits/rejected": 34.30677032470703, |
|
"logps/chosen": -0.8907906413078308, |
|
"logps/rejected": -0.9303584098815918, |
|
"loss": 1.4944, |
|
"nll_loss": 1.4315097332000732, |
|
"rewards/accuracies": 0.546875, |
|
"rewards/chosen": -0.08907906711101532, |
|
"rewards/margins": 0.003956770058721304, |
|
"rewards/rejected": -0.09303583949804306, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.19169329073482427, |
|
"grad_norm": 0.4014606132018809, |
|
"learning_rate": 9.375e-05, |
|
"log_odds_chosen": 0.04292842000722885, |
|
"log_odds_ratio": -0.698261022567749, |
|
"logits/chosen": 30.987014770507812, |
|
"logits/rejected": 30.439056396484375, |
|
"logps/chosen": -0.6581910252571106, |
|
"logps/rejected": -0.6830389499664307, |
|
"loss": 1.0721, |
|
"nll_loss": 1.006190538406372, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.06581910699605942, |
|
"rewards/margins": 0.002484795870259404, |
|
"rewards/rejected": -0.0683038979768753, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.25559105431309903, |
|
"grad_norm": 0.09410150706534143, |
|
"learning_rate": 9.979871469976196e-05, |
|
"log_odds_chosen": 0.07667034864425659, |
|
"log_odds_ratio": -0.6961523294448853, |
|
"logits/chosen": 28.151453018188477, |
|
"logits/rejected": 27.650014877319336, |
|
"logps/chosen": -0.43666666746139526, |
|
"logps/rejected": -0.46118512749671936, |
|
"loss": 0.8277, |
|
"nll_loss": 0.7572250366210938, |
|
"rewards/accuracies": 0.5687500238418579, |
|
"rewards/chosen": -0.043666668236255646, |
|
"rewards/margins": 0.0024518456775695086, |
|
"rewards/rejected": -0.046118512749671936, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.3194888178913738, |
|
"grad_norm": 0.09768125665941661, |
|
"learning_rate": 9.898376992116179e-05, |
|
"log_odds_chosen": 0.06605090945959091, |
|
"log_odds_ratio": -0.7164387106895447, |
|
"logits/chosen": 29.047494888305664, |
|
"logits/rejected": 27.90814781188965, |
|
"logps/chosen": -0.37932097911834717, |
|
"logps/rejected": -0.39148497581481934, |
|
"loss": 0.7778, |
|
"nll_loss": 0.7044206261634827, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -0.03793209791183472, |
|
"rewards/margins": 0.0012163992505520582, |
|
"rewards/rejected": -0.03914849832653999, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.38338658146964855, |
|
"grad_norm": 0.07343907441931204, |
|
"learning_rate": 9.755282581475769e-05, |
|
"log_odds_chosen": 0.08987905830144882, |
|
"log_odds_ratio": -0.7063173055648804, |
|
"logits/chosen": 28.47494125366211, |
|
"logits/rejected": 28.794668197631836, |
|
"logps/chosen": -0.3645591139793396, |
|
"logps/rejected": -0.38652682304382324, |
|
"loss": 0.7576, |
|
"nll_loss": 0.6854395866394043, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": -0.0364559181034565, |
|
"rewards/margins": 0.0021967687644064426, |
|
"rewards/rejected": -0.038652680814266205, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.4472843450479233, |
|
"grad_norm": 0.06951733417216101, |
|
"learning_rate": 9.552387733294081e-05, |
|
"log_odds_chosen": 0.1109728068113327, |
|
"log_odds_ratio": -0.7004402875900269, |
|
"logits/chosen": 29.863407135009766, |
|
"logits/rejected": 30.27783203125, |
|
"logps/chosen": -0.35166820883750916, |
|
"logps/rejected": -0.3710673451423645, |
|
"loss": 0.7621, |
|
"nll_loss": 0.6829724907875061, |
|
"rewards/accuracies": 0.559374988079071, |
|
"rewards/chosen": -0.035166822373867035, |
|
"rewards/margins": 0.0019399106968194246, |
|
"rewards/rejected": -0.03710673004388809, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.5111821086261981, |
|
"grad_norm": 0.06675882825106033, |
|
"learning_rate": 9.292243968009331e-05, |
|
"log_odds_chosen": 0.22688157856464386, |
|
"log_odds_ratio": -0.6433924436569214, |
|
"logits/chosen": 31.850915908813477, |
|
"logits/rejected": 32.051448822021484, |
|
"logps/chosen": -0.35137858986854553, |
|
"logps/rejected": -0.4119800925254822, |
|
"loss": 0.7548, |
|
"nll_loss": 0.6953305602073669, |
|
"rewards/accuracies": 0.640625, |
|
"rewards/chosen": -0.03513786196708679, |
|
"rewards/margins": 0.006060150917619467, |
|
"rewards/rejected": -0.0411980114877224, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.5750798722044729, |
|
"grad_norm": 0.07132675829804414, |
|
"learning_rate": 8.978122744408906e-05, |
|
"log_odds_chosen": 0.20013344287872314, |
|
"log_odds_ratio": -0.6580983400344849, |
|
"logits/chosen": 32.63883972167969, |
|
"logits/rejected": 32.50263595581055, |
|
"logps/chosen": -0.35542625188827515, |
|
"logps/rejected": -0.41074585914611816, |
|
"loss": 0.7549, |
|
"nll_loss": 0.6860114932060242, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.03554262965917587, |
|
"rewards/margins": 0.0055319564417004585, |
|
"rewards/rejected": -0.04107458516955376, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.6389776357827476, |
|
"grad_norm": 0.09149445807314589, |
|
"learning_rate": 8.613974319136958e-05, |
|
"log_odds_chosen": 0.2682611346244812, |
|
"log_odds_ratio": -0.6336508989334106, |
|
"logits/chosen": 32.11505889892578, |
|
"logits/rejected": 32.7309455871582, |
|
"logps/chosen": -0.35972970724105835, |
|
"logps/rejected": -0.4426427483558655, |
|
"loss": 0.753, |
|
"nll_loss": 0.6944054365158081, |
|
"rewards/accuracies": 0.653124988079071, |
|
"rewards/chosen": -0.035972971469163895, |
|
"rewards/margins": 0.008291301317512989, |
|
"rewards/rejected": -0.04426427185535431, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.7028753993610224, |
|
"grad_norm": 0.08926217777865493, |
|
"learning_rate": 8.20437806992512e-05, |
|
"log_odds_chosen": 0.2868707776069641, |
|
"log_odds_ratio": -0.6233163475990295, |
|
"logits/chosen": 35.566490173339844, |
|
"logits/rejected": 34.921634674072266, |
|
"logps/chosen": -0.3582688868045807, |
|
"logps/rejected": -0.4581901431083679, |
|
"loss": 0.744, |
|
"nll_loss": 0.6832225918769836, |
|
"rewards/accuracies": 0.6343749761581421, |
|
"rewards/chosen": -0.03582689166069031, |
|
"rewards/margins": 0.00999213196337223, |
|
"rewards/rejected": -0.04581902176141739, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.7667731629392971, |
|
"grad_norm": 0.14837704688840048, |
|
"learning_rate": 7.754484907260513e-05, |
|
"log_odds_chosen": 0.30330824851989746, |
|
"log_odds_ratio": -0.6247963905334473, |
|
"logits/chosen": 34.01176834106445, |
|
"logits/rejected": 34.39574432373047, |
|
"logps/chosen": -0.35339340567588806, |
|
"logps/rejected": -0.457806259393692, |
|
"loss": 0.7419, |
|
"nll_loss": 0.6851586103439331, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -0.035339340567588806, |
|
"rewards/margins": 0.01044128555804491, |
|
"rewards/rejected": -0.045780621469020844, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.8306709265175719, |
|
"grad_norm": 0.10699968939091851, |
|
"learning_rate": 7.269952498697734e-05, |
|
"log_odds_chosen": 0.3560214638710022, |
|
"log_odds_ratio": -0.6051545143127441, |
|
"logits/chosen": 36.380924224853516, |
|
"logits/rejected": 35.524024963378906, |
|
"logps/chosen": -0.355410635471344, |
|
"logps/rejected": -0.47948652505874634, |
|
"loss": 0.7296, |
|
"nll_loss": 0.6667423844337463, |
|
"rewards/accuracies": 0.668749988079071, |
|
"rewards/chosen": -0.03554106876254082, |
|
"rewards/margins": 0.01240758690983057, |
|
"rewards/rejected": -0.04794865846633911, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.8945686900958466, |
|
"grad_norm": 0.1720595584929413, |
|
"learning_rate": 6.756874120406714e-05, |
|
"log_odds_chosen": 0.5141419172286987, |
|
"log_odds_ratio": -0.5401668548583984, |
|
"logits/chosen": 34.641624450683594, |
|
"logits/rejected": 34.34906768798828, |
|
"logps/chosen": -0.347829669713974, |
|
"logps/rejected": -0.5514229536056519, |
|
"loss": 0.7143, |
|
"nll_loss": 0.6645579934120178, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.03478296846151352, |
|
"rewards/margins": 0.020359333604574203, |
|
"rewards/rejected": -0.05514230206608772, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.9584664536741214, |
|
"grad_norm": 0.12630063541107736, |
|
"learning_rate": 6.22170203068947e-05, |
|
"log_odds_chosen": 0.5517296195030212, |
|
"log_odds_ratio": -0.543639600276947, |
|
"logits/chosen": 36.98326873779297, |
|
"logits/rejected": 36.52146530151367, |
|
"logps/chosen": -0.3562212884426117, |
|
"logps/rejected": -0.578203022480011, |
|
"loss": 0.7162, |
|
"nll_loss": 0.6639058589935303, |
|
"rewards/accuracies": 0.731249988079071, |
|
"rewards/chosen": -0.03562213480472565, |
|
"rewards/margins": 0.02219817042350769, |
|
"rewards/rejected": -0.05782030150294304, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.0223642172523961, |
|
"grad_norm": 0.1403636887974213, |
|
"learning_rate": 5.6711663290882776e-05, |
|
"log_odds_chosen": 0.6598159074783325, |
|
"log_odds_ratio": -0.5024036169052124, |
|
"logits/chosen": 36.992469787597656, |
|
"logits/rejected": 35.901676177978516, |
|
"logps/chosen": -0.33446019887924194, |
|
"logps/rejected": -0.5835962891578674, |
|
"loss": 0.7105, |
|
"nll_loss": 0.657503604888916, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -0.033446017652750015, |
|
"rewards/margins": 0.0249136071652174, |
|
"rewards/rejected": -0.05835963040590286, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.0862619808306708, |
|
"grad_norm": 0.14799945317167665, |
|
"learning_rate": 5.112190321479026e-05, |
|
"log_odds_chosen": 0.6551031470298767, |
|
"log_odds_ratio": -0.5045552849769592, |
|
"logits/chosen": 35.59492874145508, |
|
"logits/rejected": 35.479576110839844, |
|
"logps/chosen": -0.36466851830482483, |
|
"logps/rejected": -0.6384136080741882, |
|
"loss": 0.7194, |
|
"nll_loss": 0.6874456405639648, |
|
"rewards/accuracies": 0.765625, |
|
"rewards/chosen": -0.03646685555577278, |
|
"rewards/margins": 0.027374515309929848, |
|
"rewards/rejected": -0.06384135782718658, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.1501597444089458, |
|
"grad_norm": 0.4144450852693044, |
|
"learning_rate": 4.551803455482833e-05, |
|
"log_odds_chosen": 0.8569015264511108, |
|
"log_odds_ratio": -0.43813252449035645, |
|
"logits/chosen": 36.478363037109375, |
|
"logits/rejected": 35.625038146972656, |
|
"logps/chosen": -0.3283715844154358, |
|
"logps/rejected": -0.6759421229362488, |
|
"loss": 0.7011, |
|
"nll_loss": 0.6358424425125122, |
|
"rewards/accuracies": 0.809374988079071, |
|
"rewards/chosen": -0.0328371599316597, |
|
"rewards/margins": 0.03475705534219742, |
|
"rewards/rejected": -0.06759421527385712, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.2140575079872205, |
|
"grad_norm": 0.1393106135709619, |
|
"learning_rate": 3.9970529210836366e-05, |
|
"log_odds_chosen": 0.7694636583328247, |
|
"log_odds_ratio": -0.48203492164611816, |
|
"logits/chosen": 37.65850067138672, |
|
"logits/rejected": 37.32286071777344, |
|
"logps/chosen": -0.34162312746047974, |
|
"logps/rejected": -0.6649552583694458, |
|
"loss": 0.696, |
|
"nll_loss": 0.6590245962142944, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.034162312746047974, |
|
"rewards/margins": 0.032333213835954666, |
|
"rewards/rejected": -0.06649552285671234, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.2779552715654952, |
|
"grad_norm": 0.18676893862042468, |
|
"learning_rate": 3.4549150281252636e-05, |
|
"log_odds_chosen": 0.8638985753059387, |
|
"log_odds_ratio": -0.4615907073020935, |
|
"logits/chosen": 34.856327056884766, |
|
"logits/rejected": 34.688533782958984, |
|
"logps/chosen": -0.3576621413230896, |
|
"logps/rejected": -0.74553382396698, |
|
"loss": 0.6981, |
|
"nll_loss": 0.6673237085342407, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -0.03576621413230896, |
|
"rewards/margins": 0.03878717124462128, |
|
"rewards/rejected": -0.07455337792634964, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.34185303514377, |
|
"grad_norm": 0.2564156817901977, |
|
"learning_rate": 2.932207475167398e-05, |
|
"log_odds_chosen": 0.8325332403182983, |
|
"log_odds_ratio": -0.45953792333602905, |
|
"logits/chosen": 36.798072814941406, |
|
"logits/rejected": 35.09117889404297, |
|
"logps/chosen": -0.3448064923286438, |
|
"logps/rejected": -0.7093031406402588, |
|
"loss": 0.6982, |
|
"nll_loss": 0.6432278752326965, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.03448065370321274, |
|
"rewards/margins": 0.03644966334104538, |
|
"rewards/rejected": -0.07093031704425812, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.4057507987220448, |
|
"grad_norm": 0.1531688356816753, |
|
"learning_rate": 2.43550361297047e-05, |
|
"log_odds_chosen": 1.0281397104263306, |
|
"log_odds_ratio": -0.4170387387275696, |
|
"logits/chosen": 35.599212646484375, |
|
"logits/rejected": 34.3337287902832, |
|
"logps/chosen": -0.3171806335449219, |
|
"logps/rejected": -0.7794741988182068, |
|
"loss": 0.6881, |
|
"nll_loss": 0.6331400275230408, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -0.03171805664896965, |
|
"rewards/margins": 0.04622935503721237, |
|
"rewards/rejected": -0.07794742286205292, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.4696485623003195, |
|
"grad_norm": 0.13237811516695533, |
|
"learning_rate": 1.971049780795901e-05, |
|
"log_odds_chosen": 0.9918270111083984, |
|
"log_odds_ratio": -0.440161794424057, |
|
"logits/chosen": 34.80529022216797, |
|
"logits/rejected": 34.433349609375, |
|
"logps/chosen": -0.3528688848018646, |
|
"logps/rejected": -0.8333184123039246, |
|
"loss": 0.684, |
|
"nll_loss": 0.653908908367157, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -0.03528688848018646, |
|
"rewards/margins": 0.04804495349526405, |
|
"rewards/rejected": -0.08333183825016022, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.5335463258785942, |
|
"grad_norm": 0.17618952870677607, |
|
"learning_rate": 1.544686755065677e-05, |
|
"log_odds_chosen": 1.110446810722351, |
|
"log_odds_ratio": -0.39400768280029297, |
|
"logits/chosen": 35.90544891357422, |
|
"logits/rejected": 34.70677185058594, |
|
"logps/chosen": -0.3103191554546356, |
|
"logps/rejected": -0.8037627935409546, |
|
"loss": 0.6899, |
|
"nll_loss": 0.6227988004684448, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.03103191778063774, |
|
"rewards/margins": 0.04934436455368996, |
|
"rewards/rejected": -0.0803762823343277, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 1.5974440894568689, |
|
"grad_norm": 0.17643350651397863, |
|
"learning_rate": 1.1617762982099446e-05, |
|
"log_odds_chosen": 1.0278904438018799, |
|
"log_odds_ratio": -0.40579158067703247, |
|
"logits/chosen": 36.86786651611328, |
|
"logits/rejected": 35.534324645996094, |
|
"logps/chosen": -0.34132060408592224, |
|
"logps/rejected": -0.8093164563179016, |
|
"loss": 0.6839, |
|
"nll_loss": 0.6438942551612854, |
|
"rewards/accuracies": 0.8218749761581421, |
|
"rewards/chosen": -0.034132059663534164, |
|
"rewards/margins": 0.04679957777261734, |
|
"rewards/rejected": -0.0809316486120224, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 1.6613418530351438, |
|
"grad_norm": 0.20335250536853633, |
|
"learning_rate": 8.271337313934869e-06, |
|
"log_odds_chosen": 1.0964257717132568, |
|
"log_odds_ratio": -0.40502405166625977, |
|
"logits/chosen": 35.01730728149414, |
|
"logits/rejected": 33.87272262573242, |
|
"logps/chosen": -0.35952043533325195, |
|
"logps/rejected": -0.8931809663772583, |
|
"loss": 0.6875, |
|
"nll_loss": 0.6542503833770752, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.035952042788267136, |
|
"rewards/margins": 0.0533660463988781, |
|
"rewards/rejected": -0.08931808918714523, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 1.7252396166134185, |
|
"grad_norm": 0.1417159132341433, |
|
"learning_rate": 5.449673790581611e-06, |
|
"log_odds_chosen": 1.1632859706878662, |
|
"log_odds_ratio": -0.39327362179756165, |
|
"logits/chosen": 37.34235763549805, |
|
"logits/rejected": 35.14636993408203, |
|
"logps/chosen": -0.3363361358642578, |
|
"logps/rejected": -0.8959217071533203, |
|
"loss": 0.6837, |
|
"nll_loss": 0.6559703350067139, |
|
"rewards/accuracies": 0.8531249761581421, |
|
"rewards/chosen": -0.03363361582159996, |
|
"rewards/margins": 0.055958546698093414, |
|
"rewards/rejected": -0.08959217369556427, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 1.7891373801916934, |
|
"grad_norm": 0.39828151397493416, |
|
"learning_rate": 3.18825646801314e-06, |
|
"log_odds_chosen": 1.0620207786560059, |
|
"log_odds_ratio": -0.41589412093162537, |
|
"logits/chosen": 35.572288513183594, |
|
"logits/rejected": 34.087669372558594, |
|
"logps/chosen": -0.3491634726524353, |
|
"logps/rejected": -0.8419939875602722, |
|
"loss": 0.6856, |
|
"nll_loss": 0.6521117091178894, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.03491634875535965, |
|
"rewards/margins": 0.04928305745124817, |
|
"rewards/rejected": -0.08419940620660782, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 1.8530351437699681, |
|
"grad_norm": 0.17410175622655502, |
|
"learning_rate": 1.5155239811656563e-06, |
|
"log_odds_chosen": 1.1792383193969727, |
|
"log_odds_ratio": -0.3946213126182556, |
|
"logits/chosen": 36.629905700683594, |
|
"logits/rejected": 35.32917404174805, |
|
"logps/chosen": -0.33119866251945496, |
|
"logps/rejected": -0.899772047996521, |
|
"loss": 0.6837, |
|
"nll_loss": 0.6425603628158569, |
|
"rewards/accuracies": 0.815625011920929, |
|
"rewards/chosen": -0.033119868487119675, |
|
"rewards/margins": 0.05685734748840332, |
|
"rewards/rejected": -0.0899772122502327, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 1.9169329073482428, |
|
"grad_norm": 0.13374493386406422, |
|
"learning_rate": 4.52511911603265e-07, |
|
"log_odds_chosen": 1.0675084590911865, |
|
"log_odds_ratio": -0.42029696702957153, |
|
"logits/chosen": 36.18767547607422, |
|
"logits/rejected": 33.20973205566406, |
|
"logps/chosen": -0.34383276104927063, |
|
"logps/rejected": -0.8607551455497742, |
|
"loss": 0.6844, |
|
"nll_loss": 0.6432604193687439, |
|
"rewards/accuracies": 0.809374988079071, |
|
"rewards/chosen": -0.034383274614810944, |
|
"rewards/margins": 0.05169224739074707, |
|
"rewards/rejected": -0.08607552200555801, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 1.9808306709265175, |
|
"grad_norm": 0.1377772193573712, |
|
"learning_rate": 1.2588252874673468e-08, |
|
"log_odds_chosen": 1.0935267210006714, |
|
"log_odds_ratio": -0.41092976927757263, |
|
"logits/chosen": 36.207176208496094, |
|
"logits/rejected": 34.82603073120117, |
|
"logps/chosen": -0.344975084066391, |
|
"logps/rejected": -0.8865535855293274, |
|
"loss": 0.6781, |
|
"nll_loss": 0.6345787644386292, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -0.03449751064181328, |
|
"rewards/margins": 0.05415785312652588, |
|
"rewards/rejected": -0.08865536749362946, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 1.9936102236421727, |
|
"step": 312, |
|
"total_flos": 0.0, |
|
"train_loss": 0.7846963420892373, |
|
"train_runtime": 5577.3844, |
|
"train_samples_per_second": 7.172, |
|
"train_steps_per_second": 0.056 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 312, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|