{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 100, "global_step": 5898, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 8.47457627118644e-10, "logits/chosen": -2.827263116836548, "logits/rejected": -2.783407211303711, "logps/chosen": -255.93801879882812, "logps/rejected": -201.33627319335938, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.01, "learning_rate": 8.474576271186441e-09, "logits/chosen": -2.632491111755371, "logits/rejected": -2.627588987350464, "logps/chosen": -326.1221923828125, "logps/rejected": -333.5044860839844, "loss": 0.6956, "rewards/accuracies": 0.4305555522441864, "rewards/chosen": 0.0061164614744484425, "rewards/margins": 0.002570565789937973, "rewards/rejected": 0.0035458963830024004, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.6949152542372882e-08, "logits/chosen": -2.646186351776123, "logits/rejected": -2.6703312397003174, "logps/chosen": -241.2902374267578, "logps/rejected": -235.29745483398438, "loss": 0.6904, "rewards/accuracies": 0.5375000238418579, "rewards/chosen": 0.007977111265063286, "rewards/margins": 0.009717261418700218, "rewards/rejected": -0.0017401501536369324, "step": 20 }, { "epoch": 0.02, "learning_rate": 2.5423728813559323e-08, "logits/chosen": -2.6961770057678223, "logits/rejected": -2.6650378704071045, "logps/chosen": -290.31256103515625, "logps/rejected": -256.8085632324219, "loss": 0.6935, "rewards/accuracies": 0.4375, "rewards/chosen": -0.0055731819011271, "rewards/margins": -0.004891841672360897, "rewards/rejected": -0.000681340170558542, "step": 30 }, { "epoch": 0.02, "learning_rate": 3.3898305084745764e-08, "logits/chosen": -2.662567377090454, "logits/rejected": -2.6888091564178467, "logps/chosen": -269.77191162109375, "logps/rejected": -251.4422149658203, "loss": 0.6902, "rewards/accuracies": 0.5249999761581421, "rewards/chosen": 0.005323584191501141, "rewards/margins": 0.0068922750651836395, "rewards/rejected": -0.0015686902916058898, "step": 40 }, { "epoch": 0.03, "learning_rate": 4.23728813559322e-08, "logits/chosen": -2.696052074432373, "logits/rejected": -2.8296093940734863, "logps/chosen": -297.51080322265625, "logps/rejected": -236.7479248046875, "loss": 0.6903, "rewards/accuracies": 0.5625, "rewards/chosen": 0.0028805125039070845, "rewards/margins": 0.009964686818420887, "rewards/rejected": -0.0070841750130057335, "step": 50 }, { "epoch": 0.03, "learning_rate": 5.0847457627118645e-08, "logits/chosen": -2.5665619373321533, "logits/rejected": -2.6641879081726074, "logps/chosen": -302.92889404296875, "logps/rejected": -283.39117431640625, "loss": 0.6888, "rewards/accuracies": 0.4749999940395355, "rewards/chosen": 0.007401665206998587, "rewards/margins": 0.0012593126157298684, "rewards/rejected": 0.0061423517763614655, "step": 60 }, { "epoch": 0.04, "learning_rate": 5.932203389830508e-08, "logits/chosen": -2.7289795875549316, "logits/rejected": -2.6271157264709473, "logps/chosen": -296.3174743652344, "logps/rejected": -259.4359130859375, "loss": 0.6807, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.014876957051455975, "rewards/margins": 0.02076360210776329, "rewards/rejected": -0.005886645056307316, "step": 70 }, { "epoch": 0.04, "learning_rate": 6.779661016949153e-08, "logits/chosen": -2.716602087020874, "logits/rejected": -2.6984047889709473, "logps/chosen": -277.34625244140625, "logps/rejected": -250.19580078125, "loss": 0.6768, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.022055678069591522, "rewards/margins": 0.037467751652002335, "rewards/rejected": -0.015412074513733387, "step": 80 }, { "epoch": 0.05, "learning_rate": 7.627118644067796e-08, "logits/chosen": -2.667224884033203, "logits/rejected": -2.606696605682373, "logps/chosen": -289.1363830566406, "logps/rejected": -275.8477478027344, "loss": 0.6707, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.026466140523552895, "rewards/margins": 0.05512174963951111, "rewards/rejected": -0.028655609115958214, "step": 90 }, { "epoch": 0.05, "learning_rate": 8.47457627118644e-08, "logits/chosen": -2.6877145767211914, "logits/rejected": -2.759593963623047, "logps/chosen": -290.8085021972656, "logps/rejected": -252.248779296875, "loss": 0.6612, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.04527619853615761, "rewards/margins": 0.062491558492183685, "rewards/rejected": -0.017215365543961525, "step": 100 }, { "epoch": 0.05, "eval_logits/chosen": -2.7410836219787598, "eval_logits/rejected": -2.8351781368255615, "eval_logps/chosen": -277.7868347167969, "eval_logps/rejected": -258.4841613769531, "eval_loss": 0.6640329360961914, "eval_rewards/accuracies": 0.6564885377883911, "eval_rewards/chosen": 0.04636840894818306, "eval_rewards/margins": 0.08127209544181824, "eval_rewards/rejected": -0.03490367904305458, "eval_runtime": 297.8788, "eval_samples_per_second": 7.003, "eval_steps_per_second": 0.44, "step": 100 }, { "epoch": 0.06, "learning_rate": 9.322033898305084e-08, "logits/chosen": -2.748792886734009, "logits/rejected": -2.71765398979187, "logps/chosen": -278.54345703125, "logps/rejected": -263.8841247558594, "loss": 0.6659, "rewards/accuracies": 0.6875, "rewards/chosen": 0.047357071191072464, "rewards/margins": 0.08142384141683578, "rewards/rejected": -0.03406677395105362, "step": 110 }, { "epoch": 0.06, "learning_rate": 1.0169491525423729e-07, "logits/chosen": -2.6274914741516113, "logits/rejected": -2.6163206100463867, "logps/chosen": -263.28070068359375, "logps/rejected": -255.45358276367188, "loss": 0.6585, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.06359101831912994, "rewards/margins": 0.11870710551738739, "rewards/rejected": -0.055116087198257446, "step": 120 }, { "epoch": 0.07, "learning_rate": 1.1016949152542372e-07, "logits/chosen": -2.6633336544036865, "logits/rejected": -2.539752721786499, "logps/chosen": -280.6170654296875, "logps/rejected": -276.40557861328125, "loss": 0.6454, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.05809624120593071, "rewards/margins": 0.1383211314678192, "rewards/rejected": -0.0802248865365982, "step": 130 }, { "epoch": 0.07, "learning_rate": 1.1864406779661017e-07, "logits/chosen": -2.6815719604492188, "logits/rejected": -2.706284523010254, "logps/chosen": -256.632080078125, "logps/rejected": -248.27810668945312, "loss": 0.6457, "rewards/accuracies": 0.625, "rewards/chosen": 0.07591484487056732, "rewards/margins": 0.13818106055259705, "rewards/rejected": -0.06226622313261032, "step": 140 }, { "epoch": 0.08, "learning_rate": 1.271186440677966e-07, "logits/chosen": -2.7584047317504883, "logits/rejected": -2.631559133529663, "logps/chosen": -275.9186706542969, "logps/rejected": -289.1915283203125, "loss": 0.6205, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.11922351270914078, "rewards/margins": 0.17620989680290222, "rewards/rejected": -0.056986384093761444, "step": 150 }, { "epoch": 0.08, "learning_rate": 1.3559322033898305e-07, "logits/chosen": -2.7076973915100098, "logits/rejected": -2.5795204639434814, "logps/chosen": -253.640625, "logps/rejected": -254.59957885742188, "loss": 0.6194, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.09049886465072632, "rewards/margins": 0.210663840174675, "rewards/rejected": -0.12016497552394867, "step": 160 }, { "epoch": 0.09, "learning_rate": 1.440677966101695e-07, "logits/chosen": -2.651660919189453, "logits/rejected": -2.6708879470825195, "logps/chosen": -245.13961791992188, "logps/rejected": -252.0153045654297, "loss": 0.6067, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.10699067264795303, "rewards/margins": 0.25036734342575073, "rewards/rejected": -0.1433766633272171, "step": 170 }, { "epoch": 0.09, "learning_rate": 1.5254237288135593e-07, "logits/chosen": -2.642138957977295, "logits/rejected": -2.730170726776123, "logps/chosen": -299.3494567871094, "logps/rejected": -285.53607177734375, "loss": 0.6136, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.1772647351026535, "rewards/margins": 0.32675617933273315, "rewards/rejected": -0.14949145913124084, "step": 180 }, { "epoch": 0.1, "learning_rate": 1.6101694915254234e-07, "logits/chosen": -2.62565541267395, "logits/rejected": -2.607515335083008, "logps/chosen": -304.5907897949219, "logps/rejected": -287.03076171875, "loss": 0.6115, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": 0.10709013044834137, "rewards/margins": 0.19044987857341766, "rewards/rejected": -0.08335976302623749, "step": 190 }, { "epoch": 0.1, "learning_rate": 1.694915254237288e-07, "logits/chosen": -2.631618022918701, "logits/rejected": -2.6601366996765137, "logps/chosen": -293.8777770996094, "logps/rejected": -290.7740173339844, "loss": 0.5924, "rewards/accuracies": 0.625, "rewards/chosen": 0.08614929020404816, "rewards/margins": 0.24201679229736328, "rewards/rejected": -0.15586748719215393, "step": 200 }, { "epoch": 0.1, "eval_logits/chosen": -2.7265350818634033, "eval_logits/rejected": -2.815322160720825, "eval_logps/chosen": -277.4727783203125, "eval_logps/rejected": -260.6597900390625, "eval_loss": 0.6067986488342285, "eval_rewards/accuracies": 0.6927480697631836, "eval_rewards/chosen": 0.0777706429362297, "eval_rewards/margins": 0.3302356004714966, "eval_rewards/rejected": -0.2524649202823639, "eval_runtime": 302.3774, "eval_samples_per_second": 6.899, "eval_steps_per_second": 0.433, "step": 200 }, { "epoch": 0.11, "learning_rate": 1.7796610169491524e-07, "logits/chosen": -2.70915150642395, "logits/rejected": -2.72874116897583, "logps/chosen": -276.5365905761719, "logps/rejected": -238.8749237060547, "loss": 0.5655, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.17417728900909424, "rewards/margins": 0.46528753638267517, "rewards/rejected": -0.2911103069782257, "step": 210 }, { "epoch": 0.11, "learning_rate": 1.8644067796610168e-07, "logits/chosen": -2.62576961517334, "logits/rejected": -2.6665549278259277, "logps/chosen": -338.6006164550781, "logps/rejected": -294.7860412597656, "loss": 0.6044, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.22065432369709015, "rewards/margins": 0.3981766104698181, "rewards/rejected": -0.17752234637737274, "step": 220 }, { "epoch": 0.12, "learning_rate": 1.9491525423728814e-07, "logits/chosen": -2.6506543159484863, "logits/rejected": -2.644029378890991, "logps/chosen": -254.7323455810547, "logps/rejected": -238.21957397460938, "loss": 0.5994, "rewards/accuracies": 0.6000000238418579, "rewards/chosen": 0.010533164255321026, "rewards/margins": 0.25938352942466736, "rewards/rejected": -0.24885031580924988, "step": 230 }, { "epoch": 0.12, "learning_rate": 2.0338983050847458e-07, "logits/chosen": -2.635986328125, "logits/rejected": -2.682281970977783, "logps/chosen": -273.7716979980469, "logps/rejected": -238.53280639648438, "loss": 0.5808, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.12138669192790985, "rewards/margins": 0.43423810601234436, "rewards/rejected": -0.3128513991832733, "step": 240 }, { "epoch": 0.13, "learning_rate": 2.11864406779661e-07, "logits/chosen": -2.698411464691162, "logits/rejected": -2.806183338165283, "logps/chosen": -275.4363098144531, "logps/rejected": -265.92254638671875, "loss": 0.556, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.2729412019252777, "rewards/margins": 0.37595731019973755, "rewards/rejected": -0.10301606357097626, "step": 250 }, { "epoch": 0.13, "learning_rate": 2.2033898305084743e-07, "logits/chosen": -2.689795970916748, "logits/rejected": -2.773268222808838, "logps/chosen": -255.66455078125, "logps/rejected": -241.80563354492188, "loss": 0.5563, "rewards/accuracies": 0.75, "rewards/chosen": 0.15760645270347595, "rewards/margins": 0.4827534556388855, "rewards/rejected": -0.32514697313308716, "step": 260 }, { "epoch": 0.14, "learning_rate": 2.288135593220339e-07, "logits/chosen": -2.5930798053741455, "logits/rejected": -2.6595733165740967, "logps/chosen": -261.72235107421875, "logps/rejected": -236.1577911376953, "loss": 0.5588, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.06991849839687347, "rewards/margins": 0.4791868329048157, "rewards/rejected": -0.4092682898044586, "step": 270 }, { "epoch": 0.14, "learning_rate": 2.3728813559322033e-07, "logits/chosen": -2.747859477996826, "logits/rejected": -2.7453150749206543, "logps/chosen": -288.51971435546875, "logps/rejected": -240.22616577148438, "loss": 0.593, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.09422459453344345, "rewards/margins": 0.4610508978366852, "rewards/rejected": -0.3668263256549835, "step": 280 }, { "epoch": 0.15, "learning_rate": 2.457627118644068e-07, "logits/chosen": -2.662792682647705, "logits/rejected": -2.6617178916931152, "logps/chosen": -287.103759765625, "logps/rejected": -269.4622497558594, "loss": 0.5615, "rewards/accuracies": 0.75, "rewards/chosen": 0.20917841792106628, "rewards/margins": 0.6041997671127319, "rewards/rejected": -0.39502137899398804, "step": 290 }, { "epoch": 0.15, "learning_rate": 2.542372881355932e-07, "logits/chosen": -2.665034770965576, "logits/rejected": -2.646695613861084, "logps/chosen": -285.3088684082031, "logps/rejected": -260.4288635253906, "loss": 0.5488, "rewards/accuracies": 0.75, "rewards/chosen": 0.2411040961742401, "rewards/margins": 0.6841613054275513, "rewards/rejected": -0.44305723905563354, "step": 300 }, { "epoch": 0.15, "eval_logits/chosen": -2.7548134326934814, "eval_logits/rejected": -2.836420774459839, "eval_logps/chosen": -276.56298828125, "eval_logps/rejected": -262.9786682128906, "eval_loss": 0.5772386789321899, "eval_rewards/accuracies": 0.7385495901107788, "eval_rewards/chosen": 0.16875192523002625, "eval_rewards/margins": 0.6531042456626892, "eval_rewards/rejected": -0.4843522906303406, "eval_runtime": 297.0708, "eval_samples_per_second": 7.022, "eval_steps_per_second": 0.441, "step": 300 }, { "epoch": 0.16, "learning_rate": 2.6271186440677967e-07, "logits/chosen": -2.689876079559326, "logits/rejected": -2.621389627456665, "logps/chosen": -270.97808837890625, "logps/rejected": -233.0233612060547, "loss": 0.6944, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.21461305022239685, "rewards/margins": 0.012382006272673607, "rewards/rejected": 0.2022310495376587, "step": 310 }, { "epoch": 0.16, "learning_rate": 2.711864406779661e-07, "logits/chosen": -2.5818138122558594, "logits/rejected": -2.620954990386963, "logps/chosen": -279.020751953125, "logps/rejected": -258.8118896484375, "loss": 0.536, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.15615960955619812, "rewards/margins": 0.626147449016571, "rewards/rejected": -0.4699878692626953, "step": 320 }, { "epoch": 0.17, "learning_rate": 2.796610169491525e-07, "logits/chosen": -2.713191509246826, "logits/rejected": -2.813098430633545, "logps/chosen": -328.0247802734375, "logps/rejected": -286.2821044921875, "loss": 0.5851, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.14517785608768463, "rewards/margins": 0.6851416826248169, "rewards/rejected": -0.5399638414382935, "step": 330 }, { "epoch": 0.17, "learning_rate": 2.88135593220339e-07, "logits/chosen": -2.6544597148895264, "logits/rejected": -2.637418270111084, "logps/chosen": -254.28646850585938, "logps/rejected": -286.66009521484375, "loss": 0.643, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.24947181344032288, "rewards/margins": 0.21678391098976135, "rewards/rejected": 0.032687850296497345, "step": 340 }, { "epoch": 0.18, "learning_rate": 2.966101694915254e-07, "logits/chosen": -2.679877758026123, "logits/rejected": -2.781639575958252, "logps/chosen": -301.66802978515625, "logps/rejected": -253.37588500976562, "loss": 0.5514, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.16999612748622894, "rewards/margins": 0.7565746903419495, "rewards/rejected": -0.5865784883499146, "step": 350 }, { "epoch": 0.18, "learning_rate": 3.0508474576271186e-07, "logits/chosen": -2.6983745098114014, "logits/rejected": -2.752680540084839, "logps/chosen": -272.7869567871094, "logps/rejected": -261.7904357910156, "loss": 0.5101, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.1574278473854065, "rewards/margins": 0.6489614248275757, "rewards/rejected": -0.4915335178375244, "step": 360 }, { "epoch": 0.19, "learning_rate": 3.135593220338983e-07, "logits/chosen": -2.6260931491851807, "logits/rejected": -2.671186685562134, "logps/chosen": -264.13714599609375, "logps/rejected": -273.489013671875, "loss": 0.6183, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.14635959267616272, "rewards/margins": 0.4912787079811096, "rewards/rejected": -0.3449190557003021, "step": 370 }, { "epoch": 0.19, "learning_rate": 3.220338983050847e-07, "logits/chosen": -2.669572114944458, "logits/rejected": -2.6285717487335205, "logps/chosen": -269.22406005859375, "logps/rejected": -264.6671447753906, "loss": 0.6076, "rewards/accuracies": 0.75, "rewards/chosen": 0.02564525045454502, "rewards/margins": 0.6520860195159912, "rewards/rejected": -0.6264407634735107, "step": 380 }, { "epoch": 0.2, "learning_rate": 3.3050847457627117e-07, "logits/chosen": -2.5862457752227783, "logits/rejected": -2.5745327472686768, "logps/chosen": -283.8790588378906, "logps/rejected": -280.4973449707031, "loss": 0.5356, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.08901579678058624, "rewards/margins": 0.765033483505249, "rewards/rejected": -0.6760177612304688, "step": 390 }, { "epoch": 0.2, "learning_rate": 3.389830508474576e-07, "logits/chosen": -2.601691722869873, "logits/rejected": -2.6026904582977295, "logps/chosen": -278.7247009277344, "logps/rejected": -300.48211669921875, "loss": 0.5144, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.02959282696247101, "rewards/margins": 0.6538323163986206, "rewards/rejected": -0.624239444732666, "step": 400 }, { "epoch": 0.2, "eval_logits/chosen": -2.707245111465454, "eval_logits/rejected": -2.788961887359619, "eval_logps/chosen": -277.64111328125, "eval_logps/rejected": -265.73919677734375, "eval_loss": 0.5635179281234741, "eval_rewards/accuracies": 0.7347328066825867, "eval_rewards/chosen": 0.06093722581863403, "eval_rewards/margins": 0.821345865726471, "eval_rewards/rejected": -0.7604085803031921, "eval_runtime": 302.2004, "eval_samples_per_second": 6.903, "eval_steps_per_second": 0.433, "step": 400 }, { "epoch": 0.21, "learning_rate": 3.4745762711864405e-07, "logits/chosen": -2.732656478881836, "logits/rejected": -2.776484727859497, "logps/chosen": -272.28106689453125, "logps/rejected": -281.3340148925781, "loss": 0.6293, "rewards/accuracies": 0.5874999761581421, "rewards/chosen": -0.03277791664004326, "rewards/margins": 0.327867716550827, "rewards/rejected": -0.3606456220149994, "step": 410 }, { "epoch": 0.21, "learning_rate": 3.559322033898305e-07, "logits/chosen": -2.6254220008850098, "logits/rejected": -2.658902645111084, "logps/chosen": -308.8375549316406, "logps/rejected": -248.82421875, "loss": 0.505, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.012793347239494324, "rewards/margins": 0.5741627812385559, "rewards/rejected": -0.5613693594932556, "step": 420 }, { "epoch": 0.22, "learning_rate": 3.644067796610169e-07, "logits/chosen": -2.6374409198760986, "logits/rejected": -2.6688437461853027, "logps/chosen": -308.5555419921875, "logps/rejected": -277.16455078125, "loss": 0.5239, "rewards/accuracies": 0.75, "rewards/chosen": 0.011289214715361595, "rewards/margins": 0.9072087407112122, "rewards/rejected": -0.8959195017814636, "step": 430 }, { "epoch": 0.22, "learning_rate": 3.7288135593220336e-07, "logits/chosen": -2.6448655128479004, "logits/rejected": -2.6344261169433594, "logps/chosen": -272.939697265625, "logps/rejected": -269.31878662109375, "loss": 0.5084, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.14245155453681946, "rewards/margins": 0.9787721633911133, "rewards/rejected": -0.836320698261261, "step": 440 }, { "epoch": 0.23, "learning_rate": 3.813559322033898e-07, "logits/chosen": -2.798731803894043, "logits/rejected": -2.735830783843994, "logps/chosen": -309.8362731933594, "logps/rejected": -275.83062744140625, "loss": 0.5688, "rewards/accuracies": 0.637499988079071, "rewards/chosen": 0.0009150445694103837, "rewards/margins": 0.4768896996974945, "rewards/rejected": -0.4759747087955475, "step": 450 }, { "epoch": 0.23, "learning_rate": 3.898305084745763e-07, "logits/chosen": -2.6862215995788574, "logits/rejected": -2.603532552719116, "logps/chosen": -251.1598663330078, "logps/rejected": -270.99981689453125, "loss": 0.4916, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.07209299504756927, "rewards/margins": 1.0653936862945557, "rewards/rejected": -0.9933006167411804, "step": 460 }, { "epoch": 0.24, "learning_rate": 3.9830508474576267e-07, "logits/chosen": -2.7343802452087402, "logits/rejected": -2.735741138458252, "logps/chosen": -325.2496643066406, "logps/rejected": -292.3957214355469, "loss": 0.5757, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.38839584589004517, "rewards/margins": 0.835391640663147, "rewards/rejected": -0.4469958245754242, "step": 470 }, { "epoch": 0.24, "learning_rate": 4.0677966101694916e-07, "logits/chosen": -2.7187466621398926, "logits/rejected": -2.738949775695801, "logps/chosen": -290.6060485839844, "logps/rejected": -292.77557373046875, "loss": 0.5272, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.18653082847595215, "rewards/margins": 0.8780761957168579, "rewards/rejected": -0.691545307636261, "step": 480 }, { "epoch": 0.25, "learning_rate": 4.152542372881356e-07, "logits/chosen": -2.6602752208709717, "logits/rejected": -2.580003261566162, "logps/chosen": -338.55584716796875, "logps/rejected": -300.44573974609375, "loss": 0.5326, "rewards/accuracies": 0.6875, "rewards/chosen": 0.28633373975753784, "rewards/margins": 1.0145152807235718, "rewards/rejected": -0.7281816005706787, "step": 490 }, { "epoch": 0.25, "learning_rate": 4.23728813559322e-07, "logits/chosen": -2.706733226776123, "logits/rejected": -2.6993355751037598, "logps/chosen": -300.39520263671875, "logps/rejected": -249.0484619140625, "loss": 0.5399, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.1879514753818512, "rewards/margins": 0.7782592177391052, "rewards/rejected": -0.5903077721595764, "step": 500 }, { "epoch": 0.25, "eval_logits/chosen": -2.756488084793091, "eval_logits/rejected": -2.837228775024414, "eval_logps/chosen": -277.9347229003906, "eval_logps/rejected": -268.0408630371094, "eval_loss": 0.5393199324607849, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": 0.031575243920087814, "eval_rewards/margins": 1.0221443176269531, "eval_rewards/rejected": -0.9905692338943481, "eval_runtime": 296.8577, "eval_samples_per_second": 7.027, "eval_steps_per_second": 0.441, "step": 500 }, { "epoch": 0.26, "learning_rate": 4.322033898305085e-07, "logits/chosen": -2.565380573272705, "logits/rejected": -2.54228138923645, "logps/chosen": -252.8906707763672, "logps/rejected": -253.0413360595703, "loss": 0.5438, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.370120108127594, "rewards/margins": 0.541715681552887, "rewards/rejected": -0.9118358492851257, "step": 510 }, { "epoch": 0.26, "learning_rate": 4.4067796610169486e-07, "logits/chosen": -2.683913469314575, "logits/rejected": -2.61004900932312, "logps/chosen": -247.6575164794922, "logps/rejected": -258.51666259765625, "loss": 0.5493, "rewards/accuracies": 0.612500011920929, "rewards/chosen": -0.16846349835395813, "rewards/margins": 0.47411054372787476, "rewards/rejected": -0.6425740122795105, "step": 520 }, { "epoch": 0.27, "learning_rate": 4.4915254237288135e-07, "logits/chosen": -2.7208123207092285, "logits/rejected": -2.671483039855957, "logps/chosen": -292.650390625, "logps/rejected": -272.16558837890625, "loss": 0.5554, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.020361602306365967, "rewards/margins": 0.8109226226806641, "rewards/rejected": -0.7905610203742981, "step": 530 }, { "epoch": 0.27, "learning_rate": 4.576271186440678e-07, "logits/chosen": -2.70005202293396, "logits/rejected": -2.7322866916656494, "logps/chosen": -282.77288818359375, "logps/rejected": -254.0004119873047, "loss": 0.5894, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.16902032494544983, "rewards/margins": 0.8821357488632202, "rewards/rejected": -0.7131155133247375, "step": 540 }, { "epoch": 0.28, "learning_rate": 4.661016949152542e-07, "logits/chosen": -2.5971081256866455, "logits/rejected": -2.5216164588928223, "logps/chosen": -305.2200012207031, "logps/rejected": -298.790283203125, "loss": 0.7142, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.7713154554367065, "rewards/margins": 0.3943839967250824, "rewards/rejected": -1.1656994819641113, "step": 550 }, { "epoch": 0.28, "learning_rate": 4.7457627118644066e-07, "logits/chosen": -2.633862257003784, "logits/rejected": -2.6472296714782715, "logps/chosen": -293.66778564453125, "logps/rejected": -248.5172576904297, "loss": 0.621, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.04602837562561035, "rewards/margins": 0.9052542448043823, "rewards/rejected": -0.9512826800346375, "step": 560 }, { "epoch": 0.29, "learning_rate": 4.830508474576271e-07, "logits/chosen": -2.578864574432373, "logits/rejected": -2.7244362831115723, "logps/chosen": -272.5432434082031, "logps/rejected": -249.15902709960938, "loss": 0.5396, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.0642789900302887, "rewards/margins": 0.9198991656303406, "rewards/rejected": -0.9841780662536621, "step": 570 }, { "epoch": 0.3, "learning_rate": 4.915254237288136e-07, "logits/chosen": -2.6675069332122803, "logits/rejected": -2.7055740356445312, "logps/chosen": -309.1647033691406, "logps/rejected": -292.64801025390625, "loss": 0.6726, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.008704179897904396, "rewards/margins": 0.8699982762336731, "rewards/rejected": -0.8612940907478333, "step": 580 }, { "epoch": 0.3, "learning_rate": 5e-07, "logits/chosen": -2.6913936138153076, "logits/rejected": -2.5511505603790283, "logps/chosen": -213.75790405273438, "logps/rejected": -269.3441467285156, "loss": 0.4877, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.07794573903083801, "rewards/margins": 1.021460771560669, "rewards/rejected": -0.9435150027275085, "step": 590 }, { "epoch": 0.31, "learning_rate": 4.99058025621703e-07, "logits/chosen": -2.743154287338257, "logits/rejected": -2.752743721008301, "logps/chosen": -271.9192199707031, "logps/rejected": -263.9624938964844, "loss": 0.5776, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.21212521195411682, "rewards/margins": 0.8748341798782349, "rewards/rejected": -0.6627089977264404, "step": 600 }, { "epoch": 0.31, "eval_logits/chosen": -2.7569472789764404, "eval_logits/rejected": -2.838770627975464, "eval_logps/chosen": -277.82574462890625, "eval_logps/rejected": -267.9344787597656, "eval_loss": 0.5705651640892029, "eval_rewards/accuracies": 0.7423664331436157, "eval_rewards/chosen": 0.04247231036424637, "eval_rewards/margins": 1.022408366203308, "eval_rewards/rejected": -0.9799360632896423, "eval_runtime": 301.417, "eval_samples_per_second": 6.921, "eval_steps_per_second": 0.435, "step": 600 }, { "epoch": 0.31, "learning_rate": 4.981160512434062e-07, "logits/chosen": -2.7087044715881348, "logits/rejected": -2.7249045372009277, "logps/chosen": -249.1181640625, "logps/rejected": -284.8170471191406, "loss": 0.5501, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": 0.24952204525470734, "rewards/margins": 1.0174764394760132, "rewards/rejected": -0.7679542899131775, "step": 610 }, { "epoch": 0.32, "learning_rate": 4.971740768651092e-07, "logits/chosen": -2.5978667736053467, "logits/rejected": -2.5184450149536133, "logps/chosen": -280.148193359375, "logps/rejected": -278.3171691894531, "loss": 0.5401, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": 0.10301868617534637, "rewards/margins": 0.9203518629074097, "rewards/rejected": -0.8173332214355469, "step": 620 }, { "epoch": 0.32, "learning_rate": 4.962321024868124e-07, "logits/chosen": -2.585350275039673, "logits/rejected": -2.6179752349853516, "logps/chosen": -252.47607421875, "logps/rejected": -227.0454864501953, "loss": 0.4839, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.11939480155706406, "rewards/margins": 0.9655391573905945, "rewards/rejected": -0.8461443185806274, "step": 630 }, { "epoch": 0.33, "learning_rate": 4.952901281085154e-07, "logits/chosen": -2.6374013423919678, "logits/rejected": -2.614624500274658, "logps/chosen": -250.5303192138672, "logps/rejected": -274.0657653808594, "loss": 0.6261, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.0875898078083992, "rewards/margins": 0.7766585350036621, "rewards/rejected": -0.6890687346458435, "step": 640 }, { "epoch": 0.33, "learning_rate": 4.943481537302186e-07, "logits/chosen": -2.6180291175842285, "logits/rejected": -2.727692127227783, "logps/chosen": -236.42813110351562, "logps/rejected": -239.7740020751953, "loss": 0.5047, "rewards/accuracies": 0.762499988079071, "rewards/chosen": 0.21162250638008118, "rewards/margins": 0.9807373285293579, "rewards/rejected": -0.7691147923469543, "step": 650 }, { "epoch": 0.34, "learning_rate": 4.934061793519216e-07, "logits/chosen": -2.655010938644409, "logits/rejected": -2.6872143745422363, "logps/chosen": -299.6675109863281, "logps/rejected": -306.3320007324219, "loss": 0.584, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.09119521826505661, "rewards/margins": 0.5702224969863892, "rewards/rejected": -0.6614176630973816, "step": 660 }, { "epoch": 0.34, "learning_rate": 4.924642049736247e-07, "logits/chosen": -2.6529808044433594, "logits/rejected": -2.582202434539795, "logps/chosen": -261.41754150390625, "logps/rejected": -244.1402130126953, "loss": 0.5348, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.11745382845401764, "rewards/margins": 1.0263164043426514, "rewards/rejected": -0.9088624715805054, "step": 670 }, { "epoch": 0.35, "learning_rate": 4.915222305953277e-07, "logits/chosen": -2.6894426345825195, "logits/rejected": -2.5927743911743164, "logps/chosen": -272.3468933105469, "logps/rejected": -259.9945068359375, "loss": 0.5105, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.04899612069129944, "rewards/margins": 1.1350120306015015, "rewards/rejected": -1.0860159397125244, "step": 680 }, { "epoch": 0.35, "learning_rate": 4.905802562170309e-07, "logits/chosen": -2.5538647174835205, "logits/rejected": -2.6060104370117188, "logps/chosen": -294.0184020996094, "logps/rejected": -293.9318542480469, "loss": 0.6963, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.24619393050670624, "rewards/margins": 0.5672358274459839, "rewards/rejected": -0.8134298324584961, "step": 690 }, { "epoch": 0.36, "learning_rate": 4.896382818387339e-07, "logits/chosen": -2.6548666954040527, "logits/rejected": -2.6277880668640137, "logps/chosen": -268.58563232421875, "logps/rejected": -226.7079315185547, "loss": 0.5834, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.06387074291706085, "rewards/margins": 0.5434452295303345, "rewards/rejected": -0.6073160171508789, "step": 700 }, { "epoch": 0.36, "eval_logits/chosen": -2.6940970420837402, "eval_logits/rejected": -2.7830452919006348, "eval_logps/chosen": -277.7963562011719, "eval_logps/rejected": -268.3512878417969, "eval_loss": 0.5596233010292053, "eval_rewards/accuracies": 0.7423664331436157, "eval_rewards/chosen": 0.04541310295462608, "eval_rewards/margins": 1.067026972770691, "eval_rewards/rejected": -1.0216139554977417, "eval_runtime": 297.2834, "eval_samples_per_second": 7.017, "eval_steps_per_second": 0.441, "step": 700 }, { "epoch": 0.36, "learning_rate": 4.886963074604371e-07, "logits/chosen": -2.6284475326538086, "logits/rejected": -2.547412872314453, "logps/chosen": -270.00872802734375, "logps/rejected": -266.3251953125, "loss": 0.6424, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.025370970368385315, "rewards/margins": 0.7029758095741272, "rewards/rejected": -0.7283468842506409, "step": 710 }, { "epoch": 0.37, "learning_rate": 4.877543330821401e-07, "logits/chosen": -2.6589772701263428, "logits/rejected": -2.6408326625823975, "logps/chosen": -328.72137451171875, "logps/rejected": -260.0521240234375, "loss": 0.5091, "rewards/accuracies": 0.75, "rewards/chosen": -0.11194495111703873, "rewards/margins": 0.8934643864631653, "rewards/rejected": -1.0054093599319458, "step": 720 }, { "epoch": 0.37, "learning_rate": 4.868123587038433e-07, "logits/chosen": -2.526742935180664, "logits/rejected": -2.568509578704834, "logps/chosen": -261.3857727050781, "logps/rejected": -246.47189331054688, "loss": 0.7511, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.04114454239606857, "rewards/margins": 0.9449528455734253, "rewards/rejected": -0.9038082957267761, "step": 730 }, { "epoch": 0.38, "learning_rate": 4.858703843255463e-07, "logits/chosen": -2.4164764881134033, "logits/rejected": -2.587960720062256, "logps/chosen": -263.1320495605469, "logps/rejected": -233.99642944335938, "loss": 0.5897, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.08096225559711456, "rewards/margins": 1.0214519500732422, "rewards/rejected": -0.9404897689819336, "step": 740 }, { "epoch": 0.38, "learning_rate": 4.849284099472495e-07, "logits/chosen": -2.3981845378875732, "logits/rejected": -2.3214974403381348, "logps/chosen": -257.8962707519531, "logps/rejected": -246.64675903320312, "loss": 0.4864, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08276768028736115, "rewards/margins": 1.0210144519805908, "rewards/rejected": -1.103782296180725, "step": 750 }, { "epoch": 0.39, "learning_rate": 4.839864355689525e-07, "logits/chosen": -2.486299514770508, "logits/rejected": -2.3817036151885986, "logps/chosen": -263.58502197265625, "logps/rejected": -286.1875305175781, "loss": 0.614, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.5518718361854553, "rewards/margins": 1.387749433517456, "rewards/rejected": -0.8358775973320007, "step": 760 }, { "epoch": 0.39, "learning_rate": 4.830444611906556e-07, "logits/chosen": -2.2975809574127197, "logits/rejected": -2.5370969772338867, "logps/chosen": -306.3849182128906, "logps/rejected": -245.950927734375, "loss": 0.6453, "rewards/accuracies": 0.75, "rewards/chosen": -0.12810799479484558, "rewards/margins": 0.6762959361076355, "rewards/rejected": -0.8044039011001587, "step": 770 }, { "epoch": 0.4, "learning_rate": 4.821024868123586e-07, "logits/chosen": -2.5201289653778076, "logits/rejected": -2.517611026763916, "logps/chosen": -254.85104370117188, "logps/rejected": -244.4443817138672, "loss": 0.5778, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.0704607367515564, "rewards/margins": 0.6457270383834839, "rewards/rejected": -0.7161878347396851, "step": 780 }, { "epoch": 0.4, "learning_rate": 4.811605124340618e-07, "logits/chosen": -2.531914472579956, "logits/rejected": -2.509597063064575, "logps/chosen": -265.1935119628906, "logps/rejected": -241.33633422851562, "loss": 0.5434, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.4141203761100769, "rewards/margins": 1.151557445526123, "rewards/rejected": -0.7374370098114014, "step": 790 }, { "epoch": 0.41, "learning_rate": 4.802185380557648e-07, "logits/chosen": -2.439547538757324, "logits/rejected": -2.4296250343322754, "logps/chosen": -267.5282287597656, "logps/rejected": -269.441650390625, "loss": 0.5394, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.19664326310157776, "rewards/margins": 1.12895929813385, "rewards/rejected": -0.9323161244392395, "step": 800 }, { "epoch": 0.41, "eval_logits/chosen": -2.5479583740234375, "eval_logits/rejected": -2.631294012069702, "eval_logps/chosen": -277.4460144042969, "eval_logps/rejected": -267.67138671875, "eval_loss": 0.5357747077941895, "eval_rewards/accuracies": 0.7480915784835815, "eval_rewards/chosen": 0.08044610917568207, "eval_rewards/margins": 1.034072995185852, "eval_rewards/rejected": -0.953626811504364, "eval_runtime": 302.149, "eval_samples_per_second": 6.904, "eval_steps_per_second": 0.434, "step": 800 }, { "epoch": 0.41, "learning_rate": 4.79276563677468e-07, "logits/chosen": -2.4669220447540283, "logits/rejected": -2.3798508644104004, "logps/chosen": -313.71148681640625, "logps/rejected": -296.85711669921875, "loss": 0.5522, "rewards/accuracies": 0.8500000238418579, "rewards/chosen": 0.39447715878486633, "rewards/margins": 1.3083837032318115, "rewards/rejected": -0.9139065742492676, "step": 810 }, { "epoch": 0.42, "learning_rate": 4.78334589299171e-07, "logits/chosen": -2.51173734664917, "logits/rejected": -2.452078342437744, "logps/chosen": -247.0750732421875, "logps/rejected": -237.9070281982422, "loss": 0.5305, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.09000038355588913, "rewards/margins": 1.039417028427124, "rewards/rejected": -1.1294173002243042, "step": 820 }, { "epoch": 0.42, "learning_rate": 4.773926149208742e-07, "logits/chosen": -2.6431527137756348, "logits/rejected": -2.5651488304138184, "logps/chosen": -293.65728759765625, "logps/rejected": -268.22406005859375, "loss": 0.5439, "rewards/accuracies": 0.699999988079071, "rewards/chosen": 0.03514687344431877, "rewards/margins": 0.8131545782089233, "rewards/rejected": -0.7780076861381531, "step": 830 }, { "epoch": 0.43, "learning_rate": 4.764506405425772e-07, "logits/chosen": -2.467764139175415, "logits/rejected": -2.464174509048462, "logps/chosen": -257.2640686035156, "logps/rejected": -247.9285125732422, "loss": 0.6165, "rewards/accuracies": 0.6875, "rewards/chosen": -0.22154740989208221, "rewards/margins": 0.8597299456596375, "rewards/rejected": -1.0812774896621704, "step": 840 }, { "epoch": 0.43, "learning_rate": 4.755086661642803e-07, "logits/chosen": -2.502732276916504, "logits/rejected": -2.6119589805603027, "logps/chosen": -255.53225708007812, "logps/rejected": -231.84963989257812, "loss": 0.5659, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.15040960907936096, "rewards/margins": 0.9278343915939331, "rewards/rejected": -1.0782438516616821, "step": 850 }, { "epoch": 0.44, "learning_rate": 4.745666917859834e-07, "logits/chosen": -2.5779833793640137, "logits/rejected": -2.6192851066589355, "logps/chosen": -281.83319091796875, "logps/rejected": -291.05279541015625, "loss": 0.6056, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6201379299163818, "rewards/margins": 0.7899220585823059, "rewards/rejected": -1.4100600481033325, "step": 860 }, { "epoch": 0.44, "learning_rate": 4.736247174076865e-07, "logits/chosen": -2.4486584663391113, "logits/rejected": -2.5848748683929443, "logps/chosen": -343.8517150878906, "logps/rejected": -315.92327880859375, "loss": 0.5849, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21137447655200958, "rewards/margins": 0.9022495150566101, "rewards/rejected": -1.113624095916748, "step": 870 }, { "epoch": 0.45, "learning_rate": 4.726827430293896e-07, "logits/chosen": -2.5461716651916504, "logits/rejected": -2.6663496494293213, "logps/chosen": -323.36395263671875, "logps/rejected": -305.492431640625, "loss": 0.4929, "rewards/accuracies": 0.737500011920929, "rewards/chosen": 0.06274759024381638, "rewards/margins": 0.9509458541870117, "rewards/rejected": -0.8881982564926147, "step": 880 }, { "epoch": 0.45, "learning_rate": 4.717407686510927e-07, "logits/chosen": -2.5778603553771973, "logits/rejected": -2.5960335731506348, "logps/chosen": -270.0763854980469, "logps/rejected": -253.62646484375, "loss": 0.5639, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.30001965165138245, "rewards/margins": 1.3010705709457397, "rewards/rejected": -1.6010901927947998, "step": 890 }, { "epoch": 0.46, "learning_rate": 4.707987942727958e-07, "logits/chosen": -2.4405810832977295, "logits/rejected": -2.4201414585113525, "logps/chosen": -269.73748779296875, "logps/rejected": -268.6572265625, "loss": 0.5141, "rewards/accuracies": 0.75, "rewards/chosen": 0.06953033059835434, "rewards/margins": 1.3686714172363281, "rewards/rejected": -1.2991411685943604, "step": 900 }, { "epoch": 0.46, "eval_logits/chosen": -2.5832109451293945, "eval_logits/rejected": -2.666200876235962, "eval_logps/chosen": -280.95458984375, "eval_logps/rejected": -272.4443664550781, "eval_loss": 0.541175127029419, "eval_rewards/accuracies": 0.7442747950553894, "eval_rewards/chosen": -0.2704112231731415, "eval_rewards/margins": 1.1605098247528076, "eval_rewards/rejected": -1.430921196937561, "eval_runtime": 297.0452, "eval_samples_per_second": 7.023, "eval_steps_per_second": 0.441, "step": 900 }, { "epoch": 0.46, "learning_rate": 4.698568198944988e-07, "logits/chosen": -2.5255184173583984, "logits/rejected": -2.573775053024292, "logps/chosen": -300.562255859375, "logps/rejected": -269.5960998535156, "loss": 0.6796, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.0005705863004550338, "rewards/margins": 1.1774917840957642, "rewards/rejected": -1.1769212484359741, "step": 910 }, { "epoch": 0.47, "learning_rate": 4.689148455162019e-07, "logits/chosen": -2.4337317943573, "logits/rejected": -2.431641101837158, "logps/chosen": -230.990234375, "logps/rejected": -234.8484649658203, "loss": 0.5672, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.11962993443012238, "rewards/margins": 1.2908594608306885, "rewards/rejected": -1.4104894399642944, "step": 920 }, { "epoch": 0.47, "learning_rate": 4.67972871137905e-07, "logits/chosen": -2.48714017868042, "logits/rejected": -2.444317579269409, "logps/chosen": -283.33392333984375, "logps/rejected": -266.0936584472656, "loss": 0.5425, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.24451906979084015, "rewards/margins": 0.9864568710327148, "rewards/rejected": -1.230975866317749, "step": 930 }, { "epoch": 0.48, "learning_rate": 4.670308967596081e-07, "logits/chosen": -2.458479166030884, "logits/rejected": -2.4806995391845703, "logps/chosen": -281.46820068359375, "logps/rejected": -315.03167724609375, "loss": 0.5216, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.00037065744982101023, "rewards/margins": 1.3867638111114502, "rewards/rejected": -1.387134313583374, "step": 940 }, { "epoch": 0.48, "learning_rate": 4.660889223813112e-07, "logits/chosen": -2.488022565841675, "logits/rejected": -2.544905424118042, "logps/chosen": -282.5947570800781, "logps/rejected": -286.3911437988281, "loss": 0.6466, "rewards/accuracies": 0.625, "rewards/chosen": -0.15313948690891266, "rewards/margins": 0.5795355439186096, "rewards/rejected": -0.7326749563217163, "step": 950 }, { "epoch": 0.49, "learning_rate": 4.6514694800301427e-07, "logits/chosen": -2.4881091117858887, "logits/rejected": -2.4446139335632324, "logps/chosen": -298.6130065917969, "logps/rejected": -246.40298461914062, "loss": 0.5778, "rewards/accuracies": 0.6875, "rewards/chosen": 0.10312096774578094, "rewards/margins": 0.8071810007095337, "rewards/rejected": -0.7040599584579468, "step": 960 }, { "epoch": 0.49, "learning_rate": 4.6420497362471737e-07, "logits/chosen": -2.3994803428649902, "logits/rejected": -2.4658679962158203, "logps/chosen": -275.06878662109375, "logps/rejected": -252.40475463867188, "loss": 0.6466, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.259010374546051, "rewards/margins": 0.378709077835083, "rewards/rejected": -0.637719452381134, "step": 970 }, { "epoch": 0.5, "learning_rate": 4.6326299924642047e-07, "logits/chosen": -2.5499892234802246, "logits/rejected": -2.589505672454834, "logps/chosen": -272.2198486328125, "logps/rejected": -281.07366943359375, "loss": 0.5701, "rewards/accuracies": 0.6875, "rewards/chosen": -0.08379928767681122, "rewards/margins": 0.7602310180664062, "rewards/rejected": -0.8440302610397339, "step": 980 }, { "epoch": 0.5, "learning_rate": 4.6232102486812357e-07, "logits/chosen": -2.4911158084869385, "logits/rejected": -2.5684397220611572, "logps/chosen": -263.782958984375, "logps/rejected": -277.875244140625, "loss": 0.5929, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.34514790773391724, "rewards/margins": 1.0390870571136475, "rewards/rejected": -1.384235143661499, "step": 990 }, { "epoch": 0.51, "learning_rate": 4.613790504898267e-07, "logits/chosen": -2.5921616554260254, "logits/rejected": -2.600898265838623, "logps/chosen": -298.99871826171875, "logps/rejected": -312.77105712890625, "loss": 0.51, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": 0.06152200698852539, "rewards/margins": 1.18497633934021, "rewards/rejected": -1.1234543323516846, "step": 1000 }, { "epoch": 0.51, "eval_logits/chosen": -2.621718168258667, "eval_logits/rejected": -2.711763620376587, "eval_logps/chosen": -280.32061767578125, "eval_logps/rejected": -272.1781311035156, "eval_loss": 0.534999668598175, "eval_rewards/accuracies": 0.7366412281990051, "eval_rewards/chosen": -0.20701460540294647, "eval_rewards/margins": 1.197284460067749, "eval_rewards/rejected": -1.404299259185791, "eval_runtime": 301.9673, "eval_samples_per_second": 6.908, "eval_steps_per_second": 0.434, "step": 1000 }, { "epoch": 0.51, "learning_rate": 4.604370761115298e-07, "logits/chosen": -2.5198278427124023, "logits/rejected": -2.507664203643799, "logps/chosen": -297.51336669921875, "logps/rejected": -301.7564392089844, "loss": 0.5467, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.3291333317756653, "rewards/margins": 1.3960895538330078, "rewards/rejected": -1.7252228260040283, "step": 1010 }, { "epoch": 0.52, "learning_rate": 4.594951017332329e-07, "logits/chosen": -2.590463161468506, "logits/rejected": -2.6638598442077637, "logps/chosen": -300.2336120605469, "logps/rejected": -285.6252746582031, "loss": 0.6876, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.8592126965522766, "rewards/margins": 0.49618273973464966, "rewards/rejected": -1.3553953170776367, "step": 1020 }, { "epoch": 0.52, "learning_rate": 4.585531273549359e-07, "logits/chosen": -2.437877893447876, "logits/rejected": -2.4427528381347656, "logps/chosen": -236.7029266357422, "logps/rejected": -259.52545166015625, "loss": 0.4876, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.11370401084423065, "rewards/margins": 1.9164615869522095, "rewards/rejected": -1.8027576208114624, "step": 1030 }, { "epoch": 0.53, "learning_rate": 4.5761115297663897e-07, "logits/chosen": -2.515664577484131, "logits/rejected": -2.6834750175476074, "logps/chosen": -266.6720886230469, "logps/rejected": -255.0941925048828, "loss": 0.6642, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.22268524765968323, "rewards/margins": 1.3201810121536255, "rewards/rejected": -1.5428663492202759, "step": 1040 }, { "epoch": 0.53, "learning_rate": 4.5666917859834207e-07, "logits/chosen": -2.5942764282226562, "logits/rejected": -2.568809747695923, "logps/chosen": -287.2005615234375, "logps/rejected": -267.20391845703125, "loss": 0.586, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.07011856138706207, "rewards/margins": 0.9706190824508667, "rewards/rejected": -1.0407376289367676, "step": 1050 }, { "epoch": 0.54, "learning_rate": 4.5572720422004517e-07, "logits/chosen": -2.588561534881592, "logits/rejected": -2.6585402488708496, "logps/chosen": -256.96282958984375, "logps/rejected": -268.2518310546875, "loss": 0.4639, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.16893135011196136, "rewards/margins": 1.1751251220703125, "rewards/rejected": -1.3440563678741455, "step": 1060 }, { "epoch": 0.54, "learning_rate": 4.5478522984174827e-07, "logits/chosen": -2.582852840423584, "logits/rejected": -2.6485960483551025, "logps/chosen": -263.36187744140625, "logps/rejected": -249.5991973876953, "loss": 0.5816, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.08605729043483734, "rewards/margins": 0.928912341594696, "rewards/rejected": -1.0149695873260498, "step": 1070 }, { "epoch": 0.55, "learning_rate": 4.5384325546345137e-07, "logits/chosen": -2.650263786315918, "logits/rejected": -2.5726656913757324, "logps/chosen": -331.74774169921875, "logps/rejected": -295.6081237792969, "loss": 0.4773, "rewards/accuracies": 0.800000011920929, "rewards/chosen": 0.04656659811735153, "rewards/margins": 1.4996758699417114, "rewards/rejected": -1.4531093835830688, "step": 1080 }, { "epoch": 0.55, "learning_rate": 4.5290128108515447e-07, "logits/chosen": -2.63649582862854, "logits/rejected": -2.678618907928467, "logps/chosen": -272.94281005859375, "logps/rejected": -319.1750183105469, "loss": 0.6456, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.41687941551208496, "rewards/margins": 0.6916464567184448, "rewards/rejected": -1.1085259914398193, "step": 1090 }, { "epoch": 0.56, "learning_rate": 4.5195930670685757e-07, "logits/chosen": -2.5882906913757324, "logits/rejected": -2.614518642425537, "logps/chosen": -250.8640594482422, "logps/rejected": -250.9251708984375, "loss": 0.5219, "rewards/accuracies": 0.75, "rewards/chosen": -0.27617964148521423, "rewards/margins": 1.033569574356079, "rewards/rejected": -1.3097491264343262, "step": 1100 }, { "epoch": 0.56, "eval_logits/chosen": -2.6604645252227783, "eval_logits/rejected": -2.7450878620147705, "eval_logps/chosen": -279.9233093261719, "eval_logps/rejected": -271.287109375, "eval_loss": 0.5404812097549438, "eval_rewards/accuracies": 0.7290076613426208, "eval_rewards/chosen": -0.1672801822423935, "eval_rewards/margins": 1.1479164361953735, "eval_rewards/rejected": -1.3151966333389282, "eval_runtime": 297.089, "eval_samples_per_second": 7.021, "eval_steps_per_second": 0.441, "step": 1100 }, { "epoch": 0.56, "learning_rate": 4.5101733232856067e-07, "logits/chosen": -2.510779619216919, "logits/rejected": -2.5328545570373535, "logps/chosen": -327.66082763671875, "logps/rejected": -246.44662475585938, "loss": 0.5302, "rewards/accuracies": 0.75, "rewards/chosen": -0.4910829961299896, "rewards/margins": 0.9884698987007141, "rewards/rejected": -1.479552984237671, "step": 1110 }, { "epoch": 0.57, "learning_rate": 4.500753579502637e-07, "logits/chosen": -2.5775036811828613, "logits/rejected": -2.5860979557037354, "logps/chosen": -298.1309509277344, "logps/rejected": -257.0569763183594, "loss": 0.5651, "rewards/accuracies": 0.6875, "rewards/chosen": 0.1157577782869339, "rewards/margins": 1.2654132843017578, "rewards/rejected": -1.1496555805206299, "step": 1120 }, { "epoch": 0.57, "learning_rate": 4.491333835719668e-07, "logits/chosen": -2.624541759490967, "logits/rejected": -2.579864978790283, "logps/chosen": -281.26715087890625, "logps/rejected": -296.16571044921875, "loss": 0.5049, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.3193248212337494, "rewards/margins": 0.9984002113342285, "rewards/rejected": -1.3177250623703003, "step": 1130 }, { "epoch": 0.58, "learning_rate": 4.481914091936699e-07, "logits/chosen": -2.517090320587158, "logits/rejected": -2.5506300926208496, "logps/chosen": -297.8021545410156, "logps/rejected": -274.5700988769531, "loss": 0.4954, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.06897372752428055, "rewards/margins": 1.647580862045288, "rewards/rejected": -1.7165546417236328, "step": 1140 }, { "epoch": 0.58, "learning_rate": 4.47249434815373e-07, "logits/chosen": -2.4685566425323486, "logits/rejected": -2.4525017738342285, "logps/chosen": -249.1833038330078, "logps/rejected": -268.045166015625, "loss": 0.6057, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.7198296785354614, "rewards/margins": 0.7819298505783081, "rewards/rejected": -1.5017595291137695, "step": 1150 }, { "epoch": 0.59, "learning_rate": 4.4630746043707607e-07, "logits/chosen": -2.4799113273620605, "logits/rejected": -2.4306507110595703, "logps/chosen": -287.40032958984375, "logps/rejected": -291.71063232421875, "loss": 0.5651, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2504750192165375, "rewards/margins": 1.1714638471603394, "rewards/rejected": -1.4219388961791992, "step": 1160 }, { "epoch": 0.6, "learning_rate": 4.4536548605877917e-07, "logits/chosen": -2.5734705924987793, "logits/rejected": -2.5858707427978516, "logps/chosen": -294.67681884765625, "logps/rejected": -243.31912231445312, "loss": 0.4886, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.02277236059308052, "rewards/margins": 1.20340096950531, "rewards/rejected": -1.2261732816696167, "step": 1170 }, { "epoch": 0.6, "learning_rate": 4.4442351168048227e-07, "logits/chosen": -2.62070369720459, "logits/rejected": -2.714472532272339, "logps/chosen": -256.4486083984375, "logps/rejected": -237.9633331298828, "loss": 0.5416, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.06612993031740189, "rewards/margins": 0.9648292660713196, "rewards/rejected": -1.0309593677520752, "step": 1180 }, { "epoch": 0.61, "learning_rate": 4.4348153730218537e-07, "logits/chosen": -2.557380437850952, "logits/rejected": -2.5687503814697266, "logps/chosen": -277.671142578125, "logps/rejected": -247.2373504638672, "loss": 0.5121, "rewards/accuracies": 0.75, "rewards/chosen": -0.04107171297073364, "rewards/margins": 1.0434238910675049, "rewards/rejected": -1.0844955444335938, "step": 1190 }, { "epoch": 0.61, "learning_rate": 4.425395629238884e-07, "logits/chosen": -2.5712647438049316, "logits/rejected": -2.5064244270324707, "logps/chosen": -272.5246887207031, "logps/rejected": -263.8275451660156, "loss": 0.5391, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.15056011080741882, "rewards/margins": 1.2076849937438965, "rewards/rejected": -1.3582451343536377, "step": 1200 }, { "epoch": 0.61, "eval_logits/chosen": -2.6691508293151855, "eval_logits/rejected": -2.755162477493286, "eval_logps/chosen": -280.7105712890625, "eval_logps/rejected": -272.5871276855469, "eval_loss": 0.5320115685462952, "eval_rewards/accuracies": 0.7404580116271973, "eval_rewards/chosen": -0.24601177871227264, "eval_rewards/margins": 1.199183702468872, "eval_rewards/rejected": -1.4451955556869507, "eval_runtime": 301.4996, "eval_samples_per_second": 6.919, "eval_steps_per_second": 0.434, "step": 1200 }, { "epoch": 0.62, "learning_rate": 4.415975885455915e-07, "logits/chosen": -2.581174612045288, "logits/rejected": -2.5149054527282715, "logps/chosen": -277.2206115722656, "logps/rejected": -276.4008483886719, "loss": 0.8695, "rewards/accuracies": 0.75, "rewards/chosen": -0.275297075510025, "rewards/margins": 1.014151930809021, "rewards/rejected": -1.2894489765167236, "step": 1210 }, { "epoch": 0.62, "learning_rate": 4.406556141672946e-07, "logits/chosen": -2.702441930770874, "logits/rejected": -2.6084256172180176, "logps/chosen": -262.9638977050781, "logps/rejected": -260.6354675292969, "loss": 0.581, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": 0.07331562787294388, "rewards/margins": 1.4954464435577393, "rewards/rejected": -1.4221307039260864, "step": 1220 }, { "epoch": 0.63, "learning_rate": 4.397136397889977e-07, "logits/chosen": -2.6779658794403076, "logits/rejected": -2.844871759414673, "logps/chosen": -309.64898681640625, "logps/rejected": -302.99163818359375, "loss": 0.5565, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.01319421548396349, "rewards/margins": 1.1484153270721436, "rewards/rejected": -1.135221004486084, "step": 1230 }, { "epoch": 0.63, "learning_rate": 4.387716654107008e-07, "logits/chosen": -2.7568278312683105, "logits/rejected": -2.781827449798584, "logps/chosen": -309.62939453125, "logps/rejected": -271.3556823730469, "loss": 0.5592, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.15527522563934326, "rewards/margins": 0.7692685723304749, "rewards/rejected": -0.9245438575744629, "step": 1240 }, { "epoch": 0.64, "learning_rate": 4.378296910324039e-07, "logits/chosen": -2.705575942993164, "logits/rejected": -2.720196008682251, "logps/chosen": -272.5322570800781, "logps/rejected": -249.0066680908203, "loss": 0.6204, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.3543552756309509, "rewards/margins": 0.8724683523178101, "rewards/rejected": -1.2268235683441162, "step": 1250 }, { "epoch": 0.64, "learning_rate": 4.36887716654107e-07, "logits/chosen": -2.6648824214935303, "logits/rejected": -2.707021474838257, "logps/chosen": -322.87200927734375, "logps/rejected": -345.20843505859375, "loss": 0.5247, "rewards/accuracies": 0.675000011920929, "rewards/chosen": 0.01667252741754055, "rewards/margins": 1.1974635124206543, "rewards/rejected": -1.1807911396026611, "step": 1260 }, { "epoch": 0.65, "learning_rate": 4.359457422758101e-07, "logits/chosen": -2.589756488800049, "logits/rejected": -2.487732172012329, "logps/chosen": -232.86874389648438, "logps/rejected": -240.175537109375, "loss": 0.5913, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.03205676004290581, "rewards/margins": 1.1439334154129028, "rewards/rejected": -1.1759899854660034, "step": 1270 }, { "epoch": 0.65, "learning_rate": 4.350037678975131e-07, "logits/chosen": -2.6122286319732666, "logits/rejected": -2.5596940517425537, "logps/chosen": -276.717041015625, "logps/rejected": -275.9134216308594, "loss": 0.4956, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.1398329734802246, "rewards/margins": 1.3841016292572021, "rewards/rejected": -1.5239344835281372, "step": 1280 }, { "epoch": 0.66, "learning_rate": 4.340617935192162e-07, "logits/chosen": -2.6020092964172363, "logits/rejected": -2.5196774005889893, "logps/chosen": -298.8046875, "logps/rejected": -254.69033813476562, "loss": 0.5306, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.21991291642189026, "rewards/margins": 1.1622774600982666, "rewards/rejected": -1.382190465927124, "step": 1290 }, { "epoch": 0.66, "learning_rate": 4.331198191409193e-07, "logits/chosen": -2.506074905395508, "logits/rejected": -2.544355869293213, "logps/chosen": -321.9480285644531, "logps/rejected": -288.6408386230469, "loss": 0.536, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.04333578795194626, "rewards/margins": 1.2949002981185913, "rewards/rejected": -1.3382360935211182, "step": 1300 }, { "epoch": 0.66, "eval_logits/chosen": -2.6125967502593994, "eval_logits/rejected": -2.7006421089172363, "eval_logps/chosen": -280.16973876953125, "eval_logps/rejected": -271.69952392578125, "eval_loss": 0.5501763224601746, "eval_rewards/accuracies": 0.7270992398262024, "eval_rewards/chosen": -0.19192270934581757, "eval_rewards/margins": 1.1645148992538452, "eval_rewards/rejected": -1.3564376831054688, "eval_runtime": 296.9716, "eval_samples_per_second": 7.024, "eval_steps_per_second": 0.441, "step": 1300 }, { "epoch": 0.67, "learning_rate": 4.321778447626224e-07, "logits/chosen": -2.5924477577209473, "logits/rejected": -2.6131839752197266, "logps/chosen": -270.43280029296875, "logps/rejected": -248.0684051513672, "loss": 0.5595, "rewards/accuracies": 0.75, "rewards/chosen": -0.22290806472301483, "rewards/margins": 0.9343156814575195, "rewards/rejected": -1.1572238206863403, "step": 1310 }, { "epoch": 0.67, "learning_rate": 4.312358703843255e-07, "logits/chosen": -2.6144769191741943, "logits/rejected": -2.6292853355407715, "logps/chosen": -296.95001220703125, "logps/rejected": -249.1085968017578, "loss": 0.6089, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.16053441166877747, "rewards/margins": 0.939122200012207, "rewards/rejected": -0.7785876989364624, "step": 1320 }, { "epoch": 0.68, "learning_rate": 4.302938960060286e-07, "logits/chosen": -2.609095811843872, "logits/rejected": -2.566784381866455, "logps/chosen": -288.53204345703125, "logps/rejected": -279.9819641113281, "loss": 0.5419, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.10803576558828354, "rewards/margins": 1.2878508567810059, "rewards/rejected": -1.3958865404129028, "step": 1330 }, { "epoch": 0.68, "learning_rate": 4.293519216277317e-07, "logits/chosen": -2.6211318969726562, "logits/rejected": -2.663722276687622, "logps/chosen": -275.7018127441406, "logps/rejected": -259.57318115234375, "loss": 0.607, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.027861136943101883, "rewards/margins": 1.3817142248153687, "rewards/rejected": -1.4095754623413086, "step": 1340 }, { "epoch": 0.69, "learning_rate": 4.284099472494348e-07, "logits/chosen": -2.6988394260406494, "logits/rejected": -2.5893874168395996, "logps/chosen": -281.22705078125, "logps/rejected": -301.45855712890625, "loss": 0.5618, "rewards/accuracies": 0.625, "rewards/chosen": -0.31579267978668213, "rewards/margins": 0.5685089826583862, "rewards/rejected": -0.8843017816543579, "step": 1350 }, { "epoch": 0.69, "learning_rate": 4.2746797287113787e-07, "logits/chosen": -2.6224687099456787, "logits/rejected": -2.569154739379883, "logps/chosen": -249.50143432617188, "logps/rejected": -281.0968017578125, "loss": 0.5396, "rewards/accuracies": 0.75, "rewards/chosen": -0.27225735783576965, "rewards/margins": 1.1059520244598389, "rewards/rejected": -1.3782094717025757, "step": 1360 }, { "epoch": 0.7, "learning_rate": 4.2652599849284097e-07, "logits/chosen": -2.63877534866333, "logits/rejected": -2.773172616958618, "logps/chosen": -262.63165283203125, "logps/rejected": -245.67813110351562, "loss": 0.5318, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.320833295583725, "rewards/margins": 1.2154992818832397, "rewards/rejected": -1.536332607269287, "step": 1370 }, { "epoch": 0.7, "learning_rate": 4.2558402411454407e-07, "logits/chosen": -2.711285352706909, "logits/rejected": -2.621014356613159, "logps/chosen": -271.7641906738281, "logps/rejected": -252.8822784423828, "loss": 0.4981, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.1611357033252716, "rewards/margins": 1.2175296545028687, "rewards/rejected": -1.3786654472351074, "step": 1380 }, { "epoch": 0.71, "learning_rate": 4.2464204973624717e-07, "logits/chosen": -2.6401703357696533, "logits/rejected": -2.6722915172576904, "logps/chosen": -327.8500061035156, "logps/rejected": -291.4416809082031, "loss": 0.5434, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.25624486804008484, "rewards/margins": 1.0539482831954956, "rewards/rejected": -1.3101933002471924, "step": 1390 }, { "epoch": 0.71, "learning_rate": 4.2370007535795027e-07, "logits/chosen": -2.5574371814727783, "logits/rejected": -2.5931754112243652, "logps/chosen": -305.538818359375, "logps/rejected": -279.69171142578125, "loss": 0.6544, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.21988415718078613, "rewards/margins": 1.070007562637329, "rewards/rejected": -1.2898919582366943, "step": 1400 }, { "epoch": 0.71, "eval_logits/chosen": -2.604628324508667, "eval_logits/rejected": -2.697035789489746, "eval_logps/chosen": -282.0076904296875, "eval_logps/rejected": -274.89263916015625, "eval_loss": 0.5308603644371033, "eval_rewards/accuracies": 0.7080152630805969, "eval_rewards/chosen": -0.37571972608566284, "eval_rewards/margins": 1.3000292778015137, "eval_rewards/rejected": -1.6757489442825317, "eval_runtime": 301.8954, "eval_samples_per_second": 6.91, "eval_steps_per_second": 0.434, "step": 1400 }, { "epoch": 0.72, "learning_rate": 4.227581009796533e-07, "logits/chosen": -2.462588310241699, "logits/rejected": -2.500105381011963, "logps/chosen": -288.41705322265625, "logps/rejected": -258.5611877441406, "loss": 0.5735, "rewards/accuracies": 0.8125, "rewards/chosen": -0.04483962059020996, "rewards/margins": 2.0927834510803223, "rewards/rejected": -2.137622833251953, "step": 1410 }, { "epoch": 0.72, "learning_rate": 4.218161266013564e-07, "logits/chosen": -2.574207305908203, "logits/rejected": -2.731383800506592, "logps/chosen": -269.80035400390625, "logps/rejected": -245.1697235107422, "loss": 0.5215, "rewards/accuracies": 0.6875, "rewards/chosen": -0.26280277967453003, "rewards/margins": 1.3922348022460938, "rewards/rejected": -1.655037522315979, "step": 1420 }, { "epoch": 0.73, "learning_rate": 4.208741522230595e-07, "logits/chosen": -2.615461587905884, "logits/rejected": -2.531139612197876, "logps/chosen": -272.6134338378906, "logps/rejected": -255.28408813476562, "loss": 0.7724, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.5035494565963745, "rewards/margins": 0.5120665431022644, "rewards/rejected": -1.0156160593032837, "step": 1430 }, { "epoch": 0.73, "learning_rate": 4.1993217784476257e-07, "logits/chosen": -2.714182138442993, "logits/rejected": -2.6374399662017822, "logps/chosen": -275.6734924316406, "logps/rejected": -302.3877258300781, "loss": 0.5738, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.4428681433200836, "rewards/margins": 1.1528266668319702, "rewards/rejected": -1.5956947803497314, "step": 1440 }, { "epoch": 0.74, "learning_rate": 4.1899020346646567e-07, "logits/chosen": -2.605117082595825, "logits/rejected": -2.594167947769165, "logps/chosen": -280.275390625, "logps/rejected": -249.83792114257812, "loss": 0.6099, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": 0.11132706701755524, "rewards/margins": 1.3764369487762451, "rewards/rejected": -1.265109896659851, "step": 1450 }, { "epoch": 0.74, "learning_rate": 4.1804822908816877e-07, "logits/chosen": -2.6344046592712402, "logits/rejected": -2.5441765785217285, "logps/chosen": -287.36163330078125, "logps/rejected": -310.89288330078125, "loss": 0.6445, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4139525294303894, "rewards/margins": 0.49868473410606384, "rewards/rejected": -0.9126373529434204, "step": 1460 }, { "epoch": 0.75, "learning_rate": 4.1710625470987187e-07, "logits/chosen": -2.5128302574157715, "logits/rejected": -2.5804567337036133, "logps/chosen": -294.478515625, "logps/rejected": -288.2181091308594, "loss": 0.4808, "rewards/accuracies": 0.8125, "rewards/chosen": -0.4093014597892761, "rewards/margins": 1.3569328784942627, "rewards/rejected": -1.7662343978881836, "step": 1470 }, { "epoch": 0.75, "learning_rate": 4.1616428033157497e-07, "logits/chosen": -2.643336772918701, "logits/rejected": -2.68320369720459, "logps/chosen": -257.29913330078125, "logps/rejected": -263.56597900390625, "loss": 0.5644, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.8031185269355774, "rewards/margins": 1.1325935125350952, "rewards/rejected": -1.9357120990753174, "step": 1480 }, { "epoch": 0.76, "learning_rate": 4.1522230595327807e-07, "logits/chosen": -2.6495070457458496, "logits/rejected": -2.5842490196228027, "logps/chosen": -244.33584594726562, "logps/rejected": -298.60284423828125, "loss": 0.6466, "rewards/accuracies": 0.8125, "rewards/chosen": -0.39945897459983826, "rewards/margins": 1.1669641733169556, "rewards/rejected": -1.5664231777191162, "step": 1490 }, { "epoch": 0.76, "learning_rate": 4.1428033157498117e-07, "logits/chosen": -2.4568417072296143, "logits/rejected": -2.472951889038086, "logps/chosen": -264.20794677734375, "logps/rejected": -261.67279052734375, "loss": 0.5697, "rewards/accuracies": 0.75, "rewards/chosen": -0.658267617225647, "rewards/margins": 0.9868196249008179, "rewards/rejected": -1.6450872421264648, "step": 1500 }, { "epoch": 0.76, "eval_logits/chosen": -2.6730244159698486, "eval_logits/rejected": -2.765580654144287, "eval_logps/chosen": -280.74395751953125, "eval_logps/rejected": -272.92584228515625, "eval_loss": 0.5662475228309631, "eval_rewards/accuracies": 0.7156488299369812, "eval_rewards/chosen": -0.2493485063314438, "eval_rewards/margins": 1.229722023010254, "eval_rewards/rejected": -1.4790705442428589, "eval_runtime": 297.049, "eval_samples_per_second": 7.022, "eval_steps_per_second": 0.441, "step": 1500 }, { "epoch": 0.77, "learning_rate": 4.1333835719668427e-07, "logits/chosen": -2.618648052215576, "logits/rejected": -2.575504779815674, "logps/chosen": -279.4701232910156, "logps/rejected": -356.17584228515625, "loss": 0.5529, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.46867623925209045, "rewards/margins": 0.8481055498123169, "rewards/rejected": -1.316781759262085, "step": 1510 }, { "epoch": 0.77, "learning_rate": 4.1239638281838737e-07, "logits/chosen": -2.637680768966675, "logits/rejected": -2.670579433441162, "logps/chosen": -286.6285400390625, "logps/rejected": -271.7418518066406, "loss": 0.4942, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.38739800453186035, "rewards/margins": 1.265276551246643, "rewards/rejected": -1.6526744365692139, "step": 1520 }, { "epoch": 0.78, "learning_rate": 4.1145440844009036e-07, "logits/chosen": -2.6004691123962402, "logits/rejected": -2.691800594329834, "logps/chosen": -285.45989990234375, "logps/rejected": -255.03042602539062, "loss": 0.5876, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.542041540145874, "rewards/margins": 1.1405549049377441, "rewards/rejected": -1.6825965642929077, "step": 1530 }, { "epoch": 0.78, "learning_rate": 4.1051243406179347e-07, "logits/chosen": -2.543489933013916, "logits/rejected": -2.4293406009674072, "logps/chosen": -289.1936340332031, "logps/rejected": -281.3624572753906, "loss": 0.527, "rewards/accuracies": 0.75, "rewards/chosen": 0.05935020372271538, "rewards/margins": 1.4894200563430786, "rewards/rejected": -1.4300696849822998, "step": 1540 }, { "epoch": 0.79, "learning_rate": 4.0957045968349657e-07, "logits/chosen": -2.6180763244628906, "logits/rejected": -2.5505731105804443, "logps/chosen": -280.2388610839844, "logps/rejected": -261.02252197265625, "loss": 0.5261, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.565861165523529, "rewards/margins": 0.8470433354377747, "rewards/rejected": -1.4129045009613037, "step": 1550 }, { "epoch": 0.79, "learning_rate": 4.0862848530519967e-07, "logits/chosen": -2.5736451148986816, "logits/rejected": -2.4275200366973877, "logps/chosen": -265.53271484375, "logps/rejected": -287.5812683105469, "loss": 0.5627, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.08069765567779541, "rewards/margins": 1.4943009614944458, "rewards/rejected": -1.5749986171722412, "step": 1560 }, { "epoch": 0.8, "learning_rate": 4.0768651092690277e-07, "logits/chosen": -2.513634204864502, "logits/rejected": -2.589459180831909, "logps/chosen": -285.5072937011719, "logps/rejected": -252.2686004638672, "loss": 0.6622, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.5319138765335083, "rewards/margins": 0.9326924085617065, "rewards/rejected": -1.4646062850952148, "step": 1570 }, { "epoch": 0.8, "learning_rate": 4.0674453654860587e-07, "logits/chosen": -2.482975959777832, "logits/rejected": -2.4376277923583984, "logps/chosen": -229.789306640625, "logps/rejected": -236.7023162841797, "loss": 0.545, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.3413255214691162, "rewards/margins": 1.2952824831008911, "rewards/rejected": -1.6366078853607178, "step": 1580 }, { "epoch": 0.81, "learning_rate": 4.0580256217030897e-07, "logits/chosen": -2.4523279666900635, "logits/rejected": -2.4195103645324707, "logps/chosen": -303.5387268066406, "logps/rejected": -284.21075439453125, "loss": 0.5395, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.3342668116092682, "rewards/margins": 0.9444997906684875, "rewards/rejected": -1.278766393661499, "step": 1590 }, { "epoch": 0.81, "learning_rate": 4.0486058779201207e-07, "logits/chosen": -2.4088969230651855, "logits/rejected": -2.5234007835388184, "logps/chosen": -274.2557067871094, "logps/rejected": -271.4913330078125, "loss": 0.5538, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.473999559879303, "rewards/margins": 1.0612614154815674, "rewards/rejected": -1.5352609157562256, "step": 1600 }, { "epoch": 0.81, "eval_logits/chosen": -2.594578981399536, "eval_logits/rejected": -2.6933910846710205, "eval_logps/chosen": -282.90802001953125, "eval_logps/rejected": -274.9263916015625, "eval_loss": 0.5325531363487244, "eval_rewards/accuracies": 0.7213740348815918, "eval_rewards/chosen": -0.4657546877861023, "eval_rewards/margins": 1.21336829662323, "eval_rewards/rejected": -1.6791231632232666, "eval_runtime": 302.0446, "eval_samples_per_second": 6.906, "eval_steps_per_second": 0.434, "step": 1600 }, { "epoch": 0.82, "learning_rate": 4.039186134137151e-07, "logits/chosen": -2.477017641067505, "logits/rejected": -2.512791156768799, "logps/chosen": -364.48138427734375, "logps/rejected": -329.79571533203125, "loss": 0.5225, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.33335477113723755, "rewards/margins": 1.133012056350708, "rewards/rejected": -1.4663667678833008, "step": 1610 }, { "epoch": 0.82, "learning_rate": 4.029766390354182e-07, "logits/chosen": -2.339780569076538, "logits/rejected": -2.2996575832366943, "logps/chosen": -254.8621368408203, "logps/rejected": -305.05218505859375, "loss": 0.5218, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.580379843711853, "rewards/margins": 1.2120063304901123, "rewards/rejected": -1.7923862934112549, "step": 1620 }, { "epoch": 0.83, "learning_rate": 4.020346646571213e-07, "logits/chosen": -2.46311092376709, "logits/rejected": -2.4910902976989746, "logps/chosen": -253.5718231201172, "logps/rejected": -241.31729125976562, "loss": 0.4909, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -0.30813589692115784, "rewards/margins": 0.8881072998046875, "rewards/rejected": -1.196243166923523, "step": 1630 }, { "epoch": 0.83, "learning_rate": 4.010926902788244e-07, "logits/chosen": -2.437481641769409, "logits/rejected": -2.487056016921997, "logps/chosen": -298.3309326171875, "logps/rejected": -307.5037536621094, "loss": 0.6464, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.40473002195358276, "rewards/margins": 1.2244799137115479, "rewards/rejected": -1.6292098760604858, "step": 1640 }, { "epoch": 0.84, "learning_rate": 4.001507159005275e-07, "logits/chosen": -2.4725682735443115, "logits/rejected": -2.531104564666748, "logps/chosen": -257.1470642089844, "logps/rejected": -232.2982635498047, "loss": 0.517, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.5268018245697021, "rewards/margins": 0.9982993006706238, "rewards/rejected": -1.5251011848449707, "step": 1650 }, { "epoch": 0.84, "learning_rate": 3.9920874152223057e-07, "logits/chosen": -2.416590929031372, "logits/rejected": -2.4863741397857666, "logps/chosen": -302.4273986816406, "logps/rejected": -274.00994873046875, "loss": 0.6356, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.31293386220932007, "rewards/margins": 1.3280731439590454, "rewards/rejected": -1.6410068273544312, "step": 1660 }, { "epoch": 0.85, "learning_rate": 3.9826676714393367e-07, "logits/chosen": -2.34822940826416, "logits/rejected": -2.3350391387939453, "logps/chosen": -237.3314208984375, "logps/rejected": -211.2093505859375, "loss": 0.6246, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.46049946546554565, "rewards/margins": 1.1196852922439575, "rewards/rejected": -1.580184817314148, "step": 1670 }, { "epoch": 0.85, "learning_rate": 3.9732479276563677e-07, "logits/chosen": -2.4428534507751465, "logits/rejected": -2.5402212142944336, "logps/chosen": -254.8546600341797, "logps/rejected": -236.1012725830078, "loss": 0.5941, "rewards/accuracies": 0.637499988079071, "rewards/chosen": -0.17953288555145264, "rewards/margins": 0.8878132700920105, "rewards/rejected": -1.0673460960388184, "step": 1680 }, { "epoch": 0.86, "learning_rate": 3.963828183873398e-07, "logits/chosen": -2.4770541191101074, "logits/rejected": -2.5738840103149414, "logps/chosen": -308.5177917480469, "logps/rejected": -253.67648315429688, "loss": 0.4828, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.23972062766551971, "rewards/margins": 1.4527523517608643, "rewards/rejected": -1.6924731731414795, "step": 1690 }, { "epoch": 0.86, "learning_rate": 3.954408440090429e-07, "logits/chosen": -2.4382712841033936, "logits/rejected": -2.453115940093994, "logps/chosen": -322.01043701171875, "logps/rejected": -257.23638916015625, "loss": 0.551, "rewards/accuracies": 0.625, "rewards/chosen": -0.4521487355232239, "rewards/margins": 0.9814785718917847, "rewards/rejected": -1.4336273670196533, "step": 1700 }, { "epoch": 0.86, "eval_logits/chosen": -2.556736469268799, "eval_logits/rejected": -2.653500556945801, "eval_logps/chosen": -284.46728515625, "eval_logps/rejected": -277.02783203125, "eval_loss": 0.5257931351661682, "eval_rewards/accuracies": 0.7137404680252075, "eval_rewards/chosen": -0.6216804385185242, "eval_rewards/margins": 1.26758873462677, "eval_rewards/rejected": -1.8892688751220703, "eval_runtime": 297.4077, "eval_samples_per_second": 7.014, "eval_steps_per_second": 0.44, "step": 1700 }, { "epoch": 0.87, "learning_rate": 3.94498869630746e-07, "logits/chosen": -2.4545798301696777, "logits/rejected": -2.398289680480957, "logps/chosen": -296.9984436035156, "logps/rejected": -284.75982666015625, "loss": 0.542, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.5290297269821167, "rewards/margins": 1.3974144458770752, "rewards/rejected": -1.9264440536499023, "step": 1710 }, { "epoch": 0.87, "learning_rate": 3.935568952524491e-07, "logits/chosen": -2.552342176437378, "logits/rejected": -2.430562734603882, "logps/chosen": -281.8885803222656, "logps/rejected": -275.8494873046875, "loss": 0.488, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.33996737003326416, "rewards/margins": 1.4860076904296875, "rewards/rejected": -1.8259750604629517, "step": 1720 }, { "epoch": 0.88, "learning_rate": 3.926149208741522e-07, "logits/chosen": -2.451730251312256, "logits/rejected": -2.521273136138916, "logps/chosen": -295.59625244140625, "logps/rejected": -254.90811157226562, "loss": 0.5294, "rewards/accuracies": 0.75, "rewards/chosen": -0.6222850680351257, "rewards/margins": 1.8649215698242188, "rewards/rejected": -2.48720645904541, "step": 1730 }, { "epoch": 0.89, "learning_rate": 3.916729464958553e-07, "logits/chosen": -2.4977612495422363, "logits/rejected": -2.6330878734588623, "logps/chosen": -350.9913330078125, "logps/rejected": -323.6842346191406, "loss": 0.5116, "rewards/accuracies": 0.7875000238418579, "rewards/chosen": -0.6430081129074097, "rewards/margins": 1.4963001012802124, "rewards/rejected": -2.139308452606201, "step": 1740 }, { "epoch": 0.89, "learning_rate": 3.907309721175584e-07, "logits/chosen": -2.342928171157837, "logits/rejected": -2.4169085025787354, "logps/chosen": -309.93853759765625, "logps/rejected": -251.88815307617188, "loss": 0.6657, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.6915287375450134, "rewards/margins": 1.0208818912506104, "rewards/rejected": -1.7124109268188477, "step": 1750 }, { "epoch": 0.9, "learning_rate": 3.897889977392615e-07, "logits/chosen": -2.515695810317993, "logits/rejected": -2.5377676486968994, "logps/chosen": -315.3117980957031, "logps/rejected": -284.48065185546875, "loss": 0.6156, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.5466021299362183, "rewards/margins": 1.2473156452178955, "rewards/rejected": -1.7939178943634033, "step": 1760 }, { "epoch": 0.9, "learning_rate": 3.8884702336096456e-07, "logits/chosen": -2.5526938438415527, "logits/rejected": -2.5009982585906982, "logps/chosen": -298.90252685546875, "logps/rejected": -268.0218200683594, "loss": 0.5509, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -0.27220678329467773, "rewards/margins": 0.9303116798400879, "rewards/rejected": -1.202518343925476, "step": 1770 }, { "epoch": 0.91, "learning_rate": 3.879050489826676e-07, "logits/chosen": -2.4461004734039307, "logits/rejected": -2.4862380027770996, "logps/chosen": -277.4447937011719, "logps/rejected": -279.57794189453125, "loss": 0.5103, "rewards/accuracies": 0.800000011920929, "rewards/chosen": -0.2458111047744751, "rewards/margins": 1.5718352794647217, "rewards/rejected": -1.8176462650299072, "step": 1780 }, { "epoch": 0.91, "learning_rate": 3.869630746043707e-07, "logits/chosen": -2.477055549621582, "logits/rejected": -2.5192625522613525, "logps/chosen": -304.3697204589844, "logps/rejected": -250.54501342773438, "loss": 0.5216, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.6367429494857788, "rewards/margins": 1.5008876323699951, "rewards/rejected": -2.1376309394836426, "step": 1790 }, { "epoch": 0.92, "learning_rate": 3.860211002260738e-07, "logits/chosen": -2.5067267417907715, "logits/rejected": -2.437110662460327, "logps/chosen": -306.4678039550781, "logps/rejected": -322.9219055175781, "loss": 0.5708, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.3336077034473419, "rewards/margins": 1.3294265270233154, "rewards/rejected": -1.663034200668335, "step": 1800 }, { "epoch": 0.92, "eval_logits/chosen": -2.556394338607788, "eval_logits/rejected": -2.627877950668335, "eval_logps/chosen": -283.4186096191406, "eval_logps/rejected": -277.09735107421875, "eval_loss": 0.5639130473136902, "eval_rewards/accuracies": 0.7213740348815918, "eval_rewards/chosen": -0.516810953617096, "eval_rewards/margins": 1.379412055015564, "eval_rewards/rejected": -1.8962229490280151, "eval_runtime": 301.7159, "eval_samples_per_second": 6.914, "eval_steps_per_second": 0.434, "step": 1800 }, { "epoch": 0.92, "learning_rate": 3.850791258477769e-07, "logits/chosen": -2.4557442665100098, "logits/rejected": -2.37328839302063, "logps/chosen": -285.089111328125, "logps/rejected": -283.9366760253906, "loss": 0.6032, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6370270848274231, "rewards/margins": 0.6979910135269165, "rewards/rejected": -1.3350181579589844, "step": 1810 }, { "epoch": 0.93, "learning_rate": 3.8413715146948e-07, "logits/chosen": -2.4120583534240723, "logits/rejected": -2.535059690475464, "logps/chosen": -274.90399169921875, "logps/rejected": -256.09332275390625, "loss": 0.5635, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.22262752056121826, "rewards/margins": 1.5661967992782593, "rewards/rejected": -1.788824439048767, "step": 1820 }, { "epoch": 0.93, "learning_rate": 3.831951770911831e-07, "logits/chosen": -2.4969706535339355, "logits/rejected": -2.5374817848205566, "logps/chosen": -242.24667358398438, "logps/rejected": -327.6692810058594, "loss": 0.6341, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.2129790335893631, "rewards/margins": 1.3899933099746704, "rewards/rejected": -1.6029722690582275, "step": 1830 }, { "epoch": 0.94, "learning_rate": 3.822532027128862e-07, "logits/chosen": -2.522143840789795, "logits/rejected": -2.6315150260925293, "logps/chosen": -242.66592407226562, "logps/rejected": -228.8377227783203, "loss": 0.4791, "rewards/accuracies": 0.7124999761581421, "rewards/chosen": -0.440913587808609, "rewards/margins": 1.237001657485962, "rewards/rejected": -1.6779152154922485, "step": 1840 }, { "epoch": 0.94, "learning_rate": 3.8131122833458926e-07, "logits/chosen": -2.5416033267974854, "logits/rejected": -2.469377040863037, "logps/chosen": -265.96160888671875, "logps/rejected": -278.81463623046875, "loss": 0.5192, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.5825539827346802, "rewards/margins": 1.0453699827194214, "rewards/rejected": -1.6279242038726807, "step": 1850 }, { "epoch": 0.95, "learning_rate": 3.8036925395629236e-07, "logits/chosen": -2.512683391571045, "logits/rejected": -2.4817757606506348, "logps/chosen": -272.36614990234375, "logps/rejected": -247.10476684570312, "loss": 0.5046, "rewards/accuracies": 0.6875, "rewards/chosen": -0.6125789880752563, "rewards/margins": 0.970922589302063, "rewards/rejected": -1.5835015773773193, "step": 1860 }, { "epoch": 0.95, "learning_rate": 3.7942727957799546e-07, "logits/chosen": -2.5154783725738525, "logits/rejected": -2.5012450218200684, "logps/chosen": -282.32952880859375, "logps/rejected": -270.69427490234375, "loss": 0.5542, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.6044799089431763, "rewards/margins": 1.4208053350448608, "rewards/rejected": -2.025285243988037, "step": 1870 }, { "epoch": 0.96, "learning_rate": 3.7848530519969856e-07, "logits/chosen": -2.4879260063171387, "logits/rejected": -2.457428455352783, "logps/chosen": -272.4909973144531, "logps/rejected": -241.22433471679688, "loss": 0.636, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.6453158855438232, "rewards/margins": 1.0200417041778564, "rewards/rejected": -1.6653575897216797, "step": 1880 }, { "epoch": 0.96, "learning_rate": 3.7754333082140166e-07, "logits/chosen": -2.5128307342529297, "logits/rejected": -2.5684497356414795, "logps/chosen": -336.8165588378906, "logps/rejected": -291.71453857421875, "loss": 0.5249, "rewards/accuracies": 0.6875, "rewards/chosen": -0.2176159918308258, "rewards/margins": 1.229262351989746, "rewards/rejected": -1.4468783140182495, "step": 1890 }, { "epoch": 0.97, "learning_rate": 3.7660135644310477e-07, "logits/chosen": -2.528883695602417, "logits/rejected": -2.5390408039093018, "logps/chosen": -289.835205078125, "logps/rejected": -272.76654052734375, "loss": 0.5344, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.27965977787971497, "rewards/margins": 1.2389864921569824, "rewards/rejected": -1.518646240234375, "step": 1900 }, { "epoch": 0.97, "eval_logits/chosen": -2.599806547164917, "eval_logits/rejected": -2.6680245399475098, "eval_logps/chosen": -282.03875732421875, "eval_logps/rejected": -276.2930908203125, "eval_loss": 0.5602597594261169, "eval_rewards/accuracies": 0.7270992398262024, "eval_rewards/chosen": -0.37883004546165466, "eval_rewards/margins": 1.436964750289917, "eval_rewards/rejected": -1.8157949447631836, "eval_runtime": 297.1212, "eval_samples_per_second": 7.021, "eval_steps_per_second": 0.441, "step": 1900 }, { "epoch": 0.97, "learning_rate": 3.756593820648078e-07, "logits/chosen": -2.472036361694336, "logits/rejected": -2.496267318725586, "logps/chosen": -292.5258483886719, "logps/rejected": -290.6319580078125, "loss": 0.5371, "rewards/accuracies": 0.737500011920929, "rewards/chosen": -0.3897474408149719, "rewards/margins": 1.1748746633529663, "rewards/rejected": -1.564622163772583, "step": 1910 }, { "epoch": 0.98, "learning_rate": 3.747174076865109e-07, "logits/chosen": -2.4090096950531006, "logits/rejected": -2.475609064102173, "logps/chosen": -296.208740234375, "logps/rejected": -296.7442626953125, "loss": 0.6168, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -0.4877321720123291, "rewards/margins": 1.1195850372314453, "rewards/rejected": -1.6073172092437744, "step": 1920 }, { "epoch": 0.98, "learning_rate": 3.7377543330821396e-07, "logits/chosen": -2.420328140258789, "logits/rejected": -2.475395679473877, "logps/chosen": -310.14166259765625, "logps/rejected": -292.9002685546875, "loss": 0.5162, "rewards/accuracies": 0.762499988079071, "rewards/chosen": -0.709976077079773, "rewards/margins": 1.334611415863037, "rewards/rejected": -2.0445871353149414, "step": 1930 }, { "epoch": 0.99, "learning_rate": 3.7283345892991706e-07, "logits/chosen": -2.4980602264404297, "logits/rejected": -2.55902361869812, "logps/chosen": -336.90155029296875, "logps/rejected": -322.50677490234375, "loss": 0.5316, "rewards/accuracies": 0.7749999761581421, "rewards/chosen": -0.526197075843811, "rewards/margins": 1.3391772508621216, "rewards/rejected": -1.865374207496643, "step": 1940 }, { "epoch": 0.99, "learning_rate": 3.7189148455162016e-07, "logits/chosen": -2.44006609916687, "logits/rejected": -2.403160333633423, "logps/chosen": -248.7161865234375, "logps/rejected": -256.5013732910156, "loss": 0.5521, "rewards/accuracies": 0.6499999761581421, "rewards/chosen": -0.4066084027290344, "rewards/margins": 1.3497653007507324, "rewards/rejected": -1.756373643875122, "step": 1950 }, { "epoch": 1.0, "learning_rate": 3.7094951017332326e-07, "logits/chosen": -2.5976829528808594, "logits/rejected": -2.587454319000244, "logps/chosen": -296.61785888671875, "logps/rejected": -235.3370819091797, "loss": 0.5499, "rewards/accuracies": 0.7250000238418579, "rewards/chosen": -0.2806735038757324, "rewards/margins": 1.0782190561294556, "rewards/rejected": -1.358892560005188, "step": 1960 }, { "epoch": 1.0, "learning_rate": 3.7000753579502636e-07, "logits/chosen": -2.4377546310424805, "logits/rejected": -2.4840946197509766, "logps/chosen": -256.7413330078125, "logps/rejected": -288.8653869628906, "loss": 0.417, "rewards/accuracies": 0.824999988079071, "rewards/chosen": 0.8871258497238159, "rewards/margins": 3.1716504096984863, "rewards/rejected": -2.284524440765381, "step": 1970 }, { "epoch": 1.01, "learning_rate": 3.6906556141672946e-07, "logits/chosen": -2.5575382709503174, "logits/rejected": -2.507671356201172, "logps/chosen": -298.3926086425781, "logps/rejected": -276.83197021484375, "loss": 0.104, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 1.7850697040557861, "rewards/margins": 4.537423610687256, "rewards/rejected": -2.7523539066314697, "step": 1980 }, { "epoch": 1.01, "learning_rate": 3.6812358703843256e-07, "logits/chosen": -2.5167181491851807, "logits/rejected": -2.4053378105163574, "logps/chosen": -250.5689239501953, "logps/rejected": -310.2710876464844, "loss": 0.102, "rewards/accuracies": 0.9375, "rewards/chosen": 1.1603522300720215, "rewards/margins": 4.360447883605957, "rewards/rejected": -3.2000954151153564, "step": 1990 }, { "epoch": 1.02, "learning_rate": 3.6718161266013566e-07, "logits/chosen": -2.483384609222412, "logits/rejected": -2.6710855960845947, "logps/chosen": -245.9284210205078, "logps/rejected": -308.3492431640625, "loss": 0.0925, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 1.2592953443527222, "rewards/margins": 5.212327480316162, "rewards/rejected": -3.9530320167541504, "step": 2000 }, { "epoch": 1.02, "eval_logits/chosen": -2.5824897289276123, "eval_logits/rejected": -2.6520135402679443, "eval_logps/chosen": -282.8787841796875, "eval_logps/rejected": -279.4119567871094, "eval_loss": 0.5587373375892639, "eval_rewards/accuracies": 0.7404580116271973, "eval_rewards/chosen": -0.46283194422721863, "eval_rewards/margins": 1.664847493171692, "eval_rewards/rejected": -2.1276795864105225, "eval_runtime": 297.407, "eval_samples_per_second": 7.014, "eval_steps_per_second": 0.44, "step": 2000 }, { "epoch": 1.02, "learning_rate": 3.662396382818387e-07, "logits/chosen": -2.466040849685669, "logits/rejected": -2.4850330352783203, "logps/chosen": -285.1557312011719, "logps/rejected": -314.3226623535156, "loss": 0.1259, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.797186017036438, "rewards/margins": 4.633586883544922, "rewards/rejected": -3.8364005088806152, "step": 2010 }, { "epoch": 1.03, "learning_rate": 3.652976639035418e-07, "logits/chosen": -2.433439254760742, "logits/rejected": -2.4428954124450684, "logps/chosen": -354.7630920410156, "logps/rejected": -337.88360595703125, "loss": 0.0888, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.6825096607208252, "rewards/margins": 6.562380313873291, "rewards/rejected": -4.879870891571045, "step": 2020 }, { "epoch": 1.03, "learning_rate": 3.6435568952524486e-07, "logits/chosen": -2.388587474822998, "logits/rejected": -2.4829463958740234, "logps/chosen": -251.0550537109375, "logps/rejected": -301.1805419921875, "loss": 0.0645, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 1.6248880624771118, "rewards/margins": 6.260207176208496, "rewards/rejected": -4.635318279266357, "step": 2030 }, { "epoch": 1.04, "learning_rate": 3.6341371514694796e-07, "logits/chosen": -2.5493760108947754, "logits/rejected": -2.528698682785034, "logps/chosen": -235.96932983398438, "logps/rejected": -289.5627746582031, "loss": 0.0544, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.5340107679367065, "rewards/margins": 4.596531391143799, "rewards/rejected": -4.0625200271606445, "step": 2040 }, { "epoch": 1.04, "learning_rate": 3.6247174076865106e-07, "logits/chosen": -2.3305134773254395, "logits/rejected": -2.384181499481201, "logps/chosen": -265.8026428222656, "logps/rejected": -303.68646240234375, "loss": 0.1032, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 1.6562206745147705, "rewards/margins": 5.506114959716797, "rewards/rejected": -3.8498940467834473, "step": 2050 }, { "epoch": 1.05, "learning_rate": 3.6152976639035416e-07, "logits/chosen": -2.49692440032959, "logits/rejected": -2.456477403640747, "logps/chosen": -240.740966796875, "logps/rejected": -269.8097229003906, "loss": 0.1098, "rewards/accuracies": 0.9375, "rewards/chosen": 0.8934639096260071, "rewards/margins": 4.806546688079834, "rewards/rejected": -3.9130825996398926, "step": 2060 }, { "epoch": 1.05, "learning_rate": 3.6058779201205726e-07, "logits/chosen": -2.4669885635375977, "logits/rejected": -2.393951892852783, "logps/chosen": -281.805419921875, "logps/rejected": -328.47344970703125, "loss": 0.0874, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 1.6799099445343018, "rewards/margins": 6.671219825744629, "rewards/rejected": -4.9913105964660645, "step": 2070 }, { "epoch": 1.06, "learning_rate": 3.5964581763376036e-07, "logits/chosen": -2.4482600688934326, "logits/rejected": -2.403702974319458, "logps/chosen": -224.82803344726562, "logps/rejected": -269.99334716796875, "loss": 0.0894, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 1.2562118768692017, "rewards/margins": 5.40142822265625, "rewards/rejected": -4.145216464996338, "step": 2080 }, { "epoch": 1.06, "learning_rate": 3.587038432554634e-07, "logits/chosen": -2.332878828048706, "logits/rejected": -2.235696315765381, "logps/chosen": -278.15740966796875, "logps/rejected": -327.5746154785156, "loss": 0.0823, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 1.4940156936645508, "rewards/margins": 5.760683536529541, "rewards/rejected": -4.26666784286499, "step": 2090 }, { "epoch": 1.07, "learning_rate": 3.577618688771665e-07, "logits/chosen": -2.2535722255706787, "logits/rejected": -2.3259873390197754, "logps/chosen": -257.2765197753906, "logps/rejected": -300.7965087890625, "loss": 0.112, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 1.2972657680511475, "rewards/margins": 5.322561740875244, "rewards/rejected": -4.025295257568359, "step": 2100 }, { "epoch": 1.07, "eval_logits/chosen": -2.50936222076416, "eval_logits/rejected": -2.5722339153289795, "eval_logps/chosen": -285.0382995605469, "eval_logps/rejected": -284.0433349609375, "eval_loss": 0.5730677843093872, "eval_rewards/accuracies": 0.7480915784835815, "eval_rewards/chosen": -0.6787797808647156, "eval_rewards/margins": 1.9120395183563232, "eval_rewards/rejected": -2.5908188819885254, "eval_runtime": 302.0143, "eval_samples_per_second": 6.907, "eval_steps_per_second": 0.434, "step": 2100 }, { "epoch": 1.07, "learning_rate": 3.568198944988696e-07, "logits/chosen": -2.372765064239502, "logits/rejected": -2.356163501739502, "logps/chosen": -244.1463623046875, "logps/rejected": -309.8556823730469, "loss": 0.0817, "rewards/accuracies": 0.9375, "rewards/chosen": 0.721108078956604, "rewards/margins": 5.2660932540893555, "rewards/rejected": -4.544985294342041, "step": 2110 }, { "epoch": 1.08, "learning_rate": 3.558779201205727e-07, "logits/chosen": -2.3378894329071045, "logits/rejected": -2.524498462677002, "logps/chosen": -239.94595336914062, "logps/rejected": -288.84918212890625, "loss": 0.0831, "rewards/accuracies": 1.0, "rewards/chosen": 1.3765751123428345, "rewards/margins": 6.299831867218018, "rewards/rejected": -4.9232563972473145, "step": 2120 }, { "epoch": 1.08, "learning_rate": 3.549359457422758e-07, "logits/chosen": -2.316340923309326, "logits/rejected": -2.3115782737731934, "logps/chosen": -269.43890380859375, "logps/rejected": -307.49481201171875, "loss": 0.0837, "rewards/accuracies": 0.9375, "rewards/chosen": 1.7511188983917236, "rewards/margins": 5.8343915939331055, "rewards/rejected": -4.083272933959961, "step": 2130 }, { "epoch": 1.09, "learning_rate": 3.539939713639789e-07, "logits/chosen": -2.3972020149230957, "logits/rejected": -2.362435817718506, "logps/chosen": -242.6447296142578, "logps/rejected": -309.8039245605469, "loss": 0.0857, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.9248175621032715, "rewards/margins": 5.774969577789307, "rewards/rejected": -4.850151538848877, "step": 2140 }, { "epoch": 1.09, "learning_rate": 3.53051996985682e-07, "logits/chosen": -2.3665599822998047, "logits/rejected": -2.4754738807678223, "logps/chosen": -251.0576171875, "logps/rejected": -287.749267578125, "loss": 0.0704, "rewards/accuracies": 0.9375, "rewards/chosen": 0.6899253129959106, "rewards/margins": 5.328171253204346, "rewards/rejected": -4.638245582580566, "step": 2150 }, { "epoch": 1.1, "learning_rate": 3.5211002260738506e-07, "logits/chosen": -2.524543046951294, "logits/rejected": -2.5408785343170166, "logps/chosen": -281.8375244140625, "logps/rejected": -346.6162414550781, "loss": 0.0891, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.8872039914131165, "rewards/margins": 6.403290748596191, "rewards/rejected": -5.516086578369141, "step": 2160 }, { "epoch": 1.1, "learning_rate": 3.511680482290881e-07, "logits/chosen": -2.4190125465393066, "logits/rejected": -2.5307183265686035, "logps/chosen": -280.59991455078125, "logps/rejected": -276.5904846191406, "loss": 0.0887, "rewards/accuracies": 0.8999999761581421, "rewards/chosen": 0.7298386096954346, "rewards/margins": 4.927010536193848, "rewards/rejected": -4.197172164916992, "step": 2170 }, { "epoch": 1.11, "learning_rate": 3.502260738507912e-07, "logits/chosen": -2.5079550743103027, "logits/rejected": -2.544100046157837, "logps/chosen": -240.7677001953125, "logps/rejected": -281.4288024902344, "loss": 0.0963, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.6959851980209351, "rewards/margins": 4.724411487579346, "rewards/rejected": -4.028426170349121, "step": 2180 }, { "epoch": 1.11, "learning_rate": 3.492840994724943e-07, "logits/chosen": -2.4321839809417725, "logits/rejected": -2.495434284210205, "logps/chosen": -269.27874755859375, "logps/rejected": -292.94769287109375, "loss": 0.0849, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.8701983690261841, "rewards/margins": 4.984339714050293, "rewards/rejected": -4.11414098739624, "step": 2190 }, { "epoch": 1.12, "learning_rate": 3.483421250941974e-07, "logits/chosen": -2.420361280441284, "logits/rejected": -2.4040703773498535, "logps/chosen": -263.71343994140625, "logps/rejected": -303.37493896484375, "loss": 0.0539, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.6098963618278503, "rewards/margins": 5.93268346786499, "rewards/rejected": -5.322787284851074, "step": 2200 }, { "epoch": 1.12, "eval_logits/chosen": -2.5303497314453125, "eval_logits/rejected": -2.5936505794525146, "eval_logps/chosen": -289.07073974609375, "eval_logps/rejected": -287.4447937011719, "eval_loss": 0.5868561267852783, "eval_rewards/accuracies": 0.7366412281990051, "eval_rewards/chosen": -1.0820242166519165, "eval_rewards/margins": 1.8489437103271484, "eval_rewards/rejected": -2.9309680461883545, "eval_runtime": 297.2334, "eval_samples_per_second": 7.018, "eval_steps_per_second": 0.441, "step": 2200 }, { "epoch": 1.12, "learning_rate": 3.474001507159005e-07, "logits/chosen": -2.474795341491699, "logits/rejected": -2.452157497406006, "logps/chosen": -250.7163848876953, "logps/rejected": -301.6701354980469, "loss": 0.0699, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.8442606925964355, "rewards/margins": 5.76595401763916, "rewards/rejected": -4.921692848205566, "step": 2210 }, { "epoch": 1.13, "learning_rate": 3.464581763376036e-07, "logits/chosen": -2.435446262359619, "logits/rejected": -2.3850350379943848, "logps/chosen": -297.05499267578125, "logps/rejected": -325.5220947265625, "loss": 0.0517, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.8659135103225708, "rewards/margins": 6.299160957336426, "rewards/rejected": -5.4332475662231445, "step": 2220 }, { "epoch": 1.13, "learning_rate": 3.455162019593067e-07, "logits/chosen": -2.297966480255127, "logits/rejected": -2.2471892833709717, "logps/chosen": -314.8829650878906, "logps/rejected": -365.14385986328125, "loss": 0.1462, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.6976747512817383, "rewards/margins": 6.606612205505371, "rewards/rejected": -5.908937931060791, "step": 2230 }, { "epoch": 1.14, "learning_rate": 3.445742275810098e-07, "logits/chosen": -2.43511700630188, "logits/rejected": -2.417332410812378, "logps/chosen": -296.8207092285156, "logps/rejected": -336.8685607910156, "loss": 0.0711, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.1867338418960571, "rewards/margins": 7.082969665527344, "rewards/rejected": -5.896236419677734, "step": 2240 }, { "epoch": 1.14, "learning_rate": 3.4363225320271286e-07, "logits/chosen": -2.4127955436706543, "logits/rejected": -2.417238235473633, "logps/chosen": -281.3343200683594, "logps/rejected": -308.0960693359375, "loss": 0.1002, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.9532784223556519, "rewards/margins": 5.915719985961914, "rewards/rejected": -4.962441444396973, "step": 2250 }, { "epoch": 1.15, "learning_rate": 3.4269027882441596e-07, "logits/chosen": -2.3315882682800293, "logits/rejected": -2.4039595127105713, "logps/chosen": -268.7440490722656, "logps/rejected": -299.9024658203125, "loss": 0.0782, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.6864622831344604, "rewards/margins": 5.834595203399658, "rewards/rejected": -5.148133277893066, "step": 2260 }, { "epoch": 1.15, "learning_rate": 3.4174830444611906e-07, "logits/chosen": -2.315544366836548, "logits/rejected": -2.394622802734375, "logps/chosen": -326.4775695800781, "logps/rejected": -344.4993896484375, "loss": 0.0859, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.2968065738677979, "rewards/margins": 5.967825412750244, "rewards/rejected": -4.671019077301025, "step": 2270 }, { "epoch": 1.16, "learning_rate": 3.4080633006782216e-07, "logits/chosen": -2.3206300735473633, "logits/rejected": -2.3000760078430176, "logps/chosen": -260.9918212890625, "logps/rejected": -287.2997131347656, "loss": 0.099, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.5387351512908936, "rewards/margins": 5.4082255363464355, "rewards/rejected": -4.869490146636963, "step": 2280 }, { "epoch": 1.16, "learning_rate": 3.398643556895252e-07, "logits/chosen": -2.3872666358947754, "logits/rejected": -2.346640110015869, "logps/chosen": -244.3435821533203, "logps/rejected": -296.1825256347656, "loss": 0.0937, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.8320892453193665, "rewards/margins": 5.071323871612549, "rewards/rejected": -4.2392354011535645, "step": 2290 }, { "epoch": 1.17, "learning_rate": 3.389223813112283e-07, "logits/chosen": -2.3533055782318115, "logits/rejected": -2.340881824493408, "logps/chosen": -234.72256469726562, "logps/rejected": -282.35626220703125, "loss": 0.0811, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.8372732400894165, "rewards/margins": 5.791101932525635, "rewards/rejected": -4.953829288482666, "step": 2300 }, { "epoch": 1.17, "eval_logits/chosen": -2.4560341835021973, "eval_logits/rejected": -2.513721466064453, "eval_logps/chosen": -286.5822448730469, "eval_logps/rejected": -285.3392028808594, "eval_loss": 0.6306248307228088, "eval_rewards/accuracies": 0.7423664331436157, "eval_rewards/chosen": -0.8331778645515442, "eval_rewards/margins": 1.88722825050354, "eval_rewards/rejected": -2.7204058170318604, "eval_runtime": 301.8255, "eval_samples_per_second": 6.911, "eval_steps_per_second": 0.434, "step": 2300 }, { "epoch": 1.17, "learning_rate": 3.379804069329314e-07, "logits/chosen": -2.408510208129883, "logits/rejected": -2.355539560317993, "logps/chosen": -269.3663024902344, "logps/rejected": -276.4700012207031, "loss": 0.1084, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 1.1696228981018066, "rewards/margins": 5.296755313873291, "rewards/rejected": -4.127132892608643, "step": 2310 }, { "epoch": 1.18, "learning_rate": 3.370384325546345e-07, "logits/chosen": -2.3480758666992188, "logits/rejected": -2.3206279277801514, "logps/chosen": -286.1962890625, "logps/rejected": -328.54376220703125, "loss": 0.1032, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.8524169921875, "rewards/margins": 6.0059003829956055, "rewards/rejected": -5.153482913970947, "step": 2320 }, { "epoch": 1.19, "learning_rate": 3.3609645817633756e-07, "logits/chosen": -2.3404994010925293, "logits/rejected": -2.3466391563415527, "logps/chosen": -283.11956787109375, "logps/rejected": -303.4478454589844, "loss": 0.0713, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.290867418050766, "rewards/margins": 6.1293816566467285, "rewards/rejected": -5.8385138511657715, "step": 2330 }, { "epoch": 1.19, "learning_rate": 3.3515448379804066e-07, "logits/chosen": -2.335618257522583, "logits/rejected": -2.350062131881714, "logps/chosen": -250.2252960205078, "logps/rejected": -288.460693359375, "loss": 0.1828, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.27890732884407043, "rewards/margins": 5.905325889587402, "rewards/rejected": -5.6264190673828125, "step": 2340 }, { "epoch": 1.2, "learning_rate": 3.3421250941974376e-07, "logits/chosen": -2.405531406402588, "logits/rejected": -2.370527505874634, "logps/chosen": -326.8871765136719, "logps/rejected": -346.4276428222656, "loss": 0.0724, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.5522273778915405, "rewards/margins": 6.337648868560791, "rewards/rejected": -5.785420894622803, "step": 2350 }, { "epoch": 1.2, "learning_rate": 3.3327053504144686e-07, "logits/chosen": -2.3920373916625977, "logits/rejected": -2.3272738456726074, "logps/chosen": -280.7931823730469, "logps/rejected": -281.2308654785156, "loss": 0.132, "rewards/accuracies": 0.949999988079071, "rewards/chosen": -0.21835920214653015, "rewards/margins": 5.349105358123779, "rewards/rejected": -5.567465305328369, "step": 2360 }, { "epoch": 1.21, "learning_rate": 3.3232856066314996e-07, "logits/chosen": -2.323270797729492, "logits/rejected": -2.3886377811431885, "logps/chosen": -273.2583923339844, "logps/rejected": -344.60296630859375, "loss": 0.1121, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.7613003253936768, "rewards/margins": 7.667241096496582, "rewards/rejected": -6.905940055847168, "step": 2370 }, { "epoch": 1.21, "learning_rate": 3.3138658628485306e-07, "logits/chosen": -2.459217071533203, "logits/rejected": -2.5143425464630127, "logps/chosen": -271.41412353515625, "logps/rejected": -303.4784851074219, "loss": 0.092, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.6888279914855957, "rewards/margins": 6.053578853607178, "rewards/rejected": -5.364750862121582, "step": 2380 }, { "epoch": 1.22, "learning_rate": 3.3044461190655616e-07, "logits/chosen": -2.5100953578948975, "logits/rejected": -2.5121243000030518, "logps/chosen": -308.7528381347656, "logps/rejected": -325.7645263671875, "loss": 0.0796, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.9787341952323914, "rewards/margins": 5.852758884429932, "rewards/rejected": -4.874024391174316, "step": 2390 }, { "epoch": 1.22, "learning_rate": 3.2950263752825926e-07, "logits/chosen": -2.4334263801574707, "logits/rejected": -2.417992115020752, "logps/chosen": -254.85116577148438, "logps/rejected": -290.29803466796875, "loss": 0.0877, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.1692960113286972, "rewards/margins": 5.9460835456848145, "rewards/rejected": -5.776787757873535, "step": 2400 }, { "epoch": 1.22, "eval_logits/chosen": -2.5290729999542236, "eval_logits/rejected": -2.5924720764160156, "eval_logps/chosen": -291.32537841796875, "eval_logps/rejected": -291.75762939453125, "eval_loss": 0.5962603688240051, "eval_rewards/accuracies": 0.7480915784835815, "eval_rewards/chosen": -1.307490348815918, "eval_rewards/margins": 2.0547597408294678, "eval_rewards/rejected": -3.3622498512268066, "eval_runtime": 296.9185, "eval_samples_per_second": 7.025, "eval_steps_per_second": 0.441, "step": 2400 }, { "epoch": 1.23, "learning_rate": 3.2856066314996225e-07, "logits/chosen": -2.4625680446624756, "logits/rejected": -2.4047350883483887, "logps/chosen": -313.5804138183594, "logps/rejected": -386.77752685546875, "loss": 0.0949, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.6074233055114746, "rewards/margins": 6.5688276290893555, "rewards/rejected": -5.961404800415039, "step": 2410 }, { "epoch": 1.23, "learning_rate": 3.2761868877166536e-07, "logits/chosen": -2.4965758323669434, "logits/rejected": -2.4851412773132324, "logps/chosen": -283.6171875, "logps/rejected": -300.8470458984375, "loss": 0.1552, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.637147843837738, "rewards/margins": 6.918099403381348, "rewards/rejected": -6.280951499938965, "step": 2420 }, { "epoch": 1.24, "learning_rate": 3.2667671439336846e-07, "logits/chosen": -2.3009049892425537, "logits/rejected": -2.3287174701690674, "logps/chosen": -280.84259033203125, "logps/rejected": -308.1491394042969, "loss": 0.1894, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1745898723602295, "rewards/margins": 5.908175468444824, "rewards/rejected": -5.733586311340332, "step": 2430 }, { "epoch": 1.24, "learning_rate": 3.2573474001507156e-07, "logits/chosen": -2.3898582458496094, "logits/rejected": -2.408456325531006, "logps/chosen": -274.39056396484375, "logps/rejected": -301.08990478515625, "loss": 0.061, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.1737585067749023, "rewards/margins": 6.217486381530762, "rewards/rejected": -5.043726921081543, "step": 2440 }, { "epoch": 1.25, "learning_rate": 3.2479276563677466e-07, "logits/chosen": -2.416377544403076, "logits/rejected": -2.39912748336792, "logps/chosen": -294.30181884765625, "logps/rejected": -346.01507568359375, "loss": 0.0731, "rewards/accuracies": 0.9375, "rewards/chosen": 1.171212911605835, "rewards/margins": 6.255845069885254, "rewards/rejected": -5.084632873535156, "step": 2450 }, { "epoch": 1.25, "learning_rate": 3.2385079125847776e-07, "logits/chosen": -2.516638994216919, "logits/rejected": -2.461843729019165, "logps/chosen": -273.90057373046875, "logps/rejected": -311.27276611328125, "loss": 0.0697, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.6051326394081116, "rewards/margins": 6.512951850891113, "rewards/rejected": -5.907818794250488, "step": 2460 }, { "epoch": 1.26, "learning_rate": 3.2290881688018086e-07, "logits/chosen": -2.438994884490967, "logits/rejected": -2.5107030868530273, "logps/chosen": -266.1100158691406, "logps/rejected": -337.4862365722656, "loss": 0.0749, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.5303374528884888, "rewards/margins": 6.261448860168457, "rewards/rejected": -5.731112003326416, "step": 2470 }, { "epoch": 1.26, "learning_rate": 3.2196684250188396e-07, "logits/chosen": -2.5541601181030273, "logits/rejected": -2.3836662769317627, "logps/chosen": -336.1649475097656, "logps/rejected": -354.42413330078125, "loss": 0.0768, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.7871938347816467, "rewards/margins": 7.078111171722412, "rewards/rejected": -6.290916442871094, "step": 2480 }, { "epoch": 1.27, "learning_rate": 3.2102486812358706e-07, "logits/chosen": -2.3908188343048096, "logits/rejected": -2.292724609375, "logps/chosen": -282.8141784667969, "logps/rejected": -298.9007263183594, "loss": 0.0983, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.6560268402099609, "rewards/margins": 5.793659210205078, "rewards/rejected": -5.137632846832275, "step": 2490 }, { "epoch": 1.27, "learning_rate": 3.200828937452901e-07, "logits/chosen": -2.3237905502319336, "logits/rejected": -2.2244579792022705, "logps/chosen": -226.01779174804688, "logps/rejected": -285.91009521484375, "loss": 0.1114, "rewards/accuracies": 0.9375, "rewards/chosen": 0.20410139858722687, "rewards/margins": 5.630406856536865, "rewards/rejected": -5.426304817199707, "step": 2500 }, { "epoch": 1.27, "eval_logits/chosen": -2.4141666889190674, "eval_logits/rejected": -2.479241132736206, "eval_logps/chosen": -291.859375, "eval_logps/rejected": -293.6587219238281, "eval_loss": 0.6126044392585754, "eval_rewards/accuracies": 0.7461832165718079, "eval_rewards/chosen": -1.360889196395874, "eval_rewards/margins": 2.191472291946411, "eval_rewards/rejected": -3.552361249923706, "eval_runtime": 302.1294, "eval_samples_per_second": 6.904, "eval_steps_per_second": 0.434, "step": 2500 }, { "epoch": 1.28, "learning_rate": 3.191409193669932e-07, "logits/chosen": -2.2465217113494873, "logits/rejected": -2.2498700618743896, "logps/chosen": -249.7085418701172, "logps/rejected": -300.81463623046875, "loss": 0.0749, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.22908727824687958, "rewards/margins": 5.943365097045898, "rewards/rejected": -5.714277744293213, "step": 2510 }, { "epoch": 1.28, "learning_rate": 3.181989449886963e-07, "logits/chosen": -2.2804088592529297, "logits/rejected": -2.2391762733459473, "logps/chosen": -248.0771942138672, "logps/rejected": -318.37176513671875, "loss": 0.0681, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.41089898347854614, "rewards/margins": 6.202281475067139, "rewards/rejected": -5.791383266448975, "step": 2520 }, { "epoch": 1.29, "learning_rate": 3.172569706103994e-07, "logits/chosen": -2.38960599899292, "logits/rejected": -2.3537020683288574, "logps/chosen": -231.6435089111328, "logps/rejected": -331.68170166015625, "loss": 0.123, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.6632026433944702, "rewards/margins": 7.348013877868652, "rewards/rejected": -6.684811592102051, "step": 2530 }, { "epoch": 1.29, "learning_rate": 3.1631499623210246e-07, "logits/chosen": -2.399754047393799, "logits/rejected": -2.367912769317627, "logps/chosen": -252.1080780029297, "logps/rejected": -317.89190673828125, "loss": 0.0879, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.6604760885238647, "rewards/margins": 6.377139568328857, "rewards/rejected": -5.716663360595703, "step": 2540 }, { "epoch": 1.3, "learning_rate": 3.1537302185380556e-07, "logits/chosen": -2.3488802909851074, "logits/rejected": -2.3999979496002197, "logps/chosen": -242.5727996826172, "logps/rejected": -307.24493408203125, "loss": 0.1015, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.9912754893302917, "rewards/margins": 6.489681243896484, "rewards/rejected": -5.498406410217285, "step": 2550 }, { "epoch": 1.3, "learning_rate": 3.1443104747550866e-07, "logits/chosen": -2.273897171020508, "logits/rejected": -2.38022780418396, "logps/chosen": -313.0953063964844, "logps/rejected": -299.621826171875, "loss": 0.1084, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.5919345021247864, "rewards/margins": 5.737548828125, "rewards/rejected": -5.145614147186279, "step": 2560 }, { "epoch": 1.31, "learning_rate": 3.1348907309721176e-07, "logits/chosen": -2.4264285564422607, "logits/rejected": -2.3853797912597656, "logps/chosen": -290.6047058105469, "logps/rejected": -311.0415954589844, "loss": 0.0666, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.7939327955245972, "rewards/margins": 6.2428998947143555, "rewards/rejected": -5.448967456817627, "step": 2570 }, { "epoch": 1.31, "learning_rate": 3.125470987189148e-07, "logits/chosen": -2.335716962814331, "logits/rejected": -2.367015838623047, "logps/chosen": -275.623779296875, "logps/rejected": -326.81488037109375, "loss": 0.0736, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.8744140863418579, "rewards/margins": 6.54227352142334, "rewards/rejected": -5.6678595542907715, "step": 2580 }, { "epoch": 1.32, "learning_rate": 3.116051243406179e-07, "logits/chosen": -2.27899169921875, "logits/rejected": -2.2389168739318848, "logps/chosen": -277.07440185546875, "logps/rejected": -301.26190185546875, "loss": 0.1244, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.7585972547531128, "rewards/margins": 5.7357892990112305, "rewards/rejected": -4.977191925048828, "step": 2590 }, { "epoch": 1.32, "learning_rate": 3.10663149962321e-07, "logits/chosen": -2.448560953140259, "logits/rejected": -2.4991815090179443, "logps/chosen": -326.71539306640625, "logps/rejected": -349.9276428222656, "loss": 0.0864, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.5657342672348022, "rewards/margins": 6.299088478088379, "rewards/rejected": -5.733354091644287, "step": 2600 }, { "epoch": 1.32, "eval_logits/chosen": -2.50575590133667, "eval_logits/rejected": -2.570965051651001, "eval_logps/chosen": -294.3439636230469, "eval_logps/rejected": -295.719482421875, "eval_loss": 0.6456525921821594, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -1.609349012374878, "eval_rewards/margins": 2.149090051651001, "eval_rewards/rejected": -3.758439302444458, "eval_runtime": 296.9418, "eval_samples_per_second": 7.025, "eval_steps_per_second": 0.441, "step": 2600 }, { "epoch": 1.33, "learning_rate": 3.097211755840241e-07, "logits/chosen": -2.336880683898926, "logits/rejected": -2.3799967765808105, "logps/chosen": -235.99459838867188, "logps/rejected": -279.66180419921875, "loss": 0.1127, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5167864561080933, "rewards/margins": 5.976006507873535, "rewards/rejected": -5.459219932556152, "step": 2610 }, { "epoch": 1.33, "learning_rate": 3.087792012057272e-07, "logits/chosen": -2.328923463821411, "logits/rejected": -2.3436508178710938, "logps/chosen": -272.0519714355469, "logps/rejected": -316.40594482421875, "loss": 0.097, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 1.3497155904769897, "rewards/margins": 7.2396721839904785, "rewards/rejected": -5.889956474304199, "step": 2620 }, { "epoch": 1.34, "learning_rate": 3.078372268274303e-07, "logits/chosen": -2.39420747756958, "logits/rejected": -2.4377219676971436, "logps/chosen": -301.8684997558594, "logps/rejected": -345.6015625, "loss": 0.0943, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.38849225640296936, "rewards/margins": 6.270806789398193, "rewards/rejected": -5.882315158843994, "step": 2630 }, { "epoch": 1.34, "learning_rate": 3.068952524491334e-07, "logits/chosen": -2.357003927230835, "logits/rejected": -2.4049508571624756, "logps/chosen": -240.2396697998047, "logps/rejected": -322.50738525390625, "loss": 0.0832, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.9055463671684265, "rewards/margins": 6.93111515045166, "rewards/rejected": -6.025568962097168, "step": 2640 }, { "epoch": 1.35, "learning_rate": 3.059532780708365e-07, "logits/chosen": -2.3569881916046143, "logits/rejected": -2.4609577655792236, "logps/chosen": -298.69207763671875, "logps/rejected": -278.62408447265625, "loss": 0.071, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.22584030032157898, "rewards/margins": 5.36661434173584, "rewards/rejected": -5.140774250030518, "step": 2650 }, { "epoch": 1.35, "learning_rate": 3.050113036925395e-07, "logits/chosen": -2.3827061653137207, "logits/rejected": -2.4103548526763916, "logps/chosen": -255.54251098632812, "logps/rejected": -301.31158447265625, "loss": 0.0674, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.49262189865112305, "rewards/margins": 6.265991687774658, "rewards/rejected": -5.773369789123535, "step": 2660 }, { "epoch": 1.36, "learning_rate": 3.040693293142426e-07, "logits/chosen": -2.323338031768799, "logits/rejected": -2.4888432025909424, "logps/chosen": -287.3337707519531, "logps/rejected": -295.5697326660156, "loss": 0.1157, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.3787381052970886, "rewards/margins": 6.640289306640625, "rewards/rejected": -6.261551856994629, "step": 2670 }, { "epoch": 1.36, "learning_rate": 3.031273549359457e-07, "logits/chosen": -2.3468449115753174, "logits/rejected": -2.3720457553863525, "logps/chosen": -280.97100830078125, "logps/rejected": -349.592529296875, "loss": 0.0749, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.01323468703776598, "rewards/margins": 6.698792934417725, "rewards/rejected": -6.685558319091797, "step": 2680 }, { "epoch": 1.37, "learning_rate": 3.021853805576488e-07, "logits/chosen": -2.4187724590301514, "logits/rejected": -2.3718185424804688, "logps/chosen": -288.25787353515625, "logps/rejected": -358.4739990234375, "loss": 0.0975, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.8575443029403687, "rewards/margins": 7.281981468200684, "rewards/rejected": -6.424437046051025, "step": 2690 }, { "epoch": 1.37, "learning_rate": 3.012434061793519e-07, "logits/chosen": -2.3994593620300293, "logits/rejected": -2.3024771213531494, "logps/chosen": -243.65792846679688, "logps/rejected": -296.03070068359375, "loss": 0.0708, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.17152036726474762, "rewards/margins": 5.766635894775391, "rewards/rejected": -5.595116138458252, "step": 2700 }, { "epoch": 1.37, "eval_logits/chosen": -2.468432664871216, "eval_logits/rejected": -2.539393186569214, "eval_logps/chosen": -296.3444519042969, "eval_logps/rejected": -295.1769104003906, "eval_loss": 0.6079808473587036, "eval_rewards/accuracies": 0.7461832165718079, "eval_rewards/chosen": -1.8093987703323364, "eval_rewards/margins": 1.894778847694397, "eval_rewards/rejected": -3.7041778564453125, "eval_runtime": 301.6905, "eval_samples_per_second": 6.914, "eval_steps_per_second": 0.434, "step": 2700 }, { "epoch": 1.38, "learning_rate": 3.00301431801055e-07, "logits/chosen": -2.330199718475342, "logits/rejected": -2.2474539279937744, "logps/chosen": -263.4881286621094, "logps/rejected": -278.0344543457031, "loss": 0.0757, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.5858259797096252, "rewards/margins": 6.23793363571167, "rewards/rejected": -5.652107238769531, "step": 2710 }, { "epoch": 1.38, "learning_rate": 2.993594574227581e-07, "logits/chosen": -2.25746488571167, "logits/rejected": -2.329878568649292, "logps/chosen": -248.5938262939453, "logps/rejected": -295.58740234375, "loss": 0.0946, "rewards/accuracies": 0.9375, "rewards/chosen": 0.2743186950683594, "rewards/margins": 5.52974271774292, "rewards/rejected": -5.255423545837402, "step": 2720 }, { "epoch": 1.39, "learning_rate": 2.984174830444612e-07, "logits/chosen": -2.3174257278442383, "logits/rejected": -2.2660281658172607, "logps/chosen": -276.1614074707031, "logps/rejected": -335.32891845703125, "loss": 0.0772, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.044588230550289154, "rewards/margins": 6.649122714996338, "rewards/rejected": -6.6937103271484375, "step": 2730 }, { "epoch": 1.39, "learning_rate": 2.9747550866616425e-07, "logits/chosen": -2.442142963409424, "logits/rejected": -2.360114574432373, "logps/chosen": -272.2823486328125, "logps/rejected": -308.30877685546875, "loss": 0.1091, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.7997811436653137, "rewards/margins": 6.003107070922852, "rewards/rejected": -5.2033257484436035, "step": 2740 }, { "epoch": 1.4, "learning_rate": 2.9653353428786735e-07, "logits/chosen": -2.4135284423828125, "logits/rejected": -2.41511869430542, "logps/chosen": -234.8014373779297, "logps/rejected": -294.651123046875, "loss": 0.158, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1786421835422516, "rewards/margins": 5.884564399719238, "rewards/rejected": -5.705922603607178, "step": 2750 }, { "epoch": 1.4, "learning_rate": 2.9559155990957045e-07, "logits/chosen": -2.3652257919311523, "logits/rejected": -2.398829936981201, "logps/chosen": -292.03717041015625, "logps/rejected": -300.37603759765625, "loss": 0.1072, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.36263585090637207, "rewards/margins": 6.317052841186523, "rewards/rejected": -5.954416751861572, "step": 2760 }, { "epoch": 1.41, "learning_rate": 2.9464958553127355e-07, "logits/chosen": -2.26711106300354, "logits/rejected": -2.275381565093994, "logps/chosen": -294.2849426269531, "logps/rejected": -328.8524169921875, "loss": 0.0606, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.6599899530410767, "rewards/margins": 6.481578826904297, "rewards/rejected": -5.82158899307251, "step": 2770 }, { "epoch": 1.41, "learning_rate": 2.9370761115297666e-07, "logits/chosen": -2.4011237621307373, "logits/rejected": -2.46509051322937, "logps/chosen": -288.33978271484375, "logps/rejected": -335.15277099609375, "loss": 0.0992, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.3577355742454529, "rewards/margins": 6.564070224761963, "rewards/rejected": -6.206334590911865, "step": 2780 }, { "epoch": 1.42, "learning_rate": 2.927656367746797e-07, "logits/chosen": -2.4565563201904297, "logits/rejected": -2.4363064765930176, "logps/chosen": -284.31683349609375, "logps/rejected": -353.6046447753906, "loss": 0.0643, "rewards/accuracies": 1.0, "rewards/chosen": 1.1482659578323364, "rewards/margins": 7.725053310394287, "rewards/rejected": -6.57678747177124, "step": 2790 }, { "epoch": 1.42, "learning_rate": 2.918236623963828e-07, "logits/chosen": -2.337040424346924, "logits/rejected": -2.4189975261688232, "logps/chosen": -324.59759521484375, "logps/rejected": -350.65301513671875, "loss": 0.0794, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.5803574323654175, "rewards/margins": 6.807870388031006, "rewards/rejected": -6.227513313293457, "step": 2800 }, { "epoch": 1.42, "eval_logits/chosen": -2.4662692546844482, "eval_logits/rejected": -2.5368869304656982, "eval_logps/chosen": -295.93536376953125, "eval_logps/rejected": -296.73797607421875, "eval_loss": 0.6009625792503357, "eval_rewards/accuracies": 0.7538167834281921, "eval_rewards/chosen": -1.7684876918792725, "eval_rewards/margins": 2.0917961597442627, "eval_rewards/rejected": -3.8602840900421143, "eval_runtime": 296.7378, "eval_samples_per_second": 7.03, "eval_steps_per_second": 0.441, "step": 2800 }, { "epoch": 1.43, "learning_rate": 2.908816880180859e-07, "logits/chosen": -2.37556791305542, "logits/rejected": -2.3833582401275635, "logps/chosen": -273.7535400390625, "logps/rejected": -272.9826354980469, "loss": 0.07, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.3809967041015625, "rewards/margins": 5.994932651519775, "rewards/rejected": -5.613936424255371, "step": 2810 }, { "epoch": 1.43, "learning_rate": 2.8993971363978895e-07, "logits/chosen": -2.3508405685424805, "logits/rejected": -2.31650710105896, "logps/chosen": -276.4288635253906, "logps/rejected": -333.42022705078125, "loss": 0.1003, "rewards/accuracies": 0.9375, "rewards/chosen": 0.6772788763046265, "rewards/margins": 6.1997246742248535, "rewards/rejected": -5.522445201873779, "step": 2820 }, { "epoch": 1.44, "learning_rate": 2.8899773926149205e-07, "logits/chosen": -2.4621999263763428, "logits/rejected": -2.437527656555176, "logps/chosen": -288.25042724609375, "logps/rejected": -349.9354553222656, "loss": 0.0507, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.2092394828796387, "rewards/margins": 7.094609260559082, "rewards/rejected": -5.885369300842285, "step": 2830 }, { "epoch": 1.44, "learning_rate": 2.8805576488319515e-07, "logits/chosen": -2.376804828643799, "logits/rejected": -2.3722846508026123, "logps/chosen": -294.90625, "logps/rejected": -318.5020446777344, "loss": 0.0903, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.9208866953849792, "rewards/margins": 5.965109825134277, "rewards/rejected": -5.044223785400391, "step": 2840 }, { "epoch": 1.45, "learning_rate": 2.8711379050489825e-07, "logits/chosen": -2.377434253692627, "logits/rejected": -2.3900840282440186, "logps/chosen": -296.39239501953125, "logps/rejected": -353.85516357421875, "loss": 0.0987, "rewards/accuracies": 0.9375, "rewards/chosen": 0.18252840638160706, "rewards/margins": 5.856029510498047, "rewards/rejected": -5.673501491546631, "step": 2850 }, { "epoch": 1.45, "learning_rate": 2.8617181612660135e-07, "logits/chosen": -2.3156490325927734, "logits/rejected": -2.3677258491516113, "logps/chosen": -287.42572021484375, "logps/rejected": -316.38824462890625, "loss": 0.0966, "rewards/accuracies": 0.9375, "rewards/chosen": 0.88239586353302, "rewards/margins": 6.383681774139404, "rewards/rejected": -5.501286506652832, "step": 2860 }, { "epoch": 1.46, "learning_rate": 2.8522984174830445e-07, "logits/chosen": -2.3602707386016846, "logits/rejected": -2.4136736392974854, "logps/chosen": -227.7016143798828, "logps/rejected": -287.86175537109375, "loss": 0.0952, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.7701293230056763, "rewards/margins": 6.855650901794434, "rewards/rejected": -6.085522174835205, "step": 2870 }, { "epoch": 1.46, "learning_rate": 2.8428786737000755e-07, "logits/chosen": -2.4034171104431152, "logits/rejected": -2.5166029930114746, "logps/chosen": -307.9529724121094, "logps/rejected": -370.57269287109375, "loss": 0.063, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.6714164018630981, "rewards/margins": 7.327467918395996, "rewards/rejected": -6.6560516357421875, "step": 2880 }, { "epoch": 1.47, "learning_rate": 2.8334589299171065e-07, "logits/chosen": -2.331711769104004, "logits/rejected": -2.382993459701538, "logps/chosen": -295.94573974609375, "logps/rejected": -307.9302673339844, "loss": 0.0708, "rewards/accuracies": 0.9375, "rewards/chosen": 0.8609134554862976, "rewards/margins": 6.539481163024902, "rewards/rejected": -5.678567409515381, "step": 2890 }, { "epoch": 1.48, "learning_rate": 2.824039186134137e-07, "logits/chosen": -2.412370204925537, "logits/rejected": -2.394695997238159, "logps/chosen": -280.772705078125, "logps/rejected": -312.4391174316406, "loss": 0.1009, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.9061541557312012, "rewards/margins": 6.6510467529296875, "rewards/rejected": -5.744892120361328, "step": 2900 }, { "epoch": 1.48, "eval_logits/chosen": -2.4072649478912354, "eval_logits/rejected": -2.483442544937134, "eval_logps/chosen": -294.30072021484375, "eval_logps/rejected": -294.0972900390625, "eval_loss": 0.6101788282394409, "eval_rewards/accuracies": 0.7347328066825867, "eval_rewards/chosen": -1.6050245761871338, "eval_rewards/margins": 1.9911925792694092, "eval_rewards/rejected": -3.596216917037964, "eval_runtime": 301.8264, "eval_samples_per_second": 6.911, "eval_steps_per_second": 0.434, "step": 2900 }, { "epoch": 1.48, "learning_rate": 2.8146194423511675e-07, "logits/chosen": -2.2668776512145996, "logits/rejected": -2.1470532417297363, "logps/chosen": -253.3878936767578, "logps/rejected": -266.178955078125, "loss": 0.1025, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.08239737898111343, "rewards/margins": 5.62557315826416, "rewards/rejected": -5.543176174163818, "step": 2910 }, { "epoch": 1.49, "learning_rate": 2.8051996985681985e-07, "logits/chosen": -2.3062376976013184, "logits/rejected": -2.282536029815674, "logps/chosen": -260.6484375, "logps/rejected": -307.1168518066406, "loss": 0.0821, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 1.140464186668396, "rewards/margins": 7.348119258880615, "rewards/rejected": -6.207655429840088, "step": 2920 }, { "epoch": 1.49, "learning_rate": 2.7957799547852295e-07, "logits/chosen": -2.347249984741211, "logits/rejected": -2.361231803894043, "logps/chosen": -270.4031066894531, "logps/rejected": -315.46820068359375, "loss": 0.0929, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.9538537859916687, "rewards/margins": 6.171472072601318, "rewards/rejected": -5.217618465423584, "step": 2930 }, { "epoch": 1.5, "learning_rate": 2.7863602110022605e-07, "logits/chosen": -2.223649501800537, "logits/rejected": -2.401780128479004, "logps/chosen": -278.6842346191406, "logps/rejected": -321.9062194824219, "loss": 0.0678, "rewards/accuracies": 1.0, "rewards/chosen": 0.5670070648193359, "rewards/margins": 6.330507278442383, "rewards/rejected": -5.763500213623047, "step": 2940 }, { "epoch": 1.5, "learning_rate": 2.7769404672192915e-07, "logits/chosen": -2.369929552078247, "logits/rejected": -2.3821492195129395, "logps/chosen": -314.00030517578125, "logps/rejected": -328.1274719238281, "loss": 0.1037, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.6795863509178162, "rewards/margins": 5.990577697753906, "rewards/rejected": -5.310991287231445, "step": 2950 }, { "epoch": 1.51, "learning_rate": 2.7675207234363225e-07, "logits/chosen": -2.370741128921509, "logits/rejected": -2.3149704933166504, "logps/chosen": -315.950927734375, "logps/rejected": -310.5325622558594, "loss": 0.1237, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.6832824349403381, "rewards/margins": 6.224267482757568, "rewards/rejected": -5.540985107421875, "step": 2960 }, { "epoch": 1.51, "learning_rate": 2.7581009796533535e-07, "logits/chosen": -2.3546643257141113, "logits/rejected": -2.3952019214630127, "logps/chosen": -286.61553955078125, "logps/rejected": -338.02801513671875, "loss": 0.0919, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 1.4628432989120483, "rewards/margins": 7.785639762878418, "rewards/rejected": -6.32279634475708, "step": 2970 }, { "epoch": 1.52, "learning_rate": 2.748681235870384e-07, "logits/chosen": -2.380227565765381, "logits/rejected": -2.429625988006592, "logps/chosen": -289.5753173828125, "logps/rejected": -339.2673645019531, "loss": 0.101, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.3829438090324402, "rewards/margins": 7.5160417556762695, "rewards/rejected": -7.1330976486206055, "step": 2980 }, { "epoch": 1.52, "learning_rate": 2.739261492087415e-07, "logits/chosen": -2.327038526535034, "logits/rejected": -2.2786455154418945, "logps/chosen": -259.84698486328125, "logps/rejected": -318.90228271484375, "loss": 0.0979, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.09783251583576202, "rewards/margins": 5.981132507324219, "rewards/rejected": -6.078965187072754, "step": 2990 }, { "epoch": 1.53, "learning_rate": 2.729841748304446e-07, "logits/chosen": -2.2745556831359863, "logits/rejected": -2.400892734527588, "logps/chosen": -275.0760192871094, "logps/rejected": -331.4722595214844, "loss": 0.083, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.23459462821483612, "rewards/margins": 6.325497627258301, "rewards/rejected": -6.090902805328369, "step": 3000 }, { "epoch": 1.53, "eval_logits/chosen": -2.452087163925171, "eval_logits/rejected": -2.5306129455566406, "eval_logps/chosen": -294.6455078125, "eval_logps/rejected": -294.8183898925781, "eval_loss": 0.6125035881996155, "eval_rewards/accuracies": 0.7423664331436157, "eval_rewards/chosen": -1.6395032405853271, "eval_rewards/margins": 2.028822660446167, "eval_rewards/rejected": -3.668325424194336, "eval_runtime": 297.2135, "eval_samples_per_second": 7.019, "eval_steps_per_second": 0.441, "step": 3000 }, { "epoch": 1.53, "learning_rate": 2.720422004521477e-07, "logits/chosen": -2.395345687866211, "logits/rejected": -2.413201332092285, "logps/chosen": -297.8402099609375, "logps/rejected": -331.8768615722656, "loss": 0.0756, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.8696461915969849, "rewards/margins": 6.27551794052124, "rewards/rejected": -5.405871868133545, "step": 3010 }, { "epoch": 1.54, "learning_rate": 2.711002260738508e-07, "logits/chosen": -2.4581711292266846, "logits/rejected": -2.4832825660705566, "logps/chosen": -280.2608947753906, "logps/rejected": -315.7003173828125, "loss": 0.0761, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 1.1012874841690063, "rewards/margins": 6.882163047790527, "rewards/rejected": -5.780875205993652, "step": 3020 }, { "epoch": 1.54, "learning_rate": 2.701582516955539e-07, "logits/chosen": -2.3183887004852295, "logits/rejected": -2.3755974769592285, "logps/chosen": -273.87481689453125, "logps/rejected": -323.4425354003906, "loss": 0.081, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.9814306497573853, "rewards/margins": 6.785541534423828, "rewards/rejected": -5.804111003875732, "step": 3030 }, { "epoch": 1.55, "learning_rate": 2.6921627731725695e-07, "logits/chosen": -2.3994407653808594, "logits/rejected": -2.3673055171966553, "logps/chosen": -288.03717041015625, "logps/rejected": -322.63824462890625, "loss": 0.0837, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.7873355150222778, "rewards/margins": 6.48495626449585, "rewards/rejected": -5.6976213455200195, "step": 3040 }, { "epoch": 1.55, "learning_rate": 2.6827430293896005e-07, "logits/chosen": -2.278913736343384, "logits/rejected": -2.270564079284668, "logps/chosen": -297.72705078125, "logps/rejected": -291.41119384765625, "loss": 0.0998, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.32713982462882996, "rewards/margins": 5.756745338439941, "rewards/rejected": -5.429605007171631, "step": 3050 }, { "epoch": 1.56, "learning_rate": 2.673323285606631e-07, "logits/chosen": -2.3376736640930176, "logits/rejected": -2.242246389389038, "logps/chosen": -211.8117218017578, "logps/rejected": -277.4327087402344, "loss": 0.1517, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.6408033967018127, "rewards/margins": 5.57335090637207, "rewards/rejected": -4.932547092437744, "step": 3060 }, { "epoch": 1.56, "learning_rate": 2.663903541823662e-07, "logits/chosen": -2.285000801086426, "logits/rejected": -2.2973408699035645, "logps/chosen": -324.43658447265625, "logps/rejected": -333.1507263183594, "loss": 0.0702, "rewards/accuracies": 1.0, "rewards/chosen": 0.8930590748786926, "rewards/margins": 6.265158653259277, "rewards/rejected": -5.372099876403809, "step": 3070 }, { "epoch": 1.57, "learning_rate": 2.654483798040693e-07, "logits/chosen": -2.310891628265381, "logits/rejected": -2.262753963470459, "logps/chosen": -254.897705078125, "logps/rejected": -348.63897705078125, "loss": 0.1367, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.6549896597862244, "rewards/margins": 6.947043418884277, "rewards/rejected": -6.292054176330566, "step": 3080 }, { "epoch": 1.57, "learning_rate": 2.645064054257724e-07, "logits/chosen": -2.2977728843688965, "logits/rejected": -2.272675037384033, "logps/chosen": -337.22393798828125, "logps/rejected": -307.8287353515625, "loss": 0.0783, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.5678704977035522, "rewards/margins": 6.053879737854004, "rewards/rejected": -5.486009120941162, "step": 3090 }, { "epoch": 1.58, "learning_rate": 2.635644310474755e-07, "logits/chosen": -2.270324230194092, "logits/rejected": -2.3138184547424316, "logps/chosen": -277.0635070800781, "logps/rejected": -384.27130126953125, "loss": 0.0871, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.7171944379806519, "rewards/margins": 7.3045196533203125, "rewards/rejected": -6.587324619293213, "step": 3100 }, { "epoch": 1.58, "eval_logits/chosen": -2.4278504848480225, "eval_logits/rejected": -2.5031864643096924, "eval_logps/chosen": -295.6979064941406, "eval_logps/rejected": -296.3849792480469, "eval_loss": 0.6392149329185486, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -1.7447423934936523, "eval_rewards/margins": 2.080242872238159, "eval_rewards/rejected": -3.8249852657318115, "eval_runtime": 302.4911, "eval_samples_per_second": 6.896, "eval_steps_per_second": 0.433, "step": 3100 }, { "epoch": 1.58, "learning_rate": 2.626224566691786e-07, "logits/chosen": -2.338169574737549, "logits/rejected": -2.2731688022613525, "logps/chosen": -271.0912170410156, "logps/rejected": -359.5860290527344, "loss": 0.075, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.45801377296447754, "rewards/margins": 6.8046746253967285, "rewards/rejected": -6.346660614013672, "step": 3110 }, { "epoch": 1.59, "learning_rate": 2.616804822908817e-07, "logits/chosen": -2.268383741378784, "logits/rejected": -2.2524795532226562, "logps/chosen": -249.7799530029297, "logps/rejected": -285.2999572753906, "loss": 0.0824, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.010730976238846779, "rewards/margins": 5.435656547546387, "rewards/rejected": -5.446387767791748, "step": 3120 }, { "epoch": 1.59, "learning_rate": 2.607385079125848e-07, "logits/chosen": -2.304624080657959, "logits/rejected": -2.3206493854522705, "logps/chosen": -249.6020965576172, "logps/rejected": -293.7482604980469, "loss": 0.0697, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.7361634373664856, "rewards/margins": 6.254228591918945, "rewards/rejected": -5.518064975738525, "step": 3130 }, { "epoch": 1.6, "learning_rate": 2.597965335342879e-07, "logits/chosen": -2.422750949859619, "logits/rejected": -2.4973697662353516, "logps/chosen": -302.3764343261719, "logps/rejected": -321.0247497558594, "loss": 0.0583, "rewards/accuracies": 1.0, "rewards/chosen": 0.8100934028625488, "rewards/margins": 7.106518745422363, "rewards/rejected": -6.296424865722656, "step": 3140 }, { "epoch": 1.6, "learning_rate": 2.5885455915599095e-07, "logits/chosen": -2.3840737342834473, "logits/rejected": -2.5003018379211426, "logps/chosen": -304.78143310546875, "logps/rejected": -324.4736022949219, "loss": 0.1689, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 1.0771976709365845, "rewards/margins": 6.414263725280762, "rewards/rejected": -5.337065696716309, "step": 3150 }, { "epoch": 1.61, "learning_rate": 2.57912584777694e-07, "logits/chosen": -2.5240299701690674, "logits/rejected": -2.528115749359131, "logps/chosen": -292.08929443359375, "logps/rejected": -291.10101318359375, "loss": 0.0777, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.09714029729366302, "rewards/margins": 5.437426567077637, "rewards/rejected": -5.340286731719971, "step": 3160 }, { "epoch": 1.61, "learning_rate": 2.569706103993971e-07, "logits/chosen": -2.579284191131592, "logits/rejected": -2.504931926727295, "logps/chosen": -318.3880310058594, "logps/rejected": -317.40771484375, "loss": 0.0932, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.8843173980712891, "rewards/margins": 6.076625823974609, "rewards/rejected": -5.192307949066162, "step": 3170 }, { "epoch": 1.62, "learning_rate": 2.560286360211002e-07, "logits/chosen": -2.4689183235168457, "logits/rejected": -2.5415992736816406, "logps/chosen": -231.2115936279297, "logps/rejected": -298.3565979003906, "loss": 0.1103, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.31711095571517944, "rewards/margins": 6.669442653656006, "rewards/rejected": -6.35233211517334, "step": 3180 }, { "epoch": 1.62, "learning_rate": 2.550866616428033e-07, "logits/chosen": -2.434196949005127, "logits/rejected": -2.555429220199585, "logps/chosen": -269.8191223144531, "logps/rejected": -347.5003967285156, "loss": 0.1087, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.26513025164604187, "rewards/margins": 6.933467864990234, "rewards/rejected": -6.668337345123291, "step": 3190 }, { "epoch": 1.63, "learning_rate": 2.541446872645064e-07, "logits/chosen": -2.322817802429199, "logits/rejected": -2.36637020111084, "logps/chosen": -289.0831604003906, "logps/rejected": -283.831787109375, "loss": 0.1168, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.49825185537338257, "rewards/margins": 6.191859245300293, "rewards/rejected": -5.693608283996582, "step": 3200 }, { "epoch": 1.63, "eval_logits/chosen": -2.4606454372406006, "eval_logits/rejected": -2.5371811389923096, "eval_logps/chosen": -294.4764099121094, "eval_logps/rejected": -293.7373962402344, "eval_loss": 0.5972779989242554, "eval_rewards/accuracies": 0.7442747950553894, "eval_rewards/chosen": -1.622592568397522, "eval_rewards/margins": 1.9376325607299805, "eval_rewards/rejected": -3.560225248336792, "eval_runtime": 297.2703, "eval_samples_per_second": 7.017, "eval_steps_per_second": 0.441, "step": 3200 }, { "epoch": 1.63, "learning_rate": 2.532027128862095e-07, "logits/chosen": -2.296696186065674, "logits/rejected": -2.2465457916259766, "logps/chosen": -268.1493835449219, "logps/rejected": -312.7187194824219, "loss": 0.0762, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.5331908464431763, "rewards/margins": 6.631085395812988, "rewards/rejected": -6.097894191741943, "step": 3210 }, { "epoch": 1.64, "learning_rate": 2.5226073850791255e-07, "logits/chosen": -2.3496594429016113, "logits/rejected": -2.3881731033325195, "logps/chosen": -289.9051513671875, "logps/rejected": -349.10455322265625, "loss": 0.0827, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.41470688581466675, "rewards/margins": 6.188368320465088, "rewards/rejected": -5.773660659790039, "step": 3220 }, { "epoch": 1.64, "learning_rate": 2.5131876412961565e-07, "logits/chosen": -2.4228880405426025, "logits/rejected": -2.3895740509033203, "logps/chosen": -256.2016296386719, "logps/rejected": -296.44708251953125, "loss": 0.0787, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.17232027649879456, "rewards/margins": 5.873257160186768, "rewards/rejected": -5.700936794281006, "step": 3230 }, { "epoch": 1.65, "learning_rate": 2.5037678975131875e-07, "logits/chosen": -2.368595600128174, "logits/rejected": -2.3985543251037598, "logps/chosen": -286.7682189941406, "logps/rejected": -321.578125, "loss": 0.1001, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.45928245782852173, "rewards/margins": 7.228116512298584, "rewards/rejected": -6.768834114074707, "step": 3240 }, { "epoch": 1.65, "learning_rate": 2.4943481537302185e-07, "logits/chosen": -2.3476357460021973, "logits/rejected": -2.454817056655884, "logps/chosen": -273.3967590332031, "logps/rejected": -285.05596923828125, "loss": 0.1003, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.4910641610622406, "rewards/margins": 5.733672618865967, "rewards/rejected": -5.242608547210693, "step": 3250 }, { "epoch": 1.66, "learning_rate": 2.484928409947249e-07, "logits/chosen": -2.3722920417785645, "logits/rejected": -2.3269031047821045, "logps/chosen": -285.6039733886719, "logps/rejected": -324.99139404296875, "loss": 0.0677, "rewards/accuracies": 1.0, "rewards/chosen": 1.0653159618377686, "rewards/margins": 7.1584906578063965, "rewards/rejected": -6.093174934387207, "step": 3260 }, { "epoch": 1.66, "learning_rate": 2.47550866616428e-07, "logits/chosen": -2.284839153289795, "logits/rejected": -2.3606677055358887, "logps/chosen": -289.9405822753906, "logps/rejected": -329.0007019042969, "loss": 0.1531, "rewards/accuracies": 0.9375, "rewards/chosen": -0.04473470523953438, "rewards/margins": 5.93240213394165, "rewards/rejected": -5.977137088775635, "step": 3270 }, { "epoch": 1.67, "learning_rate": 2.466088922381311e-07, "logits/chosen": -2.3747289180755615, "logits/rejected": -2.3340859413146973, "logps/chosen": -296.7153015136719, "logps/rejected": -337.93951416015625, "loss": 0.1289, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.3914230763912201, "rewards/margins": 6.5019402503967285, "rewards/rejected": -6.110517501831055, "step": 3280 }, { "epoch": 1.67, "learning_rate": 2.456669178598342e-07, "logits/chosen": -2.3325932025909424, "logits/rejected": -2.383507251739502, "logps/chosen": -251.0063018798828, "logps/rejected": -311.6315612792969, "loss": 0.1102, "rewards/accuracies": 0.9375, "rewards/chosen": 0.5094572901725769, "rewards/margins": 6.646310329437256, "rewards/rejected": -6.136853218078613, "step": 3290 }, { "epoch": 1.68, "learning_rate": 2.447249434815373e-07, "logits/chosen": -2.4116387367248535, "logits/rejected": -2.454158067703247, "logps/chosen": -355.3347473144531, "logps/rejected": -343.6013488769531, "loss": 0.0699, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.9019377827644348, "rewards/margins": 6.828858852386475, "rewards/rejected": -5.926921367645264, "step": 3300 }, { "epoch": 1.68, "eval_logits/chosen": -2.452702522277832, "eval_logits/rejected": -2.5287041664123535, "eval_logps/chosen": -294.6330871582031, "eval_logps/rejected": -293.4993591308594, "eval_loss": 0.5815873146057129, "eval_rewards/accuracies": 0.7423664331436157, "eval_rewards/chosen": -1.6382594108581543, "eval_rewards/margins": 1.89816153049469, "eval_rewards/rejected": -3.536421298980713, "eval_runtime": 302.0151, "eval_samples_per_second": 6.907, "eval_steps_per_second": 0.434, "step": 3300 }, { "epoch": 1.68, "learning_rate": 2.437829691032404e-07, "logits/chosen": -2.336447238922119, "logits/rejected": -2.3389265537261963, "logps/chosen": -306.2047424316406, "logps/rejected": -323.0862731933594, "loss": 0.076, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.6071743965148926, "rewards/margins": 6.342054843902588, "rewards/rejected": -5.734879970550537, "step": 3310 }, { "epoch": 1.69, "learning_rate": 2.4284099472494345e-07, "logits/chosen": -2.3741564750671387, "logits/rejected": -2.2548439502716064, "logps/chosen": -272.26300048828125, "logps/rejected": -310.686279296875, "loss": 0.0688, "rewards/accuracies": 1.0, "rewards/chosen": 0.2735249698162079, "rewards/margins": 6.651821136474609, "rewards/rejected": -6.378296375274658, "step": 3320 }, { "epoch": 1.69, "learning_rate": 2.4189902034664655e-07, "logits/chosen": -2.2759578227996826, "logits/rejected": -2.294002056121826, "logps/chosen": -269.53619384765625, "logps/rejected": -372.09063720703125, "loss": 0.0551, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.05612024664878845, "rewards/margins": 6.168383598327637, "rewards/rejected": -6.224503517150879, "step": 3330 }, { "epoch": 1.7, "learning_rate": 2.4095704596834965e-07, "logits/chosen": -2.2575266361236572, "logits/rejected": -2.26688814163208, "logps/chosen": -275.0549621582031, "logps/rejected": -283.65008544921875, "loss": 0.0856, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.3581641614437103, "rewards/margins": 5.8348259925842285, "rewards/rejected": -5.476661682128906, "step": 3340 }, { "epoch": 1.7, "learning_rate": 2.4001507159005275e-07, "logits/chosen": -2.3836681842803955, "logits/rejected": -2.337185859680176, "logps/chosen": -279.23419189453125, "logps/rejected": -305.5158996582031, "loss": 0.0836, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.38139820098876953, "rewards/margins": 5.845247268676758, "rewards/rejected": -5.463849067687988, "step": 3350 }, { "epoch": 1.71, "learning_rate": 2.3907309721175585e-07, "logits/chosen": -2.3031442165374756, "logits/rejected": -2.3276925086975098, "logps/chosen": -249.84371948242188, "logps/rejected": -288.9521179199219, "loss": 0.0464, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.8048938512802124, "rewards/margins": 6.748453617095947, "rewards/rejected": -5.943559169769287, "step": 3360 }, { "epoch": 1.71, "learning_rate": 2.3813112283345892e-07, "logits/chosen": -2.280756711959839, "logits/rejected": -2.305971622467041, "logps/chosen": -272.00714111328125, "logps/rejected": -331.1332702636719, "loss": 0.065, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.5015135407447815, "rewards/margins": 6.879171848297119, "rewards/rejected": -6.377658367156982, "step": 3370 }, { "epoch": 1.72, "learning_rate": 2.37189148455162e-07, "logits/chosen": -2.3661789894104004, "logits/rejected": -2.3738746643066406, "logps/chosen": -302.61920166015625, "logps/rejected": -334.0452575683594, "loss": 0.1174, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.5483524203300476, "rewards/margins": 6.481667995452881, "rewards/rejected": -5.933314800262451, "step": 3380 }, { "epoch": 1.72, "learning_rate": 2.362471740768651e-07, "logits/chosen": -2.316767454147339, "logits/rejected": -2.392373561859131, "logps/chosen": -311.609130859375, "logps/rejected": -357.13775634765625, "loss": 0.0771, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.3898351192474365, "rewards/margins": 7.740750789642334, "rewards/rejected": -6.350916385650635, "step": 3390 }, { "epoch": 1.73, "learning_rate": 2.353051996985682e-07, "logits/chosen": -2.132401943206787, "logits/rejected": -2.1744465827941895, "logps/chosen": -254.2714080810547, "logps/rejected": -341.75396728515625, "loss": 0.1082, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": -0.30060726404190063, "rewards/margins": 6.229058265686035, "rewards/rejected": -6.529665946960449, "step": 3400 }, { "epoch": 1.73, "eval_logits/chosen": -2.4441940784454346, "eval_logits/rejected": -2.5178475379943848, "eval_logps/chosen": -296.3059387207031, "eval_logps/rejected": -296.1109313964844, "eval_loss": 0.589521586894989, "eval_rewards/accuracies": 0.7423664331436157, "eval_rewards/chosen": -1.805547833442688, "eval_rewards/margins": 1.9920285940170288, "eval_rewards/rejected": -3.7975761890411377, "eval_runtime": 297.1424, "eval_samples_per_second": 7.02, "eval_steps_per_second": 0.441, "step": 3400 }, { "epoch": 1.73, "learning_rate": 2.3436322532027127e-07, "logits/chosen": -2.3531718254089355, "logits/rejected": -2.444936752319336, "logps/chosen": -279.48956298828125, "logps/rejected": -336.4264831542969, "loss": 0.0955, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 1.3256587982177734, "rewards/margins": 7.036476135253906, "rewards/rejected": -5.710817337036133, "step": 3410 }, { "epoch": 1.74, "learning_rate": 2.3342125094197437e-07, "logits/chosen": -2.337709665298462, "logits/rejected": -2.3244237899780273, "logps/chosen": -279.7310485839844, "logps/rejected": -314.3177490234375, "loss": 0.085, "rewards/accuracies": 1.0, "rewards/chosen": 0.8951374292373657, "rewards/margins": 6.653724670410156, "rewards/rejected": -5.758587837219238, "step": 3420 }, { "epoch": 1.74, "learning_rate": 2.3247927656367747e-07, "logits/chosen": -2.2383086681365967, "logits/rejected": -2.250387668609619, "logps/chosen": -247.69833374023438, "logps/rejected": -316.20166015625, "loss": 0.1076, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.32292360067367554, "rewards/margins": 7.693238735198975, "rewards/rejected": -7.370314598083496, "step": 3430 }, { "epoch": 1.75, "learning_rate": 2.3153730218538055e-07, "logits/chosen": -2.2730133533477783, "logits/rejected": -2.4274871349334717, "logps/chosen": -250.9065399169922, "logps/rejected": -288.06298828125, "loss": 0.0637, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.45260730385780334, "rewards/margins": 6.174647331237793, "rewards/rejected": -5.722040176391602, "step": 3440 }, { "epoch": 1.75, "learning_rate": 2.3059532780708362e-07, "logits/chosen": -2.339019298553467, "logits/rejected": -2.3908653259277344, "logps/chosen": -242.775146484375, "logps/rejected": -296.8477478027344, "loss": 0.1049, "rewards/accuracies": 0.9375, "rewards/chosen": 0.3753826320171356, "rewards/margins": 5.82875919342041, "rewards/rejected": -5.453376293182373, "step": 3450 }, { "epoch": 1.76, "learning_rate": 2.2965335342878672e-07, "logits/chosen": -2.258469820022583, "logits/rejected": -2.311418056488037, "logps/chosen": -268.7459411621094, "logps/rejected": -304.2618713378906, "loss": 0.0832, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.8613001108169556, "rewards/margins": 6.3985466957092285, "rewards/rejected": -5.537246227264404, "step": 3460 }, { "epoch": 1.77, "learning_rate": 2.2871137905048982e-07, "logits/chosen": -2.3698153495788574, "logits/rejected": -2.3288581371307373, "logps/chosen": -242.6855926513672, "logps/rejected": -338.298583984375, "loss": 0.0687, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.5517158508300781, "rewards/margins": 7.039074897766113, "rewards/rejected": -6.487359046936035, "step": 3470 }, { "epoch": 1.77, "learning_rate": 2.2776940467219292e-07, "logits/chosen": -2.382579803466797, "logits/rejected": -2.2819111347198486, "logps/chosen": -254.0598907470703, "logps/rejected": -349.50286865234375, "loss": 0.0918, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.44244199991226196, "rewards/margins": 6.988653659820557, "rewards/rejected": -6.546212196350098, "step": 3480 }, { "epoch": 1.78, "learning_rate": 2.26827430293896e-07, "logits/chosen": -2.319734811782837, "logits/rejected": -2.2849202156066895, "logps/chosen": -251.9346923828125, "logps/rejected": -286.87060546875, "loss": 0.0785, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.08254555612802505, "rewards/margins": 5.771401882171631, "rewards/rejected": -5.853947639465332, "step": 3490 }, { "epoch": 1.78, "learning_rate": 2.258854559155991e-07, "logits/chosen": -2.3515708446502686, "logits/rejected": -2.3444418907165527, "logps/chosen": -279.8646545410156, "logps/rejected": -294.0340576171875, "loss": 0.09, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.4335947036743164, "rewards/margins": 6.248974800109863, "rewards/rejected": -5.815380096435547, "step": 3500 }, { "epoch": 1.78, "eval_logits/chosen": -2.4561455249786377, "eval_logits/rejected": -2.5260605812072754, "eval_logps/chosen": -296.7054748535156, "eval_logps/rejected": -298.36944580078125, "eval_loss": 0.6231197714805603, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -1.84550142288208, "eval_rewards/margins": 2.177931547164917, "eval_rewards/rejected": -4.023432731628418, "eval_runtime": 301.7345, "eval_samples_per_second": 6.913, "eval_steps_per_second": 0.434, "step": 3500 }, { "epoch": 1.79, "learning_rate": 2.2494348153730217e-07, "logits/chosen": -2.338193416595459, "logits/rejected": -2.479274034500122, "logps/chosen": -269.9765930175781, "logps/rejected": -317.2516174316406, "loss": 0.084, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.20136389136314392, "rewards/margins": 6.459589958190918, "rewards/rejected": -6.25822639465332, "step": 3510 }, { "epoch": 1.79, "learning_rate": 2.2400150715900527e-07, "logits/chosen": -2.3768019676208496, "logits/rejected": -2.3087477684020996, "logps/chosen": -295.57440185546875, "logps/rejected": -335.53912353515625, "loss": 0.1071, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.3411738872528076, "rewards/margins": 6.7873358726501465, "rewards/rejected": -6.44616174697876, "step": 3520 }, { "epoch": 1.8, "learning_rate": 2.2305953278070835e-07, "logits/chosen": -2.3922080993652344, "logits/rejected": -2.4363255500793457, "logps/chosen": -294.6719055175781, "logps/rejected": -299.4778137207031, "loss": 0.107, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.5923565626144409, "rewards/margins": 6.195247650146484, "rewards/rejected": -5.602891445159912, "step": 3530 }, { "epoch": 1.8, "learning_rate": 2.2211755840241145e-07, "logits/chosen": -2.4313762187957764, "logits/rejected": -2.4436609745025635, "logps/chosen": -240.0015411376953, "logps/rejected": -298.2782287597656, "loss": 0.1136, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.5145145654678345, "rewards/margins": 6.5318708419799805, "rewards/rejected": -6.017355918884277, "step": 3540 }, { "epoch": 1.81, "learning_rate": 2.2117558402411455e-07, "logits/chosen": -2.2902588844299316, "logits/rejected": -2.2561826705932617, "logps/chosen": -231.10086059570312, "logps/rejected": -242.2857666015625, "loss": 0.0971, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.47640013694763184, "rewards/margins": 5.48819637298584, "rewards/rejected": -5.011795997619629, "step": 3550 }, { "epoch": 1.81, "learning_rate": 2.2023360964581765e-07, "logits/chosen": -2.2816267013549805, "logits/rejected": -2.3149495124816895, "logps/chosen": -265.31292724609375, "logps/rejected": -365.41943359375, "loss": 0.096, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 1.2561672925949097, "rewards/margins": 7.3695244789123535, "rewards/rejected": -6.113357067108154, "step": 3560 }, { "epoch": 1.82, "learning_rate": 2.192916352675207e-07, "logits/chosen": -2.2925052642822266, "logits/rejected": -2.317241668701172, "logps/chosen": -256.3150634765625, "logps/rejected": -273.255126953125, "loss": 0.0838, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.44810542464256287, "rewards/margins": 5.637542724609375, "rewards/rejected": -5.189436912536621, "step": 3570 }, { "epoch": 1.82, "learning_rate": 2.183496608892238e-07, "logits/chosen": -2.2493538856506348, "logits/rejected": -2.228045701980591, "logps/chosen": -290.98834228515625, "logps/rejected": -318.1434326171875, "loss": 0.1392, "rewards/accuracies": 0.9125000238418579, "rewards/chosen": 0.17248782515525818, "rewards/margins": 5.7656755447387695, "rewards/rejected": -5.5931878089904785, "step": 3580 }, { "epoch": 1.83, "learning_rate": 2.174076865109269e-07, "logits/chosen": -2.4821906089782715, "logits/rejected": -2.487905979156494, "logps/chosen": -254.1220245361328, "logps/rejected": -355.9189453125, "loss": 0.0894, "rewards/accuracies": 0.9375, "rewards/chosen": 0.6711649298667908, "rewards/margins": 6.761671543121338, "rewards/rejected": -6.090506553649902, "step": 3590 }, { "epoch": 1.83, "learning_rate": 2.1646571213263e-07, "logits/chosen": -2.421340227127075, "logits/rejected": -2.477186679840088, "logps/chosen": -300.8204345703125, "logps/rejected": -312.5556335449219, "loss": 0.1238, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.7146443128585815, "rewards/margins": 6.508008003234863, "rewards/rejected": -5.793363571166992, "step": 3600 }, { "epoch": 1.83, "eval_logits/chosen": -2.551158905029297, "eval_logits/rejected": -2.6294496059417725, "eval_logps/chosen": -295.0212707519531, "eval_logps/rejected": -294.1321105957031, "eval_loss": 0.6046690940856934, "eval_rewards/accuracies": 0.7423664331436157, "eval_rewards/chosen": -1.677080750465393, "eval_rewards/margins": 1.922616720199585, "eval_rewards/rejected": -3.5996978282928467, "eval_runtime": 296.8842, "eval_samples_per_second": 7.026, "eval_steps_per_second": 0.441, "step": 3600 }, { "epoch": 1.84, "learning_rate": 2.1552373775433307e-07, "logits/chosen": -2.4084277153015137, "logits/rejected": -2.4354751110076904, "logps/chosen": -290.44696044921875, "logps/rejected": -286.05389404296875, "loss": 0.0909, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.7692252993583679, "rewards/margins": 6.157434940338135, "rewards/rejected": -5.388209342956543, "step": 3610 }, { "epoch": 1.84, "learning_rate": 2.1458176337603617e-07, "logits/chosen": -2.442023992538452, "logits/rejected": -2.503213882446289, "logps/chosen": -295.61212158203125, "logps/rejected": -324.33453369140625, "loss": 0.1087, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.38564035296440125, "rewards/margins": 6.0786871910095215, "rewards/rejected": -5.693046569824219, "step": 3620 }, { "epoch": 1.85, "learning_rate": 2.1363978899773924e-07, "logits/chosen": -2.4094722270965576, "logits/rejected": -2.527698040008545, "logps/chosen": -244.3059844970703, "logps/rejected": -309.1972961425781, "loss": 0.1041, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.954838752746582, "rewards/margins": 6.438208103179932, "rewards/rejected": -5.483368873596191, "step": 3630 }, { "epoch": 1.85, "learning_rate": 2.1269781461944234e-07, "logits/chosen": -2.5033748149871826, "logits/rejected": -2.5151901245117188, "logps/chosen": -286.1416931152344, "logps/rejected": -314.78759765625, "loss": 0.0851, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.8734874725341797, "rewards/margins": 6.574765205383301, "rewards/rejected": -5.701278209686279, "step": 3640 }, { "epoch": 1.86, "learning_rate": 2.1175584024114542e-07, "logits/chosen": -2.485938310623169, "logits/rejected": -2.4653992652893066, "logps/chosen": -336.11871337890625, "logps/rejected": -345.1873474121094, "loss": 0.0875, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 1.446183443069458, "rewards/margins": 7.834715843200684, "rewards/rejected": -6.388533115386963, "step": 3650 }, { "epoch": 1.86, "learning_rate": 2.1081386586284852e-07, "logits/chosen": -2.4055051803588867, "logits/rejected": -2.4212534427642822, "logps/chosen": -295.2558288574219, "logps/rejected": -342.17047119140625, "loss": 0.0596, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.8791106939315796, "rewards/margins": 6.56097412109375, "rewards/rejected": -5.681862831115723, "step": 3660 }, { "epoch": 1.87, "learning_rate": 2.0987189148455162e-07, "logits/chosen": -2.468773365020752, "logits/rejected": -2.427738904953003, "logps/chosen": -285.6302795410156, "logps/rejected": -299.8651428222656, "loss": 0.0816, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.422785222530365, "rewards/margins": 6.147997856140137, "rewards/rejected": -5.725213050842285, "step": 3670 }, { "epoch": 1.87, "learning_rate": 2.0892991710625472e-07, "logits/chosen": -2.520200490951538, "logits/rejected": -2.3786044120788574, "logps/chosen": -254.3336944580078, "logps/rejected": -342.19378662109375, "loss": 0.0871, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.026506716385483742, "rewards/margins": 6.259054660797119, "rewards/rejected": -6.285561561584473, "step": 3680 }, { "epoch": 1.88, "learning_rate": 2.0798794272795777e-07, "logits/chosen": -2.5233798027038574, "logits/rejected": -2.5141570568084717, "logps/chosen": -269.6754150390625, "logps/rejected": -339.6807556152344, "loss": 0.1134, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.6938865780830383, "rewards/margins": 6.421724796295166, "rewards/rejected": -5.72783899307251, "step": 3690 }, { "epoch": 1.88, "learning_rate": 2.0704596834966087e-07, "logits/chosen": -2.522254705429077, "logits/rejected": -2.3955483436584473, "logps/chosen": -284.70574951171875, "logps/rejected": -297.5660400390625, "loss": 0.0847, "rewards/accuracies": 1.0, "rewards/chosen": 0.5291184186935425, "rewards/margins": 6.2721452713012695, "rewards/rejected": -5.743027210235596, "step": 3700 }, { "epoch": 1.88, "eval_logits/chosen": -2.547114849090576, "eval_logits/rejected": -2.6224122047424316, "eval_logps/chosen": -294.97576904296875, "eval_logps/rejected": -293.8779296875, "eval_loss": 0.5898069143295288, "eval_rewards/accuracies": 0.7347328066825867, "eval_rewards/chosen": -1.672528862953186, "eval_rewards/margins": 1.9017502069473267, "eval_rewards/rejected": -3.5742790699005127, "eval_runtime": 301.5335, "eval_samples_per_second": 6.918, "eval_steps_per_second": 0.434, "step": 3700 }, { "epoch": 1.89, "learning_rate": 2.0610399397136397e-07, "logits/chosen": -2.357499361038208, "logits/rejected": -2.3887948989868164, "logps/chosen": -238.7329559326172, "logps/rejected": -300.47772216796875, "loss": 0.2105, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 1.1059715747833252, "rewards/margins": 7.015279293060303, "rewards/rejected": -5.909307479858398, "step": 3710 }, { "epoch": 1.89, "learning_rate": 2.0516201959306707e-07, "logits/chosen": -2.3938887119293213, "logits/rejected": -2.5061392784118652, "logps/chosen": -270.1607666015625, "logps/rejected": -275.0926513671875, "loss": 0.0896, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.56185382604599, "rewards/margins": 5.6083197593688965, "rewards/rejected": -5.046465873718262, "step": 3720 }, { "epoch": 1.9, "learning_rate": 2.0422004521477014e-07, "logits/chosen": -2.4314119815826416, "logits/rejected": -2.4555587768554688, "logps/chosen": -279.8658142089844, "logps/rejected": -353.25482177734375, "loss": 0.0931, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.4166037440299988, "rewards/margins": 7.007256507873535, "rewards/rejected": -6.590653419494629, "step": 3730 }, { "epoch": 1.9, "learning_rate": 2.0327807083647324e-07, "logits/chosen": -2.5305721759796143, "logits/rejected": -2.468280792236328, "logps/chosen": -278.43310546875, "logps/rejected": -296.37237548828125, "loss": 0.12, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.6041958332061768, "rewards/margins": 5.6044135093688965, "rewards/rejected": -5.000216960906982, "step": 3740 }, { "epoch": 1.91, "learning_rate": 2.0233609645817634e-07, "logits/chosen": -2.36225962638855, "logits/rejected": -2.416337490081787, "logps/chosen": -305.2579650878906, "logps/rejected": -331.9045104980469, "loss": 0.1075, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.41681164503097534, "rewards/margins": 6.1702985763549805, "rewards/rejected": -5.7534871101379395, "step": 3750 }, { "epoch": 1.91, "learning_rate": 2.0139412207987942e-07, "logits/chosen": -2.4988961219787598, "logits/rejected": -2.494710922241211, "logps/chosen": -274.8974609375, "logps/rejected": -294.8260192871094, "loss": 0.0894, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.7716017961502075, "rewards/margins": 6.274075508117676, "rewards/rejected": -5.502473831176758, "step": 3760 }, { "epoch": 1.92, "learning_rate": 2.004521477015825e-07, "logits/chosen": -2.497544765472412, "logits/rejected": -2.4463839530944824, "logps/chosen": -240.38961791992188, "logps/rejected": -274.1627502441406, "loss": 0.0975, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.06918475031852722, "rewards/margins": 5.60644006729126, "rewards/rejected": -5.53725528717041, "step": 3770 }, { "epoch": 1.92, "learning_rate": 1.995101733232856e-07, "logits/chosen": -2.553527355194092, "logits/rejected": -2.420929431915283, "logps/chosen": -272.9808349609375, "logps/rejected": -331.07733154296875, "loss": 0.0665, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.6573607325553894, "rewards/margins": 6.504881858825684, "rewards/rejected": -5.8475213050842285, "step": 3780 }, { "epoch": 1.93, "learning_rate": 1.985681989449887e-07, "logits/chosen": -2.4012131690979004, "logits/rejected": -2.5193140506744385, "logps/chosen": -280.072998046875, "logps/rejected": -303.76251220703125, "loss": 0.1033, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.3876453936100006, "rewards/margins": 5.856767177581787, "rewards/rejected": -5.469121932983398, "step": 3790 }, { "epoch": 1.93, "learning_rate": 1.976262245666918e-07, "logits/chosen": -2.353722095489502, "logits/rejected": -2.453313112258911, "logps/chosen": -280.9483642578125, "logps/rejected": -288.4091796875, "loss": 0.0908, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.2342231273651123, "rewards/margins": 5.581732749938965, "rewards/rejected": -5.347509384155273, "step": 3800 }, { "epoch": 1.93, "eval_logits/chosen": -2.5046539306640625, "eval_logits/rejected": -2.577751636505127, "eval_logps/chosen": -294.3269348144531, "eval_logps/rejected": -293.51580810546875, "eval_loss": 0.5816638469696045, "eval_rewards/accuracies": 0.7366412281990051, "eval_rewards/chosen": -1.6076483726501465, "eval_rewards/margins": 1.9304182529449463, "eval_rewards/rejected": -3.5380663871765137, "eval_runtime": 296.9875, "eval_samples_per_second": 7.024, "eval_steps_per_second": 0.441, "step": 3800 }, { "epoch": 1.94, "learning_rate": 1.9668425018839487e-07, "logits/chosen": -2.355513572692871, "logits/rejected": -2.2735846042633057, "logps/chosen": -281.0897521972656, "logps/rejected": -296.72900390625, "loss": 0.0724, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.6503603458404541, "rewards/margins": 6.108792304992676, "rewards/rejected": -5.458431720733643, "step": 3810 }, { "epoch": 1.94, "learning_rate": 1.9574227581009794e-07, "logits/chosen": -2.418468952178955, "logits/rejected": -2.371502161026001, "logps/chosen": -311.9105224609375, "logps/rejected": -345.49322509765625, "loss": 0.1222, "rewards/accuracies": 0.9375, "rewards/chosen": 0.8610822558403015, "rewards/margins": 6.491988182067871, "rewards/rejected": -5.630906105041504, "step": 3820 }, { "epoch": 1.95, "learning_rate": 1.9480030143180104e-07, "logits/chosen": -2.348468542098999, "logits/rejected": -2.3723227977752686, "logps/chosen": -286.27313232421875, "logps/rejected": -353.67633056640625, "loss": 0.0726, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": 0.8641120791435242, "rewards/margins": 7.633820533752441, "rewards/rejected": -6.769708156585693, "step": 3830 }, { "epoch": 1.95, "learning_rate": 1.9385832705350414e-07, "logits/chosen": -2.211182117462158, "logits/rejected": -2.160393238067627, "logps/chosen": -223.9073028564453, "logps/rejected": -285.9527893066406, "loss": 0.0743, "rewards/accuracies": 0.949999988079071, "rewards/chosen": 0.1326831430196762, "rewards/margins": 5.998318672180176, "rewards/rejected": -5.865635395050049, "step": 3840 }, { "epoch": 1.96, "learning_rate": 1.9291635267520722e-07, "logits/chosen": -2.421313524246216, "logits/rejected": -2.4438533782958984, "logps/chosen": -280.8946838378906, "logps/rejected": -321.70123291015625, "loss": 0.1117, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.4331938624382019, "rewards/margins": 6.441449165344238, "rewards/rejected": -6.008255481719971, "step": 3850 }, { "epoch": 1.96, "learning_rate": 1.9197437829691032e-07, "logits/chosen": -2.4228568077087402, "logits/rejected": -2.3654749393463135, "logps/chosen": -312.3072814941406, "logps/rejected": -329.73846435546875, "loss": 0.0721, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.6582003831863403, "rewards/margins": 6.057830810546875, "rewards/rejected": -5.399630546569824, "step": 3860 }, { "epoch": 1.97, "learning_rate": 1.9103240391861342e-07, "logits/chosen": -2.3000106811523438, "logits/rejected": -2.3441321849823, "logps/chosen": -257.6878967285156, "logps/rejected": -272.4525451660156, "loss": 0.0796, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.8951619863510132, "rewards/margins": 6.290835380554199, "rewards/rejected": -5.395674705505371, "step": 3870 }, { "epoch": 1.97, "learning_rate": 1.900904295403165e-07, "logits/chosen": -2.3968539237976074, "logits/rejected": -2.3876609802246094, "logps/chosen": -273.7939453125, "logps/rejected": -331.85369873046875, "loss": 0.0905, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 1.0945357084274292, "rewards/margins": 7.351518154144287, "rewards/rejected": -6.256982803344727, "step": 3880 }, { "epoch": 1.98, "learning_rate": 1.8914845516201957e-07, "logits/chosen": -2.3795459270477295, "logits/rejected": -2.3961644172668457, "logps/chosen": -221.3052215576172, "logps/rejected": -369.16656494140625, "loss": 0.0657, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.41156476736068726, "rewards/margins": 6.591817378997803, "rewards/rejected": -6.180253028869629, "step": 3890 }, { "epoch": 1.98, "learning_rate": 1.8820648078372267e-07, "logits/chosen": -2.4188742637634277, "logits/rejected": -2.39491605758667, "logps/chosen": -258.48797607421875, "logps/rejected": -302.8425598144531, "loss": 0.0666, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.1906200349330902, "rewards/margins": 6.002566337585449, "rewards/rejected": -5.811945915222168, "step": 3900 }, { "epoch": 1.98, "eval_logits/chosen": -2.5061278343200684, "eval_logits/rejected": -2.578442335128784, "eval_logps/chosen": -295.20037841796875, "eval_logps/rejected": -295.57177734375, "eval_loss": 0.6063258647918701, "eval_rewards/accuracies": 0.7309160232543945, "eval_rewards/chosen": -1.6949888467788696, "eval_rewards/margins": 2.0486738681793213, "eval_rewards/rejected": -3.7436630725860596, "eval_runtime": 301.8399, "eval_samples_per_second": 6.911, "eval_steps_per_second": 0.434, "step": 3900 }, { "epoch": 1.99, "learning_rate": 1.8726450640542577e-07, "logits/chosen": -2.4050233364105225, "logits/rejected": -2.385024309158325, "logps/chosen": -294.2882385253906, "logps/rejected": -355.4291687011719, "loss": 0.0876, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.5518203377723694, "rewards/margins": 7.472037315368652, "rewards/rejected": -6.920217990875244, "step": 3910 }, { "epoch": 1.99, "learning_rate": 1.8632253202712887e-07, "logits/chosen": -2.4185214042663574, "logits/rejected": -2.4424355030059814, "logps/chosen": -267.9097900390625, "logps/rejected": -344.55609130859375, "loss": 0.0646, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.565269947052002, "rewards/margins": 7.364255428314209, "rewards/rejected": -6.798984527587891, "step": 3920 }, { "epoch": 2.0, "learning_rate": 1.8538055764883194e-07, "logits/chosen": -2.4195945262908936, "logits/rejected": -2.467193126678467, "logps/chosen": -264.4360656738281, "logps/rejected": -274.2948913574219, "loss": 0.1307, "rewards/accuracies": 0.925000011920929, "rewards/chosen": 0.0061029670760035515, "rewards/margins": 5.298325061798096, "rewards/rejected": -5.292221546173096, "step": 3930 }, { "epoch": 2.0, "learning_rate": 1.8443858327053502e-07, "logits/chosen": -2.435084819793701, "logits/rejected": -2.4718165397644043, "logps/chosen": -276.6685791015625, "logps/rejected": -309.0755310058594, "loss": 0.0263, "rewards/accuracies": 1.0, "rewards/chosen": 1.3204138278961182, "rewards/margins": 7.539528846740723, "rewards/rejected": -6.219114780426025, "step": 3940 }, { "epoch": 2.01, "learning_rate": 1.8349660889223812e-07, "logits/chosen": -2.4513020515441895, "logits/rejected": -2.452085018157959, "logps/chosen": -250.68972778320312, "logps/rejected": -308.9100036621094, "loss": 0.0161, "rewards/accuracies": 1.0, "rewards/chosen": 0.8725830316543579, "rewards/margins": 7.932334899902344, "rewards/rejected": -7.059751987457275, "step": 3950 }, { "epoch": 2.01, "learning_rate": 1.8255463451394122e-07, "logits/chosen": -2.415900945663452, "logits/rejected": -2.5540549755096436, "logps/chosen": -271.8116149902344, "logps/rejected": -332.2603454589844, "loss": 0.0216, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.4665905833244324, "rewards/margins": 7.362532138824463, "rewards/rejected": -6.89594030380249, "step": 3960 }, { "epoch": 2.02, "learning_rate": 1.816126601356443e-07, "logits/chosen": -2.5276520252227783, "logits/rejected": -2.542480945587158, "logps/chosen": -298.09222412109375, "logps/rejected": -355.290283203125, "loss": 0.0175, "rewards/accuracies": 1.0, "rewards/chosen": 1.3401168584823608, "rewards/margins": 8.322530746459961, "rewards/rejected": -6.982413291931152, "step": 3970 }, { "epoch": 2.02, "learning_rate": 1.806706857573474e-07, "logits/chosen": -2.3913216590881348, "logits/rejected": -2.5120017528533936, "logps/chosen": -229.17861938476562, "logps/rejected": -295.5799865722656, "loss": 0.0133, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.28036922216415405, "rewards/margins": 6.928774833679199, "rewards/rejected": -6.648406028747559, "step": 3980 }, { "epoch": 2.03, "learning_rate": 1.797287113790505e-07, "logits/chosen": -2.4164083003997803, "logits/rejected": -2.4683470726013184, "logps/chosen": -250.1259307861328, "logps/rejected": -309.98895263671875, "loss": 0.0261, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.3368276357650757, "rewards/margins": 7.374268531799316, "rewards/rejected": -7.037441253662109, "step": 3990 }, { "epoch": 2.03, "learning_rate": 1.787867370007536e-07, "logits/chosen": -2.391810894012451, "logits/rejected": -2.4127678871154785, "logps/chosen": -282.1521301269531, "logps/rejected": -336.7127685546875, "loss": 0.0173, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.7647604942321777, "rewards/margins": 8.000178337097168, "rewards/rejected": -7.23541784286499, "step": 4000 }, { "epoch": 2.03, "eval_logits/chosen": -2.5494742393493652, "eval_logits/rejected": -2.619675636291504, "eval_logps/chosen": -299.4777526855469, "eval_logps/rejected": -301.586181640625, "eval_loss": 0.6212854385375977, "eval_rewards/accuracies": 0.7309160232543945, "eval_rewards/chosen": -2.122727632522583, "eval_rewards/margins": 2.22237491607666, "eval_rewards/rejected": -4.345102787017822, "eval_runtime": 301.8414, "eval_samples_per_second": 6.911, "eval_steps_per_second": 0.434, "step": 4000 }, { "epoch": 2.04, "learning_rate": 1.7784476262245664e-07, "logits/chosen": -2.4948716163635254, "logits/rejected": -2.453728437423706, "logps/chosen": -284.3287353515625, "logps/rejected": -372.3181457519531, "loss": 0.0164, "rewards/accuracies": 1.0, "rewards/chosen": 0.407473087310791, "rewards/margins": 9.088857650756836, "rewards/rejected": -8.681384086608887, "step": 4010 }, { "epoch": 2.04, "learning_rate": 1.7690278824415974e-07, "logits/chosen": -2.4881949424743652, "logits/rejected": -2.5378105640411377, "logps/chosen": -302.18536376953125, "logps/rejected": -335.93182373046875, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": 1.3019936084747314, "rewards/margins": 8.437501907348633, "rewards/rejected": -7.1355085372924805, "step": 4020 }, { "epoch": 2.05, "learning_rate": 1.7596081386586284e-07, "logits/chosen": -2.519768238067627, "logits/rejected": -2.4485363960266113, "logps/chosen": -333.52679443359375, "logps/rejected": -379.7672424316406, "loss": 0.0113, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.38338369131088257, "rewards/margins": 7.786715030670166, "rewards/rejected": -7.403331756591797, "step": 4030 }, { "epoch": 2.05, "learning_rate": 1.7501883948756594e-07, "logits/chosen": -2.4867990016937256, "logits/rejected": -2.350109577178955, "logps/chosen": -261.5180969238281, "logps/rejected": -335.66876220703125, "loss": 0.0195, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.4131608009338379, "rewards/margins": 8.117756843566895, "rewards/rejected": -7.704596042633057, "step": 4040 }, { "epoch": 2.06, "learning_rate": 1.7407686510926901e-07, "logits/chosen": -2.53373384475708, "logits/rejected": -2.392754077911377, "logps/chosen": -321.0093688964844, "logps/rejected": -367.75335693359375, "loss": 0.0205, "rewards/accuracies": 1.0, "rewards/chosen": 0.7985485196113586, "rewards/margins": 8.87114429473877, "rewards/rejected": -8.072595596313477, "step": 4050 }, { "epoch": 2.07, "learning_rate": 1.7313489073097212e-07, "logits/chosen": -2.4697489738464355, "logits/rejected": -2.491966724395752, "logps/chosen": -282.41363525390625, "logps/rejected": -304.3522033691406, "loss": 0.0162, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.5781731605529785, "rewards/margins": 8.477033615112305, "rewards/rejected": -7.89885950088501, "step": 4060 }, { "epoch": 2.07, "learning_rate": 1.721929163526752e-07, "logits/chosen": -2.4429774284362793, "logits/rejected": -2.391498327255249, "logps/chosen": -351.14971923828125, "logps/rejected": -382.9489440917969, "loss": 0.0092, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.589583158493042, "rewards/margins": 8.448725700378418, "rewards/rejected": -7.859143257141113, "step": 4070 }, { "epoch": 2.08, "learning_rate": 1.712509419743783e-07, "logits/chosen": -2.4819400310516357, "logits/rejected": -2.4833054542541504, "logps/chosen": -271.9066467285156, "logps/rejected": -388.4400329589844, "loss": 0.0136, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.5519894957542419, "rewards/margins": 9.0414457321167, "rewards/rejected": -8.489457130432129, "step": 4080 }, { "epoch": 2.08, "learning_rate": 1.7030896759608136e-07, "logits/chosen": -2.5111820697784424, "logits/rejected": -2.470893383026123, "logps/chosen": -298.361572265625, "logps/rejected": -385.54010009765625, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": 0.7081271409988403, "rewards/margins": 9.882740020751953, "rewards/rejected": -9.174612045288086, "step": 4090 }, { "epoch": 2.09, "learning_rate": 1.6936699321778446e-07, "logits/chosen": -2.463622808456421, "logits/rejected": -2.392191171646118, "logps/chosen": -251.0826873779297, "logps/rejected": -327.29534912109375, "loss": 0.0213, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.8025670051574707, "rewards/margins": 8.546449661254883, "rewards/rejected": -7.743882656097412, "step": 4100 }, { "epoch": 2.09, "eval_logits/chosen": -2.5334906578063965, "eval_logits/rejected": -2.602919578552246, "eval_logps/chosen": -302.7117004394531, "eval_logps/rejected": -307.35565185546875, "eval_loss": 0.6529473662376404, "eval_rewards/accuracies": 0.7366412281990051, "eval_rewards/chosen": -2.4461238384246826, "eval_rewards/margins": 2.4759316444396973, "eval_rewards/rejected": -4.922055244445801, "eval_runtime": 296.9611, "eval_samples_per_second": 7.024, "eval_steps_per_second": 0.441, "step": 4100 }, { "epoch": 2.09, "learning_rate": 1.6842501883948756e-07, "logits/chosen": -2.461470365524292, "logits/rejected": -2.4746007919311523, "logps/chosen": -293.7532043457031, "logps/rejected": -337.87152099609375, "loss": 0.0123, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2249172031879425, "rewards/margins": 8.05424976348877, "rewards/rejected": -7.829331874847412, "step": 4110 }, { "epoch": 2.1, "learning_rate": 1.6748304446119067e-07, "logits/chosen": -2.445439577102661, "logits/rejected": -2.4860050678253174, "logps/chosen": -294.78460693359375, "logps/rejected": -345.5479736328125, "loss": 0.0104, "rewards/accuracies": 1.0, "rewards/chosen": 1.4239388704299927, "rewards/margins": 9.9138765335083, "rewards/rejected": -8.489936828613281, "step": 4120 }, { "epoch": 2.1, "learning_rate": 1.665410700828937e-07, "logits/chosen": -2.5708627700805664, "logits/rejected": -2.4310455322265625, "logps/chosen": -300.51318359375, "logps/rejected": -346.686767578125, "loss": 0.009, "rewards/accuracies": 1.0, "rewards/chosen": 0.4349859356880188, "rewards/margins": 8.615959167480469, "rewards/rejected": -8.1809720993042, "step": 4130 }, { "epoch": 2.11, "learning_rate": 1.655990957045968e-07, "logits/chosen": -2.5158300399780273, "logits/rejected": -2.5776991844177246, "logps/chosen": -325.2979431152344, "logps/rejected": -375.5154113769531, "loss": 0.0168, "rewards/accuracies": 1.0, "rewards/chosen": 0.5380330681800842, "rewards/margins": 8.873096466064453, "rewards/rejected": -8.335062980651855, "step": 4140 }, { "epoch": 2.11, "learning_rate": 1.6465712132629991e-07, "logits/chosen": -2.3604540824890137, "logits/rejected": -2.3653512001037598, "logps/chosen": -237.39260864257812, "logps/rejected": -334.53955078125, "loss": 0.0227, "rewards/accuracies": 1.0, "rewards/chosen": -0.6499460935592651, "rewards/margins": 8.810885429382324, "rewards/rejected": -9.460832595825195, "step": 4150 }, { "epoch": 2.12, "learning_rate": 1.6371514694800301e-07, "logits/chosen": -2.449936628341675, "logits/rejected": -2.3841679096221924, "logps/chosen": -249.3923797607422, "logps/rejected": -348.3072814941406, "loss": 0.0198, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.338760644197464, "rewards/margins": 9.519830703735352, "rewards/rejected": -9.181070327758789, "step": 4160 }, { "epoch": 2.12, "learning_rate": 1.6277317256970611e-07, "logits/chosen": -2.406186103820801, "logits/rejected": -2.4443275928497314, "logps/chosen": -297.19384765625, "logps/rejected": -386.3888854980469, "loss": 0.0182, "rewards/accuracies": 1.0, "rewards/chosen": 0.3307214677333832, "rewards/margins": 9.84343147277832, "rewards/rejected": -9.512711524963379, "step": 4170 }, { "epoch": 2.13, "learning_rate": 1.618311981914092e-07, "logits/chosen": -2.3944525718688965, "logits/rejected": -2.2698841094970703, "logps/chosen": -240.02999877929688, "logps/rejected": -300.5351257324219, "loss": 0.0111, "rewards/accuracies": 1.0, "rewards/chosen": -0.23821866512298584, "rewards/margins": 8.546000480651855, "rewards/rejected": -8.784219741821289, "step": 4180 }, { "epoch": 2.13, "learning_rate": 1.6088922381311226e-07, "logits/chosen": -2.34932541847229, "logits/rejected": -2.371035099029541, "logps/chosen": -276.40679931640625, "logps/rejected": -384.0252685546875, "loss": 0.016, "rewards/accuracies": 1.0, "rewards/chosen": 0.10160058736801147, "rewards/margins": 8.838811874389648, "rewards/rejected": -8.737211227416992, "step": 4190 }, { "epoch": 2.14, "learning_rate": 1.5994724943481536e-07, "logits/chosen": -2.461995840072632, "logits/rejected": -2.3588435649871826, "logps/chosen": -251.30361938476562, "logps/rejected": -327.7369079589844, "loss": 0.0149, "rewards/accuracies": 1.0, "rewards/chosen": -0.21784713864326477, "rewards/margins": 9.507335662841797, "rewards/rejected": -9.72518253326416, "step": 4200 }, { "epoch": 2.14, "eval_logits/chosen": -2.527193307876587, "eval_logits/rejected": -2.5937540531158447, "eval_logps/chosen": -308.90386962890625, "eval_logps/rejected": -315.9820861816406, "eval_loss": 0.6933820843696594, "eval_rewards/accuracies": 0.7347328066825867, "eval_rewards/chosen": -3.065340280532837, "eval_rewards/margins": 2.7193539142608643, "eval_rewards/rejected": -5.784693241119385, "eval_runtime": 301.827, "eval_samples_per_second": 6.911, "eval_steps_per_second": 0.434, "step": 4200 }, { "epoch": 2.14, "learning_rate": 1.5900527505651846e-07, "logits/chosen": -2.406585216522217, "logits/rejected": -2.409055471420288, "logps/chosen": -314.01141357421875, "logps/rejected": -328.4725646972656, "loss": 0.003, "rewards/accuracies": 1.0, "rewards/chosen": 0.4073919355869293, "rewards/margins": 9.213891983032227, "rewards/rejected": -8.806499481201172, "step": 4210 }, { "epoch": 2.15, "learning_rate": 1.5806330067822154e-07, "logits/chosen": -2.4329631328582764, "logits/rejected": -2.4906868934631348, "logps/chosen": -292.4327392578125, "logps/rejected": -388.5157775878906, "loss": 0.0141, "rewards/accuracies": 1.0, "rewards/chosen": -0.3115505576133728, "rewards/margins": 9.646425247192383, "rewards/rejected": -9.957975387573242, "step": 4220 }, { "epoch": 2.15, "learning_rate": 1.5712132629992464e-07, "logits/chosen": -2.4410560131073, "logits/rejected": -2.573024272918701, "logps/chosen": -300.14385986328125, "logps/rejected": -341.1786193847656, "loss": 0.0214, "rewards/accuracies": 1.0, "rewards/chosen": 0.273735910654068, "rewards/margins": 8.2180814743042, "rewards/rejected": -7.944344997406006, "step": 4230 }, { "epoch": 2.16, "learning_rate": 1.5617935192162774e-07, "logits/chosen": -2.3949074745178223, "logits/rejected": -2.323303699493408, "logps/chosen": -277.2351379394531, "logps/rejected": -318.48736572265625, "loss": 0.0064, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.28990697860717773, "rewards/margins": 9.171941757202148, "rewards/rejected": -9.461848258972168, "step": 4240 }, { "epoch": 2.16, "learning_rate": 1.5523737754333084e-07, "logits/chosen": -2.290977954864502, "logits/rejected": -2.3106536865234375, "logps/chosen": -242.9626922607422, "logps/rejected": -332.17529296875, "loss": 0.018, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.07354114949703217, "rewards/margins": 8.418808937072754, "rewards/rejected": -8.492349624633789, "step": 4250 }, { "epoch": 2.17, "learning_rate": 1.5429540316503389e-07, "logits/chosen": -2.3914854526519775, "logits/rejected": -2.319920778274536, "logps/chosen": -255.0960235595703, "logps/rejected": -335.22113037109375, "loss": 0.0113, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.04593334347009659, "rewards/margins": 8.81833267211914, "rewards/rejected": -8.772398948669434, "step": 4260 }, { "epoch": 2.17, "learning_rate": 1.53353428786737e-07, "logits/chosen": -2.3034889698028564, "logits/rejected": -2.378272533416748, "logps/chosen": -281.86004638671875, "logps/rejected": -361.8553771972656, "loss": 0.0157, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 1.1320751905441284, "rewards/margins": 9.508333206176758, "rewards/rejected": -8.376259803771973, "step": 4270 }, { "epoch": 2.18, "learning_rate": 1.524114544084401e-07, "logits/chosen": -2.262380361557007, "logits/rejected": -2.337212085723877, "logps/chosen": -284.8341979980469, "logps/rejected": -381.337158203125, "loss": 0.0131, "rewards/accuracies": 1.0, "rewards/chosen": -0.03602912276983261, "rewards/margins": 9.237323760986328, "rewards/rejected": -9.27335262298584, "step": 4280 }, { "epoch": 2.18, "learning_rate": 1.514694800301432e-07, "logits/chosen": -2.2815897464752197, "logits/rejected": -2.350703716278076, "logps/chosen": -268.76922607421875, "logps/rejected": -363.57623291015625, "loss": 0.0138, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.15438330173492432, "rewards/margins": 9.449551582336426, "rewards/rejected": -9.603934288024902, "step": 4290 }, { "epoch": 2.19, "learning_rate": 1.5052750565184626e-07, "logits/chosen": -2.2737479209899902, "logits/rejected": -2.2863266468048096, "logps/chosen": -278.785888671875, "logps/rejected": -407.51702880859375, "loss": 0.0084, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.15904609858989716, "rewards/margins": 9.742883682250977, "rewards/rejected": -9.90192985534668, "step": 4300 }, { "epoch": 2.19, "eval_logits/chosen": -2.440368890762329, "eval_logits/rejected": -2.508787155151367, "eval_logps/chosen": -310.0954895019531, "eval_logps/rejected": -318.322998046875, "eval_loss": 0.7083070874214172, "eval_rewards/accuracies": 0.7404580116271973, "eval_rewards/chosen": -3.1845004558563232, "eval_rewards/margins": 2.8342840671539307, "eval_rewards/rejected": -6.018784523010254, "eval_runtime": 296.8234, "eval_samples_per_second": 7.028, "eval_steps_per_second": 0.441, "step": 4300 }, { "epoch": 2.19, "learning_rate": 1.4958553127354936e-07, "logits/chosen": -2.3321611881256104, "logits/rejected": -2.3306918144226074, "logps/chosen": -286.4504699707031, "logps/rejected": -348.179931640625, "loss": 0.008, "rewards/accuracies": 1.0, "rewards/chosen": -0.29880261421203613, "rewards/margins": 9.495977401733398, "rewards/rejected": -9.794778823852539, "step": 4310 }, { "epoch": 2.2, "learning_rate": 1.4864355689525244e-07, "logits/chosen": -2.307884454727173, "logits/rejected": -2.153669595718384, "logps/chosen": -231.07846069335938, "logps/rejected": -346.56982421875, "loss": 0.0122, "rewards/accuracies": 1.0, "rewards/chosen": 0.09392056614160538, "rewards/margins": 9.005593299865723, "rewards/rejected": -8.911672592163086, "step": 4320 }, { "epoch": 2.2, "learning_rate": 1.4770158251695554e-07, "logits/chosen": -2.318849802017212, "logits/rejected": -2.2451350688934326, "logps/chosen": -252.5889434814453, "logps/rejected": -347.39190673828125, "loss": 0.0171, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5418094992637634, "rewards/margins": 8.427831649780273, "rewards/rejected": -8.969642639160156, "step": 4330 }, { "epoch": 2.21, "learning_rate": 1.467596081386586e-07, "logits/chosen": -2.3386223316192627, "logits/rejected": -2.319746494293213, "logps/chosen": -249.5520477294922, "logps/rejected": -333.26812744140625, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": -0.6450480222702026, "rewards/margins": 8.832240104675293, "rewards/rejected": -9.477288246154785, "step": 4340 }, { "epoch": 2.21, "learning_rate": 1.458176337603617e-07, "logits/chosen": -2.4239277839660645, "logits/rejected": -2.302274227142334, "logps/chosen": -265.55548095703125, "logps/rejected": -356.56787109375, "loss": 0.0209, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.4280431866645813, "rewards/margins": 9.597391128540039, "rewards/rejected": -10.025433540344238, "step": 4350 }, { "epoch": 2.22, "learning_rate": 1.448756593820648e-07, "logits/chosen": -2.408919334411621, "logits/rejected": -2.2459819316864014, "logps/chosen": -306.56903076171875, "logps/rejected": -341.38494873046875, "loss": 0.0251, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.19567224383354187, "rewards/margins": 8.941240310668945, "rewards/rejected": -9.136914253234863, "step": 4360 }, { "epoch": 2.22, "learning_rate": 1.439336850037679e-07, "logits/chosen": -2.3950576782226562, "logits/rejected": -2.3486268520355225, "logps/chosen": -286.4931945800781, "logps/rejected": -394.418212890625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.07271413505077362, "rewards/margins": 10.440177917480469, "rewards/rejected": -10.51289176940918, "step": 4370 }, { "epoch": 2.23, "learning_rate": 1.4299171062547096e-07, "logits/chosen": -2.3615708351135254, "logits/rejected": -2.3377792835235596, "logps/chosen": -295.9791564941406, "logps/rejected": -297.25128173828125, "loss": 0.008, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.30061447620391846, "rewards/margins": 8.242612838745117, "rewards/rejected": -8.54322624206543, "step": 4380 }, { "epoch": 2.23, "learning_rate": 1.4204973624717406e-07, "logits/chosen": -2.4010937213897705, "logits/rejected": -2.436049699783325, "logps/chosen": -280.08074951171875, "logps/rejected": -376.03857421875, "loss": 0.0128, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.8056174516677856, "rewards/margins": 8.615859985351562, "rewards/rejected": -9.421477317810059, "step": 4390 }, { "epoch": 2.24, "learning_rate": 1.4110776186887716e-07, "logits/chosen": -2.3178069591522217, "logits/rejected": -2.3701210021972656, "logps/chosen": -306.66387939453125, "logps/rejected": -313.7434997558594, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.5981841683387756, "rewards/margins": 8.42170238494873, "rewards/rejected": -9.01988697052002, "step": 4400 }, { "epoch": 2.24, "eval_logits/chosen": -2.4479472637176514, "eval_logits/rejected": -2.5108604431152344, "eval_logps/chosen": -312.2333679199219, "eval_logps/rejected": -320.94183349609375, "eval_loss": 0.7192836403846741, "eval_rewards/accuracies": 0.7404580116271973, "eval_rewards/chosen": -3.3982906341552734, "eval_rewards/margins": 2.882378101348877, "eval_rewards/rejected": -6.280669212341309, "eval_runtime": 301.8883, "eval_samples_per_second": 6.91, "eval_steps_per_second": 0.434, "step": 4400 }, { "epoch": 2.24, "learning_rate": 1.4016578749058026e-07, "logits/chosen": -2.3137736320495605, "logits/rejected": -2.457977771759033, "logps/chosen": -281.3287658691406, "logps/rejected": -382.7276916503906, "loss": 0.0067, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.436063289642334, "rewards/margins": 10.180338859558105, "rewards/rejected": -10.616401672363281, "step": 4410 }, { "epoch": 2.25, "learning_rate": 1.3922381311228334e-07, "logits/chosen": -2.4059033393859863, "logits/rejected": -2.3955695629119873, "logps/chosen": -285.6089172363281, "logps/rejected": -304.55816650390625, "loss": 0.0149, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.42298993468284607, "rewards/margins": 8.393533706665039, "rewards/rejected": -8.816523551940918, "step": 4420 }, { "epoch": 2.25, "learning_rate": 1.3828183873398644e-07, "logits/chosen": -2.356703042984009, "logits/rejected": -2.3774828910827637, "logps/chosen": -259.9477233886719, "logps/rejected": -343.700927734375, "loss": 0.0127, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.021397648379206657, "rewards/margins": 9.302734375, "rewards/rejected": -9.324131965637207, "step": 4430 }, { "epoch": 2.26, "learning_rate": 1.373398643556895e-07, "logits/chosen": -2.3701252937316895, "logits/rejected": -2.2263360023498535, "logps/chosen": -260.7659912109375, "logps/rejected": -352.81146240234375, "loss": 0.0144, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.3572626113891602, "rewards/margins": 8.490302085876465, "rewards/rejected": -9.847564697265625, "step": 4440 }, { "epoch": 2.26, "learning_rate": 1.363978899773926e-07, "logits/chosen": -2.2707505226135254, "logits/rejected": -2.312922477722168, "logps/chosen": -230.23788452148438, "logps/rejected": -319.2444152832031, "loss": 0.0147, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7442948818206787, "rewards/margins": 9.182458877563477, "rewards/rejected": -9.92675495147705, "step": 4450 }, { "epoch": 2.27, "learning_rate": 1.3545591559909568e-07, "logits/chosen": -2.3916873931884766, "logits/rejected": -2.3660058975219727, "logps/chosen": -301.7834777832031, "logps/rejected": -381.32659912109375, "loss": 0.0333, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.15467780828475952, "rewards/margins": 9.97493839263916, "rewards/rejected": -9.820260047912598, "step": 4460 }, { "epoch": 2.27, "learning_rate": 1.3451394122079879e-07, "logits/chosen": -2.2422537803649902, "logits/rejected": -2.285167694091797, "logps/chosen": -299.6961975097656, "logps/rejected": -340.37255859375, "loss": 0.0195, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5292568802833557, "rewards/margins": 8.242840766906738, "rewards/rejected": -8.77209758758545, "step": 4470 }, { "epoch": 2.28, "learning_rate": 1.3357196684250189e-07, "logits/chosen": -2.368232250213623, "logits/rejected": -2.367140293121338, "logps/chosen": -251.2327880859375, "logps/rejected": -353.19757080078125, "loss": 0.0081, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.07832960784435272, "rewards/margins": 9.432154655456543, "rewards/rejected": -9.353825569152832, "step": 4480 }, { "epoch": 2.28, "learning_rate": 1.3262999246420499e-07, "logits/chosen": -2.3377442359924316, "logits/rejected": -2.3477752208709717, "logps/chosen": -290.51300048828125, "logps/rejected": -340.4662170410156, "loss": 0.0092, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.2261306792497635, "rewards/margins": 9.160150527954102, "rewards/rejected": -8.934020042419434, "step": 4490 }, { "epoch": 2.29, "learning_rate": 1.3168801808590806e-07, "logits/chosen": -2.2825725078582764, "logits/rejected": -2.302478313446045, "logps/chosen": -274.8885803222656, "logps/rejected": -333.46563720703125, "loss": 0.0116, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.43997201323509216, "rewards/margins": 8.661720275878906, "rewards/rejected": -9.101692199707031, "step": 4500 }, { "epoch": 2.29, "eval_logits/chosen": -2.4131815433502197, "eval_logits/rejected": -2.4787368774414062, "eval_logps/chosen": -311.6757507324219, "eval_logps/rejected": -320.0794677734375, "eval_loss": 0.7127760648727417, "eval_rewards/accuracies": 0.7461832165718079, "eval_rewards/chosen": -3.34252667427063, "eval_rewards/margins": 2.8519062995910645, "eval_rewards/rejected": -6.194432735443115, "eval_runtime": 297.542, "eval_samples_per_second": 7.011, "eval_steps_per_second": 0.44, "step": 4500 }, { "epoch": 2.29, "learning_rate": 1.3074604370761113e-07, "logits/chosen": -2.287743091583252, "logits/rejected": -2.327247142791748, "logps/chosen": -287.9313049316406, "logps/rejected": -366.33245849609375, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.5075663328170776, "rewards/margins": 8.495210647583008, "rewards/rejected": -9.002776145935059, "step": 4510 }, { "epoch": 2.3, "learning_rate": 1.2980406932931423e-07, "logits/chosen": -2.205138683319092, "logits/rejected": -2.3599820137023926, "logps/chosen": -263.99774169921875, "logps/rejected": -360.87890625, "loss": 0.0128, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -1.2749487161636353, "rewards/margins": 8.532350540161133, "rewards/rejected": -9.80729866027832, "step": 4520 }, { "epoch": 2.3, "learning_rate": 1.2886209495101734e-07, "logits/chosen": -2.2959253787994385, "logits/rejected": -2.1113479137420654, "logps/chosen": -279.8739318847656, "logps/rejected": -370.27825927734375, "loss": 0.0333, "rewards/accuracies": 1.0, "rewards/chosen": -0.19222494959831238, "rewards/margins": 9.703863143920898, "rewards/rejected": -9.896089553833008, "step": 4530 }, { "epoch": 2.31, "learning_rate": 1.279201205727204e-07, "logits/chosen": -2.155374526977539, "logits/rejected": -2.2465829849243164, "logps/chosen": -243.92562866210938, "logps/rejected": -288.19195556640625, "loss": 0.0115, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7421571016311646, "rewards/margins": 7.734604835510254, "rewards/rejected": -8.476762771606445, "step": 4540 }, { "epoch": 2.31, "learning_rate": 1.269781461944235e-07, "logits/chosen": -2.194904327392578, "logits/rejected": -2.2567036151885986, "logps/chosen": -240.79861450195312, "logps/rejected": -324.8790283203125, "loss": 0.0296, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.22452616691589355, "rewards/margins": 8.310542106628418, "rewards/rejected": -8.53506851196289, "step": 4550 }, { "epoch": 2.32, "learning_rate": 1.260361718161266e-07, "logits/chosen": -2.3649609088897705, "logits/rejected": -2.355762004852295, "logps/chosen": -291.94244384765625, "logps/rejected": -322.20025634765625, "loss": 0.0271, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": 0.1271209418773651, "rewards/margins": 8.356206893920898, "rewards/rejected": -8.229085922241211, "step": 4560 }, { "epoch": 2.32, "learning_rate": 1.2509419743782968e-07, "logits/chosen": -2.241570472717285, "logits/rejected": -2.2535672187805176, "logps/chosen": -311.3485412597656, "logps/rejected": -336.29437255859375, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": 0.06302668154239655, "rewards/margins": 8.881575584411621, "rewards/rejected": -8.818550109863281, "step": 4570 }, { "epoch": 2.33, "learning_rate": 1.2415222305953278e-07, "logits/chosen": -2.225210428237915, "logits/rejected": -2.2509846687316895, "logps/chosen": -288.7023010253906, "logps/rejected": -362.2086181640625, "loss": 0.0433, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.6437242031097412, "rewards/margins": 9.244821548461914, "rewards/rejected": -9.888544082641602, "step": 4580 }, { "epoch": 2.33, "learning_rate": 1.2321024868123586e-07, "logits/chosen": -2.152923822402954, "logits/rejected": -2.3054864406585693, "logps/chosen": -254.2602081298828, "logps/rejected": -339.173095703125, "loss": 0.0086, "rewards/accuracies": 1.0, "rewards/chosen": -0.7262587547302246, "rewards/margins": 9.192373275756836, "rewards/rejected": -9.918632507324219, "step": 4590 }, { "epoch": 2.34, "learning_rate": 1.2226827430293896e-07, "logits/chosen": -2.3011200428009033, "logits/rejected": -2.2208499908447266, "logps/chosen": -275.1327209472656, "logps/rejected": -350.51617431640625, "loss": 0.0077, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.2207508087158203, "rewards/margins": 9.613337516784668, "rewards/rejected": -9.834087371826172, "step": 4600 }, { "epoch": 2.34, "eval_logits/chosen": -2.377899169921875, "eval_logits/rejected": -2.4448626041412354, "eval_logps/chosen": -310.5561828613281, "eval_logps/rejected": -319.6102294921875, "eval_loss": 0.7218549847602844, "eval_rewards/accuracies": 0.7480915784835815, "eval_rewards/chosen": -3.2305691242218018, "eval_rewards/margins": 2.916940212249756, "eval_rewards/rejected": -6.147509574890137, "eval_runtime": 301.7525, "eval_samples_per_second": 6.913, "eval_steps_per_second": 0.434, "step": 4600 }, { "epoch": 2.34, "learning_rate": 1.2132629992464206e-07, "logits/chosen": -2.305997848510742, "logits/rejected": -2.269470691680908, "logps/chosen": -296.7271728515625, "logps/rejected": -344.4403991699219, "loss": 0.0099, "rewards/accuracies": 1.0, "rewards/chosen": 0.29369282722473145, "rewards/margins": 9.259397506713867, "rewards/rejected": -8.965703964233398, "step": 4610 }, { "epoch": 2.35, "learning_rate": 1.2038432554634513e-07, "logits/chosen": -2.270097255706787, "logits/rejected": -2.275355577468872, "logps/chosen": -305.75482177734375, "logps/rejected": -370.3743591308594, "loss": 0.017, "rewards/accuracies": 1.0, "rewards/chosen": -0.736997663974762, "rewards/margins": 8.412660598754883, "rewards/rejected": -9.149658203125, "step": 4620 }, { "epoch": 2.36, "learning_rate": 1.1944235116804823e-07, "logits/chosen": -2.1765987873077393, "logits/rejected": -2.2477710247039795, "logps/chosen": -234.5129852294922, "logps/rejected": -297.01812744140625, "loss": 0.0089, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9503382444381714, "rewards/margins": 8.9337158203125, "rewards/rejected": -9.884054183959961, "step": 4630 }, { "epoch": 2.36, "learning_rate": 1.1850037678975132e-07, "logits/chosen": -2.2448458671569824, "logits/rejected": -2.1628146171569824, "logps/chosen": -257.5953674316406, "logps/rejected": -362.9079284667969, "loss": 0.0075, "rewards/accuracies": 1.0, "rewards/chosen": -0.7563697099685669, "rewards/margins": 10.294295310974121, "rewards/rejected": -11.050664901733398, "step": 4640 }, { "epoch": 2.37, "learning_rate": 1.175584024114544e-07, "logits/chosen": -2.159095048904419, "logits/rejected": -2.044412612915039, "logps/chosen": -258.42095947265625, "logps/rejected": -369.79595947265625, "loss": 0.0076, "rewards/accuracies": 1.0, "rewards/chosen": -0.45193544030189514, "rewards/margins": 10.678693771362305, "rewards/rejected": -11.13062858581543, "step": 4650 }, { "epoch": 2.37, "learning_rate": 1.166164280331575e-07, "logits/chosen": -2.229854106903076, "logits/rejected": -2.2149221897125244, "logps/chosen": -253.66671752929688, "logps/rejected": -366.19329833984375, "loss": 0.0128, "rewards/accuracies": 1.0, "rewards/chosen": -0.015896644443273544, "rewards/margins": 10.538129806518555, "rewards/rejected": -10.55402660369873, "step": 4660 }, { "epoch": 2.38, "learning_rate": 1.1567445365486058e-07, "logits/chosen": -2.252213478088379, "logits/rejected": -2.1887106895446777, "logps/chosen": -289.4987487792969, "logps/rejected": -357.27313232421875, "loss": 0.0265, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.4829696714878082, "rewards/margins": 9.562679290771484, "rewards/rejected": -10.045648574829102, "step": 4670 }, { "epoch": 2.38, "learning_rate": 1.1473247927656367e-07, "logits/chosen": -2.224421262741089, "logits/rejected": -2.2251248359680176, "logps/chosen": -284.118896484375, "logps/rejected": -349.9617919921875, "loss": 0.0229, "rewards/accuracies": 1.0, "rewards/chosen": 0.6989560127258301, "rewards/margins": 10.372453689575195, "rewards/rejected": -9.673498153686523, "step": 4680 }, { "epoch": 2.39, "learning_rate": 1.1379050489826676e-07, "logits/chosen": -2.155917167663574, "logits/rejected": -2.2038064002990723, "logps/chosen": -300.345458984375, "logps/rejected": -325.0493469238281, "loss": 0.0141, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.3622466027736664, "rewards/margins": 8.947761535644531, "rewards/rejected": -9.310009002685547, "step": 4690 }, { "epoch": 2.39, "learning_rate": 1.1284853051996986e-07, "logits/chosen": -2.1967692375183105, "logits/rejected": -2.1822071075439453, "logps/chosen": -286.33306884765625, "logps/rejected": -323.97869873046875, "loss": 0.0177, "rewards/accuracies": 1.0, "rewards/chosen": 0.10714861005544662, "rewards/margins": 9.238419532775879, "rewards/rejected": -9.131271362304688, "step": 4700 }, { "epoch": 2.39, "eval_logits/chosen": -2.31742000579834, "eval_logits/rejected": -2.3860576152801514, "eval_logps/chosen": -313.7193603515625, "eval_logps/rejected": -323.3455810546875, "eval_loss": 0.7450771331787109, "eval_rewards/accuracies": 0.75, "eval_rewards/chosen": -3.546886444091797, "eval_rewards/margins": 2.974155902862549, "eval_rewards/rejected": -6.521042346954346, "eval_runtime": 297.4748, "eval_samples_per_second": 7.012, "eval_steps_per_second": 0.44, "step": 4700 }, { "epoch": 2.4, "learning_rate": 1.1190655614167293e-07, "logits/chosen": -2.294706344604492, "logits/rejected": -2.340592861175537, "logps/chosen": -260.90826416015625, "logps/rejected": -362.89837646484375, "loss": 0.012, "rewards/accuracies": 1.0, "rewards/chosen": -0.9304442405700684, "rewards/margins": 8.845898628234863, "rewards/rejected": -9.776342391967773, "step": 4710 }, { "epoch": 2.4, "learning_rate": 1.1096458176337603e-07, "logits/chosen": -2.218076467514038, "logits/rejected": -2.146393060684204, "logps/chosen": -288.2243347167969, "logps/rejected": -399.74151611328125, "loss": 0.0059, "rewards/accuracies": 1.0, "rewards/chosen": -0.29963019490242004, "rewards/margins": 9.852890014648438, "rewards/rejected": -10.152520179748535, "step": 4720 }, { "epoch": 2.41, "learning_rate": 1.1002260738507912e-07, "logits/chosen": -2.213052272796631, "logits/rejected": -2.272000551223755, "logps/chosen": -258.693603515625, "logps/rejected": -314.71307373046875, "loss": 0.0034, "rewards/accuracies": 1.0, "rewards/chosen": -0.362382173538208, "rewards/margins": 9.149022102355957, "rewards/rejected": -9.511404037475586, "step": 4730 }, { "epoch": 2.41, "learning_rate": 1.0908063300678221e-07, "logits/chosen": -2.230384349822998, "logits/rejected": -2.141641855239868, "logps/chosen": -285.4044494628906, "logps/rejected": -359.29266357421875, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.2375614643096924, "rewards/margins": 8.541139602661133, "rewards/rejected": -9.77869987487793, "step": 4740 }, { "epoch": 2.42, "learning_rate": 1.081386586284853e-07, "logits/chosen": -2.3513834476470947, "logits/rejected": -2.3596320152282715, "logps/chosen": -266.305908203125, "logps/rejected": -380.3756103515625, "loss": 0.0051, "rewards/accuracies": 1.0, "rewards/chosen": -0.33188745379447937, "rewards/margins": 9.921003341674805, "rewards/rejected": -10.252891540527344, "step": 4750 }, { "epoch": 2.42, "learning_rate": 1.071966842501884e-07, "logits/chosen": -2.2266974449157715, "logits/rejected": -2.2696118354797363, "logps/chosen": -298.6575622558594, "logps/rejected": -355.7513427734375, "loss": 0.0111, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5612926483154297, "rewards/margins": 9.986989974975586, "rewards/rejected": -10.548282623291016, "step": 4760 }, { "epoch": 2.43, "learning_rate": 1.0625470987189147e-07, "logits/chosen": -2.29338002204895, "logits/rejected": -2.3379619121551514, "logps/chosen": -266.6180114746094, "logps/rejected": -322.62994384765625, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.5768706202507019, "rewards/margins": 9.321121215820312, "rewards/rejected": -9.897993087768555, "step": 4770 }, { "epoch": 2.43, "learning_rate": 1.0531273549359457e-07, "logits/chosen": -2.1704368591308594, "logits/rejected": -2.236093044281006, "logps/chosen": -266.098388671875, "logps/rejected": -318.8455505371094, "loss": 0.0137, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.9614885449409485, "rewards/margins": 9.26085090637207, "rewards/rejected": -10.222338676452637, "step": 4780 }, { "epoch": 2.44, "learning_rate": 1.0437076111529766e-07, "logits/chosen": -2.2239723205566406, "logits/rejected": -2.147284984588623, "logps/chosen": -307.8612976074219, "logps/rejected": -340.2486572265625, "loss": 0.0284, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.18168917298316956, "rewards/margins": 9.09406566619873, "rewards/rejected": -9.275754928588867, "step": 4790 }, { "epoch": 2.44, "learning_rate": 1.0342878673700074e-07, "logits/chosen": -2.236088991165161, "logits/rejected": -2.2426741123199463, "logps/chosen": -297.51702880859375, "logps/rejected": -378.7740173339844, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -0.6450713872909546, "rewards/margins": 9.456080436706543, "rewards/rejected": -10.101151466369629, "step": 4800 }, { "epoch": 2.44, "eval_logits/chosen": -2.3242154121398926, "eval_logits/rejected": -2.393857955932617, "eval_logps/chosen": -313.05194091796875, "eval_logps/rejected": -322.5316467285156, "eval_loss": 0.7546879649162292, "eval_rewards/accuracies": 0.7423664331436157, "eval_rewards/chosen": -3.4801478385925293, "eval_rewards/margins": 2.959501266479492, "eval_rewards/rejected": -6.439650058746338, "eval_runtime": 301.9744, "eval_samples_per_second": 6.908, "eval_steps_per_second": 0.434, "step": 4800 }, { "epoch": 2.45, "learning_rate": 1.0248681235870383e-07, "logits/chosen": -2.2089924812316895, "logits/rejected": -2.2755725383758545, "logps/chosen": -270.680908203125, "logps/rejected": -341.1857604980469, "loss": 0.0045, "rewards/accuracies": 1.0, "rewards/chosen": -0.691789448261261, "rewards/margins": 8.840271949768066, "rewards/rejected": -9.532060623168945, "step": 4810 }, { "epoch": 2.45, "learning_rate": 1.0154483798040693e-07, "logits/chosen": -2.3454391956329346, "logits/rejected": -2.2591652870178223, "logps/chosen": -321.9779968261719, "logps/rejected": -342.6640930175781, "loss": 0.0223, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.39104217290878296, "rewards/margins": 9.368048667907715, "rewards/rejected": -9.759092330932617, "step": 4820 }, { "epoch": 2.46, "learning_rate": 1.0060286360211002e-07, "logits/chosen": -2.259584665298462, "logits/rejected": -2.161665439605713, "logps/chosen": -286.1450500488281, "logps/rejected": -370.9566650390625, "loss": 0.0087, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.1616705358028412, "rewards/margins": 9.993936538696289, "rewards/rejected": -10.155607223510742, "step": 4830 }, { "epoch": 2.46, "learning_rate": 9.96608892238131e-08, "logits/chosen": -2.105440378189087, "logits/rejected": -2.151834487915039, "logps/chosen": -306.2788391113281, "logps/rejected": -394.65850830078125, "loss": 0.0095, "rewards/accuracies": 1.0, "rewards/chosen": -0.5894113779067993, "rewards/margins": 9.950549125671387, "rewards/rejected": -10.539959907531738, "step": 4840 }, { "epoch": 2.47, "learning_rate": 9.87189148455162e-08, "logits/chosen": -2.1507091522216797, "logits/rejected": -2.1006345748901367, "logps/chosen": -300.9240417480469, "logps/rejected": -418.56134033203125, "loss": 0.0167, "rewards/accuracies": 1.0, "rewards/chosen": -1.125248670578003, "rewards/margins": 9.699620246887207, "rewards/rejected": -10.824868202209473, "step": 4850 }, { "epoch": 2.47, "learning_rate": 9.77769404672193e-08, "logits/chosen": -2.087984561920166, "logits/rejected": -2.0346813201904297, "logps/chosen": -281.7013854980469, "logps/rejected": -378.5484619140625, "loss": 0.0133, "rewards/accuracies": 1.0, "rewards/chosen": -0.6053246855735779, "rewards/margins": 9.81657886505127, "rewards/rejected": -10.421903610229492, "step": 4860 }, { "epoch": 2.48, "learning_rate": 9.683496608892237e-08, "logits/chosen": -2.2168502807617188, "logits/rejected": -2.277942419052124, "logps/chosen": -280.87860107421875, "logps/rejected": -340.9623107910156, "loss": 0.0093, "rewards/accuracies": 1.0, "rewards/chosen": -0.9028668403625488, "rewards/margins": 9.776205062866211, "rewards/rejected": -10.679072380065918, "step": 4870 }, { "epoch": 2.48, "learning_rate": 9.589299171062547e-08, "logits/chosen": -2.1402547359466553, "logits/rejected": -2.1672096252441406, "logps/chosen": -326.22125244140625, "logps/rejected": -379.2210998535156, "loss": 0.0066, "rewards/accuracies": 1.0, "rewards/chosen": -0.9656316637992859, "rewards/margins": 9.55207633972168, "rewards/rejected": -10.517707824707031, "step": 4880 }, { "epoch": 2.49, "learning_rate": 9.495101733232856e-08, "logits/chosen": -2.2046916484832764, "logits/rejected": -2.1590428352355957, "logps/chosen": -264.8122253417969, "logps/rejected": -351.7208557128906, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.2652990221977234, "rewards/margins": 10.268514633178711, "rewards/rejected": -10.5338134765625, "step": 4890 }, { "epoch": 2.49, "learning_rate": 9.400904295403164e-08, "logits/chosen": -2.1020796298980713, "logits/rejected": -2.1482295989990234, "logps/chosen": -239.36373901367188, "logps/rejected": -300.88494873046875, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.654992938041687, "rewards/margins": 9.085423469543457, "rewards/rejected": -9.740415573120117, "step": 4900 }, { "epoch": 2.49, "eval_logits/chosen": -2.283444404602051, "eval_logits/rejected": -2.3523569107055664, "eval_logps/chosen": -316.84600830078125, "eval_logps/rejected": -326.6253356933594, "eval_loss": 0.7691048383712769, "eval_rewards/accuracies": 0.7442747950553894, "eval_rewards/chosen": -3.8595566749572754, "eval_rewards/margins": 2.989461660385132, "eval_rewards/rejected": -6.84901762008667, "eval_runtime": 297.4323, "eval_samples_per_second": 7.013, "eval_steps_per_second": 0.44, "step": 4900 }, { "epoch": 2.5, "learning_rate": 9.306706857573473e-08, "logits/chosen": -2.2702176570892334, "logits/rejected": -2.197672128677368, "logps/chosen": -317.44647216796875, "logps/rejected": -370.15740966796875, "loss": 0.0087, "rewards/accuracies": 1.0, "rewards/chosen": -0.3453849256038666, "rewards/margins": 8.828009605407715, "rewards/rejected": -9.173395156860352, "step": 4910 }, { "epoch": 2.5, "learning_rate": 9.212509419743783e-08, "logits/chosen": -2.0067317485809326, "logits/rejected": -2.053682565689087, "logps/chosen": -284.0643005371094, "logps/rejected": -405.646484375, "loss": 0.0426, "rewards/accuracies": 1.0, "rewards/chosen": -1.2845280170440674, "rewards/margins": 10.26252555847168, "rewards/rejected": -11.547052383422852, "step": 4920 }, { "epoch": 2.51, "learning_rate": 9.11831198191409e-08, "logits/chosen": -2.069619655609131, "logits/rejected": -2.014024257659912, "logps/chosen": -285.60797119140625, "logps/rejected": -365.84307861328125, "loss": 0.0115, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7954369783401489, "rewards/margins": 9.284969329833984, "rewards/rejected": -10.08040714263916, "step": 4930 }, { "epoch": 2.51, "learning_rate": 9.0241145440844e-08, "logits/chosen": -2.1276021003723145, "logits/rejected": -2.140718460083008, "logps/chosen": -307.3009033203125, "logps/rejected": -401.25823974609375, "loss": 0.0081, "rewards/accuracies": 1.0, "rewards/chosen": -0.05557692050933838, "rewards/margins": 10.433725357055664, "rewards/rejected": -10.489302635192871, "step": 4940 }, { "epoch": 2.52, "learning_rate": 8.929917106254709e-08, "logits/chosen": -2.2412877082824707, "logits/rejected": -2.149836301803589, "logps/chosen": -265.6690368652344, "logps/rejected": -375.5118713378906, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": -0.8982291221618652, "rewards/margins": 9.667852401733398, "rewards/rejected": -10.566082954406738, "step": 4950 }, { "epoch": 2.52, "learning_rate": 8.835719668425018e-08, "logits/chosen": -2.1044938564300537, "logits/rejected": -2.225058078765869, "logps/chosen": -294.85675048828125, "logps/rejected": -398.77008056640625, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -1.0629851818084717, "rewards/margins": 10.607720375061035, "rewards/rejected": -11.670705795288086, "step": 4960 }, { "epoch": 2.53, "learning_rate": 8.741522230595327e-08, "logits/chosen": -2.183194398880005, "logits/rejected": -2.2053921222686768, "logps/chosen": -323.3440246582031, "logps/rejected": -361.22796630859375, "loss": 0.0148, "rewards/accuracies": 1.0, "rewards/chosen": -0.964472770690918, "rewards/margins": 9.647565841674805, "rewards/rejected": -10.61203670501709, "step": 4970 }, { "epoch": 2.53, "learning_rate": 8.647324792765637e-08, "logits/chosen": -2.1607978343963623, "logits/rejected": -2.1534907817840576, "logps/chosen": -293.7584228515625, "logps/rejected": -361.35565185546875, "loss": 0.0127, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.08604813367128372, "rewards/margins": 8.945721626281738, "rewards/rejected": -9.031770706176758, "step": 4980 }, { "epoch": 2.54, "learning_rate": 8.553127354935944e-08, "logits/chosen": -2.252793073654175, "logits/rejected": -2.175422191619873, "logps/chosen": -258.53887939453125, "logps/rejected": -349.27581787109375, "loss": 0.0114, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.41624441742897034, "rewards/margins": 9.724603652954102, "rewards/rejected": -10.140849113464355, "step": 4990 }, { "epoch": 2.54, "learning_rate": 8.458929917106254e-08, "logits/chosen": -2.075585126876831, "logits/rejected": -2.1678786277770996, "logps/chosen": -291.48297119140625, "logps/rejected": -383.5638732910156, "loss": 0.0118, "rewards/accuracies": 1.0, "rewards/chosen": -0.1401732861995697, "rewards/margins": 11.019479751586914, "rewards/rejected": -11.159653663635254, "step": 5000 }, { "epoch": 2.54, "eval_logits/chosen": -2.2657597064971924, "eval_logits/rejected": -2.3346636295318604, "eval_logps/chosen": -317.1129150390625, "eval_logps/rejected": -326.8659362792969, "eval_loss": 0.7716654539108276, "eval_rewards/accuracies": 0.7461832165718079, "eval_rewards/chosen": -3.8862454891204834, "eval_rewards/margins": 2.986832857131958, "eval_rewards/rejected": -6.873078346252441, "eval_runtime": 301.4436, "eval_samples_per_second": 6.92, "eval_steps_per_second": 0.435, "step": 5000 }, { "epoch": 2.55, "learning_rate": 8.364732479276564e-08, "logits/chosen": -2.0859687328338623, "logits/rejected": -2.1620824337005615, "logps/chosen": -300.7864685058594, "logps/rejected": -342.90472412109375, "loss": 0.0132, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5269465446472168, "rewards/margins": 9.316871643066406, "rewards/rejected": -9.843819618225098, "step": 5010 }, { "epoch": 2.55, "learning_rate": 8.270535041446872e-08, "logits/chosen": -2.2324938774108887, "logits/rejected": -2.171308755874634, "logps/chosen": -308.4803771972656, "logps/rejected": -374.90447998046875, "loss": 0.0178, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.11912157386541367, "rewards/margins": 9.949522018432617, "rewards/rejected": -9.830400466918945, "step": 5020 }, { "epoch": 2.56, "learning_rate": 8.176337603617182e-08, "logits/chosen": -2.222607374191284, "logits/rejected": -2.1958372592926025, "logps/chosen": -328.6293640136719, "logps/rejected": -375.71160888671875, "loss": 0.0127, "rewards/accuracies": 1.0, "rewards/chosen": -0.37949132919311523, "rewards/margins": 9.425061225891113, "rewards/rejected": -9.80455207824707, "step": 5030 }, { "epoch": 2.56, "learning_rate": 8.08214016578749e-08, "logits/chosen": -2.2369132041931152, "logits/rejected": -2.2004668712615967, "logps/chosen": -289.24847412109375, "logps/rejected": -363.97882080078125, "loss": 0.0104, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.8536527752876282, "rewards/margins": 8.470075607299805, "rewards/rejected": -9.323729515075684, "step": 5040 }, { "epoch": 2.57, "learning_rate": 7.987942727957798e-08, "logits/chosen": -2.146327495574951, "logits/rejected": -2.2037487030029297, "logps/chosen": -307.6786804199219, "logps/rejected": -360.6956481933594, "loss": 0.0179, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.42890268564224243, "rewards/margins": 9.714688301086426, "rewards/rejected": -10.143590927124023, "step": 5050 }, { "epoch": 2.57, "learning_rate": 7.893745290128108e-08, "logits/chosen": -2.2009823322296143, "logits/rejected": -2.1575918197631836, "logps/chosen": -268.5896911621094, "logps/rejected": -395.19207763671875, "loss": 0.0113, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.4908009469509125, "rewards/margins": 9.655282974243164, "rewards/rejected": -10.146084785461426, "step": 5060 }, { "epoch": 2.58, "learning_rate": 7.799547852298418e-08, "logits/chosen": -2.2042064666748047, "logits/rejected": -2.186056613922119, "logps/chosen": -319.55474853515625, "logps/rejected": -393.94366455078125, "loss": 0.0099, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.20358404517173767, "rewards/margins": 10.148531913757324, "rewards/rejected": -10.352116584777832, "step": 5070 }, { "epoch": 2.58, "learning_rate": 7.705350414468727e-08, "logits/chosen": -2.129512071609497, "logits/rejected": -2.191772937774658, "logps/chosen": -279.49920654296875, "logps/rejected": -366.9100646972656, "loss": 0.0104, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.13818223774433136, "rewards/margins": 9.699727058410645, "rewards/rejected": -9.561545372009277, "step": 5080 }, { "epoch": 2.59, "learning_rate": 7.611152976639035e-08, "logits/chosen": -2.196213960647583, "logits/rejected": -2.2114243507385254, "logps/chosen": -271.9547424316406, "logps/rejected": -360.7081604003906, "loss": 0.0028, "rewards/accuracies": 1.0, "rewards/chosen": -0.4612036347389221, "rewards/margins": 10.259782791137695, "rewards/rejected": -10.720987319946289, "step": 5090 }, { "epoch": 2.59, "learning_rate": 7.516955538809344e-08, "logits/chosen": -2.1923937797546387, "logits/rejected": -2.1321029663085938, "logps/chosen": -259.404296875, "logps/rejected": -338.395751953125, "loss": 0.014, "rewards/accuracies": 1.0, "rewards/chosen": 0.023987997323274612, "rewards/margins": 9.157641410827637, "rewards/rejected": -9.13365364074707, "step": 5100 }, { "epoch": 2.59, "eval_logits/chosen": -2.278256893157959, "eval_logits/rejected": -2.3512167930603027, "eval_logps/chosen": -314.220458984375, "eval_logps/rejected": -324.13348388671875, "eval_loss": 0.7685028910636902, "eval_rewards/accuracies": 0.7480915784835815, "eval_rewards/chosen": -3.5969960689544678, "eval_rewards/margins": 3.0028398036956787, "eval_rewards/rejected": -6.599836349487305, "eval_runtime": 296.8967, "eval_samples_per_second": 7.026, "eval_steps_per_second": 0.441, "step": 5100 }, { "epoch": 2.6, "learning_rate": 7.422758100979654e-08, "logits/chosen": -2.2003679275512695, "logits/rejected": -2.1358842849731445, "logps/chosen": -292.3018798828125, "logps/rejected": -338.69610595703125, "loss": 0.0154, "rewards/accuracies": 1.0, "rewards/chosen": -0.16416962444782257, "rewards/margins": 10.127435684204102, "rewards/rejected": -10.291604995727539, "step": 5110 }, { "epoch": 2.6, "learning_rate": 7.328560663149962e-08, "logits/chosen": -2.1990761756896973, "logits/rejected": -2.248767375946045, "logps/chosen": -293.7164306640625, "logps/rejected": -401.2142028808594, "loss": 0.0106, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5500536561012268, "rewards/margins": 9.808300018310547, "rewards/rejected": -10.358353614807129, "step": 5120 }, { "epoch": 2.61, "learning_rate": 7.234363225320272e-08, "logits/chosen": -2.156942844390869, "logits/rejected": -2.1898016929626465, "logps/chosen": -281.28411865234375, "logps/rejected": -315.0388488769531, "loss": 0.0425, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9101797342300415, "rewards/margins": 8.848825454711914, "rewards/rejected": -9.75900650024414, "step": 5130 }, { "epoch": 2.61, "learning_rate": 7.14016578749058e-08, "logits/chosen": -2.1186816692352295, "logits/rejected": -2.2431023120880127, "logps/chosen": -267.6355285644531, "logps/rejected": -338.63470458984375, "loss": 0.0103, "rewards/accuracies": 1.0, "rewards/chosen": -0.910683810710907, "rewards/margins": 8.7294921875, "rewards/rejected": -9.64017391204834, "step": 5140 }, { "epoch": 2.62, "learning_rate": 7.045968349660889e-08, "logits/chosen": -2.1849942207336426, "logits/rejected": -2.195441484451294, "logps/chosen": -300.42578125, "logps/rejected": -402.8392333984375, "loss": 0.0031, "rewards/accuracies": 1.0, "rewards/chosen": -0.7194881439208984, "rewards/margins": 10.500357627868652, "rewards/rejected": -11.219846725463867, "step": 5150 }, { "epoch": 2.62, "learning_rate": 6.951770911831198e-08, "logits/chosen": -2.2179243564605713, "logits/rejected": -2.1924123764038086, "logps/chosen": -289.130859375, "logps/rejected": -348.76812744140625, "loss": 0.0142, "rewards/accuracies": 1.0, "rewards/chosen": 0.5753608345985413, "rewards/margins": 10.490455627441406, "rewards/rejected": -9.915095329284668, "step": 5160 }, { "epoch": 2.63, "learning_rate": 6.857573474001508e-08, "logits/chosen": -2.194222927093506, "logits/rejected": -2.055819034576416, "logps/chosen": -280.11883544921875, "logps/rejected": -410.5008850097656, "loss": 0.0211, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.2896275520324707, "rewards/margins": 10.427669525146484, "rewards/rejected": -10.717297554016113, "step": 5170 }, { "epoch": 2.63, "learning_rate": 6.763376036171815e-08, "logits/chosen": -2.2144243717193604, "logits/rejected": -2.179957866668701, "logps/chosen": -303.1216735839844, "logps/rejected": -402.1628723144531, "loss": 0.0067, "rewards/accuracies": 1.0, "rewards/chosen": -0.1424558013677597, "rewards/margins": 10.109817504882812, "rewards/rejected": -10.25227165222168, "step": 5180 }, { "epoch": 2.64, "learning_rate": 6.669178598342125e-08, "logits/chosen": -2.199436664581299, "logits/rejected": -2.155738353729248, "logps/chosen": -249.98184204101562, "logps/rejected": -326.74127197265625, "loss": 0.0068, "rewards/accuracies": 1.0, "rewards/chosen": -1.0048552751541138, "rewards/margins": 8.819491386413574, "rewards/rejected": -9.824346542358398, "step": 5190 }, { "epoch": 2.64, "learning_rate": 6.574981160512434e-08, "logits/chosen": -2.217965602874756, "logits/rejected": -2.1558725833892822, "logps/chosen": -290.10260009765625, "logps/rejected": -364.9586486816406, "loss": 0.0208, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.88923579454422, "rewards/margins": 8.772710800170898, "rewards/rejected": -9.661947250366211, "step": 5200 }, { "epoch": 2.64, "eval_logits/chosen": -2.31428599357605, "eval_logits/rejected": -2.3874757289886475, "eval_logps/chosen": -317.27935791015625, "eval_logps/rejected": -327.0299377441406, "eval_loss": 0.7740700244903564, "eval_rewards/accuracies": 0.7442747950553894, "eval_rewards/chosen": -3.9028892517089844, "eval_rewards/margins": 2.9865951538085938, "eval_rewards/rejected": -6.889484405517578, "eval_runtime": 301.8194, "eval_samples_per_second": 6.911, "eval_steps_per_second": 0.434, "step": 5200 }, { "epoch": 2.65, "learning_rate": 6.480783722682743e-08, "logits/chosen": -2.21779203414917, "logits/rejected": -2.2501723766326904, "logps/chosen": -277.32733154296875, "logps/rejected": -344.00689697265625, "loss": 0.0166, "rewards/accuracies": 1.0, "rewards/chosen": -0.8932816386222839, "rewards/margins": 8.774493217468262, "rewards/rejected": -9.66777515411377, "step": 5210 }, { "epoch": 2.66, "learning_rate": 6.386586284853051e-08, "logits/chosen": -2.2522101402282715, "logits/rejected": -2.258768081665039, "logps/chosen": -285.1710205078125, "logps/rejected": -361.4646911621094, "loss": 0.0112, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7185908555984497, "rewards/margins": 10.354574203491211, "rewards/rejected": -11.073163986206055, "step": 5220 }, { "epoch": 2.66, "learning_rate": 6.292388847023362e-08, "logits/chosen": -2.171731472015381, "logits/rejected": -2.1915600299835205, "logps/chosen": -300.89727783203125, "logps/rejected": -344.1522216796875, "loss": 0.0089, "rewards/accuracies": 1.0, "rewards/chosen": -0.9044286608695984, "rewards/margins": 8.943483352661133, "rewards/rejected": -9.847909927368164, "step": 5230 }, { "epoch": 2.67, "learning_rate": 6.19819140919367e-08, "logits/chosen": -2.1025567054748535, "logits/rejected": -2.0875024795532227, "logps/chosen": -278.44757080078125, "logps/rejected": -355.54962158203125, "loss": 0.0132, "rewards/accuracies": 1.0, "rewards/chosen": -0.6099380850791931, "rewards/margins": 8.724331855773926, "rewards/rejected": -9.334268569946289, "step": 5240 }, { "epoch": 2.67, "learning_rate": 6.103993971363979e-08, "logits/chosen": -2.236751079559326, "logits/rejected": -2.3104147911071777, "logps/chosen": -258.1316833496094, "logps/rejected": -380.6584167480469, "loss": 0.0041, "rewards/accuracies": 1.0, "rewards/chosen": -0.4985658526420593, "rewards/margins": 9.4805326461792, "rewards/rejected": -9.979098320007324, "step": 5250 }, { "epoch": 2.68, "learning_rate": 6.009796533534288e-08, "logits/chosen": -2.1440768241882324, "logits/rejected": -2.2688815593719482, "logps/chosen": -283.58758544921875, "logps/rejected": -368.4014892578125, "loss": 0.0178, "rewards/accuracies": 1.0, "rewards/chosen": -0.7808700203895569, "rewards/margins": 10.05615234375, "rewards/rejected": -10.837023735046387, "step": 5260 }, { "epoch": 2.68, "learning_rate": 5.9155990957045964e-08, "logits/chosen": -2.1298089027404785, "logits/rejected": -2.155709981918335, "logps/chosen": -315.98492431640625, "logps/rejected": -381.3930969238281, "loss": 0.0806, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.07813958823680878, "rewards/margins": 9.502008438110352, "rewards/rejected": -9.580145835876465, "step": 5270 }, { "epoch": 2.69, "learning_rate": 5.821401657874905e-08, "logits/chosen": -2.322612762451172, "logits/rejected": -2.144012928009033, "logps/chosen": -264.17950439453125, "logps/rejected": -371.1512756347656, "loss": 0.0138, "rewards/accuracies": 1.0, "rewards/chosen": -0.7406362295150757, "rewards/margins": 9.892260551452637, "rewards/rejected": -10.63289737701416, "step": 5280 }, { "epoch": 2.69, "learning_rate": 5.7272042200452145e-08, "logits/chosen": -2.262551784515381, "logits/rejected": -2.162778377532959, "logps/chosen": -235.9697265625, "logps/rejected": -357.1461181640625, "loss": 0.0243, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.732375979423523, "rewards/margins": 9.819782257080078, "rewards/rejected": -10.552157402038574, "step": 5290 }, { "epoch": 2.7, "learning_rate": 5.633006782215523e-08, "logits/chosen": -2.3629612922668457, "logits/rejected": -2.335319995880127, "logps/chosen": -256.4373474121094, "logps/rejected": -355.1130065917969, "loss": 0.0076, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7695462703704834, "rewards/margins": 8.623259544372559, "rewards/rejected": -9.392806053161621, "step": 5300 }, { "epoch": 2.7, "eval_logits/chosen": -2.3592212200164795, "eval_logits/rejected": -2.433117151260376, "eval_logps/chosen": -314.4091796875, "eval_logps/rejected": -323.9353332519531, "eval_loss": 0.7599921822547913, "eval_rewards/accuracies": 0.7423664331436157, "eval_rewards/chosen": -3.615870475769043, "eval_rewards/margins": 2.9641480445861816, "eval_rewards/rejected": -6.580018520355225, "eval_runtime": 296.8645, "eval_samples_per_second": 7.027, "eval_steps_per_second": 0.441, "step": 5300 }, { "epoch": 2.7, "learning_rate": 5.538809344385832e-08, "logits/chosen": -2.4587504863739014, "logits/rejected": -2.3089499473571777, "logps/chosen": -298.1319580078125, "logps/rejected": -371.02703857421875, "loss": 0.0098, "rewards/accuracies": 1.0, "rewards/chosen": -0.1819322109222412, "rewards/margins": 9.596673965454102, "rewards/rejected": -9.778606414794922, "step": 5310 }, { "epoch": 2.71, "learning_rate": 5.4446119065561414e-08, "logits/chosen": -2.204026460647583, "logits/rejected": -2.2400882244110107, "logps/chosen": -268.72308349609375, "logps/rejected": -335.87261962890625, "loss": 0.0086, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.2533165216445923, "rewards/margins": 9.080385208129883, "rewards/rejected": -9.333700180053711, "step": 5320 }, { "epoch": 2.71, "learning_rate": 5.35041446872645e-08, "logits/chosen": -2.340484142303467, "logits/rejected": -2.1539273262023926, "logps/chosen": -260.55938720703125, "logps/rejected": -390.09027099609375, "loss": 0.0163, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.2474168837070465, "rewards/margins": 10.559521675109863, "rewards/rejected": -10.806938171386719, "step": 5330 }, { "epoch": 2.72, "learning_rate": 5.256217030896759e-08, "logits/chosen": -2.0718276500701904, "logits/rejected": -2.1071338653564453, "logps/chosen": -266.92828369140625, "logps/rejected": -395.7525939941406, "loss": 0.012, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5647242665290833, "rewards/margins": 10.194375991821289, "rewards/rejected": -10.759099960327148, "step": 5340 }, { "epoch": 2.72, "learning_rate": 5.162019593067068e-08, "logits/chosen": -2.312995433807373, "logits/rejected": -2.388847827911377, "logps/chosen": -292.45361328125, "logps/rejected": -374.68096923828125, "loss": 0.0086, "rewards/accuracies": 0.987500011920929, "rewards/chosen": 0.17983612418174744, "rewards/margins": 10.592061996459961, "rewards/rejected": -10.412225723266602, "step": 5350 }, { "epoch": 2.73, "learning_rate": 5.067822155237377e-08, "logits/chosen": -2.236600637435913, "logits/rejected": -2.2955946922302246, "logps/chosen": -262.1943664550781, "logps/rejected": -310.1500549316406, "loss": 0.0231, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.8180893063545227, "rewards/margins": 8.33509349822998, "rewards/rejected": -9.15318489074707, "step": 5360 }, { "epoch": 2.73, "learning_rate": 4.973624717407686e-08, "logits/chosen": -2.3075735569000244, "logits/rejected": -2.3322246074676514, "logps/chosen": -238.5531768798828, "logps/rejected": -295.55279541015625, "loss": 0.0072, "rewards/accuracies": 1.0, "rewards/chosen": -0.7571172714233398, "rewards/margins": 8.686990737915039, "rewards/rejected": -9.444108963012695, "step": 5370 }, { "epoch": 2.74, "learning_rate": 4.879427279577995e-08, "logits/chosen": -2.305372953414917, "logits/rejected": -2.2694144248962402, "logps/chosen": -237.29641723632812, "logps/rejected": -319.9894104003906, "loss": 0.0129, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.9666566848754883, "rewards/margins": 8.84016227722168, "rewards/rejected": -9.806818008422852, "step": 5380 }, { "epoch": 2.74, "learning_rate": 4.785229841748304e-08, "logits/chosen": -2.3525428771972656, "logits/rejected": -2.3090224266052246, "logps/chosen": -306.0455627441406, "logps/rejected": -409.92120361328125, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": 0.3530218005180359, "rewards/margins": 10.933697700500488, "rewards/rejected": -10.58067512512207, "step": 5390 }, { "epoch": 2.75, "learning_rate": 4.691032403918613e-08, "logits/chosen": -2.281357526779175, "logits/rejected": -2.237678050994873, "logps/chosen": -251.3409881591797, "logps/rejected": -310.4827575683594, "loss": 0.0146, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.8187946081161499, "rewards/margins": 8.709222793579102, "rewards/rejected": -9.528017044067383, "step": 5400 }, { "epoch": 2.75, "eval_logits/chosen": -2.3750522136688232, "eval_logits/rejected": -2.447535753250122, "eval_logps/chosen": -315.9074401855469, "eval_logps/rejected": -326.6905212402344, "eval_loss": 0.7768276333808899, "eval_rewards/accuracies": 0.7423664331436157, "eval_rewards/chosen": -3.765695333480835, "eval_rewards/margins": 3.089841365814209, "eval_rewards/rejected": -6.855537414550781, "eval_runtime": 301.6538, "eval_samples_per_second": 6.915, "eval_steps_per_second": 0.434, "step": 5400 }, { "epoch": 2.75, "learning_rate": 4.596834966088922e-08, "logits/chosen": -2.2270708084106445, "logits/rejected": -2.240201473236084, "logps/chosen": -259.4315490722656, "logps/rejected": -358.59698486328125, "loss": 0.0106, "rewards/accuracies": 1.0, "rewards/chosen": -0.9431527256965637, "rewards/margins": 9.088873863220215, "rewards/rejected": -10.032026290893555, "step": 5410 }, { "epoch": 2.76, "learning_rate": 4.5026375282592306e-08, "logits/chosen": -2.4417529106140137, "logits/rejected": -2.3347702026367188, "logps/chosen": -303.779296875, "logps/rejected": -370.93328857421875, "loss": 0.009, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.6730049848556519, "rewards/margins": 9.935312271118164, "rewards/rejected": -10.608317375183105, "step": 5420 }, { "epoch": 2.76, "learning_rate": 4.40844009042954e-08, "logits/chosen": -2.205430507659912, "logits/rejected": -2.3156282901763916, "logps/chosen": -245.5878448486328, "logps/rejected": -319.97259521484375, "loss": 0.0176, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7904682159423828, "rewards/margins": 9.259397506713867, "rewards/rejected": -10.049864768981934, "step": 5430 }, { "epoch": 2.77, "learning_rate": 4.314242652599849e-08, "logits/chosen": -2.3045692443847656, "logits/rejected": -2.176016330718994, "logps/chosen": -308.97247314453125, "logps/rejected": -334.92950439453125, "loss": 0.0055, "rewards/accuracies": 1.0, "rewards/chosen": -0.09693922102451324, "rewards/margins": 9.563849449157715, "rewards/rejected": -9.660788536071777, "step": 5440 }, { "epoch": 2.77, "learning_rate": 4.2200452147701575e-08, "logits/chosen": -2.167794704437256, "logits/rejected": -2.215398073196411, "logps/chosen": -295.13031005859375, "logps/rejected": -371.29888916015625, "loss": 0.016, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7187148928642273, "rewards/margins": 9.774347305297852, "rewards/rejected": -10.493062973022461, "step": 5450 }, { "epoch": 2.78, "learning_rate": 4.1258477769404675e-08, "logits/chosen": -2.213601589202881, "logits/rejected": -2.3222367763519287, "logps/chosen": -304.4975280761719, "logps/rejected": -384.44464111328125, "loss": 0.0079, "rewards/accuracies": 1.0, "rewards/chosen": 0.047443341463804245, "rewards/margins": 11.246149063110352, "rewards/rejected": -11.198705673217773, "step": 5460 }, { "epoch": 2.78, "learning_rate": 4.031650339110776e-08, "logits/chosen": -2.243669033050537, "logits/rejected": -2.140501022338867, "logps/chosen": -241.9927520751953, "logps/rejected": -313.94708251953125, "loss": 0.0211, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.9616853594779968, "rewards/margins": 8.771138191223145, "rewards/rejected": -9.732824325561523, "step": 5470 }, { "epoch": 2.79, "learning_rate": 3.9374529012810856e-08, "logits/chosen": -2.2058587074279785, "logits/rejected": -2.1941728591918945, "logps/chosen": -274.68304443359375, "logps/rejected": -333.4448547363281, "loss": 0.0191, "rewards/accuracies": 1.0, "rewards/chosen": -1.1749818325042725, "rewards/margins": 9.428367614746094, "rewards/rejected": -10.603350639343262, "step": 5480 }, { "epoch": 2.79, "learning_rate": 3.8432554634513943e-08, "logits/chosen": -2.2903943061828613, "logits/rejected": -2.271043300628662, "logps/chosen": -298.74688720703125, "logps/rejected": -383.7245178222656, "loss": 0.021, "rewards/accuracies": 1.0, "rewards/chosen": -0.09447214752435684, "rewards/margins": 10.52599811553955, "rewards/rejected": -10.620469093322754, "step": 5490 }, { "epoch": 2.8, "learning_rate": 3.749058025621703e-08, "logits/chosen": -2.282285690307617, "logits/rejected": -2.341499090194702, "logps/chosen": -267.6167297363281, "logps/rejected": -344.1416320800781, "loss": 0.0161, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.8351944088935852, "rewards/margins": 9.350050926208496, "rewards/rejected": -10.1852445602417, "step": 5500 }, { "epoch": 2.8, "eval_logits/chosen": -2.3620362281799316, "eval_logits/rejected": -2.433206558227539, "eval_logps/chosen": -317.4207763671875, "eval_logps/rejected": -328.7700500488281, "eval_loss": 0.7901568412780762, "eval_rewards/accuracies": 0.7480915784835815, "eval_rewards/chosen": -3.91702938079834, "eval_rewards/margins": 3.1464624404907227, "eval_rewards/rejected": -7.0634918212890625, "eval_runtime": 297.0888, "eval_samples_per_second": 7.021, "eval_steps_per_second": 0.441, "step": 5500 }, { "epoch": 2.8, "learning_rate": 3.6548605877920125e-08, "logits/chosen": -2.2243800163269043, "logits/rejected": -2.2160937786102295, "logps/chosen": -292.40545654296875, "logps/rejected": -356.87054443359375, "loss": 0.0155, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.35705074667930603, "rewards/margins": 10.191495895385742, "rewards/rejected": -10.548547744750977, "step": 5510 }, { "epoch": 2.81, "learning_rate": 3.560663149962321e-08, "logits/chosen": -2.180793046951294, "logits/rejected": -2.1247339248657227, "logps/chosen": -276.05218505859375, "logps/rejected": -372.5242614746094, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -0.866191029548645, "rewards/margins": 9.306673049926758, "rewards/rejected": -10.172863006591797, "step": 5520 }, { "epoch": 2.81, "learning_rate": 3.46646571213263e-08, "logits/chosen": -2.189734935760498, "logits/rejected": -2.2070634365081787, "logps/chosen": -317.32305908203125, "logps/rejected": -377.46575927734375, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.114651083946228, "rewards/margins": 9.642301559448242, "rewards/rejected": -10.756954193115234, "step": 5530 }, { "epoch": 2.82, "learning_rate": 3.372268274302939e-08, "logits/chosen": -2.2536730766296387, "logits/rejected": -2.286621570587158, "logps/chosen": -284.72882080078125, "logps/rejected": -376.8185119628906, "loss": 0.0071, "rewards/accuracies": 1.0, "rewards/chosen": -0.5705639123916626, "rewards/margins": 10.194475173950195, "rewards/rejected": -10.765039443969727, "step": 5540 }, { "epoch": 2.82, "learning_rate": 3.278070836473248e-08, "logits/chosen": -2.196479320526123, "logits/rejected": -2.2336013317108154, "logps/chosen": -239.5353546142578, "logps/rejected": -358.18426513671875, "loss": 0.0159, "rewards/accuracies": 1.0, "rewards/chosen": -0.5057913064956665, "rewards/margins": 9.998066902160645, "rewards/rejected": -10.50385856628418, "step": 5550 }, { "epoch": 2.83, "learning_rate": 3.183873398643557e-08, "logits/chosen": -2.253528118133545, "logits/rejected": -2.2258002758026123, "logps/chosen": -307.4124755859375, "logps/rejected": -409.0328369140625, "loss": 0.015, "rewards/accuracies": 1.0, "rewards/chosen": -0.3856652081012726, "rewards/margins": 10.914469718933105, "rewards/rejected": -11.300134658813477, "step": 5560 }, { "epoch": 2.83, "learning_rate": 3.089675960813866e-08, "logits/chosen": -2.326773166656494, "logits/rejected": -2.3797287940979004, "logps/chosen": -290.23492431640625, "logps/rejected": -325.58428955078125, "loss": 0.0089, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.4582270085811615, "rewards/margins": 9.405486106872559, "rewards/rejected": -9.863713264465332, "step": 5570 }, { "epoch": 2.84, "learning_rate": 2.995478522984175e-08, "logits/chosen": -2.2719123363494873, "logits/rejected": -2.116914987564087, "logps/chosen": -295.5330810546875, "logps/rejected": -360.0628967285156, "loss": 0.0042, "rewards/accuracies": 1.0, "rewards/chosen": -0.7894667387008667, "rewards/margins": 9.751416206359863, "rewards/rejected": -10.54088306427002, "step": 5580 }, { "epoch": 2.84, "learning_rate": 2.9012810851544836e-08, "logits/chosen": -2.2664589881896973, "logits/rejected": -2.322875499725342, "logps/chosen": -259.9700012207031, "logps/rejected": -339.7001647949219, "loss": 0.0156, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.6178351640701294, "rewards/margins": 9.064801216125488, "rewards/rejected": -9.682635307312012, "step": 5590 }, { "epoch": 2.85, "learning_rate": 2.8070836473247926e-08, "logits/chosen": -2.291879177093506, "logits/rejected": -2.1940712928771973, "logps/chosen": -242.53451538085938, "logps/rejected": -346.9772033691406, "loss": 0.0056, "rewards/accuracies": 1.0, "rewards/chosen": -0.4267337918281555, "rewards/margins": 10.6320161819458, "rewards/rejected": -11.058749198913574, "step": 5600 }, { "epoch": 2.85, "eval_logits/chosen": -2.3599491119384766, "eval_logits/rejected": -2.4313082695007324, "eval_logps/chosen": -317.76324462890625, "eval_logps/rejected": -328.8216552734375, "eval_loss": 0.782730758190155, "eval_rewards/accuracies": 0.7423664331436157, "eval_rewards/chosen": -3.951274871826172, "eval_rewards/margins": 3.117375373840332, "eval_rewards/rejected": -7.068650245666504, "eval_runtime": 301.8036, "eval_samples_per_second": 6.912, "eval_steps_per_second": 0.434, "step": 5600 }, { "epoch": 2.85, "learning_rate": 2.7128862094951014e-08, "logits/chosen": -2.20296573638916, "logits/rejected": -2.3706116676330566, "logps/chosen": -271.8478088378906, "logps/rejected": -355.76934814453125, "loss": 0.0257, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.4428713917732239, "rewards/margins": 9.450614929199219, "rewards/rejected": -9.893485069274902, "step": 5610 }, { "epoch": 2.86, "learning_rate": 2.6186887716654104e-08, "logits/chosen": -2.257016181945801, "logits/rejected": -2.2679696083068848, "logps/chosen": -274.9690856933594, "logps/rejected": -358.05609130859375, "loss": 0.0189, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.6042969822883606, "rewards/margins": 9.57944107055664, "rewards/rejected": -10.183736801147461, "step": 5620 }, { "epoch": 2.86, "learning_rate": 2.5244913338357195e-08, "logits/chosen": -2.234752893447876, "logits/rejected": -2.2068352699279785, "logps/chosen": -286.51171875, "logps/rejected": -344.405517578125, "loss": 0.0123, "rewards/accuracies": 1.0, "rewards/chosen": -0.32843226194381714, "rewards/margins": 9.54783821105957, "rewards/rejected": -9.876269340515137, "step": 5630 }, { "epoch": 2.87, "learning_rate": 2.4302938960060285e-08, "logits/chosen": -2.1969046592712402, "logits/rejected": -2.1973648071289062, "logps/chosen": -300.8368835449219, "logps/rejected": -396.38775634765625, "loss": 0.0047, "rewards/accuracies": 1.0, "rewards/chosen": -0.2708401381969452, "rewards/margins": 9.721776008605957, "rewards/rejected": -9.992616653442383, "step": 5640 }, { "epoch": 2.87, "learning_rate": 2.3360964581763373e-08, "logits/chosen": -2.2528560161590576, "logits/rejected": -2.3048038482666016, "logps/chosen": -299.9004211425781, "logps/rejected": -379.5865783691406, "loss": 0.0146, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -0.9191215634346008, "rewards/margins": 9.807453155517578, "rewards/rejected": -10.72657585144043, "step": 5650 }, { "epoch": 2.88, "learning_rate": 2.2418990203466463e-08, "logits/chosen": -2.2575693130493164, "logits/rejected": -2.1442599296569824, "logps/chosen": -276.9645690917969, "logps/rejected": -345.7447814941406, "loss": 0.0174, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.8034262657165527, "rewards/margins": 9.3135404586792, "rewards/rejected": -10.11696720123291, "step": 5660 }, { "epoch": 2.88, "learning_rate": 2.1477015825169554e-08, "logits/chosen": -2.3129067420959473, "logits/rejected": -2.185722827911377, "logps/chosen": -285.9835510253906, "logps/rejected": -367.640380859375, "loss": 0.0281, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.37922757863998413, "rewards/margins": 10.11483383178711, "rewards/rejected": -10.494062423706055, "step": 5670 }, { "epoch": 2.89, "learning_rate": 2.053504144687264e-08, "logits/chosen": -2.2857284545898438, "logits/rejected": -2.3002820014953613, "logps/chosen": -289.5118103027344, "logps/rejected": -368.2225646972656, "loss": 0.0082, "rewards/accuracies": 1.0, "rewards/chosen": -0.49899688363075256, "rewards/margins": 10.350085258483887, "rewards/rejected": -10.84908390045166, "step": 5680 }, { "epoch": 2.89, "learning_rate": 1.9593067068575735e-08, "logits/chosen": -2.275081157684326, "logits/rejected": -2.1859753131866455, "logps/chosen": -254.21267700195312, "logps/rejected": -303.00335693359375, "loss": 0.014, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.5944699048995972, "rewards/margins": 8.801831245422363, "rewards/rejected": -9.396299362182617, "step": 5690 }, { "epoch": 2.9, "learning_rate": 1.8651092690278825e-08, "logits/chosen": -2.1943910121917725, "logits/rejected": -2.2771873474121094, "logps/chosen": -303.30877685546875, "logps/rejected": -356.7920227050781, "loss": 0.0083, "rewards/accuracies": 1.0, "rewards/chosen": -0.005204296205192804, "rewards/margins": 10.570098876953125, "rewards/rejected": -10.575301170349121, "step": 5700 }, { "epoch": 2.9, "eval_logits/chosen": -2.359806537628174, "eval_logits/rejected": -2.4324193000793457, "eval_logps/chosen": -317.05596923828125, "eval_logps/rejected": -327.8432312011719, "eval_loss": 0.7741116285324097, "eval_rewards/accuracies": 0.7442747950553894, "eval_rewards/chosen": -3.8805482387542725, "eval_rewards/margins": 3.090261220932007, "eval_rewards/rejected": -6.970809459686279, "eval_runtime": 296.9644, "eval_samples_per_second": 7.024, "eval_steps_per_second": 0.441, "step": 5700 }, { "epoch": 2.9, "learning_rate": 1.7709118311981916e-08, "logits/chosen": -2.2069544792175293, "logits/rejected": -2.2963550090789795, "logps/chosen": -285.5211486816406, "logps/rejected": -383.9078674316406, "loss": 0.0135, "rewards/accuracies": 1.0, "rewards/chosen": -1.3077685832977295, "rewards/margins": 10.186443328857422, "rewards/rejected": -11.49421215057373, "step": 5710 }, { "epoch": 2.91, "learning_rate": 1.6767143933685003e-08, "logits/chosen": -2.2039952278137207, "logits/rejected": -2.217965841293335, "logps/chosen": -280.9574890136719, "logps/rejected": -343.5159912109375, "loss": 0.0162, "rewards/accuracies": 1.0, "rewards/chosen": -0.5334898829460144, "rewards/margins": 9.367307662963867, "rewards/rejected": -9.900795936584473, "step": 5720 }, { "epoch": 2.91, "learning_rate": 1.5825169555388094e-08, "logits/chosen": -2.276930332183838, "logits/rejected": -2.289088249206543, "logps/chosen": -318.0512390136719, "logps/rejected": -363.48199462890625, "loss": 0.011, "rewards/accuracies": 1.0, "rewards/chosen": -0.5276788473129272, "rewards/margins": 9.740002632141113, "rewards/rejected": -10.267681121826172, "step": 5730 }, { "epoch": 2.92, "learning_rate": 1.4883195177091183e-08, "logits/chosen": -2.290330410003662, "logits/rejected": -2.239116907119751, "logps/chosen": -305.8013916015625, "logps/rejected": -340.6756896972656, "loss": 0.0085, "rewards/accuracies": 1.0, "rewards/chosen": -0.6274837851524353, "rewards/margins": 9.25733757019043, "rewards/rejected": -9.884819984436035, "step": 5740 }, { "epoch": 2.92, "learning_rate": 1.3941220798794272e-08, "logits/chosen": -2.2608349323272705, "logits/rejected": -2.174391984939575, "logps/chosen": -250.30111694335938, "logps/rejected": -341.62518310546875, "loss": 0.0112, "rewards/accuracies": 1.0, "rewards/chosen": -0.7482810616493225, "rewards/margins": 9.675050735473633, "rewards/rejected": -10.423332214355469, "step": 5750 }, { "epoch": 2.93, "learning_rate": 1.299924642049736e-08, "logits/chosen": -2.2983784675598145, "logits/rejected": -2.2714810371398926, "logps/chosen": -266.9122009277344, "logps/rejected": -360.6929931640625, "loss": 0.0163, "rewards/accuracies": 1.0, "rewards/chosen": -0.6003319621086121, "rewards/margins": 9.648820877075195, "rewards/rejected": -10.249155044555664, "step": 5760 }, { "epoch": 2.93, "learning_rate": 1.2057272042200451e-08, "logits/chosen": -2.3055496215820312, "logits/rejected": -2.239773750305176, "logps/chosen": -259.98968505859375, "logps/rejected": -362.4081115722656, "loss": 0.0065, "rewards/accuracies": 1.0, "rewards/chosen": -0.32147616147994995, "rewards/margins": 10.13626480102539, "rewards/rejected": -10.457742691040039, "step": 5770 }, { "epoch": 2.94, "learning_rate": 1.111529766390354e-08, "logits/chosen": -2.3338782787323, "logits/rejected": -2.3960072994232178, "logps/chosen": -288.9483337402344, "logps/rejected": -317.8904724121094, "loss": 0.0064, "rewards/accuracies": 1.0, "rewards/chosen": -0.20854076743125916, "rewards/margins": 9.097440719604492, "rewards/rejected": -9.305981636047363, "step": 5780 }, { "epoch": 2.95, "learning_rate": 1.0173323285606632e-08, "logits/chosen": -2.225872755050659, "logits/rejected": -2.1147029399871826, "logps/chosen": -270.2624816894531, "logps/rejected": -389.66741943359375, "loss": 0.0197, "rewards/accuracies": 0.9624999761581421, "rewards/chosen": -1.602431058883667, "rewards/margins": 9.71196460723877, "rewards/rejected": -11.314395904541016, "step": 5790 }, { "epoch": 2.95, "learning_rate": 9.231348907309721e-09, "logits/chosen": -2.22969913482666, "logits/rejected": -2.1869258880615234, "logps/chosen": -280.09869384765625, "logps/rejected": -333.8819885253906, "loss": 0.0243, "rewards/accuracies": 1.0, "rewards/chosen": -0.891821026802063, "rewards/margins": 8.560599327087402, "rewards/rejected": -9.452421188354492, "step": 5800 }, { "epoch": 2.95, "eval_logits/chosen": -2.3620197772979736, "eval_logits/rejected": -2.4354655742645264, "eval_logps/chosen": -316.4267578125, "eval_logps/rejected": -327.04864501953125, "eval_loss": 0.7656615376472473, "eval_rewards/accuracies": 0.7404580116271973, "eval_rewards/chosen": -3.8176305294036865, "eval_rewards/margins": 3.0737171173095703, "eval_rewards/rejected": -6.891347408294678, "eval_runtime": 301.7518, "eval_samples_per_second": 6.913, "eval_steps_per_second": 0.434, "step": 5800 }, { "epoch": 2.96, "learning_rate": 8.289374529012812e-09, "logits/chosen": -2.155146598815918, "logits/rejected": -2.232573986053467, "logps/chosen": -253.884765625, "logps/rejected": -316.1662292480469, "loss": 0.0089, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -0.4325670599937439, "rewards/margins": 9.85391902923584, "rewards/rejected": -10.28648567199707, "step": 5810 }, { "epoch": 2.96, "learning_rate": 7.3474001507159e-09, "logits/chosen": -2.3524250984191895, "logits/rejected": -2.2284231185913086, "logps/chosen": -325.3186340332031, "logps/rejected": -358.0306701660156, "loss": 0.0103, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.22089111804962158, "rewards/margins": 9.495137214660645, "rewards/rejected": -9.716028213500977, "step": 5820 }, { "epoch": 2.97, "learning_rate": 6.4054257724189895e-09, "logits/chosen": -2.2638158798217773, "logits/rejected": -2.269150972366333, "logps/chosen": -270.5865478515625, "logps/rejected": -370.8131408691406, "loss": 0.0156, "rewards/accuracies": 1.0, "rewards/chosen": -0.4692980647087097, "rewards/margins": 9.660306930541992, "rewards/rejected": -10.129603385925293, "step": 5830 }, { "epoch": 2.97, "learning_rate": 5.463451394122079e-09, "logits/chosen": -2.2793831825256348, "logits/rejected": -2.255838632583618, "logps/chosen": -314.7529296875, "logps/rejected": -368.70123291015625, "loss": 0.0251, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.5236554741859436, "rewards/margins": 9.78014850616455, "rewards/rejected": -10.303804397583008, "step": 5840 }, { "epoch": 2.98, "learning_rate": 4.52147701582517e-09, "logits/chosen": -2.2641196250915527, "logits/rejected": -2.307553768157959, "logps/chosen": -280.8160400390625, "logps/rejected": -338.44342041015625, "loss": 0.0151, "rewards/accuracies": 0.9750000238418579, "rewards/chosen": -1.047176718711853, "rewards/margins": 9.032100677490234, "rewards/rejected": -10.079277038574219, "step": 5850 }, { "epoch": 2.98, "learning_rate": 3.579502637528259e-09, "logits/chosen": -2.2263641357421875, "logits/rejected": -2.079395294189453, "logps/chosen": -266.8037109375, "logps/rejected": -345.18890380859375, "loss": 0.0102, "rewards/accuracies": 1.0, "rewards/chosen": -1.0268042087554932, "rewards/margins": 9.61182689666748, "rewards/rejected": -10.638631820678711, "step": 5860 }, { "epoch": 2.99, "learning_rate": 2.6375282592313484e-09, "logits/chosen": -2.2066586017608643, "logits/rejected": -2.21642804145813, "logps/chosen": -331.57293701171875, "logps/rejected": -358.3147277832031, "loss": 0.0062, "rewards/accuracies": 1.0, "rewards/chosen": -0.113747239112854, "rewards/margins": 11.221342086791992, "rewards/rejected": -11.335088729858398, "step": 5870 }, { "epoch": 2.99, "learning_rate": 1.6955538809344383e-09, "logits/chosen": -2.4038608074188232, "logits/rejected": -2.319794178009033, "logps/chosen": -329.29656982421875, "logps/rejected": -378.1693115234375, "loss": 0.0087, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.025945544242858887, "rewards/margins": 9.82371997833252, "rewards/rejected": -9.849664688110352, "step": 5880 }, { "epoch": 3.0, "learning_rate": 7.535795026375282e-10, "logits/chosen": -2.3141093254089355, "logits/rejected": -2.212184190750122, "logps/chosen": -255.28628540039062, "logps/rejected": -306.7519226074219, "loss": 0.0159, "rewards/accuracies": 0.987500011920929, "rewards/chosen": -0.7222930192947388, "rewards/margins": 8.908611297607422, "rewards/rejected": -9.630905151367188, "step": 5890 }, { "epoch": 3.0, "step": 5898, "total_flos": 0.0, "train_loss": 0.22745071912974513, "train_runtime": 76961.4992, "train_samples_per_second": 2.452, "train_steps_per_second": 0.077 } ], "logging_steps": 10, "max_steps": 5898, "num_train_epochs": 3, "save_steps": 500, "total_flos": 0.0, "trial_name": null, "trial_params": null }