{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.9959514170040484, |
|
"eval_steps": 500, |
|
"global_step": 1110, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01349527665317139, |
|
"grad_norm": 85.5, |
|
"learning_rate": 2.2522522522522524e-07, |
|
"logits/chosen": -1.500240683555603, |
|
"logits/rejected": -1.5190627574920654, |
|
"logps/chosen": -159.05484008789062, |
|
"logps/rejected": -164.59542846679688, |
|
"loss": 0.6946, |
|
"rewards/accuracies": 0.3499999940395355, |
|
"rewards/chosen": 0.006750366650521755, |
|
"rewards/margins": -0.002313111675903201, |
|
"rewards/rejected": 0.0090634785592556, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02699055330634278, |
|
"grad_norm": 92.5, |
|
"learning_rate": 4.504504504504505e-07, |
|
"logits/chosen": -1.4508098363876343, |
|
"logits/rejected": -1.4352288246154785, |
|
"logps/chosen": -141.31773376464844, |
|
"logps/rejected": -167.95175170898438, |
|
"loss": 0.7035, |
|
"rewards/accuracies": 0.30000001192092896, |
|
"rewards/chosen": -0.00739473570138216, |
|
"rewards/margins": -0.01960981823503971, |
|
"rewards/rejected": 0.01221508253365755, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.04048582995951417, |
|
"grad_norm": 74.0, |
|
"learning_rate": 6.756756756756758e-07, |
|
"logits/chosen": -1.3884494304656982, |
|
"logits/rejected": -1.3975419998168945, |
|
"logps/chosen": -192.84548950195312, |
|
"logps/rejected": -180.82046508789062, |
|
"loss": 0.6966, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.004980484023690224, |
|
"rewards/margins": -0.006102551706135273, |
|
"rewards/rejected": 0.011083034798502922, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.05398110661268556, |
|
"grad_norm": 99.0, |
|
"learning_rate": 9.00900900900901e-07, |
|
"logits/chosen": -1.4855096340179443, |
|
"logits/rejected": -1.4922425746917725, |
|
"logps/chosen": -148.1718292236328, |
|
"logps/rejected": -152.18133544921875, |
|
"loss": 0.6843, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.002431074623018503, |
|
"rewards/margins": 0.018751021474599838, |
|
"rewards/rejected": -0.016319945454597473, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.06747638326585695, |
|
"grad_norm": 113.0, |
|
"learning_rate": 1.1261261261261262e-06, |
|
"logits/chosen": -1.4175087213516235, |
|
"logits/rejected": -1.4836245775222778, |
|
"logps/chosen": -264.17132568359375, |
|
"logps/rejected": -193.3080596923828, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.002699580043554306, |
|
"rewards/margins": 0.005426598247140646, |
|
"rewards/rejected": -0.00272701820358634, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.08097165991902834, |
|
"grad_norm": 89.0, |
|
"learning_rate": 1.3513513513513515e-06, |
|
"logits/chosen": -1.3333433866500854, |
|
"logits/rejected": -1.4199435710906982, |
|
"logps/chosen": -220.9799041748047, |
|
"logps/rejected": -186.35690307617188, |
|
"loss": 0.688, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": 0.009898080490529537, |
|
"rewards/margins": 0.012090040370821953, |
|
"rewards/rejected": -0.0021919584833085537, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.09446693657219973, |
|
"grad_norm": 66.5, |
|
"learning_rate": 1.5765765765765766e-06, |
|
"logits/chosen": -1.5576092004776, |
|
"logits/rejected": -1.493931770324707, |
|
"logps/chosen": -148.85377502441406, |
|
"logps/rejected": -168.85574340820312, |
|
"loss": 0.6811, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.014485938474535942, |
|
"rewards/margins": 0.025426441803574562, |
|
"rewards/rejected": -0.010940502397716045, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.10796221322537113, |
|
"grad_norm": 87.5, |
|
"learning_rate": 1.801801801801802e-06, |
|
"logits/chosen": -1.460998296737671, |
|
"logits/rejected": -1.4714558124542236, |
|
"logps/chosen": -165.34341430664062, |
|
"logps/rejected": -167.67092895507812, |
|
"loss": 0.6808, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.018663501366972923, |
|
"rewards/margins": 0.027817577123641968, |
|
"rewards/rejected": -0.009154075756669044, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1214574898785425, |
|
"grad_norm": 93.0, |
|
"learning_rate": 2.0270270270270273e-06, |
|
"logits/chosen": -1.3859444856643677, |
|
"logits/rejected": -1.4024606943130493, |
|
"logps/chosen": -162.58734130859375, |
|
"logps/rejected": -191.04025268554688, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.009018613025546074, |
|
"rewards/margins": 0.019761864095926285, |
|
"rewards/rejected": -0.010743250139057636, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.1349527665317139, |
|
"grad_norm": 89.5, |
|
"learning_rate": 2.2522522522522524e-06, |
|
"logits/chosen": -1.4222023487091064, |
|
"logits/rejected": -1.54598069190979, |
|
"logps/chosen": -285.5871276855469, |
|
"logps/rejected": -167.19281005859375, |
|
"loss": 0.6684, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.02634511888027191, |
|
"rewards/margins": 0.052618540823459625, |
|
"rewards/rejected": -0.026273420080542564, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.1484480431848853, |
|
"grad_norm": 69.5, |
|
"learning_rate": 2.4774774774774775e-06, |
|
"logits/chosen": -1.5841736793518066, |
|
"logits/rejected": -1.516913890838623, |
|
"logps/chosen": -170.33505249023438, |
|
"logps/rejected": -188.19314575195312, |
|
"loss": 0.6639, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.004526221659034491, |
|
"rewards/margins": 0.06425820291042328, |
|
"rewards/rejected": -0.06878442317247391, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.16194331983805668, |
|
"grad_norm": 72.0, |
|
"learning_rate": 2.702702702702703e-06, |
|
"logits/chosen": -1.438759207725525, |
|
"logits/rejected": -1.3985353708267212, |
|
"logps/chosen": -198.15411376953125, |
|
"logps/rejected": -208.3758544921875, |
|
"loss": 0.6501, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.047606997191905975, |
|
"rewards/margins": 0.09706764668226242, |
|
"rewards/rejected": -0.049460653215646744, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.17543859649122806, |
|
"grad_norm": 164.0, |
|
"learning_rate": 2.927927927927928e-06, |
|
"logits/chosen": -1.4191879034042358, |
|
"logits/rejected": -1.5293009281158447, |
|
"logps/chosen": -217.4423370361328, |
|
"logps/rejected": -202.1327362060547, |
|
"loss": 0.6846, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.014217356219887733, |
|
"rewards/margins": 0.027354473248124123, |
|
"rewards/rejected": -0.013137114234268665, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.18893387314439947, |
|
"grad_norm": 75.5, |
|
"learning_rate": 3.1531531531531532e-06, |
|
"logits/chosen": -1.510615587234497, |
|
"logits/rejected": -1.5524317026138306, |
|
"logps/chosen": -277.9597473144531, |
|
"logps/rejected": -174.99221801757812, |
|
"loss": 0.6538, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": 0.01016303151845932, |
|
"rewards/margins": 0.08965723216533661, |
|
"rewards/rejected": -0.07949419319629669, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.20242914979757085, |
|
"grad_norm": 127.5, |
|
"learning_rate": 3.3783783783783788e-06, |
|
"logits/chosen": -1.5467108488082886, |
|
"logits/rejected": -1.7057151794433594, |
|
"logps/chosen": -236.87759399414062, |
|
"logps/rejected": -171.19088745117188, |
|
"loss": 0.6316, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.024651767686009407, |
|
"rewards/margins": 0.13629736006259918, |
|
"rewards/rejected": -0.11164556443691254, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.21592442645074225, |
|
"grad_norm": 67.0, |
|
"learning_rate": 3.603603603603604e-06, |
|
"logits/chosen": -1.3438420295715332, |
|
"logits/rejected": -1.5014269351959229, |
|
"logps/chosen": -211.7142791748047, |
|
"logps/rejected": -149.79403686523438, |
|
"loss": 0.6296, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.016034509986639023, |
|
"rewards/margins": 0.1428973227739334, |
|
"rewards/rejected": -0.12686282396316528, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.22941970310391363, |
|
"grad_norm": 67.0, |
|
"learning_rate": 3.828828828828829e-06, |
|
"logits/chosen": -1.580759048461914, |
|
"logits/rejected": -1.5942776203155518, |
|
"logps/chosen": -186.5341339111328, |
|
"logps/rejected": -198.06871032714844, |
|
"loss": 0.6112, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.011369394138455391, |
|
"rewards/margins": 0.18740348517894745, |
|
"rewards/rejected": -0.1987728774547577, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.242914979757085, |
|
"grad_norm": 104.5, |
|
"learning_rate": 4.0540540540540545e-06, |
|
"logits/chosen": -1.5142263174057007, |
|
"logits/rejected": -1.526908040046692, |
|
"logps/chosen": -172.0498504638672, |
|
"logps/rejected": -204.1090545654297, |
|
"loss": 0.5947, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.001767634996213019, |
|
"rewards/margins": 0.23096399009227753, |
|
"rewards/rejected": -0.2327316552400589, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 67.0, |
|
"learning_rate": 4.27927927927928e-06, |
|
"logits/chosen": -1.2964483499526978, |
|
"logits/rejected": -1.287847876548767, |
|
"logps/chosen": -152.49652099609375, |
|
"logps/rejected": -162.25242614746094, |
|
"loss": 0.6261, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.031346581876277924, |
|
"rewards/margins": 0.17656004428863525, |
|
"rewards/rejected": -0.20790663361549377, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.2699055330634278, |
|
"grad_norm": 122.0, |
|
"learning_rate": 4.504504504504505e-06, |
|
"logits/chosen": -1.6146646738052368, |
|
"logits/rejected": -1.6288648843765259, |
|
"logps/chosen": -245.85440063476562, |
|
"logps/rejected": -252.33163452148438, |
|
"loss": 0.5388, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.008112089708447456, |
|
"rewards/margins": 0.4617583155632019, |
|
"rewards/rejected": -0.4536462426185608, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.2834008097165992, |
|
"grad_norm": 54.75, |
|
"learning_rate": 4.72972972972973e-06, |
|
"logits/chosen": -1.7181060314178467, |
|
"logits/rejected": -1.6348508596420288, |
|
"logps/chosen": -181.34054565429688, |
|
"logps/rejected": -187.49969482421875, |
|
"loss": 0.5332, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.032877303659915924, |
|
"rewards/margins": 0.5002557635307312, |
|
"rewards/rejected": -0.5331330895423889, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.2968960863697706, |
|
"grad_norm": 93.5, |
|
"learning_rate": 4.954954954954955e-06, |
|
"logits/chosen": -1.471880555152893, |
|
"logits/rejected": -1.4882009029388428, |
|
"logps/chosen": -239.46017456054688, |
|
"logps/rejected": -203.43408203125, |
|
"loss": 0.639, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.20699986815452576, |
|
"rewards/margins": 0.2872315049171448, |
|
"rewards/rejected": -0.49423137307167053, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31039136302294196, |
|
"grad_norm": 83.5, |
|
"learning_rate": 4.999802215142814e-06, |
|
"logits/chosen": -1.572249174118042, |
|
"logits/rejected": -1.5214914083480835, |
|
"logps/chosen": -181.75244140625, |
|
"logps/rejected": -206.9883270263672, |
|
"loss": 0.4953, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2786533534526825, |
|
"rewards/margins": 0.6539293527603149, |
|
"rewards/rejected": -0.932582676410675, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.32388663967611336, |
|
"grad_norm": 63.25, |
|
"learning_rate": 4.998998767795805e-06, |
|
"logits/chosen": -1.3965647220611572, |
|
"logits/rejected": -1.5122724771499634, |
|
"logps/chosen": -185.1367645263672, |
|
"logps/rejected": -141.9375457763672, |
|
"loss": 0.5188, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.12487339973449707, |
|
"rewards/margins": 0.5116696357727051, |
|
"rewards/rejected": -0.6365430951118469, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.33738191632928477, |
|
"grad_norm": 94.5, |
|
"learning_rate": 4.9975774948882615e-06, |
|
"logits/chosen": -1.5592033863067627, |
|
"logits/rejected": -1.5545122623443604, |
|
"logps/chosen": -134.59095764160156, |
|
"logps/rejected": -159.44424438476562, |
|
"loss": 0.5878, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.218244269490242, |
|
"rewards/margins": 0.560061514377594, |
|
"rewards/rejected": -0.7783057689666748, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.3508771929824561, |
|
"grad_norm": 159.0, |
|
"learning_rate": 4.995538747800403e-06, |
|
"logits/chosen": -1.5116926431655884, |
|
"logits/rejected": -1.5991663932800293, |
|
"logps/chosen": -196.37417602539062, |
|
"logps/rejected": -162.26467895507812, |
|
"loss": 0.555, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6864209175109863, |
|
"rewards/margins": 0.5580738186836243, |
|
"rewards/rejected": -1.2444946765899658, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.3643724696356275, |
|
"grad_norm": 77.5, |
|
"learning_rate": 4.9928830305701164e-06, |
|
"logits/chosen": -1.4444091320037842, |
|
"logits/rejected": -1.404262661933899, |
|
"logps/chosen": -185.04042053222656, |
|
"logps/rejected": -186.958740234375, |
|
"loss": 0.4598, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.22133490443229675, |
|
"rewards/margins": 0.7992109060287476, |
|
"rewards/rejected": -1.0205457210540771, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.37786774628879893, |
|
"grad_norm": 50.25, |
|
"learning_rate": 4.98961099976835e-06, |
|
"logits/chosen": -1.5445549488067627, |
|
"logits/rejected": -1.586544156074524, |
|
"logps/chosen": -199.28408813476562, |
|
"logps/rejected": -183.11032104492188, |
|
"loss": 0.4536, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.06479507684707642, |
|
"rewards/margins": 0.9296582341194153, |
|
"rewards/rejected": -0.9944533109664917, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3913630229419703, |
|
"grad_norm": 68.0, |
|
"learning_rate": 4.985723464336783e-06, |
|
"logits/chosen": -1.4274847507476807, |
|
"logits/rejected": -1.4104160070419312, |
|
"logps/chosen": -185.9368896484375, |
|
"logps/rejected": -188.2207489013672, |
|
"loss": 0.4902, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.17553560435771942, |
|
"rewards/margins": 0.6832131743431091, |
|
"rewards/rejected": -0.8587487936019897, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.4048582995951417, |
|
"grad_norm": 65.0, |
|
"learning_rate": 4.9812213853878376e-06, |
|
"logits/chosen": -1.6410919427871704, |
|
"logits/rejected": -1.6832342147827148, |
|
"logps/chosen": -168.22726440429688, |
|
"logps/rejected": -165.28591918945312, |
|
"loss": 0.4942, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.19691412150859833, |
|
"rewards/margins": 0.8052200078964233, |
|
"rewards/rejected": -1.002134084701538, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.4183535762483131, |
|
"grad_norm": 84.0, |
|
"learning_rate": 4.9761058759670625e-06, |
|
"logits/chosen": -1.4086945056915283, |
|
"logits/rejected": -1.3933309316635132, |
|
"logps/chosen": -200.54226684570312, |
|
"logps/rejected": -191.30516052246094, |
|
"loss": 0.5805, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.38961219787597656, |
|
"rewards/margins": 0.6619648337364197, |
|
"rewards/rejected": -1.051577091217041, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.4318488529014845, |
|
"grad_norm": 48.75, |
|
"learning_rate": 4.970378200777949e-06, |
|
"logits/chosen": -1.4240281581878662, |
|
"logits/rejected": -1.5275284051895142, |
|
"logps/chosen": -149.6121826171875, |
|
"logps/rejected": -153.7329864501953, |
|
"loss": 0.3726, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.22904136776924133, |
|
"rewards/margins": 1.2087788581848145, |
|
"rewards/rejected": -1.4378201961517334, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44534412955465585, |
|
"grad_norm": 57.5, |
|
"learning_rate": 4.964039775869271e-06, |
|
"logits/chosen": -1.5353929996490479, |
|
"logits/rejected": -1.5400171279907227, |
|
"logps/chosen": -172.69320678710938, |
|
"logps/rejected": -186.09596252441406, |
|
"loss": 0.4821, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.14687059819698334, |
|
"rewards/margins": 1.0381742715835571, |
|
"rewards/rejected": -1.1850450038909912, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.45883940620782726, |
|
"grad_norm": 68.5, |
|
"learning_rate": 4.957092168284987e-06, |
|
"logits/chosen": -1.5351091623306274, |
|
"logits/rejected": -1.480067253112793, |
|
"logps/chosen": -224.7134246826172, |
|
"logps/rejected": -280.2825012207031, |
|
"loss": 0.4522, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.15150094032287598, |
|
"rewards/margins": 0.8322998881340027, |
|
"rewards/rejected": -0.9838007092475891, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47233468286099867, |
|
"grad_norm": 47.25, |
|
"learning_rate": 4.949537095676824e-06, |
|
"logits/chosen": -1.5415345430374146, |
|
"logits/rejected": -1.4604427814483643, |
|
"logps/chosen": -173.94085693359375, |
|
"logps/rejected": -215.93075561523438, |
|
"loss": 0.45, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3776322901248932, |
|
"rewards/margins": 1.5937398672103882, |
|
"rewards/rejected": -1.9713722467422485, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.48582995951417, |
|
"grad_norm": 95.5, |
|
"learning_rate": 4.9413764258796236e-06, |
|
"logits/chosen": -1.5088344812393188, |
|
"logits/rejected": -1.6158044338226318, |
|
"logps/chosen": -273.03594970703125, |
|
"logps/rejected": -221.93997192382812, |
|
"loss": 0.5881, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.25630080699920654, |
|
"rewards/margins": 0.5983410477638245, |
|
"rewards/rejected": -0.8546417951583862, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4993252361673414, |
|
"grad_norm": 83.0, |
|
"learning_rate": 4.93261217644956e-06, |
|
"logits/chosen": -1.3866004943847656, |
|
"logits/rejected": -1.363396406173706, |
|
"logps/chosen": -211.2840576171875, |
|
"logps/rejected": -256.87811279296875, |
|
"loss": 0.4912, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.24753907322883606, |
|
"rewards/margins": 0.9087351560592651, |
|
"rewards/rejected": -1.1562741994857788, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 79.0, |
|
"learning_rate": 4.923246514165339e-06, |
|
"logits/chosen": -1.357788324356079, |
|
"logits/rejected": -1.322389841079712, |
|
"logps/chosen": -221.6494598388672, |
|
"logps/rejected": -238.56637573242188, |
|
"loss": 0.3841, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.21661829948425293, |
|
"rewards/margins": 1.6020748615264893, |
|
"rewards/rejected": -1.8186931610107422, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.5263157894736842, |
|
"grad_norm": 78.0, |
|
"learning_rate": 4.913281754492509e-06, |
|
"logits/chosen": -1.5164716243743896, |
|
"logits/rejected": -1.5658130645751953, |
|
"logps/chosen": -211.942138671875, |
|
"logps/rejected": -251.4232177734375, |
|
"loss": 0.439, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.2759682238101959, |
|
"rewards/margins": 1.2201299667358398, |
|
"rewards/rejected": -1.4960981607437134, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.5398110661268556, |
|
"grad_norm": 68.0, |
|
"learning_rate": 4.902720361011007e-06, |
|
"logits/chosen": -1.43938148021698, |
|
"logits/rejected": -1.4012665748596191, |
|
"logps/chosen": -198.0753936767578, |
|
"logps/rejected": -230.1431121826172, |
|
"loss": 0.436, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.4660988748073578, |
|
"rewards/margins": 1.3129799365997314, |
|
"rewards/rejected": -1.7790788412094116, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.553306342780027, |
|
"grad_norm": 116.0, |
|
"learning_rate": 4.891564944806095e-06, |
|
"logits/chosen": -1.3829123973846436, |
|
"logits/rejected": -1.4532912969589233, |
|
"logps/chosen": -204.92056274414062, |
|
"logps/rejected": -184.2178192138672, |
|
"loss": 0.4408, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4832437038421631, |
|
"rewards/margins": 1.4000451564788818, |
|
"rewards/rejected": -1.8832887411117554, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.5668016194331984, |
|
"grad_norm": 39.0, |
|
"learning_rate": 4.879818263822816e-06, |
|
"logits/chosen": -1.5301909446716309, |
|
"logits/rejected": -1.4669263362884521, |
|
"logps/chosen": -176.71139526367188, |
|
"logps/rejected": -210.8941192626953, |
|
"loss": 0.4359, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7508934140205383, |
|
"rewards/margins": 1.5884822607040405, |
|
"rewards/rejected": -2.3393757343292236, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5802968960863698, |
|
"grad_norm": 118.5, |
|
"learning_rate": 4.867483222184158e-06, |
|
"logits/chosen": -1.4969114065170288, |
|
"logits/rejected": -1.4513076543807983, |
|
"logps/chosen": -183.51742553710938, |
|
"logps/rejected": -234.21078491210938, |
|
"loss": 0.4083, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -2.1092641353607178, |
|
"rewards/margins": 2.7672932147979736, |
|
"rewards/rejected": -4.876556873321533, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.5937921727395412, |
|
"grad_norm": 82.5, |
|
"learning_rate": 4.854562869473063e-06, |
|
"logits/chosen": -1.6114156246185303, |
|
"logits/rejected": -1.6086403131484985, |
|
"logps/chosen": -158.5917510986328, |
|
"logps/rejected": -182.981689453125, |
|
"loss": 0.5288, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.8133976459503174, |
|
"rewards/margins": 2.3693175315856934, |
|
"rewards/rejected": -4.182714939117432, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6072874493927125, |
|
"grad_norm": 64.5, |
|
"learning_rate": 4.841060399978481e-06, |
|
"logits/chosen": -1.4258265495300293, |
|
"logits/rejected": -1.5041557550430298, |
|
"logps/chosen": -203.29505920410156, |
|
"logps/rejected": -173.55667114257812, |
|
"loss": 0.467, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.451561838388443, |
|
"rewards/margins": 0.9895628094673157, |
|
"rewards/rejected": -1.4411247968673706, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.6207827260458839, |
|
"grad_norm": 53.75, |
|
"learning_rate": 4.826979151905655e-06, |
|
"logits/chosen": -1.3954380750656128, |
|
"logits/rejected": -1.4369020462036133, |
|
"logps/chosen": -133.7052764892578, |
|
"logps/rejected": -152.63189697265625, |
|
"loss": 0.3819, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.21247024834156036, |
|
"rewards/margins": 1.1218936443328857, |
|
"rewards/rejected": -1.3343639373779297, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6342780026990553, |
|
"grad_norm": 34.25, |
|
"learning_rate": 4.812322606550813e-06, |
|
"logits/chosen": -1.477416753768921, |
|
"logits/rejected": -1.35099196434021, |
|
"logps/chosen": -183.8603057861328, |
|
"logps/rejected": -200.47122192382812, |
|
"loss": 0.403, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.22856561839580536, |
|
"rewards/margins": 1.1782000064849854, |
|
"rewards/rejected": -1.4067654609680176, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.6477732793522267, |
|
"grad_norm": 142.0, |
|
"learning_rate": 4.7970943874404904e-06, |
|
"logits/chosen": -1.5746204853057861, |
|
"logits/rejected": -1.5317301750183105, |
|
"logps/chosen": -132.62966918945312, |
|
"logps/rejected": -169.4604034423828, |
|
"loss": 0.4905, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.2887588441371918, |
|
"rewards/margins": 1.0178512334823608, |
|
"rewards/rejected": -1.3066102266311646, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6612685560053981, |
|
"grad_norm": 81.5, |
|
"learning_rate": 4.781298259435691e-06, |
|
"logits/chosen": -1.4620139598846436, |
|
"logits/rejected": -1.5366100072860718, |
|
"logps/chosen": -207.0232696533203, |
|
"logps/rejected": -182.5987548828125, |
|
"loss": 0.3498, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.38011789321899414, |
|
"rewards/margins": 1.517073392868042, |
|
"rewards/rejected": -1.8971912860870361, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.6747638326585695, |
|
"grad_norm": 59.0, |
|
"learning_rate": 4.7649381278011e-06, |
|
"logits/chosen": -1.525059700012207, |
|
"logits/rejected": -1.4892899990081787, |
|
"logps/chosen": -132.02548217773438, |
|
"logps/rejected": -172.75595092773438, |
|
"loss": 0.4596, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.47625675797462463, |
|
"rewards/margins": 1.6200672388076782, |
|
"rewards/rejected": -2.0963237285614014, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6882591093117408, |
|
"grad_norm": 93.5, |
|
"learning_rate": 4.748018037239592e-06, |
|
"logits/chosen": -1.6185624599456787, |
|
"logits/rejected": -1.6007747650146484, |
|
"logps/chosen": -190.04196166992188, |
|
"logps/rejected": -271.9373474121094, |
|
"loss": 0.377, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.29186195135116577, |
|
"rewards/margins": 1.4247747659683228, |
|
"rewards/rejected": -1.7166366577148438, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.7017543859649122, |
|
"grad_norm": 54.75, |
|
"learning_rate": 4.7305421708922596e-06, |
|
"logits/chosen": -1.5387685298919678, |
|
"logits/rejected": -1.4462766647338867, |
|
"logps/chosen": -199.54568481445312, |
|
"logps/rejected": -219.14901733398438, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.4976847767829895, |
|
"rewards/margins": 1.649714708328247, |
|
"rewards/rejected": -2.147399425506592, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7152496626180836, |
|
"grad_norm": 92.0, |
|
"learning_rate": 4.712514849304219e-06, |
|
"logits/chosen": -1.4592026472091675, |
|
"logits/rejected": -1.5086675882339478, |
|
"logps/chosen": -203.43939208984375, |
|
"logps/rejected": -182.27008056640625, |
|
"loss": 0.3704, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.30615222454071045, |
|
"rewards/margins": 1.7558097839355469, |
|
"rewards/rejected": -2.0619618892669678, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.728744939271255, |
|
"grad_norm": 94.0, |
|
"learning_rate": 4.693940529356444e-06, |
|
"logits/chosen": -1.5462654829025269, |
|
"logits/rejected": -1.5494886636734009, |
|
"logps/chosen": -204.8282470703125, |
|
"logps/rejected": -262.1166076660156, |
|
"loss": 0.4081, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.18543025851249695, |
|
"rewards/margins": 1.581555724143982, |
|
"rewards/rejected": -1.7669861316680908, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7422402159244265, |
|
"grad_norm": 49.5, |
|
"learning_rate": 4.674823803163899e-06, |
|
"logits/chosen": -1.5121240615844727, |
|
"logits/rejected": -1.378418207168579, |
|
"logps/chosen": -176.5196533203125, |
|
"logps/rejected": -259.83154296875, |
|
"loss": 0.2792, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.2915424704551697, |
|
"rewards/margins": 2.276181697845459, |
|
"rewards/rejected": -2.5677244663238525, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.7557354925775979, |
|
"grad_norm": 63.5, |
|
"learning_rate": 4.655169396940229e-06, |
|
"logits/chosen": -1.488743782043457, |
|
"logits/rejected": -1.4984915256500244, |
|
"logps/chosen": -227.04574584960938, |
|
"logps/rejected": -223.5692596435547, |
|
"loss": 0.3756, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.3987753987312317, |
|
"rewards/margins": 1.6648337841033936, |
|
"rewards/rejected": -2.0636088848114014, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 62.75, |
|
"learning_rate": 4.6349821698293025e-06, |
|
"logits/chosen": -1.4782928228378296, |
|
"logits/rejected": -1.480554223060608, |
|
"logps/chosen": -168.77146911621094, |
|
"logps/rejected": -283.3312683105469, |
|
"loss": 0.3639, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.32490164041519165, |
|
"rewards/margins": 1.6070611476898193, |
|
"rewards/rejected": -1.9319626092910767, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.7827260458839406, |
|
"grad_norm": 85.0, |
|
"learning_rate": 4.6142671127038905e-06, |
|
"logits/chosen": -1.5204181671142578, |
|
"logits/rejected": -1.4846007823944092, |
|
"logps/chosen": -122.49859619140625, |
|
"logps/rejected": -159.67666625976562, |
|
"loss": 0.3855, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.5428152680397034, |
|
"rewards/margins": 1.4056587219238281, |
|
"rewards/rejected": -1.9484741687774658, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.796221322537112, |
|
"grad_norm": 124.5, |
|
"learning_rate": 4.593029346931777e-06, |
|
"logits/chosen": -1.5233218669891357, |
|
"logits/rejected": -1.4880311489105225, |
|
"logps/chosen": -190.8978271484375, |
|
"logps/rejected": -212.50808715820312, |
|
"loss": 0.4094, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.5791584253311157, |
|
"rewards/margins": 1.7821210622787476, |
|
"rewards/rejected": -2.3612794876098633, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.8097165991902834, |
|
"grad_norm": 121.0, |
|
"learning_rate": 4.571274123109606e-06, |
|
"logits/chosen": -1.5600152015686035, |
|
"logits/rejected": -1.5772325992584229, |
|
"logps/chosen": -211.6980438232422, |
|
"logps/rejected": -159.11520385742188, |
|
"loss": 0.5103, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.5033570528030396, |
|
"rewards/margins": 1.3233957290649414, |
|
"rewards/rejected": -1.8267529010772705, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.8232118758434548, |
|
"grad_norm": 87.0, |
|
"learning_rate": 4.549006819764779e-06, |
|
"logits/chosen": -1.3667839765548706, |
|
"logits/rejected": -1.408111333847046, |
|
"logps/chosen": -252.8665008544922, |
|
"logps/rejected": -246.56600952148438, |
|
"loss": 0.6645, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.4156951904296875, |
|
"rewards/margins": 0.9969050288200378, |
|
"rewards/rejected": -1.4126002788543701, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.8367071524966262, |
|
"grad_norm": 65.0, |
|
"learning_rate": 4.52623294202573e-06, |
|
"logits/chosen": -1.5357733964920044, |
|
"logits/rejected": -1.6000627279281616, |
|
"logps/chosen": -203.2954864501953, |
|
"logps/rejected": -178.47378540039062, |
|
"loss": 0.3625, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.1179068312048912, |
|
"rewards/margins": 1.5459201335906982, |
|
"rewards/rejected": -1.6638271808624268, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.8502024291497976, |
|
"grad_norm": 38.75, |
|
"learning_rate": 4.502958120260894e-06, |
|
"logits/chosen": -1.4177687168121338, |
|
"logits/rejected": -1.466953992843628, |
|
"logps/chosen": -208.93142700195312, |
|
"logps/rejected": -204.0532989501953, |
|
"loss": 0.3943, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.10137398540973663, |
|
"rewards/margins": 1.5527517795562744, |
|
"rewards/rejected": -1.6541255712509155, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.863697705802969, |
|
"grad_norm": 94.5, |
|
"learning_rate": 4.479188108686714e-06, |
|
"logits/chosen": -1.543738603591919, |
|
"logits/rejected": -1.5562658309936523, |
|
"logps/chosen": -195.75601196289062, |
|
"logps/rejected": -243.9476776123047, |
|
"loss": 0.393, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.09615819901227951, |
|
"rewards/margins": 1.808638334274292, |
|
"rewards/rejected": -1.9047966003417969, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.8771929824561403, |
|
"grad_norm": 53.25, |
|
"learning_rate": 4.454928783945033e-06, |
|
"logits/chosen": -1.4368815422058105, |
|
"logits/rejected": -1.465288519859314, |
|
"logps/chosen": -182.02488708496094, |
|
"logps/rejected": -166.5155487060547, |
|
"loss": 0.3673, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.09929310530424118, |
|
"rewards/margins": 1.477452039718628, |
|
"rewards/rejected": -1.5767452716827393, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.8906882591093117, |
|
"grad_norm": 94.5, |
|
"learning_rate": 4.430186143650216e-06, |
|
"logits/chosen": -1.3891671895980835, |
|
"logits/rejected": -1.3638372421264648, |
|
"logps/chosen": -167.63204956054688, |
|
"logps/rejected": -166.39913940429688, |
|
"loss": 0.4332, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.18427793681621552, |
|
"rewards/margins": 1.2914403676986694, |
|
"rewards/rejected": -1.4757182598114014, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.9041835357624831, |
|
"grad_norm": 68.5, |
|
"learning_rate": 4.404966304906363e-06, |
|
"logits/chosen": -1.5300304889678955, |
|
"logits/rejected": -1.541245698928833, |
|
"logps/chosen": -237.1887969970703, |
|
"logps/rejected": -258.4833984375, |
|
"loss": 0.2851, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.2509092092514038, |
|
"rewards/margins": 2.2454378604888916, |
|
"rewards/rejected": -2.496346950531006, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.9176788124156545, |
|
"grad_norm": 91.5, |
|
"learning_rate": 4.379275502794984e-06, |
|
"logits/chosen": -1.4159671068191528, |
|
"logits/rejected": -1.3942148685455322, |
|
"logps/chosen": -204.76268005371094, |
|
"logps/rejected": -194.83755493164062, |
|
"loss": 0.3974, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.590856671333313, |
|
"rewards/margins": 1.8947960138320923, |
|
"rewards/rejected": -2.4856529235839844, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.9311740890688259, |
|
"grad_norm": 24.875, |
|
"learning_rate": 4.3531200888335015e-06, |
|
"logits/chosen": -1.499260663986206, |
|
"logits/rejected": -1.5041369199752808, |
|
"logps/chosen": -158.403076171875, |
|
"logps/rejected": -188.42300415039062, |
|
"loss": 0.3399, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.4716406464576721, |
|
"rewards/margins": 2.255904197692871, |
|
"rewards/rejected": -2.7275447845458984, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.9446693657219973, |
|
"grad_norm": 49.0, |
|
"learning_rate": 4.326506529404973e-06, |
|
"logits/chosen": -1.4987239837646484, |
|
"logits/rejected": -1.5489791631698608, |
|
"logps/chosen": -228.030517578125, |
|
"logps/rejected": -199.24453735351562, |
|
"loss": 0.4954, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.5366212129592896, |
|
"rewards/margins": 1.576836347579956, |
|
"rewards/rejected": -2.113457441329956, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9581646423751687, |
|
"grad_norm": 50.5, |
|
"learning_rate": 4.299441404159409e-06, |
|
"logits/chosen": -1.4427543878555298, |
|
"logits/rejected": -1.4410443305969238, |
|
"logps/chosen": -142.67196655273438, |
|
"logps/rejected": -182.15530395507812, |
|
"loss": 0.3882, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.45489731431007385, |
|
"rewards/margins": 1.885206937789917, |
|
"rewards/rejected": -2.340104341506958, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.97165991902834, |
|
"grad_norm": 71.0, |
|
"learning_rate": 4.271931404387096e-06, |
|
"logits/chosen": -1.4958666563034058, |
|
"logits/rejected": -1.4852968454360962, |
|
"logps/chosen": -203.7172088623047, |
|
"logps/rejected": -223.72958374023438, |
|
"loss": 0.3129, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.4084866940975189, |
|
"rewards/margins": 2.0505545139312744, |
|
"rewards/rejected": -2.459041118621826, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.9851551956815114, |
|
"grad_norm": 72.0, |
|
"learning_rate": 4.243983331364307e-06, |
|
"logits/chosen": -1.6051279306411743, |
|
"logits/rejected": -1.5763704776763916, |
|
"logps/chosen": -156.02700805664062, |
|
"logps/rejected": -212.16317749023438, |
|
"loss": 0.4821, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6169974207878113, |
|
"rewards/margins": 1.195291519165039, |
|
"rewards/rejected": -1.8122888803482056, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.9986504723346828, |
|
"grad_norm": 91.0, |
|
"learning_rate": 4.215604094671835e-06, |
|
"logits/chosen": -1.5946276187896729, |
|
"logits/rejected": -1.525407075881958, |
|
"logps/chosen": -190.231689453125, |
|
"logps/rejected": -210.0182342529297, |
|
"loss": 0.4743, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.5886441469192505, |
|
"rewards/margins": 1.6572681665420532, |
|
"rewards/rejected": -2.245912551879883, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 1.0121457489878543, |
|
"grad_norm": 71.5, |
|
"learning_rate": 4.186800710486732e-06, |
|
"logits/chosen": -1.503097414970398, |
|
"logits/rejected": -1.4615429639816284, |
|
"logps/chosen": -177.4516143798828, |
|
"logps/rejected": -223.7339324951172, |
|
"loss": 0.2691, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.2226639688014984, |
|
"rewards/margins": 2.2762439250946045, |
|
"rewards/rejected": -2.4989078044891357, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 1.0256410256410255, |
|
"grad_norm": 16.75, |
|
"learning_rate": 4.157580299847717e-06, |
|
"logits/chosen": -1.4365036487579346, |
|
"logits/rejected": -1.4489128589630127, |
|
"logps/chosen": -185.9925994873047, |
|
"logps/rejected": -210.19802856445312, |
|
"loss": 0.126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1753823310136795, |
|
"rewards/margins": 3.160768508911133, |
|
"rewards/rejected": -3.336151123046875, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.039136302294197, |
|
"grad_norm": 27.125, |
|
"learning_rate": 4.12795008689464e-06, |
|
"logits/chosen": -1.4434540271759033, |
|
"logits/rejected": -1.5021578073501587, |
|
"logps/chosen": -210.2549591064453, |
|
"logps/rejected": -247.6964569091797, |
|
"loss": 0.2329, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.21405327320098877, |
|
"rewards/margins": 2.4333832263946533, |
|
"rewards/rejected": -2.219329833984375, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.0526315789473684, |
|
"grad_norm": 29.5, |
|
"learning_rate": 4.0979173970824626e-06, |
|
"logits/chosen": -1.5133657455444336, |
|
"logits/rejected": -1.5038350820541382, |
|
"logps/chosen": -187.3416290283203, |
|
"logps/rejected": -197.63766479492188, |
|
"loss": 0.1885, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0815470814704895, |
|
"rewards/margins": 2.5452542304992676, |
|
"rewards/rejected": -2.463707447052002, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0661268556005399, |
|
"grad_norm": 11.3125, |
|
"learning_rate": 4.067489655370197e-06, |
|
"logits/chosen": -1.486011028289795, |
|
"logits/rejected": -1.5427876710891724, |
|
"logps/chosen": -248.8966064453125, |
|
"logps/rejected": -205.6848602294922, |
|
"loss": 0.1103, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.543197751045227, |
|
"rewards/margins": 3.468106746673584, |
|
"rewards/rejected": -2.9249091148376465, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.0796221322537112, |
|
"grad_norm": 21.625, |
|
"learning_rate": 4.0366743843852315e-06, |
|
"logits/chosen": -1.4536128044128418, |
|
"logits/rejected": -1.39426851272583, |
|
"logps/chosen": -157.4046173095703, |
|
"logps/rejected": -206.4637451171875, |
|
"loss": 0.1189, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.14299368858337402, |
|
"rewards/margins": 3.642580032348633, |
|
"rewards/rejected": -3.7855734825134277, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0931174089068827, |
|
"grad_norm": 73.0, |
|
"learning_rate": 4.005479202563524e-06, |
|
"logits/chosen": -1.4207379817962646, |
|
"logits/rejected": -1.4653427600860596, |
|
"logps/chosen": -175.64657592773438, |
|
"logps/rejected": -188.96347045898438, |
|
"loss": 0.113, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.22152826189994812, |
|
"rewards/margins": 3.9064407348632812, |
|
"rewards/rejected": -4.127968788146973, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.106612685560054, |
|
"grad_norm": 22.5, |
|
"learning_rate": 3.973911822266099e-06, |
|
"logits/chosen": -1.3683284521102905, |
|
"logits/rejected": -1.4073810577392578, |
|
"logps/chosen": -200.2495880126953, |
|
"logps/rejected": -196.02499389648438, |
|
"loss": 0.1506, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.4190312922000885, |
|
"rewards/margins": 3.017284393310547, |
|
"rewards/rejected": -3.4363160133361816, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.1201079622132253, |
|
"grad_norm": 61.0, |
|
"learning_rate": 3.941980047872324e-06, |
|
"logits/chosen": -1.3142037391662598, |
|
"logits/rejected": -1.3677208423614502, |
|
"logps/chosen": -200.49827575683594, |
|
"logps/rejected": -213.0048828125, |
|
"loss": 0.2229, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.29499849677085876, |
|
"rewards/margins": 2.430476188659668, |
|
"rewards/rejected": -2.7254748344421387, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.1336032388663968, |
|
"grad_norm": 33.5, |
|
"learning_rate": 3.9096917738504445e-06, |
|
"logits/chosen": -1.5029326677322388, |
|
"logits/rejected": -1.522037386894226, |
|
"logps/chosen": -211.3799285888672, |
|
"logps/rejected": -195.49777221679688, |
|
"loss": 0.2023, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.20138970017433167, |
|
"rewards/margins": 3.0471653938293457, |
|
"rewards/rejected": -3.2485554218292236, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.147098515519568, |
|
"grad_norm": 67.5, |
|
"learning_rate": 3.877054982805835e-06, |
|
"logits/chosen": -1.503327488899231, |
|
"logits/rejected": -1.5182857513427734, |
|
"logps/chosen": -206.69345092773438, |
|
"logps/rejected": -220.8511505126953, |
|
"loss": 0.2, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.075591079890728, |
|
"rewards/margins": 3.3199775218963623, |
|
"rewards/rejected": -3.39556884765625, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.1605937921727396, |
|
"grad_norm": 41.25, |
|
"learning_rate": 3.844077743507468e-06, |
|
"logits/chosen": -1.4972890615463257, |
|
"logits/rejected": -1.4547359943389893, |
|
"logps/chosen": -190.38272094726562, |
|
"logps/rejected": -237.7483367919922, |
|
"loss": 0.1763, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.00510750338435173, |
|
"rewards/margins": 3.4418201446533203, |
|
"rewards/rejected": -3.446927309036255, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.174089068825911, |
|
"grad_norm": 43.0, |
|
"learning_rate": 3.8107682088930797e-06, |
|
"logits/chosen": -1.5898491144180298, |
|
"logits/rejected": -1.628394365310669, |
|
"logps/chosen": -209.7681884765625, |
|
"logps/rejected": -223.9811248779297, |
|
"loss": 0.2875, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.15720273554325104, |
|
"rewards/margins": 2.540311336517334, |
|
"rewards/rejected": -2.697514057159424, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.1875843454790824, |
|
"grad_norm": 19.875, |
|
"learning_rate": 3.777134614053522e-06, |
|
"logits/chosen": -1.3833550214767456, |
|
"logits/rejected": -1.3048458099365234, |
|
"logps/chosen": -153.44886779785156, |
|
"logps/rejected": -187.23211669921875, |
|
"loss": 0.2094, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.15450401604175568, |
|
"rewards/margins": 2.7406177520751953, |
|
"rewards/rejected": -2.8951218128204346, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.2010796221322537, |
|
"grad_norm": 25.25, |
|
"learning_rate": 3.7431852741968104e-06, |
|
"logits/chosen": -1.5894601345062256, |
|
"logits/rejected": -1.4398654699325562, |
|
"logps/chosen": -161.95870971679688, |
|
"logps/rejected": -259.89544677734375, |
|
"loss": 0.2674, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.3261250853538513, |
|
"rewards/margins": 2.652719497680664, |
|
"rewards/rejected": -2.9788451194763184, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.214574898785425, |
|
"grad_norm": 25.625, |
|
"learning_rate": 3.7089285825923614e-06, |
|
"logits/chosen": -1.481194257736206, |
|
"logits/rejected": -1.4744828939437866, |
|
"logps/chosen": -136.75341796875, |
|
"logps/rejected": -182.98255920410156, |
|
"loss": 0.216, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.15366610884666443, |
|
"rewards/margins": 2.4346675872802734, |
|
"rewards/rejected": -2.5883336067199707, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.2280701754385965, |
|
"grad_norm": 59.0, |
|
"learning_rate": 3.6743730084959275e-06, |
|
"logits/chosen": -1.4641847610473633, |
|
"logits/rejected": -1.4495608806610107, |
|
"logps/chosen": -226.5570068359375, |
|
"logps/rejected": -231.99484252929688, |
|
"loss": 0.1606, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.0351928249001503, |
|
"rewards/margins": 2.678950071334839, |
|
"rewards/rejected": -2.6437573432922363, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.2415654520917678, |
|
"grad_norm": 29.125, |
|
"learning_rate": 3.639527095055753e-06, |
|
"logits/chosen": -1.4890583753585815, |
|
"logits/rejected": -1.4146323204040527, |
|
"logps/chosen": -211.8848419189453, |
|
"logps/rejected": -223.7265167236328, |
|
"loss": 0.1515, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.1912011355161667, |
|
"rewards/margins": 3.216825008392334, |
|
"rewards/rejected": -3.4080262184143066, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.2550607287449393, |
|
"grad_norm": 28.5, |
|
"learning_rate": 3.604399457200458e-06, |
|
"logits/chosen": -1.5582194328308105, |
|
"logits/rejected": -1.530056357383728, |
|
"logps/chosen": -174.59786987304688, |
|
"logps/rejected": -235.314697265625, |
|
"loss": 0.1586, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.02259807661175728, |
|
"rewards/margins": 3.3205082416534424, |
|
"rewards/rejected": -3.343106508255005, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.2685560053981106, |
|
"grad_norm": 47.0, |
|
"learning_rate": 3.5689987795091735e-06, |
|
"logits/chosen": -1.5336169004440308, |
|
"logits/rejected": -1.5555146932601929, |
|
"logps/chosen": -192.9527587890625, |
|
"logps/rejected": -217.05029296875, |
|
"loss": 0.1666, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.11079835891723633, |
|
"rewards/margins": 2.9511632919311523, |
|
"rewards/rejected": -3.0619616508483887, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.282051282051282, |
|
"grad_norm": 31.5, |
|
"learning_rate": 3.5333338140644602e-06, |
|
"logits/chosen": -1.567378044128418, |
|
"logits/rejected": -1.5020748376846313, |
|
"logps/chosen": -151.2008819580078, |
|
"logps/rejected": -193.5251007080078, |
|
"loss": 0.1562, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.06739845871925354, |
|
"rewards/margins": 2.88545560836792, |
|
"rewards/rejected": -2.81805682182312, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.2955465587044535, |
|
"grad_norm": 27.625, |
|
"learning_rate": 3.497413378288541e-06, |
|
"logits/chosen": -1.558091402053833, |
|
"logits/rejected": -1.5880284309387207, |
|
"logps/chosen": -208.2618408203125, |
|
"logps/rejected": -215.33065795898438, |
|
"loss": 0.1537, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.08185993134975433, |
|
"rewards/margins": 2.7448792457580566, |
|
"rewards/rejected": -2.8267390727996826, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.3090418353576248, |
|
"grad_norm": 21.0, |
|
"learning_rate": 3.4612463527633728e-06, |
|
"logits/chosen": -1.517230749130249, |
|
"logits/rejected": -1.5125114917755127, |
|
"logps/chosen": -165.6942138671875, |
|
"logps/rejected": -177.20965576171875, |
|
"loss": 0.1097, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.21497321128845215, |
|
"rewards/margins": 3.283679485321045, |
|
"rewards/rejected": -3.498652935028076, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.3225371120107963, |
|
"grad_norm": 58.25, |
|
"learning_rate": 3.4248416790351086e-06, |
|
"logits/chosen": -1.4563219547271729, |
|
"logits/rejected": -1.4463237524032593, |
|
"logps/chosen": -222.70803833007812, |
|
"logps/rejected": -276.1205139160156, |
|
"loss": 0.1741, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.18195849657058716, |
|
"rewards/margins": 3.079150438308716, |
|
"rewards/rejected": -3.2611091136932373, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.3360323886639676, |
|
"grad_norm": 26.5, |
|
"learning_rate": 3.3882083574034847e-06, |
|
"logits/chosen": -1.495981216430664, |
|
"logits/rejected": -1.510833501815796, |
|
"logps/chosen": -217.92416381835938, |
|
"logps/rejected": -232.9659881591797, |
|
"loss": 0.1075, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.07798905670642853, |
|
"rewards/margins": 3.7004494667053223, |
|
"rewards/rejected": -3.6224606037139893, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.349527665317139, |
|
"grad_norm": 11.625, |
|
"learning_rate": 3.3513554446966846e-06, |
|
"logits/chosen": -1.607877492904663, |
|
"logits/rejected": -1.5209126472473145, |
|
"logps/chosen": -145.24710083007812, |
|
"logps/rejected": -269.81951904296875, |
|
"loss": 0.0835, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.1404910534620285, |
|
"rewards/margins": 3.947847843170166, |
|
"rewards/rejected": -4.088338375091553, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.349527665317139, |
|
"eval_logits/chosen": -1.5215187072753906, |
|
"eval_logits/rejected": -1.5562808513641357, |
|
"eval_logps/chosen": -190.62527465820312, |
|
"eval_logps/rejected": -222.86770629882812, |
|
"eval_loss": 0.3281523883342743, |
|
"eval_rewards/accuracies": 0.849397599697113, |
|
"eval_rewards/chosen": -0.6181024312973022, |
|
"eval_rewards/margins": 2.1862471103668213, |
|
"eval_rewards/rejected": -2.804349660873413, |
|
"eval_runtime": 23.4839, |
|
"eval_samples_per_second": 14.052, |
|
"eval_steps_per_second": 3.534, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3630229419703104, |
|
"grad_norm": 25.625, |
|
"learning_rate": 3.314292052032227e-06, |
|
"logits/chosen": -1.4269988536834717, |
|
"logits/rejected": -1.5553017854690552, |
|
"logps/chosen": -245.88330078125, |
|
"logps/rejected": -144.62518310546875, |
|
"loss": 0.2057, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.03630426153540611, |
|
"rewards/margins": 2.8089230060577393, |
|
"rewards/rejected": -2.8452274799346924, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.376518218623482, |
|
"grad_norm": 42.75, |
|
"learning_rate": 3.2770273425644285e-06, |
|
"logits/chosen": -1.3818541765213013, |
|
"logits/rejected": -1.31718909740448, |
|
"logps/chosen": -194.84194946289062, |
|
"logps/rejected": -197.08505249023438, |
|
"loss": 0.1862, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.11487498134374619, |
|
"rewards/margins": 3.057730197906494, |
|
"rewards/rejected": -3.172605276107788, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.3900134952766532, |
|
"grad_norm": 29.0, |
|
"learning_rate": 3.2395705292190067e-06, |
|
"logits/chosen": -1.467614769935608, |
|
"logits/rejected": -1.438024640083313, |
|
"logps/chosen": -180.7233428955078, |
|
"logps/rejected": -217.57559204101562, |
|
"loss": 0.1711, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.10827420651912689, |
|
"rewards/margins": 3.1877129077911377, |
|
"rewards/rejected": -3.295987367630005, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.4035087719298245, |
|
"grad_norm": 12.125, |
|
"learning_rate": 3.2019308724153743e-06, |
|
"logits/chosen": -1.4175347089767456, |
|
"logits/rejected": -1.5785712003707886, |
|
"logps/chosen": -196.76730346679688, |
|
"logps/rejected": -179.5243377685547, |
|
"loss": 0.114, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.12531700730323792, |
|
"rewards/margins": 3.2615838050842285, |
|
"rewards/rejected": -3.1362667083740234, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.417004048582996, |
|
"grad_norm": 27.375, |
|
"learning_rate": 3.164117677777191e-06, |
|
"logits/chosen": -1.5264801979064941, |
|
"logits/rejected": -1.6040115356445312, |
|
"logps/chosen": -150.361328125, |
|
"logps/rejected": -164.02816772460938, |
|
"loss": 0.1757, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.4109339118003845, |
|
"rewards/margins": 3.098153591156006, |
|
"rewards/rejected": -3.509087324142456, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.4304993252361673, |
|
"grad_norm": 38.25, |
|
"learning_rate": 3.1261402938317465e-06, |
|
"logits/chosen": -1.5730303525924683, |
|
"logits/rejected": -1.6026499271392822, |
|
"logps/chosen": -164.3070831298828, |
|
"logps/rejected": -246.06338500976562, |
|
"loss": 0.1532, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.005527207162231207, |
|
"rewards/margins": 3.9187304973602295, |
|
"rewards/rejected": -3.913203477859497, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.4439946018893388, |
|
"grad_norm": 20.375, |
|
"learning_rate": 3.088008109698726e-06, |
|
"logits/chosen": -1.444838285446167, |
|
"logits/rejected": -1.5232534408569336, |
|
"logps/chosen": -194.70555114746094, |
|
"logps/rejected": -218.77590942382812, |
|
"loss": 0.1892, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.10898719727993011, |
|
"rewards/margins": 3.2815093994140625, |
|
"rewards/rejected": -3.1725223064422607, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.45748987854251, |
|
"grad_norm": 43.0, |
|
"learning_rate": 3.0497305527689446e-06, |
|
"logits/chosen": -1.4176692962646484, |
|
"logits/rejected": -1.4581646919250488, |
|
"logps/chosen": -190.53550720214844, |
|
"logps/rejected": -202.92530822753906, |
|
"loss": 0.1852, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.12854930758476257, |
|
"rewards/margins": 3.113678216934204, |
|
"rewards/rejected": -3.242227554321289, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.4709851551956814, |
|
"grad_norm": 42.0, |
|
"learning_rate": 3.011317086373628e-06, |
|
"logits/chosen": -1.4024337530136108, |
|
"logits/rejected": -1.4260265827178955, |
|
"logps/chosen": -222.62124633789062, |
|
"logps/rejected": -228.56295776367188, |
|
"loss": 0.1847, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.02214776910841465, |
|
"rewards/margins": 3.127570629119873, |
|
"rewards/rejected": -3.1497180461883545, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.484480431848853, |
|
"grad_norm": 38.5, |
|
"learning_rate": 2.9727772074447916e-06, |
|
"logits/chosen": -1.4362146854400635, |
|
"logits/rejected": -1.4737937450408936, |
|
"logps/chosen": -190.13218688964844, |
|
"logps/rejected": -182.9353790283203, |
|
"loss": 0.1473, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.015029204078018665, |
|
"rewards/margins": 3.5559723377227783, |
|
"rewards/rejected": -3.5710015296936035, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.4979757085020242, |
|
"grad_norm": 105.0, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"logits/chosen": -1.5892771482467651, |
|
"logits/rejected": -1.5846550464630127, |
|
"logps/chosen": -128.55142211914062, |
|
"logps/rejected": -169.93356323242188, |
|
"loss": 0.2029, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.536339282989502, |
|
"rewards/margins": 2.9536054134368896, |
|
"rewards/rejected": -3.4899444580078125, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.5114709851551957, |
|
"grad_norm": 49.75, |
|
"learning_rate": 2.8953563536233525e-06, |
|
"logits/chosen": -1.650007963180542, |
|
"logits/rejected": -1.6943776607513428, |
|
"logps/chosen": -168.49082946777344, |
|
"logps/rejected": -202.83924865722656, |
|
"loss": 0.186, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.5661638379096985, |
|
"rewards/margins": 3.2901504039764404, |
|
"rewards/rejected": -3.856314182281494, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.524966261808367, |
|
"grad_norm": 21.0, |
|
"learning_rate": 2.8564945194294273e-06, |
|
"logits/chosen": -1.5658307075500488, |
|
"logits/rejected": -1.46593177318573, |
|
"logps/chosen": -162.1931915283203, |
|
"logps/rejected": -254.7098388671875, |
|
"loss": 0.168, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.5101041793823242, |
|
"rewards/margins": 3.1685078144073486, |
|
"rewards/rejected": -3.6786117553710938, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.5384615384615383, |
|
"grad_norm": 13.125, |
|
"learning_rate": 2.817544549367197e-06, |
|
"logits/chosen": -1.4567762613296509, |
|
"logits/rejected": -1.4438632726669312, |
|
"logps/chosen": -173.05821228027344, |
|
"logps/rejected": -226.567626953125, |
|
"loss": 0.1935, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.4246034622192383, |
|
"rewards/margins": 3.5222201347351074, |
|
"rewards/rejected": -3.946824312210083, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.5519568151147098, |
|
"grad_norm": 18.875, |
|
"learning_rate": 2.778516073008071e-06, |
|
"logits/chosen": -1.3770719766616821, |
|
"logits/rejected": -1.4858124256134033, |
|
"logps/chosen": -178.8583221435547, |
|
"logps/rejected": -180.4306640625, |
|
"loss": 0.2049, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.34754911065101624, |
|
"rewards/margins": 2.8492796421051025, |
|
"rewards/rejected": -3.196829080581665, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.5654520917678814, |
|
"grad_norm": 51.0, |
|
"learning_rate": 2.7394187393325107e-06, |
|
"logits/chosen": -1.4935017824172974, |
|
"logits/rejected": -1.482154130935669, |
|
"logps/chosen": -183.38815307617188, |
|
"logps/rejected": -203.4325714111328, |
|
"loss": 0.2601, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.4926990866661072, |
|
"rewards/margins": 2.8543787002563477, |
|
"rewards/rejected": -3.347078323364258, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.5789473684210527, |
|
"grad_norm": 11.875, |
|
"learning_rate": 2.7002622143445177e-06, |
|
"logits/chosen": -1.5763792991638184, |
|
"logits/rejected": -1.581122875213623, |
|
"logps/chosen": -230.5819854736328, |
|
"logps/rejected": -290.4792175292969, |
|
"loss": 0.1305, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.07175219804048538, |
|
"rewards/margins": 4.184114933013916, |
|
"rewards/rejected": -4.112362861633301, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.592442645074224, |
|
"grad_norm": 46.75, |
|
"learning_rate": 2.6610561786819207e-06, |
|
"logits/chosen": -1.6340926885604858, |
|
"logits/rejected": -1.5590074062347412, |
|
"logps/chosen": -145.62442016601562, |
|
"logps/rejected": -248.79403686523438, |
|
"loss": 0.1715, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.3683429956436157, |
|
"rewards/margins": 3.4242138862609863, |
|
"rewards/rejected": -3.7925562858581543, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.6059379217273952, |
|
"grad_norm": 8.5625, |
|
"learning_rate": 2.6218103252230302e-06, |
|
"logits/chosen": -1.5815064907073975, |
|
"logits/rejected": -1.558189868927002, |
|
"logps/chosen": -145.986572265625, |
|
"logps/rejected": -209.48776245117188, |
|
"loss": 0.1382, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15136297047138214, |
|
"rewards/margins": 3.156534194946289, |
|
"rewards/rejected": -3.3078970909118652, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.6194331983805668, |
|
"grad_norm": 33.5, |
|
"learning_rate": 2.582534356690284e-06, |
|
"logits/chosen": -1.4829189777374268, |
|
"logits/rejected": -1.5618332624435425, |
|
"logps/chosen": -280.50482177734375, |
|
"logps/rejected": -227.37191772460938, |
|
"loss": 0.111, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.014963224530220032, |
|
"rewards/margins": 3.7380282878875732, |
|
"rewards/rejected": -3.723065137863159, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.6329284750337383, |
|
"grad_norm": 19.25, |
|
"learning_rate": 2.5432379832514437e-06, |
|
"logits/chosen": -1.5892632007598877, |
|
"logits/rejected": -1.6352291107177734, |
|
"logps/chosen": -158.56002807617188, |
|
"logps/rejected": -202.90060424804688, |
|
"loss": 0.2301, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.49658140540122986, |
|
"rewards/margins": 3.0457987785339355, |
|
"rewards/rejected": -3.5423800945281982, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.6464237516869096, |
|
"grad_norm": 18.5, |
|
"learning_rate": 2.5039309201189618e-06, |
|
"logits/chosen": -1.6018474102020264, |
|
"logits/rejected": -1.6965217590332031, |
|
"logps/chosen": -161.53518676757812, |
|
"logps/rejected": -185.10025024414062, |
|
"loss": 0.1597, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.14256028831005096, |
|
"rewards/margins": 3.0957140922546387, |
|
"rewards/rejected": -3.238274335861206, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.6599190283400809, |
|
"grad_norm": 22.375, |
|
"learning_rate": 2.4646228851480957e-06, |
|
"logits/chosen": -1.391078233718872, |
|
"logits/rejected": -1.3691911697387695, |
|
"logps/chosen": -206.93734741210938, |
|
"logps/rejected": -213.29428100585938, |
|
"loss": 0.2172, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.01679610088467598, |
|
"rewards/margins": 2.988704204559326, |
|
"rewards/rejected": -2.9719078540802, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.6734143049932524, |
|
"grad_norm": 13.25, |
|
"learning_rate": 2.4253235964343677e-06, |
|
"logits/chosen": -1.590201497077942, |
|
"logits/rejected": -1.4947328567504883, |
|
"logps/chosen": -162.37301635742188, |
|
"logps/rejected": -259.95294189453125, |
|
"loss": 0.1116, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.33501359820365906, |
|
"rewards/margins": 4.118770599365234, |
|
"rewards/rejected": -4.453783988952637, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.686909581646424, |
|
"grad_norm": 73.0, |
|
"learning_rate": 2.3860427699109726e-06, |
|
"logits/chosen": -1.6217790842056274, |
|
"logits/rejected": -1.6454839706420898, |
|
"logps/chosen": -172.94483947753906, |
|
"logps/rejected": -205.34475708007812, |
|
"loss": 0.2869, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.9174480438232422, |
|
"rewards/margins": 3.128140449523926, |
|
"rewards/rejected": -4.045588493347168, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.7004048582995952, |
|
"grad_norm": 32.5, |
|
"learning_rate": 2.34679011694671e-06, |
|
"logits/chosen": -1.5026500225067139, |
|
"logits/rejected": -1.6494897603988647, |
|
"logps/chosen": -268.9452209472656, |
|
"logps/rejected": -212.0578155517578, |
|
"loss": 0.1194, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23328566551208496, |
|
"rewards/margins": 4.139514446258545, |
|
"rewards/rejected": -4.372800350189209, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.7139001349527665, |
|
"grad_norm": 70.5, |
|
"learning_rate": 2.3075753419450524e-06, |
|
"logits/chosen": -1.5526963472366333, |
|
"logits/rejected": -1.6195096969604492, |
|
"logps/chosen": -205.20431518554688, |
|
"logps/rejected": -197.59744262695312, |
|
"loss": 0.2026, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.3273767828941345, |
|
"rewards/margins": 2.9169745445251465, |
|
"rewards/rejected": -3.2443511486053467, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.7273954116059378, |
|
"grad_norm": 38.5, |
|
"learning_rate": 2.2684081399449327e-06, |
|
"logits/chosen": -1.4865336418151855, |
|
"logits/rejected": -1.479229211807251, |
|
"logps/chosen": -188.85787963867188, |
|
"logps/rejected": -203.17514038085938, |
|
"loss": 0.269, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.778891921043396, |
|
"rewards/margins": 3.5117366313934326, |
|
"rewards/rejected": -4.290627956390381, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.7408906882591093, |
|
"grad_norm": 116.0, |
|
"learning_rate": 2.2292981942238454e-06, |
|
"logits/chosen": -1.598434329032898, |
|
"logits/rejected": -1.6193567514419556, |
|
"logps/chosen": -170.999267578125, |
|
"logps/rejected": -234.42391967773438, |
|
"loss": 0.3528, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.586583137512207, |
|
"rewards/margins": 3.1737523078918457, |
|
"rewards/rejected": -3.7603354454040527, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.7543859649122808, |
|
"grad_norm": 36.0, |
|
"learning_rate": 2.1902551739038624e-06, |
|
"logits/chosen": -1.5177044868469238, |
|
"logits/rejected": -1.4585306644439697, |
|
"logps/chosen": -171.92758178710938, |
|
"logps/rejected": -219.8982696533203, |
|
"loss": 0.2386, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.5028108954429626, |
|
"rewards/margins": 3.118129253387451, |
|
"rewards/rejected": -3.6209399700164795, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.7678812415654521, |
|
"grad_norm": 11.0625, |
|
"learning_rate": 2.151288731561136e-06, |
|
"logits/chosen": -1.532063364982605, |
|
"logits/rejected": -1.4071648120880127, |
|
"logps/chosen": -211.4221649169922, |
|
"logps/rejected": -240.8402099609375, |
|
"loss": 0.1651, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.42295771837234497, |
|
"rewards/margins": 3.7488913536071777, |
|
"rewards/rejected": -4.171849250793457, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.7813765182186234, |
|
"grad_norm": 23.625, |
|
"learning_rate": 2.1124085008395056e-06, |
|
"logits/chosen": -1.4962142705917358, |
|
"logits/rejected": -1.4677404165267944, |
|
"logps/chosen": -197.39447021484375, |
|
"logps/rejected": -263.4613342285156, |
|
"loss": 0.1999, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.16278569400310516, |
|
"rewards/margins": 3.5397281646728516, |
|
"rewards/rejected": -3.7025134563446045, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.7948717948717947, |
|
"grad_norm": 20.625, |
|
"learning_rate": 2.073624094068776e-06, |
|
"logits/chosen": -1.5467997789382935, |
|
"logits/rejected": -1.540650725364685, |
|
"logps/chosen": -186.6321563720703, |
|
"logps/rejected": -259.65045166015625, |
|
"loss": 0.2781, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.22982105612754822, |
|
"rewards/margins": 3.7238681316375732, |
|
"rewards/rejected": -3.9536895751953125, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.8083670715249662, |
|
"grad_norm": 12.875, |
|
"learning_rate": 2.03494509988827e-06, |
|
"logits/chosen": -1.6044431924819946, |
|
"logits/rejected": -1.627730131149292, |
|
"logps/chosen": -184.64320373535156, |
|
"logps/rejected": -204.9185791015625, |
|
"loss": 0.1212, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.07731951773166656, |
|
"rewards/margins": 3.6050572395324707, |
|
"rewards/rejected": -3.6823768615722656, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.8218623481781377, |
|
"grad_norm": 22.375, |
|
"learning_rate": 1.996381080876237e-06, |
|
"logits/chosen": -1.6212413311004639, |
|
"logits/rejected": -1.5563671588897705, |
|
"logps/chosen": -219.73171997070312, |
|
"logps/rejected": -281.0826721191406, |
|
"loss": 0.1177, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.023561427369713783, |
|
"rewards/margins": 3.5450756549835205, |
|
"rewards/rejected": -3.521514415740967, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.835357624831309, |
|
"grad_norm": 42.5, |
|
"learning_rate": 1.957941571185702e-06, |
|
"logits/chosen": -1.4472072124481201, |
|
"logits/rejected": -1.5231066942214966, |
|
"logps/chosen": -256.3811950683594, |
|
"logps/rejected": -225.1781768798828, |
|
"loss": 0.2672, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.01850978098809719, |
|
"rewards/margins": 3.1582770347595215, |
|
"rewards/rejected": -3.1767868995666504, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.8488529014844803, |
|
"grad_norm": 30.625, |
|
"learning_rate": 1.919636074187346e-06, |
|
"logits/chosen": -1.388319730758667, |
|
"logits/rejected": -1.4473168849945068, |
|
"logps/chosen": -253.48312377929688, |
|
"logps/rejected": -212.169189453125, |
|
"loss": 0.1468, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.18813356757164001, |
|
"rewards/margins": 3.097019672393799, |
|
"rewards/rejected": -2.908886194229126, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.8623481781376519, |
|
"grad_norm": 90.0, |
|
"learning_rate": 1.8814740601199943e-06, |
|
"logits/chosen": -1.4006351232528687, |
|
"logits/rejected": -1.4068963527679443, |
|
"logps/chosen": -164.6719970703125, |
|
"logps/rejected": -193.83538818359375, |
|
"loss": 0.2666, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.42647585272789, |
|
"rewards/margins": 2.7546064853668213, |
|
"rewards/rejected": -3.181082248687744, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.8758434547908234, |
|
"grad_norm": 25.75, |
|
"learning_rate": 1.8434649637492952e-06, |
|
"logits/chosen": -1.341395616531372, |
|
"logits/rejected": -1.3592100143432617, |
|
"logps/chosen": -181.58978271484375, |
|
"logps/rejected": -235.27456665039062, |
|
"loss": 0.1718, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.15408626198768616, |
|
"rewards/margins": 3.203856945037842, |
|
"rewards/rejected": -3.357943296432495, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.8893387314439947, |
|
"grad_norm": 18.625, |
|
"learning_rate": 1.8056181820351737e-06, |
|
"logits/chosen": -1.565199613571167, |
|
"logits/rejected": -1.5012518167495728, |
|
"logps/chosen": -241.5365753173828, |
|
"logps/rejected": -229.1800079345703, |
|
"loss": 0.1734, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.34816664457321167, |
|
"rewards/margins": 4.178171634674072, |
|
"rewards/rejected": -3.830005168914795, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.902834008097166, |
|
"grad_norm": 8.875, |
|
"learning_rate": 1.7679430718086244e-06, |
|
"logits/chosen": -1.5023219585418701, |
|
"logits/rejected": -1.4059240818023682, |
|
"logps/chosen": -240.8516082763672, |
|
"logps/rejected": -287.47955322265625, |
|
"loss": 0.0641, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.22036854922771454, |
|
"rewards/margins": 4.166906833648682, |
|
"rewards/rejected": -3.946538209915161, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.9163292847503373, |
|
"grad_norm": 35.5, |
|
"learning_rate": 1.7304489474584307e-06, |
|
"logits/chosen": -1.565582036972046, |
|
"logits/rejected": -1.4994531869888306, |
|
"logps/chosen": -148.25338745117188, |
|
"logps/rejected": -231.37741088867188, |
|
"loss": 0.123, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.1026371493935585, |
|
"rewards/margins": 3.7467575073242188, |
|
"rewards/rejected": -3.6441197395324707, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.9298245614035088, |
|
"grad_norm": 28.125, |
|
"learning_rate": 1.693145078628377e-06, |
|
"logits/chosen": -1.6054456233978271, |
|
"logits/rejected": -1.6087411642074585, |
|
"logps/chosen": -159.12234497070312, |
|
"logps/rejected": -214.5330352783203, |
|
"loss": 0.1255, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.13751724362373352, |
|
"rewards/margins": 3.8032824993133545, |
|
"rewards/rejected": -3.940800428390503, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.9433198380566803, |
|
"grad_norm": 18.375, |
|
"learning_rate": 1.6560406879255192e-06, |
|
"logits/chosen": -1.615686058998108, |
|
"logits/rejected": -1.678998351097107, |
|
"logps/chosen": -179.3768768310547, |
|
"logps/rejected": -188.79124450683594, |
|
"loss": 0.1608, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.13511483371257782, |
|
"rewards/margins": 3.1098551750183105, |
|
"rewards/rejected": -3.2449698448181152, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.9568151147098516, |
|
"grad_norm": 20.5, |
|
"learning_rate": 1.6191449486400893e-06, |
|
"logits/chosen": -1.5641348361968994, |
|
"logits/rejected": -1.5269627571105957, |
|
"logps/chosen": -190.90200805664062, |
|
"logps/rejected": -200.14797973632812, |
|
"loss": 0.1858, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.057850100100040436, |
|
"rewards/margins": 3.392789363861084, |
|
"rewards/rejected": -3.4506402015686035, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.9703103913630229, |
|
"grad_norm": 46.25, |
|
"learning_rate": 1.5824669824775868e-06, |
|
"logits/chosen": -1.6585397720336914, |
|
"logits/rejected": -1.6145107746124268, |
|
"logps/chosen": -153.5370330810547, |
|
"logps/rejected": -246.87869262695312, |
|
"loss": 0.1935, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.1705460101366043, |
|
"rewards/margins": 3.2724738121032715, |
|
"rewards/rejected": -3.4430203437805176, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.9838056680161942, |
|
"grad_norm": 21.125, |
|
"learning_rate": 1.5460158573036288e-06, |
|
"logits/chosen": -1.425318956375122, |
|
"logits/rejected": -1.5616633892059326, |
|
"logps/chosen": -228.63955688476562, |
|
"logps/rejected": -232.26235961914062, |
|
"loss": 0.1763, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.15325972437858582, |
|
"rewards/margins": 2.676074504852295, |
|
"rewards/rejected": -2.8293344974517822, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.9973009446693657, |
|
"grad_norm": 57.75, |
|
"learning_rate": 1.509800584902108e-06, |
|
"logits/chosen": -1.4701238870620728, |
|
"logits/rejected": -1.335039496421814, |
|
"logps/chosen": -165.36788940429688, |
|
"logps/rejected": -248.84915161132812, |
|
"loss": 0.2088, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -0.21872563660144806, |
|
"rewards/margins": 3.6630032062530518, |
|
"rewards/rejected": -3.8817286491394043, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 2.010796221322537, |
|
"grad_norm": 13.0625, |
|
"learning_rate": 1.473830118747216e-06, |
|
"logits/chosen": -1.3533880710601807, |
|
"logits/rejected": -1.4392606019973755, |
|
"logps/chosen": -173.4610595703125, |
|
"logps/rejected": -189.3846435546875, |
|
"loss": 0.1035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.04286568984389305, |
|
"rewards/margins": 3.4945671558380127, |
|
"rewards/rejected": -3.537432909011841, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 2.0242914979757085, |
|
"grad_norm": 5.46875, |
|
"learning_rate": 1.4381133517898803e-06, |
|
"logits/chosen": -1.5612472295761108, |
|
"logits/rejected": -1.6096746921539307, |
|
"logps/chosen": -244.1045684814453, |
|
"logps/rejected": -227.0, |
|
"loss": 0.068, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.28070664405822754, |
|
"rewards/margins": 4.070573329925537, |
|
"rewards/rejected": -3.7898666858673096, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 2.03778677462888, |
|
"grad_norm": 46.75, |
|
"learning_rate": 1.4026591142591733e-06, |
|
"logits/chosen": -1.4181170463562012, |
|
"logits/rejected": -1.5695334672927856, |
|
"logps/chosen": -218.1271514892578, |
|
"logps/rejected": -171.77000427246094, |
|
"loss": 0.1633, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.2111760377883911, |
|
"rewards/margins": 2.9384093284606934, |
|
"rewards/rejected": -3.149585247039795, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 2.051282051282051, |
|
"grad_norm": 24.5, |
|
"learning_rate": 1.3674761714792153e-06, |
|
"logits/chosen": -1.5777294635772705, |
|
"logits/rejected": -1.6976985931396484, |
|
"logps/chosen": -224.3392791748047, |
|
"logps/rejected": -254.0798797607422, |
|
"loss": 0.0739, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.15005287528038025, |
|
"rewards/margins": 4.0651535987854, |
|
"rewards/rejected": -3.91510009765625, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.064777327935223, |
|
"grad_norm": 23.75, |
|
"learning_rate": 1.33257322170213e-06, |
|
"logits/chosen": -1.4911249876022339, |
|
"logits/rejected": -1.500860571861267, |
|
"logps/chosen": -172.9776611328125, |
|
"logps/rejected": -201.8260040283203, |
|
"loss": 0.1002, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.21206021308898926, |
|
"rewards/margins": 3.810685634613037, |
|
"rewards/rejected": -3.598625659942627, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 2.078272604588394, |
|
"grad_norm": 28.375, |
|
"learning_rate": 1.2979588939575879e-06, |
|
"logits/chosen": -1.5784046649932861, |
|
"logits/rejected": -1.5579355955123901, |
|
"logps/chosen": -192.16024780273438, |
|
"logps/rejected": -219.4779510498047, |
|
"loss": 0.1696, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.06577552855014801, |
|
"rewards/margins": 3.573701858520508, |
|
"rewards/rejected": -3.5079262256622314, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.0917678812415654, |
|
"grad_norm": 14.8125, |
|
"learning_rate": 1.2636417459194536e-06, |
|
"logits/chosen": -1.5944167375564575, |
|
"logits/rejected": -1.6392465829849243, |
|
"logps/chosen": -235.58633422851562, |
|
"logps/rejected": -274.0408630371094, |
|
"loss": 0.0593, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.08247147500514984, |
|
"rewards/margins": 4.281358242034912, |
|
"rewards/rejected": -4.363830089569092, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 2.1052631578947367, |
|
"grad_norm": 5.21875, |
|
"learning_rate": 1.2296302617900772e-06, |
|
"logits/chosen": -1.5774985551834106, |
|
"logits/rejected": -1.6413581371307373, |
|
"logps/chosen": -171.0308074951172, |
|
"logps/rejected": -183.9725341796875, |
|
"loss": 0.0845, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.016073107719421387, |
|
"rewards/margins": 3.9465243816375732, |
|
"rewards/rejected": -3.9304511547088623, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.118758434547908, |
|
"grad_norm": 15.0, |
|
"learning_rate": 1.1959328502027556e-06, |
|
"logits/chosen": -1.5672693252563477, |
|
"logits/rejected": -1.5724976062774658, |
|
"logps/chosen": -161.8846435546875, |
|
"logps/rejected": -190.6571807861328, |
|
"loss": 0.1138, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.02019577845931053, |
|
"rewards/margins": 3.7148475646972656, |
|
"rewards/rejected": -3.6946518421173096, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 2.1322537112010798, |
|
"grad_norm": 19.125, |
|
"learning_rate": 1.1625578421428714e-06, |
|
"logits/chosen": -1.4088555574417114, |
|
"logits/rejected": -1.331659197807312, |
|
"logps/chosen": -197.23593139648438, |
|
"logps/rejected": -279.7657470703125, |
|
"loss": 0.1239, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.08394167572259903, |
|
"rewards/margins": 3.702916383743286, |
|
"rewards/rejected": -3.786858081817627, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.145748987854251, |
|
"grad_norm": 19.625, |
|
"learning_rate": 1.1295134888882258e-06, |
|
"logits/chosen": -1.5858689546585083, |
|
"logits/rejected": -1.6758959293365479, |
|
"logps/chosen": -194.56253051757812, |
|
"logps/rejected": -206.4073028564453, |
|
"loss": 0.0922, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.14167055487632751, |
|
"rewards/margins": 3.8033957481384277, |
|
"rewards/rejected": -3.945065975189209, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 2.1592442645074224, |
|
"grad_norm": 16.25, |
|
"learning_rate": 1.0968079599690872e-06, |
|
"logits/chosen": -1.5427080392837524, |
|
"logits/rejected": -1.509251356124878, |
|
"logps/chosen": -227.91281127929688, |
|
"logps/rejected": -196.93661499023438, |
|
"loss": 0.112, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.0069570960476994514, |
|
"rewards/margins": 3.6783218383789062, |
|
"rewards/rejected": -3.6852786540985107, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.1727395411605936, |
|
"grad_norm": 19.875, |
|
"learning_rate": 1.064449341148442e-06, |
|
"logits/chosen": -1.624629020690918, |
|
"logits/rejected": -1.647383689880371, |
|
"logps/chosen": -203.95071411132812, |
|
"logps/rejected": -221.9706573486328, |
|
"loss": 0.1216, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.1412554681301117, |
|
"rewards/margins": 3.5126278400421143, |
|
"rewards/rejected": -3.6538829803466797, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 2.1862348178137654, |
|
"grad_norm": 14.5, |
|
"learning_rate": 1.0324456324229536e-06, |
|
"logits/chosen": -1.4194597005844116, |
|
"logits/rejected": -1.3489387035369873, |
|
"logps/chosen": -166.34426879882812, |
|
"logps/rejected": -239.3138427734375, |
|
"loss": 0.0775, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.0588313452899456, |
|
"rewards/margins": 3.9181437492370605, |
|
"rewards/rejected": -3.976975202560425, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.1997300944669367, |
|
"grad_norm": 35.5, |
|
"learning_rate": 1.000804746045138e-06, |
|
"logits/chosen": -1.3923031091690063, |
|
"logits/rejected": -1.4646499156951904, |
|
"logps/chosen": -191.46279907226562, |
|
"logps/rejected": -184.79953002929688, |
|
"loss": 0.1111, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.008677695877850056, |
|
"rewards/margins": 3.193809986114502, |
|
"rewards/rejected": -3.2024874687194824, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 2.213225371120108, |
|
"grad_norm": 16.0, |
|
"learning_rate": 9.695345045672167e-07, |
|
"logits/chosen": -1.4313310384750366, |
|
"logits/rejected": -1.4792088270187378, |
|
"logps/chosen": -191.17092895507812, |
|
"logps/rejected": -196.5364532470703, |
|
"loss": 0.118, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.33248454332351685, |
|
"rewards/margins": 3.7640583515167236, |
|
"rewards/rejected": -4.096542835235596, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.2267206477732793, |
|
"grad_norm": 15.5, |
|
"learning_rate": 9.386426389071532e-07, |
|
"logits/chosen": -1.4152162075042725, |
|
"logits/rejected": -1.363843321800232, |
|
"logps/chosen": -229.3914031982422, |
|
"logps/rejected": -278.37847900390625, |
|
"loss": 0.0961, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.30344587564468384, |
|
"rewards/margins": 4.63069486618042, |
|
"rewards/rejected": -4.934141635894775, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.2402159244264506, |
|
"grad_norm": 17.625, |
|
"learning_rate": 9.081367864373489e-07, |
|
"logits/chosen": -1.3973594903945923, |
|
"logits/rejected": -1.524677038192749, |
|
"logps/chosen": -168.33126831054688, |
|
"logps/rejected": -156.55892944335938, |
|
"loss": 0.0944, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1414262354373932, |
|
"rewards/margins": 3.3840813636779785, |
|
"rewards/rejected": -3.5255074501037598, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.2537112010796223, |
|
"grad_norm": 11.8125, |
|
"learning_rate": 8.780244890964567e-07, |
|
"logits/chosen": -1.4209728240966797, |
|
"logits/rejected": -1.2569080591201782, |
|
"logps/chosen": -177.04782104492188, |
|
"logps/rejected": -275.0938415527344, |
|
"loss": 0.0698, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16639022529125214, |
|
"rewards/margins": 3.9153380393981934, |
|
"rewards/rejected": -3.748948335647583, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 2.2672064777327936, |
|
"grad_norm": 10.625, |
|
"learning_rate": 8.483131915247969e-07, |
|
"logits/chosen": -1.563407301902771, |
|
"logits/rejected": -1.534883975982666, |
|
"logps/chosen": -171.35104370117188, |
|
"logps/rejected": -242.4336700439453, |
|
"loss": 0.0949, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.2416602075099945, |
|
"rewards/margins": 4.914166450500488, |
|
"rewards/rejected": -5.155826568603516, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.280701754385965, |
|
"grad_norm": 19.75, |
|
"learning_rate": 8.190102392238191e-07, |
|
"logits/chosen": -1.4438880681991577, |
|
"logits/rejected": -1.4186255931854248, |
|
"logps/chosen": -154.63705444335938, |
|
"logps/rejected": -207.8048858642578, |
|
"loss": 0.0783, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18597714602947235, |
|
"rewards/margins": 4.108304500579834, |
|
"rewards/rejected": -4.294281959533691, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 2.294197031039136, |
|
"grad_norm": 32.25, |
|
"learning_rate": 7.90122876740086e-07, |
|
"logits/chosen": -1.63836669921875, |
|
"logits/rejected": -1.5565919876098633, |
|
"logps/chosen": -226.85037231445312, |
|
"logps/rejected": -326.13421630859375, |
|
"loss": 0.0577, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0758393257856369, |
|
"rewards/margins": 4.579066276550293, |
|
"rewards/rejected": -4.503227233886719, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.3076923076923075, |
|
"grad_norm": 5.0625, |
|
"learning_rate": 7.616582458742059e-07, |
|
"logits/chosen": -1.4565999507904053, |
|
"logits/rejected": -1.455143928527832, |
|
"logps/chosen": -212.2303009033203, |
|
"logps/rejected": -276.86834716796875, |
|
"loss": 0.0519, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1173635721206665, |
|
"rewards/margins": 4.344286918640137, |
|
"rewards/rejected": -4.46165132522583, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 2.3211875843454792, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 7.336233839151693e-07, |
|
"logits/chosen": -1.6497745513916016, |
|
"logits/rejected": -1.6588242053985596, |
|
"logps/chosen": -169.42959594726562, |
|
"logps/rejected": -258.19207763671875, |
|
"loss": 0.1057, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.21658802032470703, |
|
"rewards/margins": 3.805851697921753, |
|
"rewards/rejected": -4.022439479827881, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.3346828609986505, |
|
"grad_norm": 21.5, |
|
"learning_rate": 7.060252219005304e-07, |
|
"logits/chosen": -1.520618200302124, |
|
"logits/rejected": -1.5337458848953247, |
|
"logps/chosen": -227.05679321289062, |
|
"logps/rejected": -317.5985107421875, |
|
"loss": 0.0774, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.06503160297870636, |
|
"rewards/margins": 4.4666852951049805, |
|
"rewards/rejected": -4.531716823577881, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 2.348178137651822, |
|
"grad_norm": 12.5, |
|
"learning_rate": 6.788705829028483e-07, |
|
"logits/chosen": -1.5424460172653198, |
|
"logits/rejected": -1.527999997138977, |
|
"logps/chosen": -186.46414184570312, |
|
"logps/rejected": -190.83157348632812, |
|
"loss": 0.1022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1064692884683609, |
|
"rewards/margins": 3.359034776687622, |
|
"rewards/rejected": -3.2525649070739746, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.361673414304993, |
|
"grad_norm": 66.5, |
|
"learning_rate": 6.521661803428225e-07, |
|
"logits/chosen": -1.5013136863708496, |
|
"logits/rejected": -1.5206286907196045, |
|
"logps/chosen": -201.0956268310547, |
|
"logps/rejected": -198.01573181152344, |
|
"loss": 0.0955, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.13122853636741638, |
|
"rewards/margins": 3.767671585083008, |
|
"rewards/rejected": -3.898899793624878, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.375168690958165, |
|
"grad_norm": 11.3125, |
|
"learning_rate": 6.259186163295439e-07, |
|
"logits/chosen": -1.2552602291107178, |
|
"logits/rejected": -1.3482682704925537, |
|
"logps/chosen": -246.9757080078125, |
|
"logps/rejected": -239.8274383544922, |
|
"loss": 0.0983, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.1879548728466034, |
|
"rewards/margins": 3.7479751110076904, |
|
"rewards/rejected": -3.935929775238037, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.388663967611336, |
|
"grad_norm": 16.0, |
|
"learning_rate": 6.001343800282569e-07, |
|
"logits/chosen": -1.5184439420700073, |
|
"logits/rejected": -1.4158121347427368, |
|
"logps/chosen": -145.63616943359375, |
|
"logps/rejected": -212.58468627929688, |
|
"loss": 0.0783, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.3523162603378296, |
|
"rewards/margins": 4.166034698486328, |
|
"rewards/rejected": -4.5183515548706055, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 2.4021592442645074, |
|
"grad_norm": 12.0625, |
|
"learning_rate": 5.748198460560475e-07, |
|
"logits/chosen": -1.602419137954712, |
|
"logits/rejected": -1.6869083642959595, |
|
"logps/chosen": -211.70947265625, |
|
"logps/rejected": -220.8863525390625, |
|
"loss": 0.0806, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16532480716705322, |
|
"rewards/margins": 4.41878080368042, |
|
"rewards/rejected": -4.253456115722656, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.4156545209176787, |
|
"grad_norm": 32.75, |
|
"learning_rate": 5.499812729058546e-07, |
|
"logits/chosen": -1.56089186668396, |
|
"logits/rejected": -1.5883516073226929, |
|
"logps/chosen": -181.11459350585938, |
|
"logps/rejected": -161.60299682617188, |
|
"loss": 0.1433, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.2707998752593994, |
|
"rewards/margins": 3.216136932373047, |
|
"rewards/rejected": -3.4869370460510254, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 2.42914979757085, |
|
"grad_norm": 14.8125, |
|
"learning_rate": 5.256248013991857e-07, |
|
"logits/chosen": -1.5014961957931519, |
|
"logits/rejected": -1.4206339120864868, |
|
"logps/chosen": -226.8283233642578, |
|
"logps/rejected": -266.60333251953125, |
|
"loss": 0.0715, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.00030528902425430715, |
|
"rewards/margins": 4.552371978759766, |
|
"rewards/rejected": -4.552066802978516, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.4426450742240218, |
|
"grad_norm": 23.0, |
|
"learning_rate": 5.01756453167925e-07, |
|
"logits/chosen": -1.5279182195663452, |
|
"logits/rejected": -1.5130751132965088, |
|
"logps/chosen": -199.68397521972656, |
|
"logps/rejected": -246.5128936767578, |
|
"loss": 0.0683, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.14631351828575134, |
|
"rewards/margins": 4.73899507522583, |
|
"rewards/rejected": -4.592680931091309, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 2.456140350877193, |
|
"grad_norm": 21.375, |
|
"learning_rate": 4.78382129165613e-07, |
|
"logits/chosen": -1.4500765800476074, |
|
"logits/rejected": -1.5014575719833374, |
|
"logps/chosen": -185.51475524902344, |
|
"logps/rejected": -181.7137908935547, |
|
"loss": 0.1049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.09951256215572357, |
|
"rewards/margins": 3.4707932472229004, |
|
"rewards/rejected": -3.371281147003174, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.4696356275303644, |
|
"grad_norm": 32.5, |
|
"learning_rate": 4.5550760820855633e-07, |
|
"logits/chosen": -1.557877779006958, |
|
"logits/rejected": -1.4586069583892822, |
|
"logps/chosen": -209.05062866210938, |
|
"logps/rejected": -308.66424560546875, |
|
"loss": 0.118, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.2664136290550232, |
|
"rewards/margins": 4.0513434410095215, |
|
"rewards/rejected": -4.3177571296691895, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 2.4831309041835357, |
|
"grad_norm": 22.5, |
|
"learning_rate": 4.3313854554713457e-07, |
|
"logits/chosen": -1.5593338012695312, |
|
"logits/rejected": -1.5647127628326416, |
|
"logps/chosen": -197.6747283935547, |
|
"logps/rejected": -253.01876831054688, |
|
"loss": 0.0716, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.0987640991806984, |
|
"rewards/margins": 4.090095043182373, |
|
"rewards/rejected": -3.9913315773010254, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.4966261808367074, |
|
"grad_norm": 20.125, |
|
"learning_rate": 4.1128047146765936e-07, |
|
"logits/chosen": -1.435847520828247, |
|
"logits/rejected": -1.453253149986267, |
|
"logps/chosen": -141.46656799316406, |
|
"logps/rejected": -162.93905639648438, |
|
"loss": 0.1007, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.20369374752044678, |
|
"rewards/margins": 3.790607452392578, |
|
"rewards/rejected": -3.586913585662842, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.5101214574898787, |
|
"grad_norm": 32.5, |
|
"learning_rate": 3.899387899251242e-07, |
|
"logits/chosen": -1.499912142753601, |
|
"logits/rejected": -1.5055288076400757, |
|
"logps/chosen": -179.4788360595703, |
|
"logps/rejected": -202.9369354248047, |
|
"loss": 0.0808, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.04291580989956856, |
|
"rewards/margins": 3.4943645000457764, |
|
"rewards/rejected": -3.537280321121216, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.52361673414305, |
|
"grad_norm": 6.59375, |
|
"learning_rate": 3.6911877720719053e-07, |
|
"logits/chosen": -1.6243568658828735, |
|
"logits/rejected": -1.5396671295166016, |
|
"logps/chosen": -155.4473419189453, |
|
"logps/rejected": -191.9477081298828, |
|
"loss": 0.1245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.33543360233306885, |
|
"rewards/margins": 4.113525867462158, |
|
"rewards/rejected": -4.4489593505859375, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 2.5371120107962213, |
|
"grad_norm": 10.3125, |
|
"learning_rate": 3.488255806297311e-07, |
|
"logits/chosen": -1.4612650871276855, |
|
"logits/rejected": -1.6070709228515625, |
|
"logps/chosen": -164.7592010498047, |
|
"logps/rejected": -161.7231903076172, |
|
"loss": 0.1901, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.06939269602298737, |
|
"rewards/margins": 3.406930446624756, |
|
"rewards/rejected": -3.3375372886657715, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.5506072874493926, |
|
"grad_norm": 7.46875, |
|
"learning_rate": 3.2906421726426857e-07, |
|
"logits/chosen": -1.4703078269958496, |
|
"logits/rejected": -1.4379500150680542, |
|
"logps/chosen": -204.19473266601562, |
|
"logps/rejected": -244.11965942382812, |
|
"loss": 0.0904, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6960457563400269, |
|
"rewards/margins": 4.154335975646973, |
|
"rewards/rejected": -4.850381851196289, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 2.564102564102564, |
|
"grad_norm": 17.375, |
|
"learning_rate": 3.09839572697605e-07, |
|
"logits/chosen": -1.560767412185669, |
|
"logits/rejected": -1.4427921772003174, |
|
"logps/chosen": -243.10568237304688, |
|
"logps/rejected": -232.52108764648438, |
|
"loss": 0.0844, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.049421075731515884, |
|
"rewards/margins": 4.088489055633545, |
|
"rewards/rejected": -4.137909889221191, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.5775978407557356, |
|
"grad_norm": 19.75, |
|
"learning_rate": 2.9115639982396166e-07, |
|
"logits/chosen": -1.515772819519043, |
|
"logits/rejected": -1.6191974878311157, |
|
"logps/chosen": -210.3816375732422, |
|
"logps/rejected": -198.30801391601562, |
|
"loss": 0.1289, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.32715946435928345, |
|
"rewards/margins": 3.6732399463653564, |
|
"rewards/rejected": -4.000399589538574, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 2.591093117408907, |
|
"grad_norm": 16.875, |
|
"learning_rate": 2.7301931766992916e-07, |
|
"logits/chosen": -1.53992760181427, |
|
"logits/rejected": -1.6426169872283936, |
|
"logps/chosen": -202.2464599609375, |
|
"logps/rejected": -200.73020935058594, |
|
"loss": 0.0916, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2096923142671585, |
|
"rewards/margins": 3.49652361869812, |
|
"rewards/rejected": -3.2868313789367676, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.604588394062078, |
|
"grad_norm": 15.875, |
|
"learning_rate": 2.554328102525022e-07, |
|
"logits/chosen": -1.468806505203247, |
|
"logits/rejected": -1.5037376880645752, |
|
"logps/chosen": -225.407470703125, |
|
"logps/rejected": -265.16326904296875, |
|
"loss": 0.0516, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.1482563018798828, |
|
"rewards/margins": 3.908936023712158, |
|
"rewards/rejected": -3.760679244995117, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 2.6180836707152495, |
|
"grad_norm": 28.25, |
|
"learning_rate": 2.3840122547050482e-07, |
|
"logits/chosen": -1.4675546884536743, |
|
"logits/rejected": -1.427056074142456, |
|
"logps/chosen": -189.55482482910156, |
|
"logps/rejected": -238.43399047851562, |
|
"loss": 0.128, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.15342381596565247, |
|
"rewards/margins": 4.185477256774902, |
|
"rewards/rejected": -4.338901042938232, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.6315789473684212, |
|
"grad_norm": 10.6875, |
|
"learning_rate": 2.219287740296605e-07, |
|
"logits/chosen": -1.5017975568771362, |
|
"logits/rejected": -1.5283129215240479, |
|
"logps/chosen": -185.2952117919922, |
|
"logps/rejected": -218.5054168701172, |
|
"loss": 0.0971, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.2838120460510254, |
|
"rewards/margins": 4.120657444000244, |
|
"rewards/rejected": -4.4044694900512695, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.6450742240215925, |
|
"grad_norm": 21.0, |
|
"learning_rate": 2.060195284015837e-07, |
|
"logits/chosen": -1.662113904953003, |
|
"logits/rejected": -1.6862503290176392, |
|
"logps/chosen": -150.606689453125, |
|
"logps/rejected": -198.61793518066406, |
|
"loss": 0.1152, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2786501944065094, |
|
"rewards/margins": 3.8265221118927, |
|
"rewards/rejected": -4.105172157287598, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.658569500674764, |
|
"grad_norm": 19.75, |
|
"learning_rate": 1.9067742181694353e-07, |
|
"logits/chosen": -1.4568703174591064, |
|
"logits/rejected": -1.4512639045715332, |
|
"logps/chosen": -171.15443420410156, |
|
"logps/rejected": -221.99526977539062, |
|
"loss": 0.096, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.17055651545524597, |
|
"rewards/margins": 5.160454273223877, |
|
"rewards/rejected": -5.3310112953186035, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 2.672064777327935, |
|
"grad_norm": 93.0, |
|
"learning_rate": 1.75906247293057e-07, |
|
"logits/chosen": -1.6594133377075195, |
|
"logits/rejected": -1.5529086589813232, |
|
"logps/chosen": -156.86392211914062, |
|
"logps/rejected": -285.59197998046875, |
|
"loss": 0.118, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.5136551856994629, |
|
"rewards/margins": 4.625790596008301, |
|
"rewards/rejected": -5.139446258544922, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.6855600539811064, |
|
"grad_norm": 11.3125, |
|
"learning_rate": 1.617096566961429e-07, |
|
"logits/chosen": -1.466498613357544, |
|
"logits/rejected": -1.4549661874771118, |
|
"logps/chosen": -155.0102081298828, |
|
"logps/rejected": -232.1795654296875, |
|
"loss": 0.152, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.09109257161617279, |
|
"rewards/margins": 3.467794418334961, |
|
"rewards/rejected": -3.558886766433716, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 2.699055330634278, |
|
"grad_norm": 15.9375, |
|
"learning_rate": 1.4809115983847267e-07, |
|
"logits/chosen": -1.377762794494629, |
|
"logits/rejected": -1.3253929615020752, |
|
"logps/chosen": -148.2834014892578, |
|
"logps/rejected": -208.0382080078125, |
|
"loss": 0.1151, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.24366268515586853, |
|
"rewards/margins": 3.6103515625, |
|
"rewards/rejected": -3.8540141582489014, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.699055330634278, |
|
"eval_logits/chosen": -1.536294937133789, |
|
"eval_logits/rejected": -1.5776937007904053, |
|
"eval_logps/chosen": -191.7211456298828, |
|
"eval_logps/rejected": -226.05455017089844, |
|
"eval_loss": 0.31860384345054626, |
|
"eval_rewards/accuracies": 0.8524096608161926, |
|
"eval_rewards/chosen": -0.7276893258094788, |
|
"eval_rewards/margins": 2.395343065261841, |
|
"eval_rewards/rejected": -3.1230320930480957, |
|
"eval_runtime": 23.3449, |
|
"eval_samples_per_second": 14.136, |
|
"eval_steps_per_second": 3.555, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.7125506072874495, |
|
"grad_norm": 23.625, |
|
"learning_rate": 1.3505412361064395e-07, |
|
"logits/chosen": -1.4981733560562134, |
|
"logits/rejected": -1.5207927227020264, |
|
"logps/chosen": -192.99154663085938, |
|
"logps/rejected": -194.6613311767578, |
|
"loss": 0.0649, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.07562440633773804, |
|
"rewards/margins": 4.270889759063721, |
|
"rewards/rejected": -4.195265769958496, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 2.7260458839406208, |
|
"grad_norm": 20.5, |
|
"learning_rate": 1.226017711491867e-07, |
|
"logits/chosen": -1.5061196088790894, |
|
"logits/rejected": -1.5956671237945557, |
|
"logps/chosen": -170.25169372558594, |
|
"logps/rejected": -240.0498046875, |
|
"loss": 0.0805, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.27517637610435486, |
|
"rewards/margins": 3.623337507247925, |
|
"rewards/rejected": -3.89851450920105, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.739541160593792, |
|
"grad_norm": 31.25, |
|
"learning_rate": 1.107371810397076e-07, |
|
"logits/chosen": -1.4881411790847778, |
|
"logits/rejected": -1.5475780963897705, |
|
"logps/chosen": -237.45504760742188, |
|
"logps/rejected": -212.13330078125, |
|
"loss": 0.0625, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.10645435005426407, |
|
"rewards/margins": 4.086081027984619, |
|
"rewards/rejected": -4.192535400390625, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 2.753036437246964, |
|
"grad_norm": 16.75, |
|
"learning_rate": 9.946328655577625e-08, |
|
"logits/chosen": -1.5837833881378174, |
|
"logits/rejected": -1.6130040884017944, |
|
"logps/chosen": -137.10398864746094, |
|
"logps/rejected": -171.19357299804688, |
|
"loss": 0.0725, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.27897781133651733, |
|
"rewards/margins": 3.8964107036590576, |
|
"rewards/rejected": -4.175389289855957, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.766531713900135, |
|
"grad_norm": 28.25, |
|
"learning_rate": 8.878287493373245e-08, |
|
"logits/chosen": -1.5753690004348755, |
|
"logits/rejected": -1.6070302724838257, |
|
"logps/chosen": -214.03018188476562, |
|
"logps/rejected": -189.55850219726562, |
|
"loss": 0.1188, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.10192601382732391, |
|
"rewards/margins": 3.4568443298339844, |
|
"rewards/rejected": -3.558769941329956, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.7800269905533064, |
|
"grad_norm": 26.625, |
|
"learning_rate": 7.869858668360042e-08, |
|
"logits/chosen": -1.4193127155303955, |
|
"logits/rejected": -1.2717030048370361, |
|
"logps/chosen": -187.0641632080078, |
|
"logps/rejected": -224.65066528320312, |
|
"loss": 0.1028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.18936040997505188, |
|
"rewards/margins": 4.242377758026123, |
|
"rewards/rejected": -4.43173885345459, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.7935222672064777, |
|
"grad_norm": 24.75, |
|
"learning_rate": 6.921291493627747e-08, |
|
"logits/chosen": -1.6177479028701782, |
|
"logits/rejected": -1.6725289821624756, |
|
"logps/chosen": -248.9903564453125, |
|
"logps/rejected": -230.86611938476562, |
|
"loss": 0.0856, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.29994627833366394, |
|
"rewards/margins": 3.9232945442199707, |
|
"rewards/rejected": -3.6233487129211426, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 2.807017543859649, |
|
"grad_norm": 42.5, |
|
"learning_rate": 6.032820482716001e-08, |
|
"logits/chosen": -1.5851434469223022, |
|
"logits/rejected": -1.5880482196807861, |
|
"logps/chosen": -155.3755340576172, |
|
"logps/rejected": -186.6389617919922, |
|
"loss": 0.1754, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.19228528439998627, |
|
"rewards/margins": 3.5754799842834473, |
|
"rewards/rejected": -3.7677650451660156, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.8205128205128203, |
|
"grad_norm": 43.0, |
|
"learning_rate": 5.204665291635519e-08, |
|
"logits/chosen": -1.496819019317627, |
|
"logits/rejected": -1.5007538795471191, |
|
"logps/chosen": -179.5200653076172, |
|
"logps/rejected": -266.001953125, |
|
"loss": 0.1038, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.35474246740341187, |
|
"rewards/margins": 3.8987841606140137, |
|
"rewards/rejected": -4.253526210784912, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 2.834008097165992, |
|
"grad_norm": 27.5, |
|
"learning_rate": 4.437030664562969e-08, |
|
"logits/chosen": -1.470956563949585, |
|
"logits/rejected": -1.52825927734375, |
|
"logps/chosen": -203.93551635742188, |
|
"logps/rejected": -220.02639770507812, |
|
"loss": 0.1639, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.0977933406829834, |
|
"rewards/margins": 3.205706834793091, |
|
"rewards/rejected": -3.3035004138946533, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.8475033738191633, |
|
"grad_norm": 65.0, |
|
"learning_rate": 3.730106383222132e-08, |
|
"logits/chosen": -1.5251743793487549, |
|
"logits/rejected": -1.3242510557174683, |
|
"logps/chosen": -186.79141235351562, |
|
"logps/rejected": -250.46566772460938, |
|
"loss": 0.0909, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.299465537071228, |
|
"rewards/margins": 4.545691967010498, |
|
"rewards/rejected": -4.845158100128174, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 2.8609986504723346, |
|
"grad_norm": 19.75, |
|
"learning_rate": 3.084067219964182e-08, |
|
"logits/chosen": -1.527754783630371, |
|
"logits/rejected": -1.5058457851409912, |
|
"logps/chosen": -173.50900268554688, |
|
"logps/rejected": -246.65628051757812, |
|
"loss": 0.2529, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": -0.42231351137161255, |
|
"rewards/margins": 3.4638805389404297, |
|
"rewards/rejected": -3.8861937522888184, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.8744939271255063, |
|
"grad_norm": 43.75, |
|
"learning_rate": 2.499072894559057e-08, |
|
"logits/chosen": -1.6412513256072998, |
|
"logits/rejected": -1.6829668283462524, |
|
"logps/chosen": -180.06788635253906, |
|
"logps/rejected": -219.94528198242188, |
|
"loss": 0.1089, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.23302574455738068, |
|
"rewards/margins": 3.369854688644409, |
|
"rewards/rejected": -3.6028804779052734, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 2.8879892037786776, |
|
"grad_norm": 13.75, |
|
"learning_rate": 1.975268034707878e-08, |
|
"logits/chosen": -1.4751927852630615, |
|
"logits/rejected": -1.5141003131866455, |
|
"logps/chosen": -204.79470825195312, |
|
"logps/rejected": -223.97509765625, |
|
"loss": 0.0822, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.15369097888469696, |
|
"rewards/margins": 3.9829258918762207, |
|
"rewards/rejected": -3.8292346000671387, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.901484480431849, |
|
"grad_norm": 39.0, |
|
"learning_rate": 1.512782140286939e-08, |
|
"logits/chosen": -1.4587006568908691, |
|
"logits/rejected": -1.5042657852172852, |
|
"logps/chosen": -156.6952667236328, |
|
"logps/rejected": -263.0159912109375, |
|
"loss": 0.0959, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.11610189825296402, |
|
"rewards/margins": 3.9884142875671387, |
|
"rewards/rejected": -4.10451602935791, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.91497975708502, |
|
"grad_norm": 17.75, |
|
"learning_rate": 1.1117295513313475e-08, |
|
"logits/chosen": -1.665400743484497, |
|
"logits/rejected": -1.6617343425750732, |
|
"logps/chosen": -161.07443237304688, |
|
"logps/rejected": -207.7931671142578, |
|
"loss": 0.0872, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.10158289968967438, |
|
"rewards/margins": 4.066722869873047, |
|
"rewards/rejected": -3.965139865875244, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.9284750337381915, |
|
"grad_norm": 20.75, |
|
"learning_rate": 7.72209419766995e-09, |
|
"logits/chosen": -1.4860131740570068, |
|
"logits/rejected": -1.3406977653503418, |
|
"logps/chosen": -168.0951690673828, |
|
"logps/rejected": -274.35113525390625, |
|
"loss": 0.1053, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -0.44807571172714233, |
|
"rewards/margins": 3.924337863922119, |
|
"rewards/rejected": -4.372413635253906, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 2.941970310391363, |
|
"grad_norm": 16.25, |
|
"learning_rate": 4.943056848972227e-09, |
|
"logits/chosen": -1.493690848350525, |
|
"logits/rejected": -1.5224257707595825, |
|
"logps/chosen": -209.3112335205078, |
|
"logps/rejected": -208.22988891601562, |
|
"loss": 0.0878, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.035886406898498535, |
|
"rewards/margins": 3.8605358600616455, |
|
"rewards/rejected": -3.8246493339538574, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.9554655870445345, |
|
"grad_norm": 14.8125, |
|
"learning_rate": 2.7808705265053305e-09, |
|
"logits/chosen": -1.571223497390747, |
|
"logits/rejected": -1.5577231645584106, |
|
"logps/chosen": -169.42562866210938, |
|
"logps/rejected": -181.50631713867188, |
|
"loss": 0.1109, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.13199149072170258, |
|
"rewards/margins": 3.612278699874878, |
|
"rewards/rejected": -3.744269847869873, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 2.968960863697706, |
|
"grad_norm": 27.75, |
|
"learning_rate": 1.2360697859462035e-09, |
|
"logits/chosen": -1.5886671543121338, |
|
"logits/rejected": -1.562727928161621, |
|
"logps/chosen": -162.84046936035156, |
|
"logps/rejected": -219.8025360107422, |
|
"loss": 0.118, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.3454614281654358, |
|
"rewards/margins": 4.1069793701171875, |
|
"rewards/rejected": -4.4524407386779785, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.982456140350877, |
|
"grad_norm": 13.0625, |
|
"learning_rate": 3.090365472041557e-10, |
|
"logits/chosen": -1.5336341857910156, |
|
"logits/rejected": -1.5714600086212158, |
|
"logps/chosen": -217.091064453125, |
|
"logps/rejected": -239.0583953857422, |
|
"loss": 0.1793, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.2769380509853363, |
|
"rewards/margins": 3.7623977661132812, |
|
"rewards/rejected": -4.039335250854492, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 2.9959514170040484, |
|
"grad_norm": 16.75, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.4733049869537354, |
|
"logits/rejected": -1.4821723699569702, |
|
"logps/chosen": -191.77438354492188, |
|
"logps/rejected": -275.19158935546875, |
|
"loss": 0.0661, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.11918088048696518, |
|
"rewards/margins": 4.132817268371582, |
|
"rewards/rejected": -4.013636589050293, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.9959514170040484, |
|
"step": 1110, |
|
"total_flos": 4.5615607240812134e+17, |
|
"train_loss": 0.26195224279218965, |
|
"train_runtime": 3105.2921, |
|
"train_samples_per_second": 2.862, |
|
"train_steps_per_second": 0.357 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1110, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 4.5615607240812134e+17, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|