|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 4.0, |
|
"eval_steps": 100, |
|
"global_step": 1540, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": -1.7278180122375488, |
|
"logits/rejected": -1.7377450466156006, |
|
"logps/chosen": -29.553977966308594, |
|
"logps/rejected": -42.813133239746094, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": -1.866413950920105, |
|
"logits/rejected": -1.8707411289215088, |
|
"logps/chosen": -36.98916244506836, |
|
"logps/rejected": -33.67436981201172, |
|
"loss": 0.6701, |
|
"rewards/accuracies": 0.5416666865348816, |
|
"rewards/chosen": 0.01569323241710663, |
|
"rewards/margins": 0.05555717274546623, |
|
"rewards/rejected": -0.039863936603069305, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": -1.9979650974273682, |
|
"logits/rejected": -2.0006086826324463, |
|
"logps/chosen": -29.624820709228516, |
|
"logps/rejected": -29.0762939453125, |
|
"loss": 0.6837, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": 0.01563635841012001, |
|
"rewards/margins": 0.027204299345612526, |
|
"rewards/rejected": -0.01156794372946024, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": -1.921021819114685, |
|
"logits/rejected": -1.9183374643325806, |
|
"logps/chosen": -31.40532875061035, |
|
"logps/rejected": -33.23241424560547, |
|
"loss": 0.6877, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.00968973059207201, |
|
"rewards/margins": 0.022251319140195847, |
|
"rewards/rejected": -0.012561586685478687, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": -2.0176353454589844, |
|
"logits/rejected": -2.008906364440918, |
|
"logps/chosen": -32.574256896972656, |
|
"logps/rejected": -32.53368377685547, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.0022967704571783543, |
|
"rewards/margins": 0.02120940014719963, |
|
"rewards/rejected": -0.018912632018327713, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": -1.8619186878204346, |
|
"logits/rejected": -1.85114324092865, |
|
"logps/chosen": -33.55537414550781, |
|
"logps/rejected": -35.45675277709961, |
|
"loss": 0.6957, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.001892436295747757, |
|
"rewards/margins": 0.005858602002263069, |
|
"rewards/rejected": -0.003966164775192738, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": -1.9400945901870728, |
|
"logits/rejected": -1.9420464038848877, |
|
"logps/chosen": -32.56509780883789, |
|
"logps/rejected": -33.2406120300293, |
|
"loss": 0.6632, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.031578924506902695, |
|
"rewards/margins": 0.09388783574104309, |
|
"rewards/rejected": -0.062308914959430695, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": -2.0712790489196777, |
|
"logits/rejected": -2.0762436389923096, |
|
"logps/chosen": -33.981910705566406, |
|
"logps/rejected": -36.62363815307617, |
|
"loss": 0.6833, |
|
"rewards/accuracies": 0.5249999761581421, |
|
"rewards/chosen": 0.005918038543313742, |
|
"rewards/margins": 0.05520814657211304, |
|
"rewards/rejected": -0.04929010197520256, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": -1.9327905178070068, |
|
"logits/rejected": -1.935909628868103, |
|
"logps/chosen": -34.32685470581055, |
|
"logps/rejected": -34.65606689453125, |
|
"loss": 0.639, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.09085920453071594, |
|
"rewards/margins": 0.14815348386764526, |
|
"rewards/rejected": -0.057294271886348724, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": -1.9414918422698975, |
|
"logits/rejected": -1.946007490158081, |
|
"logps/chosen": -32.406803131103516, |
|
"logps/rejected": -32.36021041870117, |
|
"loss": 0.6792, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.054556868970394135, |
|
"rewards/margins": 0.05573350936174393, |
|
"rewards/rejected": -0.0011766403913497925, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": -2.039034128189087, |
|
"logits/rejected": -2.0370402336120605, |
|
"logps/chosen": -32.172786712646484, |
|
"logps/rejected": -31.333194732666016, |
|
"loss": 0.6464, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": 0.06124376505613327, |
|
"rewards/margins": 0.12152798473834991, |
|
"rewards/rejected": -0.06028420478105545, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": -2.2339773178100586, |
|
"eval_logits/rejected": -2.229137420654297, |
|
"eval_logps/chosen": -34.04054641723633, |
|
"eval_logps/rejected": -37.549957275390625, |
|
"eval_loss": 0.6902773976325989, |
|
"eval_rewards/accuracies": 0.5685215592384338, |
|
"eval_rewards/chosen": -0.005393954925239086, |
|
"eval_rewards/margins": 0.024608083069324493, |
|
"eval_rewards/rejected": -0.030002037063241005, |
|
"eval_runtime": 146.034, |
|
"eval_samples_per_second": 2.349, |
|
"eval_steps_per_second": 0.294, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": -1.994192123413086, |
|
"logits/rejected": -1.9918158054351807, |
|
"logps/chosen": -33.142940521240234, |
|
"logps/rejected": -34.01188278198242, |
|
"loss": 0.6911, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.09078876674175262, |
|
"rewards/margins": 0.07505009323358536, |
|
"rewards/rejected": 0.015738680958747864, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": -2.0053954124450684, |
|
"logits/rejected": -1.997046709060669, |
|
"logps/chosen": -32.33894348144531, |
|
"logps/rejected": -32.1308708190918, |
|
"loss": 0.6746, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.09536493569612503, |
|
"rewards/margins": 0.06779730319976807, |
|
"rewards/rejected": 0.027567636221647263, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": -2.0336387157440186, |
|
"logits/rejected": -2.025650978088379, |
|
"logps/chosen": -30.345691680908203, |
|
"logps/rejected": -32.078697204589844, |
|
"loss": 0.6527, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.11702337116003036, |
|
"rewards/margins": 0.14014457166194916, |
|
"rewards/rejected": -0.023121213540434837, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": -1.9642337560653687, |
|
"logits/rejected": -1.9744552373886108, |
|
"logps/chosen": -31.243911743164062, |
|
"logps/rejected": -32.590267181396484, |
|
"loss": 0.6171, |
|
"rewards/accuracies": 0.6625000238418579, |
|
"rewards/chosen": 0.1581769436597824, |
|
"rewards/margins": 0.20802685618400574, |
|
"rewards/rejected": -0.04984992742538452, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": -1.876604437828064, |
|
"logits/rejected": -1.8777605295181274, |
|
"logps/chosen": -33.938690185546875, |
|
"logps/rejected": -34.807891845703125, |
|
"loss": 0.6043, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.22860188782215118, |
|
"rewards/margins": 0.2741745412349701, |
|
"rewards/rejected": -0.0455726757645607, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": -1.9282041788101196, |
|
"logits/rejected": -1.9247684478759766, |
|
"logps/chosen": -36.02125930786133, |
|
"logps/rejected": -32.71831130981445, |
|
"loss": 0.6454, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.13537634909152985, |
|
"rewards/margins": 0.13137592375278473, |
|
"rewards/rejected": 0.004000450484454632, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": -2.029125928878784, |
|
"logits/rejected": -2.0217747688293457, |
|
"logps/chosen": -33.49839401245117, |
|
"logps/rejected": -31.400177001953125, |
|
"loss": 0.5828, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.26951926946640015, |
|
"rewards/margins": 0.3130132555961609, |
|
"rewards/rejected": -0.04349397122859955, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": -2.0355944633483887, |
|
"logits/rejected": -2.040832042694092, |
|
"logps/chosen": -32.235923767089844, |
|
"logps/rejected": -32.460418701171875, |
|
"loss": 0.5943, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.2779761850833893, |
|
"rewards/margins": 0.2557251751422882, |
|
"rewards/rejected": 0.02225096896290779, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": -2.0362112522125244, |
|
"logits/rejected": -2.0334599018096924, |
|
"logps/chosen": -31.269250869750977, |
|
"logps/rejected": -31.325435638427734, |
|
"loss": 0.6245, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.19773444533348083, |
|
"rewards/margins": 0.20423230528831482, |
|
"rewards/rejected": -0.0064978525042533875, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": -1.9060389995574951, |
|
"logits/rejected": -1.9106788635253906, |
|
"logps/chosen": -31.306299209594727, |
|
"logps/rejected": -32.81407165527344, |
|
"loss": 0.5931, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.2699825167655945, |
|
"rewards/margins": 0.2908058166503906, |
|
"rewards/rejected": -0.02082330361008644, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": -2.231553792953491, |
|
"eval_logits/rejected": -2.2267112731933594, |
|
"eval_logps/chosen": -34.07304763793945, |
|
"eval_logps/rejected": -37.57693862915039, |
|
"eval_loss": 0.6979728937149048, |
|
"eval_rewards/accuracies": 0.5157807469367981, |
|
"eval_rewards/chosen": -0.03464451804757118, |
|
"eval_rewards/margins": 0.019641490653157234, |
|
"eval_rewards/rejected": -0.054286014288663864, |
|
"eval_runtime": 145.8095, |
|
"eval_samples_per_second": 2.352, |
|
"eval_steps_per_second": 0.295, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": -2.018519163131714, |
|
"logits/rejected": -2.0291810035705566, |
|
"logps/chosen": -31.742992401123047, |
|
"logps/rejected": -33.946937561035156, |
|
"loss": 0.5902, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.2054794579744339, |
|
"rewards/margins": 0.2812942862510681, |
|
"rewards/rejected": -0.07581482082605362, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": -1.911586046218872, |
|
"logits/rejected": -1.9263393878936768, |
|
"logps/chosen": -29.84616470336914, |
|
"logps/rejected": -31.615009307861328, |
|
"loss": 0.5879, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.23883743584156036, |
|
"rewards/margins": 0.2899848222732544, |
|
"rewards/rejected": -0.051147449761629105, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": -1.9677941799163818, |
|
"logits/rejected": -1.9717823266983032, |
|
"logps/chosen": -33.100074768066406, |
|
"logps/rejected": -31.62213134765625, |
|
"loss": 0.5748, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.28565075993537903, |
|
"rewards/margins": 0.3511958718299866, |
|
"rewards/rejected": -0.06554517149925232, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": -1.9661725759506226, |
|
"logits/rejected": -1.944300651550293, |
|
"logps/chosen": -33.841453552246094, |
|
"logps/rejected": -35.11375045776367, |
|
"loss": 0.5473, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.2810631990432739, |
|
"rewards/margins": 0.4277234673500061, |
|
"rewards/rejected": -0.14666026830673218, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": -2.007416009902954, |
|
"logits/rejected": -2.0040948390960693, |
|
"logps/chosen": -32.70330810546875, |
|
"logps/rejected": -36.29412841796875, |
|
"loss": 0.5992, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.1995842456817627, |
|
"rewards/margins": 0.2618715763092041, |
|
"rewards/rejected": -0.06228730082511902, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": -1.8749721050262451, |
|
"logits/rejected": -1.8725513219833374, |
|
"logps/chosen": -34.00068664550781, |
|
"logps/rejected": -35.53888702392578, |
|
"loss": 0.6254, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.16894161701202393, |
|
"rewards/margins": 0.1997825801372528, |
|
"rewards/rejected": -0.030840963125228882, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": -1.8600317239761353, |
|
"logits/rejected": -1.8576066493988037, |
|
"logps/chosen": -34.1875, |
|
"logps/rejected": -31.8159122467041, |
|
"loss": 0.616, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.1890900433063507, |
|
"rewards/margins": 0.22921428084373474, |
|
"rewards/rejected": -0.04012420028448105, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": -1.9631398916244507, |
|
"logits/rejected": -1.9526073932647705, |
|
"logps/chosen": -35.023719787597656, |
|
"logps/rejected": -31.869693756103516, |
|
"loss": 0.5782, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.29963088035583496, |
|
"rewards/margins": 0.32546472549438477, |
|
"rewards/rejected": -0.025833839550614357, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": -2.0582926273345947, |
|
"logits/rejected": -2.0433640480041504, |
|
"logps/chosen": -30.733753204345703, |
|
"logps/rejected": -32.67460632324219, |
|
"loss": 0.6392, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.17133468389511108, |
|
"rewards/margins": 0.19182677567005157, |
|
"rewards/rejected": -0.020492086187005043, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": -1.929610013961792, |
|
"logits/rejected": -1.9270601272583008, |
|
"logps/chosen": -32.42620086669922, |
|
"logps/rejected": -30.873455047607422, |
|
"loss": 0.5301, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.450817346572876, |
|
"rewards/margins": 0.5018006563186646, |
|
"rewards/rejected": -0.050983332097530365, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": -2.229154348373413, |
|
"eval_logits/rejected": -2.2243051528930664, |
|
"eval_logps/chosen": -34.09621810913086, |
|
"eval_logps/rejected": -37.59999084472656, |
|
"eval_loss": 0.6972895860671997, |
|
"eval_rewards/accuracies": 0.5390365719795227, |
|
"eval_rewards/chosen": -0.05550166219472885, |
|
"eval_rewards/margins": 0.019528048112988472, |
|
"eval_rewards/rejected": -0.07502970844507217, |
|
"eval_runtime": 145.7792, |
|
"eval_samples_per_second": 2.353, |
|
"eval_steps_per_second": 0.295, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 11.75, |
|
"learning_rate": 4.84533120650964e-06, |
|
"logits/chosen": -2.0636165142059326, |
|
"logits/rejected": -2.0508041381835938, |
|
"logps/chosen": -32.113487243652344, |
|
"logps/rejected": -32.89537811279297, |
|
"loss": 0.4684, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.3734120726585388, |
|
"rewards/margins": 0.6023003458976746, |
|
"rewards/rejected": -0.2288883477449417, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 10.875, |
|
"learning_rate": 4.825108134172131e-06, |
|
"logits/chosen": -1.9748560190200806, |
|
"logits/rejected": -1.9662139415740967, |
|
"logps/chosen": -31.80029296875, |
|
"logps/rejected": -30.449291229248047, |
|
"loss": 0.463, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.49922245740890503, |
|
"rewards/margins": 0.6863331198692322, |
|
"rewards/rejected": -0.18711069226264954, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 12.875, |
|
"learning_rate": 4.80369052967602e-06, |
|
"logits/chosen": -1.910094976425171, |
|
"logits/rejected": -1.9221827983856201, |
|
"logps/chosen": -29.87582778930664, |
|
"logps/rejected": -33.66598129272461, |
|
"loss": 0.4237, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.5430954694747925, |
|
"rewards/margins": 0.7804504632949829, |
|
"rewards/rejected": -0.23735502362251282, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 18.375, |
|
"learning_rate": 4.781089396387968e-06, |
|
"logits/chosen": -1.8735644817352295, |
|
"logits/rejected": -1.8643461465835571, |
|
"logps/chosen": -34.02741241455078, |
|
"logps/rejected": -36.179935455322266, |
|
"loss": 0.4051, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.6043617725372314, |
|
"rewards/margins": 0.89524906873703, |
|
"rewards/rejected": -0.29088738560676575, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 12.25, |
|
"learning_rate": 4.757316345716554e-06, |
|
"logits/chosen": -1.9254121780395508, |
|
"logits/rejected": -1.9260631799697876, |
|
"logps/chosen": -33.68886947631836, |
|
"logps/rejected": -34.135963439941406, |
|
"loss": 0.4149, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.6447720527648926, |
|
"rewards/margins": 0.9030619859695435, |
|
"rewards/rejected": -0.2582899332046509, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 14.9375, |
|
"learning_rate": 4.73238359114687e-06, |
|
"logits/chosen": -2.052173376083374, |
|
"logits/rejected": -2.0583267211914062, |
|
"logps/chosen": -31.09401512145996, |
|
"logps/rejected": -33.012630462646484, |
|
"loss": 0.4478, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.43130677938461304, |
|
"rewards/margins": 0.7412186861038208, |
|
"rewards/rejected": -0.30991190671920776, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 28.0, |
|
"learning_rate": 4.706303941965804e-06, |
|
"logits/chosen": -1.9808381795883179, |
|
"logits/rejected": -1.9804092645645142, |
|
"logps/chosen": -32.843143463134766, |
|
"logps/rejected": -36.34934616088867, |
|
"loss": 0.4406, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.5416162610054016, |
|
"rewards/margins": 0.8112291097640991, |
|
"rewards/rejected": -0.2696128487586975, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 10.5, |
|
"learning_rate": 4.679090796681225e-06, |
|
"logits/chosen": -2.012341022491455, |
|
"logits/rejected": -2.0077567100524902, |
|
"logps/chosen": -30.083026885986328, |
|
"logps/rejected": -29.55636215209961, |
|
"loss": 0.411, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.5648170709609985, |
|
"rewards/margins": 0.873041033744812, |
|
"rewards/rejected": -0.30822402238845825, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"grad_norm": 13.75, |
|
"learning_rate": 4.650758136138454e-06, |
|
"logits/chosen": -1.784257173538208, |
|
"logits/rejected": -1.7905915975570679, |
|
"logps/chosen": -31.67917823791504, |
|
"logps/rejected": -36.660545349121094, |
|
"loss": 0.3793, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.635884165763855, |
|
"rewards/margins": 1.1239113807678223, |
|
"rewards/rejected": -0.4880271553993225, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"grad_norm": 10.625, |
|
"learning_rate": 4.621320516337559e-06, |
|
"logits/chosen": -1.937954306602478, |
|
"logits/rejected": -1.931673288345337, |
|
"logps/chosen": -33.02653121948242, |
|
"logps/rejected": -32.67657470703125, |
|
"loss": 0.389, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.7255499958992004, |
|
"rewards/margins": 1.0564748048782349, |
|
"rewards/rejected": -0.33092474937438965, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"eval_logits/chosen": -2.2031846046447754, |
|
"eval_logits/rejected": -2.198335647583008, |
|
"eval_logps/chosen": -34.16795349121094, |
|
"eval_logps/rejected": -37.72208786010742, |
|
"eval_loss": 0.6933022737503052, |
|
"eval_rewards/accuracies": 0.550664484500885, |
|
"eval_rewards/chosen": -0.12006273865699768, |
|
"eval_rewards/margins": 0.06485801190137863, |
|
"eval_rewards/rejected": -0.1849207729101181, |
|
"eval_runtime": 145.5175, |
|
"eval_samples_per_second": 2.357, |
|
"eval_steps_per_second": 0.295, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"grad_norm": 13.375, |
|
"learning_rate": 4.590793060955158e-06, |
|
"logits/chosen": -1.9355392456054688, |
|
"logits/rejected": -1.9427944421768188, |
|
"logps/chosen": -28.549734115600586, |
|
"logps/rejected": -29.689483642578125, |
|
"loss": 0.3677, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.5135748982429504, |
|
"rewards/margins": 1.0457103252410889, |
|
"rewards/rejected": -0.5321354866027832, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"grad_norm": 11.375, |
|
"learning_rate": 4.559191453574582e-06, |
|
"logits/chosen": -1.9551494121551514, |
|
"logits/rejected": -1.9541441202163696, |
|
"logps/chosen": -33.60606002807617, |
|
"logps/rejected": -31.240774154663086, |
|
"loss": 0.4587, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.5801833868026733, |
|
"rewards/margins": 0.8481400609016418, |
|
"rewards/rejected": -0.2679567039012909, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"grad_norm": 15.125, |
|
"learning_rate": 4.52653192962838e-06, |
|
"logits/chosen": -1.9499473571777344, |
|
"logits/rejected": -1.932682752609253, |
|
"logps/chosen": -30.44845199584961, |
|
"logps/rejected": -33.599735260009766, |
|
"loss": 0.3784, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.4758743345737457, |
|
"rewards/margins": 1.0819809436798096, |
|
"rewards/rejected": -0.6061066389083862, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 14.1875, |
|
"learning_rate": 4.492831268057307e-06, |
|
"logits/chosen": -1.9813868999481201, |
|
"logits/rejected": -1.9835193157196045, |
|
"logps/chosen": -35.81322479248047, |
|
"logps/rejected": -35.490821838378906, |
|
"loss": 0.313, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.7192217111587524, |
|
"rewards/margins": 1.2869927883148193, |
|
"rewards/rejected": -0.5677711367607117, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"grad_norm": 12.3125, |
|
"learning_rate": 4.458106782690094e-06, |
|
"logits/chosen": -2.05714750289917, |
|
"logits/rejected": -2.0569212436676025, |
|
"logps/chosen": -31.911890029907227, |
|
"logps/rejected": -34.006473541259766, |
|
"loss": 0.3759, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6493128538131714, |
|
"rewards/margins": 1.0510342121124268, |
|
"rewards/rejected": -0.40172141790390015, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"grad_norm": 11.9375, |
|
"learning_rate": 4.422376313348405e-06, |
|
"logits/chosen": -2.001530885696411, |
|
"logits/rejected": -1.9940494298934937, |
|
"logps/chosen": -31.56293296813965, |
|
"logps/rejected": -36.774314880371094, |
|
"loss": 0.3025, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.7587249279022217, |
|
"rewards/margins": 1.4069726467132568, |
|
"rewards/rejected": -0.6482478380203247, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"grad_norm": 20.375, |
|
"learning_rate": 4.3856582166815696e-06, |
|
"logits/chosen": -1.9044301509857178, |
|
"logits/rejected": -1.900957465171814, |
|
"logps/chosen": -33.156280517578125, |
|
"logps/rejected": -33.48976516723633, |
|
"loss": 0.3626, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.6984156966209412, |
|
"rewards/margins": 1.2077140808105469, |
|
"rewards/rejected": -0.5092984437942505, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 14.0625, |
|
"learning_rate": 4.347971356735789e-06, |
|
"logits/chosen": -2.0298960208892822, |
|
"logits/rejected": -2.0229077339172363, |
|
"logps/chosen": -30.285165786743164, |
|
"logps/rejected": -32.622947692871094, |
|
"loss": 0.4059, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": 0.48903241753578186, |
|
"rewards/margins": 1.0555496215820312, |
|
"rewards/rejected": -0.566517174243927, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 4.309335095262675e-06, |
|
"logits/chosen": -1.9742801189422607, |
|
"logits/rejected": -1.975847840309143, |
|
"logps/chosen": -34.49077606201172, |
|
"logps/rejected": -34.25959396362305, |
|
"loss": 0.3167, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.7761706113815308, |
|
"rewards/margins": 1.3324440717697144, |
|
"rewards/rejected": -0.5562735795974731, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"grad_norm": 20.25, |
|
"learning_rate": 4.269769281772082e-06, |
|
"logits/chosen": -1.8631584644317627, |
|
"logits/rejected": -1.860769510269165, |
|
"logps/chosen": -32.35368347167969, |
|
"logps/rejected": -37.259803771972656, |
|
"loss": 0.322, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.7367446422576904, |
|
"rewards/margins": 1.452678918838501, |
|
"rewards/rejected": -0.715934157371521, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"eval_logits/chosen": -2.201748847961426, |
|
"eval_logits/rejected": -2.1969025135040283, |
|
"eval_logps/chosen": -34.34733200073242, |
|
"eval_logps/rejected": -37.91175842285156, |
|
"eval_loss": 0.7055429816246033, |
|
"eval_rewards/accuracies": 0.5514950156211853, |
|
"eval_rewards/chosen": -0.281506210565567, |
|
"eval_rewards/margins": 0.07411985099315643, |
|
"eval_rewards/rejected": -0.35562604665756226, |
|
"eval_runtime": 145.2329, |
|
"eval_samples_per_second": 2.362, |
|
"eval_steps_per_second": 0.296, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"grad_norm": 14.9375, |
|
"learning_rate": 4.22929424333435e-06, |
|
"logits/chosen": -1.9645631313323975, |
|
"logits/rejected": -1.9692989587783813, |
|
"logps/chosen": -32.608741760253906, |
|
"logps/rejected": -32.16291809082031, |
|
"loss": 0.3561, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.7637556791305542, |
|
"rewards/margins": 1.2288849353790283, |
|
"rewards/rejected": -0.46512943506240845, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"grad_norm": 14.375, |
|
"learning_rate": 4.1879307741372085e-06, |
|
"logits/chosen": -1.9946361780166626, |
|
"logits/rejected": -2.005610466003418, |
|
"logps/chosen": -30.554733276367188, |
|
"logps/rejected": -32.16284942626953, |
|
"loss": 0.358, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.7521130442619324, |
|
"rewards/margins": 1.3417600393295288, |
|
"rewards/rejected": -0.589647114276886, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 4.145700124802693e-06, |
|
"logits/chosen": -1.9223169088363647, |
|
"logits/rejected": -1.9189828634262085, |
|
"logps/chosen": -31.747081756591797, |
|
"logps/rejected": -33.04930877685547, |
|
"loss": 0.349, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.6060883402824402, |
|
"rewards/margins": 1.2239677906036377, |
|
"rewards/rejected": -0.6178793907165527, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"grad_norm": 11.0, |
|
"learning_rate": 4.102623991469562e-06, |
|
"logits/chosen": -1.7875115871429443, |
|
"logits/rejected": -1.7967026233673096, |
|
"logps/chosen": -31.79451560974121, |
|
"logps/rejected": -32.5256233215332, |
|
"loss": 0.3606, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.697385847568512, |
|
"rewards/margins": 1.3000586032867432, |
|
"rewards/rejected": -0.602672815322876, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"grad_norm": 13.1875, |
|
"learning_rate": 4.058724504646834e-06, |
|
"logits/chosen": -1.8841272592544556, |
|
"logits/rejected": -1.8778330087661743, |
|
"logps/chosen": -32.83342742919922, |
|
"logps/rejected": -31.528995513916016, |
|
"loss": 0.3722, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.7873939275741577, |
|
"rewards/margins": 1.2712757587432861, |
|
"rewards/rejected": -0.48388180136680603, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"grad_norm": 10.125, |
|
"learning_rate": 4.014024217844167e-06, |
|
"logits/chosen": -1.9727208614349365, |
|
"logits/rejected": -1.9707006216049194, |
|
"logps/chosen": -33.61963653564453, |
|
"logps/rejected": -31.988178253173828, |
|
"loss": 0.3617, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.8564074635505676, |
|
"rewards/margins": 1.298119306564331, |
|
"rewards/rejected": -0.4417116641998291, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"grad_norm": 12.625, |
|
"learning_rate": 3.968546095984911e-06, |
|
"logits/chosen": -1.8058189153671265, |
|
"logits/rejected": -1.8036988973617554, |
|
"logps/chosen": -31.945751190185547, |
|
"logps/rejected": -31.496994018554688, |
|
"loss": 0.3948, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.7655945420265198, |
|
"rewards/margins": 1.210815191268921, |
|
"rewards/rejected": -0.44522079825401306, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"grad_norm": 13.125, |
|
"learning_rate": 3.922313503607806e-06, |
|
"logits/chosen": -1.9404058456420898, |
|
"logits/rejected": -1.9370429515838623, |
|
"logps/chosen": -30.20537757873535, |
|
"logps/rejected": -35.37580871582031, |
|
"loss": 0.32, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.8093468546867371, |
|
"rewards/margins": 1.4840278625488281, |
|
"rewards/rejected": -0.6746810078620911, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"grad_norm": 9.6875, |
|
"learning_rate": 3.875350192863368e-06, |
|
"logits/chosen": -1.8791511058807373, |
|
"logits/rejected": -1.8827145099639893, |
|
"logps/chosen": -28.8929386138916, |
|
"logps/rejected": -31.133419036865234, |
|
"loss": 0.3795, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.6498847603797913, |
|
"rewards/margins": 1.0769810676574707, |
|
"rewards/rejected": -0.4270961880683899, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"grad_norm": 15.0625, |
|
"learning_rate": 3.8276802913111436e-06, |
|
"logits/chosen": -1.915834665298462, |
|
"logits/rejected": -1.9157018661499023, |
|
"logps/chosen": -31.168197631835938, |
|
"logps/rejected": -31.740047454833984, |
|
"loss": 0.327, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.9480020403862, |
|
"rewards/margins": 1.4403297901153564, |
|
"rewards/rejected": -0.49232783913612366, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"eval_logits/chosen": -2.1867270469665527, |
|
"eval_logits/rejected": -2.1818978786468506, |
|
"eval_logps/chosen": -34.19493103027344, |
|
"eval_logps/rejected": -37.843685150146484, |
|
"eval_loss": 0.6703336834907532, |
|
"eval_rewards/accuracies": 0.5805647969245911, |
|
"eval_rewards/chosen": -0.14434270560741425, |
|
"eval_rewards/margins": 0.15001599490642548, |
|
"eval_rewards/rejected": -0.2943587005138397, |
|
"eval_runtime": 145.4291, |
|
"eval_samples_per_second": 2.359, |
|
"eval_steps_per_second": 0.296, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"grad_norm": 13.75, |
|
"learning_rate": 3.7793282895240927e-06, |
|
"logits/chosen": -1.9787023067474365, |
|
"logits/rejected": -1.9794394969940186, |
|
"logps/chosen": -33.91895294189453, |
|
"logps/rejected": -33.77281951904297, |
|
"loss": 0.2961, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.0000724792480469, |
|
"rewards/margins": 1.6198437213897705, |
|
"rewards/rejected": -0.6197710037231445, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"grad_norm": 10.25, |
|
"learning_rate": 3.730319028506478e-06, |
|
"logits/chosen": -1.9425113201141357, |
|
"logits/rejected": -1.9399850368499756, |
|
"logps/chosen": -32.121673583984375, |
|
"logps/rejected": -32.70948791503906, |
|
"loss": 0.3125, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 0.9033487439155579, |
|
"rewards/margins": 1.4789619445800781, |
|
"rewards/rejected": -0.5756131410598755, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"grad_norm": 30.75, |
|
"learning_rate": 3.6806776869317074e-06, |
|
"logits/chosen": -1.953460693359375, |
|
"logits/rejected": -1.9444434642791748, |
|
"logps/chosen": -31.709529876708984, |
|
"logps/rejected": -31.5223388671875, |
|
"loss": 0.3619, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": 0.9599300622940063, |
|
"rewards/margins": 1.488205909729004, |
|
"rewards/rejected": -0.5282759070396423, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"grad_norm": 17.25, |
|
"learning_rate": 3.6304297682067146e-06, |
|
"logits/chosen": -1.9538257122039795, |
|
"logits/rejected": -1.9505916833877563, |
|
"logps/chosen": -31.257614135742188, |
|
"logps/rejected": -33.02531814575195, |
|
"loss": 0.3186, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 0.8467265367507935, |
|
"rewards/margins": 1.3920328617095947, |
|
"rewards/rejected": -0.5453063249588013, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 3.579601087369492e-06, |
|
"logits/chosen": -1.9619709253311157, |
|
"logits/rejected": -1.9643146991729736, |
|
"logps/chosen": -32.65868377685547, |
|
"logps/rejected": -34.36846160888672, |
|
"loss": 0.2785, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 0.8955224752426147, |
|
"rewards/margins": 1.512078881263733, |
|
"rewards/rejected": -0.6165562868118286, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 19.625, |
|
"learning_rate": 3.5282177578265295e-06, |
|
"logits/chosen": -1.8742033243179321, |
|
"logits/rejected": -1.8746894598007202, |
|
"logps/chosen": -32.91667175292969, |
|
"logps/rejected": -32.129493713378906, |
|
"loss": 0.328, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 0.9271462559700012, |
|
"rewards/margins": 1.3971627950668335, |
|
"rewards/rejected": -0.4700165390968323, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"grad_norm": 14.1875, |
|
"learning_rate": 3.476306177936961e-06, |
|
"logits/chosen": -1.9133888483047485, |
|
"logits/rejected": -1.9037227630615234, |
|
"logps/chosen": -32.933998107910156, |
|
"logps/rejected": -33.00373077392578, |
|
"loss": 0.2651, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 0.9605558514595032, |
|
"rewards/margins": 1.586902379989624, |
|
"rewards/rejected": -0.6263464689254761, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"grad_norm": 10.1875, |
|
"learning_rate": 3.423893017450324e-06, |
|
"logits/chosen": -1.8113712072372437, |
|
"logits/rejected": -1.8081716299057007, |
|
"logps/chosen": -30.349456787109375, |
|
"logps/rejected": -34.96870422363281, |
|
"loss": 0.2808, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.0003511905670166, |
|
"rewards/margins": 1.5854613780975342, |
|
"rewards/rejected": -0.5851101875305176, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"grad_norm": 8.4375, |
|
"learning_rate": 3.3710052038048794e-06, |
|
"logits/chosen": -1.8722549676895142, |
|
"logits/rejected": -1.8714803457260132, |
|
"logps/chosen": -34.03639602661133, |
|
"logps/rejected": -36.12696075439453, |
|
"loss": 0.2342, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.110505223274231, |
|
"rewards/margins": 1.8063312768936157, |
|
"rewards/rejected": -0.6958259344100952, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"grad_norm": 6.90625, |
|
"learning_rate": 3.3176699082935546e-06, |
|
"logits/chosen": -1.8479416370391846, |
|
"logits/rejected": -1.8507680892944336, |
|
"logps/chosen": -31.403573989868164, |
|
"logps/rejected": -36.341434478759766, |
|
"loss": 0.3034, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.079347014427185, |
|
"rewards/margins": 1.7091315984725952, |
|
"rewards/rejected": -0.6297845840454102, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"eval_logits/chosen": -2.174877166748047, |
|
"eval_logits/rejected": -2.1700735092163086, |
|
"eval_logps/chosen": -34.240230560302734, |
|
"eval_logps/rejected": -37.869354248046875, |
|
"eval_loss": 0.6868197917938232, |
|
"eval_rewards/accuracies": 0.565614640712738, |
|
"eval_rewards/chosen": -0.18511110544204712, |
|
"eval_rewards/margins": 0.13234683871269226, |
|
"eval_rewards/rejected": -0.31745797395706177, |
|
"eval_runtime": 145.1401, |
|
"eval_samples_per_second": 2.363, |
|
"eval_steps_per_second": 0.296, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"grad_norm": 15.9375, |
|
"learning_rate": 3.2639145321045933e-06, |
|
"logits/chosen": -1.9555838108062744, |
|
"logits/rejected": -1.958298683166504, |
|
"logps/chosen": -33.82456970214844, |
|
"logps/rejected": -34.84859085083008, |
|
"loss": 0.3553, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 0.9639171361923218, |
|
"rewards/margins": 1.4845099449157715, |
|
"rewards/rejected": -0.5205925703048706, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"grad_norm": 14.375, |
|
"learning_rate": 3.2097666922441107e-06, |
|
"logits/chosen": -1.8090355396270752, |
|
"logits/rejected": -1.8031476736068726, |
|
"logps/chosen": -33.71675491333008, |
|
"logps/rejected": -32.93245315551758, |
|
"loss": 0.323, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": 1.0048266649246216, |
|
"rewards/margins": 1.527376413345337, |
|
"rewards/rejected": -0.5225496292114258, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 7.46875, |
|
"learning_rate": 3.1552542073477554e-06, |
|
"logits/chosen": -1.9834911823272705, |
|
"logits/rejected": -1.9804458618164062, |
|
"logps/chosen": -29.5953426361084, |
|
"logps/rejected": -32.31734085083008, |
|
"loss": 0.2567, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.9518804550170898, |
|
"rewards/margins": 1.7341070175170898, |
|
"rewards/rejected": -0.7822265028953552, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 3.100405083388799e-06, |
|
"logits/chosen": -1.8222984075546265, |
|
"logits/rejected": -1.8223203420639038, |
|
"logps/chosen": -32.201148986816406, |
|
"logps/rejected": -38.2669792175293, |
|
"loss": 0.2789, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.1358217000961304, |
|
"rewards/margins": 1.7110786437988281, |
|
"rewards/rejected": -0.5752568244934082, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"grad_norm": 6.75, |
|
"learning_rate": 3.0452474992899645e-06, |
|
"logits/chosen": -1.7060989141464233, |
|
"logits/rejected": -1.711395025253296, |
|
"logps/chosen": -35.7166748046875, |
|
"logps/rejected": -34.62081527709961, |
|
"loss": 0.3267, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.09420907497406, |
|
"rewards/margins": 1.6778392791748047, |
|
"rewards/rejected": -0.5836302638053894, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"grad_norm": 9.0, |
|
"learning_rate": 2.989809792446417e-06, |
|
"logits/chosen": -1.9088201522827148, |
|
"logits/rejected": -1.910348892211914, |
|
"logps/chosen": -31.48809814453125, |
|
"logps/rejected": -33.39298629760742, |
|
"loss": 0.2839, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.005824327468872, |
|
"rewards/margins": 1.5582213401794434, |
|
"rewards/rejected": -0.5523970723152161, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 14.625, |
|
"learning_rate": 2.9341204441673267e-06, |
|
"logits/chosen": -1.882775068283081, |
|
"logits/rejected": -1.8818010091781616, |
|
"logps/chosen": -30.975149154663086, |
|
"logps/rejected": -35.45886993408203, |
|
"loss": 0.3076, |
|
"rewards/accuracies": 0.908333420753479, |
|
"rewards/chosen": 1.0804139375686646, |
|
"rewards/margins": 1.4680591821670532, |
|
"rewards/rejected": -0.3876451253890991, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 2.878208065043501e-06, |
|
"logits/chosen": -1.882063627243042, |
|
"logits/rejected": -1.8813728094100952, |
|
"logps/chosen": -33.322509765625, |
|
"logps/rejected": -32.38517379760742, |
|
"loss": 0.1884, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.193357229232788, |
|
"rewards/margins": 2.062441349029541, |
|
"rewards/rejected": -0.8690838813781738, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"grad_norm": 10.0625, |
|
"learning_rate": 2.8221013802485974e-06, |
|
"logits/chosen": -1.9113022089004517, |
|
"logits/rejected": -1.909850835800171, |
|
"logps/chosen": -28.325769424438477, |
|
"logps/rejected": -33.69379425048828, |
|
"loss": 0.1718, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.1136726140975952, |
|
"rewards/margins": 2.0654101371765137, |
|
"rewards/rejected": -0.9517375826835632, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"grad_norm": 4.375, |
|
"learning_rate": 2.76582921478147e-06, |
|
"logits/chosen": -1.9627540111541748, |
|
"logits/rejected": -1.959313988685608, |
|
"logps/chosen": -31.032363891601562, |
|
"logps/rejected": -35.28049087524414, |
|
"loss": 0.1649, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.2553597688674927, |
|
"rewards/margins": 2.2286696434020996, |
|
"rewards/rejected": -0.9733098745346069, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"eval_logits/chosen": -2.164193868637085, |
|
"eval_logits/rejected": -2.1594185829162598, |
|
"eval_logps/chosen": -34.28217315673828, |
|
"eval_logps/rejected": -37.94434356689453, |
|
"eval_loss": 0.6811564564704895, |
|
"eval_rewards/accuracies": 0.595099687576294, |
|
"eval_rewards/chosen": -0.22285737097263336, |
|
"eval_rewards/margins": 0.16209454834461212, |
|
"eval_rewards/rejected": -0.3849518895149231, |
|
"eval_runtime": 145.2348, |
|
"eval_samples_per_second": 2.362, |
|
"eval_steps_per_second": 0.296, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"grad_norm": 7.375, |
|
"learning_rate": 2.7094204786572254e-06, |
|
"logits/chosen": -1.793891191482544, |
|
"logits/rejected": -1.7861675024032593, |
|
"logps/chosen": -32.827354431152344, |
|
"logps/rejected": -35.588600158691406, |
|
"loss": 0.1606, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.465995192527771, |
|
"rewards/margins": 2.391630172729492, |
|
"rewards/rejected": -0.9256349802017212, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"grad_norm": 8.75, |
|
"learning_rate": 2.6529041520546072e-06, |
|
"logits/chosen": -1.8692048788070679, |
|
"logits/rejected": -1.879642128944397, |
|
"logps/chosen": -34.127784729003906, |
|
"logps/rejected": -33.262062072753906, |
|
"loss": 0.1935, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.4111144542694092, |
|
"rewards/margins": 2.158674955368042, |
|
"rewards/rejected": -0.7475605607032776, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"grad_norm": 11.375, |
|
"learning_rate": 2.5963092704273302e-06, |
|
"logits/chosen": -1.918723702430725, |
|
"logits/rejected": -1.9233121871948242, |
|
"logps/chosen": -33.50402069091797, |
|
"logps/rejected": -30.132360458374023, |
|
"loss": 0.1956, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.2588849067687988, |
|
"rewards/margins": 2.036588430404663, |
|
"rewards/rejected": -0.777703583240509, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"grad_norm": 6.65625, |
|
"learning_rate": 2.53966490958702e-06, |
|
"logits/chosen": -1.9170500040054321, |
|
"logits/rejected": -1.9250987768173218, |
|
"logps/chosen": -33.287925720214844, |
|
"logps/rejected": -30.88974952697754, |
|
"loss": 0.1648, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.350099802017212, |
|
"rewards/margins": 2.28794264793396, |
|
"rewards/rejected": -0.937842845916748, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"grad_norm": 9.1875, |
|
"learning_rate": 2.4830001707654135e-06, |
|
"logits/chosen": -1.8453495502471924, |
|
"logits/rejected": -1.835889458656311, |
|
"logps/chosen": -30.441539764404297, |
|
"logps/rejected": -32.993690490722656, |
|
"loss": 0.1814, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.2164894342422485, |
|
"rewards/margins": 2.098388195037842, |
|
"rewards/rejected": -0.8818984031677246, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"grad_norm": 5.25, |
|
"learning_rate": 2.4263441656635054e-06, |
|
"logits/chosen": -1.983048677444458, |
|
"logits/rejected": -1.973232626914978, |
|
"logps/chosen": -24.832033157348633, |
|
"logps/rejected": -30.95058250427246, |
|
"loss": 0.1791, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.2528350353240967, |
|
"rewards/margins": 2.2232518196105957, |
|
"rewards/rejected": -0.9704168438911438, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"grad_norm": 7.1875, |
|
"learning_rate": 2.3697260014953107e-06, |
|
"logits/chosen": -1.8357470035552979, |
|
"logits/rejected": -1.8370872735977173, |
|
"logps/chosen": -32.829017639160156, |
|
"logps/rejected": -30.976612091064453, |
|
"loss": 0.1644, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.353589653968811, |
|
"rewards/margins": 2.2482855319976807, |
|
"rewards/rejected": -0.8946956396102905, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"grad_norm": 6.46875, |
|
"learning_rate": 2.3131747660339396e-06, |
|
"logits/chosen": -1.8432958126068115, |
|
"logits/rejected": -1.8441736698150635, |
|
"logps/chosen": -31.128490447998047, |
|
"logps/rejected": -34.13156509399414, |
|
"loss": 0.2286, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.1346783638000488, |
|
"rewards/margins": 2.107259750366211, |
|
"rewards/rejected": -0.9725813865661621, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"grad_norm": 6.4375, |
|
"learning_rate": 2.256719512667651e-06, |
|
"logits/chosen": -1.747079849243164, |
|
"logits/rejected": -1.7454869747161865, |
|
"logps/chosen": -34.19733810424805, |
|
"logps/rejected": -37.23557662963867, |
|
"loss": 0.1614, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.3208175897598267, |
|
"rewards/margins": 2.543323040008545, |
|
"rewards/rejected": -1.2225055694580078, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"grad_norm": 6.0, |
|
"learning_rate": 2.2003892454735786e-06, |
|
"logits/chosen": -1.8969894647598267, |
|
"logits/rejected": -1.8902244567871094, |
|
"logps/chosen": -30.53342056274414, |
|
"logps/rejected": -33.870323181152344, |
|
"loss": 0.1691, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.3222969770431519, |
|
"rewards/margins": 2.3233275413513184, |
|
"rewards/rejected": -1.0010308027267456, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"eval_logits/chosen": -2.15236496925354, |
|
"eval_logits/rejected": -2.1475820541381836, |
|
"eval_logps/chosen": -34.313846588134766, |
|
"eval_logps/rejected": -37.98138427734375, |
|
"eval_loss": 0.6880638599395752, |
|
"eval_rewards/accuracies": 0.5830564498901367, |
|
"eval_rewards/chosen": -0.2513664662837982, |
|
"eval_rewards/margins": 0.16692043840885162, |
|
"eval_rewards/rejected": -0.41828688979148865, |
|
"eval_runtime": 145.3621, |
|
"eval_samples_per_second": 2.36, |
|
"eval_steps_per_second": 0.296, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"grad_norm": 12.0625, |
|
"learning_rate": 2.1442129043167877e-06, |
|
"logits/chosen": -1.9361755847930908, |
|
"logits/rejected": -1.9315325021743774, |
|
"logps/chosen": -32.13105010986328, |
|
"logps/rejected": -35.70573425292969, |
|
"loss": 0.1596, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.2839645147323608, |
|
"rewards/margins": 2.3433659076690674, |
|
"rewards/rejected": -1.0594011545181274, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"grad_norm": 6.5, |
|
"learning_rate": 2.088219349982323e-06, |
|
"logits/chosen": -1.8552948236465454, |
|
"logits/rejected": -1.860346794128418, |
|
"logps/chosen": -33.7907829284668, |
|
"logps/rejected": -33.559757232666016, |
|
"loss": 0.1777, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.4020235538482666, |
|
"rewards/margins": 2.3558895587921143, |
|
"rewards/rejected": -0.9538658261299133, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 2.0324373493478803e-06, |
|
"logits/chosen": -1.953168272972107, |
|
"logits/rejected": -1.943918228149414, |
|
"logps/chosen": -30.902240753173828, |
|
"logps/rejected": -35.58992385864258, |
|
"loss": 0.1581, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2444217205047607, |
|
"rewards/margins": 2.276834011077881, |
|
"rewards/rejected": -1.032412052154541, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"grad_norm": 8.5, |
|
"learning_rate": 1.976895560604729e-06, |
|
"logits/chosen": -1.879119873046875, |
|
"logits/rejected": -1.8759711980819702, |
|
"logps/chosen": -30.023090362548828, |
|
"logps/rejected": -33.454833984375, |
|
"loss": 0.2017, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.1388328075408936, |
|
"rewards/margins": 2.029343366622925, |
|
"rewards/rejected": -0.8905106782913208, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"grad_norm": 4.625, |
|
"learning_rate": 1.921622518534466e-06, |
|
"logits/chosen": -1.8175594806671143, |
|
"logits/rejected": -1.8249561786651611, |
|
"logps/chosen": -31.570093154907227, |
|
"logps/rejected": -36.65325164794922, |
|
"loss": 0.2168, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.1708660125732422, |
|
"rewards/margins": 2.163855791091919, |
|
"rewards/rejected": -0.9929895401000977, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"grad_norm": 3.953125, |
|
"learning_rate": 1.8666466198491794e-06, |
|
"logits/chosen": -1.8278528451919556, |
|
"logits/rejected": -1.8209831714630127, |
|
"logps/chosen": -32.37910842895508, |
|
"logps/rejected": -37.90230178833008, |
|
"loss": 0.1494, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.3044663667678833, |
|
"rewards/margins": 2.4776108264923096, |
|
"rewards/rejected": -1.1731446981430054, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"grad_norm": 5.375, |
|
"learning_rate": 1.8119961086025376e-06, |
|
"logits/chosen": -1.8297231197357178, |
|
"logits/rejected": -1.8303101062774658, |
|
"logps/chosen": -29.768722534179688, |
|
"logps/rejected": -33.98983383178711, |
|
"loss": 0.1847, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.3209145069122314, |
|
"rewards/margins": 2.2286314964294434, |
|
"rewards/rejected": -0.9077168703079224, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"grad_norm": 7.9375, |
|
"learning_rate": 1.7576990616793139e-06, |
|
"logits/chosen": -1.8326854705810547, |
|
"logits/rejected": -1.8439195156097412, |
|
"logps/chosen": -31.450542449951172, |
|
"logps/rejected": -35.19971466064453, |
|
"loss": 0.168, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.4635357856750488, |
|
"rewards/margins": 2.4251866340637207, |
|
"rewards/rejected": -0.9616511464118958, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"grad_norm": 6.0, |
|
"learning_rate": 1.7037833743707892e-06, |
|
"logits/chosen": -1.903786301612854, |
|
"logits/rejected": -1.901314377784729, |
|
"logps/chosen": -34.712928771972656, |
|
"logps/rejected": -33.3343620300293, |
|
"loss": 0.2409, |
|
"rewards/accuracies": 0.8999999761581421, |
|
"rewards/chosen": 1.2673081159591675, |
|
"rewards/margins": 2.1089327335357666, |
|
"rewards/rejected": -0.8416244387626648, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"grad_norm": 9.5, |
|
"learning_rate": 1.6502767460434588e-06, |
|
"logits/chosen": -1.875101089477539, |
|
"logits/rejected": -1.8791959285736084, |
|
"logps/chosen": -33.045143127441406, |
|
"logps/rejected": -35.46049880981445, |
|
"loss": 0.1953, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.2763327360153198, |
|
"rewards/margins": 2.153337001800537, |
|
"rewards/rejected": -0.8770040273666382, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"eval_logits/chosen": -2.1447298526763916, |
|
"eval_logits/rejected": -2.139974594116211, |
|
"eval_logps/chosen": -34.36631774902344, |
|
"eval_logps/rejected": -38.036582946777344, |
|
"eval_loss": 0.695711076259613, |
|
"eval_rewards/accuracies": 0.5917773842811584, |
|
"eval_rewards/chosen": -0.2985913157463074, |
|
"eval_rewards/margins": 0.16937348246574402, |
|
"eval_rewards/rejected": -0.4679647982120514, |
|
"eval_runtime": 145.3859, |
|
"eval_samples_per_second": 2.359, |
|
"eval_steps_per_second": 0.296, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"grad_norm": 20.25, |
|
"learning_rate": 1.5972066659083796e-06, |
|
"logits/chosen": -1.8841043710708618, |
|
"logits/rejected": -1.8890196084976196, |
|
"logps/chosen": -31.524646759033203, |
|
"logps/rejected": -33.430824279785156, |
|
"loss": 0.2343, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": 1.1482000350952148, |
|
"rewards/margins": 1.9668105840682983, |
|
"rewards/rejected": -0.8186105489730835, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 15.5625, |
|
"learning_rate": 1.5446003988985041e-06, |
|
"logits/chosen": -1.843640685081482, |
|
"logits/rejected": -1.8461425304412842, |
|
"logps/chosen": -29.06394386291504, |
|
"logps/rejected": -32.841705322265625, |
|
"loss": 0.1766, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.2435271739959717, |
|
"rewards/margins": 2.2718143463134766, |
|
"rewards/rejected": -1.0282870531082153, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"grad_norm": 4.625, |
|
"learning_rate": 1.4924849716612211e-06, |
|
"logits/chosen": -1.8973257541656494, |
|
"logits/rejected": -1.8918126821517944, |
|
"logps/chosen": -32.64158248901367, |
|
"logps/rejected": -34.6368293762207, |
|
"loss": 0.1812, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.2127745151519775, |
|
"rewards/margins": 2.2197234630584717, |
|
"rewards/rejected": -1.0069488286972046, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"grad_norm": 17.875, |
|
"learning_rate": 1.440887158673332e-06, |
|
"logits/chosen": -1.8320951461791992, |
|
"logits/rejected": -1.836050033569336, |
|
"logps/chosen": -34.43301010131836, |
|
"logps/rejected": -35.46531677246094, |
|
"loss": 0.1938, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.1886460781097412, |
|
"rewards/margins": 2.1298069953918457, |
|
"rewards/rejected": -0.9411608576774597, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"grad_norm": 7.09375, |
|
"learning_rate": 1.3898334684855647e-06, |
|
"logits/chosen": -1.8290131092071533, |
|
"logits/rejected": -1.841897964477539, |
|
"logps/chosen": -30.590591430664062, |
|
"logps/rejected": -34.06149673461914, |
|
"loss": 0.2044, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.1450647115707397, |
|
"rewards/margins": 2.082374095916748, |
|
"rewards/rejected": -0.9373094439506531, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"grad_norm": 19.25, |
|
"learning_rate": 1.3393501301037245e-06, |
|
"logits/chosen": -1.8185697793960571, |
|
"logits/rejected": -1.8119618892669678, |
|
"logps/chosen": -30.647253036499023, |
|
"logps/rejected": -34.33002471923828, |
|
"loss": 0.1727, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.3608683347702026, |
|
"rewards/margins": 2.2717270851135254, |
|
"rewards/rejected": -0.9108586311340332, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"grad_norm": 5.1875, |
|
"learning_rate": 1.2894630795134454e-06, |
|
"logits/chosen": -1.944183349609375, |
|
"logits/rejected": -1.944902777671814, |
|
"logps/chosen": -32.107669830322266, |
|
"logps/rejected": -33.907928466796875, |
|
"loss": 0.1656, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.2954630851745605, |
|
"rewards/margins": 2.2523720264434814, |
|
"rewards/rejected": -0.9569088816642761, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"grad_norm": 10.0, |
|
"learning_rate": 1.2401979463554984e-06, |
|
"logits/chosen": -1.8745800256729126, |
|
"logits/rejected": -1.8734228610992432, |
|
"logps/chosen": -32.74195861816406, |
|
"logps/rejected": -34.108428955078125, |
|
"loss": 0.2386, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": 1.2130850553512573, |
|
"rewards/margins": 2.0380234718322754, |
|
"rewards/rejected": -0.8249381184577942, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"grad_norm": 4.96875, |
|
"learning_rate": 1.1915800407584705e-06, |
|
"logits/chosen": -1.8999583721160889, |
|
"logits/rejected": -1.8924366235733032, |
|
"logps/chosen": -32.777381896972656, |
|
"logps/rejected": -31.974105834960938, |
|
"loss": 0.1764, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.3036444187164307, |
|
"rewards/margins": 2.142960548400879, |
|
"rewards/rejected": -0.8393163681030273, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"grad_norm": 5.125, |
|
"learning_rate": 1.1436343403356019e-06, |
|
"logits/chosen": -1.8712406158447266, |
|
"logits/rejected": -1.870269775390625, |
|
"logps/chosen": -33.90606689453125, |
|
"logps/rejected": -37.61457061767578, |
|
"loss": 0.1463, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.361112117767334, |
|
"rewards/margins": 2.496788263320923, |
|
"rewards/rejected": -1.1356757879257202, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"eval_logits/chosen": -2.142699956893921, |
|
"eval_logits/rejected": -2.1379306316375732, |
|
"eval_logps/chosen": -34.36824035644531, |
|
"eval_logps/rejected": -38.02313995361328, |
|
"eval_loss": 0.7009721994400024, |
|
"eval_rewards/accuracies": 0.5714285373687744, |
|
"eval_rewards/chosen": -0.30031847953796387, |
|
"eval_rewards/margins": 0.15554992854595184, |
|
"eval_rewards/rejected": -0.4558684229850769, |
|
"eval_runtime": 145.3955, |
|
"eval_samples_per_second": 2.359, |
|
"eval_steps_per_second": 0.296, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"grad_norm": 6.53125, |
|
"learning_rate": 1.0963854773524548e-06, |
|
"logits/chosen": -1.9193534851074219, |
|
"logits/rejected": -1.9247900247573853, |
|
"logps/chosen": -34.15703582763672, |
|
"logps/rejected": -36.63218688964844, |
|
"loss": 0.2075, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.179071307182312, |
|
"rewards/margins": 2.15881085395813, |
|
"rewards/rejected": -0.9797393679618835, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"grad_norm": 4.625, |
|
"learning_rate": 1.049857726072005e-06, |
|
"logits/chosen": -1.888942003250122, |
|
"logits/rejected": -1.8869132995605469, |
|
"logps/chosen": -31.29522132873535, |
|
"logps/rejected": -34.09874725341797, |
|
"loss": 0.1873, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.1888843774795532, |
|
"rewards/margins": 2.193281650543213, |
|
"rewards/rejected": -1.0043971538543701, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"grad_norm": 5.90625, |
|
"learning_rate": 1.0040749902836508e-06, |
|
"logits/chosen": -1.7918437719345093, |
|
"logits/rejected": -1.7945436239242554, |
|
"logps/chosen": -28.33380126953125, |
|
"logps/rejected": -31.490909576416016, |
|
"loss": 0.156, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.273350715637207, |
|
"rewards/margins": 2.394632577896118, |
|
"rewards/rejected": -1.1212818622589111, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"grad_norm": 5.8125, |
|
"learning_rate": 9.59060791022566e-07, |
|
"logits/chosen": -1.8893934488296509, |
|
"logits/rejected": -1.8862323760986328, |
|
"logps/chosen": -31.71720314025879, |
|
"logps/rejected": -33.38925552368164, |
|
"loss": 0.2101, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.337890863418579, |
|
"rewards/margins": 2.114030122756958, |
|
"rewards/rejected": -0.7761393189430237, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 9.148382544856885e-07, |
|
"logits/chosen": -1.8082507848739624, |
|
"logits/rejected": -1.8019250631332397, |
|
"logps/chosen": -26.8853702545166, |
|
"logps/rejected": -33.35715103149414, |
|
"loss": 0.1765, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.1273572444915771, |
|
"rewards/margins": 2.2153234481811523, |
|
"rewards/rejected": -1.0879663228988647, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 3.01, |
|
"grad_norm": 3.359375, |
|
"learning_rate": 8.714301001505568e-07, |
|
"logits/chosen": -1.922569990158081, |
|
"logits/rejected": -1.920064926147461, |
|
"logps/chosen": -31.649404525756836, |
|
"logps/rejected": -36.346885681152344, |
|
"loss": 0.1692, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.3019969463348389, |
|
"rewards/margins": 2.431814670562744, |
|
"rewards/rejected": -1.1298176050186157, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 3.04, |
|
"grad_norm": 4.625, |
|
"learning_rate": 8.288586291031025e-07, |
|
"logits/chosen": -1.9079128503799438, |
|
"logits/rejected": -1.9058338403701782, |
|
"logps/chosen": -30.213918685913086, |
|
"logps/rejected": -33.71696472167969, |
|
"loss": 0.1383, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.2604410648345947, |
|
"rewards/margins": 2.478085517883301, |
|
"rewards/rejected": -1.217644453048706, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 3.06, |
|
"grad_norm": 4.96875, |
|
"learning_rate": 7.871457125803897e-07, |
|
"logits/chosen": -1.9004592895507812, |
|
"logits/rejected": -1.8882315158843994, |
|
"logps/chosen": -34.54669189453125, |
|
"logps/rejected": -35.38771057128906, |
|
"loss": 0.1478, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.348745584487915, |
|
"rewards/margins": 2.5333569049835205, |
|
"rewards/rejected": -1.1846110820770264, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 3.09, |
|
"grad_norm": 6.75, |
|
"learning_rate": 7.463127807341966e-07, |
|
"logits/chosen": -1.7955455780029297, |
|
"logits/rejected": -1.790220022201538, |
|
"logps/chosen": -33.658992767333984, |
|
"logps/rejected": -35.314979553222656, |
|
"loss": 0.1451, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.52162504196167, |
|
"rewards/margins": 2.4910683631896973, |
|
"rewards/rejected": -0.9694433212280273, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"grad_norm": 6.90625, |
|
"learning_rate": 7.063808116212021e-07, |
|
"logits/chosen": -1.8400506973266602, |
|
"logits/rejected": -1.8432636260986328, |
|
"logps/chosen": -31.264917373657227, |
|
"logps/rejected": -32.59490203857422, |
|
"loss": 0.1796, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.2151604890823364, |
|
"rewards/margins": 2.21301007270813, |
|
"rewards/rejected": -0.9978495836257935, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.12, |
|
"eval_logits/chosen": -2.142347574234009, |
|
"eval_logits/rejected": -2.137584686279297, |
|
"eval_logps/chosen": -34.35411071777344, |
|
"eval_logps/rejected": -38.025672912597656, |
|
"eval_loss": 0.6907772421836853, |
|
"eval_rewards/accuracies": 0.5747508406639099, |
|
"eval_rewards/chosen": -0.2876059412956238, |
|
"eval_rewards/margins": 0.1705409586429596, |
|
"eval_rewards/rejected": -0.45814695954322815, |
|
"eval_runtime": 145.3519, |
|
"eval_samples_per_second": 2.36, |
|
"eval_steps_per_second": 0.296, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 3.14, |
|
"grad_norm": 9.9375, |
|
"learning_rate": 6.673703204254348e-07, |
|
"logits/chosen": -1.8635101318359375, |
|
"logits/rejected": -1.858994722366333, |
|
"logps/chosen": -29.87615966796875, |
|
"logps/rejected": -32.12238311767578, |
|
"loss": 0.1548, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.2577589750289917, |
|
"rewards/margins": 2.350618362426758, |
|
"rewards/rejected": -1.0928595066070557, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 3.17, |
|
"grad_norm": 5.375, |
|
"learning_rate": 6.293013489185315e-07, |
|
"logits/chosen": -1.8775825500488281, |
|
"logits/rejected": -1.8721100091934204, |
|
"logps/chosen": -33.751792907714844, |
|
"logps/rejected": -35.85100555419922, |
|
"loss": 0.1601, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.3699350357055664, |
|
"rewards/margins": 2.522946834564209, |
|
"rewards/rejected": -1.1530119180679321, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 3.19, |
|
"grad_norm": 4.25, |
|
"learning_rate": 5.921934551632086e-07, |
|
"logits/chosen": -1.879861831665039, |
|
"logits/rejected": -1.8666155338287354, |
|
"logps/chosen": -31.769283294677734, |
|
"logps/rejected": -35.38819122314453, |
|
"loss": 0.1889, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 1.1922564506530762, |
|
"rewards/margins": 2.333768606185913, |
|
"rewards/rejected": -1.141512393951416, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 3.22, |
|
"grad_norm": 3.765625, |
|
"learning_rate": 5.560657034652405e-07, |
|
"logits/chosen": -1.9246861934661865, |
|
"logits/rejected": -1.922136664390564, |
|
"logps/chosen": -33.49988555908203, |
|
"logps/rejected": -32.82632064819336, |
|
"loss": 0.1608, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.3797134160995483, |
|
"rewards/margins": 2.2675235271453857, |
|
"rewards/rejected": -0.8878101110458374, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 3.25, |
|
"grad_norm": 5.96875, |
|
"learning_rate": 5.2093665457911e-07, |
|
"logits/chosen": -1.851680040359497, |
|
"logits/rejected": -1.8488292694091797, |
|
"logps/chosen": -32.87358856201172, |
|
"logps/rejected": -36.248497009277344, |
|
"loss": 0.1342, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.3703018426895142, |
|
"rewards/margins": 2.6765310764312744, |
|
"rewards/rejected": -1.3062288761138916, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 3.27, |
|
"grad_norm": 4.5625, |
|
"learning_rate": 4.868243561723535e-07, |
|
"logits/chosen": -1.9186599254608154, |
|
"logits/rejected": -1.913433313369751, |
|
"logps/chosen": -29.251794815063477, |
|
"logps/rejected": -33.522377014160156, |
|
"loss": 0.1506, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.3581030368804932, |
|
"rewards/margins": 2.383944511413574, |
|
"rewards/rejected": -1.025841474533081, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 3.3, |
|
"grad_norm": 4.15625, |
|
"learning_rate": 4.537463335535161e-07, |
|
"logits/chosen": -1.9449501037597656, |
|
"logits/rejected": -1.9497982263565063, |
|
"logps/chosen": -31.12078285217285, |
|
"logps/rejected": -32.769309997558594, |
|
"loss": 0.1426, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.4311946630477905, |
|
"rewards/margins": 2.410348892211914, |
|
"rewards/rejected": -0.9791544079780579, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 3.32, |
|
"grad_norm": 7.09375, |
|
"learning_rate": 4.217195806684629e-07, |
|
"logits/chosen": -1.8380733728408813, |
|
"logits/rejected": -1.8454326391220093, |
|
"logps/chosen": -33.449485778808594, |
|
"logps/rejected": -33.86452102661133, |
|
"loss": 0.1535, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.3119791746139526, |
|
"rewards/margins": 2.4516043663024902, |
|
"rewards/rejected": -1.1396249532699585, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 3.35, |
|
"grad_norm": 4.625, |
|
"learning_rate": 3.907605513696808e-07, |
|
"logits/chosen": -1.7485191822052002, |
|
"logits/rejected": -1.7507747411727905, |
|
"logps/chosen": -31.557483673095703, |
|
"logps/rejected": -37.77085494995117, |
|
"loss": 0.1711, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.416973352432251, |
|
"rewards/margins": 2.511898994445801, |
|
"rewards/rejected": -1.0949256420135498, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"grad_norm": 4.65625, |
|
"learning_rate": 3.6088515096305675e-07, |
|
"logits/chosen": -1.7980448007583618, |
|
"logits/rejected": -1.8015098571777344, |
|
"logps/chosen": -31.520572662353516, |
|
"logps/rejected": -33.67357635498047, |
|
"loss": 0.1264, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.5264867544174194, |
|
"rewards/margins": 2.614957332611084, |
|
"rewards/rejected": -1.088470458984375, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.38, |
|
"eval_logits/chosen": -2.1421782970428467, |
|
"eval_logits/rejected": -2.1374199390411377, |
|
"eval_logps/chosen": -34.34251022338867, |
|
"eval_logps/rejected": -38.01955795288086, |
|
"eval_loss": 0.6911265850067139, |
|
"eval_rewards/accuracies": 0.5892857313156128, |
|
"eval_rewards/chosen": -0.27716198563575745, |
|
"eval_rewards/margins": 0.17548424005508423, |
|
"eval_rewards/rejected": -0.4526461660861969, |
|
"eval_runtime": 145.3336, |
|
"eval_samples_per_second": 2.36, |
|
"eval_steps_per_second": 0.296, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 3.4, |
|
"grad_norm": 3.578125, |
|
"learning_rate": 3.321087280364757e-07, |
|
"logits/chosen": -1.9077885150909424, |
|
"logits/rejected": -1.8874790668487549, |
|
"logps/chosen": -29.57822036743164, |
|
"logps/rejected": -36.54710388183594, |
|
"loss": 0.1528, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": 1.3816629648208618, |
|
"rewards/margins": 2.624289035797119, |
|
"rewards/rejected": -1.242626428604126, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 3.43, |
|
"grad_norm": 5.71875, |
|
"learning_rate": 3.044460665744284e-07, |
|
"logits/chosen": -1.8845760822296143, |
|
"logits/rejected": -1.8903357982635498, |
|
"logps/chosen": -31.33782958984375, |
|
"logps/rejected": -33.613182067871094, |
|
"loss": 0.1368, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.5263617038726807, |
|
"rewards/margins": 2.5365498065948486, |
|
"rewards/rejected": -1.0101878643035889, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 3.45, |
|
"grad_norm": 3.9375, |
|
"learning_rate": 2.779113783626916e-07, |
|
"logits/chosen": -1.8593124151229858, |
|
"logits/rejected": -1.8543720245361328, |
|
"logps/chosen": -31.642696380615234, |
|
"logps/rejected": -36.003231048583984, |
|
"loss": 0.1361, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.2418328523635864, |
|
"rewards/margins": 2.529675245285034, |
|
"rewards/rejected": -1.2878425121307373, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 3.48, |
|
"grad_norm": 5.03125, |
|
"learning_rate": 2.5251829568697204e-07, |
|
"logits/chosen": -1.7216873168945312, |
|
"logits/rejected": -1.731299638748169, |
|
"logps/chosen": -32.13956069946289, |
|
"logps/rejected": -32.022666931152344, |
|
"loss": 0.153, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.4940283298492432, |
|
"rewards/margins": 2.383723735809326, |
|
"rewards/rejected": -0.8896951675415039, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 3.51, |
|
"grad_norm": 4.46875, |
|
"learning_rate": 2.2827986432927774e-07, |
|
"logits/chosen": -1.779524803161621, |
|
"logits/rejected": -1.784233808517456, |
|
"logps/chosen": -32.241004943847656, |
|
"logps/rejected": -34.2801513671875, |
|
"loss": 0.1178, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.5389398336410522, |
|
"rewards/margins": 2.5067484378814697, |
|
"rewards/rejected": -0.9678082466125488, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 3.53, |
|
"grad_norm": 22.75, |
|
"learning_rate": 2.0520853686560177e-07, |
|
"logits/chosen": -1.808258056640625, |
|
"logits/rejected": -1.813680648803711, |
|
"logps/chosen": -32.30830001831055, |
|
"logps/rejected": -35.810401916503906, |
|
"loss": 0.1575, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.371025800704956, |
|
"rewards/margins": 2.493631601333618, |
|
"rewards/rejected": -1.1226056814193726, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 3.56, |
|
"grad_norm": 3.953125, |
|
"learning_rate": 1.833161662683672e-07, |
|
"logits/chosen": -1.900948166847229, |
|
"logits/rejected": -1.8964240550994873, |
|
"logps/chosen": -31.239208221435547, |
|
"logps/rejected": -32.06488800048828, |
|
"loss": 0.1478, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.2568284273147583, |
|
"rewards/margins": 2.2945187091827393, |
|
"rewards/rejected": -1.0376904010772705, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 3.58, |
|
"grad_norm": 6.21875, |
|
"learning_rate": 1.626139998169246e-07, |
|
"logits/chosen": -1.8157840967178345, |
|
"logits/rejected": -1.817800760269165, |
|
"logps/chosen": -28.267253875732422, |
|
"logps/rejected": -31.886409759521484, |
|
"loss": 0.1623, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.2528432607650757, |
|
"rewards/margins": 2.2808501720428467, |
|
"rewards/rejected": -1.0280072689056396, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 3.61, |
|
"grad_norm": 9.25, |
|
"learning_rate": 1.4311267331922535e-07, |
|
"logits/chosen": -1.8119876384735107, |
|
"logits/rejected": -1.8166316747665405, |
|
"logps/chosen": -30.596643447875977, |
|
"logps/rejected": -33.92557907104492, |
|
"loss": 0.1638, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.280565619468689, |
|
"rewards/margins": 2.124329090118408, |
|
"rewards/rejected": -0.8437638282775879, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"grad_norm": 3.5, |
|
"learning_rate": 1.2482220564763669e-07, |
|
"logits/chosen": -1.8785665035247803, |
|
"logits/rejected": -1.881792664527893, |
|
"logps/chosen": -33.31572723388672, |
|
"logps/rejected": -34.651222229003906, |
|
"loss": 0.1206, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.4533611536026, |
|
"rewards/margins": 2.5582594871520996, |
|
"rewards/rejected": -1.10489821434021, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.64, |
|
"eval_logits/chosen": -2.1418943405151367, |
|
"eval_logits/rejected": -2.137127637863159, |
|
"eval_logps/chosen": -34.35324478149414, |
|
"eval_logps/rejected": -38.02573013305664, |
|
"eval_loss": 0.692409336566925, |
|
"eval_rewards/accuracies": 0.5917773842811584, |
|
"eval_rewards/chosen": -0.28682059049606323, |
|
"eval_rewards/margins": 0.17138195037841797, |
|
"eval_rewards/rejected": -0.4582025408744812, |
|
"eval_runtime": 145.3556, |
|
"eval_samples_per_second": 2.36, |
|
"eval_steps_per_second": 0.296, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 3.66, |
|
"grad_norm": 6.125, |
|
"learning_rate": 1.0775199359171346e-07, |
|
"logits/chosen": -1.946838140487671, |
|
"logits/rejected": -1.939971923828125, |
|
"logps/chosen": -32.17081832885742, |
|
"logps/rejected": -35.340518951416016, |
|
"loss": 0.1438, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.4685560464859009, |
|
"rewards/margins": 2.454911470413208, |
|
"rewards/rejected": -0.9863556027412415, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 3.69, |
|
"grad_norm": 11.0, |
|
"learning_rate": 9.191080703056604e-08, |
|
"logits/chosen": -1.8355038166046143, |
|
"logits/rejected": -1.8465068340301514, |
|
"logps/chosen": -32.56521987915039, |
|
"logps/rejected": -34.721900939941406, |
|
"loss": 0.1399, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.4256752729415894, |
|
"rewards/margins": 2.4434168338775635, |
|
"rewards/rejected": -1.0177414417266846, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 3.71, |
|
"grad_norm": 7.5625, |
|
"learning_rate": 7.730678442730539e-08, |
|
"logits/chosen": -1.9164823293685913, |
|
"logits/rejected": -1.928348183631897, |
|
"logps/chosen": -33.428531646728516, |
|
"logps/rejected": -34.8869514465332, |
|
"loss": 0.1342, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.4690487384796143, |
|
"rewards/margins": 2.5928056240081787, |
|
"rewards/rejected": -1.123757004737854, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 3.74, |
|
"grad_norm": 9.5625, |
|
"learning_rate": 6.394742864787806e-08, |
|
"logits/chosen": -1.9064290523529053, |
|
"logits/rejected": -1.9086523056030273, |
|
"logps/chosen": -31.696964263916016, |
|
"logps/rejected": -35.06704330444336, |
|
"loss": 0.1635, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.3602821826934814, |
|
"rewards/margins": 2.4466512203216553, |
|
"rewards/rejected": -1.086369276046753, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 3.77, |
|
"grad_norm": 5.46875, |
|
"learning_rate": 5.183960310644748e-08, |
|
"logits/chosen": -1.8891935348510742, |
|
"logits/rejected": -1.8821351528167725, |
|
"logps/chosen": -33.91851043701172, |
|
"logps/rejected": -34.860595703125, |
|
"loss": 0.1585, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.4848333597183228, |
|
"rewards/margins": 2.4660050868988037, |
|
"rewards/rejected": -0.981171727180481, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 3.79, |
|
"grad_norm": 4.96875, |
|
"learning_rate": 4.098952823928693e-08, |
|
"logits/chosen": -1.8560640811920166, |
|
"logits/rejected": -1.8620649576187134, |
|
"logps/chosen": -29.741008758544922, |
|
"logps/rejected": -34.997169494628906, |
|
"loss": 0.1378, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.3249523639678955, |
|
"rewards/margins": 2.519402027130127, |
|
"rewards/rejected": -1.194449782371521, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 3.82, |
|
"grad_norm": 5.40625, |
|
"learning_rate": 3.1402778309014284e-08, |
|
"logits/chosen": -1.803776502609253, |
|
"logits/rejected": -1.801746129989624, |
|
"logps/chosen": -29.282577514648438, |
|
"logps/rejected": -31.621936798095703, |
|
"loss": 0.1565, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.4066790342330933, |
|
"rewards/margins": 2.331610918045044, |
|
"rewards/rejected": -0.9249318242073059, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 3.84, |
|
"grad_norm": 6.1875, |
|
"learning_rate": 2.3084278540791427e-08, |
|
"logits/chosen": -1.9790493249893188, |
|
"logits/rejected": -1.9734687805175781, |
|
"logps/chosen": -33.892913818359375, |
|
"logps/rejected": -33.2518310546875, |
|
"loss": 0.1448, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.253114104270935, |
|
"rewards/margins": 2.3287227153778076, |
|
"rewards/rejected": -1.0756088495254517, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 3.87, |
|
"grad_norm": 7.28125, |
|
"learning_rate": 1.6038302591975807e-08, |
|
"logits/chosen": -1.8717892169952393, |
|
"logits/rejected": -1.8740745782852173, |
|
"logps/chosen": -27.078582763671875, |
|
"logps/rejected": -29.020471572875977, |
|
"loss": 0.1879, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.2580173015594482, |
|
"rewards/margins": 2.156388759613037, |
|
"rewards/rejected": -0.8983713984489441, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"grad_norm": 5.75, |
|
"learning_rate": 1.0268470356514237e-08, |
|
"logits/chosen": -1.8792669773101807, |
|
"logits/rejected": -1.8736785650253296, |
|
"logps/chosen": -31.454416275024414, |
|
"logps/rejected": -33.20934295654297, |
|
"loss": 0.1645, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": 1.2117339372634888, |
|
"rewards/margins": 2.328080892562866, |
|
"rewards/rejected": -1.116347074508667, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.9, |
|
"eval_logits/chosen": -2.1418604850769043, |
|
"eval_logits/rejected": -2.1370973587036133, |
|
"eval_logps/chosen": -34.35454177856445, |
|
"eval_logps/rejected": -38.024742126464844, |
|
"eval_loss": 0.6943246126174927, |
|
"eval_rewards/accuracies": 0.5718438625335693, |
|
"eval_rewards/chosen": -0.28799253702163696, |
|
"eval_rewards/margins": 0.16931606829166412, |
|
"eval_rewards/rejected": -0.4573085606098175, |
|
"eval_runtime": 145.3262, |
|
"eval_samples_per_second": 2.36, |
|
"eval_steps_per_second": 0.296, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 3.92, |
|
"grad_norm": 9.4375, |
|
"learning_rate": 5.777746105209147e-09, |
|
"logits/chosen": -1.8055070638656616, |
|
"logits/rejected": -1.8097158670425415, |
|
"logps/chosen": -32.785335540771484, |
|
"logps/rejected": -35.42612075805664, |
|
"loss": 0.1841, |
|
"rewards/accuracies": 0.9125000238418579, |
|
"rewards/chosen": 1.2470948696136475, |
|
"rewards/margins": 2.1612586975097656, |
|
"rewards/rejected": -0.9141640663146973, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 3.95, |
|
"grad_norm": 5.40625, |
|
"learning_rate": 2.5684369628148352e-09, |
|
"logits/chosen": -1.8615461587905884, |
|
"logits/rejected": -1.8599445819854736, |
|
"logps/chosen": -29.254053115844727, |
|
"logps/rejected": -33.70402908325195, |
|
"loss": 0.1759, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 1.3321115970611572, |
|
"rewards/margins": 2.334989547729492, |
|
"rewards/rejected": -1.002877950668335, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 3.97, |
|
"grad_norm": 8.8125, |
|
"learning_rate": 6.421917227455999e-10, |
|
"logits/chosen": -1.960646390914917, |
|
"logits/rejected": -1.9528770446777344, |
|
"logps/chosen": -26.7325439453125, |
|
"logps/rejected": -29.855510711669922, |
|
"loss": 0.1584, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 1.1973587274551392, |
|
"rewards/margins": 2.2628719806671143, |
|
"rewards/rejected": -1.065513253211975, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"grad_norm": 7.0625, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -1.863152265548706, |
|
"logits/rejected": -1.85297429561615, |
|
"logps/chosen": -31.610912322998047, |
|
"logps/rejected": -36.648475646972656, |
|
"loss": 0.1361, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": 1.3097574710845947, |
|
"rewards/margins": 2.4233245849609375, |
|
"rewards/rejected": -1.1135669946670532, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 1540, |
|
"total_flos": 0.0, |
|
"train_loss": 0.19153660760297403, |
|
"train_runtime": 10767.2812, |
|
"train_samples_per_second": 1.144, |
|
"train_steps_per_second": 0.143 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1540, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|