|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 2000, |
|
"global_step": 1911, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0005232862375719519, |
|
"grad_norm": 5.2912027840794105, |
|
"learning_rate": 2.6041666666666664e-09, |
|
"logits/chosen": -0.613072395324707, |
|
"logits/rejected": -0.9113049507141113, |
|
"logps/chosen": -675.448974609375, |
|
"logps/rejected": -580.3436279296875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0052328623757195184, |
|
"grad_norm": 5.207863571024578, |
|
"learning_rate": 2.6041666666666667e-08, |
|
"logits/chosen": -0.8929427266120911, |
|
"logits/rejected": -0.9015587568283081, |
|
"logps/chosen": -316.1885070800781, |
|
"logps/rejected": -265.829833984375, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.3888888955116272, |
|
"rewards/chosen": -0.0001502325030742213, |
|
"rewards/margins": -0.00036717823240906, |
|
"rewards/rejected": 0.00021694571478292346, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.010465724751439037, |
|
"grad_norm": 4.518662580055778, |
|
"learning_rate": 5.208333333333333e-08, |
|
"logits/chosen": -1.0493892431259155, |
|
"logits/rejected": -1.0917696952819824, |
|
"logps/chosen": -225.9092254638672, |
|
"logps/rejected": -229.95114135742188, |
|
"loss": 0.6934, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": -0.0009278160287067294, |
|
"rewards/margins": -0.0010514187160879374, |
|
"rewards/rejected": 0.00012360289110802114, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.015698587127158554, |
|
"grad_norm": 4.590969349531553, |
|
"learning_rate": 7.812499999999999e-08, |
|
"logits/chosen": -0.993874192237854, |
|
"logits/rejected": -1.0379687547683716, |
|
"logps/chosen": -285.5773010253906, |
|
"logps/rejected": -273.5916748046875, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.0005485258880071342, |
|
"rewards/margins": 0.0011458387598395348, |
|
"rewards/rejected": -0.0005973129882477224, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.020931449502878074, |
|
"grad_norm": 4.856489671520613, |
|
"learning_rate": 1.0416666666666667e-07, |
|
"logits/chosen": -0.8481732606887817, |
|
"logits/rejected": -0.9292299151420593, |
|
"logps/chosen": -298.5322570800781, |
|
"logps/rejected": -292.2106018066406, |
|
"loss": 0.6932, |
|
"rewards/accuracies": 0.4000000059604645, |
|
"rewards/chosen": 3.153112993459217e-05, |
|
"rewards/margins": 0.0002942727878689766, |
|
"rewards/rejected": -0.000262741552432999, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.026164311878597593, |
|
"grad_norm": 5.145809878741566, |
|
"learning_rate": 1.3020833333333334e-07, |
|
"logits/chosen": -0.7799001932144165, |
|
"logits/rejected": -0.9963434338569641, |
|
"logps/chosen": -285.77923583984375, |
|
"logps/rejected": -258.2279357910156, |
|
"loss": 0.6928, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.0015076332492753863, |
|
"rewards/margins": 0.001345943077467382, |
|
"rewards/rejected": 0.00016169001173693687, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03139717425431711, |
|
"grad_norm": 4.7530882145258, |
|
"learning_rate": 1.5624999999999999e-07, |
|
"logits/chosen": -1.0012855529785156, |
|
"logits/rejected": -1.029201626777649, |
|
"logps/chosen": -265.9154968261719, |
|
"logps/rejected": -263.63165283203125, |
|
"loss": 0.6929, |
|
"rewards/accuracies": 0.375, |
|
"rewards/chosen": -0.0019283884903416038, |
|
"rewards/margins": -0.0014839128125458956, |
|
"rewards/rejected": -0.0004444758524186909, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.03663003663003663, |
|
"grad_norm": 5.411287853510262, |
|
"learning_rate": 1.8229166666666666e-07, |
|
"logits/chosen": -0.7499306797981262, |
|
"logits/rejected": -0.9206749200820923, |
|
"logps/chosen": -337.06011962890625, |
|
"logps/rejected": -261.22991943359375, |
|
"loss": 0.693, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.0009921113960444927, |
|
"rewards/margins": 0.00047098720096983016, |
|
"rewards/rejected": -0.0014630984514951706, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.04186289900575615, |
|
"grad_norm": 4.971639223521402, |
|
"learning_rate": 2.0833333333333333e-07, |
|
"logits/chosen": -0.8354660868644714, |
|
"logits/rejected": -0.8838351368904114, |
|
"logps/chosen": -306.07379150390625, |
|
"logps/rejected": -292.2297058105469, |
|
"loss": 0.6927, |
|
"rewards/accuracies": 0.4749999940395355, |
|
"rewards/chosen": -8.171911031240597e-05, |
|
"rewards/margins": -0.0005383099196478724, |
|
"rewards/rejected": 0.00045659096213057637, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.04709576138147567, |
|
"grad_norm": 4.558913262944195, |
|
"learning_rate": 2.3437499999999998e-07, |
|
"logits/chosen": -0.8262732625007629, |
|
"logits/rejected": -0.9426995515823364, |
|
"logps/chosen": -250.512451171875, |
|
"logps/rejected": -203.21292114257812, |
|
"loss": 0.6926, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.000877672981005162, |
|
"rewards/margins": 0.0007694482337683439, |
|
"rewards/rejected": 0.00010822457261383533, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.052328623757195186, |
|
"grad_norm": 4.502101598833216, |
|
"learning_rate": 2.604166666666667e-07, |
|
"logits/chosen": -0.9312704205513, |
|
"logits/rejected": -1.0538091659545898, |
|
"logps/chosen": -209.8478546142578, |
|
"logps/rejected": -190.95339965820312, |
|
"loss": 0.692, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.0008268986130133271, |
|
"rewards/margins": 0.0016605403507128358, |
|
"rewards/rejected": -0.0024874391965568066, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0575614861329147, |
|
"grad_norm": 4.621365428807003, |
|
"learning_rate": 2.864583333333333e-07, |
|
"logits/chosen": -0.8299196362495422, |
|
"logits/rejected": -0.9797964096069336, |
|
"logps/chosen": -249.6680908203125, |
|
"logps/rejected": -200.65777587890625, |
|
"loss": 0.6913, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.0012093835975974798, |
|
"rewards/margins": 0.004302759654819965, |
|
"rewards/rejected": -0.0030933755915611982, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.06279434850863422, |
|
"grad_norm": 4.60719554844544, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": -0.8148070573806763, |
|
"logits/rejected": -0.8203374147415161, |
|
"logps/chosen": -379.8021240234375, |
|
"logps/rejected": -344.6637878417969, |
|
"loss": 0.6915, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": 0.0022750557400286198, |
|
"rewards/margins": 0.0023332713171839714, |
|
"rewards/rejected": -5.8215529861627147e-05, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.06802721088435375, |
|
"grad_norm": 4.4712927326454155, |
|
"learning_rate": 3.3854166666666667e-07, |
|
"logits/chosen": -0.8715853691101074, |
|
"logits/rejected": -0.9833810925483704, |
|
"logps/chosen": -205.82373046875, |
|
"logps/rejected": -231.65127563476562, |
|
"loss": 0.6903, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.00287313642911613, |
|
"rewards/margins": 0.006604389753192663, |
|
"rewards/rejected": -0.0037312530912458897, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.07326007326007326, |
|
"grad_norm": 4.733449427919051, |
|
"learning_rate": 3.645833333333333e-07, |
|
"logits/chosen": -1.0960773229599, |
|
"logits/rejected": -1.0255062580108643, |
|
"logps/chosen": -282.6625671386719, |
|
"logps/rejected": -292.23785400390625, |
|
"loss": 0.689, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.002779006026685238, |
|
"rewards/margins": 0.012505276128649712, |
|
"rewards/rejected": -0.009726271964609623, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.07849293563579278, |
|
"grad_norm": 4.813942709544535, |
|
"learning_rate": 3.9062499999999997e-07, |
|
"logits/chosen": -0.8290227055549622, |
|
"logits/rejected": -0.9419819116592407, |
|
"logps/chosen": -260.06561279296875, |
|
"logps/rejected": -244.0886688232422, |
|
"loss": 0.6879, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.004813476465642452, |
|
"rewards/margins": -0.0007598908850923181, |
|
"rewards/rejected": -0.004053585696965456, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0837257980115123, |
|
"grad_norm": 5.027647064510372, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/chosen": -0.8543462753295898, |
|
"logits/rejected": -0.9786416292190552, |
|
"logps/chosen": -244.1794891357422, |
|
"logps/rejected": -233.8755645751953, |
|
"loss": 0.6866, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.00423551257699728, |
|
"rewards/margins": 0.01706615462899208, |
|
"rewards/rejected": -0.0128306420519948, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.08895866038723181, |
|
"grad_norm": 5.173910287170609, |
|
"learning_rate": 4.427083333333333e-07, |
|
"logits/chosen": -0.827562153339386, |
|
"logits/rejected": -0.8305305242538452, |
|
"logps/chosen": -227.57394409179688, |
|
"logps/rejected": -246.3840789794922, |
|
"loss": 0.6836, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.004262433387339115, |
|
"rewards/margins": 0.008676105178892612, |
|
"rewards/rejected": -0.00441367132589221, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.09419152276295134, |
|
"grad_norm": 3.942933028309808, |
|
"learning_rate": 4.6874999999999996e-07, |
|
"logits/chosen": -0.9691295623779297, |
|
"logits/rejected": -1.081464171409607, |
|
"logps/chosen": -338.87176513671875, |
|
"logps/rejected": -273.46832275390625, |
|
"loss": 0.6863, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.009511679410934448, |
|
"rewards/margins": 0.0174461267888546, |
|
"rewards/rejected": -0.026957804337143898, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.09942438513867086, |
|
"grad_norm": 5.933795378160298, |
|
"learning_rate": 4.947916666666667e-07, |
|
"logits/chosen": -0.9045713543891907, |
|
"logits/rejected": -1.0874099731445312, |
|
"logps/chosen": -289.9124450683594, |
|
"logps/rejected": -246.2819366455078, |
|
"loss": 0.6778, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.007765337824821472, |
|
"rewards/margins": 0.03508571907877922, |
|
"rewards/rejected": -0.0273203793913126, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.10465724751439037, |
|
"grad_norm": 4.772143956400877, |
|
"learning_rate": 4.999732803821339e-07, |
|
"logits/chosen": -0.8650393486022949, |
|
"logits/rejected": -0.926235556602478, |
|
"logps/chosen": -262.9915771484375, |
|
"logps/rejected": -298.93353271484375, |
|
"loss": 0.6771, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.011056855320930481, |
|
"rewards/margins": 0.02525464817881584, |
|
"rewards/rejected": -0.036311499774456024, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.10989010989010989, |
|
"grad_norm": 4.996286850099119, |
|
"learning_rate": 4.998647417232375e-07, |
|
"logits/chosen": -0.9325754046440125, |
|
"logits/rejected": -1.0693069696426392, |
|
"logps/chosen": -203.27798461914062, |
|
"logps/rejected": -191.7810821533203, |
|
"loss": 0.6841, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.0018788937013596296, |
|
"rewards/margins": 0.009139192290604115, |
|
"rewards/rejected": -0.011018086224794388, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.1151229722658294, |
|
"grad_norm": 4.64438234097018, |
|
"learning_rate": 4.996727502703357e-07, |
|
"logits/chosen": -0.8251157999038696, |
|
"logits/rejected": -0.8886043429374695, |
|
"logps/chosen": -297.6938781738281, |
|
"logps/rejected": -257.5809631347656, |
|
"loss": 0.6773, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": 0.028462525457143784, |
|
"rewards/margins": 0.07731024920940399, |
|
"rewards/rejected": -0.04884772375226021, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.12035583464154893, |
|
"grad_norm": 4.84006289249012, |
|
"learning_rate": 4.993973701470142e-07, |
|
"logits/chosen": -0.7838829755783081, |
|
"logits/rejected": -0.837600827217102, |
|
"logps/chosen": -234.60653686523438, |
|
"logps/rejected": -311.21441650390625, |
|
"loss": 0.6664, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.0013715565437451005, |
|
"rewards/margins": 0.05342661589384079, |
|
"rewards/rejected": -0.054798174649477005, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.12558869701726844, |
|
"grad_norm": 5.056335509183621, |
|
"learning_rate": 4.990386933279972e-07, |
|
"logits/chosen": -0.926543116569519, |
|
"logits/rejected": -1.05828857421875, |
|
"logps/chosen": -227.8069305419922, |
|
"logps/rejected": -217.5619659423828, |
|
"loss": 0.6696, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.03816365823149681, |
|
"rewards/margins": 0.036327291280031204, |
|
"rewards/rejected": -0.07449094951152802, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.13082155939298795, |
|
"grad_norm": 4.903194310499359, |
|
"learning_rate": 4.985968396084284e-07, |
|
"logits/chosen": -0.9419911503791809, |
|
"logits/rejected": -0.9368138313293457, |
|
"logps/chosen": -273.85748291015625, |
|
"logps/rejected": -251.20681762695312, |
|
"loss": 0.6636, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.03060590662062168, |
|
"rewards/margins": 0.0605587437748909, |
|
"rewards/rejected": -0.09116465598344803, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.1360544217687075, |
|
"grad_norm": 5.658505335877987, |
|
"learning_rate": 4.98071956563861e-07, |
|
"logits/chosen": -0.845447838306427, |
|
"logits/rejected": -0.9603404998779297, |
|
"logps/chosen": -291.99737548828125, |
|
"logps/rejected": -279.862548828125, |
|
"loss": 0.6656, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.013078232295811176, |
|
"rewards/margins": 0.10106952488422394, |
|
"rewards/rejected": -0.11414775997400284, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.141287284144427, |
|
"grad_norm": 4.523399348408552, |
|
"learning_rate": 4.97464219500968e-07, |
|
"logits/chosen": -0.9943469166755676, |
|
"logits/rejected": -1.018686294555664, |
|
"logps/chosen": -284.5821228027344, |
|
"logps/rejected": -299.7913513183594, |
|
"loss": 0.6482, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.09529539942741394, |
|
"rewards/margins": 0.04227287694811821, |
|
"rewards/rejected": -0.13756826519966125, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.14652014652014653, |
|
"grad_norm": 5.272141127441927, |
|
"learning_rate": 4.967738313989918e-07, |
|
"logits/chosen": -0.9240992665290833, |
|
"logits/rejected": -0.9420005679130554, |
|
"logps/chosen": -306.8502197265625, |
|
"logps/rejected": -290.08953857421875, |
|
"loss": 0.6627, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.052370209246873856, |
|
"rewards/margins": 0.10898049920797348, |
|
"rewards/rejected": -0.16135069727897644, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.15175300889586604, |
|
"grad_norm": 4.892220404106949, |
|
"learning_rate": 4.960010228419499e-07, |
|
"logits/chosen": -0.83436119556427, |
|
"logits/rejected": -1.0053937435150146, |
|
"logps/chosen": -327.9340515136719, |
|
"logps/rejected": -256.15679931640625, |
|
"loss": 0.6634, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.11021896451711655, |
|
"rewards/margins": 0.08143133670091629, |
|
"rewards/rejected": -0.19165030121803284, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.15698587127158556, |
|
"grad_norm": 5.0307188285371245, |
|
"learning_rate": 4.951460519416227e-07, |
|
"logits/chosen": -0.7597275972366333, |
|
"logits/rejected": -0.8329798579216003, |
|
"logps/chosen": -309.31396484375, |
|
"logps/rejected": -261.9037170410156, |
|
"loss": 0.6581, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.1599477231502533, |
|
"rewards/margins": 0.05440790206193924, |
|
"rewards/rejected": -0.21435561776161194, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.16221873364730507, |
|
"grad_norm": 5.10301703350847, |
|
"learning_rate": 4.942092042513458e-07, |
|
"logits/chosen": -0.8226385116577148, |
|
"logits/rejected": -0.975459098815918, |
|
"logps/chosen": -312.9759826660156, |
|
"logps/rejected": -300.53985595703125, |
|
"loss": 0.6553, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.0716322511434555, |
|
"rewards/margins": 0.10881064087152481, |
|
"rewards/rejected": -0.1804428994655609, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.1674515960230246, |
|
"grad_norm": 5.700591112038071, |
|
"learning_rate": 4.931907926706373e-07, |
|
"logits/chosen": -0.8691483736038208, |
|
"logits/rejected": -1.0902544260025024, |
|
"logps/chosen": -347.06158447265625, |
|
"logps/rejected": -232.77938842773438, |
|
"loss": 0.6474, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.17011448740959167, |
|
"rewards/margins": 0.08706648647785187, |
|
"rewards/rejected": -0.25718098878860474, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.1726844583987441, |
|
"grad_norm": 5.816432334802341, |
|
"learning_rate": 4.920911573406924e-07, |
|
"logits/chosen": -0.9927451014518738, |
|
"logits/rejected": -1.230741262435913, |
|
"logps/chosen": -251.46133422851562, |
|
"logps/rejected": -209.3514404296875, |
|
"loss": 0.6502, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.14585557579994202, |
|
"rewards/margins": 0.09222600609064102, |
|
"rewards/rejected": -0.23808160424232483, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.17791732077446362, |
|
"grad_norm": 6.704270656287176, |
|
"learning_rate": 4.909106655307787e-07, |
|
"logits/chosen": -0.9669449925422668, |
|
"logits/rejected": -0.8335038423538208, |
|
"logps/chosen": -284.52716064453125, |
|
"logps/rejected": -351.88427734375, |
|
"loss": 0.63, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.14778541028499603, |
|
"rewards/margins": 0.1832939088344574, |
|
"rewards/rejected": -0.3310793340206146, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.18315018315018314, |
|
"grad_norm": 6.276672163574993, |
|
"learning_rate": 4.896497115155709e-07, |
|
"logits/chosen": -1.0247383117675781, |
|
"logits/rejected": -0.9595943689346313, |
|
"logps/chosen": -198.9410858154297, |
|
"logps/rejected": -248.18814086914062, |
|
"loss": 0.6228, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.1372702717781067, |
|
"rewards/margins": 0.25034016370773315, |
|
"rewards/rejected": -0.38761046528816223, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.18838304552590268, |
|
"grad_norm": 5.746693380854425, |
|
"learning_rate": 4.883087164434672e-07, |
|
"logits/chosen": -0.8813273310661316, |
|
"logits/rejected": -1.021053433418274, |
|
"logps/chosen": -287.7932434082031, |
|
"logps/rejected": -295.81683349609375, |
|
"loss": 0.6309, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.26605018973350525, |
|
"rewards/margins": 0.08224769681692123, |
|
"rewards/rejected": -0.3482978940010071, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.1936159079016222, |
|
"grad_norm": 6.815865277948172, |
|
"learning_rate": 4.868881281959282e-07, |
|
"logits/chosen": -0.8798068165779114, |
|
"logits/rejected": -1.0722447633743286, |
|
"logps/chosen": -305.884765625, |
|
"logps/rejected": -305.1341247558594, |
|
"loss": 0.6222, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.30294957756996155, |
|
"rewards/margins": 0.2897435128688812, |
|
"rewards/rejected": -0.5926931500434875, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.1988487702773417, |
|
"grad_norm": 6.505239560678682, |
|
"learning_rate": 4.853884212378889e-07, |
|
"logits/chosen": -1.0362972021102905, |
|
"logits/rejected": -1.0566703081130981, |
|
"logps/chosen": -260.21527099609375, |
|
"logps/rejected": -412.50079345703125, |
|
"loss": 0.6119, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.2835327982902527, |
|
"rewards/margins": 0.32830673456192017, |
|
"rewards/rejected": -0.6118394732475281, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.20408163265306123, |
|
"grad_norm": 6.743940065948427, |
|
"learning_rate": 4.838100964592904e-07, |
|
"logits/chosen": -0.8700660467147827, |
|
"logits/rejected": -1.0450454950332642, |
|
"logps/chosen": -416.87078857421875, |
|
"logps/rejected": -305.87506103515625, |
|
"loss": 0.6197, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.31748878955841064, |
|
"rewards/margins": 0.21450409293174744, |
|
"rewards/rejected": -0.5319928526878357, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.20931449502878074, |
|
"grad_norm": 6.828641090762346, |
|
"learning_rate": 4.821536810077878e-07, |
|
"logits/chosen": -0.8353579640388489, |
|
"logits/rejected": -1.031461477279663, |
|
"logps/chosen": -313.9386901855469, |
|
"logps/rejected": -301.3431396484375, |
|
"loss": 0.6001, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.4328695237636566, |
|
"rewards/margins": 0.27747058868408203, |
|
"rewards/rejected": -0.7103400230407715, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.21454735740450026, |
|
"grad_norm": 6.905075751508215, |
|
"learning_rate": 4.804197281126862e-07, |
|
"logits/chosen": -1.030688762664795, |
|
"logits/rejected": -1.0457452535629272, |
|
"logps/chosen": -334.46826171875, |
|
"logps/rejected": -377.8026428222656, |
|
"loss": 0.6288, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.6228912472724915, |
|
"rewards/margins": 0.17153984308242798, |
|
"rewards/rejected": -0.7944310903549194, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.21978021978021978, |
|
"grad_norm": 8.196590997844144, |
|
"learning_rate": 4.786088169001671e-07, |
|
"logits/chosen": -0.8937980532646179, |
|
"logits/rejected": -0.9507759809494019, |
|
"logps/chosen": -329.95458984375, |
|
"logps/rejected": -369.8622741699219, |
|
"loss": 0.5992, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.5947745442390442, |
|
"rewards/margins": 0.21435976028442383, |
|
"rewards/rejected": -0.8091343641281128, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.2250130821559393, |
|
"grad_norm": 7.961032416351325, |
|
"learning_rate": 4.767215521998648e-07, |
|
"logits/chosen": -0.8109152913093567, |
|
"logits/rejected": -0.9932589530944824, |
|
"logps/chosen": -330.60394287109375, |
|
"logps/rejected": -318.09564208984375, |
|
"loss": 0.5913, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.3654792308807373, |
|
"rewards/margins": 0.4697452485561371, |
|
"rewards/rejected": -0.8352245092391968, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.2302459445316588, |
|
"grad_norm": 8.367649286131634, |
|
"learning_rate": 4.7475856434285853e-07, |
|
"logits/chosen": -0.9328480958938599, |
|
"logits/rejected": -1.0322339534759521, |
|
"logps/chosen": -305.74041748046875, |
|
"logps/rejected": -315.39947509765625, |
|
"loss": 0.5974, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.4867183566093445, |
|
"rewards/margins": 0.32400593161582947, |
|
"rewards/rejected": -0.8107242584228516, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.23547880690737832, |
|
"grad_norm": 8.474835469399126, |
|
"learning_rate": 4.727205089511466e-07, |
|
"logits/chosen": -0.9545785784721375, |
|
"logits/rejected": -0.9672814607620239, |
|
"logps/chosen": -307.53125, |
|
"logps/rejected": -335.9497985839844, |
|
"loss": 0.5514, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6702186465263367, |
|
"rewards/margins": 0.2993699610233307, |
|
"rewards/rejected": -0.9695886373519897, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.24071166928309787, |
|
"grad_norm": 8.59698500773277, |
|
"learning_rate": 4.706080667186738e-07, |
|
"logits/chosen": -0.8426378965377808, |
|
"logits/rejected": -0.922264575958252, |
|
"logps/chosen": -324.0062561035156, |
|
"logps/rejected": -368.1070251464844, |
|
"loss": 0.5884, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.4376835823059082, |
|
"rewards/margins": 0.5105610489845276, |
|
"rewards/rejected": -0.9482446908950806, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.24594453165881738, |
|
"grad_norm": 9.852887746579286, |
|
"learning_rate": 4.68421943183986e-07, |
|
"logits/chosen": -0.8820059895515442, |
|
"logits/rejected": -0.9242187738418579, |
|
"logps/chosen": -324.6005859375, |
|
"logps/rejected": -369.9400939941406, |
|
"loss": 0.5528, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.538652241230011, |
|
"rewards/margins": 0.3738628029823303, |
|
"rewards/rejected": -0.9125150442123413, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.25117739403453687, |
|
"grad_norm": 8.603529097346893, |
|
"learning_rate": 4.661628684945851e-07, |
|
"logits/chosen": -1.0860228538513184, |
|
"logits/rejected": -1.1342618465423584, |
|
"logps/chosen": -266.6770935058594, |
|
"logps/rejected": -296.2251281738281, |
|
"loss": 0.567, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.3188680410385132, |
|
"rewards/margins": 0.47449684143066406, |
|
"rewards/rejected": -0.7933648824691772, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.2564102564102564, |
|
"grad_norm": 8.5211812968141, |
|
"learning_rate": 4.638315971630662e-07, |
|
"logits/chosen": -1.0311534404754639, |
|
"logits/rejected": -1.0557152032852173, |
|
"logps/chosen": -331.73358154296875, |
|
"logps/rejected": -337.86724853515625, |
|
"loss": 0.5574, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.5146857500076294, |
|
"rewards/margins": 0.3701913356781006, |
|
"rewards/rejected": -0.88487708568573, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.2616431187859759, |
|
"grad_norm": 8.377843401032223, |
|
"learning_rate": 4.6142890781511635e-07, |
|
"logits/chosen": -1.0599453449249268, |
|
"logits/rejected": -1.076552391052246, |
|
"logps/chosen": -302.8187561035156, |
|
"logps/rejected": -389.29205322265625, |
|
"loss": 0.5551, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5295077562332153, |
|
"rewards/margins": 0.6217503547668457, |
|
"rewards/rejected": -1.151257872581482, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.2668759811616955, |
|
"grad_norm": 8.512186027439014, |
|
"learning_rate": 4.5895560292945996e-07, |
|
"logits/chosen": -0.9369387626647949, |
|
"logits/rejected": -1.1153618097305298, |
|
"logps/chosen": -341.99407958984375, |
|
"logps/rejected": -331.5032653808594, |
|
"loss": 0.5868, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.6802676916122437, |
|
"rewards/margins": 0.2563009560108185, |
|
"rewards/rejected": -0.9365686178207397, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.272108843537415, |
|
"grad_norm": 10.104036998058119, |
|
"learning_rate": 4.5641250856983743e-07, |
|
"logits/chosen": -1.0164803266525269, |
|
"logits/rejected": -1.0712977647781372, |
|
"logps/chosen": -339.76318359375, |
|
"logps/rejected": -381.4029541015625, |
|
"loss": 0.5763, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -0.7330660820007324, |
|
"rewards/margins": 0.2603517472743988, |
|
"rewards/rejected": -0.9934177398681641, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.2773417059131345, |
|
"grad_norm": 10.06969780430147, |
|
"learning_rate": 4.5380047410910655e-07, |
|
"logits/chosen": -1.067757248878479, |
|
"logits/rejected": -1.210282564163208, |
|
"logps/chosen": -369.5272521972656, |
|
"logps/rejected": -336.33172607421875, |
|
"loss": 0.5628, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.40420642495155334, |
|
"rewards/margins": 0.6128471493721008, |
|
"rewards/rejected": -1.017053484916687, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.282574568288854, |
|
"grad_norm": 8.93627123396453, |
|
"learning_rate": 4.5112037194555876e-07, |
|
"logits/chosen": -1.0878922939300537, |
|
"logits/rejected": -1.141874074935913, |
|
"logps/chosen": -336.7235412597656, |
|
"logps/rejected": -364.3137512207031, |
|
"loss": 0.5363, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6179962158203125, |
|
"rewards/margins": 0.45184534788131714, |
|
"rewards/rejected": -1.0698416233062744, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.28780743066457354, |
|
"grad_norm": 8.720601331273834, |
|
"learning_rate": 4.4837309721154536e-07, |
|
"logits/chosen": -0.9832679629325867, |
|
"logits/rejected": -1.089292287826538, |
|
"logps/chosen": -380.1823425292969, |
|
"logps/rejected": -369.4367370605469, |
|
"loss": 0.6046, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.49308666586875916, |
|
"rewards/margins": 0.5638622641563416, |
|
"rewards/rejected": -1.0569490194320679, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.29304029304029305, |
|
"grad_norm": 8.122021775438958, |
|
"learning_rate": 4.4555956747451065e-07, |
|
"logits/chosen": -0.9626067876815796, |
|
"logits/rejected": -1.0849710702896118, |
|
"logps/chosen": -331.22589111328125, |
|
"logps/rejected": -364.7495422363281, |
|
"loss": 0.5482, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.4361996054649353, |
|
"rewards/margins": 0.5262717008590698, |
|
"rewards/rejected": -0.9624713659286499, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.29827315541601257, |
|
"grad_norm": 9.058735023964035, |
|
"learning_rate": 4.426807224305315e-07, |
|
"logits/chosen": -0.9820379018783569, |
|
"logits/rejected": -1.1078466176986694, |
|
"logps/chosen": -328.267822265625, |
|
"logps/rejected": -369.1883239746094, |
|
"loss": 0.5588, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.597852885723114, |
|
"rewards/margins": 0.5885254740715027, |
|
"rewards/rejected": -1.1863784790039062, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.3035060177917321, |
|
"grad_norm": 10.393753670315041, |
|
"learning_rate": 4.397375235904669e-07, |
|
"logits/chosen": -0.9954863786697388, |
|
"logits/rejected": -1.1284685134887695, |
|
"logps/chosen": -390.5664978027344, |
|
"logps/rejected": -338.8497009277344, |
|
"loss": 0.5495, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7412960529327393, |
|
"rewards/margins": 0.5047825574874878, |
|
"rewards/rejected": -1.2460787296295166, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.3087388801674516, |
|
"grad_norm": 10.230885742921803, |
|
"learning_rate": 4.3673095395882074e-07, |
|
"logits/chosen": -0.9860847592353821, |
|
"logits/rejected": -1.067049503326416, |
|
"logps/chosen": -270.33978271484375, |
|
"logps/rejected": -309.6357727050781, |
|
"loss": 0.5701, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6597366333007812, |
|
"rewards/margins": 0.40602293610572815, |
|
"rewards/rejected": -1.065759539604187, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.3139717425431711, |
|
"grad_norm": 11.671000583145103, |
|
"learning_rate": 4.3366201770542687e-07, |
|
"logits/chosen": -1.0431239604949951, |
|
"logits/rejected": -1.1043084859848022, |
|
"logps/chosen": -349.1619873046875, |
|
"logps/rejected": -369.27593994140625, |
|
"loss": 0.5881, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7552496194839478, |
|
"rewards/margins": 0.28604671359062195, |
|
"rewards/rejected": -1.041296362876892, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.31920460491889063, |
|
"grad_norm": 12.220956059151984, |
|
"learning_rate": 4.3053173983006395e-07, |
|
"logits/chosen": -1.039014458656311, |
|
"logits/rejected": -1.2115468978881836, |
|
"logps/chosen": -268.86944580078125, |
|
"logps/rejected": -332.51348876953125, |
|
"loss": 0.5508, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7133229970932007, |
|
"rewards/margins": 0.5132843255996704, |
|
"rewards/rejected": -1.2266072034835815, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.32443746729461015, |
|
"grad_norm": 11.379646125310213, |
|
"learning_rate": 4.2734116582011403e-07, |
|
"logits/chosen": -0.86652010679245, |
|
"logits/rejected": -1.2231082916259766, |
|
"logps/chosen": -414.2841796875, |
|
"logps/rejected": -326.4421691894531, |
|
"loss": 0.5612, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6138820648193359, |
|
"rewards/margins": 0.6353493332862854, |
|
"rewards/rejected": -1.2492313385009766, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.32967032967032966, |
|
"grad_norm": 7.7814508252983385, |
|
"learning_rate": 4.2409136130137845e-07, |
|
"logits/chosen": -1.1352870464324951, |
|
"logits/rejected": -1.1191558837890625, |
|
"logps/chosen": -278.9501647949219, |
|
"logps/rejected": -384.22528076171875, |
|
"loss": 0.605, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6691921949386597, |
|
"rewards/margins": 0.47407063841819763, |
|
"rewards/rejected": -1.1432626247406006, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.3349031920460492, |
|
"grad_norm": 11.028751044651262, |
|
"learning_rate": 4.207834116821672e-07, |
|
"logits/chosen": -0.9769953489303589, |
|
"logits/rejected": -1.1401973962783813, |
|
"logps/chosen": -330.58074951171875, |
|
"logps/rejected": -386.44622802734375, |
|
"loss": 0.5514, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.4714907705783844, |
|
"rewards/margins": 0.5420767068862915, |
|
"rewards/rejected": -1.0135674476623535, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.3401360544217687, |
|
"grad_norm": 9.056266468898434, |
|
"learning_rate": 4.174184217907818e-07, |
|
"logits/chosen": -0.9672349095344543, |
|
"logits/rejected": -1.060320496559143, |
|
"logps/chosen": -332.55218505859375, |
|
"logps/rejected": -368.73651123046875, |
|
"loss": 0.5554, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6068300008773804, |
|
"rewards/margins": 0.6446004509925842, |
|
"rewards/rejected": -1.2514302730560303, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.3453689167974882, |
|
"grad_norm": 8.814652599770767, |
|
"learning_rate": 4.1399751550651084e-07, |
|
"logits/chosen": -1.0880647897720337, |
|
"logits/rejected": -1.1898562908172607, |
|
"logps/chosen": -315.17413330078125, |
|
"logps/rejected": -347.0714111328125, |
|
"loss": 0.5482, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8068042993545532, |
|
"rewards/margins": 0.4671156406402588, |
|
"rewards/rejected": -1.2739198207855225, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.35060177917320773, |
|
"grad_norm": 14.937683752075078, |
|
"learning_rate": 4.1052183538426426e-07, |
|
"logits/chosen": -1.2153905630111694, |
|
"logits/rejected": -1.29157555103302, |
|
"logps/chosen": -331.7286682128906, |
|
"logps/rejected": -331.05328369140625, |
|
"loss": 0.5794, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.7625390291213989, |
|
"rewards/margins": 0.35178858041763306, |
|
"rewards/rejected": -1.1143276691436768, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.35583464154892724, |
|
"grad_norm": 12.003219375511447, |
|
"learning_rate": 4.0699254227296884e-07, |
|
"logits/chosen": -1.116801381111145, |
|
"logits/rejected": -1.1564903259277344, |
|
"logps/chosen": -306.5473937988281, |
|
"logps/rejected": -331.49139404296875, |
|
"loss": 0.5429, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.757170557975769, |
|
"rewards/margins": 0.5436859130859375, |
|
"rewards/rejected": -1.300856351852417, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.36106750392464676, |
|
"grad_norm": 9.112786075319724, |
|
"learning_rate": 4.034108149278543e-07, |
|
"logits/chosen": -1.034462332725525, |
|
"logits/rejected": -1.1058332920074463, |
|
"logps/chosen": -296.75445556640625, |
|
"logps/rejected": -326.24468994140625, |
|
"loss": 0.5479, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5596586465835571, |
|
"rewards/margins": 0.48433390259742737, |
|
"rewards/rejected": -1.0439926385879517, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.3663003663003663, |
|
"grad_norm": 9.548789994350182, |
|
"learning_rate": 3.9977784961675833e-07, |
|
"logits/chosen": -1.1233961582183838, |
|
"logits/rejected": -1.1769764423370361, |
|
"logps/chosen": -317.4129638671875, |
|
"logps/rejected": -339.80096435546875, |
|
"loss": 0.5418, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.735338568687439, |
|
"rewards/margins": 0.4009101390838623, |
|
"rewards/rejected": -1.1362487077713013, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.3715332286760858, |
|
"grad_norm": 9.744434333555398, |
|
"learning_rate": 3.96094859720583e-07, |
|
"logits/chosen": -1.0553786754608154, |
|
"logits/rejected": -1.1237189769744873, |
|
"logps/chosen": -386.6427001953125, |
|
"logps/rejected": -401.42901611328125, |
|
"loss": 0.5079, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.7077308297157288, |
|
"rewards/margins": 0.6166214346885681, |
|
"rewards/rejected": -1.3243521451950073, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.37676609105180536, |
|
"grad_norm": 8.917911314746286, |
|
"learning_rate": 3.923630753280357e-07, |
|
"logits/chosen": -1.1400585174560547, |
|
"logits/rejected": -1.1914727687835693, |
|
"logps/chosen": -292.65716552734375, |
|
"logps/rejected": -340.9034729003906, |
|
"loss": 0.523, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7678954005241394, |
|
"rewards/margins": 0.3283305764198303, |
|
"rewards/rejected": -1.0962259769439697, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.3819989534275249, |
|
"grad_norm": 10.682611358880774, |
|
"learning_rate": 3.8858374282478893e-07, |
|
"logits/chosen": -1.1396077871322632, |
|
"logits/rejected": -1.0520281791687012, |
|
"logps/chosen": -351.7391052246094, |
|
"logps/rejected": -429.90301513671875, |
|
"loss": 0.5649, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9305592775344849, |
|
"rewards/margins": 0.651319682598114, |
|
"rewards/rejected": -1.581878900527954, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.3872318158032444, |
|
"grad_norm": 10.046409983126262, |
|
"learning_rate": 3.8475812447719823e-07, |
|
"logits/chosen": -1.0544391870498657, |
|
"logits/rejected": -1.036026120185852, |
|
"logps/chosen": -306.620849609375, |
|
"logps/rejected": -323.1575927734375, |
|
"loss": 0.533, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7031368017196655, |
|
"rewards/margins": 0.4120270609855652, |
|
"rewards/rejected": -1.115163803100586, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.3924646781789639, |
|
"grad_norm": 12.554983975343603, |
|
"learning_rate": 3.8088749801071496e-07, |
|
"logits/chosen": -1.1147099733352661, |
|
"logits/rejected": -1.1453278064727783, |
|
"logps/chosen": -351.16778564453125, |
|
"logps/rejected": -433.20355224609375, |
|
"loss": 0.5283, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.6787683367729187, |
|
"rewards/margins": 0.4242841303348541, |
|
"rewards/rejected": -1.1030524969100952, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.3976975405546834, |
|
"grad_norm": 13.756345420434204, |
|
"learning_rate": 3.7697315618313644e-07, |
|
"logits/chosen": -1.2937476634979248, |
|
"logits/rejected": -1.324729561805725, |
|
"logps/chosen": -272.1294250488281, |
|
"logps/rejected": -297.9911193847656, |
|
"loss": 0.5791, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.49157842993736267, |
|
"rewards/margins": 0.5376006960868835, |
|
"rewards/rejected": -1.0291790962219238, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.40293040293040294, |
|
"grad_norm": 8.445800627854018, |
|
"learning_rate": 3.7301640635283584e-07, |
|
"logits/chosen": -1.0468685626983643, |
|
"logits/rejected": -1.0729619264602661, |
|
"logps/chosen": -320.27886962890625, |
|
"logps/rejected": -397.4753723144531, |
|
"loss": 0.5853, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8435429334640503, |
|
"rewards/margins": 0.4757511019706726, |
|
"rewards/rejected": -1.3192939758300781, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.40816326530612246, |
|
"grad_norm": 10.23215084321274, |
|
"learning_rate": 3.6901857004211443e-07, |
|
"logits/chosen": -1.1420189142227173, |
|
"logits/rejected": -1.1897895336151123, |
|
"logps/chosen": -306.6218566894531, |
|
"logps/rejected": -333.86529541015625, |
|
"loss": 0.5477, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.8528299331665039, |
|
"rewards/margins": 0.570170521736145, |
|
"rewards/rejected": -1.4230002164840698, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.413396127681842, |
|
"grad_norm": 7.768713138499057, |
|
"learning_rate": 3.6498098249582444e-07, |
|
"logits/chosen": -1.1434110403060913, |
|
"logits/rejected": -1.1015931367874146, |
|
"logps/chosen": -289.0810241699219, |
|
"logps/rejected": -400.203857421875, |
|
"loss": 0.535, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -0.738580584526062, |
|
"rewards/margins": 0.36460793018341064, |
|
"rewards/rejected": -1.1031885147094727, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.4186289900575615, |
|
"grad_norm": 9.222279959215516, |
|
"learning_rate": 3.6090499223540757e-07, |
|
"logits/chosen": -1.0318394899368286, |
|
"logits/rejected": -1.096234917640686, |
|
"logps/chosen": -372.46697998046875, |
|
"logps/rejected": -402.5672607421875, |
|
"loss": 0.5629, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6092363595962524, |
|
"rewards/margins": 0.5345269441604614, |
|
"rewards/rejected": -1.1437633037567139, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.423861852433281, |
|
"grad_norm": 11.495873829346216, |
|
"learning_rate": 3.5679196060850034e-07, |
|
"logits/chosen": -1.0856099128723145, |
|
"logits/rejected": -1.189156174659729, |
|
"logps/chosen": -354.1376037597656, |
|
"logps/rejected": -397.07867431640625, |
|
"loss": 0.5489, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6487225294113159, |
|
"rewards/margins": 0.575424313545227, |
|
"rewards/rejected": -1.224146842956543, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.4290947148090005, |
|
"grad_norm": 9.191445360758081, |
|
"learning_rate": 3.5264326133425464e-07, |
|
"logits/chosen": -1.1175934076309204, |
|
"logits/rejected": -1.1940885782241821, |
|
"logps/chosen": -336.60986328125, |
|
"logps/rejected": -362.86102294921875, |
|
"loss": 0.5637, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.7536613941192627, |
|
"rewards/margins": 0.709743320941925, |
|
"rewards/rejected": -1.463404655456543, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.43432757718472004, |
|
"grad_norm": 8.657867428538292, |
|
"learning_rate": 3.4846028004452693e-07, |
|
"logits/chosen": -1.0080922842025757, |
|
"logits/rejected": -1.0725328922271729, |
|
"logps/chosen": -341.5125427246094, |
|
"logps/rejected": -365.8641662597656, |
|
"loss": 0.5806, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8481569290161133, |
|
"rewards/margins": 0.4187043607234955, |
|
"rewards/rejected": -1.2668612003326416, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.43956043956043955, |
|
"grad_norm": 14.202414150454052, |
|
"learning_rate": 3.4424441382108826e-07, |
|
"logits/chosen": -0.9394620656967163, |
|
"logits/rejected": -1.249443769454956, |
|
"logps/chosen": -447.23114013671875, |
|
"logps/rejected": -404.6824951171875, |
|
"loss": 0.5523, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.6666282415390015, |
|
"rewards/margins": 0.6986402273178101, |
|
"rewards/rejected": -1.3652684688568115, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.44479330193615907, |
|
"grad_norm": 10.60914774682239, |
|
"learning_rate": 3.399970707290105e-07, |
|
"logits/chosen": -1.121051549911499, |
|
"logits/rejected": -1.2734081745147705, |
|
"logps/chosen": -319.57073974609375, |
|
"logps/rejected": -331.162353515625, |
|
"loss": 0.5707, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7978970408439636, |
|
"rewards/margins": 0.3845141530036926, |
|
"rewards/rejected": -1.1824110746383667, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.4500261643118786, |
|
"grad_norm": 9.92330100833777, |
|
"learning_rate": 3.3571966934638376e-07, |
|
"logits/chosen": -1.1604969501495361, |
|
"logits/rejected": -1.2318819761276245, |
|
"logps/chosen": -235.0649871826172, |
|
"logps/rejected": -369.85101318359375, |
|
"loss": 0.5412, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.5089398622512817, |
|
"rewards/margins": 0.6952416300773621, |
|
"rewards/rejected": -1.204181432723999, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.4552590266875981, |
|
"grad_norm": 10.13666736824925, |
|
"learning_rate": 3.314136382905234e-07, |
|
"logits/chosen": -1.2827130556106567, |
|
"logits/rejected": -1.2579691410064697, |
|
"logps/chosen": -267.91741943359375, |
|
"logps/rejected": -337.76373291015625, |
|
"loss": 0.5464, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.665598452091217, |
|
"rewards/margins": 0.6469942331314087, |
|
"rewards/rejected": -1.3125927448272705, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.4604918890633176, |
|
"grad_norm": 11.300262953662553, |
|
"learning_rate": 3.270804157408225e-07, |
|
"logits/chosen": -1.1657836437225342, |
|
"logits/rejected": -1.204975962638855, |
|
"logps/chosen": -356.23004150390625, |
|
"logps/rejected": -367.14715576171875, |
|
"loss": 0.5503, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9099416732788086, |
|
"rewards/margins": 0.3574753701686859, |
|
"rewards/rejected": -1.2674171924591064, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.46572475143903713, |
|
"grad_norm": 13.16396572942542, |
|
"learning_rate": 3.227214489584128e-07, |
|
"logits/chosen": -0.9544562101364136, |
|
"logits/rejected": -1.0509642362594604, |
|
"logps/chosen": -374.04803466796875, |
|
"logps/rejected": -359.88800048828125, |
|
"loss": 0.5354, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7183581590652466, |
|
"rewards/margins": 0.5582870244979858, |
|
"rewards/rejected": -1.2766451835632324, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.47095761381475665, |
|
"grad_norm": 10.532502099774513, |
|
"learning_rate": 3.1833819380279023e-07, |
|
"logits/chosen": -1.101965308189392, |
|
"logits/rejected": -1.2270119190216064, |
|
"logps/chosen": -309.1413879394531, |
|
"logps/rejected": -422.55731201171875, |
|
"loss": 0.5354, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.671983003616333, |
|
"rewards/margins": 0.9125014543533325, |
|
"rewards/rejected": -1.5844844579696655, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.47619047619047616, |
|
"grad_norm": 12.350587135347572, |
|
"learning_rate": 3.139321142455703e-07, |
|
"logits/chosen": -1.092979907989502, |
|
"logits/rejected": -1.1662163734436035, |
|
"logps/chosen": -211.8175811767578, |
|
"logps/rejected": -299.8162536621094, |
|
"loss": 0.5752, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.49126997590065, |
|
"rewards/margins": 0.7602614164352417, |
|
"rewards/rejected": -1.2515313625335693, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.48142333856619574, |
|
"grad_norm": 9.43043477617756, |
|
"learning_rate": 3.095046818815331e-07, |
|
"logits/chosen": -0.962468147277832, |
|
"logits/rejected": -1.1546117067337036, |
|
"logps/chosen": -343.376953125, |
|
"logps/rejected": -345.5092468261719, |
|
"loss": 0.5556, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.615440845489502, |
|
"rewards/margins": 0.5722676515579224, |
|
"rewards/rejected": -1.1877084970474243, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.48665620094191525, |
|
"grad_norm": 8.135223143391158, |
|
"learning_rate": 3.0505737543712275e-07, |
|
"logits/chosen": -1.0646668672561646, |
|
"logits/rejected": -1.0549968481063843, |
|
"logps/chosen": -324.72381591796875, |
|
"logps/rejected": -370.3115539550781, |
|
"loss": 0.5388, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.643751859664917, |
|
"rewards/margins": 0.4123232364654541, |
|
"rewards/rejected": -1.0560752153396606, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.49188906331763477, |
|
"grad_norm": 10.054077403477091, |
|
"learning_rate": 3.0059168027656475e-07, |
|
"logits/chosen": -1.0609228610992432, |
|
"logits/rejected": -1.1830861568450928, |
|
"logps/chosen": -350.5185852050781, |
|
"logps/rejected": -352.5489196777344, |
|
"loss": 0.509, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7355393171310425, |
|
"rewards/margins": 0.5512505769729614, |
|
"rewards/rejected": -1.286789894104004, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.4971219256933543, |
|
"grad_norm": 11.74797414461764, |
|
"learning_rate": 2.9610908790576663e-07, |
|
"logits/chosen": -0.9819517135620117, |
|
"logits/rejected": -1.0499684810638428, |
|
"logps/chosen": -335.88037109375, |
|
"logps/rejected": -419.1771545410156, |
|
"loss": 0.5209, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.6323331594467163, |
|
"rewards/margins": 0.839043915271759, |
|
"rewards/rejected": -1.4713770151138306, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.5023547880690737, |
|
"grad_norm": 9.96668666601236, |
|
"learning_rate": 2.9161109547416667e-07, |
|
"logits/chosen": -1.037734866142273, |
|
"logits/rejected": -1.1786705255508423, |
|
"logps/chosen": -347.0103759765625, |
|
"logps/rejected": -399.40478515625, |
|
"loss": 0.5471, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.743817925453186, |
|
"rewards/margins": 0.5720555782318115, |
|
"rewards/rejected": -1.3158735036849976, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.5075876504447933, |
|
"grad_norm": 10.601625897857508, |
|
"learning_rate": 2.8709920527469834e-07, |
|
"logits/chosen": -1.1739659309387207, |
|
"logits/rejected": -1.3225023746490479, |
|
"logps/chosen": -343.32000732421875, |
|
"logps/rejected": -377.5054016113281, |
|
"loss": 0.5309, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.6069706082344055, |
|
"rewards/margins": 0.7253375053405762, |
|
"rewards/rejected": -1.332308053970337, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.5128205128205128, |
|
"grad_norm": 7.289801762662034, |
|
"learning_rate": 2.8257492424203685e-07, |
|
"logits/chosen": -1.1249244213104248, |
|
"logits/rejected": -1.2511053085327148, |
|
"logps/chosen": -373.466552734375, |
|
"logps/rejected": -389.02410888671875, |
|
"loss": 0.5167, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5530604124069214, |
|
"rewards/margins": 0.7902454137802124, |
|
"rewards/rejected": -1.3433058261871338, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.5180533751962323, |
|
"grad_norm": 10.09098324922051, |
|
"learning_rate": 2.780397634492949e-07, |
|
"logits/chosen": -1.054896354675293, |
|
"logits/rejected": -1.1181640625, |
|
"logps/chosen": -296.7220764160156, |
|
"logps/rejected": -363.0633239746094, |
|
"loss": 0.5387, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.5451050400733948, |
|
"rewards/margins": 0.8848390579223633, |
|
"rewards/rejected": -1.4299442768096924, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.5232862375719518, |
|
"grad_norm": 9.787172704225766, |
|
"learning_rate": 2.7349523760333674e-07, |
|
"logits/chosen": -1.0413906574249268, |
|
"logits/rejected": -1.117727279663086, |
|
"logps/chosen": -294.98175048828125, |
|
"logps/rejected": -349.99237060546875, |
|
"loss": 0.5253, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7654287219047546, |
|
"rewards/margins": 0.6528303623199463, |
|
"rewards/rejected": -1.4182591438293457, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5285190999476713, |
|
"grad_norm": 14.621424246568333, |
|
"learning_rate": 2.6894286453887827e-07, |
|
"logits/chosen": -1.1582221984863281, |
|
"logits/rejected": -1.2330009937286377, |
|
"logps/chosen": -280.18353271484375, |
|
"logps/rejected": -368.38482666015625, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6598084568977356, |
|
"rewards/margins": 0.6717857718467712, |
|
"rewards/rejected": -1.3315941095352173, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.533751962323391, |
|
"grad_norm": 10.241549905247137, |
|
"learning_rate": 2.6438416471154273e-07, |
|
"logits/chosen": -1.1604335308074951, |
|
"logits/rejected": -1.2005010843276978, |
|
"logps/chosen": -332.10205078125, |
|
"logps/rejected": -378.48541259765625, |
|
"loss": 0.5089, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6059886813163757, |
|
"rewards/margins": 0.849461555480957, |
|
"rewards/rejected": -1.455450415611267, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.5389848246991105, |
|
"grad_norm": 9.013727354575442, |
|
"learning_rate": 2.598206606900406e-07, |
|
"logits/chosen": -1.1394898891448975, |
|
"logits/rejected": -1.2945622205734253, |
|
"logps/chosen": -316.51446533203125, |
|
"logps/rejected": -328.1947937011719, |
|
"loss": 0.5333, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.7797945737838745, |
|
"rewards/margins": 0.45246514678001404, |
|
"rewards/rejected": -1.232259750366211, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.54421768707483, |
|
"grad_norm": 8.111336781776568, |
|
"learning_rate": 2.552538766476443e-07, |
|
"logits/chosen": -1.1438872814178467, |
|
"logits/rejected": -1.1238300800323486, |
|
"logps/chosen": -323.6461181640625, |
|
"logps/rejected": -386.71478271484375, |
|
"loss": 0.5564, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7553135752677917, |
|
"rewards/margins": 0.5062183141708374, |
|
"rewards/rejected": -1.2615320682525635, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.5494505494505495, |
|
"grad_norm": 10.703676009985099, |
|
"learning_rate": 2.5068533785312666e-07, |
|
"logits/chosen": -1.0496041774749756, |
|
"logits/rejected": -1.1529366970062256, |
|
"logps/chosen": -375.7614440917969, |
|
"logps/rejected": -418.11248779296875, |
|
"loss": 0.5067, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.5294119119644165, |
|
"rewards/margins": 0.8402576446533203, |
|
"rewards/rejected": -1.3696695566177368, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.554683411826269, |
|
"grad_norm": 12.77058304597108, |
|
"learning_rate": 2.461165701613333e-07, |
|
"logits/chosen": -1.1512768268585205, |
|
"logits/rejected": -1.184362530708313, |
|
"logps/chosen": -310.32611083984375, |
|
"logps/rejected": -410.4712829589844, |
|
"loss": 0.5389, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.45870405435562134, |
|
"rewards/margins": 0.9488838315010071, |
|
"rewards/rejected": -1.407588005065918, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.5599162742019885, |
|
"grad_norm": 9.668288275009493, |
|
"learning_rate": 2.415490995035596e-07, |
|
"logits/chosen": -1.1707966327667236, |
|
"logits/rejected": -1.2220295667648315, |
|
"logps/chosen": -371.3827819824219, |
|
"logps/rejected": -394.5265197753906, |
|
"loss": 0.5431, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6321583986282349, |
|
"rewards/margins": 0.6839585304260254, |
|
"rewards/rejected": -1.3161169290542603, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.565149136577708, |
|
"grad_norm": 12.531799821628884, |
|
"learning_rate": 2.3698445137790258e-07, |
|
"logits/chosen": -1.1429023742675781, |
|
"logits/rejected": -1.2653216123580933, |
|
"logps/chosen": -314.1465759277344, |
|
"logps/rejected": -360.99285888671875, |
|
"loss": 0.5355, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.7225407958030701, |
|
"rewards/margins": 0.6546424031257629, |
|
"rewards/rejected": -1.377183198928833, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.5703819989534276, |
|
"grad_norm": 17.81295562815824, |
|
"learning_rate": 2.3242415033975575e-07, |
|
"logits/chosen": -1.0764009952545166, |
|
"logits/rejected": -1.2749533653259277, |
|
"logps/chosen": -387.68292236328125, |
|
"logps/rejected": -314.9404602050781, |
|
"loss": 0.5505, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.9429619908332825, |
|
"rewards/margins": 0.37492847442626953, |
|
"rewards/rejected": -1.3178904056549072, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.5756148613291471, |
|
"grad_norm": 9.692932105761217, |
|
"learning_rate": 2.2786971949262134e-07, |
|
"logits/chosen": -1.1522599458694458, |
|
"logits/rejected": -1.2962963581085205, |
|
"logps/chosen": -344.22064208984375, |
|
"logps/rejected": -393.7493591308594, |
|
"loss": 0.5266, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.6750838160514832, |
|
"rewards/margins": 0.5769845843315125, |
|
"rewards/rejected": -1.252068281173706, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.5808477237048666, |
|
"grad_norm": 11.579642689145317, |
|
"learning_rate": 2.2332267997940513e-07, |
|
"logits/chosen": -1.2705551385879517, |
|
"logits/rejected": -1.2792996168136597, |
|
"logps/chosen": -203.71925354003906, |
|
"logps/rejected": -326.6751708984375, |
|
"loss": 0.5749, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.4526478350162506, |
|
"rewards/margins": 0.8654245138168335, |
|
"rewards/rejected": -1.3180721998214722, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.5860805860805861, |
|
"grad_norm": 12.621329221135278, |
|
"learning_rate": 2.1878455047436753e-07, |
|
"logits/chosen": -1.1624226570129395, |
|
"logits/rejected": -1.2655895948410034, |
|
"logps/chosen": -341.18841552734375, |
|
"logps/rejected": -382.3958435058594, |
|
"loss": 0.5163, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.7849606275558472, |
|
"rewards/margins": 0.6679478883743286, |
|
"rewards/rejected": -1.4529087543487549, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.5913134484563056, |
|
"grad_norm": 12.668403273893347, |
|
"learning_rate": 2.1425684667589852e-07, |
|
"logits/chosen": -1.3048256635665894, |
|
"logits/rejected": -1.2398827075958252, |
|
"logps/chosen": -347.1092224121094, |
|
"logps/rejected": -453.8326721191406, |
|
"loss": 0.5516, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.2174538373947144, |
|
"rewards/margins": 0.5305573344230652, |
|
"rewards/rejected": -1.7480109930038452, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.5965463108320251, |
|
"grad_norm": 12.023926242949333, |
|
"learning_rate": 2.0974108080028692e-07, |
|
"logits/chosen": -1.06456458568573, |
|
"logits/rejected": -1.0955144166946411, |
|
"logps/chosen": -325.31622314453125, |
|
"logps/rejected": -423.0694274902344, |
|
"loss": 0.5159, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8166521787643433, |
|
"rewards/margins": 0.6923406720161438, |
|
"rewards/rejected": -1.5089927911758423, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.6017791732077447, |
|
"grad_norm": 10.799097847904763, |
|
"learning_rate": 2.0523876107665194e-07, |
|
"logits/chosen": -1.2194546461105347, |
|
"logits/rejected": -1.280065894126892, |
|
"logps/chosen": -336.81768798828125, |
|
"logps/rejected": -378.32562255859375, |
|
"loss": 0.5344, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7641147971153259, |
|
"rewards/margins": 0.6722861528396606, |
|
"rewards/rejected": -1.4364010095596313, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.6070120355834642, |
|
"grad_norm": 12.522164728436978, |
|
"learning_rate": 2.0075139124320787e-07, |
|
"logits/chosen": -1.2554112672805786, |
|
"logits/rejected": -1.3160804510116577, |
|
"logps/chosen": -269.1253356933594, |
|
"logps/rejected": -307.1266784667969, |
|
"loss": 0.5443, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6767506003379822, |
|
"rewards/margins": 0.6231390237808228, |
|
"rewards/rejected": -1.2998898029327393, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.6122448979591837, |
|
"grad_norm": 12.094410032709751, |
|
"learning_rate": 1.962804700450265e-07, |
|
"logits/chosen": -1.1103699207305908, |
|
"logits/rejected": -1.2241857051849365, |
|
"logps/chosen": -358.9649963378906, |
|
"logps/rejected": -439.03521728515625, |
|
"loss": 0.5465, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7203751802444458, |
|
"rewards/margins": 0.9127132296562195, |
|
"rewards/rejected": -1.6330883502960205, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.6174777603349032, |
|
"grad_norm": 8.270545089093483, |
|
"learning_rate": 1.9182749073346943e-07, |
|
"logits/chosen": -1.0301766395568848, |
|
"logits/rejected": -1.1534416675567627, |
|
"logps/chosen": -394.6572265625, |
|
"logps/rejected": -391.7749938964844, |
|
"loss": 0.4896, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.9890543818473816, |
|
"rewards/margins": 0.38350680470466614, |
|
"rewards/rejected": -1.3725612163543701, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.6227106227106227, |
|
"grad_norm": 9.554247673666037, |
|
"learning_rate": 1.8739394056745372e-07, |
|
"logits/chosen": -1.1898452043533325, |
|
"logits/rejected": -1.28109610080719, |
|
"logps/chosen": -451.5150451660156, |
|
"logps/rejected": -415.083251953125, |
|
"loss": 0.4908, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.704225480556488, |
|
"rewards/margins": 0.5918093919754028, |
|
"rewards/rejected": -1.2960349321365356, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.6279434850863422, |
|
"grad_norm": 9.027062976356286, |
|
"learning_rate": 1.8298130031671972e-07, |
|
"logits/chosen": -1.2719876766204834, |
|
"logits/rejected": -1.3323533535003662, |
|
"logps/chosen": -381.16339111328125, |
|
"logps/rejected": -408.12457275390625, |
|
"loss": 0.5196, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9807897806167603, |
|
"rewards/margins": 0.5190504193305969, |
|
"rewards/rejected": -1.4998401403427124, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.6331763474620618, |
|
"grad_norm": 8.68315282690499, |
|
"learning_rate": 1.785910437672658e-07, |
|
"logits/chosen": -1.1212913990020752, |
|
"logits/rejected": -1.1531600952148438, |
|
"logps/chosen": -383.25640869140625, |
|
"logps/rejected": -408.6227722167969, |
|
"loss": 0.5249, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.9310611486434937, |
|
"rewards/margins": 0.5751183032989502, |
|
"rewards/rejected": -1.5061795711517334, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.6384092098377813, |
|
"grad_norm": 11.716703884026428, |
|
"learning_rate": 1.7422463722911624e-07, |
|
"logits/chosen": -1.110461711883545, |
|
"logits/rejected": -1.1867046356201172, |
|
"logps/chosen": -368.7302551269531, |
|
"logps/rejected": -419.4322204589844, |
|
"loss": 0.5433, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7190608978271484, |
|
"rewards/margins": 0.8615682721138, |
|
"rewards/rejected": -1.5806291103363037, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.6436420722135008, |
|
"grad_norm": 10.184537916022915, |
|
"learning_rate": 1.6988353904658492e-07, |
|
"logits/chosen": -1.0632117986679077, |
|
"logits/rejected": -1.2908556461334229, |
|
"logps/chosen": -423.96270751953125, |
|
"logps/rejected": -391.506591796875, |
|
"loss": 0.4879, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7387353777885437, |
|
"rewards/margins": 0.8664577603340149, |
|
"rewards/rejected": -1.6051928997039795, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.6488749345892203, |
|
"grad_norm": 12.166712100792914, |
|
"learning_rate": 1.6556919911120081e-07, |
|
"logits/chosen": -1.178629994392395, |
|
"logits/rejected": -1.1636600494384766, |
|
"logps/chosen": -295.05731201171875, |
|
"logps/rejected": -349.7823486328125, |
|
"loss": 0.5021, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.6630123257637024, |
|
"rewards/margins": 0.7607491612434387, |
|
"rewards/rejected": -1.4237613677978516, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.6541077969649398, |
|
"grad_norm": 8.365177431092949, |
|
"learning_rate": 1.6128305837745546e-07, |
|
"logits/chosen": -1.134745717048645, |
|
"logits/rejected": -1.2268315553665161, |
|
"logps/chosen": -335.88470458984375, |
|
"logps/rejected": -419.7283630371094, |
|
"loss": 0.5064, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7585079669952393, |
|
"rewards/margins": 0.6967343091964722, |
|
"rewards/rejected": -1.4552422761917114, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.6593406593406593, |
|
"grad_norm": 10.11243118504911, |
|
"learning_rate": 1.570265483815364e-07, |
|
"logits/chosen": -1.2292219400405884, |
|
"logits/rejected": -1.3019181489944458, |
|
"logps/chosen": -311.6837158203125, |
|
"logps/rejected": -287.0787048339844, |
|
"loss": 0.5112, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.6337876319885254, |
|
"rewards/margins": 0.5905357599258423, |
|
"rewards/rejected": -1.2243235111236572, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.6645735217163788, |
|
"grad_norm": 10.434415583794985, |
|
"learning_rate": 1.5280109076320506e-07, |
|
"logits/chosen": -1.3286902904510498, |
|
"logits/rejected": -1.3394997119903564, |
|
"logps/chosen": -293.60614013671875, |
|
"logps/rejected": -345.57568359375, |
|
"loss": 0.4924, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7150704264640808, |
|
"rewards/margins": 0.7000972628593445, |
|
"rewards/rejected": -1.4151678085327148, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.6698063840920984, |
|
"grad_norm": 10.400220883666378, |
|
"learning_rate": 1.4860809679098158e-07, |
|
"logits/chosen": -1.2018976211547852, |
|
"logits/rejected": -1.325392246246338, |
|
"logps/chosen": -316.5107421875, |
|
"logps/rejected": -351.9391174316406, |
|
"loss": 0.5292, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.816146969795227, |
|
"rewards/margins": 0.6872614622116089, |
|
"rewards/rejected": -1.503408432006836, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.6750392464678179, |
|
"grad_norm": 8.95257167533715, |
|
"learning_rate": 1.444489668907914e-07, |
|
"logits/chosen": -1.2102806568145752, |
|
"logits/rejected": -1.2517168521881104, |
|
"logps/chosen": -324.88275146484375, |
|
"logps/rejected": -426.87713623046875, |
|
"loss": 0.5292, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.6853327751159668, |
|
"rewards/margins": 0.8505553007125854, |
|
"rewards/rejected": -1.5358881950378418, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.6802721088435374, |
|
"grad_norm": 10.218025125539159, |
|
"learning_rate": 1.403250901782354e-07, |
|
"logits/chosen": -1.2663801908493042, |
|
"logits/rejected": -1.2181060314178467, |
|
"logps/chosen": -361.97479248046875, |
|
"logps/rejected": -415.7010803222656, |
|
"loss": 0.5265, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9003621935844421, |
|
"rewards/margins": 0.5406725406646729, |
|
"rewards/rejected": -1.4410346746444702, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.6855049712192569, |
|
"grad_norm": 12.330414497118486, |
|
"learning_rate": 1.3623784399463584e-07, |
|
"logits/chosen": -1.126432180404663, |
|
"logits/rejected": -1.1779577732086182, |
|
"logps/chosen": -310.89398193359375, |
|
"logps/rejected": -351.1187744140625, |
|
"loss": 0.5091, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.7655483484268188, |
|
"rewards/margins": 0.66068434715271, |
|
"rewards/rejected": -1.4262326955795288, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.6907378335949764, |
|
"grad_norm": 13.55513826496698, |
|
"learning_rate": 1.3218859344701632e-07, |
|
"logits/chosen": -1.0685323476791382, |
|
"logits/rejected": -1.0921872854232788, |
|
"logps/chosen": -357.3135986328125, |
|
"logps/rejected": -424.2374572753906, |
|
"loss": 0.496, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.8321298360824585, |
|
"rewards/margins": 0.6854997873306274, |
|
"rewards/rejected": -1.5176297426223755, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.6959706959706959, |
|
"grad_norm": 11.973601998113498, |
|
"learning_rate": 1.2817869095216624e-07, |
|
"logits/chosen": -1.0887490510940552, |
|
"logits/rejected": -1.0985252857208252, |
|
"logps/chosen": -329.9906311035156, |
|
"logps/rejected": -428.9029235839844, |
|
"loss": 0.5013, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.6890138387680054, |
|
"rewards/margins": 0.7360657453536987, |
|
"rewards/rejected": -1.4250797033309937, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.7012035583464155, |
|
"grad_norm": 9.263981118702059, |
|
"learning_rate": 1.2420947578494522e-07, |
|
"logits/chosen": -1.1637510061264038, |
|
"logits/rejected": -1.3139464855194092, |
|
"logps/chosen": -347.0451965332031, |
|
"logps/rejected": -364.57647705078125, |
|
"loss": 0.5143, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7794694304466248, |
|
"rewards/margins": 0.8562557101249695, |
|
"rewards/rejected": -1.6357250213623047, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.706436420722135, |
|
"grad_norm": 13.571595353683206, |
|
"learning_rate": 1.202822736309758e-07, |
|
"logits/chosen": -1.0500030517578125, |
|
"logits/rejected": -1.0962427854537964, |
|
"logps/chosen": -360.6551208496094, |
|
"logps/rejected": -407.03448486328125, |
|
"loss": 0.5026, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8235113024711609, |
|
"rewards/margins": 0.6789143681526184, |
|
"rewards/rejected": -1.5024257898330688, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.7116692830978545, |
|
"grad_norm": 9.965928066396195, |
|
"learning_rate": 1.1639839614387572e-07, |
|
"logits/chosen": -1.1780047416687012, |
|
"logits/rejected": -1.2829917669296265, |
|
"logps/chosen": -435.4798278808594, |
|
"logps/rejected": -439.09173583984375, |
|
"loss": 0.5544, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.0139061212539673, |
|
"rewards/margins": 0.5282729864120483, |
|
"rewards/rejected": -1.542178988456726, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.716902145473574, |
|
"grad_norm": 11.275357703138836, |
|
"learning_rate": 1.1255914050717552e-07, |
|
"logits/chosen": -1.1181185245513916, |
|
"logits/rejected": -1.3721811771392822, |
|
"logps/chosen": -379.4562683105469, |
|
"logps/rejected": -353.43157958984375, |
|
"loss": 0.4895, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8644318580627441, |
|
"rewards/margins": 0.5917305946350098, |
|
"rewards/rejected": -1.4561625719070435, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.7221350078492935, |
|
"grad_norm": 12.737103206928401, |
|
"learning_rate": 1.0876578900107053e-07, |
|
"logits/chosen": -1.2187843322753906, |
|
"logits/rejected": -1.2873694896697998, |
|
"logps/chosen": -264.6595153808594, |
|
"logps/rejected": -384.25225830078125, |
|
"loss": 0.4935, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.5753992795944214, |
|
"rewards/margins": 0.8757054209709167, |
|
"rewards/rejected": -1.4511046409606934, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.727367870225013, |
|
"grad_norm": 12.97168258093361, |
|
"learning_rate": 1.050196085741491e-07, |
|
"logits/chosen": -1.2466976642608643, |
|
"logits/rejected": -1.3973594903945923, |
|
"logps/chosen": -346.0795593261719, |
|
"logps/rejected": -363.08380126953125, |
|
"loss": 0.489, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8371197581291199, |
|
"rewards/margins": 0.7653623819351196, |
|
"rewards/rejected": -1.6024821996688843, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.7326007326007326, |
|
"grad_norm": 13.41322662896709, |
|
"learning_rate": 1.0132185042024246e-07, |
|
"logits/chosen": -1.3399550914764404, |
|
"logits/rejected": -1.3757188320159912, |
|
"logps/chosen": -323.0008544921875, |
|
"logps/rejected": -408.1412658691406, |
|
"loss": 0.5233, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9458025097846985, |
|
"rewards/margins": 0.7458266019821167, |
|
"rewards/rejected": -1.6916290521621704, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.7378335949764521, |
|
"grad_norm": 12.965281325135477, |
|
"learning_rate": 9.767374956053584e-08, |
|
"logits/chosen": -1.347015619277954, |
|
"logits/rejected": -1.3667693138122559, |
|
"logps/chosen": -324.0329895019531, |
|
"logps/rejected": -401.6748352050781, |
|
"loss": 0.5521, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8981969952583313, |
|
"rewards/margins": 0.8032199144363403, |
|
"rewards/rejected": -1.7014169692993164, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.7430664573521716, |
|
"grad_norm": 13.396320385256042, |
|
"learning_rate": 9.407652443108192e-08, |
|
"logits/chosen": -1.1957058906555176, |
|
"logits/rejected": -1.2673814296722412, |
|
"logps/chosen": -389.23223876953125, |
|
"logps/rejected": -418.9557189941406, |
|
"loss": 0.5632, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.886542797088623, |
|
"rewards/margins": 0.7842100262641907, |
|
"rewards/rejected": -1.670752763748169, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.7482993197278912, |
|
"grad_norm": 12.063700551819691, |
|
"learning_rate": 9.053137647585229e-08, |
|
"logits/chosen": -1.19754159450531, |
|
"logits/rejected": -1.3801628351211548, |
|
"logps/chosen": -395.425537109375, |
|
"logps/rejected": -390.66278076171875, |
|
"loss": 0.5038, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": -0.8353336453437805, |
|
"rewards/margins": 0.6488715410232544, |
|
"rewards/rejected": -1.4842052459716797, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.7535321821036107, |
|
"grad_norm": 11.990445284897602, |
|
"learning_rate": 8.70394897454659e-08, |
|
"logits/chosen": -1.1735503673553467, |
|
"logits/rejected": -1.292515516281128, |
|
"logps/chosen": -401.2131042480469, |
|
"logps/rejected": -466.5420837402344, |
|
"loss": 0.522, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.558559775352478, |
|
"rewards/margins": 0.9399889707565308, |
|
"rewards/rejected": -1.4985487461090088, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.7587650444793302, |
|
"grad_norm": 11.935902040542206, |
|
"learning_rate": 8.360203050172488e-08, |
|
"logits/chosen": -1.2571841478347778, |
|
"logits/rejected": -1.2958558797836304, |
|
"logps/chosen": -362.5431213378906, |
|
"logps/rejected": -411.53997802734375, |
|
"loss": 0.5259, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9163681268692017, |
|
"rewards/margins": 0.7593438029289246, |
|
"rewards/rejected": -1.675711989402771, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.7639979068550498, |
|
"grad_norm": 9.068298634968928, |
|
"learning_rate": 8.022014682809305e-08, |
|
"logits/chosen": -1.3620612621307373, |
|
"logits/rejected": -1.3937609195709229, |
|
"logps/chosen": -273.1943664550781, |
|
"logps/rejected": -345.25555419921875, |
|
"loss": 0.5177, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.8040696978569031, |
|
"rewards/margins": 0.6031185984611511, |
|
"rewards/rejected": -1.4071884155273438, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.7692307692307693, |
|
"grad_norm": 11.382282536026993, |
|
"learning_rate": 7.689496824624525e-08, |
|
"logits/chosen": -1.3386561870574951, |
|
"logits/rejected": -1.527148962020874, |
|
"logps/chosen": -396.3229064941406, |
|
"logps/rejected": -402.32891845703125, |
|
"loss": 0.5072, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8049358129501343, |
|
"rewards/margins": 0.9081543684005737, |
|
"rewards/rejected": -1.7130903005599976, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.7744636316064888, |
|
"grad_norm": 12.33392884893423, |
|
"learning_rate": 7.362760533881649e-08, |
|
"logits/chosen": -1.422590732574463, |
|
"logits/rejected": -1.4653226137161255, |
|
"logps/chosen": -330.05987548828125, |
|
"logps/rejected": -365.05535888671875, |
|
"loss": 0.5244, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -0.9965084195137024, |
|
"rewards/margins": 0.5925046801567078, |
|
"rewards/rejected": -1.5890133380889893, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.7796964939822083, |
|
"grad_norm": 12.928253984063549, |
|
"learning_rate": 7.041914937847584e-08, |
|
"logits/chosen": -1.3457154035568237, |
|
"logits/rejected": -1.3661472797393799, |
|
"logps/chosen": -366.29339599609375, |
|
"logps/rejected": -452.2115173339844, |
|
"loss": 0.51, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.0464023351669312, |
|
"rewards/margins": 0.8855581283569336, |
|
"rewards/rejected": -1.9319604635238647, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.7849293563579278, |
|
"grad_norm": 11.846016623066228, |
|
"learning_rate": 6.727067196345099e-08, |
|
"logits/chosen": -1.2849265336990356, |
|
"logits/rejected": -1.3893473148345947, |
|
"logps/chosen": -296.6019592285156, |
|
"logps/rejected": -444.0691833496094, |
|
"loss": 0.4968, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9065812826156616, |
|
"rewards/margins": 1.098984718322754, |
|
"rewards/rejected": -2.005566120147705, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.7901622187336473, |
|
"grad_norm": 9.960268668010652, |
|
"learning_rate": 6.418322465962233e-08, |
|
"logits/chosen": -1.3249355554580688, |
|
"logits/rejected": -1.3122931718826294, |
|
"logps/chosen": -381.8565979003906, |
|
"logps/rejected": -499.72161865234375, |
|
"loss": 0.5356, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.3371002674102783, |
|
"rewards/margins": 0.8308144807815552, |
|
"rewards/rejected": -2.167914628982544, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.7953950811093669, |
|
"grad_norm": 12.546874816293252, |
|
"learning_rate": 6.115783864930905e-08, |
|
"logits/chosen": -1.3687621355056763, |
|
"logits/rejected": -1.45345139503479, |
|
"logps/chosen": -298.7080383300781, |
|
"logps/rejected": -421.26422119140625, |
|
"loss": 0.5174, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8631349802017212, |
|
"rewards/margins": 1.0178935527801514, |
|
"rewards/rejected": -1.8810285329818726, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.8006279434850864, |
|
"grad_norm": 15.449951101743657, |
|
"learning_rate": 5.8195524386862374e-08, |
|
"logits/chosen": -1.1471742391586304, |
|
"logits/rejected": -1.1375999450683594, |
|
"logps/chosen": -405.28619384765625, |
|
"logps/rejected": -550.5733642578125, |
|
"loss": 0.4858, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0993586778640747, |
|
"rewards/margins": 0.7847630381584167, |
|
"rewards/rejected": -1.8841215372085571, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.8058608058608059, |
|
"grad_norm": 7.57695758921922, |
|
"learning_rate": 5.529727126118228e-08, |
|
"logits/chosen": -1.2175819873809814, |
|
"logits/rejected": -1.2101088762283325, |
|
"logps/chosen": -447.8209533691406, |
|
"logps/rejected": -454.62249755859375, |
|
"loss": 0.5209, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": -1.1148792505264282, |
|
"rewards/margins": 0.4590207040309906, |
|
"rewards/rejected": -1.5738999843597412, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.8110936682365254, |
|
"grad_norm": 16.121099871171214, |
|
"learning_rate": 5.246404726526918e-08, |
|
"logits/chosen": -1.2117811441421509, |
|
"logits/rejected": -1.4320929050445557, |
|
"logps/chosen": -382.09588623046875, |
|
"logps/rejected": -381.78546142578125, |
|
"loss": 0.4976, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.7104379534721375, |
|
"rewards/margins": 0.8749149441719055, |
|
"rewards/rejected": -1.585352897644043, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.8163265306122449, |
|
"grad_norm": 14.686715983716352, |
|
"learning_rate": 4.969679867292276e-08, |
|
"logits/chosen": -1.1892309188842773, |
|
"logits/rejected": -1.2661142349243164, |
|
"logps/chosen": -429.66680908203125, |
|
"logps/rejected": -465.50543212890625, |
|
"loss": 0.5241, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.9120133519172668, |
|
"rewards/margins": 0.9419037699699402, |
|
"rewards/rejected": -1.853917121887207, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.8215593929879644, |
|
"grad_norm": 8.406024980782155, |
|
"learning_rate": 4.6996449722693315e-08, |
|
"logits/chosen": -1.3920047283172607, |
|
"logits/rejected": -1.4546654224395752, |
|
"logps/chosen": -303.8490295410156, |
|
"logps/rejected": -380.0191650390625, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -0.7151414155960083, |
|
"rewards/margins": 0.7757319808006287, |
|
"rewards/rejected": -1.4908732175827026, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.826792255363684, |
|
"grad_norm": 10.726509305028333, |
|
"learning_rate": 4.436390230919465e-08, |
|
"logits/chosen": -1.309987187385559, |
|
"logits/rejected": -1.4876197576522827, |
|
"logps/chosen": -366.0071716308594, |
|
"logps/rejected": -395.597412109375, |
|
"loss": 0.5465, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.959511399269104, |
|
"rewards/margins": 0.7645119428634644, |
|
"rewards/rejected": -1.724023461341858, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.8320251177394035, |
|
"grad_norm": 12.88345857405619, |
|
"learning_rate": 4.180003568187776e-08, |
|
"logits/chosen": -1.1906282901763916, |
|
"logits/rejected": -1.4163811206817627, |
|
"logps/chosen": -328.5754699707031, |
|
"logps/rejected": -397.17022705078125, |
|
"loss": 0.504, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.827189564704895, |
|
"rewards/margins": 0.9950035214424133, |
|
"rewards/rejected": -1.822192907333374, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.837257980115123, |
|
"grad_norm": 11.786525332826088, |
|
"learning_rate": 3.930570615136919e-08, |
|
"logits/chosen": -1.3638622760772705, |
|
"logits/rejected": -1.3613183498382568, |
|
"logps/chosen": -340.79046630859375, |
|
"logps/rejected": -425.66632080078125, |
|
"loss": 0.5046, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -1.038352608680725, |
|
"rewards/margins": 0.6946929693222046, |
|
"rewards/rejected": -1.7330455780029297, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.8424908424908425, |
|
"grad_norm": 10.921746476297674, |
|
"learning_rate": 3.6881746803469756e-08, |
|
"logits/chosen": -1.2174428701400757, |
|
"logits/rejected": -1.4488804340362549, |
|
"logps/chosen": -471.79669189453125, |
|
"logps/rejected": -489.78411865234375, |
|
"loss": 0.5129, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -0.9443795084953308, |
|
"rewards/margins": 0.927892804145813, |
|
"rewards/rejected": -1.872272253036499, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.847723704866562, |
|
"grad_norm": 9.455246482209075, |
|
"learning_rate": 3.452896722091128e-08, |
|
"logits/chosen": -1.2315971851348877, |
|
"logits/rejected": -1.416680097579956, |
|
"logps/chosen": -377.7510681152344, |
|
"logps/rejected": -402.55242919921875, |
|
"loss": 0.5117, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.1194517612457275, |
|
"rewards/margins": 0.7395168542861938, |
|
"rewards/rejected": -1.8589684963226318, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.8529565672422815, |
|
"grad_norm": 11.403985101827214, |
|
"learning_rate": 3.2248153212961677e-08, |
|
"logits/chosen": -1.237486481666565, |
|
"logits/rejected": -1.262434959411621, |
|
"logps/chosen": -348.9461669921875, |
|
"logps/rejected": -417.15863037109375, |
|
"loss": 0.5023, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.8699714541435242, |
|
"rewards/margins": 0.9124903678894043, |
|
"rewards/rejected": -1.7824617624282837, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.858189429618001, |
|
"grad_norm": 10.508074438895047, |
|
"learning_rate": 3.004006655297209e-08, |
|
"logits/chosen": -1.2522094249725342, |
|
"logits/rejected": -1.2810122966766357, |
|
"logps/chosen": -351.80853271484375, |
|
"logps/rejected": -417.1268615722656, |
|
"loss": 0.5365, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.7258558869361877, |
|
"rewards/margins": 0.8368725776672363, |
|
"rewards/rejected": -1.5627284049987793, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.8634222919937206, |
|
"grad_norm": 11.237693491740915, |
|
"learning_rate": 2.7905444723949762e-08, |
|
"logits/chosen": -1.2839200496673584, |
|
"logits/rejected": -1.3117491006851196, |
|
"logps/chosen": -376.00311279296875, |
|
"logps/rejected": -447.16436767578125, |
|
"loss": 0.5161, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -1.0290104150772095, |
|
"rewards/margins": 0.6223047971725464, |
|
"rewards/rejected": -1.6513150930404663, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.8686551543694401, |
|
"grad_norm": 11.12452801324459, |
|
"learning_rate": 2.5845000672245572e-08, |
|
"logits/chosen": -1.328969955444336, |
|
"logits/rejected": -1.3776309490203857, |
|
"logps/chosen": -274.914794921875, |
|
"logps/rejected": -414.8409118652344, |
|
"loss": 0.5253, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8333606719970703, |
|
"rewards/margins": 1.0471667051315308, |
|
"rewards/rejected": -1.8805274963378906, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.8738880167451596, |
|
"grad_norm": 12.769128426893182, |
|
"learning_rate": 2.385942256943499e-08, |
|
"logits/chosen": -1.0632429122924805, |
|
"logits/rejected": -1.2329734563827515, |
|
"logps/chosen": -372.83538818359375, |
|
"logps/rejected": -433.05462646484375, |
|
"loss": 0.4994, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.1593245267868042, |
|
"rewards/margins": 0.6629003286361694, |
|
"rewards/rejected": -1.8222248554229736, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.8791208791208791, |
|
"grad_norm": 11.097228517795964, |
|
"learning_rate": 2.194937358247506e-08, |
|
"logits/chosen": -1.355252981185913, |
|
"logits/rejected": -1.4283853769302368, |
|
"logps/chosen": -370.66558837890625, |
|
"logps/rejected": -464.18096923828125, |
|
"loss": 0.4679, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -0.9099235534667969, |
|
"rewards/margins": 0.9922647476196289, |
|
"rewards/rejected": -1.9021883010864258, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.8843537414965986, |
|
"grad_norm": 14.718338385740743, |
|
"learning_rate": 2.011549165221127e-08, |
|
"logits/chosen": -1.252229928970337, |
|
"logits/rejected": -1.31440007686615, |
|
"logps/chosen": -340.9755859375, |
|
"logps/rejected": -405.0323791503906, |
|
"loss": 0.4857, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9458065032958984, |
|
"rewards/margins": 0.9342392086982727, |
|
"rewards/rejected": -1.8800458908081055, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.8895866038723181, |
|
"grad_norm": 12.150291217399005, |
|
"learning_rate": 1.8358389280311303e-08, |
|
"logits/chosen": -1.4331414699554443, |
|
"logits/rejected": -1.4398243427276611, |
|
"logps/chosen": -375.02655029296875, |
|
"logps/rejected": -449.79510498046875, |
|
"loss": 0.4881, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -1.0200358629226685, |
|
"rewards/margins": 0.7897581458091736, |
|
"rewards/rejected": -1.8097938299179077, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.8948194662480377, |
|
"grad_norm": 16.03783513635316, |
|
"learning_rate": 1.6678653324693787e-08, |
|
"logits/chosen": -1.295977234840393, |
|
"logits/rejected": -1.335905909538269, |
|
"logps/chosen": -410.98858642578125, |
|
"logps/rejected": -475.57720947265625, |
|
"loss": 0.5011, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9554780125617981, |
|
"rewards/margins": 0.7775296568870544, |
|
"rewards/rejected": -1.733007788658142, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.9000523286237572, |
|
"grad_norm": 13.521037055172467, |
|
"learning_rate": 1.507684480352292e-08, |
|
"logits/chosen": -1.1388336420059204, |
|
"logits/rejected": -1.3264272212982178, |
|
"logps/chosen": -376.89617919921875, |
|
"logps/rejected": -386.2228088378906, |
|
"loss": 0.5166, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -1.0570180416107178, |
|
"rewards/margins": 0.4624674320220947, |
|
"rewards/rejected": -1.519485354423523, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.9052851909994767, |
|
"grad_norm": 10.015858817548864, |
|
"learning_rate": 1.3553498707832761e-08, |
|
"logits/chosen": -1.2846730947494507, |
|
"logits/rejected": -1.3853490352630615, |
|
"logps/chosen": -296.6762390136719, |
|
"logps/rejected": -344.22509765625, |
|
"loss": 0.4947, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.7420059442520142, |
|
"rewards/margins": 0.8823955655097961, |
|
"rewards/rejected": -1.6244014501571655, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.9105180533751962, |
|
"grad_norm": 10.663148094639538, |
|
"learning_rate": 1.2109123822844653e-08, |
|
"logits/chosen": -1.2846484184265137, |
|
"logits/rejected": -1.3924165964126587, |
|
"logps/chosen": -337.274658203125, |
|
"logps/rejected": -369.1552429199219, |
|
"loss": 0.5413, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9676492810249329, |
|
"rewards/margins": 0.6571919322013855, |
|
"rewards/rejected": -1.624841332435608, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.9157509157509157, |
|
"grad_norm": 11.75016661537887, |
|
"learning_rate": 1.0744202558037014e-08, |
|
"logits/chosen": -1.2299830913543701, |
|
"logits/rejected": -1.314736247062683, |
|
"logps/chosen": -416.3876037597656, |
|
"logps/rejected": -447.00653076171875, |
|
"loss": 0.5348, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9475942850112915, |
|
"rewards/margins": 0.7942700982093811, |
|
"rewards/rejected": -1.7418642044067383, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.9209837781266352, |
|
"grad_norm": 10.166512726785776, |
|
"learning_rate": 9.459190786024696e-09, |
|
"logits/chosen": -1.3395822048187256, |
|
"logits/rejected": -1.3754020929336548, |
|
"logps/chosen": -295.82745361328125, |
|
"logps/rejected": -336.22552490234375, |
|
"loss": 0.4754, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.8604118227958679, |
|
"rewards/margins": 0.5532194375991821, |
|
"rewards/rejected": -1.4136312007904053, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.9262166405023547, |
|
"grad_norm": 11.291165980710177, |
|
"learning_rate": 8.254517690300944e-09, |
|
"logits/chosen": -1.1100029945373535, |
|
"logits/rejected": -1.2225925922393799, |
|
"logps/chosen": -354.47979736328125, |
|
"logps/rejected": -421.1259765625, |
|
"loss": 0.5095, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.8763915300369263, |
|
"rewards/margins": 0.9879170656204224, |
|
"rewards/rejected": -1.8643085956573486, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.9314495028780743, |
|
"grad_norm": 11.362297064245054, |
|
"learning_rate": 7.130585621893809e-09, |
|
"logits/chosen": -1.3329540491104126, |
|
"logits/rejected": -1.432592511177063, |
|
"logps/chosen": -308.84466552734375, |
|
"logps/rejected": -347.50848388671875, |
|
"loss": 0.546, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8689080476760864, |
|
"rewards/margins": 0.6261805295944214, |
|
"rewards/rejected": -1.4950885772705078, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.9366823652537938, |
|
"grad_norm": 11.948692977185269, |
|
"learning_rate": 6.0877699649840574e-09, |
|
"logits/chosen": -1.1590728759765625, |
|
"logits/rejected": -1.1631317138671875, |
|
"logps/chosen": -414.06805419921875, |
|
"logps/rejected": -474.5601501464844, |
|
"loss": 0.5078, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9375759959220886, |
|
"rewards/margins": 0.5639073848724365, |
|
"rewards/rejected": -1.50148344039917, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.9419152276295133, |
|
"grad_norm": 13.428163080513379, |
|
"learning_rate": 5.126419011529992e-09, |
|
"logits/chosen": -1.2970398664474487, |
|
"logits/rejected": -1.4397534132003784, |
|
"logps/chosen": -339.0660705566406, |
|
"logps/rejected": -468.02801513671875, |
|
"loss": 0.4911, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -1.0843138694763184, |
|
"rewards/margins": 1.0834088325500488, |
|
"rewards/rejected": -2.167722463607788, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.9471480900052328, |
|
"grad_norm": 13.583498099799197, |
|
"learning_rate": 4.246853844940723e-09, |
|
"logits/chosen": -1.2331116199493408, |
|
"logits/rejected": -1.3253166675567627, |
|
"logps/chosen": -342.36871337890625, |
|
"logps/rejected": -370.36529541015625, |
|
"loss": 0.5195, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9595893621444702, |
|
"rewards/margins": 0.6066954135894775, |
|
"rewards/rejected": -1.5662847757339478, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.9523809523809523, |
|
"grad_norm": 11.121286337310199, |
|
"learning_rate": 3.449368232836869e-09, |
|
"logits/chosen": -1.3873889446258545, |
|
"logits/rejected": -1.5287644863128662, |
|
"logps/chosen": -303.5126647949219, |
|
"logps/rejected": -344.46868896484375, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.8543957471847534, |
|
"rewards/margins": 0.759514570236206, |
|
"rewards/rejected": -1.613910436630249, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.957613814756672, |
|
"grad_norm": 16.825805293360574, |
|
"learning_rate": 2.734228528934679e-09, |
|
"logits/chosen": -1.2536653280258179, |
|
"logits/rejected": -1.3062855005264282, |
|
"logps/chosen": -343.5291442871094, |
|
"logps/rejected": -403.6029968261719, |
|
"loss": 0.5206, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.8069379925727844, |
|
"rewards/margins": 0.8626105189323425, |
|
"rewards/rejected": -1.6695486307144165, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.9628466771323915, |
|
"grad_norm": 11.493646679091231, |
|
"learning_rate": 2.1016735840859447e-09, |
|
"logits/chosen": -1.2242063283920288, |
|
"logits/rejected": -1.4777610301971436, |
|
"logps/chosen": -426.0459899902344, |
|
"logps/rejected": -438.92108154296875, |
|
"loss": 0.4916, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -1.0979913473129272, |
|
"rewards/margins": 0.821546733379364, |
|
"rewards/rejected": -1.919538140296936, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.968079539508111, |
|
"grad_norm": 9.306375726544099, |
|
"learning_rate": 1.551914666503812e-09, |
|
"logits/chosen": -1.2090791463851929, |
|
"logits/rejected": -1.3315274715423584, |
|
"logps/chosen": -424.6327209472656, |
|
"logps/rejected": -421.3246154785156, |
|
"loss": 0.4724, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7541013360023499, |
|
"rewards/margins": 0.6384197473526001, |
|
"rewards/rejected": -1.3925210237503052, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.9733124018838305, |
|
"grad_norm": 11.04339214589406, |
|
"learning_rate": 1.0851353912008642e-09, |
|
"logits/chosen": -1.3252540826797485, |
|
"logits/rejected": -1.5138733386993408, |
|
"logps/chosen": -393.14459228515625, |
|
"logps/rejected": -373.80706787109375, |
|
"loss": 0.4887, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -0.9482460021972656, |
|
"rewards/margins": 0.6573212742805481, |
|
"rewards/rejected": -1.605567216873169, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.97854526425955, |
|
"grad_norm": 12.262645565063035, |
|
"learning_rate": 7.014916586632336e-10, |
|
"logits/chosen": -1.2882921695709229, |
|
"logits/rejected": -1.4113324880599976, |
|
"logps/chosen": -311.13079833984375, |
|
"logps/rejected": -411.09759521484375, |
|
"loss": 0.5073, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": -0.7763317227363586, |
|
"rewards/margins": 0.7843965291976929, |
|
"rewards/rejected": -1.5607283115386963, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.9837781266352695, |
|
"grad_norm": 9.069425686799821, |
|
"learning_rate": 4.011116027811956e-10, |
|
"logits/chosen": -1.3659498691558838, |
|
"logits/rejected": -1.2314040660858154, |
|
"logps/chosen": -339.2212219238281, |
|
"logps/rejected": -494.2679138183594, |
|
"loss": 0.4938, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -0.8353347778320312, |
|
"rewards/margins": 0.8836865425109863, |
|
"rewards/rejected": -1.719021201133728, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.989010989010989, |
|
"grad_norm": 13.468123125126455, |
|
"learning_rate": 1.840955480532924e-10, |
|
"logits/chosen": -1.1797103881835938, |
|
"logits/rejected": -1.274377703666687, |
|
"logps/chosen": -481.68157958984375, |
|
"logps/rejected": -480.9568786621094, |
|
"loss": 0.5258, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": -0.9562680125236511, |
|
"rewards/margins": 0.5136700868606567, |
|
"rewards/rejected": -1.4699381589889526, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.9942438513867086, |
|
"grad_norm": 16.36790843814101, |
|
"learning_rate": 5.051597607894087e-11, |
|
"logits/chosen": -1.2505313158035278, |
|
"logits/rejected": -1.3999385833740234, |
|
"logps/chosen": -310.5320739746094, |
|
"logps/rejected": -401.0025939941406, |
|
"loss": 0.5107, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -0.9702110290527344, |
|
"rewards/margins": 0.921745777130127, |
|
"rewards/rejected": -1.8919566869735718, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.9994767137624281, |
|
"grad_norm": 12.286951734943862, |
|
"learning_rate": 4.1750135001961117e-13, |
|
"logits/chosen": -1.396078109741211, |
|
"logits/rejected": -1.3952105045318604, |
|
"logps/chosen": -433.47412109375, |
|
"logps/rejected": -525.8956298828125, |
|
"loss": 0.5001, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -0.7201281189918518, |
|
"rewards/margins": 1.2082884311676025, |
|
"rewards/rejected": -1.9284168481826782, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1911, |
|
"total_flos": 0.0, |
|
"train_loss": 0.5605571830478531, |
|
"train_runtime": 8972.3869, |
|
"train_samples_per_second": 6.814, |
|
"train_steps_per_second": 0.213 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1911, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|