|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 1563, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 53.88663101196289, |
|
"kl": 0.023513054475188255, |
|
"learning_rate": 1.592356687898089e-08, |
|
"logps/chosen": -297.60601806640625, |
|
"logps/rejected": -325.4705810546875, |
|
"loss": 0.5832, |
|
"rewards/chosen": 0.0006075352430343628, |
|
"rewards/margins": -0.00015557828010059893, |
|
"rewards/rejected": 0.0007631134940311313, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 50.249412536621094, |
|
"kl": 0.045294154435396194, |
|
"learning_rate": 3.184713375796178e-08, |
|
"logps/chosen": -287.09796142578125, |
|
"logps/rejected": -340.85723876953125, |
|
"loss": 0.584, |
|
"rewards/chosen": 0.01942053996026516, |
|
"rewards/margins": 0.016482431441545486, |
|
"rewards/rejected": 0.002938109915703535, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 53.71184158325195, |
|
"kl": 0.3499985337257385, |
|
"learning_rate": 4.777070063694268e-08, |
|
"logps/chosen": -319.9255676269531, |
|
"logps/rejected": -329.686767578125, |
|
"loss": 0.5731, |
|
"rewards/chosen": 0.08261503279209137, |
|
"rewards/margins": 0.06412206590175629, |
|
"rewards/rejected": 0.01849297434091568, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 54.06269073486328, |
|
"kl": 0.7365144491195679, |
|
"learning_rate": 6.369426751592356e-08, |
|
"logps/chosen": -303.22174072265625, |
|
"logps/rejected": -343.46942138671875, |
|
"loss": 0.5572, |
|
"rewards/chosen": 0.16511321067810059, |
|
"rewards/margins": 0.12509918212890625, |
|
"rewards/rejected": 0.04001404717564583, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 51.1727294921875, |
|
"kl": 0.8321301341056824, |
|
"learning_rate": 7.961783439490445e-08, |
|
"logps/chosen": -319.74725341796875, |
|
"logps/rejected": -343.55291748046875, |
|
"loss": 0.5462, |
|
"rewards/chosen": 0.28225231170654297, |
|
"rewards/margins": 0.2485344409942627, |
|
"rewards/rejected": 0.033717863261699677, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 51.29603576660156, |
|
"kl": 0.5910934209823608, |
|
"learning_rate": 9.554140127388536e-08, |
|
"logps/chosen": -284.5264892578125, |
|
"logps/rejected": -357.3599853515625, |
|
"loss": 0.5268, |
|
"rewards/chosen": 0.3866623044013977, |
|
"rewards/margins": 0.3753780722618103, |
|
"rewards/rejected": 0.01128421537578106, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 49.49940872192383, |
|
"kl": 0.014250552281737328, |
|
"learning_rate": 1.1146496815286624e-07, |
|
"logps/chosen": -280.2434997558594, |
|
"logps/rejected": -375.0678405761719, |
|
"loss": 0.4927, |
|
"rewards/chosen": 0.38746222853660583, |
|
"rewards/margins": 0.5707041025161743, |
|
"rewards/rejected": -0.18324188888072968, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 43.36540222167969, |
|
"kl": 0.0, |
|
"learning_rate": 1.2738853503184713e-07, |
|
"logps/chosen": -274.5870666503906, |
|
"logps/rejected": -330.31182861328125, |
|
"loss": 0.4715, |
|
"rewards/chosen": 0.33990636467933655, |
|
"rewards/margins": 0.7924365997314453, |
|
"rewards/rejected": -0.45253023505210876, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 45.98810577392578, |
|
"kl": 0.0, |
|
"learning_rate": 1.43312101910828e-07, |
|
"logps/chosen": -304.57012939453125, |
|
"logps/rejected": -337.6307373046875, |
|
"loss": 0.4331, |
|
"rewards/chosen": 0.35226964950561523, |
|
"rewards/margins": 1.2349388599395752, |
|
"rewards/rejected": -0.8826690912246704, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 47.690940856933594, |
|
"kl": 0.0, |
|
"learning_rate": 1.592356687898089e-07, |
|
"logps/chosen": -296.6683654785156, |
|
"logps/rejected": -354.68280029296875, |
|
"loss": 0.3929, |
|
"rewards/chosen": 0.3493489623069763, |
|
"rewards/margins": 1.7278648614883423, |
|
"rewards/rejected": -1.3785159587860107, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 44.06206130981445, |
|
"kl": 0.0, |
|
"learning_rate": 1.7515923566878978e-07, |
|
"logps/chosen": -313.95135498046875, |
|
"logps/rejected": -300.2484436035156, |
|
"loss": 0.3888, |
|
"rewards/chosen": 0.27411553263664246, |
|
"rewards/margins": 1.9409167766571045, |
|
"rewards/rejected": -1.66680109500885, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 41.619537353515625, |
|
"kl": 0.0, |
|
"learning_rate": 1.9108280254777072e-07, |
|
"logps/chosen": -295.02264404296875, |
|
"logps/rejected": -348.7203674316406, |
|
"loss": 0.3558, |
|
"rewards/chosen": 0.3618777394294739, |
|
"rewards/margins": 2.562573194503784, |
|
"rewards/rejected": -2.200695514678955, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 38.81331253051758, |
|
"kl": 0.0, |
|
"learning_rate": 2.070063694267516e-07, |
|
"logps/chosen": -229.79873657226562, |
|
"logps/rejected": -352.5125732421875, |
|
"loss": 0.362, |
|
"rewards/chosen": 0.35671067237854004, |
|
"rewards/margins": 2.826559543609619, |
|
"rewards/rejected": -2.469848871231079, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 46.18341827392578, |
|
"kl": 0.0, |
|
"learning_rate": 2.2292993630573247e-07, |
|
"logps/chosen": -297.68218994140625, |
|
"logps/rejected": -379.452880859375, |
|
"loss": 0.317, |
|
"rewards/chosen": 0.41898855566978455, |
|
"rewards/margins": 3.3871779441833496, |
|
"rewards/rejected": -2.968189239501953, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 42.065818786621094, |
|
"kl": 0.0, |
|
"learning_rate": 2.388535031847134e-07, |
|
"logps/chosen": -287.422607421875, |
|
"logps/rejected": -360.3113708496094, |
|
"loss": 0.3633, |
|
"rewards/chosen": 0.4496755599975586, |
|
"rewards/margins": 3.4658408164978027, |
|
"rewards/rejected": -3.016165256500244, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 42.938968658447266, |
|
"kl": 0.0, |
|
"learning_rate": 2.5477707006369425e-07, |
|
"logps/chosen": -315.4157409667969, |
|
"logps/rejected": -347.26983642578125, |
|
"loss": 0.3146, |
|
"rewards/chosen": 0.49002504348754883, |
|
"rewards/margins": 3.6589598655700684, |
|
"rewards/rejected": -3.1689350605010986, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 35.48064422607422, |
|
"kl": 0.0, |
|
"learning_rate": 2.7070063694267513e-07, |
|
"logps/chosen": -265.584228515625, |
|
"logps/rejected": -382.82342529296875, |
|
"loss": 0.3244, |
|
"rewards/chosen": 0.5259283781051636, |
|
"rewards/margins": 4.123469352722168, |
|
"rewards/rejected": -3.5975403785705566, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 40.673316955566406, |
|
"kl": 0.0, |
|
"learning_rate": 2.86624203821656e-07, |
|
"logps/chosen": -269.2232666015625, |
|
"logps/rejected": -394.211669921875, |
|
"loss": 0.3456, |
|
"rewards/chosen": 0.44943737983703613, |
|
"rewards/margins": 3.7313075065612793, |
|
"rewards/rejected": -3.2818703651428223, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 43.21498489379883, |
|
"kl": 0.0, |
|
"learning_rate": 3.0254777070063694e-07, |
|
"logps/chosen": -259.74285888671875, |
|
"logps/rejected": -350.27001953125, |
|
"loss": 0.3277, |
|
"rewards/chosen": 0.4997062683105469, |
|
"rewards/margins": 3.3499958515167236, |
|
"rewards/rejected": -2.8502893447875977, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 42.415992736816406, |
|
"kl": 0.0, |
|
"learning_rate": 3.184713375796178e-07, |
|
"logps/chosen": -285.27569580078125, |
|
"logps/rejected": -372.94757080078125, |
|
"loss": 0.3198, |
|
"rewards/chosen": 0.5120081901550293, |
|
"rewards/margins": 4.236746788024902, |
|
"rewards/rejected": -3.724738597869873, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 41.03474044799805, |
|
"kl": 0.0, |
|
"learning_rate": 3.343949044585987e-07, |
|
"logps/chosen": -299.6357116699219, |
|
"logps/rejected": -353.11334228515625, |
|
"loss": 0.3132, |
|
"rewards/chosen": 0.4896661341190338, |
|
"rewards/margins": 4.393103122711182, |
|
"rewards/rejected": -3.9034361839294434, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 37.77847671508789, |
|
"kl": 0.0, |
|
"learning_rate": 3.5031847133757957e-07, |
|
"logps/chosen": -239.7168731689453, |
|
"logps/rejected": -370.69866943359375, |
|
"loss": 0.3157, |
|
"rewards/chosen": 0.4915805459022522, |
|
"rewards/margins": 4.615452766418457, |
|
"rewards/rejected": -4.12387228012085, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 44.40388488769531, |
|
"kl": 0.0, |
|
"learning_rate": 3.6624203821656045e-07, |
|
"logps/chosen": -292.5115051269531, |
|
"logps/rejected": -374.26849365234375, |
|
"loss": 0.3164, |
|
"rewards/chosen": 0.5263981819152832, |
|
"rewards/margins": 4.860444068908691, |
|
"rewards/rejected": -4.33404541015625, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 39.663360595703125, |
|
"kl": 0.0, |
|
"learning_rate": 3.8216560509554143e-07, |
|
"logps/chosen": -305.62591552734375, |
|
"logps/rejected": -418.4881286621094, |
|
"loss": 0.2919, |
|
"rewards/chosen": 0.5590957999229431, |
|
"rewards/margins": 5.1840081214904785, |
|
"rewards/rejected": -4.624912261962891, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 49.241607666015625, |
|
"kl": 0.0, |
|
"learning_rate": 3.980891719745223e-07, |
|
"logps/chosen": -299.54486083984375, |
|
"logps/rejected": -396.44586181640625, |
|
"loss": 0.302, |
|
"rewards/chosen": 0.5733675360679626, |
|
"rewards/margins": 5.690032482147217, |
|
"rewards/rejected": -5.116665363311768, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 41.309715270996094, |
|
"kl": 0.0, |
|
"learning_rate": 4.140127388535032e-07, |
|
"logps/chosen": -285.24725341796875, |
|
"logps/rejected": -408.84326171875, |
|
"loss": 0.2895, |
|
"rewards/chosen": 0.5746848583221436, |
|
"rewards/margins": 5.906516075134277, |
|
"rewards/rejected": -5.331830978393555, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 32.96284103393555, |
|
"kl": 0.0, |
|
"learning_rate": 4.2993630573248406e-07, |
|
"logps/chosen": -258.3543395996094, |
|
"logps/rejected": -388.1202087402344, |
|
"loss": 0.3022, |
|
"rewards/chosen": 0.5746804475784302, |
|
"rewards/margins": 5.607410430908203, |
|
"rewards/rejected": -5.0327301025390625, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 45.53158187866211, |
|
"kl": 0.0, |
|
"learning_rate": 4.4585987261146494e-07, |
|
"logps/chosen": -292.7215270996094, |
|
"logps/rejected": -364.63116455078125, |
|
"loss": 0.2933, |
|
"rewards/chosen": 0.6362745761871338, |
|
"rewards/margins": 5.103418350219727, |
|
"rewards/rejected": -4.467143535614014, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 33.09851837158203, |
|
"kl": 0.0, |
|
"learning_rate": 4.6178343949044587e-07, |
|
"logps/chosen": -287.439208984375, |
|
"logps/rejected": -362.47430419921875, |
|
"loss": 0.2741, |
|
"rewards/chosen": 0.5601326823234558, |
|
"rewards/margins": 5.778112888336182, |
|
"rewards/rejected": -5.217979431152344, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 36.87670135498047, |
|
"kl": 0.0, |
|
"learning_rate": 4.777070063694267e-07, |
|
"logps/chosen": -275.13983154296875, |
|
"logps/rejected": -419.91290283203125, |
|
"loss": 0.3079, |
|
"rewards/chosen": 0.677828311920166, |
|
"rewards/margins": 5.985136985778809, |
|
"rewards/rejected": -5.307309150695801, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 38.875118255615234, |
|
"kl": 0.0, |
|
"learning_rate": 4.936305732484076e-07, |
|
"logps/chosen": -301.48345947265625, |
|
"logps/rejected": -402.216796875, |
|
"loss": 0.2858, |
|
"rewards/chosen": 0.6449128985404968, |
|
"rewards/margins": 6.0093674659729, |
|
"rewards/rejected": -5.364454746246338, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 34.41432189941406, |
|
"kl": 0.0, |
|
"learning_rate": 4.989331436699858e-07, |
|
"logps/chosen": -266.0882873535156, |
|
"logps/rejected": -377.15911865234375, |
|
"loss": 0.2722, |
|
"rewards/chosen": 0.7968356013298035, |
|
"rewards/margins": 6.45413064956665, |
|
"rewards/rejected": -5.657294750213623, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 40.4543342590332, |
|
"kl": 0.0, |
|
"learning_rate": 4.971550497866287e-07, |
|
"logps/chosen": -310.2311096191406, |
|
"logps/rejected": -341.25482177734375, |
|
"loss": 0.2852, |
|
"rewards/chosen": 0.6570959091186523, |
|
"rewards/margins": 6.051398277282715, |
|
"rewards/rejected": -5.394301414489746, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 42.1318359375, |
|
"kl": 0.0, |
|
"learning_rate": 4.953769559032717e-07, |
|
"logps/chosen": -270.304931640625, |
|
"logps/rejected": -369.9417724609375, |
|
"loss": 0.2895, |
|
"rewards/chosen": 0.6669288873672485, |
|
"rewards/margins": 6.19234561920166, |
|
"rewards/rejected": -5.525416374206543, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 29.986738204956055, |
|
"kl": 0.0, |
|
"learning_rate": 4.935988620199146e-07, |
|
"logps/chosen": -276.00048828125, |
|
"logps/rejected": -409.60626220703125, |
|
"loss": 0.2606, |
|
"rewards/chosen": 0.7553092837333679, |
|
"rewards/margins": 6.95382022857666, |
|
"rewards/rejected": -6.198510646820068, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 36.70397186279297, |
|
"kl": 0.0, |
|
"learning_rate": 4.918207681365576e-07, |
|
"logps/chosen": -275.0423583984375, |
|
"logps/rejected": -362.8970642089844, |
|
"loss": 0.2695, |
|
"rewards/chosen": 0.7985103726387024, |
|
"rewards/margins": 6.52749490737915, |
|
"rewards/rejected": -5.7289838790893555, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 38.225425720214844, |
|
"kl": 0.0, |
|
"learning_rate": 4.900426742532006e-07, |
|
"logps/chosen": -310.0399475097656, |
|
"logps/rejected": -392.6236572265625, |
|
"loss": 0.2647, |
|
"rewards/chosen": 0.8835963010787964, |
|
"rewards/margins": 7.008363246917725, |
|
"rewards/rejected": -6.124766826629639, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 37.8361930847168, |
|
"kl": 0.0, |
|
"learning_rate": 4.882645803698435e-07, |
|
"logps/chosen": -289.03814697265625, |
|
"logps/rejected": -387.9421081542969, |
|
"loss": 0.3058, |
|
"rewards/chosen": 0.6273610591888428, |
|
"rewards/margins": 6.493137359619141, |
|
"rewards/rejected": -5.865776538848877, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 39.47262191772461, |
|
"kl": 0.0, |
|
"learning_rate": 4.864864864864865e-07, |
|
"logps/chosen": -316.1293029785156, |
|
"logps/rejected": -404.95843505859375, |
|
"loss": 0.2767, |
|
"rewards/chosen": 0.8789302110671997, |
|
"rewards/margins": 7.016871452331543, |
|
"rewards/rejected": -6.137940883636475, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 33.78382110595703, |
|
"kl": 0.0, |
|
"learning_rate": 4.847083926031294e-07, |
|
"logps/chosen": -279.5328369140625, |
|
"logps/rejected": -444.06768798828125, |
|
"loss": 0.2948, |
|
"rewards/chosen": 0.8043686747550964, |
|
"rewards/margins": 7.215568542480469, |
|
"rewards/rejected": -6.411200523376465, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 35.95137405395508, |
|
"kl": 0.0, |
|
"learning_rate": 4.829302987197724e-07, |
|
"logps/chosen": -314.162109375, |
|
"logps/rejected": -412.83203125, |
|
"loss": 0.2587, |
|
"rewards/chosen": 0.9799895286560059, |
|
"rewards/margins": 7.341480255126953, |
|
"rewards/rejected": -6.361490249633789, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 28.62522315979004, |
|
"kl": 0.0, |
|
"learning_rate": 4.811522048364154e-07, |
|
"logps/chosen": -311.1414489746094, |
|
"logps/rejected": -400.20025634765625, |
|
"loss": 0.2843, |
|
"rewards/chosen": 0.4934808611869812, |
|
"rewards/margins": 6.236133098602295, |
|
"rewards/rejected": -5.742652416229248, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 36.00955581665039, |
|
"kl": 0.0, |
|
"learning_rate": 4.793741109530583e-07, |
|
"logps/chosen": -308.5085754394531, |
|
"logps/rejected": -402.4859313964844, |
|
"loss": 0.2709, |
|
"rewards/chosen": 0.7078418135643005, |
|
"rewards/margins": 6.969674587249756, |
|
"rewards/rejected": -6.261832237243652, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 36.267826080322266, |
|
"kl": 0.0, |
|
"learning_rate": 4.775960170697012e-07, |
|
"logps/chosen": -288.0454406738281, |
|
"logps/rejected": -410.33843994140625, |
|
"loss": 0.2625, |
|
"rewards/chosen": 0.7268310785293579, |
|
"rewards/margins": 7.102780342102051, |
|
"rewards/rejected": -6.375949382781982, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 43.02051544189453, |
|
"kl": 0.0, |
|
"learning_rate": 4.7581792318634425e-07, |
|
"logps/chosen": -293.15704345703125, |
|
"logps/rejected": -410.19415283203125, |
|
"loss": 0.2691, |
|
"rewards/chosen": 0.8471654653549194, |
|
"rewards/margins": 7.160550117492676, |
|
"rewards/rejected": -6.313384056091309, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 35.434814453125, |
|
"kl": 0.0, |
|
"learning_rate": 4.7403982930298717e-07, |
|
"logps/chosen": -329.09716796875, |
|
"logps/rejected": -404.0379333496094, |
|
"loss": 0.2705, |
|
"rewards/chosen": 0.8330980539321899, |
|
"rewards/margins": 7.071954250335693, |
|
"rewards/rejected": -6.238855838775635, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 36.30598068237305, |
|
"kl": 0.0, |
|
"learning_rate": 4.7226173541963014e-07, |
|
"logps/chosen": -251.01351928710938, |
|
"logps/rejected": -404.2989196777344, |
|
"loss": 0.2729, |
|
"rewards/chosen": 0.7619932293891907, |
|
"rewards/margins": 6.793028354644775, |
|
"rewards/rejected": -6.03103494644165, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 46.27096939086914, |
|
"kl": 0.0, |
|
"learning_rate": 4.7048364153627306e-07, |
|
"logps/chosen": -286.8477478027344, |
|
"logps/rejected": -389.7752685546875, |
|
"loss": 0.2742, |
|
"rewards/chosen": 0.79796302318573, |
|
"rewards/margins": 7.297283172607422, |
|
"rewards/rejected": -6.499319553375244, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 45.02741241455078, |
|
"kl": 0.0, |
|
"learning_rate": 4.6870554765291604e-07, |
|
"logps/chosen": -308.306396484375, |
|
"logps/rejected": -399.7406311035156, |
|
"loss": 0.2764, |
|
"rewards/chosen": 0.8131675720214844, |
|
"rewards/margins": 7.1386399269104, |
|
"rewards/rejected": -6.325472354888916, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 35.437339782714844, |
|
"kl": 0.0, |
|
"learning_rate": 4.66927453769559e-07, |
|
"logps/chosen": -259.05401611328125, |
|
"logps/rejected": -398.1236877441406, |
|
"loss": 0.2899, |
|
"rewards/chosen": 0.6805434823036194, |
|
"rewards/margins": 7.213263511657715, |
|
"rewards/rejected": -6.532719612121582, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 35.13607406616211, |
|
"kl": 0.0, |
|
"learning_rate": 4.65149359886202e-07, |
|
"logps/chosen": -256.3832092285156, |
|
"logps/rejected": -410.2801208496094, |
|
"loss": 0.2647, |
|
"rewards/chosen": 0.9173786044120789, |
|
"rewards/margins": 7.905294895172119, |
|
"rewards/rejected": -6.987915992736816, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 35.4403190612793, |
|
"kl": 0.0, |
|
"learning_rate": 4.633712660028449e-07, |
|
"logps/chosen": -318.9344787597656, |
|
"logps/rejected": -403.5733337402344, |
|
"loss": 0.2669, |
|
"rewards/chosen": 0.9034342765808105, |
|
"rewards/margins": 7.315404415130615, |
|
"rewards/rejected": -6.411970615386963, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 40.87915802001953, |
|
"kl": 0.0, |
|
"learning_rate": 4.615931721194879e-07, |
|
"logps/chosen": -307.8759765625, |
|
"logps/rejected": -405.61383056640625, |
|
"loss": 0.2424, |
|
"rewards/chosen": 0.9530359506607056, |
|
"rewards/margins": 8.030720710754395, |
|
"rewards/rejected": -7.0776848793029785, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 37.06570816040039, |
|
"kl": 0.0, |
|
"learning_rate": 4.5981507823613085e-07, |
|
"logps/chosen": -279.9657897949219, |
|
"logps/rejected": -429.54638671875, |
|
"loss": 0.2654, |
|
"rewards/chosen": 1.0000529289245605, |
|
"rewards/margins": 7.914543151855469, |
|
"rewards/rejected": -6.91448974609375, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 35.289634704589844, |
|
"kl": 0.0, |
|
"learning_rate": 4.580369843527738e-07, |
|
"logps/chosen": -314.99267578125, |
|
"logps/rejected": -417.67401123046875, |
|
"loss": 0.2673, |
|
"rewards/chosen": 0.7197456955909729, |
|
"rewards/margins": 7.778171539306641, |
|
"rewards/rejected": -7.0584259033203125, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 34.34975814819336, |
|
"kl": 0.0, |
|
"learning_rate": 4.562588904694168e-07, |
|
"logps/chosen": -266.03350830078125, |
|
"logps/rejected": -420.0655822753906, |
|
"loss": 0.2683, |
|
"rewards/chosen": 0.8104265332221985, |
|
"rewards/margins": 7.52780294418335, |
|
"rewards/rejected": -6.717376708984375, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 36.18477249145508, |
|
"kl": 0.0, |
|
"learning_rate": 4.544807965860597e-07, |
|
"logps/chosen": -312.69708251953125, |
|
"logps/rejected": -419.86883544921875, |
|
"loss": 0.2483, |
|
"rewards/chosen": 0.8221893310546875, |
|
"rewards/margins": 7.910370826721191, |
|
"rewards/rejected": -7.088181972503662, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 34.63364028930664, |
|
"kl": 0.0, |
|
"learning_rate": 4.5270270270270264e-07, |
|
"logps/chosen": -286.63555908203125, |
|
"logps/rejected": -437.01031494140625, |
|
"loss": 0.2576, |
|
"rewards/chosen": 1.0530911684036255, |
|
"rewards/margins": 8.051950454711914, |
|
"rewards/rejected": -6.998859405517578, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 33.81501388549805, |
|
"kl": 0.0, |
|
"learning_rate": 4.509246088193456e-07, |
|
"logps/chosen": -295.6054992675781, |
|
"logps/rejected": -395.41796875, |
|
"loss": 0.2539, |
|
"rewards/chosen": 1.0546354055404663, |
|
"rewards/margins": 7.8214921951293945, |
|
"rewards/rejected": -6.766855716705322, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 38.72855758666992, |
|
"kl": 0.0, |
|
"learning_rate": 4.491465149359886e-07, |
|
"logps/chosen": -305.07586669921875, |
|
"logps/rejected": -399.9986267089844, |
|
"loss": 0.2502, |
|
"rewards/chosen": 1.029415488243103, |
|
"rewards/margins": 8.159204483032227, |
|
"rewards/rejected": -7.129788398742676, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 36.09114456176758, |
|
"kl": 0.0, |
|
"learning_rate": 4.4736842105263156e-07, |
|
"logps/chosen": -252.03683471679688, |
|
"logps/rejected": -418.17938232421875, |
|
"loss": 0.2554, |
|
"rewards/chosen": 0.9251939058303833, |
|
"rewards/margins": 8.25263786315918, |
|
"rewards/rejected": -7.327445030212402, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 39.85062026977539, |
|
"kl": 0.0, |
|
"learning_rate": 4.4559032716927454e-07, |
|
"logps/chosen": -311.30535888671875, |
|
"logps/rejected": -386.61248779296875, |
|
"loss": 0.2836, |
|
"rewards/chosen": 0.8921693563461304, |
|
"rewards/margins": 7.110099792480469, |
|
"rewards/rejected": -6.217930793762207, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 31.65120506286621, |
|
"kl": 0.0, |
|
"learning_rate": 4.438122332859175e-07, |
|
"logps/chosen": -270.47064208984375, |
|
"logps/rejected": -450.70965576171875, |
|
"loss": 0.2624, |
|
"rewards/chosen": 0.9781096577644348, |
|
"rewards/margins": 8.819513320922852, |
|
"rewards/rejected": -7.841403007507324, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 43.421443939208984, |
|
"kl": 0.0, |
|
"learning_rate": 4.420341394025605e-07, |
|
"logps/chosen": -290.4539489746094, |
|
"logps/rejected": -405.73968505859375, |
|
"loss": 0.2676, |
|
"rewards/chosen": 0.9018673896789551, |
|
"rewards/margins": 8.168411254882812, |
|
"rewards/rejected": -7.266544342041016, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 36.024681091308594, |
|
"kl": 0.0, |
|
"learning_rate": 4.4025604551920335e-07, |
|
"logps/chosen": -278.21484375, |
|
"logps/rejected": -393.1819152832031, |
|
"loss": 0.2447, |
|
"rewards/chosen": 0.9998067617416382, |
|
"rewards/margins": 7.97442626953125, |
|
"rewards/rejected": -6.9746198654174805, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 31.11004066467285, |
|
"kl": 0.0, |
|
"learning_rate": 4.384779516358463e-07, |
|
"logps/chosen": -249.5725555419922, |
|
"logps/rejected": -443.29388427734375, |
|
"loss": 0.2397, |
|
"rewards/chosen": 0.7795785665512085, |
|
"rewards/margins": 8.376214981079102, |
|
"rewards/rejected": -7.596636772155762, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 36.15337371826172, |
|
"kl": 0.0, |
|
"learning_rate": 4.366998577524893e-07, |
|
"logps/chosen": -283.8440856933594, |
|
"logps/rejected": -403.4510498046875, |
|
"loss": 0.2555, |
|
"rewards/chosen": 0.98023521900177, |
|
"rewards/margins": 8.079346656799316, |
|
"rewards/rejected": -7.099111080169678, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 35.31691360473633, |
|
"kl": 0.0, |
|
"learning_rate": 4.3492176386913227e-07, |
|
"logps/chosen": -265.3778381347656, |
|
"logps/rejected": -388.70953369140625, |
|
"loss": 0.2543, |
|
"rewards/chosen": 0.9121273159980774, |
|
"rewards/margins": 7.6136298179626465, |
|
"rewards/rejected": -6.701502799987793, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 29.128305435180664, |
|
"kl": 0.0, |
|
"learning_rate": 4.3314366998577524e-07, |
|
"logps/chosen": -257.42572021484375, |
|
"logps/rejected": -397.4496765136719, |
|
"loss": 0.2452, |
|
"rewards/chosen": 1.0432583093643188, |
|
"rewards/margins": 7.451505184173584, |
|
"rewards/rejected": -6.4082465171813965, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 32.244834899902344, |
|
"kl": 0.0, |
|
"learning_rate": 4.313655761024182e-07, |
|
"logps/chosen": -290.0133056640625, |
|
"logps/rejected": -423.0615234375, |
|
"loss": 0.2449, |
|
"rewards/chosen": 1.174845576286316, |
|
"rewards/margins": 8.203340530395508, |
|
"rewards/rejected": -7.028494358062744, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 37.363033294677734, |
|
"kl": 0.0, |
|
"learning_rate": 4.2958748221906114e-07, |
|
"logps/chosen": -325.817138671875, |
|
"logps/rejected": -419.47589111328125, |
|
"loss": 0.2419, |
|
"rewards/chosen": 1.2895926237106323, |
|
"rewards/margins": 8.572031021118164, |
|
"rewards/rejected": -7.282437801361084, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 35.990299224853516, |
|
"kl": 0.0, |
|
"learning_rate": 4.278093883357041e-07, |
|
"logps/chosen": -259.43365478515625, |
|
"logps/rejected": -414.167724609375, |
|
"loss": 0.2343, |
|
"rewards/chosen": 1.0641326904296875, |
|
"rewards/margins": 8.207174301147461, |
|
"rewards/rejected": -7.143041133880615, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 34.36444091796875, |
|
"kl": 0.0, |
|
"learning_rate": 4.260312944523471e-07, |
|
"logps/chosen": -293.53240966796875, |
|
"logps/rejected": -361.9354553222656, |
|
"loss": 0.2701, |
|
"rewards/chosen": 0.8572598695755005, |
|
"rewards/margins": 7.768780708312988, |
|
"rewards/rejected": -6.911520957946777, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 36.629608154296875, |
|
"kl": 0.0, |
|
"learning_rate": 4.2425320056899e-07, |
|
"logps/chosen": -279.6189880371094, |
|
"logps/rejected": -408.66680908203125, |
|
"loss": 0.2506, |
|
"rewards/chosen": 0.9547025561332703, |
|
"rewards/margins": 8.172720909118652, |
|
"rewards/rejected": -7.218017578125, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 38.02170944213867, |
|
"kl": 0.0, |
|
"learning_rate": 4.22475106685633e-07, |
|
"logps/chosen": -287.5861511230469, |
|
"logps/rejected": -396.6712341308594, |
|
"loss": 0.2446, |
|
"rewards/chosen": 1.0745197534561157, |
|
"rewards/margins": 7.910072326660156, |
|
"rewards/rejected": -6.835552215576172, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 38.12566375732422, |
|
"kl": 0.0, |
|
"learning_rate": 4.2069701280227595e-07, |
|
"logps/chosen": -255.07809448242188, |
|
"logps/rejected": -413.735595703125, |
|
"loss": 0.2231, |
|
"rewards/chosen": 0.9518634676933289, |
|
"rewards/margins": 8.076669692993164, |
|
"rewards/rejected": -7.124806880950928, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 34.425228118896484, |
|
"kl": 0.0, |
|
"learning_rate": 4.189189189189189e-07, |
|
"logps/chosen": -240.9487762451172, |
|
"logps/rejected": -432.3330993652344, |
|
"loss": 0.2357, |
|
"rewards/chosen": 1.2129076719284058, |
|
"rewards/margins": 8.845483779907227, |
|
"rewards/rejected": -7.632575035095215, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 34.311222076416016, |
|
"kl": 0.0, |
|
"learning_rate": 4.1714082503556185e-07, |
|
"logps/chosen": -255.38522338867188, |
|
"logps/rejected": -384.539794921875, |
|
"loss": 0.2416, |
|
"rewards/chosen": 1.1686419248580933, |
|
"rewards/margins": 8.326786041259766, |
|
"rewards/rejected": -7.158143520355225, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 33.39529800415039, |
|
"kl": 0.0, |
|
"learning_rate": 4.153627311522048e-07, |
|
"logps/chosen": -287.487060546875, |
|
"logps/rejected": -402.24749755859375, |
|
"loss": 0.2599, |
|
"rewards/chosen": 1.0036966800689697, |
|
"rewards/margins": 8.261211395263672, |
|
"rewards/rejected": -7.257514953613281, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 31.273969650268555, |
|
"kl": 0.0, |
|
"learning_rate": 4.135846372688478e-07, |
|
"logps/chosen": -297.91522216796875, |
|
"logps/rejected": -443.12835693359375, |
|
"loss": 0.2628, |
|
"rewards/chosen": 1.006545066833496, |
|
"rewards/margins": 8.5576810836792, |
|
"rewards/rejected": -7.551136016845703, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 28.262744903564453, |
|
"kl": 0.0, |
|
"learning_rate": 4.1180654338549077e-07, |
|
"logps/chosen": -328.5188903808594, |
|
"logps/rejected": -388.5363464355469, |
|
"loss": 0.2463, |
|
"rewards/chosen": 1.4216535091400146, |
|
"rewards/margins": 8.22613525390625, |
|
"rewards/rejected": -6.804482460021973, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 31.14788246154785, |
|
"kl": 0.0, |
|
"learning_rate": 4.100284495021337e-07, |
|
"logps/chosen": -300.68963623046875, |
|
"logps/rejected": -445.842529296875, |
|
"loss": 0.2304, |
|
"rewards/chosen": 1.0163438320159912, |
|
"rewards/margins": 9.177450180053711, |
|
"rewards/rejected": -8.161107063293457, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 27.50638771057129, |
|
"kl": 0.0, |
|
"learning_rate": 4.082503556187766e-07, |
|
"logps/chosen": -278.8860168457031, |
|
"logps/rejected": -449.13104248046875, |
|
"loss": 0.2188, |
|
"rewards/chosen": 1.2694514989852905, |
|
"rewards/margins": 9.333213806152344, |
|
"rewards/rejected": -8.063762664794922, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 35.6218376159668, |
|
"kl": 0.0, |
|
"learning_rate": 4.064722617354196e-07, |
|
"logps/chosen": -280.0066833496094, |
|
"logps/rejected": -398.3285827636719, |
|
"loss": 0.2508, |
|
"rewards/chosen": 1.4511891603469849, |
|
"rewards/margins": 8.606059074401855, |
|
"rewards/rejected": -7.15487003326416, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 40.70615768432617, |
|
"kl": 0.0, |
|
"learning_rate": 4.0469416785206256e-07, |
|
"logps/chosen": -331.06085205078125, |
|
"logps/rejected": -405.2574768066406, |
|
"loss": 0.2451, |
|
"rewards/chosen": 1.0914924144744873, |
|
"rewards/margins": 8.246747016906738, |
|
"rewards/rejected": -7.155255317687988, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 36.7715950012207, |
|
"kl": 0.0, |
|
"learning_rate": 4.0291607396870553e-07, |
|
"logps/chosen": -257.7249450683594, |
|
"logps/rejected": -490.748046875, |
|
"loss": 0.2151, |
|
"rewards/chosen": 1.230444073677063, |
|
"rewards/margins": 9.618934631347656, |
|
"rewards/rejected": -8.388490676879883, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 34.04132080078125, |
|
"kl": 0.0, |
|
"learning_rate": 4.011379800853485e-07, |
|
"logps/chosen": -270.65325927734375, |
|
"logps/rejected": -423.4205017089844, |
|
"loss": 0.2498, |
|
"rewards/chosen": 1.293565034866333, |
|
"rewards/margins": 8.601400375366211, |
|
"rewards/rejected": -7.307835578918457, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 34.57319259643555, |
|
"kl": 0.0, |
|
"learning_rate": 3.993598862019915e-07, |
|
"logps/chosen": -298.8142395019531, |
|
"logps/rejected": -436.76202392578125, |
|
"loss": 0.2337, |
|
"rewards/chosen": 1.1631160974502563, |
|
"rewards/margins": 9.01208782196045, |
|
"rewards/rejected": -7.848972320556641, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 39.89948272705078, |
|
"kl": 0.0, |
|
"learning_rate": 3.975817923186344e-07, |
|
"logps/chosen": -262.2899475097656, |
|
"logps/rejected": -423.69189453125, |
|
"loss": 0.2658, |
|
"rewards/chosen": 1.0408456325531006, |
|
"rewards/margins": 8.8524808883667, |
|
"rewards/rejected": -7.8116350173950195, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 31.670787811279297, |
|
"kl": 0.0, |
|
"learning_rate": 3.9580369843527737e-07, |
|
"logps/chosen": -290.41326904296875, |
|
"logps/rejected": -415.8921813964844, |
|
"loss": 0.2251, |
|
"rewards/chosen": 1.409716010093689, |
|
"rewards/margins": 8.17490291595459, |
|
"rewards/rejected": -6.7651872634887695, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 29.412275314331055, |
|
"kl": 0.0, |
|
"learning_rate": 3.940256045519203e-07, |
|
"logps/chosen": -270.35009765625, |
|
"logps/rejected": -436.0580139160156, |
|
"loss": 0.2476, |
|
"rewards/chosen": 1.0782283544540405, |
|
"rewards/margins": 8.212353706359863, |
|
"rewards/rejected": -7.134124755859375, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 34.431488037109375, |
|
"kl": 0.0, |
|
"learning_rate": 3.9224751066856327e-07, |
|
"logps/chosen": -283.64166259765625, |
|
"logps/rejected": -411.4891662597656, |
|
"loss": 0.2534, |
|
"rewards/chosen": 1.0376536846160889, |
|
"rewards/margins": 7.781105041503906, |
|
"rewards/rejected": -6.743451118469238, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 30.757057189941406, |
|
"kl": 0.0, |
|
"learning_rate": 3.9046941678520624e-07, |
|
"logps/chosen": -265.84423828125, |
|
"logps/rejected": -379.9093933105469, |
|
"loss": 0.2352, |
|
"rewards/chosen": 1.3857548236846924, |
|
"rewards/margins": 8.423876762390137, |
|
"rewards/rejected": -7.038122653961182, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 31.575454711914062, |
|
"kl": 0.0, |
|
"learning_rate": 3.886913229018492e-07, |
|
"logps/chosen": -264.2037048339844, |
|
"logps/rejected": -408.8255920410156, |
|
"loss": 0.2324, |
|
"rewards/chosen": 1.3042352199554443, |
|
"rewards/margins": 8.68317985534668, |
|
"rewards/rejected": -7.378944396972656, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 34.39158630371094, |
|
"kl": 0.0, |
|
"learning_rate": 3.8691322901849213e-07, |
|
"logps/chosen": -271.2469177246094, |
|
"logps/rejected": -422.51129150390625, |
|
"loss": 0.2041, |
|
"rewards/chosen": 1.7015053033828735, |
|
"rewards/margins": 9.282489776611328, |
|
"rewards/rejected": -7.580985069274902, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 40.40760040283203, |
|
"kl": 0.0, |
|
"learning_rate": 3.851351351351351e-07, |
|
"logps/chosen": -271.46319580078125, |
|
"logps/rejected": -394.5502014160156, |
|
"loss": 0.2627, |
|
"rewards/chosen": 1.2340047359466553, |
|
"rewards/margins": 7.9939703941345215, |
|
"rewards/rejected": -6.7599663734436035, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 38.451168060302734, |
|
"kl": 0.0, |
|
"learning_rate": 3.833570412517781e-07, |
|
"logps/chosen": -291.4540100097656, |
|
"logps/rejected": -382.9058532714844, |
|
"loss": 0.2501, |
|
"rewards/chosen": 1.120945930480957, |
|
"rewards/margins": 7.974093437194824, |
|
"rewards/rejected": -6.853147983551025, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 36.719886779785156, |
|
"kl": 0.0, |
|
"learning_rate": 3.8157894736842105e-07, |
|
"logps/chosen": -314.1134338378906, |
|
"logps/rejected": -406.29437255859375, |
|
"loss": 0.2247, |
|
"rewards/chosen": 1.3398951292037964, |
|
"rewards/margins": 9.037193298339844, |
|
"rewards/rejected": -7.6972975730896, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 31.631505966186523, |
|
"kl": 0.0, |
|
"learning_rate": 3.7980085348506403e-07, |
|
"logps/chosen": -290.2677307128906, |
|
"logps/rejected": -407.5957336425781, |
|
"loss": 0.2352, |
|
"rewards/chosen": 1.519084095954895, |
|
"rewards/margins": 8.60625171661377, |
|
"rewards/rejected": -7.087167263031006, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 25.598243713378906, |
|
"kl": 0.0, |
|
"learning_rate": 3.7802275960170695e-07, |
|
"logps/chosen": -261.9007263183594, |
|
"logps/rejected": -433.05047607421875, |
|
"loss": 0.2367, |
|
"rewards/chosen": 1.5405170917510986, |
|
"rewards/margins": 9.024420738220215, |
|
"rewards/rejected": -7.483903408050537, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 30.555438995361328, |
|
"kl": 0.0, |
|
"learning_rate": 3.7624466571834987e-07, |
|
"logps/chosen": -278.92108154296875, |
|
"logps/rejected": -417.1661682128906, |
|
"loss": 0.2369, |
|
"rewards/chosen": 1.382774829864502, |
|
"rewards/margins": 8.678990364074707, |
|
"rewards/rejected": -7.296216011047363, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 25.65595245361328, |
|
"kl": 0.0, |
|
"learning_rate": 3.7446657183499284e-07, |
|
"logps/chosen": -259.3731384277344, |
|
"logps/rejected": -409.6930236816406, |
|
"loss": 0.2376, |
|
"rewards/chosen": 1.2507401704788208, |
|
"rewards/margins": 8.60232162475586, |
|
"rewards/rejected": -7.351581573486328, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 36.59701156616211, |
|
"kl": 0.0, |
|
"learning_rate": 3.726884779516358e-07, |
|
"logps/chosen": -287.05059814453125, |
|
"logps/rejected": -396.98638916015625, |
|
"loss": 0.2075, |
|
"rewards/chosen": 1.687380075454712, |
|
"rewards/margins": 9.18794059753418, |
|
"rewards/rejected": -7.5005598068237305, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 36.45357131958008, |
|
"kl": 0.0, |
|
"learning_rate": 3.709103840682788e-07, |
|
"logps/chosen": -267.5130920410156, |
|
"logps/rejected": -435.63885498046875, |
|
"loss": 0.2306, |
|
"rewards/chosen": 1.1616590023040771, |
|
"rewards/margins": 9.239825248718262, |
|
"rewards/rejected": -8.078166007995605, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 36.515785217285156, |
|
"kl": 0.0, |
|
"learning_rate": 3.6913229018492176e-07, |
|
"logps/chosen": -281.87744140625, |
|
"logps/rejected": -413.88006591796875, |
|
"loss": 0.2496, |
|
"rewards/chosen": 1.3109562397003174, |
|
"rewards/margins": 8.770210266113281, |
|
"rewards/rejected": -7.459254264831543, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 31.515460968017578, |
|
"kl": 0.0, |
|
"learning_rate": 3.6735419630156474e-07, |
|
"logps/chosen": -224.22750854492188, |
|
"logps/rejected": -424.96649169921875, |
|
"loss": 0.2045, |
|
"rewards/chosen": 1.2929831743240356, |
|
"rewards/margins": 8.969011306762695, |
|
"rewards/rejected": -7.676026821136475, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 29.997665405273438, |
|
"kl": 0.0, |
|
"learning_rate": 3.655761024182077e-07, |
|
"logps/chosen": -289.0644836425781, |
|
"logps/rejected": -391.5130920410156, |
|
"loss": 0.252, |
|
"rewards/chosen": 1.3215197324752808, |
|
"rewards/margins": 9.083866119384766, |
|
"rewards/rejected": -7.7623467445373535, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 33.1764030456543, |
|
"kl": 0.0, |
|
"learning_rate": 3.637980085348506e-07, |
|
"logps/chosen": -252.8762664794922, |
|
"logps/rejected": -432.19952392578125, |
|
"loss": 0.2093, |
|
"rewards/chosen": 1.495452880859375, |
|
"rewards/margins": 9.005461692810059, |
|
"rewards/rejected": -7.510009765625, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 28.675111770629883, |
|
"kl": 0.0, |
|
"learning_rate": 3.6201991465149355e-07, |
|
"logps/chosen": -260.71624755859375, |
|
"logps/rejected": -408.84759521484375, |
|
"loss": 0.206, |
|
"rewards/chosen": 1.433053970336914, |
|
"rewards/margins": 8.826322555541992, |
|
"rewards/rejected": -7.393267631530762, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 39.82828903198242, |
|
"kl": 0.0, |
|
"learning_rate": 3.602418207681365e-07, |
|
"logps/chosen": -365.56134033203125, |
|
"logps/rejected": -382.9657897949219, |
|
"loss": 0.2252, |
|
"rewards/chosen": 1.4427438974380493, |
|
"rewards/margins": 9.311979293823242, |
|
"rewards/rejected": -7.869235992431641, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 29.757482528686523, |
|
"kl": 0.0, |
|
"learning_rate": 3.584637268847795e-07, |
|
"logps/chosen": -270.66815185546875, |
|
"logps/rejected": -418.73883056640625, |
|
"loss": 0.2445, |
|
"rewards/chosen": 1.2224515676498413, |
|
"rewards/margins": 9.511232376098633, |
|
"rewards/rejected": -8.28878116607666, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 32.810874938964844, |
|
"kl": 0.0, |
|
"learning_rate": 3.5668563300142247e-07, |
|
"logps/chosen": -280.0058288574219, |
|
"logps/rejected": -459.1136779785156, |
|
"loss": 0.2115, |
|
"rewards/chosen": 1.7389733791351318, |
|
"rewards/margins": 9.870342254638672, |
|
"rewards/rejected": -8.131368637084961, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 34.706581115722656, |
|
"kl": 0.0, |
|
"learning_rate": 3.5490753911806545e-07, |
|
"logps/chosen": -251.6792755126953, |
|
"logps/rejected": -390.3374328613281, |
|
"loss": 0.2354, |
|
"rewards/chosen": 1.290079116821289, |
|
"rewards/margins": 8.472095489501953, |
|
"rewards/rejected": -7.182016849517822, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 37.13447952270508, |
|
"kl": 0.0, |
|
"learning_rate": 3.5312944523470837e-07, |
|
"logps/chosen": -291.58502197265625, |
|
"logps/rejected": -395.8794860839844, |
|
"loss": 0.2129, |
|
"rewards/chosen": 1.712994933128357, |
|
"rewards/margins": 9.457635879516602, |
|
"rewards/rejected": -7.744640350341797, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 37.750213623046875, |
|
"kl": 0.0, |
|
"learning_rate": 3.5135135135135134e-07, |
|
"logps/chosen": -275.55645751953125, |
|
"logps/rejected": -421.70611572265625, |
|
"loss": 0.2109, |
|
"rewards/chosen": 1.398840308189392, |
|
"rewards/margins": 9.466266632080078, |
|
"rewards/rejected": -8.067425727844238, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 29.82187843322754, |
|
"kl": 0.0, |
|
"learning_rate": 3.495732574679943e-07, |
|
"logps/chosen": -275.10003662109375, |
|
"logps/rejected": -414.6404724121094, |
|
"loss": 0.251, |
|
"rewards/chosen": 1.1742942333221436, |
|
"rewards/margins": 8.796428680419922, |
|
"rewards/rejected": -7.622133731842041, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 34.12580490112305, |
|
"kl": 0.0, |
|
"learning_rate": 3.4779516358463724e-07, |
|
"logps/chosen": -273.7625427246094, |
|
"logps/rejected": -423.67364501953125, |
|
"loss": 0.2324, |
|
"rewards/chosen": 1.2637519836425781, |
|
"rewards/margins": 9.396784782409668, |
|
"rewards/rejected": -8.13303279876709, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 32.073856353759766, |
|
"kl": 0.0, |
|
"learning_rate": 3.460170697012802e-07, |
|
"logps/chosen": -240.8434295654297, |
|
"logps/rejected": -415.4698791503906, |
|
"loss": 0.2355, |
|
"rewards/chosen": 1.2490732669830322, |
|
"rewards/margins": 8.936406135559082, |
|
"rewards/rejected": -7.6873321533203125, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 33.48440170288086, |
|
"kl": 0.0, |
|
"learning_rate": 3.442389758179232e-07, |
|
"logps/chosen": -281.2816467285156, |
|
"logps/rejected": -413.6434631347656, |
|
"loss": 0.2315, |
|
"rewards/chosen": 1.444791555404663, |
|
"rewards/margins": 8.960540771484375, |
|
"rewards/rejected": -7.515749454498291, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 36.15184020996094, |
|
"kl": 0.0, |
|
"learning_rate": 3.424608819345661e-07, |
|
"logps/chosen": -294.50115966796875, |
|
"logps/rejected": -425.34393310546875, |
|
"loss": 0.2232, |
|
"rewards/chosen": 1.6126306056976318, |
|
"rewards/margins": 9.582573890686035, |
|
"rewards/rejected": -7.969943046569824, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 29.551122665405273, |
|
"kl": 0.0, |
|
"learning_rate": 3.406827880512091e-07, |
|
"logps/chosen": -267.5435485839844, |
|
"logps/rejected": -402.02362060546875, |
|
"loss": 0.2326, |
|
"rewards/chosen": 1.3285887241363525, |
|
"rewards/margins": 8.892502784729004, |
|
"rewards/rejected": -7.563913822174072, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 31.5975284576416, |
|
"kl": 0.0, |
|
"learning_rate": 3.3890469416785205e-07, |
|
"logps/chosen": -254.05795288085938, |
|
"logps/rejected": -413.3831481933594, |
|
"loss": 0.2466, |
|
"rewards/chosen": 1.2830978631973267, |
|
"rewards/margins": 9.083808898925781, |
|
"rewards/rejected": -7.800711154937744, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 32.5655632019043, |
|
"kl": 0.0, |
|
"learning_rate": 3.37126600284495e-07, |
|
"logps/chosen": -262.8067626953125, |
|
"logps/rejected": -406.58074951171875, |
|
"loss": 0.2192, |
|
"rewards/chosen": 1.6863523721694946, |
|
"rewards/margins": 9.860769271850586, |
|
"rewards/rejected": -8.174417495727539, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 35.858154296875, |
|
"kl": 0.0, |
|
"learning_rate": 3.35348506401138e-07, |
|
"logps/chosen": -282.96319580078125, |
|
"logps/rejected": -426.3731384277344, |
|
"loss": 0.2181, |
|
"rewards/chosen": 1.4725250005722046, |
|
"rewards/margins": 9.46699333190918, |
|
"rewards/rejected": -7.994467735290527, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 40.72344970703125, |
|
"kl": 0.0, |
|
"learning_rate": 3.335704125177809e-07, |
|
"logps/chosen": -268.4210510253906, |
|
"logps/rejected": -439.38671875, |
|
"loss": 0.2173, |
|
"rewards/chosen": 1.4876525402069092, |
|
"rewards/margins": 10.092061996459961, |
|
"rewards/rejected": -8.604409217834473, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 30.59383773803711, |
|
"kl": 0.0, |
|
"learning_rate": 3.3179231863442384e-07, |
|
"logps/chosen": -287.0130310058594, |
|
"logps/rejected": -452.73974609375, |
|
"loss": 0.2039, |
|
"rewards/chosen": 1.455565333366394, |
|
"rewards/margins": 10.204021453857422, |
|
"rewards/rejected": -8.748456001281738, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 33.28047561645508, |
|
"kl": 0.0, |
|
"learning_rate": 3.300142247510668e-07, |
|
"logps/chosen": -244.6796875, |
|
"logps/rejected": -446.35272216796875, |
|
"loss": 0.233, |
|
"rewards/chosen": 1.5680058002471924, |
|
"rewards/margins": 9.671673774719238, |
|
"rewards/rejected": -8.103668212890625, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 30.61500358581543, |
|
"kl": 0.0, |
|
"learning_rate": 3.282361308677098e-07, |
|
"logps/chosen": -253.5943145751953, |
|
"logps/rejected": -432.730224609375, |
|
"loss": 0.2031, |
|
"rewards/chosen": 1.5605218410491943, |
|
"rewards/margins": 9.725998878479004, |
|
"rewards/rejected": -8.16547679901123, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 33.63188934326172, |
|
"kl": 0.0, |
|
"learning_rate": 3.2645803698435276e-07, |
|
"logps/chosen": -271.66546630859375, |
|
"logps/rejected": -433.5645446777344, |
|
"loss": 0.2207, |
|
"rewards/chosen": 1.5964945554733276, |
|
"rewards/margins": 9.680837631225586, |
|
"rewards/rejected": -8.084342956542969, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 33.40596389770508, |
|
"kl": 0.0, |
|
"learning_rate": 3.2467994310099573e-07, |
|
"logps/chosen": -285.18707275390625, |
|
"logps/rejected": -403.45782470703125, |
|
"loss": 0.2294, |
|
"rewards/chosen": 1.3258507251739502, |
|
"rewards/margins": 8.883944511413574, |
|
"rewards/rejected": -7.558093070983887, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 30.218576431274414, |
|
"kl": 0.0, |
|
"learning_rate": 3.229018492176387e-07, |
|
"logps/chosen": -286.4705505371094, |
|
"logps/rejected": -406.6639099121094, |
|
"loss": 0.2219, |
|
"rewards/chosen": 1.627684235572815, |
|
"rewards/margins": 8.956671714782715, |
|
"rewards/rejected": -7.328988075256348, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 32.87017059326172, |
|
"kl": 0.0, |
|
"learning_rate": 3.211237553342817e-07, |
|
"logps/chosen": -279.32794189453125, |
|
"logps/rejected": -414.5611267089844, |
|
"loss": 0.2147, |
|
"rewards/chosen": 1.4831647872924805, |
|
"rewards/margins": 9.314391136169434, |
|
"rewards/rejected": -7.831225395202637, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 32.71424865722656, |
|
"kl": 0.0, |
|
"learning_rate": 3.193456614509246e-07, |
|
"logps/chosen": -304.61181640625, |
|
"logps/rejected": -443.0315856933594, |
|
"loss": 0.1965, |
|
"rewards/chosen": 1.8419644832611084, |
|
"rewards/margins": 10.106998443603516, |
|
"rewards/rejected": -8.265034675598145, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 33.63492965698242, |
|
"kl": 0.0, |
|
"learning_rate": 3.175675675675675e-07, |
|
"logps/chosen": -310.9371032714844, |
|
"logps/rejected": -430.70367431640625, |
|
"loss": 0.2095, |
|
"rewards/chosen": 1.5251778364181519, |
|
"rewards/margins": 9.424676895141602, |
|
"rewards/rejected": -7.899499416351318, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 28.59478759765625, |
|
"kl": 0.0, |
|
"learning_rate": 3.157894736842105e-07, |
|
"logps/chosen": -282.77020263671875, |
|
"logps/rejected": -390.97052001953125, |
|
"loss": 0.2338, |
|
"rewards/chosen": 1.6484334468841553, |
|
"rewards/margins": 9.223466873168945, |
|
"rewards/rejected": -7.575033664703369, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 30.481687545776367, |
|
"kl": 0.0, |
|
"learning_rate": 3.1401137980085347e-07, |
|
"logps/chosen": -268.91680908203125, |
|
"logps/rejected": -405.0919494628906, |
|
"loss": 0.2032, |
|
"rewards/chosen": 2.029101610183716, |
|
"rewards/margins": 10.308364868164062, |
|
"rewards/rejected": -8.279263496398926, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 31.664047241210938, |
|
"kl": 0.0, |
|
"learning_rate": 3.1223328591749644e-07, |
|
"logps/chosen": -276.86871337890625, |
|
"logps/rejected": -431.42156982421875, |
|
"loss": 0.1914, |
|
"rewards/chosen": 2.007537603378296, |
|
"rewards/margins": 10.265495300292969, |
|
"rewards/rejected": -8.257957458496094, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 34.09368896484375, |
|
"kl": 0.0, |
|
"learning_rate": 3.104551920341394e-07, |
|
"logps/chosen": -281.3551025390625, |
|
"logps/rejected": -436.32196044921875, |
|
"loss": 0.2118, |
|
"rewards/chosen": 1.5368009805679321, |
|
"rewards/margins": 9.339311599731445, |
|
"rewards/rejected": -7.8025102615356445, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 29.23426055908203, |
|
"kl": 0.0, |
|
"learning_rate": 3.0867709815078234e-07, |
|
"logps/chosen": -260.82012939453125, |
|
"logps/rejected": -410.68548583984375, |
|
"loss": 0.2196, |
|
"rewards/chosen": 1.3135395050048828, |
|
"rewards/margins": 9.747769355773926, |
|
"rewards/rejected": -8.434229850769043, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 38.7562370300293, |
|
"kl": 0.0, |
|
"learning_rate": 3.068990042674253e-07, |
|
"logps/chosen": -308.45013427734375, |
|
"logps/rejected": -457.8919982910156, |
|
"loss": 0.2002, |
|
"rewards/chosen": 1.5923526287078857, |
|
"rewards/margins": 9.828653335571289, |
|
"rewards/rejected": -8.236300468444824, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 30.01862907409668, |
|
"kl": 0.0, |
|
"learning_rate": 3.051209103840683e-07, |
|
"logps/chosen": -247.7996368408203, |
|
"logps/rejected": -413.0341796875, |
|
"loss": 0.1966, |
|
"rewards/chosen": 1.7354379892349243, |
|
"rewards/margins": 10.121583938598633, |
|
"rewards/rejected": -8.38614559173584, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 30.769304275512695, |
|
"kl": 0.0, |
|
"learning_rate": 3.033428165007112e-07, |
|
"logps/chosen": -307.517822265625, |
|
"logps/rejected": -431.41375732421875, |
|
"loss": 0.2173, |
|
"rewards/chosen": 1.3397324085235596, |
|
"rewards/margins": 9.70555305480957, |
|
"rewards/rejected": -8.36582088470459, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 35.207061767578125, |
|
"kl": 0.0, |
|
"learning_rate": 3.015647226173542e-07, |
|
"logps/chosen": -310.77899169921875, |
|
"logps/rejected": -399.6103515625, |
|
"loss": 0.2141, |
|
"rewards/chosen": 1.5721590518951416, |
|
"rewards/margins": 9.228109359741211, |
|
"rewards/rejected": -7.655949592590332, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 28.209081649780273, |
|
"kl": 0.0, |
|
"learning_rate": 2.9978662873399715e-07, |
|
"logps/chosen": -259.1542663574219, |
|
"logps/rejected": -434.557373046875, |
|
"loss": 0.2125, |
|
"rewards/chosen": 1.7099430561065674, |
|
"rewards/margins": 9.807859420776367, |
|
"rewards/rejected": -8.097915649414062, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 29.12105941772461, |
|
"kl": 0.0, |
|
"learning_rate": 2.9800853485064007e-07, |
|
"logps/chosen": -256.1063232421875, |
|
"logps/rejected": -449.45684814453125, |
|
"loss": 0.2144, |
|
"rewards/chosen": 1.423112154006958, |
|
"rewards/margins": 10.24951457977295, |
|
"rewards/rejected": -8.82640266418457, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 31.086488723754883, |
|
"kl": 0.0, |
|
"learning_rate": 2.9623044096728305e-07, |
|
"logps/chosen": -332.27093505859375, |
|
"logps/rejected": -441.17303466796875, |
|
"loss": 0.1925, |
|
"rewards/chosen": 1.8641254901885986, |
|
"rewards/margins": 10.427138328552246, |
|
"rewards/rejected": -8.563013076782227, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 30.249380111694336, |
|
"kl": 0.0, |
|
"learning_rate": 2.94452347083926e-07, |
|
"logps/chosen": -253.9059600830078, |
|
"logps/rejected": -411.62481689453125, |
|
"loss": 0.2097, |
|
"rewards/chosen": 1.827418565750122, |
|
"rewards/margins": 9.92874813079834, |
|
"rewards/rejected": -8.10132884979248, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 34.48613739013672, |
|
"kl": 0.0, |
|
"learning_rate": 2.92674253200569e-07, |
|
"logps/chosen": -287.5168151855469, |
|
"logps/rejected": -488.13800048828125, |
|
"loss": 0.1975, |
|
"rewards/chosen": 1.7286927700042725, |
|
"rewards/margins": 10.497554779052734, |
|
"rewards/rejected": -8.768861770629883, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 26.248523712158203, |
|
"kl": 0.0, |
|
"learning_rate": 2.9089615931721197e-07, |
|
"logps/chosen": -228.9366912841797, |
|
"logps/rejected": -417.3013610839844, |
|
"loss": 0.2257, |
|
"rewards/chosen": 1.364012360572815, |
|
"rewards/margins": 9.509763717651367, |
|
"rewards/rejected": -8.145750999450684, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 29.836442947387695, |
|
"kl": 0.0, |
|
"learning_rate": 2.8911806543385494e-07, |
|
"logps/chosen": -267.7503967285156, |
|
"logps/rejected": -438.37884521484375, |
|
"loss": 0.1908, |
|
"rewards/chosen": 1.686102271080017, |
|
"rewards/margins": 10.126192092895508, |
|
"rewards/rejected": -8.440089225769043, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 28.22798728942871, |
|
"kl": 0.0, |
|
"learning_rate": 2.873399715504978e-07, |
|
"logps/chosen": -268.92193603515625, |
|
"logps/rejected": -439.41021728515625, |
|
"loss": 0.1942, |
|
"rewards/chosen": 1.7055965662002563, |
|
"rewards/margins": 9.828977584838867, |
|
"rewards/rejected": -8.123380661010742, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 30.871654510498047, |
|
"kl": 0.0, |
|
"learning_rate": 2.855618776671408e-07, |
|
"logps/chosen": -277.0738220214844, |
|
"logps/rejected": -409.88104248046875, |
|
"loss": 0.2134, |
|
"rewards/chosen": 1.6514962911605835, |
|
"rewards/margins": 10.07313346862793, |
|
"rewards/rejected": -8.421636581420898, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 31.168502807617188, |
|
"kl": 0.0, |
|
"learning_rate": 2.8378378378378376e-07, |
|
"logps/chosen": -269.1263732910156, |
|
"logps/rejected": -439.88031005859375, |
|
"loss": 0.1708, |
|
"rewards/chosen": 1.8253052234649658, |
|
"rewards/margins": 10.730072021484375, |
|
"rewards/rejected": -8.904766082763672, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 29.153135299682617, |
|
"kl": 0.0, |
|
"learning_rate": 2.8200568990042673e-07, |
|
"logps/chosen": -253.00991821289062, |
|
"logps/rejected": -433.29962158203125, |
|
"loss": 0.2071, |
|
"rewards/chosen": 1.6923249959945679, |
|
"rewards/margins": 9.882917404174805, |
|
"rewards/rejected": -8.190591812133789, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 30.694440841674805, |
|
"kl": 0.0, |
|
"learning_rate": 2.802275960170697e-07, |
|
"logps/chosen": -291.7933044433594, |
|
"logps/rejected": -395.35870361328125, |
|
"loss": 0.2081, |
|
"rewards/chosen": 1.7577203512191772, |
|
"rewards/margins": 9.164716720581055, |
|
"rewards/rejected": -7.406996250152588, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 33.14602279663086, |
|
"kl": 0.0, |
|
"learning_rate": 2.784495021337127e-07, |
|
"logps/chosen": -314.156005859375, |
|
"logps/rejected": -397.37933349609375, |
|
"loss": 0.2057, |
|
"rewards/chosen": 1.6907199621200562, |
|
"rewards/margins": 9.452787399291992, |
|
"rewards/rejected": -7.7620673179626465, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 38.135986328125, |
|
"kl": 0.0, |
|
"learning_rate": 2.766714082503556e-07, |
|
"logps/chosen": -283.73907470703125, |
|
"logps/rejected": -405.26409912109375, |
|
"loss": 0.2019, |
|
"rewards/chosen": 1.7740901708602905, |
|
"rewards/margins": 10.007485389709473, |
|
"rewards/rejected": -8.23339557647705, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 33.268096923828125, |
|
"kl": 0.0, |
|
"learning_rate": 2.7489331436699857e-07, |
|
"logps/chosen": -285.5155944824219, |
|
"logps/rejected": -444.726806640625, |
|
"loss": 0.2084, |
|
"rewards/chosen": 1.6661376953125, |
|
"rewards/margins": 9.84503173828125, |
|
"rewards/rejected": -8.178893089294434, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 37.14868927001953, |
|
"kl": 0.0, |
|
"learning_rate": 2.7311522048364154e-07, |
|
"logps/chosen": -313.14813232421875, |
|
"logps/rejected": -434.33349609375, |
|
"loss": 0.216, |
|
"rewards/chosen": 1.766885757446289, |
|
"rewards/margins": 10.550158500671387, |
|
"rewards/rejected": -8.783272743225098, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 27.936447143554688, |
|
"kl": 0.0, |
|
"learning_rate": 2.7133712660028446e-07, |
|
"logps/chosen": -328.349853515625, |
|
"logps/rejected": -440.5518493652344, |
|
"loss": 0.1845, |
|
"rewards/chosen": 2.068641424179077, |
|
"rewards/margins": 10.594073295593262, |
|
"rewards/rejected": -8.525431632995605, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 34.39814376831055, |
|
"kl": 0.0, |
|
"learning_rate": 2.6955903271692744e-07, |
|
"logps/chosen": -265.58770751953125, |
|
"logps/rejected": -437.67791748046875, |
|
"loss": 0.2087, |
|
"rewards/chosen": 1.520298719406128, |
|
"rewards/margins": 10.759889602661133, |
|
"rewards/rejected": -9.239590644836426, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 34.08564758300781, |
|
"kl": 0.0, |
|
"learning_rate": 2.677809388335704e-07, |
|
"logps/chosen": -292.2442321777344, |
|
"logps/rejected": -440.2151794433594, |
|
"loss": 0.1862, |
|
"rewards/chosen": 1.5941486358642578, |
|
"rewards/margins": 10.367888450622559, |
|
"rewards/rejected": -8.7737398147583, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 28.865568161010742, |
|
"kl": 0.0, |
|
"learning_rate": 2.6600284495021333e-07, |
|
"logps/chosen": -287.80438232421875, |
|
"logps/rejected": -450.5841369628906, |
|
"loss": 0.1758, |
|
"rewards/chosen": 2.1789932250976562, |
|
"rewards/margins": 10.92069149017334, |
|
"rewards/rejected": -8.741698265075684, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 30.55199432373047, |
|
"kl": 0.0, |
|
"learning_rate": 2.642247510668563e-07, |
|
"logps/chosen": -272.82916259765625, |
|
"logps/rejected": -385.1436462402344, |
|
"loss": 0.1674, |
|
"rewards/chosen": 2.123709201812744, |
|
"rewards/margins": 10.312447547912598, |
|
"rewards/rejected": -8.188737869262695, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 29.896678924560547, |
|
"kl": 0.0, |
|
"learning_rate": 2.624466571834993e-07, |
|
"logps/chosen": -270.98382568359375, |
|
"logps/rejected": -423.6415100097656, |
|
"loss": 0.1591, |
|
"rewards/chosen": 1.9183921813964844, |
|
"rewards/margins": 10.909276008605957, |
|
"rewards/rejected": -8.990884780883789, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 36.479026794433594, |
|
"kl": 0.0, |
|
"learning_rate": 2.6066856330014225e-07, |
|
"logps/chosen": -287.6585693359375, |
|
"logps/rejected": -416.476806640625, |
|
"loss": 0.2022, |
|
"rewards/chosen": 1.8126230239868164, |
|
"rewards/margins": 10.275026321411133, |
|
"rewards/rejected": -8.462403297424316, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 29.992250442504883, |
|
"kl": 0.0, |
|
"learning_rate": 2.5889046941678523e-07, |
|
"logps/chosen": -292.3607482910156, |
|
"logps/rejected": -415.16961669921875, |
|
"loss": 0.1922, |
|
"rewards/chosen": 1.6790504455566406, |
|
"rewards/margins": 10.113423347473145, |
|
"rewards/rejected": -8.434372901916504, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 27.18990707397461, |
|
"kl": 0.0, |
|
"learning_rate": 2.5711237553342815e-07, |
|
"logps/chosen": -300.2317810058594, |
|
"logps/rejected": -464.2012634277344, |
|
"loss": 0.1886, |
|
"rewards/chosen": 1.892055869102478, |
|
"rewards/margins": 11.013105392456055, |
|
"rewards/rejected": -9.121049880981445, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 31.40631866455078, |
|
"kl": 0.0, |
|
"learning_rate": 2.5533428165007107e-07, |
|
"logps/chosen": -250.4829559326172, |
|
"logps/rejected": -420.1806640625, |
|
"loss": 0.173, |
|
"rewards/chosen": 1.9857699871063232, |
|
"rewards/margins": 10.808717727661133, |
|
"rewards/rejected": -8.82294750213623, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 33.97587203979492, |
|
"kl": 0.0, |
|
"learning_rate": 2.5355618776671404e-07, |
|
"logps/chosen": -272.48077392578125, |
|
"logps/rejected": -404.07952880859375, |
|
"loss": 0.2001, |
|
"rewards/chosen": 1.8833481073379517, |
|
"rewards/margins": 9.897645950317383, |
|
"rewards/rejected": -8.014297485351562, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 25.89413833618164, |
|
"kl": 0.0, |
|
"learning_rate": 2.51778093883357e-07, |
|
"logps/chosen": -262.54461669921875, |
|
"logps/rejected": -457.70391845703125, |
|
"loss": 0.1959, |
|
"rewards/chosen": 1.7342389822006226, |
|
"rewards/margins": 11.080788612365723, |
|
"rewards/rejected": -9.346549987792969, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 26.989194869995117, |
|
"kl": 0.0, |
|
"learning_rate": 2.5e-07, |
|
"logps/chosen": -281.6241455078125, |
|
"logps/rejected": -437.0460510253906, |
|
"loss": 0.1866, |
|
"rewards/chosen": 2.0029733180999756, |
|
"rewards/margins": 10.16287899017334, |
|
"rewards/rejected": -8.159906387329102, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 25.836423873901367, |
|
"kl": 0.0, |
|
"learning_rate": 2.4822190611664296e-07, |
|
"logps/chosen": -244.0792999267578, |
|
"logps/rejected": -401.6324768066406, |
|
"loss": 0.1968, |
|
"rewards/chosen": 2.024254322052002, |
|
"rewards/margins": 9.46214771270752, |
|
"rewards/rejected": -7.437892913818359, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 33.39433670043945, |
|
"kl": 0.0, |
|
"learning_rate": 2.4644381223328594e-07, |
|
"logps/chosen": -309.8813781738281, |
|
"logps/rejected": -404.27130126953125, |
|
"loss": 0.186, |
|
"rewards/chosen": 2.3213727474212646, |
|
"rewards/margins": 10.310930252075195, |
|
"rewards/rejected": -7.98955774307251, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 30.139936447143555, |
|
"kl": 0.0, |
|
"learning_rate": 2.4466571834992886e-07, |
|
"logps/chosen": -258.7943420410156, |
|
"logps/rejected": -407.911865234375, |
|
"loss": 0.1945, |
|
"rewards/chosen": 1.719043493270874, |
|
"rewards/margins": 9.795989990234375, |
|
"rewards/rejected": -8.076947212219238, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 28.36025047302246, |
|
"kl": 0.0, |
|
"learning_rate": 2.4288762446657183e-07, |
|
"logps/chosen": -289.2504577636719, |
|
"logps/rejected": -441.6619567871094, |
|
"loss": 0.2112, |
|
"rewards/chosen": 1.8992469310760498, |
|
"rewards/margins": 10.501276969909668, |
|
"rewards/rejected": -8.602029800415039, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 32.14490509033203, |
|
"kl": 0.0, |
|
"learning_rate": 2.411095305832148e-07, |
|
"logps/chosen": -261.7134704589844, |
|
"logps/rejected": -387.33746337890625, |
|
"loss": 0.2077, |
|
"rewards/chosen": 1.7473371028900146, |
|
"rewards/margins": 10.049051284790039, |
|
"rewards/rejected": -8.301712989807129, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 26.520404815673828, |
|
"kl": 0.0, |
|
"learning_rate": 2.393314366998578e-07, |
|
"logps/chosen": -289.41156005859375, |
|
"logps/rejected": -428.0794982910156, |
|
"loss": 0.1848, |
|
"rewards/chosen": 1.9077268838882446, |
|
"rewards/margins": 10.590463638305664, |
|
"rewards/rejected": -8.68273639678955, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 33.48114776611328, |
|
"kl": 0.0, |
|
"learning_rate": 2.375533428165007e-07, |
|
"logps/chosen": -222.7843780517578, |
|
"logps/rejected": -410.1954040527344, |
|
"loss": 0.2028, |
|
"rewards/chosen": 1.7176685333251953, |
|
"rewards/margins": 10.505666732788086, |
|
"rewards/rejected": -8.787999153137207, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 27.82444190979004, |
|
"kl": 0.0, |
|
"learning_rate": 2.3577524893314365e-07, |
|
"logps/chosen": -287.64862060546875, |
|
"logps/rejected": -468.0263671875, |
|
"loss": 0.1755, |
|
"rewards/chosen": 1.7359222173690796, |
|
"rewards/margins": 10.884846687316895, |
|
"rewards/rejected": -9.14892578125, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 25.083499908447266, |
|
"kl": 0.0, |
|
"learning_rate": 2.3399715504978662e-07, |
|
"logps/chosen": -240.41531372070312, |
|
"logps/rejected": -419.208740234375, |
|
"loss": 0.1942, |
|
"rewards/chosen": 1.688421607017517, |
|
"rewards/margins": 10.170954704284668, |
|
"rewards/rejected": -8.482534408569336, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 30.813711166381836, |
|
"kl": 0.0, |
|
"learning_rate": 2.322190611664296e-07, |
|
"logps/chosen": -289.48516845703125, |
|
"logps/rejected": -434.6170349121094, |
|
"loss": 0.2155, |
|
"rewards/chosen": 1.709018349647522, |
|
"rewards/margins": 10.145505905151367, |
|
"rewards/rejected": -8.436488151550293, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 32.20737075805664, |
|
"kl": 0.0, |
|
"learning_rate": 2.304409672830725e-07, |
|
"logps/chosen": -250.640380859375, |
|
"logps/rejected": -414.97930908203125, |
|
"loss": 0.2066, |
|
"rewards/chosen": 1.4742424488067627, |
|
"rewards/margins": 9.822726249694824, |
|
"rewards/rejected": -8.348483085632324, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 36.69318389892578, |
|
"kl": 0.0, |
|
"learning_rate": 2.2866287339971549e-07, |
|
"logps/chosen": -299.4349060058594, |
|
"logps/rejected": -427.6712341308594, |
|
"loss": 0.1928, |
|
"rewards/chosen": 1.8161312341690063, |
|
"rewards/margins": 10.13882827758789, |
|
"rewards/rejected": -8.322697639465332, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 32.18443298339844, |
|
"kl": 0.0, |
|
"learning_rate": 2.2688477951635846e-07, |
|
"logps/chosen": -243.90664672851562, |
|
"logps/rejected": -439.81182861328125, |
|
"loss": 0.193, |
|
"rewards/chosen": 1.9404337406158447, |
|
"rewards/margins": 10.789703369140625, |
|
"rewards/rejected": -8.84926986694336, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 27.605424880981445, |
|
"kl": 0.0, |
|
"learning_rate": 2.251066856330014e-07, |
|
"logps/chosen": -309.91455078125, |
|
"logps/rejected": -394.070068359375, |
|
"loss": 0.169, |
|
"rewards/chosen": 1.9091155529022217, |
|
"rewards/margins": 10.408263206481934, |
|
"rewards/rejected": -8.499147415161133, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 29.7116756439209, |
|
"kl": 0.0, |
|
"learning_rate": 2.2332859174964438e-07, |
|
"logps/chosen": -249.909423828125, |
|
"logps/rejected": -431.39007568359375, |
|
"loss": 0.2046, |
|
"rewards/chosen": 1.6568987369537354, |
|
"rewards/margins": 10.274327278137207, |
|
"rewards/rejected": -8.617426872253418, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 22.485855102539062, |
|
"kl": 0.0, |
|
"learning_rate": 2.2155049786628733e-07, |
|
"logps/chosen": -263.9428405761719, |
|
"logps/rejected": -435.47802734375, |
|
"loss": 0.1713, |
|
"rewards/chosen": 1.8609449863433838, |
|
"rewards/margins": 11.174790382385254, |
|
"rewards/rejected": -9.31384563446045, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 29.809690475463867, |
|
"kl": 0.0, |
|
"learning_rate": 2.1977240398293027e-07, |
|
"logps/chosen": -252.637451171875, |
|
"logps/rejected": -403.4510803222656, |
|
"loss": 0.1744, |
|
"rewards/chosen": 1.9656826257705688, |
|
"rewards/margins": 10.494858741760254, |
|
"rewards/rejected": -8.529176712036133, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 26.905038833618164, |
|
"kl": 0.0, |
|
"learning_rate": 2.1799431009957325e-07, |
|
"logps/chosen": -269.87066650390625, |
|
"logps/rejected": -427.05584716796875, |
|
"loss": 0.1693, |
|
"rewards/chosen": 2.0619406700134277, |
|
"rewards/margins": 11.020936012268066, |
|
"rewards/rejected": -8.95899486541748, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 29.09079933166504, |
|
"kl": 0.0, |
|
"learning_rate": 2.1621621621621622e-07, |
|
"logps/chosen": -288.0301208496094, |
|
"logps/rejected": -480.09796142578125, |
|
"loss": 0.2055, |
|
"rewards/chosen": 1.6792428493499756, |
|
"rewards/margins": 10.952609062194824, |
|
"rewards/rejected": -9.273366928100586, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 28.329050064086914, |
|
"kl": 0.0, |
|
"learning_rate": 2.1443812233285914e-07, |
|
"logps/chosen": -249.3640594482422, |
|
"logps/rejected": -464.89483642578125, |
|
"loss": 0.1886, |
|
"rewards/chosen": 2.1078505516052246, |
|
"rewards/margins": 11.554121017456055, |
|
"rewards/rejected": -9.446270942687988, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 27.633623123168945, |
|
"kl": 0.0, |
|
"learning_rate": 2.1266002844950212e-07, |
|
"logps/chosen": -282.1102294921875, |
|
"logps/rejected": -424.1873474121094, |
|
"loss": 0.1832, |
|
"rewards/chosen": 1.9060941934585571, |
|
"rewards/margins": 10.455194473266602, |
|
"rewards/rejected": -8.549099922180176, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 28.65962028503418, |
|
"kl": 0.0, |
|
"learning_rate": 2.108819345661451e-07, |
|
"logps/chosen": -232.1042938232422, |
|
"logps/rejected": -440.0047912597656, |
|
"loss": 0.1883, |
|
"rewards/chosen": 1.5378611087799072, |
|
"rewards/margins": 10.000123023986816, |
|
"rewards/rejected": -8.462263107299805, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 31.88002586364746, |
|
"kl": 0.0, |
|
"learning_rate": 2.0910384068278806e-07, |
|
"logps/chosen": -277.8791198730469, |
|
"logps/rejected": -397.03375244140625, |
|
"loss": 0.1909, |
|
"rewards/chosen": 1.89794921875, |
|
"rewards/margins": 11.083429336547852, |
|
"rewards/rejected": -9.185480117797852, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 25.281034469604492, |
|
"kl": 0.0, |
|
"learning_rate": 2.0732574679943098e-07, |
|
"logps/chosen": -237.05593872070312, |
|
"logps/rejected": -407.40594482421875, |
|
"loss": 0.1685, |
|
"rewards/chosen": 2.0257680416107178, |
|
"rewards/margins": 10.262957572937012, |
|
"rewards/rejected": -8.237189292907715, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 29.927499771118164, |
|
"kl": 0.0, |
|
"learning_rate": 2.0554765291607396e-07, |
|
"logps/chosen": -271.0245056152344, |
|
"logps/rejected": -412.03094482421875, |
|
"loss": 0.1945, |
|
"rewards/chosen": 1.5757964849472046, |
|
"rewards/margins": 9.921743392944336, |
|
"rewards/rejected": -8.345946311950684, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 31.359935760498047, |
|
"kl": 0.0, |
|
"learning_rate": 2.0376955903271693e-07, |
|
"logps/chosen": -267.9440002441406, |
|
"logps/rejected": -442.4165954589844, |
|
"loss": 0.1952, |
|
"rewards/chosen": 1.9685808420181274, |
|
"rewards/margins": 10.528787612915039, |
|
"rewards/rejected": -8.560208320617676, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 28.901763916015625, |
|
"kl": 0.0, |
|
"learning_rate": 2.0199146514935988e-07, |
|
"logps/chosen": -268.46533203125, |
|
"logps/rejected": -420.45538330078125, |
|
"loss": 0.1856, |
|
"rewards/chosen": 1.9667234420776367, |
|
"rewards/margins": 10.6937255859375, |
|
"rewards/rejected": -8.727002143859863, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 25.9229736328125, |
|
"kl": 0.0, |
|
"learning_rate": 2.0021337126600283e-07, |
|
"logps/chosen": -287.6095886230469, |
|
"logps/rejected": -430.07647705078125, |
|
"loss": 0.1474, |
|
"rewards/chosen": 2.176244020462036, |
|
"rewards/margins": 11.091053009033203, |
|
"rewards/rejected": -8.91480827331543, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 23.820232391357422, |
|
"kl": 0.0, |
|
"learning_rate": 1.984352773826458e-07, |
|
"logps/chosen": -239.20166015625, |
|
"logps/rejected": -456.2108459472656, |
|
"loss": 0.1644, |
|
"rewards/chosen": 1.9704856872558594, |
|
"rewards/margins": 11.435120582580566, |
|
"rewards/rejected": -9.464634895324707, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 26.05778694152832, |
|
"kl": 0.0, |
|
"learning_rate": 1.9665718349928875e-07, |
|
"logps/chosen": -215.20626831054688, |
|
"logps/rejected": -431.2613220214844, |
|
"loss": 0.1632, |
|
"rewards/chosen": 1.852900505065918, |
|
"rewards/margins": 11.106436729431152, |
|
"rewards/rejected": -9.25353717803955, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 29.80303382873535, |
|
"kl": 0.0, |
|
"learning_rate": 1.9487908961593172e-07, |
|
"logps/chosen": -298.92431640625, |
|
"logps/rejected": -413.9326171875, |
|
"loss": 0.1737, |
|
"rewards/chosen": 2.1202621459960938, |
|
"rewards/margins": 11.231690406799316, |
|
"rewards/rejected": -9.111427307128906, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 27.96302032470703, |
|
"kl": 0.0, |
|
"learning_rate": 1.931009957325747e-07, |
|
"logps/chosen": -256.3444519042969, |
|
"logps/rejected": -434.3214416503906, |
|
"loss": 0.1876, |
|
"rewards/chosen": 1.7924187183380127, |
|
"rewards/margins": 10.647282600402832, |
|
"rewards/rejected": -8.854864120483398, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 20.548818588256836, |
|
"kl": 0.0, |
|
"learning_rate": 1.9132290184921761e-07, |
|
"logps/chosen": -277.95831298828125, |
|
"logps/rejected": -389.59893798828125, |
|
"loss": 0.1743, |
|
"rewards/chosen": 1.9753564596176147, |
|
"rewards/margins": 9.79386043548584, |
|
"rewards/rejected": -7.81850528717041, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 31.688438415527344, |
|
"kl": 0.0, |
|
"learning_rate": 1.895448079658606e-07, |
|
"logps/chosen": -237.60061645507812, |
|
"logps/rejected": -409.19232177734375, |
|
"loss": 0.1659, |
|
"rewards/chosen": 2.4839866161346436, |
|
"rewards/margins": 11.480685234069824, |
|
"rewards/rejected": -8.996698379516602, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 21.887004852294922, |
|
"kl": 0.0, |
|
"learning_rate": 1.8776671408250356e-07, |
|
"logps/chosen": -292.5876770019531, |
|
"logps/rejected": -428.93377685546875, |
|
"loss": 0.1665, |
|
"rewards/chosen": 2.078939199447632, |
|
"rewards/margins": 11.130914688110352, |
|
"rewards/rejected": -9.051974296569824, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 26.495267868041992, |
|
"kl": 0.0, |
|
"learning_rate": 1.859886201991465e-07, |
|
"logps/chosen": -267.95135498046875, |
|
"logps/rejected": -391.75408935546875, |
|
"loss": 0.1798, |
|
"rewards/chosen": 1.9513813257217407, |
|
"rewards/margins": 9.968805313110352, |
|
"rewards/rejected": -8.017423629760742, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 29.68768310546875, |
|
"kl": 0.0, |
|
"learning_rate": 1.8421052631578946e-07, |
|
"logps/chosen": -286.04925537109375, |
|
"logps/rejected": -390.62457275390625, |
|
"loss": 0.1773, |
|
"rewards/chosen": 2.327118158340454, |
|
"rewards/margins": 10.014382362365723, |
|
"rewards/rejected": -7.687264442443848, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 22.868907928466797, |
|
"kl": 0.0, |
|
"learning_rate": 1.8243243243243243e-07, |
|
"logps/chosen": -257.67413330078125, |
|
"logps/rejected": -435.93560791015625, |
|
"loss": 0.174, |
|
"rewards/chosen": 1.741437315940857, |
|
"rewards/margins": 10.20213508605957, |
|
"rewards/rejected": -8.46069622039795, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 26.902700424194336, |
|
"kl": 0.0, |
|
"learning_rate": 1.8065433854907538e-07, |
|
"logps/chosen": -243.84512329101562, |
|
"logps/rejected": -452.46405029296875, |
|
"loss": 0.1457, |
|
"rewards/chosen": 1.995959997177124, |
|
"rewards/margins": 11.115839004516602, |
|
"rewards/rejected": -9.119877815246582, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 30.123769760131836, |
|
"kl": 0.0, |
|
"learning_rate": 1.7887624466571835e-07, |
|
"logps/chosen": -273.56011962890625, |
|
"logps/rejected": -375.4561462402344, |
|
"loss": 0.1599, |
|
"rewards/chosen": 2.687070608139038, |
|
"rewards/margins": 10.254888534545898, |
|
"rewards/rejected": -7.567817687988281, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 33.86235809326172, |
|
"kl": 0.0, |
|
"learning_rate": 1.770981507823613e-07, |
|
"logps/chosen": -295.79144287109375, |
|
"logps/rejected": -406.8284606933594, |
|
"loss": 0.1904, |
|
"rewards/chosen": 1.9145209789276123, |
|
"rewards/margins": 10.37600326538086, |
|
"rewards/rejected": -8.461481094360352, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 34.696876525878906, |
|
"kl": 0.0, |
|
"learning_rate": 1.7532005689900424e-07, |
|
"logps/chosen": -278.31610107421875, |
|
"logps/rejected": -454.6402893066406, |
|
"loss": 0.1903, |
|
"rewards/chosen": 1.857825517654419, |
|
"rewards/margins": 10.412918090820312, |
|
"rewards/rejected": -8.555091857910156, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 28.04399871826172, |
|
"kl": 0.0, |
|
"learning_rate": 1.7354196301564722e-07, |
|
"logps/chosen": -279.0931091308594, |
|
"logps/rejected": -444.87322998046875, |
|
"loss": 0.174, |
|
"rewards/chosen": 1.844491720199585, |
|
"rewards/margins": 10.60900592803955, |
|
"rewards/rejected": -8.764513969421387, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 28.754440307617188, |
|
"kl": 0.0, |
|
"learning_rate": 1.717638691322902e-07, |
|
"logps/chosen": -277.6640625, |
|
"logps/rejected": -444.380615234375, |
|
"loss": 0.1684, |
|
"rewards/chosen": 1.9001327753067017, |
|
"rewards/margins": 10.396875381469727, |
|
"rewards/rejected": -8.496744155883789, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 23.833982467651367, |
|
"kl": 0.0, |
|
"learning_rate": 1.6998577524893314e-07, |
|
"logps/chosen": -271.98101806640625, |
|
"logps/rejected": -418.50384521484375, |
|
"loss": 0.176, |
|
"rewards/chosen": 1.8560775518417358, |
|
"rewards/margins": 9.872191429138184, |
|
"rewards/rejected": -8.016115188598633, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 24.5834903717041, |
|
"kl": 0.0, |
|
"learning_rate": 1.6820768136557609e-07, |
|
"logps/chosen": -253.29019165039062, |
|
"logps/rejected": -451.9681091308594, |
|
"loss": 0.1764, |
|
"rewards/chosen": 2.107499361038208, |
|
"rewards/margins": 10.960962295532227, |
|
"rewards/rejected": -8.853462219238281, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 25.093826293945312, |
|
"kl": 0.0, |
|
"learning_rate": 1.6642958748221906e-07, |
|
"logps/chosen": -296.2508239746094, |
|
"logps/rejected": -452.71624755859375, |
|
"loss": 0.1589, |
|
"rewards/chosen": 2.3644440174102783, |
|
"rewards/margins": 11.254218101501465, |
|
"rewards/rejected": -8.889775276184082, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 30.393264770507812, |
|
"kl": 0.0, |
|
"learning_rate": 1.64651493598862e-07, |
|
"logps/chosen": -244.84036254882812, |
|
"logps/rejected": -413.4102478027344, |
|
"loss": 0.1689, |
|
"rewards/chosen": 2.0877583026885986, |
|
"rewards/margins": 10.486618041992188, |
|
"rewards/rejected": -8.398859977722168, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 26.028972625732422, |
|
"kl": 0.0, |
|
"learning_rate": 1.6287339971550498e-07, |
|
"logps/chosen": -308.08587646484375, |
|
"logps/rejected": -427.04632568359375, |
|
"loss": 0.1642, |
|
"rewards/chosen": 2.250349760055542, |
|
"rewards/margins": 11.379538536071777, |
|
"rewards/rejected": -9.129188537597656, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 26.561185836791992, |
|
"kl": 0.0, |
|
"learning_rate": 1.6109530583214793e-07, |
|
"logps/chosen": -264.28485107421875, |
|
"logps/rejected": -427.21533203125, |
|
"loss": 0.1738, |
|
"rewards/chosen": 2.1204895973205566, |
|
"rewards/margins": 11.067842483520508, |
|
"rewards/rejected": -8.94735336303711, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 27.91498374938965, |
|
"kl": 0.0, |
|
"learning_rate": 1.5931721194879087e-07, |
|
"logps/chosen": -267.13275146484375, |
|
"logps/rejected": -440.71209716796875, |
|
"loss": 0.1777, |
|
"rewards/chosen": 1.9779777526855469, |
|
"rewards/margins": 10.832992553710938, |
|
"rewards/rejected": -8.85501480102539, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 22.371728897094727, |
|
"kl": 0.0, |
|
"learning_rate": 1.5753911806543385e-07, |
|
"logps/chosen": -253.9912109375, |
|
"logps/rejected": -447.35308837890625, |
|
"loss": 0.1532, |
|
"rewards/chosen": 2.4193108081817627, |
|
"rewards/margins": 11.426763534545898, |
|
"rewards/rejected": -9.007452964782715, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 32.35457229614258, |
|
"kl": 0.0, |
|
"learning_rate": 1.5576102418207682e-07, |
|
"logps/chosen": -263.775634765625, |
|
"logps/rejected": -407.1927795410156, |
|
"loss": 0.1735, |
|
"rewards/chosen": 2.165008068084717, |
|
"rewards/margins": 10.231494903564453, |
|
"rewards/rejected": -8.066487312316895, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 30.724489212036133, |
|
"kl": 0.0, |
|
"learning_rate": 1.5398293029871974e-07, |
|
"logps/chosen": -289.01605224609375, |
|
"logps/rejected": -425.20428466796875, |
|
"loss": 0.1649, |
|
"rewards/chosen": 2.4617397785186768, |
|
"rewards/margins": 11.305328369140625, |
|
"rewards/rejected": -8.843587875366211, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 23.74848747253418, |
|
"kl": 0.0, |
|
"learning_rate": 1.5220483641536272e-07, |
|
"logps/chosen": -260.9978942871094, |
|
"logps/rejected": -437.7108459472656, |
|
"loss": 0.1759, |
|
"rewards/chosen": 1.955735445022583, |
|
"rewards/margins": 10.639429092407227, |
|
"rewards/rejected": -8.683694839477539, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 30.680461883544922, |
|
"kl": 0.0, |
|
"learning_rate": 1.504267425320057e-07, |
|
"logps/chosen": -262.09991455078125, |
|
"logps/rejected": -451.2193908691406, |
|
"loss": 0.1629, |
|
"rewards/chosen": 1.9774525165557861, |
|
"rewards/margins": 11.371380805969238, |
|
"rewards/rejected": -9.393927574157715, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 28.192245483398438, |
|
"kl": 0.0, |
|
"learning_rate": 1.4864864864864866e-07, |
|
"logps/chosen": -264.33843994140625, |
|
"logps/rejected": -408.20050048828125, |
|
"loss": 0.2076, |
|
"rewards/chosen": 1.9607311487197876, |
|
"rewards/margins": 9.993951797485352, |
|
"rewards/rejected": -8.033220291137695, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 26.542776107788086, |
|
"kl": 0.0, |
|
"learning_rate": 1.4687055476529158e-07, |
|
"logps/chosen": -248.61154174804688, |
|
"logps/rejected": -396.5494079589844, |
|
"loss": 0.1501, |
|
"rewards/chosen": 2.4335556030273438, |
|
"rewards/margins": 11.315145492553711, |
|
"rewards/rejected": -8.88158893585205, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 26.956071853637695, |
|
"kl": 0.0, |
|
"learning_rate": 1.4509246088193456e-07, |
|
"logps/chosen": -240.13662719726562, |
|
"logps/rejected": -425.4685974121094, |
|
"loss": 0.17, |
|
"rewards/chosen": 2.171187162399292, |
|
"rewards/margins": 11.282613754272461, |
|
"rewards/rejected": -9.111427307128906, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 33.09626388549805, |
|
"kl": 0.0, |
|
"learning_rate": 1.4331436699857753e-07, |
|
"logps/chosen": -256.2728271484375, |
|
"logps/rejected": -413.5420837402344, |
|
"loss": 0.1812, |
|
"rewards/chosen": 1.7524036169052124, |
|
"rewards/margins": 10.1653470993042, |
|
"rewards/rejected": -8.412943840026855, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 22.70789909362793, |
|
"kl": 0.0, |
|
"learning_rate": 1.4153627311522048e-07, |
|
"logps/chosen": -270.51812744140625, |
|
"logps/rejected": -399.8717956542969, |
|
"loss": 0.1735, |
|
"rewards/chosen": 2.118551015853882, |
|
"rewards/margins": 10.402674674987793, |
|
"rewards/rejected": -8.284123420715332, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 21.700260162353516, |
|
"kl": 0.0, |
|
"learning_rate": 1.3975817923186345e-07, |
|
"logps/chosen": -270.01190185546875, |
|
"logps/rejected": -435.75439453125, |
|
"loss": 0.1542, |
|
"rewards/chosen": 2.2069079875946045, |
|
"rewards/margins": 11.315336227416992, |
|
"rewards/rejected": -9.108428955078125, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 29.502779006958008, |
|
"kl": 0.0, |
|
"learning_rate": 1.379800853485064e-07, |
|
"logps/chosen": -308.8605041503906, |
|
"logps/rejected": -433.614013671875, |
|
"loss": 0.167, |
|
"rewards/chosen": 1.9083741903305054, |
|
"rewards/margins": 11.036389350891113, |
|
"rewards/rejected": -9.12801456451416, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 25.997594833374023, |
|
"kl": 0.0, |
|
"learning_rate": 1.3620199146514935e-07, |
|
"logps/chosen": -294.05596923828125, |
|
"logps/rejected": -458.2794494628906, |
|
"loss": 0.1613, |
|
"rewards/chosen": 2.1892623901367188, |
|
"rewards/margins": 12.30145263671875, |
|
"rewards/rejected": -10.112190246582031, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 28.619224548339844, |
|
"kl": 0.0, |
|
"learning_rate": 1.3442389758179232e-07, |
|
"logps/chosen": -271.43994140625, |
|
"logps/rejected": -424.03033447265625, |
|
"loss": 0.19, |
|
"rewards/chosen": 1.8617703914642334, |
|
"rewards/margins": 10.569725036621094, |
|
"rewards/rejected": -8.707954406738281, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 35.789634704589844, |
|
"kl": 0.0, |
|
"learning_rate": 1.326458036984353e-07, |
|
"logps/chosen": -239.49771118164062, |
|
"logps/rejected": -422.3915100097656, |
|
"loss": 0.2053, |
|
"rewards/chosen": 1.7723499536514282, |
|
"rewards/margins": 10.303890228271484, |
|
"rewards/rejected": -8.531539916992188, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 28.460819244384766, |
|
"kl": 0.0, |
|
"learning_rate": 1.3086770981507821e-07, |
|
"logps/chosen": -275.1038818359375, |
|
"logps/rejected": -407.48260498046875, |
|
"loss": 0.1854, |
|
"rewards/chosen": 2.025543689727783, |
|
"rewards/margins": 10.616659164428711, |
|
"rewards/rejected": -8.591115951538086, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 32.08914566040039, |
|
"kl": 0.0, |
|
"learning_rate": 1.290896159317212e-07, |
|
"logps/chosen": -220.82662963867188, |
|
"logps/rejected": -450.47454833984375, |
|
"loss": 0.1691, |
|
"rewards/chosen": 1.8781731128692627, |
|
"rewards/margins": 11.196914672851562, |
|
"rewards/rejected": -9.318740844726562, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 24.067890167236328, |
|
"kl": 0.0, |
|
"learning_rate": 1.2731152204836416e-07, |
|
"logps/chosen": -248.81246948242188, |
|
"logps/rejected": -438.7205505371094, |
|
"loss": 0.1745, |
|
"rewards/chosen": 2.0623905658721924, |
|
"rewards/margins": 11.863210678100586, |
|
"rewards/rejected": -9.800821304321289, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 19.57704734802246, |
|
"kl": 0.0, |
|
"learning_rate": 1.255334281650071e-07, |
|
"logps/chosen": -251.44198608398438, |
|
"logps/rejected": -444.56219482421875, |
|
"loss": 0.1694, |
|
"rewards/chosen": 2.024569272994995, |
|
"rewards/margins": 11.268827438354492, |
|
"rewards/rejected": -9.244257926940918, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 26.092870712280273, |
|
"kl": 0.0, |
|
"learning_rate": 1.2375533428165005e-07, |
|
"logps/chosen": -252.7013702392578, |
|
"logps/rejected": -421.412841796875, |
|
"loss": 0.1779, |
|
"rewards/chosen": 2.1870219707489014, |
|
"rewards/margins": 10.504962921142578, |
|
"rewards/rejected": -8.317939758300781, |
|
"step": 1215 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 27.678754806518555, |
|
"kl": 0.0, |
|
"learning_rate": 1.2197724039829303e-07, |
|
"logps/chosen": -272.28997802734375, |
|
"logps/rejected": -456.04644775390625, |
|
"loss": 0.1578, |
|
"rewards/chosen": 2.1497280597686768, |
|
"rewards/margins": 11.176363945007324, |
|
"rewards/rejected": -9.026636123657227, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 29.93063735961914, |
|
"kl": 0.0, |
|
"learning_rate": 1.2019914651493598e-07, |
|
"logps/chosen": -297.31317138671875, |
|
"logps/rejected": -434.39874267578125, |
|
"loss": 0.1417, |
|
"rewards/chosen": 2.197824001312256, |
|
"rewards/margins": 11.505438804626465, |
|
"rewards/rejected": -9.30761432647705, |
|
"step": 1225 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 31.17092514038086, |
|
"kl": 0.0, |
|
"learning_rate": 1.1842105263157894e-07, |
|
"logps/chosen": -284.43408203125, |
|
"logps/rejected": -430.77850341796875, |
|
"loss": 0.1479, |
|
"rewards/chosen": 2.323302745819092, |
|
"rewards/margins": 11.516798973083496, |
|
"rewards/rejected": -9.193496704101562, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 21.476613998413086, |
|
"kl": 0.0, |
|
"learning_rate": 1.166429587482219e-07, |
|
"logps/chosen": -237.2047576904297, |
|
"logps/rejected": -418.5458984375, |
|
"loss": 0.1591, |
|
"rewards/chosen": 2.236788034439087, |
|
"rewards/margins": 10.972362518310547, |
|
"rewards/rejected": -8.735574722290039, |
|
"step": 1235 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 27.719011306762695, |
|
"kl": 0.0, |
|
"learning_rate": 1.1486486486486487e-07, |
|
"logps/chosen": -267.37603759765625, |
|
"logps/rejected": -426.73822021484375, |
|
"loss": 0.1792, |
|
"rewards/chosen": 2.1171622276306152, |
|
"rewards/margins": 11.552632331848145, |
|
"rewards/rejected": -9.435468673706055, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 22.965457916259766, |
|
"kl": 0.0, |
|
"learning_rate": 1.1308677098150782e-07, |
|
"logps/chosen": -294.0093688964844, |
|
"logps/rejected": -444.3059997558594, |
|
"loss": 0.1481, |
|
"rewards/chosen": 2.485621929168701, |
|
"rewards/margins": 11.076322555541992, |
|
"rewards/rejected": -8.59070110321045, |
|
"step": 1245 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 33.64476013183594, |
|
"kl": 0.0, |
|
"learning_rate": 1.1130867709815078e-07, |
|
"logps/chosen": -279.5322265625, |
|
"logps/rejected": -423.5594787597656, |
|
"loss": 0.1573, |
|
"rewards/chosen": 2.5543296337127686, |
|
"rewards/margins": 11.313433647155762, |
|
"rewards/rejected": -8.759103775024414, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 31.360397338867188, |
|
"kl": 0.0, |
|
"learning_rate": 1.0953058321479374e-07, |
|
"logps/chosen": -257.98211669921875, |
|
"logps/rejected": -386.41375732421875, |
|
"loss": 0.1399, |
|
"rewards/chosen": 2.3777503967285156, |
|
"rewards/margins": 10.759562492370605, |
|
"rewards/rejected": -8.38181209564209, |
|
"step": 1255 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 23.901107788085938, |
|
"kl": 0.0, |
|
"learning_rate": 1.077524893314367e-07, |
|
"logps/chosen": -222.93588256835938, |
|
"logps/rejected": -408.0344543457031, |
|
"loss": 0.1802, |
|
"rewards/chosen": 1.903738021850586, |
|
"rewards/margins": 10.58789348602295, |
|
"rewards/rejected": -8.684155464172363, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 30.361534118652344, |
|
"kl": 0.0, |
|
"learning_rate": 1.0597439544807964e-07, |
|
"logps/chosen": -246.6664276123047, |
|
"logps/rejected": -412.19891357421875, |
|
"loss": 0.1639, |
|
"rewards/chosen": 2.1586594581604004, |
|
"rewards/margins": 11.214345932006836, |
|
"rewards/rejected": -9.055686950683594, |
|
"step": 1265 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 28.034950256347656, |
|
"kl": 0.0, |
|
"learning_rate": 1.0419630156472262e-07, |
|
"logps/chosen": -343.211669921875, |
|
"logps/rejected": -472.8130798339844, |
|
"loss": 0.1302, |
|
"rewards/chosen": 2.3979649543762207, |
|
"rewards/margins": 12.091375350952148, |
|
"rewards/rejected": -9.693410873413086, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 32.206668853759766, |
|
"kl": 0.0, |
|
"learning_rate": 1.0241820768136557e-07, |
|
"logps/chosen": -293.3280334472656, |
|
"logps/rejected": -412.0704040527344, |
|
"loss": 0.1618, |
|
"rewards/chosen": 2.4007017612457275, |
|
"rewards/margins": 11.050657272338867, |
|
"rewards/rejected": -8.649954795837402, |
|
"step": 1275 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 26.69512367248535, |
|
"kl": 0.0, |
|
"learning_rate": 1.0064011379800854e-07, |
|
"logps/chosen": -264.37652587890625, |
|
"logps/rejected": -443.7510681152344, |
|
"loss": 0.144, |
|
"rewards/chosen": 2.442653179168701, |
|
"rewards/margins": 11.348746299743652, |
|
"rewards/rejected": -8.90609359741211, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 17.890409469604492, |
|
"kl": 0.0, |
|
"learning_rate": 9.886201991465149e-08, |
|
"logps/chosen": -276.50457763671875, |
|
"logps/rejected": -464.80499267578125, |
|
"loss": 0.1395, |
|
"rewards/chosen": 2.2761783599853516, |
|
"rewards/margins": 11.834190368652344, |
|
"rewards/rejected": -9.558012008666992, |
|
"step": 1285 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 25.79632568359375, |
|
"kl": 0.0, |
|
"learning_rate": 9.708392603129445e-08, |
|
"logps/chosen": -278.9715881347656, |
|
"logps/rejected": -442.2305603027344, |
|
"loss": 0.1614, |
|
"rewards/chosen": 2.176389217376709, |
|
"rewards/margins": 11.664901733398438, |
|
"rewards/rejected": -9.488512992858887, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 29.441097259521484, |
|
"kl": 0.0, |
|
"learning_rate": 9.530583214793741e-08, |
|
"logps/chosen": -276.7694396972656, |
|
"logps/rejected": -402.5328063964844, |
|
"loss": 0.155, |
|
"rewards/chosen": 2.361067533493042, |
|
"rewards/margins": 10.893728256225586, |
|
"rewards/rejected": -8.532661437988281, |
|
"step": 1295 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 24.844789505004883, |
|
"kl": 0.0, |
|
"learning_rate": 9.352773826458037e-08, |
|
"logps/chosen": -289.9748229980469, |
|
"logps/rejected": -440.4497985839844, |
|
"loss": 0.1708, |
|
"rewards/chosen": 2.353919506072998, |
|
"rewards/margins": 11.433218002319336, |
|
"rewards/rejected": -9.079299926757812, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 28.899717330932617, |
|
"kl": 0.0, |
|
"learning_rate": 9.174964438122331e-08, |
|
"logps/chosen": -312.39605712890625, |
|
"logps/rejected": -427.78570556640625, |
|
"loss": 0.1639, |
|
"rewards/chosen": 2.488450527191162, |
|
"rewards/margins": 11.697278022766113, |
|
"rewards/rejected": -9.208827018737793, |
|
"step": 1305 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 25.086254119873047, |
|
"kl": 0.0, |
|
"learning_rate": 8.997155049786629e-08, |
|
"logps/chosen": -273.1850280761719, |
|
"logps/rejected": -413.96282958984375, |
|
"loss": 0.1535, |
|
"rewards/chosen": 2.2840752601623535, |
|
"rewards/margins": 11.001981735229492, |
|
"rewards/rejected": -8.71790599822998, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 24.174297332763672, |
|
"kl": 0.0, |
|
"learning_rate": 8.819345661450925e-08, |
|
"logps/chosen": -260.79534912109375, |
|
"logps/rejected": -404.79302978515625, |
|
"loss": 0.1608, |
|
"rewards/chosen": 2.0041592121124268, |
|
"rewards/margins": 10.949081420898438, |
|
"rewards/rejected": -8.944923400878906, |
|
"step": 1315 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 22.477380752563477, |
|
"kl": 0.0, |
|
"learning_rate": 8.64153627311522e-08, |
|
"logps/chosen": -287.4140625, |
|
"logps/rejected": -402.8482360839844, |
|
"loss": 0.1478, |
|
"rewards/chosen": 2.44392728805542, |
|
"rewards/margins": 11.139135360717773, |
|
"rewards/rejected": -8.695208549499512, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 24.919546127319336, |
|
"kl": 0.0, |
|
"learning_rate": 8.463726884779517e-08, |
|
"logps/chosen": -331.8410949707031, |
|
"logps/rejected": -441.4012756347656, |
|
"loss": 0.1521, |
|
"rewards/chosen": 2.6449999809265137, |
|
"rewards/margins": 11.747162818908691, |
|
"rewards/rejected": -9.102163314819336, |
|
"step": 1325 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 24.160480499267578, |
|
"kl": 0.0, |
|
"learning_rate": 8.285917496443812e-08, |
|
"logps/chosen": -315.6636657714844, |
|
"logps/rejected": -459.8597717285156, |
|
"loss": 0.1475, |
|
"rewards/chosen": 2.497434139251709, |
|
"rewards/margins": 11.85330867767334, |
|
"rewards/rejected": -9.355875015258789, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 28.45322608947754, |
|
"kl": 0.0, |
|
"learning_rate": 8.108108108108108e-08, |
|
"logps/chosen": -248.135498046875, |
|
"logps/rejected": -418.92388916015625, |
|
"loss": 0.1706, |
|
"rewards/chosen": 2.0972299575805664, |
|
"rewards/margins": 10.873918533325195, |
|
"rewards/rejected": -8.776689529418945, |
|
"step": 1335 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 24.973793029785156, |
|
"kl": 0.0, |
|
"learning_rate": 7.930298719772404e-08, |
|
"logps/chosen": -274.0903015136719, |
|
"logps/rejected": -436.2459411621094, |
|
"loss": 0.1563, |
|
"rewards/chosen": 2.0231974124908447, |
|
"rewards/margins": 10.445603370666504, |
|
"rewards/rejected": -8.422405242919922, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 26.009220123291016, |
|
"kl": 0.0, |
|
"learning_rate": 7.7524893314367e-08, |
|
"logps/chosen": -303.43560791015625, |
|
"logps/rejected": -419.0430603027344, |
|
"loss": 0.1562, |
|
"rewards/chosen": 2.5205211639404297, |
|
"rewards/margins": 11.333113670349121, |
|
"rewards/rejected": -8.812593460083008, |
|
"step": 1345 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 31.752500534057617, |
|
"kl": 0.0, |
|
"learning_rate": 7.574679943100994e-08, |
|
"logps/chosen": -257.14483642578125, |
|
"logps/rejected": -424.982666015625, |
|
"loss": 0.1654, |
|
"rewards/chosen": 2.0342319011688232, |
|
"rewards/margins": 10.941479682922363, |
|
"rewards/rejected": -8.907247543334961, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 23.12604522705078, |
|
"kl": 0.0, |
|
"learning_rate": 7.396870554765292e-08, |
|
"logps/chosen": -269.96258544921875, |
|
"logps/rejected": -391.7086181640625, |
|
"loss": 0.1525, |
|
"rewards/chosen": 2.2532811164855957, |
|
"rewards/margins": 11.61485481262207, |
|
"rewards/rejected": -9.361574172973633, |
|
"step": 1355 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 23.052330017089844, |
|
"kl": 0.0, |
|
"learning_rate": 7.219061166429587e-08, |
|
"logps/chosen": -262.48516845703125, |
|
"logps/rejected": -439.3185119628906, |
|
"loss": 0.1466, |
|
"rewards/chosen": 2.3807787895202637, |
|
"rewards/margins": 11.63065242767334, |
|
"rewards/rejected": -9.249873161315918, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 24.81175422668457, |
|
"kl": 0.0, |
|
"learning_rate": 7.041251778093883e-08, |
|
"logps/chosen": -269.81109619140625, |
|
"logps/rejected": -454.2550354003906, |
|
"loss": 0.1531, |
|
"rewards/chosen": 2.286533832550049, |
|
"rewards/margins": 12.61316967010498, |
|
"rewards/rejected": -10.326637268066406, |
|
"step": 1365 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 28.95956802368164, |
|
"kl": 0.0, |
|
"learning_rate": 6.863442389758179e-08, |
|
"logps/chosen": -281.4117126464844, |
|
"logps/rejected": -444.41888427734375, |
|
"loss": 0.1483, |
|
"rewards/chosen": 2.1853625774383545, |
|
"rewards/margins": 11.702371597290039, |
|
"rewards/rejected": -9.517007827758789, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 21.27782440185547, |
|
"kl": 0.0, |
|
"learning_rate": 6.685633001422475e-08, |
|
"logps/chosen": -243.04190063476562, |
|
"logps/rejected": -399.4400634765625, |
|
"loss": 0.1326, |
|
"rewards/chosen": 2.5309996604919434, |
|
"rewards/margins": 11.317906379699707, |
|
"rewards/rejected": -8.786906242370605, |
|
"step": 1375 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 27.220003128051758, |
|
"kl": 0.0, |
|
"learning_rate": 6.507823613086771e-08, |
|
"logps/chosen": -305.0284118652344, |
|
"logps/rejected": -460.77850341796875, |
|
"loss": 0.1451, |
|
"rewards/chosen": 2.280113935470581, |
|
"rewards/margins": 12.219154357910156, |
|
"rewards/rejected": -9.93903923034668, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 26.04877471923828, |
|
"kl": 0.0, |
|
"learning_rate": 6.330014224751067e-08, |
|
"logps/chosen": -299.484619140625, |
|
"logps/rejected": -444.88055419921875, |
|
"loss": 0.1383, |
|
"rewards/chosen": 2.363182544708252, |
|
"rewards/margins": 12.05009937286377, |
|
"rewards/rejected": -9.686917304992676, |
|
"step": 1385 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 15.905016899108887, |
|
"kl": 0.0, |
|
"learning_rate": 6.152204836415363e-08, |
|
"logps/chosen": -253.1654815673828, |
|
"logps/rejected": -438.5511169433594, |
|
"loss": 0.1442, |
|
"rewards/chosen": 2.5684008598327637, |
|
"rewards/margins": 11.512083053588867, |
|
"rewards/rejected": -8.943681716918945, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 18.374181747436523, |
|
"kl": 0.0, |
|
"learning_rate": 5.974395448079659e-08, |
|
"logps/chosen": -240.8791046142578, |
|
"logps/rejected": -411.80487060546875, |
|
"loss": 0.1513, |
|
"rewards/chosen": 2.337771415710449, |
|
"rewards/margins": 11.283363342285156, |
|
"rewards/rejected": -8.945591926574707, |
|
"step": 1395 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 30.287019729614258, |
|
"kl": 0.0, |
|
"learning_rate": 5.796586059743954e-08, |
|
"logps/chosen": -293.8108825683594, |
|
"logps/rejected": -451.5350646972656, |
|
"loss": 0.1654, |
|
"rewards/chosen": 1.9501450061798096, |
|
"rewards/margins": 11.551190376281738, |
|
"rewards/rejected": -9.601045608520508, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 24.658693313598633, |
|
"kl": 0.0, |
|
"learning_rate": 5.61877667140825e-08, |
|
"logps/chosen": -258.39886474609375, |
|
"logps/rejected": -444.2666015625, |
|
"loss": 0.1634, |
|
"rewards/chosen": 2.189380407333374, |
|
"rewards/margins": 11.383289337158203, |
|
"rewards/rejected": -9.19390869140625, |
|
"step": 1405 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 24.983081817626953, |
|
"kl": 0.0, |
|
"learning_rate": 5.4409672830725456e-08, |
|
"logps/chosen": -292.2581787109375, |
|
"logps/rejected": -433.89453125, |
|
"loss": 0.1553, |
|
"rewards/chosen": 2.1917474269866943, |
|
"rewards/margins": 11.13335132598877, |
|
"rewards/rejected": -8.941603660583496, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 30.233684539794922, |
|
"kl": 0.0, |
|
"learning_rate": 5.2631578947368416e-08, |
|
"logps/chosen": -298.6884765625, |
|
"logps/rejected": -440.4461975097656, |
|
"loss": 0.1469, |
|
"rewards/chosen": 2.396724224090576, |
|
"rewards/margins": 12.125951766967773, |
|
"rewards/rejected": -9.729228019714355, |
|
"step": 1415 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 27.67980194091797, |
|
"kl": 0.0, |
|
"learning_rate": 5.0853485064011376e-08, |
|
"logps/chosen": -293.10589599609375, |
|
"logps/rejected": -429.2474060058594, |
|
"loss": 0.1648, |
|
"rewards/chosen": 2.3396992683410645, |
|
"rewards/margins": 11.225849151611328, |
|
"rewards/rejected": -8.886149406433105, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 29.193660736083984, |
|
"kl": 0.0, |
|
"learning_rate": 4.9075391180654337e-08, |
|
"logps/chosen": -270.0514221191406, |
|
"logps/rejected": -460.9169921875, |
|
"loss": 0.177, |
|
"rewards/chosen": 1.9409351348876953, |
|
"rewards/margins": 11.288286209106445, |
|
"rewards/rejected": -9.34735107421875, |
|
"step": 1425 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 25.77621078491211, |
|
"kl": 0.0, |
|
"learning_rate": 4.72972972972973e-08, |
|
"logps/chosen": -262.68060302734375, |
|
"logps/rejected": -414.3082580566406, |
|
"loss": 0.1517, |
|
"rewards/chosen": 2.055493116378784, |
|
"rewards/margins": 10.508028030395508, |
|
"rewards/rejected": -8.452535629272461, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 39.052642822265625, |
|
"kl": 0.0, |
|
"learning_rate": 4.551920341394026e-08, |
|
"logps/chosen": -323.07977294921875, |
|
"logps/rejected": -415.31268310546875, |
|
"loss": 0.1704, |
|
"rewards/chosen": 2.5183842182159424, |
|
"rewards/margins": 11.62377643585205, |
|
"rewards/rejected": -9.105392456054688, |
|
"step": 1435 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 27.706132888793945, |
|
"kl": 0.0, |
|
"learning_rate": 4.374110953058322e-08, |
|
"logps/chosen": -279.017822265625, |
|
"logps/rejected": -474.2362365722656, |
|
"loss": 0.1525, |
|
"rewards/chosen": 2.0204992294311523, |
|
"rewards/margins": 11.512517929077148, |
|
"rewards/rejected": -9.49201774597168, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 24.838924407958984, |
|
"kl": 0.0, |
|
"learning_rate": 4.196301564722617e-08, |
|
"logps/chosen": -285.550048828125, |
|
"logps/rejected": -462.3155822753906, |
|
"loss": 0.1689, |
|
"rewards/chosen": 2.1153082847595215, |
|
"rewards/margins": 11.936882019042969, |
|
"rewards/rejected": -9.821573257446289, |
|
"step": 1445 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 34.149696350097656, |
|
"kl": 0.0, |
|
"learning_rate": 4.018492176386913e-08, |
|
"logps/chosen": -261.6683349609375, |
|
"logps/rejected": -418.8206481933594, |
|
"loss": 0.1861, |
|
"rewards/chosen": 1.9666168689727783, |
|
"rewards/margins": 10.487935066223145, |
|
"rewards/rejected": -8.521318435668945, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 23.415525436401367, |
|
"kl": 0.0, |
|
"learning_rate": 3.840682788051209e-08, |
|
"logps/chosen": -239.5380096435547, |
|
"logps/rejected": -418.8392639160156, |
|
"loss": 0.1519, |
|
"rewards/chosen": 2.0457942485809326, |
|
"rewards/margins": 11.141304969787598, |
|
"rewards/rejected": -9.095510482788086, |
|
"step": 1455 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 18.416677474975586, |
|
"kl": 0.0, |
|
"learning_rate": 3.6628733997155046e-08, |
|
"logps/chosen": -293.07208251953125, |
|
"logps/rejected": -434.031494140625, |
|
"loss": 0.1416, |
|
"rewards/chosen": 2.377678632736206, |
|
"rewards/margins": 11.937261581420898, |
|
"rewards/rejected": -9.55958366394043, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 24.344444274902344, |
|
"kl": 0.0, |
|
"learning_rate": 3.4850640113798006e-08, |
|
"logps/chosen": -297.32183837890625, |
|
"logps/rejected": -437.09100341796875, |
|
"loss": 0.1229, |
|
"rewards/chosen": 2.8577122688293457, |
|
"rewards/margins": 12.127481460571289, |
|
"rewards/rejected": -9.269769668579102, |
|
"step": 1465 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 30.76423454284668, |
|
"kl": 0.0, |
|
"learning_rate": 3.3072546230440967e-08, |
|
"logps/chosen": -279.06219482421875, |
|
"logps/rejected": -445.42156982421875, |
|
"loss": 0.1722, |
|
"rewards/chosen": 2.1692028045654297, |
|
"rewards/margins": 11.08595085144043, |
|
"rewards/rejected": -8.916748046875, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 22.21062660217285, |
|
"kl": 0.0, |
|
"learning_rate": 3.129445234708392e-08, |
|
"logps/chosen": -255.56723022460938, |
|
"logps/rejected": -437.4034729003906, |
|
"loss": 0.1455, |
|
"rewards/chosen": 2.3138813972473145, |
|
"rewards/margins": 11.781050682067871, |
|
"rewards/rejected": -9.467168807983398, |
|
"step": 1475 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 22.927778244018555, |
|
"kl": 0.0, |
|
"learning_rate": 2.9516358463726884e-08, |
|
"logps/chosen": -323.7676696777344, |
|
"logps/rejected": -436.6930236816406, |
|
"loss": 0.1404, |
|
"rewards/chosen": 2.3460052013397217, |
|
"rewards/margins": 11.245654106140137, |
|
"rewards/rejected": -8.899648666381836, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 31.1018123626709, |
|
"kl": 0.0, |
|
"learning_rate": 2.7738264580369844e-08, |
|
"logps/chosen": -326.9073486328125, |
|
"logps/rejected": -446.97479248046875, |
|
"loss": 0.1572, |
|
"rewards/chosen": 2.2194085121154785, |
|
"rewards/margins": 12.011299133300781, |
|
"rewards/rejected": -9.791891098022461, |
|
"step": 1485 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 27.92519760131836, |
|
"kl": 0.0, |
|
"learning_rate": 2.59601706970128e-08, |
|
"logps/chosen": -265.1518859863281, |
|
"logps/rejected": -455.7247009277344, |
|
"loss": 0.1584, |
|
"rewards/chosen": 2.3967678546905518, |
|
"rewards/margins": 11.583536148071289, |
|
"rewards/rejected": -9.186769485473633, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 21.29725456237793, |
|
"kl": 0.0, |
|
"learning_rate": 2.418207681365576e-08, |
|
"logps/chosen": -253.9468231201172, |
|
"logps/rejected": -426.5579528808594, |
|
"loss": 0.1479, |
|
"rewards/chosen": 2.272754192352295, |
|
"rewards/margins": 11.449048042297363, |
|
"rewards/rejected": -9.176294326782227, |
|
"step": 1495 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 26.227853775024414, |
|
"kl": 0.0, |
|
"learning_rate": 2.240398293029872e-08, |
|
"logps/chosen": -270.14581298828125, |
|
"logps/rejected": -418.4717712402344, |
|
"loss": 0.1802, |
|
"rewards/chosen": 1.7642427682876587, |
|
"rewards/margins": 10.709261894226074, |
|
"rewards/rejected": -8.94502067565918, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 26.655635833740234, |
|
"kl": 0.0, |
|
"learning_rate": 2.0625889046941676e-08, |
|
"logps/chosen": -309.02117919921875, |
|
"logps/rejected": -432.2864685058594, |
|
"loss": 0.1593, |
|
"rewards/chosen": 2.1008925437927246, |
|
"rewards/margins": 11.279916763305664, |
|
"rewards/rejected": -9.179023742675781, |
|
"step": 1505 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 25.60175323486328, |
|
"kl": 0.0, |
|
"learning_rate": 1.8847795163584636e-08, |
|
"logps/chosen": -268.1394958496094, |
|
"logps/rejected": -452.39202880859375, |
|
"loss": 0.1485, |
|
"rewards/chosen": 2.5142464637756348, |
|
"rewards/margins": 12.03423023223877, |
|
"rewards/rejected": -9.519984245300293, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 25.590343475341797, |
|
"kl": 0.0, |
|
"learning_rate": 1.7069701280227596e-08, |
|
"logps/chosen": -288.04571533203125, |
|
"logps/rejected": -439.8759765625, |
|
"loss": 0.162, |
|
"rewards/chosen": 2.3935296535491943, |
|
"rewards/margins": 11.7361421585083, |
|
"rewards/rejected": -9.342611312866211, |
|
"step": 1515 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 19.91452407836914, |
|
"kl": 0.0, |
|
"learning_rate": 1.5291607396870554e-08, |
|
"logps/chosen": -288.63836669921875, |
|
"logps/rejected": -410.79815673828125, |
|
"loss": 0.1494, |
|
"rewards/chosen": 2.5123307704925537, |
|
"rewards/margins": 11.557938575744629, |
|
"rewards/rejected": -9.04560661315918, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 22.47270965576172, |
|
"kl": 0.0, |
|
"learning_rate": 1.3513513513513514e-08, |
|
"logps/chosen": -252.8772430419922, |
|
"logps/rejected": -407.09222412109375, |
|
"loss": 0.1353, |
|
"rewards/chosen": 2.3054208755493164, |
|
"rewards/margins": 11.360735893249512, |
|
"rewards/rejected": -9.055315017700195, |
|
"step": 1525 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 24.753129959106445, |
|
"kl": 0.0, |
|
"learning_rate": 1.1735419630156473e-08, |
|
"logps/chosen": -282.2532958984375, |
|
"logps/rejected": -405.8521423339844, |
|
"loss": 0.1346, |
|
"rewards/chosen": 2.3494057655334473, |
|
"rewards/margins": 11.439948081970215, |
|
"rewards/rejected": -9.090542793273926, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 25.65488052368164, |
|
"kl": 0.0, |
|
"learning_rate": 9.95732574679943e-09, |
|
"logps/chosen": -289.83135986328125, |
|
"logps/rejected": -440.650634765625, |
|
"loss": 0.1592, |
|
"rewards/chosen": 2.2116000652313232, |
|
"rewards/margins": 11.503582954406738, |
|
"rewards/rejected": -9.291983604431152, |
|
"step": 1535 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 28.50649070739746, |
|
"kl": 0.0, |
|
"learning_rate": 8.179231863442388e-09, |
|
"logps/chosen": -247.19186401367188, |
|
"logps/rejected": -430.9366149902344, |
|
"loss": 0.1613, |
|
"rewards/chosen": 2.0414962768554688, |
|
"rewards/margins": 11.605134963989258, |
|
"rewards/rejected": -9.563637733459473, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 22.502891540527344, |
|
"kl": 0.0, |
|
"learning_rate": 6.401137980085348e-09, |
|
"logps/chosen": -280.7741394042969, |
|
"logps/rejected": -432.50140380859375, |
|
"loss": 0.1613, |
|
"rewards/chosen": 2.1499953269958496, |
|
"rewards/margins": 10.987812995910645, |
|
"rewards/rejected": -8.837817192077637, |
|
"step": 1545 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 38.24708938598633, |
|
"kl": 0.0, |
|
"learning_rate": 4.623044096728307e-09, |
|
"logps/chosen": -268.7917785644531, |
|
"logps/rejected": -439.6647033691406, |
|
"loss": 0.1653, |
|
"rewards/chosen": 2.114647388458252, |
|
"rewards/margins": 11.40100383758545, |
|
"rewards/rejected": -9.286355972290039, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 28.025665283203125, |
|
"kl": 0.0, |
|
"learning_rate": 2.844950213371266e-09, |
|
"logps/chosen": -299.0115051269531, |
|
"logps/rejected": -446.5692443847656, |
|
"loss": 0.1523, |
|
"rewards/chosen": 2.446258783340454, |
|
"rewards/margins": 11.819780349731445, |
|
"rewards/rejected": -9.373520851135254, |
|
"step": 1555 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 31.7341251373291, |
|
"kl": 0.0, |
|
"learning_rate": 1.0668563300142248e-09, |
|
"logps/chosen": -234.8123779296875, |
|
"logps/rejected": -408.6230773925781, |
|
"loss": 0.1703, |
|
"rewards/chosen": 2.086718797683716, |
|
"rewards/margins": 11.345036506652832, |
|
"rewards/rejected": -9.258317947387695, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 1563, |
|
"total_flos": 0.0, |
|
"train_loss": 0.2182784026735346, |
|
"train_runtime": 11066.0236, |
|
"train_samples_per_second": 9.037, |
|
"train_steps_per_second": 0.141 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 1563, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|