|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 192, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.010416666666666666, |
|
"grad_norm": 16.651927947998047, |
|
"learning_rate": 2e-05, |
|
"logits/chosen": -0.780666708946228, |
|
"logits/rejected": -1.1369192600250244, |
|
"logps/chosen": -3061.823486328125, |
|
"logps/rejected": -220.772705078125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.020833333333333332, |
|
"grad_norm": 13.908960342407227, |
|
"learning_rate": 4e-05, |
|
"logits/chosen": -0.8081845641136169, |
|
"logits/rejected": -1.2085561752319336, |
|
"logps/chosen": -4412.2578125, |
|
"logps/rejected": -267.08087158203125, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.03125, |
|
"grad_norm": 15.761119842529297, |
|
"learning_rate": 6e-05, |
|
"logits/chosen": -0.9457322359085083, |
|
"logits/rejected": -1.1844913959503174, |
|
"logps/chosen": -3298.71435546875, |
|
"logps/rejected": -223.52560424804688, |
|
"loss": 0.6763, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -0.01404876634478569, |
|
"rewards/margins": -0.040423206984996796, |
|
"rewards/rejected": 0.026374435052275658, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.041666666666666664, |
|
"grad_norm": 9.980835914611816, |
|
"learning_rate": 8e-05, |
|
"logits/chosen": -0.8384261131286621, |
|
"logits/rejected": -1.271468162536621, |
|
"logps/chosen": -3999.277587890625, |
|
"logps/rejected": -225.41534423828125, |
|
"loss": 0.4997, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.3802902400493622, |
|
"rewards/margins": 0.5509151816368103, |
|
"rewards/rejected": -0.17062492668628693, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.052083333333333336, |
|
"grad_norm": 6.138638973236084, |
|
"learning_rate": 0.0001, |
|
"logits/chosen": -0.8365007638931274, |
|
"logits/rejected": -1.362606406211853, |
|
"logps/chosen": -3391.94677734375, |
|
"logps/rejected": -245.4417724609375, |
|
"loss": 0.2954, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9162139892578125, |
|
"rewards/margins": 1.1493107080459595, |
|
"rewards/rejected": -0.23309671878814697, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0625, |
|
"grad_norm": 1.2922340631484985, |
|
"learning_rate": 0.00012, |
|
"logits/chosen": -0.8477813005447388, |
|
"logits/rejected": -1.4737606048583984, |
|
"logps/chosen": -7458.923828125, |
|
"logps/rejected": -282.75335693359375, |
|
"loss": 0.0518, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.14288330078125, |
|
"rewards/margins": 5.705089569091797, |
|
"rewards/rejected": -2.562206268310547, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.07291666666666667, |
|
"grad_norm": 0.03415762260556221, |
|
"learning_rate": 0.00014, |
|
"logits/chosen": -0.7428842186927795, |
|
"logits/rejected": -1.3752648830413818, |
|
"logps/chosen": -6296.4091796875, |
|
"logps/rejected": -276.24859619140625, |
|
"loss": 0.001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 6.91751766204834, |
|
"rewards/margins": 11.728842735290527, |
|
"rewards/rejected": -4.8113250732421875, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.08333333333333333, |
|
"grad_norm": 0.0005445057176984847, |
|
"learning_rate": 0.00016, |
|
"logits/chosen": -0.8007906079292297, |
|
"logits/rejected": -1.3979616165161133, |
|
"logps/chosen": -6775.5029296875, |
|
"logps/rejected": -370.7599182128906, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.398236274719238, |
|
"rewards/margins": 20.943986892700195, |
|
"rewards/rejected": -9.545750617980957, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.09375, |
|
"grad_norm": 0.005390194710344076, |
|
"learning_rate": 0.00018, |
|
"logits/chosen": -0.8591928482055664, |
|
"logits/rejected": -0.895159125328064, |
|
"logps/chosen": -2164.9541015625, |
|
"logps/rejected": -472.39715576171875, |
|
"loss": 0.0001, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.005324363708496, |
|
"rewards/margins": 25.770355224609375, |
|
"rewards/rejected": -20.765029907226562, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.10416666666666667, |
|
"grad_norm": 3.3636655416557915e-07, |
|
"learning_rate": 0.0002, |
|
"logits/chosen": -0.8266401886940002, |
|
"logits/rejected": -0.8240926265716553, |
|
"logps/chosen": -2985.67236328125, |
|
"logps/rejected": -462.8504638671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.8522109985351562, |
|
"rewards/margins": 27.728797912597656, |
|
"rewards/rejected": -23.876588821411133, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.11458333333333333, |
|
"grad_norm": 5.287571491763288e-10, |
|
"learning_rate": 0.00019998510240408496, |
|
"logits/chosen": -0.9868767261505127, |
|
"logits/rejected": -0.8054253458976746, |
|
"logps/chosen": -1482.732177734375, |
|
"logps/rejected": -553.533935546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7153975963592529, |
|
"rewards/margins": 32.48991394042969, |
|
"rewards/rejected": -33.20531463623047, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.125, |
|
"grad_norm": 26.858783721923828, |
|
"learning_rate": 0.00019994041405510705, |
|
"logits/chosen": -1.03706955909729, |
|
"logits/rejected": -0.9741817712783813, |
|
"logps/chosen": -4695.4384765625, |
|
"logps/rejected": -721.7908935546875, |
|
"loss": 0.0838, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.903982162475586, |
|
"rewards/margins": 36.569175720214844, |
|
"rewards/rejected": -46.47315979003906, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.13541666666666666, |
|
"grad_norm": 9.055187155659894e-10, |
|
"learning_rate": 0.0001998659482680456, |
|
"logits/chosen": -0.962079644203186, |
|
"logits/rejected": -1.3349699974060059, |
|
"logps/chosen": -9016.650390625, |
|
"logps/rejected": -793.662109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.13396000862121582, |
|
"rewards/margins": 55.1308708190918, |
|
"rewards/rejected": -54.99691390991211, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.14583333333333334, |
|
"grad_norm": 4.094690808642554e-09, |
|
"learning_rate": 0.0001997617272301248, |
|
"logits/chosen": -0.7595808506011963, |
|
"logits/rejected": -1.0764431953430176, |
|
"logps/chosen": -2220.482421875, |
|
"logps/rejected": -798.6175537109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.7651824951171875, |
|
"rewards/margins": 52.960113525390625, |
|
"rewards/rejected": -55.72529602050781, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.15625, |
|
"grad_norm": 9.434321379675481e-12, |
|
"learning_rate": 0.00019962778199420265, |
|
"logits/chosen": -0.8709142804145813, |
|
"logits/rejected": -1.1885780096054077, |
|
"logps/chosen": -4517.1044921875, |
|
"logps/rejected": -827.2481079101562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.643299102783203, |
|
"rewards/margins": 46.70207977294922, |
|
"rewards/rejected": -59.34538269042969, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.16666666666666666, |
|
"grad_norm": 2.213202831359747e-20, |
|
"learning_rate": 0.0001994641524695193, |
|
"logits/chosen": -0.8794220685958862, |
|
"logits/rejected": -0.8576048612594604, |
|
"logps/chosen": -2103.159912109375, |
|
"logps/rejected": -1043.563720703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.4151291847229004, |
|
"rewards/margins": 73.81220245361328, |
|
"rewards/rejected": -76.22732543945312, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.17708333333333334, |
|
"grad_norm": 3.0938494821608487e-11, |
|
"learning_rate": 0.0001992708874098054, |
|
"logits/chosen": -0.8563255071640015, |
|
"logits/rejected": -1.1423197984695435, |
|
"logps/chosen": -5297.2294921875, |
|
"logps/rejected": -875.4127197265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.190893173217773, |
|
"rewards/margins": 58.889068603515625, |
|
"rewards/rejected": -64.0799560546875, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.1875, |
|
"grad_norm": 1.6154518123142973e-19, |
|
"learning_rate": 0.00019904804439875633, |
|
"logits/chosen": -0.7584964632987976, |
|
"logits/rejected": -1.1960705518722534, |
|
"logps/chosen": -4858.4482421875, |
|
"logps/rejected": -979.1630249023438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.413223743438721, |
|
"rewards/margins": 65.54483795166016, |
|
"rewards/rejected": -72.95806121826172, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.19791666666666666, |
|
"grad_norm": 1.5190748665707273e-20, |
|
"learning_rate": 0.00019879568983287467, |
|
"logits/chosen": -0.9190940856933594, |
|
"logits/rejected": -1.1393502950668335, |
|
"logps/chosen": -2892.24267578125, |
|
"logps/rejected": -967.30615234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.6857757568359375, |
|
"rewards/margins": 66.6280288696289, |
|
"rewards/rejected": -72.31380462646484, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.20833333333333334, |
|
"grad_norm": 5.615921756608865e-20, |
|
"learning_rate": 0.0001985138989016874, |
|
"logits/chosen": -0.8962702751159668, |
|
"logits/rejected": -1.3306204080581665, |
|
"logps/chosen": -6762.5302734375, |
|
"logps/rejected": -995.3267211914062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.49383544921875, |
|
"rewards/margins": 71.0030746459961, |
|
"rewards/rejected": -75.49691009521484, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.21875, |
|
"grad_norm": 1.7029158998823055e-17, |
|
"learning_rate": 0.00019820275556534304, |
|
"logits/chosen": -0.8105654716491699, |
|
"logits/rejected": -1.3740750551223755, |
|
"logps/chosen": -9757.419921875, |
|
"logps/rejected": -954.4122314453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.01361083984375, |
|
"rewards/margins": 78.48196411132812, |
|
"rewards/rejected": -71.46835327148438, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.22916666666666666, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019786235252959553, |
|
"logits/chosen": -0.8967666625976562, |
|
"logits/rejected": -1.1008408069610596, |
|
"logps/chosen": -2355.2626953125, |
|
"logps/rejected": -1078.12060546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.126736640930176, |
|
"rewards/margins": 69.51663970947266, |
|
"rewards/rejected": -82.64337921142578, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.23958333333333334, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00019749279121818235, |
|
"logits/chosen": -0.8905834555625916, |
|
"logits/rejected": -1.0848970413208008, |
|
"logps/chosen": -2228.7802734375, |
|
"logps/rejected": -996.7811279296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.612191677093506, |
|
"rewards/margins": 73.11354064941406, |
|
"rewards/rejected": -76.7257308959961, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 7.647393330508138e-19, |
|
"learning_rate": 0.0001970941817426052, |
|
"logits/chosen": -0.8828914165496826, |
|
"logits/rejected": -1.166571855545044, |
|
"logps/chosen": -3267.3076171875, |
|
"logps/rejected": -1007.6031494140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.523654460906982, |
|
"rewards/margins": 67.767822265625, |
|
"rewards/rejected": -75.29147338867188, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.2604166666666667, |
|
"grad_norm": 1.2719628243941199e-17, |
|
"learning_rate": 0.00019666664286932198, |
|
"logits/chosen": -0.9088311791419983, |
|
"logits/rejected": -1.0525307655334473, |
|
"logps/chosen": -1816.2103271484375, |
|
"logps/rejected": -972.173583984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.783884048461914, |
|
"rewards/margins": 61.763031005859375, |
|
"rewards/rejected": -74.54691314697266, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.2708333333333333, |
|
"grad_norm": 5.43750881430877e-17, |
|
"learning_rate": 0.00019621030198436006, |
|
"logits/chosen": -1.0071444511413574, |
|
"logits/rejected": -0.8765456676483154, |
|
"logps/chosen": -1344.3419189453125, |
|
"logps/rejected": -992.904052734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -15.93281364440918, |
|
"rewards/margins": 58.60974884033203, |
|
"rewards/rejected": -74.54256439208984, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.28125, |
|
"grad_norm": 1.87694066018651e-12, |
|
"learning_rate": 0.0001957252950553616, |
|
"logits/chosen": -0.9608248472213745, |
|
"logits/rejected": -1.1069602966308594, |
|
"logps/chosen": -2613.587158203125, |
|
"logps/rejected": -1058.5823974609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.87086296081543, |
|
"rewards/margins": 73.99343872070312, |
|
"rewards/rejected": -79.86430358886719, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.2916666666666667, |
|
"grad_norm": 4.830028054873832e-19, |
|
"learning_rate": 0.00019521176659107142, |
|
"logits/chosen": -0.9120213985443115, |
|
"logits/rejected": -1.3496168851852417, |
|
"logps/chosen": -7237.09912109375, |
|
"logps/rejected": -1196.45458984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.345461368560791, |
|
"rewards/margins": 95.51947784423828, |
|
"rewards/rejected": -93.17402648925781, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.3020833333333333, |
|
"grad_norm": 4.793881215301673e-22, |
|
"learning_rate": 0.0001946698695982806, |
|
"logits/chosen": -0.8377887010574341, |
|
"logits/rejected": -1.4051532745361328, |
|
"logps/chosen": -4109.2421875, |
|
"logps/rejected": -1000.89111328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.656280517578125, |
|
"rewards/margins": 71.58284759521484, |
|
"rewards/rejected": -77.23912811279297, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.3125, |
|
"grad_norm": 6.545510560170225e-20, |
|
"learning_rate": 0.00019409976553623766, |
|
"logits/chosen": -1.0247584581375122, |
|
"logits/rejected": -1.1513103246688843, |
|
"logps/chosen": -3428.35400390625, |
|
"logps/rejected": -1014.9542236328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2859251499176025, |
|
"rewards/margins": 76.68400573730469, |
|
"rewards/rejected": -77.96993255615234, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.3229166666666667, |
|
"grad_norm": 4.597397983254012e-19, |
|
"learning_rate": 0.0001935016242685415, |
|
"logits/chosen": -0.8390676975250244, |
|
"logits/rejected": -1.2956117391586304, |
|
"logps/chosen": -9027.2763671875, |
|
"logps/rejected": -916.6038818359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 7.76643705368042, |
|
"rewards/margins": 78.03052520751953, |
|
"rewards/rejected": -70.26408386230469, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.3333333333333333, |
|
"grad_norm": 1.3613232990958984e-13, |
|
"learning_rate": 0.00019287562401253022, |
|
"logits/chosen": -0.9091357588768005, |
|
"logits/rejected": -1.3022887706756592, |
|
"logps/chosen": -4318.7275390625, |
|
"logps/rejected": -1064.7750244140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.792261123657227, |
|
"rewards/margins": 69.23944854736328, |
|
"rewards/rejected": -82.03170776367188, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.34375, |
|
"grad_norm": 1.1914185441577697e-15, |
|
"learning_rate": 0.00019222195128618106, |
|
"logits/chosen": -0.8301032781600952, |
|
"logits/rejected": -1.1454460620880127, |
|
"logps/chosen": -3503.22802734375, |
|
"logps/rejected": -1033.736083984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 5.304028511047363, |
|
"rewards/margins": 86.28971099853516, |
|
"rewards/rejected": -80.98567962646484, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.3541666666666667, |
|
"grad_norm": 1.2077592159173465e-17, |
|
"learning_rate": 0.00019154080085253666, |
|
"logits/chosen": -0.9025095105171204, |
|
"logits/rejected": -1.0029141902923584, |
|
"logps/chosen": -2160.233642578125, |
|
"logps/rejected": -988.8320922851562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.006901741027832, |
|
"rewards/margins": 67.99139404296875, |
|
"rewards/rejected": -74.99829864501953, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.3645833333333333, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001908323756616754, |
|
"logits/chosen": -0.9620730876922607, |
|
"logits/rejected": -1.303593635559082, |
|
"logps/chosen": -4102.265625, |
|
"logps/rejected": -1039.8701171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7988373041152954, |
|
"rewards/margins": 78.1258544921875, |
|
"rewards/rejected": -79.92469024658203, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 2.327858659183149e-19, |
|
"learning_rate": 0.0001900968867902419, |
|
"logits/chosen": -0.9782366156578064, |
|
"logits/rejected": -1.1345422267913818, |
|
"logps/chosen": -5296.07373046875, |
|
"logps/rejected": -1034.09130859375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9609848260879517, |
|
"rewards/margins": 76.84639739990234, |
|
"rewards/rejected": -75.88541412353516, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.3854166666666667, |
|
"grad_norm": 5.993950300697435e-16, |
|
"learning_rate": 0.00018933455337855632, |
|
"logits/chosen": -0.9719871282577515, |
|
"logits/rejected": -1.2671374082565308, |
|
"logps/chosen": -3619.00830078125, |
|
"logps/rejected": -960.418212890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8363280296325684, |
|
"rewards/margins": 73.91217041015625, |
|
"rewards/rejected": -73.07584381103516, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.3958333333333333, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.000188545602565321, |
|
"logits/chosen": -0.8795933127403259, |
|
"logits/rejected": -1.1734505891799927, |
|
"logps/chosen": -2961.7783203125, |
|
"logps/rejected": -1115.31494140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.957999229431152, |
|
"rewards/margins": 72.54431915283203, |
|
"rewards/rejected": -87.5023193359375, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.40625, |
|
"grad_norm": 9.981808812592569e-17, |
|
"learning_rate": 0.0001877302694199442, |
|
"logits/chosen": -0.9492607116699219, |
|
"logits/rejected": -1.0690467357635498, |
|
"logps/chosen": -3375.642578125, |
|
"logps/rejected": -970.4015502929688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.076546669006348, |
|
"rewards/margins": 63.78575897216797, |
|
"rewards/rejected": -74.8623046875, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 1.677683582476988e-19, |
|
"learning_rate": 0.00018688879687250067, |
|
"logits/chosen": -0.7364388108253479, |
|
"logits/rejected": -1.2299830913543701, |
|
"logps/chosen": -4095.498046875, |
|
"logps/rejected": -1125.764892578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -15.576887130737305, |
|
"rewards/margins": 71.2161865234375, |
|
"rewards/rejected": -86.79308319091797, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.4270833333333333, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001860214356413501, |
|
"logits/chosen": -0.8531750440597534, |
|
"logits/rejected": -1.2205849885940552, |
|
"logps/chosen": -4229.4951171875, |
|
"logps/rejected": -1032.5693359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7727296352386475, |
|
"rewards/margins": 79.98316192626953, |
|
"rewards/rejected": -78.2104263305664, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.4375, |
|
"grad_norm": 7.346881715894921e-19, |
|
"learning_rate": 0.00018512844415843514, |
|
"logits/chosen": -0.8872064352035522, |
|
"logits/rejected": -1.347999095916748, |
|
"logps/chosen": -6776.28759765625, |
|
"logps/rejected": -1047.91064453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6724517345428467, |
|
"rewards/margins": 77.55692291259766, |
|
"rewards/rejected": -78.22937774658203, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.4479166666666667, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00018421008849228118, |
|
"logits/chosen": -0.9359738230705261, |
|
"logits/rejected": -1.132539987564087, |
|
"logps/chosen": -1939.6717529296875, |
|
"logps/rejected": -968.7603759765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.06414794921875, |
|
"rewards/margins": 64.12657928466797, |
|
"rewards/rejected": -74.19072723388672, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.4583333333333333, |
|
"grad_norm": 1.1837645481569055e-22, |
|
"learning_rate": 0.00018326664226872065, |
|
"logits/chosen": -1.025484561920166, |
|
"logits/rejected": -1.1521519422531128, |
|
"logps/chosen": -3046.67138671875, |
|
"logps/rejected": -1010.868896484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.433685302734375, |
|
"rewards/margins": 79.09088134765625, |
|
"rewards/rejected": -76.65719604492188, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.46875, |
|
"grad_norm": 1.0841111503063809e-20, |
|
"learning_rate": 0.00018229838658936564, |
|
"logits/chosen": -0.8892084956169128, |
|
"logits/rejected": -1.0169939994812012, |
|
"logps/chosen": -3305.859130859375, |
|
"logps/rejected": -1010.6842041015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.042553901672363, |
|
"rewards/margins": 71.8106689453125, |
|
"rewards/rejected": -77.85322570800781, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.4791666666666667, |
|
"grad_norm": 4.2357202397516193e-19, |
|
"learning_rate": 0.00018130560994785325, |
|
"logits/chosen": -0.8572372198104858, |
|
"logits/rejected": -1.3382017612457275, |
|
"logps/chosen": -3960.66796875, |
|
"logps/rejected": -1066.4102783203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.808450698852539, |
|
"rewards/margins": 67.266357421875, |
|
"rewards/rejected": -82.07479858398438, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.4895833333333333, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00018028860814388827, |
|
"logits/chosen": -0.9053932428359985, |
|
"logits/rejected": -1.245862603187561, |
|
"logps/chosen": -4537.33935546875, |
|
"logps/rejected": -1066.2734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.734985828399658, |
|
"rewards/margins": 74.92082977294922, |
|
"rewards/rejected": -81.65581512451172, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00017924768419510904, |
|
"logits/chosen": -1.034196138381958, |
|
"logits/rejected": -1.2119903564453125, |
|
"logps/chosen": -4595.2041015625, |
|
"logps/rejected": -964.75146484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.3183517456054688, |
|
"rewards/margins": 72.99729919433594, |
|
"rewards/rejected": -74.3156509399414, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.5104166666666666, |
|
"grad_norm": 1.567794059815996e-19, |
|
"learning_rate": 0.000178183148246803, |
|
"logits/chosen": -0.9009298086166382, |
|
"logits/rejected": -1.2393324375152588, |
|
"logps/chosen": -2687.751953125, |
|
"logps/rejected": -1079.8603515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.771054267883301, |
|
"rewards/margins": 76.5450210571289, |
|
"rewards/rejected": -82.31607818603516, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.5208333333333334, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00017709531747949796, |
|
"logits/chosen": -0.7311493158340454, |
|
"logits/rejected": -0.9473284482955933, |
|
"logps/chosen": -1437.1895751953125, |
|
"logps/rejected": -1016.4439086914062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1630539894104004, |
|
"rewards/margins": 73.96327209472656, |
|
"rewards/rejected": -75.1263198852539, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.53125, |
|
"grad_norm": 7.311199956683466e-17, |
|
"learning_rate": 0.0001759845160144579, |
|
"logits/chosen": -0.9461476802825928, |
|
"logits/rejected": -1.2513340711593628, |
|
"logps/chosen": -9740.1708984375, |
|
"logps/rejected": -998.367431640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.608297348022461, |
|
"rewards/margins": 67.94587707519531, |
|
"rewards/rejected": -76.5541763305664, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.5416666666666666, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00017485107481711012, |
|
"logits/chosen": -1.0065760612487793, |
|
"logits/rejected": -1.176657795906067, |
|
"logps/chosen": -3810.3505859375, |
|
"logps/rejected": -1045.2769775390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.958648681640625, |
|
"rewards/margins": 79.65708923339844, |
|
"rewards/rejected": -80.61573791503906, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.5520833333333334, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00017369533159843369, |
|
"logits/chosen": -0.7354742288589478, |
|
"logits/rejected": -1.1330161094665527, |
|
"logps/chosen": -4675.5478515625, |
|
"logps/rejected": -1095.03857421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.42770004272461, |
|
"rewards/margins": 73.42713928222656, |
|
"rewards/rejected": -84.8548355102539, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.5625, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00017251763071433765, |
|
"logits/chosen": -0.846391499042511, |
|
"logits/rejected": -1.4115591049194336, |
|
"logps/chosen": -6438.18359375, |
|
"logps/rejected": -1123.985107421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.262456893920898, |
|
"rewards/margins": 74.60464477539062, |
|
"rewards/rejected": -84.86710357666016, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.5729166666666666, |
|
"grad_norm": 1.4700509215416664e-14, |
|
"learning_rate": 0.00017131832306305965, |
|
"logits/chosen": -0.960889458656311, |
|
"logits/rejected": -1.234442114830017, |
|
"logps/chosen": -3840.143310546875, |
|
"logps/rejected": -906.8041381835938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.883606433868408, |
|
"rewards/margins": 67.3929443359375, |
|
"rewards/rejected": -71.27655029296875, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.5833333333333334, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00017009776598061495, |
|
"logits/chosen": -0.9445286393165588, |
|
"logits/rejected": -0.9528659582138062, |
|
"logps/chosen": -1339.93603515625, |
|
"logps/rejected": -1048.4464111328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.9421162605285645, |
|
"rewards/margins": 72.82998657226562, |
|
"rewards/rejected": -78.77210998535156, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.59375, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001688563231343277, |
|
"logits/chosen": -0.921560525894165, |
|
"logits/rejected": -1.464065432548523, |
|
"logps/chosen": -6107.14794921875, |
|
"logps/rejected": -1177.58154296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16182255744934082, |
|
"rewards/margins": 89.7105941772461, |
|
"rewards/rejected": -89.54876708984375, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.6041666666666666, |
|
"grad_norm": 2.2782913188817383e-17, |
|
"learning_rate": 0.00016759436441447545, |
|
"logits/chosen": -0.9432457685470581, |
|
"logits/rejected": -0.9813876748085022, |
|
"logps/chosen": -2878.421875, |
|
"logps/rejected": -973.9136962890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.945228576660156, |
|
"rewards/margins": 64.66465759277344, |
|
"rewards/rejected": -73.6098861694336, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.6145833333333334, |
|
"grad_norm": 3.532171644213204e-18, |
|
"learning_rate": 0.00016631226582407952, |
|
"logits/chosen": -1.011328101158142, |
|
"logits/rejected": -1.1443777084350586, |
|
"logps/chosen": -4537.185546875, |
|
"logps/rejected": -1023.2095336914062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.814834713935852, |
|
"rewards/margins": 78.44985961914062, |
|
"rewards/rejected": -77.63502502441406, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 1.0837672078321995e-16, |
|
"learning_rate": 0.00016501040936687443, |
|
"logits/chosen": -0.7958677411079407, |
|
"logits/rejected": -1.2076987028121948, |
|
"logps/chosen": -3676.333984375, |
|
"logps/rejected": -991.25146484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.135693550109863, |
|
"rewards/margins": 61.884666442871094, |
|
"rewards/rejected": -76.0203628540039, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.6354166666666666, |
|
"grad_norm": 5.757463345409198e-20, |
|
"learning_rate": 0.00016368918293348892, |
|
"logits/chosen": -0.9800617694854736, |
|
"logits/rejected": -1.1600340604782104, |
|
"logps/chosen": -3713.349853515625, |
|
"logps/rejected": -1017.238037109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.153411865234375, |
|
"rewards/margins": 75.27999114990234, |
|
"rewards/rejected": -79.43340301513672, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.6458333333333334, |
|
"grad_norm": 5.311725612781588e-16, |
|
"learning_rate": 0.00016234898018587337, |
|
"logits/chosen": -0.8364681005477905, |
|
"logits/rejected": -1.16648530960083, |
|
"logps/chosen": -2505.11962890625, |
|
"logps/rejected": -1125.0926513671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -18.13332176208496, |
|
"rewards/margins": 66.983642578125, |
|
"rewards/rejected": -85.11697387695312, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.65625, |
|
"grad_norm": 9.133200582579723e-18, |
|
"learning_rate": 0.00016099020044000727, |
|
"logits/chosen": -1.0037099123001099, |
|
"logits/rejected": -1.054494857788086, |
|
"logps/chosen": -2708.493408203125, |
|
"logps/rejected": -1137.5875244140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.967913627624512, |
|
"rewards/margins": 81.5879898071289, |
|
"rewards/rejected": -88.555908203125, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 2.593275012522582e-16, |
|
"learning_rate": 0.00015961324854692254, |
|
"logits/chosen": -0.8983689546585083, |
|
"logits/rejected": -1.127020001411438, |
|
"logps/chosen": -2932.719482421875, |
|
"logps/rejected": -1062.8060302734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.111117362976074, |
|
"rewards/margins": 79.9205551147461, |
|
"rewards/rejected": -82.03167724609375, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.6770833333333334, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00015821853477207708, |
|
"logits/chosen": -0.9519010186195374, |
|
"logits/rejected": -1.1288293600082397, |
|
"logps/chosen": -2201.759521484375, |
|
"logps/rejected": -898.9304809570312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.13864278793335, |
|
"rewards/margins": 63.858665466308594, |
|
"rewards/rejected": -69.997314453125, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.6875, |
|
"grad_norm": 3.743392066509216e-23, |
|
"learning_rate": 0.00015680647467311557, |
|
"logits/chosen": -0.8885273337364197, |
|
"logits/rejected": -1.2975856065750122, |
|
"logps/chosen": -3025.942138671875, |
|
"logps/rejected": -998.9232177734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.540674209594727, |
|
"rewards/margins": 62.277652740478516, |
|
"rewards/rejected": -76.81832885742188, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.6979166666666666, |
|
"grad_norm": 1.5581050296373422e-17, |
|
"learning_rate": 0.0001553774889760533, |
|
"logits/chosen": -0.922539472579956, |
|
"logits/rejected": -1.1917293071746826, |
|
"logps/chosen": -2637.00439453125, |
|
"logps/rejected": -1103.3564453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.390442848205566, |
|
"rewards/margins": 80.02389526367188, |
|
"rewards/rejected": -85.41433715820312, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.7083333333333334, |
|
"grad_norm": 1.5840932732213986e-11, |
|
"learning_rate": 0.00015393200344991995, |
|
"logits/chosen": -0.9601885080337524, |
|
"logits/rejected": -1.379237174987793, |
|
"logps/chosen": -7276.7919921875, |
|
"logps/rejected": -1131.1331787109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.817126274108887, |
|
"rewards/margins": 79.90594482421875, |
|
"rewards/rejected": -85.72306823730469, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.71875, |
|
"grad_norm": 3.447390015483765e-17, |
|
"learning_rate": 0.0001524704487799008, |
|
"logits/chosen": -0.9075456857681274, |
|
"logits/rejected": -1.2865498065948486, |
|
"logps/chosen": -4736.16015625, |
|
"logps/rejected": -1066.030029296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.429553270339966, |
|
"rewards/margins": 80.66062927246094, |
|
"rewards/rejected": -83.0901870727539, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.7291666666666666, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001509932604390136, |
|
"logits/chosen": -0.9576442837715149, |
|
"logits/rejected": -1.1714496612548828, |
|
"logps/chosen": -2777.48486328125, |
|
"logps/rejected": -981.0249633789062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5866700410842896, |
|
"rewards/margins": 74.16746520996094, |
|
"rewards/rejected": -74.75413513183594, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.7395833333333334, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00014950087855835815, |
|
"logits/chosen": -0.957038164138794, |
|
"logits/rejected": -1.309131383895874, |
|
"logps/chosen": -3568.40625, |
|
"logps/rejected": -1100.7939453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.437561511993408, |
|
"rewards/margins": 76.48944854736328, |
|
"rewards/rejected": -83.92700958251953, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 2.023447353270838e-18, |
|
"learning_rate": 0.00014799374779597867, |
|
"logits/chosen": -0.879957377910614, |
|
"logits/rejected": -1.1743906736373901, |
|
"logps/chosen": -2927.203369140625, |
|
"logps/rejected": -942.46728515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.356558322906494, |
|
"rewards/margins": 65.15306854248047, |
|
"rewards/rejected": -72.50962829589844, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.7604166666666666, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00014647231720437686, |
|
"logits/chosen": -0.9975137710571289, |
|
"logits/rejected": -1.096327543258667, |
|
"logps/chosen": -2194.671875, |
|
"logps/rejected": -1060.455322265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.339242458343506, |
|
"rewards/margins": 73.82234191894531, |
|
"rewards/rejected": -81.16158294677734, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.7708333333333334, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00014493704009671613, |
|
"logits/chosen": -0.8398927450180054, |
|
"logits/rejected": -1.05116868019104, |
|
"logps/chosen": -2852.46875, |
|
"logps/rejected": -1053.2982177734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.2871978282928467, |
|
"rewards/margins": 77.74524688720703, |
|
"rewards/rejected": -81.0324478149414, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.78125, |
|
"grad_norm": 2.823407507790421e-09, |
|
"learning_rate": 0.00014338837391175582, |
|
"logits/chosen": -0.8749938607215881, |
|
"logits/rejected": -1.3326060771942139, |
|
"logps/chosen": -6237.5947265625, |
|
"logps/rejected": -1008.5339965820312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.8448486328125, |
|
"rewards/margins": 65.41060638427734, |
|
"rewards/rejected": -78.25544738769531, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.7916666666666666, |
|
"grad_norm": 2.146854222062471e-20, |
|
"learning_rate": 0.0001418267800775565, |
|
"logits/chosen": -0.9367476105690002, |
|
"logits/rejected": -1.0628546476364136, |
|
"logps/chosen": -3435.0185546875, |
|
"logps/rejected": -1105.599609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.55868911743164, |
|
"rewards/margins": 72.16712951660156, |
|
"rewards/rejected": -84.72582244873047, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.8020833333333334, |
|
"grad_norm": 4.4584097976070534e-21, |
|
"learning_rate": 0.00014025272387399674, |
|
"logits/chosen": -1.013418436050415, |
|
"logits/rejected": -1.210724115371704, |
|
"logps/chosen": -4156.4990234375, |
|
"logps/rejected": -1065.85009765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -16.16033363342285, |
|
"rewards/margins": 65.87461853027344, |
|
"rewards/rejected": -82.03495025634766, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.8125, |
|
"grad_norm": 5.350105004942962e-20, |
|
"learning_rate": 0.0001386666742941419, |
|
"logits/chosen": -0.9383605718612671, |
|
"logits/rejected": -1.2753703594207764, |
|
"logps/chosen": -5303.810546875, |
|
"logps/rejected": -1006.8165893554688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.698285102844238, |
|
"rewards/margins": 73.23123168945312, |
|
"rewards/rejected": -77.92951965332031, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.8229166666666666, |
|
"grad_norm": 2.6276267314835005e-17, |
|
"learning_rate": 0.00013706910390450677, |
|
"logits/chosen": -0.928679347038269, |
|
"logits/rejected": -1.1945797204971313, |
|
"logps/chosen": -5853.28369140625, |
|
"logps/rejected": -996.289306640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.84226131439209, |
|
"rewards/margins": 66.27059173583984, |
|
"rewards/rejected": -78.11285400390625, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00013546048870425356, |
|
"logits/chosen": -0.90166175365448, |
|
"logits/rejected": -1.432413935661316, |
|
"logps/chosen": -5923.66845703125, |
|
"logps/rejected": -1065.8446044921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.516674041748047, |
|
"rewards/margins": 72.67932891845703, |
|
"rewards/rejected": -82.19600677490234, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.84375, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00013384130798336705, |
|
"logits/chosen": -0.9168898463249207, |
|
"logits/rejected": -1.3556921482086182, |
|
"logps/chosen": -6369.79296875, |
|
"logps/rejected": -1242.0037841796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.644824028015137, |
|
"rewards/margins": 84.6693344116211, |
|
"rewards/rejected": -96.31415557861328, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.8541666666666666, |
|
"grad_norm": 9.279878730339463e-19, |
|
"learning_rate": 0.00013221204417984908, |
|
"logits/chosen": -0.8945307731628418, |
|
"logits/rejected": -1.1446576118469238, |
|
"logps/chosen": -4128.4306640625, |
|
"logps/rejected": -958.488037109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.115054190158844, |
|
"rewards/margins": 73.7982177734375, |
|
"rewards/rejected": -73.91326904296875, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.8645833333333334, |
|
"grad_norm": 2.394110249810547e-15, |
|
"learning_rate": 0.0001305731827359753, |
|
"logits/chosen": -0.9232196807861328, |
|
"logits/rejected": -1.1997020244598389, |
|
"logps/chosen": -4594.69287109375, |
|
"logps/rejected": -1065.5748291015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.030016899108887, |
|
"rewards/margins": 74.59611511230469, |
|
"rewards/rejected": -81.62613677978516, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 1.8590894193296648e-17, |
|
"learning_rate": 0.00012892521195365678, |
|
"logits/chosen": -1.0041048526763916, |
|
"logits/rejected": -1.1207554340362549, |
|
"logps/chosen": -3463.850341796875, |
|
"logps/rejected": -1104.9521484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.943849563598633, |
|
"rewards/margins": 80.47566223144531, |
|
"rewards/rejected": -84.41950988769531, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.8854166666666666, |
|
"grad_norm": 9.305548438880714e-21, |
|
"learning_rate": 0.00012726862284894938, |
|
"logits/chosen": -0.9365824460983276, |
|
"logits/rejected": -1.378727674484253, |
|
"logps/chosen": -5006.0732421875, |
|
"logps/rejected": -1108.4298095703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 11.526544570922852, |
|
"rewards/margins": 95.75658416748047, |
|
"rewards/rejected": -84.23004913330078, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.8958333333333334, |
|
"grad_norm": 2.6055184079070218e-14, |
|
"learning_rate": 0.0001256039090057547, |
|
"logits/chosen": -0.8847850561141968, |
|
"logits/rejected": -1.2132179737091064, |
|
"logps/chosen": -3019.03466796875, |
|
"logps/rejected": -1116.0423583984375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -17.331069946289062, |
|
"rewards/margins": 68.22811889648438, |
|
"rewards/rejected": -85.55919647216797, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.90625, |
|
"grad_norm": 1.337394415048057e-13, |
|
"learning_rate": 0.0001239315664287558, |
|
"logits/chosen": -0.9080939292907715, |
|
"logits/rejected": -1.1880122423171997, |
|
"logps/chosen": -6956.1767578125, |
|
"logps/rejected": -1072.7021484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.3681154251098633, |
|
"rewards/margins": 78.97460174560547, |
|
"rewards/rejected": -81.34272003173828, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.9166666666666666, |
|
"grad_norm": 1.1238145078521735e-18, |
|
"learning_rate": 0.00012225209339563145, |
|
"logits/chosen": -0.871356189250946, |
|
"logits/rejected": -1.2935049533843994, |
|
"logps/chosen": -7261.5849609375, |
|
"logps/rejected": -934.6922607421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1817047595977783, |
|
"rewards/margins": 71.47894287109375, |
|
"rewards/rejected": -72.66064453125, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.9270833333333334, |
|
"grad_norm": 3.7998657856768726e-21, |
|
"learning_rate": 0.00012056599030859366, |
|
"logits/chosen": -0.8709771633148193, |
|
"logits/rejected": -1.3064707517623901, |
|
"logps/chosen": -4282.35302734375, |
|
"logps/rejected": -1147.09765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.661895751953125, |
|
"rewards/margins": 80.48426818847656, |
|
"rewards/rejected": -88.14615631103516, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 3.9913009729067834e-17, |
|
"learning_rate": 0.00011887375954529168, |
|
"logits/chosen": -1.0120631456375122, |
|
"logits/rejected": -1.4202260971069336, |
|
"logps/chosen": -4564.931640625, |
|
"logps/rejected": -1029.2471923828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 25.81535530090332, |
|
"rewards/margins": 104.50590515136719, |
|
"rewards/rejected": -78.6905517578125, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.9479166666666666, |
|
"grad_norm": 4.153984713967665e-21, |
|
"learning_rate": 0.00011717590530912763, |
|
"logits/chosen": -0.8913455009460449, |
|
"logits/rejected": -1.2823923826217651, |
|
"logps/chosen": -3196.804443359375, |
|
"logps/rejected": -1069.353271484375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.811059951782227, |
|
"rewards/margins": 67.35678100585938, |
|
"rewards/rejected": -82.16783142089844, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.9583333333333334, |
|
"grad_norm": 4.296434017478194e-15, |
|
"learning_rate": 0.00011547293347902812, |
|
"logits/chosen": -1.0650385618209839, |
|
"logits/rejected": -1.330976963043213, |
|
"logps/chosen": -5423.759765625, |
|
"logps/rejected": -1142.623779296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.46921157836914, |
|
"rewards/margins": 78.08798217773438, |
|
"rewards/rejected": -89.55718994140625, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.96875, |
|
"grad_norm": 6.566554551941457e-20, |
|
"learning_rate": 0.00011376535145871684, |
|
"logits/chosen": -0.9406051635742188, |
|
"logits/rejected": -1.2664682865142822, |
|
"logps/chosen": -3839.88671875, |
|
"logps/rejected": -1140.073486328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -17.9644775390625, |
|
"rewards/margins": 70.13662719726562, |
|
"rewards/rejected": -88.10110473632812, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.9791666666666666, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0001120536680255323, |
|
"logits/chosen": -0.9597834348678589, |
|
"logits/rejected": -1.1578893661499023, |
|
"logps/chosen": -2502.4541015625, |
|
"logps/rejected": -1016.5823974609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.05165565013885498, |
|
"rewards/margins": 78.03656005859375, |
|
"rewards/rejected": -78.08821105957031, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.9895833333333334, |
|
"grad_norm": 2.4043631071810274e-16, |
|
"learning_rate": 0.00011033839317883701, |
|
"logits/chosen": -0.876549482345581, |
|
"logits/rejected": -1.2577821016311646, |
|
"logps/chosen": -3624.76611328125, |
|
"logps/rejected": -1066.66015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 13.061856269836426, |
|
"rewards/margins": 95.77525329589844, |
|
"rewards/rejected": -82.7134017944336, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00010862003798806196, |
|
"logits/chosen": -0.9283385276794434, |
|
"logits/rejected": -1.1488637924194336, |
|
"logps/chosen": -4157.3427734375, |
|
"logps/rejected": -1044.188232421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.248306274414062, |
|
"rewards/margins": 71.64842224121094, |
|
"rewards/rejected": -81.896728515625, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 1.0104166666666667, |
|
"grad_norm": 5.512825971716348e-20, |
|
"learning_rate": 0.00010689911444043248, |
|
"logits/chosen": -0.7668694257736206, |
|
"logits/rejected": -1.1733273267745972, |
|
"logps/chosen": -3267.61962890625, |
|
"logps/rejected": -1233.9364013671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -16.425668716430664, |
|
"rewards/margins": 77.82489013671875, |
|
"rewards/rejected": -94.25055694580078, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 1.0208333333333333, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00010517613528842097, |
|
"logits/chosen": -1.0368558168411255, |
|
"logits/rejected": -1.1599735021591187, |
|
"logps/chosen": -3525.150634765625, |
|
"logps/rejected": -1166.176025390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.8862044811248779, |
|
"rewards/margins": 90.17179107666016, |
|
"rewards/rejected": -89.28559112548828, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 1.03125, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00010345161389697082, |
|
"logits/chosen": -0.7527876496315002, |
|
"logits/rejected": -1.1752800941467285, |
|
"logps/chosen": -4636.1845703125, |
|
"logps/rejected": -945.1699829101562, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 3.4130585193634033, |
|
"rewards/margins": 77.43528747558594, |
|
"rewards/rejected": -74.02222442626953, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 1.0416666666666667, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.00010172606409053886, |
|
"logits/chosen": -0.9287230968475342, |
|
"logits/rejected": -1.0137070417404175, |
|
"logps/chosen": -2631.903564453125, |
|
"logps/rejected": -1053.97802734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.378585815429688, |
|
"rewards/margins": 67.58805847167969, |
|
"rewards/rejected": -79.96664428710938, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0520833333333333, |
|
"grad_norm": 2.335907492570711e-16, |
|
"learning_rate": 0.0001, |
|
"logits/chosen": -0.9676191806793213, |
|
"logits/rejected": -1.2866928577423096, |
|
"logps/chosen": -3778.99462890625, |
|
"logps/rejected": -1169.9747314453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -19.198566436767578, |
|
"rewards/margins": 72.15303802490234, |
|
"rewards/rejected": -91.35160064697266, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 1.0625, |
|
"grad_norm": 3.0693231460494636e-17, |
|
"learning_rate": 9.827393590946116e-05, |
|
"logits/chosen": -0.8066754341125488, |
|
"logits/rejected": -1.3259482383728027, |
|
"logps/chosen": -4285.404296875, |
|
"logps/rejected": -1049.9791259765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.3176026344299316, |
|
"rewards/margins": 77.5743179321289, |
|
"rewards/rejected": -80.89192199707031, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 1.0729166666666667, |
|
"grad_norm": 7.908512019412167e-19, |
|
"learning_rate": 9.654838610302923e-05, |
|
"logits/chosen": -0.8500939607620239, |
|
"logits/rejected": -1.3845233917236328, |
|
"logps/chosen": -4154.744140625, |
|
"logps/rejected": -998.893798828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.2993621826171875, |
|
"rewards/margins": 79.06625366210938, |
|
"rewards/rejected": -77.76689147949219, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 1.0833333333333333, |
|
"grad_norm": 1.884301279524458e-19, |
|
"learning_rate": 9.482386471157904e-05, |
|
"logits/chosen": -0.803484320640564, |
|
"logits/rejected": -1.1875181198120117, |
|
"logps/chosen": -2875.72509765625, |
|
"logps/rejected": -1111.62060546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.236685037612915, |
|
"rewards/margins": 81.31465148925781, |
|
"rewards/rejected": -83.55133819580078, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 1.09375, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.31008855595675e-05, |
|
"logits/chosen": -0.9018905162811279, |
|
"logits/rejected": -1.1758604049682617, |
|
"logps/chosen": -2899.448486328125, |
|
"logps/rejected": -1177.0833740234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.149624824523926, |
|
"rewards/margins": 84.1700668334961, |
|
"rewards/rejected": -90.31968688964844, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 1.1041666666666667, |
|
"grad_norm": 4.704146143799659e-13, |
|
"learning_rate": 9.137996201193805e-05, |
|
"logits/chosen": -0.9577762484550476, |
|
"logits/rejected": -1.2123889923095703, |
|
"logps/chosen": -3999.40087890625, |
|
"logps/rejected": -1089.39453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.861725091934204, |
|
"rewards/margins": 79.9576187133789, |
|
"rewards/rejected": -83.81934356689453, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 1.1145833333333333, |
|
"grad_norm": 3.220187770403815e-22, |
|
"learning_rate": 8.9661606821163e-05, |
|
"logits/chosen": -0.9275646209716797, |
|
"logits/rejected": -1.140000343322754, |
|
"logps/chosen": -6079.86474609375, |
|
"logps/rejected": -1045.149658203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.459826946258545, |
|
"rewards/margins": 80.20182800292969, |
|
"rewards/rejected": -81.66165161132812, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 1.125, |
|
"grad_norm": 0.0, |
|
"learning_rate": 8.79463319744677e-05, |
|
"logits/chosen": -0.9333359003067017, |
|
"logits/rejected": -1.2084262371063232, |
|
"logps/chosen": -3093.892578125, |
|
"logps/rejected": -1011.2752685546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.587095260620117, |
|
"rewards/margins": 64.42304992675781, |
|
"rewards/rejected": -79.01013946533203, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 1.1354166666666667, |
|
"grad_norm": 4.516256854982578e-16, |
|
"learning_rate": 8.62346485412832e-05, |
|
"logits/chosen": -0.9335057735443115, |
|
"logits/rejected": -1.2948254346847534, |
|
"logps/chosen": -3485.91015625, |
|
"logps/rejected": -943.7158203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -15.117718696594238, |
|
"rewards/margins": 58.55492401123047, |
|
"rewards/rejected": -73.67264556884766, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 1.1458333333333333, |
|
"grad_norm": 1.7443403811759252e-20, |
|
"learning_rate": 8.452706652097186e-05, |
|
"logits/chosen": -0.8658100962638855, |
|
"logits/rejected": -1.2765027284622192, |
|
"logps/chosen": -7044.2666015625, |
|
"logps/rejected": -1111.2900390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.557941436767578, |
|
"rewards/margins": 70.0662841796875, |
|
"rewards/rejected": -84.62422943115234, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.15625, |
|
"grad_norm": 8.975433514902403e-14, |
|
"learning_rate": 8.282409469087239e-05, |
|
"logits/chosen": -0.8567153215408325, |
|
"logits/rejected": -1.1527928113937378, |
|
"logps/chosen": -2753.281005859375, |
|
"logps/rejected": -1106.07861328125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.908112049102783, |
|
"rewards/margins": 77.96578979492188, |
|
"rewards/rejected": -84.8739013671875, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 1.1666666666666667, |
|
"grad_norm": 1.8399236127730756e-13, |
|
"learning_rate": 8.112624045470835e-05, |
|
"logits/chosen": -0.8963369727134705, |
|
"logits/rejected": -1.1266080141067505, |
|
"logps/chosen": -1911.738037109375, |
|
"logps/rejected": -935.976806640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.368061065673828, |
|
"rewards/margins": 60.68651580810547, |
|
"rewards/rejected": -71.05458068847656, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 1.1770833333333333, |
|
"grad_norm": 4.411308711811606e-19, |
|
"learning_rate": 7.943400969140635e-05, |
|
"logits/chosen": -0.9802509546279907, |
|
"logits/rejected": -1.2022995948791504, |
|
"logps/chosen": -4026.29296875, |
|
"logps/rejected": -1028.704345703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.705430030822754, |
|
"rewards/margins": 69.18346405029297, |
|
"rewards/rejected": -78.8888931274414, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 1.1875, |
|
"grad_norm": 6.856654159841134e-19, |
|
"learning_rate": 7.774790660436858e-05, |
|
"logits/chosen": -0.7806614637374878, |
|
"logits/rejected": -1.2099708318710327, |
|
"logps/chosen": -3049.276123046875, |
|
"logps/rejected": -1083.7523193359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.252618432044983, |
|
"rewards/margins": 82.09521484375, |
|
"rewards/rejected": -83.34783935546875, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 1.1979166666666667, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.606843357124426e-05, |
|
"logits/chosen": -0.8244500160217285, |
|
"logits/rejected": -1.3403112888336182, |
|
"logps/chosen": -2625.35205078125, |
|
"logps/rejected": -1107.463134765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.240277290344238, |
|
"rewards/margins": 75.68666076660156, |
|
"rewards/rejected": -84.92694091796875, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 1.2083333333333333, |
|
"grad_norm": 6.782594740894701e-20, |
|
"learning_rate": 7.43960909942453e-05, |
|
"logits/chosen": -0.9496563673019409, |
|
"logits/rejected": -1.0177761316299438, |
|
"logps/chosen": -1753.7301025390625, |
|
"logps/rejected": -994.8018798828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.330022811889648, |
|
"rewards/margins": 62.15534210205078, |
|
"rewards/rejected": -75.48535919189453, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 1.21875, |
|
"grad_norm": 1.650893022233743e-17, |
|
"learning_rate": 7.273137715105063e-05, |
|
"logits/chosen": -0.8159215450286865, |
|
"logits/rejected": -1.11468505859375, |
|
"logps/chosen": -2901.86376953125, |
|
"logps/rejected": -1028.982666015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.9665374755859375, |
|
"rewards/margins": 75.73015594482422, |
|
"rewards/rejected": -80.69668579101562, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 1.2291666666666667, |
|
"grad_norm": 9.338665629292994e-18, |
|
"learning_rate": 7.107478804634325e-05, |
|
"logits/chosen": -0.9511147737503052, |
|
"logits/rejected": -1.131158471107483, |
|
"logps/chosen": -2839.02099609375, |
|
"logps/rejected": -916.0258178710938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 4.969090938568115, |
|
"rewards/margins": 76.42955780029297, |
|
"rewards/rejected": -71.46046447753906, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 1.2395833333333333, |
|
"grad_norm": 8.370479371910218e-23, |
|
"learning_rate": 6.942681726402473e-05, |
|
"logits/chosen": -0.945068895816803, |
|
"logits/rejected": -1.2830828428268433, |
|
"logps/chosen": -3800.778076171875, |
|
"logps/rejected": -905.9716796875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.0239747762680054, |
|
"rewards/margins": 69.88013458251953, |
|
"rewards/rejected": -68.85615539550781, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.778795582015097e-05, |
|
"logits/chosen": -0.9145641326904297, |
|
"logits/rejected": -1.1153881549835205, |
|
"logps/chosen": -2149.349365234375, |
|
"logps/rejected": -1061.110107421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.1018218994140625, |
|
"rewards/margins": 79.47099304199219, |
|
"rewards/rejected": -81.57280731201172, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.2604166666666667, |
|
"grad_norm": 3.655288958431413e-17, |
|
"learning_rate": 6.615869201663296e-05, |
|
"logits/chosen": -0.8868393898010254, |
|
"logits/rejected": -1.3098794221878052, |
|
"logps/chosen": -5764.04736328125, |
|
"logps/rejected": -1124.540283203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0704987049102783, |
|
"rewards/margins": 85.03953552246094, |
|
"rewards/rejected": -86.11003112792969, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 1.2708333333333333, |
|
"grad_norm": 5.505299111008798e-16, |
|
"learning_rate": 6.453951129574644e-05, |
|
"logits/chosen": -0.9295135140419006, |
|
"logits/rejected": -1.1804428100585938, |
|
"logps/chosen": -5864.943359375, |
|
"logps/rejected": -1078.9697265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.316208839416504, |
|
"rewards/margins": 71.87071228027344, |
|
"rewards/rejected": -82.18692016601562, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 1.28125, |
|
"grad_norm": 0.0, |
|
"learning_rate": 6.293089609549325e-05, |
|
"logits/chosen": -0.7975695133209229, |
|
"logits/rejected": -1.3155672550201416, |
|
"logps/chosen": -3814.4150390625, |
|
"logps/rejected": -1204.483154296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.016815185546875, |
|
"rewards/margins": 94.49725341796875, |
|
"rewards/rejected": -93.48043823242188, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 1.2916666666666667, |
|
"grad_norm": 1.4148333719063387e-19, |
|
"learning_rate": 6.133332570585812e-05, |
|
"logits/chosen": -1.063108205795288, |
|
"logits/rejected": -1.2205861806869507, |
|
"logps/chosen": -2892.2685546875, |
|
"logps/rejected": -1125.844970703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.142558097839355, |
|
"rewards/margins": 72.0556411743164, |
|
"rewards/rejected": -85.19820404052734, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 1.3020833333333333, |
|
"grad_norm": 5.8489230255285674e-21, |
|
"learning_rate": 5.9747276126003257e-05, |
|
"logits/chosen": -0.7630377411842346, |
|
"logits/rejected": -1.316693902015686, |
|
"logps/chosen": -3374.70263671875, |
|
"logps/rejected": -1142.8436279296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.068140029907227, |
|
"rewards/margins": 79.92533874511719, |
|
"rewards/rejected": -87.99348449707031, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 1.3125, |
|
"grad_norm": 1.6185688098081561e-18, |
|
"learning_rate": 5.817321992244351e-05, |
|
"logits/chosen": -0.9196380376815796, |
|
"logits/rejected": -1.2248481512069702, |
|
"logps/chosen": -3206.56591796875, |
|
"logps/rejected": -1012.30908203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.104272842407227, |
|
"rewards/margins": 65.4676284790039, |
|
"rewards/rejected": -76.5718994140625, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 1.3229166666666667, |
|
"grad_norm": 2.5687962656379646e-21, |
|
"learning_rate": 5.6611626088244194e-05, |
|
"logits/chosen": -0.9707508087158203, |
|
"logits/rejected": -1.2447017431259155, |
|
"logps/chosen": -4089.699462890625, |
|
"logps/rejected": -1120.4154052734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.00418758392334, |
|
"rewards/margins": 76.06697082519531, |
|
"rewards/rejected": -85.07115936279297, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 1.3333333333333333, |
|
"grad_norm": 2.0503403694456911e-22, |
|
"learning_rate": 5.506295990328385e-05, |
|
"logits/chosen": -0.9009501934051514, |
|
"logits/rejected": -1.353840708732605, |
|
"logps/chosen": -7221.36279296875, |
|
"logps/rejected": -1135.1197509765625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.12373661994934082, |
|
"rewards/margins": 88.40489959716797, |
|
"rewards/rejected": -88.28115844726562, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 1.34375, |
|
"grad_norm": 1.344498075173984e-21, |
|
"learning_rate": 5.3527682795623146e-05, |
|
"logits/chosen": -0.8090143799781799, |
|
"logits/rejected": -1.0533530712127686, |
|
"logps/chosen": -2006.4832763671875, |
|
"logps/rejected": -1043.121826171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.14123249053955, |
|
"rewards/margins": 70.5691146850586, |
|
"rewards/rejected": -79.71034240722656, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 1.3541666666666667, |
|
"grad_norm": 6.440810335695039e-21, |
|
"learning_rate": 5.200625220402139e-05, |
|
"logits/chosen": -0.9704806208610535, |
|
"logits/rejected": -1.0929075479507446, |
|
"logps/chosen": -2996.92822265625, |
|
"logps/rejected": -1068.793212890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -9.935484886169434, |
|
"rewards/margins": 71.94236755371094, |
|
"rewards/rejected": -81.87785339355469, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.3645833333333333, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.0499121441641864e-05, |
|
"logits/chosen": -0.9403969049453735, |
|
"logits/rejected": -1.3582621812820435, |
|
"logps/chosen": -8281.2607421875, |
|
"logps/rejected": -1090.318115234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.1239867210388184, |
|
"rewards/margins": 84.06963348388672, |
|
"rewards/rejected": -82.94564819335938, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 1.375, |
|
"grad_norm": 1.1185308141413086e-19, |
|
"learning_rate": 4.900673956098644e-05, |
|
"logits/chosen": -1.0328657627105713, |
|
"logits/rejected": -1.1909589767456055, |
|
"logps/chosen": -1817.351318359375, |
|
"logps/rejected": -1014.7122802734375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.688718318939209, |
|
"rewards/margins": 72.41348266601562, |
|
"rewards/rejected": -78.1021957397461, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 1.3854166666666667, |
|
"grad_norm": 3.2959890386576225e-18, |
|
"learning_rate": 4.75295512200992e-05, |
|
"logits/chosen": -0.9048435091972351, |
|
"logits/rejected": -1.2165101766586304, |
|
"logps/chosen": -3945.4169921875, |
|
"logps/rejected": -1073.447265625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.457312107086182, |
|
"rewards/margins": 76.04530334472656, |
|
"rewards/rejected": -81.50261688232422, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 1.3958333333333333, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.606799655008009e-05, |
|
"logits/chosen": -0.903038740158081, |
|
"logits/rejected": -1.4270763397216797, |
|
"logps/chosen": -5543.68017578125, |
|
"logps/rejected": -1106.446533203125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.1213319301605225, |
|
"rewards/margins": 83.86710357666016, |
|
"rewards/rejected": -86.98844146728516, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 1.40625, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.462251102394669e-05, |
|
"logits/chosen": -0.8994408845901489, |
|
"logits/rejected": -1.3292877674102783, |
|
"logps/chosen": -3318.34814453125, |
|
"logps/rejected": -1010.1569213867188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.89552640914917, |
|
"rewards/margins": 67.90574645996094, |
|
"rewards/rejected": -75.80128479003906, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 1.4166666666666667, |
|
"grad_norm": 1.062754219307314e-21, |
|
"learning_rate": 4.3193525326884435e-05, |
|
"logits/chosen": -0.9541503190994263, |
|
"logits/rejected": -1.0579588413238525, |
|
"logps/chosen": -4469.5009765625, |
|
"logps/rejected": -955.7963256835938, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -12.544214248657227, |
|
"rewards/margins": 58.10245132446289, |
|
"rewards/rejected": -70.64666748046875, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 1.4270833333333333, |
|
"grad_norm": 4.9187133763885766e-17, |
|
"learning_rate": 4.1781465227922957e-05, |
|
"logits/chosen": -1.0173542499542236, |
|
"logits/rejected": -1.2005999088287354, |
|
"logps/chosen": -2528.12060546875, |
|
"logps/rejected": -1051.4796142578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2586699724197388, |
|
"rewards/margins": 77.34017944335938, |
|
"rewards/rejected": -78.59884643554688, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 1.4375, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.038675145307747e-05, |
|
"logits/chosen": -0.967241644859314, |
|
"logits/rejected": -1.1873809099197388, |
|
"logps/chosen": -3900.778564453125, |
|
"logps/rejected": -1132.687744140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1816892623901367, |
|
"rewards/margins": 83.33070373535156, |
|
"rewards/rejected": -84.51239013671875, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 1.4479166666666667, |
|
"grad_norm": 2.2958917370833966e-17, |
|
"learning_rate": 3.900979955999271e-05, |
|
"logits/chosen": -0.8518582582473755, |
|
"logits/rejected": -0.9221400618553162, |
|
"logps/chosen": -1183.7716064453125, |
|
"logps/rejected": -911.7036743164062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.865546226501465, |
|
"rewards/margins": 64.08380126953125, |
|
"rewards/rejected": -70.9493408203125, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 1.4583333333333333, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.7651019814126654e-05, |
|
"logits/chosen": -0.9010441899299622, |
|
"logits/rejected": -1.190494179725647, |
|
"logps/chosen": -5493.0009765625, |
|
"logps/rejected": -1155.7314453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.8245346546173096, |
|
"rewards/margins": 85.78129577636719, |
|
"rewards/rejected": -88.6058349609375, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.46875, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.6310817066511105e-05, |
|
"logits/chosen": -1.0124684572219849, |
|
"logits/rejected": -1.103775978088379, |
|
"logps/chosen": -1913.3040771484375, |
|
"logps/rejected": -1010.6784057617188, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.425868511199951, |
|
"rewards/margins": 71.89915466308594, |
|
"rewards/rejected": -77.32502746582031, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 1.4791666666666667, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.498959063312558e-05, |
|
"logits/chosen": -0.8120708465576172, |
|
"logits/rejected": -1.4091525077819824, |
|
"logps/chosen": -4661.7578125, |
|
"logps/rejected": -888.3680419921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.055895805358887, |
|
"rewards/margins": 62.75235366821289, |
|
"rewards/rejected": -67.8082504272461, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 1.4895833333333333, |
|
"grad_norm": 1.9747184031200515e-15, |
|
"learning_rate": 3.36877341759205e-05, |
|
"logits/chosen": -0.8257265090942383, |
|
"logits/rejected": -1.121315836906433, |
|
"logps/chosen": -1288.9190673828125, |
|
"logps/rejected": -972.7769775390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.115662097930908, |
|
"rewards/margins": 67.79209899902344, |
|
"rewards/rejected": -74.90776824951172, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.2405635585524565e-05, |
|
"logits/chosen": -1.0017499923706055, |
|
"logits/rejected": -1.1351391077041626, |
|
"logps/chosen": -3693.326171875, |
|
"logps/rejected": -1070.761962890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.161089897155762, |
|
"rewards/margins": 69.91401672363281, |
|
"rewards/rejected": -83.07510375976562, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 1.5104166666666665, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.114367686567228e-05, |
|
"logits/chosen": -0.848055899143219, |
|
"logits/rejected": -1.0882829427719116, |
|
"logps/chosen": -2654.097412109375, |
|
"logps/rejected": -1041.62060546875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.7376070022583, |
|
"rewards/margins": 65.37726593017578, |
|
"rewards/rejected": -80.11487579345703, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 1.5208333333333335, |
|
"grad_norm": 3.5242391416192426e-14, |
|
"learning_rate": 2.9902234019385057e-05, |
|
"logits/chosen": -0.8896538019180298, |
|
"logits/rejected": -1.258103370666504, |
|
"logps/chosen": -3455.951171875, |
|
"logps/rejected": -998.981201171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.142499923706055, |
|
"rewards/margins": 64.65351104736328, |
|
"rewards/rejected": -75.79601287841797, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 1.53125, |
|
"grad_norm": 4.5005137312183197e-13, |
|
"learning_rate": 2.8681676936940393e-05, |
|
"logits/chosen": -0.9204589128494263, |
|
"logits/rejected": -1.0349059104919434, |
|
"logps/chosen": -1558.69287109375, |
|
"logps/rejected": -1021.4735107421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.636395454406738, |
|
"rewards/margins": 64.9701919555664, |
|
"rewards/rejected": -76.6065902709961, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 1.5416666666666665, |
|
"grad_norm": 3.658716396254952e-13, |
|
"learning_rate": 2.7482369285662378e-05, |
|
"logits/chosen": -0.8321296572685242, |
|
"logits/rejected": -1.1634087562561035, |
|
"logps/chosen": -2103.5849609375, |
|
"logps/rejected": -974.1217041015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -28.24515724182129, |
|
"rewards/margins": 45.907371520996094, |
|
"rewards/rejected": -74.15252685546875, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 1.5520833333333335, |
|
"grad_norm": 3.7246280947998844e-22, |
|
"learning_rate": 2.6304668401566335e-05, |
|
"logits/chosen": -0.9244425296783447, |
|
"logits/rejected": -1.3096859455108643, |
|
"logps/chosen": -5354.45458984375, |
|
"logps/rejected": -1064.54443359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.046282768249512, |
|
"rewards/margins": 76.85649871826172, |
|
"rewards/rejected": -81.90278625488281, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 1.5625, |
|
"grad_norm": 1.2950027054019793e-20, |
|
"learning_rate": 2.514892518288988e-05, |
|
"logits/chosen": -0.8687339425086975, |
|
"logits/rejected": -1.2188998460769653, |
|
"logps/chosen": -4577.24462890625, |
|
"logps/rejected": -929.9271240234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.647838115692139, |
|
"rewards/margins": 67.19004821777344, |
|
"rewards/rejected": -71.83788299560547, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.5729166666666665, |
|
"grad_norm": 3.180693751106868e-17, |
|
"learning_rate": 2.401548398554213e-05, |
|
"logits/chosen": -0.8746656775474548, |
|
"logits/rejected": -1.1335382461547852, |
|
"logps/chosen": -2621.76708984375, |
|
"logps/rejected": -1069.098876953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.797000408172607, |
|
"rewards/margins": 76.91392517089844, |
|
"rewards/rejected": -81.71092987060547, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 1.5833333333333335, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.290468252050204e-05, |
|
"logits/chosen": -0.7061495780944824, |
|
"logits/rejected": -1.2625129222869873, |
|
"logps/chosen": -5062.939453125, |
|
"logps/rejected": -1201.242919921875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.179224014282227, |
|
"rewards/margins": 82.08878326416016, |
|
"rewards/rejected": -93.26800537109375, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 1.59375, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.181685175319702e-05, |
|
"logits/chosen": -0.7203177213668823, |
|
"logits/rejected": -1.3191170692443848, |
|
"logps/chosen": -3958.314453125, |
|
"logps/rejected": -1011.8955078125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.7906357049942017, |
|
"rewards/margins": 78.65202331542969, |
|
"rewards/rejected": -77.86138916015625, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 1.6041666666666665, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.0752315804890977e-05, |
|
"logits/chosen": -0.9195300340652466, |
|
"logits/rejected": -1.2774739265441895, |
|
"logps/chosen": -8186.68408203125, |
|
"logps/rejected": -1072.425048828125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4632796049118042, |
|
"rewards/margins": 80.83226013183594, |
|
"rewards/rejected": -82.29553985595703, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 1.6145833333333335, |
|
"grad_norm": 4.0147753867538186e-20, |
|
"learning_rate": 1.971139185611176e-05, |
|
"logits/chosen": -0.9163135886192322, |
|
"logits/rejected": -1.3071476221084595, |
|
"logps/chosen": -4530.13427734375, |
|
"logps/rejected": -1092.8966064453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.696630954742432, |
|
"rewards/margins": 80.32160949707031, |
|
"rewards/rejected": -85.01823425292969, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 1.625, |
|
"grad_norm": 1.5609576705471867e-19, |
|
"learning_rate": 1.8694390052146737e-05, |
|
"logits/chosen": -0.8604260683059692, |
|
"logits/rejected": -1.270695686340332, |
|
"logps/chosen": -3943.85595703125, |
|
"logps/rejected": -1087.4296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.993682861328125, |
|
"rewards/margins": 75.90512084960938, |
|
"rewards/rejected": -84.8988037109375, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 1.6354166666666665, |
|
"grad_norm": 2.811402606227264e-17, |
|
"learning_rate": 1.7701613410634365e-05, |
|
"logits/chosen": -0.8890691995620728, |
|
"logits/rejected": -1.3079321384429932, |
|
"logps/chosen": -7185.2529296875, |
|
"logps/rejected": -1080.0333251953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.33111572265625, |
|
"rewards/margins": 74.3923568725586, |
|
"rewards/rejected": -82.72347259521484, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 1.6458333333333335, |
|
"grad_norm": 1.0592323188268055e-13, |
|
"learning_rate": 1.6733357731279377e-05, |
|
"logits/chosen": -0.8480991721153259, |
|
"logits/rejected": -1.1103326082229614, |
|
"logps/chosen": -3468.0048828125, |
|
"logps/rejected": -1067.985595703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.42734670639038086, |
|
"rewards/margins": 82.94938659667969, |
|
"rewards/rejected": -83.3767318725586, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 1.65625, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.5789911507718826e-05, |
|
"logits/chosen": -0.7679794430732727, |
|
"logits/rejected": -1.1826531887054443, |
|
"logps/chosen": -2692.7138671875, |
|
"logps/rejected": -1040.9814453125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.65127182006836, |
|
"rewards/margins": 67.80027770996094, |
|
"rewards/rejected": -81.45155334472656, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 1.6666666666666665, |
|
"grad_norm": 2.101126106157376e-17, |
|
"learning_rate": 1.4871555841564887e-05, |
|
"logits/chosen": -0.7971072196960449, |
|
"logits/rejected": -1.2701507806777954, |
|
"logps/chosen": -2931.4375, |
|
"logps/rejected": -1022.1775512695312, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.8612060546875, |
|
"rewards/margins": 74.42487335205078, |
|
"rewards/rejected": -80.28607940673828, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.6770833333333335, |
|
"grad_norm": 1.693179950985332e-16, |
|
"learning_rate": 1.3978564358649927e-05, |
|
"logits/chosen": -1.000674843788147, |
|
"logits/rejected": -1.1274532079696655, |
|
"logps/chosen": -2341.431640625, |
|
"logps/rejected": -1146.2425537109375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.8021240234375, |
|
"rewards/margins": 82.45307922363281, |
|
"rewards/rejected": -86.25519561767578, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 1.6875, |
|
"grad_norm": 1.248795501349495e-18, |
|
"learning_rate": 1.311120312749935e-05, |
|
"logits/chosen": -0.9954484701156616, |
|
"logits/rejected": -1.0421305894851685, |
|
"logps/chosen": -1413.7686767578125, |
|
"logps/rejected": -919.8433837890625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.1903657913208, |
|
"rewards/margins": 60.226654052734375, |
|
"rewards/rejected": -70.4170150756836, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 1.6979166666666665, |
|
"grad_norm": 7.506130692859752e-17, |
|
"learning_rate": 1.2269730580055805e-05, |
|
"logits/chosen": -0.7554247975349426, |
|
"logits/rejected": -1.1726787090301514, |
|
"logps/chosen": -2398.90234375, |
|
"logps/rejected": -1041.2353515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.836740493774414, |
|
"rewards/margins": 68.93004608154297, |
|
"rewards/rejected": -79.76679229736328, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 1.7083333333333335, |
|
"grad_norm": 3.144996540161571e-16, |
|
"learning_rate": 1.1454397434679021e-05, |
|
"logits/chosen": -0.8778572082519531, |
|
"logits/rejected": -1.1429264545440674, |
|
"logps/chosen": -2819.60009765625, |
|
"logps/rejected": -1024.1851806640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.5772266387939453, |
|
"rewards/margins": 75.2021713256836, |
|
"rewards/rejected": -78.7793960571289, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 1.71875, |
|
"grad_norm": 4.54889006982497e-15, |
|
"learning_rate": 1.0665446621443708e-05, |
|
"logits/chosen": -1.0242542028427124, |
|
"logits/rejected": -1.2949650287628174, |
|
"logps/chosen": -6388.978515625, |
|
"logps/rejected": -1101.523193359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 23.088947296142578, |
|
"rewards/margins": 107.18841552734375, |
|
"rewards/rejected": -84.09947204589844, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 1.7291666666666665, |
|
"grad_norm": 1.2591869387457155e-11, |
|
"learning_rate": 9.903113209758096e-06, |
|
"logits/chosen": -1.0302661657333374, |
|
"logits/rejected": -1.0212035179138184, |
|
"logps/chosen": -1567.7783203125, |
|
"logps/rejected": -1051.6270751953125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.1559648513793945, |
|
"rewards/margins": 72.15129089355469, |
|
"rewards/rejected": -78.30725860595703, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 1.7395833333333335, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.1676243383246e-06, |
|
"logits/chosen": -0.9699329137802124, |
|
"logits/rejected": -1.217832088470459, |
|
"logps/chosen": -3370.20361328125, |
|
"logps/rejected": -1056.4344482421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.4962918758392334, |
|
"rewards/margins": 82.08119201660156, |
|
"rewards/rejected": -80.58489990234375, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"grad_norm": 6.902272136647668e-19, |
|
"learning_rate": 8.45919914746337e-06, |
|
"logits/chosen": -0.8314164876937866, |
|
"logits/rejected": -1.1248615980148315, |
|
"logps/chosen": -2003.40185546875, |
|
"logps/rejected": -976.593017578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.036044120788574, |
|
"rewards/margins": 70.12460327148438, |
|
"rewards/rejected": -75.16064453125, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 1.7604166666666665, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.778048713818975e-06, |
|
"logits/chosen": -1.0096337795257568, |
|
"logits/rejected": -1.224050521850586, |
|
"logps/chosen": -3408.56396484375, |
|
"logps/rejected": -1120.5277099609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.887763977050781, |
|
"rewards/margins": 79.56890869140625, |
|
"rewards/rejected": -85.45668029785156, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 1.7708333333333335, |
|
"grad_norm": 0.0, |
|
"learning_rate": 7.124375987469767e-06, |
|
"logits/chosen": -0.7806891798973083, |
|
"logits/rejected": -1.4077966213226318, |
|
"logps/chosen": -4579.115234375, |
|
"logps/rejected": -1092.3642578125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.048169136047363, |
|
"rewards/margins": 79.57754516601562, |
|
"rewards/rejected": -84.6257095336914, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.78125, |
|
"grad_norm": 8.991780414846024e-21, |
|
"learning_rate": 6.498375731458528e-06, |
|
"logits/chosen": -0.8369664549827576, |
|
"logits/rejected": -1.2401611804962158, |
|
"logps/chosen": -4373.06640625, |
|
"logps/rejected": -1185.349365234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.959744453430176, |
|
"rewards/margins": 80.5737533569336, |
|
"rewards/rejected": -91.53350067138672, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 1.7916666666666665, |
|
"grad_norm": 3.254465541259741e-20, |
|
"learning_rate": 5.900234463762366e-06, |
|
"logits/chosen": -0.9217289090156555, |
|
"logits/rejected": -1.3291540145874023, |
|
"logps/chosen": -4704.79736328125, |
|
"logps/rejected": -1218.4345703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -18.816030502319336, |
|
"rewards/margins": 75.90402221679688, |
|
"rewards/rejected": -94.72004699707031, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 1.8020833333333335, |
|
"grad_norm": 6.992826570984174e-20, |
|
"learning_rate": 5.3301304017194135e-06, |
|
"logits/chosen": -0.8925175666809082, |
|
"logits/rejected": -1.3186287879943848, |
|
"logps/chosen": -5796.84375, |
|
"logps/rejected": -1010.6710815429688, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.582611560821533, |
|
"rewards/margins": 71.03655242919922, |
|
"rewards/rejected": -78.61917114257812, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 1.8125, |
|
"grad_norm": 0.0, |
|
"learning_rate": 4.788233408928589e-06, |
|
"logits/chosen": -0.9071134328842163, |
|
"logits/rejected": -1.1578150987625122, |
|
"logps/chosen": -3065.755615234375, |
|
"logps/rejected": -1122.5201416015625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.236775398254395, |
|
"rewards/margins": 73.01652526855469, |
|
"rewards/rejected": -87.2532958984375, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 1.8229166666666665, |
|
"grad_norm": 5.8420251147017e-20, |
|
"learning_rate": 4.27470494463843e-06, |
|
"logits/chosen": -0.8542720079421997, |
|
"logits/rejected": -1.22062087059021, |
|
"logps/chosen": -4755.65087890625, |
|
"logps/rejected": -1087.69970703125, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 1.7637419700622559, |
|
"rewards/margins": 85.65948486328125, |
|
"rewards/rejected": -83.89574432373047, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 1.8333333333333335, |
|
"grad_norm": 4.5275721134567166e-20, |
|
"learning_rate": 3.789698015639953e-06, |
|
"logits/chosen": -0.8436378836631775, |
|
"logits/rejected": -1.2432456016540527, |
|
"logps/chosen": -7707.376953125, |
|
"logps/rejected": -1006.131103515625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.132122755050659, |
|
"rewards/margins": 74.2339096069336, |
|
"rewards/rejected": -76.36602783203125, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 1.84375, |
|
"grad_norm": 0.0, |
|
"learning_rate": 3.3333571306780497e-06, |
|
"logits/chosen": -1.1021344661712646, |
|
"logits/rejected": -1.146433711051941, |
|
"logps/chosen": -2144.27734375, |
|
"logps/rejected": -1221.460693359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.952573776245117, |
|
"rewards/margins": 81.55621337890625, |
|
"rewards/rejected": -95.5087890625, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 1.8541666666666665, |
|
"grad_norm": 3.1447959791819358e-09, |
|
"learning_rate": 2.905818257394799e-06, |
|
"logits/chosen": -0.9445458650588989, |
|
"logits/rejected": -1.1954491138458252, |
|
"logps/chosen": -3473.337158203125, |
|
"logps/rejected": -898.44140625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.790580749511719, |
|
"rewards/margins": 61.649497985839844, |
|
"rewards/rejected": -68.44007873535156, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 1.8645833333333335, |
|
"grad_norm": 3.4308754787927946e-22, |
|
"learning_rate": 2.5072087818176382e-06, |
|
"logits/chosen": -0.9306582808494568, |
|
"logits/rejected": -1.2371917963027954, |
|
"logps/chosen": -3138.842041015625, |
|
"logps/rejected": -1054.2342529296875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.166943550109863, |
|
"rewards/margins": 68.14605712890625, |
|
"rewards/rejected": -81.31299591064453, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 1.875, |
|
"grad_norm": 0.0, |
|
"learning_rate": 2.137647470404469e-06, |
|
"logits/chosen": -1.0094091892242432, |
|
"logits/rejected": -1.3734456300735474, |
|
"logps/chosen": -6787.31982421875, |
|
"logps/rejected": -909.1167602539062, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7543637752532959, |
|
"rewards/margins": 70.34402465820312, |
|
"rewards/rejected": -71.09838104248047, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.8854166666666665, |
|
"grad_norm": 6.411009053864542e-20, |
|
"learning_rate": 1.797244434656975e-06, |
|
"logits/chosen": -1.0234758853912354, |
|
"logits/rejected": -1.2576205730438232, |
|
"logps/chosen": -2733.744384765625, |
|
"logps/rejected": -1061.349609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 2.5823726654052734, |
|
"rewards/margins": 84.70896911621094, |
|
"rewards/rejected": -82.12660217285156, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 1.8958333333333335, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.48610109831262e-06, |
|
"logits/chosen": -0.8497348427772522, |
|
"logits/rejected": -1.0572631359100342, |
|
"logps/chosen": -2718.9326171875, |
|
"logps/rejected": -961.6370849609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.9914793968200684, |
|
"rewards/margins": 75.01031494140625, |
|
"rewards/rejected": -74.01884460449219, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 1.90625, |
|
"grad_norm": 2.2827734383898875e-18, |
|
"learning_rate": 1.2043101671253554e-06, |
|
"logits/chosen": -0.9114844799041748, |
|
"logits/rejected": -1.1452364921569824, |
|
"logps/chosen": -1678.3172607421875, |
|
"logps/rejected": -1151.85400390625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -14.129800796508789, |
|
"rewards/margins": 74.070556640625, |
|
"rewards/rejected": -88.20036315917969, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 1.9166666666666665, |
|
"grad_norm": 0.0, |
|
"learning_rate": 9.519556012436815e-07, |
|
"logits/chosen": -0.9810994267463684, |
|
"logits/rejected": -1.067641019821167, |
|
"logps/chosen": -2078.021728515625, |
|
"logps/rejected": -1072.85498046875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -13.242856979370117, |
|
"rewards/margins": 66.07984161376953, |
|
"rewards/rejected": -79.32270050048828, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 1.9270833333333335, |
|
"grad_norm": 1.8167316177431924e-16, |
|
"learning_rate": 7.291125901946027e-07, |
|
"logits/chosen": -0.8321889042854309, |
|
"logits/rejected": -1.1509894132614136, |
|
"logps/chosen": -3819.90966796875, |
|
"logps/rejected": -1095.14306640625, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -10.86672592163086, |
|
"rewards/margins": 71.09647369384766, |
|
"rewards/rejected": -81.96320343017578, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 1.9375, |
|
"grad_norm": 1.0820013757477458e-17, |
|
"learning_rate": 5.358475304807375e-07, |
|
"logits/chosen": -1.0654444694519043, |
|
"logits/rejected": -1.2142765522003174, |
|
"logps/chosen": -7236.927734375, |
|
"logps/rejected": -949.28857421875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.7587647438049316, |
|
"rewards/margins": 70.48192596435547, |
|
"rewards/rejected": -73.24069213867188, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 1.9479166666666665, |
|
"grad_norm": 2.45197261230215e-17, |
|
"learning_rate": 3.7221800579735346e-07, |
|
"logits/chosen": -0.8213499188423157, |
|
"logits/rejected": -1.2857908010482788, |
|
"logps/chosen": -6349.48046875, |
|
"logps/rejected": -1107.80615234375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.317949295043945, |
|
"rewards/margins": 78.53043365478516, |
|
"rewards/rejected": -84.84838104248047, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 1.9583333333333335, |
|
"grad_norm": 2.0503403694456911e-22, |
|
"learning_rate": 2.382727698752474e-07, |
|
"logits/chosen": -0.8970575332641602, |
|
"logits/rejected": -1.145188570022583, |
|
"logps/chosen": -2160.99365234375, |
|
"logps/rejected": -1063.6136474609375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.6992769241333, |
|
"rewards/margins": 72.37908935546875, |
|
"rewards/rejected": -81.07836151123047, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 1.96875, |
|
"grad_norm": 1.2547314776381705e-16, |
|
"learning_rate": 1.340517319543877e-07, |
|
"logits/chosen": -0.8810423612594604, |
|
"logits/rejected": -1.099363088607788, |
|
"logps/chosen": -2089.138671875, |
|
"logps/rejected": -1003.574951171875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -11.649038314819336, |
|
"rewards/margins": 64.49651336669922, |
|
"rewards/rejected": -76.14555358886719, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 1.9791666666666665, |
|
"grad_norm": 0.0, |
|
"learning_rate": 5.958594489295921e-08, |
|
"logits/chosen": -0.9419525861740112, |
|
"logits/rejected": -1.2568933963775635, |
|
"logps/chosen": -3585.5712890625, |
|
"logps/rejected": -979.148193359375, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.2342803478240967, |
|
"rewards/margins": 71.78946685791016, |
|
"rewards/rejected": -74.02375030517578, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.9895833333333335, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1.4897595915053242e-08, |
|
"logits/chosen": -0.9514296054840088, |
|
"logits/rejected": -1.1773942708969116, |
|
"logps/chosen": -3752.94091796875, |
|
"logps/rejected": -1072.39013671875, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.917599678039551, |
|
"rewards/margins": 77.99908447265625, |
|
"rewards/rejected": -81.91668701171875, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"grad_norm": 5.172685426425421e-20, |
|
"learning_rate": 0.0, |
|
"logits/chosen": -0.8919022083282471, |
|
"logits/rejected": -1.0698691606521606, |
|
"logps/chosen": -1968.748046875, |
|
"logps/rejected": -921.0995483398438, |
|
"loss": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -8.440521240234375, |
|
"rewards/margins": 60.94007110595703, |
|
"rewards/rejected": -69.3805923461914, |
|
"step": 192 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 192, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 96, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 1, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|