|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.0, |
|
"eval_steps": 500, |
|
"global_step": 1388, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.5971223021582734e-09, |
|
"logits/chosen": -2.8839163780212402, |
|
"logits/rejected": -2.699483633041382, |
|
"logps/chosen": -106.361572265625, |
|
"logps/rejected": -50.8937873840332, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 3.597122302158273e-08, |
|
"logits/chosen": -2.9716877937316895, |
|
"logits/rejected": -2.8243343830108643, |
|
"logps/chosen": -148.80015563964844, |
|
"logps/rejected": -84.43142700195312, |
|
"loss": 0.6918, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": 0.006020313128829002, |
|
"rewards/margins": 0.0030713342130184174, |
|
"rewards/rejected": 0.0029489779844880104, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 7.194244604316546e-08, |
|
"logits/chosen": -2.9206809997558594, |
|
"logits/rejected": -2.7788352966308594, |
|
"logps/chosen": -167.4009246826172, |
|
"logps/rejected": -95.04873657226562, |
|
"loss": 0.6525, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": 0.06761552393436432, |
|
"rewards/margins": 0.0887872725725174, |
|
"rewards/rejected": -0.021171752363443375, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 1.0791366906474819e-07, |
|
"logits/chosen": -2.907208204269409, |
|
"logits/rejected": -2.7389509677886963, |
|
"logps/chosen": -128.09487915039062, |
|
"logps/rejected": -80.83646392822266, |
|
"loss": 0.5577, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.2007008045911789, |
|
"rewards/margins": 0.2701273560523987, |
|
"rewards/rejected": -0.06942657381296158, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 1.4388489208633092e-07, |
|
"logits/chosen": -2.9200387001037598, |
|
"logits/rejected": -2.8407883644104004, |
|
"logps/chosen": -148.62106323242188, |
|
"logps/rejected": -105.0569839477539, |
|
"loss": 0.3744, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.6417331099510193, |
|
"rewards/margins": 1.1058695316314697, |
|
"rewards/rejected": -0.46413642168045044, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.7985611510791365e-07, |
|
"logits/chosen": -2.7872376441955566, |
|
"logits/rejected": -2.709198236465454, |
|
"logps/chosen": -146.15286254882812, |
|
"logps/rejected": -104.78489685058594, |
|
"loss": 0.2995, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": 0.24810883402824402, |
|
"rewards/margins": 1.5657349824905396, |
|
"rewards/rejected": -1.3176262378692627, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 2.1582733812949638e-07, |
|
"logits/chosen": -2.8873581886291504, |
|
"logits/rejected": -2.7115185260772705, |
|
"logps/chosen": -146.1516571044922, |
|
"logps/rejected": -108.72274017333984, |
|
"loss": 0.1946, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.5725937485694885, |
|
"rewards/margins": 2.431591033935547, |
|
"rewards/rejected": -1.8589973449707031, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 2.517985611510791e-07, |
|
"logits/chosen": -2.838667392730713, |
|
"logits/rejected": -2.7343862056732178, |
|
"logps/chosen": -130.49063110351562, |
|
"logps/rejected": -113.7320327758789, |
|
"loss": 0.1533, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.05752415582537651, |
|
"rewards/margins": 2.663848400115967, |
|
"rewards/rejected": -2.6063244342803955, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 2.8776978417266184e-07, |
|
"logits/chosen": -2.8950698375701904, |
|
"logits/rejected": -2.691622495651245, |
|
"logps/chosen": -138.45028686523438, |
|
"logps/rejected": -100.14655303955078, |
|
"loss": 0.1717, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": 0.42671164870262146, |
|
"rewards/margins": 2.7645459175109863, |
|
"rewards/rejected": -2.337834358215332, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 3.2374100719424457e-07, |
|
"logits/chosen": -2.7722439765930176, |
|
"logits/rejected": -2.690833330154419, |
|
"logps/chosen": -135.5113067626953, |
|
"logps/rejected": -121.447021484375, |
|
"loss": 0.1075, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": 0.4925897717475891, |
|
"rewards/margins": 4.084370136260986, |
|
"rewards/rejected": -3.591780185699463, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 3.597122302158273e-07, |
|
"logits/chosen": -2.888807773590088, |
|
"logits/rejected": -2.7131495475769043, |
|
"logps/chosen": -164.85647583007812, |
|
"logps/rejected": -124.16983795166016, |
|
"loss": 0.1169, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.2238633632659912, |
|
"rewards/margins": 3.586012601852417, |
|
"rewards/rejected": -3.362149715423584, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 3.9568345323741003e-07, |
|
"logits/chosen": -2.7212650775909424, |
|
"logits/rejected": -2.576204538345337, |
|
"logps/chosen": -136.82293701171875, |
|
"logps/rejected": -123.34732818603516, |
|
"loss": 0.0757, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.43352875113487244, |
|
"rewards/margins": 4.669638633728027, |
|
"rewards/rejected": -4.236109733581543, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 4.3165467625899276e-07, |
|
"logits/chosen": -2.7657124996185303, |
|
"logits/rejected": -2.6242692470550537, |
|
"logps/chosen": -165.17178344726562, |
|
"logps/rejected": -137.9097442626953, |
|
"loss": 0.0741, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4255678057670593, |
|
"rewards/margins": 4.908309459686279, |
|
"rewards/rejected": -4.482741355895996, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 4.676258992805755e-07, |
|
"logits/chosen": -2.7995333671569824, |
|
"logits/rejected": -2.628948926925659, |
|
"logps/chosen": -143.13916015625, |
|
"logps/rejected": -135.01576232910156, |
|
"loss": 0.0557, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.4051126539707184, |
|
"rewards/margins": 6.454569339752197, |
|
"rewards/rejected": -6.0494561195373535, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 4.99599679743795e-07, |
|
"logits/chosen": -2.856595516204834, |
|
"logits/rejected": -2.64650297164917, |
|
"logps/chosen": -182.11863708496094, |
|
"logps/rejected": -170.402587890625, |
|
"loss": 0.0904, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6085208058357239, |
|
"rewards/margins": 5.310309886932373, |
|
"rewards/rejected": -5.918830871582031, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 4.955964771817453e-07, |
|
"logits/chosen": -2.720083475112915, |
|
"logits/rejected": -2.5524630546569824, |
|
"logps/chosen": -138.2317352294922, |
|
"logps/rejected": -122.82208251953125, |
|
"loss": 0.0887, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.38773685693740845, |
|
"rewards/margins": 3.9186530113220215, |
|
"rewards/rejected": -4.306389808654785, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 4.915932746196957e-07, |
|
"logits/chosen": -2.6586403846740723, |
|
"logits/rejected": -2.5184950828552246, |
|
"logps/chosen": -146.91192626953125, |
|
"logps/rejected": -146.19537353515625, |
|
"loss": 0.0528, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4298267364501953, |
|
"rewards/margins": 5.707052707672119, |
|
"rewards/rejected": -6.136878490447998, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 4.875900720576461e-07, |
|
"logits/chosen": -2.7607617378234863, |
|
"logits/rejected": -2.59885573387146, |
|
"logps/chosen": -161.85403442382812, |
|
"logps/rejected": -172.4330596923828, |
|
"loss": 0.0589, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.16717226803302765, |
|
"rewards/margins": 7.492938041687012, |
|
"rewards/rejected": -7.325766086578369, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 4.835868694955965e-07, |
|
"logits/chosen": -2.6541225910186768, |
|
"logits/rejected": -2.5471792221069336, |
|
"logps/chosen": -148.55508422851562, |
|
"logps/rejected": -166.07257080078125, |
|
"loss": 0.0758, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -1.3034805059432983, |
|
"rewards/margins": 6.427194118499756, |
|
"rewards/rejected": -7.730674743652344, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 4.795836669335467e-07, |
|
"logits/chosen": -2.652890205383301, |
|
"logits/rejected": -2.4126124382019043, |
|
"logps/chosen": -134.67652893066406, |
|
"logps/rejected": -130.59608459472656, |
|
"loss": 0.0909, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.3820854127407074, |
|
"rewards/margins": 5.234072685241699, |
|
"rewards/rejected": -5.616158485412598, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 4.755804643714972e-07, |
|
"logits/chosen": -2.6649863719940186, |
|
"logits/rejected": -2.4534084796905518, |
|
"logps/chosen": -154.67408752441406, |
|
"logps/rejected": -138.3970947265625, |
|
"loss": 0.1012, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.47632989287376404, |
|
"rewards/margins": 5.756840229034424, |
|
"rewards/rejected": -5.2805094718933105, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 4.715772618094475e-07, |
|
"logits/chosen": -2.7960267066955566, |
|
"logits/rejected": -2.5353095531463623, |
|
"logps/chosen": -171.26986694335938, |
|
"logps/rejected": -157.50350952148438, |
|
"loss": 0.0516, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.2727116644382477, |
|
"rewards/margins": 5.267422676086426, |
|
"rewards/rejected": -5.540134429931641, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 4.675740592473979e-07, |
|
"logits/chosen": -2.5903918743133545, |
|
"logits/rejected": -2.481639862060547, |
|
"logps/chosen": -164.92575073242188, |
|
"logps/rejected": -157.99099731445312, |
|
"loss": 0.0328, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6272125244140625, |
|
"rewards/margins": 6.131289005279541, |
|
"rewards/rejected": -6.7585015296936035, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 4.635708566853482e-07, |
|
"logits/chosen": -2.683683156967163, |
|
"logits/rejected": -2.465529441833496, |
|
"logps/chosen": -179.80831909179688, |
|
"logps/rejected": -153.77401733398438, |
|
"loss": 0.0659, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.9624654054641724, |
|
"rewards/margins": 5.732499599456787, |
|
"rewards/rejected": -6.6949663162231445, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 4.595676541232986e-07, |
|
"logits/chosen": -2.538198709487915, |
|
"logits/rejected": -2.472057580947876, |
|
"logps/chosen": -134.72840881347656, |
|
"logps/rejected": -163.64105224609375, |
|
"loss": 0.0385, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.4930785596370697, |
|
"rewards/margins": 7.688417911529541, |
|
"rewards/rejected": -8.181497573852539, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 4.5556445156124894e-07, |
|
"logits/chosen": -2.7781453132629395, |
|
"logits/rejected": -2.5276522636413574, |
|
"logps/chosen": -143.19754028320312, |
|
"logps/rejected": -150.41925048828125, |
|
"loss": 0.0375, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.8589959144592285, |
|
"rewards/margins": 6.875401973724365, |
|
"rewards/rejected": -7.73439884185791, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 4.515612489991993e-07, |
|
"logits/chosen": -2.6978352069854736, |
|
"logits/rejected": -2.4410510063171387, |
|
"logps/chosen": -163.37667846679688, |
|
"logps/rejected": -157.6444854736328, |
|
"loss": 0.0255, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5269836783409119, |
|
"rewards/margins": 6.9025750160217285, |
|
"rewards/rejected": -7.429558753967285, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 4.4755804643714965e-07, |
|
"logits/chosen": -2.6981711387634277, |
|
"logits/rejected": -2.4240591526031494, |
|
"logps/chosen": -145.78114318847656, |
|
"logps/rejected": -153.4759521484375, |
|
"loss": 0.0466, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.961786150932312, |
|
"rewards/margins": 6.817984580993652, |
|
"rewards/rejected": -7.779770851135254, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 4.4355484387510004e-07, |
|
"logits/chosen": -2.6088438034057617, |
|
"logits/rejected": -2.49806809425354, |
|
"logps/chosen": -167.5294952392578, |
|
"logps/rejected": -196.126953125, |
|
"loss": 0.0326, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.763117790222168, |
|
"rewards/margins": 8.220617294311523, |
|
"rewards/rejected": -9.983735084533691, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 4.3955164131305047e-07, |
|
"logits/chosen": -2.692411184310913, |
|
"logits/rejected": -2.426178455352783, |
|
"logps/chosen": -149.58558654785156, |
|
"logps/rejected": -154.73678588867188, |
|
"loss": 0.0126, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.13495102524757385, |
|
"rewards/margins": 8.114767074584961, |
|
"rewards/rejected": -8.249719619750977, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 4.355484387510008e-07, |
|
"logits/chosen": -2.507620334625244, |
|
"logits/rejected": -2.3317294120788574, |
|
"logps/chosen": -165.7401123046875, |
|
"logps/rejected": -168.38839721679688, |
|
"loss": 0.0765, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -2.2425320148468018, |
|
"rewards/margins": 5.922076225280762, |
|
"rewards/rejected": -8.164608001708984, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 4.315452361889512e-07, |
|
"logits/chosen": -2.6149442195892334, |
|
"logits/rejected": -2.3606739044189453, |
|
"logps/chosen": -135.47378540039062, |
|
"logps/rejected": -146.0089569091797, |
|
"loss": 0.0298, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.3370214700698853, |
|
"rewards/margins": 6.675353050231934, |
|
"rewards/rejected": -8.012373924255371, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 4.275420336269015e-07, |
|
"logits/chosen": -2.5880959033966064, |
|
"logits/rejected": -2.35605788230896, |
|
"logps/chosen": -182.91497802734375, |
|
"logps/rejected": -189.45333862304688, |
|
"loss": 0.0479, |
|
"rewards/accuracies": 0.925000011920929, |
|
"rewards/chosen": -2.185288190841675, |
|
"rewards/margins": 7.346780300140381, |
|
"rewards/rejected": -9.532068252563477, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 4.235388310648519e-07, |
|
"logits/chosen": -2.5866305828094482, |
|
"logits/rejected": -2.3037662506103516, |
|
"logps/chosen": -184.96115112304688, |
|
"logps/rejected": -184.9438934326172, |
|
"loss": 0.0831, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.369056224822998, |
|
"rewards/margins": 7.428493499755859, |
|
"rewards/rejected": -10.797548294067383, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 4.1953562850280223e-07, |
|
"logits/chosen": -2.519735336303711, |
|
"logits/rejected": -2.334322690963745, |
|
"logps/chosen": -173.40858459472656, |
|
"logps/rejected": -215.0013885498047, |
|
"loss": 0.031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.5725579261779785, |
|
"rewards/margins": 9.760233879089355, |
|
"rewards/rejected": -12.332793235778809, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 4.155324259407526e-07, |
|
"logits/chosen": -2.586958408355713, |
|
"logits/rejected": -2.3933067321777344, |
|
"logps/chosen": -180.2490234375, |
|
"logps/rejected": -214.9405059814453, |
|
"loss": 0.0285, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.519786834716797, |
|
"rewards/margins": 9.661985397338867, |
|
"rewards/rejected": -12.181772232055664, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 4.1152922337870295e-07, |
|
"logits/chosen": -2.4591023921966553, |
|
"logits/rejected": -2.2511239051818848, |
|
"logps/chosen": -131.71694946289062, |
|
"logps/rejected": -165.09132385253906, |
|
"loss": 0.0247, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7444407939910889, |
|
"rewards/margins": 9.10871410369873, |
|
"rewards/rejected": -9.853155136108398, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 4.0752602081665333e-07, |
|
"logits/chosen": -2.651655912399292, |
|
"logits/rejected": -2.4027347564697266, |
|
"logps/chosen": -175.1227569580078, |
|
"logps/rejected": -186.62240600585938, |
|
"loss": 0.0247, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8536550998687744, |
|
"rewards/margins": 8.069160461425781, |
|
"rewards/rejected": -9.922816276550293, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 4.0352281825460366e-07, |
|
"logits/chosen": -2.515545606613159, |
|
"logits/rejected": -2.379769802093506, |
|
"logps/chosen": -159.44683837890625, |
|
"logps/rejected": -185.20753479003906, |
|
"loss": 0.0349, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.584536612033844, |
|
"rewards/margins": 8.809714317321777, |
|
"rewards/rejected": -9.394251823425293, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 3.9951961569255404e-07, |
|
"logits/chosen": -2.880056142807007, |
|
"logits/rejected": -2.5667223930358887, |
|
"logps/chosen": -166.15879821777344, |
|
"logps/rejected": -169.60914611816406, |
|
"loss": 0.0666, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2807844877243042, |
|
"rewards/margins": 7.306063652038574, |
|
"rewards/rejected": -8.586848258972168, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 3.9551641313050437e-07, |
|
"logits/chosen": -2.6910109519958496, |
|
"logits/rejected": -2.438204526901245, |
|
"logps/chosen": -167.52279663085938, |
|
"logps/rejected": -193.27291870117188, |
|
"loss": 0.0591, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.8132003545761108, |
|
"rewards/margins": 8.618528366088867, |
|
"rewards/rejected": -10.431727409362793, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 3.9151321056845476e-07, |
|
"logits/chosen": -2.5091681480407715, |
|
"logits/rejected": -2.2911810874938965, |
|
"logps/chosen": -140.93154907226562, |
|
"logps/rejected": -179.24794006347656, |
|
"loss": 0.0231, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7046592235565186, |
|
"rewards/margins": 9.365106582641602, |
|
"rewards/rejected": -11.0697660446167, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 3.875100080064051e-07, |
|
"logits/chosen": -2.5162720680236816, |
|
"logits/rejected": -2.318171501159668, |
|
"logps/chosen": -140.7128143310547, |
|
"logps/rejected": -169.2827911376953, |
|
"loss": 0.0437, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0878578424453735, |
|
"rewards/margins": 8.321041107177734, |
|
"rewards/rejected": -9.408899307250977, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 3.8350680544435547e-07, |
|
"logits/chosen": -2.5058627128601074, |
|
"logits/rejected": -2.285526752471924, |
|
"logps/chosen": -158.39208984375, |
|
"logps/rejected": -191.4017791748047, |
|
"loss": 0.0205, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.077756643295288, |
|
"rewards/margins": 9.962626457214355, |
|
"rewards/rejected": -11.040384292602539, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 3.795036028823058e-07, |
|
"logits/chosen": -2.5467917919158936, |
|
"logits/rejected": -2.293295383453369, |
|
"logps/chosen": -165.51400756835938, |
|
"logps/rejected": -176.27957153320312, |
|
"loss": 0.0429, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.651084542274475, |
|
"rewards/margins": 8.067974090576172, |
|
"rewards/rejected": -9.719058990478516, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 3.755004003202562e-07, |
|
"logits/chosen": -2.5901718139648438, |
|
"logits/rejected": -2.3814101219177246, |
|
"logps/chosen": -149.50888061523438, |
|
"logps/rejected": -192.05587768554688, |
|
"loss": 0.0323, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6450309753417969, |
|
"rewards/margins": 9.971355438232422, |
|
"rewards/rejected": -11.616386413574219, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 3.714971977582065e-07, |
|
"logits/chosen": -2.676997661590576, |
|
"logits/rejected": -2.5134191513061523, |
|
"logps/chosen": -156.29513549804688, |
|
"logps/rejected": -190.77088928222656, |
|
"loss": 0.0357, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.1767628192901611, |
|
"rewards/margins": 9.212038040161133, |
|
"rewards/rejected": -10.388800621032715, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 3.674939951961569e-07, |
|
"logits/chosen": -2.75260853767395, |
|
"logits/rejected": -2.4446868896484375, |
|
"logps/chosen": -167.54098510742188, |
|
"logps/rejected": -198.13467407226562, |
|
"loss": 0.0099, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5461426973342896, |
|
"rewards/margins": 10.665987968444824, |
|
"rewards/rejected": -11.212130546569824, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 3.634907926341073e-07, |
|
"logits/chosen": -2.7042384147644043, |
|
"logits/rejected": -2.5021824836730957, |
|
"logps/chosen": -177.01632690429688, |
|
"logps/rejected": -194.0543670654297, |
|
"loss": 0.0369, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.1976065635681152, |
|
"rewards/margins": 7.7528533935546875, |
|
"rewards/rejected": -9.950460433959961, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 3.5948759007205767e-07, |
|
"logits/chosen": -2.609654188156128, |
|
"logits/rejected": -2.442094326019287, |
|
"logps/chosen": -186.2327423095703, |
|
"logps/rejected": -238.8030242919922, |
|
"loss": 0.0343, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.969175934791565, |
|
"rewards/margins": 12.21094036102295, |
|
"rewards/rejected": -14.18011474609375, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 3.55484387510008e-07, |
|
"logits/chosen": -2.5849597454071045, |
|
"logits/rejected": -2.4151904582977295, |
|
"logps/chosen": -184.76492309570312, |
|
"logps/rejected": -203.13241577148438, |
|
"loss": 0.0268, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -2.5779895782470703, |
|
"rewards/margins": 8.900407791137695, |
|
"rewards/rejected": -11.478398323059082, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"eval_logits/chosen": -2.4085986614227295, |
|
"eval_logits/rejected": -2.233201026916504, |
|
"eval_logps/chosen": -157.914306640625, |
|
"eval_logps/rejected": -183.62203979492188, |
|
"eval_loss": 0.03143342584371567, |
|
"eval_rewards/accuracies": 0.9960317611694336, |
|
"eval_rewards/chosen": -0.9699568152427673, |
|
"eval_rewards/margins": 8.822220802307129, |
|
"eval_rewards/rejected": -9.7921781539917, |
|
"eval_runtime": 869.9338, |
|
"eval_samples_per_second": 2.299, |
|
"eval_steps_per_second": 0.072, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 3.514811849479584e-07, |
|
"logits/chosen": -2.4951071739196777, |
|
"logits/rejected": -2.249694585800171, |
|
"logps/chosen": -164.900634765625, |
|
"logps/rejected": -189.8306427001953, |
|
"loss": 0.0301, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.6467971801757812, |
|
"rewards/margins": 9.143733024597168, |
|
"rewards/rejected": -10.79053020477295, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 3.474779823859087e-07, |
|
"logits/chosen": -2.575314521789551, |
|
"logits/rejected": -2.233131170272827, |
|
"logps/chosen": -182.89932250976562, |
|
"logps/rejected": -243.9858856201172, |
|
"loss": 0.0217, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4116153717041016, |
|
"rewards/margins": 13.464820861816406, |
|
"rewards/rejected": -14.876436233520508, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 3.434747798238591e-07, |
|
"logits/chosen": -2.4697818756103516, |
|
"logits/rejected": -2.356581926345825, |
|
"logps/chosen": -201.47634887695312, |
|
"logps/rejected": -548.7425537109375, |
|
"loss": 0.0351, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.6284077167510986, |
|
"rewards/margins": 39.81824493408203, |
|
"rewards/rejected": -43.44664764404297, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 3.394715772618094e-07, |
|
"logits/chosen": -2.418208599090576, |
|
"logits/rejected": -2.22477126121521, |
|
"logps/chosen": -165.20034790039062, |
|
"logps/rejected": -628.0509643554688, |
|
"loss": 0.0124, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.872553825378418, |
|
"rewards/margins": 52.413848876953125, |
|
"rewards/rejected": -54.286399841308594, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 3.354683746997598e-07, |
|
"logits/chosen": -2.5192372798919678, |
|
"logits/rejected": -2.295382261276245, |
|
"logps/chosen": -176.81497192382812, |
|
"logps/rejected": -363.97174072265625, |
|
"loss": 0.0275, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7440744638442993, |
|
"rewards/margins": 24.896778106689453, |
|
"rewards/rejected": -26.640857696533203, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 3.3146517213771014e-07, |
|
"logits/chosen": -2.6569995880126953, |
|
"logits/rejected": -2.2872815132141113, |
|
"logps/chosen": -215.7018585205078, |
|
"logps/rejected": -305.87799072265625, |
|
"loss": 0.0359, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.8822388648986816, |
|
"rewards/margins": 16.257402420043945, |
|
"rewards/rejected": -19.1396427154541, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 3.274619695756605e-07, |
|
"logits/chosen": -2.393214702606201, |
|
"logits/rejected": -2.0127763748168945, |
|
"logps/chosen": -185.98187255859375, |
|
"logps/rejected": -338.15576171875, |
|
"loss": 0.0316, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.6043496131896973, |
|
"rewards/margins": 21.948997497558594, |
|
"rewards/rejected": -25.553346633911133, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 3.2345876701361085e-07, |
|
"logits/chosen": -2.4011263847351074, |
|
"logits/rejected": -2.0070765018463135, |
|
"logps/chosen": -196.2584228515625, |
|
"logps/rejected": -484.80926513671875, |
|
"loss": 0.0152, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.4227471351623535, |
|
"rewards/margins": 34.711463928222656, |
|
"rewards/rejected": -38.13420867919922, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 3.1945556445156124e-07, |
|
"logits/chosen": -1.9645344018936157, |
|
"logits/rejected": -1.2225711345672607, |
|
"logps/chosen": -209.5574951171875, |
|
"logps/rejected": -543.1734008789062, |
|
"loss": 0.0257, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.6722359657287598, |
|
"rewards/margins": 40.712772369384766, |
|
"rewards/rejected": -44.385005950927734, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 3.1545236188951157e-07, |
|
"logits/chosen": -1.978044867515564, |
|
"logits/rejected": -1.2324669361114502, |
|
"logps/chosen": -192.2949981689453, |
|
"logps/rejected": -480.520751953125, |
|
"loss": 0.0255, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.5515990257263184, |
|
"rewards/margins": 35.85133743286133, |
|
"rewards/rejected": -39.40293884277344, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 3.1144915932746195e-07, |
|
"logits/chosen": -2.275550365447998, |
|
"logits/rejected": -1.6282291412353516, |
|
"logps/chosen": -196.13803100585938, |
|
"logps/rejected": -297.6045837402344, |
|
"loss": 0.0368, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.565072059631348, |
|
"rewards/margins": 17.173152923583984, |
|
"rewards/rejected": -21.738224029541016, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 3.074459567654123e-07, |
|
"logits/chosen": -2.4316773414611816, |
|
"logits/rejected": -1.7663853168487549, |
|
"logps/chosen": -189.8267822265625, |
|
"logps/rejected": -321.9093017578125, |
|
"loss": 0.0363, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.32307505607605, |
|
"rewards/margins": 19.51993179321289, |
|
"rewards/rejected": -22.843008041381836, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 3.0344275420336267e-07, |
|
"logits/chosen": -2.2213199138641357, |
|
"logits/rejected": -1.702415108680725, |
|
"logps/chosen": -189.29393005371094, |
|
"logps/rejected": -392.8915100097656, |
|
"loss": 0.0107, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.8561224937438965, |
|
"rewards/margins": 26.04262924194336, |
|
"rewards/rejected": -29.898754119873047, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 2.99439551641313e-07, |
|
"logits/chosen": -2.0172629356384277, |
|
"logits/rejected": -1.2431235313415527, |
|
"logps/chosen": -208.375732421875, |
|
"logps/rejected": -378.9666748046875, |
|
"loss": 0.0228, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -6.202768325805664, |
|
"rewards/margins": 22.777095794677734, |
|
"rewards/rejected": -28.9798641204834, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 2.954363490792634e-07, |
|
"logits/chosen": -1.7751468420028687, |
|
"logits/rejected": -1.2209514379501343, |
|
"logps/chosen": -241.2014617919922, |
|
"logps/rejected": -514.4192504882812, |
|
"loss": 0.0131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -7.191336154937744, |
|
"rewards/margins": 33.45779800415039, |
|
"rewards/rejected": -40.649131774902344, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 2.914331465172137e-07, |
|
"logits/chosen": -1.6847556829452515, |
|
"logits/rejected": -1.00700843334198, |
|
"logps/chosen": -197.2582550048828, |
|
"logps/rejected": -443.4234313964844, |
|
"loss": 0.0166, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.894466400146484, |
|
"rewards/margins": 29.733402252197266, |
|
"rewards/rejected": -35.627864837646484, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 2.8742994395516415e-07, |
|
"logits/chosen": -2.154357433319092, |
|
"logits/rejected": -1.1991710662841797, |
|
"logps/chosen": -189.2927703857422, |
|
"logps/rejected": -484.61785888671875, |
|
"loss": 0.0175, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.121435642242432, |
|
"rewards/margins": 36.48408508300781, |
|
"rewards/rejected": -40.60551834106445, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 2.834267413931145e-07, |
|
"logits/chosen": -1.9125760793685913, |
|
"logits/rejected": -1.0993740558624268, |
|
"logps/chosen": -212.220947265625, |
|
"logps/rejected": -427.8121643066406, |
|
"loss": 0.0128, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -5.176814079284668, |
|
"rewards/margins": 28.130443572998047, |
|
"rewards/rejected": -33.30725860595703, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 2.7942353883106486e-07, |
|
"logits/chosen": -2.2864699363708496, |
|
"logits/rejected": -1.399320363998413, |
|
"logps/chosen": -189.4803924560547, |
|
"logps/rejected": -217.796142578125, |
|
"loss": 0.0196, |
|
"rewards/accuracies": 0.949999988079071, |
|
"rewards/chosen": -3.550410032272339, |
|
"rewards/margins": 9.777883529663086, |
|
"rewards/rejected": -13.328292846679688, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.754203362690152e-07, |
|
"logits/chosen": -2.773916244506836, |
|
"logits/rejected": -2.527047872543335, |
|
"logps/chosen": -183.30543518066406, |
|
"logps/rejected": -429.06365966796875, |
|
"loss": 0.0442, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.4481396675109863, |
|
"rewards/margins": 30.67641258239746, |
|
"rewards/rejected": -33.124549865722656, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 2.714171337069656e-07, |
|
"logits/chosen": -2.882967948913574, |
|
"logits/rejected": -2.72076153755188, |
|
"logps/chosen": -167.57542419433594, |
|
"logps/rejected": -250.15274047851562, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.485212802886963, |
|
"rewards/margins": 14.637022018432617, |
|
"rewards/rejected": -16.122234344482422, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.674139311449159e-07, |
|
"logits/chosen": -2.7207860946655273, |
|
"logits/rejected": -2.5453267097473145, |
|
"logps/chosen": -151.00723266601562, |
|
"logps/rejected": -295.5516357421875, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0635440349578857, |
|
"rewards/margins": 20.84624671936035, |
|
"rewards/rejected": -21.9097900390625, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 2.634107285828663e-07, |
|
"logits/chosen": -2.799225091934204, |
|
"logits/rejected": -2.6195671558380127, |
|
"logps/chosen": -207.414306640625, |
|
"logps/rejected": -471.4579162597656, |
|
"loss": 0.015, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -5.837430477142334, |
|
"rewards/margins": 32.82604217529297, |
|
"rewards/rejected": -38.66347885131836, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.594075260208166e-07, |
|
"logits/chosen": -2.8809666633605957, |
|
"logits/rejected": -2.6668760776519775, |
|
"logps/chosen": -189.98104858398438, |
|
"logps/rejected": -271.88641357421875, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0119662284851074, |
|
"rewards/margins": 15.516934394836426, |
|
"rewards/rejected": -17.528902053833008, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 2.55404323458767e-07, |
|
"logits/chosen": -2.8235630989074707, |
|
"logits/rejected": -2.6322312355041504, |
|
"logps/chosen": -159.7130584716797, |
|
"logps/rejected": -270.10101318359375, |
|
"loss": 0.0086, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.5178642272949219, |
|
"rewards/margins": 17.20760154724121, |
|
"rewards/rejected": -18.725465774536133, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 2.514011208967174e-07, |
|
"logits/chosen": -2.6800270080566406, |
|
"logits/rejected": -2.516126871109009, |
|
"logps/chosen": -163.38233947753906, |
|
"logps/rejected": -561.2886352539062, |
|
"loss": 0.0059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.2010345458984375, |
|
"rewards/margins": 45.494956970214844, |
|
"rewards/rejected": -47.69599914550781, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.473979183346677e-07, |
|
"logits/chosen": -2.7506349086761475, |
|
"logits/rejected": -2.5886902809143066, |
|
"logps/chosen": -141.7732696533203, |
|
"logps/rejected": -377.6195373535156, |
|
"loss": 0.0144, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.402200698852539, |
|
"rewards/margins": 28.883886337280273, |
|
"rewards/rejected": -30.286090850830078, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 2.433947157726181e-07, |
|
"logits/chosen": -2.842419147491455, |
|
"logits/rejected": -2.6155142784118652, |
|
"logps/chosen": -180.7171630859375, |
|
"logps/rejected": -282.32220458984375, |
|
"loss": 0.0039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.1550252437591553, |
|
"rewards/margins": 17.016422271728516, |
|
"rewards/rejected": -19.17144775390625, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.3939151321056843e-07, |
|
"logits/chosen": -2.924471378326416, |
|
"logits/rejected": -2.6854348182678223, |
|
"logps/chosen": -170.48342895507812, |
|
"logps/rejected": -230.1216583251953, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.0587759017944336, |
|
"rewards/margins": 12.352742195129395, |
|
"rewards/rejected": -14.411517143249512, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 2.353883106485188e-07, |
|
"logits/chosen": -2.8059592247009277, |
|
"logits/rejected": -2.5042202472686768, |
|
"logps/chosen": -162.3031463623047, |
|
"logps/rejected": -295.27105712890625, |
|
"loss": 0.0055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9291874170303345, |
|
"rewards/margins": 20.166751861572266, |
|
"rewards/rejected": -22.09593963623047, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.3138510808646917e-07, |
|
"logits/chosen": -2.7831666469573975, |
|
"logits/rejected": -2.5290932655334473, |
|
"logps/chosen": -156.5865478515625, |
|
"logps/rejected": -512.7086791992188, |
|
"loss": 0.0045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6812311410903931, |
|
"rewards/margins": 43.404396057128906, |
|
"rewards/rejected": -44.085628509521484, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 2.2738190552441953e-07, |
|
"logits/chosen": -2.8566126823425293, |
|
"logits/rejected": -2.582984447479248, |
|
"logps/chosen": -172.5872802734375, |
|
"logps/rejected": -197.41136169433594, |
|
"loss": 0.0106, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.777562141418457, |
|
"rewards/margins": 9.757658958435059, |
|
"rewards/rejected": -11.535221099853516, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 2.2337870296236989e-07, |
|
"logits/chosen": -2.78861927986145, |
|
"logits/rejected": -2.546877384185791, |
|
"logps/chosen": -144.1377716064453, |
|
"logps/rejected": -255.5492706298828, |
|
"loss": 0.0064, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.23271174728870392, |
|
"rewards/margins": 17.866634368896484, |
|
"rewards/rejected": -18.09934425354004, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.1937550040032024e-07, |
|
"logits/chosen": -2.7955825328826904, |
|
"logits/rejected": -2.6178054809570312, |
|
"logps/chosen": -174.67660522460938, |
|
"logps/rejected": -284.92230224609375, |
|
"loss": 0.0046, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.2215218544006348, |
|
"rewards/margins": 17.042997360229492, |
|
"rewards/rejected": -19.264522552490234, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 2.153722978382706e-07, |
|
"logits/chosen": -2.6289403438568115, |
|
"logits/rejected": -2.4111552238464355, |
|
"logps/chosen": -174.08555603027344, |
|
"logps/rejected": -477.3523864746094, |
|
"loss": 0.0051, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.3877720832824707, |
|
"rewards/margins": 36.872703552246094, |
|
"rewards/rejected": -39.260475158691406, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.1136909527622096e-07, |
|
"logits/chosen": -2.8065085411071777, |
|
"logits/rejected": -2.5915045738220215, |
|
"logps/chosen": -155.339599609375, |
|
"logps/rejected": -247.944091796875, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.14878419041633606, |
|
"rewards/margins": 16.316041946411133, |
|
"rewards/rejected": -16.464826583862305, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 2.0736589271417131e-07, |
|
"logits/chosen": -2.9013023376464844, |
|
"logits/rejected": -2.6518332958221436, |
|
"logps/chosen": -196.78958129882812, |
|
"logps/rejected": -258.1006774902344, |
|
"loss": 0.0071, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.3248391151428223, |
|
"rewards/margins": 12.6865816116333, |
|
"rewards/rejected": -16.011423110961914, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.0336269015212167e-07, |
|
"logits/chosen": -2.7131872177124023, |
|
"logits/rejected": -2.4915966987609863, |
|
"logps/chosen": -186.66929626464844, |
|
"logps/rejected": -523.0761108398438, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.9156155586242676, |
|
"rewards/margins": 41.05046844482422, |
|
"rewards/rejected": -43.96608352661133, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 1.9935948759007203e-07, |
|
"logits/chosen": -2.802422285079956, |
|
"logits/rejected": -2.6357274055480957, |
|
"logps/chosen": -171.12216186523438, |
|
"logps/rejected": -279.912109375, |
|
"loss": 0.0198, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -3.927133560180664, |
|
"rewards/margins": 16.069978713989258, |
|
"rewards/rejected": -19.997112274169922, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 1.953562850280224e-07, |
|
"logits/chosen": -2.743924140930176, |
|
"logits/rejected": -2.569491147994995, |
|
"logps/chosen": -167.91322326660156, |
|
"logps/rejected": -284.1127624511719, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.3164217472076416, |
|
"rewards/margins": 17.26920509338379, |
|
"rewards/rejected": -19.585628509521484, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 1.9135308246597277e-07, |
|
"logits/chosen": -2.8673033714294434, |
|
"logits/rejected": -2.6498348712921143, |
|
"logps/chosen": -142.06503295898438, |
|
"logps/rejected": -239.8588409423828, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7565892338752747, |
|
"rewards/margins": 15.723353385925293, |
|
"rewards/rejected": -16.47994613647461, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 1.8734987990392313e-07, |
|
"logits/chosen": -2.821174144744873, |
|
"logits/rejected": -2.6474757194519043, |
|
"logps/chosen": -171.9628143310547, |
|
"logps/rejected": -232.91439819335938, |
|
"loss": 0.0072, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7939846515655518, |
|
"rewards/margins": 12.49173355102539, |
|
"rewards/rejected": -14.285717964172363, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 1.8334667734187348e-07, |
|
"logits/chosen": -2.755138397216797, |
|
"logits/rejected": -2.5319771766662598, |
|
"logps/chosen": -180.15707397460938, |
|
"logps/rejected": -481.31512451171875, |
|
"loss": 0.0056, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.5247740745544434, |
|
"rewards/margins": 36.42875289916992, |
|
"rewards/rejected": -38.95352554321289, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 1.7934347477982384e-07, |
|
"logits/chosen": -2.872758388519287, |
|
"logits/rejected": -2.609778881072998, |
|
"logps/chosen": -157.4242401123047, |
|
"logps/rejected": -223.9275360107422, |
|
"loss": 0.0048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7862883806228638, |
|
"rewards/margins": 13.082984924316406, |
|
"rewards/rejected": -13.869272232055664, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 1.753402722177742e-07, |
|
"logits/chosen": -2.79685115814209, |
|
"logits/rejected": -2.5169830322265625, |
|
"logps/chosen": -157.01585388183594, |
|
"logps/rejected": -436.61602783203125, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8217869997024536, |
|
"rewards/margins": 34.775390625, |
|
"rewards/rejected": -35.59718322753906, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 1.7133706965572455e-07, |
|
"logits/chosen": -2.896519660949707, |
|
"logits/rejected": -2.5488381385803223, |
|
"logps/chosen": -175.59397888183594, |
|
"logps/rejected": -218.03466796875, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.972507119178772, |
|
"rewards/margins": 11.009244918823242, |
|
"rewards/rejected": -12.981752395629883, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 1.673338670936749e-07, |
|
"logits/chosen": -2.8404831886291504, |
|
"logits/rejected": -2.5836331844329834, |
|
"logps/chosen": -184.15939331054688, |
|
"logps/rejected": -292.54693603515625, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.556754469871521, |
|
"rewards/margins": 17.97411346435547, |
|
"rewards/rejected": -19.530866622924805, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 1.633306645316253e-07, |
|
"logits/chosen": -2.78204345703125, |
|
"logits/rejected": -2.52099609375, |
|
"logps/chosen": -180.96939086914062, |
|
"logps/rejected": -375.949462890625, |
|
"loss": 0.0131, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.8274238109588623, |
|
"rewards/margins": 26.82219886779785, |
|
"rewards/rejected": -28.64962387084961, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 1.5932746196957568e-07, |
|
"logits/chosen": -2.8116698265075684, |
|
"logits/rejected": -2.564847707748413, |
|
"logps/chosen": -159.66482543945312, |
|
"logps/rejected": -267.16583251953125, |
|
"loss": 0.0067, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7409461736679077, |
|
"rewards/margins": 17.136062622070312, |
|
"rewards/rejected": -18.87700843811035, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 1.5532425940752604e-07, |
|
"logits/chosen": -2.7542591094970703, |
|
"logits/rejected": -2.5157008171081543, |
|
"logps/chosen": -183.57919311523438, |
|
"logps/rejected": -581.8060913085938, |
|
"loss": 0.0022, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -3.2423195838928223, |
|
"rewards/margins": 46.37403869628906, |
|
"rewards/rejected": -49.616355895996094, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"eval_logits/chosen": -2.633044958114624, |
|
"eval_logits/rejected": -2.436069965362549, |
|
"eval_logps/chosen": -164.32315063476562, |
|
"eval_logps/rejected": -331.22625732421875, |
|
"eval_loss": 0.022912979125976562, |
|
"eval_rewards/accuracies": 0.9960317611694336, |
|
"eval_rewards/chosen": -1.6108430624008179, |
|
"eval_rewards/margins": 22.94175910949707, |
|
"eval_rewards/rejected": -24.55260467529297, |
|
"eval_runtime": 924.6056, |
|
"eval_samples_per_second": 2.163, |
|
"eval_steps_per_second": 0.068, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 1.513210568454764e-07, |
|
"logits/chosen": -2.764820098876953, |
|
"logits/rejected": -2.5704541206359863, |
|
"logps/chosen": -163.67437744140625, |
|
"logps/rejected": -395.3125915527344, |
|
"loss": 0.0031, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.4941201210021973, |
|
"rewards/margins": 27.328876495361328, |
|
"rewards/rejected": -29.8229923248291, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 1.4731785428342675e-07, |
|
"logits/chosen": -2.8067145347595215, |
|
"logits/rejected": -2.502478837966919, |
|
"logps/chosen": -146.5375518798828, |
|
"logps/rejected": -257.7701110839844, |
|
"loss": 0.0008, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.222342848777771, |
|
"rewards/margins": 17.490814208984375, |
|
"rewards/rejected": -18.713157653808594, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 1.433146517213771e-07, |
|
"logits/chosen": -2.8830108642578125, |
|
"logits/rejected": -2.599515438079834, |
|
"logps/chosen": -164.00958251953125, |
|
"logps/rejected": -202.6780242919922, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6045265197753906, |
|
"rewards/margins": 10.405461311340332, |
|
"rewards/rejected": -12.009988784790039, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 1.3931144915932746e-07, |
|
"logits/chosen": -2.8134148120880127, |
|
"logits/rejected": -2.5562379360198975, |
|
"logps/chosen": -167.71530151367188, |
|
"logps/rejected": -212.62570190429688, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.176457166671753, |
|
"rewards/margins": 11.668563842773438, |
|
"rewards/rejected": -12.84502124786377, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 1.3530824659727782e-07, |
|
"logits/chosen": -2.564943790435791, |
|
"logits/rejected": -2.4214589595794678, |
|
"logps/chosen": -144.50045776367188, |
|
"logps/rejected": -427.9203186035156, |
|
"loss": 0.0074, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0412085056304932, |
|
"rewards/margins": 33.99787139892578, |
|
"rewards/rejected": -35.03908157348633, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 1.3130504403522818e-07, |
|
"logits/chosen": -2.8820528984069824, |
|
"logits/rejected": -2.634192943572998, |
|
"logps/chosen": -147.8389129638672, |
|
"logps/rejected": -352.130859375, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7727874517440796, |
|
"rewards/margins": 26.576446533203125, |
|
"rewards/rejected": -27.3492374420166, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 1.2730184147317853e-07, |
|
"logits/chosen": -2.7832694053649902, |
|
"logits/rejected": -2.5845096111297607, |
|
"logps/chosen": -170.01785278320312, |
|
"logps/rejected": -473.90863037109375, |
|
"loss": 0.0036, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1985461711883545, |
|
"rewards/margins": 36.935890197753906, |
|
"rewards/rejected": -38.13444137573242, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 1.232986389111289e-07, |
|
"logits/chosen": -2.7855515480041504, |
|
"logits/rejected": -2.5294415950775146, |
|
"logps/chosen": -158.38758850097656, |
|
"logps/rejected": -244.84619140625, |
|
"loss": 0.0049, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8522005081176758, |
|
"rewards/margins": 15.829713821411133, |
|
"rewards/rejected": -16.681913375854492, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 1.1929543634907927e-07, |
|
"logits/chosen": -2.898603916168213, |
|
"logits/rejected": -2.705836772918701, |
|
"logps/chosen": -159.56637573242188, |
|
"logps/rejected": -371.81805419921875, |
|
"loss": 0.0088, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.5499234199523926, |
|
"rewards/margins": 27.168338775634766, |
|
"rewards/rejected": -28.71826171875, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 1.1529223378702962e-07, |
|
"logits/chosen": -2.733206272125244, |
|
"logits/rejected": -2.5612919330596924, |
|
"logps/chosen": -172.57748413085938, |
|
"logps/rejected": -559.9437255859375, |
|
"loss": 0.0195, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.0921106338500977, |
|
"rewards/margins": 45.248077392578125, |
|
"rewards/rejected": -46.340187072753906, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 1.1128903122497999e-07, |
|
"logits/chosen": -2.8048062324523926, |
|
"logits/rejected": -2.533332347869873, |
|
"logps/chosen": -145.4043731689453, |
|
"logps/rejected": -206.748291015625, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4871163368225098, |
|
"rewards/margins": 12.425850868225098, |
|
"rewards/rejected": -13.91296672821045, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 1.0728582866293035e-07, |
|
"logits/chosen": -2.8180341720581055, |
|
"logits/rejected": -2.671854257583618, |
|
"logps/chosen": -164.4940948486328, |
|
"logps/rejected": -299.62310791015625, |
|
"loss": 0.0026, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2362325191497803, |
|
"rewards/margins": 19.141382217407227, |
|
"rewards/rejected": -20.377614974975586, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 1.032826261008807e-07, |
|
"logits/chosen": -2.7941746711730957, |
|
"logits/rejected": -2.4854462146759033, |
|
"logps/chosen": -170.0912628173828, |
|
"logps/rejected": -517.0354614257812, |
|
"loss": 0.0037, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.6481285095214844, |
|
"rewards/margins": 41.76611328125, |
|
"rewards/rejected": -43.414241790771484, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 9.927942353883106e-08, |
|
"logits/chosen": -2.7304294109344482, |
|
"logits/rejected": -2.458939790725708, |
|
"logps/chosen": -147.95701599121094, |
|
"logps/rejected": -289.230224609375, |
|
"loss": 0.0069, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.3893832266330719, |
|
"rewards/margins": 20.953866958618164, |
|
"rewards/rejected": -21.343250274658203, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 9.527622097678143e-08, |
|
"logits/chosen": -2.786956310272217, |
|
"logits/rejected": -2.565520763397217, |
|
"logps/chosen": -172.35365295410156, |
|
"logps/rejected": -397.92059326171875, |
|
"loss": 0.0058, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4105862379074097, |
|
"rewards/margins": 28.689733505249023, |
|
"rewards/rejected": -30.10032081604004, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 9.127301841473179e-08, |
|
"logits/chosen": -2.803377151489258, |
|
"logits/rejected": -2.601539134979248, |
|
"logps/chosen": -152.02151489257812, |
|
"logps/rejected": -315.120849609375, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8650063276290894, |
|
"rewards/margins": 22.168514251708984, |
|
"rewards/rejected": -23.033519744873047, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 8.726981585268214e-08, |
|
"logits/chosen": -2.8182191848754883, |
|
"logits/rejected": -2.570002555847168, |
|
"logps/chosen": -149.609619140625, |
|
"logps/rejected": -231.5469512939453, |
|
"loss": 0.0062, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5421051979064941, |
|
"rewards/margins": 14.738876342773438, |
|
"rewards/rejected": -15.280984878540039, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 8.32666132906325e-08, |
|
"logits/chosen": -2.7290663719177246, |
|
"logits/rejected": -2.519298791885376, |
|
"logps/chosen": -149.35226440429688, |
|
"logps/rejected": -345.97222900390625, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2555046081542969, |
|
"rewards/margins": 24.839550018310547, |
|
"rewards/rejected": -26.09505271911621, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 7.926341072858286e-08, |
|
"logits/chosen": -2.8426907062530518, |
|
"logits/rejected": -2.560478448867798, |
|
"logps/chosen": -147.8146209716797, |
|
"logps/rejected": -243.75064086914062, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8054535984992981, |
|
"rewards/margins": 15.806452751159668, |
|
"rewards/rejected": -16.61190414428711, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 7.526020816653323e-08, |
|
"logits/chosen": -2.674760341644287, |
|
"logits/rejected": -2.367633819580078, |
|
"logps/chosen": -151.23182678222656, |
|
"logps/rejected": -421.35333251953125, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -2.7838943004608154, |
|
"rewards/margins": 32.74809646606445, |
|
"rewards/rejected": -35.5319938659668, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 7.125700560448359e-08, |
|
"logits/chosen": -2.726081371307373, |
|
"logits/rejected": -2.4065871238708496, |
|
"logps/chosen": -150.547119140625, |
|
"logps/rejected": -347.12457275390625, |
|
"loss": 0.003, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.623246431350708, |
|
"rewards/margins": 26.384979248046875, |
|
"rewards/rejected": -27.008224487304688, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 6.725380304243394e-08, |
|
"logits/chosen": -2.7504703998565674, |
|
"logits/rejected": -2.449279308319092, |
|
"logps/chosen": -158.4932861328125, |
|
"logps/rejected": -315.344482421875, |
|
"loss": 0.0019, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7700117826461792, |
|
"rewards/margins": 22.627595901489258, |
|
"rewards/rejected": -23.397607803344727, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 6.32506004803843e-08, |
|
"logits/chosen": -2.7155704498291016, |
|
"logits/rejected": -2.4125099182128906, |
|
"logps/chosen": -149.95974731445312, |
|
"logps/rejected": -408.9559631347656, |
|
"loss": 0.0061, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.43645238876342773, |
|
"rewards/margins": 32.31442642211914, |
|
"rewards/rejected": -32.750877380371094, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 5.9247397918334664e-08, |
|
"logits/chosen": -2.7009987831115723, |
|
"logits/rejected": -2.5183584690093994, |
|
"logps/chosen": -175.1053466796875, |
|
"logps/rejected": -374.5074157714844, |
|
"loss": 0.0018, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2859503030776978, |
|
"rewards/margins": 26.71976089477539, |
|
"rewards/rejected": -28.005706787109375, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 5.524419535628502e-08, |
|
"logits/chosen": -2.724604368209839, |
|
"logits/rejected": -2.4586219787597656, |
|
"logps/chosen": -204.1591033935547, |
|
"logps/rejected": -626.8215942382812, |
|
"loss": 0.002, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -4.939435958862305, |
|
"rewards/margins": 48.43818283081055, |
|
"rewards/rejected": -53.37761688232422, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 5.1240992794235385e-08, |
|
"logits/chosen": -2.8005754947662354, |
|
"logits/rejected": -2.5299735069274902, |
|
"logps/chosen": -153.12969970703125, |
|
"logps/rejected": -334.99786376953125, |
|
"loss": 0.0016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.5083599090576172, |
|
"rewards/margins": 24.945537567138672, |
|
"rewards/rejected": -25.45389747619629, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 4.723779023218575e-08, |
|
"logits/chosen": -2.721325159072876, |
|
"logits/rejected": -2.4367713928222656, |
|
"logps/chosen": -154.41665649414062, |
|
"logps/rejected": -428.780517578125, |
|
"loss": 0.0082, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.7341115474700928, |
|
"rewards/margins": 34.29324722290039, |
|
"rewards/rejected": -36.02735137939453, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 4.323458767013611e-08, |
|
"logits/chosen": -2.7661736011505127, |
|
"logits/rejected": -2.489382028579712, |
|
"logps/chosen": -153.54531860351562, |
|
"logps/rejected": -436.8839416503906, |
|
"loss": 0.006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.38232460618019104, |
|
"rewards/margins": 35.33488082885742, |
|
"rewards/rejected": -35.71720504760742, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 3.923138510808647e-08, |
|
"logits/chosen": -2.6737539768218994, |
|
"logits/rejected": -2.4625658988952637, |
|
"logps/chosen": -154.32327270507812, |
|
"logps/rejected": -682.4031982421875, |
|
"loss": 0.0042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.4486008882522583, |
|
"rewards/margins": 58.11639404296875, |
|
"rewards/rejected": -59.56499481201172, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 3.5228182546036826e-08, |
|
"logits/chosen": -2.5800118446350098, |
|
"logits/rejected": -2.395155191421509, |
|
"logps/chosen": -140.3927764892578, |
|
"logps/rejected": -396.32806396484375, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.859173595905304, |
|
"rewards/margins": 30.5933780670166, |
|
"rewards/rejected": -31.452550888061523, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 3.122497998398719e-08, |
|
"logits/chosen": -2.6436543464660645, |
|
"logits/rejected": -2.371372699737549, |
|
"logps/chosen": -123.04959869384766, |
|
"logps/rejected": -331.83624267578125, |
|
"loss": 0.0034, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.4394907057285309, |
|
"rewards/margins": 26.18316650390625, |
|
"rewards/rejected": -26.622655868530273, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 2.722177742193755e-08, |
|
"logits/chosen": -2.7385857105255127, |
|
"logits/rejected": -2.4508581161499023, |
|
"logps/chosen": -175.63052368164062, |
|
"logps/rejected": -294.7503356933594, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.2934653759002686, |
|
"rewards/margins": 19.555923461914062, |
|
"rewards/rejected": -20.849384307861328, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 2.3218574859887907e-08, |
|
"logits/chosen": -2.794159412384033, |
|
"logits/rejected": -2.5210330486297607, |
|
"logps/chosen": -155.2234344482422, |
|
"logps/rejected": -240.988037109375, |
|
"loss": 0.0078, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.8834775686264038, |
|
"rewards/margins": 14.685577392578125, |
|
"rewards/rejected": -15.569055557250977, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 1.9215372297838268e-08, |
|
"logits/chosen": -2.6487419605255127, |
|
"logits/rejected": -2.3688254356384277, |
|
"logps/chosen": -140.86117553710938, |
|
"logps/rejected": -280.75250244140625, |
|
"loss": 0.0014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": 0.05746353790163994, |
|
"rewards/margins": 20.066030502319336, |
|
"rewards/rejected": -20.008569717407227, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 1.521216973578863e-08, |
|
"logits/chosen": -2.875211715698242, |
|
"logits/rejected": -2.490218162536621, |
|
"logps/chosen": -172.50912475585938, |
|
"logps/rejected": -273.8616943359375, |
|
"loss": 0.0021, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.1841375827789307, |
|
"rewards/margins": 17.2589054107666, |
|
"rewards/rejected": -18.443042755126953, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 1.120896717373899e-08, |
|
"logits/chosen": -2.7332968711853027, |
|
"logits/rejected": -2.491285800933838, |
|
"logps/chosen": -150.06475830078125, |
|
"logps/rejected": -376.63824462890625, |
|
"loss": 0.0102, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -1.1961749792099, |
|
"rewards/margins": 28.852294921875, |
|
"rewards/rejected": -30.048471450805664, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 7.205764611689351e-09, |
|
"logits/chosen": -2.6668992042541504, |
|
"logits/rejected": -2.389853000640869, |
|
"logps/chosen": -170.96156311035156, |
|
"logps/rejected": -512.9969482421875, |
|
"loss": 0.0035, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.6127735376358032, |
|
"rewards/margins": 40.84336471557617, |
|
"rewards/rejected": -41.456138610839844, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 3.2025620496397115e-09, |
|
"logits/chosen": -2.7764010429382324, |
|
"logits/rejected": -2.528985023498535, |
|
"logps/chosen": -173.30990600585938, |
|
"logps/rejected": -363.98358154296875, |
|
"loss": 0.0028, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -0.7063196897506714, |
|
"rewards/margins": 26.459331512451172, |
|
"rewards/rejected": -27.165653228759766, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"step": 1388, |
|
"total_flos": 0.0, |
|
"train_loss": 0.04402416471998484, |
|
"train_runtime": 16535.8816, |
|
"train_samples_per_second": 1.341, |
|
"train_steps_per_second": 0.084 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 1388, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|