|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.9982631930527722, |
|
"eval_steps": 400, |
|
"global_step": 467, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.01068804275217101, |
|
"grad_norm": 62.3482779515011, |
|
"learning_rate": 1.0638297872340425e-07, |
|
"logits/chosen": -1.0140018463134766, |
|
"logits/rejected": -0.9845958948135376, |
|
"logps/chosen": -0.27406683564186096, |
|
"logps/rejected": -0.2714424729347229, |
|
"loss": 3.0994, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -2.7406680583953857, |
|
"rewards/margins": -0.02624346688389778, |
|
"rewards/rejected": -2.7144248485565186, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.02137608550434202, |
|
"grad_norm": 39.525628188076254, |
|
"learning_rate": 2.127659574468085e-07, |
|
"logits/chosen": -1.0465514659881592, |
|
"logits/rejected": -0.9793618321418762, |
|
"logps/chosen": -0.29423215985298157, |
|
"logps/rejected": -0.2993616461753845, |
|
"loss": 3.1379, |
|
"rewards/accuracies": 0.4937500059604645, |
|
"rewards/chosen": -2.942321538925171, |
|
"rewards/margins": 0.051294513046741486, |
|
"rewards/rejected": -2.9936161041259766, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03206412825651302, |
|
"grad_norm": 51.851115967445885, |
|
"learning_rate": 3.1914893617021275e-07, |
|
"logits/chosen": -0.9649394750595093, |
|
"logits/rejected": -0.983955979347229, |
|
"logps/chosen": -0.26406729221343994, |
|
"logps/rejected": -0.3004179894924164, |
|
"loss": 3.2301, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.6406729221343994, |
|
"rewards/margins": 0.363506942987442, |
|
"rewards/rejected": -3.0041799545288086, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04275217100868404, |
|
"grad_norm": 99.02642749476678, |
|
"learning_rate": 4.25531914893617e-07, |
|
"logits/chosen": -0.9679675102233887, |
|
"logits/rejected": -0.9419299960136414, |
|
"logps/chosen": -0.2775927186012268, |
|
"logps/rejected": -0.2915174961090088, |
|
"loss": 3.1605, |
|
"rewards/accuracies": 0.5062500238418579, |
|
"rewards/chosen": -2.7759270668029785, |
|
"rewards/margins": 0.13924789428710938, |
|
"rewards/rejected": -2.915174961090088, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.053440213760855046, |
|
"grad_norm": 57.54560785330943, |
|
"learning_rate": 5.319148936170212e-07, |
|
"logits/chosen": -1.003973126411438, |
|
"logits/rejected": -0.9752557873725891, |
|
"logps/chosen": -0.2722216844558716, |
|
"logps/rejected": -0.2782929539680481, |
|
"loss": 3.3103, |
|
"rewards/accuracies": 0.5, |
|
"rewards/chosen": -2.7222166061401367, |
|
"rewards/margins": 0.060712575912475586, |
|
"rewards/rejected": -2.7829294204711914, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06412825651302605, |
|
"grad_norm": 52.02880771091385, |
|
"learning_rate": 6.382978723404255e-07, |
|
"logits/chosen": -0.9953545331954956, |
|
"logits/rejected": -0.9509505033493042, |
|
"logps/chosen": -0.273654043674469, |
|
"logps/rejected": -0.27911943197250366, |
|
"loss": 2.9866, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": -2.7365403175354004, |
|
"rewards/margins": 0.05465413257479668, |
|
"rewards/rejected": -2.791194438934326, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.07481629926519706, |
|
"grad_norm": 62.86254815286924, |
|
"learning_rate": 7.446808510638297e-07, |
|
"logits/chosen": -1.0501649379730225, |
|
"logits/rejected": -0.9741900563240051, |
|
"logps/chosen": -0.2949184775352478, |
|
"logps/rejected": -0.32086285948753357, |
|
"loss": 3.0094, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -2.9491848945617676, |
|
"rewards/margins": 0.2594442367553711, |
|
"rewards/rejected": -3.2086288928985596, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.08550434201736808, |
|
"grad_norm": 65.11890009591963, |
|
"learning_rate": 8.51063829787234e-07, |
|
"logits/chosen": -1.0010614395141602, |
|
"logits/rejected": -0.9576476216316223, |
|
"logps/chosen": -0.2806803584098816, |
|
"logps/rejected": -0.32674694061279297, |
|
"loss": 2.9254, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -2.8068039417266846, |
|
"rewards/margins": 0.46066540479660034, |
|
"rewards/rejected": -3.2674694061279297, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.09619238476953908, |
|
"grad_norm": 36.14523632736934, |
|
"learning_rate": 9.574468085106384e-07, |
|
"logits/chosen": -1.0506356954574585, |
|
"logits/rejected": -1.0073630809783936, |
|
"logps/chosen": -0.3041021227836609, |
|
"logps/rejected": -0.35804516077041626, |
|
"loss": 3.0106, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -3.0410218238830566, |
|
"rewards/margins": 0.5394296050071716, |
|
"rewards/rejected": -3.580451250076294, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.10688042752171009, |
|
"grad_norm": 61.101997429137676, |
|
"learning_rate": 9.998741174712533e-07, |
|
"logits/chosen": -1.025564432144165, |
|
"logits/rejected": -0.9764531850814819, |
|
"logps/chosen": -0.3140087425708771, |
|
"logps/rejected": -0.3549434542655945, |
|
"loss": 3.1031, |
|
"rewards/accuracies": 0.46875, |
|
"rewards/chosen": -3.140087604522705, |
|
"rewards/margins": 0.40934714674949646, |
|
"rewards/rejected": -3.5494346618652344, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.11756847027388109, |
|
"grad_norm": 128.15530312228304, |
|
"learning_rate": 9.991050648838675e-07, |
|
"logits/chosen": -1.0674389600753784, |
|
"logits/rejected": -1.0326998233795166, |
|
"logps/chosen": -0.2986104488372803, |
|
"logps/rejected": -0.3673686683177948, |
|
"loss": 2.7756, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -2.9861044883728027, |
|
"rewards/margins": 0.6875823736190796, |
|
"rewards/rejected": -3.6736865043640137, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.1282565130260521, |
|
"grad_norm": 59.025780817391194, |
|
"learning_rate": 9.97637968732563e-07, |
|
"logits/chosen": -1.108605980873108, |
|
"logits/rejected": -1.075398325920105, |
|
"logps/chosen": -0.3378816246986389, |
|
"logps/rejected": -0.3586636483669281, |
|
"loss": 2.9029, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -3.3788161277770996, |
|
"rewards/margins": 0.20782046020030975, |
|
"rewards/rejected": -3.5866363048553467, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.13894455577822312, |
|
"grad_norm": 66.41947079309563, |
|
"learning_rate": 9.954748808839674e-07, |
|
"logits/chosen": -1.0052762031555176, |
|
"logits/rejected": -0.9777056574821472, |
|
"logps/chosen": -0.4097859263420105, |
|
"logps/rejected": -0.4840938150882721, |
|
"loss": 2.9214, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -4.0978593826293945, |
|
"rewards/margins": 0.7430787682533264, |
|
"rewards/rejected": -4.840937614440918, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.14963259853039412, |
|
"grad_norm": 42.264544083175664, |
|
"learning_rate": 9.926188266120295e-07, |
|
"logits/chosen": -1.0273511409759521, |
|
"logits/rejected": -1.0030959844589233, |
|
"logps/chosen": -0.39483898878097534, |
|
"logps/rejected": -0.4872601628303528, |
|
"loss": 2.8869, |
|
"rewards/accuracies": 0.550000011920929, |
|
"rewards/chosen": -3.948390245437622, |
|
"rewards/margins": 0.9242109060287476, |
|
"rewards/rejected": -4.872600555419922, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.16032064128256512, |
|
"grad_norm": 71.26393008122545, |
|
"learning_rate": 9.890738003669027e-07, |
|
"logits/chosen": -0.9822802543640137, |
|
"logits/rejected": -0.9115797281265259, |
|
"logps/chosen": -0.38268035650253296, |
|
"logps/rejected": -0.4444147050380707, |
|
"loss": 2.9837, |
|
"rewards/accuracies": 0.53125, |
|
"rewards/chosen": -3.826803207397461, |
|
"rewards/margins": 0.6173437237739563, |
|
"rewards/rejected": -4.444147109985352, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.17100868403473615, |
|
"grad_norm": 50.319154680061054, |
|
"learning_rate": 9.848447601883433e-07, |
|
"logits/chosen": -0.9707294702529907, |
|
"logits/rejected": -0.9569045305252075, |
|
"logps/chosen": -0.35631316900253296, |
|
"logps/rejected": -0.46392399072647095, |
|
"loss": 2.7881, |
|
"rewards/accuracies": 0.5874999761581421, |
|
"rewards/chosen": -3.563131809234619, |
|
"rewards/margins": 1.0761077404022217, |
|
"rewards/rejected": -4.63923978805542, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.18169672678690715, |
|
"grad_norm": 70.667078624953, |
|
"learning_rate": 9.799376207714444e-07, |
|
"logits/chosen": -0.9739160537719727, |
|
"logits/rejected": -0.9520059823989868, |
|
"logps/chosen": -0.3384969234466553, |
|
"logps/rejected": -0.4023989737033844, |
|
"loss": 2.7081, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -3.3849689960479736, |
|
"rewards/margins": 0.6390206217765808, |
|
"rewards/rejected": -4.023990154266357, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.19238476953907815, |
|
"grad_norm": 70.40191576067738, |
|
"learning_rate": 9.743592451943998e-07, |
|
"logits/chosen": -1.0218126773834229, |
|
"logits/rejected": -0.9869282841682434, |
|
"logps/chosen": -0.43610191345214844, |
|
"logps/rejected": -0.5330775380134583, |
|
"loss": 2.9174, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": -4.361019134521484, |
|
"rewards/margins": 0.9697564840316772, |
|
"rewards/rejected": -5.330776214599609, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.20307281229124916, |
|
"grad_norm": 48.668822510411275, |
|
"learning_rate": 9.681174353198686e-07, |
|
"logits/chosen": -1.0933444499969482, |
|
"logits/rejected": -1.01079523563385, |
|
"logps/chosen": -0.4492688775062561, |
|
"logps/rejected": -0.49912723898887634, |
|
"loss": 2.7939, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": -4.49268913269043, |
|
"rewards/margins": 0.49858370423316956, |
|
"rewards/rejected": -4.99127197265625, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.21376085504342018, |
|
"grad_norm": 68.88980102518907, |
|
"learning_rate": 9.612209208833646e-07, |
|
"logits/chosen": -0.9715415239334106, |
|
"logits/rejected": -0.9469987154006958, |
|
"logps/chosen": -0.4551132619380951, |
|
"logps/rejected": -0.49960607290267944, |
|
"loss": 3.0421, |
|
"rewards/accuracies": 0.606249988079071, |
|
"rewards/chosen": -4.551133155822754, |
|
"rewards/margins": 0.44492778182029724, |
|
"rewards/rejected": -4.996060848236084, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.22444889779559118, |
|
"grad_norm": 73.34499964545964, |
|
"learning_rate": 9.536793472839324e-07, |
|
"logits/chosen": -0.9907165765762329, |
|
"logits/rejected": -0.9372614622116089, |
|
"logps/chosen": -0.39754587411880493, |
|
"logps/rejected": -0.5162733793258667, |
|
"loss": 2.9277, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -3.975458860397339, |
|
"rewards/margins": 1.187274694442749, |
|
"rewards/rejected": -5.16273307800293, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.23513694054776219, |
|
"grad_norm": 55.456582167351314, |
|
"learning_rate": 9.455032620941839e-07, |
|
"logits/chosen": -0.9392507672309875, |
|
"logits/rejected": -0.8784140348434448, |
|
"logps/chosen": -0.4648202955722809, |
|
"logps/rejected": -0.5995782017707825, |
|
"loss": 2.788, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": -4.648202896118164, |
|
"rewards/margins": 1.347578763961792, |
|
"rewards/rejected": -5.995781898498535, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.2458249832999332, |
|
"grad_norm": 60.644976413076485, |
|
"learning_rate": 9.367041003085648e-07, |
|
"logits/chosen": -1.0021904706954956, |
|
"logits/rejected": -0.9409273266792297, |
|
"logps/chosen": -0.48944035172462463, |
|
"logps/rejected": -0.5491489171981812, |
|
"loss": 2.6847, |
|
"rewards/accuracies": 0.6187499761581421, |
|
"rewards/chosen": -4.89440393447876, |
|
"rewards/margins": 0.5970853567123413, |
|
"rewards/rejected": -5.491488933563232, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.2565130260521042, |
|
"grad_norm": 66.42443726320764, |
|
"learning_rate": 9.272941683504808e-07, |
|
"logits/chosen": -0.9655882716178894, |
|
"logits/rejected": -0.8733075857162476, |
|
"logps/chosen": -0.49609699845314026, |
|
"logps/rejected": -0.6988444328308105, |
|
"loss": 2.6175, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -4.9609694480896, |
|
"rewards/margins": 2.027474880218506, |
|
"rewards/rejected": -6.9884443283081055, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.26720106880427524, |
|
"grad_norm": 80.7904476004964, |
|
"learning_rate": 9.172866268606513e-07, |
|
"logits/chosen": -1.0375964641571045, |
|
"logits/rejected": -0.9925470352172852, |
|
"logps/chosen": -0.5534143447875977, |
|
"logps/rejected": -0.6445597410202026, |
|
"loss": 2.4088, |
|
"rewards/accuracies": 0.643750011920929, |
|
"rewards/chosen": -5.534143447875977, |
|
"rewards/margins": 0.9114534258842468, |
|
"rewards/rejected": -6.445597171783447, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.27788911155644624, |
|
"grad_norm": 164.83298553765533, |
|
"learning_rate": 9.066954722907638e-07, |
|
"logits/chosen": -1.0506072044372559, |
|
"logits/rejected": -1.0421117544174194, |
|
"logps/chosen": -0.5260264873504639, |
|
"logps/rejected": -0.809399425983429, |
|
"loss": 2.3687, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -5.260264873504639, |
|
"rewards/margins": 2.833728551864624, |
|
"rewards/rejected": -8.093994140625, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.28857715430861725, |
|
"grad_norm": 110.42493262615906, |
|
"learning_rate": 8.955355173281707e-07, |
|
"logits/chosen": -1.0204308032989502, |
|
"logits/rejected": -0.9703726768493652, |
|
"logps/chosen": -0.5653955936431885, |
|
"logps/rejected": -0.6786874532699585, |
|
"loss": 2.4188, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": -5.653956413269043, |
|
"rewards/margins": 1.132918119430542, |
|
"rewards/rejected": -6.786874294281006, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.29926519706078825, |
|
"grad_norm": 71.93596841122324, |
|
"learning_rate": 8.838223701790055e-07, |
|
"logits/chosen": -1.100914716720581, |
|
"logits/rejected": -1.0754241943359375, |
|
"logps/chosen": -0.6414980888366699, |
|
"logps/rejected": -0.7632189989089966, |
|
"loss": 2.4389, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": -6.414980411529541, |
|
"rewards/margins": 1.217208981513977, |
|
"rewards/rejected": -7.6321892738342285, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.30995323981295925, |
|
"grad_norm": 66.0213241480768, |
|
"learning_rate": 8.71572412738697e-07, |
|
"logits/chosen": -0.9927597045898438, |
|
"logits/rejected": -0.9650676846504211, |
|
"logps/chosen": -0.6458258032798767, |
|
"logps/rejected": -0.8493485450744629, |
|
"loss": 2.1456, |
|
"rewards/accuracies": 0.71875, |
|
"rewards/chosen": -6.458258152008057, |
|
"rewards/margins": 2.0352275371551514, |
|
"rewards/rejected": -8.493486404418945, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.32064128256513025, |
|
"grad_norm": 69.48762102431438, |
|
"learning_rate": 8.588027776804058e-07, |
|
"logits/chosen": -1.0225753784179688, |
|
"logits/rejected": -0.9999582171440125, |
|
"logps/chosen": -0.7057562470436096, |
|
"logps/rejected": -0.8774517774581909, |
|
"loss": 2.2075, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -7.057562351226807, |
|
"rewards/margins": 1.7169564962387085, |
|
"rewards/rejected": -8.774518966674805, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.33132932531730125, |
|
"grad_norm": 67.84761069060667, |
|
"learning_rate": 8.455313244934324e-07, |
|
"logits/chosen": -1.0279462337493896, |
|
"logits/rejected": -1.0052425861358643, |
|
"logps/chosen": -0.7633088231086731, |
|
"logps/rejected": -1.005048155784607, |
|
"loss": 2.2792, |
|
"rewards/accuracies": 0.706250011920929, |
|
"rewards/chosen": -7.633088111877441, |
|
"rewards/margins": 2.4173941612243652, |
|
"rewards/rejected": -10.050481796264648, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.3420173680694723, |
|
"grad_norm": 82.96608468035394, |
|
"learning_rate": 8.317766145051057e-07, |
|
"logits/chosen": -1.030012845993042, |
|
"logits/rejected": -1.012251377105713, |
|
"logps/chosen": -0.8448828458786011, |
|
"logps/rejected": -1.189296007156372, |
|
"loss": 2.2782, |
|
"rewards/accuracies": 0.7562500238418579, |
|
"rewards/chosen": -8.448829650878906, |
|
"rewards/margins": 3.444131851196289, |
|
"rewards/rejected": -11.892961502075195, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.3527054108216433, |
|
"grad_norm": 64.3657602730046, |
|
"learning_rate": 8.175578849210894e-07, |
|
"logits/chosen": -1.0416388511657715, |
|
"logits/rejected": -1.0139344930648804, |
|
"logps/chosen": -0.8959344625473022, |
|
"logps/rejected": -1.2197812795639038, |
|
"loss": 2.1737, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -8.959344863891602, |
|
"rewards/margins": 3.238468885421753, |
|
"rewards/rejected": -12.197813034057617, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.3633934535738143, |
|
"grad_norm": 72.02799237773567, |
|
"learning_rate": 8.028950219204099e-07, |
|
"logits/chosen": -1.0289143323898315, |
|
"logits/rejected": -1.0052926540374756, |
|
"logps/chosen": -0.87171471118927, |
|
"logps/rejected": -1.2460126876831055, |
|
"loss": 2.0202, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -8.717145919799805, |
|
"rewards/margins": 3.7429795265197754, |
|
"rewards/rejected": -12.460125923156738, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.3740814963259853, |
|
"grad_norm": 98.19729820258998, |
|
"learning_rate": 7.878085328428368e-07, |
|
"logits/chosen": -1.0445234775543213, |
|
"logits/rejected": -0.9937236905097961, |
|
"logps/chosen": -0.9708870649337769, |
|
"logps/rejected": -1.1840471029281616, |
|
"loss": 1.8631, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -9.708871841430664, |
|
"rewards/margins": 2.1315996646881104, |
|
"rewards/rejected": -11.840471267700195, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.3847695390781563, |
|
"grad_norm": 69.56663708781485, |
|
"learning_rate": 7.723195175075135e-07, |
|
"logits/chosen": -1.001516580581665, |
|
"logits/rejected": -0.9787738919258118, |
|
"logps/chosen": -0.9560983777046204, |
|
"logps/rejected": -1.2841722965240479, |
|
"loss": 1.8859, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -9.560983657836914, |
|
"rewards/margins": 3.2807374000549316, |
|
"rewards/rejected": -12.841720581054688, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.3954575818303273, |
|
"grad_norm": 91.74349211425728, |
|
"learning_rate": 7.564496387029531e-07, |
|
"logits/chosen": -1.0450928211212158, |
|
"logits/rejected": -0.9868279695510864, |
|
"logps/chosen": -1.0066741704940796, |
|
"logps/rejected": -1.3705105781555176, |
|
"loss": 1.7618, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -10.066742897033691, |
|
"rewards/margins": 3.6383633613586426, |
|
"rewards/rejected": -13.705105781555176, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4061456245824983, |
|
"grad_norm": 88.77063833211469, |
|
"learning_rate": 7.402210918896689e-07, |
|
"logits/chosen": -1.0353386402130127, |
|
"logits/rejected": -1.0412102937698364, |
|
"logps/chosen": -1.136867642402649, |
|
"logps/rejected": -1.6356449127197266, |
|
"loss": 1.6848, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -11.36867618560791, |
|
"rewards/margins": 4.987773895263672, |
|
"rewards/rejected": -16.356449127197266, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4168336673346693, |
|
"grad_norm": 78.26789286604094, |
|
"learning_rate": 7.236565741578162e-07, |
|
"logits/chosen": -0.9953984022140503, |
|
"logits/rejected": -0.973209023475647, |
|
"logps/chosen": -1.1132943630218506, |
|
"logps/rejected": -1.454097867012024, |
|
"loss": 1.6983, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -11.132943153381348, |
|
"rewards/margins": 3.408036470413208, |
|
"rewards/rejected": -14.540979385375977, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.42752171008684037, |
|
"grad_norm": 124.50175267795422, |
|
"learning_rate": 7.067792524832603e-07, |
|
"logits/chosen": -0.9921610951423645, |
|
"logits/rejected": -0.9790946245193481, |
|
"logps/chosen": -1.1536376476287842, |
|
"logps/rejected": -1.5450570583343506, |
|
"loss": 1.8334, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -11.536375045776367, |
|
"rewards/margins": 3.914196729660034, |
|
"rewards/rejected": -15.45057201385498, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.43820975283901137, |
|
"grad_norm": 88.73588073000924, |
|
"learning_rate": 6.896127313264642e-07, |
|
"logits/chosen": -1.0303648710250854, |
|
"logits/rejected": -0.9800698161125183, |
|
"logps/chosen": -1.2688630819320679, |
|
"logps/rejected": -1.6606884002685547, |
|
"loss": 1.8744, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": -12.688631057739258, |
|
"rewards/margins": 3.918254852294922, |
|
"rewards/rejected": -16.606884002685547, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.44889779559118237, |
|
"grad_norm": 88.06916392468375, |
|
"learning_rate": 6.721810196195174e-07, |
|
"logits/chosen": -1.0542664527893066, |
|
"logits/rejected": -1.0434906482696533, |
|
"logps/chosen": -1.3758102655410767, |
|
"logps/rejected": -1.7928674221038818, |
|
"loss": 1.7772, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -13.75810432434082, |
|
"rewards/margins": 4.170571327209473, |
|
"rewards/rejected": -17.928674697875977, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.45958583834335337, |
|
"grad_norm": 121.97560618316521, |
|
"learning_rate": 6.545084971874736e-07, |
|
"logits/chosen": -0.9826368093490601, |
|
"logits/rejected": -0.9663593173027039, |
|
"logps/chosen": -1.4412583112716675, |
|
"logps/rejected": -1.9161531925201416, |
|
"loss": 1.7058, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -14.412582397460938, |
|
"rewards/margins": 4.748946189880371, |
|
"rewards/rejected": -19.161529541015625, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.47027388109552437, |
|
"grad_norm": 123.47452460404413, |
|
"learning_rate": 6.3661988065096e-07, |
|
"logits/chosen": -1.0570485591888428, |
|
"logits/rejected": -1.0392574071884155, |
|
"logps/chosen": -1.5018644332885742, |
|
"logps/rejected": -2.0092532634735107, |
|
"loss": 1.6513, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -15.018644332885742, |
|
"rewards/margins": 5.073886394500732, |
|
"rewards/rejected": -20.092533111572266, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.48096192384769537, |
|
"grad_norm": 67.98272511740662, |
|
"learning_rate": 6.185401888577487e-07, |
|
"logits/chosen": -1.0517892837524414, |
|
"logits/rejected": -1.0197094678878784, |
|
"logps/chosen": -1.4916750192642212, |
|
"logps/rejected": -1.9748971462249756, |
|
"loss": 1.5538, |
|
"rewards/accuracies": 0.75, |
|
"rewards/chosen": -14.916749954223633, |
|
"rewards/margins": 4.832221508026123, |
|
"rewards/rejected": -19.74897003173828, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.4916499665998664, |
|
"grad_norm": 91.18490250667737, |
|
"learning_rate": 6.002947078916364e-07, |
|
"logits/chosen": -1.1277612447738647, |
|
"logits/rejected": -1.0792579650878906, |
|
"logps/chosen": -1.4870562553405762, |
|
"logps/rejected": -1.9353138208389282, |
|
"loss": 1.5151, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": -14.870562553405762, |
|
"rewards/margins": 4.482577323913574, |
|
"rewards/rejected": -19.353137969970703, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5023380093520374, |
|
"grad_norm": 110.64166427250719, |
|
"learning_rate": 5.819089557075688e-07, |
|
"logits/chosen": -1.1586382389068604, |
|
"logits/rejected": -1.1309268474578857, |
|
"logps/chosen": -1.5005654096603394, |
|
"logps/rejected": -2.048241138458252, |
|
"loss": 1.5958, |
|
"rewards/accuracies": 0.768750011920929, |
|
"rewards/chosen": -15.005655288696289, |
|
"rewards/margins": 5.476757049560547, |
|
"rewards/rejected": -20.482410430908203, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.5130260521042084, |
|
"grad_norm": 105.27714488639405, |
|
"learning_rate": 5.634086464424742e-07, |
|
"logits/chosen": -1.1117291450500488, |
|
"logits/rejected": -1.113638162612915, |
|
"logps/chosen": -1.4002972841262817, |
|
"logps/rejected": -1.889203667640686, |
|
"loss": 1.5807, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -14.002973556518555, |
|
"rewards/margins": 4.889064788818359, |
|
"rewards/rejected": -18.892038345336914, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.5237140948563794, |
|
"grad_norm": 124.65949242661881, |
|
"learning_rate": 5.448196544517167e-07, |
|
"logits/chosen": -1.2370043992996216, |
|
"logits/rejected": -1.1781737804412842, |
|
"logps/chosen": -1.4051058292388916, |
|
"logps/rejected": -1.9604175090789795, |
|
"loss": 1.4672, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -14.051058769226074, |
|
"rewards/margins": 5.553117752075195, |
|
"rewards/rejected": -19.604177474975586, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.5344021376085505, |
|
"grad_norm": 144.8350321427832, |
|
"learning_rate": 5.26167978121472e-07, |
|
"logits/chosen": -1.1803662776947021, |
|
"logits/rejected": -1.1651079654693604, |
|
"logps/chosen": -1.4542481899261475, |
|
"logps/rejected": -2.0275795459747314, |
|
"loss": 1.4629, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -14.54248046875, |
|
"rewards/margins": 5.73331356048584, |
|
"rewards/rejected": -20.275793075561523, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.5450901803607214, |
|
"grad_norm": 78.0251331894233, |
|
"learning_rate": 5.074797035076318e-07, |
|
"logits/chosen": -1.209343433380127, |
|
"logits/rejected": -1.182515263557434, |
|
"logps/chosen": -1.5509597063064575, |
|
"logps/rejected": -2.0222866535186768, |
|
"loss": 1.6056, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -15.50959587097168, |
|
"rewards/margins": 4.713270664215088, |
|
"rewards/rejected": -20.22286605834961, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.5557782231128925, |
|
"grad_norm": 123.4068534133138, |
|
"learning_rate": 4.887809678520975e-07, |
|
"logits/chosen": -1.195039987564087, |
|
"logits/rejected": -1.1650830507278442, |
|
"logps/chosen": -1.4355518817901611, |
|
"logps/rejected": -1.9077056646347046, |
|
"loss": 1.3832, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -14.35551929473877, |
|
"rewards/margins": 4.721535682678223, |
|
"rewards/rejected": -19.077056884765625, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.5664662658650634, |
|
"grad_norm": 86.09154207388711, |
|
"learning_rate": 4.700979230274829e-07, |
|
"logits/chosen": -1.1370768547058105, |
|
"logits/rejected": -1.1196085214614868, |
|
"logps/chosen": -1.5277538299560547, |
|
"logps/rejected": -2.003265857696533, |
|
"loss": 1.5697, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -15.277536392211914, |
|
"rewards/margins": 4.755120277404785, |
|
"rewards/rejected": -20.032657623291016, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.5771543086172345, |
|
"grad_norm": 198.6225861486916, |
|
"learning_rate": 4.514566989613559e-07, |
|
"logits/chosen": -1.1517788171768188, |
|
"logits/rejected": -1.1230041980743408, |
|
"logps/chosen": -1.4101899862289429, |
|
"logps/rejected": -1.9304449558258057, |
|
"loss": 1.5201, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -14.101900100708008, |
|
"rewards/margins": 5.202548980712891, |
|
"rewards/rejected": -19.3044490814209, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.5878423513694054, |
|
"grad_norm": 88.44447901867754, |
|
"learning_rate": 4.328833670911724e-07, |
|
"logits/chosen": -1.1258559226989746, |
|
"logits/rejected": -1.087894082069397, |
|
"logps/chosen": -1.4194262027740479, |
|
"logps/rejected": -1.82901930809021, |
|
"loss": 1.6626, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -14.19426155090332, |
|
"rewards/margins": 4.095931529998779, |
|
"rewards/rejected": -18.290193557739258, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.5985303941215765, |
|
"grad_norm": 74.99606064357101, |
|
"learning_rate": 4.144039039010124e-07, |
|
"logits/chosen": -1.2096599340438843, |
|
"logits/rejected": -1.183319091796875, |
|
"logps/chosen": -1.4303152561187744, |
|
"logps/rejected": -1.9751609563827515, |
|
"loss": 1.4679, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -14.303152084350586, |
|
"rewards/margins": 5.44845724105835, |
|
"rewards/rejected": -19.751609802246094, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.6092184368737475, |
|
"grad_norm": 112.3023562652983, |
|
"learning_rate": 3.960441545911204e-07, |
|
"logits/chosen": -1.17905592918396, |
|
"logits/rejected": -1.1455281972885132, |
|
"logps/chosen": -1.5073192119598389, |
|
"logps/rejected": -2.059325695037842, |
|
"loss": 1.313, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -15.073190689086914, |
|
"rewards/margins": 5.520066738128662, |
|
"rewards/rejected": -20.593257904052734, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.6199064796259185, |
|
"grad_norm": 86.7215875996648, |
|
"learning_rate": 3.778297969310529e-07, |
|
"logits/chosen": -1.2023842334747314, |
|
"logits/rejected": -1.1609100103378296, |
|
"logps/chosen": -1.4881120920181274, |
|
"logps/rejected": -1.9453773498535156, |
|
"loss": 1.5443, |
|
"rewards/accuracies": 0.793749988079071, |
|
"rewards/chosen": -14.881118774414062, |
|
"rewards/margins": 4.572653770446777, |
|
"rewards/rejected": -19.453771591186523, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.6305945223780896, |
|
"grad_norm": 112.19728139908482, |
|
"learning_rate": 3.5978630534699865e-07, |
|
"logits/chosen": -1.1330126523971558, |
|
"logits/rejected": -1.117620825767517, |
|
"logps/chosen": -1.5452992916107178, |
|
"logps/rejected": -2.0189967155456543, |
|
"loss": 1.3971, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -15.452993392944336, |
|
"rewards/margins": 4.736973285675049, |
|
"rewards/rejected": -20.18996810913086, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.6412825651302605, |
|
"grad_norm": 103.78285178770669, |
|
"learning_rate": 3.4193891529348795e-07, |
|
"logits/chosen": -1.0798468589782715, |
|
"logits/rejected": -1.0519963502883911, |
|
"logps/chosen": -1.5262010097503662, |
|
"logps/rejected": -1.9304592609405518, |
|
"loss": 1.8782, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": -15.262011528015137, |
|
"rewards/margins": 4.042581558227539, |
|
"rewards/rejected": -19.30459213256836, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.6519706078824316, |
|
"grad_norm": 89.28193531018944, |
|
"learning_rate": 3.243125879593286e-07, |
|
"logits/chosen": -1.1877957582473755, |
|
"logits/rejected": -1.141606092453003, |
|
"logps/chosen": -1.4767545461654663, |
|
"logps/rejected": -1.900796890258789, |
|
"loss": 1.5065, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": -14.767545700073242, |
|
"rewards/margins": 4.240423679351807, |
|
"rewards/rejected": -19.007970809936523, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.6626586506346025, |
|
"grad_norm": 118.8609587886762, |
|
"learning_rate": 3.069319753571269e-07, |
|
"logits/chosen": -1.2135640382766724, |
|
"logits/rejected": -1.1922129392623901, |
|
"logps/chosen": -1.5534937381744385, |
|
"logps/rejected": -2.036700487136841, |
|
"loss": 1.6677, |
|
"rewards/accuracies": 0.78125, |
|
"rewards/chosen": -15.534937858581543, |
|
"rewards/margins": 4.832065582275391, |
|
"rewards/rejected": -20.36700439453125, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.6733466933867736, |
|
"grad_norm": 99.40008827127444, |
|
"learning_rate": 2.898213858452173e-07, |
|
"logits/chosen": -1.2052314281463623, |
|
"logits/rejected": -1.1488008499145508, |
|
"logps/chosen": -1.4641424417495728, |
|
"logps/rejected": -1.947819709777832, |
|
"loss": 1.5204, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -14.641424179077148, |
|
"rewards/margins": 4.8367719650268555, |
|
"rewards/rejected": -19.478195190429688, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.6840347361389446, |
|
"grad_norm": 111.94261532026978, |
|
"learning_rate": 2.730047501302266e-07, |
|
"logits/chosen": -1.1900420188903809, |
|
"logits/rejected": -1.184233546257019, |
|
"logps/chosen": -1.5346307754516602, |
|
"logps/rejected": -2.0948898792266846, |
|
"loss": 1.5013, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -15.346307754516602, |
|
"rewards/margins": 5.6025896072387695, |
|
"rewards/rejected": -20.948898315429688, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.6947227788911156, |
|
"grad_norm": 92.96649699793859, |
|
"learning_rate": 2.5650558779781635e-07, |
|
"logits/chosen": -1.2109119892120361, |
|
"logits/rejected": -1.1595691442489624, |
|
"logps/chosen": -1.6573654413223267, |
|
"logps/rejected": -2.3071436882019043, |
|
"loss": 1.4311, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -16.57365608215332, |
|
"rewards/margins": 6.497782230377197, |
|
"rewards/rejected": -23.07143783569336, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.7054108216432866, |
|
"grad_norm": 81.17346063961124, |
|
"learning_rate": 2.403469744184154e-07, |
|
"logits/chosen": -1.117084264755249, |
|
"logits/rejected": -1.0746078491210938, |
|
"logps/chosen": -1.5377912521362305, |
|
"logps/rejected": -1.992598533630371, |
|
"loss": 1.5151, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -15.377914428710938, |
|
"rewards/margins": 4.548072814941406, |
|
"rewards/rejected": -19.92598533630371, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.7160988643954576, |
|
"grad_norm": 143.05041993788979, |
|
"learning_rate": 2.2455150927394878e-07, |
|
"logits/chosen": -1.1712327003479004, |
|
"logits/rejected": -1.151883602142334, |
|
"logps/chosen": -1.4956694841384888, |
|
"logps/rejected": -2.041016101837158, |
|
"loss": 1.335, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -14.956695556640625, |
|
"rewards/margins": 5.453465938568115, |
|
"rewards/rejected": -20.410160064697266, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.7267869071476286, |
|
"grad_norm": 114.52568542094356, |
|
"learning_rate": 2.0914128375069722e-07, |
|
"logits/chosen": -1.1757316589355469, |
|
"logits/rejected": -1.1399040222167969, |
|
"logps/chosen": -1.5182468891143799, |
|
"logps/rejected": -2.0508017539978027, |
|
"loss": 1.5897, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -15.182469367980957, |
|
"rewards/margins": 5.3255486488342285, |
|
"rewards/rejected": -20.50801658630371, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.7374749498997996, |
|
"grad_norm": 91.55669201238004, |
|
"learning_rate": 1.9413785044249676e-07, |
|
"logits/chosen": -1.2101690769195557, |
|
"logits/rejected": -1.185270071029663, |
|
"logps/chosen": -1.5700366497039795, |
|
"logps/rejected": -2.17747163772583, |
|
"loss": 1.5591, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -15.70036506652832, |
|
"rewards/margins": 6.074349403381348, |
|
"rewards/rejected": -21.774715423583984, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.7481629926519706, |
|
"grad_norm": 163.06372328196355, |
|
"learning_rate": 1.7956219300748792e-07, |
|
"logits/chosen": -1.191392183303833, |
|
"logits/rejected": -1.1933467388153076, |
|
"logps/chosen": -1.5097607374191284, |
|
"logps/rejected": -1.995661735534668, |
|
"loss": 1.5515, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -15.097607612609863, |
|
"rewards/margins": 4.859010696411133, |
|
"rewards/rejected": -19.956619262695312, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.7588510354041417, |
|
"grad_norm": 116.64866987709867, |
|
"learning_rate": 1.6543469682057104e-07, |
|
"logits/chosen": -1.1239954233169556, |
|
"logits/rejected": -1.1377310752868652, |
|
"logps/chosen": -1.4421604871749878, |
|
"logps/rejected": -1.9545857906341553, |
|
"loss": 1.2498, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -14.421605110168457, |
|
"rewards/margins": 5.1242547035217285, |
|
"rewards/rejected": -19.545862197875977, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.7695390781563126, |
|
"grad_norm": 96.86652927133838, |
|
"learning_rate": 1.5177512046261666e-07, |
|
"logits/chosen": -1.1753429174423218, |
|
"logits/rejected": -1.1725155115127563, |
|
"logps/chosen": -1.4693684577941895, |
|
"logps/rejected": -2.0869216918945312, |
|
"loss": 1.4734, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -14.693684577941895, |
|
"rewards/margins": 6.175534248352051, |
|
"rewards/rejected": -20.869220733642578, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.7802271209084837, |
|
"grad_norm": 102.14193626103624, |
|
"learning_rate": 1.3860256808630427e-07, |
|
"logits/chosen": -1.2227611541748047, |
|
"logits/rejected": -1.1525650024414062, |
|
"logps/chosen": -1.5389500856399536, |
|
"logps/rejected": -2.149737596511841, |
|
"loss": 1.4471, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -15.389498710632324, |
|
"rewards/margins": 6.107874393463135, |
|
"rewards/rejected": -21.497373580932617, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.7909151636606546, |
|
"grad_norm": 117.88219247560441, |
|
"learning_rate": 1.2593546269723647e-07, |
|
"logits/chosen": -1.136879324913025, |
|
"logits/rejected": -1.1233110427856445, |
|
"logps/chosen": -1.5077215433120728, |
|
"logps/rejected": -1.9501771926879883, |
|
"loss": 1.5365, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.077214241027832, |
|
"rewards/margins": 4.424557685852051, |
|
"rewards/rejected": -19.501771926879883, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.8016032064128257, |
|
"grad_norm": 146.62372943161364, |
|
"learning_rate": 1.1379152038770029e-07, |
|
"logits/chosen": -1.1779382228851318, |
|
"logits/rejected": -1.180654764175415, |
|
"logps/chosen": -1.6213643550872803, |
|
"logps/rejected": -2.1708412170410156, |
|
"loss": 1.528, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -16.213642120361328, |
|
"rewards/margins": 5.494766712188721, |
|
"rewards/rejected": -21.708412170410156, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.8122912491649966, |
|
"grad_norm": 138.00637470724254, |
|
"learning_rate": 1.0218772555910954e-07, |
|
"logits/chosen": -1.1884950399398804, |
|
"logits/rejected": -1.1677783727645874, |
|
"logps/chosen": -1.466828465461731, |
|
"logps/rejected": -1.9588056802749634, |
|
"loss": 1.5783, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -14.668286323547363, |
|
"rewards/margins": 4.919771194458008, |
|
"rewards/rejected": -19.588054656982422, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.8229792919171677, |
|
"grad_norm": 103.64385074010784, |
|
"learning_rate": 9.114030716778432e-08, |
|
"logits/chosen": -1.1786987781524658, |
|
"logits/rejected": -1.1550320386886597, |
|
"logps/chosen": -1.5073349475860596, |
|
"logps/rejected": -2.1525402069091797, |
|
"loss": 1.292, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": -15.073351860046387, |
|
"rewards/margins": 6.45205020904541, |
|
"rewards/rejected": -21.525402069091797, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.8336673346693386, |
|
"grad_norm": 91.3096727719287, |
|
"learning_rate": 8.066471602728803e-08, |
|
"logits/chosen": -1.1919111013412476, |
|
"logits/rejected": -1.1740847826004028, |
|
"logps/chosen": -1.580262541770935, |
|
"logps/rejected": -2.1456198692321777, |
|
"loss": 1.4583, |
|
"rewards/accuracies": 0.831250011920929, |
|
"rewards/chosen": -15.80262565612793, |
|
"rewards/margins": 5.6535725593566895, |
|
"rewards/rejected": -21.45619773864746, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.8443553774215097, |
|
"grad_norm": 83.70090604282484, |
|
"learning_rate": 7.077560319906694e-08, |
|
"logits/chosen": -1.1921932697296143, |
|
"logits/rejected": -1.1703288555145264, |
|
"logps/chosen": -1.5114152431488037, |
|
"logps/rejected": -2.060832977294922, |
|
"loss": 1.4804, |
|
"rewards/accuracies": 0.8125, |
|
"rewards/chosen": -15.114153861999512, |
|
"rewards/margins": 5.494177341461182, |
|
"rewards/rejected": -20.60832977294922, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"grad_norm": 78.24949652820646, |
|
"learning_rate": 6.148679950161672e-08, |
|
"logits/chosen": -1.2033644914627075, |
|
"logits/rejected": -1.1850937604904175, |
|
"logps/chosen": -1.5316810607910156, |
|
"logps/rejected": -2.022660732269287, |
|
"loss": 1.3196, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -15.316810607910156, |
|
"rewards/margins": 4.909798622131348, |
|
"rewards/rejected": -20.226608276367188, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8550434201736807, |
|
"eval_logits/chosen": -1.3681788444519043, |
|
"eval_logits/rejected": -1.3776241540908813, |
|
"eval_logps/chosen": -1.531295895576477, |
|
"eval_logps/rejected": -2.054961919784546, |
|
"eval_loss": 1.3846672773361206, |
|
"eval_rewards/accuracies": 0.8292682766914368, |
|
"eval_rewards/chosen": -15.312957763671875, |
|
"eval_rewards/margins": 5.236661434173584, |
|
"eval_rewards/rejected": -20.549619674682617, |
|
"eval_runtime": 94.8838, |
|
"eval_samples_per_second": 20.667, |
|
"eval_steps_per_second": 1.296, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.8657314629258517, |
|
"grad_norm": 117.65350933148179, |
|
"learning_rate": 5.2811296166831666e-08, |
|
"logits/chosen": -1.166261911392212, |
|
"logits/rejected": -1.183724284172058, |
|
"logps/chosen": -1.5941721200942993, |
|
"logps/rejected": -2.0773017406463623, |
|
"loss": 1.3853, |
|
"rewards/accuracies": 0.8500000238418579, |
|
"rewards/chosen": -15.94172191619873, |
|
"rewards/margins": 4.831295967102051, |
|
"rewards/rejected": -20.77301597595215, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.8764195056780227, |
|
"grad_norm": 165.1398306935317, |
|
"learning_rate": 4.4761226670592066e-08, |
|
"logits/chosen": -1.1748878955841064, |
|
"logits/rejected": -1.1613463163375854, |
|
"logps/chosen": -1.5498403310775757, |
|
"logps/rejected": -2.065404176712036, |
|
"loss": 1.5604, |
|
"rewards/accuracies": 0.84375, |
|
"rewards/chosen": -15.49840259552002, |
|
"rewards/margins": 5.155638694763184, |
|
"rewards/rejected": -20.654041290283203, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.8871075484301937, |
|
"grad_norm": 96.86053857753426, |
|
"learning_rate": 3.734784976300165e-08, |
|
"logits/chosen": -1.1749510765075684, |
|
"logits/rejected": -1.1216485500335693, |
|
"logps/chosen": -1.4522340297698975, |
|
"logps/rejected": -2.062474012374878, |
|
"loss": 1.6792, |
|
"rewards/accuracies": 0.8687499761581421, |
|
"rewards/chosen": -14.5223388671875, |
|
"rewards/margins": 6.102401256561279, |
|
"rewards/rejected": -20.624740600585938, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.8977955911823647, |
|
"grad_norm": 106.60182495249761, |
|
"learning_rate": 3.058153372200695e-08, |
|
"logits/chosen": -1.2051994800567627, |
|
"logits/rejected": -1.1551568508148193, |
|
"logps/chosen": -1.4368181228637695, |
|
"logps/rejected": -1.9974851608276367, |
|
"loss": 1.4114, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -14.368182182312012, |
|
"rewards/margins": 5.606671333312988, |
|
"rewards/rejected": -19.974851608276367, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.9084836339345357, |
|
"grad_norm": 133.4506452029166, |
|
"learning_rate": 2.4471741852423233e-08, |
|
"logits/chosen": -1.1993720531463623, |
|
"logits/rejected": -1.1902838945388794, |
|
"logps/chosen": -1.5988143682479858, |
|
"logps/rejected": -2.0698258876800537, |
|
"loss": 1.6235, |
|
"rewards/accuracies": 0.8062499761581421, |
|
"rewards/chosen": -15.988142013549805, |
|
"rewards/margins": 4.710117340087891, |
|
"rewards/rejected": -20.698259353637695, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.9191716766867067, |
|
"grad_norm": 135.55475022461232, |
|
"learning_rate": 1.9027019250647036e-08, |
|
"logits/chosen": -1.1921052932739258, |
|
"logits/rejected": -1.1760125160217285, |
|
"logps/chosen": -1.6189323663711548, |
|
"logps/rejected": -2.1575050354003906, |
|
"loss": 1.5124, |
|
"rewards/accuracies": 0.862500011920929, |
|
"rewards/chosen": -16.1893253326416, |
|
"rewards/margins": 5.385725498199463, |
|
"rewards/rejected": -21.57505226135254, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.9298597194388778, |
|
"grad_norm": 107.7429496590572, |
|
"learning_rate": 1.4254980853566246e-08, |
|
"logits/chosen": -1.145105004310608, |
|
"logits/rejected": -1.1043111085891724, |
|
"logps/chosen": -1.4653263092041016, |
|
"logps/rejected": -2.004653215408325, |
|
"loss": 1.4369, |
|
"rewards/accuracies": 0.887499988079071, |
|
"rewards/chosen": -14.6532621383667, |
|
"rewards/margins": 5.393268585205078, |
|
"rewards/rejected": -20.046531677246094, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.9405477621910487, |
|
"grad_norm": 99.21866608543166, |
|
"learning_rate": 1.016230078838226e-08, |
|
"logits/chosen": -1.1760567426681519, |
|
"logits/rejected": -1.1167289018630981, |
|
"logps/chosen": -1.5557719469070435, |
|
"logps/rejected": -2.0364768505096436, |
|
"loss": 1.4144, |
|
"rewards/accuracies": 0.800000011920929, |
|
"rewards/chosen": -15.557719230651855, |
|
"rewards/margins": 4.807051181793213, |
|
"rewards/rejected": -20.364770889282227, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.9512358049432198, |
|
"grad_norm": 90.97863869904057, |
|
"learning_rate": 6.754703038239329e-09, |
|
"logits/chosen": -1.1209274530410767, |
|
"logits/rejected": -1.1030616760253906, |
|
"logps/chosen": -1.5600204467773438, |
|
"logps/rejected": -2.162205934524536, |
|
"loss": 1.2667, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -15.60020637512207, |
|
"rewards/margins": 6.021853446960449, |
|
"rewards/rejected": -21.622058868408203, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.9619238476953907, |
|
"grad_norm": 111.38924570455498, |
|
"learning_rate": 4.036953436716895e-09, |
|
"logits/chosen": -1.2349797487258911, |
|
"logits/rejected": -1.2146203517913818, |
|
"logps/chosen": -1.5040500164031982, |
|
"logps/rejected": -2.0357797145843506, |
|
"loss": 1.4976, |
|
"rewards/accuracies": 0.824999988079071, |
|
"rewards/chosen": -15.040499687194824, |
|
"rewards/margins": 5.31729793548584, |
|
"rewards/rejected": -20.357799530029297, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.9726118904475618, |
|
"grad_norm": 109.67244088589509, |
|
"learning_rate": 2.0128530023804656e-09, |
|
"logits/chosen": -1.1971113681793213, |
|
"logits/rejected": -1.162461519241333, |
|
"logps/chosen": -1.4996418952941895, |
|
"logps/rejected": -2.1262269020080566, |
|
"loss": 1.1322, |
|
"rewards/accuracies": 0.893750011920929, |
|
"rewards/chosen": -14.996419906616211, |
|
"rewards/margins": 6.265848636627197, |
|
"rewards/rejected": -21.262269973754883, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.9832999331997327, |
|
"grad_norm": 102.11437295080724, |
|
"learning_rate": 6.852326227130833e-10, |
|
"logits/chosen": -1.1955955028533936, |
|
"logits/rejected": -1.1833515167236328, |
|
"logps/chosen": -1.5992295742034912, |
|
"logps/rejected": -2.1679768562316895, |
|
"loss": 1.3852, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -15.99229621887207, |
|
"rewards/margins": 5.687474727630615, |
|
"rewards/rejected": -21.67976951599121, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.9939879759519038, |
|
"grad_norm": 101.68998572629944, |
|
"learning_rate": 5.594909486328348e-11, |
|
"logits/chosen": -1.179431438446045, |
|
"logits/rejected": -1.184579610824585, |
|
"logps/chosen": -1.569451093673706, |
|
"logps/rejected": -2.143585681915283, |
|
"loss": 1.6178, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -15.694511413574219, |
|
"rewards/margins": 5.741344451904297, |
|
"rewards/rejected": -21.435855865478516, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.9982631930527722, |
|
"step": 467, |
|
"total_flos": 0.0, |
|
"train_loss": 1.9788420639405668, |
|
"train_runtime": 11440.9622, |
|
"train_samples_per_second": 5.233, |
|
"train_steps_per_second": 0.041 |
|
} |
|
], |
|
"logging_steps": 5, |
|
"max_steps": 467, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|