|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100, |
|
"global_step": 385, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 78.5, |
|
"learning_rate": 1.282051282051282e-07, |
|
"logits/chosen": 88.18099975585938, |
|
"logits/rejected": 88.25153350830078, |
|
"logps/chosen": -29.073104858398438, |
|
"logps/rejected": -26.25731658935547, |
|
"loss": 1.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 66.0, |
|
"learning_rate": 1.282051282051282e-06, |
|
"logits/chosen": 81.06854248046875, |
|
"logits/rejected": 80.77387237548828, |
|
"logps/chosen": -34.293479919433594, |
|
"logps/rejected": -32.99631118774414, |
|
"loss": 1.0126, |
|
"rewards/accuracies": 0.4305555522441864, |
|
"rewards/chosen": -0.03105219267308712, |
|
"rewards/margins": -0.008308152668178082, |
|
"rewards/rejected": -0.022744040936231613, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 67.0, |
|
"learning_rate": 2.564102564102564e-06, |
|
"logits/chosen": 80.62992095947266, |
|
"logits/rejected": 80.52108001708984, |
|
"logps/chosen": -33.51418685913086, |
|
"logps/rejected": -30.840993881225586, |
|
"loss": 0.8773, |
|
"rewards/accuracies": 0.5625, |
|
"rewards/chosen": 0.10543360561132431, |
|
"rewards/margins": 0.14353612065315247, |
|
"rewards/rejected": -0.038102518767118454, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 68.0, |
|
"learning_rate": 3.846153846153847e-06, |
|
"logits/chosen": 82.46273803710938, |
|
"logits/rejected": 82.4950180053711, |
|
"logps/chosen": -33.98906707763672, |
|
"logps/rejected": -31.265905380249023, |
|
"loss": 1.1079, |
|
"rewards/accuracies": 0.44999998807907104, |
|
"rewards/chosen": 0.014249947853386402, |
|
"rewards/margins": -0.08050690591335297, |
|
"rewards/rejected": 0.09475686401128769, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 55.75, |
|
"learning_rate": 4.999896948438434e-06, |
|
"logits/chosen": 80.95271301269531, |
|
"logits/rejected": 80.94709777832031, |
|
"logps/chosen": -32.69623565673828, |
|
"logps/rejected": -33.20489501953125, |
|
"loss": 0.8832, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.2866959571838379, |
|
"rewards/margins": 0.1885182112455368, |
|
"rewards/rejected": 0.09817773848772049, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 37.5, |
|
"learning_rate": 4.987541037542187e-06, |
|
"logits/chosen": 78.52803039550781, |
|
"logits/rejected": 78.53800964355469, |
|
"logps/chosen": -30.692174911499023, |
|
"logps/rejected": -30.75704002380371, |
|
"loss": 1.0095, |
|
"rewards/accuracies": 0.4375, |
|
"rewards/chosen": 0.30784496665000916, |
|
"rewards/margins": 0.1180974617600441, |
|
"rewards/rejected": 0.18974749743938446, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 53.0, |
|
"learning_rate": 4.954691471941119e-06, |
|
"logits/chosen": 83.14569854736328, |
|
"logits/rejected": 83.19773864746094, |
|
"logps/chosen": -30.899333953857422, |
|
"logps/rejected": -29.450210571289062, |
|
"loss": 0.9563, |
|
"rewards/accuracies": 0.512499988079071, |
|
"rewards/chosen": 0.16549696028232574, |
|
"rewards/margins": 0.08130919188261032, |
|
"rewards/rejected": 0.08418775349855423, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 69.5, |
|
"learning_rate": 4.901618883413549e-06, |
|
"logits/chosen": 83.82180786132812, |
|
"logits/rejected": 83.84942626953125, |
|
"logps/chosen": -30.46747398376465, |
|
"logps/rejected": -33.01859664916992, |
|
"loss": 0.9812, |
|
"rewards/accuracies": 0.48750001192092896, |
|
"rewards/chosen": 0.09693069756031036, |
|
"rewards/margins": 0.08546572178602219, |
|
"rewards/rejected": 0.011464978568255901, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 56.25, |
|
"learning_rate": 4.828760511501322e-06, |
|
"logits/chosen": 81.54592895507812, |
|
"logits/rejected": 81.53177642822266, |
|
"logps/chosen": -31.3655948638916, |
|
"logps/rejected": -30.929000854492188, |
|
"loss": 0.9098, |
|
"rewards/accuracies": 0.5375000238418579, |
|
"rewards/chosen": 0.0836077481508255, |
|
"rewards/margins": 0.1864190548658371, |
|
"rewards/rejected": -0.102811299264431, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 71.5, |
|
"learning_rate": 4.7367166013034295e-06, |
|
"logits/chosen": 78.40837097167969, |
|
"logits/rejected": 78.37776947021484, |
|
"logps/chosen": -32.40131378173828, |
|
"logps/rejected": -30.98993492126465, |
|
"loss": 0.913, |
|
"rewards/accuracies": 0.574999988079071, |
|
"rewards/chosen": 0.14211884140968323, |
|
"rewards/margins": 0.16519510746002197, |
|
"rewards/rejected": -0.023076282814145088, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 69.0, |
|
"learning_rate": 4.626245458345211e-06, |
|
"logits/chosen": 83.61566162109375, |
|
"logits/rejected": 83.63754272460938, |
|
"logps/chosen": -33.954978942871094, |
|
"logps/rejected": -31.89337158203125, |
|
"loss": 0.8073, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.21000584959983826, |
|
"rewards/margins": 0.29570215940475464, |
|
"rewards/rejected": -0.08569628745317459, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"eval_logits/chosen": 98.75115966796875, |
|
"eval_logits/rejected": 98.7406005859375, |
|
"eval_logps/chosen": -32.40181350708008, |
|
"eval_logps/rejected": -35.920406341552734, |
|
"eval_loss": 1.0338481664657593, |
|
"eval_rewards/accuracies": 0.4966777563095093, |
|
"eval_rewards/chosen": 0.02481812983751297, |
|
"eval_rewards/margins": -0.002855697413906455, |
|
"eval_rewards/rejected": 0.02767382375895977, |
|
"eval_runtime": 104.4142, |
|
"eval_samples_per_second": 3.285, |
|
"eval_steps_per_second": 0.412, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 79.0, |
|
"learning_rate": 4.498257201263691e-06, |
|
"logits/chosen": 83.83598327636719, |
|
"logits/rejected": 83.71295928955078, |
|
"logps/chosen": -32.41645431518555, |
|
"logps/rejected": -32.721168518066406, |
|
"loss": 0.8304, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.3638074994087219, |
|
"rewards/margins": 0.4029727876186371, |
|
"rewards/rejected": -0.03916532173752785, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 80.5, |
|
"learning_rate": 4.353806263777678e-06, |
|
"logits/chosen": 83.92488098144531, |
|
"logits/rejected": 84.05035400390625, |
|
"logps/chosen": -28.292224884033203, |
|
"logps/rejected": -35.42224884033203, |
|
"loss": 0.7915, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.3824160099029541, |
|
"rewards/margins": 0.36027655005455017, |
|
"rewards/rejected": 0.022139430046081543, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 48.5, |
|
"learning_rate": 4.1940827077152755e-06, |
|
"logits/chosen": 81.21788024902344, |
|
"logits/rejected": 81.23727416992188, |
|
"logps/chosen": -30.410778045654297, |
|
"logps/rejected": -32.07198715209961, |
|
"loss": 0.828, |
|
"rewards/accuracies": 0.612500011920929, |
|
"rewards/chosen": 0.2979464530944824, |
|
"rewards/margins": 0.38216376304626465, |
|
"rewards/rejected": -0.08421732485294342, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 62.5, |
|
"learning_rate": 4.0204024186666215e-06, |
|
"logits/chosen": 82.4339828491211, |
|
"logits/rejected": 82.45420837402344, |
|
"logps/chosen": -27.09214210510254, |
|
"logps/rejected": -33.011024475097656, |
|
"loss": 0.6935, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.213111013174057, |
|
"rewards/margins": 0.5656993985176086, |
|
"rewards/rejected": -0.35258835554122925, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 54.75, |
|
"learning_rate": 3.834196265035119e-06, |
|
"logits/chosen": 81.10945129394531, |
|
"logits/rejected": 81.082763671875, |
|
"logps/chosen": -28.871135711669922, |
|
"logps/rejected": -32.9850959777832, |
|
"loss": 0.6411, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.31078284978866577, |
|
"rewards/margins": 0.5619392395019531, |
|
"rewards/rejected": -0.25115638971328735, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 63.25, |
|
"learning_rate": 3.636998309800573e-06, |
|
"logits/chosen": 83.00135803222656, |
|
"logits/rejected": 82.99880981445312, |
|
"logps/chosen": -33.499019622802734, |
|
"logps/rejected": -30.40065574645996, |
|
"loss": 0.6874, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.3826376497745514, |
|
"rewards/margins": 0.6515066027641296, |
|
"rewards/rejected": -0.26886898279190063, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 68.0, |
|
"learning_rate": 3.4304331721118078e-06, |
|
"logits/chosen": 83.82991790771484, |
|
"logits/rejected": 83.77062225341797, |
|
"logps/chosen": -30.814950942993164, |
|
"logps/rejected": -32.44989776611328, |
|
"loss": 0.7545, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.2666035592556, |
|
"rewards/margins": 0.5245082974433899, |
|
"rewards/rejected": -0.2579047381877899, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 50.25, |
|
"learning_rate": 3.2162026428305436e-06, |
|
"logits/chosen": 81.48609924316406, |
|
"logits/rejected": 81.4673843383789, |
|
"logps/chosen": -30.380239486694336, |
|
"logps/rejected": -31.43511962890625, |
|
"loss": 0.6492, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.49014702439308167, |
|
"rewards/margins": 0.6568924188613892, |
|
"rewards/rejected": -0.16674548387527466, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 33.5, |
|
"learning_rate": 2.996071664294641e-06, |
|
"logits/chosen": 83.15840148925781, |
|
"logits/rejected": 83.14761352539062, |
|
"logps/chosen": -30.205703735351562, |
|
"logps/rejected": -30.463912963867188, |
|
"loss": 0.9036, |
|
"rewards/accuracies": 0.6000000238418579, |
|
"rewards/chosen": 0.33642834424972534, |
|
"rewards/margins": 0.33396121859550476, |
|
"rewards/rejected": 0.0024670884013175964, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 54.75, |
|
"learning_rate": 2.7718537898066833e-06, |
|
"logits/chosen": 78.7307357788086, |
|
"logits/rejected": 78.67295837402344, |
|
"logps/chosen": -33.62441635131836, |
|
"logps/rejected": -32.52568054199219, |
|
"loss": 0.7227, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.6769934892654419, |
|
"rewards/margins": 0.6963596343994141, |
|
"rewards/rejected": -0.019366104155778885, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"eval_logits/chosen": 98.77264404296875, |
|
"eval_logits/rejected": 98.75276947021484, |
|
"eval_logps/chosen": -32.62995529174805, |
|
"eval_logps/rejected": -36.19297790527344, |
|
"eval_loss": 1.0227326154708862, |
|
"eval_rewards/accuracies": 0.49501657485961914, |
|
"eval_rewards/chosen": -0.11206817626953125, |
|
"eval_rewards/margins": 0.023798702284693718, |
|
"eval_rewards/rejected": -0.13586686551570892, |
|
"eval_runtime": 104.1915, |
|
"eval_samples_per_second": 3.292, |
|
"eval_steps_per_second": 0.413, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 65.5, |
|
"learning_rate": 2.5453962426402006e-06, |
|
"logits/chosen": 81.24237823486328, |
|
"logits/rejected": 81.15150451660156, |
|
"logps/chosen": -33.17626953125, |
|
"logps/rejected": -35.2652587890625, |
|
"loss": 0.6447, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.46167102456092834, |
|
"rewards/margins": 0.6291700601577759, |
|
"rewards/rejected": -0.1674990952014923, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 48.5, |
|
"learning_rate": 2.3185646976551794e-06, |
|
"logits/chosen": 83.28003692626953, |
|
"logits/rejected": 83.37066650390625, |
|
"logps/chosen": -31.01590347290039, |
|
"logps/rejected": -31.09220314025879, |
|
"loss": 0.5863, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5598315000534058, |
|
"rewards/margins": 0.7777799367904663, |
|
"rewards/rejected": -0.21794843673706055, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 62.75, |
|
"learning_rate": 2.0932279108998323e-06, |
|
"logits/chosen": 80.46641540527344, |
|
"logits/rejected": 80.52613067626953, |
|
"logps/chosen": -32.29460525512695, |
|
"logps/rejected": -34.29331588745117, |
|
"loss": 0.8345, |
|
"rewards/accuracies": 0.637499988079071, |
|
"rewards/chosen": 0.30668336153030396, |
|
"rewards/margins": 0.4753844141960144, |
|
"rewards/rejected": -0.16870108246803284, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 74.0, |
|
"learning_rate": 1.8712423238279358e-06, |
|
"logits/chosen": 82.80165100097656, |
|
"logits/rejected": 83.07733917236328, |
|
"logps/chosen": -30.73067283630371, |
|
"logps/rejected": -31.75331687927246, |
|
"loss": 0.5911, |
|
"rewards/accuracies": 0.762499988079071, |
|
"rewards/chosen": 0.5744358897209167, |
|
"rewards/margins": 0.7072005271911621, |
|
"rewards/rejected": -0.13276457786560059, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 69.0, |
|
"learning_rate": 1.6544367689701824e-06, |
|
"logits/chosen": 81.53695678710938, |
|
"logits/rejected": 81.60115051269531, |
|
"logps/chosen": -26.971792221069336, |
|
"logps/rejected": -30.23346519470215, |
|
"loss": 0.7449, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.3744624853134155, |
|
"rewards/margins": 0.5740996599197388, |
|
"rewards/rejected": -0.19963720440864563, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 45.5, |
|
"learning_rate": 1.4445974030621963e-06, |
|
"logits/chosen": 78.7826156616211, |
|
"logits/rejected": 78.90965270996094, |
|
"logps/chosen": -30.57162857055664, |
|
"logps/rejected": -36.598594665527344, |
|
"loss": 0.5112, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 0.5936219692230225, |
|
"rewards/margins": 0.8918012380599976, |
|
"rewards/rejected": -0.29817938804626465, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 38.25, |
|
"learning_rate": 1.243452991757889e-06, |
|
"logits/chosen": 78.14024353027344, |
|
"logits/rejected": 78.16914367675781, |
|
"logps/chosen": -30.831195831298828, |
|
"logps/rejected": -31.901092529296875, |
|
"loss": 0.5884, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.56863933801651, |
|
"rewards/margins": 0.8060423135757446, |
|
"rewards/rejected": -0.23740296065807343, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 65.0, |
|
"learning_rate": 1.0526606671603523e-06, |
|
"logits/chosen": 80.71138000488281, |
|
"logits/rejected": 80.48588562011719, |
|
"logps/chosen": -31.13303565979004, |
|
"logps/rejected": -29.891042709350586, |
|
"loss": 0.751, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.4061153829097748, |
|
"rewards/margins": 0.540542483329773, |
|
"rewards/rejected": -0.13442707061767578, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 47.75, |
|
"learning_rate": 8.737922755071455e-07, |
|
"logits/chosen": 80.93512725830078, |
|
"logits/rejected": 80.84117126464844, |
|
"logps/chosen": -32.98380661010742, |
|
"logps/rejected": -32.5334587097168, |
|
"loss": 0.4856, |
|
"rewards/accuracies": 0.7875000238418579, |
|
"rewards/chosen": 0.672263503074646, |
|
"rewards/margins": 0.9822207689285278, |
|
"rewards/rejected": -0.30995720624923706, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 51.75, |
|
"learning_rate": 7.08321427484816e-07, |
|
"logits/chosen": 76.60159301757812, |
|
"logits/rejected": 76.69679260253906, |
|
"logps/chosen": -31.996997833251953, |
|
"logps/rejected": -29.191226959228516, |
|
"loss": 0.6355, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.8452256321907043, |
|
"rewards/margins": 0.8987467885017395, |
|
"rewards/rejected": -0.05352109670639038, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"eval_logits/chosen": 98.76406860351562, |
|
"eval_logits/rejected": 98.7339859008789, |
|
"eval_logps/chosen": -32.608062744140625, |
|
"eval_logps/rejected": -36.16687774658203, |
|
"eval_loss": 1.0328983068466187, |
|
"eval_rewards/accuracies": 0.5456810593605042, |
|
"eval_rewards/chosen": -0.09893348813056946, |
|
"eval_rewards/margins": 0.02127554826438427, |
|
"eval_rewards/rejected": -0.12020901590585709, |
|
"eval_runtime": 103.9872, |
|
"eval_samples_per_second": 3.298, |
|
"eval_steps_per_second": 0.414, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 53.25, |
|
"learning_rate": 5.576113578589035e-07, |
|
"logits/chosen": 83.55947875976562, |
|
"logits/rejected": 83.5940170288086, |
|
"logps/chosen": -29.987110137939453, |
|
"logps/rejected": -32.35546112060547, |
|
"loss": 0.6477, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.5258544683456421, |
|
"rewards/margins": 0.6192765831947327, |
|
"rewards/rejected": -0.09342211484909058, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 46.5, |
|
"learning_rate": 4.229036944380913e-07, |
|
"logits/chosen": 81.14942932128906, |
|
"logits/rejected": 81.15238952636719, |
|
"logps/chosen": -30.662851333618164, |
|
"logps/rejected": -29.089075088500977, |
|
"loss": 0.6327, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": 0.5614250898361206, |
|
"rewards/margins": 0.6631743311882019, |
|
"rewards/rejected": -0.10174927860498428, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 43.5, |
|
"learning_rate": 3.053082288996112e-07, |
|
"logits/chosen": 78.36723327636719, |
|
"logits/rejected": 78.41484832763672, |
|
"logps/chosen": -29.124780654907227, |
|
"logps/rejected": -32.84328079223633, |
|
"loss": 0.6057, |
|
"rewards/accuracies": 0.675000011920929, |
|
"rewards/chosen": 0.7366967797279358, |
|
"rewards/margins": 0.8617448806762695, |
|
"rewards/rejected": -0.12504807114601135, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 65.5, |
|
"learning_rate": 2.0579377374915805e-07, |
|
"logits/chosen": 82.55430603027344, |
|
"logits/rejected": 82.59879302978516, |
|
"logps/chosen": -32.09717559814453, |
|
"logps/rejected": -33.71881103515625, |
|
"loss": 0.6103, |
|
"rewards/accuracies": 0.699999988079071, |
|
"rewards/chosen": 0.6689842343330383, |
|
"rewards/margins": 0.8790559768676758, |
|
"rewards/rejected": -0.2100718766450882, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 45.25, |
|
"learning_rate": 1.2518018074041684e-07, |
|
"logits/chosen": 81.6875228881836, |
|
"logits/rejected": 81.70503234863281, |
|
"logps/chosen": -32.540645599365234, |
|
"logps/rejected": -33.16196823120117, |
|
"loss": 0.6663, |
|
"rewards/accuracies": 0.7250000238418579, |
|
"rewards/chosen": 0.6618569493293762, |
|
"rewards/margins": 0.7039581537246704, |
|
"rewards/rejected": -0.042101241648197174, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 37.5, |
|
"learning_rate": 6.41315865106129e-08, |
|
"logits/chosen": 83.08977508544922, |
|
"logits/rejected": 83.10887145996094, |
|
"logps/chosen": -28.5622615814209, |
|
"logps/rejected": -31.776697158813477, |
|
"loss": 0.6199, |
|
"rewards/accuracies": 0.737500011920929, |
|
"rewards/chosen": 0.5939711332321167, |
|
"rewards/margins": 0.6700795292854309, |
|
"rewards/rejected": -0.07610837370157242, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 69.0, |
|
"learning_rate": 2.3150941078050325e-08, |
|
"logits/chosen": 82.51141357421875, |
|
"logits/rejected": 82.53152465820312, |
|
"logps/chosen": -31.914348602294922, |
|
"logps/rejected": -35.504486083984375, |
|
"loss": 0.6338, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": 0.5770877599716187, |
|
"rewards/margins": 0.8141579627990723, |
|
"rewards/rejected": -0.237070232629776, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 52.75, |
|
"learning_rate": 2.575864278703266e-09, |
|
"logits/chosen": 76.49024963378906, |
|
"logits/rejected": 76.35295104980469, |
|
"logps/chosen": -29.67061996459961, |
|
"logps/rejected": -28.207141876220703, |
|
"loss": 0.6675, |
|
"rewards/accuracies": 0.7124999761581421, |
|
"rewards/chosen": 0.5433036088943481, |
|
"rewards/margins": 0.571243166923523, |
|
"rewards/rejected": -0.027939552441239357, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"step": 385, |
|
"total_flos": 0.0, |
|
"train_loss": 0.746250953302755, |
|
"train_runtime": 2556.1232, |
|
"train_samples_per_second": 1.205, |
|
"train_steps_per_second": 0.151 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 385, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|