sfulay's picture
Model save
41fe581 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.985781990521327,
"eval_steps": 50,
"global_step": 315,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0947867298578199,
"grad_norm": 56.951628924108704,
"learning_rate": 1.5624999999999999e-07,
"logits/chosen": -2.8022689819335938,
"logits/rejected": -2.699367046356201,
"logps/chosen": -354.14007568359375,
"logps/rejected": -648.7852783203125,
"loss": 0.6846,
"rewards/accuracies": 0.637499988079071,
"rewards/chosen": 0.0030409712344408035,
"rewards/margins": 0.015484926290810108,
"rewards/rejected": -0.01244395412504673,
"step": 10
},
{
"epoch": 0.1895734597156398,
"grad_norm": 16.911922497415656,
"learning_rate": 3.1249999999999997e-07,
"logits/chosen": -2.8449482917785645,
"logits/rejected": -2.7297720909118652,
"logps/chosen": -361.7726135253906,
"logps/rejected": -731.9713134765625,
"loss": 0.4488,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.11448182910680771,
"rewards/margins": 0.7460837364196777,
"rewards/rejected": -0.6316019892692566,
"step": 20
},
{
"epoch": 0.2843601895734597,
"grad_norm": 2.8879981399804886,
"learning_rate": 4.6874999999999996e-07,
"logits/chosen": -2.924880027770996,
"logits/rejected": -2.7608063220977783,
"logps/chosen": -344.0640869140625,
"logps/rejected": -1062.529541015625,
"loss": 0.1128,
"rewards/accuracies": 1.0,
"rewards/chosen": 0.4154191017150879,
"rewards/margins": 4.946678638458252,
"rewards/rejected": -4.531259536743164,
"step": 30
},
{
"epoch": 0.3791469194312796,
"grad_norm": 0.7967945507055681,
"learning_rate": 4.990147841143461e-07,
"logits/chosen": -2.9928297996520996,
"logits/rejected": -2.858860969543457,
"logps/chosen": -369.7523193359375,
"logps/rejected": -2523.788818359375,
"loss": 0.0145,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.17358417809009552,
"rewards/margins": 18.882659912109375,
"rewards/rejected": -19.056243896484375,
"step": 40
},
{
"epoch": 0.47393364928909953,
"grad_norm": 0.08242657747458541,
"learning_rate": 4.950256493879794e-07,
"logits/chosen": -3.1458115577697754,
"logits/rejected": -3.068504810333252,
"logps/chosen": -445.88641357421875,
"logps/rejected": -3839.385498046875,
"loss": 0.0025,
"rewards/accuracies": 1.0,
"rewards/chosen": -0.8680551648139954,
"rewards/margins": 31.354045867919922,
"rewards/rejected": -32.22209930419922,
"step": 50
},
{
"epoch": 0.47393364928909953,
"eval_logits/chosen": -3.214230537414551,
"eval_logits/rejected": -3.0434162616729736,
"eval_logps/chosen": -511.5262451171875,
"eval_logps/rejected": -4356.53564453125,
"eval_loss": 0.006651720497757196,
"eval_rewards/accuracies": 0.9939516186714172,
"eval_rewards/chosen": -1.4454454183578491,
"eval_rewards/margins": 35.71202850341797,
"eval_rewards/rejected": -37.157470703125,
"eval_runtime": 194.5294,
"eval_samples_per_second": 20.074,
"eval_steps_per_second": 0.319,
"step": 50
},
{
"epoch": 0.5687203791469194,
"grad_norm": 0.08904936739654302,
"learning_rate": 4.88020090697132e-07,
"logits/chosen": -3.2791202068328857,
"logits/rejected": -3.141754150390625,
"logps/chosen": -564.9468383789062,
"logps/rejected": -4684.3271484375,
"loss": 0.004,
"rewards/accuracies": 0.9937499761581421,
"rewards/chosen": -1.9347045421600342,
"rewards/margins": 38.56499099731445,
"rewards/rejected": -40.49969482421875,
"step": 60
},
{
"epoch": 0.6635071090047393,
"grad_norm": 1.511268095124282,
"learning_rate": 4.780843509929904e-07,
"logits/chosen": -3.2914862632751465,
"logits/rejected": -3.0883309841156006,
"logps/chosen": -603.4210205078125,
"logps/rejected": -4877.28662109375,
"loss": 0.0021,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.6363024711608887,
"rewards/margins": 39.97002410888672,
"rewards/rejected": -42.606327056884766,
"step": 70
},
{
"epoch": 0.7582938388625592,
"grad_norm": 0.22202350824430725,
"learning_rate": 4.6534074564712217e-07,
"logits/chosen": -3.417383909225464,
"logits/rejected": -3.290362596511841,
"logps/chosen": -600.4118041992188,
"logps/rejected": -5436.11376953125,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.5593833923339844,
"rewards/margins": 45.55999755859375,
"rewards/rejected": -48.11937713623047,
"step": 80
},
{
"epoch": 0.8530805687203792,
"grad_norm": 2.0861019684034874,
"learning_rate": 4.4994615667026846e-07,
"logits/chosen": -3.4805240631103516,
"logits/rejected": -3.3906772136688232,
"logps/chosen": -624.0176391601562,
"logps/rejected": -5296.82275390625,
"loss": 0.0018,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.676025867462158,
"rewards/margins": 44.0660285949707,
"rewards/rejected": -46.7420539855957,
"step": 90
},
{
"epoch": 0.9478672985781991,
"grad_norm": 2.8965011668216905,
"learning_rate": 4.320901013934887e-07,
"logits/chosen": -3.4210407733917236,
"logits/rejected": -3.3643829822540283,
"logps/chosen": -556.0076904296875,
"logps/rejected": -4813.1806640625,
"loss": 0.0036,
"rewards/accuracies": 1.0,
"rewards/chosen": -2.0869507789611816,
"rewards/margins": 39.87181854248047,
"rewards/rejected": -41.95877456665039,
"step": 100
},
{
"epoch": 0.9478672985781991,
"eval_logits/chosen": -3.4104061126708984,
"eval_logits/rejected": -3.2429261207580566,
"eval_logps/chosen": -570.0164184570312,
"eval_logps/rejected": -4765.2841796875,
"eval_loss": 0.0052900416776537895,
"eval_rewards/accuracies": 0.9939516186714172,
"eval_rewards/chosen": -2.0303473472595215,
"eval_rewards/margins": 39.21460723876953,
"eval_rewards/rejected": -41.24495315551758,
"eval_runtime": 192.2337,
"eval_samples_per_second": 20.314,
"eval_steps_per_second": 0.323,
"step": 100
},
{
"epoch": 1.042654028436019,
"grad_norm": 1.2489542878599509,
"learning_rate": 4.119923993874379e-07,
"logits/chosen": -3.4639148712158203,
"logits/rejected": -3.4126315116882324,
"logps/chosen": -549.92138671875,
"logps/rejected": -5150.29638671875,
"loss": 0.0006,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.9557552337646484,
"rewards/margins": 43.08815002441406,
"rewards/rejected": -45.04390335083008,
"step": 110
},
{
"epoch": 1.1374407582938388,
"grad_norm": 0.919711694376481,
"learning_rate": 3.899004663415083e-07,
"logits/chosen": -3.455725908279419,
"logits/rejected": -3.3397490978240967,
"logps/chosen": -534.6444702148438,
"logps/rejected": -5193.822265625,
"loss": 0.0005,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.8104517459869385,
"rewards/margins": 43.72606658935547,
"rewards/rejected": -45.53651809692383,
"step": 120
},
{
"epoch": 1.2322274881516588,
"grad_norm": 0.03772744312797018,
"learning_rate": 3.6608626821692824e-07,
"logits/chosen": -3.503054141998291,
"logits/rejected": -3.4913394451141357,
"logps/chosen": -509.2953186035156,
"logps/rejected": -5831.84228515625,
"loss": 0.0012,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.6301825046539307,
"rewards/margins": 49.84960174560547,
"rewards/rejected": -51.47977828979492,
"step": 130
},
{
"epoch": 1.3270142180094786,
"grad_norm": 0.00011722006953608906,
"learning_rate": 3.408429731701635e-07,
"logits/chosen": -3.636444091796875,
"logits/rejected": -3.614245891571045,
"logps/chosen": -664.00341796875,
"logps/rejected": -5503.0537109375,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -3.150538682937622,
"rewards/margins": 45.41934585571289,
"rewards/rejected": -48.56988525390625,
"step": 140
},
{
"epoch": 1.4218009478672986,
"grad_norm": 0.0013414969188062405,
"learning_rate": 3.144813424636031e-07,
"logits/chosen": -3.788306713104248,
"logits/rejected": -3.686079740524292,
"logps/chosen": -791.1682739257812,
"logps/rejected": -5721.5634765625,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -4.1505842208862305,
"rewards/margins": 46.614662170410156,
"rewards/rejected": -50.7652473449707,
"step": 150
},
{
"epoch": 1.4218009478672986,
"eval_logits/chosen": -3.5867350101470947,
"eval_logits/rejected": -3.5067942142486572,
"eval_logps/chosen": -561.57568359375,
"eval_logps/rejected": -5161.087890625,
"eval_loss": 0.006992733106017113,
"eval_rewards/accuracies": 0.9939516186714172,
"eval_rewards/chosen": -1.9459394216537476,
"eval_rewards/margins": 43.25704574584961,
"eval_rewards/rejected": -45.2029914855957,
"eval_runtime": 191.7726,
"eval_samples_per_second": 20.363,
"eval_steps_per_second": 0.323,
"step": 150
},
{
"epoch": 1.5165876777251186,
"grad_norm": 0.0004138099071654368,
"learning_rate": 2.8732590479375165e-07,
"logits/chosen": -3.556847333908081,
"logits/rejected": -3.5835862159729004,
"logps/chosen": -528.8604736328125,
"logps/rejected": -5157.8740234375,
"loss": 0.0003,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.7568155527114868,
"rewards/margins": 43.957759857177734,
"rewards/rejected": -45.714576721191406,
"step": 160
},
{
"epoch": 1.6113744075829384,
"grad_norm": 0.0016286137021698196,
"learning_rate": 2.597109611334169e-07,
"logits/chosen": -3.579390287399292,
"logits/rejected": -3.6478075981140137,
"logps/chosen": -520.5675048828125,
"logps/rejected": -5432.5673828125,
"loss": 0.0001,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.6220014095306396,
"rewards/margins": 46.55379867553711,
"rewards/rejected": -48.17579650878906,
"step": 170
},
{
"epoch": 1.7061611374407581,
"grad_norm": 0.00799320909391895,
"learning_rate": 2.3197646927086694e-07,
"logits/chosen": -3.5350117683410645,
"logits/rejected": -3.6110050678253174,
"logps/chosen": -534.5997314453125,
"logps/rejected": -5420.73583984375,
"loss": 0.0075,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.6480720043182373,
"rewards/margins": 46.55036163330078,
"rewards/rejected": -48.19843292236328,
"step": 180
},
{
"epoch": 1.8009478672985781,
"grad_norm": 0.0014081828819370304,
"learning_rate": 2.0446385870993467e-07,
"logits/chosen": -3.5267558097839355,
"logits/rejected": -3.5355076789855957,
"logps/chosen": -524.6720581054688,
"logps/rejected": -5069.0888671875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.4387648105621338,
"rewards/margins": 43.29344177246094,
"rewards/rejected": -44.73220443725586,
"step": 190
},
{
"epoch": 1.8957345971563981,
"grad_norm": 0.018000801767423476,
"learning_rate": 1.775118274523545e-07,
"logits/chosen": -3.5183377265930176,
"logits/rejected": -3.5119102001190186,
"logps/chosen": -486.629150390625,
"logps/rejected": -5021.52490234375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.3421844244003296,
"rewards/margins": 42.632965087890625,
"rewards/rejected": -43.97514724731445,
"step": 200
},
{
"epoch": 1.8957345971563981,
"eval_logits/chosen": -3.502014398574829,
"eval_logits/rejected": -3.422856092453003,
"eval_logps/chosen": -512.3704223632812,
"eval_logps/rejected": -5067.64501953125,
"eval_loss": 0.004733214620500803,
"eval_rewards/accuracies": 0.9959677457809448,
"eval_rewards/chosen": -1.4538869857788086,
"eval_rewards/margins": 42.814674377441406,
"eval_rewards/rejected": -44.26856231689453,
"eval_runtime": 194.1121,
"eval_samples_per_second": 20.117,
"eval_steps_per_second": 0.319,
"step": 200
},
{
"epoch": 1.9905213270142181,
"grad_norm": 0.9312964869423628,
"learning_rate": 1.514521724066537e-07,
"logits/chosen": -3.540240526199341,
"logits/rejected": -3.5632777214050293,
"logps/chosen": -531.4307861328125,
"logps/rejected": -5061.63818359375,
"loss": 0.0002,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.486299753189087,
"rewards/margins": 42.955726623535156,
"rewards/rejected": -44.44202423095703,
"step": 210
},
{
"epoch": 2.085308056872038,
"grad_norm": 0.029566978048640967,
"learning_rate": 1.266057047539568e-07,
"logits/chosen": -3.5052971839904785,
"logits/rejected": -3.5332977771759033,
"logps/chosen": -477.3848571777344,
"logps/rejected": -5269.00390625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.4086942672729492,
"rewards/margins": 45.027523040771484,
"rewards/rejected": -46.43621826171875,
"step": 220
},
{
"epoch": 2.1800947867298577,
"grad_norm": 0.0005556188331340245,
"learning_rate": 1.032783005551884e-07,
"logits/chosen": -3.5509438514709473,
"logits/rejected": -3.5611331462860107,
"logps/chosen": -473.364501953125,
"logps/rejected": -4865.369140625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.312354326248169,
"rewards/margins": 41.259403228759766,
"rewards/rejected": -42.57175827026367,
"step": 230
},
{
"epoch": 2.2748815165876777,
"grad_norm": 0.005629678669869344,
"learning_rate": 8.175713521924976e-08,
"logits/chosen": -3.5678086280822754,
"logits/rejected": -3.5121123790740967,
"logps/chosen": -496.83258056640625,
"logps/rejected": -5081.9599609375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.4451147317886353,
"rewards/margins": 43.37391662597656,
"rewards/rejected": -44.81903839111328,
"step": 240
},
{
"epoch": 2.3696682464454977,
"grad_norm": 0.0012113886351427462,
"learning_rate": 6.230714818829733e-08,
"logits/chosen": -3.530911922454834,
"logits/rejected": -3.5102057456970215,
"logps/chosen": -484.5502014160156,
"logps/rejected": -5412.3271484375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.327695608139038,
"rewards/margins": 46.98969268798828,
"rewards/rejected": -48.31739044189453,
"step": 250
},
{
"epoch": 2.3696682464454977,
"eval_logits/chosen": -3.5510308742523193,
"eval_logits/rejected": -3.444518566131592,
"eval_logps/chosen": -512.2269287109375,
"eval_logps/rejected": -5116.15771484375,
"eval_loss": 0.005008448380976915,
"eval_rewards/accuracies": 0.9959677457809448,
"eval_rewards/chosen": -1.4524519443511963,
"eval_rewards/margins": 43.301239013671875,
"eval_rewards/rejected": -44.753692626953125,
"eval_runtime": 192.1218,
"eval_samples_per_second": 20.326,
"eval_steps_per_second": 0.323,
"step": 250
},
{
"epoch": 2.4644549763033177,
"grad_norm": 0.005272804838769864,
"learning_rate": 4.516778136213037e-08,
"logits/chosen": -3.5464816093444824,
"logits/rejected": -3.532754898071289,
"logps/chosen": -474.98077392578125,
"logps/rejected": -5214.1748046875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.3641650676727295,
"rewards/margins": 44.85725021362305,
"rewards/rejected": -46.22141647338867,
"step": 260
},
{
"epoch": 2.5592417061611377,
"grad_norm": 0.01760309981671165,
"learning_rate": 3.055003141378948e-08,
"logits/chosen": -3.5305237770080566,
"logits/rejected": -3.543522357940674,
"logps/chosen": -502.1796875,
"logps/rejected": -5842.8251953125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.2657973766326904,
"rewards/margins": 50.87003707885742,
"rewards/rejected": -52.135841369628906,
"step": 270
},
{
"epoch": 2.654028436018957,
"grad_norm": 0.01345213655983596,
"learning_rate": 1.8633852284264508e-08,
"logits/chosen": -3.5437607765197754,
"logits/rejected": -3.537663221359253,
"logps/chosen": -519.03759765625,
"logps/rejected": -5507.5615234375,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.3319001197814941,
"rewards/margins": 47.102291107177734,
"rewards/rejected": -48.4341926574707,
"step": 280
},
{
"epoch": 2.748815165876777,
"grad_norm": 5.9654408780918595e-05,
"learning_rate": 9.56593983327919e-09,
"logits/chosen": -3.5722999572753906,
"logits/rejected": -3.5434532165527344,
"logps/chosen": -525.2794189453125,
"logps/rejected": -5359.7451171875,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.4624111652374268,
"rewards/margins": 46.21337890625,
"rewards/rejected": -47.675785064697266,
"step": 290
},
{
"epoch": 2.843601895734597,
"grad_norm": 0.0012624104591569302,
"learning_rate": 3.4579259185321398e-09,
"logits/chosen": -3.5550761222839355,
"logits/rejected": -3.541923999786377,
"logps/chosen": -513.0765380859375,
"logps/rejected": -5235.28759765625,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.3970950841903687,
"rewards/margins": 45.181175231933594,
"rewards/rejected": -46.578269958496094,
"step": 300
},
{
"epoch": 2.843601895734597,
"eval_logits/chosen": -3.5504369735717773,
"eval_logits/rejected": -3.444122552871704,
"eval_logps/chosen": -512.8049926757812,
"eval_logps/rejected": -5128.248046875,
"eval_loss": 0.004975645802915096,
"eval_rewards/accuracies": 0.9959677457809448,
"eval_rewards/chosen": -1.4582326412200928,
"eval_rewards/margins": 43.41635513305664,
"eval_rewards/rejected": -44.87459182739258,
"eval_runtime": 192.6295,
"eval_samples_per_second": 20.272,
"eval_steps_per_second": 0.322,
"step": 300
},
{
"epoch": 2.938388625592417,
"grad_norm": 0.0005953504074610172,
"learning_rate": 3.850041354441502e-10,
"logits/chosen": -3.5716750621795654,
"logits/rejected": -3.5102698802948,
"logps/chosen": -509.0469665527344,
"logps/rejected": -4801.1611328125,
"loss": 0.0,
"rewards/accuracies": 1.0,
"rewards/chosen": -1.4760140180587769,
"rewards/margins": 40.71800994873047,
"rewards/rejected": -42.19402313232422,
"step": 310
},
{
"epoch": 2.985781990521327,
"step": 315,
"total_flos": 0.0,
"train_loss": 0.04083177362173292,
"train_runtime": 9033.5209,
"train_samples_per_second": 4.483,
"train_steps_per_second": 0.035
}
],
"logging_steps": 10,
"max_steps": 315,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}