|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 2.957345971563981, |
|
"eval_steps": 100, |
|
"global_step": 156, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 265.9657020142556, |
|
"learning_rate": 3.125e-08, |
|
"logits/chosen": 123.11854553222656, |
|
"logits/rejected": 97.00198364257812, |
|
"logps/chosen": -425.18585205078125, |
|
"logps/rejected": -424.1869201660156, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 372.96475604860683, |
|
"learning_rate": 3.1249999999999997e-07, |
|
"logits/chosen": 117.39414978027344, |
|
"logits/rejected": 136.32305908203125, |
|
"logps/chosen": -442.28045654296875, |
|
"logps/rejected": -524.1576538085938, |
|
"loss": 0.7748, |
|
"rewards/accuracies": 0.4722222089767456, |
|
"rewards/chosen": 0.1107947826385498, |
|
"rewards/margins": 0.115071140229702, |
|
"rewards/rejected": -0.004276359919458628, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 213.17360776034892, |
|
"learning_rate": 4.989935734988097e-07, |
|
"logits/chosen": 125.45426177978516, |
|
"logits/rejected": 133.0935821533203, |
|
"logps/chosen": -426.3124084472656, |
|
"logps/rejected": -493.6737365722656, |
|
"loss": 0.6502, |
|
"rewards/accuracies": 0.625, |
|
"rewards/chosen": -0.004780772142112255, |
|
"rewards/margins": 0.6115384101867676, |
|
"rewards/rejected": -0.6163192987442017, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 204.32015271324505, |
|
"learning_rate": 4.877641290737883e-07, |
|
"logits/chosen": 123.5244369506836, |
|
"logits/rejected": 126.99784851074219, |
|
"logps/chosen": -466.4947814941406, |
|
"logps/rejected": -534.4097900390625, |
|
"loss": 0.6377, |
|
"rewards/accuracies": 0.7437499761581421, |
|
"rewards/chosen": -1.137466549873352, |
|
"rewards/margins": 1.5820324420928955, |
|
"rewards/rejected": -2.719499111175537, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 200.5277184123756, |
|
"learning_rate": 4.646121984004665e-07, |
|
"logits/chosen": 126.0523910522461, |
|
"logits/rejected": 120.98759460449219, |
|
"logps/chosen": -489.78033447265625, |
|
"logps/rejected": -514.3892211914062, |
|
"loss": 0.6145, |
|
"rewards/accuracies": 0.6875, |
|
"rewards/chosen": -1.659891128540039, |
|
"rewards/margins": 1.3025611639022827, |
|
"rewards/rejected": -2.9624521732330322, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 205.88684610219255, |
|
"learning_rate": 4.3069871595684787e-07, |
|
"logits/chosen": 134.4938201904297, |
|
"logits/rejected": 134.79849243164062, |
|
"logps/chosen": -502.4814453125, |
|
"logps/rejected": -523.4627685546875, |
|
"loss": 0.5901, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -1.9658725261688232, |
|
"rewards/margins": 1.2555662393569946, |
|
"rewards/rejected": -3.2214386463165283, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"grad_norm": 87.33372866728267, |
|
"learning_rate": 3.877242453630256e-07, |
|
"logits/chosen": 133.0808563232422, |
|
"logits/rejected": 136.4200897216797, |
|
"logps/chosen": -467.0462951660156, |
|
"logps/rejected": -506.5567321777344, |
|
"loss": 0.2517, |
|
"rewards/accuracies": 0.875, |
|
"rewards/chosen": -1.0280801057815552, |
|
"rewards/margins": 3.1976516246795654, |
|
"rewards/rejected": -4.22573184967041, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"grad_norm": 71.00765502932312, |
|
"learning_rate": 3.378437060203357e-07, |
|
"logits/chosen": 128.8972625732422, |
|
"logits/rejected": 129.42971801757812, |
|
"logps/chosen": -440.6957092285156, |
|
"logps/rejected": -552.7991943359375, |
|
"loss": 0.1231, |
|
"rewards/accuracies": 0.9375, |
|
"rewards/chosen": -1.1781926155090332, |
|
"rewards/margins": 3.8879973888397217, |
|
"rewards/rejected": -5.066189765930176, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"grad_norm": 68.54852270837435, |
|
"learning_rate": 2.8355831645441387e-07, |
|
"logits/chosen": 133.30160522460938, |
|
"logits/rejected": 134.1454620361328, |
|
"logps/chosen": -499.032958984375, |
|
"logps/rejected": -585.234130859375, |
|
"loss": 0.1193, |
|
"rewards/accuracies": 0.9750000238418579, |
|
"rewards/chosen": -0.9932767748832703, |
|
"rewards/margins": 4.856534481048584, |
|
"rewards/rejected": -5.849810600280762, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"grad_norm": 96.90359229935069, |
|
"learning_rate": 2.2759017277414164e-07, |
|
"logits/chosen": 120.51957702636719, |
|
"logits/rejected": 123.01716613769531, |
|
"logps/chosen": -477.44366455078125, |
|
"logps/rejected": -548.8724365234375, |
|
"loss": 0.1182, |
|
"rewards/accuracies": 0.9624999761581421, |
|
"rewards/chosen": -1.5276060104370117, |
|
"rewards/margins": 4.922442436218262, |
|
"rewards/rejected": -6.450047969818115, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"grad_norm": 57.0241100183833, |
|
"learning_rate": 1.7274575140626315e-07, |
|
"logits/chosen": 134.63583374023438, |
|
"logits/rejected": 125.1494140625, |
|
"logps/chosen": -492.84930419921875, |
|
"logps/rejected": -590.646728515625, |
|
"loss": 0.1371, |
|
"rewards/accuracies": 0.987500011920929, |
|
"rewards/chosen": -1.4459283351898193, |
|
"rewards/margins": 5.340741157531738, |
|
"rewards/rejected": -6.7866692543029785, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"eval_logits/chosen": 102.37699127197266, |
|
"eval_logits/rejected": 96.37848663330078, |
|
"eval_logps/chosen": -483.2002868652344, |
|
"eval_logps/rejected": -502.7766418457031, |
|
"eval_loss": 0.5052046775817871, |
|
"eval_rewards/accuracies": 0.7708333134651184, |
|
"eval_rewards/chosen": -2.9795796871185303, |
|
"eval_rewards/margins": 2.143129587173462, |
|
"eval_rewards/rejected": -5.122709274291992, |
|
"eval_runtime": 48.3488, |
|
"eval_samples_per_second": 15.512, |
|
"eval_steps_per_second": 0.496, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"grad_norm": 49.58918461406247, |
|
"learning_rate": 1.2177518064852348e-07, |
|
"logits/chosen": 113.58479309082031, |
|
"logits/rejected": 127.19537353515625, |
|
"logps/chosen": -511.4662170410156, |
|
"logps/rejected": -620.75537109375, |
|
"loss": 0.0929, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -1.6378005743026733, |
|
"rewards/margins": 5.002486228942871, |
|
"rewards/rejected": -6.640286922454834, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"grad_norm": 30.33881666826054, |
|
"learning_rate": 7.723433775328384e-08, |
|
"logits/chosen": 124.79753112792969, |
|
"logits/rejected": 125.82017517089844, |
|
"logps/chosen": -493.6553649902344, |
|
"logps/rejected": -574.6585693359375, |
|
"loss": 0.053, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.6378300189971924, |
|
"rewards/margins": 5.504368305206299, |
|
"rewards/rejected": -7.142198085784912, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"grad_norm": 23.06668849787155, |
|
"learning_rate": 4.1356686569674335e-08, |
|
"logits/chosen": 129.1642608642578, |
|
"logits/rejected": 134.2584228515625, |
|
"logps/chosen": -508.4425354003906, |
|
"logps/rejected": -594.6097412109375, |
|
"loss": 0.0541, |
|
"rewards/accuracies": 0.9937499761581421, |
|
"rewards/chosen": -1.4683927297592163, |
|
"rewards/margins": 5.420845985412598, |
|
"rewards/rejected": -6.8892388343811035, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"grad_norm": 38.07392153670512, |
|
"learning_rate": 1.5941282340065697e-08, |
|
"logits/chosen": 114.87281799316406, |
|
"logits/rejected": 115.1664047241211, |
|
"logps/chosen": -465.3794860839844, |
|
"logps/rejected": -584.0101318359375, |
|
"loss": 0.0538, |
|
"rewards/accuracies": 1.0, |
|
"rewards/chosen": -1.9863094091415405, |
|
"rewards/margins": 5.705286979675293, |
|
"rewards/rejected": -7.691596984863281, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"grad_norm": 46.62238024751895, |
|
"learning_rate": 2.2625595580163247e-09, |
|
"logits/chosen": 121.98884582519531, |
|
"logits/rejected": 135.7744598388672, |
|
"logps/chosen": -492.438720703125, |
|
"logps/rejected": -580.8795166015625, |
|
"loss": 0.0589, |
|
"rewards/accuracies": 0.96875, |
|
"rewards/chosen": -1.7609163522720337, |
|
"rewards/margins": 5.512405872344971, |
|
"rewards/rejected": -7.273321628570557, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"step": 156, |
|
"total_flos": 0.0, |
|
"train_loss": 0.27917395799587935, |
|
"train_runtime": 1799.8719, |
|
"train_samples_per_second": 11.251, |
|
"train_steps_per_second": 0.087 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 156, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 3, |
|
"save_steps": 100, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|