|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.971563981042654, |
|
"eval_steps": 100, |
|
"global_step": 104, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.018957345971563982, |
|
"grad_norm": 384.97491646147324, |
|
"learning_rate": 4.545454545454545e-08, |
|
"logits/chosen": -11.400373458862305, |
|
"logits/rejected": -11.167098045349121, |
|
"logps/chosen": -1579.2471923828125, |
|
"logps/rejected": -1833.805419921875, |
|
"loss": 0.6931, |
|
"rewards/accuracies": 0.0, |
|
"rewards/chosen": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/rejected": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.1895734597156398, |
|
"grad_norm": 285.06639638114245, |
|
"learning_rate": 4.545454545454545e-07, |
|
"logits/chosen": -14.52730941772461, |
|
"logits/rejected": -14.906502723693848, |
|
"logps/chosen": -1777.32421875, |
|
"logps/rejected": -1881.382568359375, |
|
"loss": 0.6985, |
|
"rewards/accuracies": 0.4236111044883728, |
|
"rewards/chosen": -0.15817444026470184, |
|
"rewards/margins": 0.02842862159013748, |
|
"rewards/rejected": -0.18660305440425873, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.3791469194312796, |
|
"grad_norm": 534.5020309330747, |
|
"learning_rate": 4.885348141000122e-07, |
|
"logits/chosen": -21.795948028564453, |
|
"logits/rejected": -23.514450073242188, |
|
"logps/chosen": -1682.7659912109375, |
|
"logps/rejected": -1746.1217041015625, |
|
"loss": 0.7332, |
|
"rewards/accuracies": 0.6312500238418579, |
|
"rewards/chosen": -0.4811238646507263, |
|
"rewards/margins": 0.043054938316345215, |
|
"rewards/rejected": -0.5241788625717163, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5687203791469194, |
|
"grad_norm": 425.21287499734404, |
|
"learning_rate": 4.5025027361734613e-07, |
|
"logits/chosen": -7.340817451477051, |
|
"logits/rejected": -15.593961715698242, |
|
"logps/chosen": -1770.065673828125, |
|
"logps/rejected": -1880.151611328125, |
|
"loss": 0.8071, |
|
"rewards/accuracies": 0.6812499761581421, |
|
"rewards/chosen": -0.468301385641098, |
|
"rewards/margins": 0.7533053159713745, |
|
"rewards/rejected": -1.2216066122055054, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.7582938388625592, |
|
"grad_norm": 712.3590680432743, |
|
"learning_rate": 3.893311157806091e-07, |
|
"logits/chosen": -14.5736665725708, |
|
"logits/rejected": -23.62198829650879, |
|
"logps/chosen": -1539.815185546875, |
|
"logps/rejected": -1462.7039794921875, |
|
"loss": 0.9612, |
|
"rewards/accuracies": 0.6499999761581421, |
|
"rewards/chosen": -4.467960357666016, |
|
"rewards/margins": -0.5169845819473267, |
|
"rewards/rejected": -3.950974941253662, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.9478672985781991, |
|
"grad_norm": 881.241885579057, |
|
"learning_rate": 3.126631330646801e-07, |
|
"logits/chosen": -9.282499313354492, |
|
"logits/rejected": -10.507891654968262, |
|
"logps/chosen": -2434.360595703125, |
|
"logps/rejected": -2703.219970703125, |
|
"loss": 0.7979, |
|
"rewards/accuracies": 0.65625, |
|
"rewards/chosen": 0.584517240524292, |
|
"rewards/margins": 0.6922141313552856, |
|
"rewards/rejected": -0.1076967716217041, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.1374407582938388, |
|
"grad_norm": 429.8468773274208, |
|
"learning_rate": 2.2891223348923882e-07, |
|
"logits/chosen": -6.557864189147949, |
|
"logits/rejected": -9.822066307067871, |
|
"logps/chosen": -1984.517578125, |
|
"logps/rejected": -1996.927490234375, |
|
"loss": 0.5975, |
|
"rewards/accuracies": 0.7749999761581421, |
|
"rewards/chosen": 1.916027307510376, |
|
"rewards/margins": 1.0445150136947632, |
|
"rewards/rejected": 0.871512234210968, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.3270142180094786, |
|
"grad_norm": 175.85699006716078, |
|
"learning_rate": 1.4754491880085317e-07, |
|
"logits/chosen": -7.904175758361816, |
|
"logits/rejected": -13.800127983093262, |
|
"logps/chosen": -1871.2291259765625, |
|
"logps/rejected": -1999.8929443359375, |
|
"loss": 0.4226, |
|
"rewards/accuracies": 0.8187500238418579, |
|
"rewards/chosen": -0.032440781593322754, |
|
"rewards/margins": 1.4170843362808228, |
|
"rewards/rejected": -1.4495251178741455, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.5165876777251186, |
|
"grad_norm": 231.31575551946057, |
|
"learning_rate": 7.775827023107834e-08, |
|
"logits/chosen": -12.700660705566406, |
|
"logits/rejected": -10.377889633178711, |
|
"logps/chosen": -1366.731689453125, |
|
"logps/rejected": -1466.466552734375, |
|
"loss": 0.3894, |
|
"rewards/accuracies": 0.856249988079071, |
|
"rewards/chosen": 0.11536725610494614, |
|
"rewards/margins": 1.3466222286224365, |
|
"rewards/rejected": -1.2312551736831665, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.7061611374407581, |
|
"grad_norm": 230.19784681381057, |
|
"learning_rate": 2.7440387297912122e-08, |
|
"logits/chosen": -10.58217716217041, |
|
"logits/rejected": -11.754631996154785, |
|
"logps/chosen": -1986.6451416015625, |
|
"logps/rejected": -2165.16162109375, |
|
"loss": 0.3852, |
|
"rewards/accuracies": 0.90625, |
|
"rewards/chosen": -0.339664489030838, |
|
"rewards/margins": 2.0723910331726074, |
|
"rewards/rejected": -2.412055492401123, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.8957345971563981, |
|
"grad_norm": 302.98036373926305, |
|
"learning_rate": 2.27878296044029e-09, |
|
"logits/chosen": -8.607019424438477, |
|
"logits/rejected": -15.033491134643555, |
|
"logps/chosen": -2083.50048828125, |
|
"logps/rejected": -2193.92431640625, |
|
"loss": 0.4114, |
|
"rewards/accuracies": 0.8374999761581421, |
|
"rewards/chosen": -0.0890653133392334, |
|
"rewards/margins": 1.4980413913726807, |
|
"rewards/rejected": -1.5871065855026245, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.8957345971563981, |
|
"eval_logits/chosen": -18.000696182250977, |
|
"eval_logits/rejected": -25.16254234313965, |
|
"eval_logps/chosen": -1530.4515380859375, |
|
"eval_logps/rejected": -1648.5675048828125, |
|
"eval_loss": 0.8002049326896667, |
|
"eval_rewards/accuracies": 0.7604166865348816, |
|
"eval_rewards/chosen": -0.46603381633758545, |
|
"eval_rewards/margins": 0.8467853665351868, |
|
"eval_rewards/rejected": -1.3128191232681274, |
|
"eval_runtime": 36.1276, |
|
"eval_samples_per_second": 20.76, |
|
"eval_steps_per_second": 0.664, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.971563981042654, |
|
"step": 104, |
|
"total_flos": 0.0, |
|
"train_loss": 0.6178501087885636, |
|
"train_runtime": 1142.0913, |
|
"train_samples_per_second": 11.82, |
|
"train_steps_per_second": 0.091 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 104, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 0.0, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|