{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.971563981042654,
  "eval_steps": 100,
  "global_step": 104,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.018957345971563982,
      "grad_norm": 134.91035482329545,
      "learning_rate": 4.545454545454545e-08,
      "logits/chosen": 117.67350769042969,
      "logits/rejected": 126.90988159179688,
      "logps/chosen": -336.5020751953125,
      "logps/rejected": -438.0943298339844,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.1895734597156398,
      "grad_norm": 136.70946628723402,
      "learning_rate": 4.545454545454545e-07,
      "logits/chosen": 134.7197723388672,
      "logits/rejected": 138.20950317382812,
      "logps/chosen": -395.6691589355469,
      "logps/rejected": -439.5714111328125,
      "loss": 0.6989,
      "rewards/accuracies": 0.4375,
      "rewards/chosen": 0.009534548036754131,
      "rewards/margins": -0.0016968999989330769,
      "rewards/rejected": 0.011231447570025921,
      "step": 10
    },
    {
      "epoch": 0.3791469194312796,
      "grad_norm": 131.61830677294216,
      "learning_rate": 4.885348141000122e-07,
      "logits/chosen": 121.12580871582031,
      "logits/rejected": 124.68989562988281,
      "logps/chosen": -371.40533447265625,
      "logps/rejected": -424.2557678222656,
      "loss": 0.6373,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": 0.0666501596570015,
      "rewards/margins": 0.27632588148117065,
      "rewards/rejected": -0.20967569947242737,
      "step": 20
    },
    {
      "epoch": 0.5687203791469194,
      "grad_norm": 106.84429986615133,
      "learning_rate": 4.5025027361734613e-07,
      "logits/chosen": 140.8944091796875,
      "logits/rejected": 134.4232177734375,
      "logps/chosen": -420.9166564941406,
      "logps/rejected": -467.222900390625,
      "loss": 0.5663,
      "rewards/accuracies": 0.706250011920929,
      "rewards/chosen": -1.4017441272735596,
      "rewards/margins": 0.9039627909660339,
      "rewards/rejected": -2.3057069778442383,
      "step": 30
    },
    {
      "epoch": 0.7582938388625592,
      "grad_norm": 103.77783353366277,
      "learning_rate": 3.893311157806091e-07,
      "logits/chosen": 123.17936706542969,
      "logits/rejected": 112.06998443603516,
      "logps/chosen": -389.9255676269531,
      "logps/rejected": -417.8564453125,
      "loss": 0.537,
      "rewards/accuracies": 0.75,
      "rewards/chosen": -1.825113296508789,
      "rewards/margins": 1.1491353511810303,
      "rewards/rejected": -2.9742486476898193,
      "step": 40
    },
    {
      "epoch": 0.9478672985781991,
      "grad_norm": 111.71527698813779,
      "learning_rate": 3.126631330646801e-07,
      "logits/chosen": 136.4942169189453,
      "logits/rejected": 140.7519073486328,
      "logps/chosen": -462.2588806152344,
      "logps/rejected": -544.9989624023438,
      "loss": 0.4903,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -2.164924144744873,
      "rewards/margins": 1.2726519107818604,
      "rewards/rejected": -3.4375758171081543,
      "step": 50
    },
    {
      "epoch": 1.1374407582938388,
      "grad_norm": 47.62030824870374,
      "learning_rate": 2.2891223348923882e-07,
      "logits/chosen": 127.03621673583984,
      "logits/rejected": 130.60501098632812,
      "logps/chosen": -459.1299743652344,
      "logps/rejected": -547.1569213867188,
      "loss": 0.2948,
      "rewards/accuracies": 0.8999999761581421,
      "rewards/chosen": -3.0450351238250732,
      "rewards/margins": 2.4868345260620117,
      "rewards/rejected": -5.531870365142822,
      "step": 60
    },
    {
      "epoch": 1.3270142180094786,
      "grad_norm": 40.05134350853385,
      "learning_rate": 1.4754491880085317e-07,
      "logits/chosen": 116.989013671875,
      "logits/rejected": 119.484130859375,
      "logps/chosen": -432.87384033203125,
      "logps/rejected": -542.761474609375,
      "loss": 0.1734,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": -3.288933515548706,
      "rewards/margins": 3.1884524822235107,
      "rewards/rejected": -6.477385520935059,
      "step": 70
    },
    {
      "epoch": 1.5165876777251186,
      "grad_norm": 173.76861933524862,
      "learning_rate": 7.775827023107834e-08,
      "logits/chosen": 100.70893859863281,
      "logits/rejected": 117.65828704833984,
      "logps/chosen": -442.055419921875,
      "logps/rejected": -570.5432739257812,
      "loss": 0.1466,
      "rewards/accuracies": 0.949999988079071,
      "rewards/chosen": -4.423954963684082,
      "rewards/margins": 3.4382846355438232,
      "rewards/rejected": -7.862239837646484,
      "step": 80
    },
    {
      "epoch": 1.7061611374407581,
      "grad_norm": 46.767267802887574,
      "learning_rate": 2.7440387297912122e-08,
      "logits/chosen": 98.97454071044922,
      "logits/rejected": 111.58085632324219,
      "logps/chosen": -476.39898681640625,
      "logps/rejected": -606.3544311523438,
      "loss": 0.1352,
      "rewards/accuracies": 0.96875,
      "rewards/chosen": -4.5173468589782715,
      "rewards/margins": 3.891281843185425,
      "rewards/rejected": -8.408628463745117,
      "step": 90
    },
    {
      "epoch": 1.8957345971563981,
      "grad_norm": 47.930873499541235,
      "learning_rate": 2.27878296044029e-09,
      "logits/chosen": 105.6420669555664,
      "logits/rejected": 105.06365966796875,
      "logps/chosen": -465.14501953125,
      "logps/rejected": -568.013916015625,
      "loss": 0.1404,
      "rewards/accuracies": 0.9375,
      "rewards/chosen": -4.164032936096191,
      "rewards/margins": 3.4092299938201904,
      "rewards/rejected": -7.573262691497803,
      "step": 100
    },
    {
      "epoch": 1.8957345971563981,
      "eval_logits/chosen": 85.3946533203125,
      "eval_logits/rejected": 79.68743133544922,
      "eval_logps/chosen": -464.6122131347656,
      "eval_logps/rejected": -499.045654296875,
      "eval_loss": 0.480917751789093,
      "eval_rewards/accuracies": 0.7083333134651184,
      "eval_rewards/chosen": -5.093915939331055,
      "eval_rewards/margins": 1.7607501745224,
      "eval_rewards/rejected": -6.854665756225586,
      "eval_runtime": 35.5196,
      "eval_samples_per_second": 21.115,
      "eval_steps_per_second": 0.676,
      "step": 100
    },
    {
      "epoch": 1.971563981042654,
      "step": 104,
      "total_flos": 0.0,
      "train_loss": 0.37812595069408417,
      "train_runtime": 1139.5405,
      "train_samples_per_second": 11.847,
      "train_steps_per_second": 0.091
    }
  ],
  "logging_steps": 10,
  "max_steps": 104,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}