gemma-7b-dpo-full-mix2-beta-0.1 / eval_results.json
lewtun's picture
lewtun HF staff
End of training
4b03d26 verified
{
"epoch": 1.0,
"eval_logits/chosen": 83.16413879394531,
"eval_logits/rejected": 83.84245300292969,
"eval_logps/chosen": -392.341552734375,
"eval_logps/rejected": -414.519775390625,
"eval_loss": 0.40556877851486206,
"eval_rewards/accuracies": 0.792553186416626,
"eval_rewards/chosen": -0.3995126485824585,
"eval_rewards/margins": 3.1725716590881348,
"eval_rewards/rejected": -3.5720841884613037,
"eval_runtime": 140.3273,
"eval_samples": 3000,
"eval_samples_per_second": 21.379,
"eval_steps_per_second": 0.67
}