{ "epoch": 1.0, "eval_logits/chosen": 98.4628677368164, "eval_logits/rejected": 98.43635559082031, "eval_logps/chosen": -32.64057159423828, "eval_logps/rejected": -36.409393310546875, "eval_loss": 0.686401903629303, "eval_rewards/accuracies": 0.5685215592384338, "eval_rewards/chosen": -0.07895812392234802, "eval_rewards/margins": 0.09818949550390244, "eval_rewards/rejected": -0.17714762687683105, "eval_runtime": 103.7748, "eval_samples": 343, "eval_samples_per_second": 3.305, "eval_steps_per_second": 0.414 }