{ "epoch": 1.971563981042654, "eval_logits/chosen": 85.37242889404297, "eval_logits/rejected": 79.6775131225586, "eval_logps/chosen": -464.193603515625, "eval_logps/rejected": -498.6763916015625, "eval_loss": 0.4798102080821991, "eval_rewards/accuracies": 0.7083333134651184, "eval_rewards/chosen": -5.0729851722717285, "eval_rewards/margins": 1.7632160186767578, "eval_rewards/rejected": -6.836201190948486, "eval_runtime": 36.7059, "eval_samples": 750, "eval_samples_per_second": 20.433, "eval_steps_per_second": 0.654 }