{ "epoch": 1.0, "eval_logits/chosen": -2.230694055557251, "eval_logits/rejected": -2.225895643234253, "eval_logps/chosen": -33.75852966308594, "eval_logps/rejected": -37.27712631225586, "eval_loss": 0.4946732223033905, "eval_rewards/accuracies": 0.5365448594093323, "eval_rewards/chosen": 0.16561271250247955, "eval_rewards/margins": 0.02191758342087269, "eval_rewards/rejected": 0.14369513094425201, "eval_runtime": 145.7007, "eval_samples": 343, "eval_samples_per_second": 2.354, "eval_steps_per_second": 0.295 }