{ "epoch": 1.0, "eval_logits/chosen": -1.844919204711914, "eval_logits/rejected": -1.8009027242660522, "eval_logps/chosen": -500.84112548828125, "eval_logps/rejected": -555.3530883789062, "eval_loss": 0.5972276329994202, "eval_rewards/accuracies": 0.6865079402923584, "eval_rewards/chosen": -2.1561989784240723, "eval_rewards/margins": 0.8055059313774109, "eval_rewards/margins_max": 2.425264835357666, "eval_rewards/margins_min": -0.7591551542282104, "eval_rewards/margins_std": 1.4236739873886108, "eval_rewards/rejected": -2.961704730987549, "eval_runtime": 282.7661, "eval_samples": 2000, "eval_samples_per_second": 7.073, "eval_steps_per_second": 0.223 }