{ "epoch": 0.9999183606825047, "eval_logits/chosen": -0.67052161693573, "eval_logits/rejected": -0.61020427942276, "eval_logps/chosen": -990.1327514648438, "eval_logps/rejected": -1154.4678955078125, "eval_loss": 0.38660529255867004, "eval_rewards/accuracies": 0.8059999942779541, "eval_rewards/chosen": -5.162255764007568, "eval_rewards/margins": 1.7307368516921997, "eval_rewards/rejected": -6.89299201965332, "eval_runtime": 614.9178, "eval_samples": 4000, "eval_samples_per_second": 6.505, "eval_steps_per_second": 0.407, "total_flos": 0.0, "train_loss": 0.46421666473520107, "train_runtime": 86694.5826, "train_samples": 195977, "train_samples_per_second": 2.261, "train_steps_per_second": 0.018 }