{ "epoch": 3.0, "eval_logits/chosen": -2.3639509677886963, "eval_logits/rejected": -2.4372596740722656, "eval_logps/chosen": -316.3568115234375, "eval_logps/rejected": -327.0231628417969, "eval_loss": 0.7655794024467468, "eval_rewards/accuracies": 0.7404580116271973, "eval_rewards/chosen": -3.8106329441070557, "eval_rewards/margins": 3.0781688690185547, "eval_rewards/rejected": -6.8888020515441895, "eval_runtime": 297.1799, "eval_samples": 2086, "eval_samples_per_second": 7.019, "eval_steps_per_second": 0.441, "train_loss": 0.22745071912974513, "train_runtime": 76961.4992, "train_samples": 62901, "train_samples_per_second": 2.452, "train_steps_per_second": 0.077 }