{ "epoch": 0.9981298423724285, "eval_logits/chosen": -1.302140235900879, "eval_logits/rejected": -1.2597098350524902, "eval_logps/chosen": -0.46992775797843933, "eval_logps/rejected": -0.4985562562942505, "eval_loss": 1.5686445236206055, "eval_rewards/accuracies": 0.5282257795333862, "eval_rewards/chosen": -1.1748194694519043, "eval_rewards/margins": 0.07157127559185028, "eval_rewards/rejected": -1.246390700340271, "eval_runtime": 57.3321, "eval_samples": 1961, "eval_samples_per_second": 34.204, "eval_steps_per_second": 1.081 }