{ "epoch": 1.0, "eval_logits/chosen": 99.08063507080078, "eval_logits/rejected": 99.07344818115234, "eval_logps/chosen": -32.34668731689453, "eval_logps/rejected": -35.868736267089844, "eval_loss": 1.664738655090332, "eval_rewards/accuracies": 0.510797381401062, "eval_rewards/chosen": 0.08684198558330536, "eval_rewards/margins": -0.0011658848961815238, "eval_rewards/rejected": 0.08800788223743439, "eval_runtime": 103.8246, "eval_samples": 343, "eval_samples_per_second": 3.304, "eval_steps_per_second": 0.414 }