{ "epoch": 0.9981298423724285, "eval_logits/chosen": 15.170717239379883, "eval_logits/rejected": 15.209513664245605, "eval_logps/chosen": -32589.357421875, "eval_logps/rejected": -32736.40234375, "eval_loss": 0.007080146111547947, "eval_rewards/accuracies": 0.4838709533214569, "eval_rewards/chosen": -325.0106201171875, "eval_rewards/margins": 1.4858086109161377, "eval_rewards/rejected": -326.4964294433594, "eval_runtime": 162.3744, "eval_samples_per_second": 12.077, "eval_steps_per_second": 0.191 }