{ "epoch": 1.9979402677651905, "eval_logits/chosen": 0.15889930725097656, "eval_logits/rejected": 0.12512841820716858, "eval_logps/chosen": -411.48541259765625, "eval_logps/rejected": -561.3089599609375, "eval_loss": 0.5019017457962036, "eval_rewards/accuracies": 0.7734375, "eval_rewards/chosen": -1.5493115186691284, "eval_rewards/margins": 1.4171409606933594, "eval_rewards/rejected": -2.9664525985717773, "eval_runtime": 24.4653, "eval_samples_per_second": 40.874, "eval_steps_per_second": 0.654 }