{ "epoch": 4.0, "eval_logits/chosen": -2.1216881275177, "eval_logits/rejected": -2.1169610023498535, "eval_logps/chosen": -34.41923522949219, "eval_logps/rejected": -38.13933563232422, "eval_loss": 0.6746898293495178, "eval_rewards/accuracies": 0.6009136438369751, "eval_rewards/chosen": -0.2692793905735016, "eval_rewards/margins": 0.16662099957466125, "eval_rewards/rejected": -0.43590039014816284, "eval_runtime": 145.1817, "eval_samples": 343, "eval_samples_per_second": 2.363, "eval_steps_per_second": 0.296 }