{ "epoch": 4.0, "eval_logits/chosen": -2.1517372131347656, "eval_logits/rejected": -2.1470396518707275, "eval_logps/chosen": -33.62392044067383, "eval_logps/rejected": -37.32072830200195, "eval_loss": 0.47301676869392395, "eval_rewards/accuracies": 0.594684362411499, "eval_rewards/chosen": 0.24637830257415771, "eval_rewards/margins": 0.1288420557975769, "eval_rewards/rejected": 0.11753623187541962, "eval_runtime": 145.7991, "eval_samples": 343, "eval_samples_per_second": 2.353, "eval_steps_per_second": 0.295, "train_loss": 0.19402528018146367, "train_runtime": 10806.8766, "train_samples": 3079, "train_samples_per_second": 1.14, "train_steps_per_second": 0.143 }