{ "epoch": 3.0, "eval_dpo_losses": 0.6792477369308472, "eval_logits/chosen": -2.845798969268799, "eval_logits/rejected": -2.8005707263946533, "eval_logps/chosen": -277.1849060058594, "eval_logps/rejected": -254.03167724609375, "eval_loss": 0.6876965165138245, "eval_positive_losses": 0.06534965336322784, "eval_rewards/accuracies": 0.6785714030265808, "eval_rewards/chosen": 0.080363430082798, "eval_rewards/margins": 0.028853610157966614, "eval_rewards/margins_max": 0.09140578657388687, "eval_rewards/margins_min": -0.03146994486451149, "eval_rewards/margins_std": 0.05510696768760681, "eval_rewards/rejected": 0.05150982737541199, "eval_runtime": 281.437, "eval_samples": 2000, "eval_samples_per_second": 7.106, "eval_steps_per_second": 0.224, "train_loss": 0.684076373458754, "train_runtime": 8695.656, "train_samples": 5263, "train_samples_per_second": 1.816, "train_steps_per_second": 0.114 }