{ "epoch": 1.0, "eval_dpo_losses": 0.6379860043525696, "eval_logits/chosen": -2.6679418087005615, "eval_logits/rejected": -2.6358492374420166, "eval_logps/chosen": -261.21728515625, "eval_logps/rejected": -247.80003356933594, "eval_loss": 0.6778276562690735, "eval_positive_losses": 0.25107473134994507, "eval_rewards/accuracies": 0.722000002861023, "eval_rewards/chosen": 0.23376142978668213, "eval_rewards/margins": 0.12597255408763885, "eval_rewards/margins_max": 0.45901384949684143, "eval_rewards/margins_min": -0.15312375128269196, "eval_rewards/margins_std": 0.2062978446483612, "eval_rewards/rejected": 0.10778886079788208, "eval_runtime": 428.6701, "eval_samples": 2000, "eval_samples_per_second": 4.666, "eval_steps_per_second": 0.292, "train_loss": 0.6803070375102567, "train_runtime": 46122.1645, "train_samples": 61134, "train_samples_per_second": 1.325, "train_steps_per_second": 0.083 }