{ "epoch": 1.0, "eval_dpo_losses": 0.6513049602508545, "eval_logits/chosen": -2.831768035888672, "eval_logits/rejected": -2.788257598876953, "eval_logps/chosen": -274.86163330078125, "eval_logps/rejected": -259.8882751464844, "eval_loss": 1.0971728563308716, "eval_positive_losses": 4.613582134246826, "eval_rewards/accuracies": 0.6269841194152832, "eval_rewards/chosen": 0.10359582304954529, "eval_rewards/margins": 0.11065211147069931, "eval_rewards/margins_max": 0.4366241991519928, "eval_rewards/margins_min": -0.18976463377475739, "eval_rewards/margins_std": 0.27969247102737427, "eval_rewards/rejected": -0.007056289818137884, "eval_runtime": 283.0495, "eval_samples": 2000, "eval_samples_per_second": 7.066, "eval_steps_per_second": 0.223 }