{ "epoch": 0.9982631930527722, "eval_logits/chosen": 0.10372808575630188, "eval_logits/rejected": 0.08583810925483704, "eval_logps/chosen": -0.3924722671508789, "eval_logps/rejected": -0.5309580564498901, "eval_loss": 1.2116273641586304, "eval_rewards/accuracies": 0.7113820910453796, "eval_rewards/chosen": -0.3924722671508789, "eval_rewards/margins": 0.13848578929901123, "eval_rewards/rejected": -0.5309580564498901, "eval_runtime": 427.6213, "eval_samples": 1961, "eval_samples_per_second": 4.586, "eval_steps_per_second": 0.288, "total_flos": 0.0, "train_loss": 1.2289682925385632, "train_runtime": 21112.5317, "train_samples": 59876, "train_samples_per_second": 2.836, "train_steps_per_second": 0.022 }