{ "epoch": 1.0, "eval_logits/chosen": 99.08063507080078, "eval_logits/rejected": 99.07344818115234, "eval_logps/chosen": -32.34668731689453, "eval_logps/rejected": -35.868736267089844, "eval_loss": 1.664738655090332, "eval_rewards/accuracies": 0.510797381401062, "eval_rewards/chosen": 0.08684198558330536, "eval_rewards/margins": -0.0011658848961815238, "eval_rewards/rejected": 0.08800788223743439, "eval_runtime": 103.8246, "eval_samples": 343, "eval_samples_per_second": 3.304, "eval_steps_per_second": 0.414, "train_loss": 2.2499145250041765, "train_runtime": 2558.7273, "train_samples": 3079, "train_samples_per_second": 1.203, "train_steps_per_second": 0.15 }