{ "epoch": 0.9994761655316919, "eval_logits/chosen": -0.6647697687149048, "eval_logits/rejected": -0.7098657488822937, "eval_logps/chosen": -225.6959991455078, "eval_logps/rejected": -275.41259765625, "eval_loss": 0.3207552433013916, "eval_rewards/accuracies": 0.8852941393852234, "eval_rewards/chosen": 1.798000693321228, "eval_rewards/margins": 1.842041015625, "eval_rewards/rejected": -0.044040482491254807, "eval_runtime": 260.7673, "eval_samples": 2720, "eval_samples_per_second": 10.431, "eval_steps_per_second": 1.304, "total_flos": 0.0, "train_loss": 0.4010467823571379, "train_runtime": 7944.4042, "train_samples": 30540, "train_samples_per_second": 3.844, "train_steps_per_second": 0.12 }