{ "epoch": 0.9990762978015888, "eval_logits/chosen": -1.0146139860153198, "eval_logits/rejected": -0.7880604267120361, "eval_logps/chosen": -470.7486267089844, "eval_logps/rejected": -667.5509033203125, "eval_loss": 0.668347179889679, "eval_rewards/accuracies": 0.7120000123977661, "eval_rewards/chosen": -3.7777063846588135, "eval_rewards/margins": 1.7061176300048828, "eval_rewards/rejected": -5.483823776245117, "eval_runtime": 225.0171, "eval_samples": 1999, "eval_samples_per_second": 8.884, "eval_steps_per_second": 1.111, "total_flos": 0.0, "train_loss": 0.3847499547390308, "train_runtime": 27570.709, "train_samples": 64955, "train_samples_per_second": 2.356, "train_steps_per_second": 0.018 }