{ "epoch": 0.9994767137624281, "eval_logits/chosen": 0.23715664446353912, "eval_logits/rejected": 1.1001231670379639, "eval_logps/chosen": -418.1593017578125, "eval_logps/rejected": -499.1413879394531, "eval_loss": 0.4963093400001526, "eval_rewards/accuracies": 0.761904776096344, "eval_rewards/chosen": -1.3619133234024048, "eval_rewards/margins": 1.0274325609207153, "eval_rewards/rejected": -2.38934588432312, "eval_runtime": 185.5011, "eval_samples": 2000, "eval_samples_per_second": 10.782, "eval_steps_per_second": 0.34, "total_flos": 0.0, "train_loss": 0.1815465917137905, "train_runtime": 7518.522, "train_samples": 61134, "train_samples_per_second": 8.131, "train_steps_per_second": 0.127 }