{ "epoch": 0.64, "eval_logits/chosen": -0.4409240782260895, "eval_logits/rejected": -0.3783179819583893, "eval_logps/chosen": -1.6689454317092896, "eval_logps/rejected": -1.7649023532867432, "eval_loss": 5.835046768188477, "eval_rewards/accuracies": 0.5245413780212402, "eval_rewards/chosen": -16.689455032348633, "eval_rewards/margins": 0.9595676064491272, "eval_rewards/rejected": -17.649023056030273, "eval_runtime": 294.9751, "eval_samples": 5595, "eval_samples_per_second": 18.968, "eval_steps_per_second": 4.743, "total_flos": 0.0, "train_loss": 98.40630645751953, "train_runtime": 107.1933, "train_samples_per_second": 5.971, "train_steps_per_second": 0.187 }