{ "epoch": 2.985781990521327, "eval_logits/chosen": -3.554823398590088, "eval_logits/rejected": -3.4271774291992188, "eval_logps/chosen": -527.9577026367188, "eval_logps/rejected": -5177.13427734375, "eval_loss": 0.004887364339083433, "eval_rewards/accuracies": 0.9959677457809448, "eval_rewards/chosen": -1.6097602844238281, "eval_rewards/margins": 43.75369644165039, "eval_rewards/rejected": -45.363460540771484, "eval_runtime": 195.0759, "eval_samples": 3905, "eval_samples_per_second": 20.018, "eval_steps_per_second": 0.318, "total_flos": 0.0, "train_loss": 0.04083177362173292, "train_runtime": 9033.5209, "train_samples": 13500, "train_samples_per_second": 4.483, "train_steps_per_second": 0.035 }