{ "epoch": 2.9969690846635686, "eval_logits/chosen": -3.083430051803589, "eval_logits/rejected": -3.107269287109375, "eval_logps/chosen": -0.777300238609314, "eval_logps/rejected": -0.9876723289489746, "eval_loss": 0.8434417247772217, "eval_odds_ratio_loss": 0.6614136099815369, "eval_rewards/accuracies": 0.5690909028053284, "eval_rewards/chosen": -0.07773003727197647, "eval_rewards/margins": 0.021037202328443527, "eval_rewards/rejected": -0.0987672358751297, "eval_runtime": 367.4747, "eval_samples_per_second": 2.993, "eval_sft_loss": 0.777300238609314, "eval_steps_per_second": 1.497, "total_flos": 2.0970902870084813e+18, "train_loss": 0.8330582246554065, "train_runtime": 34111.2463, "train_samples_per_second": 0.871, "train_steps_per_second": 0.054 }