{ "epoch": 2.9969690846635686, "eval_logits/chosen": -20.66438865661621, "eval_logits/rejected": -20.695892333984375, "eval_logps/chosen": -1.2573115825653076, "eval_logps/rejected": -1.3996058702468872, "eval_loss": 1.3293771743774414, "eval_odds_ratio_loss": 0.7206552624702454, "eval_rewards/accuracies": 0.5345454812049866, "eval_rewards/chosen": -0.12573117017745972, "eval_rewards/margins": 0.014229409396648407, "eval_rewards/rejected": -0.13996057212352753, "eval_runtime": 82.3315, "eval_samples_per_second": 13.361, "eval_sft_loss": 1.2573115825653076, "eval_steps_per_second": 6.68, "total_flos": 5.618252880760013e+17, "train_loss": 1.3892074464594277, "train_runtime": 8722.5011, "train_samples_per_second": 3.404, "train_steps_per_second": 0.213 }