{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9931869795609387, "eval_steps": 100, "global_step": 82, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 5.555555555555555e-07, "logits/chosen": -2.692568302154541, "logits/rejected": -2.667994260787964, "logps/chosen": -299.3318176269531, "logps/rejected": -311.19903564453125, "loss": 0.6931, "rewards/accuracies": 0.0, "rewards/chosen": 0.0, "rewards/margins": 0.0, "rewards/rejected": 0.0, "step": 1 }, { "epoch": 0.12, "learning_rate": 4.9976852877555755e-06, "logits/chosen": -2.7045490741729736, "logits/rejected": -2.6807374954223633, "logps/chosen": -274.0246276855469, "logps/rejected": -284.09259033203125, "loss": 0.6918, "rewards/accuracies": 0.484375, "rewards/chosen": 0.007271192967891693, "rewards/margins": 0.002095710253342986, "rewards/rejected": 0.005175482481718063, "step": 10 }, { "epoch": 0.24, "learning_rate": 4.725068941086693e-06, "logits/chosen": -2.6655147075653076, "logits/rejected": -2.6132850646972656, "logps/chosen": -288.611328125, "logps/rejected": -283.16064453125, "loss": 0.6796, "rewards/accuracies": 0.598437488079071, "rewards/chosen": 0.11068712174892426, "rewards/margins": 0.026317646726965904, "rewards/rejected": 0.0843694880604744, "step": 20 }, { "epoch": 0.36, "learning_rate": 4.046678508156259e-06, "logits/chosen": -2.6170387268066406, "logits/rejected": -2.5729379653930664, "logps/chosen": -272.33197021484375, "logps/rejected": -286.86785888671875, "loss": 0.664, "rewards/accuracies": 0.6171875, "rewards/chosen": 0.1085876002907753, "rewards/margins": 0.06517539173364639, "rewards/rejected": 0.04341219738125801, "step": 30 }, { "epoch": 0.48, "learning_rate": 3.0862284739260247e-06, "logits/chosen": -2.555568218231201, "logits/rejected": -2.5221917629241943, "logps/chosen": -302.2290954589844, "logps/rejected": -319.53826904296875, "loss": 0.6616, "rewards/accuracies": 0.6234375238418579, "rewards/chosen": 0.04670686274766922, "rewards/margins": 0.09154415130615234, "rewards/rejected": -0.04483727738261223, "step": 40 }, { "epoch": 0.61, "learning_rate": 2.0188710450729255e-06, "logits/chosen": -2.580871105194092, "logits/rejected": -2.525937557220459, "logps/chosen": -271.65765380859375, "logps/rejected": -297.47052001953125, "loss": 0.6567, "rewards/accuracies": 0.625, "rewards/chosen": 0.07181344926357269, "rewards/margins": 0.09304286539554596, "rewards/rejected": -0.021229416131973267, "step": 50 }, { "epoch": 0.73, "learning_rate": 1.0392545659290789e-06, "logits/chosen": -2.575265407562256, "logits/rejected": -2.492567539215088, "logps/chosen": -287.5987854003906, "logps/rejected": -283.6988830566406, "loss": 0.6442, "rewards/accuracies": 0.6796875, "rewards/chosen": 0.09387712180614471, "rewards/margins": 0.14719055593013763, "rewards/rejected": -0.05331344157457352, "step": 60 }, { "epoch": 0.85, "learning_rate": 3.260265266334725e-07, "logits/chosen": -2.5620856285095215, "logits/rejected": -2.51887845993042, "logps/chosen": -289.19384765625, "logps/rejected": -305.2491760253906, "loss": 0.6476, "rewards/accuracies": 0.6078125238418579, "rewards/chosen": 0.042650334537029266, "rewards/margins": 0.11376988887786865, "rewards/rejected": -0.07111954689025879, "step": 70 }, { "epoch": 0.97, "learning_rate": 9.25456266348046e-09, "logits/chosen": -2.559069871902466, "logits/rejected": -2.489203929901123, "logps/chosen": -297.2583923339844, "logps/rejected": -301.9067687988281, "loss": 0.6453, "rewards/accuracies": 0.6734374761581421, "rewards/chosen": 0.04438754543662071, "rewards/margins": 0.13041387498378754, "rewards/rejected": -0.08602632582187653, "step": 80 }, { "epoch": 0.99, "step": 82, "total_flos": 0.0, "train_loss": 0.6611525947000922, "train_runtime": 6866.2393, "train_samples_per_second": 3.078, "train_steps_per_second": 0.012 } ], "logging_steps": 10, "max_steps": 82, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 100, "total_flos": 0.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }