{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9984, "eval_steps": 500, "global_step": 468, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.10666666666666667, "grad_norm": 34.43344966418026, "learning_rate": 4.999373573764186e-07, "logits/chosen": -1.2502222061157227, "logits/rejected": -1.2531837224960327, "logps/chosen": -380.0782165527344, "logps/rejected": -386.2172546386719, "loss": 0.8964, "rewards/accuracies": 0.4950000047683716, "rewards/chosen": -0.023556923493742943, "rewards/margins": 0.005924960598349571, "rewards/rejected": -0.029481882229447365, "step": 50 }, { "epoch": 0.21333333333333335, "grad_norm": 37.45052221475562, "learning_rate": 4.807012604511541e-07, "logits/chosen": -1.2312381267547607, "logits/rejected": -1.2327096462249756, "logps/chosen": -395.3690490722656, "logps/rejected": -403.4739990234375, "loss": 0.8496, "rewards/accuracies": 0.5987499952316284, "rewards/chosen": -0.4553294777870178, "rewards/margins": 0.1346004605293274, "rewards/rejected": -0.5899299383163452, "step": 100 }, { "epoch": 0.32, "grad_norm": 35.36746039819262, "learning_rate": 4.2971971741276185e-07, "logits/chosen": -1.260800838470459, "logits/rejected": -1.2635284662246704, "logps/chosen": -376.4151306152344, "logps/rejected": -384.2753601074219, "loss": 0.8243, "rewards/accuracies": 0.5987499952316284, "rewards/chosen": -0.9694927930831909, "rewards/margins": 0.27553480863571167, "rewards/rejected": -1.2450276613235474, "step": 150 }, { "epoch": 0.4266666666666667, "grad_norm": 36.27189952514476, "learning_rate": 3.540079991103235e-07, "logits/chosen": -1.26079523563385, "logits/rejected": -1.2639491558074951, "logps/chosen": -394.5101623535156, "logps/rejected": -405.01641845703125, "loss": 0.8048, "rewards/accuracies": 0.6625000238418579, "rewards/chosen": -1.2865251302719116, "rewards/margins": 0.49585381150245667, "rewards/rejected": -1.7823787927627563, "step": 200 }, { "epoch": 0.5333333333333333, "grad_norm": 32.6541770215758, "learning_rate": 2.639843506318899e-07, "logits/chosen": -1.252700686454773, "logits/rejected": -1.263854742050171, "logps/chosen": -384.5255126953125, "logps/rejected": -390.5956726074219, "loss": 0.7906, "rewards/accuracies": 0.65625, "rewards/chosen": -1.3771264553070068, "rewards/margins": 0.43737706542015076, "rewards/rejected": -1.8145036697387695, "step": 250 }, { "epoch": 0.64, "grad_norm": 36.7055592501965, "learning_rate": 1.7203639775848423e-07, "logits/chosen": -1.2447404861450195, "logits/rejected": -1.2467286586761475, "logps/chosen": -384.63165283203125, "logps/rejected": -393.9804992675781, "loss": 0.7835, "rewards/accuracies": 0.699999988079071, "rewards/chosen": -1.325575590133667, "rewards/margins": 0.5131824016571045, "rewards/rejected": -1.8387579917907715, "step": 300 }, { "epoch": 0.7466666666666667, "grad_norm": 30.904620054592144, "learning_rate": 9.081655850224449e-08, "logits/chosen": -1.2564585208892822, "logits/rejected": -1.256412386894226, "logps/chosen": -383.8439025878906, "logps/rejected": -396.41839599609375, "loss": 0.7776, "rewards/accuracies": 0.6775000095367432, "rewards/chosen": -1.3743709325790405, "rewards/margins": 0.5735958814620972, "rewards/rejected": -1.9479665756225586, "step": 350 }, { "epoch": 0.8533333333333334, "grad_norm": 34.514970559024796, "learning_rate": 3.150101814011136e-08, "logits/chosen": -1.2832634449005127, "logits/rejected": -1.2844091653823853, "logps/chosen": -393.8801574707031, "logps/rejected": -403.3578186035156, "loss": 0.7851, "rewards/accuracies": 0.675000011920929, "rewards/chosen": -1.4652782678604126, "rewards/margins": 0.5471586585044861, "rewards/rejected": -2.012436866760254, "step": 400 }, { "epoch": 0.96, "grad_norm": 33.17626594386728, "learning_rate": 2.251839967945535e-09, "logits/chosen": -1.242918610572815, "logits/rejected": -1.2440977096557617, "logps/chosen": -381.6874084472656, "logps/rejected": -393.9809265136719, "loss": 0.7781, "rewards/accuracies": 0.6937500238418579, "rewards/chosen": -1.4276537895202637, "rewards/margins": 0.5591089129447937, "rewards/rejected": -1.9867626428604126, "step": 450 }, { "epoch": 0.9984, "step": 468, "total_flos": 160065019445248.0, "train_loss": 0.8088664193438668, "train_runtime": 8283.0284, "train_samples_per_second": 7.244, "train_steps_per_second": 0.057 } ], "logging_steps": 50, "max_steps": 468, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 160065019445248.0, "train_batch_size": 2, "trial_name": null, "trial_params": null }