{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.992, "eval_steps": 200, "global_step": 62, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016, "grad_norm": 198.25669918398648, "learning_rate": 7.142857142857142e-08, "logits/generated": -3.0665292739868164, "logits/real": -3.1044487953186035, "logps/generated": -260.97100830078125, "logps/real": -222.56707763671875, "loss": 0.9049, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.16, "grad_norm": 191.14682428587082, "learning_rate": 4.727272727272727e-07, "logits/generated": -2.957974910736084, "logits/real": -3.005094051361084, "logps/generated": -255.78170776367188, "logps/real": -211.8086700439453, "loss": 0.7861, "rewards/accuracies": 0.6805555820465088, "rewards/generated": -1.2839804887771606, "rewards/margins": 1.258236289024353, "rewards/real": -0.025744358077645302, "step": 10 }, { "epoch": 0.32, "grad_norm": 127.1618329581298, "learning_rate": 3.818181818181818e-07, "logits/generated": -3.0288896560668945, "logits/real": -3.092313289642334, "logps/generated": -240.3448028564453, "logps/real": -194.00479125976562, "loss": 0.6416, "rewards/accuracies": 0.762499988079071, "rewards/generated": -0.07217645645141602, "rewards/margins": 1.4789001941680908, "rewards/real": 1.4067238569259644, "step": 20 }, { "epoch": 0.48, "grad_norm": 158.65956873160914, "learning_rate": 2.909090909090909e-07, "logits/generated": -2.827226161956787, "logits/real": -2.930786609649658, "logps/generated": -263.2423400878906, "logps/real": -203.39224243164062, "loss": 0.5109, "rewards/accuracies": 0.824999988079071, "rewards/generated": -1.4480739831924438, "rewards/margins": 2.398531675338745, "rewards/real": 0.9504578709602356, "step": 30 }, { "epoch": 0.64, "grad_norm": 160.69468296728675, "learning_rate": 2e-07, "logits/generated": -2.819359064102173, "logits/real": -2.861755847930908, "logps/generated": -266.3091735839844, "logps/real": -194.5775146484375, "loss": 0.5326, "rewards/accuracies": 0.9125000238418579, "rewards/generated": -2.1509218215942383, "rewards/margins": 3.369136095046997, "rewards/real": 1.2182139158248901, "step": 40 }, { "epoch": 0.8, "grad_norm": 94.40293067720134, "learning_rate": 1.0909090909090908e-07, "logits/generated": -2.8005573749542236, "logits/real": -2.8787262439727783, "logps/generated": -261.2699279785156, "logps/real": -196.7130889892578, "loss": 0.4681, "rewards/accuracies": 0.824999988079071, "rewards/generated": -2.203646659851074, "rewards/margins": 3.2588391304016113, "rewards/real": 1.055192232131958, "step": 50 }, { "epoch": 0.96, "grad_norm": 75.79186500073192, "learning_rate": 1.818181818181818e-08, "logits/generated": -2.8064606189727783, "logits/real": -2.833669900894165, "logps/generated": -264.3329162597656, "logps/real": -198.6341094970703, "loss": 0.4321, "rewards/accuracies": 0.8125, "rewards/generated": -2.0550496578216553, "rewards/margins": 3.226325511932373, "rewards/real": 1.1712758541107178, "step": 60 }, { "epoch": 0.992, "step": 62, "total_flos": 0.0, "train_loss": 0.5572480242098531, "train_runtime": 732.2453, "train_samples_per_second": 2.731, "train_steps_per_second": 0.085 } ], "logging_steps": 10, "max_steps": 62, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }