{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.992, "eval_steps": 200, "global_step": 62, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016, "grad_norm": 107.88710762985947, "learning_rate": 7.142857142857142e-08, "logits/generated": -2.752808094024658, "logits/real": -2.2284693717956543, "logps/generated": -121.13522338867188, "logps/real": -153.264892578125, "loss": 0.8501, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.16, "grad_norm": 91.94956537605988, "learning_rate": 4.727272727272727e-07, "logits/generated": -2.6014535427093506, "logits/real": -2.6951539516448975, "logps/generated": -133.1143035888672, "logps/real": -140.10543823242188, "loss": 0.7861, "rewards/accuracies": 0.5416666865348816, "rewards/generated": 0.36728113889694214, "rewards/margins": 0.08309322595596313, "rewards/real": 0.45037439465522766, "step": 10 }, { "epoch": 0.32, "grad_norm": 81.49584102213795, "learning_rate": 3.818181818181818e-07, "logits/generated": -2.740352153778076, "logits/real": -2.7934975624084473, "logps/generated": -124.69525146484375, "logps/real": -122.46867370605469, "loss": 0.8205, "rewards/accuracies": 0.6000000238418579, "rewards/generated": 0.6012061834335327, "rewards/margins": 0.2280128300189972, "rewards/real": 0.8292189836502075, "step": 20 }, { "epoch": 0.48, "grad_norm": 86.29390865351938, "learning_rate": 2.909090909090909e-07, "logits/generated": -2.7653279304504395, "logits/real": -2.861011028289795, "logps/generated": -104.97627258300781, "logps/real": -110.78263854980469, "loss": 0.7926, "rewards/accuracies": 0.6875, "rewards/generated": 1.0582153797149658, "rewards/margins": 0.4253184199333191, "rewards/real": 1.4835339784622192, "step": 30 }, { "epoch": 0.64, "grad_norm": 73.40988849274181, "learning_rate": 2e-07, "logits/generated": -2.899013042449951, "logits/real": -2.9045419692993164, "logps/generated": -116.1448745727539, "logps/real": -126.13529968261719, "loss": 0.8216, "rewards/accuracies": 0.574999988079071, "rewards/generated": 1.988576889038086, "rewards/margins": 0.31346917152404785, "rewards/real": 2.302046060562134, "step": 40 }, { "epoch": 0.8, "grad_norm": 87.84851470741809, "learning_rate": 1.0909090909090908e-07, "logits/generated": -2.8664755821228027, "logits/real": -2.9287781715393066, "logps/generated": -113.2905502319336, "logps/real": -129.95413208007812, "loss": 0.803, "rewards/accuracies": 0.6000000238418579, "rewards/generated": 2.1775314807891846, "rewards/margins": 0.2134767472743988, "rewards/real": 2.391007900238037, "step": 50 }, { "epoch": 0.96, "grad_norm": 73.6437417177821, "learning_rate": 1.818181818181818e-08, "logits/generated": -2.788062572479248, "logits/real": -2.8471646308898926, "logps/generated": -119.0680923461914, "logps/real": -124.4272232055664, "loss": 0.7742, "rewards/accuracies": 0.625, "rewards/generated": 2.202199935913086, "rewards/margins": 0.361619770526886, "rewards/real": 2.5638198852539062, "step": 60 }, { "epoch": 0.992, "step": 62, "total_flos": 0.0, "train_loss": 0.8044227688543258, "train_runtime": 744.0688, "train_samples_per_second": 2.688, "train_steps_per_second": 0.083 } ], "logging_steps": 10, "max_steps": 62, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }