{ "best_metric": 0.5354864001274109, "best_model_checkpoint": "saves/Mistral-7B/lora/train_1/checkpoint-200", "epoch": 1.3125512715340442, "eval_steps": 200, "global_step": 1200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.21875854525567404, "grad_norm": 0.38192033767700195, "learning_rate": 0.00029856911617379416, "loss": 0.4441, "step": 200 }, { "epoch": 0.21875854525567404, "eval_loss": 0.5354864001274109, "eval_runtime": 2190.7796, "eval_samples_per_second": 21.508, "eval_steps_per_second": 0.672, "step": 200 }, { "epoch": 0.4375170905113481, "grad_norm": 0.6002383232116699, "learning_rate": 0.0002896017519370078, "loss": 0.2562, "step": 400 }, { "epoch": 0.4375170905113481, "eval_loss": 0.6111302971839905, "eval_runtime": 2191.5198, "eval_samples_per_second": 21.501, "eval_steps_per_second": 0.672, "step": 400 }, { "epoch": 0.6562756357670222, "grad_norm": 0.913849949836731, "learning_rate": 0.0002728756302319302, "loss": 0.1736, "step": 600 }, { "epoch": 0.6562756357670222, "eval_loss": 0.7389799356460571, "eval_runtime": 2191.6271, "eval_samples_per_second": 21.5, "eval_steps_per_second": 0.672, "step": 600 }, { "epoch": 0.8750341810226961, "grad_norm": 0.9672319293022156, "learning_rate": 0.00024932035201194605, "loss": 0.0946, "step": 800 }, { "epoch": 0.8750341810226961, "eval_loss": 0.8541895151138306, "eval_runtime": 2193.7603, "eval_samples_per_second": 21.479, "eval_steps_per_second": 0.671, "step": 800 }, { "epoch": 1.0937927262783702, "grad_norm": 0.8369513750076294, "learning_rate": 0.00022024506768721243, "loss": 0.0477, "step": 1000 }, { "epoch": 1.0937927262783702, "eval_loss": 1.010321021080017, "eval_runtime": 2193.5651, "eval_samples_per_second": 21.481, "eval_steps_per_second": 0.672, "step": 1000 }, { "epoch": 1.3125512715340442, "grad_norm": 0.8144668340682983, "learning_rate": 0.0001872657174323126, "loss": 0.0275, "step": 1200 }, { "epoch": 1.3125512715340442, "eval_loss": 1.1369109153747559, "eval_runtime": 2191.5102, "eval_samples_per_second": 21.501, "eval_steps_per_second": 0.672, "step": 1200 }, { "epoch": 1.3125512715340442, "step": 1200, "total_flos": 2.3027143417496863e+18, "train_loss": 0.17396480162938435, "train_runtime": 69952.3324, "train_samples_per_second": 10.037, "train_steps_per_second": 0.039 } ], "logging_steps": 200, "max_steps": 2742, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.3027143417496863e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }