{ "best_metric": 0.5116574764251709, "best_model_checkpoint": "/datastor1/fcyin/finetuned_checkpoints/mquake/llama2_7B_mquake_lofit_seed42_96heads/checkpoint-51", "epoch": 5.0, "eval_steps": 500, "global_step": 85, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 0.8741050362586975, "learning_rate": 0.004, "loss": 0.6931, "step": 17 }, { "epoch": 1.0, "eval_loss": 0.5563876628875732, "eval_runtime": 4.2921, "eval_samples_per_second": 22.134, "eval_steps_per_second": 0.699, "step": 17 }, { "epoch": 2.0, "grad_norm": 0.8767452836036682, "learning_rate": 0.003, "loss": 0.1286, "step": 34 }, { "epoch": 2.0, "eval_loss": 0.5252991318702698, "eval_runtime": 4.3599, "eval_samples_per_second": 21.789, "eval_steps_per_second": 0.688, "step": 34 }, { "epoch": 3.0, "grad_norm": 0.04879095405340195, "learning_rate": 0.002, "loss": 0.0505, "step": 51 }, { "epoch": 3.0, "eval_loss": 0.5116574764251709, "eval_runtime": 4.368, "eval_samples_per_second": 21.749, "eval_steps_per_second": 0.687, "step": 51 }, { "epoch": 4.0, "grad_norm": 0.08225993812084198, "learning_rate": 0.001, "loss": 0.0177, "step": 68 }, { "epoch": 4.0, "eval_loss": 0.5452096462249756, "eval_runtime": 4.3007, "eval_samples_per_second": 22.089, "eval_steps_per_second": 0.698, "step": 68 }, { "epoch": 5.0, "grad_norm": 0.09921150654554367, "learning_rate": 0.0, "loss": 0.0107, "step": 85 }, { "epoch": 5.0, "eval_loss": 0.5571908354759216, "eval_runtime": 4.3773, "eval_samples_per_second": 21.703, "eval_steps_per_second": 0.685, "step": 85 } ], "logging_steps": 500, "max_steps": 85, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1420900423434240.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }