{ "best_metric": 0.7879278063774109, "best_model_checkpoint": "/home/khalid/Documents/github_rep/bigscience/data/processed/35/bloom-560m_my_sft_100000samples_-1vocab_original-frozen/checkpoint-25000", "epoch": 0.39631741854686253, "global_step": 25000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.04, "l1_reg_loss": 0.0, "learning_rate": 9e-05, "loss": 1.0457, "step": 2500 }, { "epoch": 0.08, "l1_reg_loss": 0.0, "learning_rate": 8e-05, "loss": 0.8994, "step": 5000 }, { "epoch": 0.08, "eval_loss": 0.8788017630577087, "eval_runtime": 2668.4849, "eval_samples_per_second": 9.399, "eval_steps_per_second": 4.7, "step": 5000 }, { "epoch": 0.12, "l1_reg_loss": 0.0, "learning_rate": 7e-05, "loss": 0.8556, "step": 7500 }, { "epoch": 0.16, "l1_reg_loss": 0.0, "learning_rate": 6e-05, "loss": 0.8299, "step": 10000 }, { "epoch": 0.16, "eval_loss": 0.8276682496070862, "eval_runtime": 2661.8009, "eval_samples_per_second": 9.423, "eval_steps_per_second": 4.711, "step": 10000 }, { "epoch": 0.2, "l1_reg_loss": 0.0, "learning_rate": 5e-05, "loss": 0.815, "step": 12500 }, { "epoch": 0.24, "l1_reg_loss": 0.0, "learning_rate": 4e-05, "loss": 0.8019, "step": 15000 }, { "epoch": 0.24, "eval_loss": 0.804674506187439, "eval_runtime": 2658.9167, "eval_samples_per_second": 9.433, "eval_steps_per_second": 4.717, "step": 15000 }, { "epoch": 0.28, "l1_reg_loss": 0.0, "learning_rate": 3e-05, "loss": 0.7965, "step": 17500 }, { "epoch": 0.32, "l1_reg_loss": 0.0, "learning_rate": 2e-05, "loss": 0.7875, "step": 20000 }, { "epoch": 0.32, "eval_loss": 0.7925558686256409, "eval_runtime": 2660.1365, "eval_samples_per_second": 9.428, "eval_steps_per_second": 4.714, "step": 20000 }, { "epoch": 0.36, "l1_reg_loss": 0.0, "learning_rate": 1e-05, "loss": 0.7866, "step": 22500 }, { "epoch": 0.4, "l1_reg_loss": 0.0, "learning_rate": 0.0, "loss": 0.7835, "step": 25000 }, { "epoch": 0.4, "eval_loss": 0.7879278063774109, "eval_runtime": 2660.4205, "eval_samples_per_second": 9.427, "eval_steps_per_second": 4.714, "step": 25000 }, { "epoch": 0.4, "step": 25000, "total_flos": 3.714827943936e+17, "train_loss": 0.8401528125, "train_runtime": 72520.074, "train_samples_per_second": 2.758, "train_steps_per_second": 0.345 } ], "max_steps": 25000, "num_train_epochs": 1, "total_flos": 3.714827943936e+17, "trial_name": null, "trial_params": null }