{ "best_metric": 1.2555217742919922, "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_logit_kd_mnli_128/checkpoint-33748", "epoch": 16.0, "global_step": 49088, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 1.6825, "step": 3068 }, { "epoch": 1.0, "eval_accuracy": 0.5256240448293429, "eval_loss": 1.4580906629562378, "eval_runtime": 17.2992, "eval_samples_per_second": 567.368, "eval_steps_per_second": 4.451, "step": 3068 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 1.4941, "step": 6136 }, { "epoch": 2.0, "eval_accuracy": 0.5680081507896078, "eval_loss": 1.3516298532485962, "eval_runtime": 17.2707, "eval_samples_per_second": 568.305, "eval_steps_per_second": 4.458, "step": 6136 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 1.4199, "step": 9204 }, { "epoch": 3.0, "eval_accuracy": 0.5711665817626083, "eval_loss": 1.325850486755371, "eval_runtime": 17.2788, "eval_samples_per_second": 568.038, "eval_steps_per_second": 4.456, "step": 9204 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 1.3747, "step": 12272 }, { "epoch": 4.0, "eval_accuracy": 0.5856342333163526, "eval_loss": 1.3024353981018066, "eval_runtime": 17.2902, "eval_samples_per_second": 567.664, "eval_steps_per_second": 4.453, "step": 12272 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 1.34, "step": 15340 }, { "epoch": 5.0, "eval_accuracy": 0.5930718288334182, "eval_loss": 1.2874709367752075, "eval_runtime": 18.1448, "eval_samples_per_second": 540.927, "eval_steps_per_second": 4.244, "step": 15340 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 1.3087, "step": 18408 }, { "epoch": 6.0, "eval_accuracy": 0.5927661742231278, "eval_loss": 1.2729827165603638, "eval_runtime": 17.888, "eval_samples_per_second": 548.691, "eval_steps_per_second": 4.305, "step": 18408 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 1.2769, "step": 21476 }, { "epoch": 7.0, "eval_accuracy": 0.5916454406520631, "eval_loss": 1.2844635248184204, "eval_runtime": 17.8702, "eval_samples_per_second": 549.237, "eval_steps_per_second": 4.309, "step": 21476 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 1.246, "step": 24544 }, { "epoch": 8.0, "eval_accuracy": 0.5965359144167092, "eval_loss": 1.2749732732772827, "eval_runtime": 17.7491, "eval_samples_per_second": 552.985, "eval_steps_per_second": 4.338, "step": 24544 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 1.2166, "step": 27612 }, { "epoch": 9.0, "eval_accuracy": 0.6020376974019358, "eval_loss": 1.2651457786560059, "eval_runtime": 17.7969, "eval_samples_per_second": 551.501, "eval_steps_per_second": 4.327, "step": 27612 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 1.1883, "step": 30680 }, { "epoch": 10.0, "eval_accuracy": 0.6042791645440652, "eval_loss": 1.2773230075836182, "eval_runtime": 17.8377, "eval_samples_per_second": 550.239, "eval_steps_per_second": 4.317, "step": 30680 }, { "epoch": 11.0, "learning_rate": 3.9000000000000006e-05, "loss": 1.1604, "step": 33748 }, { "epoch": 11.0, "eval_accuracy": 0.6011207335710647, "eval_loss": 1.2555217742919922, "eval_runtime": 17.9826, "eval_samples_per_second": 545.807, "eval_steps_per_second": 4.282, "step": 33748 }, { "epoch": 12.0, "learning_rate": 3.8e-05, "loss": 1.1329, "step": 36816 }, { "epoch": 12.0, "eval_accuracy": 0.5990830361691288, "eval_loss": 1.2792441844940186, "eval_runtime": 17.9163, "eval_samples_per_second": 547.824, "eval_steps_per_second": 4.298, "step": 36816 }, { "epoch": 13.0, "learning_rate": 3.7e-05, "loss": 1.1074, "step": 39884 }, { "epoch": 13.0, "eval_accuracy": 0.5985736118186449, "eval_loss": 1.2890774011611938, "eval_runtime": 17.8675, "eval_samples_per_second": 549.32, "eval_steps_per_second": 4.309, "step": 39884 }, { "epoch": 14.0, "learning_rate": 3.6e-05, "loss": 1.0812, "step": 42952 }, { "epoch": 14.0, "eval_accuracy": 0.5947019867549669, "eval_loss": 1.2889435291290283, "eval_runtime": 17.9697, "eval_samples_per_second": 546.196, "eval_steps_per_second": 4.285, "step": 42952 }, { "epoch": 15.0, "learning_rate": 3.5e-05, "loss": 1.0577, "step": 46020 }, { "epoch": 15.0, "eval_accuracy": 0.5970453387671931, "eval_loss": 1.2870742082595825, "eval_runtime": 18.0046, "eval_samples_per_second": 545.14, "eval_steps_per_second": 4.277, "step": 46020 }, { "epoch": 16.0, "learning_rate": 3.4000000000000007e-05, "loss": 1.0338, "step": 49088 }, { "epoch": 16.0, "eval_accuracy": 0.6026490066225165, "eval_loss": 1.3296325206756592, "eval_runtime": 17.9586, "eval_samples_per_second": 546.536, "eval_steps_per_second": 4.288, "step": 49088 }, { "epoch": 16.0, "step": 49088, "total_flos": 1.4975953101704397e+17, "train_loss": 1.2575687075220776, "train_runtime": 27523.0396, "train_samples_per_second": 713.406, "train_steps_per_second": 5.574 } ], "max_steps": 153400, "num_train_epochs": 50, "total_flos": 1.4975953101704397e+17, "trial_name": null, "trial_params": null }