{ "best_metric": 0.1256455034017563, "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_logit_kd_data_aug_mrpc/checkpoint-80319", "epoch": 46.0, "global_step": 90114, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 4.9e-05, "loss": 0.2964, "step": 1959 }, { "epoch": 1.0, "eval_accuracy": 0.9607843137254902, "eval_combined_score": 0.9663076498204916, "eval_f1": 0.9718309859154929, "eval_loss": 0.20259901881217957, "eval_runtime": 0.7823, "eval_samples_per_second": 521.547, "eval_steps_per_second": 5.113, "step": 1959 }, { "epoch": 2.0, "learning_rate": 4.8e-05, "loss": 0.2307, "step": 3918 }, { "epoch": 2.0, "eval_accuracy": 0.9705882352941176, "eval_combined_score": 0.9747678018575852, "eval_f1": 0.9789473684210526, "eval_loss": 0.19426828622817993, "eval_runtime": 0.7765, "eval_samples_per_second": 525.442, "eval_steps_per_second": 5.151, "step": 3918 }, { "epoch": 3.0, "learning_rate": 4.7e-05, "loss": 0.2221, "step": 5877 }, { "epoch": 3.0, "eval_accuracy": 0.9803921568627451, "eval_combined_score": 0.9831038798498122, "eval_f1": 0.9858156028368794, "eval_loss": 0.18736393749713898, "eval_runtime": 0.773, "eval_samples_per_second": 527.78, "eval_steps_per_second": 5.174, "step": 5877 }, { "epoch": 4.0, "learning_rate": 4.600000000000001e-05, "loss": 0.2163, "step": 7836 }, { "epoch": 4.0, "eval_accuracy": 0.9852941176470589, "eval_combined_score": 0.9873279098873593, "eval_f1": 0.9893617021276596, "eval_loss": 0.17034168541431427, "eval_runtime": 0.7757, "eval_samples_per_second": 526.002, "eval_steps_per_second": 5.157, "step": 7836 }, { "epoch": 5.0, "learning_rate": 4.5e-05, "loss": 0.2115, "step": 9795 }, { "epoch": 5.0, "eval_accuracy": 0.9852941176470589, "eval_combined_score": 0.9873279098873593, "eval_f1": 0.9893617021276596, "eval_loss": 0.18048794567584991, "eval_runtime": 0.7764, "eval_samples_per_second": 525.485, "eval_steps_per_second": 5.152, "step": 9795 }, { "epoch": 6.0, "learning_rate": 4.4000000000000006e-05, "loss": 0.2071, "step": 11754 }, { "epoch": 6.0, "eval_accuracy": 0.9803921568627451, "eval_combined_score": 0.9831289406221853, "eval_f1": 0.9858657243816255, "eval_loss": 0.16820354759693146, "eval_runtime": 0.7809, "eval_samples_per_second": 522.481, "eval_steps_per_second": 5.122, "step": 11754 }, { "epoch": 7.0, "learning_rate": 4.3e-05, "loss": 0.2036, "step": 13713 }, { "epoch": 7.0, "eval_accuracy": 0.9877450980392157, "eval_combined_score": 0.9894320516839064, "eval_f1": 0.9911190053285969, "eval_loss": 0.15831029415130615, "eval_runtime": 0.78, "eval_samples_per_second": 523.105, "eval_steps_per_second": 5.128, "step": 13713 }, { "epoch": 8.0, "learning_rate": 4.2e-05, "loss": 0.2007, "step": 15672 }, { "epoch": 8.0, "eval_accuracy": 0.9926470588235294, "eval_combined_score": 0.9936497326203209, "eval_f1": 0.9946524064171123, "eval_loss": 0.16275052726268768, "eval_runtime": 0.7788, "eval_samples_per_second": 523.893, "eval_steps_per_second": 5.136, "step": 15672 }, { "epoch": 9.0, "learning_rate": 4.1e-05, "loss": 0.1985, "step": 17631 }, { "epoch": 9.0, "eval_accuracy": 0.9852941176470589, "eval_combined_score": 0.9873279098873593, "eval_f1": 0.9893617021276596, "eval_loss": 0.15478737652301788, "eval_runtime": 0.7768, "eval_samples_per_second": 525.26, "eval_steps_per_second": 5.15, "step": 17631 }, { "epoch": 10.0, "learning_rate": 4e-05, "loss": 0.1965, "step": 19590 }, { "epoch": 10.0, "eval_accuracy": 0.9877450980392157, "eval_combined_score": 0.9894162210338681, "eval_f1": 0.9910873440285204, "eval_loss": 0.15827800333499908, "eval_runtime": 0.7788, "eval_samples_per_second": 523.897, "eval_steps_per_second": 5.136, "step": 19590 }, { "epoch": 11.0, "learning_rate": 3.9000000000000006e-05, "loss": 0.195, "step": 21549 }, { "epoch": 11.0, "eval_accuracy": 0.9901960784313726, "eval_combined_score": 0.9915138098250054, "eval_f1": 0.992831541218638, "eval_loss": 0.1527128666639328, "eval_runtime": 0.7814, "eval_samples_per_second": 522.15, "eval_steps_per_second": 5.119, "step": 21549 }, { "epoch": 12.0, "learning_rate": 3.8e-05, "loss": 0.1938, "step": 23508 }, { "epoch": 12.0, "eval_accuracy": 0.9901960784313726, "eval_combined_score": 0.9915266106442577, "eval_f1": 0.9928571428571428, "eval_loss": 0.15122443437576294, "eval_runtime": 0.7775, "eval_samples_per_second": 524.756, "eval_steps_per_second": 5.145, "step": 23508 }, { "epoch": 13.0, "learning_rate": 3.7e-05, "loss": 0.1926, "step": 25467 }, { "epoch": 13.0, "eval_accuracy": 0.9950980392156863, "eval_combined_score": 0.9957633053221289, "eval_f1": 0.9964285714285714, "eval_loss": 0.1425897628068924, "eval_runtime": 0.7755, "eval_samples_per_second": 526.144, "eval_steps_per_second": 5.158, "step": 25467 }, { "epoch": 14.0, "learning_rate": 3.6e-05, "loss": 0.1917, "step": 27426 }, { "epoch": 14.0, "eval_accuracy": 0.9950980392156863, "eval_combined_score": 0.9957633053221289, "eval_f1": 0.9964285714285714, "eval_loss": 0.14360927045345306, "eval_runtime": 0.7791, "eval_samples_per_second": 523.666, "eval_steps_per_second": 5.134, "step": 27426 }, { "epoch": 15.0, "learning_rate": 3.5e-05, "loss": 0.191, "step": 29385 }, { "epoch": 15.0, "eval_accuracy": 0.9926470588235294, "eval_combined_score": 0.9936401662632853, "eval_f1": 0.9946332737030412, "eval_loss": 0.1502537876367569, "eval_runtime": 0.7823, "eval_samples_per_second": 521.529, "eval_steps_per_second": 5.113, "step": 29385 }, { "epoch": 16.0, "learning_rate": 3.4000000000000007e-05, "loss": 0.1901, "step": 31344 }, { "epoch": 16.0, "eval_accuracy": 0.9950980392156863, "eval_combined_score": 0.9957633053221289, "eval_f1": 0.9964285714285714, "eval_loss": 0.14606404304504395, "eval_runtime": 0.7757, "eval_samples_per_second": 525.986, "eval_steps_per_second": 5.157, "step": 31344 }, { "epoch": 17.0, "learning_rate": 3.3e-05, "loss": 0.1894, "step": 33303 }, { "epoch": 17.0, "eval_accuracy": 0.9975490196078431, "eval_combined_score": 0.9978800554210951, "eval_f1": 0.998211091234347, "eval_loss": 0.1498469114303589, "eval_runtime": 0.7758, "eval_samples_per_second": 525.884, "eval_steps_per_second": 5.156, "step": 33303 }, { "epoch": 18.0, "learning_rate": 3.2000000000000005e-05, "loss": 0.1888, "step": 35262 }, { "epoch": 18.0, "eval_accuracy": 0.9901960784313726, "eval_combined_score": 0.9915266106442577, "eval_f1": 0.9928571428571428, "eval_loss": 0.14023421704769135, "eval_runtime": 0.7766, "eval_samples_per_second": 525.375, "eval_steps_per_second": 5.151, "step": 35262 }, { "epoch": 19.0, "learning_rate": 3.1e-05, "loss": 0.1882, "step": 37221 }, { "epoch": 19.0, "eval_accuracy": 0.9926470588235294, "eval_combined_score": 0.9936401662632853, "eval_f1": 0.9946332737030412, "eval_loss": 0.14202910661697388, "eval_runtime": 0.7794, "eval_samples_per_second": 523.449, "eval_steps_per_second": 5.132, "step": 37221 }, { "epoch": 20.0, "learning_rate": 3e-05, "loss": 0.1876, "step": 39180 }, { "epoch": 20.0, "eval_accuracy": 0.9901960784313726, "eval_combined_score": 0.9915393203544763, "eval_f1": 0.9928825622775801, "eval_loss": 0.13458134233951569, "eval_runtime": 0.7781, "eval_samples_per_second": 524.385, "eval_steps_per_second": 5.141, "step": 39180 }, { "epoch": 21.0, "learning_rate": 2.9e-05, "loss": 0.1871, "step": 41139 }, { "epoch": 21.0, "eval_accuracy": 0.9950980392156863, "eval_combined_score": 0.9957633053221289, "eval_f1": 0.9964285714285714, "eval_loss": 0.13956362009048462, "eval_runtime": 0.7768, "eval_samples_per_second": 525.249, "eval_steps_per_second": 5.15, "step": 41139 }, { "epoch": 22.0, "learning_rate": 2.8000000000000003e-05, "loss": 0.1867, "step": 43098 }, { "epoch": 22.0, "eval_accuracy": 0.9950980392156863, "eval_combined_score": 0.9957633053221289, "eval_f1": 0.9964285714285714, "eval_loss": 0.14428994059562683, "eval_runtime": 0.7783, "eval_samples_per_second": 524.192, "eval_steps_per_second": 5.139, "step": 43098 }, { "epoch": 23.0, "learning_rate": 2.7000000000000002e-05, "loss": 0.1862, "step": 45057 }, { "epoch": 23.0, "eval_accuracy": 0.9926470588235294, "eval_combined_score": 0.9936497326203209, "eval_f1": 0.9946524064171123, "eval_loss": 0.1346110701560974, "eval_runtime": 0.7827, "eval_samples_per_second": 521.253, "eval_steps_per_second": 5.11, "step": 45057 }, { "epoch": 24.0, "learning_rate": 2.6000000000000002e-05, "loss": 0.1857, "step": 47016 }, { "epoch": 24.0, "eval_accuracy": 0.9950980392156863, "eval_combined_score": 0.9957633053221289, "eval_f1": 0.9964285714285714, "eval_loss": 0.13611076772212982, "eval_runtime": 0.7925, "eval_samples_per_second": 514.847, "eval_steps_per_second": 5.048, "step": 47016 }, { "epoch": 25.0, "learning_rate": 2.5e-05, "loss": 0.1854, "step": 48975 }, { "epoch": 25.0, "eval_accuracy": 0.9926470588235294, "eval_combined_score": 0.9936497326203209, "eval_f1": 0.9946524064171123, "eval_loss": 0.13179393112659454, "eval_runtime": 0.7822, "eval_samples_per_second": 521.594, "eval_steps_per_second": 5.114, "step": 48975 }, { "epoch": 26.0, "learning_rate": 2.4e-05, "loss": 0.185, "step": 50934 }, { "epoch": 26.0, "eval_accuracy": 0.9901960784313726, "eval_combined_score": 0.9915393203544763, "eval_f1": 0.9928825622775801, "eval_loss": 0.13099727034568787, "eval_runtime": 0.851, "eval_samples_per_second": 479.464, "eval_steps_per_second": 4.701, "step": 50934 }, { "epoch": 27.0, "learning_rate": 2.3000000000000003e-05, "loss": 0.1846, "step": 52893 }, { "epoch": 27.0, "eval_accuracy": 0.9926470588235294, "eval_combined_score": 0.9936497326203209, "eval_f1": 0.9946524064171123, "eval_loss": 0.13022534549236298, "eval_runtime": 0.7818, "eval_samples_per_second": 521.85, "eval_steps_per_second": 5.116, "step": 52893 }, { "epoch": 28.0, "learning_rate": 2.2000000000000003e-05, "loss": 0.1842, "step": 54852 }, { "epoch": 28.0, "eval_accuracy": 0.9950980392156863, "eval_combined_score": 0.9957633053221289, "eval_f1": 0.9964285714285714, "eval_loss": 0.13285598158836365, "eval_runtime": 0.7818, "eval_samples_per_second": 521.866, "eval_steps_per_second": 5.116, "step": 54852 }, { "epoch": 29.0, "learning_rate": 2.1e-05, "loss": 0.1839, "step": 56811 }, { "epoch": 29.0, "eval_accuracy": 0.9901960784313726, "eval_combined_score": 0.9915393203544763, "eval_f1": 0.9928825622775801, "eval_loss": 0.13004697859287262, "eval_runtime": 0.7761, "eval_samples_per_second": 525.714, "eval_steps_per_second": 5.154, "step": 56811 }, { "epoch": 30.0, "learning_rate": 2e-05, "loss": 0.1836, "step": 58770 }, { "epoch": 30.0, "eval_accuracy": 0.9901960784313726, "eval_combined_score": 0.9915393203544763, "eval_f1": 0.9928825622775801, "eval_loss": 0.1328008770942688, "eval_runtime": 0.7782, "eval_samples_per_second": 524.253, "eval_steps_per_second": 5.14, "step": 58770 }, { "epoch": 31.0, "learning_rate": 1.9e-05, "loss": 0.1832, "step": 60729 }, { "epoch": 31.0, "eval_accuracy": 0.9901960784313726, "eval_combined_score": 0.9915393203544763, "eval_f1": 0.9928825622775801, "eval_loss": 0.1326626092195511, "eval_runtime": 0.7751, "eval_samples_per_second": 526.407, "eval_steps_per_second": 5.161, "step": 60729 }, { "epoch": 32.0, "learning_rate": 1.8e-05, "loss": 0.1829, "step": 62688 }, { "epoch": 32.0, "eval_accuracy": 0.9901960784313726, "eval_combined_score": 0.9915393203544763, "eval_f1": 0.9928825622775801, "eval_loss": 0.13079187273979187, "eval_runtime": 0.7779, "eval_samples_per_second": 524.516, "eval_steps_per_second": 5.142, "step": 62688 }, { "epoch": 33.0, "learning_rate": 1.7000000000000003e-05, "loss": 0.1826, "step": 64647 }, { "epoch": 33.0, "eval_accuracy": 0.9901960784313726, "eval_combined_score": 0.9915393203544763, "eval_f1": 0.9928825622775801, "eval_loss": 0.12873521447181702, "eval_runtime": 0.7782, "eval_samples_per_second": 524.295, "eval_steps_per_second": 5.14, "step": 64647 }, { "epoch": 34.0, "learning_rate": 1.6000000000000003e-05, "loss": 0.1824, "step": 66606 }, { "epoch": 34.0, "eval_accuracy": 0.9926470588235294, "eval_combined_score": 0.9936497326203209, "eval_f1": 0.9946524064171123, "eval_loss": 0.1308642029762268, "eval_runtime": 0.7782, "eval_samples_per_second": 524.283, "eval_steps_per_second": 5.14, "step": 66606 }, { "epoch": 35.0, "learning_rate": 1.5e-05, "loss": 0.1821, "step": 68565 }, { "epoch": 35.0, "eval_accuracy": 0.9926470588235294, "eval_combined_score": 0.9936497326203209, "eval_f1": 0.9946524064171123, "eval_loss": 0.13088451325893402, "eval_runtime": 0.7758, "eval_samples_per_second": 525.926, "eval_steps_per_second": 5.156, "step": 68565 }, { "epoch": 36.0, "learning_rate": 1.4000000000000001e-05, "loss": 0.1818, "step": 70524 }, { "epoch": 36.0, "eval_accuracy": 0.9901960784313726, "eval_combined_score": 0.9915393203544763, "eval_f1": 0.9928825622775801, "eval_loss": 0.1271485686302185, "eval_runtime": 0.7809, "eval_samples_per_second": 522.489, "eval_steps_per_second": 5.122, "step": 70524 }, { "epoch": 37.0, "learning_rate": 1.3000000000000001e-05, "loss": 0.1816, "step": 72483 }, { "epoch": 37.0, "eval_accuracy": 0.9877450980392157, "eval_combined_score": 0.9894320516839064, "eval_f1": 0.9911190053285969, "eval_loss": 0.127828910946846, "eval_runtime": 0.7755, "eval_samples_per_second": 526.078, "eval_steps_per_second": 5.158, "step": 72483 }, { "epoch": 38.0, "learning_rate": 1.2e-05, "loss": 0.1813, "step": 74442 }, { "epoch": 38.0, "eval_accuracy": 0.9901960784313726, "eval_combined_score": 0.9915393203544763, "eval_f1": 0.9928825622775801, "eval_loss": 0.1280445158481598, "eval_runtime": 0.7777, "eval_samples_per_second": 524.647, "eval_steps_per_second": 5.144, "step": 74442 }, { "epoch": 39.0, "learning_rate": 1.1000000000000001e-05, "loss": 0.1811, "step": 76401 }, { "epoch": 39.0, "eval_accuracy": 0.9901960784313726, "eval_combined_score": 0.9915393203544763, "eval_f1": 0.9928825622775801, "eval_loss": 0.12891501188278198, "eval_runtime": 0.7798, "eval_samples_per_second": 523.193, "eval_steps_per_second": 5.129, "step": 76401 }, { "epoch": 40.0, "learning_rate": 1e-05, "loss": 0.1809, "step": 78360 }, { "epoch": 40.0, "eval_accuracy": 0.9877450980392157, "eval_combined_score": 0.9894320516839064, "eval_f1": 0.9911190053285969, "eval_loss": 0.12895023822784424, "eval_runtime": 0.7761, "eval_samples_per_second": 525.732, "eval_steps_per_second": 5.154, "step": 78360 }, { "epoch": 41.0, "learning_rate": 9e-06, "loss": 0.1807, "step": 80319 }, { "epoch": 41.0, "eval_accuracy": 0.9877450980392157, "eval_combined_score": 0.9894320516839064, "eval_f1": 0.9911190053285969, "eval_loss": 0.1256455034017563, "eval_runtime": 0.7742, "eval_samples_per_second": 526.998, "eval_steps_per_second": 5.167, "step": 80319 }, { "epoch": 42.0, "learning_rate": 8.000000000000001e-06, "loss": 0.1805, "step": 82278 }, { "epoch": 42.0, "eval_accuracy": 0.9926470588235294, "eval_combined_score": 0.9936497326203209, "eval_f1": 0.9946524064171123, "eval_loss": 0.12676292657852173, "eval_runtime": 0.7808, "eval_samples_per_second": 522.525, "eval_steps_per_second": 5.123, "step": 82278 }, { "epoch": 43.0, "learning_rate": 7.000000000000001e-06, "loss": 0.1803, "step": 84237 }, { "epoch": 43.0, "eval_accuracy": 0.9926470588235294, "eval_combined_score": 0.9936497326203209, "eval_f1": 0.9946524064171123, "eval_loss": 0.12742425501346588, "eval_runtime": 0.7744, "eval_samples_per_second": 526.85, "eval_steps_per_second": 5.165, "step": 84237 }, { "epoch": 44.0, "learning_rate": 6e-06, "loss": 0.1801, "step": 86196 }, { "epoch": 44.0, "eval_accuracy": 0.9926470588235294, "eval_combined_score": 0.9936497326203209, "eval_f1": 0.9946524064171123, "eval_loss": 0.12773118913173676, "eval_runtime": 0.7768, "eval_samples_per_second": 525.233, "eval_steps_per_second": 5.149, "step": 86196 }, { "epoch": 45.0, "learning_rate": 5e-06, "loss": 0.1799, "step": 88155 }, { "epoch": 45.0, "eval_accuracy": 0.9926470588235294, "eval_combined_score": 0.9936497326203209, "eval_f1": 0.9946524064171123, "eval_loss": 0.12637551128864288, "eval_runtime": 0.7756, "eval_samples_per_second": 526.076, "eval_steps_per_second": 5.158, "step": 88155 }, { "epoch": 46.0, "learning_rate": 4.000000000000001e-06, "loss": 0.1797, "step": 90114 }, { "epoch": 46.0, "eval_accuracy": 0.9901960784313726, "eval_combined_score": 0.9915393203544763, "eval_f1": 0.9928825622775801, "eval_loss": 0.12735696136951447, "eval_runtime": 0.7805, "eval_samples_per_second": 522.735, "eval_steps_per_second": 5.125, "step": 90114 }, { "epoch": 46.0, "step": 90114, "total_flos": 3.6163552369495245e+17, "train_loss": 0.19228331322046327, "train_runtime": 51035.534, "train_samples_per_second": 245.648, "train_steps_per_second": 1.919 } ], "max_steps": 97950, "num_train_epochs": 50, "total_flos": 3.6163552369495245e+17, "trial_name": null, "trial_params": null }