{ "best_metric": 0.8048780487804879, "best_model_checkpoint": "MAE-CT-CPC-Dicotomized-v4-early-stop/checkpoint-648", "epoch": 15.05625, "eval_steps": 500, "global_step": 1296, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 6.944444444444446e-07, "loss": 0.5724, "step": 10 }, { "epoch": 0.01, "learning_rate": 1.3888888888888892e-06, "loss": 0.633, "step": 20 }, { "epoch": 0.02, "learning_rate": 2.0833333333333334e-06, "loss": 0.5686, "step": 30 }, { "epoch": 0.03, "learning_rate": 2.7777777777777783e-06, "loss": 0.605, "step": 40 }, { "epoch": 0.03, "learning_rate": 3.4722222222222224e-06, "loss": 0.7115, "step": 50 }, { "epoch": 0.04, "learning_rate": 4.166666666666667e-06, "loss": 0.6218, "step": 60 }, { "epoch": 0.05, "learning_rate": 4.861111111111111e-06, "loss": 0.6258, "step": 70 }, { "epoch": 0.06, "learning_rate": 5.555555555555557e-06, "loss": 0.5342, "step": 80 }, { "epoch": 0.06, "eval_accuracy": 0.6829268292682927, "eval_loss": 0.6496201753616333, "eval_runtime": 8.1028, "eval_samples_per_second": 5.06, "eval_steps_per_second": 1.358, "step": 81 }, { "epoch": 1.01, "learning_rate": 6.25e-06, "loss": 0.6638, "step": 90 }, { "epoch": 1.01, "learning_rate": 6.944444444444445e-06, "loss": 0.5117, "step": 100 }, { "epoch": 1.02, "learning_rate": 7.638888888888888e-06, "loss": 0.6552, "step": 110 }, { "epoch": 1.03, "learning_rate": 8.333333333333334e-06, "loss": 0.5231, "step": 120 }, { "epoch": 1.03, "learning_rate": 9.027777777777779e-06, "loss": 0.6073, "step": 130 }, { "epoch": 1.04, "learning_rate": 9.722222222222223e-06, "loss": 0.6471, "step": 140 }, { "epoch": 1.05, "learning_rate": 9.953703703703704e-06, "loss": 0.6375, "step": 150 }, { "epoch": 1.05, "learning_rate": 9.876543209876543e-06, "loss": 0.6266, "step": 160 }, { "epoch": 1.06, "eval_accuracy": 0.6829268292682927, "eval_loss": 0.5853927135467529, "eval_runtime": 8.0931, "eval_samples_per_second": 5.066, "eval_steps_per_second": 1.359, "step": 162 }, { "epoch": 2.01, "learning_rate": 9.799382716049384e-06, "loss": 0.5042, "step": 170 }, { "epoch": 2.01, "learning_rate": 9.722222222222223e-06, "loss": 0.4928, "step": 180 }, { "epoch": 2.02, "learning_rate": 9.645061728395062e-06, "loss": 0.5231, "step": 190 }, { "epoch": 2.03, "learning_rate": 9.567901234567902e-06, "loss": 0.5299, "step": 200 }, { "epoch": 2.03, "learning_rate": 9.490740740740741e-06, "loss": 0.6126, "step": 210 }, { "epoch": 2.04, "learning_rate": 9.413580246913581e-06, "loss": 0.4196, "step": 220 }, { "epoch": 2.05, "learning_rate": 9.33641975308642e-06, "loss": 0.9175, "step": 230 }, { "epoch": 2.05, "learning_rate": 9.25925925925926e-06, "loss": 0.6599, "step": 240 }, { "epoch": 2.06, "eval_accuracy": 0.6829268292682927, "eval_loss": 0.5205796957015991, "eval_runtime": 8.4167, "eval_samples_per_second": 4.871, "eval_steps_per_second": 1.307, "step": 243 }, { "epoch": 3.0, "learning_rate": 9.1820987654321e-06, "loss": 0.5163, "step": 250 }, { "epoch": 3.01, "learning_rate": 9.10493827160494e-06, "loss": 0.6428, "step": 260 }, { "epoch": 3.02, "learning_rate": 9.027777777777779e-06, "loss": 0.5963, "step": 270 }, { "epoch": 3.03, "learning_rate": 8.950617283950618e-06, "loss": 0.5579, "step": 280 }, { "epoch": 3.03, "learning_rate": 8.873456790123458e-06, "loss": 0.6436, "step": 290 }, { "epoch": 3.04, "learning_rate": 8.796296296296297e-06, "loss": 0.6368, "step": 300 }, { "epoch": 3.05, "learning_rate": 8.719135802469136e-06, "loss": 0.5194, "step": 310 }, { "epoch": 3.05, "learning_rate": 8.641975308641975e-06, "loss": 0.877, "step": 320 }, { "epoch": 3.06, "eval_accuracy": 0.6097560975609756, "eval_loss": 0.5995270013809204, "eval_runtime": 8.3891, "eval_samples_per_second": 4.887, "eval_steps_per_second": 1.311, "step": 324 }, { "epoch": 4.0, "learning_rate": 8.564814814814816e-06, "loss": 0.5644, "step": 330 }, { "epoch": 4.01, "learning_rate": 8.487654320987654e-06, "loss": 0.4776, "step": 340 }, { "epoch": 4.02, "learning_rate": 8.410493827160495e-06, "loss": 0.5117, "step": 350 }, { "epoch": 4.03, "learning_rate": 8.333333333333334e-06, "loss": 0.4388, "step": 360 }, { "epoch": 4.03, "learning_rate": 8.256172839506174e-06, "loss": 0.5065, "step": 370 }, { "epoch": 4.04, "learning_rate": 8.179012345679013e-06, "loss": 0.5147, "step": 380 }, { "epoch": 4.05, "learning_rate": 8.101851851851854e-06, "loss": 0.6924, "step": 390 }, { "epoch": 4.05, "learning_rate": 8.024691358024692e-06, "loss": 0.653, "step": 400 }, { "epoch": 4.06, "eval_accuracy": 0.7560975609756098, "eval_loss": 0.4907689392566681, "eval_runtime": 7.7719, "eval_samples_per_second": 5.275, "eval_steps_per_second": 1.415, "step": 405 }, { "epoch": 5.0, "learning_rate": 7.947530864197531e-06, "loss": 0.536, "step": 410 }, { "epoch": 5.01, "learning_rate": 7.870370370370372e-06, "loss": 0.4254, "step": 420 }, { "epoch": 5.02, "learning_rate": 7.79320987654321e-06, "loss": 0.4454, "step": 430 }, { "epoch": 5.02, "learning_rate": 7.71604938271605e-06, "loss": 0.4594, "step": 440 }, { "epoch": 5.03, "learning_rate": 7.638888888888888e-06, "loss": 0.4811, "step": 450 }, { "epoch": 5.04, "learning_rate": 7.561728395061729e-06, "loss": 0.4024, "step": 460 }, { "epoch": 5.05, "learning_rate": 7.484567901234569e-06, "loss": 0.3778, "step": 470 }, { "epoch": 5.05, "learning_rate": 7.4074074074074075e-06, "loss": 0.7604, "step": 480 }, { "epoch": 5.06, "eval_accuracy": 0.7804878048780488, "eval_loss": 0.49357670545578003, "eval_runtime": 8.1369, "eval_samples_per_second": 5.039, "eval_steps_per_second": 1.352, "step": 486 }, { "epoch": 6.0, "learning_rate": 7.330246913580248e-06, "loss": 0.5151, "step": 490 }, { "epoch": 6.01, "learning_rate": 7.253086419753087e-06, "loss": 0.3392, "step": 500 }, { "epoch": 6.02, "learning_rate": 7.1759259259259266e-06, "loss": 0.5844, "step": 510 }, { "epoch": 6.02, "learning_rate": 7.098765432098766e-06, "loss": 0.5688, "step": 520 }, { "epoch": 6.03, "learning_rate": 7.021604938271606e-06, "loss": 0.247, "step": 530 }, { "epoch": 6.04, "learning_rate": 6.944444444444445e-06, "loss": 0.7517, "step": 540 }, { "epoch": 6.04, "learning_rate": 6.867283950617285e-06, "loss": 0.3608, "step": 550 }, { "epoch": 6.05, "learning_rate": 6.790123456790124e-06, "loss": 0.4795, "step": 560 }, { "epoch": 6.06, "eval_accuracy": 0.6829268292682927, "eval_loss": 0.9527755379676819, "eval_runtime": 8.3829, "eval_samples_per_second": 4.891, "eval_steps_per_second": 1.312, "step": 567 }, { "epoch": 7.0, "learning_rate": 6.712962962962963e-06, "loss": 0.3914, "step": 570 }, { "epoch": 7.01, "learning_rate": 6.635802469135803e-06, "loss": 0.4056, "step": 580 }, { "epoch": 7.02, "learning_rate": 6.558641975308642e-06, "loss": 0.4884, "step": 590 }, { "epoch": 7.02, "learning_rate": 6.481481481481482e-06, "loss": 0.3237, "step": 600 }, { "epoch": 7.03, "learning_rate": 6.404320987654321e-06, "loss": 0.4742, "step": 610 }, { "epoch": 7.04, "learning_rate": 6.3271604938271615e-06, "loss": 0.4839, "step": 620 }, { "epoch": 7.04, "learning_rate": 6.25e-06, "loss": 0.265, "step": 630 }, { "epoch": 7.05, "learning_rate": 6.17283950617284e-06, "loss": 0.278, "step": 640 }, { "epoch": 7.06, "eval_accuracy": 0.8048780487804879, "eval_loss": 0.5564919710159302, "eval_runtime": 8.4164, "eval_samples_per_second": 4.871, "eval_steps_per_second": 1.307, "step": 648 }, { "epoch": 8.0, "learning_rate": 6.09567901234568e-06, "loss": 0.1885, "step": 650 }, { "epoch": 8.01, "learning_rate": 6.018518518518519e-06, "loss": 0.2307, "step": 660 }, { "epoch": 8.02, "learning_rate": 5.941358024691358e-06, "loss": 0.4786, "step": 670 }, { "epoch": 8.02, "learning_rate": 5.864197530864199e-06, "loss": 0.3708, "step": 680 }, { "epoch": 8.03, "learning_rate": 5.787037037037038e-06, "loss": 0.5062, "step": 690 }, { "epoch": 8.04, "learning_rate": 5.7098765432098764e-06, "loss": 0.7825, "step": 700 }, { "epoch": 8.04, "learning_rate": 5.632716049382716e-06, "loss": 0.1902, "step": 710 }, { "epoch": 8.05, "learning_rate": 5.555555555555557e-06, "loss": 0.3548, "step": 720 }, { "epoch": 8.06, "eval_accuracy": 0.7560975609756098, "eval_loss": 0.5855191946029663, "eval_runtime": 7.7883, "eval_samples_per_second": 5.264, "eval_steps_per_second": 1.412, "step": 729 }, { "epoch": 9.0, "learning_rate": 5.4783950617283955e-06, "loss": 0.2654, "step": 730 }, { "epoch": 9.01, "learning_rate": 5.401234567901234e-06, "loss": 0.2532, "step": 740 }, { "epoch": 9.01, "learning_rate": 5.324074074074075e-06, "loss": 0.243, "step": 750 }, { "epoch": 9.02, "learning_rate": 5.246913580246914e-06, "loss": 0.4346, "step": 760 }, { "epoch": 9.03, "learning_rate": 5.1697530864197534e-06, "loss": 0.1327, "step": 770 }, { "epoch": 9.04, "learning_rate": 5.092592592592593e-06, "loss": 0.3484, "step": 780 }, { "epoch": 9.04, "learning_rate": 5.015432098765433e-06, "loss": 0.4213, "step": 790 }, { "epoch": 9.05, "learning_rate": 4.938271604938272e-06, "loss": 0.3673, "step": 800 }, { "epoch": 9.06, "learning_rate": 4.861111111111111e-06, "loss": 0.4386, "step": 810 }, { "epoch": 9.06, "eval_accuracy": 0.7560975609756098, "eval_loss": 0.6577650904655457, "eval_runtime": 8.1212, "eval_samples_per_second": 5.049, "eval_steps_per_second": 1.354, "step": 810 }, { "epoch": 10.01, "learning_rate": 4.783950617283951e-06, "loss": 0.5757, "step": 820 }, { "epoch": 10.01, "learning_rate": 4.706790123456791e-06, "loss": 0.2701, "step": 830 }, { "epoch": 10.02, "learning_rate": 4.62962962962963e-06, "loss": 0.1286, "step": 840 }, { "epoch": 10.03, "learning_rate": 4.55246913580247e-06, "loss": 0.2345, "step": 850 }, { "epoch": 10.03, "learning_rate": 4.475308641975309e-06, "loss": 0.1286, "step": 860 }, { "epoch": 10.04, "learning_rate": 4.398148148148149e-06, "loss": 0.3579, "step": 870 }, { "epoch": 10.05, "learning_rate": 4.3209876543209875e-06, "loss": 0.408, "step": 880 }, { "epoch": 10.06, "learning_rate": 4.243827160493827e-06, "loss": 0.3007, "step": 890 }, { "epoch": 10.06, "eval_accuracy": 0.7804878048780488, "eval_loss": 0.6622430682182312, "eval_runtime": 8.0725, "eval_samples_per_second": 5.079, "eval_steps_per_second": 1.363, "step": 891 }, { "epoch": 11.01, "learning_rate": 4.166666666666667e-06, "loss": 0.4887, "step": 900 }, { "epoch": 11.01, "learning_rate": 4.0895061728395066e-06, "loss": 0.1813, "step": 910 }, { "epoch": 11.02, "learning_rate": 4.012345679012346e-06, "loss": 0.1648, "step": 920 }, { "epoch": 11.03, "learning_rate": 3.935185185185186e-06, "loss": 0.4133, "step": 930 }, { "epoch": 11.03, "learning_rate": 3.858024691358025e-06, "loss": 0.1691, "step": 940 }, { "epoch": 11.04, "learning_rate": 3.7808641975308645e-06, "loss": 0.1626, "step": 950 }, { "epoch": 11.05, "learning_rate": 3.7037037037037037e-06, "loss": 0.2215, "step": 960 }, { "epoch": 11.05, "learning_rate": 3.6265432098765434e-06, "loss": 0.313, "step": 970 }, { "epoch": 11.06, "eval_accuracy": 0.7560975609756098, "eval_loss": 0.8349580764770508, "eval_runtime": 8.0234, "eval_samples_per_second": 5.11, "eval_steps_per_second": 1.371, "step": 972 }, { "epoch": 12.01, "learning_rate": 3.549382716049383e-06, "loss": 0.4007, "step": 980 }, { "epoch": 12.01, "learning_rate": 3.4722222222222224e-06, "loss": 0.2461, "step": 990 }, { "epoch": 12.02, "learning_rate": 3.395061728395062e-06, "loss": 0.2541, "step": 1000 }, { "epoch": 12.03, "learning_rate": 3.3179012345679013e-06, "loss": 0.3941, "step": 1010 }, { "epoch": 12.03, "learning_rate": 3.240740740740741e-06, "loss": 0.0845, "step": 1020 }, { "epoch": 12.04, "learning_rate": 3.1635802469135807e-06, "loss": 0.2944, "step": 1030 }, { "epoch": 12.05, "learning_rate": 3.08641975308642e-06, "loss": 0.2391, "step": 1040 }, { "epoch": 12.05, "learning_rate": 3.0092592592592597e-06, "loss": 0.0554, "step": 1050 }, { "epoch": 12.06, "eval_accuracy": 0.7073170731707317, "eval_loss": 1.0043153762817383, "eval_runtime": 7.5796, "eval_samples_per_second": 5.409, "eval_steps_per_second": 1.451, "step": 1053 }, { "epoch": 13.0, "learning_rate": 2.9320987654320994e-06, "loss": 0.2887, "step": 1060 }, { "epoch": 13.01, "learning_rate": 2.8549382716049382e-06, "loss": 0.4346, "step": 1070 }, { "epoch": 13.02, "learning_rate": 2.7777777777777783e-06, "loss": 0.3038, "step": 1080 }, { "epoch": 13.03, "learning_rate": 2.700617283950617e-06, "loss": 0.1073, "step": 1090 }, { "epoch": 13.03, "learning_rate": 2.623456790123457e-06, "loss": 0.2334, "step": 1100 }, { "epoch": 13.04, "learning_rate": 2.5462962962962966e-06, "loss": 0.34, "step": 1110 }, { "epoch": 13.05, "learning_rate": 2.469135802469136e-06, "loss": 0.0705, "step": 1120 }, { "epoch": 13.05, "learning_rate": 2.3919753086419755e-06, "loss": 0.2804, "step": 1130 }, { "epoch": 13.06, "eval_accuracy": 0.7073170731707317, "eval_loss": 1.0246809720993042, "eval_runtime": 7.4342, "eval_samples_per_second": 5.515, "eval_steps_per_second": 1.48, "step": 1134 }, { "epoch": 14.0, "learning_rate": 2.314814814814815e-06, "loss": 0.0367, "step": 1140 }, { "epoch": 14.01, "learning_rate": 2.2376543209876545e-06, "loss": 0.2872, "step": 1150 }, { "epoch": 14.02, "learning_rate": 2.1604938271604937e-06, "loss": 0.0806, "step": 1160 }, { "epoch": 14.03, "learning_rate": 2.0833333333333334e-06, "loss": 0.1376, "step": 1170 }, { "epoch": 14.03, "learning_rate": 2.006172839506173e-06, "loss": 0.466, "step": 1180 }, { "epoch": 14.04, "learning_rate": 1.9290123456790124e-06, "loss": 0.2337, "step": 1190 }, { "epoch": 14.05, "learning_rate": 1.8518518518518519e-06, "loss": 0.3175, "step": 1200 }, { "epoch": 14.05, "learning_rate": 1.7746913580246916e-06, "loss": 0.1424, "step": 1210 }, { "epoch": 14.06, "eval_accuracy": 0.7804878048780488, "eval_loss": 0.8541743755340576, "eval_runtime": 7.7108, "eval_samples_per_second": 5.317, "eval_steps_per_second": 1.427, "step": 1215 }, { "epoch": 15.0, "learning_rate": 1.697530864197531e-06, "loss": 0.1911, "step": 1220 }, { "epoch": 15.01, "learning_rate": 1.6203703703703705e-06, "loss": 0.0041, "step": 1230 }, { "epoch": 15.02, "learning_rate": 1.54320987654321e-06, "loss": 0.3306, "step": 1240 }, { "epoch": 15.02, "learning_rate": 1.4660493827160497e-06, "loss": 0.1108, "step": 1250 }, { "epoch": 15.03, "learning_rate": 1.3888888888888892e-06, "loss": 0.1808, "step": 1260 }, { "epoch": 15.04, "learning_rate": 1.3117283950617284e-06, "loss": 0.1984, "step": 1270 }, { "epoch": 15.05, "learning_rate": 1.234567901234568e-06, "loss": 0.0157, "step": 1280 }, { "epoch": 15.05, "learning_rate": 1.1574074074074076e-06, "loss": 0.4692, "step": 1290 }, { "epoch": 15.06, "eval_accuracy": 0.7317073170731707, "eval_loss": 1.0264424085617065, "eval_runtime": 7.57, "eval_samples_per_second": 5.416, "eval_steps_per_second": 1.453, "step": 1296 }, { "epoch": 15.06, "step": 1296, "total_flos": 2.262256757640895e+19, "train_loss": 0.4041450611419148, "train_runtime": 2319.3656, "train_samples_per_second": 2.483, "train_steps_per_second": 0.621 }, { "epoch": 15.06, "eval_accuracy": 0.8292682926829268, "eval_loss": 0.4281896650791168, "eval_runtime": 7.8902, "eval_samples_per_second": 5.196, "eval_steps_per_second": 1.394, "step": 1296 }, { "epoch": 15.06, "eval_accuracy": 0.8292682926829268, "eval_loss": 0.4281897246837616, "eval_runtime": 7.7423, "eval_samples_per_second": 5.296, "eval_steps_per_second": 1.421, "step": 1296 } ], "logging_steps": 10, "max_steps": 1440, "num_input_tokens_seen": 0, "num_train_epochs": 9223372036854775807, "save_steps": 500, "total_flos": 2.262256757640895e+19, "train_batch_size": 4, "trial_name": null, "trial_params": null }